From df95ada79073b3969b91dd0e35c853da85fcba1b Mon Sep 17 00:00:00 2001 From: chessai Date: Wed, 26 Jun 2024 19:18:58 -0500 Subject: [PATCH 1/4] fold in chainweb-storage Change-Id: I3739cec07a0fefe6b1f1f2aae4fff245b6b593ca --- Setup.hs | 445 ---------- cabal.project | 8 +- cabal.project.freeze | 1 - chainweb.cabal | 56 +- vendored/chainweb-storage/LICENSE | 30 + vendored/chainweb-storage/README.md | 33 + .../src/Chainweb/Storage/DedupStore.hs | 436 ++++++++++ .../src/Chainweb/Storage/Table.hs | 184 +++++ .../src/Chainweb/Storage/Table/Forgetful.hs | 34 + .../src/Chainweb/Storage/Table/HashMap.hs | 65 ++ .../src/Chainweb/Storage/Table/RocksDB.hs | 780 ++++++++++++++++++ 11 files changed, 1603 insertions(+), 469 deletions(-) delete mode 100644 Setup.hs create mode 100644 vendored/chainweb-storage/LICENSE create mode 100644 vendored/chainweb-storage/README.md create mode 100644 vendored/chainweb-storage/src/Chainweb/Storage/DedupStore.hs create mode 100644 vendored/chainweb-storage/src/Chainweb/Storage/Table.hs create mode 100644 vendored/chainweb-storage/src/Chainweb/Storage/Table/Forgetful.hs create mode 100644 vendored/chainweb-storage/src/Chainweb/Storage/Table/HashMap.hs create mode 100644 vendored/chainweb-storage/src/Chainweb/Storage/Table/RocksDB.hs diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 74302cb25e..0000000000 --- a/Setup.hs +++ /dev/null @@ -1,445 +0,0 @@ --- ------------------------------------------------------ -- --- Copyright © 2019-2023 Kadena LLC --- Copyright © 2019 Colin Woodbury --- Copyright © 2015-2018 Lars Kuhtz --- Copyright © 2014 AlephCloud Systems, Inc. --- ------------------------------------------------------ -- - -{-# LANGUAGE CPP #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE MultiWayIf #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ScopedTypeVariables #-} - -{-# OPTIONS_HADDOCK show-extensions #-} - --- | This module contains a @Setup.hs@ script that hooks into the cabal build --- process at the end of the configuration phase and generates a module with --- package information for each component of the cabal package. --- --- The modules are created in the /autogen/ build directories where also the --- @Path_@ modules are created by cabal's simple build setup. --- --- = Usage as Setup Script --- --- There are three ways how this module can be used: --- --- 1. Copy the code of this module into a file called @Setup.hs@ in the root --- directory of your package. --- --- 2. If the /configuration-tools/ package is already installed in the system --- where the build is done, following code can be used as @Setup.hs@ script: --- --- > module Main (main) where --- > --- > import Configuration.Utils.Setup --- --- 3. For usage within a more complex @Setup.hs@ script you shall import this --- module qualified and use the 'mkPkgInfoModules' function. 
For example: --- --- > module Main (main) where --- > --- > import qualified Configuration.Utils.Setup as ConfTools --- > --- > main :: IO () --- > main = defaultMainWithHooks (ConfTools.mkPkgInfoModules simpleUserHooks) --- > --- --- With all methods the field @Build-Type@ in the package description (cabal) file --- must be set to @Custom@: --- --- > Build-Type: Custom --- --- --- = Integration With "Configuration.Utils" --- --- You can integrate the information provided by the @PkgInfo@ modules with the --- command line interface of an application by importing the respective module --- for the component and using the --- 'Configuration.Utils.runWithPkgInfoConfiguration' function from the module --- "Configuration.Utils" as show in the following example: --- --- > {-# LANGUAGE OverloadedStrings #-} --- > {-# LANGUAGE FlexibleInstances #-} --- > --- > module Main --- > ( main --- > ) where --- > --- > import Configuration.Utils --- > import PkgInfo --- > --- > instance FromJSON (() -> ()) where parseJSON _ = pure id --- > --- > mainInfo :: ProgramInfo () --- > mainInfo = programInfo "Hello World" (pure id) () --- > --- > main :: IO () --- > main = runWithPkgInfoConfiguration mainInfo pkgInfo . const $ putStrLn "hello world" --- --- With that the resulting application supports the following additional command --- line options: --- --- [@--version@, @-v@] --- prints the version of the application and exits. --- --- [@--info@, @-i@] --- prints a short info message for the application and exits. --- --- [@--long-info@] --- print a detailed info message for the application and exits. --- Beside component name, package name, version, revision, and copyright --- the message also contain information about the compiler that --- was used for the build, the build architecture, build flags, --- the author, the license type, and a list of all direct and --- indirect dependencies along with their licenses and copyrights. --- --- [@--license@] --- prints the text of the lincense of the application and exits. --- -module Main -( main -) where - -import qualified Distribution.Compat.Graph as Graph -import qualified Distribution.InstalledPackageInfo as I -import Distribution.PackageDescription -import Distribution.Pretty -import Distribution.Simple -import Distribution.Simple.BuildPaths -import Distribution.Simple.LocalBuildInfo -import Distribution.Simple.PackageIndex -import Distribution.Simple.Setup -import Distribution.Text -import Distribution.Types.LocalBuildInfo -import Distribution.Types.UnqualComponentName -import Distribution.Utils.Path -import Distribution.Utils.ShortText - -import System.Process - -import Control.Applicative -import Control.Monad - -import qualified Data.ByteString as B -import Data.ByteString.Char8 (pack) -import Data.Char (isSpace) -import Data.List (intercalate) -import Data.Maybe (fromMaybe) -import Data.Monoid - -import System.Directory - (canonicalizePath, createDirectoryIfMissing, doesDirectoryExist, - doesFileExist, getCurrentDirectory) -import System.Environment (lookupEnv) -import System.Exit (ExitCode(ExitSuccess)) -import System.FilePath (isDrive, takeDirectory, ()) - --- | Include this function when your setup doesn't contain any --- extra functionality. --- -main :: IO () -main = defaultMainWithHooks (mkPkgInfoModules simpleUserHooks) - --- | Modifies the given record of hooks by adding functionality that --- creates a package info module for each component of the cabal package. --- --- This function is intended for usage in more complex @Setup.hs@ scripts. 
--- If your setup doesn't contain any other function you can just import --- the 'main' function from this module. --- --- The modules are created in the /autogen/ build directories where also the --- @Path_@ modules are created by cabal's simple build setup. --- -mkPkgInfoModules - :: UserHooks - -> UserHooks -mkPkgInfoModules hooks = hooks - { postConf = mkPkgInfoModulesPostConf (postConf hooks) - } - --- -------------------------------------------------------------------------- -- --- Compat Implementations - -prettyLicense :: I.InstalledPackageInfo -> String -prettyLicense = either prettyShow prettyShow . I.license - -ft :: ShortText -> String -ft = fromShortText - --- -------------------------------------------------------------------------- -- --- Cabal 2.0 - -mkPkgInfoModulesPostConf - :: (Args -> ConfigFlags -> PackageDescription -> LocalBuildInfo -> IO ()) - -> Args - -> ConfigFlags - -> PackageDescription - -> LocalBuildInfo - -> IO () -mkPkgInfoModulesPostConf hook args flags pkgDesc bInfo = do - mapM_ (updatePkgInfoModule pkgDesc bInfo) $ Graph.toList $ componentGraph bInfo - hook args flags pkgDesc bInfo - -updatePkgInfoModule :: PackageDescription -> LocalBuildInfo -> ComponentLocalBuildInfo -> IO () -updatePkgInfoModule pkgDesc bInfo clbInfo = do - createDirectoryIfMissing True dirName - moduleBytes <- pkgInfoModule moduleName cName pkgDesc bInfo - updateFile fileName moduleBytes - - -- legacy module - legacyModuleBytes <- pkgInfoModule legacyModuleName cName pkgDesc bInfo - updateFile legacyFileName legacyModuleBytes - - where - dirName = autogenComponentModulesDir bInfo clbInfo - cName = unUnqualComponentName <$> componentNameString (componentLocalName clbInfo) - - moduleName = pkgInfoModuleName - fileName = dirName ++ "/" ++ moduleName ++ ".hs" - - legacyModuleName = legacyPkgInfoModuleName cName - legacyFileName = dirName ++ "/" ++ legacyModuleName ++ ".hs" - --- -------------------------------------------------------------------------- -- --- Generate PkgInfo Module - -pkgInfoModuleName :: String -pkgInfoModuleName = "PkgInfo" - -updateFile :: FilePath -> B.ByteString -> IO () -updateFile fileName content = do - x <- doesFileExist fileName - if | not x -> update - | otherwise -> do - oldRevisionFile <- B.readFile fileName - when (oldRevisionFile /= content) update - where - update = B.writeFile fileName content - -legacyPkgInfoModuleName :: Maybe String -> String -legacyPkgInfoModuleName Nothing = "PkgInfo" -legacyPkgInfoModuleName (Just cn) = "PkgInfo_" ++ map tr cn - where - tr '-' = '_' - tr c = c - -trim :: String -> String -trim = f . f - where f = reverse . dropWhile isSpace - --- -------------------------------------------------------------------------- -- --- VCS - -getVCS :: IO (Maybe KnownRepoType) -getVCS = getCurrentDirectory >>= getVcsOfDir - where - getVcsOfDir d = do - canonicDir <- canonicalizePath d - doesDirectoryExist (canonicDir ".hg") >>= \x0 -> if x0 - then return (Just Mercurial) - else doesDirectoryExist (canonicDir ".git") >>= \x1 -> if x1 - then return $ Just Git - else if isDrive canonicDir - then return Nothing - else getVcsOfDir (takeDirectory canonicDir) - --- | Returns tag, revision, and branch name. --- -hgInfo :: IO (String, String, String) -hgInfo = do - tag <- trim <$> readProcess "hg" ["id", "-r", "max(ancestors(\".\") and tag())", "-t"] "" - rev <- trim <$> readProcess "hg" ["id", "-i"] "" - branch <- trim <$> readProcess "hg" ["id", "-b"] "" - return (tag, rev, branch) - --- | Returns tag, revision, and branch name. 
--- -gitInfo :: IO (String, String, String) -gitInfo = do - tag <- do - (exitCode, out, _err) <- readProcessWithExitCode "git" ["describe", "--exact-match", "--tags", "--abbrev=0"] "" - case exitCode of - ExitSuccess -> return $ trim out - _ -> return "" - rev <- trim <$> readProcess "git" ["rev-parse", "--short", "HEAD"] "" - branch <- trim <$> readProcess "git" ["rev-parse", "--abbrev-ref", "HEAD"] "" - return (tag, rev, branch) - --- | Returns tag, revision, and branch name. --- -gitlabCiVcsInfo :: IO (Maybe (String, String, String)) -gitlabCiVcsInfo = do - lookupEnv "CI_COMMIT_SHORT_SHA" >>= \case - Nothing -> return Nothing - Just _ -> do - rev <- fromMaybe "" <$> lookupEnv "CI_COMMIT_SHORT_SHA" - branch <- fromMaybe "" <$> lookupEnv "CI_COMMIT_REF_NAME" - tag <- fromMaybe "" <$> lookupEnv "CI_COMMIT_TAG" - return $ Just (tag, rev, branch) - --- | The file format is revision, branch, and tag separate by newline --- characters. --- -fileVcsInfo :: IO (Maybe (String, String, String)) -fileVcsInfo = do - doesFileExist ".vcs-info" >>= \x -> if x - then do - (rev : branch : tag : _) <- (<> ["", "", ""]) . lines <$> readFile ".vcs-info" - return $ Just (tag, rev, branch) - else return Nothing - --- | Returns tag, revision, and branch name. --- -noVcsInfo :: IO (String, String, String) -noVcsInfo = return ("", "", "") - --- | Returns tag, revision, and branch name. --- -getVcsInfo :: IO (String, String, String) -getVcsInfo = getVCS >>= \case - Just Mercurial -> hgInfo - Just Git -> gitInfo - _ -> gitlabCiVcsInfo >>= \case - Just a -> return a - Nothing -> fileVcsInfo >>= \case - Just a -> return a - Nothing -> noVcsInfo - --- -------------------------------------------------------------------------- -- --- Generate Module - -pkgInfoModule :: String -> Maybe String -> PackageDescription -> LocalBuildInfo -> IO B.ByteString -pkgInfoModule moduleName cName pkgDesc bInfo = do - (tag, revision, branch) <- getVcsInfo - - let vcsBranch = if branch == "default" || branch == "master" then "" else branch - vcsVersion = intercalate "-" . filter (/= "") $ [tag, revision, vcsBranch] - flags = map (unFlagName . fst) . filter snd . unFlagAssignment . configConfigurationsFlags . configFlags $ bInfo - - licenseString <- licenseFilesText pkgDesc - - return $ B.intercalate "\n" - [ "{-# LANGUAGE OverloadedStrings #-}" - , "{-# LANGUAGE RankNTypes #-}" - , "" - , "module " <> pack moduleName <> " " <> deprecatedMsg <> " where" - , "" - , " import Data.String (IsString)" - , " import Data.Monoid" - , " import Prelude hiding ((<>))" - , "" - , " name :: IsString a => Maybe a" - , " name = " <> maybe "Nothing" (\x -> "Just \"" <> pack x <> "\"") cName - , "" - , " tag :: IsString a => a" - , " tag = \"" <> pack tag <> "\"" - , "" - , " revision :: IsString a => a" - , " revision = \"" <> pack revision <> "\"" - , "" - , " branch :: IsString a => a" - , " branch = \"" <> pack branch <> "\"" - , "" - , " branch' :: IsString a => a" - , " branch' = \"" <> pack vcsBranch <> "\"" - , "" - , " vcsVersion :: IsString a => a" - , " vcsVersion = \"" <> pack vcsVersion <> "\"" - , "" - , " compiler :: IsString a => a" - , " compiler = \"" <> (pack . display . compilerId . compiler) bInfo <> "\"" - , "" - , " flags :: IsString a => [a]" - , " flags = " <> (pack . show) flags - , "" - , " optimisation :: IsString a => a" - , " optimisation = \"" <> (displayOptimisationLevel . withOptimization) bInfo <> "\"" - , "" - , " arch :: IsString a => a" - , " arch = \"" <> (pack . display . 
hostPlatform) bInfo <> "\"" - , "" - , " license :: IsString a => a" - , " license = \"" <> (pack . prettyShow . license) pkgDesc <> "\"" - , "" - , " licenseText :: IsString a => a" - , " licenseText = " <> (pack . show) licenseString - , "" - , " copyright :: IsString a => a" - , " copyright = " <> (pack . show . copyright) pkgDesc - , "" - , " author :: IsString a => a" - , " author = \"" <> (pack . ft . author) pkgDesc <> "\"" - , "" - , " homepage :: IsString a => a" - , " homepage = \"" <> (pack . ft . homepage) pkgDesc <> "\"" - , "" - , " package :: IsString a => a" - , " package = \"" <> (pack . display . package) pkgDesc <> "\"" - , "" - , " packageName :: IsString a => a" - , " packageName = \"" <> (pack . display . packageName) pkgDesc <> "\"" - , "" - , " packageVersion :: IsString a => a" - , " packageVersion = \"" <> (pack . display . packageVersion) pkgDesc <> "\"" - , "" - , " dependencies :: IsString a => [a]" - , " dependencies = " <> (pack . show . map (display . packageId) . allPackages . installedPkgs) bInfo - , "" - , " dependenciesWithLicenses :: IsString a => [a]" - , " dependenciesWithLicenses = " <> (pack . show . map pkgIdWithLicense . allPackages . installedPkgs) bInfo - , "" - , " versionString :: (Monoid a, IsString a) => a" - , " versionString = case name of" - , " Nothing -> package <> \" (revision \" <> vcsVersion <> \")\"" - , " Just n -> n <> \"-\" <> packageVersion <> \" (package \" <> package <> \" revision \" <> vcsVersion <> \")\"" - , "" - , " info :: (Monoid a, IsString a) => a" - , " info = versionString <> \"\\n\" <> copyright" - , "" - , " longInfo :: (Monoid a, IsString a) => a" - , " longInfo = info <> \"\\n\\n\"" - , " <> \"Author: \" <> author <> \"\\n\"" - , " <> \"License: \" <> license <> \"\\n\"" - , " <> \"Homepage: \" <> homepage <> \"\\n\"" - , " <> \"Build with: \" <> compiler <> \" (\" <> arch <> \")\" <> \"\\n\"" - , " <> \"Build flags: \" <> mconcat (map (\\x -> \" \" <> x) flags) <> \"\\n\"" - , " <> \"Optimisation: \" <> optimisation <> \"\\n\\n\"" - , " <> \"Dependencies:\\n\" <> mconcat (map (\\x -> \" \" <> x <> \"\\n\") dependenciesWithLicenses)" - , "" - , " pkgInfo :: (Monoid a, IsString a) => (a, a, a, a)" - , " pkgInfo =" - , " ( info" - , " , longInfo" - , " , versionString" - , " , licenseText" - , " )" - , "" - ] - where - displayOptimisationLevel NoOptimisation = "none" - displayOptimisationLevel NormalOptimisation = "normal" - displayOptimisationLevel MaximumOptimisation = "maximum" - - deprecatedMsg = if moduleName /= pkgInfoModuleName - then "{-# DEPRECATED \"Update to Cabal 2.0 or later and use just PkgInfo as module name.\" #-}" - else "" - -licenseFilesText :: PackageDescription -> IO B.ByteString -licenseFilesText pkgDesc = - B.intercalate "\n------------------------------------------------------------\n" <$> mapM fileTextStr - (licenseFiles pkgDesc) - where - fileTextStr = fileText . getSymbolicPath - fileText file = doesFileExist file >>= \x -> if x - then B.readFile file - else return "" - -pkgIdWithLicense :: I.InstalledPackageInfo -> String -pkgIdWithLicense a = (display . packageId) a - ++ " [" - ++ prettyLicense a - ++ (if cr /= "" then ", " ++ cr else "") - ++ "]" - where - cr = (unwords . words . ft . I.copyright) a diff --git a/cabal.project b/cabal.project index 0a22c1ab57..a6d0b218a0 100644 --- a/cabal.project +++ b/cabal.project @@ -1,4 +1,4 @@ -packages: chainweb.cabal +packages: . 
debug-info: True @@ -84,12 +84,6 @@ source-repository-package tag: 1d260bfaa48312b54851057885de4c43c420e35f --sha256: 0fzq4mzaszj5clvixx9mn1x6r4dcrnwvbl2znd0p5mmy5h2jr0hh -source-repository-package - type: git - location: https://github.com/kadena-io/chainweb-storage.git - tag: 4b45c1ab9c070c6d16a058bcbab0c06ac0fb6d4e - --sha256: 0m6c7kl6x5a3k02q2i7qzfx91kxz19dzav0piqfxra52bq0x3sm6 - source-repository-package type: git location: https://github.com/kadena-io/rocksdb-haskell.git diff --git a/cabal.project.freeze b/cabal.project.freeze index 5c02e6683f..1be9de4a7f 100644 --- a/cabal.project.freeze +++ b/cabal.project.freeze @@ -63,7 +63,6 @@ constraints: any.Boolean ==0.2.4, any.cborg ==0.2.10.0, any.cereal ==0.5.8.3, chainweb -debug -ed25519 -ghc-flags, - any.chainweb-storage ==0.1.0.0, any.character-ps ==0.1, any.charset ==0.3.10, any.chronos ==1.1.5.1, diff --git a/chainweb.cabal b/chainweb.cabal index a8b1bfde49..d9b9105c4d 100644 --- a/chainweb.cabal +++ b/chainweb.cabal @@ -10,9 +10,9 @@ license: BSD-3-Clause license-file: LICENSE author: Chainweb Dev Team maintainer: chainweb-dev@kadena.io -copyright: Copyright (C) 2018 - 2023 Kadena LLC +copyright: Copyright (C) 2018 - 2024 Kadena LLC category: Blockchain, Currency, Bitcoin, Kadena -build-type: Custom +build-type: Simple tested-with: GHC == 9.6 @@ -108,19 +108,43 @@ common warning-flags -- problems with ghci. -Wno-missing-home-modules -custom-setup - setup-depends: - Cabal >= 3.8 - , base >= 4.12 && < 5 - , bytestring >= 0.10.12 - , directory >= 1.3 - , filepath >= 1.4 - , process >= 1.5 - -- -------------------------------------------------------------------------- -- -- Chainweb Library -- -------------------------------------------------------------------------- -- +library chainweb-storage + -- import: warning-flags, debugging-flags + hs-source-dirs: vendored/chainweb-storage/src + default-language: Haskell2010 + exposed-modules: + Chainweb.Storage.Table + Chainweb.Storage.Table.Forgetful + Chainweb.Storage.Table.HashMap + Chainweb.Storage.Table.RocksDB + Chainweb.Storage.DedupStore + build-depends: + , base >=4.10 && <5 + , bytestring >=0.10 + , containers >=0.5 + , cryptonite >= 0.25 + , deepseq >=1.4 + , directory >=1.3 + , exceptions >=0.10 + , filepath + , hashable >=1.2 + , lens >=4.16 + , memory >=0.14 + , mtl >= 2.2 + , nothunks >= 0.1.0.0 + , rocksdb-haskell-kadena >=1.1.0 + , stm >=2.4 + , streaming >=0.2 + , temporary >=1.3 + , text >=1.2 + , transformers >=0.5 + , unordered-containers >=0.2 + , vector >=0.12 + library import: warning-flags, debugging-flags default-language: Haskell2010 @@ -371,7 +395,7 @@ library , bytestring >= 0.10.12 , case-insensitive >= 1.2 , cassava >= 0.5.1 - , chainweb-storage >= 0.1 + , chainweb:chainweb-storage , chronos >= 1.1 , clock >= 0.7 , configuration-tools >= 0.6 @@ -568,7 +592,7 @@ test-suite chainweb-tests -- due to change in error message that is inlined in pact , bytestring >= 0.10.12 , case-insensitive >= 1.2 - , chainweb-storage >= 0.1 + , chainweb:chainweb-storage , crypton-connection >=0.2 , containers >= 0.5 , crypton >= 0.31 @@ -668,7 +692,7 @@ executable chainweb-node -- external , async >= 2.2 , base >= 4.12 && < 5 - , chainweb-storage >= 0.1 + , chainweb:chainweb-storage , configuration-tools >= 0.6 , deepseq >= 1.4 , directory >= 1.3 @@ -750,7 +774,7 @@ executable cwtool , case-insensitive >= 1.2 , cassava >= 0.5.1 , cereal >= 0.5 - , chainweb-storage >= 0.1 + , chainweb:chainweb-storage , chronos >= 1.1 , configuration-tools >= 0.6 , crypton-connection >=0.2 @@ -845,7 
+869,7 @@ benchmark bench , bytestring >= 0.10.12 , case-insensitive >= 1.2 , chainweb - , chainweb-storage >= 0.1 + , chainweb:chainweb-storage , containers >= 0.5 , criterion , deepseq >= 1.4 diff --git a/vendored/chainweb-storage/LICENSE b/vendored/chainweb-storage/LICENSE new file mode 100644 index 0000000000..5a492b5e94 --- /dev/null +++ b/vendored/chainweb-storage/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2019-2022, Kadena LLC + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Lars Kuhtz nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendored/chainweb-storage/README.md b/vendored/chainweb-storage/README.md new file mode 100644 index 0000000000..67af61c042 --- /dev/null +++ b/vendored/chainweb-storage/README.md @@ -0,0 +1,33 @@ +This package provides some storage tools that are used in +[chainweb](https://github.com/kadena-io/chainweb). + +## Installation + +This package depends on RocksDb and a C++ compiler which must be installed prior to building. 
+ +On Ubuntu: + +```bash +sudo apt-get install librocksdb-dev g++ +``` + +On MacOSX using [homebrew](https://brew.sh): + +```bash +brew install rocksdb +``` + +On Windows using [msys2](https://www.msys2.org): + +```bash +pacman -S mingw-w64-x86_64-rocksdb g++ +``` + +On windows, for GHC to find installed libraries you'll have to add the following +settings to your `cabal.project.local` file: + +```cabal +package rocksdb-haskell + extra-lib-dirs: C:\\msys64\\mingw64\\lib + extra-include-dirs: C:\\msys64\\mingw64\\include +``` diff --git a/vendored/chainweb-storage/src/Chainweb/Storage/DedupStore.hs b/vendored/chainweb-storage/src/Chainweb/Storage/DedupStore.hs new file mode 100644 index 0000000000..57991d3996 --- /dev/null +++ b/vendored/chainweb-storage/src/Chainweb/Storage/DedupStore.hs @@ -0,0 +1,436 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE FlexibleContexts #-} + +-- | +-- Module: Chainweb.Storage.DedupStore +-- Copyright: Copyright © 2019 Kadena LLC. +-- License: MIT +-- Maintainer: Lars Kuhtz +-- Stability: experimental +-- +-- A Deduplicated Key-Value Store +-- +module Chainweb.Storage.DedupStore +( +-- * Deduplicated Key-Value Store + DedupStore(..) +, newDedupStore +, dedupInsert +, dedupLookup +, DedupStoreException(..) + +-- * Low level Deduplicated Chunk Store +, dedupStore +, dedupStore' +, dedupRestore +) where + +import Control.DeepSeq +import Control.Exception (Exception) +import Control.Monad +import Control.Monad.Catch (throwM) + +import qualified Crypto.Hash as C + +import Data.Bits +import qualified Data.ByteArray as BA +import qualified Data.ByteString as B +import qualified Data.ByteString.Builder as BB +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Unsafe as BU +import Data.Hashable +import Data.Int +import qualified Data.List as L +import Data.String +import qualified Data.Text as T +import Data.Word + +import Foreign.Ptr +import Foreign.Storable + +import GHC.Generics +import GHC.Stack + +import System.IO.Unsafe + +-- internal modules + +import Chainweb.Storage.Table +import Chainweb.Storage.Table.RocksDB + +-- -------------------------------------------------------------------------- -- +-- Utils + +fst3 :: (a, b, c) -> a +fst3 (a, _, _) = a +{-# INLINE fst3 #-} + +snd3 :: (a, b, c) -> b +snd3 (_, a, _) = a +{-# INLINE snd3 #-} + +third3 :: (a, b, c) -> c +third3 (_, _, a) = a +{-# INLINE third3 #-} + +-- -------------------------------------------------------------------------- -- +-- Dedup Hash + +dedupHashAlg :: C.Blake2b_256 +dedupHashAlg = C.Blake2b_256 + +type DedupHashAlg = C.Blake2b_256 + +dedupHashSize :: Int +dedupHashSize = C.hashDigestSize dedupHashAlg + +newtype DedupHash = DedupHash { _dedupHashBytes :: B.ByteString } + deriving (Show, Eq, Ord, Generic) + deriving newtype (NFData, Hashable, BA.ByteArrayAccess) + +-- -------------------------------------------------------------------------- -- +-- DedupStore + +-- | Exception thrown by the DedupStore +-- +newtype DedupStoreException = DedupStoreCorruptedData T.Text + deriving (Eq, Show, Ord, Generic) + deriving newtype (NFData, Hashable, IsString) + +instance Exception DedupStoreException + +-- | A deduplicating key value store +-- +data DedupStore k v = 
DedupStore + { _dedupRoots :: !(RocksDbTable k DedupHash) + , _dedupChunks :: !(Casify RocksDbTable Chunk) + , _dedupValueCodec :: !(Codec v) + } + +-- | Create a new 'DedupStore' +-- +newDedupStore + :: HasCallStack + => RocksDb + -> Codec v + -> Codec k + -> [B.ByteString] + -> DedupStore k v +newDedupStore rdb vc kc n = DedupStore + { _dedupRoots = newTable rdb dedupHashCodec kc (n <> ["roots"]) + , _dedupChunks = Casify $ newTable rdb dedupChunkCodec dedupHashCodec (n <> ["chunks"]) + , _dedupValueCodec = vc + } + where + dedupHashCodec = Codec + { _codecEncode = _dedupHashBytes + , _codecDecode = \l -> do + unless (B.length l == dedupHashSize) + $ throwM $ DedupStoreCorruptedData "Internal key has wrong size" + return (DedupHash l) + } + + dedupChunkCodec = Codec + { _codecEncode = \(Chunk t h b) -> B.singleton t <> _dedupHashBytes h <> b + , _codecDecode = \bs -> case B.uncons bs of + Nothing -> throwM $ DedupStoreCorruptedData "Got empty chunk" + Just (a, b) -> case B.splitAt dedupHashSize b of + (c, d) + | B.length c < dedupHashSize + -> throwM $ DedupStoreCorruptedData "Got too short chunk" + | otherwise -> return $ Chunk a (DedupHash c) d + } + +-- | Insert a new items into a 'DedupStore'. +-- +dedupInsert :: DedupStore k v -> k -> v -> IO () +dedupInsert store k v = do + dedupKey <- dedupStore (_dedupChunks store) + $ BL.fromStrict $ _codecEncode (_dedupValueCodec store) v + tableInsert (_dedupRoots store) k dedupKey +{-# INLINE dedupInsert #-} + +-- | @dedupLookup db k@ returns 'Just' the value at key @k@ in the +-- 'DedupStore' @db@ if it exists, or 'Nothing' if the @k@ doesn't exist in +-- the table. +-- +dedupLookup :: DedupStore k v -> k -> IO (Maybe v) +dedupLookup store k = do + tableLookup (_dedupRoots store) k >>= \case + Nothing -> return Nothing + Just dedupKey -> do + dedupRestore (_dedupChunks store) dedupKey >>= \case + Nothing -> return Nothing + Just bytes -> Just <$> + _codecDecode (_dedupValueCodec store) (BL.toStrict bytes) +{-# INLINE dedupLookup #-} + +-- -------------------------------------------------------------------------- -- +-- Low-Level Deduplication CAS + +-- | A chunk of data +-- +data Chunk = Chunk Word8 DedupHash B.ByteString + deriving (Show, Eq, Ord, Generic) + +instance IsCasValue Chunk where + type CasKeyType Chunk = DedupHash + casKey (Chunk _ h _) = h + +type DedupCas cas = Cas cas Chunk + +-- | Store a sequence of bytes in a deduplicated content addressed store. +-- Returns the hash of the bytes, that can be used to query the data from the +-- store. +-- +dedupStore + :: HasCallStack + => DedupCas cas + => cas + -> BL.ByteString + -> IO DedupHash +dedupStore store bytes = third3 <$> dedupStore' store bytes +{-# INLINE dedupStore #-} + +-- | Store a sequence of bytes in a deduplicated content addressed store. +-- Returns the hash of the bytes, that can be used to query the data from the +-- store. +-- +-- It also returns the number of chunk hits and chunk misses that are a measure +-- for the deduplication rate. 
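+--
+-- A usage sketch (the helper 'reportDedupRate' is hypothetical, not part of
+-- this module):
+--
+-- > reportDedupRate :: DedupCas cas => cas -> BL.ByteString -> IO DedupHash
+-- > reportDedupRate store bytes = do
+-- >     (hits, misses, h) <- dedupStore' store bytes
+-- >     putStrLn $ "deduplicated chunks: "
+-- >         <> show hits <> " of " <> show (hits + misses)
+-- >     return h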
+-- +dedupStore' + :: HasCallStack + => DedupCas cas + => cas + -> BL.ByteString + -> IO (Int, Int, DedupHash) +dedupStore' store = go0 + where + go0 :: HasCallStack => BL.ByteString -> IO (Int, Int, DedupHash) + go0 bytes = mapM (hashAndStore 0x0) (toChunks $ roll bytes) >>= \case + [] -> error "Chainweb.Storage.DedupStore.dedupStore.go0: Data.ByteString.Lazy.toChunks must not return an empty list" + [x] -> return x + l -> do + (h, m, r) <- go1 (third3 <$> l) + return (sum (fst3 <$> l) + h, sum (snd3 <$> l) + m, r) + + go1 :: HasCallStack => [DedupHash] -> IO (Int, Int, DedupHash) + go1 hashes = mapM (hashAndStore 0x1) (rollHashes hashes) >>= \case + [] -> error "Chainweb.Storage.DedupStore.dedupStore.go1: Data.ByteString.Lazy.toChunks must not return an empty list" + [x] -> return x + l -> do + (h, m, r) <- go1 (third3 <$> l) + return (sum (fst3 <$> l) + h, sum (snd3 <$> l) + m, r) + + toChunks x = case BL.toChunks x of + [] -> [""] + l -> l + {-# INLINE toChunks #-} + + -- This is called as the dedup hashes are produced, in a bottom up way. We + -- could optimize for data base queries (at the cost of more memory + -- overhead) by first computing the keys, checking for keys top-down, and + -- inserting the data in a second pass only for those keys that aren't in + -- the database already. + -- + hashAndStore :: Word8 -> B.ByteString -> IO (Int, Int, DedupHash) + hashAndStore tag c = do + let h = DedupHash $ BA.convert $ C.hash @_ @DedupHashAlg (B.cons tag c) + (hit, miss) <- tableMember store h >>= \x -> if x + then return (1, 0) + else do + casInsert store (Chunk tag h c) + return (0, 1) + return (hit, miss, h) + {-# INLINE hashAndStore #-} + {-# SCC hashAndStore #-} + + rollHashes :: [DedupHash] -> [B.ByteString] + rollHashes hs = fmap (B.concat . fmap _dedupHashBytes) $ rollLazy pickInt64 8 2 11 hs + {-# INLINE rollHashes #-} + {-# SCC rollHashes #-} + + pickInt64 :: DedupHash -> Int64 + pickInt64 s = unsafePerformIO $ BU.unsafeUseAsCString + (_dedupHashBytes s) + (peek . castPtr) + {-# INLINE pickInt64 #-} + {-# SCC pickInt64 #-} +{-# SCC dedupStore' #-} + +-- | Retrieve a sequence of bytes from a deduplicated content addressable store. +-- +dedupRestore + :: DedupCas cas + => cas + -> DedupHash + -> IO (Maybe BL.ByteString) +dedupRestore store key = fmap BB.toLazyByteString <$> do + tableLookup store key >>= \case + Nothing -> return Nothing + Just (Chunk 0x0 _ b) -> return $ Just $ BB.byteString b + Just (Chunk 0x1 _ b) -> Just <$> splitHashes b + _ -> throwM $ DedupStoreCorruptedData "Unknown chunk tag" + + where + go h = do + tableLookup store h >>= \case + Nothing -> throwM $ DedupStoreCorruptedData "Missing chunk from store" + Just (Chunk 0x0 _ b) -> return (BB.byteString b) + Just (Chunk 0x1 _ b) -> splitHashes b + _ -> throwM $ DedupStoreCorruptedData "Unknown chunk tag" + + splitHashes b + | B.null b = return mempty + | otherwise = case B.splitAt (C.hashDigestSize C.Blake2b_256) b of + (h, r) -> (<>) <$> go (DedupHash h) <*> splitHashes r +{-# SCC dedupRestore #-} + +-- -------------------------------------------------------------------------- -- +-- Rabin-Karp Rolling Hash +-- + +-- | Re-chunks a lazy 'BL.ByteString' using the Rabin-Karp rolling hash +-- function. The expected chunk size of the resuling 'BL.ByteString' is 8k, with +-- a minimal chunk size of 128 bytes and a maximal chunk size of 64k. +-- +-- The implementation is adapted from https://github.com/ekmett/hash, Copyright +-- 2012-2013 Edward Kmett. 
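+--
+-- Note that re-chunking only changes the chunk boundaries, never the content:
+-- concatenating the chunks recovers the input, so
+--
+-- > roll bs == bs
+--
+-- holds for every input (lazy 'BL.ByteString' equality ignores chunk
+-- structure). Because the boundaries are content-defined, inputs that share
+-- long runs of bytes mostly produce identical chunks, which is what makes
+-- deduplication effective.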
+-- +roll :: BL.ByteString -> BL.ByteString +roll z = BL.fromChunks $ go seed 0 z za ze + where + z' = BL.unpack z + za = BL.unpack (BL.replicate window 0) <> z' + {-# SCC za #-} + ze = z' + {-# SCC ze #-} + + go + :: Int64 + -- ^ current hash + -> Int64 + -- ^ current byte count in input string + -> BL.ByteString + -- ^ input bytestring + -> [Word8] + -- ^ bytes from start of window (starts all 0 before input string) + -> [Word8] + -- ^ bytes after end window + -> [B.ByteString] + -- ^ Chunk + go !h !c !bs (x:xs) (y:ys) + | ((h' .&. mask == mask) && c >= minSize) || c >= maxSize = case BL.splitAt (c + 1) bs of + (l,r) -> {-# SCC concat #-} B.concat (BL.toChunks l) : go seed 0 r xs ys + | otherwise = go h' (c + 1) bs xs ys + where + !x' = if c < window then 0 else fromIntegral x + !h' = rem (rem (h - magic_d * x') magic_q * magic_r + fromIntegral y) magic_q + {-# SCC h' #-} + go _ _ bs _ _ = [B.concat $ BL.toChunks bs] + {-# SCC go #-} + + magic_d = powqp magic_r (window-1) magic_q + mask = 8191 + minSize = 128 + maxSize = 65536 + magic_r = 256 + magic_q = 32749 + window = 128 + seed = 0 +{-# INLINE roll #-} +{-# SCC roll #-} + +-- | Breaks an input list into chunks using Rabin-Karp rolling hash with the +-- provided expected, minimal, and maximal chunks size. +-- +-- The implementation is adapted from https://github.com/ekmett/hash, Copyright +-- 2012-2013 Edward Kmett. +-- +rollLazy + :: forall a + . (a -> Int64) + -- ^ hash function (64 bit should be sufficient for must use cases) + -> Int + -- ^ expected size (as power of 2) + -> Int + -- ^ min size (as power of 2). Must be equal or smaller than expected. + -> Int + -- ^ max size (as power of 2). Must be equal or larger than expected. + -> [a] + -> [[a]] +rollLazy _ _ _ _ [] = [] +rollLazy f expectedPow2 minSizePow2 maxSizePow2 z + = go seed 0 z (L.replicate window (head z) <> z) z + where + go + :: Int64 + -- current hash + -> Int + -- current item count in input string + -> [a] + -- input items. + -> [a] + -- items from start of window (starts all 0 before input string). We + -- drop the first item of this on each rehash + -> [a] + -- items after end window. We add the first item from this on each + -- rehash. + -> [[a]] + -- Chunks + go !h !c !bs (x:xs) (y:ys) + + -- Found new chunk + | ((h' .&. mask == mask) && c >= minSize) || c >= maxSize + = case L.splitAt (c + 1) bs of + (l,r) -> l : go seed 0 r xs ys + + -- Continue + | otherwise + = go h' (c + 1) bs xs ys + where + !x' = if c < window then 0 else f x + !h' = rem (rem (h - magic_d * x') magic_q * magic_r + f y) magic_q + go _ _ bs _ _ = [bs] + + mask = (2^expectedPow2) - 1 + minSize = 2^minSizePow2 + maxSize = 2^maxSizePow2 + + magic_d = powqp magic_r (fromIntegral window - 1) magic_q + magic_r = 256 + magic_q = 32749 + + window :: Int + window = minSize + + seed :: Int64 + seed = 0 +{-# INLINE rollLazy #-} +{-# SCC rollLazy #-} + +-- | Calculates @powqp m n p = 'rem' (m^n) p@ under the assumption that @m * p * +-- p@ can fit in an @Int64@. +-- +-- This function is copied from https://github.com/ekmett/hash, Copyright +-- 2012-2013 Edward Kmett. 
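+--
+-- For example:
+--
+-- > powqp 2 10 1000 == 24  -- since 2^10 = 1024 and rem 1024 1000 == 24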
+-- +powqp :: Int64 -> Int64 -> Int64 -> Int64 +powqp _ 0 _ = 1 +powqp m 1 p = rem m p +powqp m n p = case quotRem n 2 of + (q, 1) | k <- powqp m q p -> rem (k * k * m) p + (q, _) | k <- powqp m q p -> rem (k * k) p +{-# INLINE powqp #-} +{-# SCC powqp #-} + diff --git a/vendored/chainweb-storage/src/Chainweb/Storage/Table.hs b/vendored/chainweb-storage/src/Chainweb/Storage/Table.hs new file mode 100644 index 0000000000..484a1719dd --- /dev/null +++ b/vendored/chainweb-storage/src/Chainweb/Storage/Table.hs @@ -0,0 +1,184 @@ +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE InstanceSigs #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE QuantifiedConstraints #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE UndecidableInstances #-} + +-- | +-- Module: Chainweb.Storage.Table +-- Copyright: Copyright © 2019 Kadena LLC. +-- License: MIT +-- Maintainer: Lars Kuhtz , Edmund Noble +-- Stability: experimental +-- Description: Key Value Store +-- +-- API for Key-Value Stores +module Chainweb.Storage.Table + ( IsCasValue (..) + , ReadableTable (..) + , tableLookupBatch + , ReadableTable1 + , ReadableCas + , Table (..) + , Table1 + , Casify(..) + , Cas + , casInsert + , casInsertBatch + , casDelete + , casDeleteBatch + , IterableTable (..) + , IterableTable1 + , IterableCas + , Entry (..) + , Iterator (..) + , Iterator1 + , CasIterator + , tableLookupM + , casLookupM + , TableException (..) + ) +where + +import Control.Exception (Exception, SomeException) +import Control.Lens +import Control.Monad.Catch (throwM) + +import Data.Coerce +import Data.Foldable +import Data.Maybe +import Data.Text (Text) + +import GHC.Generics +import GHC.Stack + +-- | The class of content-addressable values. +-- +-- The casKey function must be morally injective: +-- +-- prop> casKey a /= casKey b || a == b +-- +-- Usually, 'casKey' is a cryptographic, i.e. collision resistant, hash +-- function. +class Eq (CasKeyType v) => IsCasValue v where + type CasKeyType v + casKey :: v -> CasKeyType v + +-- | Read-Only View of a Key-Value Store +-- +class ReadableTable t k v | t -> k v where + tableLookup :: t -> k -> IO (Maybe v) + tableLookupBatch' :: t -> Traversal s r k (Maybe v) -> s -> IO r + tableLookupBatch' t l = l (tableLookup t) + tableMember :: t -> k -> IO Bool + tableMember t k = isJust <$> tableLookup t k + +tableLookupBatch :: (ReadableTable t k v, Each s t' k (Maybe v)) => t -> s -> IO t' +tableLookupBatch t = tableLookupBatch' t each + +type ReadableCas t v = ReadableTable t (CasKeyType v) v +type ReadableTable1 t = forall k v. ReadableTable (t k v) k v + +class ReadableTable t k v => Table t k v | t -> k v where + tableInsert :: t -> k -> v -> IO () + tableInsertBatch :: t -> [(k, v)] -> IO () + tableInsertBatch t kvs = traverse_ (uncurry (tableInsert t)) kvs + tableDelete :: t -> k -> IO () + tableDeleteBatch :: t -> [k] -> IO () + tableDeleteBatch t ks = traverse_ (tableDelete t) ks +type Table1 t = forall k v. 
Table (t k v) k v
+
+type Cas t v = Table t (CasKeyType v) v
+
+casInsert :: (IsCasValue v, Cas t v) => t -> v -> IO ()
+casInsert t v = tableInsert t (casKey v) v
+
+casInsertBatch :: (IsCasValue v, Cas t v) => t -> [v] -> IO ()
+casInsertBatch t vs = tableInsertBatch t [(casKey v, v) | v <- vs]
+
+casDelete :: (IsCasValue v, Cas t v) => t -> v -> IO ()
+casDelete t = tableDelete t . casKey
+
+casDeleteBatch :: (IsCasValue v, Cas t v) => t -> [v] -> IO ()
+casDeleteBatch t = tableDeleteBatch t . fmap casKey
+
+class (Table t k v, Iterator i k v) => IterableTable t i k v | t -> i k v where
+    -- the created iterator must be positioned at the start of the table.
+    withTableIterator :: t -> (i -> IO a) -> IO a
+type IterableTable1 t i = forall k v. IterableTable (t k v) (i k v) k v
+
+type IterableCas t v = IterableTable t (CasKeyType v) v
+
+data Entry k v = Entry !k !v
+    deriving (Eq, Show, Ord)
+
+class Iterator i k v | i -> k v where
+    iterSeek :: i -> k -> IO ()
+    iterLast :: i -> IO ()
+    iterFirst :: i -> IO ()
+    iterNext :: i -> IO ()
+    iterPrev :: i -> IO ()
+    iterEntry :: i -> IO (Maybe (Entry k v))
+    iterKey :: i -> IO (Maybe k)
+    iterKey i = (fmap . fmap) (\(Entry k _) -> k) $ iterEntry i
+    iterValue :: i -> IO (Maybe v)
+    iterValue i = (fmap . fmap) (\(Entry _ v) -> v) $ iterEntry i
+    iterValid :: i -> IO Bool
+    iterValid i = isJust <$> iterKey i
+type Iterator1 i = forall k v. Iterator (i k v) k v
+
+type CasIterator i v = Iterator i (CasKeyType v) v
+
+-- | A newtype wrapper that takes only a single type constructor. This is useful
+-- in situations where a higher-order type constructor for a CAS is required. A
+-- type synonym doesn't work in this situation because type synonyms must be
+-- fully applied.
+--
+newtype Casify t v = Casify { unCasify :: t (CasKeyType v) v }
+instance forall t k v. (CasKeyType v ~ k, ReadableTable (t k v) k v) => ReadableTable (Casify t v) k v where
+    tableLookup = coerce @(t k v -> k -> IO (Maybe v)) tableLookup
+    -- can't seem to write `coerce` without the resulting instantiation being impredicative
+    tableLookupBatch' (Casify t) b = tableLookupBatch' t b
+    tableMember = coerce @(t k v -> k -> IO Bool) tableMember
+instance forall t k v. (CasKeyType v ~ k, Table (t k v) k v) => Table (Casify t v) k v where
+    tableInsert = coerce @(t k v -> k -> v -> IO ()) tableInsert
+    tableInsertBatch = coerce @(t k v -> [(k, v)] -> IO ()) tableInsertBatch
+    tableDelete = coerce @(t k v -> k -> IO ()) tableDelete
+    tableDeleteBatch = coerce @(t k v -> [k] -> IO ()) tableDeleteBatch
+-- TODO: why is this Iterator superclass needed?
+instance forall t i k v. (CasKeyType v ~ k, IterableTable (t k v) i k v, Iterator i k v) => IterableTable (Casify t v) i k v where
+    withTableIterator :: forall a. Casify t v -> (i -> IO a) -> IO a
+    withTableIterator = coerce @(t k v -> (i -> IO a) -> IO a) withTableIterator
+
+-- | Lookup a value by its key in a key-value store and throw a
+-- 'TableException' if the value doesn't exist in the store.
+tableLookupM :: (HasCallStack, ReadableTable t k v) => t -> k -> IO v
+tableLookupM cas k =
+    tableLookup cas k >>= \case
+        Nothing ->
+            throwM . TableException $
+                "tableLookupM: lookup failed for table key"
+        Just v -> return $! v
+
+casLookupM :: (HasCallStack, ReadableCas t v) => t -> CasKeyType v -> IO v
+casLookupM = tableLookupM
+
+-- | Exceptions that are thrown by 'Table' implementations.
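+--
+-- For example, 'tableLookupM' throws a 'TableException' when a key is absent.
+-- A minimal sketch of recovering with a default value (the helper
+-- 'lookupOrDefault' is hypothetical, not part of this module):
+--
+-- > import Control.Monad.Catch (catch)
+-- >
+-- > lookupOrDefault :: ReadableTable t k v => t -> k -> v -> IO v
+-- > lookupOrDefault t k def =
+-- >     tableLookupM t k `catch` \(_ :: TableException) -> return def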
+data TableException
+    = TableException !Text
+    | TableImplementationException !SomeException
+    deriving (Show, Generic)
+
+instance Exception TableException
+
diff --git a/vendored/chainweb-storage/src/Chainweb/Storage/Table/Forgetful.hs b/vendored/chainweb-storage/src/Chainweb/Storage/Table/Forgetful.hs
new file mode 100644
index 0000000000..a1dfff3079
--- /dev/null
+++ b/vendored/chainweb-storage/src/Chainweb/Storage/Table/Forgetful.hs
@@ -0,0 +1,34 @@
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+
+-- |
+-- Module: Chainweb.Storage.Table.Forgetful
+-- Copyright: Copyright © 2019 Kadena LLC.
+-- License: MIT
+-- Maintainer: Lars Kuhtz
+-- Stability: experimental
+--
+-- A key-value store that forgets everything that is stored in it.
+--
+module Chainweb.Storage.Table.Forgetful
+( ForgetfulTable(..)
+) where
+
+import Chainweb.Storage.Table
+
+-- | A key-value store that forgets everything that is added to it.
+--
+data ForgetfulTable k v = ForgetfulTable
+
+instance ReadableTable (ForgetfulTable k v) k v where
+    tableLookup _ _ = return Nothing
+    tableMember _ _ = return False
+    {-# INLINE tableLookup #-}
+
+instance Table (ForgetfulTable k v) k v where
+    tableInsert _ _ _ = return ()
+    tableDelete _ _ = return ()
+    {-# INLINE tableInsert #-}
+    {-# INLINE tableDelete #-}
+
diff --git a/vendored/chainweb-storage/src/Chainweb/Storage/Table/HashMap.hs b/vendored/chainweb-storage/src/Chainweb/Storage/Table/HashMap.hs
new file mode 100644
index 0000000000..62565979cf
--- /dev/null
+++ b/vendored/chainweb-storage/src/Chainweb/Storage/Table/HashMap.hs
@@ -0,0 +1,65 @@
+{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE FunctionalDependencies #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+
+-- |
+-- Module: Chainweb.Storage.Table.HashMap
+-- Copyright: Copyright © 2019 Kadena LLC.
+-- License: MIT
+-- Maintainer: Lars Kuhtz
+-- Stability: experimental
+-- Description:
+--
+-- A thread-safe in-memory 'Table' implementation based on 'HM.HashMap'.
+--
+module Chainweb.Storage.Table.HashMap
+( HashMapTable
+, emptyTable
+, toList
+, size
+) where
+
+import Control.Concurrent.STM.TVar
+import Control.Monad ((<$!>))
+import Control.Monad.STM
+
+import Data.Hashable
+import qualified Data.HashMap.Strict as HM
+
+-- internal modules

+import Chainweb.Storage.Table
+
+-- | A 'Table' implementation that is based on 'HM.HashMap'.
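+--
+-- A minimal usage sketch:
+--
+-- > example :: IO (Maybe Int)
+-- > example = do
+-- >     t <- emptyTable :: IO (HashMapTable String Int)
+-- >     tableInsert t "one" 1
+-- >     tableLookup t "one"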
+--
+newtype HashMapTable k v = HashMapTable (TVar (HM.HashMap k v))
+
+instance (Hashable k, Eq k) => ReadableTable (HashMapTable k v) k v where
+    tableLookup (HashMapTable var) k = HM.lookup k <$!> readTVarIO var
+    tableMember (HashMapTable var) k =
+        HM.member k <$> readTVarIO var
+
+instance (Hashable k, Eq k) => Table (HashMapTable k v) k v where
+    tableInsert (HashMapTable var) k v =
+        atomically $ modifyTVar' var (HM.insert k v)
+    tableDelete (HashMapTable var) k =
+        atomically $ modifyTVar' var (HM.delete k)
+
+-- | Create a new empty table
+--
+emptyTable :: (Hashable k, Eq k) => IO (HashMapTable k v)
+emptyTable = HashMapTable <$> newTVarIO mempty
+
+-- | Return all values of the table as a list
+--
+toList :: HashMapTable k v -> IO [v]
+toList (HashMapTable var) = HM.elems <$!> readTVarIO var
+
+-- | The number of items in the table
+--
+size :: HashMapTable k v -> IO Int
+size (HashMapTable var) = HM.size <$!> readTVarIO var
+
diff --git a/vendored/chainweb-storage/src/Chainweb/Storage/Table/RocksDB.hs b/vendored/chainweb-storage/src/Chainweb/Storage/Table/RocksDB.hs
new file mode 100644
index 0000000000..5bce7aa6d7
--- /dev/null
+++ b/vendored/chainweb-storage/src/Chainweb/Storage/Table/RocksDB.hs
@@ -0,0 +1,780 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DerivingVia #-}
+{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ViewPatterns #-}
+
+-- |
+-- Module: Chainweb.Storage.Table.RocksDB
+-- Copyright: Copyright © 2019 Kadena LLC.
+-- License: MIT
+-- Maintainer: Lars Kuhtz
+-- Stability: experimental
+--
+-- Persisted key-value and content-addressable key-value stores with a
+-- RocksDB backend.
+--
+-- A 'RocksDbTable' provides a typed key-value store with an additional iterator
+-- interface.
+--
+-- A 'RocksDbTable' whose value type is content-addressable can be wrapped in
+-- 'Chainweb.Storage.Table.Casify' to obtain a content-addressed store.
+--
+-- TODO: Abstract the 'RocksDbTable' API into a typeclass so that one can
+-- provide alternative implementations for it.
+--
+module Chainweb.Storage.Table.RocksDB
+  ( RocksDb(..)
+  -- , rocksDbHandle
+  -- , rocksDbNamespace
+  , openRocksDb
+  , openReadOnlyRocksDb
+  , closeRocksDb
+  , withRocksDb
+  , withReadOnlyRocksDb
+  , withTempRocksDb
+  , destroyRocksDb
+  , resetOpenRocksDb
+  , modernDefaultOptions
+
+  -- * Rocks DB Table
+  , Codec(..)
+  , RocksDbTable
+  , newTable
+  , tableLookup
+  , tableLookupBatch
+  , tableInsert
+  , tableDelete
+
+  -- * Batch Updates
+  , RocksDbUpdate(..)
+  , updateBatch
+
+  -- * Rocks DB Table Iterator
+  , RocksDbTableIter(..)
+ + -- ** Streams + , iterToEntryStream + , iterToValueStream + , iterToReverseValueStream + , iterToKeyStream + + -- ** Extremal Table Entries + , tableMaxKey + , tableMaxValue + , tableMaxEntry + , tableMinKey + , tableMinValue + , tableMinEntry + + -- * RocksDB-specific tools + , checkpointRocksDb + , deleteRangeRocksDb + , compactRangeRocksDb + ) where + +import Control.Exception(evaluate) +import Control.Lens +import Control.Monad +import Control.Monad.Catch +import Control.Monad.IO.Class + +import Data.ByteString(ByteString) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 +import qualified Data.ByteString.Internal as BI +import qualified Data.ByteString.Unsafe as BU +import Data.Coerce +import Data.Foldable +import Data.Maybe + +import Chainweb.Storage.Table +import Data.String +import qualified Data.Text as T + +import Foreign +import Foreign.C + +import qualified Database.RocksDB.Base as R +import qualified Database.RocksDB.ReadOptions as R +import qualified Database.RocksDB.C as C +import qualified Database.RocksDB.Internal as R +import qualified Database.RocksDB.Iterator as I + +import GHC.Generics (Generic) +import qualified GHC.Foreign as GHC +import qualified GHC.IO.Encoding as GHC +import GHC.Stack + +import NoThunks.Class + +import qualified Streaming.Prelude as S + +import System.Directory +import System.IO.Temp + +-- -------------------------------------------------------------------------- -- +-- Utils + +-- | A newtype wrapper for tagger values as "expected" outcomes of some +-- computation. +-- +newtype Expected a = Expected { getExpected :: a } + deriving (Show, Eq, Ord, Generic, Functor) + deriving newtype (NoThunks) + +-- | A newtype wrapper for tagger values as "actual" outcomes of some +-- computation. +-- +newtype Actual a = Actual { getActual :: a } + deriving (Show, Eq, Ord, Generic, Functor) + deriving newtype (NoThunks) + +-- | A textual message that describes the 'Expected' and the 'Actual' outcome of +-- some computation. +-- +unexpectedMsg :: Show a => T.Text -> Expected a -> Actual a -> T.Text +unexpectedMsg msg expected actual = msg + <> ", expected: " <> sshow (getExpected expected) + <> ", actual: " <> sshow (getActual actual) + +-- | Show a value as any type that is an instance of 'IsString'. +-- +sshow :: Show a => IsString b => a -> b +sshow = fromString . show + +-- -------------------------------------------------------------------------- -- +-- RocksDb + +-- | This wrapper allows to create key namespaces. This is, for instance, useful +-- for testing when running several concurrent tests on the same rocks db +-- instances. Key namespaces must not contain the character '-'. +-- +data RocksDb = RocksDb + { _rocksDbHandle :: !R.DB + , _rocksDbNamespace :: !B.ByteString + } + +instance NoThunks RocksDb where + wNoThunks ctx (RocksDb a b) = allNoThunks + [ noThunks ctx (InspectHeapNamed @"Data.RocksDB.Base.DB" a) + , noThunks ctx b + ] + showTypeOf _ = "Chainweb.Storage.Table.RocksDB.RocksDb" + + +-- makeLenses ''RocksDb + +modernDefaultOptions :: R.Options +modernDefaultOptions = R.defaultOptions + { R.maxOpenFiles = -1 + , R.writeBufferSize = 64 `shift` 20 + , R.createIfMissing = True + } + +-- | Open a 'RocksDb' instance with the default namespace. If no rocks db exists +-- at the provided directory path, a new database is created. 
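+--
+-- 'openRocksDb' takes a raw 'C.OptionsPtr'; unless the options pointer is
+-- managed elsewhere, 'withRocksDb' (or 'withTempRocksDb') is usually more
+-- convenient, since it allocates the options and closes the database. A
+-- minimal sketch, assuming a caller-supplied 'Codec' for 'Int' values:
+--
+-- > example :: Codec Int -> IO (Maybe Int)
+-- > example intCodec = withTempRocksDb "example" $ \rdb -> do
+-- >     let t = newTable rdb intCodec intCodec ["example"]
+-- >     tableInsert t 1 42
+-- >     tableLookup t 1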
+--
+openRocksDb :: FilePath -> C.OptionsPtr -> IO RocksDb
+openRocksDb path opts_ptr = do
+    GHC.setFileSystemEncoding GHC.utf8
+    createDirectoryIfMissing True path
+    db <- withFilePath path $ \path_ptr ->
+        fmap R.DB
+            $ R.throwIfErr "open"
+            $ C.c_rocksdb_open opts_ptr path_ptr
+    let rdb = RocksDb db mempty
+    initializeRocksDb rdb
+    return rdb
+
+openReadOnlyRocksDb :: FilePath -> C.OptionsPtr -> IO RocksDb
+openReadOnlyRocksDb path opts_ptr = do
+    GHC.setFileSystemEncoding GHC.utf8
+    createDirectoryIfMissing True path
+    db <- withFilePath path $ \path_ptr ->
+        fmap R.DB
+            $ R.throwIfErr "open"
+            $ C.c_rocksdb_open_for_read_only opts_ptr path_ptr (0 :: CChar) -- ignore extant wal file
+    let rdb = RocksDb db mempty
+    -- can't initialize read-only rocksdb
+    return rdb
+
+withOpts :: R.Options -> (R.Options' -> IO a) -> IO a
+withOpts opts =
+    bracket (R.mkOpts opts) R.freeOpts
+
+-- | Each table key starts with @_rocksDbNamespace db <> "-"@. Here we insert a
+-- dummy key that is guaranteed to appear after any other key in the
+-- database. We rely on its existence in the implementation of
+-- 'tableIteratorLast'.
+--
+initializeRocksDb :: RocksDb -> IO ()
+initializeRocksDb db = R.put
+    (_rocksDbHandle db)
+    R.defaultWriteOptions
+    (_rocksDbNamespace db <> ".")
+    ""
+
+-- | Open a 'RocksDb' and reset it if it already exists.
+--
+resetOpenRocksDb :: FilePath -> IO RocksDb
+resetOpenRocksDb path = do
+    destroyRocksDb path
+    db <- RocksDb <$> R.open path opts <*> mempty
+    initializeRocksDb db
+    return db
+  where
+    opts = modernDefaultOptions { R.errorIfExists = True }
+
+-- | Close a 'RocksDb' instance.
+--
+closeRocksDb :: RocksDb -> IO ()
+closeRocksDb (RocksDb (R.DB db_ptr) _) = C.c_rocksdb_close db_ptr
+
+-- | Provide a computation with a 'RocksDb' instance. If no rocks db exists at
+-- the provided directory path, a new database is created.
+--
+withRocksDb :: FilePath -> R.Options -> (RocksDb -> IO a) -> IO a
+withRocksDb path opts act =
+    withOpts opts $ \(R.Options' opts_ptr _ _) ->
+        bracket (openRocksDb path opts_ptr) closeRocksDb act
+
+-- | Provide a computation with a read-only 'RocksDb' instance. If no rocks db exists at
+-- the provided directory path, a new database is created(?) TODO
+--
+withReadOnlyRocksDb :: FilePath -> R.Options -> (RocksDb -> IO a) -> IO a
+withReadOnlyRocksDb path opts act =
+    withOpts opts $ \(R.Options' opts_ptr _ _) ->
+        bracket (openReadOnlyRocksDb path opts_ptr) closeRocksDb act
+
+-- | Provide a computation with a temporary 'RocksDb'. The database is deleted
+-- when the computation exits.
+--
+withTempRocksDb :: String -> (RocksDb -> IO a) -> IO a
+withTempRocksDb template f = withSystemTempDirectory template $ \dir ->
+    withRocksDb dir modernDefaultOptions f
+
+-- | Delete the RocksDb instance.
+--
+-- This is the preferred method of deleting a rocks db instance. A rocks db
+-- instance may store files in different locations. This function guarantees
+-- that all files are deleted.
+--
+destroyRocksDb :: FilePath -> IO ()
+destroyRocksDb path = R.destroy path opts
+  where
+    opts = modernDefaultOptions { R.createIfMissing = False }
+
+-- -------------------------------------------------------------------------- --
+-- RocksDb Table
+
+-- NOTE: the implementation currently doesn't support iteration over nested
+-- namespaces. This could easily be added via an iterator that iterates over
+-- all respective prefixes ending with '/' or '$' and using a codec that
+-- could handle the types in all sub-namespaces. This could for instance
+-- be useful for iterating over the complete database.
+
+-- | A binary codec for encoding and decoding values that are stored in a
+-- 'RocksDb' Table.
+--
+data Codec a = Codec
+    { _codecEncode :: !(a -> B.ByteString)
+        -- ^ encode a value.
+    , _codecDecode :: !(forall m. MonadThrow m => B.ByteString -> m a)
+        -- ^ decode a value. Throws an exception if decoding fails.
+    }
+
+instance NoThunks (Codec a) where
+    -- NoThunks does not look inside of closures for captured thunks
+    wNoThunks _ _ = return Nothing
+    showTypeOf _ = "Chainweb.Storage.Table.RocksDB.Codec"
+
+-- | A logical table in a 'RocksDb'. Tables in a rocks db have isolated key
+-- namespaces.
+--
+data RocksDbTable k v = RocksDbTable
+    { _rocksDbTableValueCodec :: !(Codec v)
+    , _rocksDbTableKeyCodec :: !(Codec k)
+    , _rocksDbTableNamespace :: !B.ByteString
+    , _rocksDbTableDb :: !R.DB
+    }
+
+instance NoThunks (RocksDbTable k v) where
+    wNoThunks ctx (RocksDbTable a b c d) = allNoThunks
+        [ noThunks ctx a
+        , noThunks ctx b
+        , noThunks ctx c
+        , noThunks ctx (InspectHeapNamed @"Data.RocksDB.Base.DB" d)
+        ]
+    showTypeOf _ = "Chainweb.Storage.Table.RocksDB.RocksDbTable"
+
+-- | Create a new 'RocksDbTable' in the given 'RocksDb'.
+--
+-- Table name components must NOT contain any of '$', '%', and '/'. A user
+-- error is raised if the namespace contains any of these characters.
+--
+newTable
+    :: HasCallStack
+    => RocksDb
+    -> Codec v
+    -> Codec k
+    -> [B.ByteString]
+    -> RocksDbTable k v
+newTable db valCodec keyCodec namespace
+    | any (B8.any (\x -> x `elem` ['$', '%', '/'])) namespace
+        = error $ "Chainweb.Storage.Table.RocksDb.newTable: invalid character in table namespace: " <> sshow namespace
+    | otherwise
+        = RocksDbTable valCodec keyCodec ns (_rocksDbHandle db)
+  where
+    ns = _rocksDbNamespace db <> "-" <> B.intercalate "/" namespace


+-- -------------------------------------------------------------------------- --
+-- Batches
+
+-- | A single update operation for a 'RocksDbTable'
+--
+data RocksDbUpdate
+    = forall k v . RocksDbDelete
+        { _rocksDbUpdateTable :: !(RocksDbTable k v)
+        , _rocksDbUpdateKey :: !k
+        }
+    | forall k v . RocksDbInsert
+        { _rocksDbUpdateTable :: !(RocksDbTable k v)
+        , _rocksDbUpdateKey :: !k
+        , _rocksDbUpdateValue :: !v
+        }
+
+rocksDbUpdateDb :: RocksDbUpdate -> R.DB
+rocksDbUpdateDb (RocksDbDelete t _) = _rocksDbTableDb t
+rocksDbUpdateDb (RocksDbInsert t _ _) = _rocksDbTableDb t
+
+-- | Atomically execute a batch of rocks db updates.
+--
+-- All tables in the batch operations must belong to the same rocks db
+-- instance; otherwise an error is raised.
+--
+updateBatch :: HasCallStack => [RocksDbUpdate] -> IO ()
+updateBatch [] = return ()
+updateBatch batch = R.write rdb R.defaultWriteOptions $ checkMkOp <$> batch
+  where
+    rdb = rocksDbUpdateDb $ head batch
+
+    checkMkOp o
+        | rdb == rocksDbUpdateDb o = mkOp o
+        | otherwise = error "Chainweb.Storage.Table.RocksDB.updateBatch: all operations in a batch must be for the same RocksDB instance."
+
+    mkOp (RocksDbDelete t k) = R.Del (encKey t k)
+    mkOp (RocksDbInsert t k v) = R.Put (encKey t k) (encVal t v)
+
+-- -------------------------------------------------------------------------- --
+-- Table Iterator
+
+-- | An iterator for a 'RocksDbTable'. An iterator is a stateful object that
+-- represents an enumeration of a subset of the entries of an immutable snapshot
+-- of the table.
+--
+-- An iterator should be used single-threaded. Since the iterator retains the
+-- -------------------------------------------------------------------------- --
+-- Table Iterator
+
+-- | An iterator for a 'RocksDbTable'. An iterator is a stateful object that
+-- represents an enumeration of a subset of the entries of an immutable snapshot
+-- of the table.
+--
+-- An iterator should be used single threaded. Since the iterator retains the
+-- snapshot of the database, one should avoid storing iterators over longer
+-- periods of time. After usage it should be released in a timely manner.
+--
+-- The recommended way to create a 'RocksDbTableIter' is to use the function
+-- `withTableIterator`.
+--
+data RocksDbTableIter k v = RocksDbTableIter
+    { _rocksDbTableIterValueCodec :: !(Codec v)
+    , _rocksDbTableIterKeyCodec :: !(Codec k)
+    , _rocksDbTableIterNamespace :: !B.ByteString
+    , _rocksDbTableIter :: !I.Iterator
+    }
+
+instance NoThunks (RocksDbTableIter k v) where
+    wNoThunks ctx (RocksDbTableIter a b c d) = allNoThunks
+        [ noThunks ctx a
+        , noThunks ctx b
+        , noThunks ctx c
+        , noThunks ctx (InspectHeapNamed @"Data.RocksDB.Iterator.Iterator" d)
+        ]
+    showTypeOf _ = "Chainweb.Storage.Table.RocksDB.RocksDbTableIterator"
+
+instance Iterator (RocksDbTableIter k v) k v where
+    iterValid it =
+        I.iterValid (_rocksDbTableIter it)
+
+    iterSeek it = I.iterSeek (_rocksDbTableIter it) . encIterKey it
+
+    iterFirst it =
+        I.iterFirst (_rocksDbTableIter it)
+
+    iterLast it =
+        I.iterLast (_rocksDbTableIter it)
+
+    iterNext = I.iterNext . _rocksDbTableIter
+
+    iterPrev = I.iterPrev . _rocksDbTableIter
+
+    iterEntry it =
+        I.iterEntry (_rocksDbTableIter it) >>= \case
+            Nothing -> return Nothing
+            Just (k, v) -> do
+                k' <- decIterKey it k
+                v' <- decIterVal it v
+                return $! Just $! Entry k' v'
+
+    iterValue it = I.iterValue (_rocksDbTableIter it) >>= \case
+        Nothing -> return Nothing
+        Just v -> Just <$> (evaluate =<< decIterVal it v)
+
+    iterKey it = I.iterKey (_rocksDbTableIter it) >>= \case
+        Nothing -> return Nothing
+        Just k -> Just <$> (evaluate =<< decIterKey it k)
+
+instance IterableTable (RocksDbTable k v) (RocksDbTableIter k v) k v where
+    withTableIterator db k = R.withReadOptions readOptions $ \opts_ptr ->
+        I.withIter (_rocksDbTableDb db) opts_ptr $ \iter -> do
+            let tableIter = makeTableIter iter
+            iterFirst tableIter
+            k tableIter
+      where
+        readOptions = fold
+            [ R.setLowerBound (namespaceFirst $ _rocksDbTableNamespace db)
+            , R.setUpperBound (namespaceLast $ _rocksDbTableNamespace db)
+            -- TODO: this setting tells rocksdb to use prefix seek *when it can*.
+            -- the question remains: is it actually being used?
+            , R.setAutoPrefixMode True
+            ]
+        makeTableIter =
+            RocksDbTableIter
+                (_rocksDbTableValueCodec db)
+                (_rocksDbTableKeyCodec db)
+                (_rocksDbTableNamespace db)
+
+-- | Returns the stream of key-value pairs of a 'RocksDbTableIter'.
+--
+-- The iterator must be released after the stream is consumed. Releasing the
+-- iterator too early while the stream is still in use results in a runtime
+-- error. Not releasing the iterator after the processing of the stream has
+-- finished results in a memory leak.
+--
+iterToEntryStream :: RocksDbTableIter k v -> S.Stream (S.Of (Entry k v)) IO ()
+iterToEntryStream it =
+    liftIO (iterEntry it) >>= \case
+        Nothing -> return ()
+        Just x -> S.yield x >> liftIO (iterNext it) >> iterToEntryStream it
+
+-- | Returns the stream of values of a 'RocksDbTableIter'.
+--
+-- The iterator must be released after the stream is consumed. Releasing the
+-- iterator too early while the stream is still in use results in a runtime
+-- error. Not releasing the iterator after the processing of the stream has
+-- finished results in a memory leak.
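+--
+-- A hedged usage sketch, assuming a table @tbl@ and that @S.toList_@ from
+-- "Streaming.Prelude" is in scope under the same qualified import:
+--
+-- > vs <- withTableIterator tbl $ \it -> S.toList_ (iterToValueStream it)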
+--
+iterToValueStream :: Show k => RocksDbTableIter k v -> S.Stream (S.Of v) IO ()
+iterToValueStream it =
+    liftIO (iterValue it) >>= \case
+        Nothing -> return ()
+        Just x -> S.yield x >> liftIO (iterNext it) >> iterToValueStream it
+
+-- | Returns the stream of values of a 'RocksDbTableIter' in reverse
+-- order.
+--
+-- The iterator must be released after the stream is consumed. Releasing the
+-- iterator too early while the stream is still in use results in a runtime
+-- error. Not releasing the iterator after the processing of the stream has
+-- finished results in a memory leak.
+--
+iterToReverseValueStream :: RocksDbTableIter k v -> S.Stream (S.Of v) IO ()
+iterToReverseValueStream it =
+    liftIO (iterValue it) >>= \case
+        Nothing -> return ()
+        Just x -> S.yield x >> liftIO (iterPrev it) >> iterToReverseValueStream it
+
+-- | Returns the stream of keys of a 'RocksDbTableIter'.
+--
+-- The iterator must be released after the stream is consumed. Releasing the
+-- iterator too early while the stream is still in use results in a runtime
+-- error. Not releasing the iterator after the processing of the stream has
+-- finished results in a memory leak.
+--
+iterToKeyStream :: RocksDbTableIter k v -> S.Stream (S.Of k) IO ()
+iterToKeyStream it =
+    liftIO (iterKey it) >>= \case
+        Nothing -> return ()
+        Just x -> S.yield x >> liftIO (iterNext it) >> iterToKeyStream it
+
+-- -------------------------------------------------------------------------- --
+-- Extremal Table Entries
+
+-- | The maximum key in a 'RocksDbTable'.
+--
+tableMaxKey :: RocksDbTable k v -> IO (Maybe k)
+tableMaxKey = flip withTableIterator $ \i -> iterLast i *> iterKey i
+
+-- | The maximum value in a 'RocksDbTable'.
+--
+tableMaxValue :: RocksDbTable k v -> IO (Maybe v)
+tableMaxValue = flip withTableIterator $ \i -> iterLast i *> iterValue i
+
+-- | The maximum key-value pair in a 'RocksDbTable'.
+--
+tableMaxEntry :: RocksDbTable k v -> IO (Maybe (Entry k v))
+tableMaxEntry = flip withTableIterator $ \i -> iterLast i *> iterEntry i
+
+-- | The minimum key in a 'RocksDbTable'.
+--
+tableMinKey :: RocksDbTable k v -> IO (Maybe k)
+tableMinKey = flip withTableIterator $ \i -> iterFirst i *> iterKey i
+
+-- | The minimum value in a 'RocksDbTable'.
+--
+tableMinValue :: RocksDbTable k v -> IO (Maybe v)
+tableMinValue = flip withTableIterator $ \i -> iterFirst i *> iterValue i
+
+-- | The minimum key-value pair in a 'RocksDbTable'.
+--
+tableMinEntry :: RocksDbTable k v -> IO (Maybe (Entry k v))
+tableMinEntry = flip withTableIterator $ \i -> iterFirst i *> iterEntry i
+
+-- -------------------------------------------------------------------------- --
+-- CAS
+
+-- | A 'RocksDbTable k v' is an instance of 'ReadableTable'.
+--
+instance forall k v. ReadableTable (RocksDbTable k v) k v where
+    tableLookup db k = do
+        maybeBytes <- getRocksDb (_rocksDbTableDb db) mempty (encKey db k)
+        traverse (decVal db) maybeBytes
+
+    tableMember db k =
+        isJust <$> getRocksDb (_rocksDbTableDb db) mempty (encKey db k)
+
+    -- | @tableLookupBatch db ks@ returns for each @k@ in @ks@ 'Just' the value at
+    -- key @k@ in the 'RocksDbTable' @db@ if it exists, or 'Nothing' if the @k@
+    -- doesn't exist in the table.
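+    --
+    -- A hedged sketch of the behavior described above (@tbl@, @k1@, @k2@ are
+    -- illustrative; if only @k1@ is present, the result is @[Just v1, Nothing]@):
+    --
+    -- > results <- tableLookupBatch tbl [k1, k2]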
+    --
+    tableLookupBatch' db t = unsafePartsOf t $ \ks -> do
+        results <- multiGet (_rocksDbTableDb db) mempty (map (encKey db) ks)
+        forM results $ \case
+            Left e -> error $ "Chainweb.Storage.Table.RocksDB.tableLookupBatch: " <> e
+            Right x -> traverse (decVal db) x
+
+-- | A 'RocksDbTable k v' is an instance of 'Table'.
+--
+instance Table (RocksDbTable k v) k v where
+    tableInsert db k v = R.put
+        (_rocksDbTableDb db)
+        R.defaultWriteOptions
+        (encKey db k)
+        (encVal db v)
+
+    tableDelete db k = R.delete
+        (_rocksDbTableDb db)
+        R.defaultWriteOptions
+        (encKey db k)
+
+    tableInsertBatch db kvs =
+        updateBatch (uncurry (RocksDbInsert db) <$> kvs)
+
+    tableDeleteBatch db ks =
+        updateBatch (RocksDbDelete db <$> ks)
+
+-- -------------------------------------------------------------------------- --
+-- Exceptions
+
+-- | Exceptions that can be thrown by functions in this module.
+--
+data RocksDbException
+    = RocksDbTableIterInvalidKeyNamespace !(Expected B.ByteString) !(Actual B.ByteString)
+    deriving (Eq, Ord, Generic, NoThunks)
+
+instance Exception RocksDbException where
+    displayException (RocksDbTableIterInvalidKeyNamespace e a)
+        = T.unpack $ unexpectedMsg "Chainweb.Storage.Table.RocksDB: invalid table key" e a
+
+instance Show RocksDbException where
+    show = displayException
+
+-- -------------------------------------------------------------------------- --
+-- Table Utils
+
+encVal :: RocksDbTable k v -> v -> B.ByteString
+encVal = _codecEncode . _rocksDbTableValueCodec
+
+encKey :: RocksDbTable k v -> k -> B.ByteString
+encKey it k = namespaceFirst ns <> _codecEncode (_rocksDbTableKeyCodec it) k
+  where
+    ns = _rocksDbTableNamespace it
+
+decVal :: MonadThrow m => RocksDbTable k v -> B.ByteString -> m v
+decVal tbl = _codecDecode $ _rocksDbTableValueCodec tbl
+
+-- -------------------------------------------------------------------------- --
+-- Iter Utils
+
+namespaceFirst :: B.ByteString -> B.ByteString
+namespaceFirst ns = ns <> "$"
+
+namespaceLast :: B.ByteString -> B.ByteString
+namespaceLast ns = ns <> "%"
+
+encIterKey :: RocksDbTableIter k v -> k -> B.ByteString
+encIterKey it k = namespaceFirst ns <> _codecEncode (_rocksDbTableIterKeyCodec it) k
+  where
+    ns = _rocksDbTableIterNamespace it
+
+decIterVal :: MonadThrow m => RocksDbTableIter k v -> B.ByteString -> m v
+decIterVal i = _codecDecode (_rocksDbTableIterValueCodec i)
+
+decIterKey :: MonadThrow m => RocksDbTableIter k v -> B.ByteString -> m k
+decIterKey it k =
+    _codecDecode (_rocksDbTableIterKeyCodec it) (B.drop (B.length prefix) k)
+  where
+    prefix = namespaceFirst $ _rocksDbTableIterNamespace it
+
+checked :: HasCallStack => String -> (Ptr CString -> IO a) -> IO a
+checked whatWasIDoing act = alloca $ \errPtr -> do
+    poke errPtr (nullPtr :: CString)
+    r <- act errPtr
+    err <- peek errPtr
+    unless (err == nullPtr) $ do
+        errStr <- B.packCString err
+        let msg = unwords ["Chainweb.Storage.Table.RocksDB.checked: error while", whatWasIDoing <> ":", B8.unpack errStr]
+        C.c_rocksdb_free err
+        error msg
+    return r
+
+-- | To unconditionally flush the WAL before making the checkpoint, set
+-- logSizeFlushThreshold to zero. To *never* flush the WAL, set
+-- logSizeFlushThreshold to @maxBound :: CULong@.
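+--
+-- A hedged sketch (the target directory is illustrative):
+--
+-- > -- flush the WAL, then write a checkpoint under the given directory
+-- > checkpointRocksDb db 0 "/var/backups/chainweb-checkpoint"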
+checkpointRocksDb :: RocksDb -> CULong -> FilePath -> IO ()
+checkpointRocksDb RocksDb { _rocksDbHandle = R.DB dbPtr } logSizeFlushThreshold path =
+    bracket mkCheckpointObject C.rocksdb_checkpoint_object_destroy mkCheckpoint
+  where
+    mkCheckpointObject =
+        checked "creating checkpoint object" $
+            C.rocksdb_checkpoint_object_create dbPtr
+    mkCheckpoint cp =
+        withCString path $ \path' ->
+            checked "creating checkpoint" $
+                C.rocksdb_checkpoint_create cp path' logSizeFlushThreshold
+
+validateRangeOrdered :: HasCallStack => RocksDbTable k v -> (Maybe k, Maybe k) -> (B.ByteString, B.ByteString)
+validateRangeOrdered table (Just (encKey table -> l), Just (encKey table -> u))
+    | l >= u =
+        error "Chainweb.Storage.Table.RocksDB.validateRangeOrdered: range bounds not ordered according to codec"
+    | otherwise = (l, u)
+validateRangeOrdered table (l, u) =
+    ( maybe (namespaceFirst (_rocksDbTableNamespace table)) (encKey table) l
+    , maybe (namespaceLast (_rocksDbTableNamespace table)) (encKey table) u
+    )
+
+-- | Batch delete a range of keys in a table.
+-- Throws if the range of the *encoded keys* is not ordered (lower, upper).
+deleteRangeRocksDb :: HasCallStack => RocksDbTable k v -> (Maybe k, Maybe k) -> IO ()
+deleteRangeRocksDb table range = do
+    let !range' = validateRangeOrdered table range
+    let R.DB dbPtr = _rocksDbTableDb table
+    R.withCWriteOpts R.defaultWriteOptions $ \optsPtr ->
+        BU.unsafeUseAsCStringLen (fst range') $ \(minKeyPtr, minKeyLen) ->
+            BU.unsafeUseAsCStringLen (snd range') $ \(maxKeyPtr, maxKeyLen) ->
+                checked "Chainweb.Storage.Table.RocksDB.deleteRangeRocksDb" $
+                    C.rocksdb_delete_range dbPtr optsPtr
+                        minKeyPtr (fromIntegral minKeyLen :: CSize)
+                        maxKeyPtr (fromIntegral maxKeyLen :: CSize)
+
+compactRangeRocksDb :: HasCallStack => RocksDbTable k v -> (Maybe k, Maybe k) -> IO ()
+compactRangeRocksDb table range =
+    BU.unsafeUseAsCStringLen (fst range') $ \(minKeyPtr, minKeyLen) ->
+        BU.unsafeUseAsCStringLen (snd range') $ \(maxKeyPtr, maxKeyLen) ->
+            C.rocksdb_compact_range dbPtr
+                minKeyPtr (fromIntegral minKeyLen :: CSize)
+                maxKeyPtr (fromIntegral maxKeyLen :: CSize)
+  where
+    !range' = validateRangeOrdered table range
+    R.DB dbPtr = _rocksDbTableDb table
+
+-- | Read a value by key.
+-- One less copy than the version in rocksdb-haskell, by using
+-- unsafePackCStringFinalizer.
+getRocksDb :: MonadIO m => R.DB -> R.ReadOptions -> ByteString -> m (Maybe ByteString)
+getRocksDb (R.DB db_ptr) opts key = liftIO $ R.withReadOptions opts $ \opts_ptr ->
+    BU.unsafeUseAsCStringLen key $ \(key_ptr, klen) ->
+        alloca $ \vlen_ptr -> do
+            val_ptr <- checked "Chainweb.Storage.Table.RocksDB.get" $
+                C.c_rocksdb_get db_ptr opts_ptr key_ptr (R.intToCSize klen) vlen_ptr
+            vlen <- peek vlen_ptr
+            if val_ptr == nullPtr
+                then return Nothing
+                else do
+                    Just <$> BU.unsafePackCStringFinalizer
+                        ((coerce :: Ptr CChar -> Ptr Word8) val_ptr)
+                        (R.cSizeToInt vlen)
+                        (C.c_rocksdb_free val_ptr)
+
+-- | Marshal a 'FilePath' (Haskell string) into a NUL-terminated C string using
+-- temporary storage.
+-- On Linux, UTF-8 is almost always the encoding used.
+-- On Windows, UTF-8 can also be used, although the platform default is
+-- UTF-16. For a more detailed explanation, please refer to
+-- https://msdn.microsoft.com/en-us/library/windows/desktop/dd374081(v=vs.85).aspx.
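+--
+-- A minimal sketch (the path is illustrative; the 'CString' is only valid
+-- within the callback):
+--
+-- > withFilePath "/tmp/chainweb-db" B.packCString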
+withFilePath :: FilePath -> (CString -> IO a) -> IO a +withFilePath = GHC.withCString GHC.utf8 + +-- -------------------------------------------------------------------------- -- +-- Multi Get + +multiGet + :: MonadIO m + => R.DB + -> R.ReadOptions + -> [ByteString] + -> m [Either String (Maybe ByteString)] +multiGet (R.DB db_ptr) opts keys = liftIO $ R.withReadOptions opts $ \opts_ptr -> + allocaArray len $ \keysArray -> + allocaArray len $ \keySizesArray -> + allocaArray len $ \valuesArray -> + allocaArray len $ \valueSizesArray -> + allocaArray len $ \errsArray -> + let go i (key : ks) = + BU.unsafeUseAsCStringLen key $ \(keyPtr, keyLen) -> do + pokeElemOff keysArray i keyPtr + pokeElemOff keySizesArray i (R.intToCSize keyLen) + go (i + 1) ks + + go _ [] = do + C.rocksdb_multi_get db_ptr opts_ptr (R.intToCSize len) + keysArray keySizesArray + valuesArray valueSizesArray + errsArray + forM [0..len-1] $ \i -> do + valuePtr <- peekElemOff valuesArray i + if valuePtr /= nullPtr + then do + valueLen <- R.cSizeToInt <$> peekElemOff valueSizesArray i + r <- BU.unsafePackCStringFinalizer (castPtr valuePtr) valueLen (C.c_rocksdb_free valuePtr) + return $ Right $ Just r + else do + errPtr <- peekElemOff errsArray i + if errPtr /= nullPtr + then do + errLen <- BI.c_strlen errPtr + err <- B8.unpack <$> BU.unsafePackCStringFinalizer (castPtr errPtr) (fromIntegral errLen :: Int) (C.c_rocksdb_free errPtr) + return $ Left err + else + return $ Right Nothing + in go 0 keys + where + len = length keys From f6140bc72a8c2dfb9511e987d7ea240f2febcfa1 Mon Sep 17 00:00:00 2001 From: chessai Date: Wed, 26 Jun 2024 20:15:31 -0500 Subject: [PATCH 2/4] fold in rocksdb-haskell-kadena Change-Id: If6e2cceb4b712b80c3a5466035e309f7215adf8a --- cabal.project | 9 - cabal.project.freeze | 4 +- chainweb.cabal | 463 +- vendored/rocksdb-haskell-kadena/CHANGELOG.md | 35 + vendored/rocksdb-haskell-kadena/LICENSE | 31 + vendored/rocksdb-haskell-kadena/README.md | 47 + .../cpp/chainweb-rocksdb.cpp | 132 + .../cpp/chainweb-rocksdb.h | 15 + .../rocksdb-8.3.2/db/arena_wrapped_db_iter.cc | 163 + .../rocksdb-8.3.2/db/arena_wrapped_db_iter.h | 127 + .../rocksdb-8.3.2/db/blob/blob_constants.h | 16 + .../rocksdb-8.3.2/db/blob/blob_contents.cc | 42 + .../rocksdb-8.3.2/db/blob/blob_contents.h | 59 + .../db/blob/blob_counting_iterator.h | 150 + .../rocksdb-8.3.2/db/blob/blob_fetcher.cc | 34 + .../rocksdb-8.3.2/db/blob/blob_fetcher.h | 37 + .../db/blob/blob_file_addition.cc | 156 + .../db/blob/blob_file_addition.h | 67 + .../db/blob/blob_file_builder.cc | 426 + .../rocksdb-8.3.2/db/blob/blob_file_builder.h | 112 + .../rocksdb-8.3.2/db/blob/blob_file_cache.cc | 101 + .../rocksdb-8.3.2/db/blob/blob_file_cache.h | 56 + .../db/blob/blob_file_completion_callback.h | 84 + .../db/blob/blob_file_garbage.cc | 134 + .../rocksdb-8.3.2/db/blob/blob_file_garbage.h | 57 + .../rocksdb-8.3.2/db/blob/blob_file_meta.cc | 62 + .../rocksdb-8.3.2/db/blob/blob_file_meta.h | 170 + .../rocksdb-8.3.2/db/blob/blob_file_reader.cc | 624 + .../rocksdb-8.3.2/db/blob/blob_file_reader.h | 112 + .../db/blob/blob_garbage_meter.cc | 100 + .../db/blob/blob_garbage_meter.h | 102 + .../rocksdb-8.3.2/db/blob/blob_index.h | 187 + .../rocksdb-8.3.2/db/blob/blob_log_format.cc | 143 + .../rocksdb-8.3.2/db/blob/blob_log_format.h | 164 + .../db/blob/blob_log_sequential_reader.cc | 134 + .../db/blob/blob_log_sequential_reader.h | 83 + .../rocksdb-8.3.2/db/blob/blob_log_writer.cc | 178 + .../rocksdb-8.3.2/db/blob/blob_log_writer.h | 83 + .../rocksdb-8.3.2/db/blob/blob_read_request.h | 
58 + .../rocksdb-8.3.2/db/blob/blob_source.cc | 459 + .../rocksdb-8.3.2/db/blob/blob_source.h | 161 + .../db/blob/prefetch_buffer_collection.cc | 21 + .../db/blob/prefetch_buffer_collection.h | 38 + .../rocksdb-8.3.2/db/builder.cc | 447 + .../rocksdb-8.3.2/db/builder.h | 78 + .../rocksdb-8.3.2/db/c.cc | 6559 +++++ .../rocksdb-8.3.2/db/column_family.cc | 1708 ++ .../rocksdb-8.3.2/db/column_family.h | 851 + .../db/compaction/clipping_iterator.h | 281 + .../rocksdb-8.3.2/db/compaction/compaction.cc | 857 + .../rocksdb-8.3.2/db/compaction/compaction.h | 565 + .../compaction/compaction_iteration_stats.h | 49 + .../db/compaction/compaction_iterator.cc | 1445 + .../db/compaction/compaction_iterator.h | 530 + .../db/compaction/compaction_job.cc | 2054 ++ .../db/compaction/compaction_job.h | 504 + .../db/compaction/compaction_outputs.cc | 784 + .../db/compaction/compaction_outputs.h | 403 + .../db/compaction/compaction_picker.cc | 1225 + .../db/compaction/compaction_picker.h | 318 + .../db/compaction/compaction_picker_fifo.cc | 471 + .../db/compaction/compaction_picker_fifo.h | 60 + .../db/compaction/compaction_picker_level.cc | 888 + .../db/compaction/compaction_picker_level.h | 33 + .../compaction/compaction_picker_universal.cc | 1452 + .../compaction/compaction_picker_universal.h | 30 + .../db/compaction/compaction_service_job.cc | 833 + .../db/compaction/compaction_state.cc | 46 + .../db/compaction/compaction_state.h | 42 + .../rocksdb-8.3.2/db/compaction/file_pri.h | 92 + .../db/compaction/sst_partitioner.cc | 83 + .../db/compaction/subcompaction_state.cc | 106 + .../db/compaction/subcompaction_state.h | 220 + .../rocksdb-8.3.2/db/convenience.cc | 81 + .../rocksdb-8.3.2/db/db_filesnapshot.cc | 436 + .../db/db_impl/compacted_db_impl.cc | 260 + .../db/db_impl/compacted_db_impl.h | 157 + .../rocksdb-8.3.2/db/db_impl/db_impl.cc | 6132 +++++ .../rocksdb-8.3.2/db/db_impl/db_impl.h | 2841 ++ .../db/db_impl/db_impl_compaction_flush.cc | 3980 +++ .../rocksdb-8.3.2/db/db_impl/db_impl_debug.cc | 314 + .../db/db_impl/db_impl_experimental.cc | 159 + .../rocksdb-8.3.2/db/db_impl/db_impl_files.cc | 1014 + .../rocksdb-8.3.2/db/db_impl/db_impl_open.cc | 2202 ++ .../db/db_impl/db_impl_readonly.cc | 334 + .../db/db_impl/db_impl_readonly.h | 173 + .../db/db_impl/db_impl_secondary.cc | 964 + .../db/db_impl/db_impl_secondary.h | 406 + .../rocksdb-8.3.2/db/db_impl/db_impl_write.cc | 2487 ++ .../rocksdb-8.3.2/db/db_info_dumper.cc | 147 + .../rocksdb-8.3.2/db/db_info_dumper.h | 15 + .../rocksdb-8.3.2/db/db_iter.cc | 1708 ++ .../rocksdb-8.3.2/db/db_iter.h | 411 + .../rocksdb-8.3.2/db/db_test_util.h | 1340 + .../db/db_with_timestamp_test_util.h | 126 + .../rocksdb-8.3.2/db/dbformat.cc | 213 + .../rocksdb-8.3.2/db/dbformat.h | 869 + .../rocksdb-8.3.2/db/error_handler.cc | 792 + .../rocksdb-8.3.2/db/error_handler.h | 124 + .../rocksdb-8.3.2/db/event_helpers.cc | 323 + .../rocksdb-8.3.2/db/event_helpers.h | 78 + .../rocksdb-8.3.2/db/experimental.cc | 144 + .../db/external_sst_file_ingestion_job.cc | 1104 + .../db/external_sst_file_ingestion_job.h | 238 + .../rocksdb-8.3.2/db/file_indexer.cc | 218 + .../rocksdb-8.3.2/db/file_indexer.h | 140 + .../rocksdb-8.3.2/db/flush_job.cc | 1097 + .../rocksdb-8.3.2/db/flush_job.h | 200 + .../rocksdb-8.3.2/db/flush_scheduler.cc | 86 + .../rocksdb-8.3.2/db/flush_scheduler.h | 55 + .../rocksdb-8.3.2/db/forward_iterator.cc | 1070 + .../rocksdb-8.3.2/db/forward_iterator.h | 166 + .../db/history_trimming_iterator.h | 95 + .../db/import_column_family_job.cc | 364 + 
.../db/import_column_family_job.h | 82 + .../rocksdb-8.3.2/db/internal_stats.cc | 2121 ++ .../rocksdb-8.3.2/db/internal_stats.h | 875 + .../rocksdb-8.3.2/db/job_context.h | 237 + .../rocksdb-8.3.2/db/kv_checksum.h | 484 + .../rocksdb-8.3.2/db/log_format.h | 55 + .../rocksdb-8.3.2/db/log_reader.cc | 934 + .../rocksdb-8.3.2/db/log_reader.h | 241 + .../rocksdb-8.3.2/db/log_writer.cc | 279 + .../rocksdb-8.3.2/db/log_writer.h | 144 + .../db/logs_with_prep_tracker.cc | 67 + .../rocksdb-8.3.2/db/logs_with_prep_tracker.h | 62 + .../rocksdb-8.3.2/db/lookup_key.h | 68 + .../rocksdb-8.3.2/db/malloc_stats.cc | 53 + .../rocksdb-8.3.2/db/malloc_stats.h | 22 + .../rocksdb-8.3.2/db/memtable.cc | 1675 ++ .../rocksdb-8.3.2/db/memtable.h | 660 + .../rocksdb-8.3.2/db/memtable_list.cc | 987 + .../rocksdb-8.3.2/db/memtable_list.h | 471 + .../rocksdb-8.3.2/db/merge_context.h | 147 + .../rocksdb-8.3.2/db/merge_helper.cc | 606 + .../rocksdb-8.3.2/db/merge_helper.h | 224 + .../rocksdb-8.3.2/db/merge_operator.cc | 85 + .../rocksdb-8.3.2/db/output_validator.cc | 33 + .../rocksdb-8.3.2/db/output_validator.h | 48 + .../db/periodic_task_scheduler.cc | 111 + .../db/periodic_task_scheduler.h | 108 + .../db/pinned_iterators_manager.h | 92 + .../rocksdb-8.3.2/db/post_memtable_callback.h | 25 + .../rocksdb-8.3.2/db/pre_release_callback.h | 37 + .../rocksdb-8.3.2/db/range_del_aggregator.cc | 555 + .../rocksdb-8.3.2/db/range_del_aggregator.h | 478 + .../db/range_tombstone_fragmenter.cc | 502 + .../db/range_tombstone_fragmenter.h | 356 + .../rocksdb-8.3.2/db/read_callback.h | 54 + .../rocksdb-8.3.2/db/repair.cc | 839 + .../rocksdb-8.3.2/db/seqno_to_time_mapping.cc | 341 + .../rocksdb-8.3.2/db/seqno_to_time_mapping.h | 189 + .../rocksdb-8.3.2/db/snapshot_checker.h | 58 + .../rocksdb-8.3.2/db/snapshot_impl.cc | 25 + .../rocksdb-8.3.2/db/snapshot_impl.h | 239 + .../rocksdb-8.3.2/db/table_cache.cc | 710 + .../rocksdb-8.3.2/db/table_cache.h | 283 + .../db/table_cache_sync_and_async.h | 130 + .../db/table_properties_collector.cc | 74 + .../db/table_properties_collector.h | 177 + .../rocksdb-8.3.2/db/transaction_log_impl.cc | 296 + .../rocksdb-8.3.2/db/transaction_log_impl.h | 128 + .../db/trim_history_scheduler.cc | 54 + .../rocksdb-8.3.2/db/trim_history_scheduler.h | 46 + .../rocksdb-8.3.2/db/version_builder.cc | 1426 + .../rocksdb-8.3.2/db/version_builder.h | 93 + .../rocksdb-8.3.2/db/version_edit.cc | 1059 + .../rocksdb-8.3.2/db/version_edit.h | 702 + .../rocksdb-8.3.2/db/version_edit_handler.cc | 1017 + .../rocksdb-8.3.2/db/version_edit_handler.h | 334 + .../rocksdb-8.3.2/db/version_set.cc | 7286 +++++ .../rocksdb-8.3.2/db/version_set.h | 1714 ++ .../db/version_set_sync_and_async.h | 172 + .../rocksdb-8.3.2/db/version_util.h | 72 + .../rocksdb-8.3.2/db/wal_edit.cc | 211 + .../rocksdb-8.3.2/db/wal_edit.h | 177 + .../rocksdb-8.3.2/db/wal_manager.cc | 527 + .../rocksdb-8.3.2/db/wal_manager.h | 136 + .../db/wide/wide_column_serialization.cc | 182 + .../db/wide/wide_column_serialization.h | 77 + .../rocksdb-8.3.2/db/wide/wide_columns.cc | 22 + .../rocksdb-8.3.2/db/write_batch.cc | 3142 +++ .../rocksdb-8.3.2/db/write_batch_base.cc | 94 + .../rocksdb-8.3.2/db/write_batch_internal.h | 401 + .../rocksdb-8.3.2/db/write_callback.h | 27 + .../rocksdb-8.3.2/db/write_controller.cc | 121 + .../rocksdb-8.3.2/db/write_controller.h | 148 + .../rocksdb-8.3.2/db/write_stall_stats.cc | 179 + .../rocksdb-8.3.2/db/write_stall_stats.h | 47 + .../rocksdb-8.3.2/db/write_thread.cc | 844 + .../rocksdb-8.3.2/db/write_thread.h | 464 + 
.../db_stress_tool/db_stress_common.h | 674 + .../db_stress_compaction_filter.h | 96 + .../db_stress_tool/db_stress_driver.h | 18 + .../db_stress_tool/db_stress_env_wrapper.h | 78 + .../db_stress_tool/db_stress_listener.h | 269 + .../db_stress_tool/db_stress_shared_state.h | 437 + .../db_stress_tool/db_stress_stat.h | 219 + .../db_stress_table_properties_collector.h | 65 + .../db_stress_tool/db_stress_test_base.h | 327 + .../db_stress_tool/expected_state.h | 329 + .../db_stress_tool/expected_value.h | 206 + .../db_stress_tool/multi_ops_txns_stress.h | 446 + .../rocksdb-8.3.2/env/composite_env.cc | 534 + .../rocksdb-8.3.2/env/composite_env_wrapper.h | 378 + .../rocksdb-8.3.2/env/emulated_clock.h | 114 + .../rocksdb-8.3.2/env/env.cc | 1233 + .../rocksdb-8.3.2/env/env_chroot.cc | 148 + .../rocksdb-8.3.2/env/env_chroot.h | 55 + .../rocksdb-8.3.2/env/env_encryption.cc | 1346 + .../rocksdb-8.3.2/env/env_encryption_ctr.h | 114 + .../rocksdb-8.3.2/env/env_posix.cc | 524 + .../rocksdb-8.3.2/env/file_system.cc | 277 + .../rocksdb-8.3.2/env/file_system_tracer.cc | 564 + .../rocksdb-8.3.2/env/file_system_tracer.h | 461 + .../rocksdb-8.3.2/env/fs_posix.cc | 1284 + .../rocksdb-8.3.2/env/fs_readonly.h | 105 + .../rocksdb-8.3.2/env/fs_remap.cc | 341 + .../rocksdb-8.3.2/env/fs_remap.h | 137 + .../rocksdb-8.3.2/env/io_posix.cc | 1733 ++ .../rocksdb-8.3.2/env/io_posix.h | 523 + .../rocksdb-8.3.2/env/mock_env.cc | 1058 + .../rocksdb-8.3.2/env/mock_env.h | 144 + .../rocksdb-8.3.2/env/unique_id_gen.cc | 164 + .../rocksdb-8.3.2/env/unique_id_gen.h | 85 + .../rocksdb-8.3.2/file/delete_scheduler.cc | 409 + .../rocksdb-8.3.2/file/delete_scheduler.h | 147 + .../file/file_prefetch_buffer.cc | 955 + .../rocksdb-8.3.2/file/file_prefetch_buffer.h | 471 + .../rocksdb-8.3.2/file/file_util.cc | 277 + .../rocksdb-8.3.2/file/file_util.h | 91 + .../rocksdb-8.3.2/file/filename.cc | 523 + .../rocksdb-8.3.2/file/filename.h | 188 + .../rocksdb-8.3.2/file/line_file_reader.cc | 73 + .../rocksdb-8.3.2/file/line_file_reader.h | 60 + .../file/random_access_file_reader.cc | 599 + .../file/random_access_file_reader.h | 210 + .../rocksdb-8.3.2/file/read_write_util.cc | 33 + .../rocksdb-8.3.2/file/read_write_util.h | 31 + .../rocksdb-8.3.2/file/readahead_file_info.h | 33 + .../rocksdb-8.3.2/file/readahead_raf.cc | 169 + .../rocksdb-8.3.2/file/readahead_raf.h | 29 + .../file/sequence_file_reader.cc | 320 + .../rocksdb-8.3.2/file/sequence_file_reader.h | 119 + .../file/sst_file_manager_impl.cc | 507 + .../file/sst_file_manager_impl.h | 193 + .../file/writable_file_writer.cc | 989 + .../rocksdb-8.3.2/file/writable_file_writer.h | 320 + .../rocksdb-8.3.2/fuzz/util.h | 29 + .../include/rocksdb/advanced_cache.h | 623 + .../include/rocksdb/advanced_options.h | 1177 + .../rocksdb/block_cache_trace_writer.h | 149 + .../rocksdb-8.3.2/include/rocksdb/c.h | 2864 ++ .../rocksdb-8.3.2/include/rocksdb/cache.h | 442 + .../include/rocksdb/cache_bench_tool.h | 14 + .../rocksdb-8.3.2/include/rocksdb/cleanable.h | 128 + .../include/rocksdb/compaction_filter.h | 363 + .../include/rocksdb/compaction_job_stats.h | 109 + .../include/rocksdb/comparator.h | 164 + .../include/rocksdb/compression_type.h | 40 + .../include/rocksdb/concurrent_task_limiter.h | 51 + .../include/rocksdb/configurable.h | 390 + .../include/rocksdb/convenience.h | 466 + .../include/rocksdb/customizable.h | 229 + .../include/rocksdb/data_structure.h | 186 + .../rocksdb-8.3.2/include/rocksdb/db.h | 1980 ++ .../include/rocksdb/db_bench_tool.h | 11 + .../include/rocksdb/db_dump_tool.h | 43 + 
.../include/rocksdb/db_stress_tool.h | 11 + .../rocksdb-8.3.2/include/rocksdb/env.h | 1882 ++ .../include/rocksdb/env_encryption.h | 463 + .../include/rocksdb/experimental.h | 56 + .../include/rocksdb/file_checksum.h | 146 + .../include/rocksdb/file_system.h | 1849 ++ .../include/rocksdb/filter_policy.h | 206 + .../include/rocksdb/flush_block_policy.h | 75 + .../include/rocksdb/functor_wrapper.h | 56 + .../rocksdb-8.3.2/include/rocksdb/io_status.h | 244 + .../include/rocksdb/iostats_context.h | 98 + .../rocksdb-8.3.2/include/rocksdb/iterator.h | 144 + .../rocksdb-8.3.2/include/rocksdb/ldb_tool.h | 42 + .../rocksdb-8.3.2/include/rocksdb/listener.h | 840 + .../include/rocksdb/memory_allocator.h | 87 + .../include/rocksdb/memtablerep.h | 421 + .../include/rocksdb/merge_operator.h | 286 + .../rocksdb-8.3.2/include/rocksdb/metadata.h | 258 + .../rocksdb-8.3.2/include/rocksdb/options.h | 2083 ++ .../include/rocksdb/perf_context.h | 314 + .../include/rocksdb/perf_level.h | 36 + .../include/rocksdb/persistent_cache.h | 74 + .../rocksdb-8.3.2/include/rocksdb/port_defs.h | 22 + .../include/rocksdb/rate_limiter.h | 159 + .../include/rocksdb/rocksdb_namespace.h | 16 + .../include/rocksdb/secondary_cache.h | 139 + .../rocksdb-8.3.2/include/rocksdb/slice.h | 264 + .../include/rocksdb/slice_transform.h | 135 + .../rocksdb-8.3.2/include/rocksdb/snapshot.h | 53 + .../include/rocksdb/sst_dump_tool.h | 17 + .../include/rocksdb/sst_file_manager.h | 136 + .../include/rocksdb/sst_file_reader.h | 45 + .../include/rocksdb/sst_file_writer.h | 189 + .../include/rocksdb/sst_partitioner.h | 142 + .../include/rocksdb/statistics.h | 757 + .../include/rocksdb/stats_history.h | 70 + .../rocksdb-8.3.2/include/rocksdb/status.h | 574 + .../include/rocksdb/system_clock.h | 114 + .../rocksdb-8.3.2/include/rocksdb/table.h | 927 + .../include/rocksdb/table_properties.h | 332 + .../include/rocksdb/table_reader_caller.h | 41 + .../include/rocksdb/thread_status.h | 190 + .../include/rocksdb/threadpool.h | 67 + .../include/rocksdb/trace_reader_writer.h | 52 + .../include/rocksdb/trace_record.h | 248 + .../include/rocksdb/trace_record_result.h | 187 + .../include/rocksdb/transaction_log.h | 122 + .../rocksdb-8.3.2/include/rocksdb/types.h | 94 + .../rocksdb-8.3.2/include/rocksdb/unique_id.h | 55 + .../include/rocksdb/universal_compaction.h | 96 + .../include/rocksdb/utilities/agg_merge.h | 138 + .../include/rocksdb/utilities/backup_engine.h | 689 + .../rocksdb/utilities/cache_dump_load.h | 140 + .../include/rocksdb/utilities/checkpoint.h | 63 + .../include/rocksdb/utilities/convenience.h | 10 + .../rocksdb/utilities/customizable_util.h | 322 + .../include/rocksdb/utilities/db_ttl.h | 70 + .../include/rocksdb/utilities/debug.h | 46 + .../include/rocksdb/utilities/env_mirror.h | 179 + .../rocksdb/utilities/info_log_finder.h | 19 + .../include/rocksdb/utilities/ldb_cmd.h | 316 + .../utilities/ldb_cmd_execute_result.h | 75 + .../rocksdb/utilities/leveldb_options.h | 145 + .../utilities/lua/rocks_lua_custom_library.h | 43 + .../rocksdb/utilities/lua/rocks_lua_util.h | 55 + .../include/rocksdb/utilities/memory_util.h | 48 + .../rocksdb/utilities/object_registry.h | 583 + .../utilities/optimistic_transaction_db.h | 98 + .../utilities/option_change_migration.h | 24 + .../include/rocksdb/utilities/options_type.h | 1221 + .../include/rocksdb/utilities/options_util.h | 105 + .../include/rocksdb/utilities/replayer.h | 85 + .../include/rocksdb/utilities/sim_cache.h | 93 + .../include/rocksdb/utilities/stackable_db.h | 587 + 
.../utilities/table_properties_collectors.h | 88 + .../include/rocksdb/utilities/transaction.h | 683 + .../rocksdb/utilities/transaction_db.h | 506 + .../rocksdb/utilities/transaction_db_mutex.h | 89 + .../utilities/write_batch_with_index.h | 307 + .../rocksdb-8.3.2/include/rocksdb/version.h | 43 + .../include/rocksdb/wal_filter.h | 111 + .../include/rocksdb/wide_columns.h | 210 + .../include/rocksdb/write_batch.h | 494 + .../include/rocksdb/write_batch_base.h | 144 + .../include/rocksdb/write_buffer_manager.h | 183 + .../compaction_filter_factory_jnicallback.h | 37 + .../java/rocksjni/comparatorjnicallback.h | 137 + .../java/rocksjni/cplusplus_to_java_convert.h | 37 + .../rocksjni/event_listener_jnicallback.h | 122 + .../rocksdb-8.3.2/java/rocksjni/jnicallback.h | 32 + .../java/rocksjni/loggerjnicallback.h | 51 + .../rocksdb-8.3.2/java/rocksjni/portal.h | 8706 ++++++ .../java/rocksjni/statisticsjni.h | 34 + .../java/rocksjni/table_filter_jnicallback.h | 36 + .../java/rocksjni/trace_writer_jnicallback.h | 36 + .../transaction_notifier_jnicallback.h | 42 + .../java/rocksjni/wal_filter_jnicallback.h | 42 + .../rocksjni/writebatchhandlerjnicallback.h | 92 + .../rocksdb-8.3.2/logging/auto_roll_logger.cc | 368 + .../rocksdb-8.3.2/logging/auto_roll_logger.h | 166 + .../rocksdb-8.3.2/logging/env_logger.h | 195 + .../rocksdb-8.3.2/logging/event_logger.cc | 68 + .../rocksdb-8.3.2/logging/event_logger.h | 202 + .../rocksdb-8.3.2/logging/log_buffer.cc | 91 + .../rocksdb-8.3.2/logging/log_buffer.h | 57 + .../rocksdb-8.3.2/logging/logging.h | 62 + .../rocksdb-8.3.2/memory/allocator.h | 58 + .../rocksdb-8.3.2/memory/arena.cc | 170 + .../rocksdb-8.3.2/memory/arena.h | 135 + .../rocksdb-8.3.2/memory/concurrent_arena.cc | 45 + .../rocksdb-8.3.2/memory/concurrent_arena.h | 215 + .../memory/jemalloc_nodump_allocator.cc | 303 + .../memory/jemalloc_nodump_allocator.h | 99 + .../memory/memkind_kmem_allocator.cc | 44 + .../memory/memkind_kmem_allocator.h | 43 + .../rocksdb-8.3.2/memory/memory_allocator.cc | 80 + .../memory/memory_allocator_impl.h | 47 + .../rocksdb-8.3.2/memory/memory_usage.h | 38 + .../rocksdb-8.3.2/memtable/alloc_tracker.cc | 63 + .../memtable/hash_linklist_rep.cc | 924 + .../memtable/hash_skiplist_rep.cc | 391 + .../rocksdb-8.3.2/memtable/inlineskiplist.h | 1051 + .../rocksdb-8.3.2/memtable/skiplist.h | 498 + .../rocksdb-8.3.2/memtable/skiplistrep.cc | 368 + .../rocksdb-8.3.2/memtable/stl_wrappers.h | 33 + .../rocksdb-8.3.2/memtable/vectorrep.cc | 307 + .../memtable/write_buffer_manager.cc | 185 + .../monitoring/file_read_sample.h | 23 + .../rocksdb-8.3.2/monitoring/histogram.cc | 270 + .../rocksdb-8.3.2/monitoring/histogram.h | 143 + .../monitoring/histogram_windowing.cc | 194 + .../monitoring/histogram_windowing.h | 84 + .../monitoring/in_memory_stats_history.cc | 50 + .../monitoring/in_memory_stats_history.h | 74 + .../monitoring/instrumented_mutex.cc | 90 + .../monitoring/instrumented_mutex.h | 126 + .../monitoring/iostats_context.cc | 78 + .../monitoring/iostats_context_imp.h | 62 + .../rocksdb-8.3.2/monitoring/perf_context.cc | 313 + .../monitoring/perf_context_imp.h | 96 + .../rocksdb-8.3.2/monitoring/perf_level.cc | 23 + .../rocksdb-8.3.2/monitoring/perf_level_imp.h | 14 + .../monitoring/perf_step_timer.h | 77 + .../monitoring/persistent_stats_history.cc | 170 + .../monitoring/persistent_stats_history.h | 83 + .../rocksdb-8.3.2/monitoring/statistics.cc | 534 + .../monitoring/statistics_impl.h | 144 + .../monitoring/thread_status_impl.cc | 163 + .../monitoring/thread_status_updater.cc 
| 328 + .../monitoring/thread_status_updater.h | 226 + .../monitoring/thread_status_updater_debug.cc | 43 + .../monitoring/thread_status_util.cc | 208 + .../monitoring/thread_status_util.h | 139 + .../monitoring/thread_status_util_debug.cc | 46 + .../rocksdb-8.3.2/options/cf_options.cc | 1196 + .../rocksdb-8.3.2/options/cf_options.h | 347 + .../rocksdb-8.3.2/options/configurable.cc | 712 + .../options/configurable_helper.h | 185 + .../rocksdb-8.3.2/options/configurable_test.h | 116 + .../rocksdb-8.3.2/options/customizable.cc | 133 + .../rocksdb-8.3.2/options/db_options.cc | 1079 + .../rocksdb-8.3.2/options/db_options.h | 152 + .../rocksdb-8.3.2/options/options.cc | 692 + .../rocksdb-8.3.2/options/options_helper.cc | 1424 + .../rocksdb-8.3.2/options/options_helper.h | 116 + .../rocksdb-8.3.2/options/options_parser.cc | 736 + .../rocksdb-8.3.2/options/options_parser.h | 149 + .../rocksdb-8.3.2/port/jemalloc_helper.h | 107 + .../rocksdb-8.3.2/port/lang.h | 97 + .../rocksdb-8.3.2/port/likely.h | 18 + .../rocksdb-8.3.2/port/malloc.h | 17 + .../rocksdb-8.3.2/port/mmap.cc | 98 + .../rocksdb-8.3.2/port/mmap.h | 70 + .../rocksdb-8.3.2/port/port.h | 21 + .../rocksdb-8.3.2/port/port_dirent.h | 44 + .../rocksdb-8.3.2/port/port_example.h | 101 + .../rocksdb-8.3.2/port/port_posix.cc | 300 + .../rocksdb-8.3.2/port/port_posix.h | 243 + .../rocksdb-8.3.2/port/stack_trace.cc | 336 + .../rocksdb-8.3.2/port/stack_trace.h | 31 + .../rocksdb-8.3.2/port/sys_time.h | 63 + .../rocksdb-8.3.2/port/util_logger.h | 18 + .../rocksdb-8.3.2/port/win/env_default.cc | 45 + .../rocksdb-8.3.2/port/win/env_win.cc | 1437 + .../rocksdb-8.3.2/port/win/env_win.h | 305 + .../rocksdb-8.3.2/port/win/io_win.cc | 1101 + .../rocksdb-8.3.2/port/win/io_win.h | 508 + .../rocksdb-8.3.2/port/win/port_win.cc | 303 + .../rocksdb-8.3.2/port/win/port_win.h | 379 + .../rocksdb-8.3.2/port/win/win_logger.cc | 192 + .../rocksdb-8.3.2/port/win/win_logger.h | 64 + .../rocksdb-8.3.2/port/win/win_thread.cc | 170 + .../rocksdb-8.3.2/port/win/win_thread.h | 117 + .../rocksdb-8.3.2/port/win/xpress_win.h | 26 + .../rocksdb-8.3.2/port/xpress.h | 17 + .../table/adaptive/adaptive_table_factory.cc | 125 + .../table/adaptive/adaptive_table_factory.h | 56 + .../block_based/binary_search_index_reader.cc | 73 + .../block_based/binary_search_index_reader.h | 48 + .../rocksdb-8.3.2/table/block_based/block.cc | 1291 + .../rocksdb-8.3.2/table/block_based/block.h | 921 + .../block_based/block_based_table_builder.cc | 2027 ++ .../block_based/block_based_table_builder.h | 209 + .../block_based/block_based_table_factory.cc | 962 + .../block_based/block_based_table_factory.h | 102 + .../block_based/block_based_table_iterator.cc | 500 + .../block_based/block_based_table_iterator.h | 310 + .../block_based/block_based_table_reader.cc | 3043 +++ .../block_based/block_based_table_reader.h | 745 + .../block_based_table_reader_impl.h | 203 + .../block_based_table_reader_sync_and_async.h | 758 + .../table/block_based/block_builder.cc | 234 + .../table/block_based/block_builder.h | 104 + .../table/block_based/block_cache.cc | 106 + .../table/block_based/block_cache.h | 140 + .../table/block_based/block_prefetcher.cc | 120 + .../table/block_based/block_prefetcher.h | 72 + .../table/block_based/block_prefix_index.cc | 226 + .../table/block_based/block_prefix_index.h | 70 + .../table/block_based/block_type.h | 34 + .../table/block_based/cachable_entry.h | 244 + .../table/block_based/data_block_footer.cc | 59 + .../table/block_based/data_block_footer.h | 25 + 
.../block_based/data_block_hash_index.cc | 94 + .../table/block_based/data_block_hash_index.h | 137 + .../table/block_based/filter_block.h | 183 + .../block_based/filter_block_reader_common.cc | 163 + .../block_based/filter_block_reader_common.h | 76 + .../table/block_based/filter_policy.cc | 1966 ++ .../block_based/filter_policy_internal.h | 340 + .../table/block_based/flush_block_policy.cc | 132 + .../block_based/flush_block_policy_impl.h | 40 + .../table/block_based/full_filter_block.cc | 290 + .../table/block_based/full_filter_block.h | 147 + .../table/block_based/hash_index_reader.cc | 147 + .../table/block_based/hash_index_reader.h | 49 + .../table/block_based/index_builder.cc | 282 + .../table/block_based/index_builder.h | 455 + .../table/block_based/index_reader_common.cc | 57 + .../table/block_based/index_reader_common.h | 85 + .../block_based/mock_block_based_table.h | 62 + .../block_based/parsed_full_filter_block.cc | 23 + .../block_based/parsed_full_filter_block.h | 47 + .../block_based/partitioned_filter_block.cc | 554 + .../block_based/partitioned_filter_block.h | 182 + .../block_based/partitioned_index_iterator.cc | 163 + .../block_based/partitioned_index_iterator.h | 160 + .../block_based/partitioned_index_reader.cc | 224 + .../block_based/partitioned_index_reader.h | 56 + .../table/block_based/reader_common.cc | 52 + .../table/block_based/reader_common.h | 34 + .../block_based/uncompression_dict_reader.cc | 126 + .../block_based/uncompression_dict_reader.h | 61 + .../rocksdb-8.3.2/table/block_fetcher.cc | 405 + .../rocksdb-8.3.2/table/block_fetcher.h | 142 + .../table/compaction_merging_iterator.cc | 370 + .../table/compaction_merging_iterator.h | 44 + .../table/cuckoo/cuckoo_table_builder.cc | 551 + .../table/cuckoo/cuckoo_table_builder.h | 136 + .../table/cuckoo/cuckoo_table_factory.cc | 100 + .../table/cuckoo/cuckoo_table_factory.h | 80 + .../table/cuckoo/cuckoo_table_reader.cc | 412 + .../table/cuckoo/cuckoo_table_reader.h | 100 + .../rocksdb-8.3.2/table/format.cc | 573 + .../rocksdb-8.3.2/table/format.h | 378 + .../rocksdb-8.3.2/table/get_context.cc | 616 + .../rocksdb-8.3.2/table/get_context.h | 245 + .../rocksdb-8.3.2/table/internal_iterator.h | 224 + .../rocksdb-8.3.2/table/iter_heap.h | 44 + .../rocksdb-8.3.2/table/iterator.cc | 130 + .../rocksdb-8.3.2/table/iterator_wrapper.h | 190 + .../rocksdb-8.3.2/table/merging_iterator.cc | 1725 ++ .../rocksdb-8.3.2/table/merging_iterator.h | 97 + .../rocksdb-8.3.2/table/meta_blocks.cc | 564 + .../rocksdb-8.3.2/table/meta_blocks.h | 172 + .../rocksdb-8.3.2/table/mock_table.h | 94 + .../rocksdb-8.3.2/table/multiget_context.h | 405 + .../table/persistent_cache_helper.cc | 111 + .../table/persistent_cache_helper.h | 46 + .../table/persistent_cache_options.h | 34 + .../table/plain/plain_table_bloom.cc | 78 + .../table/plain/plain_table_bloom.h | 132 + .../table/plain/plain_table_builder.cc | 335 + .../table/plain/plain_table_builder.h | 152 + .../table/plain/plain_table_factory.cc | 296 + .../table/plain/plain_table_factory.h | 180 + .../table/plain/plain_table_index.cc | 211 + .../table/plain/plain_table_index.h | 246 + .../table/plain/plain_table_key_coding.cc | 507 + .../table/plain/plain_table_key_coding.h | 199 + .../table/plain/plain_table_reader.cc | 771 + .../table/plain/plain_table_reader.h | 243 + .../table/scoped_arena_iterator.h | 57 + .../rocksdb-8.3.2/table/sst_file_dumper.cc | 520 + .../rocksdb-8.3.2/table/sst_file_dumper.h | 99 + .../rocksdb-8.3.2/table/sst_file_reader.cc | 101 + 
.../rocksdb-8.3.2/table/sst_file_writer.cc | 436 + .../table/sst_file_writer_collectors.h | 95 + .../rocksdb-8.3.2/table/table_builder.h | 228 + .../rocksdb-8.3.2/table/table_factory.cc | 52 + .../rocksdb-8.3.2/table/table_properties.cc | 351 + .../table/table_properties_internal.h | 14 + .../rocksdb-8.3.2/table/table_reader.h | 187 + .../rocksdb-8.3.2/table/two_level_iterator.cc | 220 + .../rocksdb-8.3.2/table/two_level_iterator.h | 43 + .../rocksdb-8.3.2/table/unique_id.cc | 223 + .../rocksdb-8.3.2/table/unique_id_impl.h | 93 + .../rocksdb-8.3.2/test_util/mock_time_env.h | 78 + .../test_util/secondary_cache_test_util.h | 119 + .../rocksdb-8.3.2/test_util/sync_point.cc | 82 + .../rocksdb-8.3.2/test_util/sync_point.h | 180 + .../test_util/sync_point_impl.cc | 152 + .../rocksdb-8.3.2/test_util/sync_point_impl.h | 96 + .../rocksdb-8.3.2/test_util/testharness.h | 124 + .../rocksdb-8.3.2/test_util/testutil.h | 860 + .../test_util/transaction_test_util.cc | 400 + .../test_util/transaction_test_util.h | 147 + .../rocksdb-8.3.2/third-party/gcc/ppc-asm.h | 390 + .../gtest-1.8.1/fused-src/gtest/gtest.h | 22115 ++++++++++++++++ .../block_cache_trace_analyzer.h | 397 + .../rocksdb-8.3.2/tools/dump/db_dump_tool.cc | 258 + .../tools/io_tracer_parser_tool.h | 38 + .../rocksdb-8.3.2/tools/ldb_cmd_impl.h | 744 + .../tools/simulated_hybrid_file_system.h | 124 + .../rocksdb-8.3.2/tools/trace_analyzer_tool.h | 329 + .../trace_replay/block_cache_tracer.cc | 509 + .../trace_replay/block_cache_tracer.h | 239 + .../rocksdb-8.3.2/trace_replay/io_tracer.cc | 303 + .../rocksdb-8.3.2/trace_replay/io_tracer.h | 185 + .../trace_replay/trace_record.cc | 206 + .../trace_replay/trace_record_handler.cc | 190 + .../trace_replay/trace_record_handler.h | 46 + .../trace_replay/trace_record_result.cc | 146 + .../trace_replay/trace_replay.cc | 622 + .../rocksdb-8.3.2/trace_replay/trace_replay.h | 183 + .../rocksdb-8.3.2/util/aligned_buffer.h | 235 + .../rocksdb-8.3.2/util/async_file_reader.cc | 81 + .../rocksdb-8.3.2/util/async_file_reader.h | 144 + .../rocksdb-8.3.2/util/autovector.h | 393 + .../rocksdb-8.3.2/util/bloom_impl.h | 489 + .../rocksdb-8.3.2/util/build_version.cc | 79 + .../rocksdb-8.3.2/util/build_version.cc.in | 79 + .../rocksdb-8.3.2/util/cast_util.h | 42 + .../rocksdb-8.3.2/util/channel.h | 69 + .../rocksdb-8.3.2/util/cleanable.cc | 181 + .../rocksdb-8.3.2/util/coding.cc | 90 + .../rocksdb-8.3.2/util/coding.h | 389 + .../rocksdb-8.3.2/util/coding_lean.h | 101 + .../util/compaction_job_stats_impl.cc | 92 + .../rocksdb-8.3.2/util/comparator.cc | 383 + .../rocksdb-8.3.2/util/compression.cc | 122 + .../rocksdb-8.3.2/util/compression.h | 1795 ++ .../util/compression_context_cache.cc | 106 + .../util/compression_context_cache.h | 47 + .../util/concurrent_task_limiter_impl.cc | 64 + .../util/concurrent_task_limiter_impl.h | 67 + .../rocksdb-8.3.2/util/core_local.h | 84 + .../rocksdb-8.3.2/util/coro_utils.h | 112 + .../rocksdb-8.3.2/util/crc32c.cc | 1292 + .../rocksdb-8.3.2/util/crc32c.h | 56 + .../rocksdb-8.3.2/util/crc32c_arm64.cc | 213 + .../rocksdb-8.3.2/util/crc32c_arm64.h | 52 + .../rocksdb-8.3.2/util/crc32c_ppc.h | 22 + .../rocksdb-8.3.2/util/crc32c_ppc_constants.h | 900 + .../rocksdb-8.3.2/util/data_structure.cc | 18 + .../rocksdb-8.3.2/util/defer.h | 82 + .../rocksdb-8.3.2/util/distributed_mutex.h | 50 + .../rocksdb-8.3.2/util/duplicate_detector.h | 69 + .../rocksdb-8.3.2/util/dynamic_bloom.cc | 70 + .../rocksdb-8.3.2/util/dynamic_bloom.h | 214 + .../rocksdb-8.3.2/util/fastrange.h | 114 + 
.../util/file_checksum_helper.cc | 170 + .../rocksdb-8.3.2/util/file_checksum_helper.h | 101 + .../rocksdb-8.3.2/util/gflags_compat.h | 29 + .../rocksdb-8.3.2/util/hash.cc | 201 + .../rocksdb-8.3.2/util/hash.h | 137 + .../rocksdb-8.3.2/util/hash128.h | 26 + .../rocksdb-8.3.2/util/hash_containers.h | 51 + .../rocksdb-8.3.2/util/hash_map.h | 67 + .../rocksdb-8.3.2/util/heap.h | 174 + .../rocksdb-8.3.2/util/kv_map.h | 33 + .../rocksdb-8.3.2/util/math.h | 299 + .../rocksdb-8.3.2/util/math128.h | 316 + .../rocksdb-8.3.2/util/murmurhash.cc | 196 + .../rocksdb-8.3.2/util/murmurhash.h | 43 + .../rocksdb-8.3.2/util/mutexlock.h | 181 + .../rocksdb-8.3.2/util/ppc-opcode.h | 27 + .../rocksdb-8.3.2/util/random.cc | 63 + .../rocksdb-8.3.2/util/random.h | 190 + .../rocksdb-8.3.2/util/rate_limiter.cc | 376 + .../rocksdb-8.3.2/util/rate_limiter_impl.h | 146 + .../rocksdb-8.3.2/util/repeatable_thread.h | 149 + .../rocksdb-8.3.2/util/ribbon_alg.h | 1225 + .../rocksdb-8.3.2/util/ribbon_config.cc | 506 + .../rocksdb-8.3.2/util/ribbon_config.h | 182 + .../rocksdb-8.3.2/util/ribbon_impl.h | 1137 + .../rocksdb-8.3.2/util/set_comparator.h | 24 + .../util/single_thread_executor.h | 56 + .../rocksdb-8.3.2/util/slice.cc | 367 + .../rocksdb-8.3.2/util/status.cc | 162 + .../rocksdb-8.3.2/util/stderr_logger.cc | 30 + .../rocksdb-8.3.2/util/stderr_logger.h | 31 + .../rocksdb-8.3.2/util/stop_watch.h | 136 + .../rocksdb-8.3.2/util/string_util.cc | 502 + .../rocksdb-8.3.2/util/string_util.h | 175 + .../rocksdb-8.3.2/util/thread_guard.h | 41 + .../rocksdb-8.3.2/util/thread_local.cc | 521 + .../rocksdb-8.3.2/util/thread_local.h | 100 + .../rocksdb-8.3.2/util/thread_operation.h | 113 + .../rocksdb-8.3.2/util/threadpool_imp.cc | 551 + .../rocksdb-8.3.2/util/threadpool_imp.h | 120 + .../rocksdb-8.3.2/util/timer.h | 340 + .../rocksdb-8.3.2/util/timer_queue.h | 231 + .../rocksdb-8.3.2/util/udt_util.h | 77 + .../util/user_comparator_wrapper.h | 64 + .../rocksdb-8.3.2/util/vector_iterator.h | 118 + .../rocksdb-8.3.2/util/work_queue.h | 150 + .../rocksdb-8.3.2/util/xxhash.cc | 48 + .../rocksdb-8.3.2/util/xxhash.h | 6360 +++++ .../rocksdb-8.3.2/util/xxph3.h | 1764 ++ .../utilities/agg_merge/agg_merge.cc | 238 + .../utilities/agg_merge/agg_merge_impl.h | 49 + .../utilities/agg_merge/test_agg_merge.h | 47 + .../utilities/backup/backup_engine.cc | 3355 +++ .../utilities/backup/backup_engine_impl.h | 34 + .../blob_db/blob_compaction_filter.cc | 488 + .../blob_db/blob_compaction_filter.h | 202 + .../utilities/blob_db/blob_db.cc | 112 + .../rocksdb-8.3.2/utilities/blob_db/blob_db.h | 264 + .../utilities/blob_db/blob_db_gc_stats.h | 54 + .../utilities/blob_db/blob_db_impl.cc | 2185 ++ .../utilities/blob_db/blob_db_impl.h | 501 + .../blob_db/blob_db_impl_filesnapshot.cc | 111 + .../utilities/blob_db/blob_db_iterator.h | 148 + .../utilities/blob_db/blob_db_listener.h | 69 + .../utilities/blob_db/blob_dump_tool.h | 56 + .../utilities/blob_db/blob_file.cc | 316 + .../utilities/blob_db/blob_file.h | 244 + .../utilities/cache_dump_load.cc | 67 + .../utilities/cache_dump_load_impl.cc | 369 + .../utilities/cache_dump_load_impl.h | 356 + .../cassandra/cassandra_compaction_filter.cc | 106 + .../cassandra/cassandra_compaction_filter.h | 57 + .../utilities/cassandra/cassandra_options.h | 41 + .../utilities/cassandra/format.cc | 367 + .../utilities/cassandra/format.h | 183 + .../utilities/cassandra/merge_operator.cc | 80 + .../utilities/cassandra/merge_operator.h | 44 + .../utilities/cassandra/serialize.h | 81 + .../utilities/cassandra/test_utils.h | 42 
+ .../utilities/checkpoint/checkpoint_impl.cc | 470 + .../utilities/checkpoint/checkpoint_impl.h | 64 + .../utilities/compaction_filters.cc | 52 + .../layered_compaction_filter_base.h | 41 + .../remove_emptyvalue_compactionfilter.cc | 24 + .../remove_emptyvalue_compactionfilter.h | 26 + .../utilities/convenience/info_log_finder.cc | 26 + .../rocksdb-8.3.2/utilities/counted_fs.cc | 379 + .../rocksdb-8.3.2/utilities/counted_fs.h | 158 + .../rocksdb-8.3.2/utilities/debug.cc | 118 + .../rocksdb-8.3.2/utilities/env_mirror.cc | 273 + .../rocksdb-8.3.2/utilities/env_timed.cc | 181 + .../rocksdb-8.3.2/utilities/env_timed.h | 95 + .../utilities/fault_injection_env.cc | 555 + .../utilities/fault_injection_env.h | 258 + .../utilities/fault_injection_fs.cc | 1071 + .../utilities/fault_injection_fs.h | 593 + .../fault_injection_secondary_cache.cc | 136 + .../fault_injection_secondary_cache.h | 109 + .../leveldb_options/leveldb_options.cc | 57 + .../utilities/memory/memory_util.cc | 50 + .../utilities/memory_allocators.h | 103 + .../utilities/merge_operators.cc | 115 + .../rocksdb-8.3.2/utilities/merge_operators.h | 36 + .../utilities/merge_operators/bytesxor.cc | 57 + .../utilities/merge_operators/bytesxor.h | 40 + .../utilities/merge_operators/max.cc | 64 + .../utilities/merge_operators/max_operator.h | 35 + .../utilities/merge_operators/put.cc | 74 + .../utilities/merge_operators/put_operator.h | 56 + .../utilities/merge_operators/sortlist.cc | 95 + .../utilities/merge_operators/sortlist.h | 42 + .../string_append/stringappend.cc | 76 + .../string_append/stringappend.h | 32 + .../string_append/stringappend2.cc | 130 + .../string_append/stringappend2.h | 52 + .../utilities/merge_operators/uint64add.cc | 56 + .../utilities/merge_operators/uint64add.h | 35 + .../utilities/object_registry.cc | 381 + .../option_change_migration.cc | 176 + .../utilities/options/options_util.cc | 117 + .../persistent_cache/block_cache_tier.cc | 420 + .../persistent_cache/block_cache_tier.h | 154 + .../persistent_cache/block_cache_tier_file.cc | 608 + .../persistent_cache/block_cache_tier_file.h | 291 + .../block_cache_tier_file_buffer.h | 127 + .../block_cache_tier_metadata.cc | 84 + .../block_cache_tier_metadata.h | 122 + .../utilities/persistent_cache/hash_table.h | 237 + .../persistent_cache/hash_table_evictable.h | 166 + .../utilities/persistent_cache/lrulist.h | 172 + .../persistent_cache/persistent_cache_test.h | 284 + .../persistent_cache/persistent_cache_tier.cc | 165 + .../persistent_cache/persistent_cache_tier.h | 340 + .../persistent_cache/persistent_cache_util.h | 67 + .../persistent_cache/volatile_tier_impl.cc | 138 + .../persistent_cache/volatile_tier_impl.h | 139 + .../simulator_cache/cache_simulator.cc | 287 + .../simulator_cache/cache_simulator.h | 231 + .../utilities/simulator_cache/sim_cache.cc | 372 + .../compact_on_deletion_collector.cc | 221 + .../compact_on_deletion_collector.h | 68 + .../trace/file_trace_reader_writer.cc | 133 + .../trace/file_trace_reader_writer.h | 48 + .../utilities/trace/replayer_impl.cc | 314 + .../utilities/trace/replayer_impl.h | 84 + .../transactions/lock/lock_manager.cc | 27 + .../transactions/lock/lock_manager.h | 80 + .../transactions/lock/lock_tracker.h | 207 + .../lock/point/point_lock_manager.cc | 719 + .../lock/point/point_lock_manager.h | 222 + .../lock/point/point_lock_manager_test.h | 324 + .../lock/point/point_lock_tracker.cc | 255 + .../lock/point/point_lock_tracker.h | 97 + .../lock/range/range_lock_manager.h | 34 + .../lock/range/range_tree/lib/db.h | 76 + 
 .../lock/range/range_tree/lib/ft/comparator.h | 138 +
 .../lock/range/range_tree/lib/ft/ft-status.h | 102 +
 .../range_tree/lib/locktree/concurrent_tree.h | 174 +
 .../range/range_tree/lib/locktree/keyrange.h | 141 +
 .../range_tree/lib/locktree/lock_request.h | 255 +
 .../range/range_tree/lib/locktree/locktree.h | 580 +
 .../range_tree/lib/locktree/range_buffer.h | 178 +
 .../range/range_tree/lib/locktree/treenode.h | 302 +
 .../range/range_tree/lib/locktree/txnid_set.h | 92 +
 .../lock/range/range_tree/lib/locktree/wfg.h | 124 +
 .../range/range_tree/lib/portability/memory.h | 215 +
 .../lib/portability/toku_assert_subst.h | 39 +
 .../range_tree/lib/portability/toku_atomic.h | 130 +
 .../lib/portability/toku_external_pthread.h | 83 +
 .../lib/portability/toku_instrumentation.h | 286 +
 .../lib/portability/toku_portability.h | 87 +
 .../range_tree/lib/portability/toku_pthread.h | 520 +
 .../lib/portability/toku_race_tools.h | 179 +
 .../range_tree/lib/portability/toku_time.h | 197 +
 .../range_tree/lib/portability/txn_subst.h | 27 +
 .../lock/range/range_tree/lib/util/dbt.h | 98 +
 .../range_tree/lib/util/growable_array.h | 144 +
 .../lock/range/range_tree/lib/util/memarena.h | 141 +
 .../lock/range/range_tree/lib/util/omt.h | 794 +
 .../lock/range/range_tree/lib/util/omt_impl.h | 1295 +
 .../range_tree/lib/util/partitioned_counter.h | 165 +
 .../lock/range/range_tree/lib/util/status.h | 76 +
 .../range_tree/range_tree_lock_manager.h | 135 +
 .../range_tree/range_tree_lock_tracker.h | 146 +
 .../transactions/optimistic_transaction.cc | 194 +
 .../transactions/optimistic_transaction.h | 99 +
 .../optimistic_transaction_db_impl.cc | 109 +
 .../optimistic_transaction_db_impl.h | 86 +
 .../transactions/pessimistic_transaction.cc | 1177 +
 .../transactions/pessimistic_transaction.h | 311 +
 .../pessimistic_transaction_db.cc | 780 +
 .../transactions/pessimistic_transaction_db.h | 316 +
 .../transactions/snapshot_checker.cc | 37 +
 .../transactions/transaction_base.cc | 757 +
 .../utilities/transactions/transaction_base.h | 382 +
 .../transactions/transaction_db_mutex_impl.cc | 133 +
 .../transactions/transaction_db_mutex_impl.h | 24 +
 .../utilities/transactions/transaction_test.h | 578 +
 .../transactions/transaction_util.cc | 204 +
 .../utilities/transactions/transaction_util.h | 83 +
 .../transactions/write_prepared_txn.cc | 515 +
 .../transactions/write_prepared_txn.h | 117 +
 .../transactions/write_prepared_txn_db.cc | 1038 +
 .../transactions/write_prepared_txn_db.h | 1123 +
 .../transactions/write_unprepared_txn.cc | 1056 +
 .../transactions/write_unprepared_txn.h | 339 +
 .../transactions/write_unprepared_txn_db.cc | 476 +
 .../transactions/write_unprepared_txn_db.h | 106 +
 .../utilities/ttl/db_ttl_impl.cc | 617 +
 .../rocksdb-8.3.2/utilities/ttl/db_ttl_impl.h | 243 +
 .../rocksdb-8.3.2/utilities/wal_filter.cc | 22 +
 .../write_batch_with_index.cc | 694 +
 .../write_batch_with_index_internal.cc | 742 +
 .../write_batch_with_index_internal.h | 342 +
 .../src/Database/RocksDB.hs | 17 +
 .../src/Database/RocksDB/Base.hs | 336 +
 .../src/Database/RocksDB/C.hsc | 415 +
 .../src/Database/RocksDB/Internal.hs | 217 +
 .../src/Database/RocksDB/Iterator.hs | 198 +
 .../src/Database/RocksDB/ReadOptions.hs | 93 +
 .../src/Database/RocksDB/Types.hs | 147 +
 .../rocksdb-haskell-kadena/unpack-rocksdb.sh | 354 +
 861 files changed, 319552 insertions(+), 16 deletions(-)
 create mode 100644 vendored/rocksdb-haskell-kadena/CHANGELOG.md
 create mode 100644 vendored/rocksdb-haskell-kadena/LICENSE
 create mode 100644 vendored/rocksdb-haskell-kadena/README.md
 create mode 100644 vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.cpp
 create mode 100644 vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_constants.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_counting_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_completion_callback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_index.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_read_request.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/c.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/clipping_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iteration_stats.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_universal.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_universal.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_service_job.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/file_pri.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/sst_partitioner.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/convenience.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_filesnapshot.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_compaction_flush.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_debug.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_experimental.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_files.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_open.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_write.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_test_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_with_timestamp_test_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/experimental.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/file_indexer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/file_indexer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_job.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_job.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/history_trimming_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/job_context.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/kv_checksum.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_format.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/lookup_key.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_context.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_operator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/pinned_iterators_manager.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/post_memtable_callback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/pre_release_callback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/read_callback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/repair.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_checker.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache_sync_and_async.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set_sync_and_async.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_columns.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_base.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_internal.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_callback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_common.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_compaction_filter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_driver.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_env_wrapper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_listener.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_shared_state.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_stat.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_table_properties_collector.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_test_base.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/expected_state.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/expected_value.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/multi_ops_txns_stress.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env_wrapper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/emulated_clock.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption_ctr.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_posix.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_posix.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_readonly.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_file_info.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/fuzz/util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/advanced_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/advanced_options.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/block_cache_trace_writer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/c.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cache_bench_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cleanable.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compaction_filter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compaction_job_stats.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/comparator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compression_type.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/concurrent_task_limiter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/configurable.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/convenience.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/customizable.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/data_structure.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_bench_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_dump_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_stress_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/env.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/env_encryption.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/experimental.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/file_checksum.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/file_system.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/filter_policy.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/flush_block_policy.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/functor_wrapper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/io_status.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/iostats_context.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/ldb_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/listener.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/memory_allocator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/memtablerep.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/merge_operator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/metadata.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/options.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/perf_context.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/perf_level.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/persistent_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/port_defs.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/rate_limiter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/rocksdb_namespace.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/secondary_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/slice.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/slice_transform.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/snapshot.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_dump_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_manager.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_writer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_partitioner.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/statistics.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/stats_history.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/status.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/system_clock.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table_properties.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table_reader_caller.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/thread_status.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/threadpool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_reader_writer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_record.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_record_result.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/transaction_log.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/types.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/unique_id.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/universal_compaction.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/agg_merge.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/backup_engine.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/cache_dump_load.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/checkpoint.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/convenience.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/customizable_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/db_ttl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/debug.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/env_mirror.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/info_log_finder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/ldb_cmd.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/ldb_cmd_execute_result.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/leveldb_options.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/lua/rocks_lua_custom_library.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/lua/rocks_lua_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/memory_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/object_registry.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/optimistic_transaction_db.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/option_change_migration.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/options_type.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/options_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/replayer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/sim_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/stackable_db.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/table_properties_collectors.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction_db.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction_db_mutex.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/write_batch_with_index.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/version.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/wal_filter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/wide_columns.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_batch.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_batch_base.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_buffer_manager.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/compaction_filter_factory_jnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/comparatorjnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/cplusplus_to_java_convert.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/event_listener_jnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/jnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/loggerjnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/portal.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/statisticsjni.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/table_filter_jnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/trace_writer_jnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/transaction_notifier_jnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/wal_filter_jnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/writebatchhandlerjnicallback.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/env_logger.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/logging.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/allocator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_usage.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/alloc_tracker.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_linklist_rep.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_skiplist_rep.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/inlineskiplist.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplist.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplistrep.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/stl_wrappers.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/vectorrep.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/write_buffer_manager.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/file_read_sample.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context_imp.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context_imp.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level_imp.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_step_timer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater_debug.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util_debug.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable_helper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable_test.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/customizable.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/jemalloc_helper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/lang.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/likely.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/malloc.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_dirent.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_example.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/sys_time.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/util_logger.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_default.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/xpress_win.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/xpress.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader_sync_and_async.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_type.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/cachable_entry.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy_internal.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/hash_index_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/hash_index_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/mock_block_based_table.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/internal_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iter_heap.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator_wrapper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/mock_table.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/multiget_context.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_options.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/scoped_arena_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer_collectors.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_builder.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_factory.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties_internal.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/mock_time_env.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/secondary_cache_test_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/testharness.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/testutil.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/third-party/gcc/ppc-asm.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/third-party/gtest-1.8.1/fused-src/gtest/gtest.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/block_cache_analyzer/block_cache_trace_analyzer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/dump/db_dump_tool.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/io_tracer_parser_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/ldb_cmd_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/simulated_hybrid_file_system.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/trace_analyzer_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_result.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/aligned_buffer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/autovector.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/bloom_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc.in
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cast_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/channel.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cleanable.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding_lean.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compaction_job_stats_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/comparator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/core_local.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coro_utils.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_ppc.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_ppc_constants.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/data_structure.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/defer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/distributed_mutex.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/duplicate_detector.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/fastrange.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/gflags_compat.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash128.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash_containers.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash_map.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/heap.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/kv_map.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/math.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/math128.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/mutexlock.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ppc-opcode.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/repeatable_thread.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_alg.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/set_comparator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/single_thread_executor.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/slice.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/status.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stop_watch.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_guard.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_operation.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/timer.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/timer_queue.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/udt_util.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/user_comparator_wrapper.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/vector_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/work_queue.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxph3.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/test_agg_merge.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_gc_stats.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl_filesnapshot.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_iterator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_listener.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_dump_tool.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_options.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/serialize.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/test_utils.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/layered_compaction_filter_base.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/convenience/info_log_finder.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/debug.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_mirror.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/leveldb_options/leveldb_options.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory/memory_util.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory_allocators.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max_operator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put_operator.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.h
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.cc
 create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.h
 create mode 100644
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/object_registry.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/option_change_migration/option_change_migration.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/options/options_util.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file_buffer.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/hash_table.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/hash_table_evictable.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/lrulist.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_test.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_util.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/sim_cache.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_tracker.h create mode 100644 
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager_test.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_lock_manager.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/db.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/memory.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h create mode 100644 
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/dbt.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/memarena.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/omt.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/status.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/snapshot_checker.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_test.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.cc create mode 100644 
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.h create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/wal_filter.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.cc create mode 100644 vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.h create mode 100644 vendored/rocksdb-haskell-kadena/src/Database/RocksDB.hs create mode 100644 vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Base.hs create mode 100644 vendored/rocksdb-haskell-kadena/src/Database/RocksDB/C.hsc create mode 100644 vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Internal.hs create mode 100644 vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Iterator.hs create mode 100644 vendored/rocksdb-haskell-kadena/src/Database/RocksDB/ReadOptions.hs create mode 100644 vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Types.hs create mode 100755 vendored/rocksdb-haskell-kadena/unpack-rocksdb.sh diff --git a/cabal.project b/cabal.project index a6d0b218a0..7666a1b523 100644 --- a/cabal.project +++ b/cabal.project @@ -52,9 +52,6 @@ package pact -- avoid conflict with cryptonite during linking flags: +cryptonite-ed25519 -build-tool -package rocksdb-haskell-kadena - ghc-options: -Wwarn -optc-w -optcxx-w - package crypton flags: +support_pclmuldq @@ -84,12 +81,6 @@ source-repository-package tag: 1d260bfaa48312b54851057885de4c43c420e35f --sha256: 0fzq4mzaszj5clvixx9mn1x6r4dcrnwvbl2znd0p5mmy5h2jr0hh -source-repository-package - type: git - location: https://github.com/kadena-io/rocksdb-haskell.git - tag: cede9de2932a4ead1bd82fd7709b19ab7b19b33d - --sha256: 1dngd44va6h66vwpdpwmnj0zcky87m4vzykjwv49p2km12lwq9mf - source-repository-package type: git location: https://github.com/kadena-io/rosetta.git diff --git a/cabal.project.freeze b/cabal.project.freeze index 1be9de4a7f..e23231bf7b 100644 --- a/cabal.project.freeze +++ b/cabal.project.freeze @@ -62,7 +62,7 @@ constraints: any.Boolean ==0.2.4, any.cassava ==0.5.3.1, any.cborg ==0.2.10.0, any.cereal ==0.5.8.3, - chainweb -debug -ed25519 -ghc-flags, + chainweb -debug -ed25519 -ghc-flags -rocksdb-with-tbb, any.character-ps ==0.1, any.charset ==0.3.10, any.chronos ==1.1.5.1, @@ -228,8 +228,6 @@ constraints: any.Boolean ==0.2.4, any.resource-pool ==0.4.0.0, any.resourcet ==1.3.0, any.retry ==0.9.3.1, - any.rocksdb-haskell-kadena ==1.1.0, - rocksdb-haskell-kadena -with-tbb, any.rosetta ==1.0.1, any.rts ==1.0.2, any.run-st ==0.1.3.3, diff --git a/chainweb.cabal b/chainweb.cabal index d9b9105c4d..7913456e3c 100644 --- a/chainweb.cabal +++ b/chainweb.cabal @@ -85,6 +85,11 @@ flag ghc-flags default: False manual: True +flag 
rocksdb-with-tbb
+  description: Build RocksDB with TBB support
+  default: False
+  manual: True
+
 common debugging-flags
   if flag(debug)
     ghc-options:
@@ -112,8 +117,458 @@ common warning-flags
 -- Chainweb Library
 -- -------------------------------------------------------------------------- --
+library rocksdb-haskell-kadena
+  import: warning-flags, debugging-flags
+  hs-source-dirs: vendored/rocksdb-haskell-kadena/src
+  default-language: Haskell2010
+
+  cxx-sources:
+    vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.cpp
+  include-dirs:
+    vendored/rocksdb-haskell-kadena/cpp
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include
+
+  install-includes:
+    vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.h
+  exposed-modules:
+    Database.RocksDB
+    Database.RocksDB.Base
+    Database.RocksDB.C
+    Database.RocksDB.Internal
+    Database.RocksDB.Iterator
+    Database.RocksDB.ReadOptions
+    Database.RocksDB.Types
+
+  other-extensions:
+    CPP,
+    ForeignFunctionInterface,
+    EmptyDataDecls,
+    RecordWildCards
+
+  build-depends:
+    , base >= 4 && < 5
+    , binary
+    , bytestring
+    , data-default
+    , directory
+    , filepath
+    , transformers
+
+  ghc-options:
+    -optc-w
+    -optcxx-w
+    -- -optl-static-libstdc++
+    -- ld-options: -static-libstdc++
+
+  -- All of the following libraries are optional. If one of the libraries
+  -- is not available on the build host, it can be commented out in the
+  -- list below.
+  --
+  extra-libraries:
+    snappy
+    gflags
+    z
+    bz2
+    lz4
+    zstd
+    stdc++
+
+  if flag(rocksdb-with-tbb)
+    extra-libraries:
+      tbb
+
+  -- These options are extracted from CXXFLAGS in the RocksDB Makefile
+  cxx-options:
+    -fno-rtti
+    -g
+    -W
+    -Wextra
+    -Wall
+    -Wno-invalid-offsetof
+    -Wsign-compare
+    -Wshadow
+    -Wunused-parameter
+    -std=c++2a
+    -faligned-new
+    -DNO_THREEWAY_CRC32C
+    -DHAVE_ALIGNED_NEW
+    -DROCKSDB_PLATFORM_POSIX
+    -DROCKSDB_LIB_IO_POSIX
+    -DSNAPPY
+    -DGFLAGS=1
+    -DZLIB
+    -DBZIP2
+    -DLZ4
+    -DZSTD
+    -DROCKSDB_BACKTRACE
+    -mtune=generic
+    -DROCKSDB_SUPPORT_THREAD_LOCAL
+    -fno-omit-frame-pointer
+    -momit-leaf-frame-pointer
+    -DNDEBUG
+    -Woverloaded-virtual
+    -Wnon-virtual-dtor
+    -Wno-missing-field-initializers
+    -O2
+
+  if arch(x86_64)
+    cxx-options:
+      -march=x86-64
+      -msse3
+
+  if flag(rocksdb-with-tbb)
+    cxx-options:
+      -DTBB
+
+  if os(linux)
+    cxx-options:
+      -DOS_LINUX
+      -fno-builtin-memcmp
+      -DROCKSDB_FALLOCATE_PRESENT
+      -DROCKSDB_MALLOC_USABLE_SIZE
+      -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX
+      -DROCKSDB_RANGESYNC_PRESENT
+      -DROCKSDB_SCHED_GETCPU_PRESENT
+      -DROCKSDB_AUXV_GETAUXVAL_PRESENT
+
+  if os(darwin)
+    cxx-options:
+      -DOS_MACOSX
+      -Wshorten-64-to-32
+      -DHAVE_FULLFSYNC
+
+  -- These sources are extracted from LIB_OBJECTS in the RocksDB Makefile
+  cxx-sources:
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/cache.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/cache_entry_roles.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/cache_key.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/cache_helpers.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/cache_reservation_manager.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/charged_cache.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/clock_cache.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/lru_cache.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/compressed_secondary_cache.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/secondary_cache.cc
+    vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/secondary_cache_adapter.cc
+
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/cache/sharded_cache.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/c.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_universal.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_service_job.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/sst_partitioner.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/convenience.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_filesnapshot.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_compaction_flush.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_debug.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_experimental.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_files.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_open.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_write.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.cc + 
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/experimental.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/file_indexer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_job.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_operator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/repair.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_columns.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_base.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_posix.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_posix.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.cc + 
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/alloc_tracker.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_linklist_rep.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_skiplist_rep.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplistrep.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/vectorrep.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/write_buffer_manager.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater_debug.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util_debug.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/customizable.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.cc + 
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_default.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/hash_index_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.cc + 
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_factory.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/dump/db_dump_tool.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_result.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cleanable.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compaction_job_stats_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/comparator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/data_structure.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/slice.cc 
+ vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/status.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl_filesnapshot.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/convenience/info_log_finder.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/debug.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_mirror.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/leveldb_options/leveldb_options.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory/memory_util.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/object_registry.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/option_change_migration/option_change_migration.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/options/options_util.cc + 
vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/sim_cache.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/snapshot_checker.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/wal_filter.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.cc + + if os(windows) + cxx-sources: + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_default.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.cc + vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.cc + library chainweb-storage - -- import: warning-flags, debugging-flags + import: warning-flags, debugging-flags hs-source-dirs: vendored/chainweb-storage/src default-language: Haskell2010 exposed-modules: @@ -136,7 +591,7 @@ library chainweb-storage , memory >=0.14 , mtl >= 2.2 , nothunks >= 0.1.0.0 - , 
rocksdb-haskell-kadena >=1.1.0 + , chainweb:rocksdb-haskell-kadena , stm >=2.4 , streaming >=0.2 , temporary >=1.3 @@ -624,7 +1079,7 @@ test-suite chainweb-tests , resource-pool >= 0.4 , resourcet >= 1.3 , retry >= 0.7 - , rocksdb-haskell-kadena >= 1.1.0 + , chainweb:rocksdb-haskell-kadena , rosetta >= 1.0 , scheduler >= 1.4 , servant >= 0.20.1 @@ -810,7 +1265,7 @@ executable cwtool , retry >= 0.9 , wreq >= 0.5 , regex-tdfa >= 1.3 - , rocksdb-haskell-kadena >= 1.1.0 + , chainweb:rocksdb-haskell-kadena , safe-exceptions >= 0.1 , servant-client >= 0.18.2 , servant-client-core >= 0.18.2 diff --git a/vendored/rocksdb-haskell-kadena/CHANGELOG.md b/vendored/rocksdb-haskell-kadena/CHANGELOG.md new file mode 100644 index 0000000000..2fee918d08 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/CHANGELOG.md @@ -0,0 +1,35 @@ +## 1.0.1 + * Add support for UTF-8 characters in a database's path + +## 1.0.0 + + * First version by Serokell + +## 0.3x.0 + + * ResourceT is no longer compulsory + + +## 0.2.0 + + * requires LevelDB v1.7 + * support for filter policy (LevelDB v1.5), either custom or using the built-in + bloom filter implementation + * write batch values no longer require a `memcpy` to be early-finalizer-safe + (introduced in 0.1.1) + + +## 0.1.0 + + * memory (foreign pointers) is managed through + [ResourceT](http://hackage.haskell.org/package/resourcet). Note that this + requires to lift monadic actions inside the `MonadResource` monad, see the + examples. + * links against shared library (LevelDB v1.3 or higher) + * LevelDB 1.3 API fully supported (including custom comparators, excluding + custom environments) + + +## 0.0.x + + * experimental releases diff --git a/vendored/rocksdb-haskell-kadena/LICENSE b/vendored/rocksdb-haskell-kadena/LICENSE new file mode 100644 index 0000000000..1a671f848b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/LICENSE @@ -0,0 +1,31 @@ +Copyright (c) 2011, Kim Altintop +Copyright (c) 2014, Alexander Thiemann + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Kim Altintop nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
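Note on the dependency changes above: `chainweb:rocksdb-haskell-kadena` uses Cabal's syntax for depending on a named sub-library, `<package name>:<library name>` (Cabal 3.0+). Because the sub-library is referenced from within its own package, it does not need `visibility: public`; that field is only required when other packages depend on it. A minimal sketch of the pattern, with hypothetical package and component names that are not part of this patch:

```cabal
cabal-version: 3.0
name:          my-package
version:       0.1.0

-- a sub-library, usable by the other components of this package
library my-sublib
  hs-source-dirs:   vendored/my-sublib/src
  exposed-modules:  My.Sublib
  build-depends:    base
  default-language: Haskell2010

library
  hs-source-dirs:   src
  exposed-modules:  My.Lib
  -- <package>:<sub-library> selects the sub-library component
  build-depends:    base, my-package:my-sublib
  default-language: Haskell2010
```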
diff --git a/vendored/rocksdb-haskell-kadena/README.md b/vendored/rocksdb-haskell-kadena/README.md
new file mode 100644
index 0000000000..eeaa639a44
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/README.md
@@ -0,0 +1,47 @@
+This library provides Haskell bindings to
+[RocksDB](http://rocksdb.org).
+
+[![Build Status](https://travis-ci.org/serokell/rocksdb-haskell.svg?branch=master)](https://travis-ci.org/serokell/rocksdb-haskell)
+[![Windows build status](https://ci.appveyor.com/api/projects/status/x4dmt91wuk8dglw0/branch/master?svg=true)](https://ci.appveyor.com/project/jagajaga/rocksdb-haskell)
+
+## History
+
+Version 0.1.0:
+
+* initial release of this fork.
+
+## Installation
+
+Prerequisites:
+
+* [GHC 8.*](http://www.haskell.org/ghc)
+* [Cabal](http://www.haskell.org/cabal), version 1.3 or higher
+* [RocksDB](http://rocksdb.org)
+* Optional: [Snappy](http://code.google.com/p/snappy),
+  if compression support is desired
+
+To install the latest version from Hackage:
+
+```shell
+$ cabal install rocksdb-haskell
+```
+
+To install from checked-out source:
+
+```shell
+$ cabal install
+```
+
+## Notes
+
+This library is at a very early stage and has seen very limited testing. Comments
+and contributions are welcome.
+
+## Bugs and Contributing
+
+Please report issues via http://github.com/serokell/rocksdb-haskell/issues.
+Patches are best submitted as pull requests.
+
+## License
+
+BSD 3, see LICENSE file.
diff --git a/vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.cpp b/vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.cpp
new file mode 100644
index 0000000000..2a22d5dd8c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.cpp
@@ -0,0 +1,132 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// The code not taken from the LevelDB and RocksDB authors is
+// Copyright (c) 2022 Kadena.
+
+// copied from rocksdb/c.cc
+#include <cstdlib>
+#include "rocksdb/db.h"
+#include "rocksdb/slice_transform.h"
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+using rocksdb::DB;
+using rocksdb::DBOptions;
+using rocksdb::DbPath;
+using rocksdb::Env;
+using rocksdb::EnvOptions;
+using rocksdb::Options;
+using rocksdb::ReadOptions;
+using rocksdb::Slice;
+using rocksdb::SliceParts;
+using rocksdb::SliceTransform;
+using rocksdb::Snapshot;
+using rocksdb::SstFileWriter;
+using rocksdb::Status;
+using rocksdb::WritableFile;
+using rocksdb::WriteOptions;
+
+using std::shared_ptr;
+using std::vector;
+using std::unordered_set;
+using std::map;
+
+class DelimitedPrefixTransform : public SliceTransform {
+ protected:
+  std::vector<char> *delims;
+ public:
+
+  explicit DelimitedPrefixTransform(std::vector<char> *_delims) : delims(_delims) { }
+  ~DelimitedPrefixTransform() {
+  }
+
+  static const char* kClassName() { return "rocksdb-haskell.DelimitedPrefix"; }
+  static const char* kNickName() { return "table"; }
+  const char* Name() const override { return kClassName(); }
+  Slice Transform(const Slice& src) const override {
+    size_t prefix_end;
+    for (char c : *delims) {
+      // NB: std::string(src.data()) reads up to the first NUL byte, so this
+      // assumes NUL-terminated key data
+      if ((prefix_end = std::string(src.data()).find(c)) != std::string::npos) {
+        return Slice(src.data(), prefix_end);
+      }
+    }
+    // if there is no delimiter we must return the entire string, because of the law:
+    //   prefix(prefix(str)) == prefix(str)
+    // this prevents us from implementing a real InDomain, which would go badly
+    // (redundant prefix seeking)
+    // *if* we mixed prefixed and non-prefixed keys in the same database
+    return src;
+  }
+
+  bool InDomain(const Slice&) const override {
+    return true;
+  }
+
+  bool InRange(const Slice&) const override {
+    return true;
+  }
+
+  bool FullLengthEnabled(size_t*) const override {
+    return false;
+  }
+
+  bool SameResultWhenAppended(const Slice&) const override {
+    return false;
+  }
+};
+
+std::vector<char> std_delimiters { '$', '%' };
+
+DelimitedPrefixTransform dollar_percent_delimited_transform(&std_delimiters);
+
+extern "C" {
+
+struct rocksdb_t { DB* rep; };
+struct rocksdb_options_t { Options rep; };
+struct rocksdb_writeoptions_t { WriteOptions rep; };
+struct rocksdb_readoptions_t { ReadOptions rep; };
+
+static bool SaveError(char** errptr, const Status& s) {
+  assert(errptr != nullptr);
+  if (s.ok()) {
+    return false;
+  } else if (*errptr == nullptr) {
+    *errptr = strdup(s.ToString().c_str());
+  } else {
+    // TODO(sanjay): Merge with existing error?
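+    // For now the old error message is simply replaced with the new one.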
+
+extern "C" {
+
+struct rocksdb_t { DB* rep; };
+struct rocksdb_options_t { Options rep; };
+struct rocksdb_writeoptions_t { WriteOptions rep; };
+struct rocksdb_readoptions_t { ReadOptions rep; };
+
+static bool SaveError(char** errptr, const Status& s) {
+  assert(errptr != nullptr);
+  if (s.ok()) {
+    return false;
+  } else if (*errptr == nullptr) {
+    *errptr = strdup(s.ToString().c_str());
+  } else {
+    // TODO(sanjay): Merge with existing error?
+    // This is a bug if *errptr is not created by malloc()
+    free(*errptr);
+    *errptr = strdup(s.ToString().c_str());
+  }
+  return true;
+}
+// end of code copied from rocksdb/c.cc
+
+void rocksdb_delete_range(rocksdb_t* db,
+                          const rocksdb_writeoptions_t* options,
+                          const char* start_key, size_t start_key_len,
+                          const char* end_key, size_t end_key_len,
+                          char** errptr) {
+  SaveError(errptr, db->rep->DeleteRange(options->rep, nullptr,
+                                         Slice(start_key, start_key_len),
+                                         Slice(end_key, end_key_len)));
+}
+
+void rocksdb_readoptions_set_auto_prefix_mode(rocksdb_readoptions_t* options, bool auto_prefix_mode) {
+  options->rep.auto_prefix_mode = auto_prefix_mode;
+}
+
+const void* rocksdb_options_table_prefix_extractor() {
+  return &dollar_percent_delimited_transform;
+}
+
+}
diff --git a/vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.h b/vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.h
new file mode 100644
index 0000000000..8ebb8829e4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/cpp/chainweb-rocksdb.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <stdbool.h>
+
+#include "rocksdb/c.h"
+
+extern ROCKSDB_LIBRARY_API void rocksdb_delete_range(rocksdb_t* db,
+    const rocksdb_writeoptions_t* options,
+    const char* start_key, size_t start_key_len,
+    const char* end_key, size_t end_key_len,
+    char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_auto_prefix_mode(
+    rocksdb_readoptions_t* options, bool auto_prefix_mode);
+
+/* Returns the fixed prefix extractor that splits keys at the first '$' or
+   '%' delimiter (see DelimitedPrefixTransform in chainweb-rocksdb.cpp). */
+extern ROCKSDB_LIBRARY_API const void* rocksdb_options_table_prefix_extractor(void);
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.cc
new file mode 100644
index 0000000000..2dbfff79a9
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/arena_wrapped_db_iter.h"
+
+#include "memory/arena.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/options.h"
+#include "table/internal_iterator.h"
+#include "table/iterator_wrapper.h"
+#include "util/user_comparator_wrapper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status ArenaWrappedDBIter::GetProperty(std::string prop_name,
+                                       std::string* prop) {
+  if (prop_name == "rocksdb.iterator.super-version-number") {
+    // First try to pass the value returned from inner iterator.
+ if (!db_iter_->GetProperty(prop_name, prop).ok()) { + *prop = std::to_string(sv_number_); + } + return Status::OK(); + } + return db_iter_->GetProperty(prop_name, prop); +} + +void ArenaWrappedDBIter::Init( + Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, const Version* version, + const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iteration, + uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl, + ColumnFamilyData* cfd, bool expose_blob_index, bool allow_refresh) { + auto mem = arena_.AllocateAligned(sizeof(DBIter)); + db_iter_ = + new (mem) DBIter(env, read_options, ioptions, mutable_cf_options, + ioptions.user_comparator, /* iter */ nullptr, version, + sequence, true, max_sequential_skip_in_iteration, + read_callback, db_impl, cfd, expose_blob_index); + sv_number_ = version_number; + read_options_ = read_options; + allow_refresh_ = allow_refresh; + memtable_range_tombstone_iter_ = nullptr; + if (!env->GetFileSystem()->use_async_io()) { + read_options_.async_io = false; + } +} + +Status ArenaWrappedDBIter::Refresh() { + if (cfd_ == nullptr || db_impl_ == nullptr || !allow_refresh_) { + return Status::NotSupported("Creating renew iterator is not allowed."); + } + assert(db_iter_ != nullptr); + // TODO(yiwu): For last_seq_same_as_publish_seq_==false, this is not the + // correct behavior. Will be corrected automatically when we take a snapshot + // here for the case of WritePreparedTxnDB. + uint64_t cur_sv_number = cfd_->GetSuperVersionNumber(); + TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1"); + TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2"); + auto reinit_internal_iter = [&]() { + Env* env = db_iter_->env(); + db_iter_->~DBIter(); + arena_.~Arena(); + new (&arena_) Arena(); + + SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); + SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); + if (read_callback_) { + read_callback_->Refresh(latest_seq); + } + Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, + sv->current, latest_seq, + sv->mutable_cf_options.max_sequential_skip_in_iterations, + cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_, + allow_refresh_); + + InternalIterator* internal_iter = db_impl_->NewInternalIterator( + read_options_, cfd_, sv, &arena_, latest_seq, + /* allow_unprepared_value */ true, /* db_iter */ this); + SetIterUnderDBIter(internal_iter); + }; + while (true) { + if (sv_number_ != cur_sv_number) { + reinit_internal_iter(); + break; + } else { + SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); + // Refresh range-tombstones in MemTable + if (!read_options_.ignore_range_deletions) { + SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_); + TEST_SYNC_POINT_CALLBACK("ArenaWrappedDBIter::Refresh:SV", nullptr); + auto t = sv->mem->NewRangeTombstoneIterator( + read_options_, latest_seq, false /* immutable_memtable */); + if (!t || t->empty()) { + // If memtable_range_tombstone_iter_ points to a non-empty tombstone + // iterator, then it means sv->mem is not the memtable that + // memtable_range_tombstone_iter_ points to, so SV must have changed + // after the sv_number_ != cur_sv_number check above. We will fall + // back to re-init the InternalIterator, and the tombstone iterator + // will be freed during db_iter destruction there. 
+ if (memtable_range_tombstone_iter_) { + assert(!*memtable_range_tombstone_iter_ || + sv_number_ != cfd_->GetSuperVersionNumber()); + } + delete t; + } else { // current mutable memtable has range tombstones + if (!memtable_range_tombstone_iter_) { + delete t; + db_impl_->ReturnAndCleanupSuperVersion(cfd_, sv); + // The memtable under DBIter did not have range tombstone before + // refresh. + reinit_internal_iter(); + break; + } else { + delete *memtable_range_tombstone_iter_; + *memtable_range_tombstone_iter_ = new TruncatedRangeDelIterator( + std::unique_ptr(t), + &cfd_->internal_comparator(), nullptr, nullptr); + } + } + db_impl_->ReturnAndCleanupSuperVersion(cfd_, sv); + } + // Refresh latest sequence number + db_iter_->set_sequence(latest_seq); + db_iter_->set_valid(false); + // Check again if the latest super version number is changed + uint64_t latest_sv_number = cfd_->GetSuperVersionNumber(); + if (latest_sv_number != cur_sv_number) { + // If the super version number is changed after refreshing, + // fallback to Re-Init the InternalIterator + cur_sv_number = latest_sv_number; + continue; + } + break; + } + } + return Status::OK(); +} + +ArenaWrappedDBIter* NewArenaWrappedDbIterator( + Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, const Version* version, + const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, + uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl, + ColumnFamilyData* cfd, bool expose_blob_index, bool allow_refresh) { + ArenaWrappedDBIter* iter = new ArenaWrappedDBIter(); + iter->Init(env, read_options, ioptions, mutable_cf_options, version, sequence, + max_sequential_skip_in_iterations, version_number, read_callback, + db_impl, cfd, expose_blob_index, allow_refresh); + if (db_impl != nullptr && cfd != nullptr && allow_refresh) { + iter->StoreRefreshInfo(db_impl, cfd, read_callback, expose_blob_index); + } + + return iter; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.h new file mode 100644 index 0000000000..f15be306d2 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/arena_wrapped_db_iter.h @@ -0,0 +1,127 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include + +#include "db/db_impl/db_impl.h" +#include "db/db_iter.h" +#include "db/range_del_aggregator.h" +#include "memory/arena.h" +#include "options/cf_options.h" +#include "rocksdb/db.h" +#include "rocksdb/iterator.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +class Arena; +class Version; + +// A wrapper iterator which wraps DB Iterator and the arena, with which the DB +// iterator is supposed to be allocated. This class is used as an entry point of +// a iterator hierarchy whose memory can be allocated inline. In that way, +// accessing the iterator tree can be more cache friendly. It is also faster +// to allocate. 
+// When using the class's Iterator interface, the behavior is exactly +// the same as the inner DBIter. +class ArenaWrappedDBIter : public Iterator { + public: + ~ArenaWrappedDBIter() override { + if (db_iter_ != nullptr) { + db_iter_->~DBIter(); + } else { + assert(false); + } + } + + // Get the arena to be used to allocate memory for DBIter to be wrapped, + // as well as child iterators in it. + virtual Arena* GetArena() { return &arena_; } + + const ReadOptions& GetReadOptions() { return read_options_; } + + // Set the internal iterator wrapped inside the DB Iterator. Usually it is + // a merging iterator. + virtual void SetIterUnderDBIter(InternalIterator* iter) { + db_iter_->SetIter(iter); + } + + void SetMemtableRangetombstoneIter(TruncatedRangeDelIterator** iter) { + memtable_range_tombstone_iter_ = iter; + } + + bool Valid() const override { return db_iter_->Valid(); } + void SeekToFirst() override { db_iter_->SeekToFirst(); } + void SeekToLast() override { db_iter_->SeekToLast(); } + // 'target' does not contain timestamp, even if user timestamp feature is + // enabled. + void Seek(const Slice& target) override { db_iter_->Seek(target); } + void SeekForPrev(const Slice& target) override { + db_iter_->SeekForPrev(target); + } + void Next() override { db_iter_->Next(); } + void Prev() override { db_iter_->Prev(); } + Slice key() const override { return db_iter_->key(); } + Slice value() const override { return db_iter_->value(); } + const WideColumns& columns() const override { return db_iter_->columns(); } + Status status() const override { return db_iter_->status(); } + Slice timestamp() const override { return db_iter_->timestamp(); } + bool IsBlob() const { return db_iter_->IsBlob(); } + + Status GetProperty(std::string prop_name, std::string* prop) override; + + Status Refresh() override; + + void Init(Env* env, const ReadOptions& read_options, + const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, const Version* version, + const SequenceNumber& sequence, + uint64_t max_sequential_skip_in_iterations, uint64_t version_number, + ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, + bool expose_blob_index, bool allow_refresh); + + // Store some parameters so we can refresh the iterator at a later point + // with these same params + void StoreRefreshInfo(DBImpl* db_impl, ColumnFamilyData* cfd, + ReadCallback* read_callback, bool expose_blob_index) { + db_impl_ = db_impl; + cfd_ = cfd; + read_callback_ = read_callback; + expose_blob_index_ = expose_blob_index; + } + + private: + DBIter* db_iter_ = nullptr; + Arena arena_; + uint64_t sv_number_; + ColumnFamilyData* cfd_ = nullptr; + DBImpl* db_impl_ = nullptr; + ReadOptions read_options_; + ReadCallback* read_callback_; + bool expose_blob_index_ = false; + bool allow_refresh_ = true; + // If this is nullptr, it means the mutable memtable does not contain range + // tombstone when added under this DBIter. + TruncatedRangeDelIterator** memtable_range_tombstone_iter_ = nullptr; +}; + +// Generate the arena wrapped iterator class. +// `db_impl` and `cfd` are used for reneweal. If left null, renewal will not +// be supported. 
+extern ArenaWrappedDBIter* NewArenaWrappedDbIterator(
+    Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions,
+    const MutableCFOptions& mutable_cf_options, const Version* version,
+    const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
+    uint64_t version_number, ReadCallback* read_callback,
+    DBImpl* db_impl = nullptr, ColumnFamilyData* cfd = nullptr,
+    bool expose_blob_index = false, bool allow_refresh = true);
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_constants.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_constants.h
new file mode 100644
index 0000000000..a5d09ac767
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_constants.h
@@ -0,0 +1,16 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cstdint>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+constexpr uint64_t kInvalidBlobFileNumber = 0;
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.cc
new file mode 100644
index 0000000000..8b793c5d26
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.cc
@@ -0,0 +1,42 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_contents.h"
+
+#include <cassert>
+
+#include "cache/cache_entry_roles.h"
+#include "cache/cache_helpers.h"
+#include "port/malloc.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+size_t BlobContents::ApproximateMemoryUsage() const {
+  size_t usage = 0;
+
+  if (allocation_) {
+    MemoryAllocator* const allocator = allocation_.get_deleter().allocator;
+
+    if (allocator) {
+      usage += allocator->UsableSize(allocation_.get(), data_.size());
+    } else {
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+      usage += malloc_usable_size(allocation_.get());
+#else
+      usage += data_.size();
+#endif
+    }
+  }
+
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+  usage += malloc_usable_size(const_cast<BlobContents*>(this));
+#else
+  usage += sizeof(*this);
+#endif
+
+  return usage;
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.h
new file mode 100644
index 0000000000..15a672a0ae
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_contents.h
@@ -0,0 +1,59 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <memory>
+
+#include "memory/memory_allocator_impl.h"
+#include "rocksdb/advanced_cache.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A class representing a single uncompressed value read from a blob file.
+class BlobContents {
+ public:
+  BlobContents(CacheAllocationPtr&& allocation, size_t size)
+      : allocation_(std::move(allocation)), data_(allocation_.get(), size) {}
+
+  BlobContents(const BlobContents&) = delete;
+  BlobContents& operator=(const BlobContents&) = delete;
+
+  BlobContents(BlobContents&&) = default;
+  BlobContents& operator=(BlobContents&&) = default;
+
+  ~BlobContents() = default;
+
+  const Slice& data() const { return data_; }
+  size_t size() const { return data_.size(); }
+
+  size_t ApproximateMemoryUsage() const;
+
+  // For TypedCacheInterface
+  const Slice& ContentSlice() const { return data_; }
+  static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kBlobValue;
+
+ private:
+  CacheAllocationPtr allocation_;
+  Slice data_;
+};
+
+class BlobContentsCreator : public Cache::CreateContext {
+ public:
+  static void Create(std::unique_ptr<BlobContents>* out, size_t* out_charge,
+                     const Slice& contents, MemoryAllocator* alloc) {
+    auto raw = new BlobContents(AllocateAndCopyBlock(contents, alloc),
+                                contents.size());
+    out->reset(raw);
+    if (out_charge) {
+      *out_charge = raw->ApproximateMemoryUsage();
+    }
+  }
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_counting_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_counting_iterator.h
new file mode 100644
index 0000000000..b21651f66f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_counting_iterator.h
@@ -0,0 +1,150 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cassert>
+
+#include "db/blob/blob_garbage_meter.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/status.h"
+#include "table/internal_iterator.h"
+#include "test_util/sync_point.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// An internal iterator that passes each key-value encountered to
+// BlobGarbageMeter as inflow in order to measure the total number and size of
+// blobs in the compaction input on a per-blob file basis.
+class BlobCountingIterator : public InternalIterator { + public: + BlobCountingIterator(InternalIterator* iter, + BlobGarbageMeter* blob_garbage_meter) + : iter_(iter), blob_garbage_meter_(blob_garbage_meter) { + assert(iter_); + assert(blob_garbage_meter_); + + UpdateAndCountBlobIfNeeded(); + } + + bool Valid() const override { return iter_->Valid() && status_.ok(); } + + void SeekToFirst() override { + iter_->SeekToFirst(); + UpdateAndCountBlobIfNeeded(); + } + + void SeekToLast() override { + iter_->SeekToLast(); + UpdateAndCountBlobIfNeeded(); + } + + void Seek(const Slice& target) override { + iter_->Seek(target); + UpdateAndCountBlobIfNeeded(); + } + + void SeekForPrev(const Slice& target) override { + iter_->SeekForPrev(target); + UpdateAndCountBlobIfNeeded(); + } + + void Next() override { + assert(Valid()); + + iter_->Next(); + UpdateAndCountBlobIfNeeded(); + } + + bool NextAndGetResult(IterateResult* result) override { + assert(Valid()); + + const bool res = iter_->NextAndGetResult(result); + UpdateAndCountBlobIfNeeded(); + return res; + } + + void Prev() override { + assert(Valid()); + + iter_->Prev(); + UpdateAndCountBlobIfNeeded(); + } + + Slice key() const override { + assert(Valid()); + return iter_->key(); + } + + Slice user_key() const override { + assert(Valid()); + return iter_->user_key(); + } + + Slice value() const override { + assert(Valid()); + return iter_->value(); + } + + Status status() const override { return status_; } + + bool PrepareValue() override { + assert(Valid()); + return iter_->PrepareValue(); + } + + bool MayBeOutOfLowerBound() override { + assert(Valid()); + return iter_->MayBeOutOfLowerBound(); + } + + IterBoundCheck UpperBoundCheckResult() override { + assert(Valid()); + return iter_->UpperBoundCheckResult(); + } + + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + iter_->SetPinnedItersMgr(pinned_iters_mgr); + } + + bool IsKeyPinned() const override { + assert(Valid()); + return iter_->IsKeyPinned(); + } + + bool IsValuePinned() const override { + assert(Valid()); + return iter_->IsValuePinned(); + } + + Status GetProperty(std::string prop_name, std::string* prop) override { + return iter_->GetProperty(prop_name, prop); + } + + bool IsDeleteRangeSentinelKey() const override { + return iter_->IsDeleteRangeSentinelKey(); + } + + private: + void UpdateAndCountBlobIfNeeded() { + assert(!iter_->Valid() || iter_->status().ok()); + + if (!iter_->Valid()) { + status_ = iter_->status(); + return; + } + + TEST_SYNC_POINT( + "BlobCountingIterator::UpdateAndCountBlobIfNeeded:ProcessInFlow"); + + status_ = blob_garbage_meter_->ProcessInFlow(key(), value()); + } + + InternalIterator* iter_; + BlobGarbageMeter* blob_garbage_meter_; + Status status_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.cc new file mode 100644 index 0000000000..124429f93a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
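+
+// Illustration (hypothetical call sequence, for exposition only; not part of
+// the upstream file): BlobFetcher, declared in blob_fetcher.h, is a thin
+// wrapper over Version::GetBlob; it resolves a blob index that was stored
+// inline in the LSM tree into the actual blob value:
+//
+//   BlobFetcher fetcher(version, read_options);
+//   PinnableSlice value;
+//   uint64_t bytes_read = 0;
+//   Status s = fetcher.FetchBlob(user_key, blob_index_slice,
+//                                /* prefetch_buffer */ nullptr, &value,
+//                                &bytes_read);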
+ +#include "db/blob/blob_fetcher.h" + +#include "db/version_set.h" + +namespace ROCKSDB_NAMESPACE { + +Status BlobFetcher::FetchBlob(const Slice& user_key, + const Slice& blob_index_slice, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* blob_value, + uint64_t* bytes_read) const { + assert(version_); + + return version_->GetBlob(read_options_, user_key, blob_index_slice, + prefetch_buffer, blob_value, bytes_read); +} + +Status BlobFetcher::FetchBlob(const Slice& user_key, + const BlobIndex& blob_index, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* blob_value, + uint64_t* bytes_read) const { + assert(version_); + + return version_->GetBlob(read_options_, user_key, blob_index, prefetch_buffer, + blob_value, bytes_read); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.h new file mode 100644 index 0000000000..8aeaf965d2 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_fetcher.h @@ -0,0 +1,37 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/options.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +class Version; +class Slice; +class FilePrefetchBuffer; +class PinnableSlice; +class BlobIndex; + +// A thin wrapper around the blob retrieval functionality of Version. +class BlobFetcher { + public: + BlobFetcher(const Version* version, const ReadOptions& read_options) + : version_(version), read_options_(read_options) {} + + Status FetchBlob(const Slice& user_key, const Slice& blob_index_slice, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* blob_value, uint64_t* bytes_read) const; + + Status FetchBlob(const Slice& user_key, const BlobIndex& blob_index, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* blob_value, uint64_t* bytes_read) const; + + private: + const Version* version_; + ReadOptions read_options_; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.cc new file mode 100644 index 0000000000..71b1bb7fca --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.cc @@ -0,0 +1,156 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_file_addition.h" + +#include +#include + +#include "logging/event_logger.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "test_util/sync_point.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +// Tags for custom fields. Note that these get persisted in the manifest, +// so existing tags should not be modified. 
+enum BlobFileAddition::CustomFieldTags : uint32_t { + kEndMarker, + + // Add forward compatible fields here + + ///////////////////////////////////////////////////////////////////// + + kForwardIncompatibleMask = 1 << 6, + + // Add forward incompatible fields here +}; + +void BlobFileAddition::EncodeTo(std::string* output) const { + PutVarint64(output, blob_file_number_); + PutVarint64(output, total_blob_count_); + PutVarint64(output, total_blob_bytes_); + PutLengthPrefixedSlice(output, checksum_method_); + PutLengthPrefixedSlice(output, checksum_value_); + + // Encode any custom fields here. The format to use is a Varint32 tag (see + // CustomFieldTags above) followed by a length prefixed slice. Unknown custom + // fields will be ignored during decoding unless they're in the forward + // incompatible range. + + TEST_SYNC_POINT_CALLBACK("BlobFileAddition::EncodeTo::CustomFields", output); + + PutVarint32(output, kEndMarker); +} + +Status BlobFileAddition::DecodeFrom(Slice* input) { + constexpr char class_name[] = "BlobFileAddition"; + + if (!GetVarint64(input, &blob_file_number_)) { + return Status::Corruption(class_name, "Error decoding blob file number"); + } + + if (!GetVarint64(input, &total_blob_count_)) { + return Status::Corruption(class_name, "Error decoding total blob count"); + } + + if (!GetVarint64(input, &total_blob_bytes_)) { + return Status::Corruption(class_name, "Error decoding total blob bytes"); + } + + Slice checksum_method; + if (!GetLengthPrefixedSlice(input, &checksum_method)) { + return Status::Corruption(class_name, "Error decoding checksum method"); + } + checksum_method_ = checksum_method.ToString(); + + Slice checksum_value; + if (!GetLengthPrefixedSlice(input, &checksum_value)) { + return Status::Corruption(class_name, "Error decoding checksum value"); + } + checksum_value_ = checksum_value.ToString(); + + while (true) { + uint32_t custom_field_tag = 0; + if (!GetVarint32(input, &custom_field_tag)) { + return Status::Corruption(class_name, "Error decoding custom field tag"); + } + + if (custom_field_tag == kEndMarker) { + break; + } + + if (custom_field_tag & kForwardIncompatibleMask) { + return Status::Corruption( + class_name, "Forward incompatible custom field encountered"); + } + + Slice custom_field_value; + if (!GetLengthPrefixedSlice(input, &custom_field_value)) { + return Status::Corruption(class_name, + "Error decoding custom field value"); + } + } + + return Status::OK(); +} + +std::string BlobFileAddition::DebugString() const { + std::ostringstream oss; + + oss << *this; + + return oss.str(); +} + +std::string BlobFileAddition::DebugJSON() const { + JSONWriter jw; + + jw << *this; + + jw.EndObject(); + + return jw.Get(); +} + +bool operator==(const BlobFileAddition& lhs, const BlobFileAddition& rhs) { + return lhs.GetBlobFileNumber() == rhs.GetBlobFileNumber() && + lhs.GetTotalBlobCount() == rhs.GetTotalBlobCount() && + lhs.GetTotalBlobBytes() == rhs.GetTotalBlobBytes() && + lhs.GetChecksumMethod() == rhs.GetChecksumMethod() && + lhs.GetChecksumValue() == rhs.GetChecksumValue(); +} + +bool operator!=(const BlobFileAddition& lhs, const BlobFileAddition& rhs) { + return !(lhs == rhs); +} + +std::ostream& operator<<(std::ostream& os, + const BlobFileAddition& blob_file_addition) { + os << "blob_file_number: " << blob_file_addition.GetBlobFileNumber() + << " total_blob_count: " << blob_file_addition.GetTotalBlobCount() + << " total_blob_bytes: " << blob_file_addition.GetTotalBlobBytes() + << " checksum_method: " << blob_file_addition.GetChecksumMethod() 
+ << " checksum_value: " + << Slice(blob_file_addition.GetChecksumValue()).ToString(/* hex */ true); + + return os; +} + +JSONWriter& operator<<(JSONWriter& jw, + const BlobFileAddition& blob_file_addition) { + jw << "BlobFileNumber" << blob_file_addition.GetBlobFileNumber() + << "TotalBlobCount" << blob_file_addition.GetTotalBlobCount() + << "TotalBlobBytes" << blob_file_addition.GetTotalBlobBytes() + << "ChecksumMethod" << blob_file_addition.GetChecksumMethod() + << "ChecksumValue" + << Slice(blob_file_addition.GetChecksumValue()).ToString(/* hex */ true); + + return jw; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.h new file mode 100644 index 0000000000..43b1a0bcbe --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_addition.h @@ -0,0 +1,67 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "db/blob/blob_constants.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class JSONWriter; +class Slice; +class Status; + +class BlobFileAddition { + public: + BlobFileAddition() = default; + + BlobFileAddition(uint64_t blob_file_number, uint64_t total_blob_count, + uint64_t total_blob_bytes, std::string checksum_method, + std::string checksum_value) + : blob_file_number_(blob_file_number), + total_blob_count_(total_blob_count), + total_blob_bytes_(total_blob_bytes), + checksum_method_(std::move(checksum_method)), + checksum_value_(std::move(checksum_value)) { + assert(checksum_method_.empty() == checksum_value_.empty()); + } + + uint64_t GetBlobFileNumber() const { return blob_file_number_; } + uint64_t GetTotalBlobCount() const { return total_blob_count_; } + uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; } + const std::string& GetChecksumMethod() const { return checksum_method_; } + const std::string& GetChecksumValue() const { return checksum_value_; } + + void EncodeTo(std::string* output) const; + Status DecodeFrom(Slice* input); + + std::string DebugString() const; + std::string DebugJSON() const; + + private: + enum CustomFieldTags : uint32_t; + + uint64_t blob_file_number_ = kInvalidBlobFileNumber; + uint64_t total_blob_count_ = 0; + uint64_t total_blob_bytes_ = 0; + std::string checksum_method_; + std::string checksum_value_; +}; + +bool operator==(const BlobFileAddition& lhs, const BlobFileAddition& rhs); +bool operator!=(const BlobFileAddition& lhs, const BlobFileAddition& rhs); + +std::ostream& operator<<(std::ostream& os, + const BlobFileAddition& blob_file_addition); +JSONWriter& operator<<(JSONWriter& jw, + const BlobFileAddition& blob_file_addition); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.cc new file mode 100644 index 0000000000..21c1e5d417 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.cc @@ -0,0 +1,426 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_file_builder.h" + +#include + +#include "db/blob/blob_contents.h" +#include "db/blob/blob_file_addition.h" +#include "db/blob/blob_file_completion_callback.h" +#include "db/blob/blob_index.h" +#include "db/blob/blob_log_format.h" +#include "db/blob/blob_log_writer.h" +#include "db/blob/blob_source.h" +#include "db/event_helpers.h" +#include "db/version_set.h" +#include "file/filename.h" +#include "file/read_write_util.h" +#include "file/writable_file_writer.h" +#include "logging/logging.h" +#include "options/cf_options.h" +#include "options/options_helper.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "test_util/sync_point.h" +#include "trace_replay/io_tracer.h" +#include "util/compression.h" + +namespace ROCKSDB_NAMESPACE { + +BlobFileBuilder::BlobFileBuilder( + VersionSet* versions, FileSystem* fs, + const ImmutableOptions* immutable_options, + const MutableCFOptions* mutable_cf_options, const FileOptions* file_options, + std::string db_id, std::string db_session_id, int job_id, + uint32_t column_family_id, const std::string& column_family_name, + Env::IOPriority io_priority, Env::WriteLifeTimeHint write_hint, + const std::shared_ptr& io_tracer, + BlobFileCompletionCallback* blob_callback, + BlobFileCreationReason creation_reason, + std::vector* blob_file_paths, + std::vector* blob_file_additions) + : BlobFileBuilder([versions]() { return versions->NewFileNumber(); }, fs, + immutable_options, mutable_cf_options, file_options, + db_id, db_session_id, job_id, column_family_id, + column_family_name, io_priority, write_hint, io_tracer, + blob_callback, creation_reason, blob_file_paths, + blob_file_additions) {} + +BlobFileBuilder::BlobFileBuilder( + std::function file_number_generator, FileSystem* fs, + const ImmutableOptions* immutable_options, + const MutableCFOptions* mutable_cf_options, const FileOptions* file_options, + std::string db_id, std::string db_session_id, int job_id, + uint32_t column_family_id, const std::string& column_family_name, + Env::IOPriority io_priority, Env::WriteLifeTimeHint write_hint, + const std::shared_ptr& io_tracer, + BlobFileCompletionCallback* blob_callback, + BlobFileCreationReason creation_reason, + std::vector* blob_file_paths, + std::vector* blob_file_additions) + : file_number_generator_(std::move(file_number_generator)), + fs_(fs), + immutable_options_(immutable_options), + min_blob_size_(mutable_cf_options->min_blob_size), + blob_file_size_(mutable_cf_options->blob_file_size), + blob_compression_type_(mutable_cf_options->blob_compression_type), + prepopulate_blob_cache_(mutable_cf_options->prepopulate_blob_cache), + file_options_(file_options), + db_id_(std::move(db_id)), + db_session_id_(std::move(db_session_id)), + job_id_(job_id), + column_family_id_(column_family_id), + column_family_name_(column_family_name), + io_priority_(io_priority), + write_hint_(write_hint), + io_tracer_(io_tracer), + blob_callback_(blob_callback), + creation_reason_(creation_reason), + blob_file_paths_(blob_file_paths), + blob_file_additions_(blob_file_additions), + blob_count_(0), + blob_bytes_(0) { + assert(file_number_generator_); + assert(fs_); + assert(immutable_options_); + assert(file_options_); + assert(blob_file_paths_); + assert(blob_file_paths_->empty()); + assert(blob_file_additions_); + 
assert(blob_file_additions_->empty()); +} + +BlobFileBuilder::~BlobFileBuilder() = default; + +Status BlobFileBuilder::Add(const Slice& key, const Slice& value, + std::string* blob_index) { + assert(blob_index); + assert(blob_index->empty()); + + if (value.size() < min_blob_size_) { + return Status::OK(); + } + + { + const Status s = OpenBlobFileIfNeeded(); + if (!s.ok()) { + return s; + } + } + + Slice blob = value; + std::string compressed_blob; + + { + const Status s = CompressBlobIfNeeded(&blob, &compressed_blob); + if (!s.ok()) { + return s; + } + } + + uint64_t blob_file_number = 0; + uint64_t blob_offset = 0; + + { + const Status s = + WriteBlobToFile(key, blob, &blob_file_number, &blob_offset); + if (!s.ok()) { + return s; + } + } + + { + const Status s = CloseBlobFileIfNeeded(); + if (!s.ok()) { + return s; + } + } + + { + const Status s = + PutBlobIntoCacheIfNeeded(value, blob_file_number, blob_offset); + if (!s.ok()) { + ROCKS_LOG_WARN(immutable_options_->info_log, + "Failed to pre-populate the blob into blob cache: %s", + s.ToString().c_str()); + } + } + + BlobIndex::EncodeBlob(blob_index, blob_file_number, blob_offset, blob.size(), + blob_compression_type_); + + return Status::OK(); +} + +Status BlobFileBuilder::Finish() { + if (!IsBlobFileOpen()) { + return Status::OK(); + } + + return CloseBlobFile(); +} + +bool BlobFileBuilder::IsBlobFileOpen() const { return !!writer_; } + +Status BlobFileBuilder::OpenBlobFileIfNeeded() { + if (IsBlobFileOpen()) { + return Status::OK(); + } + + assert(!blob_count_); + assert(!blob_bytes_); + + assert(file_number_generator_); + const uint64_t blob_file_number = file_number_generator_(); + + assert(immutable_options_); + assert(!immutable_options_->cf_paths.empty()); + std::string blob_file_path = + BlobFileName(immutable_options_->cf_paths.front().path, blob_file_number); + + if (blob_callback_) { + blob_callback_->OnBlobFileCreationStarted( + blob_file_path, column_family_name_, job_id_, creation_reason_); + } + + std::unique_ptr file; + + { + assert(file_options_); + Status s = NewWritableFile(fs_, blob_file_path, &file, *file_options_); + + TEST_SYNC_POINT_CALLBACK( + "BlobFileBuilder::OpenBlobFileIfNeeded:NewWritableFile", &s); + + if (!s.ok()) { + return s; + } + } + + // Note: files get added to blob_file_paths_ right after the open, so they + // can be cleaned up upon failure. Contrast this with blob_file_additions_, + // which only contains successfully written files. 
+ assert(blob_file_paths_); + blob_file_paths_->emplace_back(std::move(blob_file_path)); + + assert(file); + file->SetIOPriority(io_priority_); + file->SetWriteLifeTimeHint(write_hint_); + FileTypeSet tmp_set = immutable_options_->checksum_handoff_file_types; + Statistics* const statistics = immutable_options_->stats; + std::unique_ptr file_writer(new WritableFileWriter( + std::move(file), blob_file_paths_->back(), *file_options_, + immutable_options_->clock, io_tracer_, statistics, + immutable_options_->listeners, + immutable_options_->file_checksum_gen_factory.get(), + tmp_set.Contains(FileType::kBlobFile), false)); + + constexpr bool do_flush = false; + + std::unique_ptr blob_log_writer(new BlobLogWriter( + std::move(file_writer), immutable_options_->clock, statistics, + blob_file_number, immutable_options_->use_fsync, do_flush)); + + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + + BlobLogHeader header(column_family_id_, blob_compression_type_, has_ttl, + expiration_range); + + { + Status s = blob_log_writer->WriteHeader(header); + + TEST_SYNC_POINT_CALLBACK( + "BlobFileBuilder::OpenBlobFileIfNeeded:WriteHeader", &s); + + if (!s.ok()) { + return s; + } + } + + writer_ = std::move(blob_log_writer); + + assert(IsBlobFileOpen()); + + return Status::OK(); +} + +Status BlobFileBuilder::CompressBlobIfNeeded( + Slice* blob, std::string* compressed_blob) const { + assert(blob); + assert(compressed_blob); + assert(compressed_blob->empty()); + assert(immutable_options_); + + if (blob_compression_type_ == kNoCompression) { + return Status::OK(); + } + + // TODO: allow user CompressionOptions, including max_compressed_bytes_per_kb + CompressionOptions opts; + CompressionContext context(blob_compression_type_); + constexpr uint64_t sample_for_compression = 0; + + CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), + blob_compression_type_, sample_for_compression); + + constexpr uint32_t compression_format_version = 2; + + bool success = false; + + { + StopWatch stop_watch(immutable_options_->clock, immutable_options_->stats, + BLOB_DB_COMPRESSION_MICROS); + success = + CompressData(*blob, info, compression_format_version, compressed_blob); + } + + if (!success) { + return Status::Corruption("Error compressing blob"); + } + + *blob = Slice(*compressed_blob); + + return Status::OK(); +} + +Status BlobFileBuilder::WriteBlobToFile(const Slice& key, const Slice& blob, + uint64_t* blob_file_number, + uint64_t* blob_offset) { + assert(IsBlobFileOpen()); + assert(blob_file_number); + assert(blob_offset); + + uint64_t key_offset = 0; + + Status s = writer_->AddRecord(key, blob, &key_offset, blob_offset); + + TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AddRecord", &s); + + if (!s.ok()) { + return s; + } + + *blob_file_number = writer_->get_log_number(); + + ++blob_count_; + blob_bytes_ += BlobLogRecord::kHeaderSize + key.size() + blob.size(); + + return Status::OK(); +} + +Status BlobFileBuilder::CloseBlobFile() { + assert(IsBlobFileOpen()); + + BlobLogFooter footer; + footer.blob_count = blob_count_; + + std::string checksum_method; + std::string checksum_value; + + Status s = writer_->AppendFooter(footer, &checksum_method, &checksum_value); + + TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AppendFooter", &s); + + if (!s.ok()) { + return s; + } + + const uint64_t blob_file_number = writer_->get_log_number(); + + if (blob_callback_) { + s = blob_callback_->OnBlobFileCompleted( + blob_file_paths_->back(), column_family_name_, 
job_id_, + blob_file_number, creation_reason_, s, checksum_value, checksum_method, + blob_count_, blob_bytes_); + } + + assert(blob_file_additions_); + blob_file_additions_->emplace_back(blob_file_number, blob_count_, blob_bytes_, + std::move(checksum_method), + std::move(checksum_value)); + + assert(immutable_options_); + ROCKS_LOG_INFO(immutable_options_->logger, + "[%s] [JOB %d] Generated blob file #%" PRIu64 ": %" PRIu64 + " total blobs, %" PRIu64 " total bytes", + column_family_name_.c_str(), job_id_, blob_file_number, + blob_count_, blob_bytes_); + + writer_.reset(); + blob_count_ = 0; + blob_bytes_ = 0; + + return s; +} + +Status BlobFileBuilder::CloseBlobFileIfNeeded() { + assert(IsBlobFileOpen()); + + const WritableFileWriter* const file_writer = writer_->file(); + assert(file_writer); + + if (file_writer->GetFileSize() < blob_file_size_) { + return Status::OK(); + } + + return CloseBlobFile(); +} + +void BlobFileBuilder::Abandon(const Status& s) { + if (!IsBlobFileOpen()) { + return; + } + if (blob_callback_) { + // BlobFileBuilder::Abandon() is called because of error while writing to + // Blob files. So we can ignore the below error. + blob_callback_ + ->OnBlobFileCompleted(blob_file_paths_->back(), column_family_name_, + job_id_, writer_->get_log_number(), + creation_reason_, s, "", "", blob_count_, + blob_bytes_) + .PermitUncheckedError(); + } + + writer_.reset(); + blob_count_ = 0; + blob_bytes_ = 0; +} + +Status BlobFileBuilder::PutBlobIntoCacheIfNeeded(const Slice& blob, + uint64_t blob_file_number, + uint64_t blob_offset) const { + Status s = Status::OK(); + + BlobSource::SharedCacheInterface blob_cache{immutable_options_->blob_cache}; + auto statistics = immutable_options_->statistics.get(); + bool warm_cache = + prepopulate_blob_cache_ == PrepopulateBlobCache::kFlushOnly && + creation_reason_ == BlobFileCreationReason::kFlush; + + if (blob_cache && warm_cache) { + const OffsetableCacheKey base_cache_key(db_id_, db_session_id_, + blob_file_number); + const CacheKey cache_key = base_cache_key.WithOffset(blob_offset); + const Slice key = cache_key.AsSlice(); + + const Cache::Priority priority = Cache::Priority::BOTTOM; + + s = blob_cache.InsertSaved(key, blob, nullptr /*context*/, priority, + immutable_options_->lowest_used_cache_tier); + + if (s.ok()) { + RecordTick(statistics, BLOB_DB_CACHE_ADD); + RecordTick(statistics, BLOB_DB_CACHE_BYTES_WRITE, blob.size()); + } else { + RecordTick(statistics, BLOB_DB_CACHE_ADD_FAILURES); + } + } + + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.h new file mode 100644 index 0000000000..8e7aab502d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_builder.h @@ -0,0 +1,112 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
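+
+// Illustration (hypothetical call sequence, for exposition only; not part of
+// the upstream file): typical use of the builder declared below. Values
+// shorter than min_blob_size stay inline in the LSM tree; larger ones are
+// written to a blob file and replaced by an encoded blob index:
+//
+//   std::string blob_index;
+//   Status s = builder.Add(key, value, &blob_index);
+//   // blob_index stays empty if the value was kept inline; otherwise it
+//   // encodes (blob file number, offset, size, compression type).
+//   ...
+//   s = builder.Finish();  // seals the currently open blob file, if any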
+#pragma once + +#include +#include +#include +#include +#include + +#include "rocksdb/advanced_options.h" +#include "rocksdb/compression_type.h" +#include "rocksdb/env.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +class VersionSet; +class FileSystem; +class SystemClock; +struct ImmutableOptions; +struct MutableCFOptions; +struct FileOptions; +class BlobFileAddition; +class Status; +class Slice; +class BlobLogWriter; +class IOTracer; +class BlobFileCompletionCallback; + +class BlobFileBuilder { + public: + BlobFileBuilder(VersionSet* versions, FileSystem* fs, + const ImmutableOptions* immutable_options, + const MutableCFOptions* mutable_cf_options, + const FileOptions* file_options, std::string db_id, + std::string db_session_id, int job_id, + uint32_t column_family_id, + const std::string& column_family_name, + Env::IOPriority io_priority, + Env::WriteLifeTimeHint write_hint, + const std::shared_ptr& io_tracer, + BlobFileCompletionCallback* blob_callback, + BlobFileCreationReason creation_reason, + std::vector* blob_file_paths, + std::vector* blob_file_additions); + + BlobFileBuilder(std::function file_number_generator, + FileSystem* fs, const ImmutableOptions* immutable_options, + const MutableCFOptions* mutable_cf_options, + const FileOptions* file_options, std::string db_id, + std::string db_session_id, int job_id, + uint32_t column_family_id, + const std::string& column_family_name, + Env::IOPriority io_priority, + Env::WriteLifeTimeHint write_hint, + const std::shared_ptr& io_tracer, + BlobFileCompletionCallback* blob_callback, + BlobFileCreationReason creation_reason, + std::vector* blob_file_paths, + std::vector* blob_file_additions); + + BlobFileBuilder(const BlobFileBuilder&) = delete; + BlobFileBuilder& operator=(const BlobFileBuilder&) = delete; + + ~BlobFileBuilder(); + + Status Add(const Slice& key, const Slice& value, std::string* blob_index); + Status Finish(); + void Abandon(const Status& s); + + private: + bool IsBlobFileOpen() const; + Status OpenBlobFileIfNeeded(); + Status CompressBlobIfNeeded(Slice* blob, std::string* compressed_blob) const; + Status WriteBlobToFile(const Slice& key, const Slice& blob, + uint64_t* blob_file_number, uint64_t* blob_offset); + Status CloseBlobFile(); + Status CloseBlobFileIfNeeded(); + + Status PutBlobIntoCacheIfNeeded(const Slice& blob, uint64_t blob_file_number, + uint64_t blob_offset) const; + + std::function file_number_generator_; + FileSystem* fs_; + const ImmutableOptions* immutable_options_; + uint64_t min_blob_size_; + uint64_t blob_file_size_; + CompressionType blob_compression_type_; + PrepopulateBlobCache prepopulate_blob_cache_; + const FileOptions* file_options_; + const std::string db_id_; + const std::string db_session_id_; + int job_id_; + uint32_t column_family_id_; + std::string column_family_name_; + Env::IOPriority io_priority_; + Env::WriteLifeTimeHint write_hint_; + std::shared_ptr io_tracer_; + BlobFileCompletionCallback* blob_callback_; + BlobFileCreationReason creation_reason_; + std::vector* blob_file_paths_; + std::vector* blob_file_additions_; + std::unique_ptr writer_; + uint64_t blob_count_; + uint64_t blob_bytes_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.cc new file mode 100644 index 0000000000..deebf8d344 --- /dev/null +++ 
b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_file_cache.h" + +#include +#include + +#include "db/blob/blob_file_reader.h" +#include "options/cf_options.h" +#include "rocksdb/cache.h" +#include "rocksdb/slice.h" +#include "test_util/sync_point.h" +#include "trace_replay/io_tracer.h" +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +BlobFileCache::BlobFileCache(Cache* cache, + const ImmutableOptions* immutable_options, + const FileOptions* file_options, + uint32_t column_family_id, + HistogramImpl* blob_file_read_hist, + const std::shared_ptr& io_tracer) + : cache_(cache), + mutex_(kNumberOfMutexStripes, kGetSliceNPHash64UnseededFnPtr), + immutable_options_(immutable_options), + file_options_(file_options), + column_family_id_(column_family_id), + blob_file_read_hist_(blob_file_read_hist), + io_tracer_(io_tracer) { + assert(cache_); + assert(immutable_options_); + assert(file_options_); +} + +Status BlobFileCache::GetBlobFileReader( + const ReadOptions& read_options, uint64_t blob_file_number, + CacheHandleGuard* blob_file_reader) { + assert(blob_file_reader); + assert(blob_file_reader->IsEmpty()); + + const Slice key = GetSliceForKey(&blob_file_number); + + assert(cache_); + + TypedHandle* handle = cache_.Lookup(key); + if (handle) { + *blob_file_reader = cache_.Guard(handle); + return Status::OK(); + } + + TEST_SYNC_POINT("BlobFileCache::GetBlobFileReader:DoubleCheck"); + + // Check again while holding mutex + MutexLock lock(mutex_.get(key)); + + handle = cache_.Lookup(key); + if (handle) { + *blob_file_reader = cache_.Guard(handle); + return Status::OK(); + } + + assert(immutable_options_); + Statistics* const statistics = immutable_options_->stats; + + RecordTick(statistics, NO_FILE_OPENS); + + std::unique_ptr reader; + + { + assert(file_options_); + const Status s = BlobFileReader::Create( + *immutable_options_, read_options, *file_options_, column_family_id_, + blob_file_read_hist_, blob_file_number, io_tracer_, &reader); + if (!s.ok()) { + RecordTick(statistics, NO_FILE_ERRORS); + return s; + } + } + + { + constexpr size_t charge = 1; + + const Status s = cache_.Insert(key, reader.get(), charge, &handle); + if (!s.ok()) { + RecordTick(statistics, NO_FILE_ERRORS); + return s; + } + } + + reader.release(); + + *blob_file_reader = cache_.Guard(handle); + + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.h new file mode 100644 index 0000000000..a80be7c55e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_cache.h @@ -0,0 +1,56 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
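+
+// Illustration (for exposition only; not part of the upstream file):
+// GetBlobFileReader in the .cc above uses a double-checked pattern so that at
+// most one thread pays the cost of opening any given blob file:
+//
+//   if ((handle = cache_.Lookup(key))) { ... }  // lock-free fast path
+//   MutexLock lock(mutex_.get(key));            // mutex stripe keyed by file
+//   if ((handle = cache_.Lookup(key))) { ... }  // raced: reuse the winner's
+//   ... open the reader and cache_.Insert(key, reader.get(), charge, ...) ...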
+ +#pragma once + +#include + +#include "cache/typed_cache.h" +#include "db/blob/blob_file_reader.h" +#include "rocksdb/rocksdb_namespace.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +class Cache; +struct ImmutableOptions; +struct FileOptions; +class HistogramImpl; +class Status; +class Slice; +class IOTracer; + +class BlobFileCache { + public: + BlobFileCache(Cache* cache, const ImmutableOptions* immutable_options, + const FileOptions* file_options, uint32_t column_family_id, + HistogramImpl* blob_file_read_hist, + const std::shared_ptr& io_tracer); + + BlobFileCache(const BlobFileCache&) = delete; + BlobFileCache& operator=(const BlobFileCache&) = delete; + + Status GetBlobFileReader(const ReadOptions& read_options, + uint64_t blob_file_number, + CacheHandleGuard* blob_file_reader); + + private: + using CacheInterface = + BasicTypedCacheInterface; + using TypedHandle = CacheInterface::TypedHandle; + CacheInterface cache_; + // Note: mutex_ below is used to guard against multiple threads racing to open + // the same file. + Striped mutex_; + const ImmutableOptions* immutable_options_; + const FileOptions* file_options_; + uint32_t column_family_id_; + HistogramImpl* blob_file_read_hist_; + std::shared_ptr io_tracer_; + + static constexpr size_t kNumberOfMutexStripes = 1 << 7; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_completion_callback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_completion_callback.h new file mode 100644 index 0000000000..9159677315 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_completion_callback.h @@ -0,0 +1,84 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include "db/error_handler.h" +#include "db/event_helpers.h" +#include "file/sst_file_manager_impl.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +class BlobFileCompletionCallback { + public: + BlobFileCompletionCallback( + SstFileManager* sst_file_manager, InstrumentedMutex* mutex, + ErrorHandler* error_handler, EventLogger* event_logger, + const std::vector>& listeners, + const std::string& dbname) + : event_logger_(event_logger), listeners_(listeners), dbname_(dbname) { + sst_file_manager_ = sst_file_manager; + mutex_ = mutex; + error_handler_ = error_handler; + } + + void OnBlobFileCreationStarted(const std::string& file_name, + const std::string& column_family_name, + int job_id, + BlobFileCreationReason creation_reason) { + // Notify the listeners. 
+ EventHelpers::NotifyBlobFileCreationStarted(listeners_, dbname_, + column_family_name, file_name, + job_id, creation_reason); + } + + Status OnBlobFileCompleted(const std::string& file_name, + const std::string& column_family_name, int job_id, + uint64_t file_number, + BlobFileCreationReason creation_reason, + const Status& report_status, + const std::string& checksum_value, + const std::string& checksum_method, + uint64_t blob_count, uint64_t blob_bytes) { + Status s; + + auto sfm = static_cast(sst_file_manager_); + if (sfm) { + // Report new blob files to SstFileManagerImpl + s = sfm->OnAddFile(file_name); + if (sfm->IsMaxAllowedSpaceReached()) { + s = Status::SpaceLimit("Max allowed space was reached"); + TEST_SYNC_POINT( + "BlobFileCompletionCallback::CallBack::MaxAllowedSpaceReached"); + InstrumentedMutexLock l(mutex_); + error_handler_->SetBGError(s, BackgroundErrorReason::kFlush); + } + } + + // Notify the listeners. + EventHelpers::LogAndNotifyBlobFileCreationFinished( + event_logger_, listeners_, dbname_, column_family_name, file_name, + job_id, file_number, creation_reason, + (!report_status.ok() ? report_status : s), + (checksum_value.empty() ? kUnknownFileChecksum : checksum_value), + (checksum_method.empty() ? kUnknownFileChecksumFuncName + : checksum_method), + blob_count, blob_bytes); + return s; + } + + private: + SstFileManager* sst_file_manager_; + InstrumentedMutex* mutex_; + ErrorHandler* error_handler_; + EventLogger* event_logger_; + std::vector> listeners_; + std::string dbname_; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.cc new file mode 100644 index 0000000000..52c336f499 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.cc @@ -0,0 +1,134 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_file_garbage.h" + +#include +#include + +#include "logging/event_logger.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "test_util/sync_point.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +// Tags for custom fields. Note that these get persisted in the manifest, +// so existing tags should not be modified. +enum BlobFileGarbage::CustomFieldTags : uint32_t { + kEndMarker, + + // Add forward compatible fields here + + ///////////////////////////////////////////////////////////////////// + + kForwardIncompatibleMask = 1 << 6, + + // Add forward incompatible fields here +}; + +void BlobFileGarbage::EncodeTo(std::string* output) const { + PutVarint64(output, blob_file_number_); + PutVarint64(output, garbage_blob_count_); + PutVarint64(output, garbage_blob_bytes_); + + // Encode any custom fields here. The format to use is a Varint32 tag (see + // CustomFieldTags above) followed by a length prefixed slice. Unknown custom + // fields will be ignored during decoding unless they're in the forward + // incompatible range. 
+ + TEST_SYNC_POINT_CALLBACK("BlobFileGarbage::EncodeTo::CustomFields", output); + + PutVarint32(output, kEndMarker); +} + +Status BlobFileGarbage::DecodeFrom(Slice* input) { + constexpr char class_name[] = "BlobFileGarbage"; + + if (!GetVarint64(input, &blob_file_number_)) { + return Status::Corruption(class_name, "Error decoding blob file number"); + } + + if (!GetVarint64(input, &garbage_blob_count_)) { + return Status::Corruption(class_name, "Error decoding garbage blob count"); + } + + if (!GetVarint64(input, &garbage_blob_bytes_)) { + return Status::Corruption(class_name, "Error decoding garbage blob bytes"); + } + + while (true) { + uint32_t custom_field_tag = 0; + if (!GetVarint32(input, &custom_field_tag)) { + return Status::Corruption(class_name, "Error decoding custom field tag"); + } + + if (custom_field_tag == kEndMarker) { + break; + } + + if (custom_field_tag & kForwardIncompatibleMask) { + return Status::Corruption( + class_name, "Forward incompatible custom field encountered"); + } + + Slice custom_field_value; + if (!GetLengthPrefixedSlice(input, &custom_field_value)) { + return Status::Corruption(class_name, + "Error decoding custom field value"); + } + } + + return Status::OK(); +} + +std::string BlobFileGarbage::DebugString() const { + std::ostringstream oss; + + oss << *this; + + return oss.str(); +} + +std::string BlobFileGarbage::DebugJSON() const { + JSONWriter jw; + + jw << *this; + + jw.EndObject(); + + return jw.Get(); +} + +bool operator==(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs) { + return lhs.GetBlobFileNumber() == rhs.GetBlobFileNumber() && + lhs.GetGarbageBlobCount() == rhs.GetGarbageBlobCount() && + lhs.GetGarbageBlobBytes() == rhs.GetGarbageBlobBytes(); +} + +bool operator!=(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs) { + return !(lhs == rhs); +} + +std::ostream& operator<<(std::ostream& os, + const BlobFileGarbage& blob_file_garbage) { + os << "blob_file_number: " << blob_file_garbage.GetBlobFileNumber() + << " garbage_blob_count: " << blob_file_garbage.GetGarbageBlobCount() + << " garbage_blob_bytes: " << blob_file_garbage.GetGarbageBlobBytes(); + + return os; +} + +JSONWriter& operator<<(JSONWriter& jw, + const BlobFileGarbage& blob_file_garbage) { + jw << "BlobFileNumber" << blob_file_garbage.GetBlobFileNumber() + << "GarbageBlobCount" << blob_file_garbage.GetGarbageBlobCount() + << "GarbageBlobBytes" << blob_file_garbage.GetGarbageBlobBytes(); + + return jw; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.h new file mode 100644 index 0000000000..6dc14ddcae --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_garbage.h @@ -0,0 +1,57 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + +#include +#include +#include + +#include "db/blob/blob_constants.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class JSONWriter; +class Slice; +class Status; + +class BlobFileGarbage { + public: + BlobFileGarbage() = default; + + BlobFileGarbage(uint64_t blob_file_number, uint64_t garbage_blob_count, + uint64_t garbage_blob_bytes) + : blob_file_number_(blob_file_number), + garbage_blob_count_(garbage_blob_count), + garbage_blob_bytes_(garbage_blob_bytes) {} + + uint64_t GetBlobFileNumber() const { return blob_file_number_; } + uint64_t GetGarbageBlobCount() const { return garbage_blob_count_; } + uint64_t GetGarbageBlobBytes() const { return garbage_blob_bytes_; } + + void EncodeTo(std::string* output) const; + Status DecodeFrom(Slice* input); + + std::string DebugString() const; + std::string DebugJSON() const; + + private: + enum CustomFieldTags : uint32_t; + + uint64_t blob_file_number_ = kInvalidBlobFileNumber; + uint64_t garbage_blob_count_ = 0; + uint64_t garbage_blob_bytes_ = 0; +}; + +bool operator==(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs); +bool operator!=(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs); + +std::ostream& operator<<(std::ostream& os, + const BlobFileGarbage& blob_file_garbage); +JSONWriter& operator<<(JSONWriter& jw, + const BlobFileGarbage& blob_file_garbage); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.cc new file mode 100644 index 0000000000..4913137e59 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "db/blob/blob_file_meta.h" + +#include +#include + +#include "db/blob/blob_log_format.h" +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { +uint64_t SharedBlobFileMetaData::GetBlobFileSize() const { + return BlobLogHeader::kSize + total_blob_bytes_ + BlobLogFooter::kSize; +} + +std::string SharedBlobFileMetaData::DebugString() const { + std::ostringstream oss; + oss << (*this); + + return oss.str(); +} + +std::ostream& operator<<(std::ostream& os, + const SharedBlobFileMetaData& shared_meta) { + os << "blob_file_number: " << shared_meta.GetBlobFileNumber() + << " total_blob_count: " << shared_meta.GetTotalBlobCount() + << " total_blob_bytes: " << shared_meta.GetTotalBlobBytes() + << " checksum_method: " << shared_meta.GetChecksumMethod() + << " checksum_value: " + << Slice(shared_meta.GetChecksumValue()).ToString(/* hex */ true); + + return os; +} + +std::string BlobFileMetaData::DebugString() const { + std::ostringstream oss; + oss << (*this); + + return oss.str(); +} + +std::ostream& operator<<(std::ostream& os, const BlobFileMetaData& meta) { + const auto& shared_meta = meta.GetSharedMeta(); + assert(shared_meta); + os << (*shared_meta); + + os << " linked_ssts: {"; + for (uint64_t file_number : meta.GetLinkedSsts()) { + os << ' ' << file_number; + } + os << " }"; + + os << " garbage_blob_count: " << meta.GetGarbageBlobCount() + << " garbage_blob_bytes: " << meta.GetGarbageBlobBytes(); + + return os; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.h new file mode 100644 index 0000000000..d7c8a12433 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_meta.h @@ -0,0 +1,170 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// SharedBlobFileMetaData represents the immutable part of blob files' metadata, +// like the blob file number, total number and size of blobs, or checksum +// method and value. There is supposed to be one object of this class per blob +// file (shared across all versions that include the blob file in question); +// hence, the type is neither copyable nor movable. A blob file can be marked +// obsolete when the corresponding SharedBlobFileMetaData object is destroyed. 
+ +class SharedBlobFileMetaData { + public: + static std::shared_ptr Create( + uint64_t blob_file_number, uint64_t total_blob_count, + uint64_t total_blob_bytes, std::string checksum_method, + std::string checksum_value) { + return std::shared_ptr(new SharedBlobFileMetaData( + blob_file_number, total_blob_count, total_blob_bytes, + std::move(checksum_method), std::move(checksum_value))); + } + + template + static std::shared_ptr Create( + uint64_t blob_file_number, uint64_t total_blob_count, + uint64_t total_blob_bytes, std::string checksum_method, + std::string checksum_value, Deleter deleter) { + return std::shared_ptr( + new SharedBlobFileMetaData(blob_file_number, total_blob_count, + total_blob_bytes, std::move(checksum_method), + std::move(checksum_value)), + deleter); + } + + SharedBlobFileMetaData(const SharedBlobFileMetaData&) = delete; + SharedBlobFileMetaData& operator=(const SharedBlobFileMetaData&) = delete; + + SharedBlobFileMetaData(SharedBlobFileMetaData&&) = delete; + SharedBlobFileMetaData& operator=(SharedBlobFileMetaData&&) = delete; + + uint64_t GetBlobFileSize() const; + uint64_t GetBlobFileNumber() const { return blob_file_number_; } + uint64_t GetTotalBlobCount() const { return total_blob_count_; } + uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; } + const std::string& GetChecksumMethod() const { return checksum_method_; } + const std::string& GetChecksumValue() const { return checksum_value_; } + + std::string DebugString() const; + + private: + SharedBlobFileMetaData(uint64_t blob_file_number, uint64_t total_blob_count, + uint64_t total_blob_bytes, std::string checksum_method, + std::string checksum_value) + : blob_file_number_(blob_file_number), + total_blob_count_(total_blob_count), + total_blob_bytes_(total_blob_bytes), + checksum_method_(std::move(checksum_method)), + checksum_value_(std::move(checksum_value)) { + assert(checksum_method_.empty() == checksum_value_.empty()); + } + + uint64_t blob_file_number_; + uint64_t total_blob_count_; + uint64_t total_blob_bytes_; + std::string checksum_method_; + std::string checksum_value_; +}; + +std::ostream& operator<<(std::ostream& os, + const SharedBlobFileMetaData& shared_meta); + +// BlobFileMetaData contains the part of the metadata for blob files that can +// vary across versions, like the amount of garbage in the blob file. In +// addition, BlobFileMetaData objects point to and share the ownership of the +// SharedBlobFileMetaData object for the corresponding blob file. Similarly to +// SharedBlobFileMetaData, BlobFileMetaData are not copyable or movable. They +// are meant to be jointly owned by the versions in which the blob file has the +// same (immutable *and* mutable) state. 
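+//
+// A version that tracks a blob file might build its per-version metadata
+// like this (illustrative sketch; the inputs are placeholders):
+//
+//   auto meta = BlobFileMetaData::Create(
+//       shared_meta, /* linked_ssts */ {10, 11, 12},
+//       /* garbage_blob_count */ 0, /* garbage_blob_bytes */ 0);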
+ +class BlobFileMetaData { + public: + using LinkedSsts = std::unordered_set; + + static std::shared_ptr Create( + std::shared_ptr shared_meta, + LinkedSsts linked_ssts, uint64_t garbage_blob_count, + uint64_t garbage_blob_bytes) { + return std::shared_ptr( + new BlobFileMetaData(std::move(shared_meta), std::move(linked_ssts), + garbage_blob_count, garbage_blob_bytes)); + } + + BlobFileMetaData(const BlobFileMetaData&) = delete; + BlobFileMetaData& operator=(const BlobFileMetaData&) = delete; + + BlobFileMetaData(BlobFileMetaData&&) = delete; + BlobFileMetaData& operator=(BlobFileMetaData&&) = delete; + + const std::shared_ptr& GetSharedMeta() const { + return shared_meta_; + } + + uint64_t GetBlobFileSize() const { + assert(shared_meta_); + return shared_meta_->GetBlobFileSize(); + } + + uint64_t GetBlobFileNumber() const { + assert(shared_meta_); + return shared_meta_->GetBlobFileNumber(); + } + uint64_t GetTotalBlobCount() const { + assert(shared_meta_); + return shared_meta_->GetTotalBlobCount(); + } + uint64_t GetTotalBlobBytes() const { + assert(shared_meta_); + return shared_meta_->GetTotalBlobBytes(); + } + const std::string& GetChecksumMethod() const { + assert(shared_meta_); + return shared_meta_->GetChecksumMethod(); + } + const std::string& GetChecksumValue() const { + assert(shared_meta_); + return shared_meta_->GetChecksumValue(); + } + + const LinkedSsts& GetLinkedSsts() const { return linked_ssts_; } + + uint64_t GetGarbageBlobCount() const { return garbage_blob_count_; } + uint64_t GetGarbageBlobBytes() const { return garbage_blob_bytes_; } + + std::string DebugString() const; + + private: + BlobFileMetaData(std::shared_ptr shared_meta, + LinkedSsts linked_ssts, uint64_t garbage_blob_count, + uint64_t garbage_blob_bytes) + : shared_meta_(std::move(shared_meta)), + linked_ssts_(std::move(linked_ssts)), + garbage_blob_count_(garbage_blob_count), + garbage_blob_bytes_(garbage_blob_bytes) { + assert(shared_meta_); + assert(garbage_blob_count_ <= shared_meta_->GetTotalBlobCount()); + assert(garbage_blob_bytes_ <= shared_meta_->GetTotalBlobBytes()); + } + + std::shared_ptr shared_meta_; + LinkedSsts linked_ssts_; + uint64_t garbage_blob_count_; + uint64_t garbage_blob_bytes_; +}; + +std::ostream& operator<<(std::ostream& os, const BlobFileMetaData& meta); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.cc new file mode 100644 index 0000000000..7abd1ce6b5 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.cc @@ -0,0 +1,624 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "db/blob/blob_file_reader.h" + +#include +#include + +#include "db/blob/blob_contents.h" +#include "db/blob/blob_log_format.h" +#include "file/file_prefetch_buffer.h" +#include "file/filename.h" +#include "monitoring/statistics_impl.h" +#include "options/cf_options.h" +#include "rocksdb/file_system.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "table/multiget_context.h" +#include "test_util/sync_point.h" +#include "util/compression.h" +#include "util/crc32c.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +Status BlobFileReader::Create( + const ImmutableOptions& immutable_options, const ReadOptions& read_options, + const FileOptions& file_options, uint32_t column_family_id, + HistogramImpl* blob_file_read_hist, uint64_t blob_file_number, + const std::shared_ptr& io_tracer, + std::unique_ptr* blob_file_reader) { + assert(blob_file_reader); + assert(!*blob_file_reader); + + uint64_t file_size = 0; + std::unique_ptr file_reader; + + { + const Status s = + OpenFile(immutable_options, file_options, blob_file_read_hist, + blob_file_number, io_tracer, &file_size, &file_reader); + if (!s.ok()) { + return s; + } + } + + assert(file_reader); + + Statistics* const statistics = immutable_options.stats; + + CompressionType compression_type = kNoCompression; + + { + const Status s = + ReadHeader(file_reader.get(), read_options, column_family_id, + statistics, &compression_type); + if (!s.ok()) { + return s; + } + } + + { + const Status s = + ReadFooter(file_reader.get(), read_options, file_size, statistics); + if (!s.ok()) { + return s; + } + } + + blob_file_reader->reset( + new BlobFileReader(std::move(file_reader), file_size, compression_type, + immutable_options.clock, statistics)); + + return Status::OK(); +} + +Status BlobFileReader::OpenFile( + const ImmutableOptions& immutable_options, const FileOptions& file_opts, + HistogramImpl* blob_file_read_hist, uint64_t blob_file_number, + const std::shared_ptr& io_tracer, uint64_t* file_size, + std::unique_ptr* file_reader) { + assert(file_size); + assert(file_reader); + + const auto& cf_paths = immutable_options.cf_paths; + assert(!cf_paths.empty()); + + const std::string blob_file_path = + BlobFileName(cf_paths.front().path, blob_file_number); + + FileSystem* const fs = immutable_options.fs.get(); + assert(fs); + + constexpr IODebugContext* dbg = nullptr; + + { + TEST_SYNC_POINT("BlobFileReader::OpenFile:GetFileSize"); + + const Status s = + fs->GetFileSize(blob_file_path, IOOptions(), file_size, dbg); + if (!s.ok()) { + return s; + } + } + + if (*file_size < BlobLogHeader::kSize + BlobLogFooter::kSize) { + return Status::Corruption("Malformed blob file"); + } + + std::unique_ptr file; + + { + TEST_SYNC_POINT("BlobFileReader::OpenFile:NewRandomAccessFile"); + + const Status s = + fs->NewRandomAccessFile(blob_file_path, file_opts, &file, dbg); + if (!s.ok()) { + return s; + } + } + + assert(file); + + if (immutable_options.advise_random_on_open) { + file->Hint(FSRandomAccessFile::kRandom); + } + + file_reader->reset(new RandomAccessFileReader( + std::move(file), blob_file_path, immutable_options.clock, io_tracer, + immutable_options.stats, BLOB_DB_BLOB_FILE_READ_MICROS, + blob_file_read_hist, immutable_options.rate_limiter.get(), + immutable_options.listeners)); + + return Status::OK(); +} + +Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader, + const ReadOptions& read_options, + uint32_t column_family_id, + Statistics* statistics, + CompressionType* compression_type) { + 
assert(file_reader); + assert(compression_type); + + Slice header_slice; + Buffer buf; + AlignedBuf aligned_buf; + + { + TEST_SYNC_POINT("BlobFileReader::ReadHeader:ReadFromFile"); + + constexpr uint64_t read_offset = 0; + constexpr size_t read_size = BlobLogHeader::kSize; + + // TODO: rate limit reading headers from blob files. + const Status s = + ReadFromFile(file_reader, read_options, read_offset, read_size, + statistics, &header_slice, &buf, &aligned_buf, + Env::IO_TOTAL /* rate_limiter_priority */); + if (!s.ok()) { + return s; + } + + TEST_SYNC_POINT_CALLBACK("BlobFileReader::ReadHeader:TamperWithResult", + &header_slice); + } + + BlobLogHeader header; + + { + const Status s = header.DecodeFrom(header_slice); + if (!s.ok()) { + return s; + } + } + + constexpr ExpirationRange no_expiration_range; + + if (header.has_ttl || header.expiration_range != no_expiration_range) { + return Status::Corruption("Unexpected TTL blob file"); + } + + if (header.column_family_id != column_family_id) { + return Status::Corruption("Column family ID mismatch"); + } + + *compression_type = header.compression; + + return Status::OK(); +} + +Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader, + const ReadOptions& read_options, + uint64_t file_size, Statistics* statistics) { + assert(file_size >= BlobLogHeader::kSize + BlobLogFooter::kSize); + assert(file_reader); + + Slice footer_slice; + Buffer buf; + AlignedBuf aligned_buf; + + { + TEST_SYNC_POINT("BlobFileReader::ReadFooter:ReadFromFile"); + + const uint64_t read_offset = file_size - BlobLogFooter::kSize; + constexpr size_t read_size = BlobLogFooter::kSize; + + // TODO: rate limit reading footers from blob files. + const Status s = + ReadFromFile(file_reader, read_options, read_offset, read_size, + statistics, &footer_slice, &buf, &aligned_buf, + Env::IO_TOTAL /* rate_limiter_priority */); + if (!s.ok()) { + return s; + } + + TEST_SYNC_POINT_CALLBACK("BlobFileReader::ReadFooter:TamperWithResult", + &footer_slice); + } + + BlobLogFooter footer; + + { + const Status s = footer.DecodeFrom(footer_slice); + if (!s.ok()) { + return s; + } + } + + constexpr ExpirationRange no_expiration_range; + + if (footer.expiration_range != no_expiration_range) { + return Status::Corruption("Unexpected TTL blob file"); + } + + return Status::OK(); +} + +Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader, + const ReadOptions& read_options, + uint64_t read_offset, size_t read_size, + Statistics* statistics, Slice* slice, + Buffer* buf, AlignedBuf* aligned_buf, + Env::IOPriority rate_limiter_priority) { + assert(slice); + assert(buf); + assert(aligned_buf); + + assert(file_reader); + + RecordTick(statistics, BLOB_DB_BLOB_FILE_BYTES_READ, read_size); + + Status s; + + IOOptions io_options; + s = file_reader->PrepareIOOptions(read_options, io_options); + if (!s.ok()) { + return s; + } + + if (file_reader->use_direct_io()) { + constexpr char* scratch = nullptr; + + s = file_reader->Read(io_options, read_offset, read_size, slice, scratch, + aligned_buf, rate_limiter_priority); + } else { + buf->reset(new char[read_size]); + constexpr AlignedBuf* aligned_scratch = nullptr; + + s = file_reader->Read(io_options, read_offset, read_size, slice, buf->get(), + aligned_scratch, rate_limiter_priority); + } + + if (!s.ok()) { + return s; + } + + if (slice->size() != read_size) { + return Status::Corruption("Failed to read data from blob file"); + } + + return Status::OK(); +} + +BlobFileReader::BlobFileReader( + std::unique_ptr&& 
file_reader, uint64_t file_size, + CompressionType compression_type, SystemClock* clock, + Statistics* statistics) + : file_reader_(std::move(file_reader)), + file_size_(file_size), + compression_type_(compression_type), + clock_(clock), + statistics_(statistics) { + assert(file_reader_); +} + +BlobFileReader::~BlobFileReader() = default; + +Status BlobFileReader::GetBlob( + const ReadOptions& read_options, const Slice& user_key, uint64_t offset, + uint64_t value_size, CompressionType compression_type, + FilePrefetchBuffer* prefetch_buffer, MemoryAllocator* allocator, + std::unique_ptr* result, uint64_t* bytes_read) const { + assert(result); + + const uint64_t key_size = user_key.size(); + + if (!IsValidBlobOffset(offset, key_size, value_size, file_size_)) { + return Status::Corruption("Invalid blob offset"); + } + + if (compression_type != compression_type_) { + return Status::Corruption("Compression type mismatch when reading blob"); + } + + // Note: if verify_checksum is set, we read the entire blob record to be able + // to perform the verification; otherwise, we just read the blob itself. Since + // the offset in BlobIndex actually points to the blob value, we need to make + // an adjustment in the former case. + const uint64_t adjustment = + read_options.verify_checksums + ? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) + : 0; + assert(offset >= adjustment); + + const uint64_t record_offset = offset - adjustment; + const uint64_t record_size = value_size + adjustment; + + Slice record_slice; + Buffer buf; + AlignedBuf aligned_buf; + + bool prefetched = false; + + if (prefetch_buffer) { + Status s; + constexpr bool for_compaction = true; + + IOOptions io_options; + s = file_reader_->PrepareIOOptions(read_options, io_options); + if (!s.ok()) { + return s; + } + prefetched = prefetch_buffer->TryReadFromCache( + io_options, file_reader_.get(), record_offset, + static_cast(record_size), &record_slice, &s, + read_options.rate_limiter_priority, for_compaction); + if (!s.ok()) { + return s; + } + } + + if (!prefetched) { + TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile"); + PERF_COUNTER_ADD(blob_read_count, 1); + PERF_COUNTER_ADD(blob_read_byte, record_size); + PERF_TIMER_GUARD(blob_read_time); + const Status s = ReadFromFile( + file_reader_.get(), read_options, record_offset, + static_cast(record_size), statistics_, &record_slice, &buf, + &aligned_buf, read_options.rate_limiter_priority); + if (!s.ok()) { + return s; + } + } + + TEST_SYNC_POINT_CALLBACK("BlobFileReader::GetBlob:TamperWithResult", + &record_slice); + + if (read_options.verify_checksums) { + const Status s = VerifyBlob(record_slice, user_key, value_size); + if (!s.ok()) { + return s; + } + } + + const Slice value_slice(record_slice.data() + adjustment, value_size); + + { + const Status s = UncompressBlobIfNeeded( + value_slice, compression_type, allocator, clock_, statistics_, result); + if (!s.ok()) { + return s; + } + } + + if (bytes_read) { + *bytes_read = record_size; + } + + return Status::OK(); +} + +void BlobFileReader::MultiGetBlob( + const ReadOptions& read_options, MemoryAllocator* allocator, + autovector>>& + blob_reqs, + uint64_t* bytes_read) const { + const size_t num_blobs = blob_reqs.size(); + assert(num_blobs > 0); + assert(num_blobs <= MultiGetContext::MAX_BATCH_SIZE); + +#ifndef NDEBUG + for (size_t i = 0; i < num_blobs - 1; ++i) { + assert(blob_reqs[i].first->offset <= blob_reqs[i + 1].first->offset); + } +#endif // !NDEBUG + + std::vector read_reqs; + autovector adjustments; + uint64_t 
total_len = 0; + read_reqs.reserve(num_blobs); + for (size_t i = 0; i < num_blobs; ++i) { + BlobReadRequest* const req = blob_reqs[i].first; + assert(req); + assert(req->user_key); + assert(req->status); + + const size_t key_size = req->user_key->size(); + const uint64_t offset = req->offset; + const uint64_t value_size = req->len; + + if (!IsValidBlobOffset(offset, key_size, value_size, file_size_)) { + *req->status = Status::Corruption("Invalid blob offset"); + continue; + } + if (req->compression != compression_type_) { + *req->status = + Status::Corruption("Compression type mismatch when reading a blob"); + continue; + } + + const uint64_t adjustment = + read_options.verify_checksums + ? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) + : 0; + assert(req->offset >= adjustment); + adjustments.push_back(adjustment); + + FSReadRequest read_req = {}; + read_req.offset = req->offset - adjustment; + read_req.len = req->len + adjustment; + read_reqs.emplace_back(read_req); + total_len += read_req.len; + } + + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, total_len); + + Buffer buf; + AlignedBuf aligned_buf; + + Status s; + bool direct_io = file_reader_->use_direct_io(); + if (direct_io) { + for (size_t i = 0; i < read_reqs.size(); ++i) { + read_reqs[i].scratch = nullptr; + } + } else { + buf.reset(new char[total_len]); + std::ptrdiff_t pos = 0; + for (size_t i = 0; i < read_reqs.size(); ++i) { + read_reqs[i].scratch = buf.get() + pos; + pos += read_reqs[i].len; + } + } + TEST_SYNC_POINT("BlobFileReader::MultiGetBlob:ReadFromFile"); + PERF_COUNTER_ADD(blob_read_count, num_blobs); + PERF_COUNTER_ADD(blob_read_byte, total_len); + s = file_reader_->MultiRead(IOOptions(), read_reqs.data(), read_reqs.size(), + direct_io ? &aligned_buf : nullptr, + read_options.rate_limiter_priority); + if (!s.ok()) { + for (auto& req : read_reqs) { + req.status.PermitUncheckedError(); + } + for (auto& blob_req : blob_reqs) { + BlobReadRequest* const req = blob_req.first; + assert(req); + assert(req->status); + + if (!req->status->IsCorruption()) { + // Avoid overwriting corruption status. 
+ *req->status = s; + } + } + return; + } + + assert(s.ok()); + + uint64_t total_bytes = 0; + for (size_t i = 0, j = 0; i < num_blobs; ++i) { + BlobReadRequest* const req = blob_reqs[i].first; + assert(req); + assert(req->user_key); + assert(req->status); + + if (!req->status->ok()) { + continue; + } + + assert(j < read_reqs.size()); + auto& read_req = read_reqs[j++]; + const auto& record_slice = read_req.result; + if (read_req.status.ok() && record_slice.size() != read_req.len) { + read_req.status = + IOStatus::Corruption("Failed to read data from blob file"); + } + + *req->status = read_req.status; + if (!req->status->ok()) { + continue; + } + + // Verify checksums if enabled + if (read_options.verify_checksums) { + *req->status = VerifyBlob(record_slice, *req->user_key, req->len); + if (!req->status->ok()) { + continue; + } + } + + // Uncompress blob if needed + Slice value_slice(record_slice.data() + adjustments[i], req->len); + *req->status = + UncompressBlobIfNeeded(value_slice, compression_type_, allocator, + clock_, statistics_, &blob_reqs[i].second); + if (req->status->ok()) { + total_bytes += record_slice.size(); + } + } + + if (bytes_read) { + *bytes_read = total_bytes; + } +} + +Status BlobFileReader::VerifyBlob(const Slice& record_slice, + const Slice& user_key, uint64_t value_size) { + PERF_TIMER_GUARD(blob_checksum_time); + + BlobLogRecord record; + + const Slice header_slice(record_slice.data(), BlobLogRecord::kHeaderSize); + + { + const Status s = record.DecodeHeaderFrom(header_slice); + if (!s.ok()) { + return s; + } + } + + if (record.key_size != user_key.size()) { + return Status::Corruption("Key size mismatch when reading blob"); + } + + if (record.value_size != value_size) { + return Status::Corruption("Value size mismatch when reading blob"); + } + + record.key = + Slice(record_slice.data() + BlobLogRecord::kHeaderSize, record.key_size); + if (record.key != user_key) { + return Status::Corruption("Key mismatch when reading blob"); + } + + record.value = Slice(record.key.data() + record.key_size, value_size); + + { + TEST_SYNC_POINT_CALLBACK("BlobFileReader::VerifyBlob:CheckBlobCRC", + &record); + + const Status s = record.CheckBlobCRC(); + if (!s.ok()) { + return s; + } + } + + return Status::OK(); +} + +Status BlobFileReader::UncompressBlobIfNeeded( + const Slice& value_slice, CompressionType compression_type, + MemoryAllocator* allocator, SystemClock* clock, Statistics* statistics, + std::unique_ptr* result) { + assert(result); + + if (compression_type == kNoCompression) { + BlobContentsCreator::Create(result, nullptr, value_slice, allocator); + return Status::OK(); + } + + UncompressionContext context(compression_type); + UncompressionInfo info(context, UncompressionDict::GetEmptyDict(), + compression_type); + + size_t uncompressed_size = 0; + constexpr uint32_t compression_format_version = 2; + + CacheAllocationPtr output; + + { + PERF_TIMER_GUARD(blob_decompress_time); + StopWatch stop_watch(clock, statistics, BLOB_DB_DECOMPRESSION_MICROS); + output = UncompressData(info, value_slice.data(), value_slice.size(), + &uncompressed_size, compression_format_version, + allocator); + } + + TEST_SYNC_POINT_CALLBACK( + "BlobFileReader::UncompressBlobIfNeeded:TamperWithResult", &output); + + if (!output) { + return Status::Corruption("Unable to uncompress blob"); + } + + result->reset(new BlobContents(std::move(output), uncompressed_size)); + + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git 
a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.h
new file mode 100644
index 0000000000..990e325408
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_file_reader.h
@@ -0,0 +1,112 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cinttypes>
+#include <memory>
+
+#include "db/blob/blob_read_request.h"
+#include "file/random_access_file_reader.h"
+#include "rocksdb/compression_type.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Status;
+struct ImmutableOptions;
+struct FileOptions;
+class HistogramImpl;
+struct ReadOptions;
+class Slice;
+class FilePrefetchBuffer;
+class BlobContents;
+class Statistics;
+
+class BlobFileReader {
+ public:
+  static Status Create(const ImmutableOptions& immutable_options,
+                       const ReadOptions& read_options,
+                       const FileOptions& file_options,
+                       uint32_t column_family_id,
+                       HistogramImpl* blob_file_read_hist,
+                       uint64_t blob_file_number,
+                       const std::shared_ptr<IOTracer>& io_tracer,
+                       std::unique_ptr<BlobFileReader>* reader);
+
+  BlobFileReader(const BlobFileReader&) = delete;
+  BlobFileReader& operator=(const BlobFileReader&) = delete;
+
+  ~BlobFileReader();
+
+  Status GetBlob(const ReadOptions& read_options, const Slice& user_key,
+                 uint64_t offset, uint64_t value_size,
+                 CompressionType compression_type,
+                 FilePrefetchBuffer* prefetch_buffer,
+                 MemoryAllocator* allocator,
+                 std::unique_ptr<BlobContents>* result,
+                 uint64_t* bytes_read) const;
+
+  // offsets must be sorted in ascending order by the caller
+ void MultiGetBlob( + const ReadOptions& read_options, MemoryAllocator* allocator, + autovector>>& + blob_reqs, + uint64_t* bytes_read) const; + + CompressionType GetCompressionType() const { return compression_type_; } + + uint64_t GetFileSize() const { return file_size_; } + + private: + BlobFileReader(std::unique_ptr&& file_reader, + uint64_t file_size, CompressionType compression_type, + SystemClock* clock, Statistics* statistics); + + static Status OpenFile(const ImmutableOptions& immutable_options, + const FileOptions& file_opts, + HistogramImpl* blob_file_read_hist, + uint64_t blob_file_number, + const std::shared_ptr& io_tracer, + uint64_t* file_size, + std::unique_ptr* file_reader); + + static Status ReadHeader(const RandomAccessFileReader* file_reader, + const ReadOptions& read_options, + uint32_t column_family_id, Statistics* statistics, + CompressionType* compression_type); + + static Status ReadFooter(const RandomAccessFileReader* file_reader, + const ReadOptions& read_options, uint64_t file_size, + Statistics* statistics); + + using Buffer = std::unique_ptr; + + static Status ReadFromFile(const RandomAccessFileReader* file_reader, + const ReadOptions& read_options, + uint64_t read_offset, size_t read_size, + Statistics* statistics, Slice* slice, Buffer* buf, + AlignedBuf* aligned_buf, + Env::IOPriority rate_limiter_priority); + + static Status VerifyBlob(const Slice& record_slice, const Slice& user_key, + uint64_t value_size); + + static Status UncompressBlobIfNeeded(const Slice& value_slice, + CompressionType compression_type, + MemoryAllocator* allocator, + SystemClock* clock, + Statistics* statistics, + std::unique_ptr* result); + + std::unique_ptr file_reader_; + uint64_t file_size_; + CompressionType compression_type_; + SystemClock* clock_; + Statistics* statistics_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.cc new file mode 100644 index 0000000000..d328d7ff4c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_garbage_meter.h" + +#include "db/blob/blob_index.h" +#include "db/blob/blob_log_format.h" +#include "db/dbformat.h" + +namespace ROCKSDB_NAMESPACE { + +Status BlobGarbageMeter::ProcessInFlow(const Slice& key, const Slice& value) { + uint64_t blob_file_number = kInvalidBlobFileNumber; + uint64_t bytes = 0; + + const Status s = Parse(key, value, &blob_file_number, &bytes); + if (!s.ok()) { + return s; + } + + if (blob_file_number == kInvalidBlobFileNumber) { + return Status::OK(); + } + + flows_[blob_file_number].AddInFlow(bytes); + + return Status::OK(); +} + +Status BlobGarbageMeter::ProcessOutFlow(const Slice& key, const Slice& value) { + uint64_t blob_file_number = kInvalidBlobFileNumber; + uint64_t bytes = 0; + + const Status s = Parse(key, value, &blob_file_number, &bytes); + if (!s.ok()) { + return s; + } + + if (blob_file_number == kInvalidBlobFileNumber) { + return Status::OK(); + } + + // Note: in order to measure the amount of additional garbage, we only need to + // track the outflow for preexisting files, i.e. those that also had inflow. 
+ // (Newly written files would only have outflow.) + auto it = flows_.find(blob_file_number); + if (it == flows_.end()) { + return Status::OK(); + } + + it->second.AddOutFlow(bytes); + + return Status::OK(); +} + +Status BlobGarbageMeter::Parse(const Slice& key, const Slice& value, + uint64_t* blob_file_number, uint64_t* bytes) { + assert(blob_file_number); + assert(*blob_file_number == kInvalidBlobFileNumber); + assert(bytes); + assert(*bytes == 0); + + ParsedInternalKey ikey; + + { + constexpr bool log_err_key = false; + const Status s = ParseInternalKey(key, &ikey, log_err_key); + if (!s.ok()) { + return s; + } + } + + if (ikey.type != kTypeBlobIndex) { + return Status::OK(); + } + + BlobIndex blob_index; + + { + const Status s = blob_index.DecodeFrom(value); + if (!s.ok()) { + return s; + } + } + + if (blob_index.IsInlined() || blob_index.HasTTL()) { + return Status::Corruption("Unexpected TTL/inlined blob index"); + } + + *blob_file_number = blob_index.file_number(); + *bytes = + blob_index.size() + + BlobLogRecord::CalculateAdjustmentForRecordHeader(ikey.user_key.size()); + + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.h new file mode 100644 index 0000000000..a6c04b0b24 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_garbage_meter.h @@ -0,0 +1,102 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "db/blob/blob_constants.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; + +// A class that can be used to compute the amount of additional garbage +// generated by a compaction. It parses the keys and blob references in the +// input and output of a compaction, and aggregates the "inflow" and "outflow" +// on a per-blob file basis. The amount of additional garbage for any given blob +// file can then be computed by subtracting the outflow from the inflow. +class BlobGarbageMeter { + public: + // A class to store the number and total size of blobs on a per-blob file + // basis. + class BlobStats { + public: + void Add(uint64_t bytes) { + ++count_; + bytes_ += bytes; + } + void Add(uint64_t count, uint64_t bytes) { + count_ += count; + bytes_ += bytes; + } + + uint64_t GetCount() const { return count_; } + uint64_t GetBytes() const { return bytes_; } + + private: + uint64_t count_ = 0; + uint64_t bytes_ = 0; + }; + + // A class to keep track of the "inflow" and the "outflow" and to compute the + // amount of additional garbage for a given blob file. 
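+  //
+  // For example (numbers illustrative): if a compaction's input contains 10
+  // references into a blob file totaling 1000 bytes, and its output keeps 7
+  // of them totaling 700 bytes, the file accrues 3 additional garbage blobs
+  // and 300 additional garbage bytes.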
+ class BlobInOutFlow { + public: + void AddInFlow(uint64_t bytes) { + in_flow_.Add(bytes); + assert(IsValid()); + } + void AddOutFlow(uint64_t bytes) { + out_flow_.Add(bytes); + assert(IsValid()); + } + + const BlobStats& GetInFlow() const { return in_flow_; } + const BlobStats& GetOutFlow() const { return out_flow_; } + + bool IsValid() const { + return in_flow_.GetCount() >= out_flow_.GetCount() && + in_flow_.GetBytes() >= out_flow_.GetBytes(); + } + bool HasGarbage() const { + assert(IsValid()); + return in_flow_.GetCount() > out_flow_.GetCount(); + } + uint64_t GetGarbageCount() const { + assert(IsValid()); + assert(HasGarbage()); + return in_flow_.GetCount() - out_flow_.GetCount(); + } + uint64_t GetGarbageBytes() const { + assert(IsValid()); + assert(HasGarbage()); + return in_flow_.GetBytes() - out_flow_.GetBytes(); + } + + private: + BlobStats in_flow_; + BlobStats out_flow_; + }; + + Status ProcessInFlow(const Slice& key, const Slice& value); + Status ProcessOutFlow(const Slice& key, const Slice& value); + + const std::unordered_map& flows() const { + return flows_; + } + + private: + static Status Parse(const Slice& key, const Slice& value, + uint64_t* blob_file_number, uint64_t* bytes); + + std::unordered_map flows_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_index.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_index.h new file mode 100644 index 0000000000..e9944d7844 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_index.h @@ -0,0 +1,187 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include +#include + +#include "rocksdb/compression_type.h" +#include "util/coding.h" +#include "util/compression.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +// BlobIndex is a pointer to the blob and metadata of the blob. The index is +// stored in base DB as ValueType::kTypeBlobIndex. +// There are three types of blob index: +// +// kInlinedTTL: +// +------+------------+---------------+ +// | type | expiration | value | +// +------+------------+---------------+ +// | char | varint64 | variable size | +// +------+------------+---------------+ +// +// kBlob: +// +------+-------------+----------+----------+-------------+ +// | type | file number | offset | size | compression | +// +------+-------------+----------+----------+-------------+ +// | char | varint64 | varint64 | varint64 | char | +// +------+-------------+----------+----------+-------------+ +// +// kBlobTTL: +// +------+------------+-------------+----------+----------+-------------+ +// | type | expiration | file number | offset | size | compression | +// +------+------------+-------------+----------+----------+-------------+ +// | char | varint64 | varint64 | varint64 | varint64 | char | +// +------+------------+-------------+----------+----------+-------------+ +// +// There isn't a kInlined (without TTL) type since we can store it as a plain +// value (i.e. ValueType::kTypeValue). 
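+//
+// For example (values illustrative), a kBlob reference to file 5, offset
+// 1024, size 512, stored uncompressed, encodes as:
+//
+//   0x01 varint64(5) varint64(1024) varint64(512) 0x00
+//
+// where the leading byte is Type::kBlob and the trailing byte is
+// kNoCompression.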
+class BlobIndex { + public: + enum class Type : unsigned char { + kInlinedTTL = 0, + kBlob = 1, + kBlobTTL = 2, + kUnknown = 3, + }; + + BlobIndex() : type_(Type::kUnknown) {} + + BlobIndex(const BlobIndex&) = default; + BlobIndex& operator=(const BlobIndex&) = default; + + bool IsInlined() const { return type_ == Type::kInlinedTTL; } + + bool HasTTL() const { + return type_ == Type::kInlinedTTL || type_ == Type::kBlobTTL; + } + + uint64_t expiration() const { + assert(HasTTL()); + return expiration_; + } + + const Slice& value() const { + assert(IsInlined()); + return value_; + } + + uint64_t file_number() const { + assert(!IsInlined()); + return file_number_; + } + + uint64_t offset() const { + assert(!IsInlined()); + return offset_; + } + + uint64_t size() const { + assert(!IsInlined()); + return size_; + } + + CompressionType compression() const { + assert(!IsInlined()); + return compression_; + } + + Status DecodeFrom(Slice slice) { + const char* kErrorMessage = "Error while decoding blob index"; + assert(slice.size() > 0); + type_ = static_cast(*slice.data()); + if (type_ >= Type::kUnknown) { + return Status::Corruption(kErrorMessage, + "Unknown blob index type: " + + std::to_string(static_cast(type_))); + } + slice = Slice(slice.data() + 1, slice.size() - 1); + if (HasTTL()) { + if (!GetVarint64(&slice, &expiration_)) { + return Status::Corruption(kErrorMessage, "Corrupted expiration"); + } + } + if (IsInlined()) { + value_ = slice; + } else { + if (GetVarint64(&slice, &file_number_) && GetVarint64(&slice, &offset_) && + GetVarint64(&slice, &size_) && slice.size() == 1) { + compression_ = static_cast(*slice.data()); + } else { + return Status::Corruption(kErrorMessage, "Corrupted blob offset"); + } + } + return Status::OK(); + } + + std::string DebugString(bool output_hex) const { + std::ostringstream oss; + + if (IsInlined()) { + oss << "[inlined blob] value:" << value_.ToString(output_hex); + } else { + oss << "[blob ref] file:" << file_number_ << " offset:" << offset_ + << " size:" << size_ + << " compression: " << CompressionTypeToString(compression_); + } + + if (HasTTL()) { + oss << " exp:" << expiration_; + } + + return oss.str(); + } + + static void EncodeInlinedTTL(std::string* dst, uint64_t expiration, + const Slice& value) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(1 + kMaxVarint64Length + value.size()); + dst->push_back(static_cast(Type::kInlinedTTL)); + PutVarint64(dst, expiration); + dst->append(value.data(), value.size()); + } + + static void EncodeBlob(std::string* dst, uint64_t file_number, + uint64_t offset, uint64_t size, + CompressionType compression) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(kMaxVarint64Length * 3 + 2); + dst->push_back(static_cast(Type::kBlob)); + PutVarint64(dst, file_number); + PutVarint64(dst, offset); + PutVarint64(dst, size); + dst->push_back(static_cast(compression)); + } + + static void EncodeBlobTTL(std::string* dst, uint64_t expiration, + uint64_t file_number, uint64_t offset, + uint64_t size, CompressionType compression) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(kMaxVarint64Length * 4 + 2); + dst->push_back(static_cast(Type::kBlobTTL)); + PutVarint64(dst, expiration); + PutVarint64(dst, file_number); + PutVarint64(dst, offset); + PutVarint64(dst, size); + dst->push_back(static_cast(compression)); + } + + private: + Type type_ = Type::kUnknown; + uint64_t expiration_ = 0; + Slice value_; + uint64_t file_number_ = 0; + uint64_t offset_ = 0; + uint64_t size_ = 0; + CompressionType 
compression_ = kNoCompression; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.cc new file mode 100644 index 0000000000..8e26281e37 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.cc @@ -0,0 +1,143 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#include "db/blob/blob_log_format.h" + +#include "util/coding.h" +#include "util/crc32c.h" + +namespace ROCKSDB_NAMESPACE { + +void BlobLogHeader::EncodeTo(std::string* dst) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(BlobLogHeader::kSize); + PutFixed32(dst, kMagicNumber); + PutFixed32(dst, version); + PutFixed32(dst, column_family_id); + unsigned char flags = (has_ttl ? 1 : 0); + dst->push_back(flags); + dst->push_back(compression); + PutFixed64(dst, expiration_range.first); + PutFixed64(dst, expiration_range.second); +} + +Status BlobLogHeader::DecodeFrom(Slice src) { + const char* kErrorMessage = "Error while decoding blob log header"; + if (src.size() != BlobLogHeader::kSize) { + return Status::Corruption(kErrorMessage, + "Unexpected blob file header size"); + } + uint32_t magic_number; + unsigned char flags; + if (!GetFixed32(&src, &magic_number) || !GetFixed32(&src, &version) || + !GetFixed32(&src, &column_family_id)) { + return Status::Corruption( + kErrorMessage, + "Error decoding magic number, version and column family id"); + } + if (magic_number != kMagicNumber) { + return Status::Corruption(kErrorMessage, "Magic number mismatch"); + } + if (version != kVersion1) { + return Status::Corruption(kErrorMessage, "Unknown header version"); + } + flags = src.data()[0]; + compression = static_cast(src.data()[1]); + has_ttl = (flags & 1) == 1; + src.remove_prefix(2); + if (!GetFixed64(&src, &expiration_range.first) || + !GetFixed64(&src, &expiration_range.second)) { + return Status::Corruption(kErrorMessage, "Error decoding expiration range"); + } + return Status::OK(); +} + +void BlobLogFooter::EncodeTo(std::string* dst) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(BlobLogFooter::kSize); + PutFixed32(dst, kMagicNumber); + PutFixed64(dst, blob_count); + PutFixed64(dst, expiration_range.first); + PutFixed64(dst, expiration_range.second); + crc = crc32c::Value(dst->c_str(), dst->size()); + crc = crc32c::Mask(crc); + PutFixed32(dst, crc); +} + +Status BlobLogFooter::DecodeFrom(Slice src) { + const char* kErrorMessage = "Error while decoding blob log footer"; + if (src.size() != BlobLogFooter::kSize) { + return Status::Corruption(kErrorMessage, + "Unexpected blob file footer size"); + } + uint32_t src_crc = 0; + src_crc = crc32c::Value(src.data(), BlobLogFooter::kSize - sizeof(uint32_t)); + src_crc = crc32c::Mask(src_crc); + uint32_t magic_number = 0; + if (!GetFixed32(&src, &magic_number) || !GetFixed64(&src, &blob_count) || + !GetFixed64(&src, &expiration_range.first) || + !GetFixed64(&src, &expiration_range.second) || !GetFixed32(&src, &crc)) { + return Status::Corruption(kErrorMessage, "Error decoding content"); + } + if (magic_number != kMagicNumber) { + return Status::Corruption(kErrorMessage, "Magic number mismatch"); + } + if (src_crc != crc) { + return Status::Corruption(kErrorMessage, "CRC mismatch"); + } 
+ return Status::OK(); +} + +void BlobLogRecord::EncodeHeaderTo(std::string* dst) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(BlobLogRecord::kHeaderSize + key.size() + value.size()); + PutFixed64(dst, key.size()); + PutFixed64(dst, value.size()); + PutFixed64(dst, expiration); + header_crc = crc32c::Value(dst->c_str(), dst->size()); + header_crc = crc32c::Mask(header_crc); + PutFixed32(dst, header_crc); + blob_crc = crc32c::Value(key.data(), key.size()); + blob_crc = crc32c::Extend(blob_crc, value.data(), value.size()); + blob_crc = crc32c::Mask(blob_crc); + PutFixed32(dst, blob_crc); +} + +Status BlobLogRecord::DecodeHeaderFrom(Slice src) { + const char* kErrorMessage = "Error while decoding blob record"; + if (src.size() != BlobLogRecord::kHeaderSize) { + return Status::Corruption(kErrorMessage, + "Unexpected blob record header size"); + } + uint32_t src_crc = 0; + src_crc = crc32c::Value(src.data(), BlobLogRecord::kHeaderSize - 8); + src_crc = crc32c::Mask(src_crc); + if (!GetFixed64(&src, &key_size) || !GetFixed64(&src, &value_size) || + !GetFixed64(&src, &expiration) || !GetFixed32(&src, &header_crc) || + !GetFixed32(&src, &blob_crc)) { + return Status::Corruption(kErrorMessage, "Error decoding content"); + } + if (src_crc != header_crc) { + return Status::Corruption(kErrorMessage, "Header CRC mismatch"); + } + return Status::OK(); +} + +Status BlobLogRecord::CheckBlobCRC() const { + uint32_t expected_crc = 0; + expected_crc = crc32c::Value(key.data(), key.size()); + expected_crc = crc32c::Extend(expected_crc, value.data(), value.size()); + expected_crc = crc32c::Mask(expected_crc); + if (expected_crc != blob_crc) { + return Status::Corruption("Blob CRC mismatch"); + } + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.h new file mode 100644 index 0000000000..607db23678 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_format.h @@ -0,0 +1,164 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Log format information shared by reader and writer. + +#pragma once + +#include +#include + +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +constexpr uint32_t kMagicNumber = 2395959; // 0x00248f37 +constexpr uint32_t kVersion1 = 1; + +using ExpirationRange = std::pair; + +// clang-format off + +// Format of blob log file header (30 bytes): +// +// +--------------+---------+---------+-------+-------------+-------------------+ +// | magic number | version | cf id | flags | compression | expiration range | +// +--------------+---------+---------+-------+-------------+-------------------+ +// | Fixed32 | Fixed32 | Fixed32 | char | char | Fixed64 Fixed64 | +// +--------------+---------+---------+-------+-------------+-------------------+ +// +// List of flags: +// has_ttl: Whether the file contain TTL data. +// +// Expiration range in the header is a rough range based on +// blob_db_options.ttl_range_secs. 
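+//
+// For example (values illustrative), a header for column family 7 with no
+// TTL and Snappy compression would encode as:
+//
+//   Fixed32(kMagicNumber) Fixed32(kVersion1) Fixed32(7) 0x00 0x01
+//   Fixed64(0) Fixed64(0)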
+
+// clang-format on
+
+struct BlobLogHeader {
+  static constexpr size_t kSize = 30;
+
+  BlobLogHeader() = default;
+  BlobLogHeader(uint32_t _column_family_id, CompressionType _compression,
+                bool _has_ttl, const ExpirationRange& _expiration_range)
+      : column_family_id(_column_family_id),
+        compression(_compression),
+        has_ttl(_has_ttl),
+        expiration_range(_expiration_range) {}
+
+  uint32_t version = kVersion1;
+  uint32_t column_family_id = 0;
+  CompressionType compression = kNoCompression;
+  bool has_ttl = false;
+  ExpirationRange expiration_range;
+
+  void EncodeTo(std::string* dst);
+
+  Status DecodeFrom(Slice slice);
+};
+
+// clang-format off
+
+// Format of blob log file footer (32 bytes):
+//
+//    +--------------+------------+-------------------+------------+
+//    | magic number | blob count | expiration range  | footer CRC |
+//    +--------------+------------+-------------------+------------+
+//    |   Fixed32    |  Fixed64   | Fixed64 + Fixed64 |  Fixed32   |
+//    +--------------+------------+-------------------+------------+
+//
+// The footer is present only when the blob file was properly closed.
+//
+// Unlike the same field in the file header, the expiration range in the
+// footer is the range of the smallest and largest expiration of the data in
+// this file.
+
+// clang-format on
+
+struct BlobLogFooter {
+  static constexpr size_t kSize = 32;
+
+  uint64_t blob_count = 0;
+  ExpirationRange expiration_range = std::make_pair(0, 0);
+  uint32_t crc = 0;
+
+  void EncodeTo(std::string* dst);
+
+  Status DecodeFrom(Slice slice);
+};
+
+// clang-format off
+
+// Blob record format (32 bytes header + key + value):
+//
+//    +------------+--------------+------------+------------+----------+---------+-----------+
+//    | key length | value length | expiration | header CRC | blob CRC |   key   |   value   |
+//    +------------+--------------+------------+------------+----------+---------+-----------+
+//    |  Fixed64   |   Fixed64    |  Fixed64   |  Fixed32   | Fixed32  | key len | value len |
+//    +------------+--------------+------------+------------+----------+---------+-----------+
+//
+// If the file has has_ttl = false, the expiration field is always 0, and the
+// blob doesn't have an expiration.
+//
+// Also note that if compression is used, value is the compressed value, and
+// value length is the length of the compressed value.
+//
+// Header CRC is the checksum of (key_len + val_len + expiration), while
+// blob CRC is the checksum of (key + value).
+//
+// We could use variable length encoding (Varint64) to save more space, but it
+// would make the reader more complicated.
+
+// clang-format on
+
+struct BlobLogRecord {
+  // The header includes the fields up to and including the blob CRC.
+  static constexpr size_t kHeaderSize = 32;
+
+  // Note that the offset field of BlobIndex actually points to the blob value
+  // as opposed to the start of the blob record. The following method can
+  // be used to calculate the adjustment needed to read the blob record header.
+  static constexpr uint64_t CalculateAdjustmentForRecordHeader(
+      uint64_t key_size) {
+    return key_size + kHeaderSize;
+  }
+
+  uint64_t key_size = 0;
+  uint64_t value_size = 0;
+  uint64_t expiration = 0;
+  uint32_t header_crc = 0;
+  uint32_t blob_crc = 0;
+  Slice key;
+  Slice value;
+  std::unique_ptr<char[]> key_buf;
+  std::unique_ptr<char[]> value_buf;
+
+  uint64_t record_size() const { return kHeaderSize + key_size + value_size; }
+
+  void EncodeHeaderTo(std::string* dst);
+
+  Status DecodeHeaderFrom(Slice src);
+
+  Status CheckBlobCRC() const;
+};
+
+// Checks whether a blob offset is potentially valid or not.
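+//
+// The lower bound below accounts for the file header plus the record header
+// and key that precede the value; the upper bound ensures the value and the
+// file footer still fit within the file. For example (numbers illustrative),
+// in a 1000-byte file with a 10-byte key, a value at offset 72 with size 100
+// passes both checks: 72 >= 30 + 32 + 10 and 72 + 100 + 32 <= 1000.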
+inline bool IsValidBlobOffset(uint64_t value_offset, uint64_t key_size, + uint64_t value_size, uint64_t file_size) { + if (value_offset < + BlobLogHeader::kSize + BlobLogRecord::kHeaderSize + key_size) { + return false; + } + + if (value_offset + value_size + BlobLogFooter::kSize > file_size) { + return false; + } + + return true; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.cc new file mode 100644 index 0000000000..2ed4306819 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.cc @@ -0,0 +1,134 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#include "db/blob/blob_log_sequential_reader.h" + +#include "file/random_access_file_reader.h" +#include "monitoring/statistics_impl.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +BlobLogSequentialReader::BlobLogSequentialReader( + std::unique_ptr&& file_reader, SystemClock* clock, + Statistics* statistics) + : file_(std::move(file_reader)), + clock_(clock), + statistics_(statistics), + next_byte_(0) {} + +BlobLogSequentialReader::~BlobLogSequentialReader() = default; + +Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice, + char* buf) { + assert(slice); + assert(file_); + + StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); + // TODO: rate limit `BlobLogSequentialReader` reads (it appears unused?) + Status s = + file_->Read(IOOptions(), next_byte_, static_cast(size), slice, + buf, nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + next_byte_ += size; + if (!s.ok()) { + return s; + } + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, slice->size()); + if (slice->size() != size) { + return Status::Corruption("EOF reached while reading record"); + } + return s; +} + +Status BlobLogSequentialReader::ReadHeader(BlobLogHeader* header) { + assert(header); + assert(next_byte_ == 0); + + static_assert(BlobLogHeader::kSize <= sizeof(header_buf_), + "Buffer is smaller than BlobLogHeader::kSize"); + + Status s = ReadSlice(BlobLogHeader::kSize, &buffer_, header_buf_); + if (!s.ok()) { + return s; + } + + if (buffer_.size() != BlobLogHeader::kSize) { + return Status::Corruption("EOF reached before file header"); + } + + return header->DecodeFrom(buffer_); +} + +Status BlobLogSequentialReader::ReadRecord(BlobLogRecord* record, + ReadLevel level, + uint64_t* blob_offset) { + assert(record); + static_assert(BlobLogRecord::kHeaderSize <= sizeof(header_buf_), + "Buffer is smaller than BlobLogRecord::kHeaderSize"); + + Status s = ReadSlice(BlobLogRecord::kHeaderSize, &buffer_, header_buf_); + if (!s.ok()) { + return s; + } + if (buffer_.size() != BlobLogRecord::kHeaderSize) { + return Status::Corruption("EOF reached before record header"); + } + + s = record->DecodeHeaderFrom(buffer_); + if (!s.ok()) { + return s; + } + + uint64_t kb_size = record->key_size + record->value_size; + if (blob_offset != nullptr) { + *blob_offset = next_byte_ + record->key_size; + } + + switch (level) { + case kReadHeader: + next_byte_ += kb_size; + break; + + case kReadHeaderKey: + record->key_buf.reset(new char[record->key_size]); + s = ReadSlice(record->key_size, &record->key, 
+                    record->key_buf.get());
+      next_byte_ += record->value_size;
+      break;
+
+    case kReadHeaderKeyBlob:
+      record->key_buf.reset(new char[record->key_size]);
+      s = ReadSlice(record->key_size, &record->key, record->key_buf.get());
+      if (s.ok()) {
+        record->value_buf.reset(new char[record->value_size]);
+        s = ReadSlice(record->value_size, &record->value,
+                      record->value_buf.get());
+      }
+      if (s.ok()) {
+        s = record->CheckBlobCRC();
+      }
+      break;
+  }
+  return s;
+}
+
+Status BlobLogSequentialReader::ReadFooter(BlobLogFooter* footer) {
+  assert(footer);
+  static_assert(BlobLogFooter::kSize <= sizeof(header_buf_),
+                "Buffer is smaller than BlobLogFooter::kSize");
+
+  Status s = ReadSlice(BlobLogFooter::kSize, &buffer_, header_buf_);
+  if (!s.ok()) {
+    return s;
+  }
+
+  if (buffer_.size() != BlobLogFooter::kSize) {
+    return Status::Corruption("EOF reached before file footer");
+  }
+
+  return footer->DecodeFrom(buffer_);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.h
new file mode 100644
index 0000000000..98afa8518b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_sequential_reader.h
@@ -0,0 +1,83 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+
+#include <memory>
+
+#include "db/blob/blob_log_format.h"
+#include "rocksdb/slice.h"
+
+#define MAX_HEADER_SIZE(a, b, c) (a > b ? (a > c ? a : c) : (b > c ? b : c))
+
+namespace ROCKSDB_NAMESPACE {
+
+class RandomAccessFileReader;
+class Env;
+class Statistics;
+class Status;
+class SystemClock;
+
+/**
+ * BlobLogSequentialReader is a general purpose log stream reader
+ * implementation. The actual job of reading from the device is implemented by
+ * the RandomAccessFileReader interface.
+ *
+ * Please see BlobLogWriter for details on the file and record layout.
+ */
+
+class BlobLogSequentialReader {
+ public:
+  enum ReadLevel {
+    kReadHeader,
+    kReadHeaderKey,
+    kReadHeaderKeyBlob,
+  };
+
+  // Create a reader that will return log records from "*file_reader".
+  BlobLogSequentialReader(
+      std::unique_ptr<RandomAccessFileReader>&& file_reader,
+      SystemClock* clock, Statistics* statistics);
+
+  // No copying allowed
+  BlobLogSequentialReader(const BlobLogSequentialReader&) = delete;
+  BlobLogSequentialReader& operator=(const BlobLogSequentialReader&) = delete;
+
+  ~BlobLogSequentialReader();
+
+  Status ReadHeader(BlobLogHeader* header);
+
+  // Read the next record into *record. Returns OK on success, and a non-OK
+  // status if we hit the end of the input or a read error. The contents
+  // filled in *record will only be valid until the next mutating operation
+  // on this reader.
+  // If blob_offset is non-null, return the offset of the blob through it.
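+  // "level" controls how much of the record is materialized: kReadHeader
+  // decodes only the record header (and skips over the key and value),
+  // kReadHeaderKey additionally reads the key, and kReadHeaderKeyBlob reads
+  // both the key and the value and verifies the blob CRC.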
+ Status ReadRecord(BlobLogRecord* record, ReadLevel level = kReadHeader, + uint64_t* blob_offset = nullptr); + + Status ReadFooter(BlobLogFooter* footer); + + void ResetNextByte() { next_byte_ = 0; } + + uint64_t GetNextByte() const { return next_byte_; } + + private: + Status ReadSlice(uint64_t size, Slice* slice, char* buf); + + const std::unique_ptr file_; + SystemClock* clock_; + + Statistics* statistics_; + + Slice buffer_; + char header_buf_[MAX_HEADER_SIZE(BlobLogHeader::kSize, BlobLogFooter::kSize, + BlobLogRecord::kHeaderSize)]; + + // which byte to read next + uint64_t next_byte_; +}; + +} // namespace ROCKSDB_NAMESPACE + +#undef MAX_HEADER_SIZE \ No newline at end of file diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.cc new file mode 100644 index 0000000000..bf5ef27c1d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.cc @@ -0,0 +1,178 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_log_writer.h" + +#include +#include + +#include "db/blob/blob_log_format.h" +#include "file/writable_file_writer.h" +#include "monitoring/statistics_impl.h" +#include "rocksdb/system_clock.h" +#include "test_util/sync_point.h" +#include "util/coding.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +BlobLogWriter::BlobLogWriter(std::unique_ptr&& dest, + SystemClock* clock, Statistics* statistics, + uint64_t log_number, bool use_fs, bool do_flush, + uint64_t boffset) + : dest_(std::move(dest)), + clock_(clock), + statistics_(statistics), + log_number_(log_number), + block_offset_(boffset), + use_fsync_(use_fs), + do_flush_(do_flush), + last_elem_type_(kEtNone) {} + +BlobLogWriter::~BlobLogWriter() = default; + +Status BlobLogWriter::Sync() { + TEST_SYNC_POINT("BlobLogWriter::Sync"); + + StopWatch sync_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS); + Status s = dest_->Sync(use_fsync_); + RecordTick(statistics_, BLOB_DB_BLOB_FILE_SYNCED); + return s; +} + +Status BlobLogWriter::WriteHeader(BlobLogHeader& header) { + assert(block_offset_ == 0); + assert(last_elem_type_ == kEtNone); + std::string str; + header.EncodeTo(&str); + + Status s = dest_->Append(Slice(str)); + if (s.ok()) { + block_offset_ += str.size(); + if (do_flush_) { + s = dest_->Flush(); + } + } + last_elem_type_ = kEtFileHdr; + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN, + BlobLogHeader::kSize); + return s; +} + +Status BlobLogWriter::AppendFooter(BlobLogFooter& footer, + std::string* checksum_method, + std::string* checksum_value) { + assert(block_offset_ != 0); + assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); + + std::string str; + footer.EncodeTo(&str); + + Status s; + if (dest_->seen_error()) { + s.PermitUncheckedError(); + return Status::IOError("Seen Error. 
Skip closing."); + } else { + s = dest_->Append(Slice(str)); + if (s.ok()) { + block_offset_ += str.size(); + + s = Sync(); + + if (s.ok()) { + s = dest_->Close(); + + if (s.ok()) { + assert(!!checksum_method == !!checksum_value); + + if (checksum_method) { + assert(checksum_method->empty()); + + std::string method = dest_->GetFileChecksumFuncName(); + if (method != kUnknownFileChecksumFuncName) { + *checksum_method = std::move(method); + } + } + if (checksum_value) { + assert(checksum_value->empty()); + + std::string value = dest_->GetFileChecksum(); + if (value != kUnknownFileChecksum) { + *checksum_value = std::move(value); + } + } + } + } + } + + dest_.reset(); + } + + last_elem_type_ = kEtFileFooter; + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN, + BlobLogFooter::kSize); + return s; +} + +Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val, + uint64_t expiration, uint64_t* key_offset, + uint64_t* blob_offset) { + assert(block_offset_ != 0); + assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); + + std::string buf; + ConstructBlobHeader(&buf, key, val, expiration); + + Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset); + return s; +} + +Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val, + uint64_t* key_offset, uint64_t* blob_offset) { + assert(block_offset_ != 0); + assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); + + std::string buf; + ConstructBlobHeader(&buf, key, val, 0); + + Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset); + return s; +} + +void BlobLogWriter::ConstructBlobHeader(std::string* buf, const Slice& key, + const Slice& val, uint64_t expiration) { + BlobLogRecord record; + record.key = key; + record.value = val; + record.expiration = expiration; + record.EncodeHeaderTo(buf); +} + +Status BlobLogWriter::EmitPhysicalRecord(const std::string& headerbuf, + const Slice& key, const Slice& val, + uint64_t* key_offset, + uint64_t* blob_offset) { + StopWatch write_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS); + Status s = dest_->Append(Slice(headerbuf)); + if (s.ok()) { + s = dest_->Append(key); + } + if (s.ok()) { + s = dest_->Append(val); + } + if (do_flush_ && s.ok()) { + s = dest_->Flush(); + } + + *key_offset = block_offset_ + BlobLogRecord::kHeaderSize; + *blob_offset = *key_offset + key.size(); + block_offset_ = *blob_offset + val.size(); + last_elem_type_ = kEtRecord; + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN, + BlobLogRecord::kHeaderSize + key.size() + val.size()); + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.h new file mode 100644 index 0000000000..c1f9f31ad0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_log_writer.h @@ -0,0 +1,83 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include +#include +#include + +#include "db/blob/blob_log_format.h" +#include "rocksdb/slice.h" +#include "rocksdb/statistics.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +class WritableFileWriter; +class SystemClock; +/** + * BlobLogWriter is the blob log stream writer. 
+ * It provides an append-only abstraction for writing blob data.
+ *
+ * Look at blob_log_format.h to see the details of the record formats.
+ */
+
+class BlobLogWriter {
+ public:
+  // Create a writer that will append data to "*dest".
+  // "*dest" must be initially empty.
+  // "*dest" must remain live while this BlobLogWriter is in use.
+  BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest, SystemClock* clock,
+                Statistics* statistics, uint64_t log_number, bool use_fsync,
+                bool do_flush, uint64_t boffset = 0);
+  // No copying allowed
+  BlobLogWriter(const BlobLogWriter&) = delete;
+  BlobLogWriter& operator=(const BlobLogWriter&) = delete;
+
+  ~BlobLogWriter();
+
+  static void ConstructBlobHeader(std::string* buf, const Slice& key,
+                                  const Slice& val, uint64_t expiration);
+
+  Status AddRecord(const Slice& key, const Slice& val, uint64_t* key_offset,
+                   uint64_t* blob_offset);
+
+  Status AddRecord(const Slice& key, const Slice& val, uint64_t expiration,
+                   uint64_t* key_offset, uint64_t* blob_offset);
+
+  Status EmitPhysicalRecord(const std::string& headerbuf, const Slice& key,
+                            const Slice& val, uint64_t* key_offset,
+                            uint64_t* blob_offset);
+
+  Status AppendFooter(BlobLogFooter& footer, std::string* checksum_method,
+                      std::string* checksum_value);
+
+  Status WriteHeader(BlobLogHeader& header);
+
+  WritableFileWriter* file() { return dest_.get(); }
+
+  const WritableFileWriter* file() const { return dest_.get(); }
+
+  uint64_t get_log_number() const { return log_number_; }
+
+  Status Sync();
+
+ private:
+  std::unique_ptr<WritableFileWriter> dest_;
+  SystemClock* clock_;
+  Statistics* statistics_;
+  uint64_t log_number_;
+  uint64_t block_offset_;  // Current offset in block
+  bool use_fsync_;
+  bool do_flush_;
+
+ public:
+  enum ElemType { kEtNone, kEtFileHdr, kEtRecord, kEtFileFooter };
+  ElemType last_elem_type_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_read_request.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_read_request.h
new file mode 100644
index 0000000000..f9668ca2ef
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_read_request.h
@@ -0,0 +1,58 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cinttypes>
+
+#include "rocksdb/compression_type.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A read Blob request structure for use in BlobSource::MultiGetBlob and
+// BlobFileReader::MultiGetBlob.
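+//
+// A hypothetical sketch of how a caller might fill one in (the surrounding
+// variable names here are illustrative only):
+//
+//   PinnableSlice result;
+//   Status status;
+//   BlobReadRequest req(user_key, /*offset=*/1234, /*len=*/5678,
+//                       kNoCompression, &result, &status);
+//
+// "result" and "status" are output parameters owned by the caller and must
+// outlive the MultiGetBlob call that consumes the request.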
+struct BlobReadRequest {
+  // User key to lookup the paired blob
+  const Slice* user_key = nullptr;
+
+  // File offset in bytes
+  uint64_t offset = 0;
+
+  // Length to read in bytes
+  size_t len = 0;
+
+  // Blob compression type
+  CompressionType compression = kNoCompression;
+
+  // Output parameter set by MultiGetBlob() to point to the data buffer, and
+  // the number of valid bytes
+  PinnableSlice* result = nullptr;
+
+  // Status of read
+  Status* status = nullptr;
+
+  BlobReadRequest(const Slice& _user_key, uint64_t _offset, size_t _len,
+                  CompressionType _compression, PinnableSlice* _result,
+                  Status* _status)
+      : user_key(&_user_key),
+        offset(_offset),
+        len(_len),
+        compression(_compression),
+        result(_result),
+        status(_status) {}
+
+  BlobReadRequest() = default;
+  BlobReadRequest(const BlobReadRequest& other) = default;
+  BlobReadRequest& operator=(const BlobReadRequest& other) = default;
+};
+
+using BlobFileReadRequests =
+    std::tuple<uint64_t /* file_number */, uint64_t /* file_size */,
+               autovector<BlobReadRequest>>;
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.cc
new file mode 100644
index 0000000000..b524982e53
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.cc
@@ -0,0 +1,459 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_source.h"
+
+#include <cassert>
+#include <string>
+
+#include "cache/cache_reservation_manager.h"
+#include "cache/charged_cache.h"
+#include "db/blob/blob_contents.h"
+#include "db/blob/blob_file_reader.h"
+#include "db/blob/blob_log_format.h"
+#include "monitoring/statistics_impl.h"
+#include "options/cf_options.h"
+#include "table/get_context.h"
+#include "table/multiget_context.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+BlobSource::BlobSource(const ImmutableOptions* immutable_options,
+                       const std::string& db_id,
+                       const std::string& db_session_id,
+                       BlobFileCache* blob_file_cache)
+    : db_id_(db_id),
+      db_session_id_(db_session_id),
+      statistics_(immutable_options->statistics.get()),
+      blob_file_cache_(blob_file_cache),
+      blob_cache_(immutable_options->blob_cache),
+      lowest_used_cache_tier_(immutable_options->lowest_used_cache_tier) {
+  auto bbto =
+      immutable_options->table_factory->GetOptions<BlockBasedTableOptions>();
+  if (bbto &&
+      bbto->cache_usage_options.options_overrides
+              .at(CacheEntryRole::kBlobCache)
+              .charged == CacheEntryRoleOptions::Decision::kEnabled) {
+    blob_cache_ = SharedCacheInterface{std::make_shared<ChargedCache>(
+        immutable_options->blob_cache, bbto->block_cache)};
+  }
+}
+
+BlobSource::~BlobSource() = default;
+
+Status BlobSource::GetBlobFromCache(
+    const Slice& cache_key,
+    CacheHandleGuard<BlobContents>* cached_blob) const {
+  assert(blob_cache_);
+  assert(!cache_key.empty());
+  assert(cached_blob);
+  assert(cached_blob->IsEmpty());
+
+  Cache::Handle* cache_handle = nullptr;
+  cache_handle = GetEntryFromCache(cache_key);
+  if (cache_handle != nullptr) {
+    *cached_blob =
+        CacheHandleGuard<BlobContents>(blob_cache_.get(), cache_handle);
+
+    assert(cached_blob->GetValue());
+
+    PERF_COUNTER_ADD(blob_cache_hit_count, 1);
+    RecordTick(statistics_, BLOB_DB_CACHE_HIT);
+    RecordTick(statistics_, BLOB_DB_CACHE_BYTES_READ,
+               cached_blob->GetValue()->size());
+
+    return Status::OK();
+  }
+
+  RecordTick(statistics_, BLOB_DB_CACHE_MISS);
+
+  return Status::NotFound("Blob not found in cache");
+}
+
+Status BlobSource::PutBlobIntoCache(
+    const Slice& cache_key, std::unique_ptr<BlobContents>* blob,
+    CacheHandleGuard<BlobContents>* cached_blob) const {
+  assert(blob_cache_);
+  assert(!cache_key.empty());
+  assert(blob);
+  assert(*blob);
+  assert(cached_blob);
+  assert(cached_blob->IsEmpty());
+
+  TypedHandle* cache_handle = nullptr;
+  const Status s = InsertEntryIntoCache(cache_key, blob->get(),
+                                        &cache_handle, Cache::Priority::BOTTOM);
+  if (s.ok()) {
+    blob->release();
+
+    assert(cache_handle != nullptr);
+    *cached_blob =
+        CacheHandleGuard<BlobContents>(blob_cache_.get(), cache_handle);
+
+    assert(cached_blob->GetValue());
+
+    RecordTick(statistics_, BLOB_DB_CACHE_ADD);
+    RecordTick(statistics_, BLOB_DB_CACHE_BYTES_WRITE,
+               cached_blob->GetValue()->size());
+
+  } else {
+    RecordTick(statistics_, BLOB_DB_CACHE_ADD_FAILURES);
+  }
+
+  return s;
+}
+
+BlobSource::TypedHandle* BlobSource::GetEntryFromCache(const Slice& key) const {
+  return blob_cache_.LookupFull(key, nullptr /* context */,
+                                Cache::Priority::BOTTOM, statistics_,
+                                lowest_used_cache_tier_);
+}
+
+void BlobSource::PinCachedBlob(CacheHandleGuard<BlobContents>* cached_blob,
+                               PinnableSlice* value) {
+  assert(cached_blob);
+  assert(cached_blob->GetValue());
+  assert(value);
+
+  // To avoid copying the cached blob into the buffer provided by the
+  // application, we can simply transfer ownership of the cache handle to
+  // the target PinnableSlice. This has the potential to save a lot of
+  // CPU, especially with large blob values.
+
+  value->Reset();
+
+  constexpr Cleanable* cleanable = nullptr;
+  value->PinSlice(cached_blob->GetValue()->data(), cleanable);
+
+  cached_blob->TransferTo(value);
+}
+
+void BlobSource::PinOwnedBlob(std::unique_ptr<BlobContents>* owned_blob,
+                              PinnableSlice* value) {
+  assert(owned_blob);
+  assert(*owned_blob);
+  assert(value);
+
+  BlobContents* const blob = owned_blob->release();
+  assert(blob);
+
+  value->Reset();
+  value->PinSlice(
+      blob->data(),
+      [](void* arg1, void* /* arg2 */) {
+        delete static_cast<BlobContents*>(arg1);
+      },
+      blob, nullptr);
+}
+
+Status BlobSource::InsertEntryIntoCache(const Slice& key, BlobContents* value,
+                                        TypedHandle** cache_handle,
+                                        Cache::Priority priority) const {
+  return blob_cache_.InsertFull(key, value, value->ApproximateMemoryUsage(),
+                                cache_handle, priority,
+                                lowest_used_cache_tier_);
+}
+
+Status BlobSource::GetBlob(const ReadOptions& read_options,
+                           const Slice& user_key, uint64_t file_number,
+                           uint64_t offset, uint64_t file_size,
+                           uint64_t value_size,
+                           CompressionType compression_type,
+                           FilePrefetchBuffer* prefetch_buffer,
+                           PinnableSlice* value, uint64_t* bytes_read) {
+  assert(value);
+
+  Status s;
+
+  const CacheKey cache_key = GetCacheKey(file_number, file_size, offset);
+
+  CacheHandleGuard<BlobContents> blob_handle;
+
+  // First, try to get the blob from the cache
+  //
+  // If blob cache is enabled, we'll try to read from it.
+  if (blob_cache_) {
+    Slice key = cache_key.AsSlice();
+    s = GetBlobFromCache(key, &blob_handle);
+    if (s.ok()) {
+      PinCachedBlob(&blob_handle, value);
+
+      // For consistency, the size of on-disk (possibly compressed) blob record
+      // is assigned to bytes_read.
+      uint64_t adjustment =
+          read_options.verify_checksums
+              ?
BlobLogRecord::CalculateAdjustmentForRecordHeader( + user_key.size()) + : 0; + assert(offset >= adjustment); + + uint64_t record_size = value_size + adjustment; + if (bytes_read) { + *bytes_read = record_size; + } + return s; + } + } + + assert(blob_handle.IsEmpty()); + + const bool no_io = read_options.read_tier == kBlockCacheTier; + if (no_io) { + s = Status::Incomplete("Cannot read blob(s): no disk I/O allowed"); + return s; + } + + // Can't find the blob from the cache. Since I/O is allowed, read from the + // file. + std::unique_ptr blob_contents; + + { + CacheHandleGuard blob_file_reader; + s = blob_file_cache_->GetBlobFileReader(read_options, file_number, + &blob_file_reader); + if (!s.ok()) { + return s; + } + + assert(blob_file_reader.GetValue()); + + if (compression_type != blob_file_reader.GetValue()->GetCompressionType()) { + return Status::Corruption("Compression type mismatch when reading blob"); + } + + MemoryAllocator* const allocator = + (blob_cache_ && read_options.fill_cache) + ? blob_cache_.get()->memory_allocator() + : nullptr; + + uint64_t read_size = 0; + s = blob_file_reader.GetValue()->GetBlob( + read_options, user_key, offset, value_size, compression_type, + prefetch_buffer, allocator, &blob_contents, &read_size); + if (!s.ok()) { + return s; + } + if (bytes_read) { + *bytes_read = read_size; + } + } + + if (blob_cache_ && read_options.fill_cache) { + // If filling cache is allowed and a cache is configured, try to put the + // blob to the cache. + Slice key = cache_key.AsSlice(); + s = PutBlobIntoCache(key, &blob_contents, &blob_handle); + if (!s.ok()) { + return s; + } + + PinCachedBlob(&blob_handle, value); + } else { + PinOwnedBlob(&blob_contents, value); + } + + assert(s.ok()); + return s; +} + +void BlobSource::MultiGetBlob(const ReadOptions& read_options, + autovector& blob_reqs, + uint64_t* bytes_read) { + assert(blob_reqs.size() > 0); + + uint64_t total_bytes_read = 0; + uint64_t bytes_read_in_file = 0; + + for (auto& [file_number, file_size, blob_reqs_in_file] : blob_reqs) { + // sort blob_reqs_in_file by file offset. 
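+    // (Requests sorted by ascending offset can then be served in a single
+    // forward pass over the file, which favors sequential rather than random
+    // I/O.)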
+    std::sort(
+        blob_reqs_in_file.begin(), blob_reqs_in_file.end(),
+        [](const BlobReadRequest& lhs, const BlobReadRequest& rhs) -> bool {
+          return lhs.offset < rhs.offset;
+        });
+
+    MultiGetBlobFromOneFile(read_options, file_number, file_size,
+                            blob_reqs_in_file, &bytes_read_in_file);
+
+    total_bytes_read += bytes_read_in_file;
+  }
+
+  if (bytes_read) {
+    *bytes_read = total_bytes_read;
+  }
+}
+
+void BlobSource::MultiGetBlobFromOneFile(
+    const ReadOptions& read_options, uint64_t file_number,
+    uint64_t /*file_size*/, autovector<BlobReadRequest>& blob_reqs,
+    uint64_t* bytes_read) {
+  const size_t num_blobs = blob_reqs.size();
+  assert(num_blobs > 0);
+  assert(num_blobs <= MultiGetContext::MAX_BATCH_SIZE);
+
+#ifndef NDEBUG
+  for (size_t i = 0; i < num_blobs - 1; ++i) {
+    assert(blob_reqs[i].offset <= blob_reqs[i + 1].offset);
+  }
+#endif  // !NDEBUG
+
+  using Mask = uint64_t;
+  Mask cache_hit_mask = 0;
+
+  uint64_t total_bytes = 0;
+  const OffsetableCacheKey base_cache_key(db_id_, db_session_id_, file_number);
+
+  if (blob_cache_) {
+    size_t cached_blob_count = 0;
+    for (size_t i = 0; i < num_blobs; ++i) {
+      auto& req = blob_reqs[i];
+
+      CacheHandleGuard<BlobContents> blob_handle;
+      const CacheKey cache_key = base_cache_key.WithOffset(req.offset);
+      const Slice key = cache_key.AsSlice();
+
+      const Status s = GetBlobFromCache(key, &blob_handle);
+
+      if (s.ok()) {
+        assert(req.status);
+        *req.status = s;
+
+        PinCachedBlob(&blob_handle, req.result);
+
+        // Update the counter for the number of valid blobs read from the
+        // cache.
+        ++cached_blob_count;
+
+        // For consistency, the size of each on-disk (possibly compressed) blob
+        // record is accumulated to total_bytes.
+        uint64_t adjustment =
+            read_options.verify_checksums
+                ? BlobLogRecord::CalculateAdjustmentForRecordHeader(
+                      req.user_key->size())
+                : 0;
+        assert(req.offset >= adjustment);
+        total_bytes += req.len + adjustment;
+        cache_hit_mask |= (Mask{1} << i);  // cache hit
+      }
+    }
+
+    // All blobs were read from the cache.
+    if (cached_blob_count == num_blobs) {
+      if (bytes_read) {
+        *bytes_read = total_bytes;
+      }
+      return;
+    }
+  }
+
+  const bool no_io = read_options.read_tier == kBlockCacheTier;
+  if (no_io) {
+    for (size_t i = 0; i < num_blobs; ++i) {
+      if (!(cache_hit_mask & (Mask{1} << i))) {
+        BlobReadRequest& req = blob_reqs[i];
+        assert(req.status);
+
+        *req.status =
+            Status::Incomplete("Cannot read blob(s): no disk I/O allowed");
+      }
+    }
+    return;
+  }
+
+  {
+    // Find the rest of blobs from the file since I/O is allowed.
+    autovector<std::pair<BlobReadRequest*, std::unique_ptr<BlobContents>>>
+        _blob_reqs;
+    uint64_t _bytes_read = 0;
+
+    for (size_t i = 0; i < num_blobs; ++i) {
+      if (!(cache_hit_mask & (Mask{1} << i))) {
+        _blob_reqs.emplace_back(&blob_reqs[i],
+                                std::unique_ptr<BlobContents>());
+      }
+    }
+
+    CacheHandleGuard<BlobFileReader> blob_file_reader;
+    Status s = blob_file_cache_->GetBlobFileReader(read_options, file_number,
+                                                   &blob_file_reader);
+    if (!s.ok()) {
+      for (size_t i = 0; i < _blob_reqs.size(); ++i) {
+        BlobReadRequest* const req = _blob_reqs[i].first;
+        assert(req);
+        assert(req->status);
+
+        *req->status = s;
+      }
+      return;
+    }
+
+    assert(blob_file_reader.GetValue());
+
+    MemoryAllocator* const allocator =
+        (blob_cache_ && read_options.fill_cache)
+            ? blob_cache_.get()->memory_allocator()
+            : nullptr;
+
+    blob_file_reader.GetValue()->MultiGetBlob(read_options, allocator,
+                                              _blob_reqs, &_bytes_read);
+
+    if (blob_cache_ && read_options.fill_cache) {
+      // If filling cache is allowed and a cache is configured, try to put
+      // the blob(s) to the cache.
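+      // (On a successful insertion the cache takes ownership of the blob
+      // contents and the result is pinned via the cache handle; if the
+      // insertion fails, the failure status is propagated to the request.)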
+      for (auto& [req, blob_contents] : _blob_reqs) {
+        assert(req);
+
+        if (req->status->ok()) {
+          CacheHandleGuard<BlobContents> blob_handle;
+          const CacheKey cache_key = base_cache_key.WithOffset(req->offset);
+          const Slice key = cache_key.AsSlice();
+          s = PutBlobIntoCache(key, &blob_contents, &blob_handle);
+          if (!s.ok()) {
+            *req->status = s;
+          } else {
+            PinCachedBlob(&blob_handle, req->result);
+          }
+        }
+      }
+    } else {
+      for (auto& [req, blob_contents] : _blob_reqs) {
+        assert(req);
+
+        if (req->status->ok()) {
+          PinOwnedBlob(&blob_contents, req->result);
+        }
+      }
+    }
+
+    total_bytes += _bytes_read;
+    if (bytes_read) {
+      *bytes_read = total_bytes;
+    }
+  }
+}
+
+bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
+                                  uint64_t offset, size_t* charge) const {
+  const CacheKey cache_key = GetCacheKey(file_number, file_size, offset);
+  const Slice key = cache_key.AsSlice();
+
+  CacheHandleGuard<BlobContents> blob_handle;
+  const Status s = GetBlobFromCache(key, &blob_handle);
+
+  if (s.ok() && blob_handle.GetValue() != nullptr) {
+    if (charge) {
+      const Cache* const cache = blob_handle.GetCache();
+      assert(cache);
+
+      Cache::Handle* const handle = blob_handle.GetCacheHandle();
+      assert(handle);
+
+      *charge = cache->GetUsage(handle);
+    }
+
+    return true;
+  }
+
+  return false;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.h
new file mode 100644
index 0000000000..d5e009b54d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/blob_source.h
@@ -0,0 +1,161 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cinttypes>
+#include <memory>
+
+#include "cache/cache_key.h"
+#include "cache/typed_cache.h"
+#include "db/blob/blob_contents.h"
+#include "db/blob/blob_file_cache.h"
+#include "db/blob/blob_read_request.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "table/block_based/cachable_entry.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct ImmutableOptions;
+class Status;
+class FilePrefetchBuffer;
+class Slice;
+
+// BlobSource is a class that provides universal access to blobs, regardless of
+// whether they are in the blob cache, secondary cache, or (remote) storage.
+// Depending on user settings, it always fetches blobs from the multi-tier
+// cache and storage with minimal cost.
+class BlobSource {
+ public:
+  BlobSource(const ImmutableOptions* immutable_options,
+             const std::string& db_id, const std::string& db_session_id,
+             BlobFileCache* blob_file_cache);
+
+  BlobSource(const BlobSource&) = delete;
+  BlobSource& operator=(const BlobSource&) = delete;
+
+  ~BlobSource();
+
+  // Read a blob from the underlying cache or one blob file.
+  //
+  // If successful, returns ok and sets "*value" to the newly retrieved
+  // uncompressed blob. If there was an error while fetching the blob, sets
+  // "*value" to empty and returns a non-ok status.
+  //
+  // Note: For consistency, whether the blob is found in the cache or on disk,
+  // sets "*bytes_read" to the size of the on-disk (possibly compressed) blob
+  // record.
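+  // (As a worked example of the note above: for a 16-byte key and a 100-byte
+  // on-disk value, a cache hit with verify_checksums enabled reports
+  // 100 + BlobLogRecord::kHeaderSize + 16 bytes read, matching what reading
+  // the whole record from disk would have reported.)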
+  Status GetBlob(const ReadOptions& read_options, const Slice& user_key,
+                 uint64_t file_number, uint64_t offset, uint64_t file_size,
+                 uint64_t value_size, CompressionType compression_type,
+                 FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value,
+                 uint64_t* bytes_read);
+
+  // Read multiple blobs from the underlying cache or blob file(s).
+  //
+  // If successful, returns ok and sets "result" in the elements of "blob_reqs"
+  // to the newly retrieved uncompressed blobs. If there was an error while
+  // fetching one of the blobs, sets its "result" to empty and sets its
+  // corresponding "status" to a non-ok status.
+  //
+  // Note:
+  // - The main difference between this function and MultiGetBlobFromOneFile is
+  // that this function can read multiple blobs from multiple blob files.
+  //
+  // - For consistency, whether the blob is found in the cache or on disk, sets
+  // "*bytes_read" to the total size of on-disk (possibly compressed) blob
+  // records.
+  void MultiGetBlob(const ReadOptions& read_options,
+                    autovector<BlobFileReadRequests>& blob_reqs,
+                    uint64_t* bytes_read);
+
+  // Read multiple blobs from the underlying cache or one blob file.
+  //
+  // If successful, returns ok and sets "result" in the elements of "blob_reqs"
+  // to the newly retrieved uncompressed blobs. If there was an error while
+  // fetching one of the blobs, sets its "result" to empty and sets its
+  // corresponding "status" to a non-ok status.
+  //
+  // Note:
+  // - The main difference between this function and MultiGetBlob is that this
+  // function is only used for the case where the demanded blobs are stored in
+  // one blob file. MultiGetBlob will call this function multiple times if the
+  // demanded blobs are stored in multiple blob files.
+  //
+  // - For consistency, whether the blob is found in the cache or on disk, sets
+  // "*bytes_read" to the total size of on-disk (possibly compressed) blob
+  // records.
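+  // - "file_size" is unused by the current implementation; the offsets and
+  //   lengths to read come from the requests themselves.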
+  void MultiGetBlobFromOneFile(const ReadOptions& read_options,
+                               uint64_t file_number, uint64_t file_size,
+                               autovector<BlobReadRequest>& blob_reqs,
+                               uint64_t* bytes_read);
+
+  inline Status GetBlobFileReader(
+      const ReadOptions& read_options, uint64_t blob_file_number,
+      CacheHandleGuard<BlobFileReader>* blob_file_reader) {
+    return blob_file_cache_->GetBlobFileReader(read_options, blob_file_number,
+                                               blob_file_reader);
+  }
+
+  inline Cache* GetBlobCache() const { return blob_cache_.get(); }
+
+  bool TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
+                        uint64_t offset, size_t* charge = nullptr) const;
+
+  // For TypedSharedCacheInterface
+  void Create(BlobContents** out, const char* buf, size_t size,
+              MemoryAllocator* alloc);
+
+  using SharedCacheInterface =
+      FullTypedSharedCacheInterface<BlobContents, CacheEntryRole::kBlobCache>;
+  using TypedHandle = SharedCacheInterface::TypedHandle;
+
+ private:
+  Status GetBlobFromCache(const Slice& cache_key,
+                          CacheHandleGuard<BlobContents>* cached_blob) const;
+
+  Status PutBlobIntoCache(const Slice& cache_key,
+                          std::unique_ptr<BlobContents>* blob,
+                          CacheHandleGuard<BlobContents>* cached_blob) const;
+
+  static void PinCachedBlob(CacheHandleGuard<BlobContents>* cached_blob,
+                            PinnableSlice* value);
+
+  static void PinOwnedBlob(std::unique_ptr<BlobContents>* owned_blob,
+                           PinnableSlice* value);
+
+  TypedHandle* GetEntryFromCache(const Slice& key) const;
+
+  Status InsertEntryIntoCache(const Slice& key, BlobContents* value,
+                              TypedHandle** cache_handle,
+                              Cache::Priority priority) const;
+
+  inline CacheKey GetCacheKey(uint64_t file_number, uint64_t /*file_size*/,
+                              uint64_t offset) const {
+    OffsetableCacheKey base_cache_key(db_id_, db_session_id_, file_number);
+    return base_cache_key.WithOffset(offset);
+  }
+
+  const std::string& db_id_;
+  const std::string& db_session_id_;
+
+  Statistics* statistics_;
+
+  // A cache to store blob file readers.
+  BlobFileCache* blob_file_cache_;
+
+  // A cache to store uncompressed blobs.
+  mutable SharedCacheInterface blob_cache_;
+
+  // The control option of how the cache tiers will be used. Currently RocksDB
+  // supports a block/blob cache (volatile tier) and a secondary cache (this
+  // tier isn't strictly speaking a non-volatile tier since the compressed
+  // cache in this tier is in volatile memory).
+  const CacheTier lowest_used_cache_tier_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.cc
new file mode 100644
index 0000000000..079576f518
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.cc
@@ -0,0 +1,21 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
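+//
+// This file implements PrefetchBufferCollection (declared in
+// prefetch_buffer_collection.h): GetOrCreatePrefetchBuffer lazily allocates
+// one FilePrefetchBuffer per blob file number and reuses it on later calls
+// for the same file.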
+ +#include "db/blob/prefetch_buffer_collection.h" + +namespace ROCKSDB_NAMESPACE { + +FilePrefetchBuffer* PrefetchBufferCollection::GetOrCreatePrefetchBuffer( + uint64_t file_number) { + auto& prefetch_buffer = prefetch_buffers_[file_number]; + if (!prefetch_buffer) { + prefetch_buffer.reset( + new FilePrefetchBuffer(readahead_size_, readahead_size_)); + } + + return prefetch_buffer.get(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.h new file mode 100644 index 0000000000..b973eddc04 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/blob/prefetch_buffer_collection.h @@ -0,0 +1,38 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "file/file_prefetch_buffer.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// A class that owns a collection of FilePrefetchBuffers using the file number +// as key. Used for implementing compaction readahead for blob files. Designed +// to be accessed by a single thread only: every (sub)compaction needs its own +// buffers since they are guaranteed to read different blobs from different +// positions even when reading the same file. +class PrefetchBufferCollection { + public: + explicit PrefetchBufferCollection(uint64_t readahead_size) + : readahead_size_(readahead_size) { + assert(readahead_size_ > 0); + } + + FilePrefetchBuffer* GetOrCreatePrefetchBuffer(uint64_t file_number); + + private: + uint64_t readahead_size_; + std::unordered_map> + prefetch_buffers_; // maps file number to prefetch buffer +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.cc new file mode 100644 index 0000000000..a99bb57e8c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.cc @@ -0,0 +1,447 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+
+#include "db/builder.h"
+
+#include <algorithm>
+#include <deque>
+#include <vector>
+
+#include "db/blob/blob_file_builder.h"
+#include "db/compaction/compaction_iterator.h"
+#include "db/event_helpers.h"
+#include "db/internal_stats.h"
+#include "db/merge_helper.h"
+#include "db/output_validator.h"
+#include "db/range_del_aggregator.h"
+#include "db/table_cache.h"
+#include "db/version_edit.h"
+#include "file/file_util.h"
+#include "file/filename.h"
+#include "file/read_write_util.h"
+#include "file/writable_file_writer.h"
+#include "monitoring/iostats_context_imp.h"
+#include "monitoring/thread_status_util.h"
+#include "options/options_helper.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/options.h"
+#include "rocksdb/table.h"
+#include "table/block_based/block_based_table_builder.h"
+#include "table/format.h"
+#include "table/internal_iterator.h"
+#include "table/unique_id_impl.h"
+#include "test_util/sync_point.h"
+#include "util/stop_watch.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TableFactory;
+
+TableBuilder* NewTableBuilder(const TableBuilderOptions& tboptions,
+                              WritableFileWriter* file) {
+  assert((tboptions.column_family_id ==
+          TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
+         tboptions.column_family_name.empty());
+  return tboptions.ioptions.table_factory->NewTableBuilder(tboptions, file);
+}
+
+Status BuildTable(
+    const std::string& dbname, VersionSet* versions,
+    const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions,
+    const FileOptions& file_options, const ReadOptions& read_options,
+    TableCache* table_cache, InternalIterator* iter,
+    std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
+        range_del_iters,
+    FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
+    std::vector<SequenceNumber> snapshots,
+    SequenceNumber earliest_write_conflict_snapshot,
+    SequenceNumber job_snapshot, SnapshotChecker* snapshot_checker,
+    bool paranoid_file_checks, InternalStats* internal_stats,
+    IOStatus* io_status, const std::shared_ptr<IOTracer>& io_tracer,
+    BlobFileCreationReason blob_creation_reason,
+    const SeqnoToTimeMapping& seqno_to_time_mapping, EventLogger* event_logger,
+    int job_id, const Env::IOPriority io_priority,
+    TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
+    const std::string* full_history_ts_low,
+    BlobFileCompletionCallback* blob_callback, Version* version,
+    uint64_t* num_input_entries, uint64_t* memtable_payload_bytes,
+    uint64_t* memtable_garbage_bytes) {
+  assert((tboptions.column_family_id ==
+          TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
+         tboptions.column_family_name.empty());
+  auto& mutable_cf_options = tboptions.moptions;
+  auto& ioptions = tboptions.ioptions;
+  // Report the IOStats for flush once for every following number of bytes.
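+  // (1048576 bytes = 1 MiB; IOSTATS(bytes_written) below is compared against
+  // this threshold before updating the thread's FLUSH_BYTES_WRITTEN
+  // property.)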
+ const size_t kReportFlushIOStatsEvery = 1048576; + OutputValidator output_validator( + tboptions.internal_comparator, + /*enable_order_check=*/ + mutable_cf_options.check_flush_compaction_key_order, + /*enable_hash=*/paranoid_file_checks); + Status s; + meta->fd.file_size = 0; + iter->SeekToFirst(); + std::unique_ptr range_del_agg( + new CompactionRangeDelAggregator(&tboptions.internal_comparator, + snapshots, full_history_ts_low)); + uint64_t num_unfragmented_tombstones = 0; + uint64_t total_tombstone_payload_bytes = 0; + for (auto& range_del_iter : range_del_iters) { + num_unfragmented_tombstones += + range_del_iter->num_unfragmented_tombstones(); + total_tombstone_payload_bytes += + range_del_iter->total_tombstone_payload_bytes(); + range_del_agg->AddTombstones(std::move(range_del_iter)); + } + + std::string fname = TableFileName(ioptions.cf_paths, meta->fd.GetNumber(), + meta->fd.GetPathId()); + std::vector blob_file_paths; + std::string file_checksum = kUnknownFileChecksum; + std::string file_checksum_func_name = kUnknownFileChecksumFuncName; + EventHelpers::NotifyTableFileCreationStarted(ioptions.listeners, dbname, + tboptions.column_family_name, + fname, job_id, tboptions.reason); + Env* env = db_options.env; + assert(env); + FileSystem* fs = db_options.fs.get(); + assert(fs); + + TableProperties tp; + bool table_file_created = false; + if (iter->Valid() || !range_del_agg->IsEmpty()) { + std::unique_ptr compaction_filter; + if (ioptions.compaction_filter_factory != nullptr && + ioptions.compaction_filter_factory->ShouldFilterTableFileCreation( + tboptions.reason)) { + CompactionFilter::Context context; + context.is_full_compaction = false; + context.is_manual_compaction = false; + context.column_family_id = tboptions.column_family_id; + context.reason = tboptions.reason; + compaction_filter = + ioptions.compaction_filter_factory->CreateCompactionFilter(context); + if (compaction_filter != nullptr && + !compaction_filter->IgnoreSnapshots()) { + s.PermitUncheckedError(); + return Status::NotSupported( + "CompactionFilter::IgnoreSnapshots() = false is not supported " + "anymore."); + } + } + + TableBuilder* builder; + std::unique_ptr file_writer; + { + std::unique_ptr file; +#ifndef NDEBUG + bool use_direct_writes = file_options.use_direct_writes; + TEST_SYNC_POINT_CALLBACK("BuildTable:create_file", &use_direct_writes); +#endif // !NDEBUG + IOStatus io_s = NewWritableFile(fs, fname, &file, file_options); + assert(s.ok()); + s = io_s; + if (io_status->ok()) { + *io_status = io_s; + } + if (!s.ok()) { + EventHelpers::LogAndNotifyTableFileCreationFinished( + event_logger, ioptions.listeners, dbname, + tboptions.column_family_name, fname, job_id, meta->fd, + kInvalidBlobFileNumber, tp, tboptions.reason, s, file_checksum, + file_checksum_func_name); + return s; + } + + table_file_created = true; + FileTypeSet tmp_set = ioptions.checksum_handoff_file_types; + file->SetIOPriority(io_priority); + file->SetWriteLifeTimeHint(write_hint); + file_writer.reset(new WritableFileWriter( + std::move(file), fname, file_options, ioptions.clock, io_tracer, + ioptions.stats, ioptions.listeners, + ioptions.file_checksum_gen_factory.get(), + tmp_set.Contains(FileType::kTableFile), false)); + + builder = NewTableBuilder(tboptions, file_writer.get()); + } + + auto ucmp = tboptions.internal_comparator.user_comparator(); + MergeHelper merge( + env, ucmp, ioptions.merge_operator.get(), compaction_filter.get(), + ioptions.logger, true /* internal key corruption is not ok */, + snapshots.empty() ? 
0 : snapshots.back(), snapshot_checker); + + std::unique_ptr blob_file_builder( + (mutable_cf_options.enable_blob_files && + tboptions.level_at_creation >= + mutable_cf_options.blob_file_starting_level && + blob_file_additions) + ? new BlobFileBuilder( + versions, fs, &ioptions, &mutable_cf_options, &file_options, + tboptions.db_id, tboptions.db_session_id, job_id, + tboptions.column_family_id, tboptions.column_family_name, + io_priority, write_hint, io_tracer, blob_callback, + blob_creation_reason, &blob_file_paths, blob_file_additions) + : nullptr); + + const std::atomic kManualCompactionCanceledFalse{false}; + CompactionIterator c_iter( + iter, ucmp, &merge, kMaxSequenceNumber, &snapshots, + earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env, + ShouldReportDetailedTime(env, ioptions.stats), + true /* internal key corruption is not ok */, range_del_agg.get(), + blob_file_builder.get(), ioptions.allow_data_in_errors, + ioptions.enforce_single_del_contracts, + /*manual_compaction_canceled=*/kManualCompactionCanceledFalse, + /*compaction=*/nullptr, compaction_filter.get(), + /*shutting_down=*/nullptr, db_options.info_log, full_history_ts_low); + + c_iter.SeekToFirst(); + for (; c_iter.Valid(); c_iter.Next()) { + const Slice& key = c_iter.key(); + const Slice& value = c_iter.value(); + const ParsedInternalKey& ikey = c_iter.ikey(); + // Generate a rolling 64-bit hash of the key and values + // Note : + // Here "key" integrates 'sequence_number'+'kType'+'user key'. + s = output_validator.Add(key, value); + if (!s.ok()) { + break; + } + builder->Add(key, value); + + s = meta->UpdateBoundaries(key, value, ikey.sequence, ikey.type); + if (!s.ok()) { + break; + } + + // TODO(noetzli): Update stats after flush, too. + if (io_priority == Env::IO_HIGH && + IOSTATS(bytes_written) >= kReportFlushIOStatsEvery) { + ThreadStatusUtil::SetThreadOperationProperty( + ThreadStatus::FLUSH_BYTES_WRITTEN, IOSTATS(bytes_written)); + } + } + if (!s.ok()) { + c_iter.status().PermitUncheckedError(); + } else if (!c_iter.status().ok()) { + s = c_iter.status(); + } + + if (s.ok()) { + auto range_del_it = range_del_agg->NewIterator(); + Slice last_tombstone_start_user_key{}; + for (range_del_it->SeekToFirst(); range_del_it->Valid(); + range_del_it->Next()) { + auto tombstone = range_del_it->Tombstone(); + auto kv = tombstone.Serialize(); + builder->Add(kv.first.Encode(), kv.second); + InternalKey tombstone_end = tombstone.SerializeEndKey(); + meta->UpdateBoundariesForRange(kv.first, tombstone_end, tombstone.seq_, + tboptions.internal_comparator); + if (version) { + if (last_tombstone_start_user_key.empty() || + ucmp->CompareWithoutTimestamp(last_tombstone_start_user_key, + range_del_it->start_key()) < 0) { + SizeApproximationOptions approx_opts; + approx_opts.files_size_error_margin = 0.1; + meta->compensated_range_deletion_size += versions->ApproximateSize( + approx_opts, read_options, version, kv.first.Encode(), + tombstone_end.Encode(), 0 /* start_level */, -1 /* end_level */, + TableReaderCaller::kFlush); + } + last_tombstone_start_user_key = range_del_it->start_key(); + } + } + } + + TEST_SYNC_POINT("BuildTable:BeforeFinishBuildTable"); + const bool empty = builder->IsEmpty(); + if (num_input_entries != nullptr) { + *num_input_entries = + c_iter.num_input_entry_scanned() + num_unfragmented_tombstones; + } + if (!s.ok() || empty) { + builder->Abandon(); + } else { + std::string seqno_time_mapping_str; + seqno_to_time_mapping.Encode( + seqno_time_mapping_str, meta->fd.smallest_seqno, + 
meta->fd.largest_seqno, meta->file_creation_time); + builder->SetSeqnoTimeTableProperties( + seqno_time_mapping_str, + ioptions.compaction_style == CompactionStyle::kCompactionStyleFIFO + ? meta->file_creation_time + : meta->oldest_ancester_time); + s = builder->Finish(); + } + if (io_status->ok()) { + *io_status = builder->io_status(); + } + + if (s.ok() && !empty) { + uint64_t file_size = builder->FileSize(); + meta->fd.file_size = file_size; + meta->tail_size = builder->GetTailSize(); + meta->marked_for_compaction = builder->NeedCompact(); + assert(meta->fd.GetFileSize() > 0); + tp = builder + ->GetTableProperties(); // refresh now that builder is finished + if (memtable_payload_bytes != nullptr && + memtable_garbage_bytes != nullptr) { + const CompactionIterationStats& ci_stats = c_iter.iter_stats(); + uint64_t total_payload_bytes = ci_stats.total_input_raw_key_bytes + + ci_stats.total_input_raw_value_bytes + + total_tombstone_payload_bytes; + uint64_t total_payload_bytes_written = + (tp.raw_key_size + tp.raw_value_size); + // Prevent underflow, which may still happen at this point + // since we only support inserts, deletes, and deleteRanges. + if (total_payload_bytes_written <= total_payload_bytes) { + *memtable_payload_bytes = total_payload_bytes; + *memtable_garbage_bytes = + total_payload_bytes - total_payload_bytes_written; + } else { + *memtable_payload_bytes = 0; + *memtable_garbage_bytes = 0; + } + } + if (table_properties) { + *table_properties = tp; + } + } + delete builder; + + // Finish and check for file errors + TEST_SYNC_POINT("BuildTable:BeforeSyncTable"); + if (s.ok() && !empty) { + StopWatch sw(ioptions.clock, ioptions.stats, TABLE_SYNC_MICROS); + *io_status = file_writer->Sync(ioptions.use_fsync); + } + TEST_SYNC_POINT("BuildTable:BeforeCloseTableFile"); + if (s.ok() && io_status->ok() && !empty) { + *io_status = file_writer->Close(); + } + if (s.ok() && io_status->ok() && !empty) { + // Add the checksum information to file metadata. + meta->file_checksum = file_writer->GetFileChecksum(); + meta->file_checksum_func_name = file_writer->GetFileChecksumFuncName(); + file_checksum = meta->file_checksum; + file_checksum_func_name = meta->file_checksum_func_name; + // Set unique_id only if db_id and db_session_id exist + if (!tboptions.db_id.empty() && !tboptions.db_session_id.empty()) { + if (!GetSstInternalUniqueId(tboptions.db_id, tboptions.db_session_id, + meta->fd.GetNumber(), &(meta->unique_id)) + .ok()) { + // if failed to get unique id, just set it Null + meta->unique_id = kNullUniqueId64x2; + } + } + } + + if (s.ok()) { + s = *io_status; + } + + if (blob_file_builder) { + if (s.ok()) { + s = blob_file_builder->Finish(); + } else { + blob_file_builder->Abandon(s); + } + blob_file_builder.reset(); + } + + // TODO Also check the IO status when create the Iterator. + + TEST_SYNC_POINT("BuildTable:BeforeOutputValidation"); + if (s.ok() && !empty) { + // Verify that the table is usable + // We set for_compaction to false and don't OptimizeForCompactionTableRead + // here because this is a special case after we finish the table building. + // No matter whether use_direct_io_for_flush_and_compaction is true, + // the goal is to cache it here for further user reads. + std::unique_ptr it(table_cache->NewIterator( + read_options, file_options, tboptions.internal_comparator, *meta, + nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor, + nullptr, + (internal_stats == nullptr) ? 
nullptr + : internal_stats->GetFileReadHist(0), + TableReaderCaller::kFlush, /*arena=*/nullptr, + /*skip_filter=*/false, tboptions.level_at_creation, + MaxFileSizeForL0MetaPin(mutable_cf_options), + /*smallest_compaction_key=*/nullptr, + /*largest_compaction_key*/ nullptr, + /*allow_unprepared_value*/ false, + mutable_cf_options.block_protection_bytes_per_key)); + s = it->status(); + if (s.ok() && paranoid_file_checks) { + OutputValidator file_validator(tboptions.internal_comparator, + /*enable_order_check=*/true, + /*enable_hash=*/true); + for (it->SeekToFirst(); it->Valid(); it->Next()) { + // Generate a rolling 64-bit hash of the key and values + file_validator.Add(it->key(), it->value()).PermitUncheckedError(); + } + s = it->status(); + if (s.ok() && !output_validator.CompareValidator(file_validator)) { + s = Status::Corruption("Paranoid checksums do not match"); + } + } + } + } + + // Check for input iterator errors + if (!iter->status().ok()) { + s = iter->status(); + } + + if (!s.ok() || meta->fd.GetFileSize() == 0) { + TEST_SYNC_POINT("BuildTable:BeforeDeleteFile"); + + constexpr IODebugContext* dbg = nullptr; + + if (table_file_created) { + Status ignored = fs->DeleteFile(fname, IOOptions(), dbg); + ignored.PermitUncheckedError(); + } + + assert(blob_file_additions || blob_file_paths.empty()); + + if (blob_file_additions) { + for (const std::string& blob_file_path : blob_file_paths) { + Status ignored = DeleteDBFile(&db_options, blob_file_path, dbname, + /*force_bg=*/false, /*force_fg=*/false); + ignored.PermitUncheckedError(); + TEST_SYNC_POINT("BuildTable::AfterDeleteFile"); + } + } + } + + Status status_for_listener = s; + if (meta->fd.GetFileSize() == 0) { + fname = "(nil)"; + if (s.ok()) { + status_for_listener = Status::Aborted("Empty SST file not kept"); + } + } + // Output to event logger and fire events. + EventHelpers::LogAndNotifyTableFileCreationFinished( + event_logger, ioptions.listeners, dbname, tboptions.column_family_name, + fname, job_id, meta->fd, meta->oldest_blob_file_number, tp, + tboptions.reason, status_for_listener, file_checksum, + file_checksum_func_name); + + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.h new file mode 100644 index 0000000000..6a6a1866a1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/builder.h @@ -0,0 +1,78 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+#pragma once +#include +#include +#include + +#include "db/range_tombstone_fragmenter.h" +#include "db/seqno_to_time_mapping.h" +#include "db/table_properties_collector.h" +#include "db/version_set.h" +#include "logging/event_logger.h" +#include "options/cf_options.h" +#include "rocksdb/comparator.h" +#include "rocksdb/env.h" +#include "rocksdb/listener.h" +#include "rocksdb/options.h" +#include "rocksdb/status.h" +#include "rocksdb/table_properties.h" +#include "rocksdb/types.h" +#include "table/scoped_arena_iterator.h" + +namespace ROCKSDB_NAMESPACE { + +struct FileMetaData; + +class VersionSet; +class BlobFileAddition; +class SnapshotChecker; +class TableCache; +class TableBuilder; +class WritableFileWriter; +class InternalStats; +class BlobFileCompletionCallback; + +// Convenience function for NewTableBuilder on the embedded table_factory. +TableBuilder* NewTableBuilder(const TableBuilderOptions& tboptions, + WritableFileWriter* file); + +// Build a Table file from the contents of *iter. The generated file +// will be named according to number specified in meta. On success, the rest of +// *meta will be filled with metadata about the generated table. +// If no data is present in *iter, meta->file_size will be set to +// zero, and no Table file will be produced. +// +// @param column_family_name Name of the column family that is also identified +// by column_family_id, or empty string if unknown. +extern Status BuildTable( + const std::string& dbname, VersionSet* versions, + const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions, + const FileOptions& file_options, const ReadOptions& read_options, + TableCache* table_cache, InternalIterator* iter, + std::vector> + range_del_iters, + FileMetaData* meta, std::vector* blob_file_additions, + std::vector snapshots, + SequenceNumber earliest_write_conflict_snapshot, + SequenceNumber job_snapshot, SnapshotChecker* snapshot_checker, + bool paranoid_file_checks, InternalStats* internal_stats, + IOStatus* io_status, const std::shared_ptr& io_tracer, + BlobFileCreationReason blob_creation_reason, + const SeqnoToTimeMapping& seqno_to_time_mapping, + EventLogger* event_logger = nullptr, int job_id = 0, + const Env::IOPriority io_priority = Env::IO_HIGH, + TableProperties* table_properties = nullptr, + Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET, + const std::string* full_history_ts_low = nullptr, + BlobFileCompletionCallback* blob_callback = nullptr, + Version* version = nullptr, uint64_t* num_input_entries = nullptr, + uint64_t* memtable_payload_bytes = nullptr, + uint64_t* memtable_garbage_bytes = nullptr); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/c.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/c.cc new file mode 100644 index 0000000000..dde8f69fbe --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/c.cc @@ -0,0 +1,6559 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include "rocksdb/c.h" + +#include +#include +#include +#include + +#include "port/port.h" +#include "rocksdb/advanced_cache.h" +#include "rocksdb/compaction_filter.h" +#include "rocksdb/comparator.h" +#include "rocksdb/convenience.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/experimental.h" +#include "rocksdb/filter_policy.h" +#include "rocksdb/iterator.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/options.h" +#include "rocksdb/perf_context.h" +#include "rocksdb/rate_limiter.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/statistics.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "rocksdb/universal_compaction.h" +#include "rocksdb/utilities/backup_engine.h" +#include "rocksdb/utilities/checkpoint.h" +#include "rocksdb/utilities/db_ttl.h" +#include "rocksdb/utilities/memory_util.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" +#include "rocksdb/utilities/options_util.h" +#include "rocksdb/utilities/table_properties_collectors.h" +#include "rocksdb/utilities/transaction.h" +#include "rocksdb/utilities/transaction_db.h" +#include "rocksdb/utilities/write_batch_with_index.h" +#include "rocksdb/write_batch.h" +#include "utilities/merge_operators.h" + +using ROCKSDB_NAMESPACE::BackupEngine; +using ROCKSDB_NAMESPACE::BackupEngineOptions; +using ROCKSDB_NAMESPACE::BackupID; +using ROCKSDB_NAMESPACE::BackupInfo; +using ROCKSDB_NAMESPACE::BatchResult; +using ROCKSDB_NAMESPACE::BlockBasedTableOptions; +using ROCKSDB_NAMESPACE::BottommostLevelCompaction; +using ROCKSDB_NAMESPACE::BytewiseComparator; +using ROCKSDB_NAMESPACE::Cache; +using ROCKSDB_NAMESPACE::Checkpoint; +using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor; +using ROCKSDB_NAMESPACE::ColumnFamilyHandle; +using ROCKSDB_NAMESPACE::ColumnFamilyMetaData; +using ROCKSDB_NAMESPACE::ColumnFamilyOptions; +using ROCKSDB_NAMESPACE::CompactionFilter; +using ROCKSDB_NAMESPACE::CompactionFilterFactory; +using ROCKSDB_NAMESPACE::CompactionOptionsFIFO; +using ROCKSDB_NAMESPACE::CompactRangeOptions; +using ROCKSDB_NAMESPACE::Comparator; +using ROCKSDB_NAMESPACE::CompressionType; +using ROCKSDB_NAMESPACE::ConfigOptions; +using ROCKSDB_NAMESPACE::CuckooTableOptions; +using ROCKSDB_NAMESPACE::DB; +using ROCKSDB_NAMESPACE::DBOptions; +using ROCKSDB_NAMESPACE::DbPath; +using ROCKSDB_NAMESPACE::Env; +using ROCKSDB_NAMESPACE::EnvOptions; +using ROCKSDB_NAMESPACE::FileLock; +using ROCKSDB_NAMESPACE::FilterPolicy; +using ROCKSDB_NAMESPACE::FlushOptions; +using ROCKSDB_NAMESPACE::HyperClockCacheOptions; +using ROCKSDB_NAMESPACE::InfoLogLevel; +using ROCKSDB_NAMESPACE::IngestExternalFileOptions; +using ROCKSDB_NAMESPACE::Iterator; +using ROCKSDB_NAMESPACE::LevelMetaData; +using ROCKSDB_NAMESPACE::LiveFileMetaData; +using ROCKSDB_NAMESPACE::Logger; +using ROCKSDB_NAMESPACE::LRUCacheOptions; +using ROCKSDB_NAMESPACE::MemoryAllocator; +using ROCKSDB_NAMESPACE::MemoryUtil; +using ROCKSDB_NAMESPACE::MergeOperator; +using ROCKSDB_NAMESPACE::NewBloomFilterPolicy; +using ROCKSDB_NAMESPACE::NewCompactOnDeletionCollectorFactory; +using ROCKSDB_NAMESPACE::NewGenericRateLimiter; +using ROCKSDB_NAMESPACE::NewLRUCache; +using ROCKSDB_NAMESPACE::NewRibbonFilterPolicy; +using ROCKSDB_NAMESPACE::OptimisticTransactionDB; +using ROCKSDB_NAMESPACE::OptimisticTransactionOptions; +using ROCKSDB_NAMESPACE::Options; +using ROCKSDB_NAMESPACE::PerfContext; +using ROCKSDB_NAMESPACE::PerfLevel; +using ROCKSDB_NAMESPACE::PinnableSlice; +using 
+using ROCKSDB_NAMESPACE::PrepopulateBlobCache;
+using ROCKSDB_NAMESPACE::RandomAccessFile;
+using ROCKSDB_NAMESPACE::Range;
+using ROCKSDB_NAMESPACE::RateLimiter;
+using ROCKSDB_NAMESPACE::ReadOptions;
+using ROCKSDB_NAMESPACE::RestoreOptions;
+using ROCKSDB_NAMESPACE::SequentialFile;
+using ROCKSDB_NAMESPACE::Slice;
+using ROCKSDB_NAMESPACE::SliceParts;
+using ROCKSDB_NAMESPACE::SliceTransform;
+using ROCKSDB_NAMESPACE::Snapshot;
+using ROCKSDB_NAMESPACE::SstFileMetaData;
+using ROCKSDB_NAMESPACE::SstFileWriter;
+using ROCKSDB_NAMESPACE::Status;
+using ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory;
+using ROCKSDB_NAMESPACE::Transaction;
+using ROCKSDB_NAMESPACE::TransactionDB;
+using ROCKSDB_NAMESPACE::TransactionDBOptions;
+using ROCKSDB_NAMESPACE::TransactionLogIterator;
+using ROCKSDB_NAMESPACE::TransactionOptions;
+using ROCKSDB_NAMESPACE::WALRecoveryMode;
+using ROCKSDB_NAMESPACE::WritableFile;
+using ROCKSDB_NAMESPACE::WriteBatch;
+using ROCKSDB_NAMESPACE::WriteBatchWithIndex;
+using ROCKSDB_NAMESPACE::WriteOptions;
+
+using std::unordered_set;
+using std::vector;
+
+extern "C" {
+
+struct rocksdb_t {
+  DB* rep;
+};
+struct rocksdb_backup_engine_t {
+  BackupEngine* rep;
+};
+struct rocksdb_backup_engine_info_t {
+  std::vector<BackupInfo> rep;
+};
+struct rocksdb_restore_options_t {
+  RestoreOptions rep;
+};
+struct rocksdb_iterator_t {
+  Iterator* rep;
+};
+struct rocksdb_writebatch_t {
+  WriteBatch rep;
+};
+struct rocksdb_writebatch_wi_t {
+  WriteBatchWithIndex* rep;
+};
+struct rocksdb_snapshot_t {
+  const Snapshot* rep;
+};
+struct rocksdb_flushoptions_t {
+  FlushOptions rep;
+};
+struct rocksdb_fifo_compaction_options_t {
+  CompactionOptionsFIFO rep;
+};
+struct rocksdb_readoptions_t {
+  ReadOptions rep;
+  // stack variables to set pointers to in ReadOptions
+  Slice upper_bound;
+  Slice lower_bound;
+  Slice timestamp;
+  Slice iter_start_ts;
+};
+struct rocksdb_writeoptions_t {
+  WriteOptions rep;
+};
+struct rocksdb_options_t {
+  Options rep;
+};
+struct rocksdb_compactoptions_t {
+  CompactRangeOptions rep;
+  Slice full_history_ts_low;
+};
+struct rocksdb_block_based_table_options_t {
+  BlockBasedTableOptions rep;
+};
+struct rocksdb_cuckoo_table_options_t {
+  CuckooTableOptions rep;
+};
+struct rocksdb_seqfile_t {
+  SequentialFile* rep;
+};
+struct rocksdb_randomfile_t {
+  RandomAccessFile* rep;
+};
+struct rocksdb_writablefile_t {
+  WritableFile* rep;
+};
+struct rocksdb_wal_iterator_t {
+  TransactionLogIterator* rep;
+};
+struct rocksdb_wal_readoptions_t {
+  TransactionLogIterator::ReadOptions rep;
+};
+struct rocksdb_filelock_t {
+  FileLock* rep;
+};
+struct rocksdb_logger_t {
+  std::shared_ptr<Logger> rep;
+};
+struct rocksdb_lru_cache_options_t {
+  LRUCacheOptions rep;
+};
+struct rocksdb_hyper_clock_cache_options_t {
+  HyperClockCacheOptions rep;
+};
+struct rocksdb_memory_allocator_t {
+  std::shared_ptr<MemoryAllocator> rep;
+};
+struct rocksdb_cache_t {
+  std::shared_ptr<Cache> rep;
+};
+struct rocksdb_livefiles_t {
+  std::vector<LiveFileMetaData> rep;
+};
+struct rocksdb_column_family_handle_t {
+  ColumnFamilyHandle* rep;
+};
+struct rocksdb_column_family_metadata_t {
+  ColumnFamilyMetaData rep;
+};
+struct rocksdb_level_metadata_t {
+  const LevelMetaData* rep;
+};
+struct rocksdb_sst_file_metadata_t {
+  const SstFileMetaData* rep;
+};
+struct rocksdb_envoptions_t {
+  EnvOptions rep;
+};
+struct rocksdb_ingestexternalfileoptions_t {
+  IngestExternalFileOptions rep;
+};
+struct rocksdb_sstfilewriter_t {
+  SstFileWriter* rep;
+};
+struct rocksdb_ratelimiter_t {
+  std::shared_ptr<RateLimiter> rep;
+};
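A note on the rocksdb_readoptions_t wrapper defined above: ReadOptions itself holds only pointers for iterate bounds, so the wrapper embeds upper_bound/lower_bound Slice storage and points rep at it; the bound's bytes still belong to the caller and must outlive any reads that use the options. A bounded-iteration sketch from C, reusing the hypothetical db handle from the earlier sketch:

    /* Scan keys in ["a", "m") only. The bound is not copied deeply:
       the wrapper's Slice keeps the caller's pointer, so a string
       literal (static storage) is safe here. */
    rocksdb_readoptions_t *ropts = rocksdb_readoptions_create();
    rocksdb_readoptions_set_iterate_upper_bound(ropts, "m", 1);

    rocksdb_iterator_t *it = rocksdb_create_iterator(db, ropts);
    for (rocksdb_iter_seek(it, "a", 1); rocksdb_iter_valid(it);
         rocksdb_iter_next(it)) {
      size_t klen;
      const char *k = rocksdb_iter_key(it, &klen); /* borrowed, not owned */
      (void)k; /* copy the bytes if they must outlive the iterator */
    }
    rocksdb_iter_destroy(it);
    rocksdb_readoptions_destroy(ropts);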
+struct rocksdb_perfcontext_t {
+  PerfContext* rep;
+};
+struct rocksdb_pinnableslice_t {
+  PinnableSlice rep;
+};
+struct rocksdb_transactiondb_options_t {
+  TransactionDBOptions rep;
+};
+struct rocksdb_transactiondb_t {
+  TransactionDB* rep;
+};
+struct rocksdb_transaction_options_t {
+  TransactionOptions rep;
+};
+struct rocksdb_transaction_t {
+  Transaction* rep;
+};
+struct rocksdb_backup_engine_options_t {
+  BackupEngineOptions rep;
+};
+struct rocksdb_checkpoint_t {
+  Checkpoint* rep;
+};
+struct rocksdb_optimistictransactiondb_t {
+  OptimisticTransactionDB* rep;
+};
+struct rocksdb_optimistictransaction_options_t {
+  OptimisticTransactionOptions rep;
+};
+
+struct rocksdb_compactionfiltercontext_t {
+  CompactionFilter::Context rep;
+};
+
+struct rocksdb_compactionfilter_t : public CompactionFilter {
+  void* state_;
+  void (*destructor_)(void*);
+  unsigned char (*filter_)(void*, int level, const char* key, size_t key_length,
+                           const char* existing_value, size_t value_length,
+                           char** new_value, size_t* new_value_length,
+                           unsigned char* value_changed);
+  const char* (*name_)(void*);
+  unsigned char ignore_snapshots_;
+
+  ~rocksdb_compactionfilter_t() override { (*destructor_)(state_); }
+
+  bool Filter(int level, const Slice& key, const Slice& existing_value,
+              std::string* new_value, bool* value_changed) const override {
+    char* c_new_value = nullptr;
+    size_t new_value_length = 0;
+    unsigned char c_value_changed = 0;
+    unsigned char result =
+        (*filter_)(state_, level, key.data(), key.size(), existing_value.data(),
+                   existing_value.size(), &c_new_value, &new_value_length,
+                   &c_value_changed);
+    if (c_value_changed) {
+      new_value->assign(c_new_value, new_value_length);
+      *value_changed = true;
+    }
+    return result;
+  }
+
+  const char* Name() const override { return (*name_)(state_); }
+
+  bool IgnoreSnapshots() const override { return ignore_snapshots_; }
+};
+
+struct rocksdb_compactionfilterfactory_t : public CompactionFilterFactory {
+  void* state_;
+  void (*destructor_)(void*);
+  rocksdb_compactionfilter_t* (*create_compaction_filter_)(
+      void*, rocksdb_compactionfiltercontext_t* context);
+  const char* (*name_)(void*);
+
+  ~rocksdb_compactionfilterfactory_t() override { (*destructor_)(state_); }
+
+  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) override {
+    rocksdb_compactionfiltercontext_t ccontext;
+    ccontext.rep = context;
+    CompactionFilter* cf = (*create_compaction_filter_)(state_, &ccontext);
+    return std::unique_ptr<CompactionFilter>(cf);
+  }
+
+  const char* Name() const override { return (*name_)(state_); }
+};
+
+struct rocksdb_comparator_t : public Comparator {
+  void* state_;
+  void (*destructor_)(void*);
+  int (*compare_)(void*, const char* a, size_t alen, const char* b,
+                  size_t blen);
+  const char* (*name_)(void*);
+  int (*compare_ts_)(void*, const char* a_ts, size_t a_tslen, const char* b_ts,
+                     size_t b_tslen);
+  int (*compare_without_ts_)(void*, const char* a, size_t alen,
+                             unsigned char a_has_ts, const char* b, size_t blen,
+                             unsigned char b_has_ts);
+
+  rocksdb_comparator_t() : Comparator() {}
+
+  rocksdb_comparator_t(size_t ts_size) : Comparator(ts_size) {}
+
+  ~rocksdb_comparator_t() override { (*destructor_)(state_); }
+
+  int Compare(const Slice& a, const Slice& b) const override {
+    return (*compare_)(state_, a.data(), a.size(), b.data(), b.size());
+  }
+
+  int CompareTimestamp(const Slice& a_ts, const Slice& b_ts) const override {
+    if (compare_ts_ == nullptr) {
+      return 0;
+    }
+    return (*compare_ts_)(state_, a_ts.data(), a_ts.size(), b_ts.data(),
+                          b_ts.size());
+  }
+
+  int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b,
+                              bool b_has_ts) const override {
+    if (compare_without_ts_ == nullptr) {
+      return Compare(a, b);
+    }
+    return (*compare_without_ts_)(state_, a.data(), a.size(), a_has_ts,
+                                  b.data(), b.size(), b_has_ts);
+  }
+
+  const char* Name() const override { return (*name_)(state_); }
+
+  // No-ops since the C binding does not support key shortening methods.
+  void FindShortestSeparator(std::string*, const Slice&) const override {}
+  void FindShortSuccessor(std::string* /*key*/) const override {}
+};
+
+struct rocksdb_filterpolicy_t : public FilterPolicy {
+  void* state_;
+  void (*destructor_)(void*);
+  const char* (*name_)(void*);
+
+  ~rocksdb_filterpolicy_t() override { (*destructor_)(state_); }
+
+  const char* Name() const override { return (*name_)(state_); }
+};
+
+struct rocksdb_mergeoperator_t : public MergeOperator {
+  void* state_;
+  void (*destructor_)(void*);
+  const char* (*name_)(void*);
+  char* (*full_merge_)(void*, const char* key, size_t key_length,
+                       const char* existing_value, size_t existing_value_length,
+                       const char* const* operands_list,
+                       const size_t* operands_list_length, int num_operands,
+                       unsigned char* success, size_t* new_value_length);
+  char* (*partial_merge_)(void*, const char* key, size_t key_length,
+                          const char* const* operands_list,
+                          const size_t* operands_list_length, int num_operands,
+                          unsigned char* success, size_t* new_value_length);
+  void (*delete_value_)(void*, const char* value, size_t value_length);
+
+  ~rocksdb_mergeoperator_t() override { (*destructor_)(state_); }
+
+  const char* Name() const override { return (*name_)(state_); }
+
+  bool FullMergeV2(const MergeOperationInput& merge_in,
+                   MergeOperationOutput* merge_out) const override {
+    size_t n = merge_in.operand_list.size();
+    std::vector<const char*> operand_pointers(n);
+    std::vector<size_t> operand_sizes(n);
+    for (size_t i = 0; i < n; i++) {
+      Slice operand(merge_in.operand_list[i]);
+      operand_pointers[i] = operand.data();
+      operand_sizes[i] = operand.size();
+    }
+
+    const char* existing_value_data = nullptr;
+    size_t existing_value_len = 0;
+    if (merge_in.existing_value != nullptr) {
+      existing_value_data = merge_in.existing_value->data();
+      existing_value_len = merge_in.existing_value->size();
+    }
+
+    unsigned char success;
+    size_t new_value_len;
+    char* tmp_new_value = (*full_merge_)(
+        state_, merge_in.key.data(), merge_in.key.size(), existing_value_data,
+        existing_value_len, &operand_pointers[0], &operand_sizes[0],
+        static_cast<int>(n), &success, &new_value_len);
+    merge_out->new_value.assign(tmp_new_value, new_value_len);
+
+    if (delete_value_ != nullptr) {
+      (*delete_value_)(state_, tmp_new_value, new_value_len);
+    } else {
+      free(tmp_new_value);
+    }
+
+    return success;
+  }
+
+  bool PartialMergeMulti(const Slice& key,
+                         const std::deque<Slice>& operand_list,
+                         std::string* new_value,
+                         Logger* /*logger*/) const override {
+    size_t operand_count = operand_list.size();
+    std::vector<const char*> operand_pointers(operand_count);
+    std::vector<size_t> operand_sizes(operand_count);
+    for (size_t i = 0; i < operand_count; ++i) {
+      Slice operand(operand_list[i]);
+      operand_pointers[i] = operand.data();
+      operand_sizes[i] = operand.size();
+    }
+
+    unsigned char success;
+    size_t new_value_len;
+    char* tmp_new_value = (*partial_merge_)(
+        state_, key.data(), key.size(), &operand_pointers[0], &operand_sizes[0],
+        static_cast<int>(operand_count), &success, &new_value_len);
+    new_value->assign(tmp_new_value, new_value_len);
+
+    if (delete_value_ != nullptr) {
(*delete_value_)(state_, tmp_new_value, new_value_len); + } else { + free(tmp_new_value); + } + + return success; + } +}; + +struct rocksdb_dbpath_t { + DbPath rep; +}; + +struct rocksdb_env_t { + Env* rep; + bool is_default; +}; + +struct rocksdb_slicetransform_t : public SliceTransform { + void* state_; + void (*destructor_)(void*); + const char* (*name_)(void*); + char* (*transform_)(void*, const char* key, size_t length, + size_t* dst_length); + unsigned char (*in_domain_)(void*, const char* key, size_t length); + unsigned char (*in_range_)(void*, const char* key, size_t length); + + ~rocksdb_slicetransform_t() override { (*destructor_)(state_); } + + const char* Name() const override { return (*name_)(state_); } + + Slice Transform(const Slice& src) const override { + size_t len; + char* dst = (*transform_)(state_, src.data(), src.size(), &len); + return Slice(dst, len); + } + + bool InDomain(const Slice& src) const override { + return (*in_domain_)(state_, src.data(), src.size()); + } + + bool InRange(const Slice& src) const override { + return (*in_range_)(state_, src.data(), src.size()); + } +}; + +struct rocksdb_universal_compaction_options_t { + ROCKSDB_NAMESPACE::CompactionOptionsUniversal* rep; +}; + +static bool SaveError(char** errptr, const Status& s) { + assert(errptr != nullptr); + if (s.ok()) { + return false; + } else if (*errptr == nullptr) { + *errptr = strdup(s.ToString().c_str()); + } else { + // TODO(sanjay): Merge with existing error? + // This is a bug if *errptr is not created by malloc() + free(*errptr); + *errptr = strdup(s.ToString().c_str()); + } + return true; +} + +static char* CopyString(const std::string& str) { + char* result = reinterpret_cast(malloc(sizeof(char) * str.size())); + memcpy(result, str.data(), sizeof(char) * str.size()); + return result; +} + +rocksdb_t* rocksdb_open(const rocksdb_options_t* options, const char* name, + char** errptr) { + DB* db; + if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) { + return nullptr; + } + rocksdb_t* result = new rocksdb_t; + result->rep = db; + return result; +} + +rocksdb_t* rocksdb_open_with_ttl(const rocksdb_options_t* options, + const char* name, int ttl, char** errptr) { + ROCKSDB_NAMESPACE::DBWithTTL* db; + if (SaveError(errptr, ROCKSDB_NAMESPACE::DBWithTTL::Open( + options->rep, std::string(name), &db, ttl))) { + return nullptr; + } + rocksdb_t* result = new rocksdb_t; + result->rep = db; + return result; +} + +rocksdb_t* rocksdb_open_for_read_only(const rocksdb_options_t* options, + const char* name, + unsigned char error_if_wal_file_exists, + char** errptr) { + DB* db; + if (SaveError(errptr, DB::OpenForReadOnly(options->rep, std::string(name), + &db, error_if_wal_file_exists))) { + return nullptr; + } + rocksdb_t* result = new rocksdb_t; + result->rep = db; + return result; +} + +rocksdb_t* rocksdb_open_as_secondary(const rocksdb_options_t* options, + const char* name, + const char* secondary_path, + char** errptr) { + DB* db; + if (SaveError(errptr, + DB::OpenAsSecondary(options->rep, std::string(name), + std::string(secondary_path), &db))) { + return nullptr; + } + rocksdb_t* result = new rocksdb_t; + result->rep = db; + return result; +} + +rocksdb_backup_engine_t* rocksdb_backup_engine_open( + const rocksdb_options_t* options, const char* path, char** errptr) { + BackupEngine* be; + if (SaveError(errptr, BackupEngine::Open( + options->rep.env, + BackupEngineOptions(path, nullptr, true, + options->rep.info_log.get()), + &be))) { + return nullptr; + } + 
rocksdb_backup_engine_t* result = new rocksdb_backup_engine_t; + result->rep = be; + return result; +} + +rocksdb_backup_engine_t* rocksdb_backup_engine_open_opts( + const rocksdb_backup_engine_options_t* options, rocksdb_env_t* env, + char** errptr) { + BackupEngine* be; + if (SaveError(errptr, BackupEngine::Open(options->rep, env->rep, &be))) { + return nullptr; + } + rocksdb_backup_engine_t* result = new rocksdb_backup_engine_t; + result->rep = be; + return result; +} + +void rocksdb_backup_engine_create_new_backup(rocksdb_backup_engine_t* be, + rocksdb_t* db, char** errptr) { + SaveError(errptr, be->rep->CreateNewBackup(db->rep)); +} + +void rocksdb_backup_engine_create_new_backup_flush( + rocksdb_backup_engine_t* be, rocksdb_t* db, + unsigned char flush_before_backup, char** errptr) { + SaveError(errptr, be->rep->CreateNewBackup(db->rep, flush_before_backup)); +} + +void rocksdb_backup_engine_purge_old_backups(rocksdb_backup_engine_t* be, + uint32_t num_backups_to_keep, + char** errptr) { + SaveError(errptr, be->rep->PurgeOldBackups(num_backups_to_keep)); +} + +rocksdb_restore_options_t* rocksdb_restore_options_create() { + return new rocksdb_restore_options_t; +} + +void rocksdb_restore_options_destroy(rocksdb_restore_options_t* opt) { + delete opt; +} + +void rocksdb_restore_options_set_keep_log_files(rocksdb_restore_options_t* opt, + int v) { + opt->rep.keep_log_files = v; +} + +void rocksdb_backup_engine_verify_backup(rocksdb_backup_engine_t* be, + uint32_t backup_id, char** errptr) { + SaveError(errptr, be->rep->VerifyBackup(static_cast(backup_id))); +} + +void rocksdb_backup_engine_restore_db_from_latest_backup( + rocksdb_backup_engine_t* be, const char* db_dir, const char* wal_dir, + const rocksdb_restore_options_t* restore_options, char** errptr) { + SaveError(errptr, be->rep->RestoreDBFromLatestBackup(std::string(db_dir), + std::string(wal_dir), + restore_options->rep)); +} + +void rocksdb_backup_engine_restore_db_from_backup( + rocksdb_backup_engine_t* be, const char* db_dir, const char* wal_dir, + const rocksdb_restore_options_t* restore_options, const uint32_t backup_id, + char** errptr) { + SaveError(errptr, be->rep->RestoreDBFromBackup(backup_id, std::string(db_dir), + std::string(wal_dir), + restore_options->rep)); +} + +const rocksdb_backup_engine_info_t* rocksdb_backup_engine_get_backup_info( + rocksdb_backup_engine_t* be) { + rocksdb_backup_engine_info_t* result = new rocksdb_backup_engine_info_t; + be->rep->GetBackupInfo(&result->rep); + return result; +} + +int rocksdb_backup_engine_info_count(const rocksdb_backup_engine_info_t* info) { + return static_cast(info->rep.size()); +} + +int64_t rocksdb_backup_engine_info_timestamp( + const rocksdb_backup_engine_info_t* info, int index) { + return info->rep[index].timestamp; +} + +uint32_t rocksdb_backup_engine_info_backup_id( + const rocksdb_backup_engine_info_t* info, int index) { + return info->rep[index].backup_id; +} + +uint64_t rocksdb_backup_engine_info_size( + const rocksdb_backup_engine_info_t* info, int index) { + return info->rep[index].size; +} + +uint32_t rocksdb_backup_engine_info_number_files( + const rocksdb_backup_engine_info_t* info, int index) { + return info->rep[index].number_files; +} + +void rocksdb_backup_engine_info_destroy( + const rocksdb_backup_engine_info_t* info) { + delete info; +} + +void rocksdb_backup_engine_close(rocksdb_backup_engine_t* be) { + delete be->rep; + delete be; +} + +rocksdb_backup_engine_options_t* rocksdb_backup_engine_options_create( + const char* backup_dir) { + return 
new rocksdb_backup_engine_options_t{ + BackupEngineOptions(std::string(backup_dir))}; +} + +void rocksdb_backup_engine_options_set_backup_dir( + rocksdb_backup_engine_options_t* options, const char* backup_dir) { + options->rep.backup_dir = std::string(backup_dir); +} + +void rocksdb_backup_engine_options_set_env( + rocksdb_backup_engine_options_t* options, rocksdb_env_t* env) { + options->rep.backup_env = (env ? env->rep : nullptr); +} + +void rocksdb_backup_engine_options_set_share_table_files( + rocksdb_backup_engine_options_t* options, unsigned char val) { + options->rep.share_table_files = val; +} + +unsigned char rocksdb_backup_engine_options_get_share_table_files( + rocksdb_backup_engine_options_t* options) { + return options->rep.share_table_files; +} + +void rocksdb_backup_engine_options_set_sync( + rocksdb_backup_engine_options_t* options, unsigned char val) { + options->rep.sync = val; +} + +unsigned char rocksdb_backup_engine_options_get_sync( + rocksdb_backup_engine_options_t* options) { + return options->rep.sync; +} + +void rocksdb_backup_engine_options_set_destroy_old_data( + rocksdb_backup_engine_options_t* options, unsigned char val) { + options->rep.destroy_old_data = val; +} + +unsigned char rocksdb_backup_engine_options_get_destroy_old_data( + rocksdb_backup_engine_options_t* options) { + return options->rep.destroy_old_data; +} + +void rocksdb_backup_engine_options_set_backup_log_files( + rocksdb_backup_engine_options_t* options, unsigned char val) { + options->rep.backup_log_files = val; +} + +unsigned char rocksdb_backup_engine_options_get_backup_log_files( + rocksdb_backup_engine_options_t* options) { + return options->rep.backup_log_files; +} + +void rocksdb_backup_engine_options_set_backup_rate_limit( + rocksdb_backup_engine_options_t* options, uint64_t limit) { + options->rep.backup_rate_limit = limit; +} + +uint64_t rocksdb_backup_engine_options_get_backup_rate_limit( + rocksdb_backup_engine_options_t* options) { + return options->rep.backup_rate_limit; +} + +void rocksdb_backup_engine_options_set_restore_rate_limit( + rocksdb_backup_engine_options_t* options, uint64_t limit) { + options->rep.restore_rate_limit = limit; +} + +uint64_t rocksdb_backup_engine_options_get_restore_rate_limit( + rocksdb_backup_engine_options_t* options) { + return options->rep.restore_rate_limit; +} + +void rocksdb_backup_engine_options_set_max_background_operations( + rocksdb_backup_engine_options_t* options, int val) { + options->rep.max_background_operations = val; +} + +int rocksdb_backup_engine_options_get_max_background_operations( + rocksdb_backup_engine_options_t* options) { + return options->rep.max_background_operations; +} + +void rocksdb_backup_engine_options_set_callback_trigger_interval_size( + rocksdb_backup_engine_options_t* options, uint64_t size) { + options->rep.callback_trigger_interval_size = size; +} + +uint64_t rocksdb_backup_engine_options_get_callback_trigger_interval_size( + rocksdb_backup_engine_options_t* options) { + return options->rep.callback_trigger_interval_size; +} + +void rocksdb_backup_engine_options_set_max_valid_backups_to_open( + rocksdb_backup_engine_options_t* options, int val) { + options->rep.max_valid_backups_to_open = val; +} + +int rocksdb_backup_engine_options_get_max_valid_backups_to_open( + rocksdb_backup_engine_options_t* options) { + return options->rep.max_valid_backups_to_open; +} + +void rocksdb_backup_engine_options_set_share_files_with_checksum_naming( + rocksdb_backup_engine_options_t* options, int val) { + 
options->rep.share_files_with_checksum_naming =
+      static_cast<BackupEngineOptions::ShareFilesNaming>(val);
+}
+
+int rocksdb_backup_engine_options_get_share_files_with_checksum_naming(
+    rocksdb_backup_engine_options_t* options) {
+  return static_cast<int>(options->rep.share_files_with_checksum_naming);
+}
+
+void rocksdb_backup_engine_options_destroy(
+    rocksdb_backup_engine_options_t* options) {
+  delete options;
+}
+
+rocksdb_checkpoint_t* rocksdb_checkpoint_object_create(rocksdb_t* db,
+                                                       char** errptr) {
+  Checkpoint* checkpoint;
+  if (SaveError(errptr, Checkpoint::Create(db->rep, &checkpoint))) {
+    return nullptr;
+  }
+  rocksdb_checkpoint_t* result = new rocksdb_checkpoint_t;
+  result->rep = checkpoint;
+  return result;
+}
+
+void rocksdb_checkpoint_create(rocksdb_checkpoint_t* checkpoint,
+                               const char* checkpoint_dir,
+                               uint64_t log_size_for_flush, char** errptr) {
+  SaveError(errptr, checkpoint->rep->CreateCheckpoint(
+                        std::string(checkpoint_dir), log_size_for_flush));
+}
+
+void rocksdb_checkpoint_object_destroy(rocksdb_checkpoint_t* checkpoint) {
+  delete checkpoint->rep;
+  delete checkpoint;
+}
+
+void rocksdb_close(rocksdb_t* db) {
+  delete db->rep;
+  delete db;
+}
+
+void rocksdb_options_set_uint64add_merge_operator(rocksdb_options_t* opt) {
+  opt->rep.merge_operator =
+      ROCKSDB_NAMESPACE::MergeOperators::CreateUInt64AddOperator();
+}
+
+rocksdb_t* rocksdb_open_and_trim_history(
+    const rocksdb_options_t* db_options, const char* name,
+    int num_column_families, const char* const* column_family_names,
+    const rocksdb_options_t* const* column_family_options,
+    rocksdb_column_family_handle_t** column_family_handles, char* trim_ts,
+    size_t trim_tslen, char** errptr) {
+  std::vector<ColumnFamilyDescriptor> column_families;
+  for (int i = 0; i < num_column_families; i++) {
+    column_families.push_back(ColumnFamilyDescriptor(
+        std::string(column_family_names[i]),
+        ColumnFamilyOptions(column_family_options[i]->rep)));
+  }
+
+  std::string trim_ts_(trim_ts, trim_tslen);
+
+  DB* db;
+  std::vector<ColumnFamilyHandle*> handles;
+  if (SaveError(errptr, DB::OpenAndTrimHistory(
+                            DBOptions(db_options->rep), std::string(name),
+                            column_families, &handles, &db, trim_ts_))) {
+    return nullptr;
+  }
+
+  for (size_t i = 0; i < handles.size(); i++) {
+    rocksdb_column_family_handle_t* c_handle =
+        new rocksdb_column_family_handle_t;
+    c_handle->rep = handles[i];
+    column_family_handles[i] = c_handle;
+  }
+  rocksdb_t* result = new rocksdb_t;
+  result->rep = db;
+  return result;
+}
+
+rocksdb_t* rocksdb_open_column_families(
+    const rocksdb_options_t* db_options, const char* name,
+    int num_column_families, const char* const* column_family_names,
+    const rocksdb_options_t* const* column_family_options,
+    rocksdb_column_family_handle_t** column_family_handles, char** errptr) {
+  std::vector<ColumnFamilyDescriptor> column_families;
+  for (int i = 0; i < num_column_families; i++) {
+    column_families.push_back(ColumnFamilyDescriptor(
+        std::string(column_family_names[i]),
+        ColumnFamilyOptions(column_family_options[i]->rep)));
+  }
+
+  DB* db;
+  std::vector<ColumnFamilyHandle*> handles;
+  if (SaveError(errptr, DB::Open(DBOptions(db_options->rep), std::string(name),
+                                 column_families, &handles, &db))) {
+    return nullptr;
+  }
+
+  for (size_t i = 0; i < handles.size(); i++) {
+    rocksdb_column_family_handle_t* c_handle =
+        new rocksdb_column_family_handle_t;
+    c_handle->rep = handles[i];
+    column_family_handles[i] = c_handle;
+  }
+  rocksdb_t* result = new rocksdb_t;
+  result->rep = db;
+  return result;
+}
+
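The column-family open call above takes parallel arrays of names, options, and out-handles. A hedged sketch of driving it from C, reusing the hypothetical opts handle and path from the first sketch and assuming the families already exist on disk:

    char *err = NULL;
    size_t ncf = 0;
    char **names =
        rocksdb_list_column_families(opts, "/tmp/c-api-demo", &ncf, &err);

    /* one options pointer per family; sharing `opts` is fine here */
    const rocksdb_options_t **cf_opts = malloc(ncf * sizeof(*cf_opts));
    rocksdb_column_family_handle_t **handles = malloc(ncf * sizeof(*handles));
    for (size_t i = 0; i < ncf; i++) cf_opts[i] = opts;

    rocksdb_t *db = rocksdb_open_column_families(
        opts, "/tmp/c-api-demo", (int)ncf, (const char *const *)names,
        (const rocksdb_options_t *const *)cf_opts, handles, &err);

    /* ... use handles[i] with the *_cf entry points ... */
    rocksdb_list_column_families_destroy(names, ncf);
    free(cf_opts);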
+rocksdb_t* rocksdb_open_column_families_with_ttl(
+    const rocksdb_options_t* db_options, const char* name,
+    int num_column_families, const char* const* column_family_names,
+    const rocksdb_options_t* const* column_family_options,
+    rocksdb_column_family_handle_t** column_family_handles, const int* ttls,
+    char** errptr) {
+  std::vector<int32_t> ttls_vec;
+  std::vector<ColumnFamilyDescriptor> column_families;
+  for (int i = 0; i < num_column_families; i++) {
+    ttls_vec.push_back(ttls[i]);
+
+    column_families.push_back(ColumnFamilyDescriptor(
+        std::string(column_family_names[i]),
+        ColumnFamilyOptions(column_family_options[i]->rep)));
+  }
+
+  ROCKSDB_NAMESPACE::DBWithTTL* db;
+  std::vector<ColumnFamilyHandle*> handles;
+  if (SaveError(errptr, ROCKSDB_NAMESPACE::DBWithTTL::Open(
+                            DBOptions(db_options->rep), std::string(name),
+                            column_families, &handles, &db, ttls_vec))) {
+    return nullptr;
+  }
+
+  for (size_t i = 0; i < handles.size(); i++) {
+    rocksdb_column_family_handle_t* c_handle =
+        new rocksdb_column_family_handle_t;
+    c_handle->rep = handles[i];
+    column_family_handles[i] = c_handle;
+  }
+  rocksdb_t* result = new rocksdb_t;
+  result->rep = db;
+  return result;
+}
+
+rocksdb_t* rocksdb_open_for_read_only_column_families(
+    const rocksdb_options_t* db_options, const char* name,
+    int num_column_families, const char* const* column_family_names,
+    const rocksdb_options_t* const* column_family_options,
+    rocksdb_column_family_handle_t** column_family_handles,
+    unsigned char error_if_wal_file_exists, char** errptr) {
+  std::vector<ColumnFamilyDescriptor> column_families;
+  for (int i = 0; i < num_column_families; i++) {
+    column_families.push_back(ColumnFamilyDescriptor(
+        std::string(column_family_names[i]),
+        ColumnFamilyOptions(column_family_options[i]->rep)));
+  }
+
+  DB* db;
+  std::vector<ColumnFamilyHandle*> handles;
+  if (SaveError(errptr,
+                DB::OpenForReadOnly(DBOptions(db_options->rep),
+                                    std::string(name), column_families,
+                                    &handles, &db, error_if_wal_file_exists))) {
+    return nullptr;
+  }
+
+  for (size_t i = 0; i < handles.size(); i++) {
+    rocksdb_column_family_handle_t* c_handle =
+        new rocksdb_column_family_handle_t;
+    c_handle->rep = handles[i];
+    column_family_handles[i] = c_handle;
+  }
+  rocksdb_t* result = new rocksdb_t;
+  result->rep = db;
+  return result;
+}
+
+rocksdb_t* rocksdb_open_as_secondary_column_families(
+    const rocksdb_options_t* db_options, const char* name,
+    const char* secondary_path, int num_column_families,
+    const char* const* column_family_names,
+    const rocksdb_options_t* const* column_family_options,
+    rocksdb_column_family_handle_t** column_family_handles, char** errptr) {
+  std::vector<ColumnFamilyDescriptor> column_families;
+  for (int i = 0; i != num_column_families; ++i) {
+    column_families.emplace_back(
+        std::string(column_family_names[i]),
+        ColumnFamilyOptions(column_family_options[i]->rep));
+  }
+  DB* db;
+  std::vector<ColumnFamilyHandle*> handles;
+  if (SaveError(errptr, DB::OpenAsSecondary(DBOptions(db_options->rep),
+                                            std::string(name),
+                                            std::string(secondary_path),
+                                            column_families, &handles, &db))) {
+    return nullptr;
+  }
+  for (size_t i = 0; i != handles.size(); ++i) {
+    rocksdb_column_family_handle_t* c_handle =
+        new rocksdb_column_family_handle_t;
+    c_handle->rep = handles[i];
+    column_family_handles[i] = c_handle;
+  }
+  rocksdb_t* result = new rocksdb_t;
+  result->rep = db;
+  return result;
+}
+
+char** rocksdb_list_column_families(const rocksdb_options_t* options,
+                                    const char* name, size_t* lencfs,
+                                    char** errptr) {
+  std::vector<std::string> fams;
+  SaveError(errptr, DB::ListColumnFamilies(DBOptions(options->rep),
+                                           std::string(name), &fams));
+
+  *lencfs = fams.size();
+  char** column_families =
+      static_cast<char**>(malloc(sizeof(char*) * fams.size()));
+  for (size_t i = 0; i < fams.size(); i++) {
column_families[i] = strdup(fams[i].c_str()); + } + return column_families; +} + +void rocksdb_list_column_families_destroy(char** list, size_t len) { + for (size_t i = 0; i < len; ++i) { + free(list[i]); + } + free(list); +} + +rocksdb_column_family_handle_t* rocksdb_create_column_family( + rocksdb_t* db, const rocksdb_options_t* column_family_options, + const char* column_family_name, char** errptr) { + rocksdb_column_family_handle_t* handle = new rocksdb_column_family_handle_t; + SaveError(errptr, db->rep->CreateColumnFamily( + ColumnFamilyOptions(column_family_options->rep), + std::string(column_family_name), &(handle->rep))); + return handle; +} + +rocksdb_column_family_handle_t* rocksdb_create_column_family_with_ttl( + rocksdb_t* db, const rocksdb_options_t* column_family_options, + const char* column_family_name, int ttl, char** errptr) { + ROCKSDB_NAMESPACE::DBWithTTL* db_with_ttl = + static_cast(db->rep); + rocksdb_column_family_handle_t* handle = new rocksdb_column_family_handle_t; + SaveError(errptr, db_with_ttl->CreateColumnFamilyWithTtl( + ColumnFamilyOptions(column_family_options->rep), + std::string(column_family_name), &(handle->rep), ttl)); + return handle; +} + +void rocksdb_drop_column_family(rocksdb_t* db, + rocksdb_column_family_handle_t* handle, + char** errptr) { + SaveError(errptr, db->rep->DropColumnFamily(handle->rep)); +} + +uint32_t rocksdb_column_family_handle_get_id( + rocksdb_column_family_handle_t* handle) { + return handle->rep->GetID(); +} + +char* rocksdb_column_family_handle_get_name( + rocksdb_column_family_handle_t* handle, size_t* name_len) { + auto name = handle->rep->GetName(); + *name_len = name.size(); + return CopyString(name); +} + +void rocksdb_column_family_handle_destroy( + rocksdb_column_family_handle_t* handle) { + delete handle->rep; + delete handle; +} + +void rocksdb_put(rocksdb_t* db, const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, + db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen))); +} + +void rocksdb_put_cf(rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t keylen, const char* val, + size_t vallen, char** errptr) { + SaveError(errptr, db->rep->Put(options->rep, column_family->rep, + Slice(key, keylen), Slice(val, vallen))); +} + +void rocksdb_put_with_ts(rocksdb_t* db, const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, const char* ts, + size_t tslen, const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, db->rep->Put(options->rep, Slice(key, keylen), + Slice(ts, tslen), Slice(val, vallen))); +} + +void rocksdb_put_cf_with_ts(rocksdb_t* db, + const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t keylen, const char* ts, + size_t tslen, const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, + db->rep->Put(options->rep, column_family->rep, Slice(key, keylen), + Slice(ts, tslen), Slice(val, vallen))); +} + +void rocksdb_delete(rocksdb_t* db, const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, char** errptr) { + SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen))); +} + +void rocksdb_delete_cf(rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t keylen, char** errptr) { + SaveError(errptr, db->rep->Delete(options->rep, 
column_family->rep, + Slice(key, keylen))); +} + +void rocksdb_delete_with_ts(rocksdb_t* db, + const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, const char* ts, + size_t tslen, char** errptr) { + SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen), + Slice(ts, tslen))); +} + +void rocksdb_delete_cf_with_ts(rocksdb_t* db, + const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t keylen, const char* ts, + size_t tslen, char** errptr) { + SaveError(errptr, db->rep->Delete(options->rep, column_family->rep, + Slice(key, keylen), Slice(ts, tslen))); +} + +void rocksdb_singledelete(rocksdb_t* db, const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, char** errptr) { + SaveError(errptr, db->rep->SingleDelete(options->rep, Slice(key, keylen))); +} + +void rocksdb_singledelete_cf(rocksdb_t* db, + const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t keylen, char** errptr) { + SaveError(errptr, db->rep->SingleDelete(options->rep, column_family->rep, + Slice(key, keylen))); +} + +void rocksdb_singledelete_with_ts(rocksdb_t* db, + const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, + const char* ts, size_t tslen, char** errptr) { + SaveError(errptr, db->rep->SingleDelete(options->rep, Slice(key, keylen), + Slice(ts, tslen))); +} + +void rocksdb_singledelete_cf_with_ts( + rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, const char* ts, size_t tslen, char** errptr) { + SaveError(errptr, + db->rep->SingleDelete(options->rep, column_family->rep, + Slice(key, keylen), Slice(ts, tslen))); +} + +void rocksdb_increase_full_history_ts_low( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* ts_low, size_t ts_lowlen, char** errptr) { + std::string ts(ts_low, ts_lowlen); + SaveError(errptr, db->rep->IncreaseFullHistoryTsLow(column_family->rep, ts)); +} + +char* rocksdb_get_full_history_ts_low( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + size_t* ts_len, char** errptr) { + char* result = nullptr; + std::string tmp; + Status s = db->rep->GetFullHistoryTsLow(column_family->rep, &tmp); + if (s.ok()) { + *ts_len = tmp.size(); + result = CopyString(tmp); + } else { + *ts_len = 0; + SaveError(errptr, s); + } + return result; +} + +void rocksdb_delete_range_cf(rocksdb_t* db, + const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, + const char* end_key, size_t end_key_len, + char** errptr) { + SaveError(errptr, db->rep->DeleteRange(options->rep, column_family->rep, + Slice(start_key, start_key_len), + Slice(end_key, end_key_len))); +} + +void rocksdb_merge(rocksdb_t* db, const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, const char* val, + size_t vallen, char** errptr) { + SaveError(errptr, db->rep->Merge(options->rep, Slice(key, keylen), + Slice(val, vallen))); +} + +void rocksdb_merge_cf(rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t keylen, const char* val, + size_t vallen, char** errptr) { + SaveError(errptr, db->rep->Merge(options->rep, column_family->rep, + Slice(key, keylen), Slice(val, vallen))); +} + +void rocksdb_write(rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_writebatch_t* batch, 
char** errptr) {
+  SaveError(errptr, db->rep->Write(options->rep, &batch->rep));
+}
+
+char* rocksdb_get(rocksdb_t* db, const rocksdb_readoptions_t* options,
+                  const char* key, size_t keylen, size_t* vallen,
+                  char** errptr) {
+  char* result = nullptr;
+  std::string tmp;
+  Status s = db->rep->Get(options->rep, Slice(key, keylen), &tmp);
+  if (s.ok()) {
+    *vallen = tmp.size();
+    result = CopyString(tmp);
+  } else {
+    *vallen = 0;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+  }
+  return result;
+}
+
+char* rocksdb_get_cf(rocksdb_t* db, const rocksdb_readoptions_t* options,
+                     rocksdb_column_family_handle_t* column_family,
+                     const char* key, size_t keylen, size_t* vallen,
+                     char** errptr) {
+  char* result = nullptr;
+  std::string tmp;
+  Status s =
+      db->rep->Get(options->rep, column_family->rep, Slice(key, keylen), &tmp);
+  if (s.ok()) {
+    *vallen = tmp.size();
+    result = CopyString(tmp);
+  } else {
+    *vallen = 0;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+  }
+  return result;
+}
+
+char* rocksdb_get_with_ts(rocksdb_t* db, const rocksdb_readoptions_t* options,
+                          const char* key, size_t keylen, size_t* vallen,
+                          char** ts, size_t* tslen, char** errptr) {
+  char* result = nullptr;
+  std::string tmp_val;
+  std::string tmp_ts;
+  Status s = db->rep->Get(options->rep, Slice(key, keylen), &tmp_val, &tmp_ts);
+  if (s.ok()) {
+    *vallen = tmp_val.size();
+    result = CopyString(tmp_val);
+    *tslen = tmp_ts.size();
+    *ts = CopyString(tmp_ts);
+  } else {
+    *vallen = 0;
+    *tslen = 0;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+  }
+  return result;
+}
+
+char* rocksdb_get_cf_with_ts(rocksdb_t* db,
+                             const rocksdb_readoptions_t* options,
+                             rocksdb_column_family_handle_t* column_family,
+                             const char* key, size_t keylen, size_t* vallen,
+                             char** ts, size_t* tslen, char** errptr) {
+  char* result = nullptr;
+  std::string tmp;
+  std::string tmp_ts;
+  Status s = db->rep->Get(options->rep, column_family->rep, Slice(key, keylen),
+                          &tmp, &tmp_ts);
+  if (s.ok()) {
+    *vallen = tmp.size();
+    result = CopyString(tmp);
+    *tslen = tmp_ts.size();
+    *ts = CopyString(tmp_ts);
+  } else {
+    *vallen = 0;
+    *tslen = 0;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+  }
+  return result;
+}
+
+void rocksdb_multi_get(rocksdb_t* db, const rocksdb_readoptions_t* options,
+                       size_t num_keys, const char* const* keys_list,
+                       const size_t* keys_list_sizes, char** values_list,
+                       size_t* values_list_sizes, char** errs) {
+  std::vector<Slice> keys(num_keys);
+  for (size_t i = 0; i < num_keys; i++) {
+    keys[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  std::vector<std::string> values(num_keys);
+  std::vector<Status> statuses = db->rep->MultiGet(options->rep, keys, &values);
+  for (size_t i = 0; i < num_keys; i++) {
+    if (statuses[i].ok()) {
+      values_list[i] = CopyString(values[i]);
+      values_list_sizes[i] = values[i].size();
+      errs[i] = nullptr;
+    } else {
+      values_list[i] = nullptr;
+      values_list_sizes[i] = 0;
+      if (!statuses[i].IsNotFound()) {
+        errs[i] = strdup(statuses[i].ToString().c_str());
+      } else {
+        errs[i] = nullptr;
+      }
+    }
+  }
+}
+
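rocksdb_multi_get above fans one error slot out per key: a NULL value with a NULL entry in errs means NotFound, while a non-NULL errs[i] is a real failure the caller must free. A short sketch, with the hypothetical db and ropts handles from the earlier examples:

    const char *const keys[2] = {"k1", "k2"};
    const size_t klens[2] = {2, 2};
    char *vals[2];
    size_t vlens[2];
    char *errs[2];

    rocksdb_multi_get(db, ropts, 2, keys, klens, vals, vlens, errs);
    for (int i = 0; i < 2; i++) {
      if (errs[i] != NULL) {
        fprintf(stderr, "key %d: %s\n", i, errs[i]);
        free(errs[i]);
      } else if (vals[i] != NULL) {
        /* found; vals[i] is a malloc'd copy of vlens[i] bytes */
        free(vals[i]);
      } /* else: not found */
    }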
+void rocksdb_multi_get_with_ts(rocksdb_t* db,
+                               const rocksdb_readoptions_t* options,
+                               size_t num_keys, const char* const* keys_list,
+                               const size_t* keys_list_sizes,
+                               char** values_list, size_t* values_list_sizes,
+                               char** timestamp_list,
+                               size_t* timestamp_list_sizes, char** errs) {
+  std::vector<Slice> keys(num_keys);
+  for (size_t i = 0; i < num_keys; i++) {
+    keys[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  std::vector<std::string> values(num_keys);
+  std::vector<std::string> timestamps(num_keys);
+  std::vector<Status> statuses =
+      db->rep->MultiGet(options->rep, keys, &values, &timestamps);
+  for (size_t i = 0; i < num_keys; i++) {
+    if (statuses[i].ok()) {
+      values_list[i] = CopyString(values[i]);
+      values_list_sizes[i] = values[i].size();
+      timestamp_list[i] = CopyString(timestamps[i]);
+      timestamp_list_sizes[i] = timestamps[i].size();
+      errs[i] = nullptr;
+    } else {
+      values_list[i] = nullptr;
+      values_list_sizes[i] = 0;
+      timestamp_list[i] = nullptr;
+      timestamp_list_sizes[i] = 0;
+      if (!statuses[i].IsNotFound()) {
+        errs[i] = strdup(statuses[i].ToString().c_str());
+      } else {
+        errs[i] = nullptr;
+      }
+    }
+  }
+}
+
+void rocksdb_multi_get_cf(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    const rocksdb_column_family_handle_t* const* column_families,
+    size_t num_keys, const char* const* keys_list,
+    const size_t* keys_list_sizes, char** values_list,
+    size_t* values_list_sizes, char** errs) {
+  std::vector<Slice> keys(num_keys);
+  std::vector<ColumnFamilyHandle*> cfs(num_keys);
+  for (size_t i = 0; i < num_keys; i++) {
+    keys[i] = Slice(keys_list[i], keys_list_sizes[i]);
+    cfs[i] = column_families[i]->rep;
+  }
+  std::vector<std::string> values(num_keys);
+  std::vector<Status> statuses =
+      db->rep->MultiGet(options->rep, cfs, keys, &values);
+  for (size_t i = 0; i < num_keys; i++) {
+    if (statuses[i].ok()) {
+      values_list[i] = CopyString(values[i]);
+      values_list_sizes[i] = values[i].size();
+      errs[i] = nullptr;
+    } else {
+      values_list[i] = nullptr;
+      values_list_sizes[i] = 0;
+      if (!statuses[i].IsNotFound()) {
+        errs[i] = strdup(statuses[i].ToString().c_str());
+      } else {
+        errs[i] = nullptr;
+      }
+    }
+  }
+}
+
+void rocksdb_multi_get_cf_with_ts(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    const rocksdb_column_family_handle_t* const* column_families,
+    size_t num_keys, const char* const* keys_list,
+    const size_t* keys_list_sizes, char** values_list,
+    size_t* values_list_sizes, char** timestamps_list,
+    size_t* timestamps_list_sizes, char** errs) {
+  std::vector<Slice> keys(num_keys);
+  std::vector<ColumnFamilyHandle*> cfs(num_keys);
+  for (size_t i = 0; i < num_keys; i++) {
+    keys[i] = Slice(keys_list[i], keys_list_sizes[i]);
+    cfs[i] = column_families[i]->rep;
+  }
+  std::vector<std::string> values(num_keys);
+  std::vector<std::string> timestamps(num_keys);
+  std::vector<Status> statuses =
+      db->rep->MultiGet(options->rep, cfs, keys, &values, &timestamps);
+  for (size_t i = 0; i < num_keys; i++) {
+    if (statuses[i].ok()) {
+      values_list[i] = CopyString(values[i]);
+      values_list_sizes[i] = values[i].size();
+      timestamps_list[i] = CopyString(timestamps[i]);
+      timestamps_list_sizes[i] = timestamps[i].size();
+      errs[i] = nullptr;
+    } else {
+      values_list[i] = nullptr;
+      values_list_sizes[i] = 0;
+      timestamps_list[i] = nullptr;
+      timestamps_list_sizes[i] = 0;
+      if (!statuses[i].IsNotFound()) {
+        errs[i] = strdup(statuses[i].ToString().c_str());
+      } else {
+        errs[i] = nullptr;
+      }
+    }
+  }
+}
+
+void rocksdb_batched_multi_get_cf(rocksdb_t* db,
+                                  const rocksdb_readoptions_t* options,
+                                  rocksdb_column_family_handle_t* column_family,
+                                  size_t num_keys, const char* const* keys_list,
+                                  const size_t* keys_list_sizes,
+                                  rocksdb_pinnableslice_t** values, char** errs,
+                                  const bool sorted_input) {
+  Slice* key_slices = new Slice[num_keys];
+  PinnableSlice* value_slices = new PinnableSlice[num_keys];
+  Status* statuses = new Status[num_keys];
+  for (size_t i = 0; i < num_keys; ++i) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+
+  db->rep->MultiGet(options->rep, column_family->rep, num_keys, key_slices,
+                    value_slices, statuses, sorted_input);
+
+  for (size_t i = 0; i < num_keys;
++i) { + if (statuses[i].ok()) { + values[i] = new (rocksdb_pinnableslice_t); + values[i]->rep = std::move(value_slices[i]); + errs[i] = nullptr; + } else { + values[i] = nullptr; + if (!statuses[i].IsNotFound()) { + errs[i] = strdup(statuses[i].ToString().c_str()); + } else { + errs[i] = nullptr; + } + } + } + + delete[] key_slices; + delete[] value_slices; + delete[] statuses; +} + +unsigned char rocksdb_key_may_exist(rocksdb_t* db, + const rocksdb_readoptions_t* options, + const char* key, size_t key_len, + char** value, size_t* val_len, + const char* timestamp, size_t timestamp_len, + unsigned char* value_found) { + std::string tmp; + std::string time; + if (timestamp) { + time.assign(timestamp, timestamp_len); + } + bool found = false; + const bool result = db->rep->KeyMayExist(options->rep, Slice(key, key_len), + &tmp, timestamp ? &time : nullptr, + value_found ? &found : nullptr); + if (value_found) { + *value_found = found; + if (found) { + *val_len = tmp.size(); + *value = CopyString(tmp); + } + } + return result; +} + +unsigned char rocksdb_key_may_exist_cf( + rocksdb_t* db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t key_len, char** value, size_t* val_len, const char* timestamp, + size_t timestamp_len, unsigned char* value_found) { + std::string tmp; + std::string time; + if (timestamp) { + time.assign(timestamp, timestamp_len); + } + bool found = false; + const bool result = db->rep->KeyMayExist( + options->rep, column_family->rep, Slice(key, key_len), &tmp, + timestamp ? &time : nullptr, value_found ? &found : nullptr); + if (value_found) { + *value_found = found; + if (found) { + *val_len = tmp.size(); + *value = CopyString(tmp); + } + } + return result; +} + +rocksdb_iterator_t* rocksdb_create_iterator( + rocksdb_t* db, const rocksdb_readoptions_t* options) { + rocksdb_iterator_t* result = new rocksdb_iterator_t; + result->rep = db->rep->NewIterator(options->rep); + return result; +} + +rocksdb_wal_iterator_t* rocksdb_get_updates_since( + rocksdb_t* db, uint64_t seq_number, + const rocksdb_wal_readoptions_t* options, char** errptr) { + std::unique_ptr iter; + TransactionLogIterator::ReadOptions ro; + if (options != nullptr) { + ro = options->rep; + } + if (SaveError(errptr, db->rep->GetUpdatesSince(seq_number, &iter, ro))) { + return nullptr; + } + rocksdb_wal_iterator_t* result = new rocksdb_wal_iterator_t; + result->rep = iter.release(); + return result; +} + +void rocksdb_wal_iter_next(rocksdb_wal_iterator_t* iter) { iter->rep->Next(); } + +unsigned char rocksdb_wal_iter_valid(const rocksdb_wal_iterator_t* iter) { + return iter->rep->Valid(); +} + +void rocksdb_wal_iter_status(const rocksdb_wal_iterator_t* iter, + char** errptr) { + SaveError(errptr, iter->rep->status()); +} + +void rocksdb_wal_iter_destroy(const rocksdb_wal_iterator_t* iter) { + delete iter->rep; + delete iter; +} + +rocksdb_writebatch_t* rocksdb_wal_iter_get_batch( + const rocksdb_wal_iterator_t* iter, uint64_t* seq) { + rocksdb_writebatch_t* result = rocksdb_writebatch_create(); + BatchResult wal_batch = iter->rep->GetBatch(); + result->rep = std::move(*wal_batch.writeBatchPtr); + if (seq != nullptr) { + *seq = wal_batch.sequence; + } + return result; +} + +uint64_t rocksdb_get_latest_sequence_number(rocksdb_t* db) { + return db->rep->GetLatestSequenceNumber(); +} + +rocksdb_iterator_t* rocksdb_create_iterator_cf( + rocksdb_t* db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family) { + 
rocksdb_iterator_t* result = new rocksdb_iterator_t; + result->rep = db->rep->NewIterator(options->rep, column_family->rep); + return result; +} + +void rocksdb_create_iterators(rocksdb_t* db, rocksdb_readoptions_t* opts, + rocksdb_column_family_handle_t** column_families, + rocksdb_iterator_t** iterators, size_t size, + char** errptr) { + std::vector column_families_vec; + for (size_t i = 0; i < size; i++) { + column_families_vec.push_back(column_families[i]->rep); + } + + std::vector res; + Status status = db->rep->NewIterators(opts->rep, column_families_vec, &res); + assert(res.size() == size); + if (SaveError(errptr, status)) { + return; + } + + for (size_t i = 0; i < size; i++) { + iterators[i] = new rocksdb_iterator_t; + iterators[i]->rep = res[i]; + } +} + +const rocksdb_snapshot_t* rocksdb_create_snapshot(rocksdb_t* db) { + rocksdb_snapshot_t* result = new rocksdb_snapshot_t; + result->rep = db->rep->GetSnapshot(); + return result; +} + +void rocksdb_release_snapshot(rocksdb_t* db, + const rocksdb_snapshot_t* snapshot) { + db->rep->ReleaseSnapshot(snapshot->rep); + delete snapshot; +} + +char* rocksdb_property_value(rocksdb_t* db, const char* propname) { + std::string tmp; + if (db->rep->GetProperty(Slice(propname), &tmp)) { + // We use strdup() since we expect human readable output. + return strdup(tmp.c_str()); + } else { + return nullptr; + } +} + +int rocksdb_property_int(rocksdb_t* db, const char* propname, + uint64_t* out_val) { + if (db->rep->GetIntProperty(Slice(propname), out_val)) { + return 0; + } else { + return -1; + } +} + +int rocksdb_property_int_cf(rocksdb_t* db, + rocksdb_column_family_handle_t* column_family, + const char* propname, uint64_t* out_val) { + if (db->rep->GetIntProperty(column_family->rep, Slice(propname), out_val)) { + return 0; + } else { + return -1; + } +} + +char* rocksdb_property_value_cf(rocksdb_t* db, + rocksdb_column_family_handle_t* column_family, + const char* propname) { + std::string tmp; + if (db->rep->GetProperty(column_family->rep, Slice(propname), &tmp)) { + // We use strdup() since we expect human readable output. 
+ return strdup(tmp.c_str()); + } else { + return nullptr; + } +} + +void rocksdb_approximate_sizes(rocksdb_t* db, int num_ranges, + const char* const* range_start_key, + const size_t* range_start_key_len, + const char* const* range_limit_key, + const size_t* range_limit_key_len, + uint64_t* sizes, char** errptr) { + Range* ranges = new Range[num_ranges]; + for (int i = 0; i < num_ranges; i++) { + ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); + ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]); + } + Status s = db->rep->GetApproximateSizes(ranges, num_ranges, sizes); + if (!s.ok()) { + SaveError(errptr, s); + } + delete[] ranges; +} + +void rocksdb_approximate_sizes_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + int num_ranges, const char* const* range_start_key, + const size_t* range_start_key_len, const char* const* range_limit_key, + const size_t* range_limit_key_len, uint64_t* sizes, char** errptr) { + Range* ranges = new Range[num_ranges]; + for (int i = 0; i < num_ranges; i++) { + ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); + ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]); + } + Status s = db->rep->GetApproximateSizes(column_family->rep, ranges, + num_ranges, sizes); + if (!s.ok()) { + SaveError(errptr, s); + } + delete[] ranges; +} + +void rocksdb_delete_file(rocksdb_t* db, const char* name) { + db->rep->DeleteFile(name); +} + +const rocksdb_livefiles_t* rocksdb_livefiles(rocksdb_t* db) { + rocksdb_livefiles_t* result = new rocksdb_livefiles_t; + db->rep->GetLiveFilesMetaData(&result->rep); + return result; +} + +void rocksdb_compact_range(rocksdb_t* db, const char* start_key, + size_t start_key_len, const char* limit_key, + size_t limit_key_len) { + Slice a, b; + db->rep->CompactRange( + CompactRangeOptions(), + // Pass nullptr Slice if corresponding "const char*" is nullptr + (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)); +} + +void rocksdb_compact_range_cf(rocksdb_t* db, + rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len) { + Slice a, b; + db->rep->CompactRange( + CompactRangeOptions(), column_family->rep, + // Pass nullptr Slice if corresponding "const char*" is nullptr + (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)); +} + +void rocksdb_suggest_compact_range(rocksdb_t* db, const char* start_key, + size_t start_key_len, const char* limit_key, + size_t limit_key_len, char** errptr) { + Slice a, b; + Status s = ROCKSDB_NAMESPACE::experimental::SuggestCompactRange( + db->rep, + (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)); + SaveError(errptr, s); +} + +void rocksdb_suggest_compact_range_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, const char* limit_key, + size_t limit_key_len, char** errptr) { + Slice a, b; + Status s = db->rep->SuggestCompactRange( + column_family->rep, + (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), + (limit_key ? 
(b = Slice(limit_key, limit_key_len), &b) : nullptr)); + SaveError(errptr, s); +} + +void rocksdb_compact_range_opt(rocksdb_t* db, rocksdb_compactoptions_t* opt, + const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len) { + Slice a, b; + db->rep->CompactRange( + opt->rep, + // Pass nullptr Slice if corresponding "const char*" is nullptr + (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)); +} + +void rocksdb_compact_range_cf_opt(rocksdb_t* db, + rocksdb_column_family_handle_t* column_family, + rocksdb_compactoptions_t* opt, + const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len) { + Slice a, b; + db->rep->CompactRange( + opt->rep, column_family->rep, + // Pass nullptr Slice if corresponding "const char*" is nullptr + (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)); +} + +void rocksdb_flush(rocksdb_t* db, const rocksdb_flushoptions_t* options, + char** errptr) { + SaveError(errptr, db->rep->Flush(options->rep)); +} + +void rocksdb_flush_cf(rocksdb_t* db, const rocksdb_flushoptions_t* options, + rocksdb_column_family_handle_t* column_family, + char** errptr) { + SaveError(errptr, db->rep->Flush(options->rep, column_family->rep)); +} + +void rocksdb_flush_cfs(rocksdb_t* db, const rocksdb_flushoptions_t* options, + rocksdb_column_family_handle_t** column_families, + int num_column_families, char** errptr) { + std::vector column_family_handles; + for (int i = 0; i < num_column_families; i++) { + column_family_handles.push_back(column_families[i]->rep); + } + + SaveError(errptr, db->rep->Flush(options->rep, column_family_handles)); +} + +void rocksdb_flush_wal(rocksdb_t* db, unsigned char sync, char** errptr) { + SaveError(errptr, db->rep->FlushWAL(sync)); +} + +void rocksdb_disable_file_deletions(rocksdb_t* db, char** errptr) { + SaveError(errptr, db->rep->DisableFileDeletions()); +} + +void rocksdb_enable_file_deletions(rocksdb_t* db, unsigned char force, + char** errptr) { + SaveError(errptr, db->rep->EnableFileDeletions(force)); +} + +void rocksdb_destroy_db(const rocksdb_options_t* options, const char* name, + char** errptr) { + SaveError(errptr, DestroyDB(name, options->rep)); +} + +void rocksdb_repair_db(const rocksdb_options_t* options, const char* name, + char** errptr) { + SaveError(errptr, RepairDB(name, options->rep)); +} + +void rocksdb_iter_destroy(rocksdb_iterator_t* iter) { + delete iter->rep; + delete iter; +} + +unsigned char rocksdb_iter_valid(const rocksdb_iterator_t* iter) { + return iter->rep->Valid(); +} + +void rocksdb_iter_seek_to_first(rocksdb_iterator_t* iter) { + iter->rep->SeekToFirst(); +} + +void rocksdb_iter_seek_to_last(rocksdb_iterator_t* iter) { + iter->rep->SeekToLast(); +} + +void rocksdb_iter_seek(rocksdb_iterator_t* iter, const char* k, size_t klen) { + iter->rep->Seek(Slice(k, klen)); +} + +void rocksdb_iter_seek_for_prev(rocksdb_iterator_t* iter, const char* k, + size_t klen) { + iter->rep->SeekForPrev(Slice(k, klen)); +} + +void rocksdb_iter_next(rocksdb_iterator_t* iter) { iter->rep->Next(); } + +void rocksdb_iter_prev(rocksdb_iterator_t* iter) { iter->rep->Prev(); } + +const char* rocksdb_iter_key(const rocksdb_iterator_t* iter, size_t* klen) { + Slice s = iter->rep->key(); + *klen = s.size(); + return s.data(); +} + +const char* rocksdb_iter_value(const rocksdb_iterator_t* iter, size_t* vlen) { + Slice s = 
iter->rep->value();
+  *vlen = s.size();
+  return s.data();
+}
+
+const char* rocksdb_iter_timestamp(const rocksdb_iterator_t* iter,
+                                   size_t* tslen) {
+  Slice s = iter->rep->timestamp();
+  *tslen = s.size();
+  return s.data();
+}
+
+void rocksdb_iter_get_error(const rocksdb_iterator_t* iter, char** errptr) {
+  SaveError(errptr, iter->rep->status());
+}
+
+rocksdb_writebatch_t* rocksdb_writebatch_create() {
+  return new rocksdb_writebatch_t;
+}
+
+rocksdb_writebatch_t* rocksdb_writebatch_create_from(const char* rep,
+                                                     size_t size) {
+  rocksdb_writebatch_t* b = new rocksdb_writebatch_t;
+  b->rep = WriteBatch(std::string(rep, size));
+  return b;
+}
+
+void rocksdb_writebatch_destroy(rocksdb_writebatch_t* b) { delete b; }
+
+void rocksdb_writebatch_clear(rocksdb_writebatch_t* b) { b->rep.Clear(); }
+
+int rocksdb_writebatch_count(rocksdb_writebatch_t* b) { return b->rep.Count(); }
+
+void rocksdb_writebatch_put(rocksdb_writebatch_t* b, const char* key,
+                            size_t klen, const char* val, size_t vlen) {
+  b->rep.Put(Slice(key, klen), Slice(val, vlen));
+}
+
+void rocksdb_writebatch_put_cf(rocksdb_writebatch_t* b,
+                               rocksdb_column_family_handle_t* column_family,
+                               const char* key, size_t klen, const char* val,
+                               size_t vlen) {
+  b->rep.Put(column_family->rep, Slice(key, klen), Slice(val, vlen));
+}
+
+void rocksdb_writebatch_put_cf_with_ts(
+    rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family,
+    const char* key, size_t klen, const char* ts, size_t tslen, const char* val,
+    size_t vlen) {
+  b->rep.Put(column_family->rep, Slice(key, klen), Slice(ts, tslen),
+             Slice(val, vlen));
+}
+
+void rocksdb_writebatch_putv(rocksdb_writebatch_t* b, int num_keys,
+                             const char* const* keys_list,
+                             const size_t* keys_list_sizes, int num_values,
+                             const char* const* values_list,
+                             const size_t* values_list_sizes) {
+  std::vector<Slice> key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  std::vector<Slice> value_slices(num_values);
+  for (int i = 0; i < num_values; i++) {
+    value_slices[i] = Slice(values_list[i], values_list_sizes[i]);
+  }
+  b->rep.Put(SliceParts(key_slices.data(), num_keys),
+             SliceParts(value_slices.data(), num_values));
+}
+
+void rocksdb_writebatch_putv_cf(rocksdb_writebatch_t* b,
+                                rocksdb_column_family_handle_t* column_family,
+                                int num_keys, const char* const* keys_list,
+                                const size_t* keys_list_sizes, int num_values,
+                                const char* const* values_list,
+                                const size_t* values_list_sizes) {
+  std::vector<Slice> key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  std::vector<Slice> value_slices(num_values);
+  for (int i = 0; i < num_values; i++) {
+    value_slices[i] = Slice(values_list[i], values_list_sizes[i]);
+  }
+  b->rep.Put(column_family->rep, SliceParts(key_slices.data(), num_keys),
+             SliceParts(value_slices.data(), num_values));
+}
+
+void rocksdb_writebatch_merge(rocksdb_writebatch_t* b, const char* key,
+                              size_t klen, const char* val, size_t vlen) {
+  b->rep.Merge(Slice(key, klen), Slice(val, vlen));
+}
+
+void rocksdb_writebatch_merge_cf(rocksdb_writebatch_t* b,
+                                 rocksdb_column_family_handle_t* column_family,
+                                 const char* key, size_t klen, const char* val,
+                                 size_t vlen) {
+  b->rep.Merge(column_family->rep, Slice(key, klen), Slice(val, vlen));
+}
+
values_list_sizes) { + std::vector key_slices(num_keys); + for (int i = 0; i < num_keys; i++) { + key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); + } + std::vector value_slices(num_values); + for (int i = 0; i < num_values; i++) { + value_slices[i] = Slice(values_list[i], values_list_sizes[i]); + } + b->rep.Merge(SliceParts(key_slices.data(), num_keys), + SliceParts(value_slices.data(), num_values)); +} + +void rocksdb_writebatch_mergev_cf(rocksdb_writebatch_t* b, + rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, int num_values, + const char* const* values_list, + const size_t* values_list_sizes) { + std::vector key_slices(num_keys); + for (int i = 0; i < num_keys; i++) { + key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); + } + std::vector value_slices(num_values); + for (int i = 0; i < num_values; i++) { + value_slices[i] = Slice(values_list[i], values_list_sizes[i]); + } + b->rep.Merge(column_family->rep, SliceParts(key_slices.data(), num_keys), + SliceParts(value_slices.data(), num_values)); +} + +void rocksdb_writebatch_delete(rocksdb_writebatch_t* b, const char* key, + size_t klen) { + b->rep.Delete(Slice(key, klen)); +} + +void rocksdb_writebatch_singledelete(rocksdb_writebatch_t* b, const char* key, + size_t klen) { + b->rep.SingleDelete(Slice(key, klen)); +} + +void rocksdb_writebatch_delete_cf(rocksdb_writebatch_t* b, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen) { + b->rep.Delete(column_family->rep, Slice(key, klen)); +} + +void rocksdb_writebatch_delete_cf_with_ts( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* ts, size_t tslen) { + b->rep.Delete(column_family->rep, Slice(key, klen), Slice(ts, tslen)); +} + +void rocksdb_writebatch_singledelete_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen) { + b->rep.SingleDelete(column_family->rep, Slice(key, klen)); +} + +void rocksdb_writebatch_singledelete_cf_with_ts( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* ts, size_t tslen) { + b->rep.SingleDelete(column_family->rep, Slice(key, klen), Slice(ts, tslen)); +} + +void rocksdb_writebatch_deletev(rocksdb_writebatch_t* b, int num_keys, + const char* const* keys_list, + const size_t* keys_list_sizes) { + std::vector key_slices(num_keys); + for (int i = 0; i < num_keys; i++) { + key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); + } + b->rep.Delete(SliceParts(key_slices.data(), num_keys)); +} + +void rocksdb_writebatch_deletev_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* keys_list, const size_t* keys_list_sizes) { + std::vector key_slices(num_keys); + for (int i = 0; i < num_keys; i++) { + key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); + } + b->rep.Delete(column_family->rep, SliceParts(key_slices.data(), num_keys)); +} + +void rocksdb_writebatch_delete_range(rocksdb_writebatch_t* b, + const char* start_key, + size_t start_key_len, const char* end_key, + size_t end_key_len) { + b->rep.DeleteRange(Slice(start_key, start_key_len), + Slice(end_key, end_key_len)); +} + +void rocksdb_writebatch_delete_range_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, const char* end_key, + size_t end_key_len) { + 
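+
+// Usage sketch (editorial illustration, not part of the vendored file):
+// grouping a put and a delete so they are applied atomically, assuming
+// `db` and `wopts` come from rocksdb_open() and
+// rocksdb_writeoptions_create().
+//
+//   rocksdb_writebatch_t* wb = rocksdb_writebatch_create();
+//   rocksdb_writebatch_put(wb, "key", 3, "value", 5);
+//   rocksdb_writebatch_delete(wb, "old", 3);
+//   char* err = NULL;
+//   rocksdb_write(db, wopts, wb, &err);  /* both operations or neither */
+//   rocksdb_writebatch_destroy(wb);
+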
+void rocksdb_writebatch_delete_range(rocksdb_writebatch_t* b,
+                                     const char* start_key,
+                                     size_t start_key_len, const char* end_key,
+                                     size_t end_key_len) {
+  b->rep.DeleteRange(Slice(start_key, start_key_len),
+                     Slice(end_key, end_key_len));
+}
+
+void rocksdb_writebatch_delete_range_cf(
+    rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family,
+    const char* start_key, size_t start_key_len, const char* end_key,
+    size_t end_key_len) {
+  b->rep.DeleteRange(column_family->rep, Slice(start_key, start_key_len),
+                     Slice(end_key, end_key_len));
+}
+
+void rocksdb_writebatch_delete_rangev(rocksdb_writebatch_t* b, int num_keys,
+                                      const char* const* start_keys_list,
+                                      const size_t* start_keys_list_sizes,
+                                      const char* const* end_keys_list,
+                                      const size_t* end_keys_list_sizes) {
+  std::vector<Slice> start_key_slices(num_keys);
+  std::vector<Slice> end_key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    start_key_slices[i] = Slice(start_keys_list[i], start_keys_list_sizes[i]);
+    end_key_slices[i] = Slice(end_keys_list[i], end_keys_list_sizes[i]);
+  }
+  b->rep.DeleteRange(SliceParts(start_key_slices.data(), num_keys),
+                     SliceParts(end_key_slices.data(), num_keys));
+}
+
+void rocksdb_writebatch_delete_rangev_cf(
+    rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family,
+    int num_keys, const char* const* start_keys_list,
+    const size_t* start_keys_list_sizes, const char* const* end_keys_list,
+    const size_t* end_keys_list_sizes) {
+  std::vector<Slice> start_key_slices(num_keys);
+  std::vector<Slice> end_key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    start_key_slices[i] = Slice(start_keys_list[i], start_keys_list_sizes[i]);
+    end_key_slices[i] = Slice(end_keys_list[i], end_keys_list_sizes[i]);
+  }
+  b->rep.DeleteRange(column_family->rep,
+                     SliceParts(start_key_slices.data(), num_keys),
+                     SliceParts(end_key_slices.data(), num_keys));
+}
+
+void rocksdb_writebatch_put_log_data(rocksdb_writebatch_t* b, const char* blob,
+                                     size_t len) {
+  b->rep.PutLogData(Slice(blob, len));
+}
+
+class H : public WriteBatch::Handler {
+ public:
+  void* state_;
+  void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen);
+  void (*deleted_)(void*, const char* k, size_t klen);
+  void Put(const Slice& key, const Slice& value) override {
+    (*put_)(state_, key.data(), key.size(), value.data(), value.size());
+  }
+  void Delete(const Slice& key) override {
+    (*deleted_)(state_, key.data(), key.size());
+  }
+};
+
+void rocksdb_writebatch_iterate(rocksdb_writebatch_t* b, void* state,
+                                void (*put)(void*, const char* k, size_t klen,
+                                            const char* v, size_t vlen),
+                                void (*deleted)(void*, const char* k,
+                                                size_t klen)) {
+  H handler;
+  handler.state_ = state;
+  handler.put_ = put;
+  handler.deleted_ = deleted;
+  b->rep.Iterate(&handler);
+}
+
+const char* rocksdb_writebatch_data(rocksdb_writebatch_t* b, size_t* size) {
+  *size = b->rep.GetDataSize();
+  return b->rep.Data().c_str();
+}
+
+void rocksdb_writebatch_set_save_point(rocksdb_writebatch_t* b) {
+  b->rep.SetSavePoint();
+}
+
+void rocksdb_writebatch_rollback_to_save_point(rocksdb_writebatch_t* b,
+                                               char** errptr) {
+  SaveError(errptr, b->rep.RollbackToSavePoint());
+}
+
+void rocksdb_writebatch_pop_save_point(rocksdb_writebatch_t* b, char** errptr) {
+  SaveError(errptr, b->rep.PopSavePoint());
+}
+
+rocksdb_writebatch_wi_t* rocksdb_writebatch_wi_create(
+    size_t reserved_bytes, unsigned char overwrite_key) {
+  rocksdb_writebatch_wi_t* b = new rocksdb_writebatch_wi_t;
+  b->rep = new WriteBatchWithIndex(BytewiseComparator(), reserved_bytes,
+                                   overwrite_key);
+  return b;
+}
+
+void rocksdb_writebatch_wi_destroy(rocksdb_writebatch_wi_t* b) {
+  if (b->rep) {
+    delete b->rep;
+  }
+  delete b;
+}
+
+void rocksdb_writebatch_wi_clear(rocksdb_writebatch_wi_t* b) {
+  b->rep->Clear();
+}
+
+int rocksdb_writebatch_wi_count(rocksdb_writebatch_wi_t* b) {
+  return b->rep->GetWriteBatch()->Count();
+}
+
+void rocksdb_writebatch_wi_put(rocksdb_writebatch_wi_t* b, const char* key,
+                               size_t klen, const char* val, size_t vlen) {
+  b->rep->Put(Slice(key, klen), Slice(val, vlen));
+}
+
+void rocksdb_writebatch_wi_put_cf(rocksdb_writebatch_wi_t* b,
+                                  rocksdb_column_family_handle_t* column_family,
+                                  const char* key, size_t klen, const char* val,
+                                  size_t vlen) {
+  b->rep->Put(column_family->rep, Slice(key, klen), Slice(val, vlen));
+}
+
+void rocksdb_writebatch_wi_putv(rocksdb_writebatch_wi_t* b, int num_keys,
+                                const char* const* keys_list,
+                                const size_t* keys_list_sizes, int num_values,
+                                const char* const* values_list,
+                                const size_t* values_list_sizes) {
+  std::vector<Slice> key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  std::vector<Slice> value_slices(num_values);
+  for (int i = 0; i < num_values; i++) {
+    value_slices[i] = Slice(values_list[i], values_list_sizes[i]);
+  }
+  b->rep->Put(SliceParts(key_slices.data(), num_keys),
+              SliceParts(value_slices.data(), num_values));
+}
+
+void rocksdb_writebatch_wi_putv_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    int num_keys, const char* const* keys_list, const size_t* keys_list_sizes,
+    int num_values, const char* const* values_list,
+    const size_t* values_list_sizes) {
+  std::vector<Slice> key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  std::vector<Slice> value_slices(num_values);
+  for (int i = 0; i < num_values; i++) {
+    value_slices[i] = Slice(values_list[i], values_list_sizes[i]);
+  }
+  b->rep->Put(column_family->rep, SliceParts(key_slices.data(), num_keys),
+              SliceParts(value_slices.data(), num_values));
+}
+
+void rocksdb_writebatch_wi_merge(rocksdb_writebatch_wi_t* b, const char* key,
+                                 size_t klen, const char* val, size_t vlen) {
+  b->rep->Merge(Slice(key, klen), Slice(val, vlen));
+}
+
+void rocksdb_writebatch_wi_merge_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    const char* key, size_t klen, const char* val, size_t vlen) {
+  b->rep->Merge(column_family->rep, Slice(key, klen), Slice(val, vlen));
+}
+
+void rocksdb_writebatch_wi_mergev(rocksdb_writebatch_wi_t* b, int num_keys,
+                                  const char* const* keys_list,
+                                  const size_t* keys_list_sizes, int num_values,
+                                  const char* const* values_list,
+                                  const size_t* values_list_sizes) {
+  std::vector<Slice> key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  std::vector<Slice> value_slices(num_values);
+  for (int i = 0; i < num_values; i++) {
+    value_slices[i] = Slice(values_list[i], values_list_sizes[i]);
+  }
+  b->rep->Merge(SliceParts(key_slices.data(), num_keys),
+                SliceParts(value_slices.data(), num_values));
+}
+
+void rocksdb_writebatch_wi_mergev_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    int num_keys, const char* const* keys_list, const size_t* keys_list_sizes,
+    int num_values, const char* const* values_list,
+    const size_t* values_list_sizes) {
+  std::vector<Slice> key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  std::vector<Slice> value_slices(num_values);
+  for (int i = 0; i < num_values; i++) {
+    value_slices[i] = Slice(values_list[i], values_list_sizes[i]);
+  }
+  b->rep->Merge(column_family->rep, SliceParts(key_slices.data(), num_keys),
+                SliceParts(value_slices.data(), num_values));
+}
+
+void rocksdb_writebatch_wi_delete(rocksdb_writebatch_wi_t* b, const char* key,
+                                  size_t klen) {
+  b->rep->Delete(Slice(key, klen));
+}
+
+void rocksdb_writebatch_wi_singledelete(rocksdb_writebatch_wi_t* b,
+                                        const char* key, size_t klen) {
+  b->rep->SingleDelete(Slice(key, klen));
+}
+
+void rocksdb_writebatch_wi_delete_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    const char* key, size_t klen) {
+  b->rep->Delete(column_family->rep, Slice(key, klen));
+}
+
+void rocksdb_writebatch_wi_singledelete_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    const char* key, size_t klen) {
+  b->rep->SingleDelete(column_family->rep, Slice(key, klen));
+}
+
+void rocksdb_writebatch_wi_deletev(rocksdb_writebatch_wi_t* b, int num_keys,
+                                   const char* const* keys_list,
+                                   const size_t* keys_list_sizes) {
+  std::vector<Slice> key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  b->rep->Delete(SliceParts(key_slices.data(), num_keys));
+}
+
+void rocksdb_writebatch_wi_deletev_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    int num_keys, const char* const* keys_list, const size_t* keys_list_sizes) {
+  std::vector<Slice> key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+  b->rep->Delete(column_family->rep, SliceParts(key_slices.data(), num_keys));
+}
+
+void rocksdb_writebatch_wi_delete_range(rocksdb_writebatch_wi_t* b,
+                                        const char* start_key,
+                                        size_t start_key_len,
+                                        const char* end_key,
+                                        size_t end_key_len) {
+  b->rep->DeleteRange(Slice(start_key, start_key_len),
+                      Slice(end_key, end_key_len));
+}
+
+void rocksdb_writebatch_wi_delete_range_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    const char* start_key, size_t start_key_len, const char* end_key,
+    size_t end_key_len) {
+  b->rep->DeleteRange(column_family->rep, Slice(start_key, start_key_len),
+                      Slice(end_key, end_key_len));
+}
+
+void rocksdb_writebatch_wi_delete_rangev(rocksdb_writebatch_wi_t* b,
+                                         int num_keys,
+                                         const char* const* start_keys_list,
+                                         const size_t* start_keys_list_sizes,
+                                         const char* const* end_keys_list,
+                                         const size_t* end_keys_list_sizes) {
+  std::vector<Slice> start_key_slices(num_keys);
+  std::vector<Slice> end_key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    start_key_slices[i] = Slice(start_keys_list[i], start_keys_list_sizes[i]);
+    end_key_slices[i] = Slice(end_keys_list[i], end_keys_list_sizes[i]);
+  }
+  b->rep->DeleteRange(SliceParts(start_key_slices.data(), num_keys),
+                      SliceParts(end_key_slices.data(), num_keys));
+}
+
+void rocksdb_writebatch_wi_delete_rangev_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    int num_keys, const char* const* start_keys_list,
+    const size_t* start_keys_list_sizes, const char* const* end_keys_list,
+    const size_t* end_keys_list_sizes) {
+  std::vector<Slice> start_key_slices(num_keys);
+  std::vector<Slice> end_key_slices(num_keys);
+  for (int i = 0; i < num_keys; i++) {
+    start_key_slices[i] = Slice(start_keys_list[i], start_keys_list_sizes[i]);
+    end_key_slices[i] = Slice(end_keys_list[i], end_keys_list_sizes[i]);
+  }
+  b->rep->DeleteRange(column_family->rep,
+                      SliceParts(start_key_slices.data(), num_keys),
+                      SliceParts(end_key_slices.data(), num_keys));
+}
+
+void rocksdb_writebatch_wi_put_log_data(rocksdb_writebatch_wi_t* b,
+                                        const char* blob, size_t len) {
+  b->rep->PutLogData(Slice(blob, len));
+}
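+
+// Usage sketch (editorial illustration, not part of the vendored file):
+// read-your-own-writes through a WriteBatchWithIndex before it is
+// committed, assuming `db`, `opts` (a rocksdb_options_t*) and `wopts`
+// already exist; rocksdb_writebatch_wi_get_from_batch() and
+// rocksdb_write_writebatch_wi() are defined just below.
+//
+//   rocksdb_writebatch_wi_t* wbwi = rocksdb_writebatch_wi_create(0, 1);
+//   rocksdb_writebatch_wi_put(wbwi, "k", 1, "v1", 2);
+//   size_t vlen = 0;
+//   char* err = NULL;
+//   char* v = rocksdb_writebatch_wi_get_from_batch(wbwi, opts, "k", 1,
+//                                                  &vlen, &err);
+//   /* v is "v1" although nothing has reached the DB yet */
+//   rocksdb_write_writebatch_wi(db, wopts, wbwi, &err);
+//   free(v);
+//   rocksdb_writebatch_wi_destroy(wbwi);
+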
+void rocksdb_writebatch_wi_iterate(
+    rocksdb_writebatch_wi_t* b, void* state,
+    void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
+    void (*deleted)(void*, const char* k, size_t klen)) {
+  H handler;
+  handler.state_ = state;
+  handler.put_ = put;
+  handler.deleted_ = deleted;
+  b->rep->GetWriteBatch()->Iterate(&handler);
+}
+
+const char* rocksdb_writebatch_wi_data(rocksdb_writebatch_wi_t* b,
+                                       size_t* size) {
+  WriteBatch* wb = b->rep->GetWriteBatch();
+  *size = wb->GetDataSize();
+  return wb->Data().c_str();
+}
+
+void rocksdb_writebatch_wi_set_save_point(rocksdb_writebatch_wi_t* b) {
+  b->rep->SetSavePoint();
+}
+
+void rocksdb_writebatch_wi_rollback_to_save_point(rocksdb_writebatch_wi_t* b,
+                                                  char** errptr) {
+  SaveError(errptr, b->rep->RollbackToSavePoint());
+}
+
+rocksdb_iterator_t* rocksdb_writebatch_wi_create_iterator_with_base(
+    rocksdb_writebatch_wi_t* wbwi, rocksdb_iterator_t* base_iterator) {
+  rocksdb_iterator_t* result = new rocksdb_iterator_t;
+  result->rep = wbwi->rep->NewIteratorWithBase(base_iterator->rep);
+  delete base_iterator;
+  return result;
+}
+
+rocksdb_iterator_t* rocksdb_writebatch_wi_create_iterator_with_base_cf(
+    rocksdb_writebatch_wi_t* wbwi, rocksdb_iterator_t* base_iterator,
+    rocksdb_column_family_handle_t* column_family) {
+  rocksdb_iterator_t* result = new rocksdb_iterator_t;
+  result->rep =
+      wbwi->rep->NewIteratorWithBase(column_family->rep, base_iterator->rep);
+  delete base_iterator;
+  return result;
+}
+
+char* rocksdb_writebatch_wi_get_from_batch(rocksdb_writebatch_wi_t* wbwi,
+                                           const rocksdb_options_t* options,
+                                           const char* key, size_t keylen,
+                                           size_t* vallen, char** errptr) {
+  char* result = nullptr;
+  std::string tmp;
+  Status s = wbwi->rep->GetFromBatch(options->rep, Slice(key, keylen), &tmp);
+  if (s.ok()) {
+    *vallen = tmp.size();
+    result = CopyString(tmp);
+  } else {
+    *vallen = 0;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+  }
+  return result;
+}
+
+char* rocksdb_writebatch_wi_get_from_batch_cf(
+    rocksdb_writebatch_wi_t* wbwi, const rocksdb_options_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t keylen, size_t* vallen, char** errptr) {
+  char* result = nullptr;
+  std::string tmp;
+  Status s = wbwi->rep->GetFromBatch(column_family->rep, options->rep,
+                                     Slice(key, keylen), &tmp);
+  if (s.ok()) {
+    *vallen = tmp.size();
+    result = CopyString(tmp);
+  } else {
+    *vallen = 0;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+  }
+  return result;
+}
+
+char* rocksdb_writebatch_wi_get_from_batch_and_db(
+    rocksdb_writebatch_wi_t* wbwi, rocksdb_t* db,
+    const rocksdb_readoptions_t* options, const char* key, size_t keylen,
+    size_t* vallen, char** errptr) {
+  char* result = nullptr;
+  std::string tmp;
+  Status s = wbwi->rep->GetFromBatchAndDB(db->rep, options->rep,
+                                          Slice(key, keylen), &tmp);
+  if (s.ok()) {
+    *vallen = tmp.size();
+    result = CopyString(tmp);
+  } else {
+    *vallen = 0;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+  }
+  return result;
+}
+
+char* rocksdb_writebatch_wi_get_from_batch_and_db_cf(
+    rocksdb_writebatch_wi_t* wbwi, rocksdb_t* db,
+    const rocksdb_readoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t keylen, size_t* vallen, char** errptr) {
+  char* result = nullptr;
+  std::string tmp;
+  Status s = wbwi->rep->GetFromBatchAndDB(
+      db->rep, options->rep, column_family->rep, Slice(key, keylen), &tmp);
+  if (s.ok()) {
+    *vallen = tmp.size();
+    result = CopyString(tmp);
+  } else {
+    *vallen = 0;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+  }
+  return result;
+}
+
+void rocksdb_write_writebatch_wi(rocksdb_t* db,
+                                 const rocksdb_writeoptions_t* options,
+                                 rocksdb_writebatch_wi_t* wbwi, char** errptr) {
+  WriteBatch* wb = wbwi->rep->GetWriteBatch();
+  SaveError(errptr, db->rep->Write(options->rep, wb));
+}
+
+void rocksdb_load_latest_options(
+    const char* db_path, rocksdb_env_t* env, bool ignore_unknown_options,
+    rocksdb_cache_t* cache, rocksdb_options_t** db_options,
+    size_t* num_column_families, char*** list_column_family_names,
+    rocksdb_options_t*** list_column_family_options, char** errptr) {
+  DBOptions db_opt;
+  std::vector<ColumnFamilyDescriptor> cf_descs;
+  ConfigOptions config_opts;
+  config_opts.ignore_unknown_options = ignore_unknown_options;
+  config_opts.input_strings_escaped = true;
+  config_opts.env = env->rep;
+  Status s = LoadLatestOptions(config_opts, std::string(db_path), &db_opt,
+                               &cf_descs, &cache->rep);
+  if (s.ok()) {
+    char** cf_names = (char**)malloc(cf_descs.size() * sizeof(char*));
+    rocksdb_options_t** cf_options = (rocksdb_options_t**)malloc(
+        cf_descs.size() * sizeof(rocksdb_options_t*));
+    for (size_t i = 0; i < cf_descs.size(); ++i) {
+      cf_names[i] = strdup(cf_descs[i].name.c_str());
+      cf_options[i] = new rocksdb_options_t{
+          Options(DBOptions(), std::move(cf_descs[i].options))};
+    }
+    *num_column_families = cf_descs.size();
+    *db_options = new rocksdb_options_t{
+        Options(std::move(db_opt), ColumnFamilyOptions())};
+    *list_column_family_names = cf_names;
+    *list_column_family_options = cf_options;
+  } else {
+    *num_column_families = 0;
+    *db_options = nullptr;
+    *list_column_family_names = nullptr;
+    *list_column_family_options = nullptr;
+    SaveError(errptr, s);
+  }
+}
+
+void rocksdb_load_latest_options_destroy(
+    rocksdb_options_t* db_options, char** list_column_family_names,
+    rocksdb_options_t** list_column_family_options, size_t len) {
+  rocksdb_options_destroy(db_options);
+  if (list_column_family_names) {
+    for (size_t i = 0; i < len; ++i) {
+      free(list_column_family_names[i]);
+    }
+    free(list_column_family_names);
+  }
+  if (list_column_family_options) {
+    for (size_t i = 0; i < len; ++i) {
+      rocksdb_options_destroy(list_column_family_options[i]);
+    }
+    free(list_column_family_options);
+  }
+}
+
+rocksdb_block_based_table_options_t* rocksdb_block_based_options_create() {
+  return new rocksdb_block_based_table_options_t;
+}
+
+void rocksdb_block_based_options_destroy(
+    rocksdb_block_based_table_options_t* options) {
+  delete options;
+}
+
+void rocksdb_block_based_options_set_checksum(
+    rocksdb_block_based_table_options_t* opt, char v) {
+  opt->rep.checksum = static_cast<ROCKSDB_NAMESPACE::ChecksumType>(v);
+}
+
+void rocksdb_block_based_options_set_block_size(
+    rocksdb_block_based_table_options_t* options, size_t block_size) {
+  options->rep.block_size = block_size;
+}
+
+void rocksdb_block_based_options_set_block_size_deviation(
+    rocksdb_block_based_table_options_t* options, int block_size_deviation) {
+  options->rep.block_size_deviation = block_size_deviation;
+}
+
+void rocksdb_block_based_options_set_block_restart_interval(
+    rocksdb_block_based_table_options_t* options, int block_restart_interval) {
+  options->rep.block_restart_interval = block_restart_interval;
+}
+
+void rocksdb_block_based_options_set_index_block_restart_interval(
+    rocksdb_block_based_table_options_t* options,
+    int index_block_restart_interval) {
+  options->rep.index_block_restart_interval = index_block_restart_interval;
+}
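+
+// Usage sketch (editorial illustration, not part of the vendored file):
+// recovering the options a database was last opened with, assuming `env`
+// and `cache` come from rocksdb_create_default_env() and
+// rocksdb_cache_create_lru().
+//
+//   rocksdb_options_t* db_opts = NULL;
+//   size_t ncf = 0;
+//   char** cf_names = NULL;
+//   rocksdb_options_t** cf_opts = NULL;
+//   char* err = NULL;
+//   rocksdb_load_latest_options("/path/to/db", env, false, cache, &db_opts,
+//                               &ncf, &cf_names, &cf_opts, &err);
+//   /* pass cf_names/cf_opts to rocksdb_open_column_families(), then */
+//   rocksdb_load_latest_options_destroy(db_opts, cf_names, cf_opts, ncf);
+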
+void rocksdb_block_based_options_set_metadata_block_size(
+    rocksdb_block_based_table_options_t* options,
+    uint64_t metadata_block_size) {
+  options->rep.metadata_block_size = metadata_block_size;
+}
+
+void rocksdb_block_based_options_set_partition_filters(
+    rocksdb_block_based_table_options_t* options,
+    unsigned char partition_filters) {
+  options->rep.partition_filters = partition_filters;
+}
+
+void rocksdb_block_based_options_set_optimize_filters_for_memory(
+    rocksdb_block_based_table_options_t* options,
+    unsigned char optimize_filters_for_memory) {
+  options->rep.optimize_filters_for_memory = optimize_filters_for_memory;
+}
+
+void rocksdb_block_based_options_set_use_delta_encoding(
+    rocksdb_block_based_table_options_t* options,
+    unsigned char use_delta_encoding) {
+  options->rep.use_delta_encoding = use_delta_encoding;
+}
+
+void rocksdb_block_based_options_set_filter_policy(
+    rocksdb_block_based_table_options_t* options,
+    rocksdb_filterpolicy_t* filter_policy) {
+  options->rep.filter_policy.reset(filter_policy);
+}
+
+void rocksdb_block_based_options_set_no_block_cache(
+    rocksdb_block_based_table_options_t* options,
+    unsigned char no_block_cache) {
+  options->rep.no_block_cache = no_block_cache;
+}
+
+void rocksdb_block_based_options_set_block_cache(
+    rocksdb_block_based_table_options_t* options,
+    rocksdb_cache_t* block_cache) {
+  if (block_cache) {
+    options->rep.block_cache = block_cache->rep;
+  }
+}
+
+void rocksdb_block_based_options_set_whole_key_filtering(
+    rocksdb_block_based_table_options_t* options, unsigned char v) {
+  options->rep.whole_key_filtering = v;
+}
+
+void rocksdb_block_based_options_set_format_version(
+    rocksdb_block_based_table_options_t* options, int v) {
+  options->rep.format_version = v;
+}
+
+void rocksdb_block_based_options_set_index_type(
+    rocksdb_block_based_table_options_t* options, int v) {
+  options->rep.index_type = static_cast<BlockBasedTableOptions::IndexType>(v);
+}
+
+void rocksdb_block_based_options_set_data_block_index_type(
+    rocksdb_block_based_table_options_t* options, int v) {
+  options->rep.data_block_index_type =
+      static_cast<BlockBasedTableOptions::DataBlockIndexType>(v);
+}
+
+void rocksdb_block_based_options_set_data_block_hash_ratio(
+    rocksdb_block_based_table_options_t* options, double v) {
+  options->rep.data_block_hash_table_util_ratio = v;
+}
+
+void rocksdb_block_based_options_set_cache_index_and_filter_blocks(
+    rocksdb_block_based_table_options_t* options, unsigned char v) {
+  options->rep.cache_index_and_filter_blocks = v;
+}
+
+void rocksdb_block_based_options_set_cache_index_and_filter_blocks_with_high_priority(
+    rocksdb_block_based_table_options_t* options, unsigned char v) {
+  options->rep.cache_index_and_filter_blocks_with_high_priority = v;
+}
+
+void rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache(
+    rocksdb_block_based_table_options_t* options, unsigned char v) {
+  options->rep.pin_l0_filter_and_index_blocks_in_cache = v;
+}
+
+void rocksdb_block_based_options_set_pin_top_level_index_and_filter(
+    rocksdb_block_based_table_options_t* options, unsigned char v) {
+  options->rep.pin_top_level_index_and_filter = v;
+}
+
+void rocksdb_options_set_block_based_table_factory(
+    rocksdb_options_t* opt,
+    rocksdb_block_based_table_options_t* table_options) {
+  if (table_options) {
+    opt->rep.table_factory.reset(
+        ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(table_options->rep));
+  }
+}
+
+rocksdb_cuckoo_table_options_t* rocksdb_cuckoo_options_create() {
+  return new rocksdb_cuckoo_table_options_t;
+}
+
+void rocksdb_cuckoo_options_destroy(rocksdb_cuckoo_table_options_t* options) {
+  delete options;
+}
+
+void rocksdb_cuckoo_options_set_hash_ratio(
+    rocksdb_cuckoo_table_options_t* options, double v) {
+  options->rep.hash_table_ratio = v;
+}
+
+void rocksdb_cuckoo_options_set_max_search_depth(
+    rocksdb_cuckoo_table_options_t* options, uint32_t v) {
+  options->rep.max_search_depth = v;
+}
+
+void rocksdb_cuckoo_options_set_cuckoo_block_size(
+    rocksdb_cuckoo_table_options_t* options, uint32_t v) {
+  options->rep.cuckoo_block_size = v;
+}
+
+void rocksdb_cuckoo_options_set_identity_as_first_hash(
+    rocksdb_cuckoo_table_options_t* options, unsigned char v) {
+  options->rep.identity_as_first_hash = v;
+}
+
+void rocksdb_cuckoo_options_set_use_module_hash(
+    rocksdb_cuckoo_table_options_t* options, unsigned char v) {
+  options->rep.use_module_hash = v;
+}
+
+void rocksdb_options_set_cuckoo_table_factory(
+    rocksdb_options_t* opt, rocksdb_cuckoo_table_options_t* table_options) {
+  if (table_options) {
+    opt->rep.table_factory.reset(
+        ROCKSDB_NAMESPACE::NewCuckooTableFactory(table_options->rep));
+  }
+}
+
+void rocksdb_set_options(rocksdb_t* db, int count, const char* const keys[],
+                         const char* const values[], char** errptr) {
+  std::unordered_map<std::string, std::string> options_map;
+  for (int i = 0; i < count; i++) options_map[keys[i]] = values[i];
+  SaveError(errptr, db->rep->SetOptions(options_map));
+}
+
+void rocksdb_set_options_cf(rocksdb_t* db,
+                            rocksdb_column_family_handle_t* handle, int count,
+                            const char* const keys[],
+                            const char* const values[], char** errptr) {
+  std::unordered_map<std::string, std::string> options_map;
+  for (int i = 0; i < count; i++) options_map[keys[i]] = values[i];
+  SaveError(errptr, db->rep->SetOptions(handle->rep, options_map));
+}
+
+rocksdb_options_t* rocksdb_options_create() { return new rocksdb_options_t; }
+
+void rocksdb_options_destroy(rocksdb_options_t* options) { delete options; }
+
+rocksdb_options_t* rocksdb_options_create_copy(rocksdb_options_t* options) {
+  return new rocksdb_options_t(*options);
+}
+
+void rocksdb_options_increase_parallelism(rocksdb_options_t* opt,
+                                          int total_threads) {
+  opt->rep.IncreaseParallelism(total_threads);
+}
+
+void rocksdb_options_optimize_for_point_lookup(rocksdb_options_t* opt,
+                                               uint64_t block_cache_size_mb) {
+  opt->rep.OptimizeForPointLookup(block_cache_size_mb);
+}
+
+void rocksdb_options_optimize_level_style_compaction(
+    rocksdb_options_t* opt, uint64_t memtable_memory_budget) {
+  opt->rep.OptimizeLevelStyleCompaction(memtable_memory_budget);
+}
+
+void rocksdb_options_optimize_universal_style_compaction(
+    rocksdb_options_t* opt, uint64_t memtable_memory_budget) {
+  opt->rep.OptimizeUniversalStyleCompaction(memtable_memory_budget);
+}
+
+void rocksdb_options_set_allow_ingest_behind(rocksdb_options_t* opt,
+                                             unsigned char v) {
+  opt->rep.allow_ingest_behind = v;
+}
+
+unsigned char rocksdb_options_get_allow_ingest_behind(rocksdb_options_t* opt) {
+  return opt->rep.allow_ingest_behind;
+}
+
+void rocksdb_options_set_compaction_filter(rocksdb_options_t* opt,
+                                           rocksdb_compactionfilter_t* filter) {
+  opt->rep.compaction_filter = filter;
+}
+
+void rocksdb_options_set_compaction_filter_factory(
+    rocksdb_options_t* opt, rocksdb_compactionfilterfactory_t* factory) {
+  opt->rep.compaction_filter_factory =
+      std::shared_ptr<CompactionFilterFactory>(factory);
+}
+
+void rocksdb_options_compaction_readahead_size(rocksdb_options_t* opt,
+                                               size_t s) {
+  opt->rep.compaction_readahead_size = s;
+}
+
+size_t rocksdb_options_get_compaction_readahead_size(rocksdb_options_t* opt) {
+  return opt->rep.compaction_readahead_size;
+}
+
+void rocksdb_options_set_comparator(rocksdb_options_t* opt,
+                                    rocksdb_comparator_t* cmp) {
+  opt->rep.comparator = cmp;
+}
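+
+// Usage sketch (editorial illustration, not part of the vendored file):
+// changing a mutable column-family option on a live DB through
+// DB::SetOptions(), which the two rocksdb_set_options* functions above
+// expose; `db` is assumed to come from rocksdb_open().
+//
+//   const char* keys[] = {"disable_auto_compactions"};
+//   const char* vals[] = {"true"};
+//   char* err = NULL;
+//   rocksdb_set_options(db, 1, keys, vals, &err);
+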
+void rocksdb_options_set_merge_operator(
+    rocksdb_options_t* opt, rocksdb_mergeoperator_t* merge_operator) {
+  opt->rep.merge_operator = std::shared_ptr<MergeOperator>(merge_operator);
+}
+
+void rocksdb_options_set_create_if_missing(rocksdb_options_t* opt,
+                                           unsigned char v) {
+  opt->rep.create_if_missing = v;
+}
+
+unsigned char rocksdb_options_get_create_if_missing(rocksdb_options_t* opt) {
+  return opt->rep.create_if_missing;
+}
+
+void rocksdb_options_set_create_missing_column_families(rocksdb_options_t* opt,
+                                                        unsigned char v) {
+  opt->rep.create_missing_column_families = v;
+}
+
+unsigned char rocksdb_options_get_create_missing_column_families(
+    rocksdb_options_t* opt) {
+  return opt->rep.create_missing_column_families;
+}
+
+void rocksdb_options_set_error_if_exists(rocksdb_options_t* opt,
+                                         unsigned char v) {
+  opt->rep.error_if_exists = v;
+}
+
+unsigned char rocksdb_options_get_error_if_exists(rocksdb_options_t* opt) {
+  return opt->rep.error_if_exists;
+}
+
+void rocksdb_options_set_paranoid_checks(rocksdb_options_t* opt,
+                                         unsigned char v) {
+  opt->rep.paranoid_checks = v;
+}
+
+unsigned char rocksdb_options_get_paranoid_checks(rocksdb_options_t* opt) {
+  return opt->rep.paranoid_checks;
+}
+
+void rocksdb_options_set_db_paths(rocksdb_options_t* opt,
+                                  const rocksdb_dbpath_t** dbpath_values,
+                                  size_t num_paths) {
+  std::vector<DbPath> db_paths(num_paths);
+  for (size_t i = 0; i < num_paths; ++i) {
+    db_paths[i] = dbpath_values[i]->rep;
+  }
+  opt->rep.db_paths = db_paths;
+}
+
+void rocksdb_options_set_env(rocksdb_options_t* opt, rocksdb_env_t* env) {
+  opt->rep.env = (env ? env->rep : nullptr);
+}
+
+void rocksdb_options_set_info_log(rocksdb_options_t* opt, rocksdb_logger_t* l) {
+  if (l) {
+    opt->rep.info_log = l->rep;
+  }
+}
+
+void rocksdb_options_set_info_log_level(rocksdb_options_t* opt, int v) {
+  opt->rep.info_log_level = static_cast<InfoLogLevel>(v);
+}
+
+int rocksdb_options_get_info_log_level(rocksdb_options_t* opt) {
+  return static_cast<int>(opt->rep.info_log_level);
+}
+
+void rocksdb_options_set_db_write_buffer_size(rocksdb_options_t* opt,
+                                              size_t s) {
+  opt->rep.db_write_buffer_size = s;
+}
+
+size_t rocksdb_options_get_db_write_buffer_size(rocksdb_options_t* opt) {
+  return opt->rep.db_write_buffer_size;
+}
+
+void rocksdb_options_set_write_buffer_size(rocksdb_options_t* opt, size_t s) {
+  opt->rep.write_buffer_size = s;
+}
+
+size_t rocksdb_options_get_write_buffer_size(rocksdb_options_t* opt) {
+  return opt->rep.write_buffer_size;
+}
+
+void rocksdb_options_set_max_open_files(rocksdb_options_t* opt, int n) {
+  opt->rep.max_open_files = n;
+}
+
+int rocksdb_options_get_max_open_files(rocksdb_options_t* opt) {
+  return opt->rep.max_open_files;
+}
+
+void rocksdb_options_set_max_file_opening_threads(rocksdb_options_t* opt,
+                                                  int n) {
+  opt->rep.max_file_opening_threads = n;
+}
+
+int rocksdb_options_get_max_file_opening_threads(rocksdb_options_t* opt) {
+  return opt->rep.max_file_opening_threads;
+}
+
+void rocksdb_options_set_max_total_wal_size(rocksdb_options_t* opt,
+                                            uint64_t n) {
+  opt->rep.max_total_wal_size = n;
+}
+
+uint64_t rocksdb_options_get_max_total_wal_size(rocksdb_options_t* opt) {
+  return opt->rep.max_total_wal_size;
+}
+
+void rocksdb_options_set_target_file_size_base(rocksdb_options_t* opt,
+                                               uint64_t n) {
+  opt->rep.target_file_size_base = n;
+}
+
+uint64_t rocksdb_options_get_target_file_size_base(rocksdb_options_t* opt) {
+  return opt->rep.target_file_size_base;
+}
+
+void rocksdb_options_set_target_file_size_multiplier(rocksdb_options_t* opt,
+                                                     int n) {
+  opt->rep.target_file_size_multiplier = n;
+}
+
+int rocksdb_options_get_target_file_size_multiplier(rocksdb_options_t* opt) {
+  return opt->rep.target_file_size_multiplier;
+}
+
+void rocksdb_options_set_max_bytes_for_level_base(rocksdb_options_t* opt,
+                                                  uint64_t n) {
+  opt->rep.max_bytes_for_level_base = n;
+}
+
+uint64_t rocksdb_options_get_max_bytes_for_level_base(rocksdb_options_t* opt) {
+  return opt->rep.max_bytes_for_level_base;
+}
+
+void rocksdb_options_set_level_compaction_dynamic_level_bytes(
+    rocksdb_options_t* opt, unsigned char v) {
+  opt->rep.level_compaction_dynamic_level_bytes = v;
+}
+
+unsigned char rocksdb_options_get_level_compaction_dynamic_level_bytes(
+    rocksdb_options_t* opt) {
+  return opt->rep.level_compaction_dynamic_level_bytes;
+}
+
+void rocksdb_options_set_max_bytes_for_level_multiplier(rocksdb_options_t* opt,
+                                                        double n) {
+  opt->rep.max_bytes_for_level_multiplier = n;
+}
+
+double rocksdb_options_get_max_bytes_for_level_multiplier(
+    rocksdb_options_t* opt) {
+  return opt->rep.max_bytes_for_level_multiplier;
+}
+
+void rocksdb_options_set_max_compaction_bytes(rocksdb_options_t* opt,
+                                              uint64_t n) {
+  opt->rep.max_compaction_bytes = n;
+}
+
+uint64_t rocksdb_options_get_max_compaction_bytes(rocksdb_options_t* opt) {
+  return opt->rep.max_compaction_bytes;
+}
+
+void rocksdb_options_set_max_bytes_for_level_multiplier_additional(
+    rocksdb_options_t* opt, int* level_values, size_t num_levels) {
+  opt->rep.max_bytes_for_level_multiplier_additional.resize(num_levels);
+  for (size_t i = 0; i < num_levels; ++i) {
+    opt->rep.max_bytes_for_level_multiplier_additional[i] = level_values[i];
+  }
+}
+
+void rocksdb_options_enable_statistics(rocksdb_options_t* opt) {
+  opt->rep.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+}
+
+void rocksdb_options_set_skip_stats_update_on_db_open(rocksdb_options_t* opt,
+                                                      unsigned char val) {
+  opt->rep.skip_stats_update_on_db_open = val;
+}
+
+unsigned char rocksdb_options_get_skip_stats_update_on_db_open(
+    rocksdb_options_t* opt) {
+  return opt->rep.skip_stats_update_on_db_open;
+}
+
+void rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(
+    rocksdb_options_t* opt, unsigned char val) {
+  opt->rep.skip_checking_sst_file_sizes_on_db_open = val;
+}
+
+unsigned char rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(
+    rocksdb_options_t* opt) {
+  return opt->rep.skip_checking_sst_file_sizes_on_db_open;
+}
+
+/* Blob Options Settings */
+void rocksdb_options_set_enable_blob_files(rocksdb_options_t* opt,
+                                           unsigned char val) {
+  opt->rep.enable_blob_files = val;
+}
+extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_enable_blob_files(
+    rocksdb_options_t* opt) {
+  return opt->rep.enable_blob_files;
+}
+
+void rocksdb_options_set_min_blob_size(rocksdb_options_t* opt, uint64_t val) {
+  opt->rep.min_blob_size = val;
+}
+
+uint64_t rocksdb_options_get_min_blob_size(rocksdb_options_t* opt) {
+  return opt->rep.min_blob_size;
+}
+
+void rocksdb_options_set_blob_file_size(rocksdb_options_t* opt, uint64_t val) {
+  opt->rep.blob_file_size = val;
+}
+
+uint64_t rocksdb_options_get_blob_file_size(rocksdb_options_t* opt) {
+  return opt->rep.blob_file_size;
+}
+
+void rocksdb_options_set_blob_compression_type(rocksdb_options_t* opt,
+                                               int val) {
+  opt->rep.blob_compression_type = static_cast<CompressionType>(val);
+}
+
+int rocksdb_options_get_blob_compression_type(rocksdb_options_t* opt) {
+  return opt->rep.blob_compression_type;
+}
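+
+// Usage sketch (editorial illustration, not part of the vendored file):
+// routing large values into blob files via the integrated BlobDB knobs
+// around here; exact knob availability depends on the RocksDB version
+// being vendored.
+//
+//   rocksdb_options_t* opts = rocksdb_options_create();
+//   rocksdb_options_set_enable_blob_files(opts, 1);
+//   rocksdb_options_set_min_blob_size(opts, 1024);  /* >= 1 KiB to blobs */
+//   rocksdb_options_set_blob_compression_type(opts,
+//                                             rocksdb_zstd_compression);
+//   rocksdb_options_set_enable_blob_gc(opts, 1);
+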
+void rocksdb_options_set_enable_blob_gc(rocksdb_options_t* opt,
+                                        unsigned char val) {
+  opt->rep.enable_blob_garbage_collection = val;
+}
+
+unsigned char rocksdb_options_get_enable_blob_gc(rocksdb_options_t* opt) {
+  return opt->rep.enable_blob_garbage_collection;
+}
+
+void rocksdb_options_set_blob_gc_age_cutoff(rocksdb_options_t* opt,
+                                            double val) {
+  opt->rep.blob_garbage_collection_age_cutoff = val;
+}
+
+double rocksdb_options_get_blob_gc_age_cutoff(rocksdb_options_t* opt) {
+  return opt->rep.blob_garbage_collection_age_cutoff;
+}
+
+void rocksdb_options_set_blob_gc_force_threshold(rocksdb_options_t* opt,
+                                                 double val) {
+  opt->rep.blob_garbage_collection_force_threshold = val;
+}
+
+double rocksdb_options_get_blob_gc_force_threshold(rocksdb_options_t* opt) {
+  return opt->rep.blob_garbage_collection_force_threshold;
+}
+
+void rocksdb_options_set_blob_compaction_readahead_size(rocksdb_options_t* opt,
+                                                        uint64_t val) {
+  opt->rep.blob_compaction_readahead_size = val;
+}
+
+uint64_t rocksdb_options_get_blob_compaction_readahead_size(
+    rocksdb_options_t* opt) {
+  return opt->rep.blob_compaction_readahead_size;
+}
+
+void rocksdb_options_set_blob_file_starting_level(rocksdb_options_t* opt,
+                                                  int val) {
+  opt->rep.blob_file_starting_level = val;
+}
+
+int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) {
+  return opt->rep.blob_file_starting_level;
+}
+
+void rocksdb_options_set_blob_cache(rocksdb_options_t* opt,
+                                    rocksdb_cache_t* blob_cache) {
+  opt->rep.blob_cache = blob_cache->rep;
+}
+
+void rocksdb_options_set_prepopulate_blob_cache(rocksdb_options_t* opt, int t) {
+  opt->rep.prepopulate_blob_cache = static_cast<PrepopulateBlobCache>(t);
+}
+
+int rocksdb_options_get_prepopulate_blob_cache(rocksdb_options_t* opt) {
+  return static_cast<int>(opt->rep.prepopulate_blob_cache);
+}
+
+void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
+  opt->rep.num_levels = n;
+}
+
+int rocksdb_options_get_num_levels(rocksdb_options_t* opt) {
+  return opt->rep.num_levels;
+}
+
+void rocksdb_options_set_level0_file_num_compaction_trigger(
+    rocksdb_options_t* opt, int n) {
+  opt->rep.level0_file_num_compaction_trigger = n;
+}
+
+int rocksdb_options_get_level0_file_num_compaction_trigger(
+    rocksdb_options_t* opt) {
+  return opt->rep.level0_file_num_compaction_trigger;
+}
+
+void rocksdb_options_set_level0_slowdown_writes_trigger(rocksdb_options_t* opt,
+                                                        int n) {
+  opt->rep.level0_slowdown_writes_trigger = n;
+}
+
+int rocksdb_options_get_level0_slowdown_writes_trigger(rocksdb_options_t* opt) {
+  return opt->rep.level0_slowdown_writes_trigger;
+}
+
+void rocksdb_options_set_level0_stop_writes_trigger(rocksdb_options_t* opt,
+                                                    int n) {
+  opt->rep.level0_stop_writes_trigger = n;
+}
+
+int rocksdb_options_get_level0_stop_writes_trigger(rocksdb_options_t* opt) {
+  return opt->rep.level0_stop_writes_trigger;
+}
+
+void rocksdb_options_set_wal_recovery_mode(rocksdb_options_t* opt, int mode) {
+  opt->rep.wal_recovery_mode = static_cast<WALRecoveryMode>(mode);
+}
+
+int rocksdb_options_get_wal_recovery_mode(rocksdb_options_t* opt) {
+  return static_cast<int>(opt->rep.wal_recovery_mode);
+}
+
+void rocksdb_options_set_compression(rocksdb_options_t* opt, int t) {
+  opt->rep.compression = static_cast<CompressionType>(t);
+}
+
+int rocksdb_options_get_compression(rocksdb_options_t* opt) {
+  return opt->rep.compression;
+}
+
+void rocksdb_options_set_bottommost_compression(rocksdb_options_t* opt, int t) {
+  opt->rep.bottommost_compression = static_cast<CompressionType>(t);
+}
+
+int rocksdb_options_get_bottommost_compression(rocksdb_options_t* opt) {
+  return opt->rep.bottommost_compression;
+}
+
+void rocksdb_options_set_compression_per_level(rocksdb_options_t* opt,
+                                               const int* level_values,
+                                               size_t num_levels) {
+  opt->rep.compression_per_level.resize(num_levels);
+  for (size_t i = 0; i < num_levels; ++i) {
+    opt->rep.compression_per_level[i] =
+        static_cast<CompressionType>(level_values[i]);
+  }
+}
+
+void rocksdb_options_set_bottommost_compression_options(rocksdb_options_t* opt,
+                                                        int w_bits, int level,
+                                                        int strategy,
+                                                        int max_dict_bytes,
+                                                        unsigned char enabled) {
+  opt->rep.bottommost_compression_opts.window_bits = w_bits;
+  opt->rep.bottommost_compression_opts.level = level;
+  opt->rep.bottommost_compression_opts.strategy = strategy;
+  opt->rep.bottommost_compression_opts.max_dict_bytes = max_dict_bytes;
+  opt->rep.bottommost_compression_opts.enabled = enabled;
+}
+
+void rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(
+    rocksdb_options_t* opt, int zstd_max_train_bytes, unsigned char enabled) {
+  opt->rep.bottommost_compression_opts.zstd_max_train_bytes =
+      zstd_max_train_bytes;
+  opt->rep.bottommost_compression_opts.enabled = enabled;
+}
+
+void rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer,
+    unsigned char enabled) {
+  opt->rep.bottommost_compression_opts.use_zstd_dict_trainer =
+      use_zstd_dict_trainer;
+  opt->rep.bottommost_compression_opts.enabled = enabled;
+}
+
+unsigned char
+rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt) {
+  return opt->rep.bottommost_compression_opts.use_zstd_dict_trainer;
+}
+
+void rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
+    rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes,
+    unsigned char enabled) {
+  opt->rep.bottommost_compression_opts.max_dict_buffer_bytes =
+      max_dict_buffer_bytes;
+  opt->rep.bottommost_compression_opts.enabled = enabled;
+}
+
+void rocksdb_options_set_compression_options(rocksdb_options_t* opt, int w_bits,
+                                             int level, int strategy,
+                                             int max_dict_bytes) {
+  opt->rep.compression_opts.window_bits = w_bits;
+  opt->rep.compression_opts.level = level;
+  opt->rep.compression_opts.strategy = strategy;
+  opt->rep.compression_opts.max_dict_bytes = max_dict_bytes;
+}
+
+void rocksdb_options_set_compression_options_zstd_max_train_bytes(
+    rocksdb_options_t* opt, int zstd_max_train_bytes) {
+  opt->rep.compression_opts.zstd_max_train_bytes = zstd_max_train_bytes;
+}
+
+int rocksdb_options_get_compression_options_zstd_max_train_bytes(
+    rocksdb_options_t* opt) {
+  return opt->rep.compression_opts.zstd_max_train_bytes;
+}
+
+void rocksdb_options_set_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer) {
+  opt->rep.compression_opts.use_zstd_dict_trainer = use_zstd_dict_trainer;
+}
+
+unsigned char rocksdb_options_get_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt) {
+  return opt->rep.compression_opts.use_zstd_dict_trainer;
+}
+
+void rocksdb_options_set_compression_options_parallel_threads(
+    rocksdb_options_t* opt, int value) {
+  opt->rep.compression_opts.parallel_threads = value;
+}
+
+int rocksdb_options_get_compression_options_parallel_threads(
+    rocksdb_options_t* opt) {
+  return opt->rep.compression_opts.parallel_threads;
+}
+
+void rocksdb_options_set_compression_options_max_dict_buffer_bytes(
+    rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes) {
+  opt->rep.compression_opts.max_dict_buffer_bytes = max_dict_buffer_bytes;
+}
+
+uint64_t rocksdb_options_get_compression_options_max_dict_buffer_bytes(
+    rocksdb_options_t* opt) {
+  return opt->rep.compression_opts.max_dict_buffer_bytes;
+}
+
+void rocksdb_options_set_prefix_extractor(
+    rocksdb_options_t* opt, rocksdb_slicetransform_t* prefix_extractor) {
+  opt->rep.prefix_extractor.reset(prefix_extractor);
+}
+
+void rocksdb_options_set_use_fsync(rocksdb_options_t* opt, int use_fsync) {
+  opt->rep.use_fsync = use_fsync;
+}
+
+int rocksdb_options_get_use_fsync(rocksdb_options_t* opt) {
+  return opt->rep.use_fsync;
+}
+
+void rocksdb_options_set_db_log_dir(rocksdb_options_t* opt,
+                                    const char* db_log_dir) {
+  opt->rep.db_log_dir = db_log_dir;
+}
+
+void rocksdb_options_set_wal_dir(rocksdb_options_t* opt, const char* v) {
+  opt->rep.wal_dir = v;
+}
+
+void rocksdb_options_set_WAL_ttl_seconds(rocksdb_options_t* opt, uint64_t ttl) {
+  opt->rep.WAL_ttl_seconds = ttl;
+}
+
+uint64_t rocksdb_options_get_WAL_ttl_seconds(rocksdb_options_t* opt) {
+  return opt->rep.WAL_ttl_seconds;
+}
+
+void rocksdb_options_set_WAL_size_limit_MB(rocksdb_options_t* opt,
+                                           uint64_t limit) {
+  opt->rep.WAL_size_limit_MB = limit;
+}
+
+uint64_t rocksdb_options_get_WAL_size_limit_MB(rocksdb_options_t* opt) {
+  return opt->rep.WAL_size_limit_MB;
+}
+
+void rocksdb_options_set_manifest_preallocation_size(rocksdb_options_t* opt,
+                                                     size_t v) {
+  opt->rep.manifest_preallocation_size = v;
+}
+
+size_t rocksdb_options_get_manifest_preallocation_size(rocksdb_options_t* opt) {
+  return opt->rep.manifest_preallocation_size;
+}
+
+void rocksdb_options_set_use_direct_reads(rocksdb_options_t* opt,
+                                          unsigned char v) {
+  opt->rep.use_direct_reads = v;
+}
+
+unsigned char rocksdb_options_get_use_direct_reads(rocksdb_options_t* opt) {
+  return opt->rep.use_direct_reads;
+}
+
+void rocksdb_options_set_use_direct_io_for_flush_and_compaction(
+    rocksdb_options_t* opt, unsigned char v) {
+  opt->rep.use_direct_io_for_flush_and_compaction = v;
+}
+
+unsigned char rocksdb_options_get_use_direct_io_for_flush_and_compaction(
+    rocksdb_options_t* opt) {
+  return opt->rep.use_direct_io_for_flush_and_compaction;
+}
+
+void rocksdb_options_set_allow_mmap_reads(rocksdb_options_t* opt,
+                                          unsigned char v) {
+  opt->rep.allow_mmap_reads = v;
+}
+
+unsigned char rocksdb_options_get_allow_mmap_reads(rocksdb_options_t* opt) {
+  return opt->rep.allow_mmap_reads;
+}
+
+void rocksdb_options_set_allow_mmap_writes(rocksdb_options_t* opt,
+                                           unsigned char v) {
+  opt->rep.allow_mmap_writes = v;
+}
+
+unsigned char rocksdb_options_get_allow_mmap_writes(rocksdb_options_t* opt) {
+  return opt->rep.allow_mmap_writes;
+}
+
+void rocksdb_options_set_is_fd_close_on_exec(rocksdb_options_t* opt,
+                                             unsigned char v) {
+  opt->rep.is_fd_close_on_exec = v;
+}
+
+unsigned char rocksdb_options_get_is_fd_close_on_exec(rocksdb_options_t* opt) {
+  return opt->rep.is_fd_close_on_exec;
+}
+
+void rocksdb_options_set_stats_dump_period_sec(rocksdb_options_t* opt,
+                                               unsigned int v) {
+  opt->rep.stats_dump_period_sec = v;
+}
+
+unsigned int rocksdb_options_get_stats_dump_period_sec(rocksdb_options_t* opt) {
+  return opt->rep.stats_dump_period_sec;
+}
+
+void rocksdb_options_set_stats_persist_period_sec(rocksdb_options_t* opt,
+                                                  unsigned int v) {
+  opt->rep.stats_persist_period_sec = v;
+}
+
+unsigned int rocksdb_options_get_stats_persist_period_sec(
+    rocksdb_options_t* opt) {
+  return opt->rep.stats_persist_period_sec;
+}
+
+void rocksdb_options_set_advise_random_on_open(rocksdb_options_t* opt,
+                                               unsigned char v) {
+  opt->rep.advise_random_on_open = v;
+}
+unsigned char rocksdb_options_get_advise_random_on_open(
+    rocksdb_options_t* opt) {
+  return opt->rep.advise_random_on_open;
+}
+
+void rocksdb_options_set_access_hint_on_compaction_start(rocksdb_options_t* opt,
+                                                         int v) {
+  switch (v) {
+    case 0:
+      opt->rep.access_hint_on_compaction_start =
+          ROCKSDB_NAMESPACE::Options::NONE;
+      break;
+    case 1:
+      opt->rep.access_hint_on_compaction_start =
+          ROCKSDB_NAMESPACE::Options::NORMAL;
+      break;
+    case 2:
+      opt->rep.access_hint_on_compaction_start =
+          ROCKSDB_NAMESPACE::Options::SEQUENTIAL;
+      break;
+    case 3:
+      opt->rep.access_hint_on_compaction_start =
+          ROCKSDB_NAMESPACE::Options::WILLNEED;
+      break;
+    default:
+      assert(0);
+  }
+}
+
+int rocksdb_options_get_access_hint_on_compaction_start(
+    rocksdb_options_t* opt) {
+  return opt->rep.access_hint_on_compaction_start;
+}
+
+void rocksdb_options_set_use_adaptive_mutex(rocksdb_options_t* opt,
+                                            unsigned char v) {
+  opt->rep.use_adaptive_mutex = v;
+}
+
+unsigned char rocksdb_options_get_use_adaptive_mutex(rocksdb_options_t* opt) {
+  return opt->rep.use_adaptive_mutex;
+}
+
+void rocksdb_options_set_wal_bytes_per_sync(rocksdb_options_t* opt,
+                                            uint64_t v) {
+  opt->rep.wal_bytes_per_sync = v;
+}
+
+uint64_t rocksdb_options_get_wal_bytes_per_sync(rocksdb_options_t* opt) {
+  return opt->rep.wal_bytes_per_sync;
+}
+
+void rocksdb_options_set_bytes_per_sync(rocksdb_options_t* opt, uint64_t v) {
+  opt->rep.bytes_per_sync = v;
+}
+
+uint64_t rocksdb_options_get_bytes_per_sync(rocksdb_options_t* opt) {
+  return opt->rep.bytes_per_sync;
+}
+
+void rocksdb_options_set_writable_file_max_buffer_size(rocksdb_options_t* opt,
+                                                       uint64_t v) {
+  opt->rep.writable_file_max_buffer_size = static_cast<size_t>(v);
+}
+
+uint64_t rocksdb_options_get_writable_file_max_buffer_size(
+    rocksdb_options_t* opt) {
+  return opt->rep.writable_file_max_buffer_size;
+}
+
+void rocksdb_options_set_allow_concurrent_memtable_write(rocksdb_options_t* opt,
+                                                         unsigned char v) {
+  opt->rep.allow_concurrent_memtable_write = v;
+}
+
+unsigned char rocksdb_options_get_allow_concurrent_memtable_write(
+    rocksdb_options_t* opt) {
+  return opt->rep.allow_concurrent_memtable_write;
+}
+
+void rocksdb_options_set_enable_write_thread_adaptive_yield(
+    rocksdb_options_t* opt, unsigned char v) {
+  opt->rep.enable_write_thread_adaptive_yield = v;
+}
+
+unsigned char rocksdb_options_get_enable_write_thread_adaptive_yield(
+    rocksdb_options_t* opt) {
+  return opt->rep.enable_write_thread_adaptive_yield;
+}
+
+void rocksdb_options_set_max_sequential_skip_in_iterations(
+    rocksdb_options_t* opt, uint64_t v) {
+  opt->rep.max_sequential_skip_in_iterations = v;
+}
+
+uint64_t rocksdb_options_get_max_sequential_skip_in_iterations(
+    rocksdb_options_t* opt) {
+  return opt->rep.max_sequential_skip_in_iterations;
+}
+
+void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t* opt,
+                                                 int n) {
+  opt->rep.max_write_buffer_number = n;
+}
+
+int rocksdb_options_get_max_write_buffer_number(rocksdb_options_t* opt) {
+  return opt->rep.max_write_buffer_number;
+}
+
+void rocksdb_options_set_min_write_buffer_number_to_merge(
+    rocksdb_options_t* opt, int n) {
+  opt->rep.min_write_buffer_number_to_merge = n;
+}
+
+int rocksdb_options_get_min_write_buffer_number_to_merge(
+    rocksdb_options_t* opt) {
+  return opt->rep.min_write_buffer_number_to_merge;
+}
+
+void rocksdb_options_set_max_write_buffer_number_to_maintain(
+    rocksdb_options_t* opt, int n) {
+  opt->rep.max_write_buffer_number_to_maintain = n;
+}
+
+int rocksdb_options_get_max_write_buffer_number_to_maintain(
+    rocksdb_options_t* opt) {
+  return opt->rep.max_write_buffer_number_to_maintain;
+}
+
+void rocksdb_options_set_max_write_buffer_size_to_maintain(
+    rocksdb_options_t* opt, int64_t n) {
+  opt->rep.max_write_buffer_size_to_maintain = n;
+}
+
+int64_t rocksdb_options_get_max_write_buffer_size_to_maintain(
+    rocksdb_options_t* opt) {
+  return opt->rep.max_write_buffer_size_to_maintain;
+}
+
+void rocksdb_options_set_enable_pipelined_write(rocksdb_options_t* opt,
+                                                unsigned char v) {
+  opt->rep.enable_pipelined_write = v;
+}
+
+unsigned char rocksdb_options_get_enable_pipelined_write(
+    rocksdb_options_t* opt) {
+  return opt->rep.enable_pipelined_write;
+}
+
+void rocksdb_options_set_unordered_write(rocksdb_options_t* opt,
+                                         unsigned char v) {
+  opt->rep.unordered_write = v;
+}
+
+unsigned char rocksdb_options_get_unordered_write(rocksdb_options_t* opt) {
+  return opt->rep.unordered_write;
+}
+
+void rocksdb_options_set_max_subcompactions(rocksdb_options_t* opt,
+                                            uint32_t n) {
+  opt->rep.max_subcompactions = n;
+}
+
+uint32_t rocksdb_options_get_max_subcompactions(rocksdb_options_t* opt) {
+  return opt->rep.max_subcompactions;
+}
+
+void rocksdb_options_set_max_background_jobs(rocksdb_options_t* opt, int n) {
+  opt->rep.max_background_jobs = n;
+}
+
+int rocksdb_options_get_max_background_jobs(rocksdb_options_t* opt) {
+  return opt->rep.max_background_jobs;
+}
+
+void rocksdb_options_set_max_background_compactions(rocksdb_options_t* opt,
+                                                    int n) {
+  opt->rep.max_background_compactions = n;
+}
+
+int rocksdb_options_get_max_background_compactions(rocksdb_options_t* opt) {
+  return opt->rep.max_background_compactions;
+}
+
+void rocksdb_options_set_max_background_flushes(rocksdb_options_t* opt, int n) {
+  opt->rep.max_background_flushes = n;
+}
+
+int rocksdb_options_get_max_background_flushes(rocksdb_options_t* opt) {
+  return opt->rep.max_background_flushes;
+}
+
+void rocksdb_options_set_experimental_mempurge_threshold(rocksdb_options_t* opt,
+                                                         double v) {
+  opt->rep.experimental_mempurge_threshold = v;
+}
+
+double rocksdb_options_get_experimental_mempurge_threshold(
+    rocksdb_options_t* opt) {
+  return opt->rep.experimental_mempurge_threshold;
+}
+
+void rocksdb_options_set_max_log_file_size(rocksdb_options_t* opt, size_t v) {
+  opt->rep.max_log_file_size = v;
+}
+
+size_t rocksdb_options_get_max_log_file_size(rocksdb_options_t* opt) {
+  return opt->rep.max_log_file_size;
+}
+
+void rocksdb_options_set_log_file_time_to_roll(rocksdb_options_t* opt,
+                                               size_t v) {
+  opt->rep.log_file_time_to_roll = v;
+}
+
+size_t rocksdb_options_get_log_file_time_to_roll(rocksdb_options_t* opt) {
+  return opt->rep.log_file_time_to_roll;
+}
+
+void rocksdb_options_set_keep_log_file_num(rocksdb_options_t* opt, size_t v) {
+  opt->rep.keep_log_file_num = v;
+}
+
+size_t rocksdb_options_get_keep_log_file_num(rocksdb_options_t* opt) {
+  return opt->rep.keep_log_file_num;
+}
+
+void rocksdb_options_set_recycle_log_file_num(rocksdb_options_t* opt,
+                                              size_t v) {
+  opt->rep.recycle_log_file_num = v;
+}
+
+size_t rocksdb_options_get_recycle_log_file_num(rocksdb_options_t* opt) {
+  return opt->rep.recycle_log_file_num;
+}
+
+void rocksdb_options_set_soft_pending_compaction_bytes_limit(
+    rocksdb_options_t* opt, size_t v) {
+  opt->rep.soft_pending_compaction_bytes_limit = v;
+}
+
+size_t rocksdb_options_get_soft_pending_compaction_bytes_limit(
+    rocksdb_options_t* opt) {
+  return opt->rep.soft_pending_compaction_bytes_limit;
+}
+
+void rocksdb_options_set_hard_pending_compaction_bytes_limit(
+    rocksdb_options_t* opt, size_t v) {
+  opt->rep.hard_pending_compaction_bytes_limit = v;
+}
+
+size_t rocksdb_options_get_hard_pending_compaction_bytes_limit(
+    rocksdb_options_t* opt) {
+  return opt->rep.hard_pending_compaction_bytes_limit;
+}
+
+void rocksdb_options_set_max_manifest_file_size(rocksdb_options_t* opt,
+                                                size_t v) {
+  opt->rep.max_manifest_file_size = v;
+}
+
+size_t rocksdb_options_get_max_manifest_file_size(rocksdb_options_t* opt) {
+  return opt->rep.max_manifest_file_size;
+}
+
+void rocksdb_options_set_table_cache_numshardbits(rocksdb_options_t* opt,
+                                                  int v) {
+  opt->rep.table_cache_numshardbits = v;
+}
+
+int rocksdb_options_get_table_cache_numshardbits(rocksdb_options_t* opt) {
+  return opt->rep.table_cache_numshardbits;
+}
+
+void rocksdb_options_set_arena_block_size(rocksdb_options_t* opt, size_t v) {
+  opt->rep.arena_block_size = v;
+}
+
+size_t rocksdb_options_get_arena_block_size(rocksdb_options_t* opt) {
+  return opt->rep.arena_block_size;
+}
+
+void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t* opt,
+                                                  int disable) {
+  opt->rep.disable_auto_compactions = disable;
+}
+
+unsigned char rocksdb_options_get_disable_auto_compactions(
+    rocksdb_options_t* opt) {
+  return opt->rep.disable_auto_compactions;
+}
+
+void rocksdb_options_set_optimize_filters_for_hits(rocksdb_options_t* opt,
+                                                   int v) {
+  opt->rep.optimize_filters_for_hits = v;
+}
+
+unsigned char rocksdb_options_get_optimize_filters_for_hits(
+    rocksdb_options_t* opt) {
+  return opt->rep.optimize_filters_for_hits;
+}
+
+void rocksdb_options_set_delete_obsolete_files_period_micros(
+    rocksdb_options_t* opt, uint64_t v) {
+  opt->rep.delete_obsolete_files_period_micros = v;
+}
+
+uint64_t rocksdb_options_get_delete_obsolete_files_period_micros(
+    rocksdb_options_t* opt) {
+  return opt->rep.delete_obsolete_files_period_micros;
+}
+
+void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t* opt) {
+  opt->rep.PrepareForBulkLoad();
+}
+
+void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t* opt) {
+  opt->rep.memtable_factory.reset(new ROCKSDB_NAMESPACE::VectorRepFactory);
+}
+
+void rocksdb_options_set_memtable_prefix_bloom_size_ratio(
+    rocksdb_options_t* opt, double v) {
+  opt->rep.memtable_prefix_bloom_size_ratio = v;
+}
+
+double rocksdb_options_get_memtable_prefix_bloom_size_ratio(
+    rocksdb_options_t* opt) {
+  return opt->rep.memtable_prefix_bloom_size_ratio;
+}
+
+void rocksdb_options_set_memtable_huge_page_size(rocksdb_options_t* opt,
+                                                 size_t v) {
+  opt->rep.memtable_huge_page_size = v;
+}
+
+size_t rocksdb_options_get_memtable_huge_page_size(rocksdb_options_t* opt) {
+  return opt->rep.memtable_huge_page_size;
+}
+
+void rocksdb_options_set_hash_skip_list_rep(rocksdb_options_t* opt,
+                                            size_t bucket_count,
+                                            int32_t skiplist_height,
+                                            int32_t skiplist_branching_factor) {
+  ROCKSDB_NAMESPACE::MemTableRepFactory* factory =
+      ROCKSDB_NAMESPACE::NewHashSkipListRepFactory(
+          bucket_count, skiplist_height, skiplist_branching_factor);
+  opt->rep.memtable_factory.reset(factory);
+}
+
+void rocksdb_options_set_hash_link_list_rep(rocksdb_options_t* opt,
+                                            size_t bucket_count) {
+  opt->rep.memtable_factory.reset(
+      ROCKSDB_NAMESPACE::NewHashLinkListRepFactory(bucket_count));
+}
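+
+// Usage sketch (editorial illustration, not part of the vendored file):
+// pairing a fixed-prefix extractor with the plain-table factory defined
+// below, a combination aimed at point lookups over mmap-able data; the
+// concrete parameter values here are arbitrary.
+//
+//   rocksdb_options_t* opts = rocksdb_options_create();
+//   rocksdb_options_set_prefix_extractor(
+//       opts, rocksdb_slicetransform_create_fixed_prefix(8));
+//   rocksdb_options_set_plain_table_factory(opts, 0 /* variable key len */,
+//                                           10 /* bloom bits per key */,
+//                                           0.75 /* hash table ratio */,
+//                                           16 /* index sparseness */);
+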
+void rocksdb_options_set_plain_table_factory(rocksdb_options_t* opt,
+                                             uint32_t user_key_len,
+                                             int bloom_bits_per_key,
+                                             double hash_table_ratio,
+                                             size_t index_sparseness) {
+  ROCKSDB_NAMESPACE::PlainTableOptions options;
+  options.user_key_len = user_key_len;
+  options.bloom_bits_per_key = bloom_bits_per_key;
+  options.hash_table_ratio = hash_table_ratio;
+  options.index_sparseness = index_sparseness;
+
+  ROCKSDB_NAMESPACE::TableFactory* factory =
+      ROCKSDB_NAMESPACE::NewPlainTableFactory(options);
+  opt->rep.table_factory.reset(factory);
+}
+
+void rocksdb_options_set_max_successive_merges(rocksdb_options_t* opt,
+                                               size_t v) {
+  opt->rep.max_successive_merges = v;
+}
+
+size_t rocksdb_options_get_max_successive_merges(rocksdb_options_t* opt) {
+  return opt->rep.max_successive_merges;
+}
+
+void rocksdb_options_set_bloom_locality(rocksdb_options_t* opt, uint32_t v) {
+  opt->rep.bloom_locality = v;
+}
+
+uint32_t rocksdb_options_get_bloom_locality(rocksdb_options_t* opt) {
+  return opt->rep.bloom_locality;
+}
+
+void rocksdb_options_set_inplace_update_support(rocksdb_options_t* opt,
+                                                unsigned char v) {
+  opt->rep.inplace_update_support = v;
+}
+
+unsigned char rocksdb_options_get_inplace_update_support(
+    rocksdb_options_t* opt) {
+  return opt->rep.inplace_update_support;
+}
+
+void rocksdb_options_set_inplace_update_num_locks(rocksdb_options_t* opt,
+                                                  size_t v) {
+  opt->rep.inplace_update_num_locks = v;
+}
+
+size_t rocksdb_options_get_inplace_update_num_locks(rocksdb_options_t* opt) {
+  return opt->rep.inplace_update_num_locks;
+}
+
+void rocksdb_options_set_report_bg_io_stats(rocksdb_options_t* opt, int v) {
+  opt->rep.report_bg_io_stats = v;
+}
+
+unsigned char rocksdb_options_get_report_bg_io_stats(rocksdb_options_t* opt) {
+  return opt->rep.report_bg_io_stats;
+}
+
+void rocksdb_options_set_compaction_style(rocksdb_options_t* opt, int style) {
+  opt->rep.compaction_style =
+      static_cast<ROCKSDB_NAMESPACE::CompactionStyle>(style);
+}
+
+int rocksdb_options_get_compaction_style(rocksdb_options_t* opt) {
+  return opt->rep.compaction_style;
+}
+
+void rocksdb_options_set_universal_compaction_options(
+    rocksdb_options_t* opt, rocksdb_universal_compaction_options_t* uco) {
+  opt->rep.compaction_options_universal = *(uco->rep);
+}
+
+void rocksdb_options_set_fifo_compaction_options(
+    rocksdb_options_t* opt, rocksdb_fifo_compaction_options_t* fifo) {
+  opt->rep.compaction_options_fifo = fifo->rep;
+}
+
+char* rocksdb_options_statistics_get_string(rocksdb_options_t* opt) {
+  ROCKSDB_NAMESPACE::Statistics* statistics = opt->rep.statistics.get();
+  if (statistics) {
+    return strdup(statistics->ToString().c_str());
+  }
+  return nullptr;
+}
+
+void rocksdb_options_set_ratelimiter(rocksdb_options_t* opt,
+                                     rocksdb_ratelimiter_t* limiter) {
+  if (limiter) {
+    opt->rep.rate_limiter = limiter->rep;
+  }
+}
+
+void rocksdb_options_set_atomic_flush(rocksdb_options_t* opt,
+                                      unsigned char atomic_flush) {
+  opt->rep.atomic_flush = atomic_flush;
+}
+
+unsigned char rocksdb_options_get_atomic_flush(rocksdb_options_t* opt) {
+  return opt->rep.atomic_flush;
+}
+
+void rocksdb_options_set_manual_wal_flush(rocksdb_options_t* opt,
+                                          unsigned char manual_wal_flush) {
+  opt->rep.manual_wal_flush = manual_wal_flush;
+}
+
+unsigned char rocksdb_options_get_manual_wal_flush(rocksdb_options_t* opt) {
+  return opt->rep.manual_wal_flush;
+}
+
+void rocksdb_options_set_wal_compression(rocksdb_options_t* opt, int val) {
+  opt->rep.wal_compression = static_cast<CompressionType>(val);
+}
+
+int rocksdb_options_get_wal_compression(rocksdb_options_t* opt) {
+  return opt->rep.wal_compression;
+}
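+
+// Usage sketch (editorial illustration, not part of the vendored file):
+// capping compaction and flush I/O with the generic rate limiter created
+// by rocksdb_ratelimiter_create() below; the options object keeps its own
+// shared_ptr reference, so the wrapper can be destroyed after attaching.
+//
+//   rocksdb_ratelimiter_t* rl = rocksdb_ratelimiter_create(
+//       16 << 20 /* 16 MiB/s */, 100 * 1000 /* 100 ms refill period */,
+//       10 /* fairness */);
+//   rocksdb_options_set_ratelimiter(opts, rl);
+//   rocksdb_ratelimiter_destroy(rl);
+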
+          fairness));
+  return rate_limiter;
+}
+
+void rocksdb_ratelimiter_destroy(rocksdb_ratelimiter_t* limiter) {
+  delete limiter;
+}
+
+void rocksdb_options_set_row_cache(rocksdb_options_t* opt,
+                                   rocksdb_cache_t* cache) {
+  if (cache) {
+    opt->rep.row_cache = cache->rep;
+  }
+}
+
+void rocksdb_options_add_compact_on_deletion_collector_factory(
+    rocksdb_options_t* opt, size_t window_size, size_t num_dels_trigger) {
+  std::shared_ptr<ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory>
+      compact_on_del =
+          NewCompactOnDeletionCollectorFactory(window_size, num_dels_trigger);
+  opt->rep.table_properties_collector_factories.emplace_back(compact_on_del);
+}
+
+void rocksdb_set_perf_level(int v) {
+  PerfLevel level = static_cast<PerfLevel>(v);
+  SetPerfLevel(level);
+}
+
+rocksdb_perfcontext_t* rocksdb_perfcontext_create() {
+  rocksdb_perfcontext_t* context = new rocksdb_perfcontext_t;
+  context->rep = ROCKSDB_NAMESPACE::get_perf_context();
+  return context;
+}
+
+void rocksdb_perfcontext_reset(rocksdb_perfcontext_t* context) {
+  context->rep->Reset();
+}
+
+char* rocksdb_perfcontext_report(rocksdb_perfcontext_t* context,
+                                 unsigned char exclude_zero_counters) {
+  return strdup(context->rep->ToString(exclude_zero_counters).c_str());
+}
+
+uint64_t rocksdb_perfcontext_metric(rocksdb_perfcontext_t* context,
+                                    int metric) {
+  PerfContext* rep = context->rep;
+  switch (metric) {
+    case rocksdb_user_key_comparison_count:
+      return rep->user_key_comparison_count;
+    case rocksdb_block_cache_hit_count:
+      return rep->block_cache_hit_count;
+    case rocksdb_block_read_count:
+      return rep->block_read_count;
+    case rocksdb_block_read_byte:
+      return rep->block_read_byte;
+    case rocksdb_block_read_time:
+      return rep->block_read_time;
+    case rocksdb_block_checksum_time:
+      return rep->block_checksum_time;
+    case rocksdb_block_decompress_time:
+      return rep->block_decompress_time;
+    case rocksdb_get_read_bytes:
+      return rep->get_read_bytes;
+    case rocksdb_multiget_read_bytes:
+      return rep->multiget_read_bytes;
+    case rocksdb_iter_read_bytes:
+      return rep->iter_read_bytes;
+    case rocksdb_internal_key_skipped_count:
+      return rep->internal_key_skipped_count;
+    case rocksdb_internal_delete_skipped_count:
+      return rep->internal_delete_skipped_count;
+    case rocksdb_internal_recent_skipped_count:
+      return rep->internal_recent_skipped_count;
+    case rocksdb_internal_merge_count:
+      return rep->internal_merge_count;
+    case rocksdb_get_snapshot_time:
+      return rep->get_snapshot_time;
+    case rocksdb_get_from_memtable_time:
+      return rep->get_from_memtable_time;
+    case rocksdb_get_from_memtable_count:
+      return rep->get_from_memtable_count;
+    case rocksdb_get_post_process_time:
+      return rep->get_post_process_time;
+    case rocksdb_get_from_output_files_time:
+      return rep->get_from_output_files_time;
+    case rocksdb_seek_on_memtable_time:
+      return rep->seek_on_memtable_time;
+    case rocksdb_seek_on_memtable_count:
+      return rep->seek_on_memtable_count;
+    case rocksdb_next_on_memtable_count:
+      return rep->next_on_memtable_count;
+    case rocksdb_prev_on_memtable_count:
+      return rep->prev_on_memtable_count;
+    case rocksdb_seek_child_seek_time:
+      return rep->seek_child_seek_time;
+    case rocksdb_seek_child_seek_count:
+      return rep->seek_child_seek_count;
+    case rocksdb_seek_min_heap_time:
+      return rep->seek_min_heap_time;
+    case rocksdb_seek_max_heap_time:
+      return rep->seek_max_heap_time;
+    case rocksdb_seek_internal_seek_time:
+      return rep->seek_internal_seek_time;
+    case rocksdb_find_next_user_entry_time:
+      return rep->find_next_user_entry_time;
+    case rocksdb_write_wal_time:
+      return rep->write_wal_time;
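+    // Usage sketch for the perf-context API above (not upstream code;
+    // hypothetical caller). Perf counters are collected per thread once
+    // the perf level is raised:
+    //
+    //   rocksdb_set_perf_level(rocksdb_enable_time);
+    //   rocksdb_perfcontext_t* ctx = rocksdb_perfcontext_create();
+    //   rocksdb_perfcontext_reset(ctx);
+    //   /* ... issue reads/writes on this thread ... */
+    //   uint64_t n = rocksdb_perfcontext_metric(ctx, rocksdb_block_read_count);
+    //   char* report = rocksdb_perfcontext_report(ctx, 1);
+    //   free(report);
+    //   rocksdb_perfcontext_destroy(ctx);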
case rocksdb_write_memtable_time: + return rep->write_memtable_time; + case rocksdb_write_delay_time: + return rep->write_delay_time; + case rocksdb_write_pre_and_post_process_time: + return rep->write_pre_and_post_process_time; + case rocksdb_db_mutex_lock_nanos: + return rep->db_mutex_lock_nanos; + case rocksdb_db_condition_wait_nanos: + return rep->db_condition_wait_nanos; + case rocksdb_merge_operator_time_nanos: + return rep->merge_operator_time_nanos; + case rocksdb_read_index_block_nanos: + return rep->read_index_block_nanos; + case rocksdb_read_filter_block_nanos: + return rep->read_filter_block_nanos; + case rocksdb_new_table_block_iter_nanos: + return rep->new_table_block_iter_nanos; + case rocksdb_new_table_iterator_nanos: + return rep->new_table_iterator_nanos; + case rocksdb_block_seek_nanos: + return rep->block_seek_nanos; + case rocksdb_find_table_nanos: + return rep->find_table_nanos; + case rocksdb_bloom_memtable_hit_count: + return rep->bloom_memtable_hit_count; + case rocksdb_bloom_memtable_miss_count: + return rep->bloom_memtable_miss_count; + case rocksdb_bloom_sst_hit_count: + return rep->bloom_sst_hit_count; + case rocksdb_bloom_sst_miss_count: + return rep->bloom_sst_miss_count; + case rocksdb_key_lock_wait_time: + return rep->key_lock_wait_time; + case rocksdb_key_lock_wait_count: + return rep->key_lock_wait_count; + case rocksdb_env_new_sequential_file_nanos: + return rep->env_new_sequential_file_nanos; + case rocksdb_env_new_random_access_file_nanos: + return rep->env_new_random_access_file_nanos; + case rocksdb_env_new_writable_file_nanos: + return rep->env_new_writable_file_nanos; + case rocksdb_env_reuse_writable_file_nanos: + return rep->env_reuse_writable_file_nanos; + case rocksdb_env_new_random_rw_file_nanos: + return rep->env_new_random_rw_file_nanos; + case rocksdb_env_new_directory_nanos: + return rep->env_new_directory_nanos; + case rocksdb_env_file_exists_nanos: + return rep->env_file_exists_nanos; + case rocksdb_env_get_children_nanos: + return rep->env_get_children_nanos; + case rocksdb_env_get_children_file_attributes_nanos: + return rep->env_get_children_file_attributes_nanos; + case rocksdb_env_delete_file_nanos: + return rep->env_delete_file_nanos; + case rocksdb_env_create_dir_nanos: + return rep->env_create_dir_nanos; + case rocksdb_env_create_dir_if_missing_nanos: + return rep->env_create_dir_if_missing_nanos; + case rocksdb_env_delete_dir_nanos: + return rep->env_delete_dir_nanos; + case rocksdb_env_get_file_size_nanos: + return rep->env_get_file_size_nanos; + case rocksdb_env_get_file_modification_time_nanos: + return rep->env_get_file_modification_time_nanos; + case rocksdb_env_rename_file_nanos: + return rep->env_rename_file_nanos; + case rocksdb_env_link_file_nanos: + return rep->env_link_file_nanos; + case rocksdb_env_lock_file_nanos: + return rep->env_lock_file_nanos; + case rocksdb_env_unlock_file_nanos: + return rep->env_unlock_file_nanos; + case rocksdb_env_new_logger_nanos: + return rep->env_new_logger_nanos; + case rocksdb_number_async_seek: + return rep->number_async_seek; + case rocksdb_blob_cache_hit_count: + return rep->blob_cache_hit_count; + case rocksdb_blob_read_count: + return rep->blob_read_count; + case rocksdb_blob_read_byte: + return rep->blob_read_byte; + case rocksdb_blob_read_time: + return rep->blob_read_time; + case rocksdb_blob_checksum_time: + return rep->blob_checksum_time; + case rocksdb_blob_decompress_time: + return rep->blob_decompress_time; + case rocksdb_internal_range_del_reseek_count: + return 
rep->internal_range_del_reseek_count; + case rocksdb_block_read_cpu_time: + return rep->block_read_cpu_time; + default: + break; + } + return 0; +} + +void rocksdb_perfcontext_destroy(rocksdb_perfcontext_t* context) { + delete context; +} + +/* +TODO: +DB::OpenForReadOnly +DB::KeyMayExist +DB::GetOptions +DB::GetSortedWalFiles +DB::GetLatestSequenceNumber +DB::GetUpdatesSince +DB::GetDbIdentity +DB::RunManualCompaction +custom cache +table_properties_collectors +*/ + +rocksdb_compactionfilter_t* rocksdb_compactionfilter_create( + void* state, void (*destructor)(void*), + unsigned char (*filter)(void*, int level, const char* key, + size_t key_length, const char* existing_value, + size_t value_length, char** new_value, + size_t* new_value_length, + unsigned char* value_changed), + const char* (*name)(void*)) { + rocksdb_compactionfilter_t* result = new rocksdb_compactionfilter_t; + result->state_ = state; + result->destructor_ = destructor; + result->filter_ = filter; + result->ignore_snapshots_ = true; + result->name_ = name; + return result; +} + +void rocksdb_compactionfilter_set_ignore_snapshots( + rocksdb_compactionfilter_t* filter, unsigned char whether_ignore) { + filter->ignore_snapshots_ = whether_ignore; +} + +void rocksdb_compactionfilter_destroy(rocksdb_compactionfilter_t* filter) { + delete filter; +} + +unsigned char rocksdb_compactionfiltercontext_is_full_compaction( + rocksdb_compactionfiltercontext_t* context) { + return context->rep.is_full_compaction; +} + +unsigned char rocksdb_compactionfiltercontext_is_manual_compaction( + rocksdb_compactionfiltercontext_t* context) { + return context->rep.is_manual_compaction; +} + +rocksdb_compactionfilterfactory_t* rocksdb_compactionfilterfactory_create( + void* state, void (*destructor)(void*), + rocksdb_compactionfilter_t* (*create_compaction_filter)( + void*, rocksdb_compactionfiltercontext_t* context), + const char* (*name)(void*)) { + rocksdb_compactionfilterfactory_t* result = + new rocksdb_compactionfilterfactory_t; + result->state_ = state; + result->destructor_ = destructor; + result->create_compaction_filter_ = create_compaction_filter; + result->name_ = name; + return result; +} + +void rocksdb_compactionfilterfactory_destroy( + rocksdb_compactionfilterfactory_t* factory) { + delete factory; +} + +rocksdb_comparator_t* rocksdb_comparator_create( + void* state, void (*destructor)(void*), + int (*compare)(void*, const char* a, size_t alen, const char* b, + size_t blen), + const char* (*name)(void*)) { + rocksdb_comparator_t* result = new rocksdb_comparator_t; + result->state_ = state; + result->destructor_ = destructor; + result->compare_ = compare; + result->name_ = name; + result->compare_ts_ = nullptr; + result->compare_without_ts_ = nullptr; + return result; +} + +void rocksdb_comparator_destroy(rocksdb_comparator_t* cmp) { delete cmp; } + +rocksdb_comparator_t* rocksdb_comparator_with_ts_create( + void* state, void (*destructor)(void*), + int (*compare)(void*, const char* a, size_t alen, const char* b, + size_t blen), + int (*compare_ts)(void*, const char* a_ts, size_t a_tslen, const char* b_ts, + size_t b_tslen), + int (*compare_without_ts)(void*, const char* a, size_t alen, + unsigned char a_has_ts, const char* b, + size_t blen, unsigned char b_has_ts), + const char* (*name)(void*), size_t timestamp_size) { + rocksdb_comparator_t* result = new rocksdb_comparator_t(timestamp_size); + result->state_ = state; + result->destructor_ = destructor; + result->compare_ = compare; + result->compare_ts_ = compare_ts; + 
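+  // Usage sketch (not upstream code): the C callbacks mirror
+  // rocksdb::Comparator. A minimal bytewise comparator without
+  // timestamps, using hypothetical helper names:
+  //
+  //   static int cmp(void* s, const char* a, size_t alen,
+  //                  const char* b, size_t blen) {
+  //     size_t n = alen < blen ? alen : blen;
+  //     int r = memcmp(a, b, n);
+  //     return r != 0 ? r : (alen < blen ? -1 : (alen > blen ? 1 : 0));
+  //   }
+  //   static const char* cmp_name(void* s) { return "example.bytewise"; }
+  //   static void cmp_noop_destroy(void* s) {}
+  //
+  //   rocksdb_comparator_t* c = rocksdb_comparator_create(
+  //       NULL, cmp_noop_destroy, cmp, cmp_name);
+  //   rocksdb_options_set_comparator(opts, c);
+  //
+  // A no-op destructor is passed rather than NULL because destruction
+  // invokes the callback unconditionally.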
result->compare_without_ts_ = compare_without_ts; + result->name_ = name; + return result; +} + +void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t* filter) { + delete filter; +} + +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format( + double bits_per_key, bool original_format) { + // Make a rocksdb_filterpolicy_t, but override all of its methods so + // they delegate to a NewBloomFilterPolicy() instead of user + // supplied C functions. + struct Wrapper : public rocksdb_filterpolicy_t { + const FilterPolicy* rep_; + ~Wrapper() override { delete rep_; } + const char* Name() const override { return rep_->Name(); } + const char* CompatibilityName() const override { + return rep_->CompatibilityName(); + } + // No need to override GetFilterBitsBuilder if this one is overridden + ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext( + const ROCKSDB_NAMESPACE::FilterBuildingContext& context) + const override { + return rep_->GetBuilderWithContext(context); + } + ROCKSDB_NAMESPACE::FilterBitsReader* GetFilterBitsReader( + const Slice& contents) const override { + return rep_->GetFilterBitsReader(contents); + } + static void DoNothing(void*) {} + }; + Wrapper* wrapper = new Wrapper; + wrapper->rep_ = NewBloomFilterPolicy(bits_per_key, original_format); + wrapper->state_ = nullptr; + wrapper->destructor_ = &Wrapper::DoNothing; + return wrapper; +} + +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_full( + double bits_per_key) { + return rocksdb_filterpolicy_create_bloom_format(bits_per_key, false); +} + +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(double bits_per_key) { + return rocksdb_filterpolicy_create_bloom_format(bits_per_key, true); +} + +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format( + double bloom_equivalent_bits_per_key, int bloom_before_level) { + // Make a rocksdb_filterpolicy_t, but override all of its methods so + // they delegate to a NewRibbonFilterPolicy() instead of user + // supplied C functions. 
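+  // Usage sketch (not upstream code): a filter policy is consumed through
+  // the block-based table options, e.g.
+  //
+  //   rocksdb_block_based_table_options_t* bbto =
+  //       rocksdb_block_based_options_create();
+  //   rocksdb_block_based_options_set_filter_policy(
+  //       bbto, rocksdb_filterpolicy_create_bloom(10.0));
+  //   rocksdb_options_set_block_based_table_factory(opts, bbto);
+  //
+  // Ownership of the policy passes to the table options, so the caller
+  // must not destroy it separately after this call.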
+ struct Wrapper : public rocksdb_filterpolicy_t { + const FilterPolicy* rep_; + ~Wrapper() override { delete rep_; } + const char* Name() const override { return rep_->Name(); } + const char* CompatibilityName() const override { + return rep_->CompatibilityName(); + } + ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext( + const ROCKSDB_NAMESPACE::FilterBuildingContext& context) + const override { + return rep_->GetBuilderWithContext(context); + } + ROCKSDB_NAMESPACE::FilterBitsReader* GetFilterBitsReader( + const Slice& contents) const override { + return rep_->GetFilterBitsReader(contents); + } + static void DoNothing(void*) {} + }; + Wrapper* wrapper = new Wrapper; + wrapper->rep_ = + NewRibbonFilterPolicy(bloom_equivalent_bits_per_key, bloom_before_level); + wrapper->state_ = nullptr; + wrapper->destructor_ = &Wrapper::DoNothing; + return wrapper; +} + +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon( + double bloom_equivalent_bits_per_key) { + return rocksdb_filterpolicy_create_ribbon_format( + bloom_equivalent_bits_per_key, /*bloom_before_level = disabled*/ -1); +} + +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_hybrid( + double bloom_equivalent_bits_per_key, int bloom_before_level) { + return rocksdb_filterpolicy_create_ribbon_format( + bloom_equivalent_bits_per_key, bloom_before_level); +} + +rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( + void* state, void (*destructor)(void*), + char* (*full_merge)(void*, const char* key, size_t key_length, + const char* existing_value, + size_t existing_value_length, + const char* const* operands_list, + const size_t* operands_list_length, int num_operands, + unsigned char* success, size_t* new_value_length), + char* (*partial_merge)(void*, const char* key, size_t key_length, + const char* const* operands_list, + const size_t* operands_list_length, int num_operands, + unsigned char* success, size_t* new_value_length), + void (*delete_value)(void*, const char* value, size_t value_length), + const char* (*name)(void*)) { + rocksdb_mergeoperator_t* result = new rocksdb_mergeoperator_t; + result->state_ = state; + result->destructor_ = destructor; + result->full_merge_ = full_merge; + result->partial_merge_ = partial_merge; + result->delete_value_ = delete_value; + result->name_ = name; + return result; +} + +void rocksdb_mergeoperator_destroy(rocksdb_mergeoperator_t* merge_operator) { + delete merge_operator; +} + +rocksdb_readoptions_t* rocksdb_readoptions_create() { + return new rocksdb_readoptions_t; +} + +void rocksdb_readoptions_destroy(rocksdb_readoptions_t* opt) { delete opt; } + +void rocksdb_readoptions_set_verify_checksums(rocksdb_readoptions_t* opt, + unsigned char v) { + opt->rep.verify_checksums = v; +} + +unsigned char rocksdb_readoptions_get_verify_checksums( + rocksdb_readoptions_t* opt) { + return opt->rep.verify_checksums; +} + +void rocksdb_readoptions_set_fill_cache(rocksdb_readoptions_t* opt, + unsigned char v) { + opt->rep.fill_cache = v; +} + +unsigned char rocksdb_readoptions_get_fill_cache(rocksdb_readoptions_t* opt) { + return opt->rep.fill_cache; +} + +void rocksdb_readoptions_set_snapshot(rocksdb_readoptions_t* opt, + const rocksdb_snapshot_t* snap) { + opt->rep.snapshot = (snap ? 
snap->rep : nullptr);
+}
+
+void rocksdb_readoptions_set_iterate_upper_bound(rocksdb_readoptions_t* opt,
+                                                 const char* key,
+                                                 size_t keylen) {
+  if (key == nullptr) {
+    opt->upper_bound = Slice();
+    opt->rep.iterate_upper_bound = nullptr;
+
+  } else {
+    opt->upper_bound = Slice(key, keylen);
+    opt->rep.iterate_upper_bound = &opt->upper_bound;
+  }
+}
+
+void rocksdb_readoptions_set_iterate_lower_bound(rocksdb_readoptions_t* opt,
+                                                 const char* key,
+                                                 size_t keylen) {
+  if (key == nullptr) {
+    opt->lower_bound = Slice();
+    opt->rep.iterate_lower_bound = nullptr;
+  } else {
+    opt->lower_bound = Slice(key, keylen);
+    opt->rep.iterate_lower_bound = &opt->lower_bound;
+  }
+}
+
+void rocksdb_readoptions_set_read_tier(rocksdb_readoptions_t* opt, int v) {
+  opt->rep.read_tier = static_cast<ROCKSDB_NAMESPACE::ReadTier>(v);
+}
+
+int rocksdb_readoptions_get_read_tier(rocksdb_readoptions_t* opt) {
+  return static_cast<int>(opt->rep.read_tier);
+}
+
+void rocksdb_readoptions_set_tailing(rocksdb_readoptions_t* opt,
+                                     unsigned char v) {
+  opt->rep.tailing = v;
+}
+
+unsigned char rocksdb_readoptions_get_tailing(rocksdb_readoptions_t* opt) {
+  return opt->rep.tailing;
+}
+
+void rocksdb_readoptions_set_managed(rocksdb_readoptions_t* opt,
+                                     unsigned char v) {
+  opt->rep.managed = v;
+}
+
+void rocksdb_readoptions_set_readahead_size(rocksdb_readoptions_t* opt,
+                                            size_t v) {
+  opt->rep.readahead_size = v;
+}
+
+size_t rocksdb_readoptions_get_readahead_size(rocksdb_readoptions_t* opt) {
+  return opt->rep.readahead_size;
+}
+
+void rocksdb_readoptions_set_prefix_same_as_start(rocksdb_readoptions_t* opt,
+                                                  unsigned char v) {
+  opt->rep.prefix_same_as_start = v;
+}
+
+unsigned char rocksdb_readoptions_get_prefix_same_as_start(
+    rocksdb_readoptions_t* opt) {
+  return opt->rep.prefix_same_as_start;
+}
+
+void rocksdb_readoptions_set_pin_data(rocksdb_readoptions_t* opt,
+                                      unsigned char v) {
+  opt->rep.pin_data = v;
+}
+
+unsigned char rocksdb_readoptions_get_pin_data(rocksdb_readoptions_t* opt) {
+  return opt->rep.pin_data;
+}
+
+void rocksdb_readoptions_set_total_order_seek(rocksdb_readoptions_t* opt,
+                                              unsigned char v) {
+  opt->rep.total_order_seek = v;
+}
+
+unsigned char rocksdb_readoptions_get_total_order_seek(
+    rocksdb_readoptions_t* opt) {
+  return opt->rep.total_order_seek;
+}
+
+void rocksdb_readoptions_set_max_skippable_internal_keys(
+    rocksdb_readoptions_t* opt, uint64_t v) {
+  opt->rep.max_skippable_internal_keys = v;
+}
+
+uint64_t rocksdb_readoptions_get_max_skippable_internal_keys(
+    rocksdb_readoptions_t* opt) {
+  return opt->rep.max_skippable_internal_keys;
+}
+
+void rocksdb_readoptions_set_background_purge_on_iterator_cleanup(
+    rocksdb_readoptions_t* opt, unsigned char v) {
+  opt->rep.background_purge_on_iterator_cleanup = v;
+}
+
+unsigned char rocksdb_readoptions_get_background_purge_on_iterator_cleanup(
+    rocksdb_readoptions_t* opt) {
+  return opt->rep.background_purge_on_iterator_cleanup;
+}
+
+void rocksdb_readoptions_set_ignore_range_deletions(rocksdb_readoptions_t* opt,
+                                                    unsigned char v) {
+  opt->rep.ignore_range_deletions = v;
+}
+
+unsigned char rocksdb_readoptions_get_ignore_range_deletions(
+    rocksdb_readoptions_t* opt) {
+  return opt->rep.ignore_range_deletions;
+}
+
+void rocksdb_readoptions_set_deadline(rocksdb_readoptions_t* opt,
+                                      uint64_t microseconds) {
+  opt->rep.deadline = std::chrono::microseconds(microseconds);
+}
+
+uint64_t rocksdb_readoptions_get_deadline(rocksdb_readoptions_t* opt) {
+  return opt->rep.deadline.count();
+}
+
+void rocksdb_readoptions_set_io_timeout(rocksdb_readoptions_t* opt,
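+    // Usage sketch for the iterate-bound setters above (not upstream code;
+    // assumes an already-opened rocksdb_t* db). Note the options object
+    // stores a Slice pointing at the caller's buffer, so that buffer must
+    // outlive any reads that use these options. A range scan over
+    // ["a", "b"):
+    //
+    //   rocksdb_readoptions_t* ro = rocksdb_readoptions_create();
+    //   rocksdb_readoptions_set_iterate_upper_bound(ro, "b", 1);
+    //   rocksdb_iterator_t* it = rocksdb_create_iterator(db, ro);
+    //   for (rocksdb_iter_seek(it, "a", 1); rocksdb_iter_valid(it);
+    //        rocksdb_iter_next(it)) {
+    //     size_t klen;
+    //     const char* k = rocksdb_iter_key(it, &klen);
+    //   }
+    //   rocksdb_iter_destroy(it);
+    //   rocksdb_readoptions_destroy(ro);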
+                                        uint64_t microseconds) {
+  opt->rep.io_timeout = std::chrono::microseconds(microseconds);
+}
+
+extern ROCKSDB_LIBRARY_API uint64_t
+rocksdb_readoptions_get_io_timeout(rocksdb_readoptions_t* opt) {
+  return opt->rep.io_timeout.count();
+}
+
+void rocksdb_readoptions_set_async_io(rocksdb_readoptions_t* opt,
+                                      unsigned char v) {
+  opt->rep.async_io = v;
+}
+
+unsigned char rocksdb_readoptions_get_async_io(rocksdb_readoptions_t* opt) {
+  return opt->rep.async_io;
+}
+
+void rocksdb_readoptions_set_timestamp(rocksdb_readoptions_t* opt,
+                                       const char* ts, size_t tslen) {
+  if (ts == nullptr) {
+    opt->timestamp = Slice();
+    opt->rep.timestamp = nullptr;
+  } else {
+    opt->timestamp = Slice(ts, tslen);
+    opt->rep.timestamp = &opt->timestamp;
+  }
+}
+
+void rocksdb_readoptions_set_iter_start_ts(rocksdb_readoptions_t* opt,
+                                           const char* ts, size_t tslen) {
+  if (ts == nullptr) {
+    opt->iter_start_ts = Slice();
+    opt->rep.iter_start_ts = nullptr;
+  } else {
+    opt->iter_start_ts = Slice(ts, tslen);
+    opt->rep.iter_start_ts = &opt->iter_start_ts;
+  }
+}
+
+rocksdb_writeoptions_t* rocksdb_writeoptions_create() {
+  return new rocksdb_writeoptions_t;
+}
+
+void rocksdb_writeoptions_destroy(rocksdb_writeoptions_t* opt) { delete opt; }
+
+void rocksdb_writeoptions_set_sync(rocksdb_writeoptions_t* opt,
+                                   unsigned char v) {
+  opt->rep.sync = v;
+}
+
+unsigned char rocksdb_writeoptions_get_sync(rocksdb_writeoptions_t* opt) {
+  return opt->rep.sync;
+}
+
+void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt,
+                                      int disable) {
+  opt->rep.disableWAL = disable;
+}
+
+unsigned char rocksdb_writeoptions_get_disable_WAL(
+    rocksdb_writeoptions_t* opt) {
+  return opt->rep.disableWAL;
+}
+
+void rocksdb_writeoptions_set_ignore_missing_column_families(
+    rocksdb_writeoptions_t* opt, unsigned char v) {
+  opt->rep.ignore_missing_column_families = v;
+}
+
+unsigned char rocksdb_writeoptions_get_ignore_missing_column_families(
+    rocksdb_writeoptions_t* opt) {
+  return opt->rep.ignore_missing_column_families;
+}
+
+void rocksdb_writeoptions_set_no_slowdown(rocksdb_writeoptions_t* opt,
+                                          unsigned char v) {
+  opt->rep.no_slowdown = v;
+}
+
+unsigned char rocksdb_writeoptions_get_no_slowdown(
+    rocksdb_writeoptions_t* opt) {
+  return opt->rep.no_slowdown;
+}
+
+void rocksdb_writeoptions_set_low_pri(rocksdb_writeoptions_t* opt,
+                                      unsigned char v) {
+  opt->rep.low_pri = v;
+}
+
+unsigned char rocksdb_writeoptions_get_low_pri(rocksdb_writeoptions_t* opt) {
+  return opt->rep.low_pri;
+}
+
+void rocksdb_writeoptions_set_memtable_insert_hint_per_batch(
+    rocksdb_writeoptions_t* opt, unsigned char v) {
+  opt->rep.memtable_insert_hint_per_batch = v;
+}
+
+unsigned char rocksdb_writeoptions_get_memtable_insert_hint_per_batch(
+    rocksdb_writeoptions_t* opt) {
+  return opt->rep.memtable_insert_hint_per_batch;
+}
+
+rocksdb_compactoptions_t* rocksdb_compactoptions_create() {
+  return new rocksdb_compactoptions_t;
+}
+
+void rocksdb_compactoptions_destroy(rocksdb_compactoptions_t* opt) {
+  delete opt;
+}
+
+void rocksdb_compactoptions_set_bottommost_level_compaction(
+    rocksdb_compactoptions_t* opt, unsigned char v) {
+  opt->rep.bottommost_level_compaction =
+      static_cast<BottommostLevelCompaction>(v);
+}
+
+unsigned char rocksdb_compactoptions_get_bottommost_level_compaction(
+    rocksdb_compactoptions_t* opt) {
+  return static_cast<unsigned char>(opt->rep.bottommost_level_compaction);
+}
+
+void rocksdb_compactoptions_set_exclusive_manual_compaction(
+    rocksdb_compactoptions_t* opt, unsigned char v) {
+  opt->rep.exclusive_manual_compaction = v;
+}
+
+unsigned char
rocksdb_compactoptions_get_exclusive_manual_compaction( + rocksdb_compactoptions_t* opt) { + return opt->rep.exclusive_manual_compaction; +} + +void rocksdb_compactoptions_set_change_level(rocksdb_compactoptions_t* opt, + unsigned char v) { + opt->rep.change_level = v; +} + +unsigned char rocksdb_compactoptions_get_change_level( + rocksdb_compactoptions_t* opt) { + return opt->rep.change_level; +} + +void rocksdb_compactoptions_set_target_level(rocksdb_compactoptions_t* opt, + int n) { + opt->rep.target_level = n; +} + +int rocksdb_compactoptions_get_target_level(rocksdb_compactoptions_t* opt) { + return opt->rep.target_level; +} + +void rocksdb_compactoptions_set_full_history_ts_low( + rocksdb_compactoptions_t* opt, char* ts, size_t tslen) { + if (ts == nullptr) { + opt->full_history_ts_low = Slice(); + opt->rep.full_history_ts_low = nullptr; + } else { + opt->full_history_ts_low = Slice(ts, tslen); + opt->rep.full_history_ts_low = &opt->full_history_ts_low; + } +} + +rocksdb_flushoptions_t* rocksdb_flushoptions_create() { + return new rocksdb_flushoptions_t; +} + +void rocksdb_flushoptions_destroy(rocksdb_flushoptions_t* opt) { delete opt; } + +void rocksdb_flushoptions_set_wait(rocksdb_flushoptions_t* opt, + unsigned char v) { + opt->rep.wait = v; +} + +unsigned char rocksdb_flushoptions_get_wait(rocksdb_flushoptions_t* opt) { + return opt->rep.wait; +} + +rocksdb_memory_allocator_t* rocksdb_jemalloc_nodump_allocator_create( + char** errptr) { + rocksdb_memory_allocator_t* allocator = new rocksdb_memory_allocator_t; + ROCKSDB_NAMESPACE::JemallocAllocatorOptions options; + SaveError(errptr, ROCKSDB_NAMESPACE::NewJemallocNodumpAllocator( + options, &allocator->rep)); + return allocator; +} + +void rocksdb_memory_allocator_destroy(rocksdb_memory_allocator_t* allocator) { + delete allocator; +} + +rocksdb_lru_cache_options_t* rocksdb_lru_cache_options_create() { + return new rocksdb_lru_cache_options_t; +} + +void rocksdb_lru_cache_options_destroy(rocksdb_lru_cache_options_t* opt) { + delete opt; +} + +void rocksdb_lru_cache_options_set_capacity(rocksdb_lru_cache_options_t* opt, + size_t capacity) { + opt->rep.capacity = capacity; +} + +void rocksdb_lru_cache_options_set_num_shard_bits( + rocksdb_lru_cache_options_t* opt, int num_shard_bits) { + opt->rep.num_shard_bits = num_shard_bits; +} + +void rocksdb_lru_cache_options_set_memory_allocator( + rocksdb_lru_cache_options_t* opt, rocksdb_memory_allocator_t* allocator) { + opt->rep.memory_allocator = allocator->rep; +} + +rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity) { + rocksdb_cache_t* c = new rocksdb_cache_t; + c->rep = NewLRUCache(capacity); + return c; +} + +rocksdb_cache_t* rocksdb_cache_create_lru_with_strict_capacity_limit( + size_t capacity) { + rocksdb_cache_t* c = new rocksdb_cache_t; + c->rep = NewLRUCache(capacity); + c->rep->SetStrictCapacityLimit(true); + return c; +} + +rocksdb_cache_t* rocksdb_cache_create_lru_opts( + const rocksdb_lru_cache_options_t* opt) { + rocksdb_cache_t* c = new rocksdb_cache_t; + c->rep = NewLRUCache(opt->rep); + return c; +} + +rocksdb_hyper_clock_cache_options_t* rocksdb_hyper_clock_cache_options_create( + size_t capacity, size_t estimated_entry_charge) { + return new rocksdb_hyper_clock_cache_options_t{ + HyperClockCacheOptions(capacity, estimated_entry_charge)}; +} + +void rocksdb_hyper_clock_cache_options_destroy( + rocksdb_hyper_clock_cache_options_t* opt) { + delete opt; +} + +void rocksdb_hyper_clock_cache_options_set_capacity( + rocksdb_hyper_clock_cache_options_t* opts, size_t 
capacity) { + opts->rep.capacity = capacity; +} + +void rocksdb_hyper_clock_cache_options_set_estimated_entry_charge( + rocksdb_hyper_clock_cache_options_t* opts, size_t estimated_entry_charge) { + opts->rep.estimated_entry_charge = estimated_entry_charge; +} + +void rocksdb_hyper_clock_cache_options_set_num_shard_bits( + rocksdb_hyper_clock_cache_options_t* opts, int num_shard_bits) { + opts->rep.num_shard_bits = num_shard_bits; +} + +void rocksdb_hyper_clock_cache_options_set_memory_allocator( + rocksdb_hyper_clock_cache_options_t* opts, + rocksdb_memory_allocator_t* memory_allocator) { + opts->rep.memory_allocator = memory_allocator->rep; +} + +rocksdb_cache_t* rocksdb_cache_create_hyper_clock( + size_t capacity, size_t estimated_entry_charge) { + HyperClockCacheOptions opts(capacity, estimated_entry_charge); + rocksdb_cache_t* c = new rocksdb_cache_t; + c->rep = opts.MakeSharedCache(); + return c; +} + +rocksdb_cache_t* rocksdb_cache_create_hyper_clock_opts( + const rocksdb_hyper_clock_cache_options_t* opts) { + rocksdb_cache_t* c = new rocksdb_cache_t; + c->rep = opts->rep.MakeSharedCache(); + return c; +} + +void rocksdb_cache_destroy(rocksdb_cache_t* cache) { delete cache; } + +void rocksdb_cache_disown_data(rocksdb_cache_t* cache) { + cache->rep->DisownData(); +} + +void rocksdb_cache_set_capacity(rocksdb_cache_t* cache, size_t capacity) { + cache->rep->SetCapacity(capacity); +} + +size_t rocksdb_cache_get_capacity(const rocksdb_cache_t* cache) { + return cache->rep->GetCapacity(); +} + +size_t rocksdb_cache_get_usage(const rocksdb_cache_t* cache) { + return cache->rep->GetUsage(); +} + +size_t rocksdb_cache_get_pinned_usage(const rocksdb_cache_t* cache) { + return cache->rep->GetPinnedUsage(); +} + +size_t rocksdb_cache_get_table_address_count(const rocksdb_cache_t* cache) { + return cache->rep->GetTableAddressCount(); +} + +size_t rocksdb_cache_get_occupancy_count(const rocksdb_cache_t* cache) { + return cache->rep->GetOccupancyCount(); +} + +rocksdb_dbpath_t* rocksdb_dbpath_create(const char* path, + uint64_t target_size) { + rocksdb_dbpath_t* result = new rocksdb_dbpath_t; + result->rep.path = std::string(path); + result->rep.target_size = target_size; + return result; +} + +void rocksdb_dbpath_destroy(rocksdb_dbpath_t* dbpath) { delete dbpath; } + +rocksdb_env_t* rocksdb_create_default_env() { + rocksdb_env_t* result = new rocksdb_env_t; + result->rep = Env::Default(); + result->is_default = true; + return result; +} + +rocksdb_env_t* rocksdb_create_mem_env() { + rocksdb_env_t* result = new rocksdb_env_t; + result->rep = ROCKSDB_NAMESPACE::NewMemEnv(Env::Default()); + result->is_default = false; + return result; +} + +void rocksdb_env_set_background_threads(rocksdb_env_t* env, int n) { + env->rep->SetBackgroundThreads(n); +} + +int rocksdb_env_get_background_threads(rocksdb_env_t* env) { + return env->rep->GetBackgroundThreads(); +} + +void rocksdb_env_set_bottom_priority_background_threads(rocksdb_env_t* env, + int n) { + env->rep->SetBackgroundThreads(n, Env::BOTTOM); +} + +int rocksdb_env_get_bottom_priority_background_threads(rocksdb_env_t* env) { + return env->rep->GetBackgroundThreads(Env::BOTTOM); +} + +void rocksdb_env_set_high_priority_background_threads(rocksdb_env_t* env, + int n) { + env->rep->SetBackgroundThreads(n, Env::HIGH); +} + +int rocksdb_env_get_high_priority_background_threads(rocksdb_env_t* env) { + return env->rep->GetBackgroundThreads(Env::HIGH); +} + +void rocksdb_env_set_low_priority_background_threads(rocksdb_env_t* env, + int n) { + 
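+  // Usage sketch for the env thread-pool setters above (not upstream code;
+  // hypothetical caller). The pools are sized on an env that is then
+  // attached to the options before opening the DB:
+  //
+  //   rocksdb_env_t* env = rocksdb_create_default_env();
+  //   rocksdb_env_set_background_threads(env, 4);                /* LOW  */
+  //   rocksdb_env_set_high_priority_background_threads(env, 2);  /* HIGH */
+  //   rocksdb_options_set_env(opts, env);
+  //
+  // Env::Default() is process-wide, so destroying a default env frees only
+  // the wrapper, as the is_default check in rocksdb_env_destroy() below
+  // shows.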
env->rep->SetBackgroundThreads(n, Env::LOW); +} + +int rocksdb_env_get_low_priority_background_threads(rocksdb_env_t* env) { + return env->rep->GetBackgroundThreads(Env::LOW); +} + +void rocksdb_env_join_all_threads(rocksdb_env_t* env) { + env->rep->WaitForJoin(); +} + +void rocksdb_env_lower_thread_pool_io_priority(rocksdb_env_t* env) { + env->rep->LowerThreadPoolIOPriority(); +} + +void rocksdb_env_lower_high_priority_thread_pool_io_priority( + rocksdb_env_t* env) { + env->rep->LowerThreadPoolIOPriority(Env::HIGH); +} + +void rocksdb_env_lower_thread_pool_cpu_priority(rocksdb_env_t* env) { + env->rep->LowerThreadPoolCPUPriority(); +} + +void rocksdb_env_lower_high_priority_thread_pool_cpu_priority( + rocksdb_env_t* env) { + env->rep->LowerThreadPoolCPUPriority(Env::HIGH); +} + +void rocksdb_env_destroy(rocksdb_env_t* env) { + if (!env->is_default) delete env->rep; + delete env; +} + +rocksdb_envoptions_t* rocksdb_envoptions_create() { + rocksdb_envoptions_t* opt = new rocksdb_envoptions_t; + return opt; +} + +void rocksdb_envoptions_destroy(rocksdb_envoptions_t* opt) { delete opt; } + +rocksdb_sstfilewriter_t* rocksdb_sstfilewriter_create( + const rocksdb_envoptions_t* env, const rocksdb_options_t* io_options) { + rocksdb_sstfilewriter_t* writer = new rocksdb_sstfilewriter_t; + writer->rep = new SstFileWriter(env->rep, io_options->rep); + return writer; +} + +void rocksdb_create_dir_if_missing(rocksdb_env_t* env, const char* path, + char** errptr) { + SaveError(errptr, env->rep->CreateDirIfMissing(std::string(path))); +} + +rocksdb_sstfilewriter_t* rocksdb_sstfilewriter_create_with_comparator( + const rocksdb_envoptions_t* env, const rocksdb_options_t* io_options, + const rocksdb_comparator_t* /*comparator*/) { + rocksdb_sstfilewriter_t* writer = new rocksdb_sstfilewriter_t; + writer->rep = new SstFileWriter(env->rep, io_options->rep); + return writer; +} + +void rocksdb_sstfilewriter_open(rocksdb_sstfilewriter_t* writer, + const char* name, char** errptr) { + SaveError(errptr, writer->rep->Open(std::string(name))); +} + +void rocksdb_sstfilewriter_add(rocksdb_sstfilewriter_t* writer, const char* key, + size_t keylen, const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, writer->rep->Put(Slice(key, keylen), Slice(val, vallen))); +} + +void rocksdb_sstfilewriter_put(rocksdb_sstfilewriter_t* writer, const char* key, + size_t keylen, const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, writer->rep->Put(Slice(key, keylen), Slice(val, vallen))); +} + +void rocksdb_sstfilewriter_put_with_ts(rocksdb_sstfilewriter_t* writer, + const char* key, size_t keylen, + const char* ts, size_t tslen, + const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, writer->rep->Put(Slice(key, keylen), Slice(ts, tslen), + Slice(val, vallen))); +} + +void rocksdb_sstfilewriter_merge(rocksdb_sstfilewriter_t* writer, + const char* key, size_t keylen, + const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, writer->rep->Merge(Slice(key, keylen), Slice(val, vallen))); +} + +void rocksdb_sstfilewriter_delete(rocksdb_sstfilewriter_t* writer, + const char* key, size_t keylen, + char** errptr) { + SaveError(errptr, writer->rep->Delete(Slice(key, keylen))); +} + +void rocksdb_sstfilewriter_delete_with_ts(rocksdb_sstfilewriter_t* writer, + const char* key, size_t keylen, + const char* ts, size_t tslen, + char** errptr) { + SaveError(errptr, writer->rep->Delete(Slice(key, keylen), Slice(ts, tslen))); +} + +void 
rocksdb_sstfilewriter_delete_range(rocksdb_sstfilewriter_t* writer,
+                                   const char* begin_key, size_t begin_keylen,
+                                   const char* end_key, size_t end_keylen,
+                                   char** errptr) {
+  SaveError(errptr, writer->rep->DeleteRange(Slice(begin_key, begin_keylen),
+                                             Slice(end_key, end_keylen)));
+}
+
+void rocksdb_sstfilewriter_finish(rocksdb_sstfilewriter_t* writer,
+                                  char** errptr) {
+  SaveError(errptr, writer->rep->Finish(nullptr));
+}
+
+void rocksdb_sstfilewriter_file_size(rocksdb_sstfilewriter_t* writer,
+                                     uint64_t* file_size) {
+  *file_size = writer->rep->FileSize();
+}
+
+void rocksdb_sstfilewriter_destroy(rocksdb_sstfilewriter_t* writer) {
+  delete writer->rep;
+  delete writer;
+}
+
+rocksdb_ingestexternalfileoptions_t*
+rocksdb_ingestexternalfileoptions_create() {
+  rocksdb_ingestexternalfileoptions_t* opt =
+      new rocksdb_ingestexternalfileoptions_t;
+  return opt;
+}
+
+void rocksdb_ingestexternalfileoptions_set_move_files(
+    rocksdb_ingestexternalfileoptions_t* opt, unsigned char move_files) {
+  opt->rep.move_files = move_files;
+}
+
+void rocksdb_ingestexternalfileoptions_set_snapshot_consistency(
+    rocksdb_ingestexternalfileoptions_t* opt,
+    unsigned char snapshot_consistency) {
+  opt->rep.snapshot_consistency = snapshot_consistency;
+}
+
+void rocksdb_ingestexternalfileoptions_set_allow_global_seqno(
+    rocksdb_ingestexternalfileoptions_t* opt,
+    unsigned char allow_global_seqno) {
+  opt->rep.allow_global_seqno = allow_global_seqno;
+}
+
+void rocksdb_ingestexternalfileoptions_set_allow_blocking_flush(
+    rocksdb_ingestexternalfileoptions_t* opt,
+    unsigned char allow_blocking_flush) {
+  opt->rep.allow_blocking_flush = allow_blocking_flush;
+}
+
+void rocksdb_ingestexternalfileoptions_set_ingest_behind(
+    rocksdb_ingestexternalfileoptions_t* opt, unsigned char ingest_behind) {
+  opt->rep.ingest_behind = ingest_behind;
+}
+
+void rocksdb_ingestexternalfileoptions_set_fail_if_not_bottommost_level(
+    rocksdb_ingestexternalfileoptions_t* opt,
+    unsigned char fail_if_not_bottommost_level) {
+  opt->rep.fail_if_not_bottommost_level = fail_if_not_bottommost_level;
+}
+
+void rocksdb_ingestexternalfileoptions_destroy(
+    rocksdb_ingestexternalfileoptions_t* opt) {
+  delete opt;
+}
+
+void rocksdb_ingest_external_file(
+    rocksdb_t* db, const char* const* file_list, const size_t list_len,
+    const rocksdb_ingestexternalfileoptions_t* opt, char** errptr) {
+  std::vector<std::string> files(list_len);
+  for (size_t i = 0; i < list_len; ++i) {
+    files[i] = std::string(file_list[i]);
+  }
+  SaveError(errptr, db->rep->IngestExternalFile(files, opt->rep));
+}
+
+void rocksdb_ingest_external_file_cf(
+    rocksdb_t* db, rocksdb_column_family_handle_t* handle,
+    const char* const* file_list, const size_t list_len,
+    const rocksdb_ingestexternalfileoptions_t* opt, char** errptr) {
+  std::vector<std::string> files(list_len);
+  for (size_t i = 0; i < list_len; ++i) {
+    files[i] = std::string(file_list[i]);
+  }
+  SaveError(errptr, db->rep->IngestExternalFile(handle->rep, files, opt->rep));
+}
+
+void rocksdb_try_catch_up_with_primary(rocksdb_t* db, char** errptr) {
+  SaveError(errptr, db->rep->TryCatchUpWithPrimary());
+}
+
+rocksdb_slicetransform_t* rocksdb_slicetransform_create(
+    void* state, void (*destructor)(void*),
+    char* (*transform)(void*, const char* key, size_t length,
+                       size_t* dst_length),
+    unsigned char (*in_domain)(void*, const char* key, size_t length),
+    unsigned char (*in_range)(void*, const char* key, size_t length),
+    const char* (*name)(void*)) {
+  rocksdb_slicetransform_t* result = new rocksdb_slicetransform_t;
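+  // Usage sketch for the SST-writer/ingestion API above (not upstream
+  // code; hypothetical caller and file path, error handling elided). Keys
+  // must be added in ascending key order before Finish():
+  //
+  //   rocksdb_envoptions_t* eo = rocksdb_envoptions_create();
+  //   rocksdb_sstfilewriter_t* w = rocksdb_sstfilewriter_create(eo, opts);
+  //   char* err = NULL;
+  //   rocksdb_sstfilewriter_open(w, "/tmp/bulk.sst", &err);
+  //   rocksdb_sstfilewriter_put(w, "k1", 2, "v1", 2, &err);
+  //   rocksdb_sstfilewriter_finish(w, &err);
+  //   const char* files[] = {"/tmp/bulk.sst"};
+  //   rocksdb_ingestexternalfileoptions_t* io =
+  //       rocksdb_ingestexternalfileoptions_create();
+  //   rocksdb_ingest_external_file(db, files, 1, io, &err);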
result->state_ = state; + result->destructor_ = destructor; + result->transform_ = transform; + result->in_domain_ = in_domain; + result->in_range_ = in_range; + result->name_ = name; + return result; +} + +void rocksdb_slicetransform_destroy(rocksdb_slicetransform_t* st) { delete st; } + +struct SliceTransformWrapper : public rocksdb_slicetransform_t { + const SliceTransform* rep_; + ~SliceTransformWrapper() override { delete rep_; } + const char* Name() const override { return rep_->Name(); } + std::string GetId() const override { return rep_->GetId(); } + Slice Transform(const Slice& src) const override { + return rep_->Transform(src); + } + bool InDomain(const Slice& src) const override { return rep_->InDomain(src); } + bool InRange(const Slice& src) const override { return rep_->InRange(src); } + static void DoNothing(void*) {} +}; + +rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix( + size_t prefixLen) { + SliceTransformWrapper* wrapper = new SliceTransformWrapper; + wrapper->rep_ = ROCKSDB_NAMESPACE::NewFixedPrefixTransform(prefixLen); + wrapper->state_ = nullptr; + wrapper->destructor_ = &SliceTransformWrapper::DoNothing; + return wrapper; +} + +rocksdb_slicetransform_t* rocksdb_slicetransform_create_noop() { + SliceTransformWrapper* wrapper = new SliceTransformWrapper; + wrapper->rep_ = ROCKSDB_NAMESPACE::NewNoopTransform(); + wrapper->state_ = nullptr; + wrapper->destructor_ = &SliceTransformWrapper::DoNothing; + return wrapper; +} + +rocksdb_universal_compaction_options_t* +rocksdb_universal_compaction_options_create() { + rocksdb_universal_compaction_options_t* result = + new rocksdb_universal_compaction_options_t; + result->rep = new ROCKSDB_NAMESPACE::CompactionOptionsUniversal; + return result; +} + +void rocksdb_universal_compaction_options_set_size_ratio( + rocksdb_universal_compaction_options_t* uco, int ratio) { + uco->rep->size_ratio = ratio; +} + +int rocksdb_universal_compaction_options_get_size_ratio( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->size_ratio; +} + +void rocksdb_universal_compaction_options_set_min_merge_width( + rocksdb_universal_compaction_options_t* uco, int w) { + uco->rep->min_merge_width = w; +} + +int rocksdb_universal_compaction_options_get_min_merge_width( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->min_merge_width; +} + +void rocksdb_universal_compaction_options_set_max_merge_width( + rocksdb_universal_compaction_options_t* uco, int w) { + uco->rep->max_merge_width = w; +} + +int rocksdb_universal_compaction_options_get_max_merge_width( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->max_merge_width; +} + +void rocksdb_universal_compaction_options_set_max_size_amplification_percent( + rocksdb_universal_compaction_options_t* uco, int p) { + uco->rep->max_size_amplification_percent = p; +} + +int rocksdb_universal_compaction_options_get_max_size_amplification_percent( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->max_size_amplification_percent; +} + +void rocksdb_universal_compaction_options_set_compression_size_percent( + rocksdb_universal_compaction_options_t* uco, int p) { + uco->rep->compression_size_percent = p; +} + +int rocksdb_universal_compaction_options_get_compression_size_percent( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->compression_size_percent; +} + +void rocksdb_universal_compaction_options_set_stop_style( + rocksdb_universal_compaction_options_t* uco, int style) { + uco->rep->stop_style = + 
      static_cast<ROCKSDB_NAMESPACE::CompactionStopStyle>(style);
+}
+
+int rocksdb_universal_compaction_options_get_stop_style(
+    rocksdb_universal_compaction_options_t* uco) {
+  return static_cast<int>(uco->rep->stop_style);
+}
+
+void rocksdb_universal_compaction_options_destroy(
+    rocksdb_universal_compaction_options_t* uco) {
+  delete uco->rep;
+  delete uco;
+}
+
+rocksdb_fifo_compaction_options_t* rocksdb_fifo_compaction_options_create() {
+  rocksdb_fifo_compaction_options_t* result =
+      new rocksdb_fifo_compaction_options_t;
+  result->rep = CompactionOptionsFIFO();
+  return result;
+}
+
+void rocksdb_fifo_compaction_options_set_allow_compaction(
+    rocksdb_fifo_compaction_options_t* fifo_opts,
+    unsigned char allow_compaction) {
+  fifo_opts->rep.allow_compaction = allow_compaction;
+}
+
+unsigned char rocksdb_fifo_compaction_options_get_allow_compaction(
+    rocksdb_fifo_compaction_options_t* fifo_opts) {
+  return fifo_opts->rep.allow_compaction;
+}
+
+void rocksdb_fifo_compaction_options_set_max_table_files_size(
+    rocksdb_fifo_compaction_options_t* fifo_opts, uint64_t size) {
+  fifo_opts->rep.max_table_files_size = size;
+}
+
+uint64_t rocksdb_fifo_compaction_options_get_max_table_files_size(
+    rocksdb_fifo_compaction_options_t* fifo_opts) {
+  return fifo_opts->rep.max_table_files_size;
+}
+
+void rocksdb_fifo_compaction_options_destroy(
+    rocksdb_fifo_compaction_options_t* fifo_opts) {
+  delete fifo_opts;
+}
+
+void rocksdb_options_set_min_level_to_compress(rocksdb_options_t* opt,
+                                               int level) {
+  if (level >= 0) {
+    assert(level <= opt->rep.num_levels);
+    opt->rep.compression_per_level.resize(opt->rep.num_levels);
+    for (int i = 0; i < level; i++) {
+      opt->rep.compression_per_level[i] = ROCKSDB_NAMESPACE::kNoCompression;
+    }
+    for (int i = level; i < opt->rep.num_levels; i++) {
+      opt->rep.compression_per_level[i] = opt->rep.compression;
+    }
+  }
+}
+
+int rocksdb_livefiles_count(const rocksdb_livefiles_t* lf) {
+  return static_cast<int>(lf->rep.size());
+}
+
+const char* rocksdb_livefiles_column_family_name(const rocksdb_livefiles_t* lf,
+                                                 int index) {
+  return lf->rep[index].column_family_name.c_str();
+}
+
+const char* rocksdb_livefiles_name(const rocksdb_livefiles_t* lf, int index) {
+  return lf->rep[index].name.c_str();
+}
+
+int rocksdb_livefiles_level(const rocksdb_livefiles_t* lf, int index) {
+  return lf->rep[index].level;
+}
+
+size_t rocksdb_livefiles_size(const rocksdb_livefiles_t* lf, int index) {
+  return lf->rep[index].size;
+}
+
+const char* rocksdb_livefiles_smallestkey(const rocksdb_livefiles_t* lf,
+                                          int index, size_t* size) {
+  *size = lf->rep[index].smallestkey.size();
+  return lf->rep[index].smallestkey.data();
+}
+
+const char* rocksdb_livefiles_largestkey(const rocksdb_livefiles_t* lf,
+                                         int index, size_t* size) {
+  *size = lf->rep[index].largestkey.size();
+  return lf->rep[index].largestkey.data();
+}
+
+uint64_t rocksdb_livefiles_entries(const rocksdb_livefiles_t* lf, int index) {
+  return lf->rep[index].num_entries;
+}
+
+uint64_t rocksdb_livefiles_deletions(const rocksdb_livefiles_t* lf,
+                                     int index) {
+  return lf->rep[index].num_deletions;
+}
+
+extern void rocksdb_livefiles_destroy(const rocksdb_livefiles_t* lf) {
+  delete lf;
+}
+
+void rocksdb_get_options_from_string(const rocksdb_options_t* base_options,
+                                     const char* opts_str,
+                                     rocksdb_options_t* new_options,
+                                     char** errptr) {
+  SaveError(errptr,
+            GetOptionsFromString(base_options->rep, std::string(opts_str),
+                                 &new_options->rep));
+}
+
+void rocksdb_delete_file_in_range(rocksdb_t* db, const char* start_key,
+                                  size_t start_key_len, const char* limit_key,
size_t limit_key_len, char** errptr) { + Slice a, b; + SaveError( + errptr, + DeleteFilesInRange( + db->rep, db->rep->DefaultColumnFamily(), + (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr))); +} + +void rocksdb_delete_file_in_range_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, const char* limit_key, + size_t limit_key_len, char** errptr) { + Slice a, b; + SaveError( + errptr, + DeleteFilesInRange( + db->rep, column_family->rep, + (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr))); +} + +/* MetaData */ + +rocksdb_column_family_metadata_t* rocksdb_get_column_family_metadata( + rocksdb_t* db) { + rocksdb_column_family_metadata_t* meta = new rocksdb_column_family_metadata_t; + db->rep->GetColumnFamilyMetaData(&meta->rep); + return meta; +} + +rocksdb_column_family_metadata_t* rocksdb_get_column_family_metadata_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family) { + rocksdb_column_family_metadata_t* meta = new rocksdb_column_family_metadata_t; + db->rep->GetColumnFamilyMetaData(column_family->rep, &meta->rep); + return meta; +} + +void rocksdb_column_family_metadata_destroy( + rocksdb_column_family_metadata_t* cf_meta) { + delete cf_meta; +} + +uint64_t rocksdb_column_family_metadata_get_size( + rocksdb_column_family_metadata_t* cf_meta) { + return cf_meta->rep.size; +} + +size_t rocksdb_column_family_metadata_get_file_count( + rocksdb_column_family_metadata_t* cf_meta) { + return cf_meta->rep.file_count; +} + +char* rocksdb_column_family_metadata_get_name( + rocksdb_column_family_metadata_t* cf_meta) { + return strdup(cf_meta->rep.name.c_str()); +} + +size_t rocksdb_column_family_metadata_get_level_count( + rocksdb_column_family_metadata_t* cf_meta) { + return cf_meta->rep.levels.size(); +} + +rocksdb_level_metadata_t* rocksdb_column_family_metadata_get_level_metadata( + rocksdb_column_family_metadata_t* cf_meta, size_t i) { + if (i >= cf_meta->rep.levels.size()) { + return NULL; + } + rocksdb_level_metadata_t* level_meta = + (rocksdb_level_metadata_t*)malloc(sizeof(rocksdb_level_metadata_t)); + level_meta->rep = &cf_meta->rep.levels[i]; + + return level_meta; +} + +void rocksdb_level_metadata_destroy(rocksdb_level_metadata_t* level_meta) { + // Only free the base pointer as its parent rocksdb_column_family_metadata_t + // has the ownership of its rep. + free(level_meta); +} + +int rocksdb_level_metadata_get_level(rocksdb_level_metadata_t* level_meta) { + return level_meta->rep->level; +} + +uint64_t rocksdb_level_metadata_get_size(rocksdb_level_metadata_t* level_meta) { + return level_meta->rep->size; +} + +size_t rocksdb_level_metadata_get_file_count( + rocksdb_level_metadata_t* level_meta) { + return level_meta->rep->files.size(); +} + +rocksdb_sst_file_metadata_t* rocksdb_level_metadata_get_sst_file_metadata( + rocksdb_level_metadata_t* level_meta, size_t i) { + if (i >= level_meta->rep->files.size()) { + return nullptr; + } + rocksdb_sst_file_metadata_t* file_meta = + (rocksdb_sst_file_metadata_t*)malloc(sizeof(rocksdb_sst_file_metadata_t)); + file_meta->rep = &level_meta->rep->files[i]; + return file_meta; +} + +void rocksdb_sst_file_metadata_destroy(rocksdb_sst_file_metadata_t* file_meta) { + // Only free the base pointer as its parent rocksdb_level_metadata_t + // has the ownership of its rep. 
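+// Usage sketch for the metadata accessors above (not upstream code;
+// hypothetical caller). The column-family metadata owns the whole tree:
+// the level/file wrappers are released with their *_destroy functions,
+// while the underlying data stays valid until the
+// rocksdb_column_family_metadata_t itself is destroyed:
+//
+//   rocksdb_column_family_metadata_t* m =
+//       rocksdb_get_column_family_metadata(db);
+//   size_t levels = rocksdb_column_family_metadata_get_level_count(m);
+//   for (size_t i = 0; i < levels; i++) {
+//     rocksdb_level_metadata_t* lvl =
+//         rocksdb_column_family_metadata_get_level_metadata(m, i);
+//     /* inspect rocksdb_level_metadata_get_file_count(lvl), ... */
+//     rocksdb_level_metadata_destroy(lvl);
+//   }
+//   rocksdb_column_family_metadata_destroy(m);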
+ free(file_meta); +} + +char* rocksdb_sst_file_metadata_get_relative_filename( + rocksdb_sst_file_metadata_t* file_meta) { + return strdup(file_meta->rep->relative_filename.c_str()); +} + +uint64_t rocksdb_sst_file_metadata_get_size( + rocksdb_sst_file_metadata_t* file_meta) { + return file_meta->rep->size; +} + +char* rocksdb_sst_file_metadata_get_smallestkey( + rocksdb_sst_file_metadata_t* file_meta, size_t* key_len) { + *key_len = file_meta->rep->smallestkey.size(); + return CopyString(file_meta->rep->smallestkey); +} + +char* rocksdb_sst_file_metadata_get_largestkey( + rocksdb_sst_file_metadata_t* file_meta, size_t* key_len) { + *key_len = file_meta->rep->largestkey.size(); + return CopyString(file_meta->rep->largestkey); +} + +/* Transactions */ + +rocksdb_transactiondb_options_t* rocksdb_transactiondb_options_create() { + return new rocksdb_transactiondb_options_t; +} + +void rocksdb_transactiondb_options_destroy( + rocksdb_transactiondb_options_t* opt) { + delete opt; +} + +void rocksdb_transactiondb_options_set_max_num_locks( + rocksdb_transactiondb_options_t* opt, int64_t max_num_locks) { + opt->rep.max_num_locks = max_num_locks; +} + +void rocksdb_transactiondb_options_set_num_stripes( + rocksdb_transactiondb_options_t* opt, size_t num_stripes) { + opt->rep.num_stripes = num_stripes; +} + +void rocksdb_transactiondb_options_set_transaction_lock_timeout( + rocksdb_transactiondb_options_t* opt, int64_t txn_lock_timeout) { + opt->rep.transaction_lock_timeout = txn_lock_timeout; +} + +void rocksdb_transactiondb_options_set_default_lock_timeout( + rocksdb_transactiondb_options_t* opt, int64_t default_lock_timeout) { + opt->rep.default_lock_timeout = default_lock_timeout; +} + +rocksdb_transaction_options_t* rocksdb_transaction_options_create() { + return new rocksdb_transaction_options_t; +} + +void rocksdb_transaction_options_destroy(rocksdb_transaction_options_t* opt) { + delete opt; +} + +void rocksdb_transaction_options_set_set_snapshot( + rocksdb_transaction_options_t* opt, unsigned char v) { + opt->rep.set_snapshot = v; +} + +void rocksdb_transaction_options_set_deadlock_detect( + rocksdb_transaction_options_t* opt, unsigned char v) { + opt->rep.deadlock_detect = v; +} + +void rocksdb_transaction_options_set_lock_timeout( + rocksdb_transaction_options_t* opt, int64_t lock_timeout) { + opt->rep.lock_timeout = lock_timeout; +} + +void rocksdb_transaction_options_set_expiration( + rocksdb_transaction_options_t* opt, int64_t expiration) { + opt->rep.expiration = expiration; +} + +void rocksdb_transaction_options_set_deadlock_detect_depth( + rocksdb_transaction_options_t* opt, int64_t depth) { + opt->rep.deadlock_detect_depth = depth; +} + +void rocksdb_transaction_options_set_max_write_batch_size( + rocksdb_transaction_options_t* opt, size_t size) { + opt->rep.max_write_batch_size = size; +} + +void rocksdb_transaction_options_set_skip_prepare( + rocksdb_transaction_options_t* opt, unsigned char v) { + opt->rep.skip_prepare = v; +} + +rocksdb_optimistictransaction_options_t* +rocksdb_optimistictransaction_options_create() { + return new rocksdb_optimistictransaction_options_t; +} + +void rocksdb_optimistictransaction_options_destroy( + rocksdb_optimistictransaction_options_t* opt) { + delete opt; +} + +void rocksdb_optimistictransaction_options_set_set_snapshot( + rocksdb_optimistictransaction_options_t* opt, unsigned char v) { + opt->rep.set_snapshot = v; +} + +char* rocksdb_optimistictransactiondb_property_value( + rocksdb_optimistictransactiondb_t* db, const char* propname) { + 
  std::string tmp;
+  if (db->rep->GetProperty(Slice(propname), &tmp)) {
+    // We use strdup() since we expect human readable output.
+    return strdup(tmp.c_str());
+  } else {
+    return nullptr;
+  }
+}
+
+int rocksdb_optimistictransactiondb_property_int(
+    rocksdb_optimistictransactiondb_t* db, const char* propname,
+    uint64_t* out_val) {
+  if (db->rep->GetIntProperty(Slice(propname), out_val)) {
+    return 0;
+  } else {
+    return -1;
+  }
+}
+
+rocksdb_column_family_handle_t* rocksdb_transactiondb_create_column_family(
+    rocksdb_transactiondb_t* txn_db,
+    const rocksdb_options_t* column_family_options,
+    const char* column_family_name, char** errptr) {
+  rocksdb_column_family_handle_t* handle = new rocksdb_column_family_handle_t;
+  SaveError(errptr, txn_db->rep->CreateColumnFamily(
+                        ColumnFamilyOptions(column_family_options->rep),
+                        std::string(column_family_name), &(handle->rep)));
+  return handle;
+}
+
+rocksdb_transactiondb_t* rocksdb_transactiondb_open(
+    const rocksdb_options_t* options,
+    const rocksdb_transactiondb_options_t* txn_db_options, const char* name,
+    char** errptr) {
+  TransactionDB* txn_db;
+  if (SaveError(errptr, TransactionDB::Open(options->rep, txn_db_options->rep,
+                                            std::string(name), &txn_db))) {
+    return nullptr;
+  }
+  rocksdb_transactiondb_t* result = new rocksdb_transactiondb_t;
+  result->rep = txn_db;
+  return result;
+}
+
+rocksdb_transactiondb_t* rocksdb_transactiondb_open_column_families(
+    const rocksdb_options_t* options,
+    const rocksdb_transactiondb_options_t* txn_db_options, const char* name,
+    int num_column_families, const char* const* column_family_names,
+    const rocksdb_options_t* const* column_family_options,
+    rocksdb_column_family_handle_t** column_family_handles, char** errptr) {
+  std::vector<ColumnFamilyDescriptor> column_families;
+  for (int i = 0; i < num_column_families; i++) {
+    column_families.push_back(ColumnFamilyDescriptor(
+        std::string(column_family_names[i]),
+        ColumnFamilyOptions(column_family_options[i]->rep)));
+  }
+
+  TransactionDB* txn_db;
+  std::vector<ColumnFamilyHandle*> handles;
+  if (SaveError(errptr, TransactionDB::Open(options->rep, txn_db_options->rep,
+                                            std::string(name), column_families,
+                                            &handles, &txn_db))) {
+    return nullptr;
+  }
+
+  for (size_t i = 0; i < handles.size(); i++) {
+    rocksdb_column_family_handle_t* c_handle =
+        new rocksdb_column_family_handle_t;
+    c_handle->rep = handles[i];
+    column_family_handles[i] = c_handle;
+  }
+  rocksdb_transactiondb_t* result = new rocksdb_transactiondb_t;
+  result->rep = txn_db;
+  return result;
+}
+
+const rocksdb_snapshot_t* rocksdb_transactiondb_create_snapshot(
+    rocksdb_transactiondb_t* txn_db) {
+  rocksdb_snapshot_t* result = new rocksdb_snapshot_t;
+  result->rep = txn_db->rep->GetSnapshot();
+  return result;
+}
+
+void rocksdb_transactiondb_release_snapshot(
+    rocksdb_transactiondb_t* txn_db, const rocksdb_snapshot_t* snapshot) {
+  txn_db->rep->ReleaseSnapshot(snapshot->rep);
+  delete snapshot;
+}
+
+char* rocksdb_transactiondb_property_value(rocksdb_transactiondb_t* db,
+                                           const char* propname) {
+  std::string tmp;
+  if (db->rep->GetProperty(Slice(propname), &tmp)) {
+    // We use strdup() since we expect human readable output.
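+    // Usage sketch for the TransactionDB API above (not upstream code;
+    // hypothetical caller and path, error handling elided):
+    //
+    //   char* err = NULL;
+    //   rocksdb_transactiondb_t* tdb = rocksdb_transactiondb_open(
+    //       opts, rocksdb_transactiondb_options_create(), "/tmp/txndb",
+    //       &err);
+    //   rocksdb_transaction_t* txn = rocksdb_transaction_begin(
+    //       tdb, rocksdb_writeoptions_create(),
+    //       rocksdb_transaction_options_create(), NULL);
+    //   rocksdb_transaction_put(txn, "k", 1, "v", 1, &err);
+    //   rocksdb_transaction_commit(txn, &err);
+    //   rocksdb_transaction_destroy(txn);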
+    return strdup(tmp.c_str());
+  } else {
+    return nullptr;
+  }
+}
+
+int rocksdb_transactiondb_property_int(rocksdb_transactiondb_t* db,
+                                       const char* propname,
+                                       uint64_t* out_val) {
+  if (db->rep->GetIntProperty(Slice(propname), out_val)) {
+    return 0;
+  } else {
+    return -1;
+  }
+}
+
+rocksdb_transaction_t* rocksdb_transaction_begin(
+    rocksdb_transactiondb_t* txn_db,
+    const rocksdb_writeoptions_t* write_options,
+    const rocksdb_transaction_options_t* txn_options,
+    rocksdb_transaction_t* old_txn) {
+  if (old_txn == nullptr) {
+    rocksdb_transaction_t* result = new rocksdb_transaction_t;
+    result->rep = txn_db->rep->BeginTransaction(write_options->rep,
+                                                txn_options->rep, nullptr);
+    return result;
+  }
+  old_txn->rep = txn_db->rep->BeginTransaction(write_options->rep,
+                                               txn_options->rep, old_txn->rep);
+  return old_txn;
+}
+
+rocksdb_transaction_t** rocksdb_transactiondb_get_prepared_transactions(
+    rocksdb_transactiondb_t* txn_db, size_t* cnt) {
+  std::vector<Transaction*> txns;
+  txn_db->rep->GetAllPreparedTransactions(&txns);
+  *cnt = txns.size();
+  if (txns.empty()) {
+    return nullptr;
+  } else {
+    rocksdb_transaction_t** buf = (rocksdb_transaction_t**)malloc(
+        txns.size() * sizeof(rocksdb_transaction_t*));
+    for (size_t i = 0; i < txns.size(); i++) {
+      buf[i] = new rocksdb_transaction_t;
+      buf[i]->rep = txns[i];
+    }
+    return buf;
+  }
+}
+
+void rocksdb_transaction_set_name(rocksdb_transaction_t* txn, const char* name,
+                                  size_t name_len, char** errptr) {
+  std::string str = std::string(name, name_len);
+  SaveError(errptr, txn->rep->SetName(str));
+}
+
+char* rocksdb_transaction_get_name(rocksdb_transaction_t* txn,
+                                   size_t* name_len) {
+  auto name = txn->rep->GetName();
+  *name_len = name.size();
+  return CopyString(name);
+}
+
+void rocksdb_transaction_prepare(rocksdb_transaction_t* txn, char** errptr) {
+  SaveError(errptr, txn->rep->Prepare());
+}
+
+rocksdb_writebatch_wi_t* rocksdb_transaction_get_writebatch_wi(
+    rocksdb_transaction_t* txn) {
+  rocksdb_writebatch_wi_t* wi =
+      (rocksdb_writebatch_wi_t*)malloc(sizeof(rocksdb_writebatch_wi_t));
+  wi->rep = txn->rep->GetWriteBatch();
+
+  return wi;
+}
+
+void rocksdb_transaction_rebuild_from_writebatch(
+    rocksdb_transaction_t* txn, rocksdb_writebatch_t* writebatch,
+    char** errptr) {
+  SaveError(errptr, txn->rep->RebuildFromWriteBatch(&writebatch->rep));
+}
+
+void rocksdb_transaction_rebuild_from_writebatch_wi(rocksdb_transaction_t* txn,
+                                                    rocksdb_writebatch_wi_t* wi,
+                                                    char** errptr) {
+  SaveError(errptr, txn->rep->RebuildFromWriteBatch(wi->rep->GetWriteBatch()));
+}
+
+void rocksdb_transaction_commit(rocksdb_transaction_t* txn, char** errptr) {
+  SaveError(errptr, txn->rep->Commit());
+}
+
+void rocksdb_transaction_rollback(rocksdb_transaction_t* txn, char** errptr) {
+  SaveError(errptr, txn->rep->Rollback());
+}
+
+void rocksdb_transaction_set_savepoint(rocksdb_transaction_t* txn) {
+  txn->rep->SetSavePoint();
+}
+
+void rocksdb_transaction_rollback_to_savepoint(rocksdb_transaction_t* txn,
+                                               char** errptr) {
+  SaveError(errptr, txn->rep->RollbackToSavePoint());
+}
+
+void rocksdb_transaction_destroy(rocksdb_transaction_t* txn) {
+  delete txn->rep;
+  delete txn;
+}
+
+const rocksdb_snapshot_t* rocksdb_transaction_get_snapshot(
+    rocksdb_transaction_t* txn) {
+  // This will be freed later on using free, so use malloc here to avoid a
+  // mismatch
+  rocksdb_snapshot_t* result =
+      (rocksdb_snapshot_t*)malloc(sizeof(rocksdb_snapshot_t));
+  result->rep = txn->rep->GetSnapshot();
+  return result;
+}
+
+// Read a key inside a transaction
+char*
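+// Usage note (not upstream code): rocksdb_transaction_get() below reads
+// through the transaction (its own uncommitted writes included) without
+// taking locks, while rocksdb_transaction_get_for_update() additionally
+// locks the key so the transaction conflicts with concurrent writers. A
+// minimal sketch, assuming an open txn and read options ropts:
+//
+//   size_t vlen;
+//   char* err = NULL;
+//   char* v = rocksdb_transaction_get_for_update(
+//       txn, ropts, "k", 1, &vlen, /*exclusive=*/1, &err);
+//   /* a NULL result with a NULL err means not-found */
+//   free(v);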
rocksdb_transaction_get(rocksdb_transaction_t* txn, + const rocksdb_readoptions_t* options, + const char* key, size_t klen, size_t* vlen, + char** errptr) { + char* result = nullptr; + std::string tmp; + Status s = txn->rep->Get(options->rep, Slice(key, klen), &tmp); + if (s.ok()) { + *vlen = tmp.size(); + result = CopyString(tmp); + } else { + *vlen = 0; + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + } + return result; +} + +rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + const char* key, size_t klen, char** errptr) { + rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t); + Status s = txn->rep->Get(options->rep, Slice(key, klen), &v->rep); + if (!s.ok()) { + delete (v); + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + return nullptr; + } + return v; +} + +char* rocksdb_transaction_get_cf(rocksdb_transaction_t* txn, + const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, size_t* vlen, + char** errptr) { + char* result = nullptr; + std::string tmp; + Status s = + txn->rep->Get(options->rep, column_family->rep, Slice(key, klen), &tmp); + if (s.ok()) { + *vlen = tmp.size(); + result = CopyString(tmp); + } else { + *vlen = 0; + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + } + return result; +} + +rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, + char** errptr) { + rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t); + Status s = txn->rep->Get(options->rep, column_family->rep, Slice(key, klen), + &v->rep); + if (!s.ok()) { + delete (v); + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + return nullptr; + } + return v; +} + +// Read a key inside a transaction +char* rocksdb_transaction_get_for_update(rocksdb_transaction_t* txn, + const rocksdb_readoptions_t* options, + const char* key, size_t klen, + size_t* vlen, unsigned char exclusive, + char** errptr) { + char* result = nullptr; + std::string tmp; + Status s = + txn->rep->GetForUpdate(options->rep, Slice(key, klen), &tmp, exclusive); + if (s.ok()) { + *vlen = tmp.size(); + result = CopyString(tmp); + } else { + *vlen = 0; + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + } + return result; +} + +rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned_for_update( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + const char* key, size_t klen, unsigned char exclusive, char** errptr) { + rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t); + Status s = txn->rep->GetForUpdate(options->rep, Slice(key, klen), + v->rep.GetSelf(), exclusive); + v->rep.PinSelf(); + if (!s.ok()) { + delete (v); + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + return nullptr; + } + return v; +} + +char* rocksdb_transaction_get_for_update_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, + size_t* vlen, unsigned char exclusive, char** errptr) { + char* result = nullptr; + std::string tmp; + Status s = txn->rep->GetForUpdate(options->rep, column_family->rep, + Slice(key, klen), &tmp, exclusive); + if (s.ok()) { + *vlen = tmp.size(); + result = CopyString(tmp); + } else { + *vlen = 0; + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + } + return result; +} + +rocksdb_pinnableslice_t* 
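+// Usage sketch (illustrative, not part of the upstream sources): pinned reads
+// skip one copy relative to the char*-returning getters, and the data pointer
+// stays valid only until the slice is destroyed; ropts is an assumed
+// read-options handle:
+//   rocksdb_pinnableslice_t* p =
+//       rocksdb_transaction_get_pinned(txn, ropts, "k", 1, &err);
+//   if (p != nullptr) {
+//     size_t vlen;
+//     const char* v = rocksdb_pinnableslice_value(p, &vlen);
+//     /* use v[0..vlen) */
+//     rocksdb_pinnableslice_destroy(p);
+//   }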
rocksdb_transaction_get_pinned_for_update_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, + unsigned char exclusive, char** errptr) { + rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t); + Status s = txn->rep->GetForUpdate(options->rep, column_family->rep, + Slice(key, klen), &v->rep, exclusive); + if (!s.ok()) { + delete (v); + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + return nullptr; + } + return v; +} + +void rocksdb_transaction_multi_get(rocksdb_transaction_t* txn, + const rocksdb_readoptions_t* options, + size_t num_keys, + const char* const* keys_list, + const size_t* keys_list_sizes, + char** values_list, + size_t* values_list_sizes, char** errs) { + std::vector keys(num_keys); + for (size_t i = 0; i < num_keys; i++) { + keys[i] = Slice(keys_list[i], keys_list_sizes[i]); + } + std::vector values(num_keys); + std::vector statuses = + txn->rep->MultiGet(options->rep, keys, &values); + for (size_t i = 0; i < num_keys; i++) { + if (statuses[i].ok()) { + values_list[i] = CopyString(values[i]); + values_list_sizes[i] = values[i].size(); + errs[i] = nullptr; + } else { + values_list[i] = nullptr; + values_list_sizes[i] = 0; + if (!statuses[i].IsNotFound()) { + errs[i] = strdup(statuses[i].ToString().c_str()); + } else { + errs[i] = nullptr; + } + } + } +} + +void rocksdb_transaction_multi_get_for_update( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** values_list, + size_t* values_list_sizes, char** errs) { + std::vector keys(num_keys); + for (size_t i = 0; i < num_keys; i++) { + keys[i] = Slice(keys_list[i], keys_list_sizes[i]); + } + std::vector values(num_keys); + std::vector statuses = + txn->rep->MultiGetForUpdate(options->rep, keys, &values); + for (size_t i = 0; i < num_keys; i++) { + if (statuses[i].ok()) { + values_list[i] = CopyString(values[i]); + values_list_sizes[i] = values[i].size(); + errs[i] = nullptr; + } else { + values_list[i] = nullptr; + values_list_sizes[i] = 0; + if (!statuses[i].IsNotFound()) { + errs[i] = strdup(statuses[i].ToString().c_str()); + } else { + errs[i] = nullptr; + } + } + } +} + +void rocksdb_transaction_multi_get_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + const rocksdb_column_family_handle_t* const* column_families, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** values_list, + size_t* values_list_sizes, char** errs) { + std::vector keys(num_keys); + std::vector cfs(num_keys); + for (size_t i = 0; i < num_keys; i++) { + keys[i] = Slice(keys_list[i], keys_list_sizes[i]); + cfs[i] = column_families[i]->rep; + } + std::vector values(num_keys); + std::vector statuses = + txn->rep->MultiGet(options->rep, cfs, keys, &values); + for (size_t i = 0; i < num_keys; i++) { + if (statuses[i].ok()) { + values_list[i] = CopyString(values[i]); + values_list_sizes[i] = values[i].size(); + errs[i] = nullptr; + } else { + values_list[i] = nullptr; + values_list_sizes[i] = 0; + if (!statuses[i].IsNotFound()) { + errs[i] = strdup(statuses[i].ToString().c_str()); + } else { + errs[i] = nullptr; + } + } + } +} + +void rocksdb_transaction_multi_get_for_update_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + const rocksdb_column_family_handle_t* const* column_families, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, 
char** values_list, + size_t* values_list_sizes, char** errs) { + std::vector keys(num_keys); + std::vector cfs(num_keys); + for (size_t i = 0; i < num_keys; i++) { + keys[i] = Slice(keys_list[i], keys_list_sizes[i]); + cfs[i] = column_families[i]->rep; + } + std::vector values(num_keys); + std::vector statuses = + txn->rep->MultiGetForUpdate(options->rep, cfs, keys, &values); + for (size_t i = 0; i < num_keys; i++) { + if (statuses[i].ok()) { + values_list[i] = CopyString(values[i]); + values_list_sizes[i] = values[i].size(); + errs[i] = nullptr; + } else { + values_list[i] = nullptr; + values_list_sizes[i] = 0; + if (!statuses[i].IsNotFound()) { + errs[i] = strdup(statuses[i].ToString().c_str()); + } else { + errs[i] = nullptr; + } + } + } +} + +// Read a key outside a transaction +char* rocksdb_transactiondb_get(rocksdb_transactiondb_t* txn_db, + const rocksdb_readoptions_t* options, + const char* key, size_t klen, size_t* vlen, + char** errptr) { + char* result = nullptr; + std::string tmp; + Status s = txn_db->rep->Get(options->rep, Slice(key, klen), &tmp); + if (s.ok()) { + *vlen = tmp.size(); + result = CopyString(tmp); + } else { + *vlen = 0; + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + } + return result; +} + +rocksdb_pinnableslice_t* rocksdb_transactiondb_get_pinned( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + const char* key, size_t klen, char** errptr) { + rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t); + Status s = txn_db->rep->Get(options->rep, txn_db->rep->DefaultColumnFamily(), + Slice(key, klen), &v->rep); + if (!s.ok()) { + delete (v); + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + return nullptr; + } + return v; +} + +char* rocksdb_transactiondb_get_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, size_t* vallen, char** errptr) { + char* result = nullptr; + std::string tmp; + Status s = txn_db->rep->Get(options->rep, column_family->rep, + Slice(key, keylen), &tmp); + if (s.ok()) { + *vallen = tmp.size(); + result = CopyString(tmp); + } else { + *vallen = 0; + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + } + return result; +} + +rocksdb_pinnableslice_t* rocksdb_transactiondb_get_pinned_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, char** errptr) { + rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t); + Status s = txn_db->rep->Get(options->rep, column_family->rep, + Slice(key, keylen), &v->rep); + if (!s.ok()) { + delete (v); + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + return nullptr; + } + return v; +} + +void rocksdb_transactiondb_multi_get(rocksdb_transactiondb_t* txn_db, + const rocksdb_readoptions_t* options, + size_t num_keys, + const char* const* keys_list, + const size_t* keys_list_sizes, + char** values_list, + size_t* values_list_sizes, char** errs) { + std::vector keys(num_keys); + for (size_t i = 0; i < num_keys; i++) { + keys[i] = Slice(keys_list[i], keys_list_sizes[i]); + } + std::vector values(num_keys); + std::vector statuses = + txn_db->rep->MultiGet(options->rep, keys, &values); + for (size_t i = 0; i < num_keys; i++) { + if (statuses[i].ok()) { + values_list[i] = CopyString(values[i]); + values_list_sizes[i] = values[i].size(); + errs[i] = nullptr; + } else { + values_list[i] = nullptr; + values_list_sizes[i] = 0; + if (!statuses[i].IsNotFound()) 
{ + errs[i] = strdup(statuses[i].ToString().c_str()); + } else { + errs[i] = nullptr; + } + } + } +} + +void rocksdb_transactiondb_multi_get_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + const rocksdb_column_family_handle_t* const* column_families, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** values_list, + size_t* values_list_sizes, char** errs) { + std::vector keys(num_keys); + std::vector cfs(num_keys); + for (size_t i = 0; i < num_keys; i++) { + keys[i] = Slice(keys_list[i], keys_list_sizes[i]); + cfs[i] = column_families[i]->rep; + } + std::vector values(num_keys); + std::vector statuses = + txn_db->rep->MultiGet(options->rep, cfs, keys, &values); + for (size_t i = 0; i < num_keys; i++) { + if (statuses[i].ok()) { + values_list[i] = CopyString(values[i]); + values_list_sizes[i] = values[i].size(); + errs[i] = nullptr; + } else { + values_list[i] = nullptr; + values_list_sizes[i] = 0; + if (!statuses[i].IsNotFound()) { + errs[i] = strdup(statuses[i].ToString().c_str()); + } else { + errs[i] = nullptr; + } + } + } +} + +// Put a key inside a transaction +void rocksdb_transaction_put(rocksdb_transaction_t* txn, const char* key, + size_t klen, const char* val, size_t vlen, + char** errptr) { + SaveError(errptr, txn->rep->Put(Slice(key, klen), Slice(val, vlen))); +} + +void rocksdb_transaction_put_cf(rocksdb_transaction_t* txn, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* val, + size_t vlen, char** errptr) { + SaveError(errptr, txn->rep->Put(column_family->rep, Slice(key, klen), + Slice(val, vlen))); +} + +void rocksdb_transaction_set_commit_timestamp(rocksdb_transaction_t* txn, + uint64_t commit_timestamp) { + txn->rep->SetCommitTimestamp(commit_timestamp); +} + +void rocksdb_transaction_set_read_timestamp_for_validation( + rocksdb_transaction_t* txn, uint64_t read_timestamp) { + txn->rep->SetReadTimestampForValidation(read_timestamp); +} + +// Put a key outside a transaction +void rocksdb_transactiondb_put(rocksdb_transactiondb_t* txn_db, + const rocksdb_writeoptions_t* options, + const char* key, size_t klen, const char* val, + size_t vlen, char** errptr) { + SaveError(errptr, + txn_db->rep->Put(options->rep, Slice(key, klen), Slice(val, vlen))); +} + +void rocksdb_transactiondb_put_cf(rocksdb_transactiondb_t* txn_db, + const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t keylen, + const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, txn_db->rep->Put(options->rep, column_family->rep, + Slice(key, keylen), Slice(val, vallen))); +} + +// Write batch into transaction db +void rocksdb_transactiondb_write(rocksdb_transactiondb_t* db, + const rocksdb_writeoptions_t* options, + rocksdb_writebatch_t* batch, char** errptr) { + SaveError(errptr, db->rep->Write(options->rep, &batch->rep)); +} + +// Merge a key inside a transaction +void rocksdb_transaction_merge(rocksdb_transaction_t* txn, const char* key, + size_t klen, const char* val, size_t vlen, + char** errptr) { + SaveError(errptr, txn->rep->Merge(Slice(key, klen), Slice(val, vlen))); +} + +void rocksdb_transaction_merge_cf(rocksdb_transaction_t* txn, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* val, + size_t vlen, char** errptr) { + SaveError(errptr, txn->rep->Merge(column_family->rep, Slice(key, klen), + Slice(val, vlen))); +} + +// Merge a key outside a transaction +void 
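+// Note (illustrative, not part of the upstream sources): as with the base DB
+// API, these Merge wrappers only succeed when a merge operator was configured
+// in the options the database was opened with; otherwise RocksDB reports the
+// failure through errptr.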
rocksdb_transactiondb_merge(rocksdb_transactiondb_t* txn_db, + const rocksdb_writeoptions_t* options, + const char* key, size_t klen, const char* val, + size_t vlen, char** errptr) { + SaveError(errptr, txn_db->rep->Merge(options->rep, Slice(key, klen), + Slice(val, vlen))); +} + +void rocksdb_transactiondb_merge_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, + const char* val, size_t vlen, char** errptr) { + SaveError(errptr, txn_db->rep->Merge(options->rep, column_family->rep, + Slice(key, klen), Slice(val, vlen))); +} + +// Delete a key inside a transaction +void rocksdb_transaction_delete(rocksdb_transaction_t* txn, const char* key, + size_t klen, char** errptr) { + SaveError(errptr, txn->rep->Delete(Slice(key, klen))); +} + +void rocksdb_transaction_delete_cf( + rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, char** errptr) { + SaveError(errptr, txn->rep->Delete(column_family->rep, Slice(key, klen))); +} + +// Delete a key outside a transaction +void rocksdb_transactiondb_delete(rocksdb_transactiondb_t* txn_db, + const rocksdb_writeoptions_t* options, + const char* key, size_t klen, char** errptr) { + SaveError(errptr, txn_db->rep->Delete(options->rep, Slice(key, klen))); +} + +void rocksdb_transactiondb_delete_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, char** errptr) { + SaveError(errptr, txn_db->rep->Delete(options->rep, column_family->rep, + Slice(key, keylen))); +} + +// Create an iterator inside a transaction +rocksdb_iterator_t* rocksdb_transaction_create_iterator( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options) { + rocksdb_iterator_t* result = new rocksdb_iterator_t; + result->rep = txn->rep->GetIterator(options->rep); + return result; +} + +// Create an iterator inside a transaction with column family +rocksdb_iterator_t* rocksdb_transaction_create_iterator_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family) { + rocksdb_iterator_t* result = new rocksdb_iterator_t; + result->rep = txn->rep->GetIterator(options->rep, column_family->rep); + return result; +} + +// Create an iterator outside a transaction +rocksdb_iterator_t* rocksdb_transactiondb_create_iterator( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options) { + rocksdb_iterator_t* result = new rocksdb_iterator_t; + result->rep = txn_db->rep->NewIterator(options->rep); + return result; +} + +rocksdb_iterator_t* rocksdb_transactiondb_create_iterator_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family) { + rocksdb_iterator_t* result = new rocksdb_iterator_t; + result->rep = txn_db->rep->NewIterator(options->rep, column_family->rep); + return result; +} + +void rocksdb_transactiondb_close(rocksdb_transactiondb_t* txn_db) { + delete txn_db->rep; + delete txn_db; +} + +void rocksdb_transactiondb_flush_wal(rocksdb_transactiondb_t* txn_db, + unsigned char sync, char** errptr) { + SaveError(errptr, txn_db->rep->FlushWAL(sync)); +} + +void rocksdb_transactiondb_flush(rocksdb_transactiondb_t* txn_db, + const rocksdb_flushoptions_t* options, + char** errptr) { + SaveError(errptr, txn_db->rep->Flush(options->rep)); +} + +void rocksdb_transactiondb_flush_cf( + 
rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options, + rocksdb_column_family_handle_t* column_family, char** errptr) { + SaveError(errptr, txn_db->rep->Flush(options->rep, column_family->rep)); +} + +void rocksdb_transactiondb_flush_cfs( + rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options, + rocksdb_column_family_handle_t** column_families, int num_column_families, + char** errptr) { + std::vector column_family_handles; + for (int i = 0; i < num_column_families; i++) { + column_family_handles.push_back(column_families[i]->rep); + } + + SaveError(errptr, txn_db->rep->Flush(options->rep, column_family_handles)); +} + +rocksdb_checkpoint_t* rocksdb_transactiondb_checkpoint_object_create( + rocksdb_transactiondb_t* txn_db, char** errptr) { + Checkpoint* checkpoint; + if (SaveError(errptr, Checkpoint::Create(txn_db->rep, &checkpoint))) { + return nullptr; + } + rocksdb_checkpoint_t* result = new rocksdb_checkpoint_t; + result->rep = checkpoint; + return result; +} + +rocksdb_optimistictransactiondb_t* rocksdb_optimistictransactiondb_open( + const rocksdb_options_t* options, const char* name, char** errptr) { + OptimisticTransactionDB* otxn_db; + if (SaveError(errptr, OptimisticTransactionDB::Open( + options->rep, std::string(name), &otxn_db))) { + return nullptr; + } + rocksdb_optimistictransactiondb_t* result = + new rocksdb_optimistictransactiondb_t; + result->rep = otxn_db; + return result; +} + +rocksdb_optimistictransactiondb_t* +rocksdb_optimistictransactiondb_open_column_families( + const rocksdb_options_t* db_options, const char* name, + int num_column_families, const char* const* column_family_names, + const rocksdb_options_t* const* column_family_options, + rocksdb_column_family_handle_t** column_family_handles, char** errptr) { + std::vector column_families; + for (int i = 0; i < num_column_families; i++) { + column_families.push_back(ColumnFamilyDescriptor( + std::string(column_family_names[i]), + ColumnFamilyOptions(column_family_options[i]->rep))); + } + + OptimisticTransactionDB* otxn_db; + std::vector handles; + if (SaveError(errptr, OptimisticTransactionDB::Open( + DBOptions(db_options->rep), std::string(name), + column_families, &handles, &otxn_db))) { + return nullptr; + } + + for (size_t i = 0; i < handles.size(); i++) { + rocksdb_column_family_handle_t* c_handle = + new rocksdb_column_family_handle_t; + c_handle->rep = handles[i]; + column_family_handles[i] = c_handle; + } + rocksdb_optimistictransactiondb_t* result = + new rocksdb_optimistictransactiondb_t; + result->rep = otxn_db; + return result; +} + +rocksdb_t* rocksdb_optimistictransactiondb_get_base_db( + rocksdb_optimistictransactiondb_t* otxn_db) { + DB* base_db = otxn_db->rep->GetBaseDB(); + + if (base_db != nullptr) { + rocksdb_t* result = new rocksdb_t; + result->rep = base_db; + return result; + } + + return nullptr; +} + +void rocksdb_optimistictransactiondb_close_base_db(rocksdb_t* base_db) { + delete base_db; +} + +rocksdb_transaction_t* rocksdb_optimistictransaction_begin( + rocksdb_optimistictransactiondb_t* otxn_db, + const rocksdb_writeoptions_t* write_options, + const rocksdb_optimistictransaction_options_t* otxn_options, + rocksdb_transaction_t* old_txn) { + if (old_txn == nullptr) { + rocksdb_transaction_t* result = new rocksdb_transaction_t; + result->rep = otxn_db->rep->BeginTransaction(write_options->rep, + otxn_options->rep, nullptr); + return result; + } + old_txn->rep = otxn_db->rep->BeginTransaction( + write_options->rep, otxn_options->rep, old_txn->rep); 
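+  // (Illustrative note, not part of the upstream sources: passing a non-null
+  // old_txn reinitializes it in place, mirroring rocksdb_transaction_begin
+  // above, so callers can run many optimistic transactions without
+  // reallocating the wrapper.)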
+  return old_txn;
+}
+
+// Write batch into OptimisticTransactionDB
+void rocksdb_optimistictransactiondb_write(
+    rocksdb_optimistictransactiondb_t* otxn_db,
+    const rocksdb_writeoptions_t* options, rocksdb_writebatch_t* batch,
+    char** errptr) {
+  SaveError(errptr, otxn_db->rep->Write(options->rep, &batch->rep));
+}
+
+void rocksdb_optimistictransactiondb_close(
+    rocksdb_optimistictransactiondb_t* otxn_db) {
+  delete otxn_db->rep;
+  delete otxn_db;
+}
+
+rocksdb_checkpoint_t* rocksdb_optimistictransactiondb_checkpoint_object_create(
+    rocksdb_optimistictransactiondb_t* otxn_db, char** errptr) {
+  Checkpoint* checkpoint;
+  if (SaveError(errptr, Checkpoint::Create(otxn_db->rep, &checkpoint))) {
+    return nullptr;
+  }
+  rocksdb_checkpoint_t* result = new rocksdb_checkpoint_t;
+  result->rep = checkpoint;
+  return result;
+}
+
+void rocksdb_free(void* ptr) { free(ptr); }
+
+rocksdb_pinnableslice_t* rocksdb_get_pinned(
+    rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key,
+    size_t keylen, char** errptr) {
+  rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t);
+  Status s = db->rep->Get(options->rep, db->rep->DefaultColumnFamily(),
+                          Slice(key, keylen), &v->rep);
+  if (!s.ok()) {
+    delete (v);
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+    return nullptr;
+  }
+  return v;
+}
+
+rocksdb_pinnableslice_t* rocksdb_get_pinned_cf(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t keylen, char** errptr) {
+  rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t);
+  Status s = db->rep->Get(options->rep, column_family->rep, Slice(key, keylen),
+                          &v->rep);
+  if (!s.ok()) {
+    delete v;
+    if (!s.IsNotFound()) {
+      SaveError(errptr, s);
+    }
+    return nullptr;
+  }
+  return v;
+}
+
+void rocksdb_pinnableslice_destroy(rocksdb_pinnableslice_t* v) { delete v; }
+
+const char* rocksdb_pinnableslice_value(const rocksdb_pinnableslice_t* v,
+                                        size_t* vlen) {
+  if (!v) {
+    *vlen = 0;
+    return nullptr;
+  }
+
+  *vlen = v->rep.size();
+  return v->rep.data();
+}
+
+// container to keep databases and caches in order to use
+// ROCKSDB_NAMESPACE::MemoryUtil
+struct rocksdb_memory_consumers_t {
+  std::vector<rocksdb_t*> dbs;
+  std::unordered_set<rocksdb_cache_t*> caches;
+};
+
+// initializes new container of memory consumers
+rocksdb_memory_consumers_t* rocksdb_memory_consumers_create() {
+  return new rocksdb_memory_consumers_t;
+}
+
+// adds database to the container of memory consumers
+void rocksdb_memory_consumers_add_db(rocksdb_memory_consumers_t* consumers,
+                                     rocksdb_t* db) {
+  consumers->dbs.push_back(db);
+}
+
+// adds cache to the container of memory consumers
+void rocksdb_memory_consumers_add_cache(rocksdb_memory_consumers_t* consumers,
+                                        rocksdb_cache_t* cache) {
+  consumers->caches.insert(cache);
+}
+
+// deletes container with memory consumers
+void rocksdb_memory_consumers_destroy(rocksdb_memory_consumers_t* consumers) {
+  delete consumers;
+}
+
+// contains memory usage statistics provided by ROCKSDB_NAMESPACE::MemoryUtil
+struct rocksdb_memory_usage_t {
+  uint64_t mem_table_total;
+  uint64_t mem_table_unflushed;
+  uint64_t mem_table_readers_total;
+  uint64_t cache_total;
+};
+
+// estimates amount of memory occupied by consumers (dbs and caches)
+rocksdb_memory_usage_t* rocksdb_approximate_memory_usage_create(
+    rocksdb_memory_consumers_t* consumers, char** errptr) {
+  std::vector<DB*> dbs;
+  for (auto db : consumers->dbs) {
+    dbs.push_back(db->rep);
+  }
+
+  std::unordered_set<const Cache*> cache_set;
+  for (auto cache : consumers->caches) {
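+    // MemoryUtil expects raw const Cache* pointers, while the C wrapper
+    // holds handles owning a std::shared_ptr<Cache>; unwrap them here.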
cache_set.insert(const_cast<const Cache*>(cache->rep.get()));
+  }
+
+  std::map<MemoryUtil::UsageType, uint64_t> usage_by_type;
+
+  auto status = MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set,
+                                                            &usage_by_type);
+  if (SaveError(errptr, status)) {
+    return nullptr;
+  }
+
+  auto result = new rocksdb_memory_usage_t;
+  result->mem_table_total = usage_by_type[MemoryUtil::kMemTableTotal];
+  result->mem_table_unflushed = usage_by_type[MemoryUtil::kMemTableUnFlushed];
+  result->mem_table_readers_total =
+      usage_by_type[MemoryUtil::kTableReadersTotal];
+  result->cache_total = usage_by_type[MemoryUtil::kCacheTotal];
+  return result;
+}
+
+uint64_t rocksdb_approximate_memory_usage_get_mem_table_total(
+    rocksdb_memory_usage_t* memory_usage) {
+  return memory_usage->mem_table_total;
+}
+
+uint64_t rocksdb_approximate_memory_usage_get_mem_table_unflushed(
+    rocksdb_memory_usage_t* memory_usage) {
+  return memory_usage->mem_table_unflushed;
+}
+
+uint64_t rocksdb_approximate_memory_usage_get_mem_table_readers_total(
+    rocksdb_memory_usage_t* memory_usage) {
+  return memory_usage->mem_table_readers_total;
+}
+
+uint64_t rocksdb_approximate_memory_usage_get_cache_total(
+    rocksdb_memory_usage_t* memory_usage) {
+  return memory_usage->cache_total;
+}
+
+void rocksdb_options_set_dump_malloc_stats(rocksdb_options_t* opt,
+                                           unsigned char val) {
+  opt->rep.dump_malloc_stats = val;
+}
+
+void rocksdb_options_set_memtable_whole_key_filtering(rocksdb_options_t* opt,
+                                                      unsigned char val) {
+  opt->rep.memtable_whole_key_filtering = val;
+}
+
+void rocksdb_options_set_avoid_unnecessary_blocking_io(rocksdb_options_t* opt,
+                                                       unsigned char val) {
+  opt->rep.avoid_unnecessary_blocking_io = val;
+}
+
+unsigned char rocksdb_options_get_avoid_unnecessary_blocking_io(
+    rocksdb_options_t* opt) {
+  return opt->rep.avoid_unnecessary_blocking_io;
+}
+
+// deletes container with memory usage estimates
+void rocksdb_approximate_memory_usage_destroy(rocksdb_memory_usage_t* usage) {
+  delete usage;
+}
+
+void rocksdb_cancel_all_background_work(rocksdb_t* db, unsigned char wait) {
+  CancelAllBackgroundWork(db->rep, wait);
+}
+
+void rocksdb_disable_manual_compaction(rocksdb_t* db) {
+  db->rep->DisableManualCompaction();
+}
+
+void rocksdb_enable_manual_compaction(rocksdb_t* db) {
+  db->rep->EnableManualCompaction();
+}
+
+} // end extern "C"
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.cc
new file mode 100644
index 0000000000..de94426d71
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.cc
@@ -0,0 +1,1708 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/column_family.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <limits>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "db/blob/blob_file_cache.h"
+#include "db/blob/blob_source.h"
+#include "db/compaction/compaction_picker.h"
+#include "db/compaction/compaction_picker_fifo.h"
+#include "db/compaction/compaction_picker_level.h"
+#include "db/compaction/compaction_picker_universal.h"
+#include "db/db_impl/db_impl.h"
+#include "db/internal_stats.h"
+#include "db/job_context.h"
+#include "db/range_del_aggregator.h"
+#include "db/table_properties_collector.h"
+#include "db/version_set.h"
+#include "db/write_controller.h"
+#include "file/sst_file_manager_impl.h"
+#include "logging/logging.h"
+#include "monitoring/thread_status_util.h"
+#include "options/options_helper.h"
+#include "port/port.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/table.h"
+#include "table/merging_iterator.h"
+#include "util/autovector.h"
+#include "util/cast_util.h"
+#include "util/compression.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+ColumnFamilyHandleImpl::ColumnFamilyHandleImpl(
+    ColumnFamilyData* column_family_data, DBImpl* db, InstrumentedMutex* mutex)
+    : cfd_(column_family_data), db_(db), mutex_(mutex) {
+  if (cfd_ != nullptr) {
+    cfd_->Ref();
+  }
+}
+
+ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
+  if (cfd_ != nullptr) {
+    for (auto& listener : cfd_->ioptions()->listeners) {
+      listener->OnColumnFamilyHandleDeletionStarted(this);
+    }
+    // Job id == 0 means that this is not our background process, but rather
+    // user thread
+    // Need to hold some shared pointers owned by the initial_cf_options
+    // before final cleaning up finishes.
+    ColumnFamilyOptions initial_cf_options_copy = cfd_->initial_cf_options();
+    JobContext job_context(0);
+    mutex_->Lock();
+    bool dropped = cfd_->IsDropped();
+    if (cfd_->UnrefAndTryDelete()) {
+      if (dropped) {
+        db_->FindObsoleteFiles(&job_context, false, true);
+      }
+    }
+    mutex_->Unlock();
+    if (job_context.HaveSomethingToDelete()) {
+      bool defer_purge =
+          db_->immutable_db_options().avoid_unnecessary_blocking_io;
+      db_->PurgeObsoleteFiles(job_context, defer_purge);
+    }
+    job_context.Clean();
+  }
+}
+
+uint32_t ColumnFamilyHandleImpl::GetID() const { return cfd()->GetID(); }
+
+const std::string& ColumnFamilyHandleImpl::GetName() const {
+  return cfd()->GetName();
+}
+
+Status ColumnFamilyHandleImpl::GetDescriptor(ColumnFamilyDescriptor* desc) {
+  // accessing mutable cf-options requires db mutex.
+ InstrumentedMutexLock l(mutex_); + *desc = ColumnFamilyDescriptor(cfd()->GetName(), cfd()->GetLatestCFOptions()); + return Status::OK(); +} + +const Comparator* ColumnFamilyHandleImpl::GetComparator() const { + return cfd()->user_comparator(); +} + +void GetIntTblPropCollectorFactory( + const ImmutableCFOptions& ioptions, + IntTblPropCollectorFactories* int_tbl_prop_collector_factories) { + assert(int_tbl_prop_collector_factories); + + auto& collector_factories = ioptions.table_properties_collector_factories; + for (size_t i = 0; i < ioptions.table_properties_collector_factories.size(); + ++i) { + assert(collector_factories[i]); + int_tbl_prop_collector_factories->emplace_back( + new UserKeyTablePropertiesCollectorFactory(collector_factories[i])); + } +} + +Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) { + if (!cf_options.compression_per_level.empty()) { + for (size_t level = 0; level < cf_options.compression_per_level.size(); + ++level) { + if (!CompressionTypeSupported(cf_options.compression_per_level[level])) { + return Status::InvalidArgument( + "Compression type " + + CompressionTypeToString(cf_options.compression_per_level[level]) + + " is not linked with the binary."); + } + } + } else { + if (!CompressionTypeSupported(cf_options.compression)) { + return Status::InvalidArgument( + "Compression type " + + CompressionTypeToString(cf_options.compression) + + " is not linked with the binary."); + } + } + if (cf_options.compression_opts.zstd_max_train_bytes > 0) { + if (cf_options.compression_opts.use_zstd_dict_trainer) { + if (!ZSTD_TrainDictionarySupported()) { + return Status::InvalidArgument( + "zstd dictionary trainer cannot be used because ZSTD 1.1.3+ " + "is not linked with the binary."); + } + } else if (!ZSTD_FinalizeDictionarySupported()) { + return Status::InvalidArgument( + "zstd finalizeDictionary cannot be used because ZSTD 1.4.5+ " + "is not linked with the binary."); + } + if (cf_options.compression_opts.max_dict_bytes == 0) { + return Status::InvalidArgument( + "The dictionary size limit (`CompressionOptions::max_dict_bytes`) " + "should be nonzero if we're using zstd's dictionary generator."); + } + } + + if (!CompressionTypeSupported(cf_options.blob_compression_type)) { + std::ostringstream oss; + oss << "The specified blob compression type " + << CompressionTypeToString(cf_options.blob_compression_type) + << " is not available."; + + return Status::InvalidArgument(oss.str()); + } + + return Status::OK(); +} + +Status CheckConcurrentWritesSupported(const ColumnFamilyOptions& cf_options) { + if (cf_options.inplace_update_support) { + return Status::InvalidArgument( + "In-place memtable updates (inplace_update_support) is not compatible " + "with concurrent writes (allow_concurrent_memtable_write)"); + } + if (!cf_options.memtable_factory->IsInsertConcurrentlySupported()) { + return Status::InvalidArgument( + "Memtable doesn't concurrent writes (allow_concurrent_memtable_write)"); + } + return Status::OK(); +} + +Status CheckCFPathsSupported(const DBOptions& db_options, + const ColumnFamilyOptions& cf_options) { + // More than one cf_paths are supported only in universal + // and level compaction styles. This function also checks the case + // in which cf_paths is not specified, which results in db_paths + // being used. 
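+  // Example (illustrative, not part of the upstream sources): a column
+  // family using FIFO compaction with two cf_paths entries is rejected with
+  // Status::NotSupported below, while level and universal compaction accept
+  // multiple paths.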
+  if ((cf_options.compaction_style != kCompactionStyleUniversal) &&
+      (cf_options.compaction_style != kCompactionStyleLevel)) {
+    if (cf_options.cf_paths.size() > 1) {
+      return Status::NotSupported(
+          "More than one CF paths are only supported in "
+          "universal and level compaction styles. ");
+    } else if (cf_options.cf_paths.empty() && db_options.db_paths.size() > 1) {
+      return Status::NotSupported(
+          "More than one DB paths are only supported in "
+          "universal and level compaction styles. ");
+    }
+  }
+  return Status::OK();
+}
+
+namespace {
+const uint64_t kDefaultTtl = 0xfffffffffffffffe;
+const uint64_t kDefaultPeriodicCompSecs = 0xfffffffffffffffe;
+} // anonymous namespace
+
+ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options,
+                                    const ColumnFamilyOptions& src) {
+  ColumnFamilyOptions result = src;
+  size_t clamp_max = std::conditional<
+      sizeof(size_t) == 4, std::integral_constant<size_t, 0xffffffff>,
+      std::integral_constant<uint64_t, 64ull << 30>>::type::value;
+  ClipToRange(&result.write_buffer_size, (static_cast<size_t>(64)) << 10,
+              clamp_max);
+  // if user sets arena_block_size, we trust user to use this value. Otherwise,
+  // calculate a proper value from writer_buffer_size;
+  if (result.arena_block_size <= 0) {
+    result.arena_block_size =
+        std::min(size_t{1024 * 1024}, result.write_buffer_size / 8);
+
+    // Align up to 4k
+    const size_t align = 4 * 1024;
+    result.arena_block_size =
+        ((result.arena_block_size + align - 1) / align) * align;
+  }
+  result.min_write_buffer_number_to_merge =
+      std::min(result.min_write_buffer_number_to_merge,
+               result.max_write_buffer_number - 1);
+  if (result.min_write_buffer_number_to_merge < 1) {
+    result.min_write_buffer_number_to_merge = 1;
+  }
+
+  if (db_options.atomic_flush && result.min_write_buffer_number_to_merge > 1) {
+    ROCKS_LOG_WARN(
+        db_options.logger,
+        "Currently, if atomic_flush is true, then triggering flush for any "
+        "column family internally (non-manual flush) will trigger flushing "
+        "all column families even if the number of memtables is smaller "
+        "min_write_buffer_number_to_merge. Therefore, configuring "
+        "min_write_buffer_number_to_merge > 1 is not compatible and should "
+        "be satinized to 1. Not doing so will lead to data loss and "
+        "inconsistent state across multiple column families when WAL is "
+        "disabled, which is a common setting for atomic flush");
+
+    result.min_write_buffer_number_to_merge = 1;
+  }
+
+  if (result.num_levels < 1) {
+    result.num_levels = 1;
+  }
+  if (result.compaction_style == kCompactionStyleLevel &&
+      result.num_levels < 2) {
+    result.num_levels = 2;
+  }
+
+  if (result.compaction_style == kCompactionStyleUniversal &&
+      db_options.allow_ingest_behind && result.num_levels < 3) {
+    result.num_levels = 3;
+  }
+
+  if (result.max_write_buffer_number < 2) {
+    result.max_write_buffer_number = 2;
+  }
+  // fall back max_write_buffer_number_to_maintain if
+  // max_write_buffer_size_to_maintain is not set
+  if (result.max_write_buffer_size_to_maintain < 0) {
+    result.max_write_buffer_size_to_maintain =
+        result.max_write_buffer_number *
+        static_cast<int64_t>(result.write_buffer_size);
+  } else if (result.max_write_buffer_size_to_maintain == 0 &&
+             result.max_write_buffer_number_to_maintain < 0) {
+    result.max_write_buffer_number_to_maintain =
+        result.max_write_buffer_number;
+  }
+  // bloom filter size shouldn't exceed 1/4 of memtable size.
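+  // Worked example (illustrative, not part of the upstream sources): with
+  // write_buffer_size = 64 MB, a configured ratio above 0.25 is clamped so
+  // the memtable prefix bloom filter gets at most about 16 MB.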
+  if (result.memtable_prefix_bloom_size_ratio > 0.25) {
+    result.memtable_prefix_bloom_size_ratio = 0.25;
+  } else if (result.memtable_prefix_bloom_size_ratio < 0) {
+    result.memtable_prefix_bloom_size_ratio = 0;
+  }
+
+  if (!result.prefix_extractor) {
+    assert(result.memtable_factory);
+    Slice name = result.memtable_factory->Name();
+    if (name.compare("HashSkipListRepFactory") == 0 ||
+        name.compare("HashLinkListRepFactory") == 0) {
+      result.memtable_factory = std::make_shared<SkipListFactory>();
+    }
+  }
+
+  if (result.compaction_style == kCompactionStyleFIFO) {
+    // since we delete level0 files in FIFO compaction when there are too many
+    // of them, these options don't really mean anything
+    result.level0_slowdown_writes_trigger = std::numeric_limits<int>::max();
+    result.level0_stop_writes_trigger = std::numeric_limits<int>::max();
+  }
+
+  if (result.max_bytes_for_level_multiplier <= 0) {
+    result.max_bytes_for_level_multiplier = 1;
+  }
+
+  if (result.level0_file_num_compaction_trigger == 0) {
+    ROCKS_LOG_WARN(db_options.logger,
+                   "level0_file_num_compaction_trigger cannot be 0");
+    result.level0_file_num_compaction_trigger = 1;
+  }
+
+  if (result.level0_stop_writes_trigger <
+          result.level0_slowdown_writes_trigger ||
+      result.level0_slowdown_writes_trigger <
+          result.level0_file_num_compaction_trigger) {
+    ROCKS_LOG_WARN(db_options.logger,
+                   "This condition must be satisfied: "
+                   "level0_stop_writes_trigger(%d) >= "
+                   "level0_slowdown_writes_trigger(%d) >= "
+                   "level0_file_num_compaction_trigger(%d)",
+                   result.level0_stop_writes_trigger,
+                   result.level0_slowdown_writes_trigger,
+                   result.level0_file_num_compaction_trigger);
+    if (result.level0_slowdown_writes_trigger <
+        result.level0_file_num_compaction_trigger) {
+      result.level0_slowdown_writes_trigger =
+          result.level0_file_num_compaction_trigger;
+    }
+    if (result.level0_stop_writes_trigger <
+        result.level0_slowdown_writes_trigger) {
+      result.level0_stop_writes_trigger =
+          result.level0_slowdown_writes_trigger;
+    }
+    ROCKS_LOG_WARN(db_options.logger,
+                   "Adjust the value to "
+                   "level0_stop_writes_trigger(%d)"
+                   "level0_slowdown_writes_trigger(%d)"
+                   "level0_file_num_compaction_trigger(%d)",
+                   result.level0_stop_writes_trigger,
+                   result.level0_slowdown_writes_trigger,
+                   result.level0_file_num_compaction_trigger);
+  }
+
+  if (result.soft_pending_compaction_bytes_limit == 0) {
+    result.soft_pending_compaction_bytes_limit =
+        result.hard_pending_compaction_bytes_limit;
+  } else if (result.hard_pending_compaction_bytes_limit > 0 &&
+             result.soft_pending_compaction_bytes_limit >
+                 result.hard_pending_compaction_bytes_limit) {
+    result.soft_pending_compaction_bytes_limit =
+        result.hard_pending_compaction_bytes_limit;
+  }
+
+  // When the DB is stopped, it's possible that there are some .trash files
+  // that were not deleted yet, when we open the DB we will find these .trash
+  // files and schedule them to be deleted (or delete immediately if
+  // SstFileManager was not used)
+  auto sfm =
+      static_cast<SstFileManagerImpl*>(db_options.sst_file_manager.get());
+  for (size_t i = 0; i < result.cf_paths.size(); i++) {
+    DeleteScheduler::CleanupDirectory(db_options.env, sfm,
+                                      result.cf_paths[i].path)
+        .PermitUncheckedError();
+  }
+
+  if (result.cf_paths.empty()) {
+    result.cf_paths = db_options.db_paths;
+  }
+
+  if (result.level_compaction_dynamic_level_bytes) {
+    if (result.compaction_style != kCompactionStyleLevel) {
+      ROCKS_LOG_WARN(db_options.info_log.get(),
+                     "level_compaction_dynamic_level_bytes only makes sense"
+                     "for level-based compaction");
+
result.level_compaction_dynamic_level_bytes = false; + } else if (result.cf_paths.size() > 1U) { + // we don't yet know how to make both of this feature and multiple + // DB path work. + ROCKS_LOG_WARN(db_options.info_log.get(), + "multiple cf_paths/db_paths and" + "level_compaction_dynamic_level_bytes" + "can't be used together"); + result.level_compaction_dynamic_level_bytes = false; + } + } + + if (result.max_compaction_bytes == 0) { + result.max_compaction_bytes = result.target_file_size_base * 25; + } + + bool is_block_based_table = (result.table_factory->IsInstanceOf( + TableFactory::kBlockBasedTableName())); + + const uint64_t kAdjustedTtl = 30 * 24 * 60 * 60; + if (result.ttl == kDefaultTtl) { + if (is_block_based_table && + result.compaction_style != kCompactionStyleFIFO) { + result.ttl = kAdjustedTtl; + } else { + result.ttl = 0; + } + } + + const uint64_t kAdjustedPeriodicCompSecs = 30 * 24 * 60 * 60; + + // Turn on periodic compactions and set them to occur once every 30 days if + // compaction filters are used and periodic_compaction_seconds is set to the + // default value. + if (result.compaction_style != kCompactionStyleFIFO) { + if ((result.compaction_filter != nullptr || + result.compaction_filter_factory != nullptr) && + result.periodic_compaction_seconds == kDefaultPeriodicCompSecs && + is_block_based_table) { + result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs; + } + } else { + // result.compaction_style == kCompactionStyleFIFO + if (result.ttl == 0) { + if (is_block_based_table) { + if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) { + result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs; + } + result.ttl = result.periodic_compaction_seconds; + } + } else if (result.periodic_compaction_seconds != 0) { + result.ttl = std::min(result.ttl, result.periodic_compaction_seconds); + } + } + + // TTL compactions would work similar to Periodic Compactions in Universal in + // most of the cases. So, if ttl is set, execute the periodic compaction + // codepath. + if (result.compaction_style == kCompactionStyleUniversal && result.ttl != 0) { + if (result.periodic_compaction_seconds != 0) { + result.periodic_compaction_seconds = + std::min(result.ttl, result.periodic_compaction_seconds); + } else { + result.periodic_compaction_seconds = result.ttl; + } + } + + if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) { + result.periodic_compaction_seconds = 0; + } + + return result; +} + +int SuperVersion::dummy = 0; +void* const SuperVersion::kSVInUse = &SuperVersion::dummy; +void* const SuperVersion::kSVObsolete = nullptr; + +SuperVersion::~SuperVersion() { + for (auto td : to_delete) { + delete td; + } +} + +SuperVersion* SuperVersion::Ref() { + refs.fetch_add(1, std::memory_order_relaxed); + return this; +} + +bool SuperVersion::Unref() { + // fetch_sub returns the previous value of ref + uint32_t previous_refs = refs.fetch_sub(1); + assert(previous_refs > 0); + return previous_refs == 1; +} + +void SuperVersion::Cleanup() { + assert(refs.load(std::memory_order_relaxed) == 0); + // Since this SuperVersion object is being deleted, + // decrement reference to the immutable MemtableList + // this SV object was pointing to. 
+ imm->Unref(&to_delete); + MemTable* m = mem->Unref(); + if (m != nullptr) { + auto* memory_usage = current->cfd()->imm()->current_memory_usage(); + assert(*memory_usage >= m->ApproximateMemoryUsage()); + *memory_usage -= m->ApproximateMemoryUsage(); + to_delete.push_back(m); + } + current->Unref(); + cfd->UnrefAndTryDelete(); +} + +void SuperVersion::Init(ColumnFamilyData* new_cfd, MemTable* new_mem, + MemTableListVersion* new_imm, Version* new_current) { + cfd = new_cfd; + mem = new_mem; + imm = new_imm; + current = new_current; + cfd->Ref(); + mem->Ref(); + imm->Ref(); + current->Ref(); + refs.store(1, std::memory_order_relaxed); +} + +namespace { +void SuperVersionUnrefHandle(void* ptr) { + // UnrefHandle is called when a thread exits or a ThreadLocalPtr gets + // destroyed. When the former happens, the thread shouldn't see kSVInUse. + // When the latter happens, only super_version_ holds a reference + // to ColumnFamilyData, so no further queries are possible. + SuperVersion* sv = static_cast(ptr); + bool was_last_ref __attribute__((__unused__)); + was_last_ref = sv->Unref(); + // Thread-local SuperVersions can't outlive ColumnFamilyData::super_version_. + // This is important because we can't do SuperVersion cleanup here. + // That would require locking DB mutex, which would deadlock because + // SuperVersionUnrefHandle is called with locked ThreadLocalPtr mutex. + assert(!was_last_ref); +} +} // anonymous namespace + +std::vector ColumnFamilyData::GetDbPaths() const { + std::vector paths; + paths.reserve(ioptions_.cf_paths.size()); + for (const DbPath& db_path : ioptions_.cf_paths) { + paths.emplace_back(db_path.path); + } + return paths; +} + +const uint32_t ColumnFamilyData::kDummyColumnFamilyDataId = + std::numeric_limits::max(); + +ColumnFamilyData::ColumnFamilyData( + uint32_t id, const std::string& name, Version* _dummy_versions, + Cache* _table_cache, WriteBufferManager* write_buffer_manager, + const ColumnFamilyOptions& cf_options, const ImmutableDBOptions& db_options, + const FileOptions* file_options, ColumnFamilySet* column_family_set, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, const std::string& db_id, + const std::string& db_session_id) + : id_(id), + name_(name), + dummy_versions_(_dummy_versions), + current_(nullptr), + refs_(0), + initialized_(false), + dropped_(false), + internal_comparator_(cf_options.comparator), + initial_cf_options_(SanitizeOptions(db_options, cf_options)), + ioptions_(db_options, initial_cf_options_), + mutable_cf_options_(initial_cf_options_), + is_delete_range_supported_( + cf_options.table_factory->IsDeleteRangeSupported()), + write_buffer_manager_(write_buffer_manager), + mem_(nullptr), + imm_(ioptions_.min_write_buffer_number_to_merge, + ioptions_.max_write_buffer_number_to_maintain, + ioptions_.max_write_buffer_size_to_maintain), + super_version_(nullptr), + super_version_number_(0), + local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)), + next_(nullptr), + prev_(nullptr), + log_number_(0), + column_family_set_(column_family_set), + queued_for_flush_(false), + queued_for_compaction_(false), + prev_compaction_needed_bytes_(0), + allow_2pc_(db_options.allow_2pc), + last_memtable_id_(0), + db_paths_registered_(false), + mempurge_used_(false), + next_epoch_number_(1) { + if (id_ != kDummyColumnFamilyDataId) { + // TODO(cc): RegisterDbPaths can be expensive, considering moving it + // outside of this constructor which might be called with db mutex held. 
+ // TODO(cc): considering using ioptions_.fs, currently some tests rely on + // EnvWrapper, that's the main reason why we use env here. + Status s = ioptions_.env->RegisterDbPaths(GetDbPaths()); + if (s.ok()) { + db_paths_registered_ = true; + } else { + ROCKS_LOG_ERROR( + ioptions_.logger, + "Failed to register data paths of column family (id: %d, name: %s)", + id_, name_.c_str()); + } + } + Ref(); + + // Convert user defined table properties collector factories to internal ones. + GetIntTblPropCollectorFactory(ioptions_, &int_tbl_prop_collector_factories_); + + // if _dummy_versions is nullptr, then this is a dummy column family. + if (_dummy_versions != nullptr) { + internal_stats_.reset( + new InternalStats(ioptions_.num_levels, ioptions_.clock, this)); + table_cache_.reset(new TableCache(ioptions_, file_options, _table_cache, + block_cache_tracer, io_tracer, + db_session_id)); + blob_file_cache_.reset( + new BlobFileCache(_table_cache, ioptions(), soptions(), id_, + internal_stats_->GetBlobFileReadHist(), io_tracer)); + blob_source_.reset(new BlobSource(ioptions(), db_id, db_session_id, + blob_file_cache_.get())); + + if (ioptions_.compaction_style == kCompactionStyleLevel) { + compaction_picker_.reset( + new LevelCompactionPicker(ioptions_, &internal_comparator_)); + } else if (ioptions_.compaction_style == kCompactionStyleUniversal) { + compaction_picker_.reset( + new UniversalCompactionPicker(ioptions_, &internal_comparator_)); + } else if (ioptions_.compaction_style == kCompactionStyleFIFO) { + compaction_picker_.reset( + new FIFOCompactionPicker(ioptions_, &internal_comparator_)); + } else if (ioptions_.compaction_style == kCompactionStyleNone) { + compaction_picker_.reset( + new NullCompactionPicker(ioptions_, &internal_comparator_)); + ROCKS_LOG_WARN(ioptions_.logger, + "Column family %s does not use any background compaction. " + "Compactions can only be done via CompactFiles\n", + GetName().c_str()); + } else { + ROCKS_LOG_ERROR(ioptions_.logger, + "Unable to recognize the specified compaction style %d. 
" + "Column family %s will use kCompactionStyleLevel.\n", + ioptions_.compaction_style, GetName().c_str()); + compaction_picker_.reset( + new LevelCompactionPicker(ioptions_, &internal_comparator_)); + } + + if (column_family_set_->NumberOfColumnFamilies() < 10) { + ROCKS_LOG_INFO(ioptions_.logger, + "--------------- Options for column family [%s]:\n", + name.c_str()); + initial_cf_options_.Dump(ioptions_.logger); + } else { + ROCKS_LOG_INFO(ioptions_.logger, "\t(skipping printing options)\n"); + } + } + + RecalculateWriteStallConditions(mutable_cf_options_); + + if (cf_options.table_factory->IsInstanceOf( + TableFactory::kBlockBasedTableName()) && + cf_options.table_factory->GetOptions()) { + const BlockBasedTableOptions* bbto = + cf_options.table_factory->GetOptions(); + const auto& options_overrides = bbto->cache_usage_options.options_overrides; + const auto file_metadata_charged = + options_overrides.at(CacheEntryRole::kFileMetadata).charged; + if (bbto->block_cache && + file_metadata_charged == CacheEntryRoleOptions::Decision::kEnabled) { + // TODO(hx235): Add a `ConcurrentCacheReservationManager` at DB scope + // responsible for reservation of `ObsoleteFileInfo` so that we can keep + // this `file_metadata_cache_res_mgr_` nonconcurrent + file_metadata_cache_res_mgr_.reset(new ConcurrentCacheReservationManager( + std::make_shared< + CacheReservationManagerImpl>( + bbto->block_cache))); + } + } +} + +// DB mutex held +ColumnFamilyData::~ColumnFamilyData() { + assert(refs_.load(std::memory_order_relaxed) == 0); + // remove from linked list + auto prev = prev_; + auto next = next_; + prev->next_ = next; + next->prev_ = prev; + + if (!dropped_ && column_family_set_ != nullptr) { + // If it's dropped, it's already removed from column family set + // If column_family_set_ == nullptr, this is dummy CFD and not in + // ColumnFamilySet + column_family_set_->RemoveColumnFamily(this); + } + + if (current_ != nullptr) { + current_->Unref(); + } + + // It would be wrong if this ColumnFamilyData is in flush_queue_ or + // compaction_queue_ and we destroyed it + assert(!queued_for_flush_); + assert(!queued_for_compaction_); + assert(super_version_ == nullptr); + + if (dummy_versions_ != nullptr) { + // List must be empty + assert(dummy_versions_->Next() == dummy_versions_); + bool deleted __attribute__((__unused__)); + deleted = dummy_versions_->Unref(); + assert(deleted); + } + + if (mem_ != nullptr) { + delete mem_->Unref(); + } + autovector to_delete; + imm_.current()->Unref(&to_delete); + for (MemTable* m : to_delete) { + delete m; + } + + if (db_paths_registered_) { + // TODO(cc): considering using ioptions_.fs, currently some tests rely on + // EnvWrapper, that's the main reason why we use env here. + Status s = ioptions_.env->UnregisterDbPaths(GetDbPaths()); + if (!s.ok()) { + ROCKS_LOG_ERROR( + ioptions_.logger, + "Failed to unregister data paths of column family (id: %d, name: %s)", + id_, name_.c_str()); + } + } +} + +bool ColumnFamilyData::UnrefAndTryDelete() { + int old_refs = refs_.fetch_sub(1); + assert(old_refs > 0); + + if (old_refs == 1) { + assert(super_version_ == nullptr); + delete this; + return true; + } + + if (old_refs == 2 && super_version_ != nullptr) { + // Only the super_version_ holds me + SuperVersion* sv = super_version_; + super_version_ = nullptr; + + // Release SuperVersion references kept in ThreadLocalPtr. 
+ local_sv_.reset(); + + if (sv->Unref()) { + // Note: sv will delete this ColumnFamilyData during Cleanup() + assert(sv->cfd == this); + sv->Cleanup(); + delete sv; + return true; + } + } + return false; +} + +void ColumnFamilyData::SetDropped() { + // can't drop default CF + assert(id_ != 0); + dropped_ = true; + write_controller_token_.reset(); + + // remove from column_family_set + column_family_set_->RemoveColumnFamily(this); +} + +ColumnFamilyOptions ColumnFamilyData::GetLatestCFOptions() const { + return BuildColumnFamilyOptions(initial_cf_options_, mutable_cf_options_); +} + +uint64_t ColumnFamilyData::OldestLogToKeep() { + auto current_log = GetLogNumber(); + + if (allow_2pc_) { + auto imm_prep_log = imm()->PrecomputeMinLogContainingPrepSection(); + auto mem_prep_log = mem()->GetMinLogContainingPrepSection(); + + if (imm_prep_log > 0 && imm_prep_log < current_log) { + current_log = imm_prep_log; + } + + if (mem_prep_log > 0 && mem_prep_log < current_log) { + current_log = mem_prep_log; + } + } + + return current_log; +} + +const double kIncSlowdownRatio = 0.8; +const double kDecSlowdownRatio = 1 / kIncSlowdownRatio; +const double kNearStopSlowdownRatio = 0.6; +const double kDelayRecoverSlowdownRatio = 1.4; + +namespace { +// If penalize_stop is true, we further reduce slowdown rate. +std::unique_ptr SetupDelay( + WriteController* write_controller, uint64_t compaction_needed_bytes, + uint64_t prev_compaction_need_bytes, bool penalize_stop, + bool auto_compactions_disabled) { + const uint64_t kMinWriteRate = 16 * 1024u; // Minimum write rate 16KB/s. + + uint64_t max_write_rate = write_controller->max_delayed_write_rate(); + uint64_t write_rate = write_controller->delayed_write_rate(); + + if (auto_compactions_disabled) { + // When auto compaction is disabled, always use the value user gave. + write_rate = max_write_rate; + } else if (write_controller->NeedsDelay() && max_write_rate > kMinWriteRate) { + // If user gives rate less than kMinWriteRate, don't adjust it. + // + // If already delayed, need to adjust based on previous compaction debt. + // When there are two or more column families require delay, we always + // increase or reduce write rate based on information for one single + // column family. It is likely to be OK but we can improve if there is a + // problem. + // Ignore compaction_needed_bytes = 0 case because compaction_needed_bytes + // is only available in level-based compaction + // + // If the compaction debt stays the same as previously, we also further slow + // down. It usually means a mem table is full. It's mainly for the case + // where both of flush and compaction are much slower than the speed we + // insert to mem tables, so we need to actively slow down before we get + // feedback signal from compaction and flushes to avoid the full stop + // because of hitting the max write buffer number. + // + // If DB just falled into the stop condition, we need to further reduce + // the write rate to avoid the stop condition. + if (penalize_stop) { + // Penalize the near stop or stop condition by more aggressive slowdown. + // This is to provide the long term slowdown increase signal. + // The penalty is more than the reward of recovering to the normal + // condition. 
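+      // Worked example (illustrative, not part of the upstream sources):
+      // from a delayed rate of 16 MB/s, the near-stop penalty gives
+      // 16 * 0.6 = 9.6 MB/s; growing compaction debt gives
+      // 16 * 0.8 = 12.8 MB/s; recovery multiplies by 1/0.8 = 1.25x, capped at
+      // max_delayed_write_rate.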
+      write_rate = static_cast<uint64_t>(static_cast<double>(write_rate) *
+                                         kNearStopSlowdownRatio);
+      if (write_rate < kMinWriteRate) {
+        write_rate = kMinWriteRate;
+      }
+    } else if (prev_compaction_need_bytes > 0 &&
+               prev_compaction_need_bytes <= compaction_needed_bytes) {
+      write_rate = static_cast<uint64_t>(static_cast<double>(write_rate) *
+                                         kIncSlowdownRatio);
+      if (write_rate < kMinWriteRate) {
+        write_rate = kMinWriteRate;
+      }
+    } else if (prev_compaction_need_bytes > compaction_needed_bytes) {
+      // We are speeding up by ratio of kSlowdownRatio when we have paid
+      // compaction debt. But we'll never speed up to faster than the write
+      // rate given by users.
+      write_rate = static_cast<uint64_t>(static_cast<double>(write_rate) *
+                                         kDecSlowdownRatio);
+      if (write_rate > max_write_rate) {
+        write_rate = max_write_rate;
+      }
+    }
+  }
+  return write_controller->GetDelayToken(write_rate);
+}
+
+int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger,
+                                    int level0_slowdown_writes_trigger) {
+  // SanitizeOptions() ensures it.
+  assert(level0_file_num_compaction_trigger <= level0_slowdown_writes_trigger);
+
+  if (level0_file_num_compaction_trigger < 0) {
+    return std::numeric_limits<int>::max();
+  }
+
+  const int64_t twice_level0_trigger =
+      static_cast<int64_t>(level0_file_num_compaction_trigger) * 2;
+
+  const int64_t one_fourth_trigger_slowdown =
+      static_cast<int64_t>(level0_file_num_compaction_trigger) +
+      ((level0_slowdown_writes_trigger - level0_file_num_compaction_trigger) /
+       4);
+
+  assert(twice_level0_trigger >= 0);
+  assert(one_fourth_trigger_slowdown >= 0);
+
+  // 1/4 of the way between L0 compaction trigger threshold and slowdown
+  // condition.
+  // Or twice as compaction trigger, if it is smaller.
+  int64_t res = std::min(twice_level0_trigger, one_fourth_trigger_slowdown);
+  if (res >= std::numeric_limits<int>::max()) {
+    return std::numeric_limits<int>::max();
+  } else {
+    // res fits in int
+    return static_cast<int>(res);
+  }
+}
+} // anonymous namespace
+
+std::pair<WriteStallCondition, WriteStallCause>
+ColumnFamilyData::GetWriteStallConditionAndCause(
+    int num_unflushed_memtables, int num_l0_files,
+    uint64_t num_compaction_needed_bytes,
+    const MutableCFOptions& mutable_cf_options,
+    const ImmutableCFOptions& immutable_cf_options) {
+  if (num_unflushed_memtables >= mutable_cf_options.max_write_buffer_number) {
+    return {WriteStallCondition::kStopped, WriteStallCause::kMemtableLimit};
+  } else if (!mutable_cf_options.disable_auto_compactions &&
+             num_l0_files >= mutable_cf_options.level0_stop_writes_trigger) {
+    return {WriteStallCondition::kStopped, WriteStallCause::kL0FileCountLimit};
+  } else if (!mutable_cf_options.disable_auto_compactions &&
+             mutable_cf_options.hard_pending_compaction_bytes_limit > 0 &&
+             num_compaction_needed_bytes >=
+                 mutable_cf_options.hard_pending_compaction_bytes_limit) {
+    return {WriteStallCondition::kStopped,
+            WriteStallCause::kPendingCompactionBytes};
+  } else if (mutable_cf_options.max_write_buffer_number > 3 &&
+             num_unflushed_memtables >=
+                 mutable_cf_options.max_write_buffer_number - 1 &&
+             num_unflushed_memtables - 1 >=
+                 immutable_cf_options.min_write_buffer_number_to_merge) {
+    return {WriteStallCondition::kDelayed, WriteStallCause::kMemtableLimit};
+  } else if (!mutable_cf_options.disable_auto_compactions &&
+             mutable_cf_options.level0_slowdown_writes_trigger >= 0 &&
+             num_l0_files >=
+                 mutable_cf_options.level0_slowdown_writes_trigger) {
+    return {WriteStallCondition::kDelayed, WriteStallCause::kL0FileCountLimit};
+  } else if (!mutable_cf_options.disable_auto_compactions &&
+             mutable_cf_options.soft_pending_compaction_bytes_limit > 0 &&
+
+             num_compaction_needed_bytes >=
+                 mutable_cf_options.soft_pending_compaction_bytes_limit) {
+    return {WriteStallCondition::kDelayed,
+            WriteStallCause::kPendingCompactionBytes};
+  }
+  return {WriteStallCondition::kNormal, WriteStallCause::kNone};
+}
+
+WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
+    const MutableCFOptions& mutable_cf_options) {
+  auto write_stall_condition = WriteStallCondition::kNormal;
+  if (current_ != nullptr) {
+    auto* vstorage = current_->storage_info();
+    auto write_controller = column_family_set_->write_controller_;
+    uint64_t compaction_needed_bytes =
+        vstorage->estimated_compaction_needed_bytes();
+
+    auto write_stall_condition_and_cause = GetWriteStallConditionAndCause(
+        imm()->NumNotFlushed(), vstorage->l0_delay_trigger_count(),
+        vstorage->estimated_compaction_needed_bytes(), mutable_cf_options,
+        *ioptions());
+    write_stall_condition = write_stall_condition_and_cause.first;
+    auto write_stall_cause = write_stall_condition_and_cause.second;
+
+    bool was_stopped = write_controller->IsStopped();
+    bool needed_delay = write_controller->NeedsDelay();
+
+    if (write_stall_condition == WriteStallCondition::kStopped &&
+        write_stall_cause == WriteStallCause::kMemtableLimit) {
+      write_controller_token_ = write_controller->GetStopToken();
+      internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_STOPS, 1);
+      ROCKS_LOG_WARN(
+          ioptions_.logger,
+          "[%s] Stopping writes because we have %d immutable memtables "
+          "(waiting for flush), max_write_buffer_number is set to %d",
+          name_.c_str(), imm()->NumNotFlushed(),
+          mutable_cf_options.max_write_buffer_number);
+    } else if (write_stall_condition == WriteStallCondition::kStopped &&
+               write_stall_cause == WriteStallCause::kL0FileCountLimit) {
+      write_controller_token_ = write_controller->GetStopToken();
+      internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_STOPS, 1);
+      if (compaction_picker_->IsLevel0CompactionInProgress()) {
+        internal_stats_->AddCFStats(
+            InternalStats::L0_FILE_COUNT_LIMIT_STOPS_WITH_ONGOING_COMPACTION,
+            1);
+      }
+      ROCKS_LOG_WARN(ioptions_.logger,
+                     "[%s] Stopping writes because we have %d level-0 files",
+                     name_.c_str(), vstorage->l0_delay_trigger_count());
+    } else if (write_stall_condition == WriteStallCondition::kStopped &&
+               write_stall_cause == WriteStallCause::kPendingCompactionBytes) {
+      write_controller_token_ = write_controller->GetStopToken();
+      internal_stats_->AddCFStats(
+          InternalStats::PENDING_COMPACTION_BYTES_LIMIT_STOPS, 1);
+      ROCKS_LOG_WARN(
+          ioptions_.logger,
+          "[%s] Stopping writes because of estimated pending compaction "
+          "bytes %" PRIu64,
+          name_.c_str(), compaction_needed_bytes);
+    } else if (write_stall_condition == WriteStallCondition::kDelayed &&
+               write_stall_cause == WriteStallCause::kMemtableLimit) {
+      write_controller_token_ =
+          SetupDelay(write_controller, compaction_needed_bytes,
+                     prev_compaction_needed_bytes_, was_stopped,
+                     mutable_cf_options.disable_auto_compactions);
+      internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_DELAYS, 1);
+      ROCKS_LOG_WARN(
+          ioptions_.logger,
+          "[%s] Stalling writes because we have %d immutable memtables "
+          "(waiting for flush), max_write_buffer_number is set to %d "
+          "rate %" PRIu64,
+          name_.c_str(), imm()->NumNotFlushed(),
+          mutable_cf_options.max_write_buffer_number,
+          write_controller->delayed_write_rate());
+    } else if (write_stall_condition == WriteStallCondition::kDelayed &&
+               write_stall_cause == WriteStallCause::kL0FileCountLimit) {
+      // L0 is the last two files from stopping.
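+      // In other words: once the L0 file count comes within two files of
+      // level0_stop_writes_trigger, the near-stop flag below is set and
+      // SetupDelay() applies the stronger kNearStopSlowdownRatio cut.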
+      bool near_stop = vstorage->l0_delay_trigger_count() >=
+                       mutable_cf_options.level0_stop_writes_trigger - 2;
+      write_controller_token_ =
+          SetupDelay(write_controller, compaction_needed_bytes,
+                     prev_compaction_needed_bytes_, was_stopped || near_stop,
+                     mutable_cf_options.disable_auto_compactions);
+      internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_DELAYS,
+                                  1);
+      if (compaction_picker_->IsLevel0CompactionInProgress()) {
+        internal_stats_->AddCFStats(
+            InternalStats::L0_FILE_COUNT_LIMIT_DELAYS_WITH_ONGOING_COMPACTION,
+            1);
+      }
+      ROCKS_LOG_WARN(ioptions_.logger,
+                     "[%s] Stalling writes because we have %d level-0 files "
+                     "rate %" PRIu64,
+                     name_.c_str(), vstorage->l0_delay_trigger_count(),
+                     write_controller->delayed_write_rate());
+    } else if (write_stall_condition == WriteStallCondition::kDelayed &&
+               write_stall_cause == WriteStallCause::kPendingCompactionBytes) {
+      // If the distance to the hard limit is less than 1/4 of the gap between
+      // the soft and hard bytes limits, we think it is near stop and speed up
+      // the slowdown.
+      bool near_stop =
+          mutable_cf_options.hard_pending_compaction_bytes_limit > 0 &&
+          (compaction_needed_bytes -
+           mutable_cf_options.soft_pending_compaction_bytes_limit) >
+              3 *
+                  (mutable_cf_options.hard_pending_compaction_bytes_limit -
+                   mutable_cf_options.soft_pending_compaction_bytes_limit) /
+                  4;
+
+      write_controller_token_ =
+          SetupDelay(write_controller, compaction_needed_bytes,
+                     prev_compaction_needed_bytes_, was_stopped || near_stop,
+                     mutable_cf_options.disable_auto_compactions);
+      internal_stats_->AddCFStats(
+          InternalStats::PENDING_COMPACTION_BYTES_LIMIT_DELAYS, 1);
+      ROCKS_LOG_WARN(
+          ioptions_.logger,
+          "[%s] Stalling writes because of estimated pending compaction "
+          "bytes %" PRIu64 " rate %" PRIu64,
+          name_.c_str(), vstorage->estimated_compaction_needed_bytes(),
+          write_controller->delayed_write_rate());
+    } else {
+      assert(write_stall_condition == WriteStallCondition::kNormal);
+      if (vstorage->l0_delay_trigger_count() >=
+          GetL0ThresholdSpeedupCompaction(
+              mutable_cf_options.level0_file_num_compaction_trigger,
+              mutable_cf_options.level0_slowdown_writes_trigger)) {
+        write_controller_token_ =
+            write_controller->GetCompactionPressureToken();
+        ROCKS_LOG_INFO(
+            ioptions_.logger,
+            "[%s] Increasing compaction threads because we have %d level-0 "
+            "files ",
+            name_.c_str(), vstorage->l0_delay_trigger_count());
+      } else if (vstorage->estimated_compaction_needed_bytes() >=
+                 mutable_cf_options.soft_pending_compaction_bytes_limit / 4) {
+        // Increase compaction threads if bytes needed for compaction exceeds
+        // 1/4 of threshold for slowing down.
+        // If soft pending compaction byte limit is not set, always speed up
+        // compaction.
+        write_controller_token_ =
+            write_controller->GetCompactionPressureToken();
+        if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0) {
+          ROCKS_LOG_INFO(
+              ioptions_.logger,
+              "[%s] Increasing compaction threads because of estimated "
+              "pending compaction "
+              "bytes %" PRIu64,
+              name_.c_str(), vstorage->estimated_compaction_needed_bytes());
+        }
+      } else {
+        write_controller_token_.reset();
+      }
+      // If the DB recovers from delay conditions, we reward with reducing
+      // double the slowdown ratio. This is to balance the long term slowdown
+      // increase signal.
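+      // Worked example, assuming kDelayRecoverSlowdownRatio is 1.4 (its value
+      // in upstream RocksDB): a delayed write rate of 16 MB/s is raised to
+      // ~22.4 MB/s below, while the low-pri rate limiter is pinned at
+      // 16 / 4 = 4 MB/s based on the pre-adjustment rate.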
+      if (needed_delay) {
+        uint64_t write_rate = write_controller->delayed_write_rate();
+        write_controller->set_delayed_write_rate(static_cast<uint64_t>(
+            static_cast<double>(write_rate) * kDelayRecoverSlowdownRatio));
+        // Set the low pri limit to be 1/4 the delayed write rate.
+        // Note we don't reset this value even after the delay condition is
+        // released. Low-pri rate will continue to apply if there is a
+        // compaction pressure.
+        write_controller->low_pri_rate_limiter()->SetBytesPerSecond(
+            write_rate / 4);
+      }
+    }
+    prev_compaction_needed_bytes_ = compaction_needed_bytes;
+  }
+  return write_stall_condition;
+}
+
+const FileOptions* ColumnFamilyData::soptions() const {
+  return &(column_family_set_->file_options_);
+}
+
+void ColumnFamilyData::SetCurrent(Version* current_version) {
+  current_ = current_version;
+}
+
+uint64_t ColumnFamilyData::GetNumLiveVersions() const {
+  return VersionSet::GetNumLiveVersions(dummy_versions_);
+}
+
+uint64_t ColumnFamilyData::GetTotalSstFilesSize() const {
+  return VersionSet::GetTotalSstFilesSize(dummy_versions_);
+}
+
+uint64_t ColumnFamilyData::GetTotalBlobFileSize() const {
+  return VersionSet::GetTotalBlobFileSize(dummy_versions_);
+}
+
+uint64_t ColumnFamilyData::GetLiveSstFilesSize() const {
+  return current_->GetSstFilesSize();
+}
+
+MemTable* ColumnFamilyData::ConstructNewMemtable(
+    const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) {
+  return new MemTable(internal_comparator_, ioptions_, mutable_cf_options,
+                      write_buffer_manager_, earliest_seq, id_);
+}
+
+void ColumnFamilyData::CreateNewMemtable(
+    const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) {
+  if (mem_ != nullptr) {
+    delete mem_->Unref();
+  }
+  SetMemtable(ConstructNewMemtable(mutable_cf_options, earliest_seq));
+  mem_->Ref();
+}
+
+bool ColumnFamilyData::NeedsCompaction() const {
+  return !mutable_cf_options_.disable_auto_compactions &&
+         compaction_picker_->NeedsCompaction(current_->storage_info());
+}
+
+Compaction* ColumnFamilyData::PickCompaction(
+    const MutableCFOptions& mutable_options,
+    const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer) {
+  auto* result = compaction_picker_->PickCompaction(
+      GetName(), mutable_options, mutable_db_options, current_->storage_info(),
+      log_buffer);
+  if (result != nullptr) {
+    result->SetInputVersion(current_);
+  }
+  return result;
+}
+
+bool ColumnFamilyData::RangeOverlapWithCompaction(
+    const Slice& smallest_user_key, const Slice& largest_user_key,
+    int level) const {
+  return compaction_picker_->RangeOverlapWithCompaction(
+      smallest_user_key, largest_user_key, level);
+}
+
+Status ColumnFamilyData::RangesOverlapWithMemtables(
+    const autovector<Range>& ranges, SuperVersion* super_version,
+    bool allow_data_in_errors, bool* overlap) {
+  assert(overlap != nullptr);
+  *overlap = false;
+  // Create an InternalIterator over all unflushed memtables
+  Arena arena;
+  // TODO: plumb Env::IOActivity
+  ReadOptions read_opts;
+  read_opts.total_order_seek = true;
+  MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena);
+  merge_iter_builder.AddIterator(
+      super_version->mem->NewIterator(read_opts, &arena));
+  super_version->imm->AddIterators(read_opts, &merge_iter_builder,
+                                   false /* add_range_tombstone_iter */);
+  ScopedArenaIterator memtable_iter(merge_iter_builder.Finish());
+
+  auto read_seq = super_version->current->version_set()->LastSequence();
+  ReadRangeDelAggregator range_del_agg(&internal_comparator_, read_seq);
+  auto* active_range_del_iter =
+      super_version->mem->NewRangeTombstoneIterator(
+          read_opts, read_seq, false /* immutable_memtable */);
+  range_del_agg.AddTombstones(
+      std::unique_ptr<FragmentedRangeTombstoneIterator>(
+          active_range_del_iter));
+  Status status;
+  status = super_version->imm->AddRangeTombstoneIterators(
+      read_opts, nullptr /* arena */, &range_del_agg);
+  // AddRangeTombstoneIterators always returns Status::OK.
+  assert(status.ok());
+
+  for (size_t i = 0; i < ranges.size() && status.ok() && !*overlap; ++i) {
+    auto* vstorage = super_version->current->storage_info();
+    auto* ucmp = vstorage->InternalComparator()->user_comparator();
+    InternalKey range_start(ranges[i].start, kMaxSequenceNumber,
+                            kValueTypeForSeek);
+    memtable_iter->Seek(range_start.Encode());
+    status = memtable_iter->status();
+    ParsedInternalKey seek_result;
+
+    if (status.ok() && memtable_iter->Valid()) {
+      status = ParseInternalKey(memtable_iter->key(), &seek_result,
+                                allow_data_in_errors);
+    }
+
+    if (status.ok()) {
+      if (memtable_iter->Valid() &&
+          ucmp->Compare(seek_result.user_key, ranges[i].limit) <= 0) {
+        *overlap = true;
+      } else if (range_del_agg.IsRangeOverlapped(ranges[i].start,
+                                                 ranges[i].limit)) {
+        *overlap = true;
+      }
+    }
+  }
+  return status;
+}
+
+const int ColumnFamilyData::kCompactAllLevels = -1;
+const int ColumnFamilyData::kCompactToBaseLevel = -2;
+
+Compaction* ColumnFamilyData::CompactRange(
+    const MutableCFOptions& mutable_cf_options,
+    const MutableDBOptions& mutable_db_options, int input_level,
+    int output_level, const CompactRangeOptions& compact_range_options,
+    const InternalKey* begin, const InternalKey* end,
+    InternalKey** compaction_end, bool* conflict,
+    uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
+  auto* result = compaction_picker_->CompactRange(
+      GetName(), mutable_cf_options, mutable_db_options,
+      current_->storage_info(), input_level, output_level,
+      compact_range_options, begin, end, compaction_end, conflict,
+      max_file_num_to_ignore, trim_ts);
+  if (result != nullptr) {
+    result->SetInputVersion(current_);
+  }
+  TEST_SYNC_POINT("ColumnFamilyData::CompactRange:Return");
+  return result;
+}
+
+SuperVersion* ColumnFamilyData::GetReferencedSuperVersion(DBImpl* db) {
+  SuperVersion* sv = GetThreadLocalSuperVersion(db);
+  sv->Ref();
+  if (!ReturnThreadLocalSuperVersion(sv)) {
+    // This Unref() corresponds to the Ref() in GetThreadLocalSuperVersion()
+    // when the thread-local pointer was populated. So, the Ref() earlier in
+    // this function still prevents the returned SuperVersion* from being
+    // deleted out from under the caller.
+    sv->Unref();
+  }
+  return sv;
+}
+
+SuperVersion* ColumnFamilyData::GetThreadLocalSuperVersion(DBImpl* db) {
+  // The SuperVersion is cached in thread local storage to avoid acquiring
+  // mutex when SuperVersion does not change since the last use. When a new
+  // SuperVersion is installed, the compaction or flush thread cleans up
+  // cached SuperVersion in all existing thread local storage. To avoid
+  // acquiring mutex for this operation, we use atomic Swap() on the thread
+  // local pointer to guarantee exclusive access. If the thread local pointer
+  // is being used while a new SuperVersion is installed, the cached
+  // SuperVersion can become stale. In that case, the background thread would
+  // have swapped in kSVObsolete. We re-check the value when returning
+  // SuperVersion back to thread local, with an atomic compare and swap.
+  // The superversion will need to be released if detected to be stale.
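+  // The acquire/release protocol, as exercised by GetReferencedSuperVersion()
+  // above: Ref() the SuperVersion obtained here, try to hand the cached
+  // pointer back with ReturnThreadLocalSuperVersion(), and Unref() yourself
+  // if the hand-back fails because the cache was scraped in the meantime.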
+  void* ptr = local_sv_->Swap(SuperVersion::kSVInUse);
+  // Invariant:
+  // (1) Scrape (always) installs kSVObsolete in ThreadLocal storage
+  // (2) the Swap above (always) installs kSVInUse, ThreadLocal storage
+  // should only keep kSVInUse before ReturnThreadLocalSuperVersion call
+  // (if no Scrape happens).
+  assert(ptr != SuperVersion::kSVInUse);
+  SuperVersion* sv = static_cast<SuperVersion*>(ptr);
+  if (sv == SuperVersion::kSVObsolete ||
+      sv->version_number != super_version_number_.load()) {
+    RecordTick(ioptions_.stats, NUMBER_SUPERVERSION_ACQUIRES);
+    SuperVersion* sv_to_delete = nullptr;
+
+    if (sv && sv->Unref()) {
+      RecordTick(ioptions_.stats, NUMBER_SUPERVERSION_CLEANUPS);
+      db->mutex()->Lock();
+      // NOTE: underlying resources held by superversion (sst files) might
+      // not be released until the next background job.
+      sv->Cleanup();
+      if (db->immutable_db_options().avoid_unnecessary_blocking_io) {
+        db->AddSuperVersionsToFreeQueue(sv);
+        db->SchedulePurge();
+      } else {
+        sv_to_delete = sv;
+      }
+    } else {
+      db->mutex()->Lock();
+    }
+    sv = super_version_->Ref();
+    db->mutex()->Unlock();
+
+    delete sv_to_delete;
+  }
+  assert(sv != nullptr);
+  return sv;
+}
+
+bool ColumnFamilyData::ReturnThreadLocalSuperVersion(SuperVersion* sv) {
+  assert(sv != nullptr);
+  // Put the SuperVersion back
+  void* expected = SuperVersion::kSVInUse;
+  if (local_sv_->CompareAndSwap(static_cast<void*>(sv), expected)) {
+    // When we see kSVInUse in the ThreadLocal, we are sure ThreadLocal
+    // storage has not been altered and no Scrape has happened. The
+    // SuperVersion is still current.
+    return true;
+  } else {
+    // ThreadLocal scrape happened in the process of this GetImpl call (after
+    // thread local Swap() at the beginning and before CompareAndSwap()).
+    // This means the SuperVersion it holds is obsolete.
+    assert(expected == SuperVersion::kSVObsolete);
+  }
+  return false;
+}
+
+void ColumnFamilyData::InstallSuperVersion(SuperVersionContext* sv_context,
+                                           InstrumentedMutex* db_mutex) {
+  db_mutex->AssertHeld();
+  return InstallSuperVersion(sv_context, mutable_cf_options_);
+}
+
+void ColumnFamilyData::InstallSuperVersion(
+    SuperVersionContext* sv_context,
+    const MutableCFOptions& mutable_cf_options) {
+  SuperVersion* new_superversion = sv_context->new_superversion.release();
+  new_superversion->mutable_cf_options = mutable_cf_options;
+  new_superversion->Init(this, mem_, imm_.current(), current_);
+  SuperVersion* old_superversion = super_version_;
+  super_version_ = new_superversion;
+  ++super_version_number_;
+  super_version_->version_number = super_version_number_;
+  if (old_superversion == nullptr || old_superversion->current != current() ||
+      old_superversion->mem != mem_ ||
+      old_superversion->imm != imm_.current()) {
+    // Should not recalculate slow down condition if nothing has changed, since
+    // currently RecalculateWriteStallConditions() treats it as further slowing
+    // down is needed.
+    super_version_->write_stall_condition =
+        RecalculateWriteStallConditions(mutable_cf_options);
+  } else {
+    super_version_->write_stall_condition =
+        old_superversion->write_stall_condition;
+  }
+  if (old_superversion != nullptr) {
+    // Reset SuperVersions cached in thread local storage.
+    // This should be done before old_superversion->Unref(). That's to ensure
+    // that local_sv_ never holds the last reference to SuperVersion, since
+    // it has no means to safely do SuperVersion cleanup.
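+    // The scrape below swaps kSVObsolete into every thread-local slot and
+    // drops the reference each cached entry held, so a concurrent reader can
+    // at worst observe the obsolete marker, never a freed SuperVersion.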
+    ResetThreadLocalSuperVersions();
+
+    if (old_superversion->mutable_cf_options.write_buffer_size !=
+        mutable_cf_options.write_buffer_size) {
+      mem_->UpdateWriteBufferSize(mutable_cf_options.write_buffer_size);
+    }
+    if (old_superversion->write_stall_condition !=
+        new_superversion->write_stall_condition) {
+      sv_context->PushWriteStallNotification(
+          old_superversion->write_stall_condition,
+          new_superversion->write_stall_condition, GetName(), ioptions());
+    }
+    if (old_superversion->Unref()) {
+      old_superversion->Cleanup();
+      sv_context->superversions_to_free.push_back(old_superversion);
+    }
+  }
+}
+
+void ColumnFamilyData::ResetThreadLocalSuperVersions() {
+  autovector<void*> sv_ptrs;
+  local_sv_->Scrape(&sv_ptrs, SuperVersion::kSVObsolete);
+  for (auto ptr : sv_ptrs) {
+    assert(ptr);
+    if (ptr == SuperVersion::kSVInUse) {
+      continue;
+    }
+    auto sv = static_cast<SuperVersion*>(ptr);
+    bool was_last_ref __attribute__((__unused__));
+    was_last_ref = sv->Unref();
+    // sv couldn't have been the last reference because
+    // ResetThreadLocalSuperVersions() is called before
+    // unref'ing super_version_.
+    assert(!was_last_ref);
+  }
+}
+
+Status ColumnFamilyData::ValidateOptions(
+    const DBOptions& db_options, const ColumnFamilyOptions& cf_options) {
+  Status s;
+  s = CheckCompressionSupported(cf_options);
+  if (s.ok() && db_options.allow_concurrent_memtable_write) {
+    s = CheckConcurrentWritesSupported(cf_options);
+  }
+  if (s.ok() && db_options.unordered_write &&
+      cf_options.max_successive_merges != 0) {
+    s = Status::InvalidArgument(
+        "max_successive_merges > 0 is incompatible with unordered_write");
+  }
+  if (s.ok()) {
+    s = CheckCFPathsSupported(db_options, cf_options);
+  }
+  if (!s.ok()) {
+    return s;
+  }
+
+  if (cf_options.ttl > 0 && cf_options.ttl != kDefaultTtl) {
+    if (!cf_options.table_factory->IsInstanceOf(
+            TableFactory::kBlockBasedTableName())) {
+      return Status::NotSupported(
+          "TTL is only supported in Block-Based Table format. ");
+    }
+  }
+
+  if (cf_options.periodic_compaction_seconds > 0 &&
+      cf_options.periodic_compaction_seconds != kDefaultPeriodicCompSecs) {
+    if (!cf_options.table_factory->IsInstanceOf(
+            TableFactory::kBlockBasedTableName())) {
+      return Status::NotSupported(
+          "Periodic Compaction is only supported in "
"); + } + } + + if (cf_options.enable_blob_garbage_collection) { + if (cf_options.blob_garbage_collection_age_cutoff < 0.0 || + cf_options.blob_garbage_collection_age_cutoff > 1.0) { + return Status::InvalidArgument( + "The age cutoff for blob garbage collection should be in the range " + "[0.0, 1.0]."); + } + if (cf_options.blob_garbage_collection_force_threshold < 0.0 || + cf_options.blob_garbage_collection_force_threshold > 1.0) { + return Status::InvalidArgument( + "The garbage ratio threshold for forcing blob garbage collection " + "should be in the range [0.0, 1.0]."); + } + } + + if (cf_options.compaction_style == kCompactionStyleFIFO && + db_options.max_open_files != -1 && cf_options.ttl > 0) { + return Status::NotSupported( + "FIFO compaction only supported with max_open_files = -1."); + } + + std::vector supported{0, 1, 2, 4, 8}; + if (std::find(supported.begin(), supported.end(), + cf_options.memtable_protection_bytes_per_key) == + supported.end()) { + return Status::NotSupported( + "Memtable per key-value checksum protection only supports 0, 1, 2, 4 " + "or 8 bytes per key."); + } + if (std::find(supported.begin(), supported.end(), + cf_options.block_protection_bytes_per_key) == supported.end()) { + return Status::NotSupported( + "Block per key-value checksum protection only supports 0, 1, 2, 4 " + "or 8 bytes per key."); + } + + if (!cf_options.compaction_options_fifo.file_temperature_age_thresholds + .empty()) { + if (cf_options.compaction_style != kCompactionStyleFIFO) { + return Status::NotSupported( + "Option file_temperature_age_thresholds only supports FIFO " + "compaction."); + } else if (cf_options.num_levels > 1) { + return Status::NotSupported( + "Option file_temperature_age_thresholds is only supported when " + "num_levels = 1."); + } else { + const auto& ages = + cf_options.compaction_options_fifo.file_temperature_age_thresholds; + assert(ages.size() >= 1); + // check that age is sorted + for (size_t i = 0; i < ages.size() - 1; ++i) { + if (ages[i].age >= ages[i + 1].age) { + return Status::NotSupported( + "Option file_temperature_age_thresholds requires elements to be " + "sorted in increasing order with respect to `age` field."); + } + } + } + } + return s; +} + +Status ColumnFamilyData::SetOptions( + const DBOptions& db_opts, + const std::unordered_map& options_map) { + ColumnFamilyOptions cf_opts = + BuildColumnFamilyOptions(initial_cf_options_, mutable_cf_options_); + ConfigOptions config_opts; + config_opts.mutable_options_only = true; + Status s = GetColumnFamilyOptionsFromMap(config_opts, cf_opts, options_map, + &cf_opts); + if (s.ok()) { + s = ValidateOptions(db_opts, cf_opts); + } + if (s.ok()) { + mutable_cf_options_ = MutableCFOptions(cf_opts); + mutable_cf_options_.RefreshDerivedOptions(ioptions_); + } + return s; +} + +// REQUIRES: DB mutex held +Env::WriteLifeTimeHint ColumnFamilyData::CalculateSSTWriteHint(int level) { + if (initial_cf_options_.compaction_style != kCompactionStyleLevel) { + return Env::WLTH_NOT_SET; + } + if (level == 0) { + return Env::WLTH_MEDIUM; + } + int base_level = current_->storage_info()->base_level(); + + // L1: medium, L2: long, ... + if (level - base_level >= 2) { + return Env::WLTH_EXTREME; + } else if (level < base_level) { + // There is no restriction which prevents level passed in to be smaller + // than base_level. 
+    return Env::WLTH_MEDIUM;
+  }
+  return static_cast<Env::WriteLifeTimeHint>(
+      level - base_level + static_cast<int>(Env::WLTH_MEDIUM));
+}
+
+Status ColumnFamilyData::AddDirectories(
+    std::map<std::string, std::shared_ptr<FSDirectory>>* created_dirs) {
+  Status s;
+  assert(created_dirs != nullptr);
+  assert(data_dirs_.empty());
+  for (auto& p : ioptions_.cf_paths) {
+    auto existing_dir = created_dirs->find(p.path);
+
+    if (existing_dir == created_dirs->end()) {
+      std::unique_ptr<FSDirectory> path_directory;
+      s = DBImpl::CreateAndNewDirectory(ioptions_.fs.get(), p.path,
+                                        &path_directory);
+      if (!s.ok()) {
+        return s;
+      }
+      assert(path_directory != nullptr);
+      data_dirs_.emplace_back(path_directory.release());
+      (*created_dirs)[p.path] = data_dirs_.back();
+    } else {
+      data_dirs_.emplace_back(existing_dir->second);
+    }
+  }
+  assert(data_dirs_.size() == ioptions_.cf_paths.size());
+  return s;
+}
+
+FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const {
+  if (data_dirs_.empty()) {
+    return nullptr;
+  }
+
+  assert(path_id < data_dirs_.size());
+  return data_dirs_[path_id].get();
+}
+
+void ColumnFamilyData::RecoverEpochNumbers() {
+  assert(current_);
+  auto* vstorage = current_->storage_info();
+  assert(vstorage);
+  vstorage->RecoverEpochNumbers(this);
+}
+
+ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
+                                 const ImmutableDBOptions* db_options,
+                                 const FileOptions& file_options,
+                                 Cache* table_cache,
+                                 WriteBufferManager* _write_buffer_manager,
+                                 WriteController* _write_controller,
+                                 BlockCacheTracer* const block_cache_tracer,
+                                 const std::shared_ptr<IOTracer>& io_tracer,
+                                 const std::string& db_id,
+                                 const std::string& db_session_id)
+    : max_column_family_(0),
+      file_options_(file_options),
+      dummy_cfd_(new ColumnFamilyData(
+          ColumnFamilyData::kDummyColumnFamilyDataId, "", nullptr, nullptr,
+          nullptr, ColumnFamilyOptions(), *db_options, &file_options_, nullptr,
+          block_cache_tracer, io_tracer, db_id, db_session_id)),
+      default_cfd_cache_(nullptr),
+      db_name_(dbname),
+      db_options_(db_options),
+      table_cache_(table_cache),
+      write_buffer_manager_(_write_buffer_manager),
+      write_controller_(_write_controller),
+      block_cache_tracer_(block_cache_tracer),
+      io_tracer_(io_tracer),
+      db_id_(db_id),
+      db_session_id_(db_session_id) {
+  // initialize linked list
+  dummy_cfd_->prev_ = dummy_cfd_;
+  dummy_cfd_->next_ = dummy_cfd_;
+}
+
+ColumnFamilySet::~ColumnFamilySet() {
+  while (column_family_data_.size() > 0) {
+    // cfd destructor will delete itself from column_family_data_
+    auto cfd = column_family_data_.begin()->second;
+    bool last_ref __attribute__((__unused__));
+    last_ref = cfd->UnrefAndTryDelete();
+    assert(last_ref);
+  }
+  bool dummy_last_ref __attribute__((__unused__));
+  dummy_last_ref = dummy_cfd_->UnrefAndTryDelete();
+  assert(dummy_last_ref);
+}
+
+ColumnFamilyData* ColumnFamilySet::GetDefault() const {
+  assert(default_cfd_cache_ != nullptr);
+  return default_cfd_cache_;
+}
+
+ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const {
+  auto cfd_iter = column_family_data_.find(id);
+  if (cfd_iter != column_family_data_.end()) {
+    return cfd_iter->second;
+  } else {
+    return nullptr;
+  }
+}
+
+ColumnFamilyData* ColumnFamilySet::GetColumnFamily(
+    const std::string& name) const {
+  auto cfd_iter = column_families_.find(name);
+  if (cfd_iter != column_families_.end()) {
+    auto cfd = GetColumnFamily(cfd_iter->second);
+    assert(cfd != nullptr);
+    return cfd;
+  } else {
+    return nullptr;
+  }
+}
+
+uint32_t ColumnFamilySet::GetNextColumnFamilyID() {
+  return ++max_column_family_;
+}
+uint32_t ColumnFamilySet::GetMaxColumnFamily() { return max_column_family_; }
+
+void ColumnFamilySet::UpdateMaxColumnFamily(uint32_t new_max_column_family) {
+  max_column_family_ = std::max(new_max_column_family, max_column_family_);
+}
+
+size_t ColumnFamilySet::NumberOfColumnFamilies() const {
+  return column_families_.size();
+}
+
+// under a DB mutex AND write thread
+ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
+    const std::string& name, uint32_t id, Version* dummy_versions,
+    const ColumnFamilyOptions& options) {
+  assert(column_families_.find(name) == column_families_.end());
+  ColumnFamilyData* new_cfd = new ColumnFamilyData(
+      id, name, dummy_versions, table_cache_, write_buffer_manager_, options,
+      *db_options_, &file_options_, this, block_cache_tracer_, io_tracer_,
+      db_id_, db_session_id_);
+  column_families_.insert({name, id});
+  column_family_data_.insert({id, new_cfd});
+  max_column_family_ = std::max(max_column_family_, id);
+  // add to linked list
+  new_cfd->next_ = dummy_cfd_;
+  auto prev = dummy_cfd_->prev_;
+  new_cfd->prev_ = prev;
+  prev->next_ = new_cfd;
+  dummy_cfd_->prev_ = new_cfd;
+  if (id == 0) {
+    default_cfd_cache_ = new_cfd;
+  }
+  return new_cfd;
+}
+
+// under a DB mutex AND from a write thread
+void ColumnFamilySet::RemoveColumnFamily(ColumnFamilyData* cfd) {
+  auto cfd_iter = column_family_data_.find(cfd->GetID());
+  assert(cfd_iter != column_family_data_.end());
+  column_family_data_.erase(cfd_iter);
+  column_families_.erase(cfd->GetName());
+}
+
+// under a DB mutex OR from a write thread
+bool ColumnFamilyMemTablesImpl::Seek(uint32_t column_family_id) {
+  if (column_family_id == 0) {
+    // optimization for common case
+    current_ = column_family_set_->GetDefault();
+  } else {
+    current_ = column_family_set_->GetColumnFamily(column_family_id);
+  }
+  handle_.SetCFD(current_);
+  return current_ != nullptr;
+}
+
+uint64_t ColumnFamilyMemTablesImpl::GetLogNumber() const {
+  assert(current_ != nullptr);
+  return current_->GetLogNumber();
+}
+
+MemTable* ColumnFamilyMemTablesImpl::GetMemTable() const {
+  assert(current_ != nullptr);
+  return current_->mem();
+}
+
+ColumnFamilyHandle* ColumnFamilyMemTablesImpl::GetColumnFamilyHandle() {
+  assert(current_ != nullptr);
+  return &handle_;
+}
+
+uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family) {
+  uint32_t column_family_id = 0;
+  if (column_family != nullptr) {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+    column_family_id = cfh->GetID();
+  }
+  return column_family_id;
+}
+
+const Comparator* GetColumnFamilyUserComparator(
+    ColumnFamilyHandle* column_family) {
+  if (column_family != nullptr) {
+    return column_family->GetComparator();
+  }
+  return nullptr;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.h
new file mode 100644
index 0000000000..9ec093010d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/column_family.h
@@ -0,0 +1,851 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <atomic>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "cache/cache_reservation_manager.h"
+#include "db/memtable_list.h"
+#include "db/table_cache.h"
+#include "db/table_properties_collector.h"
+#include "db/write_batch_internal.h"
+#include "db/write_controller.h"
+#include "options/cf_options.h"
+#include "rocksdb/compaction_job_stats.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "trace_replay/block_cache_tracer.h"
+#include "util/hash_containers.h"
+#include "util/thread_local.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Version;
+class VersionSet;
+class VersionStorageInfo;
+class MemTable;
+class MemTableListVersion;
+class CompactionPicker;
+class Compaction;
+class InternalKey;
+class InternalStats;
+class ColumnFamilyData;
+class DBImpl;
+class LogBuffer;
+class InstrumentedMutex;
+class InstrumentedMutexLock;
+struct SuperVersionContext;
+class BlobFileCache;
+class BlobSource;
+
+extern const double kIncSlowdownRatio;
+// This file contains a list of data structures for managing column family
+// level metadata.
+//
+// The basic relationships among classes declared here are illustrated as
+// following:
+//
+// +----------------------+ +----------------------+ +--------+
+// +---+ ColumnFamilyHandle 1 | +--+ ColumnFamilyHandle 2 | | DBImpl |
+// | +----------------------+ | +----------------------+ +----+---+
+// | +--------------------------+ |
+// | | +-----------------------------+
+// | | |
+// | | +-----------------------------v-------------------------------+
+// | | | |
+// | | | ColumnFamilySet |
+// | | | |
+// | | +-------------+--------------------------+----------------+---+
+// | | | | |
+// | +-------------------------------------+ | |
+// | | | | v
+// | +-------------v-------------+ +-----v----v---------+
+// | | | | |
+// | | ColumnFamilyData 1 | | ColumnFamilyData 2 | ......
+// | | | | |
+// +---> | | |
+// | +---------+ | |
+// | | MemTable| | |
+// | | List | | |
+// +--------+---+--+-+----+----+ +--------------------++
+// | | | |
+// | | | |
+// | | | +-----------------------+
+// | | +-----------+ |
+// v +--------+ | |
+// +--------+--------+ | | |
+// | | | | +----------v----------+
+// +---> |SuperVersion 1.a +-----------------> |
+// | +------+ | | MemTableListVersion |
+// +---+-------------+ | | | | |
+// | | | | +----+------------+---+
+// | current | | | | |
+// | +-------------+ | |mem | |
+// | | | | | |
+// +-v---v-------+ +---v--v---+ +-----v----+ +----v-----+
+// | | | | | | | |
+// | Version 1.a | | memtable | | memtable | | memtable |
+// | | | 1.a | | 1.b | | 1.c |
+// +-------------+ | | | | | |
+// +----------+ +----------+ +----------+
+//
+// DBImpl keeps a ColumnFamilySet, which references all column families by
+// pointing to the respective ColumnFamilyData object of each column family.
+// This is how DBImpl can list and operate on all the column families.
+// ColumnFamilyHandle also points to ColumnFamilyData directly, so that
+// when a user executes a query, it can directly find memtables and Version
+// as well as the SuperVersion for the column family, without going through
+// ColumnFamilySet.
+//
+// ColumnFamilySet points to the latest view of the LSM-tree (list of memtables
+// and SST files) indirectly, while ongoing operations may hold references
+// to a current or an out-of-date SuperVersion, which in turn points to a
+// point-in-time view of the LSM-tree. This guarantees the memtables and SST
+// files being operated on will not go away, until the SuperVersion is
+// unreferenced to 0 and destroyed.
+//
+// The following graph illustrates a possible referencing relationship:
+//
+// Column +--------------+ current +-----------+
+// Family +---->+ +------------------->+ |
+// Data | SuperVersion +----------+ | Version A |
+// | 3 | imm | | |
+// Iter2 +----->+ | +-------v------+ +-----------+
+// +-----+--------+ | MemtableList +----------------> Empty
+// | | Version r | +-----------+
+// | +--------------+ | |
+// +------------------+ current| Version B |
+// +--------------+ | +----->+ |
+// | | | | +-----+-----+
+// Compaction +>+ SuperVersion +-------------+ ^
+// Job | 2 +------+ | |current
+// | +----+ | | mem | +------------+
+// +--------------+ | | +---------------------> |
+// | +------------------------> MemTable a |
+// | mem | | |
+// +--------------+ | | +------------+
+// | +--------------------------+
+// Iter1 +-----> SuperVersion | | +------------+
+// | 1 +------------------------------>+ |
+// | +-+ | mem | MemTable b |
+// +--------------+ | | | |
+// | | +--------------+ +-----^------+
+// | |imm | MemtableList | |
+// | +--->+ Version s +------------+
+// | +--------------+
+// | +--------------+
+// | | MemtableList |
+// +------>+ Version t +--------> Empty
+// imm +--------------+
+//
+// In this example, even if the current LSM-tree consists of Version A and
+// memtable a, which is also referenced by SuperVersion, two older
+// SuperVersions, SuperVersion2 and SuperVersion1, still exist, and are
+// referenced by a compaction job and an old iterator Iter1, respectively.
+// SuperVersion2 contains Version B, memtable a and memtable b; SuperVersion1
+// contains Version B and memtable b (mutable). As a result, Version B and
+// memtable b are prevented from being destroyed or deleted.
+
+// ColumnFamilyHandleImpl is the class that clients use to access different
+// column families. It has a non-trivial destructor, which gets called when
+// the client is done using the column family
+class ColumnFamilyHandleImpl : public ColumnFamilyHandle {
+ public:
+  // create while holding the mutex
+  ColumnFamilyHandleImpl(ColumnFamilyData* cfd, DBImpl* db,
+                         InstrumentedMutex* mutex);
+  // destroy without mutex
+  virtual ~ColumnFamilyHandleImpl();
+  virtual ColumnFamilyData* cfd() const { return cfd_; }
+
+  virtual uint32_t GetID() const override;
+  virtual const std::string& GetName() const override;
+  virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) override;
+  virtual const Comparator* GetComparator() const override;
+
+ private:
+  ColumnFamilyData* cfd_;
+  DBImpl* db_;
+  InstrumentedMutex* mutex_;
+};
+
+// Does not ref-count ColumnFamilyData
+// We use this dummy ColumnFamilyHandleImpl because sometimes MemTableInserter
+// calls DBImpl methods. When this happens, MemTableInserter needs access to
+// ColumnFamilyHandle (same as the client would need). In that case, we feed
+// MemTableInserter a dummy ColumnFamilyHandle and enable it to call DBImpl
+// methods
+class ColumnFamilyHandleInternal : public ColumnFamilyHandleImpl {
+ public:
+  ColumnFamilyHandleInternal()
+      : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr),
+        internal_cfd_(nullptr) {}
+
+  void SetCFD(ColumnFamilyData* _cfd) { internal_cfd_ = _cfd; }
+  virtual ColumnFamilyData* cfd() const override { return internal_cfd_; }
+
+ private:
+  ColumnFamilyData* internal_cfd_;
+};
+
+// holds references to memtable, all immutable memtables and version
+struct SuperVersion {
+  // Accessing members of this class is not thread-safe and requires external
+  // synchronization (i.e. db mutex held or on write thread).
+  ColumnFamilyData* cfd;
+  MemTable* mem;
+  MemTableListVersion* imm;
+  Version* current;
+  MutableCFOptions mutable_cf_options;
+  // Version number of the current SuperVersion
+  uint64_t version_number;
+  WriteStallCondition write_stall_condition;
+
+  // should be called outside the mutex
+  SuperVersion() = default;
+  ~SuperVersion();
+  SuperVersion* Ref();
+  // If Unref() returns true, Cleanup() should be called with mutex held
+  // before deleting this SuperVersion.
+  bool Unref();
+
+  // call these two methods with db mutex held
+  // Cleanup unrefs mem, imm and current. Also, it stores all memtables
+  // that need to be deleted in the to_delete vector. Unrefing those
+  // objects needs to be done in the mutex
+  void Cleanup();
+  void Init(ColumnFamilyData* new_cfd, MemTable* new_mem,
+            MemTableListVersion* new_imm, Version* new_current);
+
+  // The value of dummy is not actually used. kSVInUse takes its address as a
+  // mark in the thread local storage to indicate the SuperVersion is in use
+  // by thread. This way, the value of kSVInUse is guaranteed to have no
+  // conflict with SuperVersion object addresses and to be portable across
+  // platforms.
+  static int dummy;
+  static void* const kSVInUse;
+  static void* const kSVObsolete;
+
+ private:
+  std::atomic<uint32_t> refs;
+  // We need to_delete because during Cleanup(), imm->Unref() returns
+  // all memtables that we need to free through this vector. We then
+  // delete all those memtables outside of mutex, during destruction
+  autovector<MemTable*> to_delete;
+};
+
+extern Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options);
+
+extern Status CheckConcurrentWritesSupported(
+    const ColumnFamilyOptions& cf_options);
+
+extern Status CheckCFPathsSupported(const DBOptions& db_options,
+                                    const ColumnFamilyOptions& cf_options);
+
+extern ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options,
+                                           const ColumnFamilyOptions& src);
+// Wrap user defined table properties collector factories from `cf_options`
+// into internal ones in int_tbl_prop_collector_factories. Add a system
+// internal one too.
+extern void GetIntTblPropCollectorFactory(
+    const ImmutableCFOptions& ioptions,
+    IntTblPropCollectorFactories* int_tbl_prop_collector_factories);
+
+class ColumnFamilySet;
+
+// This class keeps all the data that a column family needs.
+// Most methods require DB mutex held, unless otherwise noted
+class ColumnFamilyData {
+ public:
+  ~ColumnFamilyData();
+
+  // thread-safe
+  uint32_t GetID() const { return id_; }
+  // thread-safe
+  const std::string& GetName() const { return name_; }
+
+  // Ref() can only be called from a context where the caller can guarantee
+  // that ColumnFamilyData is alive (while holding a non-zero ref already,
+  // holding a DB mutex, or as the leader in a write batch group).
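+  // For example, a background job that must drop the DB mutex while using
+  // this ColumnFamilyData typically calls Ref() under the mutex first and
+  // pairs it with UnrefAndTryDelete() once it is done.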
+  void Ref() { refs_.fetch_add(1); }
+
+  // UnrefAndTryDelete() decreases the reference count and frees the object
+  // if needed. It returns true if the object was freed, else false.
+  // UnrefAndTryDelete() can only be called while holding a DB mutex, or
+  // during single-threaded recovery.
+  bool UnrefAndTryDelete();
+
+  // SetDropped() can only be called under the following conditions:
+  // 1) Holding a DB mutex,
+  // 2) from single-threaded write thread, AND
+  // 3) from single-threaded VersionSet::LogAndApply()
+  // After dropping column family no other operation on that column family
+  // will be executed. All the files and memory will be, however, kept around
+  // until client drops the column family handle. That way, client can still
+  // access data from dropped column family.
+  // A column family can be dropped and still alive. In that state:
+  // *) Compaction and flush is not executed on the dropped column family.
+  // *) Client can continue reading from column family. Writes will fail unless
+  //    WriteOptions::ignore_missing_column_families is true
+  // When the dropped column family is unreferenced, then we:
+  // *) Remove column family from the linked list maintained by ColumnFamilySet
+  // *) delete all memory associated with that column family
+  // *) delete all the files associated with that column family
+  void SetDropped();
+  bool IsDropped() const { return dropped_.load(std::memory_order_relaxed); }
+
+  // thread-safe
+  int NumberLevels() const { return ioptions_.num_levels; }
+
+  void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
+  uint64_t GetLogNumber() const { return log_number_; }
+
+  // thread-safe
+  const FileOptions* soptions() const;
+  const ImmutableOptions* ioptions() const { return &ioptions_; }
+  // REQUIRES: DB mutex held
+  // This returns the MutableCFOptions used by current SuperVersion
+  // You should use this API to reference MutableCFOptions most of the time.
+  const MutableCFOptions* GetCurrentMutableCFOptions() const {
+    return &(super_version_->mutable_cf_options);
+  }
+  // REQUIRES: DB mutex held
+  // This returns the latest MutableCFOptions, which may not be in effect yet.
+  const MutableCFOptions* GetLatestMutableCFOptions() const {
+    return &mutable_cf_options_;
+  }
+
+  // REQUIRES: DB mutex held
+  // Build ColumnFamilyOptions from the immutable options and the latest
+  // mutable options.
+  ColumnFamilyOptions GetLatestCFOptions() const;
+
+  bool is_delete_range_supported() { return is_delete_range_supported_; }
+
+  // Validate CF options against DB options
+  static Status ValidateOptions(const DBOptions& db_options,
+                                const ColumnFamilyOptions& cf_options);
+  // REQUIRES: DB mutex held
+  Status SetOptions(
+      const DBOptions& db_options,
+      const std::unordered_map<std::string, std::string>& options_map);
+
+  InternalStats* internal_stats() { return internal_stats_.get(); }
+
+  MemTableList* imm() { return &imm_; }
+  MemTable* mem() { return mem_; }
+
+  bool IsEmpty() {
+    return mem()->GetFirstSequenceNumber() == 0 && imm()->NumNotFlushed() == 0;
+  }
+
+  Version* current() { return current_; }
+  Version* dummy_versions() { return dummy_versions_; }
+  void SetCurrent(Version* _current);
+  uint64_t GetNumLiveVersions() const;    // REQUIRES: DB mutex held
+  uint64_t GetTotalSstFilesSize() const;  // REQUIRES: DB mutex held
+  uint64_t GetLiveSstFilesSize() const;   // REQUIRES: DB mutex held
+  uint64_t GetTotalBlobFileSize() const;  // REQUIRES: DB mutex held
+  void SetMemtable(MemTable* new_mem) {
+    uint64_t memtable_id = last_memtable_id_.fetch_add(1) + 1;
+    new_mem->SetID(memtable_id);
+    mem_ = new_mem;
+  }
+
+  // calculate the oldest log needed for the durability of this column family
+  uint64_t OldestLogToKeep();
+
+  // See Memtable constructor for explanation of earliest_seq param.
+  MemTable* ConstructNewMemtable(const MutableCFOptions& mutable_cf_options,
+                                 SequenceNumber earliest_seq);
+  void CreateNewMemtable(const MutableCFOptions& mutable_cf_options,
+                         SequenceNumber earliest_seq);
+
+  TableCache* table_cache() const { return table_cache_.get(); }
+  BlobSource* blob_source() const { return blob_source_.get(); }
+
+  // See documentation in compaction_picker.h
+  // REQUIRES: DB mutex held
+  bool NeedsCompaction() const;
+  // REQUIRES: DB mutex held
+  Compaction* PickCompaction(const MutableCFOptions& mutable_options,
+                             const MutableDBOptions& mutable_db_options,
+                             LogBuffer* log_buffer);
+
+  // Check if the passed range overlaps with any running compactions.
+  // REQUIRES: DB mutex held
+  bool RangeOverlapWithCompaction(const Slice& smallest_user_key,
+                                  const Slice& largest_user_key,
+                                  int level) const;
+
+  // Check if the passed ranges overlap with any unflushed memtables
+  // (immutable or mutable).
+  //
+  // @param super_version A referenced SuperVersion that will be held for the
+  //    duration of this function.
+  //
+  // Thread-safe
+  Status RangesOverlapWithMemtables(const autovector<Range>& ranges,
+                                    SuperVersion* super_version,
+                                    bool allow_data_in_errors, bool* overlap);
+
+  // A flag telling a manual compaction to compact all levels together
+  // instead of a specific level.
+  static const int kCompactAllLevels;
+  // A flag telling a manual compaction that its output is the base level.
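+  // (Both sentinels are defined in column_family.cc as -1 and -2; they are
+  // passed to CompactRange() below in place of real level numbers.)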
+  static const int kCompactToBaseLevel;
+  // REQUIRES: DB mutex held
+  Compaction* CompactRange(const MutableCFOptions& mutable_cf_options,
+                           const MutableDBOptions& mutable_db_options,
+                           int input_level, int output_level,
+                           const CompactRangeOptions& compact_range_options,
+                           const InternalKey* begin, const InternalKey* end,
+                           InternalKey** compaction_end, bool* manual_conflict,
+                           uint64_t max_file_num_to_ignore,
+                           const std::string& trim_ts);
+
+  CompactionPicker* compaction_picker() { return compaction_picker_.get(); }
+  // thread-safe
+  const Comparator* user_comparator() const {
+    return internal_comparator_.user_comparator();
+  }
+  // thread-safe
+  const InternalKeyComparator& internal_comparator() const {
+    return internal_comparator_;
+  }
+
+  const IntTblPropCollectorFactories* int_tbl_prop_collector_factories()
+      const {
+    return &int_tbl_prop_collector_factories_;
+  }
+
+  SuperVersion* GetSuperVersion() { return super_version_; }
+  // thread-safe
+  // Return an already referenced SuperVersion to be used safely.
+  SuperVersion* GetReferencedSuperVersion(DBImpl* db);
+  // thread-safe
+  // Get SuperVersion stored in thread local storage. If it does not exist,
+  // get a reference from a current SuperVersion.
+  SuperVersion* GetThreadLocalSuperVersion(DBImpl* db);
+  // Try to return SuperVersion back to thread local storage. Return true on
+  // success and false on failure. It fails when the thread local storage
+  // contains anything other than SuperVersion::kSVInUse flag.
+  bool ReturnThreadLocalSuperVersion(SuperVersion* sv);
+  // thread-safe
+  uint64_t GetSuperVersionNumber() const {
+    return super_version_number_.load();
+  }
+  // Will return a pointer to the previous SuperVersion if its reference count
+  // is zero and it needs deletion, or nullptr if not. As an argument it takes
+  // a pointer to an allocated SuperVersion, so that clients can allocate the
+  // SuperVersion outside of the mutex.
+  // IMPORTANT: Only call this from DBImpl::InstallSuperVersion()
+  void InstallSuperVersion(SuperVersionContext* sv_context,
+                           const MutableCFOptions& mutable_cf_options);
+  void InstallSuperVersion(SuperVersionContext* sv_context,
+                           InstrumentedMutex* db_mutex);
+
+  void ResetThreadLocalSuperVersions();
+
+  // Protected by DB mutex
+  void set_queued_for_flush(bool value) { queued_for_flush_ = value; }
+  void set_queued_for_compaction(bool value) {
+    queued_for_compaction_ = value;
+  }
+  bool queued_for_flush() { return queued_for_flush_; }
+  bool queued_for_compaction() { return queued_for_compaction_; }
+
+  static std::pair<WriteStallCondition, WriteStallCause>
+  GetWriteStallConditionAndCause(
+      int num_unflushed_memtables, int num_l0_files,
+      uint64_t num_compaction_needed_bytes,
+      const MutableCFOptions& mutable_cf_options,
+      const ImmutableCFOptions& immutable_cf_options);
+
+  // Recalculate some stall conditions, which change only during compaction,
+  // when adding a new memtable, and/or when the compaction score is
+  // recalculated.
+  WriteStallCondition RecalculateWriteStallConditions(
+      const MutableCFOptions& mutable_cf_options);
+
+  void set_initialized() { initialized_.store(true); }
+
+  bool initialized() const { return initialized_.load(); }
+
+  const ColumnFamilyOptions& initial_cf_options() {
+    return initial_cf_options_;
+  }
+
+  Env::WriteLifeTimeHint CalculateSSTWriteHint(int level);
+
+  // created_dirs remembers the directories already created, so that we don't
+  // need to repeat the same directory creation operation again.
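+  // Sharing created_dirs across column families means a path that appears in
+  // several CFs' cf_paths is created once and its FSDirectory handle reused.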
+  Status AddDirectories(
+      std::map<std::string, std::shared_ptr<FSDirectory>>* created_dirs);
+
+  FSDirectory* GetDataDir(size_t path_id) const;
+
+  // full_history_ts_low_ can only increase.
+  void SetFullHistoryTsLow(std::string ts_low) {
+    assert(!ts_low.empty());
+    const Comparator* ucmp = user_comparator();
+    assert(ucmp);
+    if (full_history_ts_low_.empty() ||
+        ucmp->CompareTimestamp(ts_low, full_history_ts_low_) > 0) {
+      full_history_ts_low_ = std::move(ts_low);
+    }
+  }
+
+  const std::string& GetFullHistoryTsLow() const {
+    return full_history_ts_low_;
+  }
+
+  ThreadLocalPtr* TEST_GetLocalSV() { return local_sv_.get(); }
+  WriteBufferManager* write_buffer_mgr() { return write_buffer_manager_; }
+  std::shared_ptr<CacheReservationManager>
+  GetFileMetadataCacheReservationManager() {
+    return file_metadata_cache_res_mgr_;
+  }
+
+  SequenceNumber GetFirstMemtableSequenceNumber() const;
+
+  static const uint32_t kDummyColumnFamilyDataId;
+
+  // Keep track of whether the mempurge feature was ever used.
+  void SetMempurgeUsed() { mempurge_used_ = true; }
+  bool GetMempurgeUsed() { return mempurge_used_; }
+
+  // Allocate and return a new epoch number
+  uint64_t NewEpochNumber() { return next_epoch_number_.fetch_add(1); }
+
+  // Get the next epoch number to be assigned
+  uint64_t GetNextEpochNumber() const { return next_epoch_number_.load(); }
+
+  // Set the next epoch number to be assigned
+  void SetNextEpochNumber(uint64_t next_epoch_number) {
+    next_epoch_number_.store(next_epoch_number);
+  }
+
+  // Reset the next epoch number to be assigned
+  void ResetNextEpochNumber() { next_epoch_number_.store(1); }
+
+  // Recover the next epoch number of this CF and epoch number
+  // of its files (if missing)
+  void RecoverEpochNumbers();
+
+ private:
+  friend class ColumnFamilySet;
+  ColumnFamilyData(uint32_t id, const std::string& name,
+                   Version* dummy_versions, Cache* table_cache,
+                   WriteBufferManager* write_buffer_manager,
+                   const ColumnFamilyOptions& options,
+                   const ImmutableDBOptions& db_options,
+                   const FileOptions* file_options,
+                   ColumnFamilySet* column_family_set,
+                   BlockCacheTracer* const block_cache_tracer,
+                   const std::shared_ptr<IOTracer>& io_tracer,
+                   const std::string& db_id, const std::string& db_session_id);
+
+  std::vector<std::string> GetDbPaths() const;
+
+  uint32_t id_;
+  const std::string name_;
+  Version* dummy_versions_;  // Head of circular doubly-linked list of versions.
+  Version* current_;         // == dummy_versions->prev_
+
+  std::atomic<int> refs_;  // outstanding references to ColumnFamilyData
+  std::atomic<bool> initialized_;
+  std::atomic<bool> dropped_;  // true if client dropped it
+
+  const InternalKeyComparator internal_comparator_;
+  IntTblPropCollectorFactories int_tbl_prop_collector_factories_;
+
+  const ColumnFamilyOptions initial_cf_options_;
+  const ImmutableOptions ioptions_;
+  MutableCFOptions mutable_cf_options_;
+
+  const bool is_delete_range_supported_;
+
+  std::unique_ptr<TableCache> table_cache_;
+  std::unique_ptr<BlobFileCache> blob_file_cache_;
+  std::unique_ptr<BlobSource> blob_source_;
+
+  std::unique_ptr<InternalStats> internal_stats_;
+
+  WriteBufferManager* write_buffer_manager_;
+
+  MemTable* mem_;
+  MemTableList imm_;
+  SuperVersion* super_version_;
+
+  // An ordinal representing the current SuperVersion. Updated by
+  // InstallSuperVersion(), i.e. incremented every time super_version_
+  // changes.
+  std::atomic<uint64_t> super_version_number_;
+
+  // Thread's local copy of SuperVersion pointer
+  // This needs to be destructed before mutex_
+  std::unique_ptr<ThreadLocalPtr> local_sv_;
+
+  // pointers for a circular linked list. we use it to support iterations over
+  // all column families that are alive (note: dropped column families can
+  // also be alive as long as client holds a reference)
+  ColumnFamilyData* next_;
+  ColumnFamilyData* prev_;
+
+  // This is the earliest log file number that contains data from this
+  // Column Family. All earlier log files must be ignored and not
+  // recovered from
+  uint64_t log_number_;
+
+  // An object that keeps all the compaction stats
+  // and picks the next compaction
+  std::unique_ptr<CompactionPicker> compaction_picker_;
+
+  ColumnFamilySet* column_family_set_;
+
+  std::unique_ptr<WriteControllerToken> write_controller_token_;
+
+  // If true --> this ColumnFamily is currently present in DBImpl::flush_queue_
+  bool queued_for_flush_;
+
+  // If true --> this ColumnFamily is currently present in
+  // DBImpl::compaction_queue_
+  bool queued_for_compaction_;
+
+  uint64_t prev_compaction_needed_bytes_;
+
+  // if the database was opened with 2pc enabled
+  bool allow_2pc_;
+
+  // Memtable id to track flush.
+  std::atomic<uint64_t> last_memtable_id_;
+
+  // Directories corresponding to cf_paths.
+  std::vector<std::shared_ptr<FSDirectory>> data_dirs_;
+
+  bool db_paths_registered_;
+
+  std::string full_history_ts_low_;
+
+  // For charging memory usage of file metadata created for newly added files
+  // to a Version associated with this CFD
+  std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
+  bool mempurge_used_;
+
+  std::atomic<uint64_t> next_epoch_number_;
+};
+
+// ColumnFamilySet has interesting thread-safety requirements
+// * CreateColumnFamily() or RemoveColumnFamily() -- need to be protected by
+//   DB mutex AND executed in the write thread.
+//   CreateColumnFamily() should ONLY be called from VersionSet::LogAndApply()
+//   AND single-threaded write thread. It is also called during Recovery and in
+//   DumpManifest().
+//   RemoveColumnFamily() is only called from SetDropped(). DB mutex needs to
+//   be held and it needs to be executed from the write thread. SetDropped()
+//   also guarantees that it will be called only from single-threaded
+//   LogAndApply(), but this condition is not that important.
+// * Iteration -- hold DB mutex. If you want to release the DB mutex in the
+//   body of the iteration, wrap in a RefedColumnFamilySet.
+// * GetDefault() -- thread safe
+// * GetColumnFamily() -- either inside of DB mutex or from a write thread
+// * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily(),
+//   NumberOfColumnFamilies -- inside of DB mutex
+class ColumnFamilySet {
+ public:
+  // ColumnFamilySet supports iteration
+  class iterator {
+   public:
+    explicit iterator(ColumnFamilyData* cfd) : current_(cfd) {}
+    // NOTE: minimum operators for for-loop iteration
+    iterator& operator++() {
+      current_ = current_->next_;
+      return *this;
+    }
+    bool operator!=(const iterator& other) const {
+      return this->current_ != other.current_;
+    }
+    ColumnFamilyData* operator*() { return current_; }
+
+   private:
+    ColumnFamilyData* current_;
+  };
+
+  ColumnFamilySet(const std::string& dbname,
+                  const ImmutableDBOptions* db_options,
+                  const FileOptions& file_options, Cache* table_cache,
+                  WriteBufferManager* _write_buffer_manager,
+                  WriteController* _write_controller,
+                  BlockCacheTracer* const block_cache_tracer,
+                  const std::shared_ptr<IOTracer>& io_tracer,
+                  const std::string& db_id, const std::string& db_session_id);
+  ~ColumnFamilySet();
+
+  ColumnFamilyData* GetDefault() const;
+  // GetColumnFamily() calls return nullptr if column family is not found
+  ColumnFamilyData* GetColumnFamily(uint32_t id) const;
+  ColumnFamilyData* GetColumnFamily(const std::string& name) const;
+  // This call will return the next available column family ID. It guarantees
+  // that there is no column family with an id greater than or equal to the
+  // returned value in the current running instance or anytime in the RocksDB
+  // instance history.
+  uint32_t GetNextColumnFamilyID();
+  uint32_t GetMaxColumnFamily();
+  void UpdateMaxColumnFamily(uint32_t new_max_column_family);
+  size_t NumberOfColumnFamilies() const;
+
+  ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id,
+                                       Version* dummy_version,
+                                       const ColumnFamilyOptions& options);
+
+  iterator begin() { return iterator(dummy_cfd_->next_); }
+  iterator end() { return iterator(dummy_cfd_); }
+
+  Cache* get_table_cache() { return table_cache_; }
+
+  WriteBufferManager* write_buffer_manager() { return write_buffer_manager_; }
+
+  WriteController* write_controller() { return write_controller_; }
+
+ private:
+  friend class ColumnFamilyData;
+  // helper function that gets called from cfd destructor
+  // REQUIRES: DB mutex held
+  void RemoveColumnFamily(ColumnFamilyData* cfd);
+
+  // column_families_ and column_family_data_ need to be protected:
+  // * when mutating both conditions have to be satisfied:
+  //   1. DB mutex locked
+  //   2. thread currently in single-threaded write thread
+  // * when reading, at least one condition needs to be satisfied:
+  //   1. DB mutex locked
+  //   2. accessed from a single-threaded write thread
+  UnorderedMap<std::string, uint32_t> column_families_;
+  UnorderedMap<uint32_t, ColumnFamilyData*> column_family_data_;
+
+  uint32_t max_column_family_;
+  const FileOptions file_options_;
+
+  ColumnFamilyData* dummy_cfd_;
+  // We don't hold the refcount here, since default column family always exists
+  // We are also not responsible for cleaning up default_cfd_cache_. This is
+  // just a cache that makes common case (accessing default column family)
+  // faster
+  ColumnFamilyData* default_cfd_cache_;
+
+  const std::string db_name_;
+  const ImmutableDBOptions* const db_options_;
+  Cache* table_cache_;
+  WriteBufferManager* write_buffer_manager_;
+  WriteController* write_controller_;
+  BlockCacheTracer* const block_cache_tracer_;
+  std::shared_ptr<IOTracer> io_tracer_;
+  const std::string& db_id_;
+  std::string db_session_id_;
+};
+
+// A wrapper for ColumnFamilySet that supports releasing DB mutex during each
+// iteration over the iterator, because the cfd is Refed and Unrefed during
+// each iteration to prevent concurrent CF drop from destroying it (until
+// Unref).
+class RefedColumnFamilySet {
+ public:
+  explicit RefedColumnFamilySet(ColumnFamilySet* cfs) : wrapped_(cfs) {}
+
+  class iterator {
+   public:
+    explicit iterator(ColumnFamilySet::iterator wrapped) : wrapped_(wrapped) {
+      MaybeRef(*wrapped_);
+    }
+    ~iterator() { MaybeUnref(*wrapped_); }
+    inline void MaybeRef(ColumnFamilyData* cfd) {
+      if (cfd->GetID() != ColumnFamilyData::kDummyColumnFamilyDataId) {
+        cfd->Ref();
+      }
+    }
+    inline void MaybeUnref(ColumnFamilyData* cfd) {
+      if (cfd->GetID() != ColumnFamilyData::kDummyColumnFamilyDataId) {
+        cfd->UnrefAndTryDelete();
+      }
+    }
+    // NOTE: minimum operators for for-loop iteration
+    inline iterator& operator++() {
+      ColumnFamilyData* old = *wrapped_;
+      ++wrapped_;
+      // Can only unref & potentially free cfd after accessing its next_
+      MaybeUnref(old);
+      MaybeRef(*wrapped_);
+      return *this;
+    }
+    inline bool operator!=(const iterator& other) const {
+      return this->wrapped_ != other.wrapped_;
+    }
+    inline ColumnFamilyData* operator*() { return *wrapped_; }
+
+   private:
+    ColumnFamilySet::iterator wrapped_;
+  };
+
+  iterator begin() { return iterator(wrapped_->begin()); }
+  iterator end() { return iterator(wrapped_->end()); }
+
+ private:
+  ColumnFamilySet* wrapped_;
+};
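A minimal usage sketch of the iteration contract above, assuming DBImpl-style plumbing (the `InstrumentedMutex*` parameter and the helper name are illustrative, not part of the vendored sources): `RefedColumnFamilySet` pins the current `ColumnFamilyData`, so the DB mutex can be released inside the loop body without a concurrent column family drop destroying the cfd mid-iteration.

```cpp
#include "db/column_family.h"
#include "monitoring/instrumented_mutex.h"

namespace ROCKSDB_NAMESPACE {

// Hypothetical helper: visit every live column family while doing slow
// per-CF work without holding the DB mutex across the whole scan.
void ForEachLiveColumnFamily(ColumnFamilySet* cfset,
                             InstrumentedMutex* db_mutex) {
  db_mutex->AssertHeld();
  for (ColumnFamilyData* cfd : RefedColumnFamilySet(cfset)) {
    if (cfd->IsDropped()) {
      continue;  // dropped CFs can still be alive while references exist
    }
    db_mutex->Unlock();
    // ... slow per-CF work (stats collection, flush scheduling, ...) ...
    db_mutex->Lock();
  }
}

}  // namespace ROCKSDB_NAMESPACE
```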
+
+// We use ColumnFamilyMemTablesImpl to provide WriteBatch a way to access
+// memtables of different column families (specified by ID in the write batch)
+class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables {
+ public:
+  explicit ColumnFamilyMemTablesImpl(ColumnFamilySet* column_family_set)
+      : column_family_set_(column_family_set), current_(nullptr) {}
+
+  // Constructs a ColumnFamilyMemTablesImpl equivalent to one constructed
+  // with the arguments used to construct *orig.
+  explicit ColumnFamilyMemTablesImpl(ColumnFamilyMemTablesImpl* orig)
+      : column_family_set_(orig->column_family_set_), current_(nullptr) {}
+
+  // sets current_ to ColumnFamilyData with column_family_id
+  // returns false if column family doesn't exist
+  // REQUIRES: calls through DBImpl::column_family_memtables_ must be made
+  // under a DB mutex OR from a write thread
+  bool Seek(uint32_t column_family_id) override;
+
+  // Returns log number of the selected column family
+  // REQUIRES: under a DB mutex OR from a write thread
+  uint64_t GetLogNumber() const override;
+
+  // REQUIRES: Seek() called first
+  // REQUIRES: calls through DBImpl::column_family_memtables_ must be made
+  // under a DB mutex OR from a write thread
+  virtual MemTable* GetMemTable() const override;
+
+  // Returns column family handle for the selected column family
+  // REQUIRES: calls through DBImpl::column_family_memtables_ must be made
+  // under a DB mutex OR from a write thread
+  virtual ColumnFamilyHandle* GetColumnFamilyHandle() override;
+
+  // Cannot be called while another thread is calling Seek().
+  // REQUIRES: calls through DBImpl::column_family_memtables_ must be made
+  // under a DB mutex OR from a write thread
+  virtual ColumnFamilyData* current() override { return current_; }
+
+ private:
+  ColumnFamilySet* column_family_set_;
+  ColumnFamilyData* current_;
+  ColumnFamilyHandleInternal handle_;
+};
+
+extern uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family);
+
+extern const Comparator* GetColumnFamilyUserComparator(
+    ColumnFamilyHandle* column_family);
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/clipping_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/clipping_iterator.h
new file mode 100644
index 0000000000..3f50cdd9dd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/clipping_iterator.h
@@ -0,0 +1,281 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cassert>
+
+#include "rocksdb/comparator.h"
+#include "table/internal_iterator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// An internal iterator that wraps another one and ensures that any keys
+// returned are strictly within a range [start, end). If the underlying
+// iterator has already performed the bounds checking, it relies on that
+// result; otherwise, it performs the necessary key comparisons itself. Both
+// bounds are optional.
+class ClippingIterator : public InternalIterator {
+ public:
+  ClippingIterator(InternalIterator* iter, const Slice* start, const Slice* end,
+                   const CompareInterface* cmp)
+      : iter_(iter), start_(start), end_(end), cmp_(cmp), valid_(false) {
+    assert(iter_);
+    assert(cmp_);
+    assert(!start_ || !end_ || cmp_->Compare(*start_, *end_) <= 0);
+
+    UpdateAndEnforceBounds();
+  }
+
+  bool Valid() const override { return valid_; }
+
+  void SeekToFirst() override {
+    if (start_) {
+      iter_->Seek(*start_);
+    } else {
+      iter_->SeekToFirst();
+    }
+
+    UpdateAndEnforceUpperBound();
+  }
+
+  void SeekToLast() override {
+    if (end_) {
+      iter_->SeekForPrev(*end_);
+
+      // Upper bound is exclusive, so we need a key which is strictly smaller
+      if (iter_->Valid() && cmp_->Compare(iter_->key(), *end_) == 0) {
+        iter_->Prev();
+      }
+    } else {
+      iter_->SeekToLast();
+    }
+
+    UpdateAndEnforceLowerBound();
+  }
+
+  void Seek(const Slice& target) override {
+    if (start_ && cmp_->Compare(target, *start_) < 0) {
+      iter_->Seek(*start_);
+      UpdateAndEnforceUpperBound();
+      return;
+    }
+
+    if (end_ && cmp_->Compare(target, *end_) >= 0) {
+      valid_ = false;
+      return;
+    }
+
+    iter_->Seek(target);
+    UpdateAndEnforceUpperBound();
+  }
+
+  void SeekForPrev(const Slice& target) override {
+    if (start_ && cmp_->Compare(target, *start_) < 0) {
+      valid_ = false;
+      return;
+    }
+
+    if (end_ && cmp_->Compare(target, *end_) >= 0) {
+      iter_->SeekForPrev(*end_);
+
+      // Upper bound is exclusive, so we need a key which is strictly smaller
+      if (iter_->Valid() && cmp_->Compare(iter_->key(), *end_) == 0) {
+        iter_->Prev();
+      }
+
+      UpdateAndEnforceLowerBound();
+      return;
+    }
+
+    iter_->SeekForPrev(target);
+    UpdateAndEnforceLowerBound();
+  }
+
+  void Next() override {
+    assert(valid_);
+    iter_->Next();
+    UpdateAndEnforceUpperBound();
+  }
+
+  bool NextAndGetResult(IterateResult* result) override {
+    assert(valid_);
+    assert(result);
+
+    IterateResult res;
+    valid_ = iter_->NextAndGetResult(&res);
+
+    if (!valid_) {
+      return false;
+    }
+
+    if (end_) {
+      EnforceUpperBoundImpl(res.bound_check_result);
+
+      if (!valid_) {
+        return false;
+      }
+    }
+
+    res.bound_check_result = IterBoundCheck::kInbound;
+    *result = res;
+
+    return true;
+  }
+
+  void Prev() override {
+    assert(valid_);
+    iter_->Prev();
+    UpdateAndEnforceLowerBound();
+  }
+
+  Slice key() const override {
+    assert(valid_);
+    return iter_->key();
+  }
+
+  Slice user_key() const override {
+    assert(valid_);
+    return iter_->user_key();
+  }
+
+  Slice value() const override {
+    assert(valid_);
+    return iter_->value();
+  }
+
+  Status status() const override { return iter_->status(); }
+
+  bool PrepareValue() override {
+    assert(valid_);
+
+    if (iter_->PrepareValue()) {
+      return true;
+    }
+
+    assert(!iter_->Valid());
+    valid_ = false;
+    return false;
+  }
+
+  bool MayBeOutOfLowerBound() override {
+    assert(valid_);
+    return false;
+  }
+
+  IterBoundCheck UpperBoundCheckResult() override {
+    assert(valid_);
+    return IterBoundCheck::kInbound;
+  }
+
+  void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
+    iter_->SetPinnedItersMgr(pinned_iters_mgr);
+  }
+
+  bool IsKeyPinned() const override {
+    assert(valid_);
+    return iter_->IsKeyPinned();
+  }
+
+  bool IsValuePinned() const override {
+    assert(valid_);
+    return iter_->IsValuePinned();
+  }
+
+  Status GetProperty(std::string prop_name, std::string* prop) override {
+    return iter_->GetProperty(prop_name, prop);
+  }
+
+  bool IsDeleteRangeSentinelKey() const override {
+    assert(valid_);
+    return iter_->IsDeleteRangeSentinelKey();
+  }
+
+ private:
+  void UpdateValid() {
+    assert(!iter_->Valid() || iter_->status().ok());
+
+    valid_ = iter_->Valid();
+  }
+
+  void EnforceUpperBoundImpl(IterBoundCheck bound_check_result) {
+    if (bound_check_result == IterBoundCheck::kInbound) {
+      return;
+    }
+
+    if (bound_check_result == IterBoundCheck::kOutOfBound) {
+      valid_ = false;
+      return;
+    }
+
+    assert(bound_check_result == IterBoundCheck::kUnknown);
+
+    if (cmp_->Compare(key(), *end_) >= 0) {
+      valid_ = false;
+    }
+  }
+
+  void EnforceUpperBound() {
+    if (!valid_) {
+      return;
+    }
+
+    if (!end_) {
+      return;
+    }
+
+    EnforceUpperBoundImpl(iter_->UpperBoundCheckResult());
+  }
+
+  void EnforceLowerBound() {
+    if (!valid_) {
+      return;
+    }
+
+    if (!start_) {
+      return;
+    }
+
+    if (!iter_->MayBeOutOfLowerBound()) {
+      return;
+    }
+
+    if (cmp_->Compare(key(), *start_) < 0) {
+      valid_ = false;
+    }
+  }
+
+  void AssertBounds() {
+    assert(!valid_ || !start_ || cmp_->Compare(key(), *start_) >= 0);
+    assert(!valid_ || !end_ || cmp_->Compare(key(), *end_) < 0);
+  }
+
+  void UpdateAndEnforceBounds() {
+    UpdateValid();
+    EnforceUpperBound();
+    EnforceLowerBound();
+    AssertBounds();
+  }
+
+  void UpdateAndEnforceUpperBound() {
+    UpdateValid();
+    EnforceUpperBound();
+    AssertBounds();
+  }
+
+  void UpdateAndEnforceLowerBound() {
+    UpdateValid();
+    EnforceLowerBound();
+    AssertBounds();
+  }
+
+  InternalIterator* iter_;
+  const Slice* start_;
+  const Slice* end_;
+  const CompareInterface* cmp_;
+  bool valid_;
+};
+
+} // namespace ROCKSDB_NAMESPACE
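Because both bounds are optional and the upper bound is exclusive, the wrapper is convenient for scanning a sub-range of an existing internal iterator. A minimal sketch (illustrative, not part of the patch; the underlying iterator and comparator are assumed to come from the surrounding compaction code):

```cpp
#include "db/compaction/clipping_iterator.h"

namespace ROCKSDB_NAMESPACE {

// Hypothetical helper: iterate only the keys in ["b", "d") of `underlying`.
void ScanClippedRange(InternalIterator* underlying, const Comparator* cmp) {
  const Slice start("b");
  const Slice end("d");  // exclusive upper bound
  ClippingIterator clipped(underlying, &start, &end, cmp);
  for (clipped.SeekToFirst(); clipped.Valid(); clipped.Next()) {
    // Every key observed here satisfies start <= key < end; keys the
    // underlying iterator holds outside the bounds are never surfaced.
  }
}

}  // namespace ROCKSDB_NAMESPACE
```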
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.cc
new file mode 100644
index 0000000000..1832f13031
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.cc
@@ -0,0 +1,857 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/compaction/compaction.h"
+
+#include <cinttypes>
+#include <vector>
+
+#include "db/column_family.h"
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/sst_partitioner.h"
+#include "test_util/sync_point.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+const uint64_t kRangeTombstoneSentinel =
+    PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion);
+
+int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
+                      const InternalKey& b) {
+  auto c = user_cmp->CompareWithoutTimestamp(a.user_key(), b.user_key());
+  if (c != 0) {
+    return c;
+  }
+  auto a_footer = ExtractInternalKeyFooter(a.Encode());
+  auto b_footer = ExtractInternalKeyFooter(b.Encode());
+  if (a_footer == kRangeTombstoneSentinel) {
+    if (b_footer != kRangeTombstoneSentinel) {
+      return -1;
+    }
+  } else if (b_footer == kRangeTombstoneSentinel) {
+    return 1;
+  }
+  return 0;
+}
+
+int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a,
+                      const InternalKey& b) {
+  if (a == nullptr) {
+    return -1;
+  }
+  return sstableKeyCompare(user_cmp, *a, b);
+}
+
+int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
+                      const InternalKey* b) {
+  if (b == nullptr) {
+    return -1;
+  }
+  return sstableKeyCompare(user_cmp, a, *b);
+}
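The sentinel logic above is easiest to see with concrete keys: with equal user keys, the range tombstone sentinel (`kMaxSequenceNumber`, `kTypeRangeDeletion`) compares as distinct and smallest, while ordinary entries compare equal regardless of sequence number. A test-style sketch (illustrative, not part of the patch):

```cpp
#include <cassert>

#include "db/compaction/compaction.h"
#include "db/dbformat.h"
#include "rocksdb/comparator.h"

namespace ROCKSDB_NAMESPACE {

void SentinelOrderingExample() {
  const Comparator* ucmp = BytewiseComparator();
  InternalKey regular("k", /*s=*/42, kTypeValue);
  InternalKey other("k", /*s=*/7, kTypeValue);
  InternalKey sentinel("k", kMaxSequenceNumber, kTypeRangeDeletion);
  // Ordinary entries with the same user key compare equal here, no matter
  // how their sequence numbers differ...
  assert(sstableKeyCompare(ucmp, regular, other) == 0);
  // ...but the sentinel sorts before (and is never equal to) them, so two
  // files separated only by a sentinel are not considered overlapping.
  assert(sstableKeyCompare(ucmp, sentinel, regular) < 0);
  assert(sstableKeyCompare(ucmp, regular, sentinel) > 0);
}

}  // namespace ROCKSDB_NAMESPACE
```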
+
+uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
+  uint64_t sum = 0;
+  for (size_t i = 0; i < files.size() && files[i]; i++) {
+    sum += files[i]->fd.GetFileSize();
+  }
+  return sum;
+}
+
+void Compaction::SetInputVersion(Version* _input_version) {
+  input_version_ = _input_version;
+  cfd_ = input_version_->cfd();
+
+  cfd_->Ref();
+  input_version_->Ref();
+  edit_.SetColumnFamily(cfd_->GetID());
+}
+
+void Compaction::GetBoundaryKeys(
+    VersionStorageInfo* vstorage,
+    const std::vector<CompactionInputFiles>& inputs, Slice* smallest_user_key,
+    Slice* largest_user_key, int exclude_level) {
+  bool initialized = false;
+  const Comparator* ucmp = vstorage->InternalComparator()->user_comparator();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    if (inputs[i].files.empty() || inputs[i].level == exclude_level) {
+      continue;
+    }
+    if (inputs[i].level == 0) {
+      // we need to consider all files on level 0
+      for (const auto* f : inputs[i].files) {
+        const Slice& start_user_key = f->smallest.user_key();
+        if (!initialized ||
+            ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
+          *smallest_user_key = start_user_key;
+        }
+        const Slice& end_user_key = f->largest.user_key();
+        if (!initialized ||
+            ucmp->Compare(end_user_key, *largest_user_key) > 0) {
+          *largest_user_key = end_user_key;
+        }
+        initialized = true;
+      }
+    } else {
+      // we only need to consider the first and last file
+      const Slice& start_user_key = inputs[i].files[0]->smallest.user_key();
+      if (!initialized ||
+          ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
+        *smallest_user_key = start_user_key;
+      }
+      const Slice& end_user_key = inputs[i].files.back()->largest.user_key();
+      if (!initialized || ucmp->Compare(end_user_key, *largest_user_key) > 0) {
+        *largest_user_key = end_user_key;
+      }
+      initialized = true;
+    }
+  }
+}
+
+std::vector<CompactionInputFiles> Compaction::PopulateWithAtomicBoundaries(
+    VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs) {
+  const Comparator* ucmp = vstorage->InternalComparator()->user_comparator();
+  for (size_t i = 0; i < inputs.size(); i++) {
+    if (inputs[i].level == 0 || inputs[i].files.empty()) {
+      continue;
+    }
+    inputs[i].atomic_compaction_unit_boundaries.reserve(
+        inputs[i].files.size());
+    AtomicCompactionUnitBoundary cur_boundary;
+    size_t first_atomic_idx = 0;
+    auto add_unit_boundary = [&](size_t to) {
+      if (first_atomic_idx == to) return;
+      for (size_t k = first_atomic_idx; k < to; k++) {
+        inputs[i].atomic_compaction_unit_boundaries.push_back(cur_boundary);
+      }
+      first_atomic_idx = to;
+    };
+    for (size_t j = 0; j < inputs[i].files.size(); j++) {
+      const auto* f = inputs[i].files[j];
+      if (j == 0) {
+        // First file in a level.
+        cur_boundary.smallest = &f->smallest;
+        cur_boundary.largest = &f->largest;
+      } else if (sstableKeyCompare(ucmp, *cur_boundary.largest, f->smallest) ==
+                 0) {
+        // SSTs overlap but the end key of the previous file was not
+        // artificially extended by a range tombstone. Extend the current
+        // boundary.
+        cur_boundary.largest = &f->largest;
+      } else {
+        // Atomic compaction unit has ended.
+        add_unit_boundary(j);
+        cur_boundary.smallest = &f->smallest;
+        cur_boundary.largest = &f->largest;
+      }
+    }
+    add_unit_boundary(inputs[i].files.size());
+    assert(inputs[i].files.size() ==
+           inputs[i].atomic_compaction_unit_boundaries.size());
+  }
+  return inputs;
+}
+
+// helper function to determine if compaction is creating files at the
+// bottommost level
+bool Compaction::IsBottommostLevel(
+    int output_level, VersionStorageInfo* vstorage,
+    const std::vector<CompactionInputFiles>& inputs) {
+  int output_l0_idx;
+  if (output_level == 0) {
+    output_l0_idx = 0;
+    for (const auto* file : vstorage->LevelFiles(0)) {
+      if (inputs[0].files.back() == file) {
+        break;
+      }
+      ++output_l0_idx;
+    }
+    assert(static_cast<size_t>(output_l0_idx) < vstorage->LevelFiles(0).size());
+  } else {
+    output_l0_idx = -1;
+  }
+  Slice smallest_key, largest_key;
+  GetBoundaryKeys(vstorage, inputs, &smallest_key, &largest_key);
+  return !vstorage->RangeMightExistAfterSortedRun(smallest_key, largest_key,
+                                                  output_level, output_l0_idx);
+}
+
+// test function to validate the functionality of IsBottommostLevel()
+// function -- determines if compaction with inputs and storage is bottommost
+bool Compaction::TEST_IsBottommostLevel(
+    int output_level, VersionStorageInfo* vstorage,
+    const std::vector<CompactionInputFiles>& inputs) {
+  return IsBottommostLevel(output_level, vstorage, inputs);
+}
+
+bool Compaction::IsFullCompaction(
+    VersionStorageInfo* vstorage,
+    const std::vector<CompactionInputFiles>& inputs) {
+  size_t num_files_in_compaction = 0;
+  size_t total_num_files = 0;
+  for (int l = 0; l < vstorage->num_levels(); l++) {
+    total_num_files += vstorage->NumLevelFiles(l);
+  }
+  for (size_t i = 0; i < inputs.size(); i++) {
+    num_files_in_compaction += inputs[i].size();
+  }
+  return num_files_in_compaction == total_num_files;
+}
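The grouping rule in PopulateWithAtomicBoundaries() can be restated over a toy model: consecutive files stay in one atomic unit while the previous file's largest key compares equal (under sstableKeyCompare) to the next file's smallest key. A self-contained sketch of just that chaining rule, with plain integers standing in for internal keys (illustrative, not part of the patch):

```cpp
#include <cstddef>
#include <vector>

struct ToyFile {
  int smallest;
  int largest;
};

// For each file, return the index of the first file in its atomic unit.
// Mirrors the add_unit_boundary() chaining above in simplified form.
std::vector<size_t> GroupAtomicUnits(const std::vector<ToyFile>& files) {
  std::vector<size_t> unit_start(files.size());
  size_t first = 0;
  for (size_t j = 0; j < files.size(); ++j) {
    if (j > 0 && files[j - 1].largest != files[j].smallest) {
      first = j;  // boundary keys differ: a new atomic unit starts here
    }
    unit_start[j] = first;
  }
  return unit_start;
}
```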
+
+Compaction::Compaction(
+    VersionStorageInfo* vstorage, const ImmutableOptions& _immutable_options,
+    const MutableCFOptions& _mutable_cf_options,
+    const MutableDBOptions& _mutable_db_options,
+    std::vector<CompactionInputFiles> _inputs, int _output_level,
+    uint64_t _target_file_size, uint64_t _max_compaction_bytes,
+    uint32_t _output_path_id, CompressionType _compression,
+    CompressionOptions _compression_opts, Temperature _output_temperature,
+    uint32_t _max_subcompactions, std::vector<FileMetaData*> _grandparents,
+    bool _manual_compaction, const std::string& _trim_ts, double _score,
+    bool _deletion_compaction, bool l0_files_might_overlap,
+    CompactionReason _compaction_reason,
+    BlobGarbageCollectionPolicy _blob_garbage_collection_policy,
+    double _blob_garbage_collection_age_cutoff)
+    : input_vstorage_(vstorage),
+      start_level_(_inputs[0].level),
+      output_level_(_output_level),
+      target_output_file_size_(_target_file_size),
+      max_compaction_bytes_(_max_compaction_bytes),
+      max_subcompactions_(_max_subcompactions),
+      immutable_options_(_immutable_options),
+      mutable_cf_options_(_mutable_cf_options),
+      input_version_(nullptr),
+      number_levels_(vstorage->num_levels()),
+      cfd_(nullptr),
+      output_path_id_(_output_path_id),
+      output_compression_(_compression),
+      output_compression_opts_(_compression_opts),
+      output_temperature_(_output_temperature),
+      deletion_compaction_(_deletion_compaction),
+      l0_files_might_overlap_(l0_files_might_overlap),
+      inputs_(PopulateWithAtomicBoundaries(vstorage, std::move(_inputs))),
+      grandparents_(std::move(_grandparents)),
+      score_(_score),
+      bottommost_level_(
+          // For simplicity, we don't support the concept of "bottommost level"
+          // with `CompactionReason::kExternalSstIngestion` and
+          // `CompactionReason::kRefitLevel`
+          (_compaction_reason == CompactionReason::kExternalSstIngestion ||
+           _compaction_reason == CompactionReason::kRefitLevel)
+              ? false
+              : IsBottommostLevel(output_level_, vstorage, inputs_)),
+      is_full_compaction_(IsFullCompaction(vstorage, inputs_)),
+      is_manual_compaction_(_manual_compaction),
+      trim_ts_(_trim_ts),
+      is_trivial_move_(false),
+      compaction_reason_(_compaction_reason),
+      notify_on_compaction_completion_(false),
+      enable_blob_garbage_collection_(
+          _blob_garbage_collection_policy == BlobGarbageCollectionPolicy::kForce
+              ? true
+              : (_blob_garbage_collection_policy ==
+                         BlobGarbageCollectionPolicy::kDisable
+                     ? false
+                     : mutable_cf_options()->enable_blob_garbage_collection)),
+      blob_garbage_collection_age_cutoff_(
+          _blob_garbage_collection_age_cutoff < 0 ||
+                  _blob_garbage_collection_age_cutoff > 1
+              ? mutable_cf_options()->blob_garbage_collection_age_cutoff
+              : _blob_garbage_collection_age_cutoff),
+      penultimate_level_(
+          // For simplicity, we don't support the concept of "penultimate
+          // level" with `CompactionReason::kExternalSstIngestion` and
+          // `CompactionReason::kRefitLevel`
+          _compaction_reason == CompactionReason::kExternalSstIngestion ||
+                  _compaction_reason == CompactionReason::kRefitLevel
+              ? Compaction::kInvalidLevel
+              : EvaluatePenultimateLevel(vstorage, immutable_options_,
+                                         start_level_, output_level_)) {
+  MarkFilesBeingCompacted(true);
+  if (is_manual_compaction_) {
+    compaction_reason_ = CompactionReason::kManualCompaction;
+  }
+  if (max_subcompactions_ == 0) {
+    max_subcompactions_ = _mutable_db_options.max_subcompactions;
+  }
+
+  // For non-bottommost levels, compaction tries to build output files close
+  // to the target file size, but this is not guaranteed: a file can reach up
+  // to 2x the target size.
+  max_output_file_size_ =
+      bottommost_level_ || grandparents_.empty() ||
+              !_immutable_options.level_compaction_dynamic_file_size
+          ? target_output_file_size_
+          : 2 * target_output_file_size_;
+
+#ifndef NDEBUG
+  for (size_t i = 1; i < inputs_.size(); ++i) {
+    assert(inputs_[i].level > inputs_[i - 1].level);
+  }
+#endif
+
+  // setup input_levels_
+  {
+    input_levels_.resize(num_input_levels());
+    for (size_t which = 0; which < num_input_levels(); which++) {
+      DoGenerateLevelFilesBrief(&input_levels_[which], inputs_[which].files,
+                                &arena_);
+    }
+  }
+
+  GetBoundaryKeys(vstorage, inputs_, &smallest_user_key_, &largest_user_key_);
+
+  // Every compaction regardless of any compaction reason may respect the
+  // existing compact cursor in the output level to split output files
+  output_split_key_ = nullptr;
+  if (immutable_options_.compaction_style == kCompactionStyleLevel &&
+      immutable_options_.compaction_pri == kRoundRobin) {
+    const InternalKey* cursor =
+        &input_vstorage_->GetCompactCursors()[output_level_];
+    if (cursor->size() != 0) {
+      const Slice& cursor_user_key = ExtractUserKey(cursor->Encode());
+      auto ucmp = vstorage->InternalComparator()->user_comparator();
+      // May split output files according to the cursor if it is in the
+      // user-key range
+      if (ucmp->CompareWithoutTimestamp(cursor_user_key, smallest_user_key_) >
+              0 &&
+          ucmp->CompareWithoutTimestamp(cursor_user_key, largest_user_key_) <=
+              0) {
+        output_split_key_ = cursor;
+      }
+    }
+  }
+
+  PopulatePenultimateLevelOutputRange();
+}
+
+void Compaction::PopulatePenultimateLevelOutputRange() {
+  if (!SupportsPerKeyPlacement()) {
+    return;
+  }
+
+  // exclude the last level, the range of all input levels is the safe range
+  // of keys that can be moved up.
+  int exclude_level = number_levels_ - 1;
+  penultimate_output_range_type_ = PenultimateOutputRangeType::kNonLastRange;
+
+  // For universal compaction, the penultimate_output_range could be extended
+  // if all penultimate level files are included in the compaction (which
+  // includes the case that the penultimate level is empty).
+  if (immutable_options_.compaction_style == kCompactionStyleUniversal) {
+    exclude_level = kInvalidLevel;
+    penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
+    std::set<uint64_t> penultimate_inputs;
+    for (const auto& input_lvl : inputs_) {
+      if (input_lvl.level == penultimate_level_) {
+        for (const auto& file : input_lvl.files) {
+          penultimate_inputs.emplace(file->fd.GetNumber());
+        }
+      }
+    }
+    auto penultimate_files = input_vstorage_->LevelFiles(penultimate_level_);
+    for (const auto& file : penultimate_files) {
+      if (penultimate_inputs.find(file->fd.GetNumber()) ==
+          penultimate_inputs.end()) {
+        exclude_level = number_levels_ - 1;
+        penultimate_output_range_type_ =
+            PenultimateOutputRangeType::kNonLastRange;
+        break;
+      }
+    }
+  }
+
+  GetBoundaryKeys(input_vstorage_, inputs_,
+                  &penultimate_level_smallest_user_key_,
+                  &penultimate_level_largest_user_key_, exclude_level);
+}
+
+Compaction::~Compaction() {
+  if (input_version_ != nullptr) {
+    input_version_->Unref();
+  }
+  if (cfd_ != nullptr) {
+    cfd_->UnrefAndTryDelete();
+  }
+}
+
+bool Compaction::SupportsPerKeyPlacement() const {
+  return penultimate_level_ != kInvalidLevel;
+}
+
+int Compaction::GetPenultimateLevel() const { return penultimate_level_; }
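The `max_output_file_size_` expression near the end of the constructor packs three conditions into one ternary; restated as a standalone function for exposition (a sketch, not part of the patch):

```cpp
#include <cstdint>

// Mirrors the constructor's max_output_file_size_ computation: with
// level_compaction_dynamic_file_size enabled, a non-bottommost output file
// that has grandparent overlap to consider may grow up to 2x the target.
uint64_t MaxOutputFileSize(uint64_t target_file_size, bool bottommost_level,
                           bool has_grandparents, bool dynamic_file_size) {
  if (bottommost_level || !has_grandparents || !dynamic_file_size) {
    return target_file_size;
  }
  return 2 * target_file_size;
}
```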
+
+// smallest_key and largest_key include timestamps if user-defined timestamp
+// is enabled.
+bool Compaction::OverlapPenultimateLevelOutputRange(
+    const Slice& smallest_key, const Slice& largest_key) const {
+  if (!SupportsPerKeyPlacement()) {
+    return false;
+  }
+  const Comparator* ucmp =
+      input_vstorage_->InternalComparator()->user_comparator();
+
+  return ucmp->CompareWithoutTimestamp(
+             smallest_key, penultimate_level_largest_user_key_) <= 0 &&
+         ucmp->CompareWithoutTimestamp(
+             largest_key, penultimate_level_smallest_user_key_) >= 0;
+}
+
+// key includes timestamp if user-defined timestamp is enabled.
+bool Compaction::WithinPenultimateLevelOutputRange(const Slice& key) const {
+  if (!SupportsPerKeyPlacement()) {
+    return false;
+  }
+
+  if (penultimate_level_smallest_user_key_.empty() ||
+      penultimate_level_largest_user_key_.empty()) {
+    return false;
+  }
+
+  const Comparator* ucmp =
+      input_vstorage_->InternalComparator()->user_comparator();
+
+  return ucmp->CompareWithoutTimestamp(
+             key, penultimate_level_smallest_user_key_) >= 0 &&
+         ucmp->CompareWithoutTimestamp(
+             key, penultimate_level_largest_user_key_) <= 0;
+}
+
+bool Compaction::InputCompressionMatchesOutput() const {
+  int base_level = input_vstorage_->base_level();
+  bool matches =
+      (GetCompressionType(input_vstorage_, mutable_cf_options_, start_level_,
+                          base_level) == output_compression_);
+  if (matches) {
+    TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:Matches");
+    return true;
+  }
+  TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:DidntMatch");
+  return matches;
+}
+
+bool Compaction::IsTrivialMove() const {
+  // Avoid a move if there is lots of overlapping grandparent data.
+  // Otherwise, the move could create a parent file that will require
+  // a very expensive merge later on.
+  // If start_level_ == output_level_, the purpose is to force compaction
+  // filter to be applied to that level, and thus cannot be a trivial move.
+
+  // Check if start level has files with overlapping ranges
+  if (start_level_ == 0 && input_vstorage_->level0_non_overlapping() == false &&
+      l0_files_might_overlap_) {
+    // We cannot move files from L0 to L1 if the L0 files in the LSM-tree are
+    // overlapping, unless we are sure that files picked in L0 don't overlap.
+    return false;
+  }
+
+  if (is_manual_compaction_ &&
+      (immutable_options_.compaction_filter != nullptr ||
+       immutable_options_.compaction_filter_factory != nullptr)) {
+    // This is a manual compaction and we have a compaction filter that should
+    // be executed, we cannot do a trivial move
+    return false;
+  }
+
+  if (start_level_ == output_level_) {
+    // It doesn't make sense if compaction picker picks files just to trivial
+    // move to the same level.
+    return false;
+  }
+
+  if (compaction_reason_ == CompactionReason::kChangeTemperature) {
+    // Changing temperature usually requires rewriting the file.
+    return false;
+  }
+
+  // Used in universal compaction, where trivial move can be done if the
+  // input files are non overlapping
+  if ((mutable_cf_options_.compaction_options_universal.allow_trivial_move) &&
+      (output_level_ != 0) &&
+      (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal)) {
+    return is_trivial_move_;
+  }
+
+  if (!(start_level_ != output_level_ && num_input_levels() == 1 &&
+        input(0, 0)->fd.GetPathId() == output_path_id() &&
+        InputCompressionMatchesOutput())) {
+    return false;
+  }
+
+  // assert inputs_.size() == 1
+
+  std::unique_ptr<SstPartitioner> partitioner = CreateSstPartitioner();
+
+  for (const auto& file : inputs_.front().files) {
+    std::vector<FileMetaData*> file_grand_parents;
+    if (output_level_ + 1 >= number_levels_) {
+      continue;
+    }
+    input_vstorage_->GetOverlappingInputs(output_level_ + 1, &file->smallest,
+                                          &file->largest,
+                                          &file_grand_parents);
+    const auto compaction_size =
+        file->fd.GetFileSize() + TotalFileSize(file_grand_parents);
+    if (compaction_size > max_compaction_bytes_) {
+      return false;
+    }
+
+    if (partitioner.get() != nullptr) {
+      if (!partitioner->CanDoTrivialMove(file->smallest.user_key(),
+                                         file->largest.user_key())) {
+        return false;
+      }
+    }
+  }
+
+  // PerKeyPlacement compaction should never be trivial move.
+  if (SupportsPerKeyPlacement()) {
+    return false;
+  }
+
+  return true;
+}
+
+void Compaction::AddInputDeletions(VersionEdit* out_edit) {
+  for (size_t which = 0; which < num_input_levels(); which++) {
+    for (size_t i = 0; i < inputs_[which].size(); i++) {
+      out_edit->DeleteFile(level(which), inputs_[which][i]->fd.GetNumber());
+    }
+  }
+}
+
+bool Compaction::KeyNotExistsBeyondOutputLevel(
+    const Slice& user_key, std::vector<size_t>* level_ptrs) const {
+  assert(input_version_ != nullptr);
+  assert(level_ptrs != nullptr);
+  assert(level_ptrs->size() == static_cast<size_t>(number_levels_));
+  if (bottommost_level_) {
+    return true;
+  } else if (output_level_ != 0 &&
+             cfd_->ioptions()->compaction_style == kCompactionStyleLevel) {
+    // Maybe use binary search to find right entry instead of linear search?
+    const Comparator* user_cmp = cfd_->user_comparator();
+    for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
+      const std::vector<FileMetaData*>& files =
+          input_vstorage_->LevelFiles(lvl);
+      for (; level_ptrs->at(lvl) < files.size(); level_ptrs->at(lvl)++) {
+        auto* f = files[level_ptrs->at(lvl)];
+        if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
+          // We've advanced far enough
+          // In the presence of user-defined timestamp, we may need to handle
+          // the case in which f->smallest.user_key() (including ts) has the
+          // same user key, but the ts part is smaller. If so,
+          // Compare(user_key, f->smallest.user_key()) returns -1.
+          // That's why we need CompareWithoutTimestamp().
+          if (user_cmp->CompareWithoutTimestamp(user_key,
+                                                f->smallest.user_key()) >= 0) {
+            // Key falls in this file's range, so it may
+            // exist beyond output level
+            return false;
+          }
+          break;
+        }
+      }
+    }
+    return true;
+  }
+  return false;
+}
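KeyNotExistsBeyondOutputLevel() relies on `level_ptrs` as a set of per-level cursors that only move forward, which is sound because the compaction emits user keys in increasing order. The scan's shape over a toy model with string keys (illustrative, not part of the patch):

```cpp
#include <cstddef>
#include <string>
#include <vector>

struct ToyLevelFile {
  std::string smallest;
  std::string largest;
};

// Returns true when `user_key` cannot fall inside any file range on the
// levels below the output level. `level_ptrs` persists across calls and
// only ever advances, like the real implementation's cursors.
bool KeyNotBeyondOutput(
    const std::string& user_key,
    const std::vector<std::vector<ToyLevelFile>>& levels_below_output,
    std::vector<size_t>* level_ptrs) {
  for (size_t lvl = 0; lvl < levels_below_output.size(); ++lvl) {
    const auto& files = levels_below_output[lvl];
    for (size_t& i = (*level_ptrs)[lvl]; i < files.size(); ++i) {
      if (user_key <= files[i].largest) {
        if (user_key >= files[i].smallest) {
          return false;  // key falls inside this file's range
        }
        break;  // key sorts before this file; later files are even larger
      }
    }
  }
  return true;
}
```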
+
+// Mark (or clear) each file that is being compacted
+void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
+  for (size_t i = 0; i < num_input_levels(); i++) {
+    for (size_t j = 0; j < inputs_[i].size(); j++) {
+      assert(mark_as_compacted ? !inputs_[i][j]->being_compacted
+                               : inputs_[i][j]->being_compacted);
+      inputs_[i][j]->being_compacted = mark_as_compacted;
+    }
+  }
+}
+
+// Sample output:
+// If compacting 3 L0 files, 2 L3 files and 1 L4 file, and outputting to L5,
+// print: "3@0 + 2@3 + 1@4 files to L5"
+const char* Compaction::InputLevelSummary(
+    InputLevelSummaryBuffer* scratch) const {
+  int len = 0;
+  bool is_first = true;
+  for (auto& input_level : inputs_) {
+    if (input_level.empty()) {
+      continue;
+    }
+    if (!is_first) {
+      len +=
+          snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, " + ");
+      len = std::min(len, static_cast<int>(sizeof(scratch->buffer)));
+    } else {
+      is_first = false;
+    }
+    len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len,
+                    "%" ROCKSDB_PRIszt "@%d", input_level.size(),
+                    input_level.level);
+    len = std::min(len, static_cast<int>(sizeof(scratch->buffer)));
+  }
+  snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len,
+           " files to L%d", output_level());
+
+  return scratch->buffer;
+}
+
+uint64_t Compaction::CalculateTotalInputSize() const {
+  uint64_t size = 0;
+  for (auto& input_level : inputs_) {
+    for (auto f : input_level.files) {
+      size += f->fd.GetFileSize();
+    }
+  }
+  return size;
+}
+
+void Compaction::ReleaseCompactionFiles(Status status) {
+  MarkFilesBeingCompacted(false);
+  cfd_->compaction_picker()->ReleaseCompactionFiles(this, status);
+}
+
+void Compaction::ResetNextCompactionIndex() {
+  assert(input_version_ != nullptr);
+  input_vstorage_->ResetNextCompactionIndex(start_level_);
+}
+
+namespace {
+int InputSummary(const std::vector<FileMetaData*>& files, char* output,
+                 int len) {
+  *output = '\0';
+  int write = 0;
+  for (size_t i = 0; i < files.size(); i++) {
+    int sz = len - write;
+    int ret;
+    char sztxt[16];
+    AppendHumanBytes(files.at(i)->fd.GetFileSize(), sztxt, 16);
+    ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ",
+                   files.at(i)->fd.GetNumber(), sztxt);
+    if (ret < 0 || ret >= sz) break;
+    write += ret;
+  }
+  // if files.size() is non-zero, overwrite the last space
+  return write - !!files.size();
+}
+}  // namespace
+
+void Compaction::Summary(char* output, int len) {
+  int write =
+      snprintf(output, len, "Base version %" PRIu64 " Base level %d, inputs: [",
+               input_version_->GetVersionNumber(), start_level_);
+  if (write < 0 || write >= len) {
+    return;
+  }
+
+  for (size_t level_iter = 0; level_iter < num_input_levels(); ++level_iter) {
+    if (level_iter > 0) {
+      write += snprintf(output + write, len - write, "], [");
+      if (write < 0 || write >= len) {
+        return;
+      }
+    }
+    write +=
+        InputSummary(inputs_[level_iter].files, output + write, len - write);
+    if (write < 0 || write >= len) {
+      return;
+    }
+  }
+
+  snprintf(output + write, len - write, "]");
+}
+
+uint64_t Compaction::OutputFilePreallocationSize() const {
+  uint64_t preallocation_size = 0;
+
+  for (const auto& level_files : inputs_) {
+    for (const auto& file : level_files.files) {
+      preallocation_size += file->fd.GetFileSize();
+    }
+  }
+
+  if (max_output_file_size_ != std::numeric_limits<uint64_t>::max() &&
+      (immutable_options_.compaction_style == kCompactionStyleLevel ||
+       output_level() > 0)) {
+    preallocation_size = std::min(max_output_file_size_, preallocation_size);
+  }
+
+  // Over-estimate slightly so we don't end up just barely crossing
+  // the threshold
+  // No point to preallocate more than 1GB.
+  return std::min(uint64_t{1073741824},
+                  preallocation_size + (preallocation_size / 10));
+}
+
+std::unique_ptr<CompactionFilter> Compaction::CreateCompactionFilter() const {
+  if (!cfd_->ioptions()->compaction_filter_factory) {
+    return nullptr;
+  }
+
+  if (!cfd_->ioptions()
+           ->compaction_filter_factory->ShouldFilterTableFileCreation(
+               TableFileCreationReason::kCompaction)) {
+    return nullptr;
+  }
+
+  CompactionFilter::Context context;
+  context.is_full_compaction = is_full_compaction_;
+  context.is_manual_compaction = is_manual_compaction_;
+  context.column_family_id = cfd_->GetID();
+  context.reason = TableFileCreationReason::kCompaction;
+  return cfd_->ioptions()->compaction_filter_factory->CreateCompactionFilter(
+      context);
+}
+
+std::unique_ptr<SstPartitioner> Compaction::CreateSstPartitioner() const {
+  if (!immutable_options_.sst_partitioner_factory) {
+    return nullptr;
+  }
+
+  SstPartitioner::Context context;
+  context.is_full_compaction = is_full_compaction_;
+  context.is_manual_compaction = is_manual_compaction_;
+  context.output_level = output_level_;
+  context.smallest_user_key = smallest_user_key_;
+  context.largest_user_key = largest_user_key_;
+  return immutable_options_.sst_partitioner_factory->CreatePartitioner(
+      context);
+}
+
+bool Compaction::IsOutputLevelEmpty() const {
+  return inputs_.back().level != output_level_ || inputs_.back().empty();
+}
+
+bool Compaction::ShouldFormSubcompactions() const {
+  if (cfd_ == nullptr) {
+    return false;
+  }
+
+  // Round-Robin pri under leveled compaction allows subcompactions by default
+  // and the number of subcompactions can be larger than max_subcompactions_
+  if (cfd_->ioptions()->compaction_pri == kRoundRobin &&
+      cfd_->ioptions()->compaction_style == kCompactionStyleLevel) {
+    return output_level_ > 0;
+  }
+
+  if (max_subcompactions_ <= 1) {
+    return false;
+  }
+
+  if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel) {
+    return (start_level_ == 0 || is_manual_compaction_) && output_level_ > 0;
+  } else if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
+    return number_levels_ > 1 && output_level_ > 0;
+  } else {
+    return false;
+  }
+}
+
+bool Compaction::DoesInputReferenceBlobFiles() const {
+  assert(input_version_);
+
+  const VersionStorageInfo* storage_info = input_version_->storage_info();
+  assert(storage_info);
+
+  if (storage_info->GetBlobFiles().empty()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < inputs_.size(); ++i) {
+    for (const FileMetaData* meta : inputs_[i].files) {
+      assert(meta);
+
+      if (meta->oldest_blob_file_number != kInvalidBlobFileNumber) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+uint64_t Compaction::MinInputFileOldestAncesterTime(
+    const InternalKey* start, const InternalKey* end) const {
+  uint64_t min_oldest_ancester_time = std::numeric_limits<uint64_t>::max();
+  const InternalKeyComparator& icmp =
+      column_family_data()->internal_comparator();
+  for (const auto& level_files : inputs_) {
+    for (const auto& file : level_files.files) {
+      if (start != nullptr && icmp.Compare(file->largest, *start) < 0) {
+        continue;
+      }
+      if (end != nullptr && icmp.Compare(file->smallest, *end) > 0) {
+        continue;
+      }
+      uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime();
+      if (oldest_ancester_time != 0) {
+        min_oldest_ancester_time =
+            std::min(min_oldest_ancester_time, oldest_ancester_time);
+      }
+    }
+  }
+  return min_oldest_ancester_time;
+}
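CreateCompactionFilter() above defers to the column family's `compaction_filter_factory`. For context, a minimal user-side factory built only on the public `rocksdb/compaction_filter.h` API might look as follows (a sketch; the "expired:" key convention is invented for illustration):

```cpp
#include <memory>
#include <string>

#include "rocksdb/compaction_filter.h"

// Drops every entry whose key carries a (hypothetical) "expired:" prefix.
class DropExpiredFilter : public ROCKSDB_NAMESPACE::CompactionFilter {
 public:
  bool Filter(int /*level*/, const ROCKSDB_NAMESPACE::Slice& key,
              const ROCKSDB_NAMESPACE::Slice& /*existing_value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    return key.starts_with("expired:");  // true => remove the entry
  }
  const char* Name() const override { return "DropExpiredFilter"; }
};

class DropExpiredFilterFactory
    : public ROCKSDB_NAMESPACE::CompactionFilterFactory {
 public:
  std::unique_ptr<ROCKSDB_NAMESPACE::CompactionFilter> CreateCompactionFilter(
      const ROCKSDB_NAMESPACE::CompactionFilter::Context& /*ctx*/) override {
    return std::make_unique<DropExpiredFilter>();
  }
  const char* Name() const override { return "DropExpiredFilterFactory"; }
};
```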
+
+uint64_t Compaction::MinInputFileEpochNumber() const {
+  uint64_t min_epoch_number = std::numeric_limits<uint64_t>::max();
+  for (const auto& inputs_per_level : inputs_) {
+    for (const auto& file : inputs_per_level.files) {
+      min_epoch_number = std::min(min_epoch_number, file->epoch_number);
+    }
+  }
+  return min_epoch_number;
+}
+
+int Compaction::EvaluatePenultimateLevel(
+    const VersionStorageInfo* vstorage,
+    const ImmutableOptions& immutable_options, const int start_level,
+    const int output_level) {
+  // TODO: currently the per_key_placement feature only supports level and
+  // universal compaction
+  if (immutable_options.compaction_style != kCompactionStyleLevel &&
+      immutable_options.compaction_style != kCompactionStyleUniversal) {
+    return kInvalidLevel;
+  }
+  if (output_level != immutable_options.num_levels - 1) {
+    return kInvalidLevel;
+  }
+
+  int penultimate_level = output_level - 1;
+  assert(penultimate_level < immutable_options.num_levels);
+  if (penultimate_level <= 0) {
+    return kInvalidLevel;
+  }
+
+  // If the penultimate level is not within input level -> output level range
+  // check if the penultimate output level is empty, if it's empty, it could
+  // also be locked for the penultimate output.
+  // TODO: ideally, it only needs to check if there's a file within the
+  // compaction output key range. For simplicity, it just checks if there's
+  // any file on the penultimate level.
+  if (start_level == immutable_options.num_levels - 1 &&
+      (immutable_options.compaction_style != kCompactionStyleUniversal ||
+       !vstorage->LevelFiles(penultimate_level).empty())) {
+    return kInvalidLevel;
+  }
+
+  bool supports_per_key_placement =
+      immutable_options.preclude_last_level_data_seconds > 0;
+
+  // it could be overridden by unittest
+  TEST_SYNC_POINT_CALLBACK("Compaction::SupportsPerKeyPlacement:Enabled",
+                           &supports_per_key_placement);
+  if (!supports_per_key_placement) {
+    return kInvalidLevel;
+  }
+
+  return penultimate_level;
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.h
new file mode 100644
index 0000000000..4fa1dce71a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction.h
@@ -0,0 +1,565 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include "db/version_set.h"
+#include "memory/arena.h"
+#include "options/cf_options.h"
+#include "rocksdb/sst_partitioner.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+// The file contains class Compaction, as well as some helper functions
+// and data structures used by the class.
+
+// Utility for comparing sstable boundary keys. Returns -1 if either a or b is
+// null which provides the property that a==null indicates a key that is less
+// than any key and b==null indicates a key that is greater than any key. Note
+// that the comparison is performed primarily on the user-key portion of the
+// key. If the user-keys compare equal, an additional test is made to sort
+// range tombstone sentinel keys before other keys with the same user-key. The
+// result is that 2 user-keys will compare equal if they differ purely on
+// their sequence number and value, but the range tombstone sentinel for that
+// user-key will compare not equal. This is necessary because the range
+// tombstone sentinel key is set as the largest key for an sstable even though
+// that key never appears in the database. We don't want adjacent sstables to
+// be considered overlapping if they are separated by the range tombstone
+// sentinel.
+int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
+                      const InternalKey& b);
+int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a,
+                      const InternalKey& b);
+int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
+                      const InternalKey* b);
+
+// An AtomicCompactionUnitBoundary represents a range of keys [smallest,
+// largest] that exactly spans one or more neighbouring SSTs on the same
+// level. Every pair of SSTs in this range "overlap" (i.e., the largest
+// user key of one file is the smallest user key of the next file). These
+// boundaries are propagated down to RangeDelAggregator during compaction
+// to provide safe truncation boundaries for range tombstones.
+struct AtomicCompactionUnitBoundary {
+  const InternalKey* smallest = nullptr;
+  const InternalKey* largest = nullptr;
+};
+
+// The structure that manages compaction input files associated
+// with the same physical level.
+struct CompactionInputFiles {
+  int level;
+  std::vector<FileMetaData*> files;
+  std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries;
+  inline bool empty() const { return files.empty(); }
+  inline size_t size() const { return files.size(); }
+  inline void clear() { files.clear(); }
+  inline FileMetaData* operator[](size_t i) const { return files[i]; }
+};
+
+class Version;
+class ColumnFamilyData;
+class VersionStorageInfo;
+class CompactionFilter;
+
+// A Compaction encapsulates metadata about a compaction.
+class Compaction {
+ public:
+  Compaction(VersionStorageInfo* input_version,
+             const ImmutableOptions& immutable_options,
+             const MutableCFOptions& mutable_cf_options,
+             const MutableDBOptions& mutable_db_options,
+             std::vector<CompactionInputFiles> inputs, int output_level,
+             uint64_t target_file_size, uint64_t max_compaction_bytes,
+             uint32_t output_path_id, CompressionType compression,
+             CompressionOptions compression_opts,
+             Temperature output_temperature, uint32_t max_subcompactions,
+             std::vector<FileMetaData*> grandparents,
+             bool manual_compaction = false, const std::string& trim_ts = "",
+             double score = -1, bool deletion_compaction = false,
+             bool l0_files_might_overlap = true,
+             CompactionReason compaction_reason = CompactionReason::kUnknown,
+             BlobGarbageCollectionPolicy blob_garbage_collection_policy =
+                 BlobGarbageCollectionPolicy::kUseDefault,
+             double blob_garbage_collection_age_cutoff = -1);
+
+  // The type of the penultimate level output range
+  enum class PenultimateOutputRangeType : int {
+    kNotSupported,  // it cannot output to the penultimate level
+    kFullRange,     // any data could be output to the penultimate level
+    kNonLastRange,  // only the keys within non_last_level compaction inputs
+                    // can be outputted to the penultimate level
+    kDisabled,      // no data can be outputted to the penultimate level
+  };
+
+  // No copying allowed
+  Compaction(const Compaction&) = delete;
+  void operator=(const Compaction&) = delete;
+
+  ~Compaction();
+
+  // Returns the level associated to the specified compaction input level.
+  // If compaction_input_level is not specified, then input_level is set to 0.
+  int level(size_t compaction_input_level = 0) const {
+    return inputs_[compaction_input_level].level;
+  }
+
+  int start_level() const { return start_level_; }
+
+  // Outputs will go to this level
+  int output_level() const { return output_level_; }
+
+  // Returns the number of input levels in this compaction.
+  size_t num_input_levels() const { return inputs_.size(); }
+
+  // Return the object that holds the edits to the descriptor done
+  // by this compaction.
+  VersionEdit* edit() { return &edit_; }
+
+  // Returns the number of input files associated to the specified
+  // compaction input level.
+  // The function will return 0 when "compaction_input_level" < 0
+  // or "compaction_input_level" >= "num_input_levels()".
+  size_t num_input_files(size_t compaction_input_level) const {
+    if (compaction_input_level < inputs_.size()) {
+      return inputs_[compaction_input_level].size();
+    }
+    return 0;
+  }
+
+  // Returns input version of the compaction
+  Version* input_version() const { return input_version_; }
+
+  // Returns the ColumnFamilyData associated with the compaction.
+  ColumnFamilyData* column_family_data() const { return cfd_; }
+
+  // Returns the file meta data of the 'i'th input file at the
+  // specified compaction input level.
+  // REQUIREMENT: "compaction_input_level" must be >= 0 and
+  //              < "input_levels()"
+  FileMetaData* input(size_t compaction_input_level, size_t i) const {
+    assert(compaction_input_level < inputs_.size());
+    return inputs_[compaction_input_level][i];
+  }
+
+  const std::vector<AtomicCompactionUnitBoundary>* boundaries(
+      size_t compaction_input_level) const {
+    assert(compaction_input_level < inputs_.size());
+    return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries;
+  }
+
+  // Returns the list of file meta data of the specified compaction
+  // input level.
+  // REQUIREMENT: "compaction_input_level" must be >= 0 and
+  //              < "input_levels()"
+  const std::vector<FileMetaData*>* inputs(
+      size_t compaction_input_level) const {
+    assert(compaction_input_level < inputs_.size());
+    return &inputs_[compaction_input_level].files;
+  }
+
+  const std::vector<CompactionInputFiles>* inputs() { return &inputs_; }
+
+  // Returns the LevelFilesBrief of the specified compaction input level.
+  const LevelFilesBrief* input_levels(size_t compaction_input_level) const {
+    return &input_levels_[compaction_input_level];
+  }
+
+  // Maximum size of files to build during this compaction.
+  uint64_t max_output_file_size() const { return max_output_file_size_; }
+
+  // Target output file size for this compaction
+  uint64_t target_output_file_size() const { return target_output_file_size_; }
+
+  // What compression for output
+  CompressionType output_compression() const { return output_compression_; }
+
+  // What compression options for output
+  const CompressionOptions& output_compression_opts() const {
+    return output_compression_opts_;
+  }
+
+  // Whether the output file needs to be written to a secondary DB path.
+  uint32_t output_path_id() const { return output_path_id_; }
+
+  // Is this a trivial compaction that can be implemented by just
+  // moving a single input file to the next level (no merging or splitting)
+  bool IsTrivialMove() const;
+
+  // The split user key in the output level if this compaction is required to
+  // split the output files according to the existing cursor in the output
+  // level under round-robin compaction policy. Empty indicates no required
+  // splitting key
+  const InternalKey* GetOutputSplitKey() const { return output_split_key_; }
+
+  // If true, then the compaction can be done by simply deleting input files.
+  bool deletion_compaction() const { return deletion_compaction_; }
+
+  // Add all inputs to this compaction as delete operations to *edit.
+  void AddInputDeletions(VersionEdit* edit);
+
+  // Returns true if the available information we have guarantees that
+  // the input "user_key" does not exist in any level beyond "output_level()".
+  bool KeyNotExistsBeyondOutputLevel(const Slice& user_key,
+                                     std::vector<size_t>* level_ptrs) const;
+
+  // Clear all files to indicate that they are not being compacted
+  // Delete this compaction from the list of running compactions.
+  //
+  // Requirement: DB mutex held
+  void ReleaseCompactionFiles(Status status);
+
+  // Returns the summary of the compaction in "output" with maximum "len"
+  // in bytes. The caller is responsible for the memory management of
+  // "output".
+  void Summary(char* output, int len);
+
+  // Return the score that was used to pick this compaction run.
+  double score() const { return score_; }
+
+  // Is this compaction creating a file in the bottom most level?
+  bool bottommost_level() const { return bottommost_level_; }
+
+  // Does the compaction output to the last level?
+  bool is_last_level() const {
+    return output_level_ == immutable_options_.num_levels - 1;
+  }
+
+  // Does this compaction include all sst files?
+  bool is_full_compaction() const { return is_full_compaction_; }
+
+  // Was this compaction triggered manually by the client?
+  bool is_manual_compaction() const { return is_manual_compaction_; }
+
+  std::string trim_ts() const { return trim_ts_; }
+
+  // Used when allow_trivial_move option is set in
+  // Universal compaction. If all the input files are
+  // non overlapping, then is_trivial_move_ variable
+  // will be set true, else false
+  void set_is_trivial_move(bool trivial_move) {
+    is_trivial_move_ = trivial_move;
+  }
+
+  // Used when allow_trivial_move option is set in
+  // Universal compaction. Returns true, if the input files
+  // are non-overlapping and can be trivially moved.
+  bool is_trivial_move() const { return is_trivial_move_; }
+
+  // How many total levels are there?
+  int number_levels() const { return number_levels_; }
+
+  // Return the ImmutableOptions that should be used throughout the compaction
+  // procedure
+  const ImmutableOptions* immutable_options() const {
+    return &immutable_options_;
+  }
+
+  // Return the MutableCFOptions that should be used throughout the compaction
+  // procedure
+  const MutableCFOptions* mutable_cf_options() const {
+    return &mutable_cf_options_;
+  }
+
+  // Returns the size in bytes that the output file should be preallocated to.
+  // In level compaction, that is max_file_size_. In universal compaction,
+  // that is the sum of all input file sizes.
+  uint64_t OutputFilePreallocationSize() const;
+
+  void SetInputVersion(Version* input_version);
+
+  struct InputLevelSummaryBuffer {
+    char buffer[128];
+  };
+
+  const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const;
+
+  uint64_t CalculateTotalInputSize() const;
+
+  // In case of compaction error, reset the nextIndex that is used
+  // to pick up the next file to be compacted from files_by_size_
+  void ResetNextCompactionIndex();
+
+  // Create a CompactionFilter from compaction_filter_factory
+  std::unique_ptr<CompactionFilter> CreateCompactionFilter() const;
+
+  // Create a SstPartitioner from sst_partitioner_factory
+  std::unique_ptr<SstPartitioner> CreateSstPartitioner() const;
+
+  // Is the input level corresponding to output_level_ empty?
+  bool IsOutputLevelEmpty() const;
+
+  // Should this compaction be broken up into smaller ones run in parallel?
+  bool ShouldFormSubcompactions() const;
+
+  // Returns true iff at least one input file references a blob file.
+  //
+  // PRE: input version has been set.
+  bool DoesInputReferenceBlobFiles() const;
+
+  // test function to validate the functionality of IsBottommostLevel()
+  // function -- determines if compaction with inputs and storage is bottommost
+  static bool TEST_IsBottommostLevel(
+      int output_level, VersionStorageInfo* vstorage,
+      const std::vector<CompactionInputFiles>& inputs);
+
+  TablePropertiesCollection GetOutputTableProperties() const {
+    return output_table_properties_;
+  }
+
+  void SetOutputTableProperties(TablePropertiesCollection tp) {
+    output_table_properties_ = std::move(tp);
+  }
+
+  Slice GetSmallestUserKey() const { return smallest_user_key_; }
+
+  Slice GetLargestUserKey() const { return largest_user_key_; }
+
+  Slice GetPenultimateLevelSmallestUserKey() const {
+    return penultimate_level_smallest_user_key_;
+  }
+
+  Slice GetPenultimateLevelLargestUserKey() const {
+    return penultimate_level_largest_user_key_;
+  }
+
+  PenultimateOutputRangeType GetPenultimateOutputRangeType() const {
+    return penultimate_output_range_type_;
+  }
+
+  // Return true if the compaction supports per_key_placement
+  bool SupportsPerKeyPlacement() const;
+
+  // Get per_key_placement penultimate output level, which is `last_level - 1`
+  // if per_key_placement feature is supported. Otherwise, return -1.
+  int GetPenultimateLevel() const;
+
+  // Return true if the given range overlaps with the penultimate level
+  // output range.
+  // Both smallest_key and largest_key include timestamps if user-defined
+  // timestamp is enabled.
+  bool OverlapPenultimateLevelOutputRange(const Slice& smallest_key,
+                                          const Slice& largest_key) const;
+
+  // Return true if the key is within the penultimate level output range for
+  // the per_key_placement feature, in which case it is safe to place the key
+  // on the penultimate level. Different compaction strategies have different
+  // rules. If per_key_placement is not supported, always return false.
+  // TODO: currently it doesn't support moving data from the last level to the
+  // penultimate level
+  // key includes timestamp if user-defined timestamp is enabled.
+  bool WithinPenultimateLevelOutputRange(const Slice& key) const;
+
+  CompactionReason compaction_reason() const { return compaction_reason_; }
+
+  const std::vector<FileMetaData*>& grandparents() const {
+    return grandparents_;
+  }
+
+  uint64_t max_compaction_bytes() const { return max_compaction_bytes_; }
+
+  Temperature output_temperature() const { return output_temperature_; }
+
+  uint32_t max_subcompactions() const { return max_subcompactions_; }
+
+  bool enable_blob_garbage_collection() const {
+    return enable_blob_garbage_collection_;
+  }
+
+  double blob_garbage_collection_age_cutoff() const {
+    return blob_garbage_collection_age_cutoff_;
+  }
+
+  // start and end are the boundaries of the subcompaction range; null means
+  // unbounded. They are used to filter out input files whose oldest-ancester
+  // time range falls outside the subcompaction.
+  uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
+                                          const InternalKey* end) const;
+  // Return the minimum epoch number among input files associated
+  // with this compaction
+  uint64_t MinInputFileEpochNumber() const;
+
+  // Called by DBImpl::NotifyOnCompactionCompleted to make sure number of
+  // compaction begin and compaction completion callbacks match.
+  void SetNotifyOnCompactionCompleted() {
+    notify_on_compaction_completion_ = true;
+  }
+
+  bool ShouldNotifyOnCompactionCompleted() const {
+    return notify_on_compaction_completion_;
+  }
+
+  static constexpr int kInvalidLevel = -1;
+
+  // Evaluate the penultimate output level. If the compaction supports the
+  // per_key_placement feature, it returns the penultimate level number.
+  // Otherwise, it returns kInvalidLevel (-1), which means
+  // output_to_penultimate_level is not supported.
+  // Note: even if penultimate level output is supported (PenultimateLevel !=
+  // kInvalidLevel), some key ranges may be unsafe to output to the
+  // penultimate level. The safe key range is populated by
+  // `PopulatePenultimateLevelOutputRange()`, which could potentially disable
+  // all penultimate level output.
+  static int EvaluatePenultimateLevel(const VersionStorageInfo* vstorage,
+                                      const ImmutableOptions& immutable_options,
+                                      const int start_level,
+                                      const int output_level);
+
+ private:
+  // mark (or clear) all files that are being compacted
+  void MarkFilesBeingCompacted(bool mark_as_compacted);
+
+  // get the smallest and largest key present in files to be compacted
+  static void GetBoundaryKeys(VersionStorageInfo* vstorage,
+                              const std::vector<CompactionInputFiles>& inputs,
+                              Slice* smallest_key, Slice* largest_key,
+                              int exclude_level = -1);
+
+  // populate penultimate level output range, which will be used to determine
+  // if a key is safe to output to the penultimate level (for details see
+  // `Compaction::WithinPenultimateLevelOutputRange()`).
+  void PopulatePenultimateLevelOutputRange();
+
+  // Get the atomic file boundaries for all files in the compaction. Necessary
+  // in order to avoid the scenario described in
+  // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and
+  // plumb down appropriate key boundaries to RangeDelAggregator during
+  // compaction.
+  static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries(
+      VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs);
+
+  // helper function to determine if compaction with inputs and storage is
+  // bottommost
+  static bool IsBottommostLevel(
+      int output_level, VersionStorageInfo* vstorage,
+      const std::vector<CompactionInputFiles>& inputs);
+
+  static bool IsFullCompaction(VersionStorageInfo* vstorage,
+                               const std::vector<CompactionInputFiles>& inputs);
+
+  VersionStorageInfo* input_vstorage_;
+
+  const int start_level_;   // the lowest level to be compacted
+  const int output_level_;  // levels to which output files are stored
+  uint64_t target_output_file_size_;
+  uint64_t max_output_file_size_;
+  uint64_t max_compaction_bytes_;
+  uint32_t max_subcompactions_;
+  const ImmutableOptions immutable_options_;
+  const MutableCFOptions mutable_cf_options_;
+  Version* input_version_;
+  VersionEdit edit_;
+  const int number_levels_;
+  ColumnFamilyData* cfd_;
+  Arena arena_;  // Arena used to allocate space for file_levels_
+
+  const uint32_t output_path_id_;
+  CompressionType output_compression_;
+  CompressionOptions output_compression_opts_;
+  Temperature output_temperature_;
+  // If true, then the compaction can be done by simply deleting input files.
+  const bool deletion_compaction_;
+  // should it split the output file using the compact cursor?
+  const InternalKey* output_split_key_;
+
+  // L0 files in LSM-tree might be overlapping. But the compaction picking
+  // logic might pick a subset of the files that aren't overlapping. if
+  // that is the case, set the value to false. Otherwise, set it true.
+  bool l0_files_might_overlap_;
+
+  // Compaction input files organized by level. Constant after construction
Constant after construction + const std::vector inputs_; + + // A copy of inputs_, organized more closely in memory + autovector input_levels_; + + // State used to check for number of overlapping grandparent files + // (grandparent == "output_level_ + 1") + std::vector grandparents_; + const double score_; // score that was used to pick this compaction. + + // Is this compaction creating a file in the bottom most level? + const bool bottommost_level_; + // Does this compaction include all sst files? + const bool is_full_compaction_; + + // Is this compaction requested by the client? + const bool is_manual_compaction_; + + // The data with timestamp > trim_ts_ will be removed + const std::string trim_ts_; + + // True if we can do trivial move in Universal multi level + // compaction + bool is_trivial_move_; + + // Does input compression match the output compression? + bool InputCompressionMatchesOutput() const; + + // table properties of output files + TablePropertiesCollection output_table_properties_; + + // smallest user keys in compaction + // includes timestamp if user-defined timestamp is enabled. + Slice smallest_user_key_; + + // largest user keys in compaction + // includes timestamp if user-defined timestamp is enabled. + Slice largest_user_key_; + + // Reason for compaction + CompactionReason compaction_reason_; + + // Notify on compaction completion only if listener was notified on compaction + // begin. + bool notify_on_compaction_completion_; + + // Enable/disable GC collection for blobs during compaction. + bool enable_blob_garbage_collection_; + + // Blob garbage collection age cutoff. + double blob_garbage_collection_age_cutoff_; + + // only set when per_key_placement feature is enabled, -1 (kInvalidLevel) + // means not supported. + const int penultimate_level_; + + // Key range for penultimate level output + // includes timestamp if user-defined timestamp is enabled. + // penultimate_output_range_type_ shows the range type + Slice penultimate_level_smallest_user_key_; + Slice penultimate_level_largest_user_key_; + PenultimateOutputRangeType penultimate_output_range_type_ = + PenultimateOutputRangeType::kNotSupported; +}; + +#ifndef NDEBUG +// Helper struct only for tests, which contains the data to decide if a key +// should be output to the penultimate level. +// TODO: remove this when the public feature knob is available +struct PerKeyPlacementContext { + const int level; + const Slice key; + const Slice value; + const SequenceNumber seq_num; + + bool& output_to_penultimate_level; + + PerKeyPlacementContext(int _level, Slice _key, Slice _value, + SequenceNumber _seq_num, + bool& _output_to_penultimate_level) + : level(_level), + key(_key), + value(_value), + seq_num(_seq_num), + output_to_penultimate_level(_output_to_penultimate_level) {} +}; +#endif /* !NDEBUG */ + +// Return sum of sizes of all files in `files`. +extern uint64_t TotalFileSize(const std::vector& files); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iteration_stats.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iteration_stats.h new file mode 100644 index 0000000000..1b1c28b57a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iteration_stats.h @@ -0,0 +1,49 @@ +// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 
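[Reviewer note] `TotalFileSize`, declared just above, is a plain accumulator over file metadata. A minimal sketch of what it computes, assuming `FileMetaData` exposes the on-disk size via `fd.GetFileSize()` as in upstream RocksDB:

    uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
      uint64_t sum = 0;
      for (const FileMetaData* f : files) {
        sum += f->fd.GetFileSize();  // physical size of the SST file on disk
      }
      return sum;
    }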
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iteration_stats.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iteration_stats.h
new file mode 100644
index 0000000000..1b1c28b57a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iteration_stats.h
@@ -0,0 +1,49 @@
+// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cstdint>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct CompactionIterationStats {
+  // Compaction statistics
+
+  // Doesn't include records skipped because of
+  // CompactionFilter::Decision::kRemoveAndSkipUntil.
+  int64_t num_record_drop_user = 0;
+
+  int64_t num_record_drop_hidden = 0;
+  int64_t num_record_drop_obsolete = 0;
+  int64_t num_record_drop_range_del = 0;
+  int64_t num_range_del_drop_obsolete = 0;
+  // Deletions obsoleted before bottom level due to file gap optimization.
+  int64_t num_optimized_del_drop_obsolete = 0;
+  uint64_t total_filter_time = 0;
+
+  // Input statistics
+  // TODO(noetzli): The stats are incomplete. They are lacking everything
+  // consumed by MergeHelper.
+  uint64_t num_input_records = 0;
+  uint64_t num_input_deletion_records = 0;
+  uint64_t num_input_corrupt_records = 0;
+  uint64_t total_input_raw_key_bytes = 0;
+  uint64_t total_input_raw_value_bytes = 0;
+
+  // Single-Delete diagnostics for exceptional situations
+  uint64_t num_single_del_fallthru = 0;
+  uint64_t num_single_del_mismatch = 0;
+
+  // Blob related statistics
+  uint64_t num_blobs_read = 0;
+  uint64_t total_blob_bytes_read = 0;
+  uint64_t num_blobs_relocated = 0;
+  uint64_t total_blob_bytes_relocated = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
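[Reviewer note] The struct above is a plain aggregate, so counters collected by parallel subcompactions can be merged by field-wise addition. A hypothetical helper (not part of the vendored sources) illustrating that use:

    void AddIterationStats(CompactionIterationStats& total,
                           const CompactionIterationStats& part) {
      total.num_record_drop_user += part.num_record_drop_user;
      total.num_record_drop_hidden += part.num_record_drop_hidden;
      total.num_record_drop_obsolete += part.num_record_drop_obsolete;
      total.num_input_records += part.num_input_records;
      total.total_input_raw_key_bytes += part.total_input_raw_key_bytes;
      total.total_input_raw_value_bytes += part.total_input_raw_value_bytes;
      // ... the remaining counters are combined the same way.
    }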
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.cc
new file mode 100644
index 0000000000..5be7b565ac
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.cc
@@ -0,0 +1,1445 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/compaction/compaction_iterator.h"
+
+#include <iterator>
+#include <limits>
+
+#include "db/blob/blob_fetcher.h"
+#include "db/blob/blob_file_builder.h"
+#include "db/blob/blob_index.h"
+#include "db/blob/prefetch_buffer_collection.h"
+#include "db/snapshot_checker.h"
+#include "db/wide/wide_column_serialization.h"
+#include "logging/logging.h"
+#include "port/likely.h"
+#include "rocksdb/listener.h"
+#include "table/internal_iterator.h"
+#include "test_util/sync_point.h"
+
+namespace ROCKSDB_NAMESPACE {
+CompactionIterator::CompactionIterator(
+    InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
+    SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
+    SequenceNumber earliest_write_conflict_snapshot,
+    SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
+    Env* env, bool report_detailed_time, bool expect_valid_internal_key,
+    CompactionRangeDelAggregator* range_del_agg,
+    BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
+    bool enforce_single_del_contracts,
+    const std::atomic<bool>& manual_compaction_canceled,
+    const Compaction* compaction, const CompactionFilter* compaction_filter,
+    const std::atomic<bool>* shutting_down,
+    const std::shared_ptr<Logger> info_log,
+    const std::string* full_history_ts_low,
+    const SequenceNumber preserve_time_min_seqno,
+    const SequenceNumber preclude_last_level_min_seqno)
+    : CompactionIterator(
+          input, cmp, merge_helper, last_sequence, snapshots,
+          earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env,
+          report_detailed_time, expect_valid_internal_key, range_del_agg,
+          blob_file_builder, allow_data_in_errors, enforce_single_del_contracts,
+          manual_compaction_canceled,
+          std::unique_ptr<CompactionProxy>(
+              compaction ? new RealCompaction(compaction) : nullptr),
+          compaction_filter, shutting_down, info_log, full_history_ts_low,
+          preserve_time_min_seqno, preclude_last_level_min_seqno) {}
+
+CompactionIterator::CompactionIterator(
+    InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
+    SequenceNumber /*last_sequence*/, std::vector<SequenceNumber>* snapshots,
+    SequenceNumber earliest_write_conflict_snapshot,
+    SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
+    Env* env, bool report_detailed_time, bool expect_valid_internal_key,
+    CompactionRangeDelAggregator* range_del_agg,
+    BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
+    bool enforce_single_del_contracts,
+    const std::atomic<bool>& manual_compaction_canceled,
+    std::unique_ptr<CompactionProxy> compaction,
+    const CompactionFilter* compaction_filter,
+    const std::atomic<bool>* shutting_down,
+    const std::shared_ptr<Logger> info_log,
+    const std::string* full_history_ts_low,
+    const SequenceNumber preserve_time_min_seqno,
+    const SequenceNumber preclude_last_level_min_seqno)
+    : input_(input, cmp,
+             !compaction || compaction->DoesInputReferenceBlobFiles()),
+      cmp_(cmp),
+      merge_helper_(merge_helper),
+      snapshots_(snapshots),
+      earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
+      job_snapshot_(job_snapshot),
+      snapshot_checker_(snapshot_checker),
+      env_(env),
+      clock_(env_->GetSystemClock().get()),
+      report_detailed_time_(report_detailed_time),
+      expect_valid_internal_key_(expect_valid_internal_key),
+      range_del_agg_(range_del_agg),
+      blob_file_builder_(blob_file_builder),
+      compaction_(std::move(compaction)),
+      compaction_filter_(compaction_filter),
+      shutting_down_(shutting_down),
+      manual_compaction_canceled_(manual_compaction_canceled),
+      bottommost_level_(!compaction_ ?
false
+                             : compaction_->bottommost_level() &&
+                                   !compaction_->allow_ingest_behind()),
+      // snapshots_ cannot be nullptr, but we will assert later in the body of
+      // the constructor.
+      visible_at_tip_(snapshots_ ? snapshots_->empty() : false),
+      earliest_snapshot_(!snapshots_ || snapshots_->empty()
+                             ? kMaxSequenceNumber
+                             : snapshots_->at(0)),
+      info_log_(info_log),
+      allow_data_in_errors_(allow_data_in_errors),
+      enforce_single_del_contracts_(enforce_single_del_contracts),
+      timestamp_size_(cmp_ ? cmp_->timestamp_size() : 0),
+      full_history_ts_low_(full_history_ts_low),
+      current_user_key_sequence_(0),
+      current_user_key_snapshot_(0),
+      merge_out_iter_(merge_helper_),
+      blob_garbage_collection_cutoff_file_number_(
+          ComputeBlobGarbageCollectionCutoffFileNumber(compaction_.get())),
+      blob_fetcher_(CreateBlobFetcherIfNeeded(compaction_.get())),
+      prefetch_buffers_(
+          CreatePrefetchBufferCollectionIfNeeded(compaction_.get())),
+      current_key_committed_(false),
+      cmp_with_history_ts_low_(0),
+      level_(compaction_ == nullptr ? 0 : compaction_->level()),
+      preserve_time_min_seqno_(preserve_time_min_seqno),
+      preclude_last_level_min_seqno_(preclude_last_level_min_seqno) {
+  assert(snapshots_ != nullptr);
+  assert(preserve_time_min_seqno_ <= preclude_last_level_min_seqno_);
+
+  if (compaction_ != nullptr) {
+    level_ptrs_ = std::vector<size_t>(compaction_->number_levels(), 0);
+  }
+#ifndef NDEBUG
+  // findEarliestVisibleSnapshot assumes this ordering.
+  for (size_t i = 1; i < snapshots_->size(); ++i) {
+    assert(snapshots_->at(i - 1) < snapshots_->at(i));
+  }
+  assert(timestamp_size_ == 0 || !full_history_ts_low_ ||
+         timestamp_size_ == full_history_ts_low_->size());
+#endif
+  input_.SetPinnedItersMgr(&pinned_iters_mgr_);
+  // The default `merge_until_status_` does not need to be checked since it is
+  // overwritten as soon as `MergeUntil()` is called
+  merge_until_status_.PermitUncheckedError();
+  TEST_SYNC_POINT_CALLBACK("CompactionIterator:AfterInit", compaction_.get());
+}
+
+CompactionIterator::~CompactionIterator() {
+  // input_ iterator lifetime is longer than pinned_iters_mgr_ lifetime
+  input_.SetPinnedItersMgr(nullptr);
+}
+
+void CompactionIterator::ResetRecordCounts() {
+  iter_stats_.num_record_drop_user = 0;
+  iter_stats_.num_record_drop_hidden = 0;
+  iter_stats_.num_record_drop_obsolete = 0;
+  iter_stats_.num_record_drop_range_del = 0;
+  iter_stats_.num_range_del_drop_obsolete = 0;
+  iter_stats_.num_optimized_del_drop_obsolete = 0;
+}
+
+void CompactionIterator::SeekToFirst() {
+  NextFromInput();
+  PrepareOutput();
+}
+
+void CompactionIterator::Next() {
+  // If there is a merge output, return it before continuing to process the
+  // input.
+  if (merge_out_iter_.Valid()) {
+    merge_out_iter_.Next();
+
+    // Check if we returned all records of the merge output.
+    if (merge_out_iter_.Valid()) {
+      key_ = merge_out_iter_.key();
+      value_ = merge_out_iter_.value();
+      Status s = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
+      // MergeUntil stops when it encounters a corrupt key and does not
+      // include them in the result, so we expect the keys here to be valid.
+      if (!s.ok()) {
+        ROCKS_LOG_FATAL(
+            info_log_, "Invalid ikey %s in compaction. %s",
+            allow_data_in_errors_ ? key_.ToString(true).c_str() : "hidden",
+            s.getState());
+        assert(false);
+      }
+
+      // Keep current_key_ in sync.
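+      // (With user-defined timestamps enabled, the timestamp slice must be
+      // passed through UpdateInternalKey as well; sequence and type alone
+      // are not enough to rebuild the internal key.)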
+ if (0 == timestamp_size_) { + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + } else { + Slice ts = ikey_.GetTimestamp(timestamp_size_); + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type, &ts); + } + key_ = current_key_.GetInternalKey(); + ikey_.user_key = current_key_.GetUserKey(); + validity_info_.SetValid(ValidContext::kMerge1); + } else { + if (merge_until_status_.IsMergeInProgress()) { + // `Status::MergeInProgress()` tells us that the previous `MergeUntil()` + // produced only merge operands. Those merge operands were accessed and + // written out using `merge_out_iter_`. Since `merge_out_iter_` is + // exhausted at this point, all merge operands have been written out. + // + // Still, there may be a base value (PUT, DELETE, SINGLEDEL, etc.) that + // needs to be written out. Normally, `CompactionIterator` would skip it + // on the basis that it has already output something in the same + // snapshot stripe. To prevent this, we reset `has_current_user_key_` to + // trick the future iteration from finding out the snapshot stripe is + // unchanged. + has_current_user_key_ = false; + } + // We consumed all pinned merge operands, release pinned iterators + pinned_iters_mgr_.ReleasePinnedData(); + // MergeHelper moves the iterator to the first record after the merged + // records, so even though we reached the end of the merge output, we do + // not want to advance the iterator. + NextFromInput(); + } + } else { + // Only advance the input iterator if there is no merge output and the + // iterator is not already at the next record. + if (!at_next_) { + AdvanceInputIter(); + } + NextFromInput(); + } + + if (Valid()) { + // Record that we've outputted a record for the current key. + has_outputted_key_ = true; + } + + PrepareOutput(); +} + +bool CompactionIterator::InvokeFilterIfNeeded(bool* need_skip, + Slice* skip_until) { + if (!compaction_filter_) { + return true; + } + + if (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex && + ikey_.type != kTypeWideColumnEntity) { + return true; + } + + CompactionFilter::Decision decision = + CompactionFilter::Decision::kUndetermined; + CompactionFilter::ValueType value_type = + ikey_.type == kTypeValue ? CompactionFilter::ValueType::kValue + : ikey_.type == kTypeBlobIndex + ? CompactionFilter::ValueType::kBlobIndex + : CompactionFilter::ValueType::kWideColumnEntity; + + // Hack: pass internal key to BlobIndexCompactionFilter since it needs + // to get sequence number. + assert(compaction_filter_); + const Slice& filter_key = + (ikey_.type != kTypeBlobIndex || + !compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) + ? ikey_.user_key + : key_; + + compaction_filter_value_.clear(); + compaction_filter_skip_until_.Clear(); + + std::vector> new_columns; + + { + StopWatchNano timer(clock_, report_detailed_time_); + + if (ikey_.type == kTypeBlobIndex) { + decision = compaction_filter_->FilterBlobByKey( + level_, filter_key, &compaction_filter_value_, + compaction_filter_skip_until_.rep()); + if (decision == CompactionFilter::Decision::kUndetermined && + !compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) { + if (!compaction_) { + status_ = + Status::Corruption("Unexpected blob index outside of compaction"); + validity_info_.Invalidate(); + return false; + } + + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::InvokeFilterIfNeeded::TamperWithBlobIndex", + &value_); + + // For integrated BlobDB impl, CompactionIterator reads blob value. 
+ // For Stacked BlobDB impl, the corresponding CompactionFilter's + // FilterV2 method should read the blob value. + BlobIndex blob_index; + Status s = blob_index.DecodeFrom(value_); + if (!s.ok()) { + status_ = s; + validity_info_.Invalidate(); + return false; + } + + FilePrefetchBuffer* prefetch_buffer = + prefetch_buffers_ ? prefetch_buffers_->GetOrCreatePrefetchBuffer( + blob_index.file_number()) + : nullptr; + + uint64_t bytes_read = 0; + + assert(blob_fetcher_); + + s = blob_fetcher_->FetchBlob(ikey_.user_key, blob_index, + prefetch_buffer, &blob_value_, + &bytes_read); + if (!s.ok()) { + status_ = s; + validity_info_.Invalidate(); + return false; + } + + ++iter_stats_.num_blobs_read; + iter_stats_.total_blob_bytes_read += bytes_read; + + value_type = CompactionFilter::ValueType::kValue; + } + } + + if (decision == CompactionFilter::Decision::kUndetermined) { + const Slice* existing_val = nullptr; + const WideColumns* existing_col = nullptr; + + WideColumns existing_columns; + + if (ikey_.type != kTypeWideColumnEntity) { + if (!blob_value_.empty()) { + existing_val = &blob_value_; + } else { + existing_val = &value_; + } + } else { + Slice value_copy = value_; + const Status s = + WideColumnSerialization::Deserialize(value_copy, existing_columns); + + if (!s.ok()) { + status_ = s; + validity_info_.Invalidate(); + return false; + } + + existing_col = &existing_columns; + } + + decision = compaction_filter_->FilterV3( + level_, filter_key, value_type, existing_val, existing_col, + &compaction_filter_value_, &new_columns, + compaction_filter_skip_until_.rep()); + } + + iter_stats_.total_filter_time += + env_ != nullptr && report_detailed_time_ ? timer.ElapsedNanos() : 0; + } + + if (decision == CompactionFilter::Decision::kUndetermined) { + // Should not reach here, since FilterV2/FilterV3 should never return + // kUndetermined. + status_ = Status::NotSupported( + "FilterV2/FilterV3 should never return kUndetermined"); + validity_info_.Invalidate(); + return false; + } + + if (decision == CompactionFilter::Decision::kRemoveAndSkipUntil && + cmp_->Compare(*compaction_filter_skip_until_.rep(), ikey_.user_key) <= + 0) { + // Can't skip to a key smaller than the current one. + // Keep the key as per FilterV2/FilterV3 documentation. 
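+    // For example, if the current user key is "b" and the filter asks to
+    // skip until "a" or "b", honoring the request would move the iterator
+    // backwards or leave it in place, so the decision is downgraded to
+    // kKeep instead.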
+ decision = CompactionFilter::Decision::kKeep; + } + + if (decision == CompactionFilter::Decision::kRemove) { + // convert the current key to a delete; key_ is pointing into + // current_key_ at this point, so updating current_key_ updates key() + ikey_.type = kTypeDeletion; + current_key_.UpdateInternalKey(ikey_.sequence, kTypeDeletion); + // no value associated with delete + value_.clear(); + iter_stats_.num_record_drop_user++; + } else if (decision == CompactionFilter::Decision::kPurge) { + // convert the current key to a single delete; key_ is pointing into + // current_key_ at this point, so updating current_key_ updates key() + ikey_.type = kTypeSingleDeletion; + current_key_.UpdateInternalKey(ikey_.sequence, kTypeSingleDeletion); + // no value associated with single delete + value_.clear(); + iter_stats_.num_record_drop_user++; + } else if (decision == CompactionFilter::Decision::kChangeValue) { + if (ikey_.type != kTypeValue) { + ikey_.type = kTypeValue; + current_key_.UpdateInternalKey(ikey_.sequence, kTypeValue); + } + + value_ = compaction_filter_value_; + } else if (decision == CompactionFilter::Decision::kRemoveAndSkipUntil) { + *need_skip = true; + compaction_filter_skip_until_.ConvertFromUserKey(kMaxSequenceNumber, + kValueTypeForSeek); + *skip_until = compaction_filter_skip_until_.Encode(); + } else if (decision == CompactionFilter::Decision::kChangeBlobIndex) { + // Only the StackableDB-based BlobDB impl's compaction filter should return + // kChangeBlobIndex. Decision about rewriting blob and changing blob index + // in the integrated BlobDB impl is made in subsequent call to + // PrepareOutput() and its callees. + if (!compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) { + status_ = Status::NotSupported( + "Only stacked BlobDB's internal compaction filter can return " + "kChangeBlobIndex."); + validity_info_.Invalidate(); + return false; + } + + if (ikey_.type != kTypeBlobIndex) { + ikey_.type = kTypeBlobIndex; + current_key_.UpdateInternalKey(ikey_.sequence, kTypeBlobIndex); + } + + value_ = compaction_filter_value_; + } else if (decision == CompactionFilter::Decision::kIOError) { + if (!compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) { + status_ = Status::NotSupported( + "CompactionFilter for integrated BlobDB should not return kIOError"); + validity_info_.Invalidate(); + return false; + } + + status_ = Status::IOError("Failed to access blob during compaction filter"); + validity_info_.Invalidate(); + return false; + } else if (decision == CompactionFilter::Decision::kChangeWideColumnEntity) { + WideColumns sorted_columns; + + sorted_columns.reserve(new_columns.size()); + for (const auto& column : new_columns) { + sorted_columns.emplace_back(column.first, column.second); + } + + std::sort(sorted_columns.begin(), sorted_columns.end(), + [](const WideColumn& lhs, const WideColumn& rhs) { + return lhs.name().compare(rhs.name()) < 0; + }); + + { + const Status s = WideColumnSerialization::Serialize( + sorted_columns, compaction_filter_value_); + if (!s.ok()) { + status_ = s; + validity_info_.Invalidate(); + return false; + } + } + + if (ikey_.type != kTypeWideColumnEntity) { + ikey_.type = kTypeWideColumnEntity; + current_key_.UpdateInternalKey(ikey_.sequence, kTypeWideColumnEntity); + } + + value_ = compaction_filter_value_; + } + + return true; +} + +void CompactionIterator::NextFromInput() { + at_next_ = false; + validity_info_.Invalidate(); + + while (!Valid() && input_.Valid() && !IsPausingManualCompaction() && + !IsShuttingDown()) { + key_ 
= input_.key(); + value_ = input_.value(); + blob_value_.Reset(); + iter_stats_.num_input_records++; + is_range_del_ = input_.IsDeleteRangeSentinelKey(); + + Status pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_); + if (!pik_status.ok()) { + iter_stats_.num_input_corrupt_records++; + + // If `expect_valid_internal_key_` is false, return the corrupted key + // and let the caller decide what to do with it. + if (expect_valid_internal_key_) { + status_ = pik_status; + return; + } + key_ = current_key_.SetInternalKey(key_); + has_current_user_key_ = false; + current_user_key_sequence_ = kMaxSequenceNumber; + current_user_key_snapshot_ = 0; + validity_info_.SetValid(ValidContext::kParseKeyError); + break; + } + TEST_SYNC_POINT_CALLBACK("CompactionIterator:ProcessKV", &ikey_); + if (is_range_del_) { + validity_info_.SetValid(kRangeDeletion); + break; + } + // Update input statistics + if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion || + ikey_.type == kTypeDeletionWithTimestamp) { + iter_stats_.num_input_deletion_records++; + } + iter_stats_.total_input_raw_key_bytes += key_.size(); + iter_stats_.total_input_raw_value_bytes += value_.size(); + + // If need_skip is true, we should seek the input iterator + // to internal key skip_until and continue from there. + bool need_skip = false; + // Points either into compaction_filter_skip_until_ or into + // merge_helper_->compaction_filter_skip_until_. + Slice skip_until; + + bool user_key_equal_without_ts = false; + int cmp_ts = 0; + if (has_current_user_key_) { + user_key_equal_without_ts = + cmp_->EqualWithoutTimestamp(ikey_.user_key, current_user_key_); + // if timestamp_size_ > 0, then curr_ts_ has been initialized by a + // previous key. + cmp_ts = timestamp_size_ ? cmp_->CompareTimestamp( + ExtractTimestampFromUserKey( + ikey_.user_key, timestamp_size_), + curr_ts_) + : 0; + } + + // Check whether the user key changed. After this if statement current_key_ + // is a copy of the current input key (maybe converted to a delete by the + // compaction filter). ikey_.user_key is pointing to the copy. + if (!has_current_user_key_ || !user_key_equal_without_ts || cmp_ts != 0) { + // First occurrence of this user key + // Copy key for output + key_ = current_key_.SetInternalKey(key_, &ikey_); + + int prev_cmp_with_ts_low = + !full_history_ts_low_ ? 0 + : curr_ts_.empty() + ? 0 + : cmp_->CompareTimestamp(curr_ts_, *full_history_ts_low_); + + // If timestamp_size_ > 0, then copy from ikey_ to curr_ts_ for the use + // in next iteration to compare with the timestamp of next key. + UpdateTimestampAndCompareWithFullHistoryLow(); + + // If + // (1) !has_current_user_key_, OR + // (2) timestamp is disabled, OR + // (3) all history will be preserved, OR + // (4) user key (excluding timestamp) is different from previous key, OR + // (5) timestamp is NO older than *full_history_ts_low_, OR + // (6) timestamp is the largest one older than full_history_ts_low_, + // then current_user_key_ must be treated as a different user key. + // This means, if a user key (excluding ts) is the same as the previous + // user key, and its ts is older than *full_history_ts_low_, then we + // consider this key for GC, e.g. it may be dropped if certain conditions + // match. + if (!has_current_user_key_ || !timestamp_size_ || !full_history_ts_low_ || + !user_key_equal_without_ts || cmp_with_history_ts_low_ >= 0 || + prev_cmp_with_ts_low >= 0) { + // Initialize for future comparison for rule (A) and etc. 
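+        // ("Rule (A)" is the drop-hidden rule applied further below: an
+        // older version of a key may be dropped when a newer version of the
+        // same key is visible within the same snapshot stripe.)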
+ current_user_key_sequence_ = kMaxSequenceNumber; + current_user_key_snapshot_ = 0; + has_current_user_key_ = true; + } + current_user_key_ = ikey_.user_key; + + has_outputted_key_ = false; + + last_key_seq_zeroed_ = false; + + current_key_committed_ = KeyCommitted(ikey_.sequence); + + // Apply the compaction filter to the first committed version of the user + // key. + if (current_key_committed_ && + !InvokeFilterIfNeeded(&need_skip, &skip_until)) { + break; + } + } else { + // Update the current key to reflect the new sequence number/type without + // copying the user key. + // TODO(rven): Compaction filter does not process keys in this path + // Need to have the compaction filter process multiple versions + // if we have versions on both sides of a snapshot + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + key_ = current_key_.GetInternalKey(); + ikey_.user_key = current_key_.GetUserKey(); + + // Note that newer version of a key is ordered before older versions. If a + // newer version of a key is committed, so as the older version. No need + // to query snapshot_checker_ in that case. + if (UNLIKELY(!current_key_committed_)) { + assert(snapshot_checker_ != nullptr); + current_key_committed_ = KeyCommitted(ikey_.sequence); + // Apply the compaction filter to the first committed version of the + // user key. + if (current_key_committed_ && + !InvokeFilterIfNeeded(&need_skip, &skip_until)) { + break; + } + } + } + + if (UNLIKELY(!current_key_committed_)) { + assert(snapshot_checker_ != nullptr); + validity_info_.SetValid(ValidContext::kCurrentKeyUncommitted); + break; + } + + // If there are no snapshots, then this kv affect visibility at tip. + // Otherwise, search though all existing snapshots to find the earliest + // snapshot that is affected by this kv. + SequenceNumber last_sequence = current_user_key_sequence_; + current_user_key_sequence_ = ikey_.sequence; + SequenceNumber last_snapshot = current_user_key_snapshot_; + SequenceNumber prev_snapshot = 0; // 0 means no previous snapshot + current_user_key_snapshot_ = + visible_at_tip_ + ? earliest_snapshot_ + : findEarliestVisibleSnapshot(ikey_.sequence, &prev_snapshot); + + if (need_skip) { + // This case is handled below. + } else if (clear_and_output_next_key_) { + // In the previous iteration we encountered a single delete that we could + // not compact out. We will keep this Put, but can drop it's data. + // (See Optimization 3, below.) 
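+      // The surviving PUT is normalized to kTypeValue and emitted with an
+      // empty value below: only the key must remain visible for
+      // write-conflict checking; its data will never be read.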
+ if (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex && + ikey_.type != kTypeWideColumnEntity) { + ROCKS_LOG_FATAL(info_log_, "Unexpected key %s for compaction output", + ikey_.DebugString(allow_data_in_errors_, true).c_str()); + assert(false); + } + if (current_user_key_snapshot_ < last_snapshot) { + ROCKS_LOG_FATAL(info_log_, + "key %s, current_user_key_snapshot_ (%" PRIu64 + ") < last_snapshot (%" PRIu64 ")", + ikey_.DebugString(allow_data_in_errors_, true).c_str(), + current_user_key_snapshot_, last_snapshot); + assert(false); + } + + if (ikey_.type == kTypeBlobIndex || ikey_.type == kTypeWideColumnEntity) { + ikey_.type = kTypeValue; + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + } + + value_.clear(); + validity_info_.SetValid(ValidContext::kKeepSDAndClearPut); + clear_and_output_next_key_ = false; + } else if (ikey_.type == kTypeSingleDeletion) { + // We can compact out a SingleDelete if: + // 1) We encounter the corresponding PUT -OR- we know that this key + // doesn't appear past this output level + // =AND= + // 2) We've already returned a record in this snapshot -OR- + // there are no earlier earliest_write_conflict_snapshot. + // + // A note about 2) above: + // we try to determine whether there is any earlier write conflict + // checking snapshot by calling DefinitelyInSnapshot() with seq and + // earliest_write_conflict_snapshot as arguments. For write-prepared + // and write-unprepared transactions, if earliest_write_conflict_snapshot + // is evicted from WritePreparedTxnDB::commit_cache, then + // DefinitelyInSnapshot(seq, earliest_write_conflict_snapshot) returns + // false, even if the seq is actually visible within + // earliest_write_conflict_snapshot. Consequently, CompactionIterator + // may try to zero out its sequence number, thus hitting assertion error + // in debug mode or cause incorrect DBIter return result. + // We observe that earliest_write_conflict_snapshot >= earliest_snapshot, + // and the seq zeroing logic depends on + // DefinitelyInSnapshot(seq, earliest_snapshot). Therefore, if we cannot + // determine whether seq is **definitely** in + // earliest_write_conflict_snapshot, then we can additionally check if + // seq is definitely in earliest_snapshot. If the latter holds, then the + // former holds too. + // + // Rule 1 is needed for SingleDelete correctness. Rule 2 is needed to + // allow Transactions to do write-conflict checking (if we compacted away + // all keys, then we wouldn't know that a write happened in this + // snapshot). If there is no earlier snapshot, then we know that there + // are no active transactions that need to know about any writes. + // + // Optimization 3: + // If we encounter a SingleDelete followed by a PUT and Rule 2 is NOT + // true, then we must output a SingleDelete. In this case, we will decide + // to also output the PUT. While we are compacting less by outputting the + // PUT now, hopefully this will lead to better compaction in the future + // when Rule 2 is later true (Ie, We are hoping we can later compact out + // both the SingleDelete and the Put, while we couldn't if we only + // outputted the SingleDelete now). + // In this case, we can save space by removing the PUT's value as it will + // never be read. + // + // Deletes and Merges are not supported on the same key that has a + // SingleDelete as it is not possible to correctly do any partial + // compaction of such a combination of operations. 
The result of mixing + // those operations for a given key is documented as being undefined. So + // we can choose how to handle such a combinations of operations. We will + // try to compact out as much as we can in these cases. + // We will report counts on these anomalous cases. + // + // Note: If timestamp is enabled, then record will be eligible for + // deletion, only if, along with above conditions (Rule 1 and Rule 2) + // full_history_ts_low_ is specified and timestamp for that key is less + // than *full_history_ts_low_. If it's not eligible for deletion, then we + // will output the SingleDelete. For Optimization 3 also, if + // full_history_ts_low_ is specified and timestamp for the key is less + // than *full_history_ts_low_ then only optimization will be applied. + + // The easiest way to process a SingleDelete during iteration is to peek + // ahead at the next key. + const bool is_timestamp_eligible_for_gc = + (timestamp_size_ == 0 || + (full_history_ts_low_ && cmp_with_history_ts_low_ < 0)); + + ParsedInternalKey next_ikey; + AdvanceInputIter(); + while (input_.Valid() && input_.IsDeleteRangeSentinelKey() && + ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_) + .ok() && + cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) { + // skip range tombstone start keys with the same user key + // since they are not "real" point keys. + AdvanceInputIter(); + } + + // Check whether the next key exists, is not corrupt, and is the same key + // as the single delete. + if (input_.Valid() && + ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_) + .ok() && + cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) { + assert(!input_.IsDeleteRangeSentinelKey()); +#ifndef NDEBUG + const Compaction* c = + compaction_ ? compaction_->real_compaction() : nullptr; +#endif + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:SingleDelete:1", + const_cast(c)); + if (last_key_seq_zeroed_) { + ++iter_stats_.num_record_drop_hidden; + ++iter_stats_.num_record_drop_obsolete; + assert(bottommost_level_); + AdvanceInputIter(); + } else if (prev_snapshot == 0 || + DefinitelyNotInSnapshot(next_ikey.sequence, prev_snapshot)) { + // Check whether the next key belongs to the same snapshot as the + // SingleDelete. + + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:SingleDelete:2", nullptr); + if (next_ikey.type == kTypeSingleDeletion) { + // We encountered two SingleDeletes for same key in a row. This + // could be due to unexpected user input. If write-(un)prepared + // transaction is used, this could also be due to releasing an old + // snapshot between a Put and its matching SingleDelete. + // Skip the first SingleDelete and let the next iteration decide + // how to handle the second SingleDelete. + + // First SingleDelete has been skipped since we already called + // input_.Next(). + ++iter_stats_.num_record_drop_obsolete; + ++iter_stats_.num_single_del_mismatch; + } else if (next_ikey.type == kTypeDeletion) { + std::ostringstream oss; + oss << "Found SD and type: " << static_cast(next_ikey.type) + << " on the same key, violating the contract " + "of SingleDelete. Check your application to make sure the " + "application does not mix SingleDelete and Delete for " + "the same key. If you are using " + "write-prepared/write-unprepared transactions, and use " + "SingleDelete to delete certain keys, then make sure " + "TransactionDBOptions::rollback_deletion_type_callback is " + "configured properly. 
Mixing SD and DEL can lead to " + "undefined behaviors"; + ++iter_stats_.num_record_drop_obsolete; + ++iter_stats_.num_single_del_mismatch; + if (enforce_single_del_contracts_) { + ROCKS_LOG_ERROR(info_log_, "%s", oss.str().c_str()); + validity_info_.Invalidate(); + status_ = Status::Corruption(oss.str()); + return; + } + ROCKS_LOG_WARN(info_log_, "%s", oss.str().c_str()); + } else if (!is_timestamp_eligible_for_gc) { + // We cannot drop the SingleDelete as timestamp is enabled, and + // timestamp of this key is greater than or equal to + // *full_history_ts_low_. We will output the SingleDelete. + validity_info_.SetValid(ValidContext::kKeepTsHistory); + } else if (has_outputted_key_ || + DefinitelyInSnapshot(ikey_.sequence, + earliest_write_conflict_snapshot_) || + (earliest_snapshot_ < earliest_write_conflict_snapshot_ && + DefinitelyInSnapshot(ikey_.sequence, + earliest_snapshot_))) { + // Found a matching value, we can drop the single delete and the + // value. It is safe to drop both records since we've already + // outputted a key in this snapshot, or there is no earlier + // snapshot (Rule 2 above). + + // Note: it doesn't matter whether the second key is a Put or if it + // is an unexpected Merge or Delete. We will compact it out + // either way. We will maintain counts of how many mismatches + // happened + if (next_ikey.type != kTypeValue && + next_ikey.type != kTypeBlobIndex && + next_ikey.type != kTypeWideColumnEntity) { + ++iter_stats_.num_single_del_mismatch; + } + + ++iter_stats_.num_record_drop_hidden; + ++iter_stats_.num_record_drop_obsolete; + // Already called input_.Next() once. Call it a second time to + // skip past the second key. + AdvanceInputIter(); + } else { + // Found a matching value, but we cannot drop both keys since + // there is an earlier snapshot and we need to leave behind a record + // to know that a write happened in this snapshot (Rule 2 above). + // Clear the value and output the SingleDelete. (The value will be + // outputted on the next iteration.) + + // Setting valid_ to true will output the current SingleDelete + validity_info_.SetValid(ValidContext::kKeepSDForConflictCheck); + + // Set up the Put to be outputted in the next iteration. + // (Optimization 3). + clear_and_output_next_key_ = true; + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:KeepSDForWW", + /*arg=*/nullptr); + } + } else { + // We hit the next snapshot without hitting a put, so the iterator + // returns the single delete. + validity_info_.SetValid(ValidContext::kKeepSDForSnapshot); + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:SingleDelete:3", + const_cast(c)); + } + } else { + // We are at the end of the input, could not parse the next key, or hit + // a different key. The iterator returns the single delete if the key + // possibly exists beyond the current output level. We set + // has_current_user_key to false so that if the iterator is at the next + // key, we do not compare it again against the previous key at the next + // iteration. If the next key is corrupt, we return before the + // comparison, so the value of has_current_user_key does not matter. + has_current_user_key_ = false; + if (compaction_ != nullptr && + DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) && + compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key, + &level_ptrs_) && + is_timestamp_eligible_for_gc) { + // Key doesn't exist outside of this range. + // Can compact out this SingleDelete. 
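+          // (Safe to drop: the sequence number is definitely visible in the
+          // earliest snapshot and the key does not appear beyond the output
+          // level, so neither readers nor write-conflict checks can still
+          // need this tombstone.)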
+ ++iter_stats_.num_record_drop_obsolete; + ++iter_stats_.num_single_del_fallthru; + if (!bottommost_level_) { + ++iter_stats_.num_optimized_del_drop_obsolete; + } + } else if (last_key_seq_zeroed_) { + // Skip. + ++iter_stats_.num_record_drop_hidden; + ++iter_stats_.num_record_drop_obsolete; + assert(bottommost_level_); + } else { + // Output SingleDelete + validity_info_.SetValid(ValidContext::kKeepSD); + } + } + + if (Valid()) { + at_next_ = true; + } + } else if (last_snapshot == current_user_key_snapshot_ || + (last_snapshot > 0 && + last_snapshot < current_user_key_snapshot_)) { + // If the earliest snapshot is which this key is visible in + // is the same as the visibility of a previous instance of the + // same key, then this kv is not visible in any snapshot. + // Hidden by an newer entry for same user key + // + // Note: Dropping this key will not affect TransactionDB write-conflict + // checking since there has already been a record returned for this key + // in this snapshot. + if (last_sequence < current_user_key_sequence_) { + ROCKS_LOG_FATAL(info_log_, + "key %s, last_sequence (%" PRIu64 + ") < current_user_key_sequence_ (%" PRIu64 ")", + ikey_.DebugString(allow_data_in_errors_, true).c_str(), + last_sequence, current_user_key_sequence_); + assert(false); + } + + ++iter_stats_.num_record_drop_hidden; // rule (A) + AdvanceInputIter(); + } else if (compaction_ != nullptr && + (ikey_.type == kTypeDeletion || + (ikey_.type == kTypeDeletionWithTimestamp && + cmp_with_history_ts_low_ < 0)) && + DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) && + compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key, + &level_ptrs_)) { + // TODO(noetzli): This is the only place where we use compaction_ + // (besides the constructor). We should probably get rid of this + // dependency and find a way to do similar filtering during flushes. + // + // For this user key: + // (1) there is no data in higher levels + // (2) data in lower levels will have larger sequence numbers + // (3) data in layers that are being compacted here and have + // smaller sequence numbers will be dropped in the next + // few iterations of this loop (by rule (A) above). + // Therefore this deletion marker is obsolete and can be dropped. + // + // Note: Dropping this Delete will not affect TransactionDB + // write-conflict checking since it is earlier than any snapshot. + // + // It seems that we can also drop deletion later than earliest snapshot + // given that: + // (1) The deletion is earlier than earliest_write_conflict_snapshot, and + // (2) No value exist earlier than the deletion. + // + // Note also that a deletion marker of type kTypeDeletionWithTimestamp + // will be treated as a different user key unless the timestamp is older + // than *full_history_ts_low_. + ++iter_stats_.num_record_drop_obsolete; + if (!bottommost_level_) { + ++iter_stats_.num_optimized_del_drop_obsolete; + } + AdvanceInputIter(); + } else if ((ikey_.type == kTypeDeletion || + (ikey_.type == kTypeDeletionWithTimestamp && + cmp_with_history_ts_low_ < 0)) && + bottommost_level_) { + // Handle the case where we have a delete key at the bottom most level + // We can skip outputting the key iff there are no subsequent puts for + // this key + assert(!compaction_ || compaction_->KeyNotExistsBeyondOutputLevel( + ikey_.user_key, &level_ptrs_)); + ParsedInternalKey next_ikey; + AdvanceInputIter(); +#ifndef NDEBUG + const Compaction* c = + compaction_ ? 
compaction_->real_compaction() : nullptr; +#endif + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:BottommostDelete:1", + const_cast(c)); + // Skip over all versions of this key that happen to occur in the same + // snapshot range as the delete. + // + // Note that a deletion marker of type kTypeDeletionWithTimestamp will be + // considered to have a different user key unless the timestamp is older + // than *full_history_ts_low_. + // + // Range tombstone start keys are skipped as they are not "real" keys. + while (!IsPausingManualCompaction() && !IsShuttingDown() && + input_.Valid() && + (ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_) + .ok()) && + cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key) && + (prev_snapshot == 0 || input_.IsDeleteRangeSentinelKey() || + DefinitelyNotInSnapshot(next_ikey.sequence, prev_snapshot))) { + AdvanceInputIter(); + } + // If you find you still need to output a row with this key, we need to + // output the delete too + if (input_.Valid() && + (ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_) + .ok()) && + cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) { + validity_info_.SetValid(ValidContext::kKeepDel); + at_next_ = true; + } + } else if (ikey_.type == kTypeMerge) { + if (!merge_helper_->HasOperator()) { + status_ = Status::InvalidArgument( + "merge_operator is not properly initialized."); + return; + } + + pinned_iters_mgr_.StartPinning(); + + // We know the merge type entry is not hidden, otherwise we would + // have hit (A) + // We encapsulate the merge related state machine in a different + // object to minimize change to the existing flow. + merge_until_status_ = merge_helper_->MergeUntil( + &input_, range_del_agg_, prev_snapshot, bottommost_level_, + allow_data_in_errors_, blob_fetcher_.get(), full_history_ts_low_, + prefetch_buffers_.get(), &iter_stats_); + merge_out_iter_.SeekToFirst(); + + if (!merge_until_status_.ok() && + !merge_until_status_.IsMergeInProgress()) { + status_ = merge_until_status_; + return; + } else if (merge_out_iter_.Valid()) { + // NOTE: key, value, and ikey_ refer to old entries. + // These will be correctly set below. + key_ = merge_out_iter_.key(); + value_ = merge_out_iter_.value(); + pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_); + // MergeUntil stops when it encounters a corrupt key and does not + // include them in the result, so we expect the keys here to valid. + if (!pik_status.ok()) { + ROCKS_LOG_FATAL( + info_log_, "Invalid key %s in compaction. %s", + allow_data_in_errors_ ? key_.ToString(true).c_str() : "hidden", + pik_status.getState()); + assert(false); + } + // Keep current_key_ in sync. + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + key_ = current_key_.GetInternalKey(); + ikey_.user_key = current_key_.GetUserKey(); + validity_info_.SetValid(ValidContext::kMerge2); + } else { + // all merge operands were filtered out. reset the user key, since the + // batch consumed by the merge operator should not shadow any keys + // coming after the merges + has_current_user_key_ = false; + pinned_iters_mgr_.ReleasePinnedData(); + + if (merge_helper_->FilteredUntil(&skip_until)) { + need_skip = true; + } + } + } else { + // 1. new user key -OR- + // 2. different snapshot stripe + // If user-defined timestamp is enabled, we consider keys for GC if they + // are below history_ts_low_. 
CompactionRangeDelAggregator::ShouldDelete() + // only considers range deletions that are at or below history_ts_low_ and + // trim_ts_. We drop keys here that are below history_ts_low_ and are + // covered by a range tombstone that is at or below history_ts_low_ and + // trim_ts. + bool should_delete = false; + if (!timestamp_size_ || cmp_with_history_ts_low_ < 0) { + should_delete = range_del_agg_->ShouldDelete( + key_, RangeDelPositioningMode::kForwardTraversal); + } + if (should_delete) { + ++iter_stats_.num_record_drop_hidden; + ++iter_stats_.num_record_drop_range_del; + AdvanceInputIter(); + } else { + validity_info_.SetValid(ValidContext::kNewUserKey); + } + } + + if (need_skip) { + SkipUntil(skip_until); + } + } + + if (!Valid() && IsShuttingDown()) { + status_ = Status::ShutdownInProgress(); + } + + if (IsPausingManualCompaction()) { + status_ = Status::Incomplete(Status::SubCode::kManualCompactionPaused); + } + + // Propagate corruption status from memtable itereator + if (!input_.Valid() && input_.status().IsCorruption()) { + status_ = input_.status(); + } +} + +bool CompactionIterator::ExtractLargeValueIfNeededImpl() { + if (!blob_file_builder_) { + return false; + } + + blob_index_.clear(); + const Status s = blob_file_builder_->Add(user_key(), value_, &blob_index_); + + if (!s.ok()) { + status_ = s; + validity_info_.Invalidate(); + + return false; + } + + if (blob_index_.empty()) { + return false; + } + + value_ = blob_index_; + + return true; +} + +void CompactionIterator::ExtractLargeValueIfNeeded() { + assert(ikey_.type == kTypeValue); + + if (!ExtractLargeValueIfNeededImpl()) { + return; + } + + ikey_.type = kTypeBlobIndex; + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); +} + +void CompactionIterator::GarbageCollectBlobIfNeeded() { + assert(ikey_.type == kTypeBlobIndex); + + if (!compaction_) { + return; + } + + // GC for integrated BlobDB + if (compaction_->enable_blob_garbage_collection()) { + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::GarbageCollectBlobIfNeeded::TamperWithBlobIndex", + &value_); + + BlobIndex blob_index; + + { + const Status s = blob_index.DecodeFrom(value_); + + if (!s.ok()) { + status_ = s; + validity_info_.Invalidate(); + + return; + } + } + + if (blob_index.file_number() >= + blob_garbage_collection_cutoff_file_number_) { + return; + } + + FilePrefetchBuffer* prefetch_buffer = + prefetch_buffers_ ? 
prefetch_buffers_->GetOrCreatePrefetchBuffer( + blob_index.file_number()) + : nullptr; + + uint64_t bytes_read = 0; + + { + assert(blob_fetcher_); + + const Status s = blob_fetcher_->FetchBlob( + user_key(), blob_index, prefetch_buffer, &blob_value_, &bytes_read); + + if (!s.ok()) { + status_ = s; + validity_info_.Invalidate(); + + return; + } + } + + ++iter_stats_.num_blobs_read; + iter_stats_.total_blob_bytes_read += bytes_read; + + ++iter_stats_.num_blobs_relocated; + iter_stats_.total_blob_bytes_relocated += blob_index.size(); + + value_ = blob_value_; + + if (ExtractLargeValueIfNeededImpl()) { + return; + } + + ikey_.type = kTypeValue; + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + + return; + } + + // GC for stacked BlobDB + if (compaction_filter_ && + compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) { + const auto blob_decision = compaction_filter_->PrepareBlobOutput( + user_key(), value_, &compaction_filter_value_); + + if (blob_decision == CompactionFilter::BlobDecision::kCorruption) { + status_ = + Status::Corruption("Corrupted blob reference encountered during GC"); + validity_info_.Invalidate(); + + return; + } + + if (blob_decision == CompactionFilter::BlobDecision::kIOError) { + status_ = Status::IOError("Could not relocate blob during GC"); + validity_info_.Invalidate(); + + return; + } + + if (blob_decision == CompactionFilter::BlobDecision::kChangeValue) { + value_ = compaction_filter_value_; + + return; + } + } +} + +void CompactionIterator::DecideOutputLevel() { + assert(compaction_->SupportsPerKeyPlacement()); + output_to_penultimate_level_ = false; + // if the key is newer than the cutoff sequence or within the earliest + // snapshot, it should output to the penultimate level. + if (ikey_.sequence > preclude_last_level_min_seqno_ || + ikey_.sequence > earliest_snapshot_) { + output_to_penultimate_level_ = true; + } + +#ifndef NDEBUG + // Could be overridden by unittest + PerKeyPlacementContext context(level_, ikey_.user_key, value_, ikey_.sequence, + output_to_penultimate_level_); + TEST_SYNC_POINT_CALLBACK("CompactionIterator::PrepareOutput.context", + &context); + if (ikey_.sequence > earliest_snapshot_) { + output_to_penultimate_level_ = true; + } +#endif // NDEBUG + + if (output_to_penultimate_level_) { + // If it's decided to output to the penultimate level, but unsafe to do so, + // still output to the last level. For example, moving the data from a lower + // level to a higher level outside of the higher-level input key range is + // considered unsafe, because the key may conflict with higher-level SSTs + // not from this compaction. + // TODO: add statistic for declined output_to_penultimate_level + bool safe_to_penultimate_level = + compaction_->WithinPenultimateLevelOutputRange(ikey_.user_key); + if (!safe_to_penultimate_level) { + output_to_penultimate_level_ = false; + // It could happen when disable/enable `last_level_temperature` while + // holding a snapshot. When `last_level_temperature` is not set + // (==kUnknown), the data newer than any snapshot is pushed to the last + // level, but when the per_key_placement feature is enabled on the fly, + // the data later than the snapshot has to be moved to the penultimate + // level, which may or may not be safe. So the user needs to make sure all + // snapshot is released before enabling `last_level_temperature` feature + // We will migrate the feature to `last_level_temperature` and maybe make + // it not dynamically changeable. 
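+      // If the sequence number is still newer than the earliest snapshot at
+      // this point, the key would be demoted below a live snapshot, which is
+      // why a Corruption status is raised below rather than silently
+      // misplacing the data.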
+ if (ikey_.sequence > earliest_snapshot_) { + status_ = Status::Corruption( + "Unsafe to store Seq later than snapshot in the last level if " + "per_key_placement is enabled"); + } + } + } +} + +void CompactionIterator::PrepareOutput() { + if (Valid()) { + if (LIKELY(!is_range_del_)) { + if (ikey_.type == kTypeValue) { + ExtractLargeValueIfNeeded(); + } else if (ikey_.type == kTypeBlobIndex) { + GarbageCollectBlobIfNeeded(); + } + } + + if (compaction_ != nullptr && compaction_->SupportsPerKeyPlacement()) { + DecideOutputLevel(); + } + + // Zeroing out the sequence number leads to better compression. + // If this is the bottommost level (no files in lower levels) + // and the earliest snapshot is larger than this seqno + // and the userkey differs from the last userkey in compaction + // then we can squash the seqno to zero. + // + // This is safe for TransactionDB write-conflict checking since transactions + // only care about sequence number larger than any active snapshots. + // + // Can we do the same for levels above bottom level as long as + // KeyNotExistsBeyondOutputLevel() return true? + if (Valid() && compaction_ != nullptr && + !compaction_->allow_ingest_behind() && bottommost_level_ && + DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) && + ikey_.type != kTypeMerge && current_key_committed_ && + !output_to_penultimate_level_ && + ikey_.sequence < preserve_time_min_seqno_ && !is_range_del_) { + if (ikey_.type == kTypeDeletion || + (ikey_.type == kTypeSingleDeletion && timestamp_size_ == 0)) { + ROCKS_LOG_FATAL( + info_log_, + "Unexpected key %s for seq-zero optimization. " + "earliest_snapshot %" PRIu64 + ", earliest_write_conflict_snapshot %" PRIu64 + " job_snapshot %" PRIu64 + ". timestamp_size: %d full_history_ts_low_ %s. validity %x", + ikey_.DebugString(allow_data_in_errors_, true).c_str(), + earliest_snapshot_, earliest_write_conflict_snapshot_, + job_snapshot_, static_cast(timestamp_size_), + full_history_ts_low_ != nullptr + ? Slice(*full_history_ts_low_).ToString(true).c_str() + : "null", + validity_info_.rep); + assert(false); + } + ikey_.sequence = 0; + last_key_seq_zeroed_ = true; + TEST_SYNC_POINT_CALLBACK("CompactionIterator::PrepareOutput:ZeroingSeq", + &ikey_); + if (!timestamp_size_) { + current_key_.UpdateInternalKey(0, ikey_.type); + } else if (full_history_ts_low_ && cmp_with_history_ts_low_ < 0) { + // We can also zero out timestamp for better compression. + // For the same user key (excluding timestamp), the timestamp-based + // history can be collapsed to save some space if the timestamp is + // older than *full_history_ts_low_. 
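+        // (The minimum timestamp is encoded as timestamp_size_ zero bytes;
+        // keys whose timestamps are older than *full_history_ts_low_ can all
+        // share it, which improves compression.)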
+        const std::string kTsMin(timestamp_size_, static_cast<char>(0));
+        const Slice ts_slice = kTsMin;
+        ikey_.SetTimestamp(ts_slice);
+        current_key_.UpdateInternalKey(0, ikey_.type, &ts_slice);
+      }
+    }
+  }
+}
+
+inline SequenceNumber CompactionIterator::findEarliestVisibleSnapshot(
+    SequenceNumber in, SequenceNumber* prev_snapshot) {
+  assert(snapshots_->size());
+  if (snapshots_->size() == 0) {
+    ROCKS_LOG_FATAL(info_log_,
+                    "No snapshot left in findEarliestVisibleSnapshot");
+  }
+  auto snapshots_iter =
+      std::lower_bound(snapshots_->begin(), snapshots_->end(), in);
+  assert(prev_snapshot != nullptr);
+  if (snapshots_iter == snapshots_->begin()) {
+    *prev_snapshot = 0;
+  } else {
+    *prev_snapshot = *std::prev(snapshots_iter);
+    if (*prev_snapshot >= in) {
+      ROCKS_LOG_FATAL(info_log_,
+                      "*prev_snapshot (%" PRIu64 ") >= in (%" PRIu64
+                      ") in findEarliestVisibleSnapshot",
+                      *prev_snapshot, in);
+      assert(false);
+    }
+  }
+  if (snapshot_checker_ == nullptr) {
+    return snapshots_iter != snapshots_->end() ? *snapshots_iter
+                                               : kMaxSequenceNumber;
+  }
+  bool has_released_snapshot = !released_snapshots_.empty();
+  for (; snapshots_iter != snapshots_->end(); ++snapshots_iter) {
+    auto cur = *snapshots_iter;
+    if (in > cur) {
+      ROCKS_LOG_FATAL(info_log_,
+                      "in (%" PRIu64 ") > cur (%" PRIu64
+                      ") in findEarliestVisibleSnapshot",
+                      in, cur);
+      assert(false);
+    }
+    // Skip if cur is in released_snapshots.
+    if (has_released_snapshot && released_snapshots_.count(cur) > 0) {
+      continue;
+    }
+    auto res = snapshot_checker_->CheckInSnapshot(in, cur);
+    if (res == SnapshotCheckerResult::kInSnapshot) {
+      return cur;
+    } else if (res == SnapshotCheckerResult::kSnapshotReleased) {
+      released_snapshots_.insert(cur);
+    }
+    *prev_snapshot = cur;
+  }
+  return kMaxSequenceNumber;
+}
+
+uint64_t CompactionIterator::ComputeBlobGarbageCollectionCutoffFileNumber(
+    const CompactionProxy* compaction) {
+  if (!compaction) {
+    return 0;
+  }
+
+  if (!compaction->enable_blob_garbage_collection()) {
+    return 0;
+  }
+
+  const Version* const version = compaction->input_version();
+  assert(version);
+
+  const VersionStorageInfo* const storage_info = version->storage_info();
+  assert(storage_info);
+
+  const auto& blob_files = storage_info->GetBlobFiles();
+
+  const size_t cutoff_index = static_cast<size_t>(
+      compaction->blob_garbage_collection_age_cutoff() * blob_files.size());
+
+  if (cutoff_index >= blob_files.size()) {
+    return std::numeric_limits<uint64_t>::max();
+  }
+
+  const auto& meta = blob_files[cutoff_index];
+  assert(meta);
+
+  return meta->GetBlobFileNumber();
+}
+
+std::unique_ptr<BlobFetcher> CompactionIterator::CreateBlobFetcherIfNeeded(
+    const CompactionProxy* compaction) {
+  if (!compaction) {
+    return nullptr;
+  }
+
+  const Version* const version = compaction->input_version();
+  if (!version) {
+    return nullptr;
+  }
+
+  ReadOptions read_options;
+  read_options.io_activity = Env::IOActivity::kCompaction;
+  read_options.fill_cache = false;
+
+  return std::unique_ptr<BlobFetcher>(new BlobFetcher(version, read_options));
+}
+
+std::unique_ptr<PrefetchBufferCollection>
+CompactionIterator::CreatePrefetchBufferCollectionIfNeeded(
+    const CompactionProxy* compaction) {
+  if (!compaction) {
+    return nullptr;
+  }
+
+  if (!compaction->input_version()) {
+    return nullptr;
+  }
+
+  if (compaction->allow_mmap_reads()) {
+    return nullptr;
+  }
+
+  const uint64_t readahead_size = compaction->blob_compaction_readahead_size();
+  if (!readahead_size) {
+    return nullptr;
+  }
+
+  return std::unique_ptr<PrefetchBufferCollection>(
+      new PrefetchBufferCollection(readahead_size));
+}
+
+}  // namespace ROCKSDB_NAMESPACE
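[Reviewer note] A quick worked example of the cutoff computation in `ComputeBlobGarbageCollectionCutoffFileNumber` above, with illustrative values only:

    // Suppose the version holds 4 blob files numbered {8, 11, 14, 17} and
    // blob_garbage_collection_age_cutoff is 0.5:
    const std::vector<uint64_t> blob_file_numbers = {8, 11, 14, 17};
    const double age_cutoff = 0.5;
    const size_t cutoff_index =
        static_cast<size_t>(age_cutoff * blob_file_numbers.size());   // == 2
    const uint64_t cutoff_file_number = blob_file_numbers[cutoff_index];  // 14
    // Only blob references with file_number < 14 (files 8 and 11) are
    // relocated by GarbageCollectBlobIfNeeded.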
a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.h
new file mode 100644
index 0000000000..ea2dc062e2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_iterator.h
@@ -0,0 +1,530 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <algorithm>
+#include <cinttypes>
+#include <deque>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "db/compaction/compaction.h"
+#include "db/compaction/compaction_iteration_stats.h"
+#include "db/merge_helper.h"
+#include "db/pinned_iterators_manager.h"
+#include "db/range_del_aggregator.h"
+#include "db/snapshot_checker.h"
+#include "options/cf_options.h"
+#include "rocksdb/compaction_filter.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class BlobFileBuilder;
+class BlobFetcher;
+class PrefetchBufferCollection;
+
+// A wrapper of internal iterator whose purpose is to count how
+// many entries there are in the iterator.
+class SequenceIterWrapper : public InternalIterator {
+ public:
+  SequenceIterWrapper(InternalIterator* iter, const Comparator* cmp,
+                      bool need_count_entries)
+      : icmp_(cmp),
+        inner_iter_(iter),
+        need_count_entries_(need_count_entries) {}
+  bool Valid() const override { return inner_iter_->Valid(); }
+  Status status() const override { return inner_iter_->status(); }
+  void Next() override {
+    num_itered_++;
+    inner_iter_->Next();
+  }
+  void Seek(const Slice& target) override {
+    if (!need_count_entries_) {
+      inner_iter_->Seek(target);
+    } else {
+      // For flush cases, we need to count the total number of entries, so we
+      // do Next() rather than Seek().
+      while (inner_iter_->Valid() &&
+             icmp_.Compare(inner_iter_->key(), target) < 0) {
+        Next();
+      }
+    }
+  }
+  Slice key() const override { return inner_iter_->key(); }
+  Slice value() const override { return inner_iter_->value(); }
+
+  // Unused InternalIterator methods
+  void SeekToFirst() override { assert(false); }
+  void Prev() override { assert(false); }
+  void SeekForPrev(const Slice& /* target */) override { assert(false); }
+  void SeekToLast() override { assert(false); }
+
+  uint64_t num_itered() const { return num_itered_; }
+  bool IsDeleteRangeSentinelKey() const override {
+    assert(Valid());
+    return inner_iter_->IsDeleteRangeSentinelKey();
+  }
+
+ private:
+  InternalKeyComparator icmp_;
+  InternalIterator* inner_iter_;  // not owned
+  uint64_t num_itered_ = 0;
+  bool need_count_entries_;
+};
+
+class CompactionIterator {
+ public:
+  // A wrapper around Compaction. Has a much smaller interface, only what
+  // CompactionIterator uses. Tests can override it.
+  class CompactionProxy {
+   public:
+    virtual ~CompactionProxy() = default;
+
+    virtual int level() const = 0;
+
+    virtual bool KeyNotExistsBeyondOutputLevel(
+        const Slice& user_key, std::vector<size_t>* level_ptrs) const = 0;
+
+    virtual bool bottommost_level() const = 0;
+
+    virtual int number_levels() const = 0;
+
+    // Result includes timestamp if user-defined timestamp is enabled.
+    virtual Slice GetLargestUserKey() const = 0;
+
+    virtual bool allow_ingest_behind() const = 0;
+
+    virtual bool allow_mmap_reads() const = 0;
+
+    virtual bool enable_blob_garbage_collection() const = 0;
+
+    virtual double blob_garbage_collection_age_cutoff() const = 0;
+
+    virtual uint64_t blob_compaction_readahead_size() const = 0;
+
+    virtual const Version* input_version() const = 0;
+
+    virtual bool DoesInputReferenceBlobFiles() const = 0;
+
+    virtual const Compaction* real_compaction() const = 0;
+
+    virtual bool SupportsPerKeyPlacement() const = 0;
+
+    // `key` includes timestamp if user-defined timestamp is enabled.
+    virtual bool WithinPenultimateLevelOutputRange(const Slice& key) const = 0;
+  };
+
+  class RealCompaction : public CompactionProxy {
+   public:
+    explicit RealCompaction(const Compaction* compaction)
+        : compaction_(compaction) {
+      assert(compaction_);
+      assert(compaction_->immutable_options());
+      assert(compaction_->mutable_cf_options());
+    }
+
+    int level() const override { return compaction_->level(); }
+
+    bool KeyNotExistsBeyondOutputLevel(
+        const Slice& user_key, std::vector<size_t>* level_ptrs) const override {
+      return compaction_->KeyNotExistsBeyondOutputLevel(user_key, level_ptrs);
+    }
+
+    bool bottommost_level() const override {
+      return compaction_->bottommost_level();
+    }
+
+    int number_levels() const override { return compaction_->number_levels(); }
+
+    // Result includes timestamp if user-defined timestamp is enabled.
+    Slice GetLargestUserKey() const override {
+      return compaction_->GetLargestUserKey();
+    }
+
+    bool allow_ingest_behind() const override {
+      return compaction_->immutable_options()->allow_ingest_behind;
+    }
+
+    bool allow_mmap_reads() const override {
+      return compaction_->immutable_options()->allow_mmap_reads;
+    }
+
+    bool enable_blob_garbage_collection() const override {
+      return compaction_->enable_blob_garbage_collection();
+    }
+
+    double blob_garbage_collection_age_cutoff() const override {
+      return compaction_->blob_garbage_collection_age_cutoff();
+    }
+
+    uint64_t blob_compaction_readahead_size() const override {
+      return compaction_->mutable_cf_options()->blob_compaction_readahead_size;
+    }
+
+    const Version* input_version() const override {
+      return compaction_->input_version();
+    }
+
+    bool DoesInputReferenceBlobFiles() const override {
+      return compaction_->DoesInputReferenceBlobFiles();
+    }
+
+    const Compaction* real_compaction() const override { return compaction_; }
+
+    bool SupportsPerKeyPlacement() const override {
+      return compaction_->SupportsPerKeyPlacement();
+    }
+
+    // Check if key is within the penultimate level output range, to see if it's
+    // safe to output to the penultimate level for the per_key_placement feature.
+    // `key` includes timestamp if user-defined timestamp is enabled.
+    bool WithinPenultimateLevelOutputRange(const Slice& key) const override {
+      return compaction_->WithinPenultimateLevelOutputRange(key);
+    }
+
+   private:
+    const Compaction* compaction_;
+  };
+
+  CompactionIterator(
+      InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
+      SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
+      SequenceNumber earliest_write_conflict_snapshot,
+      SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
+      Env* env, bool report_detailed_time, bool expect_valid_internal_key,
+      CompactionRangeDelAggregator* range_del_agg,
+      BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
+      bool enforce_single_del_contracts,
+      const std::atomic<bool>& manual_compaction_canceled,
+      const Compaction* compaction = nullptr,
+      const CompactionFilter* compaction_filter = nullptr,
+      const std::atomic<bool>* shutting_down = nullptr,
+      const std::shared_ptr<Logger> info_log = nullptr,
+      const std::string* full_history_ts_low = nullptr,
+      const SequenceNumber preserve_time_min_seqno = kMaxSequenceNumber,
+      const SequenceNumber preclude_last_level_min_seqno = kMaxSequenceNumber);
+
+  // Constructor with custom CompactionProxy, used for tests.
+  CompactionIterator(
+      InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
+      SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
+      SequenceNumber earliest_write_conflict_snapshot,
+      SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
+      Env* env, bool report_detailed_time, bool expect_valid_internal_key,
+      CompactionRangeDelAggregator* range_del_agg,
+      BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
+      bool enforce_single_del_contracts,
+      const std::atomic<bool>& manual_compaction_canceled,
+      std::unique_ptr<CompactionProxy> compaction,
+      const CompactionFilter* compaction_filter = nullptr,
+      const std::atomic<bool>* shutting_down = nullptr,
+      const std::shared_ptr<Logger> info_log = nullptr,
+      const std::string* full_history_ts_low = nullptr,
+      const SequenceNumber preserve_time_min_seqno = kMaxSequenceNumber,
+      const SequenceNumber preclude_last_level_min_seqno = kMaxSequenceNumber);
+
+  ~CompactionIterator();
+
+  void ResetRecordCounts();
+
+  // Seek to the beginning of the compaction iterator output.
+  //
+  // REQUIRED: Call only once.
+  void SeekToFirst();
+
+  // Produces the next record in the compaction.
+  //
+  // REQUIRED: SeekToFirst() has been called.
+  void Next();
+
+  // Getters
+  const Slice& key() const { return key_; }
+  const Slice& value() const { return value_; }
+  const Status& status() const { return status_; }
+  const ParsedInternalKey& ikey() const { return ikey_; }
+  inline bool Valid() const { return validity_info_.IsValid(); }
+  const Slice& user_key() const {
+    if (UNLIKELY(is_range_del_)) {
+      return ikey_.user_key;
+    }
+    return current_user_key_;
+  }
+  const CompactionIterationStats& iter_stats() const { return iter_stats_; }
+  uint64_t num_input_entry_scanned() const { return input_.num_itered(); }
+  // Whether the current key should be placed on the penultimate level; only
+  // valid if per_key_placement is supported
+  bool output_to_penultimate_level() const {
+    return output_to_penultimate_level_;
+  }
+  Status InputStatus() const { return input_.status(); }
+
+  bool IsDeleteRangeSentinelKey() const { return is_range_del_; }
+
+ private:
+  // Processes the input stream to find the next output
+  void NextFromInput();
+
+  // Do final preparations before presenting the output to the callee.
+  void PrepareOutput();
+
+  // Decide whether the current key should be output to the last level or the
+  // penultimate level; only called for compactions that support per-key
+  // placement
+  void DecideOutputLevel();
+
+  // Passes the output value to the blob file builder (if any), and replaces it
+  // with the corresponding blob reference if it has been actually written to a
+  // blob file (i.e. if it passed the value size check). Returns true if the
+  // value got extracted to a blob file, false otherwise.
+  bool ExtractLargeValueIfNeededImpl();
+
+  // Extracts large values as described above, and updates the internal key's
+  // type to kTypeBlobIndex if the value got extracted. Should only be called
+  // for regular values (kTypeValue).
+  void ExtractLargeValueIfNeeded();
+
+  // Relocates valid blobs residing in the oldest blob files if garbage
+  // collection is enabled. Relocated blobs are written to new blob files or
+  // inlined in the LSM tree depending on the current settings (i.e.
+  // enable_blob_files and min_blob_size). Should only be called for blob
+  // references (kTypeBlobIndex).
+  //
+  // Note: the stacked BlobDB implementation's compaction filter based GC
+  // algorithm is also called from here.
+  void GarbageCollectBlobIfNeeded();
+
+  // Invoke compaction filter if needed.
+  // Return true on success, false on failures (e.g.: kIOError).
+  bool InvokeFilterIfNeeded(bool* need_skip, Slice* skip_until);
+
+  // Given a sequence number, return the sequence number of the
+  // earliest snapshot that this sequence number is visible in.
+  // The snapshots themselves are arranged in ascending order of
+  // sequence numbers.
+  // Employ a sequential search because the total number of
+  // snapshots is typically small.
+  inline SequenceNumber findEarliestVisibleSnapshot(
+      SequenceNumber in, SequenceNumber* prev_snapshot);
+
+  inline bool KeyCommitted(SequenceNumber sequence) {
+    return snapshot_checker_ == nullptr ||
+           snapshot_checker_->CheckInSnapshot(sequence, job_snapshot_) ==
+               SnapshotCheckerResult::kInSnapshot;
+  }
+
+  bool DefinitelyInSnapshot(SequenceNumber seq, SequenceNumber snapshot);
+
+  bool DefinitelyNotInSnapshot(SequenceNumber seq, SequenceNumber snapshot);
+
+  // Extract user-defined timestamp from user key if possible and compare it
+  // with *full_history_ts_low_ if applicable.
+  inline void UpdateTimestampAndCompareWithFullHistoryLow() {
+    if (!timestamp_size_) {
+      return;
+    }
+    Slice ts = ExtractTimestampFromUserKey(ikey_.user_key, timestamp_size_);
+    curr_ts_.assign(ts.data(), ts.size());
+    if (full_history_ts_low_) {
+      cmp_with_history_ts_low_ =
+          cmp_->CompareTimestamp(ts, *full_history_ts_low_);
+    }
+  }
+
+  static uint64_t ComputeBlobGarbageCollectionCutoffFileNumber(
+      const CompactionProxy* compaction);
+  static std::unique_ptr<BlobFetcher> CreateBlobFetcherIfNeeded(
+      const CompactionProxy* compaction);
+  static std::unique_ptr<PrefetchBufferCollection>
+  CreatePrefetchBufferCollectionIfNeeded(const CompactionProxy* compaction);
+
+  SequenceIterWrapper input_;
+  const Comparator* cmp_;
+  MergeHelper* merge_helper_;
+  const std::vector<SequenceNumber>* snapshots_;
+  // List of snapshots released during compaction.
+  // findEarliestVisibleSnapshot() finds them via the return value of
+  // snapshot_checker, and makes sure they will not be returned as the
+  // earliest visible snapshot of an older value.
+  // See WritePreparedTransactionTest::ReleaseSnapshotDuringCompaction3.
+  std::unordered_set<SequenceNumber> released_snapshots_;
+  const SequenceNumber earliest_write_conflict_snapshot_;
+  const SequenceNumber job_snapshot_;
+  const SnapshotChecker* const snapshot_checker_;
+  Env* env_;
+  SystemClock* clock_;
+  const bool report_detailed_time_;
+  const bool expect_valid_internal_key_;
+  CompactionRangeDelAggregator* range_del_agg_;
+  BlobFileBuilder* blob_file_builder_;
+  std::unique_ptr<CompactionProxy> compaction_;
+  const CompactionFilter* compaction_filter_;
+  const std::atomic<bool>* shutting_down_;
+  const std::atomic<bool>& manual_compaction_canceled_;
+  const bool bottommost_level_;
+  const bool visible_at_tip_;
+  const SequenceNumber earliest_snapshot_;
+
+  std::shared_ptr<Logger> info_log_;
+
+  const bool allow_data_in_errors_;
+
+  const bool enforce_single_del_contracts_;
+
+  // Comes from comparator.
+  const size_t timestamp_size_;
+
+  // Lower bound timestamp to retain full history in terms of user-defined
+  // timestamp. If a key's timestamp is older than full_history_ts_low_, then
+  // the key *may* be eligible for garbage collection (GC). The skipping logic
+  // is in `NextFromInput()` and `PrepareOutput()`.
+  // If nullptr, NO GC will be performed and all history will be preserved.
+  const std::string* const full_history_ts_low_;
+
+  // State
+  //
+  enum ValidContext : uint8_t {
+    kMerge1 = 0,
+    kMerge2 = 1,
+    kParseKeyError = 2,
+    kCurrentKeyUncommitted = 3,
+    kKeepSDAndClearPut = 4,
+    kKeepTsHistory = 5,
+    kKeepSDForConflictCheck = 6,
+    kKeepSDForSnapshot = 7,
+    kKeepSD = 8,
+    kKeepDel = 9,
+    kNewUserKey = 10,
+    kRangeDeletion = 11,
+  };
+
+  struct ValidityInfo {
+    inline bool IsValid() const { return rep & 1; }
+    ValidContext GetContext() const {
+      return static_cast<ValidContext>(rep >> 1);
+    }
+    inline void SetValid(uint8_t ctx) { rep = (ctx << 1) | 1; }
+    inline void Invalidate() { rep = 0; }
+
+    uint8_t rep{0};
+  } validity_info_;
+
+  // Points to a copy of the current compaction iterator output (current_key_)
+  // if valid.
+  Slice key_;
+  // Points to the value in the underlying iterator that corresponds to the
+  // current output.
+  Slice value_;
+  // The status is OK unless the compaction iterator encounters a merge operand
+  // while not having a merge operator defined.
+  Status status_;
+  // Stores the user key, sequence number and type of the current compaction
+  // iterator output (or current key in the underlying iterator during
+  // NextFromInput()).
+  ParsedInternalKey ikey_;
+  // Stores whether ikey_.user_key is valid. If set to false, the user key is
+  // not compared against the current key in the underlying iterator.
+  bool has_current_user_key_ = false;
+  // If false, the iterator holds a copy of the current compaction iterator
+  // output (or current key in the underlying iterator during NextFromInput()).
+  bool at_next_ = false;
+
+  IterKey current_key_;
+  Slice current_user_key_;
+  std::string curr_ts_;
+  SequenceNumber current_user_key_sequence_;
+  SequenceNumber current_user_key_snapshot_;
+
+  // True if the iterator has already returned a record for the current key.
+  bool has_outputted_key_ = false;
+
+  // If true, truncate the value of the next key and output it without applying
+  // any compaction rules. This is used for outputting a put after a single
+  // delete.
+  bool clear_and_output_next_key_ = false;
+
+  MergeOutputIterator merge_out_iter_;
+  Status merge_until_status_;
+  // PinnedIteratorsManager used to pin input_ Iterator blocks while reading
+  // merge operands and then releasing them after consuming them.
+  PinnedIteratorsManager pinned_iters_mgr_;
+
+  uint64_t blob_garbage_collection_cutoff_file_number_;
+
+  std::unique_ptr<BlobFetcher> blob_fetcher_;
+  std::unique_ptr<PrefetchBufferCollection> prefetch_buffers_;
+
+  std::string blob_index_;
+  PinnableSlice blob_value_;
+  std::string compaction_filter_value_;
+  InternalKey compaction_filter_skip_until_;
+  // "level_ptrs" holds indices that remember which file of an associated
+  // level we were last checking during the last call to compaction->
+  // KeyNotExistsBeyondOutputLevel(). This allows future calls to the function
+  // to pick up where it left off: each subcompaction's key range is
+  // increasing, so a later call to the function must be looking for a key that
+  // is in or beyond the last file checked during the previous call
+  std::vector<size_t> level_ptrs_;
+  CompactionIterationStats iter_stats_;
+
+  // Used to avoid purging uncommitted values. The application can specify
+  // uncommitted values by providing a SnapshotChecker object.
+  bool current_key_committed_;
+
+  // Saved result of ucmp->CompareTimestamp(current_ts_, *full_history_ts_low_)
+  int cmp_with_history_ts_low_;
+
+  const int level_;
+
+  // True if the previous internal key (same user key)'s sequence number has
+  // just been zeroed out during bottommost compaction.
+  bool last_key_seq_zeroed_{false};
+
+  // True if the current key should be output to the penultimate level if
+  // possible; compaction logic makes the final decision on which level to
+  // output to.
+  bool output_to_penultimate_level_{false};
+
+  // Min seqno for preserving the time information.
+  const SequenceNumber preserve_time_min_seqno_ = kMaxSequenceNumber;
+
+  // Min seqno to preclude the data from the last level. If a key's seqno is
+  // larger than this, it will be output to the penultimate level.
+  const SequenceNumber preclude_last_level_min_seqno_ = kMaxSequenceNumber;
+
+  void AdvanceInputIter() { input_.Next(); }
+
+  void SkipUntil(const Slice& skip_until) { input_.Seek(skip_until); }
+
+  bool IsShuttingDown() {
+    // This is a best-effort facility, so memory_order_relaxed is sufficient.
+    return shutting_down_ && shutting_down_->load(std::memory_order_relaxed);
+  }
+
+  bool IsPausingManualCompaction() {
+    // This is a best-effort facility, so memory_order_relaxed is sufficient.
+    return manual_compaction_canceled_.load(std::memory_order_relaxed);
+  }
+
+  // Stores whether the current compaction iterator output
+  // is a range tombstone start key.
+  bool is_range_del_{false};
+};
+
+inline bool CompactionIterator::DefinitelyInSnapshot(SequenceNumber seq,
+                                                     SequenceNumber snapshot) {
+  return ((seq) <= (snapshot) &&
+          (snapshot_checker_ == nullptr ||
+           LIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) ==
+                  SnapshotCheckerResult::kInSnapshot)));
+}
+
+inline bool CompactionIterator::DefinitelyNotInSnapshot(
+    SequenceNumber seq, SequenceNumber snapshot) {
+  return ((seq) > (snapshot) ||
+          (snapshot_checker_ != nullptr &&
+           UNLIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) ==
+                    SnapshotCheckerResult::kNotInSnapshot)));
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.cc
new file mode 100644
index 0000000000..ed152f28c6
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.cc
@@ -0,0 +1,2054 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/compaction/compaction_job.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "db/blob/blob_counting_iterator.h" +#include "db/blob/blob_file_addition.h" +#include "db/blob/blob_file_builder.h" +#include "db/builder.h" +#include "db/compaction/clipping_iterator.h" +#include "db/compaction/compaction_state.h" +#include "db/db_impl/db_impl.h" +#include "db/dbformat.h" +#include "db/error_handler.h" +#include "db/event_helpers.h" +#include "db/history_trimming_iterator.h" +#include "db/log_writer.h" +#include "db/merge_helper.h" +#include "db/range_del_aggregator.h" +#include "db/version_edit.h" +#include "db/version_set.h" +#include "file/filename.h" +#include "file/read_write_util.h" +#include "file/sst_file_manager_impl.h" +#include "file/writable_file_writer.h" +#include "logging/log_buffer.h" +#include "logging/logging.h" +#include "monitoring/iostats_context_imp.h" +#include "monitoring/thread_status_util.h" +#include "options/configurable_helper.h" +#include "options/options_helper.h" +#include "port/port.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/options.h" +#include "rocksdb/statistics.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "rocksdb/utilities/options_type.h" +#include "table/merging_iterator.h" +#include "table/table_builder.h" +#include "table/unique_id_impl.h" +#include "test_util/sync_point.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +const char* GetCompactionReasonString(CompactionReason compaction_reason) { + switch (compaction_reason) { + case CompactionReason::kUnknown: + return "Unknown"; + case CompactionReason::kLevelL0FilesNum: + return "LevelL0FilesNum"; + case CompactionReason::kLevelMaxLevelSize: + return "LevelMaxLevelSize"; + case CompactionReason::kUniversalSizeAmplification: + return "UniversalSizeAmplification"; + case CompactionReason::kUniversalSizeRatio: + return "UniversalSizeRatio"; + case CompactionReason::kUniversalSortedRunNum: + return "UniversalSortedRunNum"; + case CompactionReason::kFIFOMaxSize: + return "FIFOMaxSize"; + case CompactionReason::kFIFOReduceNumFiles: + return "FIFOReduceNumFiles"; + case CompactionReason::kFIFOTtl: + return "FIFOTtl"; + case CompactionReason::kManualCompaction: + return "ManualCompaction"; + case CompactionReason::kFilesMarkedForCompaction: + return "FilesMarkedForCompaction"; + case CompactionReason::kBottommostFiles: + return "BottommostFiles"; + case CompactionReason::kTtl: + return "Ttl"; + case CompactionReason::kFlush: + return "Flush"; + case CompactionReason::kExternalSstIngestion: + return "ExternalSstIngestion"; + case CompactionReason::kPeriodicCompaction: + return "PeriodicCompaction"; + case CompactionReason::kChangeTemperature: + return "ChangeTemperature"; + case CompactionReason::kForcedBlobGC: + return "ForcedBlobGC"; + case CompactionReason::kRoundRobinTtl: + return "RoundRobinTtl"; + case CompactionReason::kRefitLevel: + return "RefitLevel"; + case CompactionReason::kNumOfReasons: + // fall through + default: + assert(false); + return "Invalid"; + } +} + +const 
char* GetCompactionPenultimateOutputRangeTypeString(
+    Compaction::PenultimateOutputRangeType range_type) {
+  switch (range_type) {
+    case Compaction::PenultimateOutputRangeType::kNotSupported:
+      return "NotSupported";
+    case Compaction::PenultimateOutputRangeType::kFullRange:
+      return "FullRange";
+    case Compaction::PenultimateOutputRangeType::kNonLastRange:
+      return "NonLastRange";
+    case Compaction::PenultimateOutputRangeType::kDisabled:
+      return "Disabled";
+    default:
+      assert(false);
+      return "Invalid";
+  }
+}
+
+CompactionJob::CompactionJob(
+    int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
+    const MutableDBOptions& mutable_db_options, const FileOptions& file_options,
+    VersionSet* versions, const std::atomic<bool>* shutting_down,
+    LogBuffer* log_buffer, FSDirectory* db_directory,
+    FSDirectory* output_directory, FSDirectory* blob_output_directory,
+    Statistics* stats, InstrumentedMutex* db_mutex,
+    ErrorHandler* db_error_handler,
+    std::vector<SequenceNumber> existing_snapshots,
+    SequenceNumber earliest_write_conflict_snapshot,
+    const SnapshotChecker* snapshot_checker, JobContext* job_context,
+    std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
+    bool paranoid_file_checks, bool measure_io_stats, const std::string& dbname,
+    CompactionJobStats* compaction_job_stats, Env::Priority thread_pri,
+    const std::shared_ptr<IOTracer>& io_tracer,
+    const std::atomic<bool>& manual_compaction_canceled,
+    const std::string& db_id, const std::string& db_session_id,
+    std::string full_history_ts_low, std::string trim_ts,
+    BlobFileCompletionCallback* blob_callback, int* bg_compaction_scheduled,
+    int* bg_bottom_compaction_scheduled)
+    : compact_(new CompactionState(compaction)),
+      compaction_stats_(compaction->compaction_reason(), 1),
+      db_options_(db_options),
+      mutable_db_options_copy_(mutable_db_options),
+      log_buffer_(log_buffer),
+      output_directory_(output_directory),
+      stats_(stats),
+      bottommost_level_(false),
+      write_hint_(Env::WLTH_NOT_SET),
+      compaction_job_stats_(compaction_job_stats),
+      job_id_(job_id),
+      dbname_(dbname),
+      db_id_(db_id),
+      db_session_id_(db_session_id),
+      file_options_(file_options),
+      env_(db_options.env),
+      io_tracer_(io_tracer),
+      fs_(db_options.fs, io_tracer),
+      file_options_for_read_(
+          fs_->OptimizeForCompactionTableRead(file_options, db_options_)),
+      versions_(versions),
+      shutting_down_(shutting_down),
+      manual_compaction_canceled_(manual_compaction_canceled),
+      db_directory_(db_directory),
+      blob_output_directory_(blob_output_directory),
+      db_mutex_(db_mutex),
+      db_error_handler_(db_error_handler),
+      existing_snapshots_(std::move(existing_snapshots)),
+      earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
+      snapshot_checker_(snapshot_checker),
+      job_context_(job_context),
+      table_cache_(std::move(table_cache)),
+      event_logger_(event_logger),
+      paranoid_file_checks_(paranoid_file_checks),
+      measure_io_stats_(measure_io_stats),
+      thread_pri_(thread_pri),
+      full_history_ts_low_(std::move(full_history_ts_low)),
+      trim_ts_(std::move(trim_ts)),
+      blob_callback_(blob_callback),
+      extra_num_subcompaction_threads_reserved_(0),
+      bg_compaction_scheduled_(bg_compaction_scheduled),
+      bg_bottom_compaction_scheduled_(bg_bottom_compaction_scheduled) {
+  assert(compaction_job_stats_ != nullptr);
+  assert(log_buffer_ != nullptr);
+
+  const auto* cfd = compact_->compaction->column_family_data();
+  ThreadStatusUtil::SetEnableTracking(db_options_.enable_thread_tracking);
+  ThreadStatusUtil::SetColumnFamily(cfd);
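+
+  // Illustrative note (a sketch, not vendored code): ReportStartedCompaction()
+  // below packs both levels into the single 64-bit
+  // COMPACTION_INPUT_OUTPUT_LEVEL thread-status property; e.g. for a
+  // compaction from level 2 into level 3:
+  //   uint64_t packed = (static_cast<uint64_t>(2) << 32) + 3;     // 0x200000003
+  //   int input_level  = static_cast<int>(packed >> 32);          // 2
+  //   int output_level = static_cast<int>(packed & 0xffffffffu);  // 3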
+  ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION);
+  ReportStartedCompaction(compaction);
+}
+
+CompactionJob::~CompactionJob() {
+  assert(compact_ == nullptr);
+  ThreadStatusUtil::ResetThreadStatus();
+}
+
+void CompactionJob::ReportStartedCompaction(Compaction* compaction) {
+  ThreadStatusUtil::SetThreadOperationProperty(ThreadStatus::COMPACTION_JOB_ID,
+                                               job_id_);
+
+  ThreadStatusUtil::SetThreadOperationProperty(
+      ThreadStatus::COMPACTION_INPUT_OUTPUT_LEVEL,
+      (static_cast<uint64_t>(compact_->compaction->start_level()) << 32) +
+          compact_->compaction->output_level());
+
+  // In the current design, a CompactionJob is always created
+  // for non-trivial compaction.
+  assert(compaction->IsTrivialMove() == false ||
+         compaction->is_manual_compaction() == true);
+
+  ThreadStatusUtil::SetThreadOperationProperty(
+      ThreadStatus::COMPACTION_PROP_FLAGS,
+      compaction->is_manual_compaction() +
+          (compaction->deletion_compaction() << 1));
+
+  ThreadStatusUtil::SetThreadOperationProperty(
+      ThreadStatus::COMPACTION_TOTAL_INPUT_BYTES,
+      compaction->CalculateTotalInputSize());
+
+  IOSTATS_RESET(bytes_written);
+  IOSTATS_RESET(bytes_read);
+  ThreadStatusUtil::SetThreadOperationProperty(
+      ThreadStatus::COMPACTION_BYTES_WRITTEN, 0);
+  ThreadStatusUtil::SetThreadOperationProperty(
+      ThreadStatus::COMPACTION_BYTES_READ, 0);
+
+  // Set the thread operation after operation properties
+  // to ensure GetThreadList() can always show them all together.
+  ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION);
+
+  compaction_job_stats_->is_manual_compaction =
+      compaction->is_manual_compaction();
+  compaction_job_stats_->is_full_compaction = compaction->is_full_compaction();
+}
+
+void CompactionJob::Prepare() {
+  AutoThreadOperationStageUpdater stage_updater(
+      ThreadStatus::STAGE_COMPACTION_PREPARE);
+
+  // Generate file_levels_ for compaction before making Iterator
+  auto* c = compact_->compaction;
+  ColumnFamilyData* cfd = c->column_family_data();
+  assert(cfd != nullptr);
+  assert(cfd->current()->storage_info()->NumLevelFiles(
+             compact_->compaction->level()) > 0);
+
+  write_hint_ = cfd->CalculateSSTWriteHint(c->output_level());
+  bottommost_level_ = c->bottommost_level();
+
+  if (c->ShouldFormSubcompactions()) {
+    StopWatch sw(db_options_.clock, stats_, SUBCOMPACTION_SETUP_TIME);
+    GenSubcompactionBoundaries();
+  }
+  if (boundaries_.size() > 1) {
+    for (size_t i = 0; i <= boundaries_.size(); i++) {
+      compact_->sub_compact_states.emplace_back(
+          c, (i != 0) ? std::optional<Slice>(boundaries_[i - 1]) : std::nullopt,
+          (i != boundaries_.size()) ? std::optional<Slice>(boundaries_[i])
+                                    : std::nullopt,
+          static_cast<uint32_t>(i));
+      // assert to validate that boundaries don't have the same user keys
+      // (without the timestamp part).
+      assert(i == 0 || i == boundaries_.size() ||
+             cfd->user_comparator()->CompareWithoutTimestamp(
+                 boundaries_[i - 1], boundaries_[i]) < 0);
+    }
+    RecordInHistogram(stats_, NUM_SUBCOMPACTIONS_SCHEDULED,
+                      compact_->sub_compact_states.size());
+  } else {
+    compact_->sub_compact_states.emplace_back(c, std::nullopt, std::nullopt,
+                                              /*sub_job_id*/ 0);
+  }
+
+  // Collect all seqno->time information from the input files, which will be
+  // used to encode seqno->time to the output files.
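+  //
+  // Illustrative sketch (not vendored code; the numbers are made up): if the
+  // merged mapping were
+  //   {seqno 100 -> time 1000, seqno 200 -> time 2000, seqno 300 -> time 3000}
+  // with preserve_time_duration == 1500 and _current_time == 3500, then
+  // preserve_time == 3500 - 1500 == 2000, and preserve_time_min_seqno_
+  // resolves to 200: entries with seqno >= 200 keep their time information,
+  // while older entries may have it dropped.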
+  uint64_t preserve_time_duration =
+      std::max(c->immutable_options()->preserve_internal_time_seconds,
+               c->immutable_options()->preclude_last_level_data_seconds);
+
+  if (preserve_time_duration > 0) {
+    const ReadOptions read_options(Env::IOActivity::kCompaction);
+    // setup seqno_time_mapping_
+    seqno_time_mapping_.SetMaxTimeDuration(preserve_time_duration);
+    for (const auto& each_level : *c->inputs()) {
+      for (const auto& fmd : each_level.files) {
+        std::shared_ptr<const TableProperties> tp;
+        Status s =
+            cfd->current()->GetTableProperties(read_options, &tp, fmd, nullptr);
+        if (s.ok()) {
+          seqno_time_mapping_.Add(tp->seqno_to_time_mapping)
+              .PermitUncheckedError();
+          seqno_time_mapping_.Add(fmd->fd.smallest_seqno,
+                                  fmd->oldest_ancester_time);
+        }
+      }
+    }
+
+    auto status = seqno_time_mapping_.Sort();
+    if (!status.ok()) {
+      ROCKS_LOG_WARN(db_options_.info_log,
+                     "Invalid sequence number to time mapping: Status: %s",
+                     status.ToString().c_str());
+    }
+    int64_t _current_time = 0;
+    status = db_options_.clock->GetCurrentTime(&_current_time);
+    if (!status.ok()) {
+      ROCKS_LOG_WARN(db_options_.info_log,
+                     "Failed to get current time in compaction: Status: %s",
+                     status.ToString().c_str());
+      // preserve all time information
+      preserve_time_min_seqno_ = 0;
+      preclude_last_level_min_seqno_ = 0;
+    } else {
+      seqno_time_mapping_.TruncateOldEntries(_current_time);
+      uint64_t preserve_time =
+          static_cast<uint64_t>(_current_time) > preserve_time_duration
+              ? _current_time - preserve_time_duration
+              : 0;
+      preserve_time_min_seqno_ =
+          seqno_time_mapping_.GetOldestSequenceNum(preserve_time);
+      if (c->immutable_options()->preclude_last_level_data_seconds > 0) {
+        uint64_t preclude_last_level_time =
+            static_cast<uint64_t>(_current_time) >
+                    c->immutable_options()->preclude_last_level_data_seconds
+                ? _current_time -
+                      c->immutable_options()->preclude_last_level_data_seconds
+                : 0;
+        preclude_last_level_min_seqno_ =
+            seqno_time_mapping_.GetOldestSequenceNum(preclude_last_level_time);
+      }
+    }
+  }
+}
+
+uint64_t CompactionJob::GetSubcompactionsLimit() {
+  return extra_num_subcompaction_threads_reserved_ +
+         std::max(
+             std::uint64_t(1),
+             static_cast<uint64_t>(compact_->compaction->max_subcompactions()));
+}
+
+void CompactionJob::AcquireSubcompactionResources(
+    int num_extra_required_subcompactions) {
+  TEST_SYNC_POINT("CompactionJob::AcquireSubcompactionResources:0");
+  TEST_SYNC_POINT("CompactionJob::AcquireSubcompactionResources:1");
+  int max_db_compactions =
+      DBImpl::GetBGJobLimits(
+          mutable_db_options_copy_.max_background_flushes,
+          mutable_db_options_copy_.max_background_compactions,
+          mutable_db_options_copy_.max_background_jobs,
+          versions_->GetColumnFamilySet()
+              ->write_controller()
+              ->NeedSpeedupCompaction())
+          .max_compactions;
+  InstrumentedMutexLock l(db_mutex_);
+  // Apply the min function first since we need to compute the extra
+  // subcompactions against the compaction limits, and then try to reserve
+  // threads for the extra subcompactions. The actual number of reserved
+  // threads could be less than the desired number.
+  int available_bg_compactions_against_db_limit =
+      std::max(max_db_compactions - *bg_compaction_scheduled_ -
+                   *bg_bottom_compaction_scheduled_,
+               0);
+  // Reservation only supports background threads whose priority is between
+  // BOTTOM and HIGH. Need to degrade the priority to HIGH if the original
+  // thread_pri_ is higher than that. Similar to ReleaseThreads().
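+  //
+  // Illustrative sketch (not vendored code; the numbers are made up): with
+  // max_compactions == 8, *bg_compaction_scheduled_ == 5 and
+  // *bg_bottom_compaction_scheduled_ == 1, at most 8 - 5 - 1 == 2 extra
+  // threads can be requested below, and
+  //   env_->ReserveThreads(std::min(num_extra_required_subcompactions, 2),
+  //                        std::min(thread_pri_, Env::Priority::HIGH))
+  // may still return fewer than 2 if the pool cannot spare them.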
+  extra_num_subcompaction_threads_reserved_ =
+      env_->ReserveThreads(std::min(num_extra_required_subcompactions,
+                                    available_bg_compactions_against_db_limit),
+                           std::min(thread_pri_, Env::Priority::HIGH));
+
+  // Update bg_compaction_scheduled_ or bg_bottom_compaction_scheduled_
+  // depending on whether this compaction has the bottommost priority
+  if (thread_pri_ == Env::Priority::BOTTOM) {
+    *bg_bottom_compaction_scheduled_ +=
+        extra_num_subcompaction_threads_reserved_;
+  } else {
+    *bg_compaction_scheduled_ += extra_num_subcompaction_threads_reserved_;
+  }
+}
+
+void CompactionJob::ShrinkSubcompactionResources(uint64_t num_extra_resources) {
+  // Do nothing when we have zero resources to shrink
+  if (num_extra_resources == 0) return;
+  db_mutex_->Lock();
+  // We cannot release more threads than what we reserved before
+  int extra_num_subcompaction_threads_released = env_->ReleaseThreads(
+      (int)num_extra_resources, std::min(thread_pri_, Env::Priority::HIGH));
+  // Update the number of reserved threads and the number of background
+  // scheduled compactions for this compaction job
+  extra_num_subcompaction_threads_reserved_ -=
+      extra_num_subcompaction_threads_released;
+  // TODO (zichen): design a test case with new subcompaction partitioning
+  // when the number of actual partitions is less than the number of planned
+  // partitions
+  assert(extra_num_subcompaction_threads_released == (int)num_extra_resources);
+  // Update bg_compaction_scheduled_ or bg_bottom_compaction_scheduled_
+  // depending on whether this compaction has the bottommost priority
+  if (thread_pri_ == Env::Priority::BOTTOM) {
+    *bg_bottom_compaction_scheduled_ -=
+        extra_num_subcompaction_threads_released;
+  } else {
+    *bg_compaction_scheduled_ -= extra_num_subcompaction_threads_released;
+  }
+  db_mutex_->Unlock();
+  TEST_SYNC_POINT("CompactionJob::ShrinkSubcompactionResources:0");
+}
+
+void CompactionJob::ReleaseSubcompactionResources() {
+  if (extra_num_subcompaction_threads_reserved_ == 0) {
+    return;
+  }
+  {
+    InstrumentedMutexLock l(db_mutex_);
+    // The number of reserved threads becomes larger than 0 only if the
+    // compaction priority is round robin and there are not sufficient
+    // sub-compactions available
+
+    // The number of scheduled compactions must be no less than 1 + the number
+    // of extra subcompactions using acquired resources, since this compaction
+    // job has not finished yet
+    assert(*bg_bottom_compaction_scheduled_ >=
+               1 + extra_num_subcompaction_threads_reserved_ ||
+           *bg_compaction_scheduled_ >=
+               1 + extra_num_subcompaction_threads_reserved_);
+  }
+  ShrinkSubcompactionResources(extra_num_subcompaction_threads_reserved_);
+}
+
+struct RangeWithSize {
+  Range range;
+  uint64_t size;
+
+  RangeWithSize(const Slice& a, const Slice& b, uint64_t s = 0)
+      : range(a, b), size(s) {}
+};
+
+void CompactionJob::GenSubcompactionBoundaries() {
+  // The goal is to find some boundary keys so that we can evenly partition
+  // the compaction input data into max_subcompactions ranges.
+  // For every input file, we ask TableReader to estimate 128 anchor points
+  // that evenly partition the input file into 128 ranges, together with the
+  // range sizes. This can be calculated by scanning index blocks of the file.
+  // Once we have the anchor points for all the input files, we merge them
+  // together and try to find keys dividing the ranges evenly.
+  // For example, if we have two input files, and each returns the following
+  // ranges:
+  //   File1: (a1, 1000), (b1, 1200), (c1, 1100)
+  //   File2: (a2, 1100), (b2, 1000), (c2, 1000)
+  // We sort all the keys, yielding:
+  //   (a1, 1000), (a2, 1100), (b1, 1200), (b2, 1000), (c1, 1100), (c2, 1000)
+  // We calculate the total size by adding up all ranges' sizes, which is 6400.
+  // If we would like to partition into 2 subcompactions, the target of the
+  // range size is 3200. Based on the size, we take "b1" as the partition key
+  // since the first three ranges would hit 3200.
+  //
+  // Note that the ranges are actually overlapping. For example, in the example
+  // above, the range ending with "b1" is overlapping with the range ending
+  // with "b2". So the size 1000+1100+1200 is an underestimation of the data
+  // size up to "b1". In extreme cases where we only compact N L0 files, a
+  // range can overlap with N-1 other ranges. Since we requested a relatively
+  // large number (128) of ranges from each input file, even an N-way range
+  // overlap would cause relatively small inaccuracy.
+  const ReadOptions read_options(Env::IOActivity::kCompaction);
+  auto* c = compact_->compaction;
+  if (c->max_subcompactions() <= 1 &&
+      !(c->immutable_options()->compaction_pri == kRoundRobin &&
+        c->immutable_options()->compaction_style == kCompactionStyleLevel)) {
+    return;
+  }
+  auto* cfd = c->column_family_data();
+  const Comparator* cfd_comparator = cfd->user_comparator();
+  const InternalKeyComparator& icomp = cfd->internal_comparator();
+
+  auto* v = compact_->compaction->input_version();
+  int base_level = v->storage_info()->base_level();
+  InstrumentedMutexUnlock unlock_guard(db_mutex_);
+
+  uint64_t total_size = 0;
+  std::vector<TableReader::Anchor> all_anchors;
+  int start_lvl = c->start_level();
+  int out_lvl = c->output_level();
+
+  for (size_t lvl_idx = 0; lvl_idx < c->num_input_levels(); lvl_idx++) {
+    int lvl = c->level(lvl_idx);
+    if (lvl >= start_lvl && lvl <= out_lvl) {
+      const LevelFilesBrief* flevel = c->input_levels(lvl_idx);
+      size_t num_files = flevel->num_files;
+
+      if (num_files == 0) {
+        continue;
+      }
+
+      for (size_t i = 0; i < num_files; i++) {
+        FileMetaData* f = flevel->files[i].file_metadata;
+        std::vector<TableReader::Anchor> my_anchors;
+        Status s = cfd->table_cache()->ApproximateKeyAnchors(
+            read_options, icomp, *f,
+            c->mutable_cf_options()->block_protection_bytes_per_key,
+            my_anchors);
+        if (!s.ok() || my_anchors.empty()) {
+          my_anchors.emplace_back(f->largest.user_key(), f->fd.GetFileSize());
+        }
+        for (auto& ac : my_anchors) {
+          // Can be optimized to avoid this loop.
+          total_size += ac.range_size;
+        }
+
+        all_anchors.insert(all_anchors.end(), my_anchors.begin(),
+                           my_anchors.end());
+      }
+    }
+  }
+  // Here we sort all the anchor points across all files and go through them
+  // in the sorted order to find partitioning boundaries.
+  // Not the most efficient implementation. A much more efficient algorithm
+  // probably exists. But it is more complex. If performance turns out to
+  // be a problem, we can optimize.
+  std::sort(
+      all_anchors.begin(), all_anchors.end(),
+      [cfd_comparator](TableReader::Anchor& a, TableReader::Anchor& b) -> bool {
+        return cfd_comparator->CompareWithoutTimestamp(a.user_key, b.user_key) <
+               0;
+      });
+
+  // Remove duplicated entries from boundaries.
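+  // Illustrative sketch (not vendored code): std::unique only collapses
+  // *adjacent* duplicates, which is why the anchors were sorted first. For
+  // user keys {a, b, a} sorted to {a, a, b}, the erase/unique idiom below
+  // leaves {a, b}:
+  //   std::sort(v.begin(), v.end());
+  //   v.erase(std::unique(v.begin(), v.end()), v.end());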
+  all_anchors.erase(
+      std::unique(all_anchors.begin(), all_anchors.end(),
+                  [cfd_comparator](TableReader::Anchor& a,
+                                   TableReader::Anchor& b) -> bool {
+                    return cfd_comparator->CompareWithoutTimestamp(
+                               a.user_key, b.user_key) == 0;
+                  }),
+      all_anchors.end());
+
+  // Get the number of planned subcompactions; this may reserve extra threads
+  // and update extra_num_subcompaction_threads_reserved_ for round-robin
+  uint64_t num_planned_subcompactions;
+  if (c->immutable_options()->compaction_pri == kRoundRobin &&
+      c->immutable_options()->compaction_style == kCompactionStyleLevel) {
+    // For round-robin compaction priority, we need to employ more
+    // subcompactions (may exceed the max_subcompaction limit). The extra
+    // subcompactions will be executed using reserved threads and taken into
+    // account in bg_compaction_scheduled or bg_bottom_compaction_scheduled.
+
+    // Initialized by the number of input files
+    num_planned_subcompactions = static_cast<uint64_t>(c->num_input_files(0));
+    uint64_t max_subcompactions_limit = GetSubcompactionsLimit();
+    if (max_subcompactions_limit < num_planned_subcompactions) {
+      // Assert the two pointers are not null so that we can use extra
+      // subcompactions against db compaction limits
+      assert(bg_bottom_compaction_scheduled_ != nullptr);
+      assert(bg_compaction_scheduled_ != nullptr);
+      // Reserve resources when max_subcompaction is not sufficient
+      AcquireSubcompactionResources(
+          (int)(num_planned_subcompactions - max_subcompactions_limit));
+      // The subcompactions limit changes after acquiring additional resources.
+      // Need to call GetSubcompactionsLimit() again to update the number
+      // of planned subcompactions
+      num_planned_subcompactions =
+          std::min(num_planned_subcompactions, GetSubcompactionsLimit());
+    } else {
+      num_planned_subcompactions = max_subcompactions_limit;
+    }
+  } else {
+    num_planned_subcompactions = GetSubcompactionsLimit();
+  }
+
+  TEST_SYNC_POINT_CALLBACK("CompactionJob::GenSubcompactionBoundaries:0",
+                           &num_planned_subcompactions);
+  if (num_planned_subcompactions == 1) return;
+
+  // Group the ranges into subcompactions
+  uint64_t target_range_size = std::max(
+      total_size / num_planned_subcompactions,
+      MaxFileSizeForLevel(
+          *(c->mutable_cf_options()), out_lvl,
+          c->immutable_options()->compaction_style, base_level,
+          c->immutable_options()->level_compaction_dynamic_level_bytes));
+
+  if (target_range_size >= total_size) {
+    return;
+  }
+
+  uint64_t next_threshold = target_range_size;
+  uint64_t cumulative_size = 0;
+  uint64_t num_actual_subcompactions = 1U;
+  for (TableReader::Anchor& anchor : all_anchors) {
+    cumulative_size += anchor.range_size;
+    if (cumulative_size > next_threshold) {
+      next_threshold += target_range_size;
+      num_actual_subcompactions++;
+      boundaries_.push_back(anchor.user_key);
+    }
+    if (num_actual_subcompactions == num_planned_subcompactions) {
+      break;
+    }
+  }
+  TEST_SYNC_POINT_CALLBACK("CompactionJob::GenSubcompactionBoundaries:1",
+                           &num_actual_subcompactions);
+  // Shrink extra subcompaction resources when extra resources were acquired
+  ShrinkSubcompactionResources(
+      std::min((int)(num_planned_subcompactions - num_actual_subcompactions),
+               extra_num_subcompaction_threads_reserved_));
+}
+
+Status CompactionJob::Run() {
+  AutoThreadOperationStageUpdater stage_updater(
+      ThreadStatus::STAGE_COMPACTION_RUN);
+  TEST_SYNC_POINT("CompactionJob::Run():Start");
+  log_buffer_->FlushBufferToLog();
+  LogCompaction();
+
+  const size_t num_threads = compact_->sub_compact_states.size();
+  assert(num_threads > 0);
+  const
uint64_t start_micros = db_options_.clock->NowMicros();
+
+  // Launch a thread for each of subcompactions 1...num_threads-1
+  std::vector<port::Thread> thread_pool;
+  thread_pool.reserve(num_threads - 1);
+  for (size_t i = 1; i < compact_->sub_compact_states.size(); i++) {
+    thread_pool.emplace_back(&CompactionJob::ProcessKeyValueCompaction, this,
+                             &compact_->sub_compact_states[i]);
+  }
+
+  // Always schedule the first subcompaction (whether or not there are also
+  // others) in the current thread to be efficient with resources
+  ProcessKeyValueCompaction(&compact_->sub_compact_states[0]);
+
+  // Wait for all other threads (if there are any) to finish execution
+  for (auto& thread : thread_pool) {
+    thread.join();
+  }
+
+  compaction_stats_.SetMicros(db_options_.clock->NowMicros() - start_micros);
+
+  for (auto& state : compact_->sub_compact_states) {
+    compaction_stats_.AddCpuMicros(state.compaction_job_stats.cpu_micros);
+    state.RemoveLastEmptyOutput();
+  }
+
+  RecordTimeToHistogram(stats_, COMPACTION_TIME,
+                        compaction_stats_.stats.micros);
+  RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
+                        compaction_stats_.stats.cpu_micros);
+
+  TEST_SYNC_POINT("CompactionJob::Run:BeforeVerify");
+
+  // Check if any thread encountered an error during execution
+  Status status;
+  IOStatus io_s;
+  bool wrote_new_blob_files = false;
+
+  for (const auto& state : compact_->sub_compact_states) {
+    if (!state.status.ok()) {
+      status = state.status;
+      io_s = state.io_status;
+      break;
+    }
+
+    if (state.Current().HasBlobFileAdditions()) {
+      wrote_new_blob_files = true;
+    }
+  }
+
+  if (io_status_.ok()) {
+    io_status_ = io_s;
+  }
+  if (status.ok()) {
+    constexpr IODebugContext* dbg = nullptr;
+
+    if (output_directory_) {
+      io_s = output_directory_->FsyncWithDirOptions(
+          IOOptions(), dbg,
+          DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+    }
+
+    if (io_s.ok() && wrote_new_blob_files && blob_output_directory_ &&
+        blob_output_directory_ != output_directory_) {
+      io_s = blob_output_directory_->FsyncWithDirOptions(
+          IOOptions(), dbg,
+          DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+    }
+  }
+  if (io_status_.ok()) {
+    io_status_ = io_s;
+  }
+  if (status.ok()) {
+    status = io_s;
+  }
+  if (status.ok()) {
+    thread_pool.clear();
+    std::vector<const CompactionOutputs::Output*> files_output;
+    for (const auto& state : compact_->sub_compact_states) {
+      for (const auto& output : state.GetOutputs()) {
+        files_output.emplace_back(&output);
+      }
+    }
+    ColumnFamilyData* cfd = compact_->compaction->column_family_data();
+    auto& prefix_extractor =
+        compact_->compaction->mutable_cf_options()->prefix_extractor;
+    std::atomic<size_t> next_file_idx(0);
+    auto verify_table = [&](Status& output_status) {
+      while (true) {
+        size_t file_idx = next_file_idx.fetch_add(1);
+        if (file_idx >= files_output.size()) {
+          break;
+        }
+        // Verify that the table is usable
+        // We set for_compaction to false and don't
+        // OptimizeForCompactionTableRead here because this is a special case
+        // after we finish the table building. No matter whether
+        // use_direct_io_for_flush_and_compaction is true, we will regard this
+        // verification as user reads since the goal is to cache it here for
+        // further user reads
+        const ReadOptions verify_table_read_options(
+            Env::IOActivity::kCompaction);
+        InternalIterator* iter = cfd->table_cache()->NewIterator(
+            verify_table_read_options, file_options_,
+            cfd->internal_comparator(), files_output[file_idx]->meta,
+            /*range_del_agg=*/nullptr, prefix_extractor,
+            /*table_reader_ptr=*/nullptr,
+            cfd->internal_stats()->GetFileReadHist(
compact_->compaction->output_level()), + TableReaderCaller::kCompactionRefill, /*arena=*/nullptr, + /*skip_filters=*/false, compact_->compaction->output_level(), + MaxFileSizeForL0MetaPin( + *compact_->compaction->mutable_cf_options()), + /*smallest_compaction_key=*/nullptr, + /*largest_compaction_key=*/nullptr, + /*allow_unprepared_value=*/false, + compact_->compaction->mutable_cf_options() + ->block_protection_bytes_per_key); + auto s = iter->status(); + + if (s.ok() && paranoid_file_checks_) { + OutputValidator validator(cfd->internal_comparator(), + /*_enable_order_check=*/true, + /*_enable_hash=*/true); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + s = validator.Add(iter->key(), iter->value()); + if (!s.ok()) { + break; + } + } + if (s.ok()) { + s = iter->status(); + } + if (s.ok() && + !validator.CompareValidator(files_output[file_idx]->validator)) { + s = Status::Corruption("Paranoid checksums do not match"); + } + } + + delete iter; + + if (!s.ok()) { + output_status = s; + break; + } + } + }; + for (size_t i = 1; i < compact_->sub_compact_states.size(); i++) { + thread_pool.emplace_back( + verify_table, std::ref(compact_->sub_compact_states[i].status)); + } + verify_table(compact_->sub_compact_states[0].status); + for (auto& thread : thread_pool) { + thread.join(); + } + + for (const auto& state : compact_->sub_compact_states) { + if (!state.status.ok()) { + status = state.status; + break; + } + } + } + + ReleaseSubcompactionResources(); + TEST_SYNC_POINT("CompactionJob::ReleaseSubcompactionResources:0"); + TEST_SYNC_POINT("CompactionJob::ReleaseSubcompactionResources:1"); + + TablePropertiesCollection tp; + for (const auto& state : compact_->sub_compact_states) { + for (const auto& output : state.GetOutputs()) { + auto fn = + TableFileName(state.compaction->immutable_options()->cf_paths, + output.meta.fd.GetNumber(), output.meta.fd.GetPathId()); + tp[fn] = output.table_properties; + } + } + compact_->compaction->SetOutputTableProperties(std::move(tp)); + + // Finish up all book-keeping to unify the subcompaction results + compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_); + UpdateCompactionStats(); + + RecordCompactionIOStats(); + LogFlush(db_options_.info_log); + TEST_SYNC_POINT("CompactionJob::Run():End"); + + compact_->status = status; + return status; +} + +Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) { + assert(compact_); + + AutoThreadOperationStageUpdater stage_updater( + ThreadStatus::STAGE_COMPACTION_INSTALL); + db_mutex_->AssertHeld(); + Status status = compact_->status; + + ColumnFamilyData* cfd = compact_->compaction->column_family_data(); + assert(cfd); + + int output_level = compact_->compaction->output_level(); + cfd->internal_stats()->AddCompactionStats(output_level, thread_pri_, + compaction_stats_); + + if (status.ok()) { + status = InstallCompactionResults(mutable_cf_options); + } + if (!versions_->io_status().ok()) { + io_status_ = versions_->io_status(); + } + + VersionStorageInfo::LevelSummaryStorage tmp; + auto vstorage = cfd->current()->storage_info(); + const auto& stats = compaction_stats_.stats; + + double read_write_amp = 0.0; + double write_amp = 0.0; + double bytes_read_per_sec = 0; + double bytes_written_per_sec = 0; + + const uint64_t bytes_read_non_output_and_blob = + stats.bytes_read_non_output_levels + stats.bytes_read_blob; + const uint64_t bytes_read_all = + stats.bytes_read_output_level + bytes_read_non_output_and_blob; + const uint64_t bytes_written_all = + 
stats.bytes_written + stats.bytes_written_blob; + + if (bytes_read_non_output_and_blob > 0) { + read_write_amp = (bytes_written_all + bytes_read_all) / + static_cast(bytes_read_non_output_and_blob); + write_amp = + bytes_written_all / static_cast(bytes_read_non_output_and_blob); + } + if (stats.micros > 0) { + bytes_read_per_sec = bytes_read_all / static_cast(stats.micros); + bytes_written_per_sec = + bytes_written_all / static_cast(stats.micros); + } + + const std::string& column_family_name = cfd->GetName(); + + constexpr double kMB = 1048576.0; + + ROCKS_LOG_BUFFER( + log_buffer_, + "[%s] compacted to: %s, MB/sec: %.1f rd, %.1f wr, level %d, " + "files in(%d, %d) out(%d +%d blob) " + "MB in(%.1f, %.1f +%.1f blob) out(%.1f +%.1f blob), " + "read-write-amplify(%.1f) write-amplify(%.1f) %s, records in: %" PRIu64 + ", records dropped: %" PRIu64 " output_compression: %s\n", + column_family_name.c_str(), vstorage->LevelSummary(&tmp), + bytes_read_per_sec, bytes_written_per_sec, + compact_->compaction->output_level(), + stats.num_input_files_in_non_output_levels, + stats.num_input_files_in_output_level, stats.num_output_files, + stats.num_output_files_blob, stats.bytes_read_non_output_levels / kMB, + stats.bytes_read_output_level / kMB, stats.bytes_read_blob / kMB, + stats.bytes_written / kMB, stats.bytes_written_blob / kMB, read_write_amp, + write_amp, status.ToString().c_str(), stats.num_input_records, + stats.num_dropped_records, + CompressionTypeToString(compact_->compaction->output_compression()) + .c_str()); + + const auto& blob_files = vstorage->GetBlobFiles(); + if (!blob_files.empty()) { + assert(blob_files.front()); + assert(blob_files.back()); + + ROCKS_LOG_BUFFER( + log_buffer_, + "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n", + column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(), + blob_files.back()->GetBlobFileNumber()); + } + + if (compaction_stats_.has_penultimate_level_output) { + ROCKS_LOG_BUFFER( + log_buffer_, + "[%s] has Penultimate Level output: %" PRIu64 + ", level %d, number of files: %" PRIu64 ", number of records: %" PRIu64, + column_family_name.c_str(), + compaction_stats_.penultimate_level_stats.bytes_written, + compact_->compaction->GetPenultimateLevel(), + compaction_stats_.penultimate_level_stats.num_output_files, + compaction_stats_.penultimate_level_stats.num_output_records); + } + + UpdateCompactionJobStats(stats); + + auto stream = event_logger_->LogToBuffer(log_buffer_, 8192); + stream << "job" << job_id_ << "event" + << "compaction_finished" + << "compaction_time_micros" << stats.micros + << "compaction_time_cpu_micros" << stats.cpu_micros << "output_level" + << compact_->compaction->output_level() << "num_output_files" + << stats.num_output_files << "total_output_size" + << stats.bytes_written; + + if (stats.num_output_files_blob > 0) { + stream << "num_blob_output_files" << stats.num_output_files_blob + << "total_blob_output_size" << stats.bytes_written_blob; + } + + stream << "num_input_records" << stats.num_input_records + << "num_output_records" << stats.num_output_records + << "num_subcompactions" << compact_->sub_compact_states.size() + << "output_compression" + << CompressionTypeToString(compact_->compaction->output_compression()); + + stream << "num_single_delete_mismatches" + << compaction_job_stats_->num_single_del_mismatch; + stream << "num_single_delete_fallthrough" + << compaction_job_stats_->num_single_del_fallthru; + + if (measure_io_stats_) { + stream << "file_write_nanos" << 
compaction_job_stats_->file_write_nanos;
+    stream << "file_range_sync_nanos"
+           << compaction_job_stats_->file_range_sync_nanos;
+    stream << "file_fsync_nanos" << compaction_job_stats_->file_fsync_nanos;
+    stream << "file_prepare_write_nanos"
+           << compaction_job_stats_->file_prepare_write_nanos;
+  }
+
+  stream << "lsm_state";
+  stream.StartArray();
+  for (int level = 0; level < vstorage->num_levels(); ++level) {
+    stream << vstorage->NumLevelFiles(level);
+  }
+  stream.EndArray();
+
+  if (!blob_files.empty()) {
+    assert(blob_files.front());
+    stream << "blob_file_head" << blob_files.front()->GetBlobFileNumber();
+
+    assert(blob_files.back());
+    stream << "blob_file_tail" << blob_files.back()->GetBlobFileNumber();
+  }
+
+  if (compaction_stats_.has_penultimate_level_output) {
+    InternalStats::CompactionStats& pl_stats =
+        compaction_stats_.penultimate_level_stats;
+    stream << "penultimate_level_num_output_files" << pl_stats.num_output_files;
+    stream << "penultimate_level_bytes_written" << pl_stats.bytes_written;
+    stream << "penultimate_level_num_output_records"
+           << pl_stats.num_output_records;
+    stream << "penultimate_level_num_output_files_blob"
+           << pl_stats.num_output_files_blob;
+    stream << "penultimate_level_bytes_written_blob"
+           << pl_stats.bytes_written_blob;
+  }
+
+  CleanupCompaction();
+  return status;
+}
+
+void CompactionJob::NotifyOnSubcompactionBegin(
+    SubcompactionState* sub_compact) {
+  Compaction* c = compact_->compaction;
+
+  if (db_options_.listeners.empty()) {
+    return;
+  }
+  if (shutting_down_->load(std::memory_order_acquire)) {
+    return;
+  }
+  if (c->is_manual_compaction() &&
+      manual_compaction_canceled_.load(std::memory_order_acquire)) {
+    return;
+  }
+
+  sub_compact->notify_on_subcompaction_completion = true;
+
+  SubcompactionJobInfo info{};
+  sub_compact->BuildSubcompactionJobInfo(info);
+  info.job_id = static_cast<int>(job_id_);
+  info.thread_id = env_->GetThreadID();
+
+  for (const auto& listener : db_options_.listeners) {
+    listener->OnSubcompactionBegin(info);
+  }
+  info.status.PermitUncheckedError();
+}
+
+void CompactionJob::NotifyOnSubcompactionCompleted(
+    SubcompactionState* sub_compact) {
+  if (db_options_.listeners.empty()) {
+    return;
+  }
+  if (shutting_down_->load(std::memory_order_acquire)) {
+    return;
+  }
+
+  if (sub_compact->notify_on_subcompaction_completion == false) {
+    return;
+  }
+
+  SubcompactionJobInfo info{};
+  sub_compact->BuildSubcompactionJobInfo(info);
+  info.job_id = static_cast<int>(job_id_);
+  info.thread_id = env_->GetThreadID();
+
+  for (const auto& listener : db_options_.listeners) {
+    listener->OnSubcompactionCompleted(info);
+  }
+}
+
+void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
+  assert(sub_compact);
+  assert(sub_compact->compaction);
+  if (db_options_.compaction_service) {
+    CompactionServiceJobStatus comp_status =
+        ProcessKeyValueCompactionWithCompactionService(sub_compact);
+    if (comp_status == CompactionServiceJobStatus::kSuccess ||
+        comp_status == CompactionServiceJobStatus::kFailure) {
+      return;
+    }
+    // fallback to local compaction
+    assert(comp_status == CompactionServiceJobStatus::kUseLocal);
+  }
+
+  uint64_t prev_cpu_micros = db_options_.clock->CPUMicros();
+
+  ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
+
+  // Create the compaction filter, and fail the compaction if
+  // IgnoreSnapshots() = false because it is not supported anymore
+  const CompactionFilter* compaction_filter =
+      cfd->ioptions()->compaction_filter;
+  std::unique_ptr<CompactionFilter> compaction_filter_from_factory =
nullptr; + if (compaction_filter == nullptr) { + compaction_filter_from_factory = + sub_compact->compaction->CreateCompactionFilter(); + compaction_filter = compaction_filter_from_factory.get(); + } + if (compaction_filter != nullptr && !compaction_filter->IgnoreSnapshots()) { + sub_compact->status = Status::NotSupported( + "CompactionFilter::IgnoreSnapshots() = false is not supported " + "anymore."); + return; + } + + NotifyOnSubcompactionBegin(sub_compact); + + auto range_del_agg = std::make_unique( + &cfd->internal_comparator(), existing_snapshots_, &full_history_ts_low_, + &trim_ts_); + + // TODO: since we already use C++17, should use + // std::optional instead. + const std::optional start = sub_compact->start; + const std::optional end = sub_compact->end; + + std::optional start_without_ts; + std::optional end_without_ts; + + ReadOptions read_options; + read_options.verify_checksums = true; + read_options.fill_cache = false; + read_options.rate_limiter_priority = GetRateLimiterPriority(); + read_options.io_activity = Env::IOActivity::kCompaction; + // Compaction iterators shouldn't be confined to a single prefix. + // Compactions use Seek() for + // (a) concurrent compactions, + // (b) CompactionFilter::Decision::kRemoveAndSkipUntil. + read_options.total_order_seek = true; + + // Remove the timestamps from boundaries because boundaries created in + // GenSubcompactionBoundaries doesn't strip away the timestamp. + size_t ts_sz = cfd->user_comparator()->timestamp_size(); + if (start.has_value()) { + read_options.iterate_lower_bound = &start.value(); + if (ts_sz > 0) { + start_without_ts = StripTimestampFromUserKey(start.value(), ts_sz); + read_options.iterate_lower_bound = &start_without_ts.value(); + } + } + if (end.has_value()) { + read_options.iterate_upper_bound = &end.value(); + if (ts_sz > 0) { + end_without_ts = StripTimestampFromUserKey(end.value(), ts_sz); + read_options.iterate_upper_bound = &end_without_ts.value(); + } + } + + // Although the v2 aggregator is what the level iterator(s) know about, + // the AddTombstones calls will be propagated down to the v1 aggregator. + std::unique_ptr raw_input(versions_->MakeInputIterator( + read_options, sub_compact->compaction, range_del_agg.get(), + file_options_for_read_, start, end)); + InternalIterator* input = raw_input.get(); + + IterKey start_ikey; + IterKey end_ikey; + Slice start_slice; + Slice end_slice; + Slice start_user_key{}; + Slice end_user_key{}; + + static constexpr char kMaxTs[] = + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; + Slice ts_slice; + std::string max_ts; + if (ts_sz > 0) { + if (ts_sz <= strlen(kMaxTs)) { + ts_slice = Slice(kMaxTs, ts_sz); + } else { + max_ts = std::string(ts_sz, '\xff'); + ts_slice = Slice(max_ts); + } + } + + if (start.has_value()) { + start_ikey.SetInternalKey(start.value(), kMaxSequenceNumber, + kValueTypeForSeek); + if (ts_sz > 0) { + start_ikey.UpdateInternalKey(kMaxSequenceNumber, kValueTypeForSeek, + &ts_slice); + } + start_slice = start_ikey.GetInternalKey(); + start_user_key = start_ikey.GetUserKey(); + } + if (end.has_value()) { + end_ikey.SetInternalKey(end.value(), kMaxSequenceNumber, kValueTypeForSeek); + if (ts_sz > 0) { + end_ikey.UpdateInternalKey(kMaxSequenceNumber, kValueTypeForSeek, + &ts_slice); + } + end_slice = end_ikey.GetInternalKey(); + end_user_key = end_ikey.GetUserKey(); + } + + std::unique_ptr clip; + if (start.has_value() || end.has_value()) { + clip = std::make_unique( + raw_input.get(), start.has_value() ? 
&start_slice : nullptr, + end.has_value() ? &end_slice : nullptr, &cfd->internal_comparator()); + input = clip.get(); + } + + std::unique_ptr blob_counter; + + if (sub_compact->compaction->DoesInputReferenceBlobFiles()) { + BlobGarbageMeter* meter = sub_compact->Current().CreateBlobGarbageMeter(); + blob_counter = std::make_unique(input, meter); + input = blob_counter.get(); + } + + std::unique_ptr trim_history_iter; + if (ts_sz > 0 && !trim_ts_.empty()) { + trim_history_iter = std::make_unique( + input, cfd->user_comparator(), trim_ts_); + input = trim_history_iter.get(); + } + + input->SeekToFirst(); + + AutoThreadOperationStageUpdater stage_updater( + ThreadStatus::STAGE_COMPACTION_PROCESS_KV); + + // I/O measurement variables + PerfLevel prev_perf_level = PerfLevel::kEnableTime; + const uint64_t kRecordStatsEvery = 1000; + uint64_t prev_write_nanos = 0; + uint64_t prev_fsync_nanos = 0; + uint64_t prev_range_sync_nanos = 0; + uint64_t prev_prepare_write_nanos = 0; + uint64_t prev_cpu_write_nanos = 0; + uint64_t prev_cpu_read_nanos = 0; + if (measure_io_stats_) { + prev_perf_level = GetPerfLevel(); + SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); + prev_write_nanos = IOSTATS(write_nanos); + prev_fsync_nanos = IOSTATS(fsync_nanos); + prev_range_sync_nanos = IOSTATS(range_sync_nanos); + prev_prepare_write_nanos = IOSTATS(prepare_write_nanos); + prev_cpu_write_nanos = IOSTATS(cpu_write_nanos); + prev_cpu_read_nanos = IOSTATS(cpu_read_nanos); + } + + MergeHelper merge( + env_, cfd->user_comparator(), cfd->ioptions()->merge_operator.get(), + compaction_filter, db_options_.info_log.get(), + false /* internal key corruption is expected */, + existing_snapshots_.empty() ? 0 : existing_snapshots_.back(), + snapshot_checker_, compact_->compaction->level(), db_options_.stats); + + const MutableCFOptions* mutable_cf_options = + sub_compact->compaction->mutable_cf_options(); + assert(mutable_cf_options); + + std::vector blob_file_paths; + + // TODO: BlobDB to support output_to_penultimate_level compaction, which needs + // 2 builders, so may need to move to `CompactionOutputs` + std::unique_ptr blob_file_builder( + (mutable_cf_options->enable_blob_files && + sub_compact->compaction->output_level() >= + mutable_cf_options->blob_file_starting_level) + ? new BlobFileBuilder( + versions_, fs_.get(), + sub_compact->compaction->immutable_options(), + mutable_cf_options, &file_options_, db_id_, db_session_id_, + job_id_, cfd->GetID(), cfd->GetName(), Env::IOPriority::IO_LOW, + write_hint_, io_tracer_, blob_callback_, + BlobFileCreationReason::kCompaction, &blob_file_paths, + sub_compact->Current().GetBlobFileAdditionsPtr()) + : nullptr); + + TEST_SYNC_POINT("CompactionJob::Run():Inprogress"); + TEST_SYNC_POINT_CALLBACK( + "CompactionJob::Run():PausingManualCompaction:1", + reinterpret_cast( + const_cast*>(&manual_compaction_canceled_))); + + const std::string* const full_history_ts_low = + full_history_ts_low_.empty() ? nullptr : &full_history_ts_low_; + const SequenceNumber job_snapshot_seq = + job_context_ ? 
job_context_->GetJobSnapshotSequence()
+                                     : kMaxSequenceNumber;
+
+  auto c_iter = std::make_unique<CompactionIterator>(
+      input, cfd->user_comparator(), &merge, versions_->LastSequence(),
+      &existing_snapshots_, earliest_write_conflict_snapshot_, job_snapshot_seq,
+      snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_),
+      /*expect_valid_internal_key=*/true, range_del_agg.get(),
+      blob_file_builder.get(), db_options_.allow_data_in_errors,
+      db_options_.enforce_single_del_contracts, manual_compaction_canceled_,
+      sub_compact->compaction, compaction_filter, shutting_down_,
+      db_options_.info_log, full_history_ts_low, preserve_time_min_seqno_,
+      preclude_last_level_min_seqno_);
+  c_iter->SeekToFirst();
+
+  // Assign the range delete aggregator to the target output level, which
+  // makes sure it only outputs to a single level
+  sub_compact->AssignRangeDelAggregator(std::move(range_del_agg));
+
+  const auto& c_iter_stats = c_iter->iter_stats();
+
+  // Define the open and close functions for the compaction files, which will
+  // be used to open/close output files when needed.
+  const CompactionFileOpenFunc open_file_func =
+      [this, sub_compact](CompactionOutputs& outputs) {
+        return this->OpenCompactionOutputFile(sub_compact, outputs);
+      };
+
+  const CompactionFileCloseFunc close_file_func =
+      [this, sub_compact, start_user_key, end_user_key](
+          CompactionOutputs& outputs, const Status& status,
+          const Slice& next_table_min_key) {
+        return this->FinishCompactionOutputFile(
+            status, sub_compact, outputs, next_table_min_key,
+            sub_compact->start.has_value() ? &start_user_key : nullptr,
+            sub_compact->end.has_value() ? &end_user_key : nullptr);
+      };
+
+  Status status;
+  TEST_SYNC_POINT_CALLBACK(
+      "CompactionJob::ProcessKeyValueCompaction()::Processing",
+      reinterpret_cast<void*>(
+          const_cast<Compaction*>(sub_compact->compaction)));
+  while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) {
+    // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid()
+    // returns true.
+    assert(!end.has_value() || cfd->user_comparator()->Compare(
+                                   c_iter->user_key(), end.value()) < 0);
+
+    if (c_iter_stats.num_input_records % kRecordStatsEvery ==
+        kRecordStatsEvery - 1) {
+      RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats);
+      c_iter->ResetRecordCounts();
+      RecordCompactionIOStats();
+    }
+
+    // Add the current compaction_iterator key to the target compaction
+    // output; if the output file needs to be closed or opened, this calls
+    // `open_file_func` or `close_file_func` (see the sketch below).
+    // TODO: it would be better to have the compaction file open/close moved
+    // into `CompactionOutputs`, which has the output file information.
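+    // [Editor's note] A minimal standalone sketch of the callback pattern
+    // used here: the output-appending code stays agnostic of file management
+    // and calls back into the open/close hooks defined above when a file
+    // boundary is reached. The names `Outputs`, `OpenFn`, `CloseFn`, and
+    // `AddKeyValue` below are hypothetical, for illustration only.
+#if 0
+    struct Outputs;  // stand-in for CompactionOutputs
+    using OpenFn = std::function<Status(Outputs&)>;
+    using CloseFn =
+        std::function<Status(Outputs&, const Status&, const Slice&)>;
+    Status AddKeyValue(Outputs& out, bool file_full, bool need_new_file,
+                       const Slice& next_min_key, const OpenFn& open_fn,
+                       const CloseFn& close_fn) {
+      Status s;
+      if (file_full) {
+        s = close_fn(out, s, next_min_key);  // seal the current output file
+      }
+      if (s.ok() && need_new_file) {
+        s = open_fn(out);  // lazily open the next output file
+      }
+      // ... append the key/value to the current table builder ...
+      return s;
+    }
+#endif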
+ status = sub_compact->AddToOutput(*c_iter, open_file_func, close_file_func); + if (!status.ok()) { + break; + } + + TEST_SYNC_POINT_CALLBACK( + "CompactionJob::Run():PausingManualCompaction:2", + reinterpret_cast( + const_cast*>(&manual_compaction_canceled_))); + c_iter->Next(); + if (c_iter->status().IsManualCompactionPaused()) { + break; + } + } + + sub_compact->compaction_job_stats.num_blobs_read = + c_iter_stats.num_blobs_read; + sub_compact->compaction_job_stats.total_blob_bytes_read = + c_iter_stats.total_blob_bytes_read; + sub_compact->compaction_job_stats.num_input_deletion_records = + c_iter_stats.num_input_deletion_records; + sub_compact->compaction_job_stats.num_corrupt_keys = + c_iter_stats.num_input_corrupt_records; + sub_compact->compaction_job_stats.num_single_del_fallthru = + c_iter_stats.num_single_del_fallthru; + sub_compact->compaction_job_stats.num_single_del_mismatch = + c_iter_stats.num_single_del_mismatch; + sub_compact->compaction_job_stats.total_input_raw_key_bytes += + c_iter_stats.total_input_raw_key_bytes; + sub_compact->compaction_job_stats.total_input_raw_value_bytes += + c_iter_stats.total_input_raw_value_bytes; + + RecordTick(stats_, FILTER_OPERATION_TOTAL_TIME, + c_iter_stats.total_filter_time); + + if (c_iter_stats.num_blobs_relocated > 0) { + RecordTick(stats_, BLOB_DB_GC_NUM_KEYS_RELOCATED, + c_iter_stats.num_blobs_relocated); + } + if (c_iter_stats.total_blob_bytes_relocated > 0) { + RecordTick(stats_, BLOB_DB_GC_BYTES_RELOCATED, + c_iter_stats.total_blob_bytes_relocated); + } + + RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats); + RecordCompactionIOStats(); + + if (status.ok() && cfd->IsDropped()) { + status = + Status::ColumnFamilyDropped("Column family dropped during compaction"); + } + if ((status.ok() || status.IsColumnFamilyDropped()) && + shutting_down_->load(std::memory_order_relaxed)) { + status = Status::ShutdownInProgress("Database shutdown"); + } + if ((status.ok() || status.IsColumnFamilyDropped()) && + (manual_compaction_canceled_.load(std::memory_order_relaxed))) { + status = Status::Incomplete(Status::SubCode::kManualCompactionPaused); + } + if (status.ok()) { + status = input->status(); + } + if (status.ok()) { + status = c_iter->status(); + } + + // Call FinishCompactionOutputFile() even if status is not ok: it needs to + // close the output files. Open file function is also passed, in case there's + // only range-dels, no file was opened, to save the range-dels, it need to + // create a new output file. 
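+  // [Editor's note] The checks above establish a precedence among failure
+  // causes: column-family drop, then shutdown, then manual-compaction pause,
+  // then input-iterator errors, then compaction-iterator errors. A compact
+  // standalone sketch of the same ordering (names are illustrative, not
+  // RocksDB API):
+#if 0
+  Status ResolveStatus(bool cf_dropped, bool shutting_down, bool manual_paused,
+                       const Status& input_s, const Status& iter_s) {
+    if (cf_dropped) {
+      return Status::ColumnFamilyDropped("Column family dropped");
+    }
+    if (shutting_down) return Status::ShutdownInProgress("Database shutdown");
+    if (manual_paused) {
+      return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
+    }
+    if (!input_s.ok()) return input_s;
+    return iter_s;
+  }
+#endif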
+ status = sub_compact->CloseCompactionFiles(status, open_file_func, + close_file_func); + + if (blob_file_builder) { + if (status.ok()) { + status = blob_file_builder->Finish(); + } else { + blob_file_builder->Abandon(status); + } + blob_file_builder.reset(); + sub_compact->Current().UpdateBlobStats(); + } + + sub_compact->compaction_job_stats.cpu_micros = + db_options_.clock->CPUMicros() - prev_cpu_micros; + + if (measure_io_stats_) { + sub_compact->compaction_job_stats.file_write_nanos += + IOSTATS(write_nanos) - prev_write_nanos; + sub_compact->compaction_job_stats.file_fsync_nanos += + IOSTATS(fsync_nanos) - prev_fsync_nanos; + sub_compact->compaction_job_stats.file_range_sync_nanos += + IOSTATS(range_sync_nanos) - prev_range_sync_nanos; + sub_compact->compaction_job_stats.file_prepare_write_nanos += + IOSTATS(prepare_write_nanos) - prev_prepare_write_nanos; + sub_compact->compaction_job_stats.cpu_micros -= + (IOSTATS(cpu_write_nanos) - prev_cpu_write_nanos + + IOSTATS(cpu_read_nanos) - prev_cpu_read_nanos) / + 1000; + if (prev_perf_level != PerfLevel::kEnableTimeAndCPUTimeExceptForMutex) { + SetPerfLevel(prev_perf_level); + } + } +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + if (!status.ok()) { + if (c_iter) { + c_iter->status().PermitUncheckedError(); + } + if (input) { + input->status().PermitUncheckedError(); + } + } +#endif // ROCKSDB_ASSERT_STATUS_CHECKED + + blob_counter.reset(); + clip.reset(); + raw_input.reset(); + sub_compact->status = status; + NotifyOnSubcompactionCompleted(sub_compact); +} + +uint64_t CompactionJob::GetCompactionId(SubcompactionState* sub_compact) const { + return (uint64_t)job_id_ << 32 | sub_compact->sub_job_id; +} + +void CompactionJob::RecordDroppedKeys( + const CompactionIterationStats& c_iter_stats, + CompactionJobStats* compaction_job_stats) { + if (c_iter_stats.num_record_drop_user > 0) { + RecordTick(stats_, COMPACTION_KEY_DROP_USER, + c_iter_stats.num_record_drop_user); + } + if (c_iter_stats.num_record_drop_hidden > 0) { + RecordTick(stats_, COMPACTION_KEY_DROP_NEWER_ENTRY, + c_iter_stats.num_record_drop_hidden); + if (compaction_job_stats) { + compaction_job_stats->num_records_replaced += + c_iter_stats.num_record_drop_hidden; + } + } + if (c_iter_stats.num_record_drop_obsolete > 0) { + RecordTick(stats_, COMPACTION_KEY_DROP_OBSOLETE, + c_iter_stats.num_record_drop_obsolete); + if (compaction_job_stats) { + compaction_job_stats->num_expired_deletion_records += + c_iter_stats.num_record_drop_obsolete; + } + } + if (c_iter_stats.num_record_drop_range_del > 0) { + RecordTick(stats_, COMPACTION_KEY_DROP_RANGE_DEL, + c_iter_stats.num_record_drop_range_del); + } + if (c_iter_stats.num_range_del_drop_obsolete > 0) { + RecordTick(stats_, COMPACTION_RANGE_DEL_DROP_OBSOLETE, + c_iter_stats.num_range_del_drop_obsolete); + } + if (c_iter_stats.num_optimized_del_drop_obsolete > 0) { + RecordTick(stats_, COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, + c_iter_stats.num_optimized_del_drop_obsolete); + } +} + +Status CompactionJob::FinishCompactionOutputFile( + const Status& input_status, SubcompactionState* sub_compact, + CompactionOutputs& outputs, const Slice& next_table_min_key, + const Slice* comp_start_user_key, const Slice* comp_end_user_key) { + AutoThreadOperationStageUpdater stage_updater( + ThreadStatus::STAGE_COMPACTION_SYNC_FILE); + assert(sub_compact != nullptr); + assert(outputs.HasBuilder()); + + FileMetaData* meta = outputs.GetMetaData(); + uint64_t output_number = meta->fd.GetNumber(); + assert(output_number != 0); + + ColumnFamilyData* cfd = 
sub_compact->compaction->column_family_data();
+  std::string file_checksum = kUnknownFileChecksum;
+  std::string file_checksum_func_name = kUnknownFileChecksumFuncName;
+
+  // Check for iterator errors
+  Status s = input_status;
+
+  // Add range tombstones
+  auto earliest_snapshot = kMaxSequenceNumber;
+  if (existing_snapshots_.size() > 0) {
+    earliest_snapshot = existing_snapshots_[0];
+  }
+  if (s.ok()) {
+    CompactionIterationStats range_del_out_stats;
+    // If the compaction supports per_key_placement, only output range dels to
+    // the penultimate level.
+    // Note: Use `bottommost_level_ = true` for both bottommost and
+    // output_to_penultimate_level compaction here, as it's only used to decide
+    // if range dels could be dropped.
+    if (outputs.HasRangeDel()) {
+      s = outputs.AddRangeDels(comp_start_user_key, comp_end_user_key,
+                               range_del_out_stats, bottommost_level_,
+                               cfd->internal_comparator(), earliest_snapshot,
+                               next_table_min_key, full_history_ts_low_);
+    }
+    RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats);
+    TEST_SYNC_POINT("CompactionJob::FinishCompactionOutputFile1");
+  }
+
+  const uint64_t current_entries = outputs.NumEntries();
+
+  s = outputs.Finish(s, seqno_time_mapping_);
+
+  if (s.ok()) {
+    // With accurate smallest and largest keys, we can get a slightly more
+    // accurate oldest ancester time.
+    // This makes the oldest ancester time in the manifest more accurate than
+    // in the table properties. Not sure how to resolve it.
+    if (meta->smallest.size() > 0 && meta->largest.size() > 0) {
+      uint64_t refined_oldest_ancester_time;
+      Slice new_smallest = meta->smallest.user_key();
+      Slice new_largest = meta->largest.user_key();
+      if (!new_largest.empty() && !new_smallest.empty()) {
+        refined_oldest_ancester_time =
+            sub_compact->compaction->MinInputFileOldestAncesterTime(
+                &(meta->smallest), &(meta->largest));
+        if (refined_oldest_ancester_time !=
+            std::numeric_limits<uint64_t>::max()) {
+          meta->oldest_ancester_time = refined_oldest_ancester_time;
+        }
+      }
+    }
+  }
+
+  // Finish and check for file errors
+  IOStatus io_s = outputs.WriterSyncClose(s, db_options_.clock, stats_,
+                                          db_options_.use_fsync);
+
+  if (s.ok() && io_s.ok()) {
+    file_checksum = meta->file_checksum;
+    file_checksum_func_name = meta->file_checksum_func_name;
+  }
+
+  if (s.ok()) {
+    s = io_s;
+  }
+  if (sub_compact->io_status.ok()) {
+    sub_compact->io_status = io_s;
+    // Since this error is really a copy of the
+    // "normal" status, it does not also need to be checked
+    sub_compact->io_status.PermitUncheckedError();
+  }
+
+  TableProperties tp;
+  if (s.ok()) {
+    tp = outputs.GetTableProperties();
+  }
+
+  if (s.ok() && current_entries == 0 && tp.num_range_deletions == 0) {
+    // If there is nothing to output, there is no need to generate an SST
+    // file. This happens when the output level is the bottom level and, at
+    // the same time, the sub_compact outputs nothing.
+    std::string fname =
+        TableFileName(sub_compact->compaction->immutable_options()->cf_paths,
+                      meta->fd.GetNumber(), meta->fd.GetPathId());
+
+    // TODO(AR) it is not clear if there are any larger implications if
+    // DeleteFile fails here
+    Status ds = env_->DeleteFile(fname);
+    if (!ds.ok()) {
+      ROCKS_LOG_WARN(
+          db_options_.info_log,
+          "[%s] [JOB %d] Unable to remove SST file for table #%" PRIu64
+          " at bottom level%s",
+          cfd->GetName().c_str(), job_id_, output_number,
+          meta->marked_for_compaction ? " (need compaction)" : "");
+    }
+
+    // Also need to remove the file from outputs, or it will be added to the
+    // VersionEdit.
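+    // [Editor's note] In short, an output file is kept only if it holds at
+    // least one point entry or range deletion; otherwise the just-created
+    // file is deleted again and dropped from the bookkeeping below. A
+    // hypothetical predicate capturing the decision above:
+#if 0
+    bool KeepOutputFile(uint64_t num_entries, uint64_t num_range_deletions) {
+      return num_entries > 0 || num_range_deletions > 0;
+    }
+#endif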
+ outputs.RemoveLastOutput(); + meta = nullptr; + } + + if (s.ok() && (current_entries > 0 || tp.num_range_deletions > 0)) { + // Output to event logger and fire events. + outputs.UpdateTableProperties(); + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Generated table #%" PRIu64 ": %" PRIu64 + " keys, %" PRIu64 " bytes%s, temperature: %s", + cfd->GetName().c_str(), job_id_, output_number, + current_entries, meta->fd.file_size, + meta->marked_for_compaction ? " (need compaction)" : "", + temperature_to_string[meta->temperature].c_str()); + } + std::string fname; + FileDescriptor output_fd; + uint64_t oldest_blob_file_number = kInvalidBlobFileNumber; + Status status_for_listener = s; + if (meta != nullptr) { + fname = GetTableFileName(meta->fd.GetNumber()); + output_fd = meta->fd; + oldest_blob_file_number = meta->oldest_blob_file_number; + } else { + fname = "(nil)"; + if (s.ok()) { + status_for_listener = Status::Aborted("Empty SST file not kept"); + } + } + EventHelpers::LogAndNotifyTableFileCreationFinished( + event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname, + job_id_, output_fd, oldest_blob_file_number, tp, + TableFileCreationReason::kCompaction, status_for_listener, file_checksum, + file_checksum_func_name); + + // Report new file to SstFileManagerImpl + auto sfm = + static_cast(db_options_.sst_file_manager.get()); + if (sfm && meta != nullptr && meta->fd.GetPathId() == 0) { + Status add_s = sfm->OnAddFile(fname); + if (!add_s.ok() && s.ok()) { + s = add_s; + } + if (sfm->IsMaxAllowedSpaceReached()) { + // TODO(ajkr): should we return OK() if max space was reached by the final + // compaction output file (similarly to how flush works when full)? + s = Status::SpaceLimit("Max allowed space was reached"); + TEST_SYNC_POINT( + "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached"); + InstrumentedMutexLock l(db_mutex_); + db_error_handler_->SetBGError(s, BackgroundErrorReason::kCompaction); + } + } + + outputs.ResetBuilder(); + return s; +} + +Status CompactionJob::InstallCompactionResults( + const MutableCFOptions& mutable_cf_options) { + assert(compact_); + + db_mutex_->AssertHeld(); + + const ReadOptions read_options(Env::IOActivity::kCompaction); + auto* compaction = compact_->compaction; + assert(compaction); + + { + Compaction::InputLevelSummaryBuffer inputs_summary; + if (compaction_stats_.has_penultimate_level_output) { + ROCKS_LOG_BUFFER( + log_buffer_, + "[%s] [JOB %d] Compacted %s => output_to_penultimate_level: %" PRIu64 + " bytes + last: %" PRIu64 " bytes. 
Total: %" PRIu64 " bytes", + compaction->column_family_data()->GetName().c_str(), job_id_, + compaction->InputLevelSummary(&inputs_summary), + compaction_stats_.penultimate_level_stats.bytes_written, + compaction_stats_.stats.bytes_written, + compaction_stats_.TotalBytesWritten()); + } else { + ROCKS_LOG_BUFFER(log_buffer_, + "[%s] [JOB %d] Compacted %s => %" PRIu64 " bytes", + compaction->column_family_data()->GetName().c_str(), + job_id_, compaction->InputLevelSummary(&inputs_summary), + compaction_stats_.TotalBytesWritten()); + } + } + + VersionEdit* const edit = compaction->edit(); + assert(edit); + + // Add compaction inputs + compaction->AddInputDeletions(edit); + + std::unordered_map blob_total_garbage; + + for (const auto& sub_compact : compact_->sub_compact_states) { + sub_compact.AddOutputsEdit(edit); + + for (const auto& blob : sub_compact.Current().GetBlobFileAdditions()) { + edit->AddBlobFile(blob); + } + + if (sub_compact.Current().GetBlobGarbageMeter()) { + const auto& flows = sub_compact.Current().GetBlobGarbageMeter()->flows(); + + for (const auto& pair : flows) { + const uint64_t blob_file_number = pair.first; + const BlobGarbageMeter::BlobInOutFlow& flow = pair.second; + + assert(flow.IsValid()); + if (flow.HasGarbage()) { + blob_total_garbage[blob_file_number].Add(flow.GetGarbageCount(), + flow.GetGarbageBytes()); + } + } + } + } + + for (const auto& pair : blob_total_garbage) { + const uint64_t blob_file_number = pair.first; + const BlobGarbageMeter::BlobStats& stats = pair.second; + + edit->AddBlobFileGarbage(blob_file_number, stats.GetCount(), + stats.GetBytes()); + } + + if ((compaction->compaction_reason() == + CompactionReason::kLevelMaxLevelSize || + compaction->compaction_reason() == CompactionReason::kRoundRobinTtl) && + compaction->immutable_options()->compaction_pri == kRoundRobin) { + int start_level = compaction->start_level(); + if (start_level > 0) { + auto vstorage = compaction->input_version()->storage_info(); + edit->AddCompactCursor(start_level, + vstorage->GetNextCompactCursor( + start_level, compaction->num_input_files(0))); + } + } + + return versions_->LogAndApply(compaction->column_family_data(), + mutable_cf_options, read_options, edit, + db_mutex_, db_directory_); +} + +void CompactionJob::RecordCompactionIOStats() { + RecordTick(stats_, COMPACT_READ_BYTES, IOSTATS(bytes_read)); + RecordTick(stats_, COMPACT_WRITE_BYTES, IOSTATS(bytes_written)); + CompactionReason compaction_reason = + compact_->compaction->compaction_reason(); + if (compaction_reason == CompactionReason::kFilesMarkedForCompaction) { + RecordTick(stats_, COMPACT_READ_BYTES_MARKED, IOSTATS(bytes_read)); + RecordTick(stats_, COMPACT_WRITE_BYTES_MARKED, IOSTATS(bytes_written)); + } else if (compaction_reason == CompactionReason::kPeriodicCompaction) { + RecordTick(stats_, COMPACT_READ_BYTES_PERIODIC, IOSTATS(bytes_read)); + RecordTick(stats_, COMPACT_WRITE_BYTES_PERIODIC, IOSTATS(bytes_written)); + } else if (compaction_reason == CompactionReason::kTtl) { + RecordTick(stats_, COMPACT_READ_BYTES_TTL, IOSTATS(bytes_read)); + RecordTick(stats_, COMPACT_WRITE_BYTES_TTL, IOSTATS(bytes_written)); + } + ThreadStatusUtil::IncreaseThreadOperationProperty( + ThreadStatus::COMPACTION_BYTES_READ, IOSTATS(bytes_read)); + IOSTATS_RESET(bytes_read); + ThreadStatusUtil::IncreaseThreadOperationProperty( + ThreadStatus::COMPACTION_BYTES_WRITTEN, IOSTATS(bytes_written)); + IOSTATS_RESET(bytes_written); +} + +Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact, + 
CompactionOutputs& outputs) { + assert(sub_compact != nullptr); + + // no need to lock because VersionSet::next_file_number_ is atomic + uint64_t file_number = versions_->NewFileNumber(); + std::string fname = GetTableFileName(file_number); + // Fire events. + ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); + EventHelpers::NotifyTableFileCreationStarted( + cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname, job_id_, + TableFileCreationReason::kCompaction); + // Make the output file + std::unique_ptr writable_file; +#ifndef NDEBUG + bool syncpoint_arg = file_options_.use_direct_writes; + TEST_SYNC_POINT_CALLBACK("CompactionJob::OpenCompactionOutputFile", + &syncpoint_arg); +#endif + + // Pass temperature of the last level files to FileSystem. + FileOptions fo_copy = file_options_; + Temperature temperature = sub_compact->compaction->output_temperature(); + // only set for the last level compaction and also it's not output to + // penultimate level (when preclude_last_level feature is enabled) + if (temperature == Temperature::kUnknown && + sub_compact->compaction->is_last_level() && + !sub_compact->IsCurrentPenultimateLevel()) { + temperature = + sub_compact->compaction->mutable_cf_options()->last_level_temperature; + } + fo_copy.temperature = temperature; + + Status s; + IOStatus io_s = NewWritableFile(fs_.get(), fname, &writable_file, fo_copy); + s = io_s; + if (sub_compact->io_status.ok()) { + sub_compact->io_status = io_s; + // Since this error is really a copy of the io_s that is checked below as s, + // it does not also need to be checked. + sub_compact->io_status.PermitUncheckedError(); + } + if (!s.ok()) { + ROCKS_LOG_ERROR( + db_options_.info_log, + "[%s] [JOB %d] OpenCompactionOutputFiles for table #%" PRIu64 + " fails at NewWritableFile with status %s", + sub_compact->compaction->column_family_data()->GetName().c_str(), + job_id_, file_number, s.ToString().c_str()); + LogFlush(db_options_.info_log); + EventHelpers::LogAndNotifyTableFileCreationFinished( + event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), + fname, job_id_, FileDescriptor(), kInvalidBlobFileNumber, + TableProperties(), TableFileCreationReason::kCompaction, s, + kUnknownFileChecksum, kUnknownFileChecksumFuncName); + return s; + } + + // Try to figure out the output file's oldest ancester time. + int64_t temp_current_time = 0; + auto get_time_status = db_options_.clock->GetCurrentTime(&temp_current_time); + // Safe to proceed even if GetCurrentTime fails. So, log and proceed. + if (!get_time_status.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Failed to get current time. Status: %s", + get_time_status.ToString().c_str()); + } + uint64_t current_time = static_cast(temp_current_time); + InternalKey tmp_start, tmp_end; + if (sub_compact->start.has_value()) { + tmp_start.SetMinPossibleForUserKey(sub_compact->start.value()); + } + if (sub_compact->end.has_value()) { + tmp_end.SetMinPossibleForUserKey(sub_compact->end.value()); + } + uint64_t oldest_ancester_time = + sub_compact->compaction->MinInputFileOldestAncesterTime( + sub_compact->start.has_value() ? &tmp_start : nullptr, + sub_compact->end.has_value() ? 
&tmp_end : nullptr); + if (oldest_ancester_time == std::numeric_limits::max()) { + oldest_ancester_time = current_time; + } + + // Initialize a SubcompactionState::Output and add it to sub_compact->outputs + uint64_t epoch_number = sub_compact->compaction->MinInputFileEpochNumber(); + { + FileMetaData meta; + meta.fd = FileDescriptor(file_number, + sub_compact->compaction->output_path_id(), 0); + meta.oldest_ancester_time = oldest_ancester_time; + meta.file_creation_time = current_time; + meta.epoch_number = epoch_number; + meta.temperature = temperature; + assert(!db_id_.empty()); + assert(!db_session_id_.empty()); + s = GetSstInternalUniqueId(db_id_, db_session_id_, meta.fd.GetNumber(), + &meta.unique_id); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "[%s] [JOB %d] file #%" PRIu64 + " failed to generate unique id: %s.", + cfd->GetName().c_str(), job_id_, meta.fd.GetNumber(), + s.ToString().c_str()); + return s; + } + + outputs.AddOutput(std::move(meta), cfd->internal_comparator(), + sub_compact->compaction->mutable_cf_options() + ->check_flush_compaction_key_order, + paranoid_file_checks_); + } + + writable_file->SetIOPriority(GetRateLimiterPriority()); + writable_file->SetWriteLifeTimeHint(write_hint_); + FileTypeSet tmp_set = db_options_.checksum_handoff_file_types; + writable_file->SetPreallocationBlockSize(static_cast( + sub_compact->compaction->OutputFilePreallocationSize())); + const auto& listeners = + sub_compact->compaction->immutable_options()->listeners; + outputs.AssignFileWriter(new WritableFileWriter( + std::move(writable_file), fname, fo_copy, db_options_.clock, io_tracer_, + db_options_.stats, listeners, db_options_.file_checksum_gen_factory.get(), + tmp_set.Contains(FileType::kTableFile), false)); + + TableBuilderOptions tboptions( + *cfd->ioptions(), *(sub_compact->compaction->mutable_cf_options()), + cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(), + sub_compact->compaction->output_compression(), + sub_compact->compaction->output_compression_opts(), cfd->GetID(), + cfd->GetName(), sub_compact->compaction->output_level(), + bottommost_level_, TableFileCreationReason::kCompaction, + 0 /* oldest_key_time */, current_time, db_id_, db_session_id_, + sub_compact->compaction->max_output_file_size(), file_number); + + outputs.NewBuilder(tboptions); + + LogFlush(db_options_.info_log); + return s; +} + +void CompactionJob::CleanupCompaction() { + for (SubcompactionState& sub_compact : compact_->sub_compact_states) { + sub_compact.Cleanup(table_cache_.get()); + } + delete compact_; + compact_ = nullptr; +} + +namespace { +void CopyPrefix(const Slice& src, size_t prefix_length, std::string* dst) { + assert(prefix_length > 0); + size_t length = src.size() > prefix_length ? 
prefix_length : src.size(); + dst->assign(src.data(), length); +} +} // namespace + + +void CompactionJob::UpdateCompactionStats() { + assert(compact_); + + Compaction* compaction = compact_->compaction; + compaction_stats_.stats.num_input_files_in_non_output_levels = 0; + compaction_stats_.stats.num_input_files_in_output_level = 0; + for (int input_level = 0; + input_level < static_cast(compaction->num_input_levels()); + ++input_level) { + if (compaction->level(input_level) != compaction->output_level()) { + UpdateCompactionInputStatsHelper( + &compaction_stats_.stats.num_input_files_in_non_output_levels, + &compaction_stats_.stats.bytes_read_non_output_levels, input_level); + } else { + UpdateCompactionInputStatsHelper( + &compaction_stats_.stats.num_input_files_in_output_level, + &compaction_stats_.stats.bytes_read_output_level, input_level); + } + } + + assert(compaction_job_stats_); + compaction_stats_.stats.bytes_read_blob = + compaction_job_stats_->total_blob_bytes_read; + + compaction_stats_.stats.num_dropped_records = + compaction_stats_.DroppedRecords(); +} + +void CompactionJob::UpdateCompactionInputStatsHelper(int* num_files, + uint64_t* bytes_read, + int input_level) { + const Compaction* compaction = compact_->compaction; + auto num_input_files = compaction->num_input_files(input_level); + *num_files += static_cast(num_input_files); + + for (size_t i = 0; i < num_input_files; ++i) { + const auto* file_meta = compaction->input(input_level, i); + *bytes_read += file_meta->fd.GetFileSize(); + compaction_stats_.stats.num_input_records += + static_cast(file_meta->num_entries); + } +} + +void CompactionJob::UpdateCompactionJobStats( + const InternalStats::CompactionStats& stats) const { + compaction_job_stats_->elapsed_micros = stats.micros; + + // input information + compaction_job_stats_->total_input_bytes = + stats.bytes_read_non_output_levels + stats.bytes_read_output_level; + compaction_job_stats_->num_input_records = stats.num_input_records; + compaction_job_stats_->num_input_files = + stats.num_input_files_in_non_output_levels + + stats.num_input_files_in_output_level; + compaction_job_stats_->num_input_files_at_output_level = + stats.num_input_files_in_output_level; + + // output information + compaction_job_stats_->total_output_bytes = stats.bytes_written; + compaction_job_stats_->total_output_bytes_blob = stats.bytes_written_blob; + compaction_job_stats_->num_output_records = stats.num_output_records; + compaction_job_stats_->num_output_files = stats.num_output_files; + compaction_job_stats_->num_output_files_blob = stats.num_output_files_blob; + + if (stats.num_output_files > 0) { + CopyPrefix(compact_->SmallestUserKey(), + CompactionJobStats::kMaxPrefixLength, + &compaction_job_stats_->smallest_output_key_prefix); + CopyPrefix(compact_->LargestUserKey(), CompactionJobStats::kMaxPrefixLength, + &compaction_job_stats_->largest_output_key_prefix); + } +} + +void CompactionJob::LogCompaction() { + Compaction* compaction = compact_->compaction; + ColumnFamilyData* cfd = compaction->column_family_data(); + + // Let's check if anything will get logged. 
Don't prepare all the info if + // we're not logging + if (db_options_.info_log_level <= InfoLogLevel::INFO_LEVEL) { + Compaction::InputLevelSummaryBuffer inputs_summary; + ROCKS_LOG_INFO( + db_options_.info_log, "[%s] [JOB %d] Compacting %s, score %.2f", + cfd->GetName().c_str(), job_id_, + compaction->InputLevelSummary(&inputs_summary), compaction->score()); + char scratch[2345]; + compaction->Summary(scratch, sizeof(scratch)); + ROCKS_LOG_INFO(db_options_.info_log, "[%s]: Compaction start summary: %s\n", + cfd->GetName().c_str(), scratch); + // build event logger report + auto stream = event_logger_->Log(); + stream << "job" << job_id_ << "event" + << "compaction_started" + << "compaction_reason" + << GetCompactionReasonString(compaction->compaction_reason()); + for (size_t i = 0; i < compaction->num_input_levels(); ++i) { + stream << ("files_L" + std::to_string(compaction->level(i))); + stream.StartArray(); + for (auto f : *compaction->inputs(i)) { + stream << f->fd.GetNumber(); + } + stream.EndArray(); + } + stream << "score" << compaction->score() << "input_data_size" + << compaction->CalculateTotalInputSize() << "oldest_snapshot_seqno" + << (existing_snapshots_.empty() + ? int64_t{-1} // Use -1 for "none" + : static_cast(existing_snapshots_[0])); + if (compaction->SupportsPerKeyPlacement()) { + stream << "preclude_last_level_min_seqno" + << preclude_last_level_min_seqno_; + stream << "penultimate_output_level" << compaction->GetPenultimateLevel(); + stream << "penultimate_output_range" + << GetCompactionPenultimateOutputRangeTypeString( + compaction->GetPenultimateOutputRangeType()); + + if (compaction->GetPenultimateOutputRangeType() == + Compaction::PenultimateOutputRangeType::kDisabled) { + ROCKS_LOG_WARN( + db_options_.info_log, + "[%s] [JOB %d] Penultimate level output is disabled, likely " + "because of the range conflict in the penultimate level", + cfd->GetName().c_str(), job_id_); + } + } + } +} + +std::string CompactionJob::GetTableFileName(uint64_t file_number) { + return TableFileName(compact_->compaction->immutable_options()->cf_paths, + file_number, compact_->compaction->output_path_id()); +} + +Env::IOPriority CompactionJob::GetRateLimiterPriority() { + if (versions_ && versions_->GetColumnFamilySet() && + versions_->GetColumnFamilySet()->write_controller()) { + WriteController* write_controller = + versions_->GetColumnFamilySet()->write_controller(); + if (write_controller->NeedsDelay() || write_controller->IsStopped()) { + return Env::IO_USER; + } + } + + return Env::IO_LOW; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.h new file mode 100644 index 0000000000..a930c15f1f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_job.h @@ -0,0 +1,504 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
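+
+// [Editor's note] The GetRateLimiterPriority() definition at the end of the
+// preceding compaction_job.cc escalates compaction I/O from background to
+// user priority whenever the write controller reports a delay or stop, so
+// stalled writes are unblocked sooner. A standalone sketch of that policy
+// (the WriteControllerLike type here is hypothetical):
+#if 0
+struct WriteControllerLike {
+  bool NeedsDelay() const;
+  bool IsStopped() const;
+};
+inline Env::IOPriority PickCompactionIOPriority(
+    const WriteControllerLike* wc) {
+  // When writes are already stalled, finish compactions as fast as possible;
+  // otherwise keep compaction I/O at low (background) priority.
+  if (wc != nullptr && (wc->NeedsDelay() || wc->IsStopped())) {
+    return Env::IO_USER;
+  }
+  return Env::IO_LOW;
+}
+#endif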
+#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db/blob/blob_file_completion_callback.h" +#include "db/column_family.h" +#include "db/compaction/compaction_iterator.h" +#include "db/compaction/compaction_outputs.h" +#include "db/flush_scheduler.h" +#include "db/internal_stats.h" +#include "db/job_context.h" +#include "db/log_writer.h" +#include "db/memtable_list.h" +#include "db/range_del_aggregator.h" +#include "db/seqno_to_time_mapping.h" +#include "db/version_edit.h" +#include "db/write_controller.h" +#include "db/write_thread.h" +#include "logging/event_logger.h" +#include "options/cf_options.h" +#include "options/db_options.h" +#include "port/port.h" +#include "rocksdb/compaction_filter.h" +#include "rocksdb/compaction_job_stats.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/transaction_log.h" +#include "table/scoped_arena_iterator.h" +#include "util/autovector.h" +#include "util/stop_watch.h" +#include "util/thread_local.h" + +namespace ROCKSDB_NAMESPACE { + +class Arena; +class CompactionState; +class ErrorHandler; +class MemTable; +class SnapshotChecker; +class SystemClock; +class TableCache; +class Version; +class VersionEdit; +class VersionSet; + +class SubcompactionState; + +// CompactionJob is responsible for executing the compaction. Each (manual or +// automated) compaction corresponds to a CompactionJob object, and usually +// goes through the stages of `Prepare()`->`Run()`->`Install()`. CompactionJob +// will divide the compaction into subcompactions and execute them in parallel +// if needed. +// +// CompactionJob has 2 main stats: +// 1. CompactionJobStats compaction_job_stats_ +// CompactionJobStats is a public data structure which is part of Compaction +// event listener that rocksdb share the job stats with the user. +// Internally it's an aggregation of all the compaction_job_stats from each +// `SubcompactionState`: +// +------------------------+ +// | SubcompactionState | +// | | +// +--------->| compaction_job_stats | +// | | | +// | +------------------------+ +// +------------------------+ | +// | CompactionJob | | +------------------------+ +// | | | | SubcompactionState | +// | compaction_job_stats +-----+ | | +// | | +--------->| compaction_job_stats | +// | | | | | +// +------------------------+ | +------------------------+ +// | +// | +------------------------+ +// | | SubcompactionState | +// | | | +// +--------->+ compaction_job_stats | +// | | | +// | +------------------------+ +// | +// | +------------------------+ +// | | ... | +// +--------->+ | +// +------------------------+ +// +// 2. CompactionStatsFull compaction_stats_ +// `CompactionStatsFull` is an internal stats about the compaction, which +// is eventually sent to `ColumnFamilyData::internal_stats_` and used for +// logging and public metrics. +// Internally, it's an aggregation of stats_ from each `SubcompactionState`. +// It has 2 parts, normal stats about the main compaction information and +// the penultimate level output stats. +// `SubcompactionState` maintains the CompactionOutputs for normal output and +// the penultimate level output if exists, the per_level stats is +// stored with the outputs. 
+// +---------------------------+ +// | SubcompactionState | +// | | +// | +----------------------+ | +// | | CompactionOutputs | | +// | | (normal output) | | +// +---->| stats_ | | +// | | +----------------------+ | +// | | | +// | | +----------------------+ | +// +--------------------------------+ | | | CompactionOutputs | | +// | CompactionJob | | | | (penultimate_level) | | +// | | +--------->| stats_ | | +// | compaction_stats_ | | | | +----------------------+ | +// | +-------------------------+ | | | | | +// | |stats (normal) |------|----+ +---------------------------+ +// | +-------------------------+ | | | +// | | | | +// | +-------------------------+ | | | +---------------------------+ +// | |penultimate_level_stats +------+ | | SubcompactionState | +// | +-------------------------+ | | | | | +// | | | | | +----------------------+ | +// | | | | | | CompactionOutputs | | +// +--------------------------------+ | | | | (normal output) | | +// | +---->| stats_ | | +// | | +----------------------+ | +// | | | +// | | +----------------------+ | +// | | | CompactionOutputs | | +// | | | (penultimate_level) | | +// +--------->| stats_ | | +// | +----------------------+ | +// | | +// +---------------------------+ + +class CompactionJob { + public: + CompactionJob( + int job_id, Compaction* compaction, const ImmutableDBOptions& db_options, + const MutableDBOptions& mutable_db_options, + const FileOptions& file_options, VersionSet* versions, + const std::atomic* shutting_down, LogBuffer* log_buffer, + FSDirectory* db_directory, FSDirectory* output_directory, + FSDirectory* blob_output_directory, Statistics* stats, + InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler, + std::vector existing_snapshots, + SequenceNumber earliest_write_conflict_snapshot, + const SnapshotChecker* snapshot_checker, JobContext* job_context, + std::shared_ptr table_cache, EventLogger* event_logger, + bool paranoid_file_checks, bool measure_io_stats, + const std::string& dbname, CompactionJobStats* compaction_job_stats, + Env::Priority thread_pri, const std::shared_ptr& io_tracer, + const std::atomic& manual_compaction_canceled, + const std::string& db_id = "", const std::string& db_session_id = "", + std::string full_history_ts_low = "", std::string trim_ts = "", + BlobFileCompletionCallback* blob_callback = nullptr, + int* bg_compaction_scheduled = nullptr, + int* bg_bottom_compaction_scheduled = nullptr); + + virtual ~CompactionJob(); + + // no copy/move + CompactionJob(CompactionJob&& job) = delete; + CompactionJob(const CompactionJob& job) = delete; + CompactionJob& operator=(const CompactionJob& job) = delete; + + // REQUIRED: mutex held + // Prepare for the compaction by setting up boundaries for each subcompaction + void Prepare(); + // REQUIRED mutex not held + // Launch threads for each subcompaction and wait for them to finish. After + // that, verify table is usable and finally do bookkeeping to unify + // subcompaction results + Status Run(); + + // REQUIRED: mutex held + // Add compaction input/output to the current version + Status Install(const MutableCFOptions& mutable_cf_options); + + // Return the IO status + IOStatus io_status() const { return io_status_; } + + protected: + void UpdateCompactionStats(); + void LogCompaction(); + virtual void RecordCompactionIOStats(); + void CleanupCompaction(); + + // Call compaction filter. 
Then iterate through input and compact the + // kv-pairs + void ProcessKeyValueCompaction(SubcompactionState* sub_compact); + + CompactionState* compact_; + InternalStats::CompactionStatsFull compaction_stats_; + const ImmutableDBOptions& db_options_; + const MutableDBOptions mutable_db_options_copy_; + LogBuffer* log_buffer_; + FSDirectory* output_directory_; + Statistics* stats_; + // Is this compaction creating a file in the bottom most level? + bool bottommost_level_; + + Env::WriteLifeTimeHint write_hint_; + + IOStatus io_status_; + + CompactionJobStats* compaction_job_stats_; + + private: + friend class CompactionJobTestBase; + + // Generates a histogram representing potential divisions of key ranges from + // the input. It adds the starting and/or ending keys of certain input files + // to the working set and then finds the approximate size of data in between + // each consecutive pair of slices. Then it divides these ranges into + // consecutive groups such that each group has a similar size. + void GenSubcompactionBoundaries(); + + // Get the number of planned subcompactions based on max_subcompactions and + // extra reserved resources + uint64_t GetSubcompactionsLimit(); + + // Additional reserved threads are reserved and the number is stored in + // extra_num_subcompaction_threads_reserved__. For now, this happens only if + // the compaction priority is round-robin and max_subcompactions is not + // sufficient (extra resources may be needed) + void AcquireSubcompactionResources(int num_extra_required_subcompactions); + + // Additional threads may be reserved during IncreaseSubcompactionResources() + // if num_actual_subcompactions is less than num_planned_subcompactions. + // Additional threads will be released and the bg_compaction_scheduled_ or + // bg_bottom_compaction_scheduled_ will be updated if they are used. + // DB Mutex lock is required. + void ShrinkSubcompactionResources(uint64_t num_extra_resources); + + // Release all reserved threads and update the compaction limits. + void ReleaseSubcompactionResources(); + + CompactionServiceJobStatus ProcessKeyValueCompactionWithCompactionService( + SubcompactionState* sub_compact); + + // update the thread status for starting a compaction. 
+ void ReportStartedCompaction(Compaction* compaction); + + Status FinishCompactionOutputFile(const Status& input_status, + SubcompactionState* sub_compact, + CompactionOutputs& outputs, + const Slice& next_table_min_key, + const Slice* comp_start_user_key, + const Slice* comp_end_user_key); + Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options); + Status OpenCompactionOutputFile(SubcompactionState* sub_compact, + CompactionOutputs& outputs); + void UpdateCompactionJobStats( + const InternalStats::CompactionStats& stats) const; + void RecordDroppedKeys(const CompactionIterationStats& c_iter_stats, + CompactionJobStats* compaction_job_stats = nullptr); + + void UpdateCompactionInputStatsHelper(int* num_files, uint64_t* bytes_read, + int input_level); + + void NotifyOnSubcompactionBegin(SubcompactionState* sub_compact); + + void NotifyOnSubcompactionCompleted(SubcompactionState* sub_compact); + + uint32_t job_id_; + + // DBImpl state + const std::string& dbname_; + const std::string db_id_; + const std::string db_session_id_; + const FileOptions file_options_; + + Env* env_; + std::shared_ptr io_tracer_; + FileSystemPtr fs_; + // env_option optimized for compaction table reads + FileOptions file_options_for_read_; + VersionSet* versions_; + const std::atomic* shutting_down_; + const std::atomic& manual_compaction_canceled_; + FSDirectory* db_directory_; + FSDirectory* blob_output_directory_; + InstrumentedMutex* db_mutex_; + ErrorHandler* db_error_handler_; + // If there were two snapshots with seq numbers s1 and + // s2 and s1 < s2, and if we find two instances of a key k1 then lies + // entirely within s1 and s2, then the earlier version of k1 can be safely + // deleted because that version is not visible in any snapshot. + std::vector existing_snapshots_; + + // This is the earliest snapshot that could be used for write-conflict + // checking by a transaction. For any user-key newer than this snapshot, we + // should make sure not to remove evidence that a write occurred. + SequenceNumber earliest_write_conflict_snapshot_; + + const SnapshotChecker* const snapshot_checker_; + + JobContext* job_context_; + + std::shared_ptr table_cache_; + + EventLogger* event_logger_; + + bool paranoid_file_checks_; + bool measure_io_stats_; + // Stores the Slices that designate the boundaries for each subcompaction + std::vector boundaries_; + Env::Priority thread_pri_; + std::string full_history_ts_low_; + std::string trim_ts_; + BlobFileCompletionCallback* blob_callback_; + + uint64_t GetCompactionId(SubcompactionState* sub_compact) const; + // Stores the number of reserved threads in shared env_ for the number of + // extra subcompaction in kRoundRobin compaction priority + int extra_num_subcompaction_threads_reserved_; + + // Stores the pointer to bg_compaction_scheduled_, + // bg_bottom_compaction_scheduled_ in DBImpl. Mutex is required when accessing + // or updating it. + int* bg_compaction_scheduled_; + int* bg_bottom_compaction_scheduled_; + + // Stores the sequence number to time mapping gathered from all input files + // it also collects the smallest_seqno -> oldest_ancester_time from the SST. + SeqnoToTimeMapping seqno_time_mapping_; + + // Minimal sequence number for preserving the time information. The time info + // older than this sequence number won't be preserved after the compaction and + // if it's bottommost compaction, the seq num will be zeroed out. 
+  SequenceNumber preserve_time_min_seqno_ = kMaxSequenceNumber;
+
+  // Minimal sequence number to preclude the data from the last level. If a
+  // key has a bigger (newer) sequence number than this, it will be precluded
+  // from the last level (output to the penultimate level instead).
+  SequenceNumber preclude_last_level_min_seqno_ = kMaxSequenceNumber;
+
+  // Get the table file name in the directory it's outputting to, which
+  // should also be in `output_directory_`.
+  virtual std::string GetTableFileName(uint64_t file_number);
+  // The rate limiter priority (io_priority) is determined dynamically here:
+  // compaction reads and writes share the same priority, which varies with
+  // conditions such as a write stall.
+  Env::IOPriority GetRateLimiterPriority();
+};
+
+// CompactionServiceInput is used to pass compaction information between two
+// db instances. It contains the information needed to do a compaction. It
+// doesn't contain the LSM tree information, which is passed through the
+// MANIFEST file.
+struct CompactionServiceInput {
+  ColumnFamilyDescriptor column_family;
+
+  DBOptions db_options;
+
+  std::vector<SequenceNumber> snapshots;
+
+  // SST files for compaction; it should already be expanded to include all
+  // the files needed for this compaction, for both input level files and
+  // output level files.
+  std::vector<std::string> input_files;
+  int output_level;
+
+  // db_id is used to generate a unique id of the sst on the remote compactor
+  std::string db_id;
+
+  // information for subcompaction
+  bool has_begin = false;
+  std::string begin;
+  bool has_end = false;
+  std::string end;
+
+  // serialization interface to read and write the object
+  static Status Read(const std::string& data_str, CompactionServiceInput* obj);
+  Status Write(std::string* output);
+
+  // Initialize a dummy ColumnFamilyDescriptor
+  CompactionServiceInput() : column_family("", ColumnFamilyOptions()) {}
+
+#ifndef NDEBUG
+  bool TEST_Equals(CompactionServiceInput* other);
+  bool TEST_Equals(CompactionServiceInput* other, std::string* mismatch);
+#endif  // NDEBUG
+};
+
+// CompactionServiceOutputFile is the metadata for the output SST file
+struct CompactionServiceOutputFile {
+  std::string file_name;
+  SequenceNumber smallest_seqno;
+  SequenceNumber largest_seqno;
+  std::string smallest_internal_key;
+  std::string largest_internal_key;
+  uint64_t oldest_ancester_time;
+  uint64_t file_creation_time;
+  uint64_t epoch_number;
+  uint64_t paranoid_hash;
+  bool marked_for_compaction;
+  UniqueId64x2 unique_id;
+
+  CompactionServiceOutputFile() = default;
+  CompactionServiceOutputFile(
+      const std::string& name, SequenceNumber smallest, SequenceNumber largest,
+      std::string _smallest_internal_key, std::string _largest_internal_key,
+      uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
+      uint64_t _epoch_number, uint64_t _paranoid_hash,
+      bool _marked_for_compaction, UniqueId64x2 _unique_id)
+      : file_name(name),
+        smallest_seqno(smallest),
+        largest_seqno(largest),
+        smallest_internal_key(std::move(_smallest_internal_key)),
+        largest_internal_key(std::move(_largest_internal_key)),
+        oldest_ancester_time(_oldest_ancester_time),
+        file_creation_time(_file_creation_time),
+        epoch_number(_epoch_number),
+        paranoid_hash(_paranoid_hash),
+        marked_for_compaction(_marked_for_compaction),
+        unique_id(std::move(_unique_id)) {}
+};
+
+// CompactionServiceResult contains the compaction result from a different db
+// instance; with this information, the primary db instance with write
+// permission is able to install the result to the DB.
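+//
+// [Editor's note] A hedged sketch of the intended round trip between the
+// primary instance and a remote compaction worker, using only the
+// Read()/Write() serialization hooks declared on these structs. The
+// RunRemoteCompaction and SendToWorkerAndWait names are hypothetical; the
+// transport is out of scope here.
+#if 0
+Status RunRemoteCompaction(CompactionServiceInput& input,
+                           CompactionServiceResult* result) {
+  std::string request;
+  Status s = input.Write(&request);  // serialize the compaction description
+  if (!s.ok()) return s;
+  // The worker runs CompactionServiceCompactionJob::Run() and serializes a
+  // CompactionServiceResult; the primary deserializes and installs it.
+  std::string response = SendToWorkerAndWait(request);  // hypothetical RPC
+  return CompactionServiceResult::Read(response, result);
+}
+#endif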
+struct CompactionServiceResult {
+  Status status;
+  std::vector<CompactionServiceOutputFile> output_files;
+  int output_level;
+
+  // location of the output files
+  std::string output_path;
+
+  // some statistics about the compaction
+  uint64_t num_output_records = 0;
+  uint64_t total_bytes = 0;
+  uint64_t bytes_read = 0;
+  uint64_t bytes_written = 0;
+  CompactionJobStats stats;
+
+  // serialization interface to read and write the object
+  static Status Read(const std::string& data_str,
+                     CompactionServiceResult* obj);
+  Status Write(std::string* output);
+
+#ifndef NDEBUG
+  bool TEST_Equals(CompactionServiceResult* other);
+  bool TEST_Equals(CompactionServiceResult* other, std::string* mismatch);
+#endif  // NDEBUG
+};
+
+// CompactionServiceCompactionJob is a read-only compaction job: it takes
+// input information from `compaction_service_input`, puts result information
+// in `compaction_service_result`, and generates the SST files under
+// `output_path`.
+class CompactionServiceCompactionJob : private CompactionJob {
+ public:
+  CompactionServiceCompactionJob(
+      int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
+      const MutableDBOptions& mutable_db_options,
+      const FileOptions& file_options, VersionSet* versions,
+      const std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
+      FSDirectory* output_directory, Statistics* stats,
+      InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
+      std::vector<SequenceNumber> existing_snapshots,
+      std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
+      const std::string& dbname, const std::shared_ptr<IOTracer>& io_tracer,
+      const std::atomic<bool>& manual_compaction_canceled,
+      const std::string& db_id, const std::string& db_session_id,
+      std::string output_path,
+      const CompactionServiceInput& compaction_service_input,
+      CompactionServiceResult* compaction_service_result);
+
+  // Run the compaction in the current thread and return the result
+  Status Run();
+
+  void CleanupCompaction();
+
+  IOStatus io_status() const { return CompactionJob::io_status(); }
+
+ protected:
+  void RecordCompactionIOStats() override;
+
+ private:
+  // Get the table file name in output_path
+  std::string GetTableFileName(uint64_t file_number) override;
+  // Specifies the compaction output path; otherwise it uses the default DB
+  // path
+  const std::string output_path_;
+
+  // Compaction job input
+  const CompactionServiceInput& compaction_input_;
+
+  // Compaction job result
+  CompactionServiceResult* compaction_result_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.cc
new file mode 100644
index 0000000000..f6e0dbd7d4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.cc
@@ -0,0 +1,784 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
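+//
+// [Editor's note] CompactionOutputs::Finish() below follows the usual
+// TableBuilder contract: exactly one of Finish() or Abandon() is called
+// before the builder is destroyed, depending on whether the input status is
+// OK. A minimal sketch of that pattern (the BuilderLike type is
+// hypothetical):
+#if 0
+template <typename BuilderLike>
+Status FinishOrAbandon(BuilderLike* builder, const Status& input_status) {
+  Status s = input_status;
+  if (s.ok()) {
+    s = builder->Finish();  // writes the footer and flushes pending blocks
+  } else {
+    builder->Abandon();  // releases resources without producing a table
+  }
+  return s;
+}
+#endif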
+ +#include "db/compaction/compaction_outputs.h" + +#include "db/builder.h" + +namespace ROCKSDB_NAMESPACE { + +void CompactionOutputs::NewBuilder(const TableBuilderOptions& tboptions) { + builder_.reset(NewTableBuilder(tboptions, file_writer_.get())); +} + +Status CompactionOutputs::Finish(const Status& intput_status, + const SeqnoToTimeMapping& seqno_time_mapping) { + FileMetaData* meta = GetMetaData(); + assert(meta != nullptr); + Status s = intput_status; + if (s.ok()) { + std::string seqno_time_mapping_str; + seqno_time_mapping.Encode(seqno_time_mapping_str, meta->fd.smallest_seqno, + meta->fd.largest_seqno, meta->file_creation_time); + builder_->SetSeqnoTimeTableProperties(seqno_time_mapping_str, + meta->oldest_ancester_time); + s = builder_->Finish(); + + } else { + builder_->Abandon(); + } + Status io_s = builder_->io_status(); + if (s.ok()) { + s = io_s; + } else { + io_s.PermitUncheckedError(); + } + const uint64_t current_bytes = builder_->FileSize(); + if (s.ok()) { + meta->fd.file_size = current_bytes; + meta->tail_size = builder_->GetTailSize(); + meta->marked_for_compaction = builder_->NeedCompact(); + } + current_output().finished = true; + stats_.bytes_written += current_bytes; + stats_.num_output_files = outputs_.size(); + + return s; +} + +IOStatus CompactionOutputs::WriterSyncClose(const Status& input_status, + SystemClock* clock, + Statistics* statistics, + bool use_fsync) { + IOStatus io_s; + if (input_status.ok()) { + StopWatch sw(clock, statistics, COMPACTION_OUTFILE_SYNC_MICROS); + io_s = file_writer_->Sync(use_fsync); + } + if (input_status.ok() && io_s.ok()) { + io_s = file_writer_->Close(); + } + + if (input_status.ok() && io_s.ok()) { + FileMetaData* meta = GetMetaData(); + meta->file_checksum = file_writer_->GetFileChecksum(); + meta->file_checksum_func_name = file_writer_->GetFileChecksumFuncName(); + } + + file_writer_.reset(); + + return io_s; +} + +bool CompactionOutputs::UpdateFilesToCutForTTLStates( + const Slice& internal_key) { + if (!files_to_cut_for_ttl_.empty()) { + const InternalKeyComparator* icmp = + &compaction_->column_family_data()->internal_comparator(); + if (cur_files_to_cut_for_ttl_ != -1) { + // Previous key is inside the range of a file + if (icmp->Compare(internal_key, + files_to_cut_for_ttl_[cur_files_to_cut_for_ttl_] + ->largest.Encode()) > 0) { + next_files_to_cut_for_ttl_ = cur_files_to_cut_for_ttl_ + 1; + cur_files_to_cut_for_ttl_ = -1; + return true; + } + } else { + // Look for the key position + while (next_files_to_cut_for_ttl_ < + static_cast(files_to_cut_for_ttl_.size())) { + if (icmp->Compare(internal_key, + files_to_cut_for_ttl_[next_files_to_cut_for_ttl_] + ->smallest.Encode()) >= 0) { + if (icmp->Compare(internal_key, + files_to_cut_for_ttl_[next_files_to_cut_for_ttl_] + ->largest.Encode()) <= 0) { + // With in the current file + cur_files_to_cut_for_ttl_ = next_files_to_cut_for_ttl_; + return true; + } + // Beyond the current file + next_files_to_cut_for_ttl_++; + } else { + // Still fall into the gap + break; + } + } + } + } + return false; +} + +size_t CompactionOutputs::UpdateGrandparentBoundaryInfo( + const Slice& internal_key) { + size_t curr_key_boundary_switched_num = 0; + const std::vector& grandparents = compaction_->grandparents(); + + if (grandparents.empty()) { + return curr_key_boundary_switched_num; + } + assert(!internal_key.empty()); + InternalKey ikey; + ikey.DecodeFrom(internal_key); + assert(ikey.Valid()); + + const Comparator* ucmp = compaction_->column_family_data()->user_comparator(); + + // Move 
+  // Move the grandparent_index_ to the file containing the current user_key.
+  // If there are multiple files containing the same user_key, make sure the
+  // index points to the last file containing the key.
+  while (grandparent_index_ < grandparents.size()) {
+    if (being_grandparent_gap_) {
+      if (sstableKeyCompare(ucmp, ikey,
+                            grandparents[grandparent_index_]->smallest) < 0) {
+        break;
+      }
+      if (seen_key_) {
+        curr_key_boundary_switched_num++;
+        grandparent_overlapped_bytes_ +=
+            grandparents[grandparent_index_]->fd.GetFileSize();
+        grandparent_boundary_switched_num_++;
+      }
+      being_grandparent_gap_ = false;
+    } else {
+      int cmp_result = sstableKeyCompare(
+          ucmp, ikey, grandparents[grandparent_index_]->largest);
+      // If it's the same key, make sure grandparent_index_ is pointing to the
+      // last one.
+      if (cmp_result < 0 ||
+          (cmp_result == 0 &&
+           (grandparent_index_ == grandparents.size() - 1 ||
+            sstableKeyCompare(ucmp, ikey,
+                              grandparents[grandparent_index_ + 1]->smallest) <
+                0))) {
+        break;
+      }
+      if (seen_key_) {
+        curr_key_boundary_switched_num++;
+        grandparent_boundary_switched_num_++;
+      }
+      being_grandparent_gap_ = true;
+      grandparent_index_++;
+    }
+  }
+
+  // If the first key is in the middle of a grandparent file, add it to the
+  // overlap
+  if (!seen_key_ && !being_grandparent_gap_) {
+    assert(grandparent_overlapped_bytes_ == 0);
+    grandparent_overlapped_bytes_ =
+        GetCurrentKeyGrandparentOverlappedBytes(internal_key);
+  }
+
+  seen_key_ = true;
+  return curr_key_boundary_switched_num;
+}
+
+uint64_t CompactionOutputs::GetCurrentKeyGrandparentOverlappedBytes(
+    const Slice& internal_key) const {
+  // no overlap with any grandparent file
+  if (being_grandparent_gap_) {
+    return 0;
+  }
+  uint64_t overlapped_bytes = 0;
+
+  const std::vector<FileMetaData*>& grandparents = compaction_->grandparents();
+  const Comparator* ucmp = compaction_->column_family_data()->user_comparator();
+  InternalKey ikey;
+  ikey.DecodeFrom(internal_key);
+#ifndef NDEBUG
+  // make sure the grandparent_index_ is pointing to the last file containing
+  // the current key.
+  int cmp_result =
+      sstableKeyCompare(ucmp, ikey, grandparents[grandparent_index_]->largest);
+  assert(
+      cmp_result < 0 ||
+      (cmp_result == 0 &&
+       (grandparent_index_ == grandparents.size() - 1 ||
+        sstableKeyCompare(
+            ucmp, ikey, grandparents[grandparent_index_ + 1]->smallest) < 0)));
+  assert(sstableKeyCompare(ucmp, ikey,
+                           grandparents[grandparent_index_]->smallest) >= 0);
+#endif
+  overlapped_bytes += grandparents[grandparent_index_]->fd.GetFileSize();
+
+  // go backwards to find all overlapped files, one key can overlap multiple
+  // files. In the following example, if the current output key is `c`, and one
+  // compaction file was cut before `c`, current `c` can overlap with 3 files:
+  //  [a b]          [c...
+  //  [b, b] [c, c]  [c, c] [c, d]
+  for (int64_t i = static_cast<int64_t>(grandparent_index_) - 1;
+       i >= 0 && sstableKeyCompare(ucmp, ikey, grandparents[i]->largest) == 0;
+       i--) {
+    overlapped_bytes += grandparents[i]->fd.GetFileSize();
+  }
+
+  return overlapped_bytes;
+}
+
+bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
+  assert(c_iter.Valid());
+  const Slice& internal_key = c_iter.key();
+#ifndef NDEBUG
+  bool should_stop = false;
+  std::pair<bool*, const Slice> p{&should_stop, internal_key};
+  TEST_SYNC_POINT_CALLBACK(
+      "CompactionOutputs::ShouldStopBefore::manual_decision", (void*)&p);
+  if (should_stop) {
+    return true;
+  }
+#endif  // NDEBUG
+  const uint64_t previous_overlapped_bytes = grandparent_overlapped_bytes_;
+  const InternalKeyComparator* icmp =
+      &compaction_->column_family_data()->internal_comparator();
+  size_t num_grandparent_boundaries_crossed = 0;
+  bool should_stop_for_ttl = false;
+  // Always update grandparent information like overlapped file number, size
+  // etc., and TTL states.
+  // If compaction_->output_level() == 0, there is no need to update grandparent
+  // info, and `grandparents` should be empty.
+  if (compaction_->output_level() > 0) {
+    num_grandparent_boundaries_crossed =
+        UpdateGrandparentBoundaryInfo(internal_key);
+    should_stop_for_ttl = UpdateFilesToCutForTTLStates(internal_key);
+  }
+
+  if (!HasBuilder()) {
+    return false;
+  }
+
+  if (should_stop_for_ttl) {
+    return true;
+  }
+
+  // If there's a user-defined partitioner, check that first
+  if (partitioner_ && partitioner_->ShouldPartition(PartitionerRequest(
+                          last_key_for_partitioner_, c_iter.user_key(),
+                          current_output_file_size_)) == kRequired) {
+    return true;
+  }
+
+  // files output to Level 0 won't be split
+  if (compaction_->output_level() == 0) {
+    return false;
+  }
+
+  // reached the max file size
+  if (current_output_file_size_ >= compaction_->max_output_file_size()) {
+    return true;
+  }
+
+  // Check if it needs to split for RoundRobin
+  // Invalid local_output_split_key indicates that we do not need to split
+  if (local_output_split_key_ != nullptr && !is_split_) {
+    // Split occurs when the next key is larger than/equal to the cursor
+    if (icmp->Compare(internal_key, local_output_split_key_->Encode()) >= 0) {
+      is_split_ = true;
+      return true;
+    }
+  }
+
+  // only check if the current key is going to cross the grandparent file
+  // boundary (either the file beginning or ending).
+  if (num_grandparent_boundaries_crossed > 0) {
+    // Cut the file before the current key if the size of the current output
+    // file plus its overlapped grandparent files is bigger than
+    // max_compaction_bytes. This prevents a future compaction bigger than
+    // max_compaction_bytes from the current output level.
+    if (grandparent_overlapped_bytes_ + current_output_file_size_ >
+        compaction_->max_compaction_bytes()) {
+      return true;
+    }
+
+    // Cut the file if including the key is going to add a skippable file on
+    // the grandparent level AND its size is reasonably big (1/8 of target file
+    // size). For example, if it's compacting the files L0 + L1:
+    //   L0:  [1,   21]
+    //   L1:    [3,   23]
+    //   L2: [2, 4] [11, 15] [22, 24]
+    // Without this break, it will output as:
+    //   L1: [1,3, 21,23]
+    // With this break, it will output as (assuming [11, 15] at L2 is bigger
+    // than 1/8 of target size):
+    //   L1: [1,3] [21,23]
+    // Then for the future compactions, [11,15] won't be included.
+    // For random datasets (either evenly distributed or skewed), it rarely
+    // triggers this condition, but if the user is adding 2 different datasets
+    // without any overlap, it is likely to happen.
+    // For more details, see PR #1963.
+    const size_t num_skippable_boundaries_crossed =
+        being_grandparent_gap_ ? 2 : 3;
+    if (compaction_->immutable_options()->compaction_style ==
+            kCompactionStyleLevel &&
+        compaction_->immutable_options()->level_compaction_dynamic_file_size &&
+        num_grandparent_boundaries_crossed >=
+            num_skippable_boundaries_crossed &&
+        grandparent_overlapped_bytes_ - previous_overlapped_bytes >
+            compaction_->target_output_file_size() / 8) {
+      return true;
+    }
+
+    // Pre-cut the output file if it's reaching a certain size AND it's at the
+    // boundary of a grandparent file. It can reduce future compaction sizes,
+    // at the cost of having smaller files.
+    // The pre-cut size threshold is based on how many grandparent boundaries
+    // it has seen before. Basically, if it has seen no boundary at all, then
+    // it will pre-cut at 50% of the target file size. Every boundary it has
+    // seen increases the threshold by 5%, capped at 90%, at which it will
+    // always cut. The idea is that if it has seen more boundaries before, it
+    // is more likely to see another boundary (a file-cutting opportunity)
+    // before reaching the target file size. Testing shows it can generate
+    // larger files than a static threshold like 75% and has a similar
+    // write-amplification improvement. (A standalone restatement of this
+    // threshold appears later in this patch.)
+    if (compaction_->immutable_options()->compaction_style ==
+            kCompactionStyleLevel &&
+        compaction_->immutable_options()->level_compaction_dynamic_file_size &&
+        current_output_file_size_ >=
+            ((compaction_->target_output_file_size() + 99) / 100) *
+                (50 + std::min(grandparent_boundary_switched_num_ * 5,
+                               size_t{40}))) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+Status CompactionOutputs::AddToOutput(
+    const CompactionIterator& c_iter,
+    const CompactionFileOpenFunc& open_file_func,
+    const CompactionFileCloseFunc& close_file_func) {
+  Status s;
+  bool is_range_del = c_iter.IsDeleteRangeSentinelKey();
+  if (is_range_del && compaction_->bottommost_level()) {
+    // We don't consider range tombstones for the bottommost level since:
+    // 1. there is no grandparent and hence no overlap to consider
+    // 2. range tombstones may be dropped at the bottommost level.
+    return s;
+  }
+  const Slice& key = c_iter.key();
+  if (ShouldStopBefore(c_iter) && HasBuilder()) {
+    s = close_file_func(*this, c_iter.InputStatus(), key);
+    if (!s.ok()) {
+      return s;
+    }
+    // reset grandparent information
+    grandparent_boundary_switched_num_ = 0;
+    grandparent_overlapped_bytes_ =
+        GetCurrentKeyGrandparentOverlappedBytes(key);
+    if (UNLIKELY(is_range_del)) {
+      // The lower bound for this new output file; this is needed because the
+      // lower bound does not come from the smallest point key in this case.
+      range_tombstone_lower_bound_.DecodeFrom(key);
+    } else {
+      range_tombstone_lower_bound_.Clear();
+    }
+  }
+
+  // Open output file if necessary
+  if (!HasBuilder()) {
+    s = open_file_func(*this);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  // c_iter may emit range deletion keys, so update `last_key_for_partitioner_`
+  // here before returning below when `is_range_del` is true
+  if (partitioner_) {
+    last_key_for_partitioner_.assign(c_iter.user_key().data_,
+                                     c_iter.user_key().size_);
+  }
+
+  if (UNLIKELY(is_range_del)) {
+    return s;
+  }
+
+  assert(builder_ != nullptr);
+  const Slice& value = c_iter.value();
+  s = current_output().validator.Add(key, value);
+  if (!s.ok()) {
+    return s;
+  }
+  builder_->Add(key, value);
+
+  stats_.num_output_records++;
+  current_output_file_size_ = builder_->EstimatedFileSize();
+
+  if (blob_garbage_meter_) {
+    s = blob_garbage_meter_->ProcessOutFlow(key, value);
+  }
+
+  if (!s.ok()) {
+    return s;
+  }
+
+  const ParsedInternalKey& ikey = c_iter.ikey();
+  s = current_output().meta.UpdateBoundaries(key, value, ikey.sequence,
+                                             ikey.type);
+
+  return s;
+}
+
+namespace {
+void SetMaxSeqAndTs(InternalKey& internal_key, const Slice& user_key,
+                    const size_t ts_sz) {
+  if (ts_sz) {
+    static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff";
+    if (ts_sz <= strlen(kTsMax)) {
+      internal_key = InternalKey(user_key, kMaxSequenceNumber,
+                                 kTypeRangeDeletion, Slice(kTsMax, ts_sz));
+    } else {
+      internal_key =
+          InternalKey(user_key, kMaxSequenceNumber, kTypeRangeDeletion,
+                      std::string(ts_sz, '\xff'));
+    }
+  } else {
+    internal_key.Set(user_key, kMaxSequenceNumber, kTypeRangeDeletion);
+  }
+}
+}  // namespace
+
+Status CompactionOutputs::AddRangeDels(
+    const Slice* comp_start_user_key, const Slice* comp_end_user_key,
+    CompactionIterationStats& range_del_out_stats, bool bottommost_level,
+    const InternalKeyComparator& icmp, SequenceNumber earliest_snapshot,
+    const Slice& next_table_min_key, const std::string& full_history_ts_low) {
+  // The following example does not happen since
+  // CompactionOutput::ShouldStopBefore() always returns false for the first
+  // point key. But we should consider removing this dependency. Suppose for
+  // the first compaction output file,
+  // - next_table_min_key.user_key == comp_start_user_key
+  // - no point key is in the output file
+  // - there is a range tombstone @seqno to be added that covers
+  //   comp_start_user_key
+  // Then meta.smallest will be set to comp_start_user_key@seqno
+  // and meta.largest will be set to comp_start_user_key@kMaxSequenceNumber
+  // which violates the assumption that meta.smallest should be <= meta.largest.
+  assert(HasRangeDel());
+  FileMetaData& meta = current_output().meta;
+  const Comparator* ucmp = icmp.user_comparator();
+  InternalKey lower_bound_buf, upper_bound_buf;
+  Slice lower_bound_guard, upper_bound_guard;
+  std::string smallest_user_key;
+  const Slice *lower_bound, *upper_bound;
+
+  // We first determine the internal key lower_bound and upper_bound for
+  // this output file. All and only range tombstones that overlap with
+  // [lower_bound, upper_bound] should be added to this file. File
+  // boundaries (meta.smallest/largest) should be updated accordingly when
+  // extended by range tombstones.
+  size_t output_size = outputs_.size();
+  if (output_size == 1) {
+    // This is the first file in the subcompaction.
+    //
+    // When outputting a range tombstone that spans a subcompaction boundary,
+    // the files on either side of that boundary need to include that
+    // boundary's user key. Otherwise, the spanning range tombstone would lose
+    // coverage.
+    //
+    // To achieve this while preventing files from overlapping in internal key
+    // (an LSM invariant violation), we allow the earlier file to include the
+    // boundary user key up to `kMaxSequenceNumber,kTypeRangeDeletion`. The
+    // later file can begin at the boundary user key at the newest key version
+    // it contains. At this point that version number is unknown since we have
+    // not processed the range tombstones yet, so permit any version. The same
+    // story applies to timestamps, and a non-nullptr `comp_start_user_key`
+    // should have `kMaxTs` here, which similarly permits any timestamp.
+    if (comp_start_user_key) {
+      lower_bound_buf.Set(*comp_start_user_key, kMaxSequenceNumber,
+                          kTypeRangeDeletion);
+      lower_bound_guard = lower_bound_buf.Encode();
+      lower_bound = &lower_bound_guard;
+    } else {
+      lower_bound = nullptr;
+    }
+  } else {
+    // For subsequent output tables, only include range tombstones from min
+    // key onwards since the previous file was extended to contain range
+    // tombstones falling before min key.
+    if (range_tombstone_lower_bound_.size() > 0) {
+      assert(meta.smallest.size() == 0 ||
+             icmp.Compare(range_tombstone_lower_bound_, meta.smallest) < 0);
+      lower_bound_guard = range_tombstone_lower_bound_.Encode();
+    } else {
+      assert(meta.smallest.size() > 0);
+      lower_bound_guard = meta.smallest.Encode();
+    }
+    lower_bound = &lower_bound_guard;
+  }
+
+  const size_t ts_sz = ucmp->timestamp_size();
+  if (next_table_min_key.empty()) {
+    // Last file of the subcompaction.
+    if (comp_end_user_key) {
+      upper_bound_buf.Set(*comp_end_user_key, kMaxSequenceNumber,
+                          kTypeRangeDeletion);
+      upper_bound_guard = upper_bound_buf.Encode();
+      upper_bound = &upper_bound_guard;
+    } else {
+      upper_bound = nullptr;
+    }
+  } else {
+    // There is another file coming whose coverage will begin at
+    // `next_table_min_key`. The current file needs to extend range tombstone
+    // coverage through its own keys (through `meta.largest`) and through user
+    // keys preceding `next_table_min_key`'s user key.
+    ParsedInternalKey next_table_min_key_parsed;
+    ParseInternalKey(next_table_min_key, &next_table_min_key_parsed,
+                     false /* log_err_key */)
+        .PermitUncheckedError();
+    assert(next_table_min_key_parsed.sequence < kMaxSequenceNumber);
+    assert(meta.largest.size() == 0 ||
+           icmp.Compare(meta.largest.Encode(), next_table_min_key) < 0);
+    assert(!lower_bound || icmp.Compare(*lower_bound, next_table_min_key) <= 0);
+    if (meta.largest.size() > 0 &&
+        ucmp->EqualWithoutTimestamp(meta.largest.user_key(),
+                                    next_table_min_key_parsed.user_key)) {
+      // Caution: this assumes meta.largest.Encode() lives longer than
+      // upper_bound, which is only true if meta.largest is never updated.
+      // This just happens to be the case here since meta.largest serves
+      // as the upper_bound.
+      upper_bound_guard = meta.largest.Encode();
+    } else {
+      SetMaxSeqAndTs(upper_bound_buf, next_table_min_key_parsed.user_key,
+                     ts_sz);
+      upper_bound_guard = upper_bound_buf.Encode();
+    }
+    upper_bound = &upper_bound_guard;
+  }
+  if (lower_bound && upper_bound &&
+      icmp.Compare(*lower_bound, *upper_bound) > 0) {
+    assert(meta.smallest.size() == 0 &&
+           ucmp->EqualWithoutTimestamp(ExtractUserKey(*lower_bound),
+                                       ExtractUserKey(*upper_bound)));
+    // This can only happen when lower_bound has the same user key as
+    // next_table_min_key and there is no point key in the current
+    // compaction output file.
+    return Status::OK();
+  }
+  // The end key of the subcompaction must be greater than or equal to the
+  // upper bound. If the end of the subcompaction is null or the upper bound
+  // is null, it means that this file is the last file in the compaction, so
+  // there will be no overlap between this file and others.
+  assert(comp_end_user_key == nullptr || upper_bound == nullptr ||
+         ucmp->CompareWithoutTimestamp(ExtractUserKey(*upper_bound),
+                                       *comp_end_user_key) <= 0);
+  auto it = range_del_agg_->NewIterator(lower_bound, upper_bound);
+  Slice last_tombstone_start_user_key{};
+  bool reached_lower_bound = false;
+  const ReadOptions read_options(Env::IOActivity::kCompaction);
+  for (it->SeekToFirst(); it->Valid(); it->Next()) {
+    auto tombstone = it->Tombstone();
+    auto kv = tombstone.Serialize();
+    InternalKey tombstone_end = tombstone.SerializeEndKey();
+    // TODO: the underlying iterator should support clamping the bounds.
+    // tombstone_end.Encode() is of the form user_key@kMaxSeqno; if it is
+    // equal to lower_bound, there is no need to include such a range
+    // tombstone.
+    if (!reached_lower_bound && lower_bound &&
+        icmp.Compare(tombstone_end.Encode(), *lower_bound) <= 0) {
+      continue;
+    }
+    assert(!lower_bound ||
+           icmp.Compare(*lower_bound, tombstone_end.Encode()) <= 0);
+    reached_lower_bound = true;
+
+    // Garbage collection for range tombstones.
+    // If user-defined timestamp is enabled, range tombstones are dropped if
+    // they are at bottommost_level, below full_history_ts_low and not visible
+    // in any snapshot. trim_ts_ is passed to the constructor for
+    // range_del_agg_, and range_del_agg_ internally drops tombstones above
+    // trim_ts_.
+    if (bottommost_level && tombstone.seq_ <= earliest_snapshot &&
+        (ts_sz == 0 ||
+         (!full_history_ts_low.empty() &&
+          ucmp->CompareTimestamp(tombstone.ts_, full_history_ts_low) < 0))) {
+      // TODO(andrewkr): tombstones that span multiple output files are
+      // counted for each compaction output file, so lots of double
+      // counting.
+      range_del_out_stats.num_range_del_drop_obsolete++;
+      range_del_out_stats.num_record_drop_obsolete++;
+      continue;
+    }
+
+    assert(lower_bound == nullptr ||
+           ucmp->CompareWithoutTimestamp(ExtractUserKey(*lower_bound),
+                                         kv.second) < 0);
+    InternalKey tombstone_start = kv.first;
+    if (lower_bound &&
+        ucmp->CompareWithoutTimestamp(tombstone_start.user_key(),
+                                      ExtractUserKey(*lower_bound)) < 0) {
+      // This just updates the non-timestamp portion of `tombstone_start`'s
+      // user key. Ideally there would be a simpler API usage.
+      ParsedInternalKey tombstone_start_parsed;
+      ParseInternalKey(tombstone_start.Encode(), &tombstone_start_parsed,
+                       false /* log_err_key */)
+          .PermitUncheckedError();
+      // The timestamp should come from where the sequence number comes from,
+      // which is the tombstone in this case.
+      std::string ts =
+          tombstone_start_parsed.GetTimestamp(ucmp->timestamp_size())
+              .ToString();
+      tombstone_start_parsed.user_key = ExtractUserKey(*lower_bound);
+      tombstone_start.SetFrom(tombstone_start_parsed, ts);
+    }
+    if (upper_bound != nullptr &&
+        icmp.Compare(*upper_bound, tombstone_start.Encode()) < 0) {
+      break;
+    }
+    // Here we show that *only* range tombstones that overlap with
+    // [lower_bound, upper_bound] are added to the current file, and
+    // sanity-check invariants that should hold:
+    // - [tombstone_start, tombstone_end] overlaps with [lower_bound,
+    //   upper_bound]
+    // - meta.smallest <= meta.largest
+    // Corresponding assertions are made; the proof is broken if any of them
+    // fails.
+    // TODO: show that *all* range tombstones that overlap with
+    //  [lower_bound, upper_bound] are added.
+    // TODO: show that invariants about boundaries are correctly updated.
+    //
+    // Note that `tombstone_start` is updated in the if condition above; we use
+    // tombstone_start to refer to its initial value, i.e.,
+    // it->Tombstone().first, and use tombstone_start* to refer to its value
+    // after the update.
+    //
+    // To show [lower_bound, upper_bound] overlaps with [tombstone_start,
+    // tombstone_end]:
+    // lower_bound <= upper_bound from the if condition right after all
+    // bounds are initialized. We assume each tombstone fragment has
+    // start_key.user_key < end_key.user_key, so
+    // tombstone_start < tombstone_end by
+    // FragmentedTombstoneIterator::Tombstone(). So these two ranges are both
+    // non-empty. The flag `reached_lower_bound` and the if logic before it
+    // ensures lower_bound <= tombstone_end. tombstone_start is only updated
+    // if it has a smaller user_key than lower_bound user_key, so
+    // tombstone_start <= tombstone_start*. The above if condition implies
+    // tombstone_start* <= upper_bound. So we have
+    // tombstone_start <= upper_bound and lower_bound <= tombstone_end
+    // and the two ranges overlap.
+    //
+    // To show meta.smallest <= meta.largest:
+    // From the implementation of UpdateBoundariesForRange(), it suffices to
+    // prove that when it is first called in this function, its parameters
+    // satisfy `start <= end`, where start = max(tombstone_start*, lower_bound)
+    // and end = min(tombstone_end, upper_bound). From the above proof we have
+    // lower_bound <= tombstone_end and lower_bound <= upper_bound. We only
+    // need to show that tombstone_start* <= min(tombstone_end, upper_bound).
+    // Note that tombstone_start*.user_key = max(tombstone_start.user_key,
+    // lower_bound.user_key). Assuming tombstone_end always has
+    // kMaxSequenceNumber and lower_bound.seqno < kMaxSequenceNumber.
+    // Since lower_bound <= tombstone_end and lower_bound.seqno <
+    // tombstone_end.seqno (in absolute number order, not internal key order),
+    // lower_bound.user_key < tombstone_end.user_key.
+    // Since lower_bound.user_key < tombstone_end.user_key and
+    // tombstone_start.user_key < tombstone_end.user_key, tombstone_start* <
+    // tombstone_end. Since tombstone_start* <= upper_bound from the above
+    // proof and tombstone_start* < tombstone_end, tombstone_start* <=
+    // min(tombstone_end, upper_bound), so the two ranges overlap.
+
+    // Range tombstone is not supported by output validator yet.
+    builder_->Add(kv.first.Encode(), kv.second);
+    if (lower_bound &&
+        icmp.Compare(tombstone_start.Encode(), *lower_bound) < 0) {
+      tombstone_start.DecodeFrom(*lower_bound);
+    }
+    if (upper_bound && icmp.Compare(*upper_bound, tombstone_end.Encode()) < 0) {
+      tombstone_end.DecodeFrom(*upper_bound);
+    }
+    assert(icmp.Compare(tombstone_start, tombstone_end) <= 0);
+    meta.UpdateBoundariesForRange(tombstone_start, tombstone_end,
+                                  tombstone.seq_, icmp);
+    if (!bottommost_level) {
+      bool start_user_key_changed =
+          last_tombstone_start_user_key.empty() ||
+          ucmp->CompareWithoutTimestamp(last_tombstone_start_user_key,
+                                        it->start_key()) < 0;
+      last_tombstone_start_user_key = it->start_key();
+      if (start_user_key_changed) {
+        // If tombstone_start >= tombstone_end, then either no key range is
+        // covered, or they have the same user key. If they have the same
+        // user key, then the internal key range should only be within this
+        // level, and no keys from older levels are covered.
+        if (ucmp->CompareWithoutTimestamp(tombstone_start.user_key(),
+                                          tombstone_end.user_key()) < 0) {
+          SizeApproximationOptions approx_opts;
+          approx_opts.files_size_error_margin = 0.1;
+          auto approximate_covered_size =
+              compaction_->input_version()->version_set()->ApproximateSize(
+                  approx_opts, read_options, compaction_->input_version(),
+                  tombstone_start.Encode(), tombstone_end.Encode(),
+                  compaction_->output_level() + 1 /* start_level */,
+                  -1 /* end_level */, kCompaction);
+          meta.compensated_range_deletion_size += approximate_covered_size;
+        }
+      }
+    }
+  }
+  return Status::OK();
+}
+
+void CompactionOutputs::FillFilesToCutForTtl() {
+  if (compaction_->immutable_options()->compaction_style !=
+          kCompactionStyleLevel ||
+      compaction_->immutable_options()->compaction_pri !=
+          kMinOverlappingRatio ||
+      compaction_->mutable_cf_options()->ttl == 0 ||
+      compaction_->num_input_levels() < 2 || compaction_->bottommost_level()) {
+    return;
+  }
+
+  // We define a new file as one whose oldest ancestor time is younger than
+  // 1/4 of the TTL, and an old one as older than 1/2 of the TTL.
+  int64_t temp_current_time;
+  auto get_time_status =
+      compaction_->immutable_options()->clock->GetCurrentTime(
+          &temp_current_time);
+  if (!get_time_status.ok()) {
+    return;
+  }
+
+  auto current_time = static_cast<uint64_t>(temp_current_time);
+  if (current_time < compaction_->mutable_cf_options()->ttl) {
+    return;
+  }
+
+  uint64_t old_age_thres =
+      current_time - compaction_->mutable_cf_options()->ttl / 2;
+  const std::vector<FileMetaData*>& olevel =
+      *(compaction_->inputs(compaction_->num_input_levels() - 1));
+  for (FileMetaData* file : olevel) {
+    // Worth filtering out by start and end?
+    uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime();
+    // We only pick old files that are not too small, to prevent a flood of
+    // small files.
+    if (oldest_ancester_time < old_age_thres &&
+        file->fd.GetFileSize() >
+            compaction_->mutable_cf_options()->target_file_size_base / 2) {
+      files_to_cut_for_ttl_.push_back(file);
+    }
+  }
+}
+
+CompactionOutputs::CompactionOutputs(const Compaction* compaction,
+                                     const bool is_penultimate_level)
+    : compaction_(compaction), is_penultimate_level_(is_penultimate_level) {
+  partitioner_ = compaction->output_level() == 0
+                     ? nullptr
+                     : compaction->CreateSstPartitioner();
+
+  if (compaction->output_level() != 0) {
+    FillFilesToCutForTtl();
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.h
new file mode 100644
index 0000000000..1b65ee662f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_outputs.h
@@ -0,0 +1,403 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
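The dynamic pre-cut threshold referenced in ShouldStopBefore() above, restated as a standalone sketch; the PreCutThreshold name is ours, while the arithmetic mirrors the vendored code:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    // Sketch: the threshold starts at 50% of the target output file size and
    // grows by 5% per grandparent boundary already crossed, capped at 90%.
    // The (x + 99) / 100 term mirrors the integer "one percent" rounding in
    // the vendored code.
    uint64_t PreCutThreshold(uint64_t target_output_file_size,
                             size_t boundaries_seen) {
      const uint64_t one_percent = (target_output_file_size + 99) / 100;
      const uint64_t pct =
          50 + std::min<uint64_t>(boundaries_seen * 5, uint64_t{40});
      return one_percent * pct;
    }

For example, with a 64 MiB target: 0 boundaries gives roughly 32 MiB (50%), 4 boundaries roughly 44.8 MiB (70%), and 8 or more roughly 57.6 MiB (90%).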
+
+#pragma once
+
+#include "db/blob/blob_garbage_meter.h"
+#include "db/compaction/compaction.h"
+#include "db/compaction/compaction_iterator.h"
+#include "db/internal_stats.h"
+#include "db/output_validator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class CompactionOutputs;
+using CompactionFileOpenFunc = std::function<Status(CompactionOutputs&)>;
+using CompactionFileCloseFunc =
+    std::function<Status(CompactionOutputs&, const Status&, const Slice&)>;
+
+// Files produced by a subcompaction; most of the functions are used by the
+// compaction_job Open/Close compaction file functions.
+class CompactionOutputs {
+ public:
+  // compaction output file
+  struct Output {
+    Output(FileMetaData&& _meta, const InternalKeyComparator& _icmp,
+           bool _enable_order_check, bool _enable_hash, bool _finished,
+           uint64_t precalculated_hash)
+        : meta(std::move(_meta)),
+          validator(_icmp, _enable_order_check, _enable_hash,
+                    precalculated_hash),
+          finished(_finished) {}
+    FileMetaData meta;
+    OutputValidator validator;
+    bool finished;
+    std::shared_ptr<const TableProperties> table_properties;
+  };
+
+  CompactionOutputs() = delete;
+
+  explicit CompactionOutputs(const Compaction* compaction,
+                             const bool is_penultimate_level);
+
+  // Add generated output to the list
+  void AddOutput(FileMetaData&& meta, const InternalKeyComparator& icmp,
+                 bool enable_order_check, bool enable_hash,
+                 bool finished = false, uint64_t precalculated_hash = 0) {
+    outputs_.emplace_back(std::move(meta), icmp, enable_order_check,
+                          enable_hash, finished, precalculated_hash);
+  }
+
+  // Set new table builder for the current output
+  void NewBuilder(const TableBuilderOptions& tboptions);
+
+  // Assign a new WritableFileWriter to the current output
+  void AssignFileWriter(WritableFileWriter* writer) {
+    file_writer_.reset(writer);
+  }
+
+  // TODO: Remove it when remote compaction supports tiered compaction
+  void SetTotalBytes(uint64_t bytes) { stats_.bytes_written += bytes; }
+  void SetNumOutputRecords(uint64_t num) { stats_.num_output_records = num; }
+
+  // TODO: Move the BlobDB builder into CompactionOutputs
+  const std::vector<BlobFileAddition>& GetBlobFileAdditions() const {
+    if (is_penultimate_level_) {
+      assert(blob_file_additions_.empty());
+    }
+    return blob_file_additions_;
+  }
+
+  std::vector<BlobFileAddition>* GetBlobFileAdditionsPtr() {
+    assert(!is_penultimate_level_);
+    return &blob_file_additions_;
+  }
+
+  bool HasBlobFileAdditions() const { return !blob_file_additions_.empty(); }
+
+  BlobGarbageMeter* CreateBlobGarbageMeter() {
+    assert(!is_penultimate_level_);
+    blob_garbage_meter_ = std::make_unique<BlobGarbageMeter>();
+    return blob_garbage_meter_.get();
+  }
+
+  BlobGarbageMeter* GetBlobGarbageMeter() const {
+    if (is_penultimate_level_) {
+      // blobdb doesn't support per_key_placement yet
+      assert(blob_garbage_meter_ == nullptr);
+      return nullptr;
+    }
+    return blob_garbage_meter_.get();
+  }
+
+  void UpdateBlobStats() {
+    assert(!is_penultimate_level_);
+    stats_.num_output_files_blob = blob_file_additions_.size();
+    for (const auto& blob : blob_file_additions_) {
+      stats_.bytes_written_blob += blob.GetTotalBlobBytes();
+    }
+  }
+
+  // Finish the current output file
+  Status Finish(const Status& input_status,
+                const SeqnoToTimeMapping& seqno_time_mapping);
+
+  // Update output table properties from the table builder
+  void UpdateTableProperties() {
+    current_output().table_properties =
+        std::make_shared<TableProperties>(GetTableProperties());
+  }
+
+  IOStatus WriterSyncClose(const Status& input_status, SystemClock* clock,
+                           Statistics* statistics, bool use_fsync);
+
+  TableProperties GetTableProperties() {
+    return builder_->GetTableProperties();
+  }
+
+  Slice SmallestUserKey() const {
+    if (!outputs_.empty() && outputs_[0].finished) {
+      return outputs_[0].meta.smallest.user_key();
+    } else {
+      return Slice{nullptr, 0};
+    }
+  }
+
+  Slice LargestUserKey() const {
+    if (!outputs_.empty() && outputs_.back().finished) {
+      return outputs_.back().meta.largest.user_key();
+    } else {
+      return Slice{nullptr, 0};
+    }
+  }
+
+  // Remove the last output file in case it is empty, as it doesn't need to be
+  // kept.
+  void RemoveLastEmptyOutput() {
+    if (!outputs_.empty() && !outputs_.back().meta.fd.file_size) {
+      // An error occurred, so ignore the last output.
+      outputs_.pop_back();
+    }
+  }
+
+  // Remove the last output, for example when the last output doesn't have
+  // data (no entries and no range-dels), but file_size might not be 0, as it
+  // still has SST metadata.
+  void RemoveLastOutput() {
+    assert(!outputs_.empty());
+    outputs_.pop_back();
+  }
+
+  bool HasBuilder() const { return builder_ != nullptr; }
+
+  FileMetaData* GetMetaData() { return &current_output().meta; }
+
+  bool HasOutput() const { return !outputs_.empty(); }
+
+  uint64_t NumEntries() const { return builder_->NumEntries(); }
+
+  void ResetBuilder() {
+    builder_.reset();
+    current_output_file_size_ = 0;
+  }
+
+  // Add range deletions from the range_del_agg_ to the current output file.
+  // Input parameters, `range_tombstone_lower_bound_` and current output's
+  // metadata determine the bounds on range deletions to add. Updates output
+  // file metadata boundary if extended by range tombstones.
+  //
+  // @param comp_start_user_key and comp_end_user_key include timestamp if
+  // user-defined timestamp is enabled. Their timestamp should be max
+  // timestamp.
+  // @param next_table_min_key internal key lower bound for the next compaction
+  // output.
+  // @param full_history_ts_low used for range tombstone garbage collection.
+  Status AddRangeDels(const Slice* comp_start_user_key,
+                      const Slice* comp_end_user_key,
+                      CompactionIterationStats& range_del_out_stats,
+                      bool bottommost_level, const InternalKeyComparator& icmp,
+                      SequenceNumber earliest_snapshot,
+                      const Slice& next_table_min_key,
+                      const std::string& full_history_ts_low);
+
+  // whether the outputs have range deletes; range deletes are also data
+  bool HasRangeDel() const {
+    return range_del_agg_ && !range_del_agg_->IsEmpty();
+  }
+
+ private:
+  friend class SubcompactionState;
+
+  void FillFilesToCutForTtl();
+
+  void SetOutputSlitKey(const std::optional<Slice> start,
+                        const std::optional<Slice> end) {
+    const InternalKeyComparator* icmp =
+        &compaction_->column_family_data()->internal_comparator();
+
+    const InternalKey* output_split_key = compaction_->GetOutputSplitKey();
+    // Invalid output_split_key indicates that we do not need to split
+    if (output_split_key != nullptr) {
+      // We may only split the output when the cursor is in the range. Split
+      if ((!end.has_value() ||
+           icmp->user_comparator()->Compare(
+               ExtractUserKey(output_split_key->Encode()), end.value()) < 0) &&
+          (!start.has_value() || icmp->user_comparator()->Compare(
+                                     ExtractUserKey(output_split_key->Encode()),
+                                     start.value()) > 0)) {
+        local_output_split_key_ = output_split_key;
+      }
+    }
+  }
+
+  // Returns true iff we should stop building the current output
+  // before processing the current key in the compaction iterator.
+  bool ShouldStopBefore(const CompactionIterator& c_iter);
+
+  void Cleanup() {
+    if (builder_ != nullptr) {
+      // May happen if we get a shutdown call in the middle of compaction
+      builder_->Abandon();
+      builder_.reset();
+    }
+  }
+
+  // Updates states related to file cutting for TTL.
+  // Returns a boolean value indicating whether the current
+  // compaction output file should be cut before `internal_key`.
+  //
+  // @param internal_key the current key to be added to the output.
+  bool UpdateFilesToCutForTTLStates(const Slice& internal_key);
+
+  // Updates tracked grandparent information such as the grandparent index,
+  // whether the key is in the gap between 2 grandparent files, the
+  // accumulated grandparent file size, etc.
+  // It returns how many boundaries it crosses by including the current key.
+  size_t UpdateGrandparentBoundaryInfo(const Slice& internal_key);
+
+  // Helper function to get the overlapped grandparent file size; it's only
+  // used for calculating the first key's overlap.
+  uint64_t GetCurrentKeyGrandparentOverlappedBytes(
+      const Slice& internal_key) const;
+
+  // Add the current key from the compaction iterator to the output file. If
+  // needed, close and open a new compaction output with the functions
+  // provided.
+  Status AddToOutput(const CompactionIterator& c_iter,
+                     const CompactionFileOpenFunc& open_file_func,
+                     const CompactionFileCloseFunc& close_file_func);
+
+  // Close the current output. `open_file_func` is needed for creating a new
+  // file for a range-dels-only output file.
+  Status CloseOutput(const Status& curr_status,
+                     const CompactionFileOpenFunc& open_file_func,
+                     const CompactionFileCloseFunc& close_file_func) {
+    Status status = curr_status;
+    // handle subcompaction containing only range deletions
+    if (status.ok() && !HasBuilder() && !HasOutput() && HasRangeDel()) {
+      status = open_file_func(*this);
+    }
+    if (HasBuilder()) {
+      const Slice empty_key{};
+      Status s = close_file_func(*this, status, empty_key);
+      if (!s.ok() && status.ok()) {
+        status = s;
+      }
+    }
+
+    return status;
+  }
+
+  // This subcompaction's output could be empty if compaction was aborted
+  // before this subcompaction had a chance to generate any output files. When
+  // subcompactions are executed sequentially this is more likely, and it is
+  // particularly likely for the later subcompactions to be empty. Once they
+  // are run in parallel, however, it should be much rarer.
+  // It's the caller's responsibility to make sure it's not empty.
+  Output& current_output() {
+    assert(!outputs_.empty());
+    return outputs_.back();
+  }
+
+  // Assign the range_del_agg to the target output level. There's only one
+  // range-del-aggregator per compaction outputs; for
+  // output_to_penultimate_level compaction it is only assigned to the
+  // penultimate level.
+  void AssignRangeDelAggregator(
+      std::unique_ptr<CompactionRangeDelAggregator>&& range_del_agg) {
+    assert(range_del_agg_ == nullptr);
+    range_del_agg_ = std::move(range_del_agg);
+  }
+
+  const Compaction* compaction_;
+
+  // current output builder and writer
+  std::unique_ptr<TableBuilder> builder_;
+  std::unique_ptr<WritableFileWriter> file_writer_;
+  uint64_t current_output_file_size_ = 0;
+
+  // all the compaction outputs so far
+  std::vector<Output> outputs_;
+
+  // BlobDB info
+  std::vector<BlobFileAddition> blob_file_additions_;
+  std::unique_ptr<BlobGarbageMeter> blob_garbage_meter_;
+
+  // Basic compaction output stats for this level's outputs
+  InternalStats::CompactionOutputsStats stats_;
+
+  // Indicates whether this CompactionOutputs object is for the penultimate
+  // level; should always be false if the per_key_placement feature is not
+  // enabled.
+  const bool is_penultimate_level_;
+  std::unique_ptr<CompactionRangeDelAggregator> range_del_agg_ = nullptr;
+
+  // partitioner information
+  std::string last_key_for_partitioner_;
+  std::unique_ptr<SstPartitioner> partitioner_;
+
+  // A flag that records whether this subcompaction has been split by the
+  // cursor for RoundRobin compaction
+  bool is_split_ = false;
+
+  // We also maintain the output split key for each subcompaction to avoid
+  // repetitive comparisons in ShouldStopBefore()
+  const InternalKey* local_output_split_key_ = nullptr;
+
+  // Some identified files with an old oldest-ancestor time; the range should
+  // be isolated out so that the output file(s) in that range can be merged
+  // down for TTL and the timestamps for the range cleared.
+  std::vector<FileMetaData*> files_to_cut_for_ttl_;
+  int cur_files_to_cut_for_ttl_ = -1;
+  int next_files_to_cut_for_ttl_ = 0;
+
+  // An index used to speed up ShouldStopBefore().
+  size_t grandparent_index_ = 0;
+
+  // Whether the output key is in the gap between grandparent files, i.e.:
+  //   key > grandparents[grandparent_index_ - 1].largest &&
+  //   key < grandparents[grandparent_index_].smallest
+  bool being_grandparent_gap_ = true;
+
+  // The number of bytes overlapping between the current output and
+  // grandparent files, used in ShouldStopBefore().
+  uint64_t grandparent_overlapped_bytes_ = 0;
+
+  // A flag that records whether the key has been seen in ShouldStopBefore()
+  bool seen_key_ = false;
+
+  // For the current output file, how many file boundaries it has crossed;
+  // basically the number of files overlapped * 2
+  size_t grandparent_boundary_switched_num_ = 0;
+
+  // The smallest key of the current output file; this is set when the current
+  // output file's smallest key is a range tombstone start key.
+  InternalKey range_tombstone_lower_bound_;
+};
+
+// helper struct to concatenate the last level and penultimate level outputs,
+// which could be replaced by std::ranges::join_view() in C++20
+struct OutputIterator {
+ public:
+  explicit OutputIterator(const std::vector<CompactionOutputs::Output>& a,
+                          const std::vector<CompactionOutputs::Output>& b)
+      : a_(a), b_(b) {
+    within_a = !a_.empty();
+    idx_ = 0;
+  }
+
+  OutputIterator begin() { return *this; }
+
+  OutputIterator end() { return *this; }
+
+  size_t size() { return a_.size() + b_.size(); }
+
+  const CompactionOutputs::Output& operator*() const {
+    return within_a ? a_[idx_] : b_[idx_];
+  }
+
+  OutputIterator& operator++() {
+    idx_++;
+    if (within_a && idx_ >= a_.size()) {
+      within_a = false;
+      idx_ = 0;
+    }
+    assert(within_a || idx_ <= b_.size());
+    return *this;
+  }
+
+  bool operator!=(const OutputIterator& /*rhs*/) const {
+    return within_a || idx_ < b_.size();
+  }
+
+ private:
+  const std::vector<CompactionOutputs::Output>& a_;
+  const std::vector<CompactionOutputs::Output>& b_;
+  bool within_a;
+  size_t idx_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.cc
new file mode 100644
index 0000000000..a8f314a5bb
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.cc
@@ -0,0 +1,1225 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/compaction/compaction_picker.h"
+
+#include <cinttypes>
+#include <limits>
+#include <queue>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "db/column_family.h"
+#include "file/filename.h"
+#include "logging/log_buffer.h"
+#include "logging/logging.h"
+#include "monitoring/statistics_impl.h"
+#include "test_util/sync_point.h"
+#include "util/random.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
+                           size_t min_files_to_compact,
+                           uint64_t max_compact_bytes_per_del_file,
+                           uint64_t max_compaction_bytes,
+                           CompactionInputFiles* comp_inputs) {
+  TEST_SYNC_POINT("FindIntraL0Compaction");
+
+  size_t start = 0;
+
+  if (level_files.size() == 0 || level_files[start]->being_compacted) {
+    return false;
+  }
+
+  size_t compact_bytes = static_cast<size_t>(level_files[start]->fd.file_size);
+  size_t compact_bytes_per_del_file = std::numeric_limits<size_t>::max();
+  // Compaction range will be [start, limit).
+  size_t limit;
+  // Pull in files until the amount of compaction work per deleted file begins
+  // increasing or the maximum total compaction size is reached.
+  size_t new_compact_bytes_per_del_file = 0;
+  for (limit = start + 1; limit < level_files.size(); ++limit) {
+    compact_bytes += static_cast<size_t>(level_files[limit]->fd.file_size);
+    new_compact_bytes_per_del_file = compact_bytes / (limit - start);
+    if (level_files[limit]->being_compacted ||
+        new_compact_bytes_per_del_file > compact_bytes_per_del_file ||
+        compact_bytes > max_compaction_bytes) {
+      break;
+    }
+    compact_bytes_per_del_file = new_compact_bytes_per_del_file;
+  }
+
+  if ((limit - start) >= min_files_to_compact &&
+      compact_bytes_per_del_file < max_compact_bytes_per_del_file) {
+    assert(comp_inputs != nullptr);
+    comp_inputs->level = 0;
+    for (size_t i = start; i < limit; ++i) {
+      comp_inputs->files.push_back(level_files[i]);
+    }
+    return true;
+  }
+  return false;
+}
+
+// Determine the compression type based on user options, the level of the
+// output file, and whether compression is disabled.
+// If enable_compression is false, then compression is always disabled no
+// matter what the values of the other two parameters are.
+// Otherwise, the compression type is determined based on options and level.
+CompressionType GetCompressionType(const VersionStorageInfo* vstorage,
+                                   const MutableCFOptions& mutable_cf_options,
+                                   int level, int base_level,
+                                   const bool enable_compression) {
+  if (!enable_compression) {
+    // disable compression
+    return kNoCompression;
+  }
+
+  // If bottommost_compression is set and we are compacting to the
+  // bottommost level then we should use it.
+  if (mutable_cf_options.bottommost_compression != kDisableCompressionOption &&
+      level >= (vstorage->num_non_empty_levels() - 1)) {
+    return mutable_cf_options.bottommost_compression;
+  }
+  // If the user has specified a different compression level for each level,
+  // then pick the compression for that level.
+  if (!mutable_cf_options.compression_per_level.empty()) {
+    assert(level == 0 || level >= base_level);
+    int idx = (level == 0) ? 0 : level - base_level + 1;
+
+    const int n =
+        static_cast<int>(mutable_cf_options.compression_per_level.size()) - 1;
+    // It is possible for level_ to be -1; in that case, we use level
+    // 0's compression. This occurs mostly in backwards compatibility
+    // situations when the builder doesn't know what level the file
+    // belongs to. Likewise, if level is beyond the end of the
+    // specified compression levels, use the last value.
+ return mutable_cf_options + .compression_per_level[std::max(0, std::min(idx, n))]; + } else { + return mutable_cf_options.compression; + } +} + +CompressionOptions GetCompressionOptions(const MutableCFOptions& cf_options, + const VersionStorageInfo* vstorage, + int level, + const bool enable_compression) { + if (!enable_compression) { + return cf_options.compression_opts; + } + // If bottommost_compression_opts is enabled and we are compacting to the + // bottommost level then we should use the specified compression options. + if (level >= (vstorage->num_non_empty_levels() - 1) && + cf_options.bottommost_compression_opts.enabled) { + return cf_options.bottommost_compression_opts; + } + return cf_options.compression_opts; +} + +CompactionPicker::CompactionPicker(const ImmutableOptions& ioptions, + const InternalKeyComparator* icmp) + : ioptions_(ioptions), icmp_(icmp) {} + +CompactionPicker::~CompactionPicker() {} + +// Delete this compaction from the list of running compactions. +void CompactionPicker::ReleaseCompactionFiles(Compaction* c, Status status) { + UnregisterCompaction(c); + if (!status.ok()) { + c->ResetNextCompactionIndex(); + } +} + +void CompactionPicker::GetRange(const CompactionInputFiles& inputs, + InternalKey* smallest, + InternalKey* largest) const { + const int level = inputs.level; + assert(!inputs.empty()); + smallest->Clear(); + largest->Clear(); + + if (level == 0) { + for (size_t i = 0; i < inputs.size(); i++) { + FileMetaData* f = inputs[i]; + if (i == 0) { + *smallest = f->smallest; + *largest = f->largest; + } else { + if (icmp_->Compare(f->smallest, *smallest) < 0) { + *smallest = f->smallest; + } + if (icmp_->Compare(f->largest, *largest) > 0) { + *largest = f->largest; + } + } + } + } else { + *smallest = inputs[0]->smallest; + *largest = inputs[inputs.size() - 1]->largest; + } +} + +void CompactionPicker::GetRange(const CompactionInputFiles& inputs1, + const CompactionInputFiles& inputs2, + InternalKey* smallest, + InternalKey* largest) const { + assert(!inputs1.empty() || !inputs2.empty()); + if (inputs1.empty()) { + GetRange(inputs2, smallest, largest); + } else if (inputs2.empty()) { + GetRange(inputs1, smallest, largest); + } else { + InternalKey smallest1, smallest2, largest1, largest2; + GetRange(inputs1, &smallest1, &largest1); + GetRange(inputs2, &smallest2, &largest2); + *smallest = + icmp_->Compare(smallest1, smallest2) < 0 ? smallest1 : smallest2; + *largest = icmp_->Compare(largest1, largest2) < 0 ? 
largest2 : largest1;
+  }
+}
+
+void CompactionPicker::GetRange(
+    const std::vector<CompactionInputFiles>& inputs, InternalKey* smallest,
+    InternalKey* largest, int exclude_level) const {
+  InternalKey current_smallest;
+  InternalKey current_largest;
+  bool initialized = false;
+  for (const auto& in : inputs) {
+    if (in.empty() || in.level == exclude_level) {
+      continue;
+    }
+    GetRange(in, &current_smallest, &current_largest);
+    if (!initialized) {
+      *smallest = current_smallest;
+      *largest = current_largest;
+      initialized = true;
+    } else {
+      if (icmp_->Compare(current_smallest, *smallest) < 0) {
+        *smallest = current_smallest;
+      }
+      if (icmp_->Compare(current_largest, *largest) > 0) {
+        *largest = current_largest;
+      }
+    }
+  }
+  assert(initialized);
+}
+
+bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
+                                              VersionStorageInfo* vstorage,
+                                              CompactionInputFiles* inputs,
+                                              InternalKey** next_smallest) {
+  // This isn't a good compaction
+  assert(!inputs->empty());
+
+  const int level = inputs->level;
+  // GetOverlappingInputs will always do the right thing for level-0.
+  // So we don't need to do any expansion if level == 0.
+  if (level == 0) {
+    return true;
+  }
+
+  InternalKey smallest, largest;
+
+  // Keep expanding inputs until we are sure that there is a "clean cut"
+  // boundary between the files in input and the surrounding files.
+  // This will ensure that no parts of a key are lost during compaction.
+  int hint_index = -1;
+  size_t old_size;
+  do {
+    old_size = inputs->size();
+    GetRange(*inputs, &smallest, &largest);
+    inputs->clear();
+    vstorage->GetOverlappingInputs(level, &smallest, &largest, &inputs->files,
+                                   hint_index, &hint_index, true,
+                                   next_smallest);
+  } while (inputs->size() > old_size);
+
+  // we started off with inputs non-empty and the previous loop only grew
+  // inputs. thus, inputs should be non-empty here
+  assert(!inputs->empty());
+
+  // If, after the expansion, there are files that are already under
+  // compaction, then we must drop/cancel this compaction.
+  if (AreFilesInCompaction(inputs->files)) {
+    return false;
+  }
+  return true;
+}
+
+bool CompactionPicker::RangeOverlapWithCompaction(
+    const Slice& smallest_user_key, const Slice& largest_user_key,
+    int level) const {
+  const Comparator* ucmp = icmp_->user_comparator();
+  for (Compaction* c : compactions_in_progress_) {
+    if (c->output_level() == level &&
+        ucmp->CompareWithoutTimestamp(smallest_user_key,
+                                      c->GetLargestUserKey()) <= 0 &&
+        ucmp->CompareWithoutTimestamp(largest_user_key,
+                                      c->GetSmallestUserKey()) >= 0) {
+      // Overlap
+      return true;
+    }
+    if (c->SupportsPerKeyPlacement()) {
+      if (c->OverlapPenultimateLevelOutputRange(smallest_user_key,
+                                                largest_user_key)) {
+        return true;
+      }
+    }
+  }
+  // Did not overlap with any running compaction in level `level`
+  return false;
+}
+
+bool CompactionPicker::FilesRangeOverlapWithCompaction(
+    const std::vector<CompactionInputFiles>& inputs, int level,
+    int penultimate_level) const {
+  bool is_empty = true;
+  for (auto& in : inputs) {
+    if (!in.empty()) {
+      is_empty = false;
+      break;
+    }
+  }
+  if (is_empty) {
+    // No files in inputs
+    return false;
+  }
+
+  // TODO: Intra-L0 compactions can have overlapping ranges, but the input
+  // files cannot be overlapped in the order of L0 files.
+  InternalKey smallest, largest;
+  GetRange(inputs, &smallest, &largest, Compaction::kInvalidLevel);
+  if (penultimate_level != Compaction::kInvalidLevel) {
+    if (ioptions_.compaction_style == kCompactionStyleUniversal) {
+      if (RangeOverlapWithCompaction(smallest.user_key(), largest.user_key(),
+                                     penultimate_level)) {
+        return true;
+      }
+    } else {
+      InternalKey penultimate_smallest, penultimate_largest;
+      GetRange(inputs, &penultimate_smallest, &penultimate_largest, level);
+      if (RangeOverlapWithCompaction(penultimate_smallest.user_key(),
                                     penultimate_largest.user_key(),
+                                     penultimate_level)) {
+        return true;
+      }
+    }
+  }
+
+  return RangeOverlapWithCompaction(smallest.user_key(), largest.user_key(),
+                                    level);
+}
+
+// Returns true if any one of the specified files is being compacted
+bool CompactionPicker::AreFilesInCompaction(
+    const std::vector<FileMetaData*>& files) {
+  for (size_t i = 0; i < files.size(); i++) {
+    if (files[i]->being_compacted) {
+      return true;
+    }
+  }
+  return false;
+}
+
+Compaction* CompactionPicker::CompactFiles(
+    const CompactionOptions& compact_options,
+    const std::vector<CompactionInputFiles>& input_files, int output_level,
+    VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options,
+    const MutableDBOptions& mutable_db_options, uint32_t output_path_id) {
+#ifndef NDEBUG
+  assert(input_files.size());
+  // This compaction output should not overlap with a running compaction as
+  // `SanitizeCompactionInputFiles` should've checked earlier and the db mutex
+  // shouldn't have been released since.
+  int start_level = Compaction::kInvalidLevel;
+  for (const auto& in : input_files) {
+    // input_files should already be sorted by level
+    if (!in.empty()) {
+      start_level = in.level;
+      break;
+    }
+  }
+  assert(output_level == 0 ||
+         !FilesRangeOverlapWithCompaction(
+             input_files, output_level,
+             Compaction::EvaluatePenultimateLevel(vstorage, ioptions_,
+                                                  start_level, output_level)));
+#endif /* !NDEBUG */
+
+  CompressionType compression_type;
+  if (compact_options.compression == kDisableCompressionOption) {
+    int base_level;
+    if (ioptions_.compaction_style == kCompactionStyleLevel) {
+      base_level = vstorage->base_level();
+    } else {
+      base_level = 1;
+    }
+    compression_type = GetCompressionType(vstorage, mutable_cf_options,
+                                          output_level, base_level);
+  } else {
+    // TODO(ajkr): `CompactionOptions` offers configurable `CompressionType`
+    // without configurable `CompressionOptions`, which is inconsistent.
+    compression_type = compact_options.compression;
+  }
+  auto c = new Compaction(
+      vstorage, ioptions_, mutable_cf_options, mutable_db_options, input_files,
+      output_level, compact_options.output_file_size_limit,
+      mutable_cf_options.max_compaction_bytes, output_path_id,
+      compression_type,
+      GetCompressionOptions(mutable_cf_options, vstorage, output_level),
+      Temperature::kUnknown, compact_options.max_subcompactions,
+      /* grandparents */ {}, true);
+  RegisterCompaction(c);
+  return c;
+}
+
+Status CompactionPicker::GetCompactionInputsFromFileNumbers(
+    std::vector<CompactionInputFiles>* input_files,
+    std::unordered_set<uint64_t>* input_set, const VersionStorageInfo* vstorage,
+    const CompactionOptions& /*compact_options*/) const {
+  if (input_set->size() == 0U) {
+    return Status::InvalidArgument(
+        "Compaction must include at least one file.");
+  }
+  assert(input_files);
+
+  std::vector<CompactionInputFiles> matched_input_files;
+  matched_input_files.resize(vstorage->num_levels());
+  int first_non_empty_level = -1;
+  int last_non_empty_level = -1;
+  // TODO(yhchiang): use a lazy-initialized mapping from
+  // file_number to FileMetaData in Version.
+  for (int level = 0; level < vstorage->num_levels(); ++level) {
+    for (auto file : vstorage->LevelFiles(level)) {
+      auto iter = input_set->find(file->fd.GetNumber());
+      if (iter != input_set->end()) {
+        matched_input_files[level].files.push_back(file);
+        input_set->erase(iter);
+        last_non_empty_level = level;
+        if (first_non_empty_level == -1) {
+          first_non_empty_level = level;
+        }
+      }
+    }
+  }
+
+  if (!input_set->empty()) {
+    std::string message(
+        "Cannot find matched SST files for the following file numbers:");
+    for (auto fn : *input_set) {
+      message += " ";
+      message += std::to_string(fn);
+    }
+    return Status::InvalidArgument(message);
+  }
+
+  for (int level = first_non_empty_level; level <= last_non_empty_level;
+       ++level) {
+    matched_input_files[level].level = level;
+    input_files->emplace_back(std::move(matched_input_files[level]));
+  }
+
+  return Status::OK();
+}
+
+// Returns true if any one of the parent files is being compacted
+bool CompactionPicker::IsRangeInCompaction(VersionStorageInfo* vstorage,
+                                           const InternalKey* smallest,
+                                           const InternalKey* largest,
+                                           int level, int* level_index) {
+  std::vector<FileMetaData*> inputs;
+  assert(level < NumberLevels());
+
+  vstorage->GetOverlappingInputs(level, smallest, largest, &inputs,
+                                 level_index ? *level_index : 0, level_index);
+  return AreFilesInCompaction(inputs);
+}
+
+// Populates the set of inputs of all other levels that overlap with the
+// start level.
+// For now we assume all levels except the start level and output level are
+// empty.
+// Will also attempt to expand the "start level" if that doesn't expand the
+// "output level" or cause "level" to include a file for compaction that has
+// an overlapping user key with another file.
+// REQUIRES: input_level and output_level are different
+// REQUIRES: inputs->empty() == false
+// Returns false if files on the parent level are currently in compaction,
+// which means that we can't compact them
+bool CompactionPicker::SetupOtherInputs(
+    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
+    VersionStorageInfo* vstorage, CompactionInputFiles* inputs,
+    CompactionInputFiles* output_level_inputs, int* parent_index,
+    int base_index, bool only_expand_towards_right) {
+  assert(!inputs->empty());
+  assert(output_level_inputs->empty());
+  const int input_level = inputs->level;
+  const int output_level = output_level_inputs->level;
+  if (input_level == output_level) {
+    // no possibility of conflict
+    return true;
+  }
+
+  // For now, we only support merging two levels, the start level and the
+  // output level. We need to assert that the other levels are empty.
+  for (int l = input_level + 1; l < output_level; l++) {
+    assert(vstorage->NumLevelFiles(l) == 0);
+  }
+
+  InternalKey smallest, largest;
+
+  // Get the range one last time.
+  GetRange(*inputs, &smallest, &largest);
+
+  // Populate the set of next-level files (inputs_GetOutputLevelInputs()) to
+  // include in compaction
+  vstorage->GetOverlappingInputs(output_level, &smallest, &largest,
+                                 &output_level_inputs->files, *parent_index,
+                                 parent_index);
+  if (AreFilesInCompaction(output_level_inputs->files)) {
+    return false;
+  }
+  if (!output_level_inputs->empty()) {
+    if (!ExpandInputsToCleanCut(cf_name, vstorage, output_level_inputs)) {
+      return false;
+    }
+  }
+
+  // See if we can further grow the number of inputs in "level" without
+  // changing the number of "level+1" files we pick up. We also choose NOT
+  // to expand if this would cause "level" to include some entries for some
+  // user key, while excluding other entries for the same user key. This
+  // can happen when one user key spans multiple files.
+  if (!output_level_inputs->empty()) {
+    const uint64_t limit = mutable_cf_options.max_compaction_bytes;
+    const uint64_t output_level_inputs_size =
+        TotalFileSize(output_level_inputs->files);
+    const uint64_t inputs_size = TotalFileSize(inputs->files);
+    bool expand_inputs = false;
+
+    CompactionInputFiles expanded_inputs;
+    expanded_inputs.level = input_level;
+    // Get the closed interval of the output level
+    InternalKey all_start, all_limit;
+    GetRange(*inputs, *output_level_inputs, &all_start, &all_limit);
+    bool try_overlapping_inputs = true;
+    if (only_expand_towards_right) {
+      // Round-robin compaction only allows expansion towards the larger side.
+      vstorage->GetOverlappingInputs(input_level, &smallest, &all_limit,
+                                     &expanded_inputs.files, base_index,
+                                     nullptr);
+    } else {
+      vstorage->GetOverlappingInputs(input_level, &all_start, &all_limit,
+                                     &expanded_inputs.files, base_index,
+                                     nullptr);
+    }
+    uint64_t expanded_inputs_size = TotalFileSize(expanded_inputs.files);
+    if (!ExpandInputsToCleanCut(cf_name, vstorage, &expanded_inputs)) {
+      try_overlapping_inputs = false;
+    }
+    if (try_overlapping_inputs && expanded_inputs.size() > inputs->size() &&
+        (mutable_cf_options.ignore_max_compaction_bytes_for_input ||
+         output_level_inputs_size + expanded_inputs_size < limit) &&
+        !AreFilesInCompaction(expanded_inputs.files)) {
+      InternalKey new_start, new_limit;
+      GetRange(expanded_inputs, &new_start, &new_limit);
+      CompactionInputFiles expanded_output_level_inputs;
+      expanded_output_level_inputs.level = output_level;
+      vstorage->GetOverlappingInputs(output_level, &new_start, &new_limit,
+                                     &expanded_output_level_inputs.files,
+                                     *parent_index, parent_index);
+      assert(!expanded_output_level_inputs.empty());
+      if (!AreFilesInCompaction(expanded_output_level_inputs.files) &&
+          ExpandInputsToCleanCut(cf_name, vstorage,
+                                 &expanded_output_level_inputs) &&
+          expanded_output_level_inputs.size() == output_level_inputs->size()) {
+        expand_inputs = true;
+      }
+    }
+    if (!expand_inputs) {
+      vstorage->GetCleanInputsWithinInterval(input_level, &all_start,
+                                             &all_limit, &expanded_inputs.files,
+                                             base_index, nullptr);
+      expanded_inputs_size = TotalFileSize(expanded_inputs.files);
+      if (expanded_inputs.size() > inputs->size() &&
+          (mutable_cf_options.ignore_max_compaction_bytes_for_input ||
+           output_level_inputs_size + expanded_inputs_size < limit) &&
+          !AreFilesInCompaction(expanded_inputs.files)) {
+        expand_inputs = true;
+      }
+    }
+    if (expand_inputs) {
+      ROCKS_LOG_INFO(ioptions_.logger,
+                     "[%s] Expanding@%d %" ROCKSDB_PRIszt "+%" ROCKSDB_PRIszt
+                     "(%" PRIu64 "+%" PRIu64 " bytes) to %" ROCKSDB_PRIszt
+                     "+%" ROCKSDB_PRIszt " (%" PRIu64 "+%" PRIu64 " bytes)\n",
+                     cf_name.c_str(), input_level, inputs->size(),
+                     output_level_inputs->size(), inputs_size,
+                     output_level_inputs_size, expanded_inputs.size(),
+                     output_level_inputs->size(), expanded_inputs_size,
+                     output_level_inputs_size);
+      inputs->files = expanded_inputs.files;
+    }
+  } else {
+    // Likely to be a trivial move. Expand files if they are still trivial
+    // moves, but limit to mutable_cf_options.max_compaction_bytes or 8 files
+    // so that we don't create too much compaction pressure for the next
+    // level.
+ } + return true; +} + +void CompactionPicker::GetGrandparents( + VersionStorageInfo* vstorage, const CompactionInputFiles& inputs, + const CompactionInputFiles& output_level_inputs, + std::vector* grandparents) { + InternalKey start, limit; + GetRange(inputs, output_level_inputs, &start, &limit); + // Compute the set of grandparent files that overlap this compaction + // (parent == level+1; grandparent == level+2 or the first + // level after that has overlapping files) + for (int level = output_level_inputs.level + 1; level < NumberLevels(); + level++) { + vstorage->GetOverlappingInputs(level, &start, &limit, grandparents); + if (!grandparents->empty()) { + break; + } + } +} + +Compaction* CompactionPicker::CompactRange( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + int input_level, int output_level, + const CompactRangeOptions& compact_range_options, const InternalKey* begin, + const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict, + uint64_t max_file_num_to_ignore, const std::string& trim_ts) { + // CompactionPickerFIFO has its own implementation of compact range + assert(ioptions_.compaction_style != kCompactionStyleFIFO); + + if (input_level == ColumnFamilyData::kCompactAllLevels) { + assert(ioptions_.compaction_style == kCompactionStyleUniversal); + + // Universal compaction with more than one level always compacts all the + // files together to the last level. + assert(vstorage->num_levels() > 1); + // DBImpl::CompactRange() set output level to be the last level + if (ioptions_.allow_ingest_behind) { + assert(output_level == vstorage->num_levels() - 2); + } else { + assert(output_level == vstorage->num_levels() - 1); + } + // DBImpl::RunManualCompaction will make full range for universal compaction + assert(begin == nullptr); + assert(end == nullptr); + *compaction_end = nullptr; + + int start_level = 0; + for (; start_level < vstorage->num_levels() && + vstorage->NumLevelFiles(start_level) == 0; + start_level++) { + } + if (start_level == vstorage->num_levels()) { + return nullptr; + } + + if ((start_level == 0) && (!level0_compactions_in_progress_.empty())) { + *manual_conflict = true; + // Only one level 0 compaction allowed + return nullptr; + } + + std::vector inputs(vstorage->num_levels() - + start_level); + for (int level = start_level; level < vstorage->num_levels(); level++) { + inputs[level - start_level].level = level; + auto& files = inputs[level - start_level].files; + for (FileMetaData* f : vstorage->LevelFiles(level)) { + files.push_back(f); + } + if (AreFilesInCompaction(files)) { + *manual_conflict = true; + return nullptr; + } + } + + // 2 non-exclusive manual compactions could run at the same time producing + // overlaping outputs in the same level. + if (FilesRangeOverlapWithCompaction( + inputs, output_level, + Compaction::EvaluatePenultimateLevel(vstorage, ioptions_, + start_level, output_level))) { + // This compaction output could potentially conflict with the output + // of a currently running compaction, we cannot run it. 
+ *manual_conflict = true; + return nullptr; + } + + Compaction* c = new Compaction( + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(inputs), output_level, + MaxFileSizeForLevel(mutable_cf_options, output_level, + ioptions_.compaction_style), + /* max_compaction_bytes */ LLONG_MAX, + compact_range_options.target_path_id, + GetCompressionType(vstorage, mutable_cf_options, output_level, 1), + GetCompressionOptions(mutable_cf_options, vstorage, output_level), + Temperature::kUnknown, compact_range_options.max_subcompactions, + /* grandparents */ {}, /* is manual */ true, trim_ts, /* score */ -1, + /* deletion_compaction */ false, /* l0_files_might_overlap */ true, + CompactionReason::kUnknown, + compact_range_options.blob_garbage_collection_policy, + compact_range_options.blob_garbage_collection_age_cutoff); + + RegisterCompaction(c); + vstorage->ComputeCompactionScore(ioptions_, mutable_cf_options); + return c; + } + + CompactionInputFiles inputs; + inputs.level = input_level; + bool covering_the_whole_range = true; + + // All files are 'overlapping' in universal style compaction. + // We have to compact the entire range in one shot. + if (ioptions_.compaction_style == kCompactionStyleUniversal) { + begin = nullptr; + end = nullptr; + } + + vstorage->GetOverlappingInputs(input_level, begin, end, &inputs.files); + if (inputs.empty()) { + return nullptr; + } + + if ((input_level == 0) && (!level0_compactions_in_progress_.empty())) { + // Only one level 0 compaction allowed + TEST_SYNC_POINT("CompactionPicker::CompactRange:Conflict"); + *manual_conflict = true; + return nullptr; + } + + // Avoid compacting too much in one shot in case the range is large. + // But we cannot do this for level-0 since level-0 files can overlap + // and we must not pick one file and drop another older file if the + // two files overlap. + if (input_level > 0) { + const uint64_t limit = mutable_cf_options.max_compaction_bytes; + uint64_t input_level_total = 0; + int hint_index = -1; + InternalKey* smallest = nullptr; + InternalKey* largest = nullptr; + for (size_t i = 0; i + 1 < inputs.size(); ++i) { + if (!smallest) { + smallest = &inputs[i]->smallest; + } + largest = &inputs[i]->largest; + + uint64_t input_file_size = inputs[i]->fd.GetFileSize(); + uint64_t output_level_total = 0; + if (output_level < vstorage->num_non_empty_levels()) { + std::vector files; + vstorage->GetOverlappingInputsRangeBinarySearch( + output_level, smallest, largest, &files, hint_index, &hint_index); + for (const auto& file : files) { + output_level_total += file->fd.GetFileSize(); + } + } + + input_level_total += input_file_size; + + if (input_level_total + output_level_total >= limit) { + covering_the_whole_range = false; + // still include the current file, so the compaction could be larger + // than max_compaction_bytes, which is also to make sure the compaction + // can make progress even `max_compaction_bytes` is small (e.g. smaller + // than an SST file). + inputs.files.resize(i + 1); + break; + } + } + } + + assert(compact_range_options.target_path_id < + static_cast(ioptions_.cf_paths.size())); + + // for BOTTOM LEVEL compaction only, use max_file_num_to_ignore to filter out + // files that are created during the current compaction. 
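+  // (Rationale, as far as the surrounding code shows: under
+  // BottommostLevelCompaction::kForceOptimized a bottommost manual
+  // compaction could otherwise pick up output files it has just written
+  // itself; ignoring file numbers >= max_file_num_to_ignore breaks that
+  // cycle.)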
+ if (compact_range_options.bottommost_level_compaction == + BottommostLevelCompaction::kForceOptimized && + max_file_num_to_ignore != std::numeric_limits::max()) { + assert(input_level == output_level); + // inputs_shrunk holds a continuous subset of input files which were all + // created before the current manual compaction + std::vector inputs_shrunk; + size_t skip_input_index = inputs.size(); + for (size_t i = 0; i < inputs.size(); ++i) { + if (inputs[i]->fd.GetNumber() < max_file_num_to_ignore) { + inputs_shrunk.push_back(inputs[i]); + } else if (!inputs_shrunk.empty()) { + // inputs[i] was created during the current manual compaction and + // need to be skipped + skip_input_index = i; + break; + } + } + if (inputs_shrunk.empty()) { + return nullptr; + } + if (inputs.size() != inputs_shrunk.size()) { + inputs.files.swap(inputs_shrunk); + } + // set covering_the_whole_range to false if there is any file that need to + // be compacted in the range of inputs[skip_input_index+1, inputs.size()) + for (size_t i = skip_input_index + 1; i < inputs.size(); ++i) { + if (inputs[i]->fd.GetNumber() < max_file_num_to_ignore) { + covering_the_whole_range = false; + } + } + } + + InternalKey key_storage; + InternalKey* next_smallest = &key_storage; + if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs, &next_smallest) == + false) { + // manual compaction is now multi-threaded, so it can + // happen that ExpandWhileOverlapping fails + // we handle it higher in RunManualCompaction + *manual_conflict = true; + return nullptr; + } + + if (covering_the_whole_range || !next_smallest) { + *compaction_end = nullptr; + } else { + **compaction_end = *next_smallest; + } + + CompactionInputFiles output_level_inputs; + if (output_level == ColumnFamilyData::kCompactToBaseLevel) { + assert(input_level == 0); + output_level = vstorage->base_level(); + assert(output_level > 0); + } + output_level_inputs.level = output_level; + if (input_level != output_level) { + int parent_index = -1; + if (!SetupOtherInputs(cf_name, mutable_cf_options, vstorage, &inputs, + &output_level_inputs, &parent_index, -1)) { + // manual compaction is now multi-threaded, so it can + // happen that SetupOtherInputs fails + // we handle it higher in RunManualCompaction + *manual_conflict = true; + return nullptr; + } + } + + std::vector compaction_inputs({inputs}); + if (!output_level_inputs.empty()) { + compaction_inputs.push_back(output_level_inputs); + } + for (size_t i = 0; i < compaction_inputs.size(); i++) { + if (AreFilesInCompaction(compaction_inputs[i].files)) { + *manual_conflict = true; + return nullptr; + } + } + + // 2 non-exclusive manual compactions could run at the same time producing + // overlaping outputs in the same level. + if (FilesRangeOverlapWithCompaction( + compaction_inputs, output_level, + Compaction::EvaluatePenultimateLevel(vstorage, ioptions_, input_level, + output_level))) { + // This compaction output could potentially conflict with the output + // of a currently running compaction, we cannot run it. 
+ *manual_conflict = true; + return nullptr; + } + + std::vector grandparents; + GetGrandparents(vstorage, inputs, output_level_inputs, &grandparents); + Compaction* compaction = new Compaction( + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(compaction_inputs), output_level, + MaxFileSizeForLevel(mutable_cf_options, output_level, + ioptions_.compaction_style, vstorage->base_level(), + ioptions_.level_compaction_dynamic_level_bytes), + mutable_cf_options.max_compaction_bytes, + compact_range_options.target_path_id, + GetCompressionType(vstorage, mutable_cf_options, output_level, + vstorage->base_level()), + GetCompressionOptions(mutable_cf_options, vstorage, output_level), + Temperature::kUnknown, compact_range_options.max_subcompactions, + std::move(grandparents), /* is manual */ true, trim_ts, /* score */ -1, + /* deletion_compaction */ false, /* l0_files_might_overlap */ true, + CompactionReason::kUnknown, + compact_range_options.blob_garbage_collection_policy, + compact_range_options.blob_garbage_collection_age_cutoff); + + TEST_SYNC_POINT_CALLBACK("CompactionPicker::CompactRange:Return", compaction); + RegisterCompaction(compaction); + + // Creating a compaction influences the compaction score because the score + // takes running compactions into account (by skipping files that are already + // being compacted). Since we just changed compaction score, we recalculate it + // here + vstorage->ComputeCompactionScore(ioptions_, mutable_cf_options); + + return compaction; +} + +namespace { +// Test whether two files have overlapping key-ranges. +bool HaveOverlappingKeyRanges(const Comparator* c, const SstFileMetaData& a, + const SstFileMetaData& b) { + if (c->CompareWithoutTimestamp(a.smallestkey, b.smallestkey) >= 0) { + if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) { + // b.smallestkey <= a.smallestkey <= b.largestkey + return true; + } + } else if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) { + // a.smallestkey < b.smallestkey <= a.largestkey + return true; + } + if (c->CompareWithoutTimestamp(a.largestkey, b.largestkey) <= 0) { + if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) { + // b.smallestkey <= a.largestkey <= b.largestkey + return true; + } + } else if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) { + // a.smallestkey <= b.largestkey < a.largestkey + return true; + } + return false; +} +} // namespace + +Status CompactionPicker::SanitizeCompactionInputFilesForAllLevels( + std::unordered_set* input_files, + const ColumnFamilyMetaData& cf_meta, const int output_level) const { + auto& levels = cf_meta.levels; + auto comparator = icmp_->user_comparator(); + + // TODO(yhchiang): add is_adjustable to CompactionOptions + + // the smallest and largest key of the current compaction input + std::string smallestkey; + std::string largestkey; + // a flag for initializing smallest and largest key + bool is_first = false; + const int kNotFound = -1; + + // For each level, it does the following things: + // 1. Find the first and the last compaction input files + // in the current level. + // 2. Include all files between the first and the last + // compaction input files. + // 3. Update the compaction key-range. + // 4. For all remaining levels, include files that have + // overlapping key-range with the compaction key-range. 
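+  // Worked example (hypothetical layout): if input_files names one L1 file
+  // spanning [f, k] and L2 holds files covering [a, g] and [j, p], step 4
+  // pulls both L2 files into the compaction, since each overlaps part of
+  // the aggregated range [f, k].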
+ for (int l = 0; l <= output_level; ++l) { + auto& current_files = levels[l].files; + int first_included = static_cast(current_files.size()); + int last_included = kNotFound; + + // identify the first and the last compaction input files + // in the current level. + for (size_t f = 0; f < current_files.size(); ++f) { + const uint64_t file_number = TableFileNameToNumber(current_files[f].name); + if (input_files->find(file_number) == input_files->end()) { + continue; + } + first_included = std::min(first_included, static_cast(f)); + last_included = std::max(last_included, static_cast(f)); + if (is_first == false) { + smallestkey = current_files[f].smallestkey; + largestkey = current_files[f].largestkey; + is_first = true; + } + } + if (last_included == kNotFound) { + continue; + } + + if (l != 0) { + // expand the compaction input of the current level if it + // has overlapping key-range with other non-compaction input + // files in the same level. + while (first_included > 0) { + if (comparator->CompareWithoutTimestamp( + current_files[first_included - 1].largestkey, + current_files[first_included].smallestkey) < 0) { + break; + } + first_included--; + } + + while (last_included < static_cast(current_files.size()) - 1) { + if (comparator->CompareWithoutTimestamp( + current_files[last_included + 1].smallestkey, + current_files[last_included].largestkey) > 0) { + break; + } + last_included++; + } + } else if (output_level > 0) { + last_included = static_cast(current_files.size() - 1); + } + + // include all files between the first and the last compaction input files. + for (int f = first_included; f <= last_included; ++f) { + if (current_files[f].being_compacted) { + return Status::Aborted("Necessary compaction input file " + + current_files[f].name + + " is currently being compacted."); + } + + input_files->insert(TableFileNameToNumber(current_files[f].name)); + } + + // update smallest and largest key + if (l == 0) { + for (int f = first_included; f <= last_included; ++f) { + if (comparator->CompareWithoutTimestamp( + smallestkey, current_files[f].smallestkey) > 0) { + smallestkey = current_files[f].smallestkey; + } + if (comparator->CompareWithoutTimestamp( + largestkey, current_files[f].largestkey) < 0) { + largestkey = current_files[f].largestkey; + } + } + } else { + if (comparator->CompareWithoutTimestamp( + smallestkey, current_files[first_included].smallestkey) > 0) { + smallestkey = current_files[first_included].smallestkey; + } + if (comparator->CompareWithoutTimestamp( + largestkey, current_files[last_included].largestkey) < 0) { + largestkey = current_files[last_included].largestkey; + } + } + + SstFileMetaData aggregated_file_meta; + aggregated_file_meta.smallestkey = smallestkey; + aggregated_file_meta.largestkey = largestkey; + + // For all lower levels, include all overlapping files. 
+ // We need to add overlapping files from the current level too because even + // if there no input_files in level l, we would still need to add files + // which overlap with the range containing the input_files in levels 0 to l + // Level 0 doesn't need to be handled this way because files are sorted by + // time and not by key + for (int m = std::max(l, 1); m <= output_level; ++m) { + for (auto& next_lv_file : levels[m].files) { + if (HaveOverlappingKeyRanges(comparator, aggregated_file_meta, + next_lv_file)) { + if (next_lv_file.being_compacted) { + return Status::Aborted( + "File " + next_lv_file.name + + " that has overlapping key range with one of the compaction " + " input file is currently being compacted."); + } + input_files->insert(TableFileNameToNumber(next_lv_file.name)); + } + } + } + } + if (RangeOverlapWithCompaction(smallestkey, largestkey, output_level)) { + return Status::Aborted( + "A running compaction is writing to the same output level in an " + "overlapping key range"); + } + return Status::OK(); +} + +Status CompactionPicker::SanitizeCompactionInputFiles( + std::unordered_set* input_files, + const ColumnFamilyMetaData& cf_meta, const int output_level) const { + assert(static_cast(cf_meta.levels.size()) - 1 == + cf_meta.levels[cf_meta.levels.size() - 1].level); + if (output_level >= static_cast(cf_meta.levels.size())) { + return Status::InvalidArgument( + "Output level for column family " + cf_meta.name + + " must between [0, " + + std::to_string(cf_meta.levels[cf_meta.levels.size() - 1].level) + "]."); + } + + if (output_level > MaxOutputLevel()) { + return Status::InvalidArgument( + "Exceed the maximum output level defined by " + "the current compaction algorithm --- " + + std::to_string(MaxOutputLevel())); + } + + if (output_level < 0) { + return Status::InvalidArgument("Output level cannot be negative."); + } + + if (input_files->size() == 0) { + return Status::InvalidArgument( + "A compaction must contain at least one file."); + } + + Status s = SanitizeCompactionInputFilesForAllLevels(input_files, cf_meta, + output_level); + + if (!s.ok()) { + return s; + } + + // for all input files, check whether the file number matches + // any currently-existing files. 
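+  // (This is a pure validity check: every requested file number must still
+  // exist in the column family metadata, must not already be part of a
+  // running compaction, and must not sit above the output level.)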
+ for (auto file_num : *input_files) { + bool found = false; + int input_file_level = -1; + for (const auto& level_meta : cf_meta.levels) { + for (const auto& file_meta : level_meta.files) { + if (file_num == TableFileNameToNumber(file_meta.name)) { + if (file_meta.being_compacted) { + return Status::Aborted("Specified compaction input file " + + MakeTableFileName("", file_num) + + " is already being compacted."); + } + found = true; + input_file_level = level_meta.level; + break; + } + } + if (found) { + break; + } + } + if (!found) { + return Status::InvalidArgument( + "Specified compaction input file " + MakeTableFileName("", file_num) + + " does not exist in column family " + cf_meta.name + "."); + } + if (input_file_level > output_level) { + return Status::InvalidArgument( + "Cannot compact file to up level, input file: " + + MakeTableFileName("", file_num) + " level " + + std::to_string(input_file_level) + " > output level " + + std::to_string(output_level)); + } + } + + return Status::OK(); +} + +void CompactionPicker::RegisterCompaction(Compaction* c) { + if (c == nullptr) { + return; + } + assert(ioptions_.compaction_style != kCompactionStyleLevel || + c->output_level() == 0 || + !FilesRangeOverlapWithCompaction(*c->inputs(), c->output_level(), + c->GetPenultimateLevel())); + // CompactionReason::kExternalSstIngestion's start level is just a placeholder + // number without actual meaning as file ingestion technically does not have + // an input level like other compactions + if ((c->start_level() == 0 && + c->compaction_reason() != CompactionReason::kExternalSstIngestion) || + ioptions_.compaction_style == kCompactionStyleUniversal) { + level0_compactions_in_progress_.insert(c); + } + compactions_in_progress_.insert(c); + TEST_SYNC_POINT_CALLBACK("CompactionPicker::RegisterCompaction:Registered", + c); +} + +void CompactionPicker::UnregisterCompaction(Compaction* c) { + if (c == nullptr) { + return; + } + if (c->start_level() == 0 || + ioptions_.compaction_style == kCompactionStyleUniversal) { + level0_compactions_in_progress_.erase(c); + } + compactions_in_progress_.erase(c); +} + +void CompactionPicker::PickFilesMarkedForCompaction( + const std::string& cf_name, VersionStorageInfo* vstorage, int* start_level, + int* output_level, CompactionInputFiles* start_level_inputs) { + if (vstorage->FilesMarkedForCompaction().empty()) { + return; + } + + auto continuation = [&, cf_name](std::pair level_file) { + // If it's being compacted it has nothing to do here. + // If this assert() fails that means that some function marked some + // files as being_compacted, but didn't call ComputeCompactionScore() + assert(!level_file.second->being_compacted); + *start_level = level_file.first; + *output_level = + (*start_level == 0) ? vstorage->base_level() : *start_level + 1; + + if (*start_level == 0 && !level0_compactions_in_progress()->empty()) { + return false; + } + + start_level_inputs->files = {level_file.second}; + start_level_inputs->level = *start_level; + return ExpandInputsToCleanCut(cf_name, vstorage, start_level_inputs); + }; + + // take a chance on a random file first + Random64 rnd(/* seed */ reinterpret_cast(vstorage)); + size_t random_file_index = static_cast(rnd.Uniform( + static_cast(vstorage->FilesMarkedForCompaction().size()))); + TEST_SYNC_POINT_CALLBACK("CompactionPicker::PickFilesMarkedForCompaction", + &random_file_index); + + if (continuation(vstorage->FilesMarkedForCompaction()[random_file_index])) { + // found the compaction! 
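+      // (The random probe above avoids always starting from the same marked
+      // file; if it fails, the loop below falls back to scanning the whole
+      // FilesMarkedForCompaction() list in order.)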
+    return;
+  }
+
+  for (auto& level_file : vstorage->FilesMarkedForCompaction()) {
+    if (continuation(level_file)) {
+      // found the compaction!
+      return;
+    }
+  }
+  start_level_inputs->files.clear();
+}
+
+bool CompactionPicker::GetOverlappingL0Files(
+    VersionStorageInfo* vstorage, CompactionInputFiles* start_level_inputs,
+    int output_level, int* parent_index) {
+  // Two level 0 compactions won't run at the same time, so we don't need to
+  // worry about files on level 0 being compacted.
+  assert(level0_compactions_in_progress()->empty());
+  InternalKey smallest, largest;
+  GetRange(*start_level_inputs, &smallest, &largest);
+  // Note that the next call will discard the file we placed in
+  // c->inputs_[0] earlier and replace it with an overlapping set
+  // which will include the picked file.
+  start_level_inputs->files.clear();
+  vstorage->GetOverlappingInputs(0, &smallest, &largest,
+                                 &(start_level_inputs->files));
+
+  // If we include more L0 files in the same compaction run it can
+  // cause the 'smallest' and 'largest' key to get extended to a
+  // larger range. So, re-invoke GetRange to get the new key range.
+  GetRange(*start_level_inputs, &smallest, &largest);
+  if (IsRangeInCompaction(vstorage, &smallest, &largest, output_level,
+                          parent_index)) {
+    return false;
+  }
+  assert(!start_level_inputs->files.empty());
+
+  return true;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.h
new file mode 100644
index 0000000000..0556e99275
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker.h
@@ -0,0 +1,318 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "db/compaction/compaction.h"
+#include "db/version_set.h"
+#include "options/cf_options.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// The file contains an abstract class CompactionPicker, and its two
+// sub-classes LevelCompactionPicker and NullCompactionPicker, as
+// well as some helper functions used by them.
+
+class LogBuffer;
+class Compaction;
+class VersionStorageInfo;
+struct CompactionInputFiles;
+
+// An abstract class to pick compactions from an existing LSM-tree.
+//
+// Each compaction style inherits this class and implements the
+// interface to form automatic compactions. If NeedsCompaction() is true,
+// then call PickCompaction() to find what files need to be compacted
+// and where to put the output files.
+//
+// Non-virtual functions CompactRange() and CompactFiles() are used to
+// pick files to compact based on users' DB::CompactRange() and
+// DB::CompactFiles() requests, respectively. There is little
+// compaction style specific logic for them.
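+// Illustrative call pattern (a sketch only; the exact contracts are in the
+// member comments below):
+//
+//   if (picker->NeedsCompaction(vstorage)) {
+//     Compaction* c = picker->PickCompaction(cf_name, mutable_cf_options,
+//                                            mutable_db_options, vstorage,
+//                                            log_buffer);
+//     if (c != nullptr) {
+//       // ... run the compaction job, then, holding the DB mutex:
+//       picker->ReleaseCompactionFiles(c, status);
+//       delete c;  // PickCompaction() heap-allocates; caller deletes.
+//     }
+//   }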
+class CompactionPicker {
+ public:
+  CompactionPicker(const ImmutableOptions& ioptions,
+                   const InternalKeyComparator* icmp);
+  virtual ~CompactionPicker();
+
+  // Pick level and inputs for a new compaction.
+  //
+  // Returns nullptr if there is no compaction to be done.
+  // Otherwise returns a pointer to a heap-allocated object that
+  // describes the compaction. Caller should delete the result.
+  virtual Compaction* PickCompaction(const std::string& cf_name,
+                                     const MutableCFOptions& mutable_cf_options,
+                                     const MutableDBOptions& mutable_db_options,
+                                     VersionStorageInfo* vstorage,
+                                     LogBuffer* log_buffer) = 0;
+
+  // Return a compaction object for compacting the range [begin,end] in
+  // the specified level. Returns nullptr if there is nothing in that
+  // level that overlaps the specified range. Caller should delete
+  // the result.
+  //
+  // The returned Compaction might not include the whole requested range.
+  // In that case, compaction_end will be set to the next key that needs
+  // compacting. In case the compaction will compact the whole range,
+  // compaction_end will be set to nullptr.
+  // Client is responsible for compaction_end storage -- when called,
+  // *compaction_end should point to a valid InternalKey!
+  virtual Compaction* CompactRange(
+      const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
+      const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
+      int input_level, int output_level,
+      const CompactRangeOptions& compact_range_options,
+      const InternalKey* begin, const InternalKey* end,
+      InternalKey** compaction_end, bool* manual_conflict,
+      uint64_t max_file_num_to_ignore, const std::string& trim_ts);
+
+  // The maximum allowed output level. Default value is NumberLevels() - 1.
+  virtual int MaxOutputLevel() const { return NumberLevels() - 1; }
+
+  virtual bool NeedsCompaction(const VersionStorageInfo* vstorage) const = 0;
+
+  // Sanitize the input set of compaction input files.
+  // When the input parameters do not describe a valid compaction, the
+  // function will try to fix the input_files by adding necessary
+  // files. If it's not possible to convert an invalid input_files
+  // into a valid one by adding more files, the function will return a
+  // non-ok status with a specific reason.
+  Status SanitizeCompactionInputFiles(std::unordered_set<uint64_t>* input_files,
+                                      const ColumnFamilyMetaData& cf_meta,
+                                      const int output_level) const;
+
+  // Free up the files that participated in a compaction
+  //
+  // Requirement: DB mutex held
+  void ReleaseCompactionFiles(Compaction* c, Status status);
+
+  // Returns true if any one of the specified files are being compacted
+  bool AreFilesInCompaction(const std::vector<FileMetaData*>& files);
+
+  // Takes a list of CompactionInputFiles and returns a (manual) Compaction
+  // object.
+  //
+  // Caller must provide a set of input files that has been passed through
+  // `SanitizeCompactionInputFiles` earlier. The lock should not be released
+  // between that call and this one.
+  Compaction* CompactFiles(const CompactionOptions& compact_options,
+                           const std::vector<CompactionInputFiles>& input_files,
+                           int output_level, VersionStorageInfo* vstorage,
+                           const MutableCFOptions& mutable_cf_options,
+                           const MutableDBOptions& mutable_db_options,
+                           uint32_t output_path_id);
+
+  // Converts a set of compaction input file numbers into
+  // a list of CompactionInputFiles.
+  Status GetCompactionInputsFromFileNumbers(
+      std::vector<CompactionInputFiles>* input_files,
+      std::unordered_set<uint64_t>* input_set,
+      const VersionStorageInfo* vstorage,
+      const CompactionOptions& compact_options) const;
+
+  // Is there currently a compaction involving level 0 taking place
+  bool IsLevel0CompactionInProgress() const {
+    return !level0_compactions_in_progress_.empty();
+  }
+
+  // Return true if the passed key range overlaps with a compaction output
+  // that is currently running.
+  bool RangeOverlapWithCompaction(const Slice& smallest_user_key,
+                                  const Slice& largest_user_key,
+                                  int level) const;
+
+  // Stores the minimal range that covers all entries in inputs in
+  // *smallest, *largest.
+  // REQUIRES: inputs is not empty
+  void GetRange(const CompactionInputFiles& inputs, InternalKey* smallest,
+                InternalKey* largest) const;
+
+  // Stores the minimal range that covers all entries in inputs1 and inputs2
+  // in *smallest, *largest.
+  // REQUIRES: inputs is not empty
+  void GetRange(const CompactionInputFiles& inputs1,
+                const CompactionInputFiles& inputs2, InternalKey* smallest,
+                InternalKey* largest) const;
+
+  // Stores the minimal range that covers all entries in inputs
+  // in *smallest, *largest.
+  // REQUIRES: inputs is not empty (at least one entry has one file)
+  void GetRange(const std::vector<CompactionInputFiles>& inputs,
+                InternalKey* smallest, InternalKey* largest,
+                int exclude_level) const;
+
+  int NumberLevels() const { return ioptions_.num_levels; }
+
+  // Add more files to the inputs on "level" to make sure that
+  // no newer version of a key is compacted to "level+1" while leaving an older
+  // version in a "level". Otherwise, any Get() will search "level" first,
+  // and will likely return an old/stale value for the key, since it always
+  // searches in increasing order of level to find the value. This could
+  // also scramble the order of merge operands. This function should be
+  // called any time a new Compaction is created, and its inputs_[0] are
+  // populated.
+  //
+  // Will return false if it is impossible to apply this compaction.
+  bool ExpandInputsToCleanCut(const std::string& cf_name,
+                              VersionStorageInfo* vstorage,
+                              CompactionInputFiles* inputs,
+                              InternalKey** next_smallest = nullptr);
+
+  // Returns true if any one of the parent files are being compacted
+  bool IsRangeInCompaction(VersionStorageInfo* vstorage,
+                           const InternalKey* smallest,
+                           const InternalKey* largest, int level, int* index);
+
+  // Returns true if the key range that `inputs` files cover overlaps with the
+  // key range of a currently running compaction.
+  bool FilesRangeOverlapWithCompaction(
+      const std::vector<CompactionInputFiles>& inputs, int level,
+      int penultimate_level) const;
+
+  bool SetupOtherInputs(const std::string& cf_name,
+                        const MutableCFOptions& mutable_cf_options,
+                        VersionStorageInfo* vstorage,
+                        CompactionInputFiles* inputs,
+                        CompactionInputFiles* output_level_inputs,
+                        int* parent_index, int base_index,
+                        bool only_expand_towards_right = false);
+
+  void GetGrandparents(VersionStorageInfo* vstorage,
+                       const CompactionInputFiles& inputs,
+                       const CompactionInputFiles& output_level_inputs,
+                       std::vector<FileMetaData*>* grandparents);
+
+  void PickFilesMarkedForCompaction(const std::string& cf_name,
+                                    VersionStorageInfo* vstorage,
+                                    int* start_level, int* output_level,
+                                    CompactionInputFiles* start_level_inputs);
+
+  bool GetOverlappingL0Files(VersionStorageInfo* vstorage,
+                             CompactionInputFiles* start_level_inputs,
+                             int output_level, int* parent_index);
+
+  // Register this compaction in the set of running compactions
+  void RegisterCompaction(Compaction* c);
+
+  // Remove this compaction from the set of running compactions
+  void UnregisterCompaction(Compaction* c);
+
+  std::set<Compaction*>* level0_compactions_in_progress() {
+    return &level0_compactions_in_progress_;
+  }
+  std::unordered_set<Compaction*>* compactions_in_progress() {
+    return &compactions_in_progress_;
+  }
+
+  const InternalKeyComparator* icmp() const { return icmp_; }
+
+ protected:
+  const ImmutableOptions& ioptions_;
+
+  // A helper function to SanitizeCompactionInputFiles() that
+  // sanitizes "input_files" by adding necessary files.
+  virtual Status SanitizeCompactionInputFilesForAllLevels(
+      std::unordered_set<uint64_t>* input_files,
+      const ColumnFamilyMetaData& cf_meta, const int output_level) const;
+
+  // Keeps track of all compactions that are running on Level0.
+  // Protected by DB mutex
+  std::set<Compaction*> level0_compactions_in_progress_;
+
+  // Keeps track of all compactions that are running.
+  // Protected by DB mutex
+  std::unordered_set<Compaction*> compactions_in_progress_;
+
+  const InternalKeyComparator* const icmp_;
+};
+
+// A dummy compaction that never triggers any automatic
+// compaction.
+class NullCompactionPicker : public CompactionPicker {
+ public:
+  NullCompactionPicker(const ImmutableOptions& ioptions,
+                       const InternalKeyComparator* icmp)
+      : CompactionPicker(ioptions, icmp) {}
+  virtual ~NullCompactionPicker() {}
+
+  // Always return "nullptr"
+  Compaction* PickCompaction(const std::string& /*cf_name*/,
+                             const MutableCFOptions& /*mutable_cf_options*/,
+                             const MutableDBOptions& /*mutable_db_options*/,
+                             VersionStorageInfo* /*vstorage*/,
+                             LogBuffer* /* log_buffer */) override {
+    return nullptr;
+  }
+
+  // Always return "nullptr"
+  Compaction* CompactRange(const std::string& /*cf_name*/,
+                           const MutableCFOptions& /*mutable_cf_options*/,
+                           const MutableDBOptions& /*mutable_db_options*/,
+                           VersionStorageInfo* /*vstorage*/,
+                           int /*input_level*/, int /*output_level*/,
+                           const CompactRangeOptions& /*compact_range_options*/,
+                           const InternalKey* /*begin*/,
+                           const InternalKey* /*end*/,
+                           InternalKey** /*compaction_end*/,
+                           bool* /*manual_conflict*/,
+                           uint64_t /*max_file_num_to_ignore*/,
+                           const std::string& /*trim_ts*/) override {
+    return nullptr;
+  }
+
+  // Always returns false.
+  virtual bool NeedsCompaction(
+      const VersionStorageInfo* /*vstorage*/) const override {
+    return false;
+  }
+};
+
+// Attempts to find an intra L0 compaction conforming to the given parameters.
+//
+// @param level_files                    Metadata for L0 files.
+// @param min_files_to_compact           Minimum number of files required to
+//                                       do the compaction.
+// @param max_compact_bytes_per_del_file Maximum average size in bytes per
+//                                       file that is going to get deleted by
+//                                       the compaction.
+// @param max_compaction_bytes           Maximum total size in bytes (in terms
+//                                       of compensated file size) for files
+//                                       to be compacted.
+// @param [out] comp_inputs              If a compaction was found, will be
+//                                       initialized with corresponding input
+//                                       files. Cannot be nullptr.
+//
+// @return true iff compaction was found.
+bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
+                           size_t min_files_to_compact,
+                           uint64_t max_compact_bytes_per_del_file,
+                           uint64_t max_compaction_bytes,
+                           CompactionInputFiles* comp_inputs);
+
+CompressionType GetCompressionType(const VersionStorageInfo* vstorage,
+                                   const MutableCFOptions& mutable_cf_options,
+                                   int level, int base_level,
+                                   const bool enable_compression = true);
+
+CompressionOptions GetCompressionOptions(
+    const MutableCFOptions& mutable_cf_options,
+    const VersionStorageInfo* vstorage, int level,
+    const bool enable_compression = true);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.cc
new file mode 100644
index 0000000000..9aa24302e2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.cc
@@ -0,0 +1,471 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/compaction/compaction_picker_fifo.h"
+
+#include <cinttypes>
+#include <string>
+#include <vector>
+
+#include "db/column_family.h"
+#include "logging/log_buffer.h"
+#include "logging/logging.h"
+#include "options/options_helper.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+uint64_t GetTotalFilesSize(const std::vector<FileMetaData*>& files) {
+  uint64_t total_size = 0;
+  for (const auto& f : files) {
+    total_size += f->fd.file_size;
+  }
+  return total_size;
+}
+}  // anonymous namespace
+
+bool FIFOCompactionPicker::NeedsCompaction(
+    const VersionStorageInfo* vstorage) const {
+  const int kLevel0 = 0;
+  return vstorage->CompactionScore(kLevel0) >= 1;
+}
+
+Compaction* FIFOCompactionPicker::PickTTLCompaction(
+    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
+    const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
+    LogBuffer* log_buffer) {
+  assert(mutable_cf_options.ttl > 0);
+
+  const int kLevel0 = 0;
+  const std::vector<FileMetaData*>& level_files =
+      vstorage->LevelFiles(kLevel0);
+  uint64_t total_size = GetTotalFilesSize(level_files);
+
+  int64_t _current_time;
+  auto status = ioptions_.clock->GetCurrentTime(&_current_time);
+  if (!status.ok()) {
+    ROCKS_LOG_BUFFER(log_buffer,
+                     "[%s] FIFO compaction: Couldn't get current time: %s. "
+                     "Not doing compactions based on TTL. ",
+                     cf_name.c_str(), status.ToString().c_str());
+    return nullptr;
+  }
+  const uint64_t current_time = static_cast<uint64_t>(_current_time);
+
+  if (!level0_compactions_in_progress_.empty()) {
+    ROCKS_LOG_BUFFER(
+        log_buffer,
+        "[%s] FIFO compaction: Already executing compaction. 
No need " + "to run parallel compactions since compactions are very fast", + cf_name.c_str()); + return nullptr; + } + + std::vector inputs; + inputs.emplace_back(); + inputs[0].level = 0; + + // avoid underflow + if (current_time > mutable_cf_options.ttl) { + for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) { + FileMetaData* f = *ritr; + assert(f); + if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { + uint64_t creation_time = + f->fd.table_reader->GetTableProperties()->creation_time; + if (creation_time == 0 || + creation_time >= (current_time - mutable_cf_options.ttl)) { + break; + } + } + total_size -= f->fd.file_size; + inputs[0].files.push_back(f); + } + } + + // Return a nullptr and proceed to size-based FIFO compaction if: + // 1. there are no files older than ttl OR + // 2. there are a few files older than ttl, but deleting them will not bring + // the total size to be less than max_table_files_size threshold. + if (inputs[0].files.empty() || + total_size > + mutable_cf_options.compaction_options_fifo.max_table_files_size) { + return nullptr; + } + + for (const auto& f : inputs[0].files) { + uint64_t creation_time = 0; + assert(f); + if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { + creation_time = f->fd.table_reader->GetTableProperties()->creation_time; + } + ROCKS_LOG_BUFFER(log_buffer, + "[%s] FIFO compaction: picking file %" PRIu64 + " with creation time %" PRIu64 " for deletion", + cf_name.c_str(), f->fd.GetNumber(), creation_time); + } + + Compaction* c = new Compaction( + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(inputs), 0, 0, 0, 0, kNoCompression, + mutable_cf_options.compression_opts, Temperature::kUnknown, + /* max_subcompactions */ 0, {}, /* is manual */ false, + /* trim_ts */ "", vstorage->CompactionScore(0), + /* is deletion compaction */ true, /* l0_files_might_overlap */ true, + CompactionReason::kFIFOTtl); + return c; +} + +// The size-based compaction picker for FIFO. +// +// When the entire column family size exceeds max_table_files_size, FIFO will +// try to delete the oldest sst file(s) until the resulting column family size +// is smaller than max_table_files_size. +// +// This function also takes care the case where a DB is migrating from level / +// universal compaction to FIFO compaction. During the migration, the column +// family will also have non-L0 files while FIFO can only create L0 files. +// In this case, this function will first purge the sst files in the bottom- +// most non-empty level first, and the DB will eventually converge to the +// regular FIFO case where there're only L0 files. Note that during the +// migration case, the purge order will only be an approximation of "FIFO" +// as entries inside lower-level files might sometimes be newer than some +// entries inside upper-level files. 
+Compaction* FIFOCompactionPicker::PickSizeCompaction( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer) { + // compute the total size and identify the last non-empty level + int last_level = 0; + uint64_t total_size = 0; + for (int level = 0; level < vstorage->num_levels(); ++level) { + auto level_size = GetTotalFilesSize(vstorage->LevelFiles(level)); + total_size += level_size; + if (level_size > 0) { + last_level = level; + } + } + const std::vector& last_level_files = + vstorage->LevelFiles(last_level); + + if (last_level == 0 && + total_size <= + mutable_cf_options.compaction_options_fifo.max_table_files_size) { + // total size not exceeded, try to find intra level 0 compaction if enabled + const std::vector& level0_files = vstorage->LevelFiles(0); + if (mutable_cf_options.compaction_options_fifo.allow_compaction && + level0_files.size() > 0) { + CompactionInputFiles comp_inputs; + // try to prevent same files from being compacted multiple times, which + // could produce large files that may never TTL-expire. Achieve this by + // disallowing compactions with files larger than memtable (inflate its + // size by 10% to account for uncompressed L0 files that may have size + // slightly greater than memtable size limit). + size_t max_compact_bytes_per_del_file = + static_cast(MultiplyCheckOverflow( + static_cast(mutable_cf_options.write_buffer_size), + 1.1)); + if (FindIntraL0Compaction( + level0_files, + mutable_cf_options + .level0_file_num_compaction_trigger /* min_files_to_compact */ + , + max_compact_bytes_per_del_file, + mutable_cf_options.max_compaction_bytes, &comp_inputs)) { + Compaction* c = new Compaction( + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + {comp_inputs}, 0, 16 * 1024 * 1024 /* output file size limit */, + 0 /* max compaction bytes, not applicable */, + 0 /* output path ID */, mutable_cf_options.compression, + mutable_cf_options.compression_opts, Temperature::kUnknown, + 0 /* max_subcompactions */, {}, /* is manual */ false, + /* trim_ts */ "", vstorage->CompactionScore(0), + /* is deletion compaction */ false, + /* l0_files_might_overlap */ true, + CompactionReason::kFIFOReduceNumFiles); + return c; + } + } + + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: nothing to do. Total size %" PRIu64 + ", max size %" PRIu64 "\n", + cf_name.c_str(), total_size, + mutable_cf_options.compaction_options_fifo.max_table_files_size); + return nullptr; + } + + if (!level0_compactions_in_progress_.empty()) { + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: Already executing compaction. No need " + "to run parallel compactions since compactions are very fast", + cf_name.c_str()); + return nullptr; + } + + std::vector inputs; + inputs.emplace_back(); + inputs[0].level = last_level; + + if (last_level == 0) { + // In L0, right-most files are the oldest files. 
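+    // (So walking the level back-to-front below deletes files oldest-first,
+    // stopping once the remaining total fits under max_table_files_size.)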
+ for (auto ritr = last_level_files.rbegin(); ritr != last_level_files.rend(); + ++ritr) { + auto f = *ritr; + total_size -= f->fd.file_size; + inputs[0].files.push_back(f); + char tmp_fsize[16]; + AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize)); + ROCKS_LOG_BUFFER(log_buffer, + "[%s] FIFO compaction: picking file %" PRIu64 + " with size %s for deletion", + cf_name.c_str(), f->fd.GetNumber(), tmp_fsize); + if (total_size <= + mutable_cf_options.compaction_options_fifo.max_table_files_size) { + break; + } + } + } else if (total_size > + mutable_cf_options.compaction_options_fifo.max_table_files_size) { + // If the last level is non-L0, we actually don't know which file is + // logically the oldest since the file creation time only represents + // when this file was compacted to this level, which is independent + // to when the entries in this file were first inserted. + // + // As a result, we delete files from the left instead. This means the sst + // file with the smallest key will be deleted first. This design decision + // better serves a major type of FIFO use cases where smaller keys are + // associated with older data. + for (const auto& f : last_level_files) { + total_size -= f->fd.file_size; + inputs[0].files.push_back(f); + char tmp_fsize[16]; + AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize)); + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: picking file %" PRIu64 + " with size %s for deletion under total size %" PRIu64 + " vs max table files size %" PRIu64, + cf_name.c_str(), f->fd.GetNumber(), tmp_fsize, total_size, + mutable_cf_options.compaction_options_fifo.max_table_files_size); + + if (total_size <= + mutable_cf_options.compaction_options_fifo.max_table_files_size) { + break; + } + } + } else { + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: nothing to do. Total size %" PRIu64 + ", max size %" PRIu64 "\n", + cf_name.c_str(), total_size, + mutable_cf_options.compaction_options_fifo.max_table_files_size); + return nullptr; + } + + Compaction* c = new Compaction( + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(inputs), last_level, + /* target_file_size */ 0, + /* max_compaction_bytes */ 0, + /* output_path_id */ 0, kNoCompression, + mutable_cf_options.compression_opts, Temperature::kUnknown, + /* max_subcompactions */ 0, {}, /* is manual */ false, + /* trim_ts */ "", vstorage->CompactionScore(0), + /* is deletion compaction */ true, + /* l0_files_might_overlap */ true, CompactionReason::kFIFOMaxSize); + return c; +} + +Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer) { + const std::vector& ages = + mutable_cf_options.compaction_options_fifo + .file_temperature_age_thresholds; + if (ages.empty()) { + return nullptr; + } + + // Does not apply to multi-level FIFO. + if (vstorage->num_levels() > 1) { + return nullptr; + } + + const int kLevel0 = 0; + const std::vector& level_files = vstorage->LevelFiles(kLevel0); + if (level_files.empty()) { + return nullptr; + } + + int64_t _current_time; + auto status = ioptions_.clock->GetCurrentTime(&_current_time); + if (!status.ok()) { + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: Couldn't get current time: %s. " + "Not doing compactions based on file temperature-age threshold. 
", + cf_name.c_str(), status.ToString().c_str()); + return nullptr; + } + const uint64_t current_time = static_cast(_current_time); + + if (!level0_compactions_in_progress_.empty()) { + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: Already executing compaction. Parallel " + "compactions are not supported", + cf_name.c_str()); + return nullptr; + } + + std::vector inputs; + inputs.emplace_back(); + inputs[0].level = 0; + + // avoid underflow + uint64_t min_age = ages[0].age; + // kLastTemperature means target temperature is to be determined. + Temperature compaction_target_temp = Temperature::kLastTemperature; + if (current_time > min_age) { + uint64_t create_time_threshold = current_time - min_age; + uint64_t compaction_size = 0; + // We will ideally identify a file qualifying for temperature change by + // knowing the timestamp for the youngest entry in the file. However, right + // now we don't have the information. We infer it by looking at timestamp of + // the previous file's (which is just younger) oldest entry's timestamp. + Temperature cur_target_temp; + // avoid index underflow + assert(level_files.size() >= 1); + for (size_t index = level_files.size() - 1; index >= 1; --index) { + // Try to add cur_file to compaction inputs. + FileMetaData* cur_file = level_files[index]; + // prev_file is just younger than cur_file + FileMetaData* prev_file = level_files[index - 1]; + if (cur_file->being_compacted) { + // Should not happen since we check for + // `level0_compactions_in_progress_` above. Here we simply just don't + // schedule anything. + return nullptr; + } + uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime(); + if (oldest_ancestor_time == kUnknownOldestAncesterTime) { + // Older files might not have enough information. It is possible to + // handle these files by looking at newer files, but maintaining the + // logic isn't worth it. 
+ break; + } + if (oldest_ancestor_time > create_time_threshold) { + // cur_file is too fresh + break; + } + cur_target_temp = ages[0].temperature; + for (size_t i = 1; i < ages.size(); ++i) { + if (current_time >= ages[i].age && + oldest_ancestor_time <= current_time - ages[i].age) { + cur_target_temp = ages[i].temperature; + } + } + if (cur_file->temperature == cur_target_temp) { + if (inputs[0].empty()) { + continue; + } else { + break; + } + } + + // cur_file needs to change temperature + if (compaction_target_temp == Temperature::kLastTemperature) { + assert(inputs[0].empty()); + compaction_target_temp = cur_target_temp; + } else if (cur_target_temp != compaction_target_temp) { + assert(!inputs[0].empty()); + break; + } + if (inputs[0].empty() || compaction_size + cur_file->fd.GetFileSize() <= + mutable_cf_options.max_compaction_bytes) { + inputs[0].files.push_back(cur_file); + compaction_size += cur_file->fd.GetFileSize(); + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: picking file %" PRIu64 + " with next file's oldest time %" PRIu64 " for temperature %s.", + cf_name.c_str(), cur_file->fd.GetNumber(), oldest_ancestor_time, + temperature_to_string[cur_target_temp].c_str()); + } + if (compaction_size > mutable_cf_options.max_compaction_bytes) { + break; + } + } + } + + if (inputs[0].files.empty()) { + return nullptr; + } + + Compaction* c = new Compaction( + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(inputs), 0, 0 /* output file size limit */, + 0 /* max compaction bytes, not applicable */, 0 /* output path ID */, + mutable_cf_options.compression, mutable_cf_options.compression_opts, + compaction_target_temp, + /* max_subcompactions */ 0, {}, /* is manual */ false, /* trim_ts */ "", + vstorage->CompactionScore(0), + /* is deletion compaction */ false, /* l0_files_might_overlap */ true, + CompactionReason::kChangeTemperature); + return c; +} + +Compaction* FIFOCompactionPicker::PickCompaction( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer) { + Compaction* c = nullptr; + if (mutable_cf_options.ttl > 0) { + c = PickTTLCompaction(cf_name, mutable_cf_options, mutable_db_options, + vstorage, log_buffer); + } + if (c == nullptr) { + c = PickSizeCompaction(cf_name, mutable_cf_options, mutable_db_options, + vstorage, log_buffer); + } + if (c == nullptr) { + c = PickTemperatureChangeCompaction( + cf_name, mutable_cf_options, mutable_db_options, vstorage, log_buffer); + } + RegisterCompaction(c); + return c; +} + +Compaction* FIFOCompactionPicker::CompactRange( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + int input_level, int output_level, + const CompactRangeOptions& /*compact_range_options*/, + const InternalKey* /*begin*/, const InternalKey* /*end*/, + InternalKey** compaction_end, bool* /*manual_conflict*/, + uint64_t /*max_file_num_to_ignore*/, const std::string& /*trim_ts*/) { +#ifdef NDEBUG + (void)input_level; + (void)output_level; +#endif + assert(input_level == 0); + assert(output_level == 0); + *compaction_end = nullptr; + LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, ioptions_.logger); + Compaction* c = PickCompaction(cf_name, mutable_cf_options, + mutable_db_options, vstorage, &log_buffer); + log_buffer.FlushBufferToLog(); + return c; +} + +} // namespace ROCKSDB_NAMESPACE diff --git 
a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.h new file mode 100644 index 0000000000..df21a1bde0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_fifo.h @@ -0,0 +1,60 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "db/compaction/compaction_picker.h" + +namespace ROCKSDB_NAMESPACE { +class FIFOCompactionPicker : public CompactionPicker { + public: + FIFOCompactionPicker(const ImmutableOptions& ioptions, + const InternalKeyComparator* icmp) + : CompactionPicker(ioptions, icmp) {} + + virtual Compaction* PickCompaction(const std::string& cf_name, + const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, + VersionStorageInfo* version, + LogBuffer* log_buffer) override; + + virtual Compaction* CompactRange( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + int input_level, int output_level, + const CompactRangeOptions& compact_range_options, + const InternalKey* begin, const InternalKey* end, + InternalKey** compaction_end, bool* manual_conflict, + uint64_t max_file_num_to_ignore, const std::string& trim_ts) override; + + // The maximum allowed output level. Always returns 0. + virtual int MaxOutputLevel() const override { return 0; } + + virtual bool NeedsCompaction( + const VersionStorageInfo* vstorage) const override; + + private: + Compaction* PickTTLCompaction(const std::string& cf_name, + const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, + VersionStorageInfo* version, + LogBuffer* log_buffer); + + Compaction* PickSizeCompaction(const std::string& cf_name, + const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, + VersionStorageInfo* version, + LogBuffer* log_buffer); + + Compaction* PickTemperatureChangeCompaction( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer); +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.cc new file mode 100644 index 0000000000..c436689bb6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.cc @@ -0,0 +1,888 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
See the AUTHORS file for names of contributors. + +#include "db/compaction/compaction_picker_level.h" + +#include +#include +#include + +#include "db/version_edit.h" +#include "logging/log_buffer.h" +#include "test_util/sync_point.h" + +namespace ROCKSDB_NAMESPACE { + +bool LevelCompactionPicker::NeedsCompaction( + const VersionStorageInfo* vstorage) const { + if (!vstorage->ExpiredTtlFiles().empty()) { + return true; + } + if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) { + return true; + } + if (!vstorage->BottommostFilesMarkedForCompaction().empty()) { + return true; + } + if (!vstorage->FilesMarkedForCompaction().empty()) { + return true; + } + if (!vstorage->FilesMarkedForForcedBlobGC().empty()) { + return true; + } + for (int i = 0; i <= vstorage->MaxInputLevel(); i++) { + if (vstorage->CompactionScore(i) >= 1) { + return true; + } + } + return false; +} + +namespace { +// A class to build a leveled compaction step-by-step. +class LevelCompactionBuilder { + public: + LevelCompactionBuilder(const std::string& cf_name, + VersionStorageInfo* vstorage, + CompactionPicker* compaction_picker, + LogBuffer* log_buffer, + const MutableCFOptions& mutable_cf_options, + const ImmutableOptions& ioptions, + const MutableDBOptions& mutable_db_options) + : cf_name_(cf_name), + vstorage_(vstorage), + compaction_picker_(compaction_picker), + log_buffer_(log_buffer), + mutable_cf_options_(mutable_cf_options), + ioptions_(ioptions), + mutable_db_options_(mutable_db_options) {} + + // Pick and return a compaction. + Compaction* PickCompaction(); + + // Pick the initial files to compact to the next level. (or together + // in Intra-L0 compactions) + void SetupInitialFiles(); + + // If the initial files are from L0 level, pick other L0 + // files if needed. + bool SetupOtherL0FilesIfNeeded(); + + // Compaction with round-robin compaction priority allows more files to be + // picked to form a large compaction + void SetupOtherFilesWithRoundRobinExpansion(); + // Based on initial files, setup other files need to be compacted + // in this compaction, accordingly. + bool SetupOtherInputsIfNeeded(); + + Compaction* GetCompaction(); + + // From `start_level_`, pick files to compact to `output_level_`. + // Returns false if there is no file to compact. + // If it returns true, inputs->files.size() will be exactly one for + // all compaction priorities except round-robin. For round-robin, + // multiple consecutive files may be put into inputs->files. + // If level is 0 and there is already a compaction on that level, this + // function will return false. + bool PickFileToCompact(); + + // Return true if a L0 trivial move is picked up. + bool TryPickL0TrivialMove(); + + // For L0->L0, picks the longest span of files that aren't currently + // undergoing compaction for which work-per-deleted-file decreases. The span + // always starts from the newest L0 file. + // + // Intra-L0 compaction is independent of all other files, so it can be + // performed even when L0->base_level compactions are blocked. + // + // Returns true if `inputs` is populated with a span of files to be compacted; + // otherwise, returns false. + bool PickIntraL0Compaction(); + + // Return true if TrivialMove is extended. `start_index` is the index of + // the initial file picked, which should already be in `start_level_inputs_`. + bool TryExtendNonL0TrivialMove(int start_index, + bool only_expand_right = false); + + // Picks a file from level_files to compact. 
+
+void LevelCompactionBuilder::PickFileToCompact(
+    const autovector<std::pair<int, FileMetaData*>>& level_files,
+    bool compact_to_next_level) {
+  for (auto& level_file : level_files) {
+    // If it's being compacted it has nothing to do here.
+    // If this assert() fails that means that some function marked some
+    // files as being_compacted, but didn't call ComputeCompactionScore().
+    assert(!level_file.second->being_compacted);
+    start_level_ = level_file.first;
+    if ((compact_to_next_level &&
+         start_level_ == vstorage_->num_non_empty_levels() - 1) ||
+        (start_level_ == 0 &&
+         !compaction_picker_->level0_compactions_in_progress()->empty())) {
+      continue;
+    }
+    if (compact_to_next_level) {
+      output_level_ =
+          (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1;
+    } else {
+      output_level_ = start_level_;
+    }
+    start_level_inputs_.files = {level_file.second};
+    start_level_inputs_.level = start_level_;
+    if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
+                                                   &start_level_inputs_)) {
+      return;
+    }
+  }
+  start_level_inputs_.files.clear();
+}
+
+void LevelCompactionBuilder::SetupInitialFiles() {
+  // Find the compactions by size on all levels.
+  bool skipped_l0_to_base = false;
+  for (int i = 0; i < compaction_picker_->NumberLevels() - 1; i++) {
+    start_level_score_ = vstorage_->CompactionScore(i);
+    start_level_ = vstorage_->CompactionScoreLevel(i);
+    assert(i == 0 || start_level_score_ <= vstorage_->CompactionScore(i - 1));
+    if (start_level_score_ >= 1) {
+      if (skipped_l0_to_base && start_level_ == vstorage_->base_level()) {
+        // If L0->base_level compaction is pending, don't schedule further
+        // compaction from base level. Otherwise L0->base_level compaction
+        // may starve.
+        continue;
+      }
+      output_level_ =
+          (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1;
+      bool picked_file_to_compact = PickFileToCompact();
+      TEST_SYNC_POINT_CALLBACK("PostPickFileToCompact",
+                               &picked_file_to_compact);
+      if (picked_file_to_compact) {
+        // found the compaction!
+ if (start_level_ == 0) { + // L0 score = `num L0 files` / `level0_file_num_compaction_trigger` + compaction_reason_ = CompactionReason::kLevelL0FilesNum; + } else { + // L1+ score = `Level files size` / `MaxBytesForLevel` + compaction_reason_ = CompactionReason::kLevelMaxLevelSize; + } + break; + } else { + // didn't find the compaction, clear the inputs + start_level_inputs_.clear(); + if (start_level_ == 0) { + skipped_l0_to_base = true; + // L0->base_level may be blocked due to ongoing L0->base_level + // compactions. It may also be blocked by an ongoing compaction from + // base_level downwards. + // + // In these cases, to reduce L0 file count and thus reduce likelihood + // of write stalls, we can attempt compacting a span of files within + // L0. + if (PickIntraL0Compaction()) { + output_level_ = 0; + compaction_reason_ = CompactionReason::kLevelL0FilesNum; + break; + } + } + } + } else { + // Compaction scores are sorted in descending order, no further scores + // will be >= 1. + break; + } + } + if (!start_level_inputs_.empty()) { + return; + } + + // if we didn't find a compaction, check if there are any files marked for + // compaction + parent_index_ = base_index_ = -1; + + compaction_picker_->PickFilesMarkedForCompaction( + cf_name_, vstorage_, &start_level_, &output_level_, &start_level_inputs_); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kFilesMarkedForCompaction; + return; + } + + // Bottommost Files Compaction on deleting tombstones + PickFileToCompact(vstorage_->BottommostFilesMarkedForCompaction(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kBottommostFiles; + return; + } + + // TTL Compaction + if (ioptions_.compaction_pri == kRoundRobin && + !vstorage_->ExpiredTtlFiles().empty()) { + auto expired_files = vstorage_->ExpiredTtlFiles(); + // the expired files list should already be sorted by level + start_level_ = expired_files.front().first; +#ifndef NDEBUG + for (const auto& file : expired_files) { + assert(start_level_ <= file.first); + } +#endif + if (start_level_ > 0) { + output_level_ = start_level_ + 1; + if (PickFileToCompact()) { + compaction_reason_ = CompactionReason::kRoundRobinTtl; + return; + } + } + } + + PickFileToCompact(vstorage_->ExpiredTtlFiles(), true); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kTtl; + return; + } + + // Periodic Compaction + PickFileToCompact(vstorage_->FilesMarkedForPeriodicCompaction(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kPeriodicCompaction; + return; + } + + // Forced blob garbage collection + PickFileToCompact(vstorage_->FilesMarkedForForcedBlobGC(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kForcedBlobGC; + return; + } +} + +bool LevelCompactionBuilder::SetupOtherL0FilesIfNeeded() { + if (start_level_ == 0 && output_level_ != 0 && !is_l0_trivial_move_) { + return compaction_picker_->GetOverlappingL0Files( + vstorage_, &start_level_inputs_, output_level_, &parent_index_); + } + return true; +} + +void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() { + // We only expand when the start level is not L0 under round robin + assert(start_level_ >= 1); + + // For round-robin compaction priority, we have 3 constraints when picking + // multiple files. 
+  // Constraint 1: We can only pick consecutive files
+  //  -> Constraint 1a: When a file is being compacted (or some input files
+  //                    are being compacted after expanding), we cannot
+  //                    choose it and have to stop choosing more files
+  //  -> Constraint 1b: When we reach the last file (with largest keys), we
+  //                    cannot choose more files (the next file will be the
+  //                    first one)
+  // Constraint 2: We should ensure the total compaction bytes (including the
+  //               overlapped files from the next level) is no more than
+  //               mutable_cf_options_.max_compaction_bytes
+  // Constraint 3: We try our best to pick as many files as possible so that
+  //               the post-compaction level size is less than
+  //               MaxBytesForLevel(start_level_)
+  // Constraint 4: We do not expand if it is possible to apply a trivial move
+  // Constraint 5 (TODO): Try to pick minimal files to split into the target
+  //               number of subcompactions
+  TEST_SYNC_POINT("LevelCompactionPicker::RoundRobin");
+
+  // Only expand the inputs when we have selected a file in start_level_inputs_
+  if (start_level_inputs_.size() == 0) return;
+
+  uint64_t start_lvl_bytes_no_compacting = 0;
+  uint64_t curr_bytes_to_compact = 0;
+  uint64_t start_lvl_max_bytes_to_compact = 0;
+  const std::vector<FileMetaData*>& level_files =
+      vstorage_->LevelFiles(start_level_);
+  // Constraint 3 (pre-calculate the ideal max bytes to compact)
+  for (auto f : level_files) {
+    if (!f->being_compacted) {
+      start_lvl_bytes_no_compacting += f->fd.GetFileSize();
+    }
+  }
+  if (start_lvl_bytes_no_compacting >
+      vstorage_->MaxBytesForLevel(start_level_)) {
+    start_lvl_max_bytes_to_compact = start_lvl_bytes_no_compacting -
+                                     vstorage_->MaxBytesForLevel(start_level_);
+  }
+
+  size_t start_index = vstorage_->FilesByCompactionPri(start_level_)[0];
+  InternalKey smallest, largest;
+  // Constraint 4 (No need to check again later)
+  compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest);
+  CompactionInputFiles output_level_inputs;
+  output_level_inputs.level = output_level_;
+  vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
+                                  &output_level_inputs.files);
+  if (output_level_inputs.empty()) {
+    if (TryExtendNonL0TrivialMove((int)start_index,
+                                  true /* only_expand_right */)) {
+      return;
+    }
+  }
+  // Constraint 3
+  if (start_level_inputs_[0]->fd.GetFileSize() >=
+      start_lvl_max_bytes_to_compact) {
+    return;
+  }
+  CompactionInputFiles tmp_start_level_inputs;
+  tmp_start_level_inputs = start_level_inputs_;
+  // TODO (zichen): Future parallel round-robin may also need to update this
+  // Constraint 1b (only expand till the end)
+  for (size_t i = start_index + 1; i < level_files.size(); i++) {
+    auto* f = level_files[i];
+    if (f->being_compacted) {
+      // Constraint 1a
+      return;
+    }
+
+    tmp_start_level_inputs.files.push_back(f);
+    if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
+                                                    &tmp_start_level_inputs) ||
+        compaction_picker_->FilesRangeOverlapWithCompaction(
+            {tmp_start_level_inputs}, output_level_,
+            Compaction::EvaluatePenultimateLevel(
+                vstorage_, ioptions_, start_level_, output_level_))) {
+      // Constraint 1a
+      tmp_start_level_inputs.clear();
+      return;
+    }
+
+    curr_bytes_to_compact = 0;
+    for (auto start_lvl_f : tmp_start_level_inputs.files) {
+      curr_bytes_to_compact += start_lvl_f->fd.GetFileSize();
+    }
+
+    // Check whether any output level files are locked
+    compaction_picker_->GetRange(tmp_start_level_inputs, &smallest, &largest);
+    vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
+                                    &output_level_inputs.files);
+    if
(!output_level_inputs.empty() && + !compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &output_level_inputs)) { + // Constraint 1a + tmp_start_level_inputs.clear(); + return; + } + + uint64_t start_lvl_curr_bytes_to_compact = curr_bytes_to_compact; + for (auto output_lvl_f : output_level_inputs.files) { + curr_bytes_to_compact += output_lvl_f->fd.GetFileSize(); + } + if (curr_bytes_to_compact > mutable_cf_options_.max_compaction_bytes) { + // Constraint 2 + tmp_start_level_inputs.clear(); + return; + } + + start_level_inputs_.files = tmp_start_level_inputs.files; + // Constraint 3 + if (start_lvl_curr_bytes_to_compact > start_lvl_max_bytes_to_compact) { + return; + } + } +} + +bool LevelCompactionBuilder::SetupOtherInputsIfNeeded() { + // Setup input files from output level. For output to L0, we only compact + // spans of files that do not interact with any pending compactions, so don't + // need to consider other levels. + if (output_level_ != 0) { + output_level_inputs_.level = output_level_; + bool round_robin_expanding = + ioptions_.compaction_pri == kRoundRobin && + compaction_reason_ == CompactionReason::kLevelMaxLevelSize; + if (round_robin_expanding) { + SetupOtherFilesWithRoundRobinExpansion(); + } + if (!is_l0_trivial_move_ && + !compaction_picker_->SetupOtherInputs( + cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs_, + &output_level_inputs_, &parent_index_, base_index_, + round_robin_expanding)) { + return false; + } + + compaction_inputs_.push_back(start_level_inputs_); + if (!output_level_inputs_.empty()) { + compaction_inputs_.push_back(output_level_inputs_); + } + + // In some edge cases we could pick a compaction that will be compacting + // a key range that overlap with another running compaction, and both + // of them have the same output level. This could happen if + // (1) we are running a non-exclusive manual compaction + // (2) AddFile ingest a new file into the LSM tree + // We need to disallow this from happening. + if (compaction_picker_->FilesRangeOverlapWithCompaction( + compaction_inputs_, output_level_, + Compaction::EvaluatePenultimateLevel( + vstorage_, ioptions_, start_level_, output_level_))) { + // This compaction output could potentially conflict with the output + // of a currently running compaction, we cannot run it. + return false; + } + if (!is_l0_trivial_move_) { + compaction_picker_->GetGrandparents(vstorage_, start_level_inputs_, + output_level_inputs_, &grandparents_); + } + } else { + compaction_inputs_.push_back(start_level_inputs_); + } + return true; +} + +Compaction* LevelCompactionBuilder::PickCompaction() { + // Pick up the first file to start compaction. It may have been extended + // to a clean cut. + SetupInitialFiles(); + if (start_level_inputs_.empty()) { + return nullptr; + } + assert(start_level_ >= 0 && output_level_ >= 0); + + // If it is a L0 -> base level compaction, we need to set up other L0 + // files if needed. + if (!SetupOtherL0FilesIfNeeded()) { + return nullptr; + } + + // Pick files in the output level and expand more files in the start level + // if needed. + if (!SetupOtherInputsIfNeeded()) { + return nullptr; + } + + // Form a compaction object containing the files we picked. + Compaction* c = GetCompaction(); + + TEST_SYNC_POINT_CALLBACK("LevelCompactionPicker::PickCompaction:Return", c); + + return c; +} + +Compaction* LevelCompactionBuilder::GetCompaction() { + // TryPickL0TrivialMove() does not apply to the case when compacting L0 to an + // empty output level. 
So L0 files is picked in PickFileToCompact() by + // compaction score. We may still be able to do trivial move when this file + // does not overlap with other L0s. This happens when + // compaction_inputs_[0].size() == 1 since SetupOtherL0FilesIfNeeded() did not + // pull in more L0s. + assert(!compaction_inputs_.empty()); + bool l0_files_might_overlap = + start_level_ == 0 && !is_l0_trivial_move_ && + (compaction_inputs_.size() > 1 || compaction_inputs_[0].size() > 1); + auto c = new Compaction( + vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, + std::move(compaction_inputs_), output_level_, + MaxFileSizeForLevel(mutable_cf_options_, output_level_, + ioptions_.compaction_style, vstorage_->base_level(), + ioptions_.level_compaction_dynamic_level_bytes), + mutable_cf_options_.max_compaction_bytes, + GetPathId(ioptions_, mutable_cf_options_, output_level_), + GetCompressionType(vstorage_, mutable_cf_options_, output_level_, + vstorage_->base_level()), + GetCompressionOptions(mutable_cf_options_, vstorage_, output_level_), + Temperature::kUnknown, + /* max_subcompactions */ 0, std::move(grandparents_), is_manual_, + /* trim_ts */ "", start_level_score_, false /* deletion_compaction */, + l0_files_might_overlap, compaction_reason_); + + // If it's level 0 compaction, make sure we don't execute any other level 0 + // compactions in parallel + compaction_picker_->RegisterCompaction(c); + + // Creating a compaction influences the compaction score because the score + // takes running compactions into account (by skipping files that are already + // being compacted). Since we just changed compaction score, we recalculate it + // here + vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); + return c; +} + +/* + * Find the optimal path to place a file + * Given a level, finds the path where levels up to it will fit in levels + * up to and including this path + */ +uint32_t LevelCompactionBuilder::GetPathId( + const ImmutableCFOptions& ioptions, + const MutableCFOptions& mutable_cf_options, int level) { + uint32_t p = 0; + assert(!ioptions.cf_paths.empty()); + + // size remaining in the most recent path + uint64_t current_path_size = ioptions.cf_paths[0].target_size; + + uint64_t level_size; + int cur_level = 0; + + // max_bytes_for_level_base denotes L1 size. + // We estimate L0 size to be the same as L1. + level_size = mutable_cf_options.max_bytes_for_level_base; + + // Last path is the fallback + while (p < ioptions.cf_paths.size() - 1) { + if (level_size <= current_path_size) { + if (cur_level == level) { + // Does desired level fit in this path? + return p; + } else { + current_path_size -= level_size; + if (cur_level > 0) { + if (ioptions.level_compaction_dynamic_level_bytes) { + // Currently, level_compaction_dynamic_level_bytes is ignored when + // multiple db paths are specified. https://github.com/facebook/ + // rocksdb/blob/main/db/column_family.cc. 
+            // Still, adding this check to avoid accidentally using
+            // max_bytes_for_level_multiplier_additional
+            level_size = static_cast<uint64_t>(
+                level_size * mutable_cf_options.max_bytes_for_level_multiplier);
+          } else {
+            level_size = static_cast<uint64_t>(
+                level_size * mutable_cf_options.max_bytes_for_level_multiplier *
+                mutable_cf_options.MaxBytesMultiplerAdditional(cur_level));
+          }
+        }
+        cur_level++;
+        continue;
+      }
+    }
+    p++;
+    current_path_size = ioptions.cf_paths[p].target_size;
+  }
+  return p;
+}
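Editor's note: `GetPathId()` above walks the configured `cf_paths`, budgeting estimated per-level sizes (L1 = `max_bytes_for_level_base`, growing by the level multiplier) against each path's target size and spilling to the next path when a level no longer fits. Below is a standalone sketch of that walk with invented inputs; it omits the `max_bytes_for_level_multiplier_additional` branch the real code handles.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Returns the index of the path that should hold `level`, assuming a fixed
// level-size multiplier. All inputs here are illustrative, not real options.
uint32_t PickPathId(const std::vector<uint64_t>& path_target_sizes,
                    uint64_t l1_size, double multiplier, int level) {
  uint32_t p = 0;
  uint64_t remaining = path_target_sizes.at(0);
  uint64_t level_size = l1_size;  // L0 is estimated to be the same as L1
  int cur_level = 0;
  while (p + 1 < path_target_sizes.size()) {  // last path is the fallback
    if (level_size <= remaining) {
      if (cur_level == level) return p;  // desired level fits in this path
      remaining -= level_size;
      if (cur_level > 0) {
        level_size = static_cast<uint64_t>(level_size * multiplier);
      }
      ++cur_level;
      continue;
    }
    ++p;  // spill to the next path
    remaining = path_target_sizes[p];
  }
  return p;
}

int main() {
  // Three paths of 1 GiB, 10 GiB, 100 GiB; L1 = 256 MiB, 10x growth.
  std::vector<uint64_t> paths = {1ull << 30, 10ull << 30, 100ull << 30};
  for (int lvl = 0; lvl <= 4; ++lvl) {
    std::cout << "L" << lvl << " -> path "
              << PickPathId(paths, 256ull << 20, 10.0, lvl) << "\n";
  }
}
```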
+
+bool LevelCompactionBuilder::TryPickL0TrivialMove() {
+  if (vstorage_->base_level() <= 0) {
+    return false;
+  }
+  if (start_level_ == 0 && mutable_cf_options_.compression_per_level.empty() &&
+      !vstorage_->LevelFiles(output_level_).empty() &&
+      ioptions_.db_paths.size() <= 1) {
+    // Try to pick a trivial move from L0 to L1. We start from the oldest
+    // file. We keep expanding to newer files if it would form a
+    // trivial move.
+    // For now we don't support it with
+    // mutable_cf_options_.compression_per_level, to avoid the logic of
+    // determining whether L0 can be trivially moved to the next level.
+    // We skip the case where the output level is empty, since in this case,
+    // at least the oldest file would qualify for a trivial move, and this
+    // would be a surprising behavior with few benefits.
+    //
+    // We search from the oldest file to the newest. In theory, there are
+    // files in the middle that can form trivial moves too, but it is probably
+    // uncommon and we ignore these cases for simplicity.
+    const std::vector<FileMetaData*>& level_files =
+        vstorage_->LevelFiles(start_level_);
+
+    InternalKey my_smallest, my_largest;
+    for (auto it = level_files.rbegin(); it != level_files.rend(); ++it) {
+      CompactionInputFiles output_level_inputs;
+      output_level_inputs.level = output_level_;
+      FileMetaData* file = *it;
+      if (it == level_files.rbegin()) {
+        my_smallest = file->smallest;
+        my_largest = file->largest;
+      } else {
+        if (compaction_picker_->icmp()->Compare(file->largest, my_smallest) <
+            0) {
+          my_smallest = file->smallest;
+        } else if (compaction_picker_->icmp()->Compare(file->smallest,
+                                                       my_largest) > 0) {
+          my_largest = file->largest;
+        } else {
+          break;
+        }
+      }
+      vstorage_->GetOverlappingInputs(output_level_, &my_smallest, &my_largest,
+                                      &output_level_inputs.files);
+      if (output_level_inputs.empty()) {
+        assert(!file->being_compacted);
+        start_level_inputs_.files.push_back(file);
+      } else {
+        break;
+      }
+    }
+  }
+
+  if (!start_level_inputs_.empty()) {
+    // Sort files by key range. Not sure it's 100% necessary, but it's cleaner
+    // to always keep files sorted by key when the key ranges don't overlap.
+    std::sort(start_level_inputs_.files.begin(),
+              start_level_inputs_.files.end(),
+              [icmp = compaction_picker_->icmp()](FileMetaData* f1,
+                                                  FileMetaData* f2) -> bool {
+                return (icmp->Compare(f1->smallest, f2->smallest) < 0);
+              });
+
+    is_l0_trivial_move_ = true;
+    return true;
+  }
+  return false;
+}
+
+bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index,
+                                                       bool only_expand_right) {
+  if (start_level_inputs_.size() == 1 &&
+      (ioptions_.db_paths.empty() || ioptions_.db_paths.size() == 1) &&
+      (mutable_cf_options_.compression_per_level.empty())) {
+    // Only the file at `start_index` is picked, and it is likely a trivial
+    // move. Try to expand if it is still a trivial move, but not beyond
+    // max_compaction_bytes or 4 files, so that we don't create too
+    // much compaction pressure for the next level.
+    // Ignore if there is more than one DB path, as it would be hard
+    // to predict whether it is a trivial move.
+    const std::vector<FileMetaData*>& level_files =
+        vstorage_->LevelFiles(start_level_);
+    const size_t kMaxMultiTrivialMove = 4;
+    FileMetaData* initial_file = start_level_inputs_.files[0];
+    size_t total_size = initial_file->fd.GetFileSize();
+    CompactionInputFiles output_level_inputs;
+    output_level_inputs.level = output_level_;
+    // Expand towards the right
+    for (int i = start_index + 1;
+         i < static_cast<int>(level_files.size()) &&
+         start_level_inputs_.size() < kMaxMultiTrivialMove;
+         i++) {
+      FileMetaData* next_file = level_files[i];
+      if (next_file->being_compacted) {
+        break;
+      }
+      vstorage_->GetOverlappingInputs(output_level_, &(initial_file->smallest),
+                                      &(next_file->largest),
+                                      &output_level_inputs.files);
+      if (!output_level_inputs.empty()) {
+        break;
+      }
+      if (i < static_cast<int>(level_files.size()) - 1 &&
+          compaction_picker_->icmp()
+                  ->user_comparator()
+                  ->CompareWithoutTimestamp(
+                      next_file->largest.user_key(),
+                      level_files[i + 1]->smallest.user_key()) == 0) {
+        TEST_SYNC_POINT_CALLBACK(
+            "LevelCompactionBuilder::TryExtendNonL0TrivialMove:NoCleanCut",
+            nullptr);
+        // Not a clean cut after adding the next file. Skip.
+        break;
+      }
+      total_size += next_file->fd.GetFileSize();
+      if (total_size > mutable_cf_options_.max_compaction_bytes) {
+        break;
+      }
+      start_level_inputs_.files.push_back(next_file);
+    }
+    // Expand towards the left
+    if (!only_expand_right) {
+      for (int i = start_index - 1;
+           i >= 0 && start_level_inputs_.size() < kMaxMultiTrivialMove; i--) {
+        FileMetaData* next_file = level_files[i];
+        if (next_file->being_compacted) {
+          break;
+        }
+        vstorage_->GetOverlappingInputs(output_level_, &(next_file->smallest),
+                                        &(initial_file->largest),
+                                        &output_level_inputs.files);
+        if (!output_level_inputs.empty()) {
+          break;
+        }
+        if (i > 0 && compaction_picker_->icmp()
+                             ->user_comparator()
+                             ->CompareWithoutTimestamp(
+                                 next_file->smallest.user_key(),
+                                 level_files[i - 1]->largest.user_key()) == 0) {
+          // Not a clean cut after adding the next file. Skip.
+          break;
+        }
+        total_size += next_file->fd.GetFileSize();
+        if (total_size > mutable_cf_options_.max_compaction_bytes) {
+          break;
+        }
+        // keep `files` sorted in increasing order by key range
+        start_level_inputs_.files.insert(start_level_inputs_.files.begin(),
+                                         next_file);
+      }
+    }
+    return start_level_inputs_.size() > 1;
+  }
+  return false;
+}
+
+bool LevelCompactionBuilder::PickFileToCompact() {
+  // Level 0 files are overlapping, so we cannot pick more
+  // than one concurrent compaction at this level. This
+  // could be made better by looking at key ranges that are
+  // being compacted at level 0.
+  if (start_level_ == 0 &&
+      !compaction_picker_->level0_compactions_in_progress()->empty()) {
+    TEST_SYNC_POINT("LevelCompactionPicker::PickCompactionBySize:0");
+    return false;
+  }
+
+  start_level_inputs_.clear();
+  start_level_inputs_.level = start_level_;
+
+  assert(start_level_ >= 0);
+
+  if (TryPickL0TrivialMove()) {
+    return true;
+  }
+
+  const std::vector<FileMetaData*>& level_files =
+      vstorage_->LevelFiles(start_level_);
+
+  // Pick the file with the highest score in this level that is not already
+  // being compacted.
+  const std::vector<int>& file_scores =
+      vstorage_->FilesByCompactionPri(start_level_);
+
+  unsigned int cmp_idx;
+  for (cmp_idx = vstorage_->NextCompactionIndex(start_level_);
+       cmp_idx < file_scores.size(); cmp_idx++) {
+    int index = file_scores[cmp_idx];
+    auto* f = level_files[index];
+
+    // do not pick a file to compact if it is being compacted
+    // from n-1 level.
+    if (f->being_compacted) {
+      if (ioptions_.compaction_pri == kRoundRobin) {
+        // TODO(zichen): this file may be involved in one compaction from
+        // an upper level, cannot advance the cursor for round-robin policy.
+        // Currently, we do not pick any file to compact in this case. We
+        // should fix this later to ensure a compaction is picked but the
+        // cursor shall not be advanced.
+        return false;
+      }
+      continue;
+    }
+
+    start_level_inputs_.files.push_back(f);
+    if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
+                                                    &start_level_inputs_) ||
+        compaction_picker_->FilesRangeOverlapWithCompaction(
+            {start_level_inputs_}, output_level_,
+            Compaction::EvaluatePenultimateLevel(
+                vstorage_, ioptions_, start_level_, output_level_))) {
+      // A locked (pending compaction) input-level file was pulled in due to
+      // user-key overlap.
+      start_level_inputs_.clear();
+
+      if (ioptions_.compaction_pri == kRoundRobin) {
+        return false;
+      }
+      continue;
+    }
+
+    // Now that input level is fully expanded, we check whether any output
+    // files are locked due to pending compaction.
+    //
+    // Note we rely on ExpandInputsToCleanCut() to tell us whether any output-
+    // level files are locked, not just the extra ones pulled in for user-key
+    // overlap.
+    InternalKey smallest, largest;
+    compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest);
+    CompactionInputFiles output_level_inputs;
+    output_level_inputs.level = output_level_;
+    vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
+                                    &output_level_inputs.files);
+    if (output_level_inputs.empty()) {
+      if (start_level_ > 0 &&
+          TryExtendNonL0TrivialMove(index,
+                                    ioptions_.compaction_pri ==
+                                        kRoundRobin /* only_expand_right */)) {
+        break;
+      }
+    } else {
+      if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
+                                                      &output_level_inputs)) {
+        start_level_inputs_.clear();
+        if (ioptions_.compaction_pri == kRoundRobin) {
+          return false;
+        }
+        continue;
+      }
+    }
+
+    base_index_ = index;
+    break;
+  }
+
+  // store where to start the iteration in the next call to PickCompaction
+  if (ioptions_.compaction_pri != kRoundRobin) {
+    vstorage_->SetNextCompactionIndex(start_level_, cmp_idx);
+  }
+  return start_level_inputs_.size() > 0;
+}
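Editor's note: `PickFileToCompact()` above follows a cursor protocol: files already being compacted are skipped, and for non-round-robin priorities the scan position is persisted via `SetNextCompactionIndex()` so the next pick resumes where this one stopped. A minimal standalone illustration of that skip-and-resume pattern (simplified types, not the real API):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

struct File {
  bool being_compacted;
};

// Returns the index of the first pickable file at or after `cursor`, or -1.
// `cursor` is updated the way SetNextCompactionIndex() persists it.
int PickFrom(const std::vector<File>& files_by_priority, std::size_t& cursor) {
  for (; cursor < files_by_priority.size(); ++cursor) {
    if (!files_by_priority[cursor].being_compacted) {
      return static_cast<int>(cursor);
    }
  }
  return -1;  // nothing pickable this round
}

int main() {
  std::vector<File> files = {{true}, {true}, {false}, {false}};
  std::size_t cursor = 0;
  std::cout << "picked " << PickFrom(files, cursor) << ", resume at " << cursor
            << "\n";  // picked 2, resume at 2
}
```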
+
+bool LevelCompactionBuilder::PickIntraL0Compaction() {
+  start_level_inputs_.clear();
+  const std::vector<FileMetaData*>& level_files =
+      vstorage_->LevelFiles(0 /* level */);
+  if (level_files.size() <
+          static_cast<size_t>(
+              mutable_cf_options_.level0_file_num_compaction_trigger + 2) ||
+      level_files[0]->being_compacted) {
+    // If L0 isn't accumulating many files beyond the regular trigger, don't
+    // resort to L0->L0 compaction yet.
+    return false;
+  }
+  return FindIntraL0Compaction(level_files, kMinFilesForIntraL0Compaction,
+                               std::numeric_limits<uint64_t>::max(),
+                               mutable_cf_options_.max_compaction_bytes,
+                               &start_level_inputs_);
+}
+}  // namespace
+
+Compaction* LevelCompactionPicker::PickCompaction(
+    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
+    const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
+    LogBuffer* log_buffer) {
+  LevelCompactionBuilder builder(cf_name, vstorage, this, log_buffer,
+                                 mutable_cf_options, ioptions_,
+                                 mutable_db_options);
+  return builder.PickCompaction();
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.h
new file mode 100644
index 0000000000..6eb0f586f4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_level.h
@@ -0,0 +1,33 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "db/compaction/compaction_picker.h"
+
+namespace ROCKSDB_NAMESPACE {
+// Picking compactions for leveled compaction. See wiki page
+// https://github.com/facebook/rocksdb/wiki/Leveled-Compaction
+// for description of Leveled compaction.
+class LevelCompactionPicker : public CompactionPicker {
+ public:
+  LevelCompactionPicker(const ImmutableOptions& ioptions,
+                        const InternalKeyComparator* icmp)
+      : CompactionPicker(ioptions, icmp) {}
+  virtual Compaction* PickCompaction(const std::string& cf_name,
+                                     const MutableCFOptions& mutable_cf_options,
+                                     const MutableDBOptions& mutable_db_options,
+                                     VersionStorageInfo* vstorage,
+                                     LogBuffer* log_buffer) override;
+
+  virtual bool NeedsCompaction(
+      const VersionStorageInfo* vstorage) const override;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
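Editor's note: the universal picker vendored next reasons about "sorted runs" rather than individual levels: every L0 file is its own run, and every non-empty level L1+ counts as a single run. A rough standalone analogue of the `CalculateSortedRuns()` helper defined later in that file, using simplified stand-in types rather than the real RocksDB structures:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

struct Run {
  int level;
  uint64_t size;
};

// Each L0 file is one sorted run; each non-empty higher level is one run.
std::vector<Run> SortedRuns(const std::vector<uint64_t>& l0_file_sizes,
                            const std::vector<uint64_t>& level_sizes /* L1.. */) {
  std::vector<Run> runs;
  for (uint64_t s : l0_file_sizes) {
    runs.push_back({0, s});
  }
  for (std::size_t i = 0; i < level_sizes.size(); ++i) {
    if (level_sizes[i] > 0) {
      runs.push_back({static_cast<int>(i) + 1, level_sizes[i]});
    }
  }
  return runs;
}

int main() {
  // Three L0 files plus non-empty L2 and L3 -> five sorted runs.
  auto runs = SortedRuns({4, 4, 8}, {0, 64, 512});
  for (auto& r : runs) {
    std::cout << "L" << r.level << " run, size " << r.size << "\n";
  }
}
```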
+ +#include "db/compaction/compaction_picker_universal.h" + +#include +#include +#include +#include +#include + +#include "db/column_family.h" +#include "file/filename.h" +#include "logging/log_buffer.h" +#include "logging/logging.h" +#include "monitoring/statistics_impl.h" +#include "test_util/sync_point.h" +#include "util/random.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +// A helper class that form universal compactions. The class is used by +// UniversalCompactionPicker::PickCompaction(). +// The usage is to create the class, and get the compaction object by calling +// PickCompaction(). +class UniversalCompactionBuilder { + public: + UniversalCompactionBuilder( + const ImmutableOptions& ioptions, const InternalKeyComparator* icmp, + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + UniversalCompactionPicker* picker, LogBuffer* log_buffer) + : ioptions_(ioptions), + icmp_(icmp), + cf_name_(cf_name), + mutable_cf_options_(mutable_cf_options), + mutable_db_options_(mutable_db_options), + vstorage_(vstorage), + picker_(picker), + log_buffer_(log_buffer) {} + + // Form and return the compaction object. The caller owns return object. + Compaction* PickCompaction(); + + private: + struct SortedRun { + SortedRun(int _level, FileMetaData* _file, uint64_t _size, + uint64_t _compensated_file_size, bool _being_compacted) + : level(_level), + file(_file), + size(_size), + compensated_file_size(_compensated_file_size), + being_compacted(_being_compacted) { + assert(compensated_file_size > 0); + assert(level != 0 || file != nullptr); + } + + void Dump(char* out_buf, size_t out_buf_size, + bool print_path = false) const; + + // sorted_run_count is added into the string to print + void DumpSizeInfo(char* out_buf, size_t out_buf_size, + size_t sorted_run_count) const; + + int level; + // `file` Will be null for level > 0. For level = 0, the sorted run is + // for this file. + FileMetaData* file; + // For level > 0, `size` and `compensated_file_size` are sum of sizes all + // files in the level. `being_compacted` should be the same for all files + // in a non-zero level. Use the value here. + uint64_t size; + uint64_t compensated_file_size; + bool being_compacted; + }; + + // Pick Universal compaction to limit read amplification + Compaction* PickCompactionToReduceSortedRuns( + unsigned int ratio, unsigned int max_number_of_files_to_compact); + + // Pick Universal compaction to limit space amplification. + Compaction* PickCompactionToReduceSizeAmp(); + + // Try to pick incremental compaction to reduce space amplification. + // It will return null if it cannot find a fanout within the threshold. + // Fanout is defined as + // total size of files to compact at output level + // -------------------------------------------------- + // total size of files to compact at other levels + Compaction* PickIncrementalForReduceSizeAmp(double fanout_threshold); + + Compaction* PickDeleteTriggeredCompaction(); + + // Form a compaction from the sorted run indicated by start_index to the + // oldest sorted run. + // The caller is responsible for making sure that those files are not in + // compaction. + Compaction* PickCompactionToOldest(size_t start_index, + CompactionReason compaction_reason); + + Compaction* PickCompactionWithSortedRunRange( + size_t start_index, size_t end_index, CompactionReason compaction_reason); + + // Try to pick periodic compaction. 
+
+// Used in universal compaction when trivial move is enabled.
+// This structure is used for the construction of a min heap
+// that contains the file metadata, the level of the file,
+// and the index of the file in that level.
+struct InputFileInfo {
+  InputFileInfo() : f(nullptr), level(0), index(0) {}
+
+  FileMetaData* f;
+  size_t level;
+  size_t index;
+};
+
+// Used in universal compaction when trivial move is enabled.
+// This comparator is used for the construction of a min heap
+// based on the smallest key of the file.
+struct SmallestKeyHeapComparator {
+  explicit SmallestKeyHeapComparator(const Comparator* ucmp) { ucmp_ = ucmp; }
+
+  bool operator()(InputFileInfo i1, InputFileInfo i2) const {
+    return (ucmp_->CompareWithoutTimestamp(i1.f->smallest.user_key(),
+                                           i2.f->smallest.user_key()) > 0);
+  }
+
+ private:
+  const Comparator* ucmp_;
+};
+
+using SmallestKeyHeap =
+    std::priority_queue<InputFileInfo, std::vector<InputFileInfo>,
+                        SmallestKeyHeapComparator>;
+
+// This function creates the heap that is used to find if the files are
+// overlapping during universal compaction when allow_trivial_move
+// is set.
+SmallestKeyHeap create_level_heap(Compaction* c, const Comparator* ucmp) {
+  SmallestKeyHeap smallest_key_priority_q =
+      SmallestKeyHeap(SmallestKeyHeapComparator(ucmp));
+
+  InputFileInfo input_file;
+
+  for (size_t l = 0; l < c->num_input_levels(); l++) {
+    if (c->num_input_files(l) != 0) {
+      if (l == 0 && c->start_level() == 0) {
+        for (size_t i = 0; i < c->num_input_files(0); i++) {
+          input_file.f = c->input(0, i);
+          input_file.level = 0;
+          input_file.index = i;
+          smallest_key_priority_q.push(std::move(input_file));
+        }
+      } else {
+        input_file.f = c->input(l, 0);
+        input_file.level = l;
+        input_file.index = 0;
+        smallest_key_priority_q.push(std::move(input_file));
+      }
+    }
+  }
+  return smallest_key_priority_q;
+}
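Editor's note: `create_level_heap()` above feeds `IsInputFilesNonOverlapping()` below, which pops files in ascending smallest-key order and declares overlap as soon as a file starts at or before the previous file's largest key. The same invariant can be checked with a plain sort instead of a heap; a simplified sketch with integer keys in place of InternalKeys:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct F {
  int smallest, largest;
};

// True iff no two key ranges overlap, checked in smallest-key order.
bool NonOverlapping(std::vector<F> files) {
  std::sort(files.begin(), files.end(),
            [](const F& a, const F& b) { return a.smallest < b.smallest; });
  for (std::size_t i = 1; i < files.size(); ++i) {
    if (files[i].smallest <= files[i - 1].largest) {
      return false;  // this file starts inside the previous one's range
    }
  }
  return true;
}

int main() {
  std::cout << NonOverlapping({{0, 5}, {6, 9}}) << " "   // 1
            << NonOverlapping({{0, 5}, {4, 9}}) << "\n"; // 0
}
```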
+#ifndef NDEBUG
+// smallest_seqno and largest_seqno are set iff `files` is not empty.
+void GetSmallestLargestSeqno(const std::vector<FileMetaData*>& files,
+                             SequenceNumber* smallest_seqno,
+                             SequenceNumber* largest_seqno) {
+  bool is_first = true;
+  for (FileMetaData* f : files) {
+    assert(f->fd.smallest_seqno <= f->fd.largest_seqno);
+    if (is_first) {
+      is_first = false;
+      *smallest_seqno = f->fd.smallest_seqno;
+      *largest_seqno = f->fd.largest_seqno;
+    } else {
+      if (f->fd.smallest_seqno < *smallest_seqno) {
+        *smallest_seqno = f->fd.smallest_seqno;
+      }
+      if (f->fd.largest_seqno > *largest_seqno) {
+        *largest_seqno = f->fd.largest_seqno;
+      }
+    }
+  }
+}
+#endif
+}  // namespace
+
+// Algorithm that checks to see if there are any overlapping
+// files in the input.
+bool UniversalCompactionBuilder::IsInputFilesNonOverlapping(Compaction* c) {
+  auto comparator = icmp_->user_comparator();
+  int first_iter = 1;
+
+  InputFileInfo prev, curr, next;
+
+  SmallestKeyHeap smallest_key_priority_q =
+      create_level_heap(c, icmp_->user_comparator());
+
+  while (!smallest_key_priority_q.empty()) {
+    curr = smallest_key_priority_q.top();
+    smallest_key_priority_q.pop();
+
+    if (first_iter) {
+      prev = curr;
+      first_iter = 0;
+    } else {
+      if (comparator->CompareWithoutTimestamp(
+              prev.f->largest.user_key(), curr.f->smallest.user_key()) >= 0) {
+        // found overlapping files, return false
+        return false;
+      }
+      assert(comparator->CompareWithoutTimestamp(
+                 curr.f->largest.user_key(), prev.f->largest.user_key()) > 0);
+      prev = curr;
+    }
+
+    next.f = nullptr;
+
+    if (c->level(curr.level) != 0 &&
+        curr.index < c->num_input_files(curr.level) - 1) {
+      next.f = c->input(curr.level, curr.index + 1);
+      next.level = curr.level;
+      next.index = curr.index + 1;
+    }
+
+    if (next.f) {
+      smallest_key_priority_q.push(std::move(next));
+    }
+  }
+  return true;
+}
+
+bool UniversalCompactionPicker::NeedsCompaction(
+    const VersionStorageInfo* vstorage) const {
+  const int kLevel0 = 0;
+  if (vstorage->CompactionScore(kLevel0) >= 1) {
+    return true;
+  }
+  if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) {
+    return true;
+  }
+  if (!vstorage->FilesMarkedForCompaction().empty()) {
+    return true;
+  }
+  return false;
+}
+
+Compaction* UniversalCompactionPicker::PickCompaction(
+    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
+    const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
+    LogBuffer* log_buffer) {
+  UniversalCompactionBuilder builder(ioptions_, icmp_, cf_name,
+                                     mutable_cf_options, mutable_db_options,
+                                     vstorage, this, log_buffer);
+  return builder.PickCompaction();
+}
+
+void UniversalCompactionBuilder::SortedRun::Dump(char* out_buf,
+                                                 size_t out_buf_size,
+                                                 bool print_path) const {
+  if (level == 0) {
+    assert(file != nullptr);
+    if (file->fd.GetPathId() == 0 || !print_path) {
+      snprintf(out_buf, out_buf_size, "file %" PRIu64, file->fd.GetNumber());
+    } else {
+      snprintf(out_buf, out_buf_size,
+               "file %" PRIu64
+               "(path "
+               "%" PRIu32 ")",
+               file->fd.GetNumber(), file->fd.GetPathId());
+    }
+  } else {
+    snprintf(out_buf, out_buf_size, "level %d", level);
+  }
+}
+
+void UniversalCompactionBuilder::SortedRun::DumpSizeInfo(
+    char* out_buf, size_t out_buf_size, size_t sorted_run_count) const {
+  if (level == 0) {
+    assert(file != nullptr);
+    snprintf(out_buf, out_buf_size,
+             "file %" PRIu64 "[%" ROCKSDB_PRIszt
+             "] "
+             "with size %" PRIu64 " (compensated size %" PRIu64 ")",
+             file->fd.GetNumber(), sorted_run_count, file->fd.GetFileSize(),
+             file->compensated_file_size);
+  } else {
+    snprintf(out_buf, out_buf_size,
+             "level %d[%" ROCKSDB_PRIszt
+             "] "
+             "with size %" PRIu64 " (compensated size %" PRIu64 ")",
+             level, sorted_run_count, size, compensated_file_size);
+  }
+}
+
+std::vector<UniversalCompactionBuilder::SortedRun>
+UniversalCompactionBuilder::CalculateSortedRuns(
+    const VersionStorageInfo& vstorage) {
+  std::vector<SortedRun> ret;
+  for (FileMetaData* f : vstorage.LevelFiles(0)) {
+    ret.emplace_back(0, f, f->fd.GetFileSize(), f->compensated_file_size,
+                     f->being_compacted);
+  }
+  for (int level = 1; level < vstorage.num_levels(); level++) {
+    uint64_t total_compensated_size = 0U;
+    uint64_t total_size = 0U;
+    bool being_compacted = false;
+    for (FileMetaData* f : vstorage.LevelFiles(level)) {
+      total_compensated_size += f->compensated_file_size;
+      total_size += f->fd.GetFileSize();
+      // Size amp, read amp and periodic compactions always include all files
+      // for a non-zero level. However, a delete-triggered compaction and
+      // a trivial move might pick a subset of files in a sorted run. So
+      // always check all files in a sorted run and mark the entire run as
+      // being compacted if one or more files are being compacted.
+      if (f->being_compacted) {
+        being_compacted = f->being_compacted;
+      }
+    }
+    if (total_compensated_size > 0) {
+      ret.emplace_back(level, nullptr, total_size, total_compensated_size,
+                       being_compacted);
+    }
+  }
+  return ret;
+}
+
+// Universal style of compaction. Pick files that are contiguous in
+// time-range to compact.
+Compaction* UniversalCompactionBuilder::PickCompaction() {
+  const int kLevel0 = 0;
+  score_ = vstorage_->CompactionScore(kLevel0);
+  sorted_runs_ = CalculateSortedRuns(*vstorage_);
+
+  if (sorted_runs_.size() == 0 ||
+      (vstorage_->FilesMarkedForPeriodicCompaction().empty() &&
+       vstorage_->FilesMarkedForCompaction().empty() &&
+       sorted_runs_.size() < (unsigned int)mutable_cf_options_
+                                 .level0_file_num_compaction_trigger)) {
+    ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: nothing to do\n",
+                     cf_name_.c_str());
+    TEST_SYNC_POINT_CALLBACK(
+        "UniversalCompactionBuilder::PickCompaction:Return", nullptr);
+    return nullptr;
+  }
+  VersionStorageInfo::LevelSummaryStorage tmp;
+  ROCKS_LOG_BUFFER_MAX_SZ(
+      log_buffer_, 3072,
+      "[%s] Universal: sorted runs: %" ROCKSDB_PRIszt " files: %s\n",
+      cf_name_.c_str(), sorted_runs_.size(), vstorage_->LevelSummary(&tmp));
+
+  Compaction* c = nullptr;
+  // Periodic compaction has higher priority than other types of compaction
+  // because it's a hard requirement.
+  if (!vstorage_->FilesMarkedForPeriodicCompaction().empty()) {
+    // Always need to do a full compaction for periodic compaction.
+    c = PickPeriodicCompaction();
+    TEST_SYNC_POINT_CALLBACK("PostPickPeriodicCompaction", c);
+  }
+
+  // Check for size amplification.
+  if (c == nullptr &&
+      sorted_runs_.size() >=
+          static_cast<size_t>(
+              mutable_cf_options_.level0_file_num_compaction_trigger)) {
+    if ((c = PickCompactionToReduceSizeAmp()) != nullptr) {
+      TEST_SYNC_POINT("PickCompactionToReduceSizeAmpReturnNonnullptr");
+      ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: compacting for size amp\n",
+                       cf_name_.c_str());
+    } else {
+      // Size amplification is within limits. Try reducing read
+      // amplification while maintaining file size ratios.
+      unsigned int ratio =
+          mutable_cf_options_.compaction_options_universal.size_ratio;
+
+      if ((c = PickCompactionToReduceSortedRuns(ratio, UINT_MAX)) != nullptr) {
+        TEST_SYNC_POINT("PickCompactionToReduceSortedRunsReturnNonnullptr");
+        ROCKS_LOG_BUFFER(log_buffer_,
+                         "[%s] Universal: compacting for size ratio\n",
+                         cf_name_.c_str());
+      } else {
+        // Size amplification and file size ratios are within configured
+        // limits.
+        // If max read amplification exceeds configured limits, then force
+        // compaction without looking at file size ratios and try to reduce
+        // the number of files to fewer than
+        // level0_file_num_compaction_trigger.
+        // This is guaranteed by NeedsCompaction().
+        assert(sorted_runs_.size() >=
+               static_cast<size_t>(
+                   mutable_cf_options_.level0_file_num_compaction_trigger));
+        // Get the total number of sorted runs that are not being compacted
+        int num_sr_not_compacted = 0;
+        for (size_t i = 0; i < sorted_runs_.size(); i++) {
+          if (sorted_runs_[i].being_compacted == false) {
+            num_sr_not_compacted++;
+          }
+        }
+
+        // The number of sorted runs that are not being compacted is greater
+        // than the maximum allowed number of sorted runs
+        if (num_sr_not_compacted >
+            mutable_cf_options_.level0_file_num_compaction_trigger) {
+          unsigned int num_files =
+              num_sr_not_compacted -
+              mutable_cf_options_.level0_file_num_compaction_trigger + 1;
+          if ((c = PickCompactionToReduceSortedRuns(UINT_MAX, num_files)) !=
+              nullptr) {
+            ROCKS_LOG_BUFFER(log_buffer_,
+                             "[%s] Universal: compacting for file num -- %u\n",
+                             cf_name_.c_str(), num_files);
+          }
+        }
+      }
+    }
+  }
+
+  if (c == nullptr) {
+    if ((c = PickDeleteTriggeredCompaction()) != nullptr) {
+      TEST_SYNC_POINT("PickDeleteTriggeredCompactionReturnNonnullptr");
+      ROCKS_LOG_BUFFER(log_buffer_,
+                       "[%s] Universal: delete triggered compaction\n",
+                       cf_name_.c_str());
+    }
+  }
+
+  if (c == nullptr) {
+    TEST_SYNC_POINT_CALLBACK(
+        "UniversalCompactionBuilder::PickCompaction:Return", nullptr);
+    return nullptr;
+  }
+
+  if (mutable_cf_options_.compaction_options_universal.allow_trivial_move ==
+          true &&
+      c->compaction_reason() != CompactionReason::kPeriodicCompaction) {
+    c->set_is_trivial_move(IsInputFilesNonOverlapping(c));
+  }
+
+// validate that all the chosen files of L0 are non-overlapping in time
+#ifndef NDEBUG
+  bool is_first = true;
+
+  size_t level_index = 0U;
+  if (c->start_level() == 0) {
+    for (auto f : *c->inputs(0)) {
+      assert(f->fd.smallest_seqno <= f->fd.largest_seqno);
+      if (is_first) {
+        is_first = false;
+      }
+    }
+    level_index = 1U;
+  }
+  for (; level_index < c->num_input_levels(); level_index++) {
+    if (c->num_input_files(level_index) != 0) {
+      SequenceNumber smallest_seqno = 0U;
+      SequenceNumber largest_seqno = 0U;
+      GetSmallestLargestSeqno(*(c->inputs(level_index)), &smallest_seqno,
+                              &largest_seqno);
+      if (is_first) {
+        is_first = false;
+      }
+    }
+  }
+#endif
+  // update statistics
+  size_t num_files = 0;
+  for (auto& each_level : *c->inputs()) {
+    num_files += each_level.files.size();
+  }
+  RecordInHistogram(ioptions_.stats, NUM_FILES_IN_SINGLE_COMPACTION, num_files);
+
+  picker_->RegisterCompaction(c);
+  vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_);
+
+  TEST_SYNC_POINT_CALLBACK("UniversalCompactionBuilder::PickCompaction:Return",
+                           c);
+  return c;
+}
+
+uint32_t UniversalCompactionBuilder::GetPathId(
+    const ImmutableCFOptions& ioptions,
+    const MutableCFOptions& mutable_cf_options, uint64_t file_size) {
+  // Two conditions need to be satisfied:
+  // (1) the target path needs to be able to hold the file's size
+  // (2) Total size left in this and previous paths need to be not
+  //     smaller than expected future file size before this new file is
+  //     compacted, which is estimated based on size_ratio.
+ // For example, if now we are compacting files of size (1, 1, 2, 4, 8), + // we will make sure the target file, probably with size of 16, will be + // placed in a path so that eventually when new files are generated and + // compacted to (1, 1, 2, 4, 8, 16), all those files can be stored in or + // before the path we chose. + // + // TODO(sdong): now the case of multiple column families is not + // considered in this algorithm. So the target size can be violated in + // that case. We need to improve it. + uint64_t accumulated_size = 0; + uint64_t future_size = + file_size * + (100 - mutable_cf_options.compaction_options_universal.size_ratio) / 100; + uint32_t p = 0; + assert(!ioptions.cf_paths.empty()); + for (; p < ioptions.cf_paths.size() - 1; p++) { + uint64_t target_size = ioptions.cf_paths[p].target_size; + if (target_size > file_size && + accumulated_size + (target_size - file_size) > future_size) { + return p; + } + accumulated_size += target_size; + } + return p; +} + +// +// Consider compaction files based on their size differences with +// the next file in time order. +// +Compaction* UniversalCompactionBuilder::PickCompactionToReduceSortedRuns( + unsigned int ratio, unsigned int max_number_of_files_to_compact) { + unsigned int min_merge_width = + mutable_cf_options_.compaction_options_universal.min_merge_width; + unsigned int max_merge_width = + mutable_cf_options_.compaction_options_universal.max_merge_width; + + const SortedRun* sr = nullptr; + bool done = false; + size_t start_index = 0; + unsigned int candidate_count = 0; + + unsigned int max_files_to_compact = + std::min(max_merge_width, max_number_of_files_to_compact); + min_merge_width = std::max(min_merge_width, 2U); + + // Caller checks the size before executing this function. This invariant is + // important because otherwise we may have a possible integer underflow when + // dealing with unsigned types. + assert(sorted_runs_.size() > 0); + + // Considers a candidate file only if it is smaller than the + // total size accumulated so far. + for (size_t loop = 0; loop < sorted_runs_.size(); loop++) { + candidate_count = 0; + + // Skip files that are already being compacted + for (sr = nullptr; loop < sorted_runs_.size(); loop++) { + sr = &sorted_runs_[loop]; + + if (!sr->being_compacted) { + candidate_count = 1; + break; + } + char file_num_buf[kFormatFileNumberBufSize]; + sr->Dump(file_num_buf, sizeof(file_num_buf)); + ROCKS_LOG_BUFFER(log_buffer_, + "[%s] Universal: %s" + "[%d] being compacted, skipping", + cf_name_.c_str(), file_num_buf, loop); + + sr = nullptr; + } + + // This file is not being compacted. Consider it as the + // first candidate to be compacted. + uint64_t candidate_size = sr != nullptr ? sr->compensated_file_size : 0; + if (sr != nullptr) { + char file_num_buf[kFormatFileNumberBufSize]; + sr->Dump(file_num_buf, sizeof(file_num_buf), true); + ROCKS_LOG_BUFFER(log_buffer_, + "[%s] Universal: Possible candidate %s[%d].", + cf_name_.c_str(), file_num_buf, loop); + } + + // Check if the succeeding files need compaction. + for (size_t i = loop + 1; + candidate_count < max_files_to_compact && i < sorted_runs_.size(); + i++) { + const SortedRun* succeeding_sr = &sorted_runs_[i]; + if (succeeding_sr->being_compacted) { + break; + } + // Pick files if the total/last candidate file size (increased by the + // specified ratio) is still larger than the next candidate file. 
+      // candidate_size is the total size of files picked so far with the
+      // default kCompactionStopStyleTotalSize; with
+      // kCompactionStopStyleSimilarSize, it's simply the size of the last
+      // picked file.
+      double sz = candidate_size * (100.0 + ratio) / 100.0;
+      if (sz < static_cast<double>(succeeding_sr->size)) {
+        break;
+      }
+      if (mutable_cf_options_.compaction_options_universal.stop_style ==
+          kCompactionStopStyleSimilarSize) {
+        // Similar-size stopping rule: also check the last picked file isn't
+        // far larger than the next candidate file.
+        sz = (succeeding_sr->size * (100.0 + ratio)) / 100.0;
+        if (sz < static_cast<double>(candidate_size)) {
+          // If the small file we've encountered begins a run of similar-size
+          // files, we'll pick them up on a future iteration of the outer
+          // loop. If it's some lonely straggler, it'll eventually get picked
+          // by the last-resort read amp strategy which disregards size ratios.
+          break;
+        }
+        candidate_size = succeeding_sr->compensated_file_size;
+      } else {  // default kCompactionStopStyleTotalSize
+        candidate_size += succeeding_sr->compensated_file_size;
+      }
+      candidate_count++;
+    }
+
+    // Found a series of consecutive files that need compaction.
+    if (candidate_count >= (unsigned int)min_merge_width) {
+      start_index = loop;
+      done = true;
+      break;
+    } else {
+      for (size_t i = loop;
+           i < loop + candidate_count && i < sorted_runs_.size(); i++) {
+        const SortedRun* skipping_sr = &sorted_runs_[i];
+        char file_num_buf[256];
+        skipping_sr->DumpSizeInfo(file_num_buf, sizeof(file_num_buf), loop);
+        ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: Skipping %s",
+                         cf_name_.c_str(), file_num_buf);
+      }
+    }
+  }
+  if (!done || candidate_count <= 1) {
+    return nullptr;
+  }
+  size_t first_index_after = start_index + candidate_count;
+  // Compression is enabled if files compacted earlier already reached
+  // the size ratio of compression.
+  bool enable_compression = true;
+  int ratio_to_compress =
+      mutable_cf_options_.compaction_options_universal
+          .compression_size_percent;
+  if (ratio_to_compress >= 0) {
+    uint64_t total_size = 0;
+    for (auto& sorted_run : sorted_runs_) {
+      total_size += sorted_run.compensated_file_size;
+    }
+
+    uint64_t older_file_size = 0;
+    for (size_t i = sorted_runs_.size() - 1; i >= first_index_after; i--) {
+      older_file_size += sorted_runs_[i].size;
+      if (older_file_size * 100L >= total_size * (long)ratio_to_compress) {
+        enable_compression = false;
+        break;
+      }
+    }
+  }
+
+  uint64_t estimated_total_size = 0;
+  for (unsigned int i = 0; i < first_index_after; i++) {
+    estimated_total_size += sorted_runs_[i].size;
+  }
+  uint32_t path_id =
+      GetPathId(ioptions_, mutable_cf_options_, estimated_total_size);
+  int start_level = sorted_runs_[start_index].level;
+  int output_level;
+  if (first_index_after == sorted_runs_.size()) {
+    output_level = vstorage_->num_levels() - 1;
+  } else if (sorted_runs_[first_index_after].level == 0) {
+    output_level = 0;
+  } else {
+    output_level = sorted_runs_[first_index_after].level - 1;
+  }
+
+  // last level is reserved for files ingested behind
+  if (ioptions_.allow_ingest_behind &&
+      (output_level == vstorage_->num_levels() - 1)) {
+    assert(output_level > 1);
+    output_level--;
+  }
+
+  std::vector<CompactionInputFiles> inputs(vstorage_->num_levels());
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    inputs[i].level = start_level + static_cast<int>(i);
+  }
+  for (size_t i = start_index; i < first_index_after; i++) {
+    auto& picking_sr = sorted_runs_[i];
+    if (picking_sr.level == 0) {
+      FileMetaData* picking_file = picking_sr.file;
+      inputs[0].files.push_back(picking_file);
+    } else {
+      auto& files = inputs[picking_sr.level - start_level].files;
+      for (auto* f : vstorage_->LevelFiles(picking_sr.level)) {
+        files.push_back(f);
+      }
+    }
+    char file_num_buf[256];
+    picking_sr.DumpSizeInfo(file_num_buf, sizeof(file_num_buf), i);
+    ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: Picking %s",
+                     cf_name_.c_str(), file_num_buf);
+  }
+
+  std::vector<FileMetaData*> grandparents;
+  // Include grandparents for potential file cutting in incremental
+  // mode. It is for aligning file cutting boundaries across levels,
+  // so that subsequent compactions can pick files with aligned
+  // buffer.
+  // Single files are only picked up in incremental mode, so that
+  // there is no need for full range.
+ if (mutable_cf_options_.compaction_options_universal.incremental && + first_index_after < sorted_runs_.size() && + sorted_runs_[first_index_after].level > 1) { + grandparents = vstorage_->LevelFiles(sorted_runs_[first_index_after].level); + } + + if (output_level != 0 && + picker_->FilesRangeOverlapWithCompaction( + inputs, output_level, + Compaction::EvaluatePenultimateLevel(vstorage_, ioptions_, + start_level, output_level))) { + return nullptr; + } + CompactionReason compaction_reason; + if (max_number_of_files_to_compact == UINT_MAX) { + compaction_reason = CompactionReason::kUniversalSizeRatio; + } else { + compaction_reason = CompactionReason::kUniversalSortedRunNum; + } + return new Compaction(vstorage_, ioptions_, mutable_cf_options_, + mutable_db_options_, std::move(inputs), output_level, + MaxFileSizeForLevel(mutable_cf_options_, output_level, + kCompactionStyleUniversal), + GetMaxOverlappingBytes(), path_id, + GetCompressionType(vstorage_, mutable_cf_options_, + output_level, 1, enable_compression), + GetCompressionOptions(mutable_cf_options_, vstorage_, + output_level, enable_compression), + Temperature::kUnknown, + /* max_subcompactions */ 0, grandparents, + /* is manual */ false, /* trim_ts */ "", score_, + false /* deletion_compaction */, + /* l0_files_might_overlap */ true, compaction_reason); +} + +// Look at overall size amplification. If size amplification +// exceeds the configured value, then do a compaction +// of the candidate files all the way upto the earliest +// base file (overrides configured values of file-size ratios, +// min_merge_width and max_merge_width). +// +Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() { + // percentage flexibility while reducing size amplification + uint64_t ratio = mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent; + + unsigned int candidate_count = 0; + uint64_t candidate_size = 0; + size_t start_index = 0; + const SortedRun* sr = nullptr; + + assert(!sorted_runs_.empty()); + if (sorted_runs_.back().being_compacted) { + return nullptr; + } + + // Skip files that are already being compacted + for (size_t loop = 0; loop + 1 < sorted_runs_.size(); loop++) { + sr = &sorted_runs_[loop]; + if (!sr->being_compacted) { + start_index = loop; // Consider this as the first candidate. 
+      break;
+    }
+    char file_num_buf[kFormatFileNumberBufSize];
+    sr->Dump(file_num_buf, sizeof(file_num_buf), true);
+    ROCKS_LOG_BUFFER(log_buffer_,
+                     "[%s] Universal: skipping %s[%d] compacted %s",
+                     cf_name_.c_str(), file_num_buf, loop,
+                     " cannot be a candidate to reduce size amp.\n");
+    sr = nullptr;
+  }
+
+  if (sr == nullptr) {
+    return nullptr;  // no candidate files
+  }
+  {
+    char file_num_buf[kFormatFileNumberBufSize];
+    sr->Dump(file_num_buf, sizeof(file_num_buf), true);
+    ROCKS_LOG_BUFFER(
+        log_buffer_,
+        "[%s] Universal: First candidate %s[%" ROCKSDB_PRIszt "] %s",
+        cf_name_.c_str(), file_num_buf, start_index, " to reduce size amp.\n");
+  }
+
+  // size of the base sorted run for size amp calculation
+  uint64_t base_sr_size = sorted_runs_.back().size;
+  size_t sr_end_idx = sorted_runs_.size() - 1;
+  // If tiered compaction is enabled and the last sorted run is the last level
+  if (ioptions_.preclude_last_level_data_seconds > 0 &&
+      ioptions_.num_levels > 2 &&
+      sorted_runs_.back().level == ioptions_.num_levels - 1 &&
+      sorted_runs_.size() > 1) {
+    sr_end_idx = sorted_runs_.size() - 2;
+    base_sr_size = sorted_runs_[sr_end_idx].size;
+  }
+
+  // keep adding up all the remaining files
+  for (size_t loop = start_index; loop < sr_end_idx; loop++) {
+    sr = &sorted_runs_[loop];
+    if (sr->being_compacted) {
+      // TODO: when incremental compaction is supported, we might want to
+      // schedule some incremental compactions in parallel if needed.
+      char file_num_buf[kFormatFileNumberBufSize];
+      sr->Dump(file_num_buf, sizeof(file_num_buf), true);
+      ROCKS_LOG_BUFFER(
+          log_buffer_, "[%s] Universal: Possible candidate %s[%d] %s",
+          cf_name_.c_str(), file_num_buf, start_index,
+          " is already being compacted. No size amp reduction possible.\n");
+      return nullptr;
+    }
+    candidate_size += sr->compensated_file_size;
+    candidate_count++;
+  }
+  if (candidate_count == 0) {
+    return nullptr;
+  }
+
+  // size amplification = percentage of additional size
+  if (candidate_size * 100 < ratio * base_sr_size) {
+    ROCKS_LOG_BUFFER(
+        log_buffer_,
+        "[%s] Universal: size amp not needed. newer-files-total-size %" PRIu64
+        " earliest-file-size %" PRIu64,
+        cf_name_.c_str(), candidate_size, base_sr_size);
+    return nullptr;
+  } else {
+    ROCKS_LOG_BUFFER(
+        log_buffer_,
+        "[%s] Universal: size amp needed. newer-files-total-size %" PRIu64
+        " earliest-file-size %" PRIu64,
+        cf_name_.c_str(), candidate_size, base_sr_size);
+  }
+  // Since incremental compaction can't include more than the second last
+  // level, it can introduce a penalty, compared to full compaction. We
+  // hard code the penalty to be 80%. If we end up with a compaction
+  // fanout higher than 80% of full level compactions, we fall back
+  // to full level compaction.
+  // The 80% threshold is arbitrary and can be adjusted or made
+  // configurable in the future.
+  // This also prevents the case when compaction falls behind and we
+  // need to compact more levels for compactions to catch up.
+  if (mutable_cf_options_.compaction_options_universal.incremental) {
+    double fanout_threshold = static_cast<double>(base_sr_size) /
+                              static_cast<double>(candidate_size) * 1.8;
+    Compaction* picked = PickIncrementalForReduceSizeAmp(fanout_threshold);
+    if (picked != nullptr) {
+      // As the feature is still incremental, picking incremental compaction
+      // might fail and we will fall back to compacting the full level.
+      return picked;
+    }
+  }
+  return PickCompactionWithSortedRunRange(
+      start_index, sr_end_idx, CompactionReason::kUniversalSizeAmplification);
+}
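Editor's note: the core of the size-amplification decision above is a single inequality: compact everything down once the bytes in the newer runs reach the configured percentage of the base (oldest) run. A standalone restatement; the 200% used in the example is RocksDB's usual default for max_size_amplification_percent, stated here as an assumption:

```cpp
#include <cstdint>
#include <iostream>

// Mirrors: candidate_size * 100 >= ratio * base_sr_size
bool SizeAmpTriggered(uint64_t newer_runs_total, uint64_t base_run_size,
                      uint64_t max_size_amplification_percent) {
  return newer_runs_total * 100 >= max_size_amplification_percent * base_run_size;
}

int main() {
  // With an assumed 200% limit, 300 bytes of newer data over a 100-byte
  // base run triggers a full compaction; 100 bytes does not.
  std::cout << SizeAmpTriggered(300, 100, 200) << " "
            << SizeAmpTriggered(100, 100, 200) << "\n";  // 1 0
}
```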
+      return picked;
+    }
+  }
+  return PickCompactionWithSortedRunRange(
+      start_index, sr_end_idx, CompactionReason::kUniversalSizeAmplification);
+}
+
+Compaction* UniversalCompactionBuilder::PickIncrementalForReduceSizeAmp(
+    double fanout_threshold) {
+  // Try to find all potential compactions with total size just over
+  // options.max_compaction_size / 2, and take the one with the lowest
+  // fanout (defined in the declaration of the function).
+  // This is done by having a sliding window of the files at the second
+  // lowest level, and keep expanding it while finding overlapping files in
+  // the last level. Once the total size exceeds the size threshold,
+  // calculate the fanout value, and then shrink from the small side of the
+  // window. Keep doing it until the end.
+  // Finally, we try to include upper level files if they fall into
+  // the range.
+  //
+  // Note that it is a similar problem as leveled compaction's
+  // kMinOverlappingRatio priority, but instead of picking single files
+  // we expand to a target compaction size. The reason is that in
+  // leveled compaction, the actual fanout value tends to be high, e.g. 10,
+  // so even with a single file in the down-merging level, the extra size
+  // compacted in boundary files is at a lower ratio. But here users
+  // often size the second last level at 1/4, 1/3 or even 1/2 of the
+  // bottommost level, so picking a single file in the second last
+  // level would cause significant waste, which is not desirable.
+  //
+  // This algorithm has lots of room for improvement in picking more
+  // efficient compactions.
+  assert(sorted_runs_.size() >= 2);
+  int second_last_level = sorted_runs_[sorted_runs_.size() - 2].level;
+  if (second_last_level == 0) {
+    // Can't split Level 0.
+    return nullptr;
+  }
+  int output_level = sorted_runs_.back().level;
+  const std::vector<FileMetaData*>& bottom_files =
+      vstorage_->LevelFiles(output_level);
+  const std::vector<FileMetaData*>& files =
+      vstorage_->LevelFiles(second_last_level);
+  assert(!bottom_files.empty());
+  assert(!files.empty());
+
+  // std::unordered_map<uint64_t, uint64_t> file_to_order;
+
+  int picked_start_idx = 0;
+  int picked_end_idx = 0;
+  double picked_fanout = fanout_threshold;
+
+  // Use half target compaction bytes as anchor to stop growing second last
+  // level files, and reserve growing space for more overlapping bottom level,
+  // clean cut, files from other levels, etc.
+  uint64_t comp_thres_size = mutable_cf_options_.max_compaction_bytes / 2;
+  int start_idx = 0;
+  int bottom_end_idx = 0;
+  int bottom_start_idx = 0;
+  uint64_t non_bottom_size = 0;
+  uint64_t bottom_size = 0;
+  bool end_bottom_size_counted = false;
+  for (int end_idx = 0; end_idx < static_cast<int>(files.size()); end_idx++) {
+    FileMetaData* end_file = files[end_idx];
+
+    // Include bottommost level files smaller than the current second
+    // last level file.
+    int num_skipped = 0;
+    while (bottom_end_idx < static_cast<int>(bottom_files.size()) &&
+           icmp_->Compare(bottom_files[bottom_end_idx]->largest,
+                          end_file->smallest) < 0) {
+      if (!end_bottom_size_counted) {
+        bottom_size += bottom_files[bottom_end_idx]->fd.file_size;
+      }
+      bottom_end_idx++;
+      end_bottom_size_counted = false;
+      num_skipped++;
+    }
+
+    if (num_skipped > 1) {
+      // At least one file in the bottommost level falls into the file gap. No
+      // reason to include the file. We cut the range and start a new sliding
+      // window.
+      start_idx = end_idx;
+    }
+
+    if (start_idx == end_idx) {
+      // new sliding window.
+      non_bottom_size = 0;
+      bottom_size = 0;
+      bottom_start_idx = bottom_end_idx;
+      end_bottom_size_counted = false;
+    }
+
+    non_bottom_size += end_file->fd.file_size;
+
+    // Include all overlapping files in bottom level.
+    while (bottom_end_idx < static_cast<int>(bottom_files.size()) &&
+           icmp_->Compare(bottom_files[bottom_end_idx]->smallest,
+                          end_file->largest) < 0) {
+      if (!end_bottom_size_counted) {
+        bottom_size += bottom_files[bottom_end_idx]->fd.file_size;
+        end_bottom_size_counted = true;
+      }
+      if (icmp_->Compare(bottom_files[bottom_end_idx]->largest,
+                         end_file->largest) > 0) {
+        // The next-level file crosses the largest-key boundary of the
+        // current file.
+        break;
+      }
+      bottom_end_idx++;
+      end_bottom_size_counted = false;
+    }
+
+    if ((non_bottom_size + bottom_size > comp_thres_size ||
+         end_idx == static_cast<int>(files.size()) - 1) &&
+        non_bottom_size > 0) {  // Do we allow 0-size files at all?
+      // If it is a better compaction, remember it in picked* variables.
+      double fanout = static_cast<double>(bottom_size) /
+                      static_cast<double>(non_bottom_size);
+      if (fanout < picked_fanout) {
+        picked_start_idx = start_idx;
+        picked_end_idx = end_idx;
+        picked_fanout = fanout;
+      }
+      // Shrink from the start side to get back under comp_thres_size
+      while (non_bottom_size + bottom_size > comp_thres_size &&
+             start_idx <= end_idx) {
+        non_bottom_size -= files[start_idx]->fd.file_size;
+        start_idx++;
+        if (start_idx < static_cast<int>(files.size())) {
+          while (bottom_start_idx <= bottom_end_idx &&
+                 icmp_->Compare(bottom_files[bottom_start_idx]->largest,
+                                files[start_idx]->smallest) < 0) {
+            bottom_size -= bottom_files[bottom_start_idx]->fd.file_size;
+            bottom_start_idx++;
+          }
+        }
+      }
+    }
+  }
+
+  if (picked_fanout >= fanout_threshold) {
+    assert(picked_fanout == fanout_threshold);
+    return nullptr;
+  }
+
+  std::vector<CompactionInputFiles> inputs;
+  CompactionInputFiles bottom_level_inputs;
+  CompactionInputFiles second_last_level_inputs;
+  second_last_level_inputs.level = second_last_level;
+  bottom_level_inputs.level = output_level;
+  for (int i = picked_start_idx; i <= picked_end_idx; i++) {
+    if (files[i]->being_compacted) {
+      return nullptr;
+    }
+    second_last_level_inputs.files.push_back(files[i]);
+  }
+  assert(!second_last_level_inputs.empty());
+  if (!picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
+                                       &second_last_level_inputs,
+                                       /*next_smallest=*/nullptr)) {
+    return nullptr;
+  }
+  // We might be able to avoid this binary search if we save and expand
+  // from bottom_start_idx and bottom_end_idx, but for now, we use
+  // SetupOtherInputs() for simplicity.
+  int parent_index = -1;  // Create and use bottom_start_idx?
+  if (!picker_->SetupOtherInputs(cf_name_, mutable_cf_options_, vstorage_,
+                                 &second_last_level_inputs,
+                                 &bottom_level_inputs, &parent_index,
+                                 /*base_index=*/-1)) {
+    return nullptr;
+  }
+
+  // Try to include files in upper levels if they fall into the range.
+  // Since we need to go from the lower level up, which is the reverse of
+  // the level order, we first write to a reversed data structure and
+  // finally copy it to the compaction inputs.
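+  // Worked example (illustrative numbers, not derived from any particular
+  // database): with sorted runs on levels {1, 2, 3, 5, 6}, the loop below
+  // skips the two last runs (levels 6 and 5), visits levels 3, 2, 1 in that
+  // order, and would stop early at a level-0 run. inputs_reverse is then
+  // copied in reverse, so the final inputs are ordered 1, 2, 3, with the
+  // second last level (5) and the bottom level (6) appended afterwards.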
+  InternalKey smallest, largest;
+  picker_->GetRange(second_last_level_inputs, &smallest, &largest);
+  std::vector<CompactionInputFiles> inputs_reverse;
+  for (auto it = ++(++sorted_runs_.rbegin()); it != sorted_runs_.rend(); it++) {
+    SortedRun& sr = *it;
+    if (sr.level == 0) {
+      break;
+    }
+    std::vector<FileMetaData*> level_inputs;
+    vstorage_->GetCleanInputsWithinInterval(sr.level, &smallest, &largest,
+                                            &level_inputs);
+    if (!level_inputs.empty()) {
+      inputs_reverse.push_back({});
+      inputs_reverse.back().level = sr.level;
+      inputs_reverse.back().files = level_inputs;
+      picker_->GetRange(inputs_reverse.back(), &smallest, &largest);
+    }
+  }
+  for (auto it = inputs_reverse.rbegin(); it != inputs_reverse.rend(); it++) {
+    inputs.push_back(*it);
+  }
+
+  inputs.push_back(second_last_level_inputs);
+  inputs.push_back(bottom_level_inputs);
+
+  int start_level = Compaction::kInvalidLevel;
+  for (const auto& in : inputs) {
+    if (!in.empty()) {
+      // inputs should already be sorted by level
+      start_level = in.level;
+      break;
+    }
+  }
+
+  // intra-L0 compaction outputs could have overlap
+  if (output_level != 0 &&
+      picker_->FilesRangeOverlapWithCompaction(
+          inputs, output_level,
+          Compaction::EvaluatePenultimateLevel(vstorage_, ioptions_,
+                                               start_level, output_level))) {
+    return nullptr;
+  }
+
+  // TODO: support multiple paths?
+  uint32_t path_id = 0;
+  return new Compaction(
+      vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_,
+      std::move(inputs), output_level,
+      MaxFileSizeForLevel(mutable_cf_options_, output_level,
+                          kCompactionStyleUniversal),
+      GetMaxOverlappingBytes(), path_id,
+      GetCompressionType(vstorage_, mutable_cf_options_, output_level, 1,
+                         true /* enable_compression */),
+      GetCompressionOptions(mutable_cf_options_, vstorage_, output_level,
+                            true /* enable_compression */),
+      Temperature::kUnknown,
+      /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false,
+      /* trim_ts */ "", score_, false /* deletion_compaction */,
+      /* l0_files_might_overlap */ true,
+      CompactionReason::kUniversalSizeAmplification);
+}
+
+// Pick files marked for compaction. Typically, files are marked by
+// CompactOnDeleteCollector due to the presence of tombstones.
+Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() {
+  CompactionInputFiles start_level_inputs;
+  int output_level;
+  std::vector<CompactionInputFiles> inputs;
+  std::vector<FileMetaData*> grandparents;
+
+  if (vstorage_->num_levels() == 1) {
+    // This is single-level universal. Since we're basically trying to reclaim
+    // space by processing files marked for compaction due to high tombstone
+    // density, let's do the same thing as the compaction to reduce size amp,
+    // which has the same goal.
+    int start_index = -1;
+
+    start_level_inputs.level = 0;
+    start_level_inputs.files.clear();
+    output_level = 0;
+    // Find the first file marked for compaction. Ignore the last file.
+    for (size_t loop = 0; loop + 1 < sorted_runs_.size(); loop++) {
+      SortedRun* sr = &sorted_runs_[loop];
+      if (sr->being_compacted) {
+        continue;
+      }
+      FileMetaData* f = vstorage_->LevelFiles(0)[loop];
+      if (f->marked_for_compaction) {
+        start_level_inputs.files.push_back(f);
+        start_index =
+            static_cast<int>(loop);  // Consider this as the first candidate.
+ break; + } + } + if (start_index < 0) { + // Either no file marked, or they're already being compacted + return nullptr; + } + + for (size_t loop = start_index + 1; loop < sorted_runs_.size(); loop++) { + SortedRun* sr = &sorted_runs_[loop]; + if (sr->being_compacted) { + break; + } + + FileMetaData* f = vstorage_->LevelFiles(0)[loop]; + start_level_inputs.files.push_back(f); + } + if (start_level_inputs.size() <= 1) { + // If only the last file in L0 is marked for compaction, ignore it + return nullptr; + } + inputs.push_back(start_level_inputs); + } else { + int start_level; + + // For multi-level universal, the strategy is to make this look more like + // leveled. We pick one of the files marked for compaction and compact with + // overlapping files in the adjacent level. + picker_->PickFilesMarkedForCompaction(cf_name_, vstorage_, &start_level, + &output_level, &start_level_inputs); + if (start_level_inputs.empty()) { + return nullptr; + } + + // Pick the first non-empty level after the start_level + for (output_level = start_level + 1; output_level < vstorage_->num_levels(); + output_level++) { + if (vstorage_->NumLevelFiles(output_level) != 0) { + break; + } + } + + // If all higher levels are empty, pick the highest level as output level + if (output_level == vstorage_->num_levels()) { + if (start_level == 0) { + output_level = vstorage_->num_levels() - 1; + } else { + // If start level is non-zero and all higher levels are empty, this + // compaction will translate into a trivial move. Since the idea is + // to reclaim space and trivial move doesn't help with that, we + // skip compaction in this case and return nullptr + return nullptr; + } + } + if (ioptions_.allow_ingest_behind && + output_level == vstorage_->num_levels() - 1) { + assert(output_level > 1); + output_level--; + } + + if (output_level != 0) { + if (start_level == 0) { + if (!picker_->GetOverlappingL0Files(vstorage_, &start_level_inputs, + output_level, nullptr)) { + return nullptr; + } + } + + CompactionInputFiles output_level_inputs; + int parent_index = -1; + + output_level_inputs.level = output_level; + if (!picker_->SetupOtherInputs(cf_name_, mutable_cf_options_, vstorage_, + &start_level_inputs, &output_level_inputs, + &parent_index, -1)) { + return nullptr; + } + inputs.push_back(start_level_inputs); + if (!output_level_inputs.empty()) { + inputs.push_back(output_level_inputs); + } + if (picker_->FilesRangeOverlapWithCompaction( + inputs, output_level, + Compaction::EvaluatePenultimateLevel( + vstorage_, ioptions_, start_level, output_level))) { + return nullptr; + } + + picker_->GetGrandparents(vstorage_, start_level_inputs, + output_level_inputs, &grandparents); + } else { + inputs.push_back(start_level_inputs); + } + } + + uint64_t estimated_total_size = 0; + // Use size of the output level as estimated file size + for (FileMetaData* f : vstorage_->LevelFiles(output_level)) { + estimated_total_size += f->fd.GetFileSize(); + } + uint32_t path_id = + GetPathId(ioptions_, mutable_cf_options_, estimated_total_size); + return new Compaction( + vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, + std::move(inputs), output_level, + MaxFileSizeForLevel(mutable_cf_options_, output_level, + kCompactionStyleUniversal), + /* max_grandparent_overlap_bytes */ GetMaxOverlappingBytes(), path_id, + GetCompressionType(vstorage_, mutable_cf_options_, output_level, 1), + GetCompressionOptions(mutable_cf_options_, vstorage_, output_level), + Temperature::kUnknown, + /* max_subcompactions */ 0, grandparents, /* 
is manual */ false,
+      /* trim_ts */ "", score_, false /* deletion_compaction */,
+      /* l0_files_might_overlap */ true,
+      CompactionReason::kFilesMarkedForCompaction);
+}
+
+Compaction* UniversalCompactionBuilder::PickCompactionToOldest(
+    size_t start_index, CompactionReason compaction_reason) {
+  return PickCompactionWithSortedRunRange(start_index, sorted_runs_.size() - 1,
+                                          compaction_reason);
+}
+
+Compaction* UniversalCompactionBuilder::PickCompactionWithSortedRunRange(
+    size_t start_index, size_t end_index, CompactionReason compaction_reason) {
+  assert(start_index < sorted_runs_.size());
+
+  // Estimate total file size
+  uint64_t estimated_total_size = 0;
+  for (size_t loop = start_index; loop <= end_index; loop++) {
+    estimated_total_size += sorted_runs_[loop].size;
+  }
+  uint32_t path_id =
+      GetPathId(ioptions_, mutable_cf_options_, estimated_total_size);
+  int start_level = sorted_runs_[start_index].level;
+
+  std::vector<CompactionInputFiles> inputs(vstorage_->num_levels());
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    inputs[i].level = start_level + static_cast<int>(i);
+  }
+  for (size_t loop = start_index; loop <= end_index; loop++) {
+    auto& picking_sr = sorted_runs_[loop];
+    if (picking_sr.level == 0) {
+      FileMetaData* f = picking_sr.file;
+      inputs[0].files.push_back(f);
+    } else {
+      auto& files = inputs[picking_sr.level - start_level].files;
+      for (auto* f : vstorage_->LevelFiles(picking_sr.level)) {
+        files.push_back(f);
+      }
+    }
+    std::string comp_reason_print_string;
+    if (compaction_reason == CompactionReason::kPeriodicCompaction) {
+      comp_reason_print_string = "periodic compaction";
+    } else if (compaction_reason ==
+               CompactionReason::kUniversalSizeAmplification) {
+      comp_reason_print_string = "size amp";
+    } else {
+      assert(false);
+      comp_reason_print_string = "unknown: ";
+      comp_reason_print_string.append(
+          std::to_string(static_cast<int>(compaction_reason)));
+    }
+
+    char file_num_buf[256];
+    picking_sr.DumpSizeInfo(file_num_buf, sizeof(file_num_buf), loop);
+    ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: %s picking %s",
+                     cf_name_.c_str(), comp_reason_print_string.c_str(),
+                     file_num_buf);
+  }
+
+  int output_level;
+  if (end_index == sorted_runs_.size() - 1) {
+    // output files at the last level, unless it's reserved
+    output_level = vstorage_->num_levels() - 1;
+    // last level is reserved for the files ingested behind
+    if (ioptions_.allow_ingest_behind) {
+      assert(output_level > 1);
+      output_level--;
+    }
+  } else {
+    // If the compaction doesn't include all sorted runs, it can only output
+    // to the level above the `end_index + 1` sorted run.
+    output_level = sorted_runs_[end_index + 1].level - 1;
+  }
+
+  // intra-L0 compaction outputs could have overlap
+  if (output_level != 0 &&
+      picker_->FilesRangeOverlapWithCompaction(
+          inputs, output_level,
+          Compaction::EvaluatePenultimateLevel(vstorage_, ioptions_,
+                                               start_level, output_level))) {
+    return nullptr;
+  }
+
+  // We never check size for
+  // compaction_options_universal.compression_size_percent,
+  // because we always compact all the files, so always compress.
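+  // Worked example (illustrative numbers): with sorted runs on levels
+  // {0, 0, 4, 6} and end_index pointing at the level-4 run, the next sorted
+  // run sits on level 6, so output_level = 6 - 1 = 5. If end_index were the
+  // last sorted run (and allow_ingest_behind unset), the output would go to
+  // num_levels() - 1 instead.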
+  return new Compaction(
+      vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_,
+      std::move(inputs), output_level,
+      MaxFileSizeForLevel(mutable_cf_options_, output_level,
+                          kCompactionStyleUniversal),
+      GetMaxOverlappingBytes(), path_id,
+      GetCompressionType(vstorage_, mutable_cf_options_, output_level, 1,
+                         true /* enable_compression */),
+      GetCompressionOptions(mutable_cf_options_, vstorage_, output_level,
+                            true /* enable_compression */),
+      Temperature::kUnknown,
+      /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false,
+      /* trim_ts */ "", score_, false /* deletion_compaction */,
+      /* l0_files_might_overlap */ true, compaction_reason);
+}
+
+Compaction* UniversalCompactionBuilder::PickPeriodicCompaction() {
+  ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: Periodic Compaction",
+                   cf_name_.c_str());
+
+  // In universal compaction, sorted runs containing older data are almost
+  // always generated earlier too. To simplify the problem, we just try to
+  // trigger a full compaction. We start from the oldest sorted run and
+  // include all sorted runs, until we hit a sorted run that is already
+  // being compacted. Since the largest (which is usually the oldest) sorted
+  // run is usually included anyway, doing a full compaction won't increase
+  // write amplification much.
+
+  // Get some information from marked files to check whether a file is
+  // included in the compaction.
+
+  size_t start_index = sorted_runs_.size();
+  while (start_index > 0 && !sorted_runs_[start_index - 1].being_compacted) {
+    start_index--;
+  }
+  if (start_index == sorted_runs_.size()) {
+    return nullptr;
+  }
+
+  // There is a rare corner case where we can't pick up all the files
+  // because some files are being compacted, and we end up picking files
+  // none of which actually needs periodic compaction. To simplify the
+  // logic, we just execute the compaction anyway, unless it consists of
+  // only the last sorted run (either the last level or the last L0 file),
+  // in which case we verify that it covers a file marked for periodic
+  // compaction.
+  if (start_index == sorted_runs_.size() - 1) {
+    bool included_file_marked = false;
+    int start_level = sorted_runs_[start_index].level;
+    FileMetaData* start_file = sorted_runs_[start_index].file;
+    for (const std::pair<int, FileMetaData*>& level_file_pair :
+         vstorage_->FilesMarkedForPeriodicCompaction()) {
+      if (start_level != 0) {
+        // Last sorted run is a level
+        if (start_level == level_file_pair.first) {
+          included_file_marked = true;
+          break;
+        }
+      } else {
+        // Last sorted run is an L0 file.
+        if (start_file == level_file_pair.second) {
+          included_file_marked = true;
+          break;
+        }
+      }
+    }
+    if (!included_file_marked) {
+      ROCKS_LOG_BUFFER(log_buffer_,
+                       "[%s] Universal: Cannot form a compaction covering file "
+                       "marked for periodic compaction",
+                       cf_name_.c_str());
+      return nullptr;
+    }
+  }
+
+  Compaction* c = PickCompactionToOldest(start_index,
+                                         CompactionReason::kPeriodicCompaction);
+
+  TEST_SYNC_POINT_CALLBACK(
+      "UniversalCompactionPicker::PickPeriodicCompaction:Return", c);
+
+  return c;
+}
+
+uint64_t UniversalCompactionBuilder::GetMaxOverlappingBytes() const {
+  if (!mutable_cf_options_.compaction_options_universal.incremental) {
+    return std::numeric_limits<uint64_t>::max();
+  } else {
+    // Try to align the cutting boundary with files at the next level so that
+    // a file doesn't end up with less than 1/2 of the target size, or
+    // overlapping two full-size files at the next level.
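+    // For example (illustrative numbers): with target_file_size_base set to
+    // 64 MB, the expression below permits up to 96 MB (1.5x the target file
+    // size) of overlap with grandparent files before an output file is cut.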
+ return mutable_cf_options_.target_file_size_base / 2 * 3; + } +} +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_universal.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_universal.h new file mode 100644 index 0000000000..cb16059692 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_picker_universal.h @@ -0,0 +1,30 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "db/compaction/compaction_picker.h" + +namespace ROCKSDB_NAMESPACE { +class UniversalCompactionPicker : public CompactionPicker { + public: + UniversalCompactionPicker(const ImmutableOptions& ioptions, + const InternalKeyComparator* icmp) + : CompactionPicker(ioptions, icmp) {} + virtual Compaction* PickCompaction(const std::string& cf_name, + const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, + VersionStorageInfo* vstorage, + LogBuffer* log_buffer) override; + virtual int MaxOutputLevel() const override { return NumberLevels() - 1; } + + virtual bool NeedsCompaction( + const VersionStorageInfo* vstorage) const override; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_service_job.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_service_job.cc new file mode 100644 index 0000000000..3149bb5002 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_service_job.cc @@ -0,0 +1,833 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
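+//
+// Overview: this file contains both sides of the remote-compaction hand-off.
+// The primary instance serializes a CompactionServiceInput, submits it to the
+// configured CompactionService via StartV2()/WaitForCompleteV2(), and
+// installs the returned output files; CompactionServiceCompactionJob is the
+// worker-side job that runs the compaction and fills in a
+// CompactionServiceResult.
+//
+// Minimal configuration sketch (MyCompactionService is a hypothetical name
+// for a user-provided CompactionService subclass; the only field assumed
+// here is DBOptions::compaction_service, which the code below consults):
+//
+//   DBOptions db_options;
+//   db_options.compaction_service = std::make_shared<MyCompactionService>();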
+
+#include "db/compaction/compaction_job.h"
+#include "db/compaction/compaction_state.h"
+#include "logging/logging.h"
+#include "monitoring/iostats_context_imp.h"
+#include "monitoring/thread_status_util.h"
+#include "options/options_helper.h"
+#include "rocksdb/utilities/options_type.h"
+
+namespace ROCKSDB_NAMESPACE {
+class SubcompactionState;
+
+CompactionServiceJobStatus
+CompactionJob::ProcessKeyValueCompactionWithCompactionService(
+    SubcompactionState* sub_compact) {
+  assert(sub_compact);
+  assert(sub_compact->compaction);
+  assert(db_options_.compaction_service);
+
+  const Compaction* compaction = sub_compact->compaction;
+  CompactionServiceInput compaction_input;
+  compaction_input.output_level = compaction->output_level();
+  compaction_input.db_id = db_id_;
+
+  const std::vector<CompactionInputFiles>& inputs =
+      *(compact_->compaction->inputs());
+  for (const auto& files_per_level : inputs) {
+    for (const auto& file : files_per_level.files) {
+      compaction_input.input_files.emplace_back(
+          MakeTableFileName(file->fd.GetNumber()));
+    }
+  }
+  compaction_input.column_family.name =
+      compaction->column_family_data()->GetName();
+  compaction_input.column_family.options =
+      compaction->column_family_data()->GetLatestCFOptions();
+  compaction_input.db_options =
+      BuildDBOptions(db_options_, mutable_db_options_copy_);
+  compaction_input.snapshots = existing_snapshots_;
+  compaction_input.has_begin = sub_compact->start.has_value();
+  compaction_input.begin =
+      compaction_input.has_begin ? sub_compact->start->ToString() : "";
+  compaction_input.has_end = sub_compact->end.has_value();
+  compaction_input.end =
+      compaction_input.has_end ? sub_compact->end->ToString() : "";
+
+  std::string compaction_input_binary;
+  Status s = compaction_input.Write(&compaction_input_binary);
+  if (!s.ok()) {
+    sub_compact->status = s;
+    return CompactionServiceJobStatus::kFailure;
+  }
+
+  std::ostringstream input_files_oss;
+  bool is_first_one = true;
+  for (const auto& file : compaction_input.input_files) {
+    input_files_oss << (is_first_one ?
"" : ", ") << file; + is_first_one = false; + } + + ROCKS_LOG_INFO( + db_options_.info_log, + "[%s] [JOB %d] Starting remote compaction (output level: %d): %s", + compaction_input.column_family.name.c_str(), job_id_, + compaction_input.output_level, input_files_oss.str().c_str()); + CompactionServiceJobInfo info(dbname_, db_id_, db_session_id_, + GetCompactionId(sub_compact), thread_pri_); + CompactionServiceJobStatus compaction_status = + db_options_.compaction_service->StartV2(info, compaction_input_binary); + switch (compaction_status) { + case CompactionServiceJobStatus::kSuccess: + break; + case CompactionServiceJobStatus::kFailure: + sub_compact->status = Status::Incomplete( + "CompactionService failed to start compaction job."); + ROCKS_LOG_WARN(db_options_.info_log, + "[%s] [JOB %d] Remote compaction failed to start.", + compaction_input.column_family.name.c_str(), job_id_); + return compaction_status; + case CompactionServiceJobStatus::kUseLocal: + ROCKS_LOG_INFO( + db_options_.info_log, + "[%s] [JOB %d] Remote compaction fallback to local by API Start.", + compaction_input.column_family.name.c_str(), job_id_); + return compaction_status; + default: + assert(false); // unknown status + break; + } + + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Waiting for remote compaction...", + compaction_input.column_family.name.c_str(), job_id_); + std::string compaction_result_binary; + compaction_status = db_options_.compaction_service->WaitForCompleteV2( + info, &compaction_result_binary); + + if (compaction_status == CompactionServiceJobStatus::kUseLocal) { + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Remote compaction fallback to local by API " + "WaitForComplete.", + compaction_input.column_family.name.c_str(), job_id_); + return compaction_status; + } + + CompactionServiceResult compaction_result; + s = CompactionServiceResult::Read(compaction_result_binary, + &compaction_result); + + if (compaction_status == CompactionServiceJobStatus::kFailure) { + if (s.ok()) { + if (compaction_result.status.ok()) { + sub_compact->status = Status::Incomplete( + "CompactionService failed to run the compaction job (even though " + "the internal status is okay)."); + } else { + // set the current sub compaction status with the status returned from + // remote + sub_compact->status = compaction_result.status; + } + } else { + sub_compact->status = Status::Incomplete( + "CompactionService failed to run the compaction job (and no valid " + "result is returned)."); + compaction_result.status.PermitUncheckedError(); + } + ROCKS_LOG_WARN(db_options_.info_log, + "[%s] [JOB %d] Remote compaction failed.", + compaction_input.column_family.name.c_str(), job_id_); + return compaction_status; + } + + if (!s.ok()) { + sub_compact->status = s; + compaction_result.status.PermitUncheckedError(); + return CompactionServiceJobStatus::kFailure; + } + sub_compact->status = compaction_result.status; + + std::ostringstream output_files_oss; + is_first_one = true; + for (const auto& file : compaction_result.output_files) { + output_files_oss << (is_first_one ? 
"" : ", ") << file.file_name; + is_first_one = false; + } + + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Receive remote compaction result, output path: " + "%s, files: %s", + compaction_input.column_family.name.c_str(), job_id_, + compaction_result.output_path.c_str(), + output_files_oss.str().c_str()); + + if (!s.ok()) { + sub_compact->status = s; + return CompactionServiceJobStatus::kFailure; + } + + for (const auto& file : compaction_result.output_files) { + uint64_t file_num = versions_->NewFileNumber(); + auto src_file = compaction_result.output_path + "/" + file.file_name; + auto tgt_file = TableFileName(compaction->immutable_options()->cf_paths, + file_num, compaction->output_path_id()); + s = fs_->RenameFile(src_file, tgt_file, IOOptions(), nullptr); + if (!s.ok()) { + sub_compact->status = s; + return CompactionServiceJobStatus::kFailure; + } + + FileMetaData meta; + uint64_t file_size; + s = fs_->GetFileSize(tgt_file, IOOptions(), &file_size, nullptr); + if (!s.ok()) { + sub_compact->status = s; + return CompactionServiceJobStatus::kFailure; + } + meta.fd = FileDescriptor(file_num, compaction->output_path_id(), file_size, + file.smallest_seqno, file.largest_seqno); + meta.smallest.DecodeFrom(file.smallest_internal_key); + meta.largest.DecodeFrom(file.largest_internal_key); + meta.oldest_ancester_time = file.oldest_ancester_time; + meta.file_creation_time = file.file_creation_time; + meta.epoch_number = file.epoch_number; + meta.marked_for_compaction = file.marked_for_compaction; + meta.unique_id = file.unique_id; + + auto cfd = compaction->column_family_data(); + sub_compact->Current().AddOutput(std::move(meta), + cfd->internal_comparator(), false, false, + true, file.paranoid_hash); + } + sub_compact->compaction_job_stats = compaction_result.stats; + sub_compact->Current().SetNumOutputRecords( + compaction_result.num_output_records); + sub_compact->Current().SetTotalBytes(compaction_result.total_bytes); + RecordTick(stats_, REMOTE_COMPACT_READ_BYTES, compaction_result.bytes_read); + RecordTick(stats_, REMOTE_COMPACT_WRITE_BYTES, + compaction_result.bytes_written); + return CompactionServiceJobStatus::kSuccess; +} + +std::string CompactionServiceCompactionJob::GetTableFileName( + uint64_t file_number) { + return MakeTableFileName(output_path_, file_number); +} + +void CompactionServiceCompactionJob::RecordCompactionIOStats() { + compaction_result_->bytes_read += IOSTATS(bytes_read); + compaction_result_->bytes_written += IOSTATS(bytes_written); + CompactionJob::RecordCompactionIOStats(); +} + +CompactionServiceCompactionJob::CompactionServiceCompactionJob( + int job_id, Compaction* compaction, const ImmutableDBOptions& db_options, + const MutableDBOptions& mutable_db_options, const FileOptions& file_options, + VersionSet* versions, const std::atomic* shutting_down, + LogBuffer* log_buffer, FSDirectory* output_directory, Statistics* stats, + InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler, + std::vector existing_snapshots, + std::shared_ptr table_cache, EventLogger* event_logger, + const std::string& dbname, const std::shared_ptr& io_tracer, + const std::atomic& manual_compaction_canceled, + const std::string& db_id, const std::string& db_session_id, + std::string output_path, + const CompactionServiceInput& compaction_service_input, + CompactionServiceResult* compaction_service_result) + : CompactionJob( + job_id, compaction, db_options, mutable_db_options, file_options, + versions, shutting_down, log_buffer, nullptr, output_directory, + nullptr, stats, 
db_mutex, db_error_handler,
+          std::move(existing_snapshots), kMaxSequenceNumber, nullptr, nullptr,
+          std::move(table_cache), event_logger,
+          compaction->mutable_cf_options()->paranoid_file_checks,
+          compaction->mutable_cf_options()->report_bg_io_stats, dbname,
+          &(compaction_service_result->stats), Env::Priority::USER, io_tracer,
+          manual_compaction_canceled, db_id, db_session_id,
+          compaction->column_family_data()->GetFullHistoryTsLow()),
+      output_path_(std::move(output_path)),
+      compaction_input_(compaction_service_input),
+      compaction_result_(compaction_service_result) {}
+
+Status CompactionServiceCompactionJob::Run() {
+  AutoThreadOperationStageUpdater stage_updater(
+      ThreadStatus::STAGE_COMPACTION_RUN);
+
+  auto* c = compact_->compaction;
+  assert(c->column_family_data() != nullptr);
+  assert(c->column_family_data()->current()->storage_info()->NumLevelFiles(
+             compact_->compaction->level()) > 0);
+
+  write_hint_ =
+      c->column_family_data()->CalculateSSTWriteHint(c->output_level());
+  bottommost_level_ = c->bottommost_level();
+
+  Slice begin = compaction_input_.begin;
+  Slice end = compaction_input_.end;
+  compact_->sub_compact_states.emplace_back(
+      c,
+      compaction_input_.has_begin ? std::optional<Slice>(begin)
+                                  : std::optional<Slice>(),
+      compaction_input_.has_end ? std::optional<Slice>(end)
+                                : std::optional<Slice>(),
+      /*sub_job_id*/ 0);
+
+  log_buffer_->FlushBufferToLog();
+  LogCompaction();
+  const uint64_t start_micros = db_options_.clock->NowMicros();
+  // Pick the only sub-compaction we should have
+  assert(compact_->sub_compact_states.size() == 1);
+  SubcompactionState* sub_compact = compact_->sub_compact_states.data();
+
+  ProcessKeyValueCompaction(sub_compact);
+
+  compaction_stats_.stats.micros =
+      db_options_.clock->NowMicros() - start_micros;
+  compaction_stats_.stats.cpu_micros =
+      sub_compact->compaction_job_stats.cpu_micros;
+
+  RecordTimeToHistogram(stats_, COMPACTION_TIME,
+                        compaction_stats_.stats.micros);
+  RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
+                        compaction_stats_.stats.cpu_micros);
+
+  Status status = sub_compact->status;
+  IOStatus io_s = sub_compact->io_status;
+
+  if (io_status_.ok()) {
+    io_status_ = io_s;
+  }
+
+  if (status.ok()) {
+    constexpr IODebugContext* dbg = nullptr;
+
+    if (output_directory_) {
+      io_s = output_directory_->FsyncWithDirOptions(IOOptions(), dbg,
+                                                    DirFsyncOptions());
+    }
+  }
+  if (io_status_.ok()) {
+    io_status_ = io_s;
+  }
+  if (status.ok()) {
+    status = io_s;
+  }
+  if (status.ok()) {
+    // TODO: Add verify_table()
+  }
+
+  // Finish up all book-keeping to unify the subcompaction results
+  compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_);
+  UpdateCompactionStats();
+  RecordCompactionIOStats();
+
+  LogFlush(db_options_.info_log);
+  compact_->status = status;
+  compact_->status.PermitUncheckedError();
+
+  // Build compaction result
+  compaction_result_->output_level = compact_->compaction->output_level();
+  compaction_result_->output_path = output_path_;
+  for (const auto& output_file : sub_compact->GetOutputs()) {
+    auto& meta = output_file.meta;
+    compaction_result_->output_files.emplace_back(
+        MakeTableFileName(meta.fd.GetNumber()), meta.fd.smallest_seqno,
+        meta.fd.largest_seqno, meta.smallest.Encode().ToString(),
+        meta.largest.Encode().ToString(), meta.oldest_ancester_time,
+        meta.file_creation_time, meta.epoch_number,
+        output_file.validator.GetHash(), meta.marked_for_compaction,
+        meta.unique_id);
+  }
+  InternalStats::CompactionStatsFull compaction_stats;
+  sub_compact->AggregateCompactionStats(compaction_stats);
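+  // Copy the aggregated counters into the serializable result that will be
+  // sent back to the primary instance.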
+  compaction_result_->num_output_records =
+      compaction_stats.stats.num_output_records;
+  compaction_result_->total_bytes = compaction_stats.TotalBytesWritten();
+
+  return status;
+}
+
+void CompactionServiceCompactionJob::CleanupCompaction() {
+  CompactionJob::CleanupCompaction();
+}
+
+// Internal binary format for the input and result data
+enum BinaryFormatVersion : uint32_t {
+  kOptionsString = 1,  // Use string format similar to Option string format
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> cfd_type_info = {
+    {"name",
+     {offsetof(struct ColumnFamilyDescriptor, name), OptionType::kEncodedString,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"options",
+     {offsetof(struct ColumnFamilyDescriptor, options),
+      OptionType::kConfigurable, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone,
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const std::string& value, void* addr) {
+        auto cf_options = static_cast<ColumnFamilyOptions*>(addr);
+        return GetColumnFamilyOptionsFromString(opts, ColumnFamilyOptions(),
+                                                value, cf_options);
+      },
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const void* addr, std::string* value) {
+        const auto cf_options = static_cast<const ColumnFamilyOptions*>(addr);
+        std::string result;
+        auto status =
+            GetStringFromColumnFamilyOptions(opts, *cf_options, &result);
+        *value = "{" + result + "}";
+        return status;
+      },
+      [](const ConfigOptions& opts, const std::string& name, const void* addr1,
+         const void* addr2, std::string* mismatch) {
+        const auto this_one = static_cast<const ColumnFamilyOptions*>(addr1);
+        const auto that_one = static_cast<const ColumnFamilyOptions*>(addr2);
+        auto this_conf = CFOptionsAsConfigurable(*this_one);
+        auto that_conf = CFOptionsAsConfigurable(*that_one);
+        std::string mismatch_opt;
+        bool result =
+            this_conf->AreEquivalent(opts, that_conf.get(), &mismatch_opt);
+        if (!result) {
+          *mismatch = name + "." + mismatch_opt;
+        }
+        return result;
+      }}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> cs_input_type_info = {
+    {"column_family",
+     OptionTypeInfo::Struct(
+         "column_family", &cfd_type_info,
+         offsetof(struct CompactionServiceInput, column_family),
+         OptionVerificationType::kNormal, OptionTypeFlags::kNone)},
+    {"db_options",
+     {offsetof(struct CompactionServiceInput, db_options),
+      OptionType::kConfigurable, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone,
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const std::string& value, void* addr) {
+        auto options = static_cast<DBOptions*>(addr);
+        return GetDBOptionsFromString(opts, DBOptions(), value, options);
+      },
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const void* addr, std::string* value) {
+        const auto options = static_cast<const DBOptions*>(addr);
+        std::string result;
+        auto status = GetStringFromDBOptions(opts, *options, &result);
+        *value = "{" + result + "}";
+        return status;
+      },
+      [](const ConfigOptions& opts, const std::string& name, const void* addr1,
+         const void* addr2, std::string* mismatch) {
+        const auto this_one = static_cast<const DBOptions*>(addr1);
+        const auto that_one = static_cast<const DBOptions*>(addr2);
+        auto this_conf = DBOptionsAsConfigurable(*this_one);
+        auto that_conf = DBOptionsAsConfigurable(*that_one);
+        std::string mismatch_opt;
+        bool result =
+            this_conf->AreEquivalent(opts, that_conf.get(), &mismatch_opt);
+        if (!result) {
+          *mismatch = name + "." + mismatch_opt;
+        }
+        return result;
+      }}},
+    {"snapshots", OptionTypeInfo::Vector<uint64_t>(
+                      offsetof(struct CompactionServiceInput, snapshots),
+                      OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+                      {0, OptionType::kUInt64T})},
+    {"input_files", OptionTypeInfo::Vector<std::string>(
+                        offsetof(struct CompactionServiceInput, input_files),
+                        OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+                        {0, OptionType::kEncodedString})},
+    {"output_level",
+     {offsetof(struct CompactionServiceInput, output_level), OptionType::kInt,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"db_id",
+     {offsetof(struct CompactionServiceInput, db_id),
+      OptionType::kEncodedString}},
+    {"has_begin",
+     {offsetof(struct CompactionServiceInput, has_begin), OptionType::kBoolean,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"begin",
+     {offsetof(struct CompactionServiceInput, begin),
+      OptionType::kEncodedString, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"has_end",
+     {offsetof(struct CompactionServiceInput, has_end), OptionType::kBoolean,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"end",
+     {offsetof(struct CompactionServiceInput, end), OptionType::kEncodedString,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo>
+    cs_output_file_type_info = {
+        {"file_name",
+         {offsetof(struct CompactionServiceOutputFile, file_name),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"smallest_seqno",
+         {offsetof(struct CompactionServiceOutputFile, smallest_seqno),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"largest_seqno",
+         {offsetof(struct CompactionServiceOutputFile, largest_seqno),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"smallest_internal_key",
+         {offsetof(struct CompactionServiceOutputFile, smallest_internal_key),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"largest_internal_key",
+         {offsetof(struct CompactionServiceOutputFile, largest_internal_key),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"oldest_ancester_time",
+         {offsetof(struct CompactionServiceOutputFile, oldest_ancester_time),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"file_creation_time",
+         {offsetof(struct CompactionServiceOutputFile, file_creation_time),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"epoch_number",
+         {offsetof(struct CompactionServiceOutputFile, epoch_number),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"paranoid_hash",
+         {offsetof(struct CompactionServiceOutputFile, paranoid_hash),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"marked_for_compaction",
+         {offsetof(struct CompactionServiceOutputFile, marked_for_compaction),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"unique_id",
+         OptionTypeInfo::Array<uint64_t, 2>(
+             offsetof(struct CompactionServiceOutputFile, unique_id),
+             OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+             {0, OptionType::kUInt64T})},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo>
+    compaction_job_stats_type_info = {
+        {"elapsed_micros",
+         {offsetof(struct CompactionJobStats, elapsed_micros),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"cpu_micros",
+         
{offsetof(struct CompactionJobStats, cpu_micros), OptionType::kUInt64T, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"num_input_records", + {offsetof(struct CompactionJobStats, num_input_records), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_blobs_read", + {offsetof(struct CompactionJobStats, num_blobs_read), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_input_files", + {offsetof(struct CompactionJobStats, num_input_files), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_input_files_at_output_level", + {offsetof(struct CompactionJobStats, num_input_files_at_output_level), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_output_records", + {offsetof(struct CompactionJobStats, num_output_records), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_output_files", + {offsetof(struct CompactionJobStats, num_output_files), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_output_files_blob", + {offsetof(struct CompactionJobStats, num_output_files_blob), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"is_full_compaction", + {offsetof(struct CompactionJobStats, is_full_compaction), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"is_manual_compaction", + {offsetof(struct CompactionJobStats, is_manual_compaction), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"total_input_bytes", + {offsetof(struct CompactionJobStats, total_input_bytes), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"total_blob_bytes_read", + {offsetof(struct CompactionJobStats, total_blob_bytes_read), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"total_output_bytes", + {offsetof(struct CompactionJobStats, total_output_bytes), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"total_output_bytes_blob", + {offsetof(struct CompactionJobStats, total_output_bytes_blob), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_records_replaced", + {offsetof(struct CompactionJobStats, num_records_replaced), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"total_input_raw_key_bytes", + {offsetof(struct CompactionJobStats, total_input_raw_key_bytes), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"total_input_raw_value_bytes", + {offsetof(struct CompactionJobStats, total_input_raw_value_bytes), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_input_deletion_records", + {offsetof(struct CompactionJobStats, num_input_deletion_records), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_expired_deletion_records", + {offsetof(struct CompactionJobStats, num_expired_deletion_records), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_corrupt_keys", + {offsetof(struct CompactionJobStats, num_corrupt_keys), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"file_write_nanos", + {offsetof(struct CompactionJobStats, file_write_nanos), + 
OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"file_range_sync_nanos", + {offsetof(struct CompactionJobStats, file_range_sync_nanos), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"file_fsync_nanos", + {offsetof(struct CompactionJobStats, file_fsync_nanos), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"file_prepare_write_nanos", + {offsetof(struct CompactionJobStats, file_prepare_write_nanos), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"smallest_output_key_prefix", + {offsetof(struct CompactionJobStats, smallest_output_key_prefix), + OptionType::kEncodedString, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"largest_output_key_prefix", + {offsetof(struct CompactionJobStats, largest_output_key_prefix), + OptionType::kEncodedString, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_single_del_fallthru", + {offsetof(struct CompactionJobStats, num_single_del_fallthru), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_single_del_mismatch", + {offsetof(struct CompactionJobStats, num_single_del_mismatch), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; + +namespace { +// this is a helper struct to serialize and deserialize class Status, because +// Status's members are not public. +struct StatusSerializationAdapter { + uint8_t code; + uint8_t subcode; + uint8_t severity; + std::string message; + + StatusSerializationAdapter() = default; + explicit StatusSerializationAdapter(const Status& s) { + code = s.code(); + subcode = s.subcode(); + severity = s.severity(); + auto msg = s.getState(); + message = msg ? 
msg : "";
+  }
+
+  Status GetStatus() const {
+    return Status{static_cast<Status::Code>(code),
+                  static_cast<Status::SubCode>(subcode),
+                  static_cast<Status::Severity>(severity), message};
+  }
+};
+}  // namespace
+
+static std::unordered_map<std::string, OptionTypeInfo>
+    status_adapter_type_info = {
+        {"code",
+         {offsetof(struct StatusSerializationAdapter, code),
+          OptionType::kUInt8T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"subcode",
+         {offsetof(struct StatusSerializationAdapter, subcode),
+          OptionType::kUInt8T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"severity",
+         {offsetof(struct StatusSerializationAdapter, severity),
+          OptionType::kUInt8T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"message",
+         {offsetof(struct StatusSerializationAdapter, message),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> cs_result_type_info = {
+    {"status",
+     {offsetof(struct CompactionServiceResult, status),
+      OptionType::kCustomizable, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone,
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const std::string& value, void* addr) {
+        auto status_obj = static_cast<Status*>(addr);
+        StatusSerializationAdapter adapter;
+        Status s = OptionTypeInfo::ParseType(
+            opts, value, status_adapter_type_info, &adapter);
+        *status_obj = adapter.GetStatus();
+        return s;
+      },
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const void* addr, std::string* value) {
+        const auto status_obj = static_cast<const Status*>(addr);
+        StatusSerializationAdapter adapter(*status_obj);
+        std::string result;
+        Status s = OptionTypeInfo::SerializeType(opts, status_adapter_type_info,
+                                                 &adapter, &result);
+        *value = "{" + result + "}";
+        return s;
+      },
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const void* addr1, const void* addr2, std::string* mismatch) {
+        const auto status1 = static_cast<const Status*>(addr1);
+        const auto status2 = static_cast<const Status*>(addr2);
+
+        StatusSerializationAdapter adapter1(*status1);
+        StatusSerializationAdapter adapter2(*status2);
+        return OptionTypeInfo::TypesAreEqual(opts, status_adapter_type_info,
+                                             &adapter1, &adapter2, mismatch);
+      }}},
+    {"output_files",
+     OptionTypeInfo::Vector<CompactionServiceOutputFile>(
+         offsetof(struct CompactionServiceResult, output_files),
+         OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+         OptionTypeInfo::Struct("output_files", &cs_output_file_type_info, 0,
+                                OptionVerificationType::kNormal,
+                                OptionTypeFlags::kNone))},
+    {"output_level",
+     {offsetof(struct CompactionServiceResult, output_level), OptionType::kInt,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"output_path",
+     {offsetof(struct CompactionServiceResult, output_path),
+      OptionType::kEncodedString, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"num_output_records",
+     {offsetof(struct CompactionServiceResult, num_output_records),
+      OptionType::kUInt64T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"total_bytes",
+     {offsetof(struct CompactionServiceResult, total_bytes),
+      OptionType::kUInt64T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"bytes_read",
+     {offsetof(struct CompactionServiceResult, bytes_read),
+      OptionType::kUInt64T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"bytes_written",
+     {offsetof(struct CompactionServiceResult, bytes_written),
+      OptionType::kUInt64T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"stats", OptionTypeInfo::Struct(
+                  "stats", &compaction_job_stats_type_info,
+                  offsetof(struct
CompactionServiceResult, stats), + OptionVerificationType::kNormal, OptionTypeFlags::kNone)}, +}; + +Status CompactionServiceInput::Read(const std::string& data_str, + CompactionServiceInput* obj) { + if (data_str.size() <= sizeof(BinaryFormatVersion)) { + return Status::InvalidArgument("Invalid CompactionServiceInput string"); + } + auto format_version = DecodeFixed32(data_str.data()); + if (format_version == kOptionsString) { + ConfigOptions cf; + cf.invoke_prepare_options = false; + cf.ignore_unknown_options = true; + return OptionTypeInfo::ParseType( + cf, data_str.substr(sizeof(BinaryFormatVersion)), cs_input_type_info, + obj); + } else { + return Status::NotSupported( + "Compaction Service Input data version not supported: " + + std::to_string(format_version)); + } +} + +Status CompactionServiceInput::Write(std::string* output) { + char buf[sizeof(BinaryFormatVersion)]; + EncodeFixed32(buf, kOptionsString); + output->append(buf, sizeof(BinaryFormatVersion)); + ConfigOptions cf; + cf.invoke_prepare_options = false; + return OptionTypeInfo::SerializeType(cf, cs_input_type_info, this, output); +} + +Status CompactionServiceResult::Read(const std::string& data_str, + CompactionServiceResult* obj) { + if (data_str.size() <= sizeof(BinaryFormatVersion)) { + return Status::InvalidArgument("Invalid CompactionServiceResult string"); + } + auto format_version = DecodeFixed32(data_str.data()); + if (format_version == kOptionsString) { + ConfigOptions cf; + cf.invoke_prepare_options = false; + cf.ignore_unknown_options = true; + return OptionTypeInfo::ParseType( + cf, data_str.substr(sizeof(BinaryFormatVersion)), cs_result_type_info, + obj); + } else { + return Status::NotSupported( + "Compaction Service Result data version not supported: " + + std::to_string(format_version)); + } +} + +Status CompactionServiceResult::Write(std::string* output) { + char buf[sizeof(BinaryFormatVersion)]; + EncodeFixed32(buf, kOptionsString); + output->append(buf, sizeof(BinaryFormatVersion)); + ConfigOptions cf; + cf.invoke_prepare_options = false; + return OptionTypeInfo::SerializeType(cf, cs_result_type_info, this, output); +} + +#ifndef NDEBUG +bool CompactionServiceResult::TEST_Equals(CompactionServiceResult* other) { + std::string mismatch; + return TEST_Equals(other, &mismatch); +} + +bool CompactionServiceResult::TEST_Equals(CompactionServiceResult* other, + std::string* mismatch) { + ConfigOptions cf; + cf.invoke_prepare_options = false; + return OptionTypeInfo::TypesAreEqual(cf, cs_result_type_info, this, other, + mismatch); +} + +bool CompactionServiceInput::TEST_Equals(CompactionServiceInput* other) { + std::string mismatch; + return TEST_Equals(other, &mismatch); +} + +bool CompactionServiceInput::TEST_Equals(CompactionServiceInput* other, + std::string* mismatch) { + ConfigOptions cf; + cf.invoke_prepare_options = false; + return OptionTypeInfo::TypesAreEqual(cf, cs_input_type_info, this, other, + mismatch); +} +#endif // NDEBUG +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.cc new file mode 100644 index 0000000000..ee4b0c1896 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.cc @@ -0,0 +1,46 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/compaction/compaction_state.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Slice CompactionState::SmallestUserKey() {
+  for (const auto& sub_compact_state : sub_compact_states) {
+    Slice smallest = sub_compact_state.SmallestUserKey();
+    if (!smallest.empty()) {
+      return smallest;
+    }
+  }
+  // If there is no finished output, return an empty slice.
+  return Slice{nullptr, 0};
+}
+
+Slice CompactionState::LargestUserKey() {
+  for (auto it = sub_compact_states.rbegin(); it < sub_compact_states.rend();
+       ++it) {
+    Slice largest = it->LargestUserKey();
+    if (!largest.empty()) {
+      return largest;
+    }
+  }
+  // If there is no finished output, return an empty slice.
+  return Slice{nullptr, 0};
+}
+
+void CompactionState::AggregateCompactionStats(
+    InternalStats::CompactionStatsFull& compaction_stats,
+    CompactionJobStats& compaction_job_stats) {
+  for (const auto& sc : sub_compact_states) {
+    sc.AggregateCompactionStats(compaction_stats);
+    compaction_job_stats.Add(sc.compaction_job_stats);
+  }
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.h
new file mode 100644
index 0000000000..cc5b66c682
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/compaction_state.h
@@ -0,0 +1,42 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "db/compaction/compaction.h"
+#include "db/compaction/subcompaction_state.h"
+#include "db/internal_stats.h"
+
+// Data structures used for compaction_job and compaction_service_job, which
+// hold the list of sub_compact_states and the aggregated information for
+// the compaction.
+namespace ROCKSDB_NAMESPACE {
+
+// Maintains state for the entire compaction
+class CompactionState {
+ public:
+  Compaction* const compaction;
+
+  // REQUIRED: subcompaction states are stored in order of increasing key-range
+  std::vector<SubcompactionState> sub_compact_states;
+  Status status;
+
+  void AggregateCompactionStats(
+      InternalStats::CompactionStatsFull& compaction_stats,
+      CompactionJobStats& compaction_job_stats);
+
+  explicit CompactionState(Compaction* c) : compaction(c) {}
+
+  Slice SmallestUserKey();
+
+  Slice LargestUserKey();
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/file_pri.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/file_pri.h
new file mode 100644
index 0000000000..82dddcf938
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/file_pri.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+#include <algorithm>
+
+#include "db/version_edit.h"
+
+namespace ROCKSDB_NAMESPACE {
+// We boost files that are closer to the TTL limit. This boosting could be
+// done through FileMetaData.compensated_file_size, but that compensated size
+// is widely used as something similar to file size, so dramatically boosting
+// the value might cause unintended consequences.
+//
+// This boosting algorithm can go very fancy, but here we use a simple
+// formula which can satisfy:
+// (1) Different levels are triggered slightly differently to avoid
+//     too many cascading cases
+// (2) Files in the same level get boosted more as the TTL gets closer.
+//
+// Don't do any boosting before half of the TTL has passed. This keeps
+// write amplification lower in the common case. All levels should be
+// fully boosted when the total TTL compaction threshold triggers.
+// Differentiate the boosting ranges of each level by 1/2. This makes the
+// range for each level increase exponentially. We could instead make them
+// equal, or go even fancier. We can adjust it after we observe the
+// behavior in production.
+// The thresholds where boosting starts:
+// +------------------------------------------------------------------+
+//  ^                            ^                ^          ^    ^  ^
+//  Age 0 ...        |           |     second last level threshold
+//                   |           |
+//                   |           third last level
+//                   |
+//                   fourth last level
+//
+// The boost arbitrarily starts at 0 when a file reaches boost_age_start and
+// grows linearly. The ratio is arbitrarily set so that when the next level
+// starts to boost, the previous level's boosting amount is 16.
+class FileTtlBooster {
+ public:
+  FileTtlBooster(uint64_t current_time, uint64_t ttl, int num_non_empty_levels,
+                 int level)
+      : current_time_(current_time) {
+    if (ttl == 0 || level == 0 || level >= num_non_empty_levels - 1) {
+      enabled_ = false;
+      boost_age_start_ = 0;
+      boost_step_ = 1;
+    } else {
+      enabled_ = true;
+      uint64_t all_boost_start_age = ttl / 2;
+      uint64_t all_boost_age_range = (ttl / 32) * 31 - all_boost_start_age;
+      uint64_t boost_age_range =
+          all_boost_age_range >> (num_non_empty_levels - level - 1);
+      boost_age_start_ = all_boost_start_age + boost_age_range;
+      const uint64_t kBoostRatio = 16;
+      // Prevent a zero value to avoid division-by-zero errors.
+      boost_step_ = std::max(boost_age_range / kBoostRatio, uint64_t{1});
+    }
+  }
+
+  uint64_t GetBoostScore(FileMetaData* f) {
+    if (!enabled_) {
+      return 1;
+    }
+    uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime();
+    if (oldest_ancester_time >= current_time_) {
+      return 1;
+    }
+    uint64_t age = current_time_ - oldest_ancester_time;
+    if (age > boost_age_start_) {
+      // Use integer just for convenience.
+      // We could make all file_to_order double if we want.
+      // Technically this can overflow if users override timing and
+      // give a very high current time. Ignore the case for simplicity.
+      // Boosting is an addition to the current value, so +1. This effectively
+      // makes boosting kick in after the first boost_step_ is reached.
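+      // Rough worked example with hypothetical numbers (not taken from any
+      // real configuration): ttl = 64 days, num_non_empty_levels = 4,
+      // level = 2 gives all_boost_start_age = 32d,
+      // all_boost_age_range = 62d - 32d = 30d, boost_age_range = 30d >> 1 =
+      // 15d, boost_age_start_ = 47d and boost_step_ = max(15d / 16, 1) = 1d.
+      // A file aged 55 days then gets a boost score of (55 - 47) / 1 + 1 = 9.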
+      return (age - boost_age_start_) / boost_step_ + 1;
+    }
+    return 1;
+  }
+
+ private:
+  bool enabled_;
+  uint64_t current_time_;
+  uint64_t boost_age_start_;
+  uint64_t boost_step_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/sst_partitioner.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/sst_partitioner.cc
new file mode 100644
index 0000000000..2f4d879357
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/sst_partitioner.cc
@@ -0,0 +1,83 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#include "rocksdb/sst_partitioner.h"
+
+#include <algorithm>
+
+#include "rocksdb/utilities/customizable_util.h"
+#include "rocksdb/utilities/object_registry.h"
+#include "rocksdb/utilities/options_type.h"
+
+namespace ROCKSDB_NAMESPACE {
+static std::unordered_map<std::string, OptionTypeInfo>
+    sst_fixed_prefix_type_info = {
+        {"length",
+         {0, OptionType::kSizeT, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+};
+
+SstPartitionerFixedPrefixFactory::SstPartitionerFixedPrefixFactory(size_t len)
+    : len_(len) {
+  RegisterOptions("Length", &len_, &sst_fixed_prefix_type_info);
+}
+
+PartitionerResult SstPartitionerFixedPrefix::ShouldPartition(
+    const PartitionerRequest& request) {
+  Slice last_key_fixed(*request.prev_user_key);
+  if (last_key_fixed.size() > len_) {
+    last_key_fixed.size_ = len_;
+  }
+  Slice current_key_fixed(*request.current_user_key);
+  if (current_key_fixed.size() > len_) {
+    current_key_fixed.size_ = len_;
+  }
+  return last_key_fixed.compare(current_key_fixed) != 0 ? kRequired
+                                                        : kNotRequired;
+}
+
+bool SstPartitionerFixedPrefix::CanDoTrivialMove(
+    const Slice& smallest_user_key, const Slice& largest_user_key) {
+  return ShouldPartition(PartitionerRequest(smallest_user_key, largest_user_key,
+                                            0)) == kNotRequired;
+}
+
+std::unique_ptr<SstPartitioner>
+SstPartitionerFixedPrefixFactory::CreatePartitioner(
+    const SstPartitioner::Context& /* context */) const {
+  return std::unique_ptr<SstPartitioner>(new SstPartitionerFixedPrefix(len_));
+}
+
+std::shared_ptr<SstPartitionerFactory> NewSstPartitionerFixedPrefixFactory(
+    size_t prefix_len) {
+  return std::make_shared<SstPartitionerFixedPrefixFactory>(prefix_len);
+}
+
+namespace {
+static int RegisterSstPartitionerFactories(ObjectLibrary& library,
+                                           const std::string& /*arg*/) {
+  library.AddFactory<SstPartitionerFactory>(
+      SstPartitionerFixedPrefixFactory::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<SstPartitionerFactory>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(new SstPartitionerFixedPrefixFactory(0));
+        return guard->get();
+      });
+  return 1;
+}
+}  // namespace
+
+Status SstPartitionerFactory::CreateFromString(
+    const ConfigOptions& options, const std::string& value,
+    std::shared_ptr<SstPartitionerFactory>* result) {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    RegisterSstPartitionerFactories(*(ObjectLibrary::Default().get()), "");
+  });
+  return LoadSharedObject<SstPartitionerFactory>(options, value, result);
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.cc
new file mode 100644
index 0000000000..0c56471e92
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.cc
@@ -0,0 +1,106 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/compaction/subcompaction_state.h"
+
+#include "rocksdb/sst_partitioner.h"
+
+namespace ROCKSDB_NAMESPACE {
+void SubcompactionState::AggregateCompactionStats(
+    InternalStats::CompactionStatsFull& compaction_stats) const {
+  compaction_stats.stats.Add(compaction_outputs_.stats_);
+  if (HasPenultimateLevelOutputs()) {
+    compaction_stats.has_penultimate_level_output = true;
+    compaction_stats.penultimate_level_stats.Add(
+        penultimate_level_outputs_.stats_);
+  }
+}
+
+OutputIterator SubcompactionState::GetOutputs() const {
+  return OutputIterator(penultimate_level_outputs_.outputs_,
+                        compaction_outputs_.outputs_);
+}
+
+void SubcompactionState::Cleanup(Cache* cache) {
+  penultimate_level_outputs_.Cleanup();
+  compaction_outputs_.Cleanup();
+
+  if (!status.ok()) {
+    for (const auto& out : GetOutputs()) {
+      // If this file was inserted into the table cache then remove
+      // it here because this compaction was not committed.
+      TableCache::Evict(cache, out.meta.fd.GetNumber());
+    }
+  }
+  // TODO: sub_compact.io_status is not checked like status. Not sure if that's
+  // intentional. So ignoring the io_status as of now.
+  io_status.PermitUncheckedError();
+}
+
+Slice SubcompactionState::SmallestUserKey() const {
+  if (has_penultimate_level_outputs_) {
+    Slice a = compaction_outputs_.SmallestUserKey();
+    Slice b = penultimate_level_outputs_.SmallestUserKey();
+    if (a.empty()) {
+      return b;
+    }
+    if (b.empty()) {
+      return a;
+    }
+    const Comparator* user_cmp =
+        compaction->column_family_data()->user_comparator();
+    if (user_cmp->Compare(a, b) > 0) {
+      return b;
+    } else {
+      return a;
+    }
+  } else {
+    return compaction_outputs_.SmallestUserKey();
+  }
+}
+
+Slice SubcompactionState::LargestUserKey() const {
+  if (has_penultimate_level_outputs_) {
+    Slice a = compaction_outputs_.LargestUserKey();
+    Slice b = penultimate_level_outputs_.LargestUserKey();
+    if (a.empty()) {
+      return b;
+    }
+    if (b.empty()) {
+      return a;
+    }
+    const Comparator* user_cmp =
+        compaction->column_family_data()->user_comparator();
+    if (user_cmp->Compare(a, b) < 0) {
+      return b;
+    } else {
+      return a;
+    }
+  } else {
+    return compaction_outputs_.LargestUserKey();
+  }
+}
+
+Status SubcompactionState::AddToOutput(
+    const CompactionIterator& iter,
+    const CompactionFileOpenFunc& open_file_func,
+    const CompactionFileCloseFunc& close_file_func) {
+  // update target output first
+  is_current_penultimate_level_ = iter.output_to_penultimate_level();
+  current_outputs_ = is_current_penultimate_level_ ? &penultimate_level_outputs_
+                                                   : &compaction_outputs_;
+  if (is_current_penultimate_level_) {
+    has_penultimate_level_outputs_ = true;
+  }
+
+  return Current().AddToOutput(iter, open_file_func, close_file_func);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.h
new file mode 100644
index 0000000000..b933a62a51
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/compaction/subcompaction_state.h
@@ -0,0 +1,220 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <optional>
+
+#include "db/blob/blob_file_addition.h"
+#include "db/blob/blob_garbage_meter.h"
+#include "db/compaction/compaction.h"
+#include "db/compaction/compaction_iterator.h"
+#include "db/compaction/compaction_outputs.h"
+#include "db/internal_stats.h"
+#include "db/output_validator.h"
+#include "db/range_del_aggregator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Maintains state and outputs for each sub-compaction.
+// It contains 2 `CompactionOutputs`:
+// 1. one for the normal output files
+// 2. another for the penultimate level outputs
+// A `current` pointer tracks the current output group: when `AddToOutput()`
+// is called, it checks the output of the current compaction_iterator key and
+// points `current` at the target output group. By default it points to the
+// normal compaction_outputs; if the compaction_iterator key should be placed
+// on the penultimate level, `current` is changed to point to
+// `penultimate_level_outputs`.
+// Later operations use `Current()` to get the target group.
+//
+//  +----------+     +-----------------------------+     +---------+
+//  | *current |---->|     compaction_outputs      |---->| output  |
+//  +----------+     +-----------------------------+     +---------+
+//       |                                               | output  |
+//       |                                               +---------+
+//       |                                               |   ...   |
+//       |
+//       |           +-----------------------------+     +---------+
+//       +---------->|  penultimate_level_outputs  |---->| output  |
+//                   +-----------------------------+     +---------+
+//                                                       |   ...   |
+
+class SubcompactionState {
+ public:
+  const Compaction* compaction;
+
+  // The boundaries of the key-range this compaction is interested in. No two
+  // sub-compactions may have overlapping key-ranges.
+  // 'start' is inclusive, 'end' is exclusive, and nullptr means unbounded
+  const std::optional<Slice> start, end;
+
+  // The return status of this sub-compaction
+  Status status;
+
+  // The return IO Status of this sub-compaction
+  IOStatus io_status;
+
+  // Notify on sub-compaction completion only if listener was notified on
+  // sub-compaction begin.
+  bool notify_on_subcompaction_completion = false;
+
+  // compaction job stats for this sub-compaction
+  CompactionJobStats compaction_job_stats;
+
+  // sub-compaction job id, which is used to identify different sub-compactions
+  // within the same compaction job.
+  const uint32_t sub_job_id;
+
+  Slice SmallestUserKey() const;
+
+  Slice LargestUserKey() const;
+
+  // Get all outputs from the subcompaction. For per_key_placement compaction,
+  // it returns both the last level outputs and penultimate level outputs.
+  OutputIterator GetOutputs() const;
+
+  // Assign the range-dels aggregator. Each range_del can only be assigned
+  // to one output level; for per_key_placement, that is going to be the
+  // penultimate level.
+  // TODO: This does not work for per_key_placement + user-defined timestamp +
+  // DeleteRange() combo. If user-defined timestamp is enabled,
+  // it is possible for a range tombstone to belong to the bottommost level (
+  // seqno < earliest snapshot) without being dropped (garbage collection
+  // for user-defined timestamp).
+  void AssignRangeDelAggregator(
+      std::unique_ptr<CompactionRangeDelAggregator>&& range_del_agg) {
+    if (compaction->SupportsPerKeyPlacement()) {
+      penultimate_level_outputs_.AssignRangeDelAggregator(
+          std::move(range_del_agg));
+    } else {
+      compaction_outputs_.AssignRangeDelAggregator(std::move(range_del_agg));
+    }
+  }
+
+  void RemoveLastEmptyOutput() {
+    compaction_outputs_.RemoveLastEmptyOutput();
+    penultimate_level_outputs_.RemoveLastEmptyOutput();
+  }
+
+  void BuildSubcompactionJobInfo(
+      SubcompactionJobInfo& subcompaction_job_info) const {
+    const Compaction* c = compaction;
+    const ColumnFamilyData* cfd = c->column_family_data();
+
+    subcompaction_job_info.cf_id = cfd->GetID();
+    subcompaction_job_info.cf_name = cfd->GetName();
+    subcompaction_job_info.status = status;
+    subcompaction_job_info.subcompaction_job_id = static_cast<int>(sub_job_id);
+    subcompaction_job_info.base_input_level = c->start_level();
+    subcompaction_job_info.output_level = c->output_level();
+    subcompaction_job_info.stats = compaction_job_stats;
+  }
+
+  SubcompactionState() = delete;
+  SubcompactionState(const SubcompactionState&) = delete;
+  SubcompactionState& operator=(const SubcompactionState&) = delete;
+
+  SubcompactionState(Compaction* c, const std::optional<Slice> _start,
+                     const std::optional<Slice> _end, uint32_t _sub_job_id)
+      : compaction(c),
+        start(_start),
+        end(_end),
+        sub_job_id(_sub_job_id),
+        compaction_outputs_(c, /*is_penultimate_level=*/false),
+        penultimate_level_outputs_(c, /*is_penultimate_level=*/true) {
+    assert(compaction != nullptr);
+    // Set output split key (used for RoundRobin feature) only for normal
+    // compaction_outputs; the output-to-penultimate-level feature doesn't
+    // support the RoundRobin feature (and may never be supported, because for
+    // RoundRobin the data is mostly naturally sorted by time, so there is no
+    // need for per-key placement with output_to_penultimate_level).
+    compaction_outputs_.SetOutputSlitKey(start, end);
+  }
+
+  SubcompactionState(SubcompactionState&& state) noexcept
+      : compaction(state.compaction),
+        start(state.start),
+        end(state.end),
+        status(std::move(state.status)),
+        io_status(std::move(state.io_status)),
+        notify_on_subcompaction_completion(
+            state.notify_on_subcompaction_completion),
+        compaction_job_stats(std::move(state.compaction_job_stats)),
+        sub_job_id(state.sub_job_id),
+        compaction_outputs_(std::move(state.compaction_outputs_)),
+        penultimate_level_outputs_(std::move(state.penultimate_level_outputs_)),
+        is_current_penultimate_level_(state.is_current_penultimate_level_),
+        has_penultimate_level_outputs_(state.has_penultimate_level_outputs_) {
+    current_outputs_ = is_current_penultimate_level_
+                           ?
&penultimate_level_outputs_ + : &compaction_outputs_; + } + + bool HasPenultimateLevelOutputs() const { + return has_penultimate_level_outputs_ || + penultimate_level_outputs_.HasRangeDel(); + } + + bool IsCurrentPenultimateLevel() const { + return is_current_penultimate_level_; + } + + // Add all the new files from this compaction to version_edit + void AddOutputsEdit(VersionEdit* out_edit) const { + for (const auto& file : penultimate_level_outputs_.outputs_) { + out_edit->AddFile(compaction->GetPenultimateLevel(), file.meta); + } + for (const auto& file : compaction_outputs_.outputs_) { + out_edit->AddFile(compaction->output_level(), file.meta); + } + } + + void Cleanup(Cache* cache); + + void AggregateCompactionStats( + InternalStats::CompactionStatsFull& compaction_stats) const; + + CompactionOutputs& Current() const { + assert(current_outputs_); + return *current_outputs_; + } + + // Add compaction_iterator key/value to the `Current` output group. + Status AddToOutput(const CompactionIterator& iter, + const CompactionFileOpenFunc& open_file_func, + const CompactionFileCloseFunc& close_file_func); + + // Close all compaction output files, both output_to_penultimate_level outputs + // and normal outputs. + Status CloseCompactionFiles(const Status& curr_status, + const CompactionFileOpenFunc& open_file_func, + const CompactionFileCloseFunc& close_file_func) { + // Call FinishCompactionOutputFile() even if status is not ok: it needs to + // close the output file. + // CloseOutput() may open new compaction output files. + is_current_penultimate_level_ = true; + Status s = penultimate_level_outputs_.CloseOutput( + curr_status, open_file_func, close_file_func); + is_current_penultimate_level_ = false; + s = compaction_outputs_.CloseOutput(s, open_file_func, close_file_func); + return s; + } + + private: + // State kept for output being generated + CompactionOutputs compaction_outputs_; + CompactionOutputs penultimate_level_outputs_; + CompactionOutputs* current_outputs_ = &compaction_outputs_; + bool is_current_penultimate_level_ = false; + bool has_penultimate_level_outputs_ = false; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/convenience.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/convenience.cc new file mode 100644 index 0000000000..32cdfafaab --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/convenience.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+//
+
+
+#include "rocksdb/convenience.h"
+
+#include "db/db_impl/db_impl.h"
+#include "util/cast_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+void CancelAllBackgroundWork(DB* db, bool wait) {
+  (static_cast_with_check<DBImpl>(db->GetRootDB()))
+      ->CancelAllBackgroundWork(wait);
+}
+
+Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
+                          const Slice* begin, const Slice* end,
+                          bool include_end) {
+  RangePtr range(begin, end);
+  return DeleteFilesInRanges(db, column_family, &range, 1, include_end);
+}
+
+Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
+                           const RangePtr* ranges, size_t n, bool include_end) {
+  return (static_cast_with_check<DBImpl>(db->GetRootDB()))
+      ->DeleteFilesInRanges(column_family, ranges, n, include_end);
+}
+
+Status VerifySstFileChecksum(const Options& options,
+                             const EnvOptions& env_options,
+                             const std::string& file_path) {
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  return VerifySstFileChecksum(options, env_options, read_options, file_path);
+}
+Status VerifySstFileChecksum(const Options& options,
+                             const EnvOptions& env_options,
+                             const ReadOptions& read_options,
+                             const std::string& file_path,
+                             const SequenceNumber& largest_seqno) {
+  std::unique_ptr<FSRandomAccessFile> file;
+  uint64_t file_size;
+  InternalKeyComparator internal_comparator(options.comparator);
+  ImmutableOptions ioptions(options);
+
+  Status s = ioptions.fs->NewRandomAccessFile(
+      file_path, FileOptions(env_options), &file, nullptr);
+  if (s.ok()) {
+    s = ioptions.fs->GetFileSize(file_path, IOOptions(), &file_size, nullptr);
+  } else {
+    return s;
+  }
+  std::unique_ptr<TableReader> table_reader;
+  std::unique_ptr<RandomAccessFileReader> file_reader(
+      new RandomAccessFileReader(
+          std::move(file), file_path, ioptions.clock, nullptr /* io_tracer */,
+          ioptions.stats /* stats */,
+          Histograms::SST_READ_MICROS /* hist_type */,
+          nullptr /* file_read_hist */, ioptions.rate_limiter.get()));
+  const bool kImmortal = true;
+  auto reader_options = TableReaderOptions(
+      ioptions, options.prefix_extractor, env_options, internal_comparator,
+      options.block_protection_bytes_per_key, false /* skip_filters */,
+      !kImmortal, false /* force_direct_prefetch */, -1 /* level */);
+  reader_options.largest_seqno = largest_seqno;
+  s = ioptions.table_factory->NewTableReader(
+      reader_options, std::move(file_reader), file_size, &table_reader,
+      false /* prefetch_index_and_filter_in_cache */);
+  if (!s.ok()) {
+    return s;
+  }
+  s = table_reader->VerifyChecksum(read_options,
+                                   TableReaderCaller::kUserVerifyChecksum);
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_filesnapshot.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_filesnapshot.cc
new file mode 100644
index 0000000000..f035041c7b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_filesnapshot.cc
@@ -0,0 +1,436 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "db/db_impl/db_impl.h"
+#include "db/job_context.h"
+#include "db/version_set.h"
+#include "file/file_util.h"
+#include "file/filename.h"
+#include "logging/logging.h"
+#include "port/port.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/metadata.h"
+#include "rocksdb/types.h"
+#include "test_util/sync_point.h"
+#include "util/file_checksum_helper.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status DBImpl::FlushForGetLiveFiles() {
+  mutex_.AssertHeld();
+
+  // flush all dirty data to disk.
+  Status status;
+  if (immutable_db_options_.atomic_flush) {
+    mutex_.Unlock();
+    status = AtomicFlushMemTables(FlushOptions(), FlushReason::kGetLiveFiles);
+    if (status.IsColumnFamilyDropped()) {
+      status = Status::OK();
+    }
+    mutex_.Lock();
+  } else {
+    for (auto cfd : versions_->GetRefedColumnFamilySet()) {
+      if (cfd->IsDropped()) {
+        continue;
+      }
+      mutex_.Unlock();
+      status = FlushMemTable(cfd, FlushOptions(), FlushReason::kGetLiveFiles);
+      TEST_SYNC_POINT("DBImpl::GetLiveFiles:1");
+      TEST_SYNC_POINT("DBImpl::GetLiveFiles:2");
+      mutex_.Lock();
+      if (!status.ok() && !status.IsColumnFamilyDropped()) {
+        break;
+      } else if (status.IsColumnFamilyDropped()) {
+        status = Status::OK();
+      }
+    }
+  }
+  return status;
+}
+
+Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
+                            uint64_t* manifest_file_size, bool flush_memtable) {
+  *manifest_file_size = 0;
+
+  mutex_.Lock();
+
+  if (flush_memtable) {
+    Status status = FlushForGetLiveFiles();
+    if (!status.ok()) {
+      mutex_.Unlock();
+      ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Cannot Flush data %s\n",
+                      status.ToString().c_str());
+      return status;
+    }
+  }
+
+  // Make a set of all of the live table and blob files
+  std::vector<uint64_t> live_table_files;
+  std::vector<uint64_t> live_blob_files;
+  for (auto cfd : *versions_->GetColumnFamilySet()) {
+    if (cfd->IsDropped()) {
+      continue;
+    }
+    cfd->current()->AddLiveFiles(&live_table_files, &live_blob_files);
+  }
+
+  ret.clear();
+  ret.reserve(live_table_files.size() + live_blob_files.size() +
+              3);  // for CURRENT + MANIFEST + OPTIONS
+
+  // Create names of the live files. The names are not absolute
+  // paths; instead they are relative to dbname_.
+  for (const auto& table_file_number : live_table_files) {
+    ret.emplace_back(MakeTableFileName("", table_file_number));
+  }
+
+  for (const auto& blob_file_number : live_blob_files) {
+    ret.emplace_back(BlobFileName("", blob_file_number));
+  }
+
+  ret.emplace_back(CurrentFileName(""));
+  ret.emplace_back(DescriptorFileName("", versions_->manifest_file_number()));
+  // The OPTIONS file number is zero in read-write mode when OPTIONS file
+  // writing failed and the DB was configured with
+  // `fail_if_options_file_error == false`. In read-only mode the OPTIONS file
+  // number is zero when no OPTIONS file exists at all. In those cases we do
+  // not record any OPTIONS file in the live file list.
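+  // For illustration, with hypothetical file numbers `ret` now holds
+  // dbname_-relative names such as "/000011.sst", "/000012.blob", "/CURRENT"
+  // and "/MANIFEST-000004"; an OPTIONS entry of the form "/OPTIONS-<number>"
+  // is appended below when one exists.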
+  if (versions_->options_file_number() != 0) {
+    ret.emplace_back(OptionsFileName("", versions_->options_file_number()));
+  }
+
+  // find length of manifest file while holding the mutex lock
+  *manifest_file_size = versions_->manifest_file_size();
+
+  mutex_.Unlock();
+  return Status::OK();
+}
+
+Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
+  // Record tracked WALs as a (minimum) cross-check for directory scan
+  std::vector<uint64_t> required_by_manifest;
+
+  // If caller disabled deletions, this function should return files that are
+  // guaranteed not to be deleted until deletions are re-enabled. We need to
+  // wait for pending purges to finish since WalManager doesn't know which
+  // files are going to be purged. Additional purges won't be scheduled as
+  // long as deletions are disabled (so the below loop must terminate).
+  // Also note that we disable deletions anyway to avoid the case where a
+  // file is deleted in the middle of the scan, causing IO error.
+  Status deletions_disabled = DisableFileDeletions();
+  {
+    InstrumentedMutexLock l(&mutex_);
+    while (pending_purge_obsolete_files_ > 0 || bg_purge_scheduled_ > 0) {
+      bg_cv_.Wait();
+    }
+
+    // Record tracked WALs as a (minimum) cross-check for directory scan
+    const auto& manifest_wals = versions_->GetWalSet().GetWals();
+    required_by_manifest.reserve(manifest_wals.size());
+    for (const auto& wal : manifest_wals) {
+      required_by_manifest.push_back(wal.first);
+    }
+  }
+
+  Status s = wal_manager_.GetSortedWalFiles(files);
+
+  // DisableFileDeletions / EnableFileDeletions not supported in read-only DB
+  if (deletions_disabled.ok()) {
+    Status s2 = EnableFileDeletions(/*force*/ false);
+    assert(s2.ok());
+    s2.PermitUncheckedError();
+  } else {
+    assert(deletions_disabled.IsNotSupported());
+  }
+
+  if (s.ok()) {
+    // Verify the scan includes those required by the manifest (one sorted
+    // list is a superset of the other)
+    auto required = required_by_manifest.begin();
+    auto included = files.begin();
+
+    while (required != required_by_manifest.end()) {
+      if (included == files.end() || *required < (*included)->LogNumber()) {
+        // FAIL - did not find
+        return Status::Corruption(
+            "WAL file " + std::to_string(*required) +
+            " required by manifest but not in directory list");
+      }
+      if (*required == (*included)->LogNumber()) {
+        ++required;
+        ++included;
+      } else {
+        assert(*required > (*included)->LogNumber());
+        ++included;
+      }
+    }
+  }
+
+  return s;
+}
+
+Status DBImpl::GetCurrentWalFile(std::unique_ptr<LogFile>* current_log_file) {
+  uint64_t current_logfile_number;
+  {
+    InstrumentedMutexLock l(&mutex_);
+    current_logfile_number = logfile_number_;
+  }
+
+  return wal_manager_.GetLiveWalFile(current_logfile_number, current_log_file);
+}
+
+Status DBImpl::GetLiveFilesStorageInfo(
+    const LiveFilesStorageInfoOptions& opts,
+    std::vector<LiveFileStorageInfo>* files) {
+  // To avoid returning partial results, only move results to files on success.
+  assert(files);
+  files->clear();
+  std::vector<LiveFileStorageInfo> results;
+
+  // NOTE: This implementation was largely migrated from Checkpoint.
+
+  Status s;
+  VectorLogPtr live_wal_files;
+  bool flush_memtable = true;
+  if (!immutable_db_options_.allow_2pc) {
+    if (opts.wal_size_for_flush == std::numeric_limits<uint64_t>::max()) {
+      flush_memtable = false;
+    } else if (opts.wal_size_for_flush > 0) {
+      // If the outstanding log files are small, we skip the flush.
+      s = GetSortedWalFiles(live_wal_files);
+
+      if (!s.ok()) {
+        return s;
+      }
+
+      // Don't flush column families if total log size is smaller than
+      // log_size_for_flush. We copy the log files instead.
+ // We may be able to cover 2PC case too. + uint64_t total_wal_size = 0; + for (auto& wal : live_wal_files) { + total_wal_size += wal->SizeFileBytes(); + } + if (total_wal_size < opts.wal_size_for_flush) { + flush_memtable = false; + } + live_wal_files.clear(); + } + } + + // This is a modified version of GetLiveFiles, to get access to more + // metadata. + mutex_.Lock(); + if (flush_memtable) { + Status status = FlushForGetLiveFiles(); + if (!status.ok()) { + mutex_.Unlock(); + ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Cannot Flush data %s\n", + status.ToString().c_str()); + return status; + } + } + + // Make a set of all of the live table and blob files + for (auto cfd : *versions_->GetColumnFamilySet()) { + if (cfd->IsDropped()) { + continue; + } + VersionStorageInfo& vsi = *cfd->current()->storage_info(); + auto& cf_paths = cfd->ioptions()->cf_paths; + + auto GetDir = [&](size_t path_id) { + // Matching TableFileName() behavior + if (path_id >= cf_paths.size()) { + assert(false); + return cf_paths.back().path; + } else { + return cf_paths[path_id].path; + } + }; + + for (int level = 0; level < vsi.num_levels(); ++level) { + const auto& level_files = vsi.LevelFiles(level); + for (const auto& meta : level_files) { + assert(meta); + + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = MakeTableFileName(meta->fd.GetNumber()); + info.directory = GetDir(meta->fd.GetPathId()); + info.file_number = meta->fd.GetNumber(); + info.file_type = kTableFile; + info.size = meta->fd.GetFileSize(); + if (opts.include_checksum_info) { + info.file_checksum_func_name = meta->file_checksum_func_name; + info.file_checksum = meta->file_checksum; + if (info.file_checksum_func_name.empty()) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + info.temperature = meta->temperature; + } + } + const auto& blob_files = vsi.GetBlobFiles(); + for (const auto& meta : blob_files) { + assert(meta); + + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = BlobFileName(meta->GetBlobFileNumber()); + info.directory = GetDir(/* path_id */ 0); + info.file_number = meta->GetBlobFileNumber(); + info.file_type = kBlobFile; + info.size = meta->GetBlobFileSize(); + if (opts.include_checksum_info) { + info.file_checksum_func_name = meta->GetChecksumMethod(); + info.file_checksum = meta->GetChecksumValue(); + if (info.file_checksum_func_name.empty()) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + // TODO?: info.temperature + } + } + + // Capture some final info before releasing mutex + const uint64_t manifest_number = versions_->manifest_file_number(); + const uint64_t manifest_size = versions_->manifest_file_size(); + const uint64_t options_number = versions_->options_file_number(); + const uint64_t options_size = versions_->options_file_size_; + const uint64_t min_log_num = MinLogNumberToKeep(); + + mutex_.Unlock(); + + std::string manifest_fname = DescriptorFileName(manifest_number); + { // MANIFEST + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = manifest_fname; + info.directory = GetName(); + info.file_number = manifest_number; + info.file_type = kDescriptorFile; + info.size = manifest_size; + info.trim_to_size = true; + if (opts.include_checksum_info) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = 
kUnknownFileChecksum; + } + } + + { // CURRENT + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = kCurrentFileName; + info.directory = GetName(); + info.file_type = kCurrentFile; + // CURRENT could be replaced so we have to record the contents as needed. + info.replacement_contents = manifest_fname + "\n"; + info.size = manifest_fname.size() + 1; + if (opts.include_checksum_info) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + + // The OPTIONS file number is zero in read-write mode when OPTIONS file + // writing failed and the DB was configured with + // `fail_if_options_file_error == false`. In read-only mode the OPTIONS file + // number is zero when no OPTIONS file exist at all. In those cases we do not + // record any OPTIONS file in the live file list. + if (options_number != 0) { + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = OptionsFileName(options_number); + info.directory = GetName(); + info.file_number = options_number; + info.file_type = kOptionsFile; + info.size = options_size; + if (opts.include_checksum_info) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + + // Some legacy testing stuff TODO: carefully clean up obsolete parts + TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:FlushDone"); + + TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles1"); + TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"); + + if (s.ok()) { + // To maximize the effectiveness of track_and_verify_wals_in_manifest, + // sync WAL when it is enabled. + s = FlushWAL( + immutable_db_options_.track_and_verify_wals_in_manifest /* sync */); + if (s.IsNotSupported()) { // read-only DB or similar + s = Status::OK(); + } + } + + TEST_SYNC_POINT("CheckpointImpl::CreateCustomCheckpoint:AfterGetLive1"); + TEST_SYNC_POINT("CheckpointImpl::CreateCustomCheckpoint:AfterGetLive2"); + + // If we have more than one column family, we also need to get WAL files. + if (s.ok()) { + s = GetSortedWalFiles(live_wal_files); + } + if (!s.ok()) { + return s; + } + + size_t wal_size = live_wal_files.size(); + + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Number of log files %" ROCKSDB_PRIszt, live_wal_files.size()); + + // Link WAL files. Copy exact size of last one because it is the only one + // that has changes after the last flush. + auto wal_dir = immutable_db_options_.GetWalDir(); + for (size_t i = 0; s.ok() && i < wal_size; ++i) { + if ((live_wal_files[i]->Type() == kAliveLogFile) && + (!flush_memtable || live_wal_files[i]->LogNumber() >= min_log_num)) { + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + auto f = live_wal_files[i]->PathName(); + assert(!f.empty() && f[0] == '/'); + info.relative_filename = f.substr(1); + info.directory = wal_dir; + info.file_number = live_wal_files[i]->LogNumber(); + info.file_type = kWalFile; + info.size = live_wal_files[i]->SizeFileBytes(); + // Only last should need to be trimmed + info.trim_to_size = (i + 1 == wal_size); + if (opts.include_checksum_info) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + } + + if (s.ok()) { + // Only move results to output on success. 
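+    // `results` was built locally, so on any earlier failure the caller sees
+    // the empty `*files` from the clear() at function entry rather than a
+    // partial list.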
+    *files = std::move(results);
+  }
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.cc
new file mode 100644
index 0000000000..3d824baf29
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.cc
@@ -0,0 +1,260 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/db_impl/compacted_db_impl.h"
+
+#include "db/db_impl/db_impl.h"
+#include "db/version_set.h"
+#include "logging/logging.h"
+#include "table/get_context.h"
+#include "util/cast_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+extern void MarkKeyMayExist(void* arg);
+extern bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
+                      const Slice& v, bool hit_and_return);
+
+CompactedDBImpl::CompactedDBImpl(const DBOptions& options,
+                                 const std::string& dbname)
+    : DBImpl(options, dbname, /*seq_per_batch*/ false, /*batch_per_txn*/ true,
+             /*read_only*/ true),
+      cfd_(nullptr),
+      version_(nullptr),
+      user_comparator_(nullptr) {}
+
+CompactedDBImpl::~CompactedDBImpl() {}
+
+size_t CompactedDBImpl::FindFile(const Slice& key) {
+  size_t right = files_.num_files - 1;
+  auto cmp = [&](const FdWithKeyRange& f, const Slice& k) -> bool {
+    return user_comparator_->Compare(ExtractUserKey(f.largest_key), k) < 0;
+  };
+  return static_cast<size_t>(
+      std::lower_bound(files_.files, files_.files + right, key, cmp) -
+      files_.files);
+}
+
+Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*,
+                            const Slice& key, PinnableSlice* value) {
+  return Get(options, /*column_family*/ nullptr, key, value,
+             /*timestamp*/ nullptr);
+}
+
+Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*,
+                            const Slice& key, PinnableSlice* value,
+                            std::string* timestamp) {
+  if (options.io_activity != Env::IOActivity::kUnknown) {
+    return Status::InvalidArgument(
+        "Cannot call Get with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`");
+  }
+  assert(user_comparator_);
+  if (options.timestamp) {
+    const Status s = FailIfTsMismatchCf(
+        DefaultColumnFamily(), *(options.timestamp), /*ts_for_read=*/true);
+    if (!s.ok()) {
+      return s;
+    }
+  } else {
+    const Status s = FailIfCfHasTs(DefaultColumnFamily());
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  // Clear the timestamps for returning results so that we can distinguish
+  // between a tombstone and a key that has never been written
+  if (timestamp) {
+    timestamp->clear();
+  }
+
+  GetWithTimestampReadCallback read_cb(kMaxSequenceNumber);
+  std::string* ts =
+      user_comparator_->timestamp_size() > 0 ? timestamp : nullptr;
+  LookupKey lkey(key, kMaxSequenceNumber, options.timestamp);
+  GetContext get_context(user_comparator_, nullptr, nullptr, nullptr,
+                         GetContext::kNotFound, lkey.user_key(), value,
+                         /*columns=*/nullptr, ts, nullptr, nullptr, true,
+                         nullptr, nullptr, nullptr, nullptr, &read_cb);
+
+  const FdWithKeyRange& f = files_.files[FindFile(lkey.user_key())];
+  if (user_comparator_->CompareWithoutTimestamp(
+          key, /*a_has_ts=*/false,
+          ExtractUserKeyAndStripTimestamp(f.smallest_key,
+                                          user_comparator_->timestamp_size()),
+          /*b_has_ts=*/false) < 0) {
+    return Status::NotFound();
+  }
+  Status s = f.fd.table_reader->Get(options, lkey.internal_key(), &get_context,
+                                    nullptr);
+  if (!s.ok() && !s.IsNotFound()) {
+    return s;
+  }
+  if (get_context.State() == GetContext::kFound) {
+    return Status::OK();
+  }
+  return Status::NotFound();
+}
+
+std::vector<Status> CompactedDBImpl::MultiGet(
+    const ReadOptions& options, const std::vector<ColumnFamilyHandle*>&,
+    const std::vector<Slice>& keys, std::vector<std::string>* values) {
+  return MultiGet(options, keys, values, /*timestamps*/ nullptr);
+}
+
+std::vector<Status> CompactedDBImpl::MultiGet(
+    const ReadOptions& options, const std::vector<ColumnFamilyHandle*>&,
+    const std::vector<Slice>& keys, std::vector<std::string>* values,
+    std::vector<std::string>* timestamps) {
+  assert(user_comparator_);
+  size_t num_keys = keys.size();
+
+  if (options.timestamp) {
+    Status s = FailIfTsMismatchCf(DefaultColumnFamily(), *(options.timestamp),
+                                  /*ts_for_read=*/true);
+    if (!s.ok()) {
+      return std::vector<Status>(num_keys, s);
+    }
+  } else {
+    Status s = FailIfCfHasTs(DefaultColumnFamily());
+    if (!s.ok()) {
+      return std::vector<Status>(num_keys, s);
+    }
+  }
+
+  // Clear the timestamps for returning results so that we can distinguish
+  // between a tombstone and a key that has never been written
+  if (timestamps) {
+    for (auto& ts : *timestamps) {
+      ts.clear();
+    }
+  }
+
+  GetWithTimestampReadCallback read_cb(kMaxSequenceNumber);
+  autovector<TableReader*, 16> reader_list;
+  for (const auto& key : keys) {
+    LookupKey lkey(key, kMaxSequenceNumber, options.timestamp);
+    const FdWithKeyRange& f = files_.files[FindFile(lkey.user_key())];
+    if (user_comparator_->CompareWithoutTimestamp(
+            key, /*a_has_ts=*/false,
+            ExtractUserKeyAndStripTimestamp(f.smallest_key,
+                                            user_comparator_->timestamp_size()),
+            /*b_has_ts=*/false) < 0) {
+      reader_list.push_back(nullptr);
+    } else {
+      f.fd.table_reader->Prepare(lkey.internal_key());
+      reader_list.push_back(f.fd.table_reader);
+    }
+  }
+  std::vector<Status> statuses(num_keys, Status::NotFound());
+  values->resize(num_keys);
+  if (timestamps) {
+    timestamps->resize(num_keys);
+  }
+  int idx = 0;
+  for (auto* r : reader_list) {
+    if (r != nullptr) {
+      PinnableSlice pinnable_val;
+      std::string& value = (*values)[idx];
+      LookupKey lkey(keys[idx], kMaxSequenceNumber, options.timestamp);
+      std::string* timestamp = timestamps ? &(*timestamps)[idx] : nullptr;
+      GetContext get_context(
+          user_comparator_, nullptr, nullptr, nullptr, GetContext::kNotFound,
+          lkey.user_key(), &pinnable_val, /*columns=*/nullptr,
+          user_comparator_->timestamp_size() > 0 ? timestamp : nullptr, nullptr,
+          nullptr, true, nullptr, nullptr, nullptr, nullptr, &read_cb);
+      Status s = r->Get(options, lkey.internal_key(), &get_context, nullptr);
+      assert(static_cast<size_t>(idx) < statuses.size());
+      if (!s.ok() && !s.IsNotFound()) {
+        statuses[idx] = s;
+      } else {
+        value.assign(pinnable_val.data(), pinnable_val.size());
+        if (get_context.State() == GetContext::kFound) {
+          statuses[idx] = Status::OK();
+        }
+      }
+    }
+    ++idx;
+  }
+  return statuses;
+}
+
+Status CompactedDBImpl::Init(const Options& options) {
+  SuperVersionContext sv_context(/* create_superversion */ true);
+  mutex_.Lock();
+  ColumnFamilyDescriptor cf(kDefaultColumnFamilyName,
+                            ColumnFamilyOptions(options));
+  Status s = Recover({cf}, true /* read only */, false, true);
+  if (s.ok()) {
+    cfd_ = static_cast_with_check<ColumnFamilyHandleImpl>(DefaultColumnFamily())
+               ->cfd();
+    cfd_->InstallSuperVersion(&sv_context, &mutex_);
+  }
+  mutex_.Unlock();
+  sv_context.Clean();
+  if (!s.ok()) {
+    return s;
+  }
+  NewThreadStatusCfInfo(cfd_);
+  version_ = cfd_->GetSuperVersion()->current;
+  user_comparator_ = cfd_->user_comparator();
+  auto* vstorage = version_->storage_info();
+  if (vstorage->num_non_empty_levels() == 0) {
+    return Status::NotSupported("no file exists");
+  }
+  const LevelFilesBrief& l0 = vstorage->LevelFilesBrief(0);
+  // L0 should not have files
+  if (l0.num_files > 1) {
+    return Status::NotSupported("L0 contain more than 1 file");
+  }
+  if (l0.num_files == 1) {
+    if (vstorage->num_non_empty_levels() > 1) {
+      return Status::NotSupported("Both L0 and other level contain files");
+    }
+    files_ = l0;
+    return Status::OK();
+  }
+
+  for (int i = 1; i < vstorage->num_non_empty_levels() - 1; ++i) {
+    if (vstorage->LevelFilesBrief(i).num_files > 0) {
+      return Status::NotSupported("Other levels also contain files");
+    }
+  }
+
+  int level = vstorage->num_non_empty_levels() - 1;
+  if (vstorage->LevelFilesBrief(level).num_files > 0) {
+    files_ = vstorage->LevelFilesBrief(level);
+    return Status::OK();
+  }
+  return Status::NotSupported("no file exists");
+}
+
+Status CompactedDBImpl::Open(const Options& options, const std::string& dbname,
+                             DB** dbptr) {
+  *dbptr = nullptr;
+
+  if (options.max_open_files != -1) {
+    return Status::InvalidArgument("require max_open_files = -1");
+  }
+  if (options.merge_operator.get() != nullptr) {
+    return Status::InvalidArgument("merge operator is not supported");
+  }
+  DBOptions db_options(options);
+  std::unique_ptr<CompactedDBImpl> db(new CompactedDBImpl(db_options, dbname));
+  Status s = db->Init(options);
+  if (s.ok()) {
+    s = db->StartPeriodicTaskScheduler();
+  }
+  if (s.ok()) {
+    ROCKS_LOG_INFO(db->immutable_db_options_.info_log,
+                   "Opened the db as fully compacted mode");
+    LogFlush(db->immutable_db_options_.info_log);
+    *dbptr = db.release();
+  }
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.h
new file mode 100644
index 0000000000..9879d81b61
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/compacted_db_impl.h
@@ -0,0 +1,157 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <string>
+#include <vector>
+
+#include "db/db_impl/db_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// TODO: Share common structure with DBImplSecondary and DBImplReadOnly
+class CompactedDBImpl : public DBImpl {
+ public:
+  CompactedDBImpl(const DBOptions& options, const std::string& dbname);
+  // No copying allowed
+  CompactedDBImpl(const CompactedDBImpl&) = delete;
+  void operator=(const CompactedDBImpl&) = delete;
+
+  ~CompactedDBImpl() override;
+
+  static Status Open(const Options& options, const std::string& dbname,
+                     DB** dbptr);
+
+  // Implementations of the DB interface
+  using DB::Get;
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value) override;
+
+  Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
+             const Slice& key, PinnableSlice* value,
+             std::string* timestamp) override;
+
+  using DB::MultiGet;
+  // Note that CompactedDBImpl::MultiGet is not the optimized version of
+  // MultiGet to use.
+  // TODO: optimize CompactedDBImpl::MultiGet, see DBImpl::MultiGet for details.
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options, const std::vector<ColumnFamilyHandle*>&,
+      const std::vector<Slice>& keys,
+      std::vector<std::string>* values) override;
+
+  std::vector<Status> MultiGet(const ReadOptions& options,
+                               const std::vector<ColumnFamilyHandle*>&,
+                               const std::vector<Slice>& keys,
+                               std::vector<std::string>* values,
+                               std::vector<std::string>* timestamps) override;
+
+  using DBImpl::Put;
+  virtual Status Put(const WriteOptions& /*options*/,
+                     ColumnFamilyHandle* /*column_family*/,
+                     const Slice& /*key*/, const Slice& /*value*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  using DBImpl::PutEntity;
+  Status PutEntity(const WriteOptions& /* options */,
+                   ColumnFamilyHandle* /* column_family */,
+                   const Slice& /* key */,
+                   const WideColumns& /* columns */) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  using DBImpl::Merge;
+  virtual Status Merge(const WriteOptions& /*options*/,
+                       ColumnFamilyHandle* /*column_family*/,
+                       const Slice& /*key*/, const Slice& /*value*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  using DBImpl::Delete;
+  virtual Status Delete(const WriteOptions& /*options*/,
+                        ColumnFamilyHandle* /*column_family*/,
+                        const Slice& /*key*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  virtual Status Write(const WriteOptions& /*options*/,
+                       WriteBatch* /*updates*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  using DBImpl::CompactRange;
+  virtual Status CompactRange(const CompactRangeOptions& /*options*/,
+                              ColumnFamilyHandle* /*column_family*/,
+                              const Slice* /*begin*/,
+                              const Slice* /*end*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  virtual Status DisableFileDeletions() override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  virtual Status EnableFileDeletions(bool /*force*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  virtual Status GetLiveFiles(std::vector<std::string>& ret,
+                              uint64_t* manifest_file_size,
+                              bool /*flush_memtable*/) override {
+    return DBImpl::GetLiveFiles(ret, manifest_file_size,
+                                false /* flush_memtable */);
+  }
+  using DBImpl::Flush;
+  virtual Status Flush(const FlushOptions& /*options*/,
+                       ColumnFamilyHandle* /*column_family*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  virtual Status SyncWAL() override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  using DB::IngestExternalFile;
+  virtual Status IngestExternalFile(
+      ColumnFamilyHandle* /*column_family*/,
+      const std::vector<std::string>& /*external_files*/,
+      const IngestExternalFileOptions& /*ingestion_options*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  using DB::CreateColumnFamilyWithImport;
+  virtual Status CreateColumnFamilyWithImport(
+      const ColumnFamilyOptions& /*options*/,
+      const std::string& /*column_family_name*/,
+      const ImportColumnFamilyOptions& /*import_options*/,
+      const ExportImportFilesMetaData& /*metadata*/,
+      ColumnFamilyHandle** /*handle*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  using DB::ClipColumnFamily;
+  virtual Status ClipColumnFamily(ColumnFamilyHandle* /*column_family*/,
+                                  const Slice& /*begin*/,
+                                  const Slice& /*end*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  // FIXME: some missing overrides for more "write" functions
+  // Share with DBImplReadOnly?
+
+ protected:
+  Status FlushForGetLiveFiles() override {
+    // No-op for read-only DB
+    return Status::OK();
+  }
+
+ private:
+  friend class DB;
+  inline size_t FindFile(const Slice& key);
+  Status Init(const Options& options);
+
+  ColumnFamilyData* cfd_;
+  Version* version_;
+  const Comparator* user_comparator_;
+  LevelFilesBrief files_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.cc
new file mode 100644
index 0000000000..2ed0caf7f2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.cc
@@ -0,0 +1,6132 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "db/db_impl/db_impl.h" + +#include +#ifdef OS_SOLARIS +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db/arena_wrapped_db_iter.h" +#include "db/builder.h" +#include "db/compaction/compaction_job.h" +#include "db/db_info_dumper.h" +#include "db/db_iter.h" +#include "db/dbformat.h" +#include "db/error_handler.h" +#include "db/event_helpers.h" +#include "db/external_sst_file_ingestion_job.h" +#include "db/flush_job.h" +#include "db/forward_iterator.h" +#include "db/import_column_family_job.h" +#include "db/job_context.h" +#include "db/log_reader.h" +#include "db/log_writer.h" +#include "db/malloc_stats.h" +#include "db/memtable.h" +#include "db/memtable_list.h" +#include "db/merge_context.h" +#include "db/merge_helper.h" +#include "db/periodic_task_scheduler.h" +#include "db/range_tombstone_fragmenter.h" +#include "db/table_cache.h" +#include "db/table_properties_collector.h" +#include "db/transaction_log_impl.h" +#include "db/version_set.h" +#include "db/write_batch_internal.h" +#include "db/write_callback.h" +#include "env/unique_id_gen.h" +#include "file/file_util.h" +#include "file/filename.h" +#include "file/random_access_file_reader.h" +#include "file/sst_file_manager_impl.h" +#include "logging/auto_roll_logger.h" +#include "logging/log_buffer.h" +#include "logging/logging.h" +#include "monitoring/in_memory_stats_history.h" +#include "monitoring/instrumented_mutex.h" +#include "monitoring/iostats_context_imp.h" +#include "monitoring/perf_context_imp.h" +#include "monitoring/persistent_stats_history.h" +#include "monitoring/thread_status_updater.h" +#include "monitoring/thread_status_util.h" +#include "options/cf_options.h" +#include "options/options_helper.h" +#include "options/options_parser.h" +#include "port/port.h" +#include "rocksdb/cache.h" +#include "rocksdb/compaction_filter.h" +#include "rocksdb/convenience.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/statistics.h" +#include "rocksdb/stats_history.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "rocksdb/version.h" +#include "rocksdb/write_buffer_manager.h" +#include "table/block_based/block.h" +#include "table/block_based/block_based_table_factory.h" +#include "table/get_context.h" +#include "table/merging_iterator.h" +#include "table/multiget_context.h" +#include "table/sst_file_dumper.h" +#include "table/table_builder.h" +#include "table/two_level_iterator.h" +#include "table/unique_id_impl.h" +#include "test_util/sync_point.h" +#include "trace_replay/trace_replay.h" +#include "util/autovector.h" +#include "util/cast_util.h" +#include "util/coding.h" +#include "util/compression.h" +#include "util/crc32c.h" +#include "util/defer.h" +#include "util/distributed_mutex.h" +#include "util/hash_containers.h" +#include "util/mutexlock.h" +#include "util/stop_watch.h" +#include "util/string_util.h" +#include "utilities/trace/replayer_impl.h" + +namespace ROCKSDB_NAMESPACE { + +const std::string kDefaultColumnFamilyName("default"); +const std::string kPersistentStatsColumnFamilyName( + "___rocksdb_stats_history___"); +void DumpRocksDBBuildVersion(Logger* log); + +CompressionType GetCompressionFlush( + const ImmutableCFOptions& ioptions, + const MutableCFOptions& mutable_cf_options) { + // Compressing memtable flushes might not help unless the sequential load + // optimization is used for leveled compaction. 
+  // latency overhead is not offset by saving much space.
+  if (ioptions.compaction_style == kCompactionStyleUniversal &&
+      mutable_cf_options.compaction_options_universal
+              .compression_size_percent >= 0) {
+    return kNoCompression;
+  }
+  if (mutable_cf_options.compression_per_level.empty()) {
+    return mutable_cf_options.compression;
+  } else {
+    // For leveled compress when min_level_to_compress != 0.
+    return mutable_cf_options.compression_per_level[0];
+  }
+}
+
+namespace {
+void DumpSupportInfo(Logger* logger) {
+  ROCKS_LOG_HEADER(logger, "Compression algorithms supported:");
+  for (auto& compression : OptionsHelper::compression_type_string_map) {
+    if (compression.second != kNoCompression &&
+        compression.second != kDisableCompressionOption) {
+      ROCKS_LOG_HEADER(logger, "\t%s supported: %d", compression.first.c_str(),
+                       CompressionTypeSupported(compression.second));
+    }
+  }
+  ROCKS_LOG_HEADER(logger, "Fast CRC32 supported: %s",
+                   crc32c::IsFastCrc32Supported().c_str());
+
+  ROCKS_LOG_HEADER(logger, "DMutex implementation: %s", DMutex::kName());
+}
+}  // namespace
+
+DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
+               const bool seq_per_batch, const bool batch_per_txn,
+               bool read_only)
+    : dbname_(dbname),
+      own_info_log_(options.info_log == nullptr),
+      init_logger_creation_s_(),
+      initial_db_options_(SanitizeOptions(dbname, options, read_only,
+                                          &init_logger_creation_s_)),
+      env_(initial_db_options_.env),
+      io_tracer_(std::make_shared<IOTracer>()),
+      immutable_db_options_(initial_db_options_),
+      fs_(immutable_db_options_.fs, io_tracer_),
+      mutable_db_options_(initial_db_options_),
+      stats_(immutable_db_options_.stats),
+#ifdef COERCE_CONTEXT_SWITCH
+      mutex_(stats_, immutable_db_options_.clock, DB_MUTEX_WAIT_MICROS, &bg_cv_,
+             immutable_db_options_.use_adaptive_mutex),
+#else   // COERCE_CONTEXT_SWITCH
+      mutex_(stats_, immutable_db_options_.clock, DB_MUTEX_WAIT_MICROS,
+             immutable_db_options_.use_adaptive_mutex),
+#endif  // COERCE_CONTEXT_SWITCH
+      default_cf_handle_(nullptr),
+      error_handler_(this, immutable_db_options_, &mutex_),
+      event_logger_(immutable_db_options_.info_log.get()),
+      max_total_in_memory_state_(0),
+      file_options_(BuildDBOptions(immutable_db_options_, mutable_db_options_)),
+      file_options_for_compaction_(fs_->OptimizeForCompactionTableWrite(
+          file_options_, immutable_db_options_)),
+      seq_per_batch_(seq_per_batch),
+      batch_per_txn_(batch_per_txn),
+      next_job_id_(1),
+      shutting_down_(false),
+      db_lock_(nullptr),
+      manual_compaction_paused_(false),
+      bg_cv_(&mutex_),
+      logfile_number_(0),
+      log_dir_synced_(false),
+      log_empty_(true),
+      persist_stats_cf_handle_(nullptr),
+      log_sync_cv_(&log_write_mutex_),
+      total_log_size_(0),
+      is_snapshot_supported_(true),
+      write_buffer_manager_(immutable_db_options_.write_buffer_manager.get()),
+      write_thread_(immutable_db_options_),
+      nonmem_write_thread_(immutable_db_options_),
+      write_controller_(mutable_db_options_.delayed_write_rate),
+      last_batch_group_size_(0),
+      unscheduled_flushes_(0),
+      unscheduled_compactions_(0),
+      bg_bottom_compaction_scheduled_(0),
+      bg_compaction_scheduled_(0),
+      num_running_compactions_(0),
+      bg_flush_scheduled_(0),
+      num_running_flushes_(0),
+      bg_purge_scheduled_(0),
+      disable_delete_obsolete_files_(0),
+      pending_purge_obsolete_files_(0),
+      delete_obsolete_files_last_run_(immutable_db_options_.clock->NowMicros()),
+      last_stats_dump_time_microsec_(0),
+      has_unpersisted_data_(false),
+      unable_to_release_oldest_log_(false),
+      num_running_ingest_file_(0),
wal_manager_(immutable_db_options_, file_options_, io_tracer_,
+                   seq_per_batch),
+      bg_work_paused_(0),
+      bg_compaction_paused_(0),
+      refitting_level_(false),
+      opened_successfully_(false),
+      periodic_task_scheduler_(),
+      two_write_queues_(options.two_write_queues),
+      manual_wal_flush_(options.manual_wal_flush),
+      // last_sequence_ is always maintained by the main queue that also
+      // writes to the memtable. When two_write_queues_ is disabled, the last
+      // seq in the memtable is the same as the last seq published to the
+      // readers. When it is enabled but seq_per_batch_ is disabled, the last
+      // seq in the memtable still indicates the last published seq, since
+      // wal-only writes that go to the 2nd queue do not consume a sequence
+      // number. Otherwise writes performed by the 2nd queue could change what
+      // is visible to the readers. In these cases,
+      // last_seq_same_as_publish_seq_ == false and the 2nd queue maintains a
+      // separate variable to indicate the last published sequence.
+      last_seq_same_as_publish_seq_(
+          !(seq_per_batch && options.two_write_queues)),
+      // Since seq_per_batch_ is currently set only by WritePreparedTxn, which
+      // requires a custom gc for compaction, we use that to set use_custom_gc_
+      // as well.
+      use_custom_gc_(seq_per_batch),
+      shutdown_initiated_(false),
+      own_sfm_(options.sst_file_manager == nullptr),
+      closed_(false),
+      atomic_flush_install_cv_(&mutex_),
+      blob_callback_(immutable_db_options_.sst_file_manager.get(), &mutex_,
+                     &error_handler_, &event_logger_,
+                     immutable_db_options_.listeners, dbname_),
+      lock_wal_count_(0) {
+  // !batch_per_txn_ implies seq_per_batch_ because it is only unset for
+  // WriteUnprepared, which should use seq_per_batch_.
+  assert(batch_per_txn_ || seq_per_batch_);
+
+  // Reserve ten files or so for other uses and give the rest to TableCache.
+  // Give a large number for setting of "infinite" open files.
+  const int table_cache_size = (mutable_db_options_.max_open_files == -1)
+                                   ?
TableCache::kInfiniteCapacity
+                                   : mutable_db_options_.max_open_files - 10;
+  LRUCacheOptions co;
+  co.capacity = table_cache_size;
+  co.num_shard_bits = immutable_db_options_.table_cache_numshardbits;
+  co.metadata_charge_policy = kDontChargeCacheMetadata;
+  // TODO: Consider a non-fixed seed once test fallout (prefetch_test) is
+  // dealt with
+  co.hash_seed = 0;
+  table_cache_ = NewLRUCache(co);
+  SetDbSessionId();
+  assert(!db_session_id_.empty());
+
+  periodic_task_functions_.emplace(PeriodicTaskType::kDumpStats,
+                                   [this]() { this->DumpStats(); });
+  periodic_task_functions_.emplace(PeriodicTaskType::kPersistStats,
+                                   [this]() { this->PersistStats(); });
+  periodic_task_functions_.emplace(PeriodicTaskType::kFlushInfoLog,
+                                   [this]() { this->FlushInfoLog(); });
+  periodic_task_functions_.emplace(
+      PeriodicTaskType::kRecordSeqnoTime,
+      [this]() { this->RecordSeqnoToTimeMapping(); });
+
+  versions_.reset(new VersionSet(dbname_, &immutable_db_options_, file_options_,
+                                 table_cache_.get(), write_buffer_manager_,
+                                 &write_controller_, &block_cache_tracer_,
+                                 io_tracer_, db_id_, db_session_id_));
+  column_family_memtables_.reset(
+      new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet()));
+
+  DumpRocksDBBuildVersion(immutable_db_options_.info_log.get());
+  DumpDBFileSummary(immutable_db_options_, dbname_, db_session_id_);
+  immutable_db_options_.Dump(immutable_db_options_.info_log.get());
+  mutable_db_options_.Dump(immutable_db_options_.info_log.get());
+  DumpSupportInfo(immutable_db_options_.info_log.get());
+
+  max_total_wal_size_.store(mutable_db_options_.max_total_wal_size,
+                            std::memory_order_relaxed);
+  if (write_buffer_manager_) {
+    wbm_stall_.reset(new WBMStallInterface());
+  }
+}
+
+Status DBImpl::Resume() {
+  ROCKS_LOG_INFO(immutable_db_options_.info_log, "Resuming DB");
+
+  InstrumentedMutexLock db_mutex(&mutex_);
+
+  if (!error_handler_.IsDBStopped() && !error_handler_.IsBGWorkStopped()) {
+    // Nothing to do
+    return Status::OK();
+  }
+
+  if (error_handler_.IsRecoveryInProgress()) {
+    // Don't allow a mix of manual and automatic recovery
+    return Status::Busy();
+  }
+
+  mutex_.Unlock();
+  Status s = error_handler_.RecoverFromBGError(true);
+  mutex_.Lock();
+  return s;
+}
+
+// This function implements the guts of recovery from a background error. It
+// is eventually called for both manual as well as automatic recovery. It does
+// the following -
+// 1. Wait for currently scheduled background flush/compaction to exit, in
+//    order to avoid inadvertently causing an error and concluding that
+//    recovery failed
+// 2. Flush memtables if there's any data, for all the CFs. This may result in
+//    another error, which will be saved by error_handler_ and reported later
+//    as the recovery status
+// 3. Find and delete any obsolete files
+// 4. Schedule compactions if needed for all the CFs.
This is needed as the +// flush in the prior step might have been a no-op for some CFs, which +// means a new super version wouldn't have been installed +Status DBImpl::ResumeImpl(DBRecoverContext context) { + mutex_.AssertHeld(); + + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + WaitForBackgroundWork(); + + Status s; + if (shutdown_initiated_) { + // Returning shutdown status to SFM during auto recovery will cause it + // to abort the recovery and allow the shutdown to progress + s = Status::ShutdownInProgress(); + } + + if (s.ok()) { + Status bg_error = error_handler_.GetBGError(); + if (bg_error.severity() > Status::Severity::kHardError) { + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "DB resume requested but failed due to Fatal/Unrecoverable error"); + s = bg_error; + } + } + + // Make sure the IO Status stored in version set is set to OK. + bool file_deletion_disabled = !IsFileDeletionsEnabled(); + if (s.ok()) { + IOStatus io_s = versions_->io_status(); + if (io_s.IsIOError()) { + // If resuming from IOError resulted from MANIFEST write, then assert + // that we must have already set the MANIFEST writer to nullptr during + // clean-up phase MANIFEST writing. We must have also disabled file + // deletions. + assert(!versions_->descriptor_log_); + assert(file_deletion_disabled); + // Since we are trying to recover from MANIFEST write error, we need to + // switch to a new MANIFEST anyway. The old MANIFEST can be corrupted. + // Therefore, force writing a dummy version edit because we do not know + // whether there are flush jobs with non-empty data to flush, triggering + // appends to MANIFEST. + VersionEdit edit; + auto cfh = + static_cast_with_check(default_cf_handle_); + assert(cfh); + ColumnFamilyData* cfd = cfh->cfd(); + const MutableCFOptions& cf_opts = *cfd->GetLatestMutableCFOptions(); + s = versions_->LogAndApply(cfd, cf_opts, read_options, &edit, &mutex_, + directories_.GetDbDir()); + if (!s.ok()) { + io_s = versions_->io_status(); + if (!io_s.ok()) { + s = error_handler_.SetBGError(io_s, + BackgroundErrorReason::kManifestWrite); + } + } + } + } + + // We cannot guarantee consistency of the WAL. So force flush Memtables of + // all the column families + if (s.ok()) { + FlushOptions flush_opts; + // We allow flush to stall write since we are trying to resume from error. + flush_opts.allow_write_stall = true; + if (immutable_db_options_.atomic_flush) { + mutex_.Unlock(); + s = AtomicFlushMemTables(flush_opts, context.flush_reason); + mutex_.Lock(); + } else { + for (auto cfd : versions_->GetRefedColumnFamilySet()) { + if (cfd->IsDropped()) { + continue; + } + InstrumentedMutexUnlock u(&mutex_); + s = FlushMemTable(cfd, flush_opts, context.flush_reason); + if (!s.ok()) { + break; + } + } + } + if (!s.ok()) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "DB resume requested but failed due to Flush failure [%s]", + s.ToString().c_str()); + } + } + + JobContext job_context(0); + FindObsoleteFiles(&job_context, true); + mutex_.Unlock(); + + job_context.manifest_file_number = 1; + if (job_context.HaveSomethingToDelete()) { + PurgeObsoleteFiles(job_context); + } + job_context.Clean(); + + if (s.ok()) { + assert(versions_->io_status().ok()); + // If we reach here, we should re-enable file deletions if it was disabled + // during previous error handling. 
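+  // Illustrative sketch (not upstream code): from the caller's side, the
+  // manual half of this recovery path is typically driven like so, assuming
+  // a DB* that hit a hard background error such as running out of space:
+  //
+  //   Status s = db->Put(WriteOptions(), key, value);
+  //   if (!s.ok() && s.IsNoSpace()) {
+  //     // free up disk space, then retry recovery explicitly:
+  //     Status r = db->Resume();  // ends up in DBImpl::ResumeImpl()
+  //   }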
+ if (file_deletion_disabled) { + // Always return ok + s = EnableFileDeletions(/*force=*/true); + if (!s.ok()) { + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "DB resume requested but could not enable file deletions [%s]", + s.ToString().c_str()); + assert(false); + } + } + } + + mutex_.Lock(); + if (s.ok()) { + // This will notify and unblock threads waiting for error recovery to + // finish. Those previouly waiting threads can now proceed, which may + // include closing the db. + s = error_handler_.ClearBGError(); + } else { + // NOTE: this is needed to pass ASSERT_STATUS_CHECKED + // in the DBSSTTest.DBWithMaxSpaceAllowedRandomized test. + // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952 + error_handler_.GetRecoveryError().PermitUncheckedError(); + } + + if (s.ok()) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB"); + } else { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "Failed to resume DB [%s]", + s.ToString().c_str()); + } + + // Check for shutdown again before scheduling further compactions, + // since we released and re-acquired the lock above + if (shutdown_initiated_) { + s = Status::ShutdownInProgress(); + } + if (s.ok()) { + for (auto cfd : *versions_->GetColumnFamilySet()) { + SchedulePendingCompaction(cfd); + } + MaybeScheduleFlushOrCompaction(); + } + + // Wake up any waiters - in this case, it could be the shutdown thread + bg_cv_.SignalAll(); + + // No need to check BGError again. If something happened, event listener would + // be notified and the operation causing it would have failed + return s; +} + +void DBImpl::WaitForBackgroundWork() { + // Wait for background work to finish + while (bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ || + bg_flush_scheduled_) { + bg_cv_.Wait(); + } +} + +// Will lock the mutex_, will wait for completion if wait is true +void DBImpl::CancelAllBackgroundWork(bool wait) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Shutdown: canceling all background work"); + + for (uint8_t task_type = 0; + task_type < static_cast(PeriodicTaskType::kMax); task_type++) { + Status s = periodic_task_scheduler_.Unregister( + static_cast(task_type)); + if (!s.ok()) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "Failed to unregister periodic task %d, status: %s", + task_type, s.ToString().c_str()); + } + } + + InstrumentedMutexLock l(&mutex_); + if (!shutting_down_.load(std::memory_order_acquire) && + has_unpersisted_data_.load(std::memory_order_relaxed) && + !mutable_db_options_.avoid_flush_during_shutdown) { + if (immutable_db_options_.atomic_flush) { + mutex_.Unlock(); + Status s = AtomicFlushMemTables(FlushOptions(), FlushReason::kShutDown); + s.PermitUncheckedError(); //**TODO: What to do on error? + mutex_.Lock(); + } else { + for (auto cfd : versions_->GetRefedColumnFamilySet()) { + if (!cfd->IsDropped() && cfd->initialized() && !cfd->mem()->IsEmpty()) { + InstrumentedMutexUnlock u(&mutex_); + Status s = FlushMemTable(cfd, FlushOptions(), FlushReason::kShutDown); + s.PermitUncheckedError(); //**TODO: What to do on error? 
+ } + } + } + } + + shutting_down_.store(true, std::memory_order_release); + bg_cv_.SignalAll(); + if (!wait) { + return; + } + WaitForBackgroundWork(); +} + +Status DBImpl::MaybeReleaseTimestampedSnapshotsAndCheck() { + size_t num_snapshots = 0; + ReleaseTimestampedSnapshotsOlderThan(std::numeric_limits::max(), + &num_snapshots); + + // If there is unreleased snapshot, fail the close call + if (num_snapshots > 0) { + return Status::Aborted("Cannot close DB with unreleased snapshot."); + } + + return Status::OK(); +} + +Status DBImpl::CloseHelper() { + // Guarantee that there is no background error recovery in progress before + // continuing with the shutdown + mutex_.Lock(); + shutdown_initiated_ = true; + error_handler_.CancelErrorRecovery(); + while (error_handler_.IsRecoveryInProgress()) { + bg_cv_.Wait(); + } + mutex_.Unlock(); + + // Below check is added as recovery_error_ is not checked and it causes crash + // in DBSSTTest.DBWithMaxSpaceAllowedWithBlobFiles when space limit is + // reached. + error_handler_.GetRecoveryError().PermitUncheckedError(); + + // CancelAllBackgroundWork called with false means we just set the shutdown + // marker. After this we do a variant of the waiting and unschedule work + // (to consider: moving all the waiting into CancelAllBackgroundWork(true)) + CancelAllBackgroundWork(false); + + // Cancel manual compaction if there's any + if (HasPendingManualCompaction()) { + DisableManualCompaction(); + } + mutex_.Lock(); + // Unschedule all tasks for this DB + for (uint8_t i = 0; i < static_cast(TaskType::kCount); i++) { + env_->UnSchedule(GetTaskTag(i), Env::Priority::BOTTOM); + env_->UnSchedule(GetTaskTag(i), Env::Priority::LOW); + env_->UnSchedule(GetTaskTag(i), Env::Priority::HIGH); + } + + Status ret = Status::OK(); + + // Wait for background work to finish + while (bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ || + bg_flush_scheduled_ || bg_purge_scheduled_ || + pending_purge_obsolete_files_ || + error_handler_.IsRecoveryInProgress()) { + TEST_SYNC_POINT("DBImpl::~DBImpl:WaitJob"); + bg_cv_.Wait(); + } + TEST_SYNC_POINT_CALLBACK("DBImpl::CloseHelper:PendingPurgeFinished", + &files_grabbed_for_purge_); + EraseThreadStatusDbInfo(); + flush_scheduler_.Clear(); + trim_history_scheduler_.Clear(); + + while (!flush_queue_.empty()) { + const FlushRequest& flush_req = PopFirstFromFlushQueue(); + for (const auto& iter : flush_req.cfd_to_max_mem_id_to_persist) { + iter.first->UnrefAndTryDelete(); + } + } + + while (!compaction_queue_.empty()) { + auto cfd = PopFirstFromCompactionQueue(); + cfd->UnrefAndTryDelete(); + } + + if (default_cf_handle_ != nullptr || persist_stats_cf_handle_ != nullptr) { + // we need to delete handle outside of lock because it does its own locking + mutex_.Unlock(); + if (default_cf_handle_) { + delete default_cf_handle_; + default_cf_handle_ = nullptr; + } + if (persist_stats_cf_handle_) { + delete persist_stats_cf_handle_; + persist_stats_cf_handle_ = nullptr; + } + mutex_.Lock(); + } + + // Clean up obsolete files due to SuperVersion release. + // (1) Need to delete to obsolete files before closing because RepairDB() + // scans all existing files in the file system and builds manifest file. + // Keeping obsolete files confuses the repair process. + // (2) Need to check if we Open()/Recover() the DB successfully before + // deleting because if VersionSet recover fails (may be due to corrupted + // manifest file), it is not able to identify live files correctly. 
As a + // result, all "live" files can get deleted by accident. However, corrupted + // manifest is recoverable by RepairDB(). + if (opened_successfully_) { + JobContext job_context(next_job_id_.fetch_add(1)); + FindObsoleteFiles(&job_context, true); + + mutex_.Unlock(); + // manifest number starting from 2 + job_context.manifest_file_number = 1; + if (job_context.HaveSomethingToDelete()) { + PurgeObsoleteFiles(job_context); + } + job_context.Clean(); + mutex_.Lock(); + } + { + InstrumentedMutexLock lock(&log_write_mutex_); + for (auto l : logs_to_free_) { + delete l; + } + for (auto& log : logs_) { + uint64_t log_number = log.writer->get_log_number(); + Status s = log.ClearWriter(); + if (!s.ok()) { + ROCKS_LOG_WARN( + immutable_db_options_.info_log, + "Unable to Sync WAL file %s with error -- %s", + LogFileName(immutable_db_options_.GetWalDir(), log_number).c_str(), + s.ToString().c_str()); + // Retain the first error + if (ret.ok()) { + ret = s; + } + } + } + logs_.clear(); + } + + // Table cache may have table handles holding blocks from the block cache. + // We need to release them before the block cache is destroyed. The block + // cache may be destroyed inside versions_.reset(), when column family data + // list is destroyed, so leaving handles in table cache after + // versions_.reset() may cause issues. + // Here we clean all unreferenced handles in table cache. + // Now we assume all user queries have finished, so only version set itself + // can possibly hold the blocks from block cache. After releasing unreferenced + // handles here, only handles held by version set left and inside + // versions_.reset(), we will release them. There, we need to make sure every + // time a handle is released, we erase it from the cache too. By doing that, + // we can guarantee that after versions_.reset(), table cache is empty + // so the cache can be safely destroyed. + table_cache_->EraseUnRefEntries(); + + for (auto& txn_entry : recovered_transactions_) { + delete txn_entry.second; + } + + // versions need to be destroyed before table_cache since it can hold + // references to table_cache. + versions_.reset(); + mutex_.Unlock(); + if (db_lock_ != nullptr) { + // TODO: Check for unlock error + env_->UnlockFile(db_lock_).PermitUncheckedError(); + } + + ROCKS_LOG_INFO(immutable_db_options_.info_log, "Shutdown complete"); + LogFlush(immutable_db_options_.info_log); + + // If the sst_file_manager was allocated by us during DB::Open(), ccall + // Close() on it before closing the info_log. Otherwise, background thread + // in SstFileManagerImpl might try to log something + if (immutable_db_options_.sst_file_manager && own_sfm_) { + auto sfm = static_cast( + immutable_db_options_.sst_file_manager.get()); + sfm->Close(); + } + + if (immutable_db_options_.info_log && own_info_log_) { + Status s = immutable_db_options_.info_log->Close(); + if (!s.ok() && !s.IsNotSupported() && ret.ok()) { + ret = s; + } + } + + if (write_buffer_manager_ && wbm_stall_) { + write_buffer_manager_->RemoveDBFromQueue(wbm_stall_.get()); + } + + IOStatus io_s = directories_.Close(IOOptions(), nullptr /* dbg */); + if (!io_s.ok()) { + ret = io_s; + } + if (ret.IsAborted()) { + // Reserve IsAborted() error for those where users didn't release + // certain resource and they can release them and come back and + // retry. In this case, we wrap this exception to something else. 
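+  // Illustrative sketch (not upstream code): given the wrapping below, a
+  // caller can treat Incomplete from Close() as "release the offending
+  // resource and retry":
+  //
+  //   Status s = db->Close();
+  //   if (s.IsIncomplete()) {
+  //     // e.g. release remaining snapshots/iterators, then:
+  //     s = db->Close();
+  //   }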
+ return Status::Incomplete(ret.ToString()); + } + + return ret; +} + +Status DBImpl::CloseImpl() { return CloseHelper(); } + +DBImpl::~DBImpl() { + // TODO: remove this. + init_logger_creation_s_.PermitUncheckedError(); + + InstrumentedMutexLock closing_lock_guard(&closing_mutex_); + if (closed_) { + return; + } + + closed_ = true; + + { + const Status s = MaybeReleaseTimestampedSnapshotsAndCheck(); + s.PermitUncheckedError(); + } + + closing_status_ = CloseImpl(); + closing_status_.PermitUncheckedError(); +} + +void DBImpl::MaybeIgnoreError(Status* s) const { + if (s->ok() || immutable_db_options_.paranoid_checks) { + // No change needed + } else { + ROCKS_LOG_WARN(immutable_db_options_.info_log, "Ignoring error %s", + s->ToString().c_str()); + *s = Status::OK(); + } +} + +const Status DBImpl::CreateArchivalDirectory() { + if (immutable_db_options_.WAL_ttl_seconds > 0 || + immutable_db_options_.WAL_size_limit_MB > 0) { + std::string archivalPath = + ArchivalDirectory(immutable_db_options_.GetWalDir()); + return env_->CreateDirIfMissing(archivalPath); + } + return Status::OK(); +} + +void DBImpl::PrintStatistics() { + auto dbstats = immutable_db_options_.stats; + if (dbstats) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "STATISTICS:\n %s", + dbstats->ToString().c_str()); + } +} + +Status DBImpl::StartPeriodicTaskScheduler() { + +#ifndef NDEBUG + // It only used by test to disable scheduler + bool disable_scheduler = false; + TEST_SYNC_POINT_CALLBACK( + "DBImpl::StartPeriodicTaskScheduler:DisableScheduler", + &disable_scheduler); + if (disable_scheduler) { + return Status::OK(); + } + + { + InstrumentedMutexLock l(&mutex_); + TEST_SYNC_POINT_CALLBACK("DBImpl::StartPeriodicTaskScheduler:Init", + &periodic_task_scheduler_); + } + +#endif // !NDEBUG + if (mutable_db_options_.stats_dump_period_sec > 0) { + Status s = periodic_task_scheduler_.Register( + PeriodicTaskType::kDumpStats, + periodic_task_functions_.at(PeriodicTaskType::kDumpStats), + mutable_db_options_.stats_dump_period_sec); + if (!s.ok()) { + return s; + } + } + if (mutable_db_options_.stats_persist_period_sec > 0) { + Status s = periodic_task_scheduler_.Register( + PeriodicTaskType::kPersistStats, + periodic_task_functions_.at(PeriodicTaskType::kPersistStats), + mutable_db_options_.stats_persist_period_sec); + if (!s.ok()) { + return s; + } + } + + Status s = periodic_task_scheduler_.Register( + PeriodicTaskType::kFlushInfoLog, + periodic_task_functions_.at(PeriodicTaskType::kFlushInfoLog)); + + return s; +} + +Status DBImpl::RegisterRecordSeqnoTimeWorker() { + uint64_t min_time_duration = std::numeric_limits::max(); + uint64_t max_time_duration = std::numeric_limits::min(); + { + InstrumentedMutexLock l(&mutex_); + + for (auto cfd : *versions_->GetColumnFamilySet()) { + // preserve time is the max of 2 options. 
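+      // Worked example (assuming the upstream constant
+      // SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF == 100): one CF with
+      // preserve_internal_time_seconds = 3600 and
+      // preclude_last_level_data_seconds = 0 gives
+      // preserve_time_duration = 3600, so the cadence computed below is
+      // (3600 + 100 - 1) / 100 = 36, i.e. one seqno/time sample roughly
+      // every 36 seconds.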
+      uint64_t preserve_time_duration =
+          std::max(cfd->ioptions()->preserve_internal_time_seconds,
+                   cfd->ioptions()->preclude_last_level_data_seconds);
+      if (!cfd->IsDropped() && preserve_time_duration > 0) {
+        min_time_duration =
+            std::min(preserve_time_duration, min_time_duration);
+        max_time_duration =
+            std::max(preserve_time_duration, max_time_duration);
+      }
+    }
+    if (min_time_duration == std::numeric_limits<uint64_t>::max()) {
+      seqno_time_mapping_.Resize(0, 0);
+    } else {
+      seqno_time_mapping_.Resize(min_time_duration, max_time_duration);
+    }
+  }
+
+  uint64_t seqno_time_cadence = 0;
+  if (min_time_duration != std::numeric_limits<uint64_t>::max()) {
+    // round up to 1 when the time_duration is smaller than
+    // kMaxSeqnoTimePairsPerCF
+    seqno_time_cadence =
+        (min_time_duration + SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF - 1) /
+        SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF;
+  }
+
+  Status s;
+  if (seqno_time_cadence == 0) {
+    s = periodic_task_scheduler_.Unregister(PeriodicTaskType::kRecordSeqnoTime);
+  } else {
+    s = periodic_task_scheduler_.Register(
+        PeriodicTaskType::kRecordSeqnoTime,
+        periodic_task_functions_.at(PeriodicTaskType::kRecordSeqnoTime),
+        seqno_time_cadence);
+  }
+
+  return s;
+}
+
+// Estimate the total size of stats_history_
+size_t DBImpl::EstimateInMemoryStatsHistorySize() const {
+  size_t size_total =
+      sizeof(std::map<uint64_t, std::map<std::string, uint64_t>>);
+  if (stats_history_.size() == 0) return size_total;
+  size_t size_per_slice =
+      sizeof(uint64_t) + sizeof(std::map<std::string, uint64_t>);
+  // non-empty map, stats_history_.begin() guaranteed to exist
+  for (const auto& pairs : stats_history_.begin()->second) {
+    size_per_slice +=
+        pairs.first.capacity() + sizeof(pairs.first) + sizeof(pairs.second);
+  }
+  size_total = size_per_slice * stats_history_.size();
+  return size_total;
+}
+
+void DBImpl::PersistStats() {
+  TEST_SYNC_POINT("DBImpl::PersistStats:Entry");
+  if (shutdown_initiated_) {
+    return;
+  }
+  TEST_SYNC_POINT("DBImpl::PersistStats:StartRunning");
+  uint64_t now_seconds =
+      immutable_db_options_.clock->NowMicros() / kMicrosInSecond;
+
+  Statistics* statistics = immutable_db_options_.stats;
+  if (!statistics) {
+    return;
+  }
+  size_t stats_history_size_limit = 0;
+  {
+    InstrumentedMutexLock l(&mutex_);
+    stats_history_size_limit = mutable_db_options_.stats_history_buffer_size;
+  }
+
+  std::map<std::string, uint64_t> stats_map;
+  if (!statistics->getTickerMap(&stats_map)) {
+    return;
+  }
+  ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                 "------- PERSISTING STATS -------");
+
+  if (immutable_db_options_.persist_stats_to_disk) {
+    WriteBatch batch;
+    Status s = Status::OK();
+    if (stats_slice_initialized_) {
+      ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                     "Reading %" ROCKSDB_PRIszt " stats from statistics\n",
+                     stats_slice_.size());
+      for (const auto& stat : stats_map) {
+        if (s.ok()) {
+          char key[100];
+          int length =
+              EncodePersistentStatsKey(now_seconds, stat.first, 100, key);
+          // calculate the delta from last time
+          if (stats_slice_.find(stat.first) != stats_slice_.end()) {
+            uint64_t delta = stat.second - stats_slice_[stat.first];
+            s = batch.Put(persist_stats_cf_handle_,
+                          Slice(key, std::min(100, length)),
+                          std::to_string(delta));
+          }
+        }
+      }
+    }
+    stats_slice_initialized_ = true;
+    std::swap(stats_slice_, stats_map);
+    if (s.ok()) {
+      WriteOptions wo;
+      wo.low_pri = true;
+      wo.no_slowdown = true;
+      wo.sync = false;
+      s = Write(wo, &batch);
+    }
+    if (!s.ok()) {
+      ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                     "Writing to persistent stats CF failed -- %s",
+                     s.ToString().c_str());
+    } else {
ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Writing %" ROCKSDB_PRIszt " stats with timestamp %" PRIu64 + " to persistent stats CF succeeded", + stats_slice_.size(), now_seconds); + } + // TODO(Zhongyi): add purging for persisted data + } else { + InstrumentedMutexLock l(&stats_history_mutex_); + // calculate the delta from last time + if (stats_slice_initialized_) { + std::map stats_delta; + for (const auto& stat : stats_map) { + if (stats_slice_.find(stat.first) != stats_slice_.end()) { + stats_delta[stat.first] = stat.second - stats_slice_[stat.first]; + } + } + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Storing %" ROCKSDB_PRIszt " stats with timestamp %" PRIu64 + " to in-memory stats history", + stats_slice_.size(), now_seconds); + stats_history_[now_seconds] = stats_delta; + } + stats_slice_initialized_ = true; + std::swap(stats_slice_, stats_map); + TEST_SYNC_POINT("DBImpl::PersistStats:StatsCopied"); + + // delete older stats snapshots to control memory consumption + size_t stats_history_size = EstimateInMemoryStatsHistorySize(); + bool purge_needed = stats_history_size > stats_history_size_limit; + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "[Pre-GC] In-memory stats history size: %" ROCKSDB_PRIszt + " bytes, slice count: %" ROCKSDB_PRIszt, + stats_history_size, stats_history_.size()); + while (purge_needed && !stats_history_.empty()) { + stats_history_.erase(stats_history_.begin()); + purge_needed = + EstimateInMemoryStatsHistorySize() > stats_history_size_limit; + } + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "[Post-GC] In-memory stats history size: %" ROCKSDB_PRIszt + " bytes, slice count: %" ROCKSDB_PRIszt, + stats_history_size, stats_history_.size()); + } + TEST_SYNC_POINT("DBImpl::PersistStats:End"); +} + +bool DBImpl::FindStatsByTime(uint64_t start_time, uint64_t end_time, + uint64_t* new_time, + std::map* stats_map) { + assert(new_time); + assert(stats_map); + if (!new_time || !stats_map) return false; + // lock when search for start_time + { + InstrumentedMutexLock l(&stats_history_mutex_); + auto it = stats_history_.lower_bound(start_time); + if (it != stats_history_.end() && it->first < end_time) { + // make a copy for timestamp and stats_map + *new_time = it->first; + *stats_map = it->second; + return true; + } else { + return false; + } + } +} + +Status DBImpl::GetStatsHistory( + uint64_t start_time, uint64_t end_time, + std::unique_ptr* stats_iterator) { + if (!stats_iterator) { + return Status::InvalidArgument("stats_iterator not preallocated."); + } + if (immutable_db_options_.persist_stats_to_disk) { + stats_iterator->reset( + new PersistentStatsHistoryIterator(start_time, end_time, this)); + } else { + stats_iterator->reset( + new InMemoryStatsHistoryIterator(start_time, end_time, this)); + } + return (*stats_iterator)->status(); +} + +void DBImpl::DumpStats() { + TEST_SYNC_POINT("DBImpl::DumpStats:1"); + std::string stats; + if (shutdown_initiated_) { + return; + } + + // Also probe block cache(s) for problems, dump to info log + UnorderedSet probed_caches; + TEST_SYNC_POINT("DBImpl::DumpStats:StartRunning"); + { + InstrumentedMutexLock l(&mutex_); + for (auto cfd : versions_->GetRefedColumnFamilySet()) { + if (!cfd->initialized()) { + continue; + } + + // Release DB mutex for gathering cache entry stats. Pass over all + // column families for this first so that other stats are dumped + // near-atomically. 
+      InstrumentedMutexUnlock u(&mutex_);
+      cfd->internal_stats()->CollectCacheEntryStats(/*foreground=*/false);
+
+      // Probe block cache for problems (if not already done via another CF)
+      if (immutable_db_options_.info_log) {
+        auto* table_factory = cfd->ioptions()->table_factory.get();
+        assert(table_factory != nullptr);
+        Cache* cache =
+            table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());
+        if (cache && probed_caches.insert(cache).second) {
+          cache->ReportProblems(immutable_db_options_.info_log);
+        }
+      }
+    }
+
+    const std::string* property = &DB::Properties::kDBStats;
+    const DBPropertyInfo* property_info = GetPropertyInfo(*property);
+    assert(property_info != nullptr);
+    assert(!property_info->need_out_of_mutex);
+    default_cf_internal_stats_->GetStringProperty(*property_info, *property,
+                                                  &stats);
+
+    property = &InternalStats::kPeriodicCFStats;
+    property_info = GetPropertyInfo(*property);
+    assert(property_info != nullptr);
+    assert(!property_info->need_out_of_mutex);
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      if (cfd->initialized()) {
+        cfd->internal_stats()->GetStringProperty(*property_info, *property,
+                                                 &stats);
+      }
+    }
+  }
+  TEST_SYNC_POINT("DBImpl::DumpStats:2");
+  ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                 "------- DUMPING STATS -------");
+  ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s", stats.c_str());
+  if (immutable_db_options_.dump_malloc_stats) {
+    stats.clear();
+    DumpMallocStats(&stats);
+    if (!stats.empty()) {
+      ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                     "------- Malloc STATS -------");
+      ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s", stats.c_str());
+    }
+  }
+
+  PrintStatistics();
+}
+
+// Periodically flush info log out of application buffer at a low frequency.
+// This improves debuggability in case of RocksDB hanging, since it ensures
+// that the log messages leading up to the hang will eventually become
+// visible in the log.
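+// Note that kFlushInfoLog is registered unconditionally in
+// StartPeriodicTaskScheduler() above, unlike kDumpStats/kPersistStats,
+// which are gated on stats_dump_period_sec / stats_persist_period_sec.
+// The wiring follows the pattern already shown in the constructor:
+//
+//   periodic_task_functions_.emplace(PeriodicTaskType::kFlushInfoLog,
+//                                    [this]() { this->FlushInfoLog(); });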
+void DBImpl::FlushInfoLog() { + if (shutdown_initiated_) { + return; + } + TEST_SYNC_POINT("DBImpl::FlushInfoLog:StartRunning"); + LogFlush(immutable_db_options_.info_log); +} + +Status DBImpl::TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family, + int max_entries_to_print, + std::string* out_str) { + auto* cfh = static_cast_with_check(column_family); + ColumnFamilyData* cfd = cfh->cfd(); + + SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); + Version* version = super_version->current; + + Status s = + version->TablesRangeTombstoneSummary(max_entries_to_print, out_str); + + CleanupSuperVersion(super_version); + return s; +} + +void DBImpl::ScheduleBgLogWriterClose(JobContext* job_context) { + mutex_.AssertHeld(); + if (!job_context->logs_to_free.empty()) { + for (auto l : job_context->logs_to_free) { + AddToLogsToFreeQueue(l); + } + job_context->logs_to_free.clear(); + } +} + +FSDirectory* DBImpl::GetDataDir(ColumnFamilyData* cfd, size_t path_id) const { + assert(cfd); + FSDirectory* ret_dir = cfd->GetDataDir(path_id); + if (ret_dir == nullptr) { + return directories_.GetDataDir(path_id); + } + return ret_dir; +} + +Status DBImpl::SetOptions( + ColumnFamilyHandle* column_family, + const std::unordered_map& options_map) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + auto* cfd = + static_cast_with_check(column_family)->cfd(); + if (options_map.empty()) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "SetOptions() on column family [%s], empty input", + cfd->GetName().c_str()); + return Status::InvalidArgument("empty input"); + } + + MutableCFOptions new_options; + Status s; + Status persist_options_status; + SuperVersionContext sv_context(/* create_superversion */ true); + { + auto db_options = GetDBOptions(); + InstrumentedMutexLock l(&mutex_); + s = cfd->SetOptions(db_options, options_map); + if (s.ok()) { + new_options = *cfd->GetLatestMutableCFOptions(); + // Append new version to recompute compaction score. + VersionEdit dummy_edit; + s = versions_->LogAndApply(cfd, new_options, read_options, &dummy_edit, + &mutex_, directories_.GetDbDir()); + // Trigger possible flush/compactions. This has to be before we persist + // options to file, otherwise there will be a deadlock with writer + // thread. 
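+      // Illustrative caller-side sketch (option names and values are only
+      // examples):
+      //
+      //   Status s = db->SetOptions(
+      //       cf_handle, {{"write_buffer_size", "134217728"},
+      //                   {"level0_file_num_compaction_trigger", "8"}});
+      //
+      // On success, the code below first installs a fresh SuperVersion
+      // (possibly scheduling flushes/compactions) and only then persists
+      // the OPTIONS file.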
+ InstallSuperVersionAndScheduleWork(cfd, &sv_context, new_options); + + persist_options_status = WriteOptionsFile( + false /*need_mutex_lock*/, true /*need_enter_write_thread*/); + bg_cv_.SignalAll(); + } + } + sv_context.Clean(); + + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "SetOptions() on column family [%s], inputs:", cfd->GetName().c_str()); + for (const auto& o : options_map) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s: %s\n", o.first.c_str(), + o.second.c_str()); + } + if (s.ok()) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "[%s] SetOptions() succeeded", cfd->GetName().c_str()); + new_options.Dump(immutable_db_options_.info_log.get()); + if (!persist_options_status.ok()) { + // NOTE: WriteOptionsFile already logs on failure + s = persist_options_status; + } + } else { + persist_options_status.PermitUncheckedError(); // less important + ROCKS_LOG_WARN(immutable_db_options_.info_log, "[%s] SetOptions() failed", + cfd->GetName().c_str()); + } + LogFlush(immutable_db_options_.info_log); + return s; +} + +Status DBImpl::SetDBOptions( + const std::unordered_map& options_map) { + if (options_map.empty()) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "SetDBOptions(), empty input."); + return Status::InvalidArgument("empty input"); + } + + MutableDBOptions new_options; + Status s; + Status persist_options_status = Status::OK(); + bool wal_changed = false; + WriteContext write_context; + { + InstrumentedMutexLock l(&mutex_); + s = GetMutableDBOptionsFromStrings(mutable_db_options_, options_map, + &new_options); + + if (new_options.bytes_per_sync == 0) { + new_options.bytes_per_sync = 1024 * 1024; + } + + if (MutableDBOptionsAreEqual(mutable_db_options_, new_options)) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "SetDBOptions(), input option value is not changed, " + "skipping updating."); + persist_options_status.PermitUncheckedError(); + return s; + } + + DBOptions new_db_options = + BuildDBOptions(immutable_db_options_, new_options); + if (s.ok()) { + s = ValidateOptions(new_db_options); + } + if (s.ok()) { + for (auto c : *versions_->GetColumnFamilySet()) { + if (!c->IsDropped()) { + auto cf_options = c->GetLatestCFOptions(); + s = ColumnFamilyData::ValidateOptions(new_db_options, cf_options); + if (!s.ok()) { + break; + } + } + } + } + if (s.ok()) { + const BGJobLimits current_bg_job_limits = + GetBGJobLimits(mutable_db_options_.max_background_flushes, + mutable_db_options_.max_background_compactions, + mutable_db_options_.max_background_jobs, + /* parallelize_compactions */ true); + const BGJobLimits new_bg_job_limits = GetBGJobLimits( + new_options.max_background_flushes, + new_options.max_background_compactions, + new_options.max_background_jobs, /* parallelize_compactions */ true); + + const bool max_flushes_increased = + new_bg_job_limits.max_flushes > current_bg_job_limits.max_flushes; + const bool max_compactions_increased = + new_bg_job_limits.max_compactions > + current_bg_job_limits.max_compactions; + + if (max_flushes_increased || max_compactions_increased) { + if (max_flushes_increased) { + env_->IncBackgroundThreadsIfNeeded(new_bg_job_limits.max_flushes, + Env::Priority::HIGH); + } + + if (max_compactions_increased) { + env_->IncBackgroundThreadsIfNeeded(new_bg_job_limits.max_compactions, + Env::Priority::LOW); + } + + MaybeScheduleFlushOrCompaction(); + } + + mutex_.Unlock(); + if (new_options.stats_dump_period_sec == 0) { + s = periodic_task_scheduler_.Unregister(PeriodicTaskType::kDumpStats); + } else { + s = 
periodic_task_scheduler_.Register( + PeriodicTaskType::kDumpStats, + periodic_task_functions_.at(PeriodicTaskType::kDumpStats), + new_options.stats_dump_period_sec); + } + if (new_options.max_total_wal_size != + mutable_db_options_.max_total_wal_size) { + max_total_wal_size_.store(new_options.max_total_wal_size, + std::memory_order_release); + } + if (s.ok()) { + if (new_options.stats_persist_period_sec == 0) { + s = periodic_task_scheduler_.Unregister( + PeriodicTaskType::kPersistStats); + } else { + s = periodic_task_scheduler_.Register( + PeriodicTaskType::kPersistStats, + periodic_task_functions_.at(PeriodicTaskType::kPersistStats), + new_options.stats_persist_period_sec); + } + } + mutex_.Lock(); + if (!s.ok()) { + return s; + } + + write_controller_.set_max_delayed_write_rate( + new_options.delayed_write_rate); + table_cache_.get()->SetCapacity(new_options.max_open_files == -1 + ? TableCache::kInfiniteCapacity + : new_options.max_open_files - 10); + wal_changed = mutable_db_options_.wal_bytes_per_sync != + new_options.wal_bytes_per_sync; + mutable_db_options_ = new_options; + file_options_for_compaction_ = FileOptions(new_db_options); + file_options_for_compaction_ = fs_->OptimizeForCompactionTableWrite( + file_options_for_compaction_, immutable_db_options_); + versions_->ChangeFileOptions(mutable_db_options_); + // TODO(xiez): clarify why apply optimize for read to write options + file_options_for_compaction_ = fs_->OptimizeForCompactionTableRead( + file_options_for_compaction_, immutable_db_options_); + file_options_for_compaction_.compaction_readahead_size = + mutable_db_options_.compaction_readahead_size; + WriteThread::Writer w; + write_thread_.EnterUnbatched(&w, &mutex_); + if (total_log_size_ > GetMaxTotalWalSize() || wal_changed) { + Status purge_wal_status = SwitchWAL(&write_context); + if (!purge_wal_status.ok()) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "Unable to purge WAL files in SetDBOptions() -- %s", + purge_wal_status.ToString().c_str()); + } + } + persist_options_status = WriteOptionsFile( + false /*need_mutex_lock*/, false /*need_enter_write_thread*/); + write_thread_.ExitUnbatched(&w); + } else { + // To get here, we must have had invalid options and will not attempt to + // persist the options, which means the status is "OK/Uninitialized. 
+ persist_options_status.PermitUncheckedError(); + } + } + ROCKS_LOG_INFO(immutable_db_options_.info_log, "SetDBOptions(), inputs:"); + for (const auto& o : options_map) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s: %s\n", o.first.c_str(), + o.second.c_str()); + } + if (s.ok()) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "SetDBOptions() succeeded"); + new_options.Dump(immutable_db_options_.info_log.get()); + if (!persist_options_status.ok()) { + if (immutable_db_options_.fail_if_options_file_error) { + s = Status::IOError( + "SetDBOptions() succeeded, but unable to persist options", + persist_options_status.ToString()); + } + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "Unable to persist options in SetDBOptions() -- %s", + persist_options_status.ToString().c_str()); + } + } else { + ROCKS_LOG_WARN(immutable_db_options_.info_log, "SetDBOptions failed"); + } + LogFlush(immutable_db_options_.info_log); + return s; +} + +// return the same level if it cannot be moved +int DBImpl::FindMinimumEmptyLevelFitting( + ColumnFamilyData* cfd, const MutableCFOptions& /*mutable_cf_options*/, + int level) { + mutex_.AssertHeld(); + const auto* vstorage = cfd->current()->storage_info(); + int minimum_level = level; + for (int i = level - 1; i > 0; --i) { + // stop if level i is not empty + if (vstorage->NumLevelFiles(i) > 0) break; + // stop if level i is too small (cannot fit the level files) + if (vstorage->MaxBytesForLevel(i) < vstorage->NumLevelBytes(level)) { + break; + } + + minimum_level = i; + } + return minimum_level; +} + +Status DBImpl::FlushWAL(bool sync) { + if (manual_wal_flush_) { + IOStatus io_s; + { + // We need to lock log_write_mutex_ since logs_ might change concurrently + InstrumentedMutexLock wl(&log_write_mutex_); + log::Writer* cur_log_writer = logs_.back().writer; + io_s = cur_log_writer->WriteBuffer(); + } + if (!io_s.ok()) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s", + io_s.ToString().c_str()); + // In case there is a fs error we should set it globally to prevent the + // future writes + IOStatusCheck(io_s); + // whether sync or not, we should abort the rest of function upon error + return static_cast(io_s); + } + if (!sync) { + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "FlushWAL sync=false"); + return static_cast(io_s); + } + } + if (!sync) { + return Status::OK(); + } + // sync = true + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "FlushWAL sync=true"); + return SyncWAL(); +} + +bool DBImpl::WALBufferIsEmpty() { + InstrumentedMutexLock l(&log_write_mutex_); + log::Writer* cur_log_writer = logs_.back().writer; + auto res = cur_log_writer->BufferIsEmpty(); + return res; +} + +Status DBImpl::SyncWAL() { + TEST_SYNC_POINT("DBImpl::SyncWAL:Begin"); + autovector logs_to_sync; + bool need_log_dir_sync; + uint64_t current_log_number; + + { + InstrumentedMutexLock l(&log_write_mutex_); + assert(!logs_.empty()); + + // This SyncWAL() call only cares about logs up to this number. + current_log_number = logfile_number_; + + while (logs_.front().number <= current_log_number && + logs_.front().IsSyncing()) { + log_sync_cv_.Wait(); + } + // First check that logs are safe to sync in background. + for (auto it = logs_.begin(); + it != logs_.end() && it->number <= current_log_number; ++it) { + if (!it->writer->file()->writable_file()->IsSyncThreadSafe()) { + return Status::NotSupported( + "SyncWAL() is not supported for this implementation of WAL file", + immutable_db_options_.allow_mmap_writes + ? 
"try setting Options::allow_mmap_writes to false" + : Slice()); + } + } + for (auto it = logs_.begin(); + it != logs_.end() && it->number <= current_log_number; ++it) { + auto& log = *it; + log.PrepareForSync(); + logs_to_sync.push_back(log.writer); + } + + need_log_dir_sync = !log_dir_synced_; + } + + TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:1"); + RecordTick(stats_, WAL_FILE_SYNCED); + Status status; + IOStatus io_s; + for (log::Writer* log : logs_to_sync) { + io_s = log->file()->SyncWithoutFlush(immutable_db_options_.use_fsync); + if (!io_s.ok()) { + status = io_s; + break; + } + } + if (!io_s.ok()) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL Sync error %s", + io_s.ToString().c_str()); + // In case there is a fs error we should set it globally to prevent the + // future writes + IOStatusCheck(io_s); + } + if (status.ok() && need_log_dir_sync) { + status = directories_.GetWalDir()->FsyncWithDirOptions( + IOOptions(), nullptr, + DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced)); + } + TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:2"); + + TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:1"); + VersionEdit synced_wals; + { + InstrumentedMutexLock l(&log_write_mutex_); + if (status.ok()) { + MarkLogsSynced(current_log_number, need_log_dir_sync, &synced_wals); + } else { + MarkLogsNotSynced(current_log_number); + } + } + if (status.ok() && synced_wals.IsWalAddition()) { + InstrumentedMutexLock l(&mutex_); + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + status = ApplyWALToManifest(read_options, &synced_wals); + } + + TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:2"); + + return status; +} + +Status DBImpl::ApplyWALToManifest(const ReadOptions& read_options, + VersionEdit* synced_wals) { + // not empty, write to MANIFEST. + mutex_.AssertHeld(); + + Status status = versions_->LogAndApplyToDefaultColumnFamily( + read_options, synced_wals, &mutex_, directories_.GetDbDir()); + if (!status.ok() && versions_->io_status().IsIOError()) { + status = error_handler_.SetBGError(versions_->io_status(), + BackgroundErrorReason::kManifestWrite); + } + return status; +} + +Status DBImpl::LockWAL() { + { + InstrumentedMutexLock lock(&mutex_); + if (lock_wal_count_ > 0) { + assert(lock_wal_write_token_); + ++lock_wal_count_; + } else { + // NOTE: this will "unnecessarily" wait for other non-LockWAL() write + // stalls to clear before LockWAL returns, however fixing that would + // not be simple because if we notice the primary queue is already + // stalled, that stall might clear while we release DB mutex in + // EnterUnbatched() for the nonmem queue. And if we work around that in + // the naive way, we could deadlock by locking the two queues in different + // orders. 
+ + WriteThread::Writer w; + write_thread_.EnterUnbatched(&w, &mutex_); + WriteThread::Writer nonmem_w; + if (two_write_queues_) { + nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); + } + + // NOTE: releasing mutex in EnterUnbatched might mean we are actually + // now lock_wal_count > 0 + if (lock_wal_count_ == 0) { + assert(!lock_wal_write_token_); + lock_wal_write_token_ = write_controller_.GetStopToken(); + } + ++lock_wal_count_; + + if (two_write_queues_) { + nonmem_write_thread_.ExitUnbatched(&nonmem_w); + } + write_thread_.ExitUnbatched(&w); + } + } + // NOTE: avoid I/O holding DB mutex + Status s = FlushWAL(/*sync=*/false); + if (!s.ok()) { + // Non-OK return should not be in locked state + UnlockWAL().PermitUncheckedError(); + } + return s; +} + +Status DBImpl::UnlockWAL() { + bool signal = false; + uint64_t maybe_stall_begun_count = 0; + uint64_t nonmem_maybe_stall_begun_count = 0; + { + InstrumentedMutexLock lock(&mutex_); + if (lock_wal_count_ == 0) { + return Status::Aborted("No LockWAL() in effect"); + } + --lock_wal_count_; + if (lock_wal_count_ == 0) { + lock_wal_write_token_.reset(); + signal = true; + // For the last UnlockWAL, we don't want to return from UnlockWAL() + // until the thread(s) that called BeginWriteStall() have had a chance to + // call EndWriteStall(), so that no_slowdown writes after UnlockWAL() are + // guaranteed to succeed if there's no other source of stall. + maybe_stall_begun_count = write_thread_.GetBegunCountOfOutstandingStall(); + if (two_write_queues_) { + nonmem_maybe_stall_begun_count = + nonmem_write_thread_.GetBegunCountOfOutstandingStall(); + } + } + } + if (signal) { + // SignalAll outside of mutex for efficiency + bg_cv_.SignalAll(); + } + // Ensure stalls have cleared + if (maybe_stall_begun_count) { + write_thread_.WaitForStallEndedCount(maybe_stall_begun_count); + } + if (nonmem_maybe_stall_begun_count) { + nonmem_write_thread_.WaitForStallEndedCount(nonmem_maybe_stall_begun_count); + } + return Status::OK(); +} + +void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, + VersionEdit* synced_wals) { + log_write_mutex_.AssertHeld(); + if (synced_dir && logfile_number_ == up_to) { + log_dir_synced_ = true; + } + for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to;) { + auto& wal = *it; + assert(wal.IsSyncing()); + + if (wal.number < logs_.back().number) { + // Inactive WAL + if (immutable_db_options_.track_and_verify_wals_in_manifest && + wal.GetPreSyncSize() > 0) { + synced_wals->AddWal(wal.number, WalMetadata(wal.GetPreSyncSize())); + } + if (wal.GetPreSyncSize() == wal.writer->file()->GetFlushedSize()) { + // Fully synced + logs_to_free_.push_back(wal.ReleaseWriter()); + it = logs_.erase(it); + } else { + assert(wal.GetPreSyncSize() < wal.writer->file()->GetFlushedSize()); + wal.FinishSync(); + ++it; + } + } else { + assert(wal.number == logs_.back().number); + // Active WAL + wal.FinishSync(); + ++it; + } + } + log_sync_cv_.SignalAll(); +} + +void DBImpl::MarkLogsNotSynced(uint64_t up_to) { + log_write_mutex_.AssertHeld(); + for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to; + ++it) { + auto& wal = *it; + wal.FinishSync(); + } + log_sync_cv_.SignalAll(); +} + +SequenceNumber DBImpl::GetLatestSequenceNumber() const { + return versions_->LastSequence(); +} + +void DBImpl::SetLastPublishedSequence(SequenceNumber seq) { + versions_->SetLastPublishedSequence(seq); +} + +Status DBImpl::GetFullHistoryTsLow(ColumnFamilyHandle* column_family, + std::string* ts_low) { + if (ts_low == 
nullptr) { + return Status::InvalidArgument("ts_low is nullptr"); + } + ColumnFamilyData* cfd = nullptr; + if (column_family == nullptr) { + cfd = default_cf_handle_->cfd(); + } else { + auto cfh = static_cast_with_check(column_family); + assert(cfh != nullptr); + cfd = cfh->cfd(); + } + assert(cfd != nullptr && cfd->user_comparator() != nullptr); + if (cfd->user_comparator()->timestamp_size() == 0) { + return Status::InvalidArgument( + "Timestamp is not enabled in this column family"); + } + InstrumentedMutexLock l(&mutex_); + *ts_low = cfd->GetFullHistoryTsLow(); + assert(cfd->user_comparator()->timestamp_size() == ts_low->size()); + return Status::OK(); +} + +InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options, + Arena* arena, + SequenceNumber sequence, + ColumnFamilyHandle* column_family, + bool allow_unprepared_value) { + ColumnFamilyData* cfd; + if (column_family == nullptr) { + cfd = default_cf_handle_->cfd(); + } else { + auto cfh = static_cast_with_check(column_family); + cfd = cfh->cfd(); + } + + mutex_.Lock(); + SuperVersion* super_version = cfd->GetSuperVersion()->Ref(); + mutex_.Unlock(); + return NewInternalIterator(read_options, cfd, super_version, arena, sequence, + allow_unprepared_value); +} + +void DBImpl::SchedulePurge() { + mutex_.AssertHeld(); + assert(opened_successfully_); + + // Purge operations are put into High priority queue + bg_purge_scheduled_++; + env_->Schedule(&DBImpl::BGWorkPurge, this, Env::Priority::HIGH, nullptr); +} + +void DBImpl::BackgroundCallPurge() { + mutex_.Lock(); + + while (!logs_to_free_queue_.empty()) { + assert(!logs_to_free_queue_.empty()); + log::Writer* log_writer = *(logs_to_free_queue_.begin()); + logs_to_free_queue_.pop_front(); + mutex_.Unlock(); + delete log_writer; + mutex_.Lock(); + } + while (!superversions_to_free_queue_.empty()) { + assert(!superversions_to_free_queue_.empty()); + SuperVersion* sv = superversions_to_free_queue_.front(); + superversions_to_free_queue_.pop_front(); + mutex_.Unlock(); + delete sv; + mutex_.Lock(); + } + + assert(bg_purge_scheduled_ > 0); + + // Can't use iterator to go over purge_files_ because inside the loop we're + // unlocking the mutex that protects purge_files_. + while (!purge_files_.empty()) { + auto it = purge_files_.begin(); + // Need to make a copy of the PurgeFilesInfo before unlocking the mutex. + PurgeFileInfo purge_file = it->second; + + const std::string& fname = purge_file.fname; + const std::string& dir_to_sync = purge_file.dir_to_sync; + FileType type = purge_file.type; + uint64_t number = purge_file.number; + int job_id = purge_file.job_id; + + purge_files_.erase(it); + + mutex_.Unlock(); + DeleteObsoleteFileImpl(job_id, fname, dir_to_sync, type, number); + mutex_.Lock(); + } + + bg_purge_scheduled_--; + + bg_cv_.SignalAll(); + // IMPORTANT:there should be no code after calling SignalAll. This call may + // signal the DB destructor that it's OK to proceed with destruction. In + // that case, all DB variables will be dealloacated and referencing them + // will cause trouble. + mutex_.Unlock(); +} + +namespace { + +// A `SuperVersionHandle` holds a non-null `SuperVersion*` pointing at a +// `SuperVersion` referenced once for this object. It also contains the state +// needed to clean up the `SuperVersion` reference from outside of `DBImpl` +// using `CleanupSuperVersionHandle()`. +struct SuperVersionHandle { + // `_super_version` must be non-nullptr and `Ref()`'d once as long as the + // `SuperVersionHandle` may use it. 
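+  // Illustrative lifecycle sketch under that Ref()/Unref() contract (not
+  // upstream code):
+  //
+  //   SuperVersion* sv = cfd->GetSuperVersion()->Ref();  // one ref for us
+  //   auto* handle = new SuperVersionHandle(db, mu, sv, background_purge);
+  //   iter->RegisterCleanup(CleanupSuperVersionHandle, handle, nullptr);
+  //   // Deleting `iter` later runs CleanupSuperVersionHandle(), which
+  //   // Unref()s `sv` and tears it down if that was the last reference.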
+  SuperVersionHandle(DBImpl* _db, InstrumentedMutex* _mu,
+                     SuperVersion* _super_version, bool _background_purge)
+      : db(_db),
+        mu(_mu),
+        super_version(_super_version),
+        background_purge(_background_purge) {}
+
+  DBImpl* db;
+  InstrumentedMutex* mu;
+  SuperVersion* super_version;
+  bool background_purge;
+};
+
+static void CleanupSuperVersionHandle(void* arg1, void* /*arg2*/) {
+  SuperVersionHandle* sv_handle = reinterpret_cast<SuperVersionHandle*>(arg1);
+
+  if (sv_handle->super_version->Unref()) {
+    // Job id == 0 means that this is not our background process, but rather
+    // a user thread
+    JobContext job_context(0);
+
+    sv_handle->mu->Lock();
+    sv_handle->super_version->Cleanup();
+    sv_handle->db->FindObsoleteFiles(&job_context, false, true);
+    if (sv_handle->background_purge) {
+      sv_handle->db->ScheduleBgLogWriterClose(&job_context);
+      sv_handle->db->AddSuperVersionsToFreeQueue(sv_handle->super_version);
+      sv_handle->db->SchedulePurge();
+    }
+    sv_handle->mu->Unlock();
+
+    if (!sv_handle->background_purge) {
+      delete sv_handle->super_version;
+    }
+    if (job_context.HaveSomethingToDelete()) {
+      sv_handle->db->PurgeObsoleteFiles(job_context,
+                                        sv_handle->background_purge);
+    }
+    job_context.Clean();
+  }
+
+  delete sv_handle;
+}
+
+struct GetMergeOperandsState {
+  MergeContext merge_context;
+  PinnedIteratorsManager pinned_iters_mgr;
+  SuperVersionHandle* sv_handle;
+};
+
+static void CleanupGetMergeOperandsState(void* arg1, void* /*arg2*/) {
+  GetMergeOperandsState* state = static_cast<GetMergeOperandsState*>(arg1);
+  CleanupSuperVersionHandle(state->sv_handle /* arg1 */, nullptr /* arg2 */);
+  delete state;
+}
+
+}  // namespace
+
+InternalIterator* DBImpl::NewInternalIterator(
+    const ReadOptions& read_options, ColumnFamilyData* cfd,
+    SuperVersion* super_version, Arena* arena, SequenceNumber sequence,
+    bool allow_unprepared_value, ArenaWrappedDBIter* db_iter) {
+  InternalIterator* internal_iter;
+  assert(arena != nullptr);
+  // Need to create internal iterator from the arena.
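+  // Arena placement keeps the merging iterator and all of its children in
+  // one allocation region that is freed together with the owning
+  // ArenaWrappedDBIter. The builder below follows the usual placement-new
+  // pattern, roughly (illustrative, `SomeIterator` is a stand-in name):
+  //
+  //   char* buf = arena->AllocateAligned(sizeof(SomeIterator));
+  //   auto* it = new (buf) SomeIterator(...);  // destroyed, never free()d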
+ MergeIteratorBuilder merge_iter_builder( + &cfd->internal_comparator(), arena, + !read_options.total_order_seek && + super_version->mutable_cf_options.prefix_extractor != nullptr, + read_options.iterate_upper_bound); + // Collect iterator for mutable memtable + auto mem_iter = super_version->mem->NewIterator(read_options, arena); + Status s; + if (!read_options.ignore_range_deletions) { + TruncatedRangeDelIterator* mem_tombstone_iter = nullptr; + auto range_del_iter = super_version->mem->NewRangeTombstoneIterator( + read_options, sequence, false /* immutable_memtable */); + if (range_del_iter == nullptr || range_del_iter->empty()) { + delete range_del_iter; + } else { + mem_tombstone_iter = new TruncatedRangeDelIterator( + std::unique_ptr(range_del_iter), + &cfd->ioptions()->internal_comparator, nullptr /* smallest */, + nullptr /* largest */); + } + merge_iter_builder.AddPointAndTombstoneIterator(mem_iter, + mem_tombstone_iter); + } else { + merge_iter_builder.AddIterator(mem_iter); + } + + // Collect all needed child iterators for immutable memtables + if (s.ok()) { + super_version->imm->AddIterators(read_options, &merge_iter_builder, + !read_options.ignore_range_deletions); + } + TEST_SYNC_POINT_CALLBACK("DBImpl::NewInternalIterator:StatusCallback", &s); + if (s.ok()) { + // Collect iterators for files in L0 - Ln + if (read_options.read_tier != kMemtableTier) { + super_version->current->AddIterators(read_options, file_options_, + &merge_iter_builder, + allow_unprepared_value); + } + internal_iter = merge_iter_builder.Finish( + read_options.ignore_range_deletions ? nullptr : db_iter); + SuperVersionHandle* cleanup = new SuperVersionHandle( + this, &mutex_, super_version, + read_options.background_purge_on_iterator_cleanup || + immutable_db_options_.avoid_unnecessary_blocking_io); + internal_iter->RegisterCleanup(CleanupSuperVersionHandle, cleanup, nullptr); + + return internal_iter; + } else { + CleanupSuperVersion(super_version); + } + return NewErrorInternalIterator(s, arena); +} + +ColumnFamilyHandle* DBImpl::DefaultColumnFamily() const { + return default_cf_handle_; +} + +ColumnFamilyHandle* DBImpl::PersistentStatsColumnFamily() const { + return persist_stats_cf_handle_; +} + +Status DBImpl::Get(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) { + return Get(read_options, column_family, key, value, /*timestamp=*/nullptr); +} + +Status DBImpl::Get(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp) { + assert(value != nullptr); + value->Reset(); + GetImplOptions get_impl_options; + get_impl_options.column_family = column_family; + get_impl_options.value = value; + get_impl_options.timestamp = timestamp; + Status s = GetImpl(read_options, key, get_impl_options); + return s; +} + +Status DBImpl::GetEntity(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableWideColumns* columns) { + if (!column_family) { + return Status::InvalidArgument( + "Cannot call GetEntity without a column family handle"); + } + + if (!columns) { + return Status::InvalidArgument( + "Cannot call GetEntity without a PinnableWideColumns object"); + } + + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call GetEntity with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + + columns->Reset(); + + GetImplOptions get_impl_options; + 
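+  // Illustrative caller-side sketch of this wide-column read path (key and
+  // handle names are made up):
+  //
+  //   PinnableWideColumns result;
+  //   Status s = db->GetEntity(ReadOptions(), cfh, "user:42", &result);
+  //   if (s.ok()) {
+  //     for (const WideColumn& col : result.columns()) {
+  //       // use col.name() / col.value()
+  //     }
+  //   }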
get_impl_options.column_family = column_family; + get_impl_options.columns = columns; + + return GetImpl(read_options, key, get_impl_options); +} + +bool DBImpl::ShouldReferenceSuperVersion(const MergeContext& merge_context) { + // If both thresholds are reached, a function returning merge operands as + // `PinnableSlice`s should reference the `SuperVersion` to avoid large and/or + // numerous `memcpy()`s. + // + // The below constants enable the optimization conservatively. They are + // verified to not regress `GetMergeOperands()` latency in the following + // scenarios. + // + // - CPU: two socket Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz + // - `GetMergeOperands()` threads: 1 - 32 + // - Entry size: 32 bytes - 4KB + // - Merges per key: 1 - 16K + // - LSM component: memtable + // + // TODO(ajkr): expand measurement to SST files. + static const size_t kNumBytesForSvRef = 32768; + static const size_t kLog2AvgBytesForSvRef = 8; // 256 bytes + + size_t num_bytes = 0; + for (const Slice& sl : merge_context.GetOperands()) { + num_bytes += sl.size(); + } + return num_bytes >= kNumBytesForSvRef && + (num_bytes >> kLog2AvgBytesForSvRef) >= + merge_context.GetOperands().size(); +} + +Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key, + GetImplOptions& get_impl_options) { + assert(get_impl_options.value != nullptr || + get_impl_options.merge_operands != nullptr || + get_impl_options.columns != nullptr); + + assert(get_impl_options.column_family); + + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call Get with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + + if (read_options.timestamp) { + const Status s = FailIfTsMismatchCf(get_impl_options.column_family, + *(read_options.timestamp), + /*ts_for_read=*/true); + if (!s.ok()) { + return s; + } + } else { + const Status s = FailIfCfHasTs(get_impl_options.column_family); + if (!s.ok()) { + return s; + } + } + + // Clear the timestamps for returning results so that we can distinguish + // between tombstone or key that has never been written + if (get_impl_options.timestamp) { + get_impl_options.timestamp->clear(); + } + + GetWithTimestampReadCallback read_cb(0); // Will call Refresh + + PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); + StopWatch sw(immutable_db_options_.clock, stats_, DB_GET); + PERF_TIMER_GUARD(get_snapshot_time); + + auto cfh = static_cast_with_check( + get_impl_options.column_family); + auto cfd = cfh->cfd(); + + if (tracer_) { + // TODO: This mutex should be removed later, to improve performance when + // tracing is enabled. + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + // TODO: maybe handle the tracing status? 
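+      // PermitUncheckedError() below marks the tracing Status as inspected
+      // so ASSERT_STATUS_CHECKED builds do not abort; trace failures are
+      // deliberately non-fatal on the read path.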
+      tracer_->Get(get_impl_options.column_family, key).PermitUncheckedError();
+    }
+  }
+
+  if (get_impl_options.get_merge_operands_options != nullptr) {
+    for (int i = 0; i < get_impl_options.get_merge_operands_options
+                            ->expected_max_number_of_operands;
+         ++i) {
+      get_impl_options.merge_operands[i].Reset();
+    }
+  }
+
+  // Acquire SuperVersion
+  SuperVersion* sv = GetAndRefSuperVersion(cfd);
+
+  TEST_SYNC_POINT("DBImpl::GetImpl:1");
+  TEST_SYNC_POINT("DBImpl::GetImpl:2");
+
+  SequenceNumber snapshot;
+  if (read_options.snapshot != nullptr) {
+    if (get_impl_options.callback) {
+      // Already calculated based on read_options.snapshot
+      snapshot = get_impl_options.callback->max_visible_seq();
+    } else {
+      snapshot =
+          reinterpret_cast<const SnapshotImpl*>(read_options.snapshot)->number_;
+    }
+  } else {
+    // Note that the snapshot is assigned AFTER referencing the super
+    // version because otherwise a flush happening in between may compact
+    // away data for the snapshot, so the reader would see neither data that
+    // was visible to the snapshot before compaction nor the newer data
+    // inserted afterwards.
+    snapshot = GetLastPublishedSequence();
+    if (get_impl_options.callback) {
+      // The unprep_seqs are not published for write unprepared, so it could
+      // be that max_visible_seq is larger. Seek to the std::max of the two.
+      // However, we still want our callback to contain the actual snapshot
+      // so that it can do the correct visibility filtering.
+      get_impl_options.callback->Refresh(snapshot);
+
+      // Internally, WriteUnpreparedTxnReadCallback::Refresh would set
+      // max_visible_seq = max(max_visible_seq, snapshot)
+      //
+      // Currently, the commented out assert is broken by
+      // InvalidSnapshotReadCallback, but if write unprepared recovery
+      // followed the regular transaction flow, then this special read
+      // callback would not be needed.
+      //
+      // assert(callback->max_visible_seq() >= snapshot);
+      snapshot = get_impl_options.callback->max_visible_seq();
+    }
+  }
+  // If a timestamp is used, we use the read callback to ensure that a pair
+  // <s, t> is returned only if t <= read_opts.timestamp and s <= snapshot.
+  // HACK: temporarily overwrite the input struct field, then restore it.
+  SaveAndRestore<ReadCallback*> restore_callback(&get_impl_options.callback);
+  const Comparator* ucmp = get_impl_options.column_family->GetComparator();
+  assert(ucmp);
+  if (ucmp->timestamp_size() > 0) {
+    assert(!get_impl_options
+                .callback);  // timestamp with callback is not supported
+    read_cb.Refresh(snapshot);
+    get_impl_options.callback = &read_cb;
+  }
+  TEST_SYNC_POINT("DBImpl::GetImpl:3");
+  TEST_SYNC_POINT("DBImpl::GetImpl:4");
+
+  // Prepare to store a list of merge operations if merge occurs.
+  MergeContext merge_context;
+  SequenceNumber max_covering_tombstone_seq = 0;
+
+  Status s;
+  // First look in the memtable, then in the immutable memtable (if any).
+  // s is both in/out. When in, s could either be OK or MergeInProgress.
+  // merge_operands will contain the sequence of merges in the latter case.
+  LookupKey lkey(key, snapshot, read_options.timestamp);
+  PERF_TIMER_STOP(get_snapshot_time);
+
+  bool skip_memtable = (read_options.read_tier == kPersistedTier &&
+                        has_unpersisted_data_.load(std::memory_order_relaxed));
+  bool done = false;
+  std::string* timestamp =
+      ucmp->timestamp_size() > 0 ? get_impl_options.timestamp : nullptr;
+  if (!skip_memtable) {
+    // Get value associated with key
+    if (get_impl_options.get_value) {
+      if (sv->mem->Get(
+              lkey,
+              get_impl_options.value ?
get_impl_options.value->GetSelf() + : nullptr, + get_impl_options.columns, timestamp, &s, &merge_context, + &max_covering_tombstone_seq, read_options, + false /* immutable_memtable */, get_impl_options.callback, + get_impl_options.is_blob_index)) { + done = true; + + if (get_impl_options.value) { + get_impl_options.value->PinSelf(); + } + + RecordTick(stats_, MEMTABLE_HIT); + } else if ((s.ok() || s.IsMergeInProgress()) && + sv->imm->Get(lkey, + get_impl_options.value + ? get_impl_options.value->GetSelf() + : nullptr, + get_impl_options.columns, timestamp, &s, + &merge_context, &max_covering_tombstone_seq, + read_options, get_impl_options.callback, + get_impl_options.is_blob_index)) { + done = true; + + if (get_impl_options.value) { + get_impl_options.value->PinSelf(); + } + + RecordTick(stats_, MEMTABLE_HIT); + } + } else { + // Get Merge Operands associated with key, Merge Operands should not be + // merged and raw values should be returned to the user. + if (sv->mem->Get(lkey, /*value=*/nullptr, /*columns=*/nullptr, + /*timestamp=*/nullptr, &s, &merge_context, + &max_covering_tombstone_seq, read_options, + false /* immutable_memtable */, nullptr, nullptr, + false)) { + done = true; + RecordTick(stats_, MEMTABLE_HIT); + } else if ((s.ok() || s.IsMergeInProgress()) && + sv->imm->GetMergeOperands(lkey, &s, &merge_context, + &max_covering_tombstone_seq, + read_options)) { + done = true; + RecordTick(stats_, MEMTABLE_HIT); + } + } + if (!done && !s.ok() && !s.IsMergeInProgress()) { + ReturnAndCleanupSuperVersion(cfd, sv); + return s; + } + } + TEST_SYNC_POINT("DBImpl::GetImpl:PostMemTableGet:0"); + TEST_SYNC_POINT("DBImpl::GetImpl:PostMemTableGet:1"); + PinnedIteratorsManager pinned_iters_mgr; + if (!done) { + PERF_TIMER_GUARD(get_from_output_files_time); + sv->current->Get( + read_options, lkey, get_impl_options.value, get_impl_options.columns, + timestamp, &s, &merge_context, &max_covering_tombstone_seq, + &pinned_iters_mgr, + get_impl_options.get_value ? get_impl_options.value_found : nullptr, + nullptr, nullptr, + get_impl_options.get_value ? get_impl_options.callback : nullptr, + get_impl_options.get_value ? get_impl_options.is_blob_index : nullptr, + get_impl_options.get_value); + RecordTick(stats_, MEMTABLE_MISS); + } + + { + PERF_TIMER_GUARD(get_post_process_time); + + RecordTick(stats_, NUMBER_KEYS_READ); + size_t size = 0; + if (s.ok()) { + if (get_impl_options.get_value) { + if (get_impl_options.value) { + size = get_impl_options.value->size(); + } else if (get_impl_options.columns) { + size = get_impl_options.columns->serialized_size(); + } + } else { + // Return all merge operands for get_impl_options.key + *get_impl_options.number_of_operands = + static_cast(merge_context.GetNumOperands()); + if (*get_impl_options.number_of_operands > + get_impl_options.get_merge_operands_options + ->expected_max_number_of_operands) { + s = Status::Incomplete( + Status::SubCode::KMergeOperandsInsufficientCapacity); + } else { + // Each operand depends on one of the following resources: `sv`, + // `pinned_iters_mgr`, or `merge_context`. It would be crazy expensive + // to reference `sv` for each operand relying on it because `sv` is + // (un)ref'd in all threads using the DB. Furthermore, we do not track + // on which resource each operand depends. + // + // To solve this, we bundle the resources in a `GetMergeOperandsState` + // and manage them with a `SharedCleanablePtr` shared among the + // `PinnableSlice`s we return. 
This bundle includes one `sv` reference + // and ownership of the `merge_context` and `pinned_iters_mgr` + // objects. + bool ref_sv = ShouldReferenceSuperVersion(merge_context); + if (ref_sv) { + assert(!merge_context.GetOperands().empty()); + SharedCleanablePtr shared_cleanable; + GetMergeOperandsState* state = nullptr; + state = new GetMergeOperandsState(); + state->merge_context = std::move(merge_context); + state->pinned_iters_mgr = std::move(pinned_iters_mgr); + + sv->Ref(); + + state->sv_handle = new SuperVersionHandle( + this, &mutex_, sv, + immutable_db_options_.avoid_unnecessary_blocking_io); + + shared_cleanable.Allocate(); + shared_cleanable->RegisterCleanup(CleanupGetMergeOperandsState, + state /* arg1 */, + nullptr /* arg2 */); + for (size_t i = 0; i < state->merge_context.GetOperands().size(); + ++i) { + const Slice& sl = state->merge_context.GetOperands()[i]; + size += sl.size(); + + get_impl_options.merge_operands->PinSlice( + sl, nullptr /* cleanable */); + if (i == state->merge_context.GetOperands().size() - 1) { + shared_cleanable.MoveAsCleanupTo( + get_impl_options.merge_operands); + } else { + shared_cleanable.RegisterCopyWith( + get_impl_options.merge_operands); + } + get_impl_options.merge_operands++; + } + } else { + for (const Slice& sl : merge_context.GetOperands()) { + size += sl.size(); + get_impl_options.merge_operands->PinSelf(sl); + get_impl_options.merge_operands++; + } + } + } + } + RecordTick(stats_, BYTES_READ, size); + PERF_COUNTER_ADD(get_read_bytes, size); + } + + ReturnAndCleanupSuperVersion(cfd, sv); + + RecordInHistogram(stats_, BYTES_PER_READ, size); + } + return s; +} + +std::vector DBImpl::MultiGet( + const ReadOptions& read_options, + const std::vector& column_family, + const std::vector& keys, std::vector* values) { + return MultiGet(read_options, column_family, keys, values, + /*timestamps=*/nullptr); +} + +std::vector DBImpl::MultiGet( + const ReadOptions& read_options, + const std::vector& column_family, + const std::vector& keys, std::vector* values, + std::vector* timestamps) { + PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); + StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET); + PERF_TIMER_GUARD(get_snapshot_time); + + size_t num_keys = keys.size(); + assert(column_family.size() == num_keys); + std::vector stat_list(num_keys); + + bool should_fail = false; + for (size_t i = 0; i < num_keys; ++i) { + assert(column_family[i]); + if (read_options.timestamp) { + stat_list[i] = FailIfTsMismatchCf( + column_family[i], *(read_options.timestamp), /*ts_for_read=*/true); + if (!stat_list[i].ok()) { + should_fail = true; + } + } else { + stat_list[i] = FailIfCfHasTs(column_family[i]); + if (!stat_list[i].ok()) { + should_fail = true; + } + } + } + + if (should_fail) { + for (auto& s : stat_list) { + if (s.ok()) { + s = Status::Incomplete( + "DB not queried due to invalid argument(s) in the same MultiGet"); + } + } + return stat_list; + } + + if (tracer_) { + // TODO: This mutex should be removed later, to improve performance when + // tracing is enabled. + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + // TODO: maybe handle the tracing status? 
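+      // Usage sketch for this vector-form MultiGet (illustrative only; `db`
+      // and the keys are hypothetical):
+      //
+      //   std::vector<ColumnFamilyHandle*> cfs(2, db->DefaultColumnFamily());
+      //   std::vector<Slice> keys{"k1", "k2"};
+      //   std::vector<std::string> vals;
+      //   std::vector<Status> statuses =
+      //       db->MultiGet(ReadOptions(), cfs, keys, &vals);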
+      tracer_->MultiGet(column_family, keys).PermitUncheckedError();
+    }
+  }
+
+  SequenceNumber consistent_seqnum;
+
+  UnorderedMap<uint32_t, MultiGetColumnFamilyData> multiget_cf_data(
+      column_family.size());
+  for (auto cf : column_family) {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl*>(cf);
+    auto cfd = cfh->cfd();
+    if (multiget_cf_data.find(cfd->GetID()) == multiget_cf_data.end()) {
+      multiget_cf_data.emplace(cfd->GetID(),
+                               MultiGetColumnFamilyData(cfh, nullptr));
+    }
+  }
+
+  std::function<MultiGetColumnFamilyData*(
+      UnorderedMap<uint32_t, MultiGetColumnFamilyData>::iterator&)>
+      iter_deref_lambda =
+          [](UnorderedMap<uint32_t, MultiGetColumnFamilyData>::iterator&
+                 cf_iter) { return &cf_iter->second; };
+
+  bool unref_only =
+      MultiCFSnapshot<UnorderedMap<uint32_t, MultiGetColumnFamilyData>>(
+          read_options, nullptr, iter_deref_lambda, &multiget_cf_data,
+          &consistent_seqnum);
+
+  TEST_SYNC_POINT("DBImpl::MultiGet:AfterGetSeqNum1");
+  TEST_SYNC_POINT("DBImpl::MultiGet:AfterGetSeqNum2");
+
+  // Contains a list of merge operations if a merge occurs.
+  MergeContext merge_context;
+
+  // Note: this always resizes the values array
+  values->resize(num_keys);
+  if (timestamps) {
+    timestamps->resize(num_keys);
+  }
+
+  // Keep track of bytes that we read for statistics-recording later
+  uint64_t bytes_read = 0;
+  PERF_TIMER_STOP(get_snapshot_time);
+
+  // For each of the given keys, apply the entire "get" process as follows:
+  // First look in the memtable, then in the immutable memtable (if any).
+  // s is both in/out. When in, s could either be OK or MergeInProgress.
+  // merge_operands will contain the sequence of merges in the latter case.
+  size_t num_found = 0;
+  size_t keys_read;
+  uint64_t curr_value_size = 0;
+
+  GetWithTimestampReadCallback timestamp_read_callback(0);
+  ReadCallback* read_callback = nullptr;
+  if (read_options.timestamp && read_options.timestamp->size() > 0) {
+    timestamp_read_callback.Refresh(consistent_seqnum);
+    read_callback = &timestamp_read_callback;
+  }
+
+  for (keys_read = 0; keys_read < num_keys; ++keys_read) {
+    merge_context.Clear();
+    Status& s = stat_list[keys_read];
+    std::string* value = &(*values)[keys_read];
+    std::string* timestamp = timestamps ?
&(*timestamps)[keys_read] : nullptr; + + LookupKey lkey(keys[keys_read], consistent_seqnum, read_options.timestamp); + auto cfh = static_cast_with_check( + column_family[keys_read]); + SequenceNumber max_covering_tombstone_seq = 0; + auto mgd_iter = multiget_cf_data.find(cfh->cfd()->GetID()); + assert(mgd_iter != multiget_cf_data.end()); + auto mgd = mgd_iter->second; + auto super_version = mgd.super_version; + bool skip_memtable = + (read_options.read_tier == kPersistedTier && + has_unpersisted_data_.load(std::memory_order_relaxed)); + bool done = false; + if (!skip_memtable) { + if (super_version->mem->Get( + lkey, value, /*columns=*/nullptr, timestamp, &s, &merge_context, + &max_covering_tombstone_seq, read_options, + false /* immutable_memtable */, read_callback)) { + done = true; + RecordTick(stats_, MEMTABLE_HIT); + } else if (super_version->imm->Get(lkey, value, /*columns=*/nullptr, + timestamp, &s, &merge_context, + &max_covering_tombstone_seq, + read_options, read_callback)) { + done = true; + RecordTick(stats_, MEMTABLE_HIT); + } + } + if (!done) { + PinnableSlice pinnable_val; + PERF_TIMER_GUARD(get_from_output_files_time); + PinnedIteratorsManager pinned_iters_mgr; + super_version->current->Get(read_options, lkey, &pinnable_val, + /*columns=*/nullptr, timestamp, &s, + &merge_context, &max_covering_tombstone_seq, + &pinned_iters_mgr, /*value_found=*/nullptr, + /*key_exists=*/nullptr, + /*seq=*/nullptr, read_callback); + value->assign(pinnable_val.data(), pinnable_val.size()); + RecordTick(stats_, MEMTABLE_MISS); + } + + if (s.ok()) { + bytes_read += value->size(); + num_found++; + curr_value_size += value->size(); + if (curr_value_size > read_options.value_size_soft_limit) { + while (++keys_read < num_keys) { + stat_list[keys_read] = Status::Aborted(); + } + break; + } + } + if (read_options.deadline.count() && + immutable_db_options_.clock->NowMicros() > + static_cast(read_options.deadline.count())) { + break; + } + } + + if (keys_read < num_keys) { + // The only reason to break out of the loop is when the deadline is + // exceeded + assert(immutable_db_options_.clock->NowMicros() > + static_cast(read_options.deadline.count())); + for (++keys_read; keys_read < num_keys; ++keys_read) { + stat_list[keys_read] = Status::TimedOut(); + } + } + + // Post processing (decrement reference counts and record statistics) + PERF_TIMER_GUARD(get_post_process_time); + + for (auto mgd_iter : multiget_cf_data) { + auto mgd = mgd_iter.second; + if (!unref_only) { + ReturnAndCleanupSuperVersion(mgd.cfd, mgd.super_version); + } else { + mgd.cfd->GetSuperVersion()->Unref(); + } + } + RecordTick(stats_, NUMBER_MULTIGET_CALLS); + RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys); + RecordTick(stats_, NUMBER_MULTIGET_KEYS_FOUND, num_found); + RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read); + RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read); + PERF_COUNTER_ADD(multiget_read_bytes, bytes_read); + PERF_TIMER_STOP(get_post_process_time); + + return stat_list; +} + +template +bool DBImpl::MultiCFSnapshot( + const ReadOptions& read_options, ReadCallback* callback, + std::function& + iter_deref_func, + T* cf_list, SequenceNumber* snapshot) { + PERF_TIMER_GUARD(get_snapshot_time); + + bool last_try = false; + if (cf_list->size() == 1) { + // Fast path for a single column family. 
We can simply get the thread local
+    // super version
+    auto cf_iter = cf_list->begin();
+    auto node = iter_deref_func(cf_iter);
+    node->super_version = GetAndRefSuperVersion(node->cfd);
+    if (read_options.snapshot != nullptr) {
+      // Note: In WritePrepared txns this is not necessary but not harmful
+      // either. Because prep_seq > snapshot => commit_seq > snapshot, if a
+      // snapshot is specified we should be fine with skipping seq numbers
+      // that are greater than that.
+      //
+      // In WriteUnprepared, we cannot set the snapshot in the lookup key
+      // because we may skip uncommitted data that should be visible to the
+      // transaction for reading its own writes.
+      *snapshot =
+          static_cast<const SnapshotImpl*>(read_options.snapshot)->number_;
+      if (callback) {
+        *snapshot = std::max(*snapshot, callback->max_visible_seq());
+      }
+    } else {
+      // Since we get and reference the super version before getting
+      // the snapshot number, without mutex protection, it is possible
+      // that a memtable switch happened in the middle and not all the
+      // data for this snapshot is available. But it will contain all
+      // the data available in the super version we have, which is also
+      // a valid snapshot to read from.
+      // We shouldn't get the snapshot before finding and referencing the
+      // super version because a flush happening in between may compact away
+      // data for the snapshot, but the snapshot is earlier than the data
+      // overwriting it, so users may see wrong results.
+      *snapshot = GetLastPublishedSequence();
+    }
+  } else {
+    // If we end up with the same issue of a memtable getting sealed during
+    // two consecutive retries, it means the write rate is very high. In
+    // that case it is probably ok to take the mutex on the third try so we
+    // can succeed for sure.
+    constexpr int num_retries = 3;
+    for (int i = 0; i < num_retries; ++i) {
+      last_try = (i == num_retries - 1);
+      bool retry = false;
+
+      if (i > 0) {
+        for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end();
+             ++cf_iter) {
+          auto node = iter_deref_func(cf_iter);
+          SuperVersion* super_version = node->super_version;
+          ColumnFamilyData* cfd = node->cfd;
+          if (super_version != nullptr) {
+            ReturnAndCleanupSuperVersion(cfd, super_version);
+          }
+          node->super_version = nullptr;
+        }
+      }
+      if (read_options.snapshot == nullptr) {
+        if (last_try) {
+          TEST_SYNC_POINT("DBImpl::MultiGet::LastTry");
+          // We're close to the max number of retries. For the last retry,
+          // acquire the lock so we're sure to succeed.
+          mutex_.Lock();
+        }
+        *snapshot = GetLastPublishedSequence();
+      } else {
+        *snapshot =
+            static_cast_with_check<const SnapshotImpl*>(read_options.snapshot)
+                ->number_;
+      }
+      for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end();
+           ++cf_iter) {
+        auto node = iter_deref_func(cf_iter);
+        if (!last_try) {
+          node->super_version = GetAndRefSuperVersion(node->cfd);
+        } else {
+          node->super_version = node->cfd->GetSuperVersion()->Ref();
+        }
+        TEST_SYNC_POINT("DBImpl::MultiGet::AfterRefSV");
+        if (read_options.snapshot != nullptr || last_try) {
+          // If the user passed a snapshot, then we don't care if a memtable
+          // is sealed or a compaction happens, because the snapshot would
+          // ensure that older key versions are kept around. If this is the
+          // last retry, then we have the lock, so nothing bad can happen.
+          continue;
+        }
+        // We could get the earliest sequence number for the whole list of
+        // memtables, which will include immutable memtables as well, but
+        // that might be tricky to maintain in case we decide, in future, to
+        // do memtable compaction.
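+
+        // Example of the race this loop guards against: after the super
+        // version is referenced, a flush seals the memtable and installs a
+        // new one whose earliest sequence number exceeds *snapshot. The
+        // check below detects that and retries with a fresh super version,
+        // taking the mutex only on the last attempt.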
+ if (!last_try) { + SequenceNumber seq = + node->super_version->mem->GetEarliestSequenceNumber(); + if (seq > *snapshot) { + retry = true; + break; + } + } + } + if (!retry) { + if (last_try) { + mutex_.Unlock(); + } + break; + } + } + } + + // Keep track of bytes that we read for statistics-recording later + PERF_TIMER_STOP(get_snapshot_time); + + return last_try; +} + +void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys, + ColumnFamilyHandle** column_families, const Slice* keys, + PinnableSlice* values, Status* statuses, + const bool sorted_input) { + MultiGet(read_options, num_keys, column_families, keys, values, + /* timestamps */ nullptr, statuses, sorted_input); +} + +void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys, + ColumnFamilyHandle** column_families, const Slice* keys, + PinnableSlice* values, std::string* timestamps, + Status* statuses, const bool sorted_input) { + MultiGetCommon(read_options, num_keys, column_families, keys, values, + /* columns */ nullptr, timestamps, statuses, sorted_input); +} + +void DBImpl::MultiGetCommon(const ReadOptions& read_options, + const size_t num_keys, + ColumnFamilyHandle** column_families, + const Slice* keys, PinnableSlice* values, + PinnableWideColumns* columns, + std::string* timestamps, Status* statuses, + const bool sorted_input) { + if (num_keys == 0) { + return; + } + + bool should_fail = false; + for (size_t i = 0; i < num_keys; ++i) { + ColumnFamilyHandle* cfh = column_families[i]; + assert(cfh); + if (read_options.timestamp) { + statuses[i] = FailIfTsMismatchCf(cfh, *(read_options.timestamp), + /*ts_for_read=*/true); + if (!statuses[i].ok()) { + should_fail = true; + } + } else { + statuses[i] = FailIfCfHasTs(cfh); + if (!statuses[i].ok()) { + should_fail = true; + } + } + } + if (should_fail) { + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + statuses[i] = Status::Incomplete( + "DB not queried due to invalid argument(s) in the same MultiGet"); + } + } + return; + } + + if (tracer_) { + // TODO: This mutex should be removed later, to improve performance when + // tracing is enabled. + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + // TODO: maybe handle the tracing status? + tracer_->MultiGet(num_keys, column_families, keys).PermitUncheckedError(); + } + } + + autovector key_context; + autovector sorted_keys; + sorted_keys.resize(num_keys); + for (size_t i = 0; i < num_keys; ++i) { + PinnableSlice* val = nullptr; + PinnableWideColumns* col = nullptr; + + if (values) { + val = &values[i]; + val->Reset(); + } else { + assert(columns); + + col = &columns[i]; + col->Reset(); + } + + key_context.emplace_back(column_families[i], keys[i], val, col, + timestamps ? 
×tamps[i] : nullptr, + &statuses[i]); + } + for (size_t i = 0; i < num_keys; ++i) { + sorted_keys[i] = &key_context[i]; + } + PrepareMultiGetKeys(num_keys, sorted_input, &sorted_keys); + + autovector + multiget_cf_data; + size_t cf_start = 0; + ColumnFamilyHandle* cf = sorted_keys[0]->column_family; + + for (size_t i = 0; i < num_keys; ++i) { + KeyContext* key_ctx = sorted_keys[i]; + if (key_ctx->column_family != cf) { + multiget_cf_data.emplace_back(cf, cf_start, i - cf_start, nullptr); + cf_start = i; + cf = key_ctx->column_family; + } + } + + multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr); + + std::function::iterator&)> + iter_deref_lambda = + [](autovector::iterator& cf_iter) { + return &(*cf_iter); + }; + + SequenceNumber consistent_seqnum; + bool unref_only = MultiCFSnapshot< + autovector>( + read_options, nullptr, iter_deref_lambda, &multiget_cf_data, + &consistent_seqnum); + + GetWithTimestampReadCallback timestamp_read_callback(0); + ReadCallback* read_callback = nullptr; + if (read_options.timestamp && read_options.timestamp->size() > 0) { + timestamp_read_callback.Refresh(consistent_seqnum); + read_callback = ×tamp_read_callback; + } + + Status s; + auto cf_iter = multiget_cf_data.begin(); + for (; cf_iter != multiget_cf_data.end(); ++cf_iter) { + s = MultiGetImpl(read_options, cf_iter->start, cf_iter->num_keys, + &sorted_keys, cf_iter->super_version, consistent_seqnum, + read_callback); + if (!s.ok()) { + break; + } + } + if (!s.ok()) { + assert(s.IsTimedOut() || s.IsAborted()); + for (++cf_iter; cf_iter != multiget_cf_data.end(); ++cf_iter) { + for (size_t i = cf_iter->start; i < cf_iter->start + cf_iter->num_keys; + ++i) { + *sorted_keys[i]->s = s; + } + } + } + + for (const auto& iter : multiget_cf_data) { + if (!unref_only) { + ReturnAndCleanupSuperVersion(iter.cfd, iter.super_version); + } else { + iter.cfd->GetSuperVersion()->Unref(); + } + } +} + +namespace { +// Order keys by CF ID, followed by key contents +struct CompareKeyContext { + inline bool operator()(const KeyContext* lhs, const KeyContext* rhs) { + ColumnFamilyHandleImpl* cfh = + static_cast(lhs->column_family); + uint32_t cfd_id1 = cfh->cfd()->GetID(); + const Comparator* comparator = cfh->cfd()->user_comparator(); + cfh = static_cast(rhs->column_family); + uint32_t cfd_id2 = cfh->cfd()->GetID(); + + if (cfd_id1 < cfd_id2) { + return true; + } else if (cfd_id1 > cfd_id2) { + return false; + } + + // Both keys are from the same column family + int cmp = comparator->CompareWithoutTimestamp( + *(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false); + if (cmp < 0) { + return true; + } + return false; + } +}; + +} // anonymous namespace + +void DBImpl::PrepareMultiGetKeys( + size_t num_keys, bool sorted_input, + autovector* sorted_keys) { + if (sorted_input) { +#ifndef NDEBUG + assert(std::is_sorted(sorted_keys->begin(), sorted_keys->end(), + CompareKeyContext())); +#endif + return; + } + + std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys, + CompareKeyContext()); +} + +void DBImpl::MultiGet(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const size_t num_keys, + const Slice* keys, PinnableSlice* values, + Status* statuses, const bool sorted_input) { + MultiGet(read_options, column_family, num_keys, keys, values, + /* timestamps */ nullptr, statuses, sorted_input); +} + +void DBImpl::MultiGet(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const size_t num_keys, + const Slice* keys, PinnableSlice* values, + std::string* 
timestamps, Status* statuses, + const bool sorted_input) { + MultiGetCommon(read_options, column_family, num_keys, keys, values, + /* columns */ nullptr, timestamps, statuses, sorted_input); +} + +void DBImpl::MultiGetCommon(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const size_t num_keys, const Slice* keys, + PinnableSlice* values, PinnableWideColumns* columns, + std::string* timestamps, Status* statuses, + bool sorted_input) { + if (tracer_) { + // TODO: This mutex should be removed later, to improve performance when + // tracing is enabled. + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + // TODO: maybe handle the tracing status? + tracer_->MultiGet(num_keys, column_family, keys).PermitUncheckedError(); + } + } + autovector key_context; + autovector sorted_keys; + sorted_keys.resize(num_keys); + for (size_t i = 0; i < num_keys; ++i) { + PinnableSlice* val = nullptr; + PinnableWideColumns* col = nullptr; + + if (values) { + val = &values[i]; + val->Reset(); + } else { + assert(columns); + + col = &columns[i]; + col->Reset(); + } + + key_context.emplace_back(column_family, keys[i], val, col, + timestamps ? ×tamps[i] : nullptr, + &statuses[i]); + } + for (size_t i = 0; i < num_keys; ++i) { + sorted_keys[i] = &key_context[i]; + } + PrepareMultiGetKeys(num_keys, sorted_input, &sorted_keys); + MultiGetWithCallback(read_options, column_family, nullptr, &sorted_keys); +} + +void DBImpl::MultiGetWithCallback( + const ReadOptions& read_options, ColumnFamilyHandle* column_family, + ReadCallback* callback, + autovector* sorted_keys) { + std::array multiget_cf_data; + multiget_cf_data[0] = MultiGetColumnFamilyData(column_family, nullptr); + std::function::iterator&)> + iter_deref_lambda = + [](std::array::iterator& cf_iter) { + return &(*cf_iter); + }; + + size_t num_keys = sorted_keys->size(); + SequenceNumber consistent_seqnum; + bool unref_only = MultiCFSnapshot>( + read_options, callback, iter_deref_lambda, &multiget_cf_data, + &consistent_seqnum); +#ifndef NDEBUG + assert(!unref_only); +#else + // Silence unused variable warning + (void)unref_only; +#endif // NDEBUG + + if (callback && read_options.snapshot == nullptr) { + // The unprep_seqs are not published for write unprepared, so it could be + // that max_visible_seq is larger. Seek to the std::max of the two. + // However, we still want our callback to contain the actual snapshot so + // that it can do the correct visibility filtering. + callback->Refresh(consistent_seqnum); + + // Internally, WriteUnpreparedTxnReadCallback::Refresh would set + // max_visible_seq = max(max_visible_seq, snapshot) + // + // Currently, the commented out assert is broken by + // InvalidSnapshotReadCallback, but if write unprepared recovery followed + // the regular transaction flow, then this special read callback would not + // be needed. 
+ // + // assert(callback->max_visible_seq() >= snapshot); + consistent_seqnum = callback->max_visible_seq(); + } + + GetWithTimestampReadCallback timestamp_read_callback(0); + ReadCallback* read_callback = callback; + if (read_options.timestamp && read_options.timestamp->size() > 0) { + assert(!read_callback); // timestamp with callback is not supported + timestamp_read_callback.Refresh(consistent_seqnum); + read_callback = ×tamp_read_callback; + } + + Status s = MultiGetImpl(read_options, 0, num_keys, sorted_keys, + multiget_cf_data[0].super_version, consistent_seqnum, + read_callback); + assert(s.ok() || s.IsTimedOut() || s.IsAborted()); + ReturnAndCleanupSuperVersion(multiget_cf_data[0].cfd, + multiget_cf_data[0].super_version); +} + +// The actual implementation of batched MultiGet. Parameters - +// start_key - Index in the sorted_keys vector to start processing from +// num_keys - Number of keys to lookup, starting with sorted_keys[start_key] +// sorted_keys - The entire batch of sorted keys for this CF +// +// The per key status is returned in the KeyContext structures pointed to by +// sorted_keys. An overall Status is also returned, with the only possible +// values being Status::OK() and Status::TimedOut(). The latter indicates +// that the call exceeded read_options.deadline +Status DBImpl::MultiGetImpl( + const ReadOptions& read_options, size_t start_key, size_t num_keys, + autovector* sorted_keys, + SuperVersion* super_version, SequenceNumber snapshot, + ReadCallback* callback) { + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call MultiGet with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); + StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET); + + assert(sorted_keys); + // Clear the timestamps for returning results so that we can distinguish + // between tombstone or key that has never been written + for (auto* kctx : *sorted_keys) { + assert(kctx); + if (kctx->timestamp) { + kctx->timestamp->clear(); + } + } + + // For each of the given keys, apply the entire "get" process as follows: + // First look in the memtable, then in the immutable memtable (if any). + // s is both in/out. When in, s could either be OK or MergeInProgress. + // merge_operands will contain the sequence of merges in the latter case. + size_t keys_left = num_keys; + Status s; + uint64_t curr_value_size = 0; + while (keys_left) { + if (read_options.deadline.count() && + immutable_db_options_.clock->NowMicros() > + static_cast(read_options.deadline.count())) { + s = Status::TimedOut(); + break; + } + + size_t batch_size = (keys_left > MultiGetContext::MAX_BATCH_SIZE) + ? 
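+    // Batching example (illustrative): with MAX_BATCH_SIZE == 32, a call
+    // with 70 keys proceeds in batches of 32, 32, and 6; the deadline and
+    // value_size_soft_limit are re-checked between batches.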
MultiGetContext::MAX_BATCH_SIZE + : keys_left; + MultiGetContext ctx(sorted_keys, start_key + num_keys - keys_left, + batch_size, snapshot, read_options, GetFileSystem(), + stats_); + MultiGetRange range = ctx.GetMultiGetRange(); + range.AddValueSize(curr_value_size); + bool lookup_current = false; + + keys_left -= batch_size; + for (auto mget_iter = range.begin(); mget_iter != range.end(); + ++mget_iter) { + mget_iter->merge_context.Clear(); + *mget_iter->s = Status::OK(); + } + + bool skip_memtable = + (read_options.read_tier == kPersistedTier && + has_unpersisted_data_.load(std::memory_order_relaxed)); + if (!skip_memtable) { + super_version->mem->MultiGet(read_options, &range, callback, + false /* immutable_memtable */); + if (!range.empty()) { + super_version->imm->MultiGet(read_options, &range, callback); + } + if (!range.empty()) { + lookup_current = true; + uint64_t left = range.KeysLeft(); + RecordTick(stats_, MEMTABLE_MISS, left); + } + } + if (lookup_current) { + PERF_TIMER_GUARD(get_from_output_files_time); + super_version->current->MultiGet(read_options, &range, callback); + } + curr_value_size = range.GetValueSize(); + if (curr_value_size > read_options.value_size_soft_limit) { + s = Status::Aborted(); + break; + } + } + + // Post processing (decrement reference counts and record statistics) + PERF_TIMER_GUARD(get_post_process_time); + size_t num_found = 0; + uint64_t bytes_read = 0; + for (size_t i = start_key; i < start_key + num_keys - keys_left; ++i) { + KeyContext* key = (*sorted_keys)[i]; + assert(key); + assert(key->s); + + if (key->s->ok()) { + if (key->value) { + bytes_read += key->value->size(); + } else { + assert(key->columns); + bytes_read += key->columns->serialized_size(); + } + + num_found++; + } + } + if (keys_left) { + assert(s.IsTimedOut() || s.IsAborted()); + for (size_t i = start_key + num_keys - keys_left; i < start_key + num_keys; + ++i) { + KeyContext* key = (*sorted_keys)[i]; + *key->s = s; + } + } + + RecordTick(stats_, NUMBER_MULTIGET_CALLS); + RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys); + RecordTick(stats_, NUMBER_MULTIGET_KEYS_FOUND, num_found); + RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read); + RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read); + PERF_COUNTER_ADD(multiget_read_bytes, bytes_read); + PERF_TIMER_STOP(get_post_process_time); + + return s; +} + +void DBImpl::MultiGetEntity(const ReadOptions& options, size_t num_keys, + ColumnFamilyHandle** column_families, + const Slice* keys, PinnableWideColumns* results, + Status* statuses, bool sorted_input) { + MultiGetCommon(options, num_keys, column_families, keys, /* values */ nullptr, + results, /* timestamps */ nullptr, statuses, sorted_input); +} + +void DBImpl::MultiGetEntity(const ReadOptions& options, + ColumnFamilyHandle* column_family, size_t num_keys, + const Slice* keys, PinnableWideColumns* results, + Status* statuses, bool sorted_input) { + MultiGetCommon(options, column_family, num_keys, keys, /* values */ nullptr, + results, /* timestamps */ nullptr, statuses, sorted_input); +} + +Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options, + const std::string& column_family, + ColumnFamilyHandle** handle) { + assert(handle != nullptr); + Status s = CreateColumnFamilyImpl(cf_options, column_family, handle); + if (s.ok()) { + s = WriteOptionsFile(true /*need_mutex_lock*/, + true /*need_enter_write_thread*/); + } + return s; +} + +Status DBImpl::CreateColumnFamilies( + const ColumnFamilyOptions& cf_options, + const std::vector& 
column_family_names, + std::vector* handles) { + assert(handles != nullptr); + handles->clear(); + size_t num_cf = column_family_names.size(); + Status s; + bool success_once = false; + for (size_t i = 0; i < num_cf; i++) { + ColumnFamilyHandle* handle; + s = CreateColumnFamilyImpl(cf_options, column_family_names[i], &handle); + if (!s.ok()) { + break; + } + handles->push_back(handle); + success_once = true; + } + if (success_once) { + Status persist_options_status = WriteOptionsFile( + true /*need_mutex_lock*/, true /*need_enter_write_thread*/); + if (s.ok() && !persist_options_status.ok()) { + s = persist_options_status; + } + } + return s; +} + +Status DBImpl::CreateColumnFamilies( + const std::vector& column_families, + std::vector* handles) { + assert(handles != nullptr); + handles->clear(); + size_t num_cf = column_families.size(); + Status s; + bool success_once = false; + for (size_t i = 0; i < num_cf; i++) { + ColumnFamilyHandle* handle; + s = CreateColumnFamilyImpl(column_families[i].options, + column_families[i].name, &handle); + if (!s.ok()) { + break; + } + handles->push_back(handle); + success_once = true; + } + if (success_once) { + Status persist_options_status = WriteOptionsFile( + true /*need_mutex_lock*/, true /*need_enter_write_thread*/); + if (s.ok() && !persist_options_status.ok()) { + s = persist_options_status; + } + } + return s; +} + +Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, + const std::string& column_family_name, + ColumnFamilyHandle** handle) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + Status s; + *handle = nullptr; + + DBOptions db_options = + BuildDBOptions(immutable_db_options_, mutable_db_options_); + s = ColumnFamilyData::ValidateOptions(db_options, cf_options); + if (s.ok()) { + for (auto& cf_path : cf_options.cf_paths) { + s = env_->CreateDirIfMissing(cf_path.path); + if (!s.ok()) { + break; + } + } + } + if (!s.ok()) { + return s; + } + + SuperVersionContext sv_context(/* create_superversion */ true); + { + InstrumentedMutexLock l(&mutex_); + + if (versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name) != + nullptr) { + return Status::InvalidArgument("Column family already exists"); + } + VersionEdit edit; + edit.AddColumnFamily(column_family_name); + uint32_t new_id = versions_->GetColumnFamilySet()->GetNextColumnFamilyID(); + edit.SetColumnFamily(new_id); + edit.SetLogNumber(logfile_number_); + edit.SetComparatorName(cf_options.comparator->Name()); + + // LogAndApply will both write the creation in MANIFEST and create + // ColumnFamilyData object + { // write thread + WriteThread::Writer w; + write_thread_.EnterUnbatched(&w, &mutex_); + // LogAndApply will both write the creation in MANIFEST and create + // ColumnFamilyData object + s = versions_->LogAndApply(nullptr, MutableCFOptions(cf_options), + read_options, &edit, &mutex_, + directories_.GetDbDir(), false, &cf_options); + write_thread_.ExitUnbatched(&w); + } + if (s.ok()) { + auto* cfd = + versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name); + assert(cfd != nullptr); + std::map> dummy_created_dirs; + s = cfd->AddDirectories(&dummy_created_dirs); + } + if (s.ok()) { + auto* cfd = + versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name); + assert(cfd != nullptr); + InstallSuperVersionAndScheduleWork(cfd, &sv_context, + *cfd->GetLatestMutableCFOptions()); + + if (!cfd->mem()->IsSnapshotSupported()) { + is_snapshot_supported_ = false; + } + + cfd->set_initialized(); + + *handle = new 
ColumnFamilyHandleImpl(cfd, this, &mutex_); + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Created column family [%s] (ID %u)", + column_family_name.c_str(), (unsigned)cfd->GetID()); + } else { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "Creating column family [%s] FAILED -- %s", + column_family_name.c_str(), s.ToString().c_str()); + } + } // InstrumentedMutexLock l(&mutex_) + + if (cf_options.preserve_internal_time_seconds > 0 || + cf_options.preclude_last_level_data_seconds > 0) { + s = RegisterRecordSeqnoTimeWorker(); + } + sv_context.Clean(); + // this is outside the mutex + if (s.ok()) { + NewThreadStatusCfInfo( + static_cast_with_check(*handle)->cfd()); + } + return s; +} + +Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { + assert(column_family != nullptr); + Status s = DropColumnFamilyImpl(column_family); + if (s.ok()) { + s = WriteOptionsFile(true /*need_mutex_lock*/, + true /*need_enter_write_thread*/); + } + return s; +} + +Status DBImpl::DropColumnFamilies( + const std::vector& column_families) { + Status s; + bool success_once = false; + for (auto* handle : column_families) { + s = DropColumnFamilyImpl(handle); + if (!s.ok()) { + break; + } + success_once = true; + } + if (success_once) { + Status persist_options_status = WriteOptionsFile( + true /*need_mutex_lock*/, true /*need_enter_write_thread*/); + if (s.ok() && !persist_options_status.ok()) { + s = persist_options_status; + } + } + return s; +} + +Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + auto cfh = static_cast_with_check(column_family); + auto cfd = cfh->cfd(); + if (cfd->GetID() == 0) { + return Status::InvalidArgument("Can't drop default column family"); + } + + bool cf_support_snapshot = cfd->mem()->IsSnapshotSupported(); + + VersionEdit edit; + edit.DropColumnFamily(); + edit.SetColumnFamily(cfd->GetID()); + + Status s; + { + InstrumentedMutexLock l(&mutex_); + if (cfd->IsDropped()) { + s = Status::InvalidArgument("Column family already dropped!\n"); + } + if (s.ok()) { + // we drop column family from a single write thread + WriteThread::Writer w; + write_thread_.EnterUnbatched(&w, &mutex_); + s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), + read_options, &edit, &mutex_, + directories_.GetDbDir()); + write_thread_.ExitUnbatched(&w); + } + if (s.ok()) { + auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); + max_total_in_memory_state_ -= mutable_cf_options->write_buffer_size * + mutable_cf_options->max_write_buffer_number; + } + + if (!cf_support_snapshot) { + // Dropped Column Family doesn't support snapshot. Need to recalculate + // is_snapshot_supported_. + bool new_is_snapshot_supported = true; + for (auto c : *versions_->GetColumnFamilySet()) { + if (!c->IsDropped() && !c->mem()->IsSnapshotSupported()) { + new_is_snapshot_supported = false; + break; + } + } + is_snapshot_supported_ = new_is_snapshot_supported; + } + bg_cv_.SignalAll(); + } + + if (cfd->ioptions()->preserve_internal_time_seconds > 0 || + cfd->ioptions()->preclude_last_level_data_seconds > 0) { + s = RegisterRecordSeqnoTimeWorker(); + } + + if (s.ok()) { + // Note that here we erase the associated cf_info of the to-be-dropped + // cfd before its ref-count goes to zero to avoid having to erase cf_info + // later inside db_mutex. 
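+    // Usage sketch for the column family lifecycle implemented here
+    // (illustrative only; `db` is hypothetical):
+    //
+    //   ColumnFamilyHandle* cf = nullptr;
+    //   Status s = db->CreateColumnFamily(ColumnFamilyOptions(), "new_cf",
+    //                                     &cf);
+    //   if (s.ok()) {
+    //     s = db->DropColumnFamily(cf);
+    //   }
+    //   if (s.ok()) {
+    //     s = db->DestroyColumnFamilyHandle(cf);
+    //   }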
+    EraseThreadStatusCfInfo(cfd);
+    assert(cfd->IsDropped());
+    ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                   "Dropped column family with id %u\n", cfd->GetID());
+  } else {
+    ROCKS_LOG_ERROR(immutable_db_options_.info_log,
+                    "Dropping column family with id %u FAILED -- %s\n",
+                    cfd->GetID(), s.ToString().c_str());
+  }
+
+  return s;
+}
+
+bool DBImpl::KeyMayExist(const ReadOptions& read_options,
+                         ColumnFamilyHandle* column_family, const Slice& key,
+                         std::string* value, std::string* timestamp,
+                         bool* value_found) {
+  assert(value != nullptr);
+  assert(read_options.io_activity == Env::IOActivity::kUnknown);
+
+  if (value_found != nullptr) {
+    // falsify later if key-may-exist but can't fetch value
+    *value_found = true;
+  }
+  // TODO: plumb Env::IOActivity
+  ReadOptions roptions = read_options;
+  roptions.read_tier = kBlockCacheTier;  // read from block cache only
+  PinnableSlice pinnable_val;
+  GetImplOptions get_impl_options;
+  get_impl_options.column_family = column_family;
+  get_impl_options.value = &pinnable_val;
+  get_impl_options.value_found = value_found;
+  get_impl_options.timestamp = timestamp;
+  auto s = GetImpl(roptions, key, get_impl_options);
+  value->assign(pinnable_val.data(), pinnable_val.size());
+
+  // If block_cache is enabled and the index block of the table is not
+  // present in block_cache, the return value will be Status::Incomplete.
+  // In this case, the key may still exist in the table.
+  return s.ok() || s.IsIncomplete();
+}
+
+Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
+                              ColumnFamilyHandle* column_family) {
+  if (read_options.managed) {
+    return NewErrorIterator(
+        Status::NotSupported("Managed iterator is not supported anymore."));
+  }
+  Iterator* result = nullptr;
+  if (read_options.read_tier == kPersistedTier) {
+    return NewErrorIterator(Status::NotSupported(
+        "ReadTier::kPersistedData is not yet supported in iterators."));
+  }
+  if (read_options.io_activity != Env::IOActivity::kUnknown) {
+    return NewErrorIterator(Status::InvalidArgument(
+        "Cannot call NewIterator with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`"));
+  }
+
+  assert(column_family);
+
+  if (read_options.timestamp) {
+    const Status s = FailIfTsMismatchCf(
+        column_family, *(read_options.timestamp), /*ts_for_read=*/true);
+    if (!s.ok()) {
+      return NewErrorIterator(s);
+    }
+  } else {
+    const Status s = FailIfCfHasTs(column_family);
+    if (!s.ok()) {
+      return NewErrorIterator(s);
+    }
+  }
+
+  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl*>(column_family);
+  ColumnFamilyData* cfd = cfh->cfd();
+  assert(cfd != nullptr);
+  ReadCallback* read_callback = nullptr;  // No read callback provided.
+  if (read_options.tailing) {
+    SuperVersion* sv = cfd->GetReferencedSuperVersion(this);
+    auto iter = new ForwardIterator(this, read_options, cfd, sv,
+                                    /* allow_unprepared_value */ true);
+    result = NewDBIterator(
+        env_, read_options, *cfd->ioptions(), sv->mutable_cf_options,
+        cfd->user_comparator(), iter, sv->current, kMaxSequenceNumber,
+        sv->mutable_cf_options.max_sequential_skip_in_iterations,
+        read_callback, this, cfd);
+  } else {
+    // Note: no need to consider the special case of
+    // last_seq_same_as_publish_seq_==false since NewIterator is overridden
+    // in WritePreparedTxnDB
+    result = NewIteratorImpl(read_options, cfd,
+                             (read_options.snapshot != nullptr)
+                                 ?
read_options.snapshot->GetSequenceNumber() + : kMaxSequenceNumber, + read_callback); + } + return result; +} + +ArenaWrappedDBIter* DBImpl::NewIteratorImpl(const ReadOptions& read_options, + ColumnFamilyData* cfd, + SequenceNumber snapshot, + ReadCallback* read_callback, + bool expose_blob_index, + bool allow_refresh) { + SuperVersion* sv = cfd->GetReferencedSuperVersion(this); + + TEST_SYNC_POINT("DBImpl::NewIterator:1"); + TEST_SYNC_POINT("DBImpl::NewIterator:2"); + + if (snapshot == kMaxSequenceNumber) { + // Note that the snapshot is assigned AFTER referencing the super + // version because otherwise a flush happening in between may compact away + // data for the snapshot, so the reader would see neither data that was be + // visible to the snapshot before compaction nor the newer data inserted + // afterwards. + // Note that the super version might not contain all the data available + // to this snapshot, but in that case it can see all the data in the + // super version, which is a valid consistent state after the user + // calls NewIterator(). + snapshot = versions_->LastSequence(); + TEST_SYNC_POINT("DBImpl::NewIterator:3"); + TEST_SYNC_POINT("DBImpl::NewIterator:4"); + } + + // Try to generate a DB iterator tree in continuous memory area to be + // cache friendly. Here is an example of result: + // +-------------------------------+ + // | | + // | ArenaWrappedDBIter | + // | + | + // | +---> Inner Iterator ------------+ + // | | | | + // | | +-- -- -- -- -- -- -- --+ | + // | +--- | Arena | | + // | | | | + // | Allocated Memory: | | + // | | +-------------------+ | + // | | | DBIter | <---+ + // | | + | + // | | | +-> iter_ ------------+ + // | | | | | + // | | +-------------------+ | + // | | | MergingIterator | <---+ + // | | + | + // | | | +->child iter1 ------------+ + // | | | | | | + // | | +->child iter2 ----------+ | + // | | | | | | | + // | | | +->child iter3 --------+ | | + // | | | | | | + // | | +-------------------+ | | | + // | | | Iterator1 | <--------+ + // | | +-------------------+ | | + // | | | Iterator2 | <------+ + // | | +-------------------+ | + // | | | Iterator3 | <----+ + // | | +-------------------+ + // | | | + // +-------+-----------------------+ + // + // ArenaWrappedDBIter inlines an arena area where all the iterators in + // the iterator tree are allocated in the order of being accessed when + // querying. + // Laying out the iterators in the order of being accessed makes it more + // likely that any iterator pointer is close to the iterator it points to so + // that they are likely to be in the same cache line and/or page. + ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( + env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, sv->current, + snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations, + sv->version_number, read_callback, this, cfd, expose_blob_index, + read_options.snapshot != nullptr ? 
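+      // Caller-side sketch of the iterator built here (illustrative only;
+      // `db` is hypothetical):
+      //
+      //   std::unique_ptr<Iterator> it(db->NewIterator(ReadOptions()));
+      //   for (it->SeekToFirst(); it->Valid(); it->Next()) {
+      //     // use it->key() and it->value()
+      //   }
+      //   Status s = it->status();  // check for background errors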
false : allow_refresh); + + InternalIterator* internal_iter = NewInternalIterator( + db_iter->GetReadOptions(), cfd, sv, db_iter->GetArena(), snapshot, + /* allow_unprepared_value */ true, db_iter); + db_iter->SetIterUnderDBIter(internal_iter); + + return db_iter; +} + +Status DBImpl::NewIterators( + const ReadOptions& read_options, + const std::vector& column_families, + std::vector* iterators) { + if (read_options.managed) { + return Status::NotSupported("Managed iterator is not supported anymore."); + } + if (read_options.read_tier == kPersistedTier) { + return Status::NotSupported( + "ReadTier::kPersistedData is not yet supported in iterators."); + } + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call NewIterators with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + + if (read_options.timestamp) { + for (auto* cf : column_families) { + assert(cf); + const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp), + /*ts_for_read=*/true); + if (!s.ok()) { + return s; + } + } + } else { + for (auto* cf : column_families) { + assert(cf); + const Status s = FailIfCfHasTs(cf); + if (!s.ok()) { + return s; + } + } + } + + ReadCallback* read_callback = nullptr; // No read callback provided. + iterators->clear(); + iterators->reserve(column_families.size()); + if (read_options.tailing) { + for (auto cfh : column_families) { + auto cfd = static_cast_with_check(cfh)->cfd(); + SuperVersion* sv = cfd->GetReferencedSuperVersion(this); + auto iter = new ForwardIterator(this, read_options, cfd, sv, + /* allow_unprepared_value */ true); + iterators->push_back(NewDBIterator( + env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, + cfd->user_comparator(), iter, sv->current, kMaxSequenceNumber, + sv->mutable_cf_options.max_sequential_skip_in_iterations, + read_callback, this, cfd)); + } + } else { + // Note: no need to consider the special case of + // last_seq_same_as_publish_seq_==false since NewIterators is overridden in + // WritePreparedTxnDB + auto snapshot = read_options.snapshot != nullptr + ? 
read_options.snapshot->GetSequenceNumber() + : versions_->LastSequence(); + for (size_t i = 0; i < column_families.size(); ++i) { + auto* cfd = + static_cast_with_check(column_families[i]) + ->cfd(); + iterators->push_back( + NewIteratorImpl(read_options, cfd, snapshot, read_callback)); + } + } + + return Status::OK(); +} + +const Snapshot* DBImpl::GetSnapshot() { return GetSnapshotImpl(false); } + +const Snapshot* DBImpl::GetSnapshotForWriteConflictBoundary() { + return GetSnapshotImpl(true); +} + +std::pair> +DBImpl::CreateTimestampedSnapshot(SequenceNumber snapshot_seq, uint64_t ts) { + assert(ts != std::numeric_limits::max()); + + auto ret = CreateTimestampedSnapshotImpl(snapshot_seq, ts, /*lock=*/true); + return ret; +} + +std::shared_ptr DBImpl::GetTimestampedSnapshot( + uint64_t ts) const { + InstrumentedMutexLock lock_guard(&mutex_); + return timestamped_snapshots_.GetSnapshot(ts); +} + +void DBImpl::ReleaseTimestampedSnapshotsOlderThan(uint64_t ts, + size_t* remaining_total_ss) { + autovector> snapshots_to_release; + { + InstrumentedMutexLock lock_guard(&mutex_); + timestamped_snapshots_.ReleaseSnapshotsOlderThan(ts, snapshots_to_release); + } + snapshots_to_release.clear(); + + if (remaining_total_ss) { + InstrumentedMutexLock lock_guard(&mutex_); + *remaining_total_ss = static_cast(snapshots_.count()); + } +} + +Status DBImpl::GetTimestampedSnapshots( + uint64_t ts_lb, uint64_t ts_ub, + std::vector>& timestamped_snapshots) const { + if (ts_lb >= ts_ub) { + return Status::InvalidArgument( + "timestamp lower bound must be smaller than upper bound"); + } + timestamped_snapshots.clear(); + InstrumentedMutexLock lock_guard(&mutex_); + timestamped_snapshots_.GetSnapshots(ts_lb, ts_ub, timestamped_snapshots); + return Status::OK(); +} + +SnapshotImpl* DBImpl::GetSnapshotImpl(bool is_write_conflict_boundary, + bool lock) { + int64_t unix_time = 0; + immutable_db_options_.clock->GetCurrentTime(&unix_time) + .PermitUncheckedError(); // Ignore error + SnapshotImpl* s = new SnapshotImpl; + + if (lock) { + mutex_.Lock(); + } else { + mutex_.AssertHeld(); + } + // returns null if the underlying memtable does not support snapshot. + if (!is_snapshot_supported_) { + if (lock) { + mutex_.Unlock(); + } + delete s; + return nullptr; + } + auto snapshot_seq = GetLastPublishedSequence(); + SnapshotImpl* snapshot = + snapshots_.New(s, snapshot_seq, unix_time, is_write_conflict_boundary); + if (lock) { + mutex_.Unlock(); + } + return snapshot; +} + +std::pair> +DBImpl::CreateTimestampedSnapshotImpl(SequenceNumber snapshot_seq, uint64_t ts, + bool lock) { + int64_t unix_time = 0; + immutable_db_options_.clock->GetCurrentTime(&unix_time) + .PermitUncheckedError(); // Ignore error + SnapshotImpl* s = new SnapshotImpl; + + const bool need_update_seq = (snapshot_seq != kMaxSequenceNumber); + + if (lock) { + mutex_.Lock(); + } else { + mutex_.AssertHeld(); + } + // returns null if the underlying memtable does not support snapshot. + if (!is_snapshot_supported_) { + if (lock) { + mutex_.Unlock(); + } + delete s; + return std::make_pair( + Status::NotSupported("Memtable does not support snapshot"), nullptr); + } + + // Caller is not write thread, thus didn't provide a valid snapshot_seq. + // Obtain seq from db. + if (!need_update_seq) { + snapshot_seq = GetLastPublishedSequence(); + } + + std::shared_ptr latest = + timestamped_snapshots_.GetSnapshot(std::numeric_limits::max()); + + // If there is already a latest timestamped snapshot, then we need to do some + // checks. 
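+  // For example (illustrative values): with the latest timestamped snapshot
+  // at (seq=100, ts=50), a request for ts=40 fails, a request for
+  // (seq=100, ts=50) reuses the existing snapshot, and a request for
+  // (seq=120, ts=50) fails because newer writes exist at the same timestamp.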
+ if (latest) { + uint64_t latest_snap_ts = latest->GetTimestamp(); + SequenceNumber latest_snap_seq = latest->GetSequenceNumber(); + assert(latest_snap_seq <= snapshot_seq); + bool needs_create_snap = true; + Status status; + std::shared_ptr ret; + if (latest_snap_ts > ts) { + // A snapshot created later cannot have smaller timestamp than a previous + // timestamped snapshot. + needs_create_snap = false; + std::ostringstream oss; + oss << "snapshot exists with larger timestamp " << latest_snap_ts << " > " + << ts; + status = Status::InvalidArgument(oss.str()); + } else if (latest_snap_ts == ts) { + if (latest_snap_seq == snapshot_seq) { + // We are requesting the same sequence number and timestamp, thus can + // safely reuse (share) the current latest timestamped snapshot. + needs_create_snap = false; + ret = latest; + } else if (latest_snap_seq < snapshot_seq) { + // There may have been writes to the database since the latest + // timestamped snapshot, yet we are still requesting the same + // timestamp. In this case, we cannot create the new timestamped + // snapshot. + needs_create_snap = false; + std::ostringstream oss; + oss << "Allocated seq is " << snapshot_seq + << ", while snapshot exists with smaller seq " << latest_snap_seq + << " but same timestamp " << ts; + status = Status::InvalidArgument(oss.str()); + } + } + if (!needs_create_snap) { + if (lock) { + mutex_.Unlock(); + } + delete s; + return std::make_pair(status, ret); + } else { + status.PermitUncheckedError(); + } + } + + SnapshotImpl* snapshot = + snapshots_.New(s, snapshot_seq, unix_time, + /*is_write_conflict_boundary=*/true, ts); + + std::shared_ptr ret( + snapshot, + std::bind(&DBImpl::ReleaseSnapshot, this, std::placeholders::_1)); + timestamped_snapshots_.AddSnapshot(ret); + + // Caller is from write thread, and we need to update database's sequence + // number. + if (need_update_seq) { + assert(versions_); + if (last_seq_same_as_publish_seq_) { + versions_->SetLastSequence(snapshot_seq); + } else { + // TODO: support write-prepared/write-unprepared transactions with two + // write queues. + assert(false); + } + } + + if (lock) { + mutex_.Unlock(); + } + return std::make_pair(Status::OK(), ret); +} + +namespace { +using CfdList = autovector; +bool CfdListContains(const CfdList& list, ColumnFamilyData* cfd) { + for (const ColumnFamilyData* t : list) { + if (t == cfd) { + return true; + } + } + return false; +} +} // namespace + +void DBImpl::ReleaseSnapshot(const Snapshot* s) { + if (s == nullptr) { + // DBImpl::GetSnapshot() can return nullptr when snapshot + // not supported by specifying the condition: + // inplace_update_support enabled. + return; + } + const SnapshotImpl* casted_s = reinterpret_cast(s); + { + InstrumentedMutexLock l(&mutex_); + snapshots_.Delete(casted_s); + uint64_t oldest_snapshot; + if (snapshots_.empty()) { + oldest_snapshot = GetLastPublishedSequence(); + } else { + oldest_snapshot = snapshots_.oldest()->number_; + } + // Avoid to go through every column family by checking a global threshold + // first. 
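+    // Example (illustrative values): with snapshots at sequence numbers
+    // {10, 20, 30} and bottommost_files_mark_threshold_ == 25, releasing
+    // the snapshot at 10 leaves oldest_snapshot == 20 <= 25, so the per-CF
+    // scan below is skipped; once only the snapshot at 30 remains,
+    // oldest_snapshot == 30 > 25 and the scan runs.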
+ if (oldest_snapshot > bottommost_files_mark_threshold_) { + CfdList cf_scheduled; + for (auto* cfd : *versions_->GetColumnFamilySet()) { + if (!cfd->ioptions()->allow_ingest_behind) { + cfd->current()->storage_info()->UpdateOldestSnapshot(oldest_snapshot); + if (!cfd->current() + ->storage_info() + ->BottommostFilesMarkedForCompaction() + .empty()) { + SchedulePendingCompaction(cfd); + MaybeScheduleFlushOrCompaction(); + cf_scheduled.push_back(cfd); + } + } + } + + // Calculate a new threshold, skipping those CFs where compactions are + // scheduled. We do not do the same pass as the previous loop because + // mutex might be unlocked during the loop, making the result inaccurate. + SequenceNumber new_bottommost_files_mark_threshold = kMaxSequenceNumber; + for (auto* cfd : *versions_->GetColumnFamilySet()) { + if (CfdListContains(cf_scheduled, cfd) || + cfd->ioptions()->allow_ingest_behind) { + continue; + } + new_bottommost_files_mark_threshold = std::min( + new_bottommost_files_mark_threshold, + cfd->current()->storage_info()->bottommost_files_mark_threshold()); + } + bottommost_files_mark_threshold_ = new_bottommost_files_mark_threshold; + } + } + delete casted_s; +} + +Status DBImpl::GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, + TablePropertiesCollection* props) { + auto cfh = static_cast_with_check(column_family); + auto cfd = cfh->cfd(); + + // Increment the ref count + mutex_.Lock(); + auto version = cfd->current(); + version->Ref(); + mutex_.Unlock(); + + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + auto s = version->GetPropertiesOfAllTables(read_options, props); + + // Decrement the ref count + mutex_.Lock(); + version->Unref(); + mutex_.Unlock(); + + return s; +} + +Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family, + const Range* range, std::size_t n, + TablePropertiesCollection* props) { + auto cfh = static_cast_with_check(column_family); + auto cfd = cfh->cfd(); + + // Increment the ref count + mutex_.Lock(); + auto version = cfd->current(); + version->Ref(); + mutex_.Unlock(); + + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + auto s = version->GetPropertiesOfTablesInRange(read_options, range, n, props); + + // Decrement the ref count + mutex_.Lock(); + version->Unref(); + mutex_.Unlock(); + + return s; +} + + +const std::string& DBImpl::GetName() const { return dbname_; } + +Env* DBImpl::GetEnv() const { return env_; } + +FileSystem* DB::GetFileSystem() const { + const auto& fs = GetEnv()->GetFileSystem(); + return fs.get(); +} + +FileSystem* DBImpl::GetFileSystem() const { + return immutable_db_options_.fs.get(); +} + +SystemClock* DBImpl::GetSystemClock() const { + return immutable_db_options_.clock; +} + + +Status DBImpl::StartIOTrace(const TraceOptions& trace_options, + std::unique_ptr&& trace_writer) { + assert(trace_writer != nullptr); + return io_tracer_->StartIOTrace(GetSystemClock(), trace_options, + std::move(trace_writer)); +} + +Status DBImpl::EndIOTrace() { + io_tracer_->EndIOTrace(); + return Status::OK(); +} + + +Options DBImpl::GetOptions(ColumnFamilyHandle* column_family) const { + InstrumentedMutexLock l(&mutex_); + auto cfh = static_cast_with_check(column_family); + return Options(BuildDBOptions(immutable_db_options_, mutable_db_options_), + cfh->cfd()->GetLatestCFOptions()); +} + +DBOptions DBImpl::GetDBOptions() const { + InstrumentedMutexLock l(&mutex_); + return BuildDBOptions(immutable_db_options_, mutable_db_options_); +} + +bool 
DBImpl::GetProperty(ColumnFamilyHandle* column_family, + const Slice& property, std::string* value) { + const DBPropertyInfo* property_info = GetPropertyInfo(property); + value->clear(); + auto cfd = + static_cast_with_check(column_family)->cfd(); + if (property_info == nullptr) { + return false; + } else if (property_info->handle_int) { + uint64_t int_value; + bool ret_value = + GetIntPropertyInternal(cfd, *property_info, false, &int_value); + if (ret_value) { + *value = std::to_string(int_value); + } + return ret_value; + } else if (property_info->handle_string) { + if (property_info->need_out_of_mutex) { + return cfd->internal_stats()->GetStringProperty(*property_info, property, + value); + } else { + InstrumentedMutexLock l(&mutex_); + return cfd->internal_stats()->GetStringProperty(*property_info, property, + value); + } + } else if (property_info->handle_string_dbimpl) { + if (property_info->need_out_of_mutex) { + return (this->*(property_info->handle_string_dbimpl))(value); + } else { + InstrumentedMutexLock l(&mutex_); + return (this->*(property_info->handle_string_dbimpl))(value); + } + } + // Shouldn't reach here since exactly one of handle_string and handle_int + // should be non-nullptr. + assert(false); + return false; +} + +bool DBImpl::GetMapProperty(ColumnFamilyHandle* column_family, + const Slice& property, + std::map* value) { + const DBPropertyInfo* property_info = GetPropertyInfo(property); + value->clear(); + auto cfd = + static_cast_with_check(column_family)->cfd(); + if (property_info == nullptr) { + return false; + } else if (property_info->handle_map) { + if (property_info->need_out_of_mutex) { + return cfd->internal_stats()->GetMapProperty(*property_info, property, + value); + } else { + InstrumentedMutexLock l(&mutex_); + return cfd->internal_stats()->GetMapProperty(*property_info, property, + value); + } + } + // If we reach this point it means that handle_map is not provided for the + // requested property + return false; +} + +bool DBImpl::GetIntProperty(ColumnFamilyHandle* column_family, + const Slice& property, uint64_t* value) { + const DBPropertyInfo* property_info = GetPropertyInfo(property); + if (property_info == nullptr || property_info->handle_int == nullptr) { + return false; + } + auto cfd = + static_cast_with_check(column_family)->cfd(); + return GetIntPropertyInternal(cfd, *property_info, false, value); +} + +bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd, + const DBPropertyInfo& property_info, + bool is_locked, uint64_t* value) { + assert(property_info.handle_int != nullptr); + if (!property_info.need_out_of_mutex) { + if (is_locked) { + mutex_.AssertHeld(); + return cfd->internal_stats()->GetIntProperty(property_info, value, this); + } else { + InstrumentedMutexLock l(&mutex_); + return cfd->internal_stats()->GetIntProperty(property_info, value, this); + } + } else { + SuperVersion* sv = nullptr; + if (is_locked) { + mutex_.Unlock(); + } + sv = GetAndRefSuperVersion(cfd); + + bool ret = cfd->internal_stats()->GetIntPropertyOutOfMutex( + property_info, sv->current, value); + + ReturnAndCleanupSuperVersion(cfd, sv); + if (is_locked) { + mutex_.Lock(); + } + + return ret; + } +} + +bool DBImpl::GetPropertyHandleOptionsStatistics(std::string* value) { + assert(value != nullptr); + Statistics* statistics = immutable_db_options_.stats; + if (!statistics) { + return false; + } + *value = statistics->ToString(); + return true; +} + +Status DBImpl::ResetStats() { + InstrumentedMutexLock l(&mutex_); + for (auto* cfd : 
*versions_->GetColumnFamilySet()) { + if (cfd->initialized()) { + cfd->internal_stats()->Clear(); + } + } + return Status::OK(); +} + +bool DBImpl::GetAggregatedIntProperty(const Slice& property, + uint64_t* aggregated_value) { + const DBPropertyInfo* property_info = GetPropertyInfo(property); + if (property_info == nullptr || property_info->handle_int == nullptr) { + return false; + } + + uint64_t sum = 0; + bool ret = true; + { + // Needs mutex to protect the list of column families. + InstrumentedMutexLock l(&mutex_); + uint64_t value; + for (auto* cfd : versions_->GetRefedColumnFamilySet()) { + if (!cfd->initialized()) { + continue; + } + ret = GetIntPropertyInternal(cfd, *property_info, true, &value); + // GetIntPropertyInternal may release db mutex and re-acquire it. + mutex_.AssertHeld(); + if (ret) { + sum += value; + } else { + ret = false; + break; + } + } + } + *aggregated_value = sum; + return ret; +} + +SuperVersion* DBImpl::GetAndRefSuperVersion(ColumnFamilyData* cfd) { + // TODO(ljin): consider using GetReferencedSuperVersion() directly + return cfd->GetThreadLocalSuperVersion(this); +} + +// REQUIRED: this function should only be called on the write thread or if the +// mutex is held. +SuperVersion* DBImpl::GetAndRefSuperVersion(uint32_t column_family_id) { + auto column_family_set = versions_->GetColumnFamilySet(); + auto cfd = column_family_set->GetColumnFamily(column_family_id); + if (!cfd) { + return nullptr; + } + + return GetAndRefSuperVersion(cfd); +} + +void DBImpl::CleanupSuperVersion(SuperVersion* sv) { + // Release SuperVersion + if (sv->Unref()) { + bool defer_purge = immutable_db_options().avoid_unnecessary_blocking_io; + { + InstrumentedMutexLock l(&mutex_); + sv->Cleanup(); + if (defer_purge) { + AddSuperVersionsToFreeQueue(sv); + SchedulePurge(); + } + } + if (!defer_purge) { + delete sv; + } + RecordTick(stats_, NUMBER_SUPERVERSION_CLEANUPS); + } + RecordTick(stats_, NUMBER_SUPERVERSION_RELEASES); +} + +void DBImpl::ReturnAndCleanupSuperVersion(ColumnFamilyData* cfd, + SuperVersion* sv) { + if (!cfd->ReturnThreadLocalSuperVersion(sv)) { + CleanupSuperVersion(sv); + } +} + +// REQUIRED: this function should only be called on the write thread. +void DBImpl::ReturnAndCleanupSuperVersion(uint32_t column_family_id, + SuperVersion* sv) { + auto column_family_set = versions_->GetColumnFamilySet(); + auto cfd = column_family_set->GetColumnFamily(column_family_id); + + // If SuperVersion is held, and we successfully fetched a cfd using + // GetAndRefSuperVersion(), it must still exist. + assert(cfd != nullptr); + ReturnAndCleanupSuperVersion(cfd, sv); +} + +// REQUIRED: this function should only be called on the write thread or if the +// mutex is held. +ColumnFamilyHandle* DBImpl::GetColumnFamilyHandle(uint32_t column_family_id) { + ColumnFamilyMemTables* cf_memtables = column_family_memtables_.get(); + + if (!cf_memtables->Seek(column_family_id)) { + return nullptr; + } + + return cf_memtables->GetColumnFamilyHandle(); +} + +// REQUIRED: mutex is NOT held. 
+std::unique_ptr DBImpl::GetColumnFamilyHandleUnlocked( + uint32_t column_family_id) { + InstrumentedMutexLock l(&mutex_); + + auto* cfd = + versions_->GetColumnFamilySet()->GetColumnFamily(column_family_id); + if (cfd == nullptr) { + return nullptr; + } + + return std::unique_ptr( + new ColumnFamilyHandleImpl(cfd, this, &mutex_)); +} + +void DBImpl::GetApproximateMemTableStats(ColumnFamilyHandle* column_family, + const Range& range, + uint64_t* const count, + uint64_t* const size) { + ColumnFamilyHandleImpl* cfh = + static_cast_with_check(column_family); + ColumnFamilyData* cfd = cfh->cfd(); + SuperVersion* sv = GetAndRefSuperVersion(cfd); + + // Convert user_key into a corresponding internal key. + InternalKey k1(range.start, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k2(range.limit, kMaxSequenceNumber, kValueTypeForSeek); + MemTable::MemTableStats memStats = + sv->mem->ApproximateStats(k1.Encode(), k2.Encode()); + MemTable::MemTableStats immStats = + sv->imm->ApproximateStats(k1.Encode(), k2.Encode()); + *count = memStats.count + immStats.count; + *size = memStats.size + immStats.size; + + ReturnAndCleanupSuperVersion(cfd, sv); +} + +Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options, + ColumnFamilyHandle* column_family, + const Range* range, int n, uint64_t* sizes) { + if (!options.include_memtables && !options.include_files) { + return Status::InvalidArgument("Invalid options"); + } + + const Comparator* const ucmp = column_family->GetComparator(); + assert(ucmp); + size_t ts_sz = ucmp->timestamp_size(); + + Version* v; + auto cfh = static_cast_with_check(column_family); + auto cfd = cfh->cfd(); + SuperVersion* sv = GetAndRefSuperVersion(cfd); + v = sv->current; + + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + for (int i = 0; i < n; i++) { + Slice start = range[i].start; + Slice limit = range[i].limit; + + // Add timestamp if needed + std::string start_with_ts, limit_with_ts; + if (ts_sz > 0) { + // Maximum timestamp means including all key with any timestamp + AppendKeyWithMaxTimestamp(&start_with_ts, start, ts_sz); + // Append a maximum timestamp as the range limit is exclusive: + // [start, limit) + AppendKeyWithMaxTimestamp(&limit_with_ts, limit, ts_sz); + start = start_with_ts; + limit = limit_with_ts; + } + // Convert user_key into a corresponding internal key. + InternalKey k1(start, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k2(limit, kMaxSequenceNumber, kValueTypeForSeek); + sizes[i] = 0; + if (options.include_files) { + sizes[i] += versions_->ApproximateSize( + options, read_options, v, k1.Encode(), k2.Encode(), /*start_level=*/0, + /*end_level=*/-1, TableReaderCaller::kUserApproximateSize); + } + if (options.include_memtables) { + sizes[i] += sv->mem->ApproximateStats(k1.Encode(), k2.Encode()).size; + sizes[i] += sv->imm->ApproximateStats(k1.Encode(), k2.Encode()).size; + } + } + + ReturnAndCleanupSuperVersion(cfd, sv); + return Status::OK(); +} + +std::list::iterator +DBImpl::CaptureCurrentFileNumberInPendingOutputs() { + // We need to remember the iterator of our insert, because after the + // background job is done, we need to remove that element from + // pending_outputs_. 
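+  // Sketch of the intended pairing (illustrative; this is the pattern the
+  // ingestion code later in this file follows):
+  //
+  //   std::unique_ptr<std::list<uint64_t>::iterator> elem(
+  //       new std::list<uint64_t>::iterator(
+  //           CaptureCurrentFileNumberInPendingOutputs()));
+  //   // ... background job runs and may allocate new file numbers ...
+  //   ReleaseFileNumberFromPendingOutputs(elem);
+  //
+  // A std::list backs pending_outputs_ because its iterators remain valid
+  // while other elements are inserted or erased under the mutex.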
+ pending_outputs_.push_back(versions_->current_next_file_number()); + auto pending_outputs_inserted_elem = pending_outputs_.end(); + --pending_outputs_inserted_elem; + return pending_outputs_inserted_elem; +} + +void DBImpl::ReleaseFileNumberFromPendingOutputs( + std::unique_ptr::iterator>& v) { + if (v.get() != nullptr) { + pending_outputs_.erase(*v.get()); + v.reset(); + } +} + +Status DBImpl::GetUpdatesSince( + SequenceNumber seq, std::unique_ptr* iter, + const TransactionLogIterator::ReadOptions& read_options) { + RecordTick(stats_, GET_UPDATES_SINCE_CALLS); + if (seq_per_batch_) { + return Status::NotSupported( + "This API is not yet compatible with write-prepared/write-unprepared " + "transactions"); + } + if (seq > versions_->LastSequence()) { + return Status::NotFound("Requested sequence not yet written in the db"); + } + return wal_manager_.GetUpdatesSince(seq, iter, read_options, versions_.get()); +} + +Status DBImpl::DeleteFile(std::string name) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + uint64_t number; + FileType type; + WalFileType log_type; + if (!ParseFileName(name, &number, &type, &log_type) || + (type != kTableFile && type != kWalFile)) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, "DeleteFile %s failed.\n", + name.c_str()); + return Status::InvalidArgument("Invalid file name"); + } + + if (type == kWalFile) { + // Only allow deleting archived log files + if (log_type != kArchivedLogFile) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "DeleteFile %s failed - not archived log.\n", + name.c_str()); + return Status::NotSupported("Delete only supported for archived logs"); + } + Status status = wal_manager_.DeleteFile(name, number); + if (!status.ok()) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "DeleteFile %s failed -- %s.\n", name.c_str(), + status.ToString().c_str()); + } + return status; + } + + Status status; + int level; + FileMetaData* metadata; + ColumnFamilyData* cfd; + VersionEdit edit; + JobContext job_context(next_job_id_.fetch_add(1), true); + { + InstrumentedMutexLock l(&mutex_); + status = versions_->GetMetadataForFile(number, &level, &metadata, &cfd); + if (!status.ok()) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "DeleteFile %s failed. File not found\n", name.c_str()); + job_context.Clean(); + return Status::InvalidArgument("File not found"); + } + assert(level < cfd->NumberLevels()); + + // If the file is being compacted no need to delete. + if (metadata->being_compacted) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "DeleteFile %s Skipped. File about to be compacted\n", + name.c_str()); + job_context.Clean(); + return Status::OK(); + } + + // Only the files in the last level can be deleted externally. + // This is to make sure that any deletion tombstones are not + // lost. Check that the level passed is the last level. + auto* vstoreage = cfd->current()->storage_info(); + for (int i = level + 1; i < cfd->NumberLevels(); i++) { + if (vstoreage->NumLevelFiles(i) != 0) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "DeleteFile %s FAILED. 
File not in last level\n", + name.c_str()); + job_context.Clean(); + return Status::InvalidArgument("File not in last level"); + } + } + // if level == 0, it has to be the oldest file + if (level == 0 && + vstoreage->LevelFiles(0).back()->fd.GetNumber() != number) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "DeleteFile %s failed ---" + " target file in level 0 must be the oldest.", + name.c_str()); + job_context.Clean(); + return Status::InvalidArgument("File in level 0, but not oldest"); + } + edit.SetColumnFamily(cfd->GetID()); + edit.DeleteFile(level, number); + status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), + read_options, &edit, &mutex_, + directories_.GetDbDir()); + if (status.ok()) { + InstallSuperVersionAndScheduleWork(cfd, + &job_context.superversion_contexts[0], + *cfd->GetLatestMutableCFOptions()); + } + FindObsoleteFiles(&job_context, false); + } // lock released here + + LogFlush(immutable_db_options_.info_log); + // remove files outside the db-lock + if (job_context.HaveSomethingToDelete()) { + // Call PurgeObsoleteFiles() without holding mutex. + PurgeObsoleteFiles(job_context); + } + job_context.Clean(); + return status; +} + +Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family, + const RangePtr* ranges, size_t n, + bool include_end) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + Status status = Status::OK(); + auto cfh = static_cast_with_check(column_family); + ColumnFamilyData* cfd = cfh->cfd(); + VersionEdit edit; + std::set deleted_files; + JobContext job_context(next_job_id_.fetch_add(1), true); + { + InstrumentedMutexLock l(&mutex_); + Version* input_version = cfd->current(); + + auto* vstorage = input_version->storage_info(); + for (size_t r = 0; r < n; r++) { + auto begin = ranges[r].start, end = ranges[r].limit; + for (int i = 1; i < cfd->NumberLevels(); i++) { + if (vstorage->LevelFiles(i).empty() || + !vstorage->OverlapInLevel(i, begin, end)) { + continue; + } + std::vector level_files; + InternalKey begin_storage, end_storage, *begin_key, *end_key; + if (begin == nullptr) { + begin_key = nullptr; + } else { + begin_storage.SetMinPossibleForUserKey(*begin); + begin_key = &begin_storage; + } + if (end == nullptr) { + end_key = nullptr; + } else { + end_storage.SetMaxPossibleForUserKey(*end); + end_key = &end_storage; + } + + vstorage->GetCleanInputsWithinInterval( + i, begin_key, end_key, &level_files, -1 /* hint_index */, + nullptr /* file_index */); + FileMetaData* level_file; + for (uint32_t j = 0; j < level_files.size(); j++) { + level_file = level_files[j]; + if (level_file->being_compacted) { + continue; + } + if (deleted_files.find(level_file) != deleted_files.end()) { + continue; + } + if (!include_end && end != nullptr && + cfd->user_comparator()->Compare(level_file->largest.user_key(), + *end) == 0) { + continue; + } + edit.SetColumnFamily(cfd->GetID()); + edit.DeleteFile(i, level_file->fd.GetNumber()); + deleted_files.insert(level_file); + level_file->being_compacted = true; + } + vstorage->ComputeCompactionScore(*cfd->ioptions(), + *cfd->GetLatestMutableCFOptions()); + } + } + if (edit.GetDeletedFiles().empty()) { + job_context.Clean(); + return status; + } + input_version->Ref(); + status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), + read_options, &edit, &mutex_, + directories_.GetDbDir()); + if (status.ok()) { + InstallSuperVersionAndScheduleWork(cfd, + &job_context.superversion_contexts[0], + *cfd->GetLatestMutableCFOptions()); + } + for (auto* 
deleted_file : deleted_files) { + deleted_file->being_compacted = false; + } + input_version->Unref(); + FindObsoleteFiles(&job_context, false); + } // lock released here + + LogFlush(immutable_db_options_.info_log); + // remove files outside the db-lock + if (job_context.HaveSomethingToDelete()) { + // Call PurgeObsoleteFiles() without holding mutex. + PurgeObsoleteFiles(job_context); + } + job_context.Clean(); + return status; +} + +void DBImpl::GetLiveFilesMetaData(std::vector* metadata) { + InstrumentedMutexLock l(&mutex_); + versions_->GetLiveFilesMetaData(metadata); +} + +Status DBImpl::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) { + InstrumentedMutexLock l(&mutex_); + return versions_->GetLiveFilesChecksumInfo(checksum_list); +} + +void DBImpl::GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, + ColumnFamilyMetaData* cf_meta) { + assert(column_family); + auto* cfd = + static_cast_with_check(column_family)->cfd(); + auto* sv = GetAndRefSuperVersion(cfd); + { + // Without mutex, Version::GetColumnFamilyMetaData will have data race with + // Compaction::MarkFilesBeingCompacted. One solution is to use mutex, but + // this may cause regression. An alternative is to make + // FileMetaData::being_compacted atomic, but it will make FileMetaData + // non-copy-able. Another option is to separate these variables from + // original FileMetaData struct, and this requires re-organization of data + // structures. For now, we take the easy approach. If + // DB::GetColumnFamilyMetaData is not called frequently, the regression + // should not be big. We still need to keep an eye on it. + InstrumentedMutexLock l(&mutex_); + sv->current->GetColumnFamilyMetaData(cf_meta); + } + ReturnAndCleanupSuperVersion(cfd, sv); +} + +void DBImpl::GetAllColumnFamilyMetaData( + std::vector* metadata) { + InstrumentedMutexLock l(&mutex_); + for (auto cfd : *(versions_->GetColumnFamilySet())) { + { + metadata->emplace_back(); + cfd->current()->GetColumnFamilyMetaData(&metadata->back()); + } + } +} + +Status DBImpl::CheckConsistency() { + mutex_.AssertHeld(); + std::vector metadata; + versions_->GetLiveFilesMetaData(&metadata); + TEST_SYNC_POINT("DBImpl::CheckConsistency:AfterGetLiveFilesMetaData"); + + std::string corruption_messages; + + if (immutable_db_options_.skip_checking_sst_file_sizes_on_db_open) { + // Instead of calling GetFileSize() for each expected file, call + // GetChildren() for the DB directory and check that all expected files + // are listed, without checking their sizes. + // Since sst files might be in different directories, do it for each + // directory separately. + std::map> files_by_directory; + for (const auto& md : metadata) { + // md.name has a leading "/". Remove it. 
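+      // e.g. (illustrative values) md.name == "/000123.sst" with
+      // md.db_path == "/path/to/db" groups "000123.sst" under the
+      // "/path/to/db" entry below.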
+ std::string fname = md.name; + if (!fname.empty() && fname[0] == '/') { + fname = fname.substr(1); + } + files_by_directory[md.db_path].push_back(fname); + } + + IOOptions io_opts; + io_opts.do_not_recurse = true; + for (const auto& dir_files : files_by_directory) { + std::string directory = dir_files.first; + std::vector existing_files; + Status s = fs_->GetChildren(directory, io_opts, &existing_files, + /*IODebugContext*=*/nullptr); + if (!s.ok()) { + corruption_messages += + "Can't list files in " + directory + ": " + s.ToString() + "\n"; + continue; + } + std::sort(existing_files.begin(), existing_files.end()); + + for (const std::string& fname : dir_files.second) { + if (!std::binary_search(existing_files.begin(), existing_files.end(), + fname) && + !std::binary_search(existing_files.begin(), existing_files.end(), + Rocks2LevelTableFileName(fname))) { + corruption_messages += + "Missing sst file " + fname + " in " + directory + "\n"; + } + } + } + } else { + for (const auto& md : metadata) { + // md.name has a leading "/". + std::string file_path = md.db_path + md.name; + + uint64_t fsize = 0; + TEST_SYNC_POINT("DBImpl::CheckConsistency:BeforeGetFileSize"); + Status s = env_->GetFileSize(file_path, &fsize); + if (!s.ok() && + env_->GetFileSize(Rocks2LevelTableFileName(file_path), &fsize).ok()) { + s = Status::OK(); + } + if (!s.ok()) { + corruption_messages += + "Can't access " + md.name + ": " + s.ToString() + "\n"; + } else if (fsize != md.size) { + corruption_messages += "Sst file size mismatch: " + file_path + + ". Size recorded in manifest " + + std::to_string(md.size) + ", actual size " + + std::to_string(fsize) + "\n"; + } + } + } + + if (corruption_messages.size() == 0) { + return Status::OK(); + } else { + return Status::Corruption(corruption_messages); + } +} + +Status DBImpl::GetDbIdentity(std::string& identity) const { + identity.assign(db_id_); + return Status::OK(); +} + +Status DBImpl::GetDbIdentityFromIdentityFile(std::string* identity) const { + std::string idfilename = IdentityFileName(dbname_); + const FileOptions soptions; + + Status s = ReadFileToString(fs_.get(), idfilename, identity); + if (!s.ok()) { + return s; + } + + // If last character is '\n' remove it from identity. (Old implementations + // of Env::GenerateUniqueId() would include a trailing '\n'.) + if (identity->size() > 0 && identity->back() == '\n') { + identity->pop_back(); + } + return s; +} + +Status DBImpl::GetDbSessionId(std::string& session_id) const { + session_id.assign(db_session_id_); + return Status::OK(); +} + +namespace { +SemiStructuredUniqueIdGen* DbSessionIdGen() { + static SemiStructuredUniqueIdGen gen; + return &gen; +} +} // namespace + +void DBImpl::TEST_ResetDbSessionIdGen() { DbSessionIdGen()->Reset(); } + +std::string DBImpl::GenerateDbSessionId(Env*) { + // See SemiStructuredUniqueIdGen for its desirable properties. 
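+  // Illustrative only: EncodeSessionId turns the 128 bits (hi, lo) into a
+  // short fixed-width alphanumeric string; "ABCDEFGHIJ0123456789" is a
+  // made-up example of the shape. The retry below keeps lo non-zero so that
+  // SST unique IDs derived from the session id cannot degenerate to zero.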
+ auto gen = DbSessionIdGen(); + + uint64_t lo, hi; + gen->GenerateNext(&hi, &lo); + if (lo == 0) { + // Avoid emitting session ID with lo==0, so that SST unique + // IDs can be more easily ensured non-zero + gen->GenerateNext(&hi, &lo); + assert(lo != 0); + } + return EncodeSessionId(hi, lo); +} + +void DBImpl::SetDbSessionId() { + db_session_id_ = GenerateDbSessionId(env_); + TEST_SYNC_POINT_CALLBACK("DBImpl::SetDbSessionId", &db_session_id_); +} + +// Default implementation -- returns not supported status +Status DB::CreateColumnFamily(const ColumnFamilyOptions& /*cf_options*/, + const std::string& /*column_family_name*/, + ColumnFamilyHandle** /*handle*/) { + return Status::NotSupported(""); +} + +Status DB::CreateColumnFamilies( + const ColumnFamilyOptions& /*cf_options*/, + const std::vector& /*column_family_names*/, + std::vector* /*handles*/) { + return Status::NotSupported(""); +} + +Status DB::CreateColumnFamilies( + const std::vector& /*column_families*/, + std::vector* /*handles*/) { + return Status::NotSupported(""); +} + +Status DB::DropColumnFamily(ColumnFamilyHandle* /*column_family*/) { + return Status::NotSupported(""); +} + +Status DB::DropColumnFamilies( + const std::vector& /*column_families*/) { + return Status::NotSupported(""); +} + +Status DB::DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family) { + if (DefaultColumnFamily() == column_family) { + return Status::InvalidArgument( + "Cannot destroy the handle returned by DefaultColumnFamily()"); + } + delete column_family; + return Status::OK(); +} + +DB::~DB() {} + +Status DBImpl::Close() { + InstrumentedMutexLock closing_lock_guard(&closing_mutex_); + if (closed_) { + return closing_status_; + } + + { + const Status s = MaybeReleaseTimestampedSnapshotsAndCheck(); + if (!s.ok()) { + return s; + } + } + + closing_status_ = CloseImpl(); + closed_ = true; + return closing_status_; +} + +Status DB::ListColumnFamilies(const DBOptions& db_options, + const std::string& name, + std::vector* column_families) { + const std::shared_ptr& fs = db_options.env->GetFileSystem(); + return VersionSet::ListColumnFamilies(column_families, name, fs.get()); +} + +Snapshot::~Snapshot() {} + +Status DestroyDB(const std::string& dbname, const Options& options, + const std::vector& column_families) { + ImmutableDBOptions soptions(SanitizeOptions(dbname, options)); + Env* env = soptions.env; + std::vector filenames; + bool wal_in_db_path = soptions.IsWalDirSameAsDBPath(); + + // Reset the logger because it holds a handle to the + // log file and prevents cleanup and directory removal + soptions.info_log.reset(); + IOOptions io_opts; + // Ignore error in case directory does not exist + soptions.fs + ->GetChildren(dbname, io_opts, &filenames, + /*IODebugContext*=*/nullptr) + .PermitUncheckedError(); + + FileLock* lock; + const std::string lockname = LockFileName(dbname); + Status result = env->LockFile(lockname, &lock); + if (result.ok()) { + uint64_t number; + FileType type; + InfoLogPrefix info_log_prefix(!soptions.db_log_dir.empty(), dbname); + for (const auto& fname : filenames) { + if (ParseFileName(fname, &number, info_log_prefix.prefix, &type) && + type != kDBLockFile) { // Lock file will be deleted at end + Status del; + std::string path_to_delete = dbname + "/" + fname; + if (type == kMetaDatabase) { + del = DestroyDB(path_to_delete, options); + } else if (type == kTableFile || type == kWalFile || + type == kBlobFile) { + del = DeleteDBFile( + &soptions, path_to_delete, dbname, + /*force_bg=*/false, + /*force_fg=*/(type == 
kWalFile) ? !wal_in_db_path : false); + } else { + del = env->DeleteFile(path_to_delete); + } + if (!del.ok() && result.ok()) { + result = del; + } + } + } + + std::set paths; + for (const DbPath& db_path : options.db_paths) { + paths.insert(db_path.path); + } + for (const ColumnFamilyDescriptor& cf : column_families) { + for (const DbPath& cf_path : cf.options.cf_paths) { + paths.insert(cf_path.path); + } + } + + for (const auto& path : paths) { + if (soptions.fs + ->GetChildren(path, io_opts, &filenames, + /*IODebugContext*=*/nullptr) + .ok()) { + for (const auto& fname : filenames) { + if (ParseFileName(fname, &number, &type) && + (type == kTableFile || + type == kBlobFile)) { // Lock file will be deleted at end + std::string file_path = path + "/" + fname; + Status del = DeleteDBFile(&soptions, file_path, dbname, + /*force_bg=*/false, /*force_fg=*/false); + if (!del.ok() && result.ok()) { + result = del; + } + } + } + // TODO: Should we return an error if we cannot delete the directory? + env->DeleteDir(path).PermitUncheckedError(); + } + } + + std::vector walDirFiles; + std::string archivedir = ArchivalDirectory(dbname); + bool wal_dir_exists = false; + if (!soptions.IsWalDirSameAsDBPath(dbname)) { + wal_dir_exists = + soptions.fs + ->GetChildren(soptions.wal_dir, io_opts, &walDirFiles, + /*IODebugContext*=*/nullptr) + .ok(); + archivedir = ArchivalDirectory(soptions.wal_dir); + } + + // Archive dir may be inside wal dir or dbname and should be + // processed and removed before those otherwise we have issues + // removing them + std::vector archiveFiles; + if (soptions.fs + ->GetChildren(archivedir, io_opts, &archiveFiles, + /*IODebugContext*=*/nullptr) + .ok()) { + // Delete archival files. + for (const auto& file : archiveFiles) { + if (ParseFileName(file, &number, &type) && type == kWalFile) { + Status del = + DeleteDBFile(&soptions, archivedir + "/" + file, archivedir, + /*force_bg=*/false, /*force_fg=*/!wal_in_db_path); + if (!del.ok() && result.ok()) { + result = del; + } + } + } + // Ignore error in case dir contains other files + env->DeleteDir(archivedir).PermitUncheckedError(); + } + + // Delete log files in the WAL dir + if (wal_dir_exists) { + for (const auto& file : walDirFiles) { + if (ParseFileName(file, &number, &type) && type == kWalFile) { + Status del = + DeleteDBFile(&soptions, LogFileName(soptions.wal_dir, number), + soptions.wal_dir, /*force_bg=*/false, + /*force_fg=*/!wal_in_db_path); + if (!del.ok() && result.ok()) { + result = del; + } + } + } + // Ignore error in case dir contains other files + env->DeleteDir(soptions.wal_dir).PermitUncheckedError(); + } + + // Ignore error since state is already gone + env->UnlockFile(lock).PermitUncheckedError(); + env->DeleteFile(lockname).PermitUncheckedError(); + + // sst_file_manager holds a ref to the logger. Make sure the logger is + // gone before trying to remove the directory. 
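+    // (Illustrative) both handles are shared_ptrs, so cleanup is just
+    // dropping the last references, mirroring the info_log reset earlier in
+    // this function:
+    //
+    //   soptions.info_log.reset();          // done above
+    //   soptions.sst_file_manager.reset();  // done below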
+    soptions.sst_file_manager.reset();
+
+    // Ignore error in case dir contains other files
+    env->DeleteDir(dbname).PermitUncheckedError();
+  }
+  return result;
+}
+
+Status DBImpl::WriteOptionsFile(bool need_mutex_lock,
+                                bool need_enter_write_thread) {
+  WriteThread::Writer w;
+  if (need_mutex_lock) {
+    mutex_.Lock();
+  } else {
+    mutex_.AssertHeld();
+  }
+  if (need_enter_write_thread) {
+    write_thread_.EnterUnbatched(&w, &mutex_);
+  }
+
+  std::vector<std::string> cf_names;
+  std::vector<ColumnFamilyOptions> cf_opts;
+
+  // This part requires mutex to protect the column family options
+  for (auto cfd : *versions_->GetColumnFamilySet()) {
+    if (cfd->IsDropped()) {
+      continue;
+    }
+    cf_names.push_back(cfd->GetName());
+    cf_opts.push_back(cfd->GetLatestCFOptions());
+  }
+
+  // Unlock during expensive operations. New writes cannot get here
+  // because the single write thread ensures all new writes get queued.
+  DBOptions db_options =
+      BuildDBOptions(immutable_db_options_, mutable_db_options_);
+  mutex_.Unlock();
+
+  TEST_SYNC_POINT("DBImpl::WriteOptionsFile:1");
+  TEST_SYNC_POINT("DBImpl::WriteOptionsFile:2");
+  TEST_SYNC_POINT_CALLBACK("DBImpl::WriteOptionsFile:PersistOptions",
+                           &db_options);
+
+  std::string file_name =
+      TempOptionsFileName(GetName(), versions_->NewFileNumber());
+  Status s = PersistRocksDBOptions(db_options, cf_names, cf_opts, file_name,
+                                   fs_.get());
+
+  if (s.ok()) {
+    s = RenameTempFileToOptionsFile(file_name);
+  }
+
+  if (!s.ok() && GetEnv()->FileExists(file_name).ok()) {
+    if (!GetEnv()->DeleteFile(file_name).ok()) {
+      ROCKS_LOG_WARN(immutable_db_options_.info_log,
+                     "Unable to delete temp options file %s",
+                     file_name.c_str());
+    }
+  }
+
+  // restore lock
+  if (!need_mutex_lock) {
+    mutex_.Lock();
+  }
+  if (need_enter_write_thread) {
+    write_thread_.ExitUnbatched(&w);
+  }
+  if (!s.ok()) {
+    ROCKS_LOG_WARN(immutable_db_options_.info_log,
+                   "Unable to persist options -- %s", s.ToString().c_str());
+    if (immutable_db_options_.fail_if_options_file_error) {
+      return Status::IOError("Unable to persist options.",
+                             s.ToString().c_str());
+    }
+  }
+  return Status::OK();
+}
+
+namespace {
+void DeleteOptionsFilesHelper(const std::map<uint64_t, std::string>& filenames,
+                              const size_t num_files_to_keep,
+                              const std::shared_ptr<Logger>& info_log,
+                              Env* env) {
+  if (filenames.size() <= num_files_to_keep) {
+    return;
+  }
+  for (auto iter = std::next(filenames.begin(), num_files_to_keep);
+       iter != filenames.end(); ++iter) {
+    if (!env->DeleteFile(iter->second).ok()) {
+      ROCKS_LOG_WARN(info_log, "Unable to delete options file %s",
+                     iter->second.c_str());
+    }
+  }
+}
+}  // namespace
+
+Status DBImpl::DeleteObsoleteOptionsFiles() {
+  std::vector<std::string> filenames;
+  // Use an ordered map to keep the filenames sorted from the newest
+  // to the oldest.
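+  // Worked example (illustrative numbers): OPTIONS files 000007 and 000009
+  // are inserted with keys UINT64_MAX - 7 and UINT64_MAX - 9. Since
+  // UINT64_MAX - 9 is the smaller key, the newer file sorts first, and
+  // DeleteOptionsFilesHelper above keeps only the first
+  // kNumOptionsFilesKept entries of the map.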
+  std::map<uint64_t, std::string> options_filenames;
+  Status s;
+  IOOptions io_opts;
+  io_opts.do_not_recurse = true;
+  s = fs_->GetChildren(GetName(), io_opts, &filenames,
+                       /*IODebugContext*=*/nullptr);
+  if (!s.ok()) {
+    return s;
+  }
+  for (auto& filename : filenames) {
+    uint64_t file_number;
+    FileType type;
+    if (ParseFileName(filename, &file_number, &type) && type == kOptionsFile) {
+      options_filenames.insert(
+          {std::numeric_limits<uint64_t>::max() - file_number,
+           GetName() + "/" + filename});
+    }
+  }
+
+  // Keep the latest 2 options files
+  const size_t kNumOptionsFilesKept = 2;
+  DeleteOptionsFilesHelper(options_filenames, kNumOptionsFilesKept,
+                           immutable_db_options_.info_log, GetEnv());
+  return Status::OK();
+}
+
+Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name) {
+  Status s;
+
+  uint64_t options_file_number = versions_->NewFileNumber();
+  std::string options_file_name =
+      OptionsFileName(GetName(), options_file_number);
+  uint64_t options_file_size = 0;
+  s = GetEnv()->GetFileSize(file_name, &options_file_size);
+  if (s.ok()) {
+    // Retry if the file name happens to conflict with an existing one.
+    s = GetEnv()->RenameFile(file_name, options_file_name);
+    std::unique_ptr<FSDirectory> dir_obj;
+    if (s.ok()) {
+      s = fs_->NewDirectory(GetName(), IOOptions(), &dir_obj, nullptr);
+    }
+    if (s.ok()) {
+      s = dir_obj->FsyncWithDirOptions(IOOptions(), nullptr,
+                                       DirFsyncOptions(options_file_name));
+    }
+    if (s.ok()) {
+      Status temp_s = dir_obj->Close(IOOptions(), nullptr);
+      // The default Close() could return "NotSupported" and we bypass it
+      // if it is not implemented. Detailed explanations can be found in
+      // db/db_impl/db_impl.h
+      if (!temp_s.ok()) {
+        if (temp_s.IsNotSupported()) {
+          temp_s.PermitUncheckedError();
+        } else {
+          s = temp_s;
+        }
+      }
+    }
+  }
+  if (s.ok()) {
+    InstrumentedMutexLock l(&mutex_);
+    versions_->options_file_number_ = options_file_number;
+    versions_->options_file_size_ = options_file_size;
+  }
+
+  if (0 == disable_delete_obsolete_files_) {
+    // TODO: Should we check for errors here?
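+    // (Illustrative) PermitUncheckedError() merely marks the Status as
+    // inspected so builds with status-check enforcement do not assert on an
+    // unchecked Status; it does not alter the outcome:
+    //
+    //   Status del_s = DeleteObsoleteOptionsFiles();
+    //   del_s.PermitUncheckedError();  // same effect as the chained call below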
+ DeleteObsoleteOptionsFiles().PermitUncheckedError(); + } + return s; +} + +#ifdef ROCKSDB_USING_THREAD_STATUS + +void DBImpl::NewThreadStatusCfInfo(ColumnFamilyData* cfd) const { + if (immutable_db_options_.enable_thread_tracking) { + ThreadStatusUtil::NewColumnFamilyInfo(this, cfd, cfd->GetName(), + cfd->ioptions()->env); + } +} + +void DBImpl::EraseThreadStatusCfInfo(ColumnFamilyData* cfd) const { + if (immutable_db_options_.enable_thread_tracking) { + ThreadStatusUtil::EraseColumnFamilyInfo(cfd); + } +} + +void DBImpl::EraseThreadStatusDbInfo() const { + if (immutable_db_options_.enable_thread_tracking) { + ThreadStatusUtil::EraseDatabaseInfo(this); + } +} + +#else +void DBImpl::NewThreadStatusCfInfo(ColumnFamilyData* /*cfd*/) const {} + +void DBImpl::EraseThreadStatusCfInfo(ColumnFamilyData* /*cfd*/) const {} + +void DBImpl::EraseThreadStatusDbInfo() const {} +#endif // ROCKSDB_USING_THREAD_STATUS + +// +// A global method that can dump out the build version +void DumpRocksDBBuildVersion(Logger* log) { + ROCKS_LOG_HEADER(log, "RocksDB version: %s\n", + GetRocksVersionAsString().c_str()); + const auto& props = GetRocksBuildProperties(); + const auto& sha = props.find("rocksdb_build_git_sha"); + if (sha != props.end()) { + ROCKS_LOG_HEADER(log, "Git sha %s", sha->second.c_str()); + } + const auto date = props.find("rocksdb_build_date"); + if (date != props.end()) { + ROCKS_LOG_HEADER(log, "Compile date %s", date->second.c_str()); + } +} + +SequenceNumber DBImpl::GetEarliestMemTableSequenceNumber(SuperVersion* sv, + bool include_history) { + // Find the earliest sequence number that we know we can rely on reading + // from the memtable without needing to check sst files. + SequenceNumber earliest_seq = + sv->imm->GetEarliestSequenceNumber(include_history); + if (earliest_seq == kMaxSequenceNumber) { + earliest_seq = sv->mem->GetEarliestSequenceNumber(); + } + assert(sv->mem->GetEarliestSequenceNumber() >= earliest_seq); + + return earliest_seq; +} + +Status DBImpl::GetLatestSequenceForKey( + SuperVersion* sv, const Slice& key, bool cache_only, + SequenceNumber lower_bound_seq, SequenceNumber* seq, std::string* timestamp, + bool* found_record_for_key, bool* is_blob_index) { + Status s; + MergeContext merge_context; + SequenceNumber max_covering_tombstone_seq = 0; + + // TODO: plumb Env::IOActivity + ReadOptions read_options; + SequenceNumber current_seq = versions_->LastSequence(); + + ColumnFamilyData* cfd = sv->cfd; + assert(cfd); + const Comparator* const ucmp = cfd->user_comparator(); + assert(ucmp); + size_t ts_sz = ucmp->timestamp_size(); + std::string ts_buf; + if (ts_sz > 0) { + assert(timestamp); + ts_buf.assign(ts_sz, '\xff'); + } else { + assert(!timestamp); + } + Slice ts(ts_buf); + + LookupKey lkey(key, current_seq, ts_sz == 0 ? nullptr : &ts); + + *seq = kMaxSequenceNumber; + *found_record_for_key = false; + + // Check if there is a record for this key in the latest memtable + sv->mem->Get(lkey, /*value=*/nullptr, /*columns=*/nullptr, timestamp, &s, + &merge_context, &max_covering_tombstone_seq, seq, read_options, + false /* immutable_memtable */, nullptr /*read_callback*/, + is_blob_index); + + if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { + // unexpected error reading memtable. 
+ ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "Unexpected status returned from MemTable::Get: %s\n", + s.ToString().c_str()); + + return s; + } + assert(!ts_sz || + (*seq != kMaxSequenceNumber && + *timestamp != std::string(ts_sz, '\xff')) || + (*seq == kMaxSequenceNumber && timestamp->empty())); + + TEST_SYNC_POINT_CALLBACK("DBImpl::GetLatestSequenceForKey:mem", timestamp); + + if (*seq != kMaxSequenceNumber) { + // Found a sequence number, no need to check immutable memtables + *found_record_for_key = true; + return Status::OK(); + } + + SequenceNumber lower_bound_in_mem = sv->mem->GetEarliestSequenceNumber(); + if (lower_bound_in_mem != kMaxSequenceNumber && + lower_bound_in_mem < lower_bound_seq) { + *found_record_for_key = false; + return Status::OK(); + } + + // Check if there is a record for this key in the immutable memtables + sv->imm->Get(lkey, /*value=*/nullptr, /*columns=*/nullptr, timestamp, &s, + &merge_context, &max_covering_tombstone_seq, seq, read_options, + nullptr /*read_callback*/, is_blob_index); + + if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { + // unexpected error reading memtable. + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "Unexpected status returned from MemTableList::Get: %s\n", + s.ToString().c_str()); + + return s; + } + + assert(!ts_sz || + (*seq != kMaxSequenceNumber && + *timestamp != std::string(ts_sz, '\xff')) || + (*seq == kMaxSequenceNumber && timestamp->empty())); + + if (*seq != kMaxSequenceNumber) { + // Found a sequence number, no need to check memtable history + *found_record_for_key = true; + return Status::OK(); + } + + SequenceNumber lower_bound_in_imm = sv->imm->GetEarliestSequenceNumber(); + if (lower_bound_in_imm != kMaxSequenceNumber && + lower_bound_in_imm < lower_bound_seq) { + *found_record_for_key = false; + return Status::OK(); + } + + // Check if there is a record for this key in the immutable memtables + sv->imm->GetFromHistory(lkey, /*value=*/nullptr, /*columns=*/nullptr, + timestamp, &s, &merge_context, + &max_covering_tombstone_seq, seq, read_options, + is_blob_index); + + if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { + // unexpected error reading memtable. + ROCKS_LOG_ERROR( + immutable_db_options_.info_log, + "Unexpected status returned from MemTableList::GetFromHistory: %s\n", + s.ToString().c_str()); + + return s; + } + + assert(!ts_sz || + (*seq != kMaxSequenceNumber && + *timestamp != std::string(ts_sz, '\xff')) || + (*seq == kMaxSequenceNumber && timestamp->empty())); + + if (*seq != kMaxSequenceNumber) { + // Found a sequence number, no need to check SST files + assert(0 == ts_sz || *timestamp != std::string(ts_sz, '\xff')); + *found_record_for_key = true; + return Status::OK(); + } + + // We could do a sv->imm->GetEarliestSequenceNumber(/*include_history*/ true) + // check here to skip the history if possible. But currently the caller + // already does that. Maybe we should move the logic here later. + + // TODO(agiardullo): possible optimization: consider checking cached + // SST files if cache_only=true? 
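+  // Caller-side sketch (illustrative; assumes a SuperVersion* sv and Slice
+  // key in scope, and no user-defined timestamps). With cache_only=true the
+  // lookup stops at the memtables, trading possible false negatives for
+  // zero I/O:
+  //
+  //   SequenceNumber seq = kMaxSequenceNumber;
+  //   bool found = false;
+  //   Status st = GetLatestSequenceForKey(sv, key, /*cache_only=*/true,
+  //                                       /*lower_bound_seq=*/0, &seq,
+  //                                       /*timestamp=*/nullptr, &found,
+  //                                       /*is_blob_index=*/nullptr);
+  //   // found == false may still mean "present only in SST files".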
+ if (!cache_only) { + // Check tables + PinnedIteratorsManager pinned_iters_mgr; + sv->current->Get(read_options, lkey, /*value=*/nullptr, /*columns=*/nullptr, + timestamp, &s, &merge_context, &max_covering_tombstone_seq, + &pinned_iters_mgr, nullptr /* value_found */, + found_record_for_key, seq, nullptr /*read_callback*/, + is_blob_index); + + if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { + // unexpected error reading SST files + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "Unexpected status returned from Version::Get: %s\n", + s.ToString().c_str()); + } + } + + return s; +} + +Status DBImpl::IngestExternalFile( + ColumnFamilyHandle* column_family, + const std::vector& external_files, + const IngestExternalFileOptions& ingestion_options) { + IngestExternalFileArg arg; + arg.column_family = column_family; + arg.external_files = external_files; + arg.options = ingestion_options; + return IngestExternalFiles({arg}); +} + +Status DBImpl::IngestExternalFiles( + const std::vector& args) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + if (args.empty()) { + return Status::InvalidArgument("ingestion arg list is empty"); + } + { + std::unordered_set unique_cfhs; + for (const auto& arg : args) { + if (arg.column_family == nullptr) { + return Status::InvalidArgument("column family handle is null"); + } else if (unique_cfhs.count(arg.column_family) > 0) { + return Status::InvalidArgument( + "ingestion args have duplicate column families"); + } + unique_cfhs.insert(arg.column_family); + } + } + // Ingest multiple external SST files atomically. + const size_t num_cfs = args.size(); + for (size_t i = 0; i != num_cfs; ++i) { + if (args[i].external_files.empty()) { + char err_msg[128] = {0}; + snprintf(err_msg, 128, "external_files[%zu] is empty", i); + return Status::InvalidArgument(err_msg); + } + } + for (const auto& arg : args) { + const IngestExternalFileOptions& ingest_opts = arg.options; + if (ingest_opts.ingest_behind && + !immutable_db_options_.allow_ingest_behind) { + return Status::InvalidArgument( + "can't ingest_behind file in DB with allow_ingest_behind=false"); + } + } + + // TODO (yanqin) maybe handle the case in which column_families have + // duplicates + std::unique_ptr::iterator> pending_output_elem; + size_t total = 0; + for (const auto& arg : args) { + total += arg.external_files.size(); + } + uint64_t next_file_number = 0; + Status status = ReserveFileNumbersBeforeIngestion( + static_cast(args[0].column_family)->cfd(), total, + pending_output_elem, &next_file_number); + if (!status.ok()) { + InstrumentedMutexLock l(&mutex_); + ReleaseFileNumberFromPendingOutputs(pending_output_elem); + return status; + } + + std::vector ingestion_jobs; + for (const auto& arg : args) { + auto* cfd = static_cast(arg.column_family)->cfd(); + ingestion_jobs.emplace_back(versions_.get(), cfd, immutable_db_options_, + mutable_db_options_, file_options_, &snapshots_, + arg.options, &directories_, &event_logger_, + io_tracer_); + } + + // TODO(yanqin) maybe make jobs run in parallel + uint64_t start_file_number = next_file_number; + for (size_t i = 1; i != num_cfs; ++i) { + start_file_number += args[i - 1].external_files.size(); + auto* cfd = + static_cast(args[i].column_family)->cfd(); + SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); + Status es = ingestion_jobs[i].Prepare( + args[i].external_files, args[i].files_checksums, + args[i].files_checksum_func_names, args[i].file_temperature, + start_file_number, super_version); + // capture first 
error only + if (!es.ok() && status.ok()) { + status = es; + } + CleanupSuperVersion(super_version); + } + TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeLastJobPrepare:0"); + TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeLastJobPrepare:1"); + { + auto* cfd = + static_cast(args[0].column_family)->cfd(); + SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); + Status es = ingestion_jobs[0].Prepare( + args[0].external_files, args[0].files_checksums, + args[0].files_checksum_func_names, args[0].file_temperature, + next_file_number, super_version); + if (!es.ok()) { + status = es; + } + CleanupSuperVersion(super_version); + } + if (!status.ok()) { + for (size_t i = 0; i != num_cfs; ++i) { + ingestion_jobs[i].Cleanup(status); + } + InstrumentedMutexLock l(&mutex_); + ReleaseFileNumberFromPendingOutputs(pending_output_elem); + return status; + } + + std::vector sv_ctxs; + for (size_t i = 0; i != num_cfs; ++i) { + sv_ctxs.emplace_back(true /* create_superversion */); + } + TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeJobsRun:0"); + TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeJobsRun:1"); + TEST_SYNC_POINT("DBImpl::AddFile:Start"); + { + InstrumentedMutexLock l(&mutex_); + TEST_SYNC_POINT("DBImpl::AddFile:MutexLock"); + + // Stop writes to the DB by entering both write threads + WriteThread::Writer w; + write_thread_.EnterUnbatched(&w, &mutex_); + WriteThread::Writer nonmem_w; + if (two_write_queues_) { + nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); + } + + // When unordered_write is enabled, the keys are writing to memtable in an + // unordered way. If the ingestion job checks memtable key range before the + // key landing in memtable, the ingestion job may skip the necessary + // memtable flush. + // So wait here to ensure there is no pending write to memtable. + WaitForPendingWrites(); + + num_running_ingest_file_ += static_cast(num_cfs); + TEST_SYNC_POINT("DBImpl::IngestExternalFile:AfterIncIngestFileCounter"); + + bool at_least_one_cf_need_flush = false; + std::vector need_flush(num_cfs, false); + for (size_t i = 0; i != num_cfs; ++i) { + auto* cfd = + static_cast(args[i].column_family)->cfd(); + if (cfd->IsDropped()) { + // TODO (yanqin) investigate whether we should abort ingestion or + // proceed with other non-dropped column families. + status = Status::InvalidArgument( + "cannot ingest an external file into a dropped CF"); + break; + } + bool tmp = false; + status = ingestion_jobs[i].NeedsFlush(&tmp, cfd->GetSuperVersion()); + need_flush[i] = tmp; + at_least_one_cf_need_flush = (at_least_one_cf_need_flush || tmp); + if (!status.ok()) { + break; + } + } + TEST_SYNC_POINT_CALLBACK("DBImpl::IngestExternalFile:NeedFlush", + &at_least_one_cf_need_flush); + + if (status.ok() && at_least_one_cf_need_flush) { + FlushOptions flush_opts; + flush_opts.allow_write_stall = true; + if (immutable_db_options_.atomic_flush) { + mutex_.Unlock(); + status = AtomicFlushMemTables( + flush_opts, FlushReason::kExternalFileIngestion, + {} /* provided_candidate_cfds */, true /* entered_write_thread */); + mutex_.Lock(); + } else { + for (size_t i = 0; i != num_cfs; ++i) { + if (need_flush[i]) { + mutex_.Unlock(); + auto* cfd = + static_cast(args[i].column_family) + ->cfd(); + status = FlushMemTable(cfd, flush_opts, + FlushReason::kExternalFileIngestion, + true /* entered_write_thread */); + mutex_.Lock(); + if (!status.ok()) { + break; + } + } + } + } + } + // Run ingestion jobs. 
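+    // (Illustrative) at this point the write threads have been stopped via
+    // EnterUnbatched() and mutex_ is held, so each job's Run() can install
+    // files without racing concurrent writes; e.g. a two-CF atomic call
+    //
+    //   db->IngestExternalFiles({arg_for_cf1, arg_for_cf2});  // made-up args
+    //
+    // reaches here with both jobs already Prepare()d above.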
+ if (status.ok()) { + for (size_t i = 0; i != num_cfs; ++i) { + mutex_.AssertHeld(); + status = ingestion_jobs[i].Run(); + if (!status.ok()) { + break; + } + ingestion_jobs[i].RegisterRange(); + } + } + if (status.ok()) { + autovector cfds_to_commit; + autovector mutable_cf_options_list; + autovector> edit_lists; + uint32_t num_entries = 0; + for (size_t i = 0; i != num_cfs; ++i) { + auto* cfd = + static_cast(args[i].column_family)->cfd(); + if (cfd->IsDropped()) { + continue; + } + cfds_to_commit.push_back(cfd); + mutable_cf_options_list.push_back(cfd->GetLatestMutableCFOptions()); + autovector edit_list; + edit_list.push_back(ingestion_jobs[i].edit()); + edit_lists.push_back(edit_list); + ++num_entries; + } + // Mark the version edits as an atomic group if the number of version + // edits exceeds 1. + if (cfds_to_commit.size() > 1) { + for (auto& edits : edit_lists) { + assert(edits.size() == 1); + edits[0]->MarkAtomicGroup(--num_entries); + } + assert(0 == num_entries); + } + status = versions_->LogAndApply(cfds_to_commit, mutable_cf_options_list, + read_options, edit_lists, &mutex_, + directories_.GetDbDir()); + // It is safe to update VersionSet last seqno here after LogAndApply since + // LogAndApply persists last sequence number from VersionEdits, + // which are from file's largest seqno and not from VersionSet. + // + // It is necessary to update last seqno here since LogAndApply releases + // mutex when persisting MANIFEST file, and the snapshots taken during + // that period will not be stable if VersionSet last seqno is updated + // before LogAndApply. + int consumed_seqno_count = + ingestion_jobs[0].ConsumedSequenceNumbersCount(); + for (size_t i = 1; i != num_cfs; ++i) { + consumed_seqno_count = + std::max(consumed_seqno_count, + ingestion_jobs[i].ConsumedSequenceNumbersCount()); + } + if (consumed_seqno_count > 0) { + const SequenceNumber last_seqno = versions_->LastSequence(); + versions_->SetLastAllocatedSequence(last_seqno + consumed_seqno_count); + versions_->SetLastPublishedSequence(last_seqno + consumed_seqno_count); + versions_->SetLastSequence(last_seqno + consumed_seqno_count); + } + } + + for (auto& job : ingestion_jobs) { + job.UnregisterRange(); + } + + if (status.ok()) { + for (size_t i = 0; i != num_cfs; ++i) { + auto* cfd = + static_cast(args[i].column_family)->cfd(); + if (!cfd->IsDropped()) { + InstallSuperVersionAndScheduleWork(cfd, &sv_ctxs[i], + *cfd->GetLatestMutableCFOptions()); +#ifndef NDEBUG + if (0 == i && num_cfs > 1) { + TEST_SYNC_POINT( + "DBImpl::IngestExternalFiles:InstallSVForFirstCF:0"); + TEST_SYNC_POINT( + "DBImpl::IngestExternalFiles:InstallSVForFirstCF:1"); + } +#endif // !NDEBUG + } + } + } else if (versions_->io_status().IsIOError()) { + // Error while writing to MANIFEST. + // In fact, versions_->io_status() can also be the result of renaming + // CURRENT file. With current code, it's just difficult to tell. So just + // be pessimistic and try write to a new MANIFEST. + // TODO: distinguish between MANIFEST write and CURRENT renaming + const IOStatus& io_s = versions_->io_status(); + // Should handle return error? 
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite); + } + + // Resume writes to the DB + if (two_write_queues_) { + nonmem_write_thread_.ExitUnbatched(&nonmem_w); + } + write_thread_.ExitUnbatched(&w); + + if (status.ok()) { + for (auto& job : ingestion_jobs) { + job.UpdateStats(); + } + } + ReleaseFileNumberFromPendingOutputs(pending_output_elem); + num_running_ingest_file_ -= static_cast(num_cfs); + if (0 == num_running_ingest_file_) { + bg_cv_.SignalAll(); + } + TEST_SYNC_POINT("DBImpl::AddFile:MutexUnlock"); + } + // mutex_ is unlocked here + + // Cleanup + for (size_t i = 0; i != num_cfs; ++i) { + sv_ctxs[i].Clean(); + // This may rollback jobs that have completed successfully. This is + // intended for atomicity. + ingestion_jobs[i].Cleanup(status); + } + if (status.ok()) { + for (size_t i = 0; i != num_cfs; ++i) { + auto* cfd = + static_cast(args[i].column_family)->cfd(); + if (!cfd->IsDropped()) { + NotifyOnExternalFileIngested(cfd, ingestion_jobs[i]); + } + } + } + return status; +} + +Status DBImpl::CreateColumnFamilyWithImport( + const ColumnFamilyOptions& options, const std::string& column_family_name, + const ImportColumnFamilyOptions& import_options, + const ExportImportFilesMetaData& metadata, ColumnFamilyHandle** handle) { + assert(handle != nullptr); + assert(*handle == nullptr); + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + std::string cf_comparator_name = options.comparator->Name(); + if (cf_comparator_name != metadata.db_comparator_name) { + return Status::InvalidArgument("Comparator name mismatch"); + } + + // Create column family. + auto status = CreateColumnFamily(options, column_family_name, handle); + if (!status.ok()) { + return status; + } + + // Import sst files from metadata. + auto cfh = static_cast_with_check(*handle); + auto cfd = cfh->cfd(); + ImportColumnFamilyJob import_job(versions_.get(), cfd, immutable_db_options_, + file_options_, import_options, + metadata.files, io_tracer_); + + SuperVersionContext dummy_sv_ctx(/* create_superversion */ true); + VersionEdit dummy_edit; + uint64_t next_file_number = 0; + std::unique_ptr::iterator> pending_output_elem; + { + // Lock db mutex + InstrumentedMutexLock l(&mutex_); + if (error_handler_.IsDBStopped()) { + // Don't import files when there is a bg_error + status = error_handler_.GetBGError(); + } + + // Make sure that bg cleanup wont delete the files that we are importing + pending_output_elem.reset(new std::list::iterator( + CaptureCurrentFileNumberInPendingOutputs())); + + if (status.ok()) { + // If crash happen after a hard link established, Recover function may + // reuse the file number that has already assigned to the internal file, + // and this will overwrite the external file. To protect the external + // file, we have to make sure the file number will never being reused. 
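+      // Caller-side sketch (illustrative; names and paths are made up):
+      //
+      //   // metadata typically comes from Checkpoint::ExportColumnFamily()
+      //   ColumnFamilyHandle* imported = nullptr;
+      //   Status st = db->CreateColumnFamilyWithImport(
+      //       ColumnFamilyOptions(), "imported_cf",
+      //       ImportColumnFamilyOptions(), metadata, &imported);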
+ next_file_number = versions_->FetchAddFileNumber(metadata.files.size()); + auto cf_options = cfd->GetLatestMutableCFOptions(); + status = + versions_->LogAndApply(cfd, *cf_options, read_options, &dummy_edit, + &mutex_, directories_.GetDbDir()); + if (status.ok()) { + InstallSuperVersionAndScheduleWork(cfd, &dummy_sv_ctx, *cf_options); + } + } + } + dummy_sv_ctx.Clean(); + + if (status.ok()) { + SuperVersion* sv = cfd->GetReferencedSuperVersion(this); + status = import_job.Prepare(next_file_number, sv); + CleanupSuperVersion(sv); + } + + if (status.ok()) { + SuperVersionContext sv_context(true /*create_superversion*/); + { + // Lock db mutex + InstrumentedMutexLock l(&mutex_); + + // Stop writes to the DB by entering both write threads + WriteThread::Writer w; + write_thread_.EnterUnbatched(&w, &mutex_); + WriteThread::Writer nonmem_w; + if (two_write_queues_) { + nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); + } + + num_running_ingest_file_++; + assert(!cfd->IsDropped()); + mutex_.AssertHeld(); + status = import_job.Run(); + + // Install job edit [Mutex will be unlocked here] + if (status.ok()) { + auto cf_options = cfd->GetLatestMutableCFOptions(); + status = versions_->LogAndApply(cfd, *cf_options, read_options, + import_job.edit(), &mutex_, + directories_.GetDbDir()); + if (status.ok()) { + InstallSuperVersionAndScheduleWork(cfd, &sv_context, *cf_options); + } + } + + // Resume writes to the DB + if (two_write_queues_) { + nonmem_write_thread_.ExitUnbatched(&nonmem_w); + } + write_thread_.ExitUnbatched(&w); + + num_running_ingest_file_--; + if (num_running_ingest_file_ == 0) { + bg_cv_.SignalAll(); + } + } + // mutex_ is unlocked here + + sv_context.Clean(); + } + + { + InstrumentedMutexLock l(&mutex_); + ReleaseFileNumberFromPendingOutputs(pending_output_elem); + } + + import_job.Cleanup(status); + if (!status.ok()) { + Status temp_s = DropColumnFamily(*handle); + if (!temp_s.ok()) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "DropColumnFamily failed with error %s", + temp_s.ToString().c_str()); + } + // Always returns Status::OK() + temp_s = DestroyColumnFamilyHandle(*handle); + assert(temp_s.ok()); + *handle = nullptr; + } + return status; +} + +Status DBImpl::ClipColumnFamily(ColumnFamilyHandle* column_family, + const Slice& begin_key, const Slice& end_key) { + assert(column_family); + Status status; + // Flush memtable + FlushOptions flush_opts; + flush_opts.allow_write_stall = true; + auto* cfd = + static_cast_with_check(column_family)->cfd(); + if (immutable_db_options_.atomic_flush) { + status = AtomicFlushMemTables(flush_opts, FlushReason::kDeleteFiles, + {} /* provided_candidate_cfds */, + false /* entered_write_thread */); + } else { + status = FlushMemTable(cfd, flush_opts, FlushReason::kDeleteFiles, + false /* entered_write_thread */); + } + + if (status.ok()) { + // DeleteFilesInRanges non-overlap files except L0 + std::vector ranges; + ranges.push_back(RangePtr(nullptr, &begin_key)); + ranges.push_back(RangePtr(&end_key, nullptr)); + status = DeleteFilesInRanges(column_family, ranges.data(), ranges.size()); + } + + // DeleteRange the remaining overlapping keys + bool empty_after_delete = false; + if (status.ok()) { + Slice smallest_user_key, largest_user_key; + { + // Lock db mutex + InstrumentedMutexLock l(&mutex_); + cfd->current()->GetSstFilesBoundaryKeys(&smallest_user_key, + &largest_user_key); + } + // all the files has been deleted after DeleteFilesInRanges; + if (smallest_user_key.empty() && largest_user_key.empty()) { + empty_after_delete = 
true; + } else { + const Comparator* const ucmp = column_family->GetComparator(); + WriteOptions wo; + // Delete [smallest_user_key, clip_begin_key) + if (ucmp->Compare(smallest_user_key, begin_key) < 0) { + status = DeleteRange(wo, column_family, smallest_user_key, begin_key); + } + + if (status.ok()) { + // Delete [clip_end_key, largest_use_key] + if (ucmp->Compare(end_key, largest_user_key) < 0) { + status = DeleteRange(wo, column_family, end_key, largest_user_key); + if (status.ok()) { + status = Delete(wo, column_family, largest_user_key); + } + } + } + } + } + + if (status.ok() && !empty_after_delete) { + // CompactRange delete all the tombstones + CompactRangeOptions compact_options; + compact_options.exclusive_manual_compaction = true; + compact_options.bottommost_level_compaction = + BottommostLevelCompaction::kForceOptimized; + // We could just compact the ranges [null, clip_begin_key] and + // [clip_end_key, null]. But due to how manual compaction calculates the + // last level to compact to and that range tombstones are not dropped + // during non-bottommost compactions, calling CompactRange() on these two + // ranges may not clear all range tombstones. + status = CompactRange(compact_options, nullptr, nullptr); + } + return status; +} + +Status DBImpl::VerifyFileChecksums(const ReadOptions& read_options) { + return VerifyChecksumInternal(read_options, /*use_file_checksum=*/true); +} + +Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { + return VerifyChecksumInternal(read_options, /*use_file_checksum=*/false); +} + +Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, + bool use_file_checksum) { + // `bytes_read` stat is enabled based on compile-time support and cannot + // be dynamically toggled. So we do not need to worry about `PerfLevel` + // here, unlike many other `IOStatsContext` / `PerfContext` stats. + uint64_t prev_bytes_read = IOSTATS(bytes_read); + + Status s; + + if (read_options.io_activity != Env::IOActivity::kUnknown) { + s = Status::InvalidArgument( + "Cannot verify file checksum with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + return s; + } + if (use_file_checksum) { + FileChecksumGenFactory* const file_checksum_gen_factory = + immutable_db_options_.file_checksum_gen_factory.get(); + if (!file_checksum_gen_factory) { + s = Status::InvalidArgument( + "Cannot verify file checksum if options.file_checksum_gen_factory is " + "null"); + return s; + } + } + + // TODO: simplify using GetRefedColumnFamilySet? 
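+  // Caller-side sketch (illustrative): both public entry points funnel into
+  // this function:
+  //
+  //   Status s1 = db->VerifyChecksum(ReadOptions());       // per-block check
+  //   Status s2 = db->VerifyFileChecksums(ReadOptions());  // whole-file check
+  //
+  // The whole-file variant requires a non-null
+  // options.file_checksum_gen_factory, as enforced above.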
+ std::vector cfd_list; + { + InstrumentedMutexLock l(&mutex_); + for (auto cfd : *versions_->GetColumnFamilySet()) { + if (!cfd->IsDropped() && cfd->initialized()) { + cfd->Ref(); + cfd_list.push_back(cfd); + } + } + } + std::vector sv_list; + for (auto cfd : cfd_list) { + sv_list.push_back(cfd->GetReferencedSuperVersion(this)); + } + + for (auto& sv : sv_list) { + VersionStorageInfo* vstorage = sv->current->storage_info(); + ColumnFamilyData* cfd = sv->current->cfd(); + Options opts; + if (!use_file_checksum) { + InstrumentedMutexLock l(&mutex_); + opts = Options(BuildDBOptions(immutable_db_options_, mutable_db_options_), + cfd->GetLatestCFOptions()); + } + for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) { + for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok(); + j++) { + const auto& fd_with_krange = vstorage->LevelFilesBrief(i).files[j]; + const auto& fd = fd_with_krange.fd; + const FileMetaData* fmeta = fd_with_krange.file_metadata; + assert(fmeta); + std::string fname = TableFileName(cfd->ioptions()->cf_paths, + fd.GetNumber(), fd.GetPathId()); + if (use_file_checksum) { + s = VerifyFullFileChecksum(fmeta->file_checksum, + fmeta->file_checksum_func_name, fname, + read_options); + } else { + s = ROCKSDB_NAMESPACE::VerifySstFileChecksum( + opts, file_options_, read_options, fname, fd.largest_seqno); + } + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + prev_bytes_read = IOSTATS(bytes_read); + } + } + + if (s.ok() && use_file_checksum) { + const auto& blob_files = vstorage->GetBlobFiles(); + for (const auto& meta : blob_files) { + assert(meta); + + const uint64_t blob_file_number = meta->GetBlobFileNumber(); + + const std::string blob_file_name = BlobFileName( + cfd->ioptions()->cf_paths.front().path, blob_file_number); + s = VerifyFullFileChecksum(meta->GetChecksumValue(), + meta->GetChecksumMethod(), blob_file_name, + read_options); + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + prev_bytes_read = IOSTATS(bytes_read); + if (!s.ok()) { + break; + } + } + } + if (!s.ok()) { + break; + } + } + + bool defer_purge = immutable_db_options().avoid_unnecessary_blocking_io; + { + InstrumentedMutexLock l(&mutex_); + for (auto sv : sv_list) { + if (sv && sv->Unref()) { + sv->Cleanup(); + if (defer_purge) { + AddSuperVersionsToFreeQueue(sv); + } else { + delete sv; + } + } + } + if (defer_purge) { + SchedulePurge(); + } + for (auto cfd : cfd_list) { + cfd->UnrefAndTryDelete(); + } + } + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + return s; +} + +Status DBImpl::VerifyFullFileChecksum(const std::string& file_checksum_expected, + const std::string& func_name_expected, + const std::string& fname, + const ReadOptions& read_options) { + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call VerifyChecksum with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + + Status s; + if (file_checksum_expected == kUnknownFileChecksum) { + return s; + } + std::string file_checksum; + std::string func_name; + s = ROCKSDB_NAMESPACE::GenerateOneFileChecksum( + fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(), + func_name_expected, &file_checksum, &func_name, + read_options.readahead_size, immutable_db_options_.allow_mmap_reads, + io_tracer_, immutable_db_options_.rate_limiter.get(), + read_options.rate_limiter_priority); + if (s.ok()) { + 
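+    // GenerateOneFileChecksum was seeded with `func_name_expected`, so on
+    // success the generator it used must report that same name; the assert
+    // below records that invariant before the checksum values are compared.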
assert(func_name_expected == func_name); + if (file_checksum != file_checksum_expected) { + std::ostringstream oss; + oss << fname << " file checksum mismatch, "; + oss << "expecting " + << Slice(file_checksum_expected).ToString(/*hex=*/true); + oss << ", but actual " << Slice(file_checksum).ToString(/*hex=*/true); + s = Status::Corruption(oss.str()); + TEST_SYNC_POINT_CALLBACK("DBImpl::VerifyFullFileChecksum:mismatch", &s); + } + } + return s; +} + +void DBImpl::NotifyOnExternalFileIngested( + ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job) { + if (immutable_db_options_.listeners.empty()) { + return; + } + + for (const IngestedFileInfo& f : ingestion_job.files_to_ingest()) { + ExternalFileIngestionInfo info; + info.cf_name = cfd->GetName(); + info.external_file_path = f.external_file_path; + info.internal_file_path = f.internal_file_path; + info.global_seqno = f.assigned_seqno; + info.table_properties = f.table_properties; + for (auto listener : immutable_db_options_.listeners) { + listener->OnExternalFileIngested(this, info); + } + } +} + +Status DBImpl::StartTrace(const TraceOptions& trace_options, + std::unique_ptr&& trace_writer) { + InstrumentedMutexLock lock(&trace_mutex_); + tracer_.reset(new Tracer(immutable_db_options_.clock, trace_options, + std::move(trace_writer))); + return Status::OK(); +} + +Status DBImpl::EndTrace() { + InstrumentedMutexLock lock(&trace_mutex_); + Status s; + if (tracer_ != nullptr) { + s = tracer_->Close(); + tracer_.reset(); + } else { + s = Status::IOError("No trace file to close"); + } + return s; +} + +Status DBImpl::NewDefaultReplayer( + const std::vector& handles, + std::unique_ptr&& reader, + std::unique_ptr* replayer) { + replayer->reset(new ReplayerImpl(this, handles, std::move(reader))); + return Status::OK(); +} + +Status DBImpl::StartBlockCacheTrace( + const TraceOptions& trace_options, + std::unique_ptr&& trace_writer) { + BlockCacheTraceOptions block_trace_opts; + block_trace_opts.sampling_frequency = trace_options.sampling_frequency; + + BlockCacheTraceWriterOptions trace_writer_opt; + trace_writer_opt.max_trace_file_size = trace_options.max_trace_file_size; + + std::unique_ptr block_cache_trace_writer = + NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, + std::move(trace_writer)); + + return block_cache_tracer_.StartTrace(block_trace_opts, + std::move(block_cache_trace_writer)); +} + +Status DBImpl::StartBlockCacheTrace( + const BlockCacheTraceOptions& trace_options, + std::unique_ptr&& trace_writer) { + return block_cache_tracer_.StartTrace(trace_options, std::move(trace_writer)); +} + +Status DBImpl::EndBlockCacheTrace() { + block_cache_tracer_.EndTrace(); + return Status::OK(); +} + +Status DBImpl::TraceIteratorSeek(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, + const Slice upper_bound) { + Status s; + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + s = tracer_->IteratorSeek(cf_id, key, lower_bound, upper_bound); + } + } + return s; +} + +Status DBImpl::TraceIteratorSeekForPrev(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, + const Slice upper_bound) { + Status s; + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + s = tracer_->IteratorSeekForPrev(cf_id, key, lower_bound, upper_bound); + } + } + return s; +} + +Status DBImpl::ReserveFileNumbersBeforeIngestion( + ColumnFamilyData* cfd, uint64_t num, + std::unique_ptr::iterator>& pending_output_elem, + uint64_t* next_file_number) { + // 
TODO: plumb Env::IOActivity + const ReadOptions read_options; + Status s; + SuperVersionContext dummy_sv_ctx(true /* create_superversion */); + assert(nullptr != next_file_number); + InstrumentedMutexLock l(&mutex_); + if (error_handler_.IsDBStopped()) { + // Do not ingest files when there is a bg_error + return error_handler_.GetBGError(); + } + pending_output_elem.reset(new std::list::iterator( + CaptureCurrentFileNumberInPendingOutputs())); + *next_file_number = versions_->FetchAddFileNumber(static_cast(num)); + auto cf_options = cfd->GetLatestMutableCFOptions(); + VersionEdit dummy_edit; + // If crash happen after a hard link established, Recover function may + // reuse the file number that has already assigned to the internal file, + // and this will overwrite the external file. To protect the external + // file, we have to make sure the file number will never being reused. + s = versions_->LogAndApply(cfd, *cf_options, read_options, &dummy_edit, + &mutex_, directories_.GetDbDir()); + if (s.ok()) { + InstallSuperVersionAndScheduleWork(cfd, &dummy_sv_ctx, *cf_options); + } + dummy_sv_ctx.Clean(); + return s; +} + +Status DBImpl::GetCreationTimeOfOldestFile(uint64_t* creation_time) { + if (mutable_db_options_.max_open_files == -1) { + uint64_t oldest_time = std::numeric_limits::max(); + for (auto cfd : *versions_->GetColumnFamilySet()) { + if (!cfd->IsDropped()) { + uint64_t ctime; + { + SuperVersion* sv = GetAndRefSuperVersion(cfd); + Version* version = sv->current; + version->GetCreationTimeOfOldestFile(&ctime); + ReturnAndCleanupSuperVersion(cfd, sv); + } + + if (ctime < oldest_time) { + oldest_time = ctime; + } + if (oldest_time == 0) { + break; + } + } + } + *creation_time = oldest_time; + return Status::OK(); + } else { + return Status::NotSupported("This API only works if max_open_files = -1"); + } +} + +void DBImpl::RecordSeqnoToTimeMapping() { + // Get time first then sequence number, so the actual time of seqno is <= + // unix_time recorded + int64_t unix_time = 0; + immutable_db_options_.clock->GetCurrentTime(&unix_time) + .PermitUncheckedError(); // Ignore error + SequenceNumber seqno = GetLatestSequenceNumber(); + bool appended = false; + { + InstrumentedMutexLock l(&mutex_); + appended = seqno_time_mapping_.Append(seqno, unix_time); + } + if (!appended) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "Failed to insert sequence number to time entry: %" PRIu64 + " -> %" PRIu64, + seqno, unix_time); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.h new file mode 100644 index 0000000000..f2da7467db --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl.h @@ -0,0 +1,2841 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+#pragma once
+
+#include <atomic>
+#include <deque>
+#include <functional>
+#include <limits>
+#include <list>
+#include <map>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "db/column_family.h"
+#include "db/compaction/compaction_iterator.h"
+#include "db/compaction/compaction_job.h"
+#include "db/error_handler.h"
+#include "db/event_helpers.h"
+#include "db/external_sst_file_ingestion_job.h"
+#include "db/flush_job.h"
+#include "db/flush_scheduler.h"
+#include "db/import_column_family_job.h"
+#include "db/internal_stats.h"
+#include "db/log_writer.h"
+#include "db/logs_with_prep_tracker.h"
+#include "db/memtable_list.h"
+#include "db/periodic_task_scheduler.h"
+#include "db/post_memtable_callback.h"
+#include "db/pre_release_callback.h"
+#include "db/range_del_aggregator.h"
+#include "db/read_callback.h"
+#include "db/seqno_to_time_mapping.h"
+#include "db/snapshot_checker.h"
+#include "db/snapshot_impl.h"
+#include "db/trim_history_scheduler.h"
+#include "db/version_edit.h"
+#include "db/wal_manager.h"
+#include "db/write_controller.h"
+#include "db/write_thread.h"
+#include "logging/event_logger.h"
+#include "monitoring/instrumented_mutex.h"
+#include "options/db_options.h"
+#include "port/port.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/memtablerep.h"
+#include "rocksdb/status.h"
+#include "rocksdb/trace_reader_writer.h"
+#include "rocksdb/transaction_log.h"
+#include "rocksdb/utilities/replayer.h"
+#include "rocksdb/write_buffer_manager.h"
+#include "table/merging_iterator.h"
+#include "table/scoped_arena_iterator.h"
+#include "util/autovector.h"
+#include "util/hash.h"
+#include "util/repeatable_thread.h"
+#include "util/stop_watch.h"
+#include "util/thread_local.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Arena;
+class ArenaWrappedDBIter;
+class InMemoryStatsHistoryIterator;
+class MemTable;
+class PersistentStatsHistoryIterator;
+class TableCache;
+class TaskLimiterToken;
+class Version;
+class VersionEdit;
+class VersionSet;
+class WriteCallback;
+struct JobContext;
+struct ExternalSstFileInfo;
+struct MemTableInfo;
+
+// Class to maintain directories for all database paths other than main one.
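+//
+// For example (a sketch; `directories` stands for the DBImpl member that owns
+// an instance of this class), resolving where a table file lives:
+//
+//   FSDirectory* dir = directories.GetDataDir(file_path_id);
+//   // falls back to GetDbDir() when no directory was opened for that path id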
+class Directories {
+ public:
+  IOStatus SetDirectories(FileSystem* fs, const std::string& dbname,
+                          const std::string& wal_dir,
+                          const std::vector<DbPath>& data_paths);
+
+  FSDirectory* GetDataDir(size_t path_id) const {
+    assert(path_id < data_dirs_.size());
+    FSDirectory* ret_dir = data_dirs_[path_id].get();
+    if (ret_dir == nullptr) {
+      // Should use db_dir_
+      return db_dir_.get();
+    }
+    return ret_dir;
+  }
+
+  FSDirectory* GetWalDir() {
+    if (wal_dir_) {
+      return wal_dir_.get();
+    }
+    return db_dir_.get();
+  }
+
+  FSDirectory* GetDbDir() { return db_dir_.get(); }
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) {
+    // close all directories for all database paths
+    IOStatus s = IOStatus::OK();
+
+    // The default implementation for Close() in Directory/FSDirectory
+    // returns a "NotSupported" status; the upper level interface should be
+    // able to handle this error so that Close() does not fail after
+    // upgrading when run on FileSystems that have not implemented
+    // `Directory::Close()` or `FSDirectory::Close()` yet
+
+    if (db_dir_) {
+      IOStatus temp_s = db_dir_->Close(options, dbg);
+      if (!temp_s.ok() && !temp_s.IsNotSupported() && s.ok()) {
+        s = std::move(temp_s);
+      }
+    }
+
+    // Attempt to close everything even if one fails
+    s.PermitUncheckedError();
+
+    if (wal_dir_) {
+      IOStatus temp_s = wal_dir_->Close(options, dbg);
+      if (!temp_s.ok() && !temp_s.IsNotSupported() && s.ok()) {
+        s = std::move(temp_s);
+      }
+    }
+
+    s.PermitUncheckedError();
+
+    for (auto& data_dir_ptr : data_dirs_) {
+      if (data_dir_ptr) {
+        IOStatus temp_s = data_dir_ptr->Close(options, dbg);
+        if (!temp_s.ok() && !temp_s.IsNotSupported() && s.ok()) {
+          s = std::move(temp_s);
+        }
+      }
+    }
+
+    // Ready for caller
+    s.MustCheck();
+    return s;
+  }
+
+ private:
+  std::unique_ptr<FSDirectory> db_dir_;
+  std::vector<std::unique_ptr<FSDirectory>> data_dirs_;
+  std::unique_ptr<FSDirectory> wal_dir_;
+};
+
+// DB is the public interface of RocksDB, and DBImpl is the actual class
+// implementing it. It is the entrance to the core RocksDB engine.
+// All other DB implementations, e.g. TransactionDB, BlobDB, etc, wrap a
+// DBImpl internally.
+// Other than functions implementing the DB interface, some public
+// functions are there for other internal components to call. For
+// example, TransactionDB directly calls DBImpl::WriteImpl() and
+// BlobDB directly calls DBImpl::GetImpl(). Some other functions
+// are for sub-components to call. For example, ColumnFamilyHandleImpl
+// calls DBImpl::FindObsoleteFiles().
+//
+// Since it's a very large class, the definitions of the functions are
+// divided into several db_impl_*.cc files, besides db_impl.cc.
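+//
+// For instance (a sketch, not vendored code), a wrapper such as
+// TransactionDB reaches these internal entry points through the root DB:
+//
+//   DBImpl* impl = static_cast_with_check<DBImpl>(db->GetRootDB());
+//   WriteBatch batch;
+//   batch.Put("key", "value");
+//   Status s = impl->WriteImpl(WriteOptions(), &batch);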
+class DBImpl : public DB { + public: + DBImpl(const DBOptions& options, const std::string& dbname, + const bool seq_per_batch = false, const bool batch_per_txn = true, + bool read_only = false); + // No copying allowed + DBImpl(const DBImpl&) = delete; + void operator=(const DBImpl&) = delete; + + virtual ~DBImpl(); + + // ---- Implementations of the DB interface ---- + + using DB::Resume; + Status Resume() override; + + using DB::Put; + Status Put(const WriteOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) override; + Status Put(const WriteOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts, const Slice& value) override; + + using DB::PutEntity; + Status PutEntity(const WriteOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + const WideColumns& columns) override; + + using DB::Merge; + Status Merge(const WriteOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) override; + Status Merge(const WriteOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts, const Slice& value) override; + + using DB::Delete; + Status Delete(const WriteOptions& options, ColumnFamilyHandle* column_family, + const Slice& key) override; + Status Delete(const WriteOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts) override; + + using DB::SingleDelete; + Status SingleDelete(const WriteOptions& options, + ColumnFamilyHandle* column_family, + const Slice& key) override; + Status SingleDelete(const WriteOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) override; + + using DB::DeleteRange; + Status DeleteRange(const WriteOptions& options, + ColumnFamilyHandle* column_family, const Slice& begin_key, + const Slice& end_key) override; + Status DeleteRange(const WriteOptions& options, + ColumnFamilyHandle* column_family, const Slice& begin_key, + const Slice& end_key, const Slice& ts) override; + + using DB::Write; + virtual Status Write(const WriteOptions& options, + WriteBatch* updates) override; + + using DB::Get; + virtual Status Get(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) override; + virtual Status Get(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp) override; + + using DB::GetEntity; + Status GetEntity(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableWideColumns* columns) override; + + using DB::GetMergeOperands; + Status GetMergeOperands(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* merge_operands, + GetMergeOperandsOptions* get_merge_operands_options, + int* number_of_operands) override { + GetImplOptions get_impl_options; + get_impl_options.column_family = column_family; + get_impl_options.merge_operands = merge_operands; + get_impl_options.get_merge_operands_options = get_merge_operands_options; + get_impl_options.number_of_operands = number_of_operands; + get_impl_options.get_value = false; + return GetImpl(options, key, get_impl_options); + } + + using DB::MultiGet; + virtual std::vector MultiGet( + const ReadOptions& options, + const std::vector& column_family, + const std::vector& keys, + std::vector* values) override; + virtual std::vector MultiGet( + const ReadOptions& options, + const std::vector& 
 column_family,
+      const std::vector<Slice>& keys, std::vector<std::string>* values,
+      std::vector<std::string>* timestamps) override;
+
+  // This MultiGet is a batched version, which may be faster than calling Get
+  // multiple times, especially if the keys have some spatial locality that
+  // enables them to be queried in the same SST files/set of files. The larger
+  // the batch size, the more scope for batching and performance improvement.
+  // The values and statuses parameters are arrays with number of elements
+  // equal to keys.size(). This allows the storage for those to be allocated
+  // by the caller on the stack for small batches.
+  void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family,
+                const size_t num_keys, const Slice* keys, PinnableSlice* values,
+                Status* statuses, const bool sorted_input = false) override;
+  void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family,
+                const size_t num_keys, const Slice* keys, PinnableSlice* values,
+                std::string* timestamps, Status* statuses,
+                const bool sorted_input = false) override;
+
+  void MultiGet(const ReadOptions& options, const size_t num_keys,
+                ColumnFamilyHandle** column_families, const Slice* keys,
+                PinnableSlice* values, Status* statuses,
+                const bool sorted_input = false) override;
+  void MultiGet(const ReadOptions& options, const size_t num_keys,
+                ColumnFamilyHandle** column_families, const Slice* keys,
+                PinnableSlice* values, std::string* timestamps,
+                Status* statuses, const bool sorted_input = false) override;
+
+  void MultiGetWithCallback(
+      const ReadOptions& options, ColumnFamilyHandle* column_family,
+      ReadCallback* callback,
+      autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys);
+
+  using DB::MultiGetEntity;
+
+  void MultiGetEntity(const ReadOptions& options,
+                      ColumnFamilyHandle* column_family, size_t num_keys,
+                      const Slice* keys, PinnableWideColumns* results,
+                      Status* statuses, bool sorted_input) override;
+
+  void MultiGetEntity(const ReadOptions& options, size_t num_keys,
+                      ColumnFamilyHandle** column_families, const Slice* keys,
+                      PinnableWideColumns* results, Status* statuses,
+                      bool sorted_input) override;
+
+  virtual Status CreateColumnFamily(const ColumnFamilyOptions& cf_options,
+                                    const std::string& column_family,
+                                    ColumnFamilyHandle** handle) override;
+  virtual Status CreateColumnFamilies(
+      const ColumnFamilyOptions& cf_options,
+      const std::vector<std::string>& column_family_names,
+      std::vector<ColumnFamilyHandle*>* handles) override;
+  virtual Status CreateColumnFamilies(
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      std::vector<ColumnFamilyHandle*>* handles) override;
+  virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) override;
+  virtual Status DropColumnFamilies(
+      const std::vector<ColumnFamilyHandle*>& column_families) override;
+
+  // Returns false if key doesn't exist in the database and true if it may.
+  // If value_found is not passed in as null, then return the value if found in
+  // memory. On return, if value was found, then value_found will be set to
+  // true, otherwise false.
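+  //
+  // Example (a sketch; `db`, `cfh` and `key` are placeholders for a caller's
+  // handles): a cheap negative probe before paying for a full Get():
+  //
+  //   std::string val;
+  //   bool found_in_memory = false;
+  //   if (!db->KeyMayExist(ReadOptions(), cfh, key, &val,
+  //                        /*timestamp=*/nullptr, &found_in_memory)) {
+  //     // key is definitely absent; the Get() can be skipped
+  //   }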
+ using DB::KeyMayExist; + virtual bool KeyMayExist(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + std::string* value, std::string* timestamp, + bool* value_found = nullptr) override; + + using DB::NewIterator; + virtual Iterator* NewIterator(const ReadOptions& options, + ColumnFamilyHandle* column_family) override; + virtual Status NewIterators( + const ReadOptions& options, + const std::vector& column_families, + std::vector* iterators) override; + + virtual const Snapshot* GetSnapshot() override; + virtual void ReleaseSnapshot(const Snapshot* snapshot) override; + // Create a timestamped snapshot. This snapshot can be shared by multiple + // readers. If any of them uses it for write conflict checking, then + // is_write_conflict_boundary is true. For simplicity, set it to true by + // default. + std::pair> CreateTimestampedSnapshot( + SequenceNumber snapshot_seq, uint64_t ts); + std::shared_ptr GetTimestampedSnapshot(uint64_t ts) const; + void ReleaseTimestampedSnapshotsOlderThan( + uint64_t ts, size_t* remaining_total_ss = nullptr); + Status GetTimestampedSnapshots(uint64_t ts_lb, uint64_t ts_ub, + std::vector>& + timestamped_snapshots) const; + + using DB::GetProperty; + virtual bool GetProperty(ColumnFamilyHandle* column_family, + const Slice& property, std::string* value) override; + using DB::GetMapProperty; + virtual bool GetMapProperty( + ColumnFamilyHandle* column_family, const Slice& property, + std::map* value) override; + using DB::GetIntProperty; + virtual bool GetIntProperty(ColumnFamilyHandle* column_family, + const Slice& property, uint64_t* value) override; + using DB::GetAggregatedIntProperty; + virtual bool GetAggregatedIntProperty(const Slice& property, + uint64_t* aggregated_value) override; + using DB::GetApproximateSizes; + virtual Status GetApproximateSizes(const SizeApproximationOptions& options, + ColumnFamilyHandle* column_family, + const Range* range, int n, + uint64_t* sizes) override; + using DB::GetApproximateMemTableStats; + virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, + const Range& range, + uint64_t* const count, + uint64_t* const size) override; + using DB::CompactRange; + virtual Status CompactRange(const CompactRangeOptions& options, + ColumnFamilyHandle* column_family, + const Slice* begin, const Slice* end) override; + + using DB::CompactFiles; + virtual Status CompactFiles( + const CompactionOptions& compact_options, + ColumnFamilyHandle* column_family, + const std::vector& input_file_names, const int output_level, + const int output_path_id = -1, + std::vector* const output_file_names = nullptr, + CompactionJobInfo* compaction_job_info = nullptr) override; + + virtual Status PauseBackgroundWork() override; + virtual Status ContinueBackgroundWork() override; + + virtual Status EnableAutoCompaction( + const std::vector& column_family_handles) override; + + virtual void EnableManualCompaction() override; + virtual void DisableManualCompaction() override; + + using DB::SetOptions; + Status SetOptions( + ColumnFamilyHandle* column_family, + const std::unordered_map& options_map) override; + + virtual Status SetDBOptions( + const std::unordered_map& options_map) override; + + using DB::NumberLevels; + virtual int NumberLevels(ColumnFamilyHandle* column_family) override; + using DB::MaxMemCompactionLevel; + virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) override; + using DB::Level0StopWriteTrigger; + virtual int Level0StopWriteTrigger( + ColumnFamilyHandle* 
column_family) override; + virtual const std::string& GetName() const override; + virtual Env* GetEnv() const override; + virtual FileSystem* GetFileSystem() const override; + using DB::GetOptions; + virtual Options GetOptions(ColumnFamilyHandle* column_family) const override; + using DB::GetDBOptions; + virtual DBOptions GetDBOptions() const override; + using DB::Flush; + virtual Status Flush(const FlushOptions& options, + ColumnFamilyHandle* column_family) override; + virtual Status Flush( + const FlushOptions& options, + const std::vector& column_families) override; + virtual Status FlushWAL(bool sync) override; + bool WALBufferIsEmpty(); + virtual Status SyncWAL() override; + virtual Status LockWAL() override; + virtual Status UnlockWAL() override; + + virtual SequenceNumber GetLatestSequenceNumber() const override; + + // IncreaseFullHistoryTsLow(ColumnFamilyHandle*, std::string) will acquire + // and release db_mutex + Status IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family, + std::string ts_low) override; + + // GetFullHistoryTsLow(ColumnFamilyHandle*, std::string*) will acquire and + // release db_mutex + Status GetFullHistoryTsLow(ColumnFamilyHandle* column_family, + std::string* ts_low) override; + + virtual Status GetDbIdentity(std::string& identity) const override; + + virtual Status GetDbIdentityFromIdentityFile(std::string* identity) const; + + virtual Status GetDbSessionId(std::string& session_id) const override; + + ColumnFamilyHandle* DefaultColumnFamily() const override; + + ColumnFamilyHandle* PersistentStatsColumnFamily() const; + + virtual Status Close() override; + + virtual Status DisableFileDeletions() override; + + virtual Status EnableFileDeletions(bool force) override; + + virtual bool IsFileDeletionsEnabled() const; + + Status GetStatsHistory( + uint64_t start_time, uint64_t end_time, + std::unique_ptr* stats_iterator) override; + + using DB::ResetStats; + virtual Status ResetStats() override; + // All the returned filenames start with "/" + virtual Status GetLiveFiles(std::vector&, + uint64_t* manifest_file_size, + bool flush_memtable = true) override; + virtual Status GetSortedWalFiles(VectorLogPtr& files) override; + virtual Status GetCurrentWalFile( + std::unique_ptr* current_log_file) override; + virtual Status GetCreationTimeOfOldestFile( + uint64_t* creation_time) override; + + virtual Status GetUpdatesSince( + SequenceNumber seq_number, std::unique_ptr* iter, + const TransactionLogIterator::ReadOptions& read_options = + TransactionLogIterator::ReadOptions()) override; + virtual Status DeleteFile(std::string name) override; + Status DeleteFilesInRanges(ColumnFamilyHandle* column_family, + const RangePtr* ranges, size_t n, + bool include_end = true); + + virtual void GetLiveFilesMetaData( + std::vector* metadata) override; + + virtual Status GetLiveFilesChecksumInfo( + FileChecksumList* checksum_list) override; + + virtual Status GetLiveFilesStorageInfo( + const LiveFilesStorageInfoOptions& opts, + std::vector* files) override; + + // Obtains the meta data of the specified column family of the DB. + // TODO(yhchiang): output parameter is placed in the end in this codebase. 
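+  //
+  // e.g. (sketch; `db` is a placeholder for an opened DB*):
+  //
+  //   ColumnFamilyMetaData meta;
+  //   db->GetColumnFamilyMetaData(db->DefaultColumnFamily(), &meta);
+  //   // meta.levels[n].files now describes the SSTs on each level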
+ virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, + ColumnFamilyMetaData* metadata) override; + + void GetAllColumnFamilyMetaData( + std::vector* metadata) override; + + Status SuggestCompactRange(ColumnFamilyHandle* column_family, + const Slice* begin, const Slice* end) override; + + Status PromoteL0(ColumnFamilyHandle* column_family, + int target_level) override; + + using DB::IngestExternalFile; + virtual Status IngestExternalFile( + ColumnFamilyHandle* column_family, + const std::vector& external_files, + const IngestExternalFileOptions& ingestion_options) override; + + using DB::IngestExternalFiles; + virtual Status IngestExternalFiles( + const std::vector& args) override; + + using DB::CreateColumnFamilyWithImport; + virtual Status CreateColumnFamilyWithImport( + const ColumnFamilyOptions& options, const std::string& column_family_name, + const ImportColumnFamilyOptions& import_options, + const ExportImportFilesMetaData& metadata, + ColumnFamilyHandle** handle) override; + + using DB::ClipColumnFamily; + virtual Status ClipColumnFamily(ColumnFamilyHandle* column_family, + const Slice& begin_key, + const Slice& end_key) override; + + using DB::VerifyFileChecksums; + Status VerifyFileChecksums(const ReadOptions& read_options) override; + + using DB::VerifyChecksum; + virtual Status VerifyChecksum(const ReadOptions& /*read_options*/) override; + // Verify the checksums of files in db. Currently only tables are checked. + // + // read_options: controls file I/O behavior, e.g. read ahead size while + // reading all the live table files. + // + // use_file_checksum: if false, verify the block checksums of all live table + // in db. Otherwise, obtain the file checksums and compare + // with the MANIFEST. Currently, file checksums are + // recomputed by reading all table files. + // + // Returns: OK if there is no file whose file or block checksum mismatches. 
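+  //
+  // e.g. (sketch) whole-file verification against the MANIFEST checksums:
+  //
+  //   ReadOptions ro;
+  //   Status s = VerifyChecksumInternal(ro, /*use_file_checksum=*/true);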
+ Status VerifyChecksumInternal(const ReadOptions& read_options, + bool use_file_checksum); + + Status VerifyFullFileChecksum(const std::string& file_checksum_expected, + const std::string& func_name_expected, + const std::string& fpath, + const ReadOptions& read_options); + + using DB::StartTrace; + virtual Status StartTrace( + const TraceOptions& options, + std::unique_ptr&& trace_writer) override; + + using DB::EndTrace; + virtual Status EndTrace() override; + + using DB::NewDefaultReplayer; + virtual Status NewDefaultReplayer( + const std::vector& handles, + std::unique_ptr&& reader, + std::unique_ptr* replayer) override; + + using DB::StartBlockCacheTrace; + Status StartBlockCacheTrace( + const TraceOptions& trace_options, + std::unique_ptr&& trace_writer) override; + + Status StartBlockCacheTrace( + const BlockCacheTraceOptions& options, + std::unique_ptr&& trace_writer) override; + + using DB::EndBlockCacheTrace; + Status EndBlockCacheTrace() override; + + using DB::StartIOTrace; + Status StartIOTrace(const TraceOptions& options, + std::unique_ptr&& trace_writer) override; + + using DB::EndIOTrace; + Status EndIOTrace() override; + + using DB::GetPropertiesOfAllTables; + virtual Status GetPropertiesOfAllTables( + ColumnFamilyHandle* column_family, + TablePropertiesCollection* props) override; + virtual Status GetPropertiesOfTablesInRange( + ColumnFamilyHandle* column_family, const Range* range, std::size_t n, + TablePropertiesCollection* props) override; + + + // ---- End of implementations of the DB interface ---- + SystemClock* GetSystemClock() const; + + struct GetImplOptions { + ColumnFamilyHandle* column_family = nullptr; + PinnableSlice* value = nullptr; + PinnableWideColumns* columns = nullptr; + std::string* timestamp = nullptr; + bool* value_found = nullptr; + ReadCallback* callback = nullptr; + bool* is_blob_index = nullptr; + // If true return value associated with key via value pointer else return + // all merge operands for key via merge_operands pointer + bool get_value = true; + // Pointer to an array of size + // get_merge_operands_options.expected_max_number_of_operands allocated by + // user + PinnableSlice* merge_operands = nullptr; + GetMergeOperandsOptions* get_merge_operands_options = nullptr; + int* number_of_operands = nullptr; + }; + + // Function that Get and KeyMayExist call with no_io true or false + // Note: 'value_found' from KeyMayExist propagates here + // This function is also called by GetMergeOperands + // If get_impl_options.get_value = true get value associated with + // get_impl_options.key via get_impl_options.value + // If get_impl_options.get_value = false get merge operands associated with + // get_impl_options.key via get_impl_options.merge_operands + Status GetImpl(const ReadOptions& options, const Slice& key, + GetImplOptions& get_impl_options); + + // If `snapshot` == kMaxSequenceNumber, set a recent one inside the file. + ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& options, + ColumnFamilyData* cfd, + SequenceNumber snapshot, + ReadCallback* read_callback, + bool expose_blob_index = false, + bool allow_refresh = true); + + virtual SequenceNumber GetLastPublishedSequence() const { + if (last_seq_same_as_publish_seq_) { + return versions_->LastSequence(); + } else { + return versions_->LastPublishedSequence(); + } + } + + // REQUIRES: joined the main write queue if two_write_queues is disabled, and + // the second write queue otherwise. 
+  virtual void SetLastPublishedSequence(SequenceNumber seq);
+  // Returns LastSequence in last_seq_same_as_publish_seq_
+  // mode and LastAllocatedSequence otherwise. This is useful when visibility
+  // depends also on data written to the WAL but not to the memtable.
+  SequenceNumber TEST_GetLastVisibleSequence() const;
+
+  // Similar to Write() but will call the callback once on the single write
+  // thread to determine whether it is safe to perform the write.
+  virtual Status WriteWithCallback(const WriteOptions& write_options,
+                                   WriteBatch* my_batch,
+                                   WriteCallback* callback);
+
+  // Returns the sequence number that is guaranteed to be smaller than or equal
+  // to the sequence number of any key that could be inserted into the current
+  // memtables. It can then be assumed that any write with a larger (or equal)
+  // sequence number will be present in this memtable or a later memtable.
+  //
+  // If the earliest sequence number could not be determined,
+  // kMaxSequenceNumber will be returned.
+  //
+  // If include_history=true, will also search Memtables in MemTableList
+  // History.
+  SequenceNumber GetEarliestMemTableSequenceNumber(SuperVersion* sv,
+                                                   bool include_history);
+
+  // For a given key, check to see if there are any records for this key
+  // in the memtables, including memtable history. If cache_only is false,
+  // SST files will also be checked.
+  //
+  // `key` should NOT have user-defined timestamp appended to user key even if
+  // timestamp is enabled.
+  //
+  // If a key is found, *found_record_for_key will be set to true and
+  // *seq will be set to the stored sequence number for the latest
+  // operation on this key or kMaxSequenceNumber if unknown. If user-defined
+  // timestamp is enabled for this column family and timestamp is not nullptr,
+  // then *timestamp will be set to the stored timestamp for the latest
+  // operation on this key.
+  // If no key is found, *found_record_for_key will be set to false.
+  //
+  // Note: If cache_only=false, it is possible for *seq to be set to 0 if
+  // the sequence number has been cleared from the record. If the caller is
+  // holding an active db snapshot, we know the missing sequence must be less
+  // than the snapshot's sequence number (sequence numbers are only cleared
+  // when there are no earlier active snapshots).
+  //
+  // If NotFound is returned and found_record_for_key is set to false, then no
+  // record for this key was found. If the caller is holding an active db
+  // snapshot, we know that no key could have existed after this snapshot
+  // (since we do not compact keys that have an earlier snapshot).
+  //
+  // Only records newer than or at `lower_bound_seq` are guaranteed to be
+  // returned. Memtables and files may not be checked if they only contain
+  // data older than `lower_bound_seq`.
+  //
+  // Returns OK or NotFound on success,
+  // other status on unexpected error.
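+  //
+  // A sketch of the memtable-only fast path (`sv` and `user_key` stand for
+  // arguments prepared by the caller as described above):
+  //
+  //   SequenceNumber seq = kMaxSequenceNumber;
+  //   bool found = false;
+  //   Status s = GetLatestSequenceForKey(sv, user_key, /*cache_only=*/true,
+  //                                      /*lower_bound_seq=*/0, &seq,
+  //                                      /*timestamp=*/nullptr, &found,
+  //                                      /*is_blob_index=*/nullptr);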
+ // TODO(andrewkr): this API need to be aware of range deletion operations + Status GetLatestSequenceForKey(SuperVersion* sv, const Slice& key, + bool cache_only, + SequenceNumber lower_bound_seq, + SequenceNumber* seq, std::string* timestamp, + bool* found_record_for_key, + bool* is_blob_index); + + Status TraceIteratorSeek(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, const Slice upper_bound); + Status TraceIteratorSeekForPrev(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, + const Slice upper_bound); + + // Similar to GetSnapshot(), but also lets the db know that this snapshot + // will be used for transaction write-conflict checking. The DB can then + // make sure not to compact any keys that would prevent a write-conflict from + // being detected. + const Snapshot* GetSnapshotForWriteConflictBoundary(); + + // checks if all live files exist on file system and that their file sizes + // match to our in-memory records + virtual Status CheckConsistency(); + + // max_file_num_to_ignore allows bottom level compaction to filter out newly + // compacted SST files. Setting max_file_num_to_ignore to kMaxUint64 will + // disable the filtering + // If `final_output_level` is not nullptr, it is set to manual compaction's + // output level if returned status is OK, and it may or may not be set to + // manual compaction's output level if returned status is not OK. + Status RunManualCompaction(ColumnFamilyData* cfd, int input_level, + int output_level, + const CompactRangeOptions& compact_range_options, + const Slice* begin, const Slice* end, + bool exclusive, bool disallow_trivial_move, + uint64_t max_file_num_to_ignore, + const std::string& trim_ts, + int* final_output_level = nullptr); + + // Return an internal iterator over the current state of the database. + // The keys of this iterator are internal keys (see format.h). + // The returned iterator should be deleted when no longer needed. + // If allow_unprepared_value is true, the returned iterator may defer reading + // the value and so will require PrepareValue() to be called before value(); + // allow_unprepared_value = false is convenient when this optimization is not + // useful, e.g. when reading the whole column family. + // + // read_options.ignore_range_deletions determines whether range tombstones are + // processed in the returned interator internally, i.e., whether range + // tombstone covered keys are in this iterator's output. + // @param read_options Must outlive the returned iterator. + InternalIterator* NewInternalIterator( + const ReadOptions& read_options, Arena* arena, SequenceNumber sequence, + ColumnFamilyHandle* column_family = nullptr, + bool allow_unprepared_value = false); + + // Note: to support DB iterator refresh, memtable range tombstones in the + // underlying merging iterator needs to be refreshed. If db_iter is not + // nullptr, db_iter->SetMemtableRangetombstoneIter() is called with the + // memtable range tombstone iterator used by the underlying merging iterator. + // This range tombstone iterator can be refreshed later by db_iter. + // @param read_options Must outlive the returned iterator. 
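+  //
+  // e.g. (sketch) an internal-key scan over the default column family at the
+  // latest sequence; ScopedArenaIterator (included above) can manage the
+  // lifetime of the arena-allocated iterator:
+  //
+  //   Arena arena;
+  //   ReadOptions ro;  // must outlive the iterator
+  //   InternalIterator* iter =
+  //       NewInternalIterator(ro, &arena, kMaxSequenceNumber);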
+ InternalIterator* NewInternalIterator(const ReadOptions& read_options, + ColumnFamilyData* cfd, + SuperVersion* super_version, + Arena* arena, SequenceNumber sequence, + bool allow_unprepared_value, + ArenaWrappedDBIter* db_iter = nullptr); + + LogsWithPrepTracker* logs_with_prep_tracker() { + return &logs_with_prep_tracker_; + } + + struct BGJobLimits { + int max_flushes; + int max_compactions; + }; + // Returns maximum background flushes and compactions allowed to be scheduled + BGJobLimits GetBGJobLimits() const; + // Need a static version that can be called during SanitizeOptions(). + static BGJobLimits GetBGJobLimits(int max_background_flushes, + int max_background_compactions, + int max_background_jobs, + bool parallelize_compactions); + + // move logs pending closing from job_context to the DB queue and + // schedule a purge + void ScheduleBgLogWriterClose(JobContext* job_context); + + uint64_t MinLogNumberToKeep(); + + // Returns the lower bound file number for SSTs that won't be deleted, even if + // they're obsolete. This lower bound is used internally to prevent newly + // created flush/compaction output files from being deleted before they're + // installed. This technique avoids the need for tracking the exact numbers of + // files pending creation, although it prevents more files than necessary from + // being deleted. + uint64_t MinObsoleteSstNumberToKeep(); + + // Returns the list of live files in 'live' and the list + // of all files in the filesystem in 'candidate_files'. + // If force == false and the last call was less than + // db_options_.delete_obsolete_files_period_micros microseconds ago, + // it will not fill up the job_context + void FindObsoleteFiles(JobContext* job_context, bool force, + bool no_full_scan = false); + + // Diffs the files listed in filenames and those that do not + // belong to live files are possibly removed. Also, removes all the + // files in sst_delete_files and log_delete_files. + // It is not necessary to hold the mutex when invoking this method. + // If FindObsoleteFiles() was run, we need to also run + // PurgeObsoleteFiles(), even if disable_delete_obsolete_files_ is true + void PurgeObsoleteFiles(JobContext& background_contet, + bool schedule_only = false); + + // Schedule a background job to actually delete obsolete files. + void SchedulePurge(); + + const SnapshotList& snapshots() const { return snapshots_; } + + // load list of snapshots to `snap_vector` that is no newer than `max_seq` + // in ascending order. + // `oldest_write_conflict_snapshot` is filled with the oldest snapshot + // which satisfies SnapshotImpl.is_write_conflict_boundary_ = true. + void LoadSnapshots(std::vector* snap_vector, + SequenceNumber* oldest_write_conflict_snapshot, + const SequenceNumber& max_seq) const { + InstrumentedMutexLock l(mutex()); + snapshots().GetAll(snap_vector, oldest_write_conflict_snapshot, max_seq); + } + + const ImmutableDBOptions& immutable_db_options() const { + return immutable_db_options_; + } + + // Cancel all background jobs, including flush, compaction, background + // purging, stats dumping threads, etc. If `wait` = true, wait for the + // running jobs to abort or finish before returning. Otherwise, only + // sends the signals. + void CancelAllBackgroundWork(bool wait); + + // Find Super version and reference it. Based on options, it might return + // the thread local cached one. + // Call ReturnAndCleanupSuperVersion() when it is no longer needed. 
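+  //
+  // e.g. (sketch):
+  //
+  //   SuperVersion* sv = GetAndRefSuperVersion(cfd);
+  //   // ... read through sv->mem, sv->imm and sv->current ...
+  //   ReturnAndCleanupSuperVersion(cfd, sv);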
+ SuperVersion* GetAndRefSuperVersion(ColumnFamilyData* cfd); + + // Similar to the previous function but looks up based on a column family id. + // nullptr will be returned if this column family no longer exists. + // REQUIRED: this function should only be called on the write thread or if the + // mutex is held. + SuperVersion* GetAndRefSuperVersion(uint32_t column_family_id); + + // Un-reference the super version and clean it up if it is the last reference. + void CleanupSuperVersion(SuperVersion* sv); + + // Un-reference the super version and return it to thread local cache if + // needed. If it is the last reference of the super version. Clean it up + // after un-referencing it. + void ReturnAndCleanupSuperVersion(ColumnFamilyData* cfd, SuperVersion* sv); + + // Similar to the previous function but looks up based on a column family id. + // nullptr will be returned if this column family no longer exists. + // REQUIRED: this function should only be called on the write thread. + void ReturnAndCleanupSuperVersion(uint32_t colun_family_id, SuperVersion* sv); + + // REQUIRED: this function should only be called on the write thread or if the + // mutex is held. Return value only valid until next call to this function or + // mutex is released. + ColumnFamilyHandle* GetColumnFamilyHandle(uint32_t column_family_id); + + // Same as above, should called without mutex held and not on write thread. + std::unique_ptr GetColumnFamilyHandleUnlocked( + uint32_t column_family_id); + + // Returns the number of currently running flushes. + // REQUIREMENT: mutex_ must be held when calling this function. + int num_running_flushes() { + mutex_.AssertHeld(); + return num_running_flushes_; + } + + // Returns the number of currently running compactions. + // REQUIREMENT: mutex_ must be held when calling this function. + int num_running_compactions() { + mutex_.AssertHeld(); + return num_running_compactions_; + } + + const WriteController& write_controller() { return write_controller_; } + + // hollow transactions shell used for recovery. + // these will then be passed to TransactionDB so that + // locks can be reacquired before writing can resume. + struct RecoveredTransaction { + std::string name_; + bool unprepared_; + + struct BatchInfo { + uint64_t log_number_; + // TODO(lth): For unprepared, the memory usage here can be big for + // unprepared transactions. This is only useful for rollbacks, and we + // can in theory just keep keyset for that. + WriteBatch* batch_; + // Number of sub-batches. A new sub-batch is created if txn attempts to + // insert a duplicate key,seq to memtable. This is currently used in + // WritePreparedTxn/WriteUnpreparedTxn. + size_t batch_cnt_; + }; + + // This maps the seq of the first key in the batch to BatchInfo, which + // contains WriteBatch and other information relevant to the batch. + // + // For WriteUnprepared, batches_ can have size greater than 1, but for + // other write policies, it must be of size 1. 
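+    //
+    // For example, a WriteUnprepared transaction recovered from unprepared
+    // batches at seqs 10 and 42 would hold two entries here, each carrying
+    // its own log number, WriteBatch and sub-batch count.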
+ std::map batches_; + + explicit RecoveredTransaction(const uint64_t log, const std::string& name, + WriteBatch* batch, SequenceNumber seq, + size_t batch_cnt, bool unprepared) + : name_(name), unprepared_(unprepared) { + batches_[seq] = {log, batch, batch_cnt}; + } + + ~RecoveredTransaction() { + for (auto& it : batches_) { + delete it.second.batch_; + } + } + + void AddBatch(SequenceNumber seq, uint64_t log_number, WriteBatch* batch, + size_t batch_cnt, bool unprepared) { + assert(batches_.count(seq) == 0); + batches_[seq] = {log_number, batch, batch_cnt}; + // Prior state must be unprepared, since the prepare batch must be the + // last batch. + assert(unprepared_); + unprepared_ = unprepared; + } + }; + + bool allow_2pc() const { return immutable_db_options_.allow_2pc; } + + std::unordered_map + recovered_transactions() { + return recovered_transactions_; + } + + RecoveredTransaction* GetRecoveredTransaction(const std::string& name) { + auto it = recovered_transactions_.find(name); + if (it == recovered_transactions_.end()) { + return nullptr; + } else { + return it->second; + } + } + + void InsertRecoveredTransaction(const uint64_t log, const std::string& name, + WriteBatch* batch, SequenceNumber seq, + size_t batch_cnt, bool unprepared_batch) { + // For WriteUnpreparedTxn, InsertRecoveredTransaction is called multiple + // times for every unprepared batch encountered during recovery. + // + // If the transaction is prepared, then the last call to + // InsertRecoveredTransaction will have unprepared_batch = false. + auto rtxn = recovered_transactions_.find(name); + if (rtxn == recovered_transactions_.end()) { + recovered_transactions_[name] = new RecoveredTransaction( + log, name, batch, seq, batch_cnt, unprepared_batch); + } else { + rtxn->second->AddBatch(seq, log, batch, batch_cnt, unprepared_batch); + } + logs_with_prep_tracker_.MarkLogAsContainingPrepSection(log); + } + + void DeleteRecoveredTransaction(const std::string& name) { + auto it = recovered_transactions_.find(name); + assert(it != recovered_transactions_.end()); + auto* trx = it->second; + recovered_transactions_.erase(it); + for (const auto& info : trx->batches_) { + logs_with_prep_tracker_.MarkLogAsHavingPrepSectionFlushed( + info.second.log_number_); + } + delete trx; + } + + void DeleteAllRecoveredTransactions() { + for (auto it = recovered_transactions_.begin(); + it != recovered_transactions_.end(); ++it) { + delete it->second; + } + recovered_transactions_.clear(); + } + + void AddToLogsToFreeQueue(log::Writer* log_writer) { + mutex_.AssertHeld(); + logs_to_free_queue_.push_back(log_writer); + } + + void AddSuperVersionsToFreeQueue(SuperVersion* sv) { + superversions_to_free_queue_.push_back(sv); + } + + void SetSnapshotChecker(SnapshotChecker* snapshot_checker); + + // Fill JobContext with snapshot information needed by flush and compaction. + void GetSnapshotContext(JobContext* job_context, + std::vector* snapshot_seqs, + SequenceNumber* earliest_write_conflict_snapshot, + SnapshotChecker** snapshot_checker); + + // Not thread-safe. + void SetRecoverableStatePreReleaseCallback(PreReleaseCallback* callback); + + InstrumentedMutex* mutex() const { return &mutex_; } + + // Initialize a brand new DB. The DB directory is expected to be empty before + // calling it. Push new manifest file name into `new_filenames`. + Status NewDB(std::vector* new_filenames); + + // This is to be used only by internal rocksdb classes. 
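+  //
+  // e.g. (a sketch; the exact flags depend on the transaction write policy):
+  //
+  //   DB* db = nullptr;
+  //   std::vector<ColumnFamilyHandle*> handles;
+  //   Status s = DBImpl::Open(db_options, dbname, column_families, &handles,
+  //                           &db, /*seq_per_batch=*/true,
+  //                           /*batch_per_txn=*/true);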
+ static Status Open(const DBOptions& db_options, const std::string& name, + const std::vector& column_families, + std::vector* handles, DB** dbptr, + const bool seq_per_batch, const bool batch_per_txn); + + static IOStatus CreateAndNewDirectory( + FileSystem* fs, const std::string& dirname, + std::unique_ptr* directory); + + // find stats map from stats_history_ with smallest timestamp in + // the range of [start_time, end_time) + bool FindStatsByTime(uint64_t start_time, uint64_t end_time, + uint64_t* new_time, + std::map* stats_map); + + // Print information of all tombstones of all iterators to the std::string + // This is only used by ldb. The output might be capped. Tombstones + // printed out are not guaranteed to be in any order. + Status TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family, + int max_entries_to_print, + std::string* out_str); + + VersionSet* GetVersionSet() const { return versions_.get(); } + + // Wait for all flush and compactions jobs to finish. Jobs to wait include the + // unscheduled (queued, but not scheduled yet). If the db is shutting down, + // Status::ShutdownInProgress will be returned. If PauseBackgroundWork() was + // called prior to this, this may potentially wait for unscheduled jobs + // indefinitely. abort_on_pause can be set to true to abort, and + // Status::Aborted will be returned immediately. This may also never return if + // there's sufficient ongoing writes that keeps flush and compaction going + // without stopping. The user would have to cease all the writes to DB to make + // this eventually return in a stable state. + Status WaitForCompact(bool abort_on_pause = false); + +#ifndef NDEBUG + // Compact any files in the named level that overlap [*begin, *end] + Status TEST_CompactRange(int level, const Slice* begin, const Slice* end, + ColumnFamilyHandle* column_family = nullptr, + bool disallow_trivial_move = false); + + Status TEST_SwitchWAL(); + + bool TEST_UnableToReleaseOldestLog() { return unable_to_release_oldest_log_; } + + bool TEST_IsLogGettingFlushed() { + return alive_log_files_.begin()->getting_flushed; + } + + Status TEST_SwitchMemtable(ColumnFamilyData* cfd = nullptr); + + // Force current memtable contents to be flushed. + Status TEST_FlushMemTable(bool wait = true, bool allow_write_stall = false, + ColumnFamilyHandle* cfh = nullptr); + + Status TEST_FlushMemTable(ColumnFamilyData* cfd, + const FlushOptions& flush_opts); + + // Flush (multiple) ColumnFamilyData without using ColumnFamilyHandle. This + // is because in certain cases, we can flush column families, wait for the + // flush to complete, but delete the column family handle before the wait + // finishes. For example in CompactRange. + Status TEST_AtomicFlushMemTables( + const autovector& provided_candidate_cfds, + const FlushOptions& flush_opts); + + // Wait for background threads to complete scheduled work. + Status TEST_WaitForBackgroundWork(); + + // Wait for memtable compaction + Status TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family = nullptr); + + // Wait for any compaction + Status TEST_WaitForCompact(bool abort_on_pause = false); + + // Wait for any background purge + Status TEST_WaitForPurge(); + + // Get the background error status + Status TEST_GetBGError(); + + // Return the maximum overlapping data (in bytes) at next level for any + // file at a level >= 1. + uint64_t TEST_MaxNextLevelOverlappingBytes( + ColumnFamilyHandle* column_family = nullptr); + + // Return the current manifest file no. 
+ uint64_t TEST_Current_Manifest_FileNo(); + + // Returns the number that'll be assigned to the next file that's created. + uint64_t TEST_Current_Next_FileNo(); + + // get total level0 file size. Only for testing. + uint64_t TEST_GetLevel0TotalSize(); + + void TEST_GetFilesMetaData( + ColumnFamilyHandle* column_family, + std::vector>* metadata, + std::vector>* blob_metadata = nullptr); + + void TEST_LockMutex(); + + void TEST_UnlockMutex(); + + void TEST_SignalAllBgCv(); + + // REQUIRES: mutex locked + void* TEST_BeginWrite(); + + // REQUIRES: mutex locked + // pass the pointer that you got from TEST_BeginWrite() + void TEST_EndWrite(void* w); + + uint64_t TEST_MaxTotalInMemoryState() const { + return max_total_in_memory_state_; + } + + size_t TEST_LogsToFreeSize(); + + uint64_t TEST_LogfileNumber(); + + uint64_t TEST_total_log_size() const { return total_log_size_; } + + // Returns column family name to ImmutableCFOptions map. + Status TEST_GetAllImmutableCFOptions( + std::unordered_map* iopts_map); + + // Return the lastest MutableCFOptions of a column family + Status TEST_GetLatestMutableCFOptions(ColumnFamilyHandle* column_family, + MutableCFOptions* mutable_cf_options); + + Cache* TEST_table_cache() { return table_cache_.get(); } + + WriteController& TEST_write_controler() { return write_controller_; } + + uint64_t TEST_FindMinLogContainingOutstandingPrep(); + uint64_t TEST_FindMinPrepLogReferencedByMemTable(); + size_t TEST_PreparedSectionCompletedSize(); + size_t TEST_LogsWithPrepSize(); + + int TEST_BGCompactionsAllowed() const; + int TEST_BGFlushesAllowed() const; + size_t TEST_GetWalPreallocateBlockSize(uint64_t write_buffer_size) const; + void TEST_WaitForPeriodicTaskRun(std::function callback) const; + SeqnoToTimeMapping TEST_GetSeqnoToTimeMapping() const; + size_t TEST_EstimateInMemoryStatsHistorySize() const; + + uint64_t TEST_GetCurrentLogNumber() const { + InstrumentedMutexLock l(mutex()); + assert(!logs_.empty()); + return logs_.back().number; + } + + const std::unordered_set& TEST_GetFilesGrabbedForPurge() const { + return files_grabbed_for_purge_; + } + + const PeriodicTaskScheduler& TEST_GetPeriodicTaskScheduler() const; + +#endif // NDEBUG + + // persist stats to column family "_persistent_stats" + void PersistStats(); + + // dump rocksdb.stats to LOG + void DumpStats(); + + // flush LOG out of application buffer + void FlushInfoLog(); + + // record current sequence number to time mapping + void RecordSeqnoToTimeMapping(); + + // Interface to block and signal the DB in case of stalling writes by + // WriteBufferManager. Each DBImpl object contains ptr to WBMStallInterface. + // When DB needs to be blocked or signalled by WriteBufferManager, + // state_ is changed accordingly. + class WBMStallInterface : public StallInterface { + public: + enum State { + BLOCKED = 0, + RUNNING, + }; + + WBMStallInterface() : state_cv_(&state_mutex_) { + MutexLock lock(&state_mutex_); + state_ = State::RUNNING; + } + + void SetState(State state) { + MutexLock lock(&state_mutex_); + state_ = state; + } + + // Change the state_ to State::BLOCKED and wait until its state is + // changed by WriteBufferManager. When stall is cleared, Signal() is + // called to change the state and unblock the DB. + void Block() override { + MutexLock lock(&state_mutex_); + while (state_ == State::BLOCKED) { + TEST_SYNC_POINT("WBMStallInterface::BlockDB"); + state_cv_.Wait(); + } + } + + // Called from WriteBufferManager. 
This function changes the state_ + // to State::RUNNING indicating the stall is cleared and DB can proceed. + void Signal() override { + { + MutexLock lock(&state_mutex_); + state_ = State::RUNNING; + } + state_cv_.Signal(); + } + + private: + // Conditional variable and mutex to block and + // signal the DB during stalling process. + port::Mutex state_mutex_; + port::CondVar state_cv_; + // state represting whether DB is running or blocked because of stall by + // WriteBufferManager. + State state_; + }; + + static void TEST_ResetDbSessionIdGen(); + static std::string GenerateDbSessionId(Env* env); + + bool seq_per_batch() const { return seq_per_batch_; } + + protected: + const std::string dbname_; + // TODO(peterd): unify with VersionSet::db_id_ + std::string db_id_; + // db_session_id_ is an identifier that gets reset + // every time the DB is opened + std::string db_session_id_; + std::unique_ptr versions_; + // Flag to check whether we allocated and own the info log file + bool own_info_log_; + Status init_logger_creation_s_; + const DBOptions initial_db_options_; + Env* const env_; + std::shared_ptr io_tracer_; + const ImmutableDBOptions immutable_db_options_; + FileSystemPtr fs_; + MutableDBOptions mutable_db_options_; + Statistics* stats_; + std::unordered_map + recovered_transactions_; + std::unique_ptr tracer_; + InstrumentedMutex trace_mutex_; + BlockCacheTracer block_cache_tracer_; + + // constant false canceled flag, used when the compaction is not manual + const std::atomic kManualCompactionCanceledFalse_{false}; + + // State below is protected by mutex_ + // With two_write_queues enabled, some of the variables that accessed during + // WriteToWAL need different synchronization: log_empty_, alive_log_files_, + // logs_, logfile_number_. Refer to the definition of each variable below for + // more description. + // + // `mutex_` can be a hot lock in some workloads, so it deserves dedicated + // cachelines. + mutable CacheAlignedInstrumentedMutex mutex_; + + ColumnFamilyHandleImpl* default_cf_handle_; + InternalStats* default_cf_internal_stats_; + + // table_cache_ provides its own synchronization + std::shared_ptr table_cache_; + + ErrorHandler error_handler_; + + // Unified interface for logging events + EventLogger event_logger_; + + // only used for dynamically adjusting max_total_wal_size. it is a sum of + // [write_buffer_size * max_write_buffer_number] over all column families + std::atomic max_total_in_memory_state_; + + // The options to access storage files + const FileOptions file_options_; + + // Additonal options for compaction and flush + FileOptions file_options_for_compaction_; + + std::unique_ptr column_family_memtables_; + + // Increase the sequence number after writing each batch, whether memtable is + // disabled for that or not. Otherwise the sequence number is increased after + // writing each key into memtable. This implies that when disable_memtable is + // set, the seq is not increased at all. + // + // Default: false + const bool seq_per_batch_; + // This determines during recovery whether we expect one writebatch per + // recovered transaction, or potentially multiple writebatches per + // transaction. For WriteUnprepared, this is set to false, since multiple + // batches can exist per transaction. + // + // Default: true + const bool batch_per_txn_; + + // Each flush or compaction gets its own job id. 
this counter makes sure
+ // they're unique
+ std::atomic<int> next_job_id_;
+
+ std::atomic<bool> shutting_down_;
+
+ // RecoveryContext struct stores the context about version edits along
+ // with corresponding column_family_data and column_family_options.
+ class RecoveryContext {
+ public:
+ ~RecoveryContext() {
+ for (auto& edit_list : edit_lists_) {
+ for (auto* edit : edit_list) {
+ delete edit;
+ }
+ }
+ }
+
+ void UpdateVersionEdits(ColumnFamilyData* cfd, const VersionEdit& edit) {
+ assert(cfd != nullptr);
+ if (map_.find(cfd->GetID()) == map_.end()) {
+ uint32_t size = static_cast<uint32_t>(map_.size());
+ map_.emplace(cfd->GetID(), size);
+ cfds_.emplace_back(cfd);
+ mutable_cf_opts_.emplace_back(cfd->GetLatestMutableCFOptions());
+ edit_lists_.emplace_back(autovector<VersionEdit*>());
+ }
+ uint32_t i = map_[cfd->GetID()];
+ edit_lists_[i].emplace_back(new VersionEdit(edit));
+ }
+
+ std::unordered_map<uint32_t, uint32_t> map_; // cf_id to index;
+ autovector<ColumnFamilyData*> cfds_;
+ autovector<const MutableCFOptions*> mutable_cf_opts_;
+ autovector<autovector<VersionEdit*>> edit_lists_;
+ // files_to_delete_ contains sst files
+ std::unordered_set<std::string> files_to_delete_;
+ };
+
+ // Persist options to the options file.
+ // Except in DB::Open(), this can only be called with the preconditions
+ // implied by its arguments satisfied: if need_mutex_lock = false, the
+ // caller must already hold the DB mutex (otherwise the method locks it
+ // itself); if need_enter_write_thread = false, the caller must already
+ // have entered the write thread (otherwise the method enters it itself).
+ Status WriteOptionsFile(bool need_mutex_lock, bool need_enter_write_thread);
+
+ Status CompactRangeInternal(const CompactRangeOptions& options,
+ ColumnFamilyHandle* column_family,
+ const Slice* begin, const Slice* end,
+ const std::string& trim_ts);
+
+ // The following two functions can only be called when:
+ // 1. WriteThread::Writer::EnterUnbatched() is used.
+ // 2. db_mutex is NOT held
+ Status RenameTempFileToOptionsFile(const std::string& file_name);
+ Status DeleteObsoleteOptionsFiles();
+
+ void NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
+ const MutableCFOptions& mutable_cf_options,
+ int job_id, FlushReason flush_reason);
+
+ void NotifyOnFlushCompleted(
+ ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
+ std::list<std::unique_ptr<FlushJobInfo>>* flush_jobs_info);
+
+ void NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c,
+ const Status& st,
+ const CompactionJobStats& job_stats, int job_id);
+
+ void NotifyOnCompactionCompleted(ColumnFamilyData* cfd, Compaction* c,
+ const Status& st,
+ const CompactionJobStats& job_stats,
+ int job_id);
+ void NotifyOnMemTableSealed(ColumnFamilyData* cfd,
+ const MemTableInfo& mem_table_info);
+
+ void NotifyOnExternalFileIngested(
+ ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job);
+
+ virtual Status FlushForGetLiveFiles();
+
+ void NewThreadStatusCfInfo(ColumnFamilyData* cfd) const;
+
+ void EraseThreadStatusCfInfo(ColumnFamilyData* cfd) const;
+
+ void EraseThreadStatusDbInfo() const;
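+
+ // Illustrative sketch (not upstream code): recovery code accumulates one
+ // VersionEdit list per column family in a RecoveryContext and later persists
+ // them in one batch via LogAndApplyForRecovery() (declared further below).
+ // A hypothetical caller, assuming an already resolved ColumnFamilyData* cfd
+ // and a hypothetical new_log_number:
+ //
+ //   RecoveryContext recovery_ctx;
+ //   VersionEdit edit;
+ //   edit.SetLogNumber(new_log_number);
+ //   recovery_ctx.UpdateVersionEdits(cfd, edit);  // stores a copy of edit
+ //   Status s = LogAndApplyForRecovery(recovery_ctx);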
+ // If disable_memtable is set the application logic must guarantee that the
+ // batch will still be skipped from memtable during the recovery. An exception
+ // to this is seq_per_batch_ mode, in which since each batch already takes one
+ // seq, it is ok for the batch to write to memtable during recovery as long as
+ // it only takes one sequence number: i.e., no duplicate keys.
+ // In WriteCommitted it is guaranteed since disable_memtable is used for the
+ // prepare batch which will be written to memtable later during the commit,
+ // and in WritePrepared it is guaranteed since it will be used only for WAL
+ // markers which will never be written to memtable. If the commit marker is
+ // accompanied with CommitTimeWriteBatch that is not written to memtable as
+ // long as it has no duplicate keys, it does not violate the one-seq-per-batch
+ // policy.
+ // batch_cnt is expected to be non-zero in seq_per_batch mode and
+ // indicates the number of sub-batches. A sub-batch is a subset of the write
+ // batch that does not have duplicate keys.
+ Status WriteImpl(const WriteOptions& options, WriteBatch* updates,
+ WriteCallback* callback = nullptr,
+ uint64_t* log_used = nullptr, uint64_t log_ref = 0,
+ bool disable_memtable = false, uint64_t* seq_used = nullptr,
+ size_t batch_cnt = 0,
+ PreReleaseCallback* pre_release_callback = nullptr,
+ PostMemTableCallback* post_memtable_callback = nullptr);
+
+ Status PipelinedWriteImpl(const WriteOptions& options, WriteBatch* updates,
+ WriteCallback* callback = nullptr,
+ uint64_t* log_used = nullptr, uint64_t log_ref = 0,
+ bool disable_memtable = false,
+ uint64_t* seq_used = nullptr);
+
+ // Write only to memtables without joining any write queue
+ Status UnorderedWriteMemtable(const WriteOptions& write_options,
+ WriteBatch* my_batch, WriteCallback* callback,
+ uint64_t log_ref, SequenceNumber seq,
+ const size_t sub_batch_cnt);
+
+ // Whether the batch requires to be assigned with an order
+ enum AssignOrder : bool { kDontAssignOrder, kDoAssignOrder };
+ // Whether it requires publishing last sequence or not
+ enum PublishLastSeq : bool { kDontPublishLastSeq, kDoPublishLastSeq };
+
+ // Join the write_thread to write the batch only to the WAL. It is the
+ // responsibility of the caller to also write the write batch to the memtable
+ // if required.
+ //
+ // sub_batch_cnt is expected to be non-zero when assign_order = kDoAssignOrder
+ // indicating the number of sub-batches in my_batch. A sub-batch is a subset
+ // of the write batch that does not have duplicate keys. When seq_per_batch is
+ // not set, each key is a separate sub-batch. Otherwise each duplicate key
+ // marks the start of a new sub-batch.
+ Status WriteImplWALOnly(
+ WriteThread* write_thread, const WriteOptions& options,
+ WriteBatch* updates, WriteCallback* callback, uint64_t* log_used,
+ const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
+ PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
+ const PublishLastSeq publish_last_seq, const bool disable_memtable);
+
+ // write cached_recoverable_state_ to memtable if it is not empty
+ // The writer must be the leader in write_thread_ and holding mutex_
+ Status WriteRecoverableState();
+
+ // Actual implementation of Close()
+ Status CloseImpl();
+
+ // Recover the descriptor from persistent storage. May do a significant
+ // amount of work to recover recently logged updates. Any changes to
+ // be made to the descriptor are added to *edit.
+ // recovered_seq is set to less than kMaxSequenceNumber if the log's tail is
+ // skipped.
+ // recovery_ctx stores the context about version edits and all those
+ // edits are persisted to new Manifest after successfully syncing the new WAL.
+ virtual Status Recover(
+ const std::vector<ColumnFamilyDescriptor>& column_families,
+ bool read_only = false, bool error_if_wal_file_exists = false,
+ bool error_if_data_exists_in_wals = false,
+ uint64_t* recovered_seq = nullptr,
+ RecoveryContext* recovery_ctx = nullptr);
+
+ virtual bool OwnTablesAndLogs() const { return true; }
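+
+ // Illustrative note (not upstream code): with seq_per_batch_, batch_cnt
+ // counts sub-batches, where a duplicate key starts a new sub-batch. For
+ // example, a hypothetical caller writing {k1, k2, k1} would pass
+ // batch_cnt = 2:
+ //
+ //   WriteBatch batch;
+ //   batch.Put("k1", "v1");
+ //   batch.Put("k2", "v2");
+ //   batch.Put("k1", "v3");  // duplicate key; second sub-batch begins
+ //   Status s = WriteImpl(WriteOptions(), &batch, /*callback=*/nullptr,
+ //                        /*log_used=*/nullptr, /*log_ref=*/0,
+ //                        /*disable_memtable=*/false, /*seq_used=*/nullptr,
+ //                        /*batch_cnt=*/2);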
+ // Setup DB identity file, and write DB ID to manifest if necessary.
+ Status SetupDBId(bool read_only, RecoveryContext* recovery_ctx);
+ // Assign db_id_ and write DB ID to manifest if necessary.
+ void SetDBId(std::string&& id, bool read_only, RecoveryContext* recovery_ctx);
+
+ // REQUIRES: db mutex held when calling this function, but the db mutex can
+ // be released and re-acquired. Db mutex will be held when the function
+ // returns.
+ // After recovery, there may be SST files in db/cf paths that are
+ // not referenced in the MANIFEST (e.g.
+ // 1. It's best effort recovery;
+ // 2. The VersionEdits referencing the SST files are appended to
+ // RecoveryContext, DB crashes when syncing the MANIFEST, the VersionEdits are
+ // still not synced to MANIFEST during recovery.)
+ // It stores the SST files to be deleted in RecoveryContext. In the
+ // meantime, we find out the largest file number present in the paths, and
+ // bump up the version set's next_file_number_ to be 1 + largest_file_number.
+ // recovery_ctx stores the context about version edits and files to be
+ // deleted. All those edits are persisted to the new Manifest after
+ // successfully syncing the new WAL.
+ Status DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx);
+
+ // SetDbSessionId() should be called in the constructor DBImpl()
+ // to ensure that db_session_id_ gets updated every time the DB is opened
+ void SetDbSessionId();
+
+ Status FailIfCfHasTs(const ColumnFamilyHandle* column_family) const;
+ Status FailIfTsMismatchCf(ColumnFamilyHandle* column_family, const Slice& ts,
+ bool ts_for_read) const;
+
+ // recovery_ctx stores the context about version edits, and
+ // LogAndApplyForRecovery persists all those edits to the new Manifest after
+ // successfully syncing the new WAL.
+ // LogAndApplyForRecovery should be called only once during recovery, and it
+ // should be called when RocksDB writes the first new MANIFEST of this
+ // recovery.
+ Status LogAndApplyForRecovery(const RecoveryContext& recovery_ctx);
+
+ void InvokeWalFilterIfNeededOnColumnFamilyToWalNumberMap();
+
+ // Return true to proceed with current WAL record whose content is stored in
+ // `batch`. Return false to skip current WAL record.
+ bool InvokeWalFilterIfNeededOnWalRecord(uint64_t wal_number, + const std::string& wal_fname, + log::Reader::Reporter& reporter, + Status& status, bool& stop_replay, + WriteBatch& batch); + + private: + friend class DB; + friend class ErrorHandler; + friend class InternalStats; + friend class PessimisticTransaction; + friend class TransactionBaseImpl; + friend class WriteCommittedTxn; + friend class WritePreparedTxn; + friend class WritePreparedTxnDB; + friend class WriteBatchWithIndex; + friend class WriteUnpreparedTxnDB; + friend class WriteUnpreparedTxn; + + friend class ForwardIterator; + friend struct SuperVersion; + friend class CompactedDBImpl; + friend class DBTest_ConcurrentFlushWAL_Test; + friend class DBTest_MixedSlowdownOptionsStop_Test; + friend class DBCompactionTest_CompactBottomLevelFilesWithDeletions_Test; + friend class DBCompactionTest_CompactionDuringShutdown_Test; + friend class StatsHistoryTest_PersistentStatsCreateColumnFamilies_Test; +#ifndef NDEBUG + friend class DBTest2_ReadCallbackTest_Test; + friend class WriteCallbackPTest_WriteWithCallbackTest_Test; + friend class XFTransactionWriteHandler; + friend class DBBlobIndexTest; + friend class WriteUnpreparedTransactionTest_RecoveryTest_Test; +#endif + + struct CompactionState; + struct PrepickedCompaction; + struct PurgeFileInfo; + + struct WriteContext { + SuperVersionContext superversion_context; + autovector memtables_to_free_; + + explicit WriteContext(bool create_superversion = false) + : superversion_context(create_superversion) {} + + ~WriteContext() { + superversion_context.Clean(); + for (auto& m : memtables_to_free_) { + delete m; + } + } + }; + + struct LogFileNumberSize { + explicit LogFileNumberSize(uint64_t _number) : number(_number) {} + LogFileNumberSize() {} + void AddSize(uint64_t new_size) { size += new_size; } + uint64_t number; + uint64_t size = 0; + bool getting_flushed = false; + }; + + struct LogWriterNumber { + // pass ownership of _writer + LogWriterNumber(uint64_t _number, log::Writer* _writer) + : number(_number), writer(_writer) {} + + log::Writer* ReleaseWriter() { + auto* w = writer; + writer = nullptr; + return w; + } + Status ClearWriter() { + Status s = writer->WriteBuffer(); + delete writer; + writer = nullptr; + return s; + } + + bool IsSyncing() { return getting_synced; } + + uint64_t GetPreSyncSize() { + assert(getting_synced); + return pre_sync_size; + } + + void PrepareForSync() { + assert(!getting_synced); + // Size is expected to be monotonically increasing. + assert(writer->file()->GetFlushedSize() >= pre_sync_size); + getting_synced = true; + pre_sync_size = writer->file()->GetFlushedSize(); + } + + void FinishSync() { + assert(getting_synced); + getting_synced = false; + } + + uint64_t number; + // Visual Studio doesn't support deque's member to be noncopyable because + // of a std::unique_ptr as a member. + log::Writer* writer; // own + + private: + // true for some prefix of logs_ + bool getting_synced = false; + // The size of the file before the sync happens. This amount is guaranteed + // to be persisted even if appends happen during sync so it can be used for + // tracking the synced size in MANIFEST. 
+ uint64_t pre_sync_size = 0; + }; + + struct LogContext { + explicit LogContext(bool need_sync = false) + : need_log_sync(need_sync), need_log_dir_sync(need_sync) {} + bool need_log_sync = false; + bool need_log_dir_sync = false; + log::Writer* writer = nullptr; + LogFileNumberSize* log_file_number_size = nullptr; + }; + + // PurgeFileInfo is a structure to hold information of files to be deleted in + // purge_files_ + struct PurgeFileInfo { + std::string fname; + std::string dir_to_sync; + FileType type; + uint64_t number; + int job_id; + PurgeFileInfo(std::string fn, std::string d, FileType t, uint64_t num, + int jid) + : fname(fn), dir_to_sync(d), type(t), number(num), job_id(jid) {} + }; + + // Argument required by background flush thread. + struct BGFlushArg { + BGFlushArg() + : cfd_(nullptr), + max_memtable_id_(0), + superversion_context_(nullptr), + flush_reason_(FlushReason::kOthers) {} + BGFlushArg(ColumnFamilyData* cfd, uint64_t max_memtable_id, + SuperVersionContext* superversion_context, + FlushReason flush_reason) + : cfd_(cfd), + max_memtable_id_(max_memtable_id), + superversion_context_(superversion_context), + flush_reason_(flush_reason) {} + + // Column family to flush. + ColumnFamilyData* cfd_; + // Maximum ID of memtable to flush. In this column family, memtables with + // IDs smaller than this value must be flushed before this flush completes. + uint64_t max_memtable_id_; + // Pointer to a SuperVersionContext object. After flush completes, RocksDB + // installs a new superversion for the column family. This operation + // requires a SuperVersionContext object (currently embedded in JobContext). + SuperVersionContext* superversion_context_; + FlushReason flush_reason_; + }; + + // Argument passed to flush thread. + struct FlushThreadArg { + DBImpl* db_; + + Env::Priority thread_pri_; + }; + + // Information for a manual compaction + struct ManualCompactionState { + ManualCompactionState(ColumnFamilyData* _cfd, int _input_level, + int _output_level, uint32_t _output_path_id, + bool _exclusive, bool _disallow_trivial_move, + std::atomic* _canceled) + : cfd(_cfd), + input_level(_input_level), + output_level(_output_level), + output_path_id(_output_path_id), + exclusive(_exclusive), + disallow_trivial_move(_disallow_trivial_move), + canceled(_canceled ? *_canceled : canceled_internal_storage) {} + // When _canceled is not provided by ther user, we assign the reference of + // canceled_internal_storage to it to consolidate canceled and + // manual_compaction_paused since DisableManualCompaction() might be + // called + + ColumnFamilyData* cfd; + int input_level; + int output_level; + uint32_t output_path_id; + Status status; + bool done = false; + bool in_progress = false; // compaction request being processed? 
+ bool incomplete = false; // only part of requested range compacted
+ bool exclusive; // current behavior of only one manual
+ bool disallow_trivial_move; // Force actual compaction to run
+ const InternalKey* begin = nullptr; // nullptr means beginning of key range
+ const InternalKey* end = nullptr; // nullptr means end of key range
+ InternalKey* manual_end = nullptr; // how far we are compacting
+ InternalKey tmp_storage; // Used to keep track of compaction progress
+ InternalKey tmp_storage1; // Used to keep track of compaction progress
+
+ // When the user provides a canceled pointer in CompactRangeOptions, the
+ // above variable is the reference of the user-provided
+ // `canceled`, otherwise, it is the reference of canceled_internal_storage
+ std::atomic<bool> canceled_internal_storage = false;
+ std::atomic<bool>& canceled; // Compaction canceled pointer reference
+ };
+ struct PrepickedCompaction {
+ // background compaction takes ownership of `compaction`.
+ Compaction* compaction;
+ // caller retains ownership of `manual_compaction_state` as it is reused
+ // across background compactions.
+ ManualCompactionState* manual_compaction_state; // nullptr if non-manual
+ // task limiter token is requested during compaction picking.
+ std::unique_ptr<TaskLimiterToken> task_token;
+ };
+
+ struct CompactionArg {
+ // caller retains ownership of `db`.
+ DBImpl* db;
+ // background compaction takes ownership of `prepicked_compaction`.
+ PrepickedCompaction* prepicked_compaction;
+ Env::Priority compaction_pri_;
+ };
+
+ // Initialize the built-in column family for persistent stats. Depending on
+ // whether on-disk persistent stats have been enabled before, it may either
+ // create a new column family and column family handle or just a column family
+ // handle.
+ // Required: DB mutex held
+ Status InitPersistStatsColumnFamily();
+
+ // Persistent Stats column family has two format version keys which are used
+ // for compatibility checks. Write the format version if it's created for the
+ // first time, read the format version and check compatibility if recovering
+ // from disk. This function requires DB mutex held at entrance but may
+ // release and re-acquire DB mutex in the process.
+ // Required: DB mutex held
+ Status PersistentStatsProcessFormatVersion();
+
+ Status ResumeImpl(DBRecoverContext context);
+
+ void MaybeIgnoreError(Status* s) const;
+
+ const Status CreateArchivalDirectory();
+
+ Status CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options,
+ const std::string& cf_name,
+ ColumnFamilyHandle** handle);
+
+ Status DropColumnFamilyImpl(ColumnFamilyHandle* column_family);
+
+ // Delete any unneeded files and stale in-memory entries.
+ void DeleteObsoleteFiles();
+ // Delete obsolete files and log status and information of file deletion
+ void DeleteObsoleteFileImpl(int job_id, const std::string& fname,
+ const std::string& path_to_sync, FileType type,
+ uint64_t number);
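+
+ // Illustrative note (not upstream code): ManualCompactionState::canceled
+ // above binds either to the user's CompactRangeOptions::canceled atomic or
+ // to canceled_internal_storage, so cancellation is observed through a single
+ // reference. A hypothetical caller-side view:
+ //
+ //   std::atomic<bool> canceled{false};
+ //   CompactRangeOptions opts;
+ //   opts.canceled = &canceled;   // user-provided storage is bound
+ //   db->CompactRange(opts, /*begin=*/nullptr, /*end=*/nullptr);
+ //   canceled.store(true);        // requests cancellation of the compaction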
+ // Background process needs to call
+ //    auto x = CaptureCurrentFileNumberInPendingOutputs()
+ //    auto file_num = versions_->NewFileNumber();
+ //    <do something about file_num>
+ //    ReleaseFileNumberFromPendingOutputs(x)
+ // This will protect any file with number `file_num` or greater from being
+ // deleted while <do something about file_num> is running.
+ // -----------
+ // This function will capture the current file number and append it to
+ // pending_outputs_. This will prevent any background process from deleting
+ // any file created after this point.
+ std::list<uint64_t>::iterator CaptureCurrentFileNumberInPendingOutputs();
+ // This function should be called with the result of
+ // CaptureCurrentFileNumberInPendingOutputs(). It then marks that any file
+ // created between the calls CaptureCurrentFileNumberInPendingOutputs() and
+ // ReleaseFileNumberFromPendingOutputs() can now be deleted (if it's not live
+ // and blocked by any other pending_outputs_ calls)
+ void ReleaseFileNumberFromPendingOutputs(
+ std::unique_ptr<std::list<uint64_t>::iterator>& v);
+
+ IOStatus SyncClosedLogs(JobContext* job_context, VersionEdit* synced_wals);
+
+ // Flush the in-memory write buffer to storage. Switches to a new
+ // log-file/memtable and writes a new descriptor iff successful. Then
+ // installs a new super version for the column family.
+ Status FlushMemTableToOutputFile(
+ ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
+ bool* madeProgress, JobContext* job_context, FlushReason flush_reason,
+ SuperVersionContext* superversion_context,
+ std::vector<SequenceNumber>& snapshot_seqs,
+ SequenceNumber earliest_write_conflict_snapshot,
+ SnapshotChecker* snapshot_checker, LogBuffer* log_buffer,
+ Env::Priority thread_pri);
+
+ // Flush the memtables of (multiple) column families to multiple files on
+ // persistent storage.
+ Status FlushMemTablesToOutputFiles(
+ const autovector<BGFlushArg>& bg_flush_args, bool* made_progress,
+ JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri);
+
+ Status AtomicFlushMemTablesToOutputFiles(
+ const autovector<BGFlushArg>& bg_flush_args, bool* made_progress,
+ JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri);
+
+ // REQUIRES: log_numbers are sorted in ascending order
+ // corrupted_log_found is set to true if we recover from a corrupted log file.
+ Status RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
+ SequenceNumber* next_sequence, bool read_only,
+ bool* corrupted_log_found,
+ RecoveryContext* recovery_ctx);
+
+ // The following two methods are used to flush a memtable to
+ // storage. The first one is used at database recovery time (when the
+ // database is opened) and is heavyweight because it holds the mutex
+ // for the entire period. The second method WriteLevel0Table supports
+ // concurrent flushing of memtables to storage.
+ Status WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
+ MemTable* mem, VersionEdit* edit);
+
+ // Get the size of a log file and, if truncate is true, truncate the
+ // log file to its actual size, thereby freeing preallocated space.
+ // Return success even if truncate fails
+ Status GetLogSizeAndMaybeTruncate(uint64_t wal_number, bool truncate,
+ LogFileNumberSize* log);
+
+ // Restore alive_log_files_ and total_log_size_ after recovery.
+ // It needs to run only when there's no flush during recovery
+ // (e.g. avoid_flush_during_recovery=true). May also trigger flush
+ // in case total_log_size > max_total_wal_size.
+ Status RestoreAliveLogFiles(const std::vector<uint64_t>& log_numbers);
+
+ // num_bytes: for slowdown case, delay time is calculated based on
+ // `num_bytes` going through.
+ Status DelayWrite(uint64_t num_bytes, WriteThread& write_thread,
+ const WriteOptions& write_options);
+
+ // Begin stalling of writes when memory usage increases beyond a certain
+ // threshold.
+ void WriteBufferManagerStallWrites();
+
+ Status ThrottleLowPriWritesIfNeeded(const WriteOptions& write_options,
+ WriteBatch* my_batch);
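+
+ // Illustrative sketch (not upstream code) of the pending_outputs_ protocol
+ // documented earlier in this section, as a background job might follow it:
+ //
+ //   auto pending = CaptureCurrentFileNumberInPendingOutputs();
+ //   uint64_t file_num = versions_->NewFileNumber();
+ //   // ... create an SST or blob file numbered file_num ...
+ //   auto it = std::make_unique<std::list<uint64_t>::iterator>(pending);
+ //   ReleaseFileNumberFromPendingOutputs(it);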
+ // REQUIRES: mutex locked and in write thread.
+ Status ScheduleFlushes(WriteContext* context);
+
+ void MaybeFlushStatsCF(autovector<ColumnFamilyData*>* cfds);
+
+ Status TrimMemtableHistory(WriteContext* context);
+
+ Status SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context);
+
+ // Select and output column families qualified for atomic flush in
+ // `selected_cfds`. If `provided_candidate_cfds` is non-empty, it will be used
+ // as candidate CFs to select qualified ones from. Otherwise, all column
+ // families are used as candidates to select from.
+ //
+ // REQUIRES: mutex held
+ void SelectColumnFamiliesForAtomicFlush(
+ autovector<ColumnFamilyData*>* selected_cfds,
+ const autovector<ColumnFamilyData*>& provided_candidate_cfds = {});
+
+ // Force current memtable contents to be flushed.
+ Status FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& options,
+ FlushReason flush_reason,
+ bool entered_write_thread = false);
+
+ // Atomic-flush memtables from qualified CFs among `provided_candidate_cfds`
+ // (if non-empty) or among all column families, and atomically record the
+ // result to the MANIFEST.
+ Status AtomicFlushMemTables(
+ const FlushOptions& options, FlushReason flush_reason,
+ const autovector<ColumnFamilyData*>& provided_candidate_cfds = {},
+ bool entered_write_thread = false);
+
+ // Wait until flushing this column family won't stall writes
+ Status WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
+ bool* flush_needed);
+
+ // Wait for memtable flushed.
+ // If flush_memtable_id is non-null, wait until the memtable with the ID
+ // gets flushed. Otherwise, wait until the column family doesn't have any
+ // memtable pending flush.
+ // resuming_from_bg_err indicates whether the caller is attempting to resume
+ // from background error.
+ Status WaitForFlushMemTable(ColumnFamilyData* cfd,
+ const uint64_t* flush_memtable_id = nullptr,
+ bool resuming_from_bg_err = false) {
+ return WaitForFlushMemTables({cfd}, {flush_memtable_id},
+ resuming_from_bg_err);
+ }
+ // Wait for memtables to be flushed for multiple column families.
+ Status WaitForFlushMemTables(
+ const autovector<ColumnFamilyData*>& cfds,
+ const autovector<const uint64_t*>& flush_memtable_ids,
+ bool resuming_from_bg_err);
+
+ inline void WaitForPendingWrites() {
+ mutex_.AssertHeld();
+ TEST_SYNC_POINT("DBImpl::WaitForPendingWrites:BeforeBlock");
+ // If pipelined write is enabled, wait for all pending memtable
+ // writers.
+ if (immutable_db_options_.enable_pipelined_write) {
+ // Memtable writers may call DB::Get in case max_successive_merges > 0,
+ // which may lock mutex. Unlocking mutex here to avoid deadlock.
+ mutex_.Unlock();
+ write_thread_.WaitForMemTableWriters();
+ mutex_.Lock();
+ }
+
+ if (!immutable_db_options_.unordered_write) {
+ // Then the writes are finished before the next write group starts
+ return;
+ }
+
+ // Wait for the ones who already wrote to the WAL to finish their
+ // memtable write.
+ if (pending_memtable_writes_.load() != 0) {
+ std::unique_lock<std::mutex> guard(switch_mutex_);
+ switch_cv_.wait(guard,
+ [&] { return pending_memtable_writes_.load() == 0; });
+ }
+ }
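+
+ // Illustrative sketch (not upstream code): a flush-then-wait sequence built
+ // from the helpers above, for a hypothetical caller holding a resolved
+ // ColumnFamilyData* cfd:
+ //
+ //   FlushOptions fo;
+ //   fo.wait = false;
+ //   Status s = FlushMemTable(cfd, fo, FlushReason::kManualFlush);
+ //   if (s.ok()) {
+ //     s = WaitForFlushMemTable(cfd);  // all pending memtables flushed
+ //   }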
+ // TaskType is used to identify tasks in thread-pool; currently it only
+ // differentiates manual compaction, which could be unscheduled from the
+ // thread-pool.
+ enum class TaskType : uint8_t {
+ kDefault = 0,
+ kManualCompaction = 1,
+ kCount = 2,
+ };
+
+ // Task tag is used to identify tasks in thread-pool, which is
+ // dbImpl obj address + type
+ inline void* GetTaskTag(TaskType type) {
+ return GetTaskTag(static_cast<uint8_t>(type));
+ }
+
+ inline void* GetTaskTag(uint8_t type) {
+ return static_cast<uint8_t*>(static_cast<void*>(this)) + type;
+ }
+
+ // REQUIRES: mutex locked and in write thread.
+ void AssignAtomicFlushSeq(const autovector<ColumnFamilyData*>& cfds);
+
+ // REQUIRES: mutex locked and in write thread.
+ Status SwitchWAL(WriteContext* write_context);
+
+ // REQUIRES: mutex locked and in write thread.
+ Status HandleWriteBufferManagerFlush(WriteContext* write_context);
+
+ // REQUIRES: mutex locked
+ Status PreprocessWrite(const WriteOptions& write_options,
+ LogContext* log_context, WriteContext* write_context);
+
+ // Merge write batches in the write group into merged_batch.
+ // Returns OK if merge is successful.
+ // Returns Corruption if corruption in write batch is detected.
+ Status MergeBatch(const WriteThread::WriteGroup& write_group,
+ WriteBatch* tmp_batch, WriteBatch** merged_batch,
+ size_t* write_with_wal, WriteBatch** to_be_cached_state);
+
+ // rate_limiter_priority is used to charge `DBOptions::rate_limiter`
+ // for automatic WAL flush (`Options::manual_wal_flush` == false)
+ // associated with this WriteToWAL
+ IOStatus WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer,
+ uint64_t* log_used, uint64_t* log_size,
+ Env::IOPriority rate_limiter_priority,
+ LogFileNumberSize& log_file_number_size);
+
+ IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group,
+ log::Writer* log_writer, uint64_t* log_used,
+ bool need_log_sync, bool need_log_dir_sync,
+ SequenceNumber sequence,
+ LogFileNumberSize& log_file_number_size);
+
+ IOStatus ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group,
+ uint64_t* log_used,
+ SequenceNumber* last_sequence, size_t seq_inc);
+
+ // Used by WriteImpl to update bg_error_ if paranoid check is enabled.
+ // Caller must hold mutex_.
+ void WriteStatusCheckOnLocked(const Status& status);
+
+ // Used by WriteImpl to update bg_error_ if paranoid check is enabled.
+ void WriteStatusCheck(const Status& status);
+
+ // Used by WriteImpl to update bg_error_ when an IO error happens, e.g.,
+ // writing the WAL or syncing the WAL fails, if paranoid check is enabled.
+ void IOStatusCheck(const IOStatus& status);
+
+ // Used by WriteImpl to update bg_error_ in case of memtable insert error.
+ void MemTableInsertStatusCheck(const Status& memtable_insert_status);
+
+ Status CompactFilesImpl(const CompactionOptions& compact_options,
+ ColumnFamilyData* cfd, Version* version,
+ const std::vector<std::string>& input_file_names,
+ std::vector<std::string>* const output_file_names,
+ const int output_level, int output_path_id,
+ JobContext* job_context, LogBuffer* log_buffer,
+ CompactionJobInfo* compaction_job_info);
+
+ ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name);
+
+ void MaybeScheduleFlushOrCompaction();
+
+ struct FlushRequest {
+ FlushReason flush_reason;
+ // A map from each column family to flush to the largest memtable id to
+ // persist for it. Once all the memtables whose IDs are smaller than or
+ // equal to this per-column-family specified value are flushed, this flush
+ // request is considered to have completed its work of flushing this
+ // column family. After completing the work for all column families in this
+ // request, this flush is considered complete.
+ std::unordered_map + cfd_to_max_mem_id_to_persist; + }; + + void GenerateFlushRequest(const autovector& cfds, + FlushReason flush_reason, FlushRequest* req); + + void SchedulePendingFlush(const FlushRequest& req); + + void SchedulePendingCompaction(ColumnFamilyData* cfd); + void SchedulePendingPurge(std::string fname, std::string dir_to_sync, + FileType type, uint64_t number, int job_id); + static void BGWorkCompaction(void* arg); + // Runs a pre-chosen universal compaction involving bottom level in a + // separate, bottom-pri thread pool. + static void BGWorkBottomCompaction(void* arg); + static void BGWorkFlush(void* arg); + static void BGWorkPurge(void* arg); + static void UnscheduleCompactionCallback(void* arg); + static void UnscheduleFlushCallback(void* arg); + void BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction, + Env::Priority thread_pri); + void BackgroundCallFlush(Env::Priority thread_pri); + void BackgroundCallPurge(); + Status BackgroundCompaction(bool* madeProgress, JobContext* job_context, + LogBuffer* log_buffer, + PrepickedCompaction* prepicked_compaction, + Env::Priority thread_pri); + Status BackgroundFlush(bool* madeProgress, JobContext* job_context, + LogBuffer* log_buffer, FlushReason* reason, + Env::Priority thread_pri); + + bool EnoughRoomForCompaction(ColumnFamilyData* cfd, + const std::vector& inputs, + bool* sfm_bookkeeping, LogBuffer* log_buffer); + + // Request compaction tasks token from compaction thread limiter. + // It always succeeds if force = true or limiter is disable. + bool RequestCompactionToken(ColumnFamilyData* cfd, bool force, + std::unique_ptr* token, + LogBuffer* log_buffer); + + // Schedule background tasks + Status StartPeriodicTaskScheduler(); + + Status RegisterRecordSeqnoTimeWorker(); + + void PrintStatistics(); + + size_t EstimateInMemoryStatsHistorySize() const; + + // Return the minimum empty level that could hold the total data in the + // input level. Return the input level, if such level could not be found. + int FindMinimumEmptyLevelFitting(ColumnFamilyData* cfd, + const MutableCFOptions& mutable_cf_options, + int level); + + // Move the files in the input level to the target level. + // If target_level < 0, automatically calculate the minimum level that could + // hold the data set. + Status ReFitLevel(ColumnFamilyData* cfd, int level, int target_level = -1); + + // helper functions for adding and removing from flush & compaction queues + void AddToCompactionQueue(ColumnFamilyData* cfd); + ColumnFamilyData* PopFirstFromCompactionQueue(); + FlushRequest PopFirstFromFlushQueue(); + + // Pick the first unthrottled compaction with task token from queue. + ColumnFamilyData* PickCompactionFromQueue( + std::unique_ptr* token, LogBuffer* log_buffer); + + // helper function to call after some of the logs_ were synced + void MarkLogsSynced(uint64_t up_to, bool synced_dir, VersionEdit* edit); + Status ApplyWALToManifest(const ReadOptions& read_options, VersionEdit* edit); + // WALs with log number up to up_to are not synced successfully. + void MarkLogsNotSynced(uint64_t up_to); + + SnapshotImpl* GetSnapshotImpl(bool is_write_conflict_boundary, + bool lock = true); + + // If snapshot_seq != kMaxSequenceNumber, then this function can only be + // called from the write thread that publishes sequence numbers to readers. + // For 1) write-committed, or 2) write-prepared + one-write-queue, this will + // be the write thread performing memtable writes. 
For write-prepared with + // two write queues, this will be the write thread writing commit marker to + // the WAL. + // If snapshot_seq == kMaxSequenceNumber, this function is called by a caller + // ensuring no writes to the database. + std::pair> + CreateTimestampedSnapshotImpl(SequenceNumber snapshot_seq, uint64_t ts, + bool lock = true); + + uint64_t GetMaxTotalWalSize() const; + + FSDirectory* GetDataDir(ColumnFamilyData* cfd, size_t path_id) const; + + Status MaybeReleaseTimestampedSnapshotsAndCheck(); + + Status CloseHelper(); + + void WaitForBackgroundWork(); + + // Background threads call this function, which is just a wrapper around + // the InstallSuperVersion() function. Background threads carry + // sv_context which can have new_superversion already + // allocated. + // All ColumnFamily state changes go through this function. Here we analyze + // the new state and we schedule background work if we detect that the new + // state needs flush or compaction. + void InstallSuperVersionAndScheduleWork( + ColumnFamilyData* cfd, SuperVersionContext* sv_context, + const MutableCFOptions& mutable_cf_options); + + bool GetIntPropertyInternal(ColumnFamilyData* cfd, + const DBPropertyInfo& property_info, + bool is_locked, uint64_t* value); + bool GetPropertyHandleOptionsStatistics(std::string* value); + + bool HasPendingManualCompaction(); + bool HasExclusiveManualCompaction(); + void AddManualCompaction(ManualCompactionState* m); + void RemoveManualCompaction(ManualCompactionState* m); + bool ShouldntRunManualCompaction(ManualCompactionState* m); + bool HaveManualCompaction(ColumnFamilyData* cfd); + bool MCOverlap(ManualCompactionState* m, ManualCompactionState* m1); + void BuildCompactionJobInfo(const ColumnFamilyData* cfd, Compaction* c, + const Status& st, + const CompactionJobStats& compaction_job_stats, + const int job_id, const Version* current, + CompactionJobInfo* compaction_job_info) const; + // Reserve the next 'num' file numbers for to-be-ingested external SST files, + // and return the current file_number in 'next_file_number'. + // Write a version edit to the MANIFEST. 
+ Status ReserveFileNumbersBeforeIngestion( + ColumnFamilyData* cfd, uint64_t num, + std::unique_ptr::iterator>& pending_output_elem, + uint64_t* next_file_number); + + bool ShouldPurge(uint64_t file_number) const; + void MarkAsGrabbedForPurge(uint64_t file_number); + + size_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const; + Env::WriteLifeTimeHint CalculateWALWriteHint() { return Env::WLTH_SHORT; } + + IOStatus CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number, + size_t preallocate_block_size, log::Writer** new_log); + + // Validate self-consistency of DB options + static Status ValidateOptions(const DBOptions& db_options); + // Validate self-consistency of DB options and its consistency with cf options + static Status ValidateOptions( + const DBOptions& db_options, + const std::vector& column_families); + + // Utility function to do some debug validation and sort the given vector + // of MultiGet keys + void PrepareMultiGetKeys( + const size_t num_keys, bool sorted, + autovector* key_ptrs); + + void MultiGetCommon(const ReadOptions& options, + ColumnFamilyHandle* column_family, const size_t num_keys, + const Slice* keys, PinnableSlice* values, + PinnableWideColumns* columns, std::string* timestamps, + Status* statuses, bool sorted_input); + + void MultiGetCommon(const ReadOptions& options, const size_t num_keys, + ColumnFamilyHandle** column_families, const Slice* keys, + PinnableSlice* values, PinnableWideColumns* columns, + std::string* timestamps, Status* statuses, + bool sorted_input); + + // A structure to hold the information required to process MultiGet of keys + // belonging to one column family. For a multi column family MultiGet, there + // will be a container of these objects. + struct MultiGetColumnFamilyData { + ColumnFamilyHandle* cf; + ColumnFamilyData* cfd; + + // For the batched MultiGet which relies on sorted keys, start specifies + // the index of first key belonging to this column family in the sorted + // list. + size_t start; + + // For the batched MultiGet case, num_keys specifies the number of keys + // belonging to this column family in the sorted list + size_t num_keys; + + // SuperVersion for the column family obtained in a manner that ensures a + // consistent view across all column families in the DB + SuperVersion* super_version; + MultiGetColumnFamilyData(ColumnFamilyHandle* column_family, + SuperVersion* sv) + : cf(column_family), + cfd(static_cast(cf)->cfd()), + start(0), + num_keys(0), + super_version(sv) {} + + MultiGetColumnFamilyData(ColumnFamilyHandle* column_family, size_t first, + size_t count, SuperVersion* sv) + : cf(column_family), + cfd(static_cast(cf)->cfd()), + start(first), + num_keys(count), + super_version(sv) {} + + MultiGetColumnFamilyData() = default; + }; + + // A common function to obtain a consistent snapshot, which can be implicit + // if the user doesn't specify a snapshot in read_options, across + // multiple column families for MultiGet. It will attempt to get an implicit + // snapshot without acquiring the db_mutes, but will give up after a few + // tries and acquire the mutex if a memtable flush happens. The template + // allows both the batched and non-batched MultiGet to call this with + // either an std::unordered_map or autovector of column families. 
+ // + // If callback is non-null, the callback is refreshed with the snapshot + // sequence number + // + // A return value of true indicates that the SuperVersions were obtained + // from the ColumnFamilyData, whereas false indicates they are thread + // local + template + bool MultiCFSnapshot( + const ReadOptions& read_options, ReadCallback* callback, + std::function& + iter_deref_func, + T* cf_list, SequenceNumber* snapshot); + + // The actual implementation of the batching MultiGet. The caller is expected + // to have acquired the SuperVersion and pass in a snapshot sequence number + // in order to construct the LookupKeys. The start_key and num_keys specify + // the range of keys in the sorted_keys vector for a single column family. + Status MultiGetImpl( + const ReadOptions& read_options, size_t start_key, size_t num_keys, + autovector* sorted_keys, + SuperVersion* sv, SequenceNumber snap_seqnum, ReadCallback* callback); + + Status DisableFileDeletionsWithLock(); + + Status IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd, + std::string ts_low); + + bool ShouldReferenceSuperVersion(const MergeContext& merge_context); + + // Lock over the persistent DB state. Non-nullptr iff successfully acquired. + FileLock* db_lock_; + + // In addition to mutex_, log_write_mutex_ protected writes to stats_history_ + InstrumentedMutex stats_history_mutex_; + // In addition to mutex_, log_write_mutex_ protected writes to logs_ and + // logfile_number_. With two_write_queues it also protects alive_log_files_, + // and log_empty_. Refer to the definition of each variable below for more + // details. + // Note: to avoid deadlock, if needed to acquire both log_write_mutex_ and + // mutex_, the order should be first mutex_ and then log_write_mutex_. + InstrumentedMutex log_write_mutex_; + + // If zero, manual compactions are allowed to proceed. If non-zero, manual + // compactions may still be running, but will quickly fail with + // `Status::Incomplete`. The value indicates how many threads have paused + // manual compactions. It is accessed in read mode outside the DB mutex in + // compaction code paths. + std::atomic manual_compaction_paused_; + + // This condition variable is signaled on these conditions: + // * whenever bg_compaction_scheduled_ goes down to 0 + // * if AnyManualCompaction, whenever a compaction finishes, even if it hasn't + // made any progress + // * whenever a compaction made any progress + // * whenever bg_flush_scheduled_ or bg_purge_scheduled_ value decreases + // (i.e. whenever a flush is done, even if it didn't make any progress) + // * whenever there is an error in background purge, flush or compaction + // * whenever num_running_ingest_file_ goes to 0. + // * whenever pending_purge_obsolete_files_ goes to 0. + // * whenever disable_delete_obsolete_files_ goes to 0. + // * whenever SetOptions successfully updates options. + // * whenever a column family is dropped. + InstrumentedCondVar bg_cv_; + // Writes are protected by locking both mutex_ and log_write_mutex_, and reads + // must be under either mutex_ or log_write_mutex_. Since after ::Open, + // logfile_number_ is currently updated only in write_thread_, it can be read + // from the same write_thread_ without any locks. + uint64_t logfile_number_; + // Log files that we can recycle. Must be protected by db mutex_. + std::deque log_recycle_files_; + // Protected by log_write_mutex_. + bool log_dir_synced_; + // Without two_write_queues, read and writes to log_empty_ are protected by + // mutex_. 
Since it is currently updated/read only in write_thread_, it can be + // accessed from the same write_thread_ without any locks. With + // two_write_queues writes, where it can be updated in different threads, + // read and writes are protected by log_write_mutex_ instead. This is to avoid + // expensive mutex_ lock during WAL write, which update log_empty_. + bool log_empty_; + + ColumnFamilyHandleImpl* persist_stats_cf_handle_; + + bool persistent_stats_cfd_exists_ = true; + + // alive_log_files_ is protected by mutex_ and log_write_mutex_ with details + // as follows: + // 1. read by FindObsoleteFiles() which can be called in either application + // thread or RocksDB bg threads, both mutex_ and log_write_mutex_ are + // held. + // 2. pop_front() by FindObsoleteFiles(), both mutex_ and log_write_mutex_ + // are held. + // 3. push_back() by DBImpl::Open() and DBImpl::RestoreAliveLogFiles() + // (actually called by Open()), only mutex_ is held because at this point, + // the DB::Open() call has not returned success to application, and the + // only other thread(s) that can conflict are bg threads calling + // FindObsoleteFiles() which ensure that both mutex_ and log_write_mutex_ + // are held when accessing alive_log_files_. + // 4. read by DBImpl::Open() is protected by mutex_. + // 5. push_back() by SwitchMemtable(). Both mutex_ and log_write_mutex_ are + // held. This is done by the write group leader. Note that in the case of + // two-write-queues, another WAL-only write thread can be writing to the + // WAL concurrently. See 9. + // 6. read by SwitchWAL() with both mutex_ and log_write_mutex_ held. This is + // done by write group leader. + // 7. read by ConcurrentWriteToWAL() by the write group leader in the case of + // two-write-queues. Only log_write_mutex_ is held to protect concurrent + // pop_front() by FindObsoleteFiles(). + // 8. read by PreprocessWrite() by the write group leader. log_write_mutex_ + // is held to protect the data structure from concurrent pop_front() by + // FindObsoleteFiles(). + // 9. read by ConcurrentWriteToWAL() by a WAL-only write thread in the case + // of two-write-queues. Only log_write_mutex_ is held. This suffices to + // protect the data structure from concurrent push_back() by current + // write group leader as well as pop_front() by FindObsoleteFiles(). + std::deque alive_log_files_; + + // Log files that aren't fully synced, and the current log file. + // Synchronization: + // 1. read by FindObsoleteFiles() which can be called either in application + // thread or RocksDB bg threads. log_write_mutex_ is always held, while + // some reads are performed without mutex_. + // 2. pop_front() by FindObsoleteFiles() with only log_write_mutex_ held. + // 3. read by DBImpl::Open() with both mutex_ and log_write_mutex_. + // 4. emplace_back() by DBImpl::Open() with both mutex_ and log_write_mutex. + // Note that at this point, DB::Open() has not returned success to + // application, thus the only other thread(s) that can conflict are bg + // threads calling FindObsoleteFiles(). See 1. + // 5. iteration and clear() from CloseHelper() always hold log_write_mutex + // and mutex_. + // 6. back() called by APIs FlushWAL() and LockWAL() are protected by only + // log_write_mutex_. These two can be called by application threads after + // DB::Open() returns success to applications. + // 7. read by SyncWAL(), another API, protected by only log_write_mutex_. + // 8. read by MarkLogsNotSynced() and MarkLogsSynced() are protected by + // log_write_mutex_. + // 9. 
erase() by MarkLogsSynced() protected by log_write_mutex_. + // 10. read by SyncClosedLogs() protected by only log_write_mutex_. This can + // happen in bg flush threads after DB::Open() returns success to + // applications. + // 11. reads, e.g. front(), iteration, and back() called by PreprocessWrite() + // holds only the log_write_mutex_. This is done by the write group + // leader. A bg thread calling FindObsoleteFiles() or MarkLogsSynced() + // can happen concurrently. This is fine because log_write_mutex_ is used + // by all parties. See 2, 5, 9. + // 12. reads, empty(), back() called by SwitchMemtable() hold both mutex_ and + // log_write_mutex_. This happens in the write group leader. + // 13. emplace_back() by SwitchMemtable() hold both mutex_ and + // log_write_mutex_. This happens in the write group leader. Can conflict + // with bg threads calling FindObsoleteFiles(), MarkLogsSynced(), + // SyncClosedLogs(), etc. as well as application threads calling + // FlushWAL(), SyncWAL(), LockWAL(). This is fine because all parties + // require at least log_write_mutex_. + // 14. iteration called in WriteToWAL(write_group) protected by + // log_write_mutex_. This is done by write group leader when + // two-write-queues is disabled and write needs to sync logs. + // 15. back() called in ConcurrentWriteToWAL() protected by log_write_mutex_. + // This can be done by the write group leader if two-write-queues is + // enabled. It can also be done by another WAL-only write thread. + // + // Other observations: + // - back() and items with getting_synced=true are not popped, + // - The same thread that sets getting_synced=true will reset it. + // - it follows that the object referred by back() can be safely read from + // the write_thread_ without using mutex. Note that calling back() without + // mutex may be unsafe because different implementations of deque::back() may + // access other member variables of deque, causing undefined behaviors. + // Generally, do not access stl containers without proper synchronization. + // - it follows that the items with getting_synced=true can be safely read + // from the same thread that has set getting_synced=true + std::deque logs_; + + // Signaled when getting_synced becomes false for some of the logs_. + InstrumentedCondVar log_sync_cv_; + // This is the app-level state that is written to the WAL but will be used + // only during recovery. Using this feature enables not writing the state to + // memtable on normal writes and hence improving the throughput. Each new + // write of the state will replace the previous state entirely even if the + // keys in the two consecutive states do not overlap. + // It is protected by log_write_mutex_ when two_write_queues_ is enabled. + // Otherwise only the heaad of write_thread_ can access it. + WriteBatch cached_recoverable_state_; + std::atomic cached_recoverable_state_empty_ = {true}; + std::atomic total_log_size_; + + // If this is non-empty, we need to delete these log files in background + // threads. Protected by log_write_mutex_. + autovector logs_to_free_; + + bool is_snapshot_supported_; + + std::map> stats_history_; + + std::map stats_slice_; + + bool stats_slice_initialized_ = false; + + Directories directories_; + + WriteBufferManager* write_buffer_manager_; + + WriteThread write_thread_; + WriteBatch tmp_batch_; + // The write thread when the writers have no memtable write. This will be used + // in 2PC to batch the prepares separately from the serial commit. 
+ WriteThread nonmem_write_thread_; + + WriteController write_controller_; + + // Size of the last batch group. In slowdown mode, next write needs to + // sleep if it uses up the quota. + // Note: This is to protect memtable and compaction. If the batch only writes + // to the WAL its size need not to be included in this. + uint64_t last_batch_group_size_; + + FlushScheduler flush_scheduler_; + + TrimHistoryScheduler trim_history_scheduler_; + + SnapshotList snapshots_; + + TimestampedSnapshotList timestamped_snapshots_; + + // For each background job, pending_outputs_ keeps the current file number at + // the time that background job started. + // FindObsoleteFiles()/PurgeObsoleteFiles() never deletes any file that has + // number bigger than any of the file number in pending_outputs_. Since file + // numbers grow monotonically, this also means that pending_outputs_ is always + // sorted. After a background job is done executing, its file number is + // deleted from pending_outputs_, which allows PurgeObsoleteFiles() to clean + // it up. + // State is protected with db mutex. + std::list pending_outputs_; + + // flush_queue_ and compaction_queue_ hold column families that we need to + // flush and compact, respectively. + // A column family is inserted into flush_queue_ when it satisfies condition + // cfd->imm()->IsFlushPending() + // A column family is inserted into compaction_queue_ when it satisfied + // condition cfd->NeedsCompaction() + // Column families in this list are all Ref()-erenced + // TODO(icanadi) Provide some kind of ReferencedColumnFamily class that will + // do RAII on ColumnFamilyData + // Column families are in this queue when they need to be flushed or + // compacted. Consumers of these queues are flush and compaction threads. When + // column family is put on this queue, we increase unscheduled_flushes_ and + // unscheduled_compactions_. When these variables are bigger than zero, that + // means we need to schedule background threads for flush and compaction. + // Once the background threads are scheduled, we decrease unscheduled_flushes_ + // and unscheduled_compactions_. That way we keep track of number of + // compaction and flush threads we need to schedule. This scheduling is done + // in MaybeScheduleFlushOrCompaction() + // invariant(column family present in flush_queue_ <==> + // ColumnFamilyData::pending_flush_ == true) + std::deque flush_queue_; + // invariant(column family present in compaction_queue_ <==> + // ColumnFamilyData::pending_compaction_ == true) + std::deque compaction_queue_; + + // A map to store file numbers and filenames of the files to be purged + std::unordered_map purge_files_; + + // A vector to store the file numbers that have been assigned to certain + // JobContext. Current implementation tracks table and blob files only. + std::unordered_set files_grabbed_for_purge_; + + // A queue to store log writers to close. Protected by db mutex_. 
+ std::deque<log::Writer*> logs_to_free_queue_;
+
+ std::deque<SuperVersion*> superversions_to_free_queue_;
+
+ int unscheduled_flushes_;
+
+ int unscheduled_compactions_;
+
+ // count how many background compactions are running or have been scheduled in
+ // the BOTTOM pool
+ int bg_bottom_compaction_scheduled_;
+
+ // count how many background compactions are running or have been scheduled
+ int bg_compaction_scheduled_;
+
+ // stores the number of compactions that are currently running
+ int num_running_compactions_;
+
+ // number of background memtable flush jobs, submitted to the HIGH pool
+ int bg_flush_scheduled_;
+
+ // stores the number of flushes that are currently running
+ int num_running_flushes_;
+
+ // number of background obsolete file purge jobs, submitted to the HIGH pool
+ int bg_purge_scheduled_;
+
+ std::deque<ManualCompactionState*> manual_compaction_dequeue_;
+
+ // Controls deletion of obsolete files:
+ // if 0, deletion is enabled; if non-zero, files will not be deleted.
+ // This enables two different threads to call
+ // EnableFileDeletions() and DisableFileDeletions()
+ // without any synchronization
+ int disable_delete_obsolete_files_;
+
+ // Number of times FindObsoleteFiles has found deletable files and the
+ // corresponding call to PurgeObsoleteFiles has not yet finished.
+ int pending_purge_obsolete_files_;
+
+ // last time when DeleteObsoleteFiles with full scan was executed. Originally
+ // initialized with startup time.
+ uint64_t delete_obsolete_files_last_run_;
+
+ // last time stats were dumped to LOG
+ std::atomic<uint64_t> last_stats_dump_time_microsec_;
+
+ // The thread that wants to switch memtable can wait on this cv until the
+ // pending writes to memtable finish.
+ std::condition_variable switch_cv_;
+ // The mutex used by switch_cv_. mutex_ should be acquired beforehand.
+ std::mutex switch_mutex_;
+ // Number of threads intending to write to memtable
+ std::atomic<size_t> pending_memtable_writes_ = {};
+
+ // A flag indicating whether the current rocksdb database has any
+ // data that is not yet persisted into either WAL or SST file.
+ // Used when disableWAL is true.
+ std::atomic<bool> has_unpersisted_data_;
+
+ // if an attempt was made to flush all column families that
+ // the oldest log depends on but uncommitted data in the oldest
+ // log prevents the log from being released.
+ // We must attempt to free the dependent memtables again
+ // at a later time after the transaction in the oldest
+ // log is fully committed.
+ bool unable_to_release_oldest_log_;
+
+ // Number of running IngestExternalFile() or CreateColumnFamilyWithImport()
+ // calls.
+ // REQUIRES: mutex held
+ int num_running_ingest_file_;
+
+ WalManager wal_manager_;
+
+ // A value of > 0 temporarily disables scheduling of background work
+ int bg_work_paused_;
+
+ // A value of > 0 temporarily disables scheduling of background compaction
+ int bg_compaction_paused_;
+
+ // Guard against multiple concurrent refitting
+ bool refitting_level_;
+
+ // Indicate DB was opened successfully
+ bool opened_successfully_;
+
+ // The min threshold to trigger bottommost compaction for removing
+ // garbage, among all column families.
+ SequenceNumber bottommost_files_mark_threshold_ = kMaxSequenceNumber;
+
+ LogsWithPrepTracker logs_with_prep_tracker_;
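+
+ // Illustrative note (not upstream code): disable_delete_obsolete_files_
+ // above is a counter, not a flag, so disable/enable calls nest. A
+ // hypothetical caller-side view:
+ //
+ //   db->DisableFileDeletions();                // counter 0 -> 1
+ //   db->DisableFileDeletions();                // counter 1 -> 2
+ //   db->EnableFileDeletions(/*force=*/false);  // counter 2 -> 1, still off
+ //   db->EnableFileDeletions(/*force=*/false);  // counter 1 -> 0, resumes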
+ // Callback for compaction to check if a key is visible to a snapshot.
+ // REQUIRES: mutex held
+ std::unique_ptr<SnapshotChecker> snapshot_checker_;
+
+ // Callback for when the cached_recoverable_state_ is written to memtable
+ // Only to be set during initialization
+ std::unique_ptr<PreReleaseCallback> recoverable_state_pre_release_callback_;
+
+ // Scheduler to run DumpStats(), PersistStats(), and FlushInfoLog().
+ // Currently, internally it has a global timer instance for running the tasks.
+ PeriodicTaskScheduler periodic_task_scheduler_;
+
+ // It contains the implementations for each periodic task.
+ std::map<PeriodicTaskType, const PeriodicTaskFunc> periodic_task_functions_;
+
+ // When set, we use a separate queue for writes that don't write to memtable.
+ // In 2PC these are the writes at Prepare phase.
+ const bool two_write_queues_;
+ const bool manual_wal_flush_;
+
+ // When set, LastSequence also indicates the last published sequence visible
+ // to the readers. Otherwise LastPublishedSequence should be used.
+ const bool last_seq_same_as_publish_seq_;
+ // It indicates that a customized gc algorithm must be used for
+ // flush/compaction, and if it is not provided via SnapshotChecker, we should
+ // disable gc to be safe.
+ const bool use_custom_gc_;
+ // Flag to indicate that the DB instance shutdown has been initiated. This
+ // is different from the shutting_down_ atomic in that it is set at the
+ // beginning of the shutdown sequence, specifically in order to prevent any
+ // background error recovery from going on in parallel. The latter,
+ // shutting_down_, is set a little later during the shutdown after scheduling
+ // memtable flushes
+ std::atomic<bool> shutdown_initiated_;
+ // Flag to indicate whether sst_file_manager object was allocated in
+ // DB::Open() or passed to us
+ bool own_sfm_;
+
+ // Flag to check whether Close() has been called on this DB
+ bool closed_;
+ // save the closing status, for re-calling the close()
+ Status closing_status_;
+ // mutex for DB::Close()
+ InstrumentedMutex closing_mutex_;
+
+ // Condition variable to coordinate installation of atomic flush results.
+ // With atomic flush, each bg thread installs the result of flushing multiple
+ // column families, and different threads can flush different column
+ // families. It's difficult to rely on one thread to perform batch
+ // installation for all threads. This is different from the non-atomic flush
+ // case.
+ // atomic_flush_install_cv_ makes sure that threads install atomic flush
+ // results sequentially. Flush results of memtables with lower IDs get
+ // installed to MANIFEST first.
+ InstrumentedCondVar atomic_flush_install_cv_;
+
+ bool wal_in_db_path_;
+ std::atomic<uint64_t> max_total_wal_size_;
+
+ BlobFileCompletionCallback blob_callback_;
+
+ // Pointer to WriteBufferManager stalling interface.
+ std::unique_ptr<StallInterface> wbm_stall_;
+
+ // seqno_time_mapping_ stores the sequence number to time mapping; it's not
+ // thread safe, and both reads and writes need the db mutex held.
+ SeqnoToTimeMapping seqno_time_mapping_;
+
+ // Stop write token that is acquired when first LockWAL() is called.
+ // Destroyed when last UnlockWAL() is called. Controlled by DB mutex.
+ // See lock_wal_count_
+ std::unique_ptr<WriteControllerToken> lock_wal_write_token_;
+
+ // The number of LockWAL called without matching UnlockWAL call.
+  // See also lock_wal_write_token_
+  uint32_t lock_wal_count_;
+};
+
+class GetWithTimestampReadCallback : public ReadCallback {
+ public:
+  explicit GetWithTimestampReadCallback(SequenceNumber seq)
+      : ReadCallback(seq) {}
+  bool IsVisibleFullCheck(SequenceNumber seq) override {
+    return seq <= max_visible_seq_;
+  }
+};
+
+extern Options SanitizeOptions(const std::string& db, const Options& src,
+                               bool read_only = false,
+                               Status* logger_creation_s = nullptr);
+
+extern DBOptions SanitizeOptions(const std::string& db, const DBOptions& src,
+                                 bool read_only = false,
+                                 Status* logger_creation_s = nullptr);
+
+extern CompressionType GetCompressionFlush(
+    const ImmutableCFOptions& ioptions,
+    const MutableCFOptions& mutable_cf_options);
+
+// Return the earliest log file to keep after the memtable flush is
+// finalized.
+// `cfd_to_flush` is the column family whose memtable (specified in
+// `memtables_to_flush`) will be flushed and thus will not depend on any WAL
+// file.
+// The function is only applicable to 2pc mode.
+extern uint64_t PrecomputeMinLogNumberToKeep2PC(
+    VersionSet* vset, const ColumnFamilyData& cfd_to_flush,
+    const autovector<VersionEdit*>& edit_list,
+    const autovector<MemTable*>& memtables_to_flush,
+    LogsWithPrepTracker* prep_tracker);
+// For atomic flush.
+extern uint64_t PrecomputeMinLogNumberToKeep2PC(
+    VersionSet* vset, const autovector<ColumnFamilyData*>& cfds_to_flush,
+    const autovector<autovector<VersionEdit*>>& edit_lists,
+    const autovector<const autovector<MemTable*>*>& memtables_to_flush,
+    LogsWithPrepTracker* prep_tracker);
+
+// In non-2PC mode, WALs with log number < the returned number can be
+// deleted after the cfd_to_flush column family is flushed successfully.
+extern uint64_t PrecomputeMinLogNumberToKeepNon2PC(
+    VersionSet* vset, const ColumnFamilyData& cfd_to_flush,
+    const autovector<VersionEdit*>& edit_list);
+// For atomic flush.
+extern uint64_t PrecomputeMinLogNumberToKeepNon2PC(
+    VersionSet* vset, const autovector<ColumnFamilyData*>& cfds_to_flush,
+    const autovector<autovector<VersionEdit*>>& edit_lists);
+
+// `cfd_to_flush` is the column family whose memtable will be flushed and thus
+// will not depend on any WAL file. nullptr means no memtable is being flushed.
+// The function is only applicable to 2pc mode.
+extern uint64_t FindMinPrepLogReferencedByMemTable(
+    VersionSet* vset, const autovector<MemTable*>& memtables_to_flush);
+// For atomic flush.
+extern uint64_t FindMinPrepLogReferencedByMemTable(
+    VersionSet* vset,
+    const autovector<const autovector<MemTable*>*>& memtables_to_flush);
+
+// Fix user-supplied options to be reasonable
+template <class T, class V>
+static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
+  if (static_cast<V>(*ptr) > maxvalue) *ptr = maxvalue;
+  if (static_cast<V>(*ptr) < minvalue) *ptr = minvalue;
+}
+
+inline Status DBImpl::FailIfCfHasTs(
+    const ColumnFamilyHandle* column_family) const {
+  column_family = column_family ? column_family : DefaultColumnFamily();
+  assert(column_family);
+  const Comparator* const ucmp = column_family->GetComparator();
+  assert(ucmp);
+  if (ucmp->timestamp_size() > 0) {
+    std::ostringstream oss;
+    oss << "cannot call this method on column family "
+        << column_family->GetName() << " that enables timestamp";
+    return Status::InvalidArgument(oss.str());
+  }
+  return Status::OK();
+}
+
+inline Status DBImpl::FailIfTsMismatchCf(ColumnFamilyHandle* column_family,
+                                         const Slice& ts,
+                                         bool ts_for_read) const {
+  if (!column_family) {
+    return Status::InvalidArgument("column family handle cannot be null");
+  }
+  assert(column_family);
+  const Comparator* const ucmp = column_family->GetComparator();
+  assert(ucmp);
+  if (0 == ucmp->timestamp_size()) {
+    std::stringstream oss;
+    oss << "cannot call this method on column family "
+        << column_family->GetName() << " that does not enable timestamp";
+    return Status::InvalidArgument(oss.str());
+  }
+  const size_t ts_sz = ts.size();
+  if (ts_sz != ucmp->timestamp_size()) {
+    std::stringstream oss;
+    oss << "Timestamp sizes mismatch: expect " << ucmp->timestamp_size() << ", "
+        << ts_sz << " given";
+    return Status::InvalidArgument(oss.str());
+  }
+  if (ts_for_read) {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+    auto cfd = cfh->cfd();
+    std::string current_ts_low = cfd->GetFullHistoryTsLow();
+    if (!current_ts_low.empty() &&
+        ucmp->CompareTimestamp(ts, current_ts_low) < 0) {
+      std::stringstream oss;
+      oss << "Read timestamp: " << ts.ToString(true)
+          << " is smaller than full_history_ts_low: "
+          << Slice(current_ts_low).ToString(true) << std::endl;
+      return Status::InvalidArgument(oss.str());
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_compaction_flush.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_compaction_flush.cc
new file mode 100644
index 0000000000..9e084ab622
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_compaction_flush.cc
@@ -0,0 +1,3980 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include <cinttypes>
+#include <deque>
+
+#include "db/builder.h"
+#include "db/db_impl/db_impl.h"
+#include "db/error_handler.h"
+#include "db/event_helpers.h"
+#include "file/sst_file_manager_impl.h"
+#include "logging/logging.h"
+#include "monitoring/iostats_context_imp.h"
+#include "monitoring/perf_context_imp.h"
+#include "monitoring/thread_status_updater.h"
+#include "monitoring/thread_status_util.h"
+#include "test_util/sync_point.h"
+#include "util/cast_util.h"
+#include "util/concurrent_task_limiter_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+bool DBImpl::EnoughRoomForCompaction(
+    ColumnFamilyData* cfd, const std::vector<CompactionInputFiles>& inputs,
+    bool* sfm_reserved_compact_space, LogBuffer* log_buffer) {
+  // Check if we have enough room to do the compaction
+  bool enough_room = true;
+  auto sfm = static_cast<SstFileManagerImpl*>(
+      immutable_db_options_.sst_file_manager.get());
+  if (sfm) {
+    // Pass the current bg_error_ to SFM so it can decide what checks to
+    // perform. If this DB instance hasn't seen any error yet, the SFM can be
+    // optimistic and not do disk space checks
+    Status bg_error = error_handler_.GetBGError();
+    enough_room = sfm->EnoughRoomForCompaction(cfd, inputs, bg_error);
+    bg_error.PermitUncheckedError();  // bg_error is just a copy of the Status
+                                      // from the error_handler_
+    if (enough_room) {
+      *sfm_reserved_compact_space = true;
+    }
+  }
+  if (!enough_room) {
+    // Just in case tests want to change the value of enough_room
+    TEST_SYNC_POINT_CALLBACK(
+        "DBImpl::BackgroundCompaction():CancelledCompaction", &enough_room);
+    ROCKS_LOG_BUFFER(log_buffer,
+                     "Cancelled compaction because not enough room");
+    RecordTick(stats_, COMPACTION_CANCELLED, 1);
+  }
+  return enough_room;
+}
+
+bool DBImpl::RequestCompactionToken(ColumnFamilyData* cfd, bool force,
+                                    std::unique_ptr<TaskLimiterToken>* token,
+                                    LogBuffer* log_buffer) {
+  assert(*token == nullptr);
+  auto limiter = static_cast<ConcurrentTaskLimiterImpl*>(
+      cfd->ioptions()->compaction_thread_limiter.get());
+  if (limiter == nullptr) {
+    return true;
+  }
+  *token = limiter->GetToken(force);
+  if (*token != nullptr) {
+    ROCKS_LOG_BUFFER(log_buffer,
+                     "Thread limiter [%s] increase [%s] compaction task, "
+                     "force: %s, tasks after: %d",
+                     limiter->GetName().c_str(), cfd->GetName().c_str(),
+                     force ? "true" : "false", limiter->GetOutstandingTask());
+    return true;
+  }
+  return false;
+}
+
+IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
+                                VersionEdit* synced_wals) {
+  TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start");
+  InstrumentedMutexLock l(&log_write_mutex_);
+  autovector<log::Writer*> logs_to_sync;
+  uint64_t current_log_number = logfile_number_;
+  while (logs_.front().number < current_log_number &&
+         logs_.front().IsSyncing()) {
+    log_sync_cv_.Wait();
+  }
+  for (auto it = logs_.begin();
+       it != logs_.end() && it->number < current_log_number; ++it) {
+    auto& log = *it;
+    log.PrepareForSync();
+    logs_to_sync.push_back(log.writer);
+  }
+
+  IOStatus io_s;
+  if (!logs_to_sync.empty()) {
+    log_write_mutex_.Unlock();
+
+    assert(job_context);
+
+    for (log::Writer* log : logs_to_sync) {
+      ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                     "[JOB %d] Syncing log #%" PRIu64, job_context->job_id,
+                     log->get_log_number());
+      if (error_handler_.IsRecoveryInProgress()) {
+        log->file()->reset_seen_error();
+      }
+      io_s = log->file()->Sync(immutable_db_options_.use_fsync);
+      if (!io_s.ok()) {
+        break;
+      }
+
+      if (immutable_db_options_.recycle_log_file_num > 0) {
+        if (error_handler_.IsRecoveryInProgress()) {
+          log->file()->reset_seen_error();
+        }
+        io_s = log->Close();
+        if (!io_s.ok()) {
+          break;
+        }
+      }
+    }
+    if (io_s.ok()) {
+      io_s = directories_.GetWalDir()->FsyncWithDirOptions(
+          IOOptions(), nullptr,
+          DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+    }
+
+    TEST_SYNC_POINT_CALLBACK("DBImpl::SyncClosedLogs:BeforeReLock",
+                             /*arg=*/nullptr);
+    log_write_mutex_.Lock();
+
+    // "number <= current_log_number - 1" is equivalent to
+    // "number < current_log_number".
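+    // Worked example of this cutoff (hypothetical log numbers, added here
+    // purely for illustration): if logfile_number_ == 7 and logs_ holds
+    // writers for logs 5, 6 and 7, only 5 and 6 are closed and synced above,
+    // so the call below marks exactly logs 5 and 6 as synced:
+    //
+    //   MarkLogsSynced(7 - 1 /* current_log_number - 1 */, true, synced_wals);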
+    if (io_s.ok()) {
+      MarkLogsSynced(current_log_number - 1, true, synced_wals);
+    } else {
+      MarkLogsNotSynced(current_log_number - 1);
+    }
+    if (!io_s.ok()) {
+      TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed");
+      return io_s;
+    }
+  }
+  TEST_SYNC_POINT("DBImpl::SyncClosedLogs:end");
+  return io_s;
+}
+
+Status DBImpl::FlushMemTableToOutputFile(
+    ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
+    bool* made_progress, JobContext* job_context, FlushReason flush_reason,
+    SuperVersionContext* superversion_context,
+    std::vector<SequenceNumber>& snapshot_seqs,
+    SequenceNumber earliest_write_conflict_snapshot,
+    SnapshotChecker* snapshot_checker, LogBuffer* log_buffer,
+    Env::Priority thread_pri) {
+  mutex_.AssertHeld();
+  assert(cfd);
+  assert(cfd->imm());
+  assert(cfd->imm()->NumNotFlushed() != 0);
+  assert(cfd->imm()->IsFlushPending());
+  assert(versions_);
+  assert(versions_->GetColumnFamilySet());
+  // If there is more than one column family, we need to make sure that
+  // all the log files except the most recent one are synced. Otherwise if
+  // the host crashes after flushing and before WAL is persistent, the
+  // flushed SST may contain data from write batches whose updates to
+  // other (unflushed) column families are missing.
+  const bool needs_to_sync_closed_wals =
+      logfile_number_ > 0 &&
+      versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1;
+
+  // If needs_to_sync_closed_wals is true, we need to record the current
+  // maximum memtable ID of this column family so that a later PickMemtables()
+  // call will not pick memtables whose IDs are higher. This is due to the fact
+  // that SyncClosedLogs() may release the db mutex, and memtable switch can
+  // happen for this column family in the meantime. The newly created memtables
+  // have their data backed by unsynced WALs, thus they cannot be included in
+  // this flush job.
+  // Another reason why we must record the current maximum memtable ID of this
+  // column family: SyncClosedLogs() may release the db mutex, thus it's
+  // possible for the application to continue to insert into memtables,
+  // increasing the db's sequence number. The application may take a snapshot,
+  // but this snapshot is not included in `snapshot_seqs` which will be passed
+  // to the flush job because `snapshot_seqs` has already been computed before
+  // this function starts. Recording the max memtable ID ensures that the flush
+  // job does not flush a memtable without knowing such snapshot(s).
+  uint64_t max_memtable_id = needs_to_sync_closed_wals
+                                 ? cfd->imm()->GetLatestMemTableID()
+                                 : std::numeric_limits<uint64_t>::max();
+
+  // If needs_to_sync_closed_wals is false, then the flush job will pick ALL
+  // existing memtables of the column family when PickMemTable() is called
+  // later. Although we won't call SyncClosedLogs() in this case, we may still
+  // call the callbacks of the listeners, i.e. NotifyOnFlushBegin() which also
+  // releases and re-acquires the db mutex. In the meantime, the application
+  // can still insert into the memtables and increase the db's sequence number.
+  // The application can take a snapshot, hoping that the latest visible state
+  // to this snapshot is preserved. This is hard to guarantee since the db
+  // mutex is not held. This newly-created snapshot is not included in
+  // `snapshot_seqs` and the flush job is unaware of its presence. Consequently,
+  // the flush job may drop certain keys when generating the L0, causing
+  // incorrect data to be returned for snapshot reads using this snapshot.
+  // To address this, we make sure NotifyOnFlushBegin() executes after memtable
+  // picking so that no new snapshot can be taken between the two functions.
+
+  FlushJob flush_job(
+      dbname_, cfd, immutable_db_options_, mutable_cf_options, max_memtable_id,
+      file_options_for_compaction_, versions_.get(), &mutex_, &shutting_down_,
+      snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
+      job_context, flush_reason, log_buffer, directories_.GetDbDir(),
+      GetDataDir(cfd, 0U),
+      GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
+      &event_logger_, mutable_cf_options.report_bg_io_stats,
+      true /* sync_output_directory */, true /* write_manifest */, thread_pri,
+      io_tracer_, seqno_time_mapping_, db_id_, db_session_id_,
+      cfd->GetFullHistoryTsLow(), &blob_callback_);
+  FileMetaData file_meta;
+
+  Status s;
+  bool need_cancel = false;
+  IOStatus log_io_s = IOStatus::OK();
+  if (needs_to_sync_closed_wals) {
+    // SyncClosedLogs() may unlock and re-lock the log_write_mutex multiple
+    // times.
+    VersionEdit synced_wals;
+    mutex_.Unlock();
+    log_io_s = SyncClosedLogs(job_context, &synced_wals);
+    mutex_.Lock();
+    if (log_io_s.ok() && synced_wals.IsWalAddition()) {
+      const ReadOptions read_options(Env::IOActivity::kFlush);
+      log_io_s =
+          status_to_io_status(ApplyWALToManifest(read_options, &synced_wals));
+      TEST_SYNC_POINT_CALLBACK("DBImpl::FlushMemTableToOutputFile:CommitWal:1",
+                               nullptr);
+    }
+
+    if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() &&
+        !log_io_s.IsColumnFamilyDropped()) {
+      error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush);
+    }
+  } else {
+    TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip");
+  }
+  s = log_io_s;
+
+  // If the log sync failed, we do not need to pick memtable. Otherwise,
+  // num_flush_not_started_ needs to be rolled back.
+  TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
+  if (s.ok()) {
+    flush_job.PickMemTable();
+    need_cancel = true;
+  }
+  TEST_SYNC_POINT_CALLBACK(
+      "DBImpl::FlushMemTableToOutputFile:AfterPickMemtables", &flush_job);
+
+  // may temporarily unlock and lock the mutex.
+  NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id,
+                     flush_reason);
+
+  bool switched_to_mempurge = false;
+  // Within flush_job.Run, rocksdb may call event listener to notify
+  // file creation and deletion.
+  //
+  // Note that flush_job.Run will unlock and lock the db_mutex,
+  // and EventListener callback will be called when the db_mutex
+  // is unlocked by the current thread.
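+  // A minimal outline of the unlock-around-callbacks pattern the comments
+  // above describe (an illustration, not the literal call sequence):
+  //
+  //   flush_job.PickMemTable();            // capture memtables under mutex_
+  //   mutex_.Unlock();
+  //   listener->OnFlushBegin(this, info);  // user code runs without the lock
+  //   mutex_.Lock();                       // re-acquire before using DB state
+  //
+  // Anything the flush depends on (max memtable ID, snapshot list) must be
+  // captured before the first unlock.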
+  if (s.ok()) {
+    s = flush_job.Run(&logs_with_prep_tracker_, &file_meta,
+                      &switched_to_mempurge);
+    need_cancel = false;
+  }
+
+  if (!s.ok() && need_cancel) {
+    flush_job.Cancel();
+  }
+
+  if (s.ok()) {
+    InstallSuperVersionAndScheduleWork(cfd, superversion_context,
+                                       mutable_cf_options);
+    if (made_progress) {
+      *made_progress = true;
+    }
+
+    const std::string& column_family_name = cfd->GetName();
+
+    Version* const current = cfd->current();
+    assert(current);
+
+    const VersionStorageInfo* const storage_info = current->storage_info();
+    assert(storage_info);
+
+    VersionStorageInfo::LevelSummaryStorage tmp;
+    ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
+                     column_family_name.c_str(),
+                     storage_info->LevelSummary(&tmp));
+
+    const auto& blob_files = storage_info->GetBlobFiles();
+    if (!blob_files.empty()) {
+      assert(blob_files.front());
+      assert(blob_files.back());
+
+      ROCKS_LOG_BUFFER(
+          log_buffer,
+          "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n",
+          column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(),
+          blob_files.back()->GetBlobFileNumber());
+    }
+  }
+
+  if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()) {
+    if (log_io_s.ok()) {
+      // Error while writing to MANIFEST.
+      // In fact, versions_->io_status() can also be the result of renaming
+      // CURRENT file. With current code, it's just difficult to tell. So just
+      // be pessimistic and try to write to a new MANIFEST.
+      // TODO: distinguish between MANIFEST write and CURRENT renaming
+      if (!versions_->io_status().ok()) {
+        // If WAL sync is successful (either WAL size is 0 or there is no IO
+        // error), all the Manifest writes will be mapped to soft errors.
+        // TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor is
+        // needed.
+        error_handler_.SetBGError(s,
+                                  BackgroundErrorReason::kManifestWriteNoWAL);
+      } else {
+        // If WAL sync is successful (either WAL size is 0 or there is no IO
+        // error), all the other SST file write errors will be set as
+        // kFlushNoWAL.
+        error_handler_.SetBGError(s, BackgroundErrorReason::kFlushNoWAL);
+      }
+    } else {
+      assert(s == log_io_s);
+      Status new_bg_error = s;
+      error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
+    }
+  }
+  // If flush ran smoothly and no mempurge happened,
+  // install the new SST file path.
+  if (s.ok() && (!switched_to_mempurge)) {
+    // may temporarily unlock and lock the mutex.
+    NotifyOnFlushCompleted(cfd, mutable_cf_options,
+                           flush_job.GetCommittedFlushJobsInfo());
+    auto sfm = static_cast<SstFileManagerImpl*>(
+        immutable_db_options_.sst_file_manager.get());
+    if (sfm) {
+      // Notify sst_file_manager that a new file was added
+      std::string file_path = MakeTableFileName(
+          cfd->ioptions()->cf_paths[0].path, file_meta.fd.GetNumber());
+      // TODO (PR7798). We should only add the file to the FileManager if it
+      // exists. Otherwise, some tests may fail. Ignore the error in the
+      // interim.
+      sfm->OnAddFile(file_path).PermitUncheckedError();
+      if (sfm->IsMaxAllowedSpaceReached()) {
+        Status new_bg_error =
+            Status::SpaceLimit("Max allowed space was reached");
+        TEST_SYNC_POINT_CALLBACK(
+            "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached",
+            &new_bg_error);
+        error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
+      }
+    }
+  }
+  TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:Finish");
+  return s;
+}
+
+Status DBImpl::FlushMemTablesToOutputFiles(
+    const autovector<BGFlushArg>& bg_flush_args, bool* made_progress,
+    JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri) {
+  if (immutable_db_options_.atomic_flush) {
+    return AtomicFlushMemTablesToOutputFiles(
+        bg_flush_args, made_progress, job_context, log_buffer, thread_pri);
+  }
+  assert(bg_flush_args.size() == 1);
+  std::vector<SequenceNumber> snapshot_seqs;
+  SequenceNumber earliest_write_conflict_snapshot;
+  SnapshotChecker* snapshot_checker;
+  GetSnapshotContext(job_context, &snapshot_seqs,
+                     &earliest_write_conflict_snapshot, &snapshot_checker);
+  const auto& bg_flush_arg = bg_flush_args[0];
+  ColumnFamilyData* cfd = bg_flush_arg.cfd_;
+  // intentional infrequent copy for each flush
+  MutableCFOptions mutable_cf_options_copy = *cfd->GetLatestMutableCFOptions();
+  SuperVersionContext* superversion_context =
+      bg_flush_arg.superversion_context_;
+  FlushReason flush_reason = bg_flush_arg.flush_reason_;
+  Status s = FlushMemTableToOutputFile(
+      cfd, mutable_cf_options_copy, made_progress, job_context, flush_reason,
+      superversion_context, snapshot_seqs, earliest_write_conflict_snapshot,
+      snapshot_checker, log_buffer, thread_pri);
+  return s;
+}
+
+/*
+ * Atomically flushes multiple column families.
+ *
+ * For each column family, all memtables with ID smaller than or equal to the
+ * ID specified in bg_flush_args will be flushed. Only after all column
+ * families finish flush will this function commit to MANIFEST. If any of the
+ * column families are not flushed successfully, this function does not have
+ * any side-effect on the state of the database.
+ */
+Status DBImpl::AtomicFlushMemTablesToOutputFiles(
+    const autovector<BGFlushArg>& bg_flush_args, bool* made_progress,
+    JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri) {
+  mutex_.AssertHeld();
+
+  autovector<ColumnFamilyData*> cfds;
+  for (const auto& arg : bg_flush_args) {
+    cfds.emplace_back(arg.cfd_);
+  }
+
+#ifndef NDEBUG
+  for (const auto cfd : cfds) {
+    assert(cfd->imm()->NumNotFlushed() != 0);
+    assert(cfd->imm()->IsFlushPending());
+  }
+  for (const auto& bg_flush_arg : bg_flush_args) {
+    assert(bg_flush_arg.flush_reason_ == bg_flush_args[0].flush_reason_);
+  }
+#endif /* !NDEBUG */
+
+  std::vector<SequenceNumber> snapshot_seqs;
+  SequenceNumber earliest_write_conflict_snapshot;
+  SnapshotChecker* snapshot_checker;
+  GetSnapshotContext(job_context, &snapshot_seqs,
+                     &earliest_write_conflict_snapshot, &snapshot_checker);
+
+  autovector<FSDirectory*> distinct_output_dirs;
+  autovector<std::string> distinct_output_dir_paths;
+  std::vector<std::unique_ptr<FlushJob>> jobs;
+  std::vector<MutableCFOptions> all_mutable_cf_options;
+  int num_cfs = static_cast<int>(cfds.size());
+  all_mutable_cf_options.reserve(num_cfs);
+  for (int i = 0; i < num_cfs; ++i) {
+    auto cfd = cfds[i];
+    FSDirectory* data_dir = GetDataDir(cfd, 0U);
+    const std::string& curr_path = cfd->ioptions()->cf_paths[0].path;
+
+    // Add to distinct output directories if eligible. Use linear search. Since
+    // the number of elements in the vector is not large, performance should be
+    // tolerable.
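+    // For contrast, a hash-set version of this dedup would look like the
+    // sketch below (illustrative only; `seen` is hypothetical), but for a
+    // handful of cf_paths the linear scan that follows is cheaper and simpler:
+    //
+    //   std::unordered_set<std::string> seen;
+    //   if (seen.insert(curr_path).second) {
+    //     distinct_output_dir_paths.emplace_back(curr_path);
+    //     distinct_output_dirs.emplace_back(data_dir);
+    //   }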
+    bool found = false;
+    for (const auto& path : distinct_output_dir_paths) {
+      if (path == curr_path) {
+        found = true;
+        break;
+      }
+    }
+    if (!found) {
+      distinct_output_dir_paths.emplace_back(curr_path);
+      distinct_output_dirs.emplace_back(data_dir);
+    }
+
+    all_mutable_cf_options.emplace_back(*cfd->GetLatestMutableCFOptions());
+    const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.back();
+    uint64_t max_memtable_id = bg_flush_args[i].max_memtable_id_;
+    FlushReason flush_reason = bg_flush_args[i].flush_reason_;
+    jobs.emplace_back(new FlushJob(
+        dbname_, cfd, immutable_db_options_, mutable_cf_options,
+        max_memtable_id, file_options_for_compaction_, versions_.get(), &mutex_,
+        &shutting_down_, snapshot_seqs, earliest_write_conflict_snapshot,
+        snapshot_checker, job_context, flush_reason, log_buffer,
+        directories_.GetDbDir(), data_dir,
+        GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
+        &event_logger_, mutable_cf_options.report_bg_io_stats,
+        false /* sync_output_directory */, false /* write_manifest */,
+        thread_pri, io_tracer_, seqno_time_mapping_, db_id_, db_session_id_,
+        cfd->GetFullHistoryTsLow(), &blob_callback_));
+  }
+
+  std::vector<FileMetaData> file_meta(num_cfs);
+  // Use of deque<bool> because vector<bool>
+  // is specific and doesn't allow &v[i].
+  std::deque<bool> switched_to_mempurge(num_cfs, false);
+  Status s;
+  IOStatus log_io_s = IOStatus::OK();
+  assert(num_cfs == static_cast<int>(jobs.size()));
+
+  for (int i = 0; i != num_cfs; ++i) {
+    const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.at(i);
+    // may temporarily unlock and lock the mutex.
+    FlushReason flush_reason = bg_flush_args[i].flush_reason_;
+    NotifyOnFlushBegin(cfds[i], &file_meta[i], mutable_cf_options,
+                       job_context->job_id, flush_reason);
+  }
+
+  if (logfile_number_ > 0) {
+    // TODO (yanqin) investigate whether we should sync the closed logs for
+    // single column family case.
+    VersionEdit synced_wals;
+    mutex_.Unlock();
+    log_io_s = SyncClosedLogs(job_context, &synced_wals);
+    mutex_.Lock();
+    if (log_io_s.ok() && synced_wals.IsWalAddition()) {
+      const ReadOptions read_options(Env::IOActivity::kFlush);
+      log_io_s =
+          status_to_io_status(ApplyWALToManifest(read_options, &synced_wals));
+    }
+
+    if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() &&
+        !log_io_s.IsColumnFamilyDropped()) {
+      if (total_log_size_ > 0) {
+        error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush);
+      } else {
+        // If the WAL is empty, we use a different error reason
+        error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlushNoWAL);
+      }
+    }
+  }
+  s = log_io_s;
+
+  // exec_status stores the execution status of flush_jobs as
+  // <bool /* executed */, Status /* status code */>
+  autovector<std::pair<bool, Status>> exec_status;
+  std::vector<bool> pick_status;
+  for (int i = 0; i != num_cfs; ++i) {
+    // Initially all jobs are not executed, with status OK.
+    exec_status.emplace_back(false, Status::OK());
+    pick_status.push_back(false);
+  }
+
+  if (s.ok()) {
+    for (int i = 0; i != num_cfs; ++i) {
+      jobs[i]->PickMemTable();
+      pick_status[i] = true;
+    }
+  }
+
+  if (s.ok()) {
+    assert(switched_to_mempurge.size() ==
+           static_cast<size_t>(num_cfs));
+    // TODO (yanqin): parallelize jobs with threads.
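+    // One possible shape of the parallelization this TODO asks for (a hedged
+    // sketch only; RocksDB spawns threads as port::Thread elsewhere):
+    //
+    //   std::vector<port::Thread> workers;
+    //   for (int i = 1; i != num_cfs; ++i) {
+    //     workers.emplace_back([&, i] {
+    //       exec_status[i].second = jobs[i]->Run(
+    //           &logs_with_prep_tracker_, &file_meta[i],
+    //           &(switched_to_mempurge.at(i)));
+    //       exec_status[i].first = true;
+    //     });
+    //   }
+    //   // ... run jobs[0] inline, then join all workers ...
+    //
+    // Today the jobs run sequentially on this thread, with job 0 run last.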
+    for (int i = 1; i != num_cfs; ++i) {
+      exec_status[i].second =
+          jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i],
+                       &(switched_to_mempurge.at(i)));
+      exec_status[i].first = true;
+    }
+    if (num_cfs > 1) {
+      TEST_SYNC_POINT(
+          "DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:1");
+      TEST_SYNC_POINT(
+          "DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:2");
+    }
+    assert(exec_status.size() > 0);
+    assert(!file_meta.empty());
+    exec_status[0].second = jobs[0]->Run(
+        &logs_with_prep_tracker_, file_meta.data() /* &file_meta[0] */,
+        switched_to_mempurge.empty() ? nullptr : &(switched_to_mempurge.at(0)));
+    exec_status[0].first = true;
+
+    Status error_status;
+    for (const auto& e : exec_status) {
+      if (!e.second.ok()) {
+        s = e.second;
+        if (!e.second.IsShutdownInProgress() &&
+            !e.second.IsColumnFamilyDropped()) {
+          // If a flush job did not return OK, and the CF is not dropped, and
+          // the DB is not shutting down, then we have to return this result to
+          // caller later.
+          error_status = e.second;
+        }
+      }
+    }
+
+    s = error_status.ok() ? s : error_status;
+  }
+
+  if (s.IsColumnFamilyDropped()) {
+    s = Status::OK();
+  }
+
+  if (s.ok() || s.IsShutdownInProgress()) {
+    // Sync on all distinct output directories.
+    for (auto dir : distinct_output_dirs) {
+      if (dir != nullptr) {
+        Status error_status = dir->FsyncWithDirOptions(
+            IOOptions(), nullptr,
+            DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+        if (!error_status.ok()) {
+          s = error_status;
+          break;
+        }
+      }
+    }
+  } else {
+    // Need to undo atomic flush if something went wrong, i.e. s is not OK and
+    // it is not because of CF drop.
+    // Have to cancel the flush jobs that have NOT executed because we need to
+    // unref the versions.
+    for (int i = 0; i != num_cfs; ++i) {
+      if (pick_status[i] && !exec_status[i].first) {
+        jobs[i]->Cancel();
+      }
+    }
+    for (int i = 0; i != num_cfs; ++i) {
+      if (exec_status[i].second.ok() && exec_status[i].first) {
+        auto& mems = jobs[i]->GetMemTables();
+        cfds[i]->imm()->RollbackMemtableFlush(mems,
+                                              file_meta[i].fd.GetNumber());
+      }
+    }
+  }
+
+  if (s.ok()) {
+    const auto wait_to_install_func =
+        [&]() -> std::pair<Status, bool> {
+      if (!versions_->io_status().ok()) {
+        // Something went wrong elsewhere, we cannot count on waiting for our
+        // turn to write/sync to MANIFEST or CURRENT. Just return.
+        return std::make_pair(versions_->io_status(), false);
+      } else if (shutting_down_.load(std::memory_order_acquire)) {
+        return std::make_pair(Status::ShutdownInProgress(), false);
+      }
+      bool ready = true;
+      for (size_t i = 0; i != cfds.size(); ++i) {
+        const auto& mems = jobs[i]->GetMemTables();
+        if (cfds[i]->IsDropped()) {
+          // If the column family is dropped, then do not wait.
+          continue;
+        } else if (!mems.empty() &&
+                   cfds[i]->imm()->GetEarliestMemTableID() < mems[0]->GetID()) {
+          // If a flush job needs to install the flush result for mems and
+          // mems[0] is not the earliest memtable, it means another thread must
+          // be installing flush results for the same column family, so the
+          // current thread needs to wait.
+          ready = false;
+          break;
+        } else if (mems.empty() && cfds[i]->imm()->GetEarliestMemTableID() <=
+                                       bg_flush_args[i].max_memtable_id_) {
+          // If a flush job does not need to install flush results, then it has
+          // to wait until all memtables up to max_memtable_id_ (inclusive) are
+          // installed.
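+          // Worked example with hypothetical IDs: if max_memtable_id_ == 5
+          // and GetEarliestMemTableID() == 4, memtables 4 and 5 still await
+          // installation by some other job, so this thread keeps waiting.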
+          ready = false;
+          break;
+        }
+      }
+      return std::make_pair(Status::OK(), !ready);
+    };
+
+    bool resuming_from_bg_err =
+        error_handler_.IsDBStopped() ||
+        (bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
+         bg_flush_args[0].flush_reason_ ==
+             FlushReason::kErrorRecoveryRetryFlush);
+    while ((!resuming_from_bg_err || error_handler_.GetRecoveryError().ok())) {
+      std::pair<Status, bool> res = wait_to_install_func();
+
+      TEST_SYNC_POINT_CALLBACK(
+          "DBImpl::AtomicFlushMemTablesToOutputFiles:WaitToCommit", &res);
+
+      if (!res.first.ok()) {
+        s = res.first;
+        break;
+      } else if (!res.second) {
+        break;
+      }
+      atomic_flush_install_cv_.Wait();
+
+      resuming_from_bg_err =
+          error_handler_.IsDBStopped() ||
+          (bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
+           bg_flush_args[0].flush_reason_ ==
+               FlushReason::kErrorRecoveryRetryFlush);
+    }
+
+    if (!resuming_from_bg_err) {
+      // If not resuming from bg err, then we determine future action based on
+      // whether we hit background error.
+      if (s.ok()) {
+        s = error_handler_.GetBGError();
+      }
+    } else if (s.ok()) {
+      // If resuming from bg err, we still rely on wait_to_install_func()'s
+      // result to determine future action. If wait_to_install_func() returns
+      // non-ok already, then we should not proceed to flush result
+      // installation.
+      s = error_handler_.GetRecoveryError();
+    }
+  }
+
+  if (s.ok()) {
+    autovector<ColumnFamilyData*> tmp_cfds;
+    autovector<const autovector<MemTable*>*> mems_list;
+    autovector<const MutableCFOptions*> mutable_cf_options_list;
+    autovector<FileMetaData*> tmp_file_meta;
+    autovector<std::list<std::unique_ptr<FlushJobInfo>>*>
+        committed_flush_jobs_info;
+    for (int i = 0; i != num_cfs; ++i) {
+      const auto& mems = jobs[i]->GetMemTables();
+      if (!cfds[i]->IsDropped() && !mems.empty()) {
+        tmp_cfds.emplace_back(cfds[i]);
+        mems_list.emplace_back(&mems);
+        mutable_cf_options_list.emplace_back(&all_mutable_cf_options[i]);
+        tmp_file_meta.emplace_back(&file_meta[i]);
+        committed_flush_jobs_info.emplace_back(
+            jobs[i]->GetCommittedFlushJobsInfo());
+      }
+    }
+
+    s = InstallMemtableAtomicFlushResults(
+        nullptr /* imm_lists */, tmp_cfds, mutable_cf_options_list, mems_list,
+        versions_.get(), &logs_with_prep_tracker_, &mutex_, tmp_file_meta,
+        committed_flush_jobs_info, &job_context->memtables_to_free,
+        directories_.GetDbDir(), log_buffer);
+  }
+
+  if (s.ok()) {
+    assert(num_cfs ==
+           static_cast<int>(job_context->superversion_contexts.size()));
+    for (int i = 0; i != num_cfs; ++i) {
+      assert(cfds[i]);
+
+      if (cfds[i]->IsDropped()) {
+        continue;
+      }
+      InstallSuperVersionAndScheduleWork(cfds[i],
+                                         &job_context->superversion_contexts[i],
+                                         all_mutable_cf_options[i]);
+
+      const std::string& column_family_name = cfds[i]->GetName();
+
+      Version* const current = cfds[i]->current();
+      assert(current);
+
+      const VersionStorageInfo* const storage_info = current->storage_info();
+      assert(storage_info);
+
+      VersionStorageInfo::LevelSummaryStorage tmp;
+      ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
+                       column_family_name.c_str(),
+                       storage_info->LevelSummary(&tmp));
+
+      const auto& blob_files = storage_info->GetBlobFiles();
+      if (!blob_files.empty()) {
+        assert(blob_files.front());
+        assert(blob_files.back());
+
+        ROCKS_LOG_BUFFER(
+            log_buffer,
+            "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n",
+            column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(),
+            blob_files.back()->GetBlobFileNumber());
+      }
+    }
+    if (made_progress) {
+      *made_progress = true;
+    }
+    auto sfm = static_cast<SstFileManagerImpl*>(
+        immutable_db_options_.sst_file_manager.get());
+    assert(all_mutable_cf_options.size() == static_cast<size_t>(num_cfs));
+    for (int i = 0; s.ok() && i != num_cfs; ++i) {
+      // If mempurge happened instead of Flush,
+      // no NotifyOnFlushCompleted call (no SST file created).
+      if (switched_to_mempurge[i]) {
+        continue;
+      }
+      if (cfds[i]->IsDropped()) {
+        continue;
+      }
+      NotifyOnFlushCompleted(cfds[i], all_mutable_cf_options[i],
+                             jobs[i]->GetCommittedFlushJobsInfo());
+      if (sfm) {
+        std::string file_path = MakeTableFileName(
+            cfds[i]->ioptions()->cf_paths[0].path, file_meta[i].fd.GetNumber());
+        // TODO (PR7798). We should only add the file to the FileManager if it
+        // exists. Otherwise, some tests may fail. Ignore the error in the
+        // interim.
+        sfm->OnAddFile(file_path).PermitUncheckedError();
+        if (sfm->IsMaxAllowedSpaceReached() &&
+            error_handler_.GetBGError().ok()) {
+          Status new_bg_error =
+              Status::SpaceLimit("Max allowed space was reached");
+          error_handler_.SetBGError(new_bg_error,
+                                    BackgroundErrorReason::kFlush);
+        }
+      }
+    }
+  }
+
+  // Need to undo atomic flush if something went wrong, i.e. s is not OK and
+  // it is not because of CF drop.
+  if (!s.ok() && !s.IsColumnFamilyDropped()) {
+    if (log_io_s.ok()) {
+      // Error while writing to MANIFEST.
+      // In fact, versions_->io_status() can also be the result of renaming
+      // CURRENT file. With current code, it's just difficult to tell. So just
+      // be pessimistic and try to write to a new MANIFEST.
+      // TODO: distinguish between MANIFEST write and CURRENT renaming
+      if (!versions_->io_status().ok()) {
+        // If WAL sync is successful (either WAL size is 0 or there is no IO
+        // error), all the Manifest writes will be mapped to soft errors.
+        // TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor
+        // is needed.
+        error_handler_.SetBGError(s,
+                                  BackgroundErrorReason::kManifestWriteNoWAL);
+      } else {
+        // If WAL sync is successful (either WAL size is 0 or there is no IO
+        // error), all the other SST file write errors will be set as
+        // kFlushNoWAL.
+        error_handler_.SetBGError(s, BackgroundErrorReason::kFlushNoWAL);
+      }
+    } else {
+      assert(s == log_io_s);
+      Status new_bg_error = s;
+      error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
+    }
+  }
+
+  return s;
+}
+
+void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
+                                const MutableCFOptions& mutable_cf_options,
+                                int job_id, FlushReason flush_reason) {
+  if (immutable_db_options_.listeners.size() == 0U) {
+    return;
+  }
+  mutex_.AssertHeld();
+  if (shutting_down_.load(std::memory_order_acquire)) {
+    return;
+  }
+  bool triggered_writes_slowdown =
+      (cfd->current()->storage_info()->NumLevelFiles(0) >=
+       mutable_cf_options.level0_slowdown_writes_trigger);
+  bool triggered_writes_stop =
+      (cfd->current()->storage_info()->NumLevelFiles(0) >=
+       mutable_cf_options.level0_stop_writes_trigger);
+  // release lock while notifying events
+  mutex_.Unlock();
+  {
+    FlushJobInfo info{};
+    info.cf_id = cfd->GetID();
+    info.cf_name = cfd->GetName();
+    // TODO(yhchiang): make db_paths dynamic in case flush does not
+    // go to L0 in the future.
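+    // For reference, a minimal user-side listener invoked below might look
+    // like the following sketch (the class is illustrative; the full
+    // interface is in include/rocksdb/listener.h):
+    //
+    //   class FlushLogger : public EventListener {
+    //    public:
+    //     void OnFlushBegin(DB* db, const FlushJobInfo& info) override {
+    //       fprintf(stderr, "flush begin: cf=%s\n", info.cf_name.c_str());
+    //     }
+    //   };
+    //
+    // installed through DBOptions::listeners before DB::Open().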
+    const uint64_t file_number = file_meta->fd.GetNumber();
+    info.file_path =
+        MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_number);
+    info.file_number = file_number;
+    info.thread_id = env_->GetThreadID();
+    info.job_id = job_id;
+    info.triggered_writes_slowdown = triggered_writes_slowdown;
+    info.triggered_writes_stop = triggered_writes_stop;
+    info.smallest_seqno = file_meta->fd.smallest_seqno;
+    info.largest_seqno = file_meta->fd.largest_seqno;
+    info.flush_reason = flush_reason;
+    for (auto listener : immutable_db_options_.listeners) {
+      listener->OnFlushBegin(this, info);
+    }
+  }
+  mutex_.Lock();
+  // no need to signal bg_cv_ as it will be signaled at the end of the
+  // flush process.
+}
+
+void DBImpl::NotifyOnFlushCompleted(
+    ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
+    std::list<std::unique_ptr<FlushJobInfo>>* flush_jobs_info) {
+  assert(flush_jobs_info != nullptr);
+  if (immutable_db_options_.listeners.size() == 0U) {
+    return;
+  }
+  mutex_.AssertHeld();
+  if (shutting_down_.load(std::memory_order_acquire)) {
+    return;
+  }
+  bool triggered_writes_slowdown =
+      (cfd->current()->storage_info()->NumLevelFiles(0) >=
+       mutable_cf_options.level0_slowdown_writes_trigger);
+  bool triggered_writes_stop =
+      (cfd->current()->storage_info()->NumLevelFiles(0) >=
+       mutable_cf_options.level0_stop_writes_trigger);
+  // release lock while notifying events
+  mutex_.Unlock();
+  {
+    for (auto& info : *flush_jobs_info) {
+      info->triggered_writes_slowdown = triggered_writes_slowdown;
+      info->triggered_writes_stop = triggered_writes_stop;
+      for (auto listener : immutable_db_options_.listeners) {
+        listener->OnFlushCompleted(this, *info);
+      }
+      TEST_SYNC_POINT(
+          "DBImpl::NotifyOnFlushCompleted::PostAllOnFlushCompleted");
+    }
+    flush_jobs_info->clear();
+  }
+  mutex_.Lock();
+  // no need to signal bg_cv_ as it will be signaled at the end of the
+  // flush process.
+}
+
+Status DBImpl::CompactRange(const CompactRangeOptions& options,
+                            ColumnFamilyHandle* column_family,
+                            const Slice* begin_without_ts,
+                            const Slice* end_without_ts) {
+  if (manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
+    return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
+  }
+
+  if (options.canceled && options.canceled->load(std::memory_order_acquire)) {
+    return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
+  }
+
+  const Comparator* const ucmp = column_family->GetComparator();
+  assert(ucmp);
+  size_t ts_sz = ucmp->timestamp_size();
+  if (ts_sz == 0) {
+    return CompactRangeInternal(options, column_family, begin_without_ts,
+                                end_without_ts, "" /*trim_ts*/);
+  }
+
+  std::string begin_str;
+  std::string end_str;
+
+  // CompactRange compacts all keys in [begin, end] inclusively. Add the
+  // maximum timestamp to include all `begin` keys, and add the minimum
+  // timestamp to include all `end` keys.
+  if (begin_without_ts != nullptr) {
+    AppendKeyWithMaxTimestamp(&begin_str, *begin_without_ts, ts_sz);
+  }
+  if (end_without_ts != nullptr) {
+    AppendKeyWithMinTimestamp(&end_str, *end_without_ts, ts_sz);
+  }
+  Slice begin(begin_str);
+  Slice end(end_str);
+
+  Slice* begin_with_ts = begin_without_ts ? &begin : nullptr;
+  Slice* end_with_ts = end_without_ts ? &end : nullptr;
+
+  return CompactRangeInternal(options, column_family, begin_with_ts,
+                              end_with_ts, "" /*trim_ts*/);
+}
+
+Status DBImpl::IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family,
+                                        std::string ts_low) {
+  ColumnFamilyData* cfd = nullptr;
+  if (column_family == nullptr) {
+    cfd = default_cf_handle_->cfd();
+  } else {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+    assert(cfh != nullptr);
+    cfd = cfh->cfd();
+  }
+  assert(cfd != nullptr && cfd->user_comparator() != nullptr);
+  if (cfd->user_comparator()->timestamp_size() == 0) {
+    return Status::InvalidArgument(
+        "Timestamp is not enabled in this column family");
+  }
+  if (cfd->user_comparator()->timestamp_size() != ts_low.size()) {
+    return Status::InvalidArgument("ts_low size mismatch");
+  }
+  return IncreaseFullHistoryTsLowImpl(cfd, ts_low);
+}
+
+Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
+                                            std::string ts_low) {
+  VersionEdit edit;
+  edit.SetColumnFamily(cfd->GetID());
+  edit.SetFullHistoryTsLow(ts_low);
+
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  TEST_SYNC_POINT_CALLBACK("DBImpl::IncreaseFullHistoryTsLowImpl:BeforeEdit",
+                           &edit);
+
+  InstrumentedMutexLock l(&mutex_);
+  std::string current_ts_low = cfd->GetFullHistoryTsLow();
+  const Comparator* ucmp = cfd->user_comparator();
+  assert(ucmp->timestamp_size() == ts_low.size() && !ts_low.empty());
+  if (!current_ts_low.empty() &&
+      ucmp->CompareTimestamp(ts_low, current_ts_low) < 0) {
+    return Status::InvalidArgument("Cannot decrease full_history_ts_low");
+  }
+
+  Status s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
+                                    read_options, &edit, &mutex_,
+                                    directories_.GetDbDir());
+  if (!s.ok()) {
+    return s;
+  }
+  current_ts_low = cfd->GetFullHistoryTsLow();
+  if (!current_ts_low.empty() &&
+      ucmp->CompareTimestamp(current_ts_low, ts_low) > 0) {
+    std::stringstream oss;
+    oss << "full_history_ts_low: " << Slice(current_ts_low).ToString(true)
+        << " is set to be higher than the requested "
+           "timestamp: "
+        << Slice(ts_low).ToString(true) << std::endl;
+    return Status::TryAgain(oss.str());
+  }
+  return Status::OK();
+}
+
+Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
+                                    ColumnFamilyHandle* column_family,
+                                    const Slice* begin, const Slice* end,
+                                    const std::string& trim_ts) {
+  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+  auto cfd = cfh->cfd();
+
+  if (options.target_path_id >= cfd->ioptions()->cf_paths.size()) {
+    return Status::InvalidArgument("Invalid target path ID");
+  }
+
+  bool flush_needed = true;
+
+  // Update full_history_ts_low if it's set
+  if (options.full_history_ts_low != nullptr &&
+      !options.full_history_ts_low->empty()) {
+    std::string ts_low = options.full_history_ts_low->ToString();
+    if (begin != nullptr || end != nullptr) {
+      return Status::InvalidArgument(
+          "Cannot specify compaction range with full_history_ts_low");
+    }
+    Status s = IncreaseFullHistoryTsLowImpl(cfd, ts_low);
+    if (!s.ok()) {
+      LogFlush(immutable_db_options_.info_log);
+      return s;
+    }
+  }
+
+  Status s;
+  if (begin != nullptr && end != nullptr) {
+    // TODO(ajkr): We could also optimize away the flush in certain cases where
+    // one/both sides of the interval are unbounded. But it requires more
+    // changes to RangesOverlapWithMemtables.
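+    // Illustrative case for the check below: if the only live memtable holds
+    // keys within ["a", "m"] and the manual compaction range is ["x", "z"],
+    // the ranges do not overlap, RangesOverlapWithMemtables() reports
+    // flush_needed == false, and the pre-compaction flush is skipped.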
+    Range range(*begin, *end);
+    SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
+    s = cfd->RangesOverlapWithMemtables(
+        {range}, super_version, immutable_db_options_.allow_data_in_errors,
+        &flush_needed);
+    CleanupSuperVersion(super_version);
+  }
+
+  if (s.ok() && flush_needed) {
+    FlushOptions fo;
+    fo.allow_write_stall = options.allow_write_stall;
+    if (immutable_db_options_.atomic_flush) {
+      s = AtomicFlushMemTables(fo, FlushReason::kManualCompaction);
+    } else {
+      s = FlushMemTable(cfd, fo, FlushReason::kManualCompaction);
+    }
+    if (!s.ok()) {
+      LogFlush(immutable_db_options_.info_log);
+      return s;
+    }
+  }
+
+  constexpr int kInvalidLevel = -1;
+  int final_output_level = kInvalidLevel;
+  bool exclusive = options.exclusive_manual_compaction;
+  if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal &&
+      cfd->NumberLevels() > 1) {
+    // Always compact all files together.
+    final_output_level = cfd->NumberLevels() - 1;
+    // if bottommost level is reserved
+    if (immutable_db_options_.allow_ingest_behind) {
+      final_output_level--;
+    }
+    s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
+                            final_output_level, options, begin, end, exclusive,
+                            false /* disable_trivial_move */,
+                            std::numeric_limits<uint64_t>::max(), trim_ts);
+  } else {
+    int first_overlapped_level = kInvalidLevel;
+    int max_overlapped_level = kInvalidLevel;
+    {
+      SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
+      Version* current_version = super_version->current;
+
+      // Might need to query the partitioner
+      SstPartitionerFactory* partitioner_factory =
+          current_version->cfd()->ioptions()->sst_partitioner_factory.get();
+      std::unique_ptr<SstPartitioner> partitioner;
+      if (partitioner_factory && begin != nullptr && end != nullptr) {
+        SstPartitioner::Context context;
+        context.is_full_compaction = false;
+        context.is_manual_compaction = true;
+        context.output_level = /*unknown*/ -1;
+        // Small lies about compaction range
+        context.smallest_user_key = *begin;
+        context.largest_user_key = *end;
+        partitioner = partitioner_factory->CreatePartitioner(context);
+      }
+
+      ReadOptions ro;
+      ro.total_order_seek = true;
+      ro.io_activity = Env::IOActivity::kCompaction;
+      bool overlap;
+      for (int level = 0;
+           level < current_version->storage_info()->num_non_empty_levels();
+           level++) {
+        overlap = true;
+
+        // Whether to look at specific keys within files for overlap with
+        // compaction range, other than largest and smallest keys of the file
+        // known in Version metadata.
+        bool check_overlap_within_file = false;
+        if (begin != nullptr && end != nullptr) {
+          // Typically checking overlap within files in this case
+          check_overlap_within_file = true;
+          // WART: Not known why we don't check within file in one-sided bound
+          // cases
+          if (partitioner) {
+            // Especially if the partitioner is new, the manual compaction
+            // might be used to enforce the partitioning. Checking overlap
+            // within files might miss cases where compaction is needed to
+            // partition the files, as in this example:
+            // * File has two keys "001" and "111"
+            // * Compaction range is ["011", "101")
+            // * Partition boundary at "100"
+            // In cases like this, file-level overlap with the compaction
+            // range is sufficient to force any partitioning that is needed
+            // within the compaction range.
+            //
+            // But if there's no partitioning boundary within the compaction
+            // range, we can be sure there's no need to fix partitioning
+            // within that range, thus safe to check overlap within file.
+            //
+            // Use a hypothetical trivial move query to check for partition
+            // boundary in range. (NOTE: in defiance of all conventions,
+            // `begin` and `end` here are both INCLUSIVE bounds, which makes
+            // this analogy to CanDoTrivialMove() accurate even when `end` is
+            // the first key in a partition.)
+            if (!partitioner->CanDoTrivialMove(*begin, *end)) {
+              check_overlap_within_file = false;
+            }
+          }
+        }
+        if (check_overlap_within_file) {
+          Status status = current_version->OverlapWithLevelIterator(
+              ro, file_options_, *begin, *end, level, &overlap);
+          if (!status.ok()) {
+            check_overlap_within_file = false;
+          }
+        }
+        if (!check_overlap_within_file) {
+          overlap = current_version->storage_info()->OverlapInLevel(level,
+                                                                    begin, end);
+        }
+        if (overlap) {
+          if (first_overlapped_level == kInvalidLevel) {
+            first_overlapped_level = level;
+          }
+          max_overlapped_level = level;
+        }
+      }
+      CleanupSuperVersion(super_version);
+    }
+    if (s.ok() && first_overlapped_level != kInvalidLevel) {
+      if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
+          cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
+        assert(first_overlapped_level == 0);
+        s = RunManualCompaction(
+            cfd, first_overlapped_level, first_overlapped_level, options, begin,
+            end, exclusive, true /* disallow_trivial_move */,
+            std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
+            trim_ts);
+        final_output_level = max_overlapped_level;
+      } else {
+        assert(cfd->ioptions()->compaction_style == kCompactionStyleLevel);
+        uint64_t next_file_number = versions_->current_next_file_number();
+        // Start compaction from `first_overlapped_level`, one level down at a
+        // time, until output level >= max_overlapped_level.
+        // When max_overlapped_level == 0, we will still compact from L0 -> L1
+        // (or LBase), followed by a bottommost level intra-level compaction
+        // at L1 (or LBase), if applicable.
+        int level = first_overlapped_level;
+        final_output_level = level;
+        int output_level = 0, base_level = 0;
+        while (level < max_overlapped_level || level == 0) {
+          output_level = level + 1;
+          if (cfd->ioptions()->level_compaction_dynamic_level_bytes &&
+              level == 0) {
+            output_level = ColumnFamilyData::kCompactToBaseLevel;
+          }
+          // Use max value for `max_file_num_to_ignore` to always compact
+          // files down.
+          s = RunManualCompaction(
+              cfd, level, output_level, options, begin, end, exclusive,
+              !trim_ts.empty() /* disallow_trivial_move */,
+              std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
+              trim_ts,
+              output_level == ColumnFamilyData::kCompactToBaseLevel
+                  ? &base_level
+                  : nullptr);
+          if (!s.ok()) {
+            break;
+          }
+          if (output_level == ColumnFamilyData::kCompactToBaseLevel) {
+            assert(base_level > 0);
+            level = base_level;
+          } else {
+            ++level;
+          }
+          final_output_level = level;
+          TEST_SYNC_POINT("DBImpl::RunManualCompaction()::1");
+          TEST_SYNC_POINT("DBImpl::RunManualCompaction()::2");
+        }
+        if (s.ok()) {
+          assert(final_output_level > 0);
+          // bottommost level intra-level compaction
+          // TODO(cbi): this preserves earlier behavior where if
+          // max_overlapped_level = 0 and bottommost_level_compaction is
+          // kIfHaveCompactionFilter, we only do a L0 -> LBase compaction
+          // and do not do intra-LBase compaction even when user configures
+          // compaction filter. We may want to still do a LBase -> LBase
+          // compaction in case there is some file in LBase that did not go
+          // through L0 -> LBase compaction, and hence did not go through
+          // compaction filter.
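+          // Caller-side illustration of the option honored below (a hedged
+          // sketch; `db` is hypothetical): forcing a rewrite of the
+          // bottommost level over the whole key space would be
+          //
+          //   CompactRangeOptions cro;
+          //   cro.bottommost_level_compaction =
+          //       BottommostLevelCompaction::kForce;
+          //   db->CompactRange(cro, nullptr, nullptr);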
+          if ((options.bottommost_level_compaction ==
+                   BottommostLevelCompaction::kIfHaveCompactionFilter &&
+               max_overlapped_level != 0 &&
+               (cfd->ioptions()->compaction_filter != nullptr ||
+                cfd->ioptions()->compaction_filter_factory != nullptr)) ||
+              options.bottommost_level_compaction ==
+                  BottommostLevelCompaction::kForceOptimized ||
+              options.bottommost_level_compaction ==
+                  BottommostLevelCompaction::kForce) {
+            // Use `next_file_number` as `max_file_num_to_ignore` to avoid
+            // rewriting newly compacted files when it is kForceOptimized.
+            s = RunManualCompaction(
+                cfd, final_output_level, final_output_level, options, begin,
+                end, exclusive, !trim_ts.empty() /* disallow_trivial_move */,
+                next_file_number /* max_file_num_to_ignore */, trim_ts);
+          }
+        }
+      }
+    }
+  }
+  if (!s.ok() || final_output_level == kInvalidLevel) {
+    LogFlush(immutable_db_options_.info_log);
+    return s;
+  }
+
+  if (options.change_level) {
+    TEST_SYNC_POINT("DBImpl::CompactRange:BeforeRefit:1");
+    TEST_SYNC_POINT("DBImpl::CompactRange:BeforeRefit:2");
+
+    ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                   "[RefitLevel] waiting for background threads to stop");
+    // TODO(hx235): remove `Enable/DisableManualCompaction` and
+    // `Continue/PauseBackgroundWork` once we ensure registering RefitLevel()'s
+    // range is sufficient (if not, what else is needed) for avoiding range
+    // conflicts with other activities (e.g, compaction, flush) that are
+    // currently avoided by `Enable/DisableManualCompaction` and
+    // `Continue/PauseBackgroundWork`.
+    DisableManualCompaction();
+    s = PauseBackgroundWork();
+    if (s.ok()) {
+      TEST_SYNC_POINT("DBImpl::CompactRange:PreRefitLevel");
+      s = ReFitLevel(cfd, final_output_level, options.target_level);
+      TEST_SYNC_POINT("DBImpl::CompactRange:PostRefitLevel");
+      // ContinueBackgroundWork always returns Status::OK().
+      Status temp_s = ContinueBackgroundWork();
+      assert(temp_s.ok());
+    }
+    EnableManualCompaction();
+    TEST_SYNC_POINT(
+        "DBImpl::CompactRange:PostRefitLevel:ManualCompactionEnabled");
+  }
+  LogFlush(immutable_db_options_.info_log);
+
+  {
+    InstrumentedMutexLock l(&mutex_);
+    // an automatic compaction that has been scheduled might have been
+    // preempted by the manual compactions. Need to schedule it back.
+    MaybeScheduleFlushOrCompaction();
+  }
+
+  return s;
+}
+
+Status DBImpl::CompactFiles(const CompactionOptions& compact_options,
+                            ColumnFamilyHandle* column_family,
+                            const std::vector<std::string>& input_file_names,
+                            const int output_level, const int output_path_id,
+                            std::vector<std::string>* const output_file_names,
+                            CompactionJobInfo* compaction_job_info) {
+  if (column_family == nullptr) {
+    return Status::InvalidArgument("ColumnFamilyHandle must be non-null.");
+  }
+
+  auto cfd =
+      static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
+  assert(cfd);
+
+  Status s;
+  JobContext job_context(next_job_id_.fetch_add(1), true);
+  LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
+                       immutable_db_options_.info_log.get());
+
+  // Perform CompactFiles
+  TEST_SYNC_POINT("TestCompactFiles::IngestExternalFile2");
+  TEST_SYNC_POINT_CALLBACK(
+      "TestCompactFiles:PausingManualCompaction:3",
+      reinterpret_cast<void*>(
+          const_cast<std::atomic<int>*>(&manual_compaction_paused_)));
+  {
+    InstrumentedMutexLock l(&mutex_);
+    auto* current = cfd->current();
+    current->Ref();
+
+    s = CompactFilesImpl(compact_options, cfd, current, input_file_names,
+                         output_file_names, output_level, output_path_id,
+                         &job_context, &log_buffer, compaction_job_info);
+
+    current->Unref();
+  }
+
+  // Find and delete obsolete files
+  {
+    InstrumentedMutexLock l(&mutex_);
+    // If !s.ok(), this means that Compaction failed. In that case, we want
+    // to delete all obsolete files we might have created and we force
+    // FindObsoleteFiles(). This is because job_context does not
+    // catch all created files if compaction failed.
+    FindObsoleteFiles(&job_context, !s.ok());
+  }  // release the mutex
+
+  // delete unnecessary files if any, this is done outside the mutex
+  if (job_context.HaveSomethingToClean() ||
+      job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
+    // Have to flush the info logs before bg_compaction_scheduled_--
+    // because if bg_flush_scheduled_ becomes 0 and the lock is
+    // released, the destructor of DB can kick in and destroy all the
+    // states of DB so info_log might not be available after that point.
+    // It also applies to access other states that DB owns.
+    log_buffer.FlushBufferToLog();
+    if (job_context.HaveSomethingToDelete()) {
+      // no mutex is locked here. No need to Unlock() and Lock() here.
+      PurgeObsoleteFiles(job_context);
+    }
+    job_context.Clean();
+  }
+
+  return s;
+}
+
+Status DBImpl::CompactFilesImpl(
+    const CompactionOptions& compact_options, ColumnFamilyData* cfd,
+    Version* version, const std::vector<std::string>& input_file_names,
+    std::vector<std::string>* const output_file_names, const int output_level,
+    int output_path_id, JobContext* job_context, LogBuffer* log_buffer,
+    CompactionJobInfo* compaction_job_info) {
+  mutex_.AssertHeld();
+
+  if (shutting_down_.load(std::memory_order_acquire)) {
+    return Status::ShutdownInProgress();
+  }
+  if (manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
+    return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
+  }
+
+  std::unordered_set<uint64_t> input_set;
+  for (const auto& file_name : input_file_names) {
+    input_set.insert(TableFileNameToNumber(file_name));
+  }
+
+  ColumnFamilyMetaData cf_meta;
+  // TODO(yhchiang): can directly use version here if none of the
+  // following function calls are pluggable to external developers.
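+  // For orientation, a hedged sketch of a typical caller path into this
+  // function via the public API (names and output level are illustrative):
+  //
+  //   ColumnFamilyMetaData meta;
+  //   db->GetColumnFamilyMetaData(&meta);
+  //   std::vector<std::string> inputs;
+  //   for (const auto& f : meta.levels[0].files) inputs.push_back(f.name);
+  //   db->CompactFiles(CompactionOptions(), inputs, 1 /* output_level */);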
+  version->GetColumnFamilyMetaData(&cf_meta);
+
+  if (output_path_id < 0) {
+    if (cfd->ioptions()->cf_paths.size() == 1U) {
+      output_path_id = 0;
+    } else {
+      return Status::NotSupported(
+          "Automatic output path selection is not "
+          "yet supported in CompactFiles()");
+    }
+  }
+
+  Status s = cfd->compaction_picker()->SanitizeCompactionInputFiles(
+      &input_set, cf_meta, output_level);
+  TEST_SYNC_POINT("DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles");
+  if (!s.ok()) {
+    return s;
+  }
+
+  std::vector<CompactionInputFiles> input_files;
+  s = cfd->compaction_picker()->GetCompactionInputsFromFileNumbers(
+      &input_files, &input_set, version->storage_info(), compact_options);
+  if (!s.ok()) {
+    return s;
+  }
+
+  for (const auto& inputs : input_files) {
+    if (cfd->compaction_picker()->AreFilesInCompaction(inputs.files)) {
+      return Status::Aborted(
+          "Some of the necessary compaction input "
+          "files are already being compacted");
+    }
+  }
+  bool sfm_reserved_compact_space = false;
+  // First check if we have enough room to do the compaction
+  bool enough_room = EnoughRoomForCompaction(
+      cfd, input_files, &sfm_reserved_compact_space, log_buffer);
+
+  if (!enough_room) {
+    // m's vars will get set properly at the end of this function,
+    // as long as status == CompactionTooLarge
+    return Status::CompactionTooLarge();
+  }
+
+  // At this point, CompactFiles will be run.
+  bg_compaction_scheduled_++;
+
+  std::unique_ptr<Compaction> c;
+  assert(cfd->compaction_picker());
+  c.reset(cfd->compaction_picker()->CompactFiles(
+      compact_options, input_files, output_level, version->storage_info(),
+      *cfd->GetLatestMutableCFOptions(), mutable_db_options_, output_path_id));
+  // we already sanitized the set of input files and checked for conflicts
+  // without releasing the lock, so we're guaranteed a compaction can be formed.
+  assert(c != nullptr);
+
+  c->SetInputVersion(version);
+  // deletion compaction currently not allowed in CompactFiles.
+  assert(!c->deletion_compaction());
+
+  std::vector<SequenceNumber> snapshot_seqs;
+  SequenceNumber earliest_write_conflict_snapshot;
+  SnapshotChecker* snapshot_checker;
+  GetSnapshotContext(job_context, &snapshot_seqs,
+                     &earliest_write_conflict_snapshot, &snapshot_checker);
+
+  std::unique_ptr<std::list<uint64_t>::iterator> pending_outputs_inserted_elem(
+      new std::list<uint64_t>::iterator(
+          CaptureCurrentFileNumberInPendingOutputs()));
+
+  assert(is_snapshot_supported_ || snapshots_.empty());
+  CompactionJobStats compaction_job_stats;
+  CompactionJob compaction_job(
+      job_context->job_id, c.get(), immutable_db_options_, mutable_db_options_,
+      file_options_for_compaction_, versions_.get(), &shutting_down_,
+      log_buffer, directories_.GetDbDir(),
+      GetDataDir(c->column_family_data(), c->output_path_id()),
+      GetDataDir(c->column_family_data(), 0), stats_, &mutex_, &error_handler_,
+      snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
+      job_context, table_cache_, &event_logger_,
+      c->mutable_cf_options()->paranoid_file_checks,
+      c->mutable_cf_options()->report_bg_io_stats, dbname_,
+      &compaction_job_stats, Env::Priority::USER, io_tracer_,
+      kManualCompactionCanceledFalse_, db_id_, db_session_id_,
+      c->column_family_data()->GetFullHistoryTsLow(), c->trim_ts(),
+      &blob_callback_, &bg_compaction_scheduled_,
+      &bg_bottom_compaction_scheduled_);
+
+  // Creating a compaction influences the compaction score because the score
+  // takes running compactions into account (by skipping files that are already
+  // being compacted). Since we just changed compaction score, we recalculate it
+  // here.
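+  // Rough intuition for the recomputed score (a simplification of
+  // VersionStorageInfo::ComputeCompactionScore(), not the exact formula):
+  // L0's score grows with its file count relative to
+  // level0_file_num_compaction_trigger, and Ln's with its byte size relative
+  // to the level's target size; a score >= 1 marks the level as a compaction
+  // candidate.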
+  version->storage_info()->ComputeCompactionScore(*cfd->ioptions(),
+                                                  *c->mutable_cf_options());
+
+  compaction_job.Prepare();
+
+  mutex_.Unlock();
+  TEST_SYNC_POINT("CompactFilesImpl:0");
+  TEST_SYNC_POINT("CompactFilesImpl:1");
+  // Ignore the status here, as it will be checked in the Install down below...
+  compaction_job.Run().PermitUncheckedError();
+  TEST_SYNC_POINT("CompactFilesImpl:2");
+  TEST_SYNC_POINT("CompactFilesImpl:3");
+  mutex_.Lock();
+
+  Status status = compaction_job.Install(*c->mutable_cf_options());
+  if (status.ok()) {
+    assert(compaction_job.io_status().ok());
+    InstallSuperVersionAndScheduleWork(c->column_family_data(),
+                                       &job_context->superversion_contexts[0],
+                                       *c->mutable_cf_options());
+  }
+  // status above captures any error during compaction_job.Install, so it's OK
+  // not to check compaction_job.io_status() explicitly if we're not calling
+  // SetBGError
+  compaction_job.io_status().PermitUncheckedError();
+  c->ReleaseCompactionFiles(s);
+  // Need to make sure SstFileManager does its bookkeeping
+  auto sfm = static_cast<SstFileManagerImpl*>(
+      immutable_db_options_.sst_file_manager.get());
+  if (sfm && sfm_reserved_compact_space) {
+    sfm->OnCompactionCompletion(c.get());
+  }
+
+  ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
+
+  if (compaction_job_info != nullptr) {
+    BuildCompactionJobInfo(cfd, c.get(), s, compaction_job_stats,
+                           job_context->job_id, version, compaction_job_info);
+  }
+
+  if (status.ok()) {
+    // Done
+  } else if (status.IsColumnFamilyDropped() || status.IsShutdownInProgress()) {
+    // Ignore compaction errors found during shutdown
+  } else if (status.IsManualCompactionPaused()) {
+    // Don't report stopping manual compaction as error
+    ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                   "[%s] [JOB %d] Stopping manual compaction",
+                   c->column_family_data()->GetName().c_str(),
+                   job_context->job_id);
+  } else {
+    ROCKS_LOG_WARN(immutable_db_options_.info_log,
+                   "[%s] [JOB %d] Compaction error: %s",
+                   c->column_family_data()->GetName().c_str(),
+                   job_context->job_id, status.ToString().c_str());
+    IOStatus io_s = compaction_job.io_status();
+    if (!io_s.ok()) {
+      error_handler_.SetBGError(io_s, BackgroundErrorReason::kCompaction);
+    } else {
+      error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
+    }
+  }
+
+  if (output_file_names != nullptr) {
+    for (const auto& newf : c->edit()->GetNewFiles()) {
+      output_file_names->push_back(TableFileName(
+          c->immutable_options()->cf_paths, newf.second.fd.GetNumber(),
+          newf.second.fd.GetPathId()));
+    }
+
+    for (const auto& blob_file : c->edit()->GetBlobFileAdditions()) {
+      output_file_names->push_back(
+          BlobFileName(c->immutable_options()->cf_paths.front().path,
+                       blob_file.GetBlobFileNumber()));
+    }
+  }
+
+  c.reset();
+
+  bg_compaction_scheduled_--;
+  if (bg_compaction_scheduled_ == 0) {
+    bg_cv_.SignalAll();
+  }
+  MaybeScheduleFlushOrCompaction();
+  TEST_SYNC_POINT("CompactFilesImpl:End");
+
+  return status;
+}
+
+Status DBImpl::PauseBackgroundWork() {
+  InstrumentedMutexLock guard_lock(&mutex_);
+  bg_compaction_paused_++;
+  while (bg_bottom_compaction_scheduled_ > 0 || bg_compaction_scheduled_ > 0 ||
+         bg_flush_scheduled_ > 0) {
+    bg_cv_.Wait();
+  }
+  bg_work_paused_++;
+  return Status::OK();
+}
+
+Status DBImpl::ContinueBackgroundWork() {
+  InstrumentedMutexLock guard_lock(&mutex_);
+  if (bg_work_paused_ == 0) {
+    return Status::InvalidArgument();
+  }
+  assert(bg_work_paused_ > 0);
+  assert(bg_compaction_paused_ > 0);
+  bg_compaction_paused_--;
+  bg_work_paused_--;
+  // It's sufficient to
check just bg_work_paused_ here since + // bg_work_paused_ is always no greater than bg_compaction_paused_ + if (bg_work_paused_ == 0) { + MaybeScheduleFlushOrCompaction(); + } + return Status::OK(); +} + +void DBImpl::NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c, + const Status& st, + const CompactionJobStats& job_stats, + int job_id) { + if (immutable_db_options_.listeners.empty()) { + return; + } + mutex_.AssertHeld(); + if (shutting_down_.load(std::memory_order_acquire)) { + return; + } + if (c->is_manual_compaction() && + manual_compaction_paused_.load(std::memory_order_acquire) > 0) { + return; + } + + c->SetNotifyOnCompactionCompleted(); + Version* current = cfd->current(); + current->Ref(); + // release lock while notifying events + mutex_.Unlock(); + TEST_SYNC_POINT("DBImpl::NotifyOnCompactionBegin::UnlockMutex"); + { + CompactionJobInfo info{}; + BuildCompactionJobInfo(cfd, c, st, job_stats, job_id, current, &info); + for (auto listener : immutable_db_options_.listeners) { + listener->OnCompactionBegin(this, info); + } + info.status.PermitUncheckedError(); + } + mutex_.Lock(); + current->Unref(); +} + +void DBImpl::NotifyOnCompactionCompleted( + ColumnFamilyData* cfd, Compaction* c, const Status& st, + const CompactionJobStats& compaction_job_stats, const int job_id) { + if (immutable_db_options_.listeners.size() == 0U) { + return; + } + mutex_.AssertHeld(); + if (shutting_down_.load(std::memory_order_acquire)) { + return; + } + + if (c->ShouldNotifyOnCompactionCompleted() == false) { + return; + } + + Version* current = cfd->current(); + current->Ref(); + // release lock while notifying events + mutex_.Unlock(); + TEST_SYNC_POINT("DBImpl::NotifyOnCompactionCompleted::UnlockMutex"); + { + CompactionJobInfo info{}; + BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id, current, + &info); + for (auto listener : immutable_db_options_.listeners) { + listener->OnCompactionCompleted(this, info); + } + } + mutex_.Lock(); + current->Unref(); + // no need to signal bg_cv_ as it will be signaled at the end of the + // flush process. 
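+
+  // Listener-side sketch (illustrative, user code): the callback invoked
+  // above is the public EventListener API, e.g.:
+  //
+  //   class CompactionLogger : public EventListener {
+  //    public:
+  //     void OnCompactionCompleted(DB* /*db*/,
+  //                                const CompactionJobInfo& info) override {
+  //       fprintf(stderr, "[%s] compacted L%d -> L%d\n",
+  //               info.cf_name.c_str(), info.base_input_level,
+  //               info.output_level);
+  //     }
+  //   };
+  //
+  //   // registered before DB::Open() via:
+  //   // options.listeners.push_back(std::make_shared<CompactionLogger>());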
+} + +// REQUIREMENT: block all background work by calling PauseBackgroundWork() +// before calling this function +Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { + assert(level < cfd->NumberLevels()); + if (target_level >= cfd->NumberLevels()) { + return Status::InvalidArgument("Target level exceeds number of levels"); + } + + const ReadOptions read_options(Env::IOActivity::kCompaction); + + SuperVersionContext sv_context(/* create_superversion */ true); + + InstrumentedMutexLock guard_lock(&mutex_); + + auto* vstorage = cfd->current()->storage_info(); + if (vstorage->LevelFiles(level).empty()) { + return Status::OK(); + } + // only allow one thread refitting + if (refitting_level_) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "[ReFitLevel] another thread is refitting"); + return Status::NotSupported("another thread is refitting"); + } + refitting_level_ = true; + + const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions(); + // move to a smaller level + int to_level = target_level; + if (target_level < 0) { + to_level = FindMinimumEmptyLevelFitting(cfd, mutable_cf_options, level); + } + + if (to_level != level) { + std::vector input(1); + input[0].level = level; + for (auto& f : vstorage->LevelFiles(level)) { + input[0].files.push_back(f); + } + InternalKey refit_level_smallest; + InternalKey refit_level_largest; + cfd->compaction_picker()->GetRange(input[0], &refit_level_smallest, + &refit_level_largest); + if (to_level > level) { + if (level == 0) { + refitting_level_ = false; + return Status::NotSupported( + "Cannot change from level 0 to other levels."); + } + // Check levels are empty for a trivial move + for (int l = level + 1; l <= to_level; l++) { + if (vstorage->NumLevelFiles(l) > 0) { + refitting_level_ = false; + return Status::NotSupported( + "Levels between source and target are not empty for a move."); + } + if (cfd->RangeOverlapWithCompaction(refit_level_smallest.user_key(), + refit_level_largest.user_key(), + l)) { + refitting_level_ = false; + return Status::NotSupported( + "Levels between source and target " + "will have some ongoing compaction's output."); + } + } + } else { + // to_level < level + // Check levels are empty for a trivial move + for (int l = to_level; l < level; l++) { + if (vstorage->NumLevelFiles(l) > 0) { + refitting_level_ = false; + return Status::NotSupported( + "Levels between source and target are not empty for a move."); + } + if (cfd->RangeOverlapWithCompaction(refit_level_smallest.user_key(), + refit_level_largest.user_key(), + l)) { + refitting_level_ = false; + return Status::NotSupported( + "Levels between source and target " + "will have some ongoing compaction's output."); + } + } + } + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, + "[%s] Before refitting:\n%s", cfd->GetName().c_str(), + cfd->current()->DebugString().data()); + + std::unique_ptr c(new Compaction( + vstorage, *cfd->ioptions(), mutable_cf_options, mutable_db_options_, + {input}, to_level, + MaxFileSizeForLevel( + mutable_cf_options, to_level, + cfd->ioptions() + ->compaction_style) /* output file size limit, not applicable */ + , + LLONG_MAX /* max compaction bytes, not applicable */, + 0 /* output path ID, not applicable */, mutable_cf_options.compression, + mutable_cf_options.compression_opts, Temperature::kUnknown, + 0 /* max_subcompactions, not applicable */, + {} /* grandparents, not applicable */, false /* is manual */, + "" /* trim_ts */, -1 /* score, not applicable */, + false /* is deletion compaction, 
not applicable */, + false /* l0_files_might_overlap, not applicable */, + CompactionReason::kRefitLevel)); + cfd->compaction_picker()->RegisterCompaction(c.get()); + TEST_SYNC_POINT("DBImpl::ReFitLevel:PostRegisterCompaction"); + VersionEdit edit; + edit.SetColumnFamily(cfd->GetID()); + + for (const auto& f : vstorage->LevelFiles(level)) { + edit.DeleteFile(level, f->fd.GetNumber()); + edit.AddFile( + to_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(), + f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno, + f->marked_for_compaction, f->temperature, f->oldest_blob_file_number, + f->oldest_ancester_time, f->file_creation_time, f->epoch_number, + f->file_checksum, f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size, f->tail_size); + } + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, + "[%s] Apply version edit:\n%s", cfd->GetName().c_str(), + edit.DebugString().data()); + + Status status = + versions_->LogAndApply(cfd, mutable_cf_options, read_options, &edit, + &mutex_, directories_.GetDbDir()); + + cfd->compaction_picker()->UnregisterCompaction(c.get()); + c.reset(); + + InstallSuperVersionAndScheduleWork(cfd, &sv_context, mutable_cf_options); + + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] LogAndApply: %s\n", + cfd->GetName().c_str(), status.ToString().data()); + + if (status.ok()) { + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, + "[%s] After refitting:\n%s", cfd->GetName().c_str(), + cfd->current()->DebugString().data()); + } + sv_context.Clean(); + refitting_level_ = false; + + return status; + } + + refitting_level_ = false; + return Status::OK(); +} + +int DBImpl::NumberLevels(ColumnFamilyHandle* column_family) { + auto cfh = static_cast_with_check(column_family); + return cfh->cfd()->NumberLevels(); +} + +int DBImpl::MaxMemCompactionLevel(ColumnFamilyHandle* /*column_family*/) { + return 0; +} + +int DBImpl::Level0StopWriteTrigger(ColumnFamilyHandle* column_family) { + auto cfh = static_cast_with_check(column_family); + InstrumentedMutexLock l(&mutex_); + return cfh->cfd() + ->GetSuperVersion() + ->mutable_cf_options.level0_stop_writes_trigger; +} + +Status DBImpl::Flush(const FlushOptions& flush_options, + ColumnFamilyHandle* column_family) { + auto cfh = static_cast_with_check(column_family); + ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] Manual flush start.", + cfh->GetName().c_str()); + Status s; + if (immutable_db_options_.atomic_flush) { + s = AtomicFlushMemTables(flush_options, FlushReason::kManualFlush, + {cfh->cfd()}); + } else { + s = FlushMemTable(cfh->cfd(), flush_options, FlushReason::kManualFlush); + } + + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "[%s] Manual flush finished, status: %s\n", + cfh->GetName().c_str(), s.ToString().c_str()); + return s; +} + +Status DBImpl::Flush(const FlushOptions& flush_options, + const std::vector& column_families) { + Status s; + if (!immutable_db_options_.atomic_flush) { + for (auto cfh : column_families) { + s = Flush(flush_options, cfh); + if (!s.ok()) { + break; + } + } + } else { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Manual atomic flush start.\n" + "=====Column families:====="); + for (auto cfh : column_families) { + auto cfhi = static_cast(cfh); + ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s", + cfhi->GetName().c_str()); + } + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "=====End of column families list====="); + autovector cfds; + std::for_each(column_families.begin(), column_families.end(), + 
[&cfds](ColumnFamilyHandle* elem) { + auto cfh = static_cast(elem); + cfds.emplace_back(cfh->cfd()); + }); + s = AtomicFlushMemTables(flush_options, FlushReason::kManualFlush, cfds); + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Manual atomic flush finished, status: %s\n" + "=====Column families:=====", + s.ToString().c_str()); + for (auto cfh : column_families) { + auto cfhi = static_cast(cfh); + ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s", + cfhi->GetName().c_str()); + } + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "=====End of column families list====="); + } + return s; +} + +Status DBImpl::RunManualCompaction( + ColumnFamilyData* cfd, int input_level, int output_level, + const CompactRangeOptions& compact_range_options, const Slice* begin, + const Slice* end, bool exclusive, bool disallow_trivial_move, + uint64_t max_file_num_to_ignore, const std::string& trim_ts, + int* final_output_level) { + assert(input_level == ColumnFamilyData::kCompactAllLevels || + input_level >= 0); + + InternalKey begin_storage, end_storage; + CompactionArg* ca = nullptr; + + bool scheduled = false; + bool unscheduled = false; + Env::Priority thread_pool_priority = Env::Priority::TOTAL; + bool manual_conflict = false; + + ManualCompactionState manual( + cfd, input_level, output_level, compact_range_options.target_path_id, + exclusive, disallow_trivial_move, compact_range_options.canceled); + // For universal compaction, we enforce every manual compaction to compact + // all files. + if (begin == nullptr || + cfd->ioptions()->compaction_style == kCompactionStyleUniversal || + cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { + manual.begin = nullptr; + } else { + begin_storage.SetMinPossibleForUserKey(*begin); + manual.begin = &begin_storage; + } + if (end == nullptr || + cfd->ioptions()->compaction_style == kCompactionStyleUniversal || + cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { + manual.end = nullptr; + } else { + end_storage.SetMaxPossibleForUserKey(*end); + manual.end = &end_storage; + } + + TEST_SYNC_POINT("DBImpl::RunManualCompaction:0"); + TEST_SYNC_POINT("DBImpl::RunManualCompaction:1"); + InstrumentedMutexLock l(&mutex_); + + if (manual_compaction_paused_ > 0) { + // Does not make sense to `AddManualCompaction()` in this scenario since + // `DisableManualCompaction()` just waited for the manual compaction queue + // to drain. So return immediately. + TEST_SYNC_POINT("DBImpl::RunManualCompaction:PausedAtStart"); + manual.status = + Status::Incomplete(Status::SubCode::kManualCompactionPaused); + manual.done = true; + return manual.status; + } + + // When a manual compaction arrives, temporarily disable scheduling of + // non-manual compactions and wait until the number of scheduled compaction + // jobs drops to zero. This used to be needed to ensure that this manual + // compaction can compact any range of keys/files. Now it is optional + // (see `CompactRangeOptions::exclusive_manual_compaction`). The use case for + // `exclusive_manual_compaction=true` is unclear beyond not trusting the code. + // + // HasPendingManualCompaction() is true when at least one thread is inside + // RunManualCompaction(), i.e. during that time no other compaction will + // get scheduled (see MaybeScheduleFlushOrCompaction). + // + // Note that the following loop doesn't stop more that one thread calling + // RunManualCompaction() from getting to the second while loop below. 
+ // However, only one of them will actually schedule compaction, while + // others will wait on a condition variable until it completes. + + AddManualCompaction(&manual); + TEST_SYNC_POINT_CALLBACK("DBImpl::RunManualCompaction:NotScheduled", &mutex_); + if (exclusive) { + // Limitation: there's no way to wake up the below loop when user sets + // `*manual.canceled`. So `CompactRangeOptions::exclusive_manual_compaction` + // and `CompactRangeOptions::canceled` might not work well together. + while (bg_bottom_compaction_scheduled_ > 0 || + bg_compaction_scheduled_ > 0) { + if (manual_compaction_paused_ > 0 || manual.canceled == true) { + // Pretend the error came from compaction so the below cleanup/error + // handling code can process it. + manual.done = true; + manual.status = + Status::Incomplete(Status::SubCode::kManualCompactionPaused); + break; + } + TEST_SYNC_POINT("DBImpl::RunManualCompaction:WaitScheduled"); + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "[%s] Manual compaction waiting for all other scheduled background " + "compactions to finish", + cfd->GetName().c_str()); + bg_cv_.Wait(); + } + } + + LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, + immutable_db_options_.info_log.get()); + + ROCKS_LOG_BUFFER(&log_buffer, "[%s] Manual compaction starting", + cfd->GetName().c_str()); + + // We don't check bg_error_ here, because if we get the error in compaction, + // the compaction will set manual.status to bg_error_ and set manual.done to + // true. + while (!manual.done) { + assert(HasPendingManualCompaction()); + manual_conflict = false; + Compaction* compaction = nullptr; + if (ShouldntRunManualCompaction(&manual) || (manual.in_progress == true) || + scheduled || + (((manual.manual_end = &manual.tmp_storage1) != nullptr) && + ((compaction = manual.cfd->CompactRange( + *manual.cfd->GetLatestMutableCFOptions(), mutable_db_options_, + manual.input_level, manual.output_level, compact_range_options, + manual.begin, manual.end, &manual.manual_end, &manual_conflict, + max_file_num_to_ignore, trim_ts)) == nullptr && + manual_conflict))) { + if (!scheduled) { + // There is a conflicting compaction + if (manual_compaction_paused_ > 0 || manual.canceled == true) { + // Stop waiting since it was canceled. Pretend the error came from + // compaction so the below cleanup/error handling code can process it. + manual.done = true; + manual.status = + Status::Incomplete(Status::SubCode::kManualCompactionPaused); + } + } + if (!manual.done) { + bg_cv_.Wait(); + } + if (manual_compaction_paused_ > 0 && scheduled && !unscheduled) { + assert(thread_pool_priority != Env::Priority::TOTAL); + // unschedule all manual compactions + auto unscheduled_task_num = env_->UnSchedule( + GetTaskTag(TaskType::kManualCompaction), thread_pool_priority); + if (unscheduled_task_num > 0) { + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "[%s] Unscheduled %d number of manual compactions from the " + "thread-pool", + cfd->GetName().c_str(), unscheduled_task_num); + // it may unschedule other manual compactions, notify others. + bg_cv_.SignalAll(); + } + unscheduled = true; + TEST_SYNC_POINT("DBImpl::RunManualCompaction:Unscheduled"); + } + if (scheduled && manual.incomplete == true) { + assert(!manual.in_progress); + scheduled = false; + manual.incomplete = false; + } + } else if (!scheduled) { + if (compaction == nullptr) { + manual.done = true; + if (final_output_level) { + // No compaction needed or there is a conflicting compaction. 
+ // Still set `final_output_level` to the level where we would + // have compacted to. + *final_output_level = output_level; + if (output_level == ColumnFamilyData::kCompactToBaseLevel) { + *final_output_level = cfd->current()->storage_info()->base_level(); + } + } + bg_cv_.SignalAll(); + continue; + } + ca = new CompactionArg; + ca->db = this; + ca->prepicked_compaction = new PrepickedCompaction; + ca->prepicked_compaction->manual_compaction_state = &manual; + ca->prepicked_compaction->compaction = compaction; + if (!RequestCompactionToken( + cfd, true, &ca->prepicked_compaction->task_token, &log_buffer)) { + // Don't throttle manual compaction, only count outstanding tasks. + assert(false); + } + manual.incomplete = false; + if (compaction->bottommost_level() && + env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) { + bg_bottom_compaction_scheduled_++; + ca->compaction_pri_ = Env::Priority::BOTTOM; + env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca, + Env::Priority::BOTTOM, + GetTaskTag(TaskType::kManualCompaction), + &DBImpl::UnscheduleCompactionCallback); + thread_pool_priority = Env::Priority::BOTTOM; + } else { + bg_compaction_scheduled_++; + ca->compaction_pri_ = Env::Priority::LOW; + env_->Schedule(&DBImpl::BGWorkCompaction, ca, Env::Priority::LOW, + GetTaskTag(TaskType::kManualCompaction), + &DBImpl::UnscheduleCompactionCallback); + thread_pool_priority = Env::Priority::LOW; + } + scheduled = true; + TEST_SYNC_POINT("DBImpl::RunManualCompaction:Scheduled"); + if (final_output_level) { + *final_output_level = compaction->output_level(); + } + } + } + + log_buffer.FlushBufferToLog(); + assert(!manual.in_progress); + assert(HasPendingManualCompaction()); + RemoveManualCompaction(&manual); + // if the manual job is unscheduled, try schedule other jobs in case there's + // any unscheduled compaction job which was blocked by exclusive manual + // compaction. + if (manual.status.IsIncomplete() && + manual.status.subcode() == Status::SubCode::kManualCompactionPaused) { + MaybeScheduleFlushOrCompaction(); + } + bg_cv_.SignalAll(); + return manual.status; +} + +void DBImpl::GenerateFlushRequest(const autovector& cfds, + FlushReason flush_reason, FlushRequest* req) { + assert(req != nullptr); + req->flush_reason = flush_reason; + req->cfd_to_max_mem_id_to_persist.reserve(cfds.size()); + for (const auto cfd : cfds) { + if (nullptr == cfd) { + // cfd may be null, see DBImpl::ScheduleFlushes + continue; + } + uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID(); + req->cfd_to_max_mem_id_to_persist.emplace(cfd, max_memtable_id); + } +} + +Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, + const FlushOptions& flush_options, + FlushReason flush_reason, + bool entered_write_thread) { + // This method should not be called if atomic_flush is true. + assert(!immutable_db_options_.atomic_flush); + if (!flush_options.wait && write_controller_.IsStopped()) { + std::ostringstream oss; + oss << "Writes have been stopped, thus unable to perform manual flush. 
" + "Please try again later after writes are resumed"; + return Status::TryAgain(oss.str()); + } + Status s; + if (!flush_options.allow_write_stall) { + bool flush_needed = true; + s = WaitUntilFlushWouldNotStallWrites(cfd, &flush_needed); + TEST_SYNC_POINT("DBImpl::FlushMemTable:StallWaitDone"); + if (!s.ok() || !flush_needed) { + return s; + } + } + + const bool needs_to_join_write_thread = !entered_write_thread; + autovector flush_reqs; + autovector memtable_ids_to_wait; + { + WriteContext context; + InstrumentedMutexLock guard_lock(&mutex_); + + WriteThread::Writer w; + WriteThread::Writer nonmem_w; + if (needs_to_join_write_thread) { + write_thread_.EnterUnbatched(&w, &mutex_); + if (two_write_queues_) { + nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); + } + } + WaitForPendingWrites(); + + if (flush_reason != FlushReason::kErrorRecoveryRetryFlush && + (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load())) { + // Note that, when flush reason is kErrorRecoveryRetryFlush, during the + // auto retry resume, we want to avoid creating new small memtables. + // Therefore, SwitchMemtable will not be called. Also, since ResumeImpl + // will iterate through all the CFs and call FlushMemtable during auto + // retry resume, it is possible that in some CFs, + // cfd->imm()->NumNotFlushed() = 0. In this case, so no flush request will + // be created and scheduled, status::OK() will be returned. + s = SwitchMemtable(cfd, &context); + } + const uint64_t flush_memtable_id = std::numeric_limits::max(); + if (s.ok()) { + if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() || + !cached_recoverable_state_empty_.load()) { + FlushRequest req{flush_reason, {{cfd, flush_memtable_id}}}; + flush_reqs.emplace_back(std::move(req)); + memtable_ids_to_wait.emplace_back(cfd->imm()->GetLatestMemTableID()); + } + if (immutable_db_options_.persist_stats_to_disk && + flush_reason != FlushReason::kErrorRecoveryRetryFlush) { + ColumnFamilyData* cfd_stats = + versions_->GetColumnFamilySet()->GetColumnFamily( + kPersistentStatsColumnFamilyName); + if (cfd_stats != nullptr && cfd_stats != cfd && + !cfd_stats->mem()->IsEmpty()) { + // only force flush stats CF when it will be the only CF lagging + // behind after the current flush + bool stats_cf_flush_needed = true; + for (auto* loop_cfd : *versions_->GetColumnFamilySet()) { + if (loop_cfd == cfd_stats || loop_cfd == cfd) { + continue; + } + if (loop_cfd->GetLogNumber() <= cfd_stats->GetLogNumber()) { + stats_cf_flush_needed = false; + } + } + if (stats_cf_flush_needed) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Force flushing stats CF with manual flush of %s " + "to avoid holding old logs", + cfd->GetName().c_str()); + s = SwitchMemtable(cfd_stats, &context); + FlushRequest req{flush_reason, {{cfd_stats, flush_memtable_id}}}; + flush_reqs.emplace_back(std::move(req)); + memtable_ids_to_wait.emplace_back( + cfd_stats->imm()->GetLatestMemTableID()); + } + } + } + } + + if (s.ok() && !flush_reqs.empty()) { + for (const auto& req : flush_reqs) { + assert(req.cfd_to_max_mem_id_to_persist.size() == 1); + ColumnFamilyData* loop_cfd = + req.cfd_to_max_mem_id_to_persist.begin()->first; + loop_cfd->imm()->FlushRequested(); + } + // If the caller wants to wait for this flush to complete, it indicates + // that the caller expects the ColumnFamilyData not to be free'ed by + // other threads which may drop the column family concurrently. + // Therefore, we increase the cfd's ref count. 
+ if (flush_options.wait) { + for (const auto& req : flush_reqs) { + assert(req.cfd_to_max_mem_id_to_persist.size() == 1); + ColumnFamilyData* loop_cfd = + req.cfd_to_max_mem_id_to_persist.begin()->first; + loop_cfd->Ref(); + } + } + for (const auto& req : flush_reqs) { + SchedulePendingFlush(req); + } + MaybeScheduleFlushOrCompaction(); + } + + if (needs_to_join_write_thread) { + write_thread_.ExitUnbatched(&w); + if (two_write_queues_) { + nonmem_write_thread_.ExitUnbatched(&nonmem_w); + } + } + } + TEST_SYNC_POINT("DBImpl::FlushMemTable:AfterScheduleFlush"); + TEST_SYNC_POINT("DBImpl::FlushMemTable:BeforeWaitForBgFlush"); + if (s.ok() && flush_options.wait) { + autovector cfds; + autovector flush_memtable_ids; + assert(flush_reqs.size() == memtable_ids_to_wait.size()); + for (size_t i = 0; i < flush_reqs.size(); ++i) { + assert(flush_reqs[i].cfd_to_max_mem_id_to_persist.size() == 1); + cfds.push_back(flush_reqs[i].cfd_to_max_mem_id_to_persist.begin()->first); + flush_memtable_ids.push_back(&(memtable_ids_to_wait[i])); + } + s = WaitForFlushMemTables( + cfds, flush_memtable_ids, + (flush_reason == FlushReason::kErrorRecovery || + flush_reason == FlushReason::kErrorRecoveryRetryFlush)); + InstrumentedMutexLock lock_guard(&mutex_); + for (auto* tmp_cfd : cfds) { + tmp_cfd->UnrefAndTryDelete(); + } + } + TEST_SYNC_POINT("DBImpl::FlushMemTable:FlushMemTableFinished"); + return s; +} + +Status DBImpl::AtomicFlushMemTables( + const FlushOptions& flush_options, FlushReason flush_reason, + const autovector& provided_candidate_cfds, + bool entered_write_thread) { + assert(immutable_db_options_.atomic_flush); + if (!flush_options.wait && write_controller_.IsStopped()) { + std::ostringstream oss; + oss << "Writes have been stopped, thus unable to perform manual flush. 
" + "Please try again later after writes are resumed"; + return Status::TryAgain(oss.str()); + } + Status s; + autovector candidate_cfds; + if (provided_candidate_cfds.empty()) { + // Generate candidate cfds if not provided + { + InstrumentedMutexLock l(&mutex_); + for (ColumnFamilyData* cfd : *versions_->GetColumnFamilySet()) { + if (!cfd->IsDropped() && cfd->initialized()) { + cfd->Ref(); + candidate_cfds.push_back(cfd); + } + } + } + } else { + candidate_cfds = provided_candidate_cfds; + } + + if (!flush_options.allow_write_stall) { + int num_cfs_to_flush = 0; + for (auto cfd : candidate_cfds) { + bool flush_needed = true; + s = WaitUntilFlushWouldNotStallWrites(cfd, &flush_needed); + if (!s.ok()) { + // Unref the newly generated candidate cfds (when not provided) in + // `candidate_cfds` + if (provided_candidate_cfds.empty()) { + for (auto candidate_cfd : candidate_cfds) { + candidate_cfd->UnrefAndTryDelete(); + } + } + return s; + } else if (flush_needed) { + ++num_cfs_to_flush; + } + } + if (0 == num_cfs_to_flush) { + // Unref the newly generated candidate cfds (when not provided) in + // `candidate_cfds` + if (provided_candidate_cfds.empty()) { + for (auto candidate_cfd : candidate_cfds) { + candidate_cfd->UnrefAndTryDelete(); + } + } + return s; + } + } + const bool needs_to_join_write_thread = !entered_write_thread; + FlushRequest flush_req; + autovector cfds; + { + WriteContext context; + InstrumentedMutexLock guard_lock(&mutex_); + + WriteThread::Writer w; + WriteThread::Writer nonmem_w; + if (needs_to_join_write_thread) { + write_thread_.EnterUnbatched(&w, &mutex_); + if (two_write_queues_) { + nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); + } + } + WaitForPendingWrites(); + + SelectColumnFamiliesForAtomicFlush(&cfds, candidate_cfds); + + // Unref the newly generated candidate cfds (when not provided) in + // `candidate_cfds` + if (provided_candidate_cfds.empty()) { + for (auto candidate_cfd : candidate_cfds) { + candidate_cfd->UnrefAndTryDelete(); + } + } + + for (auto cfd : cfds) { + if ((cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) || + flush_reason == FlushReason::kErrorRecoveryRetryFlush) { + continue; + } + cfd->Ref(); + s = SwitchMemtable(cfd, &context); + cfd->UnrefAndTryDelete(); + if (!s.ok()) { + break; + } + } + if (s.ok()) { + AssignAtomicFlushSeq(cfds); + for (auto cfd : cfds) { + cfd->imm()->FlushRequested(); + } + // If the caller wants to wait for this flush to complete, it indicates + // that the caller expects the ColumnFamilyData not to be free'ed by + // other threads which may drop the column family concurrently. + // Therefore, we increase the cfd's ref count. 
+      if (flush_options.wait) {
+        for (auto cfd : cfds) {
+          cfd->Ref();
+        }
+      }
+      GenerateFlushRequest(cfds, flush_reason, &flush_req);
+      SchedulePendingFlush(flush_req);
+      MaybeScheduleFlushOrCompaction();
+    }
+
+    if (needs_to_join_write_thread) {
+      write_thread_.ExitUnbatched(&w);
+      if (two_write_queues_) {
+        nonmem_write_thread_.ExitUnbatched(&nonmem_w);
+      }
+    }
+  }
+  TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:AfterScheduleFlush");
+  TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:BeforeWaitForBgFlush");
+  if (s.ok() && flush_options.wait) {
+    autovector<const uint64_t*> flush_memtable_ids;
+    for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
+      flush_memtable_ids.push_back(&(iter.second));
+    }
+    s = WaitForFlushMemTables(
+        cfds, flush_memtable_ids,
+        (flush_reason == FlushReason::kErrorRecovery ||
+         flush_reason == FlushReason::kErrorRecoveryRetryFlush));
+    InstrumentedMutexLock lock_guard(&mutex_);
+    for (auto* cfd : cfds) {
+      cfd->UnrefAndTryDelete();
+    }
+  }
+  return s;
+}
+
+// Calling FlushMemTable(), whether from DB::Flush() or from Backup Engine,
+// can cause a write stall, for example if one memtable is being flushed
+// already. This method tries to avoid a write stall (similar to
+// CompactRange() behavior): it emulates how the SuperVersion / LSM would
+// change if the flush happened, checks that against various constraints,
+// and delays the flush if it would cause a write stall.
+// Caller should check status and flush_needed to see if the flush already
+// happened.
+Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
+                                                 bool* flush_needed) {
+  {
+    *flush_needed = true;
+    InstrumentedMutexLock l(&mutex_);
+    uint64_t orig_active_memtable_id = cfd->mem()->GetID();
+    WriteStallCondition write_stall_condition = WriteStallCondition::kNormal;
+    do {
+      if (write_stall_condition != WriteStallCondition::kNormal) {
+        // Same error handling as user writes: Don't wait if there's a
+        // background error, even if it's a soft error. We might wait here
+        // indefinitely as the pending flushes/compactions may never finish
+        // successfully, resulting in the stall condition lasting indefinitely
+        if (error_handler_.IsBGWorkStopped()) {
+          return error_handler_.GetBGError();
+        }
+
+        TEST_SYNC_POINT("DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait");
+        ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                       "[%s] WaitUntilFlushWouldNotStallWrites"
+                       " waiting on stall conditions to clear",
+                       cfd->GetName().c_str());
+        bg_cv_.Wait();
+      }
+      if (cfd->IsDropped()) {
+        return Status::ColumnFamilyDropped();
+      }
+      if (shutting_down_.load(std::memory_order_acquire)) {
+        return Status::ShutdownInProgress();
+      }
+
+      uint64_t earliest_memtable_id =
+          std::min(cfd->mem()->GetID(), cfd->imm()->GetEarliestMemTableID());
+      if (earliest_memtable_id > orig_active_memtable_id) {
+        // We waited so long that the memtable we were originally waiting on
+        // was flushed.
+        *flush_needed = false;
+        return Status::OK();
+      }
+
+      const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions();
+      const auto* vstorage = cfd->current()->storage_info();
+
+      // Skip the stalling check if we're below the auto-flush and
+      // auto-compaction triggers. If it stalled in these conditions, that'd
+      // mean the stall triggers are so low that stalling is needed for any
+      // background work. In that case we shouldn't wait since background
+      // work won't be scheduled.
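+      //
+      // Worked example (illustrative, default-style options): with
+      // min_write_buffer_number_to_merge == 1 and
+      // level0_file_num_compaction_trigger == 4, a CF with no unflushed
+      // immutable memtables and 2 L0 files takes this early break: one
+      // more memtable (0 < 1) and one more L0 file (2 < 4) are still
+      // below both triggers, so there is nothing to wait for.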
+      if (cfd->imm()->NumNotFlushed() <
+              cfd->ioptions()->min_write_buffer_number_to_merge &&
+          vstorage->l0_delay_trigger_count() <
+              mutable_cf_options.level0_file_num_compaction_trigger) {
+        break;
+      }
+
+      // check whether one extra immutable memtable or an extra L0 file would
+      // cause write stalling mode to be entered. It could still enter stall
+      // mode due to pending compaction bytes, but that's less common
+      write_stall_condition =
+          ColumnFamilyData::GetWriteStallConditionAndCause(
+              cfd->imm()->NumNotFlushed() + 1,
+              vstorage->l0_delay_trigger_count() + 1,
+              vstorage->estimated_compaction_needed_bytes(),
+              mutable_cf_options, *cfd->ioptions())
+              .first;
+    } while (write_stall_condition != WriteStallCondition::kNormal);
+  }
+  return Status::OK();
+}
+
+// Wait for memtables to be flushed for multiple column families.
+// let N = cfds.size()
+// for i in [0, N),
+//  1) if flush_memtable_ids[i] is not null, then the memtables with lower
+//     IDs have to be flushed for THIS column family;
+//  2) if flush_memtable_ids[i] is null, then all memtables in THIS column
+//     family have to be flushed.
+// Finish waiting when ALL column families finish flushing memtables.
+// resuming_from_bg_err indicates whether the caller is trying to resume from
+// a background error or is in normal processing.
+Status DBImpl::WaitForFlushMemTables(
+    const autovector<ColumnFamilyData*>& cfds,
+    const autovector<const uint64_t*>& flush_memtable_ids,
+    bool resuming_from_bg_err) {
+  int num = static_cast<int>(cfds.size());
+  // Wait until the flush completes
+  InstrumentedMutexLock l(&mutex_);
+  Status s;
+  // If the caller is trying to resume from bg error, then
+  // error_handler_.IsDBStopped() is true.
+  while (resuming_from_bg_err || !error_handler_.IsDBStopped()) {
+    if (shutting_down_.load(std::memory_order_acquire)) {
+      s = Status::ShutdownInProgress();
+      return s;
+    }
+    // If an error has occurred during resumption, then there is no need to
+    // wait. But the flush operation may fail because of this error, so we
+    // need to return the status.
+    if (!error_handler_.GetRecoveryError().ok()) {
+      s = error_handler_.GetRecoveryError();
+      break;
+    }
+    // If BG work is stopped, it indicates a BG error that is 1) a soft
+    // error that requires no BG work, and 2) not being handled by auto
+    // recovery (auto_recovery_)
+    if (!resuming_from_bg_err && error_handler_.IsBGWorkStopped() &&
+        error_handler_.GetBGError().severity() <
+            Status::Severity::kHardError) {
+      s = error_handler_.GetBGError();
+      return s;
+    }
+
+    // Number of column families that have been dropped.
+    int num_dropped = 0;
+    // Number of column families that have finished flush.
+    int num_finished = 0;
+    for (int i = 0; i < num; ++i) {
+      if (cfds[i]->IsDropped()) {
+        ++num_dropped;
+      } else if (cfds[i]->imm()->NumNotFlushed() == 0 ||
+                 (flush_memtable_ids[i] != nullptr &&
+                  cfds[i]->imm()->GetEarliestMemTableID() >
+                      *flush_memtable_ids[i])) {
+        ++num_finished;
+      }
+    }
+    if (1 == num_dropped && 1 == num) {
+      s = Status::ColumnFamilyDropped();
+      return s;
+    }
+    // Column families involved in this flush request have either been
+    // dropped or finished flush. Then it's time to finish waiting.
+    if (num_dropped + num_finished == num) {
+      break;
+    }
+    bg_cv_.Wait();
+  }
+  // If not resuming from bg error, and an error has caused the DB to stop,
+  // then report the bg error to the caller.
+ if (!resuming_from_bg_err && error_handler_.IsDBStopped()) { + s = error_handler_.GetBGError(); + } + return s; +} + +Status DBImpl::EnableAutoCompaction( + const std::vector& column_family_handles) { + Status s; + for (auto cf_ptr : column_family_handles) { + Status status = + this->SetOptions(cf_ptr, {{"disable_auto_compactions", "false"}}); + if (!status.ok()) { + s = status; + } + } + + return s; +} + +// NOTE: Calling DisableManualCompaction() may overwrite the +// user-provided canceled variable in CompactRangeOptions +void DBImpl::DisableManualCompaction() { + InstrumentedMutexLock l(&mutex_); + manual_compaction_paused_.fetch_add(1, std::memory_order_release); + + // Mark the canceled as true when the cancellation is triggered by + // manual_compaction_paused (may overwrite user-provided `canceled`) + for (const auto& manual_compaction : manual_compaction_dequeue_) { + manual_compaction->canceled = true; + } + + // Wake up manual compactions waiting to start. + bg_cv_.SignalAll(); + + // Wait for any pending manual compactions to finish (typically through + // failing with `Status::Incomplete`) prior to returning. This way we are + // guaranteed no pending manual compaction will commit while manual + // compactions are "disabled". + while (HasPendingManualCompaction()) { + bg_cv_.Wait(); + } +} + +// NOTE: In contrast to DisableManualCompaction(), calling +// EnableManualCompaction() does NOT overwrite the user-provided *canceled +// variable to be false since there is NO CHANCE a canceled compaction +// is uncanceled. In other words, a canceled compaction must have been +// dropped out of the manual compaction queue, when we disable it. +void DBImpl::EnableManualCompaction() { + InstrumentedMutexLock l(&mutex_); + assert(manual_compaction_paused_ > 0); + manual_compaction_paused_.fetch_sub(1, std::memory_order_release); +} + +void DBImpl::MaybeScheduleFlushOrCompaction() { + mutex_.AssertHeld(); + if (!opened_successfully_) { + // Compaction may introduce data race to DB open + return; + } + if (bg_work_paused_ > 0) { + // we paused the background work + return; + } else if (error_handler_.IsBGWorkStopped() && + !error_handler_.IsRecoveryInProgress()) { + // There has been a hard error and this call is not part of the recovery + // sequence. Bail out here so we don't get into an endless loop of + // scheduling BG work which will again call this function + return; + } else if (shutting_down_.load(std::memory_order_acquire)) { + // DB is being deleted; no more background compactions + return; + } + auto bg_job_limits = GetBGJobLimits(); + bool is_flush_pool_empty = + env_->GetBackgroundThreads(Env::Priority::HIGH) == 0; + while (!is_flush_pool_empty && unscheduled_flushes_ > 0 && + bg_flush_scheduled_ < bg_job_limits.max_flushes) { + bg_flush_scheduled_++; + FlushThreadArg* fta = new FlushThreadArg; + fta->db_ = this; + fta->thread_pri_ = Env::Priority::HIGH; + env_->Schedule(&DBImpl::BGWorkFlush, fta, Env::Priority::HIGH, this, + &DBImpl::UnscheduleFlushCallback); + --unscheduled_flushes_; + TEST_SYNC_POINT_CALLBACK( + "DBImpl::MaybeScheduleFlushOrCompaction:AfterSchedule:0", + &unscheduled_flushes_); + } + + // special case -- if high-pri (flush) thread pool is empty, then schedule + // flushes in low-pri (compaction) thread pool. 
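+  //
+  // (Illustrative: the HIGH pool is empty when the Env was configured with
+  //  no high-priority background threads, e.g. something like
+  //    env->SetBackgroundThreads(0, Env::Priority::HIGH);
+  //  in that case the fallback below runs flushes in the LOW pool.)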
+ if (is_flush_pool_empty) { + while (unscheduled_flushes_ > 0 && + bg_flush_scheduled_ + bg_compaction_scheduled_ < + bg_job_limits.max_flushes) { + bg_flush_scheduled_++; + FlushThreadArg* fta = new FlushThreadArg; + fta->db_ = this; + fta->thread_pri_ = Env::Priority::LOW; + env_->Schedule(&DBImpl::BGWorkFlush, fta, Env::Priority::LOW, this, + &DBImpl::UnscheduleFlushCallback); + --unscheduled_flushes_; + } + } + + if (bg_compaction_paused_ > 0) { + // we paused the background compaction + return; + } else if (error_handler_.IsBGWorkStopped()) { + // Compaction is not part of the recovery sequence from a hard error. We + // might get here because recovery might do a flush and install a new + // super version, which will try to schedule pending compactions. Bail + // out here and let the higher level recovery handle compactions + return; + } + + if (HasExclusiveManualCompaction()) { + // only manual compactions are allowed to run. don't schedule automatic + // compactions + TEST_SYNC_POINT("DBImpl::MaybeScheduleFlushOrCompaction:Conflict"); + return; + } + + while (bg_compaction_scheduled_ + bg_bottom_compaction_scheduled_ < + bg_job_limits.max_compactions && + unscheduled_compactions_ > 0) { + CompactionArg* ca = new CompactionArg; + ca->db = this; + ca->compaction_pri_ = Env::Priority::LOW; + ca->prepicked_compaction = nullptr; + bg_compaction_scheduled_++; + unscheduled_compactions_--; + env_->Schedule(&DBImpl::BGWorkCompaction, ca, Env::Priority::LOW, this, + &DBImpl::UnscheduleCompactionCallback); + } +} + +DBImpl::BGJobLimits DBImpl::GetBGJobLimits() const { + mutex_.AssertHeld(); + return GetBGJobLimits(mutable_db_options_.max_background_flushes, + mutable_db_options_.max_background_compactions, + mutable_db_options_.max_background_jobs, + write_controller_.NeedSpeedupCompaction()); +} + +DBImpl::BGJobLimits DBImpl::GetBGJobLimits(int max_background_flushes, + int max_background_compactions, + int max_background_jobs, + bool parallelize_compactions) { + BGJobLimits res; + if (max_background_flushes == -1 && max_background_compactions == -1) { + // for our first stab implementing max_background_jobs, simply allocate a + // quarter of the threads to flushes. 
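+    //
+    // Worked example (illustrative): max_background_jobs == 8 yields
+    // max_flushes == 2 and max_compactions == 6; if compactions are not
+    // parallelized (see below), max_compactions is clamped to 1.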
+ res.max_flushes = std::max(1, max_background_jobs / 4); + res.max_compactions = std::max(1, max_background_jobs - res.max_flushes); + } else { + // compatibility code in case users haven't migrated to max_background_jobs, + // which automatically computes flush/compaction limits + res.max_flushes = std::max(1, max_background_flushes); + res.max_compactions = std::max(1, max_background_compactions); + } + if (!parallelize_compactions) { + // throttle background compactions until we deem necessary + res.max_compactions = 1; + } + return res; +} + +void DBImpl::AddToCompactionQueue(ColumnFamilyData* cfd) { + assert(!cfd->queued_for_compaction()); + cfd->Ref(); + compaction_queue_.push_back(cfd); + cfd->set_queued_for_compaction(true); +} + +ColumnFamilyData* DBImpl::PopFirstFromCompactionQueue() { + assert(!compaction_queue_.empty()); + auto cfd = *compaction_queue_.begin(); + compaction_queue_.pop_front(); + assert(cfd->queued_for_compaction()); + cfd->set_queued_for_compaction(false); + return cfd; +} + +DBImpl::FlushRequest DBImpl::PopFirstFromFlushQueue() { + assert(!flush_queue_.empty()); + FlushRequest flush_req = flush_queue_.front(); + flush_queue_.pop_front(); + if (!immutable_db_options_.atomic_flush) { + assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1); + } + for (const auto& elem : flush_req.cfd_to_max_mem_id_to_persist) { + if (!immutable_db_options_.atomic_flush) { + ColumnFamilyData* cfd = elem.first; + assert(cfd); + assert(cfd->queued_for_flush()); + cfd->set_queued_for_flush(false); + } + } + return flush_req; +} + +ColumnFamilyData* DBImpl::PickCompactionFromQueue( + std::unique_ptr* token, LogBuffer* log_buffer) { + assert(!compaction_queue_.empty()); + assert(*token == nullptr); + autovector throttled_candidates; + ColumnFamilyData* cfd = nullptr; + while (!compaction_queue_.empty()) { + auto first_cfd = *compaction_queue_.begin(); + compaction_queue_.pop_front(); + assert(first_cfd->queued_for_compaction()); + if (!RequestCompactionToken(first_cfd, false, token, log_buffer)) { + throttled_candidates.push_back(first_cfd); + continue; + } + cfd = first_cfd; + cfd->set_queued_for_compaction(false); + break; + } + // Add throttled compaction candidates back to queue in the original order. + for (auto iter = throttled_candidates.rbegin(); + iter != throttled_candidates.rend(); ++iter) { + compaction_queue_.push_front(*iter); + } + return cfd; +} + +void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) { + mutex_.AssertHeld(); + if (flush_req.cfd_to_max_mem_id_to_persist.empty()) { + return; + } + if (!immutable_db_options_.atomic_flush) { + // For the non-atomic flush case, we never schedule multiple column + // families in the same flush request. 
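+    //
+    // (Illustrative: a non-atomic request thus carries exactly one entry,
+    //  { {cfd, max_memtable_id} }, while the atomic-flush branch below may
+    //  carry one entry per column family being flushed together.)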
+ assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1); + ColumnFamilyData* cfd = + flush_req.cfd_to_max_mem_id_to_persist.begin()->first; + assert(cfd); + + if (!cfd->queued_for_flush() && cfd->imm()->IsFlushPending()) { + cfd->Ref(); + cfd->set_queued_for_flush(true); + ++unscheduled_flushes_; + flush_queue_.push_back(flush_req); + } + } else { + for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) { + ColumnFamilyData* cfd = iter.first; + cfd->Ref(); + } + ++unscheduled_flushes_; + flush_queue_.push_back(flush_req); + } +} + +void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) { + mutex_.AssertHeld(); + if (!cfd->queued_for_compaction() && cfd->NeedsCompaction()) { + AddToCompactionQueue(cfd); + ++unscheduled_compactions_; + } +} + +void DBImpl::SchedulePendingPurge(std::string fname, std::string dir_to_sync, + FileType type, uint64_t number, int job_id) { + mutex_.AssertHeld(); + PurgeFileInfo file_info(fname, dir_to_sync, type, number, job_id); + purge_files_.insert({{number, std::move(file_info)}}); +} + +void DBImpl::BGWorkFlush(void* arg) { + FlushThreadArg fta = *(reinterpret_cast(arg)); + delete reinterpret_cast(arg); + + IOSTATS_SET_THREAD_POOL_ID(fta.thread_pri_); + TEST_SYNC_POINT("DBImpl::BGWorkFlush"); + static_cast_with_check(fta.db_)->BackgroundCallFlush(fta.thread_pri_); + TEST_SYNC_POINT("DBImpl::BGWorkFlush:done"); +} + +void DBImpl::BGWorkCompaction(void* arg) { + CompactionArg ca = *(reinterpret_cast(arg)); + delete reinterpret_cast(arg); + IOSTATS_SET_THREAD_POOL_ID(Env::Priority::LOW); + TEST_SYNC_POINT("DBImpl::BGWorkCompaction"); + auto prepicked_compaction = + static_cast(ca.prepicked_compaction); + static_cast_with_check(ca.db)->BackgroundCallCompaction( + prepicked_compaction, Env::Priority::LOW); + delete prepicked_compaction; +} + +void DBImpl::BGWorkBottomCompaction(void* arg) { + CompactionArg ca = *(static_cast(arg)); + delete static_cast(arg); + IOSTATS_SET_THREAD_POOL_ID(Env::Priority::BOTTOM); + TEST_SYNC_POINT("DBImpl::BGWorkBottomCompaction"); + auto* prepicked_compaction = ca.prepicked_compaction; + assert(prepicked_compaction && prepicked_compaction->compaction); + ca.db->BackgroundCallCompaction(prepicked_compaction, Env::Priority::BOTTOM); + delete prepicked_compaction; +} + +void DBImpl::BGWorkPurge(void* db) { + IOSTATS_SET_THREAD_POOL_ID(Env::Priority::HIGH); + TEST_SYNC_POINT("DBImpl::BGWorkPurge:start"); + reinterpret_cast(db)->BackgroundCallPurge(); + TEST_SYNC_POINT("DBImpl::BGWorkPurge:end"); +} + +void DBImpl::UnscheduleCompactionCallback(void* arg) { + CompactionArg* ca_ptr = reinterpret_cast(arg); + Env::Priority compaction_pri = ca_ptr->compaction_pri_; + if (Env::Priority::BOTTOM == compaction_pri) { + // Decrement bg_bottom_compaction_scheduled_ if priority is BOTTOM + ca_ptr->db->bg_bottom_compaction_scheduled_--; + } else if (Env::Priority::LOW == compaction_pri) { + // Decrement bg_compaction_scheduled_ if priority is LOW + ca_ptr->db->bg_compaction_scheduled_--; + } + CompactionArg ca = *(ca_ptr); + delete reinterpret_cast(arg); + if (ca.prepicked_compaction != nullptr) { + // if it's a manual compaction, set status to ManualCompactionPaused + if (ca.prepicked_compaction->manual_compaction_state) { + ca.prepicked_compaction->manual_compaction_state->done = true; + ca.prepicked_compaction->manual_compaction_state->status = + Status::Incomplete(Status::SubCode::kManualCompactionPaused); + } + if (ca.prepicked_compaction->compaction != nullptr) { + ca.prepicked_compaction->compaction->ReleaseCompactionFiles( + 
Status::Incomplete(Status::SubCode::kManualCompactionPaused)); + delete ca.prepicked_compaction->compaction; + } + delete ca.prepicked_compaction; + } + TEST_SYNC_POINT("DBImpl::UnscheduleCompactionCallback"); +} + +void DBImpl::UnscheduleFlushCallback(void* arg) { + // Decrement bg_flush_scheduled_ in flush callback + reinterpret_cast(arg)->db_->bg_flush_scheduled_--; + Env::Priority flush_pri = reinterpret_cast(arg)->thread_pri_; + if (Env::Priority::LOW == flush_pri) { + TEST_SYNC_POINT("DBImpl::UnscheduleLowFlushCallback"); + } else if (Env::Priority::HIGH == flush_pri) { + TEST_SYNC_POINT("DBImpl::UnscheduleHighFlushCallback"); + } + delete reinterpret_cast(arg); + TEST_SYNC_POINT("DBImpl::UnscheduleFlushCallback"); +} + +Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context, + LogBuffer* log_buffer, FlushReason* reason, + Env::Priority thread_pri) { + mutex_.AssertHeld(); + + Status status; + *reason = FlushReason::kOthers; + // If BG work is stopped due to an error, but a recovery is in progress, + // that means this flush is part of the recovery. So allow it to go through + if (!error_handler_.IsBGWorkStopped()) { + if (shutting_down_.load(std::memory_order_acquire)) { + status = Status::ShutdownInProgress(); + } + } else if (!error_handler_.IsRecoveryInProgress()) { + status = error_handler_.GetBGError(); + } + + if (!status.ok()) { + return status; + } + + autovector bg_flush_args; + std::vector& superversion_contexts = + job_context->superversion_contexts; + autovector column_families_not_to_flush; + while (!flush_queue_.empty()) { + // This cfd is already referenced + const FlushRequest& flush_req = PopFirstFromFlushQueue(); + FlushReason flush_reason = flush_req.flush_reason; + superversion_contexts.clear(); + superversion_contexts.reserve( + flush_req.cfd_to_max_mem_id_to_persist.size()); + + for (const auto& iter : flush_req.cfd_to_max_mem_id_to_persist) { + ColumnFamilyData* cfd = iter.first; + if (cfd->GetMempurgeUsed()) { + // If imm() contains silent memtables (e.g.: because + // MemPurge was activated), requesting a flush will + // mark the imm_needed as true. 
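+        //
+        // (MemPurge is the experimental in-memory garbage-collection path
+        //  enabled via the experimental_mempurge_threshold column family
+        //  option; a "silent" memtable is one that was purged in memory
+        //  rather than flushed to an SST file.)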
+ cfd->imm()->FlushRequested(); + } + + if (cfd->IsDropped() || !cfd->imm()->IsFlushPending()) { + // can't flush this CF, try next one + column_families_not_to_flush.push_back(cfd); + continue; + } + superversion_contexts.emplace_back(SuperVersionContext(true)); + bg_flush_args.emplace_back(cfd, iter.second, + &(superversion_contexts.back()), flush_reason); + } + if (!bg_flush_args.empty()) { + break; + } + } + + if (!bg_flush_args.empty()) { + auto bg_job_limits = GetBGJobLimits(); + for (const auto& arg : bg_flush_args) { + ColumnFamilyData* cfd = arg.cfd_; + ROCKS_LOG_BUFFER( + log_buffer, + "Calling FlushMemTableToOutputFile with column " + "family [%s], flush slots available %d, compaction slots available " + "%d, " + "flush slots scheduled %d, compaction slots scheduled %d", + cfd->GetName().c_str(), bg_job_limits.max_flushes, + bg_job_limits.max_compactions, bg_flush_scheduled_, + bg_compaction_scheduled_); + } + status = FlushMemTablesToOutputFiles(bg_flush_args, made_progress, + job_context, log_buffer, thread_pri); + TEST_SYNC_POINT("DBImpl::BackgroundFlush:BeforeFlush"); +// All the CFD/bg_flush_arg in the FlushReq must have the same flush reason, so +// just grab the first one +#ifndef NDEBUG + for (const auto& bg_flush_arg : bg_flush_args) { + assert(bg_flush_arg.flush_reason_ == bg_flush_args[0].flush_reason_); + } +#endif /* !NDEBUG */ + *reason = bg_flush_args[0].flush_reason_; + for (auto& arg : bg_flush_args) { + ColumnFamilyData* cfd = arg.cfd_; + if (cfd->UnrefAndTryDelete()) { + arg.cfd_ = nullptr; + } + } + } + for (auto cfd : column_families_not_to_flush) { + cfd->UnrefAndTryDelete(); + } + return status; +} + +void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) { + bool made_progress = false; + JobContext job_context(next_job_id_.fetch_add(1), true); + + TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCallFlush:start", nullptr); + + LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, + immutable_db_options_.info_log.get()); + TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:Start:1"); + TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:Start:2"); + { + InstrumentedMutexLock l(&mutex_); + assert(bg_flush_scheduled_); + num_running_flushes_++; + + std::unique_ptr::iterator> + pending_outputs_inserted_elem(new std::list::iterator( + CaptureCurrentFileNumberInPendingOutputs())); + FlushReason reason; + + Status s = BackgroundFlush(&made_progress, &job_context, &log_buffer, + &reason, thread_pri); + if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped() && + reason != FlushReason::kErrorRecovery) { + // Wait a little bit before retrying background flush in + // case this is an environmental problem and we do not want to + // chew up resources for failed flushes for the duration of + // the problem. + uint64_t error_cnt = + default_cf_internal_stats_->BumpAndGetBackgroundErrorCount(); + bg_cv_.SignalAll(); // In case a waiter can proceed despite the error + mutex_.Unlock(); + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "Waiting after background flush error: %s" + "Accumulated background error counts: %" PRIu64, + s.ToString().c_str(), error_cnt); + log_buffer.FlushBufferToLog(); + LogFlush(immutable_db_options_.info_log); + immutable_db_options_.clock->SleepForMicroseconds(1000000); + mutex_.Lock(); + } + + TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FlushFinish:0"); + ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); + + // If flush failed, we want to delete all temporary files that we might have + // created. 
Thus, we force full scan in FindObsoleteFiles()
+    FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() &&
+                                        !s.IsColumnFamilyDropped());
+    // delete unnecessary files if any, this is done outside the mutex
+    if (job_context.HaveSomethingToClean() ||
+        job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
+      mutex_.Unlock();
+      TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound");
+      // Have to flush the info logs before bg_flush_scheduled_--
+      // because if bg_flush_scheduled_ becomes 0 and the lock is
+      // released, the destructor of DB can kick in and destroy all the
+      // states of DB so info_log might not be available after that point.
+      // It also applies to access other states that DB owns.
+      log_buffer.FlushBufferToLog();
+      if (job_context.HaveSomethingToDelete()) {
+        PurgeObsoleteFiles(job_context);
+      }
+      job_context.Clean();
+      mutex_.Lock();
+    }
+    TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp");
+
+    assert(num_running_flushes_ > 0);
+    num_running_flushes_--;
+    bg_flush_scheduled_--;
+    // See if there's more work to be done
+    MaybeScheduleFlushOrCompaction();
+    atomic_flush_install_cv_.SignalAll();
+    bg_cv_.SignalAll();
+    // IMPORTANT: there should be no code after calling SignalAll. This call
+    // may signal the DB destructor that it's OK to proceed with destruction.
+    // In that case, all DB variables will be deallocated and referencing
+    // them will cause trouble.
+  }
+}
+
+void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
+                                      Env::Priority bg_thread_pri) {
+  bool made_progress = false;
+  JobContext job_context(next_job_id_.fetch_add(1), true);
+  TEST_SYNC_POINT("BackgroundCallCompaction:0");
+  LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
+                       immutable_db_options_.info_log.get());
+  {
+    InstrumentedMutexLock l(&mutex_);
+
+    num_running_compactions_++;
+
+    std::unique_ptr<std::list<uint64_t>::iterator>
+        pending_outputs_inserted_elem(new std::list<uint64_t>::iterator(
+            CaptureCurrentFileNumberInPendingOutputs()));
+
+    assert((bg_thread_pri == Env::Priority::BOTTOM &&
+            bg_bottom_compaction_scheduled_) ||
+           (bg_thread_pri == Env::Priority::LOW && bg_compaction_scheduled_));
+    Status s = BackgroundCompaction(&made_progress, &job_context, &log_buffer,
+                                    prepicked_compaction, bg_thread_pri);
+    TEST_SYNC_POINT("BackgroundCallCompaction:1");
+    if (s.IsBusy()) {
+      bg_cv_.SignalAll();  // In case a waiter can proceed despite the error
+      mutex_.Unlock();
+      immutable_db_options_.clock->SleepForMicroseconds(
+          10000);  // prevent hot loop
+      mutex_.Lock();
+    } else if (!s.ok() && !s.IsShutdownInProgress() &&
+               !s.IsManualCompactionPaused() && !s.IsColumnFamilyDropped()) {
+      // Wait a little bit before retrying background compaction in
+      // case this is an environmental problem and we do not want to
+      // chew up resources for failed compactions for the duration of
+      // the problem.
+      uint64_t error_cnt =
+          default_cf_internal_stats_->BumpAndGetBackgroundErrorCount();
+      bg_cv_.SignalAll();  // In case a waiter can proceed despite the error
+      mutex_.Unlock();
+      log_buffer.FlushBufferToLog();
+      ROCKS_LOG_ERROR(immutable_db_options_.info_log,
+                      "Waiting after background compaction error: %s, "
+                      "Accumulated background error counts: %" PRIu64,
+                      s.ToString().c_str(), error_cnt);
+      LogFlush(immutable_db_options_.info_log);
+      immutable_db_options_.clock->SleepForMicroseconds(1000000);
+      mutex_.Lock();
+    } else if (s.IsManualCompactionPaused()) {
+      assert(prepicked_compaction);
+      ManualCompactionState* m = prepicked_compaction->manual_compaction_state;
+      assert(m);
+      ROCKS_LOG_BUFFER(&log_buffer, "[%s] [JOB %d] Manual compaction paused",
+                       m->cfd->GetName().c_str(), job_context.job_id);
+    }
+
+    ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
+
+    // If compaction failed, we want to delete all temporary files that we
+    // might have created (they might not be all recorded in job_context in
+    // case of a failure). Thus, we force full scan in FindObsoleteFiles()
+    FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() &&
+                                        !s.IsManualCompactionPaused() &&
+                                        !s.IsColumnFamilyDropped() &&
+                                        !s.IsBusy());
+    TEST_SYNC_POINT("DBImpl::BackgroundCallCompaction:FoundObsoleteFiles");
+
+    // delete unnecessary files if any, this is done outside the mutex
+    if (job_context.HaveSomethingToClean() ||
+        job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
+      mutex_.Unlock();
+      // Have to flush the info logs before bg_compaction_scheduled_--
+      // because if bg_compaction_scheduled_ becomes 0 and the lock is
+      // released, the destructor of DB can kick in and destroy all the
+      // states of DB so info_log might not be available after that point.
+      // The same applies to accessing other state that the DB owns.
+      log_buffer.FlushBufferToLog();
+      if (job_context.HaveSomethingToDelete()) {
+        PurgeObsoleteFiles(job_context);
+        TEST_SYNC_POINT("DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles");
+      }
+      job_context.Clean();
+      mutex_.Lock();
+    }
+
+    assert(num_running_compactions_ > 0);
+    num_running_compactions_--;
+
+    if (bg_thread_pri == Env::Priority::LOW) {
+      bg_compaction_scheduled_--;
+    } else {
+      assert(bg_thread_pri == Env::Priority::BOTTOM);
+      bg_bottom_compaction_scheduled_--;
+    }
+
+    // See if there's more work to be done
+    MaybeScheduleFlushOrCompaction();
+
+    if (prepicked_compaction != nullptr &&
+        prepicked_compaction->task_token != nullptr) {
+      // Releasing task tokens affects (and asserts on) the DB state, so
+      // must be done before we potentially signal the DB close process to
+      // proceed below.
+      prepicked_compaction->task_token.reset();
+    }
+
+    if (made_progress ||
+        (bg_compaction_scheduled_ == 0 &&
+         bg_bottom_compaction_scheduled_ == 0) ||
+        HasPendingManualCompaction() || unscheduled_compactions_ == 0) {
+      // signal if
+      // * made_progress -- need to wakeup DelayWrite
+      // * bg_{bottom,}_compaction_scheduled_ == 0 -- need to wakeup ~DBImpl
+      // * HasPendingManualCompaction -- need to wakeup RunManualCompaction
+      // If none of this is true, there is no need to signal since nobody is
+      // waiting for it
+      bg_cv_.SignalAll();
+    }
+    // IMPORTANT: there should be no code after calling SignalAll. This call may
+    // signal the DB destructor that it's OK to proceed with destruction. In
+    // that case, all DB variables will be deallocated and referencing them
+    // will cause trouble.
+  }
+}
+
+Status DBImpl::BackgroundCompaction(bool* made_progress,
+                                    JobContext* job_context,
+                                    LogBuffer* log_buffer,
+                                    PrepickedCompaction* prepicked_compaction,
+                                    Env::Priority thread_pri) {
+  ManualCompactionState* manual_compaction =
+      prepicked_compaction == nullptr
+          ? nullptr
+          : prepicked_compaction->manual_compaction_state;
+  *made_progress = false;
+  mutex_.AssertHeld();
+  TEST_SYNC_POINT("DBImpl::BackgroundCompaction:Start");
+
+  const ReadOptions read_options(Env::IOActivity::kCompaction);
+
+  bool is_manual = (manual_compaction != nullptr);
+  std::unique_ptr<Compaction> c;
+  if (prepicked_compaction != nullptr &&
+      prepicked_compaction->compaction != nullptr) {
+    c.reset(prepicked_compaction->compaction);
+  }
+  bool is_prepicked = is_manual || c;
+
+  // (manual_compaction->in_progress == false);
+  bool trivial_move_disallowed =
+      is_manual && manual_compaction->disallow_trivial_move;
+
+  CompactionJobStats compaction_job_stats;
+  Status status;
+  if (!error_handler_.IsBGWorkStopped()) {
+    if (shutting_down_.load(std::memory_order_acquire)) {
+      status = Status::ShutdownInProgress();
+    } else if (is_manual &&
+               manual_compaction->canceled.load(std::memory_order_acquire)) {
+      status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
+    }
+  } else {
+    status = error_handler_.GetBGError();
+    // If we get here, it means a hard error happened after this compaction
+    // was scheduled by MaybeScheduleFlushOrCompaction(), but before it got
+    // a chance to execute. Since we didn't pop a cfd from the compaction
+    // queue, increment unscheduled_compactions_
+    unscheduled_compactions_++;
+  }
+
+  if (!status.ok()) {
+    if (is_manual) {
+      manual_compaction->status = status;
+      manual_compaction->done = true;
+      manual_compaction->in_progress = false;
+      manual_compaction = nullptr;
+    }
+    if (c) {
+      c->ReleaseCompactionFiles(status);
+      c.reset();
+    }
+    return status;
+  }
+
+  if (is_manual) {
+    // another thread cannot pick up the same work
+    manual_compaction->in_progress = true;
+  }
+
+  TEST_SYNC_POINT("DBImpl::BackgroundCompaction:InProgress");
+
+  std::unique_ptr<TaskLimiterToken> task_token;
+
+  // InternalKey manual_end_storage;
+  // InternalKey* manual_end = &manual_end_storage;
+  bool sfm_reserved_compact_space = false;
+  if (is_manual) {
+    ManualCompactionState* m = manual_compaction;
+    assert(m->in_progress);
+    if (!c) {
+      m->done = true;
+      m->manual_end = nullptr;
+      ROCKS_LOG_BUFFER(
+          log_buffer,
+          "[%s] Manual compaction from level-%d from %s .. "
+          "%s; nothing to do\n",
+          m->cfd->GetName().c_str(), m->input_level,
+          (m->begin ? m->begin->DebugString(true).c_str() : "(begin)"),
+          (m->end ? m->end->DebugString(true).c_str() : "(end)"));
+    } else {
+      // First check if we have enough room to do the compaction
+      bool enough_room = EnoughRoomForCompaction(
+          m->cfd, *(c->inputs()), &sfm_reserved_compact_space, log_buffer);
+
+      if (!enough_room) {
+        // Then don't do the compaction
+        c->ReleaseCompactionFiles(status);
+        c.reset();
+        // m's vars will get set properly at the end of this function,
+        // as long as status == CompactionTooLarge
+        status = Status::CompactionTooLarge();
+      } else {
+        ROCKS_LOG_BUFFER(
+            log_buffer,
+            "[%s] Manual compaction from level-%d to level-%d from %s .. "
+            "%s; will stop at %s\n",
+            m->cfd->GetName().c_str(), m->input_level, c->output_level(),
+            (m->begin ? m->begin->DebugString(true).c_str() : "(begin)"),
+            (m->end ? m->end->DebugString(true).c_str() : "(end)"),
+            ((m->done || m->manual_end == nullptr)
+                 ?
"(end)" + : m->manual_end->DebugString(true).c_str())); + } + } + } else if (!is_prepicked && !compaction_queue_.empty()) { + if (HasExclusiveManualCompaction()) { + // Can't compact right now, but try again later + TEST_SYNC_POINT("DBImpl::BackgroundCompaction()::Conflict"); + + // Stay in the compaction queue. + unscheduled_compactions_++; + + return Status::OK(); + } + + auto cfd = PickCompactionFromQueue(&task_token, log_buffer); + if (cfd == nullptr) { + // Can't find any executable task from the compaction queue. + // All tasks have been throttled by compaction thread limiter. + ++unscheduled_compactions_; + return Status::Busy(); + } + + // We unreference here because the following code will take a Ref() on + // this cfd if it is going to use it (Compaction class holds a + // reference). + // This will all happen under a mutex so we don't have to be afraid of + // somebody else deleting it. + if (cfd->UnrefAndTryDelete()) { + // This was the last reference of the column family, so no need to + // compact. + return Status::OK(); + } + + // Pick up latest mutable CF Options and use it throughout the + // compaction job + // Compaction makes a copy of the latest MutableCFOptions. It should be used + // throughout the compaction procedure to make sure consistency. It will + // eventually be installed into SuperVersion + auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); + if (!mutable_cf_options->disable_auto_compactions && !cfd->IsDropped()) { + // NOTE: try to avoid unnecessary copy of MutableCFOptions if + // compaction is not necessary. Need to make sure mutex is held + // until we make a copy in the following code + TEST_SYNC_POINT("DBImpl::BackgroundCompaction():BeforePickCompaction"); + c.reset(cfd->PickCompaction(*mutable_cf_options, mutable_db_options_, + log_buffer)); + TEST_SYNC_POINT("DBImpl::BackgroundCompaction():AfterPickCompaction"); + + if (c != nullptr) { + bool enough_room = EnoughRoomForCompaction( + cfd, *(c->inputs()), &sfm_reserved_compact_space, log_buffer); + + if (!enough_room) { + // Then don't do the compaction + c->ReleaseCompactionFiles(status); + c->column_family_data() + ->current() + ->storage_info() + ->ComputeCompactionScore(*(c->immutable_options()), + *(c->mutable_cf_options())); + AddToCompactionQueue(cfd); + ++unscheduled_compactions_; + + c.reset(); + // Don't need to sleep here, because BackgroundCallCompaction + // will sleep if !s.ok() + status = Status::CompactionTooLarge(); + } else { + // update statistics + size_t num_files = 0; + for (auto& each_level : *c->inputs()) { + num_files += each_level.files.size(); + } + RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION, num_files); + + // There are three things that can change compaction score: + // 1) When flush or compaction finish. This case is covered by + // InstallSuperVersionAndScheduleWork + // 2) When MutableCFOptions changes. This case is also covered by + // InstallSuperVersionAndScheduleWork, because this is when the new + // options take effect. + // 3) When we Pick a new compaction, we "remove" those files being + // compacted from the calculation, which then influences compaction + // score. Here we check if we need the new compaction even without the + // files that are currently being compacted. If we need another + // compaction, we might be able to execute it in parallel, so we add + // it to the queue and schedule a new thread. + if (cfd->NeedsCompaction()) { + // Yes, we need more compactions! 
+            AddToCompactionQueue(cfd);
+            ++unscheduled_compactions_;
+            MaybeScheduleFlushOrCompaction();
+          }
+        }
+      }
+    }
+  }
+
+  IOStatus io_s;
+  if (!c) {
+    // Nothing to do
+    ROCKS_LOG_BUFFER(log_buffer, "Compaction nothing to do");
+  } else if (c->deletion_compaction()) {
+    // TODO(icanadi) Do we want to honor snapshots here? i.e. not delete old
+    // file if there is an alive snapshot pointing to it
+    TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction",
+                             c->column_family_data());
+    assert(c->num_input_files(1) == 0);
+    assert(c->column_family_data()->ioptions()->compaction_style ==
+           kCompactionStyleFIFO);
+
+    compaction_job_stats.num_input_files = c->num_input_files(0);
+
+    NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
+                            compaction_job_stats, job_context->job_id);
+
+    for (const auto& f : *c->inputs(0)) {
+      c->edit()->DeleteFile(c->level(), f->fd.GetNumber());
+    }
+    status = versions_->LogAndApply(
+        c->column_family_data(), *c->mutable_cf_options(), read_options,
+        c->edit(), &mutex_, directories_.GetDbDir());
+    io_s = versions_->io_status();
+    InstallSuperVersionAndScheduleWork(c->column_family_data(),
+                                       &job_context->superversion_contexts[0],
+                                       *c->mutable_cf_options());
+    ROCKS_LOG_BUFFER(log_buffer, "[%s] Deleted %d files\n",
+                     c->column_family_data()->GetName().c_str(),
+                     c->num_input_files(0));
+    *made_progress = true;
+    TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction",
+                             c->column_family_data());
+  } else if (!trivial_move_disallowed && c->IsTrivialMove()) {
+    TEST_SYNC_POINT("DBImpl::BackgroundCompaction:TrivialMove");
+    TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction",
+                             c->column_family_data());
+    // Instrument for event update
+    // TODO(yhchiang): add op details for showing trivial-move.
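+    // A trivial move rewrites no data: the input files are reassigned to the
+    // output level purely through a VersionEdit (DeleteFile + AddFile below),
+    // so the only cost is a MANIFEST write.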
+ ThreadStatusUtil::SetColumnFamily(c->column_family_data()); + ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION); + + compaction_job_stats.num_input_files = c->num_input_files(0); + + NotifyOnCompactionBegin(c->column_family_data(), c.get(), status, + compaction_job_stats, job_context->job_id); + + // Move files to next level + int32_t moved_files = 0; + int64_t moved_bytes = 0; + for (unsigned int l = 0; l < c->num_input_levels(); l++) { + if (c->level(l) == c->output_level()) { + continue; + } + for (size_t i = 0; i < c->num_input_files(l); i++) { + FileMetaData* f = c->input(l, i); + c->edit()->DeleteFile(c->level(l), f->fd.GetNumber()); + c->edit()->AddFile( + c->output_level(), f->fd.GetNumber(), f->fd.GetPathId(), + f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno, + f->fd.largest_seqno, f->marked_for_compaction, f->temperature, + f->oldest_blob_file_number, f->oldest_ancester_time, + f->file_creation_time, f->epoch_number, f->file_checksum, + f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size, f->tail_size); + + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] Moving #%" PRIu64 " to level-%d %" PRIu64 " bytes\n", + c->column_family_data()->GetName().c_str(), f->fd.GetNumber(), + c->output_level(), f->fd.GetFileSize()); + ++moved_files; + moved_bytes += f->fd.GetFileSize(); + } + } + if (c->compaction_reason() == CompactionReason::kLevelMaxLevelSize && + c->immutable_options()->compaction_pri == kRoundRobin) { + int start_level = c->start_level(); + if (start_level > 0) { + auto vstorage = c->input_version()->storage_info(); + c->edit()->AddCompactCursor( + start_level, + vstorage->GetNextCompactCursor(start_level, c->num_input_files(0))); + } + } + status = versions_->LogAndApply( + c->column_family_data(), *c->mutable_cf_options(), read_options, + c->edit(), &mutex_, directories_.GetDbDir()); + io_s = versions_->io_status(); + // Use latest MutableCFOptions + InstallSuperVersionAndScheduleWork(c->column_family_data(), + &job_context->superversion_contexts[0], + *c->mutable_cf_options()); + + VersionStorageInfo::LevelSummaryStorage tmp; + c->column_family_data()->internal_stats()->IncBytesMoved(c->output_level(), + moved_bytes); + { + event_logger_.LogToBuffer(log_buffer) + << "job" << job_context->job_id << "event" + << "trivial_move" + << "destination_level" << c->output_level() << "files" << moved_files + << "total_files_size" << moved_bytes; + } + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] Moved #%d files to level-%d %" PRIu64 " bytes %s: %s\n", + c->column_family_data()->GetName().c_str(), moved_files, + c->output_level(), moved_bytes, status.ToString().c_str(), + c->column_family_data()->current()->storage_info()->LevelSummary(&tmp)); + *made_progress = true; + + // Clear Instrument + ThreadStatusUtil::ResetThreadStatus(); + TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction", + c->column_family_data()); + } else if (!is_prepicked && c->output_level() > 0 && + c->output_level() == + c->column_family_data() + ->current() + ->storage_info() + ->MaxOutputLevel( + immutable_db_options_.allow_ingest_behind) && + env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) { + // Forward compactions involving last level to the bottom pool if it exists, + // such that compactions unlikely to contribute to write stalls can be + // delayed or deprioritized. 
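+    // Hand the already-picked compaction off to the bottom-priority thread
+    // pool: wrap it in a PrepickedCompaction, transfer the thread-limiter
+    // token, and reschedule; this thread then returns without running it.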
+    TEST_SYNC_POINT("DBImpl::BackgroundCompaction:ForwardToBottomPriPool");
+    CompactionArg* ca = new CompactionArg;
+    ca->db = this;
+    ca->compaction_pri_ = Env::Priority::BOTTOM;
+    ca->prepicked_compaction = new PrepickedCompaction;
+    ca->prepicked_compaction->compaction = c.release();
+    ca->prepicked_compaction->manual_compaction_state = nullptr;
+    // Transfer requested token, so it doesn't need to be requested again.
+    ca->prepicked_compaction->task_token = std::move(task_token);
+    ++bg_bottom_compaction_scheduled_;
+    env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca, Env::Priority::BOTTOM,
+                   this, &DBImpl::UnscheduleCompactionCallback);
+  } else {
+    TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction",
+                             c->column_family_data());
+    int output_level __attribute__((__unused__));
+    output_level = c->output_level();
+    TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:NonTrivial",
+                             &output_level);
+    std::vector<SequenceNumber> snapshot_seqs;
+    SequenceNumber earliest_write_conflict_snapshot;
+    SnapshotChecker* snapshot_checker;
+    GetSnapshotContext(job_context, &snapshot_seqs,
+                       &earliest_write_conflict_snapshot, &snapshot_checker);
+    assert(is_snapshot_supported_ || snapshots_.empty());
+
+    CompactionJob compaction_job(
+        job_context->job_id, c.get(), immutable_db_options_,
+        mutable_db_options_, file_options_for_compaction_, versions_.get(),
+        &shutting_down_, log_buffer, directories_.GetDbDir(),
+        GetDataDir(c->column_family_data(), c->output_path_id()),
+        GetDataDir(c->column_family_data(), 0), stats_, &mutex_,
+        &error_handler_, snapshot_seqs, earliest_write_conflict_snapshot,
+        snapshot_checker, job_context, table_cache_, &event_logger_,
+        c->mutable_cf_options()->paranoid_file_checks,
+        c->mutable_cf_options()->report_bg_io_stats, dbname_,
+        &compaction_job_stats, thread_pri, io_tracer_,
+        is_manual ? manual_compaction->canceled
+                  : kManualCompactionCanceledFalse_,
+        db_id_, db_session_id_, c->column_family_data()->GetFullHistoryTsLow(),
+        c->trim_ts(), &blob_callback_, &bg_compaction_scheduled_,
+        &bg_bottom_compaction_scheduled_);
+    compaction_job.Prepare();
+
+    NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
+                            compaction_job_stats, job_context->job_id);
+    mutex_.Unlock();
+    TEST_SYNC_POINT_CALLBACK(
+        "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", nullptr);
+    // Should handle error?
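+    // Run()'s result is intentionally left unchecked here; the job's
+    // outcome is re-examined by Install() below, which is where failures
+    // surface as `status`.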
+    compaction_job.Run().PermitUncheckedError();
+    TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun");
+    mutex_.Lock();
+
+    status = compaction_job.Install(*c->mutable_cf_options());
+    io_s = compaction_job.io_status();
+    if (status.ok()) {
+      InstallSuperVersionAndScheduleWork(
+          c->column_family_data(), &job_context->superversion_contexts[0],
+          *c->mutable_cf_options());
+    }
+    *made_progress = true;
+    TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction",
+                             c->column_family_data());
+  }
+
+  if (status.ok() && !io_s.ok()) {
+    status = io_s;
+  } else {
+    io_s.PermitUncheckedError();
+  }
+
+  if (c != nullptr) {
+    c->ReleaseCompactionFiles(status);
+    *made_progress = true;
+
+    // Need to make sure SstFileManager does its bookkeeping
+    auto sfm = static_cast<SstFileManagerImpl*>(
+        immutable_db_options_.sst_file_manager.get());
+    if (sfm && sfm_reserved_compact_space) {
+      sfm->OnCompactionCompletion(c.get());
+    }
+
+    NotifyOnCompactionCompleted(c->column_family_data(), c.get(), status,
+                                compaction_job_stats, job_context->job_id);
+  }
+
+  if (status.ok() || status.IsCompactionTooLarge() ||
+      status.IsManualCompactionPaused()) {
+    // Done
+  } else if (status.IsColumnFamilyDropped() || status.IsShutdownInProgress()) {
+    // Ignore compaction errors found during shutting down
+  } else {
+    ROCKS_LOG_WARN(immutable_db_options_.info_log, "Compaction error: %s",
+                   status.ToString().c_str());
+    if (!io_s.ok()) {
+      // Error while writing to MANIFEST.
+      // In fact, versions_->io_status() can also be the result of renaming
+      // CURRENT file. With current code, it's just difficult to tell. So just
+      // be pessimistic and try to write to a new MANIFEST.
+      // TODO: distinguish between MANIFEST write and CURRENT renaming
+      auto err_reason = versions_->io_status().ok()
+                            ? BackgroundErrorReason::kCompaction
+                            : BackgroundErrorReason::kManifestWrite;
+      error_handler_.SetBGError(io_s, err_reason);
+    } else {
+      error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
+    }
+    if (c != nullptr && !is_manual && !error_handler_.IsBGWorkStopped()) {
+      // Put this cfd back in the compaction queue so we can retry after some
+      // time
+      auto cfd = c->column_family_data();
+      assert(cfd != nullptr);
+      // Since this compaction failed, we need to recompute the score so it
+      // takes the original input files into account
+      c->column_family_data()
+          ->current()
+          ->storage_info()
+          ->ComputeCompactionScore(*(c->immutable_options()),
+                                   *(c->mutable_cf_options()));
+      if (!cfd->queued_for_compaction()) {
+        AddToCompactionQueue(cfd);
+        ++unscheduled_compactions_;
+      }
+    }
+  }
+  // this will unref its input_version and column_family_data
+  c.reset();
+
+  if (is_manual) {
+    ManualCompactionState* m = manual_compaction;
+    if (!status.ok()) {
+      m->status = status;
+      m->done = true;
+    }
+    // For universal compaction:
+    // Because universal compaction always happens at level 0, one
+    // compaction will pick up all overlapped files. No files will be
+    // filtered out due to size limit and left for a successive compaction.
+    // So we can safely conclude the current compaction.
+    //
+    // Also note that, if we don't stop here, then the current compaction
+    // writes a new file back to level 0, which will be used in successive
+    // compaction. Hence the manual compaction will never finish.
+    //
+    // Stop the compaction if manual_end points to nullptr -- this means
+    // that we compacted the whole range. manual_end should always point
+    // to nullptr in case of universal compaction
+    if (m->manual_end == nullptr) {
+      m->done = true;
+    }
+    if (!m->done) {
+      // We only compacted part of the requested range. Update *m
+      // to the range that is left to be compacted.
+      // Universal and FIFO compactions should always compact the whole range
+      assert(m->cfd->ioptions()->compaction_style !=
+                 kCompactionStyleUniversal ||
+             m->cfd->ioptions()->num_levels > 1);
+      assert(m->cfd->ioptions()->compaction_style != kCompactionStyleFIFO);
+      m->tmp_storage = *m->manual_end;
+      m->begin = &m->tmp_storage;
+      m->incomplete = true;
+    }
+    m->in_progress = false;  // not being processed anymore
+  }
+  TEST_SYNC_POINT("DBImpl::BackgroundCompaction:Finish");
+  return status;
+}
+
+bool DBImpl::HasPendingManualCompaction() {
+  return (!manual_compaction_dequeue_.empty());
+}
+
+void DBImpl::AddManualCompaction(DBImpl::ManualCompactionState* m) {
+  assert(manual_compaction_paused_ == 0);
+  manual_compaction_dequeue_.push_back(m);
+}
+
+void DBImpl::RemoveManualCompaction(DBImpl::ManualCompactionState* m) {
+  // Remove from queue
+  std::deque<ManualCompactionState*>::iterator it =
+      manual_compaction_dequeue_.begin();
+  while (it != manual_compaction_dequeue_.end()) {
+    if (m == (*it)) {
+      it = manual_compaction_dequeue_.erase(it);
+      return;
+    }
+    ++it;
+  }
+  assert(false);
+  return;
+}
+
+bool DBImpl::ShouldntRunManualCompaction(ManualCompactionState* m) {
+  if (m->exclusive) {
+    return (bg_bottom_compaction_scheduled_ > 0 ||
+            bg_compaction_scheduled_ > 0);
+  }
+  std::deque<ManualCompactionState*>::iterator it =
+      manual_compaction_dequeue_.begin();
+  bool seen = false;
+  while (it != manual_compaction_dequeue_.end()) {
+    if (m == (*it)) {
+      ++it;
+      seen = true;
+      continue;
+    } else if (MCOverlap(m, (*it)) && (!seen && !(*it)->in_progress)) {
+      // Consider the other manual compaction *it, conflicts if:
+      // overlaps with m
+      // and (*it) is ahead in the queue and is not yet in progress
+      return true;
+    }
+    ++it;
+  }
+  return false;
+}
+
+bool DBImpl::HaveManualCompaction(ColumnFamilyData* cfd) {
+  // Remove from priority queue
+  std::deque<ManualCompactionState*>::iterator it =
+      manual_compaction_dequeue_.begin();
+  while (it != manual_compaction_dequeue_.end()) {
+    if ((*it)->exclusive) {
+      return true;
+    }
+    if ((cfd == (*it)->cfd) && (!((*it)->in_progress || (*it)->done))) {
+      // Allow automatic compaction if manual compaction is
+      // in progress
+      return true;
+    }
+    ++it;
+  }
+  return false;
+}
+
+bool DBImpl::HasExclusiveManualCompaction() {
+  // Remove from priority queue
+  std::deque<ManualCompactionState*>::iterator it =
+      manual_compaction_dequeue_.begin();
+  while (it != manual_compaction_dequeue_.end()) {
+    if ((*it)->exclusive) {
+      return true;
+    }
+    ++it;
+  }
+  return false;
+}
+
+bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) {
+  if ((m->exclusive) || (m1->exclusive)) {
+    return true;
+  }
+  if (m->cfd != m1->cfd) {
+    return false;
+  }
+  return false;
+}
+
+void DBImpl::BuildCompactionJobInfo(
+    const ColumnFamilyData* cfd, Compaction* c, const Status& st,
+    const CompactionJobStats& compaction_job_stats, const int job_id,
+    const Version* current, CompactionJobInfo* compaction_job_info) const {
+  assert(compaction_job_info != nullptr);
+  compaction_job_info->cf_id = cfd->GetID();
+  compaction_job_info->cf_name = cfd->GetName();
+  compaction_job_info->status = st;
+  compaction_job_info->thread_id = env_->GetThreadID();
+  compaction_job_info->job_id = job_id;
+  compaction_job_info->base_input_level = c->start_level();
+  compaction_job_info->output_level = c->output_level();
+  compaction_job_info->stats = compaction_job_stats;
+  compaction_job_info->table_properties = c->GetOutputTableProperties();
+  compaction_job_info->compaction_reason = c->compaction_reason();
+  compaction_job_info->compression = c->output_compression();
+
+  const ReadOptions read_options(Env::IOActivity::kCompaction);
+  for (size_t i = 0; i < c->num_input_levels(); ++i) {
+    for (const auto fmd : *c->inputs(i)) {
+      const FileDescriptor& desc = fmd->fd;
+      const uint64_t file_number = desc.GetNumber();
+      auto fn = TableFileName(c->immutable_options()->cf_paths, file_number,
+                              desc.GetPathId());
+      compaction_job_info->input_files.push_back(fn);
+      compaction_job_info->input_file_infos.push_back(CompactionFileInfo{
+          static_cast<int>(i), file_number, fmd->oldest_blob_file_number});
+      if (compaction_job_info->table_properties.count(fn) == 0) {
+        std::shared_ptr<const TableProperties> tp;
+        auto s = current->GetTableProperties(read_options, &tp, fmd, &fn);
+        if (s.ok()) {
+          compaction_job_info->table_properties[fn] = tp;
+        }
+      }
+    }
+  }
+  for (const auto& newf : c->edit()->GetNewFiles()) {
+    const FileMetaData& meta = newf.second;
+    const FileDescriptor& desc = meta.fd;
+    const uint64_t file_number = desc.GetNumber();
+    compaction_job_info->output_files.push_back(TableFileName(
+        c->immutable_options()->cf_paths, file_number, desc.GetPathId()));
+    compaction_job_info->output_file_infos.push_back(CompactionFileInfo{
+        newf.first, file_number, meta.oldest_blob_file_number});
+  }
+  compaction_job_info->blob_compression_type =
+      c->mutable_cf_options()->blob_compression_type;
+
+  // Update BlobFilesInfo.
+  for (const auto& blob_file : c->edit()->GetBlobFileAdditions()) {
+    BlobFileAdditionInfo blob_file_addition_info(
+        BlobFileName(c->immutable_options()->cf_paths.front().path,
+                     blob_file.GetBlobFileNumber()) /*blob_file_path*/,
+        blob_file.GetBlobFileNumber(), blob_file.GetTotalBlobCount(),
+        blob_file.GetTotalBlobBytes());
+    compaction_job_info->blob_file_addition_infos.emplace_back(
+        std::move(blob_file_addition_info));
+  }
+
+  // Update BlobFilesGarbageInfo.
+  for (const auto& blob_file : c->edit()->GetBlobFileGarbages()) {
+    BlobFileGarbageInfo blob_file_garbage_info(
+        BlobFileName(c->immutable_options()->cf_paths.front().path,
+                     blob_file.GetBlobFileNumber()) /*blob_file_path*/,
+        blob_file.GetBlobFileNumber(), blob_file.GetGarbageBlobCount(),
+        blob_file.GetGarbageBlobBytes());
+    compaction_job_info->blob_file_garbage_infos.emplace_back(
+        std::move(blob_file_garbage_info));
+  }
+}
+
+// SuperVersionContext gets created and destructed outside of the lock --
+// we use this conveniently to:
+// * malloc one SuperVersion() outside of the lock -- new_superversion
+// * delete SuperVersion()s outside of the lock -- superversions_to_free
+//
+// However, if InstallSuperVersionAndScheduleWork() gets called twice with the
+// same sv_context, we can't reuse the SuperVersion() that got malloced,
+// because the first call already used it. In that rare case, we take a hit
+// and create a new SuperVersion() inside of the mutex. We do a similar thing
+// for superversion_to_free.
+
+void DBImpl::InstallSuperVersionAndScheduleWork(
+    ColumnFamilyData* cfd, SuperVersionContext* sv_context,
+    const MutableCFOptions& mutable_cf_options) {
+  mutex_.AssertHeld();
+
+  // Update max_total_in_memory_state_
+  size_t old_memtable_size = 0;
+  auto* old_sv = cfd->GetSuperVersion();
+  if (old_sv) {
+    old_memtable_size = old_sv->mutable_cf_options.write_buffer_size *
+                        old_sv->mutable_cf_options.max_write_buffer_number;
+  }
+
+  // this branch is unlikely to be taken
+  if (UNLIKELY(sv_context->new_superversion == nullptr)) {
+    sv_context->NewSuperVersion();
+  }
+  cfd->InstallSuperVersion(sv_context, mutable_cf_options);
+
+  // There may be a small data race here. The snapshot tricking bottommost
+  // compaction may already be released here. But assuming there will always
+  // be newer snapshots created and released frequently, the compaction will
+  // be triggered soon anyway.
+  bottommost_files_mark_threshold_ = kMaxSequenceNumber;
+  for (auto* my_cfd : *versions_->GetColumnFamilySet()) {
+    if (!my_cfd->ioptions()->allow_ingest_behind) {
+      bottommost_files_mark_threshold_ = std::min(
+          bottommost_files_mark_threshold_,
+          my_cfd->current()->storage_info()->bottommost_files_mark_threshold());
+    }
+  }
+
+  // Whenever we install a new SuperVersion, we might need to issue new
+  // flushes or compactions.
+  SchedulePendingCompaction(cfd);
+  MaybeScheduleFlushOrCompaction();
+
+  // Update max_total_in_memory_state_
+  max_total_in_memory_state_ = max_total_in_memory_state_ - old_memtable_size +
+                               mutable_cf_options.write_buffer_size *
+                                   mutable_cf_options.max_write_buffer_number;
+}
+
+// ShouldPurge is called by FindObsoleteFiles when doing a full scan,
+// and db mutex (mutex_) should already be held.
+// Actually, the current implementation of FindObsoleteFiles with
+// full_scan=true can issue I/O requests to obtain the list of files in
+// directories, e.g. env_->GetChildren, while holding the db mutex.
+bool DBImpl::ShouldPurge(uint64_t file_number) const {
+  return files_grabbed_for_purge_.find(file_number) ==
+             files_grabbed_for_purge_.end() &&
+         purge_files_.find(file_number) == purge_files_.end();
+}
+
+// MarkAsGrabbedForPurge is called by FindObsoleteFiles, and db mutex
+// (mutex_) should already be held.
+void DBImpl::MarkAsGrabbedForPurge(uint64_t file_number) {
+  files_grabbed_for_purge_.insert(file_number);
+}
+
+void DBImpl::SetSnapshotChecker(SnapshotChecker* snapshot_checker) {
+  InstrumentedMutexLock l(&mutex_);
+  // snapshot_checker_ should only be set once. If we need to set it multiple
+  // times, we need to make sure the old one is not deleted while it is still
+  // in use by a compaction job.
+  assert(!snapshot_checker_);
+  snapshot_checker_.reset(snapshot_checker);
+}
+
+void DBImpl::GetSnapshotContext(
+    JobContext* job_context, std::vector<SequenceNumber>* snapshot_seqs,
+    SequenceNumber* earliest_write_conflict_snapshot,
+    SnapshotChecker** snapshot_checker_ptr) {
+  mutex_.AssertHeld();
+  assert(job_context != nullptr);
+  assert(snapshot_seqs != nullptr);
+  assert(earliest_write_conflict_snapshot != nullptr);
+  assert(snapshot_checker_ptr != nullptr);
+
+  *snapshot_checker_ptr = snapshot_checker_.get();
+  if (use_custom_gc_ && *snapshot_checker_ptr == nullptr) {
+    *snapshot_checker_ptr = DisableGCSnapshotChecker::Instance();
+  }
+  if (*snapshot_checker_ptr != nullptr) {
+    // If snapshot_checker is used, that means the flush/compaction may
+    // contain values not visible to snapshots taken after the
+    // flush/compaction job starts. Take a snapshot and it will appear
+    // in snapshot_seqs and force the compaction iterator to consider such
+    // snapshots.
+    const Snapshot* job_snapshot =
+        GetSnapshotImpl(false /*write_conflict_boundary*/, false /*lock*/);
+    job_context->job_snapshot.reset(new ManagedSnapshot(this, job_snapshot));
+  }
+  *snapshot_seqs = snapshots_.GetAll(earliest_write_conflict_snapshot);
+}
+
+Status DBImpl::WaitForCompact(bool abort_on_pause) {
+  InstrumentedMutexLock l(&mutex_);
+  for (;;) {
+    if (shutting_down_.load(std::memory_order_acquire)) {
+      return Status::ShutdownInProgress();
+    }
+    if (bg_work_paused_ && abort_on_pause) {
+      return Status::Aborted();
+    }
+    if ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
+         bg_flush_scheduled_ || unscheduled_compactions_ ||
+         unscheduled_flushes_) &&
+        (error_handler_.GetBGError().ok())) {
+      bg_cv_.Wait();
+    } else {
+      return error_handler_.GetBGError();
+    }
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_debug.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_debug.cc
new file mode 100644
index 0000000000..9856caced3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_debug.cc
@@ -0,0 +1,314 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef NDEBUG
+
+#include "db/column_family.h"
+#include "db/db_impl/db_impl.h"
+#include "db/error_handler.h"
+#include "db/periodic_task_scheduler.h"
+#include "monitoring/thread_status_updater.h"
+#include "util/cast_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+uint64_t DBImpl::TEST_GetLevel0TotalSize() {
+  InstrumentedMutexLock l(&mutex_);
+  return default_cf_handle_->cfd()->current()->storage_info()->NumLevelBytes(0);
+}
+
+Status DBImpl::TEST_SwitchWAL() {
+  WriteContext write_context;
+  InstrumentedMutexLock l(&mutex_);
+  void* writer = TEST_BeginWrite();
+  auto s = SwitchWAL(&write_context);
+  TEST_EndWrite(writer);
+  return s;
+}
+
+uint64_t DBImpl::TEST_MaxNextLevelOverlappingBytes(
+    ColumnFamilyHandle* column_family) {
+  ColumnFamilyData* cfd;
+  if (column_family == nullptr) {
+    cfd = default_cf_handle_->cfd();
+  } else {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+    cfd = cfh->cfd();
+  }
+  InstrumentedMutexLock l(&mutex_);
+  return cfd->current()->storage_info()->MaxNextLevelOverlappingBytes();
+}
+
+void DBImpl::TEST_GetFilesMetaData(
+    ColumnFamilyHandle* column_family,
+    std::vector<std::vector<FileMetaData>>* metadata,
+    std::vector<std::shared_ptr<BlobFileMetaData>>* blob_metadata) {
+  assert(metadata);
+
+  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+  assert(cfh);
+
+  auto cfd = cfh->cfd();
+  assert(cfd);
+
+  InstrumentedMutexLock l(&mutex_);
+
+  const auto* current = cfd->current();
+  assert(current);
+
+  const auto* vstorage = current->storage_info();
+  assert(vstorage);
+
+  metadata->resize(NumberLevels());
+
+  for (int level = 0; level < NumberLevels(); ++level) {
+    const std::vector<FileMetaData*>& files = vstorage->LevelFiles(level);
+
+    (*metadata)[level].clear();
+    (*metadata)[level].reserve(files.size());
+
+    for (const auto& f : files) {
+      (*metadata)[level].push_back(*f);
+    }
+  }
+
+  if (blob_metadata) {
+    *blob_metadata = vstorage->GetBlobFiles();
+  }
+}
+
+uint64_t DBImpl::TEST_Current_Manifest_FileNo() {
+  return versions_->manifest_file_number();
+}
+
+uint64_t DBImpl::TEST_Current_Next_FileNo() {
+  return versions_->current_next_file_number();
+}
+
+Status DBImpl::TEST_CompactRange(int level, const Slice* begin,
+                                 const Slice* end,
+                                 ColumnFamilyHandle* column_family,
+                                 bool disallow_trivial_move) {
+  ColumnFamilyData* cfd;
+  if (column_family == nullptr) {
+    cfd = default_cf_handle_->cfd();
+  } else {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+    cfd = cfh->cfd();
+  }
+  int output_level =
+      (cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
+       cfd->ioptions()->compaction_style == kCompactionStyleFIFO)
+          ? level
+          : level + 1;
+  return RunManualCompaction(
+      cfd, level, output_level, CompactRangeOptions(), begin, end, true,
+      disallow_trivial_move,
+      std::numeric_limits<uint64_t>::max() /*max_file_num_to_ignore*/,
+      "" /*trim_ts*/);
+}
+
+Status DBImpl::TEST_SwitchMemtable(ColumnFamilyData* cfd) {
+  WriteContext write_context;
+  InstrumentedMutexLock l(&mutex_);
+  if (cfd == nullptr) {
+    cfd = default_cf_handle_->cfd();
+  }
+
+  Status s;
+  void* writer = TEST_BeginWrite();
+  if (two_write_queues_) {
+    WriteThread::Writer nonmem_w;
+    nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
+    s = SwitchMemtable(cfd, &write_context);
+    nonmem_write_thread_.ExitUnbatched(&nonmem_w);
+  } else {
+    s = SwitchMemtable(cfd, &write_context);
+  }
+  TEST_EndWrite(writer);
+  return s;
+}
+
+Status DBImpl::TEST_FlushMemTable(bool wait, bool allow_write_stall,
+                                  ColumnFamilyHandle* cfh) {
+  FlushOptions fo;
+  fo.wait = wait;
+  fo.allow_write_stall = allow_write_stall;
+  ColumnFamilyData* cfd;
+  if (cfh == nullptr) {
+    cfd = default_cf_handle_->cfd();
+  } else {
+    auto cfhi = static_cast_with_check<ColumnFamilyHandleImpl>(cfh);
+    cfd = cfhi->cfd();
+  }
+  return FlushMemTable(cfd, fo, FlushReason::kTest);
+}
+
+Status DBImpl::TEST_FlushMemTable(ColumnFamilyData* cfd,
+                                  const FlushOptions& flush_opts) {
+  return FlushMemTable(cfd, flush_opts, FlushReason::kTest);
+}
+
+Status DBImpl::TEST_AtomicFlushMemTables(
+    const autovector<ColumnFamilyData*>& provided_candidate_cfds,
+    const FlushOptions& flush_opts) {
+  return AtomicFlushMemTables(flush_opts, FlushReason::kTest,
+                              provided_candidate_cfds);
+}
+
+Status DBImpl::TEST_WaitForBackgroundWork() {
+  InstrumentedMutexLock l(&mutex_);
+  WaitForBackgroundWork();
+  return error_handler_.GetBGError();
+}
+
+Status DBImpl::TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family) {
+  ColumnFamilyData* cfd;
+  if (column_family == nullptr) {
+    cfd = default_cf_handle_->cfd();
+  } else {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+    cfd = cfh->cfd();
+  }
+  return WaitForFlushMemTable(cfd, nullptr, false);
+}
+
+Status DBImpl::TEST_WaitForCompact(bool abort_on_pause) {
+  // Wait until the compaction completes
+  return WaitForCompact(abort_on_pause);
+}
+
+Status DBImpl::TEST_WaitForPurge() {
+  InstrumentedMutexLock l(&mutex_);
+  while (bg_purge_scheduled_ && error_handler_.GetBGError().ok()) {
+    bg_cv_.Wait();
+  }
+  return error_handler_.GetBGError();
+}
+
+Status DBImpl::TEST_GetBGError() {
+  InstrumentedMutexLock l(&mutex_);
+  return error_handler_.GetBGError();
+}
+
+void DBImpl::TEST_LockMutex() { mutex_.Lock(); }
+
+void DBImpl::TEST_UnlockMutex() { mutex_.Unlock(); }
+
+void DBImpl::TEST_SignalAllBgCv() { bg_cv_.SignalAll(); }
+
+void* DBImpl::TEST_BeginWrite() {
+  auto w = new WriteThread::Writer();
+  write_thread_.EnterUnbatched(w, &mutex_);
+  return reinterpret_cast<void*>(w);
+}
+
+void DBImpl::TEST_EndWrite(void* w) {
+  auto writer = reinterpret_cast<WriteThread::Writer*>(w);
+  write_thread_.ExitUnbatched(writer);
+  delete writer;
+}
+
+size_t DBImpl::TEST_LogsToFreeSize() {
+  InstrumentedMutexLock l(&log_write_mutex_);
+  return logs_to_free_.size();
+}
+
+uint64_t DBImpl::TEST_LogfileNumber() {
+  InstrumentedMutexLock l(&mutex_);
+  return logfile_number_;
+}
+
+Status DBImpl::TEST_GetAllImmutableCFOptions(
+    std::unordered_map<std::string, const ImmutableCFOptions*>* iopts_map) {
+  std::vector<std::string> cf_names;
+  std::vector<const ImmutableCFOptions*> iopts;
+  {
+    InstrumentedMutexLock l(&mutex_);
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      cf_names.push_back(cfd->GetName());
+      iopts.push_back(cfd->ioptions());
+    }
+  }
+  iopts_map->clear();
+  for (size_t i = 0; i < cf_names.size(); ++i) {
+    iopts_map->insert({cf_names[i], iopts[i]});
+  }
+
+  return Status::OK();
+}
+
+uint64_t DBImpl::TEST_FindMinLogContainingOutstandingPrep() {
+  return logs_with_prep_tracker_.FindMinLogContainingOutstandingPrep();
+}
+
+size_t DBImpl::TEST_PreparedSectionCompletedSize() {
+  return logs_with_prep_tracker_.TEST_PreparedSectionCompletedSize();
+}
+
+size_t DBImpl::TEST_LogsWithPrepSize() {
+  return logs_with_prep_tracker_.TEST_LogsWithPrepSize();
+}
+
+uint64_t DBImpl::TEST_FindMinPrepLogReferencedByMemTable() {
+  autovector<MemTable*> empty_list;
+  return FindMinPrepLogReferencedByMemTable(versions_.get(), empty_list);
+}
+
+Status DBImpl::TEST_GetLatestMutableCFOptions(
+    ColumnFamilyHandle* column_family, MutableCFOptions* mutable_cf_options) {
+  InstrumentedMutexLock l(&mutex_);
+
+  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+  *mutable_cf_options = *cfh->cfd()->GetLatestMutableCFOptions();
+  return Status::OK();
+}
+
+int DBImpl::TEST_BGCompactionsAllowed() const {
+  InstrumentedMutexLock l(&mutex_);
+  return GetBGJobLimits().max_compactions;
+}
+
+int DBImpl::TEST_BGFlushesAllowed() const {
+  InstrumentedMutexLock l(&mutex_);
+  return GetBGJobLimits().max_flushes;
+}
+
+SequenceNumber DBImpl::TEST_GetLastVisibleSequence() const {
+  if (last_seq_same_as_publish_seq_) {
+    return versions_->LastSequence();
+  } else {
+    return versions_->LastAllocatedSequence();
+  }
+}
+
+size_t DBImpl::TEST_GetWalPreallocateBlockSize(
+    uint64_t write_buffer_size) const {
+  InstrumentedMutexLock l(&mutex_);
+  return GetWalPreallocateBlockSize(write_buffer_size);
+}
+
+void DBImpl::TEST_WaitForPeriodicTaskRun(std::function<void()> callback) const {
+  periodic_task_scheduler_.TEST_WaitForRun(callback);
+}
+
+const PeriodicTaskScheduler& DBImpl::TEST_GetPeriodicTaskScheduler() const {
+  return periodic_task_scheduler_;
+}
+
+SeqnoToTimeMapping DBImpl::TEST_GetSeqnoToTimeMapping() const {
+  InstrumentedMutexLock l(&mutex_);
+  return seqno_time_mapping_;
+}
+
+size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
+  return EstimateInMemoryStatsHistorySize();
+}
+}  // namespace ROCKSDB_NAMESPACE
+#endif  // NDEBUG
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_experimental.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_experimental.cc
new file mode 100644
index 0000000000..8d958ffc17
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_experimental.cc
@@ -0,0 +1,159 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <cinttypes>
+#include <vector>
+
+#include "db/column_family.h"
+#include "db/db_impl/db_impl.h"
+#include "db/job_context.h"
+#include "db/version_set.h"
+#include "logging/logging.h"
+#include "rocksdb/status.h"
+#include "util/cast_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status DBImpl::SuggestCompactRange(ColumnFamilyHandle* column_family,
+                                   const Slice* begin, const Slice* end) {
+  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+  auto cfd = cfh->cfd();
+  InternalKey start_key, end_key;
+  if (begin != nullptr) {
+    start_key.SetMinPossibleForUserKey(*begin);
+  }
+  if (end != nullptr) {
+    end_key.SetMaxPossibleForUserKey(*end);
+  }
+  {
+    InstrumentedMutexLock l(&mutex_);
+    auto vstorage = cfd->current()->storage_info();
+    for (int level = 0; level < vstorage->num_non_empty_levels() - 1; ++level) {
+      std::vector<FileMetaData*> inputs;
+      vstorage->GetOverlappingInputs(
+          level, begin == nullptr ? nullptr : &start_key,
+          end == nullptr ? nullptr : &end_key, &inputs);
+      for (auto f : inputs) {
+        f->marked_for_compaction = true;
+      }
+    }
+    // Since we have some more files to compact, we should also recompute
+    // compaction score
+    vstorage->ComputeCompactionScore(*cfd->ioptions(),
+                                     *cfd->GetLatestMutableCFOptions());
+    SchedulePendingCompaction(cfd);
+    MaybeScheduleFlushOrCompaction();
+  }
+  return Status::OK();
+}
+
+Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
+  assert(column_family);
+
+  if (target_level < 1) {
+    ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                   "PromoteL0 FAILED. Invalid target level %d\n", target_level);
+    return Status::InvalidArgument("Invalid target level");
+  }
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  Status status;
+  VersionEdit edit;
+  JobContext job_context(next_job_id_.fetch_add(1), true);
+  {
+    InstrumentedMutexLock l(&mutex_);
+    auto* cfd = static_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
+    const auto* vstorage = cfd->current()->storage_info();
+
+    if (target_level >= vstorage->num_levels()) {
+      ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                     "PromoteL0 FAILED. Target level %d does not exist\n",
+                     target_level);
+      job_context.Clean();
+      status = Status::InvalidArgument("Target level does not exist");
+      return status;
+    }
+
+    // Sort L0 files by range.
+    const InternalKeyComparator* icmp = &cfd->internal_comparator();
+    auto l0_files = vstorage->LevelFiles(0);
+    std::sort(l0_files.begin(), l0_files.end(),
+              [icmp](FileMetaData* f1, FileMetaData* f2) {
+                return icmp->Compare(f1->largest, f2->largest) < 0;
+              });
+
+    // Check that no L0 file is being compacted and that they have
+    // non-overlapping ranges.
+    for (size_t i = 0; i < l0_files.size(); ++i) {
+      auto f = l0_files[i];
+      if (f->being_compacted) {
+        ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                       "PromoteL0 FAILED. File %" PRIu64 " being compacted\n",
+                       f->fd.GetNumber());
+        job_context.Clean();
+        status =
+            Status::InvalidArgument("PromoteL0 called during L0 compaction");
+        return status;
+      }
+
+      if (i == 0) continue;
+      auto prev_f = l0_files[i - 1];
+      if (icmp->Compare(prev_f->largest, f->smallest) >= 0) {
+        ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                       "PromoteL0 FAILED. Files %" PRIu64 " and %" PRIu64
Files %" PRIu64 " and %" PRIu64 + " have overlapping ranges\n", + prev_f->fd.GetNumber(), f->fd.GetNumber()); + job_context.Clean(); + status = Status::InvalidArgument("L0 has overlapping files"); + return status; + } + } + + // Check that all levels up to target_level are empty. + for (int level = 1; level <= target_level; ++level) { + if (vstorage->NumLevelFiles(level) > 0) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "PromoteL0 FAILED. Level %d not empty\n", level); + job_context.Clean(); + status = Status::InvalidArgument( + "All levels up to target_level " + "must be empty"); + return status; + } + } + + edit.SetColumnFamily(cfd->GetID()); + for (const auto& f : l0_files) { + edit.DeleteFile(0, f->fd.GetNumber()); + edit.AddFile(target_level, f->fd.GetNumber(), f->fd.GetPathId(), + f->fd.GetFileSize(), f->smallest, f->largest, + f->fd.smallest_seqno, f->fd.largest_seqno, + f->marked_for_compaction, f->temperature, + f->oldest_blob_file_number, f->oldest_ancester_time, + f->file_creation_time, f->epoch_number, f->file_checksum, + f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size, f->tail_size); + } + + status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), + read_options, &edit, &mutex_, + directories_.GetDbDir()); + if (status.ok()) { + InstallSuperVersionAndScheduleWork(cfd, + &job_context.superversion_contexts[0], + *cfd->GetLatestMutableCFOptions()); + } + } // lock released here + LogFlush(immutable_db_options_.info_log); + job_context.Clean(); + + return status; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_files.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_files.cc new file mode 100644 index 0000000000..0998c94551 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_files.cc @@ -0,0 +1,1014 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "db/event_helpers.h" +#include "db/memtable_list.h" +#include "file/file_util.h" +#include "file/filename.h" +#include "file/sst_file_manager_impl.h" +#include "logging/logging.h" +#include "port/port.h" +#include "util/autovector.h" +#include "util/defer.h" + +namespace ROCKSDB_NAMESPACE { + +uint64_t DBImpl::MinLogNumberToKeep() { + return versions_->min_log_number_to_keep(); +} + +uint64_t DBImpl::MinObsoleteSstNumberToKeep() { + mutex_.AssertHeld(); + if (!pending_outputs_.empty()) { + return *pending_outputs_.begin(); + } + return std::numeric_limits::max(); +} + +Status DBImpl::DisableFileDeletions() { + Status s; + int my_disable_delete_obsolete_files; + { + InstrumentedMutexLock l(&mutex_); + s = DisableFileDeletionsWithLock(); + my_disable_delete_obsolete_files = disable_delete_obsolete_files_; + } + if (my_disable_delete_obsolete_files == 1) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "File Deletions Disabled"); + } else { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "File Deletions Disabled, but already disabled. 
Counter: %d", + my_disable_delete_obsolete_files); + } + return s; +} + +// FIXME: can be inconsistent with DisableFileDeletions in cases like +// DBImplReadOnly +Status DBImpl::DisableFileDeletionsWithLock() { + mutex_.AssertHeld(); + ++disable_delete_obsolete_files_; + return Status::OK(); +} + +Status DBImpl::EnableFileDeletions(bool force) { + // Job id == 0 means that this is not our background process, but rather + // user thread + JobContext job_context(0); + int saved_counter; // initialize on all paths + { + InstrumentedMutexLock l(&mutex_); + if (force) { + // if force, we need to enable file deletions right away + disable_delete_obsolete_files_ = 0; + } else if (disable_delete_obsolete_files_ > 0) { + --disable_delete_obsolete_files_; + } + saved_counter = disable_delete_obsolete_files_; + if (saved_counter == 0) { + FindObsoleteFiles(&job_context, true); + bg_cv_.SignalAll(); + } + } + if (saved_counter == 0) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, "File Deletions Enabled"); + if (job_context.HaveSomethingToDelete()) { + PurgeObsoleteFiles(job_context); + } + } else { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "File Deletions Enable, but not really enabled. Counter: %d", + saved_counter); + } + job_context.Clean(); + LogFlush(immutable_db_options_.info_log); + return Status::OK(); +} + +bool DBImpl::IsFileDeletionsEnabled() const { + return 0 == disable_delete_obsolete_files_; +} + +// * Returns the list of live files in 'sst_live' and 'blob_live'. +// If it's doing full scan: +// * Returns the list of all files in the filesystem in +// 'full_scan_candidate_files'. +// Otherwise, gets obsolete files from VersionSet. +// no_full_scan = true -- never do the full scan using GetChildren() +// force = false -- don't force the full scan, except every +// mutable_db_options_.delete_obsolete_files_period_micros +// force = true -- force the full scan +void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, + bool no_full_scan) { + mutex_.AssertHeld(); + + // if deletion is disabled, do nothing + if (disable_delete_obsolete_files_ > 0) { + return; + } + + bool doing_the_full_scan = false; + + // logic for figuring out if we're doing the full scan + if (no_full_scan) { + doing_the_full_scan = false; + } else if (force || + mutable_db_options_.delete_obsolete_files_period_micros == 0) { + doing_the_full_scan = true; + } else { + const uint64_t now_micros = immutable_db_options_.clock->NowMicros(); + if ((delete_obsolete_files_last_run_ + + mutable_db_options_.delete_obsolete_files_period_micros) < + now_micros) { + doing_the_full_scan = true; + delete_obsolete_files_last_run_ = now_micros; + } + } + + // don't delete files that might be currently written to from compaction + // threads + // Since job_context->min_pending_output is set, until file scan finishes, + // mutex_ cannot be released. Otherwise, we might see no min_pending_output + // here but later find newer generated unfinalized files while scanning. + job_context->min_pending_output = MinObsoleteSstNumberToKeep(); + + // Get obsolete files. This function will also update the list of + // pending files in VersionSet(). 
+  versions_->GetObsoleteFiles(
+      &job_context->sst_delete_files, &job_context->blob_delete_files,
+      &job_context->manifest_delete_files, job_context->min_pending_output);
+
+  // Mark the elements in job_context->sst_delete_files and
+  // job_context->blob_delete_files as "grabbed for purge" so that other
+  // threads calling FindObsoleteFiles with full_scan=true will not add these
+  // files to the candidate list for purge.
+  for (const auto& sst_to_del : job_context->sst_delete_files) {
+    MarkAsGrabbedForPurge(sst_to_del.metadata->fd.GetNumber());
+  }
+
+  for (const auto& blob_file : job_context->blob_delete_files) {
+    MarkAsGrabbedForPurge(blob_file.GetBlobFileNumber());
+  }
+
+  // store the current filenum, lognum, etc
+  job_context->manifest_file_number = versions_->manifest_file_number();
+  job_context->pending_manifest_file_number =
+      versions_->pending_manifest_file_number();
+  job_context->log_number = MinLogNumberToKeep();
+  job_context->prev_log_number = versions_->prev_log_number();
+
+  if (doing_the_full_scan) {
+    versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live);
+    InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(),
+                                  dbname_);
+    std::set<std::string> paths;
+    for (size_t path_id = 0; path_id < immutable_db_options_.db_paths.size();
+         path_id++) {
+      paths.insert(immutable_db_options_.db_paths[path_id].path);
+    }
+
+    // Note that if cf_paths is not specified in the ColumnFamilyOptions
+    // of a particular column family, we use db_paths as the cf_paths
+    // setting. Hence, there can be multiple duplicates of files from db_paths
+    // in the following code. The duplicates are removed while identifying
+    // unique files in PurgeObsoleteFiles.
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      for (size_t path_id = 0; path_id < cfd->ioptions()->cf_paths.size();
+           path_id++) {
+        auto& path = cfd->ioptions()->cf_paths[path_id].path;
+
+        if (paths.find(path) == paths.end()) {
+          paths.insert(path);
+        }
+      }
+    }
+
+    IOOptions io_opts;
+    io_opts.do_not_recurse = true;
+    for (auto& path : paths) {
+      // set of all files in the directory. We'll exclude files that are still
+      // alive in the subsequent processings.
+      std::vector<std::string> files;
+      Status s = immutable_db_options_.fs->GetChildren(
+          path, io_opts, &files, /*IODebugContext*=*/nullptr);
+      s.PermitUncheckedError();  // TODO: What should we do on error?
+      for (const std::string& file : files) {
+        uint64_t number;
+        FileType type;
+        // 1. If we cannot parse the file name, we skip;
+        // 2. If the file with file_number equals number has already been
+        // grabbed for purge by another compaction job, or it has already been
+        // scheduled for purge, we also skip it if we
+        // are doing full scan in order to avoid double deletion of the same
+        // file under race conditions. See
+        // https://github.com/facebook/rocksdb/issues/3573
+        if (!ParseFileName(file, &number, info_log_prefix.prefix, &type) ||
+            !ShouldPurge(number)) {
+          continue;
+        }
+
+        // TODO(icanadi) clean up this mess to avoid having one-off "/"
+        // prefixes
+        job_context->full_scan_candidate_files.emplace_back("/" + file, path);
+      }
+    }
+
+    // Add log files in wal_dir
+    if (!immutable_db_options_.IsWalDirSameAsDBPath(dbname_)) {
+      std::vector<std::string> log_files;
+      Status s = immutable_db_options_.fs->GetChildren(
+          immutable_db_options_.wal_dir, io_opts, &log_files,
+          /*IODebugContext*=*/nullptr);
+      s.PermitUncheckedError();  // TODO: What should we do on error?
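+      // Unlike the db-path scan above, WAL candidates are added without a
+      // ShouldPurge() check; they are parsed and filtered later in
+      // PurgeObsoleteFiles.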
+      for (const std::string& log_file : log_files) {
+        job_context->full_scan_candidate_files.emplace_back(
+            log_file, immutable_db_options_.wal_dir);
+      }
+    }
+
+    // Add info log files in db_log_dir
+    if (!immutable_db_options_.db_log_dir.empty() &&
+        immutable_db_options_.db_log_dir != dbname_) {
+      std::vector<std::string> info_log_files;
+      Status s = immutable_db_options_.fs->GetChildren(
+          immutable_db_options_.db_log_dir, io_opts, &info_log_files,
+          /*IODebugContext*=*/nullptr);
+      s.PermitUncheckedError();  // TODO: What should we do on error?
+      for (std::string& log_file : info_log_files) {
+        job_context->full_scan_candidate_files.emplace_back(
+            log_file, immutable_db_options_.db_log_dir);
+      }
+    }
+  } else {
+    // Instead of filling job_context->sst_live and job_context->blob_live,
+    // directly remove files that show up in any Version. This is because
+    // candidate files tend to be a small percentage of all files, so it is
+    // usually cheaper to check them against every version, compared to
+    // building a map for all files.
+    versions_->RemoveLiveFiles(job_context->sst_delete_files,
+                               job_context->blob_delete_files);
+  }
+
+  // Before potentially releasing mutex and waiting on condvar, increment
+  // pending_purge_obsolete_files_ so that another thread executing
+  // `GetSortedWals` will wait until this thread finishes execution since the
+  // other thread will be waiting for `pending_purge_obsolete_files_`.
+  // pending_purge_obsolete_files_ MUST be decremented if there is nothing to
+  // delete.
+  ++pending_purge_obsolete_files_;
+
+  Defer cleanup([job_context, this]() {
+    assert(job_context != nullptr);
+    if (!job_context->HaveSomethingToDelete()) {
+      mutex_.AssertHeld();
+      --pending_purge_obsolete_files_;
+    }
+  });
+
+  // logs_ is empty when called during recovery, in which case there can't yet
+  // be any tracked obsolete logs
+  log_write_mutex_.Lock();
+
+  if (alive_log_files_.empty() || logs_.empty()) {
+    mutex_.AssertHeld();
+    // We may reach here if the db is DBImplSecondary
+    log_write_mutex_.Unlock();
+    return;
+  }
+
+  bool mutex_unlocked = false;
+  if (!alive_log_files_.empty() && !logs_.empty()) {
+    uint64_t min_log_number = job_context->log_number;
+    size_t num_alive_log_files = alive_log_files_.size();
+    // find newly obsoleted log files
+    while (alive_log_files_.begin()->number < min_log_number) {
+      auto& earliest = *alive_log_files_.begin();
+      if (immutable_db_options_.recycle_log_file_num >
+          log_recycle_files_.size()) {
+        ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                       "adding log %" PRIu64 " to recycle list\n",
+                       earliest.number);
+        log_recycle_files_.push_back(earliest.number);
+      } else {
+        job_context->log_delete_files.push_back(earliest.number);
+      }
+      if (job_context->size_log_to_delete == 0) {
+        job_context->prev_total_log_size = total_log_size_;
+        job_context->num_alive_log_files = num_alive_log_files;
+      }
+      job_context->size_log_to_delete += earliest.size;
+      total_log_size_ -= earliest.size;
+      alive_log_files_.pop_front();
+
+      // Current log should always stay alive since it can't have
+      // number < MinLogNumber().
+      assert(alive_log_files_.size());
+    }
+    log_write_mutex_.Unlock();
+    mutex_.Unlock();
+    mutex_unlocked = true;
+    TEST_SYNC_POINT_CALLBACK("FindObsoleteFiles::PostMutexUnlock", nullptr);
+    log_write_mutex_.Lock();
+    while (!logs_.empty() && logs_.front().number < min_log_number) {
+      auto& log = logs_.front();
+      if (log.IsSyncing()) {
+        log_sync_cv_.Wait();
+        // logs_ could have changed while we were waiting.
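+        // Restart the check from the (possibly new) front of logs_ rather
+        // than assuming the entry we were looking at is still there.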
+ continue; + } + logs_to_free_.push_back(log.ReleaseWriter()); + logs_.pop_front(); + } + // Current log cannot be obsolete. + assert(!logs_.empty()); + } + + // We're just cleaning up for DB::Write(). + assert(job_context->logs_to_free.empty()); + job_context->logs_to_free = logs_to_free_; + + logs_to_free_.clear(); + log_write_mutex_.Unlock(); + if (mutex_unlocked) { + mutex_.Lock(); + } + job_context->log_recycle_files.assign(log_recycle_files_.begin(), + log_recycle_files_.end()); +} + +// Delete obsolete files and log status and information of file deletion +void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname, + const std::string& path_to_sync, + FileType type, uint64_t number) { + TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl::BeforeDeletion", + const_cast(&fname)); + + Status file_deletion_status; + if (type == kTableFile || type == kBlobFile || type == kWalFile) { + // Rate limit WAL deletion only if its in the DB dir + file_deletion_status = DeleteDBFile( + &immutable_db_options_, fname, path_to_sync, + /*force_bg=*/false, + /*force_fg=*/(type == kWalFile) ? !wal_in_db_path_ : false); + } else { + file_deletion_status = env_->DeleteFile(fname); + } + TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion", + &file_deletion_status); + if (file_deletion_status.ok()) { + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, + "[JOB %d] Delete %s type=%d #%" PRIu64 " -- %s\n", job_id, + fname.c_str(), type, number, + file_deletion_status.ToString().c_str()); + } else if (env_->FileExists(fname).IsNotFound()) { + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "[JOB %d] Tried to delete a non-existing file %s type=%d #%" PRIu64 + " -- %s\n", + job_id, fname.c_str(), type, number, + file_deletion_status.ToString().c_str()); + } else { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "[JOB %d] Failed to delete %s type=%d #%" PRIu64 " -- %s\n", + job_id, fname.c_str(), type, number, + file_deletion_status.ToString().c_str()); + } + if (type == kTableFile) { + EventHelpers::LogAndNotifyTableFileDeletion( + &event_logger_, job_id, number, fname, file_deletion_status, GetName(), + immutable_db_options_.listeners); + } + if (type == kBlobFile) { + EventHelpers::LogAndNotifyBlobFileDeletion( + &event_logger_, immutable_db_options_.listeners, job_id, number, fname, + file_deletion_status, GetName()); + } +} + +// Diffs the files listed in filenames and those that do not +// belong to live files are possibly removed. Also, removes all the +// files in sst_delete_files and log_delete_files. +// It is not necessary to hold the mutex when invoking this method. +void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) { + TEST_SYNC_POINT("DBImpl::PurgeObsoleteFiles:Begin"); + // we'd better have sth to delete + assert(state.HaveSomethingToDelete()); + + // FindObsoleteFiles() should've populated this so nonzero + assert(state.manifest_file_number != 0); + + // Now, convert lists to unordered sets, WITHOUT mutex held; set is slow. 
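+  // For a rough sense of why (informal): the keep/delete loop below performs
+  // one membership probe per candidate, e.g.
+  //   keep = sst_live_set.find(number) != sst_live_set.end();
+  // which is O(1) on average with std::unordered_set, versus O(log n) per
+  // probe with an ordered std::set.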
+  std::unordered_set<uint64_t> sst_live_set(state.sst_live.begin(),
+                                            state.sst_live.end());
+  std::unordered_set<uint64_t> blob_live_set(state.blob_live.begin(),
+                                             state.blob_live.end());
+  std::unordered_set<uint64_t> log_recycle_files_set(
+      state.log_recycle_files.begin(), state.log_recycle_files.end());
+
+  auto candidate_files = state.full_scan_candidate_files;
+  candidate_files.reserve(
+      candidate_files.size() + state.sst_delete_files.size() +
+      state.blob_delete_files.size() + state.log_delete_files.size() +
+      state.manifest_delete_files.size());
+  // We may ignore the dbname when generating the file names.
+  for (auto& file : state.sst_delete_files) {
+    if (!file.only_delete_metadata) {
+      candidate_files.emplace_back(
+          MakeTableFileName(file.metadata->fd.GetNumber()), file.path);
+    }
+    if (file.metadata->table_reader_handle) {
+      table_cache_->Release(file.metadata->table_reader_handle);
+    }
+    file.DeleteMetadata();
+  }
+
+  for (const auto& blob_file : state.blob_delete_files) {
+    candidate_files.emplace_back(BlobFileName(blob_file.GetBlobFileNumber()),
+                                 blob_file.GetPath());
+  }
+
+  auto wal_dir = immutable_db_options_.GetWalDir();
+  for (auto file_num : state.log_delete_files) {
+    if (file_num > 0) {
+      candidate_files.emplace_back(LogFileName(file_num), wal_dir);
+    }
+  }
+  for (const auto& filename : state.manifest_delete_files) {
+    candidate_files.emplace_back(filename, dbname_);
+  }
+
+  // dedup state.candidate_files so we don't try to delete the same
+  // file twice
+  std::sort(candidate_files.begin(), candidate_files.end(),
+            [](const JobContext::CandidateFileInfo& lhs,
+               const JobContext::CandidateFileInfo& rhs) {
+              if (lhs.file_name > rhs.file_name) {
+                return true;
+              } else if (lhs.file_name < rhs.file_name) {
+                return false;
+              } else {
+                return (lhs.file_path > rhs.file_path);
+              }
+            });
+  candidate_files.erase(
+      std::unique(candidate_files.begin(), candidate_files.end()),
+      candidate_files.end());
+
+  if (state.prev_total_log_size > 0) {
+    ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                   "[JOB %d] Try to delete WAL files size %" PRIu64
+                   ", prev total WAL file size %" PRIu64
+                   ", number of live WAL files %" ROCKSDB_PRIszt ".\n",
+                   state.job_id, state.size_log_to_delete,
+                   state.prev_total_log_size, state.num_alive_log_files);
+  }
+
+  std::vector<std::string> old_info_log_files;
+  InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(),
+                                dbname_);
+
+  // File numbers of the two most recent OPTIONS files in candidate_files
+  // (found in the previous FindObsoleteFiles(full_scan=true)).
+  // At this point, there must not be any duplicate file numbers in
+  // candidate_files.
+  uint64_t optsfile_num1 = std::numeric_limits<uint64_t>::min();
+  uint64_t optsfile_num2 = std::numeric_limits<uint64_t>::min();
+  for (const auto& candidate_file : candidate_files) {
+    const std::string& fname = candidate_file.file_name;
+    uint64_t number;
+    FileType type;
+    if (!ParseFileName(fname, &number, info_log_prefix.prefix, &type) ||
+        type != kOptionsFile) {
+      continue;
+    }
+    if (number > optsfile_num1) {
+      optsfile_num2 = optsfile_num1;
+      optsfile_num1 = number;
+    } else if (number > optsfile_num2) {
+      optsfile_num2 = number;
+    }
+  }
+
+  // Close WALs before trying to delete them.
+  for (const auto w : state.logs_to_free) {
+    // TODO: maybe check the return value of Close.
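+    // (Presumably the files are closed first because some platforms, e.g.
+    // Windows, do not allow deleting a file that still has an open handle;
+    // the Close() status is ignored below since the file is about to be
+    // deleted anyway.)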
+    auto s = w->Close();
+    s.PermitUncheckedError();
+  }
+
+  bool own_files = OwnTablesAndLogs();
+  std::unordered_set<uint64_t> files_to_del;
+  for (const auto& candidate_file : candidate_files) {
+    const std::string& to_delete = candidate_file.file_name;
+    uint64_t number;
+    FileType type;
+    // Ignore file if we cannot recognize it.
+    if (!ParseFileName(to_delete, &number, info_log_prefix.prefix, &type)) {
+      continue;
+    }
+
+    bool keep = true;
+    switch (type) {
+      case kWalFile:
+        keep = ((number >= state.log_number) ||
+                (number == state.prev_log_number) ||
+                (log_recycle_files_set.find(number) !=
+                 log_recycle_files_set.end()));
+        break;
+      case kDescriptorFile:
+        // Keep my manifest file, and any newer incarnations'
+        // (can happen during manifest roll)
+        keep = (number >= state.manifest_file_number);
+        break;
+      case kTableFile:
+        // If the second condition is not there, this makes
+        // DontDeletePendingOutputs fail
+        keep = (sst_live_set.find(number) != sst_live_set.end()) ||
+               number >= state.min_pending_output;
+        if (!keep) {
+          files_to_del.insert(number);
+        }
+        break;
+      case kBlobFile:
+        keep = number >= state.min_pending_output ||
+               (blob_live_set.find(number) != blob_live_set.end());
+        if (!keep) {
+          files_to_del.insert(number);
+        }
+        break;
+      case kTempFile:
+        // Any temp files that are currently being written to must
+        // be recorded in pending_outputs_, which is inserted into "live".
+        // Also, SetCurrentFile creates a temp file when writing out the new
+        // manifest, which is equal to state.pending_manifest_file_number. We
+        // should not delete that file
+        //
+        // TODO(yhchiang): carefully modify the third condition to safely
+        //                 remove the temp options files.
+        keep = (sst_live_set.find(number) != sst_live_set.end()) ||
+               (blob_live_set.find(number) != blob_live_set.end()) ||
+               (number == state.pending_manifest_file_number) ||
+               (to_delete.find(kOptionsFileNamePrefix) != std::string::npos);
+        break;
+      case kInfoLogFile:
+        keep = true;
+        if (number != 0) {
+          old_info_log_files.push_back(to_delete);
+        }
+        break;
+      case kOptionsFile:
+        keep = (number >= optsfile_num2);
+        break;
+      case kCurrentFile:
+      case kDBLockFile:
+      case kIdentityFile:
+      case kMetaDatabase:
+        keep = true;
+        break;
+    }
+
+    if (keep) {
+      continue;
+    }
+
+    std::string fname;
+    std::string dir_to_sync;
+    if (type == kTableFile) {
+      // evict from cache
+      TableCache::Evict(table_cache_.get(), number);
+      fname = MakeTableFileName(candidate_file.file_path, number);
+      dir_to_sync = candidate_file.file_path;
+    } else if (type == kBlobFile) {
+      fname = BlobFileName(candidate_file.file_path, number);
+      dir_to_sync = candidate_file.file_path;
+    } else {
+      dir_to_sync = (type == kWalFile) ? wal_dir : dbname_;
+      fname = dir_to_sync +
+              ((!dir_to_sync.empty() && dir_to_sync.back() == '/') ||
+                       (!to_delete.empty() && to_delete.front() == '/')
+                   ? ""
+                   : "/") +
+              to_delete;
+    }
+
+    if (type == kWalFile && (immutable_db_options_.WAL_ttl_seconds > 0 ||
+                             immutable_db_options_.WAL_size_limit_MB > 0)) {
+      wal_manager_.ArchiveWALFile(fname, number);
+      continue;
+    }
+
+    // If I do not own these files, e.g. secondary instance with
+    // max_open_files = -1, then there is no need to delete or schedule
+    // deletion of these files since they
+    // will be removed by their owner, e.g. the primary instance.
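+    // (OwnTablesAndLogs() is expected to be false only for such non-owning
+    // instances; a regular primary returns true and proceeds to delete or
+    // schedule the purge below.)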
+    if (!own_files) {
+      continue;
+    }
+    if (schedule_only) {
+      InstrumentedMutexLock guard_lock(&mutex_);
+      SchedulePendingPurge(fname, dir_to_sync, type, number, state.job_id);
+    } else {
+      DeleteObsoleteFileImpl(state.job_id, fname, dir_to_sync, type, number);
+    }
+  }
+
+  {
+    // After purging obsolete files, remove them from files_grabbed_for_purge_.
+    InstrumentedMutexLock guard_lock(&mutex_);
+    autovector<uint64_t> to_be_removed;
+    for (auto fn : files_grabbed_for_purge_) {
+      if (files_to_del.count(fn) != 0) {
+        to_be_removed.emplace_back(fn);
+      }
+    }
+    for (auto fn : to_be_removed) {
+      files_grabbed_for_purge_.erase(fn);
+    }
+  }
+
+  // Delete old info log files.
+  size_t old_info_log_file_count = old_info_log_files.size();
+  if (old_info_log_file_count != 0 &&
+      old_info_log_file_count >= immutable_db_options_.keep_log_file_num) {
+    std::sort(old_info_log_files.begin(), old_info_log_files.end());
+    size_t end =
+        old_info_log_file_count - immutable_db_options_.keep_log_file_num;
+    for (unsigned int i = 0; i <= end; i++) {
+      std::string& to_delete = old_info_log_files.at(i);
+      std::string full_path_to_delete =
+          (immutable_db_options_.db_log_dir.empty()
+               ? dbname_
+               : immutable_db_options_.db_log_dir) +
+          "/" + to_delete;
+      ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                     "[JOB %d] Delete info log file %s\n", state.job_id,
+                     full_path_to_delete.c_str());
+      Status s = env_->DeleteFile(full_path_to_delete);
+      if (!s.ok()) {
+        if (env_->FileExists(full_path_to_delete).IsNotFound()) {
+          ROCKS_LOG_INFO(
+              immutable_db_options_.info_log,
+              "[JOB %d] Tried to delete non-existing info log file %s FAILED "
+              "-- %s\n",
+              state.job_id, to_delete.c_str(), s.ToString().c_str());
+        } else {
+          ROCKS_LOG_ERROR(immutable_db_options_.info_log,
+                          "[JOB %d] Delete info log file %s FAILED -- %s\n",
+                          state.job_id, to_delete.c_str(),
+                          s.ToString().c_str());
+        }
+      }
+    }
+  }
+  wal_manager_.PurgeObsoleteWALFiles();
+  LogFlush(immutable_db_options_.info_log);
+  InstrumentedMutexLock l(&mutex_);
+  --pending_purge_obsolete_files_;
+  assert(pending_purge_obsolete_files_ >= 0);
+  if (schedule_only) {
+    // Must change from pending_purge_obsolete_files_ to bg_purge_scheduled_
+    // while holding mutex (for GetSortedWalFiles() etc.)
+    SchedulePurge();
+  }
+  if (pending_purge_obsolete_files_ == 0) {
+    bg_cv_.SignalAll();
+  }
+  TEST_SYNC_POINT("DBImpl::PurgeObsoleteFiles:End");
+}
+
+void DBImpl::DeleteObsoleteFiles() {
+  mutex_.AssertHeld();
+  JobContext job_context(next_job_id_.fetch_add(1));
+  FindObsoleteFiles(&job_context, true);
+
+  mutex_.Unlock();
+  if (job_context.HaveSomethingToDelete()) {
+    bool defer_purge = immutable_db_options_.avoid_unnecessary_blocking_io;
+    PurgeObsoleteFiles(job_context, defer_purge);
+  }
+  job_context.Clean();
+  mutex_.Lock();
+}
+
+uint64_t FindMinPrepLogReferencedByMemTable(
+    VersionSet* vset, const autovector<MemTable*>& memtables_to_flush) {
+  uint64_t min_log = 0;
+
+  // we must look through the memtables for two phase transactions
+  // that have been committed but not yet flushed
+  std::unordered_set<MemTable*> memtables_to_flush_set(
+      memtables_to_flush.begin(), memtables_to_flush.end());
+  for (auto loop_cfd : *vset->GetColumnFamilySet()) {
+    if (loop_cfd->IsDropped()) {
+      continue;
+    }
+
+    auto log = loop_cfd->imm()->PrecomputeMinLogContainingPrepSection(
+        &memtables_to_flush_set);
+
+    if (log > 0 && (min_log == 0 || log < min_log)) {
+      min_log = log;
+    }
+
+    log = loop_cfd->mem()->GetMinLogContainingPrepSection();
+
+    if (log > 0 && (min_log == 0 || log < min_log)) {
+      min_log = log;
+    }
+  }
+
+  return min_log;
+}
+
+uint64_t FindMinPrepLogReferencedByMemTable(
+    VersionSet* vset,
+    const autovector<const autovector<MemTable*>*>& memtables_to_flush) {
+  uint64_t min_log = 0;
+
+  std::unordered_set<MemTable*> memtables_to_flush_set;
+  for (const autovector<MemTable*>* memtables : memtables_to_flush) {
+    memtables_to_flush_set.insert(memtables->begin(), memtables->end());
+  }
+  for (auto loop_cfd : *vset->GetColumnFamilySet()) {
+    if (loop_cfd->IsDropped()) {
+      continue;
+    }
+
+    auto log = loop_cfd->imm()->PrecomputeMinLogContainingPrepSection(
+        &memtables_to_flush_set);
+    if (log > 0 && (min_log == 0 || log < min_log)) {
+      min_log = log;
+    }
+
+    log = loop_cfd->mem()->GetMinLogContainingPrepSection();
+    if (log > 0 && (min_log == 0 || log < min_log)) {
+      min_log = log;
+    }
+  }
+
+  return min_log;
+}
+
+uint64_t PrecomputeMinLogNumberToKeepNon2PC(
+    VersionSet* vset, const ColumnFamilyData& cfd_to_flush,
+    const autovector<VersionEdit*>& edit_list) {
+  assert(vset != nullptr);
+
+  // Precompute the min log number containing unflushed data for the column
+  // family being flushed (`cfd_to_flush`).
+  uint64_t cf_min_log_number_to_keep = 0;
+  for (auto& e : edit_list) {
+    if (e->HasLogNumber()) {
+      cf_min_log_number_to_keep =
+          std::max(cf_min_log_number_to_keep, e->GetLogNumber());
+    }
+  }
+  if (cf_min_log_number_to_keep == 0) {
+    // No version edit contains information on the log number. The log number
+    // for this column family should stay the same as it is.
+    cf_min_log_number_to_keep = cfd_to_flush.GetLogNumber();
+  }
+
+  // Get min log number containing unflushed data for other column families.
+  uint64_t min_log_number_to_keep =
+      vset->PreComputeMinLogNumberWithUnflushedData(&cfd_to_flush);
+  if (cf_min_log_number_to_keep != 0) {
+    min_log_number_to_keep =
+        std::min(cf_min_log_number_to_keep, min_log_number_to_keep);
+  }
+  return min_log_number_to_keep;
+}
+
+uint64_t PrecomputeMinLogNumberToKeepNon2PC(
+    VersionSet* vset, const autovector<ColumnFamilyData*>& cfds_to_flush,
+    const autovector<autovector<VersionEdit*>>& edit_lists) {
+  assert(vset != nullptr);
+  assert(!cfds_to_flush.empty());
+  assert(cfds_to_flush.size() == edit_lists.size());
+
+  uint64_t min_log_number_to_keep = std::numeric_limits<uint64_t>::max();
+  for (const auto& edit_list : edit_lists) {
+    uint64_t log = 0;
+    for (const auto& e : edit_list) {
+      if (e->HasLogNumber()) {
+        log = std::max(log, e->GetLogNumber());
+      }
+    }
+    if (log != 0) {
+      min_log_number_to_keep = std::min(min_log_number_to_keep, log);
+    }
+  }
+  if (min_log_number_to_keep == std::numeric_limits<uint64_t>::max()) {
+    min_log_number_to_keep = cfds_to_flush[0]->GetLogNumber();
+    for (size_t i = 1; i < cfds_to_flush.size(); i++) {
+      min_log_number_to_keep =
+          std::min(min_log_number_to_keep, cfds_to_flush[i]->GetLogNumber());
+    }
+  }
+
+  std::unordered_set<const ColumnFamilyData*> flushed_cfds(
+      cfds_to_flush.begin(), cfds_to_flush.end());
+  min_log_number_to_keep =
+      std::min(min_log_number_to_keep,
+               vset->PreComputeMinLogNumberWithUnflushedData(flushed_cfds));
+
+  return min_log_number_to_keep;
+}
+
+uint64_t PrecomputeMinLogNumberToKeep2PC(
+    VersionSet* vset, const ColumnFamilyData& cfd_to_flush,
+    const autovector<VersionEdit*>& edit_list,
+    const autovector<MemTable*>& memtables_to_flush,
+    LogsWithPrepTracker* prep_tracker) {
+  assert(vset != nullptr);
+  assert(prep_tracker != nullptr);
+  // Calculate updated min_log_number_to_keep
+  // Since the function should only be called in 2pc mode, the log number in
+  // the version edit should be sufficient.
+
+  uint64_t min_log_number_to_keep =
+      PrecomputeMinLogNumberToKeepNon2PC(vset, cfd_to_flush, edit_list);
+
+  // Since we are in 2pc mode, we must consider logs containing prepared
+  // sections of outstanding transactions.
+  //
+  // We must check min logs with outstanding prep before we check
+  // logs referenced by memtables because a log referenced by the
+  // first data structure could transition to the second under us.
+  //
+  // TODO: iterating over all column families under db mutex.
+  // should find a more optimal solution
+  auto min_log_in_prep_heap =
+      prep_tracker->FindMinLogContainingOutstandingPrep();
+
+  if (min_log_in_prep_heap != 0 &&
+      min_log_in_prep_heap < min_log_number_to_keep) {
+    min_log_number_to_keep = min_log_in_prep_heap;
+  }
+
+  uint64_t min_log_refed_by_mem =
+      FindMinPrepLogReferencedByMemTable(vset, memtables_to_flush);
+
+  if (min_log_refed_by_mem != 0 &&
+      min_log_refed_by_mem < min_log_number_to_keep) {
+    min_log_number_to_keep = min_log_refed_by_mem;
+  }
+  return min_log_number_to_keep;
+}
+
+uint64_t PrecomputeMinLogNumberToKeep2PC(
+    VersionSet* vset, const autovector<ColumnFamilyData*>& cfds_to_flush,
+    const autovector<autovector<VersionEdit*>>& edit_lists,
+    const autovector<const autovector<MemTable*>*>& memtables_to_flush,
+    LogsWithPrepTracker* prep_tracker) {
+  assert(vset != nullptr);
+  assert(prep_tracker != nullptr);
+  assert(cfds_to_flush.size() == edit_lists.size());
+  assert(cfds_to_flush.size() == memtables_to_flush.size());
+
+  uint64_t min_log_number_to_keep =
+      PrecomputeMinLogNumberToKeepNon2PC(vset, cfds_to_flush, edit_lists);
+
+  uint64_t min_log_in_prep_heap =
+      prep_tracker->FindMinLogContainingOutstandingPrep();
+
+  if (min_log_in_prep_heap != 0 &&
+      min_log_in_prep_heap < min_log_number_to_keep) {
+    min_log_number_to_keep = min_log_in_prep_heap;
+  }
+
+  uint64_t min_log_refed_by_mem =
+      FindMinPrepLogReferencedByMemTable(vset, memtables_to_flush);
+
+  if (min_log_refed_by_mem != 0 &&
+      min_log_refed_by_mem < min_log_number_to_keep) {
+    min_log_number_to_keep = min_log_refed_by_mem;
+  }
+
+  return min_log_number_to_keep;
+}
+
+void DBImpl::SetDBId(std::string&& id, bool read_only,
+                     RecoveryContext* recovery_ctx) {
+  assert(db_id_.empty());
+  assert(!id.empty());
+  db_id_ = std::move(id);
+  if (!read_only && immutable_db_options_.write_dbid_to_manifest) {
+    assert(recovery_ctx != nullptr);
+    assert(versions_->GetColumnFamilySet() != nullptr);
+    VersionEdit edit;
+    edit.SetDBId(db_id_);
+    versions_->db_id_ = db_id_;
+    recovery_ctx->UpdateVersionEdits(
+        versions_->GetColumnFamilySet()->GetDefault(), edit);
+  }
+}
+
+Status DBImpl::SetupDBId(bool read_only, RecoveryContext* recovery_ctx) {
+  Status s;
+  // Check for the IDENTITY file and create it if not there or
+  // broken or not matching manifest
+  std::string db_id_in_file;
+  s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr);
+  if (s.ok()) {
+    s = GetDbIdentityFromIdentityFile(&db_id_in_file);
+    if (s.ok() && !db_id_in_file.empty()) {
+      if (db_id_.empty()) {
+        // Loaded from file and wasn't already known from manifest
+        SetDBId(std::move(db_id_in_file), read_only, recovery_ctx);
+        return s;
+      } else if (db_id_ == db_id_in_file) {
+        // Loaded from file and matches manifest
+        return s;
+      }
+    }
+  }
+  if (s.IsNotFound()) {
+    s = Status::OK();
+  }
+  if (!s.ok()) {
+    assert(s.IsIOError());
+    return s;
+  }
+  // Otherwise IDENTITY file is missing or no good.
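+  // Informally, the repair path below mirrors first-time setup: synthesize a
+  // fresh id when neither the MANIFEST nor the IDENTITY file supplied one,
+  // then rewrite IDENTITY so the on-disk file and db_id_ agree again.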
+  // Generate new id if needed
+  if (db_id_.empty()) {
+    SetDBId(env_->GenerateUniqueId(), read_only, recovery_ctx);
+  }
+  // Persist it to the IDENTITY file if allowed
+  if (!read_only) {
+    s = SetIdentityFile(env_, dbname_, db_id_);
+  }
+  return s;
+}
+
+Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) {
+  mutex_.AssertHeld();
+  std::vector<std::string> paths;
+  paths.push_back(NormalizePath(dbname_ + std::string(1, kFilePathSeparator)));
+  for (const auto& db_path : immutable_db_options_.db_paths) {
+    paths.push_back(
+        NormalizePath(db_path.path + std::string(1, kFilePathSeparator)));
+  }
+  for (const auto* cfd : *versions_->GetColumnFamilySet()) {
+    for (const auto& cf_path : cfd->ioptions()->cf_paths) {
+      paths.push_back(
+          NormalizePath(cf_path.path + std::string(1, kFilePathSeparator)));
+    }
+  }
+  // Dedup paths
+  std::sort(paths.begin(), paths.end());
+  paths.erase(std::unique(paths.begin(), paths.end()), paths.end());
+
+  uint64_t next_file_number = versions_->current_next_file_number();
+  uint64_t largest_file_number = next_file_number;
+  Status s;
+  for (const auto& path : paths) {
+    std::vector<std::string> files;
+    s = env_->GetChildren(path, &files);
+    if (!s.ok()) {
+      break;
+    }
+    for (const auto& fname : files) {
+      uint64_t number = 0;
+      FileType type;
+      if (!ParseFileName(fname, &number, &type)) {
+        continue;
+      }
+      // path ends with '/' or '\\'
+      const std::string normalized_fpath = path + fname;
+      largest_file_number = std::max(largest_file_number, number);
+      if (type == kTableFile && number >= next_file_number &&
+          recovery_ctx->files_to_delete_.find(normalized_fpath) ==
+              recovery_ctx->files_to_delete_.end()) {
+        recovery_ctx->files_to_delete_.emplace(normalized_fpath);
+      }
+    }
+  }
+  if (!s.ok()) {
+    return s;
+  }
+
+  if (largest_file_number >= next_file_number) {
+    versions_->next_file_number_.store(largest_file_number + 1);
+  }
+
+  VersionEdit edit;
+  edit.SetNextFile(versions_->next_file_number_.load());
+  assert(versions_->GetColumnFamilySet());
+  ColumnFamilyData* default_cfd = versions_->GetColumnFamilySet()->GetDefault();
+  assert(default_cfd);
+  recovery_ctx->UpdateVersionEdits(default_cfd, edit);
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_open.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_open.cc
new file mode 100644
index 0000000000..6bc8ae4c30
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_open.cc
@@ -0,0 +1,2202 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include <cinttypes>
+
+#include "db/builder.h"
+#include "db/db_impl/db_impl.h"
+#include "db/error_handler.h"
+#include "db/periodic_task_scheduler.h"
+#include "env/composite_env_wrapper.h"
+#include "file/filename.h"
+#include "file/read_write_util.h"
+#include "file/sst_file_manager_impl.h"
+#include "file/writable_file_writer.h"
+#include "logging/logging.h"
+#include "monitoring/persistent_stats_history.h"
+#include "monitoring/thread_status_util.h"
+#include "options/options_helper.h"
+#include "rocksdb/table.h"
+#include "rocksdb/wal_filter.h"
+#include "test_util/sync_point.h"
+#include "util/rate_limiter_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+Options SanitizeOptions(const std::string& dbname, const Options& src,
+                        bool read_only, Status* logger_creation_s) {
+  auto db_options =
+      SanitizeOptions(dbname, DBOptions(src), read_only, logger_creation_s);
+  ImmutableDBOptions immutable_db_options(db_options);
+  auto cf_options =
+      SanitizeOptions(immutable_db_options, ColumnFamilyOptions(src));
+  return Options(db_options, cf_options);
+}
+
+DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
+                          bool read_only, Status* logger_creation_s) {
+  DBOptions result(src);
+
+  if (result.env == nullptr) {
+    result.env = Env::Default();
+  }
+
+  // result.max_open_files of -1 means an "infinite" number of open files.
+  if (result.max_open_files != -1) {
+    int max_max_open_files = port::GetMaxOpenFiles();
+    if (max_max_open_files == -1) {
+      max_max_open_files = 0x400000;
+    }
+    ClipToRange(&result.max_open_files, 20, max_max_open_files);
+    TEST_SYNC_POINT_CALLBACK("SanitizeOptions::AfterChangeMaxOpenFiles",
+                             &result.max_open_files);
+  }
+
+  if (result.info_log == nullptr && !read_only) {
+    Status s = CreateLoggerFromOptions(dbname, result, &result.info_log);
+    if (!s.ok()) {
+      // No place suitable for logging
+      result.info_log = nullptr;
+      if (logger_creation_s) {
+        *logger_creation_s = s;
+      }
+    }
+  }
+
+  if (!result.write_buffer_manager) {
+    result.write_buffer_manager.reset(
+        new WriteBufferManager(result.db_write_buffer_size));
+  }
+  auto bg_job_limits = DBImpl::GetBGJobLimits(
+      result.max_background_flushes, result.max_background_compactions,
+      result.max_background_jobs, true /* parallelize_compactions */);
+  result.env->IncBackgroundThreadsIfNeeded(bg_job_limits.max_compactions,
+                                           Env::Priority::LOW);
+  result.env->IncBackgroundThreadsIfNeeded(bg_job_limits.max_flushes,
+                                           Env::Priority::HIGH);
+
+  if (result.rate_limiter.get() != nullptr) {
+    if (result.bytes_per_sync == 0) {
+      result.bytes_per_sync = 1024 * 1024;
+    }
+  }
+
+  if (result.delayed_write_rate == 0) {
+    if (result.rate_limiter.get() != nullptr) {
+      result.delayed_write_rate = result.rate_limiter->GetBytesPerSecond();
+    }
+    if (result.delayed_write_rate == 0) {
+      result.delayed_write_rate = 16 * 1024 * 1024;
+    }
+  }
+
+  if (result.WAL_ttl_seconds > 0 || result.WAL_size_limit_MB > 0) {
+    result.recycle_log_file_num = 0;
+  }
+
+  if (result.recycle_log_file_num &&
+      (result.wal_recovery_mode ==
+           WALRecoveryMode::kTolerateCorruptedTailRecords ||
+       result.wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery ||
+       result.wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency)) {
+    // - kTolerateCorruptedTailRecords is inconsistent with the recycle log
+    //   file feature. WAL recycling expects recovery success upon
+    //   encountering a corrupt record at the point where new data ends and
+    //   recycled data remains at the tail. However,
+    //   `kTolerateCorruptedTailRecords` must fail upon encountering any such
+    //   corrupt record, as it cannot differentiate between this and a real
+    //   corruption, which would cause committed updates to be truncated -- a
+    //   violation of the recovery guarantee.
+    // - kPointInTimeRecovery and kAbsoluteConsistency are incompatible with
+    //   the recycle log file feature temporarily due to a bug found
+    //   introducing a hole in the recovered data
+    //   (https://github.com/facebook/rocksdb/pull/7252#issuecomment-673766236).
+    //   Besides this bug, we believe the features are fundamentally
+    //   compatible.
+    result.recycle_log_file_num = 0;
+  }
+
+  if (result.db_paths.size() == 0) {
+    result.db_paths.emplace_back(dbname, std::numeric_limits<uint64_t>::max());
+  } else if (result.wal_dir.empty()) {
+    // Use dbname as default
+    result.wal_dir = dbname;
+  }
+  if (!result.wal_dir.empty()) {
+    // If there is a wal_dir already set, check to see if the wal_dir is the
+    // same as the dbname AND the same as db_paths[0] (which must exist from
+    // a few lines ago). If the wal_dir matches both of these values, then
+    // clear the wal_dir value, which will make wal_dir == dbname. Most
+    // likely this condition was the result of reading an old options file
+    // where we forced wal_dir to be set (to dbname).
+    auto npath = NormalizePath(dbname + "/");
+    if (npath == NormalizePath(result.wal_dir + "/") &&
+        npath == NormalizePath(result.db_paths[0].path + "/")) {
+      result.wal_dir.clear();
+    }
+  }
+
+  if (!result.wal_dir.empty() && result.wal_dir.back() == '/') {
+    result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1);
+  }
+
+  if (result.use_direct_reads && result.compaction_readahead_size == 0) {
+    TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr);
+    result.compaction_readahead_size = 1024 * 1024 * 2;
+  }
+
+  // Force flush on DB open if 2PC is enabled, since with 2PC we have no
+  // guarantee that consecutive log files have consecutive sequence ids,
+  // which makes recovery complicated.
+  if (result.allow_2pc) {
+    result.avoid_flush_during_recovery = false;
+  }
+
+  ImmutableDBOptions immutable_db_options(result);
+  if (!immutable_db_options.IsWalDirSameAsDBPath()) {
+    // Either the WAL dir and db_paths[0]/db_name are not the same, or we
+    // cannot tell for sure. In either case, assume they're different and
+    // explicitly clean up the trash log files (bypass DeleteScheduler).
+    // Do this first so even if we end up calling
+    // DeleteScheduler::CleanupDirectory on the same dir later, it will be
+    // safe
+    std::vector<std::string> filenames;
+    IOOptions io_opts;
+    io_opts.do_not_recurse = true;
+    auto wal_dir = immutable_db_options.GetWalDir();
+    Status s = immutable_db_options.fs->GetChildren(
+        wal_dir, io_opts, &filenames, /*IODebugContext*=*/nullptr);
+    s.PermitUncheckedError();  // TODO: What to do on error?
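+    // The find() below is effectively an ends-with(".log.trash") test:
+    // starting the search at length() - strlen(".log.trash") can only match
+    // at the suffix position. If a filename is shorter than the suffix, the
+    // unsigned subtraction wraps and find() starts past the end, returning
+    // npos, so short names are safely skipped.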
+    for (std::string& filename : filenames) {
+      if (filename.find(".log.trash",
+                        filename.length() -
+                            std::string(".log.trash").length()) !=
+          std::string::npos) {
+        std::string trash_file = wal_dir + "/" + filename;
+        result.env->DeleteFile(trash_file).PermitUncheckedError();
+      }
+    }
+  }
+  // When the DB is stopped, it's possible that there are some .trash files
+  // that were not deleted yet; when we open the DB we will find these .trash
+  // files and schedule them to be deleted (or delete them immediately if
+  // SstFileManager was not used)
+  auto sfm = static_cast<SstFileManagerImpl*>(result.sst_file_manager.get());
+  for (size_t i = 0; i < result.db_paths.size(); i++) {
+    DeleteScheduler::CleanupDirectory(result.env, sfm, result.db_paths[i].path)
+        .PermitUncheckedError();
+  }
+
+  // Create a default SstFileManager for purposes of tracking compaction size
+  // and facilitating recovery from out of space errors.
+  if (result.sst_file_manager.get() == nullptr) {
+    std::shared_ptr<SstFileManager> sst_file_manager(
+        NewSstFileManager(result.env, result.info_log));
+    result.sst_file_manager = sst_file_manager;
+  }
+
+  // Supported wal compression types
+  if (!StreamingCompressionTypeSupported(result.wal_compression)) {
+    result.wal_compression = kNoCompression;
+    ROCKS_LOG_WARN(result.info_log,
+                   "wal_compression is disabled since only zstd is supported");
+  }
+
+  if (!result.paranoid_checks) {
+    result.skip_checking_sst_file_sizes_on_db_open = true;
+    ROCKS_LOG_INFO(result.info_log,
+                   "file size check will be skipped during open.");
+  }
+
+  return result;
+}
+
+namespace {
+Status ValidateOptionsByTable(
+    const DBOptions& db_opts,
+    const std::vector<ColumnFamilyDescriptor>& column_families) {
+  Status s;
+  for (auto& cf : column_families) {
+    s = ValidateOptions(db_opts, cf.options);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  return Status::OK();
+}
+}  // namespace
+
+Status DBImpl::ValidateOptions(
+    const DBOptions& db_options,
+    const std::vector<ColumnFamilyDescriptor>& column_families) {
+  Status s;
+  for (auto& cfd : column_families) {
+    s = ColumnFamilyData::ValidateOptions(db_options, cfd.options);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  s = ValidateOptions(db_options);
+  return s;
+}
+
+Status DBImpl::ValidateOptions(const DBOptions& db_options) {
+  if (db_options.db_paths.size() > 4) {
+    return Status::NotSupported(
+        "More than four DB paths are not supported yet. ");
+  }
+
+  if (db_options.allow_mmap_reads && db_options.use_direct_reads) {
+    // Protect against assert in PosixMMapReadableFile constructor
+    return Status::NotSupported(
+        "If memory mapped reads (allow_mmap_reads) are enabled "
+        "then direct I/O reads (use_direct_reads) must be disabled. ");
+  }
+
+  if (db_options.allow_mmap_writes &&
+      db_options.use_direct_io_for_flush_and_compaction) {
+    return Status::NotSupported(
+        "If memory mapped writes (allow_mmap_writes) are enabled "
+        "then direct I/O writes (use_direct_io_for_flush_and_compaction) "
+        "must be disabled. ");
+  }
+
+  if (db_options.keep_log_file_num == 0) {
+    return Status::InvalidArgument("keep_log_file_num must be greater than 0");
+  }
+
+  if (db_options.unordered_write &&
+      !db_options.allow_concurrent_memtable_write) {
+    return Status::InvalidArgument(
+        "unordered_write is incompatible with "
+        "!allow_concurrent_memtable_write");
+  }
+
+  if (db_options.unordered_write && db_options.enable_pipelined_write) {
+    return Status::InvalidArgument(
+        "unordered_write is incompatible with enable_pipelined_write");
+  }
+
+  if (db_options.atomic_flush && db_options.enable_pipelined_write) {
+    return Status::InvalidArgument(
+        "atomic_flush is incompatible with enable_pipelined_write");
+  }
+
+  // TODO remove this restriction
+  if (db_options.atomic_flush && db_options.best_efforts_recovery) {
+    return Status::InvalidArgument(
+        "atomic_flush is currently incompatible with best-efforts recovery");
+  }
+
+  if (db_options.use_direct_io_for_flush_and_compaction &&
+      0 == db_options.writable_file_max_buffer_size) {
+    return Status::InvalidArgument(
+        "writes in direct IO require writable_file_max_buffer_size > 0");
+  }
+
+  return Status::OK();
+}
+
+Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
+  VersionEdit new_db;
+  Status s = SetIdentityFile(env_, dbname_);
+  if (!s.ok()) {
+    return s;
+  }
+  if (immutable_db_options_.write_dbid_to_manifest) {
+    std::string temp_db_id;
+    GetDbIdentityFromIdentityFile(&temp_db_id);
+    new_db.SetDBId(temp_db_id);
+  }
+  new_db.SetLogNumber(0);
+  new_db.SetNextFile(2);
+  new_db.SetLastSequence(0);
+
+  ROCKS_LOG_INFO(immutable_db_options_.info_log, "Creating manifest 1 \n");
+  const std::string manifest = DescriptorFileName(dbname_, 1);
+  {
+    if (fs_->FileExists(manifest, IOOptions(), nullptr).ok()) {
+      fs_->DeleteFile(manifest, IOOptions(), nullptr).PermitUncheckedError();
+    }
+    std::unique_ptr<FSWritableFile> file;
+    FileOptions file_options = fs_->OptimizeForManifestWrite(file_options_);
+    s = NewWritableFile(fs_.get(), manifest, &file, file_options);
+    if (!s.ok()) {
+      return s;
+    }
+    FileTypeSet tmp_set = immutable_db_options_.checksum_handoff_file_types;
+    file->SetPreallocationBlockSize(
+        immutable_db_options_.manifest_preallocation_size);
+    std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
+        std::move(file), manifest, file_options, immutable_db_options_.clock,
+        io_tracer_, nullptr /* stats */, immutable_db_options_.listeners,
+        nullptr, tmp_set.Contains(FileType::kDescriptorFile),
+        tmp_set.Contains(FileType::kDescriptorFile)));
+    log::Writer log(std::move(file_writer), 0, false);
+    std::string record;
+    new_db.EncodeTo(&record);
+    s = log.AddRecord(record);
+    if (s.ok()) {
+      s = SyncManifest(&immutable_db_options_, log.file());
+    }
+  }
+  if (s.ok()) {
+    // Make "CURRENT" file that points to the new manifest file.
+    s = SetCurrentFile(fs_.get(), dbname_, 1, directories_.GetDbDir());
+    if (new_filenames) {
+      new_filenames->emplace_back(
+          manifest.substr(manifest.find_last_of("/\\") + 1));
+    }
+  } else {
+    fs_->DeleteFile(manifest, IOOptions(), nullptr).PermitUncheckedError();
+  }
+  return s;
+}
+
+IOStatus DBImpl::CreateAndNewDirectory(
+    FileSystem* fs, const std::string& dirname,
+    std::unique_ptr<FSDirectory>* directory) {
+  // We call CreateDirIfMissing() as the directory may already exist (if we
+  // are reopening a DB), when this happens we don't want creating the
+  // directory to cause an error. However, we need to check if creating the
+  // directory fails or else we may get an obscure message about the lock
+  // file not existing. One real-world example of this occurring is if
+  // env->CreateDirIfMissing() doesn't create intermediate directories, e.g.
+  // when dbname_ is "dir/db" but "dir" doesn't exist.
+  IOStatus io_s = fs->CreateDirIfMissing(dirname, IOOptions(), nullptr);
+  if (!io_s.ok()) {
+    return io_s;
+  }
+  return fs->NewDirectory(dirname, IOOptions(), directory, nullptr);
+}
+
+IOStatus Directories::SetDirectories(FileSystem* fs, const std::string& dbname,
+                                     const std::string& wal_dir,
+                                     const std::vector<DbPath>& data_paths) {
+  IOStatus io_s = DBImpl::CreateAndNewDirectory(fs, dbname, &db_dir_);
+  if (!io_s.ok()) {
+    return io_s;
+  }
+  if (!wal_dir.empty() && dbname != wal_dir) {
+    io_s = DBImpl::CreateAndNewDirectory(fs, wal_dir, &wal_dir_);
+    if (!io_s.ok()) {
+      return io_s;
+    }
+  }
+
+  data_dirs_.clear();
+  for (auto& p : data_paths) {
+    const std::string db_path = p.path;
+    if (db_path == dbname) {
+      data_dirs_.emplace_back(nullptr);
+    } else {
+      std::unique_ptr<FSDirectory> path_directory;
+      io_s = DBImpl::CreateAndNewDirectory(fs, db_path, &path_directory);
+      if (!io_s.ok()) {
+        return io_s;
+      }
+      data_dirs_.emplace_back(path_directory.release());
+    }
+  }
+  assert(data_dirs_.size() == data_paths.size());
+  return IOStatus::OK();
+}
+
+Status DBImpl::Recover(
+    const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
+    bool error_if_wal_file_exists, bool error_if_data_exists_in_wals,
+    uint64_t* recovered_seq, RecoveryContext* recovery_ctx) {
+  mutex_.AssertHeld();
+
+  bool is_new_db = false;
+  assert(db_lock_ == nullptr);
+  std::vector<std::string> files_in_dbname;
+  if (!read_only) {
+    Status s = directories_.SetDirectories(fs_.get(), dbname_,
+                                           immutable_db_options_.wal_dir,
+                                           immutable_db_options_.db_paths);
+    if (!s.ok()) {
+      return s;
+    }
+
+    s = env_->LockFile(LockFileName(dbname_), &db_lock_);
+    if (!s.ok()) {
+      return s;
+    }
+
+    std::string current_fname = CurrentFileName(dbname_);
+    // Path to any MANIFEST file in the db dir. It does not matter which one.
+    // Since best-efforts recovery ignores the CURRENT file, the existence of
+    // a MANIFEST indicates that recovery should recover the existing db. If
+    // no MANIFEST can be found, a new db will be created.
+    std::string manifest_path;
+    if (!immutable_db_options_.best_efforts_recovery) {
+      s = env_->FileExists(current_fname);
+    } else {
+      s = Status::NotFound();
+      IOOptions io_opts;
+      io_opts.do_not_recurse = true;
+      Status io_s = immutable_db_options_.fs->GetChildren(
+          dbname_, io_opts, &files_in_dbname, /*IODebugContext*=*/nullptr);
+      if (!io_s.ok()) {
+        s = io_s;
+        files_in_dbname.clear();
+      }
+      for (const std::string& file : files_in_dbname) {
+        uint64_t number = 0;
+        FileType type = kWalFile;  // initialize
+        if (ParseFileName(file, &number, &type) && type == kDescriptorFile) {
+          uint64_t bytes;
+          s = env_->GetFileSize(DescriptorFileName(dbname_, number), &bytes);
+          if (s.ok() && bytes != 0) {
+            // Found a non-empty MANIFEST (descriptor log), thus best-efforts
+            // recovery does not have to treat the db as empty.
+            manifest_path = dbname_ + "/" + file;
+            break;
+          }
+        }
+      }
+    }
+    if (s.IsNotFound()) {
+      if (immutable_db_options_.create_if_missing) {
+        s = NewDB(&files_in_dbname);
+        is_new_db = true;
+        if (!s.ok()) {
+          return s;
+        }
+      } else {
+        return Status::InvalidArgument(
+            current_fname, "does not exist (create_if_missing is false)");
+      }
+    } else if (s.ok()) {
+      if (immutable_db_options_.error_if_exists) {
+        return Status::InvalidArgument(dbname_,
+                                       "exists (error_if_exists is true)");
+      }
+    } else {
+      // Unexpected error reading file
+      assert(s.IsIOError());
+      return s;
+    }
+    // Verify compatibility of file_options_ and filesystem
+    {
+      std::unique_ptr<FSRandomAccessFile> idfile;
+      FileOptions customized_fs(file_options_);
+      customized_fs.use_direct_reads |=
+          immutable_db_options_.use_direct_io_for_flush_and_compaction;
+      const std::string& fname =
+          manifest_path.empty() ? current_fname : manifest_path;
+      s = fs_->NewRandomAccessFile(fname, customized_fs, &idfile, nullptr);
+      if (!s.ok()) {
+        std::string error_str = s.ToString();
+        // Check if unsupported Direct I/O is the root cause
+        customized_fs.use_direct_reads = false;
+        s = fs_->NewRandomAccessFile(fname, customized_fs, &idfile, nullptr);
+        if (s.ok()) {
+          return Status::InvalidArgument(
+              "Direct I/O is not supported by the specified DB.");
+        } else {
+          return Status::InvalidArgument(
+              "Found options incompatible with filesystem", error_str.c_str());
+        }
+      }
+    }
+  } else if (immutable_db_options_.best_efforts_recovery) {
+    assert(files_in_dbname.empty());
+    IOOptions io_opts;
+    io_opts.do_not_recurse = true;
+    Status s = immutable_db_options_.fs->GetChildren(
+        dbname_, io_opts, &files_in_dbname, /*IODebugContext*=*/nullptr);
+    if (s.IsNotFound()) {
+      return Status::InvalidArgument(dbname_,
+                                     "does not exist (open for read only)");
+    } else if (s.IsIOError()) {
+      return s;
+    }
+    assert(s.ok());
+  }
+  assert(db_id_.empty());
+  Status s;
+  bool missing_table_file = false;
+  if (!immutable_db_options_.best_efforts_recovery) {
+    s = versions_->Recover(column_families, read_only, &db_id_);
+  } else {
+    assert(!files_in_dbname.empty());
+    s = versions_->TryRecover(column_families, read_only, files_in_dbname,
+                              &db_id_, &missing_table_file);
+    if (s.ok()) {
+      // TryRecover may delete the previous column_family_set_.
+      column_family_memtables_.reset(
+          new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet()));
+    }
+  }
+  if (!s.ok()) {
+    return s;
+  }
+  if (s.ok() && !read_only) {
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      // Try to trivially move files down the LSM tree to start from the
+      // bottommost level when level_compaction_dynamic_level_bytes is
+      // enabled. This should only be useful when a user is migrating to
+      // turning on this option. If a user is migrating from Level Compaction
+      // with a smaller level multiplier or from Universal Compaction, there
+      // may be too many non-empty levels and the trivial moves here are not
+      // sufficient for migration. Additional compactions are needed to drain
+      // unnecessary levels.
+      //
+      // Note that this step moves files down LSM without consulting
+      // SSTPartitioner. Further compactions are still needed if
+      // the user wants to partition SST files.
+      // Note that files moved in this step may not respect the compression
+      // option in target level.
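+      // A worked example (informal): with num_levels = 7 and files initially
+      // in L1 and L2 only, i.e. lsm_state [0,2,3,0,0,0,0], the loop below
+      // walks from_level from the bottom up and trivially moves L2 -> L6,
+      // then L1 -> L5, yielding [0,0,0,0,0,2,3] without rewriting any data.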
+      if (cfd->ioptions()->compaction_style ==
+              CompactionStyle::kCompactionStyleLevel &&
+          cfd->ioptions()->level_compaction_dynamic_level_bytes &&
+          !cfd->GetLatestMutableCFOptions()->disable_auto_compactions) {
+        int to_level = cfd->ioptions()->num_levels - 1;
+        // last level is reserved
+        // allow_ingest_behind does not support Level Compaction,
+        // and per_key_placement can have an infinite compaction loop for
+        // Level Compaction. Adjust to_level here just to be safe.
+        if (cfd->ioptions()->allow_ingest_behind ||
+            cfd->ioptions()->preclude_last_level_data_seconds > 0) {
+          to_level -= 1;
+        }
+        // Whether this column family has a level trivially moved
+        bool moved = false;
+        // Fill the LSM starting from to_level and going up one level at a
+        // time. Some loop invariants (when last level is not reserved):
+        // - levels in (from_level, to_level] are empty, and
+        // - levels in (to_level, last_level] are non-empty.
+        for (int from_level = to_level; from_level >= 0; --from_level) {
+          const std::vector<FileMetaData*>& level_files =
+              cfd->current()->storage_info()->LevelFiles(from_level);
+          if (level_files.empty() || from_level == 0) {
+            continue;
+          }
+          assert(from_level <= to_level);
+          // Trivially move files from `from_level` to `to_level`
+          if (from_level < to_level) {
+            if (!moved) {
+              // lsm_state will look like "[1,2,3,4,5,6,0]" for an LSM with
+              // 7 levels
+              std::string lsm_state = "[";
+              for (int i = 0; i < cfd->ioptions()->num_levels; ++i) {
+                lsm_state += std::to_string(
+                    cfd->current()->storage_info()->NumLevelFiles(i));
+                if (i < cfd->ioptions()->num_levels - 1) {
+                  lsm_state += ",";
+                }
+              }
+              lsm_state += "]";
+              ROCKS_LOG_WARN(immutable_db_options_.info_log,
+                             "[%s] Trivially move files down the LSM when open "
+                             "with level_compaction_dynamic_level_bytes=true,"
+                             " lsm_state: %s (Files are moved only if DB "
+                             "Recovery is successful).",
+                             cfd->GetName().c_str(), lsm_state.c_str());
+              moved = true;
+            }
+            ROCKS_LOG_WARN(
+                immutable_db_options_.info_log,
+                "[%s] Moving %zu files from level-%d to level-%d",
+                cfd->GetName().c_str(), level_files.size(), from_level,
+                to_level);
+            VersionEdit edit;
+            edit.SetColumnFamily(cfd->GetID());
+            for (const FileMetaData* f : level_files) {
+              edit.DeleteFile(from_level, f->fd.GetNumber());
+              edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(),
+                           f->fd.GetFileSize(), f->smallest, f->largest,
+                           f->fd.smallest_seqno, f->fd.largest_seqno,
+                           f->marked_for_compaction,
+                           f->temperature,  // this can be different from
+                                            // `last_level_temperature`
+                           f->oldest_blob_file_number, f->oldest_ancester_time,
+                           f->file_creation_time, f->epoch_number,
+                           f->file_checksum, f->file_checksum_func_name,
+                           f->unique_id, f->compensated_range_deletion_size,
+                           f->tail_size);
+              ROCKS_LOG_WARN(immutable_db_options_.info_log,
+                             "[%s] Moving #%" PRIu64
+                             " from level-%d to level-%d %" PRIu64 " bytes\n",
+                             cfd->GetName().c_str(), f->fd.GetNumber(),
+                             from_level, to_level, f->fd.GetFileSize());
+            }
+            recovery_ctx->UpdateVersionEdits(cfd, edit);
+          }
+          --to_level;
+        }
+      }
+    }
+  }
+  s = SetupDBId(read_only, recovery_ctx);
+  ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB ID: %s\n", db_id_.c_str());
+  if (s.ok() && !read_only) {
+    s = DeleteUnreferencedSstFiles(recovery_ctx);
+  }
+
+  if (immutable_db_options_.paranoid_checks && s.ok()) {
+    s = CheckConsistency();
+  }
+  if (s.ok() && !read_only) {
+    // TODO: share file descriptors (FSDirectory) with SetDirectories above
+    std::map<std::string, std::shared_ptr<FSDirectory>> created_dirs;
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      s = cfd->AddDirectories(&created_dirs);
+      if (!s.ok()) {
+        return s;
+      }
+    }
+  }
+
+  std::vector<std::string> files_in_wal_dir;
+  if (s.ok()) {
+    // Initial value of max_total_in_memory_state_ before recovering wals.
+    // Log recovery may check this value to decide whether to flush.
+    max_total_in_memory_state_ = 0;
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      auto* mutable_cf_options = cfd->GetLatestMutableCFOptions();
+      max_total_in_memory_state_ +=
+          mutable_cf_options->write_buffer_size *
+          mutable_cf_options->max_write_buffer_number;
+    }
+
+    SequenceNumber next_sequence(kMaxSequenceNumber);
+    default_cf_handle_ = new ColumnFamilyHandleImpl(
+        versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_);
+    default_cf_internal_stats_ = default_cf_handle_->cfd()->internal_stats();
+
+    // Recover from all log files newer than the ones named in the
+    // descriptor (new log files may have been added by the previous
+    // incarnation without registering them in the descriptor).
+    //
+    // Note that prev_log_number() is no longer used, but we pay
+    // attention to it in case we are recovering a database
+    // produced by an older version of rocksdb.
+    auto wal_dir = immutable_db_options_.GetWalDir();
+    if (!immutable_db_options_.best_efforts_recovery) {
+      IOOptions io_opts;
+      io_opts.do_not_recurse = true;
+      s = immutable_db_options_.fs->GetChildren(
+          wal_dir, io_opts, &files_in_wal_dir, /*IODebugContext*=*/nullptr);
+    }
+    if (s.IsNotFound()) {
+      return Status::InvalidArgument("wal_dir not found", wal_dir);
+    } else if (!s.ok()) {
+      return s;
+    }
+
+    std::unordered_map<uint64_t, std::string> wal_files;
+    for (const auto& file : files_in_wal_dir) {
+      uint64_t number;
+      FileType type;
+      if (ParseFileName(file, &number, &type) && type == kWalFile) {
+        if (is_new_db) {
+          return Status::Corruption(
+              "While creating a new Db, wal_dir contains "
+              "existing log file: ",
+              file);
+        } else {
+          wal_files[number] = LogFileName(wal_dir, number);
+        }
+      }
+    }
+
+    if (immutable_db_options_.track_and_verify_wals_in_manifest) {
+      if (!immutable_db_options_.best_efforts_recovery) {
+        // Verify WALs in MANIFEST.
+        s = versions_->GetWalSet().CheckWals(env_, wal_files);
+      }  // else since best effort recovery does not recover from WALs, no
+         // need to check WALs.
+    } else if (!versions_->GetWalSet().GetWals().empty()) {
+      // Tracking is disabled, clear previously tracked WALs from MANIFEST;
+      // otherwise, in the future, if WAL tracking is enabled again,
+      // since the WALs deleted when WAL tracking is disabled are not
+      // persisted into MANIFEST, the WAL check may fail.
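+      // (DeleteWalsBefore(n) records in the MANIFEST that all tracked WALs
+      // numbered below n are obsolete, so using max_wal_number + 1 clears
+      // every previously tracked WAL in a single version edit.)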
+      VersionEdit edit;
+      WalNumber max_wal_number =
+          versions_->GetWalSet().GetWals().rbegin()->first;
+      edit.DeleteWalsBefore(max_wal_number + 1);
+      assert(recovery_ctx != nullptr);
+      assert(versions_->GetColumnFamilySet() != nullptr);
+      recovery_ctx->UpdateVersionEdits(
+          versions_->GetColumnFamilySet()->GetDefault(), edit);
+    }
+    if (!s.ok()) {
+      return s;
+    }
+
+    if (!wal_files.empty()) {
+      if (error_if_wal_file_exists) {
+        return Status::Corruption(
+            "The db was opened in readonly mode with the "
+            "error_if_wal_file_exists flag but a WAL file already exists");
+      } else if (error_if_data_exists_in_wals) {
+        for (auto& wal_file : wal_files) {
+          uint64_t bytes;
+          s = env_->GetFileSize(wal_file.second, &bytes);
+          if (s.ok()) {
+            if (bytes > 0) {
+              return Status::Corruption(
+                  "error_if_data_exists_in_wals is set but there is data "
+                  "in WAL files.");
+            }
+          }
+        }
+      }
+    }
+
+    if (!wal_files.empty()) {
+      // Recover in the order in which the wals were generated
+      std::vector<uint64_t> wals;
+      wals.reserve(wal_files.size());
+      for (const auto& wal_file : wal_files) {
+        wals.push_back(wal_file.first);
+      }
+      std::sort(wals.begin(), wals.end());
+
+      bool corrupted_wal_found = false;
+      s = RecoverLogFiles(wals, &next_sequence, read_only,
+                          &corrupted_wal_found, recovery_ctx);
+      if (corrupted_wal_found && recovered_seq != nullptr) {
+        *recovered_seq = next_sequence;
+      }
+      if (!s.ok()) {
+        // Clear memtables if recovery failed
+        for (auto cfd : *versions_->GetColumnFamilySet()) {
+          cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(),
+                                 kMaxSequenceNumber);
+        }
+      }
+    }
+  }
+
+  if (read_only) {
+    // If we are opening as read-only, we need to update options_file_number_
+    // to reflect the most recent OPTIONS file. It does not matter for
+    // regular read-write db instances because options_file_number_ will
+    // later be updated to versions_->NewFileNumber() in
+    // RenameTempFileToOptionsFile.
+    std::vector<std::string> filenames;
+    if (s.ok()) {
+      const std::string normalized_dbname = NormalizePath(dbname_);
+      const std::string normalized_wal_dir =
+          NormalizePath(immutable_db_options_.GetWalDir());
+      if (immutable_db_options_.best_efforts_recovery) {
+        filenames = std::move(files_in_dbname);
+      } else if (normalized_dbname == normalized_wal_dir) {
+        filenames = std::move(files_in_wal_dir);
+      } else {
+        IOOptions io_opts;
+        io_opts.do_not_recurse = true;
+        s = immutable_db_options_.fs->GetChildren(
+            GetName(), io_opts, &filenames, /*IODebugContext*=*/nullptr);
+      }
+    }
+    if (s.ok()) {
+      uint64_t number = 0;
+      uint64_t options_file_number = 0;
+      FileType type;
+      for (const auto& fname : filenames) {
+        if (ParseFileName(fname, &number, &type) && type == kOptionsFile) {
+          options_file_number = std::max(number, options_file_number);
+        }
+      }
+      versions_->options_file_number_ = options_file_number;
+      uint64_t options_file_size = 0;
+      if (options_file_number > 0) {
+        s = env_->GetFileSize(OptionsFileName(GetName(), options_file_number),
+                              &options_file_size);
+      }
+      versions_->options_file_size_ = options_file_size;
+    }
+  }
+  return s;
+}
+
+Status DBImpl::PersistentStatsProcessFormatVersion() {
+  mutex_.AssertHeld();
+  Status s;
+  // persist the version when the stats CF doesn't exist
+  bool should_persist_format_version = !persistent_stats_cfd_exists_;
+  mutex_.Unlock();
+  if (persistent_stats_cfd_exists_) {
+    // Check persistent stats format version compatibility. Drop and recreate
+    // the persistent stats CF if the format version is incompatible.
+    uint64_t format_version_recovered = 0;
+    Status s_format = DecodePersistentStatsVersionNumber(
+        this, StatsVersionKeyType::kFormatVersion, &format_version_recovered);
+    uint64_t compatible_version_recovered = 0;
+    Status s_compatible = DecodePersistentStatsVersionNumber(
+        this, StatsVersionKeyType::kCompatibleVersion,
+        &compatible_version_recovered);
+    // abort reading from the existing stats CF if any of the following is
+    // true:
+    // 1. failed to read the format version or compatible version from disk
+    // 2. the sst's format version is greater than the current format
+    //    version, meaning this sst is encoded with a newer RocksDB release,
+    //    and the current compatible version is below the sst's compatible
+    //    version
+    if (!s_format.ok() || !s_compatible.ok() ||
+        (kStatsCFCurrentFormatVersion < format_version_recovered &&
+         kStatsCFCompatibleFormatVersion < compatible_version_recovered)) {
+      if (!s_format.ok() || !s_compatible.ok()) {
+        ROCKS_LOG_WARN(
+            immutable_db_options_.info_log,
+            "Recreating persistent stats column family since reading "
+            "persistent stats version key failed. Format key: %s, compatible "
+            "key: %s",
+            s_format.ToString().c_str(), s_compatible.ToString().c_str());
+      } else {
+        ROCKS_LOG_WARN(
+            immutable_db_options_.info_log,
+            "Recreating persistent stats column family due to corrupted or "
+            "incompatible format version. Recovered format: %" PRIu64
+            "; recovered format compatible since: %" PRIu64 "\n",
+            format_version_recovered, compatible_version_recovered);
+      }
+      s = DropColumnFamily(persist_stats_cf_handle_);
+      if (s.ok()) {
+        s = DestroyColumnFamilyHandle(persist_stats_cf_handle_);
+      }
+      ColumnFamilyHandle* handle = nullptr;
+      if (s.ok()) {
+        ColumnFamilyOptions cfo;
+        OptimizeForPersistentStats(&cfo);
+        s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle);
+      }
+      if (s.ok()) {
+        persist_stats_cf_handle_ =
+            static_cast<ColumnFamilyHandleImpl*>(handle);
+        // should also persist the version here because the old stats CF is
+        // discarded
+        should_persist_format_version = true;
+      }
+    }
+  }
+  if (should_persist_format_version) {
+    // Persistent stats CF being created for the first time, need to write
+    // the format version key
+    WriteBatch batch;
+    if (s.ok()) {
+      s = batch.Put(persist_stats_cf_handle_, kFormatVersionKeyString,
+                    std::to_string(kStatsCFCurrentFormatVersion));
+    }
+    if (s.ok()) {
+      s = batch.Put(persist_stats_cf_handle_, kCompatibleVersionKeyString,
+                    std::to_string(kStatsCFCompatibleFormatVersion));
+    }
+    if (s.ok()) {
+      WriteOptions wo;
+      wo.low_pri = true;
+      wo.no_slowdown = true;
+      wo.sync = false;
+      s = Write(wo, &batch);
+    }
+  }
+  mutex_.Lock();
+  return s;
+}
+
+Status DBImpl::InitPersistStatsColumnFamily() {
+  mutex_.AssertHeld();
+  assert(!persist_stats_cf_handle_);
+  ColumnFamilyData* persistent_stats_cfd =
+      versions_->GetColumnFamilySet()->GetColumnFamily(
+          kPersistentStatsColumnFamilyName);
+  persistent_stats_cfd_exists_ = persistent_stats_cfd != nullptr;
+
+  Status s;
+  if (persistent_stats_cfd != nullptr) {
+    // We are recovering from a DB which already contains the persistent
+    // stats CF; the CF is already created in
+    // VersionSet::ApplyOneVersionEdit, but the column family handle was not.
+    // Need to explicitly create the handle here.
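+    // (The handle is constructed directly rather than via
+    // CreateColumnFamily() because the CF itself already exists in the
+    // VersionSet; only the in-memory handle wrapper is missing here.)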
+ persist_stats_cf_handle_ = + new ColumnFamilyHandleImpl(persistent_stats_cfd, this, &mutex_); + } else { + mutex_.Unlock(); + ColumnFamilyHandle* handle = nullptr; + ColumnFamilyOptions cfo; + OptimizeForPersistentStats(&cfo); + s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle); + persist_stats_cf_handle_ = static_cast(handle); + mutex_.Lock(); + } + return s; +} + +Status DBImpl::LogAndApplyForRecovery(const RecoveryContext& recovery_ctx) { + mutex_.AssertHeld(); + assert(versions_->descriptor_log_ == nullptr); + const ReadOptions read_options(Env::IOActivity::kDBOpen); + Status s = versions_->LogAndApply( + recovery_ctx.cfds_, recovery_ctx.mutable_cf_opts_, read_options, + recovery_ctx.edit_lists_, &mutex_, directories_.GetDbDir()); + if (s.ok() && !(recovery_ctx.files_to_delete_.empty())) { + mutex_.Unlock(); + for (const auto& fname : recovery_ctx.files_to_delete_) { + s = env_->DeleteFile(fname); + if (!s.ok()) { + break; + } + } + mutex_.Lock(); + } + return s; +} + +void DBImpl::InvokeWalFilterIfNeededOnColumnFamilyToWalNumberMap() { + if (immutable_db_options_.wal_filter == nullptr) { + return; + } + assert(immutable_db_options_.wal_filter != nullptr); + WalFilter& wal_filter = *(immutable_db_options_.wal_filter); + + std::map cf_name_id_map; + std::map cf_lognumber_map; + assert(versions_); + assert(versions_->GetColumnFamilySet()); + for (auto cfd : *versions_->GetColumnFamilySet()) { + assert(cfd); + cf_name_id_map.insert(std::make_pair(cfd->GetName(), cfd->GetID())); + cf_lognumber_map.insert(std::make_pair(cfd->GetID(), cfd->GetLogNumber())); + } + + wal_filter.ColumnFamilyLogNumberMap(cf_lognumber_map, cf_name_id_map); +} + +bool DBImpl::InvokeWalFilterIfNeededOnWalRecord(uint64_t wal_number, + const std::string& wal_fname, + log::Reader::Reporter& reporter, + Status& status, + bool& stop_replay, + WriteBatch& batch) { + if (immutable_db_options_.wal_filter == nullptr) { + return true; + } + assert(immutable_db_options_.wal_filter != nullptr); + WalFilter& wal_filter = *(immutable_db_options_.wal_filter); + + WriteBatch new_batch; + bool batch_changed = false; + + bool process_current_record = true; + + WalFilter::WalProcessingOption wal_processing_option = + wal_filter.LogRecordFound(wal_number, wal_fname, batch, &new_batch, + &batch_changed); + + switch (wal_processing_option) { + case WalFilter::WalProcessingOption::kContinueProcessing: + // do nothing, proceeed normally + break; + case WalFilter::WalProcessingOption::kIgnoreCurrentRecord: + // skip current record + process_current_record = false; + break; + case WalFilter::WalProcessingOption::kStopReplay: + // skip current record and stop replay + process_current_record = false; + stop_replay = true; + break; + case WalFilter::WalProcessingOption::kCorruptedRecord: { + status = Status::Corruption("Corruption reported by Wal Filter ", + wal_filter.Name()); + MaybeIgnoreError(&status); + if (!status.ok()) { + process_current_record = false; + reporter.Corruption(batch.GetDataSize(), status); + } + break; + } + default: { + // logical error which should not happen. If RocksDB throws, we would + // just do `throw std::logic_error`. + assert(false); + status = Status::NotSupported( + "Unknown WalProcessingOption returned by Wal Filter ", + wal_filter.Name()); + MaybeIgnoreError(&status); + if (!status.ok()) { + // Ignore the error with current record processing. 
+ stop_replay = true; + } + break; + } + } + + if (!process_current_record) { + return false; + } + + if (batch_changed) { + // Make sure that the count in the new batch is + // within the orignal count. + int new_count = WriteBatchInternal::Count(&new_batch); + int original_count = WriteBatchInternal::Count(&batch); + if (new_count > original_count) { + ROCKS_LOG_FATAL( + immutable_db_options_.info_log, + "Recovering log #%" PRIu64 + " mode %d log filter %s returned " + "more records (%d) than original (%d) which is not allowed. " + "Aborting recovery.", + wal_number, static_cast(immutable_db_options_.wal_recovery_mode), + wal_filter.Name(), new_count, original_count); + status = Status::NotSupported( + "More than original # of records " + "returned by Wal Filter ", + wal_filter.Name()); + return false; + } + // Set the same sequence number in the new_batch + // as the original batch. + WriteBatchInternal::SetSequence(&new_batch, + WriteBatchInternal::Sequence(&batch)); + batch = new_batch; + } + return true; +} + +// REQUIRES: wal_numbers are sorted in ascending order +Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, + SequenceNumber* next_sequence, bool read_only, + bool* corrupted_wal_found, + RecoveryContext* recovery_ctx) { + struct LogReporter : public log::Reader::Reporter { + Env* env; + Logger* info_log; + const char* fname; + Status* status; // nullptr if immutable_db_options_.paranoid_checks==false + void Corruption(size_t bytes, const Status& s) override { + ROCKS_LOG_WARN(info_log, "%s%s: dropping %d bytes; %s", + (status == nullptr ? "(ignoring error) " : ""), fname, + static_cast(bytes), s.ToString().c_str()); + if (status != nullptr && status->ok()) { + *status = s; + } + } + }; + + mutex_.AssertHeld(); + Status status; + std::unordered_map version_edits; + // no need to refcount because iteration is under mutex + for (auto cfd : *versions_->GetColumnFamilySet()) { + VersionEdit edit; + edit.SetColumnFamily(cfd->GetID()); + version_edits.insert({cfd->GetID(), edit}); + } + int job_id = next_job_id_.fetch_add(1); + { + auto stream = event_logger_.Log(); + stream << "job" << job_id << "event" + << "recovery_started"; + stream << "wal_files"; + stream.StartArray(); + for (auto wal_number : wal_numbers) { + stream << wal_number; + } + stream.EndArray(); + } + + // No-op for immutable_db_options_.wal_filter == nullptr. + InvokeWalFilterIfNeededOnColumnFamilyToWalNumberMap(); + + bool stop_replay_by_wal_filter = false; + bool stop_replay_for_corruption = false; + bool flushed = false; + uint64_t corrupted_wal_number = kMaxSequenceNumber; + uint64_t min_wal_number = MinLogNumberToKeep(); + if (!allow_2pc()) { + // In non-2pc mode, we skip WALs that do not back unflushed data. + min_wal_number = + std::max(min_wal_number, versions_->MinLogNumberWithUnflushedData()); + } + for (auto wal_number : wal_numbers) { + if (wal_number < min_wal_number) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Skipping log #%" PRIu64 + " since it is older than min log to keep #%" PRIu64, + wal_number, min_wal_number); + continue; + } + // The previous incarnation may not have written any MANIFEST + // records after allocating this log number. So we manually + // update the file number allocation counter in VersionSet. 
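+    // Without this bump, VersionSet::NewFileNumber() could later hand out a
+    // file number that collides with this WAL.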
+ versions_->MarkFileNumberUsed(wal_number); + // Open the log file + std::string fname = + LogFileName(immutable_db_options_.GetWalDir(), wal_number); + + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Recovering log #%" PRIu64 " mode %d", wal_number, + static_cast(immutable_db_options_.wal_recovery_mode)); + auto logFileDropped = [this, &fname]() { + uint64_t bytes; + if (env_->GetFileSize(fname, &bytes).ok()) { + auto info_log = immutable_db_options_.info_log.get(); + ROCKS_LOG_WARN(info_log, "%s: dropping %d bytes", fname.c_str(), + static_cast(bytes)); + } + }; + if (stop_replay_by_wal_filter) { + logFileDropped(); + continue; + } + + std::unique_ptr file_reader; + { + std::unique_ptr file; + status = fs_->NewSequentialFile( + fname, fs_->OptimizeForLogRead(file_options_), &file, nullptr); + if (!status.ok()) { + MaybeIgnoreError(&status); + if (!status.ok()) { + return status; + } else { + // Fail with one log file, but that's ok. + // Try next one. + continue; + } + } + file_reader.reset(new SequentialFileReader( + std::move(file), fname, immutable_db_options_.log_readahead_size, + io_tracer_)); + } + + // Create the log reader. + LogReporter reporter; + reporter.env = env_; + reporter.info_log = immutable_db_options_.info_log.get(); + reporter.fname = fname.c_str(); + if (!immutable_db_options_.paranoid_checks || + immutable_db_options_.wal_recovery_mode == + WALRecoveryMode::kSkipAnyCorruptedRecords) { + reporter.status = nullptr; + } else { + reporter.status = &status; + } + // We intentially make log::Reader do checksumming even if + // paranoid_checks==false so that corruptions cause entire commits + // to be skipped instead of propagating bad information (like overly + // large sequence numbers). + log::Reader reader(immutable_db_options_.info_log, std::move(file_reader), + &reporter, true /*checksum*/, wal_number); + + // Determine if we should tolerate incomplete records at the tail end of the + // Read all the records and add to a memtable + std::string scratch; + Slice record; + + TEST_SYNC_POINT_CALLBACK("DBImpl::RecoverLogFiles:BeforeReadWal", + /*arg=*/nullptr); + uint64_t record_checksum; + while (!stop_replay_by_wal_filter && + reader.ReadRecord(&record, &scratch, + immutable_db_options_.wal_recovery_mode, + &record_checksum) && + status.ok()) { + if (record.size() < WriteBatchInternal::kHeader) { + reporter.Corruption(record.size(), + Status::Corruption("log record too small")); + continue; + } + + // We create a new batch and initialize with a valid prot_info_ to store + // the data checksums + WriteBatch batch; + + status = WriteBatchInternal::SetContents(&batch, record); + if (!status.ok()) { + return status; + } + TEST_SYNC_POINT_CALLBACK( + "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch", &batch); + TEST_SYNC_POINT_CALLBACK( + "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:checksum", + &record_checksum); + status = WriteBatchInternal::UpdateProtectionInfo( + &batch, 8 /* bytes_per_key */, &record_checksum); + if (!status.ok()) { + return status; + } + + SequenceNumber sequence = WriteBatchInternal::Sequence(&batch); + + if (immutable_db_options_.wal_recovery_mode == + WALRecoveryMode::kPointInTimeRecovery) { + // In point-in-time recovery mode, if sequence id of log files are + // consecutive, we continue recovery despite corruption. This could + // happen when we open and write to a corrupted DB, where sequence id + // will start from the last sequence id we recovered. 
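+      // Worked example: if the last applied batch ended at seqno 41, then
+      // *next_sequence is 42; a batch that starts at 42 proves the WALs are
+      // seqno-consecutive, so replay may resume after an earlier corruption.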
+ if (sequence == *next_sequence) { + stop_replay_for_corruption = false; + } + if (stop_replay_for_corruption) { + logFileDropped(); + break; + } + } + + // For the default case of wal_filter == nullptr, always performs no-op + // and returns true. + if (!InvokeWalFilterIfNeededOnWalRecord(wal_number, fname, reporter, + status, stop_replay_by_wal_filter, + batch)) { + continue; + } + + // If column family was not found, it might mean that the WAL write + // batch references to the column family that was dropped after the + // insert. We don't want to fail the whole write batch in that case -- + // we just ignore the update. + // That's why we set ignore missing column families to true + bool has_valid_writes = false; + status = WriteBatchInternal::InsertInto( + &batch, column_family_memtables_.get(), &flush_scheduler_, + &trim_history_scheduler_, true, wal_number, this, + false /* concurrent_memtable_writes */, next_sequence, + &has_valid_writes, seq_per_batch_, batch_per_txn_); + MaybeIgnoreError(&status); + if (!status.ok()) { + // We are treating this as a failure while reading since we read valid + // blocks that do not form coherent data + reporter.Corruption(record.size(), status); + continue; + } + + if (has_valid_writes && !read_only) { + // we can do this because this is called before client has access to the + // DB and there is only a single thread operating on DB + ColumnFamilyData* cfd; + + while ((cfd = flush_scheduler_.TakeNextColumnFamily()) != nullptr) { + cfd->UnrefAndTryDelete(); + // If this asserts, it means that InsertInto failed in + // filtering updates to already-flushed column families + assert(cfd->GetLogNumber() <= wal_number); + auto iter = version_edits.find(cfd->GetID()); + assert(iter != version_edits.end()); + VersionEdit* edit = &iter->second; + status = WriteLevel0TableForRecovery(job_id, cfd, cfd->mem(), edit); + if (!status.ok()) { + // Reflect errors immediately so that conditions like full + // file-systems cause the DB::Open() to fail. + return status; + } + flushed = true; + + cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(), + *next_sequence); + } + } + } + + if (!status.ok()) { + if (status.IsNotSupported()) { + // We should not treat NotSupported as corruption. It is rather a clear + // sign that we are processing a WAL that is produced by an incompatible + // version of the code. + return status; + } + if (immutable_db_options_.wal_recovery_mode == + WALRecoveryMode::kSkipAnyCorruptedRecords) { + // We should ignore all errors unconditionally + status = Status::OK(); + } else if (immutable_db_options_.wal_recovery_mode == + WALRecoveryMode::kPointInTimeRecovery) { + if (status.IsIOError()) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "IOError during point-in-time reading log #%" PRIu64 + " seq #%" PRIu64 + ". %s. 
This likely means loss of synced WAL, "
+                          "thus recovery fails.",
+                          wal_number, *next_sequence,
+                          status.ToString().c_str());
+          return status;
+        }
+        // We should ignore the error but not continue replaying.
+        status = Status::OK();
+        stop_replay_for_corruption = true;
+        corrupted_wal_number = wal_number;
+        if (corrupted_wal_found != nullptr) {
+          *corrupted_wal_found = true;
+        }
+        ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                       "Point in time recovered to log #%" PRIu64
+                       " seq #%" PRIu64,
+                       wal_number, *next_sequence);
+      } else {
+        assert(immutable_db_options_.wal_recovery_mode ==
+                   WALRecoveryMode::kTolerateCorruptedTailRecords ||
+               immutable_db_options_.wal_recovery_mode ==
+                   WALRecoveryMode::kAbsoluteConsistency);
+        return status;
+      }
+    }
+
+    flush_scheduler_.Clear();
+    trim_history_scheduler_.Clear();
+    auto last_sequence = *next_sequence - 1;
+    if ((*next_sequence != kMaxSequenceNumber) &&
+        (versions_->LastSequence() <= last_sequence)) {
+      versions_->SetLastAllocatedSequence(last_sequence);
+      versions_->SetLastPublishedSequence(last_sequence);
+      versions_->SetLastSequence(last_sequence);
+    }
+  }
+  // Compare the corrupted log number to each column family's current log
+  // number. Abort Open() if any column family's log number is greater than
+  // the corrupted log number, which means the CF contains data beyond the
+  // point of corruption. This can happen during PIT recovery when the WAL is
+  // corrupted and some (but not all) CFs are flushed.
+  // Exclude the PIT case where no log is dropped after the corruption point.
+  // This covers the case of empty WALs after a corrupted log, in which we
+  // don't reset stop_replay_for_corruption.
+  if (stop_replay_for_corruption == true &&
+      (immutable_db_options_.wal_recovery_mode ==
+           WALRecoveryMode::kPointInTimeRecovery ||
+       immutable_db_options_.wal_recovery_mode ==
+           WALRecoveryMode::kTolerateCorruptedTailRecords)) {
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      // One special case causes cfd->GetLogNumber() > corrupted_wal_number
+      // while the CF is still consistent: if a new column family is created
+      // during the flush and the WAL sync fails at the same time, the new CF
+      // points to the new WAL but the old WAL is corrupted. Since the new CF
+      // is empty, it is still consistent. We add the check of CF SST file
+      // size to avoid a false positive alert.
+
+      // Note that the check of (cfd->GetLiveSstFilesSize() > 0) may lead to
+      // ignoring a very rare inconsistency case caused by data deletion: a
+      // CF can be empty because of KV deletions, but those operations are in
+      // the WAL. If the WAL is corrupted, the state of this CF might not be
+      // consistent with the others, yet the consistency check is bypassed
+      // because the CF is empty.
+      // TODO: a better and complete implementation is needed to ensure a
+      // strict consistency check in WAL recovery, including handling the
+      // tailing issues.
+ if (cfd->GetLogNumber() > corrupted_wal_number && + cfd->GetLiveSstFilesSize() > 0) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "Column family inconsistency: SST file contains data" + " beyond the point of corruption."); + return Status::Corruption("SST file is ahead of WALs in CF " + + cfd->GetName()); + } + } + } + + // True if there's any data in the WALs; if not, we can skip re-processing + // them later + bool data_seen = false; + if (!read_only) { + // no need to refcount since client still doesn't have access + // to the DB and can not drop column families while we iterate + const WalNumber max_wal_number = wal_numbers.back(); + for (auto cfd : *versions_->GetColumnFamilySet()) { + auto iter = version_edits.find(cfd->GetID()); + assert(iter != version_edits.end()); + VersionEdit* edit = &iter->second; + + if (cfd->GetLogNumber() > max_wal_number) { + // Column family cfd has already flushed the data + // from all wals. Memtable has to be empty because + // we filter the updates based on wal_number + // (in WriteBatch::InsertInto) + assert(cfd->mem()->GetFirstSequenceNumber() == 0); + assert(edit->NumEntries() == 0); + continue; + } + + TEST_SYNC_POINT_CALLBACK( + "DBImpl::RecoverLogFiles:BeforeFlushFinalMemtable", /*arg=*/nullptr); + + // flush the final memtable (if non-empty) + if (cfd->mem()->GetFirstSequenceNumber() != 0) { + // If flush happened in the middle of recovery (e.g. due to memtable + // being full), we flush at the end. Otherwise we'll need to record + // where we were on last flush, which make the logic complicated. + if (flushed || !immutable_db_options_.avoid_flush_during_recovery) { + status = WriteLevel0TableForRecovery(job_id, cfd, cfd->mem(), edit); + if (!status.ok()) { + // Recovery failed + break; + } + flushed = true; + + cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(), + versions_->LastSequence()); + } + data_seen = true; + } + + // Update the log number info in the version edit corresponding to this + // column family. Note that the version edits will be written to MANIFEST + // together later. + // writing wal_number in the manifest means that any log file + // with number strongly less than (wal_number + 1) is already + // recovered and should be ignored on next reincarnation. + // Since we already recovered max_wal_number, we want all wals + // with numbers `<= max_wal_number` (includes this one) to be ignored + if (flushed || cfd->mem()->GetFirstSequenceNumber() == 0) { + edit->SetLogNumber(max_wal_number + 1); + } + } + if (status.ok()) { + // we must mark the next log number as used, even though it's + // not actually used. that is because VersionSet assumes + // VersionSet::next_file_number_ always to be strictly greater than any + // log number + versions_->MarkFileNumberUsed(max_wal_number + 1); + assert(recovery_ctx != nullptr); + + for (auto* cfd : *versions_->GetColumnFamilySet()) { + auto iter = version_edits.find(cfd->GetID()); + assert(iter != version_edits.end()); + recovery_ctx->UpdateVersionEdits(cfd, iter->second); + } + + if (flushed || !data_seen) { + VersionEdit wal_deletion; + if (immutable_db_options_.track_and_verify_wals_in_manifest) { + wal_deletion.DeleteWalsBefore(max_wal_number + 1); + } + if (!allow_2pc()) { + // In non-2pc mode, flushing the memtables of the column families + // means we can advance min_log_number_to_keep. 
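+          // (In 2PC mode this would be unsafe: prepared-but-uncommitted
+          // transactions may still live only in older WALs, hence the
+          // allow_2pc() guard above.)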
+          wal_deletion.SetMinLogNumberToKeep(max_wal_number + 1);
+        }
+        assert(versions_->GetColumnFamilySet() != nullptr);
+        recovery_ctx->UpdateVersionEdits(
+            versions_->GetColumnFamilySet()->GetDefault(), wal_deletion);
+      }
+    }
+  }
+
+  if (status.ok()) {
+    if (data_seen && !flushed) {
+      status = RestoreAliveLogFiles(wal_numbers);
+    } else if (!wal_numbers.empty()) {
+      // If there's no data in the WAL, or we flushed all the data, still
+      // truncate the log file. If the process goes into a crash loop before
+      // the file is deleted, the preallocated space will never get freed.
+      const bool truncate = !read_only;
+      GetLogSizeAndMaybeTruncate(wal_numbers.back(), truncate, nullptr)
+          .PermitUncheckedError();
+    }
+  }
+
+  event_logger_.Log() << "job" << job_id << "event"
+                      << "recovery_finished";
+
+  return status;
+}
+
+Status DBImpl::GetLogSizeAndMaybeTruncate(uint64_t wal_number, bool truncate,
+                                          LogFileNumberSize* log_ptr) {
+  LogFileNumberSize log(wal_number);
+  std::string fname =
+      LogFileName(immutable_db_options_.GetWalDir(), wal_number);
+  Status s;
+  // This gets the apparent size of the WALs, not including preallocated
+  // space.
+  s = env_->GetFileSize(fname, &log.size);
+  TEST_SYNC_POINT_CALLBACK("DBImpl::GetLogSizeAndMaybeTruncate:0", /*arg=*/&s);
+  if (s.ok() && truncate) {
+    std::unique_ptr<FSWritableFile> last_log;
+    Status truncate_status = fs_->ReopenWritableFile(
+        fname,
+        fs_->OptimizeForLogWrite(
+            file_options_,
+            BuildDBOptions(immutable_db_options_, mutable_db_options_)),
+        &last_log, nullptr);
+    if (truncate_status.ok()) {
+      truncate_status = last_log->Truncate(log.size, IOOptions(), nullptr);
+    }
+    if (truncate_status.ok()) {
+      truncate_status = last_log->Close(IOOptions(), nullptr);
+    }
+    // Not a critical error if we fail to truncate.
+    if (!truncate_status.ok() && !truncate_status.IsNotSupported()) {
+      ROCKS_LOG_WARN(immutable_db_options_.info_log,
+                     "Failed to truncate log #%" PRIu64 ": %s", wal_number,
+                     truncate_status.ToString().c_str());
+    }
+  }
+  if (log_ptr) {
+    *log_ptr = log;
+  }
+  return s;
+}
+
+Status DBImpl::RestoreAliveLogFiles(const std::vector<uint64_t>& wal_numbers) {
+  if (wal_numbers.empty()) {
+    return Status::OK();
+  }
+  Status s;
+  mutex_.AssertHeld();
+  assert(immutable_db_options_.avoid_flush_during_recovery);
+  // Mark these as alive so they'll be considered for deletion later by
+  // FindObsoleteFiles()
+  total_log_size_ = 0;
+  log_empty_ = false;
+  uint64_t min_wal_with_unflushed_data =
+      versions_->MinLogNumberWithUnflushedData();
+  for (auto wal_number : wal_numbers) {
+    if (!allow_2pc() && wal_number < min_wal_with_unflushed_data) {
+      // In non-2pc mode, WAL files that do not back unflushed data are not
+      // alive, and thus should not be added to alive_log_files_.
+      continue;
+    }
+    // We preallocate space for WALs, but after a crash and restart that
+    // preallocated space is no longer needed. It is likely that only the
+    // last log has such preallocated space, so we only truncate the last
+    // log.
+    LogFileNumberSize log;
+    s = GetLogSizeAndMaybeTruncate(
+        wal_number, /*truncate=*/(wal_number == wal_numbers.back()), &log);
+    if (!s.ok()) {
+      break;
+    }
+    total_log_size_ += log.size;
+    alive_log_files_.push_back(log);
+  }
+  return s;
+}
+
+Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
+                                           MemTable* mem, VersionEdit* edit) {
+  mutex_.AssertHeld();
+  assert(cfd);
+  assert(cfd->imm());
+  // The immutable memtable list must be empty.
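+  // (Editorial note: GetEarliestMemTableID() reports max uint64 exactly when
+  // the immutable list is empty, which is what the assert below checks.)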
+  assert(std::numeric_limits<uint64_t>::max() ==
+         cfd->imm()->GetEarliestMemTableID());
+
+  const uint64_t start_micros = immutable_db_options_.clock->NowMicros();
+
+  FileMetaData meta;
+  std::vector<BlobFileAddition> blob_file_additions;
+
+  std::unique_ptr<std::list<uint64_t>::iterator> pending_outputs_inserted_elem(
+      new std::list<uint64_t>::iterator(
+          CaptureCurrentFileNumberInPendingOutputs()));
+  meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0);
+  ReadOptions ro;
+  ro.total_order_seek = true;
+  ro.io_activity = Env::IOActivity::kDBOpen;
+  Arena arena;
+  Status s;
+  TableProperties table_properties;
+  {
+    ScopedArenaIterator iter(mem->NewIterator(ro, &arena));
+    ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
+                    "[%s] [WriteLevel0TableForRecovery]"
+                    " Level-0 table #%" PRIu64 ": started",
+                    cfd->GetName().c_str(), meta.fd.GetNumber());
+
+    // Get the latest mutable cf options while the mutex is still locked
+    const MutableCFOptions mutable_cf_options =
+        *cfd->GetLatestMutableCFOptions();
+    bool paranoid_file_checks =
+        cfd->GetLatestMutableCFOptions()->paranoid_file_checks;
+
+    int64_t _current_time = 0;
+    immutable_db_options_.clock->GetCurrentTime(&_current_time)
+        .PermitUncheckedError();  // ignore error
+    const uint64_t current_time = static_cast<uint64_t>(_current_time);
+    meta.oldest_ancester_time = current_time;
+    meta.epoch_number = cfd->NewEpochNumber();
+    {
+      auto write_hint = cfd->CalculateSSTWriteHint(0);
+      mutex_.Unlock();
+
+      SequenceNumber earliest_write_conflict_snapshot;
+      std::vector<SequenceNumber> snapshot_seqs =
+          snapshots_.GetAll(&earliest_write_conflict_snapshot);
+      auto snapshot_checker = snapshot_checker_.get();
+      if (use_custom_gc_ && snapshot_checker == nullptr) {
+        snapshot_checker = DisableGCSnapshotChecker::Instance();
+      }
+      std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
+          range_del_iters;
+      auto range_del_iter =
+          // This is called during recovery, where a live memtable is flushed
+          // directly. In this case, no fragmented tombstone list is cached in
+          // this memtable yet.
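+          // Hence immutable_memtable=false below: tombstones are read
+          // straight from the live memtable rather than from a cached
+          // fragmented list.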
+ mem->NewRangeTombstoneIterator(ro, kMaxSequenceNumber, + false /* immutable_memtable */); + if (range_del_iter != nullptr) { + range_del_iters.emplace_back(range_del_iter); + } + + IOStatus io_s; + TableBuilderOptions tboptions( + *cfd->ioptions(), mutable_cf_options, cfd->internal_comparator(), + cfd->int_tbl_prop_collector_factories(), + GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), + mutable_cf_options.compression_opts, cfd->GetID(), cfd->GetName(), + 0 /* level */, false /* is_bottommost */, + TableFileCreationReason::kRecovery, 0 /* oldest_key_time */, + 0 /* file_creation_time */, db_id_, db_session_id_, + 0 /* target_file_size */, meta.fd.GetNumber()); + SeqnoToTimeMapping empty_seqno_time_mapping; + Version* version = cfd->current(); + version->Ref(); + const ReadOptions read_option(Env::IOActivity::kDBOpen); + s = BuildTable( + dbname_, versions_.get(), immutable_db_options_, tboptions, + file_options_for_compaction_, read_option, cfd->table_cache(), + iter.get(), std::move(range_del_iters), &meta, &blob_file_additions, + snapshot_seqs, earliest_write_conflict_snapshot, kMaxSequenceNumber, + snapshot_checker, paranoid_file_checks, cfd->internal_stats(), &io_s, + io_tracer_, BlobFileCreationReason::kRecovery, + empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH, + nullptr /* table_properties */, write_hint, + nullptr /*full_history_ts_low*/, &blob_callback_, version); + version->Unref(); + LogFlush(immutable_db_options_.info_log); + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, + "[%s] [WriteLevel0TableForRecovery]" + " Level-0 table #%" PRIu64 ": %" PRIu64 " bytes %s", + cfd->GetName().c_str(), meta.fd.GetNumber(), + meta.fd.GetFileSize(), s.ToString().c_str()); + mutex_.Lock(); + + // TODO(AR) is this ok? + if (!io_s.ok() && s.ok()) { + s = io_s; + } + } + } + ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); + + // Note that if file_size is zero, the file has been deleted and + // should not be added to the manifest. 
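+  // (Editorial note: this relies on BuildTable cleaning up an output file
+  // that received no entries, leaving the recorded size at zero.)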
+ const bool has_output = meta.fd.GetFileSize() > 0; + + constexpr int level = 0; + + if (s.ok() && has_output) { + edit->AddFile( + level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(), + meta.smallest, meta.largest, meta.fd.smallest_seqno, + meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature, + meta.oldest_blob_file_number, meta.oldest_ancester_time, + meta.file_creation_time, meta.epoch_number, meta.file_checksum, + meta.file_checksum_func_name, meta.unique_id, + meta.compensated_range_deletion_size, meta.tail_size); + + for (const auto& blob : blob_file_additions) { + edit->AddBlobFile(blob); + } + } + + InternalStats::CompactionStats stats(CompactionReason::kFlush, 1); + stats.micros = immutable_db_options_.clock->NowMicros() - start_micros; + + if (has_output) { + stats.bytes_written = meta.fd.GetFileSize(); + stats.num_output_files = 1; + } + + const auto& blobs = edit->GetBlobFileAdditions(); + for (const auto& blob : blobs) { + stats.bytes_written_blob += blob.GetTotalBlobBytes(); + } + + stats.num_output_files_blob = static_cast(blobs.size()); + + cfd->internal_stats()->AddCompactionStats(level, Env::Priority::USER, stats); + cfd->internal_stats()->AddCFStats( + InternalStats::BYTES_FLUSHED, + stats.bytes_written + stats.bytes_written_blob); + RecordTick(stats_, COMPACT_WRITE_BYTES, meta.fd.GetFileSize()); + return s; +} + +Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) { + DBOptions db_options(options); + ColumnFamilyOptions cf_options(options); + std::vector column_families; + column_families.push_back( + ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); + if (db_options.persist_stats_to_disk) { + column_families.push_back( + ColumnFamilyDescriptor(kPersistentStatsColumnFamilyName, cf_options)); + } + std::vector handles; + Status s = DB::Open(db_options, dbname, column_families, &handles, dbptr); + if (s.ok()) { + if (db_options.persist_stats_to_disk) { + assert(handles.size() == 2); + } else { + assert(handles.size() == 1); + } + // i can delete the handle since DBImpl is always holding a reference to + // default column family + if (db_options.persist_stats_to_disk && handles[1] != nullptr) { + delete handles[1]; + } + delete handles[0]; + } + return s; +} + +Status DB::Open(const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, DB** dbptr) { + const bool kSeqPerBatch = true; + const bool kBatchPerTxn = true; + ThreadStatusUtil::SetEnableTracking(db_options.enable_thread_tracking); + ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_DBOPEN); + Status s = DBImpl::Open(db_options, dbname, column_families, handles, dbptr, + !kSeqPerBatch, kBatchPerTxn); + ThreadStatusUtil::ResetThreadStatus(); + return s; +} + +// TODO: Implement the trimming in flush code path. +// TODO: Perform trimming before inserting into memtable during recovery. +// TODO: Pick files with max_timestamp > trim_ts by each file's timestamp meta +// info, and handle only these files to reduce io. 
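+// A usage sketch (editorial; `cf_descs` and `trim_ts` are hypothetical
+// caller-side names). Only column families whose comparator has a non-zero
+// timestamp size are compacted:
+//
+//   std::vector<ColumnFamilyHandle*> handles;
+//   DB* db = nullptr;
+//   Status s = DB::OpenAndTrimHistory(db_options, dbname, cf_descs,
+//                                     &handles, &db, trim_ts);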
+Status DB::OpenAndTrimHistory( + const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, DB** dbptr, + std::string trim_ts) { + assert(dbptr != nullptr); + assert(handles != nullptr); + auto validate_options = [&db_options] { + if (db_options.avoid_flush_during_recovery) { + return Status::InvalidArgument( + "avoid_flush_during_recovery incompatible with " + "OpenAndTrimHistory"); + } + return Status::OK(); + }; + auto s = validate_options(); + if (!s.ok()) { + return s; + } + + DB* db = nullptr; + s = DB::Open(db_options, dbname, column_families, handles, &db); + if (!s.ok()) { + return s; + } + assert(db); + CompactRangeOptions options; + options.bottommost_level_compaction = + BottommostLevelCompaction::kForceOptimized; + auto db_impl = static_cast_with_check(db); + for (auto handle : *handles) { + assert(handle != nullptr); + auto cfh = static_cast_with_check(handle); + auto cfd = cfh->cfd(); + assert(cfd != nullptr); + // Only compact column families with timestamp enabled + if (cfd->user_comparator() != nullptr && + cfd->user_comparator()->timestamp_size() > 0) { + s = db_impl->CompactRangeInternal(options, handle, nullptr, nullptr, + trim_ts); + if (!s.ok()) { + break; + } + } + } + auto clean_op = [&handles, &db] { + for (auto handle : *handles) { + auto temp_s = db->DestroyColumnFamilyHandle(handle); + assert(temp_s.ok()); + } + handles->clear(); + delete db; + }; + if (!s.ok()) { + clean_op(); + return s; + } + + *dbptr = db; + return s; +} + +IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number, + size_t preallocate_block_size, + log::Writer** new_log) { + IOStatus io_s; + std::unique_ptr lfile; + + DBOptions db_options = + BuildDBOptions(immutable_db_options_, mutable_db_options_); + FileOptions opt_file_options = + fs_->OptimizeForLogWrite(file_options_, db_options); + std::string wal_dir = immutable_db_options_.GetWalDir(); + std::string log_fname = LogFileName(wal_dir, log_file_num); + + if (recycle_log_number) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "reusing log %" PRIu64 " from recycle list\n", + recycle_log_number); + std::string old_log_fname = LogFileName(wal_dir, recycle_log_number); + TEST_SYNC_POINT("DBImpl::CreateWAL:BeforeReuseWritableFile1"); + TEST_SYNC_POINT("DBImpl::CreateWAL:BeforeReuseWritableFile2"); + io_s = fs_->ReuseWritableFile(log_fname, old_log_fname, opt_file_options, + &lfile, /*dbg=*/nullptr); + } else { + io_s = NewWritableFile(fs_.get(), log_fname, &lfile, opt_file_options); + } + + if (io_s.ok()) { + lfile->SetWriteLifeTimeHint(CalculateWALWriteHint()); + lfile->SetPreallocationBlockSize(preallocate_block_size); + + const auto& listeners = immutable_db_options_.listeners; + FileTypeSet tmp_set = immutable_db_options_.checksum_handoff_file_types; + std::unique_ptr file_writer(new WritableFileWriter( + std::move(lfile), log_fname, opt_file_options, + immutable_db_options_.clock, io_tracer_, nullptr /* stats */, listeners, + nullptr, tmp_set.Contains(FileType::kWalFile), + tmp_set.Contains(FileType::kWalFile))); + *new_log = new log::Writer(std::move(file_writer), log_file_num, + immutable_db_options_.recycle_log_file_num > 0, + immutable_db_options_.manual_wal_flush, + immutable_db_options_.wal_compression); + io_s = (*new_log)->AddCompressionTypeRecord(); + } + return io_s; +} + +Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, DB** dbptr, + const bool 
seq_per_batch, const bool batch_per_txn) { + Status s = ValidateOptionsByTable(db_options, column_families); + if (!s.ok()) { + return s; + } + + s = ValidateOptions(db_options, column_families); + if (!s.ok()) { + return s; + } + + *dbptr = nullptr; + assert(handles); + handles->clear(); + + size_t max_write_buffer_size = 0; + for (auto cf : column_families) { + max_write_buffer_size = + std::max(max_write_buffer_size, cf.options.write_buffer_size); + } + + DBImpl* impl = new DBImpl(db_options, dbname, seq_per_batch, batch_per_txn); + if (!impl->immutable_db_options_.info_log) { + s = impl->init_logger_creation_s_; + delete impl; + return s; + } else { + assert(impl->init_logger_creation_s_.ok()); + } + s = impl->env_->CreateDirIfMissing(impl->immutable_db_options_.GetWalDir()); + if (s.ok()) { + std::vector paths; + for (auto& db_path : impl->immutable_db_options_.db_paths) { + paths.emplace_back(db_path.path); + } + for (auto& cf : column_families) { + for (auto& cf_path : cf.options.cf_paths) { + paths.emplace_back(cf_path.path); + } + } + for (auto& path : paths) { + s = impl->env_->CreateDirIfMissing(path); + if (!s.ok()) { + break; + } + } + + // For recovery from NoSpace() error, we can only handle + // the case where the database is stored in a single path + if (paths.size() <= 1) { + impl->error_handler_.EnableAutoRecovery(); + } + } + if (s.ok()) { + s = impl->CreateArchivalDirectory(); + } + if (!s.ok()) { + delete impl; + return s; + } + + impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath(); + RecoveryContext recovery_ctx; + impl->mutex_.Lock(); + + // Handles create_if_missing, error_if_exists + uint64_t recovered_seq(kMaxSequenceNumber); + s = impl->Recover(column_families, false /* read_only */, + false /* error_if_wal_file_exists */, + false /* error_if_data_exists_in_wals */, &recovered_seq, + &recovery_ctx); + if (s.ok()) { + uint64_t new_log_number = impl->versions_->NewFileNumber(); + log::Writer* new_log = nullptr; + const size_t preallocate_block_size = + impl->GetWalPreallocateBlockSize(max_write_buffer_size); + s = impl->CreateWAL(new_log_number, 0 /*recycle_log_number*/, + preallocate_block_size, &new_log); + if (s.ok()) { + InstrumentedMutexLock wl(&impl->log_write_mutex_); + impl->logfile_number_ = new_log_number; + assert(new_log != nullptr); + assert(impl->logs_.empty()); + impl->logs_.emplace_back(new_log_number, new_log); + } + + if (s.ok()) { + impl->alive_log_files_.push_back( + DBImpl::LogFileNumberSize(impl->logfile_number_)); + // In WritePrepared there could be gap in sequence numbers. This breaks + // the trick we use in kPointInTimeRecovery which assumes the first seq in + // the log right after the corrupted log is one larger than the last seq + // we read from the wals. To let this trick keep working, we add a dummy + // entry with the expected sequence to the first log right after recovery. + // In non-WritePrepared case also the new log after recovery could be + // empty, and thus missing the consecutive seq hint to distinguish + // middle-log corruption to corrupted-log-remained-after-recovery. This + // case also will be addressed by a dummy write. 
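+      // (Editorial note: recovered_seq keeps its kMaxSequenceNumber default
+      // when Recover() found no gap to paper over, so the dummy write below
+      // is skipped.)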
+ if (recovered_seq != kMaxSequenceNumber) { + WriteBatch empty_batch; + WriteBatchInternal::SetSequence(&empty_batch, recovered_seq); + WriteOptions write_options; + uint64_t log_used, log_size; + log::Writer* log_writer = impl->logs_.back().writer; + LogFileNumberSize& log_file_number_size = impl->alive_log_files_.back(); + + assert(log_writer->get_log_number() == log_file_number_size.number); + impl->mutex_.AssertHeld(); + s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size, + Env::IO_TOTAL, log_file_number_size); + if (s.ok()) { + // Need to fsync, otherwise it might get lost after a power reset. + s = impl->FlushWAL(false); + TEST_SYNC_POINT_CALLBACK("DBImpl::Open::BeforeSyncWAL", /*arg=*/&s); + if (s.ok()) { + s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync); + } + } + } + } + } + if (s.ok()) { + s = impl->LogAndApplyForRecovery(recovery_ctx); + } + + if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) { + impl->mutex_.AssertHeld(); + s = impl->InitPersistStatsColumnFamily(); + } + + if (s.ok()) { + // set column family handles + for (auto cf : column_families) { + auto cfd = + impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name); + if (cfd != nullptr) { + handles->push_back( + new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_)); + impl->NewThreadStatusCfInfo(cfd); + } else { + if (db_options.create_missing_column_families) { + // missing column family, create it + ColumnFamilyHandle* handle = nullptr; + impl->mutex_.Unlock(); + s = impl->CreateColumnFamily(cf.options, cf.name, &handle); + impl->mutex_.Lock(); + if (s.ok()) { + handles->push_back(handle); + } else { + break; + } + } else { + s = Status::InvalidArgument("Column family not found", cf.name); + break; + } + } + } + } + + if (s.ok()) { + SuperVersionContext sv_context(/* create_superversion */ true); + for (auto cfd : *impl->versions_->GetColumnFamilySet()) { + impl->InstallSuperVersionAndScheduleWork( + cfd, &sv_context, *cfd->GetLatestMutableCFOptions()); + } + sv_context.Clean(); + } + + if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) { + // try to read format version + s = impl->PersistentStatsProcessFormatVersion(); + } + + if (s.ok()) { + for (auto cfd : *impl->versions_->GetColumnFamilySet()) { + if (!cfd->mem()->IsSnapshotSupported()) { + impl->is_snapshot_supported_ = false; + } + if (cfd->ioptions()->merge_operator != nullptr && + !cfd->mem()->IsMergeOperatorSupported()) { + s = Status::InvalidArgument( + "The memtable of column family %s does not support merge operator " + "its options.merge_operator is non-null", + cfd->GetName().c_str()); + } + if (!s.ok()) { + break; + } + } + } + TEST_SYNC_POINT("DBImpl::Open:Opened"); + Status persist_options_status; + if (s.ok()) { + // Persist RocksDB Options before scheduling the compaction. + // The WriteOptionsFile() will release and lock the mutex internally. + persist_options_status = impl->WriteOptionsFile( + false /*need_mutex_lock*/, false /*need_enter_write_thread*/); + + *dbptr = impl; + impl->opened_successfully_ = true; + impl->DeleteObsoleteFiles(); + TEST_SYNC_POINT("DBImpl::Open:AfterDeleteFiles"); + impl->MaybeScheduleFlushOrCompaction(); + } else { + persist_options_status.PermitUncheckedError(); + } + impl->mutex_.Unlock(); + + auto sfm = static_cast( + impl->immutable_db_options_.sst_file_manager.get()); + if (s.ok() && sfm) { + // Set Statistics ptr for SstFileManager to dump the stats of + // DeleteScheduler. 
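+    // (This whole block runs only when the user installed an SstFileManager;
+    // see the null check above.)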
+    sfm->SetStatisticsPtr(impl->immutable_db_options_.statistics);
+    ROCKS_LOG_INFO(impl->immutable_db_options_.info_log,
+                   "SstFileManager instance %p", sfm);
+
+    // Notify SstFileManager about all sst files that already exist in
+    // db_paths[0] and cf_paths[0] when the DB is opened.
+
+    // SstFileManagerImpl needs to know the sizes of the files. For files
+    // whose size we already know (sst files that appear in the manifest -
+    // typically the vast majority of all files), we'll pass the size to
+    // SstFileManager. For all other files SstFileManager will query the size
+    // from the filesystem.
+
+    std::vector<ColumnFamilyMetaData> metadata;
+    impl->GetAllColumnFamilyMetaData(&metadata);
+
+    std::unordered_map<std::string, uint64_t> known_file_sizes;
+    for (const auto& md : metadata) {
+      for (const auto& lmd : md.levels) {
+        for (const auto& fmd : lmd.files) {
+          known_file_sizes[fmd.relative_filename] = fmd.size;
+        }
+      }
+      for (const auto& bmd : md.blob_files) {
+        std::string name = bmd.blob_file_name;
+        // The BlobMetaData.blob_file_name may start with "/".
+        if (!name.empty() && name[0] == '/') {
+          name = name.substr(1);
+        }
+        known_file_sizes[name] = bmd.blob_file_size;
+      }
+    }
+
+    std::vector<std::string> paths;
+    paths.emplace_back(impl->immutable_db_options_.db_paths[0].path);
+    for (auto& cf : column_families) {
+      if (!cf.options.cf_paths.empty()) {
+        paths.emplace_back(cf.options.cf_paths[0].path);
+      }
+    }
+    // Remove duplicate paths.
+    std::sort(paths.begin(), paths.end());
+    paths.erase(std::unique(paths.begin(), paths.end()), paths.end());
+    IOOptions io_opts;
+    io_opts.do_not_recurse = true;
+    for (auto& path : paths) {
+      std::vector<std::string> existing_files;
+      impl->immutable_db_options_.fs
+          ->GetChildren(path, io_opts, &existing_files,
+                        /*IODebugContext*=*/nullptr)
+          .PermitUncheckedError();  // TODO: what to do on error?
+      for (auto& file_name : existing_files) {
+        uint64_t file_number;
+        FileType file_type;
+        std::string file_path = path + "/" + file_name;
+        if (ParseFileName(file_name, &file_number, &file_type) &&
+            (file_type == kTableFile || file_type == kBlobFile)) {
+          // TODO: Check for errors from OnAddFile?
+          if (known_file_sizes.count(file_name)) {
+            // We're assuming that each sst file name exists in at most one of
+            // the paths.
+            sfm->OnAddFile(file_path, known_file_sizes.at(file_name))
+                .PermitUncheckedError();
+          } else {
+            sfm->OnAddFile(file_path).PermitUncheckedError();
+          }
+        }
+      }
+    }
+
+    // Reserve some disk buffer space. This is a heuristic: when we run out
+    // of disk space, it ensures that there is at least write_buffer_size
+    // worth of free space before we resume DB writes. In low-disk-space
+    // conditions, we want to avoid a lot of small L0 files due to frequent
+    // WAL write failures and the resultant forced flushes.
+    sfm->ReserveDiskBuffer(max_write_buffer_size,
+                           impl->immutable_db_options_.db_paths[0].path);
+  }
+
+  if (s.ok()) {
+    ROCKS_LOG_HEADER(impl->immutable_db_options_.info_log, "DB pointer %p",
+                     impl);
+    LogFlush(impl->immutable_db_options_.info_log);
+    if (!impl->WALBufferIsEmpty()) {
+      s = impl->FlushWAL(false);
+      if (s.ok()) {
+        // Sync is needed, otherwise WAL-buffered data might get lost after a
+        // power reset.
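+        // FlushWAL(false) above only moved the buffer into the OS; the
+        // Sync() below is what makes it durable.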
+ log::Writer* log_writer = impl->logs_.back().writer; + s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync); + } + } + if (s.ok() && !persist_options_status.ok()) { + s = Status::IOError( + "DB::Open() failed --- Unable to persist Options file", + persist_options_status.ToString()); + } + } + if (!s.ok()) { + ROCKS_LOG_WARN(impl->immutable_db_options_.info_log, + "DB::Open() failed: %s", s.ToString().c_str()); + } + if (s.ok()) { + s = impl->StartPeriodicTaskScheduler(); + } + + if (s.ok()) { + s = impl->RegisterRecordSeqnoTimeWorker(); + } + if (!s.ok()) { + for (auto* h : *handles) { + delete h; + } + handles->clear(); + delete impl; + *dbptr = nullptr; + } + return s; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.cc new file mode 100644 index 0000000000..871cf80852 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.cc @@ -0,0 +1,334 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/db_impl/db_impl_readonly.h" + +#include "db/arena_wrapped_db_iter.h" +#include "db/db_impl/compacted_db_impl.h" +#include "db/db_impl/db_impl.h" +#include "db/db_iter.h" +#include "db/merge_context.h" +#include "logging/logging.h" +#include "monitoring/perf_context_imp.h" +#include "util/cast_util.h" + +namespace ROCKSDB_NAMESPACE { + + +DBImplReadOnly::DBImplReadOnly(const DBOptions& db_options, + const std::string& dbname) + : DBImpl(db_options, dbname, /*seq_per_batch*/ false, + /*batch_per_txn*/ true, /*read_only*/ true) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Opening the db in read only mode"); + LogFlush(immutable_db_options_.info_log); +} + +DBImplReadOnly::~DBImplReadOnly() {} + +// Implementations of the DB interface +Status DBImplReadOnly::Get(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* pinnable_val) { + return Get(read_options, column_family, key, pinnable_val, + /*timestamp*/ nullptr); +} + +Status DBImplReadOnly::Get(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* pinnable_val, + std::string* timestamp) { + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call Get with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + assert(pinnable_val != nullptr); + // TODO: stopwatch DB_GET needed?, perf timer needed? + PERF_TIMER_GUARD(get_snapshot_time); + + assert(column_family); + if (read_options.timestamp) { + const Status s = FailIfTsMismatchCf( + column_family, *(read_options.timestamp), /*ts_for_read=*/true); + if (!s.ok()) { + return s; + } + } else { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return s; + } + } + + // Clear the timestamps for returning results so that we can distinguish + // between tombstone or key that has never been written + if (timestamp) { + timestamp->clear(); + } + + const Comparator* ucmp = column_family->GetComparator(); + assert(ucmp); + std::string* ts = ucmp->timestamp_size() > 0 ? 
timestamp : nullptr; + + Status s; + SequenceNumber snapshot = versions_->LastSequence(); + GetWithTimestampReadCallback read_cb(snapshot); + auto cfh = static_cast_with_check(column_family); + auto cfd = cfh->cfd(); + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + tracer_->Get(column_family, key); + } + } + SuperVersion* super_version = cfd->GetSuperVersion(); + MergeContext merge_context; + SequenceNumber max_covering_tombstone_seq = 0; + LookupKey lkey(key, snapshot, read_options.timestamp); + PERF_TIMER_STOP(get_snapshot_time); + if (super_version->mem->Get(lkey, pinnable_val->GetSelf(), + /*columns=*/nullptr, ts, &s, &merge_context, + &max_covering_tombstone_seq, read_options, + false /* immutable_memtable */, &read_cb)) { + pinnable_val->PinSelf(); + RecordTick(stats_, MEMTABLE_HIT); + } else { + PERF_TIMER_GUARD(get_from_output_files_time); + PinnedIteratorsManager pinned_iters_mgr; + super_version->current->Get( + read_options, lkey, pinnable_val, /*columns=*/nullptr, ts, &s, + &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr, + /*value_found*/ nullptr, + /*key_exists*/ nullptr, /*seq*/ nullptr, &read_cb, + /*is_blob*/ nullptr, + /*do_merge*/ true); + RecordTick(stats_, MEMTABLE_MISS); + } + RecordTick(stats_, NUMBER_KEYS_READ); + size_t size = pinnable_val->size(); + RecordTick(stats_, BYTES_READ, size); + RecordInHistogram(stats_, BYTES_PER_READ, size); + PERF_COUNTER_ADD(get_read_bytes, size); + return s; +} + +Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options, + ColumnFamilyHandle* column_family) { + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return NewErrorIterator(Status::InvalidArgument( + "Cannot call NewIterator with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`")); + } + assert(column_family); + if (read_options.timestamp) { + const Status s = FailIfTsMismatchCf( + column_family, *(read_options.timestamp), /*ts_for_read=*/true); + if (!s.ok()) { + return NewErrorIterator(s); + } + } else { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return NewErrorIterator(s); + } + } + auto cfh = static_cast_with_check(column_family); + auto cfd = cfh->cfd(); + SuperVersion* super_version = cfd->GetSuperVersion()->Ref(); + SequenceNumber latest_snapshot = versions_->LastSequence(); + SequenceNumber read_seq = + read_options.snapshot != nullptr + ? reinterpret_cast(read_options.snapshot) + ->number_ + : latest_snapshot; + ReadCallback* read_callback = nullptr; // No read callback provided. 
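+  // (Editorial note: no read callback is needed in read-only mode; with no
+  // concurrent writers, the latest sequence already gives a consistent
+  // snapshot.)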
+ auto db_iter = NewArenaWrappedDbIterator( + env_, read_options, *cfd->ioptions(), super_version->mutable_cf_options, + super_version->current, read_seq, + super_version->mutable_cf_options.max_sequential_skip_in_iterations, + super_version->version_number, read_callback); + auto internal_iter = NewInternalIterator( + db_iter->GetReadOptions(), cfd, super_version, db_iter->GetArena(), + read_seq, /* allow_unprepared_value */ true, db_iter); + db_iter->SetIterUnderDBIter(internal_iter); + return db_iter; +} + +Status DBImplReadOnly::NewIterators( + const ReadOptions& read_options, + const std::vector& column_families, + std::vector* iterators) { + if (read_options.timestamp) { + for (auto* cf : column_families) { + assert(cf); + const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp), + /*ts_for_read=*/true); + if (!s.ok()) { + return s; + } + } + } else { + for (auto* cf : column_families) { + assert(cf); + const Status s = FailIfCfHasTs(cf); + if (!s.ok()) { + return s; + } + } + } + + ReadCallback* read_callback = nullptr; // No read callback provided. + if (iterators == nullptr) { + return Status::InvalidArgument("iterators not allowed to be nullptr"); + } + iterators->clear(); + iterators->reserve(column_families.size()); + SequenceNumber latest_snapshot = versions_->LastSequence(); + SequenceNumber read_seq = + read_options.snapshot != nullptr + ? reinterpret_cast(read_options.snapshot) + ->number_ + : latest_snapshot; + + for (auto cfh : column_families) { + auto* cfd = static_cast_with_check(cfh)->cfd(); + auto* sv = cfd->GetSuperVersion()->Ref(); + auto* db_iter = NewArenaWrappedDbIterator( + env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, + sv->current, read_seq, + sv->mutable_cf_options.max_sequential_skip_in_iterations, + sv->version_number, read_callback); + auto* internal_iter = NewInternalIterator( + db_iter->GetReadOptions(), cfd, sv, db_iter->GetArena(), read_seq, + /* allow_unprepared_value */ true, db_iter); + db_iter->SetIterUnderDBIter(internal_iter); + iterators->push_back(db_iter); + } + + return Status::OK(); +} + +namespace { +// Return OK if dbname exists in the file system or create it if +// create_if_missing +Status OpenForReadOnlyCheckExistence(const DBOptions& db_options, + const std::string& dbname) { + Status s; + if (!db_options.create_if_missing) { + // Attempt to read "CURRENT" file + const std::shared_ptr& fs = db_options.env->GetFileSystem(); + std::string manifest_path; + uint64_t manifest_file_number; + s = VersionSet::GetCurrentManifestPath(dbname, fs.get(), &manifest_path, + &manifest_file_number); + } else { + // Historic behavior that doesn't necessarily make sense + s = db_options.env->CreateDirIfMissing(dbname); + } + return s; +} +} // namespace + +Status DB::OpenForReadOnly(const Options& options, const std::string& dbname, + DB** dbptr, bool /*error_if_wal_file_exists*/) { + Status s = OpenForReadOnlyCheckExistence(options, dbname); + if (!s.ok()) { + return s; + } + + *dbptr = nullptr; + + // Try to first open DB as fully compacted DB + s = CompactedDBImpl::Open(options, dbname, dbptr); + if (s.ok()) { + return s; + } + + DBOptions db_options(options); + ColumnFamilyOptions cf_options(options); + std::vector column_families; + column_families.push_back( + ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); + std::vector handles; + + s = DBImplReadOnly::OpenForReadOnlyWithoutCheck( + db_options, dbname, column_families, &handles, dbptr); + if (s.ok()) { + assert(handles.size() == 1); + // i can delete the 
handle since DBImpl is always holding a + // reference to default column family + delete handles[0]; + } + return s; +} + +Status DB::OpenForReadOnly( + const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, DB** dbptr, + bool error_if_wal_file_exists) { + // If dbname does not exist in the file system, should not do anything + Status s = OpenForReadOnlyCheckExistence(db_options, dbname); + if (!s.ok()) { + return s; + } + + return DBImplReadOnly::OpenForReadOnlyWithoutCheck( + db_options, dbname, column_families, handles, dbptr, + error_if_wal_file_exists); +} + +Status DBImplReadOnly::OpenForReadOnlyWithoutCheck( + const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, DB** dbptr, + bool error_if_wal_file_exists) { + *dbptr = nullptr; + handles->clear(); + + SuperVersionContext sv_context(/* create_superversion */ true); + DBImplReadOnly* impl = new DBImplReadOnly(db_options, dbname); + impl->mutex_.Lock(); + Status s = impl->Recover(column_families, true /* read only */, + error_if_wal_file_exists); + if (s.ok()) { + // set column family handles + for (auto cf : column_families) { + auto cfd = + impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name); + if (cfd == nullptr) { + s = Status::InvalidArgument("Column family not found", cf.name); + break; + } + handles->push_back(new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_)); + } + } + if (s.ok()) { + for (auto cfd : *impl->versions_->GetColumnFamilySet()) { + sv_context.NewSuperVersion(); + cfd->InstallSuperVersion(&sv_context, &impl->mutex_); + } + } + impl->mutex_.Unlock(); + sv_context.Clean(); + if (s.ok()) { + *dbptr = impl; + for (auto* h : *handles) { + impl->NewThreadStatusCfInfo( + static_cast_with_check(h)->cfd()); + } + } else { + for (auto h : *handles) { + delete h; + } + handles->clear(); + delete impl; + } + return s; +} + + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.h new file mode 100644 index 0000000000..a694acc003 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_readonly.h @@ -0,0 +1,173 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include +#include + +#include "db/db_impl/db_impl.h" + +namespace ROCKSDB_NAMESPACE { + +// TODO: Share common structure with CompactedDBImpl and DBImplSecondary +class DBImplReadOnly : public DBImpl { + public: + DBImplReadOnly(const DBOptions& options, const std::string& dbname); + // No copying allowed + DBImplReadOnly(const DBImplReadOnly&) = delete; + void operator=(const DBImplReadOnly&) = delete; + + virtual ~DBImplReadOnly(); + + // Implementations of the DB interface + using DB::Get; + virtual Status Get(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) override; + Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value, + std::string* timestamp) override; + + // TODO: Implement ReadOnly MultiGet? 
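+  // (With no override here, MultiGet calls resolve to the inherited DBImpl
+  // implementation.)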
+ + using DBImpl::NewIterator; + virtual Iterator* NewIterator(const ReadOptions&, + ColumnFamilyHandle* column_family) override; + + virtual Status NewIterators( + const ReadOptions& options, + const std::vector& column_families, + std::vector* iterators) override; + + using DBImpl::Put; + virtual Status Put(const WriteOptions& /*options*/, + ColumnFamilyHandle* /*column_family*/, + const Slice& /*key*/, const Slice& /*value*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + using DBImpl::PutEntity; + Status PutEntity(const WriteOptions& /* options */, + ColumnFamilyHandle* /* column_family */, + const Slice& /* key */, + const WideColumns& /* columns */) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + using DBImpl::Merge; + virtual Status Merge(const WriteOptions& /*options*/, + ColumnFamilyHandle* /*column_family*/, + const Slice& /*key*/, const Slice& /*value*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + using DBImpl::Delete; + virtual Status Delete(const WriteOptions& /*options*/, + ColumnFamilyHandle* /*column_family*/, + const Slice& /*key*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + using DBImpl::SingleDelete; + virtual Status SingleDelete(const WriteOptions& /*options*/, + ColumnFamilyHandle* /*column_family*/, + const Slice& /*key*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + virtual Status Write(const WriteOptions& /*options*/, + WriteBatch* /*updates*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + using DBImpl::CompactRange; + virtual Status CompactRange(const CompactRangeOptions& /*options*/, + ColumnFamilyHandle* /*column_family*/, + const Slice* /*begin*/, + const Slice* /*end*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + using DBImpl::CompactFiles; + virtual Status CompactFiles( + const CompactionOptions& /*compact_options*/, + ColumnFamilyHandle* /*column_family*/, + const std::vector& /*input_file_names*/, + const int /*output_level*/, const int /*output_path_id*/ = -1, + std::vector* const /*output_file_names*/ = nullptr, + CompactionJobInfo* /*compaction_job_info*/ = nullptr) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + virtual Status DisableFileDeletions() override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + virtual Status EnableFileDeletions(bool /*force*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + virtual Status GetLiveFiles(std::vector& ret, + uint64_t* manifest_file_size, + bool /*flush_memtable*/) override { + return DBImpl::GetLiveFiles(ret, manifest_file_size, + false /* flush_memtable */); + } + + using DBImpl::Flush; + virtual Status Flush(const FlushOptions& /*options*/, + ColumnFamilyHandle* /*column_family*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + using DBImpl::SyncWAL; + virtual Status SyncWAL() override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + using DB::IngestExternalFile; + virtual Status IngestExternalFile( + ColumnFamilyHandle* /*column_family*/, + const std::vector& /*external_files*/, + const IngestExternalFileOptions& /*ingestion_options*/) override { + return 
Status::NotSupported("Not supported operation in read only mode."); + } + + using DB::CreateColumnFamilyWithImport; + virtual Status CreateColumnFamilyWithImport( + const ColumnFamilyOptions& /*options*/, + const std::string& /*column_family_name*/, + const ImportColumnFamilyOptions& /*import_options*/, + const ExportImportFilesMetaData& /*metadata*/, + ColumnFamilyHandle** /*handle*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + using DB::ClipColumnFamily; + virtual Status ClipColumnFamily(ColumnFamilyHandle* /*column_family*/, + const Slice& /*begin*/, + const Slice& /*end*/) override { + return Status::NotSupported("Not supported operation in read only mode."); + } + + // FIXME: some missing overrides for more "write" functions + + protected: + Status FlushForGetLiveFiles() override { + // No-op for read-only DB + return Status::OK(); + } + + private: + // A "helper" function for DB::OpenForReadOnly without column families + // to reduce unnecessary I/O + // It has the same functionality as DB::OpenForReadOnly with column families + // but does not check the existence of dbname in the file system + static Status OpenForReadOnlyWithoutCheck( + const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, DB** dbptr, + bool error_if_wal_file_exists = false); + friend class DB; +}; +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.cc new file mode 100644 index 0000000000..c6fcefddc7 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.cc @@ -0,0 +1,964 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/db_impl/db_impl_secondary.h" + +#include + +#include "db/arena_wrapped_db_iter.h" +#include "db/merge_context.h" +#include "logging/auto_roll_logger.h" +#include "logging/logging.h" +#include "monitoring/perf_context_imp.h" +#include "rocksdb/configurable.h" +#include "util/cast_util.h" + +namespace ROCKSDB_NAMESPACE { + +DBImplSecondary::DBImplSecondary(const DBOptions& db_options, + const std::string& dbname, + std::string secondary_path) + : DBImpl(db_options, dbname, false, true, true), + secondary_path_(std::move(secondary_path)) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Opening the db in secondary mode"); + LogFlush(immutable_db_options_.info_log); +} + +DBImplSecondary::~DBImplSecondary() {} + +Status DBImplSecondary::Recover( + const std::vector& column_families, + bool /*readonly*/, bool /*error_if_wal_file_exists*/, + bool /*error_if_data_exists_in_wals*/, uint64_t*, + RecoveryContext* /*recovery_ctx*/) { + mutex_.AssertHeld(); + + JobContext job_context(0); + Status s; + s = static_cast(versions_.get()) + ->Recover(column_families, &manifest_reader_, &manifest_reporter_, + &manifest_reader_status_); + if (!s.ok()) { + if (manifest_reader_status_) { + manifest_reader_status_->PermitUncheckedError(); + } + return s; + } + if (immutable_db_options_.paranoid_checks && s.ok()) { + s = CheckConsistency(); + } + // Initial max_total_in_memory_state_ before recovery logs. 
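+  // (Recomputed below as the sum over all CFs of write_buffer_size *
+  // max_write_buffer_number.)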
+ max_total_in_memory_state_ = 0; + for (auto cfd : *versions_->GetColumnFamilySet()) { + auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); + max_total_in_memory_state_ += mutable_cf_options->write_buffer_size * + mutable_cf_options->max_write_buffer_number; + } + if (s.ok()) { + default_cf_handle_ = new ColumnFamilyHandleImpl( + versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_); + default_cf_internal_stats_ = default_cf_handle_->cfd()->internal_stats(); + + std::unordered_set cfds_changed; + s = FindAndRecoverLogFiles(&cfds_changed, &job_context); + } + + if (s.IsPathNotFound()) { + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Secondary tries to read WAL, but WAL file(s) have already " + "been purged by primary."); + s = Status::OK(); + } + // TODO: update options_file_number_ needed? + + job_context.Clean(); + return s; +} + +// find new WAL and apply them in order to the secondary instance +Status DBImplSecondary::FindAndRecoverLogFiles( + std::unordered_set* cfds_changed, + JobContext* job_context) { + assert(nullptr != cfds_changed); + assert(nullptr != job_context); + Status s; + std::vector logs; + s = FindNewLogNumbers(&logs); + if (s.ok() && !logs.empty()) { + SequenceNumber next_sequence(kMaxSequenceNumber); + s = RecoverLogFiles(logs, &next_sequence, cfds_changed, job_context); + } + return s; +} + +// List wal_dir and find all new WALs, return these log numbers +Status DBImplSecondary::FindNewLogNumbers(std::vector* logs) { + assert(logs != nullptr); + std::vector filenames; + Status s; + IOOptions io_opts; + io_opts.do_not_recurse = true; + s = immutable_db_options_.fs->GetChildren(immutable_db_options_.GetWalDir(), + io_opts, &filenames, + /*IODebugContext*=*/nullptr); + if (s.IsNotFound()) { + return Status::InvalidArgument("Failed to open wal_dir", + immutable_db_options_.GetWalDir()); + } else if (!s.ok()) { + return s; + } + + // if log_readers_ is non-empty, it means we have applied all logs with log + // numbers smaller than the smallest log in log_readers_, so there is no + // need to pass these logs to RecoverLogFiles + uint64_t log_number_min = 0; + if (!log_readers_.empty()) { + log_number_min = log_readers_.begin()->first; + } + for (size_t i = 0; i < filenames.size(); i++) { + uint64_t number; + FileType type; + if (ParseFileName(filenames[i], &number, &type) && type == kWalFile && + number >= log_number_min) { + logs->push_back(number); + } + } + // Recover logs in the order that they were generated + if (!logs->empty()) { + std::sort(logs->begin(), logs->end()); + } + return s; +} + +Status DBImplSecondary::MaybeInitLogReader( + uint64_t log_number, log::FragmentBufferedReader** log_reader) { + auto iter = log_readers_.find(log_number); + // make sure the log file is still present + if (iter == log_readers_.end() || + iter->second->reader_->GetLogNumber() != log_number) { + // delete the obsolete log reader if log number mismatch + if (iter != log_readers_.end()) { + log_readers_.erase(iter); + } + // initialize log reader from log_number + // TODO: min_log_number_to_keep_2pc check needed? 
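+    // Note: readers are cached in log_readers_ keyed by WAL number, so a
+    // later TryCatchUpWithPrimary() resumes tailing a partially replayed WAL
+    // instead of re-reading it from the beginning.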
+    // Open the log file
+    std::string fname =
+        LogFileName(immutable_db_options_.GetWalDir(), log_number);
+    ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                   "Recovering log #%" PRIu64 " mode %d", log_number,
+                   static_cast<int>(immutable_db_options_.wal_recovery_mode));
+
+    std::unique_ptr<SequentialFileReader> file_reader;
+    {
+      std::unique_ptr<FSSequentialFile> file;
+      Status status = fs_->NewSequentialFile(
+          fname, fs_->OptimizeForLogRead(file_options_), &file, nullptr);
+      if (!status.ok()) {
+        *log_reader = nullptr;
+        return status;
+      }
+      file_reader.reset(new SequentialFileReader(
+          std::move(file), fname, immutable_db_options_.log_readahead_size,
+          io_tracer_));
+    }
+
+    // Create the log reader.
+    LogReaderContainer* log_reader_container = new LogReaderContainer(
+        env_, immutable_db_options_.info_log, std::move(fname),
+        std::move(file_reader), log_number);
+    log_readers_.insert(std::make_pair(
+        log_number,
+        std::unique_ptr<LogReaderContainer>(log_reader_container)));
+  }
+  iter = log_readers_.find(log_number);
+  assert(iter != log_readers_.end());
+  *log_reader = iter->second->reader_;
+  return Status::OK();
+}
+
+// After manifest recovery, replay WALs and refresh log_readers_ if necessary
+// REQUIRES: log_numbers are sorted in ascending order
+Status DBImplSecondary::RecoverLogFiles(
+    const std::vector<uint64_t>& log_numbers, SequenceNumber* next_sequence,
+    std::unordered_set<ColumnFamilyData*>* cfds_changed,
+    JobContext* job_context) {
+  assert(nullptr != cfds_changed);
+  assert(nullptr != job_context);
+  mutex_.AssertHeld();
+  Status status;
+  for (auto log_number : log_numbers) {
+    log::FragmentBufferedReader* reader = nullptr;
+    status = MaybeInitLogReader(log_number, &reader);
+    if (!status.ok()) {
+      return status;
+    }
+    assert(reader != nullptr);
+  }
+  for (auto log_number : log_numbers) {
+    auto it = log_readers_.find(log_number);
+    assert(it != log_readers_.end());
+    log::FragmentBufferedReader* reader = it->second->reader_;
+    Status* wal_read_status = it->second->status_;
+    assert(wal_read_status);
+    // Manually update the file number allocation counter in VersionSet.
+    versions_->MarkFileNumberUsed(log_number);
+
+    // Read all the records and add to a memtable
+    std::string scratch;
+    Slice record;
+    WriteBatch batch;
+
+    while (reader->ReadRecord(&record, &scratch,
+                              immutable_db_options_.wal_recovery_mode) &&
+           wal_read_status->ok() && status.ok()) {
+      if (record.size() < WriteBatchInternal::kHeader) {
+        reader->GetReporter()->Corruption(
+            record.size(), Status::Corruption("log record too small"));
+        continue;
+      }
+      status = WriteBatchInternal::SetContents(&batch, record);
+      if (!status.ok()) {
+        break;
+      }
+      SequenceNumber seq_of_batch = WriteBatchInternal::Sequence(&batch);
+      std::vector<uint32_t> column_family_ids;
+      status = CollectColumnFamilyIdsFromWriteBatch(batch, &column_family_ids);
+      if (status.ok()) {
+        for (const auto id : column_family_ids) {
+          ColumnFamilyData* cfd =
+              versions_->GetColumnFamilySet()->GetColumnFamily(id);
+          if (cfd == nullptr) {
+            continue;
+          }
+          if (cfds_changed->count(cfd) == 0) {
+            cfds_changed->insert(cfd);
+          }
+          const std::vector<FileMetaData*>& l0_files =
+              cfd->current()->storage_info()->LevelFiles(0);
+          SequenceNumber seq =
+              l0_files.empty() ? 0 : l0_files.back()->fd.largest_seqno;
+          // If the write batch's sequence number is no larger than the
+          // largest sequence number persisted for this column family, then
+          // its data must reside in an SST that has already been added in
+          // the prior MANIFEST replay.
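+          // Illustration: if the largest seqno persisted in L0 for this
+          // column family is 100 and this batch carries seqno 90, the batch
+          // is already covered by the SSTs recovered from the MANIFEST and
+          // is skipped below.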
+ if (seq_of_batch <= seq) { + continue; + } + auto curr_log_num = std::numeric_limits::max(); + if (cfd_to_current_log_.count(cfd) > 0) { + curr_log_num = cfd_to_current_log_[cfd]; + } + // If the active memtable contains records added by replaying an + // earlier WAL, then we need to seal the memtable, add it to the + // immutable memtable list and create a new active memtable. + if (!cfd->mem()->IsEmpty() && + (curr_log_num == std::numeric_limits::max() || + curr_log_num != log_number)) { + const MutableCFOptions mutable_cf_options = + *cfd->GetLatestMutableCFOptions(); + MemTable* new_mem = + cfd->ConstructNewMemtable(mutable_cf_options, seq_of_batch); + cfd->mem()->SetNextLogNumber(log_number); + cfd->mem()->ConstructFragmentedRangeTombstones(); + cfd->imm()->Add(cfd->mem(), &job_context->memtables_to_free); + new_mem->Ref(); + cfd->SetMemtable(new_mem); + } + } + bool has_valid_writes = false; + status = WriteBatchInternal::InsertInto( + &batch, column_family_memtables_.get(), + nullptr /* flush_scheduler */, nullptr /* trim_history_scheduler*/, + true, log_number, this, false /* concurrent_memtable_writes */, + next_sequence, &has_valid_writes, seq_per_batch_, batch_per_txn_); + } + // If column family was not found, it might mean that the WAL write + // batch references to the column family that was dropped after the + // insert. We don't want to fail the whole write batch in that case -- + // we just ignore the update. + // That's why we set ignore missing column families to true + // passing null flush_scheduler will disable memtable flushing which is + // needed for secondary instances + if (status.ok()) { + for (const auto id : column_family_ids) { + ColumnFamilyData* cfd = + versions_->GetColumnFamilySet()->GetColumnFamily(id); + if (cfd == nullptr) { + continue; + } + std::unordered_map::iterator iter = + cfd_to_current_log_.find(cfd); + if (iter == cfd_to_current_log_.end()) { + cfd_to_current_log_.insert({cfd, log_number}); + } else if (log_number > iter->second) { + iter->second = log_number; + } + } + auto last_sequence = *next_sequence - 1; + if ((*next_sequence != kMaxSequenceNumber) && + (versions_->LastSequence() <= last_sequence)) { + versions_->SetLastAllocatedSequence(last_sequence); + versions_->SetLastPublishedSequence(last_sequence); + versions_->SetLastSequence(last_sequence); + } + } else { + // We are treating this as a failure while reading since we read valid + // blocks that do not form coherent data + reader->GetReporter()->Corruption(record.size(), status); + } + } + if (status.ok() && !wal_read_status->ok()) { + status = *wal_read_status; + } + if (!status.ok()) { + return status; + } + } + // remove logreaders from map after successfully recovering the WAL + if (log_readers_.size() > 1) { + auto erase_iter = log_readers_.begin(); + std::advance(erase_iter, log_readers_.size() - 1); + log_readers_.erase(log_readers_.begin(), erase_iter); + } + return status; +} + +// Implementation of the DB interface +Status DBImplSecondary::Get(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) { + return GetImpl(read_options, column_family, key, value, + /*timestamp*/ nullptr); +} + +Status DBImplSecondary::Get(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp) { + return GetImpl(read_options, column_family, key, value, timestamp); +} + +Status DBImplSecondary::GetImpl(const ReadOptions& read_options, + ColumnFamilyHandle* 
column_family, + const Slice& key, PinnableSlice* pinnable_val, + std::string* timestamp) { + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call Get with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + assert(pinnable_val != nullptr); + PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); + StopWatch sw(immutable_db_options_.clock, stats_, DB_GET); + PERF_TIMER_GUARD(get_snapshot_time); + + assert(column_family); + if (read_options.timestamp) { + const Status s = FailIfTsMismatchCf( + column_family, *(read_options.timestamp), /*ts_for_read=*/true); + if (!s.ok()) { + return s; + } + } else { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return s; + } + } + + // Clear the timestamp for returning results so that we can distinguish + // between tombstone or key that has never been written later. + if (timestamp) { + timestamp->clear(); + } + + auto cfh = static_cast(column_family); + ColumnFamilyData* cfd = cfh->cfd(); + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + tracer_->Get(column_family, key); + } + } + // Acquire SuperVersion + SuperVersion* super_version = GetAndRefSuperVersion(cfd); + SequenceNumber snapshot = versions_->LastSequence(); + GetWithTimestampReadCallback read_cb(snapshot); + MergeContext merge_context; + SequenceNumber max_covering_tombstone_seq = 0; + Status s; + LookupKey lkey(key, snapshot, read_options.timestamp); + PERF_TIMER_STOP(get_snapshot_time); + + bool done = false; + const Comparator* ucmp = column_family->GetComparator(); + assert(ucmp); + std::string* ts = ucmp->timestamp_size() > 0 ? timestamp : nullptr; + if (super_version->mem->Get(lkey, pinnable_val->GetSelf(), + /*columns=*/nullptr, ts, &s, &merge_context, + &max_covering_tombstone_seq, read_options, + false /* immutable_memtable */, &read_cb)) { + done = true; + pinnable_val->PinSelf(); + RecordTick(stats_, MEMTABLE_HIT); + } else if ((s.ok() || s.IsMergeInProgress()) && + super_version->imm->Get( + lkey, pinnable_val->GetSelf(), /*columns=*/nullptr, ts, &s, + &merge_context, &max_covering_tombstone_seq, read_options, + &read_cb)) { + done = true; + pinnable_val->PinSelf(); + RecordTick(stats_, MEMTABLE_HIT); + } + if (!done && !s.ok() && !s.IsMergeInProgress()) { + ReturnAndCleanupSuperVersion(cfd, super_version); + return s; + } + if (!done) { + PERF_TIMER_GUARD(get_from_output_files_time); + PinnedIteratorsManager pinned_iters_mgr; + super_version->current->Get( + read_options, lkey, pinnable_val, /*columns=*/nullptr, ts, &s, + &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr, + /*value_found*/ nullptr, + /*key_exists*/ nullptr, /*seq*/ nullptr, &read_cb, /*is_blob*/ nullptr, + /*do_merge*/ true); + RecordTick(stats_, MEMTABLE_MISS); + } + { + PERF_TIMER_GUARD(get_post_process_time); + ReturnAndCleanupSuperVersion(cfd, super_version); + RecordTick(stats_, NUMBER_KEYS_READ); + size_t size = pinnable_val->size(); + RecordTick(stats_, BYTES_READ, size); + RecordTimeToHistogram(stats_, BYTES_PER_READ, size); + PERF_COUNTER_ADD(get_read_bytes, size); + } + return s; +} + +Iterator* DBImplSecondary::NewIterator(const ReadOptions& read_options, + ColumnFamilyHandle* column_family) { + if (read_options.managed) { + return NewErrorIterator( + Status::NotSupported("Managed iterator is not supported anymore.")); + } + if (read_options.read_tier == kPersistedTier) { + return NewErrorIterator(Status::NotSupported( + "ReadTier::kPersistedData is not 
yet supported in iterators.")); + } + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return NewErrorIterator(Status::InvalidArgument( + "Cannot call NewIterator with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`")); + } + + assert(column_family); + if (read_options.timestamp) { + const Status s = FailIfTsMismatchCf( + column_family, *(read_options.timestamp), /*ts_for_read=*/true); + if (!s.ok()) { + return NewErrorIterator(s); + } + } else { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return NewErrorIterator(s); + } + } + + Iterator* result = nullptr; + auto cfh = static_cast_with_check(column_family); + auto cfd = cfh->cfd(); + ReadCallback* read_callback = nullptr; // No read callback provided. + if (read_options.tailing) { + return NewErrorIterator(Status::NotSupported( + "tailing iterator not supported in secondary mode")); + } else if (read_options.snapshot != nullptr) { + // TODO (yanqin) support snapshot. + return NewErrorIterator( + Status::NotSupported("snapshot not supported in secondary mode")); + } else { + SequenceNumber snapshot(kMaxSequenceNumber); + result = NewIteratorImpl(read_options, cfd, snapshot, read_callback); + } + return result; +} + +ArenaWrappedDBIter* DBImplSecondary::NewIteratorImpl( + const ReadOptions& read_options, ColumnFamilyData* cfd, + SequenceNumber snapshot, ReadCallback* read_callback, + bool expose_blob_index, bool allow_refresh) { + assert(nullptr != cfd); + SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); + assert(snapshot == kMaxSequenceNumber); + snapshot = versions_->LastSequence(); + assert(snapshot != kMaxSequenceNumber); + auto db_iter = NewArenaWrappedDbIterator( + env_, read_options, *cfd->ioptions(), super_version->mutable_cf_options, + super_version->current, snapshot, + super_version->mutable_cf_options.max_sequential_skip_in_iterations, + super_version->version_number, read_callback, this, cfd, + expose_blob_index, read_options.snapshot ? false : allow_refresh); + auto internal_iter = NewInternalIterator( + db_iter->GetReadOptions(), cfd, super_version, db_iter->GetArena(), + snapshot, /* allow_unprepared_value */ true, db_iter); + db_iter->SetIterUnderDBIter(internal_iter); + return db_iter; +} + +Status DBImplSecondary::NewIterators( + const ReadOptions& read_options, + const std::vector& column_families, + std::vector* iterators) { + if (read_options.managed) { + return Status::NotSupported("Managed iterator is not supported anymore."); + } + if (read_options.read_tier == kPersistedTier) { + return Status::NotSupported( + "ReadTier::kPersistedData is not yet supported in iterators."); + } + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call NewIterators with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + ReadCallback* read_callback = nullptr; // No read callback provided. 
+  if (iterators == nullptr) {
+    return Status::InvalidArgument("iterators not allowed to be nullptr");
+  }
+
+  if (read_options.timestamp) {
+    for (auto* cf : column_families) {
+      assert(cf);
+      const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp),
+                                          /*ts_for_read=*/true);
+      if (!s.ok()) {
+        return s;
+      }
+    }
+  } else {
+    for (auto* cf : column_families) {
+      assert(cf);
+      const Status s = FailIfCfHasTs(cf);
+      if (!s.ok()) {
+        return s;
+      }
+    }
+  }
+  iterators->clear();
+  iterators->reserve(column_families.size());
+  if (read_options.tailing) {
+    return Status::NotSupported(
+        "tailing iterator not supported in secondary mode");
+  } else if (read_options.snapshot != nullptr) {
+    // TODO (yanqin) support snapshot.
+    return Status::NotSupported("snapshot not supported in secondary mode");
+  } else {
+    SequenceNumber read_seq(kMaxSequenceNumber);
+    for (auto cfh : column_families) {
+      ColumnFamilyData* cfd = static_cast<ColumnFamilyHandleImpl*>(cfh)->cfd();
+      iterators->push_back(
+          NewIteratorImpl(read_options, cfd, read_seq, read_callback));
+    }
+  }
+  return Status::OK();
+}
+
+Status DBImplSecondary::CheckConsistency() {
+  mutex_.AssertHeld();
+  Status s = DBImpl::CheckConsistency();
+  // If DBImpl::CheckConsistency(), which is stricter, returns success, then
+  // we do not need to give it a second chance.
+  if (s.ok()) {
+    return s;
+  }
+  // It's possible that DBImpl::CheckConsistency() fails because the primary
+  // may have removed certain files, causing the GetFileSize(name) call to
+  // fail and return PathNotFound. In this case, we take a best-effort
+  // approach and just proceed.
+  TEST_SYNC_POINT_CALLBACK(
+      "DBImplSecondary::CheckConsistency:AfterFirstAttempt", &s);
+
+  if (immutable_db_options_.skip_checking_sst_file_sizes_on_db_open) {
+    return Status::OK();
+  }
+
+  std::vector<LiveFileMetaData> metadata;
+  versions_->GetLiveFilesMetaData(&metadata);
+
+  std::string corruption_messages;
+  for (const auto& md : metadata) {
+    // md.name has a leading "/".
+    std::string file_path = md.db_path + md.name;
+
+    uint64_t fsize = 0;
+    s = env_->GetFileSize(file_path, &fsize);
+    if (!s.ok() &&
+        (env_->GetFileSize(Rocks2LevelTableFileName(file_path), &fsize).ok() ||
+         s.IsPathNotFound())) {
+      s = Status::OK();
+    }
+    if (!s.ok()) {
+      corruption_messages +=
+          "Can't access " + md.name + ": " + s.ToString() + "\n";
+    }
+  }
+  return corruption_messages.empty() ?
Status::OK() + : Status::Corruption(corruption_messages); +} + +Status DBImplSecondary::TryCatchUpWithPrimary() { + assert(versions_.get() != nullptr); + assert(manifest_reader_.get() != nullptr); + Status s; + // read the manifest and apply new changes to the secondary instance + std::unordered_set cfds_changed; + JobContext job_context(0, true /*create_superversion*/); + { + InstrumentedMutexLock lock_guard(&mutex_); + s = static_cast_with_check(versions_.get()) + ->ReadAndApply(&mutex_, &manifest_reader_, + manifest_reader_status_.get(), &cfds_changed); + + ROCKS_LOG_INFO(immutable_db_options_.info_log, "Last sequence is %" PRIu64, + static_cast(versions_->LastSequence())); + for (ColumnFamilyData* cfd : cfds_changed) { + if (cfd->IsDropped()) { + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] is dropped\n", + cfd->GetName().c_str()); + continue; + } + VersionStorageInfo::LevelSummaryStorage tmp; + ROCKS_LOG_DEBUG(immutable_db_options_.info_log, + "[%s] Level summary: %s\n", cfd->GetName().c_str(), + cfd->current()->storage_info()->LevelSummary(&tmp)); + } + + // list wal_dir to discover new WALs and apply new changes to the secondary + // instance + if (s.ok()) { + s = FindAndRecoverLogFiles(&cfds_changed, &job_context); + } + if (s.IsPathNotFound()) { + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "Secondary tries to read WAL, but WAL file(s) have already " + "been purged by primary."); + s = Status::OK(); + } + if (s.ok()) { + for (auto cfd : cfds_changed) { + cfd->imm()->RemoveOldMemTables(cfd->GetLogNumber(), + &job_context.memtables_to_free); + auto& sv_context = job_context.superversion_contexts.back(); + cfd->InstallSuperVersion(&sv_context, &mutex_); + sv_context.NewSuperVersion(); + } + } + } + job_context.Clean(); + + // Cleanup unused, obsolete files. + JobContext purge_files_job_context(0); + { + InstrumentedMutexLock lock_guard(&mutex_); + // Currently, secondary instance does not own the database files, thus it + // is unnecessary for the secondary to force full scan. + FindObsoleteFiles(&purge_files_job_context, /*force=*/false); + } + if (purge_files_job_context.HaveSomethingToDelete()) { + PurgeObsoleteFiles(purge_files_job_context); + } + purge_files_job_context.Clean(); + return s; +} + +Status DB::OpenAsSecondary(const Options& options, const std::string& dbname, + const std::string& secondary_path, DB** dbptr) { + *dbptr = nullptr; + + DBOptions db_options(options); + ColumnFamilyOptions cf_options(options); + std::vector column_families; + column_families.emplace_back(kDefaultColumnFamilyName, cf_options); + std::vector handles; + + Status s = DB::OpenAsSecondary(db_options, dbname, secondary_path, + column_families, &handles, dbptr); + if (s.ok()) { + assert(handles.size() == 1); + delete handles[0]; + } + return s; +} + +Status DB::OpenAsSecondary( + const DBOptions& db_options, const std::string& dbname, + const std::string& secondary_path, + const std::vector& column_families, + std::vector* handles, DB** dbptr) { + *dbptr = nullptr; + + DBOptions tmp_opts(db_options); + Status s; + if (nullptr == tmp_opts.info_log) { + s = CreateLoggerFromOptions(secondary_path, tmp_opts, &tmp_opts.info_log); + if (!s.ok()) { + tmp_opts.info_log = nullptr; + return s; + } + } + + assert(tmp_opts.info_log != nullptr); + if (db_options.max_open_files != -1) { + std::ostringstream oss; + oss << "The primary instance may delete all types of files after they " + "become obsolete. 
The application can coordinate the primary and " + "secondary so that primary does not delete/rename files that are " + "currently being used by the secondary. Alternatively, a custom " + "Env/FS can be provided such that files become inaccessible only " + "after all primary and secondaries indicate that they are obsolete " + "and deleted. If the above two are not possible, you can open the " + "secondary instance with `max_open_files==-1` so that secondary " + "will eagerly keep all table files open. Even if a file is deleted, " + "its content can still be accessed via a prior open file " + "descriptor. This is a hacky workaround for only table files. If " + "none of the above is done, then point lookup or " + "range scan via the secondary instance can result in IOError: file " + "not found. This can be resolved by retrying " + "TryCatchUpWithPrimary()."; + ROCKS_LOG_WARN(tmp_opts.info_log, "%s", oss.str().c_str()); + } + + handles->clear(); + DBImplSecondary* impl = new DBImplSecondary(tmp_opts, dbname, secondary_path); + impl->versions_.reset(new ReactiveVersionSet( + dbname, &impl->immutable_db_options_, impl->file_options_, + impl->table_cache_.get(), impl->write_buffer_manager_, + &impl->write_controller_, impl->io_tracer_)); + impl->column_family_memtables_.reset( + new ColumnFamilyMemTablesImpl(impl->versions_->GetColumnFamilySet())); + impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath(); + + impl->mutex_.Lock(); + s = impl->Recover(column_families, true, false, false); + if (s.ok()) { + for (auto cf : column_families) { + auto cfd = + impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name); + if (nullptr == cfd) { + s = Status::InvalidArgument("Column family not found", cf.name); + break; + } + handles->push_back(new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_)); + } + } + SuperVersionContext sv_context(true /* create_superversion */); + if (s.ok()) { + for (auto cfd : *impl->versions_->GetColumnFamilySet()) { + sv_context.NewSuperVersion(); + cfd->InstallSuperVersion(&sv_context, &impl->mutex_); + } + } + impl->mutex_.Unlock(); + sv_context.Clean(); + if (s.ok()) { + *dbptr = impl; + for (auto h : *handles) { + impl->NewThreadStatusCfInfo( + static_cast_with_check(h)->cfd()); + } + } else { + for (auto h : *handles) { + delete h; + } + handles->clear(); + delete impl; + } + return s; +} + +Status DBImplSecondary::CompactWithoutInstallation( + const OpenAndCompactOptions& options, ColumnFamilyHandle* cfh, + const CompactionServiceInput& input, CompactionServiceResult* result) { + if (options.canceled && options.canceled->load(std::memory_order_acquire)) { + return Status::Incomplete(Status::SubCode::kManualCompactionPaused); + } + InstrumentedMutexLock l(&mutex_); + auto cfd = static_cast_with_check(cfh)->cfd(); + if (!cfd) { + return Status::InvalidArgument("Cannot find column family" + + cfh->GetName()); + } + + std::unordered_set input_set; + for (const auto& file_name : input.input_files) { + input_set.insert(TableFileNameToNumber(file_name)); + } + + auto* version = cfd->current(); + + ColumnFamilyMetaData cf_meta; + version->GetColumnFamilyMetaData(&cf_meta); + + const MutableCFOptions* mutable_cf_options = cfd->GetLatestMutableCFOptions(); + ColumnFamilyOptions cf_options = cfd->GetLatestCFOptions(); + VersionStorageInfo* vstorage = version->storage_info(); + + // Use comp_options to reuse some CompactFiles functions + CompactionOptions comp_options; + comp_options.compression = kDisableCompressionOption; + 
comp_options.output_file_size_limit = MaxFileSizeForLevel( + *mutable_cf_options, input.output_level, cf_options.compaction_style, + vstorage->base_level(), cf_options.level_compaction_dynamic_level_bytes); + + std::vector input_files; + Status s = cfd->compaction_picker()->GetCompactionInputsFromFileNumbers( + &input_files, &input_set, vstorage, comp_options); + if (!s.ok()) { + return s; + } + + std::unique_ptr c; + assert(cfd->compaction_picker()); + c.reset(cfd->compaction_picker()->CompactFiles( + comp_options, input_files, input.output_level, vstorage, + *mutable_cf_options, mutable_db_options_, 0)); + assert(c != nullptr); + + c->SetInputVersion(version); + + // Create output directory if it's not existed yet + std::unique_ptr output_dir; + s = CreateAndNewDirectory(fs_.get(), secondary_path_, &output_dir); + if (!s.ok()) { + return s; + } + + LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, + immutable_db_options_.info_log.get()); + + const int job_id = next_job_id_.fetch_add(1); + + // use primary host's db_id for running the compaction, but db_session_id is + // using the local one, which is to make sure the unique id is unique from + // the remote compactors. Because the id is generated from db_id, + // db_session_id and orig_file_number, unlike the local compaction, remote + // compaction cannot guarantee the uniqueness of orig_file_number, the file + // number is only assigned when compaction is done. + CompactionServiceCompactionJob compaction_job( + job_id, c.get(), immutable_db_options_, mutable_db_options_, + file_options_for_compaction_, versions_.get(), &shutting_down_, + &log_buffer, output_dir.get(), stats_, &mutex_, &error_handler_, + input.snapshots, table_cache_, &event_logger_, dbname_, io_tracer_, + options.canceled ? *options.canceled : kManualCompactionCanceledFalse_, + input.db_id, db_session_id_, secondary_path_, input, result); + + mutex_.Unlock(); + s = compaction_job.Run(); + mutex_.Lock(); + + // clean up + compaction_job.io_status().PermitUncheckedError(); + compaction_job.CleanupCompaction(); + c->ReleaseCompactionFiles(s); + c.reset(); + + TEST_SYNC_POINT_CALLBACK("DBImplSecondary::CompactWithoutInstallation::End", + &s); + result->status = s; + return s; +} + +Status DB::OpenAndCompact( + const OpenAndCompactOptions& options, const std::string& name, + const std::string& output_directory, const std::string& input, + std::string* output, + const CompactionServiceOptionsOverride& override_options) { + if (options.canceled && options.canceled->load(std::memory_order_acquire)) { + return Status::Incomplete(Status::SubCode::kManualCompactionPaused); + } + CompactionServiceInput compaction_input; + Status s = CompactionServiceInput::Read(input, &compaction_input); + if (!s.ok()) { + return s; + } + + compaction_input.db_options.max_open_files = -1; + compaction_input.db_options.compaction_service = nullptr; + if (compaction_input.db_options.statistics) { + compaction_input.db_options.statistics.reset(); + } + compaction_input.db_options.env = override_options.env; + compaction_input.db_options.file_checksum_gen_factory = + override_options.file_checksum_gen_factory; + compaction_input.db_options.statistics = override_options.statistics; + compaction_input.column_family.options.comparator = + override_options.comparator; + compaction_input.column_family.options.merge_operator = + override_options.merge_operator; + compaction_input.column_family.options.compaction_filter = + override_options.compaction_filter; + 
compaction_input.column_family.options.compaction_filter_factory =
+      override_options.compaction_filter_factory;
+  compaction_input.column_family.options.prefix_extractor =
+      override_options.prefix_extractor;
+  compaction_input.column_family.options.table_factory =
+      override_options.table_factory;
+  compaction_input.column_family.options.sst_partitioner_factory =
+      override_options.sst_partitioner_factory;
+  compaction_input.column_family.options.table_properties_collector_factories =
+      override_options.table_properties_collector_factories;
+  compaction_input.db_options.listeners = override_options.listeners;
+
+  std::vector<ColumnFamilyDescriptor> column_families;
+  column_families.push_back(compaction_input.column_family);
+  // TODO: we have to open the default CF because of an implementation
+  // limitation. Currently we just reuse the CF options from the input, which
+  // is not correct, and the open may fail.
+  if (compaction_input.column_family.name != kDefaultColumnFamilyName) {
+    column_families.emplace_back(kDefaultColumnFamilyName,
+                                 compaction_input.column_family.options);
+  }
+
+  DB* db;
+  std::vector<ColumnFamilyHandle*> handles;
+
+  s = DB::OpenAsSecondary(compaction_input.db_options, name, output_directory,
+                          column_families, &handles, &db);
+  if (!s.ok()) {
+    return s;
+  }
+
+  CompactionServiceResult compaction_result;
+  DBImplSecondary* db_secondary = static_cast_with_check<DBImplSecondary>(db);
+  assert(handles.size() > 0);
+  s = db_secondary->CompactWithoutInstallation(
+      options, handles[0], compaction_input, &compaction_result);
+
+  Status serialization_status = compaction_result.Write(output);
+
+  for (auto& handle : handles) {
+    delete handle;
+  }
+  delete db;
+  if (s.ok()) {
+    return serialization_status;
+  }
+  return s;
+}
+
+Status DB::OpenAndCompact(
+    const std::string& name, const std::string& output_directory,
+    const std::string& input, std::string* output,
+    const CompactionServiceOptionsOverride& override_options) {
+  return OpenAndCompact(OpenAndCompactOptions(), name, output_directory, input,
+                        output, override_options);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.h
new file mode 100644
index 0000000000..3c21986221
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_secondary.h
@@ -0,0 +1,406 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "db/db_impl/db_impl.h"
+#include "logging/logging.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A wrapper class to hold log reader, log reporter, log status.
+class LogReaderContainer {
+ public:
+  LogReaderContainer()
+      : reader_(nullptr), reporter_(nullptr), status_(nullptr) {}
+  LogReaderContainer(Env* env, std::shared_ptr<Logger> info_log,
+                     std::string fname,
+                     std::unique_ptr<SequentialFileReader>&& file_reader,
+                     uint64_t log_number) {
+    LogReporter* reporter = new LogReporter();
+    status_ = new Status();
+    reporter->env = env;
+    reporter->info_log = info_log.get();
+    reporter->fname = std::move(fname);
+    reporter->status = status_;
+    reporter_ = reporter;
+    // We intentionally make log::Reader do checksumming even if
+    // paranoid_checks==false so that corruptions cause entire commits
+    // to be skipped instead of propagating bad information (like overly
+    // large sequence numbers).
+    reader_ = new log::FragmentBufferedReader(info_log, std::move(file_reader),
+                                              reporter, true /*checksum*/,
+                                              log_number);
+  }
+  log::FragmentBufferedReader* reader_;
+  log::Reader::Reporter* reporter_;
+  Status* status_;
+  ~LogReaderContainer() {
+    delete reader_;
+    delete reporter_;
+    delete status_;
+  }
+
+ private:
+  struct LogReporter : public log::Reader::Reporter {
+    Env* env;
+    Logger* info_log;
+    std::string fname;
+    Status* status;  // nullptr if immutable_db_options_.paranoid_checks==false
+    void Corruption(size_t bytes, const Status& s) override {
+      ROCKS_LOG_WARN(info_log, "%s%s: dropping %d bytes; %s",
+                     (this->status == nullptr ? "(ignoring error) " : ""),
+                     fname.c_str(), static_cast<int>(bytes),
+                     s.ToString().c_str());
+      if (this->status != nullptr && this->status->ok()) {
+        *this->status = s;
+      }
+    }
+  };
+};
+
+// The secondary instance shares access to the same storage as the primary.
+// The secondary is able to read and replay changes described in both the
+// MANIFEST and the WAL files without coordination with the primary.
+// The secondary instance can be opened using `DB::OpenAsSecondary`. After
+// that, it can call `DBImplSecondary::TryCatchUpWithPrimary` to make best
+// effort attempts to catch up with the primary.
+// TODO: Share common structure with CompactedDBImpl and DBImplReadOnly
+class DBImplSecondary : public DBImpl {
+ public:
+  DBImplSecondary(const DBOptions& options, const std::string& dbname,
+                  std::string secondary_path);
+  ~DBImplSecondary() override;
+
+  // Recover by replaying MANIFEST and WAL. Also initialize manifest_reader_
+  // and log_readers_ to facilitate future operations.
+  Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families,
+                 bool read_only, bool error_if_wal_file_exists,
+                 bool error_if_data_exists_in_wals, uint64_t* = nullptr,
+                 RecoveryContext* recovery_ctx = nullptr) override;
+
+  // Implementations of the DB interface.
+  using DB::Get;
+  // Can return IOError due to files being deleted by the primary. To avoid
+  // IOError in this case, the application can coordinate the primary and the
+  // secondaries so that the primary does not delete files that are currently
+  // being used by the secondaries. The application can also provide a custom
+  // FS/Env implementation so that files remain present until all primary and
+  // secondaries indicate that they can be deleted. As a partial hacky
+  // workaround, the secondaries can be opened with `max_open_files=-1` so
+  // that they eagerly keep all table files open and are able to access the
+  // contents of deleted files via previously opened file descriptors.
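+  // A caller-side sketch of the retry pattern described above (illustrative
+  // only; `db` is assumed to be a DB* opened with DB::OpenAsSecondary):
+  //
+  //   std::string value;
+  //   Status s = db->Get(ReadOptions(), key, &value);
+  //   if (s.IsIOError()) {
+  //     // The primary may have deleted files under us; catch up and retry.
+  //     s = db->TryCatchUpWithPrimary();
+  //     if (s.ok()) {
+  //       s = db->Get(ReadOptions(), key, &value);
+  //     }
+  //   }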
+  Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
+             const Slice& key, PinnableSlice* value) override;
+
+  Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
+             const Slice& key, PinnableSlice* value,
+             std::string* timestamp) override;
+
+  Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family,
+                 const Slice& key, PinnableSlice* value,
+                 std::string* timestamp);
+
+  using DBImpl::NewIterator;
+  // Operations on the created iterators can return IOError due to files being
+  // deleted by the primary. To avoid IOError in this case, the application can
+  // coordinate the primary and the secondaries so that the primary does not
+  // delete files that are currently being used by the secondaries. The
+  // application can also provide a custom FS/Env implementation so that files
+  // remain present until all primary and secondaries indicate that they can
+  // be deleted. As a partial hacky workaround, the secondaries can be opened
+  // with `max_open_files=-1` so that they eagerly keep all table files open
+  // and are able to access the contents of deleted files via previously
+  // opened file descriptors.
+  Iterator* NewIterator(const ReadOptions&,
+                        ColumnFamilyHandle* column_family) override;
+
+  ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& read_options,
+                                      ColumnFamilyData* cfd,
+                                      SequenceNumber snapshot,
+                                      ReadCallback* read_callback,
+                                      bool expose_blob_index = false,
+                                      bool allow_refresh = true);
+
+  Status NewIterators(const ReadOptions& options,
+                      const std::vector<ColumnFamilyHandle*>& column_families,
+                      std::vector<Iterator*>* iterators) override;
+
+  using DBImpl::Put;
+  Status Put(const WriteOptions& /*options*/,
+             ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/,
+             const Slice& /*value*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::PutEntity;
+  Status PutEntity(const WriteOptions& /* options */,
+                   ColumnFamilyHandle* /* column_family */,
+                   const Slice& /* key */,
+                   const WideColumns& /* columns */) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::Merge;
+  Status Merge(const WriteOptions& /*options*/,
+               ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/,
+               const Slice& /*value*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::Delete;
+  Status Delete(const WriteOptions& /*options*/,
+                ColumnFamilyHandle* /*column_family*/,
+                const Slice& /*key*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::SingleDelete;
+  Status SingleDelete(const WriteOptions& /*options*/,
+                      ColumnFamilyHandle* /*column_family*/,
+                      const Slice& /*key*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  Status Write(const WriteOptions& /*options*/,
+               WriteBatch* /*updates*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::CompactRange;
+  Status CompactRange(const CompactRangeOptions& /*options*/,
+                      ColumnFamilyHandle* /*column_family*/,
+                      const Slice* /*begin*/, const Slice* /*end*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::CompactFiles;
+  Status CompactFiles(
+      const CompactionOptions& /*compact_options*/,
+      ColumnFamilyHandle* /*column_family*/,
+      const std::vector<std::string>& /*input_file_names*/,
+      const int /*output_level*/, const int /*output_path_id*/ = -1,
+      std::vector<std::string>* const /*output_file_names*/ = nullptr,
+      CompactionJobInfo* /*compaction_job_info*/ = nullptr) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  Status DisableFileDeletions() override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  Status EnableFileDeletions(bool /*force*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  Status GetLiveFiles(std::vector<std::string>&,
+                      uint64_t* /*manifest_file_size*/,
+                      bool /*flush_memtable*/ = true) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::Flush;
+  Status Flush(const FlushOptions& /*options*/,
+               ColumnFamilyHandle* /*column_family*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::SetDBOptions;
+  Status SetDBOptions(const std::unordered_map<std::string, std::string>&
+                      /*options_map*/) override {
+    // Currently not supported because changing certain options may cause
+    // flush/compaction.
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::SetOptions;
+  Status SetOptions(
+      ColumnFamilyHandle* /*cfd*/,
+      const std::unordered_map<std::string, std::string>& /*options_map*/)
+      override {
+    // Currently not supported because changing certain options may cause
+    // flush/compaction and/or write to MANIFEST.
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DBImpl::SyncWAL;
+  Status SyncWAL() override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  using DB::IngestExternalFile;
+  Status IngestExternalFile(
+      ColumnFamilyHandle* /*column_family*/,
+      const std::vector<std::string>& /*external_files*/,
+      const IngestExternalFileOptions& /*ingestion_options*/) override {
+    return Status::NotSupported("Not supported operation in secondary mode.");
+  }
+
+  // Try to catch up with the primary by reading as much as possible from the
+  // log files until there is nothing more to read or an error is encountered.
+  // If the amount of information in the log files to process is huge, this
+  // method can take a long time due to all the I/O and CPU costs.
+  Status TryCatchUpWithPrimary() override;
+
+  // Try to find the log reader for log_number in the log_readers_ map;
+  // initialize it if it doesn't exist.
+  Status MaybeInitLogReader(uint64_t log_number,
+                            log::FragmentBufferedReader** log_reader);
+
+  // Check that all live files exist on the file system and that their file
+  // sizes match the in-memory records. It is possible that some live files
+  // may have been deleted by the primary. In this case, CheckConsistency()
+  // does not flag the missing file as an inconsistency.
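+  // For example, if the primary unlinked an SST that is still listed in the
+  // secondary's current Version, GetFileSize() returns PathNotFound; that
+  // case is tolerated rather than reported as corruption.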
+ Status CheckConsistency() override; + +#ifndef NDEBUG + Status TEST_CompactWithoutInstallation(const OpenAndCompactOptions& options, + ColumnFamilyHandle* cfh, + const CompactionServiceInput& input, + CompactionServiceResult* result) { + return CompactWithoutInstallation(options, cfh, input, result); + } +#endif // NDEBUG + + protected: + Status FlushForGetLiveFiles() override { + // No-op for read-only DB + return Status::OK(); + } + + // ColumnFamilyCollector is a write batch handler which does nothing + // except recording unique column family IDs + class ColumnFamilyCollector : public WriteBatch::Handler { + std::unordered_set column_family_ids_; + + Status AddColumnFamilyId(uint32_t column_family_id) { + if (column_family_ids_.find(column_family_id) == + column_family_ids_.end()) { + column_family_ids_.insert(column_family_id); + } + return Status::OK(); + } + + public: + explicit ColumnFamilyCollector() {} + + ~ColumnFamilyCollector() override {} + + Status PutCF(uint32_t column_family_id, const Slice&, + const Slice&) override { + return AddColumnFamilyId(column_family_id); + } + + Status DeleteCF(uint32_t column_family_id, const Slice&) override { + return AddColumnFamilyId(column_family_id); + } + + Status SingleDeleteCF(uint32_t column_family_id, const Slice&) override { + return AddColumnFamilyId(column_family_id); + } + + Status DeleteRangeCF(uint32_t column_family_id, const Slice&, + const Slice&) override { + return AddColumnFamilyId(column_family_id); + } + + Status MergeCF(uint32_t column_family_id, const Slice&, + const Slice&) override { + return AddColumnFamilyId(column_family_id); + } + + Status PutBlobIndexCF(uint32_t column_family_id, const Slice&, + const Slice&) override { + return AddColumnFamilyId(column_family_id); + } + + Status MarkBeginPrepare(bool) override { return Status::OK(); } + + Status MarkEndPrepare(const Slice&) override { return Status::OK(); } + + Status MarkRollback(const Slice&) override { return Status::OK(); } + + Status MarkCommit(const Slice&) override { return Status::OK(); } + + Status MarkCommitWithTimestamp(const Slice&, const Slice&) override { + return Status::OK(); + } + + Status MarkNoop(bool) override { return Status::OK(); } + + const std::unordered_set& column_families() const { + return column_family_ids_; + } + }; + + Status CollectColumnFamilyIdsFromWriteBatch( + const WriteBatch& batch, std::vector* column_family_ids) { + assert(column_family_ids != nullptr); + column_family_ids->clear(); + ColumnFamilyCollector handler; + Status s = batch.Iterate(&handler); + if (s.ok()) { + for (const auto& cf : handler.column_families()) { + column_family_ids->push_back(cf); + } + } + return s; + } + + bool OwnTablesAndLogs() const override { + // Currently, the secondary instance does not own the database files. It + // simply opens the files of the primary instance and tracks their file + // descriptors until they become obsolete. In the future, the secondary may + // create links to database files. OwnTablesAndLogs will return true then. 
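+    // Returning false keeps obsolete-file purging on the secondary away from
+    // table and log files that are still owned by the primary.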
+ return false; + } + + private: + friend class DB; + + // No copying allowed + DBImplSecondary(const DBImplSecondary&); + void operator=(const DBImplSecondary&); + + using DBImpl::Recover; + + Status FindAndRecoverLogFiles( + std::unordered_set* cfds_changed, + JobContext* job_context); + Status FindNewLogNumbers(std::vector* logs); + // After manifest recovery, replay WALs and refresh log_readers_ if necessary + // REQUIRES: log_numbers are sorted in ascending order + Status RecoverLogFiles(const std::vector& log_numbers, + SequenceNumber* next_sequence, + std::unordered_set* cfds_changed, + JobContext* job_context); + + // Run compaction without installation, the output files will be placed in the + // secondary DB path. The LSM tree won't be changed, the secondary DB is still + // in read-only mode. + Status CompactWithoutInstallation(const OpenAndCompactOptions& options, + ColumnFamilyHandle* cfh, + const CompactionServiceInput& input, + CompactionServiceResult* result); + + std::unique_ptr manifest_reader_; + std::unique_ptr manifest_reporter_; + std::unique_ptr manifest_reader_status_; + + // Cache log readers for each log number, used for continue WAL replay + // after recovery + std::map> log_readers_; + + // Current WAL number replayed for each column family. + std::unordered_map cfd_to_current_log_; + + const std::string secondary_path_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_write.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_write.cc new file mode 100644 index 0000000000..fb74434dd4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_impl/db_impl_write.cc @@ -0,0 +1,2487 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
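+
+// Informal overview: this file implements the write path of DBImpl. The
+// convenience methods below (Put, Merge, Delete, SingleDelete, DeleteRange)
+// wrap a single key into a WriteBatch and funnel into WriteImpl(), the
+// group-commit entry point, so an explicit batched write such as
+//
+//   WriteBatch batch;
+//   batch.Put("key", "value");
+//   batch.Delete("stale-key");
+//   Status s = db->Write(WriteOptions(), &batch);
+//
+// exercises the same code path.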
+#include + +#include "db/db_impl/db_impl.h" +#include "db/error_handler.h" +#include "db/event_helpers.h" +#include "logging/logging.h" +#include "monitoring/perf_context_imp.h" +#include "options/options_helper.h" +#include "test_util/sync_point.h" +#include "util/cast_util.h" + +namespace ROCKSDB_NAMESPACE { +// Convenience methods +Status DBImpl::Put(const WriteOptions& o, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& val) { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return s; + } + return DB::Put(o, column_family, key, val); +} + +Status DBImpl::Put(const WriteOptions& o, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts, const Slice& val) { + const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + if (!s.ok()) { + return s; + } + return DB::Put(o, column_family, key, ts, val); +} + +Status DBImpl::PutEntity(const WriteOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + const WideColumns& columns) { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return s; + } + + return DB::PutEntity(options, column_family, key, columns); +} + +Status DBImpl::Merge(const WriteOptions& o, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& val) { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return s; + } + auto cfh = static_cast_with_check(column_family); + if (!cfh->cfd()->ioptions()->merge_operator) { + return Status::NotSupported("Provide a merge_operator when opening DB"); + } else { + return DB::Merge(o, column_family, key, val); + } +} + +Status DBImpl::Merge(const WriteOptions& o, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts, const Slice& val) { + const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + if (!s.ok()) { + return s; + } + return DB::Merge(o, column_family, key, ts, val); +} + +Status DBImpl::Delete(const WriteOptions& write_options, + ColumnFamilyHandle* column_family, const Slice& key) { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return s; + } + return DB::Delete(write_options, column_family, key); +} + +Status DBImpl::Delete(const WriteOptions& write_options, + ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) { + const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + if (!s.ok()) { + return s; + } + return DB::Delete(write_options, column_family, key, ts); +} + +Status DBImpl::SingleDelete(const WriteOptions& write_options, + ColumnFamilyHandle* column_family, + const Slice& key) { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return s; + } + return DB::SingleDelete(write_options, column_family, key); +} + +Status DBImpl::SingleDelete(const WriteOptions& write_options, + ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) { + const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + if (!s.ok()) { + return s; + } + return DB::SingleDelete(write_options, column_family, key, ts); +} + +Status DBImpl::DeleteRange(const WriteOptions& write_options, + ColumnFamilyHandle* column_family, + const Slice& begin_key, const Slice& end_key) { + const Status s = FailIfCfHasTs(column_family); + if (!s.ok()) { + return s; + } + return DB::DeleteRange(write_options, column_family, begin_key, end_key); +} + +Status DBImpl::DeleteRange(const WriteOptions& write_options, + ColumnFamilyHandle* column_family, + const Slice& begin_key, const 
Slice& end_key, + const Slice& ts) { + const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + if (!s.ok()) { + return s; + } + return DB::DeleteRange(write_options, column_family, begin_key, end_key, ts); +} + +void DBImpl::SetRecoverableStatePreReleaseCallback( + PreReleaseCallback* callback) { + recoverable_state_pre_release_callback_.reset(callback); +} + +Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) { + Status s; + if (write_options.protection_bytes_per_key > 0) { + s = WriteBatchInternal::UpdateProtectionInfo( + my_batch, write_options.protection_bytes_per_key); + } + if (s.ok()) { + s = WriteImpl(write_options, my_batch, /*callback=*/nullptr, + /*log_used=*/nullptr); + } + return s; +} + +Status DBImpl::WriteWithCallback(const WriteOptions& write_options, + WriteBatch* my_batch, + WriteCallback* callback) { + Status s; + if (write_options.protection_bytes_per_key > 0) { + s = WriteBatchInternal::UpdateProtectionInfo( + my_batch, write_options.protection_bytes_per_key); + } + if (s.ok()) { + s = WriteImpl(write_options, my_batch, callback, nullptr); + } + return s; +} + +// The main write queue. This is the only write queue that updates LastSequence. +// When using one write queue, the same sequence also indicates the last +// published sequence. +Status DBImpl::WriteImpl(const WriteOptions& write_options, + WriteBatch* my_batch, WriteCallback* callback, + uint64_t* log_used, uint64_t log_ref, + bool disable_memtable, uint64_t* seq_used, + size_t batch_cnt, + PreReleaseCallback* pre_release_callback, + PostMemTableCallback* post_memtable_callback) { + assert(!seq_per_batch_ || batch_cnt != 0); + assert(my_batch == nullptr || my_batch->Count() == 0 || + write_options.protection_bytes_per_key == 0 || + write_options.protection_bytes_per_key == + my_batch->GetProtectionBytesPerKey()); + if (my_batch == nullptr) { + return Status::InvalidArgument("Batch is nullptr!"); + } else if (!disable_memtable && + WriteBatchInternal::TimestampsUpdateNeeded(*my_batch)) { + // If writing to memtable, then we require the caller to set/update the + // timestamps for the keys in the write batch. + // Otherwise, it means we are just writing to the WAL, and we allow + // timestamps unset for the keys in the write batch. This can happen if we + // use TransactionDB with write-committed policy, and we currently do not + // support user-defined timestamp with other policies. + // In the prepare phase, a transaction can write the batch to the WAL + // without inserting to memtable. The keys in the batch do not have to be + // assigned timestamps because they will be used only during recovery if + // there is a commit marker which includes their commit timestamp. 
+ return Status::InvalidArgument("write batch must have timestamp(s) set"); + } else if (write_options.rate_limiter_priority != Env::IO_TOTAL && + write_options.rate_limiter_priority != Env::IO_USER) { + return Status::InvalidArgument( + "WriteOptions::rate_limiter_priority only allows " + "Env::IO_TOTAL and Env::IO_USER due to implementation constraints"); + } else if (write_options.rate_limiter_priority != Env::IO_TOTAL && + (write_options.disableWAL || manual_wal_flush_)) { + return Status::InvalidArgument( + "WriteOptions::rate_limiter_priority currently only supports " + "rate-limiting automatic WAL flush, which requires " + "`WriteOptions::disableWAL` and " + "`DBOptions::manual_wal_flush` both set to false"); + } else if (write_options.protection_bytes_per_key != 0 && + write_options.protection_bytes_per_key != 8) { + return Status::InvalidArgument( + "`WriteOptions::protection_bytes_per_key` must be zero or eight"); + } + // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock + // grabs but does not seem thread-safe. + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_ && !tracer_->IsWriteOrderPreserved()) { + // We don't have to preserve write order so can trace anywhere. It's more + // efficient to trace here than to add latency to a phase of the log/apply + // pipeline. + // TODO: maybe handle the tracing status? + tracer_->Write(my_batch).PermitUncheckedError(); + } + } + if (write_options.sync && write_options.disableWAL) { + return Status::InvalidArgument("Sync writes has to enable WAL."); + } + if (two_write_queues_ && immutable_db_options_.enable_pipelined_write) { + return Status::NotSupported( + "pipelined_writes is not compatible with concurrent prepares"); + } + if (seq_per_batch_ && immutable_db_options_.enable_pipelined_write) { + // TODO(yiwu): update pipeline write with seq_per_batch and batch_cnt + return Status::NotSupported( + "pipelined_writes is not compatible with seq_per_batch"); + } + if (immutable_db_options_.unordered_write && + immutable_db_options_.enable_pipelined_write) { + return Status::NotSupported( + "pipelined_writes is not compatible with unordered_write"); + } + if (immutable_db_options_.enable_pipelined_write && + post_memtable_callback != nullptr) { + return Status::NotSupported( + "pipelined write currently does not honor post_memtable_callback"); + } + if (seq_per_batch_ && post_memtable_callback != nullptr) { + return Status::NotSupported( + "seq_per_batch currently does not honor post_memtable_callback"); + } + // Otherwise IsLatestPersistentState optimization does not make sense + assert(!WriteBatchInternal::IsLatestPersistentState(my_batch) || + disable_memtable); + + if (write_options.low_pri) { + Status s = ThrottleLowPriWritesIfNeeded(write_options, my_batch); + if (!s.ok()) { + return s; + } + } + + if (two_write_queues_ && disable_memtable) { + AssignOrder assign_order = + seq_per_batch_ ? kDoAssignOrder : kDontAssignOrder; + // Otherwise it is WAL-only Prepare batches in WriteCommitted policy and + // they don't consume sequence. + return WriteImplWALOnly(&nonmem_write_thread_, write_options, my_batch, + callback, log_used, log_ref, seq_used, batch_cnt, + pre_release_callback, assign_order, + kDontPublishLastSeq, disable_memtable); + } + + if (immutable_db_options_.unordered_write) { + const size_t sub_batch_cnt = batch_cnt != 0 + ? 
batch_cnt
+                                     // every key is a sub-batch consuming a seq
+                                     : WriteBatchInternal::Count(my_batch);
+    uint64_t seq = 0;
+    // Use a write thread to i) optimize for WAL write, ii) publish last
+    // sequence in increasing order, iii) call pre_release_callback serially
+    Status status = WriteImplWALOnly(
+        &write_thread_, write_options, my_batch, callback, log_used, log_ref,
+        &seq, sub_batch_cnt, pre_release_callback, kDoAssignOrder,
+        kDoPublishLastSeq, disable_memtable);
+    TEST_SYNC_POINT("DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL");
+    if (!status.ok()) {
+      return status;
+    }
+    if (seq_used) {
+      *seq_used = seq;
+    }
+    if (!disable_memtable) {
+      TEST_SYNC_POINT("DBImpl::WriteImpl:BeforeUnorderedWriteMemtable");
+      status = UnorderedWriteMemtable(write_options, my_batch, callback,
+                                      log_ref, seq, sub_batch_cnt);
+    }
+    return status;
+  }
+
+  if (immutable_db_options_.enable_pipelined_write) {
+    return PipelinedWriteImpl(write_options, my_batch, callback, log_used,
+                              log_ref, disable_memtable, seq_used);
+  }
+
+  PERF_TIMER_GUARD(write_pre_and_post_process_time);
+  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
+                        disable_memtable, batch_cnt, pre_release_callback,
+                        post_memtable_callback);
+  StopWatch write_sw(immutable_db_options_.clock, stats_, DB_WRITE);
+
+  write_thread_.JoinBatchGroup(&w);
+  if (w.state == WriteThread::STATE_PARALLEL_MEMTABLE_WRITER) {
+    // we are a non-leader in a parallel group
+
+    if (w.ShouldWriteToMemtable()) {
+      PERF_TIMER_STOP(write_pre_and_post_process_time);
+      PERF_TIMER_GUARD(write_memtable_time);
+
+      ColumnFamilyMemTablesImpl column_family_memtables(
+          versions_->GetColumnFamilySet());
+      w.status = WriteBatchInternal::InsertInto(
+          &w, w.sequence, &column_family_memtables, &flush_scheduler_,
+          &trim_history_scheduler_,
+          write_options.ignore_missing_column_families, 0 /*log_number*/, this,
+          true /*concurrent_memtable_writes*/, seq_per_batch_, w.batch_cnt,
+          batch_per_txn_, write_options.memtable_insert_hint_per_batch);
+
+      PERF_TIMER_START(write_pre_and_post_process_time);
+    }
+
+    if (write_thread_.CompleteParallelMemTableWriter(&w)) {
+      // we're responsible for exit batch group
+      // TODO(myabandeh): propagate status to write_group
+      auto last_sequence = w.write_group->last_sequence;
+      for (auto* tmp_w : *(w.write_group)) {
+        assert(tmp_w);
+        if (tmp_w->post_memtable_callback) {
+          Status tmp_s =
+              (*tmp_w->post_memtable_callback)(last_sequence, disable_memtable);
+          // TODO: propagate the execution status of post_memtable_callback to
+          // caller.
+          assert(tmp_s.ok());
+        }
+      }
+      versions_->SetLastSequence(last_sequence);
+      MemTableInsertStatusCheck(w.status);
+      write_thread_.ExitAsBatchGroupFollower(&w);
+    }
+    assert(w.state == WriteThread::STATE_COMPLETED);
+    // STATE_COMPLETED conditional below handles exit
+  }
+  if (w.state == WriteThread::STATE_COMPLETED) {
+    if (log_used != nullptr) {
+      *log_used = w.log_used;
+    }
+    if (seq_used != nullptr) {
+      *seq_used = w.sequence;
+    }
+    // write is complete and leader has updated sequence
+    return w.FinalStatus();
+  }
+  // else we are the leader of the write batch group
+  assert(w.state == WriteThread::STATE_GROUP_LEADER);
+  Status status;
+  // Once it reaches this point, the current writer "w" will try to do its
+  // write job. It may also pick up some of the remaining writers in
+  // "writers_" when it finds it suitable, and finish them in the same write
+  // batch. This is how a write job can be completed by a writer other than
+  // the one that submitted it.
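+  // For illustration (not part of this change): this is the group-commit
+  // path. If several threads call, e.g., db->Put(WriteOptions(), "k1", "v1")
+  // and db->Put(WriteOptions(), "k2", "v2") concurrently, one thread becomes
+  // the leader and persists both batches with a single WAL append, while the
+  // others block in JoinBatchGroup() and simply return the group outcome via
+  // FinalStatus().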
+  WriteContext write_context;
+  LogContext log_context(write_options.sync);
+  WriteThread::WriteGroup write_group;
+  bool in_parallel_group = false;
+  uint64_t last_sequence = kMaxSequenceNumber;
+
+  assert(!two_write_queues_ || !disable_memtable);
+  {
+    // With concurrent writes we do preprocess only in the write thread that
+    // also does write to memtable to avoid sync issue on shared data structure
+    // with the other thread
+
+    // PreprocessWrite does its own perf timing.
+    PERF_TIMER_STOP(write_pre_and_post_process_time);
+
+    status = PreprocessWrite(write_options, &log_context, &write_context);
+    if (!two_write_queues_) {
+      // Assign it after ::PreprocessWrite since the sequence might advance
+      // inside it by WriteRecoverableState
+      last_sequence = versions_->LastSequence();
+    }
+
+    PERF_TIMER_START(write_pre_and_post_process_time);
+  }
+
+  // Add to log and apply to memtable. We can release the lock
+  // during this phase since &w is currently responsible for logging
+  // and protects against concurrent loggers and concurrent writes
+  // into memtables
+
+  TEST_SYNC_POINT("DBImpl::WriteImpl:BeforeLeaderEnters");
+  last_batch_group_size_ =
+      write_thread_.EnterAsBatchGroupLeader(&w, &write_group);
+
+  IOStatus io_s;
+  Status pre_release_cb_status;
+  if (status.ok()) {
+    // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock
+    // grabs but does not seem thread-safe.
+    if (tracer_) {
+      InstrumentedMutexLock lock(&trace_mutex_);
+      if (tracer_ && tracer_->IsWriteOrderPreserved()) {
+        for (auto* writer : write_group) {
+          // TODO: maybe handle the tracing status?
+          tracer_->Write(writer->batch).PermitUncheckedError();
+        }
+      }
+    }
+    // Rules for when we can update the memtable concurrently
+    // 1. supported by memtable
+    // 2. Puts are not okay if inplace_update_support
+    // 3. Merges are not okay
+    //
+    // Rules 1..2 are enforced by checking the options
+    // during startup (CheckConcurrentWritesSupported), so if
+    // options.allow_concurrent_memtable_write is true then they can be
+    // assumed to be true. Rule 3 is checked for each batch. We could
+    // relax rule 2 if we could prevent write batches from referring
+    // more than once to a particular key.
+    bool parallel = immutable_db_options_.allow_concurrent_memtable_write &&
+                    write_group.size > 1;
+    size_t total_count = 0;
+    size_t valid_batches = 0;
+    size_t total_byte_size = 0;
+    size_t pre_release_callback_cnt = 0;
+    for (auto* writer : write_group) {
+      assert(writer);
+      if (writer->CheckCallback(this)) {
+        valid_batches += writer->batch_cnt;
+        if (writer->ShouldWriteToMemtable()) {
+          total_count += WriteBatchInternal::Count(writer->batch);
+          parallel = parallel && !writer->batch->HasMerge();
+        }
+        total_byte_size = WriteBatchInternal::AppendedByteSize(
+            total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
+        if (writer->pre_release_callback) {
+          pre_release_callback_cnt++;
+        }
+      }
+    }
+    // Note about seq_per_batch_: either disableWAL is set for the entire write
+    // group or not. In either case we inc seq for each write batch with no
+    // failed callback. This means that there could be a batch with
+    // disable_memtable in between; although we do not write this batch to
+    // memtable it still consumes a seq. Otherwise, if !seq_per_batch_, we inc
+    // the seq per valid written key to mem.
+    size_t seq_inc = seq_per_batch_ ?
valid_batches : total_count; + + const bool concurrent_update = two_write_queues_; + // Update stats while we are an exclusive group leader, so we know + // that nobody else can be writing to these particular stats. + // We're optimistic, updating the stats before we successfully + // commit. That lets us release our leader status early. + auto stats = default_cf_internal_stats_; + stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count, + concurrent_update); + RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count); + stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size, + concurrent_update); + RecordTick(stats_, BYTES_WRITTEN, total_byte_size); + stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1, + concurrent_update); + RecordTick(stats_, WRITE_DONE_BY_SELF); + auto write_done_by_other = write_group.size - 1; + if (write_done_by_other > 0) { + stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther, + write_done_by_other, concurrent_update); + RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other); + } + RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size); + + if (write_options.disableWAL) { + has_unpersisted_data_.store(true, std::memory_order_relaxed); + } + + PERF_TIMER_STOP(write_pre_and_post_process_time); + + if (!two_write_queues_) { + if (status.ok() && !write_options.disableWAL) { + assert(log_context.log_file_number_size); + LogFileNumberSize& log_file_number_size = + *(log_context.log_file_number_size); + PERF_TIMER_GUARD(write_wal_time); + io_s = + WriteToWAL(write_group, log_context.writer, log_used, + log_context.need_log_sync, log_context.need_log_dir_sync, + last_sequence + 1, log_file_number_size); + } + } else { + if (status.ok() && !write_options.disableWAL) { + PERF_TIMER_GUARD(write_wal_time); + // LastAllocatedSequence is increased inside WriteToWAL under + // wal_write_mutex_ to ensure ordered events in WAL + io_s = ConcurrentWriteToWAL(write_group, log_used, &last_sequence, + seq_inc); + } else { + // Otherwise we inc seq number for memtable writes + last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc); + } + } + status = io_s; + assert(last_sequence != kMaxSequenceNumber); + const SequenceNumber current_sequence = last_sequence + 1; + last_sequence += seq_inc; + + // PreReleaseCallback is called after WAL write and before memtable write + if (status.ok()) { + SequenceNumber next_sequence = current_sequence; + size_t index = 0; + // Note: the logic for advancing seq here must be consistent with the + // logic in WriteBatchInternal::InsertInto(write_group...) as well as + // with WriteBatchInternal::InsertInto(write_batch...) that is called on + // the merged batch during recovery from the WAL. 
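+    // Worked example (illustrative, not part of this change): with
+    // !seq_per_batch_ and a group of three batches holding 2, 1, and 4 keys,
+    // seq_inc = total_count = 7; the writers below get sequences s+1, s+3,
+    // and s+4, where s is last_sequence before this group, and last_sequence
+    // ends at s+7. With seq_per_batch_, each valid batch instead consumes one
+    // sequence per sub-batch (its batch_cnt).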
+      for (auto* writer : write_group) {
+        if (writer->CallbackFailed()) {
+          continue;
+        }
+        writer->sequence = next_sequence;
+        if (writer->pre_release_callback) {
+          Status ws = writer->pre_release_callback->Callback(
+              writer->sequence, disable_memtable, writer->log_used, index++,
+              pre_release_callback_cnt);
+          if (!ws.ok()) {
+            status = pre_release_cb_status = ws;
+            break;
+          }
+        }
+        if (seq_per_batch_) {
+          assert(writer->batch_cnt);
+          next_sequence += writer->batch_cnt;
+        } else if (writer->ShouldWriteToMemtable()) {
+          next_sequence += WriteBatchInternal::Count(writer->batch);
+        }
+      }
+    }
+
+    if (status.ok()) {
+      PERF_TIMER_GUARD(write_memtable_time);
+
+      if (!parallel) {
+        // w.sequence will be set inside InsertInto
+        w.status = WriteBatchInternal::InsertInto(
+            write_group, current_sequence, column_family_memtables_.get(),
+            &flush_scheduler_, &trim_history_scheduler_,
+            write_options.ignore_missing_column_families,
+            0 /*recovery_log_number*/, this, parallel, seq_per_batch_,
+            batch_per_txn_);
+      } else {
+        write_group.last_sequence = last_sequence;
+        write_thread_.LaunchParallelMemTableWriters(&write_group);
+        in_parallel_group = true;
+
+        // Each parallel follower does its own memtable writes. The leader
+        // should also do its own.
+        if (w.ShouldWriteToMemtable()) {
+          ColumnFamilyMemTablesImpl column_family_memtables(
+              versions_->GetColumnFamilySet());
+          assert(w.sequence == current_sequence);
+          w.status = WriteBatchInternal::InsertInto(
+              &w, w.sequence, &column_family_memtables, &flush_scheduler_,
+              &trim_history_scheduler_,
+              write_options.ignore_missing_column_families, 0 /*log_number*/,
+              this, true /*concurrent_memtable_writes*/, seq_per_batch_,
+              w.batch_cnt, batch_per_txn_,
+              write_options.memtable_insert_hint_per_batch);
+        }
+      }
+      if (seq_used != nullptr) {
+        *seq_used = w.sequence;
+      }
+    }
+  }
+  PERF_TIMER_START(write_pre_and_post_process_time);
+
+  if (!io_s.ok()) {
+    // Check WriteToWAL status
+    IOStatusCheck(io_s);
+  }
+  if (!w.CallbackFailed()) {
+    if (!io_s.ok()) {
+      assert(pre_release_cb_status.ok());
+    } else {
+      WriteStatusCheck(pre_release_cb_status);
+    }
+  } else {
+    assert(pre_release_cb_status.ok());
+  }
+
+  if (log_context.need_log_sync) {
+    VersionEdit synced_wals;
+    log_write_mutex_.Lock();
+    if (status.ok()) {
+      MarkLogsSynced(logfile_number_, log_context.need_log_dir_sync,
+                     &synced_wals);
+    } else {
+      MarkLogsNotSynced(logfile_number_);
+    }
+    log_write_mutex_.Unlock();
+    if (status.ok() && synced_wals.IsWalAddition()) {
+      InstrumentedMutexLock l(&mutex_);
+      // TODO: plumb Env::IOActivity
+      const ReadOptions read_options;
+      status = ApplyWALToManifest(read_options, &synced_wals);
+    }
+
+    // Requesting sync with two_write_queues_ is expected to be very rare. We
+    // hence provide a simple implementation that is not necessarily efficient.
+    if (two_write_queues_) {
+      if (manual_wal_flush_) {
+        status = FlushWAL(true);
+      } else {
+        status = SyncWAL();
+      }
+    }
+  }
+
+  bool should_exit_batch_group = true;
+  if (in_parallel_group) {
+    // CompleteParallelWorker returns true if this thread should
+    // handle exit, false means somebody else did
+    should_exit_batch_group = write_thread_.CompleteParallelMemTableWriter(&w);
+  }
+  if (should_exit_batch_group) {
+    if (status.ok()) {
+      for (auto* tmp_w : write_group) {
+        assert(tmp_w);
+        if (tmp_w->post_memtable_callback) {
+          Status tmp_s =
+              (*tmp_w->post_memtable_callback)(last_sequence, disable_memtable);
+          // TODO: propagate the execution status of post_memtable_callback to
+          // caller.
+          assert(tmp_s.ok());
+        }
+      }
+      // Note: if we are to resume after non-OK statuses we need to revisit how
+      // we react to non-OK statuses here.
+      versions_->SetLastSequence(last_sequence);
+    }
+    MemTableInsertStatusCheck(w.status);
+    write_thread_.ExitAsBatchGroupLeader(write_group, status);
+  }
+
+  if (status.ok()) {
+    status = w.FinalStatus();
+  }
+  return status;
+}
+
+Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
+                                  WriteBatch* my_batch, WriteCallback* callback,
+                                  uint64_t* log_used, uint64_t log_ref,
+                                  bool disable_memtable, uint64_t* seq_used) {
+  PERF_TIMER_GUARD(write_pre_and_post_process_time);
+  StopWatch write_sw(immutable_db_options_.clock, stats_, DB_WRITE);
+
+  WriteContext write_context;
+
+  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
+                        disable_memtable, /*_batch_cnt=*/0,
+                        /*_pre_release_callback=*/nullptr);
+  write_thread_.JoinBatchGroup(&w);
+  TEST_SYNC_POINT("DBImplWrite::PipelinedWriteImpl:AfterJoinBatchGroup");
+  if (w.state == WriteThread::STATE_GROUP_LEADER) {
+    WriteThread::WriteGroup wal_write_group;
+    if (w.callback && !w.callback->AllowWriteBatching()) {
+      write_thread_.WaitForMemTableWriters();
+    }
+    LogContext log_context(!write_options.disableWAL && write_options.sync);
+    // PreprocessWrite does its own perf timing.
+    PERF_TIMER_STOP(write_pre_and_post_process_time);
+    w.status = PreprocessWrite(write_options, &log_context, &write_context);
+    PERF_TIMER_START(write_pre_and_post_process_time);
+
+    // This can set a non-OK status if a callback fails.
+    last_batch_group_size_ =
+        write_thread_.EnterAsBatchGroupLeader(&w, &wal_write_group);
+    const SequenceNumber current_sequence =
+        write_thread_.UpdateLastSequence(versions_->LastSequence()) + 1;
+    size_t total_count = 0;
+    size_t total_byte_size = 0;
+
+    if (w.status.ok()) {
+      // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock
+      // grabs but does not seem thread-safe.
+      if (tracer_) {
+        InstrumentedMutexLock lock(&trace_mutex_);
+        if (tracer_ != nullptr && tracer_->IsWriteOrderPreserved()) {
+          for (auto* writer : wal_write_group) {
+            // TODO: maybe handle the tracing status?
+            tracer_->Write(writer->batch).PermitUncheckedError();
+          }
+        }
+      }
+      SequenceNumber next_sequence = current_sequence;
+      for (auto* writer : wal_write_group) {
+        assert(writer);
+        if (writer->CheckCallback(this)) {
+          if (writer->ShouldWriteToMemtable()) {
+            writer->sequence = next_sequence;
+            size_t count = WriteBatchInternal::Count(writer->batch);
+            next_sequence += count;
+            total_count += count;
+          }
+          total_byte_size = WriteBatchInternal::AppendedByteSize(
+              total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
+        }
+      }
+      if (w.disable_wal) {
+        has_unpersisted_data_.store(true, std::memory_order_relaxed);
+      }
+      write_thread_.UpdateLastSequence(current_sequence + total_count - 1);
+    }
+
+    auto stats = default_cf_internal_stats_;
+    stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count);
+    RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
+    stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size);
+    RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
+    RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
+
+    PERF_TIMER_STOP(write_pre_and_post_process_time);
+
+    IOStatus io_s;
+    io_s.PermitUncheckedError();  // Allow io_s to be uninitialized
+
+    if (w.status.ok() && !write_options.disableWAL) {
+      PERF_TIMER_GUARD(write_wal_time);
+      stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1);
+      RecordTick(stats_, WRITE_DONE_BY_SELF, 1);
+      if (wal_write_group.size > 1) {
+        stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
+                          wal_write_group.size - 1);
+        RecordTick(stats_, WRITE_DONE_BY_OTHER, wal_write_group.size - 1);
+      }
+      assert(log_context.log_file_number_size);
+      LogFileNumberSize& log_file_number_size =
+          *(log_context.log_file_number_size);
+      io_s =
+          WriteToWAL(wal_write_group, log_context.writer, log_used,
+                     log_context.need_log_sync, log_context.need_log_dir_sync,
+                     current_sequence, log_file_number_size);
+      w.status = io_s;
+    }
+
+    if (!io_s.ok()) {
+      // Check WriteToWAL status
+      IOStatusCheck(io_s);
+    } else if (!w.CallbackFailed()) {
+      WriteStatusCheck(w.status);
+    }
+
+    VersionEdit synced_wals;
+    if (log_context.need_log_sync) {
+      InstrumentedMutexLock l(&log_write_mutex_);
+      if (w.status.ok()) {
+        MarkLogsSynced(logfile_number_, log_context.need_log_dir_sync,
+                       &synced_wals);
+      } else {
+        MarkLogsNotSynced(logfile_number_);
+      }
+    }
+    if (w.status.ok() && synced_wals.IsWalAddition()) {
+      InstrumentedMutexLock l(&mutex_);
+      // TODO: plumb Env::IOActivity
+      const ReadOptions read_options;
+      w.status = ApplyWALToManifest(read_options, &synced_wals);
+    }
+    write_thread_.ExitAsBatchGroupLeader(wal_write_group, w.status);
+  }
+
+  // NOTE: the memtable_write_group is declared before the following
+  // `if` statement because its lifetime needs to be longer than the
+  // inner context of the `if`, as a reference to it may be used further
+  // below within the outer write thread.
+  WriteThread::WriteGroup memtable_write_group;
+
+  if (w.state == WriteThread::STATE_MEMTABLE_WRITER_LEADER) {
+    PERF_TIMER_GUARD(write_memtable_time);
+    assert(w.ShouldWriteToMemtable());
+    write_thread_.EnterAsMemTableWriter(&w, &memtable_write_group);
+    if (memtable_write_group.size > 1 &&
+        immutable_db_options_.allow_concurrent_memtable_write) {
+      write_thread_.LaunchParallelMemTableWriters(&memtable_write_group);
+    } else {
+      memtable_write_group.status = WriteBatchInternal::InsertInto(
+          memtable_write_group, w.sequence, column_family_memtables_.get(),
+          &flush_scheduler_, &trim_history_scheduler_,
+          write_options.ignore_missing_column_families,
0 /*log_number*/, this, + false /*concurrent_memtable_writes*/, seq_per_batch_, batch_per_txn_); + versions_->SetLastSequence(memtable_write_group.last_sequence); + write_thread_.ExitAsMemTableWriter(&w, memtable_write_group); + } + } else { + // NOTE: the memtable_write_group is never really used, + // so we need to set its status to pass ASSERT_STATUS_CHECKED + memtable_write_group.status.PermitUncheckedError(); + } + + if (w.state == WriteThread::STATE_PARALLEL_MEMTABLE_WRITER) { + assert(w.ShouldWriteToMemtable()); + ColumnFamilyMemTablesImpl column_family_memtables( + versions_->GetColumnFamilySet()); + w.status = WriteBatchInternal::InsertInto( + &w, w.sequence, &column_family_memtables, &flush_scheduler_, + &trim_history_scheduler_, write_options.ignore_missing_column_families, + 0 /*log_number*/, this, true /*concurrent_memtable_writes*/, + false /*seq_per_batch*/, 0 /*batch_cnt*/, true /*batch_per_txn*/, + write_options.memtable_insert_hint_per_batch); + if (write_thread_.CompleteParallelMemTableWriter(&w)) { + MemTableInsertStatusCheck(w.status); + versions_->SetLastSequence(w.write_group->last_sequence); + write_thread_.ExitAsMemTableWriter(&w, *w.write_group); + } + } + if (seq_used != nullptr) { + *seq_used = w.sequence; + } + + assert(w.state == WriteThread::STATE_COMPLETED); + return w.FinalStatus(); +} + +Status DBImpl::UnorderedWriteMemtable(const WriteOptions& write_options, + WriteBatch* my_batch, + WriteCallback* callback, uint64_t log_ref, + SequenceNumber seq, + const size_t sub_batch_cnt) { + PERF_TIMER_GUARD(write_pre_and_post_process_time); + StopWatch write_sw(immutable_db_options_.clock, stats_, DB_WRITE); + + WriteThread::Writer w(write_options, my_batch, callback, log_ref, + false /*disable_memtable*/); + + if (w.CheckCallback(this) && w.ShouldWriteToMemtable()) { + w.sequence = seq; + size_t total_count = WriteBatchInternal::Count(my_batch); + InternalStats* stats = default_cf_internal_stats_; + stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count); + RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count); + + ColumnFamilyMemTablesImpl column_family_memtables( + versions_->GetColumnFamilySet()); + w.status = WriteBatchInternal::InsertInto( + &w, w.sequence, &column_family_memtables, &flush_scheduler_, + &trim_history_scheduler_, write_options.ignore_missing_column_families, + 0 /*log_number*/, this, true /*concurrent_memtable_writes*/, + seq_per_batch_, sub_batch_cnt, true /*batch_per_txn*/, + write_options.memtable_insert_hint_per_batch); + if (write_options.disableWAL) { + has_unpersisted_data_.store(true, std::memory_order_relaxed); + } + } + + size_t pending_cnt = pending_memtable_writes_.fetch_sub(1) - 1; + if (pending_cnt == 0) { + // switch_cv_ waits until pending_memtable_writes_ = 0. Locking its mutex + // before notify ensures that cv is in waiting state when it is notified + // thus not missing the update to pending_memtable_writes_ even though it is + // not modified under the mutex. + std::lock_guard lck(switch_mutex_); + switch_cv_.notify_all(); + } + WriteStatusCheck(w.status); + + if (!w.FinalStatus().ok()) { + return w.FinalStatus(); + } + return Status::OK(); +} + +// The 2nd write queue. If enabled it will be used only for WAL-only writes. +// This is the only queue that updates LastPublishedSequence which is only +// applicable in a two-queue setting. 
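+// For illustration (not part of this change): with two_write_queues_, a
+// WAL-only write such as a prepare batch under the WriteCommitted policy
+// takes this queue (nonmem_write_thread_) and can proceed concurrently with
+// memtable writes in the main queue; as noted earlier, such prepares do not
+// consume a sequence number unless seq_per_batch_ is set.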
+Status DBImpl::WriteImplWALOnly( + WriteThread* write_thread, const WriteOptions& write_options, + WriteBatch* my_batch, WriteCallback* callback, uint64_t* log_used, + const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt, + PreReleaseCallback* pre_release_callback, const AssignOrder assign_order, + const PublishLastSeq publish_last_seq, const bool disable_memtable) { + PERF_TIMER_GUARD(write_pre_and_post_process_time); + WriteThread::Writer w(write_options, my_batch, callback, log_ref, + disable_memtable, sub_batch_cnt, pre_release_callback); + StopWatch write_sw(immutable_db_options_.clock, stats_, DB_WRITE); + + write_thread->JoinBatchGroup(&w); + assert(w.state != WriteThread::STATE_PARALLEL_MEMTABLE_WRITER); + if (w.state == WriteThread::STATE_COMPLETED) { + if (log_used != nullptr) { + *log_used = w.log_used; + } + if (seq_used != nullptr) { + *seq_used = w.sequence; + } + return w.FinalStatus(); + } + // else we are the leader of the write batch group + assert(w.state == WriteThread::STATE_GROUP_LEADER); + + if (publish_last_seq == kDoPublishLastSeq) { + Status status; + + // Currently we only use kDoPublishLastSeq in unordered_write + assert(immutable_db_options_.unordered_write); + WriteContext write_context; + if (error_handler_.IsDBStopped()) { + status = error_handler_.GetBGError(); + } + // TODO(myabandeh): Make preliminary checks thread-safe so we could do them + // without paying the cost of obtaining the mutex. + if (status.ok()) { + LogContext log_context; + status = PreprocessWrite(write_options, &log_context, &write_context); + WriteStatusCheckOnLocked(status); + } + if (!status.ok()) { + WriteThread::WriteGroup write_group; + write_thread->EnterAsBatchGroupLeader(&w, &write_group); + write_thread->ExitAsBatchGroupLeader(write_group, status); + return status; + } + } else { + InstrumentedMutexLock lock(&mutex_); + Status status = + DelayWrite(/*num_bytes=*/0ull, *write_thread, write_options); + if (!status.ok()) { + WriteThread::WriteGroup write_group; + write_thread->EnterAsBatchGroupLeader(&w, &write_group); + write_thread->ExitAsBatchGroupLeader(write_group, status); + return status; + } + } + + WriteThread::WriteGroup write_group; + uint64_t last_sequence; + write_thread->EnterAsBatchGroupLeader(&w, &write_group); + // Note: no need to update last_batch_group_size_ here since the batch writes + // to WAL only + // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock + // grabs but does not seem thread-safe. + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_ != nullptr && tracer_->IsWriteOrderPreserved()) { + for (auto* writer : write_group) { + // TODO: maybe handle the tracing status? + tracer_->Write(writer->batch).PermitUncheckedError(); + } + } + } + + size_t pre_release_callback_cnt = 0; + size_t total_byte_size = 0; + for (auto* writer : write_group) { + assert(writer); + if (writer->CheckCallback(this)) { + total_byte_size = WriteBatchInternal::AppendedByteSize( + total_byte_size, WriteBatchInternal::ByteSize(writer->batch)); + if (writer->pre_release_callback) { + pre_release_callback_cnt++; + } + } + } + + const bool concurrent_update = true; + // Update stats while we are an exclusive group leader, so we know + // that nobody else can be writing to these particular stats. + // We're optimistic, updating the stats before we successfully + // commit. That lets us release our leader status early. 
+  auto stats = default_cf_internal_stats_;
+  stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size,
+                    concurrent_update);
+  RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
+  stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1,
+                    concurrent_update);
+  RecordTick(stats_, WRITE_DONE_BY_SELF);
+  auto write_done_by_other = write_group.size - 1;
+  if (write_done_by_other > 0) {
+    stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
+                      write_done_by_other, concurrent_update);
+    RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
+  }
+  RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
+
+  PERF_TIMER_STOP(write_pre_and_post_process_time);
+
+  PERF_TIMER_GUARD(write_wal_time);
+  // LastAllocatedSequence is increased inside WriteToWAL under
+  // wal_write_mutex_ to ensure ordered events in WAL
+  size_t seq_inc = 0 /* total_count */;
+  if (assign_order == kDoAssignOrder) {
+    size_t total_batch_cnt = 0;
+    for (auto* writer : write_group) {
+      assert(writer->batch_cnt || !seq_per_batch_);
+      if (!writer->CallbackFailed()) {
+        total_batch_cnt += writer->batch_cnt;
+      }
+    }
+    seq_inc = total_batch_cnt;
+  }
+  Status status;
+  if (!write_options.disableWAL) {
+    IOStatus io_s =
+        ConcurrentWriteToWAL(write_group, log_used, &last_sequence, seq_inc);
+    status = io_s;
+    // last_sequence may not be set if there is an error
+    // This error checking and return is moved up to avoid using uninitialized
+    // last_sequence.
+    if (!io_s.ok()) {
+      IOStatusCheck(io_s);
+      write_thread->ExitAsBatchGroupLeader(write_group, status);
+      return status;
+    }
+  } else {
+    // Otherwise we inc seq number to do solely the seq allocation
+    last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc);
+  }
+
+  size_t memtable_write_cnt = 0;
+  auto curr_seq = last_sequence + 1;
+  for (auto* writer : write_group) {
+    if (writer->CallbackFailed()) {
+      continue;
+    }
+    writer->sequence = curr_seq;
+    if (assign_order == kDoAssignOrder) {
+      assert(writer->batch_cnt || !seq_per_batch_);
+      curr_seq += writer->batch_cnt;
+    }
+    if (!writer->disable_memtable) {
+      memtable_write_cnt++;
+    }
+    // else seq advances only by memtable writes
+  }
+  if (status.ok() && write_options.sync) {
+    assert(!write_options.disableWAL);
+    // Requesting sync with two_write_queues_ is expected to be very rare. We
+    // hence provide a simple implementation that is not necessarily efficient.
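+    // Note (illustrative, not part of this change): FlushWAL(true) first
+    // moves any records buffered by manual_wal_flush_ into the file and then
+    // syncs it, whereas SyncWAL() only syncs data that has already reached
+    // the file; that is why the manual-flush case below cannot call SyncWAL()
+    // directly.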
+    if (manual_wal_flush_) {
+      status = FlushWAL(true);
+    } else {
+      status = SyncWAL();
+    }
+  }
+  PERF_TIMER_START(write_pre_and_post_process_time);
+
+  if (!w.CallbackFailed()) {
+    WriteStatusCheck(status);
+  }
+  if (status.ok()) {
+    size_t index = 0;
+    for (auto* writer : write_group) {
+      if (!writer->CallbackFailed() && writer->pre_release_callback) {
+        assert(writer->sequence != kMaxSequenceNumber);
+        Status ws = writer->pre_release_callback->Callback(
+            writer->sequence, disable_memtable, writer->log_used, index++,
+            pre_release_callback_cnt);
+        if (!ws.ok()) {
+          status = ws;
+          break;
+        }
+      }
+    }
+  }
+  if (publish_last_seq == kDoPublishLastSeq) {
+    versions_->SetLastSequence(last_sequence + seq_inc);
+    // Currently we only use kDoPublishLastSeq in unordered_write
+    assert(immutable_db_options_.unordered_write);
+  }
+  if (immutable_db_options_.unordered_write && status.ok()) {
+    pending_memtable_writes_ += memtable_write_cnt;
+  }
+  write_thread->ExitAsBatchGroupLeader(write_group, status);
+  if (status.ok()) {
+    status = w.FinalStatus();
+  }
+  if (seq_used != nullptr) {
+    *seq_used = w.sequence;
+  }
+  return status;
+}
+
+void DBImpl::WriteStatusCheckOnLocked(const Status& status) {
+  // Is setting bg_error_ enough here? This will at least stop
+  // compaction and fail any further writes.
+  InstrumentedMutexLock l(&mutex_);
+  assert(!status.IsIOFenced() || !error_handler_.GetBGError().ok());
+  if (immutable_db_options_.paranoid_checks && !status.ok() &&
+      !status.IsBusy() && !status.IsIncomplete()) {
+    // Maybe change the return status to void?
+    error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback);
+  }
+}
+
+void DBImpl::WriteStatusCheck(const Status& status) {
+  // Is setting bg_error_ enough here? This will at least stop
+  // compaction and fail any further writes.
+  assert(!status.IsIOFenced() || !error_handler_.GetBGError().ok());
+  if (immutable_db_options_.paranoid_checks && !status.ok() &&
+      !status.IsBusy() && !status.IsIncomplete()) {
+    mutex_.Lock();
+    // Maybe change the return status to void?
+    error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback);
+    mutex_.Unlock();
+  }
+}
+
+void DBImpl::IOStatusCheck(const IOStatus& io_status) {
+  // Is setting bg_error_ enough here? This will at least stop
+  // compaction and fail any further writes.
+  if ((immutable_db_options_.paranoid_checks && !io_status.ok() &&
+       !io_status.IsBusy() && !io_status.IsIncomplete()) ||
+      io_status.IsIOFenced()) {
+    mutex_.Lock();
+    // Maybe change the return status to void?
+    error_handler_.SetBGError(io_status, BackgroundErrorReason::kWriteCallback);
+    mutex_.Unlock();
+  } else {
+    // Force the writable file to continue being writable.
+    logs_.back().writer->file()->reset_seen_error();
+  }
+}
+
+void DBImpl::MemTableInsertStatusCheck(const Status& status) {
+  // A non-OK status here indicates that the state implied by the
+  // WAL has diverged from the in-memory state. This could be
+  // because of a corrupt write_batch (very bad), or because the
+  // client specified an invalid column family and didn't specify
+  // ignore_missing_column_families.
+  if (!status.ok()) {
+    mutex_.Lock();
+    assert(!error_handler_.IsBGWorkStopped());
+    // Maybe change the return status to void?
+    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable)
+        .PermitUncheckedError();
+    mutex_.Unlock();
+  }
+}
+
+Status DBImpl::PreprocessWrite(const WriteOptions& write_options,
+                               LogContext* log_context,
+                               WriteContext* write_context) {
+  assert(write_context != nullptr && log_context != nullptr);
+  Status status;
+
+  if (error_handler_.IsDBStopped()) {
+    InstrumentedMutexLock l(&mutex_);
+    status = error_handler_.GetBGError();
+  }
+
+  PERF_TIMER_GUARD(write_scheduling_flushes_compactions_time);
+
+  if (UNLIKELY(status.ok() && total_log_size_ > GetMaxTotalWalSize())) {
+    assert(versions_);
+    InstrumentedMutexLock l(&mutex_);
+    const ColumnFamilySet* const column_families =
+        versions_->GetColumnFamilySet();
+    assert(column_families);
+    size_t num_cfs = column_families->NumberOfColumnFamilies();
+    assert(num_cfs >= 1);
+    if (num_cfs > 1) {
+      WaitForPendingWrites();
+      status = SwitchWAL(write_context);
+    }
+  }
+
+  if (UNLIKELY(status.ok() && write_buffer_manager_->ShouldFlush())) {
+    // Before a new memtable is added in SwitchMemtable(),
+    // write_buffer_manager_->ShouldFlush() will keep returning true. If
+    // another thread is writing to another DB with the same write buffer,
+    // they may also be flushed. We may end up flushing many more DBs than
+    // needed. It's suboptimal but still correct.
+    InstrumentedMutexLock l(&mutex_);
+    WaitForPendingWrites();
+    status = HandleWriteBufferManagerFlush(write_context);
+  }
+
+  if (UNLIKELY(status.ok() && !trim_history_scheduler_.Empty())) {
+    InstrumentedMutexLock l(&mutex_);
+    status = TrimMemtableHistory(write_context);
+  }
+
+  if (UNLIKELY(status.ok() && !flush_scheduler_.Empty())) {
+    InstrumentedMutexLock l(&mutex_);
+    WaitForPendingWrites();
+    status = ScheduleFlushes(write_context);
+  }
+
+  PERF_TIMER_STOP(write_scheduling_flushes_compactions_time);
+  PERF_TIMER_GUARD(write_pre_and_post_process_time);
+
+  if (UNLIKELY(status.ok() && (write_controller_.IsStopped() ||
+                               write_controller_.NeedsDelay()))) {
+    PERF_TIMER_STOP(write_pre_and_post_process_time);
+    PERF_TIMER_GUARD(write_delay_time);
+    // We don't know the size of the current batch, so we always use the size
+    // of the previous one. This might create a fairness issue in that
+    // expiration might happen for smaller writes while larger writes can go
+    // through. Can optimize it if it becomes an issue.
+    InstrumentedMutexLock l(&mutex_);
+    status = DelayWrite(last_batch_group_size_, write_thread_, write_options);
+    PERF_TIMER_START(write_pre_and_post_process_time);
+  }
+
+  // If memory usage has exceeded a certain threshold,
+  // write_buffer_manager_->ShouldStall() returns true to all threads writing
+  // to all DBs and writers will be stalled.
+  // It does a soft check because WriteBufferManager::buffer_limit_ has
+  // already been exceeded at this point, so no new write (including the
+  // current one) will go through until memory usage is decreased.
+  if (UNLIKELY(status.ok() && write_buffer_manager_->ShouldStall())) {
+    default_cf_internal_stats_->AddDBStats(
+        InternalStats::kIntStatsWriteBufferManagerLimitStopsCounts, 1,
+        true /* concurrent */);
+    if (write_options.no_slowdown) {
+      status = Status::Incomplete("Write stall");
+    } else {
+      InstrumentedMutexLock l(&mutex_);
+      WriteBufferManagerStallWrites();
+    }
+  }
+  InstrumentedMutexLock l(&log_write_mutex_);
+  if (status.ok() && log_context->need_log_sync) {
+    // Wait until the parallel syncs are finished. Any sync process has to
+    // sync the front log too, so it is enough to check the status of front().
+    // We do a while loop since log_sync_cv_ is signalled when any sync is
+    // finished.
+    // Note: there does not seem to be a reason to wait for parallel sync at
+    // this early step, but it is not important since parallel sync (SyncWAL)
+    // and need_log_sync are usually not used together.
+    while (logs_.front().IsSyncing()) {
+      log_sync_cv_.Wait();
+    }
+    for (auto& log : logs_) {
+      // This is just to prevent the logs from being synced by a parallel
+      // SyncWAL call. We will do the actual syncing later, after we write to
+      // the WAL.
+      // Note: there does not seem to be a reason to set this early before we
+      // actually write to the WAL
+      log.PrepareForSync();
+    }
+  } else {
+    log_context->need_log_sync = false;
+  }
+  log_context->writer = logs_.back().writer;
+  log_context->need_log_dir_sync =
+      log_context->need_log_dir_sync && !log_dir_synced_;
+  log_context->log_file_number_size = std::addressof(alive_log_files_.back());
+
+  return status;
+}
+
+Status DBImpl::MergeBatch(const WriteThread::WriteGroup& write_group,
+                          WriteBatch* tmp_batch, WriteBatch** merged_batch,
+                          size_t* write_with_wal,
+                          WriteBatch** to_be_cached_state) {
+  assert(write_with_wal != nullptr);
+  assert(tmp_batch != nullptr);
+  assert(*to_be_cached_state == nullptr);
+  *write_with_wal = 0;
+  auto* leader = write_group.leader;
+  assert(!leader->disable_wal);  // Same holds for all in the batch group
+  if (write_group.size == 1 && !leader->CallbackFailed() &&
+      leader->batch->GetWalTerminationPoint().is_cleared()) {
+    // we simply write the first WriteBatch to WAL if the group only
+    // contains one batch, that batch should be written to the WAL,
+    // and the batch is not wanting to be truncated
+    *merged_batch = leader->batch;
+    if (WriteBatchInternal::IsLatestPersistentState(*merged_batch)) {
+      *to_be_cached_state = *merged_batch;
+    }
+    *write_with_wal = 1;
+  } else {
+    // WAL needs all of the batches flattened into a single batch.
+    // We could avoid copying here with an iov-like AddRecord
+    // interface
+    *merged_batch = tmp_batch;
+    for (auto writer : write_group) {
+      if (!writer->CallbackFailed()) {
+        Status s = WriteBatchInternal::Append(*merged_batch, writer->batch,
+                                              /*WAL_only*/ true);
+        if (!s.ok()) {
+          tmp_batch->Clear();
+          return s;
+        }
+        if (WriteBatchInternal::IsLatestPersistentState(writer->batch)) {
+          // We only need to cache the last of such write batch
+          *to_be_cached_state = writer->batch;
+        }
+        (*write_with_wal)++;
+      }
+    }
+  }
+  // return merged_batch;
+  return Status::OK();
+}
+
+// When two_write_queues_ is disabled, this function is called from the only
+// write thread. Otherwise this must be called holding log_write_mutex_.
+IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch,
+                            log::Writer* log_writer, uint64_t* log_used,
+                            uint64_t* log_size,
+                            Env::IOPriority rate_limiter_priority,
+                            LogFileNumberSize& log_file_number_size) {
+  assert(log_size != nullptr);
+
+  Slice log_entry = WriteBatchInternal::Contents(&merged_batch);
+  TEST_SYNC_POINT_CALLBACK("DBImpl::WriteToWAL:log_entry", &log_entry);
+  auto s = merged_batch.VerifyChecksum();
+  if (!s.ok()) {
+    return status_to_io_status(std::move(s));
+  }
+  *log_size = log_entry.size();
+  // When two_write_queues_ is enabled, WriteToWAL has to be protected from
+  // concurrent calls from the two queues anyway, and log_write_mutex_ is
+  // already held. Otherwise, if manual_wal_flush_ is enabled, we need to
+  // protect log_writer->AddRecord from possible concurrent calls via FlushWAL
+  // by the application.
+  const bool needs_locking = manual_wal_flush_ && !two_write_queues_;
+  // Due to performance concerns about missed branch prediction, we penalize
+  // the newer manual_wal_flush_ feature (with UNLIKELY) instead of the more
+  // common case when we do not need any locking.
+  if (UNLIKELY(needs_locking)) {
+    log_write_mutex_.Lock();
+  }
+  IOStatus io_s = log_writer->AddRecord(log_entry, rate_limiter_priority);
+
+  if (UNLIKELY(needs_locking)) {
+    log_write_mutex_.Unlock();
+  }
+  if (log_used != nullptr) {
+    *log_used = logfile_number_;
+  }
+  total_log_size_ += log_entry.size();
+  log_file_number_size.AddSize(*log_size);
+  log_empty_ = false;
+  return io_s;
+}
+
+IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
+                            log::Writer* log_writer, uint64_t* log_used,
+                            bool need_log_sync, bool need_log_dir_sync,
+                            SequenceNumber sequence,
+                            LogFileNumberSize& log_file_number_size) {
+  IOStatus io_s;
+  assert(!two_write_queues_);
+  assert(!write_group.leader->disable_wal);
+  // Same holds for all in the batch group
+  size_t write_with_wal = 0;
+  WriteBatch* to_be_cached_state = nullptr;
+  WriteBatch* merged_batch;
+  io_s = status_to_io_status(MergeBatch(write_group, &tmp_batch_, &merged_batch,
+                                        &write_with_wal, &to_be_cached_state));
+  if (UNLIKELY(!io_s.ok())) {
+    return io_s;
+  }
+
+  if (merged_batch == write_group.leader->batch) {
+    write_group.leader->log_used = logfile_number_;
+  } else if (write_with_wal > 1) {
+    for (auto writer : write_group) {
+      writer->log_used = logfile_number_;
+    }
+  }
+
+  WriteBatchInternal::SetSequence(merged_batch, sequence);
+
+  uint64_t log_size;
+  io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size,
+                    write_group.leader->rate_limiter_priority,
+                    log_file_number_size);
+  if (to_be_cached_state) {
+    cached_recoverable_state_ = *to_be_cached_state;
+    cached_recoverable_state_empty_ = false;
+  }
+
+  if (io_s.ok() && need_log_sync) {
+    StopWatch sw(immutable_db_options_.clock, stats_, WAL_FILE_SYNC_MICROS);
+    // It's safe to access logs_ with unlocked mutex_ here because:
+    //  - we've set getting_synced=true for all logs,
+    //    so other threads won't pop from logs_ while we're here,
+    //  - only writer thread can push to logs_, and we're in
+    //    writer thread, so no one will push to logs_,
+    //  - as long as other threads don't modify it, it's safe to read
+    //    from std::deque from multiple threads concurrently.
+    //
+    // Sync operation should work with locked log_write_mutex_, because:
+    // when DBOptions.manual_wal_flush_ is set,
+    // FlushWAL function will be invoked by another thread.
+    // Without log_write_mutex_ locked, the log file may suffer data
+    // corruption.
+
+    const bool needs_locking = manual_wal_flush_ && !two_write_queues_;
+    if (UNLIKELY(needs_locking)) {
+      log_write_mutex_.Lock();
+    }
+
+    for (auto& log : logs_) {
+      io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync);
+      if (!io_s.ok()) {
+        break;
+      }
+    }
+
+    if (UNLIKELY(needs_locking)) {
+      log_write_mutex_.Unlock();
+    }
+
+    if (io_s.ok() && need_log_dir_sync) {
+      // We only sync the WAL directory the first time WAL syncing is
+      // requested, so that in case users never turn on WAL sync,
+      // we can avoid the disk I/O in the write code path.
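+      // Note (illustrative, not part of this change): syncing the WAL
+      // directory makes the directory entry of a newly created WAL file
+      // durable; without it, a crash could lose the file's name even though
+      // the file's contents were already fsynced.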
+ io_s = directories_.GetWalDir()->FsyncWithDirOptions( + IOOptions(), nullptr, + DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced)); + } + } + + if (merged_batch == &tmp_batch_) { + tmp_batch_.Clear(); + } + if (io_s.ok()) { + auto stats = default_cf_internal_stats_; + if (need_log_sync) { + stats->AddDBStats(InternalStats::kIntStatsWalFileSynced, 1); + RecordTick(stats_, WAL_FILE_SYNCED); + } + stats->AddDBStats(InternalStats::kIntStatsWalFileBytes, log_size); + RecordTick(stats_, WAL_FILE_BYTES, log_size); + stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal); + RecordTick(stats_, WRITE_WITH_WAL, write_with_wal); + } + return io_s; +} + +IOStatus DBImpl::ConcurrentWriteToWAL( + const WriteThread::WriteGroup& write_group, uint64_t* log_used, + SequenceNumber* last_sequence, size_t seq_inc) { + IOStatus io_s; + + assert(two_write_queues_ || immutable_db_options_.unordered_write); + assert(!write_group.leader->disable_wal); + // Same holds for all in the batch group + WriteBatch tmp_batch; + size_t write_with_wal = 0; + WriteBatch* to_be_cached_state = nullptr; + WriteBatch* merged_batch; + io_s = status_to_io_status(MergeBatch(write_group, &tmp_batch, &merged_batch, + &write_with_wal, &to_be_cached_state)); + if (UNLIKELY(!io_s.ok())) { + return io_s; + } + + // We need to lock log_write_mutex_ since logs_ and alive_log_files might be + // pushed back concurrently + log_write_mutex_.Lock(); + if (merged_batch == write_group.leader->batch) { + write_group.leader->log_used = logfile_number_; + } else if (write_with_wal > 1) { + for (auto writer : write_group) { + writer->log_used = logfile_number_; + } + } + *last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc); + auto sequence = *last_sequence + 1; + WriteBatchInternal::SetSequence(merged_batch, sequence); + + log::Writer* log_writer = logs_.back().writer; + LogFileNumberSize& log_file_number_size = alive_log_files_.back(); + + assert(log_writer->get_log_number() == log_file_number_size.number); + + uint64_t log_size; + io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size, + write_group.leader->rate_limiter_priority, + log_file_number_size); + if (to_be_cached_state) { + cached_recoverable_state_ = *to_be_cached_state; + cached_recoverable_state_empty_ = false; + } + log_write_mutex_.Unlock(); + + if (io_s.ok()) { + const bool concurrent = true; + auto stats = default_cf_internal_stats_; + stats->AddDBStats(InternalStats::kIntStatsWalFileBytes, log_size, + concurrent); + RecordTick(stats_, WAL_FILE_BYTES, log_size); + stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal, + concurrent); + RecordTick(stats_, WRITE_WITH_WAL, write_with_wal); + } + return io_s; +} + +Status DBImpl::WriteRecoverableState() { + mutex_.AssertHeld(); + if (!cached_recoverable_state_empty_) { + bool dont_care_bool; + SequenceNumber next_seq; + if (two_write_queues_) { + log_write_mutex_.Lock(); + } + SequenceNumber seq; + if (two_write_queues_) { + seq = versions_->FetchAddLastAllocatedSequence(0); + } else { + seq = versions_->LastSequence(); + } + WriteBatchInternal::SetSequence(&cached_recoverable_state_, seq + 1); + auto status = WriteBatchInternal::InsertInto( + &cached_recoverable_state_, column_family_memtables_.get(), + &flush_scheduler_, &trim_history_scheduler_, true, + 0 /*recovery_log_number*/, this, false /* concurrent_memtable_writes */, + &next_seq, &dont_care_bool, seq_per_batch_); + auto last_seq = next_seq - 1; + if (two_write_queues_) { + 
versions_->FetchAddLastAllocatedSequence(last_seq - seq);
+      versions_->SetLastPublishedSequence(last_seq);
+    }
+    versions_->SetLastSequence(last_seq);
+    if (two_write_queues_) {
+      log_write_mutex_.Unlock();
+    }
+    if (status.ok() && recoverable_state_pre_release_callback_) {
+      const bool DISABLE_MEMTABLE = true;
+      for (uint64_t sub_batch_seq = seq + 1;
+           sub_batch_seq < next_seq && status.ok(); sub_batch_seq++) {
+        uint64_t const no_log_num = 0;
+        // Unlock it since the callback might end up locking mutex. e.g.,
+        // AddCommitted -> AdvanceMaxEvictedSeq -> GetSnapshotListFromDB
+        mutex_.Unlock();
+        status = recoverable_state_pre_release_callback_->Callback(
+            sub_batch_seq, !DISABLE_MEMTABLE, no_log_num, 0, 1);
+        mutex_.Lock();
+      }
+    }
+    if (status.ok()) {
+      cached_recoverable_state_.Clear();
+      cached_recoverable_state_empty_ = true;
+    }
+    return status;
+  }
+  return Status::OK();
+}
+
+void DBImpl::SelectColumnFamiliesForAtomicFlush(
+    autovector<ColumnFamilyData*>* selected_cfds,
+    const autovector<ColumnFamilyData*>& provided_candidate_cfds) {
+  mutex_.AssertHeld();
+  assert(selected_cfds);
+
+  autovector<ColumnFamilyData*> candidate_cfds;
+
+  // Generate candidate cfds if not provided
+  if (provided_candidate_cfds.empty()) {
+    for (ColumnFamilyData* cfd : *versions_->GetColumnFamilySet()) {
+      if (!cfd->IsDropped() && cfd->initialized()) {
+        cfd->Ref();
+        candidate_cfds.push_back(cfd);
+      }
+    }
+  } else {
+    candidate_cfds = provided_candidate_cfds;
+  }
+
+  for (ColumnFamilyData* cfd : candidate_cfds) {
+    if (cfd->IsDropped()) {
+      continue;
+    }
+    if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
+        !cached_recoverable_state_empty_.load()) {
+      selected_cfds->push_back(cfd);
+    }
+  }
+
+  // Unref the newly generated candidate cfds (when not provided) in
+  // `candidate_cfds`
+  if (provided_candidate_cfds.empty()) {
+    for (auto candidate_cfd : candidate_cfds) {
+      candidate_cfd->UnrefAndTryDelete();
+    }
+  }
+}
+
+// Assign sequence number for atomic flush.
+void DBImpl::AssignAtomicFlushSeq(
+    const autovector<ColumnFamilyData*>& cfds) {
+  assert(immutable_db_options_.atomic_flush);
+  auto seq = versions_->LastSequence();
+  for (auto cfd : cfds) {
+    cfd->imm()->AssignAtomicFlushSeq(seq);
+  }
+}
+
+Status DBImpl::SwitchWAL(WriteContext* write_context) {
+  mutex_.AssertHeld();
+  assert(write_context != nullptr);
+  Status status;
+
+  if (alive_log_files_.begin()->getting_flushed) {
+    return status;
+  }
+
+  auto oldest_alive_log = alive_log_files_.begin()->number;
+  bool flush_wont_release_oldest_log = false;
+  if (allow_2pc()) {
+    auto oldest_log_with_uncommitted_prep =
+        logs_with_prep_tracker_.FindMinLogContainingOutstandingPrep();
+
+    assert(oldest_log_with_uncommitted_prep == 0 ||
+           oldest_log_with_uncommitted_prep >= oldest_alive_log);
+    if (oldest_log_with_uncommitted_prep > 0 &&
+        oldest_log_with_uncommitted_prep == oldest_alive_log) {
+      if (unable_to_release_oldest_log_) {
+        // we already attempted to flush all column families dependent on
+        // the oldest alive log but the log still contained uncommitted
+        // transactions so there is still nothing that we can do.
+        return status;
+      } else {
+        ROCKS_LOG_WARN(
+            immutable_db_options_.info_log,
+            "Unable to release oldest log due to uncommitted transaction");
+        unable_to_release_oldest_log_ = true;
+        flush_wont_release_oldest_log = true;
+      }
+    }
+  }
+  if (!flush_wont_release_oldest_log) {
+    // we only mark this log as getting flushed if we have successfully
+    // flushed all data in this log. If this log contains outstanding prepared
+    // transactions then we cannot flush this log until those transactions are
+    // committed.
+    unable_to_release_oldest_log_ = false;
+    alive_log_files_.begin()->getting_flushed = true;
+  }
+
+  ROCKS_LOG_INFO(
+      immutable_db_options_.info_log,
+      "Flushing all column families with data in WAL number %" PRIu64
+      ". Total log size is %" PRIu64 " while max_total_wal_size is %" PRIu64,
+      oldest_alive_log, total_log_size_.load(), GetMaxTotalWalSize());
+  // no need to refcount because drop is happening in write thread, so can't
+  // happen while we're in the write thread
+  autovector<ColumnFamilyData*> cfds;
+  if (immutable_db_options_.atomic_flush) {
+    SelectColumnFamiliesForAtomicFlush(&cfds);
+  } else {
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      if (cfd->IsDropped()) {
+        continue;
+      }
+      if (cfd->OldestLogToKeep() <= oldest_alive_log) {
+        cfds.push_back(cfd);
+      }
+    }
+    MaybeFlushStatsCF(&cfds);
+  }
+  WriteThread::Writer nonmem_w;
+  if (two_write_queues_) {
+    nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
+  }
+
+  for (const auto cfd : cfds) {
+    cfd->Ref();
+    status = SwitchMemtable(cfd, write_context);
+    cfd->UnrefAndTryDelete();
+    if (!status.ok()) {
+      break;
+    }
+  }
+  if (two_write_queues_) {
+    nonmem_write_thread_.ExitUnbatched(&nonmem_w);
+  }
+
+  if (status.ok()) {
+    if (immutable_db_options_.atomic_flush) {
+      AssignAtomicFlushSeq(cfds);
+    }
+    for (auto cfd : cfds) {
+      cfd->imm()->FlushRequested();
+      if (!immutable_db_options_.atomic_flush) {
+        FlushRequest flush_req;
+        GenerateFlushRequest({cfd}, FlushReason::kWalFull, &flush_req);
+        SchedulePendingFlush(flush_req);
+      }
+    }
+    if (immutable_db_options_.atomic_flush) {
+      FlushRequest flush_req;
+      GenerateFlushRequest(cfds, FlushReason::kWalFull, &flush_req);
+      SchedulePendingFlush(flush_req);
+    }
+    MaybeScheduleFlushOrCompaction();
+  }
+  return status;
+}
+
+Status DBImpl::HandleWriteBufferManagerFlush(WriteContext* write_context) {
+  mutex_.AssertHeld();
+  assert(write_context != nullptr);
+  Status status;
+
+  // Before a new memtable is added in SwitchMemtable(),
+  // write_buffer_manager_->ShouldFlush() will keep returning true. If another
+  // thread is writing to another DB with the same write buffer, they may also
+  // be flushed. We may end up flushing many more DBs than needed. It's
+  // suboptimal but still correct.
+  // no need to refcount because drop is happening in write thread, so can't
+  // happen while we're in the write thread
+  autovector<ColumnFamilyData*> cfds;
+  if (immutable_db_options_.atomic_flush) {
+    SelectColumnFamiliesForAtomicFlush(&cfds);
+  } else {
+    ColumnFamilyData* cfd_picked = nullptr;
+    SequenceNumber seq_num_for_cf_picked = kMaxSequenceNumber;
+
+    for (auto cfd : *versions_->GetColumnFamilySet()) {
+      if (cfd->IsDropped()) {
+        continue;
+      }
+      if (!cfd->mem()->IsEmpty() && !cfd->imm()->IsFlushPendingOrRunning()) {
+        // We only consider flush on CFs with bytes in the mutable memtable,
+        // and no immutable memtables for which flush has yet to finish. If
+        // we triggered flush on CFs already trying to flush, we would risk
+        // creating too many immutable memtables leading to write stalls.
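+        // Worked example (illustrative, not part of this change): if three
+        // flush-eligible CFs have mutable memtables created at sequences 100,
+        // 42, and 7, the CF with creation seq 7 is picked below, since
+        // flushing the oldest memtable frees up the most WAL data.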
+ uint64_t seq = cfd->mem()->GetCreationSeq(); + if (cfd_picked == nullptr || seq < seq_num_for_cf_picked) { + cfd_picked = cfd; + seq_num_for_cf_picked = seq; + } + } + } + if (cfd_picked != nullptr) { + cfds.push_back(cfd_picked); + } + MaybeFlushStatsCF(&cfds); + } + if (!cfds.empty()) { + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "Flushing triggered to alleviate write buffer memory usage. Write " + "buffer is using %" ROCKSDB_PRIszt + " bytes out of a total of %" ROCKSDB_PRIszt ".", + write_buffer_manager_->memory_usage(), + write_buffer_manager_->buffer_size()); + } + + WriteThread::Writer nonmem_w; + if (two_write_queues_) { + nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); + } + for (const auto cfd : cfds) { + if (cfd->mem()->IsEmpty()) { + continue; + } + cfd->Ref(); + status = SwitchMemtable(cfd, write_context); + cfd->UnrefAndTryDelete(); + if (!status.ok()) { + break; + } + } + if (two_write_queues_) { + nonmem_write_thread_.ExitUnbatched(&nonmem_w); + } + + if (status.ok()) { + if (immutable_db_options_.atomic_flush) { + AssignAtomicFlushSeq(cfds); + } + for (const auto cfd : cfds) { + cfd->imm()->FlushRequested(); + if (!immutable_db_options_.atomic_flush) { + FlushRequest flush_req; + GenerateFlushRequest({cfd}, FlushReason::kWriteBufferManager, + &flush_req); + SchedulePendingFlush(flush_req); + } + } + if (immutable_db_options_.atomic_flush) { + FlushRequest flush_req; + GenerateFlushRequest(cfds, FlushReason::kWriteBufferManager, &flush_req); + SchedulePendingFlush(flush_req); + } + MaybeScheduleFlushOrCompaction(); + } + return status; +} + +uint64_t DBImpl::GetMaxTotalWalSize() const { + uint64_t max_total_wal_size = + max_total_wal_size_.load(std::memory_order_acquire); + if (max_total_wal_size > 0) { + return max_total_wal_size; + } + return 4 * max_total_in_memory_state_.load(std::memory_order_acquire); +} + +// REQUIRES: mutex_ is held +// REQUIRES: this thread is currently at the leader for write_thread +Status DBImpl::DelayWrite(uint64_t num_bytes, WriteThread& write_thread, + const WriteOptions& write_options) { + mutex_.AssertHeld(); + uint64_t time_delayed = 0; + bool delayed = false; + { + StopWatch sw(immutable_db_options_.clock, stats_, WRITE_STALL, + Histograms::HISTOGRAM_ENUM_MAX, &time_delayed); + // To avoid parallel timed delays (bad throttling), only support them + // on the primary write queue. + uint64_t delay; + if (&write_thread == &write_thread_) { + delay = + write_controller_.GetDelay(immutable_db_options_.clock, num_bytes); + } else { + assert(num_bytes == 0); + delay = 0; + } + TEST_SYNC_POINT("DBImpl::DelayWrite:Start"); + if (delay > 0) { + if (write_options.no_slowdown) { + return Status::Incomplete("Write stall"); + } + TEST_SYNC_POINT("DBImpl::DelayWrite:Sleep"); + + // Notify write_thread about the stall so it can setup a barrier and + // fail any pending writers with no_slowdown + write_thread.BeginWriteStall(); + mutex_.Unlock(); + TEST_SYNC_POINT("DBImpl::DelayWrite:BeginWriteStallDone"); + // We will delay the write until we have slept for `delay` microseconds + // or we don't need a delay anymore. We check for cancellation every 1ms + // (slightly longer because WriteController minimum delay is 1ms, in + // case of sleep imprecision, rounding, etc.) 
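+      // Worked example (illustrative, not part of this change): for
+      // delay = 5000 microseconds the loop below sleeps in ~1ms slices
+      // (kDelayInterval = 1001), i.e. about five iterations, re-checking
+      // write_controller_.NeedsDelay() after each slice so that a cleared
+      // stall is noticed within roughly a millisecond.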
+      const uint64_t kDelayInterval = 1001;
+      uint64_t stall_end = sw.start_time() + delay;
+      while (write_controller_.NeedsDelay()) {
+        if (immutable_db_options_.clock->NowMicros() >= stall_end) {
+          // We already delayed this write `delay` microseconds
+          break;
+        }
+
+        delayed = true;
+        // Sleep for 0.001 seconds
+        immutable_db_options_.clock->SleepForMicroseconds(kDelayInterval);
+      }
+      mutex_.Lock();
+      write_thread.EndWriteStall();
+    }
+
+    // Don't wait if there's a background error, even if it's a soft error. We
+    // might wait here indefinitely as the background compaction may never
+    // finish successfully, resulting in the stall condition lasting
+    // indefinitely
+    while (error_handler_.GetBGError().ok() && write_controller_.IsStopped() &&
+           !shutting_down_.load(std::memory_order_relaxed)) {
+      if (write_options.no_slowdown) {
+        return Status::Incomplete("Write stall");
+      }
+      delayed = true;
+
+      // Notify write_thread about the stall so it can setup a barrier and
+      // fail any pending writers with no_slowdown
+      write_thread.BeginWriteStall();
+      if (&write_thread == &write_thread_) {
+        TEST_SYNC_POINT("DBImpl::DelayWrite:Wait");
+      } else {
+        TEST_SYNC_POINT("DBImpl::DelayWrite:NonmemWait");
+      }
+      bg_cv_.Wait();
+      TEST_SYNC_POINT_CALLBACK("DBImpl::DelayWrite:AfterWait", &mutex_);
+      write_thread.EndWriteStall();
+    }
+  }
+  assert(!delayed || !write_options.no_slowdown);
+  if (delayed) {
+    default_cf_internal_stats_->AddDBStats(
+        InternalStats::kIntStatsWriteStallMicros, time_delayed);
+    RecordTick(stats_, STALL_MICROS, time_delayed);
+  }
+
+  // If DB is not in read-only mode and write_controller is not stopping
+  // writes, we can ignore any background errors and allow the write to
+  // proceed
+  Status s;
+  if (write_controller_.IsStopped()) {
+    if (!shutting_down_.load(std::memory_order_relaxed)) {
+      // If writes are still stopped and db not shutdown, it means we bailed
+      // due to a background error
+      s = Status::Incomplete(error_handler_.GetBGError().ToString());
+    } else {
+      s = Status::ShutdownInProgress("stalled writes");
+    }
+  }
+  if (error_handler_.IsDBStopped()) {
+    s = error_handler_.GetBGError();
+  }
+  return s;
+}
+
+// REQUIRES: mutex_ is held
+// REQUIRES: this thread is currently at the front of the writer queue
+void DBImpl::WriteBufferManagerStallWrites() {
+  mutex_.AssertHeld();
+  // First block future writer threads who want to add themselves to the queue
+  // of WriteThread.
+  write_thread_.BeginWriteStall();
+  mutex_.Unlock();
+
+  // Change the state to State::Blocked.
+  static_cast<WBMStallInterface*>(wbm_stall_.get())
+      ->SetState(WBMStallInterface::State::BLOCKED);
+  // Then WriteBufferManager will add DB instance to its queue
+  // and block this thread by calling WBMStallInterface::Block().
+  write_buffer_manager_->BeginWriteStall(wbm_stall_.get());
+  wbm_stall_->Block();
+
+  mutex_.Lock();
+  // Stall has ended. Signal writer threads so that they can add
+  // themselves to the WriteThread queue for writes.
+  write_thread_.EndWriteStall();
+}
+
+Status DBImpl::ThrottleLowPriWritesIfNeeded(const WriteOptions& write_options,
+                                            WriteBatch* my_batch) {
+  assert(write_options.low_pri);
+  // This is called outside the DB mutex. Although it is safe to make the call,
+  // the consistency condition is not guaranteed to hold. It's OK to live with
+  // it in this case.
+  // If we need to speed up compaction, it means compaction has fallen behind
+  // and we start to throttle low-pri writes.
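+  // For illustration (not part of this change): a caller opts into this
+  // throttling with
+  //   WriteOptions wo;
+  //   wo.low_pri = true;
+  //   db->Put(wo, "key", "value");
+  // and is rate limited below only while compaction needs to catch up.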
+  if (write_controller_.NeedSpeedupCompaction()) {
+    if (allow_2pc() && (my_batch->HasCommit() || my_batch->HasRollback())) {
+      // For 2PC, we only rate limit prepare, not commit.
+      return Status::OK();
+    }
+    if (write_options.no_slowdown) {
+      return Status::Incomplete("Low priority write stall");
+    } else {
+      assert(my_batch != nullptr);
+      // Rate limit those writes. The reason that we don't completely wait
+      // is that in case the write is heavy, low pri writes may never have
+      // a chance to run. Now we guarantee we are still slowly making
+      // progress.
+      PERF_TIMER_GUARD(write_delay_time);
+      write_controller_.low_pri_rate_limiter()->Request(
+          my_batch->GetDataSize(), Env::IO_HIGH, nullptr /* stats */,
+          RateLimiter::OpType::kWrite);
+    }
+  }
+  return Status::OK();
+}
+
+void DBImpl::MaybeFlushStatsCF(autovector<ColumnFamilyData*>* cfds) {
+  assert(cfds != nullptr);
+  if (!cfds->empty() && immutable_db_options_.persist_stats_to_disk) {
+    ColumnFamilyData* cfd_stats =
+        versions_->GetColumnFamilySet()->GetColumnFamily(
+            kPersistentStatsColumnFamilyName);
+    if (cfd_stats != nullptr && !cfd_stats->mem()->IsEmpty()) {
+      for (ColumnFamilyData* cfd : *cfds) {
+        if (cfd == cfd_stats) {
+          // stats CF already included in cfds
+          return;
+        }
+      }
+      // force flush stats CF when its log number is less than all other CF's
+      // log numbers
+      bool force_flush_stats_cf = true;
+      for (auto* loop_cfd : *versions_->GetColumnFamilySet()) {
+        if (loop_cfd == cfd_stats) {
+          continue;
+        }
+        if (loop_cfd->GetLogNumber() <= cfd_stats->GetLogNumber()) {
+          force_flush_stats_cf = false;
+        }
+      }
+      if (force_flush_stats_cf) {
+        cfds->push_back(cfd_stats);
+        ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                       "Force flushing stats CF with automated flush "
+                       "to avoid holding old logs");
+      }
+    }
+  }
+}
+
+Status DBImpl::TrimMemtableHistory(WriteContext* context) {
+  autovector<ColumnFamilyData*> cfds;
+  ColumnFamilyData* tmp_cfd;
+  while ((tmp_cfd = trim_history_scheduler_.TakeNextColumnFamily()) !=
+         nullptr) {
+    cfds.push_back(tmp_cfd);
+  }
+  for (auto& cfd : cfds) {
+    autovector<MemTable*> to_delete;
+    bool trimmed = cfd->imm()->TrimHistory(&context->memtables_to_free_,
+                                           cfd->mem()->MemoryAllocatedBytes());
+    if (trimmed) {
+      context->superversion_context.NewSuperVersion();
+      assert(context->superversion_context.new_superversion.get() != nullptr);
+      cfd->InstallSuperVersion(&context->superversion_context, &mutex_);
+    }
+
+    if (cfd->UnrefAndTryDelete()) {
+      cfd = nullptr;
+    }
+  }
+  return Status::OK();
+}
+
+Status DBImpl::ScheduleFlushes(WriteContext* context) {
+  autovector<ColumnFamilyData*> cfds;
+  if (immutable_db_options_.atomic_flush) {
+    SelectColumnFamiliesForAtomicFlush(&cfds);
+    for (auto cfd : cfds) {
+      cfd->Ref();
+    }
+    flush_scheduler_.Clear();
+  } else {
+    ColumnFamilyData* tmp_cfd;
+    while ((tmp_cfd = flush_scheduler_.TakeNextColumnFamily()) != nullptr) {
+      cfds.push_back(tmp_cfd);
+    }
+    MaybeFlushStatsCF(&cfds);
+  }
+  Status status;
+  WriteThread::Writer nonmem_w;
+  if (two_write_queues_) {
+    nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
+  }
+
+  for (auto& cfd : cfds) {
+    if (!cfd->mem()->IsEmpty()) {
+      status = SwitchMemtable(cfd, context);
+    }
+    if (cfd->UnrefAndTryDelete()) {
+      cfd = nullptr;
+    }
+    if (!status.ok()) {
+      break;
+    }
+  }
+
+  if (two_write_queues_) {
+    nonmem_write_thread_.ExitUnbatched(&nonmem_w);
+  }
+
+  if (status.ok()) {
+    if (immutable_db_options_.atomic_flush) {
+      AssignAtomicFlushSeq(cfds);
+      FlushRequest flush_req;
+      GenerateFlushRequest(cfds, FlushReason::kWriteBufferFull, &flush_req);
+      SchedulePendingFlush(flush_req);
+    } else {
+      for (auto* cfd : cfds) {
+        FlushRequest flush_req;
+        GenerateFlushRequest({cfd}, FlushReason::kWriteBufferFull, &flush_req);
+        SchedulePendingFlush(flush_req);
+      }
+    }
+    MaybeScheduleFlushOrCompaction();
+  }
+  return status;
+}
+
+void DBImpl::NotifyOnMemTableSealed(ColumnFamilyData* /*cfd*/,
+                                    const MemTableInfo& mem_table_info) {
+  if (immutable_db_options_.listeners.size() == 0U) {
+    return;
+  }
+  if (shutting_down_.load(std::memory_order_acquire)) {
+    return;
+  }
+
+  mutex_.Unlock();
+  for (auto listener : immutable_db_options_.listeners) {
+    listener->OnMemTableSealed(mem_table_info);
+  }
+  mutex_.Lock();
+}
+
+// REQUIRES: mutex_ is held
+// REQUIRES: this thread is currently at the front of the writer queue
+// REQUIRES: this thread is currently at the front of the 2nd writer queue if
+// two_write_queues_ is true (This is to simplify the reasoning.)
+Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
+  mutex_.AssertHeld();
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  log::Writer* new_log = nullptr;
+  MemTable* new_mem = nullptr;
+  IOStatus io_s;
+
+  // Recoverable state is persisted in WAL. After memtable switch, WAL might
+  // be deleted, so we write the state to memtable to be persisted as well.
+  Status s = WriteRecoverableState();
+  if (!s.ok()) {
+    return s;
+  }
+
+  // Attempt to switch to a new memtable and trigger flush of old.
+  // Do this without holding the dbmutex lock.
+  assert(versions_->prev_log_number() == 0);
+  if (two_write_queues_) {
+    log_write_mutex_.Lock();
+  }
+  bool creating_new_log = !log_empty_;
+  if (two_write_queues_) {
+    log_write_mutex_.Unlock();
+  }
+  uint64_t recycle_log_number = 0;
+  if (creating_new_log && immutable_db_options_.recycle_log_file_num &&
+      !log_recycle_files_.empty()) {
+    recycle_log_number = log_recycle_files_.front();
+  }
+  uint64_t new_log_number =
+      creating_new_log ? versions_->NewFileNumber() : logfile_number_;
+  const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions();
+
+  // Set memtable_info for memtable sealed callback
+  MemTableInfo memtable_info;
+  memtable_info.cf_name = cfd->GetName();
+  memtable_info.first_seqno = cfd->mem()->GetFirstSequenceNumber();
+  memtable_info.earliest_seqno = cfd->mem()->GetEarliestSequenceNumber();
+  memtable_info.num_entries = cfd->mem()->num_entries();
+  memtable_info.num_deletes = cfd->mem()->num_deletes();
+  // Log this later after lock release. It may be outdated, e.g., if background
+  // flush happens before logging, but that should be ok.
+  int num_imm_unflushed = cfd->imm()->NumNotFlushed();
+  const auto preallocate_block_size =
+      GetWalPreallocateBlockSize(mutable_cf_options.write_buffer_size);
+  mutex_.Unlock();
+  if (creating_new_log) {
+    // TODO: Write buffer size passed in should be max of all CF's instead
+    // of mutable_cf_options.write_buffer_size.
+    io_s = CreateWAL(new_log_number, recycle_log_number, preallocate_block_size,
+                     &new_log);
+    if (s.ok()) {
+      s = io_s;
+    }
+  }
+  if (s.ok()) {
+    SequenceNumber seq = versions_->LastSequence();
+    new_mem = cfd->ConstructNewMemtable(mutable_cf_options, seq);
+    context->superversion_context.NewSuperVersion();
+  }
+  ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                 "[%s] New memtable created with log file: #%" PRIu64
+                 ". Immutable memtables: %d.\n",
+                 cfd->GetName().c_str(), new_log_number, num_imm_unflushed);
+  // There should be no concurrent write as the thread is at the front of
+  // writer queue
+  cfd->mem()->ConstructFragmentedRangeTombstones();
+
+  mutex_.Lock();
+  if (recycle_log_number != 0) {
+    // Since renaming the file is done outside DB mutex, we need to ensure
+    // concurrent full purges don't delete the file while we're recycling it.
+    // To achieve that we hold the old log number in the recyclable list until
+    // after it has been renamed.
+    assert(log_recycle_files_.front() == recycle_log_number);
+    log_recycle_files_.pop_front();
+  }
+  if (s.ok() && creating_new_log) {
+    InstrumentedMutexLock l(&log_write_mutex_);
+    assert(new_log != nullptr);
+    if (!logs_.empty()) {
+      // Always flush the buffer of the last log before switching to a new one
+      log::Writer* cur_log_writer = logs_.back().writer;
+      if (error_handler_.IsRecoveryInProgress()) {
+        // In recovery path, we force another try of writing WAL buffer.
+        cur_log_writer->file()->reset_seen_error();
+      }
+      io_s = cur_log_writer->WriteBuffer();
+      if (s.ok()) {
+        s = io_s;
+      }
+      if (!s.ok()) {
+        ROCKS_LOG_WARN(immutable_db_options_.info_log,
+                       "[%s] Failed to switch from #%" PRIu64 " to #%" PRIu64
+                       " WAL file\n",
+                       cfd->GetName().c_str(), cur_log_writer->get_log_number(),
+                       new_log_number);
+      }
+    }
+    if (s.ok()) {
+      logfile_number_ = new_log_number;
+      log_empty_ = true;
+      log_dir_synced_ = false;
+      logs_.emplace_back(logfile_number_, new_log);
+      alive_log_files_.push_back(LogFileNumberSize(logfile_number_));
+    }
+  }
+
+  if (!s.ok()) {
+    // how do we fail if we're not creating new log?
+    assert(creating_new_log);
+    delete new_mem;
+    delete new_log;
+    context->superversion_context.new_superversion.reset();
+    // We may have lost data from the WritableFileBuffer in-memory buffer for
+    // the current log, so treat it as a fatal error and set bg_error
+    if (!io_s.ok()) {
+      error_handler_.SetBGError(io_s, BackgroundErrorReason::kMemTable);
+    } else {
+      error_handler_.SetBGError(s, BackgroundErrorReason::kMemTable);
+    }
+    // Read back bg_error in order to get the right severity
+    s = error_handler_.GetBGError();
+    return s;
+  }
+
+  bool empty_cf_updated = false;
+  if (immutable_db_options_.track_and_verify_wals_in_manifest &&
+      !immutable_db_options_.allow_2pc && creating_new_log) {
+    // In non-2pc mode, WALs become obsolete if they do not contain unflushed
+    // data. Updating the empty CF's log number might cause some WALs to become
+    // obsolete. So we should track the WAL obsoletion event before actually
+    // updating the empty CF's log number.
+    uint64_t min_wal_number_to_keep =
+        versions_->PreComputeMinLogNumberWithUnflushedData(logfile_number_);
+    if (min_wal_number_to_keep >
+        versions_->GetWalSet().GetMinWalNumberToKeep()) {
+      // Get a snapshot of the empty column families. LogAndApply may release
+      // and reacquire the db mutex; during that period a column family may
+      // become empty (e.g. its flush succeeds), which would affect the
+      // computed min_log_number_to_keep, so we take a snapshot for
+      // consistency of the column family data status. If a column family
+      // becomes non-empty afterwards, its active log should still be the
+      // newly created log, so the min_log_number_to_keep is not affected.
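+      // Illustrative scenario (assumed, not from upstream docs): with CFs
+      // {a, b} where only b is empty, advancing b's log number to the new
+      // WAL can raise min_wal_number_to_keep; the obsoletion below is
+      // recorded in the manifest first, then the numbers are bumped.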
+      autovector<ColumnFamilyData*> empty_cfs;
+      for (auto cf : *versions_->GetColumnFamilySet()) {
+        if (cf->IsEmpty()) {
+          empty_cfs.push_back(cf);
+        }
+      }
+
+      VersionEdit wal_deletion;
+      wal_deletion.DeleteWalsBefore(min_wal_number_to_keep);
+      s = versions_->LogAndApplyToDefaultColumnFamily(
+          read_options, &wal_deletion, &mutex_, directories_.GetDbDir());
+      if (!s.ok() && versions_->io_status().IsIOError()) {
+        s = error_handler_.SetBGError(versions_->io_status(),
+                                      BackgroundErrorReason::kManifestWrite);
+      }
+      if (!s.ok()) {
+        return s;
+      }
+
+      for (auto cf : empty_cfs) {
+        if (cf->IsEmpty()) {
+          cf->SetLogNumber(logfile_number_);
+          // MEMPURGE: No need to change this, because new adds
+          // should still receive new sequence numbers.
+          cf->mem()->SetCreationSeq(versions_->LastSequence());
+        }  // cf may become non-empty.
+      }
+      empty_cf_updated = true;
+    }
+  }
+  if (!empty_cf_updated) {
+    for (auto cf : *versions_->GetColumnFamilySet()) {
+      // all this is just optimization to delete logs that
+      // are no longer needed -- if CF is empty, that means it
+      // doesn't need that particular log to stay alive, so we just
+      // advance the log number. no need to persist this in the manifest
+      if (cf->IsEmpty()) {
+        if (creating_new_log) {
+          cf->SetLogNumber(logfile_number_);
+        }
+        cf->mem()->SetCreationSeq(versions_->LastSequence());
+      }
+    }
+  }
+
+  cfd->mem()->SetNextLogNumber(logfile_number_);
+  assert(new_mem != nullptr);
+  cfd->imm()->Add(cfd->mem(), &context->memtables_to_free_);
+  new_mem->Ref();
+  cfd->SetMemtable(new_mem);
+  InstallSuperVersionAndScheduleWork(cfd, &context->superversion_context,
+                                     mutable_cf_options);
+
+  // Notify client that memtable is sealed, now that we have successfully
+  // installed a new memtable
+  NotifyOnMemTableSealed(cfd, memtable_info);
+  // It is possible that we got here without checking the value of io_s, but
+  // that is okay. If we did, it most likely means that s was already an error.
+  // In any case, ignore any unchecked error for io_s here.
+  io_s.PermitUncheckedError();
+  return s;
+}
+
+size_t DBImpl::GetWalPreallocateBlockSize(uint64_t write_buffer_size) const {
+  mutex_.AssertHeld();
+  size_t bsize =
+      static_cast<size_t>(write_buffer_size / 10 + write_buffer_size);
+  // Some users might set very high write_buffer_size and rely on
+  // max_total_wal_size or other parameters to control the WAL size.
+  if (mutable_db_options_.max_total_wal_size > 0) {
+    bsize = std::min(
+        bsize, static_cast<size_t>(mutable_db_options_.max_total_wal_size));
+  }
+  if (immutable_db_options_.db_write_buffer_size > 0) {
+    bsize = std::min(bsize, immutable_db_options_.db_write_buffer_size);
+  }
+  if (immutable_db_options_.write_buffer_manager &&
+      immutable_db_options_.write_buffer_manager->enabled()) {
+    bsize = std::min(
+        bsize, immutable_db_options_.write_buffer_manager->buffer_size());
+  }
+
+  return bsize;
+}
+
+// Default implementations of convenience methods that subclasses of DB
+// can call if they wish
+Status DB::Put(const WriteOptions& opt, ColumnFamilyHandle* column_family,
+               const Slice& key, const Slice& value) {
+  // Pre-allocate size of write batch conservatively.
+  // 8 bytes are taken by header, 4 bytes for count, 1 byte for type,
+  // and we allocate 11 extra bytes for key length, as well as value length.
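+  // That is, 8 (header) + 4 (count) + 1 (type tag) + 11 (room for the
+  // varint key and value lengths) = 24 bytes of fixed overhead on top of
+  // key.size() + value.size() below.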
+  WriteBatch batch(key.size() + value.size() + 24, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key, 0 /* default_cf_ts_sz */);
+  Status s = batch.Put(column_family, key, value);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::Put(const WriteOptions& opt, ColumnFamilyHandle* column_family,
+               const Slice& key, const Slice& ts, const Slice& value) {
+  ColumnFamilyHandle* default_cf = DefaultColumnFamily();
+  assert(default_cf);
+  const Comparator* const default_cf_ucmp = default_cf->GetComparator();
+  assert(default_cf_ucmp);
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key,
+                   default_cf_ucmp->timestamp_size());
+  Status s = batch.Put(column_family, key, ts, value);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::PutEntity(const WriteOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     const WideColumns& columns) {
+  const ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
+  assert(default_cf);
+
+  const Comparator* const default_cf_ucmp = default_cf->GetComparator();
+  assert(default_cf_ucmp);
+
+  WriteBatch batch(/* reserved_bytes */ 0, /* max_bytes */ 0,
+                   options.protection_bytes_per_key,
+                   default_cf_ucmp->timestamp_size());
+
+  const Status s = batch.PutEntity(column_family, key, columns);
+  if (!s.ok()) {
+    return s;
+  }
+
+  return Write(options, &batch);
+}
+
+Status DB::Delete(const WriteOptions& opt, ColumnFamilyHandle* column_family,
+                  const Slice& key) {
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key, 0 /* default_cf_ts_sz */);
+  Status s = batch.Delete(column_family, key);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::Delete(const WriteOptions& opt, ColumnFamilyHandle* column_family,
+                  const Slice& key, const Slice& ts) {
+  ColumnFamilyHandle* default_cf = DefaultColumnFamily();
+  assert(default_cf);
+  const Comparator* const default_cf_ucmp = default_cf->GetComparator();
+  assert(default_cf_ucmp);
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key,
+                   default_cf_ucmp->timestamp_size());
+  Status s = batch.Delete(column_family, key, ts);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::SingleDelete(const WriteOptions& opt,
+                        ColumnFamilyHandle* column_family, const Slice& key) {
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key, 0 /* default_cf_ts_sz */);
+  Status s = batch.SingleDelete(column_family, key);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::SingleDelete(const WriteOptions& opt,
+                        ColumnFamilyHandle* column_family, const Slice& key,
+                        const Slice& ts) {
+  ColumnFamilyHandle* default_cf = DefaultColumnFamily();
+  assert(default_cf);
+  const Comparator* const default_cf_ucmp = default_cf->GetComparator();
+  assert(default_cf_ucmp);
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key,
+                   default_cf_ucmp->timestamp_size());
+  Status s = batch.SingleDelete(column_family, key, ts);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::DeleteRange(const WriteOptions& opt,
+                       ColumnFamilyHandle* column_family,
+                       const Slice& begin_key, const Slice& end_key) {
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key, 0 /* default_cf_ts_sz */);
+  Status s = batch.DeleteRange(column_family, begin_key, end_key);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::DeleteRange(const WriteOptions& opt,
+                       ColumnFamilyHandle* column_family,
+                       const Slice& begin_key, const Slice& end_key,
+                       const Slice& ts) {
+  ColumnFamilyHandle* default_cf = DefaultColumnFamily();
+  assert(default_cf);
+  const Comparator* const default_cf_ucmp = default_cf->GetComparator();
+  assert(default_cf_ucmp);
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key,
+                   default_cf_ucmp->timestamp_size());
+  Status s = batch.DeleteRange(column_family, begin_key, end_key, ts);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::Merge(const WriteOptions& opt, ColumnFamilyHandle* column_family,
+                 const Slice& key, const Slice& value) {
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key, 0 /* default_cf_ts_sz */);
+  Status s = batch.Merge(column_family, key, value);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+Status DB::Merge(const WriteOptions& opt, ColumnFamilyHandle* column_family,
+                 const Slice& key, const Slice& ts, const Slice& value) {
+  ColumnFamilyHandle* default_cf = DefaultColumnFamily();
+  assert(default_cf);
+  const Comparator* const default_cf_ucmp = default_cf->GetComparator();
+  assert(default_cf_ucmp);
+  WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                   opt.protection_bytes_per_key,
+                   default_cf_ucmp->timestamp_size());
+  Status s = batch.Merge(column_family, key, ts, value);
+  if (!s.ok()) {
+    return s;
+  }
+  return Write(opt, &batch);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.cc
new file mode 100644
index 0000000000..be8d5bee1c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.cc
@@ -0,0 +1,147 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/db_info_dumper.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <cinttypes>
+#include <string>
+#include <vector>
+
+#include "file/filename.h"
+#include "rocksdb/env.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+void DumpDBFileSummary(const ImmutableDBOptions& options,
+                       const std::string& dbname,
+                       const std::string& session_id) {
+  if (options.info_log == nullptr) {
+    return;
+  }
+
+  auto* env = options.env;
+  uint64_t number = 0;
+  FileType type = kInfoLogFile;
+
+  std::vector<std::string> files;
+  uint64_t file_num = 0;
+  uint64_t file_size;
+  std::string file_info, wal_info;
+
+  Header(options.info_log, "DB SUMMARY\n");
+  Header(options.info_log, "DB Session ID: %s\n", session_id.c_str());
+
+  Status s;
+  // Get files in dbname dir
+  s = env->GetChildren(dbname, &files);
+  if (!s.ok()) {
+    Error(options.info_log, "Error when reading %s dir %s\n", dbname.c_str(),
+          s.ToString().c_str());
+  }
+  std::sort(files.begin(), files.end());
+  for (const std::string& file : files) {
+    if (!ParseFileName(file, &number, &type)) {
+      continue;
+    }
+    switch (type) {
+      case kCurrentFile:
+        Header(options.info_log, "CURRENT file: %s\n", file.c_str());
+        break;
+      case kIdentityFile:
+        Header(options.info_log, "IDENTITY file: %s\n", file.c_str());
+        break;
+      case kDescriptorFile:
+        s = env->GetFileSize(dbname + "/" + file, &file_size);
+        if (s.ok()) {
+          Header(options.info_log,
+                 "MANIFEST file: %s size: %" PRIu64 " Bytes\n", file.c_str(),
+                 file_size);
+        } else {
+          Error(options.info_log,
+                "Error when reading MANIFEST file: %s/%s %s\n", dbname.c_str(),
+                file.c_str(), s.ToString().c_str());
+        }
+        break;
+      case kWalFile:
+        s = env->GetFileSize(dbname + "/" + file, &file_size);
+        if (s.ok()) {
+          wal_info.append(file)
+              .append(" size: ")
+              .append(std::to_string(file_size))
+              .append(" ; ");
+        } else {
+          Error(options.info_log, "Error when reading LOG file: %s/%s %s\n",
+                dbname.c_str(), file.c_str(), s.ToString().c_str());
+        }
+        break;
+      case kTableFile:
+        if (++file_num < 10) {
+          file_info.append(file).append(" ");
+        }
+        break;
+      default:
+        break;
+    }
+  }
+
+  // Get sst files in db_path dir
+  for (auto& db_path : options.db_paths) {
+    if (dbname.compare(db_path.path) != 0) {
+      s = env->GetChildren(db_path.path, &files);
+      if (!s.ok()) {
+        Error(options.info_log, "Error when reading %s dir %s\n",
+              db_path.path.c_str(), s.ToString().c_str());
+        continue;
+      }
+      std::sort(files.begin(), files.end());
+      for (const std::string& file : files) {
+        if (ParseFileName(file, &number, &type)) {
+          if (type == kTableFile && ++file_num < 10) {
+            file_info.append(file).append(" ");
+          }
+        }
+      }
+    }
+    Header(options.info_log,
+           "SST files in %s dir, Total Num: %" PRIu64 ", files: %s\n",
+           db_path.path.c_str(), file_num, file_info.c_str());
+    file_num = 0;
+    file_info.clear();
+  }
+
+  // Get wal file in wal_dir
+  const auto& wal_dir = options.GetWalDir(dbname);
+  if (!options.IsWalDirSameAsDBPath(dbname)) {
+    s = env->GetChildren(wal_dir, &files);
+    if (!s.ok()) {
+      Error(options.info_log, "Error when reading %s dir %s\n", wal_dir.c_str(),
+            s.ToString().c_str());
+      return;
+    }
+    wal_info.clear();
+    for (const std::string& file : files) {
+      if (ParseFileName(file, &number, &type)) {
+        if (type == kWalFile) {
+          s = env->GetFileSize(wal_dir + "/" + file, &file_size);
+          if (s.ok()) {
+            wal_info.append(file)
+                .append(" size: ")
+                .append(std::to_string(file_size))
+                .append(" ; ");
+          } else {
+            Error(options.info_log, "Error when reading LOG file %s/%s %s\n",
+                  wal_dir.c_str(), file.c_str(), s.ToString().c_str());
+          }
+        }
+      }
+    }
+  }
+  Header(options.info_log, "Write Ahead Log file in %s: %s\n", wal_dir.c_str(),
+         wal_info.c_str());
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.h
new file mode 100644
index 0000000000..f518e840fd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_info_dumper.h
@@ -0,0 +1,15 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <string>
+
+#include "options/db_options.h"
+
+namespace ROCKSDB_NAMESPACE {
+void DumpDBFileSummary(const ImmutableDBOptions& options,
+                       const std::string& dbname,
+                       const std::string& session_id = "");
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.cc
new file mode 100644
index 0000000000..3d980c8786
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.cc
@@ -0,0 +1,1708 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_iter.h"
+
+#include <iostream>
+#include <limits>
+#include <string>
+
+#include "db/dbformat.h"
+#include "db/merge_context.h"
+#include "db/merge_helper.h"
+#include "db/pinned_iterators_manager.h"
+#include "db/wide/wide_column_serialization.h"
+#include "file/filename.h"
+#include "logging/logging.h"
+#include "memory/arena.h"
+#include "monitoring/perf_context_imp.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/options.h"
+#include "rocksdb/system_clock.h"
+#include "table/internal_iterator.h"
+#include "table/iterator_wrapper.h"
+#include "trace_replay/trace_replay.h"
+#include "util/mutexlock.h"
+#include "util/string_util.h"
+#include "util/user_comparator_wrapper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+DBIter::DBIter(Env* _env, const ReadOptions& read_options,
+               const ImmutableOptions& ioptions,
+               const MutableCFOptions& mutable_cf_options,
+               const Comparator* cmp, InternalIterator* iter,
+               const Version* version, SequenceNumber s, bool arena_mode,
+               uint64_t max_sequential_skip_in_iterations,
+               ReadCallback* read_callback, DBImpl* db_impl,
+               ColumnFamilyData* cfd, bool expose_blob_index)
+    : prefix_extractor_(mutable_cf_options.prefix_extractor.get()),
+      env_(_env),
+      clock_(ioptions.clock),
+      logger_(ioptions.logger),
+      user_comparator_(cmp),
+      merge_operator_(ioptions.merge_operator.get()),
+      iter_(iter),
+      version_(version),
+      read_callback_(read_callback),
+      sequence_(s),
+      statistics_(ioptions.stats),
+      max_skip_(max_sequential_skip_in_iterations),
+      max_skippable_internal_keys_(read_options.max_skippable_internal_keys),
+      num_internal_keys_skipped_(0),
+      iterate_lower_bound_(read_options.iterate_lower_bound),
+      iterate_upper_bound_(read_options.iterate_upper_bound),
+      direction_(kForward),
+      valid_(false),
+      current_entry_is_merged_(false),
+      is_key_seqnum_zero_(false),
+      prefix_same_as_start_(mutable_cf_options.prefix_extractor
+                                ? read_options.prefix_same_as_start
+                                : false),
+      pin_thru_lifetime_(read_options.pin_data),
+      expect_total_order_inner_iter_(prefix_extractor_ == nullptr ||
+                                     read_options.total_order_seek ||
+                                     read_options.auto_prefix_mode),
+      read_tier_(read_options.read_tier),
+      fill_cache_(read_options.fill_cache),
+      verify_checksums_(read_options.verify_checksums),
+      expose_blob_index_(expose_blob_index),
+      is_blob_(false),
+      arena_mode_(arena_mode),
+      db_impl_(db_impl),
+      cfd_(cfd),
+      timestamp_ub_(read_options.timestamp),
+      timestamp_lb_(read_options.iter_start_ts),
+      timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0) {
+  RecordTick(statistics_, NO_ITERATOR_CREATED);
+  if (pin_thru_lifetime_) {
+    pinned_iters_mgr_.StartPinning();
+  }
+  if (iter_.iter()) {
+    iter_.iter()->SetPinnedItersMgr(&pinned_iters_mgr_);
+  }
+  status_.PermitUncheckedError();
+  assert(timestamp_size_ ==
+         user_comparator_.user_comparator()->timestamp_size());
+}
+
+Status DBIter::GetProperty(std::string prop_name, std::string* prop) {
+  if (prop == nullptr) {
+    return Status::InvalidArgument("prop is nullptr");
+  }
+  if (prop_name == "rocksdb.iterator.super-version-number") {
+    // First try to pass the value returned from inner iterator.
+    return iter_.iter()->GetProperty(prop_name, prop);
+  } else if (prop_name == "rocksdb.iterator.is-key-pinned") {
+    if (valid_) {
+      *prop = (pin_thru_lifetime_ && saved_key_.IsKeyPinned()) ? "1" : "0";
+    } else {
+      *prop = "Iterator is not valid.";
+    }
+    return Status::OK();
+  } else if (prop_name == "rocksdb.iterator.internal-key") {
+    *prop = saved_key_.GetUserKey().ToString();
+    return Status::OK();
+  }
+  return Status::InvalidArgument("Unidentified property.");
+}
+
+bool DBIter::ParseKey(ParsedInternalKey* ikey) {
+  Status s = ParseInternalKey(iter_.key(), ikey, false /* log_err_key */);
+  if (!s.ok()) {
+    status_ = Status::Corruption("In DBIter: ", s.getState());
+    valid_ = false;
+    ROCKS_LOG_ERROR(logger_, "In DBIter: %s", status_.getState());
+    return false;
+  } else {
+    return true;
+  }
+}
+
+void DBIter::Next() {
+  assert(valid_);
+  assert(status_.ok());
+
+  PERF_COUNTER_ADD(iter_next_count, 1);
+  PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, clock_);
+  // Release temporarily pinned blocks from last operation
+  ReleaseTempPinnedData();
+  ResetBlobValue();
+  ResetValueAndColumns();
+  local_stats_.skip_count_ += num_internal_keys_skipped_;
+  local_stats_.skip_count_--;
+  num_internal_keys_skipped_ = 0;
+  bool ok = true;
+  if (direction_ == kReverse) {
+    is_key_seqnum_zero_ = false;
+    if (!ReverseToForward()) {
+      ok = false;
+    }
+  } else if (!current_entry_is_merged_) {
+    // If the current value is not a merge, the iter position is the
+    // current key, which is already returned. We can safely issue a
+    // Next() without checking the current key.
+    // If the current key is a merge, very likely iter already points
+    // to the next internal position.
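+    // Illustrative: after yielding a merged result for user key k, iter_
+    // already rests on the first entry past k's merge operands, so calling
+    // Next() here as well could skip an entire user key.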
+ assert(iter_.Valid()); + iter_.Next(); + PERF_COUNTER_ADD(internal_key_skipped_count, 1); + } + + local_stats_.next_count_++; + if (ok && iter_.Valid()) { + ClearSavedValue(); + + if (prefix_same_as_start_) { + assert(prefix_extractor_ != nullptr); + const Slice prefix = prefix_.GetUserKey(); + FindNextUserEntry(true /* skipping the current user key */, &prefix); + } else { + FindNextUserEntry(true /* skipping the current user key */, nullptr); + } + } else { + is_key_seqnum_zero_ = false; + valid_ = false; + } + if (statistics_ != nullptr && valid_) { + local_stats_.next_found_count_++; + local_stats_.bytes_read_ += (key().size() + value().size()); + } +} + +bool DBIter::SetBlobValueIfNeeded(const Slice& user_key, + const Slice& blob_index) { + assert(!is_blob_); + assert(blob_value_.empty()); + + if (expose_blob_index_) { // Stacked BlobDB implementation + is_blob_ = true; + return true; + } + + if (!version_) { + status_ = Status::Corruption("Encountered unexpected blob index."); + valid_ = false; + return false; + } + + // TODO: consider moving ReadOptions from ArenaWrappedDBIter to DBIter to + // avoid having to copy options back and forth. + // TODO: plumb Env::IOActivity + ReadOptions read_options; + read_options.read_tier = read_tier_; + read_options.fill_cache = fill_cache_; + read_options.verify_checksums = verify_checksums_; + + constexpr FilePrefetchBuffer* prefetch_buffer = nullptr; + constexpr uint64_t* bytes_read = nullptr; + + const Status s = version_->GetBlob(read_options, user_key, blob_index, + prefetch_buffer, &blob_value_, bytes_read); + + if (!s.ok()) { + status_ = s; + valid_ = false; + return false; + } + + is_blob_ = true; + return true; +} + +bool DBIter::SetValueAndColumnsFromEntity(Slice slice) { + assert(value_.empty()); + assert(wide_columns_.empty()); + + const Status s = WideColumnSerialization::Deserialize(slice, wide_columns_); + + if (!s.ok()) { + status_ = s; + valid_ = false; + return false; + } + + if (!wide_columns_.empty() && + wide_columns_[0].name() == kDefaultWideColumnName) { + value_ = wide_columns_[0].value(); + } + + return true; +} + +// PRE: saved_key_ has the current user key if skipping_saved_key +// POST: saved_key_ should have the next user key if valid_, +// if the current entry is a result of merge +// current_entry_is_merged_ => true +// saved_value_ => the merged value +// +// NOTE: In between, saved_key_ can point to a user key that has +// a delete marker or a sequence number higher than sequence_ +// saved_key_ MUST have a proper user_key before calling this function +// +// The prefix parameter, if not null, indicates that we need to iterate +// within the prefix, and the iterator needs to be made invalid, if no +// more entry for the prefix can be found. +bool DBIter::FindNextUserEntry(bool skipping_saved_key, const Slice* prefix) { + PERF_TIMER_GUARD(find_next_user_entry_time); + return FindNextUserEntryInternal(skipping_saved_key, prefix); +} + +// Actual implementation of DBIter::FindNextUserEntry() +bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key, + const Slice* prefix) { + // Loop until we hit an acceptable entry to yield + assert(iter_.Valid()); + assert(status_.ok()); + assert(direction_ == kForward); + current_entry_is_merged_ = false; + + // How many times in a row we have skipped an entry with user key less than + // or equal to saved_key_. We could skip these entries either because + // sequence numbers were too high or because skipping_saved_key = true. 
+ // What saved_key_ contains throughout this method: + // - if skipping_saved_key : saved_key_ contains the key that we need + // to skip, and we haven't seen any keys greater + // than that, + // - if num_skipped > 0 : saved_key_ contains the key that we have skipped + // num_skipped times, and we haven't seen any keys + // greater than that, + // - none of the above : saved_key_ can contain anything, it doesn't + // matter. + uint64_t num_skipped = 0; + // For write unprepared, the target sequence number in reseek could be larger + // than the snapshot, and thus needs to be skipped again. This could result in + // an infinite loop of reseeks. To avoid that, we limit the number of reseeks + // to one. + bool reseek_done = false; + + do { + // Will update is_key_seqnum_zero_ as soon as we parsed the current key + // but we need to save the previous value to be used in the loop. + bool is_prev_key_seqnum_zero = is_key_seqnum_zero_; + if (!ParseKey(&ikey_)) { + is_key_seqnum_zero_ = false; + return false; + } + Slice user_key_without_ts = + StripTimestampFromUserKey(ikey_.user_key, timestamp_size_); + + is_key_seqnum_zero_ = (ikey_.sequence == 0); + + assert(iterate_upper_bound_ == nullptr || + iter_.UpperBoundCheckResult() != IterBoundCheck::kInbound || + user_comparator_.CompareWithoutTimestamp( + user_key_without_ts, /*a_has_ts=*/false, *iterate_upper_bound_, + /*b_has_ts=*/false) < 0); + if (iterate_upper_bound_ != nullptr && + iter_.UpperBoundCheckResult() != IterBoundCheck::kInbound && + user_comparator_.CompareWithoutTimestamp( + user_key_without_ts, /*a_has_ts=*/false, *iterate_upper_bound_, + /*b_has_ts=*/false) >= 0) { + break; + } + + assert(prefix == nullptr || prefix_extractor_ != nullptr); + if (prefix != nullptr && + prefix_extractor_->Transform(user_key_without_ts).compare(*prefix) != + 0) { + assert(prefix_same_as_start_); + break; + } + + if (TooManyInternalKeysSkipped()) { + return false; + } + + assert(ikey_.user_key.size() >= timestamp_size_); + Slice ts = timestamp_size_ > 0 ? ExtractTimestampFromUserKey( + ikey_.user_key, timestamp_size_) + : Slice(); + bool more_recent = false; + if (IsVisible(ikey_.sequence, ts, &more_recent)) { + // If the previous entry is of seqnum 0, the current entry will not + // possibly be skipped. This condition can potentially be relaxed to + // prev_key.seq <= ikey_.sequence. We are cautious because it will be more + // prone to bugs causing the same user key with the same sequence number. + // Note that with current timestamp implementation, the same user key can + // have different timestamps and zero sequence number on the bottommost + // level. This may change in the future. + if ((!is_prev_key_seqnum_zero || timestamp_size_ > 0) && + skipping_saved_key && + CompareKeyForSkip(ikey_.user_key, saved_key_.GetUserKey()) <= 0) { + num_skipped++; // skip this entry + PERF_COUNTER_ADD(internal_key_skipped_count, 1); + } else { + assert(!skipping_saved_key || + CompareKeyForSkip(ikey_.user_key, saved_key_.GetUserKey()) > 0); + num_skipped = 0; + reseek_done = false; + switch (ikey_.type) { + case kTypeDeletion: + case kTypeDeletionWithTimestamp: + case kTypeSingleDeletion: + // Arrange to skip all upcoming entries for this key since + // they are hidden by this deletion. 
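+            // Illustrative stream, newest first: (k, seq=7, kTypeDeletion)
+            // (k, seq=5, kTypeValue) -- once the deletion is chosen, the
+            // older value at seq=5 must be skipped too, which is what
+            // skipping_saved_key arranges below.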
+            if (timestamp_lb_) {
+              saved_key_.SetInternalKey(ikey_);
+              valid_ = true;
+              return true;
+            } else {
+              saved_key_.SetUserKey(
+                  ikey_.user_key, !pin_thru_lifetime_ ||
+                                      !iter_.iter()->IsKeyPinned() /* copy */);
+              skipping_saved_key = true;
+              PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
+            }
+            break;
+          case kTypeValue:
+          case kTypeBlobIndex:
+          case kTypeWideColumnEntity:
+            if (!iter_.PrepareValue()) {
+              assert(!iter_.status().ok());
+              valid_ = false;
+              return false;
+            }
+            if (timestamp_lb_) {
+              saved_key_.SetInternalKey(ikey_);
+            } else {
+              saved_key_.SetUserKey(
+                  ikey_.user_key, !pin_thru_lifetime_ ||
+                                      !iter_.iter()->IsKeyPinned() /* copy */);
+            }
+
+            if (ikey_.type == kTypeBlobIndex) {
+              if (!SetBlobValueIfNeeded(ikey_.user_key, iter_.value())) {
+                return false;
+              }
+
+              SetValueAndColumnsFromPlain(expose_blob_index_ ? iter_.value()
+                                                             : blob_value_);
+            } else if (ikey_.type == kTypeWideColumnEntity) {
+              if (!SetValueAndColumnsFromEntity(iter_.value())) {
+                return false;
+              }
+            } else {
+              assert(ikey_.type == kTypeValue);
+              SetValueAndColumnsFromPlain(iter_.value());
+            }
+
+            valid_ = true;
+            return true;
+            break;
+          case kTypeMerge:
+            if (!iter_.PrepareValue()) {
+              assert(!iter_.status().ok());
+              valid_ = false;
+              return false;
+            }
+            saved_key_.SetUserKey(
+                ikey_.user_key,
+                !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */);
+            // By now, we are sure the current ikey is going to yield a value
+            current_entry_is_merged_ = true;
+            valid_ = true;
+            return MergeValuesNewToOld();  // Go to a different state machine
+            break;
+          default:
+            valid_ = false;
+            status_ = Status::Corruption(
+                "Unknown value type: " +
+                std::to_string(static_cast<unsigned char>(ikey_.type)));
+            return false;
+        }
+      }
+    } else {
+      if (more_recent) {
+        PERF_COUNTER_ADD(internal_recent_skipped_count, 1);
+      }
+
+      // This key was inserted after our snapshot was taken or skipped by
+      // timestamp range. If this happens too many times in a row for the same
+      // user key, we want to seek to the target sequence number.
+      int cmp = user_comparator_.CompareWithoutTimestamp(
+          ikey_.user_key, saved_key_.GetUserKey());
+      if (cmp == 0 || (skipping_saved_key && cmp < 0)) {
+        num_skipped++;
+      } else {
+        saved_key_.SetUserKey(
+            ikey_.user_key,
+            !iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
+        skipping_saved_key = false;
+        num_skipped = 0;
+        reseek_done = false;
+      }
+    }
+
+    // If we have sequentially iterated via numerous equal keys, then it's
+    // better to seek so that we can avoid too many key comparisons.
+    //
+    // To avoid infinite loops, do not reseek if we have already attempted to
+    // reseek previously.
+    //
+    // TODO(lth): If we reseek to sequence number greater than ikey_.sequence,
+    // then it does not make sense to reseek as we would actually land further
+    // away from the desired key. There is opportunity for optimization here.
+    if (num_skipped > max_skip_ && !reseek_done) {
+      is_key_seqnum_zero_ = false;
+      num_skipped = 0;
+      reseek_done = true;
+      std::string last_key;
+      if (skipping_saved_key) {
+        // We're looking for the next user-key but all we see are the same
+        // user-key with decreasing sequence numbers. Fast forward to
+        // sequence number 0 and type deletion (the smallest type).
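+        // Illustrative: internal keys sort by user key ascending, then
+        // sequence descending, so (saved_key_, seq=0, kTypeDeletion) sorts
+        // after every other entry of saved_key_ and the Seek() below lands
+        // at (or just past) the end of that key's entries.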
+ if (timestamp_size_ == 0) { + AppendInternalKey( + &last_key, + ParsedInternalKey(saved_key_.GetUserKey(), 0, kTypeDeletion)); + } else { + const std::string kTsMin(timestamp_size_, '\0'); + AppendInternalKeyWithDifferentTimestamp( + &last_key, + ParsedInternalKey(saved_key_.GetUserKey(), 0, kTypeDeletion), + kTsMin); + } + // Don't set skipping_saved_key = false because we may still see more + // user-keys equal to saved_key_. + } else { + // We saw multiple entries with this user key and sequence numbers + // higher than sequence_. Fast forward to sequence_. + // Note that this only covers a case when a higher key was overwritten + // many times since our snapshot was taken, not the case when a lot of + // different keys were inserted after our snapshot was taken. + if (timestamp_size_ == 0) { + AppendInternalKey( + &last_key, ParsedInternalKey(saved_key_.GetUserKey(), sequence_, + kValueTypeForSeek)); + } else { + AppendInternalKeyWithDifferentTimestamp( + &last_key, + ParsedInternalKey(saved_key_.GetUserKey(), sequence_, + kValueTypeForSeek), + *timestamp_ub_); + } + } + iter_.Seek(last_key); + RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); + } else { + iter_.Next(); + } + } while (iter_.Valid()); + + valid_ = false; + return iter_.status().ok(); +} + +// Merge values of the same user key starting from the current iter_ position +// Scan from the newer entries to older entries. +// PRE: iter_.key() points to the first merge type entry +// saved_key_ stores the user key +// iter_.PrepareValue() has been called +// POST: saved_value_ has the merged value for the user key +// iter_ points to the next entry (or invalid) +bool DBIter::MergeValuesNewToOld() { + if (!merge_operator_) { + ROCKS_LOG_ERROR(logger_, "Options::merge_operator is null."); + status_ = Status::InvalidArgument("merge_operator_ must be set."); + valid_ = false; + return false; + } + + // Temporarily pin the blocks that hold merge operands + TempPinData(); + merge_context_.Clear(); + // Start the merge process by pushing the first operand + merge_context_.PushOperand( + iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */); + PERF_COUNTER_ADD(internal_merge_count, 1); + + TEST_SYNC_POINT("DBIter::MergeValuesNewToOld:PushedFirstOperand"); + + ParsedInternalKey ikey; + for (iter_.Next(); iter_.Valid(); iter_.Next()) { + TEST_SYNC_POINT("DBIter::MergeValuesNewToOld:SteppedToNextOperand"); + if (!ParseKey(&ikey)) { + return false; + } + + if (!user_comparator_.EqualWithoutTimestamp(ikey.user_key, + saved_key_.GetUserKey())) { + // hit the next user key, stop right here + break; + } + if (kTypeDeletion == ikey.type || kTypeSingleDeletion == ikey.type || + kTypeDeletionWithTimestamp == ikey.type) { + // hit a delete with the same user key, stop right here + // iter_ is positioned after delete + iter_.Next(); + break; + } + if (!iter_.PrepareValue()) { + valid_ = false; + return false; + } + + if (kTypeValue == ikey.type) { + // hit a put, merge the put value with operands and store the + // final result in saved_value_. We are done! + const Slice val = iter_.value(); + if (!Merge(&val, ikey.user_key)) { + return false; + } + // iter_ is positioned after put + iter_.Next(); + if (!iter_.status().ok()) { + valid_ = false; + return false; + } + return true; + } else if (kTypeMerge == ikey.type) { + // hit a merge, add the value as an operand and run associative merge. + // when complete, add result to operands and continue. 
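+      // Illustrative run for a counter-style merge operator (assumed),
+      // scanning newest to oldest: MERGE +1, MERGE +2, PUT 10. Both MERGE
+      // operands are pushed here; the PUT branch above then folds them over
+      // the base value, yielding 10 + 2 + 1 = 13.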
+      merge_context_.PushOperand(
+          iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */);
+      PERF_COUNTER_ADD(internal_merge_count, 1);
+    } else if (kTypeBlobIndex == ikey.type) {
+      if (expose_blob_index_) {
+        status_ =
+            Status::NotSupported("BlobDB does not support merge operator.");
+        valid_ = false;
+        return false;
+      }
+      // hit a put, merge the put value with operands and store the
+      // final result in saved_value_. We are done!
+      if (!SetBlobValueIfNeeded(ikey.user_key, iter_.value())) {
+        return false;
+      }
+      valid_ = true;
+      if (!Merge(&blob_value_, ikey.user_key)) {
+        return false;
+      }
+
+      ResetBlobValue();
+
+      // iter_ is positioned after put
+      iter_.Next();
+      if (!iter_.status().ok()) {
+        valid_ = false;
+        return false;
+      }
+      return true;
+    } else if (kTypeWideColumnEntity == ikey.type) {
+      if (!MergeEntity(iter_.value(), ikey.user_key)) {
+        return false;
+      }
+
+      // iter_ is positioned after put
+      iter_.Next();
+      if (!iter_.status().ok()) {
+        valid_ = false;
+        return false;
+      }
+
+      return true;
+    } else {
+      valid_ = false;
+      status_ = Status::Corruption(
+          "Unrecognized value type: " +
+          std::to_string(static_cast<unsigned char>(ikey.type)));
+      return false;
+    }
+  }
+
+  if (!iter_.status().ok()) {
+    valid_ = false;
+    return false;
+  }
+
+  // we either exhausted all internal keys under this user key, or hit
+  // a deletion marker.
+  // feed null as the existing value to the merge operator, such that
+  // client can differentiate this scenario and do things accordingly.
+  if (!Merge(nullptr, saved_key_.GetUserKey())) {
+    return false;
+  }
+  assert(status_.ok());
+  return true;
+}
+
+void DBIter::Prev() {
+  assert(valid_);
+  assert(status_.ok());
+
+  PERF_COUNTER_ADD(iter_prev_count, 1);
+  PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, clock_);
+  ReleaseTempPinnedData();
+  ResetBlobValue();
+  ResetValueAndColumns();
+  ResetInternalKeysSkippedCounter();
+  bool ok = true;
+  if (direction_ == kForward) {
+    if (!ReverseToBackward()) {
+      ok = false;
+    }
+  }
+  if (ok) {
+    ClearSavedValue();
+
+    Slice prefix;
+    if (prefix_same_as_start_) {
+      assert(prefix_extractor_ != nullptr);
+      prefix = prefix_.GetUserKey();
+    }
+    PrevInternal(prefix_same_as_start_ ? &prefix : nullptr);
+  }
+
+  if (statistics_ != nullptr) {
+    local_stats_.prev_count_++;
+    if (valid_) {
+      local_stats_.prev_found_count_++;
+      local_stats_.bytes_read_ += (key().size() + value().size());
+    }
+  }
+}
+
+bool DBIter::ReverseToForward() {
+  assert(iter_.status().ok());
+
+  // When moving backwards, iter_ is positioned on _previous_ key, which may
+  // not exist or may have different prefix than the current key().
+  // If that's the case, seek iter_ to current key.
+  if (!expect_total_order_inner_iter() || !iter_.Valid()) {
+    IterKey last_key;
+    ParsedInternalKey pikey(saved_key_.GetUserKey(), kMaxSequenceNumber,
+                            kValueTypeForSeek);
+    if (timestamp_size_ > 0) {
+      // TODO: pre-create kTsMax.
+      const std::string kTsMax(timestamp_size_, '\xff');
+      pikey.SetTimestamp(kTsMax);
+    }
+    last_key.SetInternalKey(pikey);
+    iter_.Seek(last_key.GetInternalKey());
+    RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION);
+  }
+
+  direction_ = kForward;
+  // Skip keys less than the current key() (a.k.a. saved_key_).
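+  // Illustrative: if saved_key_ is "k2" and the reseek above landed among
+  // "k1"'s entries, the loop below advances until ikey.user_key >= "k2".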
+ while (iter_.Valid()) { + ParsedInternalKey ikey; + if (!ParseKey(&ikey)) { + return false; + } + if (user_comparator_.Compare(ikey.user_key, saved_key_.GetUserKey()) >= 0) { + return true; + } + iter_.Next(); + } + + if (!iter_.status().ok()) { + valid_ = false; + return false; + } + + return true; +} + +// Move iter_ to the key before saved_key_. +bool DBIter::ReverseToBackward() { + assert(iter_.status().ok()); + + // When current_entry_is_merged_ is true, iter_ may be positioned on the next + // key, which may not exist or may have prefix different from current. + // If that's the case, seek to saved_key_. + if (current_entry_is_merged_ && + (!expect_total_order_inner_iter() || !iter_.Valid())) { + IterKey last_key; + // Using kMaxSequenceNumber and kValueTypeForSeek + // (not kValueTypeForSeekForPrev) to seek to a key strictly smaller + // than saved_key_. + last_key.SetInternalKey(ParsedInternalKey( + saved_key_.GetUserKey(), kMaxSequenceNumber, kValueTypeForSeek)); + if (!expect_total_order_inner_iter()) { + iter_.SeekForPrev(last_key.GetInternalKey()); + } else { + // Some iterators may not support SeekForPrev(), so we avoid using it + // when prefix seek mode is disabled. This is somewhat expensive + // (an extra Prev(), as well as an extra change of direction of iter_), + // so we may need to reconsider it later. + iter_.Seek(last_key.GetInternalKey()); + if (!iter_.Valid() && iter_.status().ok()) { + iter_.SeekToLast(); + } + } + RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); + } + + direction_ = kReverse; + return FindUserKeyBeforeSavedKey(); +} + +void DBIter::PrevInternal(const Slice* prefix) { + while (iter_.Valid()) { + saved_key_.SetUserKey( + ExtractUserKey(iter_.key()), + !iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */); + + assert(prefix == nullptr || prefix_extractor_ != nullptr); + if (prefix != nullptr && + prefix_extractor_ + ->Transform(StripTimestampFromUserKey(saved_key_.GetUserKey(), + timestamp_size_)) + .compare(*prefix) != 0) { + assert(prefix_same_as_start_); + // Current key does not have the same prefix as start + valid_ = false; + return; + } + + assert(iterate_lower_bound_ == nullptr || iter_.MayBeOutOfLowerBound() || + user_comparator_.CompareWithoutTimestamp( + saved_key_.GetUserKey(), /*a_has_ts=*/true, + *iterate_lower_bound_, /*b_has_ts=*/false) >= 0); + if (iterate_lower_bound_ != nullptr && iter_.MayBeOutOfLowerBound() && + user_comparator_.CompareWithoutTimestamp( + saved_key_.GetUserKey(), /*a_has_ts=*/true, *iterate_lower_bound_, + /*b_has_ts=*/false) < 0) { + // We've iterated earlier than the user-specified lower bound. + valid_ = false; + return; + } + + if (!FindValueForCurrentKey()) { // assigns valid_ + return; + } + + // Whether or not we found a value for current key, we need iter_ to end up + // on a smaller key. + if (!FindUserKeyBeforeSavedKey()) { + return; + } + + if (valid_) { + // Found the value. + return; + } + + if (TooManyInternalKeysSkipped(false)) { + return; + } + } + + // We haven't found any key - iterator is not valid + valid_ = false; +} + +// Used for backwards iteration. +// Looks at the entries with user key saved_key_ and finds the most up-to-date +// value for it, or executes a merge, or determines that the value was deleted. +// Sets valid_ to true if the value is found and is ready to be presented to +// the user through value(). +// Sets valid_ to false if the value was deleted, and we should try another key. +// Returns false if an error occurred, and !status().ok() and !valid_. 
+// +// PRE: iter_ is positioned on the last entry with user key equal to saved_key_. +// POST: iter_ is positioned on one of the entries equal to saved_key_, or on +// the entry just before them, or on the entry just after them. +bool DBIter::FindValueForCurrentKey() { + assert(iter_.Valid()); + merge_context_.Clear(); + current_entry_is_merged_ = false; + // last entry before merge (could be kTypeDeletion, + // kTypeDeletionWithTimestamp, kTypeSingleDeletion, kTypeValue, + // kTypeBlobIndex, or kTypeWideColumnEntity) + ValueType last_not_merge_type = kTypeDeletion; + ValueType last_key_entry_type = kTypeDeletion; + + // If false, it indicates that we have not seen any valid entry, even though + // last_key_entry_type is initialized to kTypeDeletion. + bool valid_entry_seen = false; + + // Temporarily pin blocks that hold (merge operands / the value) + ReleaseTempPinnedData(); + TempPinData(); + size_t num_skipped = 0; + while (iter_.Valid()) { + ParsedInternalKey ikey; + if (!ParseKey(&ikey)) { + return false; + } + + if (!user_comparator_.EqualWithoutTimestamp(ikey.user_key, + saved_key_.GetUserKey())) { + // Found a smaller user key, thus we are done with current user key. + break; + } + + assert(ikey.user_key.size() >= timestamp_size_); + Slice ts; + if (timestamp_size_ > 0) { + ts = Slice(ikey.user_key.data() + ikey.user_key.size() - timestamp_size_, + timestamp_size_); + } + + bool visible = IsVisible(ikey.sequence, ts); + if (!visible && + (timestamp_lb_ == nullptr || + user_comparator_.CompareTimestamp(ts, *timestamp_ub_) > 0)) { + // Found an invisible version of the current user key, and it must have + // a higher sequence number or timestamp. Therefore, we are done with the + // current user key. + break; + } + + if (!ts.empty()) { + saved_timestamp_.assign(ts.data(), ts.size()); + } + + if (TooManyInternalKeysSkipped()) { + return false; + } + + // This user key has lots of entries. + // We're going from old to new, and it's taking too long. Let's do a Seek() + // and go from new to old. This helps when a key was overwritten many times. + if (num_skipped >= max_skip_) { + return FindValueForCurrentKeyUsingSeek(); + } + + if (!iter_.PrepareValue()) { + valid_ = false; + return false; + } + + if (timestamp_lb_ != nullptr) { + // Only needed when timestamp_lb_ is not null + [[maybe_unused]] const bool ret = ParseKey(&ikey_); + // Since the preceding ParseKey(&ikey) succeeds, so must this. 
+      assert(ret);
+      saved_key_.SetInternalKey(ikey);
+    } else if (user_comparator_.Compare(ikey.user_key,
+                                        saved_key_.GetUserKey()) < 0) {
+      saved_key_.SetUserKey(
+          ikey.user_key,
+          !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */);
+    }
+
+    valid_entry_seen = true;
+    last_key_entry_type = ikey.type;
+    switch (last_key_entry_type) {
+      case kTypeValue:
+      case kTypeBlobIndex:
+      case kTypeWideColumnEntity:
+        if (iter_.iter()->IsValuePinned()) {
+          pinned_value_ = iter_.value();
+        } else {
+          valid_ = false;
+          status_ = Status::NotSupported(
+              "Backward iteration not supported if underlying iterator's value "
+              "cannot be pinned.");
+        }
+        merge_context_.Clear();
+        last_not_merge_type = last_key_entry_type;
+        if (!status_.ok()) {
+          return false;
+        }
+        break;
+      case kTypeDeletion:
+      case kTypeDeletionWithTimestamp:
+      case kTypeSingleDeletion:
+        merge_context_.Clear();
+        last_not_merge_type = last_key_entry_type;
+        PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
+        break;
+      case kTypeMerge: {
+        assert(merge_operator_ != nullptr);
+        merge_context_.PushOperandBack(
+            iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */);
+        PERF_COUNTER_ADD(internal_merge_count, 1);
+      } break;
+      default:
+        valid_ = false;
+        status_ = Status::Corruption(
+            "Unknown value type: " +
+            std::to_string(static_cast<unsigned char>(last_key_entry_type)));
+        return false;
+    }
+
+    PERF_COUNTER_ADD(internal_key_skipped_count, 1);
+    iter_.Prev();
+    ++num_skipped;
+
+    if (visible && timestamp_lb_ != nullptr) {
+      // If timestamp_lb_ is not nullptr, we do not have to look further for
+      // another internal key. We can return this current internal key. Yet we
+      // still keep the invariant that iter_ is positioned before the returned
+      // key.
+      break;
+    }
+  }
+
+  if (!iter_.status().ok()) {
+    valid_ = false;
+    return false;
+  }
+
+  if (!valid_entry_seen) {
+    // Since we haven't seen any valid entry, last_key_entry_type remains
+    // unchanged and the same as its initial value.
+    assert(last_key_entry_type == kTypeDeletion);
+    assert(last_not_merge_type == kTypeDeletion);
+    valid_ = false;
+    return true;
+  }
+
+  if (timestamp_lb_ != nullptr) {
+    assert(last_key_entry_type == ikey_.type);
+  }
+
+  switch (last_key_entry_type) {
+    case kTypeDeletion:
+    case kTypeDeletionWithTimestamp:
+    case kTypeSingleDeletion:
+      if (timestamp_lb_ == nullptr) {
+        valid_ = false;
+      } else {
+        valid_ = true;
+      }
+      return true;
+    case kTypeMerge:
+      current_entry_is_merged_ = true;
+      if (last_not_merge_type == kTypeDeletion ||
+          last_not_merge_type == kTypeSingleDeletion ||
+          last_not_merge_type == kTypeDeletionWithTimestamp) {
+        if (!Merge(nullptr, saved_key_.GetUserKey())) {
+          return false;
+        }
+        return true;
+      } else if (last_not_merge_type == kTypeBlobIndex) {
+        if (expose_blob_index_) {
+          status_ =
+              Status::NotSupported("BlobDB does not support merge operator.");
+          valid_ = false;
+          return false;
+        }
+        if (!SetBlobValueIfNeeded(saved_key_.GetUserKey(), pinned_value_)) {
+          return false;
+        }
+        valid_ = true;
+        if (!Merge(&blob_value_, saved_key_.GetUserKey())) {
+          return false;
+        }
+
+        ResetBlobValue();
+
+        return true;
+      } else if (last_not_merge_type == kTypeWideColumnEntity) {
+        if (!MergeEntity(pinned_value_, saved_key_.GetUserKey())) {
+          return false;
+        }
+
+        return true;
+      } else {
+        assert(last_not_merge_type == kTypeValue);
+        if (!Merge(&pinned_value_, saved_key_.GetUserKey())) {
+          return false;
+        }
+        return true;
+      }
+      break;
+    case kTypeValue:
+      SetValueAndColumnsFromPlain(pinned_value_);
+
+      break;
+    case kTypeBlobIndex:
+      if (!SetBlobValueIfNeeded(saved_key_.GetUserKey(), pinned_value_)) {
+        return false;
+      }
+
+      SetValueAndColumnsFromPlain(expose_blob_index_ ? pinned_value_
+                                                     : blob_value_);
+
+      break;
+    case kTypeWideColumnEntity:
+      if (!SetValueAndColumnsFromEntity(pinned_value_)) {
+        return false;
+      }
+      break;
+    default:
+      valid_ = false;
+      status_ = Status::Corruption(
+          "Unknown value type: " +
+          std::to_string(static_cast<unsigned char>(last_key_entry_type)));
+      return false;
+  }
+  valid_ = true;
+  return true;
+}
+
+// This function is used in FindValueForCurrentKey.
+// We use Seek() function instead of Prev() to find necessary value
+// TODO: This is very similar to FindNextUserEntry() and MergeValuesNewToOld().
+// Would be nice to reuse some code.
+bool DBIter::FindValueForCurrentKeyUsingSeek() {
+  // FindValueForCurrentKey will enable pinning before calling
+  // FindValueForCurrentKeyUsingSeek()
+  assert(pinned_iters_mgr_.PinningEnabled());
+  std::string last_key;
+  if (0 == timestamp_size_) {
+    AppendInternalKey(&last_key,
+                      ParsedInternalKey(saved_key_.GetUserKey(), sequence_,
+                                        kValueTypeForSeek));
+  } else {
+    AppendInternalKeyWithDifferentTimestamp(
+        &last_key,
+        ParsedInternalKey(saved_key_.GetUserKey(), sequence_,
+                          kValueTypeForSeek),
+        timestamp_lb_ == nullptr ? *timestamp_ub_ : *timestamp_lb_);
+  }
+  iter_.Seek(last_key);
+  RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION);
+
+  // In case a read_callback is present, the value we seek to may not be
+  // visible. Find the next value that's visible.
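+  // Illustrative: the Seek() above lands on the newest entry with
+  // seq <= sequence_, but a ReadCallback (e.g. for an uncommitted
+  // write-prepared transaction) may still reject it, so the loop below
+  // advances until IsVisible() accepts a version or the user key changes.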
+ ParsedInternalKey ikey; + + while (true) { + if (!iter_.Valid()) { + valid_ = false; + return iter_.status().ok(); + } + + if (!ParseKey(&ikey)) { + return false; + } + assert(ikey.user_key.size() >= timestamp_size_); + Slice ts; + if (timestamp_size_ > 0) { + ts = Slice(ikey.user_key.data() + ikey.user_key.size() - timestamp_size_, + timestamp_size_); + } + + if (!user_comparator_.EqualWithoutTimestamp(ikey.user_key, + saved_key_.GetUserKey())) { + // No visible values for this key, even though FindValueForCurrentKey() + // has seen some. This is possible if we're using a tailing iterator, and + // the entries were discarded in a compaction. + valid_ = false; + return true; + } + + if (IsVisible(ikey.sequence, ts)) { + break; + } + + iter_.Next(); + } + + if (ikey.type == kTypeDeletion || ikey.type == kTypeSingleDeletion || + kTypeDeletionWithTimestamp == ikey.type) { + if (timestamp_lb_ == nullptr) { + valid_ = false; + } else { + valid_ = true; + saved_key_.SetInternalKey(ikey); + } + return true; + } + if (!iter_.PrepareValue()) { + valid_ = false; + return false; + } + if (timestamp_size_ > 0) { + Slice ts = ExtractTimestampFromUserKey(ikey.user_key, timestamp_size_); + saved_timestamp_.assign(ts.data(), ts.size()); + } + if (ikey.type == kTypeValue || ikey.type == kTypeBlobIndex || + ikey.type == kTypeWideColumnEntity) { + assert(iter_.iter()->IsValuePinned()); + pinned_value_ = iter_.value(); + if (ikey.type == kTypeBlobIndex) { + if (!SetBlobValueIfNeeded(ikey.user_key, pinned_value_)) { + return false; + } + + SetValueAndColumnsFromPlain(expose_blob_index_ ? pinned_value_ + : blob_value_); + } else if (ikey.type == kTypeWideColumnEntity) { + if (!SetValueAndColumnsFromEntity(pinned_value_)) { + return false; + } + } else { + assert(ikey.type == kTypeValue); + SetValueAndColumnsFromPlain(pinned_value_); + } + + if (timestamp_lb_ != nullptr) { + saved_key_.SetInternalKey(ikey); + } + + valid_ = true; + return true; + } + + // kTypeMerge. 
+  // We need to collect all kTypeMerge values and save them
+  // in operands
+  assert(ikey.type == kTypeMerge);
+  current_entry_is_merged_ = true;
+  merge_context_.Clear();
+  merge_context_.PushOperand(
+      iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */);
+  PERF_COUNTER_ADD(internal_merge_count, 1);
+
+  while (true) {
+    iter_.Next();
+
+    if (!iter_.Valid()) {
+      if (!iter_.status().ok()) {
+        valid_ = false;
+        return false;
+      }
+      break;
+    }
+    if (!ParseKey(&ikey)) {
+      return false;
+    }
+    if (!user_comparator_.EqualWithoutTimestamp(ikey.user_key,
+                                                saved_key_.GetUserKey())) {
+      break;
+    }
+    if (ikey.type == kTypeDeletion || ikey.type == kTypeSingleDeletion ||
+        ikey.type == kTypeDeletionWithTimestamp) {
+      break;
+    }
+    if (!iter_.PrepareValue()) {
+      valid_ = false;
+      return false;
+    }
+
+    if (ikey.type == kTypeValue) {
+      const Slice val = iter_.value();
+      if (!Merge(&val, saved_key_.GetUserKey())) {
+        return false;
+      }
+      return true;
+    } else if (ikey.type == kTypeMerge) {
+      merge_context_.PushOperand(
+          iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */);
+      PERF_COUNTER_ADD(internal_merge_count, 1);
+    } else if (ikey.type == kTypeBlobIndex) {
+      if (expose_blob_index_) {
+        status_ =
+            Status::NotSupported("BlobDB does not support merge operator.");
+        valid_ = false;
+        return false;
+      }
+      if (!SetBlobValueIfNeeded(ikey.user_key, iter_.value())) {
+        return false;
+      }
+      valid_ = true;
+      if (!Merge(&blob_value_, saved_key_.GetUserKey())) {
+        return false;
+      }
+
+      ResetBlobValue();
+
+      return true;
+    } else if (ikey.type == kTypeWideColumnEntity) {
+      if (!MergeEntity(iter_.value(), saved_key_.GetUserKey())) {
+        return false;
+      }
+
+      return true;
+    } else {
+      valid_ = false;
+      status_ = Status::Corruption(
+          "Unknown value type: " +
+          std::to_string(static_cast<unsigned char>(ikey.type)));
+      return false;
+    }
+  }
+
+  if (!Merge(nullptr, saved_key_.GetUserKey())) {
+    return false;
+  }
+
+  // Make sure we leave iter_ in a good state. If it's valid and we don't care
+  // about prefixes, that's already good enough. Otherwise it needs to be
+  // seeked to the current key.
+  if (!expect_total_order_inner_iter() || !iter_.Valid()) {
+    if (!expect_total_order_inner_iter()) {
+      iter_.SeekForPrev(last_key);
+    } else {
+      iter_.Seek(last_key);
+      if (!iter_.Valid() && iter_.status().ok()) {
+        iter_.SeekToLast();
+      }
+    }
+    RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION);
+  }
+
+  valid_ = true;
+  return true;
+}
+
+bool DBIter::Merge(const Slice* val, const Slice& user_key) {
+  // `op_failure_scope` (an output parameter) is not provided (set to nullptr)
+  // since a failure must be propagated regardless of its value.
+  Status s = MergeHelper::TimedFullMerge(
+      merge_operator_, user_key, val, merge_context_.GetOperands(),
+      &saved_value_, logger_, statistics_, clock_, &pinned_value_,
+      /* update_num_ops_stats */ true,
+      /* op_failure_scope */ nullptr);
+  if (!s.ok()) {
+    valid_ = false;
+    status_ = s;
+    return false;
+  }
+
+  SetValueAndColumnsFromPlain(pinned_value_.data() ? pinned_value_
+                                                   : saved_value_);
+
+  valid_ = true;
+  return true;
+}
+
+bool DBIter::MergeEntity(const Slice& entity, const Slice& user_key) {
+  // `op_failure_scope` (an output parameter) is not provided (set to nullptr)
+  // since a failure must be propagated regardless of its value.
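+  // (Editorial sketch, not authoritative: the call below folds the operands
+  // collected in merge_context_ into the wide-column base `entity` and
+  // serializes the result into saved_value_, mirroring DBIter::Merge().)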
+ Status s = MergeHelper::TimedFullMergeWithEntity( + merge_operator_, user_key, entity, merge_context_.GetOperands(), + &saved_value_, logger_, statistics_, clock_, + /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr); + if (!s.ok()) { + valid_ = false; + status_ = s; + return false; + } + + if (!SetValueAndColumnsFromEntity(saved_value_)) { + return false; + } + + valid_ = true; + return true; +} + +// Move backwards until the key smaller than saved_key_. +// Changes valid_ only if return value is false. +bool DBIter::FindUserKeyBeforeSavedKey() { + assert(status_.ok()); + size_t num_skipped = 0; + while (iter_.Valid()) { + ParsedInternalKey ikey; + if (!ParseKey(&ikey)) { + return false; + } + + if (CompareKeyForSkip(ikey.user_key, saved_key_.GetUserKey()) < 0) { + return true; + } + + if (TooManyInternalKeysSkipped()) { + return false; + } + + assert(ikey.sequence != kMaxSequenceNumber); + assert(ikey.user_key.size() >= timestamp_size_); + Slice ts; + if (timestamp_size_ > 0) { + ts = Slice(ikey.user_key.data() + ikey.user_key.size() - timestamp_size_, + timestamp_size_); + } + if (!IsVisible(ikey.sequence, ts)) { + PERF_COUNTER_ADD(internal_recent_skipped_count, 1); + } else { + PERF_COUNTER_ADD(internal_key_skipped_count, 1); + } + + if (num_skipped >= max_skip_) { + num_skipped = 0; + IterKey last_key; + ParsedInternalKey pikey(saved_key_.GetUserKey(), kMaxSequenceNumber, + kValueTypeForSeek); + if (timestamp_size_ > 0) { + // TODO: pre-create kTsMax. + const std::string kTsMax(timestamp_size_, '\xff'); + pikey.SetTimestamp(kTsMax); + } + last_key.SetInternalKey(pikey); + // It would be more efficient to use SeekForPrev() here, but some + // iterators may not support it. + iter_.Seek(last_key.GetInternalKey()); + RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); + if (!iter_.Valid()) { + break; + } + } else { + ++num_skipped; + } + + iter_.Prev(); + } + + if (!iter_.status().ok()) { + valid_ = false; + return false; + } + + return true; +} + +bool DBIter::TooManyInternalKeysSkipped(bool increment) { + if ((max_skippable_internal_keys_ > 0) && + (num_internal_keys_skipped_ > max_skippable_internal_keys_)) { + valid_ = false; + status_ = Status::Incomplete("Too many internal keys skipped."); + return true; + } else if (increment) { + num_internal_keys_skipped_++; + } + return false; +} + +bool DBIter::IsVisible(SequenceNumber sequence, const Slice& ts, + bool* more_recent) { + // Remember that comparator orders preceding timestamp as larger. + // TODO(yanqin): support timestamp in read_callback_. + bool visible_by_seq = (read_callback_ == nullptr) + ? sequence <= sequence_ + : read_callback_->IsVisible(sequence); + + bool visible_by_ts = + (timestamp_ub_ == nullptr || + user_comparator_.CompareTimestamp(ts, *timestamp_ub_) <= 0) && + (timestamp_lb_ == nullptr || + user_comparator_.CompareTimestamp(ts, *timestamp_lb_) >= 0); + + if (more_recent) { + *more_recent = !visible_by_seq; + } + return visible_by_seq && visible_by_ts; +} + +void DBIter::SetSavedKeyToSeekTarget(const Slice& target) { + is_key_seqnum_zero_ = false; + SequenceNumber seq = sequence_; + saved_key_.Clear(); + saved_key_.SetInternalKey(target, seq, kValueTypeForSeek, timestamp_ub_); + + if (iterate_lower_bound_ != nullptr && + user_comparator_.CompareWithoutTimestamp( + saved_key_.GetUserKey(), /*a_has_ts=*/true, *iterate_lower_bound_, + /*b_has_ts=*/false) < 0) { + // Seek key is smaller than the lower bound. 
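+    // (Editorial note: the target is clamped up to iterate_lower_bound_ so a
+    // forward seek never starts below the configured lower bound.)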
+ saved_key_.Clear(); + saved_key_.SetInternalKey(*iterate_lower_bound_, seq, kValueTypeForSeek, + timestamp_ub_); + } +} + +void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) { + is_key_seqnum_zero_ = false; + saved_key_.Clear(); + // now saved_key is used to store internal key. + saved_key_.SetInternalKey(target, 0 /* sequence_number */, + kValueTypeForSeekForPrev, timestamp_ub_); + + if (timestamp_size_ > 0) { + const std::string kTsMin(timestamp_size_, '\0'); + Slice ts = kTsMin; + saved_key_.UpdateInternalKey( + /*seq=*/0, kValueTypeForSeekForPrev, + timestamp_lb_ == nullptr ? &ts : timestamp_lb_); + } + + if (iterate_upper_bound_ != nullptr && + user_comparator_.CompareWithoutTimestamp( + saved_key_.GetUserKey(), /*a_has_ts=*/true, *iterate_upper_bound_, + /*b_has_ts=*/false) >= 0) { + saved_key_.Clear(); + saved_key_.SetInternalKey(*iterate_upper_bound_, kMaxSequenceNumber, + kValueTypeForSeekForPrev, timestamp_ub_); + if (timestamp_size_ > 0) { + const std::string kTsMax(timestamp_size_, '\xff'); + Slice ts = kTsMax; + saved_key_.UpdateInternalKey(kMaxSequenceNumber, kValueTypeForSeekForPrev, + &ts); + } + } +} + +void DBIter::Seek(const Slice& target) { + PERF_COUNTER_ADD(iter_seek_count, 1); + PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_); + StopWatch sw(clock_, statistics_, DB_SEEK); + + if (db_impl_ != nullptr && cfd_ != nullptr) { + // TODO: What do we do if this returns an error? + Slice lower_bound, upper_bound; + if (iterate_lower_bound_ != nullptr) { + lower_bound = *iterate_lower_bound_; + } else { + lower_bound = Slice(""); + } + if (iterate_upper_bound_ != nullptr) { + upper_bound = *iterate_upper_bound_; + } else { + upper_bound = Slice(""); + } + db_impl_->TraceIteratorSeek(cfd_->GetID(), target, lower_bound, upper_bound) + .PermitUncheckedError(); + } + + status_ = Status::OK(); + ReleaseTempPinnedData(); + ResetBlobValue(); + ResetValueAndColumns(); + ResetInternalKeysSkippedCounter(); + + // Seek the inner iterator based on the target key. + { + PERF_TIMER_GUARD(seek_internal_seek_time); + + SetSavedKeyToSeekTarget(target); + iter_.Seek(saved_key_.GetInternalKey()); + + RecordTick(statistics_, NUMBER_DB_SEEK); + } + if (!iter_.Valid()) { + valid_ = false; + return; + } + direction_ = kForward; + + // Now the inner iterator is placed to the target position. From there, + // we need to find out the next key that is visible to the user. + ClearSavedValue(); + if (prefix_same_as_start_) { + // The case where the iterator needs to be invalidated if it has exhausted + // keys within the same prefix of the seek key. + assert(prefix_extractor_ != nullptr); + Slice target_prefix = prefix_extractor_->Transform(target); + FindNextUserEntry(false /* not skipping saved_key */, + &target_prefix /* prefix */); + if (valid_) { + // Remember the prefix of the seek key for the future Next() call to + // check. + prefix_.SetUserKey(target_prefix); + } + } else { + FindNextUserEntry(false /* not skipping saved_key */, nullptr); + } + if (!valid_) { + return; + } + + // Updating stats and perf context counters. 
+  if (statistics_ != nullptr) {
+    // Decrement since we don't want to count this key as skipped
+    RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
+    RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
+  }
+  PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
+}
+
+void DBIter::SeekForPrev(const Slice& target) {
+  PERF_COUNTER_ADD(iter_seek_count, 1);
+  PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
+  StopWatch sw(clock_, statistics_, DB_SEEK);
+
+  if (db_impl_ != nullptr && cfd_ != nullptr) {
+    // TODO: What do we do if this returns an error?
+    Slice lower_bound, upper_bound;
+    if (iterate_lower_bound_ != nullptr) {
+      lower_bound = *iterate_lower_bound_;
+    } else {
+      lower_bound = Slice("");
+    }
+    if (iterate_upper_bound_ != nullptr) {
+      upper_bound = *iterate_upper_bound_;
+    } else {
+      upper_bound = Slice("");
+    }
+    db_impl_
+        ->TraceIteratorSeekForPrev(cfd_->GetID(), target, lower_bound,
+                                   upper_bound)
+        .PermitUncheckedError();
+  }
+
+  status_ = Status::OK();
+  ReleaseTempPinnedData();
+  ResetBlobValue();
+  ResetValueAndColumns();
+  ResetInternalKeysSkippedCounter();
+
+  // Seek the inner iterator based on the target key.
+  {
+    PERF_TIMER_GUARD(seek_internal_seek_time);
+    SetSavedKeyToSeekForPrevTarget(target);
+    iter_.SeekForPrev(saved_key_.GetInternalKey());
+    RecordTick(statistics_, NUMBER_DB_SEEK);
+  }
+  if (!iter_.Valid()) {
+    valid_ = false;
+    return;
+  }
+  direction_ = kReverse;
+
+  // Now the inner iterator is placed to the target position. From there,
+  // we need to find out the first key that is visible to the user in the
+  // backward direction.
+  ClearSavedValue();
+  if (prefix_same_as_start_) {
+    // The case where the iterator needs to be invalidated if it has exhausted
+    // keys within the same prefix of the seek key.
+    assert(prefix_extractor_ != nullptr);
+    Slice target_prefix = prefix_extractor_->Transform(target);
+    PrevInternal(&target_prefix);
+    if (valid_) {
+      // Remember the prefix of the seek key for the future Prev() call to
+      // check.
+      prefix_.SetUserKey(target_prefix);
+    }
+  } else {
+    PrevInternal(nullptr);
+  }
+
+  // Report stats and perf context.
+  if (statistics_ != nullptr && valid_) {
+    RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
+    RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
+    PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
+  }
+}
+
+void DBIter::SeekToFirst() {
+  if (iterate_lower_bound_ != nullptr) {
+    Seek(*iterate_lower_bound_);
+    return;
+  }
+  PERF_COUNTER_ADD(iter_seek_count, 1);
+  PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
+  // Don't use iter_::Seek() if we set a prefix extractor
+  // because prefix seek will be used.
+  if (!expect_total_order_inner_iter()) {
+    max_skip_ = std::numeric_limits<uint64_t>::max();
+  }
+  status_ = Status::OK();
+  // if iterator is empty, this status_ could be unchecked.
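+  // (Editorial note: PermitUncheckedError() marks status_ as inspected so
+  // that ASSERT_STATUS_CHECKED builds do not complain if the caller never
+  // reads status() on an empty iterator.)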
+  status_.PermitUncheckedError();
+  direction_ = kForward;
+  ReleaseTempPinnedData();
+  ResetBlobValue();
+  ResetValueAndColumns();
+  ResetInternalKeysSkippedCounter();
+  ClearSavedValue();
+  is_key_seqnum_zero_ = false;
+
+  {
+    PERF_TIMER_GUARD(seek_internal_seek_time);
+    iter_.SeekToFirst();
+  }
+
+  RecordTick(statistics_, NUMBER_DB_SEEK);
+  if (iter_.Valid()) {
+    saved_key_.SetUserKey(
+        ExtractUserKey(iter_.key()),
+        !iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
+    FindNextUserEntry(false /* not skipping saved_key */,
+                      nullptr /* no prefix check */);
+    if (statistics_ != nullptr) {
+      if (valid_) {
+        RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
+        RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
+        PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
+      }
+    }
+  } else {
+    valid_ = false;
+  }
+  if (valid_ && prefix_same_as_start_) {
+    assert(prefix_extractor_ != nullptr);
+    prefix_.SetUserKey(prefix_extractor_->Transform(
+        StripTimestampFromUserKey(saved_key_.GetUserKey(), timestamp_size_)));
+  }
+}
+
+void DBIter::SeekToLast() {
+  if (iterate_upper_bound_ != nullptr) {
+    // Seek to last key strictly less than ReadOptions.iterate_upper_bound.
+    SeekForPrev(*iterate_upper_bound_);
+#ifndef NDEBUG
+    Slice k = Valid() ? key() : Slice();
+    if (Valid() && timestamp_size_ > 0 && timestamp_lb_) {
+      k.remove_suffix(kNumInternalBytes + timestamp_size_);
+    }
+    assert(!Valid() || user_comparator_.CompareWithoutTimestamp(
+                           k, /*a_has_ts=*/false, *iterate_upper_bound_,
+                           /*b_has_ts=*/false) < 0);
+#endif
+    return;
+  }
+
+  PERF_COUNTER_ADD(iter_seek_count, 1);
+  PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
+  // Don't use iter_::Seek() if we set a prefix extractor
+  // because prefix seek will be used.
+  if (!expect_total_order_inner_iter()) {
+    max_skip_ = std::numeric_limits<uint64_t>::max();
+  }
+  status_ = Status::OK();
+  // if iterator is empty, this status_ could be unchecked.
+  status_.PermitUncheckedError();
+  direction_ = kReverse;
+  ReleaseTempPinnedData();
+  ResetBlobValue();
+  ResetValueAndColumns();
+  ResetInternalKeysSkippedCounter();
+  ClearSavedValue();
+  is_key_seqnum_zero_ = false;
+
+  {
+    PERF_TIMER_GUARD(seek_internal_seek_time);
+    iter_.SeekToLast();
+  }
+  PrevInternal(nullptr);
+  if (statistics_ != nullptr) {
+    RecordTick(statistics_, NUMBER_DB_SEEK);
+    if (valid_) {
+      RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
+      RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
+      PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
+    }
+  }
+  if (valid_ && prefix_same_as_start_) {
+    assert(prefix_extractor_ != nullptr);
+    prefix_.SetUserKey(prefix_extractor_->Transform(
+        StripTimestampFromUserKey(saved_key_.GetUserKey(), timestamp_size_)));
+  }
+}
+
+Iterator* NewDBIterator(Env* env, const ReadOptions& read_options,
+                        const ImmutableOptions& ioptions,
+                        const MutableCFOptions& mutable_cf_options,
+                        const Comparator* user_key_comparator,
+                        InternalIterator* internal_iter, const Version* version,
+                        const SequenceNumber& sequence,
+                        uint64_t max_sequential_skip_in_iterations,
+                        ReadCallback* read_callback, DBImpl* db_impl,
+                        ColumnFamilyData* cfd, bool expose_blob_index) {
+  DBIter* db_iter =
+      new DBIter(env, read_options, ioptions, mutable_cf_options,
+                 user_key_comparator, internal_iter, version, sequence, false,
+                 max_sequential_skip_in_iterations, read_callback, db_impl, cfd,
+                 expose_blob_index);
+  return db_iter;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.h
new file mode 100644
index 0000000000..e1663bb7ed
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_iter.h
@@ -0,0 +1,411 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <stdint.h>
+#include <string>
+
+#include "db/db_impl/db_impl.h"
+#include "db/range_del_aggregator.h"
+#include "memory/arena.h"
+#include "options/cf_options.h"
+#include "rocksdb/db.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/wide_columns.h"
+#include "table/iterator_wrapper.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+class Version;
+
+// This file declares the factory functions of DBIter, in its original form
+// or a wrapped form with class ArenaWrappedDBIter, which is defined here.
+// Class DBIter, which is declared and implemented inside db_iter.cc, is
+// an iterator that converts internal keys (yielded by an InternalIterator)
+// that were live at the specified sequence number into appropriate user
+// keys.
+// Each internal key consists of a user key, a sequence number, and a value
+// type. DBIter deals with multiple key versions, tombstones, merge operands,
+// etc, and exposes an Iterator.
+// For example, DBIter may wrap following InternalIterator: +// user key: AAA value: v3 seqno: 100 type: Put +// user key: AAA value: v2 seqno: 97 type: Put +// user key: AAA value: v1 seqno: 95 type: Put +// user key: BBB value: v1 seqno: 90 type: Put +// user key: BBC value: N/A seqno: 98 type: Delete +// user key: BBC value: v1 seqno: 95 type: Put +// If the snapshot passed in is 102, then the DBIter is expected to +// expose the following iterator: +// key: AAA value: v3 +// key: BBB value: v1 +// If the snapshot passed in is 96, then it should expose: +// key: AAA value: v1 +// key: BBB value: v1 +// key: BBC value: v1 +// + +// Memtables and sstables that make the DB representation contain +// (userkey,seq,type) => uservalue entries. DBIter +// combines multiple entries for the same userkey found in the DB +// representation into a single entry while accounting for sequence +// numbers, deletion markers, overwrites, etc. +class DBIter final : public Iterator { + public: + // The following is grossly complicated. TODO: clean it up + // Which direction is the iterator currently moving? + // (1) When moving forward: + // (1a) if current_entry_is_merged_ = false, the internal iterator is + // positioned at the exact entry that yields this->key(), this->value() + // (1b) if current_entry_is_merged_ = true, the internal iterator is + // positioned immediately after the last entry that contributed to the + // current this->value(). That entry may or may not have key equal to + // this->key(). + // (2) When moving backwards, the internal iterator is positioned + // just before all entries whose user key == this->key(). + enum Direction : uint8_t { kForward, kReverse }; + + // LocalStatistics contain Statistics counters that will be aggregated per + // each iterator instance and then will be sent to the global statistics when + // the iterator is destroyed. + // + // The purpose of this approach is to avoid perf regression happening + // when multiple threads bump the atomic counters from a DBIter::Next(). 
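+  // (Editorial note: the counters below are plain uint64_t fields rather
+  // than atomics; each DBIter owns one LocalStatistics instance and flushes
+  // it into the shared Statistics object via BumpGlobalStatistics(), which
+  // the destructor invokes.)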
+ struct LocalStatistics { + explicit LocalStatistics() { ResetCounters(); } + + void ResetCounters() { + next_count_ = 0; + next_found_count_ = 0; + prev_count_ = 0; + prev_found_count_ = 0; + bytes_read_ = 0; + skip_count_ = 0; + } + + void BumpGlobalStatistics(Statistics* global_statistics) { + RecordTick(global_statistics, NUMBER_DB_NEXT, next_count_); + RecordTick(global_statistics, NUMBER_DB_NEXT_FOUND, next_found_count_); + RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_); + RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_); + RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_); + RecordTick(global_statistics, NUMBER_ITER_SKIP, skip_count_); + PERF_COUNTER_ADD(iter_read_bytes, bytes_read_); + ResetCounters(); + } + + // Map to Tickers::NUMBER_DB_NEXT + uint64_t next_count_; + // Map to Tickers::NUMBER_DB_NEXT_FOUND + uint64_t next_found_count_; + // Map to Tickers::NUMBER_DB_PREV + uint64_t prev_count_; + // Map to Tickers::NUMBER_DB_PREV_FOUND + uint64_t prev_found_count_; + // Map to Tickers::ITER_BYTES_READ + uint64_t bytes_read_; + // Map to Tickers::NUMBER_ITER_SKIP + uint64_t skip_count_; + }; + + DBIter(Env* _env, const ReadOptions& read_options, + const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, const Comparator* cmp, + InternalIterator* iter, const Version* version, SequenceNumber s, + bool arena_mode, uint64_t max_sequential_skip_in_iterations, + ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, + bool expose_blob_index); + + // No copying allowed + DBIter(const DBIter&) = delete; + void operator=(const DBIter&) = delete; + + ~DBIter() override { + // Release pinned data if any + if (pinned_iters_mgr_.PinningEnabled()) { + pinned_iters_mgr_.ReleasePinnedData(); + } + RecordTick(statistics_, NO_ITERATOR_DELETED); + ResetInternalKeysSkippedCounter(); + local_stats_.BumpGlobalStatistics(statistics_); + iter_.DeleteIter(arena_mode_); + } + void SetIter(InternalIterator* iter) { + assert(iter_.iter() == nullptr); + iter_.Set(iter); + iter_.iter()->SetPinnedItersMgr(&pinned_iters_mgr_); + } + + bool Valid() const override { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + if (valid_) { + status_.PermitUncheckedError(); + } +#endif // ROCKSDB_ASSERT_STATUS_CHECKED + return valid_; + } + Slice key() const override { + assert(valid_); + if (timestamp_lb_) { + return saved_key_.GetInternalKey(); + } else { + const Slice ukey_and_ts = saved_key_.GetUserKey(); + return Slice(ukey_and_ts.data(), ukey_and_ts.size() - timestamp_size_); + } + } + Slice value() const override { + assert(valid_); + + return value_; + } + + const WideColumns& columns() const override { + assert(valid_); + + return wide_columns_; + } + + Status status() const override { + if (status_.ok()) { + return iter_.status(); + } else { + assert(!valid_); + return status_; + } + } + Slice timestamp() const override { + assert(valid_); + assert(timestamp_size_ > 0); + if (direction_ == kReverse) { + return saved_timestamp_; + } + const Slice ukey_and_ts = saved_key_.GetUserKey(); + assert(timestamp_size_ < ukey_and_ts.size()); + return ExtractTimestampFromUserKey(ukey_and_ts, timestamp_size_); + } + bool IsBlob() const { + assert(valid_); + return is_blob_; + } + + Status GetProperty(std::string prop_name, std::string* prop) override; + + void Next() final override; + void Prev() final override; + // 'target' does not contain timestamp, even if user timestamp feature is + // enabled. 
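+  // (Editorial note: the timestamp, when enabled, is attached internally
+  // from timestamp_ub_/timestamp_lb_; see SetSavedKeyToSeekTarget() and
+  // SetSavedKeyToSeekForPrevTarget() in db_iter.cc.)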
+ void Seek(const Slice& target) final override; + void SeekForPrev(const Slice& target) final override; + void SeekToFirst() final override; + void SeekToLast() final override; + Env* env() const { return env_; } + void set_sequence(uint64_t s) { + sequence_ = s; + if (read_callback_) { + read_callback_->Refresh(s); + } + } + void set_valid(bool v) { valid_ = v; } + + private: + // For all methods in this block: + // PRE: iter_->Valid() && status_.ok() + // Return false if there was an error, and status() is non-ok, valid_ = false; + // in this case callers would usually stop what they were doing and return. + bool ReverseToForward(); + bool ReverseToBackward(); + // Set saved_key_ to the seek key to target, with proper sequence number set. + // It might get adjusted if the seek key is smaller than iterator lower bound. + // target does not have timestamp. + void SetSavedKeyToSeekTarget(const Slice& target); + // Set saved_key_ to the seek key to target, with proper sequence number set. + // It might get adjusted if the seek key is larger than iterator upper bound. + // target does not have timestamp. + void SetSavedKeyToSeekForPrevTarget(const Slice& target); + bool FindValueForCurrentKey(); + bool FindValueForCurrentKeyUsingSeek(); + bool FindUserKeyBeforeSavedKey(); + // If `skipping_saved_key` is true, the function will keep iterating until it + // finds a user key that is larger than `saved_key_`. + // If `prefix` is not null, the iterator needs to stop when all keys for the + // prefix are exhausted and the iterator is set to invalid. + bool FindNextUserEntry(bool skipping_saved_key, const Slice* prefix); + // Internal implementation of FindNextUserEntry(). + bool FindNextUserEntryInternal(bool skipping_saved_key, const Slice* prefix); + bool ParseKey(ParsedInternalKey* key); + bool MergeValuesNewToOld(); + + // If prefix is not null, we need to set the iterator to invalid if no more + // entry can be found within the prefix. + void PrevInternal(const Slice* prefix); + bool TooManyInternalKeysSkipped(bool increment = true); + bool IsVisible(SequenceNumber sequence, const Slice& ts, + bool* more_recent = nullptr); + + // Temporarily pin the blocks that we encounter until ReleaseTempPinnedData() + // is called + void TempPinData() { + if (!pin_thru_lifetime_) { + pinned_iters_mgr_.StartPinning(); + } + } + + // Release blocks pinned by TempPinData() + void ReleaseTempPinnedData() { + if (!pin_thru_lifetime_ && pinned_iters_mgr_.PinningEnabled()) { + pinned_iters_mgr_.ReleasePinnedData(); + } + } + + inline void ClearSavedValue() { + if (saved_value_.capacity() > 1048576) { + std::string empty; + swap(empty, saved_value_); + } else { + saved_value_.clear(); + } + } + + inline void ResetInternalKeysSkippedCounter() { + local_stats_.skip_count_ += num_internal_keys_skipped_; + if (valid_) { + local_stats_.skip_count_--; + } + num_internal_keys_skipped_ = 0; + } + + bool expect_total_order_inner_iter() { + assert(expect_total_order_inner_iter_ || prefix_extractor_ != nullptr); + return expect_total_order_inner_iter_; + } + + // If lower bound of timestamp is given by ReadOptions.iter_start_ts, we need + // to return versions of the same key. We cannot just skip if the key value + // is the same but timestamps are different but fall in timestamp range. + inline int CompareKeyForSkip(const Slice& a, const Slice& b) { + return timestamp_lb_ != nullptr + ? 
user_comparator_.Compare(a, b) + : user_comparator_.CompareWithoutTimestamp(a, b); + } + + // Retrieves the blob value for the specified user key using the given blob + // index when using the integrated BlobDB implementation. + bool SetBlobValueIfNeeded(const Slice& user_key, const Slice& blob_index); + + void ResetBlobValue() { + is_blob_ = false; + blob_value_.Reset(); + } + + void SetValueAndColumnsFromPlain(const Slice& slice) { + assert(value_.empty()); + assert(wide_columns_.empty()); + + value_ = slice; + wide_columns_.emplace_back(kDefaultWideColumnName, slice); + } + + bool SetValueAndColumnsFromEntity(Slice slice); + + void ResetValueAndColumns() { + value_.clear(); + wide_columns_.clear(); + } + + // If user-defined timestamp is enabled, `user_key` includes timestamp. + bool Merge(const Slice* val, const Slice& user_key); + bool MergeEntity(const Slice& entity, const Slice& user_key); + + const SliceTransform* prefix_extractor_; + Env* const env_; + SystemClock* clock_; + Logger* logger_; + UserComparatorWrapper user_comparator_; + const MergeOperator* const merge_operator_; + IteratorWrapper iter_; + const Version* version_; + ReadCallback* read_callback_; + // Max visible sequence number. It is normally the snapshot seq unless we have + // uncommitted data in db as in WriteUnCommitted. + SequenceNumber sequence_; + + IterKey saved_key_; + // Reusable internal key data structure. This is only used inside one function + // and should not be used across functions. Reusing this object can reduce + // overhead of calling construction of the function if creating it each time. + ParsedInternalKey ikey_; + std::string saved_value_; + Slice pinned_value_; + // for prefix seek mode to support prev() + PinnableSlice blob_value_; + // Value of the default column + Slice value_; + // All columns (i.e. name-value pairs) + WideColumns wide_columns_; + Statistics* statistics_; + uint64_t max_skip_; + uint64_t max_skippable_internal_keys_; + uint64_t num_internal_keys_skipped_; + const Slice* iterate_lower_bound_; + const Slice* iterate_upper_bound_; + + // The prefix of the seek key. It is only used when prefix_same_as_start_ + // is true and prefix extractor is not null. In Next() or Prev(), current keys + // will be checked against this prefix, so that the iterator can be + // invalidated if the keys in this prefix has been exhausted. Set it using + // SetUserKey() and use it using GetUserKey(). + IterKey prefix_; + + Status status_; + Direction direction_; + bool valid_; + bool current_entry_is_merged_; + // True if we know that the current entry's seqnum is 0. + // This information is used as that the next entry will be for another + // user key. + bool is_key_seqnum_zero_; + const bool prefix_same_as_start_; + // Means that we will pin all data blocks we read as long the Iterator + // is not deleted, will be true if ReadOptions::pin_data is true + const bool pin_thru_lifetime_; + // Expect the inner iterator to maintain a total order. + // prefix_extractor_ must be non-NULL if the value is false. + const bool expect_total_order_inner_iter_; + ReadTier read_tier_; + bool fill_cache_; + bool verify_checksums_; + // Whether the iterator is allowed to expose blob references. Set to true when + // the stacked BlobDB implementation is used, false otherwise. + bool expose_blob_index_; + bool is_blob_; + bool arena_mode_; + // List of operands for merge operator. 
+  MergeContext merge_context_;
+  LocalStatistics local_stats_;
+  PinnedIteratorsManager pinned_iters_mgr_;
+  DBImpl* db_impl_;
+  ColumnFamilyData* cfd_;
+  const Slice* const timestamp_ub_;
+  const Slice* const timestamp_lb_;
+  const size_t timestamp_size_;
+  std::string saved_timestamp_;
+};
+
+// Return a new iterator that converts internal keys (yielded by
+// "*internal_iter") that were live at the specified `sequence` number
+// into appropriate user keys.
+extern Iterator* NewDBIterator(
+    Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions,
+    const MutableCFOptions& mutable_cf_options,
+    const Comparator* user_key_comparator, InternalIterator* internal_iter,
+    const Version* version, const SequenceNumber& sequence,
+    uint64_t max_sequential_skip_in_iterations, ReadCallback* read_callback,
+    DBImpl* db_impl = nullptr, ColumnFamilyData* cfd = nullptr,
+    bool expose_blob_index = false);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_test_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_test_util.h
new file mode 100644
index 0000000000..5b35c9907b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_test_util.h
@@ -0,0 +1,1340 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <fcntl.h>
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <thread>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "db/db_impl/db_impl.h"
+#include "file/filename.h"
+#include "rocksdb/advanced_options.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/options.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/sst_file_writer.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/table.h"
+#include "rocksdb/utilities/checkpoint.h"
+#include "table/mock_table.h"
+#include "table/scoped_arena_iterator.h"
+#include "test_util/sync_point.h"
+#include "test_util/testharness.h"
+#include "util/cast_util.h"
+#include "util/compression.h"
+#include "util/mutexlock.h"
+#include "util/string_util.h"
+#include "utilities/merge_operators.h"
+
+// In case defined by Windows headers
+#undef small
+
+namespace ROCKSDB_NAMESPACE {
+class MockEnv;
+
+namespace anon {
+class AtomicCounter {
+ public:
+  explicit AtomicCounter(Env* env = NULL)
+      : env_(env), cond_count_(&mu_), count_(0) {}
+
+  void Increment() {
+    MutexLock l(&mu_);
+    count_++;
+    cond_count_.SignalAll();
+  }
+
+  int Read() {
+    MutexLock l(&mu_);
+    return count_;
+  }
+
+  bool WaitFor(int count) {
+    MutexLock l(&mu_);
+
+    uint64_t start = env_->NowMicros();
+    while (count_ < count) {
+      uint64_t now = env_->NowMicros();
+      cond_count_.TimedWait(now + /*1s*/ 1 * 1000 * 1000);
+      if (env_->NowMicros() - start > /*10s*/ 10 * 1000 * 1000) {
+        return false;
+      }
+      if (count_ < count) {
+        GTEST_LOG_(WARNING) << "WaitFor is taking more time than usual";
+      }
+    }
+
+    return true;
+  }
+
+  void Reset() {
+    MutexLock l(&mu_);
+    count_ = 0;
+    cond_count_.SignalAll();
+  }
+
+ private:
+  Env* env_;
+  port::Mutex mu_;
+  port::CondVar cond_count_;
+  int count_;
+};
+
+struct OptionsOverride {
+  std::shared_ptr<const FilterPolicy> filter_policy = nullptr;
+  // These will be used only if filter_policy is set
+  bool partition_filters = false;
+  // Force using a default block cache. (Setting to false allows an ASAN build
+  // to use a trivially small block cache for better UAF error detection.)
+  bool full_block_cache = false;
+  uint64_t metadata_block_size = 1024;
+
+  // Used as a bit mask of individual enums in which to skip an XF test point
+  int skip_policy = 0;
+};
+
+}  // namespace anon
+
+enum SkipPolicy { kSkipNone = 0, kSkipNoSnapshot = 1, kSkipNoPrefix = 2 };
+
+// Special Env used to delay background operations
+class SpecialEnv : public EnvWrapper {
+ public:
+  explicit SpecialEnv(Env* base, bool time_elapse_only_sleep = false);
+
+  static const char* kClassName() { return "SpecialEnv"; }
+  const char* Name() const override { return kClassName(); }
+
+  Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r,
+                         const EnvOptions& soptions) override {
+    class SSTableFile : public WritableFile {
+     private:
+      SpecialEnv* env_;
+      std::unique_ptr<WritableFile> base_;
+
+     public:
+      SSTableFile(SpecialEnv* env, std::unique_ptr<WritableFile>&& base)
+          : env_(env), base_(std::move(base)) {}
+      Status Append(const Slice& data) override {
+        if (env_->table_write_callback_) {
+          (*env_->table_write_callback_)();
+        }
+        if (env_->drop_writes_.load(std::memory_order_acquire)) {
+          // Drop writes on the floor
+          return Status::OK();
+        } else if (env_->no_space_.load(std::memory_order_acquire)) {
+          return Status::NoSpace("No space left on device");
+        } else {
+          env_->bytes_written_ += data.size();
+          return base_->Append(data);
+        }
+      }
+      Status Append(
+          const Slice& data,
+          const DataVerificationInfo& /* verification_info */) override {
+        return Append(data);
+      }
+      Status PositionedAppend(const Slice& data, uint64_t offset) override {
+        if (env_->table_write_callback_) {
+          (*env_->table_write_callback_)();
+        }
+        if (env_->drop_writes_.load(std::memory_order_acquire)) {
+          // Drop writes on the floor
+          return Status::OK();
+        } else if (env_->no_space_.load(std::memory_order_acquire)) {
+          return Status::NoSpace("No space left on device");
+        } else {
+          env_->bytes_written_ += data.size();
+          return base_->PositionedAppend(data, offset);
+        }
+      }
+      Status PositionedAppend(
+          const Slice& data, uint64_t offset,
+          const DataVerificationInfo& /* verification_info */) override {
+        return PositionedAppend(data, offset);
+      }
+      Status Truncate(uint64_t size) override { return base_->Truncate(size); }
+      Status RangeSync(uint64_t offset, uint64_t nbytes) override {
+        Status s = base_->RangeSync(offset, nbytes);
+#if !(defined NDEBUG) || !defined(OS_WIN)
+        TEST_SYNC_POINT_CALLBACK("SpecialEnv::SStableFile::RangeSync", &s);
+#endif  // !(defined NDEBUG) || !defined(OS_WIN)
+        return s;
+      }
+      Status Close() override {
+// SyncPoint is not supported in Released Windows Mode.
+#if !(defined NDEBUG) || !defined(OS_WIN)
+        // Check preallocation size
+        // preallocation size is never passed to base file.
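+        // (Editorial note: TEST_SYNC_POINT_CALLBACK hands the observed
+        // preallocation size to any callback a test registered under this
+        // name; the macro compiles to a no-op in release builds.)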
+        size_t preallocation_size = preallocation_block_size();
+        TEST_SYNC_POINT_CALLBACK("DBTestWritableFile.GetPreallocationStatus",
+                                 &preallocation_size);
+#endif  // !(defined NDEBUG) || !defined(OS_WIN)
+        Status s = base_->Close();
+#if !(defined NDEBUG) || !defined(OS_WIN)
+        TEST_SYNC_POINT_CALLBACK("SpecialEnv::SStableFile::Close", &s);
+#endif  // !(defined NDEBUG) || !defined(OS_WIN)
+        return s;
+      }
+      Status Flush() override { return base_->Flush(); }
+      Status Sync() override {
+        ++env_->sync_counter_;
+        while (env_->delay_sstable_sync_.load(std::memory_order_acquire)) {
+          env_->SleepForMicroseconds(100000);
+        }
+        Status s;
+        if (!env_->skip_fsync_) {
+          s = base_->Sync();
+        }
+#if !(defined NDEBUG) || !defined(OS_WIN)
+        TEST_SYNC_POINT_CALLBACK("SpecialEnv::SStableFile::Sync", &s);
+#endif  // !(defined NDEBUG) || !defined(OS_WIN)
+        return s;
+      }
+      void SetIOPriority(Env::IOPriority pri) override {
+        base_->SetIOPriority(pri);
+      }
+      Env::IOPriority GetIOPriority() override {
+        return base_->GetIOPriority();
+      }
+      bool use_direct_io() const override { return base_->use_direct_io(); }
+      Status Allocate(uint64_t offset, uint64_t len) override {
+        return base_->Allocate(offset, len);
+      }
+      size_t GetUniqueId(char* id, size_t max_size) const override {
+        return base_->GetUniqueId(id, max_size);
+      }
+    };
+    class ManifestFile : public WritableFile {
+     public:
+      ManifestFile(SpecialEnv* env, std::unique_ptr<WritableFile>&& b)
+          : env_(env), base_(std::move(b)) {}
+      Status Append(const Slice& data) override {
+        if (env_->manifest_write_error_.load(std::memory_order_acquire)) {
+          return Status::IOError("simulated writer error");
+        } else {
+          return base_->Append(data);
+        }
+      }
+      Status Append(
+          const Slice& data,
+          const DataVerificationInfo& /*verification_info*/) override {
+        return Append(data);
+      }
+
+      Status Truncate(uint64_t size) override { return base_->Truncate(size); }
+      Status Close() override { return base_->Close(); }
+      Status Flush() override { return base_->Flush(); }
+      Status Sync() override {
+        ++env_->sync_counter_;
+        if (env_->manifest_sync_error_.load(std::memory_order_acquire)) {
+          return Status::IOError("simulated sync error");
+        } else {
+          if (env_->skip_fsync_) {
+            return Status::OK();
+          } else {
+            return base_->Sync();
+          }
+        }
+      }
+      uint64_t GetFileSize() override { return base_->GetFileSize(); }
+      Status Allocate(uint64_t offset, uint64_t len) override {
+        return base_->Allocate(offset, len);
+      }
+
+     private:
+      SpecialEnv* env_;
+      std::unique_ptr<WritableFile> base_;
+    };
+    class WalFile : public WritableFile {
+     public:
+      WalFile(SpecialEnv* env, std::unique_ptr<WritableFile>&& b)
+          : env_(env), base_(std::move(b)) {
+        env_->num_open_wal_file_.fetch_add(1);
+      }
+      virtual ~WalFile() { env_->num_open_wal_file_.fetch_add(-1); }
+      Status Append(const Slice& data) override {
+#if !(defined NDEBUG) || !defined(OS_WIN)
+        TEST_SYNC_POINT("SpecialEnv::WalFile::Append:1");
+#endif
+        Status s;
+        if (env_->log_write_error_.load(std::memory_order_acquire)) {
+          s = Status::IOError("simulated writer error");
+        } else {
+          int slowdown =
+              env_->log_write_slowdown_.load(std::memory_order_acquire);
+          if (slowdown > 0) {
+            env_->SleepForMicroseconds(slowdown);
+          }
+          s = base_->Append(data);
+        }
+#if !(defined NDEBUG) || !defined(OS_WIN)
+        TEST_SYNC_POINT("SpecialEnv::WalFile::Append:2");
+#endif
+        return s;
+      }
+      Status Append(
+          const Slice& data,
+          const DataVerificationInfo& /* verification_info */) override {
+        return Append(data);
+      }
+      Status Truncate(uint64_t size) override { return base_->Truncate(size); }
+      void PrepareWrite(size_t offset, size_t len) override {
+        base_->PrepareWrite(offset, len);
+      }
+      void SetPreallocationBlockSize(size_t size) override {
+        base_->SetPreallocationBlockSize(size);
+      }
+      Status Close() override {
+// SyncPoint is not supported in Released Windows Mode.
+#if !(defined NDEBUG) || !defined(OS_WIN)
+        // Check preallocation size
+        size_t block_size, last_allocated_block;
+        base_->GetPreallocationStatus(&block_size, &last_allocated_block);
+        TEST_SYNC_POINT_CALLBACK("DBTestWalFile.GetPreallocationStatus",
+                                 &block_size);
+#endif  // !(defined NDEBUG) || !defined(OS_WIN)
+
+        return base_->Close();
+      }
+      Status Flush() override { return base_->Flush(); }
+      Status Sync() override {
+        ++env_->sync_counter_;
+        if (env_->corrupt_in_sync_) {
+          EXPECT_OK(Append(std::string(33000, ' ')));
+          return Status::IOError("Ingested Sync Failure");
+        }
+        if (env_->skip_fsync_) {
+          return Status::OK();
+        } else {
+          return base_->Sync();
+        }
+      }
+      bool IsSyncThreadSafe() const override {
+        return env_->is_wal_sync_thread_safe_.load();
+      }
+      Status Allocate(uint64_t offset, uint64_t len) override {
+        return base_->Allocate(offset, len);
+      }
+
+     private:
+      SpecialEnv* env_;
+      std::unique_ptr<WritableFile> base_;
+    };
+    class OtherFile : public WritableFile {
+     public:
+      OtherFile(SpecialEnv* env, std::unique_ptr<WritableFile>&& b)
+          : env_(env), base_(std::move(b)) {}
+      Status Append(const Slice& data) override { return base_->Append(data); }
+      Status Append(
+          const Slice& data,
+          const DataVerificationInfo& /*verification_info*/) override {
+        return Append(data);
+      }
+      Status Truncate(uint64_t size) override { return base_->Truncate(size); }
+      Status Close() override { return base_->Close(); }
+      Status Flush() override { return base_->Flush(); }
+      Status Sync() override {
+        if (env_->skip_fsync_) {
+          return Status::OK();
+        } else {
+          return base_->Sync();
+        }
+      }
+      uint64_t GetFileSize() override { return base_->GetFileSize(); }
+      Status Allocate(uint64_t offset, uint64_t len) override {
+        return base_->Allocate(offset, len);
+      }
+
+     private:
+      SpecialEnv* env_;
+      std::unique_ptr<WritableFile> base_;
+    };
+
+    if (no_file_overwrite_.load(std::memory_order_acquire) &&
+        target()->FileExists(f).ok()) {
+      return Status::NotSupported("SpecialEnv::no_file_overwrite_ is true.");
+    }
+
+    if (non_writeable_rate_.load(std::memory_order_acquire) > 0) {
+      uint32_t random_number;
+      {
+        MutexLock l(&rnd_mutex_);
+        random_number = rnd_.Uniform(100);
+      }
+      if (random_number < non_writeable_rate_.load()) {
+        return Status::IOError("simulated random write error");
+      }
+    }
+
+    new_writable_count_++;
+
+    if (non_writable_count_.load() > 0) {
+      non_writable_count_--;
+      return Status::IOError("simulated write error");
+    }
+
+    EnvOptions optimized = soptions;
+    if (strstr(f.c_str(), "MANIFEST") != nullptr ||
+        strstr(f.c_str(), "log") != nullptr) {
+      optimized.use_mmap_writes = false;
+      optimized.use_direct_writes = false;
+    }
+
+    Status s = target()->NewWritableFile(f, r, optimized);
+    if (s.ok()) {
+      if (strstr(f.c_str(), ".sst") != nullptr) {
+        r->reset(new SSTableFile(this, std::move(*r)));
+      } else if (strstr(f.c_str(), "MANIFEST") != nullptr) {
+        r->reset(new ManifestFile(this, std::move(*r)));
+      } else if (strstr(f.c_str(), "log") != nullptr) {
+        r->reset(new WalFile(this, std::move(*r)));
+      } else {
+        r->reset(new OtherFile(this, std::move(*r)));
+      }
+    }
+    return s;
+  }
+
+  Status NewRandomAccessFile(const std::string& f,
+                             std::unique_ptr<RandomAccessFile>* r,
+                             const EnvOptions& soptions) override {
+    class CountingFile : public RandomAccessFile {
+     public:
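+      // (Editorial note: wraps a RandomAccessFile and bumps the shared
+      // counters on every Read()/Prefetch(); used when count_random_reads_
+      // is set so tests can assert on read amplification.)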
+      CountingFile(std::unique_ptr<RandomAccessFile>&& target,
+                   anon::AtomicCounter* counter,
+                   std::atomic<size_t>* bytes_read)
+          : target_(std::move(target)),
+            counter_(counter),
+            bytes_read_(bytes_read) {}
+      virtual Status Read(uint64_t offset, size_t n, Slice* result,
+                          char* scratch) const override {
+        counter_->Increment();
+        Status s = target_->Read(offset, n, result, scratch);
+        *bytes_read_ += result->size();
+        return s;
+      }
+
+      virtual Status Prefetch(uint64_t offset, size_t n) override {
+        Status s = target_->Prefetch(offset, n);
+        *bytes_read_ += n;
+        return s;
+      }
+
+     private:
+      std::unique_ptr<RandomAccessFile> target_;
+      anon::AtomicCounter* counter_;
+      std::atomic<size_t>* bytes_read_;
+    };
+
+    class RandomFailureFile : public RandomAccessFile {
+     public:
+      RandomFailureFile(std::unique_ptr<RandomAccessFile>&& target,
+                        std::atomic<uint64_t>* failure_cnt, uint32_t fail_odd)
+          : target_(std::move(target)),
+            fail_cnt_(failure_cnt),
+            fail_odd_(fail_odd) {}
+      virtual Status Read(uint64_t offset, size_t n, Slice* result,
+                          char* scratch) const override {
+        if (Random::GetTLSInstance()->OneIn(fail_odd_)) {
+          fail_cnt_->fetch_add(1);
+          return Status::IOError("random error");
+        }
+        return target_->Read(offset, n, result, scratch);
+      }
+
+      virtual Status Prefetch(uint64_t offset, size_t n) override {
+        return target_->Prefetch(offset, n);
+      }
+
+     private:
+      std::unique_ptr<RandomAccessFile> target_;
+      std::atomic<uint64_t>* fail_cnt_;
+      uint32_t fail_odd_;
+    };
+
+    Status s = target()->NewRandomAccessFile(f, r, soptions);
+    random_file_open_counter_++;
+    if (s.ok()) {
+      if (count_random_reads_) {
+        r->reset(new CountingFile(std::move(*r), &random_read_counter_,
+                                  &random_read_bytes_counter_));
+      } else if (rand_reads_fail_odd_ > 0) {
+        r->reset(new RandomFailureFile(std::move(*r), &num_reads_fails_,
+                                       rand_reads_fail_odd_));
+      }
+    }
+
+    if (s.ok() && soptions.compaction_readahead_size > 0) {
+      compaction_readahead_size_ = soptions.compaction_readahead_size;
+    }
+    return s;
+  }
+
+  virtual Status NewSequentialFile(const std::string& f,
+                                   std::unique_ptr<SequentialFile>* r,
+                                   const EnvOptions& soptions) override {
+    class CountingFile : public SequentialFile {
+     public:
+      CountingFile(std::unique_ptr<SequentialFile>&& target,
+                   anon::AtomicCounter* counter)
+          : target_(std::move(target)), counter_(counter) {}
+      virtual Status Read(size_t n, Slice* result, char* scratch) override {
+        counter_->Increment();
+        return target_->Read(n, result, scratch);
+      }
+      virtual Status Skip(uint64_t n) override { return target_->Skip(n); }
+
+     private:
+      std::unique_ptr<SequentialFile> target_;
+      anon::AtomicCounter* counter_;
+    };
+
+    Status s = target()->NewSequentialFile(f, r, soptions);
+    if (s.ok() && count_sequential_reads_) {
+      r->reset(new CountingFile(std::move(*r), &sequential_read_counter_));
+    }
+    return s;
+  }
+
+  virtual void SleepForMicroseconds(int micros) override {
+    sleep_counter_.Increment();
+    if (no_slowdown_ || time_elapse_only_sleep_) {
+      addon_microseconds_.fetch_add(micros);
+    }
+    if (!no_slowdown_) {
+      target()->SleepForMicroseconds(micros);
+    }
+  }
+
+  void MockSleepForMicroseconds(int64_t micros) {
+    sleep_counter_.Increment();
+    assert(no_slowdown_);
+    addon_microseconds_.fetch_add(micros);
+  }
+
+  void MockSleepForSeconds(int64_t seconds) {
+    sleep_counter_.Increment();
+    assert(no_slowdown_);
+    addon_microseconds_.fetch_add(seconds * 1000000);
+  }
+
+  virtual Status GetCurrentTime(int64_t* unix_time) override {
+    Status s;
+    if (time_elapse_only_sleep_) {
+      *unix_time = maybe_starting_time_;
+    } else {
+      s = target()->GetCurrentTime(unix_time);
+    }
+    if (s.ok()) {
+      // mock microseconds elapsed to seconds of time
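+      // (Editorial note: integer division, so sub-second remainders of
+      // the mocked clock are dropped here.)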
+      *unix_time += addon_microseconds_.load() / 1000000;
+    }
+    return s;
+  }
+
+  virtual uint64_t NowCPUNanos() override {
+    now_cpu_count_.fetch_add(1);
+    return target()->NowCPUNanos();
+  }
+
+  virtual uint64_t NowNanos() override {
+    return (time_elapse_only_sleep_ ? 0 : target()->NowNanos()) +
+           addon_microseconds_.load() * 1000;
+  }
+
+  virtual uint64_t NowMicros() override {
+    return (time_elapse_only_sleep_ ? 0 : target()->NowMicros()) +
+           addon_microseconds_.load();
+  }
+
+  virtual Status DeleteFile(const std::string& fname) override {
+    delete_count_.fetch_add(1);
+    return target()->DeleteFile(fname);
+  }
+
+  void SetMockSleep(bool enabled = true) { no_slowdown_ = enabled; }
+
+  Status NewDirectory(const std::string& name,
+                      std::unique_ptr<Directory>* result) override {
+    if (!skip_fsync_) {
+      return target()->NewDirectory(name, result);
+    } else {
+      class NoopDirectory : public Directory {
+       public:
+        NoopDirectory() {}
+        ~NoopDirectory() {}
+
+        Status Fsync() override { return Status::OK(); }
+        Status Close() override { return Status::OK(); }
+      };
+
+      result->reset(new NoopDirectory());
+      return Status::OK();
+    }
+  }
+
+  Status RenameFile(const std::string& src, const std::string& dest) override {
+    rename_count_.fetch_add(1);
+    if (rename_error_.load(std::memory_order_acquire)) {
+      return Status::NotSupported("Simulated `RenameFile()` error.");
+    }
+    return target()->RenameFile(src, dest);
+  }
+
+  // Something to return when mocking current time
+  const int64_t maybe_starting_time_;
+
+  Random rnd_;
+  port::Mutex rnd_mutex_;  // Lock to protect rnd_
+
+  // sstable Sync() calls are blocked while this flag is set.
+  std::atomic<bool> delay_sstable_sync_;
+
+  // Drop writes on the floor while this flag is set.
+  std::atomic<bool> drop_writes_;
+
+  // Simulate no-space errors while this flag is set.
+  std::atomic<bool> no_space_;
+
+  // Simulate a non-writable file system while this flag is set
+  std::atomic<bool> non_writable_;
+
+  // Force sync of manifest files to fail while this flag is set
+  std::atomic<bool> manifest_sync_error_;
+
+  // Force write to manifest files to fail while this flag is set
+  std::atomic<bool> manifest_write_error_;
+
+  // Force write to log files to fail while this flag is set
+  std::atomic<bool> log_write_error_;
+
+  // Force `RenameFile()` to fail while this flag is set
+  std::atomic<bool> rename_error_{false};
+
+  // Slow down every log write, in micro-seconds.
+  std::atomic<int> log_write_slowdown_;
+
+  // If true, returns Status::NotSupported for file overwrite.
+  std::atomic<bool> no_file_overwrite_;
+
+  // Number of WAL files that are still open for write.
+  std::atomic<int> num_open_wal_file_;
+
+  bool count_random_reads_;
+  uint32_t rand_reads_fail_odd_ = 0;
+  std::atomic<uint64_t> num_reads_fails_;
+  anon::AtomicCounter random_read_counter_;
+  std::atomic<size_t> random_read_bytes_counter_;
+  std::atomic<int32_t> random_file_open_counter_;
+
+  bool count_sequential_reads_;
+  anon::AtomicCounter sequential_read_counter_;
+
+  anon::AtomicCounter sleep_counter_;
+
+  std::atomic<int64_t> bytes_written_;
+
+  std::atomic<int> sync_counter_;
+
+  // If true, all fsync to files and directories are skipped.
+  bool skip_fsync_ = false;
+
+  // If true, ingest the corruption to file during sync.
+  bool corrupt_in_sync_ = false;
+
+  std::atomic<uint32_t> non_writeable_rate_;
+
+  std::atomic<uint32_t> new_writable_count_;
+
+  std::atomic<uint32_t> non_writable_count_;
+
+  std::function<void()>* table_write_callback_;
+
+  std::atomic<int64_t> now_cpu_count_;
+
+  std::atomic<int> delete_count_;
+
+  std::atomic<int> rename_count_{0};
+
+  std::atomic<bool> is_wal_sync_thread_safe_{true};
+
+  std::atomic<size_t> compaction_readahead_size_{};
+
+ private:  // accessing these directly is prone to error
+  friend class DBTestBase;
+
+  std::atomic<int64_t> addon_microseconds_{0};
+
+  // Do not modify in the env of a running DB (could cause deadlock)
+  std::atomic<bool> time_elapse_only_sleep_;
+
+  bool no_slowdown_;
+};
+
+class FileTemperatureTestFS : public FileSystemWrapper {
+ public:
+  explicit FileTemperatureTestFS(const std::shared_ptr<FileSystem>& fs)
+      : FileSystemWrapper(fs) {}
+
+  static const char* kClassName() { return "FileTemperatureTestFS"; }
+  const char* Name() const override { return kClassName(); }
+
+  IOStatus NewSequentialFile(const std::string& fname, const FileOptions& opts,
+                             std::unique_ptr<FSSequentialFile>* result,
+                             IODebugContext* dbg) override {
+    IOStatus s = target()->NewSequentialFile(fname, opts, result, dbg);
+    uint64_t number;
+    FileType type;
+    if (ParseFileName(GetFileName(fname), &number, &type) &&
+        type == kTableFile) {
+      MutexLock lock(&mu_);
+      requested_sst_file_temperatures_.emplace_back(number, opts.temperature);
+      if (s.ok()) {
+        if (opts.temperature != Temperature::kUnknown) {
+          // Be extra picky and don't open if a wrong non-unknown temperature is
+          // provided
+          auto e = current_sst_file_temperatures_.find(number);
+          if (e != current_sst_file_temperatures_.end() &&
+              e->second != opts.temperature) {
+            result->reset();
+            return IOStatus::PathNotFound("Temperature mismatch on " + fname);
+          }
+        }
+        *result = WrapWithTemperature<FSSequentialFileOwnerWrapper>(
+            number, std::move(*result));
+      }
+    }
+    return s;
+  }
+
+  IOStatus NewRandomAccessFile(const std::string& fname,
+                               const FileOptions& opts,
+                               std::unique_ptr<FSRandomAccessFile>* result,
+                               IODebugContext* dbg) override {
+    IOStatus s = target()->NewRandomAccessFile(fname, opts, result, dbg);
+    uint64_t number;
+    FileType type;
+    if (ParseFileName(GetFileName(fname), &number, &type) &&
+        type == kTableFile) {
+      MutexLock lock(&mu_);
+      requested_sst_file_temperatures_.emplace_back(number, opts.temperature);
+      if (s.ok()) {
+        if (opts.temperature != Temperature::kUnknown) {
+          // Be extra picky and don't open if a wrong non-unknown temperature is
+          // provided
+          auto e = current_sst_file_temperatures_.find(number);
+          if (e != current_sst_file_temperatures_.end() &&
+              e->second != opts.temperature) {
+            result->reset();
+            return IOStatus::PathNotFound("Temperature mismatch on " + fname);
+          }
+        }
+        *result = WrapWithTemperature<FSRandomAccessFileOwnerWrapper>(
+            number, std::move(*result));
+      }
+    }
+    return s;
+  }
+
+  void PopRequestedSstFileTemperatures(
+      std::vector<std::pair<uint64_t, Temperature>>* out = nullptr) {
+    MutexLock lock(&mu_);
+    if (out) {
+      *out = std::move(requested_sst_file_temperatures_);
+      assert(requested_sst_file_temperatures_.empty());
+    } else {
+      requested_sst_file_temperatures_.clear();
+    }
+  }
+
+  IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts,
+                           std::unique_ptr<FSWritableFile>* result,
+                           IODebugContext* dbg) override {
+    uint64_t number;
+    FileType type;
+    if (ParseFileName(GetFileName(fname), &number, &type) &&
+        type == kTableFile) {
+      MutexLock lock(&mu_);
+      current_sst_file_temperatures_[number] = opts.temperature;
+    }
+    return target()->NewWritableFile(fname, opts, result, dbg);
+  }
+
+  void CopyCurrentSstFileTemperatures(std::map<uint64_t, Temperature>* out) {
+    MutexLock lock(&mu_);
+    *out = current_sst_file_temperatures_;
+  }
+
+  void OverrideSstFileTemperature(uint64_t number, Temperature temp) {
+    MutexLock lock(&mu_);
+    current_sst_file_temperatures_[number] = temp;
+  }
+
+ protected:
+  port::Mutex mu_;
+  std::vector<std::pair<uint64_t, Temperature>>
+      requested_sst_file_temperatures_;
+  std::map<uint64_t, Temperature> current_sst_file_temperatures_;
+
+  std::string GetFileName(const std::string& fname) {
+    auto filename = fname.substr(fname.find_last_of(kFilePathSeparator) + 1);
+    // Workaround, for Windows only: the file path could contain both the
+    // Windows FilePathSeparator and '/'
+    filename = filename.substr(filename.find_last_of('/') + 1);
+    return filename;
+  }
+
+  template <class FileOwnerWrapperT, /*inferred*/ class FileT>
+  std::unique_ptr<FileT> WrapWithTemperature(uint64_t number,
+                                             std::unique_ptr<FileT>&& t) {
+    class FileWithTemp : public FileOwnerWrapperT {
+     public:
+      FileWithTemp(FileTemperatureTestFS* fs, uint64_t number,
+                   std::unique_ptr<FileT>&& t)
+          : FileOwnerWrapperT(std::move(t)), fs_(fs), number_(number) {}
+
+      Temperature GetTemperature() const override {
+        MutexLock lock(&fs_->mu_);
+        return fs_->current_sst_file_temperatures_[number_];
+      }
+
+     private:
+      FileTemperatureTestFS* fs_;
+      uint64_t number_;
+    };
+    return std::make_unique<FileWithTemp>(this, number, std::move(t));
+  }
+};
+
+class OnFileDeletionListener : public EventListener {
+ public:
+  OnFileDeletionListener() : matched_count_(0), expected_file_name_("") {}
+  const char* Name() const override { return kClassName(); }
+  static const char* kClassName() { return "OnFileDeletionListener"; }
+
+  void SetExpectedFileName(const std::string file_name) {
+    expected_file_name_ = file_name;
+  }
+
+  void VerifyMatchedCount(size_t expected_value) {
+    ASSERT_EQ(matched_count_, expected_value);
+  }
+
+  void OnTableFileDeleted(const TableFileDeletionInfo& info) override {
+    if (expected_file_name_ != "") {
+      ASSERT_EQ(expected_file_name_, info.file_path);
+      expected_file_name_ = "";
+      matched_count_++;
+    }
+  }
+
+ private:
+  size_t matched_count_;
+  std::string expected_file_name_;
+};
+
+class FlushCounterListener : public EventListener {
+ public:
+  const char* Name() const override { return kClassName(); }
+  static const char* kClassName() { return "FlushCounterListener"; }
+  std::atomic<int> count{0};
+  std::atomic<FlushReason> expected_flush_reason{FlushReason::kOthers};
+
+  void OnFlushBegin(DB* /*db*/, const FlushJobInfo& flush_job_info) override {
+    count++;
+    ASSERT_EQ(expected_flush_reason.load(), flush_job_info.flush_reason);
+  }
+};
+
+// A test merge operator that mimics Put but fails if one of the merge
+// operands is "corrupted", "corrupted_try_merge", or "corrupted_must_merge".
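+// A hedged usage sketch (illustrative only; assumes a DBTestBase-style
+// fixture providing CurrentOptions()/Reopen() and a db_ handle):
+//   Options options = CurrentOptions();
+//   options.merge_operator = std::make_shared<TestPutOperator>();
+//   Reopen(options);
+//   ASSERT_OK(db_->Merge(WriteOptions(), "k", "v1"));  // behaves like Put
+//   ASSERT_OK(db_->Merge(WriteOptions(), "k", "v2"));  // last operand wins
+//   // Writing the literal operand "corrupted" makes the merge (and thus
+//   // subsequent reads of "k") fail with OpFailureScope::kDefault.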
+class TestPutOperator : public MergeOperator { + public: + virtual bool FullMergeV2(const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const override { + static const std::map + bad_operand_to_op_failure_scope = { + {"corrupted", MergeOperator::OpFailureScope::kDefault}, + {"corrupted_try_merge", MergeOperator::OpFailureScope::kTryMerge}, + {"corrupted_must_merge", + MergeOperator::OpFailureScope::kMustMerge}}; + auto check_operand = + [](Slice operand_val, + MergeOperator::OpFailureScope* op_failure_scope) -> bool { + auto iter = bad_operand_to_op_failure_scope.find(operand_val.ToString()); + if (iter != bad_operand_to_op_failure_scope.end()) { + *op_failure_scope = iter->second; + return false; + } + return true; + }; + if (merge_in.existing_value != nullptr && + !check_operand(*merge_in.existing_value, + &merge_out->op_failure_scope)) { + return false; + } + for (auto value : merge_in.operand_list) { + if (!check_operand(value, &merge_out->op_failure_scope)) { + return false; + } + } + merge_out->existing_operand = merge_in.operand_list.back(); + return true; + } + + virtual const char* Name() const override { return "TestPutOperator"; } +}; + +/* + * A cache wrapper that tracks certain CacheEntryRole's cache charge, its + * peaks and increments + * + * p0 + * / \ p1 + * / \ /\ + * / \/ \ + * a / b \ + * peaks = {p0, p1} + * increments = {p1-a, p2-b} + */ +template +class TargetCacheChargeTrackingCache : public CacheWrapper { + public: + explicit TargetCacheChargeTrackingCache(std::shared_ptr target); + + const char* Name() const override { return "TargetCacheChargeTrackingCache"; } + + Status Insert(const Slice& key, ObjectPtr value, + const CacheItemHelper* helper, size_t charge, + Handle** handle = nullptr, + Priority priority = Priority::LOW) override; + + using Cache::Release; + bool Release(Handle* handle, bool erase_if_last_ref = false) override; + + std::size_t GetCacheCharge() { return cur_cache_charge_; } + + std::deque GetChargedCachePeaks() { return cache_charge_peaks_; } + + std::size_t GetChargedCacheIncrementSum() { + return cache_charge_increments_sum_; + } + + private: + static const Cache::CacheItemHelper* kCrmHelper; + + std::size_t cur_cache_charge_; + std::size_t cache_charge_peak_; + std::size_t cache_charge_increment_; + bool last_peak_tracked_; + std::deque cache_charge_peaks_; + std::size_t cache_charge_increments_sum_; +}; + +class DBTestBase : public testing::Test { + public: + // Sequence of option configurations to try + enum OptionConfig : int { + kDefault = 0, + kBlockBasedTableWithPrefixHashIndex = 1, + kBlockBasedTableWithWholeKeyHashIndex = 2, + kPlainTableFirstBytePrefix = 3, + kPlainTableCappedPrefix = 4, + kPlainTableCappedPrefixNonMmap = 5, + kPlainTableAllBytesPrefix = 6, + kVectorRep = 7, + kHashLinkList = 8, + kMergePut = 9, + kFilter = 10, + kFullFilterWithNewTableReaderForCompactions = 11, + kUncompressed = 12, + kNumLevel_3 = 13, + kDBLogDir = 14, + kWalDirAndMmapReads = 15, + kManifestFileSize = 16, + kPerfOptions = 17, + kHashSkipList = 18, + kUniversalCompaction = 19, + kUniversalCompactionMultiLevel = 20, + kInfiniteMaxOpenFiles = 21, + kCRC32cChecksum = 22, + kFIFOCompaction = 23, + kOptimizeFiltersForHits = 24, + kRowCache = 25, + kRecycleLogFiles = 26, + kConcurrentSkipList = 27, + kPipelinedWrite = 28, + kConcurrentWALWrites = 29, + kDirectIO, + kLevelSubcompactions, + kBlockBasedTableWithIndexRestartInterval, + kBlockBasedTableWithPartitionedIndex, + kBlockBasedTableWithPartitionedIndexFormat4, + 
kBlockBasedTableWithLatestFormat, + kPartitionedFilterWithNewTableReaderForCompactions, + kUniversalSubcompactions, + kUnorderedWrite, + // This must be the last line + kEnd, + }; + + public: + std::string dbname_; + std::string alternative_wal_dir_; + std::string alternative_db_log_dir_; + MockEnv* mem_env_; + Env* encrypted_env_; + SpecialEnv* env_; + std::shared_ptr env_guard_; + DB* db_; + std::vector handles_; + + int option_config_; + Options last_options_; + + // Skip some options, as they may not be applicable to a specific test. + // To add more skip constants, use values 4, 8, 16, etc. + enum OptionSkip { + kNoSkip = 0, + kSkipDeletesFilterFirst = 1, + kSkipUniversalCompaction = 2, + kSkipMergePut = 4, + kSkipPlainTable = 8, + kSkipHashIndex = 16, + kSkipNoSeekToLast = 32, + kSkipFIFOCompaction = 128, + kSkipMmapReads = 256, + }; + + const int kRangeDelSkipConfigs = + // Plain tables do not support range deletions. + kSkipPlainTable | + // MmapReads disables the iterator pinning that RangeDelAggregator + // requires. + kSkipMmapReads; + + // `env_do_fsync` decides whether the special Env would do real + // fsync for files and directories. Skipping fsync can speed up + // tests, but won't cover the exact fsync logic. + DBTestBase(const std::string path, bool env_do_fsync); + + ~DBTestBase(); + + static std::string Key(int i) { + char buf[100]; + snprintf(buf, sizeof(buf), "key%06d", i); + return std::string(buf); + } + + static bool ShouldSkipOptions(int option_config, int skip_mask = kNoSkip); + + // Switch to a fresh database with the next option configuration to + // test. Return false if there are no more configurations to test. + bool ChangeOptions(int skip_mask = kNoSkip); + + // Switch between different compaction styles. + bool ChangeCompactOptions(); + + // Switch between different WAL-realted options. + bool ChangeWalOptions(); + + // Switch between different filter policy + // Jump from kDefault to kFilter to kFullFilter + bool ChangeFilterOptions(); + + // Switch between different DB options for file ingestion tests. + bool ChangeOptionsForFileIngestionTest(); + + // Return the current option configuration. 
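// Editorial sketch, not part of the vendored file: tests usually sweep the
// OptionConfig sequence above by re-running their body until ChangeOptions()
// reports that no configuration is left, e.g.:
//
//   do {
//     Options options = CurrentOptions();
//     DestroyAndReopen(options);
//     ASSERT_OK(Put("foo", "v1"));
//     ASSERT_EQ("v1", Get("foo"));
//   } while (ChangeOptions());
//
// CurrentOptions(), declared next, materializes the Options for the active
// configuration: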
+  Options CurrentOptions(const anon::OptionsOverride& options_override =
+                             anon::OptionsOverride()) const;
+
+  Options CurrentOptions(const Options& default_options,
+                         const anon::OptionsOverride& options_override =
+                             anon::OptionsOverride()) const;
+
+  Options GetDefaultOptions() const;
+
+  Options GetOptions(int option_config) const {
+    return GetOptions(option_config, GetDefaultOptions());
+  }
+
+  Options GetOptions(int option_config, const Options& default_options,
+                     const anon::OptionsOverride& options_override =
+                         anon::OptionsOverride()) const;
+
+  DBImpl* dbfull() { return static_cast_with_check<DBImpl>(db_); }
+
+  void CreateColumnFamilies(const std::vector<std::string>& cfs,
+                            const Options& options);
+
+  void CreateAndReopenWithCF(const std::vector<std::string>& cfs,
+                             const Options& options);
+
+  void ReopenWithColumnFamilies(const std::vector<std::string>& cfs,
+                                const std::vector<Options>& options);
+
+  void ReopenWithColumnFamilies(const std::vector<std::string>& cfs,
+                                const Options& options);
+
+  Status TryReopenWithColumnFamilies(const std::vector<std::string>& cfs,
+                                     const std::vector<Options>& options);
+
+  Status TryReopenWithColumnFamilies(const std::vector<std::string>& cfs,
+                                     const Options& options);
+
+  void Reopen(const Options& options);
+
+  void Close();
+
+  void DestroyAndReopen(const Options& options);
+
+  void Destroy(const Options& options, bool delete_cf_paths = false);
+
+  Status ReadOnlyReopen(const Options& options);
+
+  Status TryReopen(const Options& options);
+
+  bool IsDirectIOSupported();
+
+  bool IsMemoryMappedAccessSupported() const;
+
+  Status Flush(int cf = 0);
+
+  Status Flush(const std::vector<int>& cf_ids);
+
+  Status Put(const Slice& k, const Slice& v, WriteOptions wo = WriteOptions());
+
+  Status Put(int cf, const Slice& k, const Slice& v,
+             WriteOptions wo = WriteOptions());
+
+  Status Merge(const Slice& k, const Slice& v,
+               WriteOptions wo = WriteOptions());
+
+  Status Merge(int cf, const Slice& k, const Slice& v,
+               WriteOptions wo = WriteOptions());
+
+  Status Delete(const std::string& k);
+
+  Status Delete(int cf, const std::string& k);
+
+  Status SingleDelete(const std::string& k);
+
+  Status SingleDelete(int cf, const std::string& k);
+
+  std::string Get(const std::string& k, const Snapshot* snapshot = nullptr);
+
+  std::string Get(int cf, const std::string& k,
+                  const Snapshot* snapshot = nullptr);
+
+  Status Get(const std::string& k, PinnableSlice* v);
+
+  std::vector<std::string> MultiGet(std::vector<int> cfs,
+                                    const std::vector<std::string>& k,
+                                    const Snapshot* snapshot,
+                                    const bool batched,
+                                    const bool async = false);
+
+  std::vector<std::string> MultiGet(const std::vector<std::string>& k,
+                                    const Snapshot* snapshot = nullptr,
+                                    const bool async = false);
+
+  uint64_t GetNumSnapshots();
+
+  uint64_t GetTimeOldestSnapshots();
+
+  uint64_t GetSequenceOldestSnapshots();
+
+  // Return a string that contains all key,value pairs in order,
+  // formatted like "(k1->v1)(k2->v2)".
+  std::string Contents(int cf = 0);
+
+  std::string AllEntriesFor(const Slice& user_key, int cf = 0);
+
+  // Similar to AllEntriesFor but this function also covers reopen with fifo.
+  // Note that test cases with snapshots or entries in memtable should simply
+  // use AllEntriesFor instead as snapshots and entries in memtable will
+  // survive after db reopen.
+ void CheckAllEntriesWithFifoReopen(const std::string& expected_value, + const Slice& user_key, int cf, + const std::vector& cfs, + const Options& options); + + int NumSortedRuns(int cf = 0); + + uint64_t TotalSize(int cf = 0); + + uint64_t SizeAtLevel(int level); + + size_t TotalLiveFiles(int cf = 0); + + size_t TotalLiveFilesAtPath(int cf, const std::string& path); + + size_t CountLiveFiles(); + + int NumTableFilesAtLevel(int level, int cf = 0); + + double CompressionRatioAtLevel(int level, int cf = 0); + + int TotalTableFiles(int cf = 0, int levels = -1); + + std::vector GetBlobFileNumbers(); + + // Return spread of files per level + std::string FilesPerLevel(int cf = 0); + + size_t CountFiles(); + + Status CountFiles(size_t* count); + + Status Size(const Slice& start, const Slice& limit, uint64_t* size) { + return Size(start, limit, 0, size); + } + + Status Size(const Slice& start, const Slice& limit, int cf, uint64_t* size); + + void Compact(int cf, const Slice& start, const Slice& limit, + uint32_t target_path_id); + + void Compact(int cf, const Slice& start, const Slice& limit); + + void Compact(const Slice& start, const Slice& limit); + + // Do n memtable compactions, each of which produces an sstable + // covering the range [small,large]. + void MakeTables(int n, const std::string& small, const std::string& large, + int cf = 0); + + // Prevent pushing of new sstables into deeper levels by adding + // tables that cover a specified range to all levels. + void FillLevels(const std::string& smallest, const std::string& largest, + int cf); + + void MoveFilesToLevel(int level, int cf = 0); + + void DumpFileCounts(const char* label); + + std::string DumpSSTableList(); + + static void GetSstFiles(Env* env, std::string path, + std::vector* files); + + int GetSstFileCount(std::string path); + + // this will generate non-overlapping files since it keeps increasing key_idx + void GenerateNewFile(Random* rnd, int* key_idx, bool nowait = false); + + void GenerateNewFile(int fd, Random* rnd, int* key_idx, bool nowait = false); + + static const int kNumKeysByGenerateNewRandomFile; + static const int KNumKeysByGenerateNewFile = 100; + + void GenerateNewRandomFile(Random* rnd, bool nowait = false); + + std::string IterStatus(Iterator* iter); + + Options OptionsForLogIterTest(); + + std::string DummyString(size_t len, char c = 'a'); + + void VerifyIterLast(std::string expected_key, int cf = 0); + + // Used to test InplaceUpdate + + // If previous value is nullptr or delta is > than previous value, + // sets newValue with delta + // If previous value is not empty, + // updates previous value with 'b' string of previous value size - 1. 
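// Editorial sketch, not part of the vendored file: an
// Options::inplace_callback compatible with the helpers declared below has
// this shape (UpdateStatus and the parameter contract come from RocksDB's
// options headers):
//
//   UpdateStatus ExampleInplaceCallback(char* existing_value,
//                                       uint32_t* existing_value_size,
//                                       Slice delta_value,
//                                       std::string* merged_value) {
//     if (existing_value == nullptr ||
//         *existing_value_size < delta_value.size()) {
//       // Not enough room to update in place; hand back a new value.
//       merged_value->assign(delta_value.data(), delta_value.size());
//       return UpdateStatus::UPDATED;
//     }
//     memcpy(existing_value, delta_value.data(), delta_value.size());
//     *existing_value_size = static_cast<uint32_t>(delta_value.size());
//     return UpdateStatus::UPDATED_INPLACE;
//   }
//
// The static helpers below implement several such variants: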
+  static UpdateStatus updateInPlaceSmallerSize(char* prevValue,
+                                               uint32_t* prevSize, Slice delta,
+                                               std::string* newValue);
+
+  static UpdateStatus updateInPlaceSmallerVarintSize(char* prevValue,
+                                                     uint32_t* prevSize,
+                                                     Slice delta,
+                                                     std::string* newValue);
+
+  static UpdateStatus updateInPlaceLargerSize(char* prevValue,
+                                              uint32_t* prevSize, Slice delta,
+                                              std::string* newValue);
+
+  static UpdateStatus updateInPlaceNoAction(char* prevValue, uint32_t* prevSize,
+                                            Slice delta, std::string* newValue);
+
+  // Utility method to test InplaceUpdate
+  void validateNumberOfEntries(int numValues, int cf = 0);
+
+  void CopyFile(const std::string& source, const std::string& destination,
+                uint64_t size = 0);
+
+  Status GetAllDataFiles(const FileType file_type,
+                         std::unordered_map<std::string, uint64_t>* sst_files,
+                         uint64_t* total_size = nullptr);
+
+  std::vector<uint64_t> ListTableFiles(Env* env, const std::string& path);
+
+  void VerifyDBFromMap(
+      std::map<std::string, std::string> true_data,
+      size_t* total_reads_res = nullptr, bool tailing_iter = false,
+      std::map<std::string, Status> status = std::map<std::string, Status>());
+
+  void VerifyDBInternal(
+      std::vector<std::pair<std::string, std::string>> true_data);
+
+  uint64_t GetNumberOfSstFilesForColumnFamily(DB* db,
+                                              std::string column_family_name);
+
+  uint64_t GetSstSizeHelper(Temperature temperature);
+
+  uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) {
+    return options.statistics->getTickerCount(ticker_type);
+  }
+
+  uint64_t TestGetAndResetTickerCount(const Options& options,
+                                      Tickers ticker_type) {
+    return options.statistics->getAndResetTickerCount(ticker_type);
+  }
+  // Short name for TestGetAndResetTickerCount
+  uint64_t PopTicker(const Options& options, Tickers ticker_type) {
+    return options.statistics->getAndResetTickerCount(ticker_type);
+  }
+
+  // Note: reverting this setting within the same test run is not yet
+  // supported
+  void SetTimeElapseOnlySleepOnReopen(DBOptions* options);
+
+ private:  // Prone to error on direct use
+  void MaybeInstallTimeElapseOnlySleep(const DBOptions& options);
+
+  bool time_elapse_only_sleep_on_reopen_ = false;
+};
+
+// For verifying that all files generated by current version have SST
+// unique ids.
+void VerifySstUniqueIds(const TablePropertiesCollection& props);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_with_timestamp_test_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_with_timestamp_test_util.h
new file mode 100644
index 0000000000..8a0d8e4e31
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/db_with_timestamp_test_util.h
@@ -0,0 +1,126 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+ +#pragma once + +#include "db/db_test_util.h" +#include "port/stack_trace.h" +#include "test_util/testutil.h" + +namespace ROCKSDB_NAMESPACE { +class DBBasicTestWithTimestampBase : public DBTestBase { + public: + explicit DBBasicTestWithTimestampBase(const std::string& dbname) + : DBTestBase(dbname, /*env_do_fsync=*/true) {} + + protected: + static std::string Key1(uint64_t k); + + static std::string KeyWithPrefix(std::string prefix, uint64_t k); + + static std::vector ConvertStrToSlice( + std::vector& strings); + + class TestComparator : public Comparator { + private: + const Comparator* cmp_without_ts_; + + public: + explicit TestComparator(size_t ts_sz) + : Comparator(ts_sz), cmp_without_ts_(nullptr) { + cmp_without_ts_ = BytewiseComparator(); + } + + const char* Name() const override { return "TestComparator"; } + + void FindShortSuccessor(std::string*) const override {} + + void FindShortestSeparator(std::string*, const Slice&) const override {} + + int Compare(const Slice& a, const Slice& b) const override { + int r = CompareWithoutTimestamp(a, b); + if (r != 0 || 0 == timestamp_size()) { + return r; + } + return -CompareTimestamp( + Slice(a.data() + a.size() - timestamp_size(), timestamp_size()), + Slice(b.data() + b.size() - timestamp_size(), timestamp_size())); + } + + using Comparator::CompareWithoutTimestamp; + int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b, + bool b_has_ts) const override { + if (a_has_ts) { + assert(a.size() >= timestamp_size()); + } + if (b_has_ts) { + assert(b.size() >= timestamp_size()); + } + Slice lhs = a_has_ts ? StripTimestampFromUserKey(a, timestamp_size()) : a; + Slice rhs = b_has_ts ? StripTimestampFromUserKey(b, timestamp_size()) : b; + return cmp_without_ts_->Compare(lhs, rhs); + } + + int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override { + if (!ts1.data() && !ts2.data()) { + return 0; + } else if (ts1.data() && !ts2.data()) { + return 1; + } else if (!ts1.data() && ts2.data()) { + return -1; + } + assert(ts1.size() == ts2.size()); + uint64_t low1 = 0; + uint64_t low2 = 0; + uint64_t high1 = 0; + uint64_t high2 = 0; + const size_t kSize = ts1.size(); + std::unique_ptr ts1_buf(new char[kSize]); + memcpy(ts1_buf.get(), ts1.data(), ts1.size()); + std::unique_ptr ts2_buf(new char[kSize]); + memcpy(ts2_buf.get(), ts2.data(), ts2.size()); + Slice ts1_copy = Slice(ts1_buf.get(), kSize); + Slice ts2_copy = Slice(ts2_buf.get(), kSize); + auto* ptr1 = const_cast(&ts1_copy); + auto* ptr2 = const_cast(&ts2_copy); + if (!GetFixed64(ptr1, &low1) || !GetFixed64(ptr1, &high1) || + !GetFixed64(ptr2, &low2) || !GetFixed64(ptr2, &high2)) { + assert(false); + } + if (high1 < high2) { + return -1; + } else if (high1 > high2) { + return 1; + } + if (low1 < low2) { + return -1; + } else if (low1 > low2) { + return 1; + } + return 0; + } + }; + + std::string Timestamp(uint64_t low, uint64_t high); + + void CheckIterUserEntry(const Iterator* it, const Slice& expected_key, + ValueType expected_value_type, + const Slice& expected_value, + const Slice& expected_ts) const; + + void CheckIterEntry(const Iterator* it, const Slice& expected_ukey, + SequenceNumber expected_seq, ValueType expected_val_type, + const Slice& expected_value, + const Slice& expected_ts) const; + + void CheckIterEntry(const Iterator* it, const Slice& expected_ukey, + ValueType expected_val_type, const Slice& expected_value, + const Slice& expected_ts) const; +}; +} // namespace ROCKSDB_NAMESPACE diff --git 
a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.cc new file mode 100644 index 0000000000..2c3581ca00 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.cc @@ -0,0 +1,213 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "db/dbformat.h" + +#include + +#include + +#include "db/lookup_key.h" +#include "monitoring/perf_context_imp.h" +#include "port/port.h" +#include "util/coding.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +// kValueTypeForSeek defines the ValueType that should be passed when +// constructing a ParsedInternalKey object for seeking to a particular +// sequence number (since we sort sequence numbers in decreasing order +// and the value type is embedded as the low 8 bits in the sequence +// number in internal keys, we need to use the highest-numbered +// ValueType, not the lowest). +const ValueType kValueTypeForSeek = kTypeWideColumnEntity; +const ValueType kValueTypeForSeekForPrev = kTypeDeletion; +const std::string kDisableUserTimestamp(""); + +EntryType GetEntryType(ValueType value_type) { + switch (value_type) { + case kTypeValue: + return kEntryPut; + case kTypeDeletion: + return kEntryDelete; + case kTypeDeletionWithTimestamp: + return kEntryDeleteWithTimestamp; + case kTypeSingleDeletion: + return kEntrySingleDelete; + case kTypeMerge: + return kEntryMerge; + case kTypeRangeDeletion: + return kEntryRangeDeletion; + case kTypeBlobIndex: + return kEntryBlobIndex; + case kTypeWideColumnEntity: + return kEntryWideColumnEntity; + default: + return kEntryOther; + } +} + +void AppendInternalKey(std::string* result, const ParsedInternalKey& key) { + result->append(key.user_key.data(), key.user_key.size()); + PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); +} + +void AppendInternalKeyWithDifferentTimestamp(std::string* result, + const ParsedInternalKey& key, + const Slice& ts) { + assert(key.user_key.size() >= ts.size()); + result->append(key.user_key.data(), key.user_key.size() - ts.size()); + result->append(ts.data(), ts.size()); + PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); +} + +void AppendInternalKeyFooter(std::string* result, SequenceNumber s, + ValueType t) { + PutFixed64(result, PackSequenceAndType(s, t)); +} + +void AppendKeyWithMinTimestamp(std::string* result, const Slice& key, + size_t ts_sz) { + assert(ts_sz > 0); + const std::string kTsMin(ts_sz, static_cast(0)); + result->append(key.data(), key.size()); + result->append(kTsMin.data(), ts_sz); +} + +void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key, + size_t ts_sz) { + assert(ts_sz > 0); + const std::string kTsMax(ts_sz, static_cast(0xff)); + result->append(key.data(), key.size()); + result->append(kTsMax.data(), ts_sz); +} + +void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key, + size_t ts_sz) { + assert(ts_sz > 0); + result->append(key.data(), key.size() - ts_sz); + + static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff"; + if (ts_sz < strlen(kTsMax)) { + 
result->append(kTsMax, ts_sz); + } else { + result->append(std::string(ts_sz, '\xff')); + } +} + +std::string ParsedInternalKey::DebugString(bool log_err_key, bool hex) const { + std::string result = "'"; + if (log_err_key) { + result += user_key.ToString(hex); + } else { + result += ""; + } + + char buf[50]; + snprintf(buf, sizeof(buf), "' seq:%" PRIu64 ", type:%d", sequence, + static_cast(type)); + + result += buf; + return result; +} + +std::string InternalKey::DebugString(bool hex) const { + std::string result; + ParsedInternalKey parsed; + if (ParseInternalKey(rep_, &parsed, false /* log_err_key */).ok()) { + result = parsed.DebugString(true /* log_err_key */, hex); // TODO + } else { + result = "(bad)"; + result.append(EscapeString(rep_)); + } + return result; +} + +int InternalKeyComparator::Compare(const ParsedInternalKey& a, + const ParsedInternalKey& b) const { + // Order by: + // increasing user key (according to user-supplied comparator) + // decreasing sequence number + // decreasing type (though sequence# should be enough to disambiguate) + int r = user_comparator_.Compare(a.user_key, b.user_key); + if (r == 0) { + if (a.sequence > b.sequence) { + r = -1; + } else if (a.sequence < b.sequence) { + r = +1; + } else if (a.type > b.type) { + r = -1; + } else if (a.type < b.type) { + r = +1; + } + } + return r; +} + +int InternalKeyComparator::Compare(const Slice& a, + const ParsedInternalKey& b) const { + // Order by: + // increasing user key (according to user-supplied comparator) + // decreasing sequence number + // decreasing type (though sequence# should be enough to disambiguate) + int r = user_comparator_.Compare(ExtractUserKey(a), b.user_key); + if (r == 0) { + const uint64_t anum = + DecodeFixed64(a.data() + a.size() - kNumInternalBytes); + const uint64_t bnum = (b.sequence << 8) | b.type; + if (anum > bnum) { + r = -1; + } else if (anum < bnum) { + r = +1; + } + } + return r; +} + +int InternalKeyComparator::Compare(const ParsedInternalKey& a, + const Slice& b) const { + return -Compare(b, a); +} + +LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s, + const Slice* ts) { + size_t usize = _user_key.size(); + size_t ts_sz = (nullptr == ts) ? 0 : ts->size(); + size_t needed = usize + ts_sz + 13; // A conservative estimate + char* dst; + if (needed <= sizeof(space_)) { + dst = space_; + } else { + dst = new char[needed]; + } + start_ = dst; + // NOTE: We don't support users keys of more than 2GB :) + dst = EncodeVarint32(dst, static_cast(usize + ts_sz + 8)); + kstart_ = dst; + memcpy(dst, _user_key.data(), usize); + dst += usize; + if (nullptr != ts) { + memcpy(dst, ts->data(), ts_sz); + dst += ts_sz; + } + EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek)); + dst += 8; + end_ = dst; +} + +void IterKey::EnlargeBuffer(size_t key_size) { + // If size is smaller than buffer size, continue using current buffer, + // or the static allocated one, as default + assert(key_size > buf_size_); + // Need to enlarge the buffer. + ResetBuffer(); + buf_ = new char[key_size]; + buf_size_ = key_size; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.h new file mode 100644 index 0000000000..3a6edc1bf0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/dbformat.h @@ -0,0 +1,869 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <stdio.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "rocksdb/comparator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/types.h"
+#include "util/coding.h"
+#include "util/user_comparator_wrapper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// The file declares data structures and functions that deal with internal
+// keys.
+// Each internal key contains a user key, a sequence number (SequenceNumber)
+// and a type (ValueType), and they are usually encoded together.
+// There are some related helper classes here.
+
+class InternalKey;
+
+// Value types encoded as the last component of internal keys.
+// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
+// data structures.
+// The highest bit of the value type needs to be reserved to SST tables
+// for them to do more flexible encoding.
+enum ValueType : unsigned char {
+  kTypeDeletion = 0x0,
+  kTypeValue = 0x1,
+  kTypeMerge = 0x2,
+  kTypeLogData = 0x3,               // WAL only.
+  kTypeColumnFamilyDeletion = 0x4,  // WAL only.
+  kTypeColumnFamilyValue = 0x5,     // WAL only.
+  kTypeColumnFamilyMerge = 0x6,     // WAL only.
+  kTypeSingleDeletion = 0x7,
+  kTypeColumnFamilySingleDeletion = 0x8,  // WAL only.
+  kTypeBeginPrepareXID = 0x9,             // WAL only.
+  kTypeEndPrepareXID = 0xA,               // WAL only.
+  kTypeCommitXID = 0xB,                   // WAL only.
+  kTypeRollbackXID = 0xC,                 // WAL only.
+  kTypeNoop = 0xD,                        // WAL only.
+  kTypeColumnFamilyRangeDeletion = 0xE,   // WAL only.
+  kTypeRangeDeletion = 0xF,               // meta block
+  kTypeColumnFamilyBlobIndex = 0x10,      // Blob DB only
+  kTypeBlobIndex = 0x11,                  // Blob DB only
+  // When the prepared record is also persisted in db, we use a different
+  // record. This is to ensure that the WAL that is generated by a WritePolicy
+  // is not mistakenly read by another, which would result into data
+  // inconsistency.
+  kTypeBeginPersistedPrepareXID = 0x12,  // WAL only.
+  // Similar to kTypeBeginPersistedPrepareXID, this is to ensure that WAL
+  // generated by WriteUnprepared write policy is not mistakenly read by
+  // another.
+  kTypeBeginUnprepareXID = 0x13,  // WAL only.
+  kTypeDeletionWithTimestamp = 0x14,
+  kTypeCommitXIDAndTimestamp = 0x15,  // WAL only
+  kTypeWideColumnEntity = 0x16,
+  kTypeColumnFamilyWideColumnEntity = 0x17,  // WAL only
+  kTypeMaxValid,    // Should be after the last valid type, only used for
+                    // validation
+  kMaxValue = 0x7F  // Not used for storing records.
+};
+
+// Defined in dbformat.cc
+extern const ValueType kValueTypeForSeek;
+extern const ValueType kValueTypeForSeekForPrev;
+
+// Checks whether a type is an inline value type
+// (i.e. a type used in memtable skiplist and sst file datablock).
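// Editorial note, not part of the vendored file: concretely, the inline set
// accepted by IsValueType() below is kTypeDeletion, kTypeValue, kTypeMerge,
// kTypeSingleDeletion, kTypeBlobIndex, kTypeDeletionWithTimestamp and
// kTypeWideColumnEntity. WAL-only record types (kTypeNoop, the XID markers,
// the per-column-family variants) never appear in memtables or data blocks:
//
//   // IsValueType(kTypeValue)         == true
//   // IsValueType(kTypeBlobIndex)     == true
//   // IsValueType(kTypeNoop)          == false  (WAL only)
//   // IsValueType(kTypeRangeDeletion) == false  (meta block; covered by
//   //                                    IsExtendedValueType() below)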
+inline bool IsValueType(ValueType t) { + return t <= kTypeMerge || kTypeSingleDeletion == t || kTypeBlobIndex == t || + kTypeDeletionWithTimestamp == t || kTypeWideColumnEntity == t; +} + +// Checks whether a type is from user operation +// kTypeRangeDeletion is in meta block so this API is separated from above +// kTypeMaxValid can be from keys generated by +// TruncatedRangeDelIterator::start_key() +inline bool IsExtendedValueType(ValueType t) { + return IsValueType(t) || t == kTypeRangeDeletion || t == kTypeMaxValid; +} + +// We leave eight bits empty at the bottom so a type and sequence# +// can be packed together into 64-bits. +static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1); + +static const SequenceNumber kDisableGlobalSequenceNumber = + std::numeric_limits::max(); + +constexpr uint64_t kNumInternalBytes = 8; + +// Defined in dbformat.cc +extern const std::string kDisableUserTimestamp; + +// The data structure that represents an internal key in the way that user_key, +// sequence number and type are stored in separated forms. +struct ParsedInternalKey { + Slice user_key; + SequenceNumber sequence; + ValueType type; + + ParsedInternalKey() + : sequence(kMaxSequenceNumber), + type(kTypeDeletion) // Make code analyzer happy + {} // Intentionally left uninitialized (for speed) + // u contains timestamp if user timestamp feature is enabled. + ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t) + : user_key(u), sequence(seq), type(t) {} + std::string DebugString(bool log_err_key, bool hex) const; + + void clear() { + user_key.clear(); + sequence = 0; + type = kTypeDeletion; + } + + void SetTimestamp(const Slice& ts) { + assert(ts.size() <= user_key.size()); + const char* addr = user_key.data() + user_key.size() - ts.size(); + memcpy(const_cast(addr), ts.data(), ts.size()); + } + + Slice GetTimestamp(size_t ts_sz) { + assert(ts_sz <= user_key.size()); + const char* addr = user_key.data() + user_key.size() - ts_sz; + return Slice(const_cast(addr), ts_sz); + } +}; + +// Return the length of the encoding of "key". +inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { + return key.user_key.size() + kNumInternalBytes; +} + +// Pack a sequence number and a ValueType into a uint64_t +inline uint64_t PackSequenceAndType(uint64_t seq, ValueType t) { + assert(seq <= kMaxSequenceNumber); + // kTypeMaxValid is used in TruncatedRangeDelIterator, see its constructor. + assert(IsExtendedValueType(t) || t == kTypeMaxValid); + return (seq << 8) | t; +} + +// Given the result of PackSequenceAndType, store the sequence number in *seq +// and the ValueType in *t. +inline void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, + ValueType* t) { + *seq = packed >> 8; + *t = static_cast(packed & 0xff); + + // Commented the following two assertions in order to test key-value checksum + // on corrupted keys without crashing ("DbKvChecksumTest"). + // assert(*seq <= kMaxSequenceNumber); + // assert(IsExtendedValueType(*t)); +} + +EntryType GetEntryType(ValueType value_type); + +// Append the serialization of "key" to *result. +extern void AppendInternalKey(std::string* result, + const ParsedInternalKey& key); + +// Append the serialization of "key" to *result, replacing the original +// timestamp with argument ts. +extern void AppendInternalKeyWithDifferentTimestamp( + std::string* result, const ParsedInternalKey& key, const Slice& ts); + +// Serialized internal key consists of user key followed by footer. 
+// This function appends the footer to *result, assuming that *result already +// contains the user key at the end. +extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s, + ValueType t); + +// Append the key and a minimal timestamp to *result +extern void AppendKeyWithMinTimestamp(std::string* result, const Slice& key, + size_t ts_sz); + +// Append the key and a maximal timestamp to *result +extern void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key, + size_t ts_sz); + +// `key` is a user key with timestamp. Append the user key without timestamp +// and the maximal timestamp to *result. +extern void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key, + size_t ts_sz); + +// Attempt to parse an internal key from "internal_key". On success, +// stores the parsed data in "*result", and returns true. +// +// On error, returns false, leaves "*result" in an undefined state. +extern Status ParseInternalKey(const Slice& internal_key, + ParsedInternalKey* result, bool log_err_key); + +// Returns the user key portion of an internal key. +inline Slice ExtractUserKey(const Slice& internal_key) { + assert(internal_key.size() >= kNumInternalBytes); + return Slice(internal_key.data(), internal_key.size() - kNumInternalBytes); +} + +inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key, + size_t ts_sz) { + Slice ret = internal_key; + ret.remove_suffix(kNumInternalBytes + ts_sz); + return ret; +} + +inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { + Slice ret = user_key; + ret.remove_suffix(ts_sz); + return ret; +} + +inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { + assert(user_key.size() >= ts_sz); + return Slice(user_key.data() + user_key.size() - ts_sz, ts_sz); +} + +inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) { + const size_t key_size = internal_key.size(); + assert(key_size >= kNumInternalBytes + ts_sz); + return Slice(internal_key.data() + key_size - ts_sz - kNumInternalBytes, + ts_sz); +} + +inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) { + assert(internal_key.size() >= kNumInternalBytes); + const size_t n = internal_key.size(); + return DecodeFixed64(internal_key.data() + n - kNumInternalBytes); +} + +inline ValueType ExtractValueType(const Slice& internal_key) { + uint64_t num = ExtractInternalKeyFooter(internal_key); + unsigned char c = num & 0xff; + return static_cast(c); +} + +// A comparator for internal keys that uses a specified comparator for +// the user key portion and breaks ties by decreasing sequence number. +class InternalKeyComparator +#ifdef NDEBUG + final +#endif + : public CompareInterface { + private: + UserComparatorWrapper user_comparator_; + + public: + // `InternalKeyComparator`s constructed with the default constructor are not + // usable and will segfault on any attempt to use them for comparisons. + InternalKeyComparator() = default; + + // @param named If true, assign a name to this comparator based on the + // underlying comparator's name. This involves an allocation and copy in + // this constructor to precompute the result of `Name()`. To avoid this + // overhead, set `named` to false. In that case, `Name()` will return a + // generic name that is non-specific to the underlying comparator. 
+ explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {} + virtual ~InternalKeyComparator() {} + + int Compare(const Slice& a, const Slice& b) const override; + + bool Equal(const Slice& a, const Slice& b) const { + // TODO Use user_comparator_.Equal(). Perhaps compare seqno before + // comparing the user key too. + return Compare(a, b) == 0; + } + + // Same as Compare except that it excludes the value type from comparison + int CompareKeySeq(const Slice& a, const Slice& b) const; + + const Comparator* user_comparator() const { + return user_comparator_.user_comparator(); + } + + int Compare(const InternalKey& a, const InternalKey& b) const; + int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const; + int Compare(const Slice& a, const ParsedInternalKey& b) const; + int Compare(const ParsedInternalKey& a, const Slice& b) const; + // In this `Compare()` overload, the sequence numbers provided in + // `a_global_seqno` and `b_global_seqno` override the sequence numbers in `a` + // and `b`, respectively. To disable sequence number override(s), provide the + // value `kDisableGlobalSequenceNumber`. + int Compare(const Slice& a, SequenceNumber a_global_seqno, const Slice& b, + SequenceNumber b_global_seqno) const; +}; + +// The class represent the internal key in encoded form. +class InternalKey { + private: + std::string rep_; + + public: + InternalKey() {} // Leave rep_ as empty to indicate it is invalid + InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) { + AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t)); + } + InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t, Slice ts) { + AppendInternalKeyWithDifferentTimestamp( + &rep_, ParsedInternalKey(_user_key, s, t), ts); + } + + // sets the internal key to be bigger or equal to all internal keys with this + // user key + void SetMaxPossibleForUserKey(const Slice& _user_key) { + AppendInternalKey( + &rep_, ParsedInternalKey(_user_key, 0, static_cast(0))); + } + + // sets the internal key to be smaller or equal to all internal keys with this + // user key + void SetMinPossibleForUserKey(const Slice& _user_key) { + AppendInternalKey(&rep_, ParsedInternalKey(_user_key, kMaxSequenceNumber, + kValueTypeForSeek)); + } + + bool Valid() const { + ParsedInternalKey parsed; + return (ParseInternalKey(Slice(rep_), &parsed, false /* log_err_key */) + .ok()); // TODO + } + + void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); } + Slice Encode() const { + assert(!rep_.empty()); + return rep_; + } + + Slice user_key() const { return ExtractUserKey(rep_); } + size_t size() const { return rep_.size(); } + + void Set(const Slice& _user_key, SequenceNumber s, ValueType t) { + SetFrom(ParsedInternalKey(_user_key, s, t)); + } + + void Set(const Slice& _user_key_with_ts, SequenceNumber s, ValueType t, + const Slice& ts) { + ParsedInternalKey pik = ParsedInternalKey(_user_key_with_ts, s, t); + // Should not call pik.SetTimestamp() directly as it overwrites the buffer + // containing _user_key. + SetFrom(pik, ts); + } + + void SetFrom(const ParsedInternalKey& p) { + rep_.clear(); + AppendInternalKey(&rep_, p); + } + + void SetFrom(const ParsedInternalKey& p, const Slice& ts) { + rep_.clear(); + AppendInternalKeyWithDifferentTimestamp(&rep_, p, ts); + } + + void Clear() { rep_.clear(); } + + // The underlying representation. + // Intended only to be used together with ConvertFromUserKey(). 
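// Editorial sketch, not part of the vendored file: rep() and
// ConvertFromUserKey() let a caller build the user key directly inside the
// internal buffer and then stamp the 8-byte footer in place, saving a copy:
//
//   InternalKey ikey;
//   ikey.rep()->assign(user_key.data(), user_key.size());
//   ikey.ConvertFromUserKey(seq, kTypeValue);
//   // rep_ now holds user_key followed by PackSequenceAndType(seq, kTypeValue)
//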
+ std::string* rep() { return &rep_; } + + // Assuming that *rep() contains a user key, this method makes internal key + // out of it in-place. This saves a memcpy compared to Set()/SetFrom(). + void ConvertFromUserKey(SequenceNumber s, ValueType t) { + AppendInternalKeyFooter(&rep_, s, t); + } + + std::string DebugString(bool hex) const; +}; + +inline int InternalKeyComparator::Compare(const InternalKey& a, + const InternalKey& b) const { + return Compare(a.Encode(), b.Encode()); +} + +inline Status ParseInternalKey(const Slice& internal_key, + ParsedInternalKey* result, bool log_err_key) { + const size_t n = internal_key.size(); + + if (n < kNumInternalBytes) { + return Status::Corruption("Corrupted Key: Internal Key too small. Size=" + + std::to_string(n) + ". "); + } + + uint64_t num = DecodeFixed64(internal_key.data() + n - kNumInternalBytes); + unsigned char c = num & 0xff; + result->sequence = num >> 8; + result->type = static_cast(c); + assert(result->type <= ValueType::kMaxValue); + result->user_key = Slice(internal_key.data(), n - kNumInternalBytes); + + if (IsExtendedValueType(result->type)) { + return Status::OK(); + } else { + return Status::Corruption("Corrupted Key", + result->DebugString(log_err_key, true)); + } +} + +// Update the sequence number in the internal key. +// Guarantees not to invalidate ikey.data(). +inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) { + size_t ikey_sz = ikey->size(); + assert(ikey_sz >= kNumInternalBytes); + uint64_t newval = (seq << 8) | t; + + // Note: Since C++11, strings are guaranteed to be stored contiguously and + // string::operator[]() is guaranteed not to change ikey.data(). + EncodeFixed64(&(*ikey)[ikey_sz - kNumInternalBytes], newval); +} + +// Get the sequence number from the internal key +inline uint64_t GetInternalKeySeqno(const Slice& internal_key) { + const size_t n = internal_key.size(); + assert(n >= kNumInternalBytes); + uint64_t num = DecodeFixed64(internal_key.data() + n - kNumInternalBytes); + return num >> 8; +} + +// The class to store keys in an efficient way. It allows: +// 1. Users can either copy the key into it, or have it point to an unowned +// address. +// 2. For copied key, a short inline buffer is kept to reduce memory +// allocation for smaller keys. +// 3. It tracks user key or internal key, and allow conversion between them. +class IterKey { + public: + IterKey() + : buf_(space_), + key_(buf_), + key_size_(0), + buf_size_(sizeof(space_)), + is_user_key_(true) {} + // No copying allowed + IterKey(const IterKey&) = delete; + void operator=(const IterKey&) = delete; + + ~IterKey() { ResetBuffer(); } + + // The bool will be picked up by the next calls to SetKey + void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; } + + // Returns the key in whichever format that was provided to KeyIter + // If user-defined timestamp is enabled, then timestamp is included in the + // return result. 
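// Editorial sketch, not part of the vendored file: the copy/pin distinction
// works as follows.
//
//   IterKey k;
//   k.SetUserKey(Slice("abc"));                  // copied into buf_
//   assert(!k.IsKeyPinned());
//   k.SetUserKey(Slice("abc"), /*copy=*/false);  // points at caller memory
//   assert(k.IsKeyPinned());
//   k.OwnKey();                                  // copies the pinned key in
//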
+ Slice GetKey() const { return Slice(key_, key_size_); } + + Slice GetInternalKey() const { + assert(!IsUserKey()); + return Slice(key_, key_size_); + } + + // If user-defined timestamp is enabled, then timestamp is included in the + // return result of GetUserKey(); + Slice GetUserKey() const { + if (IsUserKey()) { + return Slice(key_, key_size_); + } else { + assert(key_size_ >= kNumInternalBytes); + return Slice(key_, key_size_ - kNumInternalBytes); + } + } + + size_t Size() const { return key_size_; } + + void Clear() { key_size_ = 0; } + + // Append "non_shared_data" to its back, from "shared_len" + // This function is used in Block::Iter::ParseNextKey + // shared_len: bytes in [0, shard_len-1] would be remained + // non_shared_data: data to be append, its length must be >= non_shared_len + void TrimAppend(const size_t shared_len, const char* non_shared_data, + const size_t non_shared_len) { + assert(shared_len <= key_size_); + size_t total_size = shared_len + non_shared_len; + + if (IsKeyPinned() /* key is not in buf_ */) { + // Copy the key from external memory to buf_ (copy shared_len bytes) + EnlargeBufferIfNeeded(total_size); + memcpy(buf_, key_, shared_len); + } else if (total_size > buf_size_) { + // Need to allocate space, delete previous space + char* p = new char[total_size]; + memcpy(p, key_, shared_len); + + if (buf_ != space_) { + delete[] buf_; + } + + buf_ = p; + buf_size_ = total_size; + } + + memcpy(buf_ + shared_len, non_shared_data, non_shared_len); + key_ = buf_; + key_size_ = total_size; + } + + Slice SetKey(const Slice& key, bool copy = true) { + // is_user_key_ expected to be set already via SetIsUserKey + return SetKeyImpl(key, copy); + } + + // If user-defined timestamp is enabled, then `key` includes timestamp. + // TODO(yanqin) this is also used to set prefix, which do not include + // timestamp. Should be handled. + Slice SetUserKey(const Slice& key, bool copy = true) { + is_user_key_ = true; + return SetKeyImpl(key, copy); + } + + Slice SetInternalKey(const Slice& key, bool copy = true) { + is_user_key_ = false; + return SetKeyImpl(key, copy); + } + + // Copies the content of key, updates the reference to the user key in ikey + // and returns a Slice referencing the new copy. + Slice SetInternalKey(const Slice& key, ParsedInternalKey* ikey) { + size_t key_n = key.size(); + assert(key_n >= kNumInternalBytes); + SetInternalKey(key); + ikey->user_key = Slice(key_, key_n - kNumInternalBytes); + return Slice(key_, key_n); + } + + // Copy the key into IterKey own buf_ + void OwnKey() { + assert(IsKeyPinned() == true); + + Reserve(key_size_); + memcpy(buf_, key_, key_size_); + key_ = buf_; + } + + // Update the sequence number in the internal key. Guarantees not to + // invalidate slices to the key (and the user key). + void UpdateInternalKey(uint64_t seq, ValueType t, const Slice* ts = nullptr) { + assert(!IsKeyPinned()); + assert(key_size_ >= kNumInternalBytes); + if (ts) { + assert(key_size_ >= kNumInternalBytes + ts->size()); + memcpy(&buf_[key_size_ - kNumInternalBytes - ts->size()], ts->data(), + ts->size()); + } + uint64_t newval = (seq << 8) | t; + EncodeFixed64(&buf_[key_size_ - kNumInternalBytes], newval); + } + + bool IsKeyPinned() const { return (key_ != buf_); } + + // If `ts` is provided, user_key should not contain timestamp, + // and `ts` is appended after user_key. + // TODO: more efficient storage for timestamp. 
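// Editorial note, not part of the vendored file: the buffer assembled by the
// overload below is laid out as
//
//   [key_prefix][user_key][ts][PackSequenceAndType(s, value_type)]
//
// with the packed sequence/type footer occupying the trailing 8 bytes
// (kNumInternalBytes).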
+ void SetInternalKey(const Slice& key_prefix, const Slice& user_key, + SequenceNumber s, + ValueType value_type = kValueTypeForSeek, + const Slice* ts = nullptr) { + size_t psize = key_prefix.size(); + size_t usize = user_key.size(); + size_t ts_sz = (ts != nullptr ? ts->size() : 0); + EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t) + ts_sz); + if (psize > 0) { + memcpy(buf_, key_prefix.data(), psize); + } + memcpy(buf_ + psize, user_key.data(), usize); + if (ts) { + memcpy(buf_ + psize + usize, ts->data(), ts_sz); + } + EncodeFixed64(buf_ + usize + psize + ts_sz, + PackSequenceAndType(s, value_type)); + + key_ = buf_; + key_size_ = psize + usize + sizeof(uint64_t) + ts_sz; + is_user_key_ = false; + } + + void SetInternalKey(const Slice& user_key, SequenceNumber s, + ValueType value_type = kValueTypeForSeek, + const Slice* ts = nullptr) { + SetInternalKey(Slice(), user_key, s, value_type, ts); + } + + void Reserve(size_t size) { + EnlargeBufferIfNeeded(size); + key_size_ = size; + } + + void SetInternalKey(const ParsedInternalKey& parsed_key) { + SetInternalKey(Slice(), parsed_key); + } + + void SetInternalKey(const Slice& key_prefix, + const ParsedInternalKey& parsed_key_suffix) { + SetInternalKey(key_prefix, parsed_key_suffix.user_key, + parsed_key_suffix.sequence, parsed_key_suffix.type); + } + + void EncodeLengthPrefixedKey(const Slice& key) { + auto size = key.size(); + EnlargeBufferIfNeeded(size + static_cast(VarintLength(size))); + char* ptr = EncodeVarint32(buf_, static_cast(size)); + memcpy(ptr, key.data(), size); + key_ = buf_; + is_user_key_ = true; + } + + bool IsUserKey() const { return is_user_key_; } + + private: + char* buf_; + const char* key_; + size_t key_size_; + size_t buf_size_; + char space_[32]; // Avoid allocation for short keys + bool is_user_key_; + + Slice SetKeyImpl(const Slice& key, bool copy) { + size_t size = key.size(); + if (copy) { + // Copy key to buf_ + EnlargeBufferIfNeeded(size); + memcpy(buf_, key.data(), size); + key_ = buf_; + } else { + // Update key_ to point to external memory + key_ = key.data(); + } + key_size_ = size; + return Slice(key_, key_size_); + } + + void ResetBuffer() { + if (buf_ != space_) { + delete[] buf_; + buf_ = space_; + } + buf_size_ = sizeof(space_); + key_size_ = 0; + } + + // Enlarge the buffer size if needed based on key_size. + // By default, static allocated buffer is used. Once there is a key + // larger than the static allocated buffer, another buffer is dynamically + // allocated, until a larger key buffer is requested. In that case, we + // reallocate buffer and delete the old one. + void EnlargeBufferIfNeeded(size_t key_size) { + // If size is smaller than buffer size, continue using current buffer, + // or the static allocated one, as default + if (key_size > buf_size_) { + EnlargeBuffer(key_size); + } + } + + void EnlargeBuffer(size_t key_size); +}; + +// Convert from a SliceTransform of user keys, to a SliceTransform of +// internal keys. 
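// Editorial sketch, not part of the vendored file: the adapter below lets a
// user-key prefix extractor be applied to internal keys by first stripping
// the footer, e.g.:
//
//   const SliceTransform* user_pe = NewFixedPrefixTransform(3);
//   InternalKeySliceTransform internal_pe(user_pe);
//   // internal_pe.Transform(ikey) == user_pe->Transform(ExtractUserKey(ikey))
//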
+class InternalKeySliceTransform : public SliceTransform { + public: + explicit InternalKeySliceTransform(const SliceTransform* transform) + : transform_(transform) {} + + virtual const char* Name() const override { return transform_->Name(); } + + virtual Slice Transform(const Slice& src) const override { + auto user_key = ExtractUserKey(src); + return transform_->Transform(user_key); + } + + virtual bool InDomain(const Slice& src) const override { + auto user_key = ExtractUserKey(src); + return transform_->InDomain(user_key); + } + + virtual bool InRange(const Slice& dst) const override { + auto user_key = ExtractUserKey(dst); + return transform_->InRange(user_key); + } + + const SliceTransform* user_prefix_extractor() const { return transform_; } + + private: + // Like comparator, InternalKeySliceTransform will not take care of the + // deletion of transform_ + const SliceTransform* const transform_; +}; + +// Read the key of a record from a write batch. +// if this record represent the default column family then cf_record +// must be passed as false, otherwise it must be passed as true. +extern bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key, + bool cf_record); + +// Read record from a write batch piece from input. +// tag, column_family, key, value and blob are return values. Callers own the +// slice they point to. +// Tag is defined as ValueType. +// input will be advanced to after the record. +// If user-defined timestamp is enabled for a column family, then the `key` +// resulting from this call will include timestamp. +extern Status ReadRecordFromWriteBatch(Slice* input, char* tag, + uint32_t* column_family, Slice* key, + Slice* value, Slice* blob, Slice* xid); + +// When user call DeleteRange() to delete a range of keys, +// we will store a serialized RangeTombstone in MemTable and SST. +// the struct here is an easy-understood form +// start/end_key_ is the start/end user key of the range to be deleted +struct RangeTombstone { + Slice start_key_; + Slice end_key_; + SequenceNumber seq_; + // TODO: we should optimize the storage here when user-defined timestamp + // is NOT enabled: they currently take up (16 + 32 + 32) bytes per tombstone. + Slice ts_; + std::string pinned_start_key_; + std::string pinned_end_key_; + + RangeTombstone() = default; + RangeTombstone(Slice sk, Slice ek, SequenceNumber sn) + : start_key_(sk), end_key_(ek), seq_(sn) {} + + // User-defined timestamp is enabled, `sk` and `ek` should be user key + // with timestamp, `ts` will replace the timestamps in `sk` and + // `ek`. 
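// Editorial sketch, not part of the vendored file: given sk == "key1" + old_ts
// and ts == new_ts, the constructor below pins "key1" + new_ts as start_key_
// (and likewise for ek); the timestamps carried in sk/ek are discarded:
//
//   RangeTombstone t(start_ukey_with_ts, end_ukey_with_ts, /*sn=*/100,
//                    new_ts);  // variable names here are illustrative
//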
+ RangeTombstone(Slice sk, Slice ek, SequenceNumber sn, Slice ts) + : seq_(sn), ts_(ts) { + assert(!ts.empty()); + pinned_start_key_.reserve(sk.size()); + pinned_start_key_.append(sk.data(), sk.size() - ts.size()); + pinned_start_key_.append(ts.data(), ts.size()); + pinned_end_key_.reserve(ek.size()); + pinned_end_key_.append(ek.data(), ek.size() - ts.size()); + pinned_end_key_.append(ts.data(), ts.size()); + start_key_ = pinned_start_key_; + end_key_ = pinned_end_key_; + } + + RangeTombstone(ParsedInternalKey parsed_key, Slice value) { + start_key_ = parsed_key.user_key; + seq_ = parsed_key.sequence; + end_key_ = value; + } + + // be careful to use Serialize(), allocates new memory + std::pair Serialize() const { + auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion); + return std::make_pair(std::move(key), end_key_); + } + + // be careful to use SerializeKey(), allocates new memory + InternalKey SerializeKey() const { + return InternalKey(start_key_, seq_, kTypeRangeDeletion); + } + + // The tombstone end-key is exclusive, so we generate an internal-key here + // which has a similar property. Using kMaxSequenceNumber guarantees that + // the returned internal-key will compare less than any other internal-key + // with the same user-key. This in turn guarantees that the serialized + // end-key for a tombstone such as [a-b] will compare less than the key "b". + // + // be careful to use SerializeEndKey(), allocates new memory + InternalKey SerializeEndKey() const { + if (!ts_.empty()) { + static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff"; + if (ts_.size() <= strlen(kTsMax)) { + return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion, + Slice(kTsMax, ts_.size())); + } else { + return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion, + std::string(ts_.size(), '\xff')); + } + } + return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion); + } +}; + +inline int InternalKeyComparator::Compare(const Slice& akey, + const Slice& bkey) const { + // Order by: + // increasing user key (according to user-supplied comparator) + // decreasing sequence number + // decreasing type (though sequence# should be enough to disambiguate) + int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); + if (r == 0) { + const uint64_t anum = + DecodeFixed64(akey.data() + akey.size() - kNumInternalBytes); + const uint64_t bnum = + DecodeFixed64(bkey.data() + bkey.size() - kNumInternalBytes); + if (anum > bnum) { + r = -1; + } else if (anum < bnum) { + r = +1; + } + } + return r; +} + +inline int InternalKeyComparator::CompareKeySeq(const Slice& akey, + const Slice& bkey) const { + // Order by: + // increasing user key (according to user-supplied comparator) + // decreasing sequence number + int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); + if (r == 0) { + // Shift the number to exclude the last byte which contains the value type + const uint64_t anum = + DecodeFixed64(akey.data() + akey.size() - kNumInternalBytes) >> 8; + const uint64_t bnum = + DecodeFixed64(bkey.data() + bkey.size() - kNumInternalBytes) >> 8; + if (anum > bnum) { + r = -1; + } else if (anum < bnum) { + r = +1; + } + } + return r; +} + +inline int InternalKeyComparator::Compare(const Slice& a, + SequenceNumber a_global_seqno, + const Slice& b, + SequenceNumber b_global_seqno) const { + int r = user_comparator_.Compare(ExtractUserKey(a), ExtractUserKey(b)); + if (r == 0) { + uint64_t a_footer, b_footer; + if (a_global_seqno == 
kDisableGlobalSequenceNumber) { + a_footer = ExtractInternalKeyFooter(a); + } else { + a_footer = PackSequenceAndType(a_global_seqno, ExtractValueType(a)); + } + if (b_global_seqno == kDisableGlobalSequenceNumber) { + b_footer = ExtractInternalKeyFooter(b); + } else { + b_footer = PackSequenceAndType(b_global_seqno, ExtractValueType(b)); + } + if (a_footer > b_footer) { + r = -1; + } else if (a_footer < b_footer) { + r = +1; + } + } + return r; +} + +// Wrap InternalKeyComparator as a comparator class for ParsedInternalKey. +struct ParsedInternalKeyComparator { + explicit ParsedInternalKeyComparator(const InternalKeyComparator* c) + : cmp(c) {} + + bool operator()(const ParsedInternalKey& a, + const ParsedInternalKey& b) const { + return cmp->Compare(a, b) < 0; + } + + const InternalKeyComparator* cmp; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.cc new file mode 100644 index 0000000000..98c3e82d5f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.cc @@ -0,0 +1,792 @@ +// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include "db/error_handler.h" + +#include "db/db_impl/db_impl.h" +#include "db/event_helpers.h" +#include "file/sst_file_manager_impl.h" +#include "logging/logging.h" +#include "port/lang.h" + +namespace ROCKSDB_NAMESPACE { + +// Maps to help decide the severity of an error based on the +// BackgroundErrorReason, Code, SubCode and whether db_options.paranoid_checks +// is set or not. There are 3 maps, going from most specific to least specific +// (i.e from all 4 fields in a tuple to only the BackgroundErrorReason and +// paranoid_checks). The less specific map serves as a catch all in case we miss +// a specific error code or subcode. 
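// Editorial sketch, not part of the vendored file: the intended lookup
// cascade over the three maps is roughly
//
//   auto it = ErrorSeverityMap.find(
//       std::make_tuple(reason, code, subcode, paranoid_checks));
//   // if absent, retry DefaultErrorSeverityMap keyed on (reason, code,
//   // paranoid_checks); if still absent, fall back to DefaultReasonMap
//   // keyed on (reason, paranoid_checks) alone.
//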
+std::map, + Status::Severity> + ErrorSeverityMap = { + // Errors during BG compaction + {std::make_tuple(BackgroundErrorReason::kCompaction, + Status::Code::kIOError, Status::SubCode::kNoSpace, + true), + Status::Severity::kSoftError}, + {std::make_tuple(BackgroundErrorReason::kCompaction, + Status::Code::kIOError, Status::SubCode::kNoSpace, + false), + Status::Severity::kNoError}, + {std::make_tuple(BackgroundErrorReason::kCompaction, + Status::Code::kIOError, Status::SubCode::kSpaceLimit, + true), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kCompaction, + Status::Code::kIOError, Status::SubCode::kIOFenced, + true), + Status::Severity::kFatalError}, + {std::make_tuple(BackgroundErrorReason::kCompaction, + Status::Code::kIOError, Status::SubCode::kIOFenced, + false), + Status::Severity::kFatalError}, + // Errors during BG flush + {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, + Status::SubCode::kNoSpace, true), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, + Status::SubCode::kNoSpace, false), + Status::Severity::kNoError}, + {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, + Status::SubCode::kSpaceLimit, true), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, + Status::SubCode::kIOFenced, true), + Status::Severity::kFatalError}, + {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, + Status::SubCode::kIOFenced, false), + Status::Severity::kFatalError}, + // Errors during Write + {std::make_tuple(BackgroundErrorReason::kWriteCallback, + Status::Code::kIOError, Status::SubCode::kNoSpace, + true), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kWriteCallback, + Status::Code::kIOError, Status::SubCode::kNoSpace, + false), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kWriteCallback, + Status::Code::kIOError, Status::SubCode::kIOFenced, + true), + Status::Severity::kFatalError}, + {std::make_tuple(BackgroundErrorReason::kWriteCallback, + Status::Code::kIOError, Status::SubCode::kIOFenced, + false), + Status::Severity::kFatalError}, + // Errors during MANIFEST write + {std::make_tuple(BackgroundErrorReason::kManifestWrite, + Status::Code::kIOError, Status::SubCode::kNoSpace, + true), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kManifestWrite, + Status::Code::kIOError, Status::SubCode::kNoSpace, + false), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kManifestWrite, + Status::Code::kIOError, Status::SubCode::kIOFenced, + true), + Status::Severity::kFatalError}, + {std::make_tuple(BackgroundErrorReason::kManifestWrite, + Status::Code::kIOError, Status::SubCode::kIOFenced, + false), + Status::Severity::kFatalError}, + // Errors during BG flush with WAL disabled + {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, + Status::Code::kIOError, Status::SubCode::kNoSpace, + true), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, + Status::Code::kIOError, Status::SubCode::kNoSpace, + false), + Status::Severity::kNoError}, + {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, + Status::Code::kIOError, Status::SubCode::kSpaceLimit, + true), + Status::Severity::kHardError}, + {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, + Status::Code::kIOError, Status::SubCode::kIOFenced, + true), + Status::Severity::kFatalError}, + 
+        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, Status::Code::kIOError,
+                         Status::SubCode::kIOFenced, false), Status::Severity::kFatalError},
+        // Errors during MANIFEST write when WAL is disabled
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL, Status::Code::kIOError,
+                         Status::SubCode::kNoSpace, true), Status::Severity::kHardError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL, Status::Code::kIOError,
+                         Status::SubCode::kNoSpace, false), Status::Severity::kHardError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL, Status::Code::kIOError,
+                         Status::SubCode::kIOFenced, true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL, Status::Code::kIOError,
+                         Status::SubCode::kIOFenced, false), Status::Severity::kFatalError},
+
+};
+
+std::map<std::tuple<BackgroundErrorReason, Status::Code, bool>, Status::Severity>
+    DefaultErrorSeverityMap = {
+        // Errors during BG compaction
+        {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kCorruption,
+                         true), Status::Severity::kUnrecoverableError},
+        {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kCorruption,
+                         false), Status::Severity::kNoError},
+        {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kIOError,
+                         true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kIOError,
+                         false), Status::Severity::kNoError},
+        // Errors during BG flush
+        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kCorruption,
+                         true), Status::Severity::kUnrecoverableError},
+        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kCorruption,
+                         false), Status::Severity::kNoError},
+        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
+                         true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
+                         false), Status::Severity::kNoError},
+        // Errors during Write
+        {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kCorruption,
+                         true), Status::Severity::kUnrecoverableError},
+        {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kCorruption,
+                         false), Status::Severity::kNoError},
+        {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kIOError,
+                         true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kIOError,
+                         false), Status::Severity::kNoError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWrite, Status::Code::kIOError,
+                         true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWrite, Status::Code::kIOError,
+                         false), Status::Severity::kFatalError},
+        // Errors during BG flush with WAL disabled
+        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, Status::Code::kCorruption,
+                         true), Status::Severity::kUnrecoverableError},
+        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, Status::Code::kCorruption,
+                         false), Status::Severity::kNoError},
+        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, Status::Code::kIOError,
+                         true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kFlushNoWAL, Status::Code::kIOError,
+                         false), Status::Severity::kNoError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL, Status::Code::kIOError,
+                         true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL, Status::Code::kIOError,
+                         false), Status::Severity::kFatalError},
+};
+
+std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity>
+    DefaultReasonMap = {
+        // Errors during BG compaction
+        {std::make_tuple(BackgroundErrorReason::kCompaction, true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kCompaction, false), Status::Severity::kNoError},
+        // Errors during BG flush
+        {std::make_tuple(BackgroundErrorReason::kFlush, true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kFlush, false), Status::Severity::kNoError},
+        // Errors during Write
+        {std::make_tuple(BackgroundErrorReason::kWriteCallback, true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kWriteCallback, false), Status::Severity::kFatalError},
+        // Errors during Memtable update
+        {std::make_tuple(BackgroundErrorReason::kMemTable, true), Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kMemTable, false), Status::Severity::kFatalError},
+};
+
+void ErrorHandler::CancelErrorRecovery() {
+  db_mutex_->AssertHeld();
+
+  // We'll release the lock before calling sfm, so make sure no new
+  // recovery gets scheduled at that point
+  auto_recovery_ = false;
+  SstFileManagerImpl* sfm = reinterpret_cast<SstFileManagerImpl*>(
+      db_options_.sst_file_manager.get());
+  if (sfm) {
+    // This may or may not cancel a pending recovery
+    db_mutex_->Unlock();
+    bool cancelled = sfm->CancelErrorRecovery(this);
+    db_mutex_->Lock();
+    if (cancelled) {
+      recovery_in_prog_ = false;
+    }
+  }
+
+  // If auto recovery is also running to resume from the retryable error,
+  // we should wait and end the auto recovery.
+  EndAutoRecovery();
+}
+
+STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
+
+// This is the main function for looking at an error during a background
+// operation and deciding the severity, and error recovery strategy. The high
+// level algorithm is as follows -
+// 1. Classify the severity of the error based on the ErrorSeverityMap,
+//    DefaultErrorSeverityMap and DefaultReasonMap defined earlier
+// 2. Call a Status code specific override function to adjust the severity
+//    if needed. The reason for this is our ability to recover may depend on
+//    the exact options enabled in DBOptions
+// 3. Determine if auto recovery is possible. A listener notification callback
+//    is called, which can disable the auto recovery even if we decide it's
+//    feasible
+// 4. For Status::NoSpace() errors, rely on SstFileManagerImpl to control
+//    the actual recovery. If no sst file manager is specified in DBOptions,
+//    a default one is allocated during DB::Open(), so there will always be
+//    one.
+// This can also get called as part of a recovery operation.
In that case, we +// also track the error separately in recovery_error_ so we can tell in the +// end whether recovery succeeded or not +const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err, + BackgroundErrorReason reason) { + db_mutex_->AssertHeld(); + if (bg_err.ok()) { + return kOkStatus; + } + + if (bg_error_stats_ != nullptr) { + RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT); + } + ROCKS_LOG_INFO(db_options_.info_log, + "ErrorHandler: Set regular background error\n"); + + bool paranoid = db_options_.paranoid_checks; + Status::Severity sev = Status::Severity::kFatalError; + Status new_bg_err; + DBRecoverContext context; + bool found = false; + + { + auto entry = ErrorSeverityMap.find( + std::make_tuple(reason, bg_err.code(), bg_err.subcode(), paranoid)); + if (entry != ErrorSeverityMap.end()) { + sev = entry->second; + found = true; + } + } + + if (!found) { + auto entry = DefaultErrorSeverityMap.find( + std::make_tuple(reason, bg_err.code(), paranoid)); + if (entry != DefaultErrorSeverityMap.end()) { + sev = entry->second; + found = true; + } + } + + if (!found) { + auto entry = DefaultReasonMap.find(std::make_tuple(reason, paranoid)); + if (entry != DefaultReasonMap.end()) { + sev = entry->second; + } + } + + new_bg_err = Status(bg_err, sev); + + // Check if recovery is currently in progress. If it is, we will save this + // error so we can check it at the end to see if recovery succeeded or not + if (recovery_in_prog_ && recovery_error_.ok()) { + recovery_error_ = new_bg_err; + } + + bool auto_recovery = auto_recovery_; + if (new_bg_err.severity() >= Status::Severity::kFatalError && auto_recovery) { + auto_recovery = false; + } + + // Allow some error specific overrides + if (new_bg_err.subcode() == IOStatus::SubCode::kNoSpace || + new_bg_err.subcode() == IOStatus::SubCode::kSpaceLimit) { + new_bg_err = OverrideNoSpaceError(new_bg_err, &auto_recovery); + } + + if (!new_bg_err.ok()) { + Status s = new_bg_err; + EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s, + db_mutex_, &auto_recovery); + if (!s.ok() && (s.severity() > bg_error_.severity())) { + bg_error_ = s; + } else { + // This error is less severe than previously encountered error. Don't + // take any further action + return bg_error_; + } + } + + recover_context_ = context; + if (auto_recovery) { + recovery_in_prog_ = true; + + // Kick-off error specific recovery + if (new_bg_err.subcode() == IOStatus::SubCode::kNoSpace || + new_bg_err.subcode() == IOStatus::SubCode::kSpaceLimit) { + RecoverFromNoSpace(); + } + } + if (bg_error_.severity() >= Status::Severity::kHardError) { + is_db_stopped_.store(true, std::memory_order_release); + } + return bg_error_; +} + +// This is the main function for looking at IO related error during the +// background operations. The main logic is: +// 1) File scope IO error is treated as retryable IO error in the write +// path. In RocksDB, If a file has write IO error and it is at file scope, +// RocksDB never write to the same file again. RocksDB will create a new +// file and rewrite the whole content. Thus, it is retryable. +// 1) if the error is caused by data loss, the error is mapped to +// unrecoverable error. Application/user must take action to handle +// this situation (File scope case is excluded). 
+// 2) if the error is a Retryable IO error (i.e., it is a file scope IO error, +// or its retryable flag is set and not a data loss error), auto resume +// will be called and the auto resume can be controlled by resume count +// and resume interval options. There are three sub-cases: +// a) if the error happens during compaction, it is mapped to a soft error. +// the compaction thread will reschedule a new compaction. +// b) if the error happens during flush and also WAL is empty, it is mapped +// to a soft error. Note that, it includes the case that IO error happens +// in SST or manifest write during flush. +// c) all other errors are mapped to hard error. +// 3) for other cases, SetBGError(const Status& bg_err, BackgroundErrorReason +// reason) will be called to handle other error cases. +const Status& ErrorHandler::SetBGError(const Status& bg_status, + BackgroundErrorReason reason) { + db_mutex_->AssertHeld(); + Status tmp_status = bg_status; + IOStatus bg_io_err = status_to_io_status(std::move(tmp_status)); + + if (bg_io_err.ok()) { + return kOkStatus; + } + ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s", + bg_io_err.ToString().c_str()); + + if (recovery_in_prog_ && recovery_io_error_.ok()) { + recovery_io_error_ = bg_io_err; + } + if (BackgroundErrorReason::kManifestWrite == reason || + BackgroundErrorReason::kManifestWriteNoWAL == reason) { + // Always returns ok + ROCKS_LOG_INFO(db_options_.info_log, "Disabling File Deletions"); + db_->DisableFileDeletionsWithLock().PermitUncheckedError(); + } + + Status new_bg_io_err = bg_io_err; + DBRecoverContext context; + if (bg_io_err.GetScope() != IOStatus::IOErrorScope::kIOErrorScopeFile && + bg_io_err.GetDataLoss()) { + // First, data loss (non file scope) is treated as unrecoverable error. So + // it can directly overwrite any existing bg_error_. + bool auto_recovery = false; + Status bg_err(new_bg_io_err, Status::Severity::kUnrecoverableError); + CheckAndSetRecoveryAndBGError(bg_err); + if (bg_error_stats_ != nullptr) { + RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT); + RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT); + } + ROCKS_LOG_INFO( + db_options_.info_log, + "ErrorHandler: Set background IO error as unrecoverable error\n"); + EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, + &bg_err, db_mutex_, &auto_recovery); + recover_context_ = context; + return bg_error_; + } else if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace && + (bg_io_err.GetScope() == + IOStatus::IOErrorScope::kIOErrorScopeFile || + bg_io_err.GetRetryable())) { + // Second, check if the error is a retryable IO error (file scope IO error + // is also treated as retryable IO error in RocksDB write path). if it is + // retryable error and its severity is higher than bg_error_, overwrite the + // bg_error_ with new error. In current stage, for retryable IO error of + // compaction, treat it as soft error. In other cases, treat the retryable + // IO error as hard error. Note that, all the NoSpace error should be + // handled by the SstFileManager::StartErrorRecovery(). Therefore, no matter + // it is retryable or file scope, this logic will be bypassed. 
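Restating the branch structure that this comment describes as one standalone function may help when reading the interleaved if/else blocks that follow. The enums and names below are stand-ins, not the vendored types; this is an illustration of the policy, not the implementation.

// Sketch of the retryable-IO-error policy described above: where the error
// came from decides the severity, whether other background work pauses, and
// whether an auto-resume thread is started. Illustrative only.
enum class Reason { kCompaction, kFlushNoWAL, kManifestWriteNoWAL, kOther };
enum class Severity { kSoftError, kHardError };

struct Decision {
  Severity severity;
  bool stop_other_bg_work;  // mirrors soft_error_no_bg_work_ in the handler
  bool start_auto_resume;   // mirrors StartRecoverFromRetryableBGIOError()
};

Decision ClassifyRetryableIOError(Reason reason) {
  if (reason == Reason::kCompaction) {
    // Compaction reschedules itself, so no BG error is recorded at all.
    return {Severity::kSoftError, false, false};
  }
  if (reason == Reason::kFlushNoWAL || reason == Reason::kManifestWriteNoWAL) {
    // Soft error, but pause all other background work until resume succeeds.
    return {Severity::kSoftError, true, true};
  }
  // All remaining retryable cases are hard errors with auto resume.
  return {Severity::kHardError, false, true};
}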
+ bool auto_recovery = false; + EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, + &new_bg_io_err, db_mutex_, + &auto_recovery); + if (bg_error_stats_ != nullptr) { + RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT); + RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT); + RecordTick(bg_error_stats_.get(), + ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT); + } + ROCKS_LOG_INFO(db_options_.info_log, + "ErrorHandler: Set background retryable IO error\n"); + if (BackgroundErrorReason::kCompaction == reason) { + // We map the retryable IO error during compaction to soft error. Since + // compaction can reschedule by itself. We will not set the BG error in + // this case + // TODO: a better way to set or clean the retryable IO error which + // happens during compaction SST file write. + if (bg_error_stats_ != nullptr) { + RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_COUNT); + } + ROCKS_LOG_INFO( + db_options_.info_log, + "ErrorHandler: Compaction will schedule by itself to resume\n"); + return bg_error_; + } else if (BackgroundErrorReason::kFlushNoWAL == reason || + BackgroundErrorReason::kManifestWriteNoWAL == reason) { + // When the BG Retryable IO error reason is flush without WAL, + // We map it to a soft error. At the same time, all the background work + // should be stopped except the BG work from recovery. Therefore, we + // set the soft_error_no_bg_work_ to true. At the same time, since DB + // continues to receive writes when BG error is soft error, to avoid + // to many small memtable being generated during auto resume, the flush + // reason is set to kErrorRecoveryRetryFlush. + Status bg_err(new_bg_io_err, Status::Severity::kSoftError); + CheckAndSetRecoveryAndBGError(bg_err); + soft_error_no_bg_work_ = true; + context.flush_reason = FlushReason::kErrorRecoveryRetryFlush; + recover_context_ = context; + return StartRecoverFromRetryableBGIOError(bg_io_err); + } else { + Status bg_err(new_bg_io_err, Status::Severity::kHardError); + CheckAndSetRecoveryAndBGError(bg_err); + recover_context_ = context; + return StartRecoverFromRetryableBGIOError(bg_io_err); + } + } else { + if (bg_error_stats_ != nullptr) { + RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT); + } + // HandleKnownErrors() will use recovery_error_, so ignore + // recovery_io_error_. + // TODO: Do some refactoring and use only one recovery_error_ + recovery_io_error_.PermitUncheckedError(); + return HandleKnownErrors(new_bg_io_err, reason); + } +} + +Status ErrorHandler::OverrideNoSpaceError(const Status& bg_error, + bool* auto_recovery) { + if (bg_error.severity() >= Status::Severity::kFatalError) { + return bg_error; + } + + if (db_options_.sst_file_manager.get() == nullptr) { + // We rely on SFM to poll for enough disk space and recover + *auto_recovery = false; + return bg_error; + } + + if (db_options_.allow_2pc && + (bg_error.severity() <= Status::Severity::kSoftError)) { + // Don't know how to recover, as the contents of the current WAL file may + // be inconsistent, and it may be needed for 2PC. 
If 2PC is not enabled,
+    // we can just flush the memtable and discard the log
+    *auto_recovery = false;
+    return Status(bg_error, Status::Severity::kFatalError);
+  }
+
+  {
+    uint64_t free_space;
+    if (db_options_.env->GetFreeSpace(db_options_.db_paths[0].path,
+                                      &free_space) == Status::NotSupported()) {
+      *auto_recovery = false;
+    }
+  }
+
+  return bg_error;
+}
+
+void ErrorHandler::RecoverFromNoSpace() {
+  SstFileManagerImpl* sfm = reinterpret_cast<SstFileManagerImpl*>(
+      db_options_.sst_file_manager.get());
+
+  // Inform SFM of the error, so it can kick-off the recovery
+  if (sfm) {
+    sfm->StartErrorRecovery(this, bg_error_);
+  }
+}
+
+Status ErrorHandler::ClearBGError() {
+  db_mutex_->AssertHeld();
+
+  // Signal that recovery succeeded
+  if (recovery_error_.ok()) {
+    Status old_bg_error = bg_error_;
+    // old_bg_error is only for notifying listeners, so may not be checked
+    old_bg_error.PermitUncheckedError();
+    // Clear and check the recovery IO and BG error
+    bg_error_ = Status::OK();
+    recovery_io_error_ = IOStatus::OK();
+    bg_error_.PermitUncheckedError();
+    recovery_io_error_.PermitUncheckedError();
+    recovery_in_prog_ = false;
+    soft_error_no_bg_work_ = false;
+    EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error,
+                                           bg_error_, db_mutex_);
+  }
+  return recovery_error_;
+}
+
+Status ErrorHandler::RecoverFromBGError(bool is_manual) {
+  InstrumentedMutexLock l(db_mutex_);
+  bool no_bg_work_original_flag = soft_error_no_bg_work_;
+  if (is_manual) {
+    // If it's a manual recovery and there's a background recovery in progress
+    // return busy status
+    if (recovery_in_prog_) {
+      return Status::Busy();
+    }
+    recovery_in_prog_ = true;
+
+    // In manual resume, we allow the bg work to run. If it is an auto resume,
+    // the bg work should follow this tag.
+    soft_error_no_bg_work_ = false;
+
+    // In manual resume, if the bg error is a soft error and also requires
+    // no bg work, the error must be recovered by calling the flush with
+    // flush reason: kErrorRecoveryRetryFlush. In other cases, the flush
+    // reason is set to kErrorRecovery.
+    if (no_bg_work_original_flag) {
+      recover_context_.flush_reason = FlushReason::kErrorRecoveryRetryFlush;
+    } else {
+      recover_context_.flush_reason = FlushReason::kErrorRecovery;
+    }
+  }
+
+  if (bg_error_.severity() == Status::Severity::kSoftError &&
+      recover_context_.flush_reason == FlushReason::kErrorRecovery) {
+    // Simply clear the background error and return
+    recovery_error_ = Status::OK();
+    return ClearBGError();
+  }
+
+  // Reset recovery_error_. We will use this to record any errors that happen
+  // during the recovery process. While recovering, the only operations that
+  // can generate background errors should be the flush operations
+  recovery_error_ = Status::OK();
+  recovery_error_.PermitUncheckedError();
+  Status s = db_->ResumeImpl(recover_context_);
+  if (s.ok()) {
+    soft_error_no_bg_work_ = false;
+  } else {
+    soft_error_no_bg_work_ = no_bg_work_original_flag;
+  }
+
+  // For manual recover, shutdown, and fatal error cases, set
+  // recovery_in_prog_ to false.
+  // For automatic background recovery, leave it as is regardless of success
+  // or failure as it will be retried
+  if (is_manual || s.IsShutdownInProgress() ||
+      bg_error_.severity() >= Status::Severity::kFatalError) {
+    recovery_in_prog_ = false;
+  }
+  return s;
+}
+
+const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
+    const IOStatus& io_error) {
+  db_mutex_->AssertHeld();
+  if (bg_error_.ok()) {
+    return bg_error_;
+  } else if (io_error.ok()) {
+    return kOkStatus;
+  } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
+    // Auto resume BG error is not enabled, directly return bg_error_.
+    return bg_error_;
+  }
+  if (bg_error_stats_ != nullptr) {
+    RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_COUNT);
+  }
+  ROCKS_LOG_INFO(
+      db_options_.info_log,
+      "ErrorHandler: Call StartRecoverFromRetryableBGIOError to resume\n");
+  if (recovery_thread_) {
+    // In this case, if recovery_in_prog_ is false, the current thread should
+    // wait for the previous recovery thread to finish and create a new thread
+    // to recover from the bg error.
+    db_mutex_->Unlock();
+    recovery_thread_->join();
+    db_mutex_->Lock();
+  }
+
+  recovery_in_prog_ = true;
+  recovery_thread_.reset(
+      new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
+
+  if (recovery_io_error_.ok() && recovery_error_.ok()) {
+    return recovery_error_;
+  } else {
+    return bg_error_;
+  }
+}
+
+// Automatic recover from Retryable BG IO error. Must be called after db
+// mutex is released.
+void ErrorHandler::RecoverFromRetryableBGIOError() {
+  TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart");
+  InstrumentedMutexLock l(db_mutex_);
+  if (end_recovery_) {
+    EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
+                                           Status::ShutdownInProgress(),
+                                           db_mutex_);
+    return;
+  }
+  DBRecoverContext context = recover_context_;
+  int resume_count = db_options_.max_bgerror_resume_count;
+  uint64_t wait_interval = db_options_.bgerror_resume_retry_interval;
+  uint64_t retry_count = 0;
+  // Recover from the retryable error. Create a separate thread to do it.
+  while (resume_count > 0) {
+    if (end_recovery_) {
+      EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
+                                             Status::ShutdownInProgress(),
+                                             db_mutex_);
+      return;
+    }
+    TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume0");
+    TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume1");
+    recovery_io_error_ = IOStatus::OK();
+    recovery_error_ = Status::OK();
+    retry_count++;
+    Status s = db_->ResumeImpl(context);
+    if (bg_error_stats_ != nullptr) {
+      RecordTick(bg_error_stats_.get(),
+                 ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT);
+    }
+    if (s.IsShutdownInProgress() ||
+        bg_error_.severity() >= Status::Severity::kFatalError) {
+      // If DB shutdown in progress or the error severity is higher than
+      // Hard Error, stop auto resume and returns.
+      recovery_in_prog_ = false;
+      if (bg_error_stats_ != nullptr) {
+        RecordInHistogram(bg_error_stats_.get(),
+                          ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
+      }
+      EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
+                                             bg_error_, db_mutex_);
+      return;
+    }
+    if (!recovery_io_error_.ok() &&
+        recovery_error_.severity() <= Status::Severity::kHardError &&
+        recovery_io_error_.GetRetryable()) {
+      // If a new BG IO error happens during auto recovery and it is retryable
+      // and its severity is Hard Error or lower, the auto resume sleeps for
+      // a period of time and redoes auto resume if it is allowed.
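The loop that continues below is easier to see in isolation. This compiling sketch mirrors its shape (bounded attempts, a timed wait between retryable failures); the TryResume callback is a hypothetical stand-in for DBImpl::ResumeImpl, and none of this is the vendored implementation itself.

#include <chrono>
#include <condition_variable>
#include <mutex>

// Up to max_resume_count attempts; between attempts, sleep retry_interval on
// a condition variable so a shutdown can interrupt the wait, just as the
// real loop does with InstrumentedCondVar::TimedWait and SignalAll.
bool RetryWithInterval(int max_resume_count,
                       std::chrono::microseconds retry_interval,
                       bool (*TryResume)()) {
  std::mutex mu;
  std::condition_variable cv;
  for (int attempt = 0; attempt < max_resume_count; ++attempt) {
    if (TryResume()) {
      return true;  // recovered; the caller clears the background error
    }
    std::unique_lock<std::mutex> lk(mu);
    cv.wait_for(lk, retry_interval);  // interruptible sleep between attempts
  }
  return false;  // exceeded the resume retry count; give up
}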
+ TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait0"); + TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait1"); + int64_t wait_until = db_options_.clock->NowMicros() + wait_interval; + cv_.TimedWait(wait_until); + } else { + // There are three possibility: 1) recover_io_error is set during resume + // and the error is not retryable, 2) recover is successful, 3) other + // error happens during resume and cannot be resumed here. + if (recovery_io_error_.ok() && recovery_error_.ok() && s.ok()) { + // recover from the retryable IO error and no other BG errors. Clean + // the bg_error and notify user. + TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverSuccess"); + Status old_bg_error = bg_error_; + is_db_stopped_.store(false, std::memory_order_release); + bg_error_ = Status::OK(); + bg_error_.PermitUncheckedError(); + EventHelpers::NotifyOnErrorRecoveryEnd( + db_options_.listeners, old_bg_error, bg_error_, db_mutex_); + if (bg_error_stats_ != nullptr) { + RecordTick(bg_error_stats_.get(), + ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT); + RecordInHistogram(bg_error_stats_.get(), + ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count); + } + recovery_in_prog_ = false; + if (soft_error_no_bg_work_) { + soft_error_no_bg_work_ = false; + } + return; + } else { + // In this case: 1) recovery_io_error is more serious or not retryable + // 2) other Non IO recovery_error happens. The auto recovery stops. + recovery_in_prog_ = false; + if (bg_error_stats_ != nullptr) { + RecordInHistogram(bg_error_stats_.get(), + ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count); + } + EventHelpers::NotifyOnErrorRecoveryEnd( + db_options_.listeners, bg_error_, + !recovery_io_error_.ok() + ? recovery_io_error_ + : (!recovery_error_.ok() ? recovery_error_ : s), + db_mutex_); + return; + } + } + resume_count--; + } + recovery_in_prog_ = false; + EventHelpers::NotifyOnErrorRecoveryEnd( + db_options_.listeners, bg_error_, + Status::Aborted("Exceeded resume retry count"), db_mutex_); + TEST_SYNC_POINT("RecoverFromRetryableBGIOError:LoopOut"); + if (bg_error_stats_ != nullptr) { + RecordInHistogram(bg_error_stats_.get(), + ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count); + } + return; +} + +void ErrorHandler::CheckAndSetRecoveryAndBGError(const Status& bg_err) { + if (recovery_in_prog_ && recovery_error_.ok()) { + recovery_error_ = bg_err; + } + if (bg_err.severity() > bg_error_.severity()) { + bg_error_ = bg_err; + } + if (bg_error_.severity() >= Status::Severity::kHardError) { + is_db_stopped_.store(true, std::memory_order_release); + } + return; +} + +void ErrorHandler::EndAutoRecovery() { + db_mutex_->AssertHeld(); + if (!end_recovery_) { + end_recovery_ = true; + } + cv_.SignalAll(); + db_mutex_->Unlock(); + if (recovery_thread_) { + recovery_thread_->join(); + } + db_mutex_->Lock(); + return; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.h new file mode 100644 index 0000000000..34e08a525d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/error_handler.h @@ -0,0 +1,124 @@ +// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
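Between the implementation above and its header below, it is worth noting how an embedder actually tunes that retry loop: max_bgerror_resume_count and bgerror_resume_retry_interval are the DBOptions fields it reads. A small usage sketch follows; the path and values are placeholders, not anything prescribed by this patch.

#include <rocksdb/db.h>

#include <cassert>

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Allow at most five automatic resume attempts per retryable BG error...
  options.max_bgerror_resume_count = 5;
  // ...spaced one second apart (the interval is in microseconds).
  options.bgerror_resume_retry_interval = 1000000;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/testdb", &db);
  assert(s.ok());
  delete db;
}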
+#pragma once
+
+#include "monitoring/instrumented_mutex.h"
+#include "options/db_options.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DBImpl;
+
+// This structure is used to store the DB recovery context. The context is
+// the information related to the recovery actions. For example, it contains
+// FlushReason, which tells the flush job why this flush is called.
+struct DBRecoverContext {
+  FlushReason flush_reason;
+
+  DBRecoverContext() : flush_reason(FlushReason::kErrorRecovery) {}
+
+  DBRecoverContext(FlushReason reason) : flush_reason(reason) {}
+};
+
+class ErrorHandler {
+ public:
+  ErrorHandler(DBImpl* db, const ImmutableDBOptions& db_options,
+               InstrumentedMutex* db_mutex)
+      : db_(db),
+        db_options_(db_options),
+        cv_(db_mutex),
+        end_recovery_(false),
+        recovery_thread_(nullptr),
+        db_mutex_(db_mutex),
+        auto_recovery_(false),
+        recovery_in_prog_(false),
+        soft_error_no_bg_work_(false),
+        is_db_stopped_(false),
+        bg_error_stats_(db_options.statistics) {
+    // Clear the checked flag for uninitialized errors
+    bg_error_.PermitUncheckedError();
+    recovery_error_.PermitUncheckedError();
+    recovery_io_error_.PermitUncheckedError();
+  }
+
+  void EnableAutoRecovery() { auto_recovery_ = true; }
+
+  Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
+                                    Status::Code code, Status::SubCode subcode);
+
+  const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
+
+  Status GetBGError() const { return bg_error_; }
+
+  Status GetRecoveryError() const { return recovery_error_; }
+
+  Status ClearBGError();
+
+  bool IsDBStopped() { return is_db_stopped_.load(std::memory_order_acquire); }
+
+  bool IsBGWorkStopped() {
+    assert(db_mutex_);
+    db_mutex_->AssertHeld();
+    return !bg_error_.ok() &&
+           (bg_error_.severity() >= Status::Severity::kHardError ||
+            !auto_recovery_ || soft_error_no_bg_work_);
+  }
+
+  bool IsSoftErrorNoBGWork() { return soft_error_no_bg_work_; }
+
+  bool IsRecoveryInProgress() { return recovery_in_prog_; }
+
+  Status RecoverFromBGError(bool is_manual = false);
+  void CancelErrorRecovery();
+
+  void EndAutoRecovery();
+
+ private:
+  DBImpl* db_;
+  const ImmutableDBOptions& db_options_;
+  Status bg_error_;
+  // A separate Status variable used to record any errors during the
+  // recovery process from hard errors
+  Status recovery_error_;
+  // A separate IO Status variable used to record any IO errors during
+  // the recovery process. At the same time, recovery_error_ is also set.
+  IOStatus recovery_io_error_;
+  // The condition variable used with db_mutex_ during auto resume for timed
+  // waits.
+  InstrumentedCondVar cv_;
+  bool end_recovery_;
+  std::unique_ptr<port::Thread> recovery_thread_;
+
+  InstrumentedMutex* db_mutex_;
+  // A flag indicating whether automatic recovery from errors is enabled
+  bool auto_recovery_;
+  bool recovery_in_prog_;
+  // A flag indicating that, for a soft error, no background work should be
+  // allowed except work that is part of recovery.
+  bool soft_error_no_bg_work_;
+
+  // Used to store the context for recovery, such as the flush reason.
+  DBRecoverContext recover_context_;
+  std::atomic<bool> is_db_stopped_;
+
+  // The pointer to the DB statistics.
+  std::shared_ptr<Statistics> bg_error_stats_;
+
+  const Status& HandleKnownErrors(const Status& bg_err,
+                                  BackgroundErrorReason reason);
+  Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
+  void RecoverFromNoSpace();
+  const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
+  void RecoverFromRetryableBGIOError();
+  // First, if recovery is in progress and recovery_error_ is ok, set
+  // recovery_error_ to bg_err. Second, if the severity is higher than the
+  // current bg_error_, overwrite it.
+  void CheckAndSetRecoveryAndBGError(const Status& bg_err);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.cc
new file mode 100644
index 0000000000..4360144ece
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.cc
@@ -0,0 +1,323 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/event_helpers.h"
+
+#include "rocksdb/convenience.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/utilities/customizable_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+Status EventListener::CreateFromString(const ConfigOptions& config_options,
+                                       const std::string& id,
+                                       std::shared_ptr<EventListener>* result) {
+  return LoadSharedObject<EventListener>(config_options, id, result);
+}
+
+namespace {
+template <typename T>
+inline T SafeDivide(T a, T b) {
+  return b == 0 ? 0 : a / b;
+}
+}  // anonymous namespace
+
+void EventHelpers::AppendCurrentTime(JSONWriter* jwriter) {
+  *jwriter << "time_micros"
+           << std::chrono::duration_cast<std::chrono::microseconds>(
+                  std::chrono::system_clock::now().time_since_epoch())
+                  .count();
+}
+
+void EventHelpers::NotifyTableFileCreationStarted(
+    const std::vector<std::shared_ptr<EventListener>>& listeners,
+    const std::string& db_name, const std::string& cf_name,
+    const std::string& file_path, int job_id, TableFileCreationReason reason) {
+  if (listeners.empty()) {
+    return;
+  }
+  TableFileCreationBriefInfo info;
+  info.db_name = db_name;
+  info.cf_name = cf_name;
+  info.file_path = file_path;
+  info.job_id = job_id;
+  info.reason = reason;
+  for (auto& listener : listeners) {
+    listener->OnTableFileCreationStarted(info);
+  }
+}
+
+void EventHelpers::NotifyOnBackgroundError(
+    const std::vector<std::shared_ptr<EventListener>>& listeners,
+    BackgroundErrorReason reason, Status* bg_error, InstrumentedMutex* db_mutex,
+    bool* auto_recovery) {
+  if (listeners.empty()) {
+    return;
+  }
+  db_mutex->AssertHeld();
+  // release lock while notifying events
+  db_mutex->Unlock();
+  for (auto& listener : listeners) {
+    listener->OnBackgroundError(reason, bg_error);
+    bg_error->PermitUncheckedError();
+    if (*auto_recovery) {
+      listener->OnErrorRecoveryBegin(reason, *bg_error, auto_recovery);
+    }
+  }
+  db_mutex->Lock();
+}
+
+void EventHelpers::LogAndNotifyTableFileCreationFinished(
+    EventLogger* event_logger,
+    const std::vector<std::shared_ptr<EventListener>>& listeners,
+    const std::string& db_name, const std::string& cf_name,
+    const std::string& file_path, int job_id, const FileDescriptor& fd,
+    uint64_t oldest_blob_file_number, const TableProperties& table_properties,
+    TableFileCreationReason reason, const Status& s,
+    const std::string& file_checksum,
+    const std::string& file_checksum_func_name) {
+  if (s.ok() && event_logger) {
+    JSONWriter jwriter;
+    AppendCurrentTime(&jwriter);
+    jwriter << "cf_name" << cf_name << "job" <<
job_id << "event" + << "table_file_creation" + << "file_number" << fd.GetNumber() << "file_size" + << fd.GetFileSize() << "file_checksum" + << Slice(file_checksum).ToString(true) << "file_checksum_func_name" + << file_checksum_func_name << "smallest_seqno" << fd.smallest_seqno + << "largest_seqno" << fd.largest_seqno; + + // table_properties + { + jwriter << "table_properties"; + jwriter.StartObject(); + + // basic properties: + jwriter << "data_size" << table_properties.data_size << "index_size" + << table_properties.index_size << "index_partitions" + << table_properties.index_partitions << "top_level_index_size" + << table_properties.top_level_index_size + << "index_key_is_user_key" + << table_properties.index_key_is_user_key + << "index_value_is_delta_encoded" + << table_properties.index_value_is_delta_encoded << "filter_size" + << table_properties.filter_size << "raw_key_size" + << table_properties.raw_key_size << "raw_average_key_size" + << SafeDivide(table_properties.raw_key_size, + table_properties.num_entries) + << "raw_value_size" << table_properties.raw_value_size + << "raw_average_value_size" + << SafeDivide(table_properties.raw_value_size, + table_properties.num_entries) + << "num_data_blocks" << table_properties.num_data_blocks + << "num_entries" << table_properties.num_entries + << "num_filter_entries" << table_properties.num_filter_entries + << "num_deletions" << table_properties.num_deletions + << "num_merge_operands" << table_properties.num_merge_operands + << "num_range_deletions" << table_properties.num_range_deletions + << "format_version" << table_properties.format_version + << "fixed_key_len" << table_properties.fixed_key_len + << "filter_policy" << table_properties.filter_policy_name + << "column_family_name" << table_properties.column_family_name + << "column_family_id" << table_properties.column_family_id + << "comparator" << table_properties.comparator_name + << "merge_operator" << table_properties.merge_operator_name + << "prefix_extractor_name" + << table_properties.prefix_extractor_name << "property_collectors" + << table_properties.property_collectors_names << "compression" + << table_properties.compression_name << "compression_options" + << table_properties.compression_options << "creation_time" + << table_properties.creation_time << "oldest_key_time" + << table_properties.oldest_key_time << "file_creation_time" + << table_properties.file_creation_time + << "slow_compression_estimated_data_size" + << table_properties.slow_compression_estimated_data_size + << "fast_compression_estimated_data_size" + << table_properties.fast_compression_estimated_data_size + << "db_id" << table_properties.db_id << "db_session_id" + << table_properties.db_session_id << "orig_file_number" + << table_properties.orig_file_number << "seqno_to_time_mapping"; + + if (table_properties.seqno_to_time_mapping.empty()) { + jwriter << "N/A"; + } else { + SeqnoToTimeMapping tmp; + Status status = tmp.Add(table_properties.seqno_to_time_mapping); + if (status.ok()) { + jwriter << tmp.ToHumanString(); + } else { + jwriter << "Invalid"; + } + } + + // user collected properties + for (const auto& prop : table_properties.readable_properties) { + jwriter << prop.first << prop.second; + } + jwriter.EndObject(); + } + + if (oldest_blob_file_number != kInvalidBlobFileNumber) { + jwriter << "oldest_blob_file_number" << oldest_blob_file_number; + } + + jwriter.EndObject(); + + event_logger->Log(jwriter); + } + + if (listeners.empty()) { + return; + } + TableFileCreationInfo info; + info.db_name = 
db_name;
+  info.cf_name = cf_name;
+  info.file_path = file_path;
+  info.file_size = fd.file_size;
+  info.job_id = job_id;
+  info.table_properties = table_properties;
+  info.reason = reason;
+  info.status = s;
+  info.file_checksum = file_checksum;
+  info.file_checksum_func_name = file_checksum_func_name;
+  for (auto& listener : listeners) {
+    listener->OnTableFileCreated(info);
+  }
+  info.status.PermitUncheckedError();
+}
+
+void EventHelpers::LogAndNotifyTableFileDeletion(
+    EventLogger* event_logger, int job_id, uint64_t file_number,
+    const std::string& file_path, const Status& status,
+    const std::string& dbname,
+    const std::vector<std::shared_ptr<EventListener>>& listeners) {
+  JSONWriter jwriter;
+  AppendCurrentTime(&jwriter);
+
+  jwriter << "job" << job_id << "event"
+          << "table_file_deletion"
+          << "file_number" << file_number;
+  if (!status.ok()) {
+    jwriter << "status" << status.ToString();
+  }
+
+  jwriter.EndObject();
+
+  event_logger->Log(jwriter);
+
+  if (listeners.empty()) {
+    return;
+  }
+  TableFileDeletionInfo info;
+  info.db_name = dbname;
+  info.job_id = job_id;
+  info.file_path = file_path;
+  info.status = status;
+  for (auto& listener : listeners) {
+    listener->OnTableFileDeleted(info);
+  }
+  info.status.PermitUncheckedError();
+}
+
+void EventHelpers::NotifyOnErrorRecoveryEnd(
+    const std::vector<std::shared_ptr<EventListener>>& listeners,
+    const Status& old_bg_error, const Status& new_bg_error,
+    InstrumentedMutex* db_mutex) {
+  if (!listeners.empty()) {
+    db_mutex->AssertHeld();
+    // release lock while notifying events
+    db_mutex->Unlock();
+    for (auto& listener : listeners) {
+      BackgroundErrorRecoveryInfo info;
+      info.old_bg_error = old_bg_error;
+      info.new_bg_error = new_bg_error;
+      listener->OnErrorRecoveryCompleted(old_bg_error);
+      listener->OnErrorRecoveryEnd(info);
+      info.old_bg_error.PermitUncheckedError();
+      info.new_bg_error.PermitUncheckedError();
+    }
+    db_mutex->Lock();
+  }
+}
+
+void EventHelpers::NotifyBlobFileCreationStarted(
+    const std::vector<std::shared_ptr<EventListener>>& listeners,
+    const std::string& db_name, const std::string& cf_name,
+    const std::string& file_path, int job_id,
+    BlobFileCreationReason creation_reason) {
+  if (listeners.empty()) {
+    return;
+  }
+  BlobFileCreationBriefInfo info(db_name, cf_name, file_path, job_id,
+                                 creation_reason);
+  for (const auto& listener : listeners) {
+    listener->OnBlobFileCreationStarted(info);
+  }
+}
+
+void EventHelpers::LogAndNotifyBlobFileCreationFinished(
+    EventLogger* event_logger,
+    const std::vector<std::shared_ptr<EventListener>>& listeners,
+    const std::string& db_name, const std::string& cf_name,
+    const std::string& file_path, int job_id, uint64_t file_number,
+    BlobFileCreationReason creation_reason, const Status& s,
+    const std::string& file_checksum,
+    const std::string& file_checksum_func_name, uint64_t total_blob_count,
+    uint64_t total_blob_bytes) {
+  if (s.ok() && event_logger) {
+    JSONWriter jwriter;
+    AppendCurrentTime(&jwriter);
+    jwriter << "cf_name" << cf_name << "job" << job_id << "event"
+            << "blob_file_creation"
+            << "file_number" << file_number << "total_blob_count"
+            << total_blob_count << "total_blob_bytes" << total_blob_bytes
+            << "file_checksum" << file_checksum << "file_checksum_func_name"
+            << file_checksum_func_name << "status" << s.ToString();
+
+    jwriter.EndObject();
+    event_logger->Log(jwriter);
+  }
+
+  if (listeners.empty()) {
+    return;
+  }
+  BlobFileCreationInfo info(db_name, cf_name, file_path, job_id,
+                            creation_reason, total_blob_count, total_blob_bytes,
+                            s, file_checksum, file_checksum_func_name);
+  for (const auto& listener : listeners) {
+    listener->OnBlobFileCreated(info);
+  }
+  info.status.PermitUncheckedError();
+}
+
+void EventHelpers::LogAndNotifyBlobFileDeletion(
+    EventLogger* event_logger,
+    const std::vector<std::shared_ptr<EventListener>>& listeners, int job_id,
+    uint64_t file_number, const std::string& file_path, const Status& status,
+    const std::string& dbname) {
+  if (event_logger) {
+    JSONWriter jwriter;
+    AppendCurrentTime(&jwriter);
+
+    jwriter << "job" << job_id << "event"
+            << "blob_file_deletion"
+            << "file_number" << file_number;
+    if (!status.ok()) {
+      jwriter << "status" << status.ToString();
+    }
+
+    jwriter.EndObject();
+    event_logger->Log(jwriter);
+  }
+  if (listeners.empty()) {
+    return;
+  }
+  BlobFileDeletionInfo info(dbname, file_path, job_id, status);
+  for (const auto& listener : listeners) {
+    listener->OnBlobFileDeleted(info);
+  }
+  info.status.PermitUncheckedError();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.h
new file mode 100644
index 0000000000..a1331d8a9a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/event_helpers.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "db/column_family.h"
+#include "db/version_edit.h"
+#include "logging/event_logger.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/table_properties.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class EventHelpers {
+ public:
+  static void AppendCurrentTime(JSONWriter* json_writer);
+  static void NotifyTableFileCreationStarted(
+      const std::vector<std::shared_ptr<EventListener>>& listeners,
+      const std::string& db_name, const std::string& cf_name,
+      const std::string& file_path, int job_id, TableFileCreationReason reason);
+  static void NotifyOnBackgroundError(
+      const std::vector<std::shared_ptr<EventListener>>& listeners,
+      BackgroundErrorReason reason, Status* bg_error,
+      InstrumentedMutex* db_mutex, bool* auto_recovery);
+  static void LogAndNotifyTableFileCreationFinished(
+      EventLogger* event_logger,
+      const std::vector<std::shared_ptr<EventListener>>& listeners,
+      const std::string& db_name, const std::string& cf_name,
+      const std::string& file_path, int job_id, const FileDescriptor& fd,
+      uint64_t oldest_blob_file_number, const TableProperties& table_properties,
+      TableFileCreationReason reason, const Status& s,
+      const std::string& file_checksum,
+      const std::string& file_checksum_func_name);
+  static void LogAndNotifyTableFileDeletion(
+      EventLogger* event_logger, int job_id, uint64_t file_number,
+      const std::string& file_path, const Status& status,
+      const std::string& db_name,
+      const std::vector<std::shared_ptr<EventListener>>& listeners);
+  static void NotifyOnErrorRecoveryEnd(
+      const std::vector<std::shared_ptr<EventListener>>& listeners,
+      const Status& old_bg_error, const Status& new_bg_error,
+      InstrumentedMutex* db_mutex);
+
+  static void NotifyBlobFileCreationStarted(
+      const std::vector<std::shared_ptr<EventListener>>& listeners,
+      const std::string& db_name, const std::string& cf_name,
+      const std::string& file_path, int job_id,
+      BlobFileCreationReason creation_reason);
+
+  static void LogAndNotifyBlobFileCreationFinished(
+      EventLogger* event_logger,
+      const std::vector<std::shared_ptr<EventListener>>& listeners,
+      const std::string& db_name, const std::string& cf_name,
+      const std::string& file_path, int job_id, uint64_t file_number,
+      BlobFileCreationReason creation_reason, const Status& s,
+      const std::string& file_checksum,
+      const
std::string& file_checksum_func_name, uint64_t total_blob_count,
+      uint64_t total_blob_bytes);
+
+  static void LogAndNotifyBlobFileDeletion(
+      EventLogger* event_logger,
+      const std::vector<std::shared_ptr<EventListener>>& listeners, int job_id,
+      uint64_t file_number, const std::string& file_path, const Status& status,
+      const std::string& db_name);
+
+ private:
+  static void LogAndNotifyTableFileCreation(
+      EventLogger* event_logger,
+      const std::vector<std::shared_ptr<EventListener>>& listeners,
+      const FileDescriptor& fd, const TableFileCreationInfo& info);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/experimental.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/experimental.cc
new file mode 100644
index 0000000000..cb5fb31794
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/experimental.cc
@@ -0,0 +1,144 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "rocksdb/experimental.h"
+
+#include "db/db_impl/db_impl.h"
+#include "db/version_util.h"
+#include "logging/logging.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace experimental {
+
+
+Status SuggestCompactRange(DB* db, ColumnFamilyHandle* column_family,
+                           const Slice* begin, const Slice* end) {
+  if (db == nullptr) {
+    return Status::InvalidArgument("DB is empty");
+  }
+
+  return db->SuggestCompactRange(column_family, begin, end);
+}
+
+Status PromoteL0(DB* db, ColumnFamilyHandle* column_family, int target_level) {
+  if (db == nullptr) {
+    return Status::InvalidArgument("Didn't recognize DB object");
+  }
+  return db->PromoteL0(column_family, target_level);
+}
+
+
+Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end) {
+  return SuggestCompactRange(db, db->DefaultColumnFamily(), begin, end);
+}
+
+Status UpdateManifestForFilesState(
+    const DBOptions& db_opts, const std::string& db_name,
+    const std::vector<ColumnFamilyDescriptor>& column_families,
+    const UpdateManifestForFilesStateOptions& opts) {
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  OfflineManifestWriter w(db_opts, db_name);
+  Status s = w.Recover(column_families);
+
+  size_t files_updated = 0;
+  size_t cfs_updated = 0;
+  auto fs = db_opts.env->GetFileSystem();
+
+  for (auto cfd : *w.Versions().GetColumnFamilySet()) {
+    if (!s.ok()) {
+      break;
+    }
+    assert(cfd);
+
+    if (cfd->IsDropped() || !cfd->initialized()) {
+      continue;
+    }
+
+    const auto* current = cfd->current();
+    assert(current);
+
+    const auto* vstorage = current->storage_info();
+    assert(vstorage);
+
+    VersionEdit edit;
+    edit.SetColumnFamily(cfd->GetID());
+
+    /* SST files */
+    for (int level = 0; level < cfd->NumberLevels(); level++) {
+      if (!s.ok()) {
+        break;
+      }
+      const auto& level_files = vstorage->LevelFiles(level);
+
+      for (const auto& lf : level_files) {
+        assert(lf);
+
+        uint64_t number = lf->fd.GetNumber();
+        std::string fname =
+            TableFileName(w.IOptions().db_paths, number, lf->fd.GetPathId());
+
+        std::unique_ptr<FSSequentialFile> f;
+        FileOptions fopts;
+        // Use kUnknown to signal the FileSystem to search all tiers for the
+        // file.
+        fopts.temperature = Temperature::kUnknown;
+
+        IOStatus file_ios =
+            fs->NewSequentialFile(fname, fopts, &f, /*dbg*/ nullptr);
+        if (file_ios.ok()) {
+          if (opts.update_temperatures) {
+            Temperature temp = f->GetTemperature();
+            if (temp != Temperature::kUnknown && temp != lf->temperature) {
+              // Current state inconsistent with manifest
+              ++files_updated;
+              edit.DeleteFile(level, number);
+              edit.AddFile(
+                  level, number, lf->fd.GetPathId(), lf->fd.GetFileSize(),
+                  lf->smallest, lf->largest, lf->fd.smallest_seqno,
+                  lf->fd.largest_seqno, lf->marked_for_compaction, temp,
+                  lf->oldest_blob_file_number, lf->oldest_ancester_time,
+                  lf->file_creation_time, lf->epoch_number, lf->file_checksum,
+                  lf->file_checksum_func_name, lf->unique_id,
+                  lf->compensated_range_deletion_size, lf->tail_size);
+            }
+          }
+        } else {
+          s = file_ios;
+          break;
+        }
+      }
+    }
+
+    if (s.ok() && edit.NumEntries() > 0) {
+      std::unique_ptr<FSDirectory> db_dir;
+      s = fs->NewDirectory(db_name, IOOptions(), &db_dir, nullptr);
+      if (s.ok()) {
+        s = w.LogAndApply(read_options, cfd, &edit, db_dir.get());
+      }
+      if (s.ok()) {
+        ++cfs_updated;
+      }
+    }
+  }
+
+  if (cfs_updated > 0) {
+    ROCKS_LOG_INFO(db_opts.info_log,
+                   "UpdateManifestForFilesState: updated %zu files in %zu CFs",
+                   files_updated, cfs_updated);
+  } else if (s.ok()) {
+    ROCKS_LOG_INFO(db_opts.info_log,
+                   "UpdateManifestForFilesState: no updates needed");
+  }
+  if (!s.ok()) {
+    ROCKS_LOG_ERROR(db_opts.info_log, "UpdateManifestForFilesState failed: %s",
+                    s.ToString().c_str());
+  }
+
+  return s;
+}
+
+}  // namespace experimental
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.cc
new file mode 100644
index 0000000000..f8716e5f48
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.cc
@@ -0,0 +1,1104 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
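Before the ingestion job internals below, a short usage sketch of the public API they implement may be useful; DB::IngestExternalFile and SstFileWriter are the standard entry points, and the paths here are placeholders.

#include <rocksdb/db.h>
#include <rocksdb/sst_file_writer.h>

#include <cassert>

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/testdb", &db).ok());

  // Build one external SST file; keys must be added in sorted order.
  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  assert(writer.Open("/tmp/file1.sst").ok());
  assert(writer.Put("key1", "value1").ok());
  assert(writer.Finish().ok());

  // Prepare() below will try a hard link first and, because
  // failed_move_fall_back_to_copy defaults to true, copy if linking fails.
  rocksdb::IngestExternalFileOptions ifo;
  ifo.move_files = true;
  assert(db->IngestExternalFile({"/tmp/file1.sst"}, ifo).ok());
  delete db;
}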
+
+
+#include "db/external_sst_file_ingestion_job.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "db/db_impl/db_impl.h"
+#include "db/version_edit.h"
+#include "file/file_util.h"
+#include "file/random_access_file_reader.h"
+#include "logging/logging.h"
+#include "table/merging_iterator.h"
+#include "table/scoped_arena_iterator.h"
+#include "table/sst_file_writer_collectors.h"
+#include "table/table_builder.h"
+#include "table/unique_id_impl.h"
+#include "test_util/sync_point.h"
+#include "util/stop_watch.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status ExternalSstFileIngestionJob::Prepare(
+    const std::vector<std::string>& external_files_paths,
+    const std::vector<std::string>& files_checksums,
+    const std::vector<std::string>& files_checksum_func_names,
+    const Temperature& file_temperature, uint64_t next_file_number,
+    SuperVersion* sv) {
+  Status status;
+
+  // Read the information of files we are ingesting
+  for (const std::string& file_path : external_files_paths) {
+    IngestedFileInfo file_to_ingest;
+    status =
+        GetIngestedFileInfo(file_path, next_file_number++, &file_to_ingest, sv);
+    if (!status.ok()) {
+      return status;
+    }
+
+    if (file_to_ingest.cf_id !=
+            TablePropertiesCollectorFactory::Context::kUnknownColumnFamily &&
+        file_to_ingest.cf_id != cfd_->GetID()) {
+      return Status::InvalidArgument(
+          "External file column family id don't match");
+    }
+
+    if (file_to_ingest.num_entries == 0 &&
+        file_to_ingest.num_range_deletions == 0) {
+      return Status::InvalidArgument("File contain no entries");
+    }
+
+    if (!file_to_ingest.smallest_internal_key.Valid() ||
+        !file_to_ingest.largest_internal_key.Valid()) {
+      return Status::Corruption("Generated table have corrupted keys");
+    }
+
+    files_to_ingest_.emplace_back(std::move(file_to_ingest));
+  }
+
+  const Comparator* ucmp = cfd_->internal_comparator().user_comparator();
+  auto num_files = files_to_ingest_.size();
+  if (num_files == 0) {
+    return Status::InvalidArgument("The list of files is empty");
+  } else if (num_files > 1) {
+    // Verify that passed files don't have overlapping ranges
+    autovector<const IngestedFileInfo*> sorted_files;
+    for (size_t i = 0; i < num_files; i++) {
+      sorted_files.push_back(&files_to_ingest_[i]);
+    }
+
+    std::sort(
+        sorted_files.begin(), sorted_files.end(),
+        [&ucmp](const IngestedFileInfo* info1, const IngestedFileInfo* info2) {
+          return sstableKeyCompare(ucmp, info1->smallest_internal_key,
+                                   info2->smallest_internal_key) < 0;
+        });
+
+    for (size_t i = 0; i + 1 < num_files; i++) {
+      if (sstableKeyCompare(ucmp, sorted_files[i]->largest_internal_key,
+                            sorted_files[i + 1]->smallest_internal_key) >= 0) {
+        files_overlap_ = true;
+        break;
+      }
+    }
+  }
+
+  // Handle the file temperature
+  for (size_t i = 0; i < num_files; i++) {
+    files_to_ingest_[i].file_temperature = file_temperature;
+  }
+
+  if (ingestion_options_.ingest_behind && files_overlap_) {
+    return Status::NotSupported("Files have overlapping ranges");
+  }
+
+  // Copy/Move external files into DB
+  std::unordered_set<uint32_t> ingestion_path_ids;
+  for (IngestedFileInfo& f : files_to_ingest_) {
+    f.copy_file = false;
+    const std::string path_outside_db = f.external_file_path;
+    const std::string path_inside_db = TableFileName(
+        cfd_->ioptions()->cf_paths, f.fd.GetNumber(), f.fd.GetPathId());
+    if (ingestion_options_.move_files) {
+      status =
+          fs_->LinkFile(path_outside_db, path_inside_db, IOOptions(), nullptr);
+      if (status.ok()) {
+        // It is unsafe to assume the application had synced the file and file
+        // directory before ingesting the file. For integrity of RocksDB we
+        // need to sync the file.
+        std::unique_ptr<FSWritableFile> file_to_sync;
+        Status s = fs_->ReopenWritableFile(path_inside_db, env_options_,
+                                           &file_to_sync, nullptr);
+        TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::Prepare:Reopen",
+                                 &s);
+        // Some file systems (especially remote/distributed) don't support
+        // reopening a file for writing and don't require reopening and
+        // syncing the file. Ignore the NotSupported error in that case.
+        if (!s.IsNotSupported()) {
+          status = s;
+          if (status.ok()) {
+            TEST_SYNC_POINT(
+                "ExternalSstFileIngestionJob::BeforeSyncIngestedFile");
+            status = SyncIngestedFile(file_to_sync.get());
+            TEST_SYNC_POINT(
+                "ExternalSstFileIngestionJob::AfterSyncIngestedFile");
+            if (!status.ok()) {
+              ROCKS_LOG_WARN(db_options_.info_log,
+                             "Failed to sync ingested file %s: %s",
+                             path_inside_db.c_str(), status.ToString().c_str());
+            }
+          }
+        }
+      } else if (status.IsNotSupported() &&
+                 ingestion_options_.failed_move_fall_back_to_copy) {
+        // Original file is on a different FS, use copy instead of hard linking.
+        f.copy_file = true;
+        ROCKS_LOG_INFO(db_options_.info_log,
+                       "Tried to link file %s but it's not supported : %s",
+                       path_outside_db.c_str(), status.ToString().c_str());
+      }
+    } else {
+      f.copy_file = true;
+    }
+
+    if (f.copy_file) {
+      TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::Prepare:CopyFile",
+                               nullptr);
+      // CopyFile also syncs the new file.
+      status =
+          CopyFile(fs_.get(), path_outside_db, path_inside_db, 0,
+                   db_options_.use_fsync, io_tracer_, Temperature::kUnknown);
+    }
+    TEST_SYNC_POINT("ExternalSstFileIngestionJob::Prepare:FileAdded");
+    if (!status.ok()) {
+      break;
+    }
+    f.internal_file_path = path_inside_db;
+    // Initialize the checksum information of ingested files.
+    f.file_checksum = kUnknownFileChecksum;
+    f.file_checksum_func_name = kUnknownFileChecksumFuncName;
+    ingestion_path_ids.insert(f.fd.GetPathId());
+  }
+
+  TEST_SYNC_POINT("ExternalSstFileIngestionJob::BeforeSyncDir");
+  if (status.ok()) {
+    for (auto path_id : ingestion_path_ids) {
+      status = directories_->GetDataDir(path_id)->FsyncWithDirOptions(
+          IOOptions(), nullptr,
+          DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+      if (!status.ok()) {
+        ROCKS_LOG_WARN(db_options_.info_log,
+                       "Failed to sync directory %" ROCKSDB_PRIszt
+                       " while ingest file: %s",
+                       path_id, status.ToString().c_str());
+        break;
+      }
+    }
+  }
+  TEST_SYNC_POINT("ExternalSstFileIngestionJob::AfterSyncDir");
+
+  // Generate and check the sst file checksum. Note that, if
+  // IngestExternalFileOptions::write_global_seqno is true, we will not update
+  // the checksum information in the files_to_ingests_ here, since the file is
+  // updated with the new global_seqno. After global_seqno is updated, DB will
+  // generate the new checksum and store it in the Manifest. In all other cases
+  // if ingestion_options_.write_global_seqno == true and
+  // verify_file_checksum is false, we only check the checksum function name.
+  if (status.ok() && db_options_.file_checksum_gen_factory != nullptr) {
+    if (ingestion_options_.verify_file_checksum == false &&
+        files_checksums.size() == files_to_ingest_.size() &&
+        files_checksum_func_names.size() == files_to_ingest_.size()) {
+      // Only when verify_file_checksum == false and the checksum for ingested
+      // files are provided, DB will use the provided checksum and does not
+      // generate the checksum for ingested files.
+      need_generate_file_checksum_ = false;
+    } else {
+      need_generate_file_checksum_ = true;
+    }
+    FileChecksumGenContext gen_context;
+    std::unique_ptr<FileChecksumGenerator> file_checksum_gen =
+        db_options_.file_checksum_gen_factory->CreateFileChecksumGenerator(
+            gen_context);
+    std::vector<std::string> generated_checksums;
+    std::vector<std::string> generated_checksum_func_names;
+    // Step 1: generate the checksum for ingested sst file.
+    if (need_generate_file_checksum_) {
+      for (size_t i = 0; i < files_to_ingest_.size(); i++) {
+        std::string generated_checksum;
+        std::string generated_checksum_func_name;
+        std::string requested_checksum_func_name;
+        // TODO: rate limit file reads for checksum calculation during file
+        // ingestion.
+        IOStatus io_s = GenerateOneFileChecksum(
+            fs_.get(), files_to_ingest_[i].internal_file_path,
+            db_options_.file_checksum_gen_factory.get(),
+            requested_checksum_func_name, &generated_checksum,
+            &generated_checksum_func_name,
+            ingestion_options_.verify_checksums_readahead_size,
+            db_options_.allow_mmap_reads, io_tracer_,
+            db_options_.rate_limiter.get(),
+            Env::IO_TOTAL /* rate_limiter_priority */);
+        if (!io_s.ok()) {
+          status = io_s;
+          ROCKS_LOG_WARN(db_options_.info_log,
+                         "Sst file checksum generation of file: %s failed: %s",
+                         files_to_ingest_[i].internal_file_path.c_str(),
+                         status.ToString().c_str());
+          break;
+        }
+        if (ingestion_options_.write_global_seqno == false) {
+          files_to_ingest_[i].file_checksum = generated_checksum;
+          files_to_ingest_[i].file_checksum_func_name =
+              generated_checksum_func_name;
+        }
+        generated_checksums.push_back(generated_checksum);
+        generated_checksum_func_names.push_back(generated_checksum_func_name);
+      }
+    }
+
+    // Step 2: based on the verify_file_checksum and ingested checksum
+    // information, do the verification.
+    if (status.ok()) {
+      if (files_checksums.size() == files_to_ingest_.size() &&
+          files_checksum_func_names.size() == files_to_ingest_.size()) {
+        // Verify the checksum and checksum function name.
+        if (ingestion_options_.verify_file_checksum) {
+          for (size_t i = 0; i < files_to_ingest_.size(); i++) {
+            if (files_checksum_func_names[i] !=
+                generated_checksum_func_names[i]) {
+              status = Status::InvalidArgument(
+                  "Checksum function name does not match with the checksum "
+                  "function name of this DB");
+              ROCKS_LOG_WARN(
+                  db_options_.info_log,
+                  "Sst file checksum verification of file: %s failed: %s",
+                  external_files_paths[i].c_str(), status.ToString().c_str());
+              break;
+            }
+            if (files_checksums[i] != generated_checksums[i]) {
+              status = Status::Corruption(
+                  "Ingested checksum does not match with the generated "
+                  "checksum");
+              ROCKS_LOG_WARN(
+                  db_options_.info_log,
+                  "Sst file checksum verification of file: %s failed: %s",
+                  files_to_ingest_[i].internal_file_path.c_str(),
+                  status.ToString().c_str());
+              break;
+            }
+          }
+        } else {
+          // If verify_file_checksum is not enabled, we only verify the
+          // checksum function name. If it does not match, fail the ingestion.
+          // If it matches, we trust the ingested checksum information and
+          // store it in the Manifest.
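Condensing the decision the two comments above describe into one predicate may help; the function below uses stand-in types and is an illustration only: checksums are generated by the DB unless full verification is disabled and the caller supplied a complete checksum list.

#include <string>
#include <vector>

// Mirrors need_generate_file_checksum_ above: trust caller-provided checksums
// only when verify_file_checksum is off AND there is exactly one checksum and
// one checksum-function name per ingested file.
bool NeedGenerateChecksum(bool verify_file_checksum, size_t num_files,
                          const std::vector<std::string>& checksums,
                          const std::vector<std::string>& func_names) {
  const bool provided_complete =
      checksums.size() == num_files && func_names.size() == num_files;
  return verify_file_checksum || !provided_complete;
}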
+          for (size_t i = 0; i < files_to_ingest_.size(); i++) {
+            if (files_checksum_func_names[i] != file_checksum_gen->Name()) {
+              status = Status::InvalidArgument(
+                  "Checksum function name does not match with the checksum "
+                  "function name of this DB");
+              ROCKS_LOG_WARN(
+                  db_options_.info_log,
+                  "Sst file checksum verification of file: %s failed: %s",
+                  external_files_paths[i].c_str(), status.ToString().c_str());
+              break;
+            }
+            files_to_ingest_[i].file_checksum = files_checksums[i];
+            files_to_ingest_[i].file_checksum_func_name =
+                files_checksum_func_names[i];
+          }
+        }
+      } else if (files_checksums.size() != files_checksum_func_names.size() ||
+                 (files_checksums.size() == files_checksum_func_names.size() &&
+                  files_checksums.size() != 0)) {
+        // The checksum and checksum function name vectors are not both empty,
+        // yet they are incomplete.
+        status = Status::InvalidArgument(
+            "The checksum information of ingested sst files is nonempty and "
+            "the size of checksums or the size of the checksum function "
+            "names does not match with the number of ingested sst files");
+        ROCKS_LOG_WARN(
+            db_options_.info_log,
+            "The ingested sst files checksum information is incomplete: %s",
+            status.ToString().c_str());
+      }
+    }
+  }
+
+  // TODO: The following is duplicated with Cleanup().
+  if (!status.ok()) {
+    IOOptions io_opts;
+    // We failed, remove all files that we copied into the db
+    for (IngestedFileInfo& f : files_to_ingest_) {
+      if (f.internal_file_path.empty()) {
+        continue;
+      }
+      Status s = fs_->DeleteFile(f.internal_file_path, io_opts, nullptr);
+      if (!s.ok()) {
+        ROCKS_LOG_WARN(db_options_.info_log,
+                       "AddFile() clean up for file %s failed : %s",
+                       f.internal_file_path.c_str(), s.ToString().c_str());
+      }
+    }
+  }
+
+  return status;
+}
+
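Prepare()'s checksum handling boils down to: generate checksums only when they were not supplied or when verification is requested, reject incomplete checksum metadata, then compare values before trusting anything. A standalone sketch of that generate-then-verify flow, with FNV-1a standing in for the pluggable FileChecksumGenerator (all names here are illustrative, not RocksDB's):

    #include <cstdint>
    #include <fstream>
    #include <string>
    #include <vector>

    // Stand-in checksum: FNV-1a over the whole file. Only the
    // generate-then-compare flow is the point here.
    uint64_t FileFnv1a(const std::string& path) {
      std::ifstream in(path, std::ios::binary);
      uint64_t h = 1469598103934665603ull;
      for (char c; in.get(c);) {
        h = (h ^ static_cast<unsigned char>(c)) * 1099511628211ull;
      }
      return h;
    }

    // Returns true iff the provided checksum list is complete and every
    // entry matches the checksum generated from the file on disk.
    bool VerifyAll(const std::vector<std::string>& files,
                   const std::vector<uint64_t>& provided) {
      if (provided.size() != files.size()) return false;  // incomplete info
      for (size_t i = 0; i < files.size(); i++) {
        if (FileFnv1a(files[i]) != provided[i]) return false;  // mismatch
      }
      return true;
    }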
+Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
+                                               SuperVersion* super_version) {
+  autovector<Range> ranges;
+  autovector<std::string> keys;
+  size_t ts_sz = cfd_->user_comparator()->timestamp_size();
+  if (ts_sz) {
+    // Check all ranges [begin, end] inclusively. Add the maximum
+    // timestamp to include all `begin` keys, and add the minimal timestamp to
+    // include all `end` keys.
+    for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) {
+      std::string begin_str;
+      std::string end_str;
+      AppendUserKeyWithMaxTimestamp(
+          &begin_str, file_to_ingest.smallest_internal_key.user_key(), ts_sz);
+      AppendKeyWithMinTimestamp(
+          &end_str, file_to_ingest.largest_internal_key.user_key(), ts_sz);
+      keys.emplace_back(std::move(begin_str));
+      keys.emplace_back(std::move(end_str));
+    }
+    for (size_t i = 0; i < files_to_ingest_.size(); ++i) {
+      ranges.emplace_back(keys[2 * i], keys[2 * i + 1]);
+    }
+  } else {
+    for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) {
+      ranges.emplace_back(file_to_ingest.smallest_internal_key.user_key(),
+                          file_to_ingest.largest_internal_key.user_key());
+    }
+  }
+  Status status = cfd_->RangesOverlapWithMemtables(
+      ranges, super_version, db_options_.allow_data_in_errors, flush_needed);
+  if (status.ok() && *flush_needed &&
+      !ingestion_options_.allow_blocking_flush) {
+    status = Status::InvalidArgument("External file requires flush");
+  }
+  return status;
+}
+
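NeedsFlush() pads user keys with synthetic timestamps so the memtable overlap check covers every timestamped version of the boundary keys. A sketch of the padding idea, assuming a fixed 8-byte timestamp suffix (the real encoding and its ordering are defined by the column family's comparator, so treat this as illustration only):

    #include <string>

    // Pad a user key with a synthetic 8-byte timestamp so the padded
    // [begin, end] range covers every timestamped version of the user keys.
    // Sketch only: RocksDB's timestamp encoding is comparator-defined.
    std::string WithTimestamp(const std::string& user_key, bool max_ts) {
      std::string padded = user_key;
      padded.append(8, max_ts ? '\xff' : '\x00');
      return padded;
    }

    // As in NeedsFlush(): `begin` gets the maximum timestamp and `end` the
    // minimum, e.g.
    //   auto begin = WithTimestamp(smallest_user_key, /*max_ts=*/true);
    //   auto end   = WithTimestamp(largest_user_key, /*max_ts=*/false);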
+// REQUIRES: we have become the only writer by entering both write_thread_ and
+// nonmem_write_thread_
+Status ExternalSstFileIngestionJob::Run() {
+  Status status;
+  SuperVersion* super_version = cfd_->GetSuperVersion();
+#ifndef NDEBUG
+  // We should never run the job with a memtable that is overlapping
+  // with the files we are ingesting
+  bool need_flush = false;
+  status = NeedsFlush(&need_flush, super_version);
+  if (!status.ok()) {
+    return status;
+  }
+  if (need_flush) {
+    return Status::TryAgain();
+  }
+  assert(status.ok() && need_flush == false);
+#endif
+
+  bool force_global_seqno = false;
+
+  if (ingestion_options_.snapshot_consistency && !db_snapshots_->empty()) {
+    // We need to assign a global sequence number to all the files even
+    // if they don't overlap with any ranges since we have snapshots
+    force_global_seqno = true;
+  }
+  // It is safe to use this instead of LastAllocatedSequence since we are
+  // the only active writer, and hence they are equal
+  SequenceNumber last_seqno = versions_->LastSequence();
+  edit_.SetColumnFamily(cfd_->GetID());
+  // The levels that the files will be ingested into
+
+  for (IngestedFileInfo& f : files_to_ingest_) {
+    SequenceNumber assigned_seqno = 0;
+    if (ingestion_options_.ingest_behind) {
+      status = CheckLevelForIngestedBehindFile(&f);
+    } else {
+      status = AssignLevelAndSeqnoForIngestedFile(
+          super_version, force_global_seqno,
+          cfd_->ioptions()->compaction_style, last_seqno, &f, &assigned_seqno);
+    }
+
+    // Modify the smallest/largest internal key to include the sequence number
+    // that we just learned. Only overwrite sequence number zero. There could
+    // be a nonzero sequence number already to indicate a range tombstone's
+    // exclusive endpoint.
+    ParsedInternalKey smallest_parsed, largest_parsed;
+    if (status.ok()) {
+      status = ParseInternalKey(*f.smallest_internal_key.rep(),
+                                &smallest_parsed, false /* log_err_key */);
+    }
+    if (status.ok()) {
+      status = ParseInternalKey(*f.largest_internal_key.rep(), &largest_parsed,
+                                false /* log_err_key */);
+    }
+    if (!status.ok()) {
+      return status;
+    }
+    if (smallest_parsed.sequence == 0) {
+      UpdateInternalKey(f.smallest_internal_key.rep(), assigned_seqno,
+                        smallest_parsed.type);
+    }
+    if (largest_parsed.sequence == 0) {
+      UpdateInternalKey(f.largest_internal_key.rep(), assigned_seqno,
+                        largest_parsed.type);
+    }
+
+    status = AssignGlobalSeqnoForIngestedFile(&f, assigned_seqno);
+    TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::Run",
+                             &assigned_seqno);
+    if (assigned_seqno > last_seqno) {
+      assert(assigned_seqno == last_seqno + 1);
+      last_seqno = assigned_seqno;
+      ++consumed_seqno_count_;
+    }
+    if (!status.ok()) {
+      return status;
+    }
+
+    status = GenerateChecksumForIngestedFile(&f);
+    if (!status.ok()) {
+      return status;
+    }
+
+    // We use the import time as the ancestor time. This is the time the data
+    // is written to the database.
+    int64_t temp_current_time = 0;
+    uint64_t current_time = kUnknownFileCreationTime;
+    uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
+    if (clock_->GetCurrentTime(&temp_current_time).ok()) {
+      current_time = oldest_ancester_time =
+          static_cast<uint64_t>(temp_current_time);
+    }
+    uint64_t tail_size = 0;
+    bool contain_no_data_blocks = f.table_properties.num_entries > 0 &&
+                                  (f.table_properties.num_entries ==
+                                   f.table_properties.num_range_deletions);
+    if (f.table_properties.tail_start_offset > 0 || contain_no_data_blocks) {
+      uint64_t file_size = f.fd.GetFileSize();
+      assert(f.table_properties.tail_start_offset <= file_size);
+      tail_size = file_size - f.table_properties.tail_start_offset;
+    }
+
+    FileMetaData f_metadata(
+        f.fd.GetNumber(), f.fd.GetPathId(), f.fd.GetFileSize(),
+        f.smallest_internal_key, f.largest_internal_key, f.assigned_seqno,
+        f.assigned_seqno, false, f.file_temperature, kInvalidBlobFileNumber,
+        oldest_ancester_time, current_time,
+        ingestion_options_.ingest_behind
+            ? kReservedEpochNumberForFileIngestedBehind
+            : cfd_->NewEpochNumber(),
+        f.file_checksum, f.file_checksum_func_name, f.unique_id, 0, tail_size);
+    f_metadata.temperature = f.file_temperature;
+    edit_.AddFile(f.picked_level, f_metadata);
+  }
+
+  CreateEquivalentFileIngestingCompactions();
+  return status;
+}
+
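Run() can overwrite the sequence number inside the file's smallest/largest internal keys because RocksDB internal keys end in a fixed 64-bit little-endian trailer packing (seqno << 8) | type. A sketch of that trailer rewrite (it assumes a little-endian host for brevity; RocksDB's own EncodeFixed64/UpdateInternalKey handle this portably):

    #include <cstdint>
    #include <cstring>
    #include <string>

    // RocksDB internal keys end in a little-endian u64 trailer packing
    // (seqno << 8) | type. Overwrite the sequence number in place, keeping
    // the value-type byte. Assumes ikey->size() >= 8 and seqno < 2^56.
    void SetTrailerSeqno(std::string* ikey, uint64_t seqno) {
      uint64_t trailer;
      std::memcpy(&trailer, ikey->data() + ikey->size() - 8, 8);
      trailer = (seqno << 8) | (trailer & 0xff);  // keep the type byte
      std::memcpy(&(*ikey)[ikey->size() - 8], &trailer, 8);
    }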
+void ExternalSstFileIngestionJob::CreateEquivalentFileIngestingCompactions() {
+  // A map from output level to input of compactions equivalent to this
+  // ingestion job.
+  // TODO: simplify the logic below to create one compaction per ingested file
+  // instead of per output level, once we figure out how to treat ingested
+  // files with adjacent range deletion tombstones going to the same output
+  // level in the same job as non-overlapping compactions.
+  std::map<int, CompactionInputFiles>
+      output_level_to_file_ingesting_compaction_input;
+
+  for (const auto& pair : edit_.GetNewFiles()) {
+    int output_level = pair.first;
+    const FileMetaData& f_metadata = pair.second;
+
+    CompactionInputFiles& input =
+        output_level_to_file_ingesting_compaction_input[output_level];
+    if (input.files.empty()) {
+      // Treat the source level of ingested files as level 0
+      input.level = 0;
+    }
+
+    compaction_input_metdatas_.push_back(new FileMetaData(f_metadata));
+    input.files.push_back(compaction_input_metdatas_.back());
+  }
+
+  for (const auto& pair : output_level_to_file_ingesting_compaction_input) {
+    int output_level = pair.first;
+    const CompactionInputFiles& input = pair.second;
+
+    const auto& mutable_cf_options = *(cfd_->GetLatestMutableCFOptions());
+    file_ingesting_compactions_.push_back(new Compaction(
+        cfd_->current()->storage_info(), *cfd_->ioptions(), mutable_cf_options,
+        mutable_db_options_, {input}, output_level,
+        MaxFileSizeForLevel(
+            mutable_cf_options, output_level,
+            cfd_->ioptions()->compaction_style) /* output file size limit,
+                                                 * not applicable */
+        ,
+        LLONG_MAX /* max compaction bytes, not applicable */,
+        0 /* output path ID, not applicable */, mutable_cf_options.compression,
+        mutable_cf_options.compression_opts, Temperature::kUnknown,
+        0 /* max_subcompaction, not applicable */,
+        {} /* grandparents, not applicable */, false /* is manual */,
+        "" /* trim_ts */, -1 /* score, not applicable */,
+        false /* is deletion compaction, not applicable */,
+        files_overlap_ /* l0_files_might_overlap, not applicable */,
+        CompactionReason::kExternalSstIngestion));
+  }
+}
+
+void ExternalSstFileIngestionJob::RegisterRange() {
+  for (const auto& c : file_ingesting_compactions_) {
+    cfd_->compaction_picker()->RegisterCompaction(c);
+  }
+}
+
+void ExternalSstFileIngestionJob::UnregisterRange() {
+  for (const auto& c : file_ingesting_compactions_) {
+    cfd_->compaction_picker()->UnregisterCompaction(c);
+    delete c;
+  }
+  file_ingesting_compactions_.clear();
+
+  for (const auto& f : compaction_input_metdatas_) {
+    delete f;
+  }
+  compaction_input_metdatas_.clear();
+}
+
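CreateEquivalentFileIngestingCompactions() above groups the newly added files by output level before building one Compaction per level. The grouping itself is an ordinary std::map fold; a reduced sketch with placeholder types in place of RocksDB's FileMetaData and CompactionInputFiles:

    #include <map>
    #include <utility>
    #include <vector>

    struct FileMeta { int file_number; };  // placeholder for FileMetaData

    // Group (output_level, file) pairs into per-level compaction inputs, as
    // the ingestion job does before registering range conflicts.
    std::map<int, std::vector<FileMeta>> GroupByOutputLevel(
        const std::vector<std::pair<int, FileMeta>>& new_files) {
      std::map<int, std::vector<FileMeta>> inputs;
      for (const auto& [level, meta] : new_files) {
        inputs[level].push_back(meta);  // source level is treated as L0
      }
      return inputs;
    }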
+void ExternalSstFileIngestionJob::UpdateStats() {
+  // Update internal stats for new ingested files
+  uint64_t total_keys = 0;
+  uint64_t total_l0_files = 0;
+  uint64_t total_time = clock_->NowMicros() - job_start_time_;
+
+  EventLoggerStream stream = event_logger_->Log();
+  stream << "event"
+         << "ingest_finished";
+  stream << "files_ingested";
+  stream.StartArray();
+
+  for (IngestedFileInfo& f : files_to_ingest_) {
+    InternalStats::CompactionStats stats(
+        CompactionReason::kExternalSstIngestion, 1);
+    stats.micros = total_time;
+    // If an actual copy occurred for this file, then we need to count the
+    // file size as the actual bytes written. If the file was linked, then we
+    // ignore the bytes written for file metadata.
+    // TODO (yanqin) maybe account for file metadata bytes for exact accuracy?
+    if (f.copy_file) {
+      stats.bytes_written = f.fd.GetFileSize();
+    } else {
+      stats.bytes_moved = f.fd.GetFileSize();
+    }
+    stats.num_output_files = 1;
+    cfd_->internal_stats()->AddCompactionStats(f.picked_level,
+                                               Env::Priority::USER, stats);
+    cfd_->internal_stats()->AddCFStats(InternalStats::BYTES_INGESTED_ADD_FILE,
+                                       f.fd.GetFileSize());
+    total_keys += f.num_entries;
+    if (f.picked_level == 0) {
+      total_l0_files += 1;
+    }
+    ROCKS_LOG_INFO(
+        db_options_.info_log,
+        "[AddFile] External SST file %s was ingested in L%d with path %s "
+        "(global_seqno=%" PRIu64 ")\n",
+        f.external_file_path.c_str(), f.picked_level,
+        f.internal_file_path.c_str(), f.assigned_seqno);
+    stream << "file" << f.internal_file_path << "level" << f.picked_level;
+  }
+  stream.EndArray();
+
+  stream << "lsm_state";
+  stream.StartArray();
+  auto vstorage = cfd_->current()->storage_info();
+  for (int level = 0; level < vstorage->num_levels(); ++level) {
+    stream << vstorage->NumLevelFiles(level);
+  }
+  stream.EndArray();
+
+  cfd_->internal_stats()->AddCFStats(InternalStats::INGESTED_NUM_KEYS_TOTAL,
+                                     total_keys);
+  cfd_->internal_stats()->AddCFStats(InternalStats::INGESTED_NUM_FILES_TOTAL,
+                                     files_to_ingest_.size());
+  cfd_->internal_stats()->AddCFStats(
+      InternalStats::INGESTED_LEVEL0_NUM_FILES_TOTAL, total_l0_files);
+}
+
+void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
+  IOOptions io_opts;
+  if (!status.ok()) {
+    // We failed to add the files to the database
+    // remove all the files we copied
+    for (IngestedFileInfo& f : files_to_ingest_) {
+      if (f.internal_file_path.empty()) {
+        continue;
+      }
+      Status s = fs_->DeleteFile(f.internal_file_path, io_opts, nullptr);
+      if (!s.ok()) {
+        ROCKS_LOG_WARN(db_options_.info_log,
+                       "AddFile() clean up for file %s failed : %s",
+                       f.internal_file_path.c_str(), s.ToString().c_str());
+      }
+    }
+    consumed_seqno_count_ = 0;
+    files_overlap_ = false;
+  } else if (status.ok() && ingestion_options_.move_files) {
+    // The files were moved and added successfully, remove original file links
+    for (IngestedFileInfo& f : files_to_ingest_) {
+      Status s = fs_->DeleteFile(f.external_file_path, io_opts, nullptr);
+      if (!s.ok()) {
+        ROCKS_LOG_WARN(
+            db_options_.info_log,
+            "%s was added to DB successfully but failed to remove original "
+            "file link : %s",
+            f.external_file_path.c_str(), s.ToString().c_str());
+      }
+    }
+  }
+}
+
+Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
+    const std::string& external_file, uint64_t new_file_number,
+    IngestedFileInfo* file_to_ingest, SuperVersion* sv) {
+  file_to_ingest->external_file_path = external_file;
+
+  // Get external file size
+  Status status = fs_->GetFileSize(external_file, IOOptions(),
+                                   &file_to_ingest->file_size, nullptr);
+  if (!status.ok()) {
+    return status;
+  }
+
+  // Assign FD with number
+  file_to_ingest->fd =
+      FileDescriptor(new_file_number, 0, file_to_ingest->file_size);
+
+  // Create TableReader for external file
+  std::unique_ptr<TableReader> table_reader;
+  std::unique_ptr<FSRandomAccessFile> sst_file;
+  std::unique_ptr<RandomAccessFileReader> sst_file_reader;
+
+  status = fs_->NewRandomAccessFile(external_file, env_options_, &sst_file,
+                                    nullptr);
+  if (!status.ok()) {
+    return status;
+  }
+  sst_file_reader.reset(new RandomAccessFileReader(
+      std::move(sst_file), external_file, nullptr /*Env*/, io_tracer_));
+
+  status = cfd_->ioptions()->table_factory->NewTableReader(
+      TableReaderOptions(
+          *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor,
+          env_options_, cfd_->internal_comparator(),
+          sv->mutable_cf_options.block_protection_bytes_per_key,
+          /*skip_filters*/ false, /*immortal*/ false,
+          /*force_direct_prefetch*/ false, /*level*/ -1,
+          /*block_cache_tracer*/ nullptr,
+          /*max_file_size_for_l0_meta_pin*/ 0, versions_->DbSessionId(),
+          /*cur_file_num*/ new_file_number),
+      std::move(sst_file_reader), file_to_ingest->file_size, &table_reader);
+  if (!status.ok()) {
+    return status;
+  }
+
+  if (ingestion_options_.verify_checksums_before_ingest) {
+    // If customized readahead size is needed, we can pass a user option
+    // all the way to here. Right now we just rely on the default readahead
+    // to keep things simple.
+    // TODO: plumb Env::IOActivity
+    ReadOptions ro;
+    ro.readahead_size = ingestion_options_.verify_checksums_readahead_size;
+    status = table_reader->VerifyChecksum(
+        ro, TableReaderCaller::kExternalSSTIngestion);
+  }
+  if (!status.ok()) {
+    return status;
+  }
+
+  // Get the external file properties
+  auto props = table_reader->GetTableProperties();
+  const auto& uprops = props->user_collected_properties;
+
+  // Get table version
+  auto version_iter = uprops.find(ExternalSstFilePropertyNames::kVersion);
+  if (version_iter == uprops.end()) {
+    return Status::Corruption("External file version not found");
+  }
+  file_to_ingest->version = DecodeFixed32(version_iter->second.c_str());
+
+  auto seqno_iter = uprops.find(ExternalSstFilePropertyNames::kGlobalSeqno);
+  if (file_to_ingest->version == 2) {
+    // version 2 implies that we have a global sequence number
+    if (seqno_iter == uprops.end()) {
+      return Status::Corruption(
+          "External file global sequence number not found");
+    }
+
+    // Set the global sequence number
+    file_to_ingest->original_seqno = DecodeFixed64(seqno_iter->second.c_str());
+    if (props->external_sst_file_global_seqno_offset == 0) {
+      file_to_ingest->global_seqno_offset = 0;
+      return Status::Corruption("Was not able to find file global seqno field");
+    }
+    file_to_ingest->global_seqno_offset =
+        static_cast<size_t>(props->external_sst_file_global_seqno_offset);
+  } else if (file_to_ingest->version == 1) {
+    // SST file V1 should not have a global seqno field
+    assert(seqno_iter == uprops.end());
+    file_to_ingest->original_seqno = 0;
+    if (ingestion_options_.allow_blocking_flush ||
+        ingestion_options_.allow_global_seqno) {
+      return Status::InvalidArgument(
+          "External SST file V1 does not support global seqno");
+    }
+  } else {
+    return Status::InvalidArgument("External file version is not supported");
+  }
+  // Get number of entries in table
+  file_to_ingest->num_entries = props->num_entries;
+  file_to_ingest->num_range_deletions = props->num_range_deletions;
+
+  ParsedInternalKey key;
+  // TODO: plumb Env::IOActivity
+  ReadOptions ro;
+  std::unique_ptr<InternalIterator> iter(table_reader->NewIterator(
+      ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr,
+      /*skip_filters=*/false, TableReaderCaller::kExternalSSTIngestion));
+  std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
+      table_reader->NewRangeTombstoneIterator(ro));
+
+  // Get first (smallest) and last (largest) key from file.
+  file_to_ingest->smallest_internal_key =
+      InternalKey("", 0, ValueType::kTypeValue);
+  file_to_ingest->largest_internal_key =
+      InternalKey("", 0, ValueType::kTypeValue);
+  bool bounds_set = false;
+  bool allow_data_in_errors = db_options_.allow_data_in_errors;
+  iter->SeekToFirst();
+  if (iter->Valid()) {
+    Status pik_status =
+        ParseInternalKey(iter->key(), &key, allow_data_in_errors);
+    if (!pik_status.ok()) {
+      return Status::Corruption("Corrupted key in external file. ",
", + pik_status.getState()); + } + if (key.sequence != 0) { + return Status::Corruption("External file has non zero sequence number"); + } + file_to_ingest->smallest_internal_key.SetFrom(key); + + iter->SeekToLast(); + pik_status = ParseInternalKey(iter->key(), &key, allow_data_in_errors); + if (!pik_status.ok()) { + return Status::Corruption("Corrupted key in external file. ", + pik_status.getState()); + } + if (key.sequence != 0) { + return Status::Corruption("External file has non zero sequence number"); + } + file_to_ingest->largest_internal_key.SetFrom(key); + + bounds_set = true; + } + + // We may need to adjust these key bounds, depending on whether any range + // deletion tombstones extend past them. + const Comparator* ucmp = cfd_->internal_comparator().user_comparator(); + if (range_del_iter != nullptr) { + for (range_del_iter->SeekToFirst(); range_del_iter->Valid(); + range_del_iter->Next()) { + Status pik_status = + ParseInternalKey(range_del_iter->key(), &key, allow_data_in_errors); + if (!pik_status.ok()) { + return Status::Corruption("Corrupted key in external file. ", + pik_status.getState()); + } + RangeTombstone tombstone(key, range_del_iter->value()); + + InternalKey start_key = tombstone.SerializeKey(); + if (!bounds_set || + sstableKeyCompare(ucmp, start_key, + file_to_ingest->smallest_internal_key) < 0) { + file_to_ingest->smallest_internal_key = start_key; + } + InternalKey end_key = tombstone.SerializeEndKey(); + if (!bounds_set || + sstableKeyCompare(ucmp, end_key, + file_to_ingest->largest_internal_key) > 0) { + file_to_ingest->largest_internal_key = end_key; + } + bounds_set = true; + } + } + + file_to_ingest->cf_id = static_cast(props->column_family_id); + + file_to_ingest->table_properties = *props; + + auto s = GetSstInternalUniqueId(props->db_id, props->db_session_id, + props->orig_file_number, + &(file_to_ingest->unique_id)); + if (!s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Failed to get SST unique id for file %s", + file_to_ingest->internal_file_path.c_str()); + file_to_ingest->unique_id = kNullUniqueId64x2; + } + + return status; +} + +Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile( + SuperVersion* sv, bool force_global_seqno, CompactionStyle compaction_style, + SequenceNumber last_seqno, IngestedFileInfo* file_to_ingest, + SequenceNumber* assigned_seqno) { + Status status; + *assigned_seqno = 0; + if (force_global_seqno) { + *assigned_seqno = last_seqno + 1; + if (compaction_style == kCompactionStyleUniversal || files_overlap_) { + if (ingestion_options_.fail_if_not_bottommost_level) { + status = Status::TryAgain( + "Files cannot be ingested to Lmax. 
Please make sure key range of " + "Lmax does not overlap with files to ingest."); + return status; + } + file_to_ingest->picked_level = 0; + return status; + } + } + + bool overlap_with_db = false; + Arena arena; + // TODO: plumb Env::IOActivity + ReadOptions ro; + ro.total_order_seek = true; + int target_level = 0; + auto* vstorage = cfd_->current()->storage_info(); + + for (int lvl = 0; lvl < cfd_->NumberLevels(); lvl++) { + if (lvl > 0 && lvl < vstorage->base_level()) { + continue; + } + if (cfd_->RangeOverlapWithCompaction( + file_to_ingest->smallest_internal_key.user_key(), + file_to_ingest->largest_internal_key.user_key(), lvl)) { + // We must use L0 or any level higher than `lvl` to be able to overwrite + // the compaction output keys that we overlap with in this level, We also + // need to assign this file a seqno to overwrite the compaction output + // keys in level `lvl` + overlap_with_db = true; + break; + } else if (vstorage->NumLevelFiles(lvl) > 0) { + bool overlap_with_level = false; + status = sv->current->OverlapWithLevelIterator( + ro, env_options_, file_to_ingest->smallest_internal_key.user_key(), + file_to_ingest->largest_internal_key.user_key(), lvl, + &overlap_with_level); + if (!status.ok()) { + return status; + } + if (overlap_with_level) { + // We must use L0 or any level higher than `lvl` to be able to overwrite + // the keys that we overlap with in this level, We also need to assign + // this file a seqno to overwrite the existing keys in level `lvl` + overlap_with_db = true; + break; + } + + if (compaction_style == kCompactionStyleUniversal && lvl != 0) { + const std::vector& level_files = + vstorage->LevelFiles(lvl); + const SequenceNumber level_largest_seqno = + (*std::max_element(level_files.begin(), level_files.end(), + [](FileMetaData* f1, FileMetaData* f2) { + return f1->fd.largest_seqno < + f2->fd.largest_seqno; + })) + ->fd.largest_seqno; + // should only assign seqno to current level's largest seqno when + // the file fits + if (level_largest_seqno != 0 && + IngestedFileFitInLevel(file_to_ingest, lvl)) { + *assigned_seqno = level_largest_seqno; + } else { + continue; + } + } + } else if (compaction_style == kCompactionStyleUniversal) { + continue; + } + + // We don't overlap with any keys in this level, but we still need to check + // if our file can fit in it + if (IngestedFileFitInLevel(file_to_ingest, lvl)) { + target_level = lvl; + } + } + // If files overlap, we have to ingest them at level 0 and assign the newest + // sequence number + if (files_overlap_) { + target_level = 0; + *assigned_seqno = last_seqno + 1; + } + + if (ingestion_options_.fail_if_not_bottommost_level && + target_level < cfd_->NumberLevels() - 1) { + status = Status::TryAgain( + "Files cannot be ingested to Lmax. 
Please make sure key range of Lmax " + "and ongoing compaction's output to Lmax" + "does not overlap with files to ingest."); + return status; + } + + TEST_SYNC_POINT_CALLBACK( + "ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile", + &overlap_with_db); + file_to_ingest->picked_level = target_level; + if (overlap_with_db && *assigned_seqno == 0) { + *assigned_seqno = last_seqno + 1; + } + return status; +} + +Status ExternalSstFileIngestionJob::CheckLevelForIngestedBehindFile( + IngestedFileInfo* file_to_ingest) { + auto* vstorage = cfd_->current()->storage_info(); + // First, check if new files fit in the bottommost level + int bottom_lvl = cfd_->NumberLevels() - 1; + if (!IngestedFileFitInLevel(file_to_ingest, bottom_lvl)) { + return Status::InvalidArgument( + "Can't ingest_behind file as it doesn't fit " + "at the bottommost level!"); + } + + // Second, check if despite allow_ingest_behind=true we still have 0 seqnums + // at some upper level + for (int lvl = 0; lvl < cfd_->NumberLevels() - 1; lvl++) { + for (auto file : vstorage->LevelFiles(lvl)) { + if (file->fd.smallest_seqno == 0) { + return Status::InvalidArgument( + "Can't ingest_behind file as despite allow_ingest_behind=true " + "there are files with 0 seqno in database at upper levels!"); + } + } + } + + file_to_ingest->picked_level = bottom_lvl; + return Status::OK(); +} + +Status ExternalSstFileIngestionJob::AssignGlobalSeqnoForIngestedFile( + IngestedFileInfo* file_to_ingest, SequenceNumber seqno) { + if (file_to_ingest->original_seqno == seqno) { + // This file already have the correct global seqno + return Status::OK(); + } else if (!ingestion_options_.allow_global_seqno) { + return Status::InvalidArgument("Global seqno is required, but disabled"); + } else if (file_to_ingest->global_seqno_offset == 0) { + return Status::InvalidArgument( + "Trying to set global seqno for a file that don't have a global seqno " + "field"); + } + + if (ingestion_options_.write_global_seqno) { + // Determine if we can write global_seqno to a given offset of file. + // If the file system does not support random write, then we should not. + // Otherwise we should. 
+Status ExternalSstFileIngestionJob::CheckLevelForIngestedBehindFile(
+    IngestedFileInfo* file_to_ingest) {
+  auto* vstorage = cfd_->current()->storage_info();
+  // First, check if new files fit in the bottommost level
+  int bottom_lvl = cfd_->NumberLevels() - 1;
+  if (!IngestedFileFitInLevel(file_to_ingest, bottom_lvl)) {
+    return Status::InvalidArgument(
+        "Can't ingest_behind file as it doesn't fit "
+        "at the bottommost level!");
+  }
+
+  // Second, check if despite allow_ingest_behind=true we still have 0 seqnums
+  // at some upper level
+  for (int lvl = 0; lvl < cfd_->NumberLevels() - 1; lvl++) {
+    for (auto file : vstorage->LevelFiles(lvl)) {
+      if (file->fd.smallest_seqno == 0) {
+        return Status::InvalidArgument(
+            "Can't ingest_behind file as despite allow_ingest_behind=true "
+            "there are files with 0 seqno in database at upper levels!");
+      }
+    }
+  }
+
+  file_to_ingest->picked_level = bottom_lvl;
+  return Status::OK();
+}
+
+Status ExternalSstFileIngestionJob::AssignGlobalSeqnoForIngestedFile(
+    IngestedFileInfo* file_to_ingest, SequenceNumber seqno) {
+  if (file_to_ingest->original_seqno == seqno) {
+    // This file already has the correct global seqno
+    return Status::OK();
+  } else if (!ingestion_options_.allow_global_seqno) {
+    return Status::InvalidArgument("Global seqno is required, but disabled");
+  } else if (file_to_ingest->global_seqno_offset == 0) {
+    return Status::InvalidArgument(
+        "Trying to set global seqno for a file that doesn't have a global "
+        "seqno field");
+  }
+
+  if (ingestion_options_.write_global_seqno) {
+    // Determine if we can write the global_seqno to the given offset of the
+    // file. If the file system does not support random writes, then we
+    // should not. Otherwise we should.
+    std::unique_ptr<FSRandomRWFile> rwfile;
+    Status status = fs_->NewRandomRWFile(file_to_ingest->internal_file_path,
+                                         env_options_, &rwfile, nullptr);
+    TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::NewRandomRWFile",
+                             &status);
+    if (status.ok()) {
+      FSRandomRWFilePtr fsptr(std::move(rwfile), io_tracer_,
+                              file_to_ingest->internal_file_path);
+      std::string seqno_val;
+      PutFixed64(&seqno_val, seqno);
+      status = fsptr->Write(file_to_ingest->global_seqno_offset, seqno_val,
+                            IOOptions(), nullptr);
+      if (status.ok()) {
+        TEST_SYNC_POINT("ExternalSstFileIngestionJob::BeforeSyncGlobalSeqno");
+        status = SyncIngestedFile(fsptr.get());
+        TEST_SYNC_POINT("ExternalSstFileIngestionJob::AfterSyncGlobalSeqno");
+        if (!status.ok()) {
+          ROCKS_LOG_WARN(db_options_.info_log,
+                         "Failed to sync ingested file %s after writing "
+                         "global sequence number: %s",
+                         file_to_ingest->internal_file_path.c_str(),
+                         status.ToString().c_str());
+        }
+      }
+      if (!status.ok()) {
+        return status;
+      }
+    } else if (!status.IsNotSupported()) {
+      return status;
+    }
+  }
+
+  file_to_ingest->assigned_seqno = seqno;
+  return Status::OK();
+}
+
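Writing the global seqno in place is just an 8-byte little-endian write at the recorded offset. A sketch of the same fixed64-at-offset update using std::fstream in place of FSRandomRWFile (path and offset are caller-supplied; this mirrors PutFixed64 plus the random write above):

    #include <cstdint>
    #include <fstream>
    #include <string>

    // Encode `seqno` as a little-endian fixed64 and overwrite it at `offset`
    // in an existing file. Returns false on any I/O failure.
    bool WriteGlobalSeqno(const std::string& path, uint64_t offset,
                          uint64_t seqno) {
      std::string buf(8, '\0');
      for (int i = 0; i < 8; i++) {
        buf[i] = static_cast<char>((seqno >> (8 * i)) & 0xff);
      }
      std::fstream f(path, std::ios::in | std::ios::out | std::ios::binary);
      if (!f) return false;
      f.seekp(static_cast<std::streamoff>(offset));
      f.write(buf.data(), 8);
      f.flush();
      return f.good();
    }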
+IOStatus ExternalSstFileIngestionJob::GenerateChecksumForIngestedFile(
+    IngestedFileInfo* file_to_ingest) {
+  if (db_options_.file_checksum_gen_factory == nullptr ||
+      need_generate_file_checksum_ == false ||
+      ingestion_options_.write_global_seqno == false) {
+    // If file_checksum_gen_factory is not set, we are not able to generate
+    // the checksum. If write_global_seqno is false, it means we will use the
+    // file checksum generated during Prepare(). This step will be skipped.
+    return IOStatus::OK();
+  }
+  std::string file_checksum;
+  std::string file_checksum_func_name;
+  std::string requested_checksum_func_name;
+  // TODO: rate limit file reads for checksum calculation during file
+  // ingestion.
+  IOStatus io_s = GenerateOneFileChecksum(
+      fs_.get(), file_to_ingest->internal_file_path,
+      db_options_.file_checksum_gen_factory.get(),
+      requested_checksum_func_name, &file_checksum, &file_checksum_func_name,
+      ingestion_options_.verify_checksums_readahead_size,
+      db_options_.allow_mmap_reads, io_tracer_, db_options_.rate_limiter.get(),
+      Env::IO_TOTAL /* rate_limiter_priority */);
+  if (!io_s.ok()) {
+    return io_s;
+  }
+  file_to_ingest->file_checksum = file_checksum;
+  file_to_ingest->file_checksum_func_name = file_checksum_func_name;
+  return IOStatus::OK();
+}
+
+bool ExternalSstFileIngestionJob::IngestedFileFitInLevel(
+    const IngestedFileInfo* file_to_ingest, int level) {
+  if (level == 0) {
+    // Files can always fit in L0
+    return true;
+  }
+
+  auto* vstorage = cfd_->current()->storage_info();
+  Slice file_smallest_user_key(
+      file_to_ingest->smallest_internal_key.user_key());
+  Slice file_largest_user_key(file_to_ingest->largest_internal_key.user_key());
+
+  if (vstorage->OverlapInLevel(level, &file_smallest_user_key,
+                               &file_largest_user_key)) {
+    // File overlaps with other files in this level, we cannot
+    // add it to this level
+    return false;
+  }
+
+  // File did not overlap with level files, nor compaction output
+  return true;
+}
+
+template <typename TWritableFile>
+Status ExternalSstFileIngestionJob::SyncIngestedFile(TWritableFile* file) {
+  assert(file != nullptr);
+  if (db_options_.use_fsync) {
+    return file->Fsync(IOOptions(), nullptr);
+  } else {
+    return file->Sync(IOOptions(), nullptr);
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.h
new file mode 100644
index 0000000000..49bb1e31e5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/external_sst_file_ingestion_job.h
@@ -0,0 +1,238 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "db/column_family.h"
+#include "db/internal_stats.h"
+#include "db/snapshot_impl.h"
+#include "db/version_edit.h"
+#include "env/file_system_tracer.h"
+#include "logging/event_logger.h"
+#include "options/db_options.h"
+#include "rocksdb/db.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/sst_file_writer.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Directories;
+class SystemClock;
+
+struct IngestedFileInfo {
+  // External file path
+  std::string external_file_path;
+  // Smallest internal key in external file
+  InternalKey smallest_internal_key;
+  // Largest internal key in external file
+  InternalKey largest_internal_key;
+  // Sequence number for keys in external file
+  SequenceNumber original_seqno;
+  // Offset of the global sequence number field in the file, will
+  // be zero if version is 1 (global seqno is not supported)
+  size_t global_seqno_offset;
+  // External file size
+  uint64_t file_size;
+  // total number of keys in external file
+  uint64_t num_entries;
+  // total number of range deletions in external file
+  uint64_t num_range_deletions;
+  // Id of column family this file should be ingested into
+  uint32_t cf_id;
+  // TableProperties read from external file
+  TableProperties table_properties;
+  // Version of external file
+  int version;
+
+  // FileDescriptor for the file inside the DB
+  FileDescriptor fd;
+  // file path that we picked for file inside the DB
+  std::string internal_file_path;
+  // Global sequence number that we picked for the file inside the DB
+  SequenceNumber assigned_seqno = 0;
+  // Level inside the DB we picked for the external file.
+  int picked_level = 0;
+  // Whether to copy or link the external sst file. copy_file will be set to
+  // false if ingestion_options.move_files is true and the underlying FS
+  // supports the link operation. A default value is needed to keep the
+  // undefined-behavior sanity check of llvm happy. Since
+  // ingestion_options.move_files is false by default, copy_file is true
+  // by default.
+  bool copy_file = true;
+  // The checksum of the ingested file
+  std::string file_checksum;
+  // The name of the checksum function that generated the checksum
+  std::string file_checksum_func_name;
+  // The temperature of the file to be ingested
+  Temperature file_temperature = Temperature::kUnknown;
+  // Unique id of the file to be ingested
+  UniqueId64x2 unique_id{};
+};
+
+class ExternalSstFileIngestionJob {
+ public:
+  ExternalSstFileIngestionJob(
+      VersionSet* versions, ColumnFamilyData* cfd,
+      const ImmutableDBOptions& db_options,
+      const MutableDBOptions& mutable_db_options,
+      const EnvOptions& env_options, SnapshotList* db_snapshots,
+      const IngestExternalFileOptions& ingestion_options,
+      Directories* directories, EventLogger* event_logger,
+      const std::shared_ptr<IOTracer>& io_tracer)
+      : clock_(db_options.clock),
+        fs_(db_options.fs, io_tracer),
+        versions_(versions),
+        cfd_(cfd),
+        db_options_(db_options),
+        mutable_db_options_(mutable_db_options),
+        env_options_(env_options),
+        db_snapshots_(db_snapshots),
+        ingestion_options_(ingestion_options),
+        directories_(directories),
+        event_logger_(event_logger),
+        job_start_time_(clock_->NowMicros()),
+        consumed_seqno_count_(0),
+        io_tracer_(io_tracer) {
+    assert(directories != nullptr);
+  }
+
+  ~ExternalSstFileIngestionJob() {
+    for (const auto& c : file_ingesting_compactions_) {
+      cfd_->compaction_picker()->UnregisterCompaction(c);
+      delete c;
+    }
+
+    for (const auto& f : compaction_input_metdatas_) {
+      delete f;
+    }
+  }
+
+  // Prepare the job by copying external files into the DB.
+  Status Prepare(const std::vector<std::string>& external_files_paths,
+                 const std::vector<std::string>& files_checksums,
+                 const std::vector<std::string>& files_checksum_func_names,
+                 const Temperature& file_temperature,
+                 uint64_t next_file_number, SuperVersion* sv);
+
+  // Check if we need to flush the memtable before running the ingestion job.
+  // This will be true if the files we are ingesting are overlapping with any
+  // key range in the memtable.
+  //
+  // @param super_version A referenced SuperVersion that will be held for the
+  //    duration of this function.
+  //
+  // Thread-safe
+  Status NeedsFlush(bool* flush_needed, SuperVersion* super_version);
+
+  // Will execute the ingestion job and prepare edit() to be applied.
+  // REQUIRES: Mutex held
+  Status Run();
+
+  // Register key range involved in this ingestion job
+  // to prevent key range conflict with other ongoing compaction/file ingestion
+  // REQUIRES: Mutex held
+  void RegisterRange();
+
+  // Unregister key range registered for this ingestion job
+  // REQUIRES: Mutex held
+  void UnregisterRange();
+
+  // Update column family stats.
+  // REQUIRES: Mutex held
+  void UpdateStats();
+
+  // Cleanup after successful/failed job
+  void Cleanup(const Status& status);
+
+  VersionEdit* edit() { return &edit_; }
+
+  const autovector<IngestedFileInfo>& files_to_ingest() const {
+    return files_to_ingest_;
+  }
+
+  // How many sequence numbers did we consume as part of the ingest job?
+  int ConsumedSequenceNumbersCount() const { return consumed_seqno_count_; }
+
+ private:
+  // Open the external file and populate `file_to_ingest` with all the
+  // external information we need to ingest this file.
+  Status GetIngestedFileInfo(const std::string& external_file,
+                             uint64_t new_file_number,
+                             IngestedFileInfo* file_to_ingest,
+                             SuperVersion* sv);
+
+  // Assign `file_to_ingest` the appropriate sequence number and the lowest
+  // possible level that it can be ingested into according to
+  // compaction_style.
+  // REQUIRES: Mutex held
+  Status AssignLevelAndSeqnoForIngestedFile(SuperVersion* sv,
+                                            bool force_global_seqno,
+                                            CompactionStyle compaction_style,
+                                            SequenceNumber last_seqno,
+                                            IngestedFileInfo* file_to_ingest,
+                                            SequenceNumber* assigned_seqno);
+
+  // A file that we want to ingest behind always goes to the lowest level;
+  // we just check that it fits in the level, that the DB allows
+  // ingest_behind, and that we don't have 0 seqnums at the upper levels.
+  // REQUIRES: Mutex held
+  Status CheckLevelForIngestedBehindFile(IngestedFileInfo* file_to_ingest);
+
+  // Set the file global sequence number to `seqno`
+  Status AssignGlobalSeqnoForIngestedFile(IngestedFileInfo* file_to_ingest,
+                                          SequenceNumber seqno);
+  // Generate the file checksum and store it in the IngestedFileInfo
+  IOStatus GenerateChecksumForIngestedFile(IngestedFileInfo* file_to_ingest);
+
+  // Check if `file_to_ingest` can fit in level `level`
+  // REQUIRES: Mutex held
+  bool IngestedFileFitInLevel(const IngestedFileInfo* file_to_ingest,
+                              int level);
+
+  // Helper method to sync the given file.
+  template <typename TWritableFile>
+  Status SyncIngestedFile(TWritableFile* file);
+
+  // Create `Compaction` objects equivalent to this file ingestion job,
+  // which will be used to check range conflicts with other ongoing
+  // compactions.
+  void CreateEquivalentFileIngestingCompactions();
+
+  SystemClock* clock_;
+  FileSystemPtr fs_;
+  VersionSet* versions_;
+  ColumnFamilyData* cfd_;
+  const ImmutableDBOptions& db_options_;
+  const MutableDBOptions& mutable_db_options_;
+  const EnvOptions& env_options_;
+  SnapshotList* db_snapshots_;
+  autovector<IngestedFileInfo> files_to_ingest_;
+  const IngestExternalFileOptions& ingestion_options_;
+  Directories* directories_;
+  EventLogger* event_logger_;
+  VersionEdit edit_;
+  uint64_t job_start_time_;
+  int consumed_seqno_count_;
+  // Set in ExternalSstFileIngestionJob::Prepare(); if true, all files are
+  // ingested into L0
+  bool files_overlap_{false};
+  // Set in ExternalSstFileIngestionJob::Prepare(); if true and the DB's
+  // file_checksum_gen_factory is set, DB will generate a checksum for each
+  // file.
+  bool need_generate_file_checksum_{true};
+  std::shared_ptr<IOTracer> io_tracer_;
+
+  // Below are variables used in (un)registering range for this ingestion job
+  //
+  // FileMetaData used in inputs of compactions equivalent to this ingestion
+  // job
+  std::vector<FileMetaData*> compaction_input_metdatas_;
+  // Compactions equivalent to this ingestion job
+  std::vector<Compaction*> file_ingesting_compactions_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
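For orientation, the job declared above is the engine behind RocksDB's public ingestion API: a typical caller builds an external SST with SstFileWriter (keys added in sorted order) and hands it to DB::IngestExternalFile. A sketch with hypothetical paths and status checks elided for brevity:

    #include <rocksdb/db.h>
    #include <rocksdb/options.h>
    #include <rocksdb/sst_file_writer.h>

    using namespace ROCKSDB_NAMESPACE;

    int main() {
      Options options;
      options.create_if_missing = true;

      // 1. Build an external SST (keys must be added in sorted order).
      SstFileWriter writer(EnvOptions(), options);
      writer.Open("/tmp/external.sst");
      writer.Put("k1", "v1");
      writer.Put("k2", "v2");
      writer.Finish();

      // 2. Ingest it; move_files enables the link-instead-of-copy path that
      // Prepare() implements above.
      DB* db = nullptr;
      DB::Open(options, "/tmp/ingest_demo_db", &db);
      IngestExternalFileOptions ifo;
      ifo.move_files = true;
      db->IngestExternalFile({"/tmp/external.sst"}, ifo);
      delete db;
    }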
+ +#include "db/file_indexer.h" + +#include +#include + +#include "db/version_edit.h" +#include "rocksdb/comparator.h" + +namespace ROCKSDB_NAMESPACE { + +FileIndexer::FileIndexer(const Comparator* ucmp) + : num_levels_(0), ucmp_(ucmp), level_rb_(nullptr) {} + +size_t FileIndexer::NumLevelIndex() const { return next_level_index_.size(); } + +size_t FileIndexer::LevelIndexSize(size_t level) const { + if (level >= next_level_index_.size()) { + return 0; + } + return next_level_index_[level].num_index; +} + +void FileIndexer::GetNextLevelIndex(const size_t level, const size_t file_index, + const int cmp_smallest, + const int cmp_largest, int32_t* left_bound, + int32_t* right_bound) const { + assert(level > 0); + + // Last level, no hint + if (level == num_levels_ - 1) { + *left_bound = 0; + *right_bound = -1; + return; + } + + assert(level < num_levels_ - 1); + assert(static_cast(file_index) <= level_rb_[level]); + + const IndexUnit* index_units = next_level_index_[level].index_units; + const auto& index = index_units[file_index]; + + if (cmp_smallest < 0) { + *left_bound = (level > 0 && file_index > 0) + ? index_units[file_index - 1].largest_lb + : 0; + *right_bound = index.smallest_rb; + } else if (cmp_smallest == 0) { + *left_bound = index.smallest_lb; + *right_bound = index.smallest_rb; + } else if (cmp_smallest > 0 && cmp_largest < 0) { + *left_bound = index.smallest_lb; + *right_bound = index.largest_rb; + } else if (cmp_largest == 0) { + *left_bound = index.largest_lb; + *right_bound = index.largest_rb; + } else if (cmp_largest > 0) { + *left_bound = index.largest_lb; + *right_bound = level_rb_[level + 1]; + } else { + assert(false); + } + + assert(*left_bound >= 0); + assert(*left_bound <= *right_bound + 1); + assert(*right_bound <= level_rb_[level + 1]); +} + +void FileIndexer::UpdateIndex(Arena* arena, const size_t num_levels, + std::vector* const files) { + if (files == nullptr) { + return; + } + if (num_levels == 0) { // uint_32 0-1 would cause bad behavior + num_levels_ = num_levels; + return; + } + assert(level_rb_ == nullptr); // level_rb_ should be init here + + num_levels_ = num_levels; + next_level_index_.resize(num_levels); + + char* mem = arena->AllocateAligned(num_levels_ * sizeof(int32_t)); + level_rb_ = new (mem) int32_t[num_levels_]; + for (size_t i = 0; i < num_levels_; i++) { + level_rb_[i] = -1; + } + + // L1 - Ln-1 + for (size_t level = 1; level < num_levels_ - 1; ++level) { + const auto& upper_files = files[level]; + const int32_t upper_size = static_cast(upper_files.size()); + const auto& lower_files = files[level + 1]; + level_rb_[level] = static_cast(upper_files.size()) - 1; + if (upper_size == 0) { + continue; + } + IndexLevel& index_level = next_level_index_[level]; + index_level.num_index = upper_size; + mem = arena->AllocateAligned(upper_size * sizeof(IndexUnit)); + index_level.index_units = new (mem) IndexUnit[upper_size]; + + CalculateLB( + upper_files, lower_files, &index_level, + [this](const FileMetaData* a, const FileMetaData* b) -> int { + return ucmp_->CompareWithoutTimestamp(a->smallest.user_key(), + b->largest.user_key()); + }, + [](IndexUnit* index, int32_t f_idx) { index->smallest_lb = f_idx; }); + CalculateLB( + upper_files, lower_files, &index_level, + [this](const FileMetaData* a, const FileMetaData* b) -> int { + return ucmp_->CompareWithoutTimestamp(a->largest.user_key(), + b->largest.user_key()); + }, + [](IndexUnit* index, int32_t f_idx) { index->largest_lb = f_idx; }); + CalculateRB( + upper_files, lower_files, &index_level, + 
+        [this](const FileMetaData* a, const FileMetaData* b) -> int {
+          return ucmp_->CompareWithoutTimestamp(a->smallest.user_key(),
+                                                b->smallest.user_key());
+        },
+        [](IndexUnit* index, int32_t f_idx) { index->smallest_rb = f_idx; });
+    CalculateRB(
+        upper_files, lower_files, &index_level,
+        [this](const FileMetaData* a, const FileMetaData* b) -> int {
+          return ucmp_->CompareWithoutTimestamp(a->largest.user_key(),
+                                                b->smallest.user_key());
+        },
+        [](IndexUnit* index, int32_t f_idx) { index->largest_rb = f_idx; });
+  }
+
+  level_rb_[num_levels_ - 1] =
+      static_cast<int32_t>(files[num_levels_ - 1].size()) - 1;
+}
+
+void FileIndexer::CalculateLB(
+    const std::vector<FileMetaData*>& upper_files,
+    const std::vector<FileMetaData*>& lower_files, IndexLevel* index_level,
+    std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
+    std::function<void(IndexUnit*, int32_t)> set_index) {
+  const int32_t upper_size = static_cast<int32_t>(upper_files.size());
+  const int32_t lower_size = static_cast<int32_t>(lower_files.size());
+  int32_t upper_idx = 0;
+  int32_t lower_idx = 0;
+
+  IndexUnit* index = index_level->index_units;
+  while (upper_idx < upper_size && lower_idx < lower_size) {
+    int cmp = cmp_op(upper_files[upper_idx], lower_files[lower_idx]);
+
+    if (cmp == 0) {
+      set_index(&index[upper_idx], lower_idx);
+      ++upper_idx;
+    } else if (cmp > 0) {
+      // Lower level's file (largest) is smaller, a key won't hit in that
+      // file. Move to next lower file
+      ++lower_idx;
+    } else {
+      // Lower level's file becomes larger, update the index, and
+      // move to the next upper file
+      set_index(&index[upper_idx], lower_idx);
+      ++upper_idx;
+    }
+  }
+
+  while (upper_idx < upper_size) {
+    // Lower files are exhausted, that means the remaining upper files are
+    // greater than any lower files. Set the index to be the lower level size.
+    set_index(&index[upper_idx], lower_size);
+    ++upper_idx;
+  }
+}
+
+void FileIndexer::CalculateRB(
+    const std::vector<FileMetaData*>& upper_files,
+    const std::vector<FileMetaData*>& lower_files, IndexLevel* index_level,
+    std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
+    std::function<void(IndexUnit*, int32_t)> set_index) {
+  const int32_t upper_size = static_cast<int32_t>(upper_files.size());
+  const int32_t lower_size = static_cast<int32_t>(lower_files.size());
+  int32_t upper_idx = upper_size - 1;
+  int32_t lower_idx = lower_size - 1;
+
+  IndexUnit* index = index_level->index_units;
+  while (upper_idx >= 0 && lower_idx >= 0) {
+    int cmp = cmp_op(upper_files[upper_idx], lower_files[lower_idx]);
+
+    if (cmp == 0) {
+      set_index(&index[upper_idx], lower_idx);
+      --upper_idx;
+    } else if (cmp < 0) {
+      // Lower level's file (smallest) is larger, a key won't hit in that
+      // file. Move to next lower file.
+      --lower_idx;
+    } else {
+      // Lower level's file becomes smaller, update the index, and move to
+      // the next upper file
+      set_index(&index[upper_idx], lower_idx);
+      --upper_idx;
+    }
+  }
+  while (upper_idx >= 0) {
+    // Lower files are exhausted, that means the remaining upper files are
+    // smaller than any lower files. Set it to -1.
+    set_index(&index[upper_idx], -1);
+    --upper_idx;
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/file_indexer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/file_indexer.h
new file mode 100644
index 0000000000..45cb136150
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/file_indexer.h
@@ -0,0 +1,140 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <vector>
+
+#include "memory/arena.h"
+#include "port/port.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Comparator;
+struct FileMetaData;
+struct FdWithKeyRange;
+struct FileLevel;
+
+// The file tree structure in Version is prebuilt and the range of each file
+// is known. On Version::Get(), it uses binary search to find a potential file
+// and then checks if a target key can be found in the file by comparing the
+// key to each file's smallest and largest key. The results of these
+// comparisons can be reused beyond checking if a key falls into a file's
+// range. With some pre-calculated knowledge, each key comparison that has
+// been done can serve as a hint to narrow down further searches: if a key
+// compares smaller than a file's smallest or largest key, that comparison can
+// be used to find the right bound of the next binary search. Similarly, if a
+// key compares larger than a file's smallest or largest key, it can be
+// utilized to find the left bound of the next binary search.
+// With these hints, the range of a binary search can be greatly reduced,
+// especially for bottom levels, given that one file most likely overlaps with
+// only N files from the level below (where N is
+// max_bytes_for_level_multiplier). So on level L, we will only look at ~N
+// files instead of N^L files with the naive approach.
+class FileIndexer {
+ public:
+  explicit FileIndexer(const Comparator* ucmp);
+
+  size_t NumLevelIndex() const;
+
+  size_t LevelIndexSize(size_t level) const;
+
+  // Return a file index range in the next level to search for a key based on
+  // smallest and largest key comparisons for the current file specified by
+  // level and file_index. When *left_index < *right_index, both indices
+  // should be valid and fit in the vector size.
+  void GetNextLevelIndex(const size_t level, const size_t file_index,
+                         const int cmp_smallest, const int cmp_largest,
+                         int32_t* left_bound, int32_t* right_bound) const;
+
+  void UpdateIndex(Arena* arena, const size_t num_levels,
+                   std::vector<FileMetaData*>* const files);
+
+  enum { kLevelMaxIndex = std::numeric_limits<int32_t>::max() };
+
+ private:
+  size_t num_levels_;
+  const Comparator* ucmp_;
+
+  struct IndexUnit {
+    IndexUnit()
+        : smallest_lb(0), largest_lb(0), smallest_rb(-1), largest_rb(-1) {}
+    // During file search, a key is compared against smallest and largest
+    // from a FileMetaData. It can have 3 possible outcomes:
+    // (1) key is smaller than smallest, implying it is also smaller than
+    //     largest. Precalculated index based on "smallest < smallest" can
+    //     be used to provide a right bound.
+    // (2) key is in between smallest and largest.
+    //     Precalculated index based on "smallest > largest" can be used to
+    //     provide a left bound.
+    //     Precalculated index based on "largest < smallest" can be used to
+    //     provide a right bound.
+    // (3) key is larger than largest, implying it is also larger than
+    //     smallest. Precalculated index based on "largest > largest" can be
+    //     used to provide a left bound.
+    //
+    // As a result, we will need to do:
+    // Compare smallest (<=) and largest keys from upper level file with
+    // smallest key from lower level to get a right bound.
+    // Compare smallest (>=) and largest keys from upper level file with
+    // largest key from lower level to get a left bound.
+    //
+    // Example:
+    //    level 1:              [50 - 60]
+    //    level 2:        [1 - 40], [45 - 55], [58 - 80]
+    // A key 35, compared to be less than 50, 3rd file on level 2 can be
+    // skipped according to rule (1). LB = 0, RB = 1.
+    // A key 53, sits in the middle of 50 and 60. 1st file on level 2 can be
+    // skipped according to rule (2), but the 3rd file cannot be skipped
+    // because 60 is greater than 58. LB = 1, RB = 2.
+    // A key 70, compared to be larger than 60. 1st and 2nd files can be
+    // skipped according to rule (3). LB = 2, RB = 2.
+    //
+    // Point to a left most file in a lower level that may contain a key,
+    // which compares greater than smallest of a FileMetaData (upper level)
+    int32_t smallest_lb;
+    // Point to a left most file in a lower level that may contain a key,
+    // which compares greater than largest of a FileMetaData (upper level)
+    int32_t largest_lb;
+    // Point to a right most file in a lower level that may contain a key,
+    // which compares smaller than smallest of a FileMetaData (upper level)
+    int32_t smallest_rb;
+    // Point to a right most file in a lower level that may contain a key,
+    // which compares smaller than largest of a FileMetaData (upper level)
+    int32_t largest_rb;
+  };
+
+  // Data structure to store IndexUnits in a whole level
+  struct IndexLevel {
+    size_t num_index;
+    IndexUnit* index_units;
+
+    IndexLevel() : num_index(0), index_units(nullptr) {}
+  };
+
+  void CalculateLB(
+      const std::vector<FileMetaData*>& upper_files,
+      const std::vector<FileMetaData*>& lower_files, IndexLevel* index_level,
+      std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
+      std::function<void(IndexUnit*, int32_t)> set_index);
+
+  void CalculateRB(
+      const std::vector<FileMetaData*>& upper_files,
+      const std::vector<FileMetaData*>& lower_files, IndexLevel* index_level,
+      std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
+      std::function<void(IndexUnit*, int32_t)> set_index);
+
+  autovector<IndexLevel> next_level_index_;
+  int32_t* level_rb_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
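The two-pointer sweeps in CalculateLB()/CalculateRB() and the worked example in the header comment can be checked with a small standalone model. This sketch computes, for each upper-level file, the leftmost lower-level file whose largest key is not smaller than the upper file's smallest key (integers stand in for user keys):

    #include <iostream>
    #include <utility>
    #include <vector>

    using Range = std::pair<int, int>;  // [smallest, largest], sorted, disjoint

    // Two-pointer sweep in the spirit of CalculateLB: for every upper file,
    // the index of the leftmost lower file with lower.largest >= upper.smallest.
    std::vector<int> SmallestLB(const std::vector<Range>& upper,
                                const std::vector<Range>& lower) {
      std::vector<int> lb(upper.size());
      size_t u = 0, l = 0;
      while (u < upper.size() && l < lower.size()) {
        if (lower[l].second < upper[u].first) {
          ++l;  // this lower file ends before the upper file begins
        } else {
          lb[u++] = static_cast<int>(l);  // first candidate found
        }
      }
      while (u < upper.size()) lb[u++] = static_cast<int>(lower.size());
      return lb;
    }

    int main() {
      // Mirrors the example in file_indexer.h: level 1 vs level 2.
      std::vector<Range> upper{{50, 60}};
      std::vector<Range> lower{{1, 40}, {45, 55}, {58, 80}};
      std::cout << SmallestLB(upper, lower)[0] << "\n";  // prints 1
    }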
+ +#include "db/flush_job.h" + +#include +#include +#include + +#include "db/builder.h" +#include "db/db_iter.h" +#include "db/dbformat.h" +#include "db/event_helpers.h" +#include "db/log_reader.h" +#include "db/log_writer.h" +#include "db/memtable.h" +#include "db/memtable_list.h" +#include "db/merge_context.h" +#include "db/range_tombstone_fragmenter.h" +#include "db/version_edit.h" +#include "db/version_set.h" +#include "file/file_util.h" +#include "file/filename.h" +#include "logging/event_logger.h" +#include "logging/log_buffer.h" +#include "logging/logging.h" +#include "monitoring/iostats_context_imp.h" +#include "monitoring/perf_context_imp.h" +#include "monitoring/thread_status_util.h" +#include "port/port.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/statistics.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "table/merging_iterator.h" +#include "table/table_builder.h" +#include "table/two_level_iterator.h" +#include "test_util/sync_point.h" +#include "util/coding.h" +#include "util/mutexlock.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +const char* GetFlushReasonString(FlushReason flush_reason) { + switch (flush_reason) { + case FlushReason::kOthers: + return "Other Reasons"; + case FlushReason::kGetLiveFiles: + return "Get Live Files"; + case FlushReason::kShutDown: + return "Shut down"; + case FlushReason::kExternalFileIngestion: + return "External File Ingestion"; + case FlushReason::kManualCompaction: + return "Manual Compaction"; + case FlushReason::kWriteBufferManager: + return "Write Buffer Manager"; + case FlushReason::kWriteBufferFull: + return "Write Buffer Full"; + case FlushReason::kTest: + return "Test"; + case FlushReason::kDeleteFiles: + return "Delete Files"; + case FlushReason::kAutoCompaction: + return "Auto Compaction"; + case FlushReason::kManualFlush: + return "Manual Flush"; + case FlushReason::kErrorRecovery: + return "Error Recovery"; + case FlushReason::kWalFull: + return "WAL Full"; + default: + return "Invalid"; + } +} + +FlushJob::FlushJob( + const std::string& dbname, ColumnFamilyData* cfd, + const ImmutableDBOptions& db_options, + const MutableCFOptions& mutable_cf_options, uint64_t max_memtable_id, + const FileOptions& file_options, VersionSet* versions, + InstrumentedMutex* db_mutex, std::atomic* shutting_down, + std::vector existing_snapshots, + SequenceNumber earliest_write_conflict_snapshot, + SnapshotChecker* snapshot_checker, JobContext* job_context, + FlushReason flush_reason, LogBuffer* log_buffer, FSDirectory* db_directory, + FSDirectory* output_file_directory, CompressionType output_compression, + Statistics* stats, EventLogger* event_logger, bool measure_io_stats, + const bool sync_output_directory, const bool write_manifest, + Env::Priority thread_pri, const std::shared_ptr& io_tracer, + const SeqnoToTimeMapping& seqno_time_mapping, const std::string& db_id, + const std::string& db_session_id, std::string full_history_ts_low, + BlobFileCompletionCallback* blob_callback) + : dbname_(dbname), + db_id_(db_id), + db_session_id_(db_session_id), + cfd_(cfd), + db_options_(db_options), + mutable_cf_options_(mutable_cf_options), + max_memtable_id_(max_memtable_id), + file_options_(file_options), + versions_(versions), + db_mutex_(db_mutex), + shutting_down_(shutting_down), + existing_snapshots_(std::move(existing_snapshots)), + earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot), + snapshot_checker_(snapshot_checker), + job_context_(job_context), + 
+      flush_reason_(flush_reason),
+      log_buffer_(log_buffer),
+      db_directory_(db_directory),
+      output_file_directory_(output_file_directory),
+      output_compression_(output_compression),
+      stats_(stats),
+      event_logger_(event_logger),
+      measure_io_stats_(measure_io_stats),
+      sync_output_directory_(sync_output_directory),
+      write_manifest_(write_manifest),
+      edit_(nullptr),
+      base_(nullptr),
+      pick_memtable_called(false),
+      thread_pri_(thread_pri),
+      io_tracer_(io_tracer),
+      clock_(db_options_.clock),
+      full_history_ts_low_(std::move(full_history_ts_low)),
+      blob_callback_(blob_callback),
+      db_impl_seqno_time_mapping_(seqno_time_mapping) {
+  // Update the thread status to indicate flush.
+  ReportStartedFlush();
+  TEST_SYNC_POINT("FlushJob::FlushJob()");
+}
+
+FlushJob::~FlushJob() { ThreadStatusUtil::ResetThreadStatus(); }
+
+void FlushJob::ReportStartedFlush() {
+  ThreadStatusUtil::SetEnableTracking(db_options_.enable_thread_tracking);
+  ThreadStatusUtil::SetColumnFamily(cfd_);
+  ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_FLUSH);
+  ThreadStatusUtil::SetThreadOperationProperty(ThreadStatus::COMPACTION_JOB_ID,
+                                               job_context_->job_id);
+
+  IOSTATS_RESET(bytes_written);
+}
+
+void FlushJob::ReportFlushInputSize(const autovector<MemTable*>& mems) {
+  uint64_t input_size = 0;
+  for (auto* mem : mems) {
+    input_size += mem->ApproximateMemoryUsage();
+  }
+  ThreadStatusUtil::IncreaseThreadOperationProperty(
+      ThreadStatus::FLUSH_BYTES_MEMTABLES, input_size);
+}
+
+void FlushJob::RecordFlushIOStats() {
+  RecordTick(stats_, FLUSH_WRITE_BYTES, IOSTATS(bytes_written));
+  ThreadStatusUtil::IncreaseThreadOperationProperty(
+      ThreadStatus::FLUSH_BYTES_WRITTEN, IOSTATS(bytes_written));
+  IOSTATS_RESET(bytes_written);
+}
+void FlushJob::PickMemTable() {
+  db_mutex_->AssertHeld();
+  assert(!pick_memtable_called);
+  pick_memtable_called = true;
+
+  // Maximum "NextLogNumber" of the memtables to flush.
+  // When the mempurge feature is turned off, this variable is useless
+  // because the memtables are implicitly sorted by increasing order of
+  // creation time. Therefore mems_->back()->GetNextLogNumber() is already
+  // equal to max_next_log_number. However when mempurge is on, the memtables
+  // are no longer sorted by increasing order of creation time. Therefore this
+  // variable becomes necessary because mems_->back()->GetNextLogNumber() is
+  // no longer necessarily equal to max_next_log_number.
+  uint64_t max_next_log_number = 0;
+
+  // Save the contents of the earliest memtable as a new Table
+  cfd_->imm()->PickMemtablesToFlush(max_memtable_id_, &mems_,
+                                    &max_next_log_number);
+  if (mems_.empty()) {
+    return;
+  }
+
+  ReportFlushInputSize(mems_);
+
+  // The memtables in mems_ are (implicitly) sorted in ascending order by
+  // their creation time. We will use the first memtable's `edit` to keep the
+  // meta info for this flush.
+  MemTable* m = mems_[0];
+  edit_ = m->GetEdits();
+  edit_->SetPrevLogNumber(0);
+  // SetLogNumber(log_num) indicates logs with number smaller than log_num
+  // will no longer be picked up for recovery.
+  edit_->SetLogNumber(max_next_log_number);
+  edit_->SetColumnFamily(cfd_->GetID());
+
+  // path 0 for level 0 file.
+ meta_.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0); + meta_.epoch_number = cfd_->NewEpochNumber(); + + base_ = cfd_->current(); + base_->Ref(); // it is likely that we do not need this reference +} + +Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta, + bool* switched_to_mempurge) { + TEST_SYNC_POINT("FlushJob::Start"); + db_mutex_->AssertHeld(); + assert(pick_memtable_called); + // Mempurge threshold can be dynamically changed. + // For sake of consistency, mempurge_threshold is + // saved locally to maintain consistency in each + // FlushJob::Run call. + double mempurge_threshold = + mutable_cf_options_.experimental_mempurge_threshold; + + AutoThreadOperationStageUpdater stage_run(ThreadStatus::STAGE_FLUSH_RUN); + if (mems_.empty()) { + ROCKS_LOG_BUFFER(log_buffer_, "[%s] Nothing in memtable to flush", + cfd_->GetName().c_str()); + return Status::OK(); + } + + // I/O measurement variables + PerfLevel prev_perf_level = PerfLevel::kEnableTime; + uint64_t prev_write_nanos = 0; + uint64_t prev_fsync_nanos = 0; + uint64_t prev_range_sync_nanos = 0; + uint64_t prev_prepare_write_nanos = 0; + uint64_t prev_cpu_write_nanos = 0; + uint64_t prev_cpu_read_nanos = 0; + if (measure_io_stats_) { + prev_perf_level = GetPerfLevel(); + SetPerfLevel(PerfLevel::kEnableTime); + prev_write_nanos = IOSTATS(write_nanos); + prev_fsync_nanos = IOSTATS(fsync_nanos); + prev_range_sync_nanos = IOSTATS(range_sync_nanos); + prev_prepare_write_nanos = IOSTATS(prepare_write_nanos); + prev_cpu_write_nanos = IOSTATS(cpu_write_nanos); + prev_cpu_read_nanos = IOSTATS(cpu_read_nanos); + } + Status mempurge_s = Status::NotFound("No MemPurge."); + if ((mempurge_threshold > 0.0) && + (flush_reason_ == FlushReason::kWriteBufferFull) && (!mems_.empty()) && + MemPurgeDecider(mempurge_threshold) && !(db_options_.atomic_flush)) { + cfd_->SetMempurgeUsed(); + mempurge_s = MemPurge(); + if (!mempurge_s.ok()) { + // Mempurge is typically aborted when the output + // bytes cannot be contained onto a single output memtable. + if (mempurge_s.IsAborted()) { + ROCKS_LOG_INFO(db_options_.info_log, "Mempurge process aborted: %s\n", + mempurge_s.ToString().c_str()); + } else { + // However the mempurge process can also fail for + // other reasons (eg: new_mem->Add() fails). + ROCKS_LOG_WARN(db_options_.info_log, "Mempurge process failed: %s\n", + mempurge_s.ToString().c_str()); + } + } else { + if (switched_to_mempurge) { + *switched_to_mempurge = true; + } else { + // The mempurge process was successful, but no switch_to_mempurge + // pointer provided so no way to propagate the state of flush job. + ROCKS_LOG_WARN(db_options_.info_log, + "Mempurge process succeeded" + "but no 'switched_to_mempurge' ptr provided.\n"); + } + } + } + Status s; + if (mempurge_s.ok()) { + base_->Unref(); + s = Status::OK(); + } else { + // This will release and re-acquire the mutex. 
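For context (illustrative, not part of this patch): the gate above only fires for FlushReason::kWriteBufferFull flushes when the user has opted in through the column-family option mirrored by mutable_cf_options_.experimental_mempurge_threshold, and only when atomic flush is off. A minimal sketch, assuming the public RocksDB options API:

    #include "rocksdb/options.h"

    rocksdb::Options MempurgeEnabledOptions() {
      rocksdb::Options options;
      // A value > 0.0 opts in to MemPurgeDecider(); see the condition above.
      options.experimental_mempurge_threshold = 1.0;
      // Mempurge is skipped entirely when atomic flush is enabled.
      options.atomic_flush = false;  // the default
      return options;
    }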
+ s = WriteLevel0Table(); + } + + if (s.ok() && cfd_->IsDropped()) { + s = Status::ColumnFamilyDropped("Column family dropped during compaction"); + } + if ((s.ok() || s.IsColumnFamilyDropped()) && + shutting_down_->load(std::memory_order_acquire)) { + s = Status::ShutdownInProgress("Database shutdown"); + } + + if (!s.ok()) { + cfd_->imm()->RollbackMemtableFlush(mems_, meta_.fd.GetNumber()); + } else if (write_manifest_) { + TEST_SYNC_POINT("FlushJob::InstallResults"); + // Replace immutable memtable with the generated Table + s = cfd_->imm()->TryInstallMemtableFlushResults( + cfd_, mutable_cf_options_, mems_, prep_tracker, versions_, db_mutex_, + meta_.fd.GetNumber(), &job_context_->memtables_to_free, db_directory_, + log_buffer_, &committed_flush_jobs_info_, + !(mempurge_s.ok()) /* write_edit : true if no mempurge happened (or if aborted), + but 'false' if mempurge successful: no new min log number + or new level 0 file path to write to manifest. */); + } + + if (s.ok() && file_meta != nullptr) { + *file_meta = meta_; + } + RecordFlushIOStats(); + + // When measure_io_stats_ is true, the default 512 bytes is not enough. + auto stream = event_logger_->LogToBuffer(log_buffer_, 1024); + stream << "job" << job_context_->job_id << "event" + << "flush_finished"; + stream << "output_compression" + << CompressionTypeToString(output_compression_); + stream << "lsm_state"; + stream.StartArray(); + auto vstorage = cfd_->current()->storage_info(); + for (int level = 0; level < vstorage->num_levels(); ++level) { + stream << vstorage->NumLevelFiles(level); + } + stream.EndArray(); + + const auto& blob_files = vstorage->GetBlobFiles(); + if (!blob_files.empty()) { + assert(blob_files.front()); + stream << "blob_file_head" << blob_files.front()->GetBlobFileNumber(); + + assert(blob_files.back()); + stream << "blob_file_tail" << blob_files.back()->GetBlobFileNumber(); + } + + stream << "immutable_memtables" << cfd_->imm()->NumNotFlushed(); + + if (measure_io_stats_) { + if (prev_perf_level != PerfLevel::kEnableTime) { + SetPerfLevel(prev_perf_level); + } + stream << "file_write_nanos" << (IOSTATS(write_nanos) - prev_write_nanos); + stream << "file_range_sync_nanos" + << (IOSTATS(range_sync_nanos) - prev_range_sync_nanos); + stream << "file_fsync_nanos" << (IOSTATS(fsync_nanos) - prev_fsync_nanos); + stream << "file_prepare_write_nanos" + << (IOSTATS(prepare_write_nanos) - prev_prepare_write_nanos); + stream << "file_cpu_write_nanos" + << (IOSTATS(cpu_write_nanos) - prev_cpu_write_nanos); + stream << "file_cpu_read_nanos" + << (IOSTATS(cpu_read_nanos) - prev_cpu_read_nanos); + } + + return s; +} + +void FlushJob::Cancel() { + db_mutex_->AssertHeld(); + assert(base_ != nullptr); + base_->Unref(); +} + +Status FlushJob::MemPurge() { + Status s; + db_mutex_->AssertHeld(); + db_mutex_->Unlock(); + assert(!mems_.empty()); + + // Measure purging time. + const uint64_t start_micros = clock_->NowMicros(); + const uint64_t start_cpu_micros = clock_->CPUMicros(); + + MemTable* new_mem = nullptr; + // For performance/log investigation purposes: + // look at how much useful payload we harvest in the new_mem. + // This value is then printed to the DB log. + double new_mem_capacity = 0.0; + + // Create two iterators, one for the memtable data (contains + // info from puts + deletes), and one for the memtable + // Range Tombstones (from DeleteRanges). 
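A hypothetical call-site sketch (types from this file, helper invented) of the lifecycle contract asserted above and in flush_job.h: PickMemTable() runs under the DB mutex and must be followed by exactly one of Run() or Cancel():

    void RunOrCancelFlush(FlushJob& job, InstrumentedMutex* db_mutex,
                          bool abort) {
      db_mutex->Lock();
      job.PickMemTable();  // takes a ref on the current Version
      if (abort) {
        job.Cancel();  // releases the Version ref without flushing
      } else {
        FileMetaData file_meta;
        Status s = job.Run(/*prep_tracker=*/nullptr, &file_meta,
                           /*switched_to_mempurge=*/nullptr);
        s.PermitUncheckedError();  // illustration only; real callers check s
      }
      db_mutex->Unlock();
    }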
+ // TODO: plumb Env::IOActivity + ReadOptions ro; + ro.total_order_seek = true; + Arena arena; + std::vector memtables; + std::vector> + range_del_iters; + for (MemTable* m : mems_) { + memtables.push_back(m->NewIterator(ro, &arena)); + auto* range_del_iter = m->NewRangeTombstoneIterator( + ro, kMaxSequenceNumber, true /* immutable_memtable */); + if (range_del_iter != nullptr) { + range_del_iters.emplace_back(range_del_iter); + } + } + + assert(!memtables.empty()); + SequenceNumber first_seqno = kMaxSequenceNumber; + SequenceNumber earliest_seqno = kMaxSequenceNumber; + // Pick first and earliest seqno as min of all first_seqno + // and earliest_seqno of the mempurged memtables. + for (const auto& mem : mems_) { + first_seqno = mem->GetFirstSequenceNumber() < first_seqno + ? mem->GetFirstSequenceNumber() + : first_seqno; + earliest_seqno = mem->GetEarliestSequenceNumber() < earliest_seqno + ? mem->GetEarliestSequenceNumber() + : earliest_seqno; + } + + ScopedArenaIterator iter( + NewMergingIterator(&(cfd_->internal_comparator()), memtables.data(), + static_cast(memtables.size()), &arena)); + + auto* ioptions = cfd_->ioptions(); + + // Place iterator at the First (meaning most recent) key node. + iter->SeekToFirst(); + + const std::string* const full_history_ts_low = &(cfd_->GetFullHistoryTsLow()); + std::unique_ptr range_del_agg( + new CompactionRangeDelAggregator(&(cfd_->internal_comparator()), + existing_snapshots_, + full_history_ts_low)); + for (auto& rd_iter : range_del_iters) { + range_del_agg->AddTombstones(std::move(rd_iter)); + } + + // If there is valid data in the memtable, + // or at least range tombstones, copy over the info + // to the new memtable. + if (iter->Valid() || !range_del_agg->IsEmpty()) { + // MaxSize is the size of a memtable. + size_t maxSize = mutable_cf_options_.write_buffer_size; + std::unique_ptr compaction_filter; + if (ioptions->compaction_filter_factory != nullptr && + ioptions->compaction_filter_factory->ShouldFilterTableFileCreation( + TableFileCreationReason::kFlush)) { + CompactionFilter::Context ctx; + ctx.is_full_compaction = false; + ctx.is_manual_compaction = false; + ctx.column_family_id = cfd_->GetID(); + ctx.reason = TableFileCreationReason::kFlush; + compaction_filter = + ioptions->compaction_filter_factory->CreateCompactionFilter(ctx); + if (compaction_filter != nullptr && + !compaction_filter->IgnoreSnapshots()) { + s = Status::NotSupported( + "CompactionFilter::IgnoreSnapshots() = false is not supported " + "anymore."); + return s; + } + } + + new_mem = new MemTable((cfd_->internal_comparator()), *(cfd_->ioptions()), + mutable_cf_options_, cfd_->write_buffer_mgr(), + earliest_seqno, cfd_->GetID()); + assert(new_mem != nullptr); + + Env* env = db_options_.env; + assert(env); + MergeHelper merge( + env, (cfd_->internal_comparator()).user_comparator(), + (ioptions->merge_operator).get(), compaction_filter.get(), + ioptions->logger, true /* internal key corruption is not ok */, + existing_snapshots_.empty() ? 
0 : existing_snapshots_.back(), + snapshot_checker_); + assert(job_context_); + SequenceNumber job_snapshot_seq = job_context_->GetJobSnapshotSequence(); + const std::atomic kManualCompactionCanceledFalse{false}; + CompactionIterator c_iter( + iter.get(), (cfd_->internal_comparator()).user_comparator(), &merge, + kMaxSequenceNumber, &existing_snapshots_, + earliest_write_conflict_snapshot_, job_snapshot_seq, snapshot_checker_, + env, ShouldReportDetailedTime(env, ioptions->stats), + true /* internal key corruption is not ok */, range_del_agg.get(), + nullptr, ioptions->allow_data_in_errors, + ioptions->enforce_single_del_contracts, + /*manual_compaction_canceled=*/kManualCompactionCanceledFalse, + /*compaction=*/nullptr, compaction_filter.get(), + /*shutting_down=*/nullptr, ioptions->info_log, full_history_ts_low); + + // Set earliest sequence number in the new memtable + // to be equal to the earliest sequence number of the + // memtable being flushed (See later if there is a need + // to update this number!). + new_mem->SetEarliestSequenceNumber(earliest_seqno); + // Likewise for first seq number. + new_mem->SetFirstSequenceNumber(first_seqno); + SequenceNumber new_first_seqno = kMaxSequenceNumber; + + c_iter.SeekToFirst(); + + // Key transfer + for (; c_iter.Valid(); c_iter.Next()) { + const ParsedInternalKey ikey = c_iter.ikey(); + const Slice value = c_iter.value(); + new_first_seqno = + ikey.sequence < new_first_seqno ? ikey.sequence : new_first_seqno; + + // Should we update "OldestKeyTime" ???? -> timestamp appear + // to still be an "experimental" feature. + s = new_mem->Add( + ikey.sequence, ikey.type, ikey.user_key, value, + nullptr, // KV protection info set as nullptr since it + // should only be useful for the first add to + // the original memtable. + false, // : allow concurrent_memtable_writes_ + // Not seen as necessary for now. + nullptr, // get_post_process_info(m) must be nullptr + // when concurrent_memtable_writes is switched off. + nullptr); // hint, only used when concurrent_memtable_writes_ + // is switched on. + if (!s.ok()) { + break; + } + + // If new_mem has size greater than maxSize, + // then rollback to regular flush operation, + // and destroy new_mem. + if (new_mem->ApproximateMemoryUsage() > maxSize) { + s = Status::Aborted("Mempurge filled more than one memtable."); + new_mem_capacity = 1.0; + break; + } + } + + // Check status and propagate + // potential error status from c_iter + if (!s.ok()) { + c_iter.status().PermitUncheckedError(); + } else if (!c_iter.status().ok()) { + s = c_iter.status(); + } + + // Range tombstone transfer. + if (s.ok()) { + auto range_del_it = range_del_agg->NewIterator(); + for (range_del_it->SeekToFirst(); range_del_it->Valid(); + range_del_it->Next()) { + auto tombstone = range_del_it->Tombstone(); + new_first_seqno = + tombstone.seq_ < new_first_seqno ? tombstone.seq_ : new_first_seqno; + s = new_mem->Add( + tombstone.seq_, // Sequence number + kTypeRangeDeletion, // KV type + tombstone.start_key_, // Key is start key. + tombstone.end_key_, // Value is end key. + nullptr, // KV protection info set as nullptr since it + // should only be useful for the first add to + // the original memtable. + false, // : allow concurrent_memtable_writes_ + // Not seen as necessary for now. + nullptr, // get_post_process_info(m) must be nullptr + // when concurrent_memtable_writes is switched off. + nullptr); // hint, only used when concurrent_memtable_writes_ + // is switched on. 
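Restating the guard that both transfer loops apply (a sketch only; upstream compares MemTable::ApproximateMemoryUsage() against mutable_cf_options_.write_buffer_size):

    #include <cstddef>

    // Mempurge may fill at most one output memtable: crossing the configured
    // write buffer size aborts the purge and falls back to a regular flush.
    bool MempurgeOverflowed(size_t approx_memory_usage,
                            size_t write_buffer_size) {
      return approx_memory_usage > write_buffer_size;
    }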
+ + if (!s.ok()) { + break; + } + + // If new_mem has size greater than maxSize, + // then rollback to regular flush operation, + // and destroy new_mem. + if (new_mem->ApproximateMemoryUsage() > maxSize) { + s = Status::Aborted(Slice("Mempurge filled more than one memtable.")); + new_mem_capacity = 1.0; + break; + } + } + } + + // If everything happened smoothly and new_mem contains valid data, + // decide if it is flushed to storage or kept in the imm() + // memtable list (memory). + if (s.ok() && (new_first_seqno != kMaxSequenceNumber)) { + // Rectify the first sequence number, which (unlike the earliest seq + // number) needs to be present in the new memtable. + new_mem->SetFirstSequenceNumber(new_first_seqno); + + // The new_mem is added to the list of immutable memtables + // only if it filled at less than 100% capacity and isn't flagged + // as in need of being flushed. + if (new_mem->ApproximateMemoryUsage() < maxSize && + !(new_mem->ShouldFlushNow())) { + // Construct fragmented memtable range tombstones without mutex + new_mem->ConstructFragmentedRangeTombstones(); + db_mutex_->Lock(); + uint64_t new_mem_id = mems_[0]->GetID(); + + new_mem->SetID(new_mem_id); + new_mem->SetNextLogNumber(mems_[0]->GetNextLogNumber()); + + // This addition will not trigger another flush, because + // we do not call SchedulePendingFlush(). + cfd_->imm()->Add(new_mem, &job_context_->memtables_to_free); + new_mem->Ref(); + // Piggyback FlushJobInfo on the first flushed memtable. + db_mutex_->AssertHeld(); + meta_.fd.file_size = 0; + mems_[0]->SetFlushJobInfo(GetFlushJobInfo()); + db_mutex_->Unlock(); + } else { + s = Status::Aborted(Slice("Mempurge filled more than one memtable.")); + new_mem_capacity = 1.0; + if (new_mem) { + job_context_->memtables_to_free.push_back(new_mem); + } + } + } else { + // In this case, the newly allocated new_mem is empty. + assert(new_mem != nullptr); + job_context_->memtables_to_free.push_back(new_mem); + } + } + + // Reacquire the mutex for WriteLevel0 function. + db_mutex_->Lock(); + + // If mempurge successful, don't write input tables to level0, + // but write any full output table to level0. + if (s.ok()) { + TEST_SYNC_POINT("DBImpl::FlushJob:MemPurgeSuccessful"); + } else { + TEST_SYNC_POINT("DBImpl::FlushJob:MemPurgeUnsuccessful"); + } + const uint64_t micros = clock_->NowMicros() - start_micros; + const uint64_t cpu_micros = clock_->CPUMicros() - start_cpu_micros; + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Mempurge lasted %" PRIu64 + " microseconds, and %" PRIu64 + " cpu " + "microseconds. Status is %s ok. Perc capacity: %f\n", + cfd_->GetName().c_str(), job_context_->job_id, micros, + cpu_micros, s.ok() ? "" : "not", new_mem_capacity); + + return s; +} + +bool FlushJob::MemPurgeDecider(double threshold) { + // Never trigger mempurge if threshold is not a strictly positive value. + if (!(threshold > 0.0)) { + return false; + } + if (threshold > (1.0 * mems_.size())) { + return true; + } + // Payload and useful_payload (in bytes). + // The useful payload ratio of a given MemTable + // is estimated to be useful_payload/payload. + uint64_t payload = 0, useful_payload = 0, entry_size = 0; + + // Local variables used repetitively inside the for-loop + // when iterating over the sampled entries. 
+ Slice key_slice, value_slice; + ParsedInternalKey res; + SnapshotImpl min_snapshot; + std::string vget; + Status mget_s, parse_s; + MergeContext merge_context; + SequenceNumber max_covering_tombstone_seq = 0, sqno = 0, + min_seqno_snapshot = 0; + bool get_res, can_be_useful_payload, not_in_next_mems; + + // If estimated_useful_payload is > threshold, + // then flush to storage, else MemPurge. + double estimated_useful_payload = 0.0; + // Cochran formula for determining sample size. + // 95% confidence interval, 7% precision. + // n0 = (1.96*1.96)*0.25/(0.07*0.07) = 196.0 + // TODO: plumb Env::IOActivity + double n0 = 196.0; + ReadOptions ro; + ro.total_order_seek = true; + + // Iterate over each memtable of the set. + for (auto mem_iter = std::begin(mems_); mem_iter != std::end(mems_); + mem_iter++) { + MemTable* mt = *mem_iter; + + // Else sample from the table. + uint64_t nentries = mt->num_entries(); + // Corrected Cochran formula for small populations + // (converges to n0 for large populations). + uint64_t target_sample_size = + static_cast(ceil(n0 / (1.0 + (n0 / nentries)))); + std::unordered_set sentries = {}; + // Populate sample entries set. + mt->UniqueRandomSample(target_sample_size, &sentries); + + // Estimate the garbage ratio by comparing if + // each sample corresponds to a valid entry. + for (const char* ss : sentries) { + key_slice = GetLengthPrefixedSlice(ss); + parse_s = ParseInternalKey(key_slice, &res, true /*log_err_key*/); + if (!parse_s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Memtable Decider: ParseInternalKey did not parse " + "key_slice %s successfully.", + key_slice.data()); + } + + // Size of the entry is "key size (+ value size if KV entry)" + entry_size = key_slice.size(); + if (res.type == kTypeValue) { + value_slice = + GetLengthPrefixedSlice(key_slice.data() + key_slice.size()); + entry_size += value_slice.size(); + } + + // Count entry bytes as payload. + payload += entry_size; + + LookupKey lkey(res.user_key, kMaxSequenceNumber); + + // Paranoia: zero out these values just in case. + max_covering_tombstone_seq = 0; + sqno = 0; + + // Pick the oldest existing snapshot that is more recent + // than the sequence number of the sampled entry. + min_seqno_snapshot = kMaxSequenceNumber; + for (SequenceNumber seq_num : existing_snapshots_) { + if (seq_num > res.sequence && seq_num < min_seqno_snapshot) { + min_seqno_snapshot = seq_num; + } + } + min_snapshot.number_ = min_seqno_snapshot; + ro.snapshot = + min_seqno_snapshot < kMaxSequenceNumber ? &min_snapshot : nullptr; + + // Estimate if the sample entry is valid or not. + get_res = mt->Get(lkey, &vget, /*columns=*/nullptr, /*timestamp=*/nullptr, + &mget_s, &merge_context, &max_covering_tombstone_seq, + &sqno, ro, true /* immutable_memtable */); + if (!get_res) { + ROCKS_LOG_WARN( + db_options_.info_log, + "Memtable Get returned false when Get(sampled entry). " + "Yet each sample entry should exist somewhere in the memtable, " + "unrelated to whether it has been deleted or not."); + } + + // TODO(bjlemaire): evaluate typeMerge. + // This is where the sampled entry is estimated to be + // garbage or not. Note that this is a garbage *estimation* + // because we do not include certain items such as + // CompactionFitlers triggered at flush, or if the same delete + // has been inserted twice or more in the memtable. + + // Evaluate if the entry can be useful payload + // Situation #1: entry is a KV entry, was found in the memtable mt + // and the sequence numbers match. 
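A worked instance (not in the patch) of the corrected Cochran formula used above, where n0 = (1.96 * 1.96) * 0.25 / (0.07 * 0.07) = 196.0 for a 95% confidence interval at 7% precision:

    #include <cmath>
    #include <cstdint>

    uint64_t TargetSampleSize(uint64_t num_entries) {
      const double n0 = 196.0;
      // The small-population correction shrinks the sample toward the
      // population size; for large memtables it converges back to n0.
      return static_cast<uint64_t>(std::ceil(n0 / (1.0 + n0 / num_entries)));
    }

    // TargetSampleSize(100)     == 67   (small memtable: sample most of it)
    // TargetSampleSize(10000)   == 193
    // TargetSampleSize(1000000) == 196  (converges to n0)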
+ can_be_useful_payload = (res.type == kTypeValue) && get_res && + mget_s.ok() && (sqno == res.sequence); + + // Situation #2: entry is a delete entry, was found in the memtable mt + // (because gres==true) and no valid KV entry is found. + // (note: duplicate delete entries are also taken into + // account here, because the sequence number 'sqno' + // in memtable->Get(&sqno) operation is set to be equal + // to the most recent delete entry as well). + can_be_useful_payload |= + ((res.type == kTypeDeletion) || (res.type == kTypeSingleDeletion)) && + mget_s.IsNotFound() && get_res && (sqno == res.sequence); + + // If there is a chance that the entry is useful payload + // Verify that the entry does not appear in the following memtables + // (memtables with greater memtable ID/larger sequence numbers). + if (can_be_useful_payload) { + not_in_next_mems = true; + for (auto next_mem_iter = mem_iter + 1; + next_mem_iter != std::end(mems_); next_mem_iter++) { + if ((*next_mem_iter) + ->Get(lkey, &vget, /*columns=*/nullptr, /*timestamp=*/nullptr, + &mget_s, &merge_context, &max_covering_tombstone_seq, + &sqno, ro, true /* immutable_memtable */)) { + not_in_next_mems = false; + break; + } + } + if (not_in_next_mems) { + useful_payload += entry_size; + } + } + } + if (payload > 0) { + // We use the estimated useful payload ratio to + // evaluate how many of the memtable bytes are useful bytes. + estimated_useful_payload += + (mt->ApproximateMemoryUsage()) * (useful_payload * 1.0 / payload); + + ROCKS_LOG_INFO(db_options_.info_log, + "Mempurge sampling [CF %s] - found garbage ratio from " + "sampling: %f. Threshold is %f\n", + cfd_->GetName().c_str(), + (payload - useful_payload) * 1.0 / payload, threshold); + } else { + ROCKS_LOG_WARN(db_options_.info_log, + "Mempurge sampling: null payload measured, and collected " + "sample size is %zu\n.", + sentries.size()); + } + } + // We convert the total number of useful payload bytes + // into the proportion of memtable necessary to store all these bytes. + // We compare this proportion with the threshold value. + return ((estimated_useful_payload / mutable_cf_options_.write_buffer_size) < + threshold); +} + +Status FlushJob::WriteLevel0Table() { + AutoThreadOperationStageUpdater stage_updater( + ThreadStatus::STAGE_FLUSH_WRITE_L0); + db_mutex_->AssertHeld(); + const uint64_t start_micros = clock_->NowMicros(); + const uint64_t start_cpu_micros = clock_->CPUMicros(); + Status s; + + SequenceNumber smallest_seqno = mems_.front()->GetEarliestSequenceNumber(); + if (!db_impl_seqno_time_mapping_.Empty()) { + // make a local copy, as the seqno_time_mapping from db_impl is not thread + // safe, which will be used while not holding the db_mutex. + seqno_to_time_mapping_ = db_impl_seqno_time_mapping_.Copy(smallest_seqno); + } + + std::vector blob_file_additions; + + { + auto write_hint = cfd_->CalculateSSTWriteHint(0); + Env::IOPriority io_priority = GetRateLimiterPriorityForWrite(); + db_mutex_->Unlock(); + if (log_buffer_) { + log_buffer_->FlushBufferToLog(); + } + // memtables and range_del_iters store internal iterators over each data + // memtable and its associated range deletion memtable, respectively, at + // corresponding indexes. 
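A numeric illustration (values invented) of the final comparison above, which expresses the estimated useful payload in units of one write buffer:

    #include <cstddef>

    bool ShouldMempurge(double estimated_useful_payload,  // bytes
                        size_t write_buffer_size, double threshold) {
      return (estimated_useful_payload / write_buffer_size) < threshold;
    }

    // E.g. two 64 MiB memtables sampled at a 30% useful-payload ratio yield
    // about 38.4 MiB, i.e. 0.6 write buffers of live data: mempurged at
    // threshold 1.0, flushed at threshold 0.5.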
+ std::vector memtables; + std::vector> + range_del_iters; + ReadOptions ro; + ro.total_order_seek = true; + ro.io_activity = Env::IOActivity::kFlush; + Arena arena; + uint64_t total_num_entries = 0, total_num_deletes = 0; + uint64_t total_data_size = 0; + size_t total_memory_usage = 0; + // Used for testing: + uint64_t mems_size = mems_.size(); + (void)mems_size; // avoids unused variable error when + // TEST_SYNC_POINT_CALLBACK not used. + TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:num_memtables", + &mems_size); + assert(job_context_); + for (MemTable* m : mems_) { + ROCKS_LOG_INFO( + db_options_.info_log, + "[%s] [JOB %d] Flushing memtable with next log file: %" PRIu64 "\n", + cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber()); + memtables.push_back(m->NewIterator(ro, &arena)); + auto* range_del_iter = m->NewRangeTombstoneIterator( + ro, kMaxSequenceNumber, true /* immutable_memtable */); + if (range_del_iter != nullptr) { + range_del_iters.emplace_back(range_del_iter); + } + total_num_entries += m->num_entries(); + total_num_deletes += m->num_deletes(); + total_data_size += m->get_data_size(); + total_memory_usage += m->ApproximateMemoryUsage(); + } + + event_logger_->Log() << "job" << job_context_->job_id << "event" + << "flush_started" + << "num_memtables" << mems_.size() << "num_entries" + << total_num_entries << "num_deletes" + << total_num_deletes << "total_data_size" + << total_data_size << "memory_usage" + << total_memory_usage << "flush_reason" + << GetFlushReasonString(flush_reason_); + + { + ScopedArenaIterator iter( + NewMergingIterator(&cfd_->internal_comparator(), memtables.data(), + static_cast(memtables.size()), &arena)); + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": started", + cfd_->GetName().c_str(), job_context_->job_id, + meta_.fd.GetNumber()); + + TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression", + &output_compression_); + int64_t _current_time = 0; + auto status = clock_->GetCurrentTime(&_current_time); + // Safe to proceed even if GetCurrentTime fails. So, log and proceed. + if (!status.ok()) { + ROCKS_LOG_WARN( + db_options_.info_log, + "Failed to get current time to populate creation_time property. " + "Status: %s", + status.ToString().c_str()); + } + const uint64_t current_time = static_cast(_current_time); + + uint64_t oldest_key_time = mems_.front()->ApproximateOldestKeyTime(); + + // It's not clear whether oldest_key_time is always available. In case + // it is not available, use current_time. + uint64_t oldest_ancester_time = std::min(current_time, oldest_key_time); + + TEST_SYNC_POINT_CALLBACK( + "FlushJob::WriteLevel0Table:oldest_ancester_time", + &oldest_ancester_time); + meta_.oldest_ancester_time = oldest_ancester_time; + meta_.file_creation_time = current_time; + + uint64_t num_input_entries = 0; + uint64_t memtable_payload_bytes = 0; + uint64_t memtable_garbage_bytes = 0; + IOStatus io_s; + + const std::string* const full_history_ts_low = + (full_history_ts_low_.empty()) ? 
nullptr : &full_history_ts_low_; + TableBuilderOptions tboptions( + *cfd_->ioptions(), mutable_cf_options_, cfd_->internal_comparator(), + cfd_->int_tbl_prop_collector_factories(), output_compression_, + mutable_cf_options_.compression_opts, cfd_->GetID(), cfd_->GetName(), + 0 /* level */, false /* is_bottommost */, + TableFileCreationReason::kFlush, oldest_key_time, current_time, + db_id_, db_session_id_, 0 /* target_file_size */, + meta_.fd.GetNumber()); + const SequenceNumber job_snapshot_seq = + job_context_->GetJobSnapshotSequence(); + const ReadOptions read_options(Env::IOActivity::kFlush); + s = BuildTable(dbname_, versions_, db_options_, tboptions, file_options_, + read_options, cfd_->table_cache(), iter.get(), + std::move(range_del_iters), &meta_, &blob_file_additions, + existing_snapshots_, earliest_write_conflict_snapshot_, + job_snapshot_seq, snapshot_checker_, + mutable_cf_options_.paranoid_file_checks, + cfd_->internal_stats(), &io_s, io_tracer_, + BlobFileCreationReason::kFlush, seqno_to_time_mapping_, + event_logger_, job_context_->job_id, io_priority, + &table_properties_, write_hint, full_history_ts_low, + blob_callback_, base_, &num_input_entries, + &memtable_payload_bytes, &memtable_garbage_bytes); + // TODO: Cleanup io_status in BuildTable and table builders + assert(!s.ok() || io_s.ok()); + io_s.PermitUncheckedError(); + if (num_input_entries != total_num_entries && s.ok()) { + std::string msg = "Expected " + std::to_string(total_num_entries) + + " entries in memtables, but read " + + std::to_string(num_input_entries); + ROCKS_LOG_WARN(db_options_.info_log, "[%s] [JOB %d] Level-0 flush %s", + cfd_->GetName().c_str(), job_context_->job_id, + msg.c_str()); + if (db_options_.flush_verify_memtable_count) { + s = Status::Corruption(msg); + } + } + if (tboptions.reason == TableFileCreationReason::kFlush) { + TEST_SYNC_POINT("DBImpl::FlushJob:Flush"); + RecordTick(stats_, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH, + memtable_payload_bytes); + RecordTick(stats_, MEMTABLE_GARBAGE_BYTES_AT_FLUSH, + memtable_garbage_bytes); + } + LogFlush(db_options_.info_log); + } + ROCKS_LOG_BUFFER(log_buffer_, + "[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": %" PRIu64 + " bytes %s" + "%s", + cfd_->GetName().c_str(), job_context_->job_id, + meta_.fd.GetNumber(), meta_.fd.GetFileSize(), + s.ToString().c_str(), + meta_.marked_for_compaction ? " (needs compaction)" : ""); + + if (s.ok() && output_file_directory_ != nullptr && sync_output_directory_) { + s = output_file_directory_->FsyncWithDirOptions( + IOOptions(), nullptr, + DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced)); + } + TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table", &mems_); + db_mutex_->Lock(); + } + base_->Unref(); + + // Note that if file_size is zero, the file has been deleted and + // should not be added to the manifest. + const bool has_output = meta_.fd.GetFileSize() > 0; + + if (s.ok() && has_output) { + TEST_SYNC_POINT("DBImpl::FlushJob:SSTFileCreated"); + // if we have more than 1 background thread, then we cannot + // insert files directly into higher levels because some other + // threads could be concurrently producing compacted files for + // that key range. 
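For context (illustrative): the entry-count cross-check above is gated by an immutable DB option; when it is off, a mismatch is only logged as a warning.

    #include "rocksdb/options.h"

    rocksdb::Options StrictFlushOptions() {
      rocksdb::Options options;
      // When true (the default in recent releases), the mismatch above is
      // escalated to Status::Corruption instead of a log line.
      options.flush_verify_memtable_count = true;
      return options;
    }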
+ // Add file to L0 + edit_->AddFile(0 /* level */, meta_.fd.GetNumber(), meta_.fd.GetPathId(), + meta_.fd.GetFileSize(), meta_.smallest, meta_.largest, + meta_.fd.smallest_seqno, meta_.fd.largest_seqno, + meta_.marked_for_compaction, meta_.temperature, + meta_.oldest_blob_file_number, meta_.oldest_ancester_time, + meta_.file_creation_time, meta_.epoch_number, + meta_.file_checksum, meta_.file_checksum_func_name, + meta_.unique_id, meta_.compensated_range_deletion_size, + meta_.tail_size); + edit_->SetBlobFileAdditions(std::move(blob_file_additions)); + } + // Piggyback FlushJobInfo on the first first flushed memtable. + mems_[0]->SetFlushJobInfo(GetFlushJobInfo()); + + // Note that here we treat flush as level 0 compaction in internal stats + InternalStats::CompactionStats stats(CompactionReason::kFlush, 1); + const uint64_t micros = clock_->NowMicros() - start_micros; + const uint64_t cpu_micros = clock_->CPUMicros() - start_cpu_micros; + stats.micros = micros; + stats.cpu_micros = cpu_micros; + + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Flush lasted %" PRIu64 + " microseconds, and %" PRIu64 " cpu microseconds.\n", + cfd_->GetName().c_str(), job_context_->job_id, micros, + cpu_micros); + + if (has_output) { + stats.bytes_written = meta_.fd.GetFileSize(); + stats.num_output_files = 1; + } + + const auto& blobs = edit_->GetBlobFileAdditions(); + for (const auto& blob : blobs) { + stats.bytes_written_blob += blob.GetTotalBlobBytes(); + } + + stats.num_output_files_blob = static_cast(blobs.size()); + + RecordTimeToHistogram(stats_, FLUSH_TIME, stats.micros); + cfd_->internal_stats()->AddCompactionStats(0 /* level */, thread_pri_, stats); + cfd_->internal_stats()->AddCFStats( + InternalStats::BYTES_FLUSHED, + stats.bytes_written + stats.bytes_written_blob); + RecordFlushIOStats(); + + return s; +} + +Env::IOPriority FlushJob::GetRateLimiterPriorityForWrite() { + if (versions_ && versions_->GetColumnFamilySet() && + versions_->GetColumnFamilySet()->write_controller()) { + WriteController* write_controller = + versions_->GetColumnFamilySet()->write_controller(); + if (write_controller->IsStopped() || write_controller->NeedsDelay()) { + return Env::IO_USER; + } + } + + return Env::IO_HIGH; +} + +std::unique_ptr FlushJob::GetFlushJobInfo() const { + db_mutex_->AssertHeld(); + std::unique_ptr info(new FlushJobInfo{}); + info->cf_id = cfd_->GetID(); + info->cf_name = cfd_->GetName(); + + const uint64_t file_number = meta_.fd.GetNumber(); + info->file_path = + MakeTableFileName(cfd_->ioptions()->cf_paths[0].path, file_number); + info->file_number = file_number; + info->oldest_blob_file_number = meta_.oldest_blob_file_number; + info->thread_id = db_options_.env->GetThreadID(); + info->job_id = job_context_->job_id; + info->smallest_seqno = meta_.fd.smallest_seqno; + info->largest_seqno = meta_.fd.largest_seqno; + info->table_properties = table_properties_; + info->flush_reason = flush_reason_; + info->blob_compression_type = mutable_cf_options_.blob_compression_type; + + // Update BlobFilesInfo. 
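Context for GetRateLimiterPriorityForWrite() above (a sketch, not part of the patch): the escalation to Env::IO_USER only matters when a rate limiter is installed, for example:

    #include <memory>

    #include "rocksdb/options.h"
    #include "rocksdb/rate_limiter.h"

    rocksdb::Options RateLimitedOptions() {
      rocksdb::Options options;
      // 100 MiB/s shared by flushes and compactions. IO_USER requests,
      // issued above when the WriteController reports a stop or delay, are
      // served ahead of IO_HIGH/IO_LOW traffic, so a flush that unblocks
      // stalled foreground writes waits as little as possible.
      options.rate_limiter.reset(rocksdb::NewGenericRateLimiter(100 << 20));
      return options;
    }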
+ for (const auto& blob_file : edit_->GetBlobFileAdditions()) { + BlobFileAdditionInfo blob_file_addition_info( + BlobFileName(cfd_->ioptions()->cf_paths.front().path, + blob_file.GetBlobFileNumber()) /*blob_file_path*/, + blob_file.GetBlobFileNumber(), blob_file.GetTotalBlobCount(), + blob_file.GetTotalBlobBytes()); + info->blob_file_addition_infos.emplace_back( + std::move(blob_file_addition_info)); + } + return info; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_job.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_job.h new file mode 100644 index 0000000000..d3902f0bd0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_job.h @@ -0,0 +1,200 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db/blob/blob_file_completion_callback.h" +#include "db/column_family.h" +#include "db/flush_scheduler.h" +#include "db/internal_stats.h" +#include "db/job_context.h" +#include "db/log_writer.h" +#include "db/logs_with_prep_tracker.h" +#include "db/memtable_list.h" +#include "db/seqno_to_time_mapping.h" +#include "db/snapshot_impl.h" +#include "db/version_edit.h" +#include "db/write_controller.h" +#include "db/write_thread.h" +#include "logging/event_logger.h" +#include "monitoring/instrumented_mutex.h" +#include "options/db_options.h" +#include "port/port.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/listener.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/transaction_log.h" +#include "table/scoped_arena_iterator.h" +#include "util/autovector.h" +#include "util/stop_watch.h" +#include "util/thread_local.h" + +namespace ROCKSDB_NAMESPACE { + +class DBImpl; +class MemTable; +class SnapshotChecker; +class TableCache; +class Version; +class VersionEdit; +class VersionSet; +class Arena; + +class FlushJob { + public: + // TODO(icanadi) make effort to reduce number of parameters here + // IMPORTANT: mutable_cf_options needs to be alive while FlushJob is alive + FlushJob(const std::string& dbname, ColumnFamilyData* cfd, + const ImmutableDBOptions& db_options, + const MutableCFOptions& mutable_cf_options, uint64_t max_memtable_id, + const FileOptions& file_options, VersionSet* versions, + InstrumentedMutex* db_mutex, std::atomic* shutting_down, + std::vector existing_snapshots, + SequenceNumber earliest_write_conflict_snapshot, + SnapshotChecker* snapshot_checker, JobContext* job_context, + FlushReason flush_reason, LogBuffer* log_buffer, + FSDirectory* db_directory, FSDirectory* output_file_directory, + CompressionType output_compression, Statistics* stats, + EventLogger* event_logger, bool measure_io_stats, + const bool sync_output_directory, const bool write_manifest, + Env::Priority thread_pri, const std::shared_ptr& io_tracer, + const SeqnoToTimeMapping& seq_time_mapping, + const std::string& db_id = "", const std::string& db_session_id = "", + std::string full_history_ts_low = "", + BlobFileCompletionCallback* blob_callback = nullptr); + + 
  ~FlushJob();
+
+  // Require db_mutex held.
+  // Once PickMemTable() is called, either Run() or Cancel() has to be called.
+  void PickMemTable();
+  Status Run(LogsWithPrepTracker* prep_tracker = nullptr,
+             FileMetaData* file_meta = nullptr,
+             bool* switched_to_mempurge = nullptr);
+  void Cancel();
+  const autovector<MemTable*>& GetMemTables() const { return mems_; }
+
+  std::list<std::unique_ptr<FlushJobInfo>>* GetCommittedFlushJobsInfo() {
+    return &committed_flush_jobs_info_;
+  }
+
+ private:
+  friend class FlushJobTest_GetRateLimiterPriorityForWrite_Test;
+
+  void ReportStartedFlush();
+  void ReportFlushInputSize(const autovector<MemTable*>& mems);
+  void RecordFlushIOStats();
+  Status WriteLevel0Table();
+
+  // Memtable Garbage Collection algorithm: a MemPurge takes the list
+  // of immutable memtables and filters the outdated bytes (or "purges"
+  // them) out of it. The output (the filtered bytes, or "useful payload")
+  // is then transferred into a new memtable. If this memtable is filled,
+  // the mempurge is aborted and rerouted to a regular flush process.
+  // Otherwise, depending on the heuristics, it is placed onto the
+  // immutable memtable list.
+  // The addition to the imm list will not trigger a flush operation. The
+  // flush of the imm list will instead be triggered once the mutable memtable
+  // is added to the imm list.
+  // This process is typically intended for workloads with heavy overwrites
+  // when we want to avoid SSD writes (and reads) as much as possible.
+  // "MemPurge" is an experimental feature still at a very early stage
+  // of development. At the moment it is only compatible with the Get, Put,
+  // and Delete operations, as well as Iterators and CompactionFilters.
+  // For this early version, "MemPurge" is enabled by setting the
+  // options.experimental_mempurge_threshold value to > 0.0. When this is
+  // the case, ALL automatic flush operations (FlushReason::kWriteBufferFull)
+  // will first go through the MemPurge process. Therefore, we strongly
+  // recommend leaving this option at its default (disabled) value until
+  // the MemPurge process has matured.
+  Status MemPurge();
+  bool MemPurgeDecider(double threshold);
+  // The rate limiter priority (io_priority) is determined dynamically here.
+  Env::IOPriority GetRateLimiterPriorityForWrite();
+  std::unique_ptr<FlushJobInfo> GetFlushJobInfo() const;
+
+  const std::string& dbname_;
+  const std::string db_id_;
+  const std::string db_session_id_;
+  ColumnFamilyData* cfd_;
+  const ImmutableDBOptions& db_options_;
+  const MutableCFOptions& mutable_cf_options_;
+  // A variable storing the largest memtable id to flush in this
+  // flush job. RocksDB uses this variable to select the memtables to flush in
+  // this job. All memtables in this column family with an ID smaller than or
+  // equal to max_memtable_id_ will be selected for flush.
+  uint64_t max_memtable_id_;
+  const FileOptions file_options_;
+  VersionSet* versions_;
+  InstrumentedMutex* db_mutex_;
+  std::atomic<bool>* shutting_down_;
+  std::vector<SequenceNumber> existing_snapshots_;
+  SequenceNumber earliest_write_conflict_snapshot_;
+  SnapshotChecker* snapshot_checker_;
+  JobContext* job_context_;
+  FlushReason flush_reason_;
+  LogBuffer* log_buffer_;
+  FSDirectory* db_directory_;
+  FSDirectory* output_file_directory_;
+  CompressionType output_compression_;
+  Statistics* stats_;
+  EventLogger* event_logger_;
+  TableProperties table_properties_;
+  bool measure_io_stats_;
+  // True if this flush job should call fsync on the output directory. False
+  // otherwise.
+  // Usually sync_output_directory_ is true.
A flush job needs to call sync on + // the output directory before committing to the MANIFEST. + // However, an individual flush job does not have to call sync on the output + // directory if it is part of an atomic flush. After all flush jobs in the + // atomic flush succeed, call sync once on each distinct output directory. + const bool sync_output_directory_; + // True if this flush job should write to MANIFEST after successfully + // flushing memtables. False otherwise. + // Usually write_manifest_ is true. A flush job commits to the MANIFEST after + // flushing the memtables. + // However, an individual flush job cannot rashly write to the MANIFEST + // immediately after it finishes the flush if it is part of an atomic flush. + // In this case, only after all flush jobs succeed in flush can RocksDB + // commit to the MANIFEST. + const bool write_manifest_; + // The current flush job can commit flush result of a concurrent flush job. + // We collect FlushJobInfo of all jobs committed by current job and fire + // OnFlushCompleted for them. + std::list> committed_flush_jobs_info_; + + // Variables below are set by PickMemTable(): + FileMetaData meta_; + autovector mems_; + VersionEdit* edit_; + Version* base_; + bool pick_memtable_called; + Env::Priority thread_pri_; + + const std::shared_ptr io_tracer_; + SystemClock* clock_; + + const std::string full_history_ts_low_; + BlobFileCompletionCallback* blob_callback_; + + // reference to the seqno_time_mapping_ in db_impl.h, not safe to read without + // db mutex + const SeqnoToTimeMapping& db_impl_seqno_time_mapping_; + SeqnoToTimeMapping seqno_to_time_mapping_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.cc new file mode 100644 index 0000000000..6f4d3e1a5e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/flush_scheduler.h" + +#include + +#include "db/column_family.h" + +namespace ROCKSDB_NAMESPACE { + +void FlushScheduler::ScheduleWork(ColumnFamilyData* cfd) { +#ifndef NDEBUG + { + std::lock_guard lock(checking_mutex_); + assert(checking_set_.count(cfd) == 0); + checking_set_.insert(cfd); + } +#endif // NDEBUG + cfd->Ref(); +// Suppress false positive clang analyzer warnings. +#ifndef __clang_analyzer__ + Node* node = new Node{cfd, head_.load(std::memory_order_relaxed)}; + while (!head_.compare_exchange_strong( + node->next, node, std::memory_order_relaxed, std::memory_order_relaxed)) { + // failing CAS updates the first param, so we are already set for + // retry. 
TakeNextColumnFamily won't happen until after another
+    // inter-thread synchronization, so we don't even need release
+    // semantics for this CAS.
+  }
+#endif  // __clang_analyzer__
+}
+
+ColumnFamilyData* FlushScheduler::TakeNextColumnFamily() {
+  while (true) {
+    if (head_.load(std::memory_order_relaxed) == nullptr) {
+      return nullptr;
+    }
+
+    // dequeue the head
+    Node* node = head_.load(std::memory_order_relaxed);
+    head_.store(node->next, std::memory_order_relaxed);
+    ColumnFamilyData* cfd = node->column_family;
+    delete node;
+
+#ifndef NDEBUG
+    {
+      std::lock_guard<std::mutex> lock(checking_mutex_);
+      auto iter = checking_set_.find(cfd);
+      assert(iter != checking_set_.end());
+      checking_set_.erase(iter);
+    }
+#endif  // NDEBUG
+
+    if (!cfd->IsDropped()) {
+      // success
+      return cfd;
+    }
+
+    // no longer relevant, retry
+    cfd->UnrefAndTryDelete();
+  }
+}
+
+bool FlushScheduler::Empty() {
+  auto rv = head_.load(std::memory_order_relaxed) == nullptr;
+#ifndef NDEBUG
+  std::lock_guard<std::mutex> lock(checking_mutex_);
+  // Empty is allowed to be called concurrently with ScheduleWork. It would
+  // only miss the most recent schedules.
+  assert((rv == checking_set_.empty()) || rv);
+#endif  // NDEBUG
+  return rv;
+}
+
+void FlushScheduler::Clear() {
+  ColumnFamilyData* cfd;
+  while ((cfd = TakeNextColumnFamily()) != nullptr) {
+    cfd->UnrefAndTryDelete();
+  }
+  assert(head_.load(std::memory_order_relaxed) == nullptr);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.h
new file mode 100644
index 0000000000..eb03f3e114
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/flush_scheduler.h
@@ -0,0 +1,55 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <stdint.h>
+
+#include <atomic>
+#include <mutex>
+#include <set>
+
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class ColumnFamilyData;
+
+// FlushScheduler keeps track of all column families whose memtable may
+// be full and require flushing. Unless otherwise noted, all methods on
+// FlushScheduler should be called only with the DB mutex held or from
+// a single-threaded recovery context.
+class FlushScheduler {
+ public:
+  FlushScheduler() : head_(nullptr) {}
+
+  // May be called from multiple threads at once, but not concurrent with
+  // any other method calls on this instance.
+  void ScheduleWork(ColumnFamilyData* cfd);
+
+  // Removes and returns a Ref()-ed column family. The client needs to
+  // Unref(). Filters out column families that have been dropped.
+  ColumnFamilyData* TakeNextColumnFamily();
+
+  // This can be called concurrently with ScheduleWork, but it would miss
+  // all the flushes scheduled after the last synchronization. This results
+  // in less precise enforcement of memtable sizes, but should not matter much.
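The lock-free push in ScheduleWork() above is a classic Treiber-stack insertion: a failed compare_exchange_strong rewrites node->next with the freshly observed head, so the retry loop body stays empty. A self-contained generic sketch of the same pattern (not RocksDB API):

    #include <atomic>

    template <typename T>
    struct IntrusiveStack {
      struct Node {
        T value;
        Node* next;
      };
      std::atomic<Node*> head{nullptr};

      void Push(T value) {
        Node* node = new Node{value, head.load(std::memory_order_relaxed)};
        while (!head.compare_exchange_strong(node->next, node,
                                             std::memory_order_relaxed,
                                             std::memory_order_relaxed)) {
          // node->next already holds the newly observed head; just retry.
        }
      }
    };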
+ bool Empty(); + + void Clear(); + + private: + struct Node { + ColumnFamilyData* column_family; + Node* next; + }; + + std::atomic head_; +#ifndef NDEBUG + std::mutex checking_mutex_; + std::set checking_set_; +#endif // NDEBUG +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.cc new file mode 100644 index 0000000000..75a7c599b8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.cc @@ -0,0 +1,1070 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/forward_iterator.h" + +#include +#include +#include + +#include "db/column_family.h" +#include "db/db_impl/db_impl.h" +#include "db/db_iter.h" +#include "db/dbformat.h" +#include "db/job_context.h" +#include "db/range_del_aggregator.h" +#include "db/range_tombstone_fragmenter.h" +#include "rocksdb/env.h" +#include "rocksdb/slice.h" +#include "rocksdb/slice_transform.h" +#include "table/merging_iterator.h" +#include "test_util/sync_point.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +// Usage: +// ForwardLevelIterator iter; +// iter.SetFileIndex(file_index); +// iter.Seek(target); // or iter.SeekToFirst(); +// iter.Next() +class ForwardLevelIterator : public InternalIterator { + public: + ForwardLevelIterator( + const ColumnFamilyData* const cfd, const ReadOptions& read_options, + const std::vector& files, + const std::shared_ptr& prefix_extractor, + bool allow_unprepared_value, uint8_t block_protection_bytes_per_key) + : cfd_(cfd), + read_options_(read_options), + files_(files), + valid_(false), + file_index_(std::numeric_limits::max()), + file_iter_(nullptr), + pinned_iters_mgr_(nullptr), + prefix_extractor_(prefix_extractor), + allow_unprepared_value_(allow_unprepared_value), + block_protection_bytes_per_key_(block_protection_bytes_per_key) { + status_.PermitUncheckedError(); // Allow uninitialized status through + } + + ~ForwardLevelIterator() override { + // Reset current pointer + if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) { + pinned_iters_mgr_->PinIterator(file_iter_); + } else { + delete file_iter_; + } + } + + void SetFileIndex(uint32_t file_index) { + assert(file_index < files_.size()); + status_ = Status::OK(); + if (file_index != file_index_) { + file_index_ = file_index; + Reset(); + } + } + void Reset() { + assert(file_index_ < files_.size()); + + // Reset current pointer + if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) { + pinned_iters_mgr_->PinIterator(file_iter_); + } else { + delete file_iter_; + } + + ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(), + kMaxSequenceNumber /* upper_bound */); + file_iter_ = cfd_->table_cache()->NewIterator( + read_options_, *(cfd_->soptions()), cfd_->internal_comparator(), + *files_[file_index_], + read_options_.ignore_range_deletions ? 
nullptr : &range_del_agg, + prefix_extractor_, /*table_reader_ptr=*/nullptr, + /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator, + /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, + /*max_file_size_for_l0_meta_pin=*/0, + /*smallest_compaction_key=*/nullptr, + /*largest_compaction_key=*/nullptr, allow_unprepared_value_, + block_protection_bytes_per_key_); + file_iter_->SetPinnedItersMgr(pinned_iters_mgr_); + valid_ = false; + if (!range_del_agg.IsEmpty()) { + status_ = Status::NotSupported( + "Range tombstones unsupported with ForwardIterator"); + } + } + void SeekToLast() override { + status_ = Status::NotSupported("ForwardLevelIterator::SeekToLast()"); + valid_ = false; + } + void Prev() override { + status_ = Status::NotSupported("ForwardLevelIterator::Prev()"); + valid_ = false; + } + bool Valid() const override { return valid_; } + void SeekToFirst() override { + assert(file_iter_ != nullptr); + if (!status_.ok()) { + assert(!valid_); + return; + } + file_iter_->SeekToFirst(); + valid_ = file_iter_->Valid(); + } + void Seek(const Slice& internal_key) override { + assert(file_iter_ != nullptr); + + // This deviates from the usual convention for InternalIterator::Seek() in + // that it doesn't discard pre-existing error status. That's because this + // Seek() is only supposed to be called immediately after SetFileIndex() + // (which discards pre-existing error status), and SetFileIndex() may set + // an error status, which we shouldn't discard. + if (!status_.ok()) { + assert(!valid_); + return; + } + + file_iter_->Seek(internal_key); + valid_ = file_iter_->Valid(); + } + void SeekForPrev(const Slice& /*internal_key*/) override { + status_ = Status::NotSupported("ForwardLevelIterator::SeekForPrev()"); + valid_ = false; + } + void Next() override { + assert(valid_); + file_iter_->Next(); + for (;;) { + valid_ = file_iter_->Valid(); + if (!file_iter_->status().ok()) { + assert(!valid_); + return; + } + if (valid_) { + return; + } + if (file_index_ + 1 >= files_.size()) { + valid_ = false; + return; + } + SetFileIndex(file_index_ + 1); + if (!status_.ok()) { + assert(!valid_); + return; + } + file_iter_->SeekToFirst(); + } + } + Slice key() const override { + assert(valid_); + return file_iter_->key(); + } + Slice value() const override { + assert(valid_); + return file_iter_->value(); + } + Status status() const override { + if (!status_.ok()) { + return status_; + } else if (file_iter_) { + return file_iter_->status(); + } + return Status::OK(); + } + bool PrepareValue() override { + assert(valid_); + if (file_iter_->PrepareValue()) { + return true; + } + + assert(!file_iter_->Valid()); + valid_ = false; + return false; + } + bool IsKeyPinned() const override { + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + file_iter_->IsKeyPinned(); + } + bool IsValuePinned() const override { + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + file_iter_->IsValuePinned(); + } + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; + if (file_iter_) { + file_iter_->SetPinnedItersMgr(pinned_iters_mgr_); + } + } + + private: + const ColumnFamilyData* const cfd_; + const ReadOptions& read_options_; + const std::vector& files_; + + bool valid_; + uint32_t file_index_; + Status status_; + InternalIterator* file_iter_; + PinnedIteratorsManager* pinned_iters_mgr_; + // Kept alive by ForwardIterator::sv_->mutable_cf_options + const std::shared_ptr& prefix_extractor_; + const bool 
allow_unprepared_value_; + const uint8_t block_protection_bytes_per_key_; +}; + +ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options, + ColumnFamilyData* cfd, + SuperVersion* current_sv, + bool allow_unprepared_value) + : db_(db), + read_options_(read_options), + cfd_(cfd), + prefix_extractor_(current_sv->mutable_cf_options.prefix_extractor.get()), + user_comparator_(cfd->user_comparator()), + allow_unprepared_value_(allow_unprepared_value), + immutable_min_heap_(MinIterComparator(&cfd_->internal_comparator())), + sv_(current_sv), + mutable_iter_(nullptr), + current_(nullptr), + valid_(false), + status_(Status::OK()), + immutable_status_(Status::OK()), + has_iter_trimmed_for_upper_bound_(false), + current_over_upper_bound_(false), + is_prev_set_(false), + is_prev_inclusive_(false), + pinned_iters_mgr_(nullptr) { + if (sv_) { + RebuildIterators(false); + } + if (!cfd_->ioptions()->env->GetFileSystem()->use_async_io()) { + read_options_.async_io = false; + } + + // immutable_status_ is a local aggregation of the + // status of the immutable Iterators. + // We have to PermitUncheckedError in case it is never + // used, otherwise it will fail ASSERT_STATUS_CHECKED. + immutable_status_.PermitUncheckedError(); +} + +ForwardIterator::~ForwardIterator() { Cleanup(true); } + +void ForwardIterator::SVCleanup(DBImpl* db, SuperVersion* sv, + bool background_purge_on_iterator_cleanup) { + if (sv->Unref()) { + // Job id == 0 means that this is not our background process, but rather + // user thread + JobContext job_context(0); + db->mutex_.Lock(); + sv->Cleanup(); + db->FindObsoleteFiles(&job_context, false, true); + if (background_purge_on_iterator_cleanup) { + db->ScheduleBgLogWriterClose(&job_context); + db->AddSuperVersionsToFreeQueue(sv); + db->SchedulePurge(); + } + db->mutex_.Unlock(); + if (!background_purge_on_iterator_cleanup) { + delete sv; + } + if (job_context.HaveSomethingToDelete()) { + db->PurgeObsoleteFiles(job_context, background_purge_on_iterator_cleanup); + } + job_context.Clean(); + } +} + +namespace { +struct SVCleanupParams { + DBImpl* db; + SuperVersion* sv; + bool background_purge_on_iterator_cleanup; +}; +} // anonymous namespace + +// Used in PinnedIteratorsManager to release pinned SuperVersion +void ForwardIterator::DeferredSVCleanup(void* arg) { + auto d = reinterpret_cast(arg); + ForwardIterator::SVCleanup(d->db, d->sv, + d->background_purge_on_iterator_cleanup); + delete d; +} + +void ForwardIterator::SVCleanup() { + if (sv_ == nullptr) { + return; + } + bool background_purge = + read_options_.background_purge_on_iterator_cleanup || + db_->immutable_db_options().avoid_unnecessary_blocking_io; + if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) { + // pinned_iters_mgr_ tells us to make sure that all visited key-value slices + // are alive until pinned_iters_mgr_->ReleasePinnedData() is called. + // The slices may point into some memtables owned by sv_, so we need to keep + // sv_ referenced until pinned_iters_mgr_ unpins everything. 
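The branch above parks the SuperVersion reference with the pinned-iterators manager instead of cleaning up eagerly. A toy stand-in (invented names) for that pin-pointer-with-callback idiom, mirroring how PinnedIteratorsManager::PinPtr() is used here:

    #include <utility>
    #include <vector>

    struct DeferredReleases {
      using ReleaseFn = void (*)(void*);
      std::vector<std::pair<void*, ReleaseFn>> pinned;

      void PinPtr(void* p, ReleaseFn release) {
        pinned.emplace_back(p, release);
      }

      // Analogue of ReleasePinnedData(): run every callback, e.g.
      // ForwardIterator::DeferredSVCleanup above.
      void ReleaseAll() {
        for (auto& entry : pinned) entry.second(entry.first);
        pinned.clear();
      }
    };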
+ auto p = new SVCleanupParams{db_, sv_, background_purge}; + pinned_iters_mgr_->PinPtr(p, &ForwardIterator::DeferredSVCleanup); + } else { + SVCleanup(db_, sv_, background_purge); + } +} + +void ForwardIterator::Cleanup(bool release_sv) { + if (mutable_iter_ != nullptr) { + DeleteIterator(mutable_iter_, true /* is_arena */); + } + + for (auto* m : imm_iters_) { + DeleteIterator(m, true /* is_arena */); + } + imm_iters_.clear(); + + for (auto* f : l0_iters_) { + DeleteIterator(f); + } + l0_iters_.clear(); + + for (auto* l : level_iters_) { + DeleteIterator(l); + } + level_iters_.clear(); + + if (release_sv) { + SVCleanup(); + } +} + +bool ForwardIterator::Valid() const { + // See UpdateCurrent(). + return valid_ ? !current_over_upper_bound_ : false; +} + +void ForwardIterator::SeekToFirst() { + if (sv_ == nullptr) { + RebuildIterators(true); + } else if (sv_->version_number != cfd_->GetSuperVersionNumber()) { + RenewIterators(); + } else if (immutable_status_.IsIncomplete()) { + ResetIncompleteIterators(); + } + SeekInternal(Slice(), true, false); +} + +bool ForwardIterator::IsOverUpperBound(const Slice& internal_key) const { + return !(read_options_.iterate_upper_bound == nullptr || + cfd_->internal_comparator().user_comparator()->Compare( + ExtractUserKey(internal_key), + *read_options_.iterate_upper_bound) < 0); +} + +void ForwardIterator::Seek(const Slice& internal_key) { + if (sv_ == nullptr) { + RebuildIterators(true); + } else if (sv_->version_number != cfd_->GetSuperVersionNumber()) { + RenewIterators(); + } else if (immutable_status_.IsIncomplete()) { + ResetIncompleteIterators(); + } + + SeekInternal(internal_key, false, false); + if (read_options_.async_io) { + SeekInternal(internal_key, false, true); + } +} + +// In case of async_io, SeekInternal is called twice with seek_after_async_io +// enabled in second call which only does seeking part to retrieve the blocks. +void ForwardIterator::SeekInternal(const Slice& internal_key, + bool seek_to_first, + bool seek_after_async_io) { + assert(mutable_iter_); + // mutable + if (!seek_after_async_io) { + seek_to_first ? mutable_iter_->SeekToFirst() + : mutable_iter_->Seek(internal_key); + } + + // immutable + // TODO(ljin): NeedToSeekImmutable has negative impact on performance + // if it turns to need to seek immutable often. We probably want to have + // an option to turn it off. + if (seek_to_first || seek_after_async_io || + NeedToSeekImmutable(internal_key)) { + if (!seek_after_async_io) { + immutable_status_ = Status::OK(); + if (has_iter_trimmed_for_upper_bound_ && + ( + // prev_ is not set yet + is_prev_set_ == false || + // We are doing SeekToFirst() and internal_key.size() = 0 + seek_to_first || + // prev_key_ > internal_key + cfd_->internal_comparator().InternalKeyComparator::Compare( + prev_key_.GetInternalKey(), internal_key) > 0)) { + // Some iterators are trimmed. Need to rebuild. + RebuildIterators(true); + // Already seeked mutable iter, so seek again + seek_to_first ? mutable_iter_->SeekToFirst() + : mutable_iter_->Seek(internal_key); + } + { + auto tmp = MinIterHeap(MinIterComparator(&cfd_->internal_comparator())); + immutable_min_heap_.swap(tmp); + } + for (size_t i = 0; i < imm_iters_.size(); i++) { + auto* m = imm_iters_[i]; + seek_to_first ? 
m->SeekToFirst() : m->Seek(internal_key);
+        if (!m->status().ok()) {
+          immutable_status_ = m->status();
+        } else if (m->Valid()) {
+          immutable_min_heap_.push(m);
+        }
+      }
+    }
+
+    Slice target_user_key;
+    if (!seek_to_first) {
+      target_user_key = ExtractUserKey(internal_key);
+    }
+    const VersionStorageInfo* vstorage = sv_->current->storage_info();
+    const std::vector<FileMetaData*>& l0 = vstorage->LevelFiles(0);
+    for (size_t i = 0; i < l0.size(); ++i) {
+      if (!l0_iters_[i]) {
+        continue;
+      }
+      if (seek_after_async_io) {
+        if (!l0_iters_[i]->status().IsTryAgain()) {
+          continue;
+        }
+      }
+
+      if (seek_to_first) {
+        l0_iters_[i]->SeekToFirst();
+      } else {
+        // If the target key passes over the largest key, we are sure Next()
+        // won't go over this file.
+        if (seek_after_async_io == false &&
+            user_comparator_->Compare(target_user_key,
+                                      l0[i]->largest.user_key()) > 0) {
+          if (read_options_.iterate_upper_bound != nullptr) {
+            has_iter_trimmed_for_upper_bound_ = true;
+            DeleteIterator(l0_iters_[i]);
+            l0_iters_[i] = nullptr;
+          }
+          continue;
+        }
+        l0_iters_[i]->Seek(internal_key);
+      }
+
+      if (l0_iters_[i]->status().IsTryAgain()) {
+        assert(!seek_after_async_io);
+        continue;
+      } else if (!l0_iters_[i]->status().ok()) {
+        immutable_status_ = l0_iters_[i]->status();
+      } else if (l0_iters_[i]->Valid() &&
+                 !IsOverUpperBound(l0_iters_[i]->key())) {
+        immutable_min_heap_.push(l0_iters_[i]);
+      } else {
+        has_iter_trimmed_for_upper_bound_ = true;
+        DeleteIterator(l0_iters_[i]);
+        l0_iters_[i] = nullptr;
+      }
+    }
+
+    for (int32_t level = 1; level < vstorage->num_levels(); ++level) {
+      const std::vector<FileMetaData*>& level_files =
+          vstorage->LevelFiles(level);
+      if (level_files.empty()) {
+        continue;
+      }
+      if (level_iters_[level - 1] == nullptr) {
+        continue;
+      }
+
+      if (seek_after_async_io) {
+        if (!level_iters_[level - 1]->status().IsTryAgain()) {
+          continue;
+        }
+      }
+      uint32_t f_idx = 0;
+      if (!seek_to_first && !seek_after_async_io) {
+        f_idx = FindFileInRange(level_files, internal_key, 0,
+                                static_cast<uint32_t>(level_files.size()));
+      }
+
+      // Seek
+      if (seek_after_async_io || f_idx < level_files.size()) {
+        if (!seek_after_async_io) {
+          level_iters_[level - 1]->SetFileIndex(f_idx);
+        }
+        seek_to_first ? level_iters_[level - 1]->SeekToFirst()
+                      : level_iters_[level - 1]->Seek(internal_key);
+
+        if (level_iters_[level - 1]->status().IsTryAgain()) {
+          assert(!seek_after_async_io);
+          continue;
+        } else if (!level_iters_[level - 1]->status().ok()) {
+          immutable_status_ = level_iters_[level - 1]->status();
+        } else if (level_iters_[level - 1]->Valid() &&
+                   !IsOverUpperBound(level_iters_[level - 1]->key())) {
+          immutable_min_heap_.push(level_iters_[level - 1]);
+        } else {
+          // Nothing in this level is interesting. Remove.
+          has_iter_trimmed_for_upper_bound_ = true;
+          DeleteIterator(level_iters_[level - 1]);
+          level_iters_[level - 1] = nullptr;
+        }
+      }
+    }
+
+    if (seek_to_first) {
+      is_prev_set_ = false;
+    } else {
+      prev_key_.SetInternalKey(internal_key);
+      is_prev_set_ = true;
+      is_prev_inclusive_ = true;
+    }
+
+    TEST_SYNC_POINT_CALLBACK("ForwardIterator::SeekInternal:Immutable", this);
+  } else if (current_ && current_ != mutable_iter_) {
+    // current_ is one of immutable iterators, push it back to the heap
+    immutable_min_heap_.push(current_);
+  }
+
+  // For async_io, it should be updated when seek_after_async_io is true (in
+  // second call).
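+  // Concretely (a sketch of the two-pass protocol described above): with
+  // async_io enabled, a user-level Seek(k) executes
+  //   SeekInternal(k, false, /*seek_after_async_io=*/false);  // issue reads
+  //   SeekInternal(k, false, /*seek_after_async_io=*/true);   // re-seek only
+  //                                                           // TryAgain iters
+  // so current_ is refreshed only at the end of the second pass.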
+ if (seek_to_first || !read_options_.async_io || seek_after_async_io) { + UpdateCurrent(); + } + TEST_SYNC_POINT_CALLBACK("ForwardIterator::SeekInternal:Return", this); +} + +void ForwardIterator::Next() { + assert(valid_); + bool update_prev_key = false; + + if (sv_ == nullptr || sv_->version_number != cfd_->GetSuperVersionNumber()) { + std::string current_key = key().ToString(); + Slice old_key(current_key.data(), current_key.size()); + + if (sv_ == nullptr) { + RebuildIterators(true); + } else { + RenewIterators(); + } + + SeekInternal(old_key, false, false); + if (read_options_.async_io) { + SeekInternal(old_key, false, true); + } + + if (!valid_ || key().compare(old_key) != 0) { + return; + } + } else if (current_ != mutable_iter_) { + // It is going to advance immutable iterator + + if (is_prev_set_ && prefix_extractor_) { + // advance prev_key_ to current_ only if they share the same prefix + update_prev_key = + prefix_extractor_->Transform(prev_key_.GetUserKey()) + .compare(prefix_extractor_->Transform(current_->key())) == 0; + } else { + update_prev_key = true; + } + + if (update_prev_key) { + prev_key_.SetInternalKey(current_->key()); + is_prev_set_ = true; + is_prev_inclusive_ = false; + } + } + + current_->Next(); + if (current_ != mutable_iter_) { + if (!current_->status().ok()) { + immutable_status_ = current_->status(); + } else if ((current_->Valid()) && (!IsOverUpperBound(current_->key()))) { + immutable_min_heap_.push(current_); + } else { + if ((current_->Valid()) && (IsOverUpperBound(current_->key()))) { + // remove the current iterator + DeleteCurrentIter(); + current_ = nullptr; + } + if (update_prev_key) { + mutable_iter_->Seek(prev_key_.GetInternalKey()); + } + } + } + UpdateCurrent(); + TEST_SYNC_POINT_CALLBACK("ForwardIterator::Next:Return", this); +} + +Slice ForwardIterator::key() const { + assert(valid_); + return current_->key(); +} + +Slice ForwardIterator::value() const { + assert(valid_); + return current_->value(); +} + +Status ForwardIterator::status() const { + if (!status_.ok()) { + return status_; + } else if (!mutable_iter_->status().ok()) { + return mutable_iter_->status(); + } + + return immutable_status_; +} + +bool ForwardIterator::PrepareValue() { + assert(valid_); + if (current_->PrepareValue()) { + return true; + } + + assert(!current_->Valid()); + assert(!current_->status().ok()); + assert(current_ != mutable_iter_); // memtable iterator can't fail + assert(immutable_status_.ok()); + + valid_ = false; + immutable_status_ = current_->status(); + return false; +} + +Status ForwardIterator::GetProperty(std::string prop_name, std::string* prop) { + assert(prop != nullptr); + if (prop_name == "rocksdb.iterator.super-version-number") { + *prop = std::to_string(sv_->version_number); + return Status::OK(); + } + return Status::InvalidArgument(); +} + +void ForwardIterator::SetPinnedItersMgr( + PinnedIteratorsManager* pinned_iters_mgr) { + pinned_iters_mgr_ = pinned_iters_mgr; + UpdateChildrenPinnedItersMgr(); +} + +void ForwardIterator::UpdateChildrenPinnedItersMgr() { + // Set PinnedIteratorsManager for mutable memtable iterator. + if (mutable_iter_) { + mutable_iter_->SetPinnedItersMgr(pinned_iters_mgr_); + } + + // Set PinnedIteratorsManager for immutable memtable iterators. + for (InternalIterator* child_iter : imm_iters_) { + if (child_iter) { + child_iter->SetPinnedItersMgr(pinned_iters_mgr_); + } + } + + // Set PinnedIteratorsManager for L0 files iterators. 
+ for (InternalIterator* child_iter : l0_iters_) { + if (child_iter) { + child_iter->SetPinnedItersMgr(pinned_iters_mgr_); + } + } + + // Set PinnedIteratorsManager for L1+ levels iterators. + for (ForwardLevelIterator* child_iter : level_iters_) { + if (child_iter) { + child_iter->SetPinnedItersMgr(pinned_iters_mgr_); + } + } +} + +bool ForwardIterator::IsKeyPinned() const { + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + current_->IsKeyPinned(); +} + +bool ForwardIterator::IsValuePinned() const { + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + current_->IsValuePinned(); +} + +void ForwardIterator::RebuildIterators(bool refresh_sv) { + // Clean up + Cleanup(refresh_sv); + if (refresh_sv) { + // New + sv_ = cfd_->GetReferencedSuperVersion(db_); + } + ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(), + kMaxSequenceNumber /* upper_bound */); + mutable_iter_ = sv_->mem->NewIterator(read_options_, &arena_); + sv_->imm->AddIterators(read_options_, &imm_iters_, &arena_); + if (!read_options_.ignore_range_deletions) { + std::unique_ptr range_del_iter( + sv_->mem->NewRangeTombstoneIterator( + read_options_, sv_->current->version_set()->LastSequence(), + false /* immutable_memtable */)); + range_del_agg.AddTombstones(std::move(range_del_iter)); + // Always return Status::OK(). + Status temp_s = sv_->imm->AddRangeTombstoneIterators(read_options_, &arena_, + &range_del_agg); + assert(temp_s.ok()); + } + has_iter_trimmed_for_upper_bound_ = false; + + const auto* vstorage = sv_->current->storage_info(); + const auto& l0_files = vstorage->LevelFiles(0); + l0_iters_.reserve(l0_files.size()); + for (const auto* l0 : l0_files) { + if ((read_options_.iterate_upper_bound != nullptr) && + cfd_->internal_comparator().user_comparator()->Compare( + l0->smallest.user_key(), *read_options_.iterate_upper_bound) > 0) { + // No need to set has_iter_trimmed_for_upper_bound_: this ForwardIterator + // will never be interested in files with smallest key above + // iterate_upper_bound, since iterate_upper_bound can't be changed. + l0_iters_.push_back(nullptr); + continue; + } + l0_iters_.push_back(cfd_->table_cache()->NewIterator( + read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0, + read_options_.ignore_range_deletions ? 
nullptr : &range_del_agg, + sv_->mutable_cf_options.prefix_extractor, + /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, + TableReaderCaller::kUserIterator, /*arena=*/nullptr, + /*skip_filters=*/false, /*level=*/-1, + MaxFileSizeForL0MetaPin(sv_->mutable_cf_options), + /*smallest_compaction_key=*/nullptr, + /*largest_compaction_key=*/nullptr, allow_unprepared_value_, + sv_->mutable_cf_options.block_protection_bytes_per_key)); + } + BuildLevelIterators(vstorage, sv_); + current_ = nullptr; + is_prev_set_ = false; + + UpdateChildrenPinnedItersMgr(); + if (!range_del_agg.IsEmpty()) { + status_ = Status::NotSupported( + "Range tombstones unsupported with ForwardIterator"); + valid_ = false; + } +} + +void ForwardIterator::RenewIterators() { + SuperVersion* svnew; + assert(sv_); + svnew = cfd_->GetReferencedSuperVersion(db_); + + if (mutable_iter_ != nullptr) { + DeleteIterator(mutable_iter_, true /* is_arena */); + } + for (auto* m : imm_iters_) { + DeleteIterator(m, true /* is_arena */); + } + imm_iters_.clear(); + + mutable_iter_ = svnew->mem->NewIterator(read_options_, &arena_); + svnew->imm->AddIterators(read_options_, &imm_iters_, &arena_); + ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(), + kMaxSequenceNumber /* upper_bound */); + if (!read_options_.ignore_range_deletions) { + std::unique_ptr range_del_iter( + svnew->mem->NewRangeTombstoneIterator( + read_options_, sv_->current->version_set()->LastSequence(), + false /* immutable_memtable */)); + range_del_agg.AddTombstones(std::move(range_del_iter)); + // Always return Status::OK(). + Status temp_s = svnew->imm->AddRangeTombstoneIterators( + read_options_, &arena_, &range_del_agg); + assert(temp_s.ok()); + } + + const auto* vstorage = sv_->current->storage_info(); + const auto& l0_files = vstorage->LevelFiles(0); + const auto* vstorage_new = svnew->current->storage_info(); + const auto& l0_files_new = vstorage_new->LevelFiles(0); + size_t iold, inew; + bool found; + std::vector l0_iters_new; + l0_iters_new.reserve(l0_files_new.size()); + + for (inew = 0; inew < l0_files_new.size(); inew++) { + found = false; + for (iold = 0; iold < l0_files.size(); iold++) { + if (l0_files[iold] == l0_files_new[inew]) { + found = true; + break; + } + } + if (found) { + if (l0_iters_[iold] == nullptr) { + l0_iters_new.push_back(nullptr); + TEST_SYNC_POINT_CALLBACK("ForwardIterator::RenewIterators:Null", this); + } else { + l0_iters_new.push_back(l0_iters_[iold]); + l0_iters_[iold] = nullptr; + TEST_SYNC_POINT_CALLBACK("ForwardIterator::RenewIterators:Copy", this); + } + continue; + } + l0_iters_new.push_back(cfd_->table_cache()->NewIterator( + read_options_, *cfd_->soptions(), cfd_->internal_comparator(), + *l0_files_new[inew], + read_options_.ignore_range_deletions ? 
nullptr : &range_del_agg, + svnew->mutable_cf_options.prefix_extractor, + /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, + TableReaderCaller::kUserIterator, /*arena=*/nullptr, + /*skip_filters=*/false, /*level=*/-1, + MaxFileSizeForL0MetaPin(svnew->mutable_cf_options), + /*smallest_compaction_key=*/nullptr, + /*largest_compaction_key=*/nullptr, allow_unprepared_value_, + svnew->mutable_cf_options.block_protection_bytes_per_key)); + } + + for (auto* f : l0_iters_) { + DeleteIterator(f); + } + l0_iters_.clear(); + l0_iters_ = l0_iters_new; + + for (auto* l : level_iters_) { + DeleteIterator(l); + } + level_iters_.clear(); + BuildLevelIterators(vstorage_new, svnew); + current_ = nullptr; + is_prev_set_ = false; + SVCleanup(); + sv_ = svnew; + + UpdateChildrenPinnedItersMgr(); + if (!range_del_agg.IsEmpty()) { + status_ = Status::NotSupported( + "Range tombstones unsupported with ForwardIterator"); + valid_ = false; + } +} + +void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage, + SuperVersion* sv) { + level_iters_.reserve(vstorage->num_levels() - 1); + for (int32_t level = 1; level < vstorage->num_levels(); ++level) { + const auto& level_files = vstorage->LevelFiles(level); + if ((level_files.empty()) || + ((read_options_.iterate_upper_bound != nullptr) && + (user_comparator_->Compare(*read_options_.iterate_upper_bound, + level_files[0]->smallest.user_key()) < + 0))) { + level_iters_.push_back(nullptr); + if (!level_files.empty()) { + has_iter_trimmed_for_upper_bound_ = true; + } + } else { + level_iters_.push_back(new ForwardLevelIterator( + cfd_, read_options_, level_files, + sv->mutable_cf_options.prefix_extractor, allow_unprepared_value_, + sv->mutable_cf_options.block_protection_bytes_per_key)); + } + } +} + +void ForwardIterator::ResetIncompleteIterators() { + const auto& l0_files = sv_->current->storage_info()->LevelFiles(0); + for (size_t i = 0; i < l0_iters_.size(); ++i) { + assert(i < l0_files.size()); + if (!l0_iters_[i] || !l0_iters_[i]->status().IsIncomplete()) { + continue; + } + DeleteIterator(l0_iters_[i]); + l0_iters_[i] = cfd_->table_cache()->NewIterator( + read_options_, *cfd_->soptions(), cfd_->internal_comparator(), + *l0_files[i], /*range_del_agg=*/nullptr, + sv_->mutable_cf_options.prefix_extractor, + /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, + TableReaderCaller::kUserIterator, /*arena=*/nullptr, + /*skip_filters=*/false, /*level=*/-1, + MaxFileSizeForL0MetaPin(sv_->mutable_cf_options), + /*smallest_compaction_key=*/nullptr, + /*largest_compaction_key=*/nullptr, allow_unprepared_value_, + sv_->mutable_cf_options.block_protection_bytes_per_key); + l0_iters_[i]->SetPinnedItersMgr(pinned_iters_mgr_); + } + + for (auto* level_iter : level_iters_) { + if (level_iter && level_iter->status().IsIncomplete()) { + level_iter->Reset(); + } + } + + current_ = nullptr; + is_prev_set_ = false; +} + +void ForwardIterator::UpdateCurrent() { + if (immutable_min_heap_.empty() && !mutable_iter_->Valid()) { + current_ = nullptr; + } else if (immutable_min_heap_.empty()) { + current_ = mutable_iter_; + } else if (!mutable_iter_->Valid()) { + current_ = immutable_min_heap_.top(); + immutable_min_heap_.pop(); + } else { + current_ = immutable_min_heap_.top(); + assert(current_ != nullptr); + assert(current_->Valid()); + int cmp = cfd_->internal_comparator().InternalKeyComparator::Compare( + mutable_iter_->key(), current_->key()); + assert(cmp != 0); + if (cmp > 0) { + immutable_min_heap_.pop(); + } else { + current_ = mutable_iter_; + } + } + 
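+  // Whichever branch was taken, current_ now points at the iterator holding
+  // the smaller internal key (or is nullptr once everything is exhausted);
+  // a tie is impossible because internal keys embed distinct sequence
+  // numbers, which is what the cmp != 0 assertion above checks.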
valid_ = current_ != nullptr && immutable_status_.ok(); + if (!status_.ok()) { + status_ = Status::OK(); + } + + // Upper bound doesn't apply to the memtable iterator. We want Valid() to + // return false when all iterators are over iterate_upper_bound, but can't + // just set valid_ to false, as that would effectively disable the tailing + // optimization (Seek() would be called on all immutable iterators regardless + // of whether the target key is greater than prev_key_). + current_over_upper_bound_ = valid_ && IsOverUpperBound(current_->key()); +} + +bool ForwardIterator::NeedToSeekImmutable(const Slice& target) { + // We maintain the interval (prev_key_, immutable_min_heap_.top()->key()) + // such that there are no records with keys within that range in + // immutable_min_heap_. Since immutable structures (SST files and immutable + // memtables) can't change in this version, we don't need to do a seek if + // 'target' belongs to that interval (immutable_min_heap_.top() is already + // at the correct position). + + if (!valid_ || !current_ || !is_prev_set_ || !immutable_status_.ok()) { + return true; + } + Slice prev_key = prev_key_.GetInternalKey(); + if (prefix_extractor_ && prefix_extractor_->Transform(target).compare( + prefix_extractor_->Transform(prev_key)) != 0) { + return true; + } + if (cfd_->internal_comparator().InternalKeyComparator::Compare( + prev_key, target) >= (is_prev_inclusive_ ? 1 : 0)) { + return true; + } + + if (immutable_min_heap_.empty() && current_ == mutable_iter_) { + // Nothing to seek on. + return false; + } + if (cfd_->internal_comparator().InternalKeyComparator::Compare( + target, current_ == mutable_iter_ ? immutable_min_heap_.top()->key() + : current_->key()) > 0) { + return true; + } + return false; +} + +void ForwardIterator::DeleteCurrentIter() { + const VersionStorageInfo* vstorage = sv_->current->storage_info(); + const std::vector& l0 = vstorage->LevelFiles(0); + for (size_t i = 0; i < l0.size(); ++i) { + if (!l0_iters_[i]) { + continue; + } + if (l0_iters_[i] == current_) { + has_iter_trimmed_for_upper_bound_ = true; + DeleteIterator(l0_iters_[i]); + l0_iters_[i] = nullptr; + return; + } + } + + for (int32_t level = 1; level < vstorage->num_levels(); ++level) { + if (level_iters_[level - 1] == nullptr) { + continue; + } + if (level_iters_[level - 1] == current_) { + has_iter_trimmed_for_upper_bound_ = true; + DeleteIterator(level_iters_[level - 1]); + level_iters_[level - 1] = nullptr; + } + } +} + +bool ForwardIterator::TEST_CheckDeletedIters(int* pdeleted_iters, + int* pnum_iters) { + bool retval = false; + int deleted_iters = 0; + int num_iters = 0; + + const VersionStorageInfo* vstorage = sv_->current->storage_info(); + const std::vector& l0 = vstorage->LevelFiles(0); + for (size_t i = 0; i < l0.size(); ++i) { + if (!l0_iters_[i]) { + retval = true; + deleted_iters++; + } else { + num_iters++; + } + } + + for (int32_t level = 1; level < vstorage->num_levels(); ++level) { + if ((level_iters_[level - 1] == nullptr) && + (!vstorage->LevelFiles(level).empty())) { + retval = true; + deleted_iters++; + } else if (!vstorage->LevelFiles(level).empty()) { + num_iters++; + } + } + if ((!retval) && num_iters <= 1) { + retval = true; + } + if (pdeleted_iters) { + *pdeleted_iters = deleted_iters; + } + if (pnum_iters) { + *pnum_iters = num_iters; + } + return retval; +} + +uint32_t ForwardIterator::FindFileInRange( + const std::vector& files, const Slice& internal_key, + uint32_t left, uint32_t right) { + auto cmp = [&](const FileMetaData* f, const 
Slice& k) -> bool {
+    return cfd_->internal_comparator().InternalKeyComparator::Compare(
+               f->largest.Encode(), k) < 0;
+  };
+  const auto& b = files.begin();
+  return static_cast<uint32_t>(
+      std::lower_bound(b + left, b + right, internal_key, cmp) - b);
+}
+
+void ForwardIterator::DeleteIterator(InternalIterator* iter, bool is_arena) {
+  if (iter == nullptr) {
+    return;
+  }
+
+  if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) {
+    pinned_iters_mgr_->PinIterator(iter, is_arena);
+  } else {
+    if (is_arena) {
+      iter->~InternalIterator();
+    } else {
+      delete iter;
+    }
+  }
+}
+
+} // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.h
new file mode 100644
index 0000000000..cb418aeeb0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/forward_iterator.h
@@ -0,0 +1,166 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include "rocksdb/comparator.h"
+
+#include <queue>
+#include <string>
+#include <vector>
+
+#include "memory/arena.h"
+#include "rocksdb/db.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/options.h"
+#include "table/internal_iterator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DBImpl;
+class Env;
+struct SuperVersion;
+class ColumnFamilyData;
+class ForwardLevelIterator;
+class VersionStorageInfo;
+struct FileMetaData;
+
+class MinIterComparator {
+ public:
+  explicit MinIterComparator(const CompareInterface* comparator)
+      : comparator_(comparator) {}
+
+  bool operator()(InternalIterator* a, InternalIterator* b) {
+    return comparator_->Compare(a->key(), b->key()) > 0;
+  }
+
+ private:
+  const CompareInterface* comparator_;
+};
+
+using MinIterHeap =
+    std::priority_queue<InternalIterator*, std::vector<InternalIterator*>,
+                        MinIterComparator>;
+
+/**
+ * ForwardIterator is a special type of iterator that only supports Seek()
+ * and Next(). It is expected to perform better than TailingIterator by
+ * removing the encapsulation and making all information accessible within
+ * the iterator. In the current implementation, a snapshot is taken at the
+ * time Seek() is called; subsequent Next() calls do not see entries written
+ * after that point.
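+ *
+ * Sketch of intended use (in practice this class backs tailing iterators,
+ * i.e. ReadOptions::tailing == true, and is created by DBImpl rather than
+ * constructed directly; ProcessEntry is a hypothetical callback):
+ *
+ *   ForwardIterator iter(db, read_options, cfd);
+ *   for (iter.Seek(start_internal_key); iter.Valid(); iter.Next()) {
+ *     ProcessEntry(iter.key(), iter.value());
+ *   }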
+ */ +class ForwardIterator : public InternalIterator { + public: + ForwardIterator(DBImpl* db, const ReadOptions& read_options, + ColumnFamilyData* cfd, SuperVersion* current_sv = nullptr, + bool allow_unprepared_value = false); + virtual ~ForwardIterator(); + + void SeekForPrev(const Slice& /*target*/) override { + status_ = Status::NotSupported("ForwardIterator::SeekForPrev()"); + valid_ = false; + } + void SeekToLast() override { + status_ = Status::NotSupported("ForwardIterator::SeekToLast()"); + valid_ = false; + } + void Prev() override { + status_ = Status::NotSupported("ForwardIterator::Prev"); + valid_ = false; + } + + virtual bool Valid() const override; + void SeekToFirst() override; + virtual void Seek(const Slice& target) override; + virtual void Next() override; + virtual Slice key() const override; + virtual Slice value() const override; + virtual Status status() const override; + virtual bool PrepareValue() override; + virtual Status GetProperty(std::string prop_name, std::string* prop) override; + virtual void SetPinnedItersMgr( + PinnedIteratorsManager* pinned_iters_mgr) override; + virtual bool IsKeyPinned() const override; + virtual bool IsValuePinned() const override; + + bool TEST_CheckDeletedIters(int* deleted_iters, int* num_iters); + + private: + void Cleanup(bool release_sv); + // Unreference and, if needed, clean up the current SuperVersion. This is + // either done immediately or deferred until this iterator is unpinned by + // PinnedIteratorsManager. + void SVCleanup(); + static void SVCleanup(DBImpl* db, SuperVersion* sv, + bool background_purge_on_iterator_cleanup); + static void DeferredSVCleanup(void* arg); + + void RebuildIterators(bool refresh_sv); + void RenewIterators(); + void BuildLevelIterators(const VersionStorageInfo* vstorage, + SuperVersion* sv); + void ResetIncompleteIterators(); + void SeekInternal(const Slice& internal_key, bool seek_to_first, + bool seek_after_async_io); + + void UpdateCurrent(); + bool NeedToSeekImmutable(const Slice& internal_key); + void DeleteCurrentIter(); + uint32_t FindFileInRange(const std::vector& files, + const Slice& internal_key, uint32_t left, + uint32_t right); + + bool IsOverUpperBound(const Slice& internal_key) const; + + // Set PinnedIteratorsManager for all children Iterators, this function should + // be called whenever we update children Iterators or pinned_iters_mgr_. + void UpdateChildrenPinnedItersMgr(); + + // A helper function that will release iter in the proper manner, or pass it + // to pinned_iters_mgr_ to release it later if pinning is enabled. + void DeleteIterator(InternalIterator* iter, bool is_arena = false); + + DBImpl* const db_; + ReadOptions read_options_; + ColumnFamilyData* const cfd_; + const SliceTransform* const prefix_extractor_; + const Comparator* user_comparator_; + const bool allow_unprepared_value_; + MinIterHeap immutable_min_heap_; + + SuperVersion* sv_; + InternalIterator* mutable_iter_; + std::vector imm_iters_; + std::vector l0_iters_; + std::vector level_iters_; + InternalIterator* current_; + bool valid_; + + // Internal iterator status; set only by one of the unsupported methods. + Status status_; + // Status of immutable iterators, maintained here to avoid iterating over + // all of them in status(). + Status immutable_status_; + // Indicates that at least one of the immutable iterators pointed to a key + // larger than iterate_upper_bound and was therefore destroyed. Seek() may + // need to rebuild such iterators. 
+ bool has_iter_trimmed_for_upper_bound_; + // Is current key larger than iterate_upper_bound? If so, makes Valid() + // return false. + bool current_over_upper_bound_; + + // Left endpoint of the range of keys that immutable iterators currently + // cover. When Seek() is called with a key that's within that range, immutable + // iterators don't need to be moved; see NeedToSeekImmutable(). This key is + // included in the range after a Seek(), but excluded when advancing the + // iterator using Next(). + IterKey prev_key_; + bool is_prev_set_; + bool is_prev_inclusive_; + + PinnedIteratorsManager* pinned_iters_mgr_; + Arena arena_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/history_trimming_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/history_trimming_iterator.h new file mode 100644 index 0000000000..4af5cde720 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/history_trimming_iterator.h @@ -0,0 +1,95 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include +#include + +#include "db/dbformat.h" +#include "rocksdb/iterator.h" +#include "rocksdb/slice.h" +#include "table/internal_iterator.h" + +namespace ROCKSDB_NAMESPACE { + +class HistoryTrimmingIterator : public InternalIterator { + public: + explicit HistoryTrimmingIterator(InternalIterator* input, + const Comparator* cmp, const std::string& ts) + : input_(input), filter_ts_(ts), cmp_(cmp) { + assert(cmp_->timestamp_size() > 0 && !ts.empty()); + } + + bool filter() const { + if (!input_->Valid()) { + return true; + } + Slice current_ts = ExtractTimestampFromKey(key(), cmp_->timestamp_size()); + return cmp_->CompareTimestamp(current_ts, Slice(filter_ts_)) <= 0; + } + + bool Valid() const override { return input_->Valid(); } + + void SeekToFirst() override { + input_->SeekToFirst(); + while (!filter()) { + input_->Next(); + } + } + + void SeekToLast() override { + input_->SeekToLast(); + while (!filter()) { + input_->Prev(); + } + } + + void Seek(const Slice& target) override { + input_->Seek(target); + while (!filter()) { + input_->Next(); + } + } + + void SeekForPrev(const Slice& target) override { + input_->SeekForPrev(target); + while (!filter()) { + input_->Prev(); + } + } + + void Next() override { + do { + input_->Next(); + } while (!filter()); + } + + void Prev() override { + do { + input_->Prev(); + } while (!filter()); + } + + Slice key() const override { return input_->key(); } + + Slice value() const override { return input_->value(); } + + Status status() const override { return input_->status(); } + + bool IsKeyPinned() const override { return input_->IsKeyPinned(); } + + bool IsValuePinned() const override { return input_->IsValuePinned(); } + + bool IsDeleteRangeSentinelKey() const override { + return input_->IsDeleteRangeSentinelKey(); + } + + private: + InternalIterator* input_; + const std::string filter_ts_; + const Comparator* const cmp_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.cc new file mode 100644 index 0000000000..f76d0ac1b9 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.cc @@ -0,0 
+1,364 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/version_builder.h"
+
+#include "db/import_column_family_job.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <string>
+#include <vector>
+
+#include "db/version_edit.h"
+#include "file/file_util.h"
+#include "file/random_access_file_reader.h"
+#include "logging/logging.h"
+#include "table/merging_iterator.h"
+#include "table/scoped_arena_iterator.h"
+#include "table/sst_file_writer_collectors.h"
+#include "table/table_builder.h"
+#include "table/unique_id_impl.h"
+#include "util/stop_watch.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number,
+                                      SuperVersion* sv) {
+  Status status;
+
+  // Read the information of files we are importing
+  for (const auto& file_metadata : metadata_) {
+    const auto file_path = file_metadata.db_path + "/" + file_metadata.name;
+    IngestedFileInfo file_to_import;
+    status = GetIngestedFileInfo(file_path, next_file_number++, sv,
+                                 file_metadata, &file_to_import);
+    if (!status.ok()) {
+      return status;
+    }
+    files_to_import_.push_back(file_to_import);
+  }
+
+  auto num_files = files_to_import_.size();
+  if (num_files == 0) {
+    status = Status::InvalidArgument("The list of files is empty");
+    return status;
+  }
+
+  for (const auto& f : files_to_import_) {
+    if (f.num_entries == 0) {
+      status = Status::InvalidArgument("File contains no entries");
+      return status;
+    }
+
+    if (!f.smallest_internal_key.Valid() || !f.largest_internal_key.Valid()) {
+      status = Status::Corruption("File has corrupted keys");
+      return status;
+    }
+  }
+
+  // Copy/Move external files into DB
+  auto hardlink_files = import_options_.move_files;
+  for (auto& f : files_to_import_) {
+    const auto path_outside_db = f.external_file_path;
+    const auto path_inside_db = TableFileName(
+        cfd_->ioptions()->cf_paths, f.fd.GetNumber(), f.fd.GetPathId());
+
+    if (hardlink_files) {
+      status =
+          fs_->LinkFile(path_outside_db, path_inside_db, IOOptions(), nullptr);
+      if (status.IsNotSupported()) {
+        // Original file is on a different FS, use copy instead of hard linking
+        hardlink_files = false;
+        ROCKS_LOG_INFO(db_options_.info_log,
+                       "Tried to link file %s but it's not supported : %s",
+                       f.internal_file_path.c_str(), status.ToString().c_str());
+      }
+    }
+    if (!hardlink_files) {
+      status =
+          CopyFile(fs_.get(), path_outside_db, path_inside_db, 0,
+                   db_options_.use_fsync, io_tracer_, Temperature::kUnknown);
+    }
+    if (!status.ok()) {
+      break;
+    }
+    f.copy_file = !hardlink_files;
+    f.internal_file_path = path_inside_db;
+  }
+
+  if (!status.ok()) {
+    // We failed, remove all files that we copied into the db
+    for (const auto& f : files_to_import_) {
+      if (f.internal_file_path.empty()) {
+        break;
+      }
+      const auto s =
+          fs_->DeleteFile(f.internal_file_path, IOOptions(), nullptr);
+      if (!s.ok()) {
+        ROCKS_LOG_WARN(db_options_.info_log,
+                       "AddFile() clean up for file %s failed : %s",
+                       f.internal_file_path.c_str(), s.ToString().c_str());
+      }
+    }
+  }
+
+  return status;
+}
+
+// REQUIRES: we have become the only writer by entering both write_thread_ and
+// nonmem_write_thread_
+Status ImportColumnFamilyJob::Run() {
+  // We use the import time as the ancestor time. This is the time the data
+  // is written to the database.
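+  // (The dummy VersionBuilder/VersionStorageInfo constructed below exist only
+  // to replay the imported files through a VersionEdit so that any missing
+  // epoch numbers can be recovered before the real edit is recorded.)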
+  int64_t temp_current_time = 0;
+  uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
+  uint64_t current_time = kUnknownOldestAncesterTime;
+  if (clock_->GetCurrentTime(&temp_current_time).ok()) {
+    current_time = oldest_ancester_time =
+        static_cast<uint64_t>(temp_current_time);
+  }
+
+  // Recover files' epoch number using dummy VersionStorageInfo
+  VersionBuilder dummy_version_builder(
+      cfd_->current()->version_set()->file_options(), cfd_->ioptions(),
+      cfd_->table_cache(), cfd_->current()->storage_info(),
+      cfd_->current()->version_set(),
+      cfd_->GetFileMetadataCacheReservationManager());
+  VersionStorageInfo dummy_vstorage(
+      &cfd_->internal_comparator(), cfd_->user_comparator(),
+      cfd_->NumberLevels(), cfd_->ioptions()->compaction_style,
+      nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks,
+      EpochNumberRequirement::kMightMissing);
+  Status s;
+  for (size_t i = 0; s.ok() && i < files_to_import_.size(); ++i) {
+    const auto& f = files_to_import_[i];
+    const auto& file_metadata = metadata_[i];
+
+    uint64_t tail_size = 0;
+    bool contain_no_data_blocks = f.table_properties.num_entries > 0 &&
+                                  (f.table_properties.num_entries ==
+                                   f.table_properties.num_range_deletions);
+    if (f.table_properties.tail_start_offset > 0 || contain_no_data_blocks) {
+      uint64_t file_size = f.fd.GetFileSize();
+      assert(f.table_properties.tail_start_offset <= file_size);
+      tail_size = file_size - f.table_properties.tail_start_offset;
+    }
+
+    VersionEdit dummy_version_edit;
+    dummy_version_edit.AddFile(
+        file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(),
+        f.fd.GetFileSize(), f.smallest_internal_key, f.largest_internal_key,
+        file_metadata.smallest_seqno, file_metadata.largest_seqno, false,
+        file_metadata.temperature, kInvalidBlobFileNumber, oldest_ancester_time,
+        current_time, file_metadata.epoch_number, kUnknownFileChecksum,
+        kUnknownFileChecksumFuncName, f.unique_id, 0, tail_size);
+    s = dummy_version_builder.Apply(&dummy_version_edit);
+  }
+  if (s.ok()) {
+    s = dummy_version_builder.SaveTo(&dummy_vstorage);
+  }
+  if (s.ok()) {
+    dummy_vstorage.RecoverEpochNumbers(cfd_);
+  }
+
+  // Record changes from this CF import in VersionEdit, including files with
+  // recovered epoch numbers
+  if (s.ok()) {
+    edit_.SetColumnFamily(cfd_->GetID());
+
+    for (int level = 0; level < dummy_vstorage.num_levels(); level++) {
+      for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) {
+        edit_.AddFile(level, *file_meta);
+        // If the incoming sequence number is higher, update the local
+        // sequence number.
+        if (file_meta->fd.largest_seqno > versions_->LastSequence()) {
+          versions_->SetLastAllocatedSequence(file_meta->fd.largest_seqno);
+          versions_->SetLastPublishedSequence(file_meta->fd.largest_seqno);
+          versions_->SetLastSequence(file_meta->fd.largest_seqno);
+        }
+      }
+    }
+  }
+
+  // Release resources occupied by the dummy VersionStorageInfo
+  for (int level = 0; level < dummy_vstorage.num_levels(); level++) {
+    for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) {
+      file_meta->refs--;
+      if (file_meta->refs <= 0) {
+        delete file_meta;
+      }
+    }
+  }
+  return s;
+}
+
+void ImportColumnFamilyJob::Cleanup(const Status& status) {
+  if (!status.ok()) {
+    // We failed to add files to the database; remove all the files we copied.
+    for (const auto& f : files_to_import_) {
+      const auto s =
+          fs_->DeleteFile(f.internal_file_path, IOOptions(), nullptr);
+      if (!s.ok()) {
+        ROCKS_LOG_WARN(db_options_.info_log,
+                       "AddFile() clean up for file %s failed : %s",
+                       f.internal_file_path.c_str(), s.ToString().c_str());
+      }
+    }
+  } else if (status.ok() && import_options_.move_files) {
+    // The files were moved and added successfully, remove original file links
+    for (IngestedFileInfo& f : files_to_import_) {
+      const auto s =
+          fs_->DeleteFile(f.external_file_path, IOOptions(), nullptr);
+      if (!s.ok()) {
+        ROCKS_LOG_WARN(
+            db_options_.info_log,
+            "%s was added to DB successfully but failed to remove original "
+            "file link : %s",
+            f.external_file_path.c_str(), s.ToString().c_str());
+      }
+    }
+  }
+}
+
+Status ImportColumnFamilyJob::GetIngestedFileInfo(
+    const std::string& external_file, uint64_t new_file_number,
+    SuperVersion* sv, const LiveFileMetaData& file_meta,
+    IngestedFileInfo* file_to_import) {
+  file_to_import->external_file_path = external_file;
+  Status status;
+  if (file_meta.size > 0) {
+    file_to_import->file_size = file_meta.size;
+  } else {
+    // Get external file size
+    status = fs_->GetFileSize(external_file, IOOptions(),
+                              &file_to_import->file_size, nullptr);
+    if (!status.ok()) {
+      return status;
+    }
+  }
+  // Assign FD with number
+  file_to_import->fd =
+      FileDescriptor(new_file_number, 0, file_to_import->file_size);
+
+  // Create TableReader for external file
+  std::unique_ptr<TableReader> table_reader;
+  std::unique_ptr<FSRandomAccessFile> sst_file;
+  std::unique_ptr<RandomAccessFileReader> sst_file_reader;
+
+  status =
+      fs_->NewRandomAccessFile(external_file, env_options_, &sst_file, nullptr);
+  if (!status.ok()) {
+    return status;
+  }
+  sst_file_reader.reset(new RandomAccessFileReader(
+      std::move(sst_file), external_file, nullptr /*Env*/, io_tracer_));
+
+  status = cfd_->ioptions()->table_factory->NewTableReader(
+      TableReaderOptions(
+          *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor,
+          env_options_, cfd_->internal_comparator(),
+          sv->mutable_cf_options.block_protection_bytes_per_key,
+          /*skip_filters*/ false, /*immortal*/ false,
+          /*force_direct_prefetch*/ false, /*level*/ -1,
+          /*block_cache_tracer*/ nullptr,
+          /*max_file_size_for_l0_meta_pin*/ 0, versions_->DbSessionId(),
+          /*cur_file_num*/ new_file_number),
+      std::move(sst_file_reader), file_to_import->file_size, &table_reader);
+  if (!status.ok()) {
+    return status;
+  }
+
+  // Get the external file properties
+  auto props = table_reader->GetTableProperties();
+
+  // Set original_seqno to 0.
+  file_to_import->original_seqno = 0;
+
+  // Get number of entries in table
+  file_to_import->num_entries = props->num_entries;
+
+  // If the importing files were exported with Checkpoint::ExportColumnFamily(),
+  // we cannot simply recompute smallest and largest used to truncate range
+  // tombstones from file content, and we expect smallest and largest populated
+  // in file_meta.
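+  // Note that when the bounds do have to be recomputed, both point keys and
+  // range tombstones are consulted below, since a range tombstone may extend
+  // beyond the first or last point key in the file.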
+ if (file_meta.smallest.empty()) { + assert(file_meta.largest.empty()); + // TODO: plumb Env::IOActivity + ReadOptions ro; + std::unique_ptr iter(table_reader->NewIterator( + ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kExternalSSTIngestion)); + + // Get first (smallest) key from file + iter->SeekToFirst(); + bool bound_set = false; + if (iter->Valid()) { + file_to_import->smallest_internal_key.DecodeFrom(iter->key()); + iter->SeekToLast(); + file_to_import->largest_internal_key.DecodeFrom(iter->key()); + bound_set = true; + } + + std::unique_ptr range_del_iter{ + table_reader->NewRangeTombstoneIterator(ro)}; + if (range_del_iter != nullptr) { + range_del_iter->SeekToFirst(); + if (range_del_iter->Valid()) { + ParsedInternalKey key; + Status pik_status = ParseInternalKey(range_del_iter->key(), &key, + db_options_.allow_data_in_errors); + if (!pik_status.ok()) { + return Status::Corruption("Corrupted key in external file. ", + pik_status.getState()); + } + RangeTombstone first_tombstone(key, range_del_iter->value()); + InternalKey start_key = first_tombstone.SerializeKey(); + const InternalKeyComparator* icmp = &cfd_->internal_comparator(); + if (!bound_set || + icmp->Compare(start_key, file_to_import->smallest_internal_key) < + 0) { + file_to_import->smallest_internal_key = start_key; + } + + range_del_iter->SeekToLast(); + pik_status = ParseInternalKey(range_del_iter->key(), &key, + db_options_.allow_data_in_errors); + if (!pik_status.ok()) { + return Status::Corruption("Corrupted key in external file. ", + pik_status.getState()); + } + RangeTombstone last_tombstone(key, range_del_iter->value()); + InternalKey end_key = last_tombstone.SerializeEndKey(); + if (!bound_set || + icmp->Compare(end_key, file_to_import->largest_internal_key) > 0) { + file_to_import->largest_internal_key = end_key; + } + bound_set = true; + } + } + assert(bound_set); + } else { + assert(!file_meta.largest.empty()); + file_to_import->smallest_internal_key.DecodeFrom(file_meta.smallest); + file_to_import->largest_internal_key.DecodeFrom(file_meta.largest); + } + + file_to_import->cf_id = static_cast(props->column_family_id); + + file_to_import->table_properties = *props; + + auto s = GetSstInternalUniqueId(props->db_id, props->db_session_id, + props->orig_file_number, + &(file_to_import->unique_id)); + if (!s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Failed to get SST unique id for file %s", + file_to_import->internal_file_path.c_str()); + } + + return status; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.h new file mode 100644 index 0000000000..d0e2f9c296 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/import_column_family_job.h @@ -0,0 +1,82 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "db/column_family.h"
+#include "db/external_sst_file_ingestion_job.h"
+#include "db/snapshot_impl.h"
+#include "options/db_options.h"
+#include "rocksdb/db.h"
+#include "rocksdb/metadata.h"
+#include "rocksdb/sst_file_writer.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+struct EnvOptions;
+class SystemClock;
+
+// Imports a set of sst files as is into a new column family. Logic is similar
+// to ExternalSstFileIngestionJob.
+class ImportColumnFamilyJob {
+ public:
+  ImportColumnFamilyJob(VersionSet* versions, ColumnFamilyData* cfd,
+                        const ImmutableDBOptions& db_options,
+                        const EnvOptions& env_options,
+                        const ImportColumnFamilyOptions& import_options,
+                        const std::vector<LiveFileMetaData>& metadata,
+                        const std::shared_ptr<IOTracer>& io_tracer)
+      : clock_(db_options.clock),
+        versions_(versions),
+        cfd_(cfd),
+        db_options_(db_options),
+        fs_(db_options_.fs, io_tracer),
+        env_options_(env_options),
+        import_options_(import_options),
+        metadata_(metadata),
+        io_tracer_(io_tracer) {}
+
+  // Prepare the job by copying external files into the DB.
+  Status Prepare(uint64_t next_file_number, SuperVersion* sv);
+
+  // Will execute the import job and prepare edit() to be applied.
+  // REQUIRES: Mutex held
+  Status Run();
+
+  // Cleanup after successful/failed job
+  void Cleanup(const Status& status);
+
+  VersionEdit* edit() { return &edit_; }
+
+  const autovector<IngestedFileInfo>& files_to_import() const {
+    return files_to_import_;
+  }
+
+ private:
+  // Open the external file and populate `file_to_import` with all the
+  // external information we need to import this file.
+  Status GetIngestedFileInfo(const std::string& external_file,
+                             uint64_t new_file_number, SuperVersion* sv,
+                             const LiveFileMetaData& file_meta,
+                             IngestedFileInfo* file_to_import);
+
+  SystemClock* clock_;
+  VersionSet* versions_;
+  ColumnFamilyData* cfd_;
+  const ImmutableDBOptions& db_options_;
+  const FileSystemPtr fs_;
+  const EnvOptions& env_options_;
+  autovector<IngestedFileInfo> files_to_import_;
+  VersionEdit edit_;
+  const ImportColumnFamilyOptions& import_options_;
+  std::vector<LiveFileMetaData> metadata_;
+  const std::shared_ptr<IOTracer> io_tracer_;
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.cc
new file mode 100644
index 0000000000..ca28542659
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.cc
@@ -0,0 +1,2121 @@
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+ +#include "db/internal_stats.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cache/cache_entry_roles.h" +#include "cache/cache_entry_stats.h" +#include "db/column_family.h" +#include "db/db_impl/db_impl.h" +#include "db/write_stall_stats.h" +#include "port/port.h" +#include "rocksdb/system_clock.h" +#include "rocksdb/table.h" +#include "table/block_based/cachable_entry.h" +#include "util/hash_containers.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + + +const std::map InternalStats::compaction_level_stats = + { + {LevelStatType::NUM_FILES, LevelStat{"NumFiles", "Files"}}, + {LevelStatType::COMPACTED_FILES, + LevelStat{"CompactedFiles", "CompactedFiles"}}, + {LevelStatType::SIZE_BYTES, LevelStat{"SizeBytes", "Size"}}, + {LevelStatType::SCORE, LevelStat{"Score", "Score"}}, + {LevelStatType::READ_GB, LevelStat{"ReadGB", "Read(GB)"}}, + {LevelStatType::RN_GB, LevelStat{"RnGB", "Rn(GB)"}}, + {LevelStatType::RNP1_GB, LevelStat{"Rnp1GB", "Rnp1(GB)"}}, + {LevelStatType::WRITE_GB, LevelStat{"WriteGB", "Write(GB)"}}, + {LevelStatType::W_NEW_GB, LevelStat{"WnewGB", "Wnew(GB)"}}, + {LevelStatType::MOVED_GB, LevelStat{"MovedGB", "Moved(GB)"}}, + {LevelStatType::WRITE_AMP, LevelStat{"WriteAmp", "W-Amp"}}, + {LevelStatType::READ_MBPS, LevelStat{"ReadMBps", "Rd(MB/s)"}}, + {LevelStatType::WRITE_MBPS, LevelStat{"WriteMBps", "Wr(MB/s)"}}, + {LevelStatType::COMP_SEC, LevelStat{"CompSec", "Comp(sec)"}}, + {LevelStatType::COMP_CPU_SEC, + LevelStat{"CompMergeCPU", "CompMergeCPU(sec)"}}, + {LevelStatType::COMP_COUNT, LevelStat{"CompCount", "Comp(cnt)"}}, + {LevelStatType::AVG_SEC, LevelStat{"AvgSec", "Avg(sec)"}}, + {LevelStatType::KEY_IN, LevelStat{"KeyIn", "KeyIn"}}, + {LevelStatType::KEY_DROP, LevelStat{"KeyDrop", "KeyDrop"}}, + {LevelStatType::R_BLOB_GB, LevelStat{"RblobGB", "Rblob(GB)"}}, + {LevelStatType::W_BLOB_GB, LevelStat{"WblobGB", "Wblob(GB)"}}, +}; + +const std::map + InternalStats::db_stats_type_to_info = { + {InternalStats::kIntStatsWalFileBytes, + DBStatInfo{"db.wal_bytes_written"}}, + {InternalStats::kIntStatsWalFileSynced, DBStatInfo{"db.wal_syncs"}}, + {InternalStats::kIntStatsBytesWritten, + DBStatInfo{"db.user_bytes_written"}}, + {InternalStats::kIntStatsNumKeysWritten, + DBStatInfo{"db.user_keys_written"}}, + {InternalStats::kIntStatsWriteDoneByOther, + DBStatInfo{"db.user_writes_by_other"}}, + {InternalStats::kIntStatsWriteDoneBySelf, + DBStatInfo{"db.user_writes_by_self"}}, + {InternalStats::kIntStatsWriteWithWal, + DBStatInfo{"db.user_writes_with_wal"}}, + {InternalStats::kIntStatsWriteStallMicros, + DBStatInfo{"db.user_write_stall_micros"}}, + {InternalStats::kIntStatsWriteBufferManagerLimitStopsCounts, + DBStatInfo{WriteStallStatsMapKeys::CauseConditionCount( + WriteStallCause::kWriteBufferManagerLimit, + WriteStallCondition::kStopped)}}, +}; + +namespace { +const double kMB = 1048576.0; +const double kGB = kMB * 1024; +const double kMicrosInSec = 1000000.0; + +void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name, + const std::string& group_by) { + int written_size = + snprintf(buf, len, "\n** Compaction Stats [%s] **\n", cf_name.c_str()); + written_size = std::min(written_size, static_cast(len)); + auto hdr = [](LevelStatType t) { + return InternalStats::compaction_level_stats.at(t).header_name.c_str(); + }; + int line_size = snprintf( + buf + written_size, len - written_size, + "%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s " + "%s\n", + // Note that we skip COMPACTED_FILES 
and merge it with Files column + group_by.c_str(), hdr(LevelStatType::NUM_FILES), + hdr(LevelStatType::SIZE_BYTES), hdr(LevelStatType::SCORE), + hdr(LevelStatType::READ_GB), hdr(LevelStatType::RN_GB), + hdr(LevelStatType::RNP1_GB), hdr(LevelStatType::WRITE_GB), + hdr(LevelStatType::W_NEW_GB), hdr(LevelStatType::MOVED_GB), + hdr(LevelStatType::WRITE_AMP), hdr(LevelStatType::READ_MBPS), + hdr(LevelStatType::WRITE_MBPS), hdr(LevelStatType::COMP_SEC), + hdr(LevelStatType::COMP_CPU_SEC), hdr(LevelStatType::COMP_COUNT), + hdr(LevelStatType::AVG_SEC), hdr(LevelStatType::KEY_IN), + hdr(LevelStatType::KEY_DROP), hdr(LevelStatType::R_BLOB_GB), + hdr(LevelStatType::W_BLOB_GB)); + + written_size += line_size; + written_size = std::min(written_size, static_cast(len)); + snprintf(buf + written_size, len - written_size, "%s\n", + std::string(line_size, '-').c_str()); +} + +void PrepareLevelStats(std::map* level_stats, + int num_files, int being_compacted, + double total_file_size, double score, double w_amp, + const InternalStats::CompactionStats& stats) { + const uint64_t bytes_read = stats.bytes_read_non_output_levels + + stats.bytes_read_output_level + + stats.bytes_read_blob; + const uint64_t bytes_written = stats.bytes_written + stats.bytes_written_blob; + const int64_t bytes_new = stats.bytes_written - stats.bytes_read_output_level; + const double elapsed = (stats.micros + 1) / kMicrosInSec; + + (*level_stats)[LevelStatType::NUM_FILES] = num_files; + (*level_stats)[LevelStatType::COMPACTED_FILES] = being_compacted; + (*level_stats)[LevelStatType::SIZE_BYTES] = total_file_size; + (*level_stats)[LevelStatType::SCORE] = score; + (*level_stats)[LevelStatType::READ_GB] = bytes_read / kGB; + (*level_stats)[LevelStatType::RN_GB] = + stats.bytes_read_non_output_levels / kGB; + (*level_stats)[LevelStatType::RNP1_GB] = stats.bytes_read_output_level / kGB; + (*level_stats)[LevelStatType::WRITE_GB] = stats.bytes_written / kGB; + (*level_stats)[LevelStatType::W_NEW_GB] = bytes_new / kGB; + (*level_stats)[LevelStatType::MOVED_GB] = stats.bytes_moved / kGB; + (*level_stats)[LevelStatType::WRITE_AMP] = w_amp; + (*level_stats)[LevelStatType::READ_MBPS] = bytes_read / kMB / elapsed; + (*level_stats)[LevelStatType::WRITE_MBPS] = bytes_written / kMB / elapsed; + (*level_stats)[LevelStatType::COMP_SEC] = stats.micros / kMicrosInSec; + (*level_stats)[LevelStatType::COMP_CPU_SEC] = stats.cpu_micros / kMicrosInSec; + (*level_stats)[LevelStatType::COMP_COUNT] = stats.count; + (*level_stats)[LevelStatType::AVG_SEC] = + stats.count == 0 ? 
0 : stats.micros / kMicrosInSec / stats.count; + (*level_stats)[LevelStatType::KEY_IN] = + static_cast(stats.num_input_records); + (*level_stats)[LevelStatType::KEY_DROP] = + static_cast(stats.num_dropped_records); + (*level_stats)[LevelStatType::R_BLOB_GB] = stats.bytes_read_blob / kGB; + (*level_stats)[LevelStatType::W_BLOB_GB] = stats.bytes_written_blob / kGB; +} + +void PrintLevelStats(char* buf, size_t len, const std::string& name, + const std::map& stat_value) { + snprintf( + buf, len, + "%4s " /* Level */ + "%6d/%-3d " /* Files */ + "%8s " /* Size */ + "%5.1f " /* Score */ + "%8.1f " /* Read(GB) */ + "%7.1f " /* Rn(GB) */ + "%8.1f " /* Rnp1(GB) */ + "%9.1f " /* Write(GB) */ + "%8.1f " /* Wnew(GB) */ + "%9.1f " /* Moved(GB) */ + "%5.1f " /* W-Amp */ + "%8.1f " /* Rd(MB/s) */ + "%8.1f " /* Wr(MB/s) */ + "%9.2f " /* Comp(sec) */ + "%17.2f " /* CompMergeCPU(sec) */ + "%9d " /* Comp(cnt) */ + "%8.3f " /* Avg(sec) */ + "%7s " /* KeyIn */ + "%6s " /* KeyDrop */ + "%9.1f " /* Rblob(GB) */ + "%9.1f\n", /* Wblob(GB) */ + name.c_str(), static_cast(stat_value.at(LevelStatType::NUM_FILES)), + static_cast(stat_value.at(LevelStatType::COMPACTED_FILES)), + BytesToHumanString( + static_cast(stat_value.at(LevelStatType::SIZE_BYTES))) + .c_str(), + stat_value.at(LevelStatType::SCORE), + stat_value.at(LevelStatType::READ_GB), + stat_value.at(LevelStatType::RN_GB), + stat_value.at(LevelStatType::RNP1_GB), + stat_value.at(LevelStatType::WRITE_GB), + stat_value.at(LevelStatType::W_NEW_GB), + stat_value.at(LevelStatType::MOVED_GB), + stat_value.at(LevelStatType::WRITE_AMP), + stat_value.at(LevelStatType::READ_MBPS), + stat_value.at(LevelStatType::WRITE_MBPS), + stat_value.at(LevelStatType::COMP_SEC), + stat_value.at(LevelStatType::COMP_CPU_SEC), + static_cast(stat_value.at(LevelStatType::COMP_COUNT)), + stat_value.at(LevelStatType::AVG_SEC), + NumberToHumanString( + static_cast(stat_value.at(LevelStatType::KEY_IN))) + .c_str(), + NumberToHumanString( + static_cast(stat_value.at(LevelStatType::KEY_DROP))) + .c_str(), + stat_value.at(LevelStatType::R_BLOB_GB), + stat_value.at(LevelStatType::W_BLOB_GB)); +} + +void PrintLevelStats(char* buf, size_t len, const std::string& name, + int num_files, int being_compacted, double total_file_size, + double score, double w_amp, + const InternalStats::CompactionStats& stats) { + std::map level_stats; + PrepareLevelStats(&level_stats, num_files, being_compacted, total_file_size, + score, w_amp, stats); + PrintLevelStats(buf, len, name, level_stats); +} + +// Assumes that trailing numbers represent an optional argument. This requires +// property names to not end with numbers. 
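+// For example, "rocksdb.num-files-at-level5" splits into the pair
+// {"rocksdb.num-files-at-level", "5"}.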
+std::pair GetPropertyNameAndArg(const Slice& property) { + Slice name = property, arg = property; + size_t sfx_len = 0; + while (sfx_len < property.size() && + isdigit(property[property.size() - sfx_len - 1])) { + ++sfx_len; + } + name.remove_suffix(sfx_len); + arg.remove_prefix(property.size() - sfx_len); + return {name, arg}; +} +} // anonymous namespace + +static const std::string rocksdb_prefix = "rocksdb."; + +static const std::string num_files_at_level_prefix = "num-files-at-level"; +static const std::string compression_ratio_at_level_prefix = + "compression-ratio-at-level"; +static const std::string allstats = "stats"; +static const std::string sstables = "sstables"; +static const std::string cfstats = "cfstats"; +static const std::string cfstats_no_file_histogram = + "cfstats-no-file-histogram"; +static const std::string cf_file_histogram = "cf-file-histogram"; +static const std::string cf_write_stall_stats = "cf-write-stall-stats"; +static const std::string dbstats = "dbstats"; +static const std::string db_write_stall_stats = "db-write-stall-stats"; +static const std::string levelstats = "levelstats"; +static const std::string block_cache_entry_stats = "block-cache-entry-stats"; +static const std::string fast_block_cache_entry_stats = + "fast-block-cache-entry-stats"; +static const std::string num_immutable_mem_table = "num-immutable-mem-table"; +static const std::string num_immutable_mem_table_flushed = + "num-immutable-mem-table-flushed"; +static const std::string mem_table_flush_pending = "mem-table-flush-pending"; +static const std::string compaction_pending = "compaction-pending"; +static const std::string background_errors = "background-errors"; +static const std::string cur_size_active_mem_table = + "cur-size-active-mem-table"; +static const std::string cur_size_all_mem_tables = "cur-size-all-mem-tables"; +static const std::string size_all_mem_tables = "size-all-mem-tables"; +static const std::string num_entries_active_mem_table = + "num-entries-active-mem-table"; +static const std::string num_entries_imm_mem_tables = + "num-entries-imm-mem-tables"; +static const std::string num_deletes_active_mem_table = + "num-deletes-active-mem-table"; +static const std::string num_deletes_imm_mem_tables = + "num-deletes-imm-mem-tables"; +static const std::string estimate_num_keys = "estimate-num-keys"; +static const std::string estimate_table_readers_mem = + "estimate-table-readers-mem"; +static const std::string is_file_deletions_enabled = + "is-file-deletions-enabled"; +static const std::string num_snapshots = "num-snapshots"; +static const std::string oldest_snapshot_time = "oldest-snapshot-time"; +static const std::string oldest_snapshot_sequence = "oldest-snapshot-sequence"; +static const std::string num_live_versions = "num-live-versions"; +static const std::string current_version_number = + "current-super-version-number"; +static const std::string estimate_live_data_size = "estimate-live-data-size"; +static const std::string min_log_number_to_keep_str = "min-log-number-to-keep"; +static const std::string min_obsolete_sst_number_to_keep_str = + "min-obsolete-sst-number-to-keep"; +static const std::string base_level_str = "base-level"; +static const std::string total_sst_files_size = "total-sst-files-size"; +static const std::string live_sst_files_size = "live-sst-files-size"; +static const std::string live_sst_files_size_at_temperature = + "live-sst-files-size-at-temperature"; +static const std::string estimate_pending_comp_bytes = + "estimate-pending-compaction-bytes"; +static 
const std::string aggregated_table_properties = + "aggregated-table-properties"; +static const std::string aggregated_table_properties_at_level = + aggregated_table_properties + "-at-level"; +static const std::string num_running_compactions = "num-running-compactions"; +static const std::string num_running_flushes = "num-running-flushes"; +static const std::string actual_delayed_write_rate = + "actual-delayed-write-rate"; +static const std::string is_write_stopped = "is-write-stopped"; +static const std::string estimate_oldest_key_time = "estimate-oldest-key-time"; +static const std::string block_cache_capacity = "block-cache-capacity"; +static const std::string block_cache_usage = "block-cache-usage"; +static const std::string block_cache_pinned_usage = "block-cache-pinned-usage"; +static const std::string options_statistics = "options-statistics"; +static const std::string num_blob_files = "num-blob-files"; +static const std::string blob_stats = "blob-stats"; +static const std::string total_blob_file_size = "total-blob-file-size"; +static const std::string live_blob_file_size = "live-blob-file-size"; +static const std::string live_blob_file_garbage_size = + "live-blob-file-garbage-size"; +static const std::string blob_cache_capacity = "blob-cache-capacity"; +static const std::string blob_cache_usage = "blob-cache-usage"; +static const std::string blob_cache_pinned_usage = "blob-cache-pinned-usage"; + +const std::string DB::Properties::kNumFilesAtLevelPrefix = + rocksdb_prefix + num_files_at_level_prefix; +const std::string DB::Properties::kCompressionRatioAtLevelPrefix = + rocksdb_prefix + compression_ratio_at_level_prefix; +const std::string DB::Properties::kStats = rocksdb_prefix + allstats; +const std::string DB::Properties::kSSTables = rocksdb_prefix + sstables; +const std::string DB::Properties::kCFStats = rocksdb_prefix + cfstats; +const std::string DB::Properties::kCFStatsNoFileHistogram = + rocksdb_prefix + cfstats_no_file_histogram; +const std::string DB::Properties::kCFFileHistogram = + rocksdb_prefix + cf_file_histogram; +const std::string DB::Properties::kCFWriteStallStats = + rocksdb_prefix + cf_write_stall_stats; +const std::string DB::Properties::kDBWriteStallStats = + rocksdb_prefix + db_write_stall_stats; +const std::string DB::Properties::kDBStats = rocksdb_prefix + dbstats; +const std::string DB::Properties::kLevelStats = rocksdb_prefix + levelstats; +const std::string DB::Properties::kBlockCacheEntryStats = + rocksdb_prefix + block_cache_entry_stats; +const std::string DB::Properties::kFastBlockCacheEntryStats = + rocksdb_prefix + fast_block_cache_entry_stats; +const std::string DB::Properties::kNumImmutableMemTable = + rocksdb_prefix + num_immutable_mem_table; +const std::string DB::Properties::kNumImmutableMemTableFlushed = + rocksdb_prefix + num_immutable_mem_table_flushed; +const std::string DB::Properties::kMemTableFlushPending = + rocksdb_prefix + mem_table_flush_pending; +const std::string DB::Properties::kCompactionPending = + rocksdb_prefix + compaction_pending; +const std::string DB::Properties::kNumRunningCompactions = + rocksdb_prefix + num_running_compactions; +const std::string DB::Properties::kNumRunningFlushes = + rocksdb_prefix + num_running_flushes; +const std::string DB::Properties::kBackgroundErrors = + rocksdb_prefix + background_errors; +const std::string DB::Properties::kCurSizeActiveMemTable = + rocksdb_prefix + cur_size_active_mem_table; +const std::string DB::Properties::kCurSizeAllMemTables = + rocksdb_prefix + cur_size_all_mem_tables; 
+const std::string DB::Properties::kSizeAllMemTables = + rocksdb_prefix + size_all_mem_tables; +const std::string DB::Properties::kNumEntriesActiveMemTable = + rocksdb_prefix + num_entries_active_mem_table; +const std::string DB::Properties::kNumEntriesImmMemTables = + rocksdb_prefix + num_entries_imm_mem_tables; +const std::string DB::Properties::kNumDeletesActiveMemTable = + rocksdb_prefix + num_deletes_active_mem_table; +const std::string DB::Properties::kNumDeletesImmMemTables = + rocksdb_prefix + num_deletes_imm_mem_tables; +const std::string DB::Properties::kEstimateNumKeys = + rocksdb_prefix + estimate_num_keys; +const std::string DB::Properties::kEstimateTableReadersMem = + rocksdb_prefix + estimate_table_readers_mem; +const std::string DB::Properties::kIsFileDeletionsEnabled = + rocksdb_prefix + is_file_deletions_enabled; +const std::string DB::Properties::kNumSnapshots = + rocksdb_prefix + num_snapshots; +const std::string DB::Properties::kOldestSnapshotTime = + rocksdb_prefix + oldest_snapshot_time; +const std::string DB::Properties::kOldestSnapshotSequence = + rocksdb_prefix + oldest_snapshot_sequence; +const std::string DB::Properties::kNumLiveVersions = + rocksdb_prefix + num_live_versions; +const std::string DB::Properties::kCurrentSuperVersionNumber = + rocksdb_prefix + current_version_number; +const std::string DB::Properties::kEstimateLiveDataSize = + rocksdb_prefix + estimate_live_data_size; +const std::string DB::Properties::kMinLogNumberToKeep = + rocksdb_prefix + min_log_number_to_keep_str; +const std::string DB::Properties::kMinObsoleteSstNumberToKeep = + rocksdb_prefix + min_obsolete_sst_number_to_keep_str; +const std::string DB::Properties::kTotalSstFilesSize = + rocksdb_prefix + total_sst_files_size; +const std::string DB::Properties::kLiveSstFilesSize = + rocksdb_prefix + live_sst_files_size; +const std::string DB::Properties::kBaseLevel = rocksdb_prefix + base_level_str; +const std::string DB::Properties::kEstimatePendingCompactionBytes = + rocksdb_prefix + estimate_pending_comp_bytes; +const std::string DB::Properties::kAggregatedTableProperties = + rocksdb_prefix + aggregated_table_properties; +const std::string DB::Properties::kAggregatedTablePropertiesAtLevel = + rocksdb_prefix + aggregated_table_properties_at_level; +const std::string DB::Properties::kActualDelayedWriteRate = + rocksdb_prefix + actual_delayed_write_rate; +const std::string DB::Properties::kIsWriteStopped = + rocksdb_prefix + is_write_stopped; +const std::string DB::Properties::kEstimateOldestKeyTime = + rocksdb_prefix + estimate_oldest_key_time; +const std::string DB::Properties::kBlockCacheCapacity = + rocksdb_prefix + block_cache_capacity; +const std::string DB::Properties::kBlockCacheUsage = + rocksdb_prefix + block_cache_usage; +const std::string DB::Properties::kBlockCachePinnedUsage = + rocksdb_prefix + block_cache_pinned_usage; +const std::string DB::Properties::kOptionsStatistics = + rocksdb_prefix + options_statistics; +const std::string DB::Properties::kLiveSstFilesSizeAtTemperature = + rocksdb_prefix + live_sst_files_size_at_temperature; +const std::string DB::Properties::kNumBlobFiles = + rocksdb_prefix + num_blob_files; +const std::string DB::Properties::kBlobStats = rocksdb_prefix + blob_stats; +const std::string DB::Properties::kTotalBlobFileSize = + rocksdb_prefix + total_blob_file_size; +const std::string DB::Properties::kLiveBlobFileSize = + rocksdb_prefix + live_blob_file_size; +const std::string DB::Properties::kLiveBlobFileGarbageSize = + rocksdb_prefix + 
live_blob_file_garbage_size; +const std::string DB::Properties::kBlobCacheCapacity = + rocksdb_prefix + blob_cache_capacity; +const std::string DB::Properties::kBlobCacheUsage = + rocksdb_prefix + blob_cache_usage; +const std::string DB::Properties::kBlobCachePinnedUsage = + rocksdb_prefix + blob_cache_pinned_usage; + +const std::string InternalStats::kPeriodicCFStats = + DB::Properties::kCFStats + ".periodic"; +const int InternalStats::kMaxNoChangePeriodSinceDump = 8; + +const UnorderedMap + InternalStats::ppt_name_to_info = { + {DB::Properties::kNumFilesAtLevelPrefix, + {false, &InternalStats::HandleNumFilesAtLevel, nullptr, nullptr, + nullptr}}, + {DB::Properties::kCompressionRatioAtLevelPrefix, + {false, &InternalStats::HandleCompressionRatioAtLevelPrefix, nullptr, + nullptr, nullptr}}, + {DB::Properties::kLevelStats, + {false, &InternalStats::HandleLevelStats, nullptr, nullptr, nullptr}}, + {DB::Properties::kStats, + {false, &InternalStats::HandleStats, nullptr, nullptr, nullptr}}, + {DB::Properties::kCFStats, + {false, &InternalStats::HandleCFStats, nullptr, + &InternalStats::HandleCFMapStats, nullptr}}, + {InternalStats::kPeriodicCFStats, + {false, &InternalStats::HandleCFStatsPeriodic, nullptr, nullptr, + nullptr}}, + {DB::Properties::kCFStatsNoFileHistogram, + {false, &InternalStats::HandleCFStatsNoFileHistogram, nullptr, nullptr, + nullptr}}, + {DB::Properties::kCFFileHistogram, + {false, &InternalStats::HandleCFFileHistogram, nullptr, nullptr, + nullptr}}, + {DB::Properties::kCFWriteStallStats, + {false, &InternalStats::HandleCFWriteStallStats, nullptr, + &InternalStats::HandleCFWriteStallStatsMap, nullptr}}, + {DB::Properties::kDBStats, + {false, &InternalStats::HandleDBStats, nullptr, + &InternalStats::HandleDBMapStats, nullptr}}, + {DB::Properties::kDBWriteStallStats, + {false, &InternalStats::HandleDBWriteStallStats, nullptr, + &InternalStats::HandleDBWriteStallStatsMap, nullptr}}, + {DB::Properties::kBlockCacheEntryStats, + {true, &InternalStats::HandleBlockCacheEntryStats, nullptr, + &InternalStats::HandleBlockCacheEntryStatsMap, nullptr}}, + {DB::Properties::kFastBlockCacheEntryStats, + {true, &InternalStats::HandleFastBlockCacheEntryStats, nullptr, + &InternalStats::HandleFastBlockCacheEntryStatsMap, nullptr}}, + {DB::Properties::kSSTables, + {false, &InternalStats::HandleSsTables, nullptr, nullptr, nullptr}}, + {DB::Properties::kAggregatedTableProperties, + {false, &InternalStats::HandleAggregatedTableProperties, nullptr, + &InternalStats::HandleAggregatedTablePropertiesMap, nullptr}}, + {DB::Properties::kAggregatedTablePropertiesAtLevel, + {false, &InternalStats::HandleAggregatedTablePropertiesAtLevel, + nullptr, &InternalStats::HandleAggregatedTablePropertiesAtLevelMap, + nullptr}}, + {DB::Properties::kNumImmutableMemTable, + {false, nullptr, &InternalStats::HandleNumImmutableMemTable, nullptr, + nullptr}}, + {DB::Properties::kNumImmutableMemTableFlushed, + {false, nullptr, &InternalStats::HandleNumImmutableMemTableFlushed, + nullptr, nullptr}}, + {DB::Properties::kMemTableFlushPending, + {false, nullptr, &InternalStats::HandleMemTableFlushPending, nullptr, + nullptr}}, + {DB::Properties::kCompactionPending, + {false, nullptr, &InternalStats::HandleCompactionPending, nullptr, + nullptr}}, + {DB::Properties::kBackgroundErrors, + {false, nullptr, &InternalStats::HandleBackgroundErrors, nullptr, + nullptr}}, + {DB::Properties::kCurSizeActiveMemTable, + {false, nullptr, &InternalStats::HandleCurSizeActiveMemTable, nullptr, + nullptr}}, + 
{DB::Properties::kCurSizeAllMemTables, + {false, nullptr, &InternalStats::HandleCurSizeAllMemTables, nullptr, + nullptr}}, + {DB::Properties::kSizeAllMemTables, + {false, nullptr, &InternalStats::HandleSizeAllMemTables, nullptr, + nullptr}}, + {DB::Properties::kNumEntriesActiveMemTable, + {false, nullptr, &InternalStats::HandleNumEntriesActiveMemTable, + nullptr, nullptr}}, + {DB::Properties::kNumEntriesImmMemTables, + {false, nullptr, &InternalStats::HandleNumEntriesImmMemTables, nullptr, + nullptr}}, + {DB::Properties::kNumDeletesActiveMemTable, + {false, nullptr, &InternalStats::HandleNumDeletesActiveMemTable, + nullptr, nullptr}}, + {DB::Properties::kNumDeletesImmMemTables, + {false, nullptr, &InternalStats::HandleNumDeletesImmMemTables, nullptr, + nullptr}}, + {DB::Properties::kEstimateNumKeys, + {false, nullptr, &InternalStats::HandleEstimateNumKeys, nullptr, + nullptr}}, + {DB::Properties::kEstimateTableReadersMem, + {true, nullptr, &InternalStats::HandleEstimateTableReadersMem, nullptr, + nullptr}}, + {DB::Properties::kIsFileDeletionsEnabled, + {false, nullptr, &InternalStats::HandleIsFileDeletionsEnabled, nullptr, + nullptr}}, + {DB::Properties::kNumSnapshots, + {false, nullptr, &InternalStats::HandleNumSnapshots, nullptr, + nullptr}}, + {DB::Properties::kOldestSnapshotTime, + {false, nullptr, &InternalStats::HandleOldestSnapshotTime, nullptr, + nullptr}}, + {DB::Properties::kOldestSnapshotSequence, + {false, nullptr, &InternalStats::HandleOldestSnapshotSequence, nullptr, + nullptr}}, + {DB::Properties::kNumLiveVersions, + {false, nullptr, &InternalStats::HandleNumLiveVersions, nullptr, + nullptr}}, + {DB::Properties::kCurrentSuperVersionNumber, + {false, nullptr, &InternalStats::HandleCurrentSuperVersionNumber, + nullptr, nullptr}}, + {DB::Properties::kEstimateLiveDataSize, + {true, nullptr, &InternalStats::HandleEstimateLiveDataSize, nullptr, + nullptr}}, + {DB::Properties::kMinLogNumberToKeep, + {false, nullptr, &InternalStats::HandleMinLogNumberToKeep, nullptr, + nullptr}}, + {DB::Properties::kMinObsoleteSstNumberToKeep, + {false, nullptr, &InternalStats::HandleMinObsoleteSstNumberToKeep, + nullptr, nullptr}}, + {DB::Properties::kBaseLevel, + {false, nullptr, &InternalStats::HandleBaseLevel, nullptr, nullptr}}, + {DB::Properties::kTotalSstFilesSize, + {false, nullptr, &InternalStats::HandleTotalSstFilesSize, nullptr, + nullptr}}, + {DB::Properties::kLiveSstFilesSize, + {false, nullptr, &InternalStats::HandleLiveSstFilesSize, nullptr, + nullptr}}, + {DB::Properties::kLiveSstFilesSizeAtTemperature, + {false, &InternalStats::HandleLiveSstFilesSizeAtTemperature, nullptr, + nullptr, nullptr}}, + {DB::Properties::kEstimatePendingCompactionBytes, + {false, nullptr, &InternalStats::HandleEstimatePendingCompactionBytes, + nullptr, nullptr}}, + {DB::Properties::kNumRunningFlushes, + {false, nullptr, &InternalStats::HandleNumRunningFlushes, nullptr, + nullptr}}, + {DB::Properties::kNumRunningCompactions, + {false, nullptr, &InternalStats::HandleNumRunningCompactions, nullptr, + nullptr}}, + {DB::Properties::kActualDelayedWriteRate, + {false, nullptr, &InternalStats::HandleActualDelayedWriteRate, nullptr, + nullptr}}, + {DB::Properties::kIsWriteStopped, + {false, nullptr, &InternalStats::HandleIsWriteStopped, nullptr, + nullptr}}, + {DB::Properties::kEstimateOldestKeyTime, + {false, nullptr, &InternalStats::HandleEstimateOldestKeyTime, nullptr, + nullptr}}, + {DB::Properties::kBlockCacheCapacity, + {false, nullptr, &InternalStats::HandleBlockCacheCapacity, nullptr, + nullptr}}, + 
{DB::Properties::kBlockCacheUsage, + {false, nullptr, &InternalStats::HandleBlockCacheUsage, nullptr, + nullptr}}, + {DB::Properties::kBlockCachePinnedUsage, + {false, nullptr, &InternalStats::HandleBlockCachePinnedUsage, nullptr, + nullptr}}, + {DB::Properties::kOptionsStatistics, + {true, nullptr, nullptr, nullptr, + &DBImpl::GetPropertyHandleOptionsStatistics}}, + {DB::Properties::kNumBlobFiles, + {false, nullptr, &InternalStats::HandleNumBlobFiles, nullptr, + nullptr}}, + {DB::Properties::kBlobStats, + {false, &InternalStats::HandleBlobStats, nullptr, nullptr, nullptr}}, + {DB::Properties::kTotalBlobFileSize, + {false, nullptr, &InternalStats::HandleTotalBlobFileSize, nullptr, + nullptr}}, + {DB::Properties::kLiveBlobFileSize, + {false, nullptr, &InternalStats::HandleLiveBlobFileSize, nullptr, + nullptr}}, + {DB::Properties::kLiveBlobFileGarbageSize, + {false, nullptr, &InternalStats::HandleLiveBlobFileGarbageSize, + nullptr, nullptr}}, + {DB::Properties::kBlobCacheCapacity, + {false, nullptr, &InternalStats::HandleBlobCacheCapacity, nullptr, + nullptr}}, + {DB::Properties::kBlobCacheUsage, + {false, nullptr, &InternalStats::HandleBlobCacheUsage, nullptr, + nullptr}}, + {DB::Properties::kBlobCachePinnedUsage, + {false, nullptr, &InternalStats::HandleBlobCachePinnedUsage, nullptr, + nullptr}}, +}; + +InternalStats::InternalStats(int num_levels, SystemClock* clock, + ColumnFamilyData* cfd) + : db_stats_{}, + cf_stats_value_{}, + cf_stats_count_{}, + comp_stats_(num_levels), + comp_stats_by_pri_(Env::Priority::TOTAL), + file_read_latency_(num_levels), + has_cf_change_since_dump_(true), + bg_error_count_(0), + number_levels_(num_levels), + clock_(clock), + cfd_(cfd), + started_at_(clock->NowMicros()) { + Cache* block_cache = GetBlockCacheForStats(); + if (block_cache) { + // Extract or create stats collector. Could fail in rare cases. + Status s = CacheEntryStatsCollector::GetShared( + block_cache, clock_, &cache_entry_stats_collector_); + if (s.ok()) { + assert(cache_entry_stats_collector_); + } else { + assert(!cache_entry_stats_collector_); + } + } +} + +void InternalStats::TEST_GetCacheEntryRoleStats(CacheEntryRoleStats* stats, + bool foreground) { + CollectCacheEntryStats(foreground); + if (cache_entry_stats_collector_) { + cache_entry_stats_collector_->GetStats(stats); + } +} + +void InternalStats::CollectCacheEntryStats(bool foreground) { + // This function is safe to call from any thread because + // cache_entry_stats_collector_ field is const after constructor + // and ->GetStats does its own synchronization, which also suffices for + // cache_entry_stats_. + + if (!cache_entry_stats_collector_) { + return; // nothing to do (e.g. no block cache) + } + + // For "background" collections, strictly cap the collection time by + // expanding effective cache TTL. For foreground, be more aggressive about + // getting latest data. + int min_interval_seconds = foreground ? 10 : 180; + // 1/500 = max of 0.2% of one CPU thread + int min_interval_factor = foreground ? 10 : 500; + cache_entry_stats_collector_->CollectStats(min_interval_seconds, + min_interval_factor); +} + +std::function +InternalStats::CacheEntryRoleStats::GetEntryCallback() { + return [&](const Slice& /*key*/, Cache::ObjectPtr /*value*/, size_t charge, + const Cache::CacheItemHelper* helper) -> void { + size_t role_idx = + static_cast(helper ? 
helper->role : CacheEntryRole::kMisc); + entry_counts[role_idx]++; + total_charges[role_idx] += charge; + }; +} + +void InternalStats::CacheEntryRoleStats::BeginCollection( + Cache* cache, SystemClock*, uint64_t start_time_micros) { + Clear(); + last_start_time_micros_ = start_time_micros; + ++collection_count; + std::ostringstream str; + str << cache->Name() << "@" << static_cast(cache) << "#" + << port::GetProcessID(); + cache_id = str.str(); + cache_capacity = cache->GetCapacity(); + cache_usage = cache->GetUsage(); + table_size = cache->GetTableAddressCount(); + occupancy = cache->GetOccupancyCount(); + hash_seed = cache->GetHashSeed(); +} + +void InternalStats::CacheEntryRoleStats::EndCollection( + Cache*, SystemClock*, uint64_t end_time_micros) { + last_end_time_micros_ = end_time_micros; +} + +void InternalStats::CacheEntryRoleStats::SkippedCollection() { + ++copies_of_last_collection; +} + +uint64_t InternalStats::CacheEntryRoleStats::GetLastDurationMicros() const { + if (last_end_time_micros_ > last_start_time_micros_) { + return last_end_time_micros_ - last_start_time_micros_; + } else { + return 0U; + } +} + +std::string InternalStats::CacheEntryRoleStats::ToString( + SystemClock* clock) const { + std::ostringstream str; + str << "Block cache " << cache_id + << " capacity: " << BytesToHumanString(cache_capacity) + << " seed: " << hash_seed << " usage: " << BytesToHumanString(cache_usage) + << " table_size: " << table_size << " occupancy: " << occupancy + << " collections: " << collection_count + << " last_copies: " << copies_of_last_collection + << " last_secs: " << (GetLastDurationMicros() / 1000000.0) + << " secs_since: " + << ((clock->NowMicros() - last_end_time_micros_) / 1000000U) << "\n"; + str << "Block cache entry stats(count,size,portion):"; + for (size_t i = 0; i < kNumCacheEntryRoles; ++i) { + if (entry_counts[i] > 0) { + str << " " << kCacheEntryRoleToCamelString[i] << "(" << entry_counts[i] + << "," << BytesToHumanString(total_charges[i]) << "," + << (100.0 * total_charges[i] / cache_capacity) << "%)"; + } + } + str << "\n"; + return str.str(); +} + +void InternalStats::CacheEntryRoleStats::ToMap( + std::map* values, SystemClock* clock) const { + values->clear(); + auto& v = *values; + v[BlockCacheEntryStatsMapKeys::CacheId()] = cache_id; + v[BlockCacheEntryStatsMapKeys::CacheCapacityBytes()] = + std::to_string(cache_capacity); + v[BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds()] = + std::to_string(GetLastDurationMicros() / 1000000.0); + v[BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds()] = + std::to_string((clock->NowMicros() - last_end_time_micros_) / 1000000U); + for (size_t i = 0; i < kNumCacheEntryRoles; ++i) { + auto role = static_cast(i); + v[BlockCacheEntryStatsMapKeys::EntryCount(role)] = + std::to_string(entry_counts[i]); + v[BlockCacheEntryStatsMapKeys::UsedBytes(role)] = + std::to_string(total_charges[i]); + v[BlockCacheEntryStatsMapKeys::UsedPercent(role)] = + std::to_string(100.0 * total_charges[i] / cache_capacity); + } +} + +bool InternalStats::HandleBlockCacheEntryStatsInternal(std::string* value, + bool fast) { + if (!cache_entry_stats_collector_) { + return false; + } + CollectCacheEntryStats(!fast /* foreground */); + CacheEntryRoleStats stats; + cache_entry_stats_collector_->GetStats(&stats); + *value = stats.ToString(clock_); + return true; +} + +bool InternalStats::HandleBlockCacheEntryStatsMapInternal( + std::map* values, bool fast) { + if (!cache_entry_stats_collector_) { + return false; + } + CollectCacheEntryStats(!fast 
/* foreground */); + CacheEntryRoleStats stats; + cache_entry_stats_collector_->GetStats(&stats); + stats.ToMap(values, clock_); + return true; +} + +bool InternalStats::HandleBlockCacheEntryStats(std::string* value, + Slice /*suffix*/) { + return HandleBlockCacheEntryStatsInternal(value, false /* fast */); +} + +bool InternalStats::HandleBlockCacheEntryStatsMap( + std::map* values, Slice /*suffix*/) { + return HandleBlockCacheEntryStatsMapInternal(values, false /* fast */); +} + +bool InternalStats::HandleFastBlockCacheEntryStats(std::string* value, + Slice /*suffix*/) { + return HandleBlockCacheEntryStatsInternal(value, true /* fast */); +} + +bool InternalStats::HandleFastBlockCacheEntryStatsMap( + std::map* values, Slice /*suffix*/) { + return HandleBlockCacheEntryStatsMapInternal(values, true /* fast */); +} + +bool InternalStats::HandleLiveSstFilesSizeAtTemperature(std::string* value, + Slice suffix) { + uint64_t temperature; + bool ok = ConsumeDecimalNumber(&suffix, &temperature) && suffix.empty(); + if (!ok) { + return false; + } + + uint64_t size = 0; + const auto* vstorage = cfd_->current()->storage_info(); + for (int level = 0; level < vstorage->num_levels(); level++) { + for (const auto& file_meta : vstorage->LevelFiles(level)) { + if (static_cast(file_meta->temperature) == temperature) { + size += file_meta->fd.GetFileSize(); + } + } + } + + *value = std::to_string(size); + return true; +} + +bool InternalStats::HandleNumBlobFiles(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + assert(value); + assert(cfd_); + + const auto* current = cfd_->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + + const auto& blob_files = vstorage->GetBlobFiles(); + + *value = blob_files.size(); + + return true; +} + +bool InternalStats::HandleBlobStats(std::string* value, Slice /*suffix*/) { + assert(value); + assert(cfd_); + + const auto* current = cfd_->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + + const auto blob_st = vstorage->GetBlobStats(); + + std::ostringstream oss; + + oss << "Number of blob files: " << vstorage->GetBlobFiles().size() + << "\nTotal size of blob files: " << blob_st.total_file_size + << "\nTotal size of garbage in blob files: " << blob_st.total_garbage_size + << "\nBlob file space amplification: " << blob_st.space_amp << '\n'; + + value->append(oss.str()); + + return true; +} + +bool InternalStats::HandleTotalBlobFileSize(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + assert(value); + assert(cfd_); + + *value = cfd_->GetTotalBlobFileSize(); + + return true; +} + +bool InternalStats::HandleLiveBlobFileSize(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + assert(value); + assert(cfd_); + + const auto* current = cfd_->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + + *value = vstorage->GetBlobStats().total_file_size; + + return true; +} + +bool InternalStats::HandleLiveBlobFileGarbageSize(uint64_t* value, + DBImpl* /*db*/, + Version* /*version*/) { + assert(value); + assert(cfd_); + + const auto* current = cfd_->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + + *value = vstorage->GetBlobStats().total_garbage_size; + + return true; +} + +Cache* InternalStats::GetBlobCacheForStats() { + return cfd_->ioptions()->blob_cache.get(); +} + +bool InternalStats::HandleBlobCacheCapacity(uint64_t* value, DBImpl* /*db*/, + Version* 
/*version*/) { + Cache* blob_cache = GetBlobCacheForStats(); + if (blob_cache) { + *value = static_cast(blob_cache->GetCapacity()); + return true; + } + return false; +} + +bool InternalStats::HandleBlobCacheUsage(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + Cache* blob_cache = GetBlobCacheForStats(); + if (blob_cache) { + *value = static_cast(blob_cache->GetUsage()); + return true; + } + return false; +} + +bool InternalStats::HandleBlobCachePinnedUsage(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + Cache* blob_cache = GetBlobCacheForStats(); + if (blob_cache) { + *value = static_cast(blob_cache->GetPinnedUsage()); + return true; + } + return false; +} + +const DBPropertyInfo* GetPropertyInfo(const Slice& property) { + std::string ppt_name = GetPropertyNameAndArg(property).first.ToString(); + auto ppt_info_iter = InternalStats::ppt_name_to_info.find(ppt_name); + if (ppt_info_iter == InternalStats::ppt_name_to_info.end()) { + return nullptr; + } + return &ppt_info_iter->second; +} + +bool InternalStats::GetStringProperty(const DBPropertyInfo& property_info, + const Slice& property, + std::string* value) { + assert(value != nullptr); + assert(property_info.handle_string != nullptr); + Slice arg = GetPropertyNameAndArg(property).second; + return (this->*(property_info.handle_string))(value, arg); +} + +bool InternalStats::GetMapProperty(const DBPropertyInfo& property_info, + const Slice& property, + std::map* value) { + assert(value != nullptr); + assert(property_info.handle_map != nullptr); + Slice arg = GetPropertyNameAndArg(property).second; + return (this->*(property_info.handle_map))(value, arg); +} + +bool InternalStats::GetIntProperty(const DBPropertyInfo& property_info, + uint64_t* value, DBImpl* db) { + assert(value != nullptr); + assert(property_info.handle_int != nullptr && + !property_info.need_out_of_mutex); + db->mutex_.AssertHeld(); + return (this->*(property_info.handle_int))(value, db, nullptr /* version */); +} + +bool InternalStats::GetIntPropertyOutOfMutex( + const DBPropertyInfo& property_info, Version* version, uint64_t* value) { + assert(value != nullptr); + assert(property_info.handle_int != nullptr && + property_info.need_out_of_mutex); + return (this->*(property_info.handle_int))(value, nullptr /* db */, version); +} + +bool InternalStats::HandleNumFilesAtLevel(std::string* value, Slice suffix) { + uint64_t level; + const auto* vstorage = cfd_->current()->storage_info(); + bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); + if (!ok || static_cast(level) >= number_levels_) { + return false; + } else { + char buf[100]; + snprintf(buf, sizeof(buf), "%d", + vstorage->NumLevelFiles(static_cast(level))); + *value = buf; + return true; + } +} + +bool InternalStats::HandleCompressionRatioAtLevelPrefix(std::string* value, + Slice suffix) { + uint64_t level; + const auto* vstorage = cfd_->current()->storage_info(); + bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); + if (!ok || level >= static_cast(number_levels_)) { + return false; + } + *value = std::to_string( + vstorage->GetEstimatedCompressionRatioAtLevel(static_cast(level))); + return true; +} + +bool InternalStats::HandleLevelStats(std::string* value, Slice /*suffix*/) { + char buf[1000]; + const auto* vstorage = cfd_->current()->storage_info(); + snprintf(buf, sizeof(buf), + "Level Files Size(MB)\n" + "--------------------\n"); + value->append(buf); + + for (int level = 0; level < number_levels_; level++) { + snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", 
level, + vstorage->NumLevelFiles(level), + vstorage->NumLevelBytes(level) / kMB); + value->append(buf); + } + return true; +} + +bool InternalStats::HandleStats(std::string* value, Slice suffix) { + if (!HandleCFStats(value, suffix)) { + return false; + } + if (!HandleDBStats(value, suffix)) { + return false; + } + return true; +} + +bool InternalStats::HandleCFMapStats( + std::map* cf_stats, Slice /*suffix*/) { + DumpCFMapStats(cf_stats); + return true; +} + +bool InternalStats::HandleCFStats(std::string* value, Slice /*suffix*/) { + DumpCFStats(value); + return true; +} + +bool InternalStats::HandleCFStatsPeriodic(std::string* value, + Slice /*suffix*/) { + bool has_change = has_cf_change_since_dump_; + if (!has_change) { + // If file histogram changes, there is activity in this period too. + uint64_t new_histogram_num = 0; + for (int level = 0; level < number_levels_; level++) { + new_histogram_num += file_read_latency_[level].num(); + } + new_histogram_num += blob_file_read_latency_.num(); + if (new_histogram_num != last_histogram_num) { + has_change = true; + last_histogram_num = new_histogram_num; + } + } + if (has_change) { + no_cf_change_period_since_dump_ = 0; + has_cf_change_since_dump_ = false; + } else if (no_cf_change_period_since_dump_++ > 0) { + // Not ready to sync + if (no_cf_change_period_since_dump_ == kMaxNoChangePeriodSinceDump) { + // Next periodic, we need to dump stats even if there is no change. + no_cf_change_period_since_dump_ = 0; + } + return true; + } + + DumpCFStatsNoFileHistogram(/*is_periodic=*/true, value); + DumpCFFileHistogram(value); + return true; +} + +bool InternalStats::HandleCFStatsNoFileHistogram(std::string* value, + Slice /*suffix*/) { + DumpCFStatsNoFileHistogram(/*is_periodic=*/false, value); + return true; +} + +bool InternalStats::HandleCFFileHistogram(std::string* value, + Slice /*suffix*/) { + DumpCFFileHistogram(value); + return true; +} + +bool InternalStats::HandleCFWriteStallStats(std::string* value, + Slice /*suffix*/) { + DumpCFStatsWriteStall(value); + return true; +} + +bool InternalStats::HandleCFWriteStallStatsMap( + std::map* value, Slice /*suffix*/) { + DumpCFMapStatsWriteStall(value); + return true; +} + +bool InternalStats::HandleDBMapStats( + std::map* db_stats, Slice /*suffix*/) { + DumpDBMapStats(db_stats); + return true; +} + +bool InternalStats::HandleDBStats(std::string* value, Slice /*suffix*/) { + DumpDBStats(value); + return true; +} + +bool InternalStats::HandleDBWriteStallStats(std::string* value, + Slice /*suffix*/) { + DumpDBStatsWriteStall(value); + return true; +} + +bool InternalStats::HandleDBWriteStallStatsMap( + std::map* value, Slice /*suffix*/) { + DumpDBMapStatsWriteStall(value); + return true; +} + +bool InternalStats::HandleSsTables(std::string* value, Slice /*suffix*/) { + auto* current = cfd_->current(); + *value = current->DebugString(true, true); + return true; +} + +bool InternalStats::HandleAggregatedTableProperties(std::string* value, + Slice /*suffix*/) { + std::shared_ptr tp; + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + auto s = cfd_->current()->GetAggregatedTableProperties(read_options, &tp); + if (!s.ok()) { + return false; + } + *value = tp->ToString(); + return true; +} + +static std::map MapUint64ValuesToString( + const std::map& from) { + std::map to; + for (const auto& e : from) { + to[e.first] = std::to_string(e.second); + } + return to; +} + +bool InternalStats::HandleAggregatedTablePropertiesMap( + std::map* values, Slice /*suffix*/) { + std::shared_ptr tp; + 
// TODO: plumb Env::IOActivity + const ReadOptions read_options; + auto s = cfd_->current()->GetAggregatedTableProperties(read_options, &tp); + if (!s.ok()) { + return false; + } + *values = MapUint64ValuesToString(tp->GetAggregatablePropertiesAsMap()); + return true; +} + +bool InternalStats::HandleAggregatedTablePropertiesAtLevel(std::string* values, + Slice suffix) { + uint64_t level; + bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); + if (!ok || static_cast(level) >= number_levels_) { + return false; + } + std::shared_ptr tp; + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + auto s = cfd_->current()->GetAggregatedTableProperties( + read_options, &tp, static_cast(level)); + if (!s.ok()) { + return false; + } + *values = tp->ToString(); + return true; +} + +bool InternalStats::HandleAggregatedTablePropertiesAtLevelMap( + std::map* values, Slice suffix) { + uint64_t level; + bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); + if (!ok || static_cast(level) >= number_levels_) { + return false; + } + std::shared_ptr tp; + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + auto s = cfd_->current()->GetAggregatedTableProperties( + read_options, &tp, static_cast(level)); + if (!s.ok()) { + return false; + } + *values = MapUint64ValuesToString(tp->GetAggregatablePropertiesAsMap()); + return true; +} + +bool InternalStats::HandleNumImmutableMemTable(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + *value = cfd_->imm()->NumNotFlushed(); + return true; +} + +bool InternalStats::HandleNumImmutableMemTableFlushed(uint64_t* value, + DBImpl* /*db*/, + Version* /*version*/) { + *value = cfd_->imm()->NumFlushed(); + return true; +} + +bool InternalStats::HandleMemTableFlushPending(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + *value = (cfd_->imm()->IsFlushPending() ? 1 : 0); + return true; +} + +bool InternalStats::HandleNumRunningFlushes(uint64_t* value, DBImpl* db, + Version* /*version*/) { + *value = db->num_running_flushes(); + return true; +} + +bool InternalStats::HandleCompactionPending(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + // 1 if the system already determines at least one compaction is needed. + // 0 otherwise, + const auto* vstorage = cfd_->current()->storage_info(); + *value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0); + return true; +} + +bool InternalStats::HandleNumRunningCompactions(uint64_t* value, DBImpl* db, + Version* /*version*/) { + *value = db->num_running_compactions_; + return true; +} + +bool InternalStats::HandleBackgroundErrors(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + // Accumulated number of errors in background flushes or compactions. 
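+  // (Backed by the bg_error_count_ member, which the constructor above
+  // zero-initializes.)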
+ *value = GetBackgroundErrorCount(); + return true; +} + +bool InternalStats::HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + // Current size of the active memtable + // Using ApproximateMemoryUsageFast to avoid the need for synchronization + *value = cfd_->mem()->ApproximateMemoryUsageFast(); + return true; +} + +bool InternalStats::HandleCurSizeAllMemTables(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + // Current size of the active memtable + immutable memtables + // Using ApproximateMemoryUsageFast to avoid the need for synchronization + *value = cfd_->mem()->ApproximateMemoryUsageFast() + + cfd_->imm()->ApproximateUnflushedMemTablesMemoryUsage(); + return true; +} + +bool InternalStats::HandleSizeAllMemTables(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + // Using ApproximateMemoryUsageFast to avoid the need for synchronization + *value = cfd_->mem()->ApproximateMemoryUsageFast() + + cfd_->imm()->ApproximateMemoryUsage(); + return true; +} + +bool InternalStats::HandleNumEntriesActiveMemTable(uint64_t* value, + DBImpl* /*db*/, + Version* /*version*/) { + // Current number of entires in the active memtable + *value = cfd_->mem()->num_entries(); + return true; +} + +bool InternalStats::HandleNumEntriesImmMemTables(uint64_t* value, + DBImpl* /*db*/, + Version* /*version*/) { + // Current number of entries in the immutable memtables + *value = cfd_->imm()->current()->GetTotalNumEntries(); + return true; +} + +bool InternalStats::HandleNumDeletesActiveMemTable(uint64_t* value, + DBImpl* /*db*/, + Version* /*version*/) { + // Current number of entires in the active memtable + *value = cfd_->mem()->num_deletes(); + return true; +} + +bool InternalStats::HandleNumDeletesImmMemTables(uint64_t* value, + DBImpl* /*db*/, + Version* /*version*/) { + // Current number of entries in the immutable memtables + *value = cfd_->imm()->current()->GetTotalNumDeletes(); + return true; +} + +bool InternalStats::HandleEstimateNumKeys(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + // Estimate number of entries in the column family: + // Use estimated entries in tables + total entries in memtables. + const auto* vstorage = cfd_->current()->storage_info(); + uint64_t estimate_keys = cfd_->mem()->num_entries() + + cfd_->imm()->current()->GetTotalNumEntries() + + vstorage->GetEstimatedActiveKeys(); + uint64_t estimate_deletes = + cfd_->mem()->num_deletes() + cfd_->imm()->current()->GetTotalNumDeletes(); + *value = estimate_keys > estimate_deletes * 2 + ? 
estimate_keys - (estimate_deletes * 2) + : 0; + return true; +} + +bool InternalStats::HandleNumSnapshots(uint64_t* value, DBImpl* db, + Version* /*version*/) { + *value = db->snapshots().count(); + return true; +} + +bool InternalStats::HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, + Version* /*version*/) { + *value = static_cast(db->snapshots().GetOldestSnapshotTime()); + return true; +} + +bool InternalStats::HandleOldestSnapshotSequence(uint64_t* value, DBImpl* db, + Version* /*version*/) { + *value = static_cast(db->snapshots().GetOldestSnapshotSequence()); + return true; +} + +bool InternalStats::HandleNumLiveVersions(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + *value = cfd_->GetNumLiveVersions(); + return true; +} + +bool InternalStats::HandleCurrentSuperVersionNumber(uint64_t* value, + DBImpl* /*db*/, + Version* /*version*/) { + *value = cfd_->GetSuperVersionNumber(); + return true; +} + +bool InternalStats::HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db, + Version* /*version*/) { + *value = db->IsFileDeletionsEnabled() ? 1 : 0; + return true; +} + +bool InternalStats::HandleBaseLevel(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + const auto* vstorage = cfd_->current()->storage_info(); + *value = vstorage->base_level(); + return true; +} + +bool InternalStats::HandleTotalSstFilesSize(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + *value = cfd_->GetTotalSstFilesSize(); + return true; +} + +bool InternalStats::HandleLiveSstFilesSize(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + *value = cfd_->GetLiveSstFilesSize(); + return true; +} + +bool InternalStats::HandleEstimatePendingCompactionBytes(uint64_t* value, + DBImpl* /*db*/, + Version* /*version*/) { + const auto* vstorage = cfd_->current()->storage_info(); + *value = vstorage->estimated_compaction_needed_bytes(); + return true; +} + +bool InternalStats::HandleEstimateTableReadersMem(uint64_t* value, + DBImpl* /*db*/, + Version* version) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + *value = (version == nullptr) + ? 0 + : version->GetMemoryUsageByTableReaders(read_options); + return true; +} + +bool InternalStats::HandleEstimateLiveDataSize(uint64_t* value, DBImpl* /*db*/, + Version* version) { + const auto* vstorage = version->storage_info(); + *value = vstorage->EstimateLiveDataSize(); + return true; +} + +bool InternalStats::HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, + Version* /*version*/) { + *value = db->MinLogNumberToKeep(); + return true; +} + +bool InternalStats::HandleMinObsoleteSstNumberToKeep(uint64_t* value, + DBImpl* db, + Version* /*version*/) { + *value = db->MinObsoleteSstNumberToKeep(); + return true; +} + +bool InternalStats::HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db, + Version* /*version*/) { + const WriteController& wc = db->write_controller(); + if (!wc.NeedsDelay()) { + *value = 0; + } else { + *value = wc.delayed_write_rate(); + } + return true; +} + +bool InternalStats::HandleIsWriteStopped(uint64_t* value, DBImpl* db, + Version* /*version*/) { + *value = db->write_controller().IsStopped() ? 1 : 0; + return true; +} + +bool InternalStats::HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + // TODO(yiwu): The property is currently available for fifo compaction + // with allow_compaction = false. This is because we don't propagate + // oldest_key_time on compaction. 
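+  // In other words, the check below reports the property as unavailable
+  // (returns false) for every configuration other than FIFO compaction with
+  // allow_compaction = false.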
+ if (cfd_->ioptions()->compaction_style != kCompactionStyleFIFO || + cfd_->GetCurrentMutableCFOptions() + ->compaction_options_fifo.allow_compaction) { + return false; + } + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + TablePropertiesCollection collection; + auto s = cfd_->current()->GetPropertiesOfAllTables(read_options, &collection); + if (!s.ok()) { + return false; + } + *value = std::numeric_limits::max(); + for (auto& p : collection) { + *value = std::min(*value, p.second->oldest_key_time); + if (*value == 0) { + break; + } + } + if (*value > 0) { + *value = std::min({cfd_->mem()->ApproximateOldestKeyTime(), + cfd_->imm()->ApproximateOldestKeyTime(), *value}); + } + return *value > 0 && *value < std::numeric_limits::max(); +} + +Cache* InternalStats::GetBlockCacheForStats() { + auto* table_factory = cfd_->ioptions()->table_factory.get(); + assert(table_factory != nullptr); + return table_factory->GetOptions(TableFactory::kBlockCacheOpts()); +} + +bool InternalStats::HandleBlockCacheCapacity(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + Cache* block_cache = GetBlockCacheForStats(); + if (block_cache) { + *value = static_cast(block_cache->GetCapacity()); + return true; + } + return false; +} + +bool InternalStats::HandleBlockCacheUsage(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + Cache* block_cache = GetBlockCacheForStats(); + if (block_cache) { + *value = static_cast(block_cache->GetUsage()); + return true; + } + return false; +} + +bool InternalStats::HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + Cache* block_cache = GetBlockCacheForStats(); + if (block_cache) { + *value = static_cast(block_cache->GetPinnedUsage()); + return true; + } + return false; +} + +void InternalStats::DumpDBMapStats( + std::map* db_stats) { + for (int i = 0; i < static_cast(kIntStatsNumMax); ++i) { + InternalDBStatsType type = static_cast(i); + (*db_stats)[db_stats_type_to_info.at(type).property_name] = + std::to_string(GetDBStats(type)); + } + double seconds_up = (clock_->NowMicros() - started_at_) / kMicrosInSec; + (*db_stats)["db.uptime"] = std::to_string(seconds_up); +} + +void InternalStats::DumpDBStats(std::string* value) { + char buf[1000]; + // DB-level stats, only available from default column family + double seconds_up = (clock_->NowMicros() - started_at_) / kMicrosInSec; + double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up; + snprintf(buf, sizeof(buf), + "\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n", + seconds_up, interval_seconds_up); + value->append(buf); + // Cumulative + uint64_t user_bytes_written = + GetDBStats(InternalStats::kIntStatsBytesWritten); + uint64_t num_keys_written = + GetDBStats(InternalStats::kIntStatsNumKeysWritten); + uint64_t write_other = GetDBStats(InternalStats::kIntStatsWriteDoneByOther); + uint64_t write_self = GetDBStats(InternalStats::kIntStatsWriteDoneBySelf); + uint64_t wal_bytes = GetDBStats(InternalStats::kIntStatsWalFileBytes); + uint64_t wal_synced = GetDBStats(InternalStats::kIntStatsWalFileSynced); + uint64_t write_with_wal = GetDBStats(InternalStats::kIntStatsWriteWithWal); + uint64_t write_stall_micros = + GetDBStats(InternalStats::kIntStatsWriteStallMicros); + + const int kHumanMicrosLen = 32; + char human_micros[kHumanMicrosLen]; + + // Data + // writes: total number of write requests. + // keys: total number of key updates issued by all the write requests + // commit groups: number of group commits issued to the DB. 
Each group can
+  //                contain one or more writes.
+  // so writes/keys is the average number of put in multi-put or put
+  // writes/groups is the average group commit size.
+  //
+  // The format is the same for interval stats.
+  snprintf(buf, sizeof(buf),
+           "Cumulative writes: %s writes, %s keys, %s commit groups, "
+           "%.1f writes per commit group, ingest: %.2f GB, %.2f MB/s\n",
+           NumberToHumanString(write_other + write_self).c_str(),
+           NumberToHumanString(num_keys_written).c_str(),
+           NumberToHumanString(write_self).c_str(),
+           (write_other + write_self) /
+               std::max(1.0, static_cast<double>(write_self)),
+           user_bytes_written / kGB,
+           user_bytes_written / kMB / std::max(seconds_up, 0.001));
+  value->append(buf);
+  // WAL
+  snprintf(buf, sizeof(buf),
+           "Cumulative WAL: %s writes, %s syncs, "
+           "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
+           NumberToHumanString(write_with_wal).c_str(),
+           NumberToHumanString(wal_synced).c_str(),
+           write_with_wal / std::max(1.0, static_cast<double>(wal_synced)),
+           wal_bytes / kGB, wal_bytes / kMB / std::max(seconds_up, 0.001));
+  value->append(buf);
+  // Stall
+  AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true);
+  snprintf(buf, sizeof(buf), "Cumulative stall: %s, %.1f percent\n",
+           human_micros,
+           // 10000 = divide by 1M to get secs, then multiply by 100 for pct
+           write_stall_micros / 10000.0 / std::max(seconds_up, 0.001));
+  value->append(buf);
+
+  // Interval
+  uint64_t interval_write_other = write_other - db_stats_snapshot_.write_other;
+  uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self;
+  uint64_t interval_num_keys_written =
+      num_keys_written - db_stats_snapshot_.num_keys_written;
+  snprintf(
+      buf, sizeof(buf),
+      "Interval writes: %s writes, %s keys, %s commit groups, "
+      "%.1f writes per commit group, ingest: %.2f MB, %.2f MB/s\n",
+      NumberToHumanString(interval_write_other + interval_write_self).c_str(),
+      NumberToHumanString(interval_num_keys_written).c_str(),
+      NumberToHumanString(interval_write_self).c_str(),
+      static_cast<double>(interval_write_other + interval_write_self) /
+          std::max(1.0, static_cast<double>(interval_write_self)),
+      (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB,
+      (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB /
+          std::max(interval_seconds_up, 0.001));
+  value->append(buf);
+
+  uint64_t interval_write_with_wal =
+      write_with_wal - db_stats_snapshot_.write_with_wal;
+  uint64_t interval_wal_synced = wal_synced - db_stats_snapshot_.wal_synced;
+  uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes;
+
+  snprintf(buf, sizeof(buf),
+           "Interval WAL: %s writes, %s syncs, "
+           "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
+           NumberToHumanString(interval_write_with_wal).c_str(),
+           NumberToHumanString(interval_wal_synced).c_str(),
+           interval_write_with_wal /
+               std::max(1.0, static_cast<double>(interval_wal_synced)),
+           interval_wal_bytes / kGB,
+           interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001));
+  value->append(buf);
+
+  // Stall
+  AppendHumanMicros(write_stall_micros - db_stats_snapshot_.write_stall_micros,
+                    human_micros, kHumanMicrosLen, true);
+  snprintf(buf, sizeof(buf), "Interval stall: %s, %.1f percent\n", human_micros,
+           // 10000 = divide by 1M to get secs, then multiply by 100 for pct
+           (write_stall_micros - db_stats_snapshot_.write_stall_micros) /
+               10000.0 / std::max(interval_seconds_up, 0.001));
+  value->append(buf);
+
+  std::string write_stall_stats;
+  DumpDBStatsWriteStall(&write_stall_stats);
+  value->append(write_stall_stats);
+
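+  // Roll the cumulative counters into db_stats_snapshot_ so the next dump
+  // can compute its interval deltas relative to this point.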
db_stats_snapshot_.seconds_up = seconds_up; + db_stats_snapshot_.ingest_bytes = user_bytes_written; + db_stats_snapshot_.write_other = write_other; + db_stats_snapshot_.write_self = write_self; + db_stats_snapshot_.num_keys_written = num_keys_written; + db_stats_snapshot_.wal_bytes = wal_bytes; + db_stats_snapshot_.wal_synced = wal_synced; + db_stats_snapshot_.write_with_wal = write_with_wal; + db_stats_snapshot_.write_stall_micros = write_stall_micros; +} + +void InternalStats::DumpDBMapStatsWriteStall( + std::map* value) { + constexpr uint32_t max_db_scope_write_stall_cause = + static_cast(WriteStallCause::kDBScopeWriteStallCauseEnumMax); + + for (uint32_t i = + max_db_scope_write_stall_cause - kNumDBScopeWriteStallCauses; + i < max_db_scope_write_stall_cause; ++i) { + for (uint32_t j = 0; + j < static_cast(WriteStallCondition::kNormal); ++j) { + WriteStallCause cause = static_cast(i); + WriteStallCondition condition = static_cast(j); + InternalStats::InternalDBStatsType internal_db_stat = + InternalDBStat(cause, condition); + + if (internal_db_stat == InternalStats::kIntStatsNumMax) { + continue; + } + + std::string name = + WriteStallStatsMapKeys::CauseConditionCount(cause, condition); + uint64_t stat = + db_stats_[static_cast(internal_db_stat)].load( + std::memory_order_relaxed); + (*value)[name] = std::to_string(stat); + } + } +} + +void InternalStats::DumpDBStatsWriteStall(std::string* value) { + assert(value); + + std::map write_stall_stats_map; + DumpDBMapStatsWriteStall(&write_stall_stats_map); + + std::ostringstream str; + str << "Write Stall (count): "; + + for (auto write_stall_stats_map_iter = write_stall_stats_map.begin(); + write_stall_stats_map_iter != write_stall_stats_map.end(); + write_stall_stats_map_iter++) { + const auto& name_and_stat = *write_stall_stats_map_iter; + str << name_and_stat.first << ": " << name_and_stat.second; + if (std::next(write_stall_stats_map_iter) == write_stall_stats_map.end()) { + str << "\n"; + } else { + str << ", "; + } + } + *value = str.str(); +} + +/** + * Dump Compaction Level stats to a map of stat name with "compaction." prefix + * to value in double as string. The level in stat name is represented with + * a prefix "Lx" where "x" is the level number. A special level "Sum" + * represents the sum of a stat for all levels. + * The result also contains IO stall counters which keys start with "io_stalls." + * and values represent uint64 encoded as strings. + */ +void InternalStats::DumpCFMapStats( + std::map* cf_stats) { + const VersionStorageInfo* vstorage = cfd_->current()->storage_info(); + CompactionStats compaction_stats_sum; + std::map> levels_stats; + DumpCFMapStats(vstorage, &levels_stats, &compaction_stats_sum); + for (auto const& level_ent : levels_stats) { + auto level_str = + level_ent.first == -1 ? "Sum" : "L" + std::to_string(level_ent.first); + for (auto const& stat_ent : level_ent.second) { + auto stat_type = stat_ent.first; + auto key_str = + "compaction." + level_str + "." + + InternalStats::compaction_level_stats.at(stat_type).property_name; + (*cf_stats)[key_str] = std::to_string(stat_ent.second); + } + } + + DumpCFMapStatsWriteStall(cf_stats); +} + +void InternalStats::DumpCFMapStats( + const VersionStorageInfo* vstorage, + std::map>* levels_stats, + CompactionStats* compaction_stats_sum) { + assert(vstorage); + + int num_levels_to_check = + (cfd_->ioptions()->compaction_style != kCompactionStyleFIFO) + ? vstorage->num_levels() - 1 + : 1; + + // Compaction scores are sorted based on its value. 
Restore them to the
+  // level order
+  std::vector<double> compaction_score(number_levels_, 0);
+  for (int i = 0; i < num_levels_to_check; ++i) {
+    compaction_score[vstorage->CompactionScoreLevel(i)] =
+        vstorage->CompactionScore(i);
+  }
+  // Count # of files being compacted for each level
+  std::vector<int> files_being_compacted(number_levels_, 0);
+  for (int level = 0; level < number_levels_; ++level) {
+    for (auto* f : vstorage->LevelFiles(level)) {
+      if (f->being_compacted) {
+        ++files_being_compacted[level];
+      }
+    }
+  }
+
+  int total_files = 0;
+  int total_files_being_compacted = 0;
+  double total_file_size = 0;
+  uint64_t flush_ingest = cf_stats_value_[BYTES_FLUSHED];
+  uint64_t add_file_ingest = cf_stats_value_[BYTES_INGESTED_ADD_FILE];
+  uint64_t curr_ingest = flush_ingest + add_file_ingest;
+  for (int level = 0; level < number_levels_; level++) {
+    int files = vstorage->NumLevelFiles(level);
+    total_files += files;
+    total_files_being_compacted += files_being_compacted[level];
+    if (comp_stats_[level].micros > 0 || comp_stats_[level].cpu_micros > 0 ||
+        files > 0) {
+      compaction_stats_sum->Add(comp_stats_[level]);
+      total_file_size += vstorage->NumLevelBytes(level);
+      uint64_t input_bytes;
+      if (level == 0) {
+        input_bytes = curr_ingest;
+      } else {
+        input_bytes = comp_stats_[level].bytes_read_non_output_levels +
+                      comp_stats_[level].bytes_read_blob;
+      }
+      double w_amp =
+          (input_bytes == 0)
+              ? 0.0
+              : static_cast<double>(comp_stats_[level].bytes_written +
+                                    comp_stats_[level].bytes_written_blob) /
+                    input_bytes;
+      std::map<LevelStatType, double> level_stats;
+      PrepareLevelStats(&level_stats, files, files_being_compacted[level],
+                        static_cast<double>(vstorage->NumLevelBytes(level)),
+                        compaction_score[level], w_amp, comp_stats_[level]);
+      (*levels_stats)[level] = level_stats;
+    }
+  }
+  // Cumulative summary
+  double w_amp = (0 == curr_ingest)
+                     ?
0.0 + : (compaction_stats_sum->bytes_written + + compaction_stats_sum->bytes_written_blob) / + static_cast(curr_ingest); + // Stats summary across levels + std::map sum_stats; + PrepareLevelStats(&sum_stats, total_files, total_files_being_compacted, + total_file_size, 0, w_amp, *compaction_stats_sum); + (*levels_stats)[-1] = sum_stats; // -1 is for the Sum level +} + +void InternalStats::DumpCFMapStatsByPriority( + std::map>* priorities_stats) { + for (size_t priority = 0; priority < comp_stats_by_pri_.size(); priority++) { + if (comp_stats_by_pri_[priority].micros > 0) { + std::map priority_stats; + PrepareLevelStats(&priority_stats, 0 /* num_files */, + 0 /* being_compacted */, 0 /* total_file_size */, + 0 /* compaction_score */, 0 /* w_amp */, + comp_stats_by_pri_[priority]); + (*priorities_stats)[static_cast(priority)] = priority_stats; + } + } +} + +void InternalStats::DumpCFMapStatsWriteStall( + std::map* value) { + uint64_t total_delays = 0; + uint64_t total_stops = 0; + constexpr uint32_t max_cf_scope_write_stall_cause = + static_cast(WriteStallCause::kCFScopeWriteStallCauseEnumMax); + + for (uint32_t i = + max_cf_scope_write_stall_cause - kNumCFScopeWriteStallCauses; + i < max_cf_scope_write_stall_cause; ++i) { + for (uint32_t j = 0; + j < static_cast(WriteStallCondition::kNormal); ++j) { + WriteStallCause cause = static_cast(i); + WriteStallCondition condition = static_cast(j); + InternalStats::InternalCFStatsType internal_cf_stat = + InternalCFStat(cause, condition); + + if (internal_cf_stat == InternalStats::INTERNAL_CF_STATS_ENUM_MAX) { + continue; + } + + std::string name = + WriteStallStatsMapKeys::CauseConditionCount(cause, condition); + uint64_t stat = + cf_stats_count_[static_cast(internal_cf_stat)]; + (*value)[name] = std::to_string(stat); + + if (condition == WriteStallCondition::kDelayed) { + total_delays += stat; + } else if (condition == WriteStallCondition::kStopped) { + total_stops += stat; + } + } + } + + (*value)[WriteStallStatsMapKeys:: + CFL0FileCountLimitDelaysWithOngoingCompaction()] = + std::to_string( + cf_stats_count_[L0_FILE_COUNT_LIMIT_DELAYS_WITH_ONGOING_COMPACTION]); + (*value)[WriteStallStatsMapKeys:: + CFL0FileCountLimitStopsWithOngoingCompaction()] = + std::to_string( + cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS_WITH_ONGOING_COMPACTION]); + + (*value)[WriteStallStatsMapKeys::TotalStops()] = std::to_string(total_stops); + (*value)[WriteStallStatsMapKeys::TotalDelays()] = + std::to_string(total_delays); +} + +void InternalStats::DumpCFStatsWriteStall(std::string* value, + uint64_t* total_stall_count) { + assert(value); + + std::map write_stall_stats_map; + DumpCFMapStatsWriteStall(&write_stall_stats_map); + + std::ostringstream str; + str << "Write Stall (count): "; + + for (auto write_stall_stats_map_iter = write_stall_stats_map.begin(); + write_stall_stats_map_iter != write_stall_stats_map.end(); + write_stall_stats_map_iter++) { + const auto& name_and_stat = *write_stall_stats_map_iter; + str << name_and_stat.first << ": " << name_and_stat.second; + if (std::next(write_stall_stats_map_iter) == write_stall_stats_map.end()) { + str << "\n"; + } else { + str << ", "; + } + } + + if (total_stall_count) { + *total_stall_count = + ParseUint64( + write_stall_stats_map[WriteStallStatsMapKeys::TotalStops()]) + + ParseUint64( + write_stall_stats_map[WriteStallStatsMapKeys::TotalDelays()]); + if (*total_stall_count > 0) { + str << "interval: " << *total_stall_count - cf_stats_snapshot_.stall_count + << " total count\n"; + } + } + *value = str.str(); +} + +void 
InternalStats::DumpCFStats(std::string* value) { + DumpCFStatsNoFileHistogram(/*is_periodic=*/false, value); + DumpCFFileHistogram(value); +} + +void InternalStats::DumpCFStatsNoFileHistogram(bool is_periodic, + std::string* value) { + char buf[2000]; + // Per-ColumnFamily stats + PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Level"); + value->append(buf); + + // Print stats for each level + const VersionStorageInfo* vstorage = cfd_->current()->storage_info(); + std::map> levels_stats; + CompactionStats compaction_stats_sum; + DumpCFMapStats(vstorage, &levels_stats, &compaction_stats_sum); + for (int l = 0; l < number_levels_; ++l) { + if (levels_stats.find(l) != levels_stats.end()) { + PrintLevelStats(buf, sizeof(buf), "L" + std::to_string(l), + levels_stats[l]); + value->append(buf); + } + } + + // Print sum of level stats + PrintLevelStats(buf, sizeof(buf), "Sum", levels_stats[-1]); + value->append(buf); + + uint64_t flush_ingest = cf_stats_value_[BYTES_FLUSHED]; + uint64_t add_file_ingest = cf_stats_value_[BYTES_INGESTED_ADD_FILE]; + uint64_t ingest_files_addfile = cf_stats_value_[INGESTED_NUM_FILES_TOTAL]; + uint64_t ingest_l0_files_addfile = + cf_stats_value_[INGESTED_LEVEL0_NUM_FILES_TOTAL]; + uint64_t ingest_keys_addfile = cf_stats_value_[INGESTED_NUM_KEYS_TOTAL]; + // Interval summary + uint64_t interval_flush_ingest = + flush_ingest - cf_stats_snapshot_.ingest_bytes_flush; + uint64_t interval_add_file_inget = + add_file_ingest - cf_stats_snapshot_.ingest_bytes_addfile; + uint64_t interval_ingest = + interval_flush_ingest + interval_add_file_inget + 1; + CompactionStats interval_stats(compaction_stats_sum); + interval_stats.Subtract(cf_stats_snapshot_.comp_stats); + double w_amp = + (interval_stats.bytes_written + interval_stats.bytes_written_blob) / + static_cast(interval_ingest); + PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0, w_amp, interval_stats); + value->append(buf); + + PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Priority"); + value->append(buf); + std::map> priorities_stats; + DumpCFMapStatsByPriority(&priorities_stats); + for (size_t priority = 0; priority < comp_stats_by_pri_.size(); ++priority) { + if (priorities_stats.find(static_cast(priority)) != + priorities_stats.end()) { + PrintLevelStats( + buf, sizeof(buf), + Env::PriorityToString(static_cast(priority)), + priorities_stats[static_cast(priority)]); + value->append(buf); + } + } + + const auto blob_st = vstorage->GetBlobStats(); + + snprintf(buf, sizeof(buf), + "\nBlob file count: %" ROCKSDB_PRIszt + ", total size: %.1f GB, garbage size: %.1f GB, space amp: %.1f\n\n", + vstorage->GetBlobFiles().size(), blob_st.total_file_size / kGB, + blob_st.total_garbage_size / kGB, blob_st.space_amp); + value->append(buf); + + uint64_t now_micros = clock_->NowMicros(); + double seconds_up = (now_micros - started_at_) / kMicrosInSec; + double interval_seconds_up = seconds_up - cf_stats_snapshot_.seconds_up; + snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n", + seconds_up, interval_seconds_up); + value->append(buf); + snprintf(buf, sizeof(buf), "Flush(GB): cumulative %.3f, interval %.3f\n", + flush_ingest / kGB, interval_flush_ingest / kGB); + value->append(buf); + snprintf(buf, sizeof(buf), "AddFile(GB): cumulative %.3f, interval %.3f\n", + add_file_ingest / kGB, interval_add_file_inget / kGB); + value->append(buf); + + uint64_t interval_ingest_files_addfile = + ingest_files_addfile - cf_stats_snapshot_.ingest_files_addfile; + snprintf(buf, sizeof(buf), + "AddFile(Total 
Files): cumulative %" PRIu64 ", interval %" PRIu64 + "\n", + ingest_files_addfile, interval_ingest_files_addfile); + value->append(buf); + + uint64_t interval_ingest_l0_files_addfile = + ingest_l0_files_addfile - cf_stats_snapshot_.ingest_l0_files_addfile; + snprintf(buf, sizeof(buf), + "AddFile(L0 Files): cumulative %" PRIu64 ", interval %" PRIu64 "\n", + ingest_l0_files_addfile, interval_ingest_l0_files_addfile); + value->append(buf); + + uint64_t interval_ingest_keys_addfile = + ingest_keys_addfile - cf_stats_snapshot_.ingest_keys_addfile; + snprintf(buf, sizeof(buf), + "AddFile(Keys): cumulative %" PRIu64 ", interval %" PRIu64 "\n", + ingest_keys_addfile, interval_ingest_keys_addfile); + value->append(buf); + + // Compact + uint64_t compact_bytes_read = 0; + uint64_t compact_bytes_write = 0; + uint64_t compact_micros = 0; + for (int level = 0; level < number_levels_; level++) { + compact_bytes_read += comp_stats_[level].bytes_read_output_level + + comp_stats_[level].bytes_read_non_output_levels + + comp_stats_[level].bytes_read_blob; + compact_bytes_write += comp_stats_[level].bytes_written + + comp_stats_[level].bytes_written_blob; + compact_micros += comp_stats_[level].micros; + } + + snprintf(buf, sizeof(buf), + "Cumulative compaction: %.2f GB write, %.2f MB/s write, " + "%.2f GB read, %.2f MB/s read, %.1f seconds\n", + compact_bytes_write / kGB, + compact_bytes_write / kMB / std::max(seconds_up, 0.001), + compact_bytes_read / kGB, + compact_bytes_read / kMB / std::max(seconds_up, 0.001), + compact_micros / kMicrosInSec); + value->append(buf); + + // Compaction interval + uint64_t interval_compact_bytes_write = + compact_bytes_write - cf_stats_snapshot_.compact_bytes_write; + uint64_t interval_compact_bytes_read = + compact_bytes_read - cf_stats_snapshot_.compact_bytes_read; + uint64_t interval_compact_micros = + compact_micros - cf_stats_snapshot_.compact_micros; + + snprintf( + buf, sizeof(buf), + "Interval compaction: %.2f GB write, %.2f MB/s write, " + "%.2f GB read, %.2f MB/s read, %.1f seconds\n", + interval_compact_bytes_write / kGB, + interval_compact_bytes_write / kMB / std::max(interval_seconds_up, 0.001), + interval_compact_bytes_read / kGB, + interval_compact_bytes_read / kMB / std::max(interval_seconds_up, 0.001), + interval_compact_micros / kMicrosInSec); + value->append(buf); + if (is_periodic) { + cf_stats_snapshot_.compact_bytes_write = compact_bytes_write; + cf_stats_snapshot_.compact_bytes_read = compact_bytes_read; + cf_stats_snapshot_.compact_micros = compact_micros; + } + + std::string write_stall_stats; + uint64_t total_stall_count; + DumpCFStatsWriteStall(&write_stall_stats, &total_stall_count); + value->append(write_stall_stats); + + if (is_periodic) { + cf_stats_snapshot_.seconds_up = seconds_up; + cf_stats_snapshot_.ingest_bytes_flush = flush_ingest; + cf_stats_snapshot_.ingest_bytes_addfile = add_file_ingest; + cf_stats_snapshot_.ingest_files_addfile = ingest_files_addfile; + cf_stats_snapshot_.ingest_l0_files_addfile = ingest_l0_files_addfile; + cf_stats_snapshot_.ingest_keys_addfile = ingest_keys_addfile; + cf_stats_snapshot_.comp_stats = compaction_stats_sum; + cf_stats_snapshot_.stall_count = total_stall_count; + } + + // Do not gather cache entry stats during CFStats because DB + // mutex is held. 
+  // mutex is held. Only dump last cached collection (rely on DB
+  // periodic stats dump to update)
+  if (cache_entry_stats_collector_) {
+    CacheEntryRoleStats stats;
+    // thread safe
+    cache_entry_stats_collector_->GetStats(&stats);
+
+    constexpr uint64_t kDayInMicros = uint64_t{86400} * 1000000U;
+
+    // Skip if stats are extremely old (> 1 day, incl not yet populated)
+    if (now_micros - stats.last_end_time_micros_ < kDayInMicros) {
+      value->append(stats.ToString(clock_));
+    }
+  }
+}
+
+void InternalStats::DumpCFFileHistogram(std::string* value) {
+  assert(value);
+  assert(cfd_);
+
+  std::ostringstream oss;
+  oss << "\n** File Read Latency Histogram By Level [" << cfd_->GetName()
+      << "] **\n";
+
+  for (int level = 0; level < number_levels_; level++) {
+    if (!file_read_latency_[level].Empty()) {
+      oss << "** Level " << level << " read latency histogram (micros):\n"
+          << file_read_latency_[level].ToString() << '\n';
+    }
+  }
+
+  if (!blob_file_read_latency_.Empty()) {
+    oss << "** Blob file read latency histogram (micros):\n"
+        << blob_file_read_latency_.ToString() << '\n';
+  }
+
+  value->append(oss.str());
+}
+
+
+}  // namespace ROCKSDB_NAMESPACE
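The report assembled by the dump functions above surfaces through the public DB::GetProperty API. A minimal sketch of pulling the same text from application code (illustrative only; the database path is hypothetical):

    #include <cassert>
    #include <iostream>
    #include <string>
    #include "rocksdb/db.h"

    int main() {
      rocksdb::DB* db = nullptr;
      rocksdb::Options options;
      options.create_if_missing = true;
      rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/cfstats-demo", &db);
      assert(s.ok());
      std::string stats;
      // "rocksdb.cfstats" is routed to InternalStats::HandleCFStats, which
      // calls DumpCFStats (and thus the two functions above).
      if (db->GetProperty("rocksdb.cfstats", &stats)) {
        std::cout << stats << '\n';
      }
      delete db;
      return 0;
    }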
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.h
new file mode 100644
index 0000000000..a65ef629aa
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/internal_stats.h
@@ -0,0 +1,875 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "cache/cache_entry_roles.h"
+#include "db/version_set.h"
+#include "rocksdb/system_clock.h"
+#include "util/hash_containers.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+template <class Stats>
+class CacheEntryStatsCollector;
+class DBImpl;
+class MemTableList;
+
+// Config for retrieving a property's value.
+struct DBPropertyInfo {
+  bool need_out_of_mutex;
+
+  // gcc had an internal error for initializing union of pointer-to-member-
+  // functions. Workaround is to populate exactly one of the following function
+  // pointers with a non-nullptr value.
+
+  // @param value Value-result argument for storing the property's string value
+  // @param suffix Argument portion of the property. For example, suffix would
+  //      be "5" for the property "rocksdb.num-files-at-level5". So far, only
+  //      certain string properties take an argument.
+  bool (InternalStats::*handle_string)(std::string* value, Slice suffix);
+
+  // @param value Value-result argument for storing the property's uint64 value
+  // @param db Many of the int properties rely on DBImpl methods.
+  // @param version Version is needed in case the property is retrieved without
+  //      holding db mutex, which is only supported for int properties.
+  bool (InternalStats::*handle_int)(uint64_t* value, DBImpl* db,
+                                    Version* version);
+
+  // @param props Map of general properties to populate
+  // @param suffix Argument portion of the property. (see handle_string)
+  bool (InternalStats::*handle_map)(std::map<std::string, std::string>* props,
+                                    Slice suffix);
+
+  // handle the string type properties rely on DBImpl methods
+  // @param value Value-result argument for storing the property's string value
+  bool (DBImpl::*handle_string_dbimpl)(std::string* value);
+};
+
+extern const DBPropertyInfo* GetPropertyInfo(const Slice& property);
+
+#undef SCORE
+enum class LevelStatType {
+  INVALID = 0,
+  NUM_FILES,
+  COMPACTED_FILES,
+  SIZE_BYTES,
+  SCORE,
+  READ_GB,
+  RN_GB,
+  RNP1_GB,
+  WRITE_GB,
+  W_NEW_GB,
+  MOVED_GB,
+  WRITE_AMP,
+  READ_MBPS,
+  WRITE_MBPS,
+  COMP_SEC,
+  COMP_CPU_SEC,
+  COMP_COUNT,
+  AVG_SEC,
+  KEY_IN,
+  KEY_DROP,
+  R_BLOB_GB,
+  W_BLOB_GB,
+  TOTAL  // total number of types
+};
+
+struct LevelStat {
+  // This what will be L?.property_name in the flat map returned to the user
+  std::string property_name;
+  // This will be what we will print in the header in the cli
+  std::string header_name;
+};
+
+struct DBStatInfo {
+  // This what will be property_name in the flat map returned to the user
+  std::string property_name;
+};
+
+class InternalStats {
+ public:
+  static const std::map<LevelStatType, LevelStat> compaction_level_stats;
+
+  enum InternalCFStatsType {
+    MEMTABLE_LIMIT_DELAYS,
+    MEMTABLE_LIMIT_STOPS,
+    L0_FILE_COUNT_LIMIT_DELAYS,
+    L0_FILE_COUNT_LIMIT_STOPS,
+    PENDING_COMPACTION_BYTES_LIMIT_DELAYS,
+    PENDING_COMPACTION_BYTES_LIMIT_STOPS,
+    // Write slowdown caused by l0 file count limit while there is ongoing L0
+    // compaction
+    L0_FILE_COUNT_LIMIT_DELAYS_WITH_ONGOING_COMPACTION,
+    // Write stop caused by l0 file count limit while there is ongoing L0
+    // compaction
+    L0_FILE_COUNT_LIMIT_STOPS_WITH_ONGOING_COMPACTION,
+    WRITE_STALLS_ENUM_MAX,
+    // End of all write stall stats
+    BYTES_FLUSHED,
+    BYTES_INGESTED_ADD_FILE,
+    INGESTED_NUM_FILES_TOTAL,
+    INGESTED_LEVEL0_NUM_FILES_TOTAL,
+    INGESTED_NUM_KEYS_TOTAL,
+    INTERNAL_CF_STATS_ENUM_MAX,
+  };
+
+  enum InternalDBStatsType {
+    kIntStatsWalFileBytes,
+    kIntStatsWalFileSynced,
+    kIntStatsBytesWritten,
+    kIntStatsNumKeysWritten,
+    kIntStatsWriteDoneByOther,
+    kIntStatsWriteDoneBySelf,
+    kIntStatsWriteWithWal,
+    // TODO(hx235): Currently `kIntStatsWriteStallMicros` only measures
+    // "delayed" time of CF-scope write stalls, not including the "stopped"
+    // time nor any DB-scope write stalls (e.g, ones triggered by
+    // `WriteBufferManager`).
+    //
+    // However, the word "write stall" includes both "delayed" and "stopped"
+    // (see `WriteStallCondition`) and DB-scope writes stalls (see
+    // `WriteStallCause`).
+    //
+    // So we should improve, rename or clarify it
+    kIntStatsWriteStallMicros,
+    kIntStatsWriteBufferManagerLimitStopsCounts,
+    kIntStatsNumMax,
+  };
+
+  static const std::map<InternalDBStatsType, DBStatInfo> db_stats_type_to_info;
+
+  InternalStats(int num_levels, SystemClock* clock, ColumnFamilyData* cfd);
+
+  // Per level compaction stats
+  struct CompactionOutputsStats {
+    uint64_t num_output_records = 0;
+    uint64_t bytes_written = 0;
+    uint64_t bytes_written_blob = 0;
+    uint64_t num_output_files = 0;
+    uint64_t num_output_files_blob = 0;
+
+    void Add(const CompactionOutputsStats& stats) {
+      this->num_output_records += stats.num_output_records;
+      this->bytes_written += stats.bytes_written;
+      this->bytes_written_blob += stats.bytes_written_blob;
+      this->num_output_files += stats.num_output_files;
+      this->num_output_files_blob += stats.num_output_files_blob;
+    }
+  };
+
+  // Per level compaction stats.  comp_stats_[level] stores the stats for
+  // compactions that produced data for the specified "level".
+  struct CompactionStats {
+    uint64_t micros;
+    uint64_t cpu_micros;
+
+    // The number of bytes read from all non-output levels (table files)
+    uint64_t bytes_read_non_output_levels;
+
+    // The number of bytes read from the compaction output level (table files)
+    uint64_t bytes_read_output_level;
+
+    // The number of bytes read from blob files
+    uint64_t bytes_read_blob;
+
+    // Total number of bytes written to table files during compaction
+    uint64_t bytes_written;
+
+    // Total number of bytes written to blob files during compaction
+    uint64_t bytes_written_blob;
+
+    // Total number of bytes moved to the output level (table files)
+    uint64_t bytes_moved;
+
+    // The number of compaction input files in all non-output levels (table
+    // files)
+    int num_input_files_in_non_output_levels;
+
+    // The number of compaction input files in the output level (table files)
+    int num_input_files_in_output_level;
+
+    // The number of compaction output files (table files)
+    int num_output_files;
+
+    // The number of compaction output files (blob files)
+    int num_output_files_blob;
+
+    // Total incoming entries during compaction between levels N and N+1
+    uint64_t num_input_records;
+
+    // Accumulated diff number of entries
+    // (num input entries - num output entries) for compaction levels N and N+1
+    uint64_t num_dropped_records;
+
+    // Total output entries from compaction
+    uint64_t num_output_records;
+
+    // Number of compactions done
+    int count;
+
+    // Number of compactions done per CompactionReason
+    int counts[static_cast<int>(CompactionReason::kNumOfReasons)]{};
+
+    explicit CompactionStats()
+        : micros(0),
+          cpu_micros(0),
+          bytes_read_non_output_levels(0),
+          bytes_read_output_level(0),
+          bytes_read_blob(0),
+          bytes_written(0),
+          bytes_written_blob(0),
+          bytes_moved(0),
+          num_input_files_in_non_output_levels(0),
+          num_input_files_in_output_level(0),
+          num_output_files(0),
+          num_output_files_blob(0),
+          num_input_records(0),
+          num_dropped_records(0),
+          num_output_records(0),
+          count(0) {
+      int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
+      for (int i = 0; i < num_of_reasons; i++) {
+        counts[i] = 0;
+      }
+    }
+
+    explicit CompactionStats(CompactionReason reason, int c)
+        : micros(0),
+          cpu_micros(0),
+          bytes_read_non_output_levels(0),
+          bytes_read_output_level(0),
+          bytes_read_blob(0),
+          bytes_written(0),
+          bytes_written_blob(0),
+          bytes_moved(0),
+          num_input_files_in_non_output_levels(0),
+          num_input_files_in_output_level(0),
+          num_output_files(0),
+          num_output_files_blob(0),
+          num_input_records(0),
+          num_dropped_records(0),
+          num_output_records(0),
+          count(c) {
+      int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
+      for (int i = 0; i < num_of_reasons; i++) {
+        counts[i] = 0;
+      }
+      int r = static_cast<int>(reason);
+      if (r >= 0 && r < num_of_reasons) {
+        counts[r] = c;
+      } else {
+        count = 0;
+      }
+    }
+
+    CompactionStats(const CompactionStats& c)
+        : micros(c.micros),
+          cpu_micros(c.cpu_micros),
+          bytes_read_non_output_levels(c.bytes_read_non_output_levels),
+          bytes_read_output_level(c.bytes_read_output_level),
+          bytes_read_blob(c.bytes_read_blob),
+          bytes_written(c.bytes_written),
+          bytes_written_blob(c.bytes_written_blob),
+          bytes_moved(c.bytes_moved),
+          num_input_files_in_non_output_levels(
+              c.num_input_files_in_non_output_levels),
+          num_input_files_in_output_level(c.num_input_files_in_output_level),
+          num_output_files(c.num_output_files),
+          num_output_files_blob(c.num_output_files_blob),
+          num_input_records(c.num_input_records),
+          num_dropped_records(c.num_dropped_records),
+          num_output_records(c.num_output_records),
+          count(c.count) {
+      int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
+      for (int i = 0; i < num_of_reasons; i++) {
+        counts[i] = c.counts[i];
+      }
+    }
+
+    CompactionStats& operator=(const CompactionStats& c) {
+      micros = c.micros;
+      cpu_micros = c.cpu_micros;
+      bytes_read_non_output_levels = c.bytes_read_non_output_levels;
+      bytes_read_output_level = c.bytes_read_output_level;
+      bytes_read_blob = c.bytes_read_blob;
+      bytes_written = c.bytes_written;
+      bytes_written_blob = c.bytes_written_blob;
+      bytes_moved = c.bytes_moved;
+      num_input_files_in_non_output_levels =
+          c.num_input_files_in_non_output_levels;
+      num_input_files_in_output_level = c.num_input_files_in_output_level;
+      num_output_files = c.num_output_files;
+      num_output_files_blob = c.num_output_files_blob;
+      num_input_records = c.num_input_records;
+      num_dropped_records = c.num_dropped_records;
+      num_output_records = c.num_output_records;
+      count = c.count;
+
+      int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
+      for (int i = 0; i < num_of_reasons; i++) {
+        counts[i] = c.counts[i];
+      }
+      return *this;
+    }
+
+    void Clear() {
+      this->micros = 0;
+      this->cpu_micros = 0;
+      this->bytes_read_non_output_levels = 0;
+      this->bytes_read_output_level = 0;
+      this->bytes_read_blob = 0;
+      this->bytes_written = 0;
+      this->bytes_written_blob = 0;
+      this->bytes_moved = 0;
+      this->num_input_files_in_non_output_levels = 0;
+      this->num_input_files_in_output_level = 0;
+      this->num_output_files = 0;
+      this->num_output_files_blob = 0;
+      this->num_input_records = 0;
+      this->num_dropped_records = 0;
+      this->num_output_records = 0;
+      this->count = 0;
+      int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
+      for (int i = 0; i < num_of_reasons; i++) {
+        counts[i] = 0;
+      }
+    }
+
+    void Add(const CompactionStats& c) {
+      this->micros += c.micros;
+      this->cpu_micros += c.cpu_micros;
+      this->bytes_read_non_output_levels += c.bytes_read_non_output_levels;
+      this->bytes_read_output_level += c.bytes_read_output_level;
+      this->bytes_read_blob += c.bytes_read_blob;
+      this->bytes_written += c.bytes_written;
+      this->bytes_written_blob += c.bytes_written_blob;
+      this->bytes_moved += c.bytes_moved;
+      this->num_input_files_in_non_output_levels +=
+          c.num_input_files_in_non_output_levels;
+      this->num_input_files_in_output_level +=
+          c.num_input_files_in_output_level;
+      this->num_output_files += c.num_output_files;
+      this->num_output_files_blob += c.num_output_files_blob;
+      this->num_input_records += c.num_input_records;
+      this->num_dropped_records += c.num_dropped_records;
+      this->num_output_records += c.num_output_records;
+      this->count += c.count;
+      int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
+      for (int i = 0; i < num_of_reasons; i++) {
+        counts[i] += c.counts[i];
+      }
+    }
+
+    void Add(const CompactionOutputsStats& stats) {
+      this->num_output_files += static_cast<int>(stats.num_output_files);
+      this->num_output_records += stats.num_output_records;
+      this->bytes_written += stats.bytes_written;
+      this->bytes_written_blob += stats.bytes_written_blob;
+      this->num_output_files_blob +=
+          static_cast<int>(stats.num_output_files_blob);
+    }
+
+    void Subtract(const CompactionStats& c) {
+      this->micros -= c.micros;
+      this->cpu_micros -= c.cpu_micros;
+      this->bytes_read_non_output_levels -= c.bytes_read_non_output_levels;
+      this->bytes_read_output_level -= c.bytes_read_output_level;
+      this->bytes_read_blob -= c.bytes_read_blob;
+      this->bytes_written -= c.bytes_written;
+      this->bytes_written_blob -= c.bytes_written_blob;
+      this->bytes_moved -= c.bytes_moved;
+      this->num_input_files_in_non_output_levels -=
+          c.num_input_files_in_non_output_levels;
+      this->num_input_files_in_output_level -=
+          c.num_input_files_in_output_level;
+      this->num_output_files -= c.num_output_files;
+      this->num_output_files_blob -= c.num_output_files_blob;
+      this->num_input_records -= c.num_input_records;
+      this->num_dropped_records -= c.num_dropped_records;
+      this->num_output_records -= c.num_output_records;
+      this->count -= c.count;
+      int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
+      for (int i = 0; i < num_of_reasons; i++) {
+        counts[i] -= c.counts[i];
+      }
+    }
+
+    void ResetCompactionReason(CompactionReason reason) {
+      int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
+      assert(count == 1);  // only support update one compaction reason
+      for (int i = 0; i < num_of_reasons; i++) {
+        counts[i] = 0;
+      }
+      int r = static_cast<int>(reason);
+      assert(r >= 0 && r < num_of_reasons);
+      counts[r] = 1;
+    }
+  };
+
+  // Compaction stats, for per_key_placement compaction, it includes 2 levels
+  // stats: the last level and the penultimate level.
+  struct CompactionStatsFull {
+    // the stats for the target primary output level
+    CompactionStats stats;
+
+    // stats for penultimate level output if exist
+    bool has_penultimate_level_output = false;
+    CompactionStats penultimate_level_stats;
+
+    explicit CompactionStatsFull() : stats(), penultimate_level_stats() {}
+
+    explicit CompactionStatsFull(CompactionReason reason, int c)
+        : stats(reason, c), penultimate_level_stats(reason, c){};
+
+    uint64_t TotalBytesWritten() const {
+      uint64_t bytes_written = stats.bytes_written + stats.bytes_written_blob;
+      if (has_penultimate_level_output) {
+        bytes_written += penultimate_level_stats.bytes_written +
+                         penultimate_level_stats.bytes_written_blob;
+      }
+      return bytes_written;
+    }
+
+    uint64_t DroppedRecords() {
+      uint64_t output_records = stats.num_output_records;
+      if (has_penultimate_level_output) {
+        output_records += penultimate_level_stats.num_output_records;
+      }
+      if (stats.num_input_records > output_records) {
+        return stats.num_input_records - output_records;
+      }
+      return 0;
+    }
+
+    void SetMicros(uint64_t val) {
+      stats.micros = val;
+      penultimate_level_stats.micros = val;
+    }
+
+    void AddCpuMicros(uint64_t val) {
+      stats.cpu_micros += val;
+      penultimate_level_stats.cpu_micros += val;
+    }
+  };
+
+  // For use with CacheEntryStatsCollector
+  struct CacheEntryRoleStats {
+    uint64_t cache_capacity = 0;
+    uint64_t cache_usage = 0;
+    size_t table_size = 0;
+    size_t occupancy = 0;
+    std::string cache_id;
+    std::array<uint64_t, kNumCacheEntryRoles> total_charges;
+    std::array<size_t, kNumCacheEntryRoles> entry_counts;
+    uint32_t collection_count = 0;
+    uint32_t copies_of_last_collection = 0;
+    uint64_t last_start_time_micros_ = 0;
+    uint64_t last_end_time_micros_ = 0;
+    uint32_t hash_seed = 0;
+
+    void Clear() {
+      // Wipe everything except collection_count
+      uint32_t saved_collection_count = collection_count;
+      *this = CacheEntryRoleStats();
+      collection_count = saved_collection_count;
+    }
+
+    void BeginCollection(Cache*, SystemClock*, uint64_t start_time_micros);
+    std::function<void(const Slice& key, Cache::ObjectPtr value, size_t charge,
+                       const Cache::CacheItemHelper* helper)>
+    GetEntryCallback();
+    void EndCollection(Cache*, SystemClock*, uint64_t end_time_micros);
+    void SkippedCollection();
+
+    std::string ToString(SystemClock* clock) const;
+    void ToMap(std::map<std::string, std::string>* values,
+               SystemClock* clock) const;
+
+   private:
+    uint64_t GetLastDurationMicros() const;
+  };
+
+  void Clear() {
+    for (int i = 0; i < kIntStatsNumMax; i++) {
+      db_stats_[i].store(0);
+    }
+    for (int i = 0; i < INTERNAL_CF_STATS_ENUM_MAX; i++) {
+      cf_stats_count_[i] = 0;
+      cf_stats_value_[i] = 0;
+    }
+    for (auto& comp_stat : comp_stats_) {
+      comp_stat.Clear();
+    }
+    per_key_placement_comp_stats_.Clear();
+    for (auto& h : file_read_latency_) {
+      h.Clear();
+    }
+    blob_file_read_latency_.Clear();
+    cf_stats_snapshot_.Clear();
+    db_stats_snapshot_.Clear();
+    bg_error_count_ = 0;
+    started_at_ = clock_->NowMicros();
+    has_cf_change_since_dump_ = true;
+  }
+
+  void AddCompactionStats(int level, Env::Priority thread_pri,
+                          const CompactionStats& stats) {
+    comp_stats_[level].Add(stats);
+    comp_stats_by_pri_[thread_pri].Add(stats);
+  }
+
+  void AddCompactionStats(int level, Env::Priority thread_pri,
+                          const CompactionStatsFull& comp_stats_full) {
+    AddCompactionStats(level, thread_pri, comp_stats_full.stats);
+    if (comp_stats_full.has_penultimate_level_output) {
+      per_key_placement_comp_stats_.Add(
+          comp_stats_full.penultimate_level_stats);
+    }
+  }
+
+  void IncBytesMoved(int level, uint64_t amount) {
+    comp_stats_[level].bytes_moved += amount;
+  }
+
+  void AddCFStats(InternalCFStatsType type, uint64_t value) {
+    has_cf_change_since_dump_ = true;
+    cf_stats_value_[type] += value;
+    ++cf_stats_count_[type];
+  }
+
+  void AddDBStats(InternalDBStatsType type, uint64_t value,
+                  bool concurrent = false) {
+    auto& v = db_stats_[type];
+    if (concurrent) {
+      v.fetch_add(value, std::memory_order_relaxed);
+    } else {
+      v.store(v.load(std::memory_order_relaxed) + value,
+              std::memory_order_relaxed);
+    }
+  }
+
+  uint64_t GetDBStats(InternalDBStatsType type) {
+    return db_stats_[type].load(std::memory_order_relaxed);
+  }
+
+  HistogramImpl* GetFileReadHist(int level) {
+    return &file_read_latency_[level];
+  }
+
+  HistogramImpl* GetBlobFileReadHist() { return &blob_file_read_latency_; }
+
+  uint64_t GetBackgroundErrorCount() const { return bg_error_count_; }
+
+  uint64_t BumpAndGetBackgroundErrorCount() { return ++bg_error_count_; }
+
+  bool GetStringProperty(const DBPropertyInfo& property_info,
+                         const Slice& property, std::string* value);
+
+  bool GetMapProperty(const DBPropertyInfo& property_info,
+                      const Slice& property,
+                      std::map<std::string, std::string>* value);
+
+  bool GetIntProperty(const DBPropertyInfo& property_info, uint64_t* value,
+                      DBImpl* db);
+
+  bool GetIntPropertyOutOfMutex(const DBPropertyInfo& property_info,
+                                Version* version, uint64_t* value);
+
+  // Unless there is a recent enough collection of the stats, collect and
+  // saved new cache entry stats. If `foreground`, require data to be more
+  // recent to skip re-collection.
+  //
+  // This should only be called while NOT holding the DB mutex.
+  void CollectCacheEntryStats(bool foreground);
+
+  const uint64_t* TEST_GetCFStatsValue() const { return cf_stats_value_; }
+
+  const std::vector<CompactionStats>& TEST_GetCompactionStats() const {
+    return comp_stats_;
+  }
+
+  const CompactionStats& TEST_GetPerKeyPlacementCompactionStats() const {
+    return per_key_placement_comp_stats_;
+  }
+
+  void TEST_GetCacheEntryRoleStats(CacheEntryRoleStats* stats, bool foreground);
+
+  // Store a mapping from the user-facing DB::Properties string to our
+  // DBPropertyInfo struct used internally for retrieving properties.
+  static const UnorderedMap<std::string, DBPropertyInfo> ppt_name_to_info;
+
+  static const std::string kPeriodicCFStats;
+
+ private:
+  void DumpDBMapStats(std::map<std::string, std::string>* db_stats);
+  void DumpDBStats(std::string* value);
+
+  void DumpDBMapStatsWriteStall(std::map<std::string, std::string>* value);
+  void DumpDBStatsWriteStall(std::string* value);
+
+  void DumpCFMapStats(std::map<std::string, std::string>* cf_stats);
+  void DumpCFMapStats(
+      const VersionStorageInfo* vstorage,
+      std::map<int, std::map<LevelStatType, double>>* level_stats,
+      CompactionStats* compaction_stats_sum);
+  void DumpCFMapStatsByPriority(
+      std::map<int, std::map<LevelStatType, double>>* priorities_stats);
+  void DumpCFStats(std::string* value);
+  // if is_periodic = true, it is an internal call by RocksDB periodically to
+  // dump the status.
+  void DumpCFStatsNoFileHistogram(bool is_periodic, std::string* value);
+  // if is_periodic = true, it is an internal call by RocksDB periodically to
+  // dump the status.
+  void DumpCFFileHistogram(std::string* value);
+
+  void DumpCFMapStatsWriteStall(std::map<std::string, std::string>* value);
+  void DumpCFStatsWriteStall(std::string* value,
+                             uint64_t* total_stall_count = nullptr);
+
+  Cache* GetBlockCacheForStats();
+  Cache* GetBlobCacheForStats();
+
+  // Per-DB stats
+  std::atomic<uint64_t> db_stats_[kIntStatsNumMax];
+  // Per-ColumnFamily stats
+  uint64_t cf_stats_value_[INTERNAL_CF_STATS_ENUM_MAX];
+  uint64_t cf_stats_count_[INTERNAL_CF_STATS_ENUM_MAX];
+  // Initialize/reference the collector in constructor so that we don't need
+  // additional synchronization in InternalStats, relying on synchronization
+  // in CacheEntryStatsCollector::GetStats. This collector is pinned in cache
+  // (through a shared_ptr) so that it does not get immediately ejected from
+  // a full cache, which would force a re-scan on the next GetStats.
+  std::shared_ptr<CacheEntryStatsCollector<CacheEntryRoleStats>>
+      cache_entry_stats_collector_;
+  // Per-ColumnFamily/level compaction stats
+  std::vector<CompactionStats> comp_stats_;
+  std::vector<CompactionStats> comp_stats_by_pri_;
+  CompactionStats per_key_placement_comp_stats_;
+  std::vector<HistogramImpl> file_read_latency_;
+  HistogramImpl blob_file_read_latency_;
+  bool has_cf_change_since_dump_;
+  // How many periods of no change since the last time stats are dumped for
+  // a periodic dump.
+  int no_cf_change_period_since_dump_ = 0;
+  uint64_t last_histogram_num = std::numeric_limits<uint64_t>::max();
+  static const int kMaxNoChangePeriodSinceDump;
+
+  // Used to compute per-interval statistics
+  struct CFStatsSnapshot {
+    // ColumnFamily-level stats
+    CompactionStats comp_stats;
+    uint64_t ingest_bytes_flush;  // Bytes written to L0 (Flush)
+    uint64_t stall_count;         // Total counts of CF-scope write stalls
+    // Stats from compaction jobs - bytes written, bytes read, duration.
+    uint64_t compact_bytes_write;
+    uint64_t compact_bytes_read;
+    uint64_t compact_micros;
+    double seconds_up;
+
+    // AddFile specific stats
+    uint64_t ingest_bytes_addfile;     // Total Bytes ingested
+    uint64_t ingest_files_addfile;     // Total number of files ingested
+    uint64_t ingest_l0_files_addfile;  // Total number of files ingested to L0
+    uint64_t ingest_keys_addfile;      // Total number of keys ingested
+
+    CFStatsSnapshot()
+        : ingest_bytes_flush(0),
+          stall_count(0),
+          compact_bytes_write(0),
+          compact_bytes_read(0),
+          compact_micros(0),
+          seconds_up(0),
+          ingest_bytes_addfile(0),
+          ingest_files_addfile(0),
+          ingest_l0_files_addfile(0),
+          ingest_keys_addfile(0) {}
+
+    void Clear() {
+      comp_stats.Clear();
+      ingest_bytes_flush = 0;
+      stall_count = 0;
+      compact_bytes_write = 0;
+      compact_bytes_read = 0;
+      compact_micros = 0;
+      seconds_up = 0;
+      ingest_bytes_addfile = 0;
+      ingest_files_addfile = 0;
+      ingest_l0_files_addfile = 0;
+      ingest_keys_addfile = 0;
+    }
+  } cf_stats_snapshot_;
+
+  struct DBStatsSnapshot {
+    // DB-level stats
+    uint64_t ingest_bytes;    // Bytes written by user
+    uint64_t wal_bytes;       // Bytes written to WAL
+    uint64_t wal_synced;      // Number of times WAL is synced
+    uint64_t write_with_wal;  // Number of writes that request WAL
+    // These count the number of writes processed by the calling thread or
+    // another thread.
+    uint64_t write_other;
+    uint64_t write_self;
+    // Total number of keys written. write_self and write_other measure number
+    // of write requests written, Each of the write request can contain updates
+    // to multiple keys. num_keys_written is total number of keys updated by
+    // all those writes.
+    uint64_t num_keys_written;
+    // Total time writes delayed by stalls.
+    uint64_t write_stall_micros;
+    double seconds_up;
+
+    DBStatsSnapshot()
+        : ingest_bytes(0),
+          wal_bytes(0),
+          wal_synced(0),
+          write_with_wal(0),
+          write_other(0),
+          write_self(0),
+          num_keys_written(0),
+          write_stall_micros(0),
+          seconds_up(0) {}
+
+    void Clear() {
+      ingest_bytes = 0;
+      wal_bytes = 0;
+      wal_synced = 0;
+      write_with_wal = 0;
+      write_other = 0;
+      write_self = 0;
+      num_keys_written = 0;
+      write_stall_micros = 0;
+      seconds_up = 0;
+    }
+  } db_stats_snapshot_;
+
+  // Handler functions for getting property values. They use "value" as a
+  // value-result argument, and return true upon successfully setting "value".
+  bool HandleNumFilesAtLevel(std::string* value, Slice suffix);
+  bool HandleCompressionRatioAtLevelPrefix(std::string* value, Slice suffix);
+  bool HandleLevelStats(std::string* value, Slice suffix);
+  bool HandleStats(std::string* value, Slice suffix);
+  bool HandleCFMapStats(std::map<std::string, std::string>* compaction_stats,
+                        Slice suffix);
+  bool HandleCFStats(std::string* value, Slice suffix);
+  bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix);
+  bool HandleCFFileHistogram(std::string* value, Slice suffix);
+  bool HandleCFStatsPeriodic(std::string* value, Slice suffix);
+  bool HandleCFWriteStallStats(std::string* value, Slice suffix);
+  bool HandleCFWriteStallStatsMap(std::map<std::string, std::string>* values,
+                                  Slice suffix);
+  bool HandleDBMapStats(std::map<std::string, std::string>* compaction_stats,
+                        Slice suffix);
+  bool HandleDBStats(std::string* value, Slice suffix);
+  bool HandleDBWriteStallStats(std::string* value, Slice suffix);
+  bool HandleDBWriteStallStatsMap(std::map<std::string, std::string>* values,
+                                  Slice suffix);
+  bool HandleSsTables(std::string* value, Slice suffix);
+  bool HandleAggregatedTableProperties(std::string* value, Slice suffix);
+  bool HandleAggregatedTablePropertiesAtLevel(std::string* value,
+                                              Slice suffix);
+  bool HandleAggregatedTablePropertiesMap(
+      std::map<std::string, std::string>* values, Slice suffix);
+  bool HandleAggregatedTablePropertiesAtLevelMap(
+      std::map<std::string, std::string>* values, Slice suffix);
+  bool HandleNumImmutableMemTable(uint64_t* value, DBImpl* db,
+                                  Version* version);
+  bool HandleNumImmutableMemTableFlushed(uint64_t* value, DBImpl* db,
+                                         Version* version);
+  bool HandleMemTableFlushPending(uint64_t* value, DBImpl* db,
+                                  Version* version);
+  bool HandleNumRunningFlushes(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleCompactionPending(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleNumRunningCompactions(uint64_t* value, DBImpl* db,
+                                   Version* version);
+  bool HandleBackgroundErrors(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db,
+                                   Version* version);
+  bool HandleCurSizeAllMemTables(uint64_t* value, DBImpl* db,
+                                 Version* version);
+  bool HandleSizeAllMemTables(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleNumEntriesActiveMemTable(uint64_t* value, DBImpl* db,
+                                      Version* version);
+  bool HandleNumEntriesImmMemTables(uint64_t* value, DBImpl* db,
+                                    Version* version);
+  bool HandleNumDeletesActiveMemTable(uint64_t* value, DBImpl* db,
+                                      Version* version);
+  bool HandleNumDeletesImmMemTables(uint64_t* value, DBImpl* db,
+                                    Version* version);
+  bool HandleEstimateNumKeys(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleNumSnapshots(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleOldestSnapshotSequence(uint64_t* value, DBImpl* db,
+                                    Version* version);
+  bool HandleNumLiveVersions(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleCurrentSuperVersionNumber(uint64_t* value, DBImpl* db,
+                                       Version* version);
+  bool HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db,
+                                    Version* version);
+  bool HandleBaseLevel(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleTotalSstFilesSize(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleLiveSstFilesSize(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleEstimatePendingCompactionBytes(uint64_t* value, DBImpl* db,
+                                            Version* version);
+  bool HandleEstimateTableReadersMem(uint64_t* value, DBImpl* db,
+                                     Version* version);
+  bool HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db,
+                                  Version* version);
+  bool HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleMinObsoleteSstNumberToKeep(uint64_t* value, DBImpl* db,
+                                        Version* version);
+  bool HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db,
+                                    Version* version);
+  bool HandleIsWriteStopped(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* db,
+                                   Version* version);
+  bool HandleBlockCacheCapacity(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleBlockCacheUsage(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* db,
+                                   Version* version);
+  bool HandleBlockCacheEntryStatsInternal(std::string* value, bool fast);
+  bool HandleBlockCacheEntryStatsMapInternal(
+      std::map<std::string, std::string>* values, bool fast);
+  bool HandleBlockCacheEntryStats(std::string* value, Slice suffix);
+  bool HandleBlockCacheEntryStatsMap(
+      std::map<std::string, std::string>* values, Slice suffix);
+  bool HandleFastBlockCacheEntryStats(std::string* value, Slice suffix);
+  bool HandleFastBlockCacheEntryStatsMap(
+      std::map<std::string, std::string>* values, Slice suffix);
+  bool HandleLiveSstFilesSizeAtTemperature(std::string* value, Slice suffix);
+  bool HandleNumBlobFiles(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleBlobStats(std::string* value, Slice suffix);
+  bool HandleTotalBlobFileSize(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleLiveBlobFileSize(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleLiveBlobFileGarbageSize(uint64_t* value, DBImpl* db,
+                                     Version* version);
+  bool HandleBlobCacheCapacity(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleBlobCacheUsage(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleBlobCachePinnedUsage(uint64_t* value, DBImpl* db,
+                                  Version* version);
+
+  // Total number of background errors encountered. Every time a flush task
+  // or compaction task fails, this counter is incremented. The failure can
+  // be caused by any possible reason, including file system errors, out of
+  // resources, or input file corruption. Failing when retrying the same flush
+  // or compaction will cause the counter to increase too.
+  uint64_t bg_error_count_;
+
+  const int number_levels_;
+  SystemClock* clock_;
+  ColumnFamilyData* cfd_;
+  uint64_t started_at_;
+};
+
+
+}  // namespace ROCKSDB_NAMESPACE
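For orientation, the DBPropertyInfo table above is consulted roughly as follows when a property is read. This is a simplified sketch, not the actual DBImpl code; GetIntPropertySketch is a hypothetical free function:

    bool GetIntPropertySketch(InternalStats* stats, DBImpl* db,
                              const Slice& property, uint64_t* out) {
      const DBPropertyInfo* info = GetPropertyInfo(property);
      if (info == nullptr || info->handle_int == nullptr) {
        return false;  // unknown name, or not an integer-valued property
      }
      // Exactly one of the handler members is populated per property;
      // integer handlers take the DB and (optionally) a pinned Version.
      return (stats->*(info->handle_int))(out, db, /*version=*/nullptr);
    }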
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/job_context.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/job_context.h
new file mode 100644
index 0000000000..a550ba2d46
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/job_context.h
@@ -0,0 +1,237 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "db/column_family.h"
+#include "db/log_writer.h"
+#include "db/version_set.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class MemTable;
+struct SuperVersion;
+
+struct SuperVersionContext {
+  struct WriteStallNotification {
+    WriteStallInfo write_stall_info;
+    const ImmutableOptions* immutable_options;
+  };
+
+  autovector<SuperVersion*> superversions_to_free;
+#ifndef ROCKSDB_DISABLE_STALL_NOTIFICATION
+  autovector<WriteStallNotification> write_stall_notifications;
+#endif
+  std::unique_ptr<SuperVersion>
+      new_superversion;  // if nullptr no new superversion
+
+  explicit SuperVersionContext(bool create_superversion = false)
+      : new_superversion(create_superversion ? new SuperVersion() : nullptr) {}
+
+  explicit SuperVersionContext(SuperVersionContext&& other) noexcept
+      : superversions_to_free(std::move(other.superversions_to_free)),
+#ifndef ROCKSDB_DISABLE_STALL_NOTIFICATION
+        write_stall_notifications(std::move(other.write_stall_notifications)),
+#endif
+        new_superversion(std::move(other.new_superversion)) {
+  }
+  // No copies
+  SuperVersionContext(const SuperVersionContext& other) = delete;
+  void operator=(const SuperVersionContext& other) = delete;
+
+  void NewSuperVersion() {
+    new_superversion = std::unique_ptr<SuperVersion>(new SuperVersion());
+  }
+
+  inline bool HaveSomethingToDelete() const {
+#ifndef ROCKSDB_DISABLE_STALL_NOTIFICATION
+    return !superversions_to_free.empty() ||
+           !write_stall_notifications.empty();
+#else
+    return !superversions_to_free.empty();
+#endif
+  }
+
+  void PushWriteStallNotification(WriteStallCondition old_cond,
+                                  WriteStallCondition new_cond,
+                                  const std::string& name,
+                                  const ImmutableOptions* ioptions) {
+#if !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
+    WriteStallNotification notif;
+    notif.write_stall_info.cf_name = name;
+    notif.write_stall_info.condition.prev = old_cond;
+    notif.write_stall_info.condition.cur = new_cond;
+    notif.immutable_options = ioptions;
+    write_stall_notifications.push_back(notif);
+#else
+    (void)old_cond;
+    (void)new_cond;
+    (void)name;
+    (void)ioptions;
+#endif  // !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
+  }
+
+  void Clean() {
+#if !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
+    // notify listeners on changed write stall conditions
+    for (auto& notif : write_stall_notifications) {
+      for (auto& listener : notif.immutable_options->listeners) {
+        listener->OnStallConditionsChanged(notif.write_stall_info);
+      }
+    }
+    write_stall_notifications.clear();
+#endif
+    // free superversions
+    for (auto s : superversions_to_free) {
+      delete s;
+    }
+    superversions_to_free.clear();
+  }
+
+  ~SuperVersionContext() {
+#ifndef ROCKSDB_DISABLE_STALL_NOTIFICATION
+    assert(write_stall_notifications.empty());
+#endif
+    assert(superversions_to_free.empty());
+  }
+};
+
+struct JobContext {
+  inline bool HaveSomethingToDelete() const {
+    return !(full_scan_candidate_files.empty() && sst_delete_files.empty() &&
+             blob_delete_files.empty() && log_delete_files.empty() &&
+             manifest_delete_files.empty());
+  }
+
+  inline bool HaveSomethingToClean() const {
+    bool sv_have_sth = false;
+    for (const auto& sv_ctx : superversion_contexts) {
+      if (sv_ctx.HaveSomethingToDelete()) {
+        sv_have_sth = true;
+        break;
+      }
+    }
+    return memtables_to_free.size() > 0 || logs_to_free.size() > 0 ||
+           job_snapshot != nullptr || sv_have_sth;
+  }
+
+  SequenceNumber GetJobSnapshotSequence() const {
+    if (job_snapshot) {
+      assert(job_snapshot->snapshot());
+      return job_snapshot->snapshot()->GetSequenceNumber();
+    }
+    return kMaxSequenceNumber;
+  }
+
+  // Structure to store information for candidate files to delete.
+  struct CandidateFileInfo {
+    std::string file_name;
+    std::string file_path;
+    CandidateFileInfo(std::string name, std::string path)
+        : file_name(std::move(name)), file_path(std::move(path)) {}
+    bool operator==(const CandidateFileInfo& other) const {
+      return file_name == other.file_name && file_path == other.file_path;
+    }
+  };
+
+  // Unique job id
+  int job_id;
+
+  // a list of all files that we'll consider deleting
+  // (every once in a while this is filled up with all files
+  // in the DB directory)
+  // (filled only if we're doing full scan)
+  std::vector<std::string> full_scan_candidate_files;
+
+  // the list of all live sst files that cannot be deleted
+  std::vector<uint64_t> sst_live;
+
+  // the list of sst files that we need to delete
+  std::vector<ObsoleteFileInfo> sst_delete_files;
+
+  // the list of all live blob files that cannot be deleted
+  std::vector<uint64_t> blob_live;
+
+  // the list of blob files that we need to delete
+  std::vector<ObsoleteBlobFileInfo> blob_delete_files;
+
+  // a list of log files that we need to delete
+  std::vector<uint64_t> log_delete_files;
+
+  // a list of log files that we need to preserve during full purge since they
+  // will be reused later
+  std::vector<uint64_t> log_recycle_files;
+
+  // a list of manifest files that we need to delete
+  std::vector<std::string> manifest_delete_files;
+
+  // a list of memtables to be freed
+  autovector<MemTable*> memtables_to_free;
+
+  // contexts for installing superversions for multiple column families
+  std::vector<SuperVersionContext> superversion_contexts;
+
+  autovector<log::Writer*> logs_to_free;
+
+  // the current manifest_file_number, log_number and prev_log_number
+  // that corresponds to the set of files in 'live'.
+  uint64_t manifest_file_number;
+  uint64_t pending_manifest_file_number;
+  uint64_t log_number;
+  uint64_t prev_log_number;
+
+  uint64_t min_pending_output = 0;
+  uint64_t prev_total_log_size = 0;
+  size_t num_alive_log_files = 0;
+  uint64_t size_log_to_delete = 0;
+
+  // Snapshot taken before flush/compaction job.
+  std::unique_ptr<ManagedSnapshot> job_snapshot;
+
+  explicit JobContext(int _job_id, bool create_superversion = false) {
+    job_id = _job_id;
+    manifest_file_number = 0;
+    pending_manifest_file_number = 0;
+    log_number = 0;
+    prev_log_number = 0;
+    superversion_contexts.emplace_back(
+        SuperVersionContext(create_superversion));
+  }
+
+  // For non-empty JobContext Clean() has to be called at least once
+  // before destruction (see asserts in ~JobContext()). Should be called with
+  // unlocked DB mutex. Destructor doesn't call Clean() to avoid accidentally
+  // doing potentially slow Clean() with locked DB mutex.
+  void Clean() {
+    // free superversions
+    for (auto& sv_context : superversion_contexts) {
+      sv_context.Clean();
+    }
+    // free pending memtables
+    for (auto m : memtables_to_free) {
+      delete m;
+    }
+    for (auto l : logs_to_free) {
+      delete l;
+    }
+
+    memtables_to_free.clear();
+    logs_to_free.clear();
+    job_snapshot.reset();
+  }
+
+  ~JobContext() {
+    assert(memtables_to_free.size() == 0);
+    assert(logs_to_free.size() == 0);
+  }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
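A sketch of the lifecycle contract encoded in JobContext's asserts (hypothetical caller; the real flush/compaction paths in DBImpl follow the same shape):

    void BackgroundJobSketch() {
      JobContext job_context(/*_job_id=*/1, /*create_superversion=*/true);
      // ... run the job; FindObsoleteFiles() and friends fill the vectors ...
      if (job_context.HaveSomethingToDelete()) {
        // purge obsolete files here, with the DB mutex released
      }
      job_context.Clean();  // mandatory before ~JobContext() for non-empty
                            // contexts, otherwise its asserts fire
    }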
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/kv_checksum.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/kv_checksum.h
new file mode 100644
index 0000000000..53c02485ff
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/kv_checksum.h
@@ -0,0 +1,484 @@
+// Copyright (c) 2020-present, Facebook, Inc. All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// This file contains classes containing fields to protect individual entries.
+// The classes are named "ProtectionInfo<suffix>", where <suffix> indicates the
+// combination of fields that are covered. Each field has a single letter
+// abbreviation as follows.
+//
+// K = key
+// V = value
+// O = optype aka value type
+// S = seqno
+// C = CF ID
+//
+// Then, for example, a class that protects an entry consisting of key, value,
+// optype, and CF ID (i.e., a `WriteBatch` entry) would be named
+// `ProtectionInfoKVOC`.
+//
+// The `ProtectionInfo.*` classes are templated on the integer type used to
+// hold the XOR of hashes for each field. Only unsigned integer types are
+// supported, and the maximum supported integer width is 64 bits. When the
+// integer type is narrower than the hash values, we lop off the most
+// significant bits to make them fit.
+//
+// The `ProtectionInfo.*` classes are all intended to be non-persistent. We do
+// not currently make the byte order consistent for integer fields before
+// hashing them, so the resulting values are endianness-dependent.
+
+#pragma once
+
+#include <array>
+
+#include "db/dbformat.h"
+#include "rocksdb/types.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+template <typename T>
+class ProtectionInfo;
+template <typename T>
+class ProtectionInfoKVO;
+template <typename T>
+class ProtectionInfoKVOC;
+template <typename T>
+class ProtectionInfoKVOS;
+template <typename T>
+class ProtectionInfoKV;
+
+// Aliases for 64-bit protection infos.
+using ProtectionInfo64 = ProtectionInfo<uint64_t>;
+using ProtectionInfoKVO64 = ProtectionInfoKVO<uint64_t>;
+using ProtectionInfoKVOC64 = ProtectionInfoKVOC<uint64_t>;
+using ProtectionInfoKVOS64 = ProtectionInfoKVOS<uint64_t>;
+
+template <typename T>
+class ProtectionInfo {
+ public:
+  ProtectionInfo() = default;
+
+  Status GetStatus() const;
+  ProtectionInfoKVO<T> ProtectKVO(const Slice& key, const Slice& value,
+                                  ValueType op_type) const;
+  ProtectionInfoKVO<T> ProtectKVO(const SliceParts& key,
+                                  const SliceParts& value,
+                                  ValueType op_type) const;
+  ProtectionInfoKV<T> ProtectKV(const Slice& key, const Slice& value) const;
+
+ private:
+  friend class ProtectionInfoKVO<T>;
+  friend class ProtectionInfoKVOS<T>;
+  friend class ProtectionInfoKVOC<T>;
+  friend class ProtectionInfoKV<T>;
+
+  // Each field is hashed with an independent value so we can catch fields
+  // being swapped. Per the `NPHash64()` docs, using consecutive seeds is a
+  // pitfall, and we should instead vary our seeds by a large odd number.
+  // This value by which we increment (0xD28AAD72F49BD50B) was taken from
+  // `head -c8 /dev/urandom | hexdump`, run repeatedly until it yielded an
+  // odd number. The values are computed manually since the Windows C++
+  // compiler complains about the overflow when adding constants.
+  static const uint64_t kSeedK = 0;
+  static const uint64_t kSeedV = 0xD28AAD72F49BD50B;
+  static const uint64_t kSeedO = 0xA5155AE5E937AA16;
+  static const uint64_t kSeedS = 0x77A00858DDD37F21;
+  static const uint64_t kSeedC = 0x4A2AB5CBD26F542C;
+
+  ProtectionInfo(T val) : val_(val) {
+    static_assert(sizeof(ProtectionInfo<T>) == sizeof(T), "");
+  }
+
+  T GetVal() const { return val_; }
+  void SetVal(T val) { val_ = val; }
+
+  void Encode(uint8_t len, char* dst) const {
+    assert(sizeof(val_) >= len);
+    switch (len) {
+      case 1:
+        dst[0] = static_cast<uint8_t>(val_);
+        break;
+      case 2:
+        EncodeFixed16(dst, static_cast<uint16_t>(val_));
+        break;
+      case 4:
+        EncodeFixed32(dst, static_cast<uint32_t>(val_));
+        break;
+      case 8:
+        EncodeFixed64(dst, static_cast<uint64_t>(val_));
+        break;
+      default:
+        assert(false);
+    }
+  }
+
+  bool Verify(uint8_t len, const char* checksum_ptr) const {
+    assert(sizeof(val_) >= len);
+    switch (len) {
+      case 1:
+        return static_cast<uint8_t>(checksum_ptr[0]) ==
+               static_cast<uint8_t>(val_);
+      case 2:
+        return DecodeFixed16(checksum_ptr) == static_cast<uint16_t>(val_);
+      case 4:
+        return DecodeFixed32(checksum_ptr) == static_cast<uint32_t>(val_);
+      case 8:
+        return DecodeFixed64(checksum_ptr) == static_cast<uint64_t>(val_);
+      default:
+        assert(false);
+        return false;
+    }
+  }
+
+  T val_ = 0;
+};
+
+template <typename T>
+class ProtectionInfoKVO {
+ public:
+  ProtectionInfoKVO() = default;
+
+  ProtectionInfo<T> StripKVO(const Slice& key, const Slice& value,
+                             ValueType op_type) const;
+  ProtectionInfo<T> StripKVO(const SliceParts& key, const SliceParts& value,
+                             ValueType op_type) const;
+
+  ProtectionInfoKVOC<T> ProtectC(ColumnFamilyId column_family_id) const;
+  ProtectionInfoKVOS<T> ProtectS(SequenceNumber sequence_number) const;
+
+  void UpdateK(const Slice& old_key, const Slice& new_key);
+  void UpdateK(const SliceParts& old_key, const SliceParts& new_key);
+  void UpdateV(const Slice& old_value, const Slice& new_value);
+  void UpdateV(const SliceParts& old_value, const SliceParts& new_value);
+  void UpdateO(ValueType old_op_type, ValueType new_op_type);
+
+  // Encode this protection info into `len` bytes and stores them in `dst`.
+  void Encode(uint8_t len, char* dst) const { info_.Encode(len, dst); }
+  // Verify this protection info against the protection info encoded by
+  // Encode() at the first `len` bytes of `checksum_ptr`.
+  // Returns true iff the verification is successful.
+  bool Verify(uint8_t len, const char* checksum_ptr) const {
+    return info_.Verify(len, checksum_ptr);
+  }
+
+ private:
+  friend class ProtectionInfo<T>;
+  friend class ProtectionInfoKVOS<T>;
+  friend class ProtectionInfoKVOC<T>;
+
+  explicit ProtectionInfoKVO(T val) : info_(val) {
+    static_assert(sizeof(ProtectionInfoKVO<T>) == sizeof(T), "");
+  }
+
+  T GetVal() const { return info_.GetVal(); }
+  void SetVal(T val) { info_.SetVal(val); }
+
+  ProtectionInfo<T> info_;
+};
+
+template <typename T>
+class ProtectionInfoKVOC {
+ public:
+  ProtectionInfoKVOC() = default;
+
+  ProtectionInfoKVO<T> StripC(ColumnFamilyId column_family_id) const;
+
+  void UpdateK(const Slice& old_key, const Slice& new_key) {
+    kvo_.UpdateK(old_key, new_key);
+  }
+  void UpdateK(const SliceParts& old_key, const SliceParts& new_key) {
+    kvo_.UpdateK(old_key, new_key);
+  }
+  void UpdateV(const Slice& old_value, const Slice& new_value) {
+    kvo_.UpdateV(old_value, new_value);
+  }
+  void UpdateV(const SliceParts& old_value, const SliceParts& new_value) {
+    kvo_.UpdateV(old_value, new_value);
+  }
+  void UpdateO(ValueType old_op_type, ValueType new_op_type) {
+    kvo_.UpdateO(old_op_type, new_op_type);
+  }
+  void UpdateC(ColumnFamilyId old_column_family_id,
+               ColumnFamilyId new_column_family_id);
+
+  void Encode(uint8_t len, char* dst) const { kvo_.Encode(len, dst); }
+  bool Verify(uint8_t len, const char* checksum_ptr) const {
+    return kvo_.Verify(len, checksum_ptr);
+  }
+
+ private:
+  friend class ProtectionInfoKVO<T>;
+
+  explicit ProtectionInfoKVOC(T val) : kvo_(val) {
+    static_assert(sizeof(ProtectionInfoKVOC<T>) == sizeof(T), "");
+  }
+
+  T GetVal() const { return kvo_.GetVal(); }
+  void SetVal(T val) { kvo_.SetVal(val); }
+
+  ProtectionInfoKVO<T> kvo_;
+};
+
+template <typename T>
+class ProtectionInfoKVOS {
+ public:
+  ProtectionInfoKVOS() = default;
+
+  ProtectionInfoKVO<T> StripS(SequenceNumber sequence_number) const;
+
+  void UpdateK(const Slice& old_key, const Slice& new_key) {
+    kvo_.UpdateK(old_key, new_key);
+  }
+  void UpdateK(const SliceParts& old_key, const SliceParts& new_key) {
+    kvo_.UpdateK(old_key, new_key);
+  }
+  void UpdateV(const Slice& old_value, const Slice& new_value) {
+    kvo_.UpdateV(old_value, new_value);
+  }
+  void UpdateV(const SliceParts& old_value, const SliceParts& new_value) {
+    kvo_.UpdateV(old_value, new_value);
+  }
+  void UpdateO(ValueType old_op_type, ValueType new_op_type) {
+    kvo_.UpdateO(old_op_type, new_op_type);
+  }
+  void UpdateS(SequenceNumber old_sequence_number,
+               SequenceNumber new_sequence_number);
+
+  void Encode(uint8_t len, char* dst) const { kvo_.Encode(len, dst); }
+  bool Verify(uint8_t len, const char* checksum_ptr) const {
+    return kvo_.Verify(len, checksum_ptr);
+  }
+
+ private:
+  friend class ProtectionInfoKVO<T>;
+
+  explicit ProtectionInfoKVOS(T val) : kvo_(val) {
+    static_assert(sizeof(ProtectionInfoKVOS<T>) == sizeof(T), "");
+  }
+
+  T GetVal() const { return kvo_.GetVal(); }
+  void SetVal(T val) { kvo_.SetVal(val); }
+
+  ProtectionInfoKVO<T> kvo_;
+};
+
+template <typename T>
+class ProtectionInfoKV {
+ public:
+  ProtectionInfoKV() = default;
+
+  void Encode(uint8_t len, char* dst) const { info_.Encode(len, dst); }
+  bool Verify(uint8_t len, const char* checksum_ptr) const {
+    return info_.Verify(len, checksum_ptr);
+  }
+
+ private:
+  friend class ProtectionInfo<T>;
+
+  explicit ProtectionInfoKV(T val) : info_(val) {
+    static_assert(sizeof(ProtectionInfoKV<T>) == sizeof(T));
+  }
+
+  ProtectionInfo<T> info_;
+};
+
+template <typename T>
+Status ProtectionInfo<T>::GetStatus() const {
+  if (val_ != 0) {
+    return Status::Corruption("ProtectionInfo mismatch");
+  }
+  return Status::OK();
+}
+
+template <typename T>
+ProtectionInfoKVO<T> ProtectionInfo<T>::ProtectKVO(const Slice& key,
+                                                   const Slice& value,
+                                                   ValueType op_type) const {
+  T val = GetVal();
+  val = val ^ static_cast<T>(GetSliceNPHash64(key, ProtectionInfo<T>::kSeedK));
+  val =
+      val ^ static_cast<T>(GetSliceNPHash64(value, ProtectionInfo<T>::kSeedV));
+  val = val ^
+        static_cast<T>(NPHash64(reinterpret_cast<const char*>(&op_type),
+                                sizeof(op_type), ProtectionInfo<T>::kSeedO));
+  return ProtectionInfoKVO<T>(val);
+}
+
+template <typename T>
+ProtectionInfoKVO<T> ProtectionInfo<T>::ProtectKVO(const SliceParts& key,
+                                                   const SliceParts& value,
+                                                   ValueType op_type) const {
+  T val = GetVal();
+  val = val ^
+        static_cast<T>(GetSlicePartsNPHash64(key, ProtectionInfo<T>::kSeedK));
+  val = val ^ static_cast<T>(
+                  GetSlicePartsNPHash64(value, ProtectionInfo<T>::kSeedV));
+  val = val ^
+        static_cast<T>(NPHash64(reinterpret_cast<const char*>(&op_type),
+                                sizeof(op_type), ProtectionInfo<T>::kSeedO));
+  return ProtectionInfoKVO<T>(val);
+}
+
+template <typename T>
+ProtectionInfoKV<T> ProtectionInfo<T>::ProtectKV(const Slice& key,
+                                                 const Slice& value) const {
+  T val = GetVal();
+  val = val ^ static_cast<T>(GetSliceNPHash64(key, ProtectionInfo<T>::kSeedK));
+  val =
+      val ^ static_cast<T>(GetSliceNPHash64(value, ProtectionInfo<T>::kSeedV));
+  return ProtectionInfoKV<T>(val);
+}
+
+template <typename T>
+void ProtectionInfoKVO<T>::UpdateK(const Slice& old_key,
+                                   const Slice& new_key) {
+  T val = GetVal();
+  val = val ^
+        static_cast<T>(GetSliceNPHash64(old_key, ProtectionInfo<T>::kSeedK));
+  val = val ^
+        static_cast<T>(GetSliceNPHash64(new_key, ProtectionInfo<T>::kSeedK));
+  SetVal(val);
+}
+
+template <typename T>
+void ProtectionInfoKVO<T>::UpdateK(const SliceParts& old_key,
+                                   const SliceParts& new_key) {
+  T val = GetVal();
+  val = val ^ static_cast<T>(
+                  GetSlicePartsNPHash64(old_key, ProtectionInfo<T>::kSeedK));
+  val = val ^ static_cast<T>(
+                  GetSlicePartsNPHash64(new_key, ProtectionInfo<T>::kSeedK));
+  SetVal(val);
+}
+
+template <typename T>
+void ProtectionInfoKVO<T>::UpdateV(const Slice& old_value,
+                                   const Slice& new_value) {
+  T val = GetVal();
+  val = val ^
+        static_cast<T>(GetSliceNPHash64(old_value, ProtectionInfo<T>::kSeedV));
+  val = val ^
+        static_cast<T>(GetSliceNPHash64(new_value, ProtectionInfo<T>::kSeedV));
+  SetVal(val);
+}
+
+template <typename T>
+void ProtectionInfoKVO<T>::UpdateV(const SliceParts& old_value,
+                                   const SliceParts& new_value) {
+  T val = GetVal();
+  val = val ^ static_cast<T>(
+                  GetSlicePartsNPHash64(old_value, ProtectionInfo<T>::kSeedV));
+  val = val ^ static_cast<T>(
+                  GetSlicePartsNPHash64(new_value, ProtectionInfo<T>::kSeedV));
+  SetVal(val);
+}
+
+template <typename T>
+void ProtectionInfoKVO<T>::UpdateO(ValueType old_op_type,
+                                   ValueType new_op_type) {
+  T val = GetVal();
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&old_op_type),
+                  sizeof(old_op_type), ProtectionInfo<T>::kSeedO));
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&new_op_type),
+                  sizeof(new_op_type), ProtectionInfo<T>::kSeedO));
+  SetVal(val);
+}
+
+template <typename T>
+ProtectionInfo<T> ProtectionInfoKVO<T>::StripKVO(const Slice& key,
+                                                 const Slice& value,
+                                                 ValueType op_type) const {
+  T val = GetVal();
+  val = val ^ static_cast<T>(GetSliceNPHash64(key, ProtectionInfo<T>::kSeedK));
+  val =
+      val ^ static_cast<T>(GetSliceNPHash64(value, ProtectionInfo<T>::kSeedV));
+  val = val ^
+        static_cast<T>(NPHash64(reinterpret_cast<const char*>(&op_type),
+                                sizeof(op_type), ProtectionInfo<T>::kSeedO));
+  return ProtectionInfo<T>(val);
+}
+
+template <typename T>
+ProtectionInfo<T> ProtectionInfoKVO<T>::StripKVO(const SliceParts& key,
+                                                 const SliceParts& value,
+                                                 ValueType op_type) const {
+  T val = GetVal();
+  val = val ^
+        static_cast<T>(GetSlicePartsNPHash64(key, ProtectionInfo<T>::kSeedK));
+  val = val ^
+        static_cast<T>(GetSlicePartsNPHash64(value, ProtectionInfo<T>::kSeedV));
+  val = val ^
+        static_cast<T>(NPHash64(reinterpret_cast<const char*>(&op_type),
+                                sizeof(op_type), ProtectionInfo<T>::kSeedO));
+  return ProtectionInfo<T>(val);
+}
+
+template <typename T>
+ProtectionInfoKVOC<T> ProtectionInfoKVO<T>::ProtectC(
+    ColumnFamilyId column_family_id) const {
+  T val = GetVal();
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&column_family_id),
+                  sizeof(column_family_id), ProtectionInfo<T>::kSeedC));
+  return ProtectionInfoKVOC<T>(val);
+}
+
+template <typename T>
+ProtectionInfoKVO<T> ProtectionInfoKVOC<T>::StripC(
+    ColumnFamilyId column_family_id) const {
+  T val = GetVal();
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&column_family_id),
+                  sizeof(column_family_id), ProtectionInfo<T>::kSeedC));
+  return ProtectionInfoKVO<T>(val);
+}
+
+template <typename T>
+void ProtectionInfoKVOC<T>::UpdateC(ColumnFamilyId old_column_family_id,
+                                    ColumnFamilyId new_column_family_id) {
+  T val = GetVal();
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&old_column_family_id),
+                  sizeof(old_column_family_id), ProtectionInfo<T>::kSeedC));
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&new_column_family_id),
+                  sizeof(new_column_family_id), ProtectionInfo<T>::kSeedC));
+  SetVal(val);
+}
+
+template <typename T>
+ProtectionInfoKVOS<T> ProtectionInfoKVO<T>::ProtectS(
+    SequenceNumber sequence_number) const {
+  T val = GetVal();
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&sequence_number),
+                  sizeof(sequence_number), ProtectionInfo<T>::kSeedS));
+  return ProtectionInfoKVOS<T>(val);
+}
+
+template <typename T>
+ProtectionInfoKVO<T> ProtectionInfoKVOS<T>::StripS(
+    SequenceNumber sequence_number) const {
+  T val = GetVal();
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&sequence_number),
+                  sizeof(sequence_number), ProtectionInfo<T>::kSeedS));
+  return ProtectionInfoKVO<T>(val);
+}
+
+template <typename T>
+void ProtectionInfoKVOS<T>::UpdateS(SequenceNumber old_sequence_number,
+                                    SequenceNumber new_sequence_number) {
+  T val = GetVal();
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&old_sequence_number),
+                  sizeof(old_sequence_number), ProtectionInfo<T>::kSeedS));
+  val = val ^ static_cast<T>(NPHash64(
+                  reinterpret_cast<const char*>(&new_sequence_number),
+                  sizeof(new_sequence_number), ProtectionInfo<T>::kSeedS));
+  SetVal(val);
+}
+}  // namespace ROCKSDB_NAMESPACE
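A sketch of the intended protect/strip round trip (illustrative; CheckEntrySketch is not part of the file): XOR-ing the same field hashes back out returns the accumulator to zero, so GetStatus() reports corruption only if a covered field changed in between.

    Status CheckEntrySketch(const Slice& key, const Slice& value) {
      ProtectionInfoKVO64 kvo =
          ProtectionInfo64().ProtectKVO(key, value, kTypeValue);
      // ... the entry travels through a WriteBatch / memtable ...
      return kvo.StripKVO(key, value, kTypeValue).GetStatus();
    }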
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_format.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_format.h
new file mode 100644
index 0000000000..a976b3f9ea
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_format.h
@@ -0,0 +1,55 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Log format information shared by reader and writer.
+// See ../doc/log_format.txt for more detail.
+
+#pragma once
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace log {
+
+enum RecordType {
+  // Zero is reserved for preallocated files
+  kZeroType = 0,
+  kFullType = 1,
+
+  // For fragments
+  kFirstType = 2,
+  kMiddleType = 3,
+  kLastType = 4,
+
+  // For recycled log files
+  kRecyclableFullType = 5,
+  kRecyclableFirstType = 6,
+  kRecyclableMiddleType = 7,
+  kRecyclableLastType = 8,
+
+  // Compression Type
+  kSetCompressionType = 9,
+
+  // User-defined timestamp sizes
+  kUserDefinedTimestampSizeType = 10,
+  kRecyclableUserDefinedTimestampSizeType = 11,
+};
+constexpr int kMaxRecordType = kRecyclableUserDefinedTimestampSizeType;
+
+constexpr unsigned int kBlockSize = 32768;
+
+// Header is checksum (4 bytes), length (2 bytes), type (1 byte)
+constexpr int kHeaderSize = 4 + 2 + 1;
+
+// Recyclable header is checksum (4 bytes), length (2 bytes), type (1 byte),
+// log number (4 bytes).
+constexpr int kRecyclableHeaderSize = 4 + 2 + 1 + 4;
+
+}  // namespace log
+}  // namespace ROCKSDB_NAMESPACE
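These constants imply the writer's fragmentation arithmetic: every fragment, including its 7-byte header, must fit in the remainder of the current 32 KiB block. A sketch (hypothetical helper; non-recyclable format assumed):

    size_t FragmentPayloadSketch(size_t block_offset) {
      const size_t leftover = log::kBlockSize - block_offset;
      if (leftover < static_cast<size_t>(log::kHeaderSize)) {
        return 0;  // too small for even a header; the writer pads to block end
      }
      return leftover - log::kHeaderSize;  // payload available in this block
    }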
+// +// TODO krad: Evaluate if we need to move to a more strict mode where we +// restrict the inconsistency to only the last log +bool Reader::ReadRecord(Slice* record, std::string* scratch, + WALRecoveryMode wal_recovery_mode, + uint64_t* record_checksum) { + scratch->clear(); + record->clear(); + if (record_checksum != nullptr) { + if (hash_state_ == nullptr) { + hash_state_ = XXH3_createState(); + } + XXH3_64bits_reset(hash_state_); + } + if (uncompress_) { + uncompress_->Reset(); + } + bool in_fragmented_record = false; + // Record offset of the logical record that we're reading + // 0 is a dummy value to make compilers happy + uint64_t prospective_record_offset = 0; + + Slice fragment; + while (true) { + uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size(); + size_t drop_size = 0; + const unsigned int record_type = + ReadPhysicalRecord(&fragment, &drop_size, record_checksum); + switch (record_type) { + case kFullType: + case kRecyclableFullType: + if (in_fragmented_record && !scratch->empty()) { + // Handle bug in earlier versions of log::Writer where + // it could emit an empty kFirstType record at the tail end + // of a block followed by a kFullType or kFirstType record + // at the beginning of the next block. + ReportCorruption(scratch->size(), "partial record without end(1)"); + } + // No need to compute record_checksum since the record + // consists of a single fragment and the checksum is computed + // in ReadPhysicalRecord() if WAL compression is enabled + if (record_checksum != nullptr && uncompress_ == nullptr) { + // No need to stream since the record is a single fragment + *record_checksum = XXH3_64bits(fragment.data(), fragment.size()); + } + prospective_record_offset = physical_record_offset; + scratch->clear(); + *record = fragment; + last_record_offset_ = prospective_record_offset; + first_record_read_ = true; + return true; + + case kFirstType: + case kRecyclableFirstType: + if (in_fragmented_record && !scratch->empty()) { + // Handle bug in earlier versions of log::Writer where + // it could emit an empty kFirstType record at the tail end + // of a block followed by a kFullType or kFirstType record + // at the beginning of the next block. 
+ ReportCorruption(scratch->size(), "partial record without end(2)"); + XXH3_64bits_reset(hash_state_); + } + if (record_checksum != nullptr) { + XXH3_64bits_update(hash_state_, fragment.data(), fragment.size()); + } + prospective_record_offset = physical_record_offset; + scratch->assign(fragment.data(), fragment.size()); + in_fragmented_record = true; + break; + + case kMiddleType: + case kRecyclableMiddleType: + if (!in_fragmented_record) { + ReportCorruption(fragment.size(), + "missing start of fragmented record(1)"); + } else { + if (record_checksum != nullptr) { + XXH3_64bits_update(hash_state_, fragment.data(), fragment.size()); + } + scratch->append(fragment.data(), fragment.size()); + } + break; + + case kLastType: + case kRecyclableLastType: + if (!in_fragmented_record) { + ReportCorruption(fragment.size(), + "missing start of fragmented record(2)"); + } else { + if (record_checksum != nullptr) { + XXH3_64bits_update(hash_state_, fragment.data(), fragment.size()); + *record_checksum = XXH3_64bits_digest(hash_state_); + } + scratch->append(fragment.data(), fragment.size()); + *record = Slice(*scratch); + last_record_offset_ = prospective_record_offset; + first_record_read_ = true; + return true; + } + break; + + case kSetCompressionType: { + if (compression_type_record_read_) { + ReportCorruption(fragment.size(), + "read multiple SetCompressionType records"); + } + if (first_record_read_) { + ReportCorruption(fragment.size(), + "SetCompressionType not the first record"); + } + prospective_record_offset = physical_record_offset; + scratch->clear(); + last_record_offset_ = prospective_record_offset; + CompressionTypeRecord compression_record(kNoCompression); + Status s = compression_record.DecodeFrom(&fragment); + if (!s.ok()) { + ReportCorruption(fragment.size(), + "could not decode SetCompressionType record"); + } else { + InitCompression(compression_record); + } + break; + } + case kUserDefinedTimestampSizeType: + case kRecyclableUserDefinedTimestampSizeType: { + if (in_fragmented_record && !scratch->empty()) { + ReportCorruption( + scratch->size(), + "user-defined timestamp size record interspersed partial record"); + } + prospective_record_offset = physical_record_offset; + scratch->clear(); + last_record_offset_ = prospective_record_offset; + UserDefinedTimestampSizeRecord ts_record; + Status s = ts_record.DecodeFrom(&fragment); + if (!s.ok()) { + ReportCorruption( + fragment.size(), + "could not decode user-defined timestamp size record"); + } else { + s = UpdateRecordedTimestampSize( + ts_record.GetUserDefinedTimestampSize()); + if (!s.ok()) { + ReportCorruption(fragment.size(), s.getState()); + } + } + break; + } + + case kBadHeader: + if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency || + wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) { + // In clean shutdown we don't expect any error in the log files. + // In point-in-time recovery an incomplete record at the end could + // produce a hole in the recovered data. Report an error here, which + // higher layers can choose to ignore when it's provable there is no + // hole. + ReportCorruption(drop_size, "truncated header"); + } + FALLTHROUGH_INTENDED; + + case kEof: + if (in_fragmented_record) { + if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency || + wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) { + // In clean shutdown we don't expect any error in the log files. + // In point-in-time recovery an incomplete record at the end could + // produce a hole in the recovered data. 
Report an error here, which + // higher layers can choose to ignore when it's provable there is no + // hole. + ReportCorruption(scratch->size(), "error reading trailing data"); + } + // This can be caused by the writer dying immediately after + // writing a physical record but before completing the next; don't + // treat it as a corruption, just ignore the entire logical record. + scratch->clear(); + } + return false; + + case kOldRecord: + if (wal_recovery_mode != WALRecoveryMode::kSkipAnyCorruptedRecords) { + // Treat a record from a previous instance of the log as EOF. + if (in_fragmented_record) { + if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency || + wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) { + // In clean shutdown we don't expect any error in the log files. + // In point-in-time recovery an incomplete record at the end could + // produce a hole in the recovered data. Report an error here, + // which higher layers can choose to ignore when it's provable + // there is no hole. + ReportCorruption(scratch->size(), "error reading trailing data"); + } + // This can be caused by the writer dying immediately after + // writing a physical record but before completing the next; don't + // treat it as a corruption, just ignore the entire logical record. + scratch->clear(); + } + return false; + } + FALLTHROUGH_INTENDED; + + case kBadRecord: + if (in_fragmented_record) { + ReportCorruption(scratch->size(), "error in middle of record"); + in_fragmented_record = false; + scratch->clear(); + } + break; + + case kBadRecordLen: + if (eof_) { + if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency || + wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) { + // In clean shutdown we don't expect any error in the log files. + // In point-in-time recovery an incomplete record at the end could + // produce a hole in the recovered data. Report an error here, which + // higher layers can choose to ignore when it's provable there is no + // hole. + ReportCorruption(drop_size, "truncated record body"); + } + return false; + } + FALLTHROUGH_INTENDED; + + case kBadRecordChecksum: + if (recycled_ && wal_recovery_mode == + WALRecoveryMode::kTolerateCorruptedTailRecords) { + scratch->clear(); + return false; + } + if (record_type == kBadRecordLen) { + ReportCorruption(drop_size, "bad record length"); + } else { + ReportCorruption(drop_size, "checksum mismatch"); + } + if (in_fragmented_record) { + ReportCorruption(scratch->size(), "error in middle of record"); + in_fragmented_record = false; + scratch->clear(); + } + break; + + default: { + char buf[40]; + snprintf(buf, sizeof(buf), "unknown record type %u", record_type); + ReportCorruption( + (fragment.size() + (in_fragmented_record ? scratch->size() : 0)), + buf); + in_fragmented_record = false; + scratch->clear(); + break; + } + } + } + return false; +} + +uint64_t Reader::LastRecordOffset() { return last_record_offset_; } + +uint64_t Reader::LastRecordEnd() { + return end_of_buffer_offset_ - buffer_.size(); +} + +void Reader::UnmarkEOF() { + if (read_error_) { + return; + } + eof_ = false; + if (eof_offset_ == 0) { + return; + } + UnmarkEOFInternal(); +} + +void Reader::UnmarkEOFInternal() { + // If the EOF was in the middle of a block (a partial block was read) we have + // to read the rest of the block as ReadPhysicalRecord can only read full + // blocks and expects the file position indicator to be aligned to the start + // of a block. 
+  //
+  //    consumed_bytes + buffer_size() + remaining == kBlockSize
+
+  size_t consumed_bytes = eof_offset_ - buffer_.size();
+  size_t remaining = kBlockSize - eof_offset_;
+
+  // backing_store_ is used to concatenate what is left in buffer_ and
+  // the remainder of the block. If buffer_ already uses backing_store_,
+  // we just append the new data.
+  if (buffer_.data() != backing_store_ + consumed_bytes) {
+    // Buffer_ does not use backing_store_ for storage.
+    // Copy what is left in buffer_ to backing_store.
+    memmove(backing_store_ + consumed_bytes, buffer_.data(), buffer_.size());
+  }
+
+  Slice read_buffer;
+  // TODO: rate limit log reader with appropriate priority.
+  // TODO: avoid overcharging rate limiter:
+  // Note that the Read here might overcharge SequentialFileReader's internal
+  // rate limiter if priority is not IO_TOTAL, e.g., when there is not enough
+  // content left until EOF to read.
+  Status status =
+      file_->Read(remaining, &read_buffer, backing_store_ + eof_offset_,
+                  Env::IO_TOTAL /* rate_limiter_priority */);
+
+  size_t added = read_buffer.size();
+  end_of_buffer_offset_ += added;
+
+  if (!status.ok()) {
+    if (added > 0) {
+      ReportDrop(added, status);
+    }
+
+    read_error_ = true;
+    return;
+  }
+
+  if (read_buffer.data() != backing_store_ + eof_offset_) {
+    // Read did not write to backing_store_
+    memmove(backing_store_ + eof_offset_, read_buffer.data(),
+            read_buffer.size());
+  }
+
+  buffer_ = Slice(backing_store_ + consumed_bytes,
+                  eof_offset_ + added - consumed_bytes);
+
+  if (added < remaining) {
+    eof_ = true;
+    eof_offset_ += added;
+  } else {
+    eof_offset_ = 0;
+  }
+}
+
+void Reader::ReportCorruption(size_t bytes, const char* reason) {
+  ReportDrop(bytes, Status::Corruption(reason));
+}
+
+void Reader::ReportDrop(size_t bytes, const Status& reason) {
+  if (reporter_ != nullptr) {
+    reporter_->Corruption(bytes, reason);
+  }
+}
+
+bool Reader::ReadMore(size_t* drop_size, int* error) {
+  if (!eof_ && !read_error_) {
+    // Last read was a full read, so this is a trailer to skip
+    buffer_.clear();
+    // TODO: rate limit log reader with appropriate priority.
+    // TODO: avoid overcharging rate limiter:
+    // Note that the Read here might overcharge SequentialFileReader's internal
+    // rate limiter if priority is not IO_TOTAL, e.g., when there is not enough
+    // content left until EOF to read.
+    Status status = file_->Read(kBlockSize, &buffer_, backing_store_,
+                                Env::IO_TOTAL /* rate_limiter_priority */);
+    TEST_SYNC_POINT_CALLBACK("LogReader::ReadMore:AfterReadFile", &status);
+    end_of_buffer_offset_ += buffer_.size();
+    if (!status.ok()) {
+      buffer_.clear();
+      ReportDrop(kBlockSize, status);
+      read_error_ = true;
+      *error = kEof;
+      return false;
+    } else if (buffer_.size() < static_cast<size_t>(kBlockSize)) {
+      eof_ = true;
+      eof_offset_ = buffer_.size();
+    }
+    return true;
+  } else {
+    // Note that if buffer_ is non-empty, we have a truncated header at the
+    // end of the file, which can be caused by the writer crashing in the
+    // middle of writing the header. Unless explicitly requested we don't
+    // consider this an error, just report EOF.
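+    // (Editorial note: ReadRecord() escalates the resulting kBadHeader to a
+    // reported corruption only under kAbsoluteConsistency or
+    // kPointInTimeRecovery; other recovery modes treat it as a clean EOF.)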
+ if (buffer_.size()) { + *drop_size = buffer_.size(); + buffer_.clear(); + *error = kBadHeader; + return false; + } + buffer_.clear(); + *error = kEof; + return false; + } +} + +unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size, + uint64_t* fragment_checksum) { + while (true) { + // We need at least the minimum header size + if (buffer_.size() < static_cast(kHeaderSize)) { + // the default value of r is meaningless because ReadMore will overwrite + // it if it returns false; in case it returns true, the return value will + // not be used anyway + int r = kEof; + if (!ReadMore(drop_size, &r)) { + return r; + } + continue; + } + + // Parse the header + const char* header = buffer_.data(); + const uint32_t a = static_cast(header[4]) & 0xff; + const uint32_t b = static_cast(header[5]) & 0xff; + const unsigned int type = header[6]; + const uint32_t length = a | (b << 8); + int header_size = kHeaderSize; + if ((type >= kRecyclableFullType && type <= kRecyclableLastType) || + type == kRecyclableUserDefinedTimestampSizeType) { + if (end_of_buffer_offset_ - buffer_.size() == 0) { + recycled_ = true; + } + header_size = kRecyclableHeaderSize; + // We need enough for the larger header + if (buffer_.size() < static_cast(kRecyclableHeaderSize)) { + int r = kEof; + if (!ReadMore(drop_size, &r)) { + return r; + } + continue; + } + const uint32_t log_num = DecodeFixed32(header + 7); + if (log_num != log_number_) { + return kOldRecord; + } + } + if (header_size + length > buffer_.size()) { + assert(buffer_.size() >= static_cast(header_size)); + *drop_size = buffer_.size(); + buffer_.clear(); + // If the end of the read has been reached without seeing + // `header_size + length` bytes of payload, report a corruption. The + // higher layers can decide how to handle it based on the recovery mode, + // whether this occurred at EOF, whether this is the final WAL, etc. + return kBadRecordLen; + } + + if (type == kZeroType && length == 0) { + // Skip zero length record without reporting any drops since + // such records are produced by the mmap based writing code in + // env_posix.cc that preallocates file regions. + // NOTE: this should never happen in DB written by new RocksDB versions, + // since we turn off mmap writes to manifest and log files + buffer_.clear(); + return kBadRecord; + } + + // Check crc + if (checksum_) { + uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header)); + uint32_t actual_crc = crc32c::Value(header + 6, length + header_size - 6); + if (actual_crc != expected_crc) { + // Drop the rest of the buffer since "length" itself may have + // been corrupted and if we trust it, we could find some + // fragment of a real log record that just happens to look + // like a valid log record. 
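+        // (Editorial note: the checked span [header + 6, header +
+        // header_size + length) covers the type byte, the log number when
+        // the recyclable format is in use, and the payload -- exactly the
+        // bytes the writer's EmitPhysicalRecord() ran its CRC over.)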
+ *drop_size = buffer_.size(); + buffer_.clear(); + return kBadRecordChecksum; + } + } + + buffer_.remove_prefix(header_size + length); + + if (!uncompress_ || type == kSetCompressionType || + type == kUserDefinedTimestampSizeType || + type == kRecyclableUserDefinedTimestampSizeType) { + *result = Slice(header + header_size, length); + return type; + } else { + // Uncompress compressed records + uncompressed_record_.clear(); + if (fragment_checksum != nullptr) { + if (uncompress_hash_state_ == nullptr) { + uncompress_hash_state_ = XXH3_createState(); + } + XXH3_64bits_reset(uncompress_hash_state_); + } + + size_t uncompressed_size = 0; + int remaining = 0; + const char* input = header + header_size; + do { + remaining = uncompress_->Uncompress( + input, length, uncompressed_buffer_.get(), &uncompressed_size); + input = nullptr; + if (remaining < 0) { + buffer_.clear(); + return kBadRecord; + } + if (uncompressed_size > 0) { + if (fragment_checksum != nullptr) { + XXH3_64bits_update(uncompress_hash_state_, + uncompressed_buffer_.get(), uncompressed_size); + } + uncompressed_record_.append(uncompressed_buffer_.get(), + uncompressed_size); + } + } while (remaining > 0 || uncompressed_size == kBlockSize); + + if (fragment_checksum != nullptr) { + // We can remove this check by updating hash_state_ directly, + // but that requires resetting hash_state_ for full and first types + // for edge cases like consecutive fist type records. + // Leaving the check as is since it is cleaner and can revert to the + // above approach if it causes performance impact. + *fragment_checksum = XXH3_64bits_digest(uncompress_hash_state_); + uint64_t actual_checksum = XXH3_64bits(uncompressed_record_.data(), + uncompressed_record_.size()); + if (*fragment_checksum != actual_checksum) { + // uncompressed_record_ contains bad content that does not match + // actual decompressed content + return kBadRecord; + } + } + *result = Slice(uncompressed_record_); + return type; + } + } +} + +// Initialize uncompress related fields +void Reader::InitCompression(const CompressionTypeRecord& compression_record) { + compression_type_ = compression_record.GetCompressionType(); + compression_type_record_read_ = true; + constexpr uint32_t compression_format_version = 2; + uncompress_ = StreamingUncompress::Create( + compression_type_, compression_format_version, kBlockSize); + assert(uncompress_ != nullptr); + uncompressed_buffer_ = std::unique_ptr(new char[kBlockSize]); + assert(uncompressed_buffer_); +} + +Status Reader::UpdateRecordedTimestampSize( + const std::vector>& cf_to_ts_sz) { + for (const auto& [cf, ts_sz] : cf_to_ts_sz) { + // Zero user-defined timestamp size are not recorded. + if (ts_sz == 0) { + return Status::Corruption( + "User-defined timestamp size record contains zero timestamp size."); + } + // The user-defined timestamp size record for a column family should not be + // updated in the same log file. 
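+    // (Editorial example: if an earlier record in this WAL already declared
+    // {cf: 1, ts_sz: 8}, a later record that mentions cf 1 again -- even
+    // with the same size -- is treated as corruption below.)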
+ if (recorded_cf_to_ts_sz_.count(cf) != 0) { + return Status::Corruption( + "User-defined timestamp size record contains update to " + "recorded column family."); + } + recorded_cf_to_ts_sz_.insert(std::make_pair(cf, ts_sz)); + } + return Status::OK(); +} + +bool FragmentBufferedReader::ReadRecord(Slice* record, std::string* scratch, + WALRecoveryMode /*unused*/, + uint64_t* /* checksum */) { + assert(record != nullptr); + assert(scratch != nullptr); + record->clear(); + scratch->clear(); + if (uncompress_) { + uncompress_->Reset(); + } + + uint64_t prospective_record_offset = 0; + uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size(); + size_t drop_size = 0; + unsigned int fragment_type_or_err = 0; // Initialize to make compiler happy + Slice fragment; + while (TryReadFragment(&fragment, &drop_size, &fragment_type_or_err)) { + switch (fragment_type_or_err) { + case kFullType: + case kRecyclableFullType: + if (in_fragmented_record_ && !fragments_.empty()) { + ReportCorruption(fragments_.size(), "partial record without end(1)"); + } + fragments_.clear(); + *record = fragment; + prospective_record_offset = physical_record_offset; + last_record_offset_ = prospective_record_offset; + first_record_read_ = true; + in_fragmented_record_ = false; + return true; + + case kFirstType: + case kRecyclableFirstType: + if (in_fragmented_record_ || !fragments_.empty()) { + ReportCorruption(fragments_.size(), "partial record without end(2)"); + } + prospective_record_offset = physical_record_offset; + fragments_.assign(fragment.data(), fragment.size()); + in_fragmented_record_ = true; + break; + + case kMiddleType: + case kRecyclableMiddleType: + if (!in_fragmented_record_) { + ReportCorruption(fragment.size(), + "missing start of fragmented record(1)"); + } else { + fragments_.append(fragment.data(), fragment.size()); + } + break; + + case kLastType: + case kRecyclableLastType: + if (!in_fragmented_record_) { + ReportCorruption(fragment.size(), + "missing start of fragmented record(2)"); + } else { + fragments_.append(fragment.data(), fragment.size()); + scratch->assign(fragments_.data(), fragments_.size()); + fragments_.clear(); + *record = Slice(*scratch); + last_record_offset_ = prospective_record_offset; + first_record_read_ = true; + in_fragmented_record_ = false; + return true; + } + break; + + case kSetCompressionType: { + if (compression_type_record_read_) { + ReportCorruption(fragment.size(), + "read multiple SetCompressionType records"); + } + if (first_record_read_) { + ReportCorruption(fragment.size(), + "SetCompressionType not the first record"); + } + fragments_.clear(); + prospective_record_offset = physical_record_offset; + last_record_offset_ = prospective_record_offset; + in_fragmented_record_ = false; + CompressionTypeRecord compression_record(kNoCompression); + Status s = compression_record.DecodeFrom(&fragment); + if (!s.ok()) { + ReportCorruption(fragment.size(), + "could not decode SetCompressionType record"); + } else { + InitCompression(compression_record); + } + break; + } + + case kUserDefinedTimestampSizeType: + case kRecyclableUserDefinedTimestampSizeType: { + if (in_fragmented_record_ && !scratch->empty()) { + ReportCorruption( + scratch->size(), + "user-defined timestamp size record interspersed partial record"); + } + fragments_.clear(); + prospective_record_offset = physical_record_offset; + last_record_offset_ = prospective_record_offset; + in_fragmented_record_ = false; + UserDefinedTimestampSizeRecord ts_record; + Status s = 
ts_record.DecodeFrom(&fragment); + if (!s.ok()) { + ReportCorruption( + fragment.size(), + "could not decode user-defined timestamp size record"); + } else { + s = UpdateRecordedTimestampSize( + ts_record.GetUserDefinedTimestampSize()); + if (!s.ok()) { + ReportCorruption(fragment.size(), s.getState()); + } + } + break; + } + + case kBadHeader: + case kBadRecord: + case kEof: + case kOldRecord: + if (in_fragmented_record_) { + ReportCorruption(fragments_.size(), "error in middle of record"); + in_fragmented_record_ = false; + fragments_.clear(); + } + break; + + case kBadRecordChecksum: + if (recycled_) { + fragments_.clear(); + return false; + } + ReportCorruption(drop_size, "checksum mismatch"); + if (in_fragmented_record_) { + ReportCorruption(fragments_.size(), "error in middle of record"); + in_fragmented_record_ = false; + fragments_.clear(); + } + break; + + default: { + char buf[40]; + snprintf(buf, sizeof(buf), "unknown record type %u", + fragment_type_or_err); + ReportCorruption( + fragment.size() + (in_fragmented_record_ ? fragments_.size() : 0), + buf); + in_fragmented_record_ = false; + fragments_.clear(); + break; + } + } + } + return false; +} + +void FragmentBufferedReader::UnmarkEOF() { + if (read_error_) { + return; + } + eof_ = false; + UnmarkEOFInternal(); +} + +bool FragmentBufferedReader::TryReadMore(size_t* drop_size, int* error) { + if (!eof_ && !read_error_) { + // Last read was a full read, so this is a trailer to skip + buffer_.clear(); + // TODO: rate limit log reader with approriate priority. + // TODO: avoid overcharging rate limiter: + // Note that the Read here might overcharge SequentialFileReader's internal + // rate limiter if priority is not IO_TOTAL, e.g., when there is not enough + // content left until EOF to read. + Status status = file_->Read(kBlockSize, &buffer_, backing_store_, + Env::IO_TOTAL /* rate_limiter_priority */); + end_of_buffer_offset_ += buffer_.size(); + if (!status.ok()) { + buffer_.clear(); + ReportDrop(kBlockSize, status); + read_error_ = true; + *error = kEof; + return false; + } else if (buffer_.size() < static_cast(kBlockSize)) { + eof_ = true; + eof_offset_ = buffer_.size(); + TEST_SYNC_POINT_CALLBACK( + "FragmentBufferedLogReader::TryReadMore:FirstEOF", nullptr); + } + return true; + } else if (!read_error_) { + UnmarkEOF(); + } + if (!read_error_) { + return true; + } + *error = kEof; + *drop_size = buffer_.size(); + if (buffer_.size() > 0) { + *error = kBadHeader; + } + buffer_.clear(); + return false; +} + +// return true if the caller should process the fragment_type_or_err. 
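+//
+// A hedged usage sketch (editorial; this mirrors the loop in
+// FragmentBufferedReader::ReadRecord above):
+//
+//   Slice fragment;
+//   size_t drop_size = 0;
+//   unsigned int type_or_err = 0;
+//   while (TryReadFragment(&fragment, &drop_size, &type_or_err)) {
+//     // dispatch on type_or_err, which may also hold kEof, kBadRecord, ...
+//   }
+//   // false: either no further progress is possible yet, or a terminal
+//   // condition was stored in type_or_err by TryReadMore().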
+bool FragmentBufferedReader::TryReadFragment( + Slice* fragment, size_t* drop_size, unsigned int* fragment_type_or_err) { + assert(fragment != nullptr); + assert(drop_size != nullptr); + assert(fragment_type_or_err != nullptr); + + while (buffer_.size() < static_cast(kHeaderSize)) { + size_t old_size = buffer_.size(); + int error = kEof; + if (!TryReadMore(drop_size, &error)) { + *fragment_type_or_err = error; + return false; + } else if (old_size == buffer_.size()) { + return false; + } + } + const char* header = buffer_.data(); + const uint32_t a = static_cast(header[4]) & 0xff; + const uint32_t b = static_cast(header[5]) & 0xff; + const unsigned int type = header[6]; + const uint32_t length = a | (b << 8); + int header_size = kHeaderSize; + if ((type >= kRecyclableFullType && type <= kRecyclableLastType) || + type == kRecyclableUserDefinedTimestampSizeType) { + if (end_of_buffer_offset_ - buffer_.size() == 0) { + recycled_ = true; + } + header_size = kRecyclableHeaderSize; + while (buffer_.size() < static_cast(kRecyclableHeaderSize)) { + size_t old_size = buffer_.size(); + int error = kEof; + if (!TryReadMore(drop_size, &error)) { + *fragment_type_or_err = error; + return false; + } else if (old_size == buffer_.size()) { + return false; + } + } + const uint32_t log_num = DecodeFixed32(header + 7); + if (log_num != log_number_) { + *fragment_type_or_err = kOldRecord; + return true; + } + } + + while (header_size + length > buffer_.size()) { + size_t old_size = buffer_.size(); + int error = kEof; + if (!TryReadMore(drop_size, &error)) { + *fragment_type_or_err = error; + return false; + } else if (old_size == buffer_.size()) { + return false; + } + } + + if (type == kZeroType && length == 0) { + buffer_.clear(); + *fragment_type_or_err = kBadRecord; + return true; + } + + if (checksum_) { + uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header)); + uint32_t actual_crc = crc32c::Value(header + 6, length + header_size - 6); + if (actual_crc != expected_crc) { + *drop_size = buffer_.size(); + buffer_.clear(); + *fragment_type_or_err = kBadRecordChecksum; + return true; + } + } + + buffer_.remove_prefix(header_size + length); + + if (!uncompress_ || type == kSetCompressionType || + type == kUserDefinedTimestampSizeType || + type == kRecyclableUserDefinedTimestampSizeType) { + *fragment = Slice(header + header_size, length); + *fragment_type_or_err = type; + return true; + } else { + // Uncompress compressed records + uncompressed_record_.clear(); + size_t uncompressed_size = 0; + int remaining = 0; + const char* input = header + header_size; + do { + remaining = uncompress_->Uncompress( + input, length, uncompressed_buffer_.get(), &uncompressed_size); + input = nullptr; + if (remaining < 0) { + buffer_.clear(); + *fragment_type_or_err = kBadRecord; + return true; + } + if (uncompressed_size > 0) { + uncompressed_record_.append(uncompressed_buffer_.get(), + uncompressed_size); + } + } while (remaining > 0 || uncompressed_size == kBlockSize); + *fragment = Slice(std::move(uncompressed_record_)); + *fragment_type_or_err = type; + return true; + } +} + +} // namespace log +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_reader.h new file mode 100644 index 0000000000..b7b298e58a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_reader.h @@ -0,0 +1,241 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include +#include +#include + +#include "db/log_format.h" +#include "file/sequence_file_reader.h" +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "util/compression.h" +#include "util/udt_util.h" +#include "util/xxhash.h" + +namespace ROCKSDB_NAMESPACE { +class Logger; + +namespace log { + +/** + * Reader is a general purpose log stream reader implementation. The actual job + * of reading from the device is implemented by the SequentialFile interface. + * + * Please see Writer for details on the file and record layout. + */ +class Reader { + public: + // Interface for reporting errors. + class Reporter { + public: + virtual ~Reporter(); + + // Some corruption was detected. "size" is the approximate number + // of bytes dropped due to the corruption. + virtual void Corruption(size_t bytes, const Status& status) = 0; + }; + + // Create a reader that will return log records from "*file". + // "*file" must remain live while this Reader is in use. + // + // If "reporter" is non-nullptr, it is notified whenever some data is + // dropped due to a detected corruption. "*reporter" must remain + // live while this Reader is in use. + // + // If "checksum" is true, verify checksums if available. + Reader(std::shared_ptr info_log, + std::unique_ptr&& file, Reporter* reporter, + bool checksum, uint64_t log_num); + // No copying allowed + Reader(const Reader&) = delete; + void operator=(const Reader&) = delete; + + virtual ~Reader(); + + // Read the next record into *record. Returns true if read + // successfully, false if we hit end of the input. May use + // "*scratch" as temporary storage. The contents filled in *record + // will only be valid until the next mutating operation on this + // reader or the next mutation to *scratch. + // If record_checksum is not nullptr, then this function will calculate the + // checksum of the record read and set record_checksum to it. The checksum is + // calculated from the original buffers that contain the contents of the + // record. + virtual bool ReadRecord(Slice* record, std::string* scratch, + WALRecoveryMode wal_recovery_mode = + WALRecoveryMode::kTolerateCorruptedTailRecords, + uint64_t* record_checksum = nullptr); + + // Return the recorded user-defined timestamp size that have been read so + // far. This only applies to WAL logs. + const std::unordered_map& GetRecordedTimestampSize() const { + return recorded_cf_to_ts_sz_; + } + + // Returns the physical offset of the last record returned by ReadRecord. + // + // Undefined before the first call to ReadRecord. + uint64_t LastRecordOffset(); + + // Returns the first physical offset after the last record returned by + // ReadRecord, or zero before first call to ReadRecord. This can also be + // thought of as the "current" position in processing the file bytes. + uint64_t LastRecordEnd(); + + // returns true if the reader has encountered an eof condition. + bool IsEOF() { return eof_; } + + // returns true if the reader has encountered read error. 
+  bool hasReadError() const { return read_error_; }
+
+  // When we know more data has been written to the file, we can use this
+  // function to force the reader to look again in the file.
+  // Also aligns the file position indicator to the start of the next block
+  // by reading the rest of the data from the EOF position to the end of the
+  // block that was partially read.
+  virtual void UnmarkEOF();
+
+  SequentialFileReader* file() { return file_.get(); }
+
+  Reporter* GetReporter() const { return reporter_; }
+
+  uint64_t GetLogNumber() const { return log_number_; }
+
+  size_t GetReadOffset() const {
+    return static_cast<size_t>(end_of_buffer_offset_);
+  }
+
+  bool IsCompressedAndEmptyFile() {
+    return !first_record_read_ && compression_type_record_read_;
+  }
+
+ protected:
+  std::shared_ptr<Logger> info_log_;
+  const std::unique_ptr<SequentialFileReader> file_;
+  Reporter* const reporter_;
+  bool const checksum_;
+  char* const backing_store_;
+
+  // Internal state variables used for reading records
+  Slice buffer_;
+  bool eof_;         // Last Read() indicated EOF by returning < kBlockSize
+  bool read_error_;  // Error occurred while reading from file
+
+  // Offset of the file position indicator within the last block when an
+  // EOF was detected.
+  size_t eof_offset_;
+
+  // Offset of the last record returned by ReadRecord.
+  uint64_t last_record_offset_;
+  // Offset of the first location past the end of buffer_.
+  uint64_t end_of_buffer_offset_;
+
+  // which log number this is
+  uint64_t const log_number_;
+
+  // Whether this is a recycled log file
+  bool recycled_;
+
+  // Whether the first record has been read or not.
+  bool first_record_read_;
+  // Type of compression used
+  CompressionType compression_type_;
+  // Track whether the compression type record has been read or not.
+  bool compression_type_record_read_;
+  StreamingUncompress* uncompress_;
+  // Reusable uncompressed output buffer
+  std::unique_ptr<char[]> uncompressed_buffer_;
+  // Reusable uncompressed record
+  std::string uncompressed_record_;
+  // Used for stream hashing fragment content in ReadRecord()
+  XXH3_state_t* hash_state_;
+  // Used for stream hashing uncompressed buffer in ReadPhysicalRecord()
+  XXH3_state_t* uncompress_hash_state_;
+
+  // The recorded user-defined timestamp sizes that have been read so far. This
+  // is only for WAL logs.
+  std::unordered_map<uint32_t, size_t> recorded_cf_to_ts_sz_;
+
+  // Extend record types with the following special values
+  enum {
+    kEof = kMaxRecordType + 1,
+    // Returned whenever we find an invalid physical record.
+    // Currently there are two situations in which this happens:
+    // * The record has an invalid CRC (ReadPhysicalRecord reports a drop)
+    // * The record is a 0-length record (No drop is reported)
+    kBadRecord = kMaxRecordType + 2,
+    // Returned when we fail to read a valid header.
+    kBadHeader = kMaxRecordType + 3,
+    // Returned when we read an old record from a previous user of the log.
+    kOldRecord = kMaxRecordType + 4,
+    // Returned when we get a bad record length
+    kBadRecordLen = kMaxRecordType + 5,
+    // Returned when we get a bad record checksum
+    kBadRecordChecksum = kMaxRecordType + 6,
+  };
+
+  // Return type, or one of the preceding special values.
+  // If WAL compression is enabled, fragment_checksum is the checksum of the
+  // fragment computed from the original buffer containing the uncompressed
+  // fragment.
+ unsigned int ReadPhysicalRecord(Slice* result, size_t* drop_size, + uint64_t* fragment_checksum = nullptr); + + // Read some more + bool ReadMore(size_t* drop_size, int* error); + + void UnmarkEOFInternal(); + + // Reports dropped bytes to the reporter. + // buffer_ must be updated to remove the dropped bytes prior to invocation. + void ReportCorruption(size_t bytes, const char* reason); + void ReportDrop(size_t bytes, const Status& reason); + + void InitCompression(const CompressionTypeRecord& compression_record); + + Status UpdateRecordedTimestampSize( + const std::vector>& cf_to_ts_sz); +}; + +class FragmentBufferedReader : public Reader { + public: + FragmentBufferedReader(std::shared_ptr info_log, + std::unique_ptr&& _file, + Reporter* reporter, bool checksum, uint64_t log_num) + : Reader(info_log, std::move(_file), reporter, checksum, log_num), + fragments_(), + in_fragmented_record_(false) {} + ~FragmentBufferedReader() override {} + bool ReadRecord(Slice* record, std::string* scratch, + WALRecoveryMode wal_recovery_mode = + WALRecoveryMode::kTolerateCorruptedTailRecords, + uint64_t* record_checksum = nullptr) override; + void UnmarkEOF() override; + + private: + std::string fragments_; + bool in_fragmented_record_; + + bool TryReadFragment(Slice* result, size_t* drop_size, + unsigned int* fragment_type_or_err); + + bool TryReadMore(size_t* drop_size, int* error); + + // No copy allowed + FragmentBufferedReader(const FragmentBufferedReader&); + void operator=(const FragmentBufferedReader&); +}; + +} // namespace log +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.cc new file mode 100644 index 0000000000..1ddf0a26c8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.cc @@ -0,0 +1,279 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/log_writer.h" + +#include + +#include "file/writable_file_writer.h" +#include "rocksdb/env.h" +#include "rocksdb/io_status.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "util/udt_util.h" + +namespace ROCKSDB_NAMESPACE { +namespace log { + +Writer::Writer(std::unique_ptr&& dest, uint64_t log_number, + bool recycle_log_files, bool manual_flush, + CompressionType compression_type) + : dest_(std::move(dest)), + block_offset_(0), + log_number_(log_number), + recycle_log_files_(recycle_log_files), + manual_flush_(manual_flush), + compression_type_(compression_type), + compress_(nullptr) { + for (int i = 0; i <= kMaxRecordType; i++) { + char t = static_cast(i); + type_crc_[i] = crc32c::Value(&t, 1); + } +} + +Writer::~Writer() { + if (dest_) { + WriteBuffer().PermitUncheckedError(); + } + if (compress_) { + delete compress_; + } +} + +IOStatus Writer::WriteBuffer() { + if (dest_->seen_error()) { + return IOStatus::IOError("Seen error. 
Skip writing buffer."); + } + return dest_->Flush(); +} + +IOStatus Writer::Close() { + IOStatus s; + if (dest_) { + s = dest_->Close(); + dest_.reset(); + } + return s; +} + +IOStatus Writer::AddRecord(const Slice& slice, + Env::IOPriority rate_limiter_priority) { + const char* ptr = slice.data(); + size_t left = slice.size(); + + // Header size varies depending on whether we are recycling or not. + const int header_size = + recycle_log_files_ ? kRecyclableHeaderSize : kHeaderSize; + + // Fragment the record if necessary and emit it. Note that if slice + // is empty, we still want to iterate once to emit a single + // zero-length record + bool begin = true; + int compress_remaining = 0; + bool compress_start = false; + if (compress_) { + compress_->Reset(); + compress_start = true; + } + + IOStatus s; + do { + const int64_t leftover = kBlockSize - block_offset_; + assert(leftover >= 0); + if (leftover < header_size) { + // Switch to a new block + if (leftover > 0) { + // Fill the trailer (literal below relies on kHeaderSize and + // kRecyclableHeaderSize being <= 11) + assert(header_size <= 11); + s = dest_->Append(Slice("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + static_cast(leftover)), + 0 /* crc32c_checksum */, rate_limiter_priority); + if (!s.ok()) { + break; + } + } + block_offset_ = 0; + } + + // Invariant: we never leave < header_size bytes in a block. + assert(static_cast(kBlockSize - block_offset_) >= header_size); + + const size_t avail = kBlockSize - block_offset_ - header_size; + + // Compress the record if compression is enabled. + // Compress() is called at least once (compress_start=true) and after the + // previous generated compressed chunk is written out as one or more + // physical records (left=0). + if (compress_ && (compress_start || left == 0)) { + compress_remaining = compress_->Compress(slice.data(), slice.size(), + compressed_buffer_.get(), &left); + + if (compress_remaining < 0) { + // Set failure status + s = IOStatus::IOError("Unexpected WAL compression error"); + s.SetDataLoss(true); + break; + } else if (left == 0) { + // Nothing left to compress + if (!compress_start) { + break; + } + } + compress_start = false; + ptr = compressed_buffer_.get(); + } + + const size_t fragment_length = (left < avail) ? left : avail; + + RecordType type; + const bool end = (left == fragment_length && compress_remaining == 0); + if (begin && end) { + type = recycle_log_files_ ? kRecyclableFullType : kFullType; + } else if (begin) { + type = recycle_log_files_ ? kRecyclableFirstType : kFirstType; + } else if (end) { + type = recycle_log_files_ ? kRecyclableLastType : kLastType; + } else { + type = recycle_log_files_ ? 
kRecyclableMiddleType : kMiddleType;
+    }
+
+    s = EmitPhysicalRecord(type, ptr, fragment_length, rate_limiter_priority);
+    ptr += fragment_length;
+    left -= fragment_length;
+    begin = false;
+  } while (s.ok() && (left > 0 || compress_remaining > 0));
+
+  if (s.ok()) {
+    if (!manual_flush_) {
+      s = dest_->Flush(rate_limiter_priority);
+    }
+  }
+
+  return s;
+}
+
+IOStatus Writer::AddCompressionTypeRecord() {
+  // Should be the first record
+  assert(block_offset_ == 0);
+
+  if (compression_type_ == kNoCompression) {
+    // No need to add a record
+    return IOStatus::OK();
+  }
+
+  CompressionTypeRecord record(compression_type_);
+  std::string encode;
+  record.EncodeTo(&encode);
+  IOStatus s =
+      EmitPhysicalRecord(kSetCompressionType, encode.data(), encode.size());
+  if (s.ok()) {
+    if (!manual_flush_) {
+      s = dest_->Flush();
+    }
+    // Initialize fields required for compression
+    const size_t max_output_buffer_len =
+        kBlockSize - (recycle_log_files_ ? kRecyclableHeaderSize : kHeaderSize);
+    CompressionOptions opts;
+    constexpr uint32_t compression_format_version = 2;
+    compress_ = StreamingCompress::Create(compression_type_, opts,
+                                          compression_format_version,
+                                          max_output_buffer_len);
+    assert(compress_ != nullptr);
+    compressed_buffer_ =
+        std::unique_ptr<char[]>(new char[max_output_buffer_len]);
+    assert(compressed_buffer_);
+  } else {
+    // Disable compression if the record could not be added.
+    compression_type_ = kNoCompression;
+  }
+  return s;
+}
+
+IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord(
+    const std::unordered_map<uint32_t, size_t>& cf_to_ts_sz,
+    Env::IOPriority rate_limiter_priority) {
+  std::vector<std::pair<uint32_t, size_t>> ts_sz_to_record;
+  for (const auto& [cf_id, ts_sz] : cf_to_ts_sz) {
+    if (recorded_cf_to_ts_sz_.count(cf_id) != 0) {
+      // A column family's user-defined timestamp size should not be
+      // updated while DB is running.
+      assert(recorded_cf_to_ts_sz_[cf_id] == ts_sz);
+    } else if (ts_sz != 0) {
+      ts_sz_to_record.emplace_back(cf_id, ts_sz);
+      recorded_cf_to_ts_sz_.insert(std::make_pair(cf_id, ts_sz));
+    }
+  }
+  if (ts_sz_to_record.empty()) {
+    return IOStatus::OK();
+  }
+
+  UserDefinedTimestampSizeRecord record(std::move(ts_sz_to_record));
+  std::string encoded;
+  record.EncodeTo(&encoded);
+  RecordType type = recycle_log_files_ ? kRecyclableUserDefinedTimestampSizeType
+                                       : kUserDefinedTimestampSizeType;
+  return EmitPhysicalRecord(type, encoded.data(), encoded.size(),
+                            rate_limiter_priority);
+}
+
+bool Writer::BufferIsEmpty() { return dest_->BufferIsEmpty(); }
+
+IOStatus Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n,
+                                    Env::IOPriority rate_limiter_priority) {
+  assert(n <= 0xffff);  // Must fit in two bytes
+
+  size_t header_size;
+  char buf[kRecyclableHeaderSize];
+
+  // Format the header
+  buf[4] = static_cast<char>(n & 0xff);
+  buf[5] = static_cast<char>(n >> 8);
+  buf[6] = static_cast<char>(t);
+
+  uint32_t crc = type_crc_[t];
+  if (t < kRecyclableFullType || t == kSetCompressionType ||
+      t == kUserDefinedTimestampSizeType) {
+    // Legacy record format
+    assert(block_offset_ + kHeaderSize + n <= kBlockSize);
+    header_size = kHeaderSize;
+  } else {
+    // Recyclable record format
+    assert(block_offset_ + kRecyclableHeaderSize + n <= kBlockSize);
+    header_size = kRecyclableHeaderSize;
+
+    // Only encode low 32-bits of the 64-bit log number. This means
+    // we will fail to detect an old record if we recycled a log from
+    // ~4 billion logs ago, but that is effectively impossible, and
+    // even if it were we'd be far more likely to see a false positive
+    // on the 32-bit CRC.
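+    // (Editorial sketch of the checksum assembly below, using the crc32c
+    // helpers from util/crc32c.h that this file already calls:
+    //
+    //   crc = crc32c::Extend(type_crc_[t], buf + 7, 4);              // + log number
+    //   crc = crc32c::Crc32cCombine(crc, crc32c::Value(ptr, n), n);  // + payload
+    //   EncodeFixed32(buf, crc32c::Mask(crc));                       // masked, LE
+    // )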
+ EncodeFixed32(buf + 7, static_cast(log_number_)); + crc = crc32c::Extend(crc, buf + 7, 4); + } + + // Compute the crc of the record type and the payload. + uint32_t payload_crc = crc32c::Value(ptr, n); + crc = crc32c::Crc32cCombine(crc, payload_crc, n); + crc = crc32c::Mask(crc); // Adjust for storage + TEST_SYNC_POINT_CALLBACK("LogWriter::EmitPhysicalRecord:BeforeEncodeChecksum", + &crc); + EncodeFixed32(buf, crc); + + // Write the header and the payload + IOStatus s = dest_->Append(Slice(buf, header_size), 0 /* crc32c_checksum */, + rate_limiter_priority); + if (s.ok()) { + s = dest_->Append(Slice(ptr, n), payload_crc, rate_limiter_priority); + } + block_offset_ += header_size + n; + return s; +} + +} // namespace log +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.h new file mode 100644 index 0000000000..75e44d1a4e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/log_writer.h @@ -0,0 +1,144 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include +#include +#include +#include + +#include "db/log_format.h" +#include "rocksdb/compression_type.h" +#include "rocksdb/env.h" +#include "rocksdb/io_status.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "util/compression.h" + +namespace ROCKSDB_NAMESPACE { + +class WritableFileWriter; + +namespace log { + +/** + * Writer is a general purpose log stream writer. It provides an append-only + * abstraction for writing data. The details of the how the data is written is + * handled by the WritableFile sub-class implementation. + * + * File format: + * + * File is broken down into variable sized records. The format of each record + * is described below. + * +-----+-------------+--+----+----------+------+-- ... ----+ + * File | r0 | r1 |P | r2 | r3 | r4 | | + * +-----+-------------+--+----+----------+------+-- ... ----+ + * <--- kBlockSize ------>|<-- kBlockSize ------>| + * rn = variable size records + * P = Padding + * + * Data is written out in kBlockSize chunks. If next record does not fit + * into the space left, the leftover space will be padded with \0. + * + * Legacy record format: + * + * +---------+-----------+-----------+--- ... ---+ + * |CRC (4B) | Size (2B) | Type (1B) | Payload | + * +---------+-----------+-----------+--- ... ---+ + * + * CRC = 32bit hash computed over the record type and payload using CRC + * Size = Length of the payload data + * Type = Type of record + * (kZeroType, kFullType, kFirstType, kLastType, kMiddleType ) + * The type is used to group a bunch of records together to represent + * blocks that are larger than kBlockSize + * Payload = Byte stream as long as specified by the payload size + * + * Recyclable record format: + * + * +---------+-----------+-----------+----------------+--- ... ---+ + * |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | + * +---------+-----------+-----------+----------------+--- ... 
---+ + * + * Same as above, with the addition of + * Log number = 32bit log file number, so that we can distinguish between + * records written by the most recent log writer vs a previous one. + */ +class Writer { + public: + // Create a writer that will append data to "*dest". + // "*dest" must be initially empty. + // "*dest" must remain live while this Writer is in use. + explicit Writer(std::unique_ptr&& dest, + uint64_t log_number, bool recycle_log_files, + bool manual_flush = false, + CompressionType compressionType = kNoCompression); + // No copying allowed + Writer(const Writer&) = delete; + void operator=(const Writer&) = delete; + + ~Writer(); + + IOStatus AddRecord(const Slice& slice, + Env::IOPriority rate_limiter_priority = Env::IO_TOTAL); + IOStatus AddCompressionTypeRecord(); + + // If there are column families in `cf_to_ts_sz` not included in + // `recorded_cf_to_ts_sz_` and its user-defined timestamp size is non-zero, + // adds a record of type kUserDefinedTimestampSizeType or + // kRecyclableUserDefinedTimestampSizeType for these column families. + // This timestamp size record applies to all subsequent records. + IOStatus MaybeAddUserDefinedTimestampSizeRecord( + const std::unordered_map& cf_to_ts_sz, + Env::IOPriority rate_limiter_priority = Env::IO_TOTAL); + + WritableFileWriter* file() { return dest_.get(); } + const WritableFileWriter* file() const { return dest_.get(); } + + uint64_t get_log_number() const { return log_number_; } + + IOStatus WriteBuffer(); + + IOStatus Close(); + + bool BufferIsEmpty(); + + private: + std::unique_ptr dest_; + size_t block_offset_; // Current offset in block + uint64_t log_number_; + bool recycle_log_files_; + + // crc32c values for all supported record types. These are + // pre-computed to reduce the overhead of computing the crc of the + // record type stored in the header. + uint32_t type_crc_[kMaxRecordType + 1]; + + IOStatus EmitPhysicalRecord( + RecordType type, const char* ptr, size_t length, + Env::IOPriority rate_limiter_priority = Env::IO_TOTAL); + + // If true, it does not flush after each write. Instead it relies on the upper + // layer to manually does the flush by calling ::WriteBuffer() + bool manual_flush_; + + // Compression Type + CompressionType compression_type_; + StreamingCompress* compress_; + // Reusable compressed output buffer + std::unique_ptr compressed_buffer_; + + // The recorded user-defined timestamp size that have been written so far. + // Since the user-defined timestamp size cannot be changed while the DB is + // running, existing entry in this map cannot be updated. + std::unordered_map recorded_cf_to_ts_sz_; +}; + +} // namespace log +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.cc new file mode 100644 index 0000000000..ff98155c45 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
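+//
+// (Editorial usage sketch, not part of upstream; it illustrates the call
+// pattern the tracker below is designed for:
+//
+//   LogsWithPrepTracker tracker;
+//   tracker.MarkLogAsContainingPrepSection(7);    // 2PC prepare hits WAL 7
+//   assert(tracker.FindMinLogContainingOutstandingPrep() == 7);
+//   tracker.MarkLogAsHavingPrepSectionFlushed(7);  // commit/rollback seen
+//   assert(tracker.FindMinLogContainingOutstandingPrep() == 0);  // none left
+// )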
+// +#include "db/logs_with_prep_tracker.h" + +#include "port/likely.h" + +namespace ROCKSDB_NAMESPACE { +void LogsWithPrepTracker::MarkLogAsHavingPrepSectionFlushed(uint64_t log) { + assert(log != 0); + std::lock_guard lock(prepared_section_completed_mutex_); + auto it = prepared_section_completed_.find(log); + if (UNLIKELY(it == prepared_section_completed_.end())) { + prepared_section_completed_[log] = 1; + } else { + it->second += 1; + } +} + +void LogsWithPrepTracker::MarkLogAsContainingPrepSection(uint64_t log) { + assert(log != 0); + std::lock_guard lock(logs_with_prep_mutex_); + + auto rit = logs_with_prep_.rbegin(); + bool updated = false; + // Most probably the last log is the one that is being marked for + // having a prepare section; so search from the end. + for (; rit != logs_with_prep_.rend() && rit->log >= log; ++rit) { + if (rit->log == log) { + rit->cnt++; + updated = true; + break; + } + } + if (!updated) { + // We are either at the start, or at a position with rit->log < log + logs_with_prep_.insert(rit.base(), {log, 1}); + } +} + +uint64_t LogsWithPrepTracker::FindMinLogContainingOutstandingPrep() { + std::lock_guard lock(logs_with_prep_mutex_); + auto it = logs_with_prep_.begin(); + // start with the smallest log + for (; it != logs_with_prep_.end();) { + auto min_log = it->log; + { + std::lock_guard lock2(prepared_section_completed_mutex_); + auto completed_it = prepared_section_completed_.find(min_log); + if (completed_it == prepared_section_completed_.end() || + completed_it->second < it->cnt) { + return min_log; + } + assert(completed_it != prepared_section_completed_.end() && + completed_it->second == it->cnt); + prepared_section_completed_.erase(completed_it); + } + // erase from beginning in vector is not efficient but this function is not + // on the fast path. + it = logs_with_prep_.erase(it); + } + // no such log found + return 0; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.h new file mode 100644 index 0000000000..f72f0ca078 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/logs_with_prep_tracker.h @@ -0,0 +1,62 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// This class is used to track the log files with outstanding prepare entries. +class LogsWithPrepTracker { + public: + // Called when a transaction prepared in `log` has been committed or aborted. + void MarkLogAsHavingPrepSectionFlushed(uint64_t log); + // Called when a transaction is prepared in `log`. + void MarkLogAsContainingPrepSection(uint64_t log); + // Return the earliest log file with outstanding prepare entries. + uint64_t FindMinLogContainingOutstandingPrep(); + size_t TEST_PreparedSectionCompletedSize() { + return prepared_section_completed_.size(); + } + size_t TEST_LogsWithPrepSize() { return logs_with_prep_.size(); } + + private: + // REQUIRES: logs_with_prep_mutex_ held + // + // sorted list of log numbers still containing prepared data. 
+ // this is used by FindObsoleteFiles to determine which + // flushed logs we must keep around because they still + // contain prepared data which has not been committed or rolled back + struct LogCnt { + uint64_t log; // the log number + uint64_t cnt; // number of prepared sections in the log + }; + std::vector logs_with_prep_; + std::mutex logs_with_prep_mutex_; + + // REQUIRES: prepared_section_completed_mutex_ held + // + // to be used in conjunction with logs_with_prep_. + // once a transaction with data in log L is committed or rolled back + // rather than updating logs_with_prep_ directly we keep track of that + // in prepared_section_completed_ which maps LOG -> instance_count. This helps + // avoiding contention between a commit thread and the prepare threads. + // + // when trying to determine the minimum log still active we first + // consult logs_with_prep_. while that root value maps to + // an equal value in prepared_section_completed_ we erase the log from + // both logs_with_prep_ and prepared_section_completed_. + std::unordered_map prepared_section_completed_; + std::mutex prepared_section_completed_mutex_; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/lookup_key.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/lookup_key.h new file mode 100644 index 0000000000..68851bddd1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/lookup_key.h @@ -0,0 +1,68 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include +#include + +#include "rocksdb/slice.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +// A helper class useful for DBImpl::Get() +class LookupKey { + public: + // Initialize *this for looking up user_key at a snapshot with + // the specified sequence number. + LookupKey(const Slice& _user_key, SequenceNumber sequence, + const Slice* ts = nullptr); + + ~LookupKey(); + + // Return a key suitable for lookup in a MemTable. + Slice memtable_key() const { + return Slice(start_, static_cast(end_ - start_)); + } + + // Return an internal key (suitable for passing to an internal iterator) + Slice internal_key() const { + return Slice(kstart_, static_cast(end_ - kstart_)); + } + + // Return the user key. + // If user-defined timestamp is enabled, then timestamp is included in the + // result. + Slice user_key() const { + return Slice(kstart_, static_cast(end_ - kstart_ - 8)); + } + + private: + // We construct a char array of the form: + // klength varint32 <-- start_ + // userkey char[klength] <-- kstart_ + // tag uint64 + // <-- end_ + // The array is a suitable MemTable key. + // The suffix starting with "userkey" can be used as an InternalKey. 
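+  //
+  // (Editorial example: for user key "foo" at sequence 42 with a value-type
+  // tag, klength is 3 + 8 = 11, so the buffer holds varint32(11), then
+  // "foo", then the 8-byte tag (42 << 8) | type; memtable_key() spans the
+  // whole buffer while internal_key() skips the length prefix.)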
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.cc
new file mode 100644
index 0000000000..641e01f9a3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.cc
@@ -0,0 +1,53 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/malloc_stats.h"
+
+#include <string.h>
+
+#include <memory>
+
+#include "port/jemalloc_helper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+#ifdef ROCKSDB_JEMALLOC
+
+struct MallocStatus {
+  char* cur;
+  char* end;
+};
+
+static void GetJemallocStatus(void* mstat_arg, const char* status) {
+  MallocStatus* mstat = reinterpret_cast<MallocStatus*>(mstat_arg);
+  size_t status_len = status ? strlen(status) : 0;
+  size_t buf_size = (size_t)(mstat->end - mstat->cur);
+  if (!status_len || status_len > buf_size) {
+    return;
+  }
+
+  snprintf(mstat->cur, buf_size, "%s", status);
+  mstat->cur += status_len;
+}
+void DumpMallocStats(std::string* stats) {
+  if (!HasJemalloc()) {
+    return;
+  }
+  MallocStatus mstat;
+  const unsigned int kMallocStatusLen = 1000000;
+  std::unique_ptr<char[]> buf{new char[kMallocStatusLen + 1]};
+  mstat.cur = buf.get();
+  mstat.end = buf.get() + kMallocStatusLen;
+  malloc_stats_print(GetJemallocStatus, &mstat, "");
+  stats->append(buf.get());
+}
+#else
+void DumpMallocStats(std::string*) {}
+#endif  // ROCKSDB_JEMALLOC
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.h
new file mode 100644
index 0000000000..1cca8a9522
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/malloc_stats.h
@@ -0,0 +1,22 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+
+#include <string>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+void DumpMallocStats(std::string*);
+
+}
+
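DumpMallocStats relies on jemalloc's malloc_stats_print, which streams its report through a caller-supplied callback; the MallocStatus cursor above simply copies each chunk that still fits and silently drops the rest. A self-contained sketch of the same cursor-and-end pattern, with hypothetical names; only the truncation logic mirrors GetJemallocStatus (using >= so snprintf's trailing NUL always fits):

#include <cstdio>
#include <cstring>

struct CursorBuf {
  char* cur;
  char* end;
};

static void AppendChunk(void* arg, const char* chunk) {
  CursorBuf* buf = static_cast<CursorBuf*>(arg);
  size_t len = chunk ? strlen(chunk) : 0;
  size_t room = static_cast<size_t>(buf->end - buf->cur);
  if (len == 0 || len >= room) {
    return;  // drop chunks that would overflow, as the vendored code does
  }
  snprintf(buf->cur, room, "%s", chunk);
  buf->cur += len;
}

int main() {
  char storage[16];
  CursorBuf buf{storage, storage + sizeof(storage)};
  AppendChunk(&buf, "arena: ");
  AppendChunk(&buf, "ok\n");
  AppendChunk(&buf, "this chunk is far too long to fit and is dropped");
  printf("%s", storage);  // prints "arena: ok" and a newline
  return 0;
}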
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.cc
new file mode 100644
index 0000000000..3801b67a9b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.cc
@@ -0,0 +1,1675 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/memtable.h"
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <memory>
+
+#include "db/dbformat.h"
+#include "db/kv_checksum.h"
+#include "db/merge_context.h"
+#include "db/merge_helper.h"
+#include "db/pinned_iterators_manager.h"
+#include "db/range_tombstone_fragmenter.h"
+#include "db/read_callback.h"
+#include "db/wide/wide_column_serialization.h"
+#include "logging/logging.h"
+#include "memory/arena.h"
+#include "memory/memory_usage.h"
+#include "monitoring/perf_context_imp.h"
+#include "monitoring/statistics_impl.h"
+#include "port/lang.h"
+#include "port/port.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/types.h"
+#include "rocksdb/write_buffer_manager.h"
+#include "table/internal_iterator.h"
+#include "table/iterator_wrapper.h"
+#include "table/merging_iterator.h"
+#include "util/autovector.h"
+#include "util/coding.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+ImmutableMemTableOptions::ImmutableMemTableOptions(
+    const ImmutableOptions& ioptions,
+    const MutableCFOptions& mutable_cf_options)
+    : arena_block_size(mutable_cf_options.arena_block_size),
+      memtable_prefix_bloom_bits(
+          static_cast<uint32_t>(
+              static_cast<double>(mutable_cf_options.write_buffer_size) *
+              mutable_cf_options.memtable_prefix_bloom_size_ratio) *
+          8u),
+      memtable_huge_page_size(mutable_cf_options.memtable_huge_page_size),
+      memtable_whole_key_filtering(
+          mutable_cf_options.memtable_whole_key_filtering),
+      inplace_update_support(ioptions.inplace_update_support),
+      inplace_update_num_locks(mutable_cf_options.inplace_update_num_locks),
+      inplace_callback(ioptions.inplace_callback),
+      max_successive_merges(mutable_cf_options.max_successive_merges),
+      statistics(ioptions.stats),
+      merge_operator(ioptions.merge_operator.get()),
+      info_log(ioptions.logger),
+      allow_data_in_errors(ioptions.allow_data_in_errors),
+      protection_bytes_per_key(
+          mutable_cf_options.memtable_protection_bytes_per_key) {}
+
+MemTable::MemTable(const InternalKeyComparator& cmp,
+                   const ImmutableOptions& ioptions,
+                   const MutableCFOptions& mutable_cf_options,
+                   WriteBufferManager* write_buffer_manager,
+                   SequenceNumber latest_seq, uint32_t column_family_id)
+    : comparator_(cmp),
+      moptions_(ioptions, mutable_cf_options),
+      refs_(0),
+      kArenaBlockSize(Arena::OptimizeBlockSize(moptions_.arena_block_size)),
+      mem_tracker_(write_buffer_manager),
+      arena_(moptions_.arena_block_size,
+             (write_buffer_manager != nullptr &&
+              (write_buffer_manager->enabled() ||
+               write_buffer_manager->cost_to_cache()))
+                 ? &mem_tracker_
+                 : nullptr,
+             mutable_cf_options.memtable_huge_page_size),
+      table_(ioptions.memtable_factory->CreateMemTableRep(
+          comparator_, &arena_, mutable_cf_options.prefix_extractor.get(),
+          ioptions.logger, column_family_id)),
+      range_del_table_(SkipListFactory().CreateMemTableRep(
+          comparator_, &arena_, nullptr /* transform */, ioptions.logger,
+          column_family_id)),
+      is_range_del_table_empty_(true),
+      data_size_(0),
+      num_entries_(0),
+      num_deletes_(0),
+      write_buffer_size_(mutable_cf_options.write_buffer_size),
+      flush_in_progress_(false),
+      flush_completed_(false),
+      file_number_(0),
+      first_seqno_(0),
+      earliest_seqno_(latest_seq),
+      creation_seq_(latest_seq),
+      mem_next_logfile_number_(0),
+      min_prep_log_referenced_(0),
+      locks_(moptions_.inplace_update_support
+                 ? moptions_.inplace_update_num_locks
+                 : 0),
+      prefix_extractor_(mutable_cf_options.prefix_extractor.get()),
+      flush_state_(FLUSH_NOT_REQUESTED),
+      clock_(ioptions.clock),
+      insert_with_hint_prefix_extractor_(
+          ioptions.memtable_insert_with_hint_prefix_extractor.get()),
+      oldest_key_time_(std::numeric_limits<uint64_t>::max()),
+      atomic_flush_seqno_(kMaxSequenceNumber),
+      approximate_memory_usage_(0) {
+  UpdateFlushState();
+  // something went wrong if we need to flush before inserting anything
+  assert(!ShouldScheduleFlush());
+
+  // use bloom_filter_ for both whole key and prefix bloom filter
+  if ((prefix_extractor_ || moptions_.memtable_whole_key_filtering) &&
+      moptions_.memtable_prefix_bloom_bits > 0) {
+    bloom_filter_.reset(
+        new DynamicBloom(&arena_, moptions_.memtable_prefix_bloom_bits,
+                         6 /* hard coded 6 probes */,
+                         moptions_.memtable_huge_page_size, ioptions.logger));
+  }
+  // Initialize cached_range_tombstone_ here since it could
+  // be read before it is constructed in MemTable::Add(), which could also lead
+  // to a data race on the global mutex table backing atomic shared_ptr.
+  auto new_cache = std::make_shared<FragmentedRangeTombstoneListCache>();
+  size_t size = cached_range_tombstone_.Size();
+  for (size_t i = 0; i < size; ++i) {
+    std::shared_ptr<FragmentedRangeTombstoneListCache>* local_cache_ref_ptr =
+        cached_range_tombstone_.AccessAtCore(i);
+    auto new_local_cache_ref = std::make_shared<
+        const std::shared_ptr<FragmentedRangeTombstoneListCache>>(new_cache);
+    std::atomic_store_explicit(
+        local_cache_ref_ptr,
+        std::shared_ptr<FragmentedRangeTombstoneListCache>(new_local_cache_ref,
+                                                           new_cache.get()),
+        std::memory_order_relaxed);
+  }
+}
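The bloom sizing fed into this constructor is worth spelling out: memtable_prefix_bloom_size_ratio carves a byte budget out of the write buffer, and the filter gets 8 bits per budgeted byte. A back-of-the-envelope check of that arithmetic with assumed option values (a 64 MiB buffer and ratio 0.1; not from the sources):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t write_buffer_size = 64ull << 20;       // 64 MiB memtable budget
  double memtable_prefix_bloom_size_ratio = 0.1;  // 10% of the budget
  uint32_t bloom_bits =
      static_cast<uint32_t>(static_cast<double>(write_buffer_size) *
                            memtable_prefix_bloom_size_ratio) *
      8u;
  // 67108864 * 0.1 = 6710886 bytes; * 8 = 53687088 bits (~6.4 MiB of filter)
  printf("bloom bits: %u\n", bloom_bits);
  return 0;
}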
+
+MemTable::~MemTable() {
+  mem_tracker_.FreeMem();
+  assert(refs_ == 0);
+}
+
+size_t MemTable::ApproximateMemoryUsage() {
+  autovector<size_t> usages = {
+      arena_.ApproximateMemoryUsage(), table_->ApproximateMemoryUsage(),
+      range_del_table_->ApproximateMemoryUsage(),
+      ROCKSDB_NAMESPACE::ApproximateMemoryUsage(insert_hints_)};
+  size_t total_usage = 0;
+  for (size_t usage : usages) {
+    // If usage + total_usage >= kMaxSizet, return kMaxSizet.
+    // the following variation is to avoid numeric overflow.
+    if (usage >= std::numeric_limits<size_t>::max() - total_usage) {
+      return std::numeric_limits<size_t>::max();
+    }
+    total_usage += usage;
+  }
+  approximate_memory_usage_.store(total_usage, std::memory_order_relaxed);
+  // otherwise, return the actual usage
+  return total_usage;
+}
+
+bool MemTable::ShouldFlushNow() {
+  size_t write_buffer_size = write_buffer_size_.load(std::memory_order_relaxed);
+  // Often we cannot allocate arena blocks that exactly match the buffer
+  // size. Thus we have to decide if we should over-allocate or
+  // under-allocate.
+  // This constant variable can be interpreted as: if we still have more than
+  // "kAllowOverAllocationRatio * kArenaBlockSize" space left, we'd try to over
+  // allocate one more block.
+  const double kAllowOverAllocationRatio = 0.6;
+
+  // If the arena still has room for new block allocation, we can safely say
+  // it shouldn't flush.
+  auto allocated_memory = table_->ApproximateMemoryUsage() +
+                          range_del_table_->ApproximateMemoryUsage() +
+                          arena_.MemoryAllocatedBytes();
+
+  approximate_memory_usage_.store(allocated_memory, std::memory_order_relaxed);
+
+  // if we can still allocate one more block without exceeding the
+  // over-allocation ratio, then we should not flush.
+  if (allocated_memory + kArenaBlockSize <
+      write_buffer_size + kArenaBlockSize * kAllowOverAllocationRatio) {
+    return false;
+  }
+
+  // if the user keeps adding entries that exceed write_buffer_size, we need
+  // to flush earlier even though we still have much available memory left.
+  if (allocated_memory >
+      write_buffer_size + kArenaBlockSize * kAllowOverAllocationRatio) {
+    return true;
+  }
+
+  // In this code path, Arena has already allocated its "last block", which
+  // means the total allocated memory size is either:
+  //  (1) "moderately" over-allocated the memory (no more than `0.6 * arena
+  //  block size`). Or,
+  //  (2) the allocated memory is less than write buffer size, but we'll stop
+  //  here since if we allocate a new arena block, we'll over-allocate too much
+  //  more (half of the arena block size) memory.
+  //
+  // In either case, to avoid over-allocating, the last block will stop
+  // allocation when its usage reaches a certain ratio, which we carefully
+  // choose "0.75 full" as the stop condition because it addresses the
+  // following issue with great simplicity: What if the next inserted entry's
+  // size is bigger than AllocatedAndUnused()?
+  //
+  // The answer is: if the entry size is also bigger than 0.25 *
+  // kArenaBlockSize, a dedicated block will be allocated for it; otherwise
+  // arena will anyway skip the AllocatedAndUnused() and allocate a new, empty
+  // and regular block. In either case, we *overly* over-allocated.
+  //
+  // Therefore, setting the last block to be at most "0.75 full" avoids both
+  // cases.
+  //
+  // NOTE: the average percentage of wasted space of this approach can be
+  // counted as: "arena block size * 0.25 / write buffer size". Users who
+  // specify a small write buffer size and/or big arena block size may suffer.
+  return arena_.AllocatedAndUnused() < kArenaBlockSize / 4;
+}
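Plugging assumed numbers into ShouldFlushNow() makes the three bands visible: with a 64 MiB write buffer and 8 MiB arena blocks (roughly the 1/8-of-buffer default), anything under about 60.8 MiB allocated keeps writing, anything over about 68.8 MiB flushes, and the band in between defers to how full the last arena block is. A sketch of the same arithmetic outside the class, with made-up inputs:

#include <cstdint>
#include <cstdio>

int main() {
  const double kAllowOverAllocationRatio = 0.6;
  uint64_t write_buffer_size = 64ull << 20;  // 64 MiB
  uint64_t arena_block_size = 8ull << 20;    // 8 MiB
  uint64_t allocated = 62ull << 20;          // current arena footprint
  uint64_t unused = 3ull << 20;              // AllocatedAndUnused()

  if (allocated + arena_block_size <
      write_buffer_size + arena_block_size * kAllowOverAllocationRatio) {
    printf("room for another block: keep writing\n");  // below ~60.8 MiB
  } else if (allocated > write_buffer_size +
                             arena_block_size * kAllowOverAllocationRatio) {
    printf("over budget: flush\n");                    // above ~68.8 MiB
  } else {
    // Middle band: flush once the last block is roughly 75% full.
    printf(unused < arena_block_size / 4 ? "flush\n" : "keep writing\n");
  }
  return 0;
}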
+
+void MemTable::UpdateFlushState() {
+  auto state = flush_state_.load(std::memory_order_relaxed);
+  if (state == FLUSH_NOT_REQUESTED && ShouldFlushNow()) {
+    // ignore CAS failure, because that means somebody else requested
+    // a flush
+    flush_state_.compare_exchange_strong(state, FLUSH_REQUESTED,
+                                         std::memory_order_relaxed,
+                                         std::memory_order_relaxed);
+  }
+}
+
+void MemTable::UpdateOldestKeyTime() {
+  uint64_t oldest_key_time = oldest_key_time_.load(std::memory_order_relaxed);
+  if (oldest_key_time == std::numeric_limits<uint64_t>::max()) {
+    int64_t current_time = 0;
+    auto s = clock_->GetCurrentTime(&current_time);
+    if (s.ok()) {
+      assert(current_time >= 0);
+      // If fail, the timestamp is already set.
+ oldest_key_time_.compare_exchange_strong( + oldest_key_time, static_cast(current_time), + std::memory_order_relaxed, std::memory_order_relaxed); + } + } +} + +Status MemTable::VerifyEntryChecksum(const char* entry, + uint32_t protection_bytes_per_key, + bool allow_data_in_errors) { + if (protection_bytes_per_key == 0) { + return Status::OK(); + } + uint32_t key_length; + const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); + if (key_ptr == nullptr) { + return Status::Corruption("Unable to parse internal key length"); + } + if (key_length < 8) { + return Status::Corruption("Memtable entry internal key length too short."); + } + Slice user_key = Slice(key_ptr, key_length - 8); + + const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); + ValueType type; + SequenceNumber seq; + UnPackSequenceAndType(tag, &seq, &type); + + uint32_t value_length = 0; + const char* value_ptr = GetVarint32Ptr( + key_ptr + key_length, key_ptr + key_length + 5, &value_length); + if (value_ptr == nullptr) { + return Status::Corruption("Unable to parse internal key value"); + } + Slice value = Slice(value_ptr, value_length); + + const char* checksum_ptr = value_ptr + value_length; + bool match = + ProtectionInfo64() + .ProtectKVO(user_key, value, type) + .ProtectS(seq) + .Verify(static_cast(protection_bytes_per_key), checksum_ptr); + if (!match) { + std::string msg( + "Corrupted memtable entry, per key-value checksum verification " + "failed."); + if (allow_data_in_errors) { + msg.append("Unrecognized value type: " + + std::to_string(static_cast(type)) + ". "); + msg.append("User key: " + user_key.ToString(/*hex=*/true) + ". "); + msg.append("seq: " + std::to_string(seq) + "."); + } + return Status::Corruption(msg.c_str()); + } + return Status::OK(); +} + +int MemTable::KeyComparator::operator()(const char* prefix_len_key1, + const char* prefix_len_key2) const { + // Internal keys are encoded as length-prefixed strings. + Slice k1 = GetLengthPrefixedSlice(prefix_len_key1); + Slice k2 = GetLengthPrefixedSlice(prefix_len_key2); + return comparator.CompareKeySeq(k1, k2); +} + +int MemTable::KeyComparator::operator()( + const char* prefix_len_key, const KeyComparator::DecodedType& key) const { + // Internal keys are encoded as length-prefixed strings. + Slice a = GetLengthPrefixedSlice(prefix_len_key); + return comparator.CompareKeySeq(a, key); +} + +void MemTableRep::InsertConcurrently(KeyHandle /*handle*/) { + throw std::runtime_error("concurrent insert not supported"); +} + +Slice MemTableRep::UserKey(const char* key) const { + Slice slice = GetLengthPrefixedSlice(key); + return Slice(slice.data(), slice.size() - 8); +} + +KeyHandle MemTableRep::Allocate(const size_t len, char** buf) { + *buf = allocator_->Allocate(len); + return static_cast(*buf); +} + +// Encode a suitable internal key target for "target" and return it. +// Uses *scratch as scratch space, and the returned pointer will point +// into this scratch space. 
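+// For example, for target "abc" the scratch buffer ends up holding the four
+// bytes {0x03, 'a', 'b', 'c'}: a one-byte varint32 length followed by the
+// key bytes themselves.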
+const char* EncodeKey(std::string* scratch, const Slice& target) { + scratch->clear(); + PutVarint32(scratch, static_cast(target.size())); + scratch->append(target.data(), target.size()); + return scratch->data(); +} + +class MemTableIterator : public InternalIterator { + public: + MemTableIterator(const MemTable& mem, const ReadOptions& read_options, + Arena* arena, bool use_range_del_table = false) + : bloom_(nullptr), + prefix_extractor_(mem.prefix_extractor_), + comparator_(mem.comparator_), + valid_(false), + arena_mode_(arena != nullptr), + value_pinned_( + !mem.GetImmutableMemTableOptions()->inplace_update_support), + protection_bytes_per_key_(mem.moptions_.protection_bytes_per_key), + status_(Status::OK()), + logger_(mem.moptions_.info_log) { + if (use_range_del_table) { + iter_ = mem.range_del_table_->GetIterator(arena); + } else if (prefix_extractor_ != nullptr && !read_options.total_order_seek && + !read_options.auto_prefix_mode) { + // Auto prefix mode is not implemented in memtable yet. + bloom_ = mem.bloom_filter_.get(); + iter_ = mem.table_->GetDynamicPrefixIterator(arena); + } else { + iter_ = mem.table_->GetIterator(arena); + } + status_.PermitUncheckedError(); + } + // No copying allowed + MemTableIterator(const MemTableIterator&) = delete; + void operator=(const MemTableIterator&) = delete; + + ~MemTableIterator() override { +#ifndef NDEBUG + // Assert that the MemTableIterator is never deleted while + // Pinning is Enabled. + assert(!pinned_iters_mgr_ || !pinned_iters_mgr_->PinningEnabled()); +#endif + if (arena_mode_) { + iter_->~Iterator(); + } else { + delete iter_; + } + } + +#ifndef NDEBUG + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; + } + PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; +#endif + + bool Valid() const override { return valid_ && status_.ok(); } + void Seek(const Slice& k) override { + PERF_TIMER_GUARD(seek_on_memtable_time); + PERF_COUNTER_ADD(seek_on_memtable_count, 1); + if (bloom_) { + // iterator should only use prefix bloom filter + auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size(); + Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz)); + if (prefix_extractor_->InDomain(user_k_without_ts)) { + if (!bloom_->MayContain( + prefix_extractor_->Transform(user_k_without_ts))) { + PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); + valid_ = false; + return; + } else { + PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); + } + } + } + iter_->Seek(k, nullptr); + valid_ = iter_->Valid(); + VerifyEntryChecksum(); + } + void SeekForPrev(const Slice& k) override { + PERF_TIMER_GUARD(seek_on_memtable_time); + PERF_COUNTER_ADD(seek_on_memtable_count, 1); + if (bloom_) { + auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size(); + Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz)); + if (prefix_extractor_->InDomain(user_k_without_ts)) { + if (!bloom_->MayContain( + prefix_extractor_->Transform(user_k_without_ts))) { + PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); + valid_ = false; + return; + } else { + PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); + } + } + } + iter_->Seek(k, nullptr); + valid_ = iter_->Valid(); + VerifyEntryChecksum(); + if (!Valid() && status().ok()) { + SeekToLast(); + } + while (Valid() && comparator_.comparator.Compare(k, key()) < 0) { + Prev(); + } + } + void SeekToFirst() override { + iter_->SeekToFirst(); + valid_ = iter_->Valid(); + VerifyEntryChecksum(); + } + void SeekToLast() override { + 
iter_->SeekToLast(); + valid_ = iter_->Valid(); + VerifyEntryChecksum(); + } + void Next() override { + PERF_COUNTER_ADD(next_on_memtable_count, 1); + assert(Valid()); + iter_->Next(); + TEST_SYNC_POINT_CALLBACK("MemTableIterator::Next:0", iter_); + valid_ = iter_->Valid(); + VerifyEntryChecksum(); + } + bool NextAndGetResult(IterateResult* result) override { + Next(); + bool is_valid = Valid(); + if (is_valid) { + result->key = key(); + result->bound_check_result = IterBoundCheck::kUnknown; + result->value_prepared = true; + } + return is_valid; + } + void Prev() override { + PERF_COUNTER_ADD(prev_on_memtable_count, 1); + assert(Valid()); + iter_->Prev(); + valid_ = iter_->Valid(); + VerifyEntryChecksum(); + } + Slice key() const override { + assert(Valid()); + return GetLengthPrefixedSlice(iter_->key()); + } + Slice value() const override { + assert(Valid()); + Slice key_slice = GetLengthPrefixedSlice(iter_->key()); + return GetLengthPrefixedSlice(key_slice.data() + key_slice.size()); + } + + Status status() const override { return status_; } + + bool IsKeyPinned() const override { + // memtable data is always pinned + return true; + } + + bool IsValuePinned() const override { + // memtable value is always pinned, except if we allow inplace update. + return value_pinned_; + } + + private: + DynamicBloom* bloom_; + const SliceTransform* const prefix_extractor_; + const MemTable::KeyComparator comparator_; + MemTableRep::Iterator* iter_; + bool valid_; + bool arena_mode_; + bool value_pinned_; + uint32_t protection_bytes_per_key_; + Status status_; + Logger* logger_; + + void VerifyEntryChecksum() { + if (protection_bytes_per_key_ > 0 && Valid()) { + status_ = MemTable::VerifyEntryChecksum(iter_->key(), + protection_bytes_per_key_); + if (!status_.ok()) { + ROCKS_LOG_ERROR(logger_, "In MemtableIterator: %s", status_.getState()); + } + } + } +}; + +InternalIterator* MemTable::NewIterator(const ReadOptions& read_options, + Arena* arena) { + assert(arena != nullptr); + auto mem = arena->AllocateAligned(sizeof(MemTableIterator)); + return new (mem) MemTableIterator(*this, read_options, arena); +} + +FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIterator( + const ReadOptions& read_options, SequenceNumber read_seq, + bool immutable_memtable) { + if (read_options.ignore_range_deletions || + is_range_del_table_empty_.load(std::memory_order_relaxed)) { + return nullptr; + } + return NewRangeTombstoneIteratorInternal(read_options, read_seq, + immutable_memtable); +} + +FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIteratorInternal( + const ReadOptions& read_options, SequenceNumber read_seq, + bool immutable_memtable) { + if (immutable_memtable) { + // Note that caller should already have verified that + // !is_range_del_table_empty_ + assert(IsFragmentedRangeTombstonesConstructed()); + return new FragmentedRangeTombstoneIterator( + fragmented_range_tombstone_list_.get(), comparator_.comparator, + read_seq, read_options.timestamp); + } + + // takes current cache + std::shared_ptr cache = + std::atomic_load_explicit(cached_range_tombstone_.Access(), + std::memory_order_relaxed); + // construct fragmented tombstone list if necessary + if (!cache->initialized.load(std::memory_order_acquire)) { + cache->reader_mutex.lock(); + if (!cache->tombstones) { + auto* unfragmented_iter = + new MemTableIterator(*this, read_options, nullptr /* arena */, + true /* use_range_del_table */); + cache->tombstones.reset(new FragmentedRangeTombstoneList( + std::unique_ptr(unfragmented_iter), + 
comparator_.comparator)); + cache->initialized.store(true, std::memory_order_release); + } + cache->reader_mutex.unlock(); + } + + auto* fragmented_iter = new FragmentedRangeTombstoneIterator( + cache, comparator_.comparator, read_seq, read_options.timestamp); + return fragmented_iter; +} + +void MemTable::ConstructFragmentedRangeTombstones() { + assert(!IsFragmentedRangeTombstonesConstructed(false)); + // There should be no concurrent Construction + if (!is_range_del_table_empty_.load(std::memory_order_relaxed)) { + // TODO: plumb Env::IOActivity + auto* unfragmented_iter = + new MemTableIterator(*this, ReadOptions(), nullptr /* arena */, + true /* use_range_del_table */); + + fragmented_range_tombstone_list_ = + std::make_unique( + std::unique_ptr(unfragmented_iter), + comparator_.comparator); + } +} + +port::RWMutex* MemTable::GetLock(const Slice& key) { + return &locks_[GetSliceRangedNPHash(key, locks_.size())]; +} + +MemTable::MemTableStats MemTable::ApproximateStats(const Slice& start_ikey, + const Slice& end_ikey) { + uint64_t entry_count = table_->ApproximateNumEntries(start_ikey, end_ikey); + entry_count += range_del_table_->ApproximateNumEntries(start_ikey, end_ikey); + if (entry_count == 0) { + return {0, 0}; + } + uint64_t n = num_entries_.load(std::memory_order_relaxed); + if (n == 0) { + return {0, 0}; + } + if (entry_count > n) { + // (range_del_)table_->ApproximateNumEntries() is just an estimate so it can + // be larger than actual entries we have. Cap it to entries we have to limit + // the inaccuracy. + entry_count = n; + } + uint64_t data_size = data_size_.load(std::memory_order_relaxed); + return {entry_count * (data_size / n), entry_count}; +} + +Status MemTable::VerifyEncodedEntry(Slice encoded, + const ProtectionInfoKVOS64& kv_prot_info) { + uint32_t ikey_len = 0; + if (!GetVarint32(&encoded, &ikey_len)) { + return Status::Corruption("Unable to parse internal key length"); + } + size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); + if (ikey_len < 8 + ts_sz) { + return Status::Corruption("Internal key length too short"); + } + if (ikey_len > encoded.size()) { + return Status::Corruption("Internal key length too long"); + } + uint32_t value_len = 0; + const size_t user_key_len = ikey_len - 8; + Slice key(encoded.data(), user_key_len); + encoded.remove_prefix(user_key_len); + + uint64_t packed = DecodeFixed64(encoded.data()); + ValueType value_type = kMaxValue; + SequenceNumber sequence_number = kMaxSequenceNumber; + UnPackSequenceAndType(packed, &sequence_number, &value_type); + encoded.remove_prefix(8); + + if (!GetVarint32(&encoded, &value_len)) { + return Status::Corruption("Unable to parse value length"); + } + if (value_len < encoded.size()) { + return Status::Corruption("Value length too short"); + } + if (value_len > encoded.size()) { + return Status::Corruption("Value length too long"); + } + Slice value(encoded.data(), value_len); + + return kv_prot_info.StripS(sequence_number) + .StripKVO(key, value, value_type) + .GetStatus(); +} + +void MemTable::UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info, + const Slice& key, const Slice& value, + ValueType type, SequenceNumber s, + char* checksum_ptr) { + if (moptions_.protection_bytes_per_key == 0) { + return; + } + + if (kv_prot_info == nullptr) { + ProtectionInfo64() + .ProtectKVO(key, value, type) + .ProtectS(s) + .Encode(static_cast(moptions_.protection_bytes_per_key), + checksum_ptr); + } else { + kv_prot_info->Encode( + static_cast(moptions_.protection_bytes_per_key), 
checksum_ptr); + } +} + +Status MemTable::Add(SequenceNumber s, ValueType type, + const Slice& key, /* user key */ + const Slice& value, + const ProtectionInfoKVOS64* kv_prot_info, + bool allow_concurrent, + MemTablePostProcessInfo* post_process_info, void** hint) { + // Format of an entry is concatenation of: + // key_size : varint32 of internal_key.size() + // key bytes : char[internal_key.size()] + // value_size : varint32 of value.size() + // value bytes : char[value.size()] + // checksum : char[moptions_.protection_bytes_per_key] + uint32_t key_size = static_cast(key.size()); + uint32_t val_size = static_cast(value.size()); + uint32_t internal_key_size = key_size + 8; + const uint32_t encoded_len = VarintLength(internal_key_size) + + internal_key_size + VarintLength(val_size) + + val_size + moptions_.protection_bytes_per_key; + char* buf = nullptr; + std::unique_ptr& table = + type == kTypeRangeDeletion ? range_del_table_ : table_; + KeyHandle handle = table->Allocate(encoded_len, &buf); + + char* p = EncodeVarint32(buf, internal_key_size); + memcpy(p, key.data(), key_size); + Slice key_slice(p, key_size); + p += key_size; + uint64_t packed = PackSequenceAndType(s, type); + EncodeFixed64(p, packed); + p += 8; + p = EncodeVarint32(p, val_size); + memcpy(p, value.data(), val_size); + assert((unsigned)(p + val_size - buf + moptions_.protection_bytes_per_key) == + (unsigned)encoded_len); + + UpdateEntryChecksum(kv_prot_info, key, value, type, s, + buf + encoded_len - moptions_.protection_bytes_per_key); + Slice encoded(buf, encoded_len - moptions_.protection_bytes_per_key); + if (kv_prot_info != nullptr) { + TEST_SYNC_POINT_CALLBACK("MemTable::Add:Encoded", &encoded); + Status status = VerifyEncodedEntry(encoded, *kv_prot_info); + if (!status.ok()) { + return status; + } + } + + size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); + Slice key_without_ts = StripTimestampFromUserKey(key, ts_sz); + + if (!allow_concurrent) { + // Extract prefix for insert with hint. 
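+    // Keys that share a prefix tend to arrive together; the per-prefix hint
+    // remembers where the previous insert landed so the memtable rep can
+    // resume its search from there instead of from the top.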
+ if (insert_with_hint_prefix_extractor_ != nullptr && + insert_with_hint_prefix_extractor_->InDomain(key_slice)) { + Slice prefix = insert_with_hint_prefix_extractor_->Transform(key_slice); + bool res = table->InsertKeyWithHint(handle, &insert_hints_[prefix]); + if (UNLIKELY(!res)) { + return Status::TryAgain("key+seq exists"); + } + } else { + bool res = table->InsertKey(handle); + if (UNLIKELY(!res)) { + return Status::TryAgain("key+seq exists"); + } + } + + // this is a bit ugly, but is the way to avoid locked instructions + // when incrementing an atomic + num_entries_.store(num_entries_.load(std::memory_order_relaxed) + 1, + std::memory_order_relaxed); + data_size_.store(data_size_.load(std::memory_order_relaxed) + encoded_len, + std::memory_order_relaxed); + if (type == kTypeDeletion || type == kTypeSingleDeletion || + type == kTypeDeletionWithTimestamp) { + num_deletes_.store(num_deletes_.load(std::memory_order_relaxed) + 1, + std::memory_order_relaxed); + } + + if (bloom_filter_ && prefix_extractor_ && + prefix_extractor_->InDomain(key_without_ts)) { + bloom_filter_->Add(prefix_extractor_->Transform(key_without_ts)); + } + if (bloom_filter_ && moptions_.memtable_whole_key_filtering) { + bloom_filter_->Add(key_without_ts); + } + + // The first sequence number inserted into the memtable + assert(first_seqno_ == 0 || s >= first_seqno_); + if (first_seqno_ == 0) { + first_seqno_.store(s, std::memory_order_relaxed); + + if (earliest_seqno_ == kMaxSequenceNumber) { + earliest_seqno_.store(GetFirstSequenceNumber(), + std::memory_order_relaxed); + } + assert(first_seqno_.load() >= earliest_seqno_.load()); + } + assert(post_process_info == nullptr); + UpdateFlushState(); + } else { + bool res = (hint == nullptr) + ? table->InsertKeyConcurrently(handle) + : table->InsertKeyWithHintConcurrently(handle, hint); + if (UNLIKELY(!res)) { + return Status::TryAgain("key+seq exists"); + } + + assert(post_process_info != nullptr); + post_process_info->num_entries++; + post_process_info->data_size += encoded_len; + if (type == kTypeDeletion) { + post_process_info->num_deletes++; + } + + if (bloom_filter_ && prefix_extractor_ && + prefix_extractor_->InDomain(key_without_ts)) { + bloom_filter_->AddConcurrently( + prefix_extractor_->Transform(key_without_ts)); + } + if (bloom_filter_ && moptions_.memtable_whole_key_filtering) { + bloom_filter_->AddConcurrently(key_without_ts); + } + + // atomically update first_seqno_ and earliest_seqno_. + uint64_t cur_seq_num = first_seqno_.load(std::memory_order_relaxed); + while ((cur_seq_num == 0 || s < cur_seq_num) && + !first_seqno_.compare_exchange_weak(cur_seq_num, s)) { + } + uint64_t cur_earliest_seqno = + earliest_seqno_.load(std::memory_order_relaxed); + while ( + (cur_earliest_seqno == kMaxSequenceNumber || s < cur_earliest_seqno) && + !first_seqno_.compare_exchange_weak(cur_earliest_seqno, s)) { + } + } + if (type == kTypeRangeDeletion) { + auto new_cache = std::make_shared(); + size_t size = cached_range_tombstone_.Size(); + if (allow_concurrent) { + range_del_mutex_.lock(); + } + for (size_t i = 0; i < size; ++i) { + std::shared_ptr* local_cache_ref_ptr = + cached_range_tombstone_.AccessAtCore(i); + auto new_local_cache_ref = std::make_shared< + const std::shared_ptr>(new_cache); + // It is okay for some reader to load old cache during invalidation as + // the new sequence number is not published yet. 
+ // Each core will have a shared_ptr to a shared_ptr to the cached + // fragmented range tombstones, so that ref count is maintianed locally + // per-core using the per-core shared_ptr. + std::atomic_store_explicit( + local_cache_ref_ptr, + std::shared_ptr( + new_local_cache_ref, new_cache.get()), + std::memory_order_relaxed); + } + if (allow_concurrent) { + range_del_mutex_.unlock(); + } + is_range_del_table_empty_.store(false, std::memory_order_relaxed); + } + UpdateOldestKeyTime(); + + TEST_SYNC_POINT_CALLBACK("MemTable::Add:BeforeReturn:Encoded", &encoded); + return Status::OK(); +} + +// Callback from MemTable::Get() +namespace { + +struct Saver { + Status* status; + const LookupKey* key; + bool* found_final_value; // Is value set correctly? Used by KeyMayExist + bool* merge_in_progress; + std::string* value; + PinnableWideColumns* columns; + SequenceNumber seq; + std::string* timestamp; + const MergeOperator* merge_operator; + // the merge operations encountered; + MergeContext* merge_context; + SequenceNumber max_covering_tombstone_seq; + MemTable* mem; + Logger* logger; + Statistics* statistics; + bool inplace_update_support; + bool do_merge; + SystemClock* clock; + + ReadCallback* callback_; + bool* is_blob_index; + bool allow_data_in_errors; + uint32_t protection_bytes_per_key; + bool CheckCallback(SequenceNumber _seq) { + if (callback_) { + return callback_->IsVisible(_seq); + } + return true; + } +}; +} // anonymous namespace + +static bool SaveValue(void* arg, const char* entry) { + TEST_SYNC_POINT_CALLBACK("Memtable::SaveValue:Begin:entry", &entry); + Saver* s = reinterpret_cast(arg); + assert(s != nullptr); + assert(!s->value || !s->columns); + + if (s->protection_bytes_per_key > 0) { + *(s->status) = MemTable::VerifyEntryChecksum( + entry, s->protection_bytes_per_key, s->allow_data_in_errors); + if (!s->status->ok()) { + ROCKS_LOG_ERROR(s->logger, "In SaveValue: %s", s->status->getState()); + // Memtable entry corrupted + return false; + } + } + + MergeContext* merge_context = s->merge_context; + SequenceNumber max_covering_tombstone_seq = s->max_covering_tombstone_seq; + const MergeOperator* merge_operator = s->merge_operator; + + assert(merge_context != nullptr); + + // Refer to comments under MemTable::Add() for entry format. + // Check that it belongs to same user key. + uint32_t key_length = 0; + const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); + assert(key_length >= 8); + Slice user_key_slice = Slice(key_ptr, key_length - 8); + const Comparator* user_comparator = + s->mem->GetInternalKeyComparator().user_comparator(); + size_t ts_sz = user_comparator->timestamp_size(); + if (ts_sz && s->timestamp && max_covering_tombstone_seq > 0) { + // timestamp should already be set to range tombstone timestamp + assert(s->timestamp->size() == ts_sz); + } + if (user_comparator->EqualWithoutTimestamp(user_key_slice, + s->key->user_key())) { + // Correct user key + const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); + ValueType type; + SequenceNumber seq; + UnPackSequenceAndType(tag, &seq, &type); + // If the value is not in the snapshot, skip it + if (!s->CheckCallback(seq)) { + return true; // to continue to the next seq + } + + if (s->seq == kMaxSequenceNumber) { + s->seq = seq; + if (s->seq > max_covering_tombstone_seq) { + if (ts_sz && s->timestamp != nullptr) { + // `timestamp` was set to range tombstone's timestamp before + // `SaveValue` is ever called. 
This key has a higher sequence number + // than range tombstone, and is the key with the highest seqno across + // all keys with this user_key, so we update timestamp here. + Slice ts = ExtractTimestampFromUserKey(user_key_slice, ts_sz); + s->timestamp->assign(ts.data(), ts_sz); + } + } else { + s->seq = max_covering_tombstone_seq; + } + } + + if (ts_sz > 0 && s->timestamp != nullptr) { + if (!s->timestamp->empty()) { + assert(ts_sz == s->timestamp->size()); + } + // TODO optimize for smaller size ts + const std::string kMaxTs(ts_sz, '\xff'); + if (s->timestamp->empty() || + user_comparator->CompareTimestamp(*(s->timestamp), kMaxTs) == 0) { + Slice ts = ExtractTimestampFromUserKey(user_key_slice, ts_sz); + s->timestamp->assign(ts.data(), ts_sz); + } + } + + if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex || + type == kTypeWideColumnEntity || type == kTypeDeletion || + type == kTypeSingleDeletion || type == kTypeDeletionWithTimestamp) && + max_covering_tombstone_seq > seq) { + type = kTypeRangeDeletion; + } + switch (type) { + case kTypeBlobIndex: { + if (!s->do_merge) { + *(s->status) = Status::NotSupported( + "GetMergeOperands not supported by stacked BlobDB"); + *(s->found_final_value) = true; + return false; + } + + if (*(s->merge_in_progress)) { + *(s->status) = Status::NotSupported( + "Merge operator not supported by stacked BlobDB"); + *(s->found_final_value) = true; + return false; + } + + if (s->is_blob_index == nullptr) { + ROCKS_LOG_ERROR(s->logger, "Encountered unexpected blob index."); + *(s->status) = Status::NotSupported( + "Encountered unexpected blob index. Please open DB with " + "ROCKSDB_NAMESPACE::blob_db::BlobDB."); + *(s->found_final_value) = true; + return false; + } + + if (s->inplace_update_support) { + s->mem->GetLock(s->key->user_key())->ReadLock(); + } + + Slice v = GetLengthPrefixedSlice(key_ptr + key_length); + + *(s->status) = Status::OK(); + + if (s->value) { + s->value->assign(v.data(), v.size()); + } else if (s->columns) { + s->columns->SetPlainValue(v); + } + + if (s->inplace_update_support) { + s->mem->GetLock(s->key->user_key())->ReadUnlock(); + } + + *(s->found_final_value) = true; + *(s->is_blob_index) = true; + + return false; + } + case kTypeValue: { + if (s->inplace_update_support) { + s->mem->GetLock(s->key->user_key())->ReadLock(); + } + + Slice v = GetLengthPrefixedSlice(key_ptr + key_length); + + *(s->status) = Status::OK(); + + if (!s->do_merge) { + // Preserve the value with the goal of returning it as part of + // raw merge operands to the user + // TODO(yanqin) update MergeContext so that timestamps information + // can also be retained. + + merge_context->PushOperand( + v, s->inplace_update_support == false /* operand_pinned */); + } else if (*(s->merge_in_progress)) { + assert(s->do_merge); + + if (s->value || s->columns) { + std::string result; + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its + // value. 
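+            // TimedFullMerge resolves the base value `v` against the
+            // operands accumulated in merge_context, timing the operation
+            // for the statistics counters.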
+ *(s->status) = MergeHelper::TimedFullMerge( + merge_operator, s->key->user_key(), &v, + merge_context->GetOperands(), &result, s->logger, s->statistics, + s->clock, /* result_operand */ nullptr, + /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr); + + if (s->status->ok()) { + if (s->value) { + *(s->value) = std::move(result); + } else { + assert(s->columns); + s->columns->SetPlainValue(std::move(result)); + } + } + } + } else if (s->value) { + s->value->assign(v.data(), v.size()); + } else if (s->columns) { + s->columns->SetPlainValue(v); + } + + if (s->inplace_update_support) { + s->mem->GetLock(s->key->user_key())->ReadUnlock(); + } + + *(s->found_final_value) = true; + + if (s->is_blob_index != nullptr) { + *(s->is_blob_index) = false; + } + + return false; + } + case kTypeWideColumnEntity: { + if (s->inplace_update_support) { + s->mem->GetLock(s->key->user_key())->ReadLock(); + } + + Slice v = GetLengthPrefixedSlice(key_ptr + key_length); + + *(s->status) = Status::OK(); + + if (!s->do_merge) { + // Preserve the value with the goal of returning it as part of + // raw merge operands to the user + + Slice value_of_default; + *(s->status) = WideColumnSerialization::GetValueOfDefaultColumn( + v, value_of_default); + + if (s->status->ok()) { + merge_context->PushOperand( + value_of_default, + s->inplace_update_support == false /* operand_pinned */); + } + } else if (*(s->merge_in_progress)) { + assert(s->do_merge); + + if (s->value) { + Slice value_of_default; + *(s->status) = WideColumnSerialization::GetValueOfDefaultColumn( + v, value_of_default); + if (s->status->ok()) { + // `op_failure_scope` (an output parameter) is not provided (set + // to nullptr) since a failure must be propagated regardless of + // its value. + *(s->status) = MergeHelper::TimedFullMerge( + merge_operator, s->key->user_key(), &value_of_default, + merge_context->GetOperands(), s->value, s->logger, + s->statistics, s->clock, /* result_operand */ nullptr, + /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr); + } + } else if (s->columns) { + std::string result; + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its + // value. + *(s->status) = MergeHelper::TimedFullMergeWithEntity( + merge_operator, s->key->user_key(), v, + merge_context->GetOperands(), &result, s->logger, s->statistics, + s->clock, /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr); + + if (s->status->ok()) { + *(s->status) = s->columns->SetWideColumnValue(std::move(result)); + } + } + } else if (s->value) { + Slice value_of_default; + *(s->status) = WideColumnSerialization::GetValueOfDefaultColumn( + v, value_of_default); + if (s->status->ok()) { + s->value->assign(value_of_default.data(), value_of_default.size()); + } + } else if (s->columns) { + *(s->status) = s->columns->SetWideColumnValue(v); + } + + if (s->inplace_update_support) { + s->mem->GetLock(s->key->user_key())->ReadUnlock(); + } + + *(s->found_final_value) = true; + + if (s->is_blob_index != nullptr) { + *(s->is_blob_index) = false; + } + + return false; + } + case kTypeDeletion: + case kTypeDeletionWithTimestamp: + case kTypeSingleDeletion: + case kTypeRangeDeletion: { + if (*(s->merge_in_progress)) { + if (s->value || s->columns) { + std::string result; + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its + // value. 
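+            // The existing-value argument is nullptr here: the deletion
+            // means the merge chain has no base value to fold in.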
+ *(s->status) = MergeHelper::TimedFullMerge( + merge_operator, s->key->user_key(), nullptr, + merge_context->GetOperands(), &result, s->logger, s->statistics, + s->clock, /* result_operand */ nullptr, + /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr); + + if (s->status->ok()) { + if (s->value) { + *(s->value) = std::move(result); + } else { + assert(s->columns); + s->columns->SetPlainValue(std::move(result)); + } + } + } else { + // We have found a final value (a base deletion) and have newer + // merge operands that we do not intend to merge. Nothing remains + // to be done so assign status to OK. + *(s->status) = Status::OK(); + } + } else { + *(s->status) = Status::NotFound(); + } + *(s->found_final_value) = true; + return false; + } + case kTypeMerge: { + if (!merge_operator) { + *(s->status) = Status::InvalidArgument( + "merge_operator is not properly initialized."); + // Normally we continue the loop (return true) when we see a merge + // operand. But in case of an error, we should stop the loop + // immediately and pretend we have found the value to stop further + // seek. Otherwise, the later call will override this error status. + *(s->found_final_value) = true; + return false; + } + Slice v = GetLengthPrefixedSlice(key_ptr + key_length); + *(s->merge_in_progress) = true; + merge_context->PushOperand( + v, s->inplace_update_support == false /* operand_pinned */); + PERF_COUNTER_ADD(internal_merge_point_lookup_count, 1); + + if (s->do_merge && merge_operator->ShouldMerge( + merge_context->GetOperandsDirectionBackward())) { + if (s->value || s->columns) { + std::string result; + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its + // value. + *(s->status) = MergeHelper::TimedFullMerge( + merge_operator, s->key->user_key(), nullptr, + merge_context->GetOperands(), &result, s->logger, s->statistics, + s->clock, /* result_operand */ nullptr, + /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr); + + if (s->status->ok()) { + if (s->value) { + *(s->value) = std::move(result); + } else { + assert(s->columns); + s->columns->SetPlainValue(std::move(result)); + } + } + } + + *(s->found_final_value) = true; + return false; + } + return true; + } + default: { + std::string msg("Corrupted value not expected."); + if (s->allow_data_in_errors) { + msg.append("Unrecognized value type: " + + std::to_string(static_cast(type)) + ". "); + msg.append("User key: " + user_key_slice.ToString(/*hex=*/true) + + ". "); + msg.append("seq: " + std::to_string(seq) + "."); + } + *(s->status) = Status::Corruption(msg.c_str()); + return false; + } + } + } + + // s->state could be Corrupt, merge or notfound + return false; +} + +bool MemTable::Get(const LookupKey& key, std::string* value, + PinnableWideColumns* columns, std::string* timestamp, + Status* s, MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + SequenceNumber* seq, const ReadOptions& read_opts, + bool immutable_memtable, ReadCallback* callback, + bool* is_blob_index, bool do_merge) { + // The sequence number is updated synchronously in version_set.h + if (IsEmpty()) { + // Avoiding recording stats for speed. 
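+    // An empty memtable cannot contain the key, so report a miss without
+    // touching the perf counters below.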
+ return false; + } + PERF_TIMER_GUARD(get_from_memtable_time); + + std::unique_ptr range_del_iter( + NewRangeTombstoneIterator(read_opts, + GetInternalKeySeqno(key.internal_key()), + immutable_memtable)); + if (range_del_iter != nullptr) { + SequenceNumber covering_seq = + range_del_iter->MaxCoveringTombstoneSeqnum(key.user_key()); + if (covering_seq > *max_covering_tombstone_seq) { + *max_covering_tombstone_seq = covering_seq; + if (timestamp) { + // Will be overwritten in SaveValue() if there is a point key with + // a higher seqno. + timestamp->assign(range_del_iter->timestamp().data(), + range_del_iter->timestamp().size()); + } + } + } + + bool found_final_value = false; + bool merge_in_progress = s->IsMergeInProgress(); + bool may_contain = true; + size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); + Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz); + bool bloom_checked = false; + if (bloom_filter_) { + // when both memtable_whole_key_filtering and prefix_extractor_ are set, + // only do whole key filtering for Get() to save CPU + if (moptions_.memtable_whole_key_filtering) { + may_contain = bloom_filter_->MayContain(user_key_without_ts); + bloom_checked = true; + } else { + assert(prefix_extractor_); + if (prefix_extractor_->InDomain(user_key_without_ts)) { + may_contain = bloom_filter_->MayContain( + prefix_extractor_->Transform(user_key_without_ts)); + bloom_checked = true; + } + } + } + + if (bloom_filter_ && !may_contain) { + // iter is null if prefix bloom says the key does not exist + PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); + *seq = kMaxSequenceNumber; + } else { + if (bloom_checked) { + PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); + } + GetFromTable(key, *max_covering_tombstone_seq, do_merge, callback, + is_blob_index, value, columns, timestamp, s, merge_context, + seq, &found_final_value, &merge_in_progress); + } + + // No change to value, since we have not yet found a Put/Delete + // Propagate corruption error + if (!found_final_value && merge_in_progress && !s->IsCorruption()) { + *s = Status::MergeInProgress(); + } + PERF_COUNTER_ADD(get_from_memtable_count, 1); + return found_final_value; +} + +void MemTable::GetFromTable(const LookupKey& key, + SequenceNumber max_covering_tombstone_seq, + bool do_merge, ReadCallback* callback, + bool* is_blob_index, std::string* value, + PinnableWideColumns* columns, + std::string* timestamp, Status* s, + MergeContext* merge_context, SequenceNumber* seq, + bool* found_final_value, bool* merge_in_progress) { + Saver saver; + saver.status = s; + saver.found_final_value = found_final_value; + saver.merge_in_progress = merge_in_progress; + saver.key = &key; + saver.value = value; + saver.columns = columns; + saver.timestamp = timestamp; + saver.seq = kMaxSequenceNumber; + saver.mem = this; + saver.merge_context = merge_context; + saver.max_covering_tombstone_seq = max_covering_tombstone_seq; + saver.merge_operator = moptions_.merge_operator; + saver.logger = moptions_.info_log; + saver.inplace_update_support = moptions_.inplace_update_support; + saver.statistics = moptions_.statistics; + saver.clock = clock_; + saver.callback_ = callback; + saver.is_blob_index = is_blob_index; + saver.do_merge = do_merge; + saver.allow_data_in_errors = moptions_.allow_data_in_errors; + saver.protection_bytes_per_key = moptions_.protection_bytes_per_key; + table_->Get(key, &saver, SaveValue); + *seq = saver.seq; +} + +void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* 
range, + ReadCallback* callback, bool immutable_memtable) { + // The sequence number is updated synchronously in version_set.h + if (IsEmpty()) { + // Avoiding recording stats for speed. + return; + } + PERF_TIMER_GUARD(get_from_memtable_time); + + // For now, memtable Bloom filter is effectively disabled if there are any + // range tombstones. This is the simplest way to ensure range tombstones are + // handled. TODO: allow Bloom checks where max_covering_tombstone_seq==0 + bool no_range_del = read_options.ignore_range_deletions || + is_range_del_table_empty_.load(std::memory_order_relaxed); + MultiGetRange temp_range(*range, range->begin(), range->end()); + if (bloom_filter_ && no_range_del) { + bool whole_key = + !prefix_extractor_ || moptions_.memtable_whole_key_filtering; + std::array bloom_keys; + std::array may_match; + std::array range_indexes; + int num_keys = 0; + for (auto iter = temp_range.begin(); iter != temp_range.end(); ++iter) { + if (whole_key) { + bloom_keys[num_keys] = iter->ukey_without_ts; + range_indexes[num_keys++] = iter.index(); + } else if (prefix_extractor_->InDomain(iter->ukey_without_ts)) { + bloom_keys[num_keys] = + prefix_extractor_->Transform(iter->ukey_without_ts); + range_indexes[num_keys++] = iter.index(); + } + } + bloom_filter_->MayContain(num_keys, &bloom_keys[0], &may_match[0]); + for (int i = 0; i < num_keys; ++i) { + if (!may_match[i]) { + temp_range.SkipIndex(range_indexes[i]); + PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); + } else { + PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); + } + } + } + for (auto iter = temp_range.begin(); iter != temp_range.end(); ++iter) { + bool found_final_value{false}; + bool merge_in_progress = iter->s->IsMergeInProgress(); + if (!no_range_del) { + std::unique_ptr range_del_iter( + NewRangeTombstoneIteratorInternal( + read_options, GetInternalKeySeqno(iter->lkey->internal_key()), + immutable_memtable)); + SequenceNumber covering_seq = + range_del_iter->MaxCoveringTombstoneSeqnum(iter->lkey->user_key()); + if (covering_seq > iter->max_covering_tombstone_seq) { + iter->max_covering_tombstone_seq = covering_seq; + if (iter->timestamp) { + // Will be overwritten in SaveValue() if there is a point key with + // a higher seqno. + iter->timestamp->assign(range_del_iter->timestamp().data(), + range_del_iter->timestamp().size()); + } + } + } + SequenceNumber dummy_seq; + GetFromTable(*(iter->lkey), iter->max_covering_tombstone_seq, true, + callback, &iter->is_blob_index, + iter->value ? 
iter->value->GetSelf() : nullptr, iter->columns, + iter->timestamp, iter->s, &(iter->merge_context), &dummy_seq, + &found_final_value, &merge_in_progress); + + if (!found_final_value && merge_in_progress) { + *(iter->s) = Status::MergeInProgress(); + } + + if (found_final_value) { + if (iter->value) { + iter->value->PinSelf(); + range->AddValueSize(iter->value->size()); + } else { + assert(iter->columns); + range->AddValueSize(iter->columns->serialized_size()); + } + + range->MarkKeyDone(iter); + RecordTick(moptions_.statistics, MEMTABLE_HIT); + if (range->GetValueSize() > read_options.value_size_soft_limit) { + // Set all remaining keys in range to Abort + for (auto range_iter = range->begin(); range_iter != range->end(); + ++range_iter) { + range->MarkKeyDone(range_iter); + *(range_iter->s) = Status::Aborted(); + } + break; + } + } + } + PERF_COUNTER_ADD(get_from_memtable_count, 1); +} + +Status MemTable::Update(SequenceNumber seq, ValueType value_type, + const Slice& key, const Slice& value, + const ProtectionInfoKVOS64* kv_prot_info) { + LookupKey lkey(key, seq); + Slice mem_key = lkey.memtable_key(); + + std::unique_ptr iter( + table_->GetDynamicPrefixIterator()); + iter->Seek(lkey.internal_key(), mem_key.data()); + + if (iter->Valid()) { + // Refer to comments under MemTable::Add() for entry format. + // Check that it belongs to same user key. We do not check the + // sequence number since the Seek() call above should have skipped + // all entries with overly large sequence numbers. + const char* entry = iter->key(); + uint32_t key_length = 0; + const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); + if (comparator_.comparator.user_comparator()->Equal( + Slice(key_ptr, key_length - 8), lkey.user_key())) { + // Correct user key + const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); + ValueType type; + SequenceNumber existing_seq; + UnPackSequenceAndType(tag, &existing_seq, &type); + assert(existing_seq != seq); + if (type == value_type) { + Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length); + uint32_t prev_size = static_cast(prev_value.size()); + uint32_t new_size = static_cast(value.size()); + + // Update value, if new value size <= previous value size + if (new_size <= prev_size) { + char* p = + EncodeVarint32(const_cast(key_ptr) + key_length, new_size); + WriteLock wl(GetLock(lkey.user_key())); + memcpy(p, value.data(), value.size()); + assert((unsigned)((p + value.size()) - entry) == + (unsigned)(VarintLength(key_length) + key_length + + VarintLength(value.size()) + value.size())); + RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED); + if (kv_prot_info != nullptr) { + ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info); + // `seq` is swallowed and `existing_seq` prevails. 
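+            // UpdateS() swaps the sequence-number component of the checksum
+            // from `seq` to `existing_seq`, keeping the protection info in
+            // sync with the entry that was just rewritten in place.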
+ updated_kv_prot_info.UpdateS(seq, existing_seq); + UpdateEntryChecksum(&updated_kv_prot_info, key, value, type, + existing_seq, p + value.size()); + Slice encoded(entry, p + value.size() - entry); + return VerifyEncodedEntry(encoded, updated_kv_prot_info); + } else { + UpdateEntryChecksum(nullptr, key, value, type, existing_seq, + p + value.size()); + } + return Status::OK(); + } + } + } + } + + // The latest value is not value_type or key doesn't exist + return Add(seq, value_type, key, value, kv_prot_info); +} + +Status MemTable::UpdateCallback(SequenceNumber seq, const Slice& key, + const Slice& delta, + const ProtectionInfoKVOS64* kv_prot_info) { + LookupKey lkey(key, seq); + Slice memkey = lkey.memtable_key(); + + std::unique_ptr iter( + table_->GetDynamicPrefixIterator()); + iter->Seek(lkey.internal_key(), memkey.data()); + + if (iter->Valid()) { + // Refer to comments under MemTable::Add() for entry format. + // Check that it belongs to same user key. We do not check the + // sequence number since the Seek() call above should have skipped + // all entries with overly large sequence numbers. + const char* entry = iter->key(); + uint32_t key_length = 0; + const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); + if (comparator_.comparator.user_comparator()->Equal( + Slice(key_ptr, key_length - 8), lkey.user_key())) { + // Correct user key + const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); + ValueType type; + uint64_t existing_seq; + UnPackSequenceAndType(tag, &existing_seq, &type); + if (type == kTypeValue) { + Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length); + uint32_t prev_size = static_cast(prev_value.size()); + + char* prev_buffer = const_cast(prev_value.data()); + uint32_t new_prev_size = prev_size; + + std::string str_value; + WriteLock wl(GetLock(lkey.user_key())); + auto status = moptions_.inplace_callback(prev_buffer, &new_prev_size, + delta, &str_value); + if (status == UpdateStatus::UPDATED_INPLACE) { + // Value already updated by callback. + assert(new_prev_size <= prev_size); + if (new_prev_size < prev_size) { + // overwrite the new prev_size + char* p = EncodeVarint32(const_cast(key_ptr) + key_length, + new_prev_size); + if (VarintLength(new_prev_size) < VarintLength(prev_size)) { + // shift the value buffer as well. + memcpy(p, prev_buffer, new_prev_size); + prev_buffer = p; + } + } + RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED); + UpdateFlushState(); + Slice new_value(prev_buffer, new_prev_size); + if (kv_prot_info != nullptr) { + ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info); + // `seq` is swallowed and `existing_seq` prevails. 
+ updated_kv_prot_info.UpdateS(seq, existing_seq); + updated_kv_prot_info.UpdateV(delta, new_value); + Slice encoded(entry, prev_buffer + new_prev_size - entry); + UpdateEntryChecksum(&updated_kv_prot_info, key, new_value, type, + existing_seq, prev_buffer + new_prev_size); + return VerifyEncodedEntry(encoded, updated_kv_prot_info); + } else { + UpdateEntryChecksum(nullptr, key, new_value, type, existing_seq, + prev_buffer + new_prev_size); + } + return Status::OK(); + } else if (status == UpdateStatus::UPDATED) { + Status s; + if (kv_prot_info != nullptr) { + ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info); + updated_kv_prot_info.UpdateV(delta, str_value); + s = Add(seq, kTypeValue, key, Slice(str_value), + &updated_kv_prot_info); + } else { + s = Add(seq, kTypeValue, key, Slice(str_value), + nullptr /* kv_prot_info */); + } + RecordTick(moptions_.statistics, NUMBER_KEYS_WRITTEN); + UpdateFlushState(); + return s; + } else if (status == UpdateStatus::UPDATE_FAILED) { + // `UPDATE_FAILED` is named incorrectly. It indicates no update + // happened. It does not indicate a failure happened. + UpdateFlushState(); + return Status::OK(); + } + } + } + } + // The latest value is not `kTypeValue` or key doesn't exist + return Status::NotFound(); +} + +size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key) { + Slice memkey = key.memtable_key(); + + // A total ordered iterator is costly for some memtablerep (prefix aware + // reps). By passing in the user key, we allow efficient iterator creation. + // The iterator only needs to be ordered within the same user key. + std::unique_ptr iter( + table_->GetDynamicPrefixIterator()); + iter->Seek(key.internal_key(), memkey.data()); + + size_t num_successive_merges = 0; + + for (; iter->Valid(); iter->Next()) { + const char* entry = iter->key(); + uint32_t key_length = 0; + const char* iter_key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); + if (!comparator_.comparator.user_comparator()->Equal( + Slice(iter_key_ptr, key_length - 8), key.user_key())) { + break; + } + + const uint64_t tag = DecodeFixed64(iter_key_ptr + key_length - 8); + ValueType type; + uint64_t unused; + UnPackSequenceAndType(tag, &unused, &type); + if (type != kTypeMerge) { + break; + } + + ++num_successive_merges; + } + + return num_successive_merges; +} + +void MemTableRep::Get(const LookupKey& k, void* callback_args, + bool (*callback_func)(void* arg, const char* entry)) { + auto iter = GetDynamicPrefixIterator(); + for (iter->Seek(k.internal_key(), k.memtable_key().data()); + iter->Valid() && callback_func(callback_args, iter->key()); + iter->Next()) { + } +} + +void MemTable::RefLogContainingPrepSection(uint64_t log) { + assert(log > 0); + auto cur = min_prep_log_referenced_.load(); + while ((log < cur || cur == 0) && + !min_prep_log_referenced_.compare_exchange_strong(cur, log)) { + cur = min_prep_log_referenced_.load(); + } +} + +uint64_t MemTable::GetMinLogContainingPrepSection() { + return min_prep_log_referenced_.load(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.h new file mode 100644 index 0000000000..eefabcf88d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable.h @@ -0,0 +1,660 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "db/dbformat.h" +#include "db/kv_checksum.h" +#include "db/range_tombstone_fragmenter.h" +#include "db/read_callback.h" +#include "db/version_edit.h" +#include "memory/allocator.h" +#include "memory/concurrent_arena.h" +#include "monitoring/instrumented_mutex.h" +#include "options/cf_options.h" +#include "rocksdb/db.h" +#include "rocksdb/memtablerep.h" +#include "table/multiget_context.h" +#include "util/dynamic_bloom.h" +#include "util/hash.h" +#include "util/hash_containers.h" + +namespace ROCKSDB_NAMESPACE { + +struct FlushJobInfo; +class Mutex; +class MemTableIterator; +class MergeContext; +class SystemClock; + +struct ImmutableMemTableOptions { + explicit ImmutableMemTableOptions(const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options); + size_t arena_block_size; + uint32_t memtable_prefix_bloom_bits; + size_t memtable_huge_page_size; + bool memtable_whole_key_filtering; + bool inplace_update_support; + size_t inplace_update_num_locks; + UpdateStatus (*inplace_callback)(char* existing_value, + uint32_t* existing_value_size, + Slice delta_value, + std::string* merged_value); + size_t max_successive_merges; + Statistics* statistics; + MergeOperator* merge_operator; + Logger* info_log; + bool allow_data_in_errors; + uint32_t protection_bytes_per_key; +}; + +// Batched counters to updated when inserting keys in one write batch. +// In post process of the write batch, these can be updated together. +// Only used in concurrent memtable insert case. +struct MemTablePostProcessInfo { + uint64_t data_size = 0; + uint64_t num_entries = 0; + uint64_t num_deletes = 0; +}; + +using MultiGetRange = MultiGetContext::Range; +// Note: Many of the methods in this class have comments indicating that +// external synchronization is required as these methods are not thread-safe. +// It is up to higher layers of code to decide how to prevent concurrent +// invocation of these methods. This is usually done by acquiring either +// the db mutex or the single writer thread. +// +// Some of these methods are documented to only require external +// synchronization if this memtable is immutable. Calling MarkImmutable() is +// not sufficient to guarantee immutability. It is up to higher layers of +// code to determine if this MemTable can still be modified by other threads. +// Eg: The Superversion stores a pointer to the current MemTable (that can +// be modified) and a separate list of the MemTables that can no longer be +// written to (aka the 'immutable memtables'). +class MemTable { + public: + struct KeyComparator : public MemTableRep::KeyComparator { + const InternalKeyComparator comparator; + explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) {} + virtual int operator()(const char* prefix_len_key1, + const char* prefix_len_key2) const override; + virtual int operator()(const char* prefix_len_key, + const DecodedType& key) const override; + }; + + // MemTables are reference counted. 
The initial reference count
+  // is zero and the caller must call Ref() at least once.
+  //
+  // earliest_seq should be the current SequenceNumber in the db such that any
+  // key inserted into this memtable will have an equal or larger seq number.
+  // (When a db is first created, the earliest sequence number will be 0).
+  // If the earliest sequence number is not known, kMaxSequenceNumber may be
+  // used, but this may prevent some transactions from succeeding until the
+  // first key is inserted into the memtable.
+  explicit MemTable(const InternalKeyComparator& comparator,
+                    const ImmutableOptions& ioptions,
+                    const MutableCFOptions& mutable_cf_options,
+                    WriteBufferManager* write_buffer_manager,
+                    SequenceNumber earliest_seq, uint32_t column_family_id);
+  // No copying allowed
+  MemTable(const MemTable&) = delete;
+  MemTable& operator=(const MemTable&) = delete;
+
+  // Do not delete this MemTable unless Unref() indicates it is not in use.
+  ~MemTable();
+
+  // Increase reference count.
+  // REQUIRES: external synchronization to prevent simultaneous
+  // operations on the same MemTable.
+  void Ref() { ++refs_; }
+
+  // Drop reference count.
+  // If the refcount goes to zero return this memtable, otherwise return null.
+  // REQUIRES: external synchronization to prevent simultaneous
+  // operations on the same MemTable.
+  MemTable* Unref() {
+    --refs_;
+    assert(refs_ >= 0);
+    if (refs_ <= 0) {
+      return this;
+    }
+    return nullptr;
+  }
+
+  // Returns an estimate of the number of bytes of data in use by this
+  // data structure.
+  //
+  // REQUIRES: external synchronization to prevent simultaneous
+  // operations on the same MemTable (unless this Memtable is immutable).
+  size_t ApproximateMemoryUsage();
+
+  // As a cheap version of `ApproximateMemoryUsage()`, this function doesn't
+  // require external synchronization. The value may be less accurate, though.
+  size_t ApproximateMemoryUsageFast() const {
+    return approximate_memory_usage_.load(std::memory_order_relaxed);
+  }
+
+  // used by MemTableListVersion::MemoryAllocatedBytesExcludingLast
+  size_t MemoryAllocatedBytes() const {
+    return table_->ApproximateMemoryUsage() +
+           range_del_table_->ApproximateMemoryUsage() +
+           arena_.MemoryAllocatedBytes();
+  }
+
+  // Returns a vector of unique random memtable entries of size 'sample_size'.
+  //
+  // Note: the entries are stored in the unordered_set as length-prefixed keys,
+  // hence their representation in the set as "const char*".
+  // Note2: the size of the output set 'entries' is not enforced to be strictly
+  // equal to 'target_sample_size'. Its final size might be slightly
+  // greater or slightly less than 'target_sample_size'.
+  //
+  // REQUIRES: external synchronization to prevent simultaneous
+  // operations on the same MemTable (unless this Memtable is immutable).
+  // REQUIRES: SkipList memtable representation. This function is not
+  // implemented for any other type of memtable representation (vectorrep,
+  // hashskiplist,...).
+  void UniqueRandomSample(const uint64_t& target_sample_size,
+                          std::unordered_set<const char*>* entries) {
+    // TODO(bjlemaire): at the moment, only supported by skiplistrep.
+    // Extend it to all other memtable representations.
+    table_->UniqueRandomSample(num_entries(), target_sample_size, entries);
+  }
+
+  // This method heuristically determines if the memtable should continue to
+  // host more data.
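Editor's note: the heuristic mentioned just above feeds a three-state flag (see `FlushStateEnum` in the private section of this class), and `MarkFlushScheduled()`, defined next, claims the FLUSH_REQUESTED -> FLUSH_SCHEDULED transition with a compare-and-swap so that exactly one caller becomes responsible for scheduling the flush. A minimal standalone sketch of that handshake; the names below are illustrative stand-ins, not the actual RocksDB declarations:

```cpp
#include <atomic>
#include <cassert>

// Hypothetical mirror of MemTable's flush-state machine.
enum FlushState { kNotRequested, kRequested, kScheduled };

struct FlushFlag {
  std::atomic<FlushState> state{kNotRequested};

  // Analogous to ShouldScheduleFlush(): a cheap relaxed read.
  bool should_schedule() const {
    return state.load(std::memory_order_relaxed) == kRequested;
  }

  // Analogous to MarkFlushScheduled(): exactly one caller wins the
  // REQUESTED -> SCHEDULED transition and schedules the flush job.
  bool mark_scheduled() {
    FlushState expected = kRequested;
    return state.compare_exchange_strong(expected, kScheduled,
                                         std::memory_order_relaxed,
                                         std::memory_order_relaxed);
  }
};

int main() {
  FlushFlag f;
  f.state.store(kRequested, std::memory_order_relaxed);
  assert(f.mark_scheduled());   // first caller wins
  assert(!f.mark_scheduled());  // later callers do nothing
  return 0;
}
```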
+ bool ShouldScheduleFlush() const { + return flush_state_.load(std::memory_order_relaxed) == FLUSH_REQUESTED; + } + + // Returns true if a flush should be scheduled and the caller should + // be the one to schedule it + bool MarkFlushScheduled() { + auto before = FLUSH_REQUESTED; + return flush_state_.compare_exchange_strong(before, FLUSH_SCHEDULED, + std::memory_order_relaxed, + std::memory_order_relaxed); + } + + // Return an iterator that yields the contents of the memtable. + // + // The caller must ensure that the underlying MemTable remains live + // while the returned iterator is live. The keys returned by this + // iterator are internal keys encoded by AppendInternalKey in the + // db/dbformat.{h,cc} module. + // + // By default, it returns an iterator for prefix seek if prefix_extractor + // is configured in Options. + // arena: If not null, the arena needs to be used to allocate the Iterator. + // Calling ~Iterator of the iterator will destroy all the states but + // those allocated in arena. + InternalIterator* NewIterator(const ReadOptions& read_options, Arena* arena); + + // Returns an iterator that yields the range tombstones of the memtable. + // The caller must ensure that the underlying MemTable remains live + // while the returned iterator is live. + // @param immutable_memtable Whether this memtable is an immutable memtable. + // This information is not stored in memtable itself, so it needs to be + // specified by the caller. This flag is used internally to decide whether a + // cached fragmented range tombstone list can be returned. This cached version + // is constructed when a memtable becomes immutable. Setting the flag to false + // will always yield correct result, but may incur performance penalty as it + // always creates a new fragmented range tombstone list. + FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( + const ReadOptions& read_options, SequenceNumber read_seq, + bool immutable_memtable); + + Status VerifyEncodedEntry(Slice encoded, + const ProtectionInfoKVOS64& kv_prot_info); + + // Add an entry into memtable that maps key to value at the + // specified sequence number and with the specified type. + // Typically value will be empty if type==kTypeDeletion. + // + // REQUIRES: if allow_concurrent = false, external synchronization to prevent + // simultaneous operations on the same MemTable. + // + // Returns `Status::TryAgain` if the `seq`, `key` combination already exists + // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true. + // The next attempt should try a larger value for `seq`. + Status Add(SequenceNumber seq, ValueType type, const Slice& key, + const Slice& value, const ProtectionInfoKVOS64* kv_prot_info, + bool allow_concurrent = false, + MemTablePostProcessInfo* post_process_info = nullptr, + void** hint = nullptr); + + // Used to Get value associated with key or Get Merge Operands associated + // with key. + // If do_merge = true the default behavior which is Get value for key is + // executed. Expected behavior is described right below. + // If memtable contains a value for key, store it in *value and return true. + // If memtable contains a deletion for key, store a NotFound() error + // in *status and return true. + // If memtable contains Merge operation as the most recent entry for a key, + // and the merge process does not stop (not reaching a value or delete), + // prepend the current merge operand to *operands. + // store MergeInProgress in s, and return false. + // Else, return false. 
+ // If any operation was found, its most recent sequence number + // will be stored in *seq on success (regardless of whether true/false is + // returned). Otherwise, *seq will be set to kMaxSequenceNumber. + // On success, *s may be set to OK, NotFound, or MergeInProgress. Any other + // status returned indicates a corruption or other unexpected error. + // If do_merge = false then any Merge Operands encountered for key are simply + // stored in merge_context.operands_list and never actually merged to get a + // final value. The raw Merge Operands are eventually returned to the user. + // @param immutable_memtable Whether this memtable is immutable. Used + // internally by NewRangeTombstoneIterator(). See comment above + // NewRangeTombstoneIterator() for more detail. + bool Get(const LookupKey& key, std::string* value, + PinnableWideColumns* columns, std::string* timestamp, Status* s, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, + const ReadOptions& read_opts, bool immutable_memtable, + ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, + bool do_merge = true); + + bool Get(const LookupKey& key, std::string* value, + PinnableWideColumns* columns, std::string* timestamp, Status* s, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + const ReadOptions& read_opts, bool immutable_memtable, + ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, + bool do_merge = true) { + SequenceNumber seq; + return Get(key, value, columns, timestamp, s, merge_context, + max_covering_tombstone_seq, &seq, read_opts, immutable_memtable, + callback, is_blob_index, do_merge); + } + + // @param immutable_memtable Whether this memtable is immutable. Used + // internally by NewRangeTombstoneIterator(). See comment above + // NewRangeTombstoneIterator() for more detail. + void MultiGet(const ReadOptions& read_options, MultiGetRange* range, + ReadCallback* callback, bool immutable_memtable); + + // If `key` exists in current memtable with type value_type and the existing + // value is at least as large as the new value, updates it in-place. Otherwise + // adds the new value to the memtable out-of-place. + // + // Returns `Status::TryAgain` if the `seq`, `key` combination already exists + // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true. + // The next attempt should try a larger value for `seq`. + // + // REQUIRES: external synchronization to prevent simultaneous + // operations on the same MemTable. + Status Update(SequenceNumber seq, ValueType value_type, const Slice& key, + const Slice& value, const ProtectionInfoKVOS64* kv_prot_info); + + // If `key` exists in current memtable with type `kTypeValue` and the existing + // value is at least as large as the new value, updates it in-place. Otherwise + // if `key` exists in current memtable with type `kTypeValue`, adds the new + // value to the memtable out-of-place. + // + // Returns `Status::NotFound` if `key` does not exist in current memtable or + // the latest version of `key` does not have `kTypeValue`. + // + // Returns `Status::TryAgain` if the `seq`, `key` combination already exists + // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true. + // The next attempt should try a larger value for `seq`. + // + // REQUIRES: external synchronization to prevent simultaneous + // operations on the same MemTable. 
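Editor's note: the Update()/UpdateCallback() contract spelled out above is what AdvancedColumnFamilyOptions::inplace_update_support and inplace_callback expose to users (the callback type also appears in ImmutableMemTableOptions near the top of this header). A hedged client-side sketch, assuming a scratch DB path; the replace-if-smaller policy is invented purely for illustration:

```cpp
#include <cstdint>
#include <cstring>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

using ROCKSDB_NAMESPACE::DB;
using ROCKSDB_NAMESPACE::Options;
using ROCKSDB_NAMESPACE::Slice;
using ROCKSDB_NAMESPACE::UpdateStatus;
using ROCKSDB_NAMESPACE::WriteOptions;

// Illustrative policy: treat `delta_value` as the full replacement value.
// If it fits in the existing slot, shrink-and-overwrite (UPDATED_INPLACE,
// applied by MemTable::UpdateCallback under the per-key lock); otherwise
// hand back a merged value so the memtable does an out-of-place Add()
// (UPDATED).
static UpdateStatus ReplaceIfSmaller(char* existing_value,
                                     uint32_t* existing_value_size,
                                     Slice delta_value,
                                     std::string* merged_value) {
  if (delta_value.size() <= *existing_value_size) {
    std::memcpy(existing_value, delta_value.data(), delta_value.size());
    *existing_value_size = static_cast<uint32_t>(delta_value.size());
    return UpdateStatus::UPDATED_INPLACE;
  }
  merged_value->assign(delta_value.data(), delta_value.size());
  return UpdateStatus::UPDATED;
}

int main() {
  Options options;
  options.create_if_missing = true;
  options.inplace_update_support = true;  // Put() now routes through Update*()
  options.inplace_callback = ReplaceIfSmaller;
  // In-place updates are incompatible with concurrent memtable writes.
  options.allow_concurrent_memtable_write = false;
  DB* db = nullptr;
  if (DB::Open(options, "/tmp/inplace-demo", &db).ok()) {  // path is arbitrary
    db->Put(WriteOptions(), "k", "a-reasonably-long-initial-value");
    db->Put(WriteOptions(), "k", "tiny");  // small enough: rewritten in place
    delete db;
  }
  return 0;
}
```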
+ Status UpdateCallback(SequenceNumber seq, const Slice& key, + const Slice& delta, + const ProtectionInfoKVOS64* kv_prot_info); + + // Returns the number of successive merge entries starting from the newest + // entry for the key up to the last non-merge entry or last entry for the + // key in the memtable. + size_t CountSuccessiveMergeEntries(const LookupKey& key); + + // Update counters and flush status after inserting a whole write batch + // Used in concurrent memtable inserts. + void BatchPostProcess(const MemTablePostProcessInfo& update_counters) { + num_entries_.fetch_add(update_counters.num_entries, + std::memory_order_relaxed); + data_size_.fetch_add(update_counters.data_size, std::memory_order_relaxed); + if (update_counters.num_deletes != 0) { + num_deletes_.fetch_add(update_counters.num_deletes, + std::memory_order_relaxed); + } + UpdateFlushState(); + } + + // Get total number of entries in the mem table. + // REQUIRES: external synchronization to prevent simultaneous + // operations on the same MemTable (unless this Memtable is immutable). + uint64_t num_entries() const { + return num_entries_.load(std::memory_order_relaxed); + } + + // Get total number of deletes in the mem table. + // REQUIRES: external synchronization to prevent simultaneous + // operations on the same MemTable (unless this Memtable is immutable). + uint64_t num_deletes() const { + return num_deletes_.load(std::memory_order_relaxed); + } + + uint64_t get_data_size() const { + return data_size_.load(std::memory_order_relaxed); + } + + // Dynamically change the memtable's capacity. If set below the current usage, + // the next key added will trigger a flush. Can only increase size when + // memtable prefix bloom is disabled, since we can't easily allocate more + // space. + void UpdateWriteBufferSize(size_t new_write_buffer_size) { + if (bloom_filter_ == nullptr || + new_write_buffer_size < write_buffer_size_) { + write_buffer_size_.store(new_write_buffer_size, + std::memory_order_relaxed); + } + } + + // Returns the edits area that is needed for flushing the memtable + VersionEdit* GetEdits() { return &edit_; } + + // Returns if there is no entry inserted to the mem table. + // REQUIRES: external synchronization to prevent simultaneous + // operations on the same MemTable (unless this Memtable is immutable). + bool IsEmpty() const { return first_seqno_ == 0; } + + // Returns the sequence number of the first element that was inserted + // into the memtable. + // REQUIRES: external synchronization to prevent simultaneous + // operations on the same MemTable (unless this Memtable is immutable). + SequenceNumber GetFirstSequenceNumber() { + return first_seqno_.load(std::memory_order_relaxed); + } + + // Returns the sequence number of the first element that was inserted + // into the memtable. + // REQUIRES: external synchronization to prevent simultaneous + // operations on the same MemTable (unless this Memtable is immutable). + void SetFirstSequenceNumber(SequenceNumber first_seqno) { + return first_seqno_.store(first_seqno, std::memory_order_relaxed); + } + + // Returns the sequence number that is guaranteed to be smaller than or equal + // to the sequence number of any key that could be inserted into this + // memtable. It can then be assumed that any write with a larger(or equal) + // sequence number will be present in this memtable or a later memtable. + // + // If the earliest sequence number could not be determined, + // kMaxSequenceNumber will be returned. 
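Editor's note: the guarantee documented above for GetEarliestSequenceNumber() (any write with a larger or equal sequence number lands in this memtable or a newer one) is what lets a reader skip an entire memtable whose earliest possible sequence number already exceeds its snapshot. A toy model of that pruning rule, with invented types rather than the engine's actual read path:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

using SequenceNumber = uint64_t;  // matches the typedef RocksDB uses

// Toy stand-in: all we keep per memtable is its earliest possible seqno.
struct MemView {
  SequenceNumber earliest_seq;
};

// A read at `snapshot_seq` can skip any memtable whose earliest possible
// entry is already newer than the snapshot: nothing in it can be visible.
size_t CountSearchable(const std::vector<MemView>& memtables,
                       SequenceNumber snapshot_seq) {
  size_t n = 0;
  for (const MemView& m : memtables) {
    if (m.earliest_seq <= snapshot_seq) {
      ++n;
    }
  }
  return n;
}

int main() {
  std::vector<MemView> mems = {{10}, {55}, {90}};
  assert(CountSearchable(mems, /*snapshot_seq=*/60) == 2);
  return 0;
}
```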
+  SequenceNumber GetEarliestSequenceNumber() {
+    return earliest_seqno_.load(std::memory_order_relaxed);
+  }
+
+  // Sets the sequence number that is guaranteed to be smaller than or equal
+  // to the sequence number of any key that could be inserted into this
+  // memtable. It can then be assumed that any write with a larger (or equal)
+  // sequence number will be present in this memtable or a later memtable.
+  // Used only for the MemPurge operation.
+  void SetEarliestSequenceNumber(SequenceNumber earliest_seqno) {
+    return earliest_seqno_.store(earliest_seqno, std::memory_order_relaxed);
+  }
+
+  // DB's latest sequence ID when the memtable is created. This number
+  // may be updated to a more recent one before any key is inserted.
+  SequenceNumber GetCreationSeq() const { return creation_seq_; }
+
+  void SetCreationSeq(SequenceNumber sn) { creation_seq_ = sn; }
+
+  // Returns the next active logfile number when this memtable is about to
+  // be flushed to storage.
+  // REQUIRES: external synchronization to prevent simultaneous
+  // operations on the same MemTable.
+  uint64_t GetNextLogNumber() { return mem_next_logfile_number_; }
+
+  // Sets the next active logfile number when this memtable is about to
+  // be flushed to storage.
+  // REQUIRES: external synchronization to prevent simultaneous
+  // operations on the same MemTable.
+  void SetNextLogNumber(uint64_t num) { mem_next_logfile_number_ = num; }
+
+  // If this memtable contains data from a committed
+  // two phase transaction we must take note of the
+  // log which contains that data so we can know
+  // when to release that log.
+  void RefLogContainingPrepSection(uint64_t log);
+  uint64_t GetMinLogContainingPrepSection();
+
+  // Notify the underlying storage that no more items will be added.
+  // REQUIRES: external synchronization to prevent simultaneous
+  // operations on the same MemTable.
+  // After MarkImmutable() is called, you should not attempt to
+  // write anything to this MemTable(). (I.e. do not call Add() or Update()).
+  void MarkImmutable() {
+    table_->MarkReadOnly();
+    mem_tracker_.DoneAllocating();
+  }
+
+  // Notify the underlying storage that all data it contained has been
+  // persisted.
+  // REQUIRES: external synchronization to prevent simultaneous
+  // operations on the same MemTable.
+  void MarkFlushed() { table_->MarkFlushed(); }
+
+  // Return true if the current MemTableRep supports merge operator.
+  bool IsMergeOperatorSupported() const {
+    return table_->IsMergeOperatorSupported();
+  }
+
+  // Return true if the current MemTableRep supports snapshots.
+  // In-place update prevents snapshots.
+  bool IsSnapshotSupported() const {
+    return table_->IsSnapshotSupported() && !moptions_.inplace_update_support;
+  }
+
+  struct MemTableStats {
+    uint64_t size;
+    uint64_t count;
+  };
+
+  MemTableStats ApproximateStats(const Slice& start_ikey,
+                                 const Slice& end_ikey);
+
+  // Get the lock associated with the key.
+  port::RWMutex* GetLock(const Slice& key);
+
+  const InternalKeyComparator& GetInternalKeyComparator() const {
+    return comparator_.comparator;
+  }
+
+  const ImmutableMemTableOptions* GetImmutableMemTableOptions() const {
+    return &moptions_;
+  }
+
+  uint64_t ApproximateOldestKeyTime() const {
+    return oldest_key_time_.load(std::memory_order_relaxed);
+  }
+
+  // REQUIRES: db_mutex held.
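Editor's note: the two-phase-commit bookkeeping declared a few lines up (RefLogContainingPrepSection / GetMinLogContainingPrepSection) is implemented earlier in this patch, in memtable.cc, as a lock-free keep-the-minimum loop in which 0 means "unset". The same pattern in isolation, lightly simplified because compare_exchange_strong already refreshes its expected value on failure:

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>

// Keep the smallest non-zero value ever offered; 0 means "no value yet".
// Mirrors the CAS loop in MemTable::RefLogContainingPrepSection().
void RefMinLog(std::atomic<uint64_t>& min_log, uint64_t log) {
  assert(log > 0);
  uint64_t cur = min_log.load();
  while ((log < cur || cur == 0) &&
         !min_log.compare_exchange_strong(cur, log)) {
    // On failure, compare_exchange_strong stored the current value in `cur`,
    // so the loop condition re-tests against fresh state.
  }
}

int main() {
  std::atomic<uint64_t> min_log{0};
  RefMinLog(min_log, 7);
  RefMinLog(min_log, 3);
  RefMinLog(min_log, 9);  // a larger log number leaves the minimum alone
  assert(min_log.load() == 3);
  return 0;
}
```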
+ void SetID(uint64_t id) { id_ = id; } + + uint64_t GetID() const { return id_; } + + void SetFlushCompleted(bool completed) { flush_completed_ = completed; } + + uint64_t GetFileNumber() const { return file_number_; } + + void SetFileNumber(uint64_t file_num) { file_number_ = file_num; } + + void SetFlushInProgress(bool in_progress) { + flush_in_progress_ = in_progress; + } + + void SetFlushJobInfo(std::unique_ptr&& info) { + flush_job_info_ = std::move(info); + } + + std::unique_ptr ReleaseFlushJobInfo() { + return std::move(flush_job_info_); + } + + // Returns a heuristic flush decision + bool ShouldFlushNow(); + + void ConstructFragmentedRangeTombstones(); + + // Returns whether a fragmented range tombstone list is already constructed + // for this memtable. It should be constructed right before a memtable is + // added to an immutable memtable list. Note that if a memtable does not have + // any range tombstone, then no range tombstone list will ever be constructed. + // @param allow_empty Specifies whether a memtable with no range tombstone is + // considered to have its fragmented range tombstone list constructed. + bool IsFragmentedRangeTombstonesConstructed(bool allow_empty = true) const { + if (allow_empty) { + return fragmented_range_tombstone_list_.get() != nullptr || + is_range_del_table_empty_; + } else { + return fragmented_range_tombstone_list_.get() != nullptr; + } + } + + // Returns Corruption status if verification fails. + static Status VerifyEntryChecksum(const char* entry, + uint32_t protection_bytes_per_key, + bool allow_data_in_errors = false); + + private: + enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED }; + + friend class MemTableIterator; + friend class MemTableBackwardIterator; + friend class MemTableList; + + KeyComparator comparator_; + const ImmutableMemTableOptions moptions_; + int refs_; + const size_t kArenaBlockSize; + AllocTracker mem_tracker_; + ConcurrentArena arena_; + std::unique_ptr table_; + std::unique_ptr range_del_table_; + std::atomic_bool is_range_del_table_empty_; + + // Total data size of all data inserted + std::atomic data_size_; + std::atomic num_entries_; + std::atomic num_deletes_; + + // Dynamically changeable memtable option + std::atomic write_buffer_size_; + + // These are used to manage memtable flushes to storage + bool flush_in_progress_; // started the flush + bool flush_completed_; // finished the flush + uint64_t file_number_; // filled up after flush is complete + + // The updates to be applied to the transaction log when this + // memtable is flushed to storage. + VersionEdit edit_; + + // The sequence number of the kv that was inserted first + std::atomic first_seqno_; + + // The db sequence number at the time of creation or kMaxSequenceNumber + // if not set. + std::atomic earliest_seqno_; + + SequenceNumber creation_seq_; + + // The log files earlier than this number can be deleted. + uint64_t mem_next_logfile_number_; + + // the earliest log containing a prepared section + // which has been inserted into this memtable. + std::atomic min_prep_log_referenced_; + + // rw locks for inplace updates + std::vector locks_; + + const SliceTransform* const prefix_extractor_; + std::unique_ptr bloom_filter_; + + std::atomic flush_state_; + + SystemClock* clock_; + + // Extract sequential insert prefixes. + const SliceTransform* insert_with_hint_prefix_extractor_; + + // Insert hints for each prefix. 
+ UnorderedMapH insert_hints_; + + // Timestamp of oldest key + std::atomic oldest_key_time_; + + // Memtable id to track flush. + uint64_t id_ = 0; + + // Sequence number of the atomic flush that is responsible for this memtable. + // The sequence number of atomic flush is a seq, such that no writes with + // sequence numbers greater than or equal to seq are flushed, while all + // writes with sequence number smaller than seq are flushed. + SequenceNumber atomic_flush_seqno_; + + // keep track of memory usage in table_, arena_, and range_del_table_. + // Gets refreshed inside `ApproximateMemoryUsage()` or `ShouldFlushNow` + std::atomic approximate_memory_usage_; + + // Flush job info of the current memtable. + std::unique_ptr flush_job_info_; + + // Updates flush_state_ using ShouldFlushNow() + void UpdateFlushState(); + + void UpdateOldestKeyTime(); + + void GetFromTable(const LookupKey& key, + SequenceNumber max_covering_tombstone_seq, bool do_merge, + ReadCallback* callback, bool* is_blob_index, + std::string* value, PinnableWideColumns* columns, + std::string* timestamp, Status* s, + MergeContext* merge_context, SequenceNumber* seq, + bool* found_final_value, bool* merge_in_progress); + + // Always returns non-null and assumes certain pre-checks (e.g., + // is_range_del_table_empty_) are done. This is only valid during the lifetime + // of the underlying memtable. + // read_seq and read_options.timestamp will be used as the upper bound + // for range tombstones. + FragmentedRangeTombstoneIterator* NewRangeTombstoneIteratorInternal( + const ReadOptions& read_options, SequenceNumber read_seq, + bool immutable_memtable); + + // The fragmented range tombstones of this memtable. + // This is constructed when this memtable becomes immutable + // if !is_range_del_table_empty_. + std::unique_ptr + fragmented_range_tombstone_list_; + + // makes sure there is a single range tombstone writer to invalidate cache + std::mutex range_del_mutex_; + CoreLocalArray> + cached_range_tombstone_; + + void UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info, + const Slice& key, const Slice& value, ValueType type, + SequenceNumber s, char* checksum_ptr); +}; + +extern const char* EncodeKey(std::string* scratch, const Slice& target); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.cc new file mode 100644 index 0000000000..ee1563f016 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.cc @@ -0,0 +1,987 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+// +#include "db/memtable_list.h" + +#include +#include +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "db/memtable.h" +#include "db/range_tombstone_fragmenter.h" +#include "db/version_set.h" +#include "logging/log_buffer.h" +#include "logging/logging.h" +#include "monitoring/thread_status_util.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/iterator.h" +#include "table/merging_iterator.h" +#include "test_util/sync_point.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +class InternalKeyComparator; +class Mutex; +class VersionSet; + +void MemTableListVersion::AddMemTable(MemTable* m) { + memlist_.push_front(m); + *parent_memtable_list_memory_usage_ += m->ApproximateMemoryUsage(); +} + +void MemTableListVersion::UnrefMemTable(autovector* to_delete, + MemTable* m) { + if (m->Unref()) { + to_delete->push_back(m); + assert(*parent_memtable_list_memory_usage_ >= m->ApproximateMemoryUsage()); + *parent_memtable_list_memory_usage_ -= m->ApproximateMemoryUsage(); + } +} + +MemTableListVersion::MemTableListVersion( + size_t* parent_memtable_list_memory_usage, const MemTableListVersion& old) + : max_write_buffer_number_to_maintain_( + old.max_write_buffer_number_to_maintain_), + max_write_buffer_size_to_maintain_( + old.max_write_buffer_size_to_maintain_), + parent_memtable_list_memory_usage_(parent_memtable_list_memory_usage) { + memlist_ = old.memlist_; + for (auto& m : memlist_) { + m->Ref(); + } + + memlist_history_ = old.memlist_history_; + for (auto& m : memlist_history_) { + m->Ref(); + } +} + +MemTableListVersion::MemTableListVersion( + size_t* parent_memtable_list_memory_usage, + int max_write_buffer_number_to_maintain, + int64_t max_write_buffer_size_to_maintain) + : max_write_buffer_number_to_maintain_(max_write_buffer_number_to_maintain), + max_write_buffer_size_to_maintain_(max_write_buffer_size_to_maintain), + parent_memtable_list_memory_usage_(parent_memtable_list_memory_usage) {} + +void MemTableListVersion::Ref() { ++refs_; } + +// called by superversion::clean() +void MemTableListVersion::Unref(autovector* to_delete) { + assert(refs_ >= 1); + --refs_; + if (refs_ == 0) { + // if to_delete is equal to nullptr it means we're confident + // that refs_ will not be zero + assert(to_delete != nullptr); + for (const auto& m : memlist_) { + UnrefMemTable(to_delete, m); + } + for (const auto& m : memlist_history_) { + UnrefMemTable(to_delete, m); + } + delete this; + } +} + +int MemTableList::NumNotFlushed() const { + int size = static_cast(current_->memlist_.size()); + assert(num_flush_not_started_ <= size); + return size; +} + +int MemTableList::NumFlushed() const { + return static_cast(current_->memlist_history_.size()); +} + +// Search all the memtables starting from the most recent one. +// Return the most recent value found, if any. +// Operands stores the list of merge operations to apply, so far. 
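Editor's note: the Get() entry point defined next delegates to GetFromList(), whose contract (visible further down) is the newest-first scan described above: record the first definitive answer and stop, because older memtables can only hold stale versions. Stripped of merge operands, tombstone sequencing, and read callbacks, the control flow reduces to this sketch with toy types, not the real MemTable interface:

```cpp
#include <cassert>
#include <list>
#include <map>
#include <optional>
#include <string>

// Toy stand-in for an immutable memtable: key -> value, where nullopt
// models a deletion tombstone.
using MiniMem = std::map<std::string, std::optional<std::string>>;

// Newest-first scan, modeled on MemTableListVersion::GetFromList(): the
// first memtable holding any entry for the key gives the definitive answer.
std::optional<std::string> GetNewestFirst(
    const std::list<const MiniMem*>& newest_first, const std::string& key) {
  for (const MiniMem* mem : newest_first) {
    auto it = mem->find(key);
    if (it != mem->end()) {
      return it->second;  // a live value, or nullopt for "deleted"
    }
  }
  return std::nullopt;  // absent here; the real engine consults SSTs next
}

int main() {
  MiniMem newer{{"a", std::nullopt}};          // newer memtable deleted "a"
  MiniMem older{{"a", std::string("stale")}};  // must be shadowed
  std::list<const MiniMem*> memlist{&newer, &older};
  assert(!GetNewestFirst(memlist, "a").has_value());
  return 0;
}
```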
+bool MemTableListVersion::Get(const LookupKey& key, std::string* value, + PinnableWideColumns* columns, + std::string* timestamp, Status* s, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + SequenceNumber* seq, const ReadOptions& read_opts, + ReadCallback* callback, bool* is_blob_index) { + return GetFromList(&memlist_, key, value, columns, timestamp, s, + merge_context, max_covering_tombstone_seq, seq, read_opts, + callback, is_blob_index); +} + +void MemTableListVersion::MultiGet(const ReadOptions& read_options, + MultiGetRange* range, + ReadCallback* callback) { + for (auto memtable : memlist_) { + memtable->MultiGet(read_options, range, callback, + true /* immutable_memtable */); + if (range->empty()) { + return; + } + } +} + +bool MemTableListVersion::GetMergeOperands( + const LookupKey& key, Status* s, MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts) { + for (MemTable* memtable : memlist_) { + bool done = memtable->Get( + key, /*value=*/nullptr, /*columns=*/nullptr, /*timestamp=*/nullptr, s, + merge_context, max_covering_tombstone_seq, read_opts, + true /* immutable_memtable */, nullptr, nullptr, false); + if (done) { + return true; + } + } + return false; +} + +bool MemTableListVersion::GetFromHistory( + const LookupKey& key, std::string* value, PinnableWideColumns* columns, + std::string* timestamp, Status* s, MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, + const ReadOptions& read_opts, bool* is_blob_index) { + return GetFromList(&memlist_history_, key, value, columns, timestamp, s, + merge_context, max_covering_tombstone_seq, seq, read_opts, + nullptr /*read_callback*/, is_blob_index); +} + +bool MemTableListVersion::GetFromList( + std::list* list, const LookupKey& key, std::string* value, + PinnableWideColumns* columns, std::string* timestamp, Status* s, + MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, + SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback, + bool* is_blob_index) { + *seq = kMaxSequenceNumber; + + for (auto& memtable : *list) { + assert(memtable->IsFragmentedRangeTombstonesConstructed()); + SequenceNumber current_seq = kMaxSequenceNumber; + + bool done = + memtable->Get(key, value, columns, timestamp, s, merge_context, + max_covering_tombstone_seq, ¤t_seq, read_opts, + true /* immutable_memtable */, callback, is_blob_index); + if (*seq == kMaxSequenceNumber) { + // Store the most recent sequence number of any operation on this key. + // Since we only care about the most recent change, we only need to + // return the first operation found when searching memtables in + // reverse-chronological order. + // current_seq would be equal to kMaxSequenceNumber if the value was to be + // skipped. This allows seq to be assigned again when the next value is + // read. + *seq = current_seq; + } + + if (done) { + assert(*seq != kMaxSequenceNumber || s->IsNotFound()); + return true; + } + if (!done && !s->ok() && !s->IsMergeInProgress() && !s->IsNotFound()) { + return false; + } + } + return false; +} + +Status MemTableListVersion::AddRangeTombstoneIterators( + const ReadOptions& read_opts, Arena* /*arena*/, + RangeDelAggregator* range_del_agg) { + assert(range_del_agg != nullptr); + // Except for snapshot read, using kMaxSequenceNumber is OK because these + // are immutable memtables. + SequenceNumber read_seq = read_opts.snapshot != nullptr + ? 
read_opts.snapshot->GetSequenceNumber() + : kMaxSequenceNumber; + for (auto& m : memlist_) { + assert(m->IsFragmentedRangeTombstonesConstructed()); + std::unique_ptr range_del_iter( + m->NewRangeTombstoneIterator(read_opts, read_seq, + true /* immutable_memtable */)); + range_del_agg->AddTombstones(std::move(range_del_iter)); + } + return Status::OK(); +} + +void MemTableListVersion::AddIterators( + const ReadOptions& options, std::vector* iterator_list, + Arena* arena) { + for (auto& m : memlist_) { + iterator_list->push_back(m->NewIterator(options, arena)); + } +} + +void MemTableListVersion::AddIterators(const ReadOptions& options, + MergeIteratorBuilder* merge_iter_builder, + bool add_range_tombstone_iter) { + for (auto& m : memlist_) { + auto mem_iter = m->NewIterator(options, merge_iter_builder->GetArena()); + if (!add_range_tombstone_iter || options.ignore_range_deletions) { + merge_iter_builder->AddIterator(mem_iter); + } else { + // Except for snapshot read, using kMaxSequenceNumber is OK because these + // are immutable memtables. + SequenceNumber read_seq = options.snapshot != nullptr + ? options.snapshot->GetSequenceNumber() + : kMaxSequenceNumber; + TruncatedRangeDelIterator* mem_tombstone_iter = nullptr; + auto range_del_iter = m->NewRangeTombstoneIterator( + options, read_seq, true /* immutale_memtable */); + if (range_del_iter == nullptr || range_del_iter->empty()) { + delete range_del_iter; + } else { + mem_tombstone_iter = new TruncatedRangeDelIterator( + std::unique_ptr(range_del_iter), + &m->GetInternalKeyComparator(), nullptr /* smallest */, + nullptr /* largest */); + } + merge_iter_builder->AddPointAndTombstoneIterator(mem_iter, + mem_tombstone_iter); + } + } +} + +uint64_t MemTableListVersion::GetTotalNumEntries() const { + uint64_t total_num = 0; + for (auto& m : memlist_) { + total_num += m->num_entries(); + } + return total_num; +} + +MemTable::MemTableStats MemTableListVersion::ApproximateStats( + const Slice& start_ikey, const Slice& end_ikey) { + MemTable::MemTableStats total_stats = {0, 0}; + for (auto& m : memlist_) { + auto mStats = m->ApproximateStats(start_ikey, end_ikey); + total_stats.size += mStats.size; + total_stats.count += mStats.count; + } + return total_stats; +} + +uint64_t MemTableListVersion::GetTotalNumDeletes() const { + uint64_t total_num = 0; + for (auto& m : memlist_) { + total_num += m->num_deletes(); + } + return total_num; +} + +SequenceNumber MemTableListVersion::GetEarliestSequenceNumber( + bool include_history) const { + if (include_history && !memlist_history_.empty()) { + return memlist_history_.back()->GetEarliestSequenceNumber(); + } else if (!memlist_.empty()) { + return memlist_.back()->GetEarliestSequenceNumber(); + } else { + return kMaxSequenceNumber; + } +} + +SequenceNumber MemTableListVersion::GetFirstSequenceNumber() const { + SequenceNumber min_first_seqno = kMaxSequenceNumber; + // The first memtable in the list might not be the oldest one with mempurge + for (const auto& m : memlist_) { + min_first_seqno = std::min(m->GetFirstSequenceNumber(), min_first_seqno); + } + return min_first_seqno; +} + +// caller is responsible for referencing m +void MemTableListVersion::Add(MemTable* m, autovector* to_delete) { + assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable + AddMemTable(m); + // m->MemoryAllocatedBytes() is added in MemoryAllocatedBytesExcludingLast + TrimHistory(to_delete, 0); +} + +// Removes m from list of memtables not flushed. Caller should NOT Unref m. 
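Editor's note: Remove(), defined next, either parks a flushed memtable in memlist_history_ (when a write-buffer-to-maintain budget is configured) or drops its reference immediately; TrimHistory()/MemtableLimitExceeded(), a little further down, then enforce the byte budget. A toy model of the byte-budget variant with invented names and deliberately simplified accounting (the real check folds the mutable memtables' usage into the same total):

```cpp
#include <cassert>
#include <cstddef>
#include <deque>

// Toy model of the byte-budget history trim: drop flushed memtables from
// the back (oldest) of the history while the retained total would still
// exceed the budget.
struct Trimmer {
  std::deque<size_t> history_sizes;  // front = newest flushed, back = oldest
  size_t budget_bytes;

  // Total retained, assuming the oldest history entry is dropped.
  size_t TotalExcludingOldest() const {
    size_t total = 0;
    for (size_t s : history_sizes) total += s;
    return history_sizes.empty() ? total : total - history_sizes.back();
  }

  // `mutable_usage` plays the role of the live memtables' usage passed in.
  void TrimHistory(size_t mutable_usage) {
    while (!history_sizes.empty() &&
           TotalExcludingOldest() + mutable_usage >= budget_bytes) {
      history_sizes.pop_back();  // unref the oldest flushed memtable
    }
  }
};

int main() {
  Trimmer t{{100, 100, 100}, /*budget_bytes=*/250};
  t.TrimHistory(/*mutable_usage=*/100);
  assert(t.history_sizes.size() == 2);  // the oldest entry was dropped
  return 0;
}
```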
+void MemTableListVersion::Remove(MemTable* m, + autovector* to_delete) { + assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable + memlist_.remove(m); + + m->MarkFlushed(); + if (max_write_buffer_size_to_maintain_ > 0 || + max_write_buffer_number_to_maintain_ > 0) { + memlist_history_.push_front(m); + // Unable to get size of mutable memtable at this point, pass 0 to + // TrimHistory as a best effort. + TrimHistory(to_delete, 0); + } else { + UnrefMemTable(to_delete, m); + } +} + +// return the total memory usage assuming the oldest flushed memtable is dropped +size_t MemTableListVersion::MemoryAllocatedBytesExcludingLast() const { + size_t total_memtable_size = 0; + for (auto& memtable : memlist_) { + total_memtable_size += memtable->MemoryAllocatedBytes(); + } + for (auto& memtable : memlist_history_) { + total_memtable_size += memtable->MemoryAllocatedBytes(); + } + if (!memlist_history_.empty()) { + total_memtable_size -= memlist_history_.back()->MemoryAllocatedBytes(); + } + return total_memtable_size; +} + +bool MemTableListVersion::MemtableLimitExceeded(size_t usage) { + if (max_write_buffer_size_to_maintain_ > 0) { + // calculate the total memory usage after dropping the oldest flushed + // memtable, compare with max_write_buffer_size_to_maintain_ to decide + // whether to trim history + return MemoryAllocatedBytesExcludingLast() + usage >= + static_cast(max_write_buffer_size_to_maintain_); + } else if (max_write_buffer_number_to_maintain_ > 0) { + return memlist_.size() + memlist_history_.size() > + static_cast(max_write_buffer_number_to_maintain_); + } else { + return false; + } +} + +// Make sure we don't use up too much space in history +bool MemTableListVersion::TrimHistory(autovector* to_delete, + size_t usage) { + bool ret = false; + while (MemtableLimitExceeded(usage) && !memlist_history_.empty()) { + MemTable* x = memlist_history_.back(); + memlist_history_.pop_back(); + + UnrefMemTable(to_delete, x); + ret = true; + } + return ret; +} + +// Returns true if there is at least one memtable on which flush has +// not yet started. +bool MemTableList::IsFlushPending() const { + if ((flush_requested_ && num_flush_not_started_ > 0) || + (num_flush_not_started_ >= min_write_buffer_number_to_merge_)) { + assert(imm_flush_needed.load(std::memory_order_relaxed)); + return true; + } + return false; +} + +bool MemTableList::IsFlushPendingOrRunning() const { + if (current_->memlist_.size() - num_flush_not_started_ > 0) { + // Flush is already running on at least one memtable + return true; + } + return IsFlushPending(); +} + +// Returns the memtables that need to be flushed. +void MemTableList::PickMemtablesToFlush(uint64_t max_memtable_id, + autovector* ret, + uint64_t* max_next_log_number) { + AutoThreadOperationStageUpdater stage_updater( + ThreadStatus::STAGE_PICK_MEMTABLES_TO_FLUSH); + const auto& memlist = current_->memlist_; + bool atomic_flush = false; + + // Note: every time MemTableList::Add(mem) is called, it adds the new mem + // at the FRONT of the memlist (memlist.push_front(mem)). Therefore, by + // iterating through the memlist starting at the end, the vector + // ret is filled with memtables already sorted in increasing MemTable ID. + // However, when the mempurge feature is activated, new memtables with older + // IDs will be added to the memlist. 
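Editor's note: the loop that follows implements the ordering just described; its key subtlety is the else-if break that refuses to pick a non-consecutive run of memtables. Reduced to plain structs (atomic-flush and counter bookkeeping omitted):

```cpp
#include <cassert>
#include <cstdint>
#include <list>
#include <vector>

struct ToyMem {
  uint64_t id;
  bool flush_in_progress;
};

// Reduced model of PickMemtablesToFlush(): scan oldest -> newest and pick a
// *consecutive* run of memtables not yet being flushed, stopping either at
// max_id or at the first in-progress memtable once something was picked.
std::vector<uint64_t> Pick(const std::list<ToyMem>& newest_first,
                           uint64_t max_id) {
  std::vector<uint64_t> picked;
  for (auto it = newest_first.rbegin(); it != newest_first.rend(); ++it) {
    if (it->id > max_id) break;
    if (!it->flush_in_progress) {
      picked.push_back(it->id);
    } else if (!picked.empty()) {
      break;  // never pick a non-consecutive run
    }
  }
  return picked;
}

int main() {
  // Front is newest, as in MemTableList (Add() push_front's new memtables).
  std::list<ToyMem> memlist{{4, false}, {3, true}, {2, false}, {1, false}};
  auto picked = Pick(memlist, /*max_id=*/4);
  assert((picked == std::vector<uint64_t>{1, 2}));  // stops at in-progress #3
  return 0;
}
```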
+ for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) { + MemTable* m = *it; + if (!atomic_flush && m->atomic_flush_seqno_ != kMaxSequenceNumber) { + atomic_flush = true; + } + if (m->GetID() > max_memtable_id) { + break; + } + if (!m->flush_in_progress_) { + assert(!m->flush_completed_); + num_flush_not_started_--; + if (num_flush_not_started_ == 0) { + imm_flush_needed.store(false, std::memory_order_release); + } + m->flush_in_progress_ = true; // flushing will start very soon + if (max_next_log_number) { + *max_next_log_number = + std::max(m->GetNextLogNumber(), *max_next_log_number); + } + ret->push_back(m); + } else if (!ret->empty()) { + // This `break` is necessary to prevent picking non-consecutive memtables + // in case `memlist` has one or more entries with + // `flush_in_progress_ == true` sandwiched between entries with + // `flush_in_progress_ == false`. This could happen after parallel flushes + // are picked and the one flushing older memtables is rolled back. + break; + } + } + if (!atomic_flush || num_flush_not_started_ == 0) { + flush_requested_ = false; // start-flush request is complete + } +} + +void MemTableList::RollbackMemtableFlush(const autovector& mems, + uint64_t /*file_number*/) { + AutoThreadOperationStageUpdater stage_updater( + ThreadStatus::STAGE_MEMTABLE_ROLLBACK); + assert(!mems.empty()); + + // If the flush was not successful, then just reset state. + // Maybe a succeeding attempt to flush will be successful. + for (MemTable* m : mems) { + assert(m->flush_in_progress_); + assert(m->file_number_ == 0); + + m->flush_in_progress_ = false; + m->flush_completed_ = false; + m->edit_.Clear(); + num_flush_not_started_++; + } + imm_flush_needed.store(true, std::memory_order_release); +} + +// Try record a successful flush in the manifest file. It might just return +// Status::OK letting a concurrent flush to do actual the recording.. +Status MemTableList::TryInstallMemtableFlushResults( + ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, + const autovector& mems, LogsWithPrepTracker* prep_tracker, + VersionSet* vset, InstrumentedMutex* mu, uint64_t file_number, + autovector* to_delete, FSDirectory* db_directory, + LogBuffer* log_buffer, + std::list>* committed_flush_jobs_info, + bool write_edits) { + AutoThreadOperationStageUpdater stage_updater( + ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS); + mu->AssertHeld(); + + const ReadOptions read_options(Env::IOActivity::kFlush); + + // Flush was successful + // Record the status on the memtable object. Either this call or a call by a + // concurrent flush thread will read the status and write it to manifest. + for (size_t i = 0; i < mems.size(); ++i) { + // All the edits are associated with the first memtable of this batch. + assert(i == 0 || mems[i]->GetEdits()->NumEntries() == 0); + + mems[i]->flush_completed_ = true; + mems[i]->file_number_ = file_number; + } + + // if some other thread is already committing, then return + Status s; + if (commit_in_progress_) { + TEST_SYNC_POINT("MemTableList::TryInstallMemtableFlushResults:InProgress"); + return s; + } + + // Only a single thread can be executing this piece of code + commit_in_progress_ = true; + + // Retry until all completed flushes are committed. New flushes can finish + // while the current thread is writing manifest where mutex is released. + while (s.ok()) { + auto& memlist = current_->memlist_; + // The back is the oldest; if flush_completed_ is not set to it, it means + // that we were assigned a more recent memtable. 
The memtables' flushes must + // be recorded in manifest in order. A concurrent flush thread, who is + // assigned to flush the oldest memtable, will later wake up and does all + // the pending writes to manifest, in order. + if (memlist.empty() || !memlist.back()->flush_completed_) { + break; + } + // scan all memtables from the earliest, and commit those + // (in that order) that have finished flushing. Memtables + // are always committed in the order that they were created. + uint64_t batch_file_number = 0; + size_t batch_count = 0; + autovector edit_list; + autovector memtables_to_flush; + // enumerate from the last (earliest) element to see how many batch finished + for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) { + MemTable* m = *it; + if (!m->flush_completed_) { + break; + } + if (it == memlist.rbegin() || batch_file_number != m->file_number_) { + batch_file_number = m->file_number_; + if (m->edit_.GetBlobFileAdditions().empty()) { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Level-0 commit table #%" PRIu64 " started", + cfd->GetName().c_str(), m->file_number_); + } else { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Level-0 commit table #%" PRIu64 + " (+%zu blob files) started", + cfd->GetName().c_str(), m->file_number_, + m->edit_.GetBlobFileAdditions().size()); + } + + edit_list.push_back(&m->edit_); + memtables_to_flush.push_back(m); + std::unique_ptr info = m->ReleaseFlushJobInfo(); + if (info != nullptr) { + committed_flush_jobs_info->push_back(std::move(info)); + } + } + batch_count++; + } + + // TODO(myabandeh): Not sure how batch_count could be 0 here. + if (batch_count > 0) { + uint64_t min_wal_number_to_keep = 0; + assert(edit_list.size() > 0); + if (vset->db_options()->allow_2pc) { + // Note that if mempurge is successful, the edit_list will + // not be applicable (contains info of new min_log number to keep, + // and level 0 file path of SST file created during normal flush, + // so both pieces of information are irrelevant after a successful + // mempurge operation). + min_wal_number_to_keep = PrecomputeMinLogNumberToKeep2PC( + vset, *cfd, edit_list, memtables_to_flush, prep_tracker); + + // We piggyback the information of earliest log file to keep in the + // manifest entry for the last file flushed. + } else { + min_wal_number_to_keep = + PrecomputeMinLogNumberToKeepNon2PC(vset, *cfd, edit_list); + } + + VersionEdit wal_deletion; + wal_deletion.SetMinLogNumberToKeep(min_wal_number_to_keep); + if (vset->db_options()->track_and_verify_wals_in_manifest) { + if (min_wal_number_to_keep > + vset->GetWalSet().GetMinWalNumberToKeep()) { + wal_deletion.DeleteWalsBefore(min_wal_number_to_keep); + } + TEST_SYNC_POINT_CALLBACK( + "MemTableList::TryInstallMemtableFlushResults:" + "AfterComputeMinWalToKeep", + nullptr); + } + edit_list.push_back(&wal_deletion); + + const auto manifest_write_cb = [this, cfd, batch_count, log_buffer, + to_delete, mu](const Status& status) { + RemoveMemTablesOrRestoreFlags(status, cfd, batch_count, log_buffer, + to_delete, mu); + }; + if (write_edits) { + // this can release and reacquire the mutex. + s = vset->LogAndApply(cfd, mutable_cf_options, read_options, edit_list, + mu, db_directory, /*new_descriptor_log=*/false, + /*column_family_options=*/nullptr, + manifest_write_cb); + } else { + // If write_edit is false (e.g: successful mempurge), + // then remove old memtables, wake up manifest write queue threads, + // and don't commit anything to the manifest file. 
+        RemoveMemTablesOrRestoreFlags(s, cfd, batch_count, log_buffer,
+                                      to_delete, mu);
+        // Note: cfd->SetLogNumber is only called when a VersionEdit
+        // is written to MANIFEST. When mempurge is successful, we skip
+        // this step, therefore cfd->GetLogNumber is always the
+        // earliest log with data unflushed.
+        // Notify new head of manifest write queue.
+        // wake up all the waiting writers
+        // TODO(bjlemaire): explain full reason WakeUpWaitingManifestWriters
+        // needed or investigate more.
+        vset->WakeUpWaitingManifestWriters();
+      }
+    }
+  }
+  commit_in_progress_ = false;
+  return s;
+}
+
+// New memtables are inserted at the front of the list.
+void MemTableList::Add(MemTable* m, autovector<MemTable*>* to_delete) {
+  assert(static_cast<int>(current_->memlist_.size()) >= num_flush_not_started_);
+  InstallNewVersion();
+  // this method is used to move mutable memtable into an immutable list.
+  // since mutable memtable is already refcounted by the DBImpl,
+  // and when moving to the immutable list we don't unref it,
+  // we don't have to ref the memtable here. we just take over the
+  // reference from the DBImpl.
+  current_->Add(m, to_delete);
+  m->MarkImmutable();
+  num_flush_not_started_++;
+  if (num_flush_not_started_ == 1) {
+    imm_flush_needed.store(true, std::memory_order_release);
+  }
+  UpdateCachedValuesFromMemTableListVersion();
+  ResetTrimHistoryNeeded();
+}
+
+bool MemTableList::TrimHistory(autovector<MemTable*>* to_delete, size_t usage) {
+  InstallNewVersion();
+  bool ret = current_->TrimHistory(to_delete, usage);
+  UpdateCachedValuesFromMemTableListVersion();
+  ResetTrimHistoryNeeded();
+  return ret;
+}
+
+// Returns an estimate of the number of bytes of data in use.
+size_t MemTableList::ApproximateUnflushedMemTablesMemoryUsage() {
+  size_t total_size = 0;
+  for (auto& memtable : current_->memlist_) {
+    total_size += memtable->ApproximateMemoryUsage();
+  }
+  return total_size;
+}
+
+size_t MemTableList::ApproximateMemoryUsage() { return current_memory_usage_; }
+
+size_t MemTableList::MemoryAllocatedBytesExcludingLast() const {
+  const size_t usage = current_memory_allocted_bytes_excluding_last_.load(
+      std::memory_order_relaxed);
+  return usage;
+}
+
+bool MemTableList::HasHistory() const {
+  const bool has_history = current_has_history_.load(std::memory_order_relaxed);
+  return has_history;
+}
+
+void MemTableList::UpdateCachedValuesFromMemTableListVersion() {
+  const size_t total_memtable_size =
+      current_->MemoryAllocatedBytesExcludingLast();
+  current_memory_allocted_bytes_excluding_last_.store(
+      total_memtable_size, std::memory_order_relaxed);
+
+  const bool has_history = current_->HasHistory();
+  current_has_history_.store(has_history, std::memory_order_relaxed);
+}
+
+uint64_t MemTableList::ApproximateOldestKeyTime() const {
+  if (!current_->memlist_.empty()) {
+    return current_->memlist_.back()->ApproximateOldestKeyTime();
+  }
+  return std::numeric_limits<uint64_t>::max();
+}
+
+void MemTableList::InstallNewVersion() {
+  if (current_->refs_ == 1) {
+    // we're the only one using the version, just keep using it
+  } else {
+    // somebody else holds the current version, we need to create new one
+    MemTableListVersion* version = current_;
+    current_ = new MemTableListVersion(&current_memory_usage_, *version);
+    current_->Ref();
+    version->Unref();
+  }
+}
+
+void MemTableList::RemoveMemTablesOrRestoreFlags(
+    const Status& s, ColumnFamilyData* cfd, size_t batch_count,
+    LogBuffer* log_buffer, autovector<MemTable*>* to_delete,
+    InstrumentedMutex* mu) {
+  assert(mu);
+  mu->AssertHeld();
+  assert(to_delete);
+  // we will be
changing the version in the next code path, + // so we better create a new one, since versions are immutable + InstallNewVersion(); + + // All the later memtables that have the same filenum + // are part of the same batch. They can be committed now. + uint64_t mem_id = 1; // how many memtables have been flushed. + + // commit new state only if the column family is NOT dropped. + // The reason is as follows (refer to + // ColumnFamilyTest.FlushAndDropRaceCondition). + // If the column family is dropped, then according to LogAndApply, its + // corresponding flush operation is NOT written to the MANIFEST. This + // means the DB is not aware of the L0 files generated from the flush. + // By committing the new state, we remove the memtable from the memtable + // list. Creating an iterator on this column family will not be able to + // read full data since the memtable is removed, and the DB is not aware + // of the L0 files, causing MergingIterator unable to build child + // iterators. RocksDB contract requires that the iterator can be created + // on a dropped column family, and we must be able to + // read full data as long as column family handle is not deleted, even if + // the column family is dropped. + if (s.ok() && !cfd->IsDropped()) { // commit new state + while (batch_count-- > 0) { + MemTable* m = current_->memlist_.back(); + if (m->edit_.GetBlobFileAdditions().empty()) { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Level-0 commit table #%" PRIu64 + ": memtable #%" PRIu64 " done", + cfd->GetName().c_str(), m->file_number_, mem_id); + } else { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Level-0 commit table #%" PRIu64 + " (+%zu blob files)" + ": memtable #%" PRIu64 " done", + cfd->GetName().c_str(), m->file_number_, + m->edit_.GetBlobFileAdditions().size(), mem_id); + } + + assert(m->file_number_ > 0); + current_->Remove(m, to_delete); + UpdateCachedValuesFromMemTableListVersion(); + ResetTrimHistoryNeeded(); + ++mem_id; + } + } else { + for (auto it = current_->memlist_.rbegin(); batch_count-- > 0; ++it) { + MemTable* m = *it; + // commit failed. setup state so that we can flush again. + if (m->edit_.GetBlobFileAdditions().empty()) { + ROCKS_LOG_BUFFER(log_buffer, + "Level-0 commit table #%" PRIu64 ": memtable #%" PRIu64 + " failed", + m->file_number_, mem_id); + } else { + ROCKS_LOG_BUFFER(log_buffer, + "Level-0 commit table #%" PRIu64 + " (+%zu blob files)" + ": memtable #%" PRIu64 " failed", + m->file_number_, + m->edit_.GetBlobFileAdditions().size(), mem_id); + } + + m->flush_completed_ = false; + m->flush_in_progress_ = false; + m->edit_.Clear(); + num_flush_not_started_++; + m->file_number_ = 0; + imm_flush_needed.store(true, std::memory_order_release); + ++mem_id; + } + } +} + +uint64_t MemTableList::PrecomputeMinLogContainingPrepSection( + const std::unordered_set* memtables_to_flush) { + uint64_t min_log = 0; + + for (auto& m : current_->memlist_) { + if (memtables_to_flush && memtables_to_flush->count(m)) { + continue; + } + + auto log = m->GetMinLogContainingPrepSection(); + + if (log > 0 && (min_log == 0 || log < min_log)) { + min_log = log; + } + } + + return min_log; +} + +// Commit a successful atomic flush in the manifest file. 
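Editor's note: InstallMemtableAtomicFlushResults(), defined next, stamps the collected per-column-family version edits as one atomic group via MarkAtomicGroup(--num_entries), i.e. each edit records how many edits of the group still follow it. A toy rendering of that countdown (a sketch; the real VersionEdit internals are elided):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Toy VersionEdit: remaining_entries == how many edits of the same atomic
// group still follow this one; 0 marks the group's final edit.
struct ToyEdit {
  uint32_t remaining_entries = 0;
};

// Mirrors the MarkAtomicGroup(--num_entries) countdown in
// InstallMemtableAtomicFlushResults().
void MarkGroup(std::vector<ToyEdit*>& edits) {
  uint32_t num_entries = static_cast<uint32_t>(edits.size());
  for (ToyEdit* e : edits) {
    e->remaining_entries = --num_entries;
  }
}

int main() {
  ToyEdit a, b, c;
  std::vector<ToyEdit*> group{&a, &b, &c};
  MarkGroup(group);
  assert(a.remaining_entries == 2);
  assert(b.remaining_entries == 1);
  assert(c.remaining_entries == 0);  // recovery applies the group only once
                                     // it has read down to the 0 marker
  return 0;
}
```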
+Status InstallMemtableAtomicFlushResults( + const autovector* imm_lists, + const autovector& cfds, + const autovector& mutable_cf_options_list, + const autovector*>& mems_list, VersionSet* vset, + LogsWithPrepTracker* prep_tracker, InstrumentedMutex* mu, + const autovector& file_metas, + const autovector>*>& + committed_flush_jobs_info, + autovector* to_delete, FSDirectory* db_directory, + LogBuffer* log_buffer) { + AutoThreadOperationStageUpdater stage_updater( + ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS); + mu->AssertHeld(); + + const ReadOptions read_options(Env::IOActivity::kFlush); + + size_t num = mems_list.size(); + assert(cfds.size() == num); + if (imm_lists != nullptr) { + assert(imm_lists->size() == num); + } + if (num == 0) { + return Status::OK(); + } + + for (size_t k = 0; k != num; ++k) { +#ifndef NDEBUG + const auto* imm = + (imm_lists == nullptr) ? cfds[k]->imm() : imm_lists->at(k); + if (!mems_list[k]->empty()) { + assert((*mems_list[k])[0]->GetID() == imm->GetEarliestMemTableID()); + } +#endif + assert(nullptr != file_metas[k]); + for (size_t i = 0; i != mems_list[k]->size(); ++i) { + assert(i == 0 || (*mems_list[k])[i]->GetEdits()->NumEntries() == 0); + (*mems_list[k])[i]->SetFlushCompleted(true); + (*mems_list[k])[i]->SetFileNumber(file_metas[k]->fd.GetNumber()); + } + if (committed_flush_jobs_info[k]) { + assert(!mems_list[k]->empty()); + assert((*mems_list[k])[0]); + std::unique_ptr flush_job_info = + (*mems_list[k])[0]->ReleaseFlushJobInfo(); + committed_flush_jobs_info[k]->push_back(std::move(flush_job_info)); + } + } + + Status s; + + autovector> edit_lists; + uint32_t num_entries = 0; + for (const auto mems : mems_list) { + assert(mems != nullptr); + autovector edits; + assert(!mems->empty()); + edits.emplace_back((*mems)[0]->GetEdits()); + ++num_entries; + edit_lists.emplace_back(edits); + } + + WalNumber min_wal_number_to_keep = 0; + if (vset->db_options()->allow_2pc) { + min_wal_number_to_keep = PrecomputeMinLogNumberToKeep2PC( + vset, cfds, edit_lists, mems_list, prep_tracker); + } else { + min_wal_number_to_keep = + PrecomputeMinLogNumberToKeepNon2PC(vset, cfds, edit_lists); + } + + VersionEdit wal_deletion; + wal_deletion.SetMinLogNumberToKeep(min_wal_number_to_keep); + if (vset->db_options()->track_and_verify_wals_in_manifest && + min_wal_number_to_keep > vset->GetWalSet().GetMinWalNumberToKeep()) { + wal_deletion.DeleteWalsBefore(min_wal_number_to_keep); + } + edit_lists.back().push_back(&wal_deletion); + ++num_entries; + + // Mark the version edits as an atomic group if the number of version edits + // exceeds 1. + if (cfds.size() > 1) { + for (size_t i = 0; i < edit_lists.size(); i++) { + assert((edit_lists[i].size() == 1) || + ((edit_lists[i].size() == 2) && (i == edit_lists.size() - 1))); + for (auto& e : edit_lists[i]) { + e->MarkAtomicGroup(--num_entries); + } + } + assert(0 == num_entries); + } + + // this can release and reacquire the mutex. + s = vset->LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists, + mu, db_directory); + + for (size_t k = 0; k != cfds.size(); ++k) { + auto* imm = (imm_lists == nullptr) ? cfds[k]->imm() : imm_lists->at(k); + imm->InstallNewVersion(); + } + + if (s.ok() || s.IsColumnFamilyDropped()) { + for (size_t i = 0; i != cfds.size(); ++i) { + if (cfds[i]->IsDropped()) { + continue; + } + auto* imm = (imm_lists == nullptr) ? 
cfds[i]->imm() : imm_lists->at(i); + for (auto m : *mems_list[i]) { + assert(m->GetFileNumber() > 0); + uint64_t mem_id = m->GetID(); + + const VersionEdit* const edit = m->GetEdits(); + assert(edit); + + if (edit->GetBlobFileAdditions().empty()) { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Level-0 commit table #%" PRIu64 + ": memtable #%" PRIu64 " done", + cfds[i]->GetName().c_str(), m->GetFileNumber(), + mem_id); + } else { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Level-0 commit table #%" PRIu64 + " (+%zu blob files)" + ": memtable #%" PRIu64 " done", + cfds[i]->GetName().c_str(), m->GetFileNumber(), + edit->GetBlobFileAdditions().size(), mem_id); + } + + imm->current_->Remove(m, to_delete); + imm->UpdateCachedValuesFromMemTableListVersion(); + imm->ResetTrimHistoryNeeded(); + } + } + } else { + for (size_t i = 0; i != cfds.size(); ++i) { + auto* imm = (imm_lists == nullptr) ? cfds[i]->imm() : imm_lists->at(i); + for (auto m : *mems_list[i]) { + uint64_t mem_id = m->GetID(); + + const VersionEdit* const edit = m->GetEdits(); + assert(edit); + + if (edit->GetBlobFileAdditions().empty()) { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Level-0 commit table #%" PRIu64 + ": memtable #%" PRIu64 " failed", + cfds[i]->GetName().c_str(), m->GetFileNumber(), + mem_id); + } else { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Level-0 commit table #%" PRIu64 + " (+%zu blob files)" + ": memtable #%" PRIu64 " failed", + cfds[i]->GetName().c_str(), m->GetFileNumber(), + edit->GetBlobFileAdditions().size(), mem_id); + } + + m->SetFlushCompleted(false); + m->SetFlushInProgress(false); + m->GetEdits()->Clear(); + m->SetFileNumber(0); + imm->num_flush_not_started_++; + } + imm->imm_flush_needed.store(true, std::memory_order_release); + } + } + + return s; +} + +void MemTableList::RemoveOldMemTables(uint64_t log_number, + autovector* to_delete) { + assert(to_delete != nullptr); + InstallNewVersion(); + auto& memlist = current_->memlist_; + autovector old_memtables; + for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) { + MemTable* mem = *it; + if (mem->GetNextLogNumber() > log_number) { + break; + } + old_memtables.push_back(mem); + } + + for (auto it = old_memtables.begin(); it != old_memtables.end(); ++it) { + MemTable* mem = *it; + current_->Remove(mem, to_delete); + --num_flush_not_started_; + if (0 == num_flush_not_started_) { + imm_flush_needed.store(false, std::memory_order_release); + } + } + + UpdateCachedValuesFromMemTableListVersion(); + ResetTrimHistoryNeeded(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.h new file mode 100644 index 0000000000..1ad28a59e2 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/memtable_list.h @@ -0,0 +1,471 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+// +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "db/logs_with_prep_tracker.h" +#include "db/memtable.h" +#include "db/range_del_aggregator.h" +#include "file/filename.h" +#include "logging/log_buffer.h" +#include "monitoring/instrumented_mutex.h" +#include "rocksdb/db.h" +#include "rocksdb/iterator.h" +#include "rocksdb/options.h" +#include "rocksdb/types.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +class ColumnFamilyData; +class InternalKeyComparator; +class InstrumentedMutex; +class MergeIteratorBuilder; +class MemTableList; + +struct FlushJobInfo; + +// keeps a list of immutable memtables in a vector. the list is immutable +// if refcount is bigger than one. It is used as a state for Get() and +// Iterator code paths +// +// This class is not thread-safe. External synchronization is required +// (such as holding the db mutex or being on the write thread). +class MemTableListVersion { + public: + explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, + const MemTableListVersion& old); + explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, + int max_write_buffer_number_to_maintain, + int64_t max_write_buffer_size_to_maintain); + + void Ref(); + void Unref(autovector* to_delete = nullptr); + + // Search all the memtables starting from the most recent one. + // Return the most recent value found, if any. + // + // If any operation was found for this key, its most recent sequence number + // will be stored in *seq on success (regardless of whether true/false is + // returned). Otherwise, *seq will be set to kMaxSequenceNumber. + bool Get(const LookupKey& key, std::string* value, + PinnableWideColumns* columns, std::string* timestamp, Status* s, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, + const ReadOptions& read_opts, ReadCallback* callback = nullptr, + bool* is_blob_index = nullptr); + + bool Get(const LookupKey& key, std::string* value, + PinnableWideColumns* columns, std::string* timestamp, Status* s, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + const ReadOptions& read_opts, ReadCallback* callback = nullptr, + bool* is_blob_index = nullptr) { + SequenceNumber seq; + return Get(key, value, columns, timestamp, s, merge_context, + max_covering_tombstone_seq, &seq, read_opts, callback, + is_blob_index); + } + + void MultiGet(const ReadOptions& read_options, MultiGetRange* range, + ReadCallback* callback); + + // Returns all the merge operands corresponding to the key by searching all + // memtables starting from the most recent one. + bool GetMergeOperands(const LookupKey& key, Status* s, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + const ReadOptions& read_opts); + + // Similar to Get(), but searches the Memtable history of memtables that + // have already been flushed. Should only be used from in-memory only + // queries (such as Transaction validation) as the history may contain + // writes that are also present in the SST files. 
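+  //
+  // Illustrative use (hedged sketch; `lkey`, `snapshot_seq` and the output
+  // arguments are assumed caller context, not part of this header):
+  // transaction validation can detect a conflicting write even after the
+  // memtable holding it has been flushed:
+  //
+  //   SequenceNumber seq = kMaxSequenceNumber;
+  //   if (version->GetFromHistory(lkey, &value, &columns, &ts, &s, &ctx,
+  //                               &max_covering_tombstone_seq, &seq,
+  //                               read_opts) &&
+  //       seq > snapshot_seq) {
+  //     // write conflict
+  //   }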
+ bool GetFromHistory(const LookupKey& key, std::string* value, + PinnableWideColumns* columns, std::string* timestamp, + Status* s, MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + SequenceNumber* seq, const ReadOptions& read_opts, + bool* is_blob_index = nullptr); + bool GetFromHistory(const LookupKey& key, std::string* value, + PinnableWideColumns* columns, std::string* timestamp, + Status* s, MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + const ReadOptions& read_opts, + bool* is_blob_index = nullptr) { + SequenceNumber seq; + return GetFromHistory(key, value, columns, timestamp, s, merge_context, + max_covering_tombstone_seq, &seq, read_opts, + is_blob_index); + } + + Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena, + RangeDelAggregator* range_del_agg); + + void AddIterators(const ReadOptions& options, + std::vector* iterator_list, + Arena* arena); + + void AddIterators(const ReadOptions& options, + MergeIteratorBuilder* merge_iter_builder, + bool add_range_tombstone_iter); + + uint64_t GetTotalNumEntries() const; + + uint64_t GetTotalNumDeletes() const; + + MemTable::MemTableStats ApproximateStats(const Slice& start_ikey, + const Slice& end_ikey); + + // Returns the value of MemTable::GetEarliestSequenceNumber() on the most + // recent MemTable in this list or kMaxSequenceNumber if the list is empty. + // If include_history=true, will also search Memtables in MemTableList + // History. + SequenceNumber GetEarliestSequenceNumber(bool include_history = false) const; + + // Return the first sequence number from the memtable list, which is the + // smallest sequence number of all FirstSequenceNumber. + // Return kMaxSequenceNumber if the list is empty. + SequenceNumber GetFirstSequenceNumber() const; + + private: + friend class MemTableList; + + friend Status InstallMemtableAtomicFlushResults( + const autovector* imm_lists, + const autovector& cfds, + const autovector& mutable_cf_options_list, + const autovector*>& mems_list, + VersionSet* vset, LogsWithPrepTracker* prep_tracker, + InstrumentedMutex* mu, const autovector& file_meta, + const autovector>*>& + committed_flush_jobs_info, + autovector* to_delete, FSDirectory* db_directory, + LogBuffer* log_buffer); + + // REQUIRE: m is an immutable memtable + void Add(MemTable* m, autovector* to_delete); + // REQUIRE: m is an immutable memtable + void Remove(MemTable* m, autovector* to_delete); + + // Return true if memtable is trimmed + bool TrimHistory(autovector* to_delete, size_t usage); + + bool GetFromList(std::list* list, const LookupKey& key, + std::string* value, PinnableWideColumns* columns, + std::string* timestamp, Status* s, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + SequenceNumber* seq, const ReadOptions& read_opts, + ReadCallback* callback = nullptr, + bool* is_blob_index = nullptr); + + void AddMemTable(MemTable* m); + + void UnrefMemTable(autovector* to_delete, MemTable* m); + + // Calculate the total amount of memory used by memlist_ and memlist_history_ + // excluding the last MemTable in memlist_history_. The reason for excluding + // the last MemTable is to see if dropping the last MemTable will keep total + // memory usage above or equal to max_write_buffer_size_to_maintain_ + size_t MemoryAllocatedBytesExcludingLast() const; + + // Whether this version contains flushed memtables that are only kept around + // for transaction conflict checking. 
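+  // Illustrative trim arithmetic (made-up numbers, following the rule stated
+  // for MemoryAllocatedBytesExcludingLast() above): with
+  // max_write_buffer_size_to_maintain_ = 64MB and 90MB allocated in total, a
+  // 16MB oldest history memtable may be dropped, since 90 - 16 = 74 >= 64
+  // keeps usage at or above the limit; at 70MB total it would be kept,
+  // because 70 - 16 = 54 < 64.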
+  bool HasHistory() const { return !memlist_history_.empty(); }
+
+  bool MemtableLimitExceeded(size_t usage);
+
+  // Immutable MemTables that have not yet been flushed.
+  std::list<MemTable*> memlist_;
+
+  // MemTables that have already been flushed
+  // (used during Transaction validation)
+  std::list<MemTable*> memlist_history_;
+
+  // Maximum number of MemTables to keep in memory (including both flushed
+  // and not-yet-flushed tables).
+  const int max_write_buffer_number_to_maintain_;
+  // Maximum size of MemTables to keep in memory (including both flushed
+  // and not-yet-flushed tables).
+  const int64_t max_write_buffer_size_to_maintain_;
+
+  int refs_ = 0;
+
+  size_t* parent_memtable_list_memory_usage_;
+};
+
+// This class stores references to all the immutable memtables.
+// The memtables are flushed to L0 as soon as possible and in
+// any order. If there is more than one immutable memtable, their
+// flushes can occur concurrently. However, they are 'committed'
+// to the manifest in FIFO order to maintain correctness and
+// recoverability from a crash.
+//
+// Other than imm_flush_needed and imm_trim_needed, this class is not
+// thread-safe and requires external synchronization (such as holding the db
+// mutex or being on the write thread.)
+class MemTableList {
+ public:
+  // A list of memtables.
+  explicit MemTableList(int min_write_buffer_number_to_merge,
+                        int max_write_buffer_number_to_maintain,
+                        int64_t max_write_buffer_size_to_maintain)
+      : imm_flush_needed(false),
+        imm_trim_needed(false),
+        min_write_buffer_number_to_merge_(min_write_buffer_number_to_merge),
+        current_(new MemTableListVersion(&current_memory_usage_,
+                                         max_write_buffer_number_to_maintain,
+                                         max_write_buffer_size_to_maintain)),
+        num_flush_not_started_(0),
+        commit_in_progress_(false),
+        flush_requested_(false),
+        current_memory_usage_(0),
+        current_memory_allocted_bytes_excluding_last_(0),
+        current_has_history_(false) {
+    current_->Ref();
+  }
+
+  // Should not delete MemTableList without making sure MemTableList::current()
+  // is Unref()'d.
+  ~MemTableList() {}
+
+  MemTableListVersion* current() const { return current_; }
+
+  // so that background threads can detect whether there is anything more to
+  // start flushing.
+  std::atomic<bool> imm_flush_needed;
+
+  std::atomic<bool> imm_trim_needed;
+
+  // Returns the total number of memtables in the list that haven't yet
+  // been flushed and logged.
+  int NumNotFlushed() const;
+
+  // Returns total number of memtables in the list that have been
+  // completely flushed and logged.
+  int NumFlushed() const;
+
+  // Returns true if there is at least one memtable on which flush has
+  // not yet started.
+  bool IsFlushPending() const;
+
+  // Returns true if there is at least one memtable that is pending flush or
+  // flushing.
+  bool IsFlushPendingOrRunning() const;
+
+  // Returns the earliest memtables that need to be flushed. The returned
+  // memtables are guaranteed to be in the ascending order of created time.
+  void PickMemtablesToFlush(uint64_t max_memtable_id,
+                            autovector<MemTable*>* mems,
+                            uint64_t* max_next_log_number = nullptr);
+
+  // Reset status of the given memtables back to the pending state so that
+  // they can get picked up again on the next round of flush.
+  void RollbackMemtableFlush(const autovector<MemTable*>& mems,
+                             uint64_t file_number);
+
+  // Try to commit a successful flush in the manifest file. It might just
+  // return Status::OK, letting a concurrent flush do the actual recording.
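+  //
+  // Hedged sketch of the surrounding flush-thread flow (names such as
+  // `picked` are illustrative; the db mutex is held around both calls):
+  //
+  //   autovector<MemTable*> picked;
+  //   imm->PickMemtablesToFlush(max_memtable_id, &picked);
+  //   // ... mutex released, picked memtables written to an L0 file ...
+  //   s = imm->TryInstallMemtableFlushResults(
+  //       cfd, mutable_cf_options, picked, prep_tracker, vset, &mu,
+  //       file_number, &to_delete, db_directory, &log_buffer,
+  //       &committed_flush_jobs_info);
+  //
+  // If the L0 write itself failed, RollbackMemtableFlush() above re-queues
+  // the picked memtables for a later attempt instead.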
+  Status TryInstallMemtableFlushResults(
+      ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
+      const autovector<MemTable*>& m, LogsWithPrepTracker* prep_tracker,
+      VersionSet* vset, InstrumentedMutex* mu, uint64_t file_number,
+      autovector<MemTable*>* to_delete, FSDirectory* db_directory,
+      LogBuffer* log_buffer,
+      std::list<std::unique_ptr<FlushJobInfo>>* committed_flush_jobs_info,
+      bool write_edits = true);
+
+  // New memtables are inserted at the front of the list.
+  // Takes ownership of the reference held on *m by the caller of Add().
+  // By default, adding memtables will flag that the memtable list needs to be
+  // flushed, but in certain situations, like after a mempurge, we may want to
+  // avoid flushing the memtable list upon addition of a memtable.
+  void Add(MemTable* m, autovector<MemTable*>* to_delete);
+
+  // Returns an estimate of the number of bytes of data in use.
+  size_t ApproximateMemoryUsage();
+
+  // Returns the cached current_memory_allocted_bytes_excluding_last_ value.
+  size_t MemoryAllocatedBytesExcludingLast() const;
+
+  // Returns the cached current_has_history_ value.
+  bool HasHistory() const;
+
+  // Updates current_memory_allocted_bytes_excluding_last_ and
+  // current_has_history_ from MemTableListVersion. Must be called whenever
+  // InstallNewVersion is called.
+  void UpdateCachedValuesFromMemTableListVersion();
+
+  // `usage` is the current size of the mutable Memtable. When
+  // max_write_buffer_size_to_maintain is used, total size of mutable and
+  // immutable memtables is checked against it to decide whether to trim
+  // memtable list.
+  //
+  // Return true if memtable is trimmed
+  bool TrimHistory(autovector<MemTable*>* to_delete, size_t usage);
+
+  // Returns an estimate of the number of bytes of data used by
+  // the unflushed mem-tables.
+  size_t ApproximateUnflushedMemTablesMemoryUsage();
+
+  // Returns an estimate of the timestamp of the earliest key.
+  uint64_t ApproximateOldestKeyTime() const;
+
+  // Request a flush of all existing memtables to storage. This will
+  // cause future calls to IsFlushPending() to return true if this list is
+  // non-empty (regardless of the min_write_buffer_number_to_merge
+  // parameter). This flush request will persist until the next time
+  // PickMemtablesToFlush() is called.
+  void FlushRequested() {
+    flush_requested_ = true;
+    // If there are some memtables stored in imm() that don't trigger
+    // flush (e.g. mempurge output memtable), then update imm_flush_needed.
+    // Note: if a race condition causes imm_flush_needed to be set to true
+    // when num_flush_not_started_ == 0, there is no impact whatsoever.
+    // imm_flush_needed is only used in an assert in IsFlushPending().
+    if (num_flush_not_started_ > 0) {
+      imm_flush_needed.store(true, std::memory_order_release);
+    }
+  }
+
+  bool HasFlushRequested() { return flush_requested_; }
+
+  // Returns true if a trim history should be scheduled and the caller should
+  // be the one to schedule it
+  bool MarkTrimHistoryNeeded() {
+    auto expected = false;
+    return imm_trim_needed.compare_exchange_strong(
+        expected, true, std::memory_order_relaxed, std::memory_order_relaxed);
+  }
+
+  void ResetTrimHistoryNeeded() {
+    auto expected = true;
+    imm_trim_needed.compare_exchange_strong(
+        expected, false, std::memory_order_relaxed, std::memory_order_relaxed);
+  }
+
+  // No copying allowed
+  // MemTableList(const MemTableList&);
+  // void operator=(const MemTableList&);
+
+  size_t* current_memory_usage() { return &current_memory_usage_; }
+
+  // Returns the min log containing the prep section after memtables listed in
+  // `memtables_to_flush` are flushed and their status is persisted in
+  // manifest.
+  uint64_t PrecomputeMinLogContainingPrepSection(
+      const std::unordered_set<MemTable*>* memtables_to_flush = nullptr);
+
+  uint64_t GetEarliestMemTableID() const {
+    auto& memlist = current_->memlist_;
+    if (memlist.empty()) {
+      return std::numeric_limits<uint64_t>::max();
+    }
+    return memlist.back()->GetID();
+  }
+
+  uint64_t GetLatestMemTableID() const {
+    auto& memlist = current_->memlist_;
+    if (memlist.empty()) {
+      return 0;
+    }
+    return memlist.front()->GetID();
+  }
+
+  void AssignAtomicFlushSeq(const SequenceNumber& seq) {
+    const auto& memlist = current_->memlist_;
+    // Scan the memtable list from new to old
+    for (auto it = memlist.begin(); it != memlist.end(); ++it) {
+      MemTable* mem = *it;
+      if (mem->atomic_flush_seqno_ == kMaxSequenceNumber) {
+        mem->atomic_flush_seqno_ = seq;
+      } else {
+        // Earlier memtables must have been assigned an atomic flush seq; no
+        // need to continue the scan.
+        break;
+      }
+    }
+  }
+
+  // Used only by DBImplSecondary during log replay.
+  // Remove memtables whose data were written before the WAL with log_number
+  // was created, i.e. mem->GetNextLogNumber() <= log_number. The memtables are
+  // not freed, but put into a vector for future deref and reclamation.
+  void RemoveOldMemTables(uint64_t log_number,
+                          autovector<MemTable*>* to_delete);
+
+ private:
+  friend Status InstallMemtableAtomicFlushResults(
+      const autovector<MemTableList*>* imm_lists,
+      const autovector<ColumnFamilyData*>& cfds,
+      const autovector<const MutableCFOptions*>& mutable_cf_options_list,
+      const autovector<const autovector<MemTable*>*>& mems_list,
+      VersionSet* vset, LogsWithPrepTracker* prep_tracker,
+      InstrumentedMutex* mu, const autovector<FileMetaData*>& file_meta,
+      const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>&
+          committed_flush_jobs_info,
+      autovector<MemTable*>* to_delete, FSDirectory* db_directory,
+      LogBuffer* log_buffer);
+
+  // DB mutex held
+  void InstallNewVersion();
+
+  // DB mutex held
+  // Called after writing to MANIFEST
+  void RemoveMemTablesOrRestoreFlags(const Status& s, ColumnFamilyData* cfd,
+                                     size_t batch_count, LogBuffer* log_buffer,
+                                     autovector<MemTable*>* to_delete,
+                                     InstrumentedMutex* mu);
+
+  const int min_write_buffer_number_to_merge_;
+
+  MemTableListVersion* current_;
+
+  // the number of elements that still need flushing
+  int num_flush_not_started_;
+
+  // committing in progress
+  bool commit_in_progress_;
+
+  // Requested a flush of memtables to storage. It's possible to request that
+  // a subset of memtables be flushed.
+  bool flush_requested_;
+
+  // The current memory usage.
+  size_t current_memory_usage_;
+
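+  // The cached copies kept in the two atomics below let hot paths read these
+  // values without taking the DB mutex; they are refreshed by
+  // UpdateCachedValuesFromMemTableListVersion() whenever a new version is
+  // installed. Hypothetical mutex-free reader (illustrative only, not an
+  // actual call site in this file):
+  //
+  //   if (imm->HasHistory() &&
+  //       imm->MemoryAllocatedBytesExcludingLast() + mutable_usage >
+  //           size_to_maintain) {
+  //     imm->MarkTrimHistoryNeeded();
+  //   }
+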
+  // Cached value of current_->MemoryAllocatedBytesExcludingLast().
+  std::atomic<size_t> current_memory_allocted_bytes_excluding_last_;
+
+  // Cached value of current_->HasHistory().
+  std::atomic<bool> current_has_history_;
+};
+
+// Installs memtable atomic flush results.
+// In most cases, imm_lists is nullptr, and the function simply uses the
+// immutable memtable lists associated with the cfds. There are unit tests
+// that install flush results for external immutable memtable lists other
+// than the cfds' own immutable memtable lists, e.g. MemTableListTest. In
+// this case, the imm_lists parameter is not nullptr.
+extern Status InstallMemtableAtomicFlushResults(
+    const autovector<MemTableList*>* imm_lists,
+    const autovector<ColumnFamilyData*>& cfds,
+    const autovector<const MutableCFOptions*>& mutable_cf_options_list,
+    const autovector<const autovector<MemTable*>*>& mems_list,
+    VersionSet* vset, LogsWithPrepTracker* prep_tracker, InstrumentedMutex* mu,
+    const autovector<FileMetaData*>& file_meta,
+    const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>&
+        committed_flush_jobs_info,
+    autovector<MemTable*>* to_delete, FSDirectory* db_directory,
+    LogBuffer* log_buffer);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_context.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_context.h
new file mode 100644
index 0000000000..8a7b072902
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_context.h
@@ -0,0 +1,147 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/slice.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+const std::vector<Slice> empty_operand_list;
+
+// The merge context for merging a user key.
+// When doing a Get(), DB will create such a class and pass it when
+// issuing Get() operation to memtables and version_set. The operands
+// will be fetched from the context when issuing partial or full merge.
+class MergeContext {
+ public:
+  // Clear all the operands
+  void Clear() {
+    if (operand_list_) {
+      operand_list_->clear();
+      copied_operands_->clear();
+    }
+  }
+
+  // Push a merge operand
+  void PushOperand(const Slice& operand_slice, bool operand_pinned = false) {
+    Initialize();
+    SetDirectionBackward();
+
+    if (operand_pinned) {
+      operand_list_->push_back(operand_slice);
+    } else {
+      // We need to have our own copy of the operand since it's not pinned
+      copied_operands_->emplace_back(
+          new std::string(operand_slice.data(), operand_slice.size()));
+      operand_list_->push_back(*copied_operands_->back());
+    }
+  }
+
+  // Push back a merge operand
+  void PushOperandBack(const Slice& operand_slice,
+                       bool operand_pinned = false) {
+    Initialize();
+    SetDirectionForward();
+
+    if (operand_pinned) {
+      operand_list_->push_back(operand_slice);
+    } else {
+      // We need to have our own copy of the operand since it's not pinned
+      copied_operands_->emplace_back(
+          new std::string(operand_slice.data(), operand_slice.size()));
+      operand_list_->push_back(*copied_operands_->back());
+    }
+  }
+
+  // return total number of operands in the list
+  size_t GetNumOperands() const {
+    if (!operand_list_) {
+      return 0;
+    }
+    return operand_list_->size();
+  }
+
+  // Get the operand at the index.
+  Slice GetOperand(int index) const {
+    assert(operand_list_);
+
+    SetDirectionForward();
+    return (*operand_list_)[index];
+  }
+
+  // Same as GetOperandsDirectionForward
+  //
+  // Note that the returned reference is only good until another call
+  // to this MergeContext.
+  // If the returned value is needed for longer,
+  // a copy must be made.
+  const std::vector<Slice>& GetOperands() const {
+    return GetOperandsDirectionForward();
+  }
+
+  // Return all the operands in the order as they were merged (passed to
+  // FullMerge or FullMergeV2)
+  //
+  // Note that the returned reference is only good until another call
+  // to this MergeContext. If the returned value is needed for longer,
+  // a copy must be made.
+  const std::vector<Slice>& GetOperandsDirectionForward() const {
+    if (!operand_list_) {
+      return empty_operand_list;
+    }
+
+    SetDirectionForward();
+    return *operand_list_;
+  }
+
+  // Return all the operands in the reversed order relative to how they were
+  // merged (passed to FullMerge or FullMergeV2)
+  //
+  // Note that the returned reference is only good until another call
+  // to this MergeContext. If the returned value is needed for longer,
+  // a copy must be made.
+  const std::vector<Slice>& GetOperandsDirectionBackward() const {
+    if (!operand_list_) {
+      return empty_operand_list;
+    }
+
+    SetDirectionBackward();
+    return *operand_list_;
+  }
+
+ private:
+  void Initialize() {
+    if (!operand_list_) {
+      operand_list_.reset(new std::vector<Slice>());
+      copied_operands_.reset(new std::vector<std::unique_ptr<std::string>>());
+    }
+  }
+
+  void SetDirectionForward() const {
+    if (operands_reversed_ == true) {
+      std::reverse(operand_list_->begin(), operand_list_->end());
+      operands_reversed_ = false;
+    }
+  }
+
+  void SetDirectionBackward() const {
+    if (operands_reversed_ == false) {
+      std::reverse(operand_list_->begin(), operand_list_->end());
+      operands_reversed_ = true;
+    }
+  }
+
+  // List of operands
+  mutable std::unique_ptr<std::vector<Slice>> operand_list_;
+  // Copy of operands that are not pinned.
+  std::unique_ptr<std::vector<std::unique_ptr<std::string>>> copied_operands_;
+  mutable bool operands_reversed_ = true;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.cc
new file mode 100644
index 0000000000..110ac9622e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.cc
@@ -0,0 +1,606 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+ +#include "db/merge_helper.h" + +#include + +#include "db/blob/blob_fetcher.h" +#include "db/blob/blob_index.h" +#include "db/blob/prefetch_buffer_collection.h" +#include "db/compaction/compaction_iteration_stats.h" +#include "db/dbformat.h" +#include "db/wide/wide_column_serialization.h" +#include "logging/logging.h" +#include "monitoring/perf_context_imp.h" +#include "monitoring/statistics_impl.h" +#include "port/likely.h" +#include "rocksdb/comparator.h" +#include "rocksdb/db.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/system_clock.h" +#include "table/format.h" +#include "table/internal_iterator.h" + +namespace ROCKSDB_NAMESPACE { + +MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator, + const MergeOperator* user_merge_operator, + const CompactionFilter* compaction_filter, + Logger* logger, bool assert_valid_internal_key, + SequenceNumber latest_snapshot, + const SnapshotChecker* snapshot_checker, int level, + Statistics* stats, + const std::atomic* shutting_down) + : env_(env), + clock_(env->GetSystemClock().get()), + user_comparator_(user_comparator), + user_merge_operator_(user_merge_operator), + compaction_filter_(compaction_filter), + shutting_down_(shutting_down), + logger_(logger), + assert_valid_internal_key_(assert_valid_internal_key), + allow_single_operand_(false), + latest_snapshot_(latest_snapshot), + snapshot_checker_(snapshot_checker), + level_(level), + keys_(), + filter_timer_(clock_), + total_filter_time_(0U), + stats_(stats) { + assert(user_comparator_ != nullptr); + if (user_merge_operator_) { + allow_single_operand_ = user_merge_operator_->AllowSingleOperand(); + } +} + +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, const Slice* value, + const std::vector& operands, std::string* result, Logger* logger, + Statistics* statistics, SystemClock* clock, Slice* result_operand, + bool update_num_ops_stats, + MergeOperator::OpFailureScope* op_failure_scope) { + assert(merge_operator != nullptr); + + if (operands.empty()) { + assert(value != nullptr && result != nullptr); + result->assign(value->data(), value->size()); + return Status::OK(); + } + + if (update_num_ops_stats) { + RecordInHistogram(statistics, READ_NUM_MERGE_OPERANDS, + static_cast(operands.size())); + } + + bool success = false; + Slice tmp_result_operand(nullptr, 0); + const MergeOperator::MergeOperationInput merge_in(key, value, operands, + logger); + MergeOperator::MergeOperationOutput merge_out(*result, tmp_result_operand); + { + // Setup to time the merge + StopWatchNano timer(clock, statistics != nullptr); + PERF_TIMER_GUARD(merge_operator_time_nanos); + + // Do the merge + success = merge_operator->FullMergeV2(merge_in, &merge_out); + + if (tmp_result_operand.data()) { + // FullMergeV2 result is an existing operand + if (result_operand != nullptr) { + *result_operand = tmp_result_operand; + } else { + result->assign(tmp_result_operand.data(), tmp_result_operand.size()); + } + } else if (result_operand) { + *result_operand = Slice(nullptr, 0); + } + + RecordTick(statistics, MERGE_OPERATION_TOTAL_TIME, + statistics ? 
timer.ElapsedNanos() : 0); + } + + if (op_failure_scope != nullptr) { + *op_failure_scope = merge_out.op_failure_scope; + // Apply default per merge_operator.h + if (*op_failure_scope == MergeOperator::OpFailureScope::kDefault) { + *op_failure_scope = MergeOperator::OpFailureScope::kTryMerge; + } + } + + if (!success) { + RecordTick(statistics, NUMBER_MERGE_FAILURES); + return Status::Corruption(Status::SubCode::kMergeOperatorFailed); + } + + return Status::OK(); +} + +Status MergeHelper::TimedFullMergeWithEntity( + const MergeOperator* merge_operator, const Slice& key, Slice base_entity, + const std::vector& operands, std::string* result, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + MergeOperator::OpFailureScope* op_failure_scope) { + WideColumns base_columns; + + { + const Status s = + WideColumnSerialization::Deserialize(base_entity, base_columns); + if (!s.ok()) { + return s; + } + } + + const bool has_default_column = + !base_columns.empty() && base_columns[0].name() == kDefaultWideColumnName; + + Slice value_of_default; + if (has_default_column) { + value_of_default = base_columns[0].value(); + } + + std::string merge_result; + + { + const Status s = TimedFullMerge(merge_operator, key, &value_of_default, + operands, &merge_result, logger, statistics, + clock, nullptr /* result_operand */, + update_num_ops_stats, op_failure_scope); + if (!s.ok()) { + return s; + } + } + + if (has_default_column) { + base_columns[0].value() = merge_result; + + const Status s = WideColumnSerialization::Serialize(base_columns, *result); + if (!s.ok()) { + return s; + } + } else { + const Status s = + WideColumnSerialization::Serialize(merge_result, base_columns, *result); + if (!s.ok()) { + return s; + } + } + + return Status::OK(); +} + +// PRE: iter points to the first merge type entry +// POST: iter points to the first entry beyond the merge process (or the end) +// keys_, operands_ are updated to reflect the merge result. +// keys_ stores the list of keys encountered while merging. +// operands_ stores the list of merge operands encountered while merging. +// keys_[i] corresponds to operands_[i] for each i. +// +// TODO: Avoid the snapshot stripe map lookup in CompactionRangeDelAggregator +// and just pass the StripeRep corresponding to the stripe being merged. +Status MergeHelper::MergeUntil(InternalIterator* iter, + CompactionRangeDelAggregator* range_del_agg, + const SequenceNumber stop_before, + const bool at_bottom, + const bool allow_data_in_errors, + const BlobFetcher* blob_fetcher, + const std::string* const full_history_ts_low, + PrefetchBufferCollection* prefetch_buffers, + CompactionIterationStats* c_iter_stats) { + // Get a copy of the internal key, before it's invalidated by iter->Next() + // Also maintain the list of merge operands seen. + assert(HasOperator()); + keys_.clear(); + merge_context_.Clear(); + has_compaction_filter_skip_until_ = false; + assert(user_merge_operator_); + assert(user_comparator_); + const size_t ts_sz = user_comparator_->timestamp_size(); + if (full_history_ts_low) { + assert(ts_sz > 0); + assert(ts_sz == full_history_ts_low->size()); + } + bool first_key = true; + + // We need to parse the internal key again as the parsed key is + // backed by the internal key! + // Assume no internal key corruption as it has been successfully parsed + // by the caller. 
+ // original_key_is_iter variable is just caching the information: + // original_key_is_iter == (iter->key().ToString() == original_key) + bool original_key_is_iter = true; + std::string original_key = iter->key().ToString(); + // Important: + // orig_ikey is backed by original_key if keys_.empty() + // orig_ikey is backed by keys_.back() if !keys_.empty() + ParsedInternalKey orig_ikey; + + Status s = ParseInternalKey(original_key, &orig_ikey, allow_data_in_errors); + assert(s.ok()); + if (!s.ok()) return s; + + assert(kTypeMerge == orig_ikey.type); + + bool hit_the_next_user_key = false; + int cmp_with_full_history_ts_low = 0; + for (; iter->Valid(); iter->Next(), original_key_is_iter = false) { + if (IsShuttingDown()) { + s = Status::ShutdownInProgress(); + return s; + } + // Skip range tombstones emitted by the compaction iterator. + if (iter->IsDeleteRangeSentinelKey()) { + continue; + } + + ParsedInternalKey ikey; + assert(keys_.size() == merge_context_.GetNumOperands()); + + Status pik_status = + ParseInternalKey(iter->key(), &ikey, allow_data_in_errors); + Slice ts; + if (pik_status.ok()) { + ts = ExtractTimestampFromUserKey(ikey.user_key, ts_sz); + if (full_history_ts_low) { + cmp_with_full_history_ts_low = + user_comparator_->CompareTimestamp(ts, *full_history_ts_low); + } + } + if (!pik_status.ok()) { + // stop at corrupted key + if (assert_valid_internal_key_) { + return pik_status; + } + break; + } else if (first_key) { + // If user-defined timestamp is enabled, we expect both user key and + // timestamps are equal, as a sanity check. + assert(user_comparator_->Equal(ikey.user_key, orig_ikey.user_key)); + first_key = false; + } else if (!user_comparator_->EqualWithoutTimestamp(ikey.user_key, + orig_ikey.user_key) || + (ts_sz > 0 && + !user_comparator_->Equal(ikey.user_key, orig_ikey.user_key) && + cmp_with_full_history_ts_low >= 0)) { + // 1) hit a different user key, or + // 2) user-defined timestamp is enabled, and hit a version of user key NOT + // eligible for GC, then stop right here. + hit_the_next_user_key = true; + break; + } else if (stop_before > 0 && ikey.sequence <= stop_before && + LIKELY(snapshot_checker_ == nullptr || + snapshot_checker_->CheckInSnapshot(ikey.sequence, + stop_before) != + SnapshotCheckerResult::kNotInSnapshot)) { + // hit an entry that's possibly visible by the previous snapshot, can't + // touch that + break; + } + + // At this point we are guaranteed that we need to process this key. + + assert(IsValueType(ikey.type)); + if (ikey.type != kTypeMerge) { + // hit a put/delete/single delete + // => merge the put value or a nullptr with operands_ + // => store result in operands_.back() (and update keys_.back()) + // => change the entry type to kTypeValue for keys_.back() + // We are done! Success! + + // If there are no operands, just return the Status::OK(). That will cause + // the compaction iterator to write out the key we're currently at, which + // is the put/delete we just encountered. + if (keys_.empty()) { + return s; + } + + // TODO: if we're in compaction and it's a put, it would be nice to run + // compaction filter on it. 
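+      // The final merge below chooses its base value from the entry we
+      // stopped at: an entry covered by a range tombstone (or a delete-like
+      // entry) merges against nullptr, kTypeValue merges against
+      // iter->value(), kTypeBlobIndex against the fetched blob value, and
+      // kTypeWideColumnEntity is routed through TimedFullMergeWithEntity().
+      // Schematically:
+      //
+      //   base = covered_by_range_del    ? nullptr
+      //        : type == kTypeValue      ? iter->value()
+      //        : type == kTypeBlobIndex  ? fetched_blob_value
+      //        : /* delete-like */         nullptr;
+      //   s = TimedFullMerge(user_merge_operator_, ikey.user_key, base,
+      //                      merge_context_.GetOperands(), &merge_result,
+      //                      ...);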
+ std::string merge_result; + MergeOperator::OpFailureScope op_failure_scope; + + if (range_del_agg && + range_del_agg->ShouldDelete( + ikey, RangeDelPositioningMode::kForwardTraversal)) { + s = TimedFullMerge(user_merge_operator_, ikey.user_key, nullptr, + merge_context_.GetOperands(), &merge_result, logger_, + stats_, clock_, + /* result_operand */ nullptr, + /* update_num_ops_stats */ false, &op_failure_scope); + } else if (ikey.type == kTypeValue) { + const Slice val = iter->value(); + + s = TimedFullMerge(user_merge_operator_, ikey.user_key, &val, + merge_context_.GetOperands(), &merge_result, logger_, + stats_, clock_, + /* result_operand */ nullptr, + /* update_num_ops_stats */ false, &op_failure_scope); + } else if (ikey.type == kTypeBlobIndex) { + BlobIndex blob_index; + + s = blob_index.DecodeFrom(iter->value()); + if (!s.ok()) { + return s; + } + + FilePrefetchBuffer* prefetch_buffer = + prefetch_buffers ? prefetch_buffers->GetOrCreatePrefetchBuffer( + blob_index.file_number()) + : nullptr; + + uint64_t bytes_read = 0; + + assert(blob_fetcher); + + PinnableSlice blob_value; + s = blob_fetcher->FetchBlob(ikey.user_key, blob_index, prefetch_buffer, + &blob_value, &bytes_read); + if (!s.ok()) { + return s; + } + + if (c_iter_stats) { + ++c_iter_stats->num_blobs_read; + c_iter_stats->total_blob_bytes_read += bytes_read; + } + + s = TimedFullMerge(user_merge_operator_, ikey.user_key, &blob_value, + merge_context_.GetOperands(), &merge_result, logger_, + stats_, clock_, + /* result_operand */ nullptr, + /* update_num_ops_stats */ false, &op_failure_scope); + } else if (ikey.type == kTypeWideColumnEntity) { + s = TimedFullMergeWithEntity( + user_merge_operator_, ikey.user_key, iter->value(), + merge_context_.GetOperands(), &merge_result, logger_, stats_, + clock_, /* update_num_ops_stats */ false, &op_failure_scope); + } else { + s = TimedFullMerge(user_merge_operator_, ikey.user_key, nullptr, + merge_context_.GetOperands(), &merge_result, logger_, + stats_, clock_, + /* result_operand */ nullptr, + /* update_num_ops_stats */ false, &op_failure_scope); + } + + // We store the result in keys_.back() and operands_.back() + // if nothing went wrong (i.e.: no operand corruption on disk) + if (s.ok()) { + // The original key encountered + original_key = std::move(keys_.back()); + orig_ikey.type = ikey.type == kTypeWideColumnEntity + ? kTypeWideColumnEntity + : kTypeValue; + UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type); + keys_.clear(); + merge_context_.Clear(); + keys_.emplace_front(std::move(original_key)); + merge_context_.PushOperand(merge_result); + + // move iter to the next entry + iter->Next(); + } else if (op_failure_scope == + MergeOperator::OpFailureScope::kMustMerge) { + // Change to `Status::MergeInProgress()` to denote output consists of + // merge operands only. Leave `iter` at the non-merge entry so it will + // be output after. + s = Status::MergeInProgress(); + } + return s; + } else { + // hit a merge + // => if there is a compaction filter, apply it. + // => check for range tombstones covering the operand + // => merge the operand into the front of the operands_ list + // if not filtered + // => then continue because we haven't yet seen a Put/Delete. + // + // Keep queuing keys and operands until we either meet a put / delete + // request or later did a partial merge. + + Slice value_slice = iter->value(); + // add an operand to the list if: + // 1) it's included in one of the snapshots. 
in that case we *must* write + // it out, no matter what compaction filter says + // 2) it's not filtered by a compaction filter + CompactionFilter::Decision filter = + ikey.sequence <= latest_snapshot_ + ? CompactionFilter::Decision::kKeep + : FilterMerge(orig_ikey.user_key, value_slice); + if (filter != CompactionFilter::Decision::kRemoveAndSkipUntil && + range_del_agg != nullptr && + range_del_agg->ShouldDelete( + iter->key(), RangeDelPositioningMode::kForwardTraversal)) { + filter = CompactionFilter::Decision::kRemove; + } + if (filter == CompactionFilter::Decision::kKeep || + filter == CompactionFilter::Decision::kChangeValue) { + if (original_key_is_iter) { + // this is just an optimization that saves us one memcpy + keys_.emplace_front(original_key); + } else { + keys_.emplace_front(iter->key().ToString()); + } + if (keys_.size() == 1) { + // we need to re-anchor the orig_ikey because it was anchored by + // original_key before + pik_status = + ParseInternalKey(keys_.back(), &orig_ikey, allow_data_in_errors); + pik_status.PermitUncheckedError(); + assert(pik_status.ok()); + } + if (filter == CompactionFilter::Decision::kKeep) { + merge_context_.PushOperand( + value_slice, iter->IsValuePinned() /* operand_pinned */); + } else { + assert(filter == CompactionFilter::Decision::kChangeValue); + // Compaction filter asked us to change the operand from value_slice + // to compaction_filter_value_. + merge_context_.PushOperand(compaction_filter_value_, false); + } + } else if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil) { + // Compaction filter asked us to remove this key altogether + // (not just this operand), along with some keys following it. + keys_.clear(); + merge_context_.Clear(); + has_compaction_filter_skip_until_ = true; + return s; + } + } + } + + if (cmp_with_full_history_ts_low >= 0) { + size_t num_merge_operands = merge_context_.GetNumOperands(); + if (ts_sz && num_merge_operands > 1) { + // We do not merge merge operands with different timestamps if they are + // not eligible for GC. + ROCKS_LOG_ERROR(logger_, "ts_sz=%d, %d merge oprands", + static_cast(ts_sz), + static_cast(num_merge_operands)); + assert(false); + } + } + + if (merge_context_.GetNumOperands() == 0) { + // we filtered out all the merge operands + return s; + } + + // We are sure we have seen this key's entire history if: + // at_bottom == true (this does not necessarily mean it is the bottommost + // layer, but rather that we are confident the key does not appear on any of + // the lower layers, at_bottom == false doesn't mean it does appear, just + // that we can't be sure, see Compaction::IsBottommostLevel for details) + // AND + // we have either encountered another key or end of key history on this + // layer. + // Note that if user-defined timestamp is enabled, we need some extra caution + // here: if full_history_ts_low is nullptr, or it's not null but the key's + // timestamp is greater than or equal to full_history_ts_low, it means this + // key cannot be dropped. We may not have seen the beginning of the key. + // + // When these conditions are true we are able to merge all the keys + // using full merge. + // + // For these cases we are not sure about, we simply miss the opportunity + // to combine the keys. Since VersionSet::SetupOtherInputs() always makes + // sure that all merge-operands on the same level get compacted together, + // this will simply lead to these merge operands moving to the next level. 
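+  //
+  // Worked example (schematic): if this key's entries here are
+  // [Merge(o2), Merge(o1)] and we are at the bottom having surely seen the
+  // key's beginning, the branch below emits a single Put whose value is
+  // FullMerge(nullptr, [o1, o2]). Otherwise the operands can at best be
+  // combined associatively via PartialMergeMulti() and remain a Merge,
+  // reported through Status::MergeInProgress().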
+ bool surely_seen_the_beginning = + (hit_the_next_user_key || !iter->Valid()) && at_bottom && + (ts_sz == 0 || cmp_with_full_history_ts_low < 0); + if (surely_seen_the_beginning) { + // do a final merge with nullptr as the existing value and say + // bye to the merge type (it's now converted to a Put) + assert(kTypeMerge == orig_ikey.type); + assert(merge_context_.GetNumOperands() >= 1); + assert(merge_context_.GetNumOperands() == keys_.size()); + std::string merge_result; + MergeOperator::OpFailureScope op_failure_scope; + s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr, + merge_context_.GetOperands(), &merge_result, logger_, + stats_, clock_, + /* result_operand */ nullptr, + /* update_num_ops_stats */ false, &op_failure_scope); + if (s.ok()) { + // The original key encountered + // We are certain that keys_ is not empty here (see assertions couple of + // lines before). + original_key = std::move(keys_.back()); + orig_ikey.type = kTypeValue; + UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type); + keys_.clear(); + merge_context_.Clear(); + keys_.emplace_front(std::move(original_key)); + merge_context_.PushOperand(merge_result); + } else if (op_failure_scope == MergeOperator::OpFailureScope::kMustMerge) { + // Change to `Status::MergeInProgress()` to denote output consists of + // merge operands only. + s = Status::MergeInProgress(); + } + } else { + // We haven't seen the beginning of the key nor a Put/Delete. + // Attempt to use the user's associative merge function to + // merge the stacked merge operands into a single operand. + s = Status::MergeInProgress(); + if (merge_context_.GetNumOperands() >= 2 || + (allow_single_operand_ && merge_context_.GetNumOperands() == 1)) { + bool merge_success = false; + std::string merge_result; + { + StopWatchNano timer(clock_, stats_ != nullptr); + PERF_TIMER_GUARD(merge_operator_time_nanos); + merge_success = user_merge_operator_->PartialMergeMulti( + orig_ikey.user_key, + std::deque(merge_context_.GetOperands().begin(), + merge_context_.GetOperands().end()), + &merge_result, logger_); + RecordTick(stats_, MERGE_OPERATION_TOTAL_TIME, + stats_ ? timer.ElapsedNanosSafe() : 0); + } + if (merge_success) { + // Merging of operands (associative merge) was successful. 
+ // Replace operands with the merge result + merge_context_.Clear(); + merge_context_.PushOperand(merge_result); + keys_.erase(keys_.begin(), keys_.end() - 1); + } + } + } + + return s; +} + +MergeOutputIterator::MergeOutputIterator(const MergeHelper* merge_helper) + : merge_helper_(merge_helper) { + it_keys_ = merge_helper_->keys().rend(); + it_values_ = merge_helper_->values().rend(); +} + +void MergeOutputIterator::SeekToFirst() { + const auto& keys = merge_helper_->keys(); + const auto& values = merge_helper_->values(); + assert(keys.size() == values.size()); + it_keys_ = keys.rbegin(); + it_values_ = values.rbegin(); +} + +void MergeOutputIterator::Next() { + ++it_keys_; + ++it_values_; +} + +CompactionFilter::Decision MergeHelper::FilterMerge(const Slice& user_key, + const Slice& value_slice) { + if (compaction_filter_ == nullptr) { + return CompactionFilter::Decision::kKeep; + } + if (stats_ != nullptr && ShouldReportDetailedTime(env_, stats_)) { + filter_timer_.Start(); + } + compaction_filter_value_.clear(); + compaction_filter_skip_until_.Clear(); + auto ret = compaction_filter_->FilterV3( + level_, user_key, CompactionFilter::ValueType::kMergeOperand, + &value_slice, /* existing_columns */ nullptr, &compaction_filter_value_, + /* new_columns */ nullptr, compaction_filter_skip_until_.rep()); + if (ret == CompactionFilter::Decision::kRemoveAndSkipUntil) { + if (user_comparator_->Compare(*compaction_filter_skip_until_.rep(), + user_key) <= 0) { + // Invalid skip_until returned from compaction filter. + // Keep the key as per FilterV2/FilterV3 documentation. + ret = CompactionFilter::Decision::kKeep; + } else { + compaction_filter_skip_until_.ConvertFromUserKey(kMaxSequenceNumber, + kValueTypeForSeek); + } + } + if (stats_ != nullptr && ShouldReportDetailedTime(env_, stats_)) { + total_filter_time_ += filter_timer_.ElapsedNanosSafe(); + } + return ret; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.h new file mode 100644 index 0000000000..7f624b7432 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_helper.h @@ -0,0 +1,224 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+// +#pragma once + +#include +#include +#include + +#include "db/merge_context.h" +#include "db/range_del_aggregator.h" +#include "db/snapshot_checker.h" +#include "rocksdb/compaction_filter.h" +#include "rocksdb/env.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "rocksdb/wide_columns.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +class Comparator; +class Iterator; +class Logger; +class MergeOperator; +class Statistics; +class SystemClock; +class BlobFetcher; +class PrefetchBufferCollection; +struct CompactionIterationStats; + +class MergeHelper { + public: + MergeHelper(Env* env, const Comparator* user_comparator, + const MergeOperator* user_merge_operator, + const CompactionFilter* compaction_filter, Logger* logger, + bool assert_valid_internal_key, SequenceNumber latest_snapshot, + const SnapshotChecker* snapshot_checker = nullptr, int level = 0, + Statistics* stats = nullptr, + const std::atomic* shutting_down = nullptr); + + // Wrapper around MergeOperator::FullMergeV2() that records perf statistics. + // Result of merge will be written to result if status returned is OK. + // If operands is empty, the value will simply be copied to result. + // Set `update_num_ops_stats` to true if it is from a user read, so that + // the latency is sensitive. + // Returns one of the following statuses: + // - OK: Entries were successfully merged. + // - Corruption: Merge operator reported unsuccessful merge. The scope of the + // damage will be stored in `*op_failure_scope` when `op_failure_scope` is + // not nullptr + static Status TimedFullMerge(const MergeOperator* merge_operator, + const Slice& key, const Slice* value, + const std::vector& operands, + std::string* result, Logger* logger, + Statistics* statistics, SystemClock* clock, + Slice* result_operand, bool update_num_ops_stats, + MergeOperator::OpFailureScope* op_failure_scope); + + static Status TimedFullMergeWithEntity( + const MergeOperator* merge_operator, const Slice& key, Slice base_entity, + const std::vector& operands, std::string* result, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + MergeOperator::OpFailureScope* op_failure_scope); + + // During compaction, merge entries until we hit + // - a corrupted key + // - a Put/Delete, + // - a different user key, + // - a specific sequence number (snapshot boundary), + // - REMOVE_AND_SKIP_UNTIL returned from compaction filter, + // or - the end of iteration + // + // The result(s) of the merge can be accessed in `MergeHelper::keys()` and + // `MergeHelper::values()`, which are invalidated the next time `MergeUntil()` + // is called. `MergeOutputIterator` is specially designed to iterate the + // results of a `MergeHelper`'s most recent `MergeUntil()`. + // + // iter: (IN) points to the first merge type entry + // (OUT) points to the first entry not included in the merge process + // range_del_agg: (IN) filters merge operands covered by range tombstones. + // stop_before: (IN) a sequence number that merge should not cross. + // 0 means no restriction + // at_bottom: (IN) true if the iterator covers the bottem level, which means + // we could reach the start of the history of this user key. + // allow_data_in_errors: (IN) if true, data details will be displayed in + // error/log messages. + // blob_fetcher: (IN) blob fetcher object for the compaction's input version. + // prefetch_buffers: (IN/OUT) a collection of blob file prefetch buffers + // used for compaction readahead. 
+ // c_iter_stats: (OUT) compaction iteration statistics. + // + // Returns one of the following statuses: + // - OK: Entries were successfully merged. + // - MergeInProgress: Output consists of merge operands only. + // - Corruption: Merge operator reported unsuccessful merge or a corrupted + // key has been encountered and not expected (applies only when compiling + // with asserts removed). + // - ShutdownInProgress: interrupted by shutdown (*shutting_down == true). + // + // REQUIRED: The first key in the input is not corrupted. + Status MergeUntil(InternalIterator* iter, + CompactionRangeDelAggregator* range_del_agg, + const SequenceNumber stop_before, const bool at_bottom, + const bool allow_data_in_errors, + const BlobFetcher* blob_fetcher, + const std::string* const full_history_ts_low, + PrefetchBufferCollection* prefetch_buffers, + CompactionIterationStats* c_iter_stats); + + // Filters a merge operand using the compaction filter specified + // in the constructor. Returns the decision that the filter made. + // Uses compaction_filter_value_ and compaction_filter_skip_until_ for the + // optional outputs of compaction filter. + // user_key includes timestamp if user-defined timestamp is enabled. + CompactionFilter::Decision FilterMerge(const Slice& user_key, + const Slice& value_slice); + + // Query the merge result + // These are valid until the next MergeUntil call + // If the merging was successful: + // - keys() contains a single element with the latest sequence number of + // the merges. The type will be Put or Merge. See IMPORTANT 1 note, below. + // - values() contains a single element with the result of merging all the + // operands together + // + // IMPORTANT 1: the key type could change after the MergeUntil call. + // Put/Delete + Merge + ... + Merge => Put + // Merge + ... + Merge => Merge + // + // If the merge operator is not associative, and if a Put/Delete is not found + // then the merging will be unsuccessful. In this case: + // - keys() contains the list of internal keys seen in order of iteration. + // - values() contains the list of values (merges) seen in the same order. + // values() is parallel to keys() so that the first entry in + // keys() is the key associated with the first entry in values() + // and so on. These lists will be the same length. + // All of these pairs will be merges over the same user key. + // See IMPORTANT 2 note below. + // + // IMPORTANT 2: The entries were traversed in order from BACK to FRONT. + // So keys().back() was the first key seen by iterator. + // TODO: Re-style this comment to be like the first one + const std::deque& keys() const { return keys_; } + const std::vector& values() const { + return merge_context_.GetOperands(); + } + uint64_t TotalFilterTime() const { return total_filter_time_; } + bool HasOperator() const { return user_merge_operator_ != nullptr; } + + // If compaction filter returned REMOVE_AND_SKIP_UNTIL, this method will + // return true and fill *until with the key to which we should skip. + // If true, keys() and values() are empty. 
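+  //
+  // Hypothetical caller sketch (the compaction iterator does something of
+  // this shape; `input` is assumed context, not part of this class):
+  //
+  //   Slice skip_until;
+  //   if (merge_helper.FilteredUntil(&skip_until)) {
+  //     input->Seek(skip_until);  // drop this key and fast-forward
+  //   }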
+ bool FilteredUntil(Slice* skip_until) const { + if (!has_compaction_filter_skip_until_) { + return false; + } + assert(compaction_filter_ != nullptr); + assert(skip_until != nullptr); + assert(compaction_filter_skip_until_.Valid()); + *skip_until = compaction_filter_skip_until_.Encode(); + return true; + } + + private: + Env* env_; + SystemClock* clock_; + const Comparator* user_comparator_; + const MergeOperator* user_merge_operator_; + const CompactionFilter* compaction_filter_; + const std::atomic* shutting_down_; + Logger* logger_; + bool assert_valid_internal_key_; // enforce no internal key corruption? + bool allow_single_operand_; + SequenceNumber latest_snapshot_; + const SnapshotChecker* const snapshot_checker_; + int level_; + + // the scratch area that holds the result of MergeUntil + // valid up to the next MergeUntil call + + // Keeps track of the sequence of keys seen + std::deque keys_; + // Parallel with keys_; stores the operands + mutable MergeContext merge_context_; + + StopWatchNano filter_timer_; + uint64_t total_filter_time_; + Statistics* stats_; + + bool has_compaction_filter_skip_until_ = false; + std::string compaction_filter_value_; + InternalKey compaction_filter_skip_until_; + + bool IsShuttingDown() { + // This is a best-effort facility, so memory_order_relaxed is sufficient. + return shutting_down_ && shutting_down_->load(std::memory_order_relaxed); + } +}; + +// MergeOutputIterator can be used to iterate over the result of a merge. +class MergeOutputIterator { + public: + // The MergeOutputIterator is bound to a MergeHelper instance. + explicit MergeOutputIterator(const MergeHelper* merge_helper); + + // Seeks to the first record in the output. + void SeekToFirst(); + // Advances to the next record in the output. + void Next(); + + Slice key() { return Slice(*it_keys_); } + Slice value() { return Slice(*it_values_); } + bool Valid() { return it_keys_ != merge_helper_->keys().rend(); } + + private: + const MergeHelper* merge_helper_; + std::deque::const_reverse_iterator it_keys_; + std::vector::const_reverse_iterator it_values_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_operator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_operator.cc new file mode 100644 index 0000000000..d325856406 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/merge_operator.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +/** + * Back-end implementation details specific to the Merge Operator. + */ + +#include "rocksdb/merge_operator.h" + +namespace ROCKSDB_NAMESPACE { + +bool MergeOperator::FullMergeV2(const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const { + // If FullMergeV2 is not implemented, we convert the operand_list to + // std::deque and pass it to FullMerge + std::deque operand_list_str; + for (auto& op : merge_in.operand_list) { + operand_list_str.emplace_back(op.data(), op.size()); + } + return FullMerge(merge_in.key, merge_in.existing_value, operand_list_str, + &merge_out->new_value, merge_in.logger); +} + +// The default implementation of PartialMergeMulti, which invokes +// PartialMerge multiple times internally and merges two operands at +// a time. 
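+// With operands [o1, o2, o3] the default implementation computes a left
+// fold:
+//
+//   r1 = PartialMerge(key, o1, o2);
+//   r2 = PartialMerge(key, r1, o3);  // r2 ends up in *new_value
+//
+// and it returns false as soon as any pairwise PartialMerge fails.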
+bool MergeOperator::PartialMergeMulti(const Slice& key,
+                                      const std::deque<Slice>& operand_list,
+                                      std::string* new_value,
+                                      Logger* logger) const {
+  assert(operand_list.size() >= 2);
+  // Simply loop through the operands
+  Slice temp_slice(operand_list[0]);
+
+  for (size_t i = 1; i < operand_list.size(); ++i) {
+    auto& operand = operand_list[i];
+    std::string temp_value;
+    if (!PartialMerge(key, temp_slice, operand, &temp_value, logger)) {
+      return false;
+    }
+    swap(temp_value, *new_value);
+    temp_slice = Slice(*new_value);
+  }
+
+  // The result will be in *new_value. All merges succeeded.
+  return true;
+}
+
+// Given a "real" merge from the library, call the user's
+// associative merge function one-by-one on each of the operands.
+// NOTE: It is assumed that the client's merge-operator will handle any errors.
+bool AssociativeMergeOperator::FullMergeV2(
+    const MergeOperationInput& merge_in,
+    MergeOperationOutput* merge_out) const {
+  // Simply loop through the operands
+  Slice temp_existing;
+  const Slice* existing_value = merge_in.existing_value;
+  for (const auto& operand : merge_in.operand_list) {
+    std::string temp_value;
+    if (!Merge(merge_in.key, existing_value, operand, &temp_value,
+               merge_in.logger)) {
+      return false;
+    }
+    swap(temp_value, merge_out->new_value);
+    temp_existing = Slice(merge_out->new_value);
+    existing_value = &temp_existing;
+  }
+
+  // The result will be in *new_value. All merges succeeded.
+  return true;
+}
+
+// Call the user-defined simple merge on the operands.
+// NOTE: It is assumed that the client's merge-operator will handle any errors.
+bool AssociativeMergeOperator::PartialMerge(const Slice& key,
+                                            const Slice& left_operand,
+                                            const Slice& right_operand,
+                                            std::string* new_value,
+                                            Logger* logger) const {
+  return Merge(key, &left_operand, right_operand, new_value, logger);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.cc
new file mode 100644
index 0000000000..e93e2d68c4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#include "db/output_validator.h"
+
+#include "test_util/sync_point.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+Status OutputValidator::Add(const Slice& key, const Slice& value) {
+  if (enable_hash_) {
+    // Generate a rolling 64-bit hash of the key and values
+    paranoid_hash_ = NPHash64(key.data(), key.size(), paranoid_hash_);
+    paranoid_hash_ = NPHash64(value.data(), value.size(), paranoid_hash_);
+  }
+  if (enable_order_check_) {
+    TEST_SYNC_POINT_CALLBACK("OutputValidator::Add:order_check",
+                             /*arg=*/nullptr);
+    if (key.size() < kNumInternalBytes) {
+      return Status::Corruption(
+          "Compaction tries to write a key without internal bytes.");
+    }
+    // prev_key_ starts with empty.
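+    // For illustration: adding user keys "a" then "b" passes the check
+    // below, while "b" then "a" returns Corruption (hypothetical keys).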
+    if (!prev_key_.empty() && icmp_.Compare(key, prev_key_) < 0) {
+      return Status::Corruption("Compaction sees out-of-order keys.");
+    }
+    prev_key_.assign(key.data(), key.size());
+  }
+  return Status::OK();
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.h
new file mode 100644
index 0000000000..40635f9c44
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/output_validator.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+#include "db/dbformat.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+// A class that validates the key/value pairs inserted into an SST file.
+// Pass every key/value of the file to OutputValidator::Add(); the class
+// validates the key order and optionally calculates a hash of all the keys
+// and values.
+class OutputValidator {
+ public:
+  explicit OutputValidator(const InternalKeyComparator& icmp,
+                           bool enable_order_check, bool enable_hash,
+                           uint64_t precalculated_hash = 0)
+      : icmp_(icmp),
+        paranoid_hash_(precalculated_hash),
+        enable_order_check_(enable_order_check),
+        enable_hash_(enable_hash) {}
+
+  // Add a key to the KV sequence, and return whether the key follows
+  // criteria, e.g. key is ordered.
+  Status Add(const Slice& key, const Slice& value);
+
+  // Compares whether the keys fed to two validators hash to the same result.
+  // It can be used to compare the keys inserted into a file with what is
+  // read back. Returns true if the validation passes.
+  bool CompareValidator(const OutputValidator& other_validator) {
+    return GetHash() == other_validator.GetHash();
+  }
+
+  // Not (yet) intended to be persisted, so subject to change
+  // without notice between releases.
+  uint64_t GetHash() const { return paranoid_hash_; }
+
+ private:
+  const InternalKeyComparator& icmp_;
+  std::string prev_key_;
+  uint64_t paranoid_hash_ = 0;
+  bool enable_order_check_;
+  bool enable_hash_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.cc
new file mode 100644
index 0000000000..1306f45da6
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.cc
@@ -0,0 +1,111 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/periodic_task_scheduler.h"
+
+#include "rocksdb/system_clock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// `timer_mutex` is a global mutex that currently serves 3 purposes:
+// (1) to ensure calls to `Start()` and `Shutdown()` are serialized, as
+//     they are currently not implemented in a thread-safe way; and
+// (2) to ensure the `Timer::Add()`s and `Timer::Start()` run atomically, and
+//     the `Timer::Cancel()`s and `Timer::Shutdown()` run atomically.
+// (3) to protect `tasks_map_` in PeriodicTaskScheduler.
+// Note: It's not efficient to have a static global mutex, but for
+// PeriodicTaskScheduler it should be okay, as the operations are called
+// infrequently.
+static port::Mutex timer_mutex;
+
+static const std::map<PeriodicTaskType, uint64_t> kDefaultPeriodSeconds = {
+    {PeriodicTaskType::kDumpStats, kInvalidPeriodSec},
+    {PeriodicTaskType::kPersistStats, kInvalidPeriodSec},
+    {PeriodicTaskType::kFlushInfoLog, 10},
+    {PeriodicTaskType::kRecordSeqnoTime, kInvalidPeriodSec},
+};
+
+static const std::map<PeriodicTaskType, std::string> kPeriodicTaskTypeNames = {
+    {PeriodicTaskType::kDumpStats, "dump_st"},
+    {PeriodicTaskType::kPersistStats, "pst_st"},
+    {PeriodicTaskType::kFlushInfoLog, "flush_info_log"},
+    {PeriodicTaskType::kRecordSeqnoTime, "record_seq_time"},
+};
+
+Status PeriodicTaskScheduler::Register(PeriodicTaskType task_type,
+                                       const PeriodicTaskFunc& fn) {
+  return Register(task_type, fn, kDefaultPeriodSeconds.at(task_type));
+}
+
+Status PeriodicTaskScheduler::Register(PeriodicTaskType task_type,
+                                       const PeriodicTaskFunc& fn,
+                                       uint64_t repeat_period_seconds) {
+  MutexLock l(&timer_mutex);
+  static std::atomic<uint64_t> initial_delay(0);
+
+  if (repeat_period_seconds == kInvalidPeriodSec) {
+    return Status::InvalidArgument("Invalid task repeat period");
+  }
+  auto it = tasks_map_.find(task_type);
+  if (it != tasks_map_.end()) {
+    // the task already exists and it's the same, no update needed
+    if (it->second.repeat_every_sec == repeat_period_seconds) {
+      return Status::OK();
+    }
+    // cancel the existing one before registering the new one
+    timer_->Cancel(it->second.name);
+    tasks_map_.erase(it);
+  }
+
+  timer_->Start();
+  // put the task type name as a prefix, for easy debugging
+  std::string unique_id =
+      kPeriodicTaskTypeNames.at(task_type) + std::to_string(id_++);
+
+  bool succeeded = timer_->Add(
+      fn, unique_id,
+      (initial_delay.fetch_add(1) % repeat_period_seconds) * kMicrosInSecond,
+      repeat_period_seconds * kMicrosInSecond);
+  if (!succeeded) {
+    return Status::Aborted("Failed to register periodic task");
+  }
+  auto result = tasks_map_.try_emplace(
+      task_type, TaskInfo{unique_id, repeat_period_seconds});
+  if (!result.second) {
+    return Status::Aborted("Failed to add periodic task");
+  }
+  return Status::OK();
+}
+
+Status PeriodicTaskScheduler::Unregister(PeriodicTaskType task_type) {
+  MutexLock l(&timer_mutex);
+  auto it = tasks_map_.find(task_type);
+  if (it != tasks_map_.end()) {
+    timer_->Cancel(it->second.name);
+    tasks_map_.erase(it);
+  }
+  if (!timer_->HasPendingTask()) {
+    timer_->Shutdown();
+  }
+  return Status::OK();
+}
+
+Timer* PeriodicTaskScheduler::Default() {
+  static Timer timer(SystemClock::Default().get());
+  return &timer;
+}
+
+#ifndef NDEBUG
+void PeriodicTaskScheduler::TEST_OverrideTimer(SystemClock* clock) {
+  static Timer test_timer(clock);
+  test_timer.TEST_OverrideTimer(clock);
+  MutexLock l(&timer_mutex);
+  timer_ = &test_timer;
+}
+#endif  // NDEBUG
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.h
new file mode 100644
index 0000000000..4d129a6797
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/periodic_task_scheduler.h
@@ -0,0 +1,108 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
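+//
+// Usage sketch (illustrative only; the identifiers in the body are
+// hypothetical, not part of this header): a DB instance registers its
+// stats-dump task roughly like
+//
+//   PeriodicTaskScheduler scheduler;
+//   scheduler.Register(PeriodicTaskType::kDumpStats,
+//                      [db] { db->DumpStats(); },
+//                      /*repeat_period_seconds=*/600);
+//   ...
+//   scheduler.Unregister(PeriodicTaskType::kDumpStats);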
+
+#pragma once
+
+#include "util/timer.h"
+
+namespace ROCKSDB_NAMESPACE {
+class SystemClock;
+
+using PeriodicTaskFunc = std::function<void()>;
+
+constexpr uint64_t kInvalidPeriodSec = 0;
+
+// List of task types
+enum class PeriodicTaskType : uint8_t {
+  kDumpStats = 0,
+  kPersistStats,
+  kFlushInfoLog,
+  kRecordSeqnoTime,
+  kMax,
+};
+
+// PeriodicTaskScheduler contains the periodic tasks scheduled from the DB
+// instance. It's used to schedule/unschedule DumpStats(), PersistStats(),
+// FlushInfoLog(), etc. Each type of task can only have one instance;
+// re-registering the same task type only updates the repeat period.
+//
+// Internally, it uses a global, single-threaded timer object to run the
+// periodic task functions. The timer thread is always started, since the
+// info log flushing cannot be disabled.
+class PeriodicTaskScheduler {
+ public:
+  explicit PeriodicTaskScheduler() = default;
+
+  PeriodicTaskScheduler(const PeriodicTaskScheduler&) = delete;
+  PeriodicTaskScheduler(PeriodicTaskScheduler&&) = delete;
+  PeriodicTaskScheduler& operator=(const PeriodicTaskScheduler&) = delete;
+  PeriodicTaskScheduler& operator=(PeriodicTaskScheduler&&) = delete;
+
+  // Register a task with its default repeat period
+  Status Register(PeriodicTaskType task_type, const PeriodicTaskFunc& fn);
+
+  // Register a task with the specified repeat period. 0 is an invalid
+  // argument (kInvalidPeriodSec). To stop the task, use Unregister()
+  // specifically.
+  Status Register(PeriodicTaskType task_type, const PeriodicTaskFunc& fn,
+                  uint64_t repeat_period_seconds);
+
+  // Unregister the task
+  Status Unregister(PeriodicTaskType task_type);
+
+#ifndef NDEBUG
+  // Override the timer for the unittest
+  void TEST_OverrideTimer(SystemClock* clock);
+
+  // Call Timer TEST_WaitForRun(), which waits until the Timer starts waiting.
+  void TEST_WaitForRun(const std::function<void()>& callback) const {
+    if (timer_ != nullptr) {
+      timer_->TEST_WaitForRun(callback);
+    }
+  }
+
+  // Get the number of valid tasks in the global Timer
+  size_t TEST_GetValidTaskNum() const {
+    if (timer_ != nullptr) {
+      return timer_->TEST_GetPendingTaskNum();
+    }
+    return 0;
+  }
+
+  // Whether it has the specified task type registered
+  bool TEST_HasTask(PeriodicTaskType task_type) const {
+    auto it = tasks_map_.find(task_type);
+    return it != tasks_map_.end();
+  }
+#endif  // NDEBUG
+
+ private:
+  // default global Timer instance
+  static Timer* Default();
+
+  // Internal structure to store task information
+  struct TaskInfo {
+    TaskInfo(std::string _name, uint64_t _repeat_every_sec)
+        : name(std::move(_name)), repeat_every_sec(_repeat_every_sec) {}
+    std::string name;
+    uint64_t repeat_every_sec;
+  };
+
+  // Internal tasks map
+  std::map<PeriodicTaskType, TaskInfo> tasks_map_;
+
+  // Global timer pointer, which doesn't support adding/canceling tasks
+  // synchronously, hence the global `timer_mutex` for add/cancel.
+  Timer* timer_ = Default();
+
+  // Global task id, protected by the global `timer_mutex`
+  inline static uint64_t id_;
+
+  static constexpr uint64_t kMicrosInSecond = 1000U * 1000U;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/pinned_iterators_manager.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/pinned_iterators_manager.h
new file mode 100644
index 0000000000..0fcf231dad
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/pinned_iterators_manager.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+#include <algorithm>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "table/internal_iterator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// PinnedIteratorsManager will be notified whenever we need to pin an Iterator
+// and it will be responsible for deleting pinned Iterators when they are
+// not needed anymore.
+class PinnedIteratorsManager : public Cleanable {
+ public:
+  PinnedIteratorsManager() : pinning_enabled(false) {}
+  ~PinnedIteratorsManager() {
+    if (pinning_enabled) {
+      ReleasePinnedData();
+    }
+  }
+
+  // Move constructor and move assignment are allowed.
+  PinnedIteratorsManager(PinnedIteratorsManager&& other) noexcept = default;
+  PinnedIteratorsManager& operator=(PinnedIteratorsManager&& other) noexcept =
+      default;
+
+  // Enable Iterators pinning
+  void StartPinning() {
+    assert(pinning_enabled == false);
+    pinning_enabled = true;
+  }
+
+  // Is pinning enabled?
+  bool PinningEnabled() { return pinning_enabled; }
+
+  // Take ownership of iter and delete it when ReleasePinnedData() is called
+  void PinIterator(InternalIterator* iter, bool arena = false) {
+    if (arena) {
+      PinPtr(iter, &PinnedIteratorsManager::ReleaseArenaInternalIterator);
+    } else {
+      PinPtr(iter, &PinnedIteratorsManager::ReleaseInternalIterator);
+    }
+  }
+
+  using ReleaseFunction = void (*)(void* arg1);
+  void PinPtr(void* ptr, ReleaseFunction release_func) {
+    assert(pinning_enabled);
+    if (ptr == nullptr) {
+      return;
+    }
+    pinned_ptrs_.emplace_back(ptr, release_func);
+  }
+
+  // Release pinned Iterators
+  inline void ReleasePinnedData() {
+    assert(pinning_enabled == true);
+    pinning_enabled = false;
+
+    // Remove duplicate pointers
+    std::sort(pinned_ptrs_.begin(), pinned_ptrs_.end());
+    auto unique_end = std::unique(pinned_ptrs_.begin(), pinned_ptrs_.end());
+
+    for (auto i = pinned_ptrs_.begin(); i != unique_end; ++i) {
+      void* ptr = i->first;
+      ReleaseFunction release_func = i->second;
+      release_func(ptr);
+    }
+    pinned_ptrs_.clear();
+    // Also do cleanups from the base Cleanable
+    Cleanable::Reset();
+  }
+
+ private:
+  static void ReleaseInternalIterator(void* ptr) {
+    delete reinterpret_cast<InternalIterator*>(ptr);
+  }
+
+  static void ReleaseArenaInternalIterator(void* ptr) {
+    reinterpret_cast<InternalIterator*>(ptr)->~InternalIterator();
+  }
+
+  bool pinning_enabled;
+  std::vector<std::pair<void*, ReleaseFunction>> pinned_ptrs_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/post_memtable_callback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/post_memtable_callback.h
new file mode 100644
index 0000000000..fbf2fbe869
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/post_memtable_callback.h
@@ -0,0 +1,25 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/status.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Callback invoked after finishing writing to the memtable but before
+// publishing the sequence number to readers.
+// Note that with write-prepared/write-unprepared transactions with
+// two-write-queues, PreReleaseCallback is called before publishing the
+// sequence numbers to readers.
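+//
+// An illustrative no-op implementation (hypothetical, not part of this
+// header) would be:
+//   class NoopPostMemTableCallback : public PostMemTableCallback {
+//     Status operator()(SequenceNumber /*seq*/, bool /*disable_memtable*/)
+//         override {
+//       return Status::OK();
+//     }
+//   };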
+class PostMemTableCallback {
+ public:
+  virtual ~PostMemTableCallback() {}
+
+  virtual Status operator()(SequenceNumber seq, bool disable_memtable) = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/pre_release_callback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/pre_release_callback.h
new file mode 100644
index 0000000000..6b90394877
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/pre_release_callback.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/status.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class PreReleaseCallback {
+ public:
+  virtual ~PreReleaseCallback() {}
+
+  // Will be called while on the write thread after the write to the WAL and
+  // before the write to the memtable. This is useful if any operation needs
+  // to be done before the write gets visible to the readers, or if we want to
+  // reduce the overhead of locking by updating something sequentially while
+  // we are on the write thread. If the callback fails, this function returns
+  // a non-OK status, the sequence number will not be released, and the same
+  // status will be propagated to all the writers in the write group.
+  // seq is the sequence number that is used for this write and will be
+  // released.
+  // is_mem_disabled is currently used for debugging purposes to assert that
+  // the callback is done from the right write queue.
+  // If non-zero, log_number indicates the WAL log to which we wrote.
+  // index >= 0 specifies the order of callback in the same write thread.
+  // total > index specifies the total number of callbacks in the same write
+  // thread. Together with index, it could be used to reduce redundant
+  // operations among the callbacks.
+  virtual Status Callback(SequenceNumber seq, bool is_mem_disabled,
+                          uint64_t log_number, size_t index, size_t total) = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.cc
new file mode 100644
index 0000000000..6e76f9c725
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.cc
@@ -0,0 +1,555 @@
+// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/range_del_aggregator.h"
+
+#include "db/compaction/compaction_iteration_stats.h"
+#include "db/dbformat.h"
+#include "db/pinned_iterators_manager.h"
+#include "db/range_tombstone_fragmenter.h"
+#include "db/version_edit.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/types.h"
+#include "table/internal_iterator.h"
+#include "table/scoped_arena_iterator.h"
+#include "table/table_builder.h"
+#include "util/heap.h"
+#include "util/kv_map.h"
+#include "util/vector_iterator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+TruncatedRangeDelIterator::TruncatedRangeDelIterator(
+    std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
+    const InternalKeyComparator* icmp, const InternalKey* smallest,
+    const InternalKey* largest)
+    : iter_(std::move(iter)),
+      icmp_(icmp),
+      smallest_ikey_(smallest),
+      largest_ikey_(largest) {
+  // Set up bounds such that range tombstones from this iterator are
+  // truncated to range [smallest_, largest_).
+  if (smallest != nullptr) {
+    pinned_bounds_.emplace_back();
+    auto& parsed_smallest = pinned_bounds_.back();
+    Status pik_status = ParseInternalKey(smallest->Encode(), &parsed_smallest,
+                                         false /* log_err_key */);  // TODO
+    pik_status.PermitUncheckedError();
+    parsed_smallest.type = kTypeMaxValid;
+    assert(pik_status.ok());
+    smallest_ = &parsed_smallest;
+  }
+  if (largest != nullptr) {
+    pinned_bounds_.emplace_back();
+    auto& parsed_largest = pinned_bounds_.back();
+
+    Status pik_status = ParseInternalKey(largest->Encode(), &parsed_largest,
+                                         false /* log_err_key */);  // TODO
+    pik_status.PermitUncheckedError();
+    assert(pik_status.ok());
+
+    if (parsed_largest.type == kTypeRangeDeletion &&
+        parsed_largest.sequence == kMaxSequenceNumber) {
+      // The file boundary has been artificially extended by a range tombstone.
+      // We do not need to adjust largest to properly truncate range
+      // tombstones that extend past the boundary.
+    } else if (parsed_largest.sequence == 0) {
+      // The largest key in the sstable has a sequence number of 0. Since we
+      // guarantee that no internal keys with the same user key and sequence
+      // number can exist in a DB, we know that the largest key in this
+      // sstable cannot exist as the smallest key in the next sstable. This
+      // further implies that no range tombstone in this sstable covers
+      // largest; otherwise, the file boundary would have been artificially
+      // extended.
+      //
+      // Therefore, we will never truncate a range tombstone at largest, so we
+      // can leave it unchanged.
+      // TODO: maybe use kMaxValid here to ensure range tombstone having
+      // distinct key from point keys.
+    } else {
+      // The same user key may straddle two sstable boundaries. To ensure that
+      // the truncated end key can cover the largest key in this sstable,
+      // reduce its sequence number by 1.
+      parsed_largest.sequence -= 1;
+      // This line is not needed for correctness, but it ensures that the
+      // truncated end key is not covering keys from the next SST file.
+      parsed_largest.type = kTypeMaxValid;
+    }
+    largest_ = &parsed_largest;
+  }
+}
+
+bool TruncatedRangeDelIterator::Valid() const {
+  assert(iter_ != nullptr);
+  return iter_->Valid() &&
+         (smallest_ == nullptr ||
+          icmp_->Compare(*smallest_, iter_->parsed_end_key()) < 0) &&
+         (largest_ == nullptr ||
+          icmp_->Compare(iter_->parsed_start_key(), *largest_) < 0);
+}
+
+// NOTE: target is a user key, with timestamp if enabled.
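+// For illustration: with file bounds [b, d), a fragment [a, z)@7 from the
+// underlying iterator is exposed as [b, d)@7, so Seek("c") positions on it
+// while Seek("e") invalidates the iterator (hypothetical keys).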
+void TruncatedRangeDelIterator::Seek(const Slice& target) {
+  if (largest_ != nullptr &&
+      icmp_->Compare(*largest_, ParsedInternalKey(target, kMaxSequenceNumber,
+                                                  kTypeRangeDeletion)) <= 0) {
+    iter_->Invalidate();
+    return;
+  }
+  if (smallest_ != nullptr &&
+      icmp_->user_comparator()->Compare(target, smallest_->user_key) < 0) {
+    iter_->Seek(smallest_->user_key);
+    return;
+  }
+  iter_->Seek(target);
+}
+
+void TruncatedRangeDelIterator::SeekInternalKey(const Slice& target) {
+  if (largest_ && icmp_->Compare(*largest_, target) <= 0) {
+    iter_->Invalidate();
+    return;
+  }
+  if (smallest_ && icmp_->Compare(target, *smallest_) < 0) {
+    // Since target < smallest, target < largest_.
+    // This seek must land on a range tombstone where end_key() > target,
+    // so there is no need to check again.
+    iter_->Seek(smallest_->user_key);
+  } else {
+    iter_->Seek(ExtractUserKey(target));
+    while (Valid() && icmp_->Compare(end_key(), target) <= 0) {
+      Next();
+    }
+  }
+}
+
+// NOTE: target is a user key, with timestamp if enabled.
+void TruncatedRangeDelIterator::SeekForPrev(const Slice& target) {
+  if (smallest_ != nullptr &&
+      icmp_->Compare(ParsedInternalKey(target, 0, kTypeRangeDeletion),
+                     *smallest_) < 0) {
+    iter_->Invalidate();
+    return;
+  }
+  if (largest_ != nullptr &&
+      icmp_->user_comparator()->Compare(largest_->user_key, target) < 0) {
+    iter_->SeekForPrev(largest_->user_key);
+    return;
+  }
+  iter_->SeekForPrev(target);
+}
+
+void TruncatedRangeDelIterator::SeekToFirst() {
+  if (smallest_ != nullptr) {
+    iter_->Seek(smallest_->user_key);
+    return;
+  }
+  iter_->SeekToTopFirst();
+}
+
+void TruncatedRangeDelIterator::SeekToLast() {
+  if (largest_ != nullptr) {
+    iter_->SeekForPrev(largest_->user_key);
+    return;
+  }
+  iter_->SeekToTopLast();
+}
+
+std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
+TruncatedRangeDelIterator::SplitBySnapshot(
+    const std::vector<SequenceNumber>& snapshots) {
+  using FragmentedIterPair =
+      std::pair<const SequenceNumber,
+                std::unique_ptr<FragmentedRangeTombstoneIterator>>;
+
+  auto split_untruncated_iters = iter_->SplitBySnapshot(snapshots);
+  std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
+      split_truncated_iters;
+  std::for_each(
+      split_untruncated_iters.begin(), split_untruncated_iters.end(),
+      [&](FragmentedIterPair& iter_pair) {
+        auto truncated_iter = std::make_unique<TruncatedRangeDelIterator>(
+            std::move(iter_pair.second), icmp_, smallest_ikey_, largest_ikey_);
+        split_truncated_iters.emplace(iter_pair.first,
+                                      std::move(truncated_iter));
+      });
+  return split_truncated_iters;
+}
+
+ForwardRangeDelIterator::ForwardRangeDelIterator(
+    const InternalKeyComparator* icmp)
+    : icmp_(icmp),
+      unused_idx_(0),
+      active_seqnums_(SeqMaxComparator()),
+      active_iters_(EndKeyMinComparator(icmp)),
+      inactive_iters_(StartKeyMinComparator(icmp)) {}
+
+bool ForwardRangeDelIterator::ShouldDelete(const ParsedInternalKey& parsed) {
+  // Move active iterators that end before parsed.
+  while (!active_iters_.empty() &&
+         icmp_->Compare((*active_iters_.top())->end_key(), parsed) <= 0) {
+    TruncatedRangeDelIterator* iter = PopActiveIter();
+    do {
+      iter->Next();
+    } while (iter->Valid() && icmp_->Compare(iter->end_key(), parsed) <= 0);
+    PushIter(iter, parsed);
+    assert(active_iters_.size() == active_seqnums_.size());
+  }
+
+  // Move inactive iterators that start before parsed.
+  while (!inactive_iters_.empty() &&
+         icmp_->Compare(inactive_iters_.top()->start_key(), parsed) <= 0) {
+    TruncatedRangeDelIterator* iter = PopInactiveIter();
+    while (iter->Valid() && icmp_->Compare(iter->end_key(), parsed) <= 0) {
+      iter->Next();
+    }
+    PushIter(iter, parsed);
+    assert(active_iters_.size() == active_seqnums_.size());
+  }
+
+  return active_seqnums_.empty()
+             ? false
+             : (*active_seqnums_.begin())->seq() > parsed.sequence;
+}
+
+void ForwardRangeDelIterator::Invalidate() {
+  unused_idx_ = 0;
+  active_iters_.clear();
+  active_seqnums_.clear();
+  inactive_iters_.clear();
+}
+
+ReverseRangeDelIterator::ReverseRangeDelIterator(
+    const InternalKeyComparator* icmp)
+    : icmp_(icmp),
+      unused_idx_(0),
+      active_seqnums_(SeqMaxComparator()),
+      active_iters_(StartKeyMaxComparator(icmp)),
+      inactive_iters_(EndKeyMaxComparator(icmp)) {}
+
+bool ReverseRangeDelIterator::ShouldDelete(const ParsedInternalKey& parsed) {
+  // Move active iterators that start after parsed.
+  while (!active_iters_.empty() &&
+         icmp_->Compare(parsed, (*active_iters_.top())->start_key()) < 0) {
+    TruncatedRangeDelIterator* iter = PopActiveIter();
+    do {
+      iter->Prev();
+    } while (iter->Valid() && icmp_->Compare(parsed, iter->start_key()) < 0);
+    PushIter(iter, parsed);
+    assert(active_iters_.size() == active_seqnums_.size());
+  }
+
+  // Move inactive iterators that end after parsed.
+  while (!inactive_iters_.empty() &&
+         icmp_->Compare(parsed, inactive_iters_.top()->end_key()) < 0) {
+    TruncatedRangeDelIterator* iter = PopInactiveIter();
+    while (iter->Valid() && icmp_->Compare(parsed, iter->start_key()) < 0) {
+      iter->Prev();
+    }
+    PushIter(iter, parsed);
+    assert(active_iters_.size() == active_seqnums_.size());
+  }
+
+  return active_seqnums_.empty()
+             ? false
+             : (*active_seqnums_.begin())->seq() > parsed.sequence;
+}
+
+void ReverseRangeDelIterator::Invalidate() {
+  unused_idx_ = 0;
+  active_iters_.clear();
+  active_seqnums_.clear();
+  inactive_iters_.clear();
+}
+
+bool RangeDelAggregator::StripeRep::ShouldDelete(
+    const ParsedInternalKey& parsed, RangeDelPositioningMode mode) {
+  if (!InStripe(parsed.sequence) || IsEmpty()) {
+    return false;
+  }
+  switch (mode) {
+    case RangeDelPositioningMode::kForwardTraversal:
+      InvalidateReverseIter();
+
+      // Pick up previously unseen iterators.
+      for (auto it = std::next(iters_.begin(), forward_iter_.UnusedIdx());
+           it != iters_.end(); ++it, forward_iter_.IncUnusedIdx()) {
+        auto& iter = *it;
+        forward_iter_.AddNewIter(iter.get(), parsed);
+      }
+
+      return forward_iter_.ShouldDelete(parsed);
+    case RangeDelPositioningMode::kBackwardTraversal:
+      InvalidateForwardIter();
+
+      // Pick up previously unseen iterators.
+      for (auto it = std::next(iters_.begin(), reverse_iter_.UnusedIdx());
+           it != iters_.end(); ++it, reverse_iter_.IncUnusedIdx()) {
+        auto& iter = *it;
+        reverse_iter_.AddNewIter(iter.get(), parsed);
+      }
+
+      return reverse_iter_.ShouldDelete(parsed);
+    default:
+      assert(false);
+      return false;
+  }
+}
+
+bool RangeDelAggregator::StripeRep::IsRangeOverlapped(const Slice& start,
+                                                      const Slice& end) {
+  Invalidate();
+
+  // Set the internal start/end keys so that:
+  // - if start_ikey has the same user key and sequence number as the
+  //   current end key, start_ikey will be considered greater; and
+  // - if end_ikey has the same user key and sequence number as the current
+  //   start key, end_ikey will be considered greater.
+  ParsedInternalKey start_ikey(start, kMaxSequenceNumber,
+                               static_cast<ValueType>(0));
+  ParsedInternalKey end_ikey(end, 0, static_cast<ValueType>(0));
+  for (auto& iter : iters_) {
+    bool checked_candidate_tombstones = false;
+    for (iter->SeekForPrev(start);
+         iter->Valid() && icmp_->Compare(iter->start_key(), end_ikey) <= 0;
+         iter->Next()) {
+      checked_candidate_tombstones = true;
+      if (icmp_->Compare(start_ikey, iter->end_key()) < 0 &&
+          icmp_->Compare(iter->start_key(), end_ikey) <= 0) {
+        return true;
+      }
+    }
+
+    if (!checked_candidate_tombstones) {
+      // Do an additional check for when the end of the range is the begin
+      // key of a tombstone, which we missed earlier since SeekForPrev'ing
+      // to the start was invalid.
+      iter->SeekForPrev(end);
+      if (iter->Valid() && icmp_->Compare(start_ikey, iter->end_key()) < 0 &&
+          icmp_->Compare(iter->start_key(), end_ikey) <= 0) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+void ReadRangeDelAggregator::AddTombstones(
+    std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
+    const InternalKey* smallest, const InternalKey* largest) {
+  if (input_iter == nullptr || input_iter->empty()) {
+    return;
+  }
+  rep_.AddTombstones(std::make_unique<TruncatedRangeDelIterator>(
+      std::move(input_iter), icmp_, smallest, largest));
+}
+
+bool ReadRangeDelAggregator::ShouldDeleteImpl(const ParsedInternalKey& parsed,
+                                              RangeDelPositioningMode mode) {
+  return rep_.ShouldDelete(parsed, mode);
+}
+
+bool ReadRangeDelAggregator::IsRangeOverlapped(const Slice& start,
+                                               const Slice& end) {
+  InvalidateRangeDelMapPositions();
+  return rep_.IsRangeOverlapped(start, end);
+}
+
+void CompactionRangeDelAggregator::AddTombstones(
+    std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
+    const InternalKey* smallest, const InternalKey* largest) {
+  if (input_iter == nullptr || input_iter->empty()) {
+    return;
+  }
+  // This bounds the output of CompactionRangeDelAggregator::NewIterator.
+  if (!trim_ts_.empty()) {
+    assert(icmp_->user_comparator()->timestamp_size() > 0);
+    input_iter->SetTimestampUpperBound(&trim_ts_);
+  }
+
+  assert(input_iter->lower_bound() == 0);
+  assert(input_iter->upper_bound() == kMaxSequenceNumber);
+  parent_iters_.emplace_back(new TruncatedRangeDelIterator(
+      std::move(input_iter), icmp_, smallest, largest));
+
+  Slice* ts_upper_bound = nullptr;
+  if (!ts_upper_bound_.empty()) {
+    assert(icmp_->user_comparator()->timestamp_size() > 0);
+    ts_upper_bound = &ts_upper_bound_;
+  }
+  auto split_iters = parent_iters_.back()->SplitBySnapshot(*snapshots_);
+  for (auto& split_iter : split_iters) {
+    auto it = reps_.find(split_iter.first);
+    if (it == reps_.end()) {
+      bool inserted;
+      SequenceNumber upper_bound = split_iter.second->upper_bound();
+      SequenceNumber lower_bound = split_iter.second->lower_bound();
+      std::tie(it, inserted) = reps_.emplace(
+          split_iter.first, StripeRep(icmp_, upper_bound, lower_bound));
+      assert(inserted);
+    }
+    assert(it != reps_.end());
+    // ts_upper_bound is used to bound ShouldDelete() to only consider range
+    // tombstones under full_history_ts_low_ and trim_ts_. Keys covered by
+    // range tombstones that are above full_history_ts_low_ should not be
+    // dropped prematurely: the user may read with a timestamp between the
+    // range tombstone and the covered key. Note that we cannot set the
+    // timestamp upper bound on the original `input_iter`, since the
+    // `input_iter`s are later used in
+    // CompactionRangeDelAggregator::NewIterator to output range tombstones
+    // for persistence. We do not want to only persist range tombstones with
+    // timestamp lower than ts_upper_bound.
+    split_iter.second->SetTimestampUpperBound(ts_upper_bound);
+    it->second.AddTombstones(std::move(split_iter.second));
+  }
+}
+
+bool CompactionRangeDelAggregator::ShouldDelete(
+    const ParsedInternalKey& parsed, RangeDelPositioningMode mode) {
+  auto it = reps_.lower_bound(parsed.sequence);
+  if (it == reps_.end()) {
+    return false;
+  }
+  return it->second.ShouldDelete(parsed, mode);
+}
+
+namespace {
+
+// Produce a sorted (by start internal key) stream of range tombstones from
+// `children`. lower_bound and upper_bound on the internal key can optionally
+// be specified. Range tombstones that end before lower_bound or start after
+// upper_bound are excluded.
+// If user-defined timestamp is enabled, lower_bound and upper_bound should
+// contain timestamp.
+class TruncatedRangeDelMergingIter : public InternalIterator {
+ public:
+  TruncatedRangeDelMergingIter(
+      const InternalKeyComparator* icmp, const Slice* lower_bound,
+      const Slice* upper_bound,
+      const std::vector<std::unique_ptr<TruncatedRangeDelIterator>>& children)
+      : icmp_(icmp),
+        lower_bound_(lower_bound),
+        upper_bound_(upper_bound),
+        heap_(StartKeyMinComparator(icmp)),
+        ts_sz_(icmp_->user_comparator()->timestamp_size()) {
+    for (auto& child : children) {
+      if (child != nullptr) {
+        assert(child->lower_bound() == 0);
+        assert(child->upper_bound() == kMaxSequenceNumber);
+        children_.push_back(child.get());
+      }
+    }
+  }
+
+  bool Valid() const override {
+    return !heap_.empty() && !AfterEndKey(heap_.top());
+  }
+  Status status() const override { return Status::OK(); }
+
+  void SeekToFirst() override {
+    heap_.clear();
+    for (auto& child : children_) {
+      if (lower_bound_ != nullptr) {
+        child->Seek(ExtractUserKey(*lower_bound_));
+        // Since the above `Seek()` operates on a user key while
+        // `lower_bound_` is an internal key, we may need to advance `child`
+        // farther for it to be in bounds.
+        while (child->Valid() && BeforeStartKey(child)) {
+          child->InternalNext();
+        }
+      } else {
+        child->SeekToFirst();
+      }
+      if (child->Valid()) {
+        heap_.push(child);
+      }
+    }
+  }
+
+  void Next() override {
+    auto* top = heap_.top();
+    top->InternalNext();
+    if (top->Valid()) {
+      heap_.replace_top(top);
+    } else {
+      heap_.pop();
+    }
+  }
+
+  Slice key() const override {
+    auto* top = heap_.top();
+    if (ts_sz_) {
+      cur_start_key_.Set(top->start_key().user_key, top->seq(),
+                         kTypeRangeDeletion, top->timestamp());
+    } else {
+      cur_start_key_.Set(top->start_key().user_key, top->seq(),
+                         kTypeRangeDeletion);
+    }
+    assert(top->start_key().user_key.size() >= ts_sz_);
+    return cur_start_key_.Encode();
+  }
+
+  Slice value() const override {
+    auto* top = heap_.top();
+    if (!ts_sz_) {
+      return top->end_key().user_key;
+    }
+    assert(top->timestamp().size() == ts_sz_);
+    cur_end_key_.clear();
+    cur_end_key_.append(top->end_key().user_key.data(),
+                        top->end_key().user_key.size() - ts_sz_);
+    cur_end_key_.append(top->timestamp().data(), ts_sz_);
+    return cur_end_key_;
+  }
+
+  // Unused InternalIterator methods
+  void Prev() override { assert(false); }
+  void Seek(const Slice& /* target */) override { assert(false); }
+  void SeekForPrev(const Slice& /* target */) override { assert(false); }
+  void SeekToLast() override { assert(false); }
+
+ private:
+  bool BeforeStartKey(const TruncatedRangeDelIterator* iter) const {
+    if (lower_bound_ == nullptr) {
+      return false;
+    }
+    return icmp_->Compare(iter->end_key(), *lower_bound_) <= 0;
+  }
+
+  bool AfterEndKey(const TruncatedRangeDelIterator* iter) const {
+    if (upper_bound_ == nullptr) {
+      return false;
+    }
+    return icmp_->Compare(iter->start_key(), *upper_bound_) > 0;
+  }
+
+  const InternalKeyComparator* icmp_;
+  const Slice* lower_bound_;
+  const Slice* upper_bound_;
+  BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator> heap_;
+  std::vector<TruncatedRangeDelIterator*> children_;
+
+  mutable InternalKey cur_start_key_;
+  mutable std::string cur_end_key_;
+  size_t ts_sz_;
+};
+
+}  // anonymous namespace
+
+std::unique_ptr<FragmentedRangeTombstoneIterator>
+CompactionRangeDelAggregator::NewIterator(const Slice* lower_bound,
+                                          const Slice* upper_bound) {
+  InvalidateRangeDelMapPositions();
+  auto merging_iter = std::make_unique<TruncatedRangeDelMergingIter>(
+      icmp_, lower_bound, upper_bound, parent_iters_);
+
+  auto fragmented_tombstone_list =
+      std::make_shared<FragmentedRangeTombstoneList>(
+          std::move(merging_iter), *icmp_, true /* for_compaction */,
+          *snapshots_);
+
+  return std::make_unique<FragmentedRangeTombstoneIterator>(
+      fragmented_tombstone_list, *icmp_, kMaxSequenceNumber /* upper_bound */);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.h
new file mode 100644
index 0000000000..dc1e730389
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_del_aggregator.h
@@ -0,0 +1,478 @@
+// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <algorithm>
+#include <iterator>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "db/compaction/compaction_iteration_stats.h"
+#include "db/dbformat.h"
+#include "db/pinned_iterators_manager.h"
+#include "db/range_del_aggregator.h"
+#include "db/range_tombstone_fragmenter.h"
+#include "db/version_edit.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/types.h"
+#include "table/internal_iterator.h"
+#include "table/scoped_arena_iterator.h"
+#include "table/table_builder.h"
+#include "util/heap.h"
+#include "util/kv_map.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TruncatedRangeDelIterator {
+ public:
+  TruncatedRangeDelIterator(
+      std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
+      const InternalKeyComparator* icmp, const InternalKey* smallest,
+      const InternalKey* largest);
+
+  bool Valid() const;
+
+  void Next() { iter_->TopNext(); }
+  void Prev() { iter_->TopPrev(); }
+
+  void InternalNext() { iter_->Next(); }
+
+  // Seeks to the tombstone with the highest visible sequence number that
+  // covers target (a user key). If no such tombstone exists, the position
+  // will be at the earliest tombstone that ends after target.
+  // REQUIRES: target is a user key.
+  void Seek(const Slice& target);
+
+  // Seeks to the first range tombstone with end_key() > target.
+  void SeekInternalKey(const Slice& target);
+
+  // Seeks to the tombstone with the highest visible sequence number that
+  // covers target (a user key). If no such tombstone exists, the position
+  // will be at the latest tombstone that starts before target.
+  void SeekForPrev(const Slice& target);
+
+  void SeekToFirst();
+  void SeekToLast();
+
+  ParsedInternalKey start_key() const {
+    return (smallest_ == nullptr ||
+            icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0)
+               ? iter_->parsed_start_key()
+               : *smallest_;
+  }
+
+  ParsedInternalKey end_key() const {
+    return (largest_ == nullptr ||
+            icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0)
+               ? iter_->parsed_end_key()
+               : *largest_;
+  }
+
+  SequenceNumber seq() const { return iter_->seq(); }
+  Slice timestamp() const {
+    assert(icmp_->user_comparator()->timestamp_size());
+    return iter_->timestamp();
+  }
+  void SetTimestampUpperBound(const Slice* ts_upper_bound) {
+    iter_->SetTimestampUpperBound(ts_upper_bound);
+  }
+
+  std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
+  SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);
+
+  SequenceNumber upper_bound() const { return iter_->upper_bound(); }
+
+  SequenceNumber lower_bound() const { return iter_->lower_bound(); }
+
+ private:
+  std::unique_ptr<FragmentedRangeTombstoneIterator> iter_;
+  const InternalKeyComparator* icmp_;
+  const ParsedInternalKey* smallest_ = nullptr;
+  const ParsedInternalKey* largest_ = nullptr;
+  std::list<ParsedInternalKey> pinned_bounds_;
+
+  const InternalKey* smallest_ikey_;
+  const InternalKey* largest_ikey_;
+};
+
+struct SeqMaxComparator {
+  bool operator()(const TruncatedRangeDelIterator* a,
+                  const TruncatedRangeDelIterator* b) const {
+    return a->seq() > b->seq();
+  }
+};
+
+struct StartKeyMinComparator {
+  explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}
+
+  bool operator()(const TruncatedRangeDelIterator* a,
+                  const TruncatedRangeDelIterator* b) const {
+    return icmp->Compare(a->start_key(), b->start_key()) > 0;
+  }
+
+  const InternalKeyComparator* icmp;
+};
+
+class ForwardRangeDelIterator {
+ public:
+  explicit ForwardRangeDelIterator(const InternalKeyComparator* icmp);
+
+  bool ShouldDelete(const ParsedInternalKey& parsed);
+  void Invalidate();
+
+  void AddNewIter(TruncatedRangeDelIterator* iter,
+                  const ParsedInternalKey& parsed) {
+    iter->Seek(parsed.user_key);
+    PushIter(iter, parsed);
+    assert(active_iters_.size() == active_seqnums_.size());
+  }
+
+  size_t UnusedIdx() const { return unused_idx_; }
+  void IncUnusedIdx() { unused_idx_++; }
+
+ private:
+  using ActiveSeqSet =
+      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;
+
+  struct EndKeyMinComparator {
+    explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}
+
+    bool operator()(const ActiveSeqSet::const_iterator& a,
+                    const ActiveSeqSet::const_iterator& b) const {
+      return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0;
+    }
+
+    const InternalKeyComparator* icmp;
+  };
+
+  void PushIter(TruncatedRangeDelIterator* iter,
+                const ParsedInternalKey& parsed) {
+    if (!iter->Valid()) {
+      // The iterator has been fully consumed, so we don't need to add it to
+      // either of the heaps.
+      return;
+    }
+    int cmp = icmp_->Compare(parsed, iter->start_key());
+    if (cmp < 0) {
+      PushInactiveIter(iter);
+    } else {
+      PushActiveIter(iter);
+    }
+  }
+
+  void PushActiveIter(TruncatedRangeDelIterator* iter) {
+    auto seq_pos = active_seqnums_.insert(iter);
+    active_iters_.push(seq_pos);
+  }
+
+  TruncatedRangeDelIterator* PopActiveIter() {
+    auto active_top = active_iters_.top();
+    auto iter = *active_top;
+    active_iters_.pop();
+    active_seqnums_.erase(active_top);
+    return iter;
+  }
+
+  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
+    inactive_iters_.push(iter);
+  }
+
+  TruncatedRangeDelIterator* PopInactiveIter() {
+    auto* iter = inactive_iters_.top();
+    inactive_iters_.pop();
+    return iter;
+  }
+
+  const InternalKeyComparator* icmp_;
+  size_t unused_idx_;
+  ActiveSeqSet active_seqnums_;
+  BinaryHeap<ActiveSeqSet::const_iterator, EndKeyMinComparator> active_iters_;
+  BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator>
+      inactive_iters_;
+};
+
+class ReverseRangeDelIterator {
+ public:
+  explicit ReverseRangeDelIterator(const InternalKeyComparator* icmp);
+
+  bool ShouldDelete(const ParsedInternalKey& parsed);
+  void Invalidate();
+
+  void AddNewIter(TruncatedRangeDelIterator* iter,
+                  const ParsedInternalKey& parsed) {
+    iter->SeekForPrev(parsed.user_key);
+    PushIter(iter, parsed);
+    assert(active_iters_.size() == active_seqnums_.size());
+  }
+
+  size_t UnusedIdx() const { return unused_idx_; }
+  void IncUnusedIdx() { unused_idx_++; }
+
+ private:
+  using ActiveSeqSet =
+      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;
+
+  struct EndKeyMaxComparator {
+    explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}
+
+    bool operator()(const TruncatedRangeDelIterator* a,
+                    const TruncatedRangeDelIterator* b) const {
+      return icmp->Compare(a->end_key(), b->end_key()) < 0;
+    }
+
+    const InternalKeyComparator* icmp;
+  };
+  struct StartKeyMaxComparator {
+    explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}
+
+    bool operator()(const ActiveSeqSet::const_iterator& a,
+                    const ActiveSeqSet::const_iterator& b) const {
+      return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0;
+    }
+
+    const InternalKeyComparator* icmp;
+  };
+
+  void PushIter(TruncatedRangeDelIterator* iter,
+                const ParsedInternalKey& parsed) {
+    if (!iter->Valid()) {
+      // The iterator has been fully consumed, so we don't need to add it to
+      // either of the heaps.
+    } else if (icmp_->Compare(iter->end_key(), parsed) <= 0) {
+      PushInactiveIter(iter);
+    } else {
+      PushActiveIter(iter);
+    }
+  }
+
+  void PushActiveIter(TruncatedRangeDelIterator* iter) {
+    auto seq_pos = active_seqnums_.insert(iter);
+    active_iters_.push(seq_pos);
+  }
+
+  TruncatedRangeDelIterator* PopActiveIter() {
+    auto active_top = active_iters_.top();
+    auto iter = *active_top;
+    active_iters_.pop();
+    active_seqnums_.erase(active_top);
+    return iter;
+  }
+
+  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
+    inactive_iters_.push(iter);
+  }
+
+  TruncatedRangeDelIterator* PopInactiveIter() {
+    auto* iter = inactive_iters_.top();
+    inactive_iters_.pop();
+    return iter;
+  }
+
+  const InternalKeyComparator* icmp_;
+  size_t unused_idx_;
+  ActiveSeqSet active_seqnums_;
+  BinaryHeap<ActiveSeqSet::const_iterator, StartKeyMaxComparator>
+      active_iters_;
+  BinaryHeap<TruncatedRangeDelIterator*, EndKeyMaxComparator> inactive_iters_;
+};
+
+enum class RangeDelPositioningMode { kForwardTraversal, kBackwardTraversal };
+
+class RangeDelAggregator {
+ public:
+  explicit RangeDelAggregator(const InternalKeyComparator* icmp)
+      : icmp_(icmp) {}
+  virtual ~RangeDelAggregator() {}
+
+  virtual void AddTombstones(
+      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
+      const InternalKey* smallest = nullptr,
+      const InternalKey* largest = nullptr) = 0;
+
+  bool ShouldDelete(const Slice& ikey, RangeDelPositioningMode mode) {
+    ParsedInternalKey parsed;
+
+    Status pik_status =
+        ParseInternalKey(ikey, &parsed, false /* log_err_key */);  // TODO
+    assert(pik_status.ok());
+    if (!pik_status.ok()) {
+      return false;
+    }
+
+    return ShouldDelete(parsed, mode);
+  }
+  virtual bool ShouldDelete(const ParsedInternalKey& parsed,
+                            RangeDelPositioningMode mode) = 0;
+
+  virtual void InvalidateRangeDelMapPositions() = 0;
+
+  virtual bool IsEmpty() const = 0;
+
+  bool AddFile(uint64_t file_number) {
+    return files_seen_.insert(file_number).second;
+  }
+
+ protected:
+  class StripeRep {
+   public:
+    StripeRep(const InternalKeyComparator* icmp, SequenceNumber upper_bound,
+              SequenceNumber lower_bound)
+        : icmp_(icmp),
+          forward_iter_(icmp),
+          reverse_iter_(icmp),
+          upper_bound_(upper_bound),
+          lower_bound_(lower_bound) {}
+
+    void AddTombstones(
+        std::unique_ptr<TruncatedRangeDelIterator> input_iter) {
+      iters_.push_back(std::move(input_iter));
+    }
+
+    bool IsEmpty() const { return iters_.empty(); }
+
+    bool ShouldDelete(const ParsedInternalKey& parsed,
+                      RangeDelPositioningMode mode);
+
+    void Invalidate() {
+      if (!IsEmpty()) {
+        InvalidateForwardIter();
+        InvalidateReverseIter();
+      }
+    }
+
+    // If user-defined timestamp is enabled, `start` and `end` are user keys
+    // with timestamp.
+    bool IsRangeOverlapped(const Slice& start, const Slice& end);
+
+   private:
+    bool InStripe(SequenceNumber seq) const {
+      return lower_bound_ <= seq && seq <= upper_bound_;
+    }
+
+    void InvalidateForwardIter() { forward_iter_.Invalidate(); }
+
+    void InvalidateReverseIter() { reverse_iter_.Invalidate(); }
+
+    const InternalKeyComparator* icmp_;
+    std::vector<std::unique_ptr<TruncatedRangeDelIterator>> iters_;
+    ForwardRangeDelIterator forward_iter_;
+    ReverseRangeDelIterator reverse_iter_;
+    SequenceNumber upper_bound_;
+    SequenceNumber lower_bound_;
+  };
+
+  const InternalKeyComparator* icmp_;
+
+ private:
+  std::set<uint64_t> files_seen_;
+};
+
+class ReadRangeDelAggregator final : public RangeDelAggregator {
+ public:
+  ReadRangeDelAggregator(const InternalKeyComparator* icmp,
+                         SequenceNumber upper_bound)
+      : RangeDelAggregator(icmp),
+        rep_(icmp, upper_bound, 0 /* lower_bound */) {}
+  ~ReadRangeDelAggregator() override {}
+
+  using RangeDelAggregator::ShouldDelete;
+  void AddTombstones(
+      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
+      const InternalKey* smallest = nullptr,
+      const InternalKey* largest = nullptr) override;
+
+  bool ShouldDelete(const ParsedInternalKey& parsed,
+                    RangeDelPositioningMode mode) final override {
+    if (rep_.IsEmpty()) {
+      return false;
+    }
+    return ShouldDeleteImpl(parsed, mode);
+  }
+
+  bool IsRangeOverlapped(const Slice& start, const Slice& end);
+
+  void InvalidateRangeDelMapPositions() override { rep_.Invalidate(); }
+
+  bool IsEmpty() const override { return rep_.IsEmpty(); }
+
+ private:
+  StripeRep rep_;
+
+  bool ShouldDeleteImpl(const ParsedInternalKey& parsed,
+                        RangeDelPositioningMode mode);
+};
+
+class CompactionRangeDelAggregator : public RangeDelAggregator {
+ public:
+  CompactionRangeDelAggregator(
+      const InternalKeyComparator* icmp,
+      const std::vector<SequenceNumber>& snapshots,
+      const std::string* full_history_ts_low = nullptr,
+      const std::string* trim_ts = nullptr)
+      : RangeDelAggregator(icmp), snapshots_(&snapshots) {
+    if (full_history_ts_low) {
+      ts_upper_bound_ = *full_history_ts_low;
+    }
+    if (trim_ts) {
+      trim_ts_ = *trim_ts;
+      // Range tombstones newer than `trim_ts` or `full_history_ts_low`
+      // should not be considered in ShouldDelete().
+      if (ts_upper_bound_.empty()) {
+        ts_upper_bound_ = trim_ts_;
+      } else if (!trim_ts_.empty() &&
+                 icmp->user_comparator()->CompareTimestamp(
+                     trim_ts_, ts_upper_bound_) < 0) {
+        ts_upper_bound_ = trim_ts_;
+      }
+    }
+  }
+  ~CompactionRangeDelAggregator() override {}
+
+  void AddTombstones(
+      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
+      const InternalKey* smallest = nullptr,
+      const InternalKey* largest = nullptr) override;
+
+  using RangeDelAggregator::ShouldDelete;
+  bool ShouldDelete(const ParsedInternalKey& parsed,
+                    RangeDelPositioningMode mode) override;
+
+  bool IsRangeOverlapped(const Slice& start, const Slice& end);
+
+  void InvalidateRangeDelMapPositions() override {
+    for (auto& rep : reps_) {
+      rep.second.Invalidate();
+    }
+  }
+
+  bool IsEmpty() const override {
+    for (const auto& rep : reps_) {
+      if (!rep.second.IsEmpty()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Creates an iterator over all the range tombstones in the aggregator, for
+  // use in compaction.
+  //
+  // NOTE: the internal key boundaries are used for optimization purposes to
+  // reduce the number of tombstones that are passed to the fragmenter; they
+  // do not guarantee that the resulting iterator only contains range
+  // tombstones that cover keys in the provided range. If required, these
+  // bounds must be enforced during iteration.
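+  //
+  // Illustrative caller sketch (hypothetical, not part of this class):
+  //
+  //   auto range_del_iter = range_del_agg.NewIterator(lower, upper);
+  //   for (range_del_iter->SeekToFirst(); range_del_iter->Valid();
+  //        range_del_iter->Next()) {
+  //     // emit range_del_iter->key() / value() into the output file
+  //   }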
+  std::unique_ptr<FragmentedRangeTombstoneIterator> NewIterator(
+      const Slice* lower_bound = nullptr, const Slice* upper_bound = nullptr);
+
+ private:
+  std::vector<std::unique_ptr<TruncatedRangeDelIterator>> parent_iters_;
+  std::map<SequenceNumber, StripeRep> reps_;
+
+  const std::vector<SequenceNumber>* snapshots_;
+  // min over full_history_ts_low and trim_ts_
+  Slice ts_upper_bound_{};
+  Slice trim_ts_{};
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.cc
new file mode 100644
index 0000000000..7e7cedeca4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.cc
@@ -0,0 +1,502 @@
+// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/range_tombstone_fragmenter.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <cstdio>
+#include <functional>
+#include <set>
+
+#include "util/autovector.h"
+#include "util/kv_map.h"
+#include "util/vector_iterator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+FragmentedRangeTombstoneList::FragmentedRangeTombstoneList(
+    std::unique_ptr<InternalIterator> unfragmented_tombstones,
+    const InternalKeyComparator& icmp, bool for_compaction,
+    const std::vector<SequenceNumber>& snapshots) {
+  if (unfragmented_tombstones == nullptr) {
+    return;
+  }
+  bool is_sorted = true;
+  InternalKey pinned_last_start_key;
+  Slice last_start_key;
+  num_unfragmented_tombstones_ = 0;
+  total_tombstone_payload_bytes_ = 0;
+  for (unfragmented_tombstones->SeekToFirst();
+       unfragmented_tombstones->Valid();
+       unfragmented_tombstones->Next(), num_unfragmented_tombstones_++) {
+    total_tombstone_payload_bytes_ += unfragmented_tombstones->key().size() +
+                                      unfragmented_tombstones->value().size();
+    if (num_unfragmented_tombstones_ > 0 &&
+        icmp.Compare(last_start_key, unfragmented_tombstones->key()) > 0) {
+      is_sorted = false;
+      break;
+    }
+    if (unfragmented_tombstones->IsKeyPinned()) {
+      last_start_key = unfragmented_tombstones->key();
+    } else {
+      pinned_last_start_key.DecodeFrom(unfragmented_tombstones->key());
+      last_start_key = pinned_last_start_key.Encode();
+    }
+  }
+  if (is_sorted) {
+    FragmentTombstones(std::move(unfragmented_tombstones), icmp,
+                       for_compaction, snapshots);
+    return;
+  }
+
+  // Sort the tombstones before fragmenting them.
+  std::vector<std::string> keys, values;
+  keys.reserve(num_unfragmented_tombstones_);
+  values.reserve(num_unfragmented_tombstones_);
+  // Reset the counter to zero for the next iteration over keys.
+  total_tombstone_payload_bytes_ = 0;
+  for (unfragmented_tombstones->SeekToFirst();
+       unfragmented_tombstones->Valid(); unfragmented_tombstones->Next()) {
+    total_tombstone_payload_bytes_ += unfragmented_tombstones->key().size() +
+                                      unfragmented_tombstones->value().size();
+    keys.emplace_back(unfragmented_tombstones->key().data(),
+                      unfragmented_tombstones->key().size());
+    values.emplace_back(unfragmented_tombstones->value().data(),
+                        unfragmented_tombstones->value().size());
+  }
+  // VectorIterator implicitly sorts by key during construction.
+  auto iter = std::make_unique<VectorIterator>(std::move(keys),
+                                               std::move(values), &icmp);
+  FragmentTombstones(std::move(iter), icmp, for_compaction, snapshots);
+}
+
+void FragmentedRangeTombstoneList::FragmentTombstones(
+    std::unique_ptr<InternalIterator> unfragmented_tombstones,
+    const InternalKeyComparator& icmp, bool for_compaction,
+    const std::vector<SequenceNumber>& snapshots) {
+  Slice cur_start_key(nullptr, 0);
+  auto cmp = ParsedInternalKeyComparator(&icmp);
+
+  // Stores the end keys and sequence numbers of range tombstones with a start
+  // key less than or equal to cur_start_key. Provides an ordering by end key
+  // for use in flush_current_tombstones.
+  std::set<ParsedInternalKey, ParsedInternalKeyComparator> cur_end_keys(cmp);
+
+  size_t ts_sz = icmp.user_comparator()->timestamp_size();
+  // Given the next start key in unfragmented_tombstones,
+  // flush_current_tombstones writes every tombstone fragment that starts
+  // and ends with a key before next_start_key, and starts with a key greater
+  // than or equal to cur_start_key.
+  auto flush_current_tombstones = [&](const Slice& next_start_key) {
+    auto it = cur_end_keys.begin();
+    bool reached_next_start_key = false;
+    for (; it != cur_end_keys.end() && !reached_next_start_key; ++it) {
+      Slice cur_end_key = it->user_key;
+      if (icmp.user_comparator()->CompareWithoutTimestamp(cur_start_key,
+                                                          cur_end_key) == 0) {
+        // Empty tombstone.
+        continue;
+      }
+      if (icmp.user_comparator()->CompareWithoutTimestamp(next_start_key,
+                                                          cur_end_key) <= 0) {
+        // All the end keys in [it, cur_end_keys.end()) are after
+        // next_start_key, so the tombstones they represent can be used in
+        // fragments that start with keys greater than or equal to
+        // next_start_key. However, the end keys we already passed will not be
+        // used in any more tombstone fragments.
+        //
+        // Remove the fully fragmented tombstones and stop iteration after a
+        // final round of flushing to preserve the tombstones we can create
+        // more fragments from.
+        reached_next_start_key = true;
+        cur_end_keys.erase(cur_end_keys.begin(), it);
+        cur_end_key = next_start_key;
+      }
+
+      // Flush a range tombstone fragment [cur_start_key, cur_end_key), which
+      // should not overlap with the last-flushed tombstone fragment.
+      assert(tombstones_.empty() ||
+             icmp.user_comparator()->CompareWithoutTimestamp(
+                 tombstones_.back().end_key, cur_start_key) <= 0);
+
+      // Sort the sequence numbers of the tombstones being fragmented in
+      // descending order, and then flush them in that order.
+      autovector<SequenceNumber> seqnums_to_flush;
+      autovector<Slice> timestamps_to_flush;
+      for (auto flush_it = it; flush_it != cur_end_keys.end(); ++flush_it) {
+        seqnums_to_flush.push_back(flush_it->sequence);
+        if (ts_sz) {
+          timestamps_to_flush.push_back(
+              ExtractTimestampFromUserKey(flush_it->user_key, ts_sz));
+        }
+      }
+      // TODO: bind the two sorting together to be more efficient
+      std::sort(seqnums_to_flush.begin(), seqnums_to_flush.end(),
+                std::greater<SequenceNumber>());
+      if (ts_sz) {
+        std::sort(timestamps_to_flush.begin(), timestamps_to_flush.end(),
+                  [icmp](const Slice& ts1, const Slice& ts2) {
+                    return icmp.user_comparator()->CompareTimestamp(ts1, ts2) >
+                           0;
+                  });
+      }
+
+      size_t start_idx = tombstone_seqs_.size();
+      size_t end_idx = start_idx + seqnums_to_flush.size();
+
+      // If user-defined timestamp is enabled, we should not drop tombstones
+      // from any snapshot stripe. Garbage collection of range tombstones
+      // happens in CompactionOutputs::AddRangeDels().
+      if (for_compaction && ts_sz == 0) {
+        // Drop all tombstone seqnums that are not preserved by a snapshot.
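+        // For illustration: with snapshots {5, 10} and seqnums
+        // {12, 8, 7, 3} (descending), 12 is kept for the stripe above 10,
+        // 8 for (5, 10], and 3 for <= 5; 7 is dropped since 8 already
+        // covers its stripe (hypothetical numbers).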
+ SequenceNumber next_snapshot = kMaxSequenceNumber; + for (auto seq : seqnums_to_flush) { + if (seq <= next_snapshot) { + // This seqnum is visible by a lower snapshot. + tombstone_seqs_.push_back(seq); + auto upper_bound_it = + std::lower_bound(snapshots.begin(), snapshots.end(), seq); + if (upper_bound_it == snapshots.begin()) { + // This seqnum is the topmost one visible by the earliest + // snapshot. None of the seqnums below it will be visible, so we + // can skip them. + break; + } + next_snapshot = *std::prev(upper_bound_it); + } + } + end_idx = tombstone_seqs_.size(); + } else { + // The fragmentation is being done for reads, so preserve all seqnums. + tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(), + seqnums_to_flush.end()); + if (ts_sz) { + tombstone_timestamps_.insert(tombstone_timestamps_.end(), + timestamps_to_flush.begin(), + timestamps_to_flush.end()); + } + } + + assert(start_idx < end_idx); + if (ts_sz) { + std::string start_key_with_max_ts; + AppendUserKeyWithMaxTimestamp(&start_key_with_max_ts, cur_start_key, + ts_sz); + pinned_slices_.emplace_back(std::move(start_key_with_max_ts)); + Slice start_key = pinned_slices_.back(); + + std::string end_key_with_max_ts; + AppendUserKeyWithMaxTimestamp(&end_key_with_max_ts, cur_end_key, ts_sz); + pinned_slices_.emplace_back(std::move(end_key_with_max_ts)); + Slice end_key = pinned_slices_.back(); + + // RangeTombstoneStack expects start_key and end_key to have max + // timestamp. + tombstones_.emplace_back(start_key, end_key, start_idx, end_idx); + } else { + tombstones_.emplace_back(cur_start_key, cur_end_key, start_idx, + end_idx); + } + + cur_start_key = cur_end_key; + } + if (!reached_next_start_key) { + // There is a gap between the last flushed tombstone fragment and + // the next tombstone's start key. Remove all the end keys in + // the working set, since we have fully fragmented their corresponding + // tombstones. + cur_end_keys.clear(); + } + cur_start_key = next_start_key; + }; + + pinned_iters_mgr_.StartPinning(); + + bool no_tombstones = true; + for (unfragmented_tombstones->SeekToFirst(); unfragmented_tombstones->Valid(); + unfragmented_tombstones->Next()) { + const Slice& ikey = unfragmented_tombstones->key(); + Slice tombstone_start_key = ExtractUserKey(ikey); + SequenceNumber tombstone_seq = GetInternalKeySeqno(ikey); + if (!unfragmented_tombstones->IsKeyPinned()) { + pinned_slices_.emplace_back(tombstone_start_key.data(), + tombstone_start_key.size()); + tombstone_start_key = pinned_slices_.back(); + } + no_tombstones = false; + + Slice tombstone_end_key = unfragmented_tombstones->value(); + if (!unfragmented_tombstones->IsValuePinned()) { + pinned_slices_.emplace_back(tombstone_end_key.data(), + tombstone_end_key.size()); + tombstone_end_key = pinned_slices_.back(); + } + if (!cur_end_keys.empty() && + icmp.user_comparator()->CompareWithoutTimestamp( + cur_start_key, tombstone_start_key) != 0) { + // The start key has changed. Flush all tombstones that start before + // this new start key. 
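+      // Illustration of the sweep (not from the source): tombstones
+      // [a, e)@4 and [c, g)@2 fragment into [a, c)@{4}, [c, e)@{4, 2} and
+      // [e, g)@{2}. Reaching start key c here triggers a flush of everything
+      // that starts before it, i.e. the [a, c)@{4} fragment.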
+ flush_current_tombstones(tombstone_start_key); + } + cur_start_key = tombstone_start_key; + + cur_end_keys.emplace(tombstone_end_key, tombstone_seq, kTypeRangeDeletion); + } + if (!cur_end_keys.empty()) { + ParsedInternalKey last_end_key = *std::prev(cur_end_keys.end()); + flush_current_tombstones(last_end_key.user_key); + } + + if (!no_tombstones) { + pinned_iters_mgr_.PinIterator(unfragmented_tombstones.release(), + false /* arena */); + } +} + +bool FragmentedRangeTombstoneList::ContainsRange(SequenceNumber lower, + SequenceNumber upper) { + std::call_once(seq_set_init_once_flag_, [this]() { + for (auto s : tombstone_seqs_) { + seq_set_.insert(s); + } + }); + auto seq_it = seq_set_.lower_bound(lower); + return seq_it != seq_set_.end() && *seq_it <= upper; +} + +FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( + FragmentedRangeTombstoneList* tombstones, const InternalKeyComparator& icmp, + SequenceNumber _upper_bound, const Slice* ts_upper_bound, + SequenceNumber _lower_bound) + : tombstone_start_cmp_(icmp.user_comparator()), + tombstone_end_cmp_(icmp.user_comparator()), + icmp_(&icmp), + ucmp_(icmp.user_comparator()), + tombstones_(tombstones), + upper_bound_(_upper_bound), + lower_bound_(_lower_bound), + ts_upper_bound_(ts_upper_bound) { + assert(tombstones_ != nullptr); + Invalidate(); +} + +FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( + const std::shared_ptr& tombstones, + const InternalKeyComparator& icmp, SequenceNumber _upper_bound, + const Slice* ts_upper_bound, SequenceNumber _lower_bound) + : tombstone_start_cmp_(icmp.user_comparator()), + tombstone_end_cmp_(icmp.user_comparator()), + icmp_(&icmp), + ucmp_(icmp.user_comparator()), + tombstones_ref_(tombstones), + tombstones_(tombstones_ref_.get()), + upper_bound_(_upper_bound), + lower_bound_(_lower_bound), + ts_upper_bound_(ts_upper_bound) { + assert(tombstones_ != nullptr); + Invalidate(); +} + +FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( + const std::shared_ptr& tombstones_cache, + const InternalKeyComparator& icmp, SequenceNumber _upper_bound, + const Slice* ts_upper_bound, SequenceNumber _lower_bound) + : tombstone_start_cmp_(icmp.user_comparator()), + tombstone_end_cmp_(icmp.user_comparator()), + icmp_(&icmp), + ucmp_(icmp.user_comparator()), + tombstones_cache_ref_(tombstones_cache), + tombstones_(tombstones_cache_ref_->tombstones.get()), + upper_bound_(_upper_bound), + lower_bound_(_lower_bound) { + assert(tombstones_ != nullptr); + if (!ts_upper_bound || ts_upper_bound->empty()) { + ts_upper_bound_ = nullptr; + } else { + ts_upper_bound_ = ts_upper_bound; + } + Invalidate(); +} + +void FragmentedRangeTombstoneIterator::SeekToFirst() { + pos_ = tombstones_->begin(); + seq_pos_ = tombstones_->seq_begin(); +} + +void FragmentedRangeTombstoneIterator::SeekToTopFirst() { + if (tombstones_->empty()) { + Invalidate(); + return; + } + pos_ = tombstones_->begin(); + SetMaxVisibleSeqAndTimestamp(); + ScanForwardToVisibleTombstone(); +} + +void FragmentedRangeTombstoneIterator::SeekToLast() { + pos_ = std::prev(tombstones_->end()); + seq_pos_ = std::prev(tombstones_->seq_end()); +} + +void FragmentedRangeTombstoneIterator::SeekToTopLast() { + if (tombstones_->empty()) { + Invalidate(); + return; + } + pos_ = std::prev(tombstones_->end()); + SetMaxVisibleSeqAndTimestamp(); + ScanBackwardToVisibleTombstone(); +} + +// @param `target` is a user key, with timestamp if user-defined timestamp is +// enabled. 
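+// For example (illustrative): with fragments [a, c) and [e, g), Seek("b")
+// positions on [a, c), which covers "b", while Seek("d") positions on
+// [e, g), the earliest fragment that ends after "d".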
+void FragmentedRangeTombstoneIterator::Seek(const Slice& target) { + if (tombstones_->empty()) { + Invalidate(); + return; + } + SeekToCoveringTombstone(target); + ScanForwardToVisibleTombstone(); +} + +void FragmentedRangeTombstoneIterator::SeekForPrev(const Slice& target) { + if (tombstones_->empty()) { + Invalidate(); + return; + } + SeekForPrevToCoveringTombstone(target); + ScanBackwardToVisibleTombstone(); +} + +void FragmentedRangeTombstoneIterator::SeekToCoveringTombstone( + const Slice& target) { + pos_ = std::upper_bound(tombstones_->begin(), tombstones_->end(), target, + tombstone_end_cmp_); + if (pos_ == tombstones_->end()) { + // All tombstones end before target. + seq_pos_ = tombstones_->seq_end(); + return; + } + SetMaxVisibleSeqAndTimestamp(); +} + +void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone( + const Slice& target) { + if (tombstones_->empty()) { + Invalidate(); + return; + } + pos_ = std::upper_bound(tombstones_->begin(), tombstones_->end(), target, + tombstone_start_cmp_); + if (pos_ == tombstones_->begin()) { + // All tombstones start after target. + Invalidate(); + return; + } + --pos_; + SetMaxVisibleSeqAndTimestamp(); +} + +void FragmentedRangeTombstoneIterator::ScanForwardToVisibleTombstone() { + while (pos_ != tombstones_->end() && + (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) || + *seq_pos_ < lower_bound_)) { + ++pos_; + if (pos_ == tombstones_->end()) { + Invalidate(); + return; + } + SetMaxVisibleSeqAndTimestamp(); + } +} + +void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() { + while (pos_ != tombstones_->end() && + (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) || + *seq_pos_ < lower_bound_)) { + if (pos_ == tombstones_->begin()) { + Invalidate(); + return; + } + --pos_; + SetMaxVisibleSeqAndTimestamp(); + } +} + +void FragmentedRangeTombstoneIterator::Next() { + ++seq_pos_; + if (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) { + ++pos_; + } +} + +void FragmentedRangeTombstoneIterator::TopNext() { + ++pos_; + if (pos_ == tombstones_->end()) { + return; + } + SetMaxVisibleSeqAndTimestamp(); + ScanForwardToVisibleTombstone(); +} + +void FragmentedRangeTombstoneIterator::Prev() { + if (seq_pos_ == tombstones_->seq_begin()) { + Invalidate(); + return; + } + --seq_pos_; + if (pos_ == tombstones_->end() || + seq_pos_ == tombstones_->seq_iter(pos_->seq_start_idx - 1)) { + --pos_; + } +} + +void FragmentedRangeTombstoneIterator::TopPrev() { + if (pos_ == tombstones_->begin()) { + Invalidate(); + return; + } + --pos_; + SetMaxVisibleSeqAndTimestamp(); + ScanBackwardToVisibleTombstone(); +} + +bool FragmentedRangeTombstoneIterator::Valid() const { + return tombstones_ != nullptr && pos_ != tombstones_->end(); +} + +SequenceNumber FragmentedRangeTombstoneIterator::MaxCoveringTombstoneSeqnum( + const Slice& target_user_key) { + SeekToCoveringTombstone(target_user_key); + return ValidPos() && ucmp_->CompareWithoutTimestamp(start_key(), + target_user_key) <= 0 + ? 
seq() + : 0; +} + +std::map> +FragmentedRangeTombstoneIterator::SplitBySnapshot( + const std::vector& snapshots) { + std::map> + splits; + SequenceNumber lower = 0; + SequenceNumber upper; + for (size_t i = 0; i <= snapshots.size(); i++) { + if (i >= snapshots.size()) { + upper = kMaxSequenceNumber; + } else { + upper = snapshots[i]; + } + if (tombstones_->ContainsRange(lower, upper)) { + splits.emplace(upper, + std::make_unique( + tombstones_, *icmp_, upper, ts_upper_bound_, lower)); + } + lower = upper + 1; + } + return splits; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.h new file mode 100644 index 0000000000..8c7d982972 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/range_tombstone_fragmenter.h @@ -0,0 +1,356 @@ +// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include +#include + +#include "db/dbformat.h" +#include "db/pinned_iterators_manager.h" +#include "rocksdb/status.h" +#include "table/internal_iterator.h" + +namespace ROCKSDB_NAMESPACE { +struct FragmentedRangeTombstoneList; + +struct FragmentedRangeTombstoneListCache { + // ensure only the first reader needs to initialize l + std::mutex reader_mutex; + std::unique_ptr tombstones = nullptr; + // readers will first check this bool to avoid + std::atomic initialized = false; +}; + +struct FragmentedRangeTombstoneList { + public: + // A compact representation of a "stack" of range tombstone fragments, which + // start and end at the same user keys but have different sequence numbers. + // The members seq_start_idx and seq_end_idx are intended to be parameters to + // seq_iter(). + // If user-defined timestamp is enabled, `start` and `end` should be user keys + // with timestamp, and the timestamps are set to max timestamp to be returned + // by parsed_start_key()/parsed_end_key(). seq_start_idx and seq_end_idx will + // also be used as parameters to ts_iter(). + struct RangeTombstoneStack { + RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx, + size_t end_idx) + : start_key(start), + end_key(end), + seq_start_idx(start_idx), + seq_end_idx(end_idx) {} + Slice start_key; + Slice end_key; + size_t seq_start_idx; + size_t seq_end_idx; + }; + // Assumes unfragmented_tombstones->key() and unfragmented_tombstones->value() + // both contain timestamp if enabled. 
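+  // A typical construction (a sketch; `raw_range_dels`, `icmp` and
+  // `snapshot_seq` are assumed from context, not defined in this file):
+  //
+  //   // `raw_range_dels` is an unfragmented InternalIterator over a
+  //   // range-del meta block:
+  //   auto list = std::make_shared<FragmentedRangeTombstoneList>(
+  //       std::move(raw_range_dels), icmp);
+  //   FragmentedRangeTombstoneIterator it(list, icmp,
+  //                                       snapshot_seq /* upper_bound */);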
+  FragmentedRangeTombstoneList(
+      std::unique_ptr<InternalIterator> unfragmented_tombstones,
+      const InternalKeyComparator& icmp, bool for_compaction = false,
+      const std::vector<SequenceNumber>& snapshots = {});
+
+  std::vector<RangeTombstoneStack>::const_iterator begin() const {
+    return tombstones_.begin();
+  }
+
+  std::vector<RangeTombstoneStack>::const_iterator end() const {
+    return tombstones_.end();
+  }
+
+  std::vector<SequenceNumber>::const_iterator seq_iter(size_t idx) const {
+    return std::next(tombstone_seqs_.begin(), idx);
+  }
+
+  std::vector<Slice>::const_iterator ts_iter(size_t idx) const {
+    return std::next(tombstone_timestamps_.begin(), idx);
+  }
+
+  std::vector<SequenceNumber>::const_iterator seq_begin() const {
+    return tombstone_seqs_.begin();
+  }
+
+  std::vector<SequenceNumber>::const_iterator seq_end() const {
+    return tombstone_seqs_.end();
+  }
+
+  bool empty() const { return tombstones_.empty(); }
+
+  // Returns true if the stored tombstones contain one with a sequence
+  // number in [lower, upper].
+  // This method is not const as it lazily initializes a set of
+  // sequence numbers (`seq_set_`).
+  bool ContainsRange(SequenceNumber lower, SequenceNumber upper);
+
+  uint64_t num_unfragmented_tombstones() const {
+    return num_unfragmented_tombstones_;
+  }
+
+  uint64_t total_tombstone_payload_bytes() const {
+    return total_tombstone_payload_bytes_;
+  }
+
+ private:
+  // Given an ordered range tombstone iterator unfragmented_tombstones,
+  // "fragment" the tombstones into non-overlapping pieces. Each
+  // "non-overlapping piece" is a RangeTombstoneStack in tombstones_, which
+  // contains start_key, end_key, and indices that point to sequence numbers
+  // (in tombstone_seqs_) and timestamps (in tombstone_timestamps_). If
+  // for_compaction is true, then `snapshots` should be provided. Range
+  // tombstone fragments are dropped if they are not visible in any snapshot
+  // and user-defined timestamp is not enabled. That is, for each snapshot
+  // stripe [lower, upper], only the range tombstone fragment with the
+  // largest seqno in [lower, upper] is preserved; all the other range
+  // tombstones are dropped.
+  void FragmentTombstones(
+      std::unique_ptr<InternalIterator> unfragmented_tombstones,
+      const InternalKeyComparator& icmp, bool for_compaction,
+      const std::vector<SequenceNumber>& snapshots);
+
+  std::vector<RangeTombstoneStack> tombstones_;
+  std::vector<SequenceNumber> tombstone_seqs_;
+  std::vector<Slice> tombstone_timestamps_;
+  std::once_flag seq_set_init_once_flag_;
+  std::set<SequenceNumber> seq_set_;
+  std::list<std::string> pinned_slices_;
+  PinnedIteratorsManager pinned_iters_mgr_;
+  uint64_t num_unfragmented_tombstones_;
+  uint64_t total_tombstone_payload_bytes_;
+};
+
+// FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del
+// meta block into an iterator over non-overlapping tombstone fragments. The
+// tombstone fragmentation process should be more efficient than the range
+// tombstone collapsing algorithm in RangeDelAggregator because this leverages
+// the internal key ordering already provided by the input iterator, if
+// applicable (when the iterator is unsorted, a new sorted iterator is created
+// before proceeding). If there are few overlaps, creating a
+// FragmentedRangeTombstoneIterator should be O(n), while the
+// RangeDelAggregator tombstone collapsing is always O(n log n).
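+//
+// A sketch of draining the fragments at the "top" visible level (names taken
+// from the API below; illustrative only):
+//
+//   for (it.SeekToTopFirst(); it.Valid(); it.TopNext()) {
+//     RangeTombstone t = it.Tombstone();  // [start_key, end_key) @ seq
+//     // ... consume t ...
+//   }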
+class FragmentedRangeTombstoneIterator : public InternalIterator { + public: + FragmentedRangeTombstoneIterator(FragmentedRangeTombstoneList* tombstones, + const InternalKeyComparator& icmp, + SequenceNumber upper_bound, + const Slice* ts_upper_bound = nullptr, + SequenceNumber lower_bound = 0); + FragmentedRangeTombstoneIterator( + const std::shared_ptr& tombstones, + const InternalKeyComparator& icmp, SequenceNumber upper_bound, + const Slice* ts_upper_bound = nullptr, SequenceNumber lower_bound = 0); + FragmentedRangeTombstoneIterator( + const std::shared_ptr& tombstones, + const InternalKeyComparator& icmp, SequenceNumber upper_bound, + const Slice* ts_upper_bound = nullptr, SequenceNumber lower_bound = 0); + + void SeekToFirst() override; + void SeekToLast() override; + + void SeekToTopFirst(); + void SeekToTopLast(); + + // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator + // seeking should behave. This is OK because they are not currently used, but + // eventually FragmentedRangeTombstoneIterator should no longer implement + // InternalIterator. + // + // Seeks to the range tombstone that covers target at a seqnum in the + // snapshot. If no such tombstone exists, seek to the earliest tombstone in + // the snapshot that ends after target. + void Seek(const Slice& target) override; + // Seeks to the range tombstone that covers target at a seqnum in the + // snapshot. If no such tombstone exists, seek to the latest tombstone in the + // snapshot that starts before target. + void SeekForPrev(const Slice& target) override; + + void Next() override; + void Prev() override; + + void TopNext(); + void TopPrev(); + + bool Valid() const override; + // Note that key() and value() do not return correct timestamp. + // Caller should call timestamp() to get the current timestamp. + Slice key() const override { + MaybePinKey(); + return current_start_key_.Encode(); + } + Slice value() const override { return pos_->end_key; } + bool IsKeyPinned() const override { return false; } + bool IsValuePinned() const override { return true; } + Status status() const override { return Status::OK(); } + + bool empty() const { return tombstones_->empty(); } + void Invalidate() { + pos_ = tombstones_->end(); + seq_pos_ = tombstones_->seq_end(); + pinned_pos_ = tombstones_->end(); + pinned_seq_pos_ = tombstones_->seq_end(); + } + + RangeTombstone Tombstone() const { + assert(Valid()); + if (icmp_->user_comparator()->timestamp_size()) { + return RangeTombstone(start_key(), end_key(), seq(), timestamp()); + } + return RangeTombstone(start_key(), end_key(), seq()); + } + // Note that start_key() and end_key() are not guaranteed to have the + // correct timestamp. User can call timestamp() to get the correct + // timestamp(). + Slice start_key() const { return pos_->start_key; } + Slice end_key() const { return pos_->end_key; } + SequenceNumber seq() const { return *seq_pos_; } + Slice timestamp() const { + // seqno and timestamp are stored in the same order. + return *tombstones_->ts_iter(seq_pos_ - tombstones_->seq_begin()); + } + // Current use case is by CompactionRangeDelAggregator to set + // full_history_ts_low_. 
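+  // e.g. (sketch): iter->SetTimestampUpperBound(&full_history_ts_low);
+  // after which SetMaxVisibleSeqAndTimestamp() only surfaces tombstones
+  // whose timestamp is <= full_history_ts_low.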
+ void SetTimestampUpperBound(const Slice* ts_upper_bound) { + ts_upper_bound_ = ts_upper_bound; + } + + ParsedInternalKey parsed_start_key() const { + return ParsedInternalKey(pos_->start_key, seq(), kTypeRangeDeletion); + } + ParsedInternalKey parsed_end_key() const { + return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber, + kTypeRangeDeletion); + } + + // Return the max sequence number of a range tombstone that covers + // the given user key. + // If there is no covering tombstone, then 0 is returned. + SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key); + + // Splits the iterator into n+1 iterators (where n is the number of + // snapshots), each providing a view over a "stripe" of sequence numbers. The + // iterators are keyed by the upper bound of their ranges (the provided + // snapshots + kMaxSequenceNumber). + // + // NOTE: the iterators in the returned map are no longer valid if their + // parent iterator is deleted, since they do not modify the refcount of the + // underlying tombstone list. Therefore, this map should be deleted before + // the parent iterator. + std::map> + SplitBySnapshot(const std::vector& snapshots); + + SequenceNumber upper_bound() const { return upper_bound_; } + SequenceNumber lower_bound() const { return lower_bound_; } + + uint64_t num_unfragmented_tombstones() const { + return tombstones_->num_unfragmented_tombstones(); + } + uint64_t total_tombstone_payload_bytes() const { + return tombstones_->total_tombstone_payload_bytes(); + } + + private: + using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack; + + struct RangeTombstoneStackStartComparator { + explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {} + + bool operator()(const RangeTombstoneStack& a, + const RangeTombstoneStack& b) const { + return cmp->CompareWithoutTimestamp(a.start_key, b.start_key) < 0; + } + + bool operator()(const RangeTombstoneStack& a, const Slice& b) const { + return cmp->CompareWithoutTimestamp(a.start_key, b) < 0; + } + + bool operator()(const Slice& a, const RangeTombstoneStack& b) const { + return cmp->CompareWithoutTimestamp(a, b.start_key) < 0; + } + + const Comparator* cmp; + }; + + struct RangeTombstoneStackEndComparator { + explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {} + + bool operator()(const RangeTombstoneStack& a, + const RangeTombstoneStack& b) const { + return cmp->CompareWithoutTimestamp(a.end_key, b.end_key) < 0; + } + + bool operator()(const RangeTombstoneStack& a, const Slice& b) const { + return cmp->CompareWithoutTimestamp(a.end_key, b) < 0; + } + + bool operator()(const Slice& a, const RangeTombstoneStack& b) const { + return cmp->CompareWithoutTimestamp(a, b.end_key) < 0; + } + + const Comparator* cmp; + }; + + void MaybePinKey() const { + if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() && + (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) { + current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion); + pinned_pos_ = pos_; + pinned_seq_pos_ = seq_pos_; + } + } + + void SeekToCoveringTombstone(const Slice& key); + void SeekForPrevToCoveringTombstone(const Slice& key); + void ScanForwardToVisibleTombstone(); + void ScanBackwardToVisibleTombstone(); + bool ValidPos() const { + return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx); + } + + const RangeTombstoneStackStartComparator tombstone_start_cmp_; + const RangeTombstoneStackEndComparator tombstone_end_cmp_; + const InternalKeyComparator* icmp_; + const 
Comparator* ucmp_;
+  std::shared_ptr<FragmentedRangeTombstoneList> tombstones_ref_;
+  std::shared_ptr<FragmentedRangeTombstoneListCache> tombstones_cache_ref_;
+  FragmentedRangeTombstoneList* tombstones_;
+  SequenceNumber upper_bound_;
+  SequenceNumber lower_bound_;
+  // Only consider timestamps <= ts_upper_bound_.
+  const Slice* ts_upper_bound_;
+  std::vector<RangeTombstoneStack>::const_iterator pos_;
+  std::vector<SequenceNumber>::const_iterator seq_pos_;
+  mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_;
+  mutable std::vector<SequenceNumber>::const_iterator pinned_seq_pos_;
+  mutable InternalKey current_start_key_;
+
+  // Check the current RangeTombstoneStack `pos_` against timestamp
+  // upper bound `ts_upper_bound_` and sequence number upper bound
+  // `upper_bound_`. Update the sequence number (and timestamp) pointer
+  // `seq_pos_` to the first valid position satisfying both bounds.
+  void SetMaxVisibleSeqAndTimestamp() {
+    seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
+                                tombstones_->seq_iter(pos_->seq_end_idx),
+                                upper_bound_, std::greater<SequenceNumber>());
+    if (ts_upper_bound_ && !ts_upper_bound_->empty()) {
+      auto ts_pos = std::lower_bound(
+          tombstones_->ts_iter(pos_->seq_start_idx),
+          tombstones_->ts_iter(pos_->seq_end_idx), *ts_upper_bound_,
+          [this](const Slice& s1, const Slice& s2) {
+            return ucmp_->CompareTimestamp(s1, s2) > 0;
+          });
+      auto ts_idx = ts_pos - tombstones_->ts_iter(pos_->seq_start_idx);
+      auto seq_idx = seq_pos_ - tombstones_->seq_iter(pos_->seq_start_idx);
+      if (seq_idx < ts_idx) {
+        // seq and ts are ordered in non-increasing order. Only update
+        // seq_pos_ to a larger index for a smaller sequence number and
+        // timestamp.
+        seq_pos_ = tombstones_->seq_iter(pos_->seq_start_idx + ts_idx);
+      }
+    }
+  }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/read_callback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/read_callback.h
new file mode 100644
index 0000000000..c042352db0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/read_callback.h
@@ -0,0 +1,54 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "db/dbformat.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class ReadCallback {
+ public:
+  explicit ReadCallback(SequenceNumber last_visible_seq)
+      : max_visible_seq_(last_visible_seq) {}
+  ReadCallback(SequenceNumber last_visible_seq, SequenceNumber min_uncommitted)
+      : max_visible_seq_(last_visible_seq),
+        min_uncommitted_(min_uncommitted) {}
+
+  virtual ~ReadCallback() {}
+
+  // Will be called to see if the seq number is visible; if not, it moves on
+  // to the next seq number.
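+  // For instance, a minimal snapshot-style override (a sketch, not part of
+  // this file) could be:
+  //
+  //   class SnapshotReadCallback : public ReadCallback {
+  //    public:
+  //     using ReadCallback::ReadCallback;
+  //     bool IsVisibleFullCheck(SequenceNumber seq) override {
+  //       return seq <= max_visible_seq();
+  //     }
+  //   };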
+ virtual bool IsVisibleFullCheck(SequenceNumber seq) = 0; + + inline bool IsVisible(SequenceNumber seq) { + assert(min_uncommitted_ > 0); + assert(min_uncommitted_ >= kMinUnCommittedSeq); + if (seq < min_uncommitted_) { // handles seq == 0 as well + assert(seq <= max_visible_seq_); + return true; + } else if (max_visible_seq_ < seq) { + assert(seq != 0); + return false; + } else { + assert(seq != 0); // already handled in the first if-then clause + return IsVisibleFullCheck(seq); + } + } + + inline SequenceNumber max_visible_seq() { return max_visible_seq_; } + + // Refresh to a more recent visible seq + virtual void Refresh(SequenceNumber seq) { max_visible_seq_ = seq; } + + protected: + // The max visible seq, it is usually the snapshot but could be larger if + // transaction has its own writes written to db. + SequenceNumber max_visible_seq_ = kMaxSequenceNumber; + // Any seq less than min_uncommitted_ is committed. + const SequenceNumber min_uncommitted_ = kMinUnCommittedSeq; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/repair.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/repair.cc new file mode 100644 index 0000000000..67513cacc4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/repair.cc @@ -0,0 +1,839 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Repairer does best effort recovery to recover as much data as possible after +// a disaster without compromising consistency. It does not guarantee bringing +// the database to a time consistent state. +// +// Repair process is broken into 4 phases: +// (a) Find files +// (b) Convert logs to tables +// (c) Extract metadata +// (d) Write Descriptor +// +// (a) Find files +// +// The repairer goes through all the files in the directory, and classifies them +// based on their file name. Any file that cannot be identified by name will be +// ignored. +// +// (b) Convert logs to table +// +// Every log file that is active is replayed. All sections of the file where the +// checksum does not match is skipped over. We intentionally give preference to +// data consistency. +// +// (c) Extract metadata +// +// We scan every table to compute +// (1) smallest/largest for the table +// (2) largest sequence number in the table +// (3) oldest blob file referred to by the table (if applicable) +// +// If we are unable to scan the file, then we ignore the table. +// +// (d) Write Descriptor +// +// We generate descriptor contents: +// - log number is set to zero +// - next-file-number is set to 1 + largest file number we found +// - last-sequence-number is set to largest sequence# found across +// all tables (see 2c) +// - compaction pointers are cleared +// - every table file is added at level 0 +// +// Possible optimization 1: +// (a) Compute total size and use to pick appropriate max-level M +// (b) Sort tables by largest sequence# in the table +// (c) For each table: if it overlaps earlier table, place in level-0, +// else place in level-M. 
+// (d) We can provide options for time consistent recovery and unsafe recovery +// (ignore checksum failure when applicable) +// Possible optimization 2: +// Store per-table metadata (smallest, largest, largest-seq#, ...) +// in the table's meta section to speed up ScanTable. + +#include "db/version_builder.h" + +#include + +#include "db/builder.h" +#include "db/db_impl/db_impl.h" +#include "db/dbformat.h" +#include "db/log_reader.h" +#include "db/log_writer.h" +#include "db/memtable.h" +#include "db/table_cache.h" +#include "db/version_edit.h" +#include "db/write_batch_internal.h" +#include "file/filename.h" +#include "file/writable_file_writer.h" +#include "logging/logging.h" +#include "options/cf_options.h" +#include "rocksdb/comparator.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/options.h" +#include "rocksdb/write_buffer_manager.h" +#include "table/scoped_arena_iterator.h" +#include "table/unique_id_impl.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +class Repairer { + public: + Repairer(const std::string& dbname, const DBOptions& db_options, + const std::vector& column_families, + const ColumnFamilyOptions& default_cf_opts, + const ColumnFamilyOptions& unknown_cf_opts, bool create_unknown_cfs) + : dbname_(dbname), + db_session_id_(DBImpl::GenerateDbSessionId(db_options.env)), + env_(db_options.env), + file_options_(), + db_options_(SanitizeOptions(dbname_, db_options)), + immutable_db_options_(ImmutableDBOptions(db_options_)), + icmp_(default_cf_opts.comparator), + default_cf_opts_( + SanitizeOptions(immutable_db_options_, default_cf_opts)), + default_iopts_( + ImmutableOptions(immutable_db_options_, default_cf_opts_)), + unknown_cf_opts_( + SanitizeOptions(immutable_db_options_, unknown_cf_opts)), + create_unknown_cfs_(create_unknown_cfs), + raw_table_cache_( + // TableCache can be small since we expect each table to be opened + // once. + NewLRUCache(10, db_options_.table_cache_numshardbits)), + table_cache_(new TableCache(default_iopts_, &file_options_, + raw_table_cache_.get(), + /*block_cache_tracer=*/nullptr, + /*io_tracer=*/nullptr, db_session_id_)), + wb_(db_options_.db_write_buffer_size), + wc_(db_options_.delayed_write_rate), + vset_(dbname_, &immutable_db_options_, file_options_, + raw_table_cache_.get(), &wb_, &wc_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id=*/"", db_session_id_), + next_file_number_(1), + db_lock_(nullptr), + closed_(false) { + for (const auto& cfd : column_families) { + cf_name_to_opts_[cfd.name] = cfd.options; + } + } + + const ColumnFamilyOptions* GetColumnFamilyOptions( + const std::string& cf_name) { + if (cf_name_to_opts_.find(cf_name) == cf_name_to_opts_.end()) { + if (create_unknown_cfs_) { + return &unknown_cf_opts_; + } + return nullptr; + } + return &cf_name_to_opts_[cf_name]; + } + + // Adds a column family to the VersionSet with cf_options_ and updates + // manifest. 
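+  // Example flow (illustrative values): ScanTable() below finds a table
+  // whose properties name column family "cf1" with id 3; the id is unknown
+  // to the VersionSet, so AddColumnFamily("cf1", 3) creates it, falling back
+  // to unknown_cf_opts_ when "cf1" was not passed in by the caller.
+  //
+  //   // Top-level usage (sketch; see the RepairDB() entry points at the
+  //   // end of this file):
+  //   Status s = RepairDB("/path/to/db", Options());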
+ Status AddColumnFamily(const std::string& cf_name, uint32_t cf_id) { + // TODO: plumb Env::IOActivity; + const ReadOptions read_options; + const auto* cf_opts = GetColumnFamilyOptions(cf_name); + if (cf_opts == nullptr) { + return Status::Corruption("Encountered unknown column family with name=" + + cf_name + ", id=" + std::to_string(cf_id)); + } + Options opts(db_options_, *cf_opts); + MutableCFOptions mut_cf_opts(opts); + + VersionEdit edit; + edit.SetComparatorName(opts.comparator->Name()); + edit.SetLogNumber(0); + edit.SetColumnFamily(cf_id); + ColumnFamilyData* cfd; + cfd = nullptr; + edit.AddColumnFamily(cf_name); + + mutex_.Lock(); + std::unique_ptr db_dir; + Status status = env_->GetFileSystem()->NewDirectory(dbname_, IOOptions(), + &db_dir, nullptr); + if (status.ok()) { + status = vset_.LogAndApply(cfd, mut_cf_opts, read_options, &edit, &mutex_, + db_dir.get(), false /* new_descriptor_log */, + cf_opts); + } + mutex_.Unlock(); + return status; + } + + Status Close() { + Status s = Status::OK(); + if (!closed_) { + if (db_lock_ != nullptr) { + s = env_->UnlockFile(db_lock_); + db_lock_ = nullptr; + } + closed_ = true; + } + return s; + } + + ~Repairer() { Close().PermitUncheckedError(); } + + Status Run() { + Status status = env_->LockFile(LockFileName(dbname_), &db_lock_); + if (!status.ok()) { + return status; + } + status = FindFiles(); + DBImpl* db_impl = nullptr; + if (status.ok()) { + // Discard older manifests and start a fresh one + for (size_t i = 0; i < manifests_.size(); i++) { + ArchiveFile(dbname_ + "/" + manifests_[i]); + } + // Just create a DBImpl temporarily so we can reuse NewDB() + db_impl = new DBImpl(db_options_, dbname_); + status = db_impl->NewDB(/*new_filenames=*/nullptr); + } + delete db_impl; + + if (status.ok()) { + // Recover using the fresh manifest created by NewDB() + status = + vset_.Recover({{kDefaultColumnFamilyName, default_cf_opts_}}, false); + } + if (status.ok()) { + // Need to scan existing SST files first so the column families are + // created before we process WAL files + ExtractMetaData(); + + // ExtractMetaData() uses table_fds_ to know which SST files' metadata to + // extract -- we need to clear it here since metadata for existing SST + // files has been extracted already + table_fds_.clear(); + ConvertLogFilesToTables(); + ExtractMetaData(); + status = AddTables(); + } + if (status.ok()) { + uint64_t bytes = 0; + for (size_t i = 0; i < tables_.size(); i++) { + bytes += tables_[i].meta.fd.GetFileSize(); + } + ROCKS_LOG_WARN(db_options_.info_log, + "**** Repaired rocksdb %s; " + "recovered %" ROCKSDB_PRIszt " files; %" PRIu64 + " bytes. " + "Some data may have been lost. 
" + "****", + dbname_.c_str(), tables_.size(), bytes); + } + return status; + } + + private: + struct TableInfo { + FileMetaData meta; + uint32_t column_family_id; + std::string column_family_name; + }; + + std::string const dbname_; + std::string db_session_id_; + Env* const env_; + const FileOptions file_options_; + const DBOptions db_options_; + const ImmutableDBOptions immutable_db_options_; + const InternalKeyComparator icmp_; + const ColumnFamilyOptions default_cf_opts_; + const ImmutableOptions default_iopts_; // table_cache_ holds reference + const ColumnFamilyOptions unknown_cf_opts_; + const bool create_unknown_cfs_; + std::shared_ptr raw_table_cache_; + std::unique_ptr table_cache_; + WriteBufferManager wb_; + WriteController wc_; + VersionSet vset_; + std::unordered_map cf_name_to_opts_; + InstrumentedMutex mutex_; + + std::vector manifests_; + std::vector table_fds_; + std::vector logs_; + std::vector tables_; + uint64_t next_file_number_; + // Lock over the persistent DB state. Non-nullptr iff successfully + // acquired. + FileLock* db_lock_; + bool closed_; + + Status FindFiles() { + std::vector filenames; + bool found_file = false; + std::vector to_search_paths; + + for (size_t path_id = 0; path_id < db_options_.db_paths.size(); path_id++) { + to_search_paths.push_back(db_options_.db_paths[path_id].path); + } + + // search wal_dir if user uses a customize wal_dir + bool same = immutable_db_options_.IsWalDirSameAsDBPath(dbname_); + if (!same) { + to_search_paths.push_back(immutable_db_options_.wal_dir); + } + + for (size_t path_id = 0; path_id < to_search_paths.size(); path_id++) { + ROCKS_LOG_INFO(db_options_.info_log, "Searching path %s\n", + to_search_paths[path_id].c_str()); + Status status = env_->GetChildren(to_search_paths[path_id], &filenames); + if (!status.ok()) { + return status; + } + if (!filenames.empty()) { + found_file = true; + } + + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type)) { + if (type == kDescriptorFile) { + manifests_.push_back(filenames[i]); + } else { + if (number + 1 > next_file_number_) { + next_file_number_ = number + 1; + } + if (type == kWalFile) { + logs_.push_back(number); + } else if (type == kTableFile) { + table_fds_.emplace_back(number, static_cast(path_id), + 0); + } else { + // Ignore other files + } + } + } + } + } + if (!found_file) { + return Status::Corruption(dbname_, "repair found no files"); + } + return Status::OK(); + } + + void ConvertLogFilesToTables() { + const auto& wal_dir = immutable_db_options_.GetWalDir(); + for (size_t i = 0; i < logs_.size(); i++) { + // we should use LogFileName(wal_dir, logs_[i]) here. user might uses + // wal_dir option. + std::string logname = LogFileName(wal_dir, logs_[i]); + Status status = ConvertLogToTable(wal_dir, logs_[i]); + if (!status.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Log #%" PRIu64 ": ignoring conversion error: %s", + logs_[i], status.ToString().c_str()); + } + ArchiveFile(logname); + } + } + + Status ConvertLogToTable(const std::string& wal_dir, uint64_t log) { + struct LogReporter : public log::Reader::Reporter { + Env* env; + std::shared_ptr info_log; + uint64_t lognum; + void Corruption(size_t bytes, const Status& s) override { + // We print error messages for corruption, but continue repairing. 
+ ROCKS_LOG_ERROR(info_log, "Log #%" PRIu64 ": dropping %d bytes; %s", + lognum, static_cast(bytes), s.ToString().c_str()); + } + }; + + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + + // Open the log file + std::string logname = LogFileName(wal_dir, log); + const auto& fs = env_->GetFileSystem(); + std::unique_ptr lfile_reader; + Status status = SequentialFileReader::Create( + fs, logname, fs->OptimizeForLogRead(file_options_), &lfile_reader, + nullptr /* dbg */, nullptr /* rate limiter */); + if (!status.ok()) { + return status; + } + + // Create the log reader. + LogReporter reporter; + reporter.env = env_; + reporter.info_log = db_options_.info_log; + reporter.lognum = log; + // We intentionally make log::Reader do checksumming so that + // corruptions cause entire commits to be skipped instead of + // propagating bad information (like overly large sequence + // numbers). + log::Reader reader(db_options_.info_log, std::move(lfile_reader), &reporter, + true /*enable checksum*/, log); + + // Initialize per-column family memtables + for (auto* cfd : *vset_.GetColumnFamilySet()) { + cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(), + kMaxSequenceNumber); + } + auto cf_mems = new ColumnFamilyMemTablesImpl(vset_.GetColumnFamilySet()); + + // Read all the records and add to a memtable + std::string scratch; + Slice record; + WriteBatch batch; + int counter = 0; + while (reader.ReadRecord(&record, &scratch)) { + if (record.size() < WriteBatchInternal::kHeader) { + reporter.Corruption(record.size(), + Status::Corruption("log record too small")); + continue; + } + Status record_status = WriteBatchInternal::SetContents(&batch, record); + if (record_status.ok()) { + record_status = + WriteBatchInternal::InsertInto(&batch, cf_mems, nullptr, nullptr); + } + if (record_status.ok()) { + counter += WriteBatchInternal::Count(&batch); + } else { + ROCKS_LOG_WARN(db_options_.info_log, "Log #%" PRIu64 ": ignoring %s", + log, record_status.ToString().c_str()); + } + } + + // Dump a table for each column family with entries in this log file. + for (auto* cfd : *vset_.GetColumnFamilySet()) { + // Do not record a version edit for this conversion to a Table + // since ExtractMetaData() will also generate edits. 
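+      // Per-CF flow at this point (sketch): WAL records were replayed into
+      // each column family's memtable above; any non-empty memtable is now
+      // flushed to a fresh level-0 table via BuildTable():
+      //
+      //   WAL record -> WriteBatch -> InsertInto(memtable)
+      //              -> BuildTable() -> SST file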
+ MemTable* mem = cfd->mem(); + if (mem->IsEmpty()) { + continue; + } + + FileMetaData meta; + meta.fd = FileDescriptor(next_file_number_++, 0, 0); + // TODO: plumb Env::IOActivity + ReadOptions ro; + ro.total_order_seek = true; + Arena arena; + ScopedArenaIterator iter(mem->NewIterator(ro, &arena)); + int64_t _current_time = 0; + immutable_db_options_.clock->GetCurrentTime(&_current_time) + .PermitUncheckedError(); // ignore error + const uint64_t current_time = static_cast(_current_time); + meta.file_creation_time = current_time; + SnapshotChecker* snapshot_checker = DisableGCSnapshotChecker::Instance(); + + auto write_hint = cfd->CalculateSSTWriteHint(0); + std::vector> + range_del_iters; + auto range_del_iter = mem->NewRangeTombstoneIterator( + ro, kMaxSequenceNumber, false /* immutable_memtable */); + if (range_del_iter != nullptr) { + range_del_iters.emplace_back(range_del_iter); + } + + IOStatus io_s; + CompressionOptions default_compression; + TableBuilderOptions tboptions( + *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(), + cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(), + kNoCompression, default_compression, cfd->GetID(), cfd->GetName(), + -1 /* level */, false /* is_bottommost */, + TableFileCreationReason::kRecovery, 0 /* oldest_key_time */, + 0 /* file_creation_time */, "DB Repairer" /* db_id */, db_session_id_, + 0 /*target_file_size*/, meta.fd.GetNumber()); + + SeqnoToTimeMapping empty_seqno_time_mapping; + status = BuildTable( + dbname_, /* versions */ nullptr, immutable_db_options_, tboptions, + file_options_, read_options, table_cache_.get(), iter.get(), + std::move(range_del_iters), &meta, nullptr /* blob_file_additions */, + {}, kMaxSequenceNumber, kMaxSequenceNumber, snapshot_checker, + false /* paranoid_file_checks*/, nullptr /* internal_stats */, &io_s, + nullptr /*IOTracer*/, BlobFileCreationReason::kRecovery, + empty_seqno_time_mapping, nullptr /* event_logger */, 0 /* job_id */, + Env::IO_HIGH, nullptr /* table_properties */, write_hint); + ROCKS_LOG_INFO(db_options_.info_log, + "Log #%" PRIu64 ": %d ops saved to Table #%" PRIu64 " %s", + log, counter, meta.fd.GetNumber(), + status.ToString().c_str()); + if (status.ok()) { + if (meta.fd.GetFileSize() > 0) { + table_fds_.push_back(meta.fd); + } + } else { + break; + } + } + delete cf_mems; + return status; + } + + void ExtractMetaData() { + for (size_t i = 0; i < table_fds_.size(); i++) { + TableInfo t; + t.meta.fd = table_fds_[i]; + Status status = ScanTable(&t); + if (!status.ok()) { + std::string fname = TableFileName( + db_options_.db_paths, t.meta.fd.GetNumber(), t.meta.fd.GetPathId()); + char file_num_buf[kFormatFileNumberBufSize]; + FormatFileNumber(t.meta.fd.GetNumber(), t.meta.fd.GetPathId(), + file_num_buf, sizeof(file_num_buf)); + ROCKS_LOG_WARN(db_options_.info_log, "Table #%s: ignoring %s", + file_num_buf, status.ToString().c_str()); + ArchiveFile(fname); + } else { + tables_.push_back(t); + } + } + } + + Status ScanTable(TableInfo* t) { + std::string fname = TableFileName( + db_options_.db_paths, t->meta.fd.GetNumber(), t->meta.fd.GetPathId()); + int counter = 0; + uint64_t file_size; + Status status = env_->GetFileSize(fname, &file_size); + t->meta.fd = FileDescriptor(t->meta.fd.GetNumber(), t->meta.fd.GetPathId(), + file_size); + std::shared_ptr props; + if (status.ok()) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + status = table_cache_->GetTableProperties( + file_options_, read_options, icmp_, t->meta, &props, + 0 /* block_protection_bytes_per_key 
*/); + } + if (status.ok()) { + auto s = + GetSstInternalUniqueId(props->db_id, props->db_session_id, + props->orig_file_number, &t->meta.unique_id); + if (!s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Table #%" PRIu64 + ": unable to get unique id, default to Unknown.", + t->meta.fd.GetNumber()); + } + t->column_family_id = static_cast(props->column_family_id); + if (t->column_family_id == + TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) { + ROCKS_LOG_WARN( + db_options_.info_log, + "Table #%" PRIu64 + ": column family unknown (probably due to legacy format); " + "adding to default column family id 0.", + t->meta.fd.GetNumber()); + t->column_family_id = 0; + } + + if (vset_.GetColumnFamilySet()->GetColumnFamily(t->column_family_id) == + nullptr) { + status = + AddColumnFamily(props->column_family_name, t->column_family_id); + } + t->meta.oldest_ancester_time = props->creation_time; + } + if (status.ok()) { + uint64_t tail_size = 0; + bool contain_no_data_blocks = + props->num_entries > 0 && + (props->num_entries == props->num_range_deletions); + if (props->tail_start_offset > 0 || contain_no_data_blocks) { + assert(props->tail_start_offset <= file_size); + tail_size = file_size - props->tail_start_offset; + } + t->meta.tail_size = tail_size; + } + ColumnFamilyData* cfd = nullptr; + if (status.ok()) { + cfd = vset_.GetColumnFamilySet()->GetColumnFamily(t->column_family_id); + if (cfd->GetName() != props->column_family_name) { + ROCKS_LOG_ERROR( + db_options_.info_log, + "Table #%" PRIu64 + ": inconsistent column family name '%s'; expected '%s' for column " + "family id %" PRIu32 ".", + t->meta.fd.GetNumber(), props->column_family_name.c_str(), + cfd->GetName().c_str(), t->column_family_id); + status = Status::Corruption(dbname_, "inconsistent column family name"); + } + } + if (status.ok()) { + // TODO: plumb Env::IOActivity + ReadOptions ropts; + ropts.total_order_seek = true; + InternalIterator* iter = table_cache_->NewIterator( + ropts, file_options_, cfd->internal_comparator(), t->meta, + nullptr /* range_del_agg */, + cfd->GetLatestMutableCFOptions()->prefix_extractor, + /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, + TableReaderCaller::kRepair, /*arena=*/nullptr, /*skip_filters=*/false, + /*level=*/-1, /*max_file_size_for_l0_meta_pin=*/0, + /*smallest_compaction_key=*/nullptr, + /*largest_compaction_key=*/nullptr, + /*allow_unprepared_value=*/false, + cfd->GetLatestMutableCFOptions()->block_protection_bytes_per_key); + ParsedInternalKey parsed; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + Slice key = iter->key(); + Status pik_status = + ParseInternalKey(key, &parsed, db_options_.allow_data_in_errors); + if (!pik_status.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Table #%" PRIu64 ": unparsable key - %s", + t->meta.fd.GetNumber(), pik_status.getState()); + continue; + } + + counter++; + + status = t->meta.UpdateBoundaries(key, iter->value(), parsed.sequence, + parsed.type); + if (!status.ok()) { + break; + } + } + if (status.ok() && !iter->status().ok()) { + status = iter->status(); + } + delete iter; + + ROCKS_LOG_INFO(db_options_.info_log, "Table #%" PRIu64 ": %d entries %s", + t->meta.fd.GetNumber(), counter, + status.ToString().c_str()); + } + if (status.ok()) { + // XXX/FIXME: This is just basic, naive handling of range tombstones, + // like call to UpdateBoundariesForRange in builder.cc where we assume + // an SST file is a full sorted run. 
This probably needs the extra logic + // from compaction_job.cc around call to UpdateBoundariesForRange (to + // handle range tombstones extendingg beyond range of other entries). + // TODO: plumb Env::IOActivity + ReadOptions ropts; + std::unique_ptr r_iter; + status = table_cache_->GetRangeTombstoneIterator( + ropts, cfd->internal_comparator(), t->meta, + cfd->GetLatestMutableCFOptions()->block_protection_bytes_per_key, + &r_iter); + + if (r_iter) { + r_iter->SeekToFirst(); + + while (r_iter->Valid()) { + auto tombstone = r_iter->Tombstone(); + auto kv = tombstone.Serialize(); + t->meta.UpdateBoundariesForRange( + kv.first, tombstone.SerializeEndKey(), tombstone.seq_, + cfd->internal_comparator()); + r_iter->Next(); + } + } + } + return status; + } + + Status AddTables() { + // TODO: plumb Env::IOActivity; + const ReadOptions read_options; + std::unordered_map> cf_id_to_tables; + SequenceNumber max_sequence = 0; + for (size_t i = 0; i < tables_.size(); i++) { + cf_id_to_tables[tables_[i].column_family_id].push_back(&tables_[i]); + if (max_sequence < tables_[i].meta.fd.largest_seqno) { + max_sequence = tables_[i].meta.fd.largest_seqno; + } + } + vset_.SetLastAllocatedSequence(max_sequence); + vset_.SetLastPublishedSequence(max_sequence); + vset_.SetLastSequence(max_sequence); + + for (const auto& cf_id_and_tables : cf_id_to_tables) { + auto* cfd = + vset_.GetColumnFamilySet()->GetColumnFamily(cf_id_and_tables.first); + + // Recover files' epoch number using dummy VersionStorageInfo + VersionBuilder dummy_version_builder( + cfd->current()->version_set()->file_options(), cfd->ioptions(), + cfd->table_cache(), cfd->current()->storage_info(), + cfd->current()->version_set(), + cfd->GetFileMetadataCacheReservationManager()); + VersionStorageInfo dummy_vstorage( + &cfd->internal_comparator(), cfd->user_comparator(), + cfd->NumberLevels(), cfd->ioptions()->compaction_style, + nullptr /* src_vstorage */, cfd->ioptions()->force_consistency_checks, + EpochNumberRequirement::kMightMissing); + Status s; + VersionEdit dummy_edit; + for (const auto* table : cf_id_and_tables.second) { + // TODO(opt): separate out into multiple levels + dummy_edit.AddFile( + 0, table->meta.fd.GetNumber(), table->meta.fd.GetPathId(), + table->meta.fd.GetFileSize(), table->meta.smallest, + table->meta.largest, table->meta.fd.smallest_seqno, + table->meta.fd.largest_seqno, table->meta.marked_for_compaction, + table->meta.temperature, table->meta.oldest_blob_file_number, + table->meta.oldest_ancester_time, table->meta.file_creation_time, + table->meta.epoch_number, table->meta.file_checksum, + table->meta.file_checksum_func_name, table->meta.unique_id, + table->meta.compensated_range_deletion_size, table->meta.tail_size); + } + s = dummy_version_builder.Apply(&dummy_edit); + if (s.ok()) { + s = dummy_version_builder.SaveTo(&dummy_vstorage); + } + if (s.ok()) { + dummy_vstorage.RecoverEpochNumbers(cfd); + } + if (s.ok()) { + // Record changes from this repair in VersionEdit, including files with + // recovered epoch numbers + VersionEdit edit; + edit.SetComparatorName(cfd->user_comparator()->Name()); + edit.SetLogNumber(0); + edit.SetNextFile(next_file_number_); + edit.SetColumnFamily(cfd->GetID()); + for (int level = 0; level < dummy_vstorage.num_levels(); ++level) { + for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) { + edit.AddFile(level, *file_meta); + } + } + + // Release resources occupied by the dummy VersionStorageInfo + for (int level = 0; level < dummy_vstorage.num_levels(); ++level) { + for 
(FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) { + file_meta->refs--; + if (file_meta->refs <= 0) { + delete file_meta; + } + } + } + + // Persist record of changes + assert(next_file_number_ > 0); + vset_.MarkFileNumberUsed(next_file_number_ - 1); + mutex_.Lock(); + std::unique_ptr db_dir; + s = env_->GetFileSystem()->NewDirectory(dbname_, IOOptions(), &db_dir, + nullptr); + if (s.ok()) { + s = vset_.LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), + read_options, &edit, &mutex_, db_dir.get(), + false /* new_descriptor_log */); + } + mutex_.Unlock(); + } + if (!s.ok()) { + return s; + } + } + return Status::OK(); + } + + void ArchiveFile(const std::string& fname) { + // Move into another directory. E.g., for + // dir/foo + // rename to + // dir/lost/foo + const char* slash = strrchr(fname.c_str(), '/'); + std::string new_dir; + if (slash != nullptr) { + new_dir.assign(fname.data(), slash - fname.data()); + } + new_dir.append("/lost"); + env_->CreateDir(new_dir).PermitUncheckedError(); // Ignore error + std::string new_file = new_dir; + new_file.append("/"); + new_file.append((slash == nullptr) ? fname.c_str() : slash + 1); + Status s = env_->RenameFile(fname, new_file); + ROCKS_LOG_INFO(db_options_.info_log, "Archiving %s: %s\n", fname.c_str(), + s.ToString().c_str()); + } +}; + +Status GetDefaultCFOptions( + const std::vector& column_families, + ColumnFamilyOptions* res) { + assert(res != nullptr); + auto iter = std::find_if(column_families.begin(), column_families.end(), + [](const ColumnFamilyDescriptor& cfd) { + return cfd.name == kDefaultColumnFamilyName; + }); + if (iter == column_families.end()) { + return Status::InvalidArgument( + "column_families", "Must contain entry for default column family"); + } + *res = iter->options; + return Status::OK(); +} +} // anonymous namespace + +Status RepairDB(const std::string& dbname, const DBOptions& db_options, + const std::vector& column_families) { + ColumnFamilyOptions default_cf_opts; + Status status = GetDefaultCFOptions(column_families, &default_cf_opts); + if (!status.ok()) { + return status; + } + + Repairer repairer(dbname, db_options, column_families, default_cf_opts, + ColumnFamilyOptions() /* unknown_cf_opts */, + false /* create_unknown_cfs */); + status = repairer.Run(); + if (status.ok()) { + status = repairer.Close(); + } + return status; +} + +Status RepairDB(const std::string& dbname, const DBOptions& db_options, + const std::vector& column_families, + const ColumnFamilyOptions& unknown_cf_opts) { + ColumnFamilyOptions default_cf_opts; + Status status = GetDefaultCFOptions(column_families, &default_cf_opts); + if (!status.ok()) { + return status; + } + + Repairer repairer(dbname, db_options, column_families, default_cf_opts, + unknown_cf_opts, true /* create_unknown_cfs */); + status = repairer.Run(); + if (status.ok()) { + status = repairer.Close(); + } + return status; +} + +Status RepairDB(const std::string& dbname, const Options& options) { + Options opts(options); + DBOptions db_options(opts); + ColumnFamilyOptions cf_options(opts); + + Repairer repairer(dbname, db_options, {}, cf_options /* default_cf_opts */, + cf_options /* unknown_cf_opts */, + true /* create_unknown_cfs */); + Status status = repairer.Run(); + if (status.ok()) { + status = repairer.Close(); + } + return status; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.cc new file mode 100644 
index 0000000000..c692099294 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.cc @@ -0,0 +1,341 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/seqno_to_time_mapping.h" + +#include "db/version_edit.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +uint64_t SeqnoToTimeMapping::GetOldestApproximateTime( + const SequenceNumber seqno) const { + assert(is_sorted_); + auto it = std::upper_bound(seqno_time_mapping_.begin(), + seqno_time_mapping_.end(), seqno); + if (it == seqno_time_mapping_.begin()) { + return 0; + } + it--; + return it->time; +} + +void SeqnoToTimeMapping::Add(SequenceNumber seqno, uint64_t time) { + if (seqno == 0) { + return; + } + is_sorted_ = false; + seqno_time_mapping_.emplace_back(seqno, time); +} + +void SeqnoToTimeMapping::TruncateOldEntries(const uint64_t now) { + assert(is_sorted_); + + if (max_time_duration_ == 0) { + return; + } + + const uint64_t cut_off_time = + now > max_time_duration_ ? now - max_time_duration_ : 0; + assert(cut_off_time <= now); // no overflow + + auto it = std::upper_bound( + seqno_time_mapping_.begin(), seqno_time_mapping_.end(), cut_off_time, + [](uint64_t target, const SeqnoTimePair& other) -> bool { + return target < other.time; + }); + if (it == seqno_time_mapping_.begin()) { + return; + } + it--; + seqno_time_mapping_.erase(seqno_time_mapping_.begin(), it); +} + +SequenceNumber SeqnoToTimeMapping::GetOldestSequenceNum(uint64_t time) { + assert(is_sorted_); + + auto it = std::upper_bound( + seqno_time_mapping_.begin(), seqno_time_mapping_.end(), time, + [](uint64_t target, const SeqnoTimePair& other) -> bool { + return target < other.time; + }); + if (it == seqno_time_mapping_.begin()) { + return 0; + } + it--; + return it->seqno; +} + +// The encoded format is: +// [num_of_entries][[seqno][time],[seqno][time],...] +// ^ ^ +// var_int delta_encoded (var_int) +void SeqnoToTimeMapping::Encode(std::string& dest, const SequenceNumber start, + const SequenceNumber end, const uint64_t now, + const uint64_t output_size) const { + assert(is_sorted_); + if (start > end) { + // It could happen when the SST file is empty, the initial value of min + // sequence number is kMaxSequenceNumber and max is 0. + // The empty output file will be removed in the final step of compaction. + return; + } + + auto start_it = std::upper_bound(seqno_time_mapping_.begin(), + seqno_time_mapping_.end(), start); + if (start_it != seqno_time_mapping_.begin()) { + start_it--; + } + + auto end_it = std::upper_bound(seqno_time_mapping_.begin(), + seqno_time_mapping_.end(), end); + if (end_it == seqno_time_mapping_.begin()) { + return; + } + if (start_it >= end_it) { + return; + } + + // truncate old entries that are not needed + if (max_time_duration_ > 0) { + const uint64_t cut_off_time = + now > max_time_duration_ ? now - max_time_duration_ : 0; + while (start_it < end_it && start_it->time < cut_off_time) { + start_it++; + } + } + // to include the first element + if (start_it != seqno_time_mapping_.begin()) { + start_it--; + } + + // If there are more data than needed, pick the entries for encoding. + // It's not the most optimized algorithm for selecting the best representative + // entries over the time. 
+ // It starts from the beginning and makes sure the distance is larger than + // `(end - start) / size` before selecting the number. For example, for the + // following list, pick 3 entries (it will pick seqno #1, #6, #8): + // 1 -> 10 + // 5 -> 17 + // 6 -> 25 + // 8 -> 30 + // first, it always picks the first one, then there are 2 num_entries_to_fill + // and the time difference between current one vs. the last one is + // (30 - 10) = 20. 20/2 = 10. So it will skip until 10+10 = 20. => it skips + // #5 and pick #6. + // But the most optimized solution is picking #1 #5 #8, as it will be more + // evenly distributed for time. Anyway the following algorithm is simple and + // may over-select new data, which is good. We do want more accurate time + // information for recent data. + std::deque output_copy; + if (std::distance(start_it, end_it) > static_cast(output_size)) { + int64_t num_entries_to_fill = static_cast(output_size); + auto last_it = end_it; + last_it--; + uint64_t end_time = last_it->time; + uint64_t skip_until_time = 0; + for (auto it = start_it; it < end_it; it++) { + // skip if it's not reach the skip_until_time yet + if (std::distance(it, end_it) > num_entries_to_fill && + it->time < skip_until_time) { + continue; + } + output_copy.push_back(*it); + num_entries_to_fill--; + if (std::distance(it, end_it) > num_entries_to_fill && + num_entries_to_fill > 0) { + // If there are more entries than we need, re-calculate the + // skip_until_time, which means skip until that time + skip_until_time = + it->time + ((end_time - it->time) / num_entries_to_fill); + } + } + + // Make sure all entries are filled + assert(num_entries_to_fill == 0); + start_it = output_copy.begin(); + end_it = output_copy.end(); + } + + // Delta encode the data + uint64_t size = std::distance(start_it, end_it); + PutVarint64(&dest, size); + SeqnoTimePair base; + for (auto it = start_it; it < end_it; it++) { + assert(base < *it); + SeqnoTimePair val = *it - base; + base = *it; + val.Encode(dest); + } +} + +Status SeqnoToTimeMapping::Add(const std::string& seqno_time_mapping_str) { + Slice input(seqno_time_mapping_str); + if (input.empty()) { + return Status::OK(); + } + uint64_t size; + if (!GetVarint64(&input, &size)) { + return Status::Corruption("Invalid sequence number time size"); + } + is_sorted_ = false; + SeqnoTimePair base; + for (uint64_t i = 0; i < size; i++) { + SeqnoTimePair val; + Status s = val.Decode(input); + if (!s.ok()) { + return s; + } + val.Add(base); + seqno_time_mapping_.emplace_back(val); + base = val; + } + return Status::OK(); +} + +void SeqnoToTimeMapping::SeqnoTimePair::Encode(std::string& dest) const { + PutVarint64Varint64(&dest, seqno, time); +} + +Status SeqnoToTimeMapping::SeqnoTimePair::Decode(Slice& input) { + if (!GetVarint64(&input, &seqno)) { + return Status::Corruption("Invalid sequence number"); + } + if (!GetVarint64(&input, &time)) { + return Status::Corruption("Invalid time"); + } + return Status::OK(); +} + +bool SeqnoToTimeMapping::Append(SequenceNumber seqno, uint64_t time) { + assert(is_sorted_); + + // skip seq number 0, which may have special meaning, like zeroed out data + if (seqno == 0) { + return false; + } + if (!Empty()) { + if (seqno < Last().seqno || time < Last().time) { + return false; + } + if (seqno == Last().seqno) { + Last().time = time; + return true; + } + if (time == Last().time) { + // new sequence has the same time as old one, no need to add new mapping + return false; + } + } + + seqno_time_mapping_.emplace_back(seqno, time); + + if 
(seqno_time_mapping_.size() > max_capacity_) { + seqno_time_mapping_.pop_front(); + } + return true; +} + +bool SeqnoToTimeMapping::Resize(uint64_t min_time_duration, + uint64_t max_time_duration) { + uint64_t new_max_capacity = + CalculateMaxCapacity(min_time_duration, max_time_duration); + if (new_max_capacity == max_capacity_) { + return false; + } else if (new_max_capacity < seqno_time_mapping_.size()) { + uint64_t delta = seqno_time_mapping_.size() - new_max_capacity; + seqno_time_mapping_.erase(seqno_time_mapping_.begin(), + seqno_time_mapping_.begin() + delta); + } + max_capacity_ = new_max_capacity; + return true; +} + +Status SeqnoToTimeMapping::Sort() { + if (is_sorted_) { + return Status::OK(); + } + if (seqno_time_mapping_.empty()) { + is_sorted_ = true; + return Status::OK(); + } + + std::deque copy = std::move(seqno_time_mapping_); + + std::sort(copy.begin(), copy.end()); + + seqno_time_mapping_.clear(); + + // remove seqno = 0, which may have special meaning, like zeroed out data + while (copy.front().seqno == 0) { + copy.pop_front(); + } + + SeqnoTimePair prev = copy.front(); + for (const auto& it : copy) { + // If sequence number is the same, pick the one with larger time, which is + // more accurate than the older time. + if (it.seqno == prev.seqno) { + assert(it.time >= prev.time); + prev.time = it.time; + } else { + assert(it.seqno > prev.seqno); + // If a larger sequence number has an older time which is not useful, skip + if (it.time > prev.time) { + seqno_time_mapping_.push_back(prev); + prev = it; + } + } + } + seqno_time_mapping_.emplace_back(prev); + + is_sorted_ = true; + return Status::OK(); +} + +std::string SeqnoToTimeMapping::ToHumanString() const { + std::string ret; + for (const auto& seq_time : seqno_time_mapping_) { + AppendNumberTo(&ret, seq_time.seqno); + ret.append("->"); + AppendNumberTo(&ret, seq_time.time); + ret.append(","); + } + return ret; +} + +SeqnoToTimeMapping SeqnoToTimeMapping::Copy( + SequenceNumber smallest_seqno) const { + SeqnoToTimeMapping ret; + auto it = std::upper_bound(seqno_time_mapping_.begin(), + seqno_time_mapping_.end(), smallest_seqno); + if (it != seqno_time_mapping_.begin()) { + it--; + } + std::copy(it, seqno_time_mapping_.end(), + std::back_inserter(ret.seqno_time_mapping_)); + return ret; +} + +uint64_t SeqnoToTimeMapping::CalculateMaxCapacity(uint64_t min_time_duration, + uint64_t max_time_duration) { + if (min_time_duration == 0) { + return 0; + } + return std::min( + kMaxSeqnoToTimeEntries, + max_time_duration * kMaxSeqnoTimePairsPerCF / min_time_duration); +} + +SeqnoToTimeMapping::SeqnoTimePair SeqnoToTimeMapping::SeqnoTimePair::operator-( + const SeqnoTimePair& other) const { + SeqnoTimePair res; + res.seqno = seqno - other.seqno; + res.time = time - other.time; + return res; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.h new file mode 100644 index 0000000000..4ffc9c1992 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/seqno_to_time_mapping.h @@ -0,0 +1,189 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
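
The Encode/Add implementations above store a varint count followed by delta-encoded (seqno, time) pairs. A self-contained sketch of the same scheme follows; EncodeVarint here is a local stand-in for RocksDB's PutVarint64, written out so the example compiles on its own:

    #include <cstdint>
    #include <string>
    #include <vector>

    // LEB128-style varint, the same shape PutVarint64 produces.
    static void EncodeVarint(std::string* out, uint64_t v) {
      while (v >= 0x80) {
        out->push_back(static_cast<char>(v | 0x80));
        v >>= 7;
      }
      out->push_back(static_cast<char>(v));
    }

    struct Pair { uint64_t seqno; uint64_t time; };

    // Mirrors SeqnoToTimeMapping::Encode's output layout:
    // [num_of_entries][seqno delta][time delta]...
    std::string EncodeDeltas(const std::vector<Pair>& pairs) {
      std::string dest;
      EncodeVarint(&dest, pairs.size());
      Pair base{0, 0};
      for (const Pair& p : pairs) {
        EncodeVarint(&dest, p.seqno - base.seqno);  // delta vs. previous pair
        EncodeVarint(&dest, p.time - base.time);
        base = p;
      }
      return dest;
    }
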
+ +#pragma once + +#include <algorithm> +#include <cinttypes> +#include <deque> +#include <functional> +#include <iterator> +#include <string> + +#include "rocksdb/status.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +constexpr uint64_t kUnknownSeqnoTime = 0; + +// SeqnoToTimeMapping stores the sequence number to time mapping, so given a +// sequence number it can estimate the oldest possible time for that sequence +// number. For example: +// 10 -> 100 +// 50 -> 300 +// then if a key has seqno 19, the OldestApproximateTime would be 100, and for +// 51 it would be 300. +// As it's a sorted list, new entries are inserted at the back. Old entries +// are popped from the front when they're no longer needed. +// +// Note: this data structure is not thread safe; both reads and writes need to +// be synchronized by the caller. +class SeqnoToTimeMapping { + public: + // Maximum number of entries that can be encoded into an SST. The data is + // delta encoded, so the maximum data usage for each SST is < 0.3K. + static constexpr uint64_t kMaxSeqnoTimePairsPerSST = 100; + + // Maximum number of entries per CF. If there's only one CF with this + // feature on, the max duration is divided by this number to get the + // sampling cadence; for example, if + // preclude_last_level_data_seconds = 100000 (~1 day), then it will sample + // the seqno -> time mapping every 1000 seconds (~17 minutes), so the + // maximum number of entries it needs is 100. + // When there are multiple CFs with this feature on, the sampling cadence is + // determined by the smallest setting and the capacity by the largest + // setting; it is also capped at kMaxSeqnoTimePairsPerCF * 10. + static constexpr uint64_t kMaxSeqnoTimePairsPerCF = 100; + + // A simple struct for a sequence number to time pair + struct SeqnoTimePair { + SequenceNumber seqno = 0; + uint64_t time = 0; + + SeqnoTimePair() = default; + SeqnoTimePair(SequenceNumber _seqno, uint64_t _time) + : seqno(_seqno), time(_time) {} + + // Encode to dest string + void Encode(std::string& dest) const; + + // Decode the value from the input Slice and remove it from the input + Status Decode(Slice& input); + + // Subtraction of two SeqnoTimePairs + SeqnoTimePair operator-(const SeqnoTimePair& other) const; + + // Add two values together + void Add(const SeqnoTimePair& obj) { + seqno += obj.seqno; + time += obj.time; + } + + // Compare a SeqnoTimePair with a sequence number, used for binary + // searching a sequence number in a list of SeqnoTimePairs + bool operator<(const SequenceNumber& other) const { return seqno < other; } + + // Compare two SeqnoTimePairs + bool operator<(const SeqnoTimePair& other) const { + return std::tie(seqno, time) < std::tie(other.seqno, other.time); + } + + // Check if two SeqnoTimePairs are the same + bool operator==(const SeqnoTimePair& other) const { + return std::tie(seqno, time) == std::tie(other.seqno, other.time); + } + }; + + // Constructor of SeqnoToTimeMapping. + // max_time_duration is the maximum time it should track. For example, if + // preclude_last_level_data_seconds is 1 day, then entries older than 1 day + // can be removed. + // max_capacity is the maximum number of entries it can hold. For a single + // CF, it's capped at 100 (kMaxSeqnoTimePairsPerCF); otherwise + // kMaxSeqnoTimePairsPerCF * 10. + // If it's set to 0, it won't truncate any old data. + explicit SeqnoToTimeMapping(uint64_t max_time_duration = 0, + uint64_t max_capacity = 0) + : max_time_duration_(max_time_duration), max_capacity_(max_capacity) {} + + // Append a new entry to the list. The new entry should be newer than the + // existing ones. It keeps the list sorted. + bool Append(SequenceNumber seqno, uint64_t time); + + // Given a sequence number, estimate its oldest time + uint64_t GetOldestApproximateTime(SequenceNumber seqno) const; + + // Truncate the old entries based on the current time and max_time_duration_ + void TruncateOldEntries(uint64_t now); + + // Given a time, return its oldest possible sequence number + SequenceNumber GetOldestSequenceNum(uint64_t time); + + // Encode to a binary string + void Encode(std::string& des, SequenceNumber start, SequenceNumber end, + uint64_t now, + uint64_t output_size = kMaxSeqnoTimePairsPerSST) const; + + // Add a new entry. Unlike Append(), it can be any data, but it also leaves + // the list unsorted. + void Add(SequenceNumber seqno, uint64_t time); + + // Decode and add the entries to the current object. The list will be + // unsorted afterwards. + Status Add(const std::string& seqno_time_mapping_str); + + // Return the number of entries + size_t Size() const { return seqno_time_mapping_.size(); } + + // Reduce the size of the internal list + bool Resize(uint64_t min_time_duration, uint64_t max_time_duration); + + // Override the max_time_duration_ + void SetMaxTimeDuration(uint64_t max_time_duration) { + max_time_duration_ = max_time_duration; + } + + uint64_t GetCapacity() const { return max_capacity_; } + + // Sort the list and remove redundant or useless entries, making sure that + // both the seqnos and the times end up sorted. + Status Sort(); + + // Copy the current object, starting from the given smallest_seqno. + SeqnoToTimeMapping Copy(SequenceNumber smallest_seqno) const; + + // Whether the internal list is empty + bool Empty() const { return seqno_time_mapping_.empty(); } + + // Clear all entries + void Clear() { seqno_time_mapping_.clear(); } + + // Return a string for user messages + // Note: not efficient; okay for printing + std::string ToHumanString() const; + +#ifndef NDEBUG + const std::deque<SeqnoTimePair>& TEST_GetInternalMapping() const { + return seqno_time_mapping_; + } +#endif + + private: + static constexpr uint64_t kMaxSeqnoToTimeEntries = + kMaxSeqnoTimePairsPerCF * 10; + + uint64_t max_time_duration_; + uint64_t max_capacity_; + + std::deque<SeqnoTimePair> seqno_time_mapping_; + + bool is_sorted_ = true; + + static uint64_t CalculateMaxCapacity(uint64_t min_time_duration, + uint64_t max_time_duration); + + SeqnoTimePair& Last() { + assert(!Empty()); + return seqno_time_mapping_.back(); + } +}; + +// For binary searching a sequence number in a SeqnoToTimeMapping +inline bool operator<(const SequenceNumber& seqno, + const SeqnoToTimeMapping::SeqnoTimePair& other) { + return seqno < other.seqno; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_checker.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_checker.h new file mode 100644 index 0000000000..b7ff1df8c0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_checker.h @@ -0,0 +1,58 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +enum class SnapshotCheckerResult : int { + kInSnapshot = 0, + kNotInSnapshot = 1, + // In case the snapshot is released and the checker has no clue whether + // the given sequence is visible to the snapshot.
+ kSnapshotReleased = 2, +}; + +// Callback class that control GC of duplicate keys in flush/compaction. +class SnapshotChecker { + public: + virtual ~SnapshotChecker() {} + virtual SnapshotCheckerResult CheckInSnapshot( + SequenceNumber sequence, SequenceNumber snapshot_sequence) const = 0; +}; + +class DisableGCSnapshotChecker : public SnapshotChecker { + public: + virtual ~DisableGCSnapshotChecker() {} + virtual SnapshotCheckerResult CheckInSnapshot( + SequenceNumber /*sequence*/, + SequenceNumber /*snapshot_sequence*/) const override { + // By returning kNotInSnapshot, we prevent all the values from being GCed + return SnapshotCheckerResult::kNotInSnapshot; + } + static DisableGCSnapshotChecker* Instance(); + + protected: + explicit DisableGCSnapshotChecker() {} +}; + +class WritePreparedTxnDB; + +// Callback class created by WritePreparedTxnDB to check if a key +// is visible by a snapshot. +class WritePreparedSnapshotChecker : public SnapshotChecker { + public: + explicit WritePreparedSnapshotChecker(WritePreparedTxnDB* txn_db); + virtual ~WritePreparedSnapshotChecker() {} + + virtual SnapshotCheckerResult CheckInSnapshot( + SequenceNumber sequence, SequenceNumber snapshot_sequence) const override; + + private: + const WritePreparedTxnDB* const txn_db_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.cc new file mode 100644 index 0000000000..98b4754634 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/db.h" +#include "rocksdb/snapshot.h" + +namespace ROCKSDB_NAMESPACE { + +ManagedSnapshot::ManagedSnapshot(DB* db) + : db_(db), snapshot_(db->GetSnapshot()) {} + +ManagedSnapshot::ManagedSnapshot(DB* db, const Snapshot* _snapshot) + : db_(db), snapshot_(_snapshot) {} + +ManagedSnapshot::~ManagedSnapshot() { + if (snapshot_) { + db_->ReleaseSnapshot(snapshot_); + } +} + +const Snapshot* ManagedSnapshot::snapshot() { return snapshot_; } + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.h new file mode 100644 index 0000000000..23e5e98cd2 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/snapshot_impl.h @@ -0,0 +1,239 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include "db/dbformat.h" +#include "rocksdb/db.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +class SnapshotList; + +// Snapshots are kept in a doubly-linked list in the DB. +// Each SnapshotImpl corresponds to a particular sequence number. 
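
The SnapshotImpl/SnapshotList pair below implements a circular doubly-linked list with a sentinel head, so oldest() is head.next, newest() is head.prev, and insertion and removal are O(1). A minimal standalone sketch of that shape (the names here are illustrative, not the RocksDB ones):

    #include <cstdint>

    struct Node {
      Node* prev;
      Node* next;
      uint64_t number;
    };

    // The sentinel points at itself when the list is empty.
    void Init(Node& head) {
      head.prev = &head;
      head.next = &head;
    }

    // Mirrors the splice in SnapshotList::New: insert just before the
    // sentinel, making the node the newest entry.
    void PushNewest(Node& head, Node* n) {
      n->next = &head;
      n->prev = head.prev;
      n->prev->next = n;
      n->next->prev = n;
    }
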
+class SnapshotImpl : public Snapshot { + public: + SequenceNumber number_; // const after creation + // It indicates the smallest uncommitted data at the time the snapshot was + // taken. This is currently used by WritePrepared transactions to limit the + // scope of queries to IsInSnapshot. + SequenceNumber min_uncommitted_ = kMinUnCommittedSeq; + + SequenceNumber GetSequenceNumber() const override { return number_; } + + int64_t GetUnixTime() const override { return unix_time_; } + + uint64_t GetTimestamp() const override { return timestamp_; } + + private: + friend class SnapshotList; + + // SnapshotImpl is kept in a doubly-linked circular list + SnapshotImpl* prev_; + SnapshotImpl* next_; + + SnapshotList* list_; // just for sanity checks + + int64_t unix_time_; + + uint64_t timestamp_; + + // Will this snapshot be used by a Transaction to do write-conflict checking? + bool is_write_conflict_boundary_; +}; + +class SnapshotList { + public: + SnapshotList() { + list_.prev_ = &list_; + list_.next_ = &list_; + list_.number_ = 0xFFFFFFFFL; // placeholder marker, for debugging + // Set all the variables to make UBSAN happy. + list_.list_ = nullptr; + list_.unix_time_ = 0; + list_.timestamp_ = 0; + list_.is_write_conflict_boundary_ = false; + count_ = 0; + } + + // No copy-construct. + SnapshotList(const SnapshotList&) = delete; + + bool empty() const { + assert(list_.next_ != &list_ || 0 == count_); + return list_.next_ == &list_; + } + SnapshotImpl* oldest() const { + assert(!empty()); + return list_.next_; + } + SnapshotImpl* newest() const { + assert(!empty()); + return list_.prev_; + } + + SnapshotImpl* New(SnapshotImpl* s, SequenceNumber seq, uint64_t unix_time, + bool is_write_conflict_boundary, + uint64_t ts = std::numeric_limits::max()) { + s->number_ = seq; + s->unix_time_ = unix_time; + s->timestamp_ = ts; + s->is_write_conflict_boundary_ = is_write_conflict_boundary; + s->list_ = this; + s->next_ = &list_; + s->prev_ = list_.prev_; + s->prev_->next_ = s; + s->next_->prev_ = s; + count_++; + return s; + } + + // Do not responsible to free the object. + void Delete(const SnapshotImpl* s) { + assert(s->list_ == this); + s->prev_->next_ = s->next_; + s->next_->prev_ = s->prev_; + count_--; + } + + // retrieve all snapshot numbers up until max_seq. They are sorted in + // ascending order (with no duplicates). 
+ std::vector GetAll( + SequenceNumber* oldest_write_conflict_snapshot = nullptr, + const SequenceNumber& max_seq = kMaxSequenceNumber) const { + std::vector ret; + GetAll(&ret, oldest_write_conflict_snapshot, max_seq); + return ret; + } + + void GetAll(std::vector* snap_vector, + SequenceNumber* oldest_write_conflict_snapshot = nullptr, + const SequenceNumber& max_seq = kMaxSequenceNumber) const { + std::vector& ret = *snap_vector; + // So far we have no use case that would pass a non-empty vector + assert(ret.size() == 0); + + if (oldest_write_conflict_snapshot != nullptr) { + *oldest_write_conflict_snapshot = kMaxSequenceNumber; + } + + if (empty()) { + return; + } + const SnapshotImpl* s = &list_; + while (s->next_ != &list_) { + if (s->next_->number_ > max_seq) { + break; + } + // Avoid duplicates + if (ret.empty() || ret.back() != s->next_->number_) { + ret.push_back(s->next_->number_); + } + + if (oldest_write_conflict_snapshot != nullptr && + *oldest_write_conflict_snapshot == kMaxSequenceNumber && + s->next_->is_write_conflict_boundary_) { + // If this is the first write-conflict boundary snapshot in the list, + // it is the oldest + *oldest_write_conflict_snapshot = s->next_->number_; + } + + s = s->next_; + } + return; + } + + // get the sequence number of the most recent snapshot + SequenceNumber GetNewest() { + if (empty()) { + return 0; + } + return newest()->number_; + } + + int64_t GetOldestSnapshotTime() const { + if (empty()) { + return 0; + } else { + return oldest()->unix_time_; + } + } + + int64_t GetOldestSnapshotSequence() const { + if (empty()) { + return 0; + } else { + return oldest()->GetSequenceNumber(); + } + } + + uint64_t count() const { return count_; } + + private: + // Dummy head of doubly-linked list of snapshots + SnapshotImpl list_; + uint64_t count_; +}; + +// All operations on TimestampedSnapshotList must be protected by db mutex. +class TimestampedSnapshotList { + public: + explicit TimestampedSnapshotList() = default; + + std::shared_ptr GetSnapshot(uint64_t ts) const { + if (ts == std::numeric_limits::max() && !snapshots_.empty()) { + auto it = snapshots_.rbegin(); + assert(it != snapshots_.rend()); + return it->second; + } + auto it = snapshots_.find(ts); + if (it == snapshots_.end()) { + return std::shared_ptr(); + } + return it->second; + } + + void GetSnapshots( + uint64_t ts_lb, uint64_t ts_ub, + std::vector>& snapshots) const { + assert(ts_lb < ts_ub); + auto it_low = snapshots_.lower_bound(ts_lb); + auto it_high = snapshots_.lower_bound(ts_ub); + for (auto it = it_low; it != it_high; ++it) { + snapshots.emplace_back(it->second); + } + } + + void AddSnapshot(const std::shared_ptr& snapshot) { + assert(snapshot); + snapshots_.try_emplace(snapshot->GetTimestamp(), snapshot); + } + + // snapshots_to_release: the container to where the timestamped snapshots will + // be moved so that it retains the last reference to the snapshots and the + // snapshots won't be actually released which requires db mutex. The + // snapshots will be released by caller of ReleaseSnapshotsOlderThan(). 
+ void ReleaseSnapshotsOlderThan( + uint64_t ts, + autovector>& snapshots_to_release) { + auto ub = snapshots_.lower_bound(ts); + for (auto it = snapshots_.begin(); it != ub; ++it) { + snapshots_to_release.emplace_back(it->second); + } + snapshots_.erase(snapshots_.begin(), ub); + } + + private: + std::map> snapshots_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.cc new file mode 100644 index 0000000000..73fa07c6d5 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.cc @@ -0,0 +1,710 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/table_cache.h" + +#include "db/dbformat.h" +#include "db/range_tombstone_fragmenter.h" +#include "db/snapshot_impl.h" +#include "db/version_edit.h" +#include "file/file_util.h" +#include "file/filename.h" +#include "file/random_access_file_reader.h" +#include "monitoring/perf_context_imp.h" +#include "rocksdb/advanced_options.h" +#include "rocksdb/statistics.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/get_context.h" +#include "table/internal_iterator.h" +#include "table/iterator_wrapper.h" +#include "table/multiget_context.h" +#include "table/table_builder.h" +#include "table/table_reader.h" +#include "test_util/sync_point.h" +#include "util/cast_util.h" +#include "util/coding.h" +#include "util/stop_watch.h" + +// Generate the regular and coroutine versions of some methods by +// including table_cache_sync_and_async.h twice +// Macros in the header will expand differently based on whether +// WITH_COROUTINES or WITHOUT_COROUTINES is defined +// clang-format off +#define WITHOUT_COROUTINES +#include "db/table_cache_sync_and_async.h" +#undef WITHOUT_COROUTINES +#define WITH_COROUTINES +#include "db/table_cache_sync_and_async.h" +#undef WITH_COROUTINES +// clang-format on + +namespace ROCKSDB_NAMESPACE { + +namespace { + +static Slice GetSliceForFileNumber(const uint64_t* file_number) { + return Slice(reinterpret_cast(file_number), + sizeof(*file_number)); +} + + +void AppendVarint64(IterKey* key, uint64_t v) { + char buf[10]; + auto ptr = EncodeVarint64(buf, v); + key->TrimAppend(key->Size(), buf, ptr - buf); +} + + +} // anonymous namespace + +const int kLoadConcurency = 128; + +TableCache::TableCache(const ImmutableOptions& ioptions, + const FileOptions* file_options, Cache* const cache, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, + const std::string& db_session_id) + : ioptions_(ioptions), + file_options_(*file_options), + cache_(cache), + immortal_tables_(false), + block_cache_tracer_(block_cache_tracer), + loader_mutex_(kLoadConcurency, kGetSliceNPHash64UnseededFnPtr), + io_tracer_(io_tracer), + db_session_id_(db_session_id) { + if (ioptions_.row_cache) { + // If the same cache is shared by multiple instances, we need to + // disambiguate its entries. 
+ PutVarint64(&row_cache_id_, ioptions_.row_cache->NewId()); + } +} + +TableCache::~TableCache() {} + +Status TableCache::GetTableReader( + const ReadOptions& ro, const FileOptions& file_options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, bool sequential_mode, bool record_read_stats, + uint8_t block_protection_bytes_per_key, HistogramImpl* file_read_hist, + std::unique_ptr* table_reader, + const std::shared_ptr& prefix_extractor, + bool skip_filters, int level, bool prefetch_index_and_filter_in_cache, + size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) { + std::string fname = TableFileName( + ioptions_.cf_paths, file_meta.fd.GetNumber(), file_meta.fd.GetPathId()); + std::unique_ptr file; + FileOptions fopts = file_options; + fopts.temperature = file_temperature; + Status s = PrepareIOFromReadOptions(ro, ioptions_.clock, fopts.io_options); + TEST_SYNC_POINT_CALLBACK("TableCache::GetTableReader:BeforeOpenFile", + const_cast(&s)); + if (s.ok()) { + s = ioptions_.fs->NewRandomAccessFile(fname, fopts, &file, nullptr); + } + if (s.ok()) { + RecordTick(ioptions_.stats, NO_FILE_OPENS); + } else if (s.IsPathNotFound()) { + fname = Rocks2LevelTableFileName(fname); + s = PrepareIOFromReadOptions(ro, ioptions_.clock, fopts.io_options); + if (s.ok()) { + s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file, + nullptr); + } + if (s.ok()) { + RecordTick(ioptions_.stats, NO_FILE_OPENS); + } + } + + if (s.ok()) { + if (!sequential_mode && ioptions_.advise_random_on_open) { + file->Hint(FSRandomAccessFile::kRandom); + } + StopWatch sw(ioptions_.clock, ioptions_.stats, TABLE_OPEN_IO_MICROS); + std::unique_ptr file_reader( + new RandomAccessFileReader( + std::move(file), fname, ioptions_.clock, io_tracer_, + record_read_stats ? 
ioptions_.stats : nullptr, SST_READ_MICROS, + file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners, + file_temperature, level == ioptions_.num_levels - 1)); + UniqueId64x2 expected_unique_id; + if (ioptions_.verify_sst_unique_id_in_manifest) { + expected_unique_id = file_meta.unique_id; + } else { + expected_unique_id = kNullUniqueId64x2; // null ID == no verification + } + s = ioptions_.table_factory->NewTableReader( + ro, + TableReaderOptions( + ioptions_, prefix_extractor, file_options, internal_comparator, + block_protection_bytes_per_key, skip_filters, immortal_tables_, + false /* force_direct_prefetch */, level, block_cache_tracer_, + max_file_size_for_l0_meta_pin, db_session_id_, + file_meta.fd.GetNumber(), expected_unique_id, + file_meta.fd.largest_seqno, file_meta.tail_size), + std::move(file_reader), file_meta.fd.GetFileSize(), table_reader, + prefetch_index_and_filter_in_cache); + TEST_SYNC_POINT("TableCache::GetTableReader:0"); + } + return s; +} + +Status TableCache::FindTable( + const ReadOptions& ro, const FileOptions& file_options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, TypedHandle** handle, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor, + const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist, + bool skip_filters, int level, bool prefetch_index_and_filter_in_cache, + size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) { + PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock); + uint64_t number = file_meta.fd.GetNumber(); + Slice key = GetSliceForFileNumber(&number); + *handle = cache_.Lookup(key); + TEST_SYNC_POINT_CALLBACK("TableCache::FindTable:0", + const_cast(&no_io)); + + if (*handle == nullptr) { + if (no_io) { + return Status::Incomplete("Table not found in table_cache, no_io is set"); + } + MutexLock load_lock(loader_mutex_.get(key)); + // We check the cache again under loading mutex + *handle = cache_.Lookup(key); + if (*handle != nullptr) { + return Status::OK(); + } + + std::unique_ptr table_reader; + Status s = GetTableReader(ro, file_options, internal_comparator, file_meta, + false /* sequential mode */, record_read_stats, + block_protection_bytes_per_key, file_read_hist, + &table_reader, prefix_extractor, skip_filters, + level, prefetch_index_and_filter_in_cache, + max_file_size_for_l0_meta_pin, file_temperature); + if (!s.ok()) { + assert(table_reader == nullptr); + RecordTick(ioptions_.stats, NO_FILE_ERRORS); + // We do not cache error results so that if the error is transient, + // or somebody repairs the file, we recover automatically. + } else { + s = cache_.Insert(key, table_reader.get(), 1, handle); + if (s.ok()) { + // Release ownership of table reader. 
+ table_reader.release(); + } + } + return s; + } + return Status::OK(); +} + +InternalIterator* TableCache::NewIterator( + const ReadOptions& options, const FileOptions& file_options, + const InternalKeyComparator& icomparator, const FileMetaData& file_meta, + RangeDelAggregator* range_del_agg, + const std::shared_ptr& prefix_extractor, + TableReader** table_reader_ptr, HistogramImpl* file_read_hist, + TableReaderCaller caller, Arena* arena, bool skip_filters, int level, + size_t max_file_size_for_l0_meta_pin, + const InternalKey* smallest_compaction_key, + const InternalKey* largest_compaction_key, bool allow_unprepared_value, + uint8_t block_protection_bytes_per_key, + TruncatedRangeDelIterator** range_del_iter) { + PERF_TIMER_GUARD(new_table_iterator_nanos); + + Status s; + TableReader* table_reader = nullptr; + TypedHandle* handle = nullptr; + if (table_reader_ptr != nullptr) { + *table_reader_ptr = nullptr; + } + bool for_compaction = caller == TableReaderCaller::kCompaction; + auto& fd = file_meta.fd; + table_reader = fd.table_reader; + if (table_reader == nullptr) { + s = FindTable(options, file_options, icomparator, file_meta, &handle, + block_protection_bytes_per_key, prefix_extractor, + options.read_tier == kBlockCacheTier /* no_io */, + !for_compaction /* record_read_stats */, file_read_hist, + skip_filters, level, + true /* prefetch_index_and_filter_in_cache */, + max_file_size_for_l0_meta_pin, file_meta.temperature); + if (s.ok()) { + table_reader = cache_.Value(handle); + } + } + InternalIterator* result = nullptr; + if (s.ok()) { + if (options.table_filter && + !options.table_filter(*table_reader->GetTableProperties())) { + result = NewEmptyInternalIterator(arena); + } else { + result = table_reader->NewIterator( + options, prefix_extractor.get(), arena, skip_filters, caller, + file_options.compaction_readahead_size, allow_unprepared_value); + } + if (handle != nullptr) { + cache_.RegisterReleaseAsCleanup(handle, *result); + handle = nullptr; // prevent from releasing below + } + + if (for_compaction) { + table_reader->SetupForCompaction(); + } + if (table_reader_ptr != nullptr) { + *table_reader_ptr = table_reader; + } + } + if (s.ok() && !options.ignore_range_deletions) { + if (range_del_iter != nullptr) { + auto new_range_del_iter = + table_reader->NewRangeTombstoneIterator(options); + if (new_range_del_iter == nullptr || new_range_del_iter->empty()) { + delete new_range_del_iter; + *range_del_iter = nullptr; + } else { + *range_del_iter = new TruncatedRangeDelIterator( + std::unique_ptr( + new_range_del_iter), + &icomparator, &file_meta.smallest, &file_meta.largest); + } + } + if (range_del_agg != nullptr) { + if (range_del_agg->AddFile(fd.GetNumber())) { + std::unique_ptr new_range_del_iter( + static_cast( + table_reader->NewRangeTombstoneIterator(options))); + if (new_range_del_iter != nullptr) { + s = new_range_del_iter->status(); + } + if (s.ok()) { + const InternalKey* smallest = &file_meta.smallest; + const InternalKey* largest = &file_meta.largest; + if (smallest_compaction_key != nullptr) { + smallest = smallest_compaction_key; + } + if (largest_compaction_key != nullptr) { + largest = largest_compaction_key; + } + range_del_agg->AddTombstones(std::move(new_range_del_iter), smallest, + largest); + } + } + } + } + + if (handle != nullptr) { + cache_.Release(handle); + } + if (!s.ok()) { + assert(result == nullptr); + result = NewErrorInternalIterator(s, arena); + } + return result; +} + +Status TableCache::GetRangeTombstoneIterator( + const ReadOptions& options, 
+ const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key, + std::unique_ptr<FragmentedRangeTombstoneIterator>* out_iter) { + assert(out_iter); + const FileDescriptor& fd = file_meta.fd; + Status s; + TableReader* t = fd.table_reader; + TypedHandle* handle = nullptr; + if (t == nullptr) { + s = FindTable(options, file_options_, internal_comparator, file_meta, + &handle, block_protection_bytes_per_key); + if (s.ok()) { + t = cache_.Value(handle); + } + } + if (s.ok()) { + // Note: NewRangeTombstoneIterator could return nullptr + out_iter->reset(t->NewRangeTombstoneIterator(options)); + } + if (handle) { + if (*out_iter) { + cache_.RegisterReleaseAsCleanup(handle, **out_iter); + } else { + cache_.Release(handle); + } + } + return s; +} + +void TableCache::CreateRowCacheKeyPrefix(const ReadOptions& options, + const FileDescriptor& fd, + const Slice& internal_key, + GetContext* get_context, + IterKey& row_cache_key) { + uint64_t fd_number = fd.GetNumber(); + // We use the user key as the cache key instead of the internal key; + // otherwise the whole cache would be invalidated every time the + // sequence number increases. However, to support caching snapshot + // reads, we append the sequence number (incremented by 1 to + // distinguish from 0) only in this case. + // If the snapshot is larger than the largest seqno in the file, + // all data should be exposed to the snapshot, so we treat it + // the same as if there were no snapshot. The exception is that if + // a seq-checking callback is registered, some internal keys + // may still be filtered out. + uint64_t seq_no = 0; + // Maybe we can include the whole file if snapshot == fd.largest_seqno. + if (options.snapshot != nullptr && + (get_context->has_callback() || + static_cast_with_check<const SnapshotImpl>(options.snapshot) + ->GetSequenceNumber() <= fd.largest_seqno)) { + // We should consider using options.snapshot->GetSequenceNumber() + // instead of GetInternalKeySeqno(k), which would make the code + // easier to understand. + seq_no = 1 + GetInternalKeySeqno(internal_key); + } + + // Compute the row cache key. + row_cache_key.TrimAppend(row_cache_key.Size(), row_cache_id_.data(), + row_cache_id_.size()); + AppendVarint64(&row_cache_key, fd_number); + AppendVarint64(&row_cache_key, seq_no); +} + +bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, + size_t prefix_size, GetContext* get_context) { + bool found = false; + + row_cache_key.TrimAppend(prefix_size, user_key.data(), user_key.size()); + RowCacheInterface row_cache{ioptions_.row_cache.get()}; + if (auto row_handle = row_cache.Lookup(row_cache_key.GetUserKey())) { + // Cleanable routine to release the cache entry + Cleanable value_pinner; + // If we get here, the value is located in the cache. + // found_row_cache_entry points to the value in the cache, + // and value_pinner has the cleanup procedure for the cached entry. + // After replayGetContextLog() returns, get_context.pinnable_slice_ + // will point to the cache entry buffer (or a copy based on it) and + // the cleanup routine under value_pinner will be delegated to + // get_context.pinnable_slice_. The cache entry is released when + // get_context.pinnable_slice_ is reset.
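
As an aside, the row-cache key whose prefix is built above is just a concatenation of three fields plus the user key; an illustrative sketch (the real code uses IterKey and varint encoding, the decimal formatting below is only for readability):

    #include <cstdint>
    #include <string>

    // Layout: [row_cache_id][fd_number][seq_no] form the prefix; the
    // user key is appended per lookup.
    std::string MakeRowCacheKey(const std::string& row_cache_id,
                                uint64_t fd_number, uint64_t seq_no,
                                const std::string& user_key) {
      std::string key = row_cache_id;      // disambiguates shared caches
      key += std::to_string(fd_number);    // SST file number
      key += std::to_string(seq_no);       // 0, or snapshot seqno + 1
      key += user_key;
      return key;
    }
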
+ row_cache.RegisterReleaseAsCleanup(row_handle, value_pinner); + replayGetContextLog(*row_cache.Value(row_handle), user_key, get_context, + &value_pinner); + RecordTick(ioptions_.stats, ROW_CACHE_HIT); + found = true; + } else { + RecordTick(ioptions_.stats, ROW_CACHE_MISS); + } + return found; +} + +Status TableCache::Get( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor, + HistogramImpl* file_read_hist, bool skip_filters, int level, + size_t max_file_size_for_l0_meta_pin) { + auto& fd = file_meta.fd; + std::string* row_cache_entry = nullptr; + bool done = false; + IterKey row_cache_key; + std::string row_cache_entry_buffer; + + // Check row cache if enabled. Since row cache does not currently store + // sequence numbers, we cannot use it if we need to fetch the sequence. + if (ioptions_.row_cache && !get_context->NeedToReadSequence()) { + auto user_key = ExtractUserKey(k); + CreateRowCacheKeyPrefix(options, fd, k, get_context, row_cache_key); + done = GetFromRowCache(user_key, row_cache_key, row_cache_key.Size(), + get_context); + if (!done) { + row_cache_entry = &row_cache_entry_buffer; + } + } + Status s; + TableReader* t = fd.table_reader; + TypedHandle* handle = nullptr; + if (!done) { + assert(s.ok()); + if (t == nullptr) { + s = FindTable(options, file_options_, internal_comparator, file_meta, + &handle, block_protection_bytes_per_key, prefix_extractor, + options.read_tier == kBlockCacheTier /* no_io */, + true /* record_read_stats */, file_read_hist, skip_filters, + level, true /* prefetch_index_and_filter_in_cache */, + max_file_size_for_l0_meta_pin, file_meta.temperature); + if (s.ok()) { + t = cache_.Value(handle); + } + } + SequenceNumber* max_covering_tombstone_seq = + get_context->max_covering_tombstone_seq(); + if (s.ok() && max_covering_tombstone_seq != nullptr && + !options.ignore_range_deletions) { + std::unique_ptr range_del_iter( + t->NewRangeTombstoneIterator(options)); + if (range_del_iter != nullptr) { + SequenceNumber seq = + range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k)); + if (seq > *max_covering_tombstone_seq) { + *max_covering_tombstone_seq = seq; + if (get_context->NeedTimestamp()) { + get_context->SetTimestampFromRangeTombstone( + range_del_iter->timestamp()); + } + } + } + } + if (s.ok()) { + get_context->SetReplayLog(row_cache_entry); // nullptr if no cache. + s = t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); + get_context->SetReplayLog(nullptr); + } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) { + // Couldn't find Table in cache but treat as kFound if no_io set + get_context->MarkKeyMayExist(); + s = Status::OK(); + done = true; + } + } + + // Put the replay log in row cache only if something was found. + if (!done && s.ok() && row_cache_entry && !row_cache_entry->empty()) { + RowCacheInterface row_cache{ioptions_.row_cache.get()}; + size_t charge = row_cache_entry->capacity() + sizeof(std::string); + auto row_ptr = new std::string(std::move(*row_cache_entry)); + // If row cache is full, it's OK to continue. 
+ row_cache.Insert(row_cache_key.GetUserKey(), row_ptr, charge) + .PermitUncheckedError(); + } + + if (handle != nullptr) { + cache_.Release(handle); + } + return s; +} + +void TableCache::UpdateRangeTombstoneSeqnums( + const ReadOptions& options, TableReader* t, + MultiGetContext::Range& table_range) { + std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter( + t->NewRangeTombstoneIterator(options)); + if (range_del_iter != nullptr) { + for (auto iter = table_range.begin(); iter != table_range.end(); ++iter) { + SequenceNumber* max_covering_tombstone_seq = + iter->get_context->max_covering_tombstone_seq(); + SequenceNumber seq = + range_del_iter->MaxCoveringTombstoneSeqnum(iter->ukey_with_ts); + if (seq > *max_covering_tombstone_seq) { + *max_covering_tombstone_seq = seq; + if (iter->get_context->NeedTimestamp()) { + iter->get_context->SetTimestampFromRangeTombstone( + range_del_iter->timestamp()); + } + } + } + } +} + +Status TableCache::MultiGetFilter( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, + const std::shared_ptr<const SliceTransform>& prefix_extractor, + HistogramImpl* file_read_hist, int level, + MultiGetContext::Range* mget_range, TypedHandle** table_handle, + uint8_t block_protection_bytes_per_key) { + auto& fd = file_meta.fd; + IterKey row_cache_key; + std::string row_cache_entry_buffer; + + // Check if we need to use the row cache. If yes, then we cannot do the + // filtering here, since the filtering needs to happen after the row cache + // lookup. + KeyContext& first_key = *mget_range->begin(); + if (ioptions_.row_cache && !first_key.get_context->NeedToReadSequence()) { + return Status::NotSupported(); + } + Status s; + TableReader* t = fd.table_reader; + TypedHandle* handle = nullptr; + MultiGetContext::Range tombstone_range(*mget_range, mget_range->begin(), + mget_range->end()); + if (t == nullptr) { + s = FindTable(options, file_options_, internal_comparator, file_meta, + &handle, block_protection_bytes_per_key, prefix_extractor, + options.read_tier == kBlockCacheTier /* no_io */, + true /* record_read_stats */, file_read_hist, + /*skip_filters=*/false, level, + true /* prefetch_index_and_filter_in_cache */, + /*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature); + if (s.ok()) { + t = cache_.Value(handle); + } + *table_handle = handle; + } + if (s.ok()) { + s = t->MultiGetFilter(options, prefix_extractor.get(), mget_range); + } + if (s.ok() && !options.ignore_range_deletions) { + // Update the range tombstone sequence numbers for the keys here + // as TableCache::MultiGet may or may not be called, and even if it + // is, it may be called with fewer keys in the range due to filtering. + UpdateRangeTombstoneSeqnums(options, t, tombstone_range); + } + if (mget_range->empty() && handle) { + cache_.Release(handle); + *table_handle = nullptr; + } + + return s; +} + +Status TableCache::GetTableProperties( + const FileOptions& file_options, const ReadOptions& read_options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, + std::shared_ptr<const TableProperties>* properties, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr<const SliceTransform>& prefix_extractor, bool no_io) { + auto table_reader = file_meta.fd.table_reader; + // Has the table already been pre-loaded?
+ if (table_reader) { + *properties = table_reader->GetTableProperties(); + + return Status::OK(); + } + + TypedHandle* table_handle = nullptr; + Status s = FindTable(read_options, file_options, internal_comparator, + file_meta, &table_handle, block_protection_bytes_per_key, + prefix_extractor, no_io); + if (!s.ok()) { + return s; + } + assert(table_handle); + auto table = cache_.Value(table_handle); + *properties = table->GetTableProperties(); + cache_.Release(table_handle); + return s; +} + +Status TableCache::ApproximateKeyAnchors( + const ReadOptions& ro, const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key, + std::vector& anchors) { + Status s; + TableReader* t = file_meta.fd.table_reader; + TypedHandle* handle = nullptr; + if (t == nullptr) { + s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle, + block_protection_bytes_per_key); + if (s.ok()) { + t = cache_.Value(handle); + } + } + if (s.ok() && t != nullptr) { + s = t->ApproximateKeyAnchors(ro, anchors); + } + if (handle != nullptr) { + cache_.Release(handle); + } + return s; +} + +size_t TableCache::GetMemoryUsageByTableReader( + const FileOptions& file_options, const ReadOptions& read_options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor) { + auto table_reader = file_meta.fd.table_reader; + // table already been pre-loaded? + if (table_reader) { + return table_reader->ApproximateMemoryUsage(); + } + + TypedHandle* table_handle = nullptr; + Status s = FindTable(read_options, file_options, internal_comparator, + file_meta, &table_handle, block_protection_bytes_per_key, + prefix_extractor, true /* no_io */); + if (!s.ok()) { + return 0; + } + assert(table_handle); + auto table = cache_.Value(table_handle); + auto ret = table->ApproximateMemoryUsage(); + cache_.Release(table_handle); + return ret; +} + +void TableCache::Evict(Cache* cache, uint64_t file_number) { + cache->Erase(GetSliceForFileNumber(&file_number)); +} + +uint64_t TableCache::ApproximateOffsetOf( + const ReadOptions& read_options, const Slice& key, + const FileMetaData& file_meta, TableReaderCaller caller, + const InternalKeyComparator& internal_comparator, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor) { + uint64_t result = 0; + TableReader* table_reader = file_meta.fd.table_reader; + TypedHandle* table_handle = nullptr; + if (table_reader == nullptr) { + const bool for_compaction = (caller == TableReaderCaller::kCompaction); + Status s = FindTable( + read_options, file_options_, internal_comparator, file_meta, + &table_handle, block_protection_bytes_per_key, prefix_extractor, + false /* no_io */, !for_compaction /* record_read_stats */); + if (s.ok()) { + table_reader = cache_.Value(table_handle); + } + } + + if (table_reader != nullptr) { + result = table_reader->ApproximateOffsetOf(read_options, key, caller); + } + if (table_handle != nullptr) { + cache_.Release(table_handle); + } + + return result; +} + +uint64_t TableCache::ApproximateSize( + const ReadOptions& read_options, const Slice& start, const Slice& end, + const FileMetaData& file_meta, TableReaderCaller caller, + const InternalKeyComparator& internal_comparator, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor) { + uint64_t result = 0; + TableReader* table_reader = file_meta.fd.table_reader; + TypedHandle* table_handle = 
nullptr; + if (table_reader == nullptr) { + const bool for_compaction = (caller == TableReaderCaller::kCompaction); + Status s = FindTable( + read_options, file_options_, internal_comparator, file_meta, + &table_handle, block_protection_bytes_per_key, prefix_extractor, + false /* no_io */, !for_compaction /* record_read_stats */); + if (s.ok()) { + table_reader = cache_.Value(table_handle); + } + } + + if (table_reader != nullptr) { + result = table_reader->ApproximateSize(read_options, start, end, caller); + } + if (table_handle != nullptr) { + cache_.Release(table_handle); + } + + return result; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.h new file mode 100644 index 0000000000..41201eea8a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache.h @@ -0,0 +1,283 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Thread-safe (provides internal synchronization) + +#pragma once +#include +#include +#include + +#include "cache/typed_cache.h" +#include "db/dbformat.h" +#include "db/range_del_aggregator.h" +#include "options/cf_options.h" +#include "port/port.h" +#include "rocksdb/cache.h" +#include "rocksdb/env.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "table/table_reader.h" +#include "trace_replay/block_cache_tracer.h" +#include "util/coro_utils.h" + +namespace ROCKSDB_NAMESPACE { + +class Env; +class Arena; +struct FileDescriptor; +class GetContext; +class HistogramImpl; + +// Manages caching for TableReader objects for a column family. The actual +// cache is allocated separately and passed to the constructor. TableCache +// wraps around the underlying SST file readers by providing Get(), +// MultiGet() and NewIterator() methods that hide the instantiation, +// caching and access to the TableReader. The main purpose of this is +// performance - by caching the TableReader, it avoids unnecessary file opens +// and object allocation and instantiation. One exception is compaction, where +// a new TableReader may be instantiated - see NewIterator() comments +// +// Another service provided by TableCache is managing the row cache - if the +// DB is configured with a row cache, and the lookup key is present in the row +// cache, lookup is very fast. The row cache is obtained from +// ioptions.row_cache +class TableCache { + public: + TableCache(const ImmutableOptions& ioptions, + const FileOptions* storage_options, Cache* cache, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, + const std::string& db_session_id); + ~TableCache(); + + // Cache interface for table cache + using CacheInterface = + BasicTypedCacheInterface; + using TypedHandle = CacheInterface::TypedHandle; + + // Cache interface for row cache + using RowCacheInterface = + BasicTypedCacheInterface; + using RowHandle = RowCacheInterface::TypedHandle; + + // Return an iterator for the specified file number (the corresponding + // file length must be exactly "file_size" bytes). 
If "table_reader_ptr" + // is non-nullptr, also sets "*table_reader_ptr" to point to the Table object + // underlying the returned iterator, or nullptr if no Table object underlies + // the returned iterator. The returned "*table_reader_ptr" object is owned + // by the cache and should not be deleted, and is valid for as long as the + // returned iterator is live. + // If !options.ignore_range_deletions, and range_del_iter is non-nullptr, + // then range_del_iter is set to a TruncatedRangeDelIterator for range + // tombstones in the SST file corresponding to the specified file number. The + // upper/lower bounds for the TruncatedRangeDelIterator are set to the SST + // file's boundary. + // @param options Must outlive the returned iterator. + // @param range_del_agg If non-nullptr, adds range deletions to the + // aggregator. If an error occurs, returns it in a NewErrorInternalIterator + // @param for_compaction If true, a new TableReader may be allocated (but + // not cached), depending on the CF options + // @param skip_filters Disables loading/accessing the filter block + // @param level The level this table is at, -1 for "not set / don't know" + InternalIterator* NewIterator( + const ReadOptions& options, const FileOptions& toptions, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, RangeDelAggregator* range_del_agg, + const std::shared_ptr& prefix_extractor, + TableReader** table_reader_ptr, HistogramImpl* file_read_hist, + TableReaderCaller caller, Arena* arena, bool skip_filters, int level, + size_t max_file_size_for_l0_meta_pin, + const InternalKey* smallest_compaction_key, + const InternalKey* largest_compaction_key, bool allow_unprepared_value, + uint8_t protection_bytes_per_key, + TruncatedRangeDelIterator** range_del_iter = nullptr); + + // If a seek to internal key "k" in specified file finds an entry, + // call get_context->SaveValue() repeatedly until + // it returns false. As a side effect, it will insert the TableReader + // into the cache and potentially evict another entry + // @param get_context Context for get operation. The result of the lookup + // can be retrieved by calling get_context->State() + // @param file_read_hist If non-nullptr, the file reader statistics are + // recorded + // @param skip_filters Disables loading/accessing the filter block + // @param level The level this table is at, -1 for "not set / don't know" + Status Get( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor = nullptr, + HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, + int level = -1, size_t max_file_size_for_l0_meta_pin = 0); + + // Return the range delete tombstone iterator of the file specified by + // `file_meta`. + Status GetRangeTombstoneIterator( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key, + std::unique_ptr* out_iter); + + // Call table reader's MultiGetFilter to use the bloom filter to filter out + // keys. Returns Status::NotSupported() if row cache needs to be checked. + // If the table cache is looked up to get the table reader, the cache handle + // is returned in table_handle. This handle should be passed back to + // MultiGet() so it can be released. 
+ Status MultiGetFilter( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, + const std::shared_ptr& prefix_extractor, + HistogramImpl* file_read_hist, int level, + MultiGetContext::Range* mget_range, TypedHandle** table_handle, + uint8_t block_protection_bytes_per_key); + + // If a seek to internal key "k" in specified file finds an entry, + // call get_context->SaveValue() repeatedly until + // it returns false. As a side effect, it will insert the TableReader + // into the cache and potentially evict another entry + // @param mget_range Pointer to the structure describing a batch of keys to + // be looked up in this table file. The result is stored + // in the embedded GetContext + // @param skip_filters Disables loading/accessing the filter block + // @param level The level this table is at, -1 for "not set / don't know" + DECLARE_SYNC_AND_ASYNC( + Status, MultiGet, const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const MultiGetContext::Range* mget_range, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor = nullptr, + HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, + bool skip_range_deletions = false, int level = -1, + TypedHandle* table_handle = nullptr); + + // Evict any entry for the specified file number + static void Evict(Cache* cache, uint64_t file_number); + + // Find table reader + // @param skip_filters Disables loading/accessing the filter block + // @param level == -1 means not specified + Status FindTable( + const ReadOptions& ro, const FileOptions& toptions, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, TypedHandle**, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor = nullptr, + const bool no_io = false, bool record_read_stats = true, + HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, + int level = -1, bool prefetch_index_and_filter_in_cache = true, + size_t max_file_size_for_l0_meta_pin = 0, + Temperature file_temperature = Temperature::kUnknown); + + // Get the table properties of a given table. + // @no_io: indicates if we should load table to the cache if it is not present + // in table cache yet. + // @returns: `properties` will be reset on success. Please note that we will + // return Status::Incomplete() if table is not present in cache and + // we set `no_io` to be true. + Status GetTableProperties( + const FileOptions& toptions, const ReadOptions& read_options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, + std::shared_ptr* properties, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor = nullptr, + bool no_io = false); + + Status ApproximateKeyAnchors(const ReadOptions& ro, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, + uint8_t block_protection_bytes_per_key, + std::vector& anchors); + + // Return total memory usage of the table reader of the file. + // 0 if table reader of the file is not loaded. + size_t GetMemoryUsageByTableReader( + const FileOptions& toptions, const ReadOptions& read_options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor = nullptr); + + // Returns approximated offset of a key in a file represented by fd. 
+ uint64_t ApproximateOffsetOf( + const ReadOptions& read_options, const Slice& key, + const FileMetaData& file_meta, TableReaderCaller caller, + const InternalKeyComparator& internal_comparator, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor = nullptr); + + // Returns approximated data size between start and end keys in a file + // represented by fd (the start key must not be greater than the end key). + uint64_t ApproximateSize( + const ReadOptions& read_options, const Slice& start, const Slice& end, + const FileMetaData& file_meta, TableReaderCaller caller, + const InternalKeyComparator& internal_comparator, + uint8_t block_protection_bytes_per_key, + const std::shared_ptr& prefix_extractor = nullptr); + + CacheInterface& get_cache() { return cache_; } + + // Capacity of the backing Cache that indicates infinite TableCache capacity. + // For example when max_open_files is -1 we set the backing Cache to this. + static const int kInfiniteCapacity = 0x400000; + + // The tables opened with this TableCache will be immortal, i.e., their + // lifetime is as long as that of the DB. + void SetTablesAreImmortal() { + if (cache_.get()->GetCapacity() >= kInfiniteCapacity) { + immortal_tables_ = true; + } + } + + private: + // Build a table reader + Status GetTableReader( + const ReadOptions& ro, const FileOptions& file_options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, bool sequential_mode, + bool record_read_stats, uint8_t block_protection_bytes_per_key, + HistogramImpl* file_read_hist, std::unique_ptr* table_reader, + const std::shared_ptr& prefix_extractor = nullptr, + bool skip_filters = false, int level = -1, + bool prefetch_index_and_filter_in_cache = true, + size_t max_file_size_for_l0_meta_pin = 0, + Temperature file_temperature = Temperature::kUnknown); + + // Update the max_covering_tombstone_seq in the GetContext for each key based + // on the range deletions in the table + void UpdateRangeTombstoneSeqnums(const ReadOptions& options, TableReader* t, + MultiGetContext::Range& table_range); + + // Create a key prefix for looking up the row cache. The prefix is of the + // format row_cache_id + fd_number + seq_no. Later, the user key can be + // appended to form the full key + void CreateRowCacheKeyPrefix(const ReadOptions& options, + const FileDescriptor& fd, + const Slice& internal_key, + GetContext* get_context, IterKey& row_cache_key); + + // Helper function to lookup the row cache for a key. It appends the + // user key to row_cache_key at offset prefix_size + bool GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, + size_t prefix_size, GetContext* get_context); + + const ImmutableOptions& ioptions_; + const FileOptions& file_options_; + CacheInterface cache_; + std::string row_cache_id_; + bool immortal_tables_; + BlockCacheTracer* const block_cache_tracer_; + Striped loader_mutex_; + std::shared_ptr io_tracer_; + std::string db_session_id_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache_sync_and_async.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache_sync_and_async.h new file mode 100644 index 0000000000..df8e9337f6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_cache_sync_and_async.h @@ -0,0 +1,130 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "util/coro_utils.h"
+
+#if defined(WITHOUT_COROUTINES) || \
+    (defined(USE_COROUTINES) && defined(WITH_COROUTINES))
+namespace ROCKSDB_NAMESPACE {
+
+#if defined(WITHOUT_COROUTINES)
+#endif
+
+// Batched version of TableCache::MultiGet.
+DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
+(const ReadOptions& options, const InternalKeyComparator& internal_comparator,
+ const FileMetaData& file_meta, const MultiGetContext::Range* mget_range,
+ uint8_t block_protection_bytes_per_key,
+ const std::shared_ptr<const SliceTransform>& prefix_extractor,
+ HistogramImpl* file_read_hist, bool skip_filters, bool skip_range_deletions,
+ int level, TypedHandle* handle) {
+  auto& fd = file_meta.fd;
+  Status s;
+  TableReader* t = fd.table_reader;
+  MultiGetRange table_range(*mget_range, mget_range->begin(),
+                            mget_range->end());
+  if (handle != nullptr && t == nullptr) {
+    t = cache_.Value(handle);
+  }
+  autovector<std::string, MultiGetContext::MAX_BATCH_SIZE> row_cache_entries;
+  IterKey row_cache_key;
+  size_t row_cache_key_prefix_size = 0;
+  KeyContext& first_key = *table_range.begin();
+  bool lookup_row_cache =
+      ioptions_.row_cache && !first_key.get_context->NeedToReadSequence();
+
+  // Check row cache if enabled. Since row cache does not currently store
+  // sequence numbers, we cannot use it if we need to fetch the sequence.
+  if (lookup_row_cache) {
+    GetContext* first_context = first_key.get_context;
+    CreateRowCacheKeyPrefix(options, fd, first_key.ikey, first_context,
+                            row_cache_key);
+    row_cache_key_prefix_size = row_cache_key.Size();
+
+    for (auto miter = table_range.begin(); miter != table_range.end();
+         ++miter) {
+      const Slice& user_key = miter->ukey_with_ts;
+
+      GetContext* get_context = miter->get_context;
+
+      if (GetFromRowCache(user_key, row_cache_key, row_cache_key_prefix_size,
+                          get_context)) {
+        table_range.SkipKey(miter);
+      } else {
+        row_cache_entries.emplace_back();
+        get_context->SetReplayLog(&(row_cache_entries.back()));
+      }
+    }
+  }
+
+  // Check that table_range is not empty. It's possible all keys may have been
+  // found in the row cache and thus the range may now be empty
+  if (s.ok() && !table_range.empty()) {
+    if (t == nullptr) {
+      assert(handle == nullptr);
+      s = FindTable(options, file_options_, internal_comparator, file_meta,
+                    &handle, block_protection_bytes_per_key, prefix_extractor,
+                    options.read_tier == kBlockCacheTier /* no_io */,
+                    true /* record_read_stats */, file_read_hist, skip_filters,
+                    level, true /* prefetch_index_and_filter_in_cache */,
+                    0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature);
+      TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
+      if (s.ok()) {
+        t = cache_.Value(handle);
+        assert(t);
+      }
+    }
+    if (s.ok() && !options.ignore_range_deletions && !skip_range_deletions) {
+      UpdateRangeTombstoneSeqnums(options, t, table_range);
+    }
+    if (s.ok()) {
+      CO_AWAIT(t->MultiGet)
+      (options, &table_range, prefix_extractor.get(), skip_filters);
+    } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) {
+      for (auto iter = table_range.begin(); iter != table_range.end();
+           ++iter) {
+        Status* status = iter->s;
+        if (status->IsIncomplete()) {
+          // Couldn't find Table in cache but treat as kFound if no_io set
+          iter->get_context->MarkKeyMayExist();
+          s = Status::OK();
+        }
+      }
+    }
+  }
+
+  if (lookup_row_cache) {
+    size_t row_idx = 0;
+    RowCacheInterface row_cache{ioptions_.row_cache.get()};
+
+    for (auto miter = table_range.begin(); miter != table_range.end();
+         ++miter) {
+      std::string& row_cache_entry = row_cache_entries[row_idx++];
+      const Slice& user_key = miter->ukey_with_ts;
+      GetContext* get_context = miter->get_context;
+
+      get_context->SetReplayLog(nullptr);
+      // Compute row cache key.
+      row_cache_key.TrimAppend(row_cache_key_prefix_size, user_key.data(),
+                               user_key.size());
+      // Put the replay log in row cache only if something was found.
+      if (s.ok() && !row_cache_entry.empty()) {
+        size_t charge = row_cache_entry.capacity() + sizeof(std::string);
+        auto row_ptr = new std::string(std::move(row_cache_entry));
+        // If row cache is full, it's OK.
+        row_cache.Insert(row_cache_key.GetUserKey(), row_ptr, charge)
+            .PermitUncheckedError();
+      }
+    }
+  }
+
+  if (handle != nullptr) {
+    cache_.Release(handle);
+  }
+  CO_RETURN s;
+}
+}  // namespace ROCKSDB_NAMESPACE
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.cc
new file mode 100644
index 0000000000..edb9a1b63a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.cc
@@ -0,0 +1,74 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/table_properties_collector.h"
+
+#include "db/dbformat.h"
+#include "util/coding.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+uint64_t GetUint64Property(const UserCollectedProperties& props,
+                           const std::string& property_name,
+                           bool* property_present) {
+  auto pos = props.find(property_name);
+  if (pos == props.end()) {
+    *property_present = false;
+    return 0;
+  }
+  Slice raw = pos->second;
+  uint64_t val = 0;
+  *property_present = true;
+  return GetVarint64(&raw, &val) ?
val : 0; +} + +} // anonymous namespace + +Status UserKeyTablePropertiesCollector::InternalAdd(const Slice& key, + const Slice& value, + uint64_t file_size) { + ParsedInternalKey ikey; + Status s = ParseInternalKey(key, &ikey, false /* log_err_key */); // TODO + if (!s.ok()) { + return s; + } + + return collector_->AddUserKey(ikey.user_key, value, GetEntryType(ikey.type), + ikey.sequence, file_size); +} + +void UserKeyTablePropertiesCollector::BlockAdd( + uint64_t block_uncomp_bytes, uint64_t block_compressed_bytes_fast, + uint64_t block_compressed_bytes_slow) { + return collector_->BlockAdd(block_uncomp_bytes, block_compressed_bytes_fast, + block_compressed_bytes_slow); +} + +Status UserKeyTablePropertiesCollector::Finish( + UserCollectedProperties* properties) { + return collector_->Finish(properties); +} + +UserCollectedProperties UserKeyTablePropertiesCollector::GetReadableProperties() + const { + return collector_->GetReadableProperties(); +} + +uint64_t GetDeletedKeys(const UserCollectedProperties& props) { + bool property_present_ignored; + return GetUint64Property(props, TablePropertiesNames::kDeletedKeys, + &property_present_ignored); +} + +uint64_t GetMergeOperands(const UserCollectedProperties& props, + bool* property_present) { + return GetUint64Property(props, TablePropertiesNames::kMergeOperands, + property_present); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.h new file mode 100644 index 0000000000..968115c3d7 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/table_properties_collector.h @@ -0,0 +1,177 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// This file defines a collection of statistics collectors. +#pragma once + +#include +#include +#include + +#include "db/dbformat.h" +#include "rocksdb/comparator.h" +#include "rocksdb/table_properties.h" + +namespace ROCKSDB_NAMESPACE { + +// Base class for internal table properties collector. +class IntTblPropCollector { + public: + virtual ~IntTblPropCollector() {} + virtual Status Finish(UserCollectedProperties* properties) = 0; + + virtual const char* Name() const = 0; + + // @params key the user key that is inserted into the table. + // @params value the value that is inserted into the table. + virtual Status InternalAdd(const Slice& key, const Slice& value, + uint64_t file_size) = 0; + + virtual void BlockAdd(uint64_t block_uncomp_bytes, + uint64_t block_compressed_bytes_fast, + uint64_t block_compressed_bytes_slow) = 0; + + virtual UserCollectedProperties GetReadableProperties() const = 0; + + virtual bool NeedCompact() const { return false; } +}; + +// Factory for internal table properties collector. +class IntTblPropCollectorFactory { + public: + virtual ~IntTblPropCollectorFactory() {} + // has to be thread-safe + virtual IntTblPropCollector* CreateIntTblPropCollector( + uint32_t column_family_id, int level_at_creation) = 0; + + // The name of the properties collector can be used for debugging purpose. 
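These adapters wrap user-defined collectors from the public API, and GetUint64Property() above shows that numeric properties are stored varint-encoded. A hedged sketch of a user-side collector (class and property names are illustrative; PutVarint64 comes from the internal util/coding.h header):

#include <cstdint>
#include <string>

#include "rocksdb/table_properties.h"
#include "util/coding.h"  // internal header, provides PutVarint64

// Counts tombstones and publishes the count as a varint-encoded property,
// matching the encoding that GetUint64Property() above expects.
class DeleteCountingCollector : public rocksdb::TablePropertiesCollector {
 public:
  rocksdb::Status AddUserKey(const rocksdb::Slice& /*key*/,
                             const rocksdb::Slice& /*value*/,
                             rocksdb::EntryType type,
                             rocksdb::SequenceNumber /*seq*/,
                             uint64_t /*file_size*/) override {
    if (type == rocksdb::kEntryDelete) {
      ++deleted_;
    }
    return rocksdb::Status::OK();
  }
  rocksdb::Status Finish(rocksdb::UserCollectedProperties* props) override {
    std::string encoded;
    rocksdb::PutVarint64(&encoded, deleted_);
    props->insert({"example.deleted-keys", encoded});
    return rocksdb::Status::OK();
  }
  rocksdb::UserCollectedProperties GetReadableProperties() const override {
    return {{"example.deleted-keys", std::to_string(deleted_)}};
  }
  const char* Name() const override { return "DeleteCountingCollector"; }

 private:
  uint64_t deleted_ = 0;
};

Registering such a collector goes through a TablePropertiesCollectorFactory pushed onto ColumnFamilyOptions::table_properties_collector_factories, which is what UserKeyTablePropertiesCollectorFactory below adapts to the internal interface.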
+ virtual const char* Name() const = 0; +}; + +using IntTblPropCollectorFactories = + std::vector>; + +// When rocksdb creates a new table, it will encode all "user keys" into +// "internal keys", which contains meta information of a given entry. +// +// This class extracts user key from the encoded internal key when Add() is +// invoked. +class UserKeyTablePropertiesCollector : public IntTblPropCollector { + public: + // transfer of ownership + explicit UserKeyTablePropertiesCollector(TablePropertiesCollector* collector) + : collector_(collector) {} + + virtual ~UserKeyTablePropertiesCollector() {} + + virtual Status InternalAdd(const Slice& key, const Slice& value, + uint64_t file_size) override; + + virtual void BlockAdd(uint64_t block_uncomp_bytes, + uint64_t block_compressed_bytes_fast, + uint64_t block_compressed_bytes_slow) override; + + virtual Status Finish(UserCollectedProperties* properties) override; + + virtual const char* Name() const override { return collector_->Name(); } + + UserCollectedProperties GetReadableProperties() const override; + + virtual bool NeedCompact() const override { + return collector_->NeedCompact(); + } + + protected: + std::unique_ptr collector_; +}; + +class UserKeyTablePropertiesCollectorFactory + : public IntTblPropCollectorFactory { + public: + explicit UserKeyTablePropertiesCollectorFactory( + std::shared_ptr user_collector_factory) + : user_collector_factory_(user_collector_factory) {} + virtual IntTblPropCollector* CreateIntTblPropCollector( + uint32_t column_family_id, int level_at_creation) override { + TablePropertiesCollectorFactory::Context context; + context.column_family_id = column_family_id; + context.level_at_creation = level_at_creation; + return new UserKeyTablePropertiesCollector( + user_collector_factory_->CreateTablePropertiesCollector(context)); + } + + virtual const char* Name() const override { + return user_collector_factory_->Name(); + } + + private: + std::shared_ptr user_collector_factory_; +}; + +// When rocksdb creates a newtable, it will encode all "user keys" into +// "internal keys". This class collects min/max timestamp from the encoded +// internal key when Add() is invoked. +// +// @param cmp the user comparator to compare the timestamps in internal key. 
+class TimestampTablePropertiesCollector : public IntTblPropCollector { + public: + explicit TimestampTablePropertiesCollector(const Comparator* cmp) + : cmp_(cmp), + timestamp_min_(kDisableUserTimestamp), + timestamp_max_(kDisableUserTimestamp) {} + + Status InternalAdd(const Slice& key, const Slice& /* value */, + uint64_t /* file_size */) override { + auto user_key = ExtractUserKey(key); + assert(cmp_ && cmp_->timestamp_size() > 0); + if (user_key.size() < cmp_->timestamp_size()) { + return Status::Corruption( + "User key size mismatch when comparing to timestamp size."); + } + auto timestamp_in_key = + ExtractTimestampFromUserKey(user_key, cmp_->timestamp_size()); + if (timestamp_max_ == kDisableUserTimestamp || + cmp_->CompareTimestamp(timestamp_in_key, timestamp_max_) > 0) { + timestamp_max_.assign(timestamp_in_key.data(), timestamp_in_key.size()); + } + if (timestamp_min_ == kDisableUserTimestamp || + cmp_->CompareTimestamp(timestamp_min_, timestamp_in_key) > 0) { + timestamp_min_.assign(timestamp_in_key.data(), timestamp_in_key.size()); + } + return Status::OK(); + } + + void BlockAdd(uint64_t /* block_uncomp_bytes */, + uint64_t /* block_compressed_bytes_fast */, + uint64_t /* block_compressed_bytes_slow */) override { + return; + } + + Status Finish(UserCollectedProperties* properties) override { + // timestamp is empty is table is empty + assert(timestamp_min_.size() == timestamp_max_.size() && + (timestamp_min_.empty() || + timestamp_max_.size() == cmp_->timestamp_size())); + properties->insert({"rocksdb.timestamp_min", timestamp_min_}); + properties->insert({"rocksdb.timestamp_max", timestamp_max_}); + return Status::OK(); + } + + const char* Name() const override { + return "TimestampTablePropertiesCollector"; + } + + UserCollectedProperties GetReadableProperties() const override { + return {{"rocksdb.timestamp_min", Slice(timestamp_min_).ToString(true)}, + {"rocksdb.timestamp_max", Slice(timestamp_max_).ToString(true)}}; + } + + protected: + const Comparator* const cmp_; + std::string timestamp_min_; + std::string timestamp_max_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.cc new file mode 100644 index 0000000000..8841b8cf3b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.cc @@ -0,0 +1,296 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
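The timestamp collector above only engages for column families whose comparator reports a nonzero timestamp_size(), in which case it records rocksdb.timestamp_min and rocksdb.timestamp_max per SST file. A hedged sketch of enabling user-defined timestamps with the built-in 64-bit timestamp comparator:

#include <string>

#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::Status OpenWithTimestamps(const std::string& path,
                                   rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Bytewise ordering on the user key plus an 8-byte u64 timestamp suffix;
  // timestamp_size() is 8 for this comparator, which activates the
  // min/max timestamp properties collected above.
  options.comparator = rocksdb::BytewiseComparatorWithU64Ts();
  return rocksdb::DB::Open(options, path, db);
}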
+ + +#include "db/transaction_log_impl.h" + +#include + +#include "db/write_batch_internal.h" +#include "file/sequence_file_reader.h" +#include "util/defer.h" + +namespace ROCKSDB_NAMESPACE { + +TransactionLogIteratorImpl::TransactionLogIteratorImpl( + const std::string& dir, const ImmutableDBOptions* options, + const TransactionLogIterator::ReadOptions& read_options, + const EnvOptions& soptions, const SequenceNumber seq, + std::unique_ptr files, VersionSet const* const versions, + const bool seq_per_batch, const std::shared_ptr& io_tracer) + : dir_(dir), + options_(options), + read_options_(read_options), + soptions_(soptions), + starting_sequence_number_(seq), + files_(std::move(files)), + versions_(versions), + seq_per_batch_(seq_per_batch), + io_tracer_(io_tracer), + started_(false), + is_valid_(false), + current_file_index_(0), + current_batch_seq_(0), + current_last_seq_(0) { + assert(files_ != nullptr); + assert(versions_ != nullptr); + assert(!seq_per_batch_); + current_status_.PermitUncheckedError(); // Clear on start + reporter_.env = options_->env; + reporter_.info_log = options_->info_log.get(); + SeekToStartSequence(); // Seek till starting sequence +} + +Status TransactionLogIteratorImpl::OpenLogFile( + const LogFile* log_file, + std::unique_ptr* file_reader) { + FileSystemPtr fs(options_->fs, io_tracer_); + std::unique_ptr file; + std::string fname; + Status s; + EnvOptions optimized_env_options = fs->OptimizeForLogRead(soptions_); + if (log_file->Type() == kArchivedLogFile) { + fname = ArchivedLogFileName(dir_, log_file->LogNumber()); + s = fs->NewSequentialFile(fname, optimized_env_options, &file, nullptr); + } else { + fname = LogFileName(dir_, log_file->LogNumber()); + s = fs->NewSequentialFile(fname, optimized_env_options, &file, nullptr); + if (!s.ok()) { + // If cannot open file in DB directory. + // Try the archive dir, as it could have moved in the meanwhile. + fname = ArchivedLogFileName(dir_, log_file->LogNumber()); + s = fs->NewSequentialFile(fname, optimized_env_options, &file, nullptr); + } + } + if (s.ok()) { + file_reader->reset(new SequentialFileReader(std::move(file), fname, + io_tracer_, options_->listeners, + options_->rate_limiter.get())); + } + return s; +} + +BatchResult TransactionLogIteratorImpl::GetBatch() { + assert(is_valid_); // cannot call in a non valid state. + BatchResult result; + result.sequence = current_batch_seq_; + result.writeBatchPtr = std::move(current_batch_); + return result; +} + +Status TransactionLogIteratorImpl::status() { return current_status_; } + +bool TransactionLogIteratorImpl::Valid() { return started_ && is_valid_; } + +bool TransactionLogIteratorImpl::RestrictedRead(Slice* record) { + // Don't read if no more complete entries to read from logs + if (current_last_seq_ >= versions_->LastSequence()) { + return false; + } + return current_log_reader_->ReadRecord(record, &scratch_); +} + +void TransactionLogIteratorImpl::SeekToStartSequence(uint64_t start_file_index, + bool strict) { + Slice record; + started_ = false; + is_valid_ = false; + // Check invariant of TransactionLogIterator when SeekToStartSequence() + // succeeds. 
+ const Defer defer([this]() { + if (is_valid_) { + assert(current_status_.ok()); + if (starting_sequence_number_ > current_batch_seq_) { + assert(current_batch_seq_ < current_last_seq_); + assert(current_last_seq_ >= starting_sequence_number_); + } + } + }); + if (files_->size() <= start_file_index) { + return; + } else if (!current_status_.ok()) { + return; + } + Status s = + OpenLogReader(files_->at(static_cast(start_file_index)).get()); + if (!s.ok()) { + current_status_ = s; + reporter_.Info(current_status_.ToString().c_str()); + return; + } + while (RestrictedRead(&record)) { + if (record.size() < WriteBatchInternal::kHeader) { + reporter_.Corruption(record.size(), + Status::Corruption("very small log record")); + continue; + } + UpdateCurrentWriteBatch(record); + if (current_last_seq_ >= starting_sequence_number_) { + if (strict && current_batch_seq_ != starting_sequence_number_) { + current_status_ = Status::Corruption( + "Gap in sequence number. Could not " + "seek to required sequence number"); + reporter_.Info(current_status_.ToString().c_str()); + return; + } else if (strict) { + reporter_.Info( + "Could seek required sequence number. Iterator will " + "continue."); + } + is_valid_ = true; + started_ = true; // set started_ as we could seek till starting sequence + return; + } else { + is_valid_ = false; + } + } + + // Could not find start sequence in first file. Normally this must be the + // only file. Otherwise log the error and let the iterator return next entry + // If strict is set, we want to seek exactly till the start sequence and it + // should have been present in the file we scanned above + if (strict) { + current_status_ = Status::Corruption( + "Gap in sequence number. Could not " + "seek to required sequence number"); + reporter_.Info(current_status_.ToString().c_str()); + } else if (files_->size() != 1) { + current_status_ = Status::Corruption( + "Start sequence was not found, " + "skipping to the next available"); + reporter_.Info(current_status_.ToString().c_str()); + // Let NextImpl find the next available entry. 
started_ remains false + // because we don't want to check for gaps while moving to start sequence + NextImpl(true); + } +} + +void TransactionLogIteratorImpl::Next() { + if (!current_status_.ok()) { + return; + } + return NextImpl(false); +} + +void TransactionLogIteratorImpl::NextImpl(bool internal) { + Slice record; + is_valid_ = false; + if (!internal && !started_) { + // Runs every time until we can seek to the start sequence + SeekToStartSequence(); + } + while (true) { + assert(current_log_reader_); + if (current_log_reader_->IsEOF()) { + current_log_reader_->UnmarkEOF(); + } + while (RestrictedRead(&record)) { + if (record.size() < WriteBatchInternal::kHeader) { + reporter_.Corruption(record.size(), + Status::Corruption("very small log record")); + continue; + } else { + // started_ should be true if called by application + assert(internal || started_); + // started_ should be false if called internally + assert(!internal || !started_); + UpdateCurrentWriteBatch(record); + if (internal && !started_) { + started_ = true; + } + return; + } + } + + // Open the next file + if (current_file_index_ < files_->size() - 1) { + ++current_file_index_; + Status s = OpenLogReader(files_->at(current_file_index_).get()); + if (!s.ok()) { + is_valid_ = false; + current_status_ = s; + return; + } + } else { + is_valid_ = false; + if (current_last_seq_ == versions_->LastSequence()) { + current_status_ = Status::OK(); + } else { + const char* msg = "Create a new iterator to fetch the new tail."; + current_status_ = Status::TryAgain(msg); + } + return; + } + } +} + +bool TransactionLogIteratorImpl::IsBatchExpected( + const WriteBatch* batch, const SequenceNumber expected_seq) { + assert(batch); + SequenceNumber batchSeq = WriteBatchInternal::Sequence(batch); + if (batchSeq != expected_seq) { + char buf[200]; + snprintf(buf, sizeof(buf), + "Discontinuity in log records. Got seq=%" PRIu64 + ", Expected seq=%" PRIu64 ", Last flushed seq=%" PRIu64 + ".Log iterator will reseek the correct batch.", + batchSeq, expected_seq, versions_->LastSequence()); + reporter_.Info(buf); + return false; + } + return true; +} + +void TransactionLogIteratorImpl::UpdateCurrentWriteBatch(const Slice& record) { + std::unique_ptr batch(new WriteBatch()); + Status s = WriteBatchInternal::SetContents(batch.get(), record); + s.PermitUncheckedError(); // TODO: What should we do with this error? + + SequenceNumber expected_seq = current_last_seq_ + 1; + // If the iterator has started, then confirm that we get continuous batches + if (started_ && !IsBatchExpected(batch.get(), expected_seq)) { + // Seek to the batch having expected sequence number + if (expected_seq < files_->at(current_file_index_)->StartSequence()) { + // Expected batch must lie in the previous log file + // Avoid underflow. + if (current_file_index_ != 0) { + current_file_index_--; + } + } + starting_sequence_number_ = expected_seq; + // currentStatus_ will be set to Ok if reseek succeeds + // Note: this is still ok in seq_pre_batch_ && two_write_queuesp_ mode + // that allows gaps in the WAL since it will still skip over the gap. 
+ current_status_ = Status::NotFound("Gap in sequence numbers"); + // In seq_per_batch_ mode, gaps in the seq are possible so the strict mode + // should be disabled + return SeekToStartSequence(current_file_index_, !seq_per_batch_); + } + + current_batch_seq_ = WriteBatchInternal::Sequence(batch.get()); + assert(!seq_per_batch_); + current_last_seq_ = + current_batch_seq_ + WriteBatchInternal::Count(batch.get()) - 1; + // currentBatchSeq_ can only change here + assert(current_last_seq_ <= versions_->LastSequence()); + + current_batch_ = std::move(batch); + is_valid_ = true; + current_status_ = Status::OK(); +} + +Status TransactionLogIteratorImpl::OpenLogReader(const LogFile* log_file) { + std::unique_ptr file; + Status s = OpenLogFile(log_file, &file); + if (!s.ok()) { + return s; + } + assert(file); + current_log_reader_.reset( + new log::Reader(options_->info_log, std::move(file), &reporter_, + read_options_.verify_checksums_, log_file->LogNumber())); + return Status::OK(); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.h new file mode 100644 index 0000000000..6568de23f6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/transaction_log_impl.h @@ -0,0 +1,128 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include + +#include "db/log_reader.h" +#include "db/version_set.h" +#include "file/filename.h" +#include "logging/logging.h" +#include "options/db_options.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/options.h" +#include "rocksdb/transaction_log.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +class LogFileImpl : public LogFile { + public: + LogFileImpl(uint64_t logNum, WalFileType logType, SequenceNumber startSeq, + uint64_t sizeBytes) + : logNumber_(logNum), + type_(logType), + startSequence_(startSeq), + sizeFileBytes_(sizeBytes) {} + + std::string PathName() const override { + if (type_ == kArchivedLogFile) { + return ArchivedLogFileName("", logNumber_); + } + return LogFileName("", logNumber_); + } + + uint64_t LogNumber() const override { return logNumber_; } + + WalFileType Type() const override { return type_; } + + SequenceNumber StartSequence() const override { return startSequence_; } + + uint64_t SizeFileBytes() const override { return sizeFileBytes_; } + + bool operator<(const LogFile& that) const { + return LogNumber() < that.LogNumber(); + } + + private: + uint64_t logNumber_; + WalFileType type_; + SequenceNumber startSequence_; + uint64_t sizeFileBytes_; +}; + +class TransactionLogIteratorImpl : public TransactionLogIterator { + public: + TransactionLogIteratorImpl( + const std::string& dir, const ImmutableDBOptions* options, + const TransactionLogIterator::ReadOptions& read_options, + const EnvOptions& soptions, const SequenceNumber seqNum, + std::unique_ptr files, VersionSet const* const versions, + const bool seq_per_batch, const std::shared_ptr& io_tracer); + + virtual bool Valid() override; + + virtual void Next() override; + + virtual Status status() override; + + virtual BatchResult GetBatch() override; + + private: + const std::string& dir_; + const ImmutableDBOptions* options_; + const TransactionLogIterator::ReadOptions 
read_options_; + const EnvOptions& soptions_; + SequenceNumber starting_sequence_number_; + std::unique_ptr files_; + // Used only to get latest seq. num + // TODO(icanadi) can this be just a callback? + VersionSet const* const versions_; + const bool seq_per_batch_; + std::shared_ptr io_tracer_; + + // State variables + bool started_; + bool is_valid_; // not valid when it starts of. + Status current_status_; + size_t current_file_index_; + std::unique_ptr current_batch_; + std::unique_ptr current_log_reader_; + std::string scratch_; + Status OpenLogFile(const LogFile* log_file, + std::unique_ptr* file); + + struct LogReporter : public log::Reader::Reporter { + Env* env; + Logger* info_log; + virtual void Corruption(size_t bytes, const Status& s) override { + ROCKS_LOG_ERROR(info_log, "dropping %" ROCKSDB_PRIszt " bytes; %s", bytes, + s.ToString().c_str()); + } + virtual void Info(const char* s) { ROCKS_LOG_INFO(info_log, "%s", s); } + } reporter_; + + SequenceNumber + current_batch_seq_; // sequence number at start of current batch + SequenceNumber current_last_seq_; // last sequence in the current batch + // Reads from transaction log only if the writebatch record has been written + bool RestrictedRead(Slice* record); + // Seeks to starting_sequence_number_ reading from start_file_index in files_. + // If strict is set, then must get a batch starting with + // starting_sequence_number_. + void SeekToStartSequence(uint64_t start_file_index = 0, bool strict = false); + // Implementation of Next. SeekToStartSequence calls it internally with + // internal=true to let it find next entry even if it has to jump gaps because + // the iterator may start off from the first available entry but promises to + // be continuous after that + void NextImpl(bool internal = false); + // Check if batch is expected, else return false + bool IsBatchExpected(const WriteBatch* batch, SequenceNumber expected_seq); + // Update current batch if a continuous batch is found. + void UpdateCurrentWriteBatch(const Slice& record); + Status OpenLogReader(const LogFile* file); +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.cc new file mode 100644 index 0000000000..d7ca0899f1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
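The TransactionLogIteratorImpl machinery above is exposed through the public DB::GetUpdatesSince. A hedged tailing sketch; note that WAL files must be retained (for example via DBOptions::WAL_ttl_seconds) for the iterator to have anything to read:

#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/transaction_log.h"

// Replays every write batch with sequence number >= since.
rocksdb::Status TailWal(rocksdb::DB* db, rocksdb::SequenceNumber since) {
  std::unique_ptr<rocksdb::TransactionLogIterator> iter;
  rocksdb::Status s = db->GetUpdatesSince(since, &iter);
  if (!s.ok()) {
    return s;
  }
  while (iter->Valid()) {
    rocksdb::BatchResult batch = iter->GetBatch();
    // batch.sequence is the starting sequence number of this batch;
    // batch.writeBatchPtr owns the replayed WriteBatch.
    iter->Next();
  }
  // A TryAgain status means the tail moved; create a new iterator to
  // continue, as the implementation above suggests.
  return iter->status();
}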
+ +#include "db/trim_history_scheduler.h" + +#include + +#include "db/column_family.h" + +namespace ROCKSDB_NAMESPACE { + +void TrimHistoryScheduler::ScheduleWork(ColumnFamilyData* cfd) { + std::lock_guard lock(checking_mutex_); + cfd->Ref(); + cfds_.push_back(cfd); + is_empty_.store(false, std::memory_order_relaxed); +} + +ColumnFamilyData* TrimHistoryScheduler::TakeNextColumnFamily() { + std::lock_guard lock(checking_mutex_); + while (true) { + if (cfds_.empty()) { + return nullptr; + } + ColumnFamilyData* cfd = cfds_.back(); + cfds_.pop_back(); + if (cfds_.empty()) { + is_empty_.store(true, std::memory_order_relaxed); + } + + if (!cfd->IsDropped()) { + // success + return cfd; + } + cfd->UnrefAndTryDelete(); + } +} + +bool TrimHistoryScheduler::Empty() { + bool is_empty = is_empty_.load(std::memory_order_relaxed); + return is_empty; +} + +void TrimHistoryScheduler::Clear() { + ColumnFamilyData* cfd; + while ((cfd = TakeNextColumnFamily()) != nullptr) { + cfd->UnrefAndTryDelete(); + } + assert(Empty()); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.h new file mode 100644 index 0000000000..252802a7ae --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/trim_history_scheduler.h @@ -0,0 +1,46 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include +#include + +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +class ColumnFamilyData; + +// Similar to FlushScheduler, TrimHistoryScheduler is a FIFO queue that keeps +// track of column families whose flushed immutable memtables may need to be +// removed (aka trimmed). The actual trimming may be slightly delayed. Due to +// the use of the mutex and atomic variable, ScheduleWork, +// TakeNextColumnFamily, and, Empty can be called concurrently. +class TrimHistoryScheduler { + public: + TrimHistoryScheduler() : is_empty_(true) {} + + // When a column family needs history trimming, add cfd to the FIFO queue + void ScheduleWork(ColumnFamilyData* cfd); + + // Remove the column family from the queue, the caller is responsible for + // calling `MemtableList::TrimHistory` + ColumnFamilyData* TakeNextColumnFamily(); + + bool Empty(); + + void Clear(); + + // Not on critical path, use mutex to ensure thread safety + private: + std::atomic is_empty_; + autovector cfds_; + std::mutex checking_mutex_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.cc new file mode 100644 index 0000000000..d87ef94494 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.cc @@ -0,0 +1,1426 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include "db/version_builder.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cache/cache_reservation_manager.h" +#include "db/blob/blob_file_meta.h" +#include "db/dbformat.h" +#include "db/internal_stats.h" +#include "db/table_cache.h" +#include "db/version_edit.h" +#include "db/version_set.h" +#include "port/port.h" +#include "table/table_reader.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +class VersionBuilder::Rep { + class NewestFirstByEpochNumber { + private: + inline static const NewestFirstBySeqNo seqno_cmp; + + public: + bool operator()(const FileMetaData* lhs, const FileMetaData* rhs) const { + assert(lhs); + assert(rhs); + + if (lhs->epoch_number != rhs->epoch_number) { + return lhs->epoch_number > rhs->epoch_number; + } else { + return seqno_cmp(lhs, rhs); + } + } + }; + class BySmallestKey { + public: + explicit BySmallestKey(const InternalKeyComparator* cmp) : cmp_(cmp) {} + + bool operator()(const FileMetaData* lhs, const FileMetaData* rhs) const { + assert(lhs); + assert(rhs); + assert(cmp_); + + const int r = cmp_->Compare(lhs->smallest, rhs->smallest); + if (r != 0) { + return (r < 0); + } + + // Break ties by file number + return (lhs->fd.GetNumber() < rhs->fd.GetNumber()); + } + + private: + const InternalKeyComparator* cmp_; + }; + + struct LevelState { + std::unordered_set deleted_files; + // Map from file number to file meta data. + std::unordered_map added_files; + }; + + // A class that represents the accumulated changes (like additional garbage or + // newly linked/unlinked SST files) for a given blob file after applying a + // series of VersionEdits. + class BlobFileMetaDataDelta { + public: + bool IsEmpty() const { + return !additional_garbage_count_ && !additional_garbage_bytes_ && + newly_linked_ssts_.empty() && newly_unlinked_ssts_.empty(); + } + + uint64_t GetAdditionalGarbageCount() const { + return additional_garbage_count_; + } + + uint64_t GetAdditionalGarbageBytes() const { + return additional_garbage_bytes_; + } + + const std::unordered_set& GetNewlyLinkedSsts() const { + return newly_linked_ssts_; + } + + const std::unordered_set& GetNewlyUnlinkedSsts() const { + return newly_unlinked_ssts_; + } + + void AddGarbage(uint64_t count, uint64_t bytes) { + additional_garbage_count_ += count; + additional_garbage_bytes_ += bytes; + } + + void LinkSst(uint64_t sst_file_number) { + assert(newly_linked_ssts_.find(sst_file_number) == + newly_linked_ssts_.end()); + + // Reconcile with newly unlinked SSTs on the fly. (Note: an SST can be + // linked to and unlinked from the same blob file in the case of a trivial + // move.) + auto it = newly_unlinked_ssts_.find(sst_file_number); + + if (it != newly_unlinked_ssts_.end()) { + newly_unlinked_ssts_.erase(it); + } else { + newly_linked_ssts_.emplace(sst_file_number); + } + } + + void UnlinkSst(uint64_t sst_file_number) { + assert(newly_unlinked_ssts_.find(sst_file_number) == + newly_unlinked_ssts_.end()); + + // Reconcile with newly linked SSTs on the fly. (Note: an SST can be + // linked to and unlinked from the same blob file in the case of a trivial + // move.) 
+ auto it = newly_linked_ssts_.find(sst_file_number); + + if (it != newly_linked_ssts_.end()) { + newly_linked_ssts_.erase(it); + } else { + newly_unlinked_ssts_.emplace(sst_file_number); + } + } + + private: + uint64_t additional_garbage_count_ = 0; + uint64_t additional_garbage_bytes_ = 0; + std::unordered_set newly_linked_ssts_; + std::unordered_set newly_unlinked_ssts_; + }; + + // A class that represents the state of a blob file after applying a series of + // VersionEdits. In addition to the resulting state, it also contains the + // delta (see BlobFileMetaDataDelta above). The resulting state can be used to + // identify obsolete blob files, while the delta makes it possible to + // efficiently detect trivial moves. + class MutableBlobFileMetaData { + public: + // To be used for brand new blob files + explicit MutableBlobFileMetaData( + std::shared_ptr&& shared_meta) + : shared_meta_(std::move(shared_meta)) {} + + // To be used for pre-existing blob files + explicit MutableBlobFileMetaData( + const std::shared_ptr& meta) + : shared_meta_(meta->GetSharedMeta()), + linked_ssts_(meta->GetLinkedSsts()), + garbage_blob_count_(meta->GetGarbageBlobCount()), + garbage_blob_bytes_(meta->GetGarbageBlobBytes()) {} + + const std::shared_ptr& GetSharedMeta() const { + return shared_meta_; + } + + uint64_t GetBlobFileNumber() const { + assert(shared_meta_); + return shared_meta_->GetBlobFileNumber(); + } + + bool HasDelta() const { return !delta_.IsEmpty(); } + + const std::unordered_set& GetLinkedSsts() const { + return linked_ssts_; + } + + uint64_t GetGarbageBlobCount() const { return garbage_blob_count_; } + + uint64_t GetGarbageBlobBytes() const { return garbage_blob_bytes_; } + + bool AddGarbage(uint64_t count, uint64_t bytes) { + assert(shared_meta_); + + if (garbage_blob_count_ + count > shared_meta_->GetTotalBlobCount() || + garbage_blob_bytes_ + bytes > shared_meta_->GetTotalBlobBytes()) { + return false; + } + + delta_.AddGarbage(count, bytes); + + garbage_blob_count_ += count; + garbage_blob_bytes_ += bytes; + + return true; + } + + void LinkSst(uint64_t sst_file_number) { + delta_.LinkSst(sst_file_number); + + assert(linked_ssts_.find(sst_file_number) == linked_ssts_.end()); + linked_ssts_.emplace(sst_file_number); + } + + void UnlinkSst(uint64_t sst_file_number) { + delta_.UnlinkSst(sst_file_number); + + assert(linked_ssts_.find(sst_file_number) != linked_ssts_.end()); + linked_ssts_.erase(sst_file_number); + } + + private: + std::shared_ptr shared_meta_; + // Accumulated changes + BlobFileMetaDataDelta delta_; + // Resulting state after applying the changes + BlobFileMetaData::LinkedSsts linked_ssts_; + uint64_t garbage_blob_count_ = 0; + uint64_t garbage_blob_bytes_ = 0; + }; + + const FileOptions& file_options_; + const ImmutableCFOptions* const ioptions_; + TableCache* table_cache_; + VersionStorageInfo* base_vstorage_; + VersionSet* version_set_; + int num_levels_; + LevelState* levels_; + // Store sizes of levels larger than num_levels_. We do this instead of + // storing them in levels_ to avoid regression in case there are no files + // on invalid levels. The version is not consistent if in the end the files + // on invalid levels don't cancel out. + std::unordered_map invalid_level_sizes_; + // Whether there are invalid new files or invalid deletion on levels larger + // than num_levels_. + bool has_invalid_levels_; + // Current levels of table files affected by additions/deletions. 
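The LinkSst/UnlinkSst pair above cancels a new link against a pending unlink (and vice versa), so a trivial move that relinks the same SST produces no net delta. A self-contained sketch of that reconciliation idiom:

#include <cassert>
#include <cstdint>
#include <unordered_set>

// Net effect of link/unlink events: an unlink cancels a pending link
// instead of being recorded, so Link(f) followed by Unlink(f) is a no-op.
struct LinkDelta {
  std::unordered_set<uint64_t> linked;
  std::unordered_set<uint64_t> unlinked;

  void Link(uint64_t f) {
    if (unlinked.erase(f) == 0) {
      linked.insert(f);
    }
  }
  void Unlink(uint64_t f) {
    if (linked.erase(f) == 0) {
      unlinked.insert(f);
    }
  }
  bool Empty() const { return linked.empty() && unlinked.empty(); }
};

// Usage: a trivial move unlinks and relinks the same SST file.
// LinkDelta d; d.Unlink(42); d.Link(42); assert(d.Empty());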
+ std::unordered_map table_file_levels_; + // Current compact cursors that should be changed after the last compaction + std::unordered_map updated_compact_cursors_; + NewestFirstByEpochNumber level_zero_cmp_by_epochno_; + NewestFirstBySeqNo level_zero_cmp_by_seqno_; + BySmallestKey level_nonzero_cmp_; + + // Mutable metadata objects for all blob files affected by the series of + // version edits. + std::map mutable_blob_file_metas_; + + std::shared_ptr file_metadata_cache_res_mgr_; + + public: + Rep(const FileOptions& file_options, const ImmutableCFOptions* ioptions, + TableCache* table_cache, VersionStorageInfo* base_vstorage, + VersionSet* version_set, + std::shared_ptr file_metadata_cache_res_mgr) + : file_options_(file_options), + ioptions_(ioptions), + table_cache_(table_cache), + base_vstorage_(base_vstorage), + version_set_(version_set), + num_levels_(base_vstorage->num_levels()), + has_invalid_levels_(false), + level_nonzero_cmp_(base_vstorage_->InternalComparator()), + file_metadata_cache_res_mgr_(file_metadata_cache_res_mgr) { + assert(ioptions_); + + levels_ = new LevelState[num_levels_]; + } + + ~Rep() { + for (int level = 0; level < num_levels_; level++) { + const auto& added = levels_[level].added_files; + for (auto& pair : added) { + UnrefFile(pair.second); + } + } + + delete[] levels_; + } + + void UnrefFile(FileMetaData* f) { + f->refs--; + if (f->refs <= 0) { + if (f->table_reader_handle) { + assert(table_cache_ != nullptr); + // NOTE: have to release in raw cache interface to avoid using a + // TypedHandle for FileMetaData::table_reader_handle + table_cache_->get_cache().get()->Release(f->table_reader_handle); + f->table_reader_handle = nullptr; + } + + if (file_metadata_cache_res_mgr_) { + Status s = file_metadata_cache_res_mgr_->UpdateCacheReservation( + f->ApproximateMemoryUsage(), false /* increase */); + s.PermitUncheckedError(); + } + delete f; + } + } + + // Mapping used for checking the consistency of links between SST files and + // blob files. It is built using the forward links (table file -> blob file), + // and is subsequently compared with the inverse mapping stored in the + // BlobFileMetaData objects. 
+ using ExpectedLinkedSsts = + std::unordered_map; + + static void UpdateExpectedLinkedSsts( + uint64_t table_file_number, uint64_t blob_file_number, + ExpectedLinkedSsts* expected_linked_ssts) { + assert(expected_linked_ssts); + + if (blob_file_number == kInvalidBlobFileNumber) { + return; + } + + (*expected_linked_ssts)[blob_file_number].emplace(table_file_number); + } + + template + Status CheckConsistencyDetailsForLevel( + const VersionStorageInfo* vstorage, int level, Checker checker, + const std::string& sync_point, + ExpectedLinkedSsts* expected_linked_ssts) const { +#ifdef NDEBUG + (void)sync_point; +#endif + + assert(vstorage); + assert(level >= 0 && level < num_levels_); + assert(expected_linked_ssts); + + const auto& level_files = vstorage->LevelFiles(level); + + if (level_files.empty()) { + return Status::OK(); + } + + assert(level_files[0]); + UpdateExpectedLinkedSsts(level_files[0]->fd.GetNumber(), + level_files[0]->oldest_blob_file_number, + expected_linked_ssts); + + for (size_t i = 1; i < level_files.size(); ++i) { + assert(level_files[i]); + UpdateExpectedLinkedSsts(level_files[i]->fd.GetNumber(), + level_files[i]->oldest_blob_file_number, + expected_linked_ssts); + + auto lhs = level_files[i - 1]; + auto rhs = level_files[i]; + +#ifndef NDEBUG + auto pair = std::make_pair(&lhs, &rhs); + TEST_SYNC_POINT_CALLBACK(sync_point, &pair); +#endif + + const Status s = checker(lhs, rhs); + if (!s.ok()) { + return s; + } + } + + return Status::OK(); + } + + // Make sure table files are sorted correctly and that the links between + // table files and blob files are consistent. + Status CheckConsistencyDetails(const VersionStorageInfo* vstorage) const { + assert(vstorage); + + ExpectedLinkedSsts expected_linked_ssts; + + if (num_levels_ > 0) { + const InternalKeyComparator* const icmp = vstorage->InternalComparator(); + EpochNumberRequirement epoch_number_requirement = + vstorage->GetEpochNumberRequirement(); + assert(icmp); + // Check L0 + { + auto l0_checker = [this, epoch_number_requirement, icmp]( + const FileMetaData* lhs, + const FileMetaData* rhs) { + assert(lhs); + assert(rhs); + + if (epoch_number_requirement == + EpochNumberRequirement::kMightMissing) { + if (!level_zero_cmp_by_seqno_(lhs, rhs)) { + std::ostringstream oss; + oss << "L0 files are not sorted properly: files #" + << lhs->fd.GetNumber() << " with seqnos (largest, smallest) " + << lhs->fd.largest_seqno << " , " << lhs->fd.smallest_seqno + << ", #" << rhs->fd.GetNumber() + << " with seqnos (largest, smallest) " + << rhs->fd.largest_seqno << " , " << rhs->fd.smallest_seqno; + return Status::Corruption("VersionBuilder", oss.str()); + } + } else if (epoch_number_requirement == + EpochNumberRequirement::kMustPresent) { + if (lhs->epoch_number == rhs->epoch_number) { + bool range_overlapped = + icmp->Compare(lhs->smallest, rhs->largest) <= 0 && + icmp->Compare(lhs->largest, rhs->smallest) >= 0; + + if (range_overlapped) { + std::ostringstream oss; + oss << "L0 files of same epoch number but overlapping range #" + << lhs->fd.GetNumber() + << " , smallest key: " << lhs->smallest.DebugString(true) + << " , largest key: " << lhs->largest.DebugString(true) + << " , epoch number: " << lhs->epoch_number << " vs. 
file #" + << rhs->fd.GetNumber() + << " , smallest key: " << rhs->smallest.DebugString(true) + << " , largest key: " << rhs->largest.DebugString(true) + << " , epoch number: " << rhs->epoch_number; + return Status::Corruption("VersionBuilder", oss.str()); + } + } + + if (!level_zero_cmp_by_epochno_(lhs, rhs)) { + std::ostringstream oss; + oss << "L0 files are not sorted properly: files #" + << lhs->fd.GetNumber() << " with epoch number " + << lhs->epoch_number << ", #" << rhs->fd.GetNumber() + << " with epoch number " << rhs->epoch_number; + return Status::Corruption("VersionBuilder", oss.str()); + } + } + + return Status::OK(); + }; + + const Status s = CheckConsistencyDetailsForLevel( + vstorage, /* level */ 0, l0_checker, + "VersionBuilder::CheckConsistency0", &expected_linked_ssts); + if (!s.ok()) { + return s; + } + } + + // Check L1 and up + + for (int level = 1; level < num_levels_; ++level) { + auto checker = [this, level, icmp](const FileMetaData* lhs, + const FileMetaData* rhs) { + assert(lhs); + assert(rhs); + + if (!level_nonzero_cmp_(lhs, rhs)) { + std::ostringstream oss; + oss << 'L' << level << " files are not sorted properly: files #" + << lhs->fd.GetNumber() << ", #" << rhs->fd.GetNumber(); + + return Status::Corruption("VersionBuilder", oss.str()); + } + + // Make sure there is no overlap in level + if (icmp->Compare(lhs->largest, rhs->smallest) >= 0) { + std::ostringstream oss; + oss << 'L' << level << " has overlapping ranges: file #" + << lhs->fd.GetNumber() + << " largest key: " << lhs->largest.DebugString(true) + << " vs. file #" << rhs->fd.GetNumber() + << " smallest key: " << rhs->smallest.DebugString(true); + + return Status::Corruption("VersionBuilder", oss.str()); + } + + return Status::OK(); + }; + + const Status s = CheckConsistencyDetailsForLevel( + vstorage, level, checker, "VersionBuilder::CheckConsistency1", + &expected_linked_ssts); + if (!s.ok()) { + return s; + } + } + } + + // Make sure that all blob files in the version have non-garbage data and + // the links between them and the table files are consistent. 
+ const auto& blob_files = vstorage->GetBlobFiles(); + for (const auto& blob_file_meta : blob_files) { + assert(blob_file_meta); + + const uint64_t blob_file_number = blob_file_meta->GetBlobFileNumber(); + + if (blob_file_meta->GetGarbageBlobCount() >= + blob_file_meta->GetTotalBlobCount()) { + std::ostringstream oss; + oss << "Blob file #" << blob_file_number + << " consists entirely of garbage"; + + return Status::Corruption("VersionBuilder", oss.str()); + } + + if (blob_file_meta->GetLinkedSsts() != + expected_linked_ssts[blob_file_number]) { + std::ostringstream oss; + oss << "Links are inconsistent between table files and blob file #" + << blob_file_number; + + return Status::Corruption("VersionBuilder", oss.str()); + } + } + + Status ret_s; + TEST_SYNC_POINT_CALLBACK("VersionBuilder::CheckConsistencyBeforeReturn", + &ret_s); + return ret_s; + } + + Status CheckConsistency(const VersionStorageInfo* vstorage) const { + assert(vstorage); + + // Always run consistency checks in debug build +#ifdef NDEBUG + if (!vstorage->force_consistency_checks()) { + return Status::OK(); + } +#endif + Status s = CheckConsistencyDetails(vstorage); + if (s.IsCorruption() && s.getState()) { + // Make it clear the error is due to force_consistency_checks = 1 or + // debug build +#ifdef NDEBUG + auto prefix = "force_consistency_checks"; +#else + auto prefix = "force_consistency_checks(DEBUG)"; +#endif + s = Status::Corruption(prefix, s.getState()); + } else { + // was only expecting corruption with message, or OK + assert(s.ok()); + } + return s; + } + + bool CheckConsistencyForNumLevels() const { + // Make sure there are no files on or beyond num_levels(). + if (has_invalid_levels_) { + return false; + } + + for (const auto& pair : invalid_level_sizes_) { + const size_t level_size = pair.second; + if (level_size != 0) { + return false; + } + } + + return true; + } + + bool IsBlobFileInVersion(uint64_t blob_file_number) const { + auto mutable_it = mutable_blob_file_metas_.find(blob_file_number); + if (mutable_it != mutable_blob_file_metas_.end()) { + return true; + } + + assert(base_vstorage_); + const auto meta = base_vstorage_->GetBlobFileMetaData(blob_file_number); + + return !!meta; + } + + MutableBlobFileMetaData* GetOrCreateMutableBlobFileMetaData( + uint64_t blob_file_number) { + auto mutable_it = mutable_blob_file_metas_.find(blob_file_number); + if (mutable_it != mutable_blob_file_metas_.end()) { + return &mutable_it->second; + } + + assert(base_vstorage_); + const auto meta = base_vstorage_->GetBlobFileMetaData(blob_file_number); + + if (meta) { + mutable_it = mutable_blob_file_metas_ + .emplace(blob_file_number, MutableBlobFileMetaData(meta)) + .first; + return &mutable_it->second; + } + + return nullptr; + } + + Status ApplyBlobFileAddition(const BlobFileAddition& blob_file_addition) { + const uint64_t blob_file_number = blob_file_addition.GetBlobFileNumber(); + + if (IsBlobFileInVersion(blob_file_number)) { + std::ostringstream oss; + oss << "Blob file #" << blob_file_number << " already added"; + + return Status::Corruption("VersionBuilder", oss.str()); + } + + // Note: we use C++11 for now but in C++14, this could be done in a more + // elegant way using generalized lambda capture. 
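The blob file bookkeeping handled here (additions, garbage accounting, obsolete-file scheduling) is driven by the integrated BlobDB feature, which is switched on through public column family options. A configuration sketch with illustrative thresholds:

#include "rocksdb/options.h"

rocksdb::Options MakeBlobOptions() {
  rocksdb::Options options;
  options.enable_blob_files = true;
  // Values of at least 1 KiB are written to blob files; the SST keeps a
  // reference, which is the SST<->blob link tracked by VersionBuilder.
  options.min_blob_size = 1024;
  // Compactions relocate live blobs out of the oldest 25% of blob files
  // and report the remainder as garbage via BlobFileGarbage edits, which
  // ApplyBlobFileGarbage above accumulates.
  options.enable_blob_garbage_collection = true;
  options.blob_garbage_collection_age_cutoff = 0.25;
  return options;
}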
+ VersionSet* const vs = version_set_; + const ImmutableCFOptions* const ioptions = ioptions_; + + auto deleter = [vs, ioptions](SharedBlobFileMetaData* shared_meta) { + if (vs) { + assert(ioptions); + assert(!ioptions->cf_paths.empty()); + assert(shared_meta); + + vs->AddObsoleteBlobFile(shared_meta->GetBlobFileNumber(), + ioptions->cf_paths.front().path); + } + + delete shared_meta; + }; + + auto shared_meta = SharedBlobFileMetaData::Create( + blob_file_number, blob_file_addition.GetTotalBlobCount(), + blob_file_addition.GetTotalBlobBytes(), + blob_file_addition.GetChecksumMethod(), + blob_file_addition.GetChecksumValue(), deleter); + + mutable_blob_file_metas_.emplace( + blob_file_number, MutableBlobFileMetaData(std::move(shared_meta))); + + return Status::OK(); + } + + Status ApplyBlobFileGarbage(const BlobFileGarbage& blob_file_garbage) { + const uint64_t blob_file_number = blob_file_garbage.GetBlobFileNumber(); + + MutableBlobFileMetaData* const mutable_meta = + GetOrCreateMutableBlobFileMetaData(blob_file_number); + + if (!mutable_meta) { + std::ostringstream oss; + oss << "Blob file #" << blob_file_number << " not found"; + + return Status::Corruption("VersionBuilder", oss.str()); + } + + if (!mutable_meta->AddGarbage(blob_file_garbage.GetGarbageBlobCount(), + blob_file_garbage.GetGarbageBlobBytes())) { + std::ostringstream oss; + oss << "Garbage overflow for blob file #" << blob_file_number; + return Status::Corruption("VersionBuilder", oss.str()); + } + + return Status::OK(); + } + + int GetCurrentLevelForTableFile(uint64_t file_number) const { + auto it = table_file_levels_.find(file_number); + if (it != table_file_levels_.end()) { + return it->second; + } + + assert(base_vstorage_); + return base_vstorage_->GetFileLocation(file_number).GetLevel(); + } + + uint64_t GetOldestBlobFileNumberForTableFile(int level, + uint64_t file_number) const { + assert(level < num_levels_); + + const auto& added_files = levels_[level].added_files; + + auto it = added_files.find(file_number); + if (it != added_files.end()) { + const FileMetaData* const meta = it->second; + assert(meta); + + return meta->oldest_blob_file_number; + } + + assert(base_vstorage_); + const FileMetaData* const meta = + base_vstorage_->GetFileMetaDataByNumber(file_number); + assert(meta); + + return meta->oldest_blob_file_number; + } + + Status ApplyFileDeletion(int level, uint64_t file_number) { + assert(level != VersionStorageInfo::FileLocation::Invalid().GetLevel()); + + const int current_level = GetCurrentLevelForTableFile(file_number); + + if (level != current_level) { + if (level >= num_levels_) { + has_invalid_levels_ = true; + } + + std::ostringstream oss; + oss << "Cannot delete table file #" << file_number << " from level " + << level << " since it is "; + if (current_level == + VersionStorageInfo::FileLocation::Invalid().GetLevel()) { + oss << "not in the LSM tree"; + } else { + oss << "on level " << current_level; + } + + return Status::Corruption("VersionBuilder", oss.str()); + } + + if (level >= num_levels_) { + assert(invalid_level_sizes_[level] > 0); + --invalid_level_sizes_[level]; + + table_file_levels_[file_number] = + VersionStorageInfo::FileLocation::Invalid().GetLevel(); + + return Status::OK(); + } + + const uint64_t blob_file_number = + GetOldestBlobFileNumberForTableFile(level, file_number); + + if (blob_file_number != kInvalidBlobFileNumber) { + MutableBlobFileMetaData* const mutable_meta = + GetOrCreateMutableBlobFileMetaData(blob_file_number); + if (mutable_meta) { + 
mutable_meta->UnlinkSst(file_number); + } + } + + auto& level_state = levels_[level]; + + auto& add_files = level_state.added_files; + auto add_it = add_files.find(file_number); + if (add_it != add_files.end()) { + UnrefFile(add_it->second); + add_files.erase(add_it); + } + + auto& del_files = level_state.deleted_files; + assert(del_files.find(file_number) == del_files.end()); + del_files.emplace(file_number); + + table_file_levels_[file_number] = + VersionStorageInfo::FileLocation::Invalid().GetLevel(); + + return Status::OK(); + } + + Status ApplyFileAddition(int level, const FileMetaData& meta) { + assert(level != VersionStorageInfo::FileLocation::Invalid().GetLevel()); + + const uint64_t file_number = meta.fd.GetNumber(); + + const int current_level = GetCurrentLevelForTableFile(file_number); + + if (current_level != + VersionStorageInfo::FileLocation::Invalid().GetLevel()) { + if (level >= num_levels_) { + has_invalid_levels_ = true; + } + + std::ostringstream oss; + oss << "Cannot add table file #" << file_number << " to level " << level + << " since it is already in the LSM tree on level " << current_level; + return Status::Corruption("VersionBuilder", oss.str()); + } + + if (level >= num_levels_) { + ++invalid_level_sizes_[level]; + table_file_levels_[file_number] = level; + + return Status::OK(); + } + + auto& level_state = levels_[level]; + + auto& del_files = level_state.deleted_files; + auto del_it = del_files.find(file_number); + if (del_it != del_files.end()) { + del_files.erase(del_it); + } + + FileMetaData* const f = new FileMetaData(meta); + f->refs = 1; + + if (file_metadata_cache_res_mgr_) { + Status s = file_metadata_cache_res_mgr_->UpdateCacheReservation( + f->ApproximateMemoryUsage(), true /* increase */); + if (!s.ok()) { + delete f; + s = Status::MemoryLimit( + "Can't allocate " + + kCacheEntryRoleToCamelString[static_cast( + CacheEntryRole::kFileMetadata)] + + " due to exceeding the memory limit " + "based on " + "cache capacity"); + return s; + } + } + + auto& add_files = level_state.added_files; + assert(add_files.find(file_number) == add_files.end()); + add_files.emplace(file_number, f); + + const uint64_t blob_file_number = f->oldest_blob_file_number; + + if (blob_file_number != kInvalidBlobFileNumber) { + MutableBlobFileMetaData* const mutable_meta = + GetOrCreateMutableBlobFileMetaData(blob_file_number); + if (mutable_meta) { + mutable_meta->LinkSst(file_number); + } + } + + table_file_levels_[file_number] = level; + + return Status::OK(); + } + + Status ApplyCompactCursors(int level, + const InternalKey& smallest_uncompacted_key) { + if (level < 0) { + std::ostringstream oss; + oss << "Cannot add compact cursor (" << level << "," + << smallest_uncompacted_key.Encode().ToString() + << " due to invalid level (level = " << level << ")"; + return Status::Corruption("VersionBuilder", oss.str()); + } + if (level < num_levels_) { + // Omit levels (>= num_levels_) when re-open with shrinking num_levels_ + updated_compact_cursors_[level] = smallest_uncompacted_key; + } + return Status::OK(); + } + + // Apply all of the edits in *edit to the current state. + Status Apply(const VersionEdit* edit) { + { + const Status s = CheckConsistency(base_vstorage_); + if (!s.ok()) { + return s; + } + } + + // Note: we process the blob file related changes first because the + // table file addition/deletion logic depends on the blob files + // already being there. 
+ + // Add new blob files + for (const auto& blob_file_addition : edit->GetBlobFileAdditions()) { + const Status s = ApplyBlobFileAddition(blob_file_addition); + if (!s.ok()) { + return s; + } + } + + // Increase the amount of garbage for blob files affected by GC + for (const auto& blob_file_garbage : edit->GetBlobFileGarbages()) { + const Status s = ApplyBlobFileGarbage(blob_file_garbage); + if (!s.ok()) { + return s; + } + } + + // Delete table files + for (const auto& deleted_file : edit->GetDeletedFiles()) { + const int level = deleted_file.first; + const uint64_t file_number = deleted_file.second; + + const Status s = ApplyFileDeletion(level, file_number); + if (!s.ok()) { + return s; + } + } + + // Add new table files + for (const auto& new_file : edit->GetNewFiles()) { + const int level = new_file.first; + const FileMetaData& meta = new_file.second; + + const Status s = ApplyFileAddition(level, meta); + if (!s.ok()) { + return s; + } + } + + // Populate compact cursors for round-robin compaction, leave + // the cursor to be empty to indicate it is invalid + for (const auto& cursor : edit->GetCompactCursors()) { + const int level = cursor.first; + const InternalKey smallest_uncompacted_key = cursor.second; + const Status s = ApplyCompactCursors(level, smallest_uncompacted_key); + if (!s.ok()) { + return s; + } + } + return Status::OK(); + } + + // Helper function template for merging the blob file metadata from the base + // version with the mutable metadata representing the state after applying the + // edits. The function objects process_base and process_mutable are + // respectively called to handle a base version object when there is no + // matching mutable object, and a mutable object when there is no matching + // base version object. process_both is called to perform the merge when a + // given blob file appears both in the base version and the mutable list. The + // helper stops processing objects if a function object returns false. Blob + // files with a file number below first_blob_file are not processed. 
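+  // A rough call-shape sketch, inferred from the call sites below rather than
+  // from upstream documentation:
+  //
+  //   auto on_base = [](const std::shared_ptr<BlobFileMetaData>& m) {
+  //     return true;  // false would stop the merge early
+  //   };
+  //   auto on_mutable = [](const MutableBlobFileMetaData& m) { return true; };
+  //   auto on_both = [](const std::shared_ptr<BlobFileMetaData>& b,
+  //                     const MutableBlobFileMetaData& m) { return true; };
+  //   MergeBlobFileMetas(kInvalidBlobFileNumber, on_base, on_mutable, on_both);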
+  template <typename ProcessBase, typename ProcessMutable,
+            typename ProcessBoth>
+  void MergeBlobFileMetas(uint64_t first_blob_file, ProcessBase process_base,
+                          ProcessMutable process_mutable,
+                          ProcessBoth process_both) const {
+    assert(base_vstorage_);
+
+    auto base_it = base_vstorage_->GetBlobFileMetaDataLB(first_blob_file);
+    const auto base_it_end = base_vstorage_->GetBlobFiles().end();
+
+    auto mutable_it = mutable_blob_file_metas_.lower_bound(first_blob_file);
+    const auto mutable_it_end = mutable_blob_file_metas_.end();
+
+    while (base_it != base_it_end && mutable_it != mutable_it_end) {
+      const auto& base_meta = *base_it;
+      assert(base_meta);
+
+      const uint64_t base_blob_file_number = base_meta->GetBlobFileNumber();
+      const uint64_t mutable_blob_file_number = mutable_it->first;
+
+      if (base_blob_file_number < mutable_blob_file_number) {
+        if (!process_base(base_meta)) {
+          return;
+        }
+
+        ++base_it;
+      } else if (mutable_blob_file_number < base_blob_file_number) {
+        const auto& mutable_meta = mutable_it->second;
+
+        if (!process_mutable(mutable_meta)) {
+          return;
+        }
+
+        ++mutable_it;
+      } else {
+        assert(base_blob_file_number == mutable_blob_file_number);
+
+        const auto& mutable_meta = mutable_it->second;
+
+        if (!process_both(base_meta, mutable_meta)) {
+          return;
+        }
+
+        ++base_it;
+        ++mutable_it;
+      }
+    }
+
+    while (base_it != base_it_end) {
+      const auto& base_meta = *base_it;
+
+      if (!process_base(base_meta)) {
+        return;
+      }
+
+      ++base_it;
+    }
+
+    while (mutable_it != mutable_it_end) {
+      const auto& mutable_meta = mutable_it->second;
+
+      if (!process_mutable(mutable_meta)) {
+        return;
+      }
+
+      ++mutable_it;
+    }
+  }
+
+  // Helper function template for finding the first blob file that has linked
+  // SSTs.
+  template <typename Meta>
+  static bool CheckLinkedSsts(const Meta& meta,
+                              uint64_t* min_oldest_blob_file_num) {
+    assert(min_oldest_blob_file_num);
+
+    if (!meta.GetLinkedSsts().empty()) {
+      assert(*min_oldest_blob_file_num == kInvalidBlobFileNumber);
+
+      *min_oldest_blob_file_num = meta.GetBlobFileNumber();
+
+      return false;
+    }
+
+    return true;
+  }
+
+  // Find the oldest blob file that has linked SSTs.
+  uint64_t GetMinOldestBlobFileNumber() const {
+    uint64_t min_oldest_blob_file_num = kInvalidBlobFileNumber;
+
+    auto process_base =
+        [&min_oldest_blob_file_num](
+            const std::shared_ptr<BlobFileMetaData>& base_meta) {
+          assert(base_meta);
+
+          return CheckLinkedSsts(*base_meta, &min_oldest_blob_file_num);
+        };
+
+    auto process_mutable = [&min_oldest_blob_file_num](
+                               const MutableBlobFileMetaData& mutable_meta) {
+      return CheckLinkedSsts(mutable_meta, &min_oldest_blob_file_num);
+    };
+
+    auto process_both = [&min_oldest_blob_file_num](
+                            const std::shared_ptr<BlobFileMetaData>& base_meta,
+                            const MutableBlobFileMetaData& mutable_meta) {
+#ifndef NDEBUG
+      assert(base_meta);
+      assert(base_meta->GetSharedMeta() == mutable_meta.GetSharedMeta());
+#else
+      (void)base_meta;
+#endif
+
+      // Look at mutable_meta since it supersedes *base_meta
+      return CheckLinkedSsts(mutable_meta, &min_oldest_blob_file_num);
+    };
+
+    MergeBlobFileMetas(kInvalidBlobFileNumber, process_base, process_mutable,
+                       process_both);
+
+    return min_oldest_blob_file_num;
+  }
+
+  static std::shared_ptr<BlobFileMetaData> CreateBlobFileMetaData(
+      const MutableBlobFileMetaData& mutable_meta) {
+    return BlobFileMetaData::Create(
+        mutable_meta.GetSharedMeta(), mutable_meta.GetLinkedSsts(),
+        mutable_meta.GetGarbageBlobCount(), mutable_meta.GetGarbageBlobBytes());
+  }
+
+  // Add the blob file specified by meta to *vstorage if it is determined to
+  // contain valid data (blobs).
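+  // That is, a blob file is only skipped when no SSTs link to it and all of
+  // its blobs are garbage (GetGarbageBlobCount() >= GetTotalBlobCount()).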
+  template <typename Meta>
+  static void AddBlobFileIfNeeded(VersionStorageInfo* vstorage, Meta&& meta) {
+    assert(vstorage);
+    assert(meta);
+
+    if (meta->GetLinkedSsts().empty() &&
+        meta->GetGarbageBlobCount() >= meta->GetTotalBlobCount()) {
+      return;
+    }
+
+    vstorage->AddBlobFile(std::forward<Meta>(meta));
+  }
+
+  // Merge the blob file metadata from the base version with the changes
+  // (edits) applied, and save the result into *vstorage.
+  void SaveBlobFilesTo(VersionStorageInfo* vstorage) const {
+    assert(vstorage);
+
+    assert(base_vstorage_);
+    vstorage->ReserveBlob(base_vstorage_->GetBlobFiles().size() +
+                          mutable_blob_file_metas_.size());
+
+    const uint64_t oldest_blob_file_with_linked_ssts =
+        GetMinOldestBlobFileNumber();
+
+    auto process_base =
+        [vstorage](const std::shared_ptr<BlobFileMetaData>& base_meta) {
+          assert(base_meta);
+
+          AddBlobFileIfNeeded(vstorage, base_meta);
+
+          return true;
+        };
+
+    auto process_mutable =
+        [vstorage](const MutableBlobFileMetaData& mutable_meta) {
+          AddBlobFileIfNeeded(vstorage, CreateBlobFileMetaData(mutable_meta));
+
+          return true;
+        };
+
+    auto process_both = [vstorage](
+                            const std::shared_ptr<BlobFileMetaData>& base_meta,
+                            const MutableBlobFileMetaData& mutable_meta) {
+      assert(base_meta);
+      assert(base_meta->GetSharedMeta() == mutable_meta.GetSharedMeta());
+
+      if (!mutable_meta.HasDelta()) {
+        assert(base_meta->GetGarbageBlobCount() ==
+               mutable_meta.GetGarbageBlobCount());
+        assert(base_meta->GetGarbageBlobBytes() ==
+               mutable_meta.GetGarbageBlobBytes());
+        assert(base_meta->GetLinkedSsts() == mutable_meta.GetLinkedSsts());
+
+        AddBlobFileIfNeeded(vstorage, base_meta);
+
+        return true;
+      }
+
+      AddBlobFileIfNeeded(vstorage, CreateBlobFileMetaData(mutable_meta));
+
+      return true;
+    };
+
+    MergeBlobFileMetas(oldest_blob_file_with_linked_ssts, process_base,
+                       process_mutable, process_both);
+  }
+
+  void MaybeAddFile(VersionStorageInfo* vstorage, int level,
+                    FileMetaData* f) const {
+    const uint64_t file_number = f->fd.GetNumber();
+
+    const auto& level_state = levels_[level];
+
+    const auto& del_files = level_state.deleted_files;
+    const auto del_it = del_files.find(file_number);
+
+    if (del_it != del_files.end()) {
+      // f is to-be-deleted table file
+      vstorage->RemoveCurrentStats(f);
+    } else {
+      const auto& add_files = level_state.added_files;
+      const auto add_it = add_files.find(file_number);
+
+      // Note: if the file appears both in the base version and in the added
+      // list, the added FileMetaData supersedes the one in the base version.
+      if (add_it != add_files.end() && add_it->second != f) {
+        vstorage->RemoveCurrentStats(f);
+      } else {
+        vstorage->AddFile(level, f);
+      }
+    }
+  }
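+
+  // (MaybeAddFile above thus implements: deleted by an edit -> drop and
+  // update stats; superseded by a re-added FileMetaData for the same file
+  // number -> drop the stale object; otherwise -> keep the file in *vstorage.)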
+
+  template <typename Cmp>
+  void SaveSSTFilesTo(VersionStorageInfo* vstorage, int level, Cmp cmp) const {
+    // Merge the set of added files with the set of pre-existing files.
+    // Drop any deleted files. Store the result in *vstorage.
+    const auto& base_files = base_vstorage_->LevelFiles(level);
+    const auto& unordered_added_files = levels_[level].added_files;
+    vstorage->Reserve(level, base_files.size() + unordered_added_files.size());
+
+    // Sort added files for the level.
+    std::vector<FileMetaData*> added_files;
+    added_files.reserve(unordered_added_files.size());
+    for (const auto& pair : unordered_added_files) {
+      added_files.push_back(pair.second);
+    }
+    std::sort(added_files.begin(), added_files.end(), cmp);
+
+    auto base_iter = base_files.begin();
+    auto base_end = base_files.end();
+    auto added_iter = added_files.begin();
+    auto added_end = added_files.end();
+    while (added_iter != added_end || base_iter != base_end) {
+      if (base_iter == base_end ||
+          (added_iter != added_end && cmp(*added_iter, *base_iter))) {
+        MaybeAddFile(vstorage, level, *added_iter++);
+      } else {
+        MaybeAddFile(vstorage, level, *base_iter++);
+      }
+    }
+  }
+
+  bool PromoteEpochNumberRequirementIfNeeded(
+      VersionStorageInfo* vstorage) const {
+    if (vstorage->HasMissingEpochNumber()) {
+      return false;
+    }
+
+    for (int level = 0; level < num_levels_; ++level) {
+      for (const auto& pair : levels_[level].added_files) {
+        const FileMetaData* f = pair.second;
+        if (f->epoch_number == kUnknownEpochNumber) {
+          return false;
+        }
+      }
+    }
+
+    vstorage->SetEpochNumberRequirement(EpochNumberRequirement::kMustPresent);
+    return true;
+  }
+
+  void SaveSSTFilesTo(VersionStorageInfo* vstorage) const {
+    assert(vstorage);
+
+    if (!num_levels_) {
+      return;
+    }
+
+    EpochNumberRequirement epoch_number_requirement =
+        vstorage->GetEpochNumberRequirement();
+
+    if (epoch_number_requirement == EpochNumberRequirement::kMightMissing) {
+      bool promoted = PromoteEpochNumberRequirementIfNeeded(vstorage);
+      if (promoted) {
+        epoch_number_requirement = vstorage->GetEpochNumberRequirement();
+      }
+    }
+
+    if (epoch_number_requirement == EpochNumberRequirement::kMightMissing) {
+      SaveSSTFilesTo(vstorage, /* level */ 0, level_zero_cmp_by_seqno_);
+    } else {
+      SaveSSTFilesTo(vstorage, /* level */ 0, level_zero_cmp_by_epochno_);
+    }
+
+    for (int level = 1; level < num_levels_; ++level) {
+      SaveSSTFilesTo(vstorage, level, level_nonzero_cmp_);
+    }
+  }
+
+  void SaveCompactCursorsTo(VersionStorageInfo* vstorage) const {
+    for (auto iter = updated_compact_cursors_.begin();
+         iter != updated_compact_cursors_.end(); iter++) {
+      vstorage->AddCursorForOneLevel(iter->first, iter->second);
+    }
+  }
+
+  // Save the current state in *vstorage.
+  Status SaveTo(VersionStorageInfo* vstorage) const {
+    Status s;
+
+#ifndef NDEBUG
+    // The same check is done within Apply() so we skip it in release mode.
+    s = CheckConsistency(base_vstorage_);
+    if (!s.ok()) {
+      return s;
+    }
+#endif  // NDEBUG
+
+    s = CheckConsistency(vstorage);
+    if (!s.ok()) {
+      return s;
+    }
+
+    SaveSSTFilesTo(vstorage);
+
+    SaveBlobFilesTo(vstorage);
+
+    SaveCompactCursorsTo(vstorage);
+
+    s = CheckConsistency(vstorage);
+    return s;
+  }
+
+  Status LoadTableHandlers(
+      InternalStats* internal_stats, int max_threads,
+      bool prefetch_index_and_filter_in_cache, bool is_initial_load,
+      const std::shared_ptr<const SliceTransform>& prefix_extractor,
+      size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options,
+      uint8_t block_protection_bytes_per_key) {
+    assert(table_cache_ != nullptr);
+
+    size_t table_cache_capacity =
+        table_cache_->get_cache().get()->GetCapacity();
+    bool always_load = (table_cache_capacity == TableCache::kInfiniteCapacity);
+    size_t max_load = std::numeric_limits<size_t>::max();
+
+    if (!always_load) {
+      // If it is initial loading and not set to always loading all the
+      // files, we only load up to kInitialLoadLimit files, to limit the
+      // time reopening the DB.
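+      // Illustrative arithmetic (numbers assumed, not from upstream): with a
+      // table cache capacity of 64, load_limit is min(16, 64 / 4) = 16 on an
+      // initial load and 64 / 4 = 16 otherwise; with 10 handles already in
+      // use, at most 16 - 10 = 6 new table readers are opened here.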
+      const size_t kInitialLoadLimit = 16;
+      size_t load_limit;
+      // If the table cache is not 1/4 full, we pin the table handle to
+      // file metadata to avoid the cache read costs when reading the file.
+      // The downside of pinning those files is that LRU won't be followed
+      // for those files. This doesn't matter much because if number of files
+      // of the DB exceeds table cache capacity, eventually no table reader
+      // will be pinned and LRU will be followed.
+      if (is_initial_load) {
+        load_limit = std::min(kInitialLoadLimit, table_cache_capacity / 4);
+      } else {
+        load_limit = table_cache_capacity / 4;
+      }
+
+      size_t table_cache_usage = table_cache_->get_cache().get()->GetUsage();
+      if (table_cache_usage >= load_limit) {
+        // TODO (yanqin) find a suitable status code.
+        return Status::OK();
+      } else {
+        max_load = load_limit - table_cache_usage;
+      }
+    }
+
+    // <file metadata, level>
+    std::vector<std::pair<FileMetaData*, int>> files_meta;
+    std::vector<Status> statuses;
+    for (int level = 0; level < num_levels_; level++) {
+      for (auto& file_meta_pair : levels_[level].added_files) {
+        auto* file_meta = file_meta_pair.second;
+        // If the file has been opened before, just skip it.
+        if (!file_meta->table_reader_handle) {
+          files_meta.emplace_back(file_meta, level);
+          statuses.emplace_back(Status::OK());
+        }
+        if (files_meta.size() >= max_load) {
+          break;
+        }
+      }
+      if (files_meta.size() >= max_load) {
+        break;
+      }
+    }
+
+    std::atomic<size_t> next_file_meta_idx(0);
+    std::function<void()> load_handlers_func([&]() {
+      while (true) {
+        size_t file_idx = next_file_meta_idx.fetch_add(1);
+        if (file_idx >= files_meta.size()) {
+          break;
+        }
+
+        auto* file_meta = files_meta[file_idx].first;
+        int level = files_meta[file_idx].second;
+        TableCache::TypedHandle* handle = nullptr;
+        statuses[file_idx] = table_cache_->FindTable(
+            read_options, file_options_,
+            *(base_vstorage_->InternalComparator()), *file_meta, &handle,
+            block_protection_bytes_per_key, prefix_extractor,
+            false /* no_io */, true /* record_read_stats */,
+            internal_stats->GetFileReadHist(level), false, level,
+            prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin,
+            file_meta->temperature);
+        if (handle != nullptr) {
+          file_meta->table_reader_handle = handle;
+          // Load table_reader
+          file_meta->fd.table_reader = table_cache_->get_cache().Value(handle);
+        }
+      }
+    });
+
+    std::vector<port::Thread> threads;
+    for (int i = 1; i < max_threads; i++) {
+      threads.emplace_back(load_handlers_func);
+    }
+    load_handlers_func();
+    for (auto& t : threads) {
+      t.join();
+    }
+    Status ret;
+    for (const auto& s : statuses) {
+      if (!s.ok()) {
+        if (ret.ok()) {
+          ret = s;
+        }
+      }
+    }
+    return ret;
+  }
+};
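+
+// The loop in Rep::LoadTableHandlers above hands out work via a shared atomic
+// counter: each of max_threads workers repeatedly claims the next index with
+// fetch_add(1) until the vector is exhausted. A minimal standalone sketch of
+// the same pattern (illustrative only, not RocksDB code):
+//
+//   std::vector<std::function<void()>> tasks = /* work items (assumed) */;
+//   std::atomic<size_t> next{0};
+//   auto worker = [&] {
+//     for (size_t i = next.fetch_add(1); i < tasks.size();
+//          i = next.fetch_add(1)) {
+//       tasks[i]();
+//     }
+//   };
+//   // run `worker` on max_threads - 1 extra threads, then join
+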
+VersionBuilder::VersionBuilder(
+    const FileOptions& file_options, const ImmutableCFOptions* ioptions,
+    TableCache* table_cache, VersionStorageInfo* base_vstorage,
+    VersionSet* version_set,
+    std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr)
+    : rep_(new Rep(file_options, ioptions, table_cache, base_vstorage,
+                   version_set, file_metadata_cache_res_mgr)) {}
+
+VersionBuilder::~VersionBuilder() = default;
+
+bool VersionBuilder::CheckConsistencyForNumLevels() {
+  return rep_->CheckConsistencyForNumLevels();
+}
+
+Status VersionBuilder::Apply(const VersionEdit* edit) {
+  return rep_->Apply(edit);
+}
+
+Status VersionBuilder::SaveTo(VersionStorageInfo* vstorage) const {
+  return rep_->SaveTo(vstorage);
+}
+
+Status VersionBuilder::LoadTableHandlers(
+    InternalStats* internal_stats, int max_threads,
+    bool prefetch_index_and_filter_in_cache, bool is_initial_load,
+    const std::shared_ptr<const SliceTransform>& prefix_extractor,
+    size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options,
+    uint8_t block_protection_bytes_per_key) {
+  return rep_->LoadTableHandlers(
+      internal_stats, max_threads, prefetch_index_and_filter_in_cache,
+      is_initial_load, prefix_extractor, max_file_size_for_l0_meta_pin,
+      read_options, block_protection_bytes_per_key);
+}
+
+uint64_t VersionBuilder::GetMinOldestBlobFileNumber() const {
+  return rep_->GetMinOldestBlobFileNumber();
+}
+
+BaseReferencedVersionBuilder::BaseReferencedVersionBuilder(
+    ColumnFamilyData* cfd)
+    : version_builder_(new VersionBuilder(
+          cfd->current()->version_set()->file_options(), cfd->ioptions(),
+          cfd->table_cache(), cfd->current()->storage_info(),
+          cfd->current()->version_set(),
+          cfd->GetFileMetadataCacheReservationManager())),
+      version_(cfd->current()) {
+  version_->Ref();
+}
+
+BaseReferencedVersionBuilder::BaseReferencedVersionBuilder(
+    ColumnFamilyData* cfd, Version* v)
+    : version_builder_(new VersionBuilder(
+          cfd->current()->version_set()->file_options(), cfd->ioptions(),
+          cfd->table_cache(), v->storage_info(), v->version_set(),
+          cfd->GetFileMetadataCacheReservationManager())),
+      version_(v) {
+  assert(version_ != cfd->current());
+}
+
+BaseReferencedVersionBuilder::~BaseReferencedVersionBuilder() {
+  version_->Unref();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.h
new file mode 100644
index 0000000000..fb2a304a84
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_builder.h
@@ -0,0 +1,93 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+#pragma once
+
+#include <memory>
+
+#include "db/version_edit.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/metadata.h"
+#include "rocksdb/slice_transform.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct ImmutableCFOptions;
+class TableCache;
+class VersionStorageInfo;
+class VersionEdit;
+struct FileMetaData;
+class InternalStats;
+class Version;
+class VersionSet;
+class ColumnFamilyData;
+class CacheReservationManager;
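+
+// Rough usage sketch (assumed from the interface below, not upstream docs):
+//
+//   VersionBuilder builder(file_opts, &ioptions, table_cache,
+//                          base_version_storage_info, version_set);
+//   Status s = builder.Apply(&edit);            // repeat per VersionEdit
+//   if (s.ok()) s = builder.SaveTo(&new_vstorage);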
+
+// A helper class so we can efficiently apply a whole sequence
+// of edits to a particular state without creating intermediate
+// Versions that contain full copies of the intermediate state.
+class VersionBuilder {
+ public:
+  VersionBuilder(const FileOptions& file_options,
+                 const ImmutableCFOptions* ioptions, TableCache* table_cache,
+                 VersionStorageInfo* base_vstorage, VersionSet* version_set,
+                 std::shared_ptr<CacheReservationManager>
+                     file_metadata_cache_res_mgr = nullptr);
+  ~VersionBuilder();
+
+  bool CheckConsistencyForNumLevels();
+  Status Apply(const VersionEdit* edit);
+  Status SaveTo(VersionStorageInfo* vstorage) const;
+  Status LoadTableHandlers(
+      InternalStats* internal_stats, int max_threads,
+      bool prefetch_index_and_filter_in_cache, bool is_initial_load,
+      const std::shared_ptr<const SliceTransform>& prefix_extractor,
+      size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options,
+      uint8_t block_protection_bytes_per_key);
+  uint64_t GetMinOldestBlobFileNumber() const;
+
+ private:
+  class Rep;
+  std::unique_ptr<Rep> rep_;
+};
+
+// A wrapper of version builder which references the current version in
+// constructor and unref it in the destructor.
+// Both of the constructor and destructor need to be called inside DB Mutex.
+class BaseReferencedVersionBuilder {
+ public:
+  explicit BaseReferencedVersionBuilder(ColumnFamilyData* cfd);
+  BaseReferencedVersionBuilder(ColumnFamilyData* cfd, Version* v);
+  ~BaseReferencedVersionBuilder();
+  VersionBuilder* version_builder() const { return version_builder_.get(); }
+
+ private:
+  std::unique_ptr<VersionBuilder> version_builder_;
+  Version* version_;
+};
+
+class NewestFirstBySeqNo {
+ public:
+  bool operator()(const FileMetaData* lhs, const FileMetaData* rhs) const {
+    assert(lhs);
+    assert(rhs);
+
+    if (lhs->fd.largest_seqno != rhs->fd.largest_seqno) {
+      return lhs->fd.largest_seqno > rhs->fd.largest_seqno;
+    }
+
+    if (lhs->fd.smallest_seqno != rhs->fd.smallest_seqno) {
+      return lhs->fd.smallest_seqno > rhs->fd.smallest_seqno;
+    }
+
+    // Break ties by file number
+    return lhs->fd.GetNumber() > rhs->fd.GetNumber();
+  }
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.cc
new file mode 100644
index 0000000000..4f1ae80d21
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.cc
@@ -0,0 +1,1059 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+ +#include "db/version_edit.h" + +#include "db/blob/blob_index.h" +#include "db/version_set.h" +#include "logging/event_logger.h" +#include "rocksdb/slice.h" +#include "table/unique_id_impl.h" +#include "test_util/sync_point.h" +#include "util/coding.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +namespace {} // anonymous namespace + +uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id) { + assert(number <= kFileNumberMask); + return number | (path_id * (kFileNumberMask + 1)); +} + +Status FileMetaData::UpdateBoundaries(const Slice& key, const Slice& value, + SequenceNumber seqno, + ValueType value_type) { + if (value_type == kTypeBlobIndex) { + BlobIndex blob_index; + const Status s = blob_index.DecodeFrom(value); + if (!s.ok()) { + return s; + } + + if (!blob_index.IsInlined() && !blob_index.HasTTL()) { + if (blob_index.file_number() == kInvalidBlobFileNumber) { + return Status::Corruption("Invalid blob file number"); + } + + if (oldest_blob_file_number == kInvalidBlobFileNumber || + oldest_blob_file_number > blob_index.file_number()) { + oldest_blob_file_number = blob_index.file_number(); + } + } + } + + if (smallest.size() == 0) { + smallest.DecodeFrom(key); + } + largest.DecodeFrom(key); + fd.smallest_seqno = std::min(fd.smallest_seqno, seqno); + fd.largest_seqno = std::max(fd.largest_seqno, seqno); + + return Status::OK(); +} + +void VersionEdit::Clear() { + max_level_ = 0; + db_id_.clear(); + comparator_.clear(); + log_number_ = 0; + prev_log_number_ = 0; + next_file_number_ = 0; + max_column_family_ = 0; + min_log_number_to_keep_ = 0; + last_sequence_ = 0; + has_db_id_ = false; + has_comparator_ = false; + has_log_number_ = false; + has_prev_log_number_ = false; + has_next_file_number_ = false; + has_max_column_family_ = false; + has_min_log_number_to_keep_ = false; + has_last_sequence_ = false; + compact_cursors_.clear(); + deleted_files_.clear(); + new_files_.clear(); + blob_file_additions_.clear(); + blob_file_garbages_.clear(); + wal_additions_.clear(); + wal_deletion_.Reset(); + column_family_ = 0; + is_column_family_add_ = false; + is_column_family_drop_ = false; + column_family_name_.clear(); + is_in_atomic_group_ = false; + remaining_entries_ = 0; + full_history_ts_low_.clear(); +} + +bool VersionEdit::EncodeTo(std::string* dst) const { + if (has_db_id_) { + PutVarint32(dst, kDbId); + PutLengthPrefixedSlice(dst, db_id_); + } + if (has_comparator_) { + PutVarint32(dst, kComparator); + PutLengthPrefixedSlice(dst, comparator_); + } + if (has_log_number_) { + PutVarint32Varint64(dst, kLogNumber, log_number_); + } + if (has_prev_log_number_) { + PutVarint32Varint64(dst, kPrevLogNumber, prev_log_number_); + } + if (has_next_file_number_) { + PutVarint32Varint64(dst, kNextFileNumber, next_file_number_); + } + if (has_max_column_family_) { + PutVarint32Varint32(dst, kMaxColumnFamily, max_column_family_); + } + if (has_min_log_number_to_keep_) { + PutVarint32Varint64(dst, kMinLogNumberToKeep, min_log_number_to_keep_); + } + if (has_last_sequence_) { + PutVarint32Varint64(dst, kLastSequence, last_sequence_); + } + for (size_t i = 0; i < compact_cursors_.size(); i++) { + if (compact_cursors_[i].second.Valid()) { + PutVarint32(dst, kCompactCursor); + PutVarint32(dst, compact_cursors_[i].first); // level + PutLengthPrefixedSlice(dst, compact_cursors_[i].second.Encode()); + } + } + for (const auto& deleted : deleted_files_) { + PutVarint32Varint32Varint64(dst, kDeletedFile, deleted.first /* level */, + deleted.second /* file number */); + } + + bool 
min_log_num_written = false;
+  for (size_t i = 0; i < new_files_.size(); i++) {
+    const FileMetaData& f = new_files_[i].second;
+    if (!f.smallest.Valid() || !f.largest.Valid() ||
+        f.epoch_number == kUnknownEpochNumber) {
+      return false;
+    }
+    PutVarint32(dst, kNewFile4);
+    PutVarint32Varint64(dst, new_files_[i].first /* level */,
+                        f.fd.GetNumber());
+    PutVarint64(dst, f.fd.GetFileSize());
+    PutLengthPrefixedSlice(dst, f.smallest.Encode());
+    PutLengthPrefixedSlice(dst, f.largest.Encode());
+    PutVarint64Varint64(dst, f.fd.smallest_seqno, f.fd.largest_seqno);
+    // Customized fields' format:
+    // +-----------------------------+
+    // | 1st field's tag (varint32)  |
+    // +-----------------------------+
+    // | 1st field's size (varint32) |
+    // +-----------------------------+
+    // | bytes for 1st field         |
+    // | (based on size decoded)     |
+    // +-----------------------------+
+    // |                             |
+    // | ......                      |
+    // |                             |
+    // +-----------------------------+
+    // | last field's size (varint32)|
+    // +-----------------------------+
+    // | bytes for last field        |
+    // | (based on size decoded)     |
+    // +-----------------------------+
+    // | terminating tag (varint32)  |
+    // +-----------------------------+
+    //
+    // Customized encoding for fields:
+    //   tag kPathId: 1 byte as path_id
+    //   tag kNeedCompaction:
+    //        now only can take one char value 1 indicating need-compaction
+    //
+    PutVarint32(dst, NewFileCustomTag::kOldestAncesterTime);
+    std::string varint_oldest_ancester_time;
+    PutVarint64(&varint_oldest_ancester_time, f.oldest_ancester_time);
+    TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:VarintOldestAncesterTime",
+                             &varint_oldest_ancester_time);
+    PutLengthPrefixedSlice(dst, Slice(varint_oldest_ancester_time));
+
+    PutVarint32(dst, NewFileCustomTag::kFileCreationTime);
+    std::string varint_file_creation_time;
+    PutVarint64(&varint_file_creation_time, f.file_creation_time);
+    TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:VarintFileCreationTime",
+                             &varint_file_creation_time);
+    PutLengthPrefixedSlice(dst, Slice(varint_file_creation_time));
+
+    PutVarint32(dst, NewFileCustomTag::kEpochNumber);
+    std::string varint_epoch_number;
+    PutVarint64(&varint_epoch_number, f.epoch_number);
+    PutLengthPrefixedSlice(dst, Slice(varint_epoch_number));
+
+    PutVarint32(dst, NewFileCustomTag::kFileChecksum);
+    PutLengthPrefixedSlice(dst, Slice(f.file_checksum));
+
+    PutVarint32(dst, NewFileCustomTag::kFileChecksumFuncName);
+    PutLengthPrefixedSlice(dst, Slice(f.file_checksum_func_name));
+
+    if (f.fd.GetPathId() != 0) {
+      PutVarint32(dst, NewFileCustomTag::kPathId);
+      char p = static_cast<char>(f.fd.GetPathId());
+      PutLengthPrefixedSlice(dst, Slice(&p, 1));
+    }
+    if (f.temperature != Temperature::kUnknown) {
+      PutVarint32(dst, NewFileCustomTag::kTemperature);
+      char p = static_cast<char>(f.temperature);
+      PutLengthPrefixedSlice(dst, Slice(&p, 1));
+    }
+    if (f.marked_for_compaction) {
+      PutVarint32(dst, NewFileCustomTag::kNeedCompaction);
+      char p = static_cast<char>(1);
+      PutLengthPrefixedSlice(dst, Slice(&p, 1));
+    }
+    if (has_min_log_number_to_keep_ && !min_log_num_written) {
+      PutVarint32(dst, NewFileCustomTag::kMinLogNumberToKeepHack);
+      std::string varint_log_number;
+      PutFixed64(&varint_log_number, min_log_number_to_keep_);
+      PutLengthPrefixedSlice(dst, Slice(varint_log_number));
+      min_log_num_written = true;
+    }
+    if (f.oldest_blob_file_number != kInvalidBlobFileNumber) {
+      PutVarint32(dst, NewFileCustomTag::kOldestBlobFileNumber);
+      std::string oldest_blob_file_number;
+      PutVarint64(&oldest_blob_file_number, f.oldest_blob_file_number);
+ PutLengthPrefixedSlice(dst, Slice(oldest_blob_file_number)); + } + UniqueId64x2 unique_id = f.unique_id; + TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:UniqueId", &unique_id); + if (unique_id != kNullUniqueId64x2) { + PutVarint32(dst, NewFileCustomTag::kUniqueId); + std::string unique_id_str = EncodeUniqueIdBytes(&unique_id); + PutLengthPrefixedSlice(dst, Slice(unique_id_str)); + } + if (f.compensated_range_deletion_size) { + PutVarint32(dst, kCompensatedRangeDeletionSize); + std::string compensated_range_deletion_size; + PutVarint64(&compensated_range_deletion_size, + f.compensated_range_deletion_size); + PutLengthPrefixedSlice(dst, Slice(compensated_range_deletion_size)); + } + if (f.tail_size) { + PutVarint32(dst, NewFileCustomTag::kTailSize); + std::string varint_tail_size; + PutVarint64(&varint_tail_size, f.tail_size); + PutLengthPrefixedSlice(dst, Slice(varint_tail_size)); + } + + TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields", + dst); + + PutVarint32(dst, NewFileCustomTag::kTerminate); + } + + for (const auto& blob_file_addition : blob_file_additions_) { + PutVarint32(dst, kBlobFileAddition); + blob_file_addition.EncodeTo(dst); + } + + for (const auto& blob_file_garbage : blob_file_garbages_) { + PutVarint32(dst, kBlobFileGarbage); + blob_file_garbage.EncodeTo(dst); + } + + for (const auto& wal_addition : wal_additions_) { + PutVarint32(dst, kWalAddition2); + std::string encoded; + wal_addition.EncodeTo(&encoded); + PutLengthPrefixedSlice(dst, encoded); + } + + if (!wal_deletion_.IsEmpty()) { + PutVarint32(dst, kWalDeletion2); + std::string encoded; + wal_deletion_.EncodeTo(&encoded); + PutLengthPrefixedSlice(dst, encoded); + } + + // 0 is default and does not need to be explicitly written + if (column_family_ != 0) { + PutVarint32Varint32(dst, kColumnFamily, column_family_); + } + + if (is_column_family_add_) { + PutVarint32(dst, kColumnFamilyAdd); + PutLengthPrefixedSlice(dst, Slice(column_family_name_)); + } + + if (is_column_family_drop_) { + PutVarint32(dst, kColumnFamilyDrop); + } + + if (is_in_atomic_group_) { + PutVarint32(dst, kInAtomicGroup); + PutVarint32(dst, remaining_entries_); + } + + if (HasFullHistoryTsLow()) { + PutVarint32(dst, kFullHistoryTsLow); + PutLengthPrefixedSlice(dst, full_history_ts_low_); + } + return true; +} + +static bool GetInternalKey(Slice* input, InternalKey* dst) { + Slice str; + if (GetLengthPrefixedSlice(input, &str)) { + dst->DecodeFrom(str); + return dst->Valid(); + } else { + return false; + } +} + +bool VersionEdit::GetLevel(Slice* input, int* level, const char** /*msg*/) { + uint32_t v = 0; + if (GetVarint32(input, &v)) { + *level = v; + if (max_level_ < *level) { + max_level_ = *level; + } + return true; + } else { + return false; + } +} + +const char* VersionEdit::DecodeNewFile4From(Slice* input) { + const char* msg = nullptr; + int level = 0; + FileMetaData f; + uint64_t number = 0; + uint32_t path_id = 0; + uint64_t file_size = 0; + SequenceNumber smallest_seqno = 0; + SequenceNumber largest_seqno = kMaxSequenceNumber; + if (GetLevel(input, &level, &msg) && GetVarint64(input, &number) && + GetVarint64(input, &file_size) && GetInternalKey(input, &f.smallest) && + GetInternalKey(input, &f.largest) && + GetVarint64(input, &smallest_seqno) && + GetVarint64(input, &largest_seqno)) { + // See comments in VersionEdit::EncodeTo() for format of customized fields + while (true) { + uint32_t custom_tag = 0; + Slice field; + if (!GetVarint32(input, &custom_tag)) { + return "new-file4 custom field"; + } + if 
(custom_tag == kTerminate) {
+        break;
+      }
+      if (!GetLengthPrefixedSlice(input, &field)) {
+        return "new-file4 custom field length prefixed slice error";
+      }
+      switch (custom_tag) {
+        case kPathId:
+          if (field.size() != 1) {
+            return "path_id field wrong size";
+          }
+          path_id = field[0];
+          if (path_id > 3) {
+            return "path_id wrong value";
+          }
+          break;
+        case kOldestAncesterTime:
+          if (!GetVarint64(&field, &f.oldest_ancester_time)) {
+            return "invalid oldest ancester time";
+          }
+          break;
+        case kFileCreationTime:
+          if (!GetVarint64(&field, &f.file_creation_time)) {
+            return "invalid file creation time";
+          }
+          break;
+        case kEpochNumber:
+          if (!GetVarint64(&field, &f.epoch_number)) {
+            return "invalid epoch number";
+          }
+          break;
+        case kFileChecksum:
+          f.file_checksum = field.ToString();
+          break;
+        case kFileChecksumFuncName:
+          f.file_checksum_func_name = field.ToString();
+          break;
+        case kNeedCompaction:
+          if (field.size() != 1) {
+            return "need_compaction field wrong size";
+          }
+          f.marked_for_compaction = (field[0] == 1);
+          break;
+        case kMinLogNumberToKeepHack:
+          // This is a hack to encode kMinLogNumberToKeep in a
+          // forward-compatible fashion.
+          if (!GetFixed64(&field, &min_log_number_to_keep_)) {
+            return "deleted log number malformatted";
+          }
+          has_min_log_number_to_keep_ = true;
+          break;
+        case kOldestBlobFileNumber:
+          if (!GetVarint64(&field, &f.oldest_blob_file_number)) {
+            return "invalid oldest blob file number";
+          }
+          break;
+        case kTemperature:
+          if (field.size() != 1) {
+            return "temperature field wrong size";
+          } else {
+            Temperature casted_field = static_cast<Temperature>(field[0]);
+            if (casted_field <= Temperature::kCold) {
+              f.temperature = casted_field;
+            }
+          }
+          break;
+        case kUniqueId:
+          if (!DecodeUniqueIdBytes(field.ToString(), &f.unique_id).ok()) {
+            f.unique_id = kNullUniqueId64x2;
+            return "invalid unique id";
+          }
+          break;
+        case kCompensatedRangeDeletionSize:
+          if (!GetVarint64(&field, &f.compensated_range_deletion_size)) {
+            return "Invalid compensated range deletion size";
+          }
+          break;
+        case kTailSize:
+          if (!GetVarint64(&field, &f.tail_size)) {
+            return "invalid tail start offset";
+          }
+          break;
+        default:
+          if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) {
+            // Should not proceed if cannot understand it
+            return "new-file4 custom field not supported";
+          }
+          break;
+      }
+    }
+  } else {
+    return "new-file4 entry";
+  }
+  f.fd =
+      FileDescriptor(number, path_id, file_size, smallest_seqno, largest_seqno);
+  new_files_.push_back(std::make_pair(level, f));
+  return nullptr;
+}
+
+Status VersionEdit::DecodeFrom(const Slice& src) {
+  Clear();
+#ifndef NDEBUG
+  bool ignore_ignorable_tags = false;
+  TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:IgnoreIgnorableTags",
+                           &ignore_ignorable_tags);
+#endif
+  Slice input = src;
+  const char* msg = nullptr;
+  uint32_t tag = 0;
+
+  // Temporary storage for parsing
+  int level = 0;
+  FileMetaData f;
+  Slice str;
+  InternalKey key;
+  while (msg == nullptr && GetVarint32(&input, &tag)) {
+#ifndef NDEBUG
+    if (ignore_ignorable_tags && tag > kTagSafeIgnoreMask) {
+      tag = kTagSafeIgnoreMask;
+    }
+#endif
+    switch (tag) {
+      case kDbId:
+        if (GetLengthPrefixedSlice(&input, &str)) {
+          db_id_ = str.ToString();
+          has_db_id_ = true;
+        } else {
+          msg = "db id";
+        }
+        break;
+      case kComparator:
+        if (GetLengthPrefixedSlice(&input, &str)) {
+          comparator_ = str.ToString();
+          has_comparator_ = true;
+        } else {
+          msg = "comparator name";
+        }
+        break;
+
+      case kLogNumber:
+        if (GetVarint64(&input, &log_number_)) {
+          has_log_number_ = true;
+        } else {
+          msg = "log number";
+        }
+        break;
+
+      case kPrevLogNumber:
+        if (GetVarint64(&input, &prev_log_number_)) {
+          has_prev_log_number_ = true;
+        } else {
+          msg = "previous log number";
+        }
+        break;
+
+      case kNextFileNumber:
+        if (GetVarint64(&input, &next_file_number_)) {
+          has_next_file_number_ = true;
+        } else {
+          msg = "next file number";
+        }
+        break;
+
+      case kMaxColumnFamily:
+        if (GetVarint32(&input, &max_column_family_)) {
+          has_max_column_family_ = true;
+        } else {
+          msg = "max column family";
+        }
+        break;
+
+      case kMinLogNumberToKeep:
+        if (GetVarint64(&input, &min_log_number_to_keep_)) {
+          has_min_log_number_to_keep_ = true;
+        } else {
+          msg = "min log number to keep";
+        }
+        break;
+
+      case kLastSequence:
+        if (GetVarint64(&input, &last_sequence_)) {
+          has_last_sequence_ = true;
+        } else {
+          msg = "last sequence number";
+        }
+        break;
+
+      case kCompactCursor:
+        if (GetLevel(&input, &level, &msg) && GetInternalKey(&input, &key)) {
+          // Here we re-use the output format of compact pointer in LevelDB
+          // to persist compact_cursors_
+          compact_cursors_.push_back(std::make_pair(level, key));
+        } else {
+          if (!msg) {
+            msg = "compaction cursor";
+          }
+        }
+        break;
+
+      case kDeletedFile: {
+        uint64_t number = 0;
+        if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number)) {
+          deleted_files_.insert(std::make_pair(level, number));
+        } else {
+          if (!msg) {
+            msg = "deleted file";
+          }
+        }
+        break;
+      }
+
+      case kNewFile: {
+        uint64_t number = 0;
+        uint64_t file_size = 0;
+        if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
+            GetVarint64(&input, &file_size) &&
+            GetInternalKey(&input, &f.smallest) &&
+            GetInternalKey(&input, &f.largest)) {
+          f.fd = FileDescriptor(number, 0, file_size);
+          new_files_.push_back(std::make_pair(level, f));
+        } else {
+          if (!msg) {
+            msg = "new-file entry";
+          }
+        }
+        break;
+      }
+      case kNewFile2: {
+        uint64_t number = 0;
+        uint64_t file_size = 0;
+        SequenceNumber smallest_seqno = 0;
+        SequenceNumber largest_seqno = kMaxSequenceNumber;
+        if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
+            GetVarint64(&input, &file_size) &&
+            GetInternalKey(&input, &f.smallest) &&
+            GetInternalKey(&input, &f.largest) &&
+            GetVarint64(&input, &smallest_seqno) &&
+            GetVarint64(&input, &largest_seqno)) {
+          f.fd = FileDescriptor(number, 0, file_size, smallest_seqno,
+                                largest_seqno);
+          new_files_.push_back(std::make_pair(level, f));
+        } else {
+          if (!msg) {
+            msg = "new-file2 entry";
+          }
+        }
+        break;
+      }
+
+      case kNewFile3: {
+        uint64_t number = 0;
+        uint32_t path_id = 0;
+        uint64_t file_size = 0;
+        SequenceNumber smallest_seqno = 0;
+        SequenceNumber largest_seqno = kMaxSequenceNumber;
+        if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
+            GetVarint32(&input, &path_id) && GetVarint64(&input, &file_size) &&
+            GetInternalKey(&input, &f.smallest) &&
+            GetInternalKey(&input, &f.largest) &&
+            GetVarint64(&input, &smallest_seqno) &&
+            GetVarint64(&input, &largest_seqno)) {
+          f.fd = FileDescriptor(number, path_id, file_size, smallest_seqno,
+                                largest_seqno);
+          new_files_.push_back(std::make_pair(level, f));
+        } else {
+          if (!msg) {
+            msg = "new-file3 entry";
+          }
+        }
+        break;
+      }
+
+      case kNewFile4: {
+        msg = DecodeNewFile4From(&input);
+        break;
+      }
+
+      case kBlobFileAddition:
+      case kBlobFileAddition_DEPRECATED: {
+        BlobFileAddition blob_file_addition;
+        const Status s = blob_file_addition.DecodeFrom(&input);
+        if (!s.ok()) {
+          return s;
+        }
+
+        AddBlobFile(std::move(blob_file_addition));
+        break;
+      }
+
+      case kBlobFileGarbage:
+      case
kBlobFileGarbage_DEPRECATED: { + BlobFileGarbage blob_file_garbage; + const Status s = blob_file_garbage.DecodeFrom(&input); + if (!s.ok()) { + return s; + } + + AddBlobFileGarbage(std::move(blob_file_garbage)); + break; + } + + case kWalAddition: { + WalAddition wal_addition; + const Status s = wal_addition.DecodeFrom(&input); + if (!s.ok()) { + return s; + } + + wal_additions_.emplace_back(std::move(wal_addition)); + break; + } + + case kWalAddition2: { + Slice encoded; + if (!GetLengthPrefixedSlice(&input, &encoded)) { + msg = "WalAddition not prefixed by length"; + break; + } + + WalAddition wal_addition; + const Status s = wal_addition.DecodeFrom(&encoded); + if (!s.ok()) { + return s; + } + + wal_additions_.emplace_back(std::move(wal_addition)); + break; + } + + case kWalDeletion: { + WalDeletion wal_deletion; + const Status s = wal_deletion.DecodeFrom(&input); + if (!s.ok()) { + return s; + } + + wal_deletion_ = std::move(wal_deletion); + break; + } + + case kWalDeletion2: { + Slice encoded; + if (!GetLengthPrefixedSlice(&input, &encoded)) { + msg = "WalDeletion not prefixed by length"; + break; + } + + WalDeletion wal_deletion; + const Status s = wal_deletion.DecodeFrom(&encoded); + if (!s.ok()) { + return s; + } + + wal_deletion_ = std::move(wal_deletion); + break; + } + + case kColumnFamily: + if (!GetVarint32(&input, &column_family_)) { + if (!msg) { + msg = "set column family id"; + } + } + break; + + case kColumnFamilyAdd: + if (GetLengthPrefixedSlice(&input, &str)) { + is_column_family_add_ = true; + column_family_name_ = str.ToString(); + } else { + if (!msg) { + msg = "column family add"; + } + } + break; + + case kColumnFamilyDrop: + is_column_family_drop_ = true; + break; + + case kInAtomicGroup: + is_in_atomic_group_ = true; + if (!GetVarint32(&input, &remaining_entries_)) { + if (!msg) { + msg = "remaining entries"; + } + } + break; + + case kFullHistoryTsLow: + if (!GetLengthPrefixedSlice(&input, &str)) { + msg = "full_history_ts_low"; + } else if (str.empty()) { + msg = "full_history_ts_low: empty"; + } else { + full_history_ts_low_.assign(str.data(), str.size()); + } + break; + + default: + if (tag & kTagSafeIgnoreMask) { + // Tag from future which can be safely ignored. + // The next field must be the length of the entry. 
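+          // E.g. a record carrying some future tag F (with kTagSafeIgnoreMask
+          // set) arrives as varint32(F), varint32(len), then len payload
+          // bytes, so this branch can skip the payload wholesale.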
+          uint32_t field_len;
+          if (!GetVarint32(&input, &field_len) ||
+              static_cast<uint64_t>(field_len) > input.size()) {
+            if (!msg) {
+              msg = "safely ignorable tag length error";
+            }
+          } else {
+            input.remove_prefix(static_cast<size_t>(field_len));
+          }
+        } else {
+          msg = "unknown tag";
+        }
+        break;
+    }
+  }
+
+  if (msg == nullptr && !input.empty()) {
+    msg = "invalid tag";
+  }
+
+  Status result;
+  if (msg != nullptr) {
+    result = Status::Corruption("VersionEdit", msg);
+  }
+  return result;
+}
+
+std::string VersionEdit::DebugString(bool hex_key) const {
+  std::string r;
+  r.append("VersionEdit {");
+  if (has_db_id_) {
+    r.append("\n  DB ID: ");
+    r.append(db_id_);
+  }
+  if (has_comparator_) {
+    r.append("\n  Comparator: ");
+    r.append(comparator_);
+  }
+  if (has_log_number_) {
+    r.append("\n  LogNumber: ");
+    AppendNumberTo(&r, log_number_);
+  }
+  if (has_prev_log_number_) {
+    r.append("\n  PrevLogNumber: ");
+    AppendNumberTo(&r, prev_log_number_);
+  }
+  if (has_next_file_number_) {
+    r.append("\n  NextFileNumber: ");
+    AppendNumberTo(&r, next_file_number_);
+  }
+  if (has_max_column_family_) {
+    r.append("\n  MaxColumnFamily: ");
+    AppendNumberTo(&r, max_column_family_);
+  }
+  if (has_min_log_number_to_keep_) {
+    r.append("\n  MinLogNumberToKeep: ");
+    AppendNumberTo(&r, min_log_number_to_keep_);
+  }
+  if (has_last_sequence_) {
+    r.append("\n  LastSeq: ");
+    AppendNumberTo(&r, last_sequence_);
+  }
+  for (const auto& level_and_compact_cursor : compact_cursors_) {
+    r.append("\n  CompactCursor: ");
+    AppendNumberTo(&r, level_and_compact_cursor.first);
+    r.append(" ");
+    r.append(level_and_compact_cursor.second.DebugString(hex_key));
+  }
+  for (const auto& deleted_file : deleted_files_) {
+    r.append("\n  DeleteFile: ");
+    AppendNumberTo(&r, deleted_file.first);
+    r.append(" ");
+    AppendNumberTo(&r, deleted_file.second);
+  }
+  for (size_t i = 0; i < new_files_.size(); i++) {
+    const FileMetaData& f = new_files_[i].second;
+    r.append("\n  AddFile: ");
+    AppendNumberTo(&r, new_files_[i].first);
+    r.append(" ");
+    AppendNumberTo(&r, f.fd.GetNumber());
+    r.append(" ");
+    AppendNumberTo(&r, f.fd.GetFileSize());
+    r.append(" ");
+    r.append(f.smallest.DebugString(hex_key));
+    r.append(" .. ");
+    r.append(f.largest.DebugString(hex_key));
+    if (f.oldest_blob_file_number != kInvalidBlobFileNumber) {
+      r.append(" blob_file:");
+      AppendNumberTo(&r, f.oldest_blob_file_number);
+    }
+    r.append(" oldest_ancester_time:");
+    AppendNumberTo(&r, f.oldest_ancester_time);
+    r.append(" file_creation_time:");
+    AppendNumberTo(&r, f.file_creation_time);
+    r.append(" epoch_number:");
+    AppendNumberTo(&r, f.epoch_number);
+    r.append(" file_checksum:");
+    r.append(Slice(f.file_checksum).ToString(true));
+    r.append(" file_checksum_func_name: ");
+    r.append(f.file_checksum_func_name);
+    if (f.temperature != Temperature::kUnknown) {
+      r.append(" temperature: ");
+      // Maybe change to human readable format when the feature becomes
+      // permanent
+      r.append(std::to_string(static_cast<int>(f.temperature)));
+    }
+    if (f.unique_id != kNullUniqueId64x2) {
+      r.append(" unique_id(internal): ");
+      UniqueId64x2 id = f.unique_id;
+      r.append(InternalUniqueIdToHumanString(&id));
+      r.append(" public_unique_id: ");
+      InternalUniqueIdToExternal(&id);
+      r.append(UniqueIdToHumanString(EncodeUniqueIdBytes(&id)));
+    }
+    r.append(" tail size:");
+    AppendNumberTo(&r, f.tail_size);
+  }
+
+  for (const auto& blob_file_addition : blob_file_additions_) {
+    r.append("\n  BlobFileAddition: ");
+    r.append(blob_file_addition.DebugString());
+  }
+
+  for (const auto& blob_file_garbage : blob_file_garbages_) {
+    r.append("\n  BlobFileGarbage: ");
+    r.append(blob_file_garbage.DebugString());
+  }
+
+  for (const auto& wal_addition : wal_additions_) {
+    r.append("\n  WalAddition: ");
+    r.append(wal_addition.DebugString());
+  }
+
+  if (!wal_deletion_.IsEmpty()) {
+    r.append("\n  WalDeletion: ");
+    r.append(wal_deletion_.DebugString());
+  }
+
+  r.append("\n  ColumnFamily: ");
+  AppendNumberTo(&r, column_family_);
+  if (is_column_family_add_) {
+    r.append("\n  ColumnFamilyAdd: ");
+    r.append(column_family_name_);
+  }
+  if (is_column_family_drop_) {
+    r.append("\n  ColumnFamilyDrop");
+  }
+  if (is_in_atomic_group_) {
+    r.append("\n  AtomicGroup: ");
+    AppendNumberTo(&r, remaining_entries_);
+    r.append(" entries remains");
+  }
+  if (HasFullHistoryTsLow()) {
+    r.append("\n  FullHistoryTsLow: ");
+    r.append(Slice(full_history_ts_low_).ToString(hex_key));
+  }
+  r.append("\n}\n");
+  return r;
+}
+
+std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const {
+  JSONWriter jw;
+  jw << "EditNumber" << edit_num;
+
+  if (has_db_id_) {
+    jw << "DB ID" << db_id_;
+  }
+  if (has_comparator_) {
+    jw << "Comparator" << comparator_;
+  }
+  if (has_log_number_) {
+    jw << "LogNumber" << log_number_;
+  }
+  if (has_prev_log_number_) {
+    jw << "PrevLogNumber" << prev_log_number_;
+  }
+  if (has_next_file_number_) {
+    jw << "NextFileNumber" << next_file_number_;
+  }
+  if (has_max_column_family_) {
+    jw << "MaxColumnFamily" << max_column_family_;
+  }
+  if (has_min_log_number_to_keep_) {
+    jw << "MinLogNumberToKeep" << min_log_number_to_keep_;
+  }
+  if (has_last_sequence_) {
+    jw << "LastSeq" << last_sequence_;
+  }
+
+  if (!deleted_files_.empty()) {
+    jw << "DeletedFiles";
+    jw.StartArray();
+
+    for (const auto& deleted_file : deleted_files_) {
+      jw.StartArrayedObject();
+      jw << "Level" << deleted_file.first;
+      jw << "FileNumber" << deleted_file.second;
+      jw.EndArrayedObject();
+    }
+
+    jw.EndArray();
+  }
+
+  if (!new_files_.empty()) {
+    jw << "AddedFiles";
+    jw.StartArray();
+
+    for (size_t i = 0; i < new_files_.size(); i++) {
+      jw.StartArrayedObject();
+      jw << "Level" << new_files_[i].first;
+      const FileMetaData& f = new_files_[i].second;
+      jw << "FileNumber" << f.fd.GetNumber();
+      jw << "FileSize" << f.fd.GetFileSize();
+      jw << "SmallestIKey" << f.smallest.DebugString(hex_key);
+      jw << "LargestIKey" << f.largest.DebugString(hex_key);
+      jw << "OldestAncesterTime" << f.oldest_ancester_time;
+      jw << "FileCreationTime" << f.file_creation_time;
+      jw << "EpochNumber" << f.epoch_number;
+      jw << "FileChecksum" << Slice(f.file_checksum).ToString(true);
+      jw << "FileChecksumFuncName" << f.file_checksum_func_name;
+      if (f.temperature != Temperature::kUnknown) {
+        jw << "temperature" << std::to_string(static_cast<int>(f.temperature));
+      }
+      if (f.oldest_blob_file_number != kInvalidBlobFileNumber) {
+        jw << "OldestBlobFile" << f.oldest_blob_file_number;
+      }
+      if (f.temperature != Temperature::kUnknown) {
+        // Maybe change to human readable format when the feature becomes
+        // permanent
+        jw << "Temperature" << static_cast<int>(f.temperature);
+      }
+      jw << "TailSize" << f.tail_size;
+      jw.EndArrayedObject();
+    }
+
+    jw.EndArray();
+  }
+
+  if (!blob_file_additions_.empty()) {
+    jw << "BlobFileAdditions";
+
+    jw.StartArray();
+
+    for (const auto& blob_file_addition : blob_file_additions_) {
+      jw.StartArrayedObject();
+      jw << blob_file_addition;
+      jw.EndArrayedObject();
+    }
+
+    jw.EndArray();
+  }
+
+  if (!blob_file_garbages_.empty()) {
+    jw << "BlobFileGarbages";
+
+    jw.StartArray();
+
+    for (const auto& blob_file_garbage : blob_file_garbages_) {
+      jw.StartArrayedObject();
+      jw << blob_file_garbage;
+      jw.EndArrayedObject();
+    }
+
+    jw.EndArray();
+  }
+
+  if (!wal_additions_.empty()) {
+    jw << "WalAdditions";
+
+    jw.StartArray();
+
+    for (const auto& wal_addition : wal_additions_) {
+      jw.StartArrayedObject();
+      jw << wal_addition;
+      jw.EndArrayedObject();
+    }
+
+    jw.EndArray();
+  }
+
+  if (!wal_deletion_.IsEmpty()) {
+    jw << "WalDeletion";
+    jw.StartObject();
+    jw << wal_deletion_;
+    jw.EndObject();
+  }
+
+  jw << "ColumnFamily" << column_family_;
+
+  if (is_column_family_add_) {
+    jw << "ColumnFamilyAdd" << column_family_name_;
+  }
+  if (is_column_family_drop_) {
+    jw << "ColumnFamilyDrop" << column_family_name_;
+  }
+  if (is_in_atomic_group_) {
+    jw << "AtomicGroup" << remaining_entries_;
+  }
+
+  if (HasFullHistoryTsLow()) {
+    jw << "FullHistoryTsLow" << Slice(full_history_ts_low_).ToString(hex_key);
+  }
+
+  jw.EndObject();
+
+  return jw.Get();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.h
new file mode 100644
index 0000000000..07e8f37742
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit.h
@@ -0,0 +1,702 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <algorithm>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "db/blob/blob_file_addition.h"
+#include "db/blob/blob_file_garbage.h"
+#include "db/dbformat.h"
+#include "db/wal_edit.h"
+#include "memory/arena.h"
+#include "port/malloc.h"
+#include "rocksdb/advanced_cache.h"
+#include "rocksdb/advanced_options.h"
+#include "table/table_reader.h"
+#include "table/unique_id_impl.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Tag numbers for serialized VersionEdit. These numbers are written to
+// disk and should not be changed. The number should be forward compatible so
+// users can down-grade RocksDB safely. A future Tag is ignored by doing '&'
+// between Tag and kTagSafeIgnoreMask field.
+enum Tag : uint32_t {
+  kComparator = 1,
+  kLogNumber = 2,
+  kNextFileNumber = 3,
+  kLastSequence = 4,
+  kCompactCursor = 5,
+  kDeletedFile = 6,
+  kNewFile = 7,
+  // 8 was used for large value refs
+  kPrevLogNumber = 9,
+  kMinLogNumberToKeep = 10,
+
+  // these are new formats divergent from open source leveldb
+  kNewFile2 = 100,
+  kNewFile3 = 102,
+  kNewFile4 = 103,      // 4th (the latest) format version of adding files
+  kColumnFamily = 200,  // specify column family for version edit
+  kColumnFamilyAdd = 201,
+  kColumnFamilyDrop = 202,
+  kMaxColumnFamily = 203,
+
+  kInAtomicGroup = 300,
+
+  kBlobFileAddition = 400,
+  kBlobFileGarbage,
+
+  // Mask for an unidentified tag from the future which can be safely ignored.
+  kTagSafeIgnoreMask = 1 << 13,
+
+  // Forward compatible (aka ignorable) records
+  kDbId,
+  kBlobFileAddition_DEPRECATED,
+  kBlobFileGarbage_DEPRECATED,
+  kWalAddition,
+  kWalDeletion,
+  kFullHistoryTsLow,
+  kWalAddition2,
+  kWalDeletion2,
+};
+
+enum NewFileCustomTag : uint32_t {
+  kTerminate = 1,  // The end of customized fields
+  kNeedCompaction = 2,
+  // Since Manifest is not entirely forward-compatible, we currently encode
+  // kMinLogNumberToKeep as part of NewFile as a hack. This should be removed
+  // when manifest becomes forward-compatible.
+  kMinLogNumberToKeepHack = 3,
+  kOldestBlobFileNumber = 4,
+  kOldestAncesterTime = 5,
+  kFileCreationTime = 6,
+  kFileChecksum = 7,
+  kFileChecksumFuncName = 8,
+  kTemperature = 9,
+  kMinTimestamp = 10,
+  kMaxTimestamp = 11,
+  kUniqueId = 12,
+  kEpochNumber = 13,
+  kCompensatedRangeDeletionSize = 14,
+  kTailSize = 15,
+
+  // If this bit for the custom tag is set, opening DB should fail if
+  // we don't know this field.
+  kCustomTagNonSafeIgnoreMask = 1 << 6,
+
+  // Forward incompatible (aka unignorable) fields
+  kPathId,
+};
+
+class VersionSet;
+
+constexpr uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF;
+constexpr uint64_t kUnknownOldestAncesterTime = 0;
+constexpr uint64_t kUnknownFileCreationTime = 0;
+constexpr uint64_t kUnknownEpochNumber = 0;
+// If `Options::allow_ingest_behind` is true, this epoch number
+// will be dedicated to files ingested behind.
+constexpr uint64_t kReservedEpochNumberForFileIngestedBehind = 1;
+
+extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id);
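+
+// Packing sketch: the file number occupies the low 62 bits and the path id
+// the high 2 bits, so PackFileNumberAndPathId(5, 1) == 5 | (1 *
+// (kFileNumberMask + 1)) == 5 + (1ull << 62); GetNumber() and GetPathId()
+// below invert this.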
+
+// A copyable structure contains information needed to read data from an SST
+// file. It can contain a pointer to a table reader opened for the file, or
+// file number and size, which can be used to create a new table reader for it.
+// The behavior is undefined when a copy of the structure is used when the
+// file is not in any live version any more.
+struct FileDescriptor {
+  // Table reader in table_reader_handle
+  TableReader* table_reader;
+  uint64_t packed_number_and_path_id;
+  uint64_t file_size;             // File size in bytes
+  SequenceNumber smallest_seqno;  // The smallest seqno in this file
+  SequenceNumber largest_seqno;   // The largest seqno in this file
+
+  FileDescriptor() : FileDescriptor(0, 0, 0) {}
+
+  FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size)
+      : FileDescriptor(number, path_id, _file_size, kMaxSequenceNumber, 0) {}
+
+  FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size,
+                 SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno)
+      : table_reader(nullptr),
+        packed_number_and_path_id(PackFileNumberAndPathId(number, path_id)),
+        file_size(_file_size),
+        smallest_seqno(_smallest_seqno),
+        largest_seqno(_largest_seqno) {}
+
+  FileDescriptor(const FileDescriptor& fd) { *this = fd; }
+
+  FileDescriptor& operator=(const FileDescriptor& fd) {
+    table_reader = fd.table_reader;
+    packed_number_and_path_id = fd.packed_number_and_path_id;
+    file_size = fd.file_size;
+    smallest_seqno = fd.smallest_seqno;
+    largest_seqno = fd.largest_seqno;
+    return *this;
+  }
+
+  uint64_t GetNumber() const {
+    return packed_number_and_path_id & kFileNumberMask;
+  }
+  uint32_t GetPathId() const {
+    return static_cast<uint32_t>(packed_number_and_path_id /
+                                 (kFileNumberMask + 1));
+  }
+  uint64_t GetFileSize() const { return file_size; }
+};
+
+struct FileSampledStats {
+  FileSampledStats() : num_reads_sampled(0) {}
+  FileSampledStats(const FileSampledStats& other) { *this = other; }
+  FileSampledStats& operator=(const FileSampledStats& other) {
+    num_reads_sampled = other.num_reads_sampled.load();
+    return *this;
+  }
+
+  // number of user reads to this file.
+  mutable std::atomic<uint64_t> num_reads_sampled;
+};
+
+struct FileMetaData {
+  FileDescriptor fd;
+  InternalKey smallest;  // Smallest internal key served by table
+  InternalKey largest;   // Largest internal key served by table
+
+  // Needs to be disposed when refs becomes 0.
+  Cache::Handle* table_reader_handle = nullptr;
+
+  FileSampledStats stats;
+
+  // Stats for compensating deletion entries during compaction
+
+  // File size compensated by deletion entry.
+  // This is used to compute a file's compaction priority, and is updated in
+  // Version::ComputeCompensatedSizes() first time when the file is created or
+  // loaded. After it is updated (!= 0), it is immutable.
+  uint64_t compensated_file_size = 0;
+  // These values can mutate, but they can only be read or written from
+  // single-threaded LogAndApply thread
+  uint64_t num_entries = 0;  // the number of entries.
+  // The number of deletion entries, including range deletions.
+  uint64_t num_deletions = 0;
+  uint64_t raw_key_size = 0;    // total uncompressed key size.
+  uint64_t raw_value_size = 0;  // total uncompressed value size.
+  uint64_t num_range_deletions = 0;
+  // This is computed during Flush/Compaction, and is added to
+  // `compensated_file_size`. Currently, this estimates the size of keys in the
+  // next level covered by range tombstones in this file.
+  uint64_t compensated_range_deletion_size = 0;
+
+  int refs = 0;  // Reference count
+
+  bool being_compacted = false;       // Is this file undergoing compaction?
+  bool init_stats_from_file = false;  // true if the data-entry stats of this
+                                      // file has initialized from file.
+
+  bool marked_for_compaction = false;  // True if client asked us nicely to
+                                       // compact this file.
+  Temperature temperature = Temperature::kUnknown;
+
+  // Used only in BlobDB.
The file number of the oldest blob file this SST file + // refers to. 0 is an invalid value; BlobDB numbers the files starting from 1. + uint64_t oldest_blob_file_number = kInvalidBlobFileNumber; + + // The file could be the compaction output from other SST files, which could + // in turn be outputs for compact older SST files. We track the memtable + // flush timestamp for the oldest SST file that eventually contribute data + // to this file. 0 means the information is not available. + uint64_t oldest_ancester_time = kUnknownOldestAncesterTime; + + // Unix time when the SST file is created. + uint64_t file_creation_time = kUnknownFileCreationTime; + + // The order of a file being flushed or ingested/imported. + // Compaction output file will be assigned with the minimum `epoch_number` + // among input files'. + // For L0, larger `epoch_number` indicates newer L0 file. + uint64_t epoch_number = kUnknownEpochNumber; + + // File checksum + std::string file_checksum = kUnknownFileChecksum; + + // File checksum function name + std::string file_checksum_func_name = kUnknownFileChecksumFuncName; + + // SST unique id + UniqueId64x2 unique_id{}; + + // Size of the "tail" part of a SST file + // "Tail" refers to all blocks after data blocks till the end of the SST file + uint64_t tail_size = 0; + + FileMetaData() = default; + + FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size, + const InternalKey& smallest_key, const InternalKey& largest_key, + const SequenceNumber& smallest_seq, + const SequenceNumber& largest_seq, bool marked_for_compact, + Temperature _temperature, uint64_t oldest_blob_file, + uint64_t _oldest_ancester_time, uint64_t _file_creation_time, + uint64_t _epoch_number, const std::string& _file_checksum, + const std::string& _file_checksum_func_name, + UniqueId64x2 _unique_id, + const uint64_t _compensated_range_deletion_size, + uint64_t _tail_size) + : fd(file, file_path_id, file_size, smallest_seq, largest_seq), + smallest(smallest_key), + largest(largest_key), + compensated_range_deletion_size(_compensated_range_deletion_size), + marked_for_compaction(marked_for_compact), + temperature(_temperature), + oldest_blob_file_number(oldest_blob_file), + oldest_ancester_time(_oldest_ancester_time), + file_creation_time(_file_creation_time), + epoch_number(_epoch_number), + file_checksum(_file_checksum), + file_checksum_func_name(_file_checksum_func_name), + unique_id(std::move(_unique_id)), + tail_size(_tail_size) { + TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this); + } + + // REQUIRED: Keys must be given to the function in sorted order (it expects + // the last key to be the largest). + Status UpdateBoundaries(const Slice& key, const Slice& value, + SequenceNumber seqno, ValueType value_type); + + // Unlike UpdateBoundaries, ranges do not need to be presented in any + // particular order. + void UpdateBoundariesForRange(const InternalKey& start, + const InternalKey& end, SequenceNumber seqno, + const InternalKeyComparator& icmp) { + if (smallest.size() == 0 || icmp.Compare(start, smallest) < 0) { + smallest = start; + } + if (largest.size() == 0 || icmp.Compare(largest, end) < 0) { + largest = end; + } + assert(icmp.Compare(smallest, largest) <= 0); + fd.smallest_seqno = std::min(fd.smallest_seqno, seqno); + fd.largest_seqno = std::max(fd.largest_seqno, seqno); + } + + // Try to get oldest ancester time from the class itself or table properties + // if table reader is already pinned. + // 0 means the information is not available. 
+  uint64_t TryGetOldestAncesterTime() {
+    if (oldest_ancester_time != kUnknownOldestAncesterTime) {
+      return oldest_ancester_time;
+    } else if (fd.table_reader != nullptr &&
+               fd.table_reader->GetTableProperties() != nullptr) {
+      return fd.table_reader->GetTableProperties()->creation_time;
+    }
+    return kUnknownOldestAncesterTime;
+  }
+
+  uint64_t TryGetFileCreationTime() {
+    if (file_creation_time != kUnknownFileCreationTime) {
+      return file_creation_time;
+    } else if (fd.table_reader != nullptr &&
+               fd.table_reader->GetTableProperties() != nullptr) {
+      return fd.table_reader->GetTableProperties()->file_creation_time;
+    }
+    return kUnknownFileCreationTime;
+  }
+
+  // WARNING: a manual update to this function is needed
+  // whenever a new string property is added to FileMetaData,
+  // to reduce approximation error.
+  //
+  // TODO: eliminate the need of manually updating this function
+  // for new string properties
+  size_t ApproximateMemoryUsage() const {
+    size_t usage = 0;
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+    usage += malloc_usable_size(const_cast<FileMetaData*>(this));
+#else
+    usage += sizeof(*this);
+#endif  // ROCKSDB_MALLOC_USABLE_SIZE
+    usage += smallest.size() + largest.size() + file_checksum.size() +
+             file_checksum_func_name.size();
+    return usage;
+  }
+};
+
+// A compressed copy of file meta data that contains just the minimum data
+// needed to serve read operations, while still keeping the pointer to the
+// full metadata of the file in case it is needed.
+struct FdWithKeyRange {
+  FileDescriptor fd;
+  FileMetaData* file_metadata;  // Points to all metadata
+  Slice smallest_key;           // slice that contains the smallest key
+  Slice largest_key;            // slice that contains the largest key
+
+  FdWithKeyRange()
+      : fd(), file_metadata(nullptr), smallest_key(), largest_key() {}
+
+  FdWithKeyRange(FileDescriptor _fd, Slice _smallest_key, Slice _largest_key,
+                 FileMetaData* _file_metadata)
+      : fd(_fd),
+        file_metadata(_file_metadata),
+        smallest_key(_smallest_key),
+        largest_key(_largest_key) {}
+};
+
+// Data structure to store an array of FdWithKeyRange in one level
+// Actual data is guaranteed to be stored closely
+struct LevelFilesBrief {
+  size_t num_files;
+  FdWithKeyRange* files;
+  LevelFilesBrief() {
+    num_files = 0;
+    files = nullptr;
+  }
+};
+
+// The state of a DB at any given time is referred to as a Version.
+// Any modification to the Version is considered a Version Edit. A Version is
+// constructed by joining a sequence of Version Edits. Version Edits are
+// written to the MANIFEST file.
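+//
+// As a rough illustration (a minimal sketch, not upstream code): recovering
+// the current Version amounts to folding the edits recorded in the MANIFEST
+// over an empty starting state, e.g.
+//
+//   VersionStorageInfo state;                // empty
+//   for (const VersionEdit& e : manifest) {  // in record order
+//     apply(e, &state);                      // add/delete files, blobs, WALs
+//   }
+//   // `state` now describes the LSM tree as of the last record.
+//
+// The class below records one such edit; the handlers in
+// version_edit_handler.h implement the replay with error checking, atomic
+// groups, and column families.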
+class VersionEdit {
+ public:
+  void Clear();
+
+  void SetDBId(const std::string& db_id) {
+    has_db_id_ = true;
+    db_id_ = db_id;
+  }
+  bool HasDbId() const { return has_db_id_; }
+  const std::string& GetDbId() const { return db_id_; }
+
+  void SetComparatorName(const Slice& name) {
+    has_comparator_ = true;
+    comparator_ = name.ToString();
+  }
+  bool HasComparatorName() const { return has_comparator_; }
+  const std::string& GetComparatorName() const { return comparator_; }
+
+  void SetLogNumber(uint64_t num) {
+    has_log_number_ = true;
+    log_number_ = num;
+  }
+  bool HasLogNumber() const { return has_log_number_; }
+  uint64_t GetLogNumber() const { return log_number_; }
+
+  void SetPrevLogNumber(uint64_t num) {
+    has_prev_log_number_ = true;
+    prev_log_number_ = num;
+  }
+  bool HasPrevLogNumber() const { return has_prev_log_number_; }
+  uint64_t GetPrevLogNumber() const { return prev_log_number_; }
+
+  void SetNextFile(uint64_t num) {
+    has_next_file_number_ = true;
+    next_file_number_ = num;
+  }
+  bool HasNextFile() const { return has_next_file_number_; }
+  uint64_t GetNextFile() const { return next_file_number_; }
+
+  void SetMaxColumnFamily(uint32_t max_column_family) {
+    has_max_column_family_ = true;
+    max_column_family_ = max_column_family;
+  }
+  bool HasMaxColumnFamily() const { return has_max_column_family_; }
+  uint32_t GetMaxColumnFamily() const { return max_column_family_; }
+
+  void SetMinLogNumberToKeep(uint64_t num) {
+    has_min_log_number_to_keep_ = true;
+    min_log_number_to_keep_ = num;
+  }
+  bool HasMinLogNumberToKeep() const { return has_min_log_number_to_keep_; }
+  uint64_t GetMinLogNumberToKeep() const { return min_log_number_to_keep_; }
+
+  void SetLastSequence(SequenceNumber seq) {
+    has_last_sequence_ = true;
+    last_sequence_ = seq;
+  }
+  bool HasLastSequence() const { return has_last_sequence_; }
+  SequenceNumber GetLastSequence() const { return last_sequence_; }
+
+  // Delete the specified table file from the specified level.
+  void DeleteFile(int level, uint64_t file) {
+    deleted_files_.emplace(level, file);
+  }
+
+  // Retrieve the table files deleted as well as their associated levels.
+  using DeletedFiles = std::set<std::pair<int, uint64_t>>;
+  const DeletedFiles& GetDeletedFiles() const { return deleted_files_; }
+
+  // Add the specified table file at the specified level.
+  // REQUIRES: "smallest" and "largest" are smallest and largest keys in file
+  // REQUIRES: "oldest_blob_file_number" is the number of the oldest blob file
+  // referred to by this file if any, kInvalidBlobFileNumber otherwise.
+  void AddFile(int level, uint64_t file, uint32_t file_path_id,
+               uint64_t file_size, const InternalKey& smallest,
+               const InternalKey& largest, const SequenceNumber& smallest_seqno,
+               const SequenceNumber& largest_seqno, bool marked_for_compaction,
+               Temperature temperature, uint64_t oldest_blob_file_number,
+               uint64_t oldest_ancester_time, uint64_t file_creation_time,
+               uint64_t epoch_number, const std::string& file_checksum,
+               const std::string& file_checksum_func_name,
+               const UniqueId64x2& unique_id,
+               const uint64_t compensated_range_deletion_size,
+               uint64_t tail_size) {
+    assert(smallest_seqno <= largest_seqno);
+    new_files_.emplace_back(
+        level,
+        FileMetaData(file, file_path_id, file_size, smallest, largest,
+                     smallest_seqno, largest_seqno, marked_for_compaction,
+                     temperature, oldest_blob_file_number, oldest_ancester_time,
+                     file_creation_time, epoch_number, file_checksum,
+                     file_checksum_func_name, unique_id,
+                     compensated_range_deletion_size, tail_size));
+    if (!HasLastSequence() || largest_seqno > GetLastSequence()) {
+      SetLastSequence(largest_seqno);
+    }
+  }
+
+  void AddFile(int level, const FileMetaData& f) {
+    assert(f.fd.smallest_seqno <= f.fd.largest_seqno);
+    new_files_.emplace_back(level, f);
+    if (!HasLastSequence() || f.fd.largest_seqno > GetLastSequence()) {
+      SetLastSequence(f.fd.largest_seqno);
+    }
+  }
+
+  // Retrieve the table files added as well as their associated levels.
+  using NewFiles = std::vector<std::pair<int, FileMetaData>>;
+  const NewFiles& GetNewFiles() const { return new_files_; }
+
+  // Retrieve all the compact cursors
+  using CompactCursors = std::vector<std::pair<int, InternalKey>>;
+  const CompactCursors& GetCompactCursors() const { return compact_cursors_; }
+  void AddCompactCursor(int level, const InternalKey& cursor) {
+    compact_cursors_.push_back(std::make_pair(level, cursor));
+  }
+  void SetCompactCursors(
+      const std::vector<InternalKey>& compact_cursors_by_level) {
+    compact_cursors_.clear();
+    compact_cursors_.reserve(compact_cursors_by_level.size());
+    for (int i = 0; i < (int)compact_cursors_by_level.size(); i++) {
+      if (compact_cursors_by_level[i].Valid()) {
+        compact_cursors_.push_back(
+            std::make_pair(i, compact_cursors_by_level[i]));
+      }
+    }
+  }
+
+  // Add a new blob file.
+  void AddBlobFile(uint64_t blob_file_number, uint64_t total_blob_count,
+                   uint64_t total_blob_bytes, std::string checksum_method,
+                   std::string checksum_value) {
+    blob_file_additions_.emplace_back(
+        blob_file_number, total_blob_count, total_blob_bytes,
+        std::move(checksum_method), std::move(checksum_value));
+  }
+
+  void AddBlobFile(BlobFileAddition blob_file_addition) {
+    blob_file_additions_.emplace_back(std::move(blob_file_addition));
+  }
+
+  // Retrieve all the blob files added.
+  using BlobFileAdditions = std::vector<BlobFileAddition>;
+  const BlobFileAdditions& GetBlobFileAdditions() const {
+    return blob_file_additions_;
+  }
+
+  void SetBlobFileAdditions(BlobFileAdditions blob_file_additions) {
+    assert(blob_file_additions_.empty());
+    blob_file_additions_ = std::move(blob_file_additions);
+  }
+
+  // Add garbage for an existing blob file.
+  void AddBlobFileGarbage(uint64_t blob_file_number,
+                          uint64_t garbage_blob_count,
+                          uint64_t garbage_blob_bytes) {
+    blob_file_garbages_.emplace_back(blob_file_number, garbage_blob_count,
+                                     garbage_blob_bytes);
+  }
+
+  void AddBlobFileGarbage(BlobFileGarbage blob_file_garbage) {
+    blob_file_garbages_.emplace_back(std::move(blob_file_garbage));
+  }
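+
+  // For orientation, a minimal sketch (with hypothetical file numbers and
+  // levels, not upstream code) of how a compaction might record its result
+  // in a VersionEdit:
+  //
+  //   VersionEdit edit;
+  //   edit.DeleteFile(/*level=*/1, /*file=*/7);  // compaction inputs
+  //   edit.DeleteFile(/*level=*/2, /*file=*/8);
+  //   edit.AddFile(/*level=*/2, output_meta);    // compaction output
+  //   // the edit is then encoded and appended to the MANIFEST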
+
+  // Retrieve all the blob file garbage added.
+  using BlobFileGarbages = std::vector<BlobFileGarbage>;
+  const BlobFileGarbages& GetBlobFileGarbages() const {
+    return blob_file_garbages_;
+  }
+
+  void SetBlobFileGarbages(BlobFileGarbages blob_file_garbages) {
+    assert(blob_file_garbages_.empty());
+    blob_file_garbages_ = std::move(blob_file_garbages);
+  }
+
+  // Add a WAL (either just created or closed).
+  // AddWal and DeleteWalsBefore cannot be called on the same VersionEdit.
+  void AddWal(WalNumber number, WalMetadata metadata = WalMetadata()) {
+    assert(NumEntries() == wal_additions_.size());
+    wal_additions_.emplace_back(number, std::move(metadata));
+  }
+
+  // Retrieve all the added WALs.
+  const WalAdditions& GetWalAdditions() const { return wal_additions_; }
+
+  bool IsWalAddition() const { return !wal_additions_.empty(); }
+
+  // Delete a WAL (either directly deleted or archived).
+  // AddWal and DeleteWalsBefore cannot be called on the same VersionEdit.
+  void DeleteWalsBefore(WalNumber number) {
+    assert((NumEntries() == 1) == !wal_deletion_.IsEmpty());
+    wal_deletion_ = WalDeletion(number);
+  }
+
+  const WalDeletion& GetWalDeletion() const { return wal_deletion_; }
+
+  bool IsWalDeletion() const { return !wal_deletion_.IsEmpty(); }
+
+  bool IsWalManipulation() const {
+    size_t entries = NumEntries();
+    return (entries > 0) && ((entries == wal_additions_.size()) ||
+                             (entries == !wal_deletion_.IsEmpty()));
+  }
+
+  // Number of edits
+  size_t NumEntries() const {
+    return new_files_.size() + deleted_files_.size() +
+           blob_file_additions_.size() + blob_file_garbages_.size() +
+           wal_additions_.size() + !wal_deletion_.IsEmpty();
+  }
+
+  void SetColumnFamily(uint32_t column_family_id) {
+    column_family_ = column_family_id;
+  }
+  uint32_t GetColumnFamily() const { return column_family_; }
+
+  // set column family ID by calling SetColumnFamily()
+  void AddColumnFamily(const std::string& name) {
+    assert(!is_column_family_drop_);
+    assert(!is_column_family_add_);
+    assert(NumEntries() == 0);
+    is_column_family_add_ = true;
+    column_family_name_ = name;
+  }
+
+  // set column family ID by calling SetColumnFamily()
+  void DropColumnFamily() {
+    assert(!is_column_family_drop_);
+    assert(!is_column_family_add_);
+    assert(NumEntries() == 0);
+    is_column_family_drop_ = true;
+  }
+
+  bool IsColumnFamilyManipulation() const {
+    return is_column_family_add_ || is_column_family_drop_;
+  }
+
+  bool IsColumnFamilyAdd() const { return is_column_family_add_; }
+
+  bool IsColumnFamilyDrop() const { return is_column_family_drop_; }
+
+  void MarkAtomicGroup(uint32_t remaining_entries) {
+    is_in_atomic_group_ = true;
+    remaining_entries_ = remaining_entries;
+  }
+  bool IsInAtomicGroup() const { return is_in_atomic_group_; }
+  uint32_t GetRemainingEntries() const { return remaining_entries_; }
+
+  bool HasFullHistoryTsLow() const { return !full_history_ts_low_.empty(); }
+  const std::string& GetFullHistoryTsLow() const {
+    assert(HasFullHistoryTsLow());
+    return full_history_ts_low_;
+  }
+  void SetFullHistoryTsLow(std::string full_history_ts_low) {
+    assert(!full_history_ts_low.empty());
+    full_history_ts_low_ = std::move(full_history_ts_low);
+  }
+
+  // return true on success.
+  bool EncodeTo(std::string* dst) const;
+  Status DecodeFrom(const Slice& src);
+
+  std::string DebugString(bool hex_key = false) const;
+  std::string DebugJSON(int edit_num, bool hex_key = false) const;
+
+ private:
+  friend class ReactiveVersionSet;
+  friend class VersionEditHandlerBase;
+  friend class ListColumnFamiliesHandler;
+  friend class VersionEditHandler;
+  friend class VersionEditHandlerPointInTime;
+  friend class DumpManifestHandler;
+  friend class VersionSet;
+  friend class Version;
+  friend class AtomicGroupReadBuffer;
+
+  bool GetLevel(Slice* input, int* level, const char** msg);
+
+  const char* DecodeNewFile4From(Slice* input);
+
+  int max_level_ = 0;
+  std::string db_id_;
+  std::string comparator_;
+  uint64_t log_number_ = 0;
+  uint64_t prev_log_number_ = 0;
+  uint64_t next_file_number_ = 0;
+  uint32_t max_column_family_ = 0;
+  // The most recent WAL log number that is deleted
+  uint64_t min_log_number_to_keep_ = 0;
+  SequenceNumber last_sequence_ = 0;
+  bool has_db_id_ = false;
+  bool has_comparator_ = false;
+  bool has_log_number_ = false;
+  bool has_prev_log_number_ = false;
+  bool has_next_file_number_ = false;
+  bool has_max_column_family_ = false;
+  bool has_min_log_number_to_keep_ = false;
+  bool has_last_sequence_ = false;
+
+  // Compaction cursors for round-robin compaction policy
+  CompactCursors compact_cursors_;
+
+  DeletedFiles deleted_files_;
+  NewFiles new_files_;
+
+  BlobFileAdditions blob_file_additions_;
+  BlobFileGarbages blob_file_garbages_;
+
+  WalAdditions wal_additions_;
+  WalDeletion wal_deletion_;
+
+  // Each version edit record should have column_family_ set
+  // If it's not set, it is default (0)
+  uint32_t column_family_ = 0;
+  // a version edit can be either column_family add or
+  // column_family drop. If it's column family add,
+  // it also includes column family name.
+  bool is_column_family_drop_ = false;
+  bool is_column_family_add_ = false;
+  std::string column_family_name_;
+
+  bool is_in_atomic_group_ = false;
+  uint32_t remaining_entries_ = 0;
+
+  std::string full_history_ts_low_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.cc
new file mode 100644
index 0000000000..d507c4b0c8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.cc
@@ -0,0 +1,1017 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
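+
+// A rough sketch of the control flow this file implements (illustrative,
+// simplified): log::Reader yields raw MANIFEST records, each record decodes
+// into a VersionEdit, and a handler subclass applies it to in-memory state.
+//
+//   log::Reader reader(...);          // positioned at the MANIFEST start
+//   VersionEditHandler handler(...);  // or a subclass, see below
+//   Status log_read_status;
+//   handler.Iterate(reader, &log_read_status);  // decode + apply records
+//   Status s = handler.status();                // overall result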
+ +#include "db/version_edit_handler.h" + +#include +#include + +#include "db/blob/blob_file_reader.h" +#include "db/blob/blob_source.h" +#include "db/version_edit.h" +#include "logging/logging.h" +#include "monitoring/persistent_stats_history.h" + +namespace ROCKSDB_NAMESPACE { + +void VersionEditHandlerBase::Iterate(log::Reader& reader, + Status* log_read_status) { + Slice record; + std::string scratch; + assert(log_read_status); + assert(log_read_status->ok()); + + [[maybe_unused]] size_t recovered_edits = 0; + Status s = Initialize(); + while (reader.LastRecordEnd() < max_manifest_read_size_ && s.ok() && + reader.ReadRecord(&record, &scratch) && log_read_status->ok()) { + VersionEdit edit; + s = edit.DecodeFrom(record); + if (!s.ok()) { + break; + } + + s = read_buffer_.AddEdit(&edit); + if (!s.ok()) { + break; + } + ColumnFamilyData* cfd = nullptr; + if (edit.is_in_atomic_group_) { + if (read_buffer_.IsFull()) { + for (auto& e : read_buffer_.replay_buffer()) { + s = ApplyVersionEdit(e, &cfd); + if (!s.ok()) { + break; + } + ++recovered_edits; + } + if (!s.ok()) { + break; + } + read_buffer_.Clear(); + } + } else { + s = ApplyVersionEdit(edit, &cfd); + if (s.ok()) { + ++recovered_edits; + } + } + } + if (!log_read_status->ok()) { + s = *log_read_status; + } + + CheckIterationResult(reader, &s); + + if (!s.ok()) { + if (s.IsCorruption()) { + // when we find a Corruption error, something is + // wrong with the underlying file. in this case we + // want to report the filename, so in here we append + // the filename to the Corruption message + assert(reader.file()); + + // build a new error message + std::stringstream message; + // append previous dynamic state message + const char* state = s.getState(); + if (state != nullptr) { + message << state; + message << ' '; + } + // append the filename to the corruption message + message << "in file " << reader.file()->file_name(); + // overwrite the status with the extended status + s = Status(s.code(), s.subcode(), s.severity(), message.str()); + } + status_ = s; + } + TEST_SYNC_POINT_CALLBACK("VersionEditHandlerBase::Iterate:Finish", + &recovered_edits); +} + +Status ListColumnFamiliesHandler::ApplyVersionEdit( + VersionEdit& edit, ColumnFamilyData** /*unused*/) { + Status s; + if (edit.is_column_family_add_) { + if (column_family_names_.find(edit.column_family_) != + column_family_names_.end()) { + s = Status::Corruption("Manifest adding the same column family twice"); + } else { + column_family_names_.insert( + {edit.column_family_, edit.column_family_name_}); + } + } else if (edit.is_column_family_drop_) { + if (column_family_names_.find(edit.column_family_) == + column_family_names_.end()) { + s = Status::Corruption("Manifest - dropping non-existing column family"); + } else { + column_family_names_.erase(edit.column_family_); + } + } + return s; +} + +Status FileChecksumRetriever::ApplyVersionEdit(VersionEdit& edit, + ColumnFamilyData** /*unused*/) { + for (const auto& deleted_file : edit.GetDeletedFiles()) { + Status s = file_checksum_list_.RemoveOneFileChecksum(deleted_file.second); + if (!s.ok()) { + return s; + } + } + for (const auto& new_file : edit.GetNewFiles()) { + Status s = file_checksum_list_.InsertOneFileChecksum( + new_file.second.fd.GetNumber(), new_file.second.file_checksum, + new_file.second.file_checksum_func_name); + if (!s.ok()) { + return s; + } + } + for (const auto& new_blob_file : edit.GetBlobFileAdditions()) { + std::string checksum_value = new_blob_file.GetChecksumValue(); + std::string checksum_method = 
+    std::string checksum_method = new_blob_file.GetChecksumMethod();
+    assert(checksum_value.empty() == checksum_method.empty());
+    if (checksum_method.empty()) {
+      checksum_value = kUnknownFileChecksum;
+      checksum_method = kUnknownFileChecksumFuncName;
+    }
+    Status s = file_checksum_list_.InsertOneFileChecksum(
+        new_blob_file.GetBlobFileNumber(), checksum_value, checksum_method);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  return Status::OK();
+}
+
+VersionEditHandler::VersionEditHandler(
+    bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
+    VersionSet* version_set, bool track_missing_files,
+    bool no_error_if_files_missing, const std::shared_ptr<IOTracer>& io_tracer,
+    const ReadOptions& read_options, bool skip_load_table_files,
+    EpochNumberRequirement epoch_number_requirement)
+    : VersionEditHandlerBase(read_options),
+      read_only_(read_only),
+      column_families_(std::move(column_families)),
+      version_set_(version_set),
+      track_missing_files_(track_missing_files),
+      no_error_if_files_missing_(no_error_if_files_missing),
+      io_tracer_(io_tracer),
+      skip_load_table_files_(skip_load_table_files),
+      initialized_(false),
+      epoch_number_requirement_(epoch_number_requirement) {
+  assert(version_set_ != nullptr);
+}
+
+Status VersionEditHandler::Initialize() {
+  Status s;
+  if (!initialized_) {
+    for (const auto& cf_desc : column_families_) {
+      name_to_options_.emplace(cf_desc.name, cf_desc.options);
+    }
+    auto default_cf_iter = name_to_options_.find(kDefaultColumnFamilyName);
+    if (default_cf_iter == name_to_options_.end()) {
+      s = Status::InvalidArgument("Default column family not specified");
+    }
+    if (s.ok()) {
+      VersionEdit default_cf_edit;
+      default_cf_edit.AddColumnFamily(kDefaultColumnFamilyName);
+      default_cf_edit.SetColumnFamily(0);
+      ColumnFamilyData* cfd =
+          CreateCfAndInit(default_cf_iter->second, default_cf_edit);
+      assert(cfd != nullptr);
+#ifdef NDEBUG
+      (void)cfd;
+#endif
+      initialized_ = true;
+    }
+  }
+  return s;
+}
+
+Status VersionEditHandler::ApplyVersionEdit(VersionEdit& edit,
+                                            ColumnFamilyData** cfd) {
+  Status s;
+  if (edit.is_column_family_add_) {
+    s = OnColumnFamilyAdd(edit, cfd);
+  } else if (edit.is_column_family_drop_) {
+    s = OnColumnFamilyDrop(edit, cfd);
+  } else if (edit.IsWalAddition()) {
+    s = OnWalAddition(edit);
+  } else if (edit.IsWalDeletion()) {
+    s = OnWalDeletion(edit);
+  } else {
+    s = OnNonCfOperation(edit, cfd);
+  }
+  if (s.ok()) {
+    assert(cfd != nullptr);
+    s = ExtractInfoFromVersionEdit(*cfd, edit);
+  }
+  return s;
+}
+
+Status VersionEditHandler::OnColumnFamilyAdd(VersionEdit& edit,
+                                             ColumnFamilyData** cfd) {
+  bool cf_in_not_found = false;
+  bool cf_in_builders = false;
+  CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders);
+
+  assert(cfd != nullptr);
+  *cfd = nullptr;
+  Status s;
+  if (cf_in_builders || cf_in_not_found) {
+    s = Status::Corruption("MANIFEST adding the same column family twice: " +
+                           edit.column_family_name_);
+  }
+  if (s.ok()) {
+    auto cf_options = name_to_options_.find(edit.column_family_name_);
+    // implicitly add persistent_stats column family without requiring user
+    // to specify
+    ColumnFamilyData* tmp_cfd = nullptr;
+    bool is_persistent_stats_column_family =
+        edit.column_family_name_.compare(kPersistentStatsColumnFamilyName) == 0;
+    if (cf_options == name_to_options_.end() &&
+        !is_persistent_stats_column_family) {
+      column_families_not_found_.emplace(edit.column_family_,
+                                         edit.column_family_name_);
+    } else {
+      if (is_persistent_stats_column_family) {
+        ColumnFamilyOptions cfo;
+        OptimizeForPersistentStats(&cfo);
+        tmp_cfd = CreateCfAndInit(cfo, edit);
+      } else {
+        tmp_cfd = CreateCfAndInit(cf_options->second, edit);
+      }
+      *cfd = tmp_cfd;
+    }
+  }
+  return s;
+}
+
+Status VersionEditHandler::OnColumnFamilyDrop(VersionEdit& edit,
+                                              ColumnFamilyData** cfd) {
+  bool cf_in_not_found = false;
+  bool cf_in_builders = false;
+  CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders);
+
+  assert(cfd != nullptr);
+  *cfd = nullptr;
+  ColumnFamilyData* tmp_cfd = nullptr;
+  Status s;
+  if (cf_in_builders) {
+    tmp_cfd = DestroyCfAndCleanup(edit);
+  } else if (cf_in_not_found) {
+    column_families_not_found_.erase(edit.column_family_);
+  } else {
+    s = Status::Corruption("MANIFEST - dropping non-existing column family");
+  }
+  *cfd = tmp_cfd;
+  return s;
+}
+
+Status VersionEditHandler::OnWalAddition(VersionEdit& edit) {
+  assert(edit.IsWalAddition());
+  return version_set_->wals_.AddWals(edit.GetWalAdditions());
+}
+
+Status VersionEditHandler::OnWalDeletion(VersionEdit& edit) {
+  assert(edit.IsWalDeletion());
+  return version_set_->wals_.DeleteWalsBefore(
+      edit.GetWalDeletion().GetLogNumber());
+}
+
+Status VersionEditHandler::OnNonCfOperation(VersionEdit& edit,
+                                            ColumnFamilyData** cfd) {
+  bool cf_in_not_found = false;
+  bool cf_in_builders = false;
+  CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders);
+
+  assert(cfd != nullptr);
+  *cfd = nullptr;
+  Status s;
+  if (!cf_in_not_found) {
+    if (!cf_in_builders) {
+      s = Status::Corruption(
+          "MANIFEST record referencing unknown column family");
+    }
+    ColumnFamilyData* tmp_cfd = nullptr;
+    if (s.ok()) {
+      auto builder_iter = builders_.find(edit.column_family_);
+      assert(builder_iter != builders_.end());
+      tmp_cfd = version_set_->GetColumnFamilySet()->GetColumnFamily(
+          edit.column_family_);
+      assert(tmp_cfd != nullptr);
+      s = MaybeCreateVersion(edit, tmp_cfd, /*force_create_version=*/false);
+      if (s.ok()) {
+        s = builder_iter->second->version_builder()->Apply(&edit);
+      }
+    }
+    *cfd = tmp_cfd;
+  }
+  return s;
+}
+
+// TODO maybe cache the computation result
+bool VersionEditHandler::HasMissingFiles() const {
+  bool ret = false;
+  for (const auto& elem : cf_to_missing_files_) {
+    const auto& missing_files = elem.second;
+    if (!missing_files.empty()) {
+      ret = true;
+      break;
+    }
+  }
+  if (!ret) {
+    for (const auto& elem : cf_to_missing_blob_files_high_) {
+      if (elem.second != kInvalidBlobFileNumber) {
+        ret = true;
+        break;
+      }
+    }
+  }
+  return ret;
+}
+
+void VersionEditHandler::CheckColumnFamilyId(const VersionEdit& edit,
+                                             bool* cf_in_not_found,
+                                             bool* cf_in_builders) const {
+  assert(cf_in_not_found != nullptr);
+  assert(cf_in_builders != nullptr);
+  // Not found means that the user didn't supply that column
+  // family option AND we encountered a column family add
+  // record. Once we encounter a column family drop record,
+  // we will delete the column family from
+  // column_families_not_found.
+  bool in_not_found = column_families_not_found_.find(edit.column_family_) !=
+                      column_families_not_found_.end();
+  // in builders means that the user supplied that column family
+  // option AND that we encountered a column family add record
+  bool in_builders = builders_.find(edit.column_family_) != builders_.end();
+  // They cannot both be true
+  assert(!(in_not_found && in_builders));
+  *cf_in_not_found = in_not_found;
+  *cf_in_builders = in_builders;
+}
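+
+// To summarize the bookkeeping above (an informal recap, not upstream
+// documentation), for a column family ID seen in a MANIFEST record:
+//
+//   in column_families_not_found_ | in builders_ | meaning
+//   ------------------------------+--------------+--------------------------
+//   yes                           | no           | added in MANIFEST but no
+//                                 |              | options were supplied
+//   no                            | yes          | known and being rebuilt
+//   no                            | no           | unknown (corruption, or a
+//                                 |              | CF add record is pending)
+//
+// The two maps are mutually exclusive by construction.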
+
+void VersionEditHandler::CheckIterationResult(const log::Reader& reader,
+                                              Status* s) {
+  assert(s != nullptr);
+  if (!s->ok()) {
+    // Do nothing here.
+  } else if (!version_edit_params_.has_log_number_ ||
+             !version_edit_params_.has_next_file_number_ ||
+             !version_edit_params_.has_last_sequence_) {
+    std::string msg("no ");
+    if (!version_edit_params_.has_log_number_) {
+      msg.append("log_file_number, ");
+    }
+    if (!version_edit_params_.has_next_file_number_) {
+      msg.append("next_file_number, ");
+    }
+    if (!version_edit_params_.has_last_sequence_) {
+      msg.append("last_sequence, ");
+    }
+    msg = msg.substr(0, msg.size() - 2);
+    msg.append(" entry in MANIFEST");
+    *s = Status::Corruption(msg);
+  }
+  // There were some column families in the MANIFEST that weren't specified
+  // in the argument. This is OK in read_only mode
+  if (s->ok() && MustOpenAllColumnFamilies() &&
+      !column_families_not_found_.empty()) {
+    std::string msg;
+    for (const auto& cf : column_families_not_found_) {
+      msg.append(", ");
+      msg.append(cf.second);
+    }
+    msg = msg.substr(2);
+    *s = Status::InvalidArgument("Column families not opened: " + msg);
+  }
+  if (s->ok()) {
+    version_set_->GetColumnFamilySet()->UpdateMaxColumnFamily(
+        version_edit_params_.max_column_family_);
+    version_set_->MarkMinLogNumberToKeep(
+        version_edit_params_.min_log_number_to_keep_);
+    version_set_->MarkFileNumberUsed(version_edit_params_.prev_log_number_);
+    version_set_->MarkFileNumberUsed(version_edit_params_.log_number_);
+    for (auto* cfd : *(version_set_->GetColumnFamilySet())) {
+      if (cfd->IsDropped()) {
+        continue;
+      }
+      auto builder_iter = builders_.find(cfd->GetID());
+      assert(builder_iter != builders_.end());
+      auto* builder = builder_iter->second->version_builder();
+      if (!builder->CheckConsistencyForNumLevels()) {
+        *s = Status::InvalidArgument(
+            "db has more levels than options.num_levels");
+        break;
+      }
+    }
+  }
+  if (s->ok()) {
+    for (auto* cfd : *(version_set_->GetColumnFamilySet())) {
+      if (cfd->IsDropped()) {
+        continue;
+      }
+      if (read_only_) {
+        cfd->table_cache()->SetTablesAreImmortal();
+      }
+      *s = LoadTables(cfd, /*prefetch_index_and_filter_in_cache=*/false,
+                      /*is_initial_load=*/true);
+      if (!s->ok()) {
+        // If s is IOError::PathNotFound, then we mark the db as corrupted.
+        if (s->IsPathNotFound()) {
+          *s = Status::Corruption("Corruption: " + s->ToString());
+        }
+        break;
+      }
+    }
+  }
+
+  if (s->ok()) {
+    for (auto* cfd : *(version_set_->column_family_set_)) {
+      if (cfd->IsDropped()) {
+        continue;
+      }
+      assert(cfd->initialized());
+      VersionEdit edit;
+      *s = MaybeCreateVersion(edit, cfd, /*force_create_version=*/true);
+      if (!s->ok()) {
+        break;
+      }
+    }
+  }
+  if (s->ok()) {
+    version_set_->manifest_file_size_ = reader.GetReadOffset();
+    assert(version_set_->manifest_file_size_ > 0);
+    version_set_->next_file_number_.store(
+        version_edit_params_.next_file_number_ + 1);
+    SequenceNumber last_seq = version_edit_params_.last_sequence_;
+    assert(last_seq != kMaxSequenceNumber);
+    if (last_seq != kMaxSequenceNumber &&
+        last_seq > version_set_->last_allocated_sequence_.load()) {
+      version_set_->last_allocated_sequence_.store(last_seq);
+    }
+    if (last_seq != kMaxSequenceNumber &&
+        last_seq > version_set_->last_published_sequence_.load()) {
+      version_set_->last_published_sequence_.store(last_seq);
+    }
+    if (last_seq != kMaxSequenceNumber &&
+        last_seq > version_set_->last_sequence_.load()) {
+      version_set_->last_sequence_.store(last_seq);
+    }
+    if (last_seq != kMaxSequenceNumber &&
+        last_seq > version_set_->descriptor_last_sequence_) {
+      // This is the maximum last sequence of all `VersionEdit`s iterated.
+      // It may be greater than the maximum `largest_seqno` of all files in
+      // case the newest data referred to by the MANIFEST has been dropped or
+      // had its sequence number zeroed through compaction.
+      version_set_->descriptor_last_sequence_ = last_seq;
+    }
+    version_set_->prev_log_number_ = version_edit_params_.prev_log_number_;
+  }
+}
+
+ColumnFamilyData* VersionEditHandler::CreateCfAndInit(
+    const ColumnFamilyOptions& cf_options, const VersionEdit& edit) {
+  ColumnFamilyData* cfd =
+      version_set_->CreateColumnFamily(cf_options, read_options_, &edit);
+  assert(cfd != nullptr);
+  cfd->set_initialized();
+  assert(builders_.find(edit.column_family_) == builders_.end());
+  builders_.emplace(edit.column_family_,
+                    VersionBuilderUPtr(new BaseReferencedVersionBuilder(cfd)));
+  if (track_missing_files_) {
+    cf_to_missing_files_.emplace(edit.column_family_,
+                                 std::unordered_set<uint64_t>());
+    cf_to_missing_blob_files_high_.emplace(edit.column_family_,
+                                           kInvalidBlobFileNumber);
+  }
+  return cfd;
+}
+
+ColumnFamilyData* VersionEditHandler::DestroyCfAndCleanup(
+    const VersionEdit& edit) {
+  auto builder_iter = builders_.find(edit.column_family_);
+  assert(builder_iter != builders_.end());
+  builders_.erase(builder_iter);
+  if (track_missing_files_) {
+    auto missing_files_iter = cf_to_missing_files_.find(edit.column_family_);
+    assert(missing_files_iter != cf_to_missing_files_.end());
+    cf_to_missing_files_.erase(missing_files_iter);
+
+    auto missing_blob_files_high_iter =
+        cf_to_missing_blob_files_high_.find(edit.column_family_);
+    assert(missing_blob_files_high_iter !=
+           cf_to_missing_blob_files_high_.end());
+    cf_to_missing_blob_files_high_.erase(missing_blob_files_high_iter);
+  }
+  ColumnFamilyData* ret =
+      version_set_->GetColumnFamilySet()->GetColumnFamily(edit.column_family_);
+  assert(ret != nullptr);
+  ret->SetDropped();
+  ret->UnrefAndTryDelete();
+  ret = nullptr;
+  return ret;
+}
+
+Status VersionEditHandler::MaybeCreateVersion(const VersionEdit& /*edit*/,
+                                              ColumnFamilyData* cfd,
+                                              bool force_create_version) {
+  assert(cfd->initialized());
+  Status s;
+  if (force_create_version) {
+    auto builder_iter = builders_.find(cfd->GetID());
+    assert(builder_iter != builders_.end());
+    auto* builder = builder_iter->second->version_builder();
+    auto* v = new Version(cfd, version_set_, version_set_->file_options_,
+                          *cfd->GetLatestMutableCFOptions(), io_tracer_,
+                          version_set_->current_version_number_++,
+                          epoch_number_requirement_);
+    s = builder->SaveTo(v->storage_info());
+    if (s.ok()) {
+      // Install new version
+      v->PrepareAppend(
+          *cfd->GetLatestMutableCFOptions(), read_options_,
+          !(version_set_->db_options_->skip_stats_update_on_db_open));
+      version_set_->AppendVersion(cfd, v);
+    } else {
+      delete v;
+    }
+  }
+  return s;
+}
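+
+// The install sequence above is the recurring pattern in this file (a
+// simplified recap, not upstream documentation): a VersionBuilder accumulates
+// edits, SaveTo() materializes them into a new Version's storage info, and
+// AppendVersion() makes that Version current. On failure the half-built
+// Version is deleted rather than installed. Roughly:
+//
+//   builder->Apply(&edit1);  // possibly many edits
+//   Version* v = new Version(...);
+//   if (builder->SaveTo(v->storage_info()).ok()) {
+//     version_set->AppendVersion(cfd, v);
+//   } else {
+//     delete v;
+//   }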
version_set_->db_options_->max_file_opening_threads, + prefetch_index_and_filter_in_cache, is_initial_load, + moptions->prefix_extractor, MaxFileSizeForL0MetaPin(*moptions), + read_options_, moptions->block_protection_bytes_per_key); + if ((s.IsPathNotFound() || s.IsCorruption()) && no_error_if_files_missing_) { + s = Status::OK(); + } + if (!s.ok() && !version_set_->db_options_->paranoid_checks) { + s = Status::OK(); + } + return s; +} + +Status VersionEditHandler::ExtractInfoFromVersionEdit(ColumnFamilyData* cfd, + const VersionEdit& edit) { + Status s; + if (edit.has_db_id_) { + version_set_->db_id_ = edit.GetDbId(); + version_edit_params_.SetDBId(edit.db_id_); + } + if (cfd != nullptr) { + if (edit.has_log_number_) { + if (cfd->GetLogNumber() > edit.log_number_) { + ROCKS_LOG_WARN( + version_set_->db_options()->info_log, + "MANIFEST corruption detected, but ignored - Log numbers in " + "records NOT monotonically increasing"); + } else { + cfd->SetLogNumber(edit.log_number_); + version_edit_params_.SetLogNumber(edit.log_number_); + } + } + if (edit.has_comparator_ && + edit.comparator_ != cfd->user_comparator()->Name()) { + if (!cf_to_cmp_names_) { + s = Status::InvalidArgument( + cfd->user_comparator()->Name(), + "does not match existing comparator " + edit.comparator_); + } else { + cf_to_cmp_names_->emplace(cfd->GetID(), edit.comparator_); + } + } + if (edit.HasFullHistoryTsLow()) { + const std::string& new_ts = edit.GetFullHistoryTsLow(); + cfd->SetFullHistoryTsLow(new_ts); + } + } + + if (s.ok()) { + if (edit.has_prev_log_number_) { + version_edit_params_.SetPrevLogNumber(edit.prev_log_number_); + } + if (edit.has_next_file_number_) { + version_edit_params_.SetNextFile(edit.next_file_number_); + } + if (edit.has_max_column_family_) { + version_edit_params_.SetMaxColumnFamily(edit.max_column_family_); + } + if (edit.has_min_log_number_to_keep_) { + version_edit_params_.min_log_number_to_keep_ = + std::max(version_edit_params_.min_log_number_to_keep_, + edit.min_log_number_to_keep_); + } + if (edit.has_last_sequence_) { + // `VersionEdit::last_sequence_`s are assumed to be non-decreasing. This + // is legacy behavior that cannot change without breaking downgrade + // compatibility. 
+      assert(!version_edit_params_.has_last_sequence_ ||
+             version_edit_params_.last_sequence_ <= edit.last_sequence_);
+      version_edit_params_.SetLastSequence(edit.last_sequence_);
+    }
+    if (!version_edit_params_.has_prev_log_number_) {
+      version_edit_params_.SetPrevLogNumber(0);
+    }
+  }
+  return s;
+}
+
+VersionEditHandlerPointInTime::VersionEditHandlerPointInTime(
+    bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
+    VersionSet* version_set, const std::shared_ptr<IOTracer>& io_tracer,
+    const ReadOptions& read_options,
+    EpochNumberRequirement epoch_number_requirement)
+    : VersionEditHandler(read_only, column_families, version_set,
+                         /*track_missing_files=*/true,
+                         /*no_error_if_files_missing=*/true, io_tracer,
+                         read_options, epoch_number_requirement) {}
+
+VersionEditHandlerPointInTime::~VersionEditHandlerPointInTime() {
+  for (const auto& elem : versions_) {
+    delete elem.second;
+  }
+  versions_.clear();
+}
+
+void VersionEditHandlerPointInTime::CheckIterationResult(
+    const log::Reader& reader, Status* s) {
+  VersionEditHandler::CheckIterationResult(reader, s);
+  assert(s != nullptr);
+  if (s->ok()) {
+    for (auto* cfd : *(version_set_->column_family_set_)) {
+      if (cfd->IsDropped()) {
+        continue;
+      }
+      assert(cfd->initialized());
+      auto v_iter = versions_.find(cfd->GetID());
+      if (v_iter != versions_.end()) {
+        assert(v_iter->second != nullptr);
+
+        version_set_->AppendVersion(cfd, v_iter->second);
+        versions_.erase(v_iter);
+      }
+    }
+  } else {
+    for (const auto& elem : versions_) {
+      delete elem.second;
+    }
+    versions_.clear();
+  }
+}
+
+ColumnFamilyData* VersionEditHandlerPointInTime::DestroyCfAndCleanup(
+    const VersionEdit& edit) {
+  ColumnFamilyData* cfd = VersionEditHandler::DestroyCfAndCleanup(edit);
+  auto v_iter = versions_.find(edit.column_family_);
+  if (v_iter != versions_.end()) {
+    delete v_iter->second;
+    versions_.erase(v_iter);
+  }
+  return cfd;
+}
+
+Status VersionEditHandlerPointInTime::MaybeCreateVersion(
+    const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version) {
+  assert(cfd != nullptr);
+  if (!force_create_version) {
+    assert(edit.column_family_ == cfd->GetID());
+  }
+  auto missing_files_iter = cf_to_missing_files_.find(cfd->GetID());
+  assert(missing_files_iter != cf_to_missing_files_.end());
+  std::unordered_set<uint64_t>& missing_files = missing_files_iter->second;
+
+  auto missing_blob_files_high_iter =
+      cf_to_missing_blob_files_high_.find(cfd->GetID());
+  assert(missing_blob_files_high_iter != cf_to_missing_blob_files_high_.end());
+  const uint64_t prev_missing_blob_file_high =
+      missing_blob_files_high_iter->second;
+
+  VersionBuilder* builder = nullptr;
+
+  if (prev_missing_blob_file_high != kInvalidBlobFileNumber) {
+    auto builder_iter = builders_.find(cfd->GetID());
+    assert(builder_iter != builders_.end());
+    builder = builder_iter->second->version_builder();
+    assert(builder != nullptr);
+  }
+
+  // At this point, we have not yet applied the new version edits read from the
+  // MANIFEST. We check whether we have any missing table and blob files.
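+  // In pseudocode, the decision made below is roughly (an illustrative
+  // summary, not upstream documentation):
+  //
+  //   missing_before = had missing table/blob files prior to this edit
+  //   missing_after  = will have missing files once this edit is applied
+  //   if (!missing_before && missing_after) {
+  //     // save a Version now: the last consistent point in time
+  //   } else if (!missing_after && force_create_version) {
+  //     // everything is present; create the Version on request
+  //   }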
+  const bool prev_has_missing_files =
+      !missing_files.empty() ||
+      (prev_missing_blob_file_high != kInvalidBlobFileNumber &&
+       prev_missing_blob_file_high >= builder->GetMinOldestBlobFileNumber());
+
+  for (const auto& file : edit.GetDeletedFiles()) {
+    uint64_t file_num = file.second;
+    auto fiter = missing_files.find(file_num);
+    if (fiter != missing_files.end()) {
+      missing_files.erase(fiter);
+    }
+  }
+
+  assert(!cfd->ioptions()->cf_paths.empty());
+  Status s;
+  for (const auto& elem : edit.GetNewFiles()) {
+    int level = elem.first;
+    const FileMetaData& meta = elem.second;
+    const FileDescriptor& fd = meta.fd;
+    uint64_t file_num = fd.GetNumber();
+    const std::string fpath =
+        MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_num);
+    s = VerifyFile(cfd, fpath, level, meta);
+    if (s.IsPathNotFound() || s.IsNotFound() || s.IsCorruption()) {
+      missing_files.insert(file_num);
+      s = Status::OK();
+    } else if (!s.ok()) {
+      break;
+    }
+  }
+
+  uint64_t missing_blob_file_num = prev_missing_blob_file_high;
+  for (const auto& elem : edit.GetBlobFileAdditions()) {
+    uint64_t file_num = elem.GetBlobFileNumber();
+    s = VerifyBlobFile(cfd, file_num, elem);
+    if (s.IsPathNotFound() || s.IsNotFound() || s.IsCorruption()) {
+      missing_blob_file_num = std::max(missing_blob_file_num, file_num);
+      s = Status::OK();
+    } else if (!s.ok()) {
+      break;
+    }
+  }
+
+  bool has_missing_blob_files = false;
+  if (missing_blob_file_num != kInvalidBlobFileNumber &&
+      missing_blob_file_num >= prev_missing_blob_file_high) {
+    missing_blob_files_high_iter->second = missing_blob_file_num;
+    has_missing_blob_files = true;
+  } else if (missing_blob_file_num < prev_missing_blob_file_high) {
+    assert(false);
+  }
+
+  // We still have not applied the new version edit, but have tried to add new
+  // table and blob files after verifying their presence and consistency.
+  // Therefore, we know whether we will see new missing table and blob files
+  // later after actually applying the version edit. We perform the check here
+  // and record the result.
+  const bool has_missing_files =
+      !missing_files.empty() || has_missing_blob_files;
+
+  bool missing_info = !version_edit_params_.has_log_number_ ||
+                      !version_edit_params_.has_next_file_number_ ||
+                      !version_edit_params_.has_last_sequence_;
+
+  // Create the version before applying the edit. The version will represent
+  // the state before applying the version edit.
+  // A new version will be created if:
+  // 1) no error has occurred so far, and
+  // 2) log_number_, next_file_number_ and last_sequence_ are known, and
+  // 3) any of the following:
+  //   a) no missing file before, but will have missing file(s) after applying
+  //      this version edit.
+  //   b) no missing file after applying the version edit, and the caller
+  //      explicitly requests that a new version be created.
+ if (s.ok() && !missing_info && + ((has_missing_files && !prev_has_missing_files) || + (!has_missing_files && force_create_version))) { + if (!builder) { + auto builder_iter = builders_.find(cfd->GetID()); + assert(builder_iter != builders_.end()); + builder = builder_iter->second->version_builder(); + assert(builder); + } + + const MutableCFOptions* cf_opts_ptr = cfd->GetLatestMutableCFOptions(); + auto* version = new Version(cfd, version_set_, version_set_->file_options_, + *cf_opts_ptr, io_tracer_, + version_set_->current_version_number_++, + epoch_number_requirement_); + s = builder->LoadTableHandlers( + cfd->internal_stats(), + version_set_->db_options_->max_file_opening_threads, false, true, + cf_opts_ptr->prefix_extractor, MaxFileSizeForL0MetaPin(*cf_opts_ptr), + read_options_, cf_opts_ptr->block_protection_bytes_per_key); + if (!s.ok()) { + delete version; + if (s.IsCorruption()) { + s = Status::OK(); + } + return s; + } + s = builder->SaveTo(version->storage_info()); + if (s.ok()) { + version->PrepareAppend( + *cfd->GetLatestMutableCFOptions(), read_options_, + !version_set_->db_options_->skip_stats_update_on_db_open); + auto v_iter = versions_.find(cfd->GetID()); + if (v_iter != versions_.end()) { + delete v_iter->second; + v_iter->second = version; + } else { + versions_.emplace(cfd->GetID(), version); + } + } else { + delete version; + } + } + return s; +} + +Status VersionEditHandlerPointInTime::VerifyFile(ColumnFamilyData* cfd, + const std::string& fpath, + int level, + const FileMetaData& fmeta) { + return version_set_->VerifyFileMetadata(read_options_, cfd, fpath, level, + fmeta); +} + +Status VersionEditHandlerPointInTime::VerifyBlobFile( + ColumnFamilyData* cfd, uint64_t blob_file_num, + const BlobFileAddition& blob_addition) { + BlobSource* blob_source = cfd->blob_source(); + assert(blob_source); + CacheHandleGuard blob_file_reader; + + Status s = blob_source->GetBlobFileReader(read_options_, blob_file_num, + &blob_file_reader); + if (!s.ok()) { + return s; + } + // TODO: verify checksum + (void)blob_addition; + return s; +} + +Status VersionEditHandlerPointInTime::LoadTables( + ColumnFamilyData* /*cfd*/, bool /*prefetch_index_and_filter_in_cache*/, + bool /*is_initial_load*/) { + return Status::OK(); +} + +Status ManifestTailer::Initialize() { + if (Mode::kRecovery == mode_) { + return VersionEditHandler::Initialize(); + } + assert(Mode::kCatchUp == mode_); + Status s; + if (!initialized_) { + ColumnFamilySet* cfd_set = version_set_->GetColumnFamilySet(); + assert(cfd_set); + ColumnFamilyData* default_cfd = cfd_set->GetDefault(); + assert(default_cfd); + auto builder_iter = builders_.find(default_cfd->GetID()); + assert(builder_iter != builders_.end()); + + Version* dummy_version = default_cfd->dummy_versions(); + assert(dummy_version); + Version* base_version = dummy_version->Next(); + assert(base_version); + base_version->Ref(); + VersionBuilderUPtr new_builder( + new BaseReferencedVersionBuilder(default_cfd, base_version)); + builder_iter->second = std::move(new_builder); + + initialized_ = true; + } + return s; +} + +Status ManifestTailer::ApplyVersionEdit(VersionEdit& edit, + ColumnFamilyData** cfd) { + Status s = VersionEditHandler::ApplyVersionEdit(edit, cfd); + if (s.ok()) { + assert(cfd); + if (*cfd) { + cfds_changed_.insert(*cfd); + } + } + return s; +} + +Status ManifestTailer::OnColumnFamilyAdd(VersionEdit& edit, + ColumnFamilyData** cfd) { + if (Mode::kRecovery == mode_) { + return VersionEditHandler::OnColumnFamilyAdd(edit, cfd); + } + 
+
+Status ManifestTailer::OnColumnFamilyAdd(VersionEdit& edit,
+                                         ColumnFamilyData** cfd) {
+  if (Mode::kRecovery == mode_) {
+    return VersionEditHandler::OnColumnFamilyAdd(edit, cfd);
+  }
+  assert(Mode::kCatchUp == mode_);
+  ColumnFamilySet* cfd_set = version_set_->GetColumnFamilySet();
+  assert(cfd_set);
+  ColumnFamilyData* tmp_cfd = cfd_set->GetColumnFamily(edit.GetColumnFamily());
+  assert(cfd);
+  *cfd = tmp_cfd;
+  if (!tmp_cfd) {
+    // For now, ignore new column families created after Recover() succeeds.
+    return Status::OK();
+  }
+  auto builder_iter = builders_.find(edit.GetColumnFamily());
+  assert(builder_iter != builders_.end());
+
+  Version* dummy_version = tmp_cfd->dummy_versions();
+  assert(dummy_version);
+  Version* base_version = dummy_version->Next();
+  assert(base_version);
+  base_version->Ref();
+  VersionBuilderUPtr new_builder(
+      new BaseReferencedVersionBuilder(tmp_cfd, base_version));
+  builder_iter->second = std::move(new_builder);
+
+#ifndef NDEBUG
+  auto version_iter = versions_.find(edit.GetColumnFamily());
+  assert(version_iter == versions_.end());
+#endif  // !NDEBUG
+  return Status::OK();
+}
+
+void ManifestTailer::CheckIterationResult(const log::Reader& reader,
+                                          Status* s) {
+  VersionEditHandlerPointInTime::CheckIterationResult(reader, s);
+  assert(s);
+  if (s->ok()) {
+    if (Mode::kRecovery == mode_) {
+      mode_ = Mode::kCatchUp;
+    } else {
+      assert(Mode::kCatchUp == mode_);
+    }
+  }
+}
+
+Status ManifestTailer::VerifyFile(ColumnFamilyData* cfd,
+                                  const std::string& fpath, int level,
+                                  const FileMetaData& fmeta) {
+  Status s =
+      VersionEditHandlerPointInTime::VerifyFile(cfd, fpath, level, fmeta);
+  // TODO: Open file or create hard link to prevent the file from being
+  // deleted.
+  return s;
+}
+
+void DumpManifestHandler::CheckIterationResult(const log::Reader& reader,
+                                               Status* s) {
+  VersionEditHandler::CheckIterationResult(reader, s);
+  if (!s->ok()) {
+    fprintf(stdout, "%s\n", s->ToString().c_str());
+    return;
+  }
+  assert(cf_to_cmp_names_);
+  for (auto* cfd : *(version_set_->column_family_set_)) {
+    fprintf(stdout,
+            "--------------- Column family \"%s\" (ID %" PRIu32
+            ") --------------\n",
+            cfd->GetName().c_str(), cfd->GetID());
+    fprintf(stdout, "log number: %" PRIu64 "\n", cfd->GetLogNumber());
+    auto it = cf_to_cmp_names_->find(cfd->GetID());
+    if (it != cf_to_cmp_names_->end()) {
+      fprintf(stdout,
+              "comparator: <%s>, but the comparator object is not available.\n",
+              it->second.c_str());
+    } else {
+      fprintf(stdout, "comparator: %s\n", cfd->user_comparator()->Name());
+    }
+    assert(cfd->current());
+
+    // Print out DebugStrings. Can include non-terminating null characters.
+    fwrite(cfd->current()->DebugString(hex_).data(), sizeof(char),
+           cfd->current()->DebugString(hex_).size(), stdout);
+  }
+  fprintf(stdout,
+          "next_file_number %" PRIu64 " last_sequence %" PRIu64
+          " prev_log_number %" PRIu64 " max_column_family %" PRIu32
+          " min_log_number_to_keep %" PRIu64 "\n",
+          version_set_->current_next_file_number(),
+          version_set_->LastSequence(), version_set_->prev_log_number(),
+          version_set_->column_family_set_->GetMaxColumnFamily(),
+          version_set_->min_log_number_to_keep());
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.h
new file mode 100644
index 0000000000..4b9f195420
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_edit_handler.h
@@ -0,0 +1,334 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "db/version_builder.h"
+#include "db/version_edit.h"
+#include "db/version_set.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct FileMetaData;
+
+class VersionEditHandlerBase {
+ public:
+  explicit VersionEditHandlerBase(const ReadOptions& read_options)
+      : read_options_(read_options),
+        max_manifest_read_size_(std::numeric_limits<uint64_t>::max()) {}
+
+  virtual ~VersionEditHandlerBase() {}
+
+  void Iterate(log::Reader& reader, Status* log_read_status);
+
+  const Status& status() const { return status_; }
+
+  AtomicGroupReadBuffer& GetReadBuffer() { return read_buffer_; }
+
+ protected:
+  explicit VersionEditHandlerBase(const ReadOptions& read_options,
+                                  uint64_t max_read_size)
+      : read_options_(read_options), max_manifest_read_size_(max_read_size) {}
+  virtual Status Initialize() { return Status::OK(); }
+
+  virtual Status ApplyVersionEdit(VersionEdit& edit,
+                                  ColumnFamilyData** cfd) = 0;
+
+  virtual void CheckIterationResult(const log::Reader& /*reader*/,
+                                    Status* /*s*/) {}
+
+  void ClearReadBuffer() { read_buffer_.Clear(); }
+
+  Status status_;
+
+  const ReadOptions& read_options_;
+
+ private:
+  AtomicGroupReadBuffer read_buffer_;
+  const uint64_t max_manifest_read_size_;
+};
+
+class ListColumnFamiliesHandler : public VersionEditHandlerBase {
+ public:
+  explicit ListColumnFamiliesHandler(const ReadOptions& read_options)
+      : VersionEditHandlerBase(read_options) {}
+
+  ~ListColumnFamiliesHandler() override {}
+
+  const std::map<uint32_t, std::string> GetColumnFamilyNames() const {
+    return column_family_names_;
+  }
+
+ protected:
+  Status ApplyVersionEdit(VersionEdit& edit,
+                          ColumnFamilyData** /*unused*/) override;
+
+ private:
+  // default column family is always implicitly there
+  std::map<uint32_t, std::string> column_family_names_{
+      {0, kDefaultColumnFamilyName}};
+};
+
+class FileChecksumRetriever : public VersionEditHandlerBase {
+ public:
+  FileChecksumRetriever(const ReadOptions& read_options, uint64_t max_read_size,
+                        FileChecksumList& file_checksum_list)
+      : VersionEditHandlerBase(read_options, max_read_size),
+        file_checksum_list_(file_checksum_list) {}
+
+  ~FileChecksumRetriever() override {}
+
+ protected:
+  Status ApplyVersionEdit(VersionEdit& edit,
+                          ColumnFamilyData** /*unused*/) override;
+
+ private:
+  FileChecksumList& file_checksum_list_;
+};
+
+using VersionBuilderUPtr = std::unique_ptr<BaseReferencedVersionBuilder>;
+
+// A class used for scanning MANIFEST file.
+// VersionEditHandler reads a MANIFEST file, parses the version edits, and
+// builds the version set's in-memory state, e.g. the version storage info for
+// the versions of column families.
+// To use this class and its subclasses,
+// 1. Create an object of VersionEditHandler or its subclasses.
+//    VersionEditHandler handler(read_only, column_families, version_set,
+//                               track_missing_files,
+//                               no_error_if_files_missing);
+// 2. Status s = handler.Iterate(reader, &db_id);
+// 3. Check s and handle possible errors.
+//
+// Not thread-safe, external synchronization is necessary if an object of
+// VersionEditHandler is shared by multiple threads.
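+//
+// For example (an illustrative sketch; the argument values are hypothetical):
+//
+//   std::vector<ColumnFamilyDescriptor> cfs = /* descriptors */;
+//   VersionEditHandler handler(/*read_only=*/false, cfs, versions,
+//                              /*track_missing_files=*/false,
+//                              /*no_error_if_files_missing=*/false,
+//                              io_tracer, read_options);
+//   handler.Iterate(reader, &log_read_status);
+//   Status s = handler.status();  // overall result of the replay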
+class VersionEditHandler : public VersionEditHandlerBase {
+ public:
+  explicit VersionEditHandler(
+      bool read_only,
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      VersionSet* version_set, bool track_missing_files,
+      bool no_error_if_files_missing,
+      const std::shared_ptr<IOTracer>& io_tracer,
+      const ReadOptions& read_options,
+      EpochNumberRequirement epoch_number_requirement =
+          EpochNumberRequirement::kMustPresent)
+      : VersionEditHandler(
+            read_only, column_families, version_set, track_missing_files,
+            no_error_if_files_missing, io_tracer, read_options,
+            /*skip_load_table_files=*/false, epoch_number_requirement) {}
+
+  ~VersionEditHandler() override {}
+
+  const VersionEditParams& GetVersionEditParams() const {
+    return version_edit_params_;
+  }
+
+  bool HasMissingFiles() const;
+
+  void GetDbId(std::string* db_id) const {
+    if (db_id && version_edit_params_.has_db_id_) {
+      *db_id = version_edit_params_.db_id_;
+    }
+  }
+
+ protected:
+  explicit VersionEditHandler(
+      bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
+      VersionSet* version_set, bool track_missing_files,
+      bool no_error_if_files_missing,
+      const std::shared_ptr<IOTracer>& io_tracer,
+      const ReadOptions& read_options, bool skip_load_table_files,
+      EpochNumberRequirement epoch_number_requirement =
+          EpochNumberRequirement::kMustPresent);
+
+  Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override;
+
+  virtual Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd);
+
+  Status OnColumnFamilyDrop(VersionEdit& edit, ColumnFamilyData** cfd);
+
+  Status OnNonCfOperation(VersionEdit& edit, ColumnFamilyData** cfd);
+
+  Status OnWalAddition(VersionEdit& edit);
+
+  Status OnWalDeletion(VersionEdit& edit);
+
+  Status Initialize() override;
+
+  void CheckColumnFamilyId(const VersionEdit& edit, bool* cf_in_not_found,
+                           bool* cf_in_builders) const;
+
+  void CheckIterationResult(const log::Reader& reader, Status* s) override;
+
+  ColumnFamilyData* CreateCfAndInit(const ColumnFamilyOptions& cf_options,
+                                    const VersionEdit& edit);
+
+  virtual ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit);
+
+  virtual Status MaybeCreateVersion(const VersionEdit& edit,
+                                    ColumnFamilyData* cfd,
+                                    bool force_create_version);
+
+  virtual Status LoadTables(ColumnFamilyData* cfd,
+                            bool prefetch_index_and_filter_in_cache,
+                            bool is_initial_load);
+
+  virtual bool MustOpenAllColumnFamilies() const { return !read_only_; }
+
+  const bool read_only_;
+  std::vector<ColumnFamilyDescriptor> column_families_;
+  VersionSet* version_set_;
+  std::unordered_map<uint32_t, VersionBuilderUPtr> builders_;
+  std::unordered_map<std::string, ColumnFamilyOptions> name_to_options_;
+  // Keeps track of column families in manifest that were not found in
+  // column families parameters. If those column families are not dropped
+  // by subsequent manifest records, Recover() will return failure status.
+  std::unordered_map<uint32_t, std::string> column_families_not_found_;
+  VersionEditParams version_edit_params_;
+  const bool track_missing_files_;
+  std::unordered_map<uint32_t, std::unordered_set<uint64_t>>
+      cf_to_missing_files_;
+  std::unordered_map<uint32_t, uint64_t> cf_to_missing_blob_files_high_;
+  bool no_error_if_files_missing_;
+  std::shared_ptr<IOTracer> io_tracer_;
+  bool skip_load_table_files_;
+  bool initialized_;
+  std::unique_ptr<std::unordered_map<uint32_t, std::string>> cf_to_cmp_names_;
+  EpochNumberRequirement epoch_number_requirement_;
+
+ private:
+  Status ExtractInfoFromVersionEdit(ColumnFamilyData* cfd,
+                                    const VersionEdit& edit);
+};
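+
+// An informal overview of how the handler subclasses below relate:
+//
+//   VersionEditHandlerBase
+//   └── VersionEditHandler              -- full recovery of the version set
+//       ├── VersionEditHandlerPointInTime
+//       │   └── ManifestTailer          -- follows a live MANIFEST
+//       └── DumpManifestHandler         -- prints edits for debugging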
+
+// A class similar to its base class, i.e. VersionEditHandler.
+// VersionEditHandlerPointInTime restores the versions to the most recent
+// point in time such that at this point, the version does not have missing
+// files.
+//
+// Not thread-safe, external synchronization is necessary if an object of
+// VersionEditHandlerPointInTime is shared by multiple threads.
+class VersionEditHandlerPointInTime : public VersionEditHandler {
+ public:
+  VersionEditHandlerPointInTime(
+      bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
+      VersionSet* version_set, const std::shared_ptr<IOTracer>& io_tracer,
+      const ReadOptions& read_options,
+      EpochNumberRequirement epoch_number_requirement =
+          EpochNumberRequirement::kMustPresent);
+  ~VersionEditHandlerPointInTime() override;
+
+ protected:
+  void CheckIterationResult(const log::Reader& reader, Status* s) override;
+  ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit) override;
+  Status MaybeCreateVersion(const VersionEdit& edit, ColumnFamilyData* cfd,
+                            bool force_create_version) override;
+  virtual Status VerifyFile(ColumnFamilyData* cfd, const std::string& fpath,
+                            int level, const FileMetaData& fmeta);
+  virtual Status VerifyBlobFile(ColumnFamilyData* cfd, uint64_t blob_file_num,
+                                const BlobFileAddition& blob_addition);
+
+  Status LoadTables(ColumnFamilyData* cfd,
+                    bool prefetch_index_and_filter_in_cache,
+                    bool is_initial_load) override;
+
+  std::unordered_map<uint32_t, Version*> versions_;
+};
+
+class ManifestTailer : public VersionEditHandlerPointInTime {
+ public:
+  explicit ManifestTailer(std::vector<ColumnFamilyDescriptor> column_families,
+                          VersionSet* version_set,
+                          const std::shared_ptr<IOTracer>& io_tracer,
+                          const ReadOptions& read_options,
+                          EpochNumberRequirement epoch_number_requirement =
+                              EpochNumberRequirement::kMustPresent)
+      : VersionEditHandlerPointInTime(/*read_only=*/false, column_families,
+                                      version_set, io_tracer, read_options,
+                                      epoch_number_requirement),
+        mode_(Mode::kRecovery) {}
+
+  void PrepareToReadNewManifest() {
+    initialized_ = false;
+    ClearReadBuffer();
+  }
+
+  std::unordered_set<ColumnFamilyData*>& GetUpdatedColumnFamilies() {
+    return cfds_changed_;
+  }
+
+ protected:
+  Status Initialize() override;
+
+  bool MustOpenAllColumnFamilies() const override { return false; }
+
+  Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override;
+
+  Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd) override;
+
+  void CheckIterationResult(const log::Reader& reader, Status* s) override;
+
+  Status VerifyFile(ColumnFamilyData* cfd, const std::string& fpath, int level,
+                    const FileMetaData& fmeta) override;
+
+  enum Mode : uint8_t {
+    kRecovery = 0,
+    kCatchUp = 1,
+  };
+
+  Mode mode_;
+  std::unordered_set<ColumnFamilyData*> cfds_changed_;
+};
+
+class DumpManifestHandler : public VersionEditHandler {
+ public:
+  DumpManifestHandler(std::vector<ColumnFamilyDescriptor> column_families,
+                      VersionSet* version_set,
+                      const std::shared_ptr<IOTracer>& io_tracer,
+                      const ReadOptions& read_options, bool verbose, bool hex,
+                      bool json)
+      : VersionEditHandler(
+            /*read_only=*/true, column_families, version_set,
+            /*track_missing_files=*/false,
+            /*no_error_if_files_missing=*/false, io_tracer, read_options,
+            /*skip_load_table_files=*/true),
+        verbose_(verbose),
+        hex_(hex),
+        json_(json),
+        count_(0) {
+    cf_to_cmp_names_.reset(new std::unordered_map<uint32_t, std::string>());
+  }
+
+  ~DumpManifestHandler() override {}
+
+  Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override {
+    // Write out each individual edit
+    if (verbose_ && !json_) {
+      // Print out DebugStrings. Can include non-terminating null characters.
+      fwrite(edit.DebugString(hex_).data(), sizeof(char),
+             edit.DebugString(hex_).size(), stdout);
+    } else if (json_) {
+      // Print out DebugJSON. Can include non-terminating null characters.
+      fwrite(edit.DebugJSON(count_, hex_).data(), sizeof(char),
+             edit.DebugJSON(count_, hex_).size(), stdout);
+    }
+    ++count_;
+    return VersionEditHandler::ApplyVersionEdit(edit, cfd);
+  }
+
+  void CheckIterationResult(const log::Reader& reader, Status* s) override;
+
+ private:
+  const bool verbose_;
+  const bool hex_;
+  const bool json_;
+  int count_;
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.cc
new file mode 100644
index 0000000000..674c0e4aa9
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.cc
@@ -0,0 +1,7286 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/version_set.h"
+
+#include <algorithm>
+#include <array>
+#include <cinttypes>
+#include <cstdio>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "db/blob/blob_fetcher.h"
+#include "db/blob/blob_file_cache.h"
+#include "db/blob/blob_file_reader.h"
+#include "db/blob/blob_log_format.h"
+#include "db/blob/blob_source.h"
+#include "db/compaction/compaction.h"
+#include "db/compaction/file_pri.h"
+#include "db/dbformat.h"
+#include "db/internal_stats.h"
+#include "db/log_reader.h"
+#include "db/log_writer.h"
+#include "db/memtable.h"
+#include "db/merge_context.h"
+#include "db/merge_helper.h"
+#include "db/pinned_iterators_manager.h"
+#include "db/table_cache.h"
+#include "db/version_builder.h"
+#include "db/version_edit.h"
+#include "db/version_edit_handler.h"
+#include "table/compaction_merging_iterator.h"
+
+#if USE_COROUTINES
+#include "folly/experimental/coro/BlockingWait.h"
+#include "folly/experimental/coro/Collect.h"
+#endif
+#include "file/filename.h"
+#include "file/random_access_file_reader.h"
+#include "file/read_write_util.h"
+#include "file/writable_file_writer.h"
+#include "logging/logging.h"
+#include "monitoring/file_read_sample.h"
+#include "monitoring/perf_context_imp.h"
+#include "monitoring/persistent_stats_history.h"
+#include "options/options_helper.h"
+#include "rocksdb/env.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/write_buffer_manager.h"
+#include "table/format.h"
+#include "table/get_context.h"
+#include "table/internal_iterator.h"
+#include "table/merging_iterator.h"
+#include "table/meta_blocks.h"
+#include "table/multiget_context.h"
+#include "table/plain/plain_table_factory.h"
+#include "table/table_reader.h"
+#include "table/two_level_iterator.h"
+#include "table/unique_id_impl.h"
+#include "test_util/sync_point.h"
+#include "util/cast_util.h"
+#include "util/coding.h"
+#include "util/coro_utils.h"
+#include "util/stop_watch.h"
+#include "util/string_util.h"
+#include "util/user_comparator_wrapper.h"
+
+// Generate the regular and coroutine versions of some methods by
+// including version_set_sync_and_async.h twice
+// Macros in the header will expand differently based on whether
+// WITH_COROUTINES or WITHOUT_COROUTINES is defined
+// clang-format off
+#define WITHOUT_COROUTINES
+#include "db/version_set_sync_and_async.h"
+#undef WITHOUT_COROUTINES
+#define WITH_COROUTINES
+#include 
"db/version_set_sync_and_async.h" +#undef WITH_COROUTINES +// clang-format on + +namespace ROCKSDB_NAMESPACE { + +namespace { + +// Find File in LevelFilesBrief data structure +// Within an index range defined by left and right +int FindFileInRange(const InternalKeyComparator& icmp, + const LevelFilesBrief& file_level, const Slice& key, + uint32_t left, uint32_t right) { + auto cmp = [&](const FdWithKeyRange& f, const Slice& k) -> bool { + return icmp.InternalKeyComparator::Compare(f.largest_key, k) < 0; + }; + const auto& b = file_level.files; + return static_cast(std::lower_bound(b + left, b + right, key, cmp) - b); +} + +Status OverlapWithIterator(const Comparator* ucmp, + const Slice& smallest_user_key, + const Slice& largest_user_key, + InternalIterator* iter, bool* overlap) { + InternalKey range_start(smallest_user_key, kMaxSequenceNumber, + kValueTypeForSeek); + iter->Seek(range_start.Encode()); + if (!iter->status().ok()) { + return iter->status(); + } + + *overlap = false; + if (iter->Valid()) { + ParsedInternalKey seek_result; + Status s = ParseInternalKey(iter->key(), &seek_result, + false /* log_err_key */); // TODO + if (!s.ok()) return s; + + if (ucmp->CompareWithoutTimestamp(seek_result.user_key, largest_user_key) <= + 0) { + *overlap = true; + } + } + + return iter->status(); +} + +// Class to help choose the next file to search for the particular key. +// Searches and returns files level by level. +// We can search level-by-level since entries never hop across +// levels. Therefore we are guaranteed that if we find data +// in a smaller level, later levels are irrelevant (unless we +// are MergeInProgress). +class FilePicker { + public: + FilePicker(const Slice& user_key, const Slice& ikey, + autovector* file_levels, unsigned int num_levels, + FileIndexer* file_indexer, const Comparator* user_comparator, + const InternalKeyComparator* internal_comparator) + : num_levels_(num_levels), + curr_level_(static_cast(-1)), + returned_file_level_(static_cast(-1)), + hit_file_level_(static_cast(-1)), + search_left_bound_(0), + search_right_bound_(FileIndexer::kLevelMaxIndex), + level_files_brief_(file_levels), + is_hit_file_last_in_level_(false), + curr_file_level_(nullptr), + user_key_(user_key), + ikey_(ikey), + file_indexer_(file_indexer), + user_comparator_(user_comparator), + internal_comparator_(internal_comparator) { + // Setup member variables to search first level. + search_ended_ = !PrepareNextLevel(); + if (!search_ended_) { + // Prefetch Level 0 table data to avoid cache miss if possible. + for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) { + auto* r = (*level_files_brief_)[0].files[i].fd.table_reader; + if (r) { + r->Prepare(ikey); + } + } + } + } + + int GetCurrentLevel() const { return curr_level_; } + + FdWithKeyRange* GetNextFile() { + while (!search_ended_) { // Loops over different levels. + while (curr_index_in_curr_level_ < curr_file_level_->num_files) { + // Loops over all files in current level. + FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_]; + hit_file_level_ = curr_level_; + is_hit_file_last_in_level_ = + curr_index_in_curr_level_ == curr_file_level_->num_files - 1; + int cmp_largest = -1; + + // Do key range filtering of files or/and fractional cascading if: + // (1) not all the files are in level 0, or + // (2) there are more than 3 current level files + // If there are only 3 or less current level files in the system, we + // skip the key range filtering. 
In this case, more likely, the system + // is highly tuned to minimize number of tables queried by each query, + // so it is unlikely that key range filtering is more efficient than + // querying the files. + if (num_levels_ > 1 || curr_file_level_->num_files > 3) { + // Check if key is within a file's range. If search left bound and + // right bound point to the same find, we are sure key falls in + // range. + assert(curr_level_ == 0 || + curr_index_in_curr_level_ == start_index_in_curr_level_ || + user_comparator_->CompareWithoutTimestamp( + user_key_, ExtractUserKey(f->smallest_key)) <= 0); + + int cmp_smallest = user_comparator_->CompareWithoutTimestamp( + user_key_, ExtractUserKey(f->smallest_key)); + if (cmp_smallest >= 0) { + cmp_largest = user_comparator_->CompareWithoutTimestamp( + user_key_, ExtractUserKey(f->largest_key)); + } + + // Setup file search bound for the next level based on the + // comparison results + if (curr_level_ > 0) { + file_indexer_->GetNextLevelIndex( + curr_level_, curr_index_in_curr_level_, cmp_smallest, + cmp_largest, &search_left_bound_, &search_right_bound_); + } + // Key falls out of current file's range + if (cmp_smallest < 0 || cmp_largest > 0) { + if (curr_level_ == 0) { + ++curr_index_in_curr_level_; + continue; + } else { + // Search next level. + break; + } + } + } + + returned_file_level_ = curr_level_; + if (curr_level_ > 0 && cmp_largest < 0) { + // No more files to search in this level. + search_ended_ = !PrepareNextLevel(); + } else { + ++curr_index_in_curr_level_; + } + return f; + } + // Start searching next level. + search_ended_ = !PrepareNextLevel(); + } + // Search ended. + return nullptr; + } + + // getter for current file level + // for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts + unsigned int GetHitFileLevel() { return hit_file_level_; } + + // Returns true if the most recent "hit file" (i.e., one returned by + // GetNextFile()) is at the last index in its level. + bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; } + + private: + unsigned int num_levels_; + unsigned int curr_level_; + unsigned int returned_file_level_; + unsigned int hit_file_level_; + int32_t search_left_bound_; + int32_t search_right_bound_; + autovector* level_files_brief_; + bool search_ended_; + bool is_hit_file_last_in_level_; + LevelFilesBrief* curr_file_level_; + unsigned int curr_index_in_curr_level_; + unsigned int start_index_in_curr_level_; + Slice user_key_; + Slice ikey_; + FileIndexer* file_indexer_; + const Comparator* user_comparator_; + const InternalKeyComparator* internal_comparator_; + + // Setup local variables to search next level. + // Returns false if there are no more levels to search. + bool PrepareNextLevel() { + curr_level_++; + while (curr_level_ < num_levels_) { + curr_file_level_ = &(*level_files_brief_)[curr_level_]; + if (curr_file_level_->num_files == 0) { + // When current level is empty, the search bound generated from upper + // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is + // also empty. + assert(search_left_bound_ == 0); + assert(search_right_bound_ == -1 || + search_right_bound_ == FileIndexer::kLevelMaxIndex); + // Since current level is empty, it will need to search all files in + // the next level + search_left_bound_ = 0; + search_right_bound_ = FileIndexer::kLevelMaxIndex; + curr_level_++; + continue; + } + + // Some files may overlap each other. We find + // all files that overlap user_key and process them in order from + // newest to oldest. 
In the context of merge-operator, this can occur at + // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes + // are always compacted into a single entry). + int32_t start_index; + if (curr_level_ == 0) { + // On Level-0, we read through all files to check for overlap. + start_index = 0; + } else { + // On Level-n (n>=1), files are sorted. Binary search to find the + // earliest file whose largest key >= ikey. Search left bound and + // right bound are used to narrow the range. + if (search_left_bound_ <= search_right_bound_) { + if (search_right_bound_ == FileIndexer::kLevelMaxIndex) { + search_right_bound_ = + static_cast(curr_file_level_->num_files) - 1; + } + // `search_right_bound_` is an inclusive upper-bound, but since it was + // determined based on user key, it is still possible the lookup key + // falls to the right of `search_right_bound_`'s corresponding file. + // So, pass a limit one higher, which allows us to detect this case. + start_index = + FindFileInRange(*internal_comparator_, *curr_file_level_, ikey_, + static_cast(search_left_bound_), + static_cast(search_right_bound_) + 1); + if (start_index == search_right_bound_ + 1) { + // `ikey_` comes after `search_right_bound_`. The lookup key does + // not exist on this level, so let's skip this level and do a full + // binary search on the next level. + search_left_bound_ = 0; + search_right_bound_ = FileIndexer::kLevelMaxIndex; + curr_level_++; + continue; + } + } else { + // search_left_bound > search_right_bound, key does not exist in + // this level. Since no comparison is done in this level, it will + // need to search all files in the next level. + search_left_bound_ = 0; + search_right_bound_ = FileIndexer::kLevelMaxIndex; + curr_level_++; + continue; + } + } + start_index_in_curr_level_ = start_index; + curr_index_in_curr_level_ = start_index; + + return true; + } + // curr_level_ = num_levels_. So, no more levels to search. + return false; + } +}; +} // anonymous namespace + +class FilePickerMultiGet { + private: + struct FilePickerContext; + + public: + FilePickerMultiGet(MultiGetRange* range, + autovector* file_levels, + unsigned int num_levels, FileIndexer* file_indexer, + const Comparator* user_comparator, + const InternalKeyComparator* internal_comparator) + : num_levels_(num_levels), + curr_level_(static_cast(-1)), + returned_file_level_(static_cast(-1)), + hit_file_level_(static_cast(-1)), + range_(*range, range->begin(), range->end()), + maybe_repeat_key_(false), + current_level_range_(*range, range->begin(), range->end()), + current_file_range_(*range, range->begin(), range->end()), + batch_iter_(range->begin()), + batch_iter_prev_(range->begin()), + upper_key_(range->begin()), + level_files_brief_(file_levels), + is_hit_file_last_in_level_(false), + curr_file_level_(nullptr), + file_indexer_(file_indexer), + user_comparator_(user_comparator), + internal_comparator_(internal_comparator), + hit_file_(nullptr) { + for (auto iter = range_.begin(); iter != range_.end(); ++iter) { + fp_ctx_array_[iter.index()] = + FilePickerContext(0, FileIndexer::kLevelMaxIndex); + } + + // Setup member variables to search first level. + search_ended_ = !PrepareNextLevel(); + if (!search_ended_) { + // REVISIT + // Prefetch Level 0 table data to avoid cache miss if possible. + // As of now, only PlainTableReader and CuckooTableReader do any + // prefetching. 
This may not be necessary anymore once we implement + // batching in those table readers + for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) { + auto* r = (*level_files_brief_)[0].files[i].fd.table_reader; + if (r) { + for (auto iter = range_.begin(); iter != range_.end(); ++iter) { + r->Prepare(iter->ikey); + } + } + } + } + } + + FilePickerMultiGet(MultiGetRange* range, const FilePickerMultiGet& other) + : num_levels_(other.num_levels_), + curr_level_(other.curr_level_), + returned_file_level_(other.returned_file_level_), + hit_file_level_(other.hit_file_level_), + fp_ctx_array_(other.fp_ctx_array_), + range_(*range, range->begin(), range->end()), + maybe_repeat_key_(false), + current_level_range_(*range, range->begin(), range->end()), + current_file_range_(*range, range->begin(), range->end()), + batch_iter_(range->begin()), + batch_iter_prev_(range->begin()), + upper_key_(range->begin()), + level_files_brief_(other.level_files_brief_), + is_hit_file_last_in_level_(false), + curr_file_level_(other.curr_file_level_), + file_indexer_(other.file_indexer_), + user_comparator_(other.user_comparator_), + internal_comparator_(other.internal_comparator_), + hit_file_(nullptr) { + PrepareNextLevelForSearch(); + } + + int GetCurrentLevel() const { return curr_level_; } + + void PrepareNextLevelForSearch() { search_ended_ = !PrepareNextLevel(); } + + FdWithKeyRange* GetNextFileInLevel() { + if (batch_iter_ == current_level_range_.end() || search_ended_) { + hit_file_ = nullptr; + return nullptr; + } else { + if (maybe_repeat_key_) { + maybe_repeat_key_ = false; + // Check if we found the final value for the last key in the + // previous lookup range. If we did, then there's no need to look + // any further for that key, so advance batch_iter_. Else, keep + // batch_iter_ positioned on that key so we look it up again in + // the next file + // For L0, always advance the key because we will look in the next + // file regardless for all keys not found yet + if (current_level_range_.CheckKeyDone(batch_iter_) || + curr_level_ == 0) { + batch_iter_ = upper_key_; + } + } + // batch_iter_prev_ will become the start key for the next file + // lookup + batch_iter_prev_ = batch_iter_; + } + + MultiGetRange next_file_range(current_level_range_, batch_iter_prev_, + current_level_range_.end()); + size_t curr_file_index = + (batch_iter_ != current_level_range_.end()) + ? fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level + : curr_file_level_->num_files; + FdWithKeyRange* f; + bool is_last_key_in_file; + if (!GetNextFileInLevelWithKeys(&next_file_range, &curr_file_index, &f, + &is_last_key_in_file)) { + hit_file_ = nullptr; + return nullptr; + } else { + if (is_last_key_in_file) { + // Since cmp_largest is 0, batch_iter_ still points to the last key + // that falls in this file, instead of the next one. 
Increment
+        // the file index for all keys between batch_iter_ and upper_key_
+        auto tmp_iter = batch_iter_;
+        while (tmp_iter != upper_key_) {
+          ++(fp_ctx_array_[tmp_iter.index()].curr_index_in_curr_level);
+          ++tmp_iter;
+        }
+        maybe_repeat_key_ = true;
+      }
+      // Set the range for this file
+      current_file_range_ =
+          MultiGetRange(next_file_range, batch_iter_prev_, upper_key_);
+      returned_file_level_ = curr_level_;
+      hit_file_level_ = curr_level_;
+      is_hit_file_last_in_level_ =
+          curr_file_index == curr_file_level_->num_files - 1;
+      hit_file_ = f;
+      return f;
+    }
+  }
+
+  // getter for current file level
+  // for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts
+  unsigned int GetHitFileLevel() { return hit_file_level_; }
+
+  FdWithKeyRange* GetHitFile() { return hit_file_; }
+
+  // Returns true if the most recent "hit file" (i.e., one returned by
+  // GetNextFile()) is at the last index in its level.
+  bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; }
+
+  bool KeyMaySpanNextFile() { return maybe_repeat_key_; }
+
+  bool IsSearchEnded() { return search_ended_; }
+
+  const MultiGetRange& CurrentFileRange() { return current_file_range_; }
+
+  bool RemainingOverlapInLevel() {
+    return !current_level_range_.Suffix(current_file_range_).empty();
+  }
+
+  MultiGetRange& GetRange() { return range_; }
+
+  void ReplaceRange(const MultiGetRange& other) {
+    assert(hit_file_ == nullptr);
+    range_ = other;
+    current_level_range_ = other;
+  }
+
+  FilePickerMultiGet(FilePickerMultiGet&& other)
+      : num_levels_(other.num_levels_),
+        curr_level_(other.curr_level_),
+        returned_file_level_(other.returned_file_level_),
+        hit_file_level_(other.hit_file_level_),
+        fp_ctx_array_(std::move(other.fp_ctx_array_)),
+        range_(std::move(other.range_)),
+        maybe_repeat_key_(other.maybe_repeat_key_),
+        current_level_range_(std::move(other.current_level_range_)),
+        current_file_range_(std::move(other.current_file_range_)),
+        batch_iter_(other.batch_iter_, &current_level_range_),
+        batch_iter_prev_(other.batch_iter_prev_, &current_level_range_),
+        upper_key_(other.upper_key_, &current_level_range_),
+        level_files_brief_(other.level_files_brief_),
+        search_ended_(other.search_ended_),
+        is_hit_file_last_in_level_(other.is_hit_file_last_in_level_),
+        curr_file_level_(other.curr_file_level_),
+        file_indexer_(other.file_indexer_),
+        user_comparator_(other.user_comparator_),
+        internal_comparator_(other.internal_comparator_),
+        hit_file_(other.hit_file_) {}
+
+ private:
+  unsigned int num_levels_;
+  unsigned int curr_level_;
+  unsigned int returned_file_level_;
+  unsigned int hit_file_level_;
+
+  struct FilePickerContext {
+    int32_t search_left_bound;
+    int32_t search_right_bound;
+    unsigned int curr_index_in_curr_level;
+    unsigned int start_index_in_curr_level;
+
+    FilePickerContext(int32_t left, int32_t right)
+        : search_left_bound(left),
+          search_right_bound(right),
+          curr_index_in_curr_level(0),
+          start_index_in_curr_level(0) {}
+
+    FilePickerContext() = default;
+  };
+  std::array<FilePickerContext, MultiGetContext::MAX_BATCH_SIZE> fp_ctx_array_;
+  MultiGetRange range_;
+  bool maybe_repeat_key_;
+  MultiGetRange current_level_range_;
+  MultiGetRange current_file_range_;
+  // Iterator to iterate through the keys in a MultiGet batch, that gets reset
+  // at the beginning of each level. 
Each call to GetNextFile() will position + // batch_iter_ at or right after the last key that was found in the returned + // SST file + MultiGetRange::Iterator batch_iter_; + // An iterator that records the previous position of batch_iter_, i.e last + // key found in the previous SST file, in order to serve as the start of + // the batch key range for the next SST file + MultiGetRange::Iterator batch_iter_prev_; + MultiGetRange::Iterator upper_key_; + autovector* level_files_brief_; + bool search_ended_; + bool is_hit_file_last_in_level_; + LevelFilesBrief* curr_file_level_; + FileIndexer* file_indexer_; + const Comparator* user_comparator_; + const InternalKeyComparator* internal_comparator_; + FdWithKeyRange* hit_file_; + + // Iterates through files in the current level until it finds a file that + // contains at least one key from the MultiGet batch + bool GetNextFileInLevelWithKeys(MultiGetRange* next_file_range, + size_t* file_index, FdWithKeyRange** fd, + bool* is_last_key_in_file) { + size_t curr_file_index = *file_index; + FdWithKeyRange* f = nullptr; + bool file_hit = false; + int cmp_largest = -1; + if (curr_file_index >= curr_file_level_->num_files) { + // In the unlikely case the next key is a duplicate of the current key, + // and the current key is the last in the level and the internal key + // was not found, we need to skip lookup for the remaining keys and + // reset the search bounds + if (batch_iter_ != current_level_range_.end()) { + ++batch_iter_; + for (; batch_iter_ != current_level_range_.end(); ++batch_iter_) { + struct FilePickerContext& fp_ctx = fp_ctx_array_[batch_iter_.index()]; + fp_ctx.search_left_bound = 0; + fp_ctx.search_right_bound = FileIndexer::kLevelMaxIndex; + } + } + return false; + } + // Loops over keys in the MultiGet batch until it finds a file with + // atleast one of the keys. Then it keeps moving forward until the + // last key in the batch that falls in that file + while (batch_iter_ != current_level_range_.end() && + (fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level == + curr_file_index || + !file_hit)) { + struct FilePickerContext& fp_ctx = fp_ctx_array_[batch_iter_.index()]; + f = &curr_file_level_->files[fp_ctx.curr_index_in_curr_level]; + Slice& user_key = batch_iter_->ukey_without_ts; + + // Do key range filtering of files or/and fractional cascading if: + // (1) not all the files are in level 0, or + // (2) there are more than 3 current level files + // If there are only 3 or less current level files in the system, we + // skip the key range filtering. In this case, more likely, the system + // is highly tuned to minimize number of tables queried by each query, + // so it is unlikely that key range filtering is more efficient than + // querying the files. + if (num_levels_ > 1 || curr_file_level_->num_files > 3) { + // Check if key is within a file's range. If search left bound and + // right bound point to the same find, we are sure key falls in + // range. 
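+      // Worked example (hypothetical keys, not from the code): for a file
+      // covering user keys [b, f], key "a" yields cmp_smallest < 0 (before
+      // the file), key "c" yields cmp_smallest >= 0 with cmp_largest <= 0
+      // (inside the range), and key "g" yields cmp_largest > 0 (past the
+      // file). Only the middle case can possibly be served by this file.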
+ int cmp_smallest = user_comparator_->CompareWithoutTimestamp( + user_key, false, ExtractUserKey(f->smallest_key), true); + + assert(curr_level_ == 0 || + fp_ctx.curr_index_in_curr_level == + fp_ctx.start_index_in_curr_level || + cmp_smallest <= 0); + + if (cmp_smallest >= 0) { + cmp_largest = user_comparator_->CompareWithoutTimestamp( + user_key, false, ExtractUserKey(f->largest_key), true); + } else { + cmp_largest = -1; + } + + // Setup file search bound for the next level based on the + // comparison results + if (curr_level_ > 0) { + file_indexer_->GetNextLevelIndex( + curr_level_, fp_ctx.curr_index_in_curr_level, cmp_smallest, + cmp_largest, &fp_ctx.search_left_bound, + &fp_ctx.search_right_bound); + } + // Key falls out of current file's range + if (cmp_smallest < 0 || cmp_largest > 0) { + next_file_range->SkipKey(batch_iter_); + } else { + file_hit = true; + } + } else { + file_hit = true; + } + if (cmp_largest == 0) { + // cmp_largest is 0, which means the next key will not be in this + // file, so stop looking further. However, its possible there are + // duplicates in the batch, so find the upper bound for the batch + // in this file (upper_key_) by skipping past the duplicates. We + // leave batch_iter_ as is since we may have to pick up from there + // for the next file, if this file has a merge value rather than + // final value + upper_key_ = batch_iter_; + ++upper_key_; + while (upper_key_ != current_level_range_.end() && + user_comparator_->CompareWithoutTimestamp( + batch_iter_->ukey_without_ts, false, + upper_key_->ukey_without_ts, false) == 0) { + ++upper_key_; + } + break; + } else { + if (curr_level_ == 0) { + // We need to look through all files in level 0 + ++fp_ctx.curr_index_in_curr_level; + } + ++batch_iter_; + } + if (!file_hit) { + curr_file_index = + (batch_iter_ != current_level_range_.end()) + ? fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level + : curr_file_level_->num_files; + } + } + + *fd = f; + *file_index = curr_file_index; + *is_last_key_in_file = cmp_largest == 0; + if (!*is_last_key_in_file) { + // If the largest key in the batch overlapping the file is not the + // largest key in the file, upper_ley_ would not have been updated so + // update it here + upper_key_ = batch_iter_; + } + return file_hit; + } + + // Setup local variables to search next level. + // Returns false if there are no more levels to search. + bool PrepareNextLevel() { + if (curr_level_ == 0) { + MultiGetRange::Iterator mget_iter = current_level_range_.begin(); + if (fp_ctx_array_[mget_iter.index()].curr_index_in_curr_level < + curr_file_level_->num_files) { + batch_iter_prev_ = current_level_range_.begin(); + upper_key_ = batch_iter_ = current_level_range_.begin(); + return true; + } + } + + curr_level_++; + // Reset key range to saved value + while (curr_level_ < num_levels_) { + bool level_contains_keys = false; + curr_file_level_ = &(*level_files_brief_)[curr_level_]; + if (curr_file_level_->num_files == 0) { + // When current level is empty, the search bound generated from upper + // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is + // also empty. 
+ + for (auto mget_iter = current_level_range_.begin(); + mget_iter != current_level_range_.end(); ++mget_iter) { + struct FilePickerContext& fp_ctx = fp_ctx_array_[mget_iter.index()]; + + assert(fp_ctx.search_left_bound == 0); + assert(fp_ctx.search_right_bound == -1 || + fp_ctx.search_right_bound == FileIndexer::kLevelMaxIndex); + // Since current level is empty, it will need to search all files in + // the next level + fp_ctx.search_left_bound = 0; + fp_ctx.search_right_bound = FileIndexer::kLevelMaxIndex; + } + // Skip all subsequent empty levels + do { + ++curr_level_; + } while ((curr_level_ < num_levels_) && + (*level_files_brief_)[curr_level_].num_files == 0); + continue; + } + + // Some files may overlap each other. We find + // all files that overlap user_key and process them in order from + // newest to oldest. In the context of merge-operator, this can occur at + // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes + // are always compacted into a single entry). + int32_t start_index = -1; + current_level_range_ = + MultiGetRange(range_, range_.begin(), range_.end()); + for (auto mget_iter = current_level_range_.begin(); + mget_iter != current_level_range_.end(); ++mget_iter) { + struct FilePickerContext& fp_ctx = fp_ctx_array_[mget_iter.index()]; + if (curr_level_ == 0) { + // On Level-0, we read through all files to check for overlap. + start_index = 0; + level_contains_keys = true; + } else { + // On Level-n (n>=1), files are sorted. Binary search to find the + // earliest file whose largest key >= ikey. Search left bound and + // right bound are used to narrow the range. + if (fp_ctx.search_left_bound <= fp_ctx.search_right_bound) { + if (fp_ctx.search_right_bound == FileIndexer::kLevelMaxIndex) { + fp_ctx.search_right_bound = + static_cast(curr_file_level_->num_files) - 1; + } + // `search_right_bound_` is an inclusive upper-bound, but since it + // was determined based on user key, it is still possible the lookup + // key falls to the right of `search_right_bound_`'s corresponding + // file. So, pass a limit one higher, which allows us to detect this + // case. + Slice& ikey = mget_iter->ikey; + start_index = FindFileInRange( + *internal_comparator_, *curr_file_level_, ikey, + static_cast(fp_ctx.search_left_bound), + static_cast(fp_ctx.search_right_bound) + 1); + if (start_index == fp_ctx.search_right_bound + 1) { + // `ikey_` comes after `search_right_bound_`. The lookup key does + // not exist on this level, so let's skip this level and do a full + // binary search on the next level. + fp_ctx.search_left_bound = 0; + fp_ctx.search_right_bound = FileIndexer::kLevelMaxIndex; + current_level_range_.SkipKey(mget_iter); + continue; + } else { + level_contains_keys = true; + } + } else { + // search_left_bound > search_right_bound, key does not exist in + // this level. Since no comparison is done in this level, it will + // need to search all files in the next level. + fp_ctx.search_left_bound = 0; + fp_ctx.search_right_bound = FileIndexer::kLevelMaxIndex; + current_level_range_.SkipKey(mget_iter); + continue; + } + } + fp_ctx.start_index_in_curr_level = start_index; + fp_ctx.curr_index_in_curr_level = start_index; + } + if (level_contains_keys) { + batch_iter_prev_ = current_level_range_.begin(); + upper_key_ = batch_iter_ = current_level_range_.begin(); + return true; + } + curr_level_++; + } + // curr_level_ = num_levels_. So, no more levels to search. 
+ return false; + } +}; + +VersionStorageInfo::~VersionStorageInfo() { delete[] files_; } + +Version::~Version() { + assert(refs_ == 0); + + // Remove from linked list + prev_->next_ = next_; + next_->prev_ = prev_; + + // Drop references to files + for (int level = 0; level < storage_info_.num_levels_; level++) { + for (size_t i = 0; i < storage_info_.files_[level].size(); i++) { + FileMetaData* f = storage_info_.files_[level][i]; + assert(f->refs > 0); + f->refs--; + if (f->refs <= 0) { + assert(cfd_ != nullptr); + uint32_t path_id = f->fd.GetPathId(); + assert(path_id < cfd_->ioptions()->cf_paths.size()); + vset_->obsolete_files_.push_back( + ObsoleteFileInfo(f, cfd_->ioptions()->cf_paths[path_id].path, + cfd_->GetFileMetadataCacheReservationManager())); + } + } + } +} + +int FindFile(const InternalKeyComparator& icmp, + const LevelFilesBrief& file_level, const Slice& key) { + return FindFileInRange(icmp, file_level, key, 0, + static_cast(file_level.num_files)); +} + +void DoGenerateLevelFilesBrief(LevelFilesBrief* file_level, + const std::vector& files, + Arena* arena) { + assert(file_level); + assert(arena); + + size_t num = files.size(); + file_level->num_files = num; + char* mem = arena->AllocateAligned(num * sizeof(FdWithKeyRange)); + file_level->files = new (mem) FdWithKeyRange[num]; + + for (size_t i = 0; i < num; i++) { + Slice smallest_key = files[i]->smallest.Encode(); + Slice largest_key = files[i]->largest.Encode(); + + // Copy key slice to sequential memory + size_t smallest_size = smallest_key.size(); + size_t largest_size = largest_key.size(); + mem = arena->AllocateAligned(smallest_size + largest_size); + memcpy(mem, smallest_key.data(), smallest_size); + memcpy(mem + smallest_size, largest_key.data(), largest_size); + + FdWithKeyRange& f = file_level->files[i]; + f.fd = files[i]->fd; + f.file_metadata = files[i]; + f.smallest_key = Slice(mem, smallest_size); + f.largest_key = Slice(mem + smallest_size, largest_size); + } +} + +static bool AfterFile(const Comparator* ucmp, const Slice* user_key, + const FdWithKeyRange* f) { + // nullptr user_key occurs before all keys and is therefore never after *f + return (user_key != nullptr && + ucmp->CompareWithoutTimestamp(*user_key, + ExtractUserKey(f->largest_key)) > 0); +} + +static bool BeforeFile(const Comparator* ucmp, const Slice* user_key, + const FdWithKeyRange* f) { + // nullptr user_key occurs after all keys and is therefore never before *f + return (user_key != nullptr && + ucmp->CompareWithoutTimestamp(*user_key, + ExtractUserKey(f->smallest_key)) < 0); +} + +bool SomeFileOverlapsRange(const InternalKeyComparator& icmp, + bool disjoint_sorted_files, + const LevelFilesBrief& file_level, + const Slice* smallest_user_key, + const Slice* largest_user_key) { + const Comparator* ucmp = icmp.user_comparator(); + if (!disjoint_sorted_files) { + // Need to check against all files + for (size_t i = 0; i < file_level.num_files; i++) { + const FdWithKeyRange* f = &(file_level.files[i]); + if (AfterFile(ucmp, smallest_user_key, f) || + BeforeFile(ucmp, largest_user_key, f)) { + // No overlap + } else { + return true; // Overlap + } + } + return false; + } + + // Binary search over file list + uint32_t index = 0; + if (smallest_user_key != nullptr) { + // Find the leftmost possible internal key for smallest_user_key + InternalKey small; + small.SetMinPossibleForUserKey(*smallest_user_key); + index = FindFile(icmp, file_level, small.Encode()); + } + + if (index >= file_level.num_files) { + // beginning of range is after all files, 
so no overlap.
+    return false;
+  }
+
+  return !BeforeFile(ucmp, largest_user_key, &file_level.files[index]);
+}
+
+namespace {
+
+class LevelIterator final : public InternalIterator {
+ public:
+  // @param read_options Must outlive this iterator.
+  LevelIterator(
+      TableCache* table_cache, const ReadOptions& read_options,
+      const FileOptions& file_options, const InternalKeyComparator& icomparator,
+      const LevelFilesBrief* flevel,
+      const std::shared_ptr<const SliceTransform>& prefix_extractor,
+      bool should_sample, HistogramImpl* file_read_hist,
+      TableReaderCaller caller, bool skip_filters, int level,
+      uint8_t block_protection_bytes_per_key, RangeDelAggregator* range_del_agg,
+      const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
+          nullptr,
+      bool allow_unprepared_value = false,
+      TruncatedRangeDelIterator**** range_tombstone_iter_ptr_ = nullptr)
+      : table_cache_(table_cache),
+        read_options_(read_options),
+        file_options_(file_options),
+        icomparator_(icomparator),
+        user_comparator_(icomparator.user_comparator()),
+        flevel_(flevel),
+        prefix_extractor_(prefix_extractor),
+        file_read_hist_(file_read_hist),
+        should_sample_(should_sample),
+        caller_(caller),
+        skip_filters_(skip_filters),
+        allow_unprepared_value_(allow_unprepared_value),
+        file_index_(flevel_->num_files),
+        level_(level),
+        range_del_agg_(range_del_agg),
+        pinned_iters_mgr_(nullptr),
+        compaction_boundaries_(compaction_boundaries),
+        is_next_read_sequential_(false),
+        block_protection_bytes_per_key_(block_protection_bytes_per_key),
+        range_tombstone_iter_(nullptr),
+        to_return_sentinel_(false) {
+    // Empty level is not supported.
+    assert(flevel_ != nullptr && flevel_->num_files > 0);
+    if (range_tombstone_iter_ptr_) {
+      *range_tombstone_iter_ptr_ = &range_tombstone_iter_;
+    }
+  }
+
+  ~LevelIterator() override { delete file_iter_.Set(nullptr); }
+
+  // Seek to the first file with a key >= target.
+  // If range_tombstone_iter_ is not nullptr, then we pretend that file
+  // boundaries are fake keys (sentinel keys). These keys are used to keep range
+  // tombstones alive even when all point keys in an SST file are exhausted.
+  // These sentinel keys will be skipped in merging iterator.
+  void Seek(const Slice& target) override;
+  void SeekForPrev(const Slice& target) override;
+  void SeekToFirst() override;
+  void SeekToLast() override;
+  void Next() final override;
+  bool NextAndGetResult(IterateResult* result) override;
+  void Prev() override;
+
+  // In addition to valid and invalid state (!file_iter.Valid() and
+  // status.ok()), a third state of the iterator is when !file_iter_.Valid() and
+  // to_return_sentinel_. This means we are at the end of a file, and a sentinel
+  // key (the file boundary that we pretend as a key) is to be returned next.
+  // file_iter_.Valid() and to_return_sentinel_ should not both be true.
+  bool Valid() const override {
+    assert(!(file_iter_.Valid() && to_return_sentinel_));
+    return file_iter_.Valid() || to_return_sentinel_;
+  }
+  Slice key() const override {
+    assert(Valid());
+    if (to_return_sentinel_) {
+      // Sentinel should be returned after file_iter_ reaches the end of the
+      // file
+      assert(!file_iter_.Valid());
+      return sentinel_;
+    }
+    return file_iter_.key();
+  }
+
+  Slice value() const override {
+    assert(Valid());
+    assert(!to_return_sentinel_);
+    return file_iter_.value();
+  }
+
+  Status status() const override {
+    return file_iter_.iter() ? 
file_iter_.status() : Status::OK(); + } + + bool PrepareValue() override { return file_iter_.PrepareValue(); } + + inline bool MayBeOutOfLowerBound() override { + assert(Valid()); + return may_be_out_of_lower_bound_ && file_iter_.MayBeOutOfLowerBound(); + } + + inline IterBoundCheck UpperBoundCheckResult() override { + if (Valid()) { + return file_iter_.UpperBoundCheckResult(); + } else { + return IterBoundCheck::kUnknown; + } + } + + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; + if (file_iter_.iter()) { + file_iter_.SetPinnedItersMgr(pinned_iters_mgr); + } + } + + bool IsKeyPinned() const override { + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + file_iter_.iter() && file_iter_.IsKeyPinned(); + } + + bool IsValuePinned() const override { + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + file_iter_.iter() && file_iter_.IsValuePinned(); + } + + bool IsDeleteRangeSentinelKey() const override { return to_return_sentinel_; } + + private: + // Return true if at least one invalid file is seen and skipped. + bool SkipEmptyFileForward(); + void SkipEmptyFileBackward(); + void SetFileIterator(InternalIterator* iter); + void InitFileIterator(size_t new_file_index); + + const Slice& file_smallest_key(size_t file_index) { + assert(file_index < flevel_->num_files); + return flevel_->files[file_index].smallest_key; + } + + const Slice& file_largest_key(size_t file_index) { + assert(file_index < flevel_->num_files); + return flevel_->files[file_index].largest_key; + } + + bool KeyReachedUpperBound(const Slice& internal_key) { + return read_options_.iterate_upper_bound != nullptr && + user_comparator_.CompareWithoutTimestamp( + ExtractUserKey(internal_key), /*a_has_ts=*/true, + *read_options_.iterate_upper_bound, /*b_has_ts=*/false) >= 0; + } + + void ClearRangeTombstoneIter() { + if (range_tombstone_iter_ && *range_tombstone_iter_) { + delete *range_tombstone_iter_; + *range_tombstone_iter_ = nullptr; + } + } + + // Move file_iter_ to the file at file_index_. + // range_tombstone_iter_ is updated with a range tombstone iterator + // into the new file. Old range tombstone iterator is cleared. + InternalIterator* NewFileIterator() { + assert(file_index_ < flevel_->num_files); + auto file_meta = flevel_->files[file_index_]; + if (should_sample_) { + sample_file_read_inc(file_meta.file_metadata); + } + + const InternalKey* smallest_compaction_key = nullptr; + const InternalKey* largest_compaction_key = nullptr; + if (compaction_boundaries_ != nullptr) { + smallest_compaction_key = (*compaction_boundaries_)[file_index_].smallest; + largest_compaction_key = (*compaction_boundaries_)[file_index_].largest; + } + CheckMayBeOutOfLowerBound(); + ClearRangeTombstoneIter(); + return table_cache_->NewIterator( + read_options_, file_options_, icomparator_, *file_meta.file_metadata, + range_del_agg_, prefix_extractor_, + nullptr /* don't need reference to table */, file_read_hist_, caller_, + /*arena=*/nullptr, skip_filters_, level_, + /*max_file_size_for_l0_meta_pin=*/0, smallest_compaction_key, + largest_compaction_key, allow_unprepared_value_, + block_protection_bytes_per_key_, range_tombstone_iter_); + } + + // Check if current file being fully within iterate_lower_bound. + // + // Note MyRocks may update iterate bounds between seek. To workaround it, + // we need to check and update may_be_out_of_lower_bound_ accordingly. 
+  void CheckMayBeOutOfLowerBound() {
+    if (read_options_.iterate_lower_bound != nullptr &&
+        file_index_ < flevel_->num_files) {
+      may_be_out_of_lower_bound_ =
+          user_comparator_.CompareWithoutTimestamp(
+              ExtractUserKey(file_smallest_key(file_index_)), /*a_has_ts=*/true,
+              *read_options_.iterate_lower_bound, /*b_has_ts=*/false) < 0;
+    }
+  }
+
+  TableCache* table_cache_;
+  const ReadOptions& read_options_;
+  const FileOptions& file_options_;
+  const InternalKeyComparator& icomparator_;
+  const UserComparatorWrapper user_comparator_;
+  const LevelFilesBrief* flevel_;
+  mutable FileDescriptor current_value_;
+  // `prefix_extractor_` may be non-null even for total order seek. Checking
+  // this variable is not the right way to identify whether prefix iterator
+  // is used.
+  const std::shared_ptr<const SliceTransform>& prefix_extractor_;
+
+  HistogramImpl* file_read_hist_;
+  bool should_sample_;
+  TableReaderCaller caller_;
+  bool skip_filters_;
+  bool allow_unprepared_value_;
+  bool may_be_out_of_lower_bound_ = true;
+  size_t file_index_;
+  int level_;
+  RangeDelAggregator* range_del_agg_;
+  IteratorWrapper file_iter_;  // May be nullptr
+  PinnedIteratorsManager* pinned_iters_mgr_;
+
+  // To be propagated to RangeDelAggregator in order to safely truncate range
+  // tombstones.
+  const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries_;
+
+  bool is_next_read_sequential_;
+
+  uint8_t block_protection_bytes_per_key_;
+
+  // This is set when this level iterator is used under a merging iterator
+  // that processes range tombstones. range_tombstone_iter_ points to where the
+  // merging iterator stores the range tombstones iterator for this level. When
+  // this level iterator moves to a new SST file, it updates the range
+  // tombstones accordingly through this pointer. So the merging iterator always
+  // has access to the current SST file's range tombstones.
+  //
+  // The level iterator treats file boundary as fake keys (sentinel keys) to
+  // keep range tombstones alive if needed and make upper level, i.e. merging
+  // iterator, aware of file changes (when level iterator moves to a new SST
+  // file, there is some bookkeeping work that needs to be done at merging
+  // iterator end).
+  //
+  // *range_tombstone_iter_ points to range tombstones of the current SST file
+  TruncatedRangeDelIterator** range_tombstone_iter_;
+
+  // Whether next/prev key is a sentinel key.
+  bool to_return_sentinel_ = false;
+  // The sentinel key to be returned
+  Slice sentinel_;
+  // Sets flags for if we should return the sentinel key next.
+  // The condition for returning sentinel is reaching the end of current
+  // file_iter_: !Valid() && status().ok().
+  void TrySetDeleteRangeSentinel(const Slice& boundary_key);
+  void ClearSentinel() { to_return_sentinel_ = false; }
+
+  // Set in Seek() when a prefix seek reaches end of the current file,
+  // and the next file has a different prefix. SkipEmptyFileForward()
+  // will not move to next file when this flag is set. 
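+  // Example (hypothetical keys, with a first-byte prefix extractor): after
+  // Seek("b3"), if the current file only holds "b1".."b2" and the next file
+  // starts at "c0", the prefix "b" is exhausted; setting this flag avoids
+  // opening the "c" file just to invalidate the iterator immediately.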
+ bool prefix_exhausted_ = false; +}; + +void LevelIterator::TrySetDeleteRangeSentinel(const Slice& boundary_key) { + assert(range_tombstone_iter_); + if (file_iter_.iter() != nullptr && !file_iter_.Valid() && + file_iter_.status().ok()) { + to_return_sentinel_ = true; + sentinel_ = boundary_key; + } +} + +void LevelIterator::Seek(const Slice& target) { + prefix_exhausted_ = false; + ClearSentinel(); + // Check whether the seek key fall under the same file + bool need_to_reseek = true; + if (file_iter_.iter() != nullptr && file_index_ < flevel_->num_files) { + const FdWithKeyRange& cur_file = flevel_->files[file_index_]; + if (icomparator_.InternalKeyComparator::Compare( + target, cur_file.largest_key) <= 0 && + icomparator_.InternalKeyComparator::Compare( + target, cur_file.smallest_key) >= 0) { + need_to_reseek = false; + assert(static_cast(FindFile(icomparator_, *flevel_, target)) == + file_index_); + } + } + if (need_to_reseek) { + TEST_SYNC_POINT("LevelIterator::Seek:BeforeFindFile"); + size_t new_file_index = FindFile(icomparator_, *flevel_, target); + InitFileIterator(new_file_index); + } + + if (file_iter_.iter() != nullptr) { + file_iter_.Seek(target); + // Status::TryAgain indicates asynchronous request for retrieval of data + // blocks has been submitted. So it should return at this point and Seek + // should be called again to retrieve the requested block and execute the + // remaining code. + if (file_iter_.status() == Status::TryAgain()) { + return; + } + if (!file_iter_.Valid() && file_iter_.status().ok() && + prefix_extractor_ != nullptr && !read_options_.total_order_seek && + !read_options_.auto_prefix_mode && + file_index_ < flevel_->num_files - 1) { + size_t ts_sz = user_comparator_.user_comparator()->timestamp_size(); + Slice target_user_key_without_ts = + ExtractUserKeyAndStripTimestamp(target, ts_sz); + Slice next_file_first_user_key_without_ts = + ExtractUserKeyAndStripTimestamp(file_smallest_key(file_index_ + 1), + ts_sz); + if (prefix_extractor_->InDomain(target_user_key_without_ts) && + (!prefix_extractor_->InDomain(next_file_first_user_key_without_ts) || + user_comparator_.CompareWithoutTimestamp( + prefix_extractor_->Transform(target_user_key_without_ts), false, + prefix_extractor_->Transform( + next_file_first_user_key_without_ts), + false) != 0)) { + // SkipEmptyFileForward() will not advance to next file when this flag + // is set for reason detailed below. + // + // The file we initially positioned to has no keys under the target + // prefix, and the next file's smallest key has a different prefix than + // target. When doing prefix iterator seek, when keys for one prefix + // have been exhausted, it can jump to any key that is larger. Here we + // are enforcing a stricter contract than that, in order to make it + // easier for higher layers (merging and DB iterator) to reason the + // correctness: + // 1. Within the prefix, the result should be accurate. + // 2. If keys for the prefix is exhausted, it is either positioned to + // the next key after the prefix, or make the iterator invalid. + // A side benefit will be that it invalidates the iterator earlier so + // that the upper level merging iterator can merge fewer child + // iterators. + // + // The flag is cleared in Seek*() calls. There is no need to clear the + // flag in Prev() since Prev() will not be called when the flag is set + // for reasons explained below. If range_tombstone_iter_ is nullptr, + // then there is no file boundary sentinel key. 
Since + // !file_iter_.Valid() from the if condition above, this level iterator + // is !Valid(), so Prev() will not be called. If range_tombstone_iter_ + // is not nullptr, there are two cases depending on if this level + // iterator reaches top of the heap in merging iterator (the upper + // layer). + // If so, merging iterator will see the sentinel key, call + // NextAndGetResult() and the call to NextAndGetResult() will skip the + // sentinel key and makes this level iterator invalid. If not, then it + // could be because the upper layer is done before any method of this + // level iterator is called or another Seek*() call is invoked. Either + // way, Prev() is never called before Seek*(). + // The flag should not be cleared at the beginning of + // Next/NextAndGetResult() since it is used in SkipEmptyFileForward() + // called in Next/NextAndGetResult(). + prefix_exhausted_ = true; + } + } + + if (range_tombstone_iter_) { + TrySetDeleteRangeSentinel(file_largest_key(file_index_)); + } + } + SkipEmptyFileForward(); + CheckMayBeOutOfLowerBound(); +} + +void LevelIterator::SeekForPrev(const Slice& target) { + prefix_exhausted_ = false; + ClearSentinel(); + size_t new_file_index = FindFile(icomparator_, *flevel_, target); + // Seek beyond this level's smallest key + if (new_file_index == 0 && + icomparator_.Compare(target, file_smallest_key(0)) < 0) { + SetFileIterator(nullptr); + ClearRangeTombstoneIter(); + CheckMayBeOutOfLowerBound(); + return; + } + if (new_file_index >= flevel_->num_files) { + new_file_index = flevel_->num_files - 1; + } + + InitFileIterator(new_file_index); + if (file_iter_.iter() != nullptr) { + file_iter_.SeekForPrev(target); + if (range_tombstone_iter_ && + icomparator_.Compare(target, file_smallest_key(file_index_)) >= 0) { + // In SeekForPrev() case, it is possible that the target is less than + // file's lower boundary since largest key is used to determine file index + // (FindFile()). When target is less than file's lower boundary, sentinel + // key should not be set so that SeekForPrev() does not result in a key + // larger than target. This is correct in that there is no need to keep + // the range tombstones in this file alive as they only cover keys + // starting from the file's lower boundary, which is after `target`. + TrySetDeleteRangeSentinel(file_smallest_key(file_index_)); + } + SkipEmptyFileBackward(); + } + CheckMayBeOutOfLowerBound(); +} + +void LevelIterator::SeekToFirst() { + prefix_exhausted_ = false; + ClearSentinel(); + InitFileIterator(0); + if (file_iter_.iter() != nullptr) { + file_iter_.SeekToFirst(); + if (range_tombstone_iter_) { + // We do this in SeekToFirst() and SeekToLast() since + // we could have an empty file with only range tombstones. 
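+      // E.g. (hypothetical) a file created solely by DeleteRange() has no
+      // point keys, so file_iter_ is immediately !Valid() here; the sentinel
+      // keeps the file's tombstones visible to the merging iterator rather
+      // than letting SkipEmptyFileForward() drop the file entirely.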
+ TrySetDeleteRangeSentinel(file_largest_key(file_index_)); + } + } + SkipEmptyFileForward(); + CheckMayBeOutOfLowerBound(); +} + +void LevelIterator::SeekToLast() { + prefix_exhausted_ = false; + ClearSentinel(); + InitFileIterator(flevel_->num_files - 1); + if (file_iter_.iter() != nullptr) { + file_iter_.SeekToLast(); + if (range_tombstone_iter_) { + TrySetDeleteRangeSentinel(file_smallest_key(file_index_)); + } + } + SkipEmptyFileBackward(); + CheckMayBeOutOfLowerBound(); +} + +void LevelIterator::Next() { + assert(Valid()); + if (to_return_sentinel_) { + // file_iter_ is at EOF already when to_return_sentinel_ + ClearSentinel(); + } else { + file_iter_.Next(); + if (range_tombstone_iter_) { + TrySetDeleteRangeSentinel(file_largest_key(file_index_)); + } + } + SkipEmptyFileForward(); +} + +bool LevelIterator::NextAndGetResult(IterateResult* result) { + assert(Valid()); + // file_iter_ is at EOF already when to_return_sentinel_ + bool is_valid = !to_return_sentinel_ && file_iter_.NextAndGetResult(result); + if (!is_valid) { + if (to_return_sentinel_) { + ClearSentinel(); + } else if (range_tombstone_iter_) { + TrySetDeleteRangeSentinel(file_largest_key(file_index_)); + } + is_next_read_sequential_ = true; + SkipEmptyFileForward(); + is_next_read_sequential_ = false; + is_valid = Valid(); + if (is_valid) { + // This could be set in TrySetDeleteRangeSentinel() or + // SkipEmptyFileForward() above. + if (to_return_sentinel_) { + result->key = sentinel_; + result->bound_check_result = IterBoundCheck::kUnknown; + result->value_prepared = true; + } else { + result->key = key(); + result->bound_check_result = file_iter_.UpperBoundCheckResult(); + // Ideally, we should return the real file_iter_.value_prepared but the + // information is not here. It would casue an extra PrepareValue() + // for the first key of a file. + result->value_prepared = !allow_unprepared_value_; + } + } + } + return is_valid; +} + +void LevelIterator::Prev() { + assert(Valid()); + if (to_return_sentinel_) { + ClearSentinel(); + } else { + file_iter_.Prev(); + if (range_tombstone_iter_) { + TrySetDeleteRangeSentinel(file_smallest_key(file_index_)); + } + } + SkipEmptyFileBackward(); +} + +bool LevelIterator::SkipEmptyFileForward() { + bool seen_empty_file = false; + // Pause at sentinel key + while (!to_return_sentinel_ && + (file_iter_.iter() == nullptr || + (!file_iter_.Valid() && file_iter_.status().ok() && + file_iter_.iter()->UpperBoundCheckResult() != + IterBoundCheck::kOutOfBound))) { + seen_empty_file = true; + // Move to next file + if (file_index_ >= flevel_->num_files - 1 || + KeyReachedUpperBound(file_smallest_key(file_index_ + 1)) || + prefix_exhausted_) { + SetFileIterator(nullptr); + ClearRangeTombstoneIter(); + break; + } + // may init a new *range_tombstone_iter + InitFileIterator(file_index_ + 1); + // We moved to a new SST file + // Seek range_tombstone_iter_ to reset its !Valid() default state. + // We do not need to call range_tombstone_iter_.Seek* in + // LevelIterator::Seek* since when the merging iterator calls + // LevelIterator::Seek*, it should also call Seek* into the corresponding + // range tombstone iterator. 
+ if (file_iter_.iter() != nullptr) { + file_iter_.SeekToFirst(); + if (range_tombstone_iter_) { + if (*range_tombstone_iter_) { + (*range_tombstone_iter_)->SeekToFirst(); + } + TrySetDeleteRangeSentinel(file_largest_key(file_index_)); + } + } + } + return seen_empty_file; +} + +void LevelIterator::SkipEmptyFileBackward() { + // Pause at sentinel key + while (!to_return_sentinel_ && + (file_iter_.iter() == nullptr || + (!file_iter_.Valid() && file_iter_.status().ok()))) { + // Move to previous file + if (file_index_ == 0) { + // Already the first file + SetFileIterator(nullptr); + ClearRangeTombstoneIter(); + return; + } + InitFileIterator(file_index_ - 1); + // We moved to a new SST file + // Seek range_tombstone_iter_ to reset its !Valid() default state. + if (file_iter_.iter() != nullptr) { + file_iter_.SeekToLast(); + if (range_tombstone_iter_) { + if (*range_tombstone_iter_) { + (*range_tombstone_iter_)->SeekToLast(); + } + TrySetDeleteRangeSentinel(file_smallest_key(file_index_)); + if (to_return_sentinel_) { + break; + } + } + } + } +} + +void LevelIterator::SetFileIterator(InternalIterator* iter) { + if (pinned_iters_mgr_ && iter) { + iter->SetPinnedItersMgr(pinned_iters_mgr_); + } + + InternalIterator* old_iter = file_iter_.Set(iter); + + // Update the read pattern for PrefetchBuffer. + if (is_next_read_sequential_) { + file_iter_.UpdateReadaheadState(old_iter); + } + + if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) { + pinned_iters_mgr_->PinIterator(old_iter); + } else { + delete old_iter; + } +} + +void LevelIterator::InitFileIterator(size_t new_file_index) { + if (new_file_index >= flevel_->num_files) { + file_index_ = new_file_index; + SetFileIterator(nullptr); + ClearRangeTombstoneIter(); + return; + } else { + // If the file iterator shows incomplete, we try it again if users seek + // to the same file, as this time we may go to a different data block + // which is cached in block cache. + // + if (file_iter_.iter() != nullptr && !file_iter_.status().IsIncomplete() && + new_file_index == file_index_) { + // file_iter_ is already constructed with this iterator, so + // no need to change anything + } else { + file_index_ = new_file_index; + InternalIterator* iter = NewFileIterator(); + SetFileIterator(iter); + } + } +} +} // anonymous namespace + +Status Version::GetTableProperties(const ReadOptions& read_options, + std::shared_ptr* tp, + const FileMetaData* file_meta, + const std::string* fname) const { + auto table_cache = cfd_->table_cache(); + auto ioptions = cfd_->ioptions(); + Status s = table_cache->GetTableProperties( + file_options_, read_options, cfd_->internal_comparator(), *file_meta, tp, + mutable_cf_options_.block_protection_bytes_per_key, + mutable_cf_options_.prefix_extractor, true /* no io */); + if (s.ok()) { + return s; + } + + // We only ignore error type `Incomplete` since it's by design that we + // disallow table when it's not in table cache. + if (!s.IsIncomplete()) { + return s; + } + + // 2. Table is not present in table cache, we'll read the table properties + // directly from the properties block in the file. 
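+  // (Illustrative usage, not upstream code; `v` and `meta` are assumed to be
+  // a live Version and one of its FileMetaData entries:
+  //   std::shared_ptr<const TableProperties> tp;
+  //   Status s = v->GetTableProperties(read_options, &tp, meta);
+  // A caller only pays for the direct file read below on a table-cache miss.)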
+  std::unique_ptr<FSRandomAccessFile> file;
+  std::string file_name;
+  if (fname != nullptr) {
+    file_name = *fname;
+  } else {
+    file_name = TableFileName(ioptions->cf_paths, file_meta->fd.GetNumber(),
+                              file_meta->fd.GetPathId());
+  }
+  s = ioptions->fs->NewRandomAccessFile(file_name, file_options_, &file,
+                                        nullptr);
+  if (!s.ok()) {
+    return s;
+  }
+
+  // By setting the magic number to kNullTableMagicNumber, we can bypass
+  // the magic number check in the footer.
+  std::unique_ptr<RandomAccessFileReader> file_reader(
+      new RandomAccessFileReader(
+          std::move(file), file_name, ioptions->clock /* clock */, io_tracer_,
+          ioptions->stats /* stats */,
+          Histograms::SST_READ_MICROS /* hist_type */,
+          nullptr /* file_read_hist */, nullptr /* rate_limiter */,
+          ioptions->listeners));
+  std::unique_ptr<TableProperties> props;
+  s = ReadTableProperties(
+      file_reader.get(), file_meta->fd.GetFileSize(),
+      Footer::kNullTableMagicNumber /* table's magic number */, *ioptions,
+      read_options, &props);
+  if (!s.ok()) {
+    return s;
+  }
+  *tp = std::move(props);
+  RecordTick(ioptions->stats, NUMBER_DIRECT_LOAD_TABLE_PROPERTIES);
+  return s;
+}
+
+Status Version::GetPropertiesOfAllTables(const ReadOptions& read_options,
+                                         TablePropertiesCollection* props) {
+  Status s;
+  for (int level = 0; level < storage_info_.num_levels_; level++) {
+    s = GetPropertiesOfAllTables(read_options, props, level);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  return Status::OK();
+}
+
+Status Version::TablesRangeTombstoneSummary(int max_entries_to_print,
+                                            std::string* out_str) {
+  if (max_entries_to_print <= 0) {
+    return Status::OK();
+  }
+  int num_entries_left = max_entries_to_print;
+
+  std::stringstream ss;
+
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  for (int level = 0; level < storage_info_.num_levels_; level++) {
+    for (const auto& file_meta : storage_info_.files_[level]) {
+      auto fname =
+          TableFileName(cfd_->ioptions()->cf_paths, file_meta->fd.GetNumber(),
+                        file_meta->fd.GetPathId());
+
+      ss << "=== file : " << fname << " ===\n";
+
+      TableCache* table_cache = cfd_->table_cache();
+      std::unique_ptr<FragmentedRangeTombstoneIterator> tombstone_iter;
+
+      Status s = table_cache->GetRangeTombstoneIterator(
+          read_options, cfd_->internal_comparator(), *file_meta,
+          cfd_->GetLatestMutableCFOptions()->block_protection_bytes_per_key,
+          &tombstone_iter);
+      if (!s.ok()) {
+        return s;
+      }
+      if (tombstone_iter) {
+        tombstone_iter->SeekToFirst();
+
+        // TODO: print timestamp
+        while (tombstone_iter->Valid() && num_entries_left > 0) {
+          ss << "start: " << tombstone_iter->start_key().ToString(true)
+             << " end: " << tombstone_iter->end_key().ToString(true)
+             << " seq: " << tombstone_iter->seq() << '\n';
+          tombstone_iter->Next();
+          num_entries_left--;
+        }
+        if (num_entries_left <= 0) {
+          break;
+        }
+      }
+    }
+    if (num_entries_left <= 0) {
+      break;
+    }
+  }
+  assert(num_entries_left >= 0);
+  if (num_entries_left <= 0) {
+    ss << "(results may not be complete)\n";
+  }
+
+  *out_str = ss.str();
+  return Status::OK();
+}
+
+Status Version::GetPropertiesOfAllTables(const ReadOptions& read_options,
+                                         TablePropertiesCollection* props,
+                                         int level) {
+  for (const auto& file_meta : storage_info_.files_[level]) {
+    auto fname =
+        TableFileName(cfd_->ioptions()->cf_paths, file_meta->fd.GetNumber(),
+                      file_meta->fd.GetPathId());
+    // 1. If the table is already present in table cache, load table
+    // properties from there. 
+    std::shared_ptr<const TableProperties> table_properties;
+    Status s =
+        GetTableProperties(read_options, &table_properties, file_meta, &fname);
+    if (s.ok()) {
+      props->insert({fname, table_properties});
+    } else {
+      return s;
+    }
+  }
+
+  return Status::OK();
+}
+
+Status Version::GetPropertiesOfTablesInRange(
+    const ReadOptions& read_options, const Range* range, std::size_t n,
+    TablePropertiesCollection* props) const {
+  for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) {
+    for (decltype(n) i = 0; i < n; i++) {
+      // Convert user_key into a corresponding internal key.
+      InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
+      InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
+      std::vector<FileMetaData*> files;
+      storage_info_.GetOverlappingInputs(level, &k1, &k2, &files, -1, nullptr,
+                                         false);
+      for (const auto& file_meta : files) {
+        auto fname =
+            TableFileName(cfd_->ioptions()->cf_paths,
+                          file_meta->fd.GetNumber(), file_meta->fd.GetPathId());
+        if (props->count(fname) == 0) {
+          // 1. If the table is already present in table cache, load table
+          // properties from there.
+          std::shared_ptr<const TableProperties> table_properties;
+          Status s = GetTableProperties(read_options, &table_properties,
+                                        file_meta, &fname);
+          if (s.ok()) {
+            props->insert({fname, table_properties});
+          } else {
+            return s;
+          }
+        }
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
+Status Version::GetAggregatedTableProperties(
+    const ReadOptions& read_options,
+    std::shared_ptr<const TableProperties>* tp, int level) {
+  TablePropertiesCollection props;
+  Status s;
+  if (level < 0) {
+    s = GetPropertiesOfAllTables(read_options, &props);
+  } else {
+    s = GetPropertiesOfAllTables(read_options, &props, level);
+  }
+  if (!s.ok()) {
+    return s;
+  }
+
+  auto* new_tp = new TableProperties();
+  for (const auto& item : props) {
+    new_tp->Add(*item.second);
+  }
+  tp->reset(new_tp);
+  return Status::OK();
+}
+
+size_t Version::GetMemoryUsageByTableReaders(const ReadOptions& read_options) {
+  size_t total_usage = 0;
+  for (auto& file_level : storage_info_.level_files_brief_) {
+    for (size_t i = 0; i < file_level.num_files; i++) {
+      total_usage += cfd_->table_cache()->GetMemoryUsageByTableReader(
+          file_options_, read_options, cfd_->internal_comparator(),
+          *file_level.files[i].file_metadata,
+          mutable_cf_options_.block_protection_bytes_per_key,
+          mutable_cf_options_.prefix_extractor);
+    }
+  }
+  return total_usage;
+}
+
+void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) {
+  assert(cf_meta);
+  assert(cfd_);
+
+  cf_meta->name = cfd_->GetName();
+  cf_meta->size = 0;
+  cf_meta->file_count = 0;
+  cf_meta->levels.clear();
+
+  cf_meta->blob_file_size = 0;
+  cf_meta->blob_file_count = 0;
+  cf_meta->blob_files.clear();
+
+  auto* ioptions = cfd_->ioptions();
+  auto* vstorage = storage_info();
+
+  for (int level = 0; level < cfd_->NumberLevels(); level++) {
+    uint64_t level_size = 0;
+    cf_meta->file_count += vstorage->LevelFiles(level).size();
+    std::vector<SstFileMetaData> files;
+    for (const auto& file : vstorage->LevelFiles(level)) {
+      uint32_t path_id = file->fd.GetPathId();
+      std::string file_path;
+      if (path_id < ioptions->cf_paths.size()) {
+        file_path = ioptions->cf_paths[path_id].path;
+      } else {
+        assert(!ioptions->cf_paths.empty());
+        file_path = ioptions->cf_paths.back().path;
+      }
+      const uint64_t file_number = file->fd.GetNumber();
+      files.emplace_back(
+          MakeTableFileName("", file_number), file_number, file_path,
+          file->fd.GetFileSize(), file->fd.smallest_seqno,
+          file->fd.largest_seqno, file->smallest.user_key().ToString(),
+          file->largest.user_key().ToString(),
+          file->stats.num_reads_sampled.load(std::memory_order_relaxed),
+          file->being_compacted, file->temperature,
+          file->oldest_blob_file_number, file->TryGetOldestAncesterTime(),
+          file->TryGetFileCreationTime(), file->epoch_number,
+          file->file_checksum, file->file_checksum_func_name);
+      files.back().num_entries = file->num_entries;
+      files.back().num_deletions = file->num_deletions;
+      files.back().smallest = file->smallest.Encode().ToString();
+      files.back().largest = file->largest.Encode().ToString();
+      level_size += file->fd.GetFileSize();
+    }
+    cf_meta->levels.emplace_back(level, level_size, std::move(files));
+    cf_meta->size += level_size;
+  }
+  for (const auto& meta : vstorage->GetBlobFiles()) {
+    assert(meta);
+
+    cf_meta->blob_files.emplace_back(
+        meta->GetBlobFileNumber(), BlobFileName("", meta->GetBlobFileNumber()),
+        ioptions->cf_paths.front().path, meta->GetBlobFileSize(),
+        meta->GetTotalBlobCount(), meta->GetTotalBlobBytes(),
+        meta->GetGarbageBlobCount(), meta->GetGarbageBlobBytes(),
+        meta->GetChecksumMethod(), meta->GetChecksumValue());
+    ++cf_meta->blob_file_count;
+    cf_meta->blob_file_size += meta->GetBlobFileSize();
+  }
+}
+
+uint64_t Version::GetSstFilesSize() {
+  uint64_t sst_files_size = 0;
+  for (int level = 0; level < storage_info_.num_levels_; level++) {
+    for (const auto& file_meta : storage_info_.LevelFiles(level)) {
+      sst_files_size += file_meta->fd.GetFileSize();
+    }
+  }
+  return sst_files_size;
+}
+
+void Version::GetSstFilesBoundaryKeys(Slice* smallest_user_key,
+                                      Slice* largest_user_key) {
+  smallest_user_key->clear();
+  largest_user_key->clear();
+  bool initialized = false;
+  const Comparator* ucmp = storage_info_.user_comparator_;
+  for (int level = 0; level < cfd_->NumberLevels(); level++) {
+    if (storage_info_.LevelFiles(level).size() == 0) {
+      continue;
+    }
+    if (level == 0) {
+      // we need to consider all files on level 0
+      for (const auto& file : storage_info_.LevelFiles(level)) {
+        const Slice& start_user_key = file->smallest.user_key();
+        if (!initialized ||
+            ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
+          *smallest_user_key = start_user_key;
+        }
+        const Slice& end_user_key = file->largest.user_key();
+        if (!initialized ||
+            ucmp->Compare(end_user_key, *largest_user_key) > 0) {
+          *largest_user_key = end_user_key;
+        }
+        initialized = true;
+      }
+    } else {
+      // we only need to consider the first and last file
+      const Slice& start_user_key =
+          storage_info_.LevelFiles(level)[0]->smallest.user_key();
+      if (!initialized ||
+          ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
+        *smallest_user_key = start_user_key;
+      }
+      const Slice& end_user_key =
+          storage_info_.LevelFiles(level).back()->largest.user_key();
+      if (!initialized || ucmp->Compare(end_user_key, *largest_user_key) > 0) {
+        *largest_user_key = end_user_key;
+      }
+      initialized = true;
+    }
+  }
+}
+
+void Version::GetCreationTimeOfOldestFile(uint64_t* creation_time) {
+  uint64_t oldest_time = std::numeric_limits<uint64_t>::max();
+  for (int level = 0; level < storage_info_.num_non_empty_levels_; level++) {
+    for (FileMetaData* meta : storage_info_.LevelFiles(level)) {
+      assert(meta->fd.table_reader != nullptr);
+      uint64_t file_creation_time = meta->TryGetFileCreationTime();
+      if (file_creation_time == kUnknownFileCreationTime) {
+        *creation_time = 0;
+        return;
+      }
+      if (file_creation_time < oldest_time) {
+        oldest_time = file_creation_time;
+      }
+    }
+  }
+  *creation_time = oldest_time;
+}
+
+InternalIterator* Version::TEST_GetLevelIterator(
+    const ReadOptions& read_options, MergeIteratorBuilder* merge_iter_builder,
+    int level, bool allow_unprepared_value) {
+  auto* arena = merge_iter_builder->GetArena();
+  auto* mem = arena->AllocateAligned(sizeof(LevelIterator));
+  TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
+  auto level_iter = new (mem) LevelIterator(
+      cfd_->table_cache(), read_options, file_options_,
+      cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
+      mutable_cf_options_.prefix_extractor, should_sample_file_read(),
+      cfd_->internal_stats()->GetFileReadHist(level),
+      TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
+      mutable_cf_options_.block_protection_bytes_per_key,
+      nullptr /* range_del_agg */, nullptr /* compaction_boundaries */,
+      allow_unprepared_value, &tombstone_iter_ptr);
+  if (read_options.ignore_range_deletions) {
+    merge_iter_builder->AddIterator(level_iter);
+  } else {
+    merge_iter_builder->AddPointAndTombstoneIterator(
+        level_iter, nullptr /* tombstone_iter */, tombstone_iter_ptr);
+  }
+  return level_iter;
+}
+
+uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const {
+  // Estimation will be inaccurate when:
+  // (1) there exist merge keys
+  // (2) keys are directly overwritten
+  // (3) deletion on non-existing keys
+  // (4) low number of samples
+  if (current_num_samples_ == 0) {
+    return 0;
+  }
+
+  if (current_num_non_deletions_ <= current_num_deletions_) {
+    return 0;
+  }
+
+  uint64_t est = current_num_non_deletions_ - current_num_deletions_;
+
+  uint64_t file_count = 0;
+  for (int level = 0; level < num_levels_; ++level) {
+    file_count += files_[level].size();
+  }
+
+  if (current_num_samples_ < file_count) {
+    // casting to avoid overflowing
+    return static_cast<uint64_t>(
+        (est * static_cast<double>(file_count) / current_num_samples_));
+  } else {
+    return est;
+  }
+}
+
+double VersionStorageInfo::GetEstimatedCompressionRatioAtLevel(
+    int level) const {
+  assert(level < num_levels_);
+  uint64_t sum_file_size_bytes = 0;
+  uint64_t sum_data_size_bytes = 0;
+  for (auto* file_meta : files_[level]) {
+    sum_file_size_bytes += file_meta->fd.GetFileSize();
+    sum_data_size_bytes += file_meta->raw_key_size + file_meta->raw_value_size;
+  }
+  if (sum_file_size_bytes == 0) {
+    return -1.0;
+  }
+  return static_cast<double>(sum_data_size_bytes) / sum_file_size_bytes;
+}
+
+void Version::AddIterators(const ReadOptions& read_options,
+                           const FileOptions& soptions,
+                           MergeIteratorBuilder* merge_iter_builder,
+                           bool allow_unprepared_value) {
+  assert(storage_info_.finalized_);
+
+  for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) {
+    AddIteratorsForLevel(read_options, soptions, merge_iter_builder, level,
+                         allow_unprepared_value);
+  }
+}
+
+void Version::AddIteratorsForLevel(const ReadOptions& read_options,
+                                   const FileOptions& soptions,
+                                   MergeIteratorBuilder* merge_iter_builder,
+                                   int level, bool allow_unprepared_value) {
+  assert(storage_info_.finalized_);
+  if (level >= storage_info_.num_non_empty_levels()) {
+    // This is an empty level
+    return;
+  } else if (storage_info_.LevelFilesBrief(level).num_files == 0) {
+    // No files in this level
+    return;
+  }
+
+  bool should_sample = should_sample_file_read();
+
+  auto* arena = merge_iter_builder->GetArena();
+  if (level == 0) {
+    // Merge all level zero files together since they may overlap
+    TruncatedRangeDelIterator* tombstone_iter = nullptr;
+    for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) {
+      const auto& file = storage_info_.LevelFilesBrief(0).files[i];
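+      // NewIterator() also fills in tombstone_iter (its last argument) with
+      // the file's range tombstones, which are wired into the merging
+      // iterator below.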
+      auto table_iter = cfd_->table_cache()->NewIterator(
+          read_options, soptions, cfd_->internal_comparator(),
+          *file.file_metadata, /*range_del_agg=*/nullptr,
+          mutable_cf_options_.prefix_extractor, nullptr,
+          cfd_->internal_stats()->GetFileReadHist(0),
+          TableReaderCaller::kUserIterator, arena,
+          /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
+          /*smallest_compaction_key=*/nullptr,
+          /*largest_compaction_key=*/nullptr, allow_unprepared_value,
+          mutable_cf_options_.block_protection_bytes_per_key, &tombstone_iter);
+      if (read_options.ignore_range_deletions) {
+        merge_iter_builder->AddIterator(table_iter);
+      } else {
+        merge_iter_builder->AddPointAndTombstoneIterator(table_iter,
+                                                         tombstone_iter);
+      }
+    }
+    if (should_sample) {
+      // Count one sample for every L0 file. This is done per iterator
+      // creation rather than per Seek(), while files in other levels are
+      // recorded per seek. If users execute one range query per iterator,
+      // there may be some discrepancy here.
+      for (FileMetaData* meta : storage_info_.LevelFiles(0)) {
+        sample_file_read_inc(meta);
+      }
+    }
+  } else if (storage_info_.LevelFilesBrief(level).num_files > 0) {
+    // For levels > 0, we can use a concatenating iterator that sequentially
+    // walks through the non-overlapping files in the level, opening them
+    // lazily.
+    auto* mem = arena->AllocateAligned(sizeof(LevelIterator));
+    TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
+    auto level_iter = new (mem) LevelIterator(
+        cfd_->table_cache(), read_options, soptions,
+        cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
+        mutable_cf_options_.prefix_extractor, should_sample_file_read(),
+        cfd_->internal_stats()->GetFileReadHist(level),
+        TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
+        mutable_cf_options_.block_protection_bytes_per_key,
+        /*range_del_agg=*/nullptr,
+        /*compaction_boundaries=*/nullptr, allow_unprepared_value,
+        &tombstone_iter_ptr);
+    if (read_options.ignore_range_deletions) {
+      merge_iter_builder->AddIterator(level_iter);
+    } else {
+      merge_iter_builder->AddPointAndTombstoneIterator(
+          level_iter, nullptr /* tombstone_iter */, tombstone_iter_ptr);
+    }
+  }
+}
+
+Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
+                                         const FileOptions& file_options,
+                                         const Slice& smallest_user_key,
+                                         const Slice& largest_user_key,
+                                         int level, bool* overlap) {
+  assert(storage_info_.finalized_);
+
+  auto icmp = cfd_->internal_comparator();
+  auto ucmp = icmp.user_comparator();
+
+  Arena arena;
+  Status status;
+  ReadRangeDelAggregator range_del_agg(&icmp,
+                                       kMaxSequenceNumber /* upper_bound */);
+
+  *overlap = false;
+
+  if (level == 0) {
+    for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) {
+      const auto file = &storage_info_.LevelFilesBrief(0).files[i];
+      if (AfterFile(ucmp, &smallest_user_key, file) ||
+          BeforeFile(ucmp, &largest_user_key, file)) {
+        continue;
+      }
+      ScopedArenaIterator iter(cfd_->table_cache()->NewIterator(
+          read_options, file_options, cfd_->internal_comparator(),
+          *file->file_metadata, &range_del_agg,
+          mutable_cf_options_.prefix_extractor, nullptr,
+          cfd_->internal_stats()->GetFileReadHist(0),
+          TableReaderCaller::kUserIterator, &arena,
+          /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
+          /*smallest_compaction_key=*/nullptr,
+          /*largest_compaction_key=*/nullptr,
+          /*allow_unprepared_value=*/false,
+          mutable_cf_options_.block_protection_bytes_per_key));
+      status = OverlapWithIterator(ucmp, smallest_user_key, largest_user_key,
+                                   iter.get(), overlap);
+      if (!status.ok() || *overlap) {
+        break;
+      }
+    }
+  } else if (storage_info_.LevelFilesBrief(level).num_files > 0) {
+    auto mem = arena.AllocateAligned(sizeof(LevelIterator));
+    ScopedArenaIterator iter(new (mem) LevelIterator(
+        cfd_->table_cache(), read_options, file_options,
+        cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
+        mutable_cf_options_.prefix_extractor, should_sample_file_read(),
+        cfd_->internal_stats()->GetFileReadHist(level),
+        TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
+        mutable_cf_options_.block_protection_bytes_per_key, &range_del_agg,
+        nullptr, false));
+    status = OverlapWithIterator(ucmp, smallest_user_key, largest_user_key,
+                                 iter.get(), overlap);
+  }
+
+  if (status.ok() && *overlap == false &&
+      range_del_agg.IsRangeOverlapped(smallest_user_key, largest_user_key)) {
+    *overlap = true;
+  }
+  return status;
+}
+
+VersionStorageInfo::VersionStorageInfo(
+    const InternalKeyComparator* internal_comparator,
+    const Comparator* user_comparator, int levels,
+    CompactionStyle compaction_style, VersionStorageInfo* ref_vstorage,
+    bool _force_consistency_checks,
+    EpochNumberRequirement epoch_number_requirement)
+    : internal_comparator_(internal_comparator),
+      user_comparator_(user_comparator),
+      // cfd is nullptr if Version is dummy
+      num_levels_(levels),
+      num_non_empty_levels_(0),
+      file_indexer_(user_comparator),
+      compaction_style_(compaction_style),
+      files_(new std::vector<FileMetaData*>[num_levels_]),
+      base_level_(num_levels_ == 1 ? -1 : 1),
+      lowest_unnecessary_level_(-1),
+      level_multiplier_(0.0),
+      files_by_compaction_pri_(num_levels_),
+      level0_non_overlapping_(false),
+      next_file_to_compact_by_size_(num_levels_),
+      compaction_score_(num_levels_),
+      compaction_level_(num_levels_),
+      l0_delay_trigger_count_(0),
+      compact_cursor_(num_levels_),
+      accumulated_file_size_(0),
+      accumulated_raw_key_size_(0),
+      accumulated_raw_value_size_(0),
+      accumulated_num_non_deletions_(0),
+      accumulated_num_deletions_(0),
+      current_num_non_deletions_(0),
+      current_num_deletions_(0),
+      current_num_samples_(0),
+      estimated_compaction_needed_bytes_(0),
+      finalized_(false),
+      force_consistency_checks_(_force_consistency_checks),
+      epoch_number_requirement_(epoch_number_requirement) {
+  if (ref_vstorage != nullptr) {
+    accumulated_file_size_ = ref_vstorage->accumulated_file_size_;
+    accumulated_raw_key_size_ = ref_vstorage->accumulated_raw_key_size_;
+    accumulated_raw_value_size_ = ref_vstorage->accumulated_raw_value_size_;
+    accumulated_num_non_deletions_ =
+        ref_vstorage->accumulated_num_non_deletions_;
+    accumulated_num_deletions_ = ref_vstorage->accumulated_num_deletions_;
+    current_num_non_deletions_ = ref_vstorage->current_num_non_deletions_;
+    current_num_deletions_ = ref_vstorage->current_num_deletions_;
+    current_num_samples_ = ref_vstorage->current_num_samples_;
+    oldest_snapshot_seqnum_ = ref_vstorage->oldest_snapshot_seqnum_;
+    compact_cursor_ = ref_vstorage->compact_cursor_;
+    compact_cursor_.resize(num_levels_);
+  }
+}
+
+Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset,
+                 const FileOptions& file_opt,
+                 const MutableCFOptions mutable_cf_options,
+                 const std::shared_ptr<IOTracer>& io_tracer,
+                 uint64_t version_number,
+                 EpochNumberRequirement epoch_number_requirement)
+    : env_(vset->env_),
+      clock_(vset->clock_),
+      cfd_(column_family_data),
+      info_log_((cfd_ == nullptr) ? nullptr : cfd_->ioptions()->logger),
+      db_statistics_((cfd_ == nullptr) ? nullptr : cfd_->ioptions()->stats),
+      table_cache_((cfd_ == nullptr) ? nullptr : cfd_->table_cache()),
+      blob_source_(cfd_ ? cfd_->blob_source() : nullptr),
+      merge_operator_(
+          (cfd_ == nullptr) ? nullptr
+                            : cfd_->ioptions()->merge_operator.get()),
+      storage_info_(
+          (cfd_ == nullptr) ? nullptr : &cfd_->internal_comparator(),
+          (cfd_ == nullptr) ? nullptr : cfd_->user_comparator(),
+          cfd_ == nullptr ? 0 : cfd_->NumberLevels(),
+          cfd_ == nullptr ? kCompactionStyleLevel
+                          : cfd_->ioptions()->compaction_style,
+          (cfd_ == nullptr || cfd_->current() == nullptr)
+              ? nullptr
+              : cfd_->current()->storage_info(),
+          cfd_ == nullptr ? false : cfd_->ioptions()->force_consistency_checks,
+          epoch_number_requirement),
+      vset_(vset),
+      next_(this),
+      prev_(this),
+      refs_(0),
+      file_options_(file_opt),
+      mutable_cf_options_(mutable_cf_options),
+      max_file_size_for_l0_meta_pin_(
+          MaxFileSizeForL0MetaPin(mutable_cf_options_)),
+      version_number_(version_number),
+      io_tracer_(io_tracer),
+      use_async_io_(env_->GetFileSystem()->use_async_io()) {}
+
+Status Version::GetBlob(const ReadOptions& read_options, const Slice& user_key,
+                        const Slice& blob_index_slice,
+                        FilePrefetchBuffer* prefetch_buffer,
+                        PinnableSlice* value, uint64_t* bytes_read) const {
+  BlobIndex blob_index;
+
+  {
+    Status s = blob_index.DecodeFrom(blob_index_slice);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  return GetBlob(read_options, user_key, blob_index, prefetch_buffer, value,
+                 bytes_read);
+}
+
+Status Version::GetBlob(const ReadOptions& read_options, const Slice& user_key,
+                        const BlobIndex& blob_index,
+                        FilePrefetchBuffer* prefetch_buffer,
+                        PinnableSlice* value, uint64_t* bytes_read) const {
+  assert(value);
+
+  if (blob_index.HasTTL() || blob_index.IsInlined()) {
+    return Status::Corruption("Unexpected TTL/inlined blob index");
+  }
+
+  const uint64_t blob_file_number = blob_index.file_number();
+
+  auto blob_file_meta = storage_info_.GetBlobFileMetaData(blob_file_number);
+  if (!blob_file_meta) {
+    return Status::Corruption("Invalid blob file number");
+  }
+
+  assert(blob_source_);
+  value->Reset();
+  const Status s = blob_source_->GetBlob(
+      read_options, user_key, blob_file_number, blob_index.offset(),
+      blob_file_meta->GetBlobFileSize(), blob_index.size(),
+      blob_index.compression(), prefetch_buffer, value, bytes_read);
+
+  return s;
+}
+
+void Version::MultiGetBlob(
+    const ReadOptions& read_options, MultiGetRange& range,
+    std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs) {
+  assert(!blob_ctxs.empty());
+
+  autovector<BlobFileReadRequests> blob_reqs;
+
+  for (auto& ctx : blob_ctxs) {
+    const auto file_number = ctx.first;
+    const auto blob_file_meta = storage_info_.GetBlobFileMetaData(file_number);
+
+    autovector<BlobReadRequest> blob_reqs_in_file;
+    BlobReadContexts& blobs_in_file = ctx.second;
+    for (auto& blob : blobs_in_file) {
+      const BlobIndex& blob_index = blob.blob_index;
+      const KeyContext* const key_context = blob.key_context;
+      assert(key_context);
+      assert(key_context->get_context);
+      assert(key_context->s);
+
+      if (key_context->value) {
+        key_context->value->Reset();
+      } else {
+        assert(key_context->columns);
+        key_context->columns->Reset();
+      }
+
+      if (!blob_file_meta) {
+        *key_context->s = Status::Corruption("Invalid blob file number");
+        continue;
+      }
+
+      if (blob_index.HasTTL() || blob_index.IsInlined()) {
+        *key_context->s =
+            Status::Corruption("Unexpected TTL/inlined blob index");
+        continue;
+      }
+
+      blob_reqs_in_file.emplace_back(
+          key_context->get_context->ukey_to_get_blob_value(),
+          blob_index.offset(), blob_index.size(), blob_index.compression(),
+          &blob.result, key_context->s);
+    }
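+    // Submit a read batch for this blob file only if at least one of its
+    // blobs passed the validation above.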
+    if (blob_reqs_in_file.size() > 0) {
+      const auto file_size = blob_file_meta->GetBlobFileSize();
+      blob_reqs.emplace_back(file_number, file_size, blob_reqs_in_file);
+    }
+  }
+
+  if (blob_reqs.size() > 0) {
+    blob_source_->MultiGetBlob(read_options, blob_reqs,
+                               /*bytes_read=*/nullptr);
+  }
+
+  for (auto& ctx : blob_ctxs) {
+    BlobReadContexts& blobs_in_file = ctx.second;
+    for (auto& blob : blobs_in_file) {
+      const KeyContext* const key_context = blob.key_context;
+      assert(key_context);
+      assert(key_context->get_context);
+      assert(key_context->s);
+
+      if (key_context->s->ok()) {
+        if (key_context->value) {
+          *key_context->value = std::move(blob.result);
+          range.AddValueSize(key_context->value->size());
+        } else {
+          assert(key_context->columns);
+          key_context->columns->SetPlainValue(std::move(blob.result));
+          range.AddValueSize(key_context->columns->serialized_size());
+        }
+
+        if (range.GetValueSize() > read_options.value_size_soft_limit) {
+          *key_context->s = Status::Aborted();
+        }
+      } else if (key_context->s->IsIncomplete()) {
+        // read_options.read_tier == kBlockCacheTier
+        // Cannot read blob(s): no disk I/O allowed
+        auto& get_context = *(key_context->get_context);
+        get_context.MarkKeyMayExist();
+      }
+    }
+  }
+}
+
+void Version::Get(const ReadOptions& read_options, const LookupKey& k,
+                  PinnableSlice* value, PinnableWideColumns* columns,
+                  std::string* timestamp, Status* status,
+                  MergeContext* merge_context,
+                  SequenceNumber* max_covering_tombstone_seq,
+                  PinnedIteratorsManager* pinned_iters_mgr, bool* value_found,
+                  bool* key_exists, SequenceNumber* seq, ReadCallback* callback,
+                  bool* is_blob, bool do_merge) {
+  Slice ikey = k.internal_key();
+  Slice user_key = k.user_key();
+
+  assert(status->ok() || status->IsMergeInProgress());
+
+  if (key_exists != nullptr) {
+    // will falsify below if not found
+    *key_exists = true;
+  }
+
+  uint64_t tracing_get_id = BlockCacheTraceHelper::kReservedGetId;
+  if (vset_ && vset_->block_cache_tracer_ &&
+      vset_->block_cache_tracer_->is_tracing_enabled()) {
+    tracing_get_id = vset_->block_cache_tracer_->NextGetId();
+  }
+
+  // Note: the old StackableDB-based BlobDB passes in
+  // GetImplOptions::is_blob_index; for the integrated BlobDB implementation,
+  // we need to provide it here.
+  bool is_blob_index = false;
+  bool* const is_blob_to_use = is_blob ? is_blob : &is_blob_index;
+  BlobFetcher blob_fetcher(this, read_options);
+
+  assert(pinned_iters_mgr);
+  GetContext get_context(
+      user_comparator(), merge_operator_, info_log_, db_statistics_,
+      status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key,
+      do_merge ? value : nullptr, do_merge ? columns : nullptr,
+      do_merge ? timestamp : nullptr, value_found, merge_context, do_merge,
+      max_covering_tombstone_seq, clock_, seq,
+      merge_operator_ ? pinned_iters_mgr : nullptr, callback, is_blob_to_use,
+      tracing_get_id, &blob_fetcher);
+
+  // Pin blocks that we read to hold merge operands
+  if (merge_operator_) {
+    pinned_iters_mgr->StartPinning();
+  }
+
+  FilePicker fp(user_key, ikey, &storage_info_.level_files_brief_,
+                storage_info_.num_non_empty_levels_,
+                &storage_info_.file_indexer_, user_comparator(),
+                internal_comparator());
+  FdWithKeyRange* f = fp.GetNextFile();
+
+  while (f != nullptr) {
+    if (*max_covering_tombstone_seq > 0) {
+      // The remaining files we look at will only contain covered keys, so
+      // we stop here.
+      break;
+    }
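+    // Account for this read in the file's read-sampling statistics.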
+    if (get_context.sample()) {
+      sample_file_read_inc(f->file_metadata);
+    }
+
+    bool timer_enabled =
+        GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
+        get_perf_context()->per_level_perf_context_enabled;
+    StopWatchNano timer(clock_, timer_enabled /* auto_start */);
+    *status = table_cache_->Get(
+        read_options, *internal_comparator(), *f->file_metadata, ikey,
+        &get_context, mutable_cf_options_.block_protection_bytes_per_key,
+        mutable_cf_options_.prefix_extractor,
+        cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
+        IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
+                        fp.IsHitFileLastInLevel()),
+        fp.GetHitFileLevel(), max_file_size_for_l0_meta_pin_);
+    // TODO: examine the behavior for corrupted key
+    if (timer_enabled) {
+      PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(),
+                                fp.GetHitFileLevel());
+    }
+    if (!status->ok()) {
+      if (db_statistics_ != nullptr) {
+        get_context.ReportCounters();
+      }
+      return;
+    }
+
+    // report the counters before returning
+    if (get_context.State() != GetContext::kNotFound &&
+        get_context.State() != GetContext::kMerge &&
+        db_statistics_ != nullptr) {
+      get_context.ReportCounters();
+    }
+    switch (get_context.State()) {
+      case GetContext::kNotFound:
+        // Keep searching in other files
+        break;
+      case GetContext::kMerge:
+        // TODO: update per-level perfcontext user_key_return_count for kMerge
+        break;
+      case GetContext::kFound:
+        if (fp.GetHitFileLevel() == 0) {
+          RecordTick(db_statistics_, GET_HIT_L0);
+        } else if (fp.GetHitFileLevel() == 1) {
+          RecordTick(db_statistics_, GET_HIT_L1);
+        } else if (fp.GetHitFileLevel() >= 2) {
+          RecordTick(db_statistics_, GET_HIT_L2_AND_UP);
+        }
+
+        PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1,
+                                  fp.GetHitFileLevel());
+
+        if (is_blob_index && do_merge && (value || columns)) {
+          assert(!columns ||
+                 (!columns->columns().empty() &&
+                  columns->columns().front().name() == kDefaultWideColumnName));
+
+          Slice blob_index =
+              value ? *value : columns->columns().front().value();
+
+          TEST_SYNC_POINT_CALLBACK("Version::Get::TamperWithBlobIndex",
+                                   &blob_index);
+
+          constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
+
+          PinnableSlice result;
+
+          constexpr uint64_t* bytes_read = nullptr;
+
+          *status = GetBlob(read_options, get_context.ukey_to_get_blob_value(),
+                            blob_index, prefetch_buffer, &result, bytes_read);
+          if (!status->ok()) {
+            if (status->IsIncomplete()) {
+              get_context.MarkKeyMayExist();
+            }
+            return;
+          }
+
+          if (value) {
+            *value = std::move(result);
+          } else {
+            assert(columns);
+            columns->SetPlainValue(std::move(result));
+          }
+        }
+
+        return;
+      case GetContext::kDeleted:
+        // Use empty error message for speed
+        *status = Status::NotFound();
+        return;
+      case GetContext::kCorrupt:
+        *status = Status::Corruption("corrupted key for ", user_key);
+        return;
+      case GetContext::kUnexpectedBlobIndex:
+        ROCKS_LOG_ERROR(info_log_, "Encounter unexpected blob index.");
+        *status = Status::NotSupported(
+            "Encounter unexpected blob index. Please open DB with "
+            "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
+        return;
+      case GetContext::kMergeOperatorFailed:
+        *status = Status::Corruption(Status::SubCode::kMergeOperatorFailed);
+        return;
+    }
+    f = fp.GetNextFile();
+  }
+  if (db_statistics_ != nullptr) {
+    get_context.ReportCounters();
+  }
+  if (GetContext::kMerge == get_context.State()) {
+    if (!do_merge) {
+      *status = Status::OK();
+      return;
+    }
+    if (!merge_operator_) {
+      *status = Status::InvalidArgument(
+          "merge_operator is not properly initialized.");
+      return;
+    }
+    // merge_operands are in saver and we hit the beginning of the key
+    // history; do a final merge of nullptr and operands.
+    if (value || columns) {
+      std::string result;
+      // `op_failure_scope` (an output parameter) is not provided (set to
+      // nullptr) since a failure must be propagated regardless of its value.
+      *status = MergeHelper::TimedFullMerge(
+          merge_operator_, user_key, nullptr, merge_context->GetOperands(),
+          &result, info_log_, db_statistics_, clock_,
+          /* result_operand */ nullptr, /* update_num_ops_stats */ true,
+          /* op_failure_scope */ nullptr);
+      if (status->ok()) {
+        if (LIKELY(value != nullptr)) {
+          *(value->GetSelf()) = std::move(result);
+          value->PinSelf();
+        } else {
+          assert(columns != nullptr);
+          columns->SetPlainValue(std::move(result));
+        }
+      }
+    }
+  } else {
+    if (key_exists != nullptr) {
+      *key_exists = false;
+    }
+    *status = Status::NotFound();  // Use an empty error message for speed
+  }
+}
+
+void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
+                       ReadCallback* callback) {
+  PinnedIteratorsManager pinned_iters_mgr;
+
+  // Pin blocks that we read to hold merge operands
+  if (merge_operator_) {
+    pinned_iters_mgr.StartPinning();
+  }
+  uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
+
+  if (vset_ && vset_->block_cache_tracer_ &&
+      vset_->block_cache_tracer_->is_tracing_enabled()) {
+    tracing_mget_id = vset_->block_cache_tracer_->NextGetId();
+  }
+  // Even though we know the batch size won't be > MAX_BATCH_SIZE,
+  // use autovector in order to avoid unnecessary construction of GetContext
+  // objects, which is expensive
+  autovector<GetContext, 16> get_ctx;
+  BlobFetcher blob_fetcher(this, read_options);
+  for (auto iter = range->begin(); iter != range->end(); ++iter) {
+    assert(iter->s->ok() || iter->s->IsMergeInProgress());
+    get_ctx.emplace_back(
+        user_comparator(), merge_operator_, info_log_, db_statistics_,
+        iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge,
+        iter->ukey_with_ts, iter->value, iter->columns, iter->timestamp,
+        nullptr, &(iter->merge_context), true,
+        &iter->max_covering_tombstone_seq, clock_, nullptr,
+        merge_operator_ ? &pinned_iters_mgr : nullptr, callback,
+        &iter->is_blob_index, tracing_mget_id, &blob_fetcher);
+    // MergeInProgress status, if set, has been transferred to the get_context
+    // state, so we set status to ok here. From now on, the iter status will
+    // be used for IO errors, and get_context state will be used for any
+    // key level errors
+    *(iter->s) = Status::OK();
+  }
+  int get_ctx_index = 0;
+  for (auto iter = range->begin(); iter != range->end();
+       ++iter, get_ctx_index++) {
+    iter->get_context = &(get_ctx[get_ctx_index]);
+  }
+
+  Status s;
+  // blob_file => [[blob_idx, it], ...]
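+  // Each entry of blob_ctxs groups the blob reads that point into one blob
+  // file, so MultiGetBlob() can batch them per file.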
+  std::unordered_map<uint64_t, BlobReadContexts> blob_ctxs;
+  MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
+#if USE_COROUTINES
+  if (read_options.async_io && read_options.optimize_multiget_for_io &&
+      using_coroutines() && use_async_io_) {
+    s = MultiGetAsync(read_options, range, &blob_ctxs);
+  } else
+#endif  // USE_COROUTINES
+  {
+    MultiGetRange file_picker_range(*range, range->begin(), range->end());
+    FilePickerMultiGet fp(&file_picker_range,
+                          &storage_info_.level_files_brief_,
+                          storage_info_.num_non_empty_levels_,
+                          &storage_info_.file_indexer_, user_comparator(),
+                          internal_comparator());
+    FdWithKeyRange* f = fp.GetNextFileInLevel();
+    uint64_t num_index_read = 0;
+    uint64_t num_filter_read = 0;
+    uint64_t num_sst_read = 0;
+    uint64_t num_level_read = 0;
+
+    int prev_level = -1;
+
+    while (!fp.IsSearchEnded()) {
+      // This will be set to true later if we actually look up in a file in L0.
+      // For per level stats purposes, an L0 file is treated as a level
+      bool dump_stats_for_l0_file = false;
+
+      // Avoid using the coroutine version if we're looking in a L0 file, since
+      // L0 files won't be parallelized anyway. The regular synchronous version
+      // is faster.
+      if (!read_options.async_io || !using_coroutines() || !use_async_io_ ||
+          fp.GetHitFileLevel() == 0 || !fp.RemainingOverlapInLevel()) {
+        if (f) {
+          bool skip_filters =
+              IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
+                              fp.IsHitFileLastInLevel());
+          // Call MultiGetFromSST for looking up a single file
+          s = MultiGetFromSST(read_options, fp.CurrentFileRange(),
+                              fp.GetHitFileLevel(), skip_filters,
+                              /*skip_range_deletions=*/false, f, blob_ctxs,
+                              /*table_handle=*/nullptr, num_filter_read,
+                              num_index_read, num_sst_read);
+          if (fp.GetHitFileLevel() == 0) {
+            dump_stats_for_l0_file = true;
+          }
+        }
+        if (s.ok()) {
+          f = fp.GetNextFileInLevel();
+        }
+#if USE_COROUTINES
+      } else {
+        std::vector<folly::coro::Task<Status>> mget_tasks;
+        while (f != nullptr) {
+          MultiGetRange file_range = fp.CurrentFileRange();
+          TableCache::TypedHandle* table_handle = nullptr;
+          bool skip_filters =
+              IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
+                              fp.IsHitFileLastInLevel());
+          bool skip_range_deletions = false;
+          if (!skip_filters) {
+            Status status = table_cache_->MultiGetFilter(
+                read_options, *internal_comparator(), *f->file_metadata,
+                mutable_cf_options_.prefix_extractor,
+                cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
+                fp.GetHitFileLevel(), &file_range, &table_handle,
+                mutable_cf_options_.block_protection_bytes_per_key);
+            skip_range_deletions = true;
+            if (status.ok()) {
+              skip_filters = true;
+            } else if (!status.IsNotSupported()) {
+              s = status;
+            }
+          }
+
+          if (!s.ok()) {
+            break;
+          }
+
+          if (!file_range.empty()) {
+            mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
+                read_options, file_range, fp.GetHitFileLevel(), skip_filters,
+                skip_range_deletions, f, blob_ctxs, table_handle,
+                num_filter_read, num_index_read, num_sst_read));
+          }
+          if (fp.KeyMaySpanNextFile()) {
+            break;
+          }
+          f = fp.GetNextFileInLevel();
+        }
+        if (mget_tasks.size() > 0) {
+          RecordTick(db_statistics_, MULTIGET_COROUTINE_COUNT,
+                     mget_tasks.size());
+          // Collect all results so far
+          std::vector<Status> statuses = folly::coro::blockingWait(
+              folly::coro::collectAllRange(std::move(mget_tasks))
+                  .scheduleOn(&range->context()->executor()));
+          if (s.ok()) {
+            for (Status stat : statuses) {
+              if (!stat.ok()) {
+                s = std::move(stat);
+                break;
+              }
+            }
+          }
+
+          if (s.ok() && fp.KeyMaySpanNextFile()) {
+            f = fp.GetNextFileInLevel();
+          }
+        }
+#endif  // USE_COROUTINES
+      }
+      // If we got a bad status, or we found the final result for all the
+      // keys, stop searching.
+      if (!s.ok() || file_picker_range.empty()) {
+        break;
+      }
+      if (!f) {
+        // Reached the end of this level. Prepare the next level
+        fp.PrepareNextLevelForSearch();
+        if (!fp.IsSearchEnded()) {
+          // It's possible there is no overlap on this level and f is nullptr
+          f = fp.GetNextFileInLevel();
+        }
+        if (dump_stats_for_l0_file ||
+            (prev_level != 0 && prev_level != (int)fp.GetHitFileLevel())) {
+          // Dump the stats if the search has moved to the next level and
+          // reset for next level.
+          if (num_filter_read + num_index_read) {
+            RecordInHistogram(db_statistics_,
+                              NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
+                              num_index_read + num_filter_read);
+          }
+          if (num_sst_read) {
+            RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL,
+                              num_sst_read);
+            num_level_read++;
+          }
+          num_filter_read = 0;
+          num_index_read = 0;
+          num_sst_read = 0;
+        }
+        prev_level = fp.GetHitFileLevel();
+      }
+    }
+
+    // Dump stats for most recent level
+    if (num_filter_read + num_index_read) {
+      RecordInHistogram(db_statistics_,
+                        NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
+                        num_index_read + num_filter_read);
+    }
+    if (num_sst_read) {
+      RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
+      num_level_read++;
+    }
+    if (num_level_read) {
+      RecordInHistogram(db_statistics_, NUM_LEVEL_READ_PER_MULTIGET,
+                        num_level_read);
+    }
+  }
+
+  if (s.ok() && !blob_ctxs.empty()) {
+    MultiGetBlob(read_options, keys_with_blobs_range, blob_ctxs);
+  }
+
+  // Process any left over keys
+  for (auto iter = range->begin(); s.ok() && iter != range->end(); ++iter) {
+    GetContext& get_context = *iter->get_context;
+    Status* status = iter->s;
+    Slice user_key = iter->lkey->user_key();
+
+    if (db_statistics_ != nullptr) {
+      get_context.ReportCounters();
+    }
+    if (GetContext::kMerge == get_context.State()) {
+      if (!merge_operator_) {
+        *status = Status::InvalidArgument(
+            "merge_operator is not properly initialized.");
+        range->MarkKeyDone(iter);
+        continue;
+      }
+      // merge_operands are in saver and we hit the beginning of the key
+      // history; do a final merge of nullptr and operands.
+      std::string result;
+
+      // `op_failure_scope` (an output parameter) is not provided (set to
+      // nullptr) since a failure must be propagated regardless of its value.
+      *status = MergeHelper::TimedFullMerge(
+          merge_operator_, user_key, nullptr,
+          iter->merge_context.GetOperands(), &result, info_log_,
+          db_statistics_, clock_,
+          /* result_operand */ nullptr, /* update_num_ops_stats */ true,
+          /* op_failure_scope */ nullptr);
+      if (LIKELY(iter->value != nullptr)) {
+        *iter->value->GetSelf() = std::move(result);
+        iter->value->PinSelf();
+        range->AddValueSize(iter->value->size());
+      } else {
+        assert(iter->columns);
+        iter->columns->SetPlainValue(std::move(result));
+        range->AddValueSize(iter->columns->serialized_size());
+      }
+
+      range->MarkKeyDone(iter);
+      if (range->GetValueSize() > read_options.value_size_soft_limit) {
+        s = Status::Aborted();
+        break;
+      }
+    } else {
+      range->MarkKeyDone(iter);
+      *status = Status::NotFound();  // Use an empty error message for speed
+    }
+  }
+
+  for (auto iter = range->begin(); iter != range->end(); ++iter) {
+    range->MarkKeyDone(iter);
+    *(iter->s) = s;
+  }
+}
+
+#ifdef USE_COROUTINES
+Status Version::ProcessBatch(
+    const ReadOptions& read_options, FilePickerMultiGet* batch,
+    std::vector<folly::coro::Task<Status>>& mget_tasks,
+    std::unordered_map<uint64_t, BlobReadContexts>* blob_ctxs,
+    autovector<FilePickerMultiGet, 4>& batches, std::deque<size_t>& waiting,
+    std::deque<size_t>& to_process, unsigned int& num_tasks_queued,
+    std::unordered_map<int, std::tuple<uint64_t, uint64_t, uint64_t>>&
+        mget_stats) {
+  FilePickerMultiGet& fp = *batch;
+  MultiGetRange range = fp.GetRange();
+  // Initialize a new empty range. Any keys that are not in this level will
+  // eventually become part of the new range.
+  MultiGetRange leftover(range, range.begin(), range.begin());
+  FdWithKeyRange* f = nullptr;
+  Status s;
+
+  f = fp.GetNextFileInLevel();
+  while (!f) {
+    fp.PrepareNextLevelForSearch();
+    if (!fp.IsSearchEnded()) {
+      f = fp.GetNextFileInLevel();
+    } else {
+      break;
+    }
+  }
+  while (f) {
+    MultiGetRange file_range = fp.CurrentFileRange();
+    TableCache::TypedHandle* table_handle = nullptr;
+    bool skip_filters = IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
+                                        fp.IsHitFileLastInLevel());
+    bool skip_range_deletions = false;
+    if (!skip_filters) {
+      Status status = table_cache_->MultiGetFilter(
+          read_options, *internal_comparator(), *f->file_metadata,
+          mutable_cf_options_.prefix_extractor,
+          cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
+          fp.GetHitFileLevel(), &file_range, &table_handle,
+          mutable_cf_options_.block_protection_bytes_per_key);
+      if (status.ok()) {
+        skip_filters = true;
+        skip_range_deletions = true;
+      } else if (!status.IsNotSupported()) {
+        s = status;
+      }
+    }
+    if (!s.ok()) {
+      break;
+    }
+    // At this point, file_range contains any keys that are likely in this
+    // file. It may have false positives, but that's ok since higher level
+    // lookups for the key are dependent on this lookup anyway.
+    // Add the complement of file_range to leftover. That's the set of keys
+    // definitely not in this level.
+    // Subtract the complement of file_range from range, since they will be
+    // processed in a separate batch in parallel.
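+    // (MultiGetRange supports set-style operators here: ~ complements a
+    // range, += adds one, and -= removes one.)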
+    leftover += ~file_range;
+    range -= ~file_range;
+    if (!file_range.empty()) {
+      int level = fp.GetHitFileLevel();
+      auto stat = mget_stats.find(level);
+      if (stat == mget_stats.end()) {
+        auto entry = mget_stats.insert({level, {0, 0, 0}});
+        assert(entry.second);
+        stat = entry.first;
+      }
+
+      if (waiting.empty() && to_process.empty() &&
+          !fp.RemainingOverlapInLevel() && leftover.empty() &&
+          mget_tasks.empty()) {
+        // All keys are in one SST file, so take the fast path
+        s = MultiGetFromSST(read_options, file_range, fp.GetHitFileLevel(),
+                            skip_filters, skip_range_deletions, f, *blob_ctxs,
+                            table_handle, std::get<0>(stat->second),
+                            std::get<1>(stat->second),
+                            std::get<2>(stat->second));
+      } else {
+        mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
+            read_options, file_range, fp.GetHitFileLevel(), skip_filters,
+            skip_range_deletions, f, *blob_ctxs, table_handle,
+            std::get<0>(stat->second), std::get<1>(stat->second),
+            std::get<2>(stat->second)));
+        ++num_tasks_queued;
+      }
+    }
+    if (fp.KeyMaySpanNextFile() && !file_range.empty()) {
+      break;
+    }
+    f = fp.GetNextFileInLevel();
+  }
+  // Split the current batch only if some keys are likely in this level and
+  // some are not. Only split if we're done with this level, i.e. f is null.
+  // Otherwise, it means there are more files in this level to look at.
+  if (s.ok() && !f && !leftover.empty() && !range.empty()) {
+    fp.ReplaceRange(range);
+    batches.emplace_back(&leftover, fp);
+    to_process.emplace_back(batches.size() - 1);
+  }
+  // 1. If f is non-null, that means we might not be done with this level.
+  //    This can happen if one of the keys is the last key in the file, i.e.
+  //    fp.KeyMaySpanNextFile() is true.
+  // 2. If range is empty, then we're done with this range and there is no
+  //    need to prepare the next level.
+  // 3. If some tasks were queued for this range, then the next level will be
+  //    prepared after executing those tasks.
+  if (!f && !range.empty() && !num_tasks_queued) {
+    fp.PrepareNextLevelForSearch();
+  }
+  return s;
+}
+
+Status Version::MultiGetAsync(
+    const ReadOptions& options, MultiGetRange* range,
+    std::unordered_map<uint64_t, BlobReadContexts>* blob_ctxs) {
+  autovector<FilePickerMultiGet, 4> batches;
+  std::deque<size_t> waiting;
+  std::deque<size_t> to_process;
+  Status s;
+  std::vector<folly::coro::Task<Status>> mget_tasks;
+  std::unordered_map<int, std::tuple<uint64_t, uint64_t, uint64_t>> mget_stats;
+
+  // Create the initial batch with the input range
+  batches.emplace_back(range, &storage_info_.level_files_brief_,
+                       storage_info_.num_non_empty_levels_,
+                       &storage_info_.file_indexer_, user_comparator(),
+                       internal_comparator());
+  to_process.emplace_back(0);
+
+  while (!to_process.empty()) {
+    // As we process a batch, it may get split into two. So reserve space for
+    // an additional batch in the autovector in order to prevent later moves
+    // of elements in ProcessBatch().
+    batches.reserve(batches.size() + 1);
+
+    size_t idx = to_process.front();
+    FilePickerMultiGet* batch = &batches.at(idx);
+    unsigned int num_tasks_queued = 0;
+    to_process.pop_front();
+    if (batch->IsSearchEnded() || batch->GetRange().empty()) {
+      // If to_process is empty, i.e. no more batches to look at, then we need
+      // to schedule the enqueued coroutines and wait for them. Otherwise, we
+      // skip this batch and move to the next one in to_process.
+      if (!to_process.empty()) {
+        continue;
+      }
+    } else {
+      // Look through one level. This may split the batch and enqueue it to
+      // to_process
+      s = ProcessBatch(options, batch, mget_tasks, blob_ctxs, batches, waiting,
+                       to_process, num_tasks_queued, mget_stats);
+      // If ProcessBatch didn't enqueue any coroutine tasks, it means all
+      // keys were filtered out. So put the batch back in to_process to
+      // lookup in the next level
+      if (!num_tasks_queued && !batch->IsSearchEnded()) {
+        // Put this back in the processing queue
+        to_process.emplace_back(idx);
+      } else if (num_tasks_queued) {
+        waiting.emplace_back(idx);
+      }
+    }
+    // If ProcessBatch() returned an error, then schedule the enqueued
+    // coroutines and wait for them, then abort the MultiGet.
+    if (to_process.empty() || !s.ok()) {
+      if (mget_tasks.size() > 0) {
+        assert(waiting.size());
+        RecordTick(db_statistics_, MULTIGET_COROUTINE_COUNT,
+                   mget_tasks.size());
+        // Collect all results so far
+        std::vector<Status> statuses = folly::coro::blockingWait(
+            folly::coro::collectAllRange(std::move(mget_tasks))
+                .scheduleOn(&range->context()->executor()));
+        mget_tasks.clear();
+        if (s.ok()) {
+          for (Status stat : statuses) {
+            if (!stat.ok()) {
+              s = std::move(stat);
+              break;
+            }
+          }
+        }
+
+        if (!s.ok()) {
+          break;
+        }
+
+        for (size_t wait_idx : waiting) {
+          FilePickerMultiGet& fp = batches.at(wait_idx);
+          // 1. If fp.GetHitFile() is non-null, then there could be more
+          //    overlap in this level. So skip preparing next level.
+          // 2. If fp.GetRange() is empty, then this batch is completed
+          //    and no need to prepare the next level.
+          if (!fp.GetHitFile() && !fp.GetRange().empty()) {
+            fp.PrepareNextLevelForSearch();
+          }
+        }
+        to_process.swap(waiting);
+      } else {
+        assert(!s.ok() || waiting.size() == 0);
+      }
+    }
+    if (!s.ok()) {
+      break;
+    }
+  }
+
+  uint64_t num_levels = 0;
+  for (auto& stat : mget_stats) {
+    if (stat.first == 0) {
+      num_levels += std::get<2>(stat.second);
+    } else {
+      num_levels++;
+    }
+
+    uint64_t num_meta_reads =
+        std::get<0>(stat.second) + std::get<1>(stat.second);
+    uint64_t num_sst_reads = std::get<2>(stat.second);
+    if (num_meta_reads > 0) {
+      RecordInHistogram(db_statistics_,
+                        NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
+                        num_meta_reads);
+    }
+    if (num_sst_reads > 0) {
+      RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_reads);
+    }
+  }
+  if (num_levels > 0) {
+    RecordInHistogram(db_statistics_, NUM_LEVEL_READ_PER_MULTIGET, num_levels);
+  }
+
+  return s;
+}
+#endif
+
+bool Version::IsFilterSkipped(int level, bool is_file_last_in_level) {
+  // Reaching the bottom level implies misses at all upper levels, so we'll
+  // skip checking the filters when we predict a hit.
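+  // In other words, only the bottommost non-empty level skips its filters;
+  // for L0 this additionally requires the file to be the last one in the
+  // level.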
+  return cfd_->ioptions()->optimize_filters_for_hits &&
+         (level > 0 || is_file_last_in_level) &&
+         level == storage_info_.num_non_empty_levels() - 1;
+}
+
+void VersionStorageInfo::GenerateLevelFilesBrief() {
+  level_files_brief_.resize(num_non_empty_levels_);
+  for (int level = 0; level < num_non_empty_levels_; level++) {
+    DoGenerateLevelFilesBrief(&level_files_brief_[level], files_[level],
+                              &arena_);
+  }
+}
+
+void VersionStorageInfo::PrepareForVersionAppend(
+    const ImmutableOptions& immutable_options,
+    const MutableCFOptions& mutable_cf_options) {
+  ComputeCompensatedSizes();
+  UpdateNumNonEmptyLevels();
+  CalculateBaseBytes(immutable_options, mutable_cf_options);
+  UpdateFilesByCompactionPri(immutable_options, mutable_cf_options);
+  GenerateFileIndexer();
+  GenerateLevelFilesBrief();
+  GenerateLevel0NonOverlapping();
+  if (!immutable_options.allow_ingest_behind) {
+    GenerateBottommostFiles();
+  }
+  GenerateFileLocationIndex();
+}
+
+void Version::PrepareAppend(const MutableCFOptions& mutable_cf_options,
+                            const ReadOptions& read_options,
+                            bool update_stats) {
+  TEST_SYNC_POINT_CALLBACK(
+      "Version::PrepareAppend:forced_check",
+      reinterpret_cast<void*>(&storage_info_.force_consistency_checks_));
+
+  if (update_stats) {
+    UpdateAccumulatedStats(read_options);
+  }
+
+  storage_info_.PrepareForVersionAppend(*cfd_->ioptions(), mutable_cf_options);
+}
+
+bool Version::MaybeInitializeFileMetaData(const ReadOptions& read_options,
+                                          FileMetaData* file_meta) {
+  if (file_meta->init_stats_from_file ||
+      file_meta->compensated_file_size > 0) {
+    return false;
+  }
+  std::shared_ptr<const TableProperties> tp;
+  Status s = GetTableProperties(read_options, &tp, file_meta);
+  file_meta->init_stats_from_file = true;
+  if (!s.ok()) {
+    ROCKS_LOG_ERROR(vset_->db_options_->info_log,
+                    "Unable to load table properties for file %" PRIu64
+                    " --- %s\n",
+                    file_meta->fd.GetNumber(), s.ToString().c_str());
+    return false;
+  }
+  if (tp.get() == nullptr) return false;
+  file_meta->num_entries = tp->num_entries;
+  file_meta->num_deletions = tp->num_deletions;
+  file_meta->raw_value_size = tp->raw_value_size;
+  file_meta->raw_key_size = tp->raw_key_size;
+  file_meta->num_range_deletions = tp->num_range_deletions;
+  return true;
+}
+
+void VersionStorageInfo::UpdateAccumulatedStats(FileMetaData* file_meta) {
+  TEST_SYNC_POINT_CALLBACK("VersionStorageInfo::UpdateAccumulatedStats",
+                           nullptr);
+
+  assert(file_meta->init_stats_from_file);
+  accumulated_file_size_ += file_meta->fd.GetFileSize();
+  accumulated_raw_key_size_ += file_meta->raw_key_size;
+  accumulated_raw_value_size_ += file_meta->raw_value_size;
+  accumulated_num_non_deletions_ +=
+      file_meta->num_entries - file_meta->num_deletions;
+  accumulated_num_deletions_ += file_meta->num_deletions;
+
+  current_num_non_deletions_ +=
+      file_meta->num_entries - file_meta->num_deletions;
+  current_num_deletions_ += file_meta->num_deletions;
+  current_num_samples_++;
+}
+
+void VersionStorageInfo::RemoveCurrentStats(FileMetaData* file_meta) {
+  if (file_meta->init_stats_from_file) {
+    current_num_non_deletions_ -=
+        file_meta->num_entries - file_meta->num_deletions;
+    current_num_deletions_ -= file_meta->num_deletions;
+    current_num_samples_--;
+  }
+}
+
+void Version::UpdateAccumulatedStats(const ReadOptions& read_options) {
+  // maximum number of table properties loaded from files.
+  const int kMaxInitCount = 20;
+  int init_count = 0;
+  // Here, only the first kMaxInitCount files which haven't been
+  // initialized from file will be updated with num_deletions.
+  // The motivation here is to cap the maximum I/O per Version creation.
+  // The reason for choosing files from lower levels instead of higher
+  // levels is that such a design is able to propagate the initialization
+  // from lower levels to higher levels: when the num_deletions of
+  // lower-level files are updated, the lower-level files get an accurate
+  // compensated_file_size, so lower-level to higher-level compactions
+  // will be triggered, which creates higher-level files whose
+  // num_deletions will be updated here.
+  for (int level = 0;
+       level < storage_info_.num_levels_ && init_count < kMaxInitCount;
+       ++level) {
+    for (auto* file_meta : storage_info_.files_[level]) {
+      if (MaybeInitializeFileMetaData(read_options, file_meta)) {
+        // each FileMeta will be initialized only once.
+        storage_info_.UpdateAccumulatedStats(file_meta);
+        // when option "max_open_files" is -1, all the file metadata has
+        // already been read, so MaybeInitializeFileMetaData() won't incur
+        // any I/O cost. "max_open_files=-1" means that the table cache passed
+        // to the VersionSet and then to the ColumnFamilySet has a size of
+        // TableCache::kInfiniteCapacity
+        if (vset_->GetColumnFamilySet()->get_table_cache()->GetCapacity() ==
+            TableCache::kInfiniteCapacity) {
+          continue;
+        }
+        if (++init_count >= kMaxInitCount) {
+          break;
+        }
+      }
+    }
+  }
+  // In case all sampled files contain only deletion entries, we load the
+  // table properties of a file in a higher level to initialize that value.
+  for (int level = storage_info_.num_levels_ - 1;
+       storage_info_.accumulated_raw_value_size_ == 0 && level >= 0; --level) {
+    for (int i = static_cast<int>(storage_info_.files_[level].size()) - 1;
+         storage_info_.accumulated_raw_value_size_ == 0 && i >= 0; --i) {
+      if (MaybeInitializeFileMetaData(read_options,
+                                      storage_info_.files_[level][i])) {
+        storage_info_.UpdateAccumulatedStats(storage_info_.files_[level][i]);
+      }
+    }
+  }
+}
+
+void VersionStorageInfo::ComputeCompensatedSizes() {
+  static const int kDeletionWeightOnCompaction = 2;
+  uint64_t average_value_size = GetAverageValueSize();
+
+  // compute the compensated size
+  for (int level = 0; level < num_levels_; level++) {
+    for (auto* file_meta : files_[level]) {
+      // Here we only compute compensated_file_size for those file_meta
+      // which compensated_file_size is uninitialized (== 0). This is true only
+      // for files that have been created right now and no other thread has
+      // access to them. That's why we can safely mutate compensated_file_size.
+      if (file_meta->compensated_file_size == 0) {
+        file_meta->compensated_file_size = file_meta->fd.GetFileSize();
+        // Here we only boost the size of deletion entries of a file
+        // when the number of deletion entries is greater than the number of
+        // non-deletion entries in the file. The motivation here is that in
+        // a stable workload, the number of deletion entries should be roughly
+        // equal to the number of non-deletion entries. If we compensated the
+        // size of deletion entries in a stable workload, the deletion
+        // compensation logic might introduce unwanted effects which change
+        // the shape of the LSM tree.
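+        // Example: a file with num_entries = 100 of which 80 are point
+        // deletions (and none are range deletions) gets
+        // (80 * 2 - 100) * average_value_size * 2 = 120 * average_value_size
+        // extra bytes of compensated size from the branch below.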
+        if ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 >=
+            file_meta->num_entries) {
+          file_meta->compensated_file_size +=
+              ((file_meta->num_deletions - file_meta->num_range_deletions) *
+                   2 -
+               file_meta->num_entries) *
+              average_value_size * kDeletionWeightOnCompaction;
+        }
+        file_meta->compensated_file_size +=
+            file_meta->compensated_range_deletion_size;
+      }
+    }
+  }
+}
+
+int VersionStorageInfo::MaxInputLevel() const {
+  if (compaction_style_ == kCompactionStyleLevel) {
+    return num_levels() - 2;
+  }
+  return 0;
+}
+
+int VersionStorageInfo::MaxOutputLevel(bool allow_ingest_behind) const {
+  if (allow_ingest_behind) {
+    assert(num_levels() > 1);
+    return num_levels() - 2;
+  }
+  return num_levels() - 1;
+}
+
+void VersionStorageInfo::EstimateCompactionBytesNeeded(
+    const MutableCFOptions& mutable_cf_options) {
+  // Only implemented for level-based compaction
+  if (compaction_style_ != kCompactionStyleLevel) {
+    estimated_compaction_needed_bytes_ = 0;
+    return;
+  }
+
+  // Start from Level 0. If level 0 qualifies for compaction to level 1,
+  // we estimate the size of that compaction.
+  // Then we move on to the next level and see whether it qualifies for
+  // compaction to the next level. The size of the level is estimated as the
+  // actual size on the level plus the input bytes from the previous level,
+  // if there are any. If that exceeds the level target, take the excess
+  // bytes as compaction input and add the compaction size to the total.
+  // We keep doing this for Level 2, 3, etc., until the last level, and
+  // return the accumulated bytes.
+
+  uint64_t bytes_compact_to_next_level = 0;
+  uint64_t level_size = 0;
+  for (auto* f : files_[0]) {
+    level_size += f->fd.GetFileSize();
+  }
+  // Level 0
+  bool level0_compact_triggered = false;
+  if (static_cast<int>(files_[0].size()) >=
+          mutable_cf_options.level0_file_num_compaction_trigger ||
+      level_size >= mutable_cf_options.max_bytes_for_level_base) {
+    level0_compact_triggered = true;
+    estimated_compaction_needed_bytes_ = level_size;
+    bytes_compact_to_next_level = level_size;
+  } else {
+    estimated_compaction_needed_bytes_ = 0;
+  }
+
+  // Level 1 and up.
+  uint64_t bytes_next_level = 0;
+  for (int level = base_level(); level <= MaxInputLevel(); level++) {
+    level_size = 0;
+    if (bytes_next_level > 0) {
+#ifndef NDEBUG
+      uint64_t level_size2 = 0;
+      for (auto* f : files_[level]) {
+        level_size2 += f->fd.GetFileSize();
+      }
+      assert(level_size2 == bytes_next_level);
+#endif
+      level_size = bytes_next_level;
+      bytes_next_level = 0;
+    } else {
+      for (auto* f : files_[level]) {
+        level_size += f->fd.GetFileSize();
+      }
+    }
+    if (level == base_level() && level0_compact_triggered) {
+      // Add base level size to compaction if level0 compaction triggered.
+      estimated_compaction_needed_bytes_ += level_size;
+    }
+    // Add size added by previous compaction
+    level_size += bytes_compact_to_next_level;
+    bytes_compact_to_next_level = 0;
+    uint64_t level_target = MaxBytesForLevel(level);
+    if (level_size > level_target) {
+      bytes_compact_to_next_level = level_size - level_target;
+      // Estimate the actual compaction fan-out ratio as the size ratio
+      // between the two levels.
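+      // Example: if this level overflows its target by 10 MB and the next
+      // level holds 5x the data of this one, roughly 10 MB * (5 + 1) = 60 MB
+      // of compaction I/O is added to the estimate below.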
+
+      assert(bytes_next_level == 0);
+      if (level + 1 < num_levels_) {
+        for (auto* f : files_[level + 1]) {
+          bytes_next_level += f->fd.GetFileSize();
+        }
+      }
+      if (bytes_next_level > 0) {
+        assert(level_size > 0);
+        estimated_compaction_needed_bytes_ += static_cast<uint64_t>(
+            static_cast<double>(bytes_compact_to_next_level) *
+            (static_cast<double>(bytes_next_level) /
+                 static_cast<double>(level_size) +
+             1));
+      }
+    }
+  }
+}
+
+namespace {
+uint32_t GetExpiredTtlFilesCount(const ImmutableOptions& ioptions,
+                                 const MutableCFOptions& mutable_cf_options,
+                                 const std::vector<FileMetaData*>& files) {
+  uint32_t ttl_expired_files_count = 0;
+
+  int64_t _current_time;
+  auto status = ioptions.clock->GetCurrentTime(&_current_time);
+  if (status.ok()) {
+    const uint64_t current_time = static_cast<uint64_t>(_current_time);
+    for (FileMetaData* f : files) {
+      if (!f->being_compacted) {
+        uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime();
+        if (oldest_ancester_time != 0 &&
+            oldest_ancester_time < (current_time - mutable_cf_options.ttl)) {
+          ttl_expired_files_count++;
+        }
+      }
+    }
+  }
+  return ttl_expired_files_count;
+}
+
+bool ShouldChangeFileTemperature(const ImmutableOptions& ioptions,
+                                 const MutableCFOptions& mutable_cf_options,
+                                 const std::vector<FileMetaData*>& files) {
+  const std::vector<FileTemperatureAge>& ages =
+      mutable_cf_options.compaction_options_fifo
+          .file_temperature_age_thresholds;
+  if (ages.empty()) {
+    return false;
+  }
+  if (files.empty()) {
+    return false;
+  }
+  int64_t _current_time;
+  auto status = ioptions.clock->GetCurrentTime(&_current_time);
+  const uint64_t current_time = static_cast<uint64_t>(_current_time);
+  // We use the oldest_ancestor_time of a file as the estimated age of
+  // the file just older than it. This is the same logic used in
+  // FIFOCompactionPicker::PickTemperatureChangeCompaction().
+  if (status.ok() && current_time >= ages[0].age) {
+    uint64_t create_time_threshold = current_time - ages[0].age;
+    Temperature target_temp;
+    assert(files.size() >= 1);
+    for (size_t index = files.size() - 1; index >= 1; --index) {
+      FileMetaData* cur_file = files[index];
+      FileMetaData* prev_file = files[index - 1];
+      if (!cur_file->being_compacted) {
+        uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime();
+        if (oldest_ancestor_time == kUnknownOldestAncesterTime) {
+          return false;
+        }
+        if (oldest_ancestor_time > create_time_threshold) {
+          return false;
+        }
+        target_temp = ages[0].temperature;
+        for (size_t i = 1; i < ages.size(); ++i) {
+          if (current_time >= ages[i].age &&
+              oldest_ancestor_time <= current_time - ages[i].age) {
+            target_temp = ages[i].temperature;
+          }
+        }
+        if (cur_file->temperature != target_temp) {
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+}  // anonymous namespace
+
+void VersionStorageInfo::ComputeCompactionScore(
+    const ImmutableOptions& immutable_options,
+    const MutableCFOptions& mutable_cf_options) {
+  double total_downcompact_bytes = 0.0;
+  // Historically, score is defined as actual bytes in a level divided by
+  // the level's target size, and 1.0 is the threshold for triggering
+  // compaction. Higher score means higher prioritization.
+  // Now we keep the compaction triggering condition, but consider more
+  // factors for prioritization, while still keeping the 1.0 threshold.
+  // In order to provide flexibility for reducing score while still
+  // maintaining it to be over 1.0, we scale the original score by 10x
+  // if it is larger than 1.0.
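+  // Example: a level at 1.5x its target size scores 1.5 and is scaled to
+  // 15.0, which leaves room for de-prioritization below while staying above
+  // the 1.0 trigger threshold.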
+ const double kScoreScale = 10.0; + for (int level = 0; level <= MaxInputLevel(); level++) { + double score; + if (level == 0) { + // We treat level-0 specially by bounding the number of files + // instead of number of bytes for two reasons: + // + // (1) With larger write-buffer sizes, it is nice not to do too + // many level-0 compactions. + // + // (2) The files in level-0 are merged on every read and + // therefore we wish to avoid too many files when the individual + // file size is small (perhaps because of a small write-buffer + // setting, or very high compression ratios, or lots of + // overwrites/deletions). + int num_sorted_runs = 0; + uint64_t total_size = 0; + for (auto* f : files_[level]) { + total_downcompact_bytes += static_cast(f->fd.GetFileSize()); + if (!f->being_compacted) { + total_size += f->compensated_file_size; + num_sorted_runs++; + } + } + if (compaction_style_ == kCompactionStyleUniversal) { + // For universal compaction, we use level0 score to indicate + // compaction score for the whole DB. Adding other levels as if + // they are L0 files. + for (int i = 1; i < num_levels(); i++) { + // It's possible that a subset of the files in a level may be in a + // compaction, due to delete triggered compaction or trivial move. + // In that case, the below check may not catch a level being + // compacted as it only checks the first file. The worst that can + // happen is a scheduled compaction thread will find nothing to do. + if (!files_[i].empty() && !files_[i][0]->being_compacted) { + num_sorted_runs++; + } + } + } + + if (compaction_style_ == kCompactionStyleFIFO) { + score = static_cast(total_size) / + mutable_cf_options.compaction_options_fifo.max_table_files_size; + if (score < 1 && + mutable_cf_options.compaction_options_fifo.allow_compaction) { + score = std::max( + static_cast(num_sorted_runs) / + mutable_cf_options.level0_file_num_compaction_trigger, + score); + } + if (score < 1 && mutable_cf_options.ttl > 0) { + score = + std::max(static_cast(GetExpiredTtlFilesCount( + immutable_options, mutable_cf_options, files_[0])), + score); + } + if (score < 1 && + ShouldChangeFileTemperature(immutable_options, mutable_cf_options, + files_[0])) { + // For FIFO, just need a large enough score to trigger compaction. + const double kScoreForNeedCompaction = 1.1; + score = kScoreForNeedCompaction; + } + } else { + score = static_cast(num_sorted_runs) / + mutable_cf_options.level0_file_num_compaction_trigger; + if (compaction_style_ == kCompactionStyleLevel && num_levels() > 1) { + // Level-based involves L0->L0 compactions that can lead to oversized + // L0 files. Take into account size as well to avoid later giant + // compactions to the base level. + // If score in L0 is always too high, L0->L1 will always be + // prioritized over L1->L2 compaction and L1 will accumulate to + // too large. But if L0 score isn't high enough, L0 will accumulate + // and data is not moved to L1 fast enough. With potential L0->L0 + // compaction, number of L0 files aren't always an indication of + // L0 oversizing, and we also need to consider total size of L0. + if (immutable_options.level_compaction_dynamic_level_bytes) { + if (total_size >= mutable_cf_options.max_bytes_for_level_base) { + // When calculating estimated_compaction_needed_bytes, we assume + // L0 is qualified as pending compactions. We will need to make + // sure that it qualifies for compaction. 
+              // It might be guaranteed by logic below anyway, but we are
+              // explicit here to make sure we don't stop writes with no
+              // compaction scheduled.
+              score = std::max(score, 1.01);
+            }
+            if (total_size > level_max_bytes_[base_level_]) {
+              // In this case, we compare L0 size with the actual L1 size and
+              // make sure score is more than 1.0 (10.0 after being scaled) if
+              // L0 is larger than L1. Since in this case L1's score is lower
+              // than 10.0, L0->L1 is prioritized over L1->L2.
+              uint64_t base_level_size = 0;
+              for (auto f : files_[base_level_]) {
+                base_level_size += f->compensated_file_size;
+              }
+              score = std::max(score, static_cast<double>(total_size) /
+                                          static_cast<double>(std::max(
+                                              base_level_size,
+                                              level_max_bytes_[base_level_])));
+            }
+            if (score > 1.0) {
+              score *= kScoreScale;
+            }
+          } else {
+            score = std::max(score,
+                             static_cast<double>(total_size) /
+                                 mutable_cf_options.max_bytes_for_level_base);
+          }
+        }
+      }
+    } else {  // level > 0
+      // Compute the ratio of current size to size limit.
+      uint64_t level_bytes_no_compacting = 0;
+      uint64_t level_total_bytes = 0;
+      for (auto f : files_[level]) {
+        level_total_bytes += f->fd.GetFileSize();
+        if (!f->being_compacted) {
+          level_bytes_no_compacting += f->compensated_file_size;
+        }
+      }
+      if (!immutable_options.level_compaction_dynamic_level_bytes) {
+        score = static_cast<double>(level_bytes_no_compacting) /
+                MaxBytesForLevel(level);
+      } else {
+        if (level_bytes_no_compacting < MaxBytesForLevel(level)) {
+          score = static_cast<double>(level_bytes_no_compacting) /
+                  MaxBytesForLevel(level);
+        } else {
+          // If a large amount of data is about to be compacted down to the
+          // current level soon, we de-prioritize compaction from a level
+          // where the incoming data would make up a large ratio. We do this
+          // by dividing the level size not by the target level size alone,
+          // but by the target size plus the incoming compaction bytes.
+          score = static_cast<double>(level_bytes_no_compacting) /
+                  (MaxBytesForLevel(level) + total_downcompact_bytes) *
+                  kScoreScale;
+        }
+        // Drain unnecessary levels, but with lower priority compared to
+        // when L0 is eligible. Only non-empty levels can be unnecessary.
+        // If there are no unnecessary levels, lowest_unnecessary_level_ is
+        // -1.
+        if (level_bytes_no_compacting > 0 &&
+            level <= lowest_unnecessary_level_) {
+          score = std::max(
+              score,
+              kScoreScale *
+                  (1.001 + 0.001 * (lowest_unnecessary_level_ - level)));
+        }
+      }
+      if (level <= lowest_unnecessary_level_) {
+        total_downcompact_bytes += level_total_bytes;
+      } else if (level_total_bytes > MaxBytesForLevel(level)) {
+        total_downcompact_bytes +=
+            static_cast<double>(level_total_bytes - MaxBytesForLevel(level));
+      }
+    }
+    compaction_level_[level] = level;
+    compaction_score_[level] = score;
+  }
+
+  // Sort all the levels based on their score. Higher scores get listed
+  // first. Use bubble sort because the number of entries is small.
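+  // (Illustrative: with num_levels() == 4 and scores {L0: 0.5, L1: 12.0,
+  // L2: 3.0}, the pass below yields the order L1, L2, L0, so the highest
+  // scoring level is considered for compaction first.)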
+ for (int i = 0; i < num_levels() - 2; i++) { + for (int j = i + 1; j < num_levels() - 1; j++) { + if (compaction_score_[i] < compaction_score_[j]) { + double score = compaction_score_[i]; + int level = compaction_level_[i]; + compaction_score_[i] = compaction_score_[j]; + compaction_level_[i] = compaction_level_[j]; + compaction_score_[j] = score; + compaction_level_[j] = level; + } + } + } + ComputeFilesMarkedForCompaction(); + if (!immutable_options.allow_ingest_behind) { + ComputeBottommostFilesMarkedForCompaction(); + } + if (mutable_cf_options.ttl > 0) { + ComputeExpiredTtlFiles(immutable_options, mutable_cf_options.ttl); + } + if (mutable_cf_options.periodic_compaction_seconds > 0) { + ComputeFilesMarkedForPeriodicCompaction( + immutable_options, mutable_cf_options.periodic_compaction_seconds); + } + + if (mutable_cf_options.enable_blob_garbage_collection && + mutable_cf_options.blob_garbage_collection_age_cutoff > 0.0 && + mutable_cf_options.blob_garbage_collection_force_threshold < 1.0) { + ComputeFilesMarkedForForcedBlobGC( + mutable_cf_options.blob_garbage_collection_age_cutoff, + mutable_cf_options.blob_garbage_collection_force_threshold); + } + + EstimateCompactionBytesNeeded(mutable_cf_options); +} + +void VersionStorageInfo::ComputeFilesMarkedForCompaction() { + files_marked_for_compaction_.clear(); + int last_qualify_level = 0; + + // Do not include files from the last level with data + // If table properties collector suggests a file on the last level, + // we should not move it to a new level. + for (int level = num_levels() - 1; level >= 1; level--) { + if (!files_[level].empty()) { + last_qualify_level = level - 1; + break; + } + } + + for (int level = 0; level <= last_qualify_level; level++) { + for (auto* f : files_[level]) { + if (!f->being_compacted && f->marked_for_compaction) { + files_marked_for_compaction_.emplace_back(level, f); + } + } + } +} + +void VersionStorageInfo::ComputeExpiredTtlFiles( + const ImmutableOptions& ioptions, const uint64_t ttl) { + assert(ttl > 0); + + expired_ttl_files_.clear(); + + int64_t _current_time; + auto status = ioptions.clock->GetCurrentTime(&_current_time); + if (!status.ok()) { + return; + } + const uint64_t current_time = static_cast(_current_time); + + for (int level = 0; level < num_levels() - 1; level++) { + for (FileMetaData* f : files_[level]) { + if (!f->being_compacted) { + uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime(); + if (oldest_ancester_time > 0 && + oldest_ancester_time < (current_time - ttl)) { + expired_ttl_files_.emplace_back(level, f); + } + } + } + } +} + +void VersionStorageInfo::ComputeFilesMarkedForPeriodicCompaction( + const ImmutableOptions& ioptions, + const uint64_t periodic_compaction_seconds) { + assert(periodic_compaction_seconds > 0); + + files_marked_for_periodic_compaction_.clear(); + + int64_t temp_current_time; + auto status = ioptions.clock->GetCurrentTime(&temp_current_time); + if (!status.ok()) { + return; + } + const uint64_t current_time = static_cast(temp_current_time); + + // If periodic_compaction_seconds is larger than current time, periodic + // compaction can't possibly be triggered. + if (periodic_compaction_seconds > current_time) { + return; + } + + const uint64_t allowed_time_limit = + current_time - periodic_compaction_seconds; + + for (int level = 0; level < num_levels(); level++) { + for (auto f : files_[level]) { + if (!f->being_compacted) { + // Compute a file's modification time in the following order: + // 1. 
Use file_creation_time table property if it is > 0. + // 2. Use creation_time table property if it is > 0. + // 3. Use file's mtime metadata if the above two table properties are 0. + // Don't consider the file at all if the modification time cannot be + // correctly determined based on the above conditions. + uint64_t file_modification_time = f->TryGetFileCreationTime(); + if (file_modification_time == kUnknownFileCreationTime) { + file_modification_time = f->TryGetOldestAncesterTime(); + } + if (file_modification_time == kUnknownOldestAncesterTime) { + auto file_path = TableFileName(ioptions.cf_paths, f->fd.GetNumber(), + f->fd.GetPathId()); + status = ioptions.env->GetFileModificationTime( + file_path, &file_modification_time); + if (!status.ok()) { + ROCKS_LOG_WARN(ioptions.logger, + "Can't get file modification time: %s: %s", + file_path.c_str(), status.ToString().c_str()); + continue; + } + } + if (file_modification_time > 0 && + file_modification_time < allowed_time_limit) { + files_marked_for_periodic_compaction_.emplace_back(level, f); + } + } + } + } +} + +void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC( + double blob_garbage_collection_age_cutoff, + double blob_garbage_collection_force_threshold) { + files_marked_for_forced_blob_gc_.clear(); + + if (blob_files_.empty()) { + return; + } + + // Number of blob files eligible for GC based on age + const size_t cutoff_count = static_cast( + blob_garbage_collection_age_cutoff * blob_files_.size()); + if (!cutoff_count) { + return; + } + + // Compute the sum of total and garbage bytes over the oldest batch of blob + // files. The oldest batch is defined as the set of blob files which are + // kept alive by the same SSTs as the very oldest one. Here is a toy example. + // Let's assume we have three SSTs 1, 2, and 3, and four blob files 10, 11, + // 12, and 13. Also, let's say SSTs 1 and 2 both rely on blob file 10 and + // potentially some higher-numbered ones, while SST 3 relies on blob file 12 + // and potentially some higher-numbered ones. Then, the SST to oldest blob + // file mapping is as follows: + // + // SST file number Oldest blob file number + // 1 10 + // 2 10 + // 3 12 + // + // This is what the same thing looks like from the blob files' POV. (Note that + // the linked SSTs simply denote the inverse mapping of the above.) + // + // Blob file number Linked SST set + // 10 {1, 2} + // 11 {} + // 12 {3} + // 13 {} + // + // Then, the oldest batch of blob files consists of blob files 10 and 11, + // and we can get rid of them by forcing the compaction of SSTs 1 and 2. + // + // Note that the overall ratio of garbage computed for the batch has to exceed + // blob_garbage_collection_force_threshold and the entire batch has to be + // eligible for GC according to blob_garbage_collection_age_cutoff in order + // for us to schedule any compactions. 
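+  // (Illustrative, hypothetical numbers: if the oldest batch totals 100MB of
+  // blob data of which 60MB is garbage, and
+  // blob_garbage_collection_force_threshold is 0.5, then 60MB >= 50MB and
+  // the SSTs linked to the batch are marked for compaction below.)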
+ const auto& oldest_meta = blob_files_.front(); + assert(oldest_meta); + + const auto& linked_ssts = oldest_meta->GetLinkedSsts(); + assert(!linked_ssts.empty()); + + size_t count = 1; + uint64_t sum_total_blob_bytes = oldest_meta->GetTotalBlobBytes(); + uint64_t sum_garbage_blob_bytes = oldest_meta->GetGarbageBlobBytes(); + + assert(cutoff_count <= blob_files_.size()); + + for (; count < cutoff_count; ++count) { + const auto& meta = blob_files_[count]; + assert(meta); + + if (!meta->GetLinkedSsts().empty()) { + // Found the beginning of the next batch of blob files + break; + } + + sum_total_blob_bytes += meta->GetTotalBlobBytes(); + sum_garbage_blob_bytes += meta->GetGarbageBlobBytes(); + } + + if (count < blob_files_.size()) { + const auto& meta = blob_files_[count]; + assert(meta); + + if (meta->GetLinkedSsts().empty()) { + // Some files in the oldest batch are not eligible for GC + return; + } + } + + if (sum_garbage_blob_bytes < + blob_garbage_collection_force_threshold * sum_total_blob_bytes) { + return; + } + + for (uint64_t sst_file_number : linked_ssts) { + const FileLocation location = GetFileLocation(sst_file_number); + assert(location.IsValid()); + + const int level = location.GetLevel(); + assert(level >= 0); + + const size_t pos = location.GetPosition(); + + FileMetaData* const sst_meta = files_[level][pos]; + assert(sst_meta); + + if (sst_meta->being_compacted) { + continue; + } + + files_marked_for_forced_blob_gc_.emplace_back(level, sst_meta); + } +} + +namespace { + +// used to sort files by size +struct Fsize { + size_t index; + FileMetaData* file; +}; + +// Comparator that is used to sort files based on their size +// In normal mode: descending size +bool CompareCompensatedSizeDescending(const Fsize& first, const Fsize& second) { + return (first.file->compensated_file_size > + second.file->compensated_file_size); +} +} // anonymous namespace + +void VersionStorageInfo::AddFile(int level, FileMetaData* f) { + auto& level_files = files_[level]; + level_files.push_back(f); + + f->refs++; +} + +void VersionStorageInfo::AddBlobFile( + std::shared_ptr blob_file_meta) { + assert(blob_file_meta); + + assert(blob_files_.empty() || + (blob_files_.back() && blob_files_.back()->GetBlobFileNumber() < + blob_file_meta->GetBlobFileNumber())); + + blob_files_.emplace_back(std::move(blob_file_meta)); +} + +VersionStorageInfo::BlobFiles::const_iterator +VersionStorageInfo::GetBlobFileMetaDataLB(uint64_t blob_file_number) const { + return std::lower_bound( + blob_files_.begin(), blob_files_.end(), blob_file_number, + [](const std::shared_ptr& lhs, uint64_t rhs) { + assert(lhs); + return lhs->GetBlobFileNumber() < rhs; + }); +} + +void VersionStorageInfo::SetFinalized() { + finalized_ = true; + +#ifndef NDEBUG + if (compaction_style_ != kCompactionStyleLevel) { + // Not level based compaction. 
+ return; + } + assert(base_level_ < 0 || num_levels() == 1 || + (base_level_ >= 1 && base_level_ < num_levels())); + // Verify all levels newer than base_level are empty except L0 + for (int level = 1; level < base_level(); level++) { + assert(NumLevelBytes(level) == 0); + } + uint64_t max_bytes_prev_level = 0; + for (int level = base_level(); level < num_levels() - 1; level++) { + if (LevelFiles(level).size() == 0) { + continue; + } + assert(MaxBytesForLevel(level) >= max_bytes_prev_level); + max_bytes_prev_level = MaxBytesForLevel(level); + } + for (int level = 0; level < num_levels(); level++) { + assert(LevelFiles(level).size() == 0 || + LevelFiles(level).size() == LevelFilesBrief(level).num_files); + if (LevelFiles(level).size() > 0) { + assert(level < num_non_empty_levels()); + } + } + assert(compaction_level_.size() > 0); + assert(compaction_level_.size() == compaction_score_.size()); +#endif +} + +void VersionStorageInfo::UpdateNumNonEmptyLevels() { + num_non_empty_levels_ = num_levels_; + for (int i = num_levels_ - 1; i >= 0; i--) { + if (files_[i].size() != 0) { + return; + } else { + num_non_empty_levels_ = i; + } + } +} + +namespace { +// Sort `temp` based on ratio of overlapping size over file size +void SortFileByOverlappingRatio( + const InternalKeyComparator& icmp, const std::vector& files, + const std::vector& next_level_files, SystemClock* clock, + int level, int num_non_empty_levels, uint64_t ttl, + std::vector* temp) { + std::unordered_map file_to_order; + auto next_level_it = next_level_files.begin(); + + int64_t curr_time; + Status status = clock->GetCurrentTime(&curr_time); + if (!status.ok()) { + // If we can't get time, disable TTL. + ttl = 0; + } + + FileTtlBooster ttl_booster(static_cast(curr_time), ttl, + num_non_empty_levels, level); + + for (auto& file : files) { + uint64_t overlapping_bytes = 0; + // Skip files in next level that is smaller than current file + while (next_level_it != next_level_files.end() && + icmp.Compare((*next_level_it)->largest, file->smallest) < 0) { + next_level_it++; + } + + while (next_level_it != next_level_files.end() && + icmp.Compare((*next_level_it)->smallest, file->largest) < 0) { + overlapping_bytes += (*next_level_it)->fd.file_size; + + if (icmp.Compare((*next_level_it)->largest, file->largest) > 0) { + // next level file cross large boundary of current file. + break; + } + next_level_it++; + } + + uint64_t ttl_boost_score = (ttl > 0) ? ttl_booster.GetBoostScore(file) : 1; + assert(ttl_boost_score > 0); + assert(file->compensated_file_size != 0); + file_to_order[file->fd.GetNumber()] = overlapping_bytes * 1024U / + file->compensated_file_size / + ttl_boost_score; + } + + size_t num_to_sort = temp->size() > VersionStorageInfo::kNumberFilesToSort + ? VersionStorageInfo::kNumberFilesToSort + : temp->size(); + + std::partial_sort(temp->begin(), temp->begin() + num_to_sort, temp->end(), + [&](const Fsize& f1, const Fsize& f2) -> bool { + // If score is the same, pick file with smaller keys. + // This makes the algorithm more deterministic, and also + // help the trivial move case to have more files to + // extend. 
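+                      // (Illustrative: when two files tie on the integer
+                      // order value computed above, the smallest-key
+                      // comparison below keeps the result deterministic.)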
+                      if (file_to_order[f1.file->fd.GetNumber()] ==
+                          file_to_order[f2.file->fd.GetNumber()]) {
+                        return icmp.Compare(f1.file->smallest,
+                                            f2.file->smallest) < 0;
+                      }
+                      return file_to_order[f1.file->fd.GetNumber()] <
+                             file_to_order[f2.file->fd.GetNumber()];
+                    });
+}
+
+void SortFileByRoundRobin(const InternalKeyComparator& icmp,
+                          std::vector<InternalKey>* compact_cursor,
+                          bool level0_non_overlapping, int level,
+                          std::vector<Fsize>* temp) {
+  if (level == 0 && !level0_non_overlapping) {
+    // Using kOldestSmallestSeqFirst when level == 0, since the
+    // files may overlap (not fully sorted)
+    std::sort(temp->begin(), temp->end(),
+              [](const Fsize& f1, const Fsize& f2) -> bool {
+                return f1.file->fd.smallest_seqno < f2.file->fd.smallest_seqno;
+              });
+    return;
+  }
+
+  bool should_move_files =
+      compact_cursor->at(level).size() > 0 && temp->size() > 1;
+
+  // The iterator points to the Fsize with the smallest key larger than or
+  // equal to the given cursor
+  std::vector<Fsize>::iterator current_file_iter;
+  if (should_move_files) {
+    // Find the file whose smallest key is larger than or equal to
+    // the cursor (the smallest key in the successor file of the last
+    // chosen file); skip this if the cursor is invalid or there is only
+    // one file in this level
+    current_file_iter = std::lower_bound(
+        temp->begin(), temp->end(), compact_cursor->at(level),
+        [&](const Fsize& f, const InternalKey& cursor) -> bool {
+          return icmp.Compare(cursor, f.file->smallest) > 0;
+        });
+
+    should_move_files =
+        current_file_iter != temp->end() && current_file_iter != temp->begin();
+  }
+  if (should_move_files) {
+    // Construct a local temporary vector
+    std::vector<Fsize> local_temp;
+    local_temp.reserve(temp->size());
+    // Move the selected file into the first position and its successors
+    // into the second, third, ..., positions
+    for (auto iter = current_file_iter; iter != temp->end(); iter++) {
+      local_temp.push_back(*iter);
+    }
+    // Move the original predecessors of the selected file in a round-robin
+    // manner
+    for (auto iter = temp->begin(); iter != current_file_iter; iter++) {
+      local_temp.push_back(*iter);
+    }
+    // Replace all the items in temp
+    for (size_t i = 0; i < local_temp.size(); i++) {
+      temp->at(i) = local_temp[i];
+    }
+  }
+}
+}  // anonymous namespace
+
+void VersionStorageInfo::UpdateFilesByCompactionPri(
+    const ImmutableOptions& ioptions, const MutableCFOptions& options) {
+  if (compaction_style_ == kCompactionStyleNone ||
+      compaction_style_ == kCompactionStyleFIFO ||
+      compaction_style_ == kCompactionStyleUniversal) {
+    // don't need this
+    return;
+  }
+  // No need to sort the highest level because it is never compacted.
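+  // (Illustrative: with kRoundRobin and files {a, b, c, d} where the cursor
+  // points just before c, SortFileByRoundRobin above rotates the order to
+  // {c, d, a, b}, so picking resumes at the cursor and wraps around.)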
+ for (int level = 0; level < num_levels() - 1; level++) { + const std::vector& files = files_[level]; + auto& files_by_compaction_pri = files_by_compaction_pri_[level]; + assert(files_by_compaction_pri.size() == 0); + + // populate a temp vector for sorting based on size + std::vector temp(files.size()); + for (size_t i = 0; i < files.size(); i++) { + temp[i].index = i; + temp[i].file = files[i]; + } + + // sort the top number_of_files_to_sort_ based on file size + size_t num = VersionStorageInfo::kNumberFilesToSort; + if (num > temp.size()) { + num = temp.size(); + } + switch (ioptions.compaction_pri) { + case kByCompensatedSize: + std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), + CompareCompensatedSizeDescending); + break; + case kOldestLargestSeqFirst: + std::sort(temp.begin(), temp.end(), + [](const Fsize& f1, const Fsize& f2) -> bool { + return f1.file->fd.largest_seqno < + f2.file->fd.largest_seqno; + }); + break; + case kOldestSmallestSeqFirst: + std::sort(temp.begin(), temp.end(), + [](const Fsize& f1, const Fsize& f2) -> bool { + return f1.file->fd.smallest_seqno < + f2.file->fd.smallest_seqno; + }); + break; + case kMinOverlappingRatio: + SortFileByOverlappingRatio(*internal_comparator_, files_[level], + files_[level + 1], ioptions.clock, level, + num_non_empty_levels_, options.ttl, &temp); + break; + case kRoundRobin: + SortFileByRoundRobin(*internal_comparator_, &compact_cursor_, + level0_non_overlapping_, level, &temp); + break; + default: + assert(false); + } + assert(temp.size() == files.size()); + + // initialize files_by_compaction_pri_ + for (size_t i = 0; i < temp.size(); i++) { + files_by_compaction_pri.push_back(static_cast(temp[i].index)); + } + next_file_to_compact_by_size_[level] = 0; + assert(files_[level].size() == files_by_compaction_pri_[level].size()); + } +} + +void VersionStorageInfo::GenerateLevel0NonOverlapping() { + assert(!finalized_); + level0_non_overlapping_ = true; + if (level_files_brief_.size() == 0) { + return; + } + + // A copy of L0 files sorted by smallest key + std::vector level0_sorted_file( + level_files_brief_[0].files, + level_files_brief_[0].files + level_files_brief_[0].num_files); + std::sort(level0_sorted_file.begin(), level0_sorted_file.end(), + [this](const FdWithKeyRange& f1, const FdWithKeyRange& f2) -> bool { + return (internal_comparator_->Compare(f1.smallest_key, + f2.smallest_key) < 0); + }); + + for (size_t i = 1; i < level0_sorted_file.size(); ++i) { + FdWithKeyRange& f = level0_sorted_file[i]; + FdWithKeyRange& prev = level0_sorted_file[i - 1]; + if (internal_comparator_->Compare(prev.largest_key, f.smallest_key) >= 0) { + level0_non_overlapping_ = false; + break; + } + } +} + +void VersionStorageInfo::GenerateBottommostFiles() { + assert(!finalized_); + assert(bottommost_files_.empty()); + for (size_t level = 0; level < level_files_brief_.size(); ++level) { + for (size_t file_idx = 0; file_idx < level_files_brief_[level].num_files; + ++file_idx) { + const FdWithKeyRange& f = level_files_brief_[level].files[file_idx]; + int l0_file_idx; + if (level == 0) { + l0_file_idx = static_cast(file_idx); + } else { + l0_file_idx = -1; + } + Slice smallest_user_key = ExtractUserKey(f.smallest_key); + Slice largest_user_key = ExtractUserKey(f.largest_key); + if (!RangeMightExistAfterSortedRun(smallest_user_key, largest_user_key, + static_cast(level), + l0_file_idx)) { + bottommost_files_.emplace_back(static_cast(level), + f.file_metadata); + } + } + } +} + +void VersionStorageInfo::GenerateFileLocationIndex() { + size_t 
num_files = 0; + + for (int level = 0; level < num_levels_; ++level) { + num_files += files_[level].size(); + } + + file_locations_.reserve(num_files); + + for (int level = 0; level < num_levels_; ++level) { + for (size_t pos = 0; pos < files_[level].size(); ++pos) { + const FileMetaData* const meta = files_[level][pos]; + assert(meta); + + const uint64_t file_number = meta->fd.GetNumber(); + + assert(file_locations_.find(file_number) == file_locations_.end()); + file_locations_.emplace(file_number, FileLocation(level, pos)); + } + } +} + +void VersionStorageInfo::UpdateOldestSnapshot(SequenceNumber seqnum) { + assert(seqnum >= oldest_snapshot_seqnum_); + oldest_snapshot_seqnum_ = seqnum; + if (oldest_snapshot_seqnum_ > bottommost_files_mark_threshold_) { + ComputeBottommostFilesMarkedForCompaction(); + } +} + +void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction() { + bottommost_files_marked_for_compaction_.clear(); + bottommost_files_mark_threshold_ = kMaxSequenceNumber; + for (auto& level_and_file : bottommost_files_) { + if (!level_and_file.second->being_compacted && + level_and_file.second->fd.largest_seqno != 0) { + // largest_seqno might be nonzero due to containing the final key in an + // earlier compaction, whose seqnum we didn't zero out. Multiple deletions + // ensures the file really contains deleted or overwritten keys. + if (level_and_file.second->fd.largest_seqno < oldest_snapshot_seqnum_) { + bottommost_files_marked_for_compaction_.push_back(level_and_file); + } else { + bottommost_files_mark_threshold_ = + std::min(bottommost_files_mark_threshold_, + level_and_file.second->fd.largest_seqno); + } + } + } +} + +void Version::Ref() { ++refs_; } + +bool Version::Unref() { + assert(refs_ >= 1); + --refs_; + if (refs_ == 0) { + delete this; + return true; + } + return false; +} + +bool VersionStorageInfo::OverlapInLevel(int level, + const Slice* smallest_user_key, + const Slice* largest_user_key) { + if (level >= num_non_empty_levels_) { + // empty level, no overlap + return false; + } + return SomeFileOverlapsRange(*internal_comparator_, (level > 0), + level_files_brief_[level], smallest_user_key, + largest_user_key); +} + +// Store in "*inputs" all files in "level" that overlap [begin,end] +// If hint_index is specified, then it points to a file in the +// overlapping range. +// The file_index returns a pointer to any file in an overlapping range. +void VersionStorageInfo::GetOverlappingInputs( + int level, const InternalKey* begin, const InternalKey* end, + std::vector* inputs, int hint_index, int* file_index, + bool expand_range, InternalKey** next_smallest) const { + if (level >= num_non_empty_levels_) { + // this level is empty, no overlapping inputs + return; + } + + inputs->clear(); + if (file_index) { + *file_index = -1; + } + const Comparator* user_cmp = user_comparator_; + if (level > 0) { + GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index, + file_index, false, next_smallest); + return; + } + + if (next_smallest) { + // next_smallest key only makes sense for non-level 0, where files are + // non-overlapping + *next_smallest = nullptr; + } + + Slice user_begin, user_end; + if (begin != nullptr) { + user_begin = begin->user_key(); + } + if (end != nullptr) { + user_end = end->user_key(); + } + + // index stores the file index need to check. 
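+  // (That is, the indices of L0 files not yet classified; overlapping files
+  // are erased as they are added to the inputs, and the scan repeats until a
+  // full pass finds no new overlap with the possibly expanded range.)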
+  std::list<size_t> index;
+  for (size_t i = 0; i < level_files_brief_[level].num_files; i++) {
+    index.emplace_back(i);
+  }
+
+  while (!index.empty()) {
+    bool found_overlapping_file = false;
+    auto iter = index.begin();
+    while (iter != index.end()) {
+      FdWithKeyRange* f = &(level_files_brief_[level].files[*iter]);
+      const Slice file_start = ExtractUserKey(f->smallest_key);
+      const Slice file_limit = ExtractUserKey(f->largest_key);
+      if (begin != nullptr &&
+          user_cmp->CompareWithoutTimestamp(file_limit, user_begin) < 0) {
+        // "f" is completely before the specified range; skip it
+        iter++;
+      } else if (end != nullptr &&
+                 user_cmp->CompareWithoutTimestamp(file_start, user_end) > 0) {
+        // "f" is completely after the specified range; skip it
+        iter++;
+      } else {
+        // "f" overlaps the range
+        inputs->emplace_back(files_[level][*iter]);
+        found_overlapping_file = true;
+        // record the first file index.
+        if (file_index && *file_index == -1) {
+          *file_index = static_cast<int>(*iter);
+        }
+        // this file overlaps; erase it to avoid checking it again.
+        iter = index.erase(iter);
+        if (expand_range) {
+          if (begin != nullptr &&
+              user_cmp->CompareWithoutTimestamp(file_start, user_begin) < 0) {
+            user_begin = file_start;
+          }
+          if (end != nullptr &&
+              user_cmp->CompareWithoutTimestamp(file_limit, user_end) > 0) {
+            user_end = file_limit;
+          }
+        }
+      }
+    }
+    // if none of the remaining files overlap, stop scanning
+    if (!found_overlapping_file) {
+      break;
+    }
+  }
+}
+
+// Store in "*inputs" the files in "level" that are within the range
+// [begin,end], guaranteeing a "clean cut" boundary between the files in
+// inputs and the surrounding files, with the maximum number of files.
+// This will ensure that no parts of a key are lost during compaction.
+// If hint_index is specified, then it points to a file in the range.
+// The file_index returns a pointer to any file in an overlapping range.
+void VersionStorageInfo::GetCleanInputsWithinInterval(
+    int level, const InternalKey* begin, const InternalKey* end,
+    std::vector<FileMetaData*>* inputs, int hint_index,
+    int* file_index) const {
+  inputs->clear();
+  if (file_index) {
+    *file_index = -1;
+  }
+  if (level >= num_non_empty_levels_ || level == 0 ||
+      level_files_brief_[level].num_files == 0) {
+    // this level is empty, no inputs within range
+    // also don't support clean input interval within L0
+    return;
+  }
+
+  GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index,
+                                        file_index,
+                                        true /* within_interval */);
+}
+
+// Store in "*inputs" all files in "level" that overlap [begin,end].
+// Employ binary search to find at least one file that overlaps the
+// specified range. From that file, iterate backwards and
+// forwards to find all overlapping files.
+// If within_interval is set, then only store the maximum clean inputs
+// within the range [begin, end]. "Clean" means there is a boundary
+// between the files in "*inputs" and the surrounding files.
+void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
+    int level, const InternalKey* begin, const InternalKey* end,
+    std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
+    bool within_interval, InternalKey** next_smallest) const {
+  assert(level > 0);
+
+  auto user_cmp = user_comparator_;
+  const FdWithKeyRange* files = level_files_brief_[level].files;
+  const int num_files = static_cast<int>(level_files_brief_[level].num_files);
+
+  // Use binary search to find the lower bound and the upper bound.
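+  // (Illustrative: if a user key is split across two adjacent files, i.e.
+  // files[i - 1].largest equals files[i].smallest, the within_interval
+  // adjustments below step past the pair so that no user key is cut in half
+  // at the "clean cut" boundary.)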
+  int start_index = 0;
+  int end_index = num_files;
+
+  if (begin != nullptr) {
+    // If within_interval is true, use the file's smallest key as file_key so
+    // that std::lower_bound lands on non-overlapping ranges.
+    auto cmp = [&user_cmp, &within_interval](const FdWithKeyRange& f,
+                                             const InternalKey* k) {
+      auto& file_key = within_interval ? f.file_metadata->smallest
+                                       : f.file_metadata->largest;
+      return sstableKeyCompare(user_cmp, file_key, *k) < 0;
+    };
+
+    start_index = static_cast<int>(
+        std::lower_bound(files,
+                         files + (hint_index == -1 ? num_files : hint_index),
+                         begin, cmp) -
+        files);
+
+    if (start_index > 0 && within_interval) {
+      bool is_overlapping = true;
+      while (is_overlapping && start_index < num_files) {
+        auto& pre_limit = files[start_index - 1].file_metadata->largest;
+        auto& cur_start = files[start_index].file_metadata->smallest;
+        is_overlapping = sstableKeyCompare(user_cmp, pre_limit, cur_start) == 0;
+        start_index += is_overlapping;
+      }
+    }
+  }
+
+  if (end != nullptr) {
+    // If within_interval is true, use the file's largest key as file_key so
+    // that std::upper_bound lands on non-overlapping ranges.
+    auto cmp = [&user_cmp, &within_interval](const InternalKey* k,
+                                             const FdWithKeyRange& f) {
+      auto& file_key = within_interval ? f.file_metadata->largest
+                                       : f.file_metadata->smallest;
+      return sstableKeyCompare(user_cmp, *k, file_key) < 0;
+    };
+
+    end_index = static_cast<int>(
+        std::upper_bound(files + start_index, files + num_files, end, cmp) -
+        files);
+
+    if (end_index < num_files && within_interval) {
+      bool is_overlapping = true;
+      while (is_overlapping && end_index > start_index) {
+        auto& next_start = files[end_index].file_metadata->smallest;
+        auto& cur_limit = files[end_index - 1].file_metadata->largest;
+        is_overlapping =
+            sstableKeyCompare(user_cmp, cur_limit, next_start) == 0;
+        end_index -= is_overlapping;
+      }
+    }
+  }
+
+  assert(start_index <= end_index);
+
+  // If there were no overlapping files, return immediately.
+ if (start_index == end_index) { + if (next_smallest) { + *next_smallest = nullptr; + } + return; + } + + assert(start_index < end_index); + + // returns the index where an overlap is found + if (file_index) { + *file_index = start_index; + } + + // insert overlapping files into vector + for (int i = start_index; i < end_index; i++) { + inputs->push_back(files_[level][i]); + } + + if (next_smallest != nullptr) { + // Provide the next key outside the range covered by inputs + if (end_index < static_cast(files_[level].size())) { + **next_smallest = files_[level][end_index]->smallest; + } else { + *next_smallest = nullptr; + } + } +} + +uint64_t VersionStorageInfo::NumLevelBytes(int level) const { + assert(level >= 0); + assert(level < num_levels()); + return TotalFileSize(files_[level]); +} + +const char* VersionStorageInfo::LevelSummary( + LevelSummaryStorage* scratch) const { + int len = 0; + if (compaction_style_ == kCompactionStyleLevel && num_levels() > 1) { + assert(base_level_ < static_cast(level_max_bytes_.size())); + if (level_multiplier_ != 0.0) { + len = snprintf( + scratch->buffer, sizeof(scratch->buffer), + "base level %d level multiplier %.2f max bytes base %" PRIu64 " ", + base_level_, level_multiplier_, level_max_bytes_[base_level_]); + } + } + len += + snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "files["); + for (int i = 0; i < num_levels(); i++) { + int sz = sizeof(scratch->buffer) - len; + int ret = snprintf(scratch->buffer + len, sz, "%d ", int(files_[i].size())); + if (ret < 0 || ret >= sz) break; + len += ret; + } + if (len > 0) { + // overwrite the last space + --len; + } + len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, + "] max score %.2f", compaction_score_[0]); + + if (!files_marked_for_compaction_.empty()) { + snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, + " (%" ROCKSDB_PRIszt " files need compaction)", + files_marked_for_compaction_.size()); + } + + return scratch->buffer; +} + +const char* VersionStorageInfo::LevelFileSummary(FileSummaryStorage* scratch, + int level) const { + int len = snprintf(scratch->buffer, sizeof(scratch->buffer), "files_size["); + for (const auto& f : files_[level]) { + int sz = sizeof(scratch->buffer) - len; + char sztxt[16]; + AppendHumanBytes(f->fd.GetFileSize(), sztxt, sizeof(sztxt)); + int ret = snprintf(scratch->buffer + len, sz, + "#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ", + f->fd.GetNumber(), f->fd.smallest_seqno, sztxt, + static_cast(f->being_compacted)); + if (ret < 0 || ret >= sz) break; + len += ret; + } + // overwrite the last space (only if files_[level].size() is non-zero) + if (files_[level].size() && len > 0) { + --len; + } + snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "]"); + return scratch->buffer; +} + +bool VersionStorageInfo::HasMissingEpochNumber() const { + for (int level = 0; level < num_levels_; ++level) { + for (const FileMetaData* f : files_[level]) { + if (f->epoch_number == kUnknownEpochNumber) { + return true; + } + } + } + return false; +} + +uint64_t VersionStorageInfo::GetMaxEpochNumberOfFiles() const { + uint64_t max_epoch_number = kUnknownEpochNumber; + for (int level = 0; level < num_levels_; ++level) { + for (const FileMetaData* f : files_[level]) { + max_epoch_number = std::max(max_epoch_number, f->epoch_number); + } + } + return max_epoch_number; +} + +void VersionStorageInfo::RecoverEpochNumbers(ColumnFamilyData* cfd) { + cfd->ResetNextEpochNumber(); + + bool reserve_epoch_num_for_file_ingested_behind = + 
cfd->ioptions()->allow_ingest_behind; + if (reserve_epoch_num_for_file_ingested_behind) { + uint64_t reserved_epoch_number = cfd->NewEpochNumber(); + assert(reserved_epoch_number == kReservedEpochNumberForFileIngestedBehind); + ROCKS_LOG_INFO(cfd->ioptions()->info_log.get(), + "[%s]CF has reserved epoch number %" PRIu64 + " for files ingested " + "behind since `Options::allow_ingest_behind` is true", + cfd->GetName().c_str(), reserved_epoch_number); + } + + if (HasMissingEpochNumber()) { + assert(epoch_number_requirement_ == EpochNumberRequirement::kMightMissing); + assert(num_levels_ >= 1); + + for (int level = num_levels_ - 1; level >= 1; --level) { + auto& files_at_level = files_[level]; + if (files_at_level.empty()) { + continue; + } + uint64_t next_epoch_number = cfd->NewEpochNumber(); + for (FileMetaData* f : files_at_level) { + f->epoch_number = next_epoch_number; + } + } + + for (auto file_meta_iter = files_[0].rbegin(); + file_meta_iter != files_[0].rend(); file_meta_iter++) { + FileMetaData* f = *file_meta_iter; + f->epoch_number = cfd->NewEpochNumber(); + } + + ROCKS_LOG_WARN(cfd->ioptions()->info_log.get(), + "[%s]CF's epoch numbers are inferred based on seqno", + cfd->GetName().c_str()); + epoch_number_requirement_ = EpochNumberRequirement::kMustPresent; + } else { + assert(epoch_number_requirement_ == EpochNumberRequirement::kMustPresent); + cfd->SetNextEpochNumber( + std::max(GetMaxEpochNumberOfFiles() + 1, cfd->GetNextEpochNumber())); + } +} + +uint64_t VersionStorageInfo::MaxNextLevelOverlappingBytes() { + uint64_t result = 0; + std::vector overlaps; + for (int level = 1; level < num_levels() - 1; level++) { + for (const auto& f : files_[level]) { + GetOverlappingInputs(level + 1, &f->smallest, &f->largest, &overlaps); + const uint64_t sum = TotalFileSize(overlaps); + if (sum > result) { + result = sum; + } + } + } + return result; +} + +uint64_t VersionStorageInfo::MaxBytesForLevel(int level) const { + // Note: the result for level zero is not really used since we set + // the level-0 compaction threshold based on number of files. + assert(level >= 0); + assert(level < static_cast(level_max_bytes_.size())); + return level_max_bytes_[level]; +} + +void VersionStorageInfo::CalculateBaseBytes(const ImmutableOptions& ioptions, + const MutableCFOptions& options) { + // Special logic to set number of sorted runs. + // It is to match the previous behavior when all files are in L0. + int num_l0_count = static_cast(files_[0].size()); + if (compaction_style_ == kCompactionStyleUniversal) { + // For universal compaction, we use level0 score to indicate + // compaction score for the whole DB. Adding other levels as if + // they are L0 files. + for (int i = 1; i < num_levels(); i++) { + if (!files_[i].empty()) { + num_l0_count++; + } + } + } + set_l0_delay_trigger_count(num_l0_count); + + level_max_bytes_.resize(ioptions.num_levels); + if (!ioptions.level_compaction_dynamic_level_bytes) { + base_level_ = (ioptions.compaction_style == kCompactionStyleLevel) ? 
1 : -1; + + // Calculate for static bytes base case + for (int i = 0; i < ioptions.num_levels; ++i) { + if (i == 0 && ioptions.compaction_style == kCompactionStyleUniversal) { + level_max_bytes_[i] = options.max_bytes_for_level_base; + } else if (i > 1) { + level_max_bytes_[i] = MultiplyCheckOverflow( + MultiplyCheckOverflow(level_max_bytes_[i - 1], + options.max_bytes_for_level_multiplier), + options.MaxBytesMultiplerAdditional(i - 1)); + } else { + level_max_bytes_[i] = options.max_bytes_for_level_base; + } + } + } else { + assert(ioptions.compaction_style == kCompactionStyleLevel); + uint64_t max_level_size = 0; + + int first_non_empty_level = -1; + // Find size of non-L0 level of most data. + // Cannot use the size of the last level because it can be empty or less + // than previous levels after compaction. + for (int i = 1; i < num_levels_; i++) { + uint64_t total_size = 0; + for (const auto& f : files_[i]) { + total_size += f->fd.GetFileSize(); + } + if (total_size > 0 && first_non_empty_level == -1) { + first_non_empty_level = i; + } + if (total_size > max_level_size) { + max_level_size = total_size; + } + } + + // Prefill every level's max bytes to disallow compaction from there. + for (int i = 0; i < num_levels_; i++) { + level_max_bytes_[i] = std::numeric_limits::max(); + } + + lowest_unnecessary_level_ = -1; + if (max_level_size == 0) { + // No data for L1 and up. L0 compacts to last level directly. + // No compaction from L1+ needs to be scheduled. + base_level_ = num_levels_ - 1; + } else { + assert(first_non_empty_level >= 1); + uint64_t base_bytes_max = options.max_bytes_for_level_base; + uint64_t base_bytes_min = static_cast( + base_bytes_max / options.max_bytes_for_level_multiplier); + + // Try whether we can make last level's target size to be max_level_size + uint64_t cur_level_size = max_level_size; + for (int i = num_levels_ - 2; i >= first_non_empty_level; i--) { + // Round up after dividing + cur_level_size = static_cast( + cur_level_size / options.max_bytes_for_level_multiplier); + if (lowest_unnecessary_level_ == -1 && + cur_level_size <= base_bytes_min && + (ioptions.preclude_last_level_data_seconds == 0 || + i < num_levels_ - 2)) { + // When per_key_placement is enabled, the penultimate level is + // necessary. + lowest_unnecessary_level_ = i; + } + } + + // Calculate base level and its size. + uint64_t base_level_size; + if (cur_level_size <= base_bytes_min) { + // If per_key_placement is not enabled, + // either there is only one non-empty level after level 0, + // which can less than base_bytes_min AND necessary, + // or there is some unnecessary level. + assert(first_non_empty_level == num_levels_ - 1 || + ioptions.preclude_last_level_data_seconds > 0 || + lowest_unnecessary_level_ != -1); + // Case 1. If we make target size of last level to be max_level_size, + // target size of the first non-empty level would be smaller than + // base_bytes_min. We set it be base_bytes_min. + base_level_size = base_bytes_min + 1U; + base_level_ = first_non_empty_level; + if (base_level_ < num_levels_ - 1) { + ROCKS_LOG_INFO( + ioptions.logger, + "More existing levels in DB than needed: all non-zero " + "levels <= level %d are unnecessary. " + "max_bytes_for_level_multiplier may not be guaranteed.", + lowest_unnecessary_level_); + } + } else { + assert(lowest_unnecessary_level_ == -1); + // Find base level (where L0 data is compacted to). 
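+        // (Illustrative, hypothetical sizes: with cur_level_size = 5GB at
+        // first_non_empty_level = 4, a multiplier of 10, and base_bytes_max =
+        // 256MB, the loop below walks 5GB -> 500MB -> 50MB and settles on
+        // base_level_ = 2 with a base level size of 50MB.)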
+ base_level_ = first_non_empty_level; + while (base_level_ > 1 && cur_level_size > base_bytes_max) { + --base_level_; + cur_level_size = static_cast( + cur_level_size / options.max_bytes_for_level_multiplier); + } + if (cur_level_size > base_bytes_max) { + // Even L1 will be too large + assert(base_level_ == 1); + base_level_size = base_bytes_max; + } else { + base_level_size = cur_level_size; + } + } + + level_multiplier_ = options.max_bytes_for_level_multiplier; + assert(base_level_size > 0); + + uint64_t level_size = base_level_size; + for (int i = base_level_; i < num_levels_; i++) { + if (i > base_level_) { + level_size = MultiplyCheckOverflow(level_size, level_multiplier_); + } + // Don't set any level below base_bytes_max. Otherwise, the LSM can + // assume an hourglass shape where L1+ sizes are smaller than L0. This + // causes compaction scoring, which depends on level sizes, to favor L1+ + // at the expense of L0, which may fill up and stall. + level_max_bytes_[i] = std::max(level_size, base_bytes_max); + } + } + } +} + +uint64_t VersionStorageInfo::EstimateLiveDataSize() const { + // Estimate the live data size by adding up the size of a maximal set of + // sst files with no range overlap in same or higher level. The less + // compacted, the more optimistic (smaller) this estimate is. Also, + // for multiple sorted runs within a level, file order will matter. + uint64_t size = 0; + + auto ikey_lt = [this](InternalKey* x, InternalKey* y) { + return internal_comparator_->Compare(*x, *y) < 0; + }; + // (Ordered) map of largest keys in files being included in size estimate + std::map ranges(ikey_lt); + + for (int l = num_levels_ - 1; l >= 0; l--) { + bool found_end = false; + for (auto file : files_[l]) { + // Find the first file already included with largest key is larger than + // the smallest key of `file`. If that file does not overlap with the + // current file, none of the files in the map does. If there is + // no potential overlap, we can safely insert the rest of this level + // (if the level is not 0) into the map without checking again because + // the elements in the level are sorted and non-overlapping. + auto lb = (found_end && l != 0) ? ranges.end() + : ranges.lower_bound(&file->smallest); + found_end = (lb == ranges.end()); + if (found_end || internal_comparator_->Compare( + file->largest, (*lb).second->smallest) < 0) { + ranges.emplace_hint(lb, &file->largest, file); + size += file->fd.file_size; + } + } + } + + // For BlobDB, the result also includes the exact value of live bytes in the + // blob files of the version. + for (const auto& meta : blob_files_) { + assert(meta); + + size += meta->GetTotalBlobBytes(); + size -= meta->GetGarbageBlobBytes(); + } + + return size; +} + +bool VersionStorageInfo::RangeMightExistAfterSortedRun( + const Slice& smallest_user_key, const Slice& largest_user_key, + int last_level, int last_l0_idx) { + assert((last_l0_idx != -1) == (last_level == 0)); + // TODO(ajkr): this preserves earlier behavior where we considered an L0 file + // bottommost only if it's the oldest L0 file and there are no files on older + // levels. It'd be better to consider it bottommost if there's no overlap in + // older levels/files. + if (last_level == 0 && + last_l0_idx != static_cast(LevelFiles(0).size() - 1)) { + return true; + } + + // Checks whether there are files living beyond the `last_level`. If lower + // levels have files, it checks for overlap between [`smallest_key`, + // `largest_key`] and those files. 
Bottomlevel optimizations can be made if + // there are no files in lower levels or if there is no overlap with the files + // in the lower levels. + for (int level = last_level + 1; level < num_levels(); level++) { + // The range is not in the bottommost level if there are files in lower + // levels when the `last_level` is 0 or if there are files in lower levels + // which overlap with [`smallest_key`, `largest_key`]. + if (files_[level].size() > 0 && + (last_level == 0 || + OverlapInLevel(level, &smallest_user_key, &largest_user_key))) { + return true; + } + } + return false; +} + +void Version::AddLiveFiles(std::vector* live_table_files, + std::vector* live_blob_files) const { + assert(live_table_files); + assert(live_blob_files); + + for (int level = 0; level < storage_info_.num_levels(); ++level) { + const auto& level_files = storage_info_.LevelFiles(level); + for (const auto& meta : level_files) { + assert(meta); + + live_table_files->emplace_back(meta->fd.GetNumber()); + } + } + + const auto& blob_files = storage_info_.GetBlobFiles(); + for (const auto& meta : blob_files) { + assert(meta); + + live_blob_files->emplace_back(meta->GetBlobFileNumber()); + } +} + +void Version::RemoveLiveFiles( + std::vector& sst_delete_candidates, + std::vector& blob_delete_candidates) const { + for (ObsoleteFileInfo& fi : sst_delete_candidates) { + if (!fi.only_delete_metadata && + storage_info()->GetFileLocation(fi.metadata->fd.GetNumber()) != + VersionStorageInfo::FileLocation::Invalid()) { + fi.only_delete_metadata = true; + } + } + + blob_delete_candidates.erase( + std::remove_if( + blob_delete_candidates.begin(), blob_delete_candidates.end(), + [this](ObsoleteBlobFileInfo& x) { + return storage_info()->GetBlobFileMetaData(x.GetBlobFileNumber()); + }), + blob_delete_candidates.end()); +} + +std::string Version::DebugString(bool hex, bool print_stats) const { + std::string r; + for (int level = 0; level < storage_info_.num_levels_; level++) { + // E.g., + // --- level 1 --- + // 17:123[1 .. 124]['a' .. 'd'] + // 20:43[124 .. 128]['e' .. 'g'] + // + // if print_stats=true: + // 17:123[1 .. 124]['a' .. 'd'](4096) + r.append("--- level "); + AppendNumberTo(&r, level); + r.append(" --- version# "); + AppendNumberTo(&r, version_number_); + if (storage_info_.compact_cursor_[level].Valid()) { + r.append(" --- compact_cursor: "); + r.append(storage_info_.compact_cursor_[level].DebugString(hex)); + } + r.append(" ---\n"); + const std::vector& files = storage_info_.files_[level]; + for (size_t i = 0; i < files.size(); i++) { + r.push_back(' '); + AppendNumberTo(&r, files[i]->fd.GetNumber()); + r.push_back(':'); + AppendNumberTo(&r, files[i]->fd.GetFileSize()); + r.append("["); + AppendNumberTo(&r, files[i]->fd.smallest_seqno); + r.append(" .. "); + AppendNumberTo(&r, files[i]->fd.largest_seqno); + r.append("]"); + r.append("["); + r.append(files[i]->smallest.DebugString(hex)); + r.append(" .. 
"); + r.append(files[i]->largest.DebugString(hex)); + r.append("]"); + if (files[i]->oldest_blob_file_number != kInvalidBlobFileNumber) { + r.append(" blob_file:"); + AppendNumberTo(&r, files[i]->oldest_blob_file_number); + } + if (print_stats) { + r.append("("); + r.append(std::to_string( + files[i]->stats.num_reads_sampled.load(std::memory_order_relaxed))); + r.append(")"); + } + r.append("\n"); + } + } + + const auto& blob_files = storage_info_.GetBlobFiles(); + if (!blob_files.empty()) { + r.append("--- blob files --- version# "); + AppendNumberTo(&r, version_number_); + r.append(" ---\n"); + for (const auto& blob_file_meta : blob_files) { + assert(blob_file_meta); + + r.append(blob_file_meta->DebugString()); + r.push_back('\n'); + } + } + + return r; +} + +// this is used to batch writes to the manifest file +struct VersionSet::ManifestWriter { + Status status; + bool done; + InstrumentedCondVar cv; + ColumnFamilyData* cfd; + const MutableCFOptions mutable_cf_options; + const autovector& edit_list; + const std::function manifest_write_callback; + + explicit ManifestWriter( + InstrumentedMutex* mu, ColumnFamilyData* _cfd, + const MutableCFOptions& cf_options, const autovector& e, + const std::function& manifest_wcb) + : done(false), + cv(mu), + cfd(_cfd), + mutable_cf_options(cf_options), + edit_list(e), + manifest_write_callback(manifest_wcb) {} + ~ManifestWriter() { status.PermitUncheckedError(); } + + bool IsAllWalEdits() const { + bool all_wal_edits = true; + for (const auto& e : edit_list) { + if (!e->IsWalManipulation()) { + all_wal_edits = false; + break; + } + } + return all_wal_edits; + } +}; + +Status AtomicGroupReadBuffer::AddEdit(VersionEdit* edit) { + assert(edit); + if (edit->is_in_atomic_group_) { + TEST_SYNC_POINT("AtomicGroupReadBuffer::AddEdit:AtomicGroup"); + if (replay_buffer_.empty()) { + replay_buffer_.resize(edit->remaining_entries_ + 1); + TEST_SYNC_POINT_CALLBACK( + "AtomicGroupReadBuffer::AddEdit:FirstInAtomicGroup", edit); + } + read_edits_in_atomic_group_++; + if (read_edits_in_atomic_group_ + edit->remaining_entries_ != + static_cast(replay_buffer_.size())) { + TEST_SYNC_POINT_CALLBACK( + "AtomicGroupReadBuffer::AddEdit:IncorrectAtomicGroupSize", edit); + return Status::Corruption("corrupted atomic group"); + } + replay_buffer_[read_edits_in_atomic_group_ - 1] = *edit; + if (read_edits_in_atomic_group_ == replay_buffer_.size()) { + TEST_SYNC_POINT_CALLBACK( + "AtomicGroupReadBuffer::AddEdit:LastInAtomicGroup", edit); + return Status::OK(); + } + return Status::OK(); + } + + // A normal edit. 
+ if (!replay_buffer().empty()) { + TEST_SYNC_POINT_CALLBACK( + "AtomicGroupReadBuffer::AddEdit:AtomicGroupMixedWithNormalEdits", edit); + return Status::Corruption("corrupted atomic group"); + } + return Status::OK(); +} + +bool AtomicGroupReadBuffer::IsFull() const { + return read_edits_in_atomic_group_ == replay_buffer_.size(); +} + +bool AtomicGroupReadBuffer::IsEmpty() const { return replay_buffer_.empty(); } + +void AtomicGroupReadBuffer::Clear() { + read_edits_in_atomic_group_ = 0; + replay_buffer_.clear(); +} + +VersionSet::VersionSet(const std::string& dbname, + const ImmutableDBOptions* _db_options, + const FileOptions& storage_options, Cache* table_cache, + WriteBufferManager* write_buffer_manager, + WriteController* write_controller, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, + const std::string& db_id, + const std::string& db_session_id) + : column_family_set_(new ColumnFamilySet( + dbname, _db_options, storage_options, table_cache, + write_buffer_manager, write_controller, block_cache_tracer, io_tracer, + db_id, db_session_id)), + table_cache_(table_cache), + env_(_db_options->env), + fs_(_db_options->fs, io_tracer), + clock_(_db_options->clock), + dbname_(dbname), + db_options_(_db_options), + next_file_number_(2), + manifest_file_number_(0), // Filled by Recover() + options_file_number_(0), + options_file_size_(0), + pending_manifest_file_number_(0), + last_sequence_(0), + last_allocated_sequence_(0), + last_published_sequence_(0), + prev_log_number_(0), + current_version_number_(0), + manifest_file_size_(0), + file_options_(storage_options), + block_cache_tracer_(block_cache_tracer), + io_tracer_(io_tracer), + db_session_id_(db_session_id) {} + +VersionSet::~VersionSet() { + // we need to delete column_family_set_ because its destructor depends on + // VersionSet + column_family_set_.reset(); + for (auto& file : obsolete_files_) { + if (file.metadata->table_reader_handle) { + table_cache_->Release(file.metadata->table_reader_handle); + TableCache::Evict(table_cache_, file.metadata->fd.GetNumber()); + } + file.DeleteMetadata(); + } + obsolete_files_.clear(); + io_status_.PermitUncheckedError(); +} + +void VersionSet::Reset() { + if (column_family_set_) { + WriteBufferManager* wbm = column_family_set_->write_buffer_manager(); + WriteController* wc = column_family_set_->write_controller(); + // db_id becomes the source of truth after DBImpl::Recover(): + // https://github.com/facebook/rocksdb/blob/v7.3.1/db/db_impl/db_impl_open.cc#L527 + // Note: we may not be able to recover db_id from MANIFEST if + // options.write_dbid_to_manifest is false (default). 
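+    // Rebuild the ColumnFamilySet in place, reusing the write buffer manager
+    // and write controller captured above.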
+    column_family_set_.reset(new ColumnFamilySet(
+        dbname_, db_options_, file_options_, table_cache_, wbm, wc,
+        block_cache_tracer_, io_tracer_, db_id_, db_session_id_));
+  }
+  db_id_.clear();
+  next_file_number_.store(2);
+  min_log_number_to_keep_.store(0);
+  manifest_file_number_ = 0;
+  options_file_number_ = 0;
+  pending_manifest_file_number_ = 0;
+  last_sequence_.store(0);
+  last_allocated_sequence_.store(0);
+  last_published_sequence_.store(0);
+  prev_log_number_ = 0;
+  descriptor_log_.reset();
+  current_version_number_ = 0;
+  manifest_writers_.clear();
+  manifest_file_size_ = 0;
+  obsolete_files_.clear();
+  obsolete_manifests_.clear();
+  wals_.Reset();
+}
+
+void VersionSet::AppendVersion(ColumnFamilyData* column_family_data,
+                               Version* v) {
+  // compute new compaction score
+  v->storage_info()->ComputeCompactionScore(
+      *column_family_data->ioptions(),
+      *column_family_data->GetLatestMutableCFOptions());
+
+  // Mark v finalized
+  v->storage_info_.SetFinalized();
+
+  // Make "v" current
+  assert(v->refs_ == 0);
+  Version* current = column_family_data->current();
+  assert(v != current);
+  if (current != nullptr) {
+    assert(current->refs_ > 0);
+    current->Unref();
+  }
+  column_family_data->SetCurrent(v);
+  v->Ref();
+
+  // Append to linked list
+  v->prev_ = column_family_data->dummy_versions()->prev_;
+  v->next_ = column_family_data->dummy_versions();
+  v->prev_->next_ = v;
+  v->next_->prev_ = v;
+}
+
+Status VersionSet::ProcessManifestWrites(
+    std::deque<ManifestWriter>& writers, InstrumentedMutex* mu,
+    FSDirectory* dir_contains_current_file, bool new_descriptor_log,
+    const ColumnFamilyOptions* new_cf_options,
+    const ReadOptions& read_options) {
+  mu->AssertHeld();
+  assert(!writers.empty());
+  ManifestWriter& first_writer = writers.front();
+  ManifestWriter* last_writer = &first_writer;
+
+  assert(!manifest_writers_.empty());
+  assert(manifest_writers_.front() == &first_writer);
+
+  autovector<VersionEdit*> batch_edits;
+  autovector<Version*> versions;
+  autovector<const MutableCFOptions*> mutable_cf_options_ptrs;
+  std::vector<std::unique_ptr<BaseReferencedVersionBuilder>> builder_guards;
+
+  // Tracking `max_last_sequence` is needed to ensure we write
+  // `VersionEdit::last_sequence_`s in non-decreasing order according to the
+  // recovery code's requirement. It also allows us to defer updating
+  // `descriptor_last_sequence_` until the apply phase, after the log phase
+  // succeeds.
+  SequenceNumber max_last_sequence = descriptor_last_sequence_;
+
+  if (first_writer.edit_list.front()->IsColumnFamilyManipulation()) {
+    // No group commits for column family add or drop
+    LogAndApplyCFHelper(first_writer.edit_list.front(), &max_last_sequence);
+    batch_edits.push_back(first_writer.edit_list.front());
+  } else {
+    auto it = manifest_writers_.cbegin();
+    size_t group_start = std::numeric_limits<size_t>::max();
+    while (it != manifest_writers_.cend()) {
+      if ((*it)->edit_list.front()->IsColumnFamilyManipulation()) {
+        // no group commits for column family add or drop
+        break;
+      }
+      last_writer = *(it++);
+      assert(last_writer != nullptr);
+      assert(last_writer->cfd != nullptr);
+      if (last_writer->cfd->IsDropped()) {
+        // If we detect a dropped CF at this point, and the corresponding
+        // version edits belong to an atomic group, then we need to find out
+        // the preceding version edits in the same atomic group, and update
+        // their `remaining_entries_` member variable because we are NOT going
+        // to write the version edits of the dropped CF to the MANIFEST. If we
+        // don't update them, then Recover can report a corrupted atomic group
+        // because the `remaining_entries_` do not match.
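+        // (Illustrative: for an atomic group of four edits with
+        // remaining_entries_ = 3, 2, 1, 0 where the dropped CF contributed
+        // the last two, k below becomes 2 and the surviving edits are
+        // rewritten to 1, 0, so recovery still sees a complete group.)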
+        if (!batch_edits.empty()) {
+          if (batch_edits.back()->is_in_atomic_group_ &&
+              batch_edits.back()->remaining_entries_ > 0) {
+            assert(group_start < batch_edits.size());
+            const auto& edit_list = last_writer->edit_list;
+            size_t k = 0;
+            while (k < edit_list.size()) {
+              if (!edit_list[k]->is_in_atomic_group_) {
+                break;
+              } else if (edit_list[k]->remaining_entries_ == 0) {
+                ++k;
+                break;
+              }
+              ++k;
+            }
+            for (auto i = group_start; i < batch_edits.size(); ++i) {
+              assert(static_cast<uint32_t>(k) <=
+                     batch_edits.back()->remaining_entries_);
+              batch_edits[i]->remaining_entries_ -= static_cast<uint32_t>(k);
+            }
+          }
+        }
+        continue;
+      }
+      // We do a linear search on versions because versions is small.
+      // TODO(yanqin) maybe consider unordered_map
+      Version* version = nullptr;
+      VersionBuilder* builder = nullptr;
+      for (int i = 0; i != static_cast<int>(versions.size()); ++i) {
+        uint32_t cf_id = last_writer->cfd->GetID();
+        if (versions[i]->cfd()->GetID() == cf_id) {
+          version = versions[i];
+          assert(!builder_guards.empty() &&
+                 builder_guards.size() == versions.size());
+          builder = builder_guards[i]->version_builder();
+          TEST_SYNC_POINT_CALLBACK(
+              "VersionSet::ProcessManifestWrites:SameColumnFamily", &cf_id);
+          break;
+        }
+      }
+      if (version == nullptr) {
+        // WAL manipulations do not need to be applied to versions.
+        if (!last_writer->IsAllWalEdits()) {
+          version = new Version(last_writer->cfd, this, file_options_,
+                                last_writer->mutable_cf_options, io_tracer_,
+                                current_version_number_++);
+          versions.push_back(version);
+          mutable_cf_options_ptrs.push_back(&last_writer->mutable_cf_options);
+          builder_guards.emplace_back(
+              new BaseReferencedVersionBuilder(last_writer->cfd));
+          builder = builder_guards.back()->version_builder();
+        }
+        assert(last_writer->IsAllWalEdits() || builder);
+        assert(last_writer->IsAllWalEdits() || version);
+        TEST_SYNC_POINT_CALLBACK("VersionSet::ProcessManifestWrites:NewVersion",
+                                 version);
+      }
+      for (const auto& e : last_writer->edit_list) {
+        if (e->is_in_atomic_group_) {
+          if (batch_edits.empty() || !batch_edits.back()->is_in_atomic_group_ ||
+              (batch_edits.back()->is_in_atomic_group_ &&
+               batch_edits.back()->remaining_entries_ == 0)) {
+            group_start = batch_edits.size();
+          }
+        } else if (group_start != std::numeric_limits<size_t>::max()) {
+          group_start = std::numeric_limits<size_t>::max();
+        }
+        Status s = LogAndApplyHelper(last_writer->cfd, builder, e,
+                                     &max_last_sequence, mu);
+        if (!s.ok()) {
+          // free up the allocated memory
+          for (auto v : versions) {
+            delete v;
+          }
+          return s;
+        }
+        batch_edits.push_back(e);
+      }
+    }
+    for (int i = 0; i < static_cast<int>(versions.size()); ++i) {
+      assert(!builder_guards.empty() &&
+             builder_guards.size() == versions.size());
+      auto* builder = builder_guards[i]->version_builder();
+      Status s = builder->SaveTo(versions[i]->storage_info());
+      if (!s.ok()) {
+        // free up the allocated memory
+        for (auto v : versions) {
+          delete v;
+        }
+        return s;
+      }
+    }
+  }
+
+#ifndef NDEBUG
+  // Verify that version edits of atomic groups have correct
+  // remaining_entries_.
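+  // The walk below checks each group [k, i): every member j must satisfy
+  // remaining_entries_[j] == remaining_entries_[k] - (j - k), and the last
+  // member must reach exactly 0.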
+  size_t k = 0;
+  while (k < batch_edits.size()) {
+    while (k < batch_edits.size() && !batch_edits[k]->is_in_atomic_group_) {
+      ++k;
+    }
+    if (k == batch_edits.size()) {
+      break;
+    }
+    size_t i = k;
+    while (i < batch_edits.size()) {
+      if (!batch_edits[i]->is_in_atomic_group_) {
+        break;
+      }
+      assert(i - k + batch_edits[i]->remaining_entries_ ==
+             batch_edits[k]->remaining_entries_);
+      if (batch_edits[i]->remaining_entries_ == 0) {
+        ++i;
+        break;
+      }
+      ++i;
+    }
+    assert(batch_edits[i - 1]->is_in_atomic_group_);
+    assert(0 == batch_edits[i - 1]->remaining_entries_);
+    std::vector<VersionEdit*> tmp;
+    for (size_t j = k; j != i; ++j) {
+      tmp.emplace_back(batch_edits[j]);
+    }
+    TEST_SYNC_POINT_CALLBACK(
+        "VersionSet::ProcessManifestWrites:CheckOneAtomicGroup", &tmp);
+    k = i;
+  }
+#endif  // NDEBUG
+
+  assert(pending_manifest_file_number_ == 0);
+  if (!descriptor_log_ ||
+      manifest_file_size_ > db_options_->max_manifest_file_size) {
+    TEST_SYNC_POINT("VersionSet::ProcessManifestWrites:BeforeNewManifest");
+    new_descriptor_log = true;
+  } else {
+    pending_manifest_file_number_ = manifest_file_number_;
+  }
+
+  // Local cached copy of state variable(s). WriteCurrentStateToManifest()
+  // reads its content after releasing db mutex to avoid race with
+  // SwitchMemtable().
+  std::unordered_map<uint32_t, MutableCFState> curr_state;
+  VersionEdit wal_additions;
+  if (new_descriptor_log) {
+    pending_manifest_file_number_ = NewFileNumber();
+    batch_edits.back()->SetNextFile(next_file_number_.load());
+
+    // if we are writing out new snapshot make sure to persist max column
+    // family.
+    if (column_family_set_->GetMaxColumnFamily() > 0) {
+      first_writer.edit_list.front()->SetMaxColumnFamily(
+          column_family_set_->GetMaxColumnFamily());
+    }
+    for (const auto* cfd : *column_family_set_) {
+      assert(curr_state.find(cfd->GetID()) == curr_state.end());
+      curr_state.emplace(std::make_pair(
+          cfd->GetID(),
+          MutableCFState(cfd->GetLogNumber(), cfd->GetFullHistoryTsLow())));
+    }
+
+    for (const auto& wal : wals_.GetWals()) {
+      wal_additions.AddWal(wal.first, wal.second);
+    }
+  }
+
+  uint64_t new_manifest_file_size = 0;
+  Status s;
+  IOStatus io_s;
+  IOStatus manifest_io_status;
+  {
+    FileOptions opt_file_opts = fs_->OptimizeForManifestWrite(file_options_);
+    mu->Unlock();
+    TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifestStart");
+    TEST_SYNC_POINT_CALLBACK("VersionSet::LogAndApply:WriteManifest", nullptr);
+    if (!first_writer.edit_list.front()->IsColumnFamilyManipulation()) {
+      for (int i = 0; i < static_cast<int>(versions.size()); ++i) {
+        assert(!builder_guards.empty() &&
+               builder_guards.size() == versions.size());
+        assert(!mutable_cf_options_ptrs.empty() &&
+               builder_guards.size() == versions.size());
+        ColumnFamilyData* cfd = versions[i]->cfd_;
+        s = builder_guards[i]->version_builder()->LoadTableHandlers(
+            cfd->internal_stats(), 1 /* max_threads */,
+            true /* prefetch_index_and_filter_in_cache */,
+            false /* is_initial_load */,
+            mutable_cf_options_ptrs[i]->prefix_extractor,
+            MaxFileSizeForL0MetaPin(*mutable_cf_options_ptrs[i]), read_options,
+            mutable_cf_options_ptrs[i]->block_protection_bytes_per_key);
+        if (!s.ok()) {
+          if (db_options_->paranoid_checks) {
+            break;
+          }
+          s = Status::OK();
+        }
+      }
+    }
+
+    if (s.ok() && new_descriptor_log) {
+      // This is fine because everything inside of this block is serialized --
+      // only one thread can be here at the same time
+      // create new manifest file
+      ROCKS_LOG_INFO(db_options_->info_log, "Creating manifest %" PRIu64 "\n",
+                     pending_manifest_file_number_);
+      std::string descriptor_fname =
+          DescriptorFileName(dbname_, pending_manifest_file_number_);
+      std::unique_ptr<FSWritableFile> descriptor_file;
+      io_s = NewWritableFile(fs_.get(), descriptor_fname, &descriptor_file,
+                             opt_file_opts);
+      if (io_s.ok()) {
+        descriptor_file->SetPreallocationBlockSize(
+            db_options_->manifest_preallocation_size);
+        FileTypeSet tmp_set = db_options_->checksum_handoff_file_types;
+        std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
+            std::move(descriptor_file), descriptor_fname, opt_file_opts, clock_,
+            io_tracer_, nullptr, db_options_->listeners, nullptr,
+            tmp_set.Contains(FileType::kDescriptorFile),
+            tmp_set.Contains(FileType::kDescriptorFile)));
+        descriptor_log_.reset(
+            new log::Writer(std::move(file_writer), 0, false));
+        s = WriteCurrentStateToManifest(curr_state, wal_additions,
+                                        descriptor_log_.get(), io_s);
+      } else {
+        manifest_io_status = io_s;
+        s = io_s;
+      }
+    }
+
+    if (s.ok()) {
+      if (!first_writer.edit_list.front()->IsColumnFamilyManipulation()) {
+        constexpr bool update_stats = true;
+
+        for (int i = 0; i < static_cast<int>(versions.size()); ++i) {
+          versions[i]->PrepareAppend(*mutable_cf_options_ptrs[i], read_options,
+                                     update_stats);
+        }
+      }
+
+      // Write new records to MANIFEST log
+#ifndef NDEBUG
+      size_t idx = 0;
+#endif
+      for (auto& e : batch_edits) {
+        std::string record;
+        if (!e->EncodeTo(&record)) {
+          s = Status::Corruption("Unable to encode VersionEdit:" +
+                                 e->DebugString(true));
+          break;
+        }
+        TEST_KILL_RANDOM_WITH_WEIGHT("VersionSet::LogAndApply:BeforeAddRecord",
+                                     REDUCE_ODDS2);
+#ifndef NDEBUG
+        if (batch_edits.size() > 1 && batch_edits.size() - 1 == idx) {
+          TEST_SYNC_POINT_CALLBACK(
+              "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0",
+              nullptr);
+          TEST_SYNC_POINT(
+              "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:1");
+        }
+        ++idx;
+#endif /* !NDEBUG */
+        io_s = descriptor_log_->AddRecord(record);
+        if (!io_s.ok()) {
+          s = io_s;
+          manifest_io_status = io_s;
+          break;
+        }
+      }
+
+      if (s.ok()) {
+        io_s = SyncManifest(db_options_, descriptor_log_->file());
+        manifest_io_status = io_s;
+        TEST_SYNC_POINT_CALLBACK(
+            "VersionSet::ProcessManifestWrites:AfterSyncManifest", &io_s);
+      }
+      if (!io_s.ok()) {
+        s = io_s;
+        ROCKS_LOG_ERROR(db_options_->info_log, "MANIFEST write %s\n",
+                        s.ToString().c_str());
+      }
+    }
+
+    // If we just created a new descriptor file, install it by writing a
+    // new CURRENT file that points to it.
+    if (s.ok()) {
+      assert(manifest_io_status.ok());
+    }
+    if (s.ok() && new_descriptor_log) {
+      io_s = SetCurrentFile(fs_.get(), dbname_, pending_manifest_file_number_,
+                            dir_contains_current_file);
+      if (!io_s.ok()) {
+        s = io_s;
+      }
+    }
+
+    if (s.ok()) {
+      // find offset in manifest file where this version is stored.
+      new_manifest_file_size = descriptor_log_->file()->GetFileSize();
+    }
+
+    if (first_writer.edit_list.front()->is_column_family_drop_) {
+      TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:0");
+      TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:1");
+      TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:2");
+    }
+
+    LogFlush(db_options_->info_log);
+    TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifestDone");
+    mu->Lock();
+  }
+
+  if (s.ok()) {
+    // Apply WAL edits, DB mutex must be held.
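+    // Note that the in-memory WAL set is only mutated after the MANIFEST
+    // write succeeded; a failed AddRecord above leaves wals_ untouched, so
+    // memory and the manifest cannot disagree about which WALs are tracked.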
+    for (auto& e : batch_edits) {
+      if (e->IsWalAddition()) {
+        s = wals_.AddWals(e->GetWalAdditions());
+      } else if (e->IsWalDeletion()) {
+        s = wals_.DeleteWalsBefore(e->GetWalDeletion().GetLogNumber());
+      }
+      if (!s.ok()) {
+        break;
+      }
+    }
+  }
+
+  if (!io_s.ok()) {
+    if (io_status_.ok()) {
+      io_status_ = io_s;
+    }
+  } else if (!io_status_.ok()) {
+    io_status_ = io_s;
+  }
+
+  // Append the old manifest file to the obsolete_manifests_ list to be
+  // deleted by PurgeObsoleteFiles later.
+  if (s.ok() && new_descriptor_log) {
+    obsolete_manifests_.emplace_back(
+        DescriptorFileName("", manifest_file_number_));
+  }
+
+  // Install the new versions
+  if (s.ok()) {
+    if (first_writer.edit_list.front()->is_column_family_add_) {
+      assert(batch_edits.size() == 1);
+      assert(new_cf_options != nullptr);
+      assert(max_last_sequence == descriptor_last_sequence_);
+      CreateColumnFamily(*new_cf_options, read_options,
+                         first_writer.edit_list.front());
+    } else if (first_writer.edit_list.front()->is_column_family_drop_) {
+      assert(batch_edits.size() == 1);
+      assert(max_last_sequence == descriptor_last_sequence_);
+      first_writer.cfd->SetDropped();
+      first_writer.cfd->UnrefAndTryDelete();
+    } else {
+      // Each version in versions corresponds to a column family.
+      // For each column family, update its log number indicating that logs
+      // with number smaller than this should be ignored.
+      uint64_t last_min_log_number_to_keep = 0;
+      for (const auto& e : batch_edits) {
+        ColumnFamilyData* cfd = nullptr;
+        if (!e->IsColumnFamilyManipulation()) {
+          cfd = column_family_set_->GetColumnFamily(e->column_family_);
+          // e would not have been added to batch_edits if its corresponding
+          // column family is dropped.
+          assert(cfd);
+        }
+        if (cfd) {
+          if (e->has_log_number_ && e->log_number_ > cfd->GetLogNumber()) {
+            cfd->SetLogNumber(e->log_number_);
+          }
+          if (e->HasFullHistoryTsLow()) {
+            cfd->SetFullHistoryTsLow(e->GetFullHistoryTsLow());
+          }
+        }
+        if (e->has_min_log_number_to_keep_) {
+          last_min_log_number_to_keep =
+              std::max(last_min_log_number_to_keep, e->min_log_number_to_keep_);
+        }
+      }
+
+      if (last_min_log_number_to_keep != 0) {
+        MarkMinLogNumberToKeep(last_min_log_number_to_keep);
+      }
+
+      for (int i = 0; i < static_cast<int>(versions.size()); ++i) {
+        ColumnFamilyData* cfd = versions[i]->cfd_;
+        AppendVersion(cfd, versions[i]);
+      }
+    }
+    assert(max_last_sequence >= descriptor_last_sequence_);
+    descriptor_last_sequence_ = max_last_sequence;
+    manifest_file_number_ = pending_manifest_file_number_;
+    manifest_file_size_ = new_manifest_file_size;
+    prev_log_number_ = first_writer.edit_list.front()->prev_log_number_;
+  } else {
+    std::string version_edits;
+    for (auto& e : batch_edits) {
+      version_edits += ("\n" + e->DebugString(true));
+    }
+    ROCKS_LOG_ERROR(db_options_->info_log,
+                    "Error in committing version edit to MANIFEST: %s",
+                    version_edits.c_str());
+    for (auto v : versions) {
+      delete v;
+    }
+    if (manifest_io_status.ok()) {
+      manifest_file_number_ = pending_manifest_file_number_;
+      manifest_file_size_ = new_manifest_file_size;
+    }
+    // If manifest append failed for whatever reason, the file could be
+    // corrupted. So we need to force the next version update to start a
+    // new manifest file.
+    descriptor_log_.reset();
+    // If manifest operations failed, then we know the CURRENT file still
+    // points to the original MANIFEST. Therefore, we can safely delete the
+    // new MANIFEST.
+    // If manifest operations succeeded, and we are here, then it is possible
+    // that renaming tmp file to CURRENT failed.
+    //
+    // On a local POSIX-compliant FS, the CURRENT must point to the original
+    // MANIFEST. We can delete the new MANIFEST for simplicity, but we can also
+    // keep it. Future recovery will ignore this MANIFEST. It's also ok for the
+    // process not to crash and continue using the db. Any future LogAndApply()
+    // call will switch to a new MANIFEST and update CURRENT, still ignoring
+    // this one.
+    //
+    // On a non-local FS, it is
+    // possible that the rename operation succeeded on the server (remote)
+    // side, but the client somehow returns a non-ok status to RocksDB. Note
+    // that this does not violate atomicity. Should we delete the new MANIFEST
+    // successfully, a subsequent recovery attempt will likely see the CURRENT
+    // pointing to the new MANIFEST, thus fail. We will not be able to open the
+    // DB again. Therefore, if manifest operations succeed, we should keep the
+    // new MANIFEST. If the process proceeds, any future LogAndApply() call
+    // will switch to a new MANIFEST and update CURRENT. If the user tries to
+    // re-open the DB,
+    // a) CURRENT points to the new MANIFEST, and the new MANIFEST is present.
+    // b) CURRENT points to the original MANIFEST, and the original MANIFEST
+    //    also exists.
+    if (new_descriptor_log && !manifest_io_status.ok()) {
+      ROCKS_LOG_INFO(db_options_->info_log,
+                     "Deleting manifest %" PRIu64 " current manifest %" PRIu64
+                     "\n",
+                     pending_manifest_file_number_, manifest_file_number_);
+      Status manifest_del_status = env_->DeleteFile(
+          DescriptorFileName(dbname_, pending_manifest_file_number_));
+      if (!manifest_del_status.ok()) {
+        ROCKS_LOG_WARN(db_options_->info_log,
+                       "Failed to delete manifest %" PRIu64 ": %s",
+                       pending_manifest_file_number_,
+                       manifest_del_status.ToString().c_str());
+      }
+    }
+  }
+
+  pending_manifest_file_number_ = 0;
+
+#ifndef NDEBUG
+  // This is here kind of awkwardly because there are no other consistency
+  // checks on `VersionSet`'s updates for the new `Version`s. We might want
+  // to move it to a dedicated function, or remove it if we gain enough
+  // confidence in `descriptor_last_sequence_`.
+  if (s.ok()) {
+    for (const auto* v : versions) {
+      const auto* vstorage = v->storage_info();
+      for (int level = 0; level < vstorage->num_levels(); ++level) {
+        for (const auto& file : vstorage->LevelFiles(level)) {
+          assert(file->fd.largest_seqno <= descriptor_last_sequence_);
+        }
+      }
+    }
+  }
+#endif  // NDEBUG
+
+  // wake up all the waiting writers
+  while (true) {
+    ManifestWriter* ready = manifest_writers_.front();
+    manifest_writers_.pop_front();
+    bool need_signal = true;
+    for (const auto& w : writers) {
+      if (&w == ready) {
+        need_signal = false;
+        break;
+      }
+    }
+    ready->status = s;
+    ready->done = true;
+    if (ready->manifest_write_callback) {
+      (ready->manifest_write_callback)(s);
+    }
+    if (need_signal) {
+      ready->cv.Signal();
+    }
+    if (ready == last_writer) {
+      break;
+    }
+  }
+  if (!manifest_writers_.empty()) {
+    manifest_writers_.front()->cv.Signal();
+  }
+  return s;
+}
+
+void VersionSet::WakeUpWaitingManifestWriters() {
+  // wake up all the waiting writers
+  // Notify new head of manifest write queue.
+  if (!manifest_writers_.empty()) {
+    manifest_writers_.front()->cv.Signal();
+  }
+}
+
+// 'datas' is grammatically incorrect. We still use this notation to indicate
+// that this variable represents a collection of column_family_data.
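+// Group commit sketch: the head of manifest_writers_ becomes the leader and
+// commits the edit lists of every compatible follower in a single MANIFEST
+// write via ProcessManifestWrites(); followers just wait on their condition
+// variable and read the shared status back out of their ManifestWriter.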
+Status VersionSet::LogAndApply(
+    const autovector<ColumnFamilyData*>& column_family_datas,
+    const autovector<const MutableCFOptions*>& mutable_cf_options_list,
+    const ReadOptions& read_options,
+    const autovector<autovector<VersionEdit*>>& edit_lists,
+    InstrumentedMutex* mu, FSDirectory* dir_contains_current_file,
+    bool new_descriptor_log, const ColumnFamilyOptions* new_cf_options,
+    const std::vector<std::function<void(const Status&)>>& manifest_wcbs) {
+  mu->AssertHeld();
+  int num_edits = 0;
+  for (const auto& elist : edit_lists) {
+    num_edits += static_cast<int>(elist.size());
+  }
+  if (num_edits == 0) {
+    return Status::OK();
+  } else if (num_edits > 1) {
+#ifndef NDEBUG
+    for (const auto& edit_list : edit_lists) {
+      for (const auto& edit : edit_list) {
+        assert(!edit->IsColumnFamilyManipulation());
+      }
+    }
+#endif /* ! NDEBUG */
+  }
+
+  int num_cfds = static_cast<int>(column_family_datas.size());
+  if (num_cfds == 1 && column_family_datas[0] == nullptr) {
+    assert(edit_lists.size() == 1 && edit_lists[0].size() == 1);
+    assert(edit_lists[0][0]->is_column_family_add_);
+    assert(new_cf_options != nullptr);
+  }
+  std::deque<ManifestWriter> writers;
+  if (num_cfds > 0) {
+    assert(static_cast<size_t>(num_cfds) == mutable_cf_options_list.size());
+    assert(static_cast<size_t>(num_cfds) == edit_lists.size());
+  }
+  for (int i = 0; i < num_cfds; ++i) {
+    const auto wcb =
+        manifest_wcbs.empty() ? [](const Status&) {} : manifest_wcbs[i];
+    writers.emplace_back(mu, column_family_datas[i],
+                         *mutable_cf_options_list[i], edit_lists[i], wcb);
+    manifest_writers_.push_back(&writers[i]);
+  }
+  assert(!writers.empty());
+  ManifestWriter& first_writer = writers.front();
+  TEST_SYNC_POINT_CALLBACK("VersionSet::LogAndApply:BeforeWriterWaiting",
+                           nullptr);
+  while (!first_writer.done && &first_writer != manifest_writers_.front()) {
+    first_writer.cv.Wait();
+  }
+  if (first_writer.done) {
+    // All non-CF-manipulation operations can be grouped together and committed
+    // to MANIFEST. They should all have finished. The status code is stored in
+    // the first manifest writer.
+#ifndef NDEBUG
+    for (const auto& writer : writers) {
+      assert(writer.done);
+    }
+    TEST_SYNC_POINT_CALLBACK("VersionSet::LogAndApply:WakeUpAndDone", mu);
+#endif /* !NDEBUG */
+    return first_writer.status;
+  }
+
+  int num_undropped_cfds = 0;
+  for (auto cfd : column_family_datas) {
+    // if cfd == nullptr, it is a column family add.
+    if (cfd == nullptr || !cfd->IsDropped()) {
+      ++num_undropped_cfds;
+    }
+  }
+  if (0 == num_undropped_cfds) {
+    for (int i = 0; i != num_cfds; ++i) {
+      manifest_writers_.pop_front();
+    }
+    // Notify new head of manifest write queue.
+ if (!manifest_writers_.empty()) { + manifest_writers_.front()->cv.Signal(); + } + return Status::ColumnFamilyDropped(); + } + return ProcessManifestWrites(writers, mu, dir_contains_current_file, + new_descriptor_log, new_cf_options, + read_options); +} + +void VersionSet::LogAndApplyCFHelper(VersionEdit* edit, + SequenceNumber* max_last_sequence) { + assert(max_last_sequence != nullptr); + assert(edit->IsColumnFamilyManipulation()); + edit->SetNextFile(next_file_number_.load()); + assert(!edit->HasLastSequence()); + edit->SetLastSequence(*max_last_sequence); + if (edit->is_column_family_drop_) { + // if we drop column family, we have to make sure to save max column family, + // so that we don't reuse existing ID + edit->SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily()); + } +} + +Status VersionSet::LogAndApplyHelper(ColumnFamilyData* cfd, + VersionBuilder* builder, VersionEdit* edit, + SequenceNumber* max_last_sequence, + InstrumentedMutex* mu) { +#ifdef NDEBUG + (void)cfd; +#endif + mu->AssertHeld(); + assert(!edit->IsColumnFamilyManipulation()); + assert(max_last_sequence != nullptr); + + if (edit->has_log_number_) { + assert(edit->log_number_ >= cfd->GetLogNumber()); + assert(edit->log_number_ < next_file_number_.load()); + } + + if (!edit->has_prev_log_number_) { + edit->SetPrevLogNumber(prev_log_number_); + } + edit->SetNextFile(next_file_number_.load()); + if (edit->HasLastSequence() && edit->GetLastSequence() > *max_last_sequence) { + *max_last_sequence = edit->GetLastSequence(); + } else { + edit->SetLastSequence(*max_last_sequence); + } + + // The builder can be nullptr only if edit is WAL manipulation, + // because WAL edits do not need to be applied to versions, + // we return Status::OK() in this case. + assert(builder || edit->IsWalManipulation()); + return builder ? 
builder->Apply(edit) : Status::OK(); +} + +Status VersionSet::GetCurrentManifestPath(const std::string& dbname, + FileSystem* fs, + std::string* manifest_path, + uint64_t* manifest_file_number) { + assert(fs != nullptr); + assert(manifest_path != nullptr); + assert(manifest_file_number != nullptr); + + std::string fname; + Status s = ReadFileToString(fs, CurrentFileName(dbname), &fname); + if (!s.ok()) { + return s; + } + if (fname.empty() || fname.back() != '\n') { + return Status::Corruption("CURRENT file does not end with newline"); + } + // remove the trailing '\n' + fname.resize(fname.size() - 1); + FileType type; + bool parse_ok = ParseFileName(fname, manifest_file_number, &type); + if (!parse_ok || type != kDescriptorFile) { + return Status::Corruption("CURRENT file corrupted"); + } + *manifest_path = dbname; + if (dbname.back() != '/') { + manifest_path->push_back('/'); + } + manifest_path->append(fname); + return Status::OK(); +} + +Status VersionSet::Recover( + const std::vector& column_families, bool read_only, + std::string* db_id, bool no_error_if_files_missing) { + const ReadOptions read_options(Env::IOActivity::kDBOpen); + // Read "CURRENT" file, which contains a pointer to the current manifest + // file + std::string manifest_path; + Status s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path, + &manifest_file_number_); + if (!s.ok()) { + return s; + } + + ROCKS_LOG_INFO(db_options_->info_log, "Recovering from manifest file: %s\n", + manifest_path.c_str()); + + std::unique_ptr manifest_file_reader; + { + std::unique_ptr manifest_file; + s = fs_->NewSequentialFile(manifest_path, + fs_->OptimizeForManifestRead(file_options_), + &manifest_file, nullptr); + if (!s.ok()) { + return s; + } + manifest_file_reader.reset(new SequentialFileReader( + std::move(manifest_file), manifest_path, + db_options_->log_readahead_size, io_tracer_, db_options_->listeners)); + } + uint64_t current_manifest_file_size = 0; + uint64_t log_number = 0; + { + VersionSet::LogReporter reporter; + Status log_read_status; + reporter.status = &log_read_status; + log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter, + true /* checksum */, 0 /* log_number */); + VersionEditHandler handler( + read_only, column_families, const_cast(this), + /*track_missing_files=*/false, no_error_if_files_missing, io_tracer_, + read_options, EpochNumberRequirement::kMightMissing); + handler.Iterate(reader, &log_read_status); + s = handler.status(); + if (s.ok()) { + log_number = handler.GetVersionEditParams().log_number_; + current_manifest_file_size = reader.GetReadOffset(); + assert(current_manifest_file_size != 0); + handler.GetDbId(db_id); + } + if (s.ok()) { + RecoverEpochNumbers(); + } + } + + if (s.ok()) { + manifest_file_size_ = current_manifest_file_size; + ROCKS_LOG_INFO( + db_options_->info_log, + "Recovered from manifest file:%s succeeded," + "manifest_file_number is %" PRIu64 ", next_file_number is %" PRIu64 + ", last_sequence is %" PRIu64 ", log_number is %" PRIu64 + ",prev_log_number is %" PRIu64 ",max_column_family is %" PRIu32 + ",min_log_number_to_keep is %" PRIu64 "\n", + manifest_path.c_str(), manifest_file_number_, next_file_number_.load(), + last_sequence_.load(), log_number, prev_log_number_, + column_family_set_->GetMaxColumnFamily(), min_log_number_to_keep()); + + for (auto cfd : *column_family_set_) { + if (cfd->IsDropped()) { + continue; + } + ROCKS_LOG_INFO(db_options_->info_log, + "Column family [%s] (ID %" PRIu32 + "), log number is %" PRIu64 "\n", + 
cfd->GetName().c_str(), cfd->GetID(), cfd->GetLogNumber());
+    }
+  }
+
+  return s;
+}
+
+namespace {
+class ManifestPicker {
+ public:
+  explicit ManifestPicker(const std::string& dbname,
+                          const std::vector<std::string>& files_in_dbname);
+  // REQUIRES Valid() == true
+  std::string GetNextManifest(uint64_t* file_number, std::string* file_name);
+  bool Valid() const { return manifest_file_iter_ != manifest_files_.end(); }
+
+ private:
+  const std::string& dbname_;
+  // MANIFEST file name(s)
+  std::vector<std::string> manifest_files_;
+  std::vector<std::string>::const_iterator manifest_file_iter_;
+};
+
+ManifestPicker::ManifestPicker(const std::string& dbname,
+                               const std::vector<std::string>& files_in_dbname)
+    : dbname_(dbname) {
+  // populate manifest files
+  assert(!files_in_dbname.empty());
+  for (const auto& fname : files_in_dbname) {
+    uint64_t file_num = 0;
+    FileType file_type;
+    bool parse_ok = ParseFileName(fname, &file_num, &file_type);
+    if (parse_ok && file_type == kDescriptorFile) {
+      manifest_files_.push_back(fname);
+    }
+  }
+  // seek to first manifest
+  std::sort(manifest_files_.begin(), manifest_files_.end(),
+            [](const std::string& lhs, const std::string& rhs) {
+              uint64_t num1 = 0;
+              uint64_t num2 = 0;
+              FileType type1;
+              FileType type2;
+              bool parse_ok1 = ParseFileName(lhs, &num1, &type1);
+              bool parse_ok2 = ParseFileName(rhs, &num2, &type2);
+#ifndef NDEBUG
+              assert(parse_ok1);
+              assert(parse_ok2);
+#else
+              (void)parse_ok1;
+              (void)parse_ok2;
+#endif
+              return num1 > num2;
+            });
+  manifest_file_iter_ = manifest_files_.begin();
+}
+
+std::string ManifestPicker::GetNextManifest(uint64_t* number,
+                                            std::string* file_name) {
+  assert(Valid());
+  std::string ret;
+  if (manifest_file_iter_ != manifest_files_.end()) {
+    ret.assign(dbname_);
+    if (ret.back() != kFilePathSeparator) {
+      ret.push_back(kFilePathSeparator);
+    }
+    ret.append(*manifest_file_iter_);
+    if (number) {
+      FileType type;
+      bool parse = ParseFileName(*manifest_file_iter_, number, &type);
+      assert(type == kDescriptorFile);
+#ifndef NDEBUG
+      assert(parse);
+#else
+      (void)parse;
+#endif
+    }
+    if (file_name) {
+      *file_name = *manifest_file_iter_;
+    }
+    ++manifest_file_iter_;
+  }
+  return ret;
+}
+}  // anonymous namespace
+
+Status VersionSet::TryRecover(
+    const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
+    const std::vector<std::string>& files_in_dbname, std::string* db_id,
+    bool* has_missing_table_file) {
+  ManifestPicker manifest_picker(dbname_, files_in_dbname);
+  if (!manifest_picker.Valid()) {
+    return Status::Corruption("Cannot locate MANIFEST file in " + dbname_);
+  }
+  Status s;
+  std::string manifest_path =
+      manifest_picker.GetNextManifest(&manifest_file_number_, nullptr);
+  while (!manifest_path.empty()) {
+    s = TryRecoverFromOneManifest(manifest_path, column_families, read_only,
+                                  db_id, has_missing_table_file);
+    if (s.ok() || !manifest_picker.Valid()) {
+      break;
+    }
+    Reset();
+    manifest_path =
+        manifest_picker.GetNextManifest(&manifest_file_number_, nullptr);
+  }
+  return s;
+}
+
+Status VersionSet::TryRecoverFromOneManifest(
+    const std::string& manifest_path,
+    const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
+    std::string* db_id, bool* has_missing_table_file) {
+  const ReadOptions read_options(Env::IOActivity::kDBOpen);
+  ROCKS_LOG_INFO(db_options_->info_log, "Trying to recover from manifest: %s\n",
+                 manifest_path.c_str());
+  std::unique_ptr<SequentialFileReader> manifest_file_reader;
+  Status s;
+  {
+    std::unique_ptr<FSSequentialFile> manifest_file;
+    s = fs_->NewSequentialFile(manifest_path,
+                               fs_->OptimizeForManifestRead(file_options_),
+                               &manifest_file, nullptr);
+    if (!s.ok()) {
+
return s; + } + manifest_file_reader.reset(new SequentialFileReader( + std::move(manifest_file), manifest_path, + db_options_->log_readahead_size, io_tracer_, db_options_->listeners)); + } + + assert(s.ok()); + VersionSet::LogReporter reporter; + reporter.status = &s; + log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter, + /*checksum=*/true, /*log_num=*/0); + VersionEditHandlerPointInTime handler_pit( + read_only, column_families, const_cast(this), io_tracer_, + read_options, EpochNumberRequirement::kMightMissing); + + handler_pit.Iterate(reader, &s); + + handler_pit.GetDbId(db_id); + + assert(nullptr != has_missing_table_file); + *has_missing_table_file = handler_pit.HasMissingFiles(); + + s = handler_pit.status(); + if (s.ok()) { + RecoverEpochNumbers(); + } + return s; +} + +void VersionSet::RecoverEpochNumbers() { + for (auto cfd : *column_family_set_) { + if (cfd->IsDropped()) { + continue; + } + assert(cfd->initialized()); + cfd->RecoverEpochNumbers(); + } +} + +Status VersionSet::ListColumnFamilies(std::vector* column_families, + const std::string& dbname, + FileSystem* fs) { + // Read "CURRENT" file, which contains a pointer to the current manifest file + std::string manifest_path; + uint64_t manifest_file_number; + Status s = + GetCurrentManifestPath(dbname, fs, &manifest_path, &manifest_file_number); + if (!s.ok()) { + return s; + } + return ListColumnFamiliesFromManifest(manifest_path, fs, column_families); +} + +Status VersionSet::ListColumnFamiliesFromManifest( + const std::string& manifest_path, FileSystem* fs, + std::vector* column_families) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + std::unique_ptr file_reader; + Status s; + { + std::unique_ptr file; + // these are just for performance reasons, not correctness, + // so we're fine using the defaults + s = fs->NewSequentialFile(manifest_path, FileOptions(), &file, nullptr); + if (!s.ok()) { + return s; + } + file_reader = std::make_unique( + std::move(file), manifest_path, /*io_tracer=*/nullptr); + } + + VersionSet::LogReporter reporter; + reporter.status = &s; + log::Reader reader(nullptr, std::move(file_reader), &reporter, + true /* checksum */, 0 /* log_number */); + + ListColumnFamiliesHandler handler(read_options); + handler.Iterate(reader, &s); + + assert(column_families); + column_families->clear(); + if (handler.status().ok()) { + for (const auto& iter : handler.GetColumnFamilyNames()) { + column_families->push_back(iter.second); + } + } + + return handler.status(); +} + +Status VersionSet::ReduceNumberOfLevels(const std::string& dbname, + const Options* options, + const FileOptions& file_options, + int new_levels) { + if (new_levels <= 1) { + return Status::InvalidArgument( + "Number of levels needs to be bigger than 1"); + } + + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + + ImmutableDBOptions db_options(*options); + ColumnFamilyOptions cf_options(*options); + std::shared_ptr tc(NewLRUCache(options->max_open_files - 10, + options->table_cache_numshardbits)); + WriteController wc(options->delayed_write_rate); + WriteBufferManager wb(options->db_write_buffer_size); + VersionSet versions(dbname, &db_options, file_options, tc.get(), &wb, &wc, + nullptr /*BlockCacheTracer*/, nullptr /*IOTracer*/, + /*db_id*/ "", + /*db_session_id*/ ""); + Status status; + + std::vector dummy; + ColumnFamilyDescriptor dummy_descriptor(kDefaultColumnFamilyName, + ColumnFamilyOptions(*options)); + dummy.push_back(dummy_descriptor); + status = versions.Recover(dummy); + if 
(!status.ok()) {
+    return status;
+  }
+
+  Version* current_version =
+      versions.GetColumnFamilySet()->GetDefault()->current();
+  auto* vstorage = current_version->storage_info();
+  int current_levels = vstorage->num_levels();
+
+  if (current_levels <= new_levels) {
+    return Status::OK();
+  }
+
+  // Make sure there are files only on one level from
+  // (new_levels-1) to (current_levels-1)
+  int first_nonempty_level = -1;
+  int first_nonempty_level_filenum = 0;
+  for (int i = new_levels - 1; i < current_levels; i++) {
+    int file_num = vstorage->NumLevelFiles(i);
+    if (file_num != 0) {
+      if (first_nonempty_level < 0) {
+        first_nonempty_level = i;
+        first_nonempty_level_filenum = file_num;
+      } else {
+        char msg[255];
+        snprintf(msg, sizeof(msg),
+                 "Found at least two levels containing files: "
+                 "[%d:%d],[%d:%d].\n",
+                 first_nonempty_level, first_nonempty_level_filenum, i,
+                 file_num);
+        return Status::InvalidArgument(msg);
+      }
+    }
+  }
+
+  // we need to allocate an array with the old number of levels size to
+  // avoid SIGSEGV in WriteCurrentStateToManifest()
+  // however, all levels bigger or equal to new_levels will be empty
+  std::vector<FileMetaData*>* new_files_list =
+      new std::vector<FileMetaData*>[current_levels];
+  for (int i = 0; i < new_levels - 1; i++) {
+    new_files_list[i] = vstorage->LevelFiles(i);
+  }
+
+  if (first_nonempty_level > 0) {
+    auto& new_last_level = new_files_list[new_levels - 1];
+
+    new_last_level = vstorage->LevelFiles(first_nonempty_level);
+
+    for (size_t i = 0; i < new_last_level.size(); ++i) {
+      const FileMetaData* const meta = new_last_level[i];
+      assert(meta);
+
+      const uint64_t file_number = meta->fd.GetNumber();
+
+      vstorage->file_locations_[file_number] =
+          VersionStorageInfo::FileLocation(new_levels - 1, i);
+    }
+  }
+
+  delete[] vstorage->files_;
+  vstorage->files_ = new_files_list;
+  vstorage->num_levels_ = new_levels;
+  vstorage->ResizeCompactCursors(new_levels);
+
+  MutableCFOptions mutable_cf_options(*options);
+  VersionEdit ve;
+  InstrumentedMutex dummy_mutex;
+  InstrumentedMutexLock l(&dummy_mutex);
+  return versions.LogAndApply(versions.GetColumnFamilySet()->GetDefault(),
+                              mutable_cf_options, read_options, &ve,
+                              &dummy_mutex, nullptr, true);
+}
+
+// Get the checksum information including the checksum and checksum function
+// name of all SST and blob files in VersionSet. Store the information in
+// FileChecksumList which contains a map from file number to its checksum info.
+// If DB is not running, make sure to call VersionSet::Recover() to load the
+// file metadata from the MANIFEST into the VersionSet before calling this
+// function.
+Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) {
+  // Clean the previously stored checksum information if any.
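+  // Usage sketch (illustrative; NewFileChecksumList() is the factory declared
+  // in include/rocksdb/file_checksum.h):
+  //   std::unique_ptr<FileChecksumList> list(NewFileChecksumList());
+  //   Status s = version_set->GetLiveFilesChecksumInfo(list.get());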
+ Status s; + if (checksum_list == nullptr) { + s = Status::InvalidArgument("checksum_list is nullptr"); + return s; + } + checksum_list->reset(); + + for (auto cfd : *column_family_set_) { + assert(cfd); + + if (cfd->IsDropped() || !cfd->initialized()) { + continue; + } + + const auto* current = cfd->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + + /* SST files */ + for (int level = 0; level < cfd->NumberLevels(); level++) { + const auto& level_files = vstorage->LevelFiles(level); + + for (const auto& file : level_files) { + assert(file); + + s = checksum_list->InsertOneFileChecksum(file->fd.GetNumber(), + file->file_checksum, + file->file_checksum_func_name); + if (!s.ok()) { + return s; + } + } + } + + /* Blob files */ + const auto& blob_files = vstorage->GetBlobFiles(); + for (const auto& meta : blob_files) { + assert(meta); + + std::string checksum_value = meta->GetChecksumValue(); + std::string checksum_method = meta->GetChecksumMethod(); + assert(checksum_value.empty() == checksum_method.empty()); + if (meta->GetChecksumMethod().empty()) { + checksum_value = kUnknownFileChecksum; + checksum_method = kUnknownFileChecksumFuncName; + } + + s = checksum_list->InsertOneFileChecksum(meta->GetBlobFileNumber(), + checksum_value, checksum_method); + if (!s.ok()) { + return s; + } + } + } + + return s; +} + +Status VersionSet::DumpManifest(Options& options, std::string& dscname, + bool verbose, bool hex, bool json) { + assert(options.env); + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + + std::vector column_families; + Status s = ListColumnFamiliesFromManifest( + dscname, options.env->GetFileSystem().get(), &column_families); + if (!s.ok()) { + return s; + } + + // Open the specified manifest file. + std::unique_ptr file_reader; + { + std::unique_ptr file; + const std::shared_ptr& fs = options.env->GetFileSystem(); + s = fs->NewSequentialFile( + dscname, fs->OptimizeForManifestRead(file_options_), &file, nullptr); + if (!s.ok()) { + return s; + } + file_reader = std::make_unique( + std::move(file), dscname, db_options_->log_readahead_size, io_tracer_); + } + + std::vector cf_descs; + for (const auto& cf : column_families) { + cf_descs.emplace_back(cf, options); + } + + DumpManifestHandler handler(cf_descs, this, io_tracer_, read_options, verbose, + hex, json); + { + VersionSet::LogReporter reporter; + reporter.status = &s; + log::Reader reader(nullptr, std::move(file_reader), &reporter, + true /* checksum */, 0 /* log_number */); + handler.Iterate(reader, &s); + } + + return handler.status(); +} + +void VersionSet::MarkFileNumberUsed(uint64_t number) { + // only called during recovery and repair which are single threaded, so this + // works because there can't be concurrent calls + if (next_file_number_.load(std::memory_order_relaxed) <= number) { + next_file_number_.store(number + 1, std::memory_order_relaxed); + } +} +// Called only either from ::LogAndApply which is protected by mutex or during +// recovery which is single-threaded. +void VersionSet::MarkMinLogNumberToKeep(uint64_t number) { + if (min_log_number_to_keep_.load(std::memory_order_relaxed) < number) { + min_log_number_to_keep_.store(number, std::memory_order_relaxed); + } +} + +Status VersionSet::WriteCurrentStateToManifest( + const std::unordered_map& curr_state, + const VersionEdit& wal_additions, log::Writer* log, IOStatus& io_s) { + // TODO: Break up into multiple records to reduce memory usage on recovery? 
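+  // Record sequence written to a fresh MANIFEST (summarizing the body below):
+  // an optional db_id edit, the WAL additions, one WAL-deletion rollover
+  // record, then per column family a descriptor edit followed by an edit
+  // listing all of its live files.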
+ + // WARNING: This method doesn't hold a mutex!! + + // This is done without DB mutex lock held, but only within single-threaded + // LogAndApply. Column family manipulations can only happen within LogAndApply + // (the same single thread), so we're safe to iterate. + + assert(io_s.ok()); + if (db_options_->write_dbid_to_manifest) { + VersionEdit edit_for_db_id; + assert(!db_id_.empty()); + edit_for_db_id.SetDBId(db_id_); + std::string db_id_record; + if (!edit_for_db_id.EncodeTo(&db_id_record)) { + return Status::Corruption("Unable to Encode VersionEdit:" + + edit_for_db_id.DebugString(true)); + } + io_s = log->AddRecord(db_id_record); + if (!io_s.ok()) { + return io_s; + } + } + + // Save WALs. + if (!wal_additions.GetWalAdditions().empty()) { + TEST_SYNC_POINT_CALLBACK("VersionSet::WriteCurrentStateToManifest:SaveWal", + const_cast(&wal_additions)); + std::string record; + if (!wal_additions.EncodeTo(&record)) { + return Status::Corruption("Unable to Encode VersionEdit: " + + wal_additions.DebugString(true)); + } + io_s = log->AddRecord(record); + if (!io_s.ok()) { + return io_s; + } + } + + // New manifest should rollover the WAL deletion record from previous + // manifest. Otherwise, when an addition record of a deleted WAL gets added to + // this new manifest later (which can happens in e.g, SyncWAL()), this new + // manifest creates an illusion that such WAL hasn't been deleted. + VersionEdit wal_deletions; + wal_deletions.DeleteWalsBefore(min_log_number_to_keep()); + std::string wal_deletions_record; + if (!wal_deletions.EncodeTo(&wal_deletions_record)) { + return Status::Corruption("Unable to Encode VersionEdit: " + + wal_deletions.DebugString(true)); + } + io_s = log->AddRecord(wal_deletions_record); + if (!io_s.ok()) { + return io_s; + } + + for (auto cfd : *column_family_set_) { + assert(cfd); + + if (cfd->IsDropped()) { + continue; + } + assert(cfd->initialized()); + { + // Store column family info + VersionEdit edit; + if (cfd->GetID() != 0) { + // default column family is always there, + // no need to explicitly write it + edit.AddColumnFamily(cfd->GetName()); + edit.SetColumnFamily(cfd->GetID()); + } + edit.SetComparatorName( + cfd->internal_comparator().user_comparator()->Name()); + std::string record; + if (!edit.EncodeTo(&record)) { + return Status::Corruption("Unable to Encode VersionEdit:" + + edit.DebugString(true)); + } + io_s = log->AddRecord(record); + if (!io_s.ok()) { + return io_s; + } + } + + { + // Save files + VersionEdit edit; + edit.SetColumnFamily(cfd->GetID()); + + const auto* current = cfd->current(); + assert(current); + + const auto* vstorage = current->storage_info(); + assert(vstorage); + + for (int level = 0; level < cfd->NumberLevels(); level++) { + const auto& level_files = vstorage->LevelFiles(level); + + for (const auto& f : level_files) { + assert(f); + + edit.AddFile(level, f->fd.GetNumber(), f->fd.GetPathId(), + f->fd.GetFileSize(), f->smallest, f->largest, + f->fd.smallest_seqno, f->fd.largest_seqno, + f->marked_for_compaction, f->temperature, + f->oldest_blob_file_number, f->oldest_ancester_time, + f->file_creation_time, f->epoch_number, f->file_checksum, + f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size, f->tail_size); + } + } + + edit.SetCompactCursors(vstorage->GetCompactCursors()); + + const auto& blob_files = vstorage->GetBlobFiles(); + for (const auto& meta : blob_files) { + assert(meta); + + const uint64_t blob_file_number = meta->GetBlobFileNumber(); + + edit.AddBlobFile(blob_file_number, 
meta->GetTotalBlobCount(), + meta->GetTotalBlobBytes(), meta->GetChecksumMethod(), + meta->GetChecksumValue()); + if (meta->GetGarbageBlobCount() > 0) { + edit.AddBlobFileGarbage(blob_file_number, meta->GetGarbageBlobCount(), + meta->GetGarbageBlobBytes()); + } + } + + const auto iter = curr_state.find(cfd->GetID()); + assert(iter != curr_state.end()); + uint64_t log_number = iter->second.log_number; + edit.SetLogNumber(log_number); + + if (cfd->GetID() == 0) { + // min_log_number_to_keep is for the whole db, not for specific column + // family. So it does not need to be set for every column family, just + // need to be set once. Since default CF can never be dropped, we set + // the min_log to the default CF here. + uint64_t min_log = min_log_number_to_keep(); + if (min_log != 0) { + edit.SetMinLogNumberToKeep(min_log); + } + } + + const std::string& full_history_ts_low = iter->second.full_history_ts_low; + if (!full_history_ts_low.empty()) { + edit.SetFullHistoryTsLow(full_history_ts_low); + } + + edit.SetLastSequence(descriptor_last_sequence_); + + std::string record; + if (!edit.EncodeTo(&record)) { + return Status::Corruption("Unable to Encode VersionEdit:" + + edit.DebugString(true)); + } + io_s = log->AddRecord(record); + if (!io_s.ok()) { + return io_s; + } + } + } + return Status::OK(); +} + +// TODO(aekmekji): in CompactionJob::GenSubcompactionBoundaries(), this +// function is called repeatedly with consecutive pairs of slices. For example +// if the slice list is [a, b, c, d] this function is called with arguments +// (a,b) then (b,c) then (c,d). Knowing this, an optimization is possible where +// we avoid doing binary search for the keys b and c twice and instead somehow +// maintain state of where they first appear in the files. +uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options, + const ReadOptions& read_options, + Version* v, const Slice& start, + const Slice& end, int start_level, + int end_level, TableReaderCaller caller) { + const auto& icmp = v->cfd_->internal_comparator(); + + // pre-condition + assert(icmp.Compare(start, end) <= 0); + + uint64_t total_full_size = 0; + const auto* vstorage = v->storage_info(); + const int num_non_empty_levels = vstorage->num_non_empty_levels(); + end_level = (end_level == -1) ? num_non_empty_levels + : std::min(end_level, num_non_empty_levels); + if (end_level <= start_level) { + return 0; + } + + // Outline of the optimization that uses options.files_size_error_margin. + // When approximating the files total size that is used to store a keys range, + // we first sum up the sizes of the files that fully fall into the range. + // Then we sum up the sizes of all the files that may intersect with the range + // (this includes all files in L0 as well). Then, if total_intersecting_size + // is smaller than total_full_size * options.files_size_error_margin - we can + // infer that the intersecting files have a sufficiently negligible + // contribution to the total size, and we can approximate the storage required + // for the keys in range as just half of the intersecting_files_size. + // E.g., if the value of files_size_error_margin is 0.1, then the error of the + // approximation is limited to only ~10% of the total size of files that fully + // fall into the keys range. In such case, this helps to avoid a costly + // process of binary searching the intersecting files that is required only + // for a more precise calculation of the total size. 
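+  // Worked example (hypothetical numbers): with files_size_error_margin = 0.1,
+  // total_full_size = 100 GB and total_intersecting_size = 4 GB, the margin
+  // check below passes (4 GB < 10 GB), the boundary files contribute
+  // 4 GB / 2 = 2 GB without any binary search, and the error versus the exact
+  // answer is bounded by that 2 GB.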
+ + autovector first_files; + autovector last_files; + + // scan all the levels + for (int level = start_level; level < end_level; ++level) { + const LevelFilesBrief& files_brief = vstorage->LevelFilesBrief(level); + if (files_brief.num_files == 0) { + // empty level, skip exploration + continue; + } + + if (level == 0) { + // level 0 files are not in sorted order, we need to iterate through + // the list to compute the total bytes that require scanning, + // so handle the case explicitly (similarly to first_files case) + for (size_t i = 0; i < files_brief.num_files; i++) { + first_files.push_back(&files_brief.files[i]); + } + continue; + } + + assert(level > 0); + assert(files_brief.num_files > 0); + + // identify the file position for start key + const int idx_start = + FindFileInRange(icmp, files_brief, start, 0, + static_cast(files_brief.num_files - 1)); + assert(static_cast(idx_start) < files_brief.num_files); + + // identify the file position for end key + int idx_end = idx_start; + if (icmp.Compare(files_brief.files[idx_end].largest_key, end) < 0) { + idx_end = + FindFileInRange(icmp, files_brief, end, idx_start, + static_cast(files_brief.num_files - 1)); + } + assert(idx_end >= idx_start && + static_cast(idx_end) < files_brief.num_files); + + // scan all files from the starting index to the ending index + // (inferred from the sorted order) + + // first scan all the intermediate full files (excluding first and last) + for (int i = idx_start + 1; i < idx_end; ++i) { + uint64_t file_size = files_brief.files[i].fd.GetFileSize(); + // The entire file falls into the range, so we can just take its size. + assert(file_size == ApproximateSize(read_options, v, files_brief.files[i], + start, end, caller)); + total_full_size += file_size; + } + + // save the first and the last files (which may be the same file), so we + // can scan them later. + first_files.push_back(&files_brief.files[idx_start]); + if (idx_start != idx_end) { + // we need to estimate size for both files, only if they are different + last_files.push_back(&files_brief.files[idx_end]); + } + } + + // The sum of all file sizes that intersect the [start, end] keys range. + uint64_t total_intersecting_size = 0; + for (const auto* file_ptr : first_files) { + total_intersecting_size += file_ptr->fd.GetFileSize(); + } + for (const auto* file_ptr : last_files) { + total_intersecting_size += file_ptr->fd.GetFileSize(); + } + + // Now scan all the first & last files at each level, and estimate their size. + // If the total_intersecting_size is less than X% of the total_full_size - we + // want to approximate the result in order to avoid the costly binary search + // inside ApproximateSize. We use half of file size as an approximation below. + + const double margin = options.files_size_error_margin; + if (margin > 0 && total_intersecting_size < + static_cast(total_full_size * margin)) { + total_full_size += total_intersecting_size / 2; + } else { + // Estimate for all the first files (might also be last files), at each + // level + for (const auto file_ptr : first_files) { + total_full_size += + ApproximateSize(read_options, v, *file_ptr, start, end, caller); + } + + // Estimate for all the last files, at each level + for (const auto file_ptr : last_files) { + // We could use ApproximateSize here, but calling ApproximateOffsetOf + // directly is just more efficient. 
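+      // For a last file, `start` is already known to precede the file's
+      // smallest key, so the bytes in range are exactly the bytes before
+      // `end`: the offset of `end` within the file equals the contribution.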
+ total_full_size += + ApproximateOffsetOf(read_options, v, *file_ptr, end, caller); + } + } + + return total_full_size; +} + +uint64_t VersionSet::ApproximateOffsetOf(const ReadOptions& read_options, + Version* v, const FdWithKeyRange& f, + const Slice& key, + TableReaderCaller caller) { + // pre-condition + assert(v); + const auto& icmp = v->cfd_->internal_comparator(); + + uint64_t result = 0; + if (icmp.Compare(f.largest_key, key) <= 0) { + // Entire file is before "key", so just add the file size + result = f.fd.GetFileSize(); + } else if (icmp.Compare(f.smallest_key, key) > 0) { + // Entire file is after "key", so ignore + result = 0; + } else { + // "key" falls in the range for this table. Add the + // approximate offset of "key" within the table. + TableCache* table_cache = v->cfd_->table_cache(); + const MutableCFOptions& cf_opts = v->GetMutableCFOptions(); + if (table_cache != nullptr) { + result = table_cache->ApproximateOffsetOf( + read_options, key, *f.file_metadata, caller, icmp, + cf_opts.block_protection_bytes_per_key, cf_opts.prefix_extractor); + } + } + return result; +} + +uint64_t VersionSet::ApproximateSize(const ReadOptions& read_options, + Version* v, const FdWithKeyRange& f, + const Slice& start, const Slice& end, + TableReaderCaller caller) { + // pre-condition + assert(v); + const auto& icmp = v->cfd_->internal_comparator(); + assert(icmp.Compare(start, end) <= 0); + + if (icmp.Compare(f.largest_key, start) <= 0 || + icmp.Compare(f.smallest_key, end) > 0) { + // Entire file is before or after the start/end keys range + return 0; + } + + if (icmp.Compare(f.smallest_key, start) >= 0) { + // Start of the range is before the file start - approximate by end offset + return ApproximateOffsetOf(read_options, v, f, end, caller); + } + + if (icmp.Compare(f.largest_key, end) < 0) { + // End of the range is after the file end - approximate by subtracting + // start offset from the file size + uint64_t start_offset = + ApproximateOffsetOf(read_options, v, f, start, caller); + assert(f.fd.GetFileSize() >= start_offset); + return f.fd.GetFileSize() - start_offset; + } + + // The interval falls entirely in the range for this file. + TableCache* table_cache = v->cfd_->table_cache(); + if (table_cache == nullptr) { + return 0; + } + const MutableCFOptions& cf_opts = v->GetMutableCFOptions(); + return table_cache->ApproximateSize( + read_options, start, end, *f.file_metadata, caller, icmp, + cf_opts.block_protection_bytes_per_key, cf_opts.prefix_extractor); +} + +void VersionSet::RemoveLiveFiles( + std::vector& sst_delete_candidates, + std::vector& blob_delete_candidates) const { + assert(column_family_set_); + for (auto cfd : *column_family_set_) { + assert(cfd); + if (!cfd->initialized()) { + continue; + } + + auto* current = cfd->current(); + bool found_current = false; + + Version* const dummy_versions = cfd->dummy_versions(); + assert(dummy_versions); + + for (Version* v = dummy_versions->next_; v != dummy_versions; + v = v->next_) { + v->RemoveLiveFiles(sst_delete_candidates, blob_delete_candidates); + if (v == current) { + found_current = true; + } + } + + if (!found_current && current != nullptr) { + // Should never happen unless it is a bug. 
+ assert(false); + current->RemoveLiveFiles(sst_delete_candidates, blob_delete_candidates); + } + } +} + +void VersionSet::AddLiveFiles(std::vector* live_table_files, + std::vector* live_blob_files) const { + assert(live_table_files); + assert(live_blob_files); + + // pre-calculate space requirement + size_t total_table_files = 0; + size_t total_blob_files = 0; + + assert(column_family_set_); + for (auto cfd : *column_family_set_) { + assert(cfd); + + if (!cfd->initialized()) { + continue; + } + + Version* const dummy_versions = cfd->dummy_versions(); + assert(dummy_versions); + + for (Version* v = dummy_versions->next_; v != dummy_versions; + v = v->next_) { + assert(v); + + const auto* vstorage = v->storage_info(); + assert(vstorage); + + for (int level = 0; level < vstorage->num_levels(); ++level) { + total_table_files += vstorage->LevelFiles(level).size(); + } + + total_blob_files += vstorage->GetBlobFiles().size(); + } + } + + // just one time extension to the right size + live_table_files->reserve(live_table_files->size() + total_table_files); + live_blob_files->reserve(live_blob_files->size() + total_blob_files); + + assert(column_family_set_); + for (auto cfd : *column_family_set_) { + assert(cfd); + if (!cfd->initialized()) { + continue; + } + + auto* current = cfd->current(); + bool found_current = false; + + Version* const dummy_versions = cfd->dummy_versions(); + assert(dummy_versions); + + for (Version* v = dummy_versions->next_; v != dummy_versions; + v = v->next_) { + v->AddLiveFiles(live_table_files, live_blob_files); + if (v == current) { + found_current = true; + } + } + + if (!found_current && current != nullptr) { + // Should never happen unless it is a bug. + assert(false); + current->AddLiveFiles(live_table_files, live_blob_files); + } + } +} + +InternalIterator* VersionSet::MakeInputIterator( + const ReadOptions& read_options, const Compaction* c, + RangeDelAggregator* range_del_agg, + const FileOptions& file_options_compactions, + const std::optional& start, + const std::optional& end) { + auto cfd = c->column_family_data(); + // Level-0 files have to be merged together. For other levels, + // we will make a concatenating iterator per level. + // TODO(opt): use concatenating iterator for level-0 if there is no overlap + const size_t space = (c->level() == 0 ? c->input_levels(0)->num_files + + c->num_input_levels() - 1 + : c->num_input_levels()); + InternalIterator** list = new InternalIterator*[space]; + // First item in the pair is a pointer to range tombstones. + // Second item is a pointer to a member of a LevelIterator, + // that will be initialized to where CompactionMergingIterator stores + // pointer to its range tombstones. This is used by LevelIterator + // to update pointer to range tombstones as it traverse different SST files. + std::vector< + std::pair> + range_tombstones; + size_t num = 0; + for (size_t which = 0; which < c->num_input_levels(); which++) { + if (c->input_levels(which)->num_files != 0) { + if (c->level(which) == 0) { + const LevelFilesBrief* flevel = c->input_levels(which); + for (size_t i = 0; i < flevel->num_files; i++) { + const FileMetaData& fmd = *flevel->files[i].file_metadata; + if (start.has_value() && + cfd->user_comparator()->CompareWithoutTimestamp( + start.value(), fmd.largest.user_key()) > 0) { + continue; + } + // We should be able to filter out the case where the end key + // equals to the end boundary, since the end key is exclusive. + // We try to be extra safe here. 
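+          // e.g. with end == "k", an input file whose smallest user key is
+          // also "k" is still kept (the comparison below is a strict '<'),
+          // which is the conservative behavior the comment above describes.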
+ if (end.has_value() && + cfd->user_comparator()->CompareWithoutTimestamp( + end.value(), fmd.smallest.user_key()) < 0) { + continue; + } + TruncatedRangeDelIterator* range_tombstone_iter = nullptr; + list[num++] = cfd->table_cache()->NewIterator( + read_options, file_options_compactions, + cfd->internal_comparator(), fmd, range_del_agg, + c->mutable_cf_options()->prefix_extractor, + /*table_reader_ptr=*/nullptr, + /*file_read_hist=*/nullptr, TableReaderCaller::kCompaction, + /*arena=*/nullptr, + /*skip_filters=*/false, + /*level=*/static_cast(c->level(which)), + MaxFileSizeForL0MetaPin(*c->mutable_cf_options()), + /*smallest_compaction_key=*/nullptr, + /*largest_compaction_key=*/nullptr, + /*allow_unprepared_value=*/false, + c->mutable_cf_options()->block_protection_bytes_per_key, + /*range_del_iter=*/&range_tombstone_iter); + range_tombstones.emplace_back(range_tombstone_iter, nullptr); + } + } else { + // Create concatenating iterator for the files from this level + TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr; + list[num++] = new LevelIterator( + cfd->table_cache(), read_options, file_options_compactions, + cfd->internal_comparator(), c->input_levels(which), + c->mutable_cf_options()->prefix_extractor, + /*should_sample=*/false, + /*no per level latency histogram=*/nullptr, + TableReaderCaller::kCompaction, /*skip_filters=*/false, + /*level=*/static_cast(c->level(which)), + c->mutable_cf_options()->block_protection_bytes_per_key, + range_del_agg, c->boundaries(which), false, &tombstone_iter_ptr); + range_tombstones.emplace_back(nullptr, tombstone_iter_ptr); + } + } + } + assert(num <= space); + InternalIterator* result = NewCompactionMergingIterator( + &c->column_family_data()->internal_comparator(), list, + static_cast(num), range_tombstones); + delete[] list; + return result; +} + +Status VersionSet::GetMetadataForFile(uint64_t number, int* filelevel, + FileMetaData** meta, + ColumnFamilyData** cfd) { + for (auto cfd_iter : *column_family_set_) { + if (!cfd_iter->initialized()) { + continue; + } + Version* version = cfd_iter->current(); + const auto* vstorage = version->storage_info(); + for (int level = 0; level < vstorage->num_levels(); level++) { + for (const auto& file : vstorage->LevelFiles(level)) { + if (file->fd.GetNumber() == number) { + *meta = file; + *filelevel = level; + *cfd = cfd_iter; + return Status::OK(); + } + } + } + } + return Status::NotFound("File not present in any level"); +} + +void VersionSet::GetLiveFilesMetaData(std::vector* metadata) { + for (auto cfd : *column_family_set_) { + if (cfd->IsDropped() || !cfd->initialized()) { + continue; + } + for (int level = 0; level < cfd->NumberLevels(); level++) { + for (const auto& file : + cfd->current()->storage_info()->LevelFiles(level)) { + LiveFileMetaData filemetadata; + filemetadata.column_family_name = cfd->GetName(); + uint32_t path_id = file->fd.GetPathId(); + if (path_id < cfd->ioptions()->cf_paths.size()) { + filemetadata.db_path = cfd->ioptions()->cf_paths[path_id].path; + } else { + assert(!cfd->ioptions()->cf_paths.empty()); + filemetadata.db_path = cfd->ioptions()->cf_paths.back().path; + } + filemetadata.directory = filemetadata.db_path; + const uint64_t file_number = file->fd.GetNumber(); + filemetadata.name = MakeTableFileName("", file_number); + filemetadata.relative_filename = filemetadata.name.substr(1); + filemetadata.file_number = file_number; + filemetadata.level = level; + filemetadata.size = file->fd.GetFileSize(); + filemetadata.smallestkey = 
+            file->smallest.user_key().ToString();
+        filemetadata.largestkey = file->largest.user_key().ToString();
+        filemetadata.smallest_seqno = file->fd.smallest_seqno;
+        filemetadata.largest_seqno = file->fd.largest_seqno;
+        filemetadata.num_reads_sampled =
+            file->stats.num_reads_sampled.load(std::memory_order_relaxed);
+        filemetadata.being_compacted = file->being_compacted;
+        filemetadata.num_entries = file->num_entries;
+        filemetadata.num_deletions = file->num_deletions;
+        filemetadata.oldest_blob_file_number = file->oldest_blob_file_number;
+        filemetadata.file_checksum = file->file_checksum;
+        filemetadata.file_checksum_func_name = file->file_checksum_func_name;
+        filemetadata.temperature = file->temperature;
+        filemetadata.oldest_ancester_time = file->TryGetOldestAncesterTime();
+        filemetadata.file_creation_time = file->TryGetFileCreationTime();
+        filemetadata.epoch_number = file->epoch_number;
+        metadata->push_back(filemetadata);
+      }
+    }
+  }
+}
+
+void VersionSet::GetObsoleteFiles(std::vector<ObsoleteFileInfo>* files,
+                                  std::vector<ObsoleteBlobFileInfo>* blob_files,
+                                  std::vector<std::string>* manifest_filenames,
+                                  uint64_t min_pending_output) {
+  assert(files);
+  assert(blob_files);
+  assert(manifest_filenames);
+  assert(files->empty());
+  assert(blob_files->empty());
+  assert(manifest_filenames->empty());
+
+  std::vector<ObsoleteFileInfo> pending_files;
+  for (auto& f : obsolete_files_) {
+    if (f.metadata->fd.GetNumber() < min_pending_output) {
+      files->emplace_back(std::move(f));
+    } else {
+      pending_files.emplace_back(std::move(f));
+    }
+  }
+  obsolete_files_.swap(pending_files);
+
+  std::vector<ObsoleteBlobFileInfo> pending_blob_files;
+  for (auto& blob_file : obsolete_blob_files_) {
+    if (blob_file.GetBlobFileNumber() < min_pending_output) {
+      blob_files->emplace_back(std::move(blob_file));
+    } else {
+      pending_blob_files.emplace_back(std::move(blob_file));
+    }
+  }
+  obsolete_blob_files_.swap(pending_blob_files);
+
+  obsolete_manifests_.swap(*manifest_filenames);
+}
+
+ColumnFamilyData* VersionSet::CreateColumnFamily(
+    const ColumnFamilyOptions& cf_options, const ReadOptions& read_options,
+    const VersionEdit* edit) {
+  assert(edit->is_column_family_add_);
+
+  MutableCFOptions dummy_cf_options;
+  Version* dummy_versions =
+      new Version(nullptr, this, file_options_, dummy_cf_options, io_tracer_);
+  // Ref() dummy version once so that later we can call Unref() to delete it
+  // by avoiding calling "delete" explicitly (~Version is private)
+  dummy_versions->Ref();
+  auto new_cfd = column_family_set_->CreateColumnFamily(
+      edit->column_family_name_, edit->column_family_, dummy_versions,
+      cf_options);
+
+  Version* v = new Version(new_cfd, this, file_options_,
+                           *new_cfd->GetLatestMutableCFOptions(), io_tracer_,
+                           current_version_number_++);
+
+  constexpr bool update_stats = false;
+
+  v->PrepareAppend(*new_cfd->GetLatestMutableCFOptions(), read_options,
+                   update_stats);
+
+  AppendVersion(new_cfd, v);
+  // GetLatestMutableCFOptions() is safe here without mutex since the
+  // cfd is not available to client
+  new_cfd->CreateNewMemtable(*new_cfd->GetLatestMutableCFOptions(),
+                             LastSequence());
+  new_cfd->SetLogNumber(edit->log_number_);
+  return new_cfd;
+}
+
+uint64_t VersionSet::GetNumLiveVersions(Version* dummy_versions) {
+  uint64_t count = 0;
+  for (Version* v = dummy_versions->next_; v != dummy_versions; v = v->next_) {
+    count++;
+  }
+  return count;
+}
+
+uint64_t VersionSet::GetTotalSstFilesSize(Version* dummy_versions) {
+  std::unordered_set<uint64_t> unique_files;
+  uint64_t total_files_size = 0;
+  for (Version* v = dummy_versions->next_; v != dummy_versions; v = v->next_) {
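+    // Note (editorial illustration, not upstream commentary): live versions
+    // in this list can share FileMetaData, e.g. after a trivial move the same
+    // physical SST appears in several Versions. Deduplicating on
+    // packed_number_and_path_id ensures each file's size is counted once.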
+    VersionStorageInfo* storage_info = v->storage_info();
+    for (int level = 0; level < storage_info->num_levels_; level++) {
+      for (const auto& file_meta : storage_info->LevelFiles(level)) {
+        if (unique_files.find(file_meta->fd.packed_number_and_path_id) ==
+            unique_files.end()) {
+          unique_files.insert(file_meta->fd.packed_number_and_path_id);
+          total_files_size += file_meta->fd.GetFileSize();
+        }
+      }
+    }
+  }
+  return total_files_size;
+}
+
+uint64_t VersionSet::GetTotalBlobFileSize(Version* dummy_versions) {
+  std::unordered_set<uint64_t> unique_blob_files;
+
+  uint64_t all_versions_blob_file_size = 0;
+
+  for (auto* v = dummy_versions->next_; v != dummy_versions; v = v->next_) {
+    // iterate all the versions
+    const auto* vstorage = v->storage_info();
+    assert(vstorage);
+
+    const auto& blob_files = vstorage->GetBlobFiles();
+
+    for (const auto& meta : blob_files) {
+      assert(meta);
+
+      const uint64_t blob_file_number = meta->GetBlobFileNumber();
+
+      if (unique_blob_files.find(blob_file_number) == unique_blob_files.end()) {
+        // found a blob file that has not been counted yet
+        unique_blob_files.insert(blob_file_number);
+        all_versions_blob_file_size += meta->GetBlobFileSize();
+      }
+    }
+  }
+
+  return all_versions_blob_file_size;
+}
+
+Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options,
+                                      ColumnFamilyData* cfd,
+                                      const std::string& fpath, int level,
+                                      const FileMetaData& meta) {
+  uint64_t fsize = 0;
+  Status status = fs_->GetFileSize(fpath, IOOptions(), &fsize, nullptr);
+  if (status.ok()) {
+    if (fsize != meta.fd.GetFileSize()) {
+      status = Status::Corruption("File size mismatch: " + fpath);
+    }
+  }
+  if (status.ok() && db_options_->verify_sst_unique_id_in_manifest) {
+    assert(cfd);
+    TableCache* table_cache = cfd->table_cache();
+    assert(table_cache);
+
+    const MutableCFOptions* const cf_opts = cfd->GetLatestMutableCFOptions();
+    assert(cf_opts);
+    std::shared_ptr<const SliceTransform> pe = cf_opts->prefix_extractor;
+    size_t max_sz_for_l0_meta_pin = MaxFileSizeForL0MetaPin(*cf_opts);
+
+    const FileOptions& file_opts = file_options();
+
+    Version* version = cfd->current();
+    assert(version);
+    VersionStorageInfo& storage_info = version->storage_info_;
+    const InternalKeyComparator* icmp = storage_info.InternalComparator();
+    assert(icmp);
+
+    InternalStats* internal_stats = cfd->internal_stats();
+
+    TableCache::TypedHandle* handle = nullptr;
+    FileMetaData meta_copy = meta;
+    status = table_cache->FindTable(
+        read_options, file_opts, *icmp, meta_copy, &handle,
+        cf_opts->block_protection_bytes_per_key, pe,
+        /*no_io=*/false, /*record_read_stats=*/true,
+        internal_stats->GetFileReadHist(level), false, level,
+        /*prefetch_index_and_filter_in_cache*/ false, max_sz_for_l0_meta_pin,
+        meta_copy.temperature);
+    if (handle) {
+      table_cache->get_cache().Release(handle);
+    }
+  }
+  return status;
+}
+
+ReactiveVersionSet::ReactiveVersionSet(
+    const std::string& dbname, const ImmutableDBOptions* _db_options,
+    const FileOptions& _file_options, Cache* table_cache,
+    WriteBufferManager* write_buffer_manager, WriteController* write_controller,
+    const std::shared_ptr<IOTracer>& io_tracer)
+    : VersionSet(dbname, _db_options, _file_options, table_cache,
+                 write_buffer_manager, write_controller,
+                 /*block_cache_tracer=*/nullptr, io_tracer, /*db_id*/ "",
+                 /*db_session_id*/ "") {}
+
+ReactiveVersionSet::~ReactiveVersionSet() {}
+
+Status ReactiveVersionSet::Recover(
+    const std::vector<ColumnFamilyDescriptor>& column_families,
+    std::unique_ptr<log::FragmentBufferedReader>* manifest_reader,
+    std::unique_ptr<log::Reader::Reporter>* manifest_reporter,
+    std::unique_ptr<Status>* manifest_reader_status) {
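+  // Usage sketch (editorial illustration, not part of this file):
+  // ReactiveVersionSet backs read-only secondary instances that tail the
+  // primary's MANIFEST, roughly:
+  //
+  //   DB* db = nullptr;
+  //   Status s = DB::OpenAsSecondary(options, "/path/to/primary",
+  //                                  "/path/to/secondary", &db);
+  //   if (s.ok()) s = db->TryCatchUpWithPrimary();  // re-reads the MANIFEST
+  //
+  // DB::OpenAsSecondary and TryCatchUpWithPrimary are the public entry
+  // points; the exact call chain down to Recover() is assumed here for
+  // illustration.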
+  assert(manifest_reader != nullptr);
+  assert(manifest_reporter != nullptr);
+  assert(manifest_reader_status != nullptr);
+
+  manifest_reader_status->reset(new Status());
+  manifest_reporter->reset(new LogReporter());
+  static_cast_with_check<LogReporter>(manifest_reporter->get())->status =
+      manifest_reader_status->get();
+  Status s = MaybeSwitchManifest(manifest_reporter->get(), manifest_reader);
+  if (!s.ok()) {
+    return s;
+  }
+  log::Reader* reader = manifest_reader->get();
+  assert(reader);
+
+  manifest_tailer_.reset(new ManifestTailer(
+      column_families, const_cast<ReactiveVersionSet*>(this), io_tracer_,
+      read_options_, EpochNumberRequirement::kMightMissing));
+
+  manifest_tailer_->Iterate(*reader, manifest_reader_status->get());
+
+  s = manifest_tailer_->status();
+  if (s.ok()) {
+    RecoverEpochNumbers();
+  }
+  return s;
+}
+
+Status ReactiveVersionSet::ReadAndApply(
+    InstrumentedMutex* mu,
+    std::unique_ptr<log::FragmentBufferedReader>* manifest_reader,
+    Status* manifest_read_status,
+    std::unordered_set<ColumnFamilyData*>* cfds_changed) {
+  assert(manifest_reader != nullptr);
+  assert(cfds_changed != nullptr);
+  mu->AssertHeld();
+
+  Status s;
+  log::Reader* reader = manifest_reader->get();
+  assert(reader);
+  s = MaybeSwitchManifest(reader->GetReporter(), manifest_reader);
+  if (!s.ok()) {
+    return s;
+  }
+  manifest_tailer_->Iterate(*(manifest_reader->get()), manifest_read_status);
+  s = manifest_tailer_->status();
+  if (s.ok()) {
+    *cfds_changed = std::move(manifest_tailer_->GetUpdatedColumnFamilies());
+  }
+
+  return s;
+}
+
+Status ReactiveVersionSet::MaybeSwitchManifest(
+    log::Reader::Reporter* reporter,
+    std::unique_ptr<log::FragmentBufferedReader>* manifest_reader) {
+  assert(manifest_reader != nullptr);
+  Status s;
+  std::string manifest_path;
+  s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path,
+                             &manifest_file_number_);
+  if (!s.ok()) {
+    return s;
+  }
+  std::unique_ptr<FSSequentialFile> manifest_file;
+  if (manifest_reader->get() != nullptr &&
+      manifest_reader->get()->file()->file_name() == manifest_path) {
+    // CURRENT points to the same MANIFEST as before, no need to switch
+    // MANIFEST.
+    return s;
+  }
+  assert(nullptr == manifest_reader->get() ||
+         manifest_reader->get()->file()->file_name() != manifest_path);
+  s = fs_->FileExists(manifest_path, IOOptions(), nullptr);
+  if (s.IsNotFound()) {
+    return Status::TryAgain(
+        "The primary may have switched to a new MANIFEST and deleted the old "
+        "one.");
+  } else if (!s.ok()) {
+    return s;
+  }
+  TEST_SYNC_POINT(
+      "ReactiveVersionSet::MaybeSwitchManifest:"
+      "AfterGetCurrentManifestPath:0");
+  TEST_SYNC_POINT(
+      "ReactiveVersionSet::MaybeSwitchManifest:"
+      "AfterGetCurrentManifestPath:1");
+  // The primary can also delete the MANIFEST while the secondary is reading
+  // it. This is OK on POSIX. For other file systems, maybe create a hard link
+  // to MANIFEST. The hard link should be cleaned up later by the secondary.
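+  // Why this is OK on POSIX (editorial note): unlink() only removes the
+  // directory entry; the inode and its data stay alive until the last open
+  // file descriptor is closed, so a reader that already opened the MANIFEST
+  // can keep reading after the primary deletes it:
+  //
+  //   int fd = open("MANIFEST-000005", O_RDONLY);  // secondary opens
+  //   unlink("MANIFEST-000005");                   // primary deletes
+  //   read(fd, buf, sizeof(buf));                  // still succeeds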
+  s = fs_->NewSequentialFile(manifest_path,
+                             fs_->OptimizeForManifestRead(file_options_),
+                             &manifest_file, nullptr);
+  std::unique_ptr<SequentialFileReader> manifest_file_reader;
+  if (s.ok()) {
+    manifest_file_reader.reset(new SequentialFileReader(
+        std::move(manifest_file), manifest_path,
+        db_options_->log_readahead_size, io_tracer_, db_options_->listeners));
+    manifest_reader->reset(new log::FragmentBufferedReader(
+        nullptr, std::move(manifest_file_reader), reporter, true /* checksum */,
+        0 /* log_number */));
+    ROCKS_LOG_INFO(db_options_->info_log, "Switched to new manifest: %s\n",
+                   manifest_path.c_str());
+    if (manifest_tailer_) {
+      manifest_tailer_->PrepareToReadNewManifest();
+    }
+  } else if (s.IsPathNotFound()) {
+    // This can happen if the primary switches to a new MANIFEST after the
+    // secondary reads the CURRENT file but before the secondary actually tries
+    // to open the MANIFEST.
+    s = Status::TryAgain(
+        "The primary may have switched to a new MANIFEST and deleted the old "
+        "one.");
+  }
+  return s;
+}
+
+#ifndef NDEBUG
+uint64_t ReactiveVersionSet::TEST_read_edits_in_atomic_group() const {
+  assert(manifest_tailer_);
+  return manifest_tailer_->GetReadBuffer().TEST_read_edits_in_atomic_group();
+}
+#endif  // !NDEBUG
+
+std::vector<VersionEdit>& ReactiveVersionSet::replay_buffer() {
+  assert(manifest_tailer_);
+  return manifest_tailer_->GetReadBuffer().replay_buffer();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.h
new file mode 100644
index 0000000000..25ad365837
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set.h
@@ -0,0 +1,1714 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// The representation of a DBImpl consists of a set of Versions. The
+// newest version is called "current". Older versions may be kept
+// around to provide a consistent view to live iterators.
+//
+// Each Version keeps track of a set of table files per level, as well as a
+// set of blob files. The entire set of versions is maintained in a
+// VersionSet.
+//
+// Version, VersionSet are thread-compatible, but require external
+// synchronization on all accesses.
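+//
+// A sketch of what "external synchronization" means in practice (editorial
+// illustration; DBImpl is the real caller and the mutex name is assumed):
+//
+//   InstrumentedMutexLock l(&db_mutex);  // the DB-wide mutex
+//   Version* current = cfd->current();
+//   current->Ref();                      // safe: mutex held
+//   // ... use current ...
+//   current->Unref();                    // again under the mutex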
+
+#pragma once
+#include <atomic>
+#include <deque>
+#include <limits>
+#include <list>
+#include <map>
+#include <memory>
+#include <optional>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "cache/cache_helpers.h"
+#include "db/blob/blob_file_meta.h"
+#include "db/blob/blob_index.h"
+#include "db/column_family.h"
+#include "db/compaction/compaction.h"
+#include "db/compaction/compaction_picker.h"
+#include "db/dbformat.h"
+#include "db/file_indexer.h"
+#include "db/log_reader.h"
+#include "db/range_del_aggregator.h"
+#include "db/read_callback.h"
+#include "db/table_cache.h"
+#include "db/version_builder.h"
+#include "db/version_edit.h"
+#include "db/write_controller.h"
+#include "env/file_system_tracer.h"
+#if USE_COROUTINES
+#include "folly/experimental/coro/BlockingWait.h"
+#include "folly/experimental/coro/Collect.h"
+#endif
+#include "monitoring/instrumented_mutex.h"
+#include "options/db_options.h"
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_checksum.h"
+#include "table/get_context.h"
+#include "table/multiget_context.h"
+#include "trace_replay/block_cache_tracer.h"
+#include "util/autovector.h"
+#include "util/coro_utils.h"
+#include "util/hash_containers.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace log {
+class Writer;
+}
+
+class BlobIndex;
+class Compaction;
+class LogBuffer;
+class LookupKey;
+class MemTable;
+class Version;
+class VersionSet;
+class WriteBufferManager;
+class MergeContext;
+class ColumnFamilySet;
+class MergeIteratorBuilder;
+class SystemClock;
+class ManifestTailer;
+class FilePickerMultiGet;
+
+// VersionEdit is always supposed to be valid and it is used to point at
+// entries in Manifest. Ideally it should not be used as a container to
+// carry around a few of its fields as function params because it can cause
+// readers to think it's a valid entry from Manifest. To avoid that confusion,
+// VersionEditParams is introduced to simply carry around multiple VersionEdit
+// params. It need not point to a valid record in Manifest.
+using VersionEditParams = VersionEdit;
+
+// Return the smallest index i such that file_level.files[i]->largest >= key.
+// Return file_level.num_files if there is no such file.
+// REQUIRES: "file_level.files" contains a sorted list of
+// non-overlapping files.
+extern int FindFile(const InternalKeyComparator& icmp,
+                    const LevelFilesBrief& file_level, const Slice& key);
+
+// Returns true iff some file in "files" overlaps the user key range
+// [*smallest,*largest].
+// smallest==nullptr represents a key smaller than all keys in the DB.
+// largest==nullptr represents a key larger than all keys in the DB.
+// REQUIRES: If disjoint_sorted_files, file_level.files[]
+// contains disjoint ranges in sorted order.
+extern bool SomeFileOverlapsRange(const InternalKeyComparator& icmp,
+                                  bool disjoint_sorted_files,
+                                  const LevelFilesBrief& file_level,
+                                  const Slice* smallest_user_key,
+                                  const Slice* largest_user_key);
+
+// Generate LevelFilesBrief from vector<FileMetaData*>
+// Would copy smallest_key and largest_key data to sequential memory
+// arena: Arena used to allocate the memory
+extern void DoGenerateLevelFilesBrief(LevelFilesBrief* file_level,
+                                      const std::vector<FileMetaData*>& files,
+                                      Arena* arena);
+enum EpochNumberRequirement {
+  kMightMissing,
+  kMustPresent,
+};
+
+// Information of the storage associated with each Version, including number of
+// levels of LSM tree, files information at each level, files marked for
+// compaction, blob files, etc.
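+// For intuition (editorial illustration, not upstream documentation), a
+// three-level Version might be tracked here as:
+//
+//   files_[0] = {12.sst, 9.sst}          // L0 files may overlap each other
+//   files_[1] = {3.sst, 7.sst}           // L1+: sorted, non-overlapping
+//   files_[2] = {2.sst, 4.sst, 5.sst}
+//   blob_files_ = {10.blob, 11.blob}     // sorted by blob file number
+//
+// plus derived data (compaction scores, file indexer, bottommost files)
+// computed by PrepareForVersionAppend()/ComputeCompactionScore().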
+class VersionStorageInfo { + public: + VersionStorageInfo(const InternalKeyComparator* internal_comparator, + const Comparator* user_comparator, int num_levels, + CompactionStyle compaction_style, + VersionStorageInfo* src_vstorage, + bool _force_consistency_checks, + EpochNumberRequirement epoch_number_requirement = + EpochNumberRequirement::kMustPresent); + // No copying allowed + VersionStorageInfo(const VersionStorageInfo&) = delete; + void operator=(const VersionStorageInfo&) = delete; + ~VersionStorageInfo(); + + void Reserve(int level, size_t size) { files_[level].reserve(size); } + + void AddFile(int level, FileMetaData* f); + + // Resize/Initialize the space for compact_cursor_ + void ResizeCompactCursors(int level) { + compact_cursor_.resize(level, InternalKey()); + } + + const std::vector& GetCompactCursors() const { + return compact_cursor_; + } + + // REQUIRES: ResizeCompactCursors has been called + void AddCursorForOneLevel(int level, + const InternalKey& smallest_uncompacted_key) { + compact_cursor_[level] = smallest_uncompacted_key; + } + + // REQUIRES: lock is held + // Update the compact cursor and advance the file index using increment + // so that it can point to the next cursor (increment means the number of + // input files in this level of the last compaction) + const InternalKey& GetNextCompactCursor(int level, size_t increment) { + int cmp_idx = next_file_to_compact_by_size_[level] + (int)increment; + assert(cmp_idx <= (int)files_by_compaction_pri_[level].size()); + // TODO(zichen): may need to update next_file_to_compact_by_size_ + // for parallel compaction. + InternalKey new_cursor; + if (cmp_idx >= (int)files_by_compaction_pri_[level].size()) { + cmp_idx = 0; + } + // TODO(zichen): rethink if this strategy gives us some good guarantee + return files_[level][files_by_compaction_pri_[level][cmp_idx]]->smallest; + } + + void ReserveBlob(size_t size) { blob_files_.reserve(size); } + + void AddBlobFile(std::shared_ptr blob_file_meta); + + void PrepareForVersionAppend(const ImmutableOptions& immutable_options, + const MutableCFOptions& mutable_cf_options); + + // REQUIRES: PrepareForVersionAppend has been called + void SetFinalized(); + + // Update the accumulated stats from a file-meta. + void UpdateAccumulatedStats(FileMetaData* file_meta); + + // Decrease the current stat from a to-be-deleted file-meta + void RemoveCurrentStats(FileMetaData* file_meta); + + // Updates internal structures that keep track of compaction scores + // We use compaction scores to figure out which compaction to do next + // REQUIRES: db_mutex held!! + // TODO find a better way to pass compaction_options_fifo. 
+ void ComputeCompactionScore(const ImmutableOptions& immutable_options, + const MutableCFOptions& mutable_cf_options); + + // Estimate est_comp_needed_bytes_ + void EstimateCompactionBytesNeeded( + const MutableCFOptions& mutable_cf_options); + + // This computes files_marked_for_compaction_ and is called by + // ComputeCompactionScore() + void ComputeFilesMarkedForCompaction(); + + // This computes ttl_expired_files_ and is called by + // ComputeCompactionScore() + void ComputeExpiredTtlFiles(const ImmutableOptions& ioptions, + const uint64_t ttl); + + // This computes files_marked_for_periodic_compaction_ and is called by + // ComputeCompactionScore() + void ComputeFilesMarkedForPeriodicCompaction( + const ImmutableOptions& ioptions, + const uint64_t periodic_compaction_seconds); + + // This computes bottommost_files_marked_for_compaction_ and is called by + // ComputeCompactionScore() or UpdateOldestSnapshot(). + // + // Among bottommost files (assumes they've already been computed), marks the + // ones that have keys that would be eliminated if recompacted, according to + // the seqnum of the oldest existing snapshot. Must be called every time + // oldest snapshot changes as that is when bottom-level files can become + // eligible for compaction. + // + // REQUIRES: DB mutex held + void ComputeBottommostFilesMarkedForCompaction(); + + // This computes files_marked_for_forced_blob_gc_ and is called by + // ComputeCompactionScore() + // + // REQUIRES: DB mutex held + void ComputeFilesMarkedForForcedBlobGC( + double blob_garbage_collection_age_cutoff, + double blob_garbage_collection_force_threshold); + + bool level0_non_overlapping() const { return level0_non_overlapping_; } + + // Updates the oldest snapshot and related internal state, like the bottommost + // files marked for compaction. + // REQUIRES: DB mutex held + void UpdateOldestSnapshot(SequenceNumber oldest_snapshot_seqnum); + + int MaxInputLevel() const; + int MaxOutputLevel(bool allow_ingest_behind) const; + + // Return level number that has idx'th highest score + int CompactionScoreLevel(int idx) const { return compaction_level_[idx]; } + + // Return idx'th highest score + double CompactionScore(int idx) const { return compaction_score_[idx]; } + + void GetOverlappingInputs( + int level, const InternalKey* begin, // nullptr means before all keys + const InternalKey* end, // nullptr means after all keys + std::vector* inputs, + int hint_index = -1, // index of overlap file + int* file_index = nullptr, // return index of overlap file + bool expand_range = true, // if set, returns files which overlap the + // range and overlap each other. 
If false,
+                                   // then just files intersecting the range
+      InternalKey** next_smallest = nullptr)  // if non-null, returns the
+      const;  // smallest key of next file not included
+  void GetCleanInputsWithinInterval(
+      int level, const InternalKey* begin,  // nullptr means before all keys
+      const InternalKey* end,               // nullptr means after all keys
+      std::vector<FileMetaData*>* inputs,
+      int hint_index = -1,        // index of overlap file
+      int* file_index = nullptr)  // return index of overlap file
+      const;
+
+  void GetOverlappingInputsRangeBinarySearch(
+      int level,                 // level > 0
+      const InternalKey* begin,  // nullptr means before all keys
+      const InternalKey* end,    // nullptr means after all keys
+      std::vector<FileMetaData*>* inputs,
+      int hint_index,   // index of overlap file
+      int* file_index,  // return index of overlap file
+      bool within_interval = false,  // if set, force the inputs within interval
+      InternalKey** next_smallest = nullptr)  // if non-null, returns the
+      const;  // smallest key of next file not included
+
+  // Returns true iff some file in the specified level overlaps
+  // some part of [*smallest_user_key,*largest_user_key].
+  // smallest_user_key==NULL represents a key smaller than all keys in the DB.
+  // largest_user_key==NULL represents a key larger than all keys in the DB.
+  bool OverlapInLevel(int level, const Slice* smallest_user_key,
+                      const Slice* largest_user_key);
+
+  // Returns true iff the first or last file in inputs contains
+  // an overlapping user key to the file "just outside" of it (i.e.
+  // just after the last file, or just before the first file)
+  // REQUIRES: "*inputs" is a sorted list of non-overlapping files
+  bool HasOverlappingUserKey(const std::vector<FileMetaData*>* inputs,
+                             int level);
+
+  int num_levels() const { return num_levels_; }
+
+  // REQUIRES: PrepareForVersionAppend has been called
+  int num_non_empty_levels() const {
+    assert(finalized_);
+    return num_non_empty_levels_;
+  }
+
+  // REQUIRES: PrepareForVersionAppend has been called
+  // This may or may not return the number of level files. It is to keep
+  // backward compatible behavior in universal compaction.
+  int l0_delay_trigger_count() const { return l0_delay_trigger_count_; }
+
+  void set_l0_delay_trigger_count(int v) { l0_delay_trigger_count_ = v; }
+
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  int NumLevelFiles(int level) const {
+    assert(finalized_);
+    return static_cast<int>(files_[level].size());
+  }
+
+  // Return the combined file size of all files at the specified level.
+  uint64_t NumLevelBytes(int level) const;
+
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  const std::vector<FileMetaData*>& LevelFiles(int level) const {
+    return files_[level];
+  }
+
+  bool HasMissingEpochNumber() const;
+  uint64_t GetMaxEpochNumberOfFiles() const;
+  EpochNumberRequirement GetEpochNumberRequirement() const {
+    return epoch_number_requirement_;
+  }
+  void SetEpochNumberRequirement(
+      EpochNumberRequirement epoch_number_requirement) {
+    epoch_number_requirement_ = epoch_number_requirement;
+  }
+  void RecoverEpochNumbers(ColumnFamilyData* cfd);
+
+  class FileLocation {
+   public:
+    FileLocation() = default;
+    FileLocation(int level, size_t position)
+        : level_(level), position_(position) {}
+
+    int GetLevel() const { return level_; }
+    size_t GetPosition() const { return position_; }
+
+    bool IsValid() const { return level_ >= 0; }
+
+    bool operator==(const FileLocation& rhs) const {
+      return level_ == rhs.level_ && position_ == rhs.position_;
+    }
+
+    bool operator!=(const FileLocation& rhs) const { return !(*this == rhs); }
+
+    static FileLocation Invalid() { return FileLocation(); }
+
+   private:
+    int level_ = -1;
+    size_t position_ = 0;
+  };
+
+  // REQUIRES: PrepareForVersionAppend has been called
+  FileLocation GetFileLocation(uint64_t file_number) const {
+    const auto it = file_locations_.find(file_number);
+
+    if (it == file_locations_.end()) {
+      return FileLocation::Invalid();
+    }
+
+    assert(it->second.GetLevel() < num_levels_);
+    assert(it->second.GetPosition() < files_[it->second.GetLevel()].size());
+    assert(files_[it->second.GetLevel()][it->second.GetPosition()]);
+    assert(files_[it->second.GetLevel()][it->second.GetPosition()]
+               ->fd.GetNumber() == file_number);
+
+    return it->second;
+  }
+
+  // REQUIRES: PrepareForVersionAppend has been called
+  FileMetaData* GetFileMetaDataByNumber(uint64_t file_number) const {
+    auto location = GetFileLocation(file_number);
+
+    if (!location.IsValid()) {
+      return nullptr;
+    }
+
+    return files_[location.GetLevel()][location.GetPosition()];
+  }
+
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  using BlobFiles = std::vector<std::shared_ptr<BlobFileMetaData>>;
+  const BlobFiles& GetBlobFiles() const { return blob_files_; }
+
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  BlobFiles::const_iterator GetBlobFileMetaDataLB(
+      uint64_t blob_file_number) const;
+
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  std::shared_ptr<BlobFileMetaData> GetBlobFileMetaData(
+      uint64_t blob_file_number) const {
+    const auto it = GetBlobFileMetaDataLB(blob_file_number);
+
+    assert(it == blob_files_.end() || *it);
+
+    if (it != blob_files_.end() &&
+        (*it)->GetBlobFileNumber() == blob_file_number) {
+      return *it;
+    }
+
+    return std::shared_ptr<BlobFileMetaData>();
+  }
+
+  // REQUIRES: This version has been saved (see VersionBuilder::SaveTo)
+  struct BlobStats {
+    uint64_t total_file_size = 0;
+    uint64_t total_garbage_size = 0;
+    double space_amp = 0.0;
+  };
+
+  BlobStats GetBlobStats() const {
+    uint64_t total_file_size = 0;
+    uint64_t total_garbage_size = 0;
+
+    for (const auto& meta : blob_files_) {
+      assert(meta);
+
+      total_file_size += meta->GetBlobFileSize();
+      total_garbage_size += meta->GetGarbageBlobBytes();
+    }
+
+    double space_amp = 0.0;
+    if (total_file_size > total_garbage_size) {
+      space_amp = static_cast<double>(total_file_size) /
+                  (total_file_size - total_garbage_size);
+    }
+
+    return BlobStats{total_file_size, total_garbage_size, space_amp};
+  }
+
+  const ROCKSDB_NAMESPACE::LevelFilesBrief& LevelFilesBrief(int level)
+      const {
+    assert(level < static_cast<int>(level_files_brief_.size()));
+    return level_files_brief_[level];
+  }
+
+  // REQUIRES: PrepareForVersionAppend has been called
+  const std::vector<int>& FilesByCompactionPri(int level) const {
+    assert(finalized_);
+    return files_by_compaction_pri_[level];
+  }
+
+  // REQUIRES: ComputeCompactionScore has been called
+  // REQUIRES: DB mutex held during access
+  const autovector<std::pair<int, FileMetaData*>>& FilesMarkedForCompaction()
+      const {
+    assert(finalized_);
+    return files_marked_for_compaction_;
+  }
+
+  void TEST_AddFileMarkedForCompaction(int level, FileMetaData* f) {
+    f->marked_for_compaction = true;
+    files_marked_for_compaction_.emplace_back(level, f);
+  }
+
+  // REQUIRES: ComputeCompactionScore has been called
+  // REQUIRES: DB mutex held during access
+  const autovector<std::pair<int, FileMetaData*>>& ExpiredTtlFiles() const {
+    assert(finalized_);
+    return expired_ttl_files_;
+  }
+
+  // REQUIRES: ComputeCompactionScore has been called
+  // REQUIRES: DB mutex held during access
+  const autovector<std::pair<int, FileMetaData*>>&
+  FilesMarkedForPeriodicCompaction() const {
+    assert(finalized_);
+    return files_marked_for_periodic_compaction_;
+  }
+
+  void TEST_AddFileMarkedForPeriodicCompaction(int level, FileMetaData* f) {
+    files_marked_for_periodic_compaction_.emplace_back(level, f);
+  }
+
+  // REQUIRES: ComputeCompactionScore has been called
+  // REQUIRES: DB mutex held during access
+  const autovector<std::pair<int, FileMetaData*>>&
+  BottommostFilesMarkedForCompaction() const {
+    assert(finalized_);
+    return bottommost_files_marked_for_compaction_;
+  }
+
+  // REQUIRES: ComputeCompactionScore has been called
+  // REQUIRES: DB mutex held during access
+  const autovector<std::pair<int, FileMetaData*>>& FilesMarkedForForcedBlobGC()
+      const {
+    assert(finalized_);
+    return files_marked_for_forced_blob_gc_;
+  }
+
+  int base_level() const { return base_level_; }
+  double level_multiplier() const { return level_multiplier_; }
+
+  // REQUIRES: lock is held
+  // Set the index that is used to offset into files_by_compaction_pri_ to find
+  // the next compaction candidate file.
+  void SetNextCompactionIndex(int level, int index) {
+    next_file_to_compact_by_size_[level] = index;
+  }
+
+  // REQUIRES: lock is held
+  int NextCompactionIndex(int level) const {
+    return next_file_to_compact_by_size_[level];
+  }
+
+  // REQUIRES: PrepareForVersionAppend has been called
+  const FileIndexer& file_indexer() const {
+    assert(finalized_);
+    return file_indexer_;
+  }
+
+  // Only the first few entries of files_by_compaction_pri_ are sorted.
+  // There is no need to sort all the files because it is likely
+  // that on a running system, we need to look at only the first
+  // few largest files because a new version is created every few
+  // seconds/minutes (because of concurrent compactions).
+  static const size_t kNumberFilesToSort = 50;
+
+  // Return a human-readable short (single-line) summary of the number
+  // of files per level. Uses *scratch as backing store.
+  struct LevelSummaryStorage {
+    char buffer[1000];
+  };
+  struct FileSummaryStorage {
+    char buffer[3000];
+  };
+  const char* LevelSummary(LevelSummaryStorage* scratch) const;
+  // Return a human-readable short (single-line) summary of files
+  // in a specified level. Uses *scratch as backing store.
+  const char* LevelFileSummary(FileSummaryStorage* scratch, int level) const;
+
+  // Return the maximum overlapping data (in bytes) at next level for any
+  // file at a level >= 1.
+  uint64_t MaxNextLevelOverlappingBytes();
+
+  // Return a human readable string that describes this version's contents.
+  std::string DebugString(bool hex = false) const;
+
+  uint64_t GetAverageValueSize() const {
+    if (accumulated_num_non_deletions_ == 0) {
+      return 0;
+    }
+    assert(accumulated_raw_key_size_ + accumulated_raw_value_size_ > 0);
+    assert(accumulated_file_size_ > 0);
+    return accumulated_raw_value_size_ / accumulated_num_non_deletions_ *
+           accumulated_file_size_ /
+           (accumulated_raw_key_size_ + accumulated_raw_value_size_);
+  }
+
+  uint64_t GetEstimatedActiveKeys() const;
+
+  double GetEstimatedCompressionRatioAtLevel(int level) const;
+
+  // Re-initializes the index that is used to offset into
+  // files_by_compaction_pri_ to find the next compaction candidate file.
+  void ResetNextCompactionIndex(int level) {
+    next_file_to_compact_by_size_[level] = 0;
+  }
+
+  const InternalKeyComparator* InternalComparator() const {
+    return internal_comparator_;
+  }
+
+  // Returns maximum total bytes of data on a given level.
+  uint64_t MaxBytesForLevel(int level) const;
+
+  // Returns an estimate of the amount of live data in bytes.
+  uint64_t EstimateLiveDataSize() const;
+
+  uint64_t estimated_compaction_needed_bytes() const {
+    return estimated_compaction_needed_bytes_;
+  }
+
+  void TEST_set_estimated_compaction_needed_bytes(uint64_t v) {
+    estimated_compaction_needed_bytes_ = v;
+  }
+
+  bool force_consistency_checks() const { return force_consistency_checks_; }
+
+  SequenceNumber bottommost_files_mark_threshold() const {
+    return bottommost_files_mark_threshold_;
+  }
+
+  // Returns whether any key in [`smallest_user_key`, `largest_user_key`]
+  // could appear in an older L0 file than `last_l0_idx` or in a greater level
+  // than `last_level`
+  //
+  // @param last_level Level after which we check for overlap
+  // @param last_l0_idx If `last_level == 0`, index of L0 file after which we
+  // check for overlap; otherwise, must be -1
+  bool RangeMightExistAfterSortedRun(const Slice& smallest_user_key,
+                                     const Slice& largest_user_key,
+                                     int last_level, int last_l0_idx);
+
+ private:
+  void ComputeCompensatedSizes();
+  void UpdateNumNonEmptyLevels();
+  void CalculateBaseBytes(const ImmutableOptions& ioptions,
+                          const MutableCFOptions& options);
+  void UpdateFilesByCompactionPri(const ImmutableOptions& immutable_options,
+                                  const MutableCFOptions& mutable_cf_options);
+
+  void GenerateFileIndexer() {
+    file_indexer_.UpdateIndex(&arena_, num_non_empty_levels_, files_);
+  }
+
+  void GenerateLevelFilesBrief();
+  void GenerateLevel0NonOverlapping();
+  void GenerateBottommostFiles();
+  void GenerateFileLocationIndex();
+
+  const InternalKeyComparator* internal_comparator_;
+  const Comparator* user_comparator_;
+  int num_levels_;            // Number of levels
+  int num_non_empty_levels_;  // Number of levels. Any level larger than it
+                              // is guaranteed to be empty.
+  // Per-level max bytes
+  std::vector<uint64_t> level_max_bytes_;
+
+  // A short brief metadata of files per level
+  autovector<ROCKSDB_NAMESPACE::LevelFilesBrief> level_files_brief_;
+  FileIndexer file_indexer_;
+  Arena arena_;  // Used to allocate space for file_levels_
+
+  CompactionStyle compaction_style_;
+
+  // List of files per level, files in each level are arranged
+  // in increasing order of keys
+  std::vector<FileMetaData*>* files_;
+
+  // Map of all table files in version. Maps file number to (level, position on
+  // level).
+  using FileLocations = UnorderedMap<uint64_t, FileLocation>;
+  FileLocations file_locations_;
+
+  // Vector of blob files in version sorted by blob file number.
+  BlobFiles blob_files_;
+
+  // Level that L0 data should be compacted to. All levels < base_level_ should
+  // be empty.
+  // -1 if it is not level-based compaction, in which case it is not
+  // applicable.
+  int base_level_;
+
+  // Applies to level compaction when
+  // `level_compaction_dynamic_level_bytes=true`. All non-empty levels <=
+  // lowest_unnecessary_level_ are not needed and will be drained
+  // automatically. -1 if there is no unnecessary level.
+  int lowest_unnecessary_level_;
+
+  double level_multiplier_;
+
+  // A list for the same set of files that are stored in files_,
+  // but files in each level are now sorted based on file
+  // size. The file with the largest size is at the front.
+  // This vector stores the index of the file from files_.
+  std::vector<std::vector<int>> files_by_compaction_pri_;
+
+  // If true, means that files in L0 have keys with non-overlapping ranges
+  bool level0_non_overlapping_;
+
+  // An index into files_by_compaction_pri_ that specifies the first
+  // file that is not yet compacted
+  std::vector<int> next_file_to_compact_by_size_;
+
+  // Only the first few entries of files_by_compaction_pri_ are sorted.
+  // There is no need to sort all the files because it is likely
+  // that on a running system, we need to look at only the first
+  // few largest files because a new version is created every few
+  // seconds/minutes (because of concurrent compactions).
+  static const size_t number_of_files_to_sort_ = 50;
+
+  // This vector contains the list of files that are marked for compaction and
+  // are not currently being compacted. It is protected by the DB mutex. It is
+  // calculated in ComputeCompactionScore().
+  autovector<std::pair<int, FileMetaData*>> files_marked_for_compaction_;
+
+  autovector<std::pair<int, FileMetaData*>> expired_ttl_files_;
+
+  autovector<std::pair<int, FileMetaData*>>
+      files_marked_for_periodic_compaction_;
+
+  // These files are considered bottommost because none of their keys can exist
+  // at lower levels. They are not necessarily all in the same level. The
+  // marked ones are eligible for compaction because they contain duplicate key
+  // versions that are no longer protected by a snapshot. These variables are
+  // protected by the DB mutex and are calculated in
+  // `GenerateBottommostFiles()` and
+  // `ComputeBottommostFilesMarkedForCompaction()`.
+  autovector<std::pair<int, FileMetaData*>> bottommost_files_;
+  autovector<std::pair<int, FileMetaData*>>
+      bottommost_files_marked_for_compaction_;
+
+  autovector<std::pair<int, FileMetaData*>> files_marked_for_forced_blob_gc_;
+
+  // Threshold for needing to mark another bottommost file. Maintain it so we
+  // can quickly check when releasing a snapshot whether more bottommost files
+  // became eligible for compaction. It's defined as the min of the max nonzero
+  // seqnums of unmarked bottommost files.
+  SequenceNumber bottommost_files_mark_threshold_ = kMaxSequenceNumber;
+
+  // Monotonically increases as we release old snapshots. Zero indicates no
+  // snapshots have been released yet. When no snapshots remain we set it to
+  // the current seqnum, which needs to be protected as a snapshot can still
+  // be created that references it.
+  SequenceNumber oldest_snapshot_seqnum_ = 0;
+
+  // Level that should be compacted next and its compaction score.
+  // Score < 1 means compaction is not strictly needed. These fields
+  // are initialized by ComputeCompactionScore.
+  // The most critical level to be compacted is listed first.
+  // These are used to pick the best compaction level.
+  std::vector<double> compaction_score_;
+  std::vector<int> compaction_level_;
+  int l0_delay_trigger_count_ = 0;  // Count used to trigger slow down and
+                                    // stop for number of L0 files.
+
+  // Compact cursors for round-robin compactions in each level
+  std::vector<InternalKey> compact_cursor_;
+
+  // the following are the sampled temporary stats.
+  // the current accumulated size of sampled files.
+  uint64_t accumulated_file_size_;
+  // the current accumulated size of all raw keys based on the sampled files.
+  uint64_t accumulated_raw_key_size_;
+  // the current accumulated size of all raw values based on the sampled files.
+  uint64_t accumulated_raw_value_size_;
+  // total number of non-deletion entries
+  uint64_t accumulated_num_non_deletions_;
+  // total number of deletion entries
+  uint64_t accumulated_num_deletions_;
+  // current number of non-deletion entries
+  uint64_t current_num_non_deletions_;
+  // current number of deletion entries
+  uint64_t current_num_deletions_;
+  // current number of file samples
+  uint64_t current_num_samples_;
+  // Estimated bytes needed to be compacted until all levels' size is down to
+  // target sizes.
+  uint64_t estimated_compaction_needed_bytes_;
+
+  bool finalized_;
+
+  // If set to true, we will run consistency checks even if RocksDB
+  // is compiled in release mode
+  bool force_consistency_checks_;
+
+  EpochNumberRequirement epoch_number_requirement_;
+
+  friend class Version;
+  friend class VersionSet;
+};
+
+struct ObsoleteFileInfo {
+  FileMetaData* metadata;
+  std::string path;
+  // If true, the FileMetaData should be destroyed but the file should
+  // not be deleted. This is because another FileMetaData still references
+  // the file, usually because the file was trivially moved so two
+  // FileMetaData objects are managing the file.
+  bool only_delete_metadata = false;
+
+  ObsoleteFileInfo() noexcept
+      : metadata(nullptr), only_delete_metadata(false) {}
+  ObsoleteFileInfo(FileMetaData* f, const std::string& file_path,
+                   std::shared_ptr<CacheReservationManager>
+                       file_metadata_cache_res_mgr_arg = nullptr)
+      : metadata(f),
+        path(file_path),
+        only_delete_metadata(false),
+        file_metadata_cache_res_mgr(file_metadata_cache_res_mgr_arg) {}
+
+  ObsoleteFileInfo(const ObsoleteFileInfo&) = delete;
+  ObsoleteFileInfo& operator=(const ObsoleteFileInfo&) = delete;
+
+  ObsoleteFileInfo(ObsoleteFileInfo&& rhs) noexcept : ObsoleteFileInfo() {
+    *this = std::move(rhs);
+  }
+
+  ObsoleteFileInfo& operator=(ObsoleteFileInfo&& rhs) noexcept {
+    path = std::move(rhs.path);
+    metadata = rhs.metadata;
+    rhs.metadata = nullptr;
+    file_metadata_cache_res_mgr = rhs.file_metadata_cache_res_mgr;
+    rhs.file_metadata_cache_res_mgr = nullptr;
+
+    return *this;
+  }
+  void DeleteMetadata() {
+    if (file_metadata_cache_res_mgr) {
+      Status s = file_metadata_cache_res_mgr->UpdateCacheReservation(
+          metadata->ApproximateMemoryUsage(), false /* increase */);
+      s.PermitUncheckedError();
+    }
+    delete metadata;
+    metadata = nullptr;
+  }
+
+ private:
+  std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr;
+};
+
+class ObsoleteBlobFileInfo {
+ public:
+  ObsoleteBlobFileInfo(uint64_t blob_file_number, std::string path)
+      : blob_file_number_(blob_file_number), path_(std::move(path)) {}
+
+  uint64_t GetBlobFileNumber() const { return blob_file_number_; }
+  const std::string& GetPath() const { return path_; }
+
+ private:
+  uint64_t blob_file_number_;
+  std::string path_;
+};
+
+using MultiGetRange = MultiGetContext::Range;
+// A column family's version consists of the table and blob files owned by
+// the column family at a certain point in time.
+class Version {
+ public:
+  // Append to *iters a sequence of iterators that will
+  // yield the contents of this Version when merged together.
+  // @param read_options Must outlive any iterator built by
+  // `merger_iter_builder`.
+ void AddIterators(const ReadOptions& read_options, + const FileOptions& soptions, + MergeIteratorBuilder* merger_iter_builder, + bool allow_unprepared_value); + + // @param read_options Must outlive any iterator built by + // `merger_iter_builder`. + void AddIteratorsForLevel(const ReadOptions& read_options, + const FileOptions& soptions, + MergeIteratorBuilder* merger_iter_builder, + int level, bool allow_unprepared_value); + + Status OverlapWithLevelIterator(const ReadOptions&, const FileOptions&, + const Slice& smallest_user_key, + const Slice& largest_user_key, int level, + bool* overlap); + + // Lookup the value for key or get all merge operands for key. + // If do_merge = true (default) then lookup value for key. + // Behavior if do_merge = true: + // If found, store it in *value and + // return OK. Else return a non-OK status. + // Uses *operands to store merge_operator operations to apply later. + // + // If the ReadOptions.read_tier is set to do a read-only fetch, then + // *value_found will be set to false if it cannot be determined whether + // this value exists without doing IO. + // + // If the key is Deleted, *status will be set to NotFound and + // *key_exists will be set to true. + // If no key was found, *status will be set to NotFound and + // *key_exists will be set to false. + // If seq is non-null, *seq will be set to the sequence number found + // for the key if a key was found. + // Behavior if do_merge = false + // If the key has any merge operands then store them in + // merge_context.operands_list and don't merge the operands + // REQUIRES: lock is not held + // REQUIRES: pinned_iters_mgr != nullptr + void Get(const ReadOptions&, const LookupKey& key, PinnableSlice* value, + PinnableWideColumns* columns, std::string* timestamp, Status* status, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, + PinnedIteratorsManager* pinned_iters_mgr, + bool* value_found = nullptr, bool* key_exists = nullptr, + SequenceNumber* seq = nullptr, ReadCallback* callback = nullptr, + bool* is_blob = nullptr, bool do_merge = true); + + void MultiGet(const ReadOptions&, MultiGetRange* range, + ReadCallback* callback = nullptr); + + // Interprets blob_index_slice as a blob reference, and (assuming the + // corresponding blob file is part of this Version) retrieves the blob and + // saves it in *value. + // REQUIRES: blob_index_slice stores an encoded blob reference + Status GetBlob(const ReadOptions& read_options, const Slice& user_key, + const Slice& blob_index_slice, + FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value, + uint64_t* bytes_read) const; + + // Retrieves a blob using a blob reference and saves it in *value, + // assuming the corresponding blob file is part of this Version. + Status GetBlob(const ReadOptions& read_options, const Slice& user_key, + const BlobIndex& blob_index, + FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value, + uint64_t* bytes_read) const; + + struct BlobReadContext { + BlobReadContext(const BlobIndex& blob_idx, const KeyContext* key_ctx) + : blob_index(blob_idx), key_context(key_ctx) {} + + BlobIndex blob_index; + const KeyContext* key_context; + PinnableSlice result; + }; + + using BlobReadContexts = std::vector; + void MultiGetBlob(const ReadOptions& read_options, MultiGetRange& range, + std::unordered_map& blob_ctxs); + + // Loads some stats information from files (if update_stats is set) and + // populates derived data structures. Call without mutex held. 
+  // It needs to be called before appending the version to the version set.
+  void PrepareAppend(const MutableCFOptions& mutable_cf_options,
+                     const ReadOptions& read_options, bool update_stats);
+
+  // Reference count management (so Versions do not disappear out from
+  // under live iterators)
+  void Ref();
+  // Decrease reference count. Delete the object if no reference left
+  // and return true. Otherwise, return false.
+  bool Unref();
+
+  // Add all files listed in the current version to *live_table_files and
+  // *live_blob_files.
+  void AddLiveFiles(std::vector<uint64_t>* live_table_files,
+                    std::vector<uint64_t>* live_blob_files) const;
+
+  // Remove live files that are in the delete candidate lists.
+  void RemoveLiveFiles(
+      std::vector<ObsoleteFileInfo>& sst_delete_candidates,
+      std::vector<ObsoleteBlobFileInfo>& blob_delete_candidates) const;
+
+  // Return a human readable string that describes this version's contents.
+  std::string DebugString(bool hex = false, bool print_stats = false) const;
+
+  // Returns the version number of this version
+  uint64_t GetVersionNumber() const { return version_number_; }
+
+  // REQUIRES: lock is held
+  // On success, "tp" will contain the table properties of the file
+  // specified in "file_meta". If the file name of "file_meta" is
+  // known ahead, passing it by a non-null "fname" can save a
+  // file-name conversion.
+  Status GetTableProperties(const ReadOptions& read_options,
+                            std::shared_ptr<const TableProperties>* tp,
+                            const FileMetaData* file_meta,
+                            const std::string* fname = nullptr) const;
+
+  // REQUIRES: lock is held
+  // On success, *props will be populated with all SSTables' table properties.
+  // The keys of `props` are the sst file name, the values of `props` are the
+  // tables' properties, represented as
+  // std::shared_ptr<const TableProperties>.
+  Status GetPropertiesOfAllTables(const ReadOptions& read_options,
+                                  TablePropertiesCollection* props);
+  Status GetPropertiesOfAllTables(const ReadOptions& read_options,
+                                  TablePropertiesCollection* props, int level);
+  Status GetPropertiesOfTablesInRange(const ReadOptions& read_options,
+                                      const Range* range, std::size_t n,
+                                      TablePropertiesCollection* props) const;
+
+  // Print summary of range delete tombstones in SST files into out_str,
+  // with maximum max_entries_to_print entries printed out.
+  Status TablesRangeTombstoneSummary(int max_entries_to_print,
+                                     std::string* out_str);
+
+  // REQUIRES: lock is held
+  // On success, "tp" will contain the aggregated table property among
+  // the table properties of all sst files in this version.
+  Status GetAggregatedTableProperties(
+      const ReadOptions& read_options,
+      std::shared_ptr<const TableProperties>* tp, int level = -1);
+
+  uint64_t GetEstimatedActiveKeys() {
+    return storage_info_.GetEstimatedActiveKeys();
+  }
+
+  size_t GetMemoryUsageByTableReaders(const ReadOptions& read_options);
+
+  ColumnFamilyData* cfd() const { return cfd_; }
+
+  // Return the next Version in the linked list.
+  Version* Next() const { return next_; }
+
+  int TEST_refs() const { return refs_; }
+
+  VersionStorageInfo* storage_info() { return &storage_info_; }
+  const VersionStorageInfo* storage_info() const { return &storage_info_; }
+
+  VersionSet* version_set() { return vset_; }
+
+  void GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta);
+
+  void GetSstFilesBoundaryKeys(Slice* smallest_user_key,
+                               Slice* largest_user_key);
+
+  uint64_t GetSstFilesSize();
+
+  // Retrieves the file_creation_time of the oldest file in the DB.
+ // Prerequisite for this API is max_open_files = -1 + void GetCreationTimeOfOldestFile(uint64_t* creation_time); + + const MutableCFOptions& GetMutableCFOptions() { return mutable_cf_options_; } + + InternalIterator* TEST_GetLevelIterator( + const ReadOptions& read_options, MergeIteratorBuilder* merge_iter_builder, + int level, bool allow_unprepared_value); + + private: + Env* env_; + SystemClock* clock_; + + friend class ReactiveVersionSet; + friend class VersionSet; + friend class VersionEditHandler; + friend class VersionEditHandlerPointInTime; + + const InternalKeyComparator* internal_comparator() const { + return storage_info_.internal_comparator_; + } + const Comparator* user_comparator() const { + return storage_info_.user_comparator_; + } + + // Returns true if the filter blocks in the specified level will not be + // checked during read operations. In certain cases (trivial move or preload), + // the filter block may already be cached, but we still do not access it such + // that it eventually expires from the cache. + bool IsFilterSkipped(int level, bool is_file_last_in_level = false); + + // The helper function of UpdateAccumulatedStats, which may fill the missing + // fields of file_meta from its associated TableProperties. + // Returns true if it does initialize FileMetaData. + bool MaybeInitializeFileMetaData(const ReadOptions& read_options, + FileMetaData* file_meta); + + // Update the accumulated stats associated with the current version. + // This accumulated stats will be used in compaction. + void UpdateAccumulatedStats(const ReadOptions& read_options); + + DECLARE_SYNC_AND_ASYNC( + /* ret_type */ Status, /* func_name */ MultiGetFromSST, + const ReadOptions& read_options, MultiGetRange file_range, + int hit_file_level, bool skip_filters, bool skip_range_deletions, + FdWithKeyRange* f, + std::unordered_map& blob_ctxs, + TableCache::TypedHandle* table_handle, uint64_t& num_filter_read, + uint64_t& num_index_read, uint64_t& num_sst_read); + +#ifdef USE_COROUTINES + // MultiGet using async IO to read data blocks from SST files in parallel + // within and across levels + Status MultiGetAsync( + const ReadOptions& options, MultiGetRange* range, + std::unordered_map* blob_ctxs); + + // A helper function to lookup a batch of keys in a single level. It will + // queue coroutine tasks to mget_tasks. It may also split the input batch + // by creating a new batch with keys definitely not in this level and + // enqueuing it to to_process. + Status ProcessBatch( + const ReadOptions& read_options, FilePickerMultiGet* batch, + std::vector>& mget_tasks, + std::unordered_map* blob_ctxs, + autovector& batches, std::deque& waiting, + std::deque& to_process, unsigned int& num_tasks_queued, + std::unordered_map>& + mget_stats); +#endif + + ColumnFamilyData* cfd_; // ColumnFamilyData to which this Version belongs + Logger* info_log_; + Statistics* db_statistics_; + TableCache* table_cache_; + BlobSource* blob_source_; + const MergeOperator* merge_operator_; + + VersionStorageInfo storage_info_; + VersionSet* vset_; // VersionSet to which this Version belongs + Version* next_; // Next version in linked list + Version* prev_; // Previous version in linked list + int refs_; // Number of live refs to this version + const FileOptions file_options_; + const MutableCFOptions mutable_cf_options_; + // Cached value to avoid recomputing it on every read. + const size_t max_file_size_for_l0_meta_pin_; + + // A version number that uniquely represents this version. 
This is + // used for debugging and logging purposes only. + uint64_t version_number_; + std::shared_ptr io_tracer_; + bool use_async_io_; + + Version(ColumnFamilyData* cfd, VersionSet* vset, const FileOptions& file_opt, + MutableCFOptions mutable_cf_options, + const std::shared_ptr& io_tracer, + uint64_t version_number = 0, + EpochNumberRequirement epoch_number_requirement = + EpochNumberRequirement::kMustPresent); + + ~Version(); + + // No copying allowed + Version(const Version&) = delete; + void operator=(const Version&) = delete; +}; + +class BaseReferencedVersionBuilder; + +class AtomicGroupReadBuffer { + public: + AtomicGroupReadBuffer() = default; + Status AddEdit(VersionEdit* edit); + void Clear(); + bool IsFull() const; + bool IsEmpty() const; + + uint64_t TEST_read_edits_in_atomic_group() const { + return read_edits_in_atomic_group_; + } + std::vector& replay_buffer() { return replay_buffer_; } + + private: + uint64_t read_edits_in_atomic_group_ = 0; + std::vector replay_buffer_; +}; + +// VersionSet is the collection of versions of all the column families of the +// database. Each database owns one VersionSet. A VersionSet has access to all +// column families via ColumnFamilySet, i.e. set of the column families. +class VersionSet { + public: + VersionSet(const std::string& dbname, const ImmutableDBOptions* db_options, + const FileOptions& file_options, Cache* table_cache, + WriteBufferManager* write_buffer_manager, + WriteController* write_controller, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, + const std::string& db_id, const std::string& db_session_id); + // No copying allowed + VersionSet(const VersionSet&) = delete; + void operator=(const VersionSet&) = delete; + + virtual ~VersionSet(); + + Status LogAndApplyToDefaultColumnFamily( + const ReadOptions& read_options, VersionEdit* edit, InstrumentedMutex* mu, + FSDirectory* dir_contains_current_file, bool new_descriptor_log = false, + const ColumnFamilyOptions* column_family_options = nullptr) { + ColumnFamilyData* default_cf = GetColumnFamilySet()->GetDefault(); + const MutableCFOptions* cf_options = + default_cf->GetLatestMutableCFOptions(); + return LogAndApply(default_cf, *cf_options, read_options, edit, mu, + dir_contains_current_file, new_descriptor_log, + column_family_options); + } + + // Apply *edit to the current version to form a new descriptor that + // is both saved to persistent state and installed as the new + // current version. Will release *mu while actually writing to the file. + // column_family_options has to be set if edit is column family add + // REQUIRES: *mu is held on entry. + // REQUIRES: no other thread concurrently calls LogAndApply() + Status LogAndApply( + ColumnFamilyData* column_family_data, + const MutableCFOptions& mutable_cf_options, + const ReadOptions& read_options, VersionEdit* edit, InstrumentedMutex* mu, + FSDirectory* dir_contains_current_file, bool new_descriptor_log = false, + const ColumnFamilyOptions* column_family_options = nullptr) { + autovector cfds; + cfds.emplace_back(column_family_data); + autovector mutable_cf_options_list; + mutable_cf_options_list.emplace_back(&mutable_cf_options); + autovector> edit_lists; + autovector edit_list; + edit_list.emplace_back(edit); + edit_lists.emplace_back(edit_list); + return LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists, + mu, dir_contains_current_file, new_descriptor_log, + column_family_options); + } + // The batch version. 
+  // If edit_list.size() > 1, the caller must ensure that no edit in the
+  // list is a column family add or drop.
+  Status LogAndApply(
+      ColumnFamilyData* column_family_data,
+      const MutableCFOptions& mutable_cf_options,
+      const ReadOptions& read_options,
+      const autovector<VersionEdit*>& edit_list, InstrumentedMutex* mu,
+      FSDirectory* dir_contains_current_file, bool new_descriptor_log = false,
+      const ColumnFamilyOptions* column_family_options = nullptr,
+      const std::function<void(const Status&)>& manifest_wcb = {}) {
+    autovector<ColumnFamilyData*> cfds;
+    cfds.emplace_back(column_family_data);
+    autovector<const MutableCFOptions*> mutable_cf_options_list;
+    mutable_cf_options_list.emplace_back(&mutable_cf_options);
+    autovector<autovector<VersionEdit*>> edit_lists;
+    edit_lists.emplace_back(edit_list);
+    return LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists,
+                       mu, dir_contains_current_file, new_descriptor_log,
+                       column_family_options, {manifest_wcb});
+  }
+
+  // The across-multi-cf batch version. If edit_lists contains more than
+  // one version edit, the caller must ensure that no edit in the list is a
+  // column family manipulation.
+  virtual Status LogAndApply(
+      const autovector<ColumnFamilyData*>& cfds,
+      const autovector<const MutableCFOptions*>& mutable_cf_options_list,
+      const ReadOptions& read_options,
+      const autovector<autovector<VersionEdit*>>& edit_lists,
+      InstrumentedMutex* mu, FSDirectory* dir_contains_current_file,
+      bool new_descriptor_log = false,
+      const ColumnFamilyOptions* new_cf_options = nullptr,
+      const std::vector<std::function<void(const Status&)>>& manifest_wcbs =
+          {});
+
+  static Status GetCurrentManifestPath(const std::string& dbname,
+                                       FileSystem* fs,
+                                       std::string* manifest_filename,
+                                       uint64_t* manifest_file_number);
+  void WakeUpWaitingManifestWriters();
+
+  // Recover the last saved descriptor (MANIFEST) from persistent storage.
+  // If read_only == true, Recover() will not complain if some column families
+  // are not opened
+  Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families,
+                 bool read_only = false, std::string* db_id = nullptr,
+                 bool no_error_if_files_missing = false);
+
+  Status TryRecover(const std::vector<ColumnFamilyDescriptor>& column_families,
+                    bool read_only,
+                    const std::vector<std::string>& files_in_dbname,
+                    std::string* db_id, bool* has_missing_table_file);
+
+  // Try to recover the version set to the most recent consistent state
+  // recorded in the specified manifest.
+  Status TryRecoverFromOneManifest(
+      const std::string& manifest_path,
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      bool read_only, std::string* db_id, bool* has_missing_table_file);
+
+  // Recover the next epoch number of each CF and the epoch number
+  // of their files (if missing)
+  void RecoverEpochNumbers();
+
+  // Reads a manifest file and returns a list of column families in
+  // column_families.
+  static Status ListColumnFamilies(std::vector<std::string>* column_families,
+                                   const std::string& dbname, FileSystem* fs);
+  static Status ListColumnFamiliesFromManifest(
+      const std::string& manifest_path, FileSystem* fs,
+      std::vector<std::string>* column_families);
+
+  // Try to reduce the number of levels. This call is valid only when a single
+  // level among the levels from the new max level to the old max level
+  // contains files.
+  // The call is static, since the number of levels is immutable during
+  // the lifetime of a RocksDB instance. It reduces the number of levels
+  // in a DB by applying changes to the manifest.
+  // For example, if a db currently has 7 levels [0-6], a call to reduce
+  // to 5 [0-4] can only be executed when only one level
+  // among [4-6] contains files.
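+  // (Editorial note: this machinery is what the ldb tool's reduce_levels
+  // subcommand drives, e.g. something like
+  //   ldb reduce_levels --db=/path/to/db --new_levels=5
+  // The exact flag spelling is an assumption; consult the tool's help text.)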
+ static Status ReduceNumberOfLevels(const std::string& dbname, + const Options* options, + const FileOptions& file_options, + int new_levels); + + // Get the checksum information of all live files + Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list); + + // printf contents (for debugging) + Status DumpManifest(Options& options, std::string& manifestFileName, + bool verbose, bool hex = false, bool json = false); + + + const std::string& DbSessionId() const { return db_session_id_; } + + // Return the current manifest file number + uint64_t manifest_file_number() const { return manifest_file_number_; } + + uint64_t options_file_number() const { return options_file_number_; } + + uint64_t pending_manifest_file_number() const { + return pending_manifest_file_number_; + } + + uint64_t current_next_file_number() const { return next_file_number_.load(); } + + uint64_t min_log_number_to_keep() const { + return min_log_number_to_keep_.load(); + } + + // Allocate and return a new file number + uint64_t NewFileNumber() { return next_file_number_.fetch_add(1); } + + // Fetch And Add n new file number + uint64_t FetchAddFileNumber(uint64_t n) { + return next_file_number_.fetch_add(n); + } + + // Return the last sequence number. + uint64_t LastSequence() const { + return last_sequence_.load(std::memory_order_acquire); + } + + // Note: memory_order_acquire must be sufficient. + uint64_t LastAllocatedSequence() const { + return last_allocated_sequence_.load(std::memory_order_seq_cst); + } + + // Note: memory_order_acquire must be sufficient. + uint64_t LastPublishedSequence() const { + return last_published_sequence_.load(std::memory_order_seq_cst); + } + + // Set the last sequence number to s. + void SetLastSequence(uint64_t s) { + assert(s >= last_sequence_); + // Last visible sequence must always be less than last written seq + assert(!db_options_->two_write_queues || s <= last_allocated_sequence_); + last_sequence_.store(s, std::memory_order_release); + } + + // Note: memory_order_release must be sufficient + void SetLastPublishedSequence(uint64_t s) { + assert(s >= last_published_sequence_); + last_published_sequence_.store(s, std::memory_order_seq_cst); + } + + // Note: memory_order_release must be sufficient + void SetLastAllocatedSequence(uint64_t s) { + assert(s >= last_allocated_sequence_); + last_allocated_sequence_.store(s, std::memory_order_seq_cst); + } + + // Note: memory_order_release must be sufficient + uint64_t FetchAddLastAllocatedSequence(uint64_t s) { + return last_allocated_sequence_.fetch_add(s, std::memory_order_seq_cst); + } + + // Mark the specified file number as used. + // REQUIRED: this is only called during single-threaded recovery or repair. + void MarkFileNumberUsed(uint64_t number); + + // Mark the specified log number as deleted + // REQUIRED: this is only called during single-threaded recovery or repair, or + // from ::LogAndApply where the global mutex is held. + void MarkMinLogNumberToKeep(uint64_t number); + + // Return the log file number for the log file that is currently + // being compacted, or zero if there is no such log file. + uint64_t prev_log_number() const { return prev_log_number_; } + + // Returns the minimum log number which still has data not flushed to any SST + // file. + // In non-2PC mode, all the log numbers smaller than this number can be safely + // deleted, although we still use `min_log_number_to_keep_` to determine when + // to delete a WAL file. 
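+  //
+  // Illustrative sketch (editor's addition): deciding whether a WAL still
+  // holds data that must be kept, where `versions` is a hypothetical
+  // VersionSet* and `wal_number` a hypothetical local:
+  //
+  //   if (wal_number < versions->MinLogNumberWithUnflushedData()) {
+  //     // every column family has flushed past this WAL
+  //   }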
+ uint64_t MinLogNumberWithUnflushedData() const { + return PreComputeMinLogNumberWithUnflushedData(nullptr); + } + + // Returns the minimum log number which still has data not flushed to any SST + // file. + // Empty column families' log number is considered to be + // new_log_number_for_empty_cf. + uint64_t PreComputeMinLogNumberWithUnflushedData( + uint64_t new_log_number_for_empty_cf) const { + uint64_t min_log_num = std::numeric_limits::max(); + for (auto cfd : *column_family_set_) { + // It's safe to ignore dropped column families here: + // cfd->IsDropped() becomes true after the drop is persisted in MANIFEST. + uint64_t num = + cfd->IsEmpty() ? new_log_number_for_empty_cf : cfd->GetLogNumber(); + if (min_log_num > num && !cfd->IsDropped()) { + min_log_num = num; + } + } + return min_log_num; + } + // Returns the minimum log number which still has data not flushed to any SST + // file, except data from `cfd_to_skip`. + uint64_t PreComputeMinLogNumberWithUnflushedData( + const ColumnFamilyData* cfd_to_skip) const { + uint64_t min_log_num = std::numeric_limits::max(); + for (auto cfd : *column_family_set_) { + if (cfd == cfd_to_skip) { + continue; + } + // It's safe to ignore dropped column families here: + // cfd->IsDropped() becomes true after the drop is persisted in MANIFEST. + if (min_log_num > cfd->GetLogNumber() && !cfd->IsDropped()) { + min_log_num = cfd->GetLogNumber(); + } + } + return min_log_num; + } + // Returns the minimum log number which still has data not flushed to any SST + // file, except data from `cfds_to_skip`. + uint64_t PreComputeMinLogNumberWithUnflushedData( + const std::unordered_set& cfds_to_skip) const { + uint64_t min_log_num = std::numeric_limits::max(); + for (auto cfd : *column_family_set_) { + if (cfds_to_skip.count(cfd)) { + continue; + } + // It's safe to ignore dropped column families here: + // cfd->IsDropped() becomes true after the drop is persisted in MANIFEST. + if (min_log_num > cfd->GetLogNumber() && !cfd->IsDropped()) { + min_log_num = cfd->GetLogNumber(); + } + } + return min_log_num; + } + + // Create an iterator that reads over the compaction inputs for "*c". + // The caller should delete the iterator when no longer needed. + // @param read_options Must outlive the returned iterator. + // @param start, end indicates compaction range + InternalIterator* MakeInputIterator( + const ReadOptions& read_options, const Compaction* c, + RangeDelAggregator* range_del_agg, + const FileOptions& file_options_compactions, + const std::optional& start, + const std::optional& end); + + // Add all files listed in any live version to *live_table_files and + // *live_blob_files. Note that these lists may contain duplicates. + void AddLiveFiles(std::vector* live_table_files, + std::vector* live_blob_files) const; + + // Remove live files that are in the delete candidate lists. + void RemoveLiveFiles( + std::vector& sst_delete_candidates, + std::vector& blob_delete_candidates) const; + + // Return the approximate size of data to be scanned for range [start, end) + // in levels [start_level, end_level). 
If end_level == -1 it will search
+  // through all non-empty levels.
+  uint64_t ApproximateSize(const SizeApproximationOptions& options,
+                           const ReadOptions& read_options, Version* v,
+                           const Slice& start, const Slice& end,
+                           int start_level, int end_level,
+                           TableReaderCaller caller);
+
+  // Return the size of the current manifest file
+  uint64_t manifest_file_size() const { return manifest_file_size_; }
+
+  Status GetMetadataForFile(uint64_t number, int* filelevel,
+                            FileMetaData** metadata, ColumnFamilyData** cfd);
+
+  // This function doesn't support leveldb SST filenames
+  void GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata);
+
+  void AddObsoleteBlobFile(uint64_t blob_file_number, std::string path) {
+    assert(table_cache_);
+
+    table_cache_->Erase(GetSliceForKey(&blob_file_number));
+
+    obsolete_blob_files_.emplace_back(blob_file_number, std::move(path));
+  }
+
+  void GetObsoleteFiles(std::vector<ObsoleteFileInfo>* files,
+                        std::vector<ObsoleteBlobFileInfo>* blob_files,
+                        std::vector<std::string>* manifest_filenames,
+                        uint64_t min_pending_output);
+
+  ColumnFamilySet* GetColumnFamilySet() { return column_family_set_.get(); }
+  RefedColumnFamilySet GetRefedColumnFamilySet() {
+    return RefedColumnFamilySet(GetColumnFamilySet());
+  }
+
+  const FileOptions& file_options() { return file_options_; }
+  void ChangeFileOptions(const MutableDBOptions& new_options) {
+    file_options_.writable_file_max_buffer_size =
+        new_options.writable_file_max_buffer_size;
+  }
+
+  const ImmutableDBOptions* db_options() const { return db_options_; }
+
+  static uint64_t GetNumLiveVersions(Version* dummy_versions);
+
+  static uint64_t GetTotalSstFilesSize(Version* dummy_versions);
+
+  static uint64_t GetTotalBlobFileSize(Version* dummy_versions);
+
+  // Get the IO Status returned by written Manifest.
+  const IOStatus& io_status() const { return io_status_; }
+
+  // The returned WalSet needs to be accessed with DB mutex held.
+  const WalSet& GetWalSet() const { return wals_; }
+
+  void TEST_CreateAndAppendVersion(ColumnFamilyData* cfd) {
+    assert(cfd);
+
+    const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions();
+    Version* const version =
+        new Version(cfd, this, file_options_, mutable_cf_options, io_tracer_);
+
+    constexpr bool update_stats = false;
+    const ReadOptions read_options;
+    version->PrepareAppend(mutable_cf_options, read_options, update_stats);
+    AppendVersion(cfd, version);
+  }
+
+ protected:
+  using VersionBuilderMap =
+      UnorderedMap<uint32_t, std::unique_ptr<BaseReferencedVersionBuilder>>;
+
+  struct ManifestWriter;
+
+  friend class Version;
+  friend class VersionEditHandler;
+  friend class VersionEditHandlerPointInTime;
+  friend class DumpManifestHandler;
+  friend class DBImpl;
+  friend class DBImplReadOnly;
+
+  struct LogReporter : public log::Reader::Reporter {
+    Status* status;
+    virtual void Corruption(size_t /*bytes*/, const Status& s) override {
+      if (status->ok()) {
+        *status = s;
+      }
+    }
+  };
+
+  void Reset();
+
+  // Returns the approximated offset of a key in a file for a given version.
+  uint64_t ApproximateOffsetOf(const ReadOptions& read_options, Version* v,
+                               const FdWithKeyRange& f, const Slice& key,
+                               TableReaderCaller caller);
+
+  // Returns the approximated data size between start and end keys in a file
+  // for a given version.
+  uint64_t ApproximateSize(const ReadOptions& read_options, Version* v,
+                           const FdWithKeyRange& f, const Slice& start,
+                           const Slice& end, TableReaderCaller caller);
+
+  struct MutableCFState {
+    uint64_t log_number;
+    std::string full_history_ts_low;
+
+    explicit MutableCFState() = default;
+    explicit MutableCFState(uint64_t _log_number, std::string ts_low)
+        : log_number(_log_number), full_history_ts_low(std::move(ts_low)) {}
+  };
+
+  // Save current contents to *log
+  Status WriteCurrentStateToManifest(
+      const std::unordered_map<uint32_t, MutableCFState>& curr_state,
+      const VersionEdit& wal_additions, log::Writer* log, IOStatus& io_s);
+
+  void AppendVersion(ColumnFamilyData* column_family_data, Version* v);
+
+  ColumnFamilyData* CreateColumnFamily(const ColumnFamilyOptions& cf_options,
+                                       const ReadOptions& read_options,
+                                       const VersionEdit* edit);
+
+  Status VerifyFileMetadata(const ReadOptions& read_options,
+                            ColumnFamilyData* cfd, const std::string& fpath,
+                            int level, const FileMetaData& meta);
+
+  // Protected by DB mutex.
+  WalSet wals_;
+
+  std::unique_ptr<ColumnFamilySet> column_family_set_;
+  Cache* table_cache_;
+  Env* const env_;
+  FileSystemPtr const fs_;
+  SystemClock* const clock_;
+  const std::string dbname_;
+  std::string db_id_;
+  const ImmutableDBOptions* const db_options_;
+  std::atomic<uint64_t> next_file_number_;
+  // Any WAL number smaller than this should be ignored during recovery,
+  // and is qualified for being deleted.
+  std::atomic<uint64_t> min_log_number_to_keep_ = {0};
+  uint64_t manifest_file_number_;
+  uint64_t options_file_number_;
+  uint64_t options_file_size_;
+  uint64_t pending_manifest_file_number_;
+  // The last seq visible to reads. It normally indicates the last sequence in
+  // the memtable, but when using two write queues it could also indicate the
+  // last sequence in the WAL visible to reads.
+  std::atomic<uint64_t> last_sequence_;
+  // The last sequence number of data committed to the descriptor (manifest
+  // file).
+  SequenceNumber descriptor_last_sequence_ = 0;
+  // The last seq that is already allocated. It is applicable only when we have
+  // two write queues. In that case the seq might or might not have appeared in
+  // the memtable, but it is expected to appear in the WAL.
+  // We have last_sequence_ <= last_allocated_sequence_
+  std::atomic<uint64_t> last_allocated_sequence_;
+  // The last allocated sequence that is also published to the readers. This is
+  // applicable only when last_seq_same_as_publish_seq_ is not set. Otherwise
+  // last_sequence_ also indicates the last published seq.
+  // We have last_sequence_ <= last_published_sequence_ <=
+  // last_allocated_sequence_
+  std::atomic<uint64_t> last_published_sequence_;
+  uint64_t prev_log_number_;  // 0 or backing store for memtable being compacted
+
+  // Opened lazily
+  std::unique_ptr<log::Writer> descriptor_log_;
+
+  // generates an increasing version number for every new version
+  uint64_t current_version_number_;
+
+  // Queue of writers to the manifest file
+  std::deque<ManifestWriter*> manifest_writers_;
+
+  // Current size of manifest file
+  uint64_t manifest_file_size_;
+
+  std::vector<ObsoleteFileInfo> obsolete_files_;
+  std::vector<ObsoleteBlobFileInfo> obsolete_blob_files_;
+  std::vector<std::string> obsolete_manifests_;
+
+  // env options for all reads and writes except compactions
+  FileOptions file_options_;
+
+  BlockCacheTracer* const block_cache_tracer_;
+
+  // Store the IO status when Manifest is written
+  IOStatus io_status_;
+
+  std::shared_ptr<IOTracer> io_tracer_;
+
+  std::string db_session_id_;
+
+ private:
+  // REQUIRES db mutex at beginning.
may release and re-acquire db mutex + Status ProcessManifestWrites(std::deque& writers, + InstrumentedMutex* mu, + FSDirectory* dir_contains_current_file, + bool new_descriptor_log, + const ColumnFamilyOptions* new_cf_options, + const ReadOptions& read_options); + + void LogAndApplyCFHelper(VersionEdit* edit, + SequenceNumber* max_last_sequence); + Status LogAndApplyHelper(ColumnFamilyData* cfd, VersionBuilder* b, + VersionEdit* edit, SequenceNumber* max_last_sequence, + InstrumentedMutex* mu); +}; + +// ReactiveVersionSet represents a collection of versions of the column +// families of the database. Users of ReactiveVersionSet, e.g. DBImplSecondary, +// need to replay the MANIFEST (description log in older terms) in order to +// reconstruct and install versions. +class ReactiveVersionSet : public VersionSet { + public: + ReactiveVersionSet(const std::string& dbname, + const ImmutableDBOptions* _db_options, + const FileOptions& _file_options, Cache* table_cache, + WriteBufferManager* write_buffer_manager, + WriteController* write_controller, + const std::shared_ptr& io_tracer); + + ~ReactiveVersionSet() override; + + Status ReadAndApply( + InstrumentedMutex* mu, + std::unique_ptr* manifest_reader, + Status* manifest_read_status, + std::unordered_set* cfds_changed); + + Status Recover(const std::vector& column_families, + std::unique_ptr* manifest_reader, + std::unique_ptr* manifest_reporter, + std::unique_ptr* manifest_reader_status); +#ifndef NDEBUG + uint64_t TEST_read_edits_in_atomic_group() const; +#endif //! NDEBUG + + std::vector& replay_buffer(); + + protected: + // REQUIRES db mutex + Status ApplyOneVersionEditToBuilder( + VersionEdit& edit, std::unordered_set* cfds_changed, + VersionEdit* version_edit); + + Status MaybeSwitchManifest( + log::Reader::Reporter* reporter, + std::unique_ptr* manifest_reader); + + private: + std::unique_ptr manifest_tailer_; + // TODO: plumb Env::IOActivity + const ReadOptions read_options_; + using VersionSet::LogAndApply; + using VersionSet::Recover; + + Status LogAndApply( + const autovector& /*cfds*/, + const autovector& /*mutable_cf_options_list*/, + const ReadOptions& /* read_options */, + const autovector>& /*edit_lists*/, + InstrumentedMutex* /*mu*/, FSDirectory* /*dir_contains_current_file*/, + bool /*new_descriptor_log*/, const ColumnFamilyOptions* /*new_cf_option*/, + const std::vector>& /*manifest_wcbs*/) + override { + return Status::NotSupported("not supported in reactive mode"); + } + + // No copy allowed + ReactiveVersionSet(const ReactiveVersionSet&); + ReactiveVersionSet& operator=(const ReactiveVersionSet&); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set_sync_and_async.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set_sync_and_async.h new file mode 100644 index 0000000000..2507762e8c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_set_sync_and_async.h @@ -0,0 +1,172 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
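+
+// Editor's note (illustrative, not upstream text): this file is compiled in
+// two passes via util/coro_utils.h. DEFINE_SYNC_AND_ASYNC emits a plain
+// synchronous definition in one pass and, when coroutine support is enabled,
+// an async variant with a "Coroutine" suffix in the other; CO_AWAIT and
+// CO_RETURN expand to ordinary calls/returns in the sync pass and to
+// co_await/co_return in the async pass. A sketch of the expansion pattern:
+//
+//   // sync pass:  Status Version::MultiGetFromSST(...) { ... return s; }
+//   // async pass: (coroutine) Version::MultiGetFromSSTCoroutine(...)
+//   //             { ... co_return s; }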
+ +#include "util/coro_utils.h" + +#if defined(WITHOUT_COROUTINES) || \ + (defined(USE_COROUTINES) && defined(WITH_COROUTINES)) + +namespace ROCKSDB_NAMESPACE { + +// Lookup a batch of keys in a single SST file +DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST) +(const ReadOptions& read_options, MultiGetRange file_range, int hit_file_level, + bool skip_filters, bool skip_range_deletions, FdWithKeyRange* f, + std::unordered_map& blob_ctxs, + TableCache::TypedHandle* table_handle, uint64_t& num_filter_read, + uint64_t& num_index_read, uint64_t& num_sst_read) { + bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex && + get_perf_context()->per_level_perf_context_enabled; + + Status s; + StopWatchNano timer(clock_, timer_enabled /* auto_start */); + s = CO_AWAIT(table_cache_->MultiGet)( + read_options, *internal_comparator(), *f->file_metadata, &file_range, + mutable_cf_options_.block_protection_bytes_per_key, + mutable_cf_options_.prefix_extractor, + cfd_->internal_stats()->GetFileReadHist(hit_file_level), skip_filters, + skip_range_deletions, hit_file_level, table_handle); + // TODO: examine the behavior for corrupted key + if (timer_enabled) { + PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(), + hit_file_level); + } + if (!s.ok()) { + // TODO: Set status for individual keys appropriately + for (auto iter = file_range.begin(); iter != file_range.end(); ++iter) { + *iter->s = s; + file_range.MarkKeyDone(iter); + } + CO_RETURN s; + } + uint64_t batch_size = 0; + for (auto iter = file_range.begin(); s.ok() && iter != file_range.end(); + ++iter) { + GetContext& get_context = *iter->get_context; + Status* status = iter->s; + // The Status in the KeyContext takes precedence over GetContext state + // Status may be an error if there were any IO errors in the table + // reader. 
We never expect Status to be NotFound(), as that is + // determined by get_context + assert(!status->IsNotFound()); + if (!status->ok()) { + file_range.MarkKeyDone(iter); + continue; + } + + if (get_context.sample()) { + sample_file_read_inc(f->file_metadata); + } + batch_size++; + num_index_read += get_context.get_context_stats_.num_index_read; + num_filter_read += get_context.get_context_stats_.num_filter_read; + num_sst_read += get_context.get_context_stats_.num_sst_read; + // Reset these stats since they're specific to a level + get_context.get_context_stats_.num_index_read = 0; + get_context.get_context_stats_.num_filter_read = 0; + get_context.get_context_stats_.num_sst_read = 0; + + // report the counters before returning + if (get_context.State() != GetContext::kNotFound && + get_context.State() != GetContext::kMerge && + db_statistics_ != nullptr) { + get_context.ReportCounters(); + } else { + if (iter->max_covering_tombstone_seq > 0) { + // The remaining files we look at will only contain covered keys, so + // we stop here for this key + file_range.SkipKey(iter); + } + } + switch (get_context.State()) { + case GetContext::kNotFound: + // Keep searching in other files + break; + case GetContext::kMerge: + // TODO: update per-level perfcontext user_key_return_count for kMerge + break; + case GetContext::kFound: + if (hit_file_level == 0) { + RecordTick(db_statistics_, GET_HIT_L0); + } else if (hit_file_level == 1) { + RecordTick(db_statistics_, GET_HIT_L1); + } else if (hit_file_level >= 2) { + RecordTick(db_statistics_, GET_HIT_L2_AND_UP); + } + + PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1, hit_file_level); + + file_range.MarkKeyDone(iter); + + if (iter->is_blob_index) { + BlobIndex blob_index; + Status tmp_s; + + if (iter->value) { + TEST_SYNC_POINT_CALLBACK("Version::MultiGet::TamperWithBlobIndex", + &(*iter)); + + tmp_s = blob_index.DecodeFrom(*(iter->value)); + + } else { + assert(iter->columns); + assert(!iter->columns->columns().empty()); + assert(iter->columns->columns().front().name() == + kDefaultWideColumnName); + + tmp_s = + blob_index.DecodeFrom(iter->columns->columns().front().value()); + } + + if (tmp_s.ok()) { + const uint64_t blob_file_num = blob_index.file_number(); + blob_ctxs[blob_file_num].emplace_back(blob_index, &*iter); + } else { + *(iter->s) = tmp_s; + } + } else { + if (iter->value) { + file_range.AddValueSize(iter->value->size()); + } else { + assert(iter->columns); + file_range.AddValueSize(iter->columns->serialized_size()); + } + + if (file_range.GetValueSize() > read_options.value_size_soft_limit) { + s = Status::Aborted(); + break; + } + } + continue; + case GetContext::kDeleted: + // Use empty error message for speed + *status = Status::NotFound(); + file_range.MarkKeyDone(iter); + continue; + case GetContext::kCorrupt: + *status = + Status::Corruption("corrupted key for ", iter->lkey->user_key()); + file_range.MarkKeyDone(iter); + continue; + case GetContext::kUnexpectedBlobIndex: + ROCKS_LOG_ERROR(info_log_, "Encounter unexpected blob index."); + *status = Status::NotSupported( + "Encounter unexpected blob index. 
Please open DB with " + "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); + file_range.MarkKeyDone(iter); + continue; + case GetContext::kMergeOperatorFailed: + *status = Status::Corruption(Status::SubCode::kMergeOperatorFailed); + file_range.MarkKeyDone(iter); + continue; + } + } + + RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size); + CO_RETURN s; +} +} // namespace ROCKSDB_NAMESPACE +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_util.h new file mode 100644 index 0000000000..e39f255719 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/version_util.h @@ -0,0 +1,72 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "db/version_set.h" + +namespace ROCKSDB_NAMESPACE { + +// Instead of opening a `DB` to perform certain manifest updates, this +// uses the underlying `VersionSet` API to read and modify the MANIFEST. This +// allows us to use the user's real options, while not having to worry about +// the DB persisting new SST files via flush/compaction or attempting to read/ +// compact files which may fail, particularly for the file we intend to remove +// (the user may want to remove an already deleted file from MANIFEST). +class OfflineManifestWriter { + public: + OfflineManifestWriter(const DBOptions& options, const std::string& db_path) + : wc_(options.delayed_write_rate), + wb_(options.db_write_buffer_size), + immutable_db_options_(WithDbPath(options, db_path)), + tc_(NewLRUCache(1 << 20 /* capacity */, + options.table_cache_numshardbits)), + versions_(db_path, &immutable_db_options_, sopt_, tc_.get(), &wb_, &wc_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "") {} + + Status Recover(const std::vector& column_families) { + return versions_.Recover(column_families, /*read_only*/ false, + /*db_id*/ nullptr, + /*no_error_if_files_missing*/ true); + } + + Status LogAndApply(const ReadOptions& read_options, ColumnFamilyData* cfd, + VersionEdit* edit, + FSDirectory* dir_contains_current_file) { + // Use `mutex` to imitate a locked DB mutex when calling `LogAndApply()`. + InstrumentedMutex mutex; + mutex.Lock(); + Status s = versions_.LogAndApply( + cfd, *cfd->GetLatestMutableCFOptions(), read_options, edit, &mutex, + dir_contains_current_file, false /* new_descriptor_log */); + mutex.Unlock(); + return s; + } + + VersionSet& Versions() { return versions_; } + const ImmutableDBOptions& IOptions() { return immutable_db_options_; } + + private: + WriteController wc_; + WriteBufferManager wb_; + ImmutableDBOptions immutable_db_options_; + std::shared_ptr tc_; + EnvOptions sopt_; + VersionSet versions_; + + static ImmutableDBOptions WithDbPath(const DBOptions& options, + const std::string& db_path) { + ImmutableDBOptions rv(options); + if (rv.db_paths.empty()) { + // `VersionSet` expects options that have been through + // `SanitizeOptions()`, which would sanitize an empty `db_paths`. 
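+      // (Editor's note) e.g. for db_path == "/data/db" this falls back to a
+      // single entry {"/data/db", /*target_size=*/0}, so that `VersionSet`
+      // always has at least one path to work with.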
+ rv.db_paths.emplace_back(db_path, 0 /* target_size */); + } + return rv; + } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.cc new file mode 100644 index 0000000000..2525be610b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.cc @@ -0,0 +1,211 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/wal_edit.h" + +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +void WalAddition::EncodeTo(std::string* dst) const { + PutVarint64(dst, number_); + + if (metadata_.HasSyncedSize()) { + PutVarint32(dst, static_cast(WalAdditionTag::kSyncedSize)); + PutVarint64(dst, metadata_.GetSyncedSizeInBytes()); + } + + PutVarint32(dst, static_cast(WalAdditionTag::kTerminate)); +} + +Status WalAddition::DecodeFrom(Slice* src) { + constexpr char class_name[] = "WalAddition"; + + if (!GetVarint64(src, &number_)) { + return Status::Corruption(class_name, "Error decoding WAL log number"); + } + + while (true) { + uint32_t tag_value = 0; + if (!GetVarint32(src, &tag_value)) { + return Status::Corruption(class_name, "Error decoding tag"); + } + WalAdditionTag tag = static_cast(tag_value); + switch (tag) { + case WalAdditionTag::kSyncedSize: { + uint64_t size = 0; + if (!GetVarint64(src, &size)) { + return Status::Corruption(class_name, "Error decoding WAL file size"); + } + metadata_.SetSyncedSizeInBytes(size); + break; + } + // TODO: process future tags such as checksum. + case WalAdditionTag::kTerminate: + return Status::OK(); + default: { + std::stringstream ss; + ss << "Unknown tag " << tag_value; + return Status::Corruption(class_name, ss.str()); + } + } + } +} + +JSONWriter& operator<<(JSONWriter& jw, const WalAddition& wal) { + jw << "LogNumber" << wal.GetLogNumber() << "SyncedSizeInBytes" + << wal.GetMetadata().GetSyncedSizeInBytes(); + return jw; +} + +std::ostream& operator<<(std::ostream& os, const WalAddition& wal) { + os << "log_number: " << wal.GetLogNumber() + << " synced_size_in_bytes: " << wal.GetMetadata().GetSyncedSizeInBytes(); + return os; +} + +std::string WalAddition::DebugString() const { + std::ostringstream oss; + oss << *this; + return oss.str(); +} + +void WalDeletion::EncodeTo(std::string* dst) const { + PutVarint64(dst, number_); +} + +Status WalDeletion::DecodeFrom(Slice* src) { + constexpr char class_name[] = "WalDeletion"; + + if (!GetVarint64(src, &number_)) { + return Status::Corruption(class_name, "Error decoding WAL log number"); + } + + return Status::OK(); +} + +JSONWriter& operator<<(JSONWriter& jw, const WalDeletion& wal) { + jw << "LogNumber" << wal.GetLogNumber(); + return jw; +} + +std::ostream& operator<<(std::ostream& os, const WalDeletion& wal) { + os << "log_number: " << wal.GetLogNumber(); + return os; +} + +std::string WalDeletion::DebugString() const { + std::ostringstream oss; + oss << *this; + return oss.str(); +} + +Status WalSet::AddWal(const WalAddition& wal) { + if (wal.GetLogNumber() < min_wal_number_to_keep_) { + // The WAL has been obsolete, ignore it. 
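+    // (Editor's note) A stale record for an already-deleted WAL can still
+    // arrive here, e.g. because edits carrying synced sizes may be committed
+    // out of order (see the comment further below).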
+ return Status::OK(); + } + + auto it = wals_.lower_bound(wal.GetLogNumber()); + bool existing = it != wals_.end() && it->first == wal.GetLogNumber(); + + if (!existing) { + wals_.insert(it, {wal.GetLogNumber(), wal.GetMetadata()}); + return Status::OK(); + } + + assert(existing); + if (!wal.GetMetadata().HasSyncedSize()) { + std::stringstream ss; + ss << "WAL " << wal.GetLogNumber() << " is created more than once"; + return Status::Corruption("WalSet::AddWal", ss.str()); + } + + assert(wal.GetMetadata().HasSyncedSize()); + if (it->second.HasSyncedSize() && wal.GetMetadata().GetSyncedSizeInBytes() <= + it->second.GetSyncedSizeInBytes()) { + // This is possible because version edits with different synced WAL sizes + // for the same WAL can be committed out-of-order. For example, thread + // 1 synces the first 10 bytes of 1.log, while thread 2 synces the first 20 + // bytes of 1.log. It's possible that thread 1 calls LogAndApply() after + // thread 2. + // In this case, just return ok. + return Status::OK(); + } + + // Update synced size for the given WAL. + it->second.SetSyncedSizeInBytes(wal.GetMetadata().GetSyncedSizeInBytes()); + return Status::OK(); +} + +Status WalSet::AddWals(const WalAdditions& wals) { + Status s; + for (const WalAddition& wal : wals) { + s = AddWal(wal); + if (!s.ok()) { + break; + } + } + return s; +} + +Status WalSet::DeleteWalsBefore(WalNumber wal) { + if (wal > min_wal_number_to_keep_) { + min_wal_number_to_keep_ = wal; + wals_.erase(wals_.begin(), wals_.lower_bound(wal)); + } + return Status::OK(); +} + +void WalSet::Reset() { + wals_.clear(); + min_wal_number_to_keep_ = 0; +} + +Status WalSet::CheckWals( + Env* env, + const std::unordered_map& logs_on_disk) const { + assert(env != nullptr); + + Status s; + for (const auto& wal : wals_) { + const uint64_t log_number = wal.first; + const WalMetadata& wal_meta = wal.second; + + if (!wal_meta.HasSyncedSize()) { + // The WAL and WAL directory is not even synced, + // so the WAL's inode may not be persisted, + // then the WAL might not show up when listing WAL directory. + continue; + } + + if (logs_on_disk.find(log_number) == logs_on_disk.end()) { + std::stringstream ss; + ss << "Missing WAL with log number: " << log_number << "."; + s = Status::Corruption(ss.str()); + break; + } + + uint64_t log_file_size = 0; + s = env->GetFileSize(logs_on_disk.at(log_number), &log_file_size); + if (!s.ok()) { + break; + } + if (log_file_size < wal_meta.GetSyncedSizeInBytes()) { + std::stringstream ss; + ss << "Size mismatch: WAL (log number: " << log_number + << ") in MANIFEST is " << wal_meta.GetSyncedSizeInBytes() + << " bytes , but actually is " << log_file_size << " bytes on disk."; + s = Status::Corruption(ss.str()); + break; + } + } + + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.h new file mode 100644 index 0000000000..bb5c5e292e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_edit.h @@ -0,0 +1,177 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// WAL related classes used in VersionEdit and VersionSet. +// Modifications to WalAddition and WalDeletion may need to update +// VersionEdit and its related tests. 
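+//
+// Illustrative encode/decode roundtrip (editor's sketch, not upstream text):
+//
+//   WalAddition add(/*number=*/7, WalMetadata(/*synced_size_bytes=*/4096));
+//   std::string buf;
+//   add.EncodeTo(&buf);   // varint log number, tagged fields, kTerminate
+//   WalAddition decoded;
+//   Slice input(buf);
+//   Status s = decoded.DecodeFrom(&input);
+//   assert(s.ok() && decoded.GetLogNumber() == 7);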
+ +#pragma once + +#include +#include +#include +#include +#include + +#include "logging/event_logger.h" +#include "port/port.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class JSONWriter; +class Slice; +class Status; + +using WalNumber = uint64_t; + +// Metadata of a WAL. +class WalMetadata { + public: + WalMetadata() = default; + + explicit WalMetadata(uint64_t synced_size_bytes) + : synced_size_bytes_(synced_size_bytes) {} + + bool HasSyncedSize() const { return synced_size_bytes_ != kUnknownWalSize; } + + void SetSyncedSizeInBytes(uint64_t bytes) { synced_size_bytes_ = bytes; } + + uint64_t GetSyncedSizeInBytes() const { return synced_size_bytes_; } + + private: + friend bool operator==(const WalMetadata& lhs, const WalMetadata& rhs); + friend bool operator!=(const WalMetadata& lhs, const WalMetadata& rhs); + // The size of WAL is unknown, used when the WAL is not synced yet or is + // empty. + constexpr static uint64_t kUnknownWalSize = + std::numeric_limits::max(); + + // Size of the most recently synced WAL in bytes. + uint64_t synced_size_bytes_ = kUnknownWalSize; +}; + +inline bool operator==(const WalMetadata& lhs, const WalMetadata& rhs) { + return lhs.synced_size_bytes_ == rhs.synced_size_bytes_; +} + +inline bool operator!=(const WalMetadata& lhs, const WalMetadata& rhs) { + return !(lhs == rhs); +} + +// These tags are persisted to MANIFEST, so it's part of the user API. +enum class WalAdditionTag : uint32_t { + // Indicates that there are no more tags. + kTerminate = 1, + // Synced Size in bytes. + kSyncedSize = 2, + // Add tags in the future, such as checksum? +}; + +// Records the event of adding a WAL in VersionEdit. +class WalAddition { + public: + WalAddition() : number_(0), metadata_() {} + + explicit WalAddition(WalNumber number) : number_(number), metadata_() {} + + WalAddition(WalNumber number, WalMetadata meta) + : number_(number), metadata_(std::move(meta)) {} + + WalNumber GetLogNumber() const { return number_; } + + const WalMetadata& GetMetadata() const { return metadata_; } + + void EncodeTo(std::string* dst) const; + + Status DecodeFrom(Slice* src); + + std::string DebugString() const; + + private: + WalNumber number_; + WalMetadata metadata_; +}; + +std::ostream& operator<<(std::ostream& os, const WalAddition& wal); +JSONWriter& operator<<(JSONWriter& jw, const WalAddition& wal); + +using WalAdditions = std::vector; + +// Records the event of deleting WALs before the specified log number. +class WalDeletion { + public: + WalDeletion() : number_(kEmpty) {} + + explicit WalDeletion(WalNumber number) : number_(number) {} + + WalNumber GetLogNumber() const { return number_; } + + void EncodeTo(std::string* dst) const; + + Status DecodeFrom(Slice* src); + + std::string DebugString() const; + + bool IsEmpty() const { return number_ == kEmpty; } + + void Reset() { number_ = kEmpty; } + + private: + static constexpr WalNumber kEmpty = 0; + + WalNumber number_; +}; + +std::ostream& operator<<(std::ostream& os, const WalDeletion& wal); +JSONWriter& operator<<(JSONWriter& jw, const WalDeletion& wal); + +// Used in VersionSet to keep the current set of WALs. +// +// When a WAL is synced or becomes obsoleted, +// a VersionEdit is logged to MANIFEST and +// the WAL is added to or deleted from WalSet. +// +// Not thread safe, needs external synchronization such as holding DB mutex. +class WalSet { + public: + // Add WAL(s). 
+ // If the WAL is closed, + // then there must be an existing unclosed WAL, + // otherwise, return Status::Corruption. + // Can happen when applying a VersionEdit or recovering from MANIFEST. + Status AddWal(const WalAddition& wal); + Status AddWals(const WalAdditions& wals); + + // Delete WALs with log number smaller than the specified wal number. + // Can happen when applying a VersionEdit or recovering from MANIFEST. + Status DeleteWalsBefore(WalNumber wal); + + // Resets the internal state. + void Reset(); + + // WALs with number less than MinWalNumberToKeep should not exist in WalSet. + WalNumber GetMinWalNumberToKeep() const { return min_wal_number_to_keep_; } + + const std::map& GetWals() const { return wals_; } + + // Checks whether there are missing or corrupted WALs. + // Returns Status::OK if there is no missing nor corrupted WAL, + // otherwise returns Status::Corruption. + // logs_on_disk is a map from log number to the log filename. + // Note that logs_on_disk may contain logs that is obsolete but + // haven't been deleted from disk. + Status CheckWals( + Env* env, + const std::unordered_map& logs_on_disk) const; + + private: + std::map wals_; + // WAL number < min_wal_number_to_keep_ should not exist in wals_. + // It's monotonically increasing, in-memory only, not written to MANIFEST. + WalNumber min_wal_number_to_keep_ = 0; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.cc new file mode 100644 index 0000000000..400b2e58b6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.cc @@ -0,0 +1,527 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/wal_manager.h" + +#include +#include +#include +#include + +#include "db/log_reader.h" +#include "db/log_writer.h" +#include "db/transaction_log_impl.h" +#include "db/write_batch_internal.h" +#include "file/file_util.h" +#include "file/filename.h" +#include "file/sequence_file_reader.h" +#include "logging/logging.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/options.h" +#include "rocksdb/write_batch.h" +#include "test_util/sync_point.h" +#include "util/cast_util.h" +#include "util/coding.h" +#include "util/mutexlock.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + + +Status WalManager::DeleteFile(const std::string& fname, uint64_t number) { + auto s = env_->DeleteFile(wal_dir_ + "/" + fname); + if (s.ok()) { + MutexLock l(&read_first_record_cache_mutex_); + read_first_record_cache_.erase(number); + } + return s; +} + +Status WalManager::GetSortedWalFiles(VectorLogPtr& files) { + // First get sorted files in db dir, then get sorted files from archived + // dir, to avoid a race condition where a log file is moved to archived + // dir in between. + Status s; + // list wal files in main db dir. 
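+  // (Editor's note) The ordering is what prevents lost files: if a WAL is
+  // archived between the two directory listings, listing the live dir first
+  // means it is seen at least once (possibly in both dirs, which the merge
+  // below resolves); listing the archive first could miss it entirely.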
+ VectorLogPtr logs; + s = GetSortedWalsOfType(wal_dir_, logs, kAliveLogFile); + if (!s.ok()) { + return s; + } + + // Reproduce the race condition where a log file is moved + // to archived dir, between these two sync points, used in + // (DBTest,TransactionLogIteratorRace) + TEST_SYNC_POINT("WalManager::GetSortedWalFiles:1"); + TEST_SYNC_POINT("WalManager::GetSortedWalFiles:2"); + + files.clear(); + // list wal files in archive dir. + std::string archivedir = ArchivalDirectory(wal_dir_); + Status exists = env_->FileExists(archivedir); + if (exists.ok()) { + s = GetSortedWalsOfType(archivedir, files, kArchivedLogFile); + if (!s.ok()) { + return s; + } + } else if (!exists.IsNotFound()) { + assert(s.IsIOError()); + return s; + } + + uint64_t latest_archived_log_number = 0; + if (!files.empty()) { + latest_archived_log_number = files.back()->LogNumber(); + ROCKS_LOG_INFO(db_options_.info_log, "Latest Archived log: %" PRIu64, + latest_archived_log_number); + } + + files.reserve(files.size() + logs.size()); + for (auto& log : logs) { + if (log->LogNumber() > latest_archived_log_number) { + files.push_back(std::move(log)); + } else { + // When the race condition happens, we could see the + // same log in both db dir and archived dir. Simply + // ignore the one in db dir. Note that, if we read + // archived dir first, we would have missed the log file. + ROCKS_LOG_WARN(db_options_.info_log, "%s already moved to archive", + log->PathName().c_str()); + } + } + + return s; +} + +Status WalManager::GetUpdatesSince( + SequenceNumber seq, std::unique_ptr* iter, + const TransactionLogIterator::ReadOptions& read_options, + VersionSet* version_set) { + if (seq_per_batch_) { + return Status::NotSupported(); + } + + assert(!seq_per_batch_); + + // Get all sorted Wal Files. + // Do binary search and open files and find the seq number. + + std::unique_ptr wal_files(new VectorLogPtr); + Status s = GetSortedWalFiles(*wal_files); + if (!s.ok()) { + return s; + } + + s = RetainProbableWalFiles(*wal_files, seq); + if (!s.ok()) { + return s; + } + iter->reset(new TransactionLogIteratorImpl( + wal_dir_, &db_options_, read_options, file_options_, seq, + std::move(wal_files), version_set, seq_per_batch_, io_tracer_)); + return (*iter)->status(); +} + +// 1. Go through all archived files and +// a. if ttl is enabled, delete outdated files +// b. if archive size limit is enabled, delete empty files, +// compute file number and size. +// 2. If size limit is enabled: +// a. compute how many files should be deleted +// b. get sorted non-empty archived logs +// c. delete what should be deleted +void WalManager::PurgeObsoleteWALFiles() { + bool const ttl_enabled = db_options_.WAL_ttl_seconds > 0; + bool const size_limit_enabled = db_options_.WAL_size_limit_MB > 0; + if (!ttl_enabled && !size_limit_enabled) { + return; + } + + int64_t current_time = 0; + Status s = db_options_.clock->GetCurrentTime(¤t_time); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, "Can't get current time: %s", + s.ToString().c_str()); + assert(false); + return; + } + uint64_t const now_seconds = static_cast(current_time); + uint64_t const time_to_check = (ttl_enabled && !size_limit_enabled) + ? 
db_options_.WAL_ttl_seconds / 2 + : kDefaultIntervalToDeleteObsoleteWAL; + + if (purge_wal_files_last_run_ + time_to_check > now_seconds) { + return; + } + + purge_wal_files_last_run_ = now_seconds; + + std::string archival_dir = ArchivalDirectory(wal_dir_); + std::vector files; + s = env_->GetChildren(archival_dir, &files); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, "Can't get archive files: %s", + s.ToString().c_str()); + assert(false); + return; + } + + size_t log_files_num = 0; + uint64_t log_file_size = 0; + for (auto& f : files) { + uint64_t number; + FileType type; + if (ParseFileName(f, &number, &type) && type == kWalFile) { + std::string const file_path = archival_dir + "/" + f; + if (ttl_enabled) { + uint64_t file_m_time; + s = env_->GetFileModificationTime(file_path, &file_m_time); + if (!s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Can't get file mod time: %s: %s", file_path.c_str(), + s.ToString().c_str()); + continue; + } + if (now_seconds - file_m_time > db_options_.WAL_ttl_seconds) { + s = DeleteDBFile(&db_options_, file_path, archival_dir, false, + /*force_fg=*/!wal_in_db_path_); + if (!s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, "Can't delete file: %s: %s", + file_path.c_str(), s.ToString().c_str()); + continue; + } else { + MutexLock l(&read_first_record_cache_mutex_); + read_first_record_cache_.erase(number); + } + continue; + } + } + + if (size_limit_enabled) { + uint64_t file_size; + s = env_->GetFileSize(file_path, &file_size); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Unable to get file size: %s: %s", file_path.c_str(), + s.ToString().c_str()); + return; + } else { + if (file_size > 0) { + log_file_size = std::max(log_file_size, file_size); + ++log_files_num; + } else { + s = DeleteDBFile(&db_options_, file_path, archival_dir, false, + /*force_fg=*/!wal_in_db_path_); + if (!s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Unable to delete file: %s: %s", file_path.c_str(), + s.ToString().c_str()); + continue; + } else { + MutexLock l(&read_first_record_cache_mutex_); + read_first_record_cache_.erase(number); + } + } + } + } + } + } + + if (0 == log_files_num || !size_limit_enabled) { + return; + } + + size_t const files_keep_num = static_cast( + db_options_.WAL_size_limit_MB * 1024 * 1024 / log_file_size); + if (log_files_num <= files_keep_num) { + return; + } + + size_t files_del_num = log_files_num - files_keep_num; + VectorLogPtr archived_logs; + s = GetSortedWalsOfType(archival_dir, archived_logs, kArchivedLogFile); + if (!s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Unable to get archived WALs from: %s: %s", + archival_dir.c_str(), s.ToString().c_str()); + files_del_num = 0; + } else if (files_del_num > archived_logs.size()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Trying to delete more archived log files than " + "exist. 
Deleting all"); + files_del_num = archived_logs.size(); + } + + for (size_t i = 0; i < files_del_num; ++i) { + std::string const file_path = archived_logs[i]->PathName(); + s = DeleteDBFile(&db_options_, wal_dir_ + "/" + file_path, wal_dir_, false, + /*force_fg=*/!wal_in_db_path_); + if (!s.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s", + file_path.c_str(), s.ToString().c_str()); + continue; + } else { + MutexLock l(&read_first_record_cache_mutex_); + read_first_record_cache_.erase(archived_logs[i]->LogNumber()); + } + } +} + +void WalManager::ArchiveWALFile(const std::string& fname, uint64_t number) { + auto archived_log_name = ArchivedLogFileName(wal_dir_, number); + // The sync point below is used in (DBTest,TransactionLogIteratorRace) + TEST_SYNC_POINT("WalManager::PurgeObsoleteFiles:1"); + Status s = env_->RenameFile(fname, archived_log_name); + // The sync point below is used in (DBTest,TransactionLogIteratorRace) + TEST_SYNC_POINT("WalManager::PurgeObsoleteFiles:2"); + ROCKS_LOG_INFO(db_options_.info_log, "Move log file %s to %s -- %s\n", + fname.c_str(), archived_log_name.c_str(), + s.ToString().c_str()); +} + +Status WalManager::GetSortedWalsOfType(const std::string& path, + VectorLogPtr& log_files, + WalFileType log_type) { + std::vector all_files; + const Status status = env_->GetChildren(path, &all_files); + if (!status.ok()) { + return status; + } + log_files.reserve(all_files.size()); + for (const auto& f : all_files) { + uint64_t number; + FileType type; + if (ParseFileName(f, &number, &type) && type == kWalFile) { + SequenceNumber sequence; + Status s = ReadFirstRecord(log_type, number, &sequence); + if (!s.ok()) { + return s; + } + if (sequence == 0) { + // empty file + continue; + } + + // Reproduce the race condition where a log file is moved + // to archived dir, between these two sync points, used in + // (DBTest,TransactionLogIteratorRace) + TEST_SYNC_POINT("WalManager::GetSortedWalsOfType:1"); + TEST_SYNC_POINT("WalManager::GetSortedWalsOfType:2"); + + uint64_t size_bytes; + s = env_->GetFileSize(LogFileName(path, number), &size_bytes); + // re-try in case the alive log file has been moved to archive. + if (!s.ok() && log_type == kAliveLogFile) { + std::string archived_file = ArchivedLogFileName(path, number); + if (env_->FileExists(archived_file).ok()) { + s = env_->GetFileSize(archived_file, &size_bytes); + if (!s.ok() && env_->FileExists(archived_file).IsNotFound()) { + // oops, the file just got deleted from archived dir! move on + s = Status::OK(); + continue; + } + } + } + if (!s.ok()) { + return s; + } + + log_files.push_back(std::unique_ptr( + new LogFileImpl(number, log_type, sequence, size_bytes))); + } + } + std::sort( + log_files.begin(), log_files.end(), + [](const std::unique_ptr& a, const std::unique_ptr& b) { + LogFileImpl* a_impl = static_cast_with_check(a.get()); + LogFileImpl* b_impl = static_cast_with_check(b.get()); + return *a_impl < *b_impl; + }); + return status; +} + +Status WalManager::RetainProbableWalFiles(VectorLogPtr& all_logs, + const SequenceNumber target) { + int64_t start = 0; // signed to avoid overflow when target is < first file. + int64_t end = static_cast(all_logs.size()) - 1; + // Binary Search. avoid opening all files. + while (end >= start) { + int64_t mid = start + (end - start) / 2; // Avoid overflow. 
+ SequenceNumber current_seq_num = + all_logs.at(static_cast(mid))->StartSequence(); + if (current_seq_num == target) { + end = mid; + break; + } else if (current_seq_num < target) { + start = mid + 1; + } else { + end = mid - 1; + } + } + // end could be -ve. + size_t start_index = + static_cast(std::max(static_cast(0), end)); + // The last wal file is always included + all_logs.erase(all_logs.begin(), all_logs.begin() + start_index); + return Status::OK(); +} + +Status WalManager::ReadFirstRecord(const WalFileType type, + const uint64_t number, + SequenceNumber* sequence) { + *sequence = 0; + if (type != kAliveLogFile && type != kArchivedLogFile) { + ROCKS_LOG_ERROR(db_options_.info_log, "[WalManger] Unknown file type %s", + std::to_string(type).c_str()); + return Status::NotSupported("File Type Not Known " + std::to_string(type)); + } + { + MutexLock l(&read_first_record_cache_mutex_); + auto itr = read_first_record_cache_.find(number); + if (itr != read_first_record_cache_.end()) { + *sequence = itr->second; + return Status::OK(); + } + } + Status s; + if (type == kAliveLogFile) { + std::string fname = LogFileName(wal_dir_, number); + s = ReadFirstLine(fname, number, sequence); + if (!s.ok() && env_->FileExists(fname).ok()) { + // return any error that is not caused by non-existing file + return s; + } + } + + if (type == kArchivedLogFile || !s.ok()) { + // check if the file got moved to archive. + std::string archived_file = ArchivedLogFileName(wal_dir_, number); + s = ReadFirstLine(archived_file, number, sequence); + // maybe the file was deleted from archive dir. If that's the case, return + // Status::OK(). The caller with identify this as empty file because + // *sequence == 0 + if (!s.ok() && env_->FileExists(archived_file).IsNotFound()) { + return Status::OK(); + } + } + + if (s.ok() && *sequence != 0) { + MutexLock l(&read_first_record_cache_mutex_); + read_first_record_cache_.insert({number, *sequence}); + } + return s; +} + +Status WalManager::GetLiveWalFile(uint64_t number, + std::unique_ptr* log_file) { + if (!log_file) { + return Status::InvalidArgument("log_file not preallocated."); + } + + if (!number) { + return Status::PathNotFound("log file not available"); + } + + Status s; + + uint64_t size_bytes; + s = env_->GetFileSize(LogFileName(wal_dir_, number), &size_bytes); + + if (!s.ok()) { + return s; + } + + log_file->reset(new LogFileImpl(number, kAliveLogFile, + 0, // SequenceNumber + size_bytes)); + + return Status::OK(); +} + +// the function returns status.ok() and sequence == 0 if the file exists, but is +// empty +Status WalManager::ReadFirstLine(const std::string& fname, + const uint64_t number, + SequenceNumber* sequence) { + struct LogReporter : public log::Reader::Reporter { + Env* env; + Logger* info_log; + const char* fname; + + Status* status; + bool ignore_error; // true if db_options_.paranoid_checks==false + void Corruption(size_t bytes, const Status& s) override { + ROCKS_LOG_WARN(info_log, "[WalManager] %s%s: dropping %d bytes; %s", + (this->ignore_error ? 
"(ignoring error) " : ""), fname, + static_cast(bytes), s.ToString().c_str()); + if (this->status->ok()) { + // only keep the first error + *this->status = s; + } + } + }; + + std::unique_ptr file; + Status status = fs_->NewSequentialFile( + fname, fs_->OptimizeForLogRead(file_options_), &file, nullptr); + std::unique_ptr file_reader( + new SequentialFileReader(std::move(file), fname, io_tracer_)); + + if (!status.ok()) { + return status; + } + + LogReporter reporter; + reporter.env = env_; + reporter.info_log = db_options_.info_log.get(); + reporter.fname = fname.c_str(); + reporter.status = &status; + reporter.ignore_error = !db_options_.paranoid_checks; + log::Reader reader(db_options_.info_log, std::move(file_reader), &reporter, + true /*checksum*/, number); + std::string scratch; + Slice record; + + if (reader.ReadRecord(&record, &scratch) && + (status.ok() || !db_options_.paranoid_checks)) { + if (record.size() < WriteBatchInternal::kHeader) { + reporter.Corruption(record.size(), + Status::Corruption("log record too small")); + // TODO read record's till the first no corrupt entry? + } else { + WriteBatch batch; + // We can overwrite an existing non-OK Status since it'd only reach here + // with `paranoid_checks == false`. + status = WriteBatchInternal::SetContents(&batch, record); + if (status.ok()) { + *sequence = WriteBatchInternal::Sequence(&batch); + return status; + } + } + } + + if (status.ok() && reader.IsCompressedAndEmptyFile()) { + // In case of wal_compression, it writes a `kSetCompressionType` record + // which is not associated with any sequence number. As result for an empty + // file, GetSortedWalsOfType() will skip these WALs causing the operations + // to fail. + // Therefore, in order to avoid that failure, it sets sequence_number to 1 + // indicating those WALs should be included. + *sequence = 1; + } else { + // ReadRecord might have returned false on EOF, which means that the log + // file is empty. Or, a failure may have occurred while processing the first + // entry. In any case, return status and set sequence number to 0. + *sequence = 0; + } + return status; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.h new file mode 100644 index 0000000000..ab79bf0023 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wal_manager.h @@ -0,0 +1,136 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db/version_set.h" +#include "file/file_util.h" +#include "options/db_options.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/status.h" +#include "rocksdb/transaction_log.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + + +// WAL manager provides the abstraction for reading the WAL files as a single +// unit. Internally, it opens and reads the files using Reader or Writer +// abstraction. 
+class WalManager {
+ public:
+  WalManager(const ImmutableDBOptions& db_options,
+             const FileOptions& file_options,
+             const std::shared_ptr<IOTracer>& io_tracer,
+             const bool seq_per_batch = false)
+      : db_options_(db_options),
+        file_options_(file_options),
+        env_(db_options.env),
+        fs_(db_options.fs, io_tracer),
+        purge_wal_files_last_run_(0),
+        seq_per_batch_(seq_per_batch),
+        wal_dir_(db_options_.GetWalDir()),
+        wal_in_db_path_(db_options_.IsWalDirSameAsDBPath()),
+        io_tracer_(io_tracer) {}
+
+  Status GetSortedWalFiles(VectorLogPtr& files);
+
+  // Allow the user to tail the transaction log to find all recent changes to
+  // the database that are newer than `seq_number`.
+  Status GetUpdatesSince(
+      SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
+      const TransactionLogIterator::ReadOptions& read_options,
+      VersionSet* version_set);
+
+  void PurgeObsoleteWALFiles();
+
+  void ArchiveWALFile(const std::string& fname, uint64_t number);
+
+  Status DeleteFile(const std::string& fname, uint64_t number);
+
+  Status GetLiveWalFile(uint64_t number, std::unique_ptr<LogFile>* log_file);
+
+  Status TEST_ReadFirstRecord(const WalFileType type, const uint64_t number,
+                              SequenceNumber* sequence) {
+    return ReadFirstRecord(type, number, sequence);
+  }
+
+  Status TEST_ReadFirstLine(const std::string& fname, const uint64_t number,
+                            SequenceNumber* sequence) {
+    return ReadFirstLine(fname, number, sequence);
+  }
+
+ private:
+  Status GetSortedWalsOfType(const std::string& path, VectorLogPtr& log_files,
+                             WalFileType type);
+  // Requires: all_logs should be sorted with earliest log file first
+  // Retains all log files in all_logs which contain updates with sequence
+  // numbers greater than or equal to the requested SequenceNumber.
+  Status RetainProbableWalFiles(VectorLogPtr& all_logs,
+                                const SequenceNumber target);
+
+  // ReadFirstRecord checks the read_first_record_cache_ to see if the entry
+  // exists or not. If not, it will read the WAL file.
+  // In case of wal_compression, the WAL contains a `kSetCompressionType`
+  // record which is not associated with any sequence number. So the
+  // sequence_number is set to 1 if that WAL doesn't include any other record
+  // (i.e., it is basically empty), in order to include that WAL, and the
+  // result is inserted in read_first_record_cache_.
+  // Therefore, sequence_number is used as a boolean indicating whether the
+  // WAL should be included, and that sequence_number shouldn't be used for
+  // any other purpose.
+  Status ReadFirstRecord(const WalFileType type, const uint64_t number,
+                         SequenceNumber* sequence);
+
+  // In case of no wal_compression, ReadFirstLine returns status.ok() and
+  // sequence == 0 if the file exists, but is empty.
+  // In case of wal_compression, the WAL contains a
+  // `kSetCompressionType` record which is not associated with any sequence
+  // number if that WAL doesn't include any other record (basically empty). As
+  // a result, for an empty file, GetSortedWalsOfType() would skip these WALs,
+  // causing the operations to fail. To avoid that, it sets sequence_number to
+  // 1 in order to include that WAL.
+  Status ReadFirstLine(const std::string& fname, const uint64_t number,
+                       SequenceNumber* sequence);
+
+  // ------- state from DBImpl ------
+  const ImmutableDBOptions& db_options_;
+  const FileOptions file_options_;
+  Env* env_;
+  const FileSystemPtr fs_;
+
+  // ------- WalManager state -------
+  // cache for ReadFirstRecord() calls
+  std::unordered_map<uint64_t, SequenceNumber> read_first_record_cache_;
+  port::Mutex read_first_record_cache_mutex_;
+
+  // last time when PurgeObsoleteWALFiles ran.
+  uint64_t purge_wal_files_last_run_;
+
+  bool seq_per_batch_;
+
+  const std::string& wal_dir_;
+
+  bool wal_in_db_path_;
+
+  // Obsolete files will be deleted every kDefaultIntervalToDeleteObsoleteWAL
+  // seconds if TTL-based deletion is enabled and the archive size limit is
+  // disabled.
+  static constexpr uint64_t kDefaultIntervalToDeleteObsoleteWAL = 600;
+
+  std::shared_ptr<IOTracer> io_tracer_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.cc
new file mode 100644
index 0000000000..f62143c405
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.cc
@@ -0,0 +1,182 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/wide/wide_column_serialization.h"
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+
+#include "rocksdb/slice.h"
+#include "util/autovector.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status WideColumnSerialization::SerializeImpl(const Slice* value_of_default,
+                                              const WideColumns& columns,
+                                              std::string& output) {
+  const size_t num_columns =
+      value_of_default ? columns.size() + 1 : columns.size();
+
+  if (num_columns >
+      static_cast<size_t>(std::numeric_limits<uint32_t>::max())) {
+    return Status::InvalidArgument("Too many wide columns");
+  }
+
+  PutVarint32(&output, kCurrentVersion);
+
+  PutVarint32(&output, static_cast<uint32_t>(num_columns));
+
+  const Slice* prev_name = nullptr;
+  if (value_of_default) {
+    if (value_of_default->size() >
+        static_cast<size_t>(std::numeric_limits<uint32_t>::max())) {
+      return Status::InvalidArgument("Wide column value too long");
+    }
+
+    PutLengthPrefixedSlice(&output, kDefaultWideColumnName);
+    PutVarint32(&output, static_cast<uint32_t>(value_of_default->size()));
+
+    prev_name = &kDefaultWideColumnName;
+  }
+
+  for (size_t i = 0; i < columns.size(); ++i) {
+    const WideColumn& column = columns[i];
+
+    const Slice& name = column.name();
+    if (name.size() >
+        static_cast<size_t>(std::numeric_limits<uint32_t>::max())) {
+      return Status::InvalidArgument("Wide column name too long");
+    }
+
+    if (prev_name && prev_name->compare(name) >= 0) {
+      return Status::Corruption("Wide columns out of order");
+    }
+
+    const Slice& value = column.value();
+    if (value.size() >
+        static_cast<size_t>(std::numeric_limits<uint32_t>::max())) {
+      return Status::InvalidArgument("Wide column value too long");
+    }
+
+    PutLengthPrefixedSlice(&output, name);
+    PutVarint32(&output, static_cast<uint32_t>(value.size()));
+
+    prev_name = &name;
+  }
+
+  if (value_of_default) {
+    output.append(value_of_default->data(), value_of_default->size());
+  }
+
+  for (const auto& column : columns) {
+    const Slice& value = column.value();
+
+    output.append(value.data(), value.size());
+  }
+
+  return Status::OK();
+}
+
+Status WideColumnSerialization::Deserialize(Slice& input,
+                                            WideColumns& columns) {
+  assert(columns.empty());
+
+  uint32_t version = 0;
+  if (!GetVarint32(&input, &version)) {
+    return Status::Corruption("Error decoding wide column version");
+  }
+
+  if (version > kCurrentVersion) {
+    return Status::NotSupported("Unsupported wide column version");
+  }
+
+  uint32_t num_columns = 0;
+  if (!GetVarint32(&input, &num_columns)) {
+    return Status::Corruption("Error decoding number of wide columns");
+  }
+
+  if (!num_columns) {
+    return Status::OK();
+  }
+
+  columns.reserve(num_columns);
+
+  autovector<uint32_t, 16> column_value_sizes;
+  column_value_sizes.reserve(num_columns);
+
+  for (uint32_t i = 0; i < num_columns; ++i) {
+    Slice name;
+    if (!GetLengthPrefixedSlice(&input, &name)) {
+      return Status::Corruption("Error decoding wide column name");
+    }
+
+    if (!columns.empty() && columns.back().name().compare(name) >= 0) {
+      return Status::Corruption("Wide columns out of order");
+    }
+
+    columns.emplace_back(name, Slice());
+
+    uint32_t value_size = 0;
+    if (!GetVarint32(&input, &value_size)) {
+      return Status::Corruption("Error decoding wide column value size");
+    }
+
+    column_value_sizes.emplace_back(value_size);
+  }
+
+  const Slice data(input);
+  size_t pos = 0;
+
+  for (uint32_t i = 0; i < num_columns; ++i) {
+    const uint32_t value_size = column_value_sizes[i];
+
+    if (pos + value_size > data.size()) {
+      return Status::Corruption("Error decoding wide column value payload");
+    }
+
+    columns[i].value() = Slice(data.data() + pos, value_size);
+
+    pos += value_size;
+  }
+
+  return Status::OK();
+}
+
+WideColumns::const_iterator WideColumnSerialization::Find(
+    const WideColumns& columns, const Slice& column_name) {
+  const auto it =
+      std::lower_bound(columns.cbegin(), columns.cend(), column_name,
+                       [](const WideColumn& lhs, const Slice& rhs) {
+                         return lhs.name().compare(rhs) < 0;
+                       });
+
+  if (it == columns.cend() || it->name() != column_name) {
+    return columns.cend();
+  }
+
+  return it;
+}
+
+Status WideColumnSerialization::GetValueOfDefaultColumn(Slice& input,
+                                                        Slice& value) {
+  WideColumns columns;
+
+  const Status s = Deserialize(input, columns);
+  if (!s.ok()) {
+    return s;
+  }
+
+  if (columns.empty() || columns[0].name() != kDefaultWideColumnName) {
+    value.clear();
+    return Status::OK();
+  }
+
+  value = columns[0].value();
+
+  return Status::OK();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.h
new file mode 100644
index 0000000000..f0ffbd3924
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_column_serialization.h
@@ -0,0 +1,77 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/status.h"
+#include "rocksdb/wide_columns.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+
+// Wide-column serialization/deserialization primitives.
+//
+// The two main parts of the layout are 1) a sorted index containing the column
+// names and column value sizes and 2) the column values themselves. Keeping
+// the index and the values separate will enable selectively reading column
+// values down the line. Note that currently the index has to be fully parsed
+// in order to find out the offset of each column value.
+//
+// Legend: cn = column name, cv = column value, cns = column name size, cvs =
+// column value size.
+//
+// +----------+--------------+----------+-------+----------+---...
+// | version  | # of columns |  cns 1   | cn 1  |  cvs 1   |
+// +----------+--------------+----------+-------+----------+---...
+// | varint32 |   varint32   | varint32 | bytes | varint32 |
+// +----------+--------------+----------+-------+----------+---...
+//
+// ... continued ...
+// +// ...---+----------+-------+----------+-------+---...---+-------+ +// | cns N | cn N | cvs N | cv 1 | | cv N | +// ...---+----------+-------+----------+-------+---...---+-------+ +// | varint32 | bytes | varint32 | bytes | | bytes | +// ...---+----------+-------+----------+-------+---...---+-------+ + +class WideColumnSerialization { + public: + static Status Serialize(const WideColumns& columns, std::string& output); + static Status Serialize(const Slice& value_of_default, + const WideColumns& other_columns, + std::string& output); + + static Status Deserialize(Slice& input, WideColumns& columns); + + static WideColumns::const_iterator Find(const WideColumns& columns, + const Slice& column_name); + static Status GetValueOfDefaultColumn(Slice& input, Slice& value); + + static constexpr uint32_t kCurrentVersion = 1; + + private: + static Status SerializeImpl(const Slice* value_of_default, + const WideColumns& columns, std::string& output); +}; + +inline Status WideColumnSerialization::Serialize(const WideColumns& columns, + std::string& output) { + constexpr Slice* value_of_default = nullptr; + + return SerializeImpl(value_of_default, columns, output); +} + +inline Status WideColumnSerialization::Serialize( + const Slice& value_of_default, const WideColumns& other_columns, + std::string& output) { + return SerializeImpl(&value_of_default, other_columns, output); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_columns.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_columns.cc new file mode 100644 index 0000000000..186be7f854 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/wide/wide_columns.cc @@ -0,0 +1,22 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/wide_columns.h" + +#include "db/wide/wide_column_serialization.h" + +namespace ROCKSDB_NAMESPACE { + +const Slice kDefaultWideColumnName; + +const WideColumns kNoWideColumns; + +Status PinnableWideColumns::CreateIndexForWideColumns() { + Slice value_copy = value_; + + return WideColumnSerialization::Deserialize(value_copy, columns_); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch.cc new file mode 100644 index 0000000000..726b14f098 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch.cc @@ -0,0 +1,3142 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
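+//
+// Editorial note: as a concrete instance of the rep_ layout described below,
+// a batch containing a single Put of key "k" and value "v" to the default
+// column family, with a hypothetical sequence number of 0, is encoded as:
+//
+//   sequence (fixed64, little-endian): 00 00 00 00 00 00 00 00
+//   count    (fixed32, little-endian): 01 00 00 00
+//   record: kTypeValue (01), varstring "k" (01 6b), varstring "v" (01 76)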
+// +// WriteBatch::rep_ := +// sequence: fixed64 +// count: fixed32 +// data: record[count] +// record := +// kTypeValue varstring varstring +// kTypeDeletion varstring +// kTypeSingleDeletion varstring +// kTypeRangeDeletion varstring varstring +// kTypeMerge varstring varstring +// kTypeColumnFamilyValue varint32 varstring varstring +// kTypeColumnFamilyDeletion varint32 varstring +// kTypeColumnFamilySingleDeletion varint32 varstring +// kTypeColumnFamilyRangeDeletion varint32 varstring varstring +// kTypeColumnFamilyMerge varint32 varstring varstring +// kTypeBeginPrepareXID +// kTypeEndPrepareXID varstring +// kTypeCommitXID varstring +// kTypeCommitXIDAndTimestamp varstring varstring +// kTypeRollbackXID varstring +// kTypeBeginPersistedPrepareXID +// kTypeBeginUnprepareXID +// kTypeWideColumnEntity varstring varstring +// kTypeColumnFamilyWideColumnEntity varint32 varstring varstring +// kTypeNoop +// varstring := +// len: varint32 +// data: uint8[len] + +#include "rocksdb/write_batch.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db/column_family.h" +#include "db/db_impl/db_impl.h" +#include "db/dbformat.h" +#include "db/flush_scheduler.h" +#include "db/kv_checksum.h" +#include "db/memtable.h" +#include "db/merge_context.h" +#include "db/snapshot_impl.h" +#include "db/trim_history_scheduler.h" +#include "db/wide/wide_column_serialization.h" +#include "db/write_batch_internal.h" +#include "monitoring/perf_context_imp.h" +#include "monitoring/statistics_impl.h" +#include "port/lang.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/system_clock.h" +#include "util/autovector.h" +#include "util/cast_util.h" +#include "util/coding.h" +#include "util/duplicate_detector.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +// anon namespace for file-local types +namespace { + +enum ContentFlags : uint32_t { + DEFERRED = 1 << 0, + HAS_PUT = 1 << 1, + HAS_DELETE = 1 << 2, + HAS_SINGLE_DELETE = 1 << 3, + HAS_MERGE = 1 << 4, + HAS_BEGIN_PREPARE = 1 << 5, + HAS_END_PREPARE = 1 << 6, + HAS_COMMIT = 1 << 7, + HAS_ROLLBACK = 1 << 8, + HAS_DELETE_RANGE = 1 << 9, + HAS_BLOB_INDEX = 1 << 10, + HAS_BEGIN_UNPREPARE = 1 << 11, + HAS_PUT_ENTITY = 1 << 12, +}; + +struct BatchContentClassifier : public WriteBatch::Handler { + uint32_t content_flags = 0; + + Status PutCF(uint32_t, const Slice&, const Slice&) override { + content_flags |= ContentFlags::HAS_PUT; + return Status::OK(); + } + + Status PutEntityCF(uint32_t /* column_family_id */, const Slice& /* key */, + const Slice& /* entity */) override { + content_flags |= ContentFlags::HAS_PUT_ENTITY; + return Status::OK(); + } + + Status DeleteCF(uint32_t, const Slice&) override { + content_flags |= ContentFlags::HAS_DELETE; + return Status::OK(); + } + + Status SingleDeleteCF(uint32_t, const Slice&) override { + content_flags |= ContentFlags::HAS_SINGLE_DELETE; + return Status::OK(); + } + + Status DeleteRangeCF(uint32_t, const Slice&, const Slice&) override { + content_flags |= ContentFlags::HAS_DELETE_RANGE; + return Status::OK(); + } + + Status MergeCF(uint32_t, const Slice&, const Slice&) override { + content_flags |= ContentFlags::HAS_MERGE; + return Status::OK(); + } + + Status PutBlobIndexCF(uint32_t, const Slice&, const Slice&) override { + content_flags |= ContentFlags::HAS_BLOB_INDEX; + return Status::OK(); + } + + Status MarkBeginPrepare(bool unprepare) override { + content_flags |= ContentFlags::HAS_BEGIN_PREPARE; + if (unprepare) { + content_flags |= 
ContentFlags::HAS_BEGIN_UNPREPARE;
+    }
+    return Status::OK();
+  }
+
+  Status MarkEndPrepare(const Slice&) override {
+    content_flags |= ContentFlags::HAS_END_PREPARE;
+    return Status::OK();
+  }
+
+  Status MarkCommit(const Slice&) override {
+    content_flags |= ContentFlags::HAS_COMMIT;
+    return Status::OK();
+  }
+
+  Status MarkCommitWithTimestamp(const Slice&, const Slice&) override {
+    content_flags |= ContentFlags::HAS_COMMIT;
+    return Status::OK();
+  }
+
+  Status MarkRollback(const Slice&) override {
+    content_flags |= ContentFlags::HAS_ROLLBACK;
+    return Status::OK();
+  }
+};
+
+}  // anonymous namespace
+
+struct SavePoints {
+  std::stack<SavePoint, autovector<SavePoint>> stack;
+};
+
+WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes,
+                       size_t protection_bytes_per_key,
+                       size_t default_cf_ts_sz)
+    : content_flags_(0),
+      max_bytes_(max_bytes),
+      default_cf_ts_sz_(default_cf_ts_sz),
+      rep_() {
+  // Currently `protection_bytes_per_key` can only be enabled at 8 bytes per
+  // entry.
+  assert(protection_bytes_per_key == 0 || protection_bytes_per_key == 8);
+  if (protection_bytes_per_key != 0) {
+    prot_info_.reset(new WriteBatch::ProtectionInfo());
+  }
+  rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader)
+                   ? reserved_bytes
+                   : WriteBatchInternal::kHeader);
+  rep_.resize(WriteBatchInternal::kHeader);
+}
+
+WriteBatch::WriteBatch(const std::string& rep)
+    : content_flags_(ContentFlags::DEFERRED), max_bytes_(0), rep_(rep) {}
+
+WriteBatch::WriteBatch(std::string&& rep)
+    : content_flags_(ContentFlags::DEFERRED),
+      max_bytes_(0),
+      rep_(std::move(rep)) {}
+
+WriteBatch::WriteBatch(const WriteBatch& src)
+    : wal_term_point_(src.wal_term_point_),
+      content_flags_(src.content_flags_.load(std::memory_order_relaxed)),
+      max_bytes_(src.max_bytes_),
+      default_cf_ts_sz_(src.default_cf_ts_sz_),
+      rep_(src.rep_) {
+  if (src.save_points_ != nullptr) {
+    save_points_.reset(new SavePoints());
+    save_points_->stack = src.save_points_->stack;
+  }
+  if (src.prot_info_ != nullptr) {
+    prot_info_.reset(new WriteBatch::ProtectionInfo());
+    prot_info_->entries_ = src.prot_info_->entries_;
+  }
+}
+
+WriteBatch::WriteBatch(WriteBatch&& src) noexcept
+    : save_points_(std::move(src.save_points_)),
+      wal_term_point_(std::move(src.wal_term_point_)),
+      content_flags_(src.content_flags_.load(std::memory_order_relaxed)),
+      max_bytes_(src.max_bytes_),
+      prot_info_(std::move(src.prot_info_)),
+      default_cf_ts_sz_(src.default_cf_ts_sz_),
+      rep_(std::move(src.rep_)) {}
+
+WriteBatch& WriteBatch::operator=(const WriteBatch& src) {
+  if (&src != this) {
+    this->~WriteBatch();
+    new (this) WriteBatch(src);
+  }
+  return *this;
+}
+
+WriteBatch& WriteBatch::operator=(WriteBatch&& src) {
+  if (&src != this) {
+    this->~WriteBatch();
+    new (this) WriteBatch(std::move(src));
+  }
+  return *this;
+}
+
+WriteBatch::~WriteBatch() {}
+
+WriteBatch::Handler::~Handler() {}
+
+void WriteBatch::Handler::LogData(const Slice& /*blob*/) {
+  // If the user has not specified something to do with blobs, then we ignore
+  // them.
+} + +bool WriteBatch::Handler::Continue() { return true; } + +void WriteBatch::Clear() { + rep_.clear(); + rep_.resize(WriteBatchInternal::kHeader); + + content_flags_.store(0, std::memory_order_relaxed); + + if (save_points_ != nullptr) { + while (!save_points_->stack.empty()) { + save_points_->stack.pop(); + } + } + + if (prot_info_ != nullptr) { + prot_info_->entries_.clear(); + } + wal_term_point_.clear(); + default_cf_ts_sz_ = 0; +} + +uint32_t WriteBatch::Count() const { return WriteBatchInternal::Count(this); } + +uint32_t WriteBatch::ComputeContentFlags() const { + auto rv = content_flags_.load(std::memory_order_relaxed); + if ((rv & ContentFlags::DEFERRED) != 0) { + BatchContentClassifier classifier; + // Should we handle status here? + Iterate(&classifier).PermitUncheckedError(); + rv = classifier.content_flags; + + // this method is conceptually const, because it is performing a lazy + // computation that doesn't affect the abstract state of the batch. + // content_flags_ is marked mutable so that we can perform the + // following assignment + content_flags_.store(rv, std::memory_order_relaxed); + } + return rv; +} + +void WriteBatch::MarkWalTerminationPoint() { + wal_term_point_.size = GetDataSize(); + wal_term_point_.count = Count(); + wal_term_point_.content_flags = content_flags_; +} + +size_t WriteBatch::GetProtectionBytesPerKey() const { + if (prot_info_ != nullptr) { + return prot_info_->GetBytesPerKey(); + } + return 0; +} + +bool WriteBatch::HasPut() const { + return (ComputeContentFlags() & ContentFlags::HAS_PUT) != 0; +} + +bool WriteBatch::HasPutEntity() const { + return (ComputeContentFlags() & ContentFlags::HAS_PUT_ENTITY) != 0; +} + +bool WriteBatch::HasDelete() const { + return (ComputeContentFlags() & ContentFlags::HAS_DELETE) != 0; +} + +bool WriteBatch::HasSingleDelete() const { + return (ComputeContentFlags() & ContentFlags::HAS_SINGLE_DELETE) != 0; +} + +bool WriteBatch::HasDeleteRange() const { + return (ComputeContentFlags() & ContentFlags::HAS_DELETE_RANGE) != 0; +} + +bool WriteBatch::HasMerge() const { + return (ComputeContentFlags() & ContentFlags::HAS_MERGE) != 0; +} + +bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key, bool cf_record) { + assert(input != nullptr && key != nullptr); + // Skip tag byte + input->remove_prefix(1); + + if (cf_record) { + // Skip column_family bytes + uint32_t cf; + if (!GetVarint32(input, &cf)) { + return false; + } + } + + // Extract key + return GetLengthPrefixedSlice(input, key); +} + +bool WriteBatch::HasBeginPrepare() const { + return (ComputeContentFlags() & ContentFlags::HAS_BEGIN_PREPARE) != 0; +} + +bool WriteBatch::HasEndPrepare() const { + return (ComputeContentFlags() & ContentFlags::HAS_END_PREPARE) != 0; +} + +bool WriteBatch::HasCommit() const { + return (ComputeContentFlags() & ContentFlags::HAS_COMMIT) != 0; +} + +bool WriteBatch::HasRollback() const { + return (ComputeContentFlags() & ContentFlags::HAS_ROLLBACK) != 0; +} + +Status ReadRecordFromWriteBatch(Slice* input, char* tag, + uint32_t* column_family, Slice* key, + Slice* value, Slice* blob, Slice* xid) { + assert(key != nullptr && value != nullptr); + *tag = (*input)[0]; + input->remove_prefix(1); + *column_family = 0; // default + switch (*tag) { + case kTypeColumnFamilyValue: + if (!GetVarint32(input, column_family)) { + return Status::Corruption("bad WriteBatch Put"); + } + FALLTHROUGH_INTENDED; + case kTypeValue: + if (!GetLengthPrefixedSlice(input, key) || + !GetLengthPrefixedSlice(input, value)) { + return Status::Corruption("bad 
WriteBatch Put"); + } + break; + case kTypeColumnFamilyDeletion: + case kTypeColumnFamilySingleDeletion: + if (!GetVarint32(input, column_family)) { + return Status::Corruption("bad WriteBatch Delete"); + } + FALLTHROUGH_INTENDED; + case kTypeDeletion: + case kTypeSingleDeletion: + if (!GetLengthPrefixedSlice(input, key)) { + return Status::Corruption("bad WriteBatch Delete"); + } + break; + case kTypeColumnFamilyRangeDeletion: + if (!GetVarint32(input, column_family)) { + return Status::Corruption("bad WriteBatch DeleteRange"); + } + FALLTHROUGH_INTENDED; + case kTypeRangeDeletion: + // for range delete, "key" is begin_key, "value" is end_key + if (!GetLengthPrefixedSlice(input, key) || + !GetLengthPrefixedSlice(input, value)) { + return Status::Corruption("bad WriteBatch DeleteRange"); + } + break; + case kTypeColumnFamilyMerge: + if (!GetVarint32(input, column_family)) { + return Status::Corruption("bad WriteBatch Merge"); + } + FALLTHROUGH_INTENDED; + case kTypeMerge: + if (!GetLengthPrefixedSlice(input, key) || + !GetLengthPrefixedSlice(input, value)) { + return Status::Corruption("bad WriteBatch Merge"); + } + break; + case kTypeColumnFamilyBlobIndex: + if (!GetVarint32(input, column_family)) { + return Status::Corruption("bad WriteBatch BlobIndex"); + } + FALLTHROUGH_INTENDED; + case kTypeBlobIndex: + if (!GetLengthPrefixedSlice(input, key) || + !GetLengthPrefixedSlice(input, value)) { + return Status::Corruption("bad WriteBatch BlobIndex"); + } + break; + case kTypeLogData: + assert(blob != nullptr); + if (!GetLengthPrefixedSlice(input, blob)) { + return Status::Corruption("bad WriteBatch Blob"); + } + break; + case kTypeNoop: + case kTypeBeginPrepareXID: + // This indicates that the prepared batch is also persisted in the db. + // This is used in WritePreparedTxn + case kTypeBeginPersistedPrepareXID: + // This is used in WriteUnpreparedTxn + case kTypeBeginUnprepareXID: + break; + case kTypeEndPrepareXID: + if (!GetLengthPrefixedSlice(input, xid)) { + return Status::Corruption("bad EndPrepare XID"); + } + break; + case kTypeCommitXIDAndTimestamp: + if (!GetLengthPrefixedSlice(input, key)) { + return Status::Corruption("bad commit timestamp"); + } + FALLTHROUGH_INTENDED; + case kTypeCommitXID: + if (!GetLengthPrefixedSlice(input, xid)) { + return Status::Corruption("bad Commit XID"); + } + break; + case kTypeRollbackXID: + if (!GetLengthPrefixedSlice(input, xid)) { + return Status::Corruption("bad Rollback XID"); + } + break; + case kTypeColumnFamilyWideColumnEntity: + if (!GetVarint32(input, column_family)) { + return Status::Corruption("bad WriteBatch PutEntity"); + } + FALLTHROUGH_INTENDED; + case kTypeWideColumnEntity: + if (!GetLengthPrefixedSlice(input, key) || + !GetLengthPrefixedSlice(input, value)) { + return Status::Corruption("bad WriteBatch PutEntity"); + } + break; + default: + return Status::Corruption("unknown WriteBatch tag"); + } + return Status::OK(); +} + +Status WriteBatch::Iterate(Handler* handler) const { + if (rep_.size() < WriteBatchInternal::kHeader) { + return Status::Corruption("malformed WriteBatch (too small)"); + } + + return WriteBatchInternal::Iterate(this, handler, WriteBatchInternal::kHeader, + rep_.size()); +} + +Status WriteBatchInternal::Iterate(const WriteBatch* wb, + WriteBatch::Handler* handler, size_t begin, + size_t end) { + if (begin > wb->rep_.size() || end > wb->rep_.size() || end < begin) { + return Status::Corruption("Invalid start/end bounds for Iterate"); + } + assert(begin <= end); + Slice input(wb->rep_.data() + begin, 
static_cast(end - begin)); + bool whole_batch = + (begin == WriteBatchInternal::kHeader) && (end == wb->rep_.size()); + + Slice key, value, blob, xid; + + // Sometimes a sub-batch starts with a Noop. We want to exclude such Noops as + // the batch boundary symbols otherwise we would mis-count the number of + // batches. We do that by checking whether the accumulated batch is empty + // before seeing the next Noop. + bool empty_batch = true; + uint32_t found = 0; + Status s; + char tag = 0; + uint32_t column_family = 0; // default + bool last_was_try_again = false; + bool handler_continue = true; + while (((s.ok() && !input.empty()) || UNLIKELY(s.IsTryAgain()))) { + handler_continue = handler->Continue(); + if (!handler_continue) { + break; + } + + if (LIKELY(!s.IsTryAgain())) { + last_was_try_again = false; + tag = 0; + column_family = 0; // default + + s = ReadRecordFromWriteBatch(&input, &tag, &column_family, &key, &value, + &blob, &xid); + if (!s.ok()) { + return s; + } + } else { + assert(s.IsTryAgain()); + assert(!last_was_try_again); // to detect infinite loop bugs + if (UNLIKELY(last_was_try_again)) { + return Status::Corruption( + "two consecutive TryAgain in WriteBatch handler; this is either a " + "software bug or data corruption."); + } + last_was_try_again = true; + s = Status::OK(); + } + + switch (tag) { + case kTypeColumnFamilyValue: + case kTypeValue: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_PUT)); + s = handler->PutCF(column_family, key, value); + if (LIKELY(s.ok())) { + empty_batch = false; + found++; + } + break; + case kTypeColumnFamilyDeletion: + case kTypeDeletion: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_DELETE)); + s = handler->DeleteCF(column_family, key); + if (LIKELY(s.ok())) { + empty_batch = false; + found++; + } + break; + case kTypeColumnFamilySingleDeletion: + case kTypeSingleDeletion: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_SINGLE_DELETE)); + s = handler->SingleDeleteCF(column_family, key); + if (LIKELY(s.ok())) { + empty_batch = false; + found++; + } + break; + case kTypeColumnFamilyRangeDeletion: + case kTypeRangeDeletion: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_DELETE_RANGE)); + s = handler->DeleteRangeCF(column_family, key, value); + if (LIKELY(s.ok())) { + empty_batch = false; + found++; + } + break; + case kTypeColumnFamilyMerge: + case kTypeMerge: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_MERGE)); + s = handler->MergeCF(column_family, key, value); + if (LIKELY(s.ok())) { + empty_batch = false; + found++; + } + break; + case kTypeColumnFamilyBlobIndex: + case kTypeBlobIndex: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_BLOB_INDEX)); + s = handler->PutBlobIndexCF(column_family, key, value); + if (LIKELY(s.ok())) { + found++; + } + break; + case kTypeLogData: + handler->LogData(blob); + // A batch might have nothing but LogData. It is still a batch. 
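+        // (Editorial note: LogData blobs, added via WriteBatch::PutLogData(),
+        // are WAL-only payloads; they are handed to the handler here but are
+        // never inserted into a memtable, which is why `found` is not
+        // incremented for them.)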
+ empty_batch = false; + break; + case kTypeBeginPrepareXID: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_BEGIN_PREPARE)); + s = handler->MarkBeginPrepare(); + assert(s.ok()); + empty_batch = false; + if (handler->WriteAfterCommit() == + WriteBatch::Handler::OptionState::kDisabled) { + s = Status::NotSupported( + "WriteCommitted txn tag when write_after_commit_ is disabled (in " + "WritePrepared/WriteUnprepared mode). If it is not due to " + "corruption, the WAL must be emptied before changing the " + "WritePolicy."); + } + if (handler->WriteBeforePrepare() == + WriteBatch::Handler::OptionState::kEnabled) { + s = Status::NotSupported( + "WriteCommitted txn tag when write_before_prepare_ is enabled " + "(in WriteUnprepared mode). If it is not due to corruption, the " + "WAL must be emptied before changing the WritePolicy."); + } + break; + case kTypeBeginPersistedPrepareXID: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_BEGIN_PREPARE)); + s = handler->MarkBeginPrepare(); + assert(s.ok()); + empty_batch = false; + if (handler->WriteAfterCommit() == + WriteBatch::Handler::OptionState::kEnabled) { + s = Status::NotSupported( + "WritePrepared/WriteUnprepared txn tag when write_after_commit_ " + "is enabled (in default WriteCommitted mode). If it is not due " + "to corruption, the WAL must be emptied before changing the " + "WritePolicy."); + } + break; + case kTypeBeginUnprepareXID: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_BEGIN_UNPREPARE)); + s = handler->MarkBeginPrepare(true /* unprepared */); + assert(s.ok()); + empty_batch = false; + if (handler->WriteAfterCommit() == + WriteBatch::Handler::OptionState::kEnabled) { + s = Status::NotSupported( + "WriteUnprepared txn tag when write_after_commit_ is enabled (in " + "default WriteCommitted mode). If it is not due to corruption, " + "the WAL must be emptied before changing the WritePolicy."); + } + if (handler->WriteBeforePrepare() == + WriteBatch::Handler::OptionState::kDisabled) { + s = Status::NotSupported( + "WriteUnprepared txn tag when write_before_prepare_ is disabled " + "(in WriteCommitted/WritePrepared mode). If it is not due to " + "corruption, the WAL must be emptied before changing the " + "WritePolicy."); + } + break; + case kTypeEndPrepareXID: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_END_PREPARE)); + s = handler->MarkEndPrepare(xid); + assert(s.ok()); + empty_batch = true; + break; + case kTypeCommitXID: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_COMMIT)); + s = handler->MarkCommit(xid); + assert(s.ok()); + empty_batch = true; + break; + case kTypeCommitXIDAndTimestamp: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_COMMIT)); + // key stores the commit timestamp. 
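+        // (Editorial note: for kTypeCommitXIDAndTimestamp,
+        // ReadRecordFromWriteBatch() decodes the timestamp into `key` before
+        // falling through to read the XID, so a valid record implies a
+        // non-empty `key` here.)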
+ assert(!key.empty()); + s = handler->MarkCommitWithTimestamp(xid, key); + if (LIKELY(s.ok())) { + empty_batch = true; + } + break; + case kTypeRollbackXID: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_ROLLBACK)); + s = handler->MarkRollback(xid); + assert(s.ok()); + empty_batch = true; + break; + case kTypeNoop: + s = handler->MarkNoop(empty_batch); + assert(s.ok()); + empty_batch = true; + break; + case kTypeWideColumnEntity: + case kTypeColumnFamilyWideColumnEntity: + assert(wb->content_flags_.load(std::memory_order_relaxed) & + (ContentFlags::DEFERRED | ContentFlags::HAS_PUT_ENTITY)); + s = handler->PutEntityCF(column_family, key, value); + if (LIKELY(s.ok())) { + empty_batch = false; + ++found; + } + break; + default: + return Status::Corruption("unknown WriteBatch tag"); + } + } + if (!s.ok()) { + return s; + } + if (handler_continue && whole_batch && + found != WriteBatchInternal::Count(wb)) { + return Status::Corruption("WriteBatch has wrong count"); + } else { + return Status::OK(); + } +} + +bool WriteBatchInternal::IsLatestPersistentState(const WriteBatch* b) { + return b->is_latest_persistent_state_; +} + +void WriteBatchInternal::SetAsLatestPersistentState(WriteBatch* b) { + b->is_latest_persistent_state_ = true; +} + +uint32_t WriteBatchInternal::Count(const WriteBatch* b) { + return DecodeFixed32(b->rep_.data() + 8); +} + +void WriteBatchInternal::SetCount(WriteBatch* b, uint32_t n) { + EncodeFixed32(&b->rep_[8], n); +} + +SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) { + return SequenceNumber(DecodeFixed64(b->rep_.data())); +} + +void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) { + EncodeFixed64(&b->rep_[0], seq); +} + +size_t WriteBatchInternal::GetFirstOffset(WriteBatch* /*b*/) { + return WriteBatchInternal::kHeader; +} + +std::tuple +WriteBatchInternal::GetColumnFamilyIdAndTimestampSize( + WriteBatch* b, ColumnFamilyHandle* column_family) { + uint32_t cf_id = GetColumnFamilyID(column_family); + size_t ts_sz = 0; + Status s; + if (column_family) { + const Comparator* const ucmp = column_family->GetComparator(); + if (ucmp) { + ts_sz = ucmp->timestamp_size(); + if (0 == cf_id && b->default_cf_ts_sz_ != ts_sz) { + s = Status::InvalidArgument("Default cf timestamp size mismatch"); + } + } + } else if (b->default_cf_ts_sz_ > 0) { + ts_sz = b->default_cf_ts_sz_; + } + return std::make_tuple(s, cf_id, ts_sz); +} + +namespace { +Status CheckColumnFamilyTimestampSize(ColumnFamilyHandle* column_family, + const Slice& ts) { + if (!column_family) { + return Status::InvalidArgument("column family handle cannot be null"); + } + const Comparator* const ucmp = column_family->GetComparator(); + assert(ucmp); + size_t cf_ts_sz = ucmp->timestamp_size(); + if (0 == cf_ts_sz) { + return Status::InvalidArgument("timestamp disabled"); + } + if (cf_ts_sz != ts.size()) { + return Status::InvalidArgument("timestamp size mismatch"); + } + return Status::OK(); +} +} // anonymous namespace + +Status WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id, + const Slice& key, const Slice& value) { + if (key.size() > size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("key is too large"); + } + if (value.size() > size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("value is too large"); + } + + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + 
b->rep_.push_back(static_cast(kTypeValue)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyValue)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSlice(&b->rep_, key); + PutLengthPrefixedSlice(&b->rep_, value); + b->content_flags_.store( + b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_PUT, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // Technically the optype could've been `kTypeColumnFamilyValue` with the + // CF ID encoded in the `WriteBatch`. That distinction is unimportant + // however since we verify CF ID is correct, as well as all other fields + // (a missing/extra encoded CF ID would corrupt another field). It is + // convenient to consolidate on `kTypeValue` here as that is what will be + // inserted into memtable. + b->prot_info_->entries_.emplace_back(ProtectionInfo64() + .ProtectKVO(key, value, kTypeValue) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::Put(this, cf_id, key, value); + } + + needs_in_place_update_ts_ = true; + has_key_with_ts_ = true; + std::string dummy_ts(ts_sz, '\0'); + std::array key_with_ts{{key, dummy_ts}}; + return WriteBatchInternal::Put(this, cf_id, SliceParts(key_with_ts.data(), 2), + SliceParts(&value, 1)); +} + +Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts, const Slice& value) { + const Status s = CheckColumnFamilyTimestampSize(column_family, ts); + if (!s.ok()) { + return s; + } + has_key_with_ts_ = true; + assert(column_family); + uint32_t cf_id = column_family->GetID(); + std::array key_with_ts{{key, ts}}; + return WriteBatchInternal::Put(this, cf_id, SliceParts(key_with_ts.data(), 2), + SliceParts(&value, 1)); +} + +Status WriteBatchInternal::CheckSlicePartsLength(const SliceParts& key, + const SliceParts& value) { + size_t total_key_bytes = 0; + for (int i = 0; i < key.num_parts; ++i) { + total_key_bytes += key.parts[i].size(); + } + if (total_key_bytes >= size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("key is too large"); + } + + size_t total_value_bytes = 0; + for (int i = 0; i < value.num_parts; ++i) { + total_value_bytes += value.parts[i].size(); + } + if (total_value_bytes >= size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("value is too large"); + } + return Status::OK(); +} + +Status WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id, + const SliceParts& key, const SliceParts& value) { + Status s = CheckSlicePartsLength(key, value); + if (!s.ok()) { + return s; + } + + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeValue)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyValue)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSliceParts(&b->rep_, key); + PutLengthPrefixedSliceParts(&b->rep_, value); + b->content_flags_.store( + b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_PUT, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // 
`ValueType` argument passed to `ProtectKVO()`. + b->prot_info_->entries_.emplace_back(ProtectionInfo64() + .ProtectKVO(key, value, kTypeValue) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::Put(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (ts_sz == 0) { + return WriteBatchInternal::Put(this, cf_id, key, value); + } + + return Status::InvalidArgument( + "Cannot call this method on column family enabling timestamp"); +} + +Status WriteBatchInternal::PutEntity(WriteBatch* b, uint32_t column_family_id, + const Slice& key, + const WideColumns& columns) { + assert(b); + + if (key.size() > size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("key is too large"); + } + + WideColumns sorted_columns(columns); + std::sort(sorted_columns.begin(), sorted_columns.end(), + [](const WideColumn& lhs, const WideColumn& rhs) { + return lhs.name().compare(rhs.name()) < 0; + }); + + std::string entity; + const Status s = WideColumnSerialization::Serialize(sorted_columns, entity); + if (!s.ok()) { + return s; + } + + if (entity.size() > size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("wide column entity is too large"); + } + + LocalSavePoint save(b); + + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeWideColumnEntity)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyWideColumnEntity)); + PutVarint32(&b->rep_, column_family_id); + } + + PutLengthPrefixedSlice(&b->rep_, key); + PutLengthPrefixedSlice(&b->rep_, entity); + + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_PUT_ENTITY, + std::memory_order_relaxed); + + if (b->prot_info_ != nullptr) { + b->prot_info_->entries_.emplace_back( + ProtectionInfo64() + .ProtectKVO(key, entity, kTypeWideColumnEntity) + .ProtectC(column_family_id)); + } + + return save.commit(); +} + +Status WriteBatch::PutEntity(ColumnFamilyHandle* column_family, + const Slice& key, const WideColumns& columns) { + if (!column_family) { + return Status::InvalidArgument( + "Cannot call this method without a column family handle"); + } + + Status s; + uint32_t cf_id = 0; + size_t ts_sz = 0; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (ts_sz) { + return Status::InvalidArgument( + "Cannot call this method on column family enabling timestamp"); + } + + return WriteBatchInternal::PutEntity(this, cf_id, key, columns); +} + +Status WriteBatchInternal::InsertNoop(WriteBatch* b) { + b->rep_.push_back(static_cast(kTypeNoop)); + return Status::OK(); +} + +Status WriteBatchInternal::MarkEndPrepare(WriteBatch* b, const Slice& xid, + bool write_after_commit, + bool unprepared_batch) { + // a manually constructed batch can only contain one prepare section + assert(b->rep_[12] == static_cast(kTypeNoop)); + + // all savepoints up to this point are cleared + if (b->save_points_ != nullptr) { + while (!b->save_points_->stack.empty()) { + b->save_points_->stack.pop(); + } + } + + // rewrite noop as begin marker + b->rep_[12] = static_cast( + write_after_commit ? kTypeBeginPrepareXID + : (unprepared_batch ? 
kTypeBeginUnprepareXID + : kTypeBeginPersistedPrepareXID)); + b->rep_.push_back(static_cast(kTypeEndPrepareXID)); + PutLengthPrefixedSlice(&b->rep_, xid); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_END_PREPARE | + ContentFlags::HAS_BEGIN_PREPARE, + std::memory_order_relaxed); + if (unprepared_batch) { + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_BEGIN_UNPREPARE, + std::memory_order_relaxed); + } + return Status::OK(); +} + +Status WriteBatchInternal::MarkCommit(WriteBatch* b, const Slice& xid) { + b->rep_.push_back(static_cast(kTypeCommitXID)); + PutLengthPrefixedSlice(&b->rep_, xid); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_COMMIT, + std::memory_order_relaxed); + return Status::OK(); +} + +Status WriteBatchInternal::MarkCommitWithTimestamp(WriteBatch* b, + const Slice& xid, + const Slice& commit_ts) { + assert(!commit_ts.empty()); + b->rep_.push_back(static_cast(kTypeCommitXIDAndTimestamp)); + PutLengthPrefixedSlice(&b->rep_, commit_ts); + PutLengthPrefixedSlice(&b->rep_, xid); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_COMMIT, + std::memory_order_relaxed); + return Status::OK(); +} + +Status WriteBatchInternal::MarkRollback(WriteBatch* b, const Slice& xid) { + b->rep_.push_back(static_cast(kTypeRollbackXID)); + PutLengthPrefixedSlice(&b->rep_, xid); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_ROLLBACK, + std::memory_order_relaxed); + return Status::OK(); +} + +Status WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id, + const Slice& key) { + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeDeletion)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyDeletion)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSlice(&b->rep_, key); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_DELETE, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. 
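+    // (Editorial note: the protection entry covers the key, the value (empty
+    // for a plain delete), the normalized operation type, and the column
+    // family id, so a flipped byte in any of those fields is caught when the
+    // entry is later checksum-verified.)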
+ b->prot_info_->entries_.emplace_back( + ProtectionInfo64() + .ProtectKVO(key, "" /* value */, kTypeDeletion) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::Delete(this, cf_id, key); + } + + needs_in_place_update_ts_ = true; + has_key_with_ts_ = true; + std::string dummy_ts(ts_sz, '\0'); + std::array key_with_ts{{key, dummy_ts}}; + return WriteBatchInternal::Delete(this, cf_id, + SliceParts(key_with_ts.data(), 2)); +} + +Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) { + const Status s = CheckColumnFamilyTimestampSize(column_family, ts); + if (!s.ok()) { + return s; + } + assert(column_family); + has_key_with_ts_ = true; + uint32_t cf_id = column_family->GetID(); + std::array key_with_ts{{key, ts}}; + return WriteBatchInternal::Delete(this, cf_id, + SliceParts(key_with_ts.data(), 2)); +} + +Status WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id, + const SliceParts& key) { + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeDeletion)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyDeletion)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSliceParts(&b->rep_, key); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_DELETE, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. + b->prot_info_->entries_.emplace_back( + ProtectionInfo64() + .ProtectKVO(key, + SliceParts(nullptr /* _parts */, 0 /* _num_parts */), + kTypeDeletion) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::Delete(ColumnFamilyHandle* column_family, + const SliceParts& key) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::Delete(this, cf_id, key); + } + + return Status::InvalidArgument( + "Cannot call this method on column family enabling timestamp"); +} + +Status WriteBatchInternal::SingleDelete(WriteBatch* b, + uint32_t column_family_id, + const Slice& key) { + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeSingleDeletion)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilySingleDeletion)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSlice(&b->rep_, key); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_SINGLE_DELETE, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. 
+ b->prot_info_->entries_.emplace_back( + ProtectionInfo64() + .ProtectKVO(key, "" /* value */, kTypeSingleDeletion) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::SingleDelete(this, cf_id, key); + } + + needs_in_place_update_ts_ = true; + has_key_with_ts_ = true; + std::string dummy_ts(ts_sz, '\0'); + std::array key_with_ts{{key, dummy_ts}}; + return WriteBatchInternal::SingleDelete(this, cf_id, + SliceParts(key_with_ts.data(), 2)); +} + +Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts) { + const Status s = CheckColumnFamilyTimestampSize(column_family, ts); + if (!s.ok()) { + return s; + } + has_key_with_ts_ = true; + assert(column_family); + uint32_t cf_id = column_family->GetID(); + std::array key_with_ts{{key, ts}}; + return WriteBatchInternal::SingleDelete(this, cf_id, + SliceParts(key_with_ts.data(), 2)); +} + +Status WriteBatchInternal::SingleDelete(WriteBatch* b, + uint32_t column_family_id, + const SliceParts& key) { + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeSingleDeletion)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilySingleDeletion)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSliceParts(&b->rep_, key); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_SINGLE_DELETE, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. 
+ b->prot_info_->entries_.emplace_back( + ProtectionInfo64() + .ProtectKVO(key, + SliceParts(nullptr /* _parts */, + 0 /* _num_parts */) /* value */, + kTypeSingleDeletion) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family, + const SliceParts& key) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::SingleDelete(this, cf_id, key); + } + + return Status::InvalidArgument( + "Cannot call this method on column family enabling timestamp"); +} + +Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id, + const Slice& begin_key, + const Slice& end_key) { + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeRangeDeletion)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyRangeDeletion)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSlice(&b->rep_, begin_key); + PutLengthPrefixedSlice(&b->rep_, end_key); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_DELETE_RANGE, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. + // In `DeleteRange()`, the end key is treated as the value. + b->prot_info_->entries_.emplace_back( + ProtectionInfo64() + .ProtectKVO(begin_key, end_key, kTypeRangeDeletion) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family, + const Slice& begin_key, const Slice& end_key) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::DeleteRange(this, cf_id, begin_key, end_key); + } + + needs_in_place_update_ts_ = true; + has_key_with_ts_ = true; + std::string dummy_ts(ts_sz, '\0'); + std::array begin_key_with_ts{{begin_key, dummy_ts}}; + std::array end_key_with_ts{{end_key, dummy_ts}}; + return WriteBatchInternal::DeleteRange( + this, cf_id, SliceParts(begin_key_with_ts.data(), 2), + SliceParts(end_key_with_ts.data(), 2)); +} + +Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family, + const Slice& begin_key, const Slice& end_key, + const Slice& ts) { + const Status s = CheckColumnFamilyTimestampSize(column_family, ts); + if (!s.ok()) { + return s; + } + assert(column_family); + has_key_with_ts_ = true; + uint32_t cf_id = column_family->GetID(); + std::array key_with_ts{{begin_key, ts}}; + std::array end_key_with_ts{{end_key, ts}}; + return WriteBatchInternal::DeleteRange(this, cf_id, + SliceParts(key_with_ts.data(), 2), + SliceParts(end_key_with_ts.data(), 2)); +} + +Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id, + const SliceParts& begin_key, + const SliceParts& end_key) { + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeRangeDeletion)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyRangeDeletion)); + 
PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSliceParts(&b->rep_, begin_key); + PutLengthPrefixedSliceParts(&b->rep_, end_key); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_DELETE_RANGE, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. + // In `DeleteRange()`, the end key is treated as the value. + b->prot_info_->entries_.emplace_back( + ProtectionInfo64() + .ProtectKVO(begin_key, end_key, kTypeRangeDeletion) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family, + const SliceParts& begin_key, + const SliceParts& end_key) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::DeleteRange(this, cf_id, begin_key, end_key); + } + + return Status::InvalidArgument( + "Cannot call this method on column family enabling timestamp"); +} + +Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id, + const Slice& key, const Slice& value) { + if (key.size() > size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("key is too large"); + } + if (value.size() > size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("value is too large"); + } + + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeMerge)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyMerge)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSlice(&b->rep_, key); + PutLengthPrefixedSlice(&b->rep_, value); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_MERGE, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. 
+ b->prot_info_->entries_.emplace_back(ProtectionInfo64() + .ProtectKVO(key, value, kTypeMerge) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::Merge(this, cf_id, key, value); + } + + needs_in_place_update_ts_ = true; + has_key_with_ts_ = true; + std::string dummy_ts(ts_sz, '\0'); + std::array key_with_ts{{key, dummy_ts}}; + + return WriteBatchInternal::Merge( + this, cf_id, SliceParts(key_with_ts.data(), 2), SliceParts(&value, 1)); +} + +Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts, const Slice& value) { + const Status s = CheckColumnFamilyTimestampSize(column_family, ts); + if (!s.ok()) { + return s; + } + has_key_with_ts_ = true; + assert(column_family); + uint32_t cf_id = column_family->GetID(); + std::array key_with_ts{{key, ts}}; + return WriteBatchInternal::Merge( + this, cf_id, SliceParts(key_with_ts.data(), 2), SliceParts(&value, 1)); +} + +Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id, + const SliceParts& key, + const SliceParts& value) { + Status s = CheckSlicePartsLength(key, value); + if (!s.ok()) { + return s; + } + + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeMerge)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyMerge)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSliceParts(&b->rep_, key); + PutLengthPrefixedSliceParts(&b->rep_, value); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_MERGE, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. 
+ b->prot_info_->entries_.emplace_back(ProtectionInfo64() + .ProtectKVO(key, value, kTypeMerge) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::Merge(ColumnFamilyHandle* column_family, + const SliceParts& key, const SliceParts& value) { + size_t ts_sz = 0; + uint32_t cf_id = 0; + Status s; + + std::tie(s, cf_id, ts_sz) = + WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this, + column_family); + + if (!s.ok()) { + return s; + } + + if (0 == ts_sz) { + return WriteBatchInternal::Merge(this, cf_id, key, value); + } + + return Status::InvalidArgument( + "Cannot call this method on column family enabling timestamp"); +} + +Status WriteBatchInternal::PutBlobIndex(WriteBatch* b, + uint32_t column_family_id, + const Slice& key, const Slice& value) { + LocalSavePoint save(b); + WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); + if (column_family_id == 0) { + b->rep_.push_back(static_cast(kTypeBlobIndex)); + } else { + b->rep_.push_back(static_cast(kTypeColumnFamilyBlobIndex)); + PutVarint32(&b->rep_, column_family_id); + } + PutLengthPrefixedSlice(&b->rep_, key); + PutLengthPrefixedSlice(&b->rep_, value); + b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | + ContentFlags::HAS_BLOB_INDEX, + std::memory_order_relaxed); + if (b->prot_info_ != nullptr) { + // See comment in first `WriteBatchInternal::Put()` overload concerning the + // `ValueType` argument passed to `ProtectKVO()`. + b->prot_info_->entries_.emplace_back( + ProtectionInfo64() + .ProtectKVO(key, value, kTypeBlobIndex) + .ProtectC(column_family_id)); + } + return save.commit(); +} + +Status WriteBatch::PutLogData(const Slice& blob) { + LocalSavePoint save(this); + rep_.push_back(static_cast(kTypeLogData)); + PutLengthPrefixedSlice(&rep_, blob); + return save.commit(); +} + +void WriteBatch::SetSavePoint() { + if (save_points_ == nullptr) { + save_points_.reset(new SavePoints()); + } + // Record length and count of current batch of writes. 
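+  // (Editorial sketch of the savepoint protocol, with hypothetical names:
+  //
+  //   WriteBatch wb;
+  //   wb.Put(cf, "a", "1");
+  //   wb.SetSavePoint();
+  //   wb.Put(cf, "b", "2");
+  //   wb.RollbackToSavePoint();  // drops the Put of "b"; "a" is kept
+  //
+  // The push below records exactly the state needed for such a rollback.)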
+void WriteBatch::SetSavePoint() {
+  if (save_points_ == nullptr) {
+    save_points_.reset(new SavePoints());
+  }
+  // Record length and count of current batch of writes.
+  save_points_->stack.push(SavePoint(
+      GetDataSize(), Count(), content_flags_.load(std::memory_order_relaxed)));
+}
+
+Status WriteBatch::RollbackToSavePoint() {
+  if (save_points_ == nullptr || save_points_->stack.size() == 0) {
+    return Status::NotFound();
+  }
+
+  // Pop the most recent savepoint off the stack
+  SavePoint savepoint = save_points_->stack.top();
+  save_points_->stack.pop();
+
+  assert(savepoint.size <= rep_.size());
+  assert(static_cast<uint32_t>(savepoint.count) <= Count());
+
+  if (savepoint.size == rep_.size()) {
+    // No changes to rollback
+  } else if (savepoint.size == 0) {
+    // Rollback everything
+    Clear();
+  } else {
+    rep_.resize(savepoint.size);
+    if (prot_info_ != nullptr) {
+      prot_info_->entries_.resize(savepoint.count);
+    }
+    WriteBatchInternal::SetCount(this, savepoint.count);
+    content_flags_.store(savepoint.content_flags, std::memory_order_relaxed);
+  }
+
+  return Status::OK();
+}
+
+Status WriteBatch::PopSavePoint() {
+  if (save_points_ == nullptr || save_points_->stack.size() == 0) {
+    return Status::NotFound();
+  }
+
+  // Pop the most recent savepoint off the stack
+  save_points_->stack.pop();
+
+  return Status::OK();
+}
+
+Status WriteBatch::UpdateTimestamps(
+    const Slice& ts, std::function<size_t(uint32_t)> ts_sz_func) {
+  TimestampUpdater<decltype(ts_sz_func)> ts_updater(prot_info_.get(),
+                                                    std::move(ts_sz_func), ts);
+  const Status s = Iterate(&ts_updater);
+  if (s.ok()) {
+    needs_in_place_update_ts_ = false;
+  }
+  return s;
+}
+
+Status WriteBatch::VerifyChecksum() const {
+  if (prot_info_ == nullptr) {
+    return Status::OK();
+  }
+  Slice input(rep_.data() + WriteBatchInternal::kHeader,
+              rep_.size() - WriteBatchInternal::kHeader);
+  Slice key, value, blob, xid;
+  char tag = 0;
+  uint32_t column_family = 0;  // default
+  Status s;
+  size_t prot_info_idx = 0;
+  bool checksum_protected = true;
+  while (!input.empty() && prot_info_idx < prot_info_->entries_.size()) {
+    // In case key/value/column_family are not updated by
+    // ReadRecordFromWriteBatch
+    key.clear();
+    value.clear();
+    column_family = 0;
+    s = ReadRecordFromWriteBatch(&input, &tag, &column_family, &key, &value,
+                                 &blob, &xid);
+    if (!s.ok()) {
+      return s;
+    }
+    checksum_protected = true;
+    // Write batch checksum uses op_type without ColumnFamily (e.g., if op_type
+    // in the write batch is kTypeColumnFamilyValue, kTypeValue is used to
+    // compute the checksum), and encodes column family id separately. See
+    // comment in first `WriteBatchInternal::Put()` for more detail.
+    switch (tag) {
+      case kTypeColumnFamilyValue:
+      case kTypeValue:
+        tag = kTypeValue;
+        break;
+      case kTypeColumnFamilyDeletion:
+      case kTypeDeletion:
+        tag = kTypeDeletion;
+        break;
+      case kTypeColumnFamilySingleDeletion:
+      case kTypeSingleDeletion:
+        tag = kTypeSingleDeletion;
+        break;
+      case kTypeColumnFamilyRangeDeletion:
+      case kTypeRangeDeletion:
+        tag = kTypeRangeDeletion;
+        break;
+      case kTypeColumnFamilyMerge:
+      case kTypeMerge:
+        tag = kTypeMerge;
+        break;
+      case kTypeColumnFamilyBlobIndex:
+      case kTypeBlobIndex:
+        tag = kTypeBlobIndex;
+        break;
+      case kTypeLogData:
+      case kTypeBeginPrepareXID:
+      case kTypeEndPrepareXID:
+      case kTypeCommitXID:
+      case kTypeRollbackXID:
+      case kTypeNoop:
+      case kTypeBeginPersistedPrepareXID:
+      case kTypeBeginUnprepareXID:
+      case kTypeDeletionWithTimestamp:
+      case kTypeCommitXIDAndTimestamp:
+        checksum_protected = false;
+        break;
+      case kTypeColumnFamilyWideColumnEntity:
+      case kTypeWideColumnEntity:
+        tag = kTypeWideColumnEntity;
+        break;
+      default:
+        return Status::Corruption(
+            "unknown WriteBatch tag",
+            std::to_string(static_cast<unsigned int>(tag)));
+    }
+    if (checksum_protected) {
+      s = prot_info_->entries_[prot_info_idx++]
+              .StripC(column_family)
+              .StripKVO(key, value, static_cast<ValueType>(tag))
+              .GetStatus();
+      if (!s.ok()) {
+        return s;
+      }
+    }
+  }
+
+  if (prot_info_idx != WriteBatchInternal::Count(this)) {
+    return Status::Corruption("WriteBatch has wrong count");
+  }
+  assert(WriteBatchInternal::Count(this) == prot_info_->entries_.size());
+  return Status::OK();
+}
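VerifyChecksum() above is a no-op unless the batch carries per-key protection info, which is currently fixed at eight bytes per key. A caller-side sketch of enabling it through the public constructor (the zero defaults and argument names are shown only for clarity):

    #include <cassert>

    #include "rocksdb/write_batch.h"

    void ProtectedBatchSketch() {
      rocksdb::WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
                                /*protection_bytes_per_key=*/8,
                                /*default_cf_ts_sz=*/0);
      assert(batch.Put("k", "v").ok());
      assert(batch.Merge("k", "-suffix").ok());
      // Recomputes each entry's checksum over the serialized batch and
      // compares it with the stored protection info.
      assert(batch.VerifyChecksum().ok());
    }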
+namespace {
+
+class MemTableInserter : public WriteBatch::Handler {
+  SequenceNumber sequence_;
+  ColumnFamilyMemTables* const cf_mems_;
+  FlushScheduler* const flush_scheduler_;
+  TrimHistoryScheduler* const trim_history_scheduler_;
+  const bool ignore_missing_column_families_;
+  const uint64_t recovering_log_number_;
+  // log number that all Memtables inserted into should reference
+  uint64_t log_number_ref_;
+  DBImpl* db_;
+  const bool concurrent_memtable_writes_;
+  bool post_info_created_;
+  const WriteBatch::ProtectionInfo* prot_info_;
+  size_t prot_info_idx_;
+
+  bool* has_valid_writes_;
+  // On some (!) platforms just default creating
+  // a map is too expensive in the Write() path as they
+  // cause memory allocations though unused.
+  // Make creation optional but do not incur
+  // std::unique_ptr additional allocation
+  using MemPostInfoMap = std::map<MemTable*, MemTablePostProcessInfo>;
+  using PostMapType = std::aligned_storage<sizeof(MemPostInfoMap)>::type;
+  PostMapType mem_post_info_map_;
+  // current recovered transaction we are rebuilding (recovery)
+  WriteBatch* rebuilding_trx_;
+  SequenceNumber rebuilding_trx_seq_;
+  // Increase seq number once per each write batch. Otherwise increase it once
+  // per key.
+  bool seq_per_batch_;
+  // Whether the memtable write will be done only after the commit
+  bool write_after_commit_;
+  // Whether memtable write can be done before prepare
+  bool write_before_prepare_;
+  // Whether this batch was unprepared or not
+  bool unprepared_batch_;
+  using DupDetector = std::aligned_storage<sizeof(DuplicateDetector)>::type;
+  DupDetector duplicate_detector_;
+  bool dup_dectector_on_;
+
+  bool hint_per_batch_;
+  bool hint_created_;
+  // Hints for this batch
+  using HintMap = std::unordered_map<MemTable*, void*>;
+  using HintMapType = std::aligned_storage<sizeof(HintMap)>::type;
+  HintMapType hint_;
+
+  HintMap& GetHintMap() {
+    assert(hint_per_batch_);
+    if (!hint_created_) {
+      new (&hint_) HintMap();
+      hint_created_ = true;
+    }
+    return *reinterpret_cast<HintMap*>(&hint_);
+  }
+
+  MemPostInfoMap& GetPostMap() {
+    assert(concurrent_memtable_writes_);
+    if (!post_info_created_) {
+      new (&mem_post_info_map_) MemPostInfoMap();
+      post_info_created_ = true;
+    }
+    return *reinterpret_cast<MemPostInfoMap*>(&mem_post_info_map_);
+  }
+
+  bool IsDuplicateKeySeq(uint32_t column_family_id, const Slice& key) {
+    assert(!write_after_commit_);
+    assert(rebuilding_trx_ != nullptr);
+    if (!dup_dectector_on_) {
+      new (&duplicate_detector_) DuplicateDetector(db_);
+      dup_dectector_on_ = true;
+    }
+    return reinterpret_cast<DuplicateDetector*>(&duplicate_detector_)
+        ->IsDuplicateKeySeq(column_family_id, key, sequence_);
+  }
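+  // A sketch of the lazy-construction idiom shared by the three accessors
+  // above: inline aligned storage plus placement new keeps the common write
+  // path free of container allocations, at the cost of manual destruction
+  // (see the destructor below), e.g.:
+  //
+  //   std::aligned_storage<sizeof(Map), alignof(Map)>::type storage_;
+  //   bool created_ = false;
+  //   Map& Get() {
+  //     if (!created_) { new (&storage_) Map(); created_ = true; }
+  //     return *reinterpret_cast<Map*>(&storage_);
+  //   }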
+  const ProtectionInfoKVOC64* NextProtectionInfo() {
+    const ProtectionInfoKVOC64* res = nullptr;
+    if (prot_info_ != nullptr) {
+      assert(prot_info_idx_ < prot_info_->entries_.size());
+      res = &prot_info_->entries_[prot_info_idx_];
+      ++prot_info_idx_;
+    }
+    return res;
+  }
+
+  void DecrementProtectionInfoIdxForTryAgain() {
+    if (prot_info_ != nullptr) --prot_info_idx_;
+  }
+
+  void ResetProtectionInfo() {
+    prot_info_idx_ = 0;
+    prot_info_ = nullptr;
+  }
+
+ protected:
+  Handler::OptionState WriteBeforePrepare() const override {
+    return write_before_prepare_ ? Handler::OptionState::kEnabled
+                                 : Handler::OptionState::kDisabled;
+  }
+  Handler::OptionState WriteAfterCommit() const override {
+    return write_after_commit_ ? Handler::OptionState::kEnabled
+                               : Handler::OptionState::kDisabled;
+  }
+
+ public:
+  // cf_mems should not be shared with concurrent inserters
+  MemTableInserter(SequenceNumber _sequence, ColumnFamilyMemTables* cf_mems,
+                   FlushScheduler* flush_scheduler,
+                   TrimHistoryScheduler* trim_history_scheduler,
+                   bool ignore_missing_column_families,
+                   uint64_t recovering_log_number, DB* db,
+                   bool concurrent_memtable_writes,
+                   const WriteBatch::ProtectionInfo* prot_info,
+                   bool* has_valid_writes = nullptr, bool seq_per_batch = false,
+                   bool batch_per_txn = true, bool hint_per_batch = false)
+      : sequence_(_sequence),
+        cf_mems_(cf_mems),
+        flush_scheduler_(flush_scheduler),
+        trim_history_scheduler_(trim_history_scheduler),
+        ignore_missing_column_families_(ignore_missing_column_families),
+        recovering_log_number_(recovering_log_number),
+        log_number_ref_(0),
+        db_(static_cast_with_check<DBImpl>(db)),
+        concurrent_memtable_writes_(concurrent_memtable_writes),
+        post_info_created_(false),
+        prot_info_(prot_info),
+        prot_info_idx_(0),
+        has_valid_writes_(has_valid_writes),
+        rebuilding_trx_(nullptr),
+        rebuilding_trx_seq_(0),
+        seq_per_batch_(seq_per_batch),
+        // Write after commit currently uses one seq per key (instead of per
+        // batch). So seq_per_batch being false indicates write_after_commit
+        // approach.
+        write_after_commit_(!seq_per_batch),
+        // WriteUnprepared can write WriteBatches per transaction, so
+        // batch_per_txn being false indicates write_before_prepare.
+        write_before_prepare_(!batch_per_txn),
+        unprepared_batch_(false),
+        duplicate_detector_(),
+        dup_dectector_on_(false),
+        hint_per_batch_(hint_per_batch),
+        hint_created_(false) {
+    assert(cf_mems_);
+  }
+
+  ~MemTableInserter() override {
+    if (dup_dectector_on_) {
+      reinterpret_cast<DuplicateDetector*>(&duplicate_detector_)
+          ->~DuplicateDetector();
+    }
+    if (post_info_created_) {
+      reinterpret_cast<MemPostInfoMap*>(&mem_post_info_map_)
+          ->~MemPostInfoMap();
+    }
+    if (hint_created_) {
+      for (auto iter : GetHintMap()) {
+        delete[] reinterpret_cast<char*>(iter.second);
+      }
+      reinterpret_cast<HintMap*>(&hint_)->~HintMap();
+    }
+    delete rebuilding_trx_;
+  }
+
+  MemTableInserter(const MemTableInserter&) = delete;
+  MemTableInserter& operator=(const MemTableInserter&) = delete;
+
+  // The batch seq is regularly restarted; in normal mode it is set when
+  // MemTableInserter is constructed in the write thread and in recovery mode
+  // it is set when a batch, which is tagged with seq, is read from the WAL.
+  // Within a sequenced batch, which could be a merge of multiple batches, we
+  // have two policies to advance the seq: i) seq_per_key (default) and ii)
+  // seq_per_batch. To implement the latter we need to mark the boundary
+  // between the individual batches. The approach is this: 1) Use the
+  // terminating markers to indicate the boundary (kTypeEndPrepareXID,
+  // kTypeCommitXID, kTypeRollbackXID) 2) Terminate a batch with kTypeNoop in
+  // the absence of a natural boundary marker.
+  void MaybeAdvanceSeq(bool batch_boundary = false) {
+    if (batch_boundary == seq_per_batch_) {
+      sequence_++;
+    }
+  }
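+  // Worked example of the rule above for a batch of three keys:
+  //   seq_per_batch_ == false: the per-key calls MaybeAdvanceSeq(false)
+  //     advance sequence_ three times, once per key;
+  //   seq_per_batch_ == true: the per-key calls are no-ops and only the
+  //     boundary call MaybeAdvanceSeq(true) advances sequence_, once for
+  //     the whole batch.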
+  void set_log_number_ref(uint64_t log) { log_number_ref_ = log; }
+  void set_prot_info(const WriteBatch::ProtectionInfo* prot_info) {
+    prot_info_ = prot_info;
+    prot_info_idx_ = 0;
+  }
+
+  SequenceNumber sequence() const { return sequence_; }
+
+  void PostProcess() {
+    assert(concurrent_memtable_writes_);
+    // If post info was not created there is nothing
+    // to process and no need to create on demand
+    if (post_info_created_) {
+      for (auto& pair : GetPostMap()) {
+        pair.first->BatchPostProcess(pair.second);
+      }
+    }
+  }
+
+  bool SeekToColumnFamily(uint32_t column_family_id, Status* s) {
+    // If we are in a concurrent mode, it is the caller's responsibility
+    // to clone the original ColumnFamilyMemTables so that each thread
+    // has its own instance. Otherwise, it must be guaranteed that there
+    // is no concurrent access
+    bool found = cf_mems_->Seek(column_family_id);
+    if (!found) {
+      if (ignore_missing_column_families_) {
+        *s = Status::OK();
+      } else {
+        *s = Status::InvalidArgument(
+            "Invalid column family specified in write batch");
+      }
+      return false;
+    }
+    if (recovering_log_number_ != 0 &&
+        recovering_log_number_ < cf_mems_->GetLogNumber()) {
+      // This is true only in recovery environment (recovering_log_number_ is
+      // always 0 in non-recovery, regular write code-path)
+      // * If recovering_log_number_ < cf_mems_->GetLogNumber(), this means
+      // that column family already contains updates from this log.
We can't apply + // updates twice because of update-in-place or merge workloads -- ignore + // the update + *s = Status::OK(); + return false; + } + + if (has_valid_writes_ != nullptr) { + *has_valid_writes_ = true; + } + + if (log_number_ref_ > 0) { + cf_mems_->GetMemTable()->RefLogContainingPrepSection(log_number_ref_); + } + + return true; + } + + Status PutCFImpl(uint32_t column_family_id, const Slice& key, + const Slice& value, ValueType value_type, + const ProtectionInfoKVOS64* kv_prot_info) { + // optimize for non-recovery mode + if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + return WriteBatchInternal::Put(rebuilding_trx_, column_family_id, key, + value); + // else insert the values to the memtable right away + } + + Status ret_status; + if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { + if (ret_status.ok() && rebuilding_trx_ != nullptr) { + assert(!write_after_commit_); + // The CF is probably flushed and hence no need for insert but we still + // need to keep track of the keys for upcoming rollback/commit. + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id, + key, value); + if (ret_status.ok()) { + MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key)); + } + } else if (ret_status.ok()) { + MaybeAdvanceSeq(false /* batch_boundary */); + } + return ret_status; + } + assert(ret_status.ok()); + + MemTable* mem = cf_mems_->GetMemTable(); + auto* moptions = mem->GetImmutableMemTableOptions(); + // inplace_update_support is inconsistent with snapshots, and therefore with + // any kind of transactions including the ones that use seq_per_batch + assert(!seq_per_batch_ || !moptions->inplace_update_support); + if (!moptions->inplace_update_support) { + ret_status = + mem->Add(sequence_, value_type, key, value, kv_prot_info, + concurrent_memtable_writes_, get_post_process_info(mem), + hint_per_batch_ ? &GetHintMap()[mem] : nullptr); + } else if (moptions->inplace_callback == nullptr || + value_type != kTypeValue) { + assert(!concurrent_memtable_writes_); + ret_status = mem->Update(sequence_, value_type, key, value, kv_prot_info); + } else { + assert(!concurrent_memtable_writes_); + assert(value_type == kTypeValue); + ret_status = mem->UpdateCallback(sequence_, key, value, kv_prot_info); + if (ret_status.IsNotFound()) { + // key not found in memtable. Do sst get, update, add + SnapshotImpl read_from_snapshot; + read_from_snapshot.number_ = sequence_; + // TODO: plumb Env::IOActivity + ReadOptions ropts; + // it's going to be overwritten for sure, so no point caching data block + // containing the old version + ropts.fill_cache = false; + ropts.snapshot = &read_from_snapshot; + + std::string prev_value; + std::string merged_value; + + auto cf_handle = cf_mems_->GetColumnFamilyHandle(); + Status get_status = Status::NotSupported(); + if (db_ != nullptr && recovering_log_number_ == 0) { + if (cf_handle == nullptr) { + cf_handle = db_->DefaultColumnFamily(); + } + // TODO (yanqin): fix when user-defined timestamp is enabled. + get_status = db_->Get(ropts, cf_handle, key, &prev_value); + } + // Intentionally overwrites the `NotFound` in `ret_status`. 
+      if (!get_status.ok() && !get_status.IsNotFound()) {
+        ret_status = get_status;
+      } else {
+        ret_status = Status::OK();
+      }
+      if (ret_status.ok()) {
+        UpdateStatus update_status;
+        char* prev_buffer = const_cast<char*>(prev_value.c_str());
+        uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
+        if (get_status.ok()) {
+          update_status = moptions->inplace_callback(prev_buffer, &prev_size,
+                                                     value, &merged_value);
+        } else {
+          update_status = moptions->inplace_callback(
+              nullptr /* existing_value */, nullptr /* existing_value_size */,
+              value, &merged_value);
+        }
+        if (update_status == UpdateStatus::UPDATED_INPLACE) {
+          assert(get_status.ok());
+          if (kv_prot_info != nullptr) {
+            ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
+            updated_kv_prot_info.UpdateV(value,
+                                         Slice(prev_buffer, prev_size));
+            // prev_value is updated in-place with final value.
+            ret_status = mem->Add(sequence_, value_type, key,
+                                  Slice(prev_buffer, prev_size),
+                                  &updated_kv_prot_info);
+          } else {
+            ret_status = mem->Add(sequence_, value_type, key,
+                                  Slice(prev_buffer, prev_size),
+                                  nullptr /* kv_prot_info */);
+          }
+          if (ret_status.ok()) {
+            RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN);
+          }
+        } else if (update_status == UpdateStatus::UPDATED) {
+          if (kv_prot_info != nullptr) {
+            ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
+            updated_kv_prot_info.UpdateV(value, merged_value);
+            // merged_value contains the final value.
+            ret_status = mem->Add(sequence_, value_type, key,
+                                  Slice(merged_value), &updated_kv_prot_info);
+          } else {
+            // merged_value contains the final value.
+            ret_status =
+                mem->Add(sequence_, value_type, key, Slice(merged_value),
+                         nullptr /* kv_prot_info */);
+          }
+          if (ret_status.ok()) {
+            RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN);
+          }
+        }
+      }
+    }
+  }
+    if (UNLIKELY(ret_status.IsTryAgain())) {
+      assert(seq_per_batch_);
+      const bool kBatchBoundary = true;
+      MaybeAdvanceSeq(kBatchBoundary);
+    } else if (ret_status.ok()) {
+      MaybeAdvanceSeq();
+      CheckMemtableFull();
+    }
+    // optimize for non-recovery mode
+    // If `ret_status` is `TryAgain` then the next (successful) try will add
+    // the key to the rebuilding transaction object. If `ret_status` is
+    // another non-OK `Status`, then the `rebuilding_trx_` will be thrown
+    // away. So we only need to add to it when `ret_status.ok()`.
+    if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
+      assert(!write_after_commit_);
+      // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
+      ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id,
+                                           key, value);
+    }
+    return ret_status;
+  }
+
+  Status PutCF(uint32_t column_family_id, const Slice& key,
+               const Slice& value) override {
+    const auto* kv_prot_info = NextProtectionInfo();
+    Status ret_status;
+    if (kv_prot_info != nullptr) {
+      // Memtable needs seqno, doesn't need CF ID
+      auto mem_kv_prot_info =
+          kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
+      ret_status = PutCFImpl(column_family_id, key, value, kTypeValue,
+                             &mem_kv_prot_info);
+    } else {
+      ret_status = PutCFImpl(column_family_id, key, value, kTypeValue,
+                             nullptr /* kv_prot_info */);
+    }
+    // TODO: this assumes that if TryAgain status is returned to the caller,
+    // the operation is actually tried again.
The proper way to do this is to + // pass a `try_again` parameter to the operation itself and decrement + // prot_info_idx_ based on that + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + + Status PutEntityCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + const auto* kv_prot_info = NextProtectionInfo(); + + Status s; + if (kv_prot_info) { + // Memtable needs seqno, doesn't need CF ID + auto mem_kv_prot_info = + kv_prot_info->StripC(column_family_id).ProtectS(sequence_); + s = PutCFImpl(column_family_id, key, value, kTypeWideColumnEntity, + &mem_kv_prot_info); + } else { + s = PutCFImpl(column_family_id, key, value, kTypeWideColumnEntity, + /* kv_prot_info */ nullptr); + } + + if (UNLIKELY(s.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + + return s; + } + + Status DeleteImpl(uint32_t /*column_family_id*/, const Slice& key, + const Slice& value, ValueType delete_type, + const ProtectionInfoKVOS64* kv_prot_info) { + Status ret_status; + MemTable* mem = cf_mems_->GetMemTable(); + ret_status = + mem->Add(sequence_, delete_type, key, value, kv_prot_info, + concurrent_memtable_writes_, get_post_process_info(mem), + hint_per_batch_ ? &GetHintMap()[mem] : nullptr); + if (UNLIKELY(ret_status.IsTryAgain())) { + assert(seq_per_batch_); + const bool kBatchBoundary = true; + MaybeAdvanceSeq(kBatchBoundary); + } else if (ret_status.ok()) { + MaybeAdvanceSeq(); + CheckMemtableFull(); + } + return ret_status; + } + + Status DeleteCF(uint32_t column_family_id, const Slice& key) override { + const auto* kv_prot_info = NextProtectionInfo(); + // optimize for non-recovery mode + if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + return WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key); + // else insert the values to the memtable right away + } + + Status ret_status; + if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { + if (ret_status.ok() && rebuilding_trx_ != nullptr) { + assert(!write_after_commit_); + // The CF is probably flushed and hence no need for insert but we still + // need to keep track of the keys for upcoming rollback/commit. + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + ret_status = + WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key); + if (ret_status.ok()) { + MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key)); + } + } else if (ret_status.ok()) { + MaybeAdvanceSeq(false /* batch_boundary */); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + + ColumnFamilyData* cfd = cf_mems_->current(); + assert(!cfd || cfd->user_comparator()); + const size_t ts_sz = (cfd && cfd->user_comparator()) + ? cfd->user_comparator()->timestamp_size() + : 0; + const ValueType delete_type = + (0 == ts_sz) ? kTypeDeletion : kTypeDeletionWithTimestamp; + if (kv_prot_info != nullptr) { + auto mem_kv_prot_info = + kv_prot_info->StripC(column_family_id).ProtectS(sequence_); + mem_kv_prot_info.UpdateO(kTypeDeletion, delete_type); + ret_status = DeleteImpl(column_family_id, key, Slice(), delete_type, + &mem_kv_prot_info); + } else { + ret_status = DeleteImpl(column_family_id, key, Slice(), delete_type, + nullptr /* kv_prot_info */); + } + // optimize for non-recovery mode + // If `ret_status` is `TryAgain` then the next (successful) try will add + // the key to the rebuilding transaction object. 
If `ret_status` is + // another non-OK `Status`, then the `rebuilding_trx_` will be thrown + // away. So we only need to add to it when `ret_status.ok()`. + if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) { + assert(!write_after_commit_); + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + ret_status = + WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + + Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) override { + const auto* kv_prot_info = NextProtectionInfo(); + // optimize for non-recovery mode + if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + return WriteBatchInternal::SingleDelete(rebuilding_trx_, column_family_id, + key); + // else insert the values to the memtable right away + } + + Status ret_status; + if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { + if (ret_status.ok() && rebuilding_trx_ != nullptr) { + assert(!write_after_commit_); + // The CF is probably flushed and hence no need for insert but we still + // need to keep track of the keys for upcoming rollback/commit. + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_, + column_family_id, key); + if (ret_status.ok()) { + MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key)); + } + } else if (ret_status.ok()) { + MaybeAdvanceSeq(false /* batch_boundary */); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + assert(ret_status.ok()); + + if (kv_prot_info != nullptr) { + auto mem_kv_prot_info = + kv_prot_info->StripC(column_family_id).ProtectS(sequence_); + ret_status = DeleteImpl(column_family_id, key, Slice(), + kTypeSingleDeletion, &mem_kv_prot_info); + } else { + ret_status = DeleteImpl(column_family_id, key, Slice(), + kTypeSingleDeletion, nullptr /* kv_prot_info */); + } + // optimize for non-recovery mode + // If `ret_status` is `TryAgain` then the next (successful) try will add + // the key to the rebuilding transaction object. If `ret_status` is + // another non-OK `Status`, then the `rebuilding_trx_` will be thrown + // away. So we only need to add to it when `ret_status.ok()`. + if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) { + assert(!write_after_commit_); + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_, + column_family_id, key); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + + Status DeleteRangeCF(uint32_t column_family_id, const Slice& begin_key, + const Slice& end_key) override { + const auto* kv_prot_info = NextProtectionInfo(); + // optimize for non-recovery mode + if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + return WriteBatchInternal::DeleteRange(rebuilding_trx_, column_family_id, + begin_key, end_key); + // else insert the values to the memtable right away + } + + Status ret_status; + if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { + if (ret_status.ok() && rebuilding_trx_ != nullptr) { + assert(!write_after_commit_); + // The CF is probably flushed and hence no need for insert but we still + // need to keep track of the keys for upcoming rollback/commit. 
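+      // (Note on the non-recovery path further below: before writing the
+      //  range tombstone, DeleteRange validates the endpoints, e.g.
+      //    batch.DeleteRange(cfh, "b", "a");  // InvalidArgument
+      //    batch.DeleteRange(cfh, "a", "a");  // empty range: OK, not applied
+      //  and it returns NotSupported for table types without range-deletion
+      //  support.)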
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + ret_status = WriteBatchInternal::DeleteRange( + rebuilding_trx_, column_family_id, begin_key, end_key); + if (ret_status.ok()) { + MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, begin_key)); + } + } else if (ret_status.ok()) { + MaybeAdvanceSeq(false /* batch_boundary */); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + assert(ret_status.ok()); + + if (db_ != nullptr) { + auto cf_handle = cf_mems_->GetColumnFamilyHandle(); + if (cf_handle == nullptr) { + cf_handle = db_->DefaultColumnFamily(); + } + auto* cfd = + static_cast_with_check(cf_handle)->cfd(); + if (!cfd->is_delete_range_supported()) { + // TODO(ajkr): refactor `SeekToColumnFamily()` so it returns a `Status`. + ret_status.PermitUncheckedError(); + return Status::NotSupported( + std::string("DeleteRange not supported for table type ") + + cfd->ioptions()->table_factory->Name() + " in CF " + + cfd->GetName()); + } + int cmp = + cfd->user_comparator()->CompareWithoutTimestamp(begin_key, end_key); + if (cmp > 0) { + // TODO(ajkr): refactor `SeekToColumnFamily()` so it returns a `Status`. + ret_status.PermitUncheckedError(); + // It's an empty range where endpoints appear mistaken. Don't bother + // applying it to the DB, and return an error to the user. + return Status::InvalidArgument("end key comes before start key"); + } else if (cmp == 0) { + // TODO(ajkr): refactor `SeekToColumnFamily()` so it returns a `Status`. + ret_status.PermitUncheckedError(); + // It's an empty range. Don't bother applying it to the DB. + return Status::OK(); + } + } + + if (kv_prot_info != nullptr) { + auto mem_kv_prot_info = + kv_prot_info->StripC(column_family_id).ProtectS(sequence_); + ret_status = DeleteImpl(column_family_id, begin_key, end_key, + kTypeRangeDeletion, &mem_kv_prot_info); + } else { + ret_status = DeleteImpl(column_family_id, begin_key, end_key, + kTypeRangeDeletion, nullptr /* kv_prot_info */); + } + // optimize for non-recovery mode + // If `ret_status` is `TryAgain` then the next (successful) try will add + // the key to the rebuilding transaction object. If `ret_status` is + // another non-OK `Status`, then the `rebuilding_trx_` will be thrown + // away. So we only need to add to it when `ret_status.ok()`. + if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { + assert(!write_after_commit_); + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + ret_status = WriteBatchInternal::DeleteRange( + rebuilding_trx_, column_family_id, begin_key, end_key); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + + Status MergeCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + const auto* kv_prot_info = NextProtectionInfo(); + // optimize for non-recovery mode + if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + return WriteBatchInternal::Merge(rebuilding_trx_, column_family_id, key, + value); + // else insert the values to the memtable right away + } + + Status ret_status; + if (UNLIKELY(!SeekToColumnFamily(column_family_id, &ret_status))) { + if (ret_status.ok() && rebuilding_trx_ != nullptr) { + assert(!write_after_commit_); + // The CF is probably flushed and hence no need for insert but we still + // need to keep track of the keys for upcoming rollback/commit. + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. 
+ ret_status = WriteBatchInternal::Merge(rebuilding_trx_, + column_family_id, key, value); + if (ret_status.ok()) { + MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key)); + } + } else if (ret_status.ok()) { + MaybeAdvanceSeq(false /* batch_boundary */); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + assert(ret_status.ok()); + + MemTable* mem = cf_mems_->GetMemTable(); + auto* moptions = mem->GetImmutableMemTableOptions(); + if (moptions->merge_operator == nullptr) { + return Status::InvalidArgument( + "Merge requires `ColumnFamilyOptions::merge_operator != nullptr`"); + } + bool perform_merge = false; + assert(!concurrent_memtable_writes_ || + moptions->max_successive_merges == 0); + + // If we pass DB through and options.max_successive_merges is hit + // during recovery, Get() will be issued which will try to acquire + // DB mutex and cause deadlock, as DB mutex is already held. + // So we disable merge in recovery + if (moptions->max_successive_merges > 0 && db_ != nullptr && + recovering_log_number_ == 0) { + assert(!concurrent_memtable_writes_); + LookupKey lkey(key, sequence_); + + // Count the number of successive merges at the head + // of the key in the memtable + size_t num_merges = mem->CountSuccessiveMergeEntries(lkey); + + if (num_merges >= moptions->max_successive_merges) { + perform_merge = true; + } + } + + if (perform_merge) { + // 1) Get the existing value + std::string get_value; + + // Pass in the sequence number so that we also include previous merge + // operations in the same batch. + SnapshotImpl read_from_snapshot; + read_from_snapshot.number_ = sequence_; + // TODO: plumb Env::IOActivity + ReadOptions read_options; + read_options.snapshot = &read_from_snapshot; + + auto cf_handle = cf_mems_->GetColumnFamilyHandle(); + if (cf_handle == nullptr) { + cf_handle = db_->DefaultColumnFamily(); + } + Status get_status = db_->Get(read_options, cf_handle, key, &get_value); + if (!get_status.ok()) { + // Failed to read a key we know exists. Store the delta in memtable. + perform_merge = false; + } else { + Slice get_value_slice = Slice(get_value); + + // 2) Apply this merge + auto merge_operator = moptions->merge_operator; + assert(merge_operator); + + std::string new_value; + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its value. + Status merge_status = MergeHelper::TimedFullMerge( + merge_operator, key, &get_value_slice, {value}, &new_value, + moptions->info_log, moptions->statistics, + SystemClock::Default().get(), /* result_operand */ nullptr, + /* update_num_ops_stats */ false, + /* op_failure_scope */ nullptr); + + if (!merge_status.ok()) { + // Failed to merge! 
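+        // (Context, not upstream code: this eager-merge path is reached only
+        //  when ColumnFamilyOptions::max_successive_merges is set and the key
+        //  already has that many consecutive merge operands in the memtable,
+        //  e.g. with
+        //    options.merge_operator = MergeOperators::CreateStringAppendOperator();
+        //    options.max_successive_merges = 8;
+        //  On success the operands are folded into a single kTypeValue entry
+        //  below; on failure the operand is stored as a plain merge record
+        //  instead.)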
+ // Store the delta in memtable + perform_merge = false; + } else { + // 3) Add value to memtable + assert(!concurrent_memtable_writes_); + if (kv_prot_info != nullptr) { + auto merged_kv_prot_info = + kv_prot_info->StripC(column_family_id).ProtectS(sequence_); + merged_kv_prot_info.UpdateV(value, new_value); + merged_kv_prot_info.UpdateO(kTypeMerge, kTypeValue); + ret_status = mem->Add(sequence_, kTypeValue, key, new_value, + &merged_kv_prot_info); + } else { + ret_status = mem->Add(sequence_, kTypeValue, key, new_value, + nullptr /* kv_prot_info */); + } + } + } + } + + if (!perform_merge) { + assert(ret_status.ok()); + // Add merge operand to memtable + if (kv_prot_info != nullptr) { + auto mem_kv_prot_info = + kv_prot_info->StripC(column_family_id).ProtectS(sequence_); + ret_status = + mem->Add(sequence_, kTypeMerge, key, value, &mem_kv_prot_info, + concurrent_memtable_writes_, get_post_process_info(mem)); + } else { + ret_status = mem->Add( + sequence_, kTypeMerge, key, value, nullptr /* kv_prot_info */, + concurrent_memtable_writes_, get_post_process_info(mem)); + } + } + + if (UNLIKELY(ret_status.IsTryAgain())) { + assert(seq_per_batch_); + const bool kBatchBoundary = true; + MaybeAdvanceSeq(kBatchBoundary); + } else if (ret_status.ok()) { + MaybeAdvanceSeq(); + CheckMemtableFull(); + } + // optimize for non-recovery mode + // If `ret_status` is `TryAgain` then the next (successful) try will add + // the key to the rebuilding transaction object. If `ret_status` is + // another non-OK `Status`, then the `rebuilding_trx_` will be thrown + // away. So we only need to add to it when `ret_status.ok()`. + if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) { + assert(!write_after_commit_); + // TODO(ajkr): propagate `ProtectionInfoKVOS64`. + ret_status = WriteBatchInternal::Merge(rebuilding_trx_, column_family_id, + key, value); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + + Status PutBlobIndexCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + const auto* kv_prot_info = NextProtectionInfo(); + Status ret_status; + if (kv_prot_info != nullptr) { + // Memtable needs seqno, doesn't need CF ID + auto mem_kv_prot_info = + kv_prot_info->StripC(column_family_id).ProtectS(sequence_); + // Same as PutCF except for value type. 
+ ret_status = PutCFImpl(column_family_id, key, value, kTypeBlobIndex, + &mem_kv_prot_info); + } else { + ret_status = PutCFImpl(column_family_id, key, value, kTypeBlobIndex, + nullptr /* kv_prot_info */); + } + if (UNLIKELY(ret_status.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + return ret_status; + } + + void CheckMemtableFull() { + if (flush_scheduler_ != nullptr) { + auto* cfd = cf_mems_->current(); + assert(cfd != nullptr); + if (cfd->mem()->ShouldScheduleFlush() && + cfd->mem()->MarkFlushScheduled()) { + // MarkFlushScheduled only returns true if we are the one that + // should take action, so no need to dedup further + flush_scheduler_->ScheduleWork(cfd); + } + } + // check if memtable_list size exceeds max_write_buffer_size_to_maintain + if (trim_history_scheduler_ != nullptr) { + auto* cfd = cf_mems_->current(); + + assert(cfd); + assert(cfd->ioptions()); + + const size_t size_to_maintain = static_cast( + cfd->ioptions()->max_write_buffer_size_to_maintain); + + if (size_to_maintain > 0) { + MemTableList* const imm = cfd->imm(); + assert(imm); + + if (imm->HasHistory()) { + const MemTable* const mem = cfd->mem(); + assert(mem); + + if (mem->MemoryAllocatedBytes() + + imm->MemoryAllocatedBytesExcludingLast() >= + size_to_maintain && + imm->MarkTrimHistoryNeeded()) { + trim_history_scheduler_->ScheduleWork(cfd); + } + } + } + } + } + + // The write batch handler calls MarkBeginPrepare with unprepare set to true + // if it encounters the kTypeBeginUnprepareXID marker. + Status MarkBeginPrepare(bool unprepare) override { + assert(rebuilding_trx_ == nullptr); + assert(db_); + + if (recovering_log_number_ != 0) { + db_->mutex()->AssertHeld(); + // during recovery we rebuild a hollow transaction + // from all encountered prepare sections of the wal + if (db_->allow_2pc() == false) { + return Status::NotSupported( + "WAL contains prepared transactions. Open with " + "TransactionDB::Open()."); + } + + // we are now iterating through a prepared section + rebuilding_trx_ = new WriteBatch(); + rebuilding_trx_seq_ = sequence_; + // Verify that we have matching MarkBeginPrepare/MarkEndPrepare markers. + // unprepared_batch_ should be false because it is false by default, and + // gets reset to false in MarkEndPrepare. + assert(!unprepared_batch_); + unprepared_batch_ = unprepare; + + if (has_valid_writes_ != nullptr) { + *has_valid_writes_ = true; + } + } + + return Status::OK(); + } + + Status MarkEndPrepare(const Slice& name) override { + assert(db_); + assert((rebuilding_trx_ != nullptr) == (recovering_log_number_ != 0)); + + if (recovering_log_number_ != 0) { + db_->mutex()->AssertHeld(); + assert(db_->allow_2pc()); + size_t batch_cnt = + write_after_commit_ + ? 0 // 0 will disable further checks + : static_cast(sequence_ - rebuilding_trx_seq_ + 1); + db_->InsertRecoveredTransaction(recovering_log_number_, name.ToString(), + rebuilding_trx_, rebuilding_trx_seq_, + batch_cnt, unprepared_batch_); + unprepared_batch_ = false; + rebuilding_trx_ = nullptr; + } else { + assert(rebuilding_trx_ == nullptr); + } + const bool batch_boundry = true; + MaybeAdvanceSeq(batch_boundry); + + return Status::OK(); + } + + Status MarkNoop(bool empty_batch) override { + if (recovering_log_number_ != 0) { + db_->mutex()->AssertHeld(); + } + // A hack in pessimistic transaction could result into a noop at the start + // of the write batch, that should be ignored. + if (!empty_batch) { + // In the absence of Prepare markers, a kTypeNoop tag indicates the end of + // a batch. 
This happens when write batch commits skipping the prepare + // phase. + const bool batch_boundry = true; + MaybeAdvanceSeq(batch_boundry); + } + return Status::OK(); + } + + Status MarkCommit(const Slice& name) override { + assert(db_); + + Status s; + + if (recovering_log_number_ != 0) { + // We must hold db mutex in recovery. + db_->mutex()->AssertHeld(); + // in recovery when we encounter a commit marker + // we lookup this transaction in our set of rebuilt transactions + // and commit. + auto trx = db_->GetRecoveredTransaction(name.ToString()); + + // the log containing the prepared section may have + // been released in the last incarnation because the + // data was flushed to L0 + if (trx != nullptr) { + // at this point individual CF lognumbers will prevent + // duplicate re-insertion of values. + assert(log_number_ref_ == 0); + if (write_after_commit_) { + // write_after_commit_ can only have one batch in trx. + assert(trx->batches_.size() == 1); + const auto& batch_info = trx->batches_.begin()->second; + // all inserts must reference this trx log number + log_number_ref_ = batch_info.log_number_; + ResetProtectionInfo(); + s = batch_info.batch_->Iterate(this); + log_number_ref_ = 0; + } + // else the values are already inserted before the commit + + if (s.ok()) { + db_->DeleteRecoveredTransaction(name.ToString()); + } + if (has_valid_writes_ != nullptr) { + *has_valid_writes_ = true; + } + } + } else { + // When writes are not delayed until commit, there is no disconnect + // between a memtable write and the WAL that supports it. So the commit + // need not reference any log as the only log to which it depends. + assert(!write_after_commit_ || log_number_ref_ > 0); + } + const bool batch_boundry = true; + MaybeAdvanceSeq(batch_boundry); + + if (UNLIKELY(s.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + + return s; + } + + Status MarkCommitWithTimestamp(const Slice& name, + const Slice& commit_ts) override { + assert(db_); + + Status s; + + if (recovering_log_number_ != 0) { + // In recovery, db mutex must be held. + db_->mutex()->AssertHeld(); + // in recovery when we encounter a commit marker + // we lookup this transaction in our set of rebuilt transactions + // and commit. + auto trx = db_->GetRecoveredTransaction(name.ToString()); + // the log containing the prepared section may have + // been released in the last incarnation because the + // data was flushed to L0 + if (trx) { + // at this point individual CF lognumbers will prevent + // duplicate re-insertion of values. + assert(0 == log_number_ref_); + if (write_after_commit_) { + // write_after_commit_ can only have one batch in trx. 
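+        // (Recovery-path sketch: in the WAL a two-phase transaction appears
+        //  roughly as
+        //    kTypeBeginPrepareXID, <puts/deletes/merges>,
+        //    kTypeEndPrepareXID(xid), ..., kTypeCommitXID(xid)
+        //  MarkBeginPrepare()/MarkEndPrepare() stashed the prepared section
+        //  in rebuilding_trx_; it is replayed into the memtables below only
+        //  once this commit marker is seen.)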
+ assert(trx->batches_.size() == 1); + const auto& batch_info = trx->batches_.begin()->second; + // all inserts must reference this trx log number + log_number_ref_ = batch_info.log_number_; + + s = batch_info.batch_->UpdateTimestamps( + commit_ts, [this](uint32_t cf) { + assert(db_); + VersionSet* const vset = db_->GetVersionSet(); + assert(vset); + ColumnFamilySet* const cf_set = vset->GetColumnFamilySet(); + assert(cf_set); + ColumnFamilyData* cfd = cf_set->GetColumnFamily(cf); + assert(cfd); + const auto* const ucmp = cfd->user_comparator(); + assert(ucmp); + return ucmp->timestamp_size(); + }); + if (s.ok()) { + ResetProtectionInfo(); + s = batch_info.batch_->Iterate(this); + log_number_ref_ = 0; + } + } + // else the values are already inserted before the commit + + if (s.ok()) { + db_->DeleteRecoveredTransaction(name.ToString()); + } + if (has_valid_writes_) { + *has_valid_writes_ = true; + } + } + } else { + // When writes are not delayed until commit, there is no connection + // between a memtable write and the WAL that supports it. So the commit + // need not reference any log as the only log to which it depends. + assert(!write_after_commit_ || log_number_ref_ > 0); + } + constexpr bool batch_boundary = true; + MaybeAdvanceSeq(batch_boundary); + + if (UNLIKELY(s.IsTryAgain())) { + DecrementProtectionInfoIdxForTryAgain(); + } + + return s; + } + + Status MarkRollback(const Slice& name) override { + assert(db_); + + if (recovering_log_number_ != 0) { + auto trx = db_->GetRecoveredTransaction(name.ToString()); + + // the log containing the transactions prep section + // may have been released in the previous incarnation + // because we knew it had been rolled back + if (trx != nullptr) { + db_->DeleteRecoveredTransaction(name.ToString()); + } + } else { + // in non recovery we simply ignore this tag + } + + const bool batch_boundry = true; + MaybeAdvanceSeq(batch_boundry); + + return Status::OK(); + } + + private: + MemTablePostProcessInfo* get_post_process_info(MemTable* mem) { + if (!concurrent_memtable_writes_) { + // No need to batch counters locally if we don't use concurrent mode. + return nullptr; + } + return &GetPostMap()[mem]; + } +}; + +} // anonymous namespace + +// This function can only be called in these conditions: +// 1) During Recovery() +// 2) During Write(), in a single-threaded write thread +// 3) During Write(), in a concurrent context where memtables has been cloned +// The reason is that it calls memtables->Seek(), which has a stateful cache +Status WriteBatchInternal::InsertInto( + WriteThread::WriteGroup& write_group, SequenceNumber sequence, + ColumnFamilyMemTables* memtables, FlushScheduler* flush_scheduler, + TrimHistoryScheduler* trim_history_scheduler, + bool ignore_missing_column_families, uint64_t recovery_log_number, DB* db, + bool concurrent_memtable_writes, bool seq_per_batch, bool batch_per_txn) { + MemTableInserter inserter( + sequence, memtables, flush_scheduler, trim_history_scheduler, + ignore_missing_column_families, recovery_log_number, db, + concurrent_memtable_writes, nullptr /* prot_info */, + nullptr /*has_valid_writes*/, seq_per_batch, batch_per_txn); + for (auto w : write_group) { + if (w->CallbackFailed()) { + continue; + } + w->sequence = inserter.sequence(); + if (!w->ShouldWriteToMemtable()) { + // In seq_per_batch_ mode this advances the seq by one. 
+ inserter.MaybeAdvanceSeq(true); + continue; + } + SetSequence(w->batch, inserter.sequence()); + inserter.set_log_number_ref(w->log_ref); + inserter.set_prot_info(w->batch->prot_info_.get()); + w->status = w->batch->Iterate(&inserter); + if (!w->status.ok()) { + return w->status; + } + assert(!seq_per_batch || w->batch_cnt != 0); + assert(!seq_per_batch || inserter.sequence() - w->sequence == w->batch_cnt); + } + return Status::OK(); +} + +Status WriteBatchInternal::InsertInto( + WriteThread::Writer* writer, SequenceNumber sequence, + ColumnFamilyMemTables* memtables, FlushScheduler* flush_scheduler, + TrimHistoryScheduler* trim_history_scheduler, + bool ignore_missing_column_families, uint64_t log_number, DB* db, + bool concurrent_memtable_writes, bool seq_per_batch, size_t batch_cnt, + bool batch_per_txn, bool hint_per_batch) { +#ifdef NDEBUG + (void)batch_cnt; +#endif + assert(writer->ShouldWriteToMemtable()); + MemTableInserter inserter(sequence, memtables, flush_scheduler, + trim_history_scheduler, + ignore_missing_column_families, log_number, db, + concurrent_memtable_writes, nullptr /* prot_info */, + nullptr /*has_valid_writes*/, seq_per_batch, + batch_per_txn, hint_per_batch); + SetSequence(writer->batch, sequence); + inserter.set_log_number_ref(writer->log_ref); + inserter.set_prot_info(writer->batch->prot_info_.get()); + Status s = writer->batch->Iterate(&inserter); + assert(!seq_per_batch || batch_cnt != 0); + assert(!seq_per_batch || inserter.sequence() - sequence == batch_cnt); + if (concurrent_memtable_writes) { + inserter.PostProcess(); + } + return s; +} + +Status WriteBatchInternal::InsertInto( + const WriteBatch* batch, ColumnFamilyMemTables* memtables, + FlushScheduler* flush_scheduler, + TrimHistoryScheduler* trim_history_scheduler, + bool ignore_missing_column_families, uint64_t log_number, DB* db, + bool concurrent_memtable_writes, SequenceNumber* next_seq, + bool* has_valid_writes, bool seq_per_batch, bool batch_per_txn) { + MemTableInserter inserter(Sequence(batch), memtables, flush_scheduler, + trim_history_scheduler, + ignore_missing_column_families, log_number, db, + concurrent_memtable_writes, batch->prot_info_.get(), + has_valid_writes, seq_per_batch, batch_per_txn); + Status s = batch->Iterate(&inserter); + if (next_seq != nullptr) { + *next_seq = inserter.sequence(); + } + if (concurrent_memtable_writes) { + inserter.PostProcess(); + } + return s; +} + +namespace { + +// This class updates protection info for a WriteBatch. 
+class ProtectionInfoUpdater : public WriteBatch::Handler { + public: + explicit ProtectionInfoUpdater(WriteBatch::ProtectionInfo* prot_info) + : prot_info_(prot_info) {} + + ~ProtectionInfoUpdater() override {} + + Status PutCF(uint32_t cf, const Slice& key, const Slice& val) override { + return UpdateProtInfo(cf, key, val, kTypeValue); + } + + Status PutEntityCF(uint32_t cf, const Slice& key, + const Slice& entity) override { + return UpdateProtInfo(cf, key, entity, kTypeWideColumnEntity); + } + + Status DeleteCF(uint32_t cf, const Slice& key) override { + return UpdateProtInfo(cf, key, "", kTypeDeletion); + } + + Status SingleDeleteCF(uint32_t cf, const Slice& key) override { + return UpdateProtInfo(cf, key, "", kTypeSingleDeletion); + } + + Status DeleteRangeCF(uint32_t cf, const Slice& begin_key, + const Slice& end_key) override { + return UpdateProtInfo(cf, begin_key, end_key, kTypeRangeDeletion); + } + + Status MergeCF(uint32_t cf, const Slice& key, const Slice& val) override { + return UpdateProtInfo(cf, key, val, kTypeMerge); + } + + Status PutBlobIndexCF(uint32_t cf, const Slice& key, + const Slice& val) override { + return UpdateProtInfo(cf, key, val, kTypeBlobIndex); + } + + Status MarkBeginPrepare(bool /* unprepare */) override { + return Status::OK(); + } + + Status MarkEndPrepare(const Slice& /* xid */) override { + return Status::OK(); + } + + Status MarkCommit(const Slice& /* xid */) override { return Status::OK(); } + + Status MarkCommitWithTimestamp(const Slice& /* xid */, + const Slice& /* ts */) override { + return Status::OK(); + } + + Status MarkRollback(const Slice& /* xid */) override { return Status::OK(); } + + Status MarkNoop(bool /* empty_batch */) override { return Status::OK(); } + + private: + Status UpdateProtInfo(uint32_t cf, const Slice& key, const Slice& val, + const ValueType op_type) { + if (prot_info_) { + prot_info_->entries_.emplace_back( + ProtectionInfo64().ProtectKVO(key, val, op_type).ProtectC(cf)); + } + return Status::OK(); + } + + // No copy or move. 
+ ProtectionInfoUpdater(const ProtectionInfoUpdater&) = delete; + ProtectionInfoUpdater(ProtectionInfoUpdater&&) = delete; + ProtectionInfoUpdater& operator=(const ProtectionInfoUpdater&) = delete; + ProtectionInfoUpdater& operator=(ProtectionInfoUpdater&&) = delete; + + WriteBatch::ProtectionInfo* const prot_info_ = nullptr; +}; + +} // anonymous namespace + +Status WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) { + assert(contents.size() >= WriteBatchInternal::kHeader); + assert(b->prot_info_ == nullptr); + + b->rep_.assign(contents.data(), contents.size()); + b->content_flags_.store(ContentFlags::DEFERRED, std::memory_order_relaxed); + return Status::OK(); +} + +Status WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src, + const bool wal_only) { + assert(dst->Count() == 0 || + (dst->prot_info_ == nullptr) == (src->prot_info_ == nullptr)); + if ((src->prot_info_ != nullptr && + src->prot_info_->entries_.size() != src->Count()) || + (dst->prot_info_ != nullptr && + dst->prot_info_->entries_.size() != dst->Count())) { + return Status::Corruption( + "Write batch has inconsistent count and number of checksums"); + } + + size_t src_len; + int src_count; + uint32_t src_flags; + + const SavePoint& batch_end = src->GetWalTerminationPoint(); + + if (wal_only && !batch_end.is_cleared()) { + src_len = batch_end.size - WriteBatchInternal::kHeader; + src_count = batch_end.count; + src_flags = batch_end.content_flags; + } else { + src_len = src->rep_.size() - WriteBatchInternal::kHeader; + src_count = Count(src); + src_flags = src->content_flags_.load(std::memory_order_relaxed); + } + + if (src->prot_info_ != nullptr) { + if (dst->prot_info_ == nullptr) { + dst->prot_info_.reset(new WriteBatch::ProtectionInfo()); + } + std::copy(src->prot_info_->entries_.begin(), + src->prot_info_->entries_.begin() + src_count, + std::back_inserter(dst->prot_info_->entries_)); + } else if (dst->prot_info_ != nullptr) { + // dst has empty prot_info->entries + // In this special case, we allow write batch without prot_info to + // be appende to write batch with empty prot_info + dst->prot_info_ = nullptr; + } + SetCount(dst, Count(dst) + src_count); + assert(src->rep_.size() >= WriteBatchInternal::kHeader); + dst->rep_.append(src->rep_.data() + WriteBatchInternal::kHeader, src_len); + dst->content_flags_.store( + dst->content_flags_.load(std::memory_order_relaxed) | src_flags, + std::memory_order_relaxed); + return Status::OK(); +} + +size_t WriteBatchInternal::AppendedByteSize(size_t leftByteSize, + size_t rightByteSize) { + if (leftByteSize == 0 || rightByteSize == 0) { + return leftByteSize + rightByteSize; + } else { + return leftByteSize + rightByteSize - WriteBatchInternal::kHeader; + } +} + +Status WriteBatchInternal::UpdateProtectionInfo(WriteBatch* wb, + size_t bytes_per_key, + uint64_t* checksum) { + if (bytes_per_key == 0) { + if (wb->prot_info_ != nullptr) { + wb->prot_info_.reset(); + return Status::OK(); + } else { + // Already not protected. + return Status::OK(); + } + } else if (bytes_per_key == 8) { + if (wb->prot_info_ == nullptr) { + wb->prot_info_.reset(new WriteBatch::ProtectionInfo()); + ProtectionInfoUpdater prot_info_updater(wb->prot_info_.get()); + Status s = wb->Iterate(&prot_info_updater); + if (s.ok() && checksum != nullptr) { + uint64_t expected_hash = XXH3_64bits(wb->rep_.data(), wb->rep_.size()); + if (expected_hash != *checksum) { + return Status::Corruption("Write batch content corrupted."); + } + } + return s; + } else { + // Already protected. 
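+      // (Usage sketch, an assumption rather than upstream code: a caller
+      //  that tracked a whole-batch hash can have it re-checked while the
+      //  per-key protection info is built in the branch above, e.g.
+      //    uint64_t sum = XXH3_64bits(rep.data(), rep.size());
+      //    ...
+      //    WriteBatchInternal::UpdateProtectionInfo(&wb, 8, &sum);
+      //  Passing nullptr for the checksum skips that verification.)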
+ return Status::OK(); + } + } + return Status::NotSupported( + "WriteBatch protection info must be zero or eight bytes/key"); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_base.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_base.cc new file mode 100644 index 0000000000..e4c0e74bd6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_base.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/write_batch_base.h" + +#include + +#include "rocksdb/slice.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +// Simple implementation of SlicePart variants of Put(). Child classes +// can override these method with more performant solutions if they choose. +Status WriteBatchBase::Put(ColumnFamilyHandle* column_family, + const SliceParts& key, const SliceParts& value) { + std::string key_buf, value_buf; + Slice key_slice(key, &key_buf); + Slice value_slice(value, &value_buf); + + return Put(column_family, key_slice, value_slice); +} + +Status WriteBatchBase::Put(const SliceParts& key, const SliceParts& value) { + std::string key_buf, value_buf; + Slice key_slice(key, &key_buf); + Slice value_slice(value, &value_buf); + + return Put(key_slice, value_slice); +} + +Status WriteBatchBase::Delete(ColumnFamilyHandle* column_family, + const SliceParts& key) { + std::string key_buf; + Slice key_slice(key, &key_buf); + return Delete(column_family, key_slice); +} + +Status WriteBatchBase::Delete(const SliceParts& key) { + std::string key_buf; + Slice key_slice(key, &key_buf); + return Delete(key_slice); +} + +Status WriteBatchBase::SingleDelete(ColumnFamilyHandle* column_family, + const SliceParts& key) { + std::string key_buf; + Slice key_slice(key, &key_buf); + return SingleDelete(column_family, key_slice); +} + +Status WriteBatchBase::SingleDelete(const SliceParts& key) { + std::string key_buf; + Slice key_slice(key, &key_buf); + return SingleDelete(key_slice); +} + +Status WriteBatchBase::DeleteRange(ColumnFamilyHandle* column_family, + const SliceParts& begin_key, + const SliceParts& end_key) { + std::string begin_key_buf, end_key_buf; + Slice begin_key_slice(begin_key, &begin_key_buf); + Slice end_key_slice(end_key, &end_key_buf); + return DeleteRange(column_family, begin_key_slice, end_key_slice); +} + +Status WriteBatchBase::DeleteRange(const SliceParts& begin_key, + const SliceParts& end_key) { + std::string begin_key_buf, end_key_buf; + Slice begin_key_slice(begin_key, &begin_key_buf); + Slice end_key_slice(end_key, &end_key_buf); + return DeleteRange(begin_key_slice, end_key_slice); +} + +Status WriteBatchBase::Merge(ColumnFamilyHandle* column_family, + const SliceParts& key, const SliceParts& value) { + std::string key_buf, value_buf; + Slice key_slice(key, &key_buf); + Slice value_slice(value, &value_buf); + + return Merge(column_family, key_slice, value_slice); +} + +Status WriteBatchBase::Merge(const SliceParts& key, const SliceParts& value) { + std::string key_buf, value_buf; + Slice key_slice(key, &key_buf); + Slice value_slice(value, &value_buf); + + return Merge(key_slice, value_slice); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_internal.h 
b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_internal.h new file mode 100644 index 0000000000..1be0bd1400 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_batch_internal.h @@ -0,0 +1,401 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include +#include + +#include "db/flush_scheduler.h" +#include "db/kv_checksum.h" +#include "db/trim_history_scheduler.h" +#include "db/write_thread.h" +#include "rocksdb/db.h" +#include "rocksdb/options.h" +#include "rocksdb/types.h" +#include "rocksdb/write_batch.h" +#include "util/autovector.h" +#include "util/cast_util.h" + +namespace ROCKSDB_NAMESPACE { + +class MemTable; +class FlushScheduler; +class ColumnFamilyData; + +class ColumnFamilyMemTables { + public: + virtual ~ColumnFamilyMemTables() {} + virtual bool Seek(uint32_t column_family_id) = 0; + // returns true if the update to memtable should be ignored + // (useful when recovering from log whose updates have already + // been processed) + virtual uint64_t GetLogNumber() const = 0; + virtual MemTable* GetMemTable() const = 0; + virtual ColumnFamilyHandle* GetColumnFamilyHandle() = 0; + virtual ColumnFamilyData* current() { return nullptr; } +}; + +class ColumnFamilyMemTablesDefault : public ColumnFamilyMemTables { + public: + explicit ColumnFamilyMemTablesDefault(MemTable* mem) + : ok_(false), mem_(mem) {} + + bool Seek(uint32_t column_family_id) override { + ok_ = (column_family_id == 0); + return ok_; + } + + uint64_t GetLogNumber() const override { return 0; } + + MemTable* GetMemTable() const override { + assert(ok_); + return mem_; + } + + ColumnFamilyHandle* GetColumnFamilyHandle() override { return nullptr; } + + private: + bool ok_; + MemTable* mem_; +}; + +struct WriteBatch::ProtectionInfo { + // `WriteBatch` usually doesn't contain a huge number of keys so protecting + // with a fixed, non-configurable eight bytes per key may work well enough. + autovector entries_; + + size_t GetBytesPerKey() const { return 8; } +}; + +// WriteBatchInternal provides static methods for manipulating a +// WriteBatch that we don't want in the public WriteBatch interface. +class WriteBatchInternal { + public: + // WriteBatch header has an 8-byte sequence number followed by a 4-byte count. 
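+  // Layout sketch of the serialized batch (the 12-byte header is declared
+  // just below; fixed-width integers are little-endian, and varstring is a
+  // varint32 length followed by that many bytes):
+  //
+  //   rep_ :=
+  //      sequence: fixed64
+  //      count: fixed32
+  //      data: record[count]
+  //   record :=
+  //      kTypeValue varstring varstring
+  //      kTypeDeletion varstring
+  //      kTypeColumnFamilyValue varint32 varstring varstring
+  //      ...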
+ static constexpr size_t kHeader = 12; + + // WriteBatch methods with column_family_id instead of ColumnFamilyHandle* + static Status Put(WriteBatch* batch, uint32_t column_family_id, + const Slice& key, const Slice& value); + + static Status Put(WriteBatch* batch, uint32_t column_family_id, + const SliceParts& key, const SliceParts& value); + + static Status PutEntity(WriteBatch* batch, uint32_t column_family_id, + const Slice& key, const WideColumns& columns); + + static Status Delete(WriteBatch* batch, uint32_t column_family_id, + const SliceParts& key); + + static Status Delete(WriteBatch* batch, uint32_t column_family_id, + const Slice& key); + + static Status SingleDelete(WriteBatch* batch, uint32_t column_family_id, + const SliceParts& key); + + static Status SingleDelete(WriteBatch* batch, uint32_t column_family_id, + const Slice& key); + + static Status DeleteRange(WriteBatch* b, uint32_t column_family_id, + const Slice& begin_key, const Slice& end_key); + + static Status DeleteRange(WriteBatch* b, uint32_t column_family_id, + const SliceParts& begin_key, + const SliceParts& end_key); + + static Status Merge(WriteBatch* batch, uint32_t column_family_id, + const Slice& key, const Slice& value); + + static Status Merge(WriteBatch* batch, uint32_t column_family_id, + const SliceParts& key, const SliceParts& value); + + static Status PutBlobIndex(WriteBatch* batch, uint32_t column_family_id, + const Slice& key, const Slice& value); + + static Status MarkEndPrepare(WriteBatch* batch, const Slice& xid, + const bool write_after_commit = true, + const bool unprepared_batch = false); + + static Status MarkRollback(WriteBatch* batch, const Slice& xid); + + static Status MarkCommit(WriteBatch* batch, const Slice& xid); + + static Status MarkCommitWithTimestamp(WriteBatch* batch, const Slice& xid, + const Slice& commit_ts); + + static Status InsertNoop(WriteBatch* batch); + + // Return the number of entries in the batch. + static uint32_t Count(const WriteBatch* batch); + + // Set the count for the number of entries in the batch. + static void SetCount(WriteBatch* batch, uint32_t n); + + // Return the sequence number for the start of this batch. + static SequenceNumber Sequence(const WriteBatch* batch); + + // Store the specified number as the sequence number for the start of + // this batch. + static void SetSequence(WriteBatch* batch, SequenceNumber seq); + + // Returns the offset of the first entry in the batch. + // This offset is only valid if the batch is not empty. + static size_t GetFirstOffset(WriteBatch* batch); + + static Slice Contents(const WriteBatch* batch) { return Slice(batch->rep_); } + + static size_t ByteSize(const WriteBatch* batch) { return batch->rep_.size(); } + + static Status SetContents(WriteBatch* batch, const Slice& contents); + + static Status CheckSlicePartsLength(const SliceParts& key, + const SliceParts& value); + + // Inserts batches[i] into memtable, for i in 0..num_batches-1 inclusive. + // + // If ignore_missing_column_families == true. WriteBatch + // referencing non-existing column family will be ignored. + // If ignore_missing_column_families == false, processing of the + // batches will be stopped if a reference is found to a non-existing + // column family and InvalidArgument() will be returned. The writes + // in batches may be only partially applied at that point. + // + // If log_number is non-zero, the memtable will be updated only if + // memtables->GetLogNumber() >= log_number. 
+  //
+  // If flush_scheduler is non-null, it will be invoked if the memtable
+  // should be flushed.
+  //
+  // Under concurrent use, the caller is responsible for making sure that
+  // the memtables object itself is thread-local.
+  static Status InsertInto(
+      WriteThread::WriteGroup& write_group, SequenceNumber sequence,
+      ColumnFamilyMemTables* memtables, FlushScheduler* flush_scheduler,
+      TrimHistoryScheduler* trim_history_scheduler,
+      bool ignore_missing_column_families = false, uint64_t log_number = 0,
+      DB* db = nullptr, bool concurrent_memtable_writes = false,
+      bool seq_per_batch = false, bool batch_per_txn = true);
+
+  // Convenience form of InsertInto when you have only one batch.
+  // next_seq returns the sequence number after the last sequence number
+  // used in the MemTable insert.
+  static Status InsertInto(
+      const WriteBatch* batch, ColumnFamilyMemTables* memtables,
+      FlushScheduler* flush_scheduler,
+      TrimHistoryScheduler* trim_history_scheduler,
+      bool ignore_missing_column_families = false, uint64_t log_number = 0,
+      DB* db = nullptr, bool concurrent_memtable_writes = false,
+      SequenceNumber* next_seq = nullptr, bool* has_valid_writes = nullptr,
+      bool seq_per_batch = false, bool batch_per_txn = true);
+
+  static Status InsertInto(WriteThread::Writer* writer, SequenceNumber sequence,
+                           ColumnFamilyMemTables* memtables,
+                           FlushScheduler* flush_scheduler,
+                           TrimHistoryScheduler* trim_history_scheduler,
+                           bool ignore_missing_column_families = false,
+                           uint64_t log_number = 0, DB* db = nullptr,
+                           bool concurrent_memtable_writes = false,
+                           bool seq_per_batch = false, size_t batch_cnt = 0,
+                           bool batch_per_txn = true,
+                           bool hint_per_batch = false);
+
+  // Appends the src write batch to the dst write batch and updates the count
+  // in dst. Returns OK if the append is successful. Checks the number of
+  // checksums against the entry counts in the dst and src write batches, and
+  // returns Corruption if they are inconsistent.
+  static Status Append(WriteBatch* dst, const WriteBatch* src,
+                       const bool WAL_only = false);
+
+  // Returns the byte size of appending a WriteBatch with ByteSize
+  // leftByteSize and a WriteBatch with ByteSize rightByteSize
+  static size_t AppendedByteSize(size_t leftByteSize, size_t rightByteSize);
+
+  // Iterate over the [begin, end) range of a write batch
+  static Status Iterate(const WriteBatch* wb, WriteBatch::Handler* handler,
+                        size_t begin, size_t end);
+
+  // This write batch includes the latest state that should be persisted. Such
+  // state is meant to be used only during recovery.
+  static void SetAsLatestPersistentState(WriteBatch* b);
+  static bool IsLatestPersistentState(const WriteBatch* b);
+
+  static std::tuple GetColumnFamilyIdAndTimestampSize(
+      WriteBatch* b, ColumnFamilyHandle* column_family);
+
+  static bool TimestampsUpdateNeeded(const WriteBatch& wb) {
+    return wb.needs_in_place_update_ts_;
+  }
+
+  static bool HasKeyWithTimestamp(const WriteBatch& wb) {
+    return wb.has_key_with_ts_;
+  }
+
+  // Update per-key-value protection information on this write batch.
+  // If a checksum is provided, the batch content is verified against the
+  // checksum.
+  static Status UpdateProtectionInfo(WriteBatch* wb, size_t bytes_per_key,
+                                     uint64_t* checksum = nullptr);
+};
+
+// LocalSavePoint is similar to a scope guard
+class LocalSavePoint {
+ public:
+  explicit LocalSavePoint(WriteBatch* batch)
+      : batch_(batch),
+        savepoint_(batch->GetDataSize(), batch->Count(),
+                   batch->content_flags_.load(std::memory_order_relaxed))
+#ifndef NDEBUG
+        ,
+        committed_(false)
+#endif
+  {
+  }
+
+#ifndef NDEBUG
+  ~LocalSavePoint() { assert(committed_); }
+#endif
+  Status commit() {
+#ifndef NDEBUG
+    committed_ = true;
+#endif
+    if (batch_->max_bytes_ && batch_->rep_.size() > batch_->max_bytes_) {
+      batch_->rep_.resize(savepoint_.size);
+      WriteBatchInternal::SetCount(batch_, savepoint_.count);
+      if (batch_->prot_info_ != nullptr) {
+        batch_->prot_info_->entries_.resize(savepoint_.count);
+      }
+      batch_->content_flags_.store(savepoint_.content_flags,
+                                   std::memory_order_relaxed);
+      return Status::MemoryLimit();
+    }
+    return Status::OK();
+  }
+
+ private:
+  WriteBatch* batch_;
+  SavePoint savepoint_;
+#ifndef NDEBUG
+  bool committed_;
+#endif
+};
+
+template <typename TimestampSizeFuncType>
+class TimestampUpdater : public WriteBatch::Handler {
+ public:
+  explicit TimestampUpdater(WriteBatch::ProtectionInfo* prot_info,
+                            TimestampSizeFuncType&& ts_sz_func, const Slice& ts)
+      : prot_info_(prot_info),
+        ts_sz_func_(std::move(ts_sz_func)),
+        timestamp_(ts) {
+    assert(!timestamp_.empty());
+  }
+
+  ~TimestampUpdater() override {}
+
+  Status PutCF(uint32_t cf, const Slice& key, const Slice&) override {
+    return UpdateTimestamp(cf, key);
+  }
+
+  Status DeleteCF(uint32_t cf, const Slice& key) override {
+    return UpdateTimestamp(cf, key);
+  }
+
+  Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
+    return UpdateTimestamp(cf, key);
+  }
+
+  Status DeleteRangeCF(uint32_t cf, const Slice& begin_key,
+                       const Slice& end_key) override {
+    Status s = UpdateTimestamp(cf, begin_key, true /* is_key */);
+    if (s.ok()) {
+      s = UpdateTimestamp(cf, end_key, false /* is_key */);
+    }
+    return s;
+  }
+
+  Status MergeCF(uint32_t cf, const Slice& key, const Slice&) override {
+    return UpdateTimestamp(cf, key);
+  }
+
+  Status PutBlobIndexCF(uint32_t cf, const Slice& key, const Slice&) override {
+    return UpdateTimestamp(cf, key);
+  }
+
+  Status MarkBeginPrepare(bool) override { return Status::OK(); }
+
+  Status MarkEndPrepare(const Slice&) override { return Status::OK(); }
+
+  Status MarkCommit(const Slice&) override { return Status::OK(); }
+
+  Status MarkCommitWithTimestamp(const Slice&, const Slice&) override {
+    return Status::OK();
+  }
+
+  Status MarkRollback(const Slice&) override { return Status::OK(); }
+
+  Status MarkNoop(bool /*empty_batch*/) override { return Status::OK(); }
+
+ private:
+  // @param is_key specifies whether the update is for key or value.
+  Status UpdateTimestamp(uint32_t cf, const Slice& buf, bool is_key = true) {
+    Status s = UpdateTimestampImpl(cf, buf, idx_, is_key);
+    ++idx_;
+    return s;
+  }
+
+  Status UpdateTimestampImpl(uint32_t cf, const Slice& buf, size_t /*idx*/,
+                             bool is_key) {
+    if (timestamp_.empty()) {
+      return Status::InvalidArgument("Timestamp is empty");
+    }
+    size_t cf_ts_sz = ts_sz_func_(cf);
+    if (0 == cf_ts_sz) {
+      // Skip this column family.
+      return Status::OK();
+    } else if (std::numeric_limits<size_t>::max() == cf_ts_sz) {
+      // Column family timestamp info not found.
+      return Status::NotFound();
+    } else if (cf_ts_sz != timestamp_.size()) {
+      return Status::InvalidArgument("timestamp size mismatch");
+    }
+    UpdateProtectionInformationIfNeeded(buf, timestamp_, is_key);
+
+    char* ptr = const_cast<char*>(buf.data() + buf.size() - cf_ts_sz);
+    assert(ptr);
+    memcpy(ptr, timestamp_.data(), timestamp_.size());
+    return Status::OK();
+  }
+
+  void UpdateProtectionInformationIfNeeded(const Slice& buf, const Slice& ts,
+                                           bool is_key) {
+    if (prot_info_ != nullptr) {
+      const size_t ts_sz = ts.size();
+      SliceParts old(&buf, 1);
+      Slice old_no_ts(buf.data(), buf.size() - ts_sz);
+      std::array<Slice, 2> new_key_cmpts{{old_no_ts, ts}};
+      SliceParts new_parts(new_key_cmpts.data(), 2);
+      if (is_key) {
+        prot_info_->entries_[idx_].UpdateK(old, new_parts);
+      } else {
+        prot_info_->entries_[idx_].UpdateV(old, new_parts);
+      }
+    }
+  }
+
+  // No copy or move.
+  TimestampUpdater(const TimestampUpdater&) = delete;
+  TimestampUpdater(TimestampUpdater&&) = delete;
+  TimestampUpdater& operator=(const TimestampUpdater&) = delete;
+  TimestampUpdater& operator=(TimestampUpdater&&) = delete;
+
+  WriteBatch::ProtectionInfo* const prot_info_ = nullptr;
+  const TimestampSizeFuncType ts_sz_func_{};
+  const Slice timestamp_;
+  size_t idx_ = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_callback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_callback.h
new file mode 100644
index 0000000000..106d020417
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_callback.h
@@ -0,0 +1,27 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DB;
+
+class WriteCallback {
+ public:
+  virtual ~WriteCallback() {}
+
+  // Will be called while on the write thread before the write executes. If
+  // this function returns a non-OK status, the write will be aborted and this
+  // status will be returned to the caller of DB::Write().
+  virtual Status Callback(DB* db) = 0;
+
+  // return true if writes with this callback can be batched with other writes
+  virtual bool AllowWriteBatching() = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.cc
new file mode 100644
index 0000000000..c5f7443752
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.cc
@@ -0,0 +1,121 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/write_controller.h"
+
+#include
+#include
+#include
+#include
+
+#include "rocksdb/system_clock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+std::unique_ptr<WriteControllerToken> WriteController::GetStopToken() {
+  ++total_stopped_;
+  return std::unique_ptr<WriteControllerToken>(new StopWriteToken(this));
+}
+
+std::unique_ptr<WriteControllerToken> WriteController::GetDelayToken(
+    uint64_t write_rate) {
+  if (0 == total_delayed_++) {
+    // Starting delay, so reset counters.
+    next_refill_time_ = 0;
+    credit_in_bytes_ = 0;
+  }
+  // NOTE: for simplicity, any current credit_in_bytes_ or "debt" in
+  // next_refill_time_ will be based on an old rate. This rate will apply
+  // for subsequent additional debts and for the next refill.
+  set_delayed_write_rate(write_rate);
+  return std::unique_ptr<WriteControllerToken>(new DelayWriteToken(this));
+}
+
+std::unique_ptr<WriteControllerToken>
+WriteController::GetCompactionPressureToken() {
+  ++total_compaction_pressure_;
+  return std::unique_ptr<WriteControllerToken>(
+      new CompactionPressureToken(this));
+}
+
+bool WriteController::IsStopped() const {
+  return total_stopped_.load(std::memory_order_relaxed) > 0;
+}
+// This is inside the DB mutex, so we can't sleep and need to minimize
+// the frequency of getting the time.
+// If it turns out to be a performance issue, we can redesign the thread
+// synchronization model here.
+// The function trusts that the caller will sleep for the number of
+// microseconds it returns.
+uint64_t WriteController::GetDelay(SystemClock* clock, uint64_t num_bytes) {
+  if (total_stopped_.load(std::memory_order_relaxed) > 0) {
+    return 0;
+  }
+  if (total_delayed_.load(std::memory_order_relaxed) == 0) {
+    return 0;
+  }
+
+  if (credit_in_bytes_ >= num_bytes) {
+    credit_in_bytes_ -= num_bytes;
+    return 0;
+  }
+  // The frequency to get time inside DB mutex is less than one per refill
+  // interval.
+  auto time_now = NowMicrosMonotonic(clock);
+
+  const uint64_t kMicrosPerSecond = 1000000;
+  // Refill every 1 ms
+  const uint64_t kMicrosPerRefill = 1000;
+
+  if (next_refill_time_ == 0) {
+    // Start with an initial allotment of bytes for one interval
+    next_refill_time_ = time_now;
+  }
+  if (next_refill_time_ <= time_now) {
+    // Refill based on time interval plus any extra elapsed
+    uint64_t elapsed = time_now - next_refill_time_ + kMicrosPerRefill;
+    credit_in_bytes_ += static_cast<uint64_t>(
+        1.0 * elapsed / kMicrosPerSecond * delayed_write_rate_ + 0.999999);
+    next_refill_time_ = time_now + kMicrosPerRefill;
+
+    if (credit_in_bytes_ >= num_bytes) {
+      // Avoid delay if possible, to reduce DB mutex release & re-acquire.
+      credit_in_bytes_ -= num_bytes;
+      return 0;
+    }
+  }
+
+  // We need to delay to avoid exceeding the write rate.
+  assert(num_bytes > credit_in_bytes_);
+  uint64_t bytes_over_budget = num_bytes - credit_in_bytes_;
+  uint64_t needed_delay = static_cast<uint64_t>(
+      1.0 * bytes_over_budget / delayed_write_rate_ * kMicrosPerSecond);
+
+  credit_in_bytes_ = 0;
+  next_refill_time_ += needed_delay;
+
+  // Minimum delay of one refill interval, to reduce DB mutex contention.
+  return std::max(next_refill_time_ - time_now, kMicrosPerRefill);
+}
+
+uint64_t WriteController::NowMicrosMonotonic(SystemClock* clock) {
+  return clock->NowNanos() / std::milli::den;
+}
+
+StopWriteToken::~StopWriteToken() {
+  assert(controller_->total_stopped_ >= 1);
+  --controller_->total_stopped_;
+}
+
+DelayWriteToken::~DelayWriteToken() {
+  controller_->total_delayed_--;
+  assert(controller_->total_delayed_.load() >= 0);
+}
+
+CompactionPressureToken::~CompactionPressureToken() {
+  controller_->total_compaction_pressure_--;
+  assert(controller_->total_compaction_pressure_ >= 0);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.h
new file mode 100644
index 0000000000..bcead165b3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_controller.h
@@ -0,0 +1,148 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include
+
+#include
+#include
+
+#include "rocksdb/rate_limiter.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class SystemClock;
+class WriteControllerToken;
+
+// WriteController controls write stalls in our write code path. Write
+// stalls happen when compaction can't keep up with the write rate.
+// All of the methods here (including WriteControllerToken's destructors) need
+// to be called while holding the DB mutex
+class WriteController {
+ public:
+  explicit WriteController(uint64_t _delayed_write_rate = 1024u * 1024u * 32u,
+                           int64_t low_pri_rate_bytes_per_sec = 1024 * 1024)
+      : total_stopped_(0),
+        total_delayed_(0),
+        total_compaction_pressure_(0),
+        credit_in_bytes_(0),
+        next_refill_time_(0),
+        low_pri_rate_limiter_(
+            NewGenericRateLimiter(low_pri_rate_bytes_per_sec)) {
+    set_max_delayed_write_rate(_delayed_write_rate);
+  }
+  ~WriteController() = default;
+
+  // When an actor (column family) requests a stop token, all writes will be
+  // stopped until the stop token is released (deleted)
+  std::unique_ptr<WriteControllerToken> GetStopToken();
+  // When an actor (column family) requests a delay token, the total delay for
+  // all writes to the DB will be controlled under the delayed write rate.
+  // Every write needs to call GetDelay() with the number of bytes to be
+  // written to the DB, which returns the number of microseconds to sleep.
+  std::unique_ptr<WriteControllerToken> GetDelayToken(
+      uint64_t delayed_write_rate);
+  // When an actor (column family) requests a compaction pressure token,
+  // compaction threads will be increased
+  std::unique_ptr<WriteControllerToken> GetCompactionPressureToken();
+
+  // these three methods query the state of the WriteController
+  bool IsStopped() const;
+  bool NeedsDelay() const { return total_delayed_.load() > 0; }
+  bool NeedSpeedupCompaction() const {
+    return IsStopped() || NeedsDelay() || total_compaction_pressure_.load() > 0;
+  }
+  // returns how many microseconds the caller needs to sleep after the call
+  // num_bytes: the number of bytes to put into the DB.
+  // Prerequisite: DB mutex held.
+  uint64_t GetDelay(SystemClock* clock, uint64_t num_bytes);
+  void set_delayed_write_rate(uint64_t write_rate) {
+    // avoid divide-by-0
+    if (write_rate == 0) {
+      write_rate = 1u;
+    } else if (write_rate > max_delayed_write_rate()) {
+      write_rate = max_delayed_write_rate();
+    }
+    delayed_write_rate_ = write_rate;
+  }
+
+  void set_max_delayed_write_rate(uint64_t write_rate) {
+    // avoid divide-by-0
+    if (write_rate == 0) {
+      write_rate = 1u;
+    }
+    max_delayed_write_rate_ = write_rate;
+    // update delayed_write_rate_ as well
+    delayed_write_rate_ = write_rate;
+  }
+
+  uint64_t delayed_write_rate() const { return delayed_write_rate_; }
+
+  uint64_t max_delayed_write_rate() const { return max_delayed_write_rate_; }
+
+  RateLimiter* low_pri_rate_limiter() { return low_pri_rate_limiter_.get(); }
+
+ private:
+  uint64_t NowMicrosMonotonic(SystemClock* clock);
+
+  friend class WriteControllerToken;
+  friend class StopWriteToken;
+  friend class DelayWriteToken;
+  friend class CompactionPressureToken;
+
+  std::atomic<int> total_stopped_;
+  std::atomic<int> total_delayed_;
+  std::atomic<int> total_compaction_pressure_;
+
+  // Number of bytes allowed to write without delay
+  uint64_t credit_in_bytes_;
+  // Next time that we can add more credit of bytes
+  uint64_t next_refill_time_;
+  // Write rate set at initialization or by `DBImpl::SetDBOptions`
+  uint64_t max_delayed_write_rate_;
+  // Current write rate (bytes / second)
+  uint64_t delayed_write_rate_;
+
+  std::unique_ptr<RateLimiter> low_pri_rate_limiter_;
+};
+
+class WriteControllerToken {
+ public:
+  explicit WriteControllerToken(WriteController* controller)
+      : controller_(controller) {}
+  virtual ~WriteControllerToken() {}
+
+ protected:
+  WriteController* controller_;
+
+ private:
+  // no copying allowed
+  WriteControllerToken(const WriteControllerToken&) = delete;
+  void operator=(const WriteControllerToken&) = delete;
+};
+
+class StopWriteToken : public WriteControllerToken {
+ public:
+  explicit StopWriteToken(WriteController* controller)
+      : WriteControllerToken(controller) {}
+  virtual ~StopWriteToken();
+};
+
+class DelayWriteToken : public WriteControllerToken {
+ public:
+  explicit DelayWriteToken(WriteController* controller)
+      : WriteControllerToken(controller) {}
+  virtual ~DelayWriteToken();
+};
+
+class CompactionPressureToken : public WriteControllerToken {
+ public:
+  explicit CompactionPressureToken(WriteController* controller)
+      : WriteControllerToken(controller) {}
+  virtual ~CompactionPressureToken();
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.cc
new file mode 100644
index 0000000000..3973df7685
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.cc
@@ -0,0 +1,179 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
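The delay-token path in write_controller.cc amounts to a token bucket: every 1 ms refill interval adds roughly `delayed_write_rate_ / 1000` bytes of credit, and `GetDelay` either spends credit or returns a sleep time that pushes `next_refill_time_` forward. The following usage sketch is illustrative only and not part of the vendored sources; it assumes RocksDB's `SystemClock::Default()`, and `SleepMicros` is a hypothetical stand-in for the sleep the real write path performs while not holding the mutex:

    // Illustrative sketch only -- not part of the vendored sources.
    #include <cstdint>
    #include <memory>

    #include "db/write_controller.h"
    #include "rocksdb/system_clock.h"

    using ROCKSDB_NAMESPACE::SystemClock;
    using ROCKSDB_NAMESPACE::WriteController;

    void SleepMicros(uint64_t micros);  // hypothetical sleep helper

    void WriteWithBackpressure(WriteController& controller,
                               uint64_t num_bytes) {
      // A stop token outstanding anywhere means no writes are admitted.
      if (controller.IsStopped()) {
        return;  // the real write path parks the writer instead
      }
      // GetDelay spends byte credit; a non-zero result is how long to sleep.
      // (The real caller holds the DB mutex here, per the comment above.)
      uint64_t delay_us =
          controller.GetDelay(SystemClock::Default().get(), num_bytes);
      if (delay_us > 0) {
        SleepMicros(delay_us);
      }
      // ... proceed to append num_bytes to the WAL/memtable ...
    }

A column family under compaction debt would pair this with a `controller.GetDelayToken(rate)` held for the duration of the stall; deleting the token lifts the delay.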
+
+#include "db/write_stall_stats.h"
+
+namespace ROCKSDB_NAMESPACE {
+const std::string& InvalidWriteStallHyphenString() {
+  static const std::string kInvalidWriteStallHyphenString = "invalid";
+  return kInvalidWriteStallHyphenString;
+}
+
+const std::string& WriteStallCauseToHyphenString(WriteStallCause cause) {
+  static const std::string kMemtableLimit = "memtable-limit";
+  static const std::string kL0FileCountLimit = "l0-file-count-limit";
+  static const std::string kPendingCompactionBytes = "pending-compaction-bytes";
+  static const std::string kWriteBufferManagerLimit =
+      "write-buffer-manager-limit";
+  switch (cause) {
+    case WriteStallCause::kMemtableLimit:
+      return kMemtableLimit;
+    case WriteStallCause::kL0FileCountLimit:
+      return kL0FileCountLimit;
+    case WriteStallCause::kPendingCompactionBytes:
+      return kPendingCompactionBytes;
+    case WriteStallCause::kWriteBufferManagerLimit:
+      return kWriteBufferManagerLimit;
+    default:
+      break;
+  }
+  return InvalidWriteStallHyphenString();
+}
+
+const std::string& WriteStallConditionToHyphenString(
+    WriteStallCondition condition) {
+  static const std::string kDelayed = "delays";
+  static const std::string kStopped = "stops";
+  switch (condition) {
+    case WriteStallCondition::kDelayed:
+      return kDelayed;
+    case WriteStallCondition::kStopped:
+      return kStopped;
+    default:
+      break;
+  }
+  return InvalidWriteStallHyphenString();
+}
+
+InternalStats::InternalCFStatsType InternalCFStat(
+    WriteStallCause cause, WriteStallCondition condition) {
+  switch (cause) {
+    case WriteStallCause::kMemtableLimit: {
+      switch (condition) {
+        case WriteStallCondition::kDelayed:
+          return InternalStats::MEMTABLE_LIMIT_DELAYS;
+        case WriteStallCondition::kStopped:
+          return InternalStats::MEMTABLE_LIMIT_STOPS;
+        case WriteStallCondition::kNormal:
+          break;
+      }
+      break;
+    }
+    case WriteStallCause::kL0FileCountLimit: {
+      switch (condition) {
+        case WriteStallCondition::kDelayed:
+          return InternalStats::L0_FILE_COUNT_LIMIT_DELAYS;
+        case WriteStallCondition::kStopped:
+          return InternalStats::L0_FILE_COUNT_LIMIT_STOPS;
+        case WriteStallCondition::kNormal:
+          break;
+      }
+      break;
+    }
+    case WriteStallCause::kPendingCompactionBytes: {
+      switch (condition) {
+        case WriteStallCondition::kDelayed:
+          return InternalStats::PENDING_COMPACTION_BYTES_LIMIT_DELAYS;
+        case WriteStallCondition::kStopped:
+          return InternalStats::PENDING_COMPACTION_BYTES_LIMIT_STOPS;
+        case WriteStallCondition::kNormal:
+          break;
+      }
+      break;
+    }
+    default:
+      break;
+  }
+  return InternalStats::INTERNAL_CF_STATS_ENUM_MAX;
+}
+
+InternalStats::InternalDBStatsType InternalDBStat(
+    WriteStallCause cause, WriteStallCondition condition) {
+  switch (cause) {
+    case WriteStallCause::kWriteBufferManagerLimit: {
+      switch (condition) {
+        case WriteStallCondition::kStopped:
+          return InternalStats::kIntStatsWriteBufferManagerLimitStopsCounts;
+        default:
+          break;
+      }
+      break;
+    }
+    default:
+      break;
+  }
+  return InternalStats::kIntStatsNumMax;
+}
+
+bool isCFScopeWriteStallCause(WriteStallCause cause) {
+  uint32_t int_cause = static_cast<uint32_t>(cause);
+  uint32_t lower_bound =
+      static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax) -
+      kNumCFScopeWriteStallCauses;
+  uint32_t upper_bound =
+      static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax) -
+      1;
+  return lower_bound <= int_cause && int_cause <= upper_bound;
+}
+
+bool isDBScopeWriteStallCause(WriteStallCause cause) {
+  uint32_t int_cause = static_cast<uint32_t>(cause);
+  uint32_t lower_bound =
+      static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax) -
+      kNumDBScopeWriteStallCauses;
+  uint32_t upper_bound =
+      static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax) -
+      1;
+  return lower_bound <= int_cause && int_cause <= upper_bound;
+}
+
+const std::string& WriteStallStatsMapKeys::TotalStops() {
+  static const std::string kTotalStops = "total-stops";
+  return kTotalStops;
+}
+
+const std::string& WriteStallStatsMapKeys::TotalDelays() {
+  static const std::string kTotalDelays = "total-delays";
+  return kTotalDelays;
+}
+
+const std::string&
+WriteStallStatsMapKeys::CFL0FileCountLimitDelaysWithOngoingCompaction() {
+  static const std::string ret =
+      "cf-l0-file-count-limit-delays-with-ongoing-compaction";
+  return ret;
+}
+
+const std::string&
+WriteStallStatsMapKeys::CFL0FileCountLimitStopsWithOngoingCompaction() {
+  static const std::string ret =
+      "cf-l0-file-count-limit-stops-with-ongoing-compaction";
+  return ret;
+}
+
+std::string WriteStallStatsMapKeys::CauseConditionCount(
+    WriteStallCause cause, WriteStallCondition condition) {
+  std::string cause_condition_count_name;
+
+  std::string cause_name;
+  if (isCFScopeWriteStallCause(cause) || isDBScopeWriteStallCause(cause)) {
+    cause_name = WriteStallCauseToHyphenString(cause);
+  } else {
+    assert(false);
+    return "";
+  }
+
+  const std::string& condition_name =
+      WriteStallConditionToHyphenString(condition);
+
+  cause_condition_count_name.reserve(cause_name.size() + 1 +
+                                     condition_name.size());
+  cause_condition_count_name.append(cause_name);
+  cause_condition_count_name.append("-");
+  cause_condition_count_name.append(condition_name);
+
+  return cause_condition_count_name;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.h
new file mode 100644
index 0000000000..6394abb0a8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_stall_stats.h
@@ -0,0 +1,47 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
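Stall counters are exported as a string-keyed map, and `CauseConditionCount` in write_stall_stats.cc composes each key from the hyphenated cause and condition names. A small illustrative example, not part of the patch, of the strings these helpers produce:

    // Illustrative only -- not part of the vendored sources.
    #include <iostream>

    #include "db/write_stall_stats.h"

    using namespace ROCKSDB_NAMESPACE;

    int main() {
      // "memtable-limit" + "-" + "stops" => "memtable-limit-stops"
      std::cout << WriteStallStatsMapKeys::CauseConditionCount(
                       WriteStallCause::kMemtableLimit,
                       WriteStallCondition::kStopped)
                << "\n";
      // "l0-file-count-limit" + "-" + "delays" => "l0-file-count-limit-delays"
      std::cout << WriteStallStatsMapKeys::CauseConditionCount(
                       WriteStallCause::kL0FileCountLimit,
                       WriteStallCondition::kDelayed)
                << "\n";
      // Aggregate keys are fixed strings.
      std::cout << WriteStallStatsMapKeys::TotalStops() << "\n";
      return 0;
    }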
+
+#pragma once
+
+#include <string>
+
+#include "db/internal_stats.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+extern const std::string& InvalidWriteStallHyphenString();
+
+extern const std::string& WriteStallCauseToHyphenString(WriteStallCause cause);
+
+extern const std::string& WriteStallConditionToHyphenString(
+    WriteStallCondition condition);
+
+// REQUIRES:
+// `cause` is a CF-scope `WriteStallCause`, see `WriteStallCause` for more
+//
+// REQUIRES:
+// `condition` != `WriteStallCondition::kNormal`
+extern InternalStats::InternalCFStatsType InternalCFStat(
+    WriteStallCause cause, WriteStallCondition condition);
+
+// REQUIRES:
+// `cause` is a DB-scope `WriteStallCause`, see `WriteStallCause` for more
+//
+// REQUIRES:
+// `condition` != `WriteStallCondition::kNormal`
+extern InternalStats::InternalDBStatsType InternalDBStat(
+    WriteStallCause cause, WriteStallCondition condition);
+
+extern bool isCFScopeWriteStallCause(WriteStallCause cause);
+extern bool isDBScopeWriteStallCause(WriteStallCause cause);
+
+constexpr uint32_t kNumCFScopeWriteStallCauses =
+    static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax) -
+    static_cast<uint32_t>(WriteStallCause::kMemtableLimit);
+
+constexpr uint32_t kNumDBScopeWriteStallCauses =
+    static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax) -
+    static_cast<uint32_t>(WriteStallCause::kWriteBufferManagerLimit);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.cc
new file mode 100644
index 0000000000..7987007752
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.cc
@@ -0,0 +1,844 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/write_thread.h"
+
+#include <chrono>
+#include <thread>
+
+#include "db/column_family.h"
+#include "monitoring/perf_context_imp.h"
+#include "port/port.h"
+#include "test_util/sync_point.h"
+#include "util/random.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+WriteThread::WriteThread(const ImmutableDBOptions& db_options)
+    : max_yield_usec_(db_options.enable_write_thread_adaptive_yield
+                          ? db_options.write_thread_max_yield_usec
+                          : 0),
+      slow_yield_usec_(db_options.write_thread_slow_yield_usec),
+      allow_concurrent_memtable_write_(
+          db_options.allow_concurrent_memtable_write),
+      enable_pipelined_write_(db_options.enable_pipelined_write),
+      max_write_batch_group_size_bytes(
+          db_options.max_write_batch_group_size_bytes),
+      newest_writer_(nullptr),
+      newest_memtable_writer_(nullptr),
+      last_sequence_(0),
+      write_stall_dummy_(),
+      stall_mu_(),
+      stall_cv_(&stall_mu_) {}
+
+uint8_t WriteThread::BlockingAwaitState(Writer* w, uint8_t goal_mask) {
+  // We're going to block. Lazily create the mutex. We guarantee
+  // propagation of this construction to the waker via the
+  // STATE_LOCKED_WAITING state. The waker won't try to touch the mutex
+  // or the condvar unless they CAS away the STATE_LOCKED_WAITING that
+  // we install below.
+  w->CreateMutex();
+
+  auto state = w->state.load(std::memory_order_acquire);
+  assert(state != STATE_LOCKED_WAITING);
+  if ((state & goal_mask) == 0 &&
+      w->state.compare_exchange_strong(state, STATE_LOCKED_WAITING)) {
+    // we have permission (and an obligation) to use StateMutex
+    std::unique_lock<std::mutex> guard(w->StateMutex());
+    w->StateCV().wait(guard, [w] {
+      return w->state.load(std::memory_order_relaxed) != STATE_LOCKED_WAITING;
+    });
+    state = w->state.load(std::memory_order_relaxed);
+  }
+  // else tricky. Goal is met or CAS failed. In the latter case the waker
+  // must have changed the state, and compare_exchange_strong has updated
+  // our local variable with the new one. At the moment WriteThread never
+  // waits for a transition across intermediate states, so we know that
+  // since a state change has occurred the goal must have been met.
+  assert((state & goal_mask) != 0);
+  return state;
+}
+
+uint8_t WriteThread::AwaitState(Writer* w, uint8_t goal_mask,
+                                AdaptationContext* ctx) {
+  uint8_t state = 0;
+
+  // 1. Busy loop using "pause" for 1 micro sec
+  // 2. Else SOMETIMES busy loop using "yield" for 100 micro sec (default)
+  // 3. Else blocking wait
+
+  // On a modern Xeon each loop takes about 7 nanoseconds (most of which
+  // is the effect of the pause instruction), so 200 iterations is a bit
+  // more than a microsecond. This is long enough that waits longer than
+  // this can amortize the cost of accessing the clock and yielding.
+  for (uint32_t tries = 0; tries < 200; ++tries) {
+    state = w->state.load(std::memory_order_acquire);
+    if ((state & goal_mask) != 0) {
+      return state;
+    }
+    port::AsmVolatilePause();
+  }
+
+  // This is below the fast path, so that the stat is zero when all writes are
+  // from the same thread.
+  PERF_TIMER_GUARD(write_thread_wait_nanos);
+
+  // If we're only going to end up waiting a short period of time,
+  // it can be a lot more efficient to call std::this_thread::yield()
+  // in a loop than to block in StateMutex(). For reference, on my 4.0
+  // SELinux test server with support for syscall auditing enabled, the
+  // minimum latency between FUTEX_WAKE to returning from FUTEX_WAIT is
+  // 2.7 usec, and the average is more like 10 usec. That can be a big
+  // drag on RocksDB's single-writer design. Of course, spinning is a
+  // bad idea if other threads are waiting to run or if we're going to
+  // wait for a long time. How do we decide?
+  //
+  // We break waiting into 3 categories: short-uncontended,
+  // short-contended, and long. If we had an oracle, then we would always
+  // spin for short-uncontended, always block for long, and our choice for
+  // short-contended might depend on whether we were trying to optimize
+  // RocksDB throughput or avoid being greedy with system resources.
+  //
+  // Bucketing into short or long is easy by measuring elapsed time.
+  // Differentiating short-uncontended from short-contended is a bit
+  // trickier, but not too bad. We could look for involuntary context
+  // switches using getrusage(RUSAGE_THREAD, ..), but it's less work
+  // (portability code and CPU) to just look for yield calls that take
+  // longer than we expect. sched_yield() doesn't actually result in any
+  // context switch overhead if there are no other runnable processes
+  // on the current core, in which case it usually takes less than
+  // a microsecond.
+  //
+  // There are two primary tunables here: the threshold between "short"
+  // and "long" waits, and the threshold at which we suspect that a yield
+  // is slow enough to indicate we should probably block. If these
+  // thresholds are chosen well then CPU-bound workloads that don't
+  // have more threads than cores will experience few context switches
+  // (voluntary or involuntary), and the total number of context switches
+  // (voluntary and involuntary) will not be dramatically larger (maybe
+  // 2x) than the number of voluntary context switches that occur when
+  // --max_yield_wait_micros=0.
+  //
+  // There's another constant, which is the number of slow yields we will
+  // tolerate before reversing our previous decision. Solitary slow
+  // yields are pretty common (low-priority small jobs ready to run),
+  // so this should be at least 2. We set this conservatively to 3 so
+  // that we can also immediately schedule a ctx adaptation, rather than
+  // waiting for the next update_ctx.
+
+  const size_t kMaxSlowYieldsWhileSpinning = 3;
+
+  // Whether the yield approach has any credit in this context. Credit is
+  // added when a yield is successful before timing out, and decreased
+  // otherwise.
+  auto& yield_credit = ctx->value;
+  // Update the yield_credit based on sample runs or right after a hard failure
+  bool update_ctx = false;
+  // Should we reinforce the yield credit
+  bool would_spin_again = false;
+  // The sampling base for updating the yield credit. The sampling rate is
+  // 1/sampling_base.
+  const int sampling_base = 256;
+
+  if (max_yield_usec_ > 0) {
+    update_ctx = Random::GetTLSInstance()->OneIn(sampling_base);
+
+    if (update_ctx || yield_credit.load(std::memory_order_relaxed) >= 0) {
+      // we're updating the adaptation statistics, or spinning has >
+      // 50% chance of being shorter than max_yield_usec_ and causing no
+      // involuntary context switches
+      auto spin_begin = std::chrono::steady_clock::now();
+
+      // this variable doesn't include the final yield (if any) that
+      // causes the goal to be met
+      size_t slow_yield_count = 0;
+
+      auto iter_begin = spin_begin;
+      while ((iter_begin - spin_begin) <=
+             std::chrono::microseconds(max_yield_usec_)) {
+        std::this_thread::yield();
+
+        state = w->state.load(std::memory_order_acquire);
+        if ((state & goal_mask) != 0) {
+          // success
+          would_spin_again = true;
+          break;
+        }
+
+        auto now = std::chrono::steady_clock::now();
+        if (now == iter_begin ||
+            now - iter_begin >= std::chrono::microseconds(slow_yield_usec_)) {
+          // conservatively count it as a slow yield if our clock isn't
+          // accurate enough to measure the yield duration
+          ++slow_yield_count;
+          if (slow_yield_count >= kMaxSlowYieldsWhileSpinning) {
+            // Not just one ivcsw, but several. Immediately update
+            // yield_credit and fall back to blocking
+            update_ctx = true;
+            break;
+          }
+        }
+        iter_begin = now;
+      }
+    }
+  }
+
+  if ((state & goal_mask) == 0) {
+    TEST_SYNC_POINT_CALLBACK("WriteThread::AwaitState:BlockingWaiting", w);
+    state = BlockingAwaitState(w, goal_mask);
+  }
+
+  if (update_ctx) {
+    // Since our update is sample based, it is ok if a thread overwrites the
+    // updates by other threads. Thus the update does not have to be atomic.
+    auto v = yield_credit.load(std::memory_order_relaxed);
+    // fixed point exponential decay with decay constant 1/1024, with +1
+    // and -1 scaled to avoid overflow for int32_t
+    //
+    // On each update the positive credit is decayed by a factor of 1/1024
+    // (i.e., 0.1%). If the sampled yield was successful, the credit is also
+    // increased by X. Setting X=2^17 ensures that the credit never exceeds
+    // 2^17*2^10=2^27, which is lower than 2^31, the upper bound of int32_t.
+    // The same logic applies to negative credits.
+    v = v - (v / 1024) + (would_spin_again ? 1 : -1) * 131072;
+    yield_credit.store(v, std::memory_order_relaxed);
+  }
+
+  assert((state & goal_mask) != 0);
+  return state;
+}
+
+void WriteThread::SetState(Writer* w, uint8_t new_state) {
+  assert(w);
+  auto state = w->state.load(std::memory_order_acquire);
+  if (state == STATE_LOCKED_WAITING ||
+      !w->state.compare_exchange_strong(state, new_state)) {
+    assert(state == STATE_LOCKED_WAITING);
+
+    std::lock_guard<std::mutex> guard(w->StateMutex());
+    assert(w->state.load(std::memory_order_relaxed) != new_state);
+    w->state.store(new_state, std::memory_order_relaxed);
+    w->StateCV().notify_one();
+  }
+}
+
+bool WriteThread::LinkOne(Writer* w, std::atomic<Writer*>* newest_writer) {
+  assert(newest_writer != nullptr);
+  assert(w->state == STATE_INIT);
+  Writer* writers = newest_writer->load(std::memory_order_relaxed);
+  while (true) {
+    assert(writers != w);
+    // If a write stall is in effect and w->no_slowdown is not true,
+    // block here until the stall is cleared. If it's true, then return
+    // immediately
+    if (writers == &write_stall_dummy_) {
+      if (w->no_slowdown) {
+        w->status = Status::Incomplete("Write stall");
+        SetState(w, STATE_COMPLETED);
+        return false;
+      }
+      // Since no_slowdown is false, wait here to be notified of the write
+      // stall clearing
+      {
+        MutexLock lock(&stall_mu_);
+        writers = newest_writer->load(std::memory_order_relaxed);
+        if (writers == &write_stall_dummy_) {
+          TEST_SYNC_POINT_CALLBACK("WriteThread::WriteStall::Wait", w);
+          stall_cv_.Wait();
+          // Load newest_writers_ again since it may have changed
+          writers = newest_writer->load(std::memory_order_relaxed);
+          continue;
+        }
+      }
+    }
+    w->link_older = writers;
+    if (newest_writer->compare_exchange_weak(writers, w)) {
+      return (writers == nullptr);
+    }
+  }
+}
+
+bool WriteThread::LinkGroup(WriteGroup& write_group,
+                            std::atomic<Writer*>* newest_writer) {
+  assert(newest_writer != nullptr);
+  Writer* leader = write_group.leader;
+  Writer* last_writer = write_group.last_writer;
+  Writer* w = last_writer;
+  while (true) {
+    // Unset link_newer pointers to make sure that when we call
+    // CreateMissingNewerLinks later it creates all missing links.
+    w->link_newer = nullptr;
+    w->write_group = nullptr;
+    if (w == leader) {
+      break;
+    }
+    w = w->link_older;
+  }
+  Writer* newest = newest_writer->load(std::memory_order_relaxed);
+  while (true) {
+    leader->link_older = newest;
+    if (newest_writer->compare_exchange_weak(newest, last_writer)) {
+      return (newest == nullptr);
+    }
+  }
+}
+
+void WriteThread::CreateMissingNewerLinks(Writer* head) {
+  while (true) {
+    Writer* next = head->link_older;
+    if (next == nullptr || next->link_newer != nullptr) {
+      assert(next == nullptr || next->link_newer == head);
+      break;
+    }
+    next->link_newer = head;
+    head = next;
+  }
+}
+
+void WriteThread::CompleteLeader(WriteGroup& write_group) {
+  assert(write_group.size > 0);
+  Writer* leader = write_group.leader;
+  if (write_group.size == 1) {
+    write_group.leader = nullptr;
+    write_group.last_writer = nullptr;
+  } else {
+    assert(leader->link_newer != nullptr);
+    leader->link_newer->link_older = nullptr;
+    write_group.leader = leader->link_newer;
+  }
+  write_group.size -= 1;
+  SetState(leader, STATE_COMPLETED);
+}
+
+void WriteThread::CompleteFollower(Writer* w, WriteGroup& write_group) {
+  assert(write_group.size > 1);
+  assert(w != write_group.leader);
+  if (w == write_group.last_writer) {
+    w->link_older->link_newer = nullptr;
+    write_group.last_writer = w->link_older;
+  } else {
+    w->link_older->link_newer = w->link_newer;
+    w->link_newer->link_older = w->link_older;
+  }
+  write_group.size -= 1;
+  SetState(w, STATE_COMPLETED);
+}
+
+void WriteThread::BeginWriteStall() {
+  ++stall_begun_count_;
+  LinkOne(&write_stall_dummy_, &newest_writer_);
+
+  // Walk the writer list until w->write_group != nullptr. The current write
+  // group will not have a mix of slowdown/no_slowdown, so it's ok to stop at
+  // that point
+  Writer* w = write_stall_dummy_.link_older;
+  Writer* prev = &write_stall_dummy_;
+  while (w != nullptr && w->write_group == nullptr) {
+    if (w->no_slowdown) {
+      prev->link_older = w->link_older;
+      w->status = Status::Incomplete("Write stall");
+      SetState(w, STATE_COMPLETED);
+      // Only update `link_newer` if it's already set.
+      // `CreateMissingNewerLinks()` will update the nullptr `link_newer`
+      // later, which assumes the first non-nullptr `link_newer` is the last
+      // nullptr link in the writer list.
+      // If `link_newer` is set here, `CreateMissingNewerLinks()` may stop
+      // updating the whole list when it sees the first non-nullptr link.
+      if (prev->link_older && prev->link_older->link_newer) {
+        prev->link_older->link_newer = prev;
+      }
+      w = prev->link_older;
+    } else {
+      prev = w;
+      w = w->link_older;
+    }
+  }
+}
+
+void WriteThread::EndWriteStall() {
+  MutexLock lock(&stall_mu_);
+
+  // Unlink write_stall_dummy_ from the write queue. This will unblock
+  // pending write threads to enqueue themselves
+  assert(newest_writer_.load(std::memory_order_relaxed) == &write_stall_dummy_);
+  // write_stall_dummy_.link_older can be nullptr only if LockWAL() has been
+  // called.
+  if (write_stall_dummy_.link_older) {
+    write_stall_dummy_.link_older->link_newer = write_stall_dummy_.link_newer;
+  }
+  newest_writer_.exchange(write_stall_dummy_.link_older);
+
+  ++stall_ended_count_;
+
+  // Wake up writers
+  stall_cv_.SignalAll();
+}
+
+uint64_t WriteThread::GetBegunCountOfOutstandingStall() {
+  if (stall_begun_count_ > stall_ended_count_) {
+    // Outstanding stall in queue
+    assert(newest_writer_.load(std::memory_order_relaxed) ==
+           &write_stall_dummy_);
+    return stall_begun_count_;
+  } else {
+    // No stall in queue
+    assert(newest_writer_.load(std::memory_order_relaxed) !=
+           &write_stall_dummy_);
+    return 0;
+  }
+}
+
+void WriteThread::WaitForStallEndedCount(uint64_t stall_count) {
+  MutexLock lock(&stall_mu_);
+
+  while (stall_ended_count_ < stall_count) {
+    stall_cv_.Wait();
+  }
+}
+
+static WriteThread::AdaptationContext jbg_ctx("JoinBatchGroup");
+void WriteThread::JoinBatchGroup(Writer* w) {
+  TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:Start", w);
+  assert(w->batch != nullptr);
+
+  bool linked_as_leader = LinkOne(w, &newest_writer_);
+
+  if (linked_as_leader) {
+    SetState(w, STATE_GROUP_LEADER);
+  }
+
+  TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:Wait", w);
+  TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:Wait2", w);
+
+  if (!linked_as_leader) {
+    /**
+     * Wait until:
+     * 1) An existing leader picks us as the new leader when it finishes
+     * 2) An existing leader picks us as its follower and
+     * 2.1) finishes the memtable writes on our behalf
+     * 2.2) Or tells us to finish the memtable writes in parallel
+     * 3) (pipelined write) An existing leader picks us as its follower and
+     * finishes book-keeping and the WAL write for us, enqueues us as a
+     * pending memtable writer, and
+     * 3.1) we become memtable writer group leader, or
+     * 3.2) an existing memtable writer group leader tells us to finish
+     * memtable writes in parallel.
+     */
+    TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:BeganWaiting", w);
+    AwaitState(w,
+               STATE_GROUP_LEADER | STATE_MEMTABLE_WRITER_LEADER |
+                   STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
+               &jbg_ctx);
+    TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:DoneWaiting", w);
+  }
+}
+
+size_t WriteThread::EnterAsBatchGroupLeader(Writer* leader,
+                                            WriteGroup* write_group) {
+  assert(leader->link_older == nullptr);
+  assert(leader->batch != nullptr);
+  assert(write_group != nullptr);
+
+  size_t size = WriteBatchInternal::ByteSize(leader->batch);
+
+  // Allow the group to grow up to a maximum size, but if the
+  // original write is small, limit the growth so we do not slow
+  // down the small write too much.
+  size_t max_size = max_write_batch_group_size_bytes;
+  const uint64_t min_batch_size_bytes = max_write_batch_group_size_bytes / 8;
+  if (size <= min_batch_size_bytes) {
+    max_size = size + min_batch_size_bytes;
+  }
+
+  leader->write_group = write_group;
+  write_group->leader = leader;
+  write_group->last_writer = leader;
+  write_group->size = 1;
+  Writer* newest_writer = newest_writer_.load(std::memory_order_acquire);
+
+  // This is safe regardless of any db mutex status of the caller. Previous
+  // calls to ExitAsGroupLeader either didn't call CreateMissingNewerLinks
+  // (they emptied the list and then we added ourself as leader) or had to
+  // explicitly wake us up (the list was non-empty when we added ourself,
+  // so we have already received our MarkJoined).
+  CreateMissingNewerLinks(newest_writer);
+
+  // Tricky. Iteration start (leader) is exclusive and finish
+  // (newest_writer) is inclusive.
Iteration goes from old to new. + Writer* w = leader; + while (w != newest_writer) { + assert(w->link_newer); + w = w->link_newer; + + if (w->sync && !leader->sync) { + // Do not include a sync write into a batch handled by a non-sync write. + break; + } + + if (w->no_slowdown != leader->no_slowdown) { + // Do not mix writes that are ok with delays with the ones that + // request fail on delays. + break; + } + + if (w->disable_wal != leader->disable_wal) { + // Do not mix writes that enable WAL with the ones whose + // WAL disabled. + break; + } + + if (w->protection_bytes_per_key != leader->protection_bytes_per_key) { + // Do not mix writes with different levels of integrity protection. + break; + } + + if (w->rate_limiter_priority != leader->rate_limiter_priority) { + // Do not mix writes with different rate limiter priorities. + break; + } + + if (w->batch == nullptr) { + // Do not include those writes with nullptr batch. Those are not writes, + // those are something else. They want to be alone + break; + } + + if (w->callback != nullptr && !w->callback->AllowWriteBatching()) { + // don't batch writes that don't want to be batched + break; + } + + auto batch_size = WriteBatchInternal::ByteSize(w->batch); + if (size + batch_size > max_size) { + // Do not make batch too big + break; + } + + w->write_group = write_group; + size += batch_size; + write_group->last_writer = w; + write_group->size++; + } + TEST_SYNC_POINT_CALLBACK("WriteThread::EnterAsBatchGroupLeader:End", w); + return size; +} + +void WriteThread::EnterAsMemTableWriter(Writer* leader, + WriteGroup* write_group) { + assert(leader != nullptr); + assert(leader->link_older == nullptr); + assert(leader->batch != nullptr); + assert(write_group != nullptr); + + size_t size = WriteBatchInternal::ByteSize(leader->batch); + + // Allow the group to grow up to a maximum size, but if the + // original write is small, limit the growth so we do not slow + // down the small write too much. 
+ size_t max_size = max_write_batch_group_size_bytes; + const uint64_t min_batch_size_bytes = max_write_batch_group_size_bytes / 8; + if (size <= min_batch_size_bytes) { + max_size = size + min_batch_size_bytes; + } + + leader->write_group = write_group; + write_group->leader = leader; + write_group->size = 1; + Writer* last_writer = leader; + + if (!allow_concurrent_memtable_write_ || !leader->batch->HasMerge()) { + Writer* newest_writer = newest_memtable_writer_.load(); + CreateMissingNewerLinks(newest_writer); + + Writer* w = leader; + while (w != newest_writer) { + assert(w->link_newer); + w = w->link_newer; + + if (w->batch == nullptr) { + break; + } + + if (w->batch->HasMerge()) { + break; + } + + if (!allow_concurrent_memtable_write_) { + auto batch_size = WriteBatchInternal::ByteSize(w->batch); + if (size + batch_size > max_size) { + // Do not make batch too big + break; + } + size += batch_size; + } + + w->write_group = write_group; + last_writer = w; + write_group->size++; + } + } + + write_group->last_writer = last_writer; + write_group->last_sequence = + last_writer->sequence + WriteBatchInternal::Count(last_writer->batch) - 1; +} + +void WriteThread::ExitAsMemTableWriter(Writer* /*self*/, + WriteGroup& write_group) { + Writer* leader = write_group.leader; + Writer* last_writer = write_group.last_writer; + + Writer* newest_writer = last_writer; + if (!newest_memtable_writer_.compare_exchange_strong(newest_writer, + nullptr)) { + CreateMissingNewerLinks(newest_writer); + Writer* next_leader = last_writer->link_newer; + assert(next_leader != nullptr); + next_leader->link_older = nullptr; + SetState(next_leader, STATE_MEMTABLE_WRITER_LEADER); + } + Writer* w = leader; + while (true) { + if (!write_group.status.ok()) { + w->status = write_group.status; + } + Writer* next = w->link_newer; + if (w != leader) { + SetState(w, STATE_COMPLETED); + } + if (w == last_writer) { + break; + } + assert(next); + w = next; + } + // Note that leader has to exit last, since it owns the write group. + SetState(leader, STATE_COMPLETED); +} + +void WriteThread::LaunchParallelMemTableWriters(WriteGroup* write_group) { + assert(write_group != nullptr); + write_group->running.store(write_group->size); + for (auto w : *write_group) { + SetState(w, STATE_PARALLEL_MEMTABLE_WRITER); + } +} + +static WriteThread::AdaptationContext cpmtw_ctx( + "CompleteParallelMemTableWriter"); +// This method is called by both the leader and parallel followers +bool WriteThread::CompleteParallelMemTableWriter(Writer* w) { + auto* write_group = w->write_group; + if (!w->status.ok()) { + std::lock_guard guard(write_group->leader->StateMutex()); + write_group->status = w->status; + } + + if (write_group->running-- > 1) { + // we're not the last one + AwaitState(w, STATE_COMPLETED, &cpmtw_ctx); + return false; + } + // else we're the last parallel worker and should perform exit duties. + w->status = write_group->status; + // Callers of this function must ensure w->status is checked. 
+ write_group->status.PermitUncheckedError(); + return true; +} + +void WriteThread::ExitAsBatchGroupFollower(Writer* w) { + auto* write_group = w->write_group; + + assert(w->state == STATE_PARALLEL_MEMTABLE_WRITER); + assert(write_group->status.ok()); + ExitAsBatchGroupLeader(*write_group, write_group->status); + assert(w->status.ok()); + assert(w->state == STATE_COMPLETED); + SetState(write_group->leader, STATE_COMPLETED); +} + +static WriteThread::AdaptationContext eabgl_ctx("ExitAsBatchGroupLeader"); +void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group, + Status& status) { + TEST_SYNC_POINT_CALLBACK("WriteThread::ExitAsBatchGroupLeader:Start", + &write_group); + + Writer* leader = write_group.leader; + Writer* last_writer = write_group.last_writer; + assert(leader->link_older == nullptr); + + // If status is non-ok already, then write_group.status won't have the chance + // of being propagated to caller. + if (!status.ok()) { + write_group.status.PermitUncheckedError(); + } + + // Propagate memtable write error to the whole group. + if (status.ok() && !write_group.status.ok()) { + status = write_group.status; + } + + if (enable_pipelined_write_) { + // We insert a dummy Writer right before our current write_group. This + // allows us to unlink our write_group without the risk that a subsequent + // writer becomes a new leader and might overtake us and add itself to the + // memtable-writer-list before we can do so. This ensures that writers are + // added to the memtable-writer-list in the exact same order in which they + // were in the newest_writer list. + // This must happen before completing the writers from our group to prevent + // a race where the owning thread of one of these writers can start a new + // write operation. + Writer dummy; + Writer* head = newest_writer_.load(std::memory_order_acquire); + if (head != last_writer || + !newest_writer_.compare_exchange_strong(head, &dummy)) { + // Either last_writer wasn't the head during the load(), or it was the + // head during the load() but somebody else pushed onto the list before + // we did the compare_exchange_strong (causing it to fail). In the latter + // case compare_exchange_strong has the effect of re-reading its first + // param (head). No need to retry a failing CAS, because only a departing + // leader (which we are at the moment) can remove nodes from the list. + assert(head != last_writer); + + // After walking link_older starting from head (if not already done) we + // will be able to traverse w->link_newer below. + CreateMissingNewerLinks(head); + assert(last_writer->link_newer != nullptr); + last_writer->link_newer->link_older = &dummy; + dummy.link_newer = last_writer->link_newer; + } + + // Complete writers that don't write to memtable + for (Writer* w = last_writer; w != leader;) { + Writer* next = w->link_older; + w->status = status; + if (!w->ShouldWriteToMemtable()) { + CompleteFollower(w, write_group); + } + w = next; + } + if (!leader->ShouldWriteToMemtable()) { + CompleteLeader(write_group); + } + + TEST_SYNC_POINT_CALLBACK( + "WriteThread::ExitAsBatchGroupLeader:AfterCompleteWriters", + &write_group); + + // Link the remaining of the group to memtable writer list. + // We have to link our group to memtable writer queue before wake up the + // next leader or set newest_writer_ to null, otherwise the next leader + // can run ahead of us and link to memtable writer queue before we do. 
+ if (write_group.size > 0) { + if (LinkGroup(write_group, &newest_memtable_writer_)) { + // The leader can now be different from current writer. + SetState(write_group.leader, STATE_MEMTABLE_WRITER_LEADER); + } + } + + // Unlink the dummy writer from the list and identify the new leader + head = newest_writer_.load(std::memory_order_acquire); + if (head != &dummy || + !newest_writer_.compare_exchange_strong(head, nullptr)) { + CreateMissingNewerLinks(head); + Writer* new_leader = dummy.link_newer; + assert(new_leader != nullptr); + new_leader->link_older = nullptr; + SetState(new_leader, STATE_GROUP_LEADER); + } + + AwaitState(leader, + STATE_MEMTABLE_WRITER_LEADER | STATE_PARALLEL_MEMTABLE_WRITER | + STATE_COMPLETED, + &eabgl_ctx); + } else { + Writer* head = newest_writer_.load(std::memory_order_acquire); + if (head != last_writer || + !newest_writer_.compare_exchange_strong(head, nullptr)) { + // Either last_writer wasn't the head during the load(), or it was the + // head during the load() but somebody else pushed onto the list before + // we did the compare_exchange_strong (causing it to fail). In the + // latter case compare_exchange_strong has the effect of re-reading + // its first param (head). No need to retry a failing CAS, because + // only a departing leader (which we are at the moment) can remove + // nodes from the list. + assert(head != last_writer); + + // After walking link_older starting from head (if not already done) + // we will be able to traverse w->link_newer below. This function + // can only be called from an active leader, only a leader can + // clear newest_writer_, we didn't, and only a clear newest_writer_ + // could cause the next leader to start their work without a call + // to MarkJoined, so we can definitely conclude that no other leader + // work is going on here (with or without db mutex). + CreateMissingNewerLinks(head); + assert(last_writer->link_newer != nullptr); + assert(last_writer->link_newer->link_older == last_writer); + last_writer->link_newer->link_older = nullptr; + + // Next leader didn't self-identify, because newest_writer_ wasn't + // nullptr when they enqueued (we were definitely enqueued before them + // and are still in the list). That means leader handoff occurs when + // we call MarkJoined + SetState(last_writer->link_newer, STATE_GROUP_LEADER); + } + // else nobody else was waiting, although there might already be a new + // leader now + + while (last_writer != leader) { + assert(last_writer); + last_writer->status = status; + // we need to read link_older before calling SetState, because as soon + // as it is marked committed the other thread's Await may return and + // deallocate the Writer. 
+      auto next = last_writer->link_older;
+      SetState(last_writer, STATE_COMPLETED);
+
+      last_writer = next;
+    }
+  }
+}
+
+static WriteThread::AdaptationContext eu_ctx("EnterUnbatched");
+void WriteThread::EnterUnbatched(Writer* w, InstrumentedMutex* mu) {
+  assert(w != nullptr && w->batch == nullptr);
+  mu->Unlock();
+  bool linked_as_leader = LinkOne(w, &newest_writer_);
+  if (!linked_as_leader) {
+    TEST_SYNC_POINT("WriteThread::EnterUnbatched:Wait");
+    // Last leader will not pick us as a follower since our batch is nullptr
+    AwaitState(w, STATE_GROUP_LEADER, &eu_ctx);
+  }
+  if (enable_pipelined_write_) {
+    WaitForMemTableWriters();
+  }
+  mu->Lock();
+}
+
+void WriteThread::ExitUnbatched(Writer* w) {
+  assert(w != nullptr);
+  Writer* newest_writer = w;
+  if (!newest_writer_.compare_exchange_strong(newest_writer, nullptr)) {
+    CreateMissingNewerLinks(newest_writer);
+    Writer* next_leader = w->link_newer;
+    assert(next_leader != nullptr);
+    next_leader->link_older = nullptr;
+    SetState(next_leader, STATE_GROUP_LEADER);
+  }
+}
+
+static WriteThread::AdaptationContext wfmw_ctx("WaitForMemTableWriters");
+void WriteThread::WaitForMemTableWriters() {
+  assert(enable_pipelined_write_);
+  if (newest_memtable_writer_.load() == nullptr) {
+    return;
+  }
+  Writer w;
+  if (!LinkOne(&w, &newest_memtable_writer_)) {
+    AwaitState(&w, STATE_MEMTABLE_WRITER_LEADER, &wfmw_ctx);
+  }
+  newest_memtable_writer_.store(nullptr);
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.h
new file mode 100644
index 0000000000..6e5805e376
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db/write_thread.h
@@ -0,0 +1,464 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <atomic>
+#include <cassert>
+#include <chrono>
+#include <condition_variable>
+#include <cstdint>
+#include <mutex>
+#include <type_traits>
+#include <vector>
+
+#include "db/dbformat.h"
+#include "db/post_memtable_callback.h"
+#include "db/pre_release_callback.h"
+#include "db/write_callback.h"
+#include "monitoring/instrumented_mutex.h"
+#include "rocksdb/options.h"
+#include "rocksdb/status.h"
+#include "rocksdb/types.h"
+#include "rocksdb/write_batch.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class WriteThread {
+ public:
+  enum State : uint8_t {
+    // The initial state of a writer. This is a Writer that is
+    // waiting in JoinBatchGroup. This state can be left when another
+    // thread informs the waiter that it has become a group leader
+    // (-> STATE_GROUP_LEADER), when a leader that has chosen to be
+    // non-parallel informs a follower that its writes have been committed
+    // (-> STATE_COMPLETED), or when a leader that has chosen to perform
+    // updates in parallel and needs this Writer to apply its batch (->
+    // STATE_PARALLEL_MEMTABLE_WRITER).
+    STATE_INIT = 1,
+
+    // The state used to inform a waiting Writer that it has become the
+    // leader, and it should now build a write batch group. Tricky:
+    // this state is not used if newest_writer_ is empty when a writer
+    // enqueues itself, because there is no need to wait (or even to
+    // create the mutex and condvar used to wait) in that case. This is
+    // a terminal state unless the leader chooses to make this a parallel
+    // batch, in which case the last parallel worker to finish will move
+    // the leader to STATE_COMPLETED.
+    STATE_GROUP_LEADER = 2,
+
+    // The state used to inform a waiting writer that it has become the
+    // leader of a memtable writer group. The leader will either write the
+    // memtable for the whole group, or launch a parallel group write
+    // to the memtable by calling LaunchParallelMemTableWrite.
+    STATE_MEMTABLE_WRITER_LEADER = 4,
+
+    // The state used to inform a waiting writer that it has become a
+    // parallel memtable writer. It can be the group leader that launched the
+    // parallel writer group, or one of the followers. The writer should then
+    // apply its batch to the memtable concurrently and call
+    // CompleteParallelMemTableWriter.
+    STATE_PARALLEL_MEMTABLE_WRITER = 8,
+
+    // A follower whose writes have been applied, or a parallel leader
+    // whose followers have all finished their work. This is a terminal
+    // state.
+    STATE_COMPLETED = 16,
+
+    // A state indicating that the thread may be waiting using StateMutex()
+    // and StateCondVar()
+    STATE_LOCKED_WAITING = 32,
+  };
+
+  struct Writer;
+
+  struct WriteGroup {
+    Writer* leader = nullptr;
+    Writer* last_writer = nullptr;
+    SequenceNumber last_sequence;
+    // before running goes to zero, status needs leader->StateMutex()
+    Status status;
+    std::atomic<size_t> running;
+    size_t size = 0;
+
+    struct Iterator {
+      Writer* writer;
+      Writer* last_writer;
+
+      explicit Iterator(Writer* w, Writer* last)
+          : writer(w), last_writer(last) {}
+
+      Writer* operator*() const { return writer; }
+
+      Iterator& operator++() {
+        assert(writer != nullptr);
+        if (writer == last_writer) {
+          writer = nullptr;
+        } else {
+          writer = writer->link_newer;
+        }
+        return *this;
+      }
+
+      bool operator!=(const Iterator& other) const {
+        return writer != other.writer;
+      }
+    };
+
+    Iterator begin() const { return Iterator(leader, last_writer); }
+    Iterator end() const { return Iterator(nullptr, nullptr); }
+  };
+
+  // Information kept for every waiting writer.
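+  // [Editorial note, not part of the vendored source] A hedged sketch of how
+  // a caller such as DBImpl::WriteImpl typically drives the Writer defined
+  // below through this state machine (names and arguments are illustrative):
+  //
+  //   WriteThread::Writer w(write_options, my_batch, /*callback=*/nullptr,
+  //                         /*log_ref=*/0, /*disable_memtable=*/false);
+  //   write_thread_.JoinBatchGroup(&w);
+  //   if (w.state == WriteThread::STATE_GROUP_LEADER) {
+  //     WriteThread::WriteGroup group;
+  //     write_thread_.EnterAsBatchGroupLeader(&w, &group);
+  //     // ... write the WAL and memtable for the whole group ...
+  //     write_thread_.ExitAsBatchGroupLeader(group, status);
+  //   }  // non-leaders typically return with their write already done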
+  struct Writer {
+    WriteBatch* batch;
+    bool sync;
+    bool no_slowdown;
+    bool disable_wal;
+    Env::IOPriority rate_limiter_priority;
+    bool disable_memtable;
+    size_t batch_cnt;  // if non-zero, number of sub-batches in the write batch
+    size_t protection_bytes_per_key;
+    PreReleaseCallback* pre_release_callback;
+    PostMemTableCallback* post_memtable_callback;
+    uint64_t log_used;  // log number that this batch was inserted into
+    uint64_t log_ref;   // log number that memtable insert should reference
+    WriteCallback* callback;
+    bool made_waitable;          // records lazy construction of mutex and cv
+    std::atomic<uint8_t> state;  // write under StateMutex() or pre-link
+    WriteGroup* write_group;
+    SequenceNumber sequence;  // the sequence number to use for the first key
+    Status status;
+    Status callback_status;  // status returned by callback->Callback()
+
+    std::aligned_storage<sizeof(std::mutex)>::type state_mutex_bytes;
+    std::aligned_storage<sizeof(std::condition_variable)>::type state_cv_bytes;
+    Writer* link_older;  // read/write only before linking, or as leader
+    Writer* link_newer;  // lazy, read/write only before linking, or as leader
+
+    Writer()
+        : batch(nullptr),
+          sync(false),
+          no_slowdown(false),
+          disable_wal(false),
+          rate_limiter_priority(Env::IOPriority::IO_TOTAL),
+          disable_memtable(false),
+          batch_cnt(0),
+          protection_bytes_per_key(0),
+          pre_release_callback(nullptr),
+          post_memtable_callback(nullptr),
+          log_used(0),
+          log_ref(0),
+          callback(nullptr),
+          made_waitable(false),
+          state(STATE_INIT),
+          write_group(nullptr),
+          sequence(kMaxSequenceNumber),
+          link_older(nullptr),
+          link_newer(nullptr) {}
+
+    Writer(const WriteOptions& write_options, WriteBatch* _batch,
+           WriteCallback* _callback, uint64_t _log_ref, bool _disable_memtable,
+           size_t _batch_cnt = 0,
+           PreReleaseCallback* _pre_release_callback = nullptr,
+           PostMemTableCallback* _post_memtable_callback = nullptr)
+        : batch(_batch),
+          sync(write_options.sync),
+          no_slowdown(write_options.no_slowdown),
+          disable_wal(write_options.disableWAL),
+          rate_limiter_priority(write_options.rate_limiter_priority),
+          disable_memtable(_disable_memtable),
+          batch_cnt(_batch_cnt),
+          protection_bytes_per_key(_batch->GetProtectionBytesPerKey()),
+          pre_release_callback(_pre_release_callback),
+          post_memtable_callback(_post_memtable_callback),
+          log_used(0),
+          log_ref(_log_ref),
+          callback(_callback),
+          made_waitable(false),
+          state(STATE_INIT),
+          write_group(nullptr),
+          sequence(kMaxSequenceNumber),
+          link_older(nullptr),
+          link_newer(nullptr) {}
+
+    ~Writer() {
+      if (made_waitable) {
+        StateMutex().~mutex();
+        StateCV().~condition_variable();
+      }
+      status.PermitUncheckedError();
+      callback_status.PermitUncheckedError();
+    }
+
+    bool CheckCallback(DB* db) {
+      if (callback != nullptr) {
+        callback_status = callback->Callback(db);
+      }
+      return callback_status.ok();
+    }
+
+    void CreateMutex() {
+      if (!made_waitable) {
+        // Note that made_waitable is tracked separately from state
+        // transitions, because we can't atomically create the mutex and
+        // link into the list.
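+        // [Editorial note, not part of the vendored source] The idiom used
+        // below, shown standalone:
+        //
+        //   std::aligned_storage<sizeof(std::mutex)>::type buf;
+        //   std::mutex* mu = new (&buf) std::mutex;  // placement new, no heap
+        //   mu->~mutex();  // destroyed manually, cf. ~Writer() above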
+        made_waitable = true;
+        new (&state_mutex_bytes) std::mutex;
+        new (&state_cv_bytes) std::condition_variable;
+      }
+    }
+
+    // returns the aggregate status of this Writer
+    Status FinalStatus() {
+      if (!status.ok()) {
+        // a non-ok memtable write status takes precedence
+        assert(callback == nullptr || callback_status.ok());
+        return status;
+      } else if (!callback_status.ok()) {
+        // if the callback failed then that is the status we want
+        // because a memtable insert should not have been attempted
+        assert(callback != nullptr);
+        assert(status.ok());
+        return callback_status;
+      } else {
+        // if there is no callback then we only care about
+        // the memtable insert status
+        assert(callback == nullptr || callback_status.ok());
+        return status;
+      }
+    }
+
+    bool CallbackFailed() {
+      return (callback != nullptr) && !callback_status.ok();
+    }
+
+    bool ShouldWriteToMemtable() {
+      return status.ok() && !CallbackFailed() && !disable_memtable;
+    }
+
+    bool ShouldWriteToWAL() {
+      return status.ok() && !CallbackFailed() && !disable_wal;
+    }
+
+    // No other mutexes may be acquired while holding StateMutex(); it is
+    // always last in the lock order
+    std::mutex& StateMutex() {
+      assert(made_waitable);
+      return *static_cast<std::mutex*>(static_cast<void*>(&state_mutex_bytes));
+    }
+
+    std::condition_variable& StateCV() {
+      assert(made_waitable);
+      return *static_cast<std::condition_variable*>(
+          static_cast<void*>(&state_cv_bytes));
+    }
+  };
+
+  struct AdaptationContext {
+    const char* name;
+    std::atomic<int32_t> value;
+
+    explicit AdaptationContext(const char* name0) : name(name0), value(0) {}
+  };
+
+  explicit WriteThread(const ImmutableDBOptions& db_options);
+
+  virtual ~WriteThread() = default;
+
+  // IMPORTANT: None of the methods in this class rely on the db mutex
+  // for correctness. All of the methods except JoinBatchGroup and
+  // EnterUnbatched may be called either with or without the db mutex held.
+  // Correctness is maintained by ensuring that only a single thread is
+  // a leader at a time.
+
+  // Registers w as ready to become part of a batch group, waits until the
+  // caller should perform some work, and returns the current state of the
+  // writer. If w has become the leader of a write batch group, returns
+  // STATE_GROUP_LEADER. If w has been made part of a sequential batch
+  // group and the leader has performed the write, returns STATE_COMPLETED.
+  // If w has been made part of a parallel batch group and is responsible
+  // for updating the memtable, returns STATE_PARALLEL_MEMTABLE_WRITER.
+  //
+  // The db mutex SHOULD NOT be held when calling this function, because
+  // it will block.
+  //
+  // Writer* w: Writer to be executed as part of a batch group
+  void JoinBatchGroup(Writer* w);
+
+  // Constructs a write batch group led by leader, which should be a
+  // Writer passed to JoinBatchGroup on the current thread.
+  //
+  // Writer* leader: Writer that is STATE_GROUP_LEADER
+  // WriteGroup* write_group: Out-param of group members
+  // returns: Total batch group byte size
+  size_t EnterAsBatchGroupLeader(Writer* leader, WriteGroup* write_group);
+
+  // Unlinks the Writer-s in a batch group, wakes up the non-leaders,
+  // and wakes up the next leader (if any).
+  //
+  // WriteGroup* write_group: the write group
+  // Status status: Status of write operation
+  void ExitAsBatchGroupLeader(WriteGroup& write_group, Status& status);
+
+  // Exit batch group on behalf of batch group leader.
+  void ExitAsBatchGroupFollower(Writer* w);
+
+  // Constructs a write batch group led by leader from the
+  // newest_memtable_writers_ list. The leader should either write the
+  // memtable for the whole group and call ExitAsMemTableWriter, or launch
+  // a parallel memtable write through LaunchParallelMemTableWriters.
+  void EnterAsMemTableWriter(Writer* leader, WriteGroup* write_group);
+
+  // The memtable writer group leader, or the last finished writer in a
+  // parallel write group, exits from the newest_memtable_writers_ list and
+  // wakes up the next leader if needed.
+  void ExitAsMemTableWriter(Writer* self, WriteGroup& write_group);
+
+  // Causes JoinBatchGroup to return STATE_PARALLEL_MEMTABLE_WRITER for all of
+  // the non-leader members of this write batch group. Sets Writer::sequence
+  // before waking them up.
+  //
+  // WriteGroup* write_group: Extra state used to coordinate the parallel add
+  void LaunchParallelMemTableWriters(WriteGroup* write_group);
+
+  // Reports the completion of w's batch to the parallel group leader, and
+  // waits for the rest of the parallel batch to complete. Returns true
+  // if this thread is the last to complete, and hence should advance
+  // the sequence number and then call EarlyExitParallelGroup, false if
+  // someone else has already taken responsibility for that.
+  bool CompleteParallelMemTableWriter(Writer* w);
+
+  // Waits for all preceding writers (unlocking mu while waiting), then
+  // registers w as the currently proceeding writer.
+  //
+  // Writer* w: A Writer not eligible for batching
+  // InstrumentedMutex* mu: The db mutex, to unlock while waiting
+  // REQUIRES: db mutex held
+  void EnterUnbatched(Writer* w, InstrumentedMutex* mu);
+
+  // Completes a Writer begun with EnterUnbatched, unblocking subsequent
+  // writers.
+  void ExitUnbatched(Writer* w);
+
+  // Wait for all parallel memtable writers to finish, in case pipelined
+  // write is enabled.
+  void WaitForMemTableWriters();
+
+  SequenceNumber UpdateLastSequence(SequenceNumber sequence) {
+    if (sequence > last_sequence_) {
+      last_sequence_ = sequence;
+    }
+    return last_sequence_;
+  }
+
+  // Insert a dummy writer at the tail of the write queue to indicate a write
+  // stall, and fail any writers in the queue with no_slowdown set to true
+  // REQUIRES: db mutex held, no other stall on this queue outstanding
+  void BeginWriteStall();
+
+  // Remove the dummy writer and wake up waiting writers
+  // REQUIRES: db mutex held
+  void EndWriteStall();
+
+  // Number of BeginWriteStall(), or 0 if there is no active stall in the
+  // write queue.
+  // REQUIRES: db mutex held
+  uint64_t GetBegunCountOfOutstandingStall();
+
+  // Wait for number of completed EndWriteStall() to reach >= `stall_count`,
+  // which will generally have come from GetBegunCountOfOutstandingStall().
+  // (Does not require db mutex held)
+  void WaitForStallEndedCount(uint64_t stall_count);
+
+ private:
+  // See AwaitState.
+  const uint64_t max_yield_usec_;
+  const uint64_t slow_yield_usec_;
+
+  // Allow multiple writers to write to the memtable concurrently.
+  const bool allow_concurrent_memtable_write_;
+
+  // Enable pipelined write to WAL and memtable.
+  const bool enable_pipelined_write_;
+
+  // The upper limit on the number of bytes that are written in a single batch
+  // of WAL or memtable write. It is enforced when the leader's write size
+  // is larger than 1/8 of this limit.
+  const uint64_t max_write_batch_group_size_bytes;
+
+  // Points to the newest pending writer. Only the leader can remove
+  // elements; adding can be done lock-free by anybody.
+  std::atomic<Writer*> newest_writer_;
+
+  // Points to the newest pending memtable writer. Used only when pipelined
+  // write is enabled.
+  std::atomic<Writer*> newest_memtable_writer_;
+
+  // The last sequence that has been consumed by a writer. The sequence
+  // is not necessarily visible to reads because the writer can be ongoing.
+  SequenceNumber last_sequence_;
+
+  // A dummy writer to indicate a write stall condition. This will be inserted
+  // at the tail of the writer queue by the leader, so newer writers can just
+  // check for this and bail
+  Writer write_stall_dummy_;
+
+  // Mutex and condvar for writers to block on a write stall. During a write
+  // stall, writers with no_slowdown set to false will wait on this rather
+  // than on the writer queue
+  port::Mutex stall_mu_;
+  port::CondVar stall_cv_;
+
+  // Count the number of stalls begun, so that we can check whether
+  // a particular stall has cleared (even if caught in another stall).
+  // Controlled by DB mutex.
+  // Because of the contract on BeginWriteStall() / EndWriteStall(),
+  // stall_ended_count_ <= stall_begun_count_ <= stall_ended_count_ + 1.
+  uint64_t stall_begun_count_ = 0;
+  // Count the number of stalls ended, so that we can check whether
+  // a particular stall has cleared (even if caught in another stall).
+  // Writes controlled by DB mutex + stall_mu_, signalled by stall_cv_.
+  // Read with stall_mu or DB mutex.
+  uint64_t stall_ended_count_ = 0;
+
+  // Waits for w->state & goal_mask using w->StateMutex(). Returns
+  // the state that satisfies goal_mask.
+  uint8_t BlockingAwaitState(Writer* w, uint8_t goal_mask);
+
+  // Blocks until w->state & goal_mask, returning the state value
+  // that satisfied the predicate. Uses ctx to adaptively use
+  // std::this_thread::yield() to avoid mutex overheads. ctx should be
+  // a context-dependent static.
+  uint8_t AwaitState(Writer* w, uint8_t goal_mask, AdaptationContext* ctx);
+
+  // Set writer state and wake the writer up if it is waiting.
+  void SetState(Writer* w, uint8_t new_state);
+
+  // Links w into the newest_writer list. Returns true if w was linked directly
+  // into the leader position. Safe to call from multiple threads without
+  // external locking.
+  bool LinkOne(Writer* w, std::atomic<Writer*>* newest_writer);
+
+  // Links a write group into the newest_writer list as a whole, while keeping
+  // the order of the writers unchanged. Returns true if the group was linked
+  // directly into the leader position.
+  bool LinkGroup(WriteGroup& write_group, std::atomic<Writer*>* newest_writer);
+
+  // Computes any missing link_newer links. Should not be called
+  // concurrently with itself.
+  void CreateMissingNewerLinks(Writer* head);
+
+  // Set the leader in write_group to completed state and remove it from the
+  // write group.
+  void CompleteLeader(WriteGroup& write_group);
+
+  // Set a follower in write_group to completed state and remove it from the
+  // write group.
+  void CompleteFollower(Writer* w, WriteGroup& write_group);
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_common.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_common.h
new file mode 100644
index 0000000000..8756932a7e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_common.h
@@ -0,0 +1,674 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// The test uses an array to compare against values written to the database. +// Keys written to the array are in 1:1 correspondence to the actual values in +// the database according to the formula in the function GenerateValue. + +// Space is reserved in the array from 0 to FLAGS_max_key and values are +// randomly written/deleted/read from those positions. During verification we +// compare all the positions in the array. To shorten/elongate the running +// time, you could change the settings: FLAGS_max_key, FLAGS_ops_per_thread, +// (sometimes also FLAGS_threads). +// +// NOTE that if FLAGS_test_batches_snapshots is set, the test will have +// different behavior. See comment of the flag for details. + +#ifdef GFLAGS +#pragma once +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "db/version_set.h" +#include "db_stress_tool/db_stress_env_wrapper.h" +#include "db_stress_tool/db_stress_listener.h" +#include "db_stress_tool/db_stress_shared_state.h" +#include "db_stress_tool/db_stress_test_base.h" +#include "logging/logging.h" +#include "monitoring/histogram.h" +#include "options/options_helper.h" +#include "port/port.h" +#include "rocksdb/cache.h" +#include "rocksdb/env.h" +#include "rocksdb/slice.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/statistics.h" +#include "rocksdb/utilities/backup_engine.h" +#include "rocksdb/utilities/checkpoint.h" +#include "rocksdb/utilities/db_ttl.h" +#include "rocksdb/utilities/debug.h" +#include "rocksdb/utilities/options_util.h" +#include "rocksdb/utilities/transaction.h" +#include "rocksdb/utilities/transaction_db.h" +#include "rocksdb/write_batch.h" +#include "test_util/testutil.h" +#include "util/coding.h" +#include "util/compression.h" +#include "util/crc32c.h" +#include "util/gflags_compat.h" +#include "util/mutexlock.h" +#include "util/random.h" +#include "util/string_util.h" +#include "utilities/blob_db/blob_db.h" +#include "utilities/fault_injection_fs.h" +#include "utilities/merge_operators.h" + +using GFLAGS_NAMESPACE::ParseCommandLineFlags; +using GFLAGS_NAMESPACE::RegisterFlagValidator; +using GFLAGS_NAMESPACE::SetUsageMessage; + +DECLARE_uint64(seed); +DECLARE_bool(read_only); +DECLARE_int64(max_key); +DECLARE_double(hot_key_alpha); +DECLARE_int32(max_key_len); +DECLARE_string(key_len_percent_dist); +DECLARE_int32(key_window_scale_factor); +DECLARE_int32(column_families); +DECLARE_string(options_file); +DECLARE_int64(active_width); +DECLARE_bool(test_batches_snapshots); +DECLARE_bool(atomic_flush); +DECLARE_int32(manual_wal_flush_one_in); +DECLARE_int32(lock_wal_one_in); +DECLARE_bool(test_cf_consistency); +DECLARE_bool(test_multi_ops_txns); +DECLARE_int32(threads); +DECLARE_int32(ttl); +DECLARE_int32(value_size_mult); +DECLARE_int32(compaction_readahead_size); +DECLARE_bool(enable_pipelined_write); +DECLARE_bool(verify_before_write); +DECLARE_bool(histogram); +DECLARE_bool(destroy_db_initially); +DECLARE_bool(verbose); +DECLARE_bool(progress_reports); +DECLARE_uint64(db_write_buffer_size); +DECLARE_int32(write_buffer_size); +DECLARE_int32(max_write_buffer_number); +DECLARE_int32(min_write_buffer_number_to_merge); +DECLARE_int32(max_write_buffer_number_to_maintain); 
+DECLARE_int64(max_write_buffer_size_to_maintain); +DECLARE_double(memtable_prefix_bloom_size_ratio); +DECLARE_bool(memtable_whole_key_filtering); +DECLARE_int32(open_files); +DECLARE_int64(compressed_cache_size); +DECLARE_int32(compressed_cache_numshardbits); +DECLARE_int32(compaction_style); +DECLARE_int32(compaction_pri); +DECLARE_int32(num_levels); +DECLARE_int32(level0_file_num_compaction_trigger); +DECLARE_int32(level0_slowdown_writes_trigger); +DECLARE_int32(level0_stop_writes_trigger); +DECLARE_int32(block_size); +DECLARE_int32(format_version); +DECLARE_int32(index_block_restart_interval); +DECLARE_bool(disable_auto_compactions); +DECLARE_int32(max_background_compactions); +DECLARE_int32(num_bottom_pri_threads); +DECLARE_int32(compaction_thread_pool_adjust_interval); +DECLARE_int32(compaction_thread_pool_variations); +DECLARE_int32(max_background_flushes); +DECLARE_int32(universal_size_ratio); +DECLARE_int32(universal_min_merge_width); +DECLARE_int32(universal_max_merge_width); +DECLARE_int32(universal_max_size_amplification_percent); +DECLARE_int32(clear_column_family_one_in); +DECLARE_int32(get_live_files_one_in); +DECLARE_int32(get_sorted_wal_files_one_in); +DECLARE_int32(get_current_wal_file_one_in); +DECLARE_int32(set_options_one_in); +DECLARE_int32(set_in_place_one_in); +DECLARE_int64(cache_size); +DECLARE_int32(cache_numshardbits); +DECLARE_bool(cache_index_and_filter_blocks); +DECLARE_bool(charge_compression_dictionary_building_buffer); +DECLARE_bool(charge_filter_construction); +DECLARE_bool(charge_table_reader); +DECLARE_bool(charge_file_metadata); +DECLARE_bool(charge_blob_cache); +DECLARE_int32(top_level_index_pinning); +DECLARE_int32(partition_pinning); +DECLARE_int32(unpartitioned_pinning); +DECLARE_string(cache_type); +DECLARE_uint64(subcompactions); +DECLARE_uint64(periodic_compaction_seconds); +DECLARE_uint64(compaction_ttl); +DECLARE_bool(fifo_allow_compaction); +DECLARE_bool(allow_concurrent_memtable_write); +DECLARE_double(experimental_mempurge_threshold); +DECLARE_bool(enable_write_thread_adaptive_yield); +DECLARE_int32(reopen); +DECLARE_double(bloom_bits); +DECLARE_int32(ribbon_starting_level); +DECLARE_bool(partition_filters); +DECLARE_bool(optimize_filters_for_memory); +DECLARE_bool(detect_filter_construct_corruption); +DECLARE_int32(index_type); +DECLARE_int32(data_block_index_type); +DECLARE_string(db); +DECLARE_string(secondaries_base); +DECLARE_bool(test_secondary); +DECLARE_string(expected_values_dir); +DECLARE_bool(verify_checksum); +DECLARE_bool(mmap_read); +DECLARE_bool(mmap_write); +DECLARE_bool(use_direct_reads); +DECLARE_bool(use_direct_io_for_flush_and_compaction); +DECLARE_bool(mock_direct_io); +DECLARE_bool(statistics); +DECLARE_bool(sync); +DECLARE_bool(use_fsync); +DECLARE_uint64(stats_dump_period_sec); +DECLARE_uint64(bytes_per_sync); +DECLARE_uint64(wal_bytes_per_sync); +DECLARE_int32(kill_random_test); +DECLARE_string(kill_exclude_prefixes); +DECLARE_bool(disable_wal); +DECLARE_uint64(recycle_log_file_num); +DECLARE_int64(target_file_size_base); +DECLARE_int32(target_file_size_multiplier); +DECLARE_uint64(max_bytes_for_level_base); +DECLARE_double(max_bytes_for_level_multiplier); +DECLARE_int32(range_deletion_width); +DECLARE_uint64(rate_limiter_bytes_per_sec); +DECLARE_bool(rate_limit_bg_reads); +DECLARE_bool(rate_limit_user_ops); +DECLARE_bool(rate_limit_auto_wal_flush); +DECLARE_uint64(sst_file_manager_bytes_per_sec); +DECLARE_uint64(sst_file_manager_bytes_per_truncate); +DECLARE_bool(use_txn); +DECLARE_uint64(txn_write_policy); 
+DECLARE_bool(unordered_write); +DECLARE_int32(backup_one_in); +DECLARE_uint64(backup_max_size); +DECLARE_int32(checkpoint_one_in); +DECLARE_int32(ingest_external_file_one_in); +DECLARE_int32(ingest_external_file_width); +DECLARE_int32(compact_files_one_in); +DECLARE_int32(compact_range_one_in); +DECLARE_int32(mark_for_compaction_one_file_in); +DECLARE_int32(flush_one_in); +DECLARE_int32(pause_background_one_in); +DECLARE_int32(compact_range_width); +DECLARE_int32(acquire_snapshot_one_in); +DECLARE_bool(compare_full_db_state_snapshot); +DECLARE_uint64(snapshot_hold_ops); +DECLARE_bool(long_running_snapshots); +DECLARE_bool(use_multiget); +DECLARE_bool(use_get_entity); +DECLARE_bool(use_multi_get_entity); +DECLARE_int32(readpercent); +DECLARE_int32(prefixpercent); +DECLARE_int32(writepercent); +DECLARE_int32(delpercent); +DECLARE_int32(delrangepercent); +DECLARE_int32(nooverwritepercent); +DECLARE_int32(iterpercent); +DECLARE_uint64(num_iterations); +DECLARE_int32(customopspercent); +DECLARE_string(compression_type); +DECLARE_string(bottommost_compression_type); +DECLARE_int32(compression_max_dict_bytes); +DECLARE_int32(compression_zstd_max_train_bytes); +DECLARE_int32(compression_parallel_threads); +DECLARE_uint64(compression_max_dict_buffer_bytes); +DECLARE_bool(compression_use_zstd_dict_trainer); +DECLARE_string(checksum_type); +DECLARE_string(env_uri); +DECLARE_string(fs_uri); +DECLARE_uint64(ops_per_thread); +DECLARE_uint64(log2_keys_per_lock); +DECLARE_uint64(max_manifest_file_size); +DECLARE_bool(in_place_update); +DECLARE_string(memtablerep); +DECLARE_int32(prefix_size); +DECLARE_bool(use_merge); +DECLARE_uint32(use_put_entity_one_in); +DECLARE_bool(use_full_merge_v1); +DECLARE_int32(sync_wal_one_in); +DECLARE_bool(avoid_unnecessary_blocking_io); +DECLARE_bool(write_dbid_to_manifest); +DECLARE_bool(avoid_flush_during_recovery); +DECLARE_uint64(max_write_batch_group_size_bytes); +DECLARE_bool(level_compaction_dynamic_level_bytes); +DECLARE_int32(verify_checksum_one_in); +DECLARE_int32(verify_db_one_in); +DECLARE_int32(continuous_verification_interval); +DECLARE_int32(get_property_one_in); +DECLARE_string(file_checksum_impl); + +// Options for StackableDB-based BlobDB +DECLARE_bool(use_blob_db); +DECLARE_uint64(blob_db_min_blob_size); +DECLARE_uint64(blob_db_bytes_per_sync); +DECLARE_uint64(blob_db_file_size); +DECLARE_bool(blob_db_enable_gc); +DECLARE_double(blob_db_gc_cutoff); + +// Options for integrated BlobDB +DECLARE_bool(allow_setting_blob_options_dynamically); +DECLARE_bool(enable_blob_files); +DECLARE_uint64(min_blob_size); +DECLARE_uint64(blob_file_size); +DECLARE_string(blob_compression_type); +DECLARE_bool(enable_blob_garbage_collection); +DECLARE_double(blob_garbage_collection_age_cutoff); +DECLARE_double(blob_garbage_collection_force_threshold); +DECLARE_uint64(blob_compaction_readahead_size); +DECLARE_int32(blob_file_starting_level); +DECLARE_bool(use_blob_cache); +DECLARE_bool(use_shared_block_and_blob_cache); +DECLARE_uint64(blob_cache_size); +DECLARE_int32(blob_cache_numshardbits); +DECLARE_int32(prepopulate_blob_cache); + +DECLARE_int32(approximate_size_one_in); +DECLARE_bool(sync_fault_injection); + +DECLARE_bool(best_efforts_recovery); +DECLARE_bool(skip_verifydb); +DECLARE_bool(enable_compaction_filter); +DECLARE_bool(paranoid_file_checks); +DECLARE_bool(fail_if_options_file_error); +DECLARE_uint64(batch_protection_bytes_per_key); +DECLARE_uint32(memtable_protection_bytes_per_key); +DECLARE_uint32(block_protection_bytes_per_key); + 
+DECLARE_uint64(user_timestamp_size);
+DECLARE_string(secondary_cache_uri);
+DECLARE_int32(secondary_cache_fault_one_in);
+
+DECLARE_int32(prepopulate_block_cache);
+
+DECLARE_bool(two_write_queues);
+DECLARE_bool(use_only_the_last_commit_time_batch_for_recovery);
+DECLARE_uint64(wp_snapshot_cache_bits);
+DECLARE_uint64(wp_commit_cache_bits);
+
+DECLARE_bool(adaptive_readahead);
+DECLARE_bool(async_io);
+DECLARE_string(wal_compression);
+DECLARE_bool(verify_sst_unique_id_in_manifest);
+
+DECLARE_int32(create_timestamped_snapshot_one_in);
+
+DECLARE_bool(allow_data_in_errors);
+
+DECLARE_bool(enable_thread_tracking);
+
+// Tiered storage
+DECLARE_bool(enable_tiered_storage);  // set last_level_temperature
+DECLARE_int64(preclude_last_level_data_seconds);
+DECLARE_int64(preserve_internal_time_seconds);
+
+DECLARE_int32(verify_iterator_with_expected_state_one_in);
+DECLARE_bool(preserve_unverified_changes);
+
+DECLARE_uint64(readahead_size);
+DECLARE_uint64(initial_auto_readahead_size);
+DECLARE_uint64(max_auto_readahead_size);
+DECLARE_uint64(num_file_reads_for_auto_readahead);
+DECLARE_bool(use_io_uring);
+
+constexpr long KB = 1024;
+constexpr int kRandomValueMaxFactor = 3;
+constexpr int kValueMaxLen = 100;
+
+// wrapped posix environment
+extern ROCKSDB_NAMESPACE::Env* db_stress_env;
+extern ROCKSDB_NAMESPACE::Env* db_stress_listener_env;
+extern std::shared_ptr<ROCKSDB_NAMESPACE::FaultInjectionTestFS> fault_fs_guard;
+
+extern enum ROCKSDB_NAMESPACE::CompressionType compression_type_e;
+extern enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e;
+extern enum ROCKSDB_NAMESPACE::ChecksumType checksum_type_e;
+
+enum RepFactory { kSkipList, kHashSkipList, kVectorRep };
+
+inline enum RepFactory StringToRepFactory(const char* ctype) {
+  assert(ctype);
+
+  if (!strcasecmp(ctype, "skip_list"))
+    return kSkipList;
+  else if (!strcasecmp(ctype, "prefix_hash"))
+    return kHashSkipList;
+  else if (!strcasecmp(ctype, "vector"))
+    return kVectorRep;
+
+  fprintf(stdout, "Cannot parse memtablerep %s\n", ctype);
+  return kSkipList;
+}
+
+extern enum RepFactory FLAGS_rep_factory;
+
+namespace ROCKSDB_NAMESPACE {
+inline enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType(
+    const char* ctype) {
+  assert(ctype);
+
+  ROCKSDB_NAMESPACE::CompressionType ret_compression_type;
+
+  if (!strcasecmp(ctype, "disable")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kDisableCompressionOption;
+  } else if (!strcasecmp(ctype, "none")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kNoCompression;
+  } else if (!strcasecmp(ctype, "snappy")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kSnappyCompression;
+  } else if (!strcasecmp(ctype, "zlib")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kZlibCompression;
+  } else if (!strcasecmp(ctype, "bzip2")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kBZip2Compression;
+  } else if (!strcasecmp(ctype, "lz4")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kLZ4Compression;
+  } else if (!strcasecmp(ctype, "lz4hc")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kLZ4HCCompression;
+  } else if (!strcasecmp(ctype, "xpress")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kXpressCompression;
+  } else if (!strcasecmp(ctype, "zstd")) {
+    ret_compression_type = ROCKSDB_NAMESPACE::kZSTD;
+  } else {
+    fprintf(stderr, "Cannot parse compression type '%s'\n", ctype);
+    ret_compression_type =
+        ROCKSDB_NAMESPACE::kSnappyCompression;  // default value
+  }
+  if (ret_compression_type != ROCKSDB_NAMESPACE::kDisableCompressionOption &&
+      !CompressionTypeSupported(ret_compression_type)) {
+    // Using no compression would be more portable, but considering this is
+    // only a stress test and snappy is widely available, use snappy here.
+    ret_compression_type = ROCKSDB_NAMESPACE::kSnappyCompression;
+  }
+  return ret_compression_type;
+}
+
+inline enum ROCKSDB_NAMESPACE::ChecksumType StringToChecksumType(
+    const char* ctype) {
+  assert(ctype);
+  auto iter = ROCKSDB_NAMESPACE::checksum_type_string_map.find(ctype);
+  if (iter != ROCKSDB_NAMESPACE::checksum_type_string_map.end()) {
+    return iter->second;
+  }
+  fprintf(stderr, "Cannot parse checksum type '%s'\n", ctype);
+  return ROCKSDB_NAMESPACE::kCRC32c;
+}
+
+inline std::string ChecksumTypeToString(ROCKSDB_NAMESPACE::ChecksumType ctype) {
+  auto iter = std::find_if(
+      ROCKSDB_NAMESPACE::checksum_type_string_map.begin(),
+      ROCKSDB_NAMESPACE::checksum_type_string_map.end(),
+      [&](const std::pair<std::string, ROCKSDB_NAMESPACE::ChecksumType>&
+              name_and_enum_val) { return name_and_enum_val.second == ctype; });
+  assert(iter != ROCKSDB_NAMESPACE::checksum_type_string_map.end());
+  return iter->first;
+}
+
+inline std::vector<std::string> SplitString(std::string src) {
+  std::vector<std::string> ret;
+  if (src.empty()) {
+    return ret;
+  }
+  size_t pos = 0;
+  size_t pos_comma;
+  while ((pos_comma = src.find(',', pos)) != std::string::npos) {
+    ret.push_back(src.substr(pos, pos_comma - pos));
+    pos = pos_comma + 1;
+  }
+  ret.push_back(src.substr(pos, src.length()));
+  return ret;
+}
+
+#ifdef _MSC_VER
+#pragma warning(push)
+// truncation of constant value on static_cast
+#pragma warning(disable : 4309)
+#endif
+inline bool GetNextPrefix(const ROCKSDB_NAMESPACE::Slice& src, std::string* v) {
+  std::string ret = src.ToString();
+  for (int i = static_cast<int>(ret.size()) - 1; i >= 0; i--) {
+    if (ret[i] != static_cast<char>(255)) {
+      ret[i] = ret[i] + 1;
+      break;
+    } else if (i != 0) {
+      ret[i] = 0;
+    } else {
+      // all FF. No next prefix
+      return false;
+    }
+  }
+  *v = ret;
+  return true;
+}
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+// Append `val` to `*key` in fixed-width big-endian format
+extern inline void AppendIntToString(uint64_t val, std::string* key) {
+  // PutFixed64 uses little endian
+  PutFixed64(key, val);
+  // Reverse to get big endian
+  char* int_data = &((*key)[key->size() - sizeof(uint64_t)]);
+  for (size_t i = 0; i < sizeof(uint64_t) / 2; ++i) {
+    std::swap(int_data[i], int_data[sizeof(uint64_t) - 1 - i]);
+  }
+}
+
+// A struct for maintaining the parameters for generating variable length keys
+struct KeyGenContext {
+  // Number of adjacent keys in one cycle of key lengths
+  uint64_t window;
+  // Number of keys of each possible length in a given window
+  std::vector<uint64_t> weights;
+};
+extern KeyGenContext key_gen_ctx;
+
+// Generate a variable length key string from the given int64 val. The
+// order of the keys is preserved. The key could be anywhere from 8 to
+// max_key_len * 8 bytes.
+// The algorithm picks the length based on the
+// offset of the val within a configured window and the distribution of the
+// number of keys of various lengths in that window. For example, if x, y, z
+// are the weights assigned to each possible key length, the keys generated
+// would be
+// - {0}...{x-1}
+// {(x-1),0}..{(x-1),(y-1)},{(x-1),(y-1),0}..{(x-1),(y-1),(z-1)} and so on.
+// Additionally, a trailer of 0-7 bytes could be appended.
+extern inline std::string Key(int64_t val) {
+  uint64_t window = key_gen_ctx.window;
+  size_t levels = key_gen_ctx.weights.size();
+  std::string key;
+  // Over-reserve and for now do not bother `shrink_to_fit()` since the key
+  // strings are transient.
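+  // [Editorial note, not part of the vendored source] Worked example of the
+  // scheme described above: with window = 5 and weights = {2, 3}, consecutive
+  // values of val map to the prefix sequences
+  //   0 -> {0}, 1 -> {1}, 2 -> {1,0}, 3 -> {1,1}, 4 -> {1,2},
+  //   5 -> {2}, 6 -> {3}, 7 -> {3,0}, ...
+  // i.e. each window yields 2 one-level keys followed by 3 two-level keys,
+  // and every prefix is emitted as 8 big-endian bytes, so ordering holds.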
+  key.reserve(FLAGS_max_key_len * 8);
+
+  uint64_t window_idx = static_cast<uint64_t>(val) / window;
+  uint64_t offset = static_cast<uint64_t>(val) % window;
+  for (size_t level = 0; level < levels; ++level) {
+    uint64_t weight = key_gen_ctx.weights[level];
+    uint64_t pfx;
+    if (level == 0) {
+      pfx = window_idx * weight;
+    } else {
+      pfx = 0;
+    }
+    pfx += offset >= weight ? weight - 1 : offset;
+    AppendIntToString(pfx, &key);
+    if (offset < weight) {
+      // Use the bottom 3 bits of offset as the number of trailing 'x's in the
+      // key. If the next key is going to be of the next level, then skip the
+      // trailer as it would break ordering. If the key length is already at
+      // max, skip the trailer.
+      if (offset < weight - 1 && level < levels - 1) {
+        size_t trailer_len = offset & 0x7;
+        key.append(trailer_len, 'x');
+      }
+      break;
+    }
+    offset -= weight;
+  }
+
+  return key;
+}
+
+// Given a string key, map it to an index into the expected values buffer
+extern inline bool GetIntVal(std::string big_endian_key, uint64_t* key_p) {
+  size_t size_key = big_endian_key.size();
+  std::vector<uint64_t> prefixes;
+
+  assert(size_key <= key_gen_ctx.weights.size() * sizeof(uint64_t));
+
+  std::string little_endian_key;
+  little_endian_key.resize(size_key);
+  for (size_t start = 0; start + sizeof(uint64_t) <= size_key;
+       start += sizeof(uint64_t)) {
+    size_t end = start + sizeof(uint64_t);
+    for (size_t i = 0; i < sizeof(uint64_t); ++i) {
+      little_endian_key[start + i] = big_endian_key[end - 1 - i];
+    }
+    Slice little_endian_slice =
+        Slice(&little_endian_key[start], sizeof(uint64_t));
+    uint64_t pfx;
+    if (!GetFixed64(&little_endian_slice, &pfx)) {
+      return false;
+    }
+    prefixes.emplace_back(pfx);
+  }
+
+  uint64_t key = 0;
+  for (size_t i = 0; i < prefixes.size(); ++i) {
+    uint64_t pfx = prefixes[i];
+    key += (pfx / key_gen_ctx.weights[i]) * key_gen_ctx.window +
+           pfx % key_gen_ctx.weights[i];
+    if (i < prefixes.size() - 1) {
+      // The encoding writes a `key_gen_ctx.weights[i] - 1` that counts for
+      // `key_gen_ctx.weights[i]` when there are more prefixes to come. So we
+      // need to add back the one here as we're at a non-last prefix.
+      ++key;
+    }
+  }
+  *key_p = key;
+  return true;
+}
+
+// Given a string prefix, map it to the first corresponding index in the
+// expected values buffer.
+inline bool GetFirstIntValInPrefix(std::string big_endian_prefix,
+                                   uint64_t* key_p) {
+  size_t size_key = big_endian_prefix.size();
+  // Pad with zeros to make it a multiple of 8. This function may be called
+  // with a prefix, in which case we return the first index that falls
+  // inside or outside that prefix, depending on whether the prefix is
+  // the start or the upper bound of a scan
+  unsigned int pad = sizeof(uint64_t) - (size_key % sizeof(uint64_t));
+  if (pad < sizeof(uint64_t)) {
+    big_endian_prefix.append(pad, '\0');
+  }
+  return GetIntVal(std::move(big_endian_prefix), key_p);
+}
+
+extern inline uint64_t GetPrefixKeyCount(const std::string& prefix,
+                                         const std::string& ub) {
+  uint64_t start = 0;
+  uint64_t end = 0;
+
+  if (!GetFirstIntValInPrefix(prefix, &start) ||
+      !GetFirstIntValInPrefix(ub, &end)) {
+    return 0;
+  }
+
+  return end - start;
+}
+
+extern inline std::string StringToHex(const std::string& str) {
+  std::string result = "0x";
+  result.append(Slice(str).ToString(true));
+  return result;
+}
+
+inline std::string WideColumnsToHex(const WideColumns& columns) {
+  if (columns.empty()) {
+    return std::string();
+  }
+
+  std::ostringstream oss;
+
+  oss << std::hex;
+
+  auto it = columns.begin();
+  oss << *it;
+  for (++it; it != columns.end(); ++it) {
+    oss << ' ' << *it;
+  }
+
+  return oss.str();
+}
+
+// Unified output format for double parameters
+extern inline std::string FormatDoubleParam(double param) {
+  return std::to_string(param);
+}
+
+// Make sure that double parameter is a value we can reproduce by
+// re-inputting the value printed.
+extern inline void SanitizeDoubleParam(double* param) {
+  *param = std::atof(FormatDoubleParam(*param).c_str());
+}
+
+extern void PoolSizeChangeThread(void* v);
+
+extern void DbVerificationThread(void* v);
+
+extern void TimestampedSnapshotsThread(void* v);
+
+extern void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz);
+
+extern int64_t GenerateOneKey(ThreadState* thread, uint64_t iteration);
+
+extern std::vector<int64_t> GenerateNKeys(ThreadState* thread, int num_keys,
+                                          uint64_t iteration);
+
+extern size_t GenerateValue(uint32_t rand, char* v, size_t max_sz);
+extern uint32_t GetValueBase(Slice s);
+
+extern WideColumns GenerateWideColumns(uint32_t value_base, const Slice& slice);
+extern WideColumns GenerateExpectedWideColumns(uint32_t value_base,
+                                               const Slice& slice);
+extern bool VerifyWideColumns(const Slice& value, const WideColumns& columns);
+extern bool VerifyWideColumns(const WideColumns& columns);
+
+extern StressTest* CreateCfConsistencyStressTest();
+extern StressTest* CreateBatchedOpsStressTest();
+extern StressTest* CreateNonBatchedOpsStressTest();
+extern StressTest* CreateMultiOpsTxnsStressTest();
+extern void CheckAndSetOptionsForMultiOpsTxnStressTest();
+extern void InitializeHotKeyGenerator(double alpha);
+extern int64_t GetOneHotKeyID(double rand_seed, int64_t max_key);
+
+extern std::string GetNowNanos();
+
+std::shared_ptr<FileChecksumGenFactory> GetFileChecksumImpl(
+    const std::string& name);
+
+Status DeleteFilesInDirectory(const std::string& dirname);
+Status SaveFilesInDirectory(const std::string& src_dirname,
+                            const std::string& dst_dirname);
+Status DestroyUnverifiedSubdir(const std::string& dirname);
+Status InitUnverifiedSubdir(const std::string& dirname);
+} // namespace ROCKSDB_NAMESPACE
+#endif // GFLAGS
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_compaction_filter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_compaction_filter.h
new file mode 100644
index 0000000000..408bb48f3e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_compaction_filter.h
@@ -0,0 +1,96 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "db_stress_tool/db_stress_common.h"
+#include "db_stress_tool/db_stress_shared_state.h"
+#include "rocksdb/compaction_filter.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// DbStressCompactionFilter is safe to use with db_stress as it does not perform
+// any mutation. It only makes `kRemove` decisions for keys that are already
+// non-existent according to the `SharedState`.
+class DbStressCompactionFilter : public CompactionFilter {
+ public:
+  DbStressCompactionFilter(SharedState* state, int cf_id)
+      : state_(state), cf_id_(cf_id) {}
+
+  Decision FilterV2(int /*level*/, const Slice& key, ValueType /*value_type*/,
+                    const Slice& /*existing_value*/, std::string* /*new_value*/,
+                    std::string* /*skip_until*/) const override {
+    if (state_ == nullptr) {
+      return Decision::kKeep;
+    }
+    if (key.empty() || ('0' <= key[0] && key[0] <= '9')) {
+      // It is likely leftover from a test_batches_snapshots run. Below this
+      // conditional, the test_batches_snapshots key format is not handled
+      // properly. Just keep it to be safe.
+      return Decision::kKeep;
+    }
+    uint64_t key_num = 0;
+    {
+      Slice ukey_without_ts = key;
+      assert(ukey_without_ts.size() >= FLAGS_user_timestamp_size);
+      ukey_without_ts.remove_suffix(FLAGS_user_timestamp_size);
+      [[maybe_unused]] bool ok =
+          GetIntVal(ukey_without_ts.ToString(), &key_num);
+      assert(ok);
+    }
+    port::Mutex* key_mutex = state_->GetMutexForKey(cf_id_, key_num);
+    if (!key_mutex->TryLock()) {
+      return Decision::kKeep;
+    }
+    // Reaching here means we acquired the lock.
+
+    bool key_exists = state_->Exists(cf_id_, key_num);
+    const bool allow_overwrite = state_->AllowsOverwrite(key_num);
+
+    key_mutex->Unlock();
+
+    if (!key_exists) {
+      return allow_overwrite ? Decision::kRemove : Decision::kPurge;
+    }
+    return Decision::kKeep;
+  }
+
+  const char* Name() const override { return "DbStressCompactionFilter"; }
+
+ private:
+  SharedState* const state_;
+  const int cf_id_;
+};
+
+class DbStressCompactionFilterFactory : public CompactionFilterFactory {
+ public:
+  DbStressCompactionFilterFactory() : state_(nullptr) {}
+
+  void SetSharedState(SharedState* state) {
+    MutexLock state_mutex_guard(&state_mutex_);
+    state_ = state;
+  }
+
+  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) override {
+    MutexLock state_mutex_guard(&state_mutex_);
+    return std::unique_ptr<CompactionFilter>(
+        new DbStressCompactionFilter(state_, context.column_family_id));
+  }
+
+  const char* Name() const override {
+    return "DbStressCompactionFilterFactory";
+  }
+
+ private:
+  port::Mutex state_mutex_;
+  SharedState* state_;
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_driver.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_driver.h
new file mode 100644
index 0000000000..a173470ff7
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_driver.h
@@ -0,0 +1,18 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db_stress_tool/db_stress_shared_state.h"
+#ifdef GFLAGS
+#pragma once
+#include "db_stress_tool/db_stress_test_base.h"
+namespace ROCKSDB_NAMESPACE {
+extern void ThreadBody(void* /*thread_state*/);
+extern bool RunStressTest(SharedState*);
+} // namespace ROCKSDB_NAMESPACE
+#endif // GFLAGS
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_env_wrapper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_env_wrapper.h
new file mode 100644
index 0000000000..612d9fc6b9
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_env_wrapper.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifdef GFLAGS
+#pragma once
+#include "db_stress_tool/db_stress_common.h"
+#include "monitoring/thread_status_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+class DbStressRandomAccessFileWrapper : public FSRandomAccessFileOwnerWrapper {
+ public:
+  explicit DbStressRandomAccessFileWrapper(
+      std::unique_ptr<FSRandomAccessFile>&& target)
+      : FSRandomAccessFileOwnerWrapper(std::move(target)) {}
+
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override {
+#ifndef NDEBUG
+    const ThreadStatus::OperationType thread_op =
+        ThreadStatusUtil::GetThreadOperation();
+    Env::IOActivity io_activity =
+        ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+    assert(io_activity == Env::IOActivity::kUnknown ||
+           io_activity == options.io_activity);
+#endif
+    return target()->Read(offset, n, options, result, scratch, dbg);
+  }
+};
+
+class DbStressFSWrapper : public FileSystemWrapper {
+ public:
+  explicit DbStressFSWrapper(const std::shared_ptr<FileSystem>& t)
+      : FileSystemWrapper(t) {}
+  static const char* kClassName() { return "DbStressFS"; }
+  const char* Name() const override { return kClassName(); }
+
+  IOStatus NewRandomAccessFile(const std::string& f,
+                               const FileOptions& file_opts,
+                               std::unique_ptr<FSRandomAccessFile>* r,
+                               IODebugContext* dbg) override {
+    std::unique_ptr<FSRandomAccessFile> file;
+    IOStatus s = target()->NewRandomAccessFile(f, file_opts, &file, dbg);
+    if (s.ok()) {
+      r->reset(new DbStressRandomAccessFileWrapper(std::move(file)));
+    }
+    return s;
+  }
+
+  IOStatus DeleteFile(const std::string& f, const IOOptions& opts,
+                      IODebugContext* dbg) override {
+    // We determine whether a file is a manifest by searching for the string
+    // "MANIFEST-" in its path, so a false positive is possible if the
+    // directory path happens to contain the keyword, but that is unlikely.
+    // Checkpoint, backup, and restore directories need to be exempted.
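+    // [Editorial note, not part of the vendored source] Illustrative
+    // outcomes of this heuristic (paths are hypothetical):
+    //   "/db/MANIFEST-000005"              -> renamed, history preserved
+    //   "/db/checkpoint_1/MANIFEST-000007" -> deleted (exempt directory)
+    //   "/db/000123.sst"                   -> deleted (no "MANIFEST-")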
+    if (!if_preserve_all_manifests ||
+        f.find("MANIFEST-") == std::string::npos ||
+        f.find("checkpoint") != std::string::npos ||
+        f.find(".backup") != std::string::npos ||
+        f.find(".restore") != std::string::npos) {
+      return target()->DeleteFile(f, opts, dbg);
+    }
+    // Rename the file instead of deleting it, to keep the history while
+    // making it invisible to RocksDB.
+    return target()->RenameFile(f, f + "_renamed_", opts, dbg);
+  }
+
+  // If true, manifest files are not deleted in DeleteFile().
+  bool if_preserve_all_manifests = true;
+};
+} // namespace ROCKSDB_NAMESPACE
+#endif // GFLAGS
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_listener.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_listener.h
new file mode 100644
index 0000000000..97bbdaefa4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_listener.h
@@ -0,0 +1,269 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#ifdef GFLAGS
+#pragma once
+
+#include <mutex>
+#include <unordered_set>
+
+#include "file/filename.h"
+#include "file/writable_file_writer.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/table_properties.h"
+#include "rocksdb/unique_id.h"
+#include "util/gflags_compat.h"
+#include "util/random.h"
+
+DECLARE_int32(compact_files_one_in);
+
+namespace ROCKSDB_NAMESPACE {
+
+// Verify across process executions that all seen IDs are unique
+class UniqueIdVerifier {
+ public:
+  explicit UniqueIdVerifier(const std::string& db_name, Env* env);
+  ~UniqueIdVerifier();
+
+  void Verify(const std::string& id);
+
+ private:
+  void VerifyNoWrite(const std::string& id);
+
+ private:
+  std::mutex mutex_;
+  // IDs persisted to a hidden file inside DB dir
+  std::string path_;
+  std::unique_ptr<WritableFileWriter> data_file_writer_;
+  // Starting byte for which 8 bytes to check in memory within 24 byte ID
+  size_t offset_;
+  // Working copy of the set of 8 byte pieces
+  std::unordered_set<uint64_t> id_set_;
+};
+
+class DbStressListener : public EventListener {
+ public:
+  DbStressListener(const std::string& db_name,
+                   const std::vector<DbPath>& db_paths,
+                   const std::vector<ColumnFamilyDescriptor>& column_families,
+                   Env* env)
+      : db_name_(db_name),
+        db_paths_(db_paths),
+        column_families_(column_families),
+        num_pending_file_creations_(0),
+        unique_ids_(db_name, env) {}
+
+  const char* Name() const override { return kClassName(); }
+  static const char* kClassName() { return "DBStressListener"; }
+
+  ~DbStressListener() override { assert(num_pending_file_creations_ == 0); }
+  void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
+    assert(IsValidColumnFamilyName(info.cf_name));
+    VerifyFilePath(info.file_path);
+    // pretend we are doing some work here
+    RandomSleep();
+  }
+
+  void OnFlushBegin(DB* /*db*/,
+                    const FlushJobInfo& /*flush_job_info*/) override {
+    RandomSleep();
+  }
+
+  void OnTableFileDeleted(const TableFileDeletionInfo& /*info*/) override {
+    RandomSleep();
+  }
+
+  void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& /*ci*/) override {
+    RandomSleep();
+  }
+
+  void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override {
+    assert(IsValidColumnFamilyName(ci.cf_name));
+    assert(ci.input_files.size() + ci.output_files.size() > 0U);
+    for (const auto& file_path : ci.input_files) {
+      VerifyFilePath(file_path);
+    }
+    for (const auto& file_path : ci.output_files) {
+      VerifyFilePath(file_path);
+    }
+    // pretend we are doing some work here
+    RandomSleep();
+  }
+
+  void OnTableFileCreationStarted(
+      const TableFileCreationBriefInfo& /*info*/) override {
+    ++num_pending_file_creations_;
+  }
+
+  void OnTableFileCreated(const TableFileCreationInfo& info) override {
+    assert(info.db_name == db_name_);
+    assert(IsValidColumnFamilyName(info.cf_name));
+    assert(info.job_id > 0 || FLAGS_compact_files_one_in > 0);
+    if (info.status.ok()) {
+      assert(info.file_size > 0);
+      VerifyFilePath(info.file_path);
+      assert(info.table_properties.data_size > 0 ||
+             info.table_properties.num_range_deletions > 0);
+      assert(info.table_properties.raw_key_size > 0);
+      assert(info.table_properties.num_entries > 0);
+      VerifyTableFileUniqueId(info.table_properties, info.file_path);
+    }
+    --num_pending_file_creations_;
+  }
+
+  void OnMemTableSealed(const MemTableInfo& /*info*/) override {
+    RandomSleep();
+  }
+
+  void OnColumnFamilyHandleDeletionStarted(
+      ColumnFamilyHandle* /*handle*/) override {
+    RandomSleep();
+  }
+
+  void OnExternalFileIngested(DB* /*db*/,
+                              const ExternalFileIngestionInfo& info) override {
+    RandomSleep();
+    // Here we assume that each generated external file is ingested
+    // exactly once (or thrown away in case of crash)
+    VerifyTableFileUniqueId(info.table_properties, info.internal_file_path);
+  }
+
+  void OnBackgroundError(BackgroundErrorReason /* reason */,
+                         Status* /* bg_error */) override {
+    RandomSleep();
+  }
+
+  void OnStallConditionsChanged(const WriteStallInfo& /*info*/) override {
+    RandomSleep();
+  }
+
+  void OnFileReadFinish(const FileOperationInfo& info) override {
+    // Even an empty callback is valuable because sometimes some locks are
+    // released in order to make the callback.
+
+    // Sleep carefully here, as this is a frequent operation and we don't want
+    // to slow down the tests. We always sleep when the read is large.
+    // When the read is small, sleep only with a small probability.
+    size_t length_read = info.length;
+    if (length_read >= 1000000 || Random::GetTLSInstance()->OneIn(1000)) {
+      RandomSleep();
+    }
+  }
+
+  void OnFileWriteFinish(const FileOperationInfo& info) override {
+    // Even an empty callback is valuable because sometimes some locks are
+    // released in order to make the callback.
+
+    // Sleep carefully here, as this is a frequent operation and we don't want
+    // to slow down the tests. When the write is large, always sleep.
+    // Otherwise, sleep with a relatively small probability.
+    size_t length_write = info.length;
+    if (length_write >= 1000000 || Random::GetTLSInstance()->OneIn(64)) {
+      RandomSleep();
+    }
+  }
+
+  bool ShouldBeNotifiedOnFileIO() override {
+    RandomSleep();
+    return static_cast<bool>(Random::GetTLSInstance()->OneIn(1));
+  }
+
+  void OnErrorRecoveryBegin(BackgroundErrorReason /* reason */,
+                            Status /* bg_error */,
+                            bool* /* auto_recovery */) override {
+    RandomSleep();
+  }
+
+  void OnErrorRecoveryCompleted(Status /* old_bg_error */) override {
+    RandomSleep();
+  }
+
+ protected:
+  bool IsValidColumnFamilyName(const std::string& cf_name) const {
+    if (cf_name == kDefaultColumnFamilyName) {
+      return true;
+    }
+    // The column family names in the stress tests are numbers.
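+    // [Editorial note, not part of the vendored source] e.g. "0" and "42"
+    // pass the digit check below; "cf42" does not.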
+ for (size_t i = 0; i < cf_name.size(); ++i) { + if (cf_name[i] < '0' || cf_name[i] > '9') { + return false; + } + } + return true; + } + + void VerifyFileDir(const std::string& file_dir) { +#ifndef NDEBUG + if (db_name_ == file_dir) { + return; + } + for (const auto& db_path : db_paths_) { + if (db_path.path == file_dir) { + return; + } + } + for (auto& cf : column_families_) { + for (const auto& cf_path : cf.options.cf_paths) { + if (cf_path.path == file_dir) { + return; + } + } + } + assert(false); +#else + (void)file_dir; +#endif // !NDEBUG + } + + void VerifyFileName(const std::string& file_name) { +#ifndef NDEBUG + uint64_t file_number; + FileType file_type; + bool result = ParseFileName(file_name, &file_number, &file_type); + assert(result); + assert(file_type == kTableFile); +#else + (void)file_name; +#endif // !NDEBUG + } + + void VerifyFilePath(const std::string& file_path) { +#ifndef NDEBUG + size_t pos = file_path.find_last_of("/"); + if (pos == std::string::npos) { + VerifyFileName(file_path); + } else { + if (pos > 0) { + VerifyFileDir(file_path.substr(0, pos)); + } + VerifyFileName(file_path.substr(pos)); + } +#else + (void)file_path; +#endif // !NDEBUG + } + + // Unique id is verified using the TableProperties. file_path is only used + // for reporting. + void VerifyTableFileUniqueId(const TableProperties& new_file_properties, + const std::string& file_path); + + void RandomSleep() { + std::this_thread::sleep_for( + std::chrono::microseconds(Random::GetTLSInstance()->Uniform(5000))); + } + + private: + std::string db_name_; + std::vector db_paths_; + std::vector column_families_; + std::atomic num_pending_file_creations_; + UniqueIdVerifier unique_ids_; +}; +} // namespace ROCKSDB_NAMESPACE +#endif // GFLAGS diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_shared_state.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_shared_state.h new file mode 100644 index 0000000000..0137f0b2e4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_shared_state.h @@ -0,0 +1,437 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors + +#ifdef GFLAGS +#pragma once + +#include "db_stress_tool/db_stress_stat.h" +#include "db_stress_tool/expected_state.h" +// SyncPoint is not supported in Released Windows Mode. 
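+// [Editorial note, not part of the vendored source] The guard below includes
+// sync_point.h except in release (NDEBUG) builds on Windows; by De Morgan,
+// !(defined NDEBUG) || !defined(OS_WIN)  ==  !(NDEBUG && OS_WIN).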
+#if !(defined NDEBUG) || !defined(OS_WIN) +#include "test_util/sync_point.h" +#endif // !(defined NDEBUG) || !defined(OS_WIN) +#include "util/gflags_compat.h" + +DECLARE_uint64(seed); +DECLARE_int64(max_key); +DECLARE_uint64(log2_keys_per_lock); +DECLARE_int32(threads); +DECLARE_int32(column_families); +DECLARE_int32(nooverwritepercent); +DECLARE_string(expected_values_dir); +DECLARE_int32(clear_column_family_one_in); +DECLARE_bool(test_batches_snapshots); +DECLARE_int32(compaction_thread_pool_adjust_interval); +DECLARE_int32(continuous_verification_interval); +DECLARE_int32(read_fault_one_in); +DECLARE_int32(write_fault_one_in); +DECLARE_int32(open_metadata_write_fault_one_in); +DECLARE_int32(open_write_fault_one_in); +DECLARE_int32(open_read_fault_one_in); + +DECLARE_int32(injest_error_severity); + +namespace ROCKSDB_NAMESPACE { +class StressTest; + +// State shared by all concurrent executions of the same benchmark. +class SharedState { + public: + // Errors when reading filter blocks are ignored, so we use a thread + // local variable updated via sync points to keep track of errors injected + // while reading filter blocks in order to ignore the Get/MultiGet result + // for those calls + static thread_local bool ignore_read_error; + + SharedState(Env* /*env*/, StressTest* stress_test) + : cv_(&mu_), + seed_(static_cast<uint32_t>(FLAGS_seed)), + max_key_(FLAGS_max_key), + log2_keys_per_lock_(static_cast<uint32_t>(FLAGS_log2_keys_per_lock)), + num_threads_(0), + num_initialized_(0), + num_populated_(0), + vote_reopen_(0), + num_done_(0), + start_(false), + start_verify_(false), + num_bg_threads_(0), + should_stop_bg_thread_(false), + bg_thread_finished_(0), + stress_test_(stress_test), + verification_failure_(false), + should_stop_test_(false), + no_overwrite_ids_(GenerateNoOverwriteIds()), + expected_state_manager_(nullptr), + printing_verification_results_(false), + start_timestamp_(Env::Default()->NowNanos()) { + Status status; + // TODO: We should introduce a way to explicitly disable verification + // during shutdown. When that is disabled and FLAGS_expected_values_dir + // is empty (disabling verification at startup), we can skip tracking + // expected state. Only then should we permit bypassing the below feature + // compatibility checks.
+ if (!FLAGS_expected_values_dir.empty()) { + if (!std::atomic<uint32_t>{}.is_lock_free()) { + status = Status::InvalidArgument( + "Cannot use --expected_values_dir on platforms without lock-free " + "std::atomic"); + } + if (status.ok() && FLAGS_clear_column_family_one_in > 0) { + status = Status::InvalidArgument( + "Cannot use --expected_values_dir when " + "--clear_column_family_one_in is greater than zero."); + } + } + if (status.ok()) { + if (FLAGS_expected_values_dir.empty()) { + expected_state_manager_.reset( + new AnonExpectedStateManager(FLAGS_max_key, FLAGS_column_families)); + } else { + expected_state_manager_.reset(new FileExpectedStateManager( + FLAGS_max_key, FLAGS_column_families, FLAGS_expected_values_dir)); + } + status = expected_state_manager_->Open(); + } + if (!status.ok()) { + fprintf(stderr, "Failed setting up expected state with error: %s\n", + status.ToString().c_str()); + exit(1); + } + + if (FLAGS_test_batches_snapshots) { + fprintf(stdout, "No lock creation because test_batches_snapshots set\n"); + return; + } + + long num_locks = static_cast<long>(max_key_ >> log2_keys_per_lock_); + if (max_key_ & ((1 << log2_keys_per_lock_) - 1)) { + num_locks++; + } + fprintf(stdout, "Creating %ld locks\n", num_locks * FLAGS_column_families); + key_locks_.resize(FLAGS_column_families); + + for (int i = 0; i < FLAGS_column_families; ++i) { + key_locks_[i].reset(new port::Mutex[num_locks]); + } + if (FLAGS_read_fault_one_in) { +#ifdef NDEBUG + // Unsupported in release mode because it relies on + // `IGNORE_STATUS_IF_ERROR` to distinguish faults not expected to lead to + // failure. + fprintf(stderr, + "Cannot set nonzero value for --read_fault_one_in in " + "release mode."); + exit(1); +#else // NDEBUG + SyncPoint::GetInstance()->SetCallBack("FaultInjectionIgnoreError", + IgnoreReadErrorCallback); + SyncPoint::GetInstance()->EnableProcessing(); +#endif // NDEBUG + } + } + + ~SharedState() { +#ifndef NDEBUG + if (FLAGS_read_fault_one_in) { + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); + } +#endif + } + + port::Mutex* GetMutex() { return &mu_; } + + port::CondVar* GetCondVar() { return &cv_; } + + StressTest* GetStressTest() const { return stress_test_; } + + int64_t GetMaxKey() const { return max_key_; } + + uint32_t GetNumThreads() const { return num_threads_; } + + void SetThreads(int num_threads) { num_threads_ = num_threads; } + + void IncInitialized() { num_initialized_++; } + + void IncOperated() { num_populated_++; } + + void IncDone() { num_done_++; } + + void IncVotedReopen() { vote_reopen_ = (vote_reopen_ + 1) % num_threads_; } + + bool AllInitialized() const { return num_initialized_ >= num_threads_; } + + bool AllOperated() const { return num_populated_ >= num_threads_; } + + bool AllDone() const { return num_done_ >= num_threads_; } + + bool AllVotedReopen() { return (vote_reopen_ == 0); } + + void SetStart() { start_ = true; } + + void SetStartVerify() { start_verify_ = true; } + + bool Started() const { return start_; } + + bool VerifyStarted() const { return start_verify_; } + + void SetVerificationFailure() { verification_failure_.store(true); } + + bool HasVerificationFailedYet() const { return verification_failure_.load(); } + + void SetShouldStopTest() { should_stop_test_.store(true); } + + bool ShouldStopTest() const { return should_stop_test_.load(); } + + // Returns a lock covering `key` in `cf`.
+ port::Mutex* GetMutexForKey(int cf, int64_t key) { + return &key_locks_[cf][key >> log2_keys_per_lock_]; + } + + // Acquires locks for all keys in `cf`. + void LockColumnFamily(int cf) { + for (int i = 0; i < max_key_ >> log2_keys_per_lock_; ++i) { + key_locks_[cf][i].Lock(); + } + } + + // Releases locks for all keys in `cf`. + void UnlockColumnFamily(int cf) { + for (int i = 0; i < max_key_ >> log2_keys_per_lock_; ++i) { + key_locks_[cf][i].Unlock(); + } + } + + // Returns a collection of mutex locks covering the key range [start, end) in + // `cf`. + std::vector<std::unique_ptr<MutexLock>> GetLocksForKeyRange(int cf, + int64_t start, + int64_t end) { + std::vector<std::unique_ptr<MutexLock>> range_locks; + + if (start >= end) { + return range_locks; + } + + const int64_t start_idx = start >> log2_keys_per_lock_; + + int64_t end_idx = end >> log2_keys_per_lock_; + if ((end & ((1 << log2_keys_per_lock_) - 1)) == 0) { + --end_idx; + } + + for (int64_t idx = start_idx; idx <= end_idx; ++idx) { + range_locks.emplace_back( + std::make_unique<MutexLock>(&key_locks_[cf][idx])); + } + + return range_locks; + } + + Status SaveAtAndAfter(DB* db) { + return expected_state_manager_->SaveAtAndAfter(db); + } + + bool HasHistory() { return expected_state_manager_->HasHistory(); } + + Status Restore(DB* db) { return expected_state_manager_->Restore(db); } + + // Requires external locking covering all keys in `cf`. + void ClearColumnFamily(int cf) { + return expected_state_manager_->ClearColumnFamily(cf); + } + + // Prepare a Put that will be started but not finished yet + // This is useful for crash-recovery testing when the process may crash + // before updating the corresponding expected value + // + // Requires external locking covering `key` in `cf` to prevent concurrent + // write or delete to the same `key`. + PendingExpectedValue PreparePut(int cf, int64_t key) { + return expected_state_manager_->PreparePut(cf, key); + } + + // Does not require external locking. + ExpectedValue Get(int cf, int64_t key) { + return expected_state_manager_->Get(cf, key); + } + + // Prepare a Delete that will be started but not finished yet + // This is useful for crash-recovery testing when the process may crash + // before updating the corresponding expected value + // + // Requires external locking covering `key` in `cf` to prevent concurrent + // write or delete to the same `key`. + PendingExpectedValue PrepareDelete(int cf, int64_t key) { + return expected_state_manager_->PrepareDelete(cf, key); + } + + // Requires external locking covering `key` in `cf` to prevent concurrent + // write or delete to the same `key`. + PendingExpectedValue PrepareSingleDelete(int cf, int64_t key) { + return expected_state_manager_->PrepareSingleDelete(cf, key); + } + + // Requires external locking covering keys in `[begin_key, end_key)` in `cf` + // to prevent concurrent write or delete to the same `key`. + std::vector<PendingExpectedValue> PrepareDeleteRange(int cf, + int64_t begin_key, + int64_t end_key) { + return expected_state_manager_->PrepareDeleteRange(cf, begin_key, end_key); + } + + bool AllowsOverwrite(int64_t key) const { + return no_overwrite_ids_.find(key) == no_overwrite_ids_.end(); + }
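As a quick sanity check on the two lock-striping computations above (the rounded-up stripe count in the `SharedState` constructor and the exclusive-bound handling in `GetLocksForKeyRange()`), here is a minimal standalone sketch with assumed values; it uses no RocksDB types:

```cpp
#include <cassert>
#include <cstdint>

// Standalone sketch: 2^log2_keys_per_lock consecutive keys share one mutex
// "stripe". The constructor rounds the stripe count up, and the range-lock
// helper must back off the last stripe when the exclusive upper bound lands
// exactly on a stripe boundary.
int main() {
  const uint32_t log2_keys_per_lock = 2;  // 4 keys per stripe
  const int64_t max_key = 1001;

  // Stripe count, rounded up for the final partial group of keys.
  int64_t num_locks = max_key >> log2_keys_per_lock;  // 250
  if (max_key & ((int64_t{1} << log2_keys_per_lock) - 1)) {
    num_locks++;                                      // 251
  }
  assert(num_locks == 251);

  // GetMutexForKey(): the stripe index is the key with its low bits dropped.
  assert((int64_t{999} >> log2_keys_per_lock) == 249);

  // GetLocksForKeyRange() on [5, 8): keys 5,6,7. Since 8 is exclusive and
  // falls on a stripe boundary (8 % 4 == 0), stripe 2 must NOT be locked.
  int64_t start = 5, end = 8;
  int64_t start_idx = start >> log2_keys_per_lock;    // 1
  int64_t end_idx = end >> log2_keys_per_lock;        // 2
  if ((end & ((1 << log2_keys_per_lock) - 1)) == 0) {
    --end_idx;                                        // back to 1
  }
  assert(start_idx == 1 && end_idx == 1);             // only stripe 1 locked
  return 0;
}
```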
+ // Requires external locking covering `key` in `cf` to prevent concurrent + // delete to the same `key`. + bool Exists(int cf, int64_t key) { + return expected_state_manager_->Exists(cf, key); + } + + // Sync the `value_base` to the corresponding expected value + void SyncPut(int cf, int64_t key, uint32_t value_base) { + return expected_state_manager_->SyncPut(cf, key, value_base); + } + + // Sync the corresponding expected value to be pending Put + void SyncPendingPut(int cf, int64_t key) { + return expected_state_manager_->SyncPendingPut(cf, key); + } + + // Sync the corresponding expected value to be deleted + void SyncDelete(int cf, int64_t key) { + return expected_state_manager_->SyncDelete(cf, key); + } + + uint32_t GetSeed() const { return seed_; } + + void SetShouldStopBgThread() { should_stop_bg_thread_ = true; } + + bool ShouldStopBgThread() { return should_stop_bg_thread_; } + + void IncBgThreads() { ++num_bg_threads_; } + + void IncBgThreadsFinished() { ++bg_thread_finished_; } + + bool BgThreadsFinished() const { + return bg_thread_finished_ == num_bg_threads_; + } + + bool ShouldVerifyAtBeginning() const { + return !FLAGS_expected_values_dir.empty(); + } + + bool PrintingVerificationResults() { + bool tmp = false; + return !printing_verification_results_.compare_exchange_strong( + tmp, true, std::memory_order_relaxed); + } + + void FinishPrintingVerificationResults() { + printing_verification_results_.store(false, std::memory_order_relaxed); + } + + uint64_t GetStartTimestamp() const { return start_timestamp_; } + + private: + static void IgnoreReadErrorCallback(void*) { ignore_read_error = true; } + + // Pick random keys in each column family that will not experience overwrite. + std::unordered_set<int64_t> GenerateNoOverwriteIds() const { + fprintf(stdout, "Choosing random keys with no overwrite\n"); + // Start with the identity permutation. Each iteration of the for loop + // below continues from the partially shuffled permutation left by the + // previous iteration. + std::vector<int64_t> permutation(max_key_); + for (int64_t i = 0; i < max_key_; ++i) { + permutation[i] = i; + } + // Now do the Knuth shuffle + const int64_t num_no_overwrite_keys = + (max_key_ * FLAGS_nooverwritepercent) / 100; + // Only need to figure out first num_no_overwrite_keys of permutation + std::unordered_set<int64_t> ret; + ret.reserve(num_no_overwrite_keys); + Random64 rnd(seed_); + for (int64_t i = 0; i < num_no_overwrite_keys; i++) { + assert(i < max_key_); + int64_t rand_index = i + rnd.Next() % (max_key_ - i); + // Swap i and rand_index; + int64_t temp = permutation[i]; + permutation[i] = permutation[rand_index]; + permutation[rand_index] = temp; + // Fill no_overwrite_ids_ with the first num_no_overwrite_keys of + // permutation + ret.insert(permutation[i]); + } + return ret; + } + + port::Mutex mu_; + port::CondVar cv_; + const uint32_t seed_; + const int64_t max_key_; + const uint32_t log2_keys_per_lock_; + int num_threads_; + long num_initialized_; + long num_populated_; + long vote_reopen_; + long num_done_; + bool start_; + bool start_verify_; + int num_bg_threads_; + bool should_stop_bg_thread_; + int bg_thread_finished_; + StressTest* stress_test_; + std::atomic<bool> verification_failure_; + std::atomic<bool> should_stop_test_; + + // Keys that should not be overwritten + const std::unordered_set<int64_t> no_overwrite_ids_; + + std::unique_ptr<ExpectedStateManager> expected_state_manager_;
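The `GenerateNoOverwriteIds()` shuffle above only ever fixes the first `num_no_overwrite_keys` slots of the permutation, which is all that is needed to draw that many distinct keys. A minimal standalone sketch of the same partial Fisher-Yates selection, with `std::mt19937_64` substituted for RocksDB's `Random64` and all values assumed:

```cpp
#include <cassert>
#include <cstdint>
#include <numeric>
#include <random>
#include <unordered_set>
#include <vector>

// Draw k distinct keys from [0, max_key) by fixing only the first k
// positions of a permutation, mirroring GenerateNoOverwriteIds() above.
std::unordered_set<int64_t> SampleDistinctKeys(int64_t max_key, int64_t k,
                                               uint64_t seed) {
  std::vector<int64_t> perm(max_key);
  std::iota(perm.begin(), perm.end(), int64_t{0});  // identity permutation
  std::mt19937_64 rng(seed);
  std::unordered_set<int64_t> ret;
  ret.reserve(k);
  for (int64_t i = 0; i < k; ++i) {
    // Choose uniformly from the not-yet-fixed suffix [i, max_key), exactly
    // like `i + rnd.Next() % (max_key_ - i)` in the code above.
    int64_t rand_index = i + static_cast<int64_t>(rng() % (max_key - i));
    std::swap(perm[i], perm[rand_index]);
    ret.insert(perm[i]);
  }
  return ret;
}

int main() {
  auto keys = SampleDistinctKeys(1000, 100, 42);
  assert(keys.size() == 100);  // k distinct keys, each in [0, 1000)
  return 0;
}
```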
+ // Cannot store `port::Mutex` directly in vector since it is not copyable + // and storing it in the container may require copying depending on the impl. + std::vector<std::unique_ptr<port::Mutex[]>> key_locks_; + std::atomic<bool> printing_verification_results_; + const uint64_t start_timestamp_; +}; + +// Per-thread state for concurrent executions of the same benchmark. +struct ThreadState { + uint32_t tid; // 0..n-1 + Random rand; // Has different seeds for different threads + SharedState* shared; + Stats stats; + struct SnapshotState { + const Snapshot* snapshot; + // The cf from which we did a Get at this snapshot + int cf_at; + // The name of the cf at the time that we did a read + std::string cf_at_name; + // The key with which we did a Get at this snapshot + std::string key; + // The status of the Get + Status status; + // The value of the Get + std::string value; + // optional state of all keys in the db + std::vector<bool>* key_vec; + + std::string timestamp; + }; + std::queue<std::pair<uint64_t, SnapshotState>> snapshot_queue; + + ThreadState(uint32_t index, SharedState* _shared) + : tid(index), rand(1000 + index + _shared->GetSeed()), shared(_shared) {} +}; +} // namespace ROCKSDB_NAMESPACE +#endif // GFLAGS diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_stat.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_stat.h new file mode 100644 index 0000000000..5b38c6e2bb --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_stat.h @@ -0,0 +1,219 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include +#include +#include +#include + +#include "monitoring/histogram.h" +#include "port/port.h" +#include "rocksdb/snapshot.h" +#include "rocksdb/statistics.h" +#include "rocksdb/system_clock.h" +#include "util/gflags_compat.h" +#include "util/random.h" + +DECLARE_bool(histogram); +DECLARE_bool(progress_reports); + +namespace ROCKSDB_NAMESPACE { + +// Database statistics +extern std::shared_ptr<Statistics> dbstats; +extern std::shared_ptr<Statistics> dbstats_secondaries; + +class Stats { + private: + uint64_t start_; + uint64_t finish_; + double seconds_; + long done_; + long gets_; + long prefixes_; + long writes_; + long deletes_; + size_t single_deletes_; + long iterator_size_sums_; + long founds_; + long iterations_; + long range_deletions_; + long covered_by_range_deletions_; + long errors_; + long verified_errors_; + long num_compact_files_succeed_; + long num_compact_files_failed_; + int next_report_; + size_t bytes_; + uint64_t last_op_finish_; + HistogramImpl hist_; + + public: + Stats() {} + + void Start() { + next_report_ = 100; + hist_.Clear(); + done_ = 0; + gets_ = 0; + prefixes_ = 0; + writes_ = 0; + deletes_ = 0; + single_deletes_ = 0; + iterator_size_sums_ = 0; + founds_ = 0; + iterations_ = 0; + range_deletions_ = 0; + covered_by_range_deletions_ = 0; + errors_ = 0; + verified_errors_ = 0; + bytes_ = 0; + seconds_ = 0; + num_compact_files_succeed_ = 0; + num_compact_files_failed_ = 0; + start_ = SystemClock::Default()->NowMicros(); + last_op_finish_ = start_; + finish_ = start_; + } + + void Merge(const Stats& other) { + hist_.Merge(other.hist_); + done_ += other.done_; + gets_ += other.gets_; + prefixes_ += other.prefixes_; + writes_ += other.writes_; + deletes_ += other.deletes_; + single_deletes_ += other.single_deletes_; + iterator_size_sums_ += other.iterator_size_sums_; + founds_ += other.founds_; + iterations_ += other.iterations_; + range_deletions_ += other.range_deletions_; +
covered_by_range_deletions_ += other.covered_by_range_deletions_; + errors_ += other.errors_; + verified_errors_ += other.verified_errors_; + bytes_ += other.bytes_; + seconds_ += other.seconds_; + num_compact_files_succeed_ += other.num_compact_files_succeed_; + num_compact_files_failed_ += other.num_compact_files_failed_; + if (other.start_ < start_) start_ = other.start_; + if (other.finish_ > finish_) finish_ = other.finish_; + } + + void Stop() { + finish_ = SystemClock::Default()->NowMicros(); + seconds_ = (finish_ - start_) * 1e-6; + } + + void FinishedSingleOp() { + if (FLAGS_histogram) { + auto now = SystemClock::Default()->NowMicros(); + auto micros = now - last_op_finish_; + hist_.Add(micros); + if (micros > 20000) { + fprintf(stdout, "long op: %" PRIu64 " micros%30s\r", micros, ""); + } + last_op_finish_ = now; + } + + done_++; + if (FLAGS_progress_reports) { + if (done_ >= next_report_) { + if (next_report_ < 1000) + next_report_ += 100; + else if (next_report_ < 5000) + next_report_ += 500; + else if (next_report_ < 10000) + next_report_ += 1000; + else if (next_report_ < 50000) + next_report_ += 5000; + else if (next_report_ < 100000) + next_report_ += 10000; + else if (next_report_ < 500000) + next_report_ += 50000; + else + next_report_ += 100000; + fprintf(stdout, "... finished %ld ops%30s\r", done_, ""); + } + } + } + + void AddBytesForWrites(long nwrites, size_t nbytes) { + writes_ += nwrites; + bytes_ += nbytes; + } + + void AddGets(long ngets, long nfounds) { + founds_ += nfounds; + gets_ += ngets; + } + + void AddPrefixes(long nprefixes, long count) { + prefixes_ += nprefixes; + iterator_size_sums_ += count; + } + + void AddIterations(long n) { iterations_ += n; } + + void AddDeletes(long n) { deletes_ += n; } + + void AddSingleDeletes(size_t n) { single_deletes_ += n; } + + void AddRangeDeletions(long n) { range_deletions_ += n; } + + void AddCoveredByRangeDeletions(long n) { covered_by_range_deletions_ += n; } + + void AddErrors(long n) { errors_ += n; } + + void AddVerifiedErrors(long n) { verified_errors_ += n; } + + void AddNumCompactFilesSucceed(long n) { num_compact_files_succeed_ += n; } + + void AddNumCompactFilesFailed(long n) { num_compact_files_failed_ += n; } + + void Report(const char* name) { + std::string extra; + if (bytes_ < 1 || done_ < 1) { + fprintf(stderr, "No writes or ops?\n"); + return; + } + + double elapsed = (finish_ - start_) * 1e-6; + double bytes_mb = bytes_ / 1048576.0; + double rate = bytes_mb / elapsed; + double throughput = (double)done_ / elapsed; + + fprintf(stdout, "%-12s: ", name); + fprintf(stdout, "%.3f micros/op %ld ops/sec\n", seconds_ * 1e6 / done_, + (long)throughput); + fprintf(stdout, "%-12s: Wrote %.2f MB (%.2f MB/sec) (%ld%% of %ld ops)\n", + "", bytes_mb, rate, (100 * writes_) / done_, done_); + fprintf(stdout, "%-12s: Wrote %ld times\n", "", writes_); + fprintf(stdout, "%-12s: Deleted %ld times\n", "", deletes_); + fprintf(stdout, "%-12s: Single deleted %" ROCKSDB_PRIszt " times\n", "", + single_deletes_); + fprintf(stdout, "%-12s: %ld read and %ld found the key\n", "", gets_, + founds_); + fprintf(stdout, "%-12s: Prefix scanned %ld times\n", "", prefixes_); + fprintf(stdout, "%-12s: Iterator size sum is %ld\n", "", + iterator_size_sums_); + fprintf(stdout, "%-12s: Iterated %ld times\n", "", iterations_); + fprintf(stdout, "%-12s: Deleted %ld key-ranges\n", "", range_deletions_); + fprintf(stdout, "%-12s: Range deletions covered %ld keys\n", "", + covered_by_range_deletions_); + + fprintf(stdout, "%-12s: Got errors
%ld times\n", "", errors_); + fprintf(stdout, "%-12s: %ld CompactFiles() succeed\n", "", + num_compact_files_succeed_); + fprintf(stdout, "%-12s: %ld CompactFiles() did not succeed\n", "", + num_compact_files_failed_); + + if (FLAGS_histogram) { + fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str()); + } + fflush(stdout); + } +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_table_properties_collector.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_table_properties_collector.h new file mode 100644 index 0000000000..d1758cbb4c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_table_properties_collector.h @@ -0,0 +1,65 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/table.h" +#include "util/gflags_compat.h" +#include "util/random.h" + +DECLARE_int32(mark_for_compaction_one_file_in); + +namespace ROCKSDB_NAMESPACE { + +// A `DbStressTablePropertiesCollector` ignores what keys/values were added to +// the table, adds no properties to the table, and decides at random whether the +// table will be marked for compaction according to +// `FLAGS_mark_for_compaction_one_file_in`. +class DbStressTablePropertiesCollector : public TablePropertiesCollector { + public: + DbStressTablePropertiesCollector() + : need_compact_(Random::GetTLSInstance()->OneInOpt( + FLAGS_mark_for_compaction_one_file_in)) {} + + virtual Status AddUserKey(const Slice& /* key */, const Slice& /* value */, + EntryType /*type*/, SequenceNumber /*seq*/, + uint64_t /*file_size*/) override { + return Status::OK(); + } + + virtual Status Finish(UserCollectedProperties* /* properties */) override { + return Status::OK(); + } + + virtual UserCollectedProperties GetReadableProperties() const override { + return UserCollectedProperties{}; + } + + virtual const char* Name() const override { + return "DbStressTablePropertiesCollector"; + } + + virtual bool NeedCompact() const override { return need_compact_; } + + private: + const bool need_compact_; +}; + +// A `DbStressTablePropertiesCollectorFactory` creates +// `DbStressTablePropertiesCollector`s. +class DbStressTablePropertiesCollectorFactory + : public TablePropertiesCollectorFactory { + public: + virtual TablePropertiesCollector* CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context /* context */) override { + return new DbStressTablePropertiesCollector(); + } + + virtual const char* Name() const override { + return "DbStressTablePropertiesCollectorFactory"; + } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_test_base.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_test_base.h new file mode 100644 index 0000000000..f7ee247a74 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/db_stress_test_base.h @@ -0,0 +1,327 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors.
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifdef GFLAGS +#pragma once + +#include "db_stress_tool/db_stress_common.h" +#include "db_stress_tool/db_stress_shared_state.h" + +namespace ROCKSDB_NAMESPACE { +class SystemClock; +class Transaction; +class TransactionDB; +struct TransactionDBOptions; + +class StressTest { + public: + StressTest(); + + virtual ~StressTest(); + + std::shared_ptr<Cache> NewCache(size_t capacity, int32_t num_shard_bits); + + static std::vector<std::string> GetBlobCompressionTags(); + + bool BuildOptionsTable(); + + void InitDb(SharedState*); + // The initialization work is split into two parts to avoid a circular + // dependency with `SharedState`. + virtual void FinishInitDb(SharedState*); + void TrackExpectedState(SharedState* shared); + void OperateDb(ThreadState* thread); + virtual void VerifyDb(ThreadState* thread) const = 0; + virtual void ContinuouslyVerifyDb(ThreadState* /*thread*/) const = 0; + void PrintStatistics(); + + protected: + Status AssertSame(DB* db, ColumnFamilyHandle* cf, + ThreadState::SnapshotState& snap_state); + + // Currently PreloadDb has to be single-threaded. + void PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, + SharedState* shared); + + Status SetOptions(ThreadState* thread); + + // For TransactionDB, there can be txns that were prepared but not yet + // committed right before the previous stress run crashed. + // They will be recovered and processed through + // ProcessRecoveredPreparedTxnsHelper at the start of the current stress run. + void ProcessRecoveredPreparedTxns(SharedState* shared); + + // The default implementation will first update the ExpectedState to + // `SharedState::UNKNOWN` for each key in `txn` and then randomly + // commit or roll back `txn`. + virtual void ProcessRecoveredPreparedTxnsHelper(Transaction* txn, + SharedState* shared); + + Status NewTxn(WriteOptions& write_opts, Transaction** txn); + + Status CommitTxn(Transaction* txn, ThreadState* thread = nullptr); + + Status RollbackTxn(Transaction* txn); + + virtual void MaybeClearOneColumnFamily(ThreadState* /* thread */) {} + + virtual bool ShouldAcquireMutexOnKey() const { return false; }
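For context on `ProcessRecoveredPreparedTxns()` above: after a crash, two-phase transactions that were prepared but never committed come back from recovery still in the prepared state and must be resolved. A sketch of the essential shape (not the patch's code); `TransactionDB::GetAllPreparedTransactions()` is the real RocksDB API, while the coin-flip policy and the caller-side cleanup are my reading of the comment above:

```cpp
#include <cassert>
#include <cstdlib>
#include <vector>

#include "rocksdb/utilities/transaction_db.h"

// Resolve every transaction recovered in the prepared state, randomly
// committing or rolling back, mirroring the behavior described above.
void ResolveRecoveredTxns(ROCKSDB_NAMESPACE::TransactionDB* txn_db) {
  std::vector<ROCKSDB_NAMESPACE::Transaction*> recovered;
  txn_db->GetAllPreparedTransactions(&recovered);
  for (ROCKSDB_NAMESPACE::Transaction* txn : recovered) {
    ROCKSDB_NAMESPACE::Status s =
        (std::rand() & 1) ? txn->Commit() : txn->Rollback();
    assert(s.ok());
    delete txn;  // recovered transactions are owned by the caller
  }
}
```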
+ // Returns true if DB state is tracked by the stress test. + virtual bool IsStateTracked() const = 0; + + virtual std::vector<int> GenerateColumnFamilies( + const int /* num_column_families */, int rand_column_family) const { + return {rand_column_family}; + } + + virtual std::vector<int64_t> GenerateKeys(int64_t rand_key) const { + return {rand_key}; + } + + virtual Status TestGet(ThreadState* thread, const ReadOptions& read_opts, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys) = 0; + + virtual std::vector<Status> TestMultiGet( + ThreadState* thread, const ReadOptions& read_opts, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys) = 0; + + virtual void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys) = 0; + + virtual void TestMultiGetEntity(ThreadState* thread, + const ReadOptions& read_opts, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys) = 0; + + virtual Status TestPrefixScan(ThreadState* thread, + const ReadOptions& read_opts, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys) = 0; + + virtual Status TestPut(ThreadState* thread, WriteOptions& write_opts, + const ReadOptions& read_opts, + const std::vector<int>& cf_ids, + const std::vector<int64_t>& keys, + char (&value)[100]) = 0; + + virtual Status TestDelete(ThreadState* thread, WriteOptions& write_opts, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys) = 0; + + virtual Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys) = 0; + + virtual void TestIngestExternalFile( + ThreadState* thread, const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys) = 0; + + // Issues a compact range starting at start_key, whose integer value + // is rand_key. + virtual void TestCompactRange(ThreadState* thread, int64_t rand_key, + const Slice& start_key, + ColumnFamilyHandle* column_family); + + // Calculates a hash value for all keys in the range [start_key, end_key] + // at a certain snapshot. + uint32_t GetRangeHash(ThreadState* thread, const Snapshot* snapshot, + ColumnFamilyHandle* column_family, + const Slice& start_key, const Slice& end_key); + + // Returns a column family handle that mirrors the one pointed to by + // `column_family_id`; it is used to validate that data is correct. + // By default, the column family itself is returned. + virtual ColumnFamilyHandle* GetControlCfh(ThreadState* /* thread*/, + int column_family_id) { + return column_families_[column_family_id]; + } + + // Generates a list of keys close to the boundaries of SST keys. + // If there isn't any SST file in the DB, returns an empty list. + std::vector<std::string> GetWhiteBoxKeys(ThreadState* thread, DB* db, + ColumnFamilyHandle* cfh, + size_t num_keys); + + // Given a key K, this creates an iterator which scans to K and then + // does a random sequence of Next/Prev operations. + virtual Status TestIterate(ThreadState* thread, const ReadOptions& read_opts, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys); + + virtual Status TestIterateAgainstExpected( + ThreadState* /* thread */, const ReadOptions& /* read_opts */, + const std::vector<int>& /* rand_column_families */, + const std::vector<int64_t>& /* rand_keys */) { + return Status::NotSupported(); + } + + // Enum used by VerifyIterator() to identify the mode to validate.
+ enum LastIterateOp { + kLastOpSeek, + kLastOpSeekForPrev, + kLastOpNextOrPrev, + kLastOpSeekToFirst, + kLastOpSeekToLast + }; + + // Compares the two iterators. iter and cmp_iter should be at the same + // position, unless iter may have been invalidated or left undefined + // because of upper or lower bounds, or the prefix extractor. + // Flags a failure if the verification fails. + // diverged is set to true once the two iterators have diverged. + // op_logs is the information to print when validation fails. + void VerifyIterator(ThreadState* thread, ColumnFamilyHandle* cmp_cfh, + const ReadOptions& ro, Iterator* iter, Iterator* cmp_iter, + LastIterateOp op, const Slice& seek_key, + const std::string& op_logs, bool* diverged); + + virtual Status TestBackupRestore(ThreadState* thread, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys); + + virtual Status TestCheckpoint(ThreadState* thread, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys); + + void TestCompactFiles(ThreadState* thread, ColumnFamilyHandle* column_family); + + Status TestFlush(const std::vector<int>& rand_column_families); + + Status TestPauseBackground(ThreadState* thread); + + void TestAcquireSnapshot(ThreadState* thread, int rand_column_family, + const std::string& keystr, uint64_t i); + + Status MaybeReleaseSnapshots(ThreadState* thread, uint64_t i); + Status VerifyGetLiveFiles() const; + Status VerifyGetSortedWalFiles() const; + Status VerifyGetCurrentWalFile() const; + void TestGetProperty(ThreadState* thread) const; + + virtual Status TestApproximateSize( + ThreadState* thread, uint64_t iteration, + const std::vector<int>& rand_column_families, + const std::vector<int64_t>& rand_keys); + + virtual Status TestCustomOperations( + ThreadState* /*thread*/, + const std::vector<int>& /*rand_column_families*/) { + return Status::NotSupported("TestCustomOperations() must be overridden"); + } + + void VerificationAbort(SharedState* shared, std::string msg, Status s) const; + + void VerificationAbort(SharedState* shared, std::string msg, int cf, + int64_t key) const; + + void VerificationAbort(SharedState* shared, std::string msg, int cf, + int64_t key, Slice value_from_db, + Slice value_from_expected) const; + + void VerificationAbort(SharedState* shared, int cf, int64_t key, + const Slice& value, const WideColumns& columns) const; + + static std::string DebugString(const Slice& value, + const WideColumns& columns); + + void PrintEnv() const; + + void Open(SharedState* shared); + + void Reopen(ThreadState* thread); + + virtual void RegisterAdditionalListeners() {} + + virtual void PrepareTxnDbOptions(SharedState* /*shared*/, + TransactionDBOptions& /*txn_db_opts*/) {}
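The cross-checking idea behind `VerifyIterator()` above can be reduced to a hypothetical helper: drive the iterator under test and a control iterator through the same `Seek` and the same steps, comparing validity, key, and value at each position. Note this sketch omits what the real method additionally tolerates, namely divergence legitimately caused by iterate upper/lower bounds and the prefix extractor:

```cpp
#include "rocksdb/iterator.h"
#include "rocksdb/slice.h"

// Hypothetical helper, not the VerifyIterator() declared above: returns
// true when both iterators agree at every step, false once they diverge.
bool CrossCheckIterators(ROCKSDB_NAMESPACE::Iterator* iter,
                         ROCKSDB_NAMESPACE::Iterator* cmp_iter,
                         const ROCKSDB_NAMESPACE::Slice& seek_key,
                         int steps) {
  iter->Seek(seek_key);
  cmp_iter->Seek(seek_key);
  for (int i = 0; i < steps; ++i) {
    if (iter->Valid() != cmp_iter->Valid()) {
      return false;  // diverged: one is exhausted, the other is not
    }
    if (!iter->Valid()) {
      return true;  // both exhausted at the same point
    }
    if (iter->key().compare(cmp_iter->key()) != 0 ||
        iter->value().compare(cmp_iter->value()) != 0) {
      return false;  // same position, different contents
    }
    iter->Next();
    cmp_iter->Next();
  }
  return true;
}
```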
+ // Returns whether the timestamp of read_opts is updated. + bool MaybeUseOlderTimestampForPointLookup(ThreadState* thread, + std::string& ts_str, + Slice& ts_slice, + ReadOptions& read_opts); + + void MaybeUseOlderTimestampForRangeScan(ThreadState* thread, + std::string& ts_str, Slice& ts_slice, + ReadOptions& read_opts); + + std::shared_ptr<Cache> cache_; + std::shared_ptr<Cache> compressed_cache_; + std::shared_ptr<const FilterPolicy> filter_policy_; + DB* db_; + TransactionDB* txn_db_; + + // Currently only used in MultiOpsTxnsStressTest + std::atomic<DB*> db_aptr_; + + Options options_; + SystemClock* clock_; + std::vector<ColumnFamilyHandle*> column_families_; + std::vector<std::string> column_family_names_; + std::atomic<int> new_column_family_name_; + int num_times_reopened_; + std::unordered_map<std::string, std::vector<std::string>> options_table_; + std::vector<std::string> options_index_; + std::atomic<bool> db_preload_finished_; + + // Fields used for continuous verification from another thread + DB* cmp_db_; + std::vector<ColumnFamilyHandle*> cmp_cfhs_; + bool is_db_stopped_; +}; + +// Load options from OPTIONS file and populate `options`. +extern bool InitializeOptionsFromFile(Options& options); + +// Initialize `options` using command line arguments. +// When this function is called, `cache`, `block_cache_compressed`, +// `filter_policy` have all been initialized. Therefore, we just pass them as +// input arguments. +extern void InitializeOptionsFromFlags( + const std::shared_ptr<Cache>& cache, + const std::shared_ptr<const FilterPolicy>& filter_policy, Options& options); + +// Initialize `options` on which `InitializeOptionsFromFile()` and +// `InitializeOptionsFromFlags()` have both been called already. +// There are two cases. +// Case 1: OPTIONS file is not specified. Command line arguments have been used +// to initialize `options`. InitializeOptionsGeneral() will use +// `cache` and `filter_policy` to initialize +// corresponding fields of `options`. InitializeOptionsGeneral() will +// also set up other fields of `options` so that stress test can run. +// Examples include `create_if_missing` and +// `create_missing_column_families`, etc. +// Case 2: OPTIONS file is specified. It is possible that, after loading from +// the given OPTIONS files, some shared object fields are still not +// initialized because they are not set in the OPTIONS file. In this +// case, if command line arguments indicate that the user wants to set +// up such shared objects, e.g. block cache, compressed block cache, +// row cache, filter policy, then InitializeOptionsGeneral() will honor +// the user's choice, thus passing `cache`, +// `filter_policy` as input arguments. +// +// InitializeOptionsGeneral() must not overwrite fields of `options` loaded +// from OPTIONS file. +extern void InitializeOptionsGeneral( + const std::shared_ptr<Cache>& cache, + const std::shared_ptr<const FilterPolicy>& filter_policy, Options& options); + +// If no OPTIONS file is specified, set up `options` so that we can test +// user-defined timestamp which requires `-user_timestamp_size=8`. +// This function also checks for known (currently) incompatible features with +// user-defined timestamp. +extern void CheckAndSetOptionsForUserTimestamp(Options& options); + +} // namespace ROCKSDB_NAMESPACE +#endif // GFLAGS diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/expected_state.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/expected_state.h new file mode 100644 index 0000000000..f3f924cb88 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/expected_state.h @@ -0,0 +1,329 @@ +// Copyright (c) 2021-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifdef GFLAGS + +#pragma once + +#include + +#include +#include + +#include "db/dbformat.h" +#include "db_stress_tool/expected_value.h" +#include "file/file_util.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/types.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +// `ExpectedState` provides read/write access to the expected value stored +// for every key. +class ExpectedState { + public: + explicit ExpectedState(size_t max_key, size_t num_column_families); + + virtual ~ExpectedState() {} + + // Requires external locking preventing concurrent execution with any other + // member function. + virtual Status Open(bool create) = 0; + + // Requires external locking covering all keys in `cf`. + void ClearColumnFamily(int cf); + + // Prepare a Put that will be started but not finished yet + // This is useful for crash-recovery testing when the process may crash + // before updating the corresponding expected value + // + // Requires external locking covering `key` in `cf` to prevent concurrent + // write or delete to the same `key`. + PendingExpectedValue PreparePut(int cf, int64_t key); + + // Does not require external locking. + ExpectedValue Get(int cf, int64_t key); + + // Prepare a Delete that will be started but not finished yet + // This is useful for crash-recovery testing when the process may crash + // before updating the corresponding expected value + // + // Requires external locking covering `key` in `cf` to prevent concurrent + // write or delete to the same `key`. + PendingExpectedValue PrepareDelete(int cf, int64_t key, + bool* prepared = nullptr); + + // Requires external locking covering `key` in `cf` to prevent concurrent + // write or delete to the same `key`. + PendingExpectedValue PrepareSingleDelete(int cf, int64_t key); + + // Requires external locking covering keys in `[begin_key, end_key)` in `cf` + // to prevent concurrent write or delete to the same `key`. + std::vector<PendingExpectedValue> PrepareDeleteRange(int cf, + int64_t begin_key, + int64_t end_key); + + // Update the expected value for the start of an incomplete write or delete + // operation on the key associated with this expected value + void Precommit(int cf, int64_t key, const ExpectedValue& value);
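The point of the `PreparePut()`/`PendingExpectedValue` pairing above is ordering: mark the expected value "pending" before the DB write and publish it after, so a crash in between leaves a pending marker rather than a wrong expected value. A hypothetical usage sketch (not part of the patch and not self-contained: it assumes the types declared in these headers and a running stress test; real callers also handle the failure path more carefully):

```cpp
// Hypothetical helper illustrating the two-step protocol.
Status CrashSafePut(SharedState* shared, DB* db, ColumnFamilyHandle* cfh,
                    int cf, int64_t key, const Slice& key_slice,
                    const Slice& value_slice, const WriteOptions& wo) {
  // 1. Flag the key as having a pending write, BEFORE touching the DB.
  PendingExpectedValue pending = shared->PreparePut(cf, key);
  // 2. Do the real write.
  Status s = db->Put(wo, cfh, key_slice, value_slice);
  // 3. Publish the final expected value only on success. A crash between
  //    steps 2 and 3 leaves the pending bit set, so verification accepts
  //    either the old or the new value instead of reporting corruption.
  if (s.ok()) {
    pending.Commit();
  }
  return s;
}
```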
+ // Requires external locking covering `key` in `cf` to prevent concurrent + // delete to the same `key`. + bool Exists(int cf, int64_t key); + + // Sync the `value_base` to the corresponding expected value + // + // Requires external locking covering `key` in `cf` or be in single thread + // to prevent concurrent write or delete to the same `key` + void SyncPut(int cf, int64_t key, uint32_t value_base); + + // Sync the corresponding expected value to be pending Put + // + // Requires external locking covering `key` in `cf` or be in single thread + // to prevent concurrent write or delete to the same `key` + void SyncPendingPut(int cf, int64_t key); + + // Sync the corresponding expected value to be deleted + // + // Requires external locking covering `key` in `cf` or be in single thread + // to prevent concurrent write or delete to the same `key` + void SyncDelete(int cf, int64_t key); + + // Sync the corresponding expected values to be deleted + // + // Requires external locking covering keys in `[begin_key, end_key)` in `cf` + // to prevent concurrent write or delete to the same `key` + void SyncDeleteRange(int cf, int64_t begin_key, int64_t end_key); + + private: + // Does not require external locking. + std::atomic<uint32_t>& Value(int cf, int64_t key) const { + return values_[cf * max_key_ + key]; + } + + // Does not require external locking + ExpectedValue Load(int cf, int64_t key) const { + return ExpectedValue(Value(cf, key).load()); + } + + const size_t max_key_; + const size_t num_column_families_; + + protected: + size_t GetValuesLen() const { + return sizeof(std::atomic<uint32_t>) * num_column_families_ * max_key_; + } + + // Requires external locking preventing concurrent execution with any other + // member function. + void Reset(); + + std::atomic<uint32_t>* values_; +}; + +// A `FileExpectedState` implements `ExpectedState` backed by a file. +class FileExpectedState : public ExpectedState { + public: + explicit FileExpectedState(std::string expected_state_file_path, + size_t max_key, size_t num_column_families); + + // Requires external locking preventing concurrent execution with any other + // member function. + Status Open(bool create) override; + + private: + const std::string expected_state_file_path_; + std::unique_ptr<MemoryMappedFileBuffer> expected_state_mmap_buffer_; +}; + +// An `AnonExpectedState` implements `ExpectedState` backed by a memory +// allocation. +class AnonExpectedState : public ExpectedState { + public: + explicit AnonExpectedState(size_t max_key, size_t num_column_families); + + // Requires external locking preventing concurrent execution with any other + // member function. + Status Open(bool create) override; + + private: + std::unique_ptr<std::atomic<uint32_t>[]> values_allocation_; +}; + +// An `ExpectedStateManager` manages data about the expected state of the +// database. It exposes operations for reading and modifying the latest +// expected state. +class ExpectedStateManager { + public: + explicit ExpectedStateManager(size_t max_key, size_t num_column_families); + + virtual ~ExpectedStateManager(); + + // Requires external locking preventing concurrent execution with any other + // member function. + virtual Status Open() = 0;
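A standalone sketch of the storage layout behind `ExpectedState::Value()` above: one `std::atomic<uint32_t>` per (column family, key) pair, kept in a single flat allocation indexed as `cf * max_key + key`. One flat buffer is what lets `FileExpectedState` mmap the whole state and `AnonExpectedState` keep it on the heap, interchangeably. Values below are assumed:

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>
#include <memory>

int main() {
  const size_t num_column_families = 3;
  const size_t max_key = 1000;
  // One contiguous array of atomics for all (cf, key) slots.
  std::unique_ptr<std::atomic<uint32_t>[]> values(
      new std::atomic<uint32_t>[num_column_families * max_key]());
  auto value_at = [&](size_t cf, size_t key) -> std::atomic<uint32_t>& {
    return values[cf * max_key + key];  // flat 2-D indexing
  };
  value_at(2, 999).store(0x7fffu);  // last slot: still in bounds
  assert(value_at(2, 999).load() == 0x7fffu);
  assert(value_at(0, 0).load() == 0u);
  return 0;
}
```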
+ // Saves expected values for the current state of `db` and begins tracking + // changes. Following a successful `SaveAtAndAfter()`, `Restore()` can be + // called on the same DB, as long as its state does not roll back to before + // its current state. + // + // Requires external locking preventing concurrent execution with any other + // member function. Furthermore, `db` must not be mutated while this function + // is executing. + virtual Status SaveAtAndAfter(DB* db) = 0; + + // Returns true if at least one state of historical expected values can be + // restored. + // + // Requires external locking preventing concurrent execution with any other + // member function. + virtual bool HasHistory() = 0; + + // Restores expected values according to the current state of `db`. See + // `SaveAtAndAfter()` for conditions where this can be called. + // + // Requires external locking preventing concurrent execution with any other + // member function. Furthermore, `db` must not be mutated while this function + // is executing. + virtual Status Restore(DB* db) = 0; + + // Requires external locking covering all keys in `cf`. + void ClearColumnFamily(int cf) { return latest_->ClearColumnFamily(cf); } + + // See ExpectedState::PreparePut() + PendingExpectedValue PreparePut(int cf, int64_t key) { + return latest_->PreparePut(cf, key); + } + + // See ExpectedState::Get() + ExpectedValue Get(int cf, int64_t key) { return latest_->Get(cf, key); } + + // See ExpectedState::PrepareDelete() + PendingExpectedValue PrepareDelete(int cf, int64_t key) { + return latest_->PrepareDelete(cf, key); + } + + // See ExpectedState::PrepareSingleDelete() + PendingExpectedValue PrepareSingleDelete(int cf, int64_t key) { + return latest_->PrepareSingleDelete(cf, key); + } + + // See ExpectedState::PrepareDeleteRange() + std::vector<PendingExpectedValue> PrepareDeleteRange(int cf, + int64_t begin_key, + int64_t end_key) { + return latest_->PrepareDeleteRange(cf, begin_key, end_key); + } + + // See ExpectedState::Exists() + bool Exists(int cf, int64_t key) { return latest_->Exists(cf, key); } + + // See ExpectedState::SyncPut() + void SyncPut(int cf, int64_t key, uint32_t value_base) { + return latest_->SyncPut(cf, key, value_base); + } + + // See ExpectedState::SyncPendingPut() + void SyncPendingPut(int cf, int64_t key) { + return latest_->SyncPendingPut(cf, key); + } + + // See ExpectedState::SyncDelete() + void SyncDelete(int cf, int64_t key) { return latest_->SyncDelete(cf, key); } + + // See ExpectedState::SyncDeleteRange() + void SyncDeleteRange(int cf, int64_t begin_key, int64_t end_key) { + return latest_->SyncDeleteRange(cf, begin_key, end_key); + } + + protected: + const size_t max_key_; + const size_t num_column_families_; + std::unique_ptr<ExpectedState> latest_; +}; + +// A `FileExpectedStateManager` implements an `ExpectedStateManager` backed by +// a directory of files containing data about the expected state of the +// database. +class FileExpectedStateManager : public ExpectedStateManager { + public: + explicit FileExpectedStateManager(size_t max_key, size_t num_column_families, + std::string expected_state_dir_path); + + // Requires external locking preventing concurrent execution with any other + // member function. + Status Open() override; + + // See `ExpectedStateManager::SaveAtAndAfter()` API doc. + // + // This implementation makes a copy of "LATEST.state" into + // "<seqno>.state", and starts a trace in "<seqno>.trace". + // Due to using external files, a following `Restore()` can happen even + // from a different process. + Status SaveAtAndAfter(DB* db) override; + + // See `ExpectedStateManager::HasHistory()` API doc. + bool HasHistory() override; + + // See `ExpectedStateManager::Restore()` API doc. + // + // Say `db->GetLatestSequenceNumber()` was `a` last time `SaveAtAndAfter()` + // was called and now it is `b`. Then this function replays `b - a` write + // operations from "`a`.trace" onto "`a`.state", and then copies the resulting + // file into "LATEST.state".
+ Status Restore(DB* db) override; + + private: + // Requires external locking preventing concurrent execution with any other + // member function. + Status Clean(); + + std::string GetTempPathForFilename(const std::string& filename); + std::string GetPathForFilename(const std::string& filename); + + static const std::string kLatestBasename; + static const std::string kStateFilenameSuffix; + static const std::string kTraceFilenameSuffix; + static const std::string kTempFilenamePrefix; + static const std::string kTempFilenameSuffix; + + const std::string expected_state_dir_path_; + SequenceNumber saved_seqno_ = kMaxSequenceNumber; +}; + +// An `AnonExpectedStateManager` implements an `ExpectedStateManager` backed by +// a memory allocation containing data about the expected state of the database. +class AnonExpectedStateManager : public ExpectedStateManager { + public: + explicit AnonExpectedStateManager(size_t max_key, size_t num_column_families); + + // See `ExpectedStateManager::SaveAtAndAfter()` API doc. + // + // This implementation returns `Status::NotSupported` since we do not + // currently have a need to keep history of expected state within a process. + Status SaveAtAndAfter(DB* /* db */) override { + return Status::NotSupported(); + } + + // See `ExpectedStateManager::HasHistory()` API doc. + bool HasHistory() override { return false; } + + // See `ExpectedStateManager::Restore()` API doc. + // + // This implementation returns `Status::NotSupported` since we do not + // currently have a need to keep history of expected state within a process. + Status Restore(DB* /* db */) override { return Status::NotSupported(); } + + // Requires external locking preventing concurrent execution with any other + // member function. + Status Open() override; +}; +} // namespace ROCKSDB_NAMESPACE + +#endif // GFLAGS diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/expected_value.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/expected_value.h new file mode 100644 index 0000000000..2e41b130cd --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/expected_value.h @@ -0,0 +1,206 @@ +// Copyright (c) 2021-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifdef GFLAGS + +#pragma once + +#include + +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { +// `ExpectedValue` represents the expected value of a key used in db stress. +// It provides APIs to obtain various information, e.g., value base, +// existence, and pending operation status, as well as APIs to edit the +// expected value. +// +// This class is not thread-safe.
+class ExpectedValue { + public: + static uint32_t GetValueBaseMask() { return VALUE_BASE_MASK; } + static uint32_t GetValueBaseDelta() { return VALUE_BASE_DELTA; } + static uint32_t GetDelCounterDelta() { return DEL_COUNTER_DELTA; } + static uint32_t GetDelMask() { return DEL_MASK; } + static bool IsValueBaseValid(uint32_t value_base) { + return IsValuePartValid(value_base, VALUE_BASE_MASK); + } + + explicit ExpectedValue(uint32_t expected_value) + : expected_value_(expected_value) {} + + bool Exists() const { return PendingWrite() || !IsDeleted(); } + + uint32_t Read() const { return expected_value_; } + + void Put(bool pending); + + bool Delete(bool pending); + + void SyncPut(uint32_t value_base); + + void SyncPendingPut(); + + void SyncDelete(); + + uint32_t GetValueBase() const { return GetValuePart(VALUE_BASE_MASK); } + + uint32_t NextValueBase() const { + return GetIncrementedValuePart(VALUE_BASE_MASK, VALUE_BASE_DELTA); + } + + void SetValueBase(uint32_t new_value_base) { + SetValuePart(VALUE_BASE_MASK, new_value_base); + } + + bool PendingWrite() const { + const uint32_t pending_write = GetValuePart(PENDING_WRITE_MASK); + return pending_write != 0; + } + + void SetPendingWrite() { + SetValuePart(PENDING_WRITE_MASK, PENDING_WRITE_MASK); + } + + void ClearPendingWrite() { ClearValuePart(PENDING_WRITE_MASK); } + + uint32_t GetDelCounter() const { return GetValuePart(DEL_COUNTER_MASK); } + + uint32_t NextDelCounter() const { + return GetIncrementedValuePart(DEL_COUNTER_MASK, DEL_COUNTER_DELTA); + } + + void SetDelCounter(uint32_t new_del_counter) { + SetValuePart(DEL_COUNTER_MASK, new_del_counter); + } + + bool PendingDelete() const { + const uint32_t pending_del = GetValuePart(PENDING_DEL_MASK); + return pending_del != 0; + } + + void SetPendingDel() { SetValuePart(PENDING_DEL_MASK, PENDING_DEL_MASK); } + + void ClearPendingDel() { ClearValuePart(PENDING_DEL_MASK); } + + bool IsDeleted() const { + const uint32_t deleted = GetValuePart(DEL_MASK); + return deleted != 0; + } + + void SetDeleted() { SetValuePart(DEL_MASK, DEL_MASK); } + + void ClearDeleted() { ClearValuePart(DEL_MASK); } + + uint32_t GetFinalValueBase() const; + + uint32_t GetFinalDelCounter() const; + + private: + static bool IsValuePartValid(uint32_t value_part, uint32_t value_part_mask) { + return (value_part & (~value_part_mask)) == 0; + } + + // The 32-bit expected_value_ is divided into following parts: + // Bit 0 - 14: value base + static constexpr uint32_t VALUE_BASE_MASK = 0x7fff; + static constexpr uint32_t VALUE_BASE_DELTA = 1; + // Bit 15: whether write to this value base is pending (0 equals `false`) + static constexpr uint32_t PENDING_WRITE_MASK = (uint32_t)1 << 15; + // Bit 16 - 29: deletion counter (i.e, number of times this value base has + // been deleted) + static constexpr uint32_t DEL_COUNTER_MASK = 0x3fff0000; + static constexpr uint32_t DEL_COUNTER_DELTA = (uint32_t)1 << 16; + // Bit 30: whether deletion of this value base is pending (0 equals `false`) + static constexpr uint32_t PENDING_DEL_MASK = (uint32_t)1 << 30; + // Bit 31: whether this value base is deleted (0 equals `false`) + static constexpr uint32_t DEL_MASK = (uint32_t)1 << 31; + + uint32_t GetValuePart(uint32_t value_part_mask) const { + return expected_value_ & value_part_mask; + } + + uint32_t GetIncrementedValuePart(uint32_t value_part_mask, + uint32_t value_part_delta) const { + uint32_t current_value_part = GetValuePart(value_part_mask); + ExpectedValue temp_expected_value(current_value_part + value_part_delta); + return 
temp_expected_value.GetValuePart(value_part_mask); + } + + void SetValuePart(uint32_t value_part_mask, uint32_t new_value_part) { + assert(IsValuePartValid(new_value_part, value_part_mask)); + ClearValuePart(value_part_mask); + expected_value_ |= new_value_part; + } + + void ClearValuePart(uint32_t value_part_mask) { + expected_value_ &= (~value_part_mask); + } + + uint32_t expected_value_; +}; + +// `PendingExpectedValue` represents the expected value of a key undergoing a +// pending operation in db stress. +// +// This class is not thread-safe. +class PendingExpectedValue { + public: + explicit PendingExpectedValue(std::atomic<uint32_t>* value_ptr, + ExpectedValue orig_value, + ExpectedValue final_value) + : value_ptr_(value_ptr), + orig_value_(orig_value), + final_value_(final_value) {} + + void Commit() { + // To prevent low-level instruction reordering that could result in the + // expected value being set before the db write + std::atomic_thread_fence(std::memory_order_release); + value_ptr_->store(final_value_.Read()); + } + + uint32_t GetFinalValueBase() { return final_value_.GetValueBase(); } + + private: + std::atomic<uint32_t>* const value_ptr_; + const ExpectedValue orig_value_; + const ExpectedValue final_value_; +}; + +// `ExpectedValueHelper` provides utils to parse `ExpectedValue` to obtain +// useful info about it in db stress +class ExpectedValueHelper { + public: + // Return whether the key associated with `pre_read_expected_value` and + // `post_read_expected_value` is expected not to exist from the beginning + // till the end of the read + // + // The negation of `MustHaveNotExisted()` is "may have not existed". + // To assert some key must have existed, please use `MustHaveExisted()` + static bool MustHaveNotExisted(ExpectedValue pre_read_expected_value, + ExpectedValue post_read_expected_value); + + // Return whether the key associated with `pre_read_expected_value` and + // `post_read_expected_value` is expected to exist from the beginning till + // the end of the read. + // + // The negation of `MustHaveExisted()` is "may have existed". + // To assert some key must not have existed, please use + // `MustHaveNotExisted()` + static bool MustHaveExisted(ExpectedValue pre_read_expected_value, + ExpectedValue post_read_expected_value); + + // Return whether the `value_base` falls within the expected value base + static bool InExpectedValueBaseRange(uint32_t value_base, + ExpectedValue pre_read_expected_value, + ExpectedValue post_read_expected_value); +}; +} // namespace ROCKSDB_NAMESPACE + +#endif // GFLAGS diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/multi_ops_txns_stress.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/multi_ops_txns_stress.h new file mode 100644 index 0000000000..12c45aaa32 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/db_stress_tool/multi_ops_txns_stress.h @@ -0,0 +1,446 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors.
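For reference, a standalone recap of the 32-bit layout documented in `ExpectedValue` above: bits 0-14 hold the value base, bit 15 flags a pending write, bits 16-29 hold the delete counter, bit 30 flags a pending delete, and bit 31 flags deletion. The constants mirror the ones in the class; the manipulations mirror `SetValuePart()`/`ClearValuePart()`:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t kValueBaseMask = 0x7fff;               // bits 0-14
  const uint32_t kPendingWriteMask = uint32_t{1} << 15; // bit 15
  const uint32_t kDelCounterMask = 0x3fff0000;          // bits 16-29
  const uint32_t kDelCounterDelta = uint32_t{1} << 16;
  const uint32_t kDelMask = uint32_t{1} << 31;          // bit 31

  uint32_t v = 0;
  v = (v & ~kValueBaseMask) | 123u;  // SetValueBase(123)
  v |= kPendingWriteMask;            // SetPendingWrite()
  v &= ~kPendingWriteMask;           // ClearPendingWrite()
  uint32_t bumped =                  // NextDelCounter(): wrap is masked off
      ((v & kDelCounterMask) + kDelCounterDelta) & kDelCounterMask;
  v = (v & ~kDelCounterMask) | bumped;
  v |= kDelMask;                     // SetDeleted()

  assert((v & kValueBaseMask) == 123u);
  assert((v & kDelCounterMask) == kDelCounterDelta);  // counter is now 1
  assert((v & kDelMask) != 0u);  // deleted, base and counter preserved
  return 0;
}
```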
+ +#ifdef GFLAGS +#include "db_stress_tool/db_stress_common.h" + +namespace ROCKSDB_NAMESPACE { + +// This file defines MultiOpsTxnsStressTest so that we can stress test RocksDB +// transactions on a simple, emulated relational table. +// +// The record format is similar to the example found at +// https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format. +// +// The table is created by +// ``` +// create table t1 ( +// a int primary key, +// b int, +// c int, +// key(c), +// ) +// ``` +// +// (For simplicity, we use uint32_t for int here.) +// +// For this table, there is a primary index using `a`, as well as a secondary +// index using `c` and `a`. +// +// Primary key format: +// | index id | M(a) | +// Primary index value: +// | b | c | +// M(a) represents the big-endian format of a. +// +// Secondary key format: +// | index id | M(c) | M(a) | +// Secondary index value: +// | crc32 | +// Similarly to M(a), M(c) is the big-endian format of c. +// +// The in-memory representation of a record is defined in class +// MultiOpsTxnsStressTest::Record, which includes a number of helper methods to +// encode/decode primary index keys, primary index values, secondary index keys, +// secondary index values, etc. +// +// Sometimes the primary index and secondary index reside in different column +// families, but sometimes they colocate in the same column family. The current +// implementation puts them in the same (default) column family, and this is +// subject to future change if we find it interesting to test the other case. +// +// Class MultiOpsTxnsStressTest has the following transactions for testing. +// +// 1. Primary key update +// UPDATE t1 SET a = 3 WHERE a = 2; +// ``` +// tx->GetForUpdate(primary key a=2) +// tx->GetForUpdate(primary key a=3) +// tx->Delete(primary key a=2) +// tx->Put(primary key a=3, value) +// tx->batch->SingleDelete(secondary key a=2) +// tx->batch->Put(secondary key a=3, value) +// tx->Prepare() +// tx->Commit() +// ``` +// +// 2. Secondary key update +// UPDATE t1 SET c = 3 WHERE c = 2; +// ``` +// iter->Seek(secondary key) +// // Get corresponding primary key value(s) from iterator +// tx->GetForUpdate(primary key) +// tx->Put(primary key, value c=3) +// tx->batch->SingleDelete(secondary key c=2) +// tx->batch->Put(secondary key c=3) +// tx->Prepare() +// tx->Commit() +// ``` +// +// 3. Primary index value update +// UPDATE t1 SET b = b + 1 WHERE a = 2; +// ``` +// tx->GetForUpdate(primary key a=2) +// tx->Put(primary key a=2, value b=b+1) +// tx->Prepare() +// tx->Commit() +// ``` +// +// 4. Point lookup +// SELECT * FROM t1 WHERE a = 3; +// ``` +// tx->Get(primary key a=3) +// tx->Commit() +// ``` +// +// 5. Range scan +// SELECT * FROM t1 WHERE c = 2; +// ``` +// it = tx->GetIterator() +// it->Seek(secondary key c=2) +// tx->Commit() +// ``` + +class MultiOpsTxnsStressTest : public StressTest { + public: + class Record { + public: + static constexpr uint32_t kMetadataPrefix = 0; + static constexpr uint32_t kPrimaryIndexId = 1; + static constexpr uint32_t kSecondaryIndexId = 2; + + static constexpr size_t kPrimaryIndexEntrySize = 8 + 8; + static constexpr size_t kSecondaryIndexEntrySize = 12 + 4; + + static_assert(kPrimaryIndexId < kSecondaryIndexId, + "kPrimaryIndexId must be smaller than kSecondaryIndexId"); + + static_assert(sizeof(kPrimaryIndexId) == sizeof(uint32_t), + "kPrimaryIndexId must be 4 bytes"); + static_assert(sizeof(kSecondaryIndexId) == sizeof(uint32_t), + "kSecondaryIndexId must be 4 bytes");
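A standalone sketch of the key encodings described above (RocksDB's own encoding helpers are deliberately not used, and the concrete values are assumed). Fixed-width big-endian integers make bytewise key order agree with numeric order, which is what lets the secondary index be range-scanned by `c`; index ids follow the `Record` constants, 1 for primary and 2 for secondary:

```cpp
#include <cassert>
#include <cstdint>
#include <string>

// M(v): 4-byte big-endian encoding of a uint32_t.
std::string BigEndian32(uint32_t v) {
  std::string out(4, '\0');
  out[0] = static_cast<char>(v >> 24);
  out[1] = static_cast<char>(v >> 16);
  out[2] = static_cast<char>(v >> 8);
  out[3] = static_cast<char>(v);
  return out;
}

int main() {
  // Primary key | index id | M(a) |: 8 bytes, half of kPrimaryIndexEntrySize.
  std::string pk_a2 = BigEndian32(1) + BigEndian32(2);
  std::string pk_a3 = BigEndian32(1) + BigEndian32(3);
  assert(pk_a2.size() == 8);
  assert(pk_a2 < pk_a3);  // bytewise order matches numeric order of `a`

  // Secondary key | index id | M(c) | M(a) |: 12 bytes; the shared
  // | index id | M(c) | prefix is what a range scan on `c` seeks to.
  std::string sk = BigEndian32(2) + BigEndian32(7) + BigEndian32(2);
  assert(sk.size() == 12);
  return 0;
}
```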
+    // Used for generating search key to probe primary index.
+    static std::string EncodePrimaryKey(uint32_t a);
+    // Used for generating search prefix to probe secondary index.
+    static std::string EncodeSecondaryKey(uint32_t c);
+    // Used for generating search key to probe secondary index.
+    static std::string EncodeSecondaryKey(uint32_t c, uint32_t a);
+
+    static std::tuple<Status, uint32_t, uint32_t> DecodePrimaryIndexValue(
+        Slice primary_index_value);
+
+    static std::pair<Status, uint32_t> DecodeSecondaryIndexValue(
+        Slice secondary_index_value);
+
+    Record() = default;
+    Record(uint32_t _a, uint32_t _b, uint32_t _c) : a_(_a), b_(_b), c_(_c) {}
+
+    bool operator==(const Record& other) const {
+      return a_ == other.a_ && b_ == other.b_ && c_ == other.c_;
+    }
+
+    bool operator!=(const Record& other) const { return !(*this == other); }
+
+    std::pair<std::string, std::string> EncodePrimaryIndexEntry() const;
+
+    std::string EncodePrimaryKey() const;
+
+    std::string EncodePrimaryIndexValue() const;
+
+    std::pair<std::string, std::string> EncodeSecondaryIndexEntry() const;
+
+    std::string EncodeSecondaryKey() const;
+
+    Status DecodePrimaryIndexEntry(Slice primary_index_key,
+                                   Slice primary_index_value);
+
+    Status DecodeSecondaryIndexEntry(Slice secondary_index_key,
+                                     Slice secondary_index_value);
+
+    uint32_t a_value() const { return a_; }
+    uint32_t b_value() const { return b_; }
+    uint32_t c_value() const { return c_; }
+
+    void SetA(uint32_t _a) { a_ = _a; }
+    void SetB(uint32_t _b) { b_ = _b; }
+    void SetC(uint32_t _c) { c_ = _c; }
+
+    std::string ToString() const {
+      std::string ret("(");
+      ret.append(std::to_string(a_));
+      ret.append(",");
+      ret.append(std::to_string(b_));
+      ret.append(",");
+      ret.append(std::to_string(c_));
+      ret.append(")");
+      return ret;
+    }
+
+   private:
+    friend class InvariantChecker;
+
+    uint32_t a_{0};
+    uint32_t b_{0};
+    uint32_t c_{0};
+  };
+
+  MultiOpsTxnsStressTest() {}
+
+  ~MultiOpsTxnsStressTest() override {}
+
+  void FinishInitDb(SharedState*) override;
+
+  void ReopenAndPreloadDbIfNeeded(SharedState* shared);
+
+  bool IsStateTracked() const override { return false; }
+
+  Status TestGet(ThreadState* thread, const ReadOptions& read_opts,
+                 const std::vector<int>& rand_column_families,
+                 const std::vector<int64_t>& rand_keys) override;
+
+  std::vector<Status> TestMultiGet(
+      ThreadState* thread, const ReadOptions& read_opts,
+      const std::vector<int>& rand_column_families,
+      const std::vector<int64_t>& rand_keys) override;
+
+  void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
+                     const std::vector<int>& rand_column_families,
+                     const std::vector<int64_t>& rand_keys) override;
+
+  void TestMultiGetEntity(ThreadState* thread, const ReadOptions& read_opts,
+                          const std::vector<int>& rand_column_families,
+                          const std::vector<int64_t>& rand_keys) override;
+
+  Status TestPrefixScan(ThreadState* thread, const ReadOptions& read_opts,
+                        const std::vector<int>& rand_column_families,
+                        const std::vector<int64_t>& rand_keys) override;
+
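+  // Illustrative round-trip of the Record codec above (assumed usage, for
+  // exposition only; not part of the vendored source):
+  //
+  //   Record r(/*a=*/1, /*b=*/2, /*c=*/3);
+  //   std::string k, v;
+  //   std::tie(k, v) = r.EncodePrimaryIndexEntry();
+  //   Record decoded;
+  //   Status s = decoded.DecodePrimaryIndexEntry(k, v);
+  //   assert(s.ok() && decoded == r);
+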
+  // Given a key K, this creates an iterator which scans to K and then
+  // does a random sequence of Next/Prev operations.
+  Status TestIterate(ThreadState* thread, const ReadOptions& read_opts,
+                     const std::vector<int>& rand_column_families,
+                     const std::vector<int64_t>& rand_keys) override;
+
+  Status TestPut(ThreadState* thread, WriteOptions& write_opts,
+                 const ReadOptions& read_opts, const std::vector<int>& cf_ids,
+                 const std::vector<int64_t>& keys,
+                 char (&value)[100]) override;
+
+  Status TestDelete(ThreadState* thread, WriteOptions& write_opts,
+                    const std::vector<int>& rand_column_families,
+                    const std::vector<int64_t>& rand_keys) override;
+
+  Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts,
+                         const std::vector<int>& rand_column_families,
+                         const std::vector<int64_t>& rand_keys) override;
+
+  void TestIngestExternalFile(ThreadState* thread,
+                              const std::vector<int>& rand_column_families,
+                              const std::vector<int64_t>& rand_keys) override;
+
+  void TestCompactRange(ThreadState* thread, int64_t rand_key,
+                        const Slice& start_key,
+                        ColumnFamilyHandle* column_family) override;
+
+  Status TestBackupRestore(ThreadState* thread,
+                           const std::vector<int>& rand_column_families,
+                           const std::vector<int64_t>& rand_keys) override;
+
+  Status TestCheckpoint(ThreadState* thread,
+                        const std::vector<int>& rand_column_families,
+                        const std::vector<int64_t>& rand_keys) override;
+
+  Status TestApproximateSize(ThreadState* thread, uint64_t iteration,
+                             const std::vector<int>& rand_column_families,
+                             const std::vector<int64_t>& rand_keys) override;
+
+  Status TestCustomOperations(
+      ThreadState* thread,
+      const std::vector<int>& rand_column_families) override;
+
+  void RegisterAdditionalListeners() override;
+
+  void PrepareTxnDbOptions(SharedState* /*shared*/,
+                           TransactionDBOptions& txn_db_opts) override;
+
+  Status PrimaryKeyUpdateTxn(ThreadState* thread, uint32_t old_a,
+                             uint32_t old_a_pos, uint32_t new_a);
+
+  Status SecondaryKeyUpdateTxn(ThreadState* thread, uint32_t old_c,
+                               uint32_t old_c_pos, uint32_t new_c);
+
+  Status UpdatePrimaryIndexValueTxn(ThreadState* thread, uint32_t a,
+                                    uint32_t b_delta);
+
+  Status PointLookupTxn(ThreadState* thread, ReadOptions ropts, uint32_t a);
+
+  Status RangeScanTxn(ThreadState* thread, ReadOptions ropts, uint32_t c);
+
+  void VerifyDb(ThreadState* thread) const override;
+
+  void ContinuouslyVerifyDb(ThreadState* thread) const override {
+    VerifyDb(thread);
+  }
+
+  void VerifyPkSkFast(const ReadOptions& read_options, int job_id);
+
+ protected:
+  class Counter {
+   public:
+    uint64_t Next() { return value_.fetch_add(1); }
+
+   private:
+    std::atomic<uint64_t> value_ = Env::Default()->NowNanos();
+  };
+
+  using KeySet = std::set<uint32_t>;
+  class KeyGenerator {
+   public:
+    explicit KeyGenerator(uint32_t s, uint32_t low, uint32_t high,
+                          KeySet&& existing_uniq, KeySet&& non_existing_uniq)
+        : rand_(s),
+          low_(low),
+          high_(high),
+          existing_uniq_(std::move(existing_uniq)),
+          non_existing_uniq_(std::move(non_existing_uniq)) {}
+    ~KeyGenerator() {
+      assert(!existing_uniq_.empty());
+      assert(!non_existing_uniq_.empty());
+    }
+    void FinishInit();
+
+    std::pair<uint32_t, uint32_t> ChooseExisting();
+    void Replace(uint32_t old_val, uint32_t old_pos, uint32_t new_val);
+    uint32_t Allocate();
+    void UndoAllocation(uint32_t new_val);
+
+    std::string ToString() const {
+      std::ostringstream oss;
+      oss << "[" << low_ << ", " << high_ << "): " << existing_.size()
+          << " elements, " << existing_uniq_.size() << " unique values, "
+          << non_existing_uniq_.size() << " unique non-existing values";
+      return oss.str();
+    }
+
+   private:
+    Random rand_;
+    uint32_t low_ = 0;
+    uint32_t high_ = 0;
+    std::vector<uint32_t> existing_{};
+    KeySet existing_uniq_{};
+    KeySet non_existing_uniq_{};
+    bool initialized_ = false;
+  };
+
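+  // Illustrative use of KeyGenerator in a write path (an assumed flow, for
+  // exposition only; not part of the vendored source): allocate a
+  // not-yet-existing key, attempt the transaction, and undo the allocation
+  // if the transaction cannot commit:
+  //
+  //   uint32_t new_a = key_gen->Allocate();
+  //   Status s = /* run a txn that inserts new_a */;
+  //   if (s.ok()) {
+  //     key_gen->Replace(old_a, old_a_pos, new_a);
+  //   } else {
+  //     key_gen->UndoAllocation(new_a);
+  //   }
+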
+  // Return <a, pos>
+  std::pair<uint32_t, uint32_t> ChooseExistingA(ThreadState* thread);
+
+  uint32_t GenerateNextA(ThreadState* thread);
+
+  // Return <c, pos>
+  std::pair<uint32_t, uint32_t> ChooseExistingC(ThreadState* thread);
+
+  uint32_t GenerateNextC(ThreadState* thread);
+
+  // Randomly commit or rollback `txn`
+  void ProcessRecoveredPreparedTxnsHelper(Transaction* txn,
+                                          SharedState*) override;
+
+  // Some applications, e.g. MyRocks, write a KV pair to the database via a
+  // commit-time-write-batch (ctwb) in addition to the transaction's regular
+  // write batch. The key is usually constant, representing some system
+  // metadata, while the value is monotonically increasing and represents the
+  // actual value of the metadata. Method WriteToCommitTimeWriteBatch()
+  // emulates this scenario.
+  Status WriteToCommitTimeWriteBatch(Transaction& txn);
+
+  Status CommitAndCreateTimestampedSnapshotIfNeeded(ThreadState* thread,
+                                                    Transaction& txn);
+
+  void SetupSnapshot(ThreadState* thread, ReadOptions& read_opts,
+                     Transaction& txn,
+                     std::shared_ptr<const Snapshot>& snapshot);
+
+  std::vector<std::unique_ptr<KeyGenerator>> key_gen_for_a_;
+  std::vector<std::unique_ptr<KeyGenerator>> key_gen_for_c_;
+
+  Counter counter_{};
+
+ private:
+  struct KeySpaces {
+    uint32_t lb_a = 0;
+    uint32_t ub_a = 0;
+    uint32_t lb_c = 0;
+    uint32_t ub_c = 0;
+
+    explicit KeySpaces() = default;
+    explicit KeySpaces(uint32_t _lb_a, uint32_t _ub_a, uint32_t _lb_c,
+                       uint32_t _ub_c)
+        : lb_a(_lb_a), ub_a(_ub_a), lb_c(_lb_c), ub_c(_ub_c) {}
+
+    std::string EncodeTo() const;
+    bool DecodeFrom(Slice data);
+  };
+
+  void PersistKeySpacesDesc(const std::string& key_spaces_path, uint32_t lb_a,
+                            uint32_t ub_a, uint32_t lb_c, uint32_t ub_c);
+
+  KeySpaces ReadKeySpacesDesc(const std::string& key_spaces_path);
+
+  void PreloadDb(SharedState* shared, int threads, uint32_t lb_a,
+                 uint32_t ub_a, uint32_t lb_c, uint32_t ub_c);
+
+  void ScanExistingDb(SharedState* shared, int threads);
+};
+
+class InvariantChecker {
+ public:
+  static_assert(sizeof(MultiOpsTxnsStressTest::Record().a_) ==
+                    sizeof(uint32_t),
+                "MultiOpsTxnsStressTest::Record::a_ must be 4 bytes");
+  static_assert(sizeof(MultiOpsTxnsStressTest::Record().b_) ==
+                    sizeof(uint32_t),
+                "MultiOpsTxnsStressTest::Record::b_ must be 4 bytes");
+  static_assert(sizeof(MultiOpsTxnsStressTest::Record().c_) ==
+                    sizeof(uint32_t),
+                "MultiOpsTxnsStressTest::Record::c_ must be 4 bytes");
+};
+
+class MultiOpsTxnsStressListener : public EventListener {
+ public:
+  explicit MultiOpsTxnsStressListener(MultiOpsTxnsStressTest* stress_test)
+      : stress_test_(stress_test) {
+    assert(stress_test_);
+  }
+
+  ~MultiOpsTxnsStressListener() override {}
+
+  void OnFlushCompleted(DB* db, const FlushJobInfo& info) override {
+    assert(db);
+#ifdef NDEBUG
+    (void)db;
+#endif
+    assert(info.cf_id == 0);
+    const ReadOptions read_options(Env::IOActivity::kFlush);
+    stress_test_->VerifyPkSkFast(read_options, info.job_id);
+  }
+
+  void OnCompactionCompleted(DB* db, const CompactionJobInfo& info) override {
+    assert(db);
+#ifdef NDEBUG
+    (void)db;
+#endif
+    assert(info.cf_id == 0);
+    const ReadOptions read_options(Env::IOActivity::kCompaction);
+    stress_test_->VerifyPkSkFast(read_options, info.job_id);
+  }
+
+ private:
+  MultiOpsTxnsStressTest* const stress_test_ = nullptr;
+};
+
+} // namespace ROCKSDB_NAMESPACE
+#endif // GFLAGS
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env.cc
new file mode 100644
index 0000000000..8ddc9a1a6c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env.cc
@@ -0,0 +1,534 @@
+// Copyright
(c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include "env/composite_env_wrapper.h" +#include "rocksdb/utilities/options_type.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +// The CompositeEnvWrapper class provides an interface that is compatible +// with the old monolithic Env API, and an implementation that wraps around +// the new Env that provides threading and other OS related functionality, and +// the new FileSystem API that provides storage functionality. By +// providing the old Env interface, it allows the rest of RocksDB code to +// be agnostic of whether the underlying Env implementation is a monolithic +// Env or an Env + FileSystem. In the former case, the user will specify +// Options::env only, whereas in the latter case, the user will specify +// Options::env and Options::file_system. + +class CompositeSequentialFileWrapper : public SequentialFile { + public: + explicit CompositeSequentialFileWrapper( + std::unique_ptr& target) + : target_(std::move(target)) {} + + Status Read(size_t n, Slice* result, char* scratch) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Read(n, io_opts, result, scratch, &dbg); + } + Status Skip(uint64_t n) override { return target_->Skip(n); } + bool use_direct_io() const override { return target_->use_direct_io(); } + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + Status InvalidateCache(size_t offset, size_t length) override { + return target_->InvalidateCache(offset, length); + } + Status PositionedRead(uint64_t offset, size_t n, Slice* result, + char* scratch) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->PositionedRead(offset, n, io_opts, result, scratch, &dbg); + } + + private: + std::unique_ptr target_; +}; + +class CompositeRandomAccessFileWrapper : public RandomAccessFile { + public: + explicit CompositeRandomAccessFileWrapper( + std::unique_ptr& target) + : target_(std::move(target)) {} + + Status Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Read(offset, n, io_opts, result, scratch, &dbg); + } + Status MultiRead(ReadRequest* reqs, size_t num_reqs) override { + IOOptions io_opts; + IODebugContext dbg; + std::vector fs_reqs; + Status status; + + fs_reqs.resize(num_reqs); + for (size_t i = 0; i < num_reqs; ++i) { + fs_reqs[i].offset = reqs[i].offset; + fs_reqs[i].len = reqs[i].len; + fs_reqs[i].scratch = reqs[i].scratch; + fs_reqs[i].status = IOStatus::OK(); + } + status = target_->MultiRead(fs_reqs.data(), num_reqs, io_opts, &dbg); + for (size_t i = 0; i < num_reqs; ++i) { + reqs[i].result = fs_reqs[i].result; + reqs[i].status = fs_reqs[i].status; + } + return status; + } + Status Prefetch(uint64_t offset, size_t n) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Prefetch(offset, n, io_opts, &dbg); + } + size_t GetUniqueId(char* id, size_t max_size) const override { + return target_->GetUniqueId(id, max_size); + } + void Hint(AccessPattern pattern) override { + target_->Hint((FSRandomAccessFile::AccessPattern)pattern); + } + bool use_direct_io() const override { return target_->use_direct_io(); } + size_t GetRequiredBufferAlignment() const override { + return 
target_->GetRequiredBufferAlignment(); + } + Status InvalidateCache(size_t offset, size_t length) override { + return target_->InvalidateCache(offset, length); + } + + private: + std::unique_ptr target_; +}; + +class CompositeWritableFileWrapper : public WritableFile { + public: + explicit CompositeWritableFileWrapper(std::unique_ptr& t) + : target_(std::move(t)) {} + + Status Append(const Slice& data) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Append(data, io_opts, &dbg); + } + Status Append(const Slice& data, + const DataVerificationInfo& verification_info) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Append(data, io_opts, verification_info, &dbg); + } + Status PositionedAppend(const Slice& data, uint64_t offset) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->PositionedAppend(data, offset, io_opts, &dbg); + } + Status PositionedAppend( + const Slice& data, uint64_t offset, + const DataVerificationInfo& verification_info) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->PositionedAppend(data, offset, io_opts, verification_info, + &dbg); + } + Status Truncate(uint64_t size) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Truncate(size, io_opts, &dbg); + } + Status Close() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Close(io_opts, &dbg); + } + Status Flush() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Flush(io_opts, &dbg); + } + Status Sync() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Sync(io_opts, &dbg); + } + Status Fsync() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Fsync(io_opts, &dbg); + } + bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); } + + bool use_direct_io() const override { return target_->use_direct_io(); } + + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + + void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override { + target_->SetWriteLifeTimeHint(hint); + } + + Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { + return target_->GetWriteLifeTimeHint(); + } + + uint64_t GetFileSize() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->GetFileSize(io_opts, &dbg); + } + + void SetPreallocationBlockSize(size_t size) override { + target_->SetPreallocationBlockSize(size); + } + + void GetPreallocationStatus(size_t* block_size, + size_t* last_allocated_block) override { + target_->GetPreallocationStatus(block_size, last_allocated_block); + } + + size_t GetUniqueId(char* id, size_t max_size) const override { + return target_->GetUniqueId(id, max_size); + } + + Status InvalidateCache(size_t offset, size_t length) override { + return target_->InvalidateCache(offset, length); + } + + Status RangeSync(uint64_t offset, uint64_t nbytes) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->RangeSync(offset, nbytes, io_opts, &dbg); + } + + void PrepareWrite(size_t offset, size_t len) override { + IOOptions io_opts; + IODebugContext dbg; + target_->PrepareWrite(offset, len, io_opts, &dbg); + } + + Status Allocate(uint64_t offset, uint64_t len) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Allocate(offset, len, io_opts, &dbg); + } + + std::unique_ptr* target() { return &target_; } + + private: + std::unique_ptr target_; +}; + +class CompositeRandomRWFileWrapper : public RandomRWFile { + 
public: + explicit CompositeRandomRWFileWrapper(std::unique_ptr& target) + : target_(std::move(target)) {} + + bool use_direct_io() const override { return target_->use_direct_io(); } + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + Status Write(uint64_t offset, const Slice& data) override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Write(offset, data, io_opts, &dbg); + } + Status Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Read(offset, n, io_opts, result, scratch, &dbg); + } + Status Flush() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Flush(io_opts, &dbg); + } + Status Sync() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Sync(io_opts, &dbg); + } + Status Fsync() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Fsync(io_opts, &dbg); + } + Status Close() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Close(io_opts, &dbg); + } + + private: + std::unique_ptr target_; +}; + +class CompositeDirectoryWrapper : public Directory { + public: + explicit CompositeDirectoryWrapper(std::unique_ptr& target) + : target_(std::move(target)) {} + + Status Fsync() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->FsyncWithDirOptions(io_opts, &dbg, DirFsyncOptions()); + } + + Status Close() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Close(io_opts, &dbg); + } + + size_t GetUniqueId(char* id, size_t max_size) const override { + return target_->GetUniqueId(id, max_size); + } + + private: + std::unique_ptr target_; +}; +} // namespace + +Status CompositeEnv::NewSequentialFile(const std::string& f, + std::unique_ptr* r, + const EnvOptions& options) { + IODebugContext dbg; + std::unique_ptr file; + Status status; + status = + file_system_->NewSequentialFile(f, FileOptions(options), &file, &dbg); + if (status.ok()) { + r->reset(new CompositeSequentialFileWrapper(file)); + } + return status; +} + +Status CompositeEnv::NewRandomAccessFile(const std::string& f, + std::unique_ptr* r, + const EnvOptions& options) { + IODebugContext dbg; + std::unique_ptr file; + Status status; + status = + file_system_->NewRandomAccessFile(f, FileOptions(options), &file, &dbg); + if (status.ok()) { + r->reset(new CompositeRandomAccessFileWrapper(file)); + } + return status; +} + +Status CompositeEnv::NewWritableFile(const std::string& f, + std::unique_ptr* r, + const EnvOptions& options) { + IODebugContext dbg; + std::unique_ptr file; + Status status; + status = file_system_->NewWritableFile(f, FileOptions(options), &file, &dbg); + if (status.ok()) { + r->reset(new CompositeWritableFileWrapper(file)); + } + return status; +} + +Status CompositeEnv::ReopenWritableFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& options) { + IODebugContext dbg; + Status status; + std::unique_ptr file; + status = file_system_->ReopenWritableFile(fname, FileOptions(options), &file, + &dbg); + if (status.ok()) { + result->reset(new CompositeWritableFileWrapper(file)); + } + return status; +} + +Status CompositeEnv::ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + std::unique_ptr* r, + const EnvOptions& options) { + IODebugContext dbg; + Status status; + std::unique_ptr file; + status = file_system_->ReuseWritableFile(fname, old_fname, + FileOptions(options), &file, &dbg); + if 
(status.ok()) { + r->reset(new CompositeWritableFileWrapper(file)); + } + return status; +} + +Status CompositeEnv::NewRandomRWFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& options) { + IODebugContext dbg; + std::unique_ptr file; + Status status; + status = + file_system_->NewRandomRWFile(fname, FileOptions(options), &file, &dbg); + if (status.ok()) { + result->reset(new CompositeRandomRWFileWrapper(file)); + } + return status; +} + +Status CompositeEnv::NewDirectory(const std::string& name, + std::unique_ptr* result) { + IOOptions io_opts; + IODebugContext dbg; + std::unique_ptr dir; + Status status; + status = file_system_->NewDirectory(name, io_opts, &dir, &dbg); + if (status.ok()) { + result->reset(new CompositeDirectoryWrapper(dir)); + } + return status; +} + +namespace { +static std::unordered_map env_wrapper_type_info = { + {"target", + OptionTypeInfo(0, OptionType::kUnknown, OptionVerificationType::kByName, + OptionTypeFlags::kDontSerialize) + .SetParseFunc([](const ConfigOptions& opts, + const std::string& /*name*/, const std::string& value, + void* addr) { + auto target = static_cast(addr); + return Env::CreateFromString(opts, value, &(target->env), + &(target->guard)); + }) + .SetEqualsFunc([](const ConfigOptions& opts, + const std::string& /*name*/, const void* addr1, + const void* addr2, std::string* mismatch) { + const auto target1 = static_cast(addr1); + const auto target2 = static_cast(addr2); + if (target1->env != nullptr) { + return target1->env->AreEquivalent(opts, target2->env, mismatch); + } else { + return (target2->env == nullptr); + } + }) + .SetPrepareFunc([](const ConfigOptions& opts, + const std::string& /*name*/, void* addr) { + auto target = static_cast(addr); + if (target->guard.get() != nullptr) { + target->env = target->guard.get(); + } else if (target->env == nullptr) { + target->env = Env::Default(); + } + return target->env->PrepareOptions(opts); + }) + .SetValidateFunc([](const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts, + const std::string& /*name*/, const void* addr) { + const auto target = static_cast(addr); + if (target->env == nullptr) { + return Status::InvalidArgument("Target Env not specified"); + } else { + return target->env->ValidateOptions(db_opts, cf_opts); + } + })}, +}; +static std::unordered_map + composite_fs_wrapper_type_info = { + {"file_system", + OptionTypeInfo::AsCustomSharedPtr( + 0, OptionVerificationType::kByName, OptionTypeFlags::kNone)}, +}; + +static std::unordered_map + composite_clock_wrapper_type_info = { + {"clock", + OptionTypeInfo::AsCustomSharedPtr( + 0, OptionVerificationType::kByName, OptionTypeFlags::kNone)}, +}; + +} // namespace + +std::unique_ptr NewCompositeEnv(const std::shared_ptr& fs) { + return std::unique_ptr(new CompositeEnvWrapper(Env::Default(), fs)); +} + +CompositeEnvWrapper::CompositeEnvWrapper(Env* env, + const std::shared_ptr& fs, + const std::shared_ptr& sc) + : CompositeEnv(fs, sc), target_(env) { + RegisterOptions("", &target_, &env_wrapper_type_info); + RegisterOptions("", &file_system_, &composite_fs_wrapper_type_info); + RegisterOptions("", &system_clock_, &composite_clock_wrapper_type_info); +} + +CompositeEnvWrapper::CompositeEnvWrapper(const std::shared_ptr& env, + const std::shared_ptr& fs, + const std::shared_ptr& sc) + : CompositeEnv(fs, sc), target_(env) { + RegisterOptions("", &target_, &env_wrapper_type_info); + RegisterOptions("", &file_system_, &composite_fs_wrapper_type_info); + RegisterOptions("", &system_clock_, 
&composite_clock_wrapper_type_info); +} + +Status CompositeEnvWrapper::PrepareOptions(const ConfigOptions& options) { + target_.Prepare(); + if (file_system_ == nullptr) { + file_system_ = target_.env->GetFileSystem(); + } + if (system_clock_ == nullptr) { + system_clock_ = target_.env->GetSystemClock(); + } + return Env::PrepareOptions(options); +} + +std::string CompositeEnvWrapper::SerializeOptions( + const ConfigOptions& config_options, const std::string& header) const { + auto options = CompositeEnv::SerializeOptions(config_options, header); + if (target_.env != nullptr && target_.env != Env::Default()) { + options.append("target="); + options.append(target_.env->ToString(config_options)); + } + return options; +} + +EnvWrapper::EnvWrapper(Env* t) : target_(t) { + RegisterOptions("", &target_, &env_wrapper_type_info); +} + +EnvWrapper::EnvWrapper(std::unique_ptr&& t) : target_(std::move(t)) { + RegisterOptions("", &target_, &env_wrapper_type_info); +} + +EnvWrapper::EnvWrapper(const std::shared_ptr& t) : target_(t) { + RegisterOptions("", &target_, &env_wrapper_type_info); +} + +EnvWrapper::~EnvWrapper() {} + +Status EnvWrapper::PrepareOptions(const ConfigOptions& options) { + target_.Prepare(); + return Env::PrepareOptions(options); +} + +std::string EnvWrapper::SerializeOptions(const ConfigOptions& config_options, + const std::string& header) const { + auto parent = Env::SerializeOptions(config_options, ""); + if (config_options.IsShallow() || target_.env == nullptr || + target_.env == Env::Default()) { + return parent; + } else { + std::string result = header; + if (!StartsWith(parent, OptionTypeInfo::kIdPropName())) { + result.append(OptionTypeInfo::kIdPropName()).append("="); + } + result.append(parent); + if (!EndsWith(result, config_options.delimiter)) { + result.append(config_options.delimiter); + } + result.append("target=").append(target_.env->ToString(config_options)); + return result; + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env_wrapper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env_wrapper.h new file mode 100644 index 0000000000..003c50658b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/composite_env_wrapper.h @@ -0,0 +1,378 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
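+//
+// Usage sketch (illustrative, not part of the vendored source): a custom
+// FileSystem can be combined with the default Env's threading and clock by
+// building a composite Env, e.g. via the NewCompositeEnv() helper defined in
+// composite_env.cc. MyFileSystem below is a hypothetical FileSystem subclass:
+//
+//   std::shared_ptr<FileSystem> fs = std::make_shared<MyFileSystem>();
+//   std::unique_ptr<Env> env = NewCompositeEnv(fs);
+//   Options options;
+//   options.env = env.get();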
+ +#pragma once + +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/system_clock.h" + +#ifdef _WIN32 +// Windows API macro interference +#undef DeleteFile +#undef GetCurrentTime +#undef LoadLibrary +#endif + +namespace ROCKSDB_NAMESPACE { + +class CompositeEnv : public Env { + public: + // Initialize a CompositeEnvWrapper that delegates all thread/time related + // calls to env, and all file operations to fs + explicit CompositeEnv(const std::shared_ptr& fs, + const std::shared_ptr& clock) + : Env(fs, clock) {} + + Status RegisterDbPaths(const std::vector& paths) override { + return file_system_->RegisterDbPaths(paths); + } + Status UnregisterDbPaths(const std::vector& paths) override { + return file_system_->UnregisterDbPaths(paths); + } + + // The following text is boilerplate that forwards all methods to target() + Status NewSequentialFile(const std::string& f, + std::unique_ptr* r, + const EnvOptions& options) override; + + Status NewRandomAccessFile(const std::string& f, + std::unique_ptr* r, + const EnvOptions& options) override; + + Status NewWritableFile(const std::string& f, std::unique_ptr* r, + const EnvOptions& options) override; + + Status ReopenWritableFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& options) override; + + Status ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + std::unique_ptr* r, + const EnvOptions& options) override; + + Status NewRandomRWFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& options) override; + + Status NewMemoryMappedFileBuffer( + const std::string& fname, + std::unique_ptr* result) override { + return file_system_->NewMemoryMappedFileBuffer(fname, result); + } + + Status NewDirectory(const std::string& name, + std::unique_ptr* result) override; + + Status FileExists(const std::string& f) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->FileExists(f, io_opts, &dbg); + } + Status GetChildren(const std::string& dir, + std::vector* r) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->GetChildren(dir, io_opts, r, &dbg); + } + Status GetChildrenFileAttributes( + const std::string& dir, std::vector* result) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->GetChildrenFileAttributes(dir, io_opts, result, &dbg); + } + Status DeleteFile(const std::string& f) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->DeleteFile(f, io_opts, &dbg); + } + Status Truncate(const std::string& fname, size_t size) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->Truncate(fname, size, io_opts, &dbg); + } + Status CreateDir(const std::string& d) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->CreateDir(d, io_opts, &dbg); + } + Status CreateDirIfMissing(const std::string& d) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->CreateDirIfMissing(d, io_opts, &dbg); + } + Status DeleteDir(const std::string& d) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->DeleteDir(d, io_opts, &dbg); + } + Status GetFileSize(const std::string& f, uint64_t* s) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->GetFileSize(f, io_opts, s, &dbg); + } + + Status GetFileModificationTime(const std::string& fname, + uint64_t* file_mtime) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->GetFileModificationTime(fname, 
io_opts, file_mtime, + &dbg); + } + + Status RenameFile(const std::string& s, const std::string& t) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->RenameFile(s, t, io_opts, &dbg); + } + + Status LinkFile(const std::string& s, const std::string& t) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->LinkFile(s, t, io_opts, &dbg); + } + + Status NumFileLinks(const std::string& fname, uint64_t* count) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->NumFileLinks(fname, io_opts, count, &dbg); + } + + Status AreFilesSame(const std::string& first, const std::string& second, + bool* res) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->AreFilesSame(first, second, io_opts, res, &dbg); + } + + Status LockFile(const std::string& f, FileLock** l) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->LockFile(f, io_opts, l, &dbg); + } + + Status UnlockFile(FileLock* l) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->UnlockFile(l, io_opts, &dbg); + } + + Status GetAbsolutePath(const std::string& db_path, + std::string* output_path) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->GetAbsolutePath(db_path, io_opts, output_path, &dbg); + } + + Status NewLogger(const std::string& fname, + std::shared_ptr* result) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->NewLogger(fname, io_opts, result, &dbg); + } + + Status IsDirectory(const std::string& path, bool* is_dir) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->IsDirectory(path, io_opts, is_dir, &dbg); + } + + Status GetTestDirectory(std::string* path) override { + IOOptions io_opts; + IODebugContext dbg; + return file_system_->GetTestDirectory(io_opts, path, &dbg); + } + + EnvOptions OptimizeForLogRead(const EnvOptions& env_options) const override { + return file_system_->OptimizeForLogRead(FileOptions(env_options)); + } + + EnvOptions OptimizeForManifestRead( + const EnvOptions& env_options) const override { + return file_system_->OptimizeForManifestRead(FileOptions(env_options)); + } + + EnvOptions OptimizeForLogWrite(const EnvOptions& env_options, + const DBOptions& db_options) const override { + return file_system_->OptimizeForLogWrite(FileOptions(env_options), + db_options); + } + + EnvOptions OptimizeForManifestWrite( + const EnvOptions& env_options) const override { + return file_system_->OptimizeForManifestWrite(FileOptions(env_options)); + } + + EnvOptions OptimizeForCompactionTableWrite( + const EnvOptions& env_options, + const ImmutableDBOptions& immutable_ops) const override { + return file_system_->OptimizeForCompactionTableWrite( + FileOptions(env_options), immutable_ops); + } + EnvOptions OptimizeForCompactionTableRead( + const EnvOptions& env_options, + const ImmutableDBOptions& db_options) const override { + return file_system_->OptimizeForCompactionTableRead( + FileOptions(env_options), db_options); + } + EnvOptions OptimizeForBlobFileRead( + const EnvOptions& env_options, + const ImmutableDBOptions& db_options) const override { + return file_system_->OptimizeForBlobFileRead(FileOptions(env_options), + db_options); + } + // This seems to clash with a macro on Windows, so #undef it here +#ifdef GetFreeSpace +#undef GetFreeSpace +#endif + Status GetFreeSpace(const std::string& path, uint64_t* diskfree) override { + IOOptions io_opts; + IODebugContext dbg; + return 
file_system_->GetFreeSpace(path, io_opts, diskfree, &dbg); + } + uint64_t NowMicros() override { return system_clock_->NowMicros(); } + uint64_t NowNanos() override { return system_clock_->NowNanos(); } + + uint64_t NowCPUNanos() override { return system_clock_->CPUNanos(); } + + void SleepForMicroseconds(int micros) override { + system_clock_->SleepForMicroseconds(micros); + } + + Status GetCurrentTime(int64_t* unix_time) override { + return system_clock_->GetCurrentTime(unix_time); + } + std::string TimeToString(uint64_t time) override { + return system_clock_->TimeToString(time); + } +}; + +class CompositeEnvWrapper : public CompositeEnv { + public: + // Initialize a CompositeEnvWrapper that delegates all thread/time related + // calls to env, and all file operations to fs + explicit CompositeEnvWrapper(Env* env) + : CompositeEnvWrapper(env, env->GetFileSystem(), env->GetSystemClock()) {} + explicit CompositeEnvWrapper(Env* env, const std::shared_ptr& fs) + : CompositeEnvWrapper(env, fs, env->GetSystemClock()) {} + + explicit CompositeEnvWrapper(Env* env, const std::shared_ptr& sc) + : CompositeEnvWrapper(env, env->GetFileSystem(), sc) {} + + explicit CompositeEnvWrapper(Env* env, const std::shared_ptr& fs, + const std::shared_ptr& sc); + + explicit CompositeEnvWrapper(const std::shared_ptr& env, + const std::shared_ptr& fs) + : CompositeEnvWrapper(env, fs, env->GetSystemClock()) {} + + explicit CompositeEnvWrapper(const std::shared_ptr& env, + const std::shared_ptr& sc) + : CompositeEnvWrapper(env, env->GetFileSystem(), sc) {} + + explicit CompositeEnvWrapper(const std::shared_ptr& env, + const std::shared_ptr& fs, + const std::shared_ptr& sc); + + static const char* kClassName() { return "CompositeEnv"; } + const char* Name() const override { return kClassName(); } + bool IsInstanceOf(const std::string& name) const override { + if (name == kClassName()) { + return true; + } else { + return CompositeEnv::IsInstanceOf(name); + } + } + const Customizable* Inner() const override { return target_.env; } + + Status PrepareOptions(const ConfigOptions& options) override; + std::string SerializeOptions(const ConfigOptions& config_options, + const std::string& header) const override; + + // Return the target to which this Env forwards all calls + Env* env_target() const { return target_.env; } + +#if !defined(OS_WIN) && !defined(ROCKSDB_NO_DYNAMIC_EXTENSION) + Status LoadLibrary(const std::string& lib_name, + const std::string& search_path, + std::shared_ptr* result) override { + return target_.env->LoadLibrary(lib_name, search_path, result); + } +#endif + + void Schedule(void (*f)(void* arg), void* a, Priority pri, + void* tag = nullptr, void (*u)(void* arg) = nullptr) override { + return target_.env->Schedule(f, a, pri, tag, u); + } + + int UnSchedule(void* tag, Priority pri) override { + return target_.env->UnSchedule(tag, pri); + } + + void StartThread(void (*f)(void*), void* a) override { + return target_.env->StartThread(f, a); + } + void WaitForJoin() override { return target_.env->WaitForJoin(); } + unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const override { + return target_.env->GetThreadPoolQueueLen(pri); + } + + int ReserveThreads(int threads_to_be_reserved, Priority pri) override { + return target_.env->ReserveThreads(threads_to_be_reserved, pri); + } + + int ReleaseThreads(int threads_to_be_released, Priority pri) override { + return target_.env->ReleaseThreads(threads_to_be_released, pri); + } + + Status GetHostName(char* name, uint64_t len) override { + return 
target_.env->GetHostName(name, len);
+  }
+  void SetBackgroundThreads(int num, Priority pri) override {
+    return target_.env->SetBackgroundThreads(num, pri);
+  }
+  int GetBackgroundThreads(Priority pri) override {
+    return target_.env->GetBackgroundThreads(pri);
+  }
+
+  Status SetAllowNonOwnerAccess(bool allow_non_owner_access) override {
+    return target_.env->SetAllowNonOwnerAccess(allow_non_owner_access);
+  }
+
+  void IncBackgroundThreadsIfNeeded(int num, Priority pri) override {
+    return target_.env->IncBackgroundThreadsIfNeeded(num, pri);
+  }
+
+  void LowerThreadPoolIOPriority(Priority pool) override {
+    target_.env->LowerThreadPoolIOPriority(pool);
+  }
+
+  void LowerThreadPoolCPUPriority(Priority pool) override {
+    target_.env->LowerThreadPoolCPUPriority(pool);
+  }
+
+  Status LowerThreadPoolCPUPriority(Priority pool, CpuPriority pri) override {
+    return target_.env->LowerThreadPoolCPUPriority(pool, pri);
+  }
+
+  Status GetThreadList(std::vector<ThreadStatus>* thread_list) override {
+    return target_.env->GetThreadList(thread_list);
+  }
+
+  ThreadStatusUpdater* GetThreadStatusUpdater() const override {
+    return target_.env->GetThreadStatusUpdater();
+  }
+
+  uint64_t GetThreadID() const override { return target_.env->GetThreadID(); }
+
+  std::string GenerateUniqueId() override {
+    return target_.env->GenerateUniqueId();
+  }
+
+ private:
+  EnvWrapper::Target target_;
+};
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/emulated_clock.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/emulated_clock.h
new file mode 100644
index 0000000000..6227376350
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/emulated_clock.h
@@ -0,0 +1,114 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <atomic>
+#include <string>
+
+#include "rocksdb/status.h"
+#include "rocksdb/system_clock.h"
+
+namespace ROCKSDB_NAMESPACE {
+// A SystemClock that can "mock" sleep and counts its operations.
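+//
+// Illustrative use (an assumption for exposition, not part of the vendored
+// source): with mock sleep enabled, sleeping advances the reported time
+// without blocking the calling thread:
+//
+//   auto clock =
+//       std::make_shared<EmulatedSystemClock>(SystemClock::Default());
+//   clock->SetMockSleep();
+//   int64_t t0 = 0, t1 = 0;
+//   clock->GetCurrentTime(&t0);
+//   clock->MockSleepForSeconds(100);
+//   clock->GetCurrentTime(&t1);
+//   assert(t1 - t0 == 100);  // no real time was spent sleeping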
+class EmulatedSystemClock : public SystemClockWrapper {
+ private:
+  // Something to return when mocking current time
+  const int64_t maybe_starting_time_;
+  std::atomic<int> sleep_counter_{0};
+  std::atomic<int> cpu_counter_{0};
+  std::atomic<int64_t> addon_microseconds_{0};
+  // Do not modify in the env of a running DB (could cause deadlock)
+  std::atomic<bool> time_elapse_only_sleep_;
+  bool no_slowdown_;
+
+ public:
+  explicit EmulatedSystemClock(const std::shared_ptr<SystemClock>& base,
+                               bool time_elapse_only_sleep = false);
+
+  static const char* kClassName() { return "TimeEmulatedSystemClock"; }
+  const char* Name() const override { return kClassName(); }
+
+  virtual void SleepForMicroseconds(int micros) override {
+    sleep_counter_++;
+    if (no_slowdown_ || time_elapse_only_sleep_) {
+      addon_microseconds_.fetch_add(micros);
+    }
+    if (!no_slowdown_) {
+      SystemClockWrapper::SleepForMicroseconds(micros);
+    }
+  }
+
+  void MockSleepForMicroseconds(int64_t micros) {
+    sleep_counter_++;
+    assert(no_slowdown_);
+    addon_microseconds_.fetch_add(micros);
+  }
+
+  void MockSleepForSeconds(int64_t seconds) {
+    sleep_counter_++;
+    assert(no_slowdown_);
+    addon_microseconds_.fetch_add(seconds * 1000000);
+  }
+
+  void SetTimeElapseOnlySleep(bool enabled) {
+    // We cannot set these before destroying the last DB because they might
+    // cause a deadlock or similar without the appropriate options set in
+    // the DB.
+    time_elapse_only_sleep_ = enabled;
+    no_slowdown_ = enabled;
+  }
+
+  bool IsTimeElapseOnlySleep() const { return time_elapse_only_sleep_.load(); }
+  void SetMockSleep(bool enabled = true) { no_slowdown_ = enabled; }
+  bool IsMockSleepEnabled() const { return no_slowdown_; }
+
+  int GetSleepCounter() const { return sleep_counter_.load(); }
+
+  virtual Status GetCurrentTime(int64_t* unix_time) override {
+    Status s;
+    if (time_elapse_only_sleep_) {
+      *unix_time = maybe_starting_time_;
+    } else {
+      s = SystemClockWrapper::GetCurrentTime(unix_time);
+    }
+    if (s.ok()) {
+      // mock microseconds elapsed to seconds of time
+      *unix_time += addon_microseconds_.load() / 1000000;
+    }
+    return s;
+  }
+
+  virtual uint64_t CPUNanos() override {
+    cpu_counter_++;
+    return SystemClockWrapper::CPUNanos();
+  }
+
+  virtual uint64_t CPUMicros() override {
+    cpu_counter_++;
+    return SystemClockWrapper::CPUMicros();
+  }
+
+  virtual uint64_t NowNanos() override {
+    return (time_elapse_only_sleep_ ? 0 : SystemClockWrapper::NowNanos()) +
+           addon_microseconds_.load() * 1000;
+  }
+
+  virtual uint64_t NowMicros() override {
+    return (time_elapse_only_sleep_ ? 0 : SystemClockWrapper::NowMicros()) +
+           addon_microseconds_.load();
+  }
+
+  int GetCpuCounter() const { return cpu_counter_.load(); }
+
+  void ResetCounters() {
+    cpu_counter_.store(0);
+    sleep_counter_.store(0);
+  }
+};
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env.cc
new file mode 100644
index 0000000000..2137738c7b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env.cc
@@ -0,0 +1,1233 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "rocksdb/env.h"
+
+#include <thread>
+
+#include "env/composite_env_wrapper.h"
+#include "env/emulated_clock.h"
+#include "env/mock_env.h"
+#include "env/unique_id_gen.h"
+#include "logging/env_logger.h"
+#include "memory/arena.h"
+#include "options/db_options.h"
+#include "port/port.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/options.h"
+#include "rocksdb/system_clock.h"
+#include "rocksdb/utilities/customizable_util.h"
+#include "rocksdb/utilities/object_registry.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+static int RegisterBuiltinEnvs(ObjectLibrary& library,
+                               const std::string& /*arg*/) {
+  library.AddFactory<Env>(MockEnv::kClassName(),
+                          [](const std::string& /*uri*/,
+                             std::unique_ptr<Env>* guard,
+                             std::string* /* errmsg */) {
+                            guard->reset(MockEnv::Create(Env::Default()));
+                            return guard->get();
+                          });
+  library.AddFactory<Env>(
+      CompositeEnvWrapper::kClassName(),
+      [](const std::string& /*uri*/, std::unique_ptr<Env>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(new CompositeEnvWrapper(Env::Default()));
+        return guard->get();
+      });
+  size_t num_types;
+  return static_cast<int>(library.GetFactoryCount(&num_types));
+}
+
+static void RegisterSystemEnvs() {
+  static std::once_flag loaded;
+  std::call_once(loaded, [&]() {
+    RegisterBuiltinEnvs(*(ObjectLibrary::Default().get()), "");
+  });
+}
+
+class LegacySystemClock : public SystemClock {
+ private:
+  Env* env_;
+
+ public:
+  explicit LegacySystemClock(Env* env) : env_(env) {}
+  const char* Name() const override { return "LegacySystemClock"; }
+
+  // Returns the number of micro-seconds since some fixed point in time.
+  // It is often used as system time such as in GenericRateLimiter
+  // and other places so a port needs to return system time in order to work.
+  uint64_t NowMicros() override { return env_->NowMicros(); }
+
+  // Returns the number of nano-seconds since some fixed point in time. Only
+  // useful for computing deltas of time in one run.
+  // Default implementation simply relies on NowMicros.
+  // In platform-specific implementations, NowNanos() should return time points
+  // that are MONOTONIC.
+  uint64_t NowNanos() override { return env_->NowNanos(); }
+
+  uint64_t CPUMicros() override { return CPUNanos() / 1000; }
+  uint64_t CPUNanos() override { return env_->NowCPUNanos(); }
+
+  // Sleep/delay the thread for the prescribed number of micro-seconds.
+  void SleepForMicroseconds(int micros) override {
+    env_->SleepForMicroseconds(micros);
+  }
+
+  // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
+  // Only overwrites *unix_time on success.
+  Status GetCurrentTime(int64_t* unix_time) override {
+    return env_->GetCurrentTime(unix_time);
+  }
+  // Converts seconds-since-Jan-01-1970 to a printable string
+  std::string TimeToString(uint64_t time) override {
+    return env_->TimeToString(time);
+  }
+
+  std::string SerializeOptions(const ConfigOptions& /*config_options*/,
+                               const std::string& /*prefix*/) const override {
+    // We do not want the LegacySystemClock to appear in the serialized output.
+    // This clock is an internal class for those who do not implement one and
+    // would be part of the Env. As such, do not serialize it here.
+ return ""; + } +}; + +class LegacySequentialFileWrapper : public FSSequentialFile { + public: + explicit LegacySequentialFileWrapper( + std::unique_ptr&& _target) + : target_(std::move(_target)) {} + + IOStatus Read(size_t n, const IOOptions& /*options*/, Slice* result, + char* scratch, IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Read(n, result, scratch)); + } + IOStatus Skip(uint64_t n) override { + return status_to_io_status(target_->Skip(n)); + } + bool use_direct_io() const override { return target_->use_direct_io(); } + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + IOStatus InvalidateCache(size_t offset, size_t length) override { + return status_to_io_status(target_->InvalidateCache(offset, length)); + } + IOStatus PositionedRead(uint64_t offset, size_t n, + const IOOptions& /*options*/, Slice* result, + char* scratch, IODebugContext* /*dbg*/) override { + return status_to_io_status( + target_->PositionedRead(offset, n, result, scratch)); + } + + private: + std::unique_ptr target_; +}; + +class LegacyRandomAccessFileWrapper : public FSRandomAccessFile { + public: + explicit LegacyRandomAccessFileWrapper( + std::unique_ptr&& target) + : target_(std::move(target)) {} + + IOStatus Read(uint64_t offset, size_t n, const IOOptions& /*options*/, + Slice* result, char* scratch, + IODebugContext* /*dbg*/) const override { + return status_to_io_status(target_->Read(offset, n, result, scratch)); + } + + IOStatus MultiRead(FSReadRequest* fs_reqs, size_t num_reqs, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + std::vector reqs; + Status status; + + reqs.reserve(num_reqs); + for (size_t i = 0; i < num_reqs; ++i) { + ReadRequest req; + + req.offset = fs_reqs[i].offset; + req.len = fs_reqs[i].len; + req.scratch = fs_reqs[i].scratch; + req.status = Status::OK(); + + reqs.emplace_back(req); + } + status = target_->MultiRead(reqs.data(), num_reqs); + for (size_t i = 0; i < num_reqs; ++i) { + fs_reqs[i].result = reqs[i].result; + fs_reqs[i].status = status_to_io_status(std::move(reqs[i].status)); + } + return status_to_io_status(std::move(status)); + } + + IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Prefetch(offset, n)); + } + size_t GetUniqueId(char* id, size_t max_size) const override { + return target_->GetUniqueId(id, max_size); + } + void Hint(AccessPattern pattern) override { + target_->Hint((RandomAccessFile::AccessPattern)pattern); + } + bool use_direct_io() const override { return target_->use_direct_io(); } + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + IOStatus InvalidateCache(size_t offset, size_t length) override { + return status_to_io_status(target_->InvalidateCache(offset, length)); + } + + private: + std::unique_ptr target_; +}; + +class LegacyRandomRWFileWrapper : public FSRandomRWFile { + public: + explicit LegacyRandomRWFileWrapper(std::unique_ptr&& target) + : target_(std::move(target)) {} + + bool use_direct_io() const override { return target_->use_direct_io(); } + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + IOStatus Write(uint64_t offset, const Slice& data, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Write(offset, data)); + } + IOStatus Read(uint64_t offset, size_t n, const 
IOOptions& /*options*/, + Slice* result, char* scratch, + IODebugContext* /*dbg*/) const override { + return status_to_io_status(target_->Read(offset, n, result, scratch)); + } + IOStatus Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Flush()); + } + IOStatus Sync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Sync()); + } + IOStatus Fsync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Fsync()); + } + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Close()); + } + + private: + std::unique_ptr target_; +}; + +class LegacyWritableFileWrapper : public FSWritableFile { + public: + explicit LegacyWritableFileWrapper(std::unique_ptr&& _target) + : target_(std::move(_target)) {} + + IOStatus Append(const Slice& data, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Append(data)); + } + IOStatus Append(const Slice& data, const IOOptions& /*options*/, + const DataVerificationInfo& /*verification_info*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Append(data)); + } + IOStatus PositionedAppend(const Slice& data, uint64_t offset, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->PositionedAppend(data, offset)); + } + IOStatus PositionedAppend(const Slice& data, uint64_t offset, + const IOOptions& /*options*/, + const DataVerificationInfo& /*verification_info*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->PositionedAppend(data, offset)); + } + IOStatus Truncate(uint64_t size, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Truncate(size)); + } + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Close()); + } + IOStatus Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Flush()); + } + IOStatus Sync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Sync()); + } + IOStatus Fsync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Fsync()); + } + bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); } + + bool use_direct_io() const override { return target_->use_direct_io(); } + + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + + void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override { + target_->SetWriteLifeTimeHint(hint); + } + + Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { + return target_->GetWriteLifeTimeHint(); + } + + uint64_t GetFileSize(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return target_->GetFileSize(); + } + + void SetPreallocationBlockSize(size_t size) override { + target_->SetPreallocationBlockSize(size); + } + + void GetPreallocationStatus(size_t* block_size, + size_t* last_allocated_block) override { + target_->GetPreallocationStatus(block_size, last_allocated_block); + } + + size_t GetUniqueId(char* id, size_t max_size) const override { + return target_->GetUniqueId(id, max_size); + } + + IOStatus InvalidateCache(size_t offset, size_t 
length) override { + return status_to_io_status(target_->InvalidateCache(offset, length)); + } + + IOStatus RangeSync(uint64_t offset, uint64_t nbytes, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->RangeSync(offset, nbytes)); + } + + void PrepareWrite(size_t offset, size_t len, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + target_->PrepareWrite(offset, len); + } + + IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Allocate(offset, len)); + } + + private: + std::unique_ptr target_; +}; + +class LegacyDirectoryWrapper : public FSDirectory { + public: + explicit LegacyDirectoryWrapper(std::unique_ptr&& target) + : target_(std::move(target)) {} + + IOStatus Fsync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Fsync()); + } + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Close()); + } + size_t GetUniqueId(char* id, size_t max_size) const override { + return target_->GetUniqueId(id, max_size); + } + + private: + std::unique_ptr target_; +}; + +class LegacyFileSystemWrapper : public FileSystem { + public: + // Initialize an EnvWrapper that delegates all calls to *t + explicit LegacyFileSystemWrapper(Env* t) : target_(t) {} + ~LegacyFileSystemWrapper() override {} + + static const char* kClassName() { return "LegacyFileSystem"; } + const char* Name() const override { return kClassName(); } + + // Return the target to which this Env forwards all calls + Env* target() const { return target_; } + + // The following text is boilerplate that forwards all methods to target() + IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts, + std::unique_ptr* r, + IODebugContext* /*dbg*/) override { + std::unique_ptr file; + Status s = target_->NewSequentialFile(f, &file, file_opts); + if (s.ok()) { + r->reset(new LegacySequentialFileWrapper(std::move(file))); + } + return status_to_io_status(std::move(s)); + } + IOStatus NewRandomAccessFile(const std::string& f, + const FileOptions& file_opts, + std::unique_ptr* r, + IODebugContext* /*dbg*/) override { + std::unique_ptr file; + Status s = target_->NewRandomAccessFile(f, &file, file_opts); + if (s.ok()) { + r->reset(new LegacyRandomAccessFileWrapper(std::move(file))); + } + return status_to_io_status(std::move(s)); + } + IOStatus NewWritableFile(const std::string& f, const FileOptions& file_opts, + std::unique_ptr* r, + IODebugContext* /*dbg*/) override { + std::unique_ptr file; + Status s = target_->NewWritableFile(f, &file, file_opts); + if (s.ok()) { + r->reset(new LegacyWritableFileWrapper(std::move(file))); + } + return status_to_io_status(std::move(s)); + } + IOStatus ReopenWritableFile(const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr* result, + IODebugContext* /*dbg*/) override { + std::unique_ptr file; + Status s = target_->ReopenWritableFile(fname, &file, file_opts); + if (s.ok()) { + result->reset(new LegacyWritableFileWrapper(std::move(file))); + } + return status_to_io_status(std::move(s)); + } + IOStatus ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + const FileOptions& file_opts, + std::unique_ptr* r, + IODebugContext* /*dbg*/) override { + std::unique_ptr file; + Status s = target_->ReuseWritableFile(fname, old_fname, &file, file_opts); + if 
(s.ok()) { + r->reset(new LegacyWritableFileWrapper(std::move(file))); + } + return status_to_io_status(std::move(s)); + } + IOStatus NewRandomRWFile(const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr* result, + IODebugContext* /*dbg*/) override { + std::unique_ptr file; + Status s = target_->NewRandomRWFile(fname, &file, file_opts); + if (s.ok()) { + result->reset(new LegacyRandomRWFileWrapper(std::move(file))); + } + return status_to_io_status(std::move(s)); + } + IOStatus NewMemoryMappedFileBuffer( + const std::string& fname, + std::unique_ptr* result) override { + return status_to_io_status( + target_->NewMemoryMappedFileBuffer(fname, result)); + } + IOStatus NewDirectory(const std::string& name, const IOOptions& /*io_opts*/, + std::unique_ptr* result, + IODebugContext* /*dbg*/) override { + std::unique_ptr dir; + Status s = target_->NewDirectory(name, &dir); + if (s.ok()) { + result->reset(new LegacyDirectoryWrapper(std::move(dir))); + } + return status_to_io_status(std::move(s)); + } + IOStatus FileExists(const std::string& f, const IOOptions& /*io_opts*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->FileExists(f)); + } + IOStatus GetChildren(const std::string& dir, const IOOptions& /*io_opts*/, + std::vector* r, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->GetChildren(dir, r)); + } + IOStatus GetChildrenFileAttributes(const std::string& dir, + const IOOptions& /*options*/, + std::vector* result, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->GetChildrenFileAttributes(dir, result)); + } + IOStatus DeleteFile(const std::string& f, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->DeleteFile(f)); + } + IOStatus Truncate(const std::string& fname, size_t size, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Truncate(fname, size)); + } + IOStatus CreateDir(const std::string& d, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->CreateDir(d)); + } + IOStatus CreateDirIfMissing(const std::string& d, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->CreateDirIfMissing(d)); + } + IOStatus DeleteDir(const std::string& d, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->DeleteDir(d)); + } + IOStatus GetFileSize(const std::string& f, const IOOptions& /*options*/, + uint64_t* s, IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->GetFileSize(f, s)); + } + + IOStatus GetFileModificationTime(const std::string& fname, + const IOOptions& /*options*/, + uint64_t* file_mtime, + IODebugContext* /*dbg*/) override { + return status_to_io_status( + target_->GetFileModificationTime(fname, file_mtime)); + } + + IOStatus GetAbsolutePath(const std::string& db_path, + const IOOptions& /*options*/, + std::string* output_path, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->GetAbsolutePath(db_path, output_path)); + } + + IOStatus RenameFile(const std::string& s, const std::string& t, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->RenameFile(s, t)); + } + + IOStatus LinkFile(const std::string& s, const std::string& t, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return 
status_to_io_status(target_->LinkFile(s, t)); + } + + IOStatus NumFileLinks(const std::string& fname, const IOOptions& /*options*/, + uint64_t* count, IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->NumFileLinks(fname, count)); + } + + IOStatus AreFilesSame(const std::string& first, const std::string& second, + const IOOptions& /*options*/, bool* res, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->AreFilesSame(first, second, res)); + } + + IOStatus LockFile(const std::string& f, const IOOptions& /*options*/, + FileLock** l, IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->LockFile(f, l)); + } + + IOStatus UnlockFile(FileLock* l, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->UnlockFile(l)); + } + + IOStatus GetTestDirectory(const IOOptions& /*options*/, std::string* path, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->GetTestDirectory(path)); + } + IOStatus NewLogger(const std::string& fname, const IOOptions& /*options*/, + std::shared_ptr* result, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->NewLogger(fname, result)); + } + + void SanitizeFileOptions(FileOptions* opts) const override { + target_->SanitizeEnvOptions(opts); + } + + FileOptions OptimizeForLogRead( + const FileOptions& file_options) const override { + return target_->OptimizeForLogRead(file_options); + } + FileOptions OptimizeForManifestRead( + const FileOptions& file_options) const override { + return target_->OptimizeForManifestRead(file_options); + } + FileOptions OptimizeForLogWrite(const FileOptions& file_options, + const DBOptions& db_options) const override { + return target_->OptimizeForLogWrite(file_options, db_options); + } + FileOptions OptimizeForManifestWrite( + const FileOptions& file_options) const override { + return target_->OptimizeForManifestWrite(file_options); + } + FileOptions OptimizeForCompactionTableWrite( + const FileOptions& file_options, + const ImmutableDBOptions& immutable_ops) const override { + return target_->OptimizeForCompactionTableWrite(file_options, + immutable_ops); + } + FileOptions OptimizeForCompactionTableRead( + const FileOptions& file_options, + const ImmutableDBOptions& db_options) const override { + return target_->OptimizeForCompactionTableRead(file_options, db_options); + } + FileOptions OptimizeForBlobFileRead( + const FileOptions& file_options, + const ImmutableDBOptions& db_options) const override { + return target_->OptimizeForBlobFileRead(file_options, db_options); + } + +#ifdef GetFreeSpace +#undef GetFreeSpace +#endif + IOStatus GetFreeSpace(const std::string& path, const IOOptions& /*options*/, + uint64_t* diskfree, IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->GetFreeSpace(path, diskfree)); + } + IOStatus IsDirectory(const std::string& path, const IOOptions& /*options*/, + bool* is_dir, IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->IsDirectory(path, is_dir)); + } + + std::string SerializeOptions(const ConfigOptions& /*config_options*/, + const std::string& /*prefix*/) const override { + // We do not want the LegacyFileSystem to appear in the serialized output. + // This clock is an internal class for those who do not implement one and + // would be part of the Env. As such, do not serialize it here. 
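The Legacy*Wrapper classes in this file are a mechanical adapter layer: each FileSystem-level call forwards to the wrapped Env-era object and lifts its Status result into an IOStatus via status_to_io_status. A minimal, self-contained sketch of the same shape (the Status/IOStatus types and the LiftStatus helper here are illustrative stand-ins, not RocksDB's):

#include <memory>
#include <string>
#include <utility>

// Illustrative stand-ins for the two status types.
struct Status { bool ok = true; std::string msg; };
struct IOStatus { bool ok = true; std::string msg; };

// The lifting helper: wraps a legacy Status into the richer IO type.
IOStatus LiftStatus(Status&& s) { return IOStatus{s.ok, std::move(s.msg)}; }

struct LegacyFile {            // old, Status-based interface
  Status Flush() { return {}; }
};

struct NewFile {               // new, IOStatus-based interface
  explicit NewFile(std::unique_ptr<LegacyFile> t) : target_(std::move(t)) {}
  // Pure forwarding plus status conversion, exactly the wrapper pattern above.
  IOStatus Flush() { return LiftStatus(target_->Flush()); }
 private:
  std::unique_ptr<LegacyFile> target_;
};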
+ return ""; + } + private: + Env* target_; +}; +} // end anonymous namespace + +Env::Env() : thread_status_updater_(nullptr) { + file_system_ = std::make_shared(this); + system_clock_ = std::make_shared(this); +} + +Env::Env(const std::shared_ptr& fs) + : thread_status_updater_(nullptr), file_system_(fs) { + system_clock_ = std::make_shared(this); +} + +Env::Env(const std::shared_ptr& fs, + const std::shared_ptr& clock) + : thread_status_updater_(nullptr), file_system_(fs), system_clock_(clock) {} + +Env::~Env() {} + +Status Env::NewLogger(const std::string& fname, + std::shared_ptr* result) { + return NewEnvLogger(fname, this, result); +} + +Status Env::CreateFromString(const ConfigOptions& config_options, + const std::string& value, Env** result) { + Env* base = Env::Default(); + if (value.empty() || base->IsInstanceOf(value)) { + *result = base; + return Status::OK(); + } else { + RegisterSystemEnvs(); + Env* env = *result; + Status s = LoadStaticObject(config_options, value, &env); + if (s.ok()) { + *result = env; + } + return s; + } +} + +Status Env::CreateFromString(const ConfigOptions& config_options, + const std::string& value, Env** result, + std::shared_ptr* guard) { + assert(result); + assert(guard != nullptr); + std::unique_ptr uniq; + + Env* env = *result; + std::string id; + std::unordered_map opt_map; + + Status status = + Customizable::GetOptionsMap(config_options, env, value, &id, &opt_map); + if (!status.ok()) { // GetOptionsMap failed + return status; + } + Env* base = Env::Default(); + if (id.empty() || base->IsInstanceOf(id)) { + env = base; + status = Status::OK(); + } else { + RegisterSystemEnvs(); + // First, try to load the Env as a unique object. + status = config_options.registry->NewObject(id, &env, &uniq); + } + if (config_options.ignore_unsupported_options && status.IsNotSupported()) { + status = Status::OK(); + } else if (status.ok()) { + status = Customizable::ConfigureNewObject(config_options, env, opt_map); + } + if (status.ok()) { + guard->reset(uniq.release()); + *result = env; + } + return status; +} + +Status Env::CreateFromUri(const ConfigOptions& config_options, + const std::string& env_uri, const std::string& fs_uri, + Env** result, std::shared_ptr* guard) { + *result = config_options.env; + if (env_uri.empty() && fs_uri.empty()) { + // Neither specified. Use the default + guard->reset(); + return Status::OK(); + } else if (!env_uri.empty() && !fs_uri.empty()) { + // Both specified. Cannot choose. Return Invalid + return Status::InvalidArgument("cannot specify both fs_uri and env_uri"); + } else if (fs_uri.empty()) { // Only have an ENV URI. 
Create an Env from it + return CreateFromString(config_options, env_uri, result, guard); + } else { + std::shared_ptr fs; + Status s = FileSystem::CreateFromString(config_options, fs_uri, &fs); + if (s.ok()) { + guard->reset(new CompositeEnvWrapper(*result, fs)); + *result = guard->get(); + } + return s; + } +} + +std::string Env::PriorityToString(Env::Priority priority) { + switch (priority) { + case Env::Priority::BOTTOM: + return "Bottom"; + case Env::Priority::LOW: + return "Low"; + case Env::Priority::HIGH: + return "High"; + case Env::Priority::USER: + return "User"; + case Env::Priority::TOTAL: + assert(false); + } + return "Invalid"; +} + +uint64_t Env::GetThreadID() const { + std::hash hasher; + return hasher(std::this_thread::get_id()); +} + +Status Env::ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + std::unique_ptr* result, + const EnvOptions& options) { + Status s = RenameFile(old_fname, fname); + if (!s.ok()) { + return s; + } + return NewWritableFile(fname, result, options); +} + +Status Env::GetChildrenFileAttributes(const std::string& dir, + std::vector* result) { + assert(result != nullptr); + std::vector child_fnames; + Status s = GetChildren(dir, &child_fnames); + if (!s.ok()) { + return s; + } + result->resize(child_fnames.size()); + size_t result_size = 0; + for (size_t i = 0; i < child_fnames.size(); ++i) { + const std::string path = dir + "/" + child_fnames[i]; + if (!(s = GetFileSize(path, &(*result)[result_size].size_bytes)).ok()) { + if (FileExists(path).IsNotFound()) { + // The file may have been deleted since we listed the directory + continue; + } + return s; + } + (*result)[result_size].name = std::move(child_fnames[i]); + result_size++; + } + result->resize(result_size); + return Status::OK(); +} + +Status Env::GetHostNameString(std::string* result) { + std::array hostname_buf{}; + Status s = GetHostName(hostname_buf.data(), hostname_buf.size()); + if (s.ok()) { + hostname_buf[hostname_buf.size() - 1] = '\0'; + result->assign(hostname_buf.data()); + } + return s; +} + +std::string Env::GenerateUniqueId() { + std::string result; + bool success = port::GenerateRfcUuid(&result); + if (!success) { + // Fall back on our own way of generating a unique ID and adapt it to + // RFC 4122 variant 1 version 4 (a random ID). 
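Env::GetChildrenFileAttributes above composes GetChildren with a per-file GetFileSize, and treats IsNotFound on an individual child as a benign race: the file was deleted between the listing and the stat. The same tolerance, sketched with std::filesystem rather than the Env API:

#include <cstdint>
#include <filesystem>
#include <string>
#include <system_error>
#include <vector>

struct FileAttrs { std::string name; std::uintmax_t size_bytes; };

// List a directory, skipping entries that vanish between listing and stat.
std::vector<FileAttrs> ListTolerant(const std::filesystem::path& dir) {
  std::vector<FileAttrs> out;
  std::error_code ec;
  for (const auto& entry : std::filesystem::directory_iterator(dir, ec)) {
    std::error_code stat_ec;
    auto sz = std::filesystem::file_size(entry.path(), stat_ec);
    if (stat_ec) continue;  // deleted (or unreadable) since listing: skip, don't fail
    out.push_back({entry.path().filename().string(), sz});
  }
  return out;
}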
+ // https://en.wikipedia.org/wiki/Universally_unique_identifier + // We already tried GenerateRfcUuid so no need to try it again in + // GenerateRawUniqueId + constexpr bool exclude_port_uuid = true; + uint64_t upper, lower; + GenerateRawUniqueId(&upper, &lower, exclude_port_uuid); + + // Set 4-bit version to 4 + upper = (upper & (~uint64_t{0xf000})) | 0x4000; + // Set unary-encoded variant to 1 (0b10) + lower = (lower & (~(uint64_t{3} << 62))) | (uint64_t{2} << 62); + + // Use 36 character format of RFC 4122 + result.resize(36U); + char* buf = &result[0]; + PutBaseChars<16>(&buf, 8, upper >> 32, /*!uppercase*/ false); + *(buf++) = '-'; + PutBaseChars<16>(&buf, 4, upper >> 16, /*!uppercase*/ false); + *(buf++) = '-'; + PutBaseChars<16>(&buf, 4, upper, /*!uppercase*/ false); + *(buf++) = '-'; + PutBaseChars<16>(&buf, 4, lower >> 48, /*!uppercase*/ false); + *(buf++) = '-'; + PutBaseChars<16>(&buf, 12, lower, /*!uppercase*/ false); + assert(buf == &result[36]); + + // Verify variant 1 version 4 + assert(result[14] == '4'); + assert(result[19] == '8' || result[19] == '9' || result[19] == 'a' || + result[19] == 'b'); + } + return result; +} + +SequentialFile::~SequentialFile() {} + +RandomAccessFile::~RandomAccessFile() {} + +WritableFile::~WritableFile() {} + +MemoryMappedFileBuffer::~MemoryMappedFileBuffer() {} + +Logger::~Logger() {} + +Status Logger::Close() { + if (!closed_) { + closed_ = true; + return CloseImpl(); + } else { + return Status::OK(); + } +} + +Status Logger::CloseImpl() { return Status::NotSupported(); } + +FileLock::~FileLock() {} + +void LogFlush(Logger* info_log) { + if (info_log) { + info_log->Flush(); + } +} + +static void Logv(Logger* info_log, const char* format, va_list ap) { + if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::INFO_LEVEL) { + info_log->Logv(InfoLogLevel::INFO_LEVEL, format, ap); + } +} + +void Log(Logger* info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Logv(info_log, format, ap); + va_end(ap); +} + +void Logger::Logv(const InfoLogLevel log_level, const char* format, + va_list ap) { + static const char* kInfoLogLevelNames[5] = {"DEBUG", "INFO", "WARN", "ERROR", + "FATAL"}; + if (log_level < log_level_) { + return; + } + + if (log_level == InfoLogLevel::INFO_LEVEL) { + // Doesn't print log level if it is INFO level. + // This is to avoid unexpected performance regression after we add + // the feature of log level. All the logs before we add the feature + // are INFO level. We don't want to add extra costs to those existing + // logging. + Logv(format, ap); + } else if (log_level == InfoLogLevel::HEADER_LEVEL) { + LogHeader(format, ap); + } else { + char new_format[500]; + snprintf(new_format, sizeof(new_format) - 1, "[%s] %s", + kInfoLogLevelNames[log_level], format); + Logv(new_format, ap); + } + + if (log_level >= InfoLogLevel::WARN_LEVEL && + log_level != InfoLogLevel::HEADER_LEVEL) { + // Log messages with severity of warning or higher should be rare and are + // sometimes followed by an unclean crash. We want to be sure important + // messages are not lost in an application buffer when that happens. 
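That flush-on-severity policy is easy to isolate. A minimal leveled logger mirroring the behavior described above (INFO lines carry no level tag, and anything at WARN or above is flushed immediately so the message survives a crash that follows it); the Level enum and LogLine helper are illustrative, not RocksDB's API:

#include <cstdio>

enum class Level { kDebug, kInfo, kWarn, kError, kFatal };

void LogLine(Level lvl, const char* msg) {
  static const char* kNames[] = {"DEBUG", "INFO", "WARN", "ERROR", "FATAL"};
  if (lvl == Level::kInfo) {
    std::fprintf(stderr, "%s\n", msg);   // INFO: no tag, keeps the hot path cheap
  } else {
    std::fprintf(stderr, "[%s] %s\n", kNames[static_cast<int>(lvl)], msg);
  }
  if (lvl >= Level::kWarn) std::fflush(stderr);  // don't lose urgent messages
}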
+ Flush(); + } +} + +static void Logv(const InfoLogLevel log_level, Logger* info_log, + const char* format, va_list ap) { + if (info_log && info_log->GetInfoLogLevel() <= log_level) { + if (log_level == InfoLogLevel::HEADER_LEVEL) { + info_log->LogHeader(format, ap); + } else { + info_log->Logv(log_level, format, ap); + } + } +} + +void Log(const InfoLogLevel log_level, Logger* info_log, const char* format, + ...) { + va_list ap; + va_start(ap, format); + Logv(log_level, info_log, format, ap); + va_end(ap); +} + +static void Headerv(Logger* info_log, const char* format, va_list ap) { + if (info_log) { + info_log->LogHeader(format, ap); + } +} + +void Header(Logger* info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Headerv(info_log, format, ap); + va_end(ap); +} + +static void Debugv(Logger* info_log, const char* format, va_list ap) { + if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) { + info_log->Logv(InfoLogLevel::DEBUG_LEVEL, format, ap); + } +} + +void Debug(Logger* info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Debugv(info_log, format, ap); + va_end(ap); +} + +static void Infov(Logger* info_log, const char* format, va_list ap) { + if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::INFO_LEVEL) { + info_log->Logv(InfoLogLevel::INFO_LEVEL, format, ap); + } +} + +void Info(Logger* info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Infov(info_log, format, ap); + va_end(ap); +} + +static void Warnv(Logger* info_log, const char* format, va_list ap) { + if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::WARN_LEVEL) { + info_log->Logv(InfoLogLevel::WARN_LEVEL, format, ap); + } +} + +void Warn(Logger* info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Warnv(info_log, format, ap); + va_end(ap); +} + +static void Errorv(Logger* info_log, const char* format, va_list ap) { + if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::ERROR_LEVEL) { + info_log->Logv(InfoLogLevel::ERROR_LEVEL, format, ap); + } +} + +void Error(Logger* info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Errorv(info_log, format, ap); + va_end(ap); +} + +static void Fatalv(Logger* info_log, const char* format, va_list ap) { + if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::FATAL_LEVEL) { + info_log->Logv(InfoLogLevel::FATAL_LEVEL, format, ap); + } +} + +void Fatal(Logger* info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Fatalv(info_log, format, ap); + va_end(ap); +} + +void LogFlush(const std::shared_ptr& info_log) { + LogFlush(info_log.get()); +} + +void Log(const InfoLogLevel log_level, const std::shared_ptr& info_log, + const char* format, ...) { + va_list ap; + va_start(ap, format); + Logv(log_level, info_log.get(), format, ap); + va_end(ap); +} + +void Header(const std::shared_ptr& info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Headerv(info_log.get(), format, ap); + va_end(ap); +} + +void Debug(const std::shared_ptr& info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Debugv(info_log.get(), format, ap); + va_end(ap); +} + +void Info(const std::shared_ptr& info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Infov(info_log.get(), format, ap); + va_end(ap); +} + +void Warn(const std::shared_ptr& info_log, const char* format, ...) 
{ + va_list ap; + va_start(ap, format); + Warnv(info_log.get(), format, ap); + va_end(ap); +} + +void Error(const std::shared_ptr& info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Errorv(info_log.get(), format, ap); + va_end(ap); +} + +void Fatal(const std::shared_ptr& info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Fatalv(info_log.get(), format, ap); + va_end(ap); +} + +void Log(const std::shared_ptr& info_log, const char* format, ...) { + va_list ap; + va_start(ap, format); + Logv(info_log.get(), format, ap); + va_end(ap); +} + +Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname, + bool should_sync) { + const auto& fs = env->GetFileSystem(); + return WriteStringToFile(fs.get(), data, fname, should_sync); +} + +Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { + const auto& fs = env->GetFileSystem(); + return ReadFileToString(fs.get(), fname, data); +} + +namespace { // anonymous namespace + +void AssignEnvOptions(EnvOptions* env_options, const DBOptions& options) { + env_options->use_mmap_reads = options.allow_mmap_reads; + env_options->use_mmap_writes = options.allow_mmap_writes; + env_options->use_direct_reads = options.use_direct_reads; + env_options->set_fd_cloexec = options.is_fd_close_on_exec; + env_options->bytes_per_sync = options.bytes_per_sync; + env_options->compaction_readahead_size = options.compaction_readahead_size; + env_options->random_access_max_buffer_size = + options.random_access_max_buffer_size; + env_options->rate_limiter = options.rate_limiter.get(); + env_options->writable_file_max_buffer_size = + options.writable_file_max_buffer_size; + env_options->allow_fallocate = options.allow_fallocate; + env_options->strict_bytes_per_sync = options.strict_bytes_per_sync; + options.env->SanitizeEnvOptions(env_options); +} + +} // namespace + +EnvOptions Env::OptimizeForLogWrite(const EnvOptions& env_options, + const DBOptions& db_options) const { + EnvOptions optimized_env_options(env_options); + optimized_env_options.bytes_per_sync = db_options.wal_bytes_per_sync; + optimized_env_options.writable_file_max_buffer_size = + db_options.writable_file_max_buffer_size; + return optimized_env_options; +} + +EnvOptions Env::OptimizeForManifestWrite(const EnvOptions& env_options) const { + return env_options; +} + +EnvOptions Env::OptimizeForLogRead(const EnvOptions& env_options) const { + EnvOptions optimized_env_options(env_options); + optimized_env_options.use_direct_reads = false; + return optimized_env_options; +} + +EnvOptions Env::OptimizeForManifestRead(const EnvOptions& env_options) const { + EnvOptions optimized_env_options(env_options); + optimized_env_options.use_direct_reads = false; + return optimized_env_options; +} + +EnvOptions Env::OptimizeForCompactionTableWrite( + const EnvOptions& env_options, const ImmutableDBOptions& db_options) const { + EnvOptions optimized_env_options(env_options); + optimized_env_options.use_direct_writes = + db_options.use_direct_io_for_flush_and_compaction; + return optimized_env_options; +} + +EnvOptions Env::OptimizeForCompactionTableRead( + const EnvOptions& env_options, const ImmutableDBOptions& db_options) const { + EnvOptions optimized_env_options(env_options); + optimized_env_options.use_direct_reads = db_options.use_direct_reads; + return optimized_env_options; +} +EnvOptions Env::OptimizeForBlobFileRead( + const EnvOptions& env_options, const ImmutableDBOptions& db_options) const { + EnvOptions 
optimized_env_options(env_options); + optimized_env_options.use_direct_reads = db_options.use_direct_reads; + return optimized_env_options; +} + +EnvOptions::EnvOptions(const DBOptions& options) { + AssignEnvOptions(this, options); +} + +EnvOptions::EnvOptions() { + DBOptions options; + AssignEnvOptions(this, options); +} + +Status NewEnvLogger(const std::string& fname, Env* env, + std::shared_ptr* result) { + FileOptions options; + // TODO: Tune the buffer size. + options.writable_file_max_buffer_size = 1024 * 1024; + std::unique_ptr writable_file; + const auto status = env->GetFileSystem()->NewWritableFile( + fname, options, &writable_file, nullptr); + if (!status.ok()) { + return status; + } + + *result = std::make_shared(std::move(writable_file), fname, + options, env); + return Status::OK(); +} + +const std::shared_ptr& Env::GetFileSystem() const { + return file_system_; +} + +const std::shared_ptr& Env::GetSystemClock() const { + return system_clock_; +} +namespace { +static std::unordered_map sc_wrapper_type_info = { + {"target", + OptionTypeInfo::AsCustomSharedPtr( + 0, OptionVerificationType::kByName, OptionTypeFlags::kDontSerialize)}, +}; + +} // namespace +SystemClockWrapper::SystemClockWrapper(const std::shared_ptr& t) + : target_(t) { + RegisterOptions("", &target_, &sc_wrapper_type_info); +} + +Status SystemClockWrapper::PrepareOptions(const ConfigOptions& options) { + if (target_ == nullptr) { + target_ = SystemClock::Default(); + } + return SystemClock::PrepareOptions(options); +} + +std::string SystemClockWrapper::SerializeOptions( + const ConfigOptions& config_options, const std::string& header) const { + auto parent = SystemClock::SerializeOptions(config_options, ""); + if (config_options.IsShallow() || target_ == nullptr || + target_->IsInstanceOf(SystemClock::kDefaultName())) { + return parent; + } else { + std::string result = header; + if (!StartsWith(parent, OptionTypeInfo::kIdPropName())) { + result.append(OptionTypeInfo::kIdPropName()).append("="); + } + result.append(parent); + if (!EndsWith(result, config_options.delimiter)) { + result.append(config_options.delimiter); + } + result.append("target=").append(target_->ToString(config_options)); + return result; + } +} + +static int RegisterBuiltinSystemClocks(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + EmulatedSystemClock::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new EmulatedSystemClock(SystemClock::Default())); + return guard->get(); + }); + size_t num_types; + return static_cast(library.GetFactoryCount(&num_types)); +} + +Status SystemClock::CreateFromString(const ConfigOptions& config_options, + const std::string& value, + std::shared_ptr* result) { + auto clock = SystemClock::Default(); + if (clock->IsInstanceOf(value)) { + *result = clock; + return Status::OK(); + } else { + static std::once_flag once; + std::call_once(once, [&]() { + RegisterBuiltinSystemClocks(*(ObjectLibrary::Default().get()), ""); + }); + return LoadSharedObject(config_options, value, result); + } +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.cc new file mode 100644 index 0000000000..5ff32a7e44 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.cc @@ -0,0 +1,148 @@ +// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#if !defined(OS_WIN) + +#include "env/env_chroot.h" + +#include // errno +#include // realpath, free +#include // geteuid + +#include "env/composite_env_wrapper.h" +#include "env/fs_remap.h" +#include "rocksdb/utilities/options_type.h" +#include "util/string_util.h" // errnoStr + +namespace ROCKSDB_NAMESPACE { +namespace { +static std::unordered_map chroot_fs_type_info = { + {"chroot_dir", {0, OptionType::kString}}}; +} // namespace +ChrootFileSystem::ChrootFileSystem(const std::shared_ptr& base, + const std::string& chroot_dir) + : RemapFileSystem(base), chroot_dir_(chroot_dir) { + RegisterOptions("chroot_dir", &chroot_dir_, &chroot_fs_type_info); +} + +Status ChrootFileSystem::PrepareOptions(const ConfigOptions& options) { + Status s = FileSystemWrapper::PrepareOptions(options); + if (!s.ok()) { + return s; + } else if (chroot_dir_.empty()) { + s = Status::InvalidArgument("ChRootFileSystem requires a chroot dir"); + } else { + s = target_->FileExists(chroot_dir_, IOOptions(), nullptr); + } + if (s.ok()) { +#if defined(OS_AIX) + char resolvedName[PATH_MAX]; + char* real_chroot_dir = realpath(chroot_dir_.c_str(), resolvedName); +#else + char* real_chroot_dir = realpath(chroot_dir_.c_str(), nullptr); +#endif + // chroot_dir must exist so realpath() returns non-nullptr. + assert(real_chroot_dir != nullptr); + chroot_dir_ = real_chroot_dir; +#if !defined(OS_AIX) + free(real_chroot_dir); +#endif + } + return s; +} + +IOStatus ChrootFileSystem::GetTestDirectory(const IOOptions& options, + std::string* path, + IODebugContext* dbg) { + // Adapted from PosixEnv's implementation since it doesn't provide a way to + // create directory in the chroot. + char buf[256]; + snprintf(buf, sizeof(buf), "/rocksdbtest-%d", static_cast(geteuid())); + *path = buf; + + // Directory may already exist, so ignore return + return CreateDirIfMissing(*path, options, dbg); +} + +// Returns status and expanded absolute path including the chroot directory. +// Checks whether the provided path breaks out of the chroot. If it returns +// non-OK status, the returned path should not be used. +std::pair ChrootFileSystem::EncodePath( + const std::string& path) { + if (path.empty() || path[0] != '/') { + return {IOStatus::InvalidArgument(path, "Not an absolute path"), ""}; + } + std::pair res; + res.second = chroot_dir_ + path; +#if defined(OS_AIX) + char resolvedName[PATH_MAX]; + char* normalized_path = realpath(res.second.c_str(), resolvedName); +#else + char* normalized_path = realpath(res.second.c_str(), nullptr); +#endif + if (normalized_path == nullptr) { + res.first = IOStatus::NotFound(res.second, errnoStr(errno).c_str()); + } else if (strlen(normalized_path) < chroot_dir_.size() || + strncmp(normalized_path, chroot_dir_.c_str(), + chroot_dir_.size()) != 0) { + res.first = IOStatus::IOError(res.second, + "Attempted to access path outside chroot"); + } else { + res.first = IOStatus::OK(); + } +#if !defined(OS_AIX) + free(normalized_path); +#endif + return res; +} + +// Similar to EncodePath() except assumes the basename in the path hasn't been +// created yet. 
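ChrootFileSystem::EncodePath above defends against path escapes by canonicalizing with realpath(3) and then requiring the chroot directory to be a literal prefix of the result, so symlinks pointing outside the jail are rejected after resolution. A portable sketch of the same check, with std::filesystem::weakly_canonical standing in for realpath:

#include <filesystem>
#include <string>
#include <system_error>

namespace fs = std::filesystem;

// Returns true iff `path` (absolute, as seen inside the jail) canonicalizes
// to a location under `jail`. The plain prefix compare deliberately mirrors
// the strncmp above, and shares its caveats.
bool InsideJail(const fs::path& jail, const std::string& path) {
  if (path.empty() || path[0] != '/') return false;   // must be absolute
  std::error_code ec;
  fs::path full = fs::weakly_canonical(jail / path.substr(1), ec);
  if (ec) return false;
  fs::path jail_canon = fs::weakly_canonical(jail, ec);
  if (ec) return false;
  const std::string j = jail_canon.string();
  return full.string().compare(0, j.size(), j) == 0;
}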
+std::pair ChrootFileSystem::EncodePathWithNewBasename( + const std::string& path) { + if (path.empty() || path[0] != '/') { + return {IOStatus::InvalidArgument(path, "Not an absolute path"), ""}; + } + // Basename may be followed by trailing slashes + size_t final_idx = path.find_last_not_of('/'); + if (final_idx == std::string::npos) { + // It's only slashes so no basename to extract + return EncodePath(path); + } + + // Pull off the basename temporarily since realname(3) (used by + // EncodePath()) requires a path that exists + size_t base_sep = path.rfind('/', final_idx); + auto status_and_enc_path = EncodePath(path.substr(0, base_sep + 1)); + status_and_enc_path.second.append(path.substr(base_sep + 1)); + return status_and_enc_path; +} + +std::shared_ptr NewChrootFileSystem( + const std::shared_ptr& base, const std::string& chroot_dir) { + auto chroot_fs = std::make_shared(base, chroot_dir); + Status s = chroot_fs->PrepareOptions(ConfigOptions()); + if (s.ok()) { + return chroot_fs; + } else { + return nullptr; + } +} + +Env* NewChrootEnv(Env* base_env, const std::string& chroot_dir) { + if (!base_env->FileExists(chroot_dir).ok()) { + return nullptr; + } + auto chroot_fs = NewChrootFileSystem(base_env->GetFileSystem(), chroot_dir); + if (chroot_fs != nullptr) { + return new CompositeEnvWrapper(base_env, chroot_fs); + } else { + return nullptr; + } +} + +} // namespace ROCKSDB_NAMESPACE + +#endif // !defined(OS_WIN) diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.h new file mode 100644 index 0000000000..9cead1561b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_chroot.h @@ -0,0 +1,55 @@ +// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#if !defined(OS_WIN) + +#include + +#include "env/fs_remap.h" +#include "rocksdb/file_system.h" + +namespace ROCKSDB_NAMESPACE { +class ChrootFileSystem : public RemapFileSystem { + public: + ChrootFileSystem(const std::shared_ptr& base, + const std::string& chroot_dir); + + static const char* kClassName() { return "ChrootFS"; } + const char* Name() const override { return kClassName(); } + + IOStatus GetTestDirectory(const IOOptions& options, std::string* path, + IODebugContext* dbg) override; + + Status PrepareOptions(const ConfigOptions& options) override; + + protected: + // Returns status and expanded absolute path including the chroot directory. + // Checks whether the provided path breaks out of the chroot. If it returns + // non-OK status, the returned path should not be used. + std::pair EncodePath(const std::string& path) override; + + // Similar to EncodePath() except assumes the basename in the path hasn't been + // created yet. + std::pair EncodePathWithNewBasename( + const std::string& path) override; + + private: + std::string chroot_dir_; +}; + +// Returns an Env that translates paths such that the root directory appears to +// be chroot_dir. chroot_dir should refer to an existing directory. +// +// This class has not been fully analyzed for providing strong security +// guarantees. 
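Because realpath(3) only works on paths that exist, EncodePathWithNewBasename peels the not-yet-created basename off, canonicalizes the existing parent, and re-appends the basename. The string surgery in isolation, as a small sketch:

#include <string>
#include <utility>

// Split "/a/b/newfile" into ("/a/b/", "newfile") so the existing parent can
// be canonicalized separately from the not-yet-created basename. Trailing
// slashes are ignored when locating the basename, as above.
std::pair<std::string, std::string> SplitForNewBasename(const std::string& path) {
  size_t final_idx = path.find_last_not_of('/');
  if (final_idx == std::string::npos) return {path, ""};  // only slashes
  size_t base_sep = path.rfind('/', final_idx);
  return {path.substr(0, base_sep + 1), path.substr(base_sep + 1)};
}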
+Env* NewChrootEnv(Env* base_env, const std::string& chroot_dir); +std::shared_ptr NewChrootFileSystem( + const std::shared_ptr& base, const std::string& chroot_dir); + +} // namespace ROCKSDB_NAMESPACE + +#endif // !defined(OS_WIN) diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption.cc new file mode 100644 index 0000000000..beb31a9cf9 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption.cc @@ -0,0 +1,1346 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "rocksdb/env_encryption.h" + +#include +#include +#include +#include + +#include "env/composite_env_wrapper.h" +#include "env/env_encryption_ctr.h" +#include "monitoring/perf_context_imp.h" +#include "rocksdb/convenience.h" +#include "rocksdb/io_status.h" +#include "rocksdb/system_clock.h" +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/options_type.h" +#include "util/aligned_buffer.h" +#include "util/coding.h" +#include "util/random.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +std::shared_ptr EncryptionProvider::NewCTRProvider( + const std::shared_ptr& cipher) { + return std::make_shared(cipher); +} + +// Read up to "n" bytes from the file. "scratch[0..n-1]" may be +// written by this routine. Sets "*result" to the data that was +// read (including if fewer than "n" bytes were successfully read). +// May set "*result" to point at data in "scratch[0..n-1]", so +// "scratch[0..n-1]" must be live when "*result" is used. +// If an error was encountered, returns a non-OK status. +// +// REQUIRES: External synchronization +IOStatus EncryptedSequentialFile::Read(size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) { + assert(scratch); + IOStatus io_s = file_->Read(n, options, result, scratch, dbg); + if (!io_s.ok()) { + return io_s; + } + { + PERF_TIMER_GUARD(decrypt_data_nanos); + io_s = status_to_io_status( + stream_->Decrypt(offset_, (char*)result->data(), result->size())); + } + if (io_s.ok()) { + offset_ += result->size(); // We've already ready data from disk, so update + // offset_ even if decryption fails. + } + return io_s; +} + +// Skip "n" bytes from the file. This is guaranteed to be no +// slower that reading the same data, but may be faster. +// +// If end of file is reached, skipping will stop at the end of the +// file, and Skip will return OK. +// +// REQUIRES: External synchronization +IOStatus EncryptedSequentialFile::Skip(uint64_t n) { + auto status = file_->Skip(n); + if (!status.ok()) { + return status; + } + offset_ += n; + return status; +} + +// Indicates the upper layers if the current SequentialFile implementation +// uses direct IO. +bool EncryptedSequentialFile::use_direct_io() const { + return file_->use_direct_io(); +} + +// Use the returned alignment value to allocate +// aligned buffer for Direct I/O +size_t EncryptedSequentialFile::GetRequiredBufferAlignment() const { + return file_->GetRequiredBufferAlignment(); +} + +// Remove any kind of caching of data from the offset to offset+length +// of this file. If the length is 0, then it refers to the end of file. +// If the system is not caching the file contents, then this is a noop. 
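EncryptedSequentialFile::Read below reads ciphertext into the caller's scratch buffer and then decrypts it in place at the current logical offset. A toy XOR "cipher stream" shows the in-place shape and why decryption is its own inverse (illustrative only, not RocksDB's CTR scheme):

#include <cstddef>
#include <cstdint>

// Toy cipher stream: XOR each byte with a keystream derived from its
// absolute file offset, so any byte can be decrypted independently.
void XorCryptInPlace(uint64_t offset, char* data, size_t n, uint8_t key) {
  for (size_t i = 0; i < n; ++i) {
    data[i] = static_cast<char>(data[i] ^ (key + ((offset + i) & 0xff)));
  }
}

int main() {
  char buf[4] = {'a', 'b', 'c', 'd'};
  XorCryptInPlace(100, buf, 4, 0x5a);  // "encrypt"
  XorCryptInPlace(100, buf, 4, 0x5a);  // same keystream: decrypt restores
  return buf[0] == 'a' ? 0 : 1;
}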
+IOStatus EncryptedSequentialFile::InvalidateCache(size_t offset, + size_t length) { + return file_->InvalidateCache(offset + prefixLength_, length); +} + +// Positioned Read for direct I/O +// If Direct I/O enabled, offset, n, and scratch should be properly aligned +IOStatus EncryptedSequentialFile::PositionedRead(uint64_t offset, size_t n, + const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) { + assert(scratch); + offset += prefixLength_; // Skip prefix + auto io_s = file_->PositionedRead(offset, n, options, result, scratch, dbg); + if (!io_s.ok()) { + return io_s; + } + offset_ = offset + result->size(); + { + PERF_TIMER_GUARD(decrypt_data_nanos); + io_s = status_to_io_status( + stream_->Decrypt(offset, (char*)result->data(), result->size())); + } + return io_s; +} + +// Read up to "n" bytes from the file starting at "offset". +// "scratch[0..n-1]" may be written by this routine. Sets "*result" +// to the data that was read (including if fewer than "n" bytes were +// successfully read). May set "*result" to point at data in +// "scratch[0..n-1]", so "scratch[0..n-1]" must be live when +// "*result" is used. If an error was encountered, returns a non-OK +// status. +// +// Safe for concurrent use by multiple threads. +// If Direct I/O enabled, offset, n, and scratch should be aligned properly. +IOStatus EncryptedRandomAccessFile::Read(uint64_t offset, size_t n, + const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const { + assert(scratch); + offset += prefixLength_; + auto io_s = file_->Read(offset, n, options, result, scratch, dbg); + if (!io_s.ok()) { + return io_s; + } + { + PERF_TIMER_GUARD(decrypt_data_nanos); + io_s = status_to_io_status( + stream_->Decrypt(offset, (char*)result->data(), result->size())); + } + return io_s; +} + +// Readahead the file starting from offset by n bytes for caching. +IOStatus EncryptedRandomAccessFile::Prefetch(uint64_t offset, size_t n, + const IOOptions& options, + IODebugContext* dbg) { + // return Status::OK(); + return file_->Prefetch(offset + prefixLength_, n, options, dbg); +} + +// Tries to get an unique ID for this file that will be the same each time +// the file is opened (and will stay the same while the file is open). +// Furthermore, it tries to make this ID at most "max_size" bytes. If such an +// ID can be created this function returns the length of the ID and places it +// in "id"; otherwise, this function returns 0, in which case "id" +// may not have been modified. +// +// This function guarantees, for IDs from a given environment, two unique ids +// cannot be made equal to each other by adding arbitrary bytes to one of +// them. That is, no unique ID is the prefix of another. +// +// This function guarantees that the returned ID will not be interpretable as +// a single varint. +// +// Note: these IDs are only valid for the duration of the process. +size_t EncryptedRandomAccessFile::GetUniqueId(char* id, size_t max_size) const { + return file_->GetUniqueId(id, max_size); +}; + +void EncryptedRandomAccessFile::Hint(AccessPattern pattern) { + file_->Hint(pattern); +} + +// Indicates the upper layers if the current RandomAccessFile implementation +// uses direct IO. 
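Note how every file-position parameter crossing these wrappers is shifted by prefixLength_: callers see logical offsets starting at payload byte 0, while the physical file stores an encryption prefix first. The arithmetic is uniform, as this small sketch of the convention shows:

#include <cstdint>

// Logical <-> physical translation around a fixed-size file prefix,
// following the `offset += prefixLength_` pattern used throughout.
struct PrefixedLayout {
  uint64_t prefix_len;
  uint64_t ToPhysical(uint64_t logical) const { return logical + prefix_len; }
  uint64_t ToLogicalSize(uint64_t physical_size) const {
    return physical_size - prefix_len;  // cf. EncryptedWritableFile::GetFileSize
  }
};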
+bool EncryptedRandomAccessFile::use_direct_io() const { + return file_->use_direct_io(); +} + +// Use the returned alignment value to allocate +// aligned buffer for Direct I/O +size_t EncryptedRandomAccessFile::GetRequiredBufferAlignment() const { + return file_->GetRequiredBufferAlignment(); +} + +// Remove any kind of caching of data from the offset to offset+length +// of this file. If the length is 0, then it refers to the end of file. +// If the system is not caching the file contents, then this is a noop. +IOStatus EncryptedRandomAccessFile::InvalidateCache(size_t offset, + size_t length) { + return file_->InvalidateCache(offset + prefixLength_, length); +} + +// A file abstraction for sequential writing. The implementation +// must provide buffering since callers may append small fragments +// at a time to the file. +IOStatus EncryptedWritableFile::Append(const Slice& data, + const IOOptions& options, + IODebugContext* dbg) { + AlignedBuffer buf; + Slice dataToAppend(data); + if (data.size() > 0) { + auto offset = file_->GetFileSize(options, dbg); // size including prefix + // Encrypt in cloned buffer + buf.Alignment(GetRequiredBufferAlignment()); + buf.AllocateNewBuffer(data.size()); + // TODO (sagar0): Modify AlignedBuffer.Append to allow doing a memmove + // so that the next two lines can be replaced with buf.Append(). + memmove(buf.BufferStart(), data.data(), data.size()); + buf.Size(data.size()); + IOStatus io_s; + { + PERF_TIMER_GUARD(encrypt_data_nanos); + io_s = status_to_io_status( + stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize())); + } + if (!io_s.ok()) { + return io_s; + } + dataToAppend = Slice(buf.BufferStart(), buf.CurrentSize()); + } + return file_->Append(dataToAppend, options, dbg); +} + +IOStatus EncryptedWritableFile::PositionedAppend(const Slice& data, + uint64_t offset, + const IOOptions& options, + IODebugContext* dbg) { + AlignedBuffer buf; + Slice dataToAppend(data); + offset += prefixLength_; + if (data.size() > 0) { + // Encrypt in cloned buffer + buf.Alignment(GetRequiredBufferAlignment()); + buf.AllocateNewBuffer(data.size()); + memmove(buf.BufferStart(), data.data(), data.size()); + buf.Size(data.size()); + IOStatus io_s; + { + PERF_TIMER_GUARD(encrypt_data_nanos); + io_s = status_to_io_status( + stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize())); + } + if (!io_s.ok()) { + return io_s; + } + dataToAppend = Slice(buf.BufferStart(), buf.CurrentSize()); + } + return file_->PositionedAppend(dataToAppend, offset, options, dbg); +} + +// Indicates the upper layers if the current WritableFile implementation +// uses direct IO. +bool EncryptedWritableFile::use_direct_io() const { + return file_->use_direct_io(); +} + +// true if Sync() and Fsync() are safe to call concurrently with Append() +// and Flush(). +bool EncryptedWritableFile::IsSyncThreadSafe() const { + return file_->IsSyncThreadSafe(); +} + +// Use the returned alignment value to allocate +// aligned buffer for Direct I/O +size_t EncryptedWritableFile::GetRequiredBufferAlignment() const { + return file_->GetRequiredBufferAlignment(); +} + +/* + * Get the size of valid data in the file. + */ +uint64_t EncryptedWritableFile::GetFileSize(const IOOptions& options, + IODebugContext* dbg) { + return file_->GetFileSize(options, dbg) - prefixLength_; +} + +// Truncate is necessary to trim the file to the correct size +// before closing. It is not always possible to keep track of the file +// size due to whole pages writes. 
The behavior is undefined if called +// with other writes to follow. +IOStatus EncryptedWritableFile::Truncate(uint64_t size, + const IOOptions& options, + IODebugContext* dbg) { + return file_->Truncate(size + prefixLength_, options, dbg); +} + +// Remove any kind of caching of data from the offset to offset+length +// of this file. If the length is 0, then it refers to the end of file. +// If the system is not caching the file contents, then this is a noop. +// This call has no effect on dirty pages in the cache. +IOStatus EncryptedWritableFile::InvalidateCache(size_t offset, size_t length) { + return file_->InvalidateCache(offset + prefixLength_, length); +} + +// Sync a file range with disk. +// offset is the starting byte of the file range to be synchronized. +// nbytes specifies the length of the range to be synchronized. +// This asks the OS to initiate flushing the cached data to disk, +// without waiting for completion. +// Default implementation does nothing. +IOStatus EncryptedWritableFile::RangeSync(uint64_t offset, uint64_t nbytes, + const IOOptions& options, + IODebugContext* dbg) { + return file_->RangeSync(offset + prefixLength_, nbytes, options, dbg); +} + +// PrepareWrite performs any necessary preparation for a write +// before the write actually occurs. This allows for pre-allocation +// of space on devices where it can result in less file +// fragmentation and/or less waste from over-zealous filesystem +// pre-allocation. +void EncryptedWritableFile::PrepareWrite(size_t offset, size_t len, + const IOOptions& options, + IODebugContext* dbg) { + file_->PrepareWrite(offset + prefixLength_, len, options, dbg); +} + +void EncryptedWritableFile::SetPreallocationBlockSize(size_t size) { + // the size here doesn't need to include prefixLength_, as it's a + // configuration will be use for `PrepareWrite()`. + file_->SetPreallocationBlockSize(size); +} + +void EncryptedWritableFile::GetPreallocationStatus( + size_t* block_size, size_t* last_allocated_block) { + file_->GetPreallocationStatus(block_size, last_allocated_block); +} + +// Pre-allocates space for a file. +IOStatus EncryptedWritableFile::Allocate(uint64_t offset, uint64_t len, + const IOOptions& options, + IODebugContext* dbg) { + return file_->Allocate(offset + prefixLength_, len, options, dbg); +} + +IOStatus EncryptedWritableFile::Flush(const IOOptions& options, + IODebugContext* dbg) { + return file_->Flush(options, dbg); +} + +IOStatus EncryptedWritableFile::Sync(const IOOptions& options, + IODebugContext* dbg) { + return file_->Sync(options, dbg); +} + +IOStatus EncryptedWritableFile::Close(const IOOptions& options, + IODebugContext* dbg) { + return file_->Close(options, dbg); +} + +// A file abstraction for random reading and writing. + +// Indicates if the class makes use of direct I/O +// If false you must pass aligned buffer to Write() +bool EncryptedRandomRWFile::use_direct_io() const { + return file_->use_direct_io(); +} + +// Use the returned alignment value to allocate +// aligned buffer for Direct I/O +size_t EncryptedRandomRWFile::GetRequiredBufferAlignment() const { + return file_->GetRequiredBufferAlignment(); +} + +// Write bytes in `data` at offset `offset`, Returns Status::OK() on success. +// Pass aligned buffer when use_direct_io() returns true. 
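Append, PositionedAppend, and Write never encrypt the caller's Slice in place: each copies the payload into an alignment-aware scratch buffer, encrypts the copy, and hands the clone to the underlying file. A compact sketch of that copy-encrypt-write shape (ByteSink and the XOR keystream are stand-ins):

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Stand-in for the underlying file.
struct ByteSink {
  std::string bytes;
  void Append(const char* p, size_t n) { bytes.append(p, n); }
};

// Copy the caller's data, encrypt the copy, write the copy: the caller's
// buffer is left untouched, as in EncryptedWritableFile::Append above.
void EncryptedAppend(ByteSink& sink, uint64_t offset, const char* data,
                     size_t n, uint8_t key) {
  std::vector<char> clone(data, data + n);        // the cloned buffer
  for (size_t i = 0; i < n; ++i) {
    clone[i] = static_cast<char>(clone[i] ^ (key + ((offset + i) & 0xff)));
  }
  sink.Append(clone.data(), n);
}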
+IOStatus EncryptedRandomRWFile::Write(uint64_t offset, const Slice& data, + const IOOptions& options, + IODebugContext* dbg) { + AlignedBuffer buf; + Slice dataToWrite(data); + offset += prefixLength_; + if (data.size() > 0) { + // Encrypt in cloned buffer + buf.Alignment(GetRequiredBufferAlignment()); + buf.AllocateNewBuffer(data.size()); + memmove(buf.BufferStart(), data.data(), data.size()); + buf.Size(data.size()); + IOStatus io_s; + { + PERF_TIMER_GUARD(encrypt_data_nanos); + io_s = status_to_io_status( + stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize())); + } + if (!io_s.ok()) { + return io_s; + } + dataToWrite = Slice(buf.BufferStart(), buf.CurrentSize()); + } + return file_->Write(offset, dataToWrite, options, dbg); +} + +// Read up to `n` bytes starting from offset `offset` and store them in +// result, provided `scratch` size should be at least `n`. +// Returns Status::OK() on success. +IOStatus EncryptedRandomRWFile::Read(uint64_t offset, size_t n, + const IOOptions& options, Slice* result, + char* scratch, IODebugContext* dbg) const { + assert(scratch); + offset += prefixLength_; + auto status = file_->Read(offset, n, options, result, scratch, dbg); + if (!status.ok()) { + return status; + } + { + PERF_TIMER_GUARD(decrypt_data_nanos); + status = status_to_io_status( + stream_->Decrypt(offset, (char*)result->data(), result->size())); + } + return status; +} + +IOStatus EncryptedRandomRWFile::Flush(const IOOptions& options, + IODebugContext* dbg) { + return file_->Flush(options, dbg); +} + +IOStatus EncryptedRandomRWFile::Sync(const IOOptions& options, + IODebugContext* dbg) { + return file_->Sync(options, dbg); +} + +IOStatus EncryptedRandomRWFile::Fsync(const IOOptions& options, + IODebugContext* dbg) { + return file_->Fsync(options, dbg); +} + +IOStatus EncryptedRandomRWFile::Close(const IOOptions& options, + IODebugContext* dbg) { + return file_->Close(options, dbg); +} + +namespace { +static std::unordered_map encrypted_fs_type_info = + { + {"provider", + OptionTypeInfo::AsCustomSharedPtr( + 0 /* No offset, whole struct*/, OptionVerificationType::kByName, + OptionTypeFlags::kNone)}, +}; +// EncryptedFileSystemImpl implements an FileSystemWrapper that adds encryption +// to files stored on disk. +class EncryptedFileSystemImpl : public EncryptedFileSystem { + public: + const char* Name() const override { + return EncryptedFileSystem::kClassName(); + } + // Returns the raw encryption provider that should be used to write the input + // encrypted file. If there is no such provider, NotFound is returned. + IOStatus GetWritableProvider(const std::string& /*fname*/, + EncryptionProvider** result) { + if (provider_) { + *result = provider_.get(); + return IOStatus::OK(); + } else { + *result = nullptr; + return IOStatus::NotFound("No WriteProvider specified"); + } + } + + // Returns the raw encryption provider that should be used to read the input + // encrypted file. If there is no such provider, NotFound is returned. + IOStatus GetReadableProvider(const std::string& /*fname*/, + EncryptionProvider** result) { + if (provider_) { + *result = provider_.get(); + return IOStatus::OK(); + } else { + *result = nullptr; + return IOStatus::NotFound("No Provider specified"); + } + } + + // Creates a CipherStream for the underlying file/name using the options + // If a writable provider is found and encryption is enabled, uses + // this provider to create a cipher stream. 
+ // @param fname Name of the writable file + // @param underlying The underlying "raw" file + // @param options Options for creating the file/cipher + // @param prefix_length Returns the length of the encryption prefix used for + // this file + // @param stream Returns the cipher stream to use for this file if it + // should be encrypted + // @return OK on success, non-OK on failure. + template + IOStatus CreateWritableCipherStream( + const std::string& fname, const std::unique_ptr& underlying, + const FileOptions& options, size_t* prefix_length, + std::unique_ptr* stream, IODebugContext* dbg) { + EncryptionProvider* provider = nullptr; + *prefix_length = 0; + IOStatus status = GetWritableProvider(fname, &provider); + if (!status.ok()) { + return status; + } else if (provider != nullptr) { + // Initialize & write prefix (if needed) + AlignedBuffer buffer; + Slice prefix; + *prefix_length = provider->GetPrefixLength(); + if (*prefix_length > 0) { + // Initialize prefix + buffer.Alignment(underlying->GetRequiredBufferAlignment()); + buffer.AllocateNewBuffer(*prefix_length); + status = status_to_io_status(provider->CreateNewPrefix( + fname, buffer.BufferStart(), *prefix_length)); + if (status.ok()) { + buffer.Size(*prefix_length); + prefix = Slice(buffer.BufferStart(), buffer.CurrentSize()); + // Write prefix + status = underlying->Append(prefix, options.io_options, dbg); + } + if (!status.ok()) { + return status; + } + } + // Create cipher stream + status = status_to_io_status( + provider->CreateCipherStream(fname, options, prefix, stream)); + } + return status; + } + + template + IOStatus CreateWritableEncryptedFile(const std::string& fname, + std::unique_ptr& underlying, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) { + // Create cipher stream + std::unique_ptr stream; + size_t prefix_length; + IOStatus status = CreateWritableCipherStream(fname, underlying, options, + &prefix_length, &stream, dbg); + if (status.ok()) { + if (stream) { + result->reset(new EncryptedWritableFile( + std::move(underlying), std::move(stream), prefix_length)); + } else { + result->reset(underlying.release()); + } + } + return status; + } + + // Creates a CipherStream for the underlying file/name using the options + // If a writable provider is found and encryption is enabled, uses + // this provider to create a cipher stream. + // @param fname Name of the writable file + // @param underlying The underlying "raw" file + // @param options Options for creating the file/cipher + // @param prefix_length Returns the length of the encryption prefix used for + // this file + // @param stream Returns the cipher stream to use for this file if it + // should be encrypted + // @return OK on success, non-OK on failure. 
+ template + IOStatus CreateRandomWriteCipherStream( + const std::string& fname, const std::unique_ptr& underlying, + const FileOptions& options, size_t* prefix_length, + std::unique_ptr* stream, IODebugContext* dbg) { + EncryptionProvider* provider = nullptr; + *prefix_length = 0; + IOStatus io_s = GetWritableProvider(fname, &provider); + if (!io_s.ok()) { + return io_s; + } else if (provider != nullptr) { + // Initialize & write prefix (if needed) + AlignedBuffer buffer; + Slice prefix; + *prefix_length = provider->GetPrefixLength(); + if (*prefix_length > 0) { + // Initialize prefix + buffer.Alignment(underlying->GetRequiredBufferAlignment()); + buffer.AllocateNewBuffer(*prefix_length); + io_s = status_to_io_status(provider->CreateNewPrefix( + fname, buffer.BufferStart(), *prefix_length)); + if (io_s.ok()) { + buffer.Size(*prefix_length); + prefix = Slice(buffer.BufferStart(), buffer.CurrentSize()); + // Write prefix + io_s = underlying->Write(0, prefix, options.io_options, dbg); + } + if (!io_s.ok()) { + return io_s; + } + } + // Create cipher stream + io_s = status_to_io_status( + provider->CreateCipherStream(fname, options, prefix, stream)); + } + return io_s; + } + + // Creates a CipherStream for the underlying file/name using the options + // If a readable provider is found and the file is encrypted, uses + // this provider to create a cipher stream. + // @param fname Name of the writable file + // @param underlying The underlying "raw" file + // @param options Options for creating the file/cipher + // @param prefix_length Returns the length of the encryption prefix used for + // this file + // @param stream Returns the cipher stream to use for this file if it + // is encrypted + // @return OK on success, non-OK on failure. + template + IOStatus CreateSequentialCipherStream( + const std::string& fname, const std::unique_ptr& underlying, + const FileOptions& options, size_t* prefix_length, + std::unique_ptr* stream, IODebugContext* dbg) { + // Read prefix (if needed) + AlignedBuffer buffer; + Slice prefix; + *prefix_length = provider_->GetPrefixLength(); + if (*prefix_length > 0) { + // Read prefix + buffer.Alignment(underlying->GetRequiredBufferAlignment()); + buffer.AllocateNewBuffer(*prefix_length); + IOStatus status = underlying->Read(*prefix_length, options.io_options, + &prefix, buffer.BufferStart(), dbg); + if (!status.ok()) { + return status; + } + buffer.Size(*prefix_length); + } + return status_to_io_status( + provider_->CreateCipherStream(fname, options, prefix, stream)); + } + + // Creates a CipherStream for the underlying file/name using the options + // If a readable provider is found and the file is encrypted, uses + // this provider to create a cipher stream. + // @param fname Name of the writable file + // @param underlying The underlying "raw" file + // @param options Options for creating the file/cipher + // @param prefix_length Returns the length of the encryption prefix used for + // this file + // @param stream Returns the cipher stream to use for this file if it + // is encrypted + // @return OK on success, non-OK on failure. 
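All the Create*CipherStream helpers follow one protocol: on the write side, generate a fresh prefix and persist it at offset 0 before any payload; on the read side, load the prefix first and derive the stream from it. A schematic of the writer half, with a hypothetical Provider and an assumed File::WriteAt API (neither is RocksDB's interface):

#include <cstddef>
#include <memory>
#include <string>

struct CipherStream {};  // state derived from the prefix

// Hypothetical provider protocol mirroring the helpers above.
struct Provider {
  size_t PrefixLength() const { return 16; }
  std::string NewPrefix() const { return std::string(PrefixLength(), '\0'); }
  std::unique_ptr<CipherStream> FromPrefix(const std::string&) const {
    return std::make_unique<CipherStream>();
  }
};

// Writer side: the prefix is created and written before any payload bytes,
// so a later reader can always reconstruct the stream from offset 0.
template <typename File>
std::unique_ptr<CipherStream> OpenForWrite(Provider& p, File& f) {
  std::string prefix = p.NewPrefix();
  f.WriteAt(0, prefix);                  // assumed File API
  return p.FromPrefix(prefix);
}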
+ template + IOStatus CreateRandomReadCipherStream( + const std::string& fname, const std::unique_ptr& underlying, + const FileOptions& options, size_t* prefix_length, + std::unique_ptr* stream, IODebugContext* dbg) { + // Read prefix (if needed) + AlignedBuffer buffer; + Slice prefix; + *prefix_length = provider_->GetPrefixLength(); + if (*prefix_length > 0) { + // Read prefix + buffer.Alignment(underlying->GetRequiredBufferAlignment()); + buffer.AllocateNewBuffer(*prefix_length); + IOStatus status = underlying->Read(0, *prefix_length, options.io_options, + &prefix, buffer.BufferStart(), dbg); + if (!status.ok()) { + return status; + } + buffer.Size(*prefix_length); + } + return status_to_io_status( + provider_->CreateCipherStream(fname, options, prefix, stream)); + } + + public: + EncryptedFileSystemImpl(const std::shared_ptr& base, + const std::shared_ptr& provider) + : EncryptedFileSystem(base) { + provider_ = provider; + RegisterOptions("EncryptionProvider", &provider_, &encrypted_fs_type_info); + } + + Status AddCipher(const std::string& descriptor, const char* cipher, + size_t len, bool for_write) override { + return provider_->AddCipher(descriptor, cipher, len, for_write); + } + + // NewSequentialFile opens a file for sequential reading. + IOStatus NewSequentialFile(const std::string& fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override { + result->reset(); + if (options.use_mmap_reads) { + return IOStatus::InvalidArgument(); + } + // Open file using underlying Env implementation + std::unique_ptr underlying; + auto status = + FileSystemWrapper::NewSequentialFile(fname, options, &underlying, dbg); + if (!status.ok()) { + return status; + } + uint64_t file_size; + status = FileSystemWrapper::GetFileSize(fname, options.io_options, + &file_size, dbg); + if (!status.ok()) { + return status; + } + if (!file_size) { + *result = std::move(underlying); + return status; + } + // Create cipher stream + std::unique_ptr stream; + size_t prefix_length; + status = CreateSequentialCipherStream(fname, underlying, options, + &prefix_length, &stream, dbg); + if (status.ok()) { + result->reset(new EncryptedSequentialFile( + std::move(underlying), std::move(stream), prefix_length)); + } + return status; + } + + // NewRandomAccessFile opens a file for random read access. + IOStatus NewRandomAccessFile(const std::string& fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override { + result->reset(); + if (options.use_mmap_reads) { + return IOStatus::InvalidArgument(); + } + // Open file using underlying Env implementation + std::unique_ptr underlying; + auto status = FileSystemWrapper::NewRandomAccessFile(fname, options, + &underlying, dbg); + if (!status.ok()) { + return status; + } + std::unique_ptr stream; + size_t prefix_length; + status = CreateRandomReadCipherStream(fname, underlying, options, + &prefix_length, &stream, dbg); + if (status.ok()) { + if (stream) { + result->reset(new EncryptedRandomAccessFile( + std::move(underlying), std::move(stream), prefix_length)); + } else { + result->reset(underlying.release()); + } + } + return status; + } + + // NewWritableFile opens a file for sequential writing. 
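One detail worth noticing in NewSequentialFile and NewRandomAccessFile above: when no cipher stream is produced (an empty file, or no provider), the wrapper releases the plain file straight into the result rather than stacking a no-op decorator. That conditional-wrapping shape in miniature:

#include <memory>
#include <utility>

struct File { virtual ~File() = default; };

struct DecryptingFile : File {
  explicit DecryptingFile(std::unique_ptr<File> inner)
      : inner_(std::move(inner)) {}
 private:
  std::unique_ptr<File> inner_;
};

// Wrap only when there is something to decrypt; otherwise pass through,
// as `result->reset(underlying.release())` does above.
std::unique_ptr<File> MaybeWrap(std::unique_ptr<File> underlying, bool encrypted) {
  if (encrypted) return std::make_unique<DecryptingFile>(std::move(underlying));
  return underlying;
}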
+  IOStatus NewWritableFile(const std::string& fname,
+                           const FileOptions& options,
+                           std::unique_ptr<FSWritableFile>* result,
+                           IODebugContext* dbg) override {
+    result->reset();
+    if (options.use_mmap_writes) {
+      return IOStatus::InvalidArgument();
+    }
+    // Open file using underlying Env implementation
+    std::unique_ptr<FSWritableFile> underlying;
+    IOStatus status =
+        FileSystemWrapper::NewWritableFile(fname, options, &underlying, dbg);
+    if (!status.ok()) {
+      return status;
+    }
+    return CreateWritableEncryptedFile(fname, underlying, options, result,
+                                       dbg);
+  }
+
+  // Create an object that writes to a new file with the specified
+  // name. Deletes any existing file with the same name and creates a
+  // new file. On success, stores a pointer to the new file in
+  // *result and returns OK. On failure stores nullptr in *result and
+  // returns non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  IOStatus ReopenWritableFile(const std::string& fname,
+                              const FileOptions& options,
+                              std::unique_ptr<FSWritableFile>* result,
+                              IODebugContext* dbg) override {
+    result->reset();
+    if (options.use_mmap_writes) {
+      return IOStatus::InvalidArgument();
+    }
+    // Open file using underlying Env implementation
+    std::unique_ptr<FSWritableFile> underlying;
+    IOStatus status = FileSystemWrapper::ReopenWritableFile(fname, options,
+                                                            &underlying, dbg);
+    if (!status.ok()) {
+      return status;
+    }
+    return CreateWritableEncryptedFile(fname, underlying, options, result,
+                                       dbg);
+  }
+
+  // Reuse an existing file by renaming it and opening it as writable.
+  IOStatus ReuseWritableFile(const std::string& fname,
+                             const std::string& old_fname,
+                             const FileOptions& options,
+                             std::unique_ptr<FSWritableFile>* result,
+                             IODebugContext* dbg) override {
+    result->reset();
+    if (options.use_mmap_writes) {
+      return IOStatus::InvalidArgument();
+    }
+    // Open file using underlying Env implementation
+    std::unique_ptr<FSWritableFile> underlying;
+    auto status = FileSystemWrapper::ReuseWritableFile(
+        fname, old_fname, options, &underlying, dbg);
+    if (!status.ok()) {
+      return status;
+    }
+    return CreateWritableEncryptedFile(fname, underlying, options, result,
+                                       dbg);
+  }
+  // Open `fname` for random read and write; if the file doesn't exist it
+  // will be created. On success, stores a pointer to the new file in
+  // *result and returns OK. On failure returns non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  IOStatus NewRandomRWFile(const std::string& fname,
+                           const FileOptions& options,
+                           std::unique_ptr<FSRandomRWFile>* result,
+                           IODebugContext* dbg) override {
+    result->reset();
+    if (options.use_mmap_reads || options.use_mmap_writes) {
+      return IOStatus::InvalidArgument();
+    }
+    // Check file exists
+    bool isNewFile = !FileExists(fname, options.io_options, dbg).ok();
+
+    // Open file using underlying Env implementation
+    std::unique_ptr<FSRandomRWFile> underlying;
+    auto status =
+        FileSystemWrapper::NewRandomRWFile(fname, options, &underlying, dbg);
+    if (!status.ok()) {
+      return status;
+    }
+    // Create cipher stream
+    std::unique_ptr<BlockAccessCipherStream> stream;
+    size_t prefix_length = 0;
+    if (!isNewFile) {
+      // File already exists, read prefix
+      status = CreateRandomReadCipherStream(fname, underlying, options,
+                                            &prefix_length, &stream, dbg);
+    } else {
+      status = CreateRandomWriteCipherStream(fname, underlying, options,
+                                             &prefix_length, &stream, dbg);
+    }
+    if (status.ok()) {
+      if (stream) {
+        result->reset(new EncryptedRandomRWFile(
+            std::move(underlying), std::move(stream), prefix_length));
+      } else {
+        result->reset(underlying.release());
+      }
+    }
+    return status;
+  }
+
+  // Store in *result the attributes of the children of the specified
+  // directory.
+  // In case the implementation lists the directory prior to iterating the
+  // files, and files are concurrently deleted, the deleted files will be
+  // omitted from result.
+  // The name attributes are relative to "dir".
+  // Original contents of *results are dropped.
+  // Returns OK if "dir" exists and "*result" contains its children.
+  //         NotFound if "dir" does not exist, the calling process does not
+  //                  have permission to access "dir", or if "dir" is invalid.
+  //         IOError if an IO Error was encountered
+  IOStatus GetChildrenFileAttributes(const std::string& dir,
+                                     const IOOptions& options,
+                                     std::vector<FileAttributes>* result,
+                                     IODebugContext* dbg) override {
+    auto status = FileSystemWrapper::GetChildrenFileAttributes(dir, options,
+                                                               result, dbg);
+    if (!status.ok()) {
+      return status;
+    }
+    for (auto it = std::begin(*result); it != std::end(*result); ++it) {
+      // assert(it->size_bytes >= prefixLength);
+      //  breaks env_basic_test when called on a directory containing
+      //  directories, which makes subtraction of prefixLength worrisome
+      //  since FileAttributes does not identify directories
+      EncryptionProvider* provider;
+      status = GetReadableProvider(it->name, &provider);
+      if (!status.ok()) {
+        return status;
+      } else if (provider != nullptr) {
+        it->size_bytes -= provider->GetPrefixLength();
+      }
+    }
+    return IOStatus::OK();
+  }
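+  // Worked example of the prefix-size accounting used above and in
+  // GetFileSize below (numbers are illustrative): with a provider whose
+  // GetPrefixLength() is 4096, a file holding 6144 bytes of user data
+  // occupies 4096 + 6144 = 10240 physical bytes, so the sizes reported to
+  // callers are shrunk back from 10240 to 6144.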
+  // Store the size of fname in *file_size.
+  IOStatus GetFileSize(const std::string& fname, const IOOptions& options,
+                       uint64_t* file_size, IODebugContext* dbg) override {
+    auto status =
+        FileSystemWrapper::GetFileSize(fname, options, file_size, dbg);
+    if (!status.ok() || !(*file_size)) {
+      return status;
+    }
+    EncryptionProvider* provider;
+    status = GetReadableProvider(fname, &provider);
+    if (provider != nullptr && status.ok()) {
+      size_t prefixLength = provider->GetPrefixLength();
+      assert(*file_size >= prefixLength);
+      *file_size -= prefixLength;
+    }
+    return status;
+  }
+
+ private:
+  std::shared_ptr<EncryptionProvider> provider_;
+};
+}  // namespace
+
+Status NewEncryptedFileSystemImpl(
+    const std::shared_ptr<FileSystem>& base,
+    const std::shared_ptr<EncryptionProvider>& provider,
+    std::unique_ptr<FileSystem>* result) {
+  result->reset(new EncryptedFileSystemImpl(base, provider));
+  return Status::OK();
+}
+
+std::shared_ptr<FileSystem> NewEncryptedFS(
+    const std::shared_ptr<FileSystem>& base,
+    const std::shared_ptr<EncryptionProvider>& provider) {
+  std::unique_ptr<FileSystem> efs;
+  Status s = NewEncryptedFileSystemImpl(base, provider, &efs);
+  if (s.ok()) {
+    s = efs->PrepareOptions(ConfigOptions());
+  }
+  if (s.ok()) {
+    std::shared_ptr<FileSystem> result(efs.release());
+    return result;
+  } else {
+    return nullptr;
+  }
+}
+
+// Returns an Env that encrypts data when stored on disk and decrypts data when
+// read from disk.
+Env* NewEncryptedEnv(Env* base_env,
+                     const std::shared_ptr<EncryptionProvider>& provider) {
+  return new CompositeEnvWrapper(
+      base_env, NewEncryptedFS(base_env->GetFileSystem(), provider));
+}
+
+// Encrypt one or more (partial) blocks of data at the file offset.
+// Length of data is given in dataSize.
+Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char* data,
+                                        size_t dataSize) {
+  // Calculate block index
+  auto blockSize = BlockSize();
+  uint64_t blockIndex = fileOffset / blockSize;
+  size_t blockOffset = fileOffset % blockSize;
+  std::unique_ptr<char[]> blockBuffer;
+
+  std::string scratch;
+  AllocateScratch(scratch);
+
+  // Encrypt individual blocks.
+  while (1) {
+    char* block = data;
+    size_t n = std::min(dataSize, blockSize - blockOffset);
+    if (n != blockSize) {
+      // We're not encrypting a full block.
+      // Copy data to blockBuffer
+      if (!blockBuffer.get()) {
+        // Allocate buffer
+        blockBuffer = std::unique_ptr<char[]>(new char[blockSize]);
+      }
+      block = blockBuffer.get();
+      // Copy plain data to block buffer
+      memmove(block + blockOffset, data, n);
+    }
+    auto status = EncryptBlock(blockIndex, block, (char*)scratch.data());
+    if (!status.ok()) {
+      return status;
+    }
+    if (block != data) {
+      // Copy encrypted data back to `data`.
+      memmove(data, block + blockOffset, n);
+    }
+    dataSize -= n;
+    if (dataSize == 0) {
+      return Status::OK();
+    }
+    data += n;
+    blockOffset = 0;
+    blockIndex++;
+  }
+}
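+// Worked example of the block arithmetic above: with BlockSize() == 16 and
+// fileOffset == 100, blockIndex = 100 / 16 = 6 and blockOffset = 100 % 16 = 4,
+// so the first iteration handles n = min(dataSize, 16 - 4) = at most 12 bytes
+// through the partial-block path; blockOffset then resets to 0 and the rest
+// of the data is processed a full block at a time.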
+// Decrypt one or more (partial) blocks of data at the file offset.
+// Length of data is given in dataSize.
+Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char* data,
+                                        size_t dataSize) {
+  // Calculate block index
+  auto blockSize = BlockSize();
+  uint64_t blockIndex = fileOffset / blockSize;
+  size_t blockOffset = fileOffset % blockSize;
+  std::unique_ptr<char[]> blockBuffer;
+
+  std::string scratch;
+  AllocateScratch(scratch);
+
+  // Decrypt individual blocks.
+  while (1) {
+    char* block = data;
+    size_t n = std::min(dataSize, blockSize - blockOffset);
+    if (n != blockSize) {
+      // We're not decrypting a full block.
+      // Copy data to blockBuffer
+      if (!blockBuffer.get()) {
+        // Allocate buffer
+        blockBuffer = std::unique_ptr<char[]>(new char[blockSize]);
+      }
+      block = blockBuffer.get();
+      // Copy encrypted data to block buffer
+      memmove(block + blockOffset, data, n);
+    }
+    auto status = DecryptBlock(blockIndex, block, (char*)scratch.data());
+    if (!status.ok()) {
+      return status;
+    }
+    if (block != data) {
+      // Copy decrypted data back to `data`.
+      memmove(data, block + blockOffset, n);
+    }
+
+    // Simply decrementing dataSize by n could cause it to underflow,
+    // which will very likely make it read over the original bounds later
+    assert(dataSize >= n);
+    if (dataSize < n) {
+      return Status::Corruption("Cannot decrypt data at given offset");
+    }
+
+    dataSize -= n;
+    if (dataSize == 0) {
+      return Status::OK();
+    }
+    data += n;
+    blockOffset = 0;
+    blockIndex++;
+  }
+}
+
+namespace {
+static std::unordered_map<std::string, OptionTypeInfo>
+    rot13_block_cipher_type_info = {
+        {"block_size",
+         {0 /* No offset, whole struct*/, OptionType::kInt,
+          OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+};
+// Implements a BlockCipher using ROT13.
+//
+// Note: This is a sample implementation of BlockCipher,
+// it is NOT considered safe and should NOT be used in production.
+class ROT13BlockCipher : public BlockCipher {
+ private:
+  size_t blockSize_;
+
+ public:
+  explicit ROT13BlockCipher(size_t blockSize) : blockSize_(blockSize) {
+    RegisterOptions("ROT13BlockCipherOptions", &blockSize_,
+                    &rot13_block_cipher_type_info);
+  }
+
+  static const char* kClassName() { return "ROT13"; }
+  const char* Name() const override { return kClassName(); }
+  // BlockSize returns the size of each block supported by this cipher stream.
+  size_t BlockSize() override { return blockSize_; }
+
+  // Encrypt a block of data.
+  // Length of data is equal to BlockSize().
+  Status Encrypt(char* data) override {
+    for (size_t i = 0; i < blockSize_; ++i) {
+      data[i] += 13;
+    }
+    return Status::OK();
+  }
+
+  // Decrypt a block of data.
+  // Length of data is equal to BlockSize().
+  Status Decrypt(char* data) override { return Encrypt(data); }
+};
+static const std::unordered_map<std::string, OptionTypeInfo>
+    ctr_encryption_provider_type_info = {
+        {"cipher",
+         OptionTypeInfo::AsCustomSharedPtr<BlockCipher>(
+             0 /* No offset, whole struct*/, OptionVerificationType::kByName,
+             OptionTypeFlags::kNone)},
+};
+}  // anonymous namespace
+
+// Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
+void CTRCipherStream::AllocateScratch(std::string& scratch) {
+  auto blockSize = cipher_->BlockSize();
+  scratch.reserve(blockSize);
+}
+
+// Encrypt a block of data at the given block index.
+// Length of data is equal to BlockSize();
+Status CTRCipherStream::EncryptBlock(uint64_t blockIndex, char* data,
+                                     char* scratch) {
+  // Create nonce + counter
+  auto blockSize = cipher_->BlockSize();
+  memmove(scratch, iv_.data(), blockSize);
+  EncodeFixed64(scratch, blockIndex + initialCounter_);
+
+  // Encrypt nonce+counter
+  auto status = cipher_->Encrypt(scratch);
+  if (!status.ok()) {
+    return status;
+  }
+
+  // XOR data with ciphertext.
+  for (size_t i = 0; i < blockSize; i++) {
+    data[i] = data[i] ^ scratch[i];
+  }
+  return Status::OK();
+}
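+// Because EncryptBlock() only XORs the data with an encrypted nonce+counter
+// keystream, applying it twice with the same block index restores the
+// original bytes; DecryptBlock() below relies on exactly this property.
+// A minimal sketch, assuming some std::shared_ptr<BlockCipher> `cipher`
+// with a 16-byte block size (the IV value is illustrative):
+//
+//   char iv[16] = {0};
+//   CTRCipherStream s(cipher, iv, /*initialCounter=*/0);
+//   char buf[16] = "fifteen bytes!!";
+//   s.Encrypt(0, buf, sizeof(buf));  // XOR with keystream
+//   s.Decrypt(0, buf, sizeof(buf));  // XOR again: buf is restored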
+// Decrypt a block of data at the given block index.
+// Length of data is equal to BlockSize();
+Status CTRCipherStream::DecryptBlock(uint64_t blockIndex, char* data,
+                                     char* scratch) {
+  // For CTR decryption & encryption are the same
+  return EncryptBlock(blockIndex, data, scratch);
+}
+
+CTREncryptionProvider::CTREncryptionProvider(
+    const std::shared_ptr<BlockCipher>& c)
+    : cipher_(c) {
+  RegisterOptions("Cipher", &cipher_, &ctr_encryption_provider_type_info);
+}
+
+bool CTREncryptionProvider::IsInstanceOf(const std::string& name) const {
+  // Special case for test purposes.
+  if (name == "1://test" && cipher_ != nullptr) {
+    return cipher_->IsInstanceOf(ROT13BlockCipher::kClassName());
+  } else {
+    return EncryptionProvider::IsInstanceOf(name);
+  }
+}
+
+// GetPrefixLength returns the length of the prefix that is added to every file
+// and used for storing encryption options.
+// For optimal performance, the prefix length should be a multiple of
+// the page size.
+size_t CTREncryptionProvider::GetPrefixLength() const {
+  return defaultPrefixLength;
+}
+
+Status CTREncryptionProvider::AddCipher(const std::string& /*descriptor*/,
+                                        const char* cipher, size_t len,
+                                        bool /*for_write*/) {
+  if (cipher_) {
+    return Status::NotSupported("Cannot add keys to CTREncryptionProvider");
+  } else if (strcmp(ROT13BlockCipher::kClassName(), cipher) == 0) {
+    cipher_.reset(new ROT13BlockCipher(len));
+    return Status::OK();
+  } else {
+    return BlockCipher::CreateFromString(ConfigOptions(), std::string(cipher),
+                                         &cipher_);
+  }
+}
+
+// decodeCTRParameters decodes the initial counter & IV from the given
+// (plain text) prefix.
+static void decodeCTRParameters(const char* prefix, size_t blockSize,
+                                uint64_t& initialCounter, Slice& iv) {
+  // First block contains 64-bit initial counter
+  initialCounter = DecodeFixed64(prefix);
+  // Second block contains IV
+  iv = Slice(prefix + blockSize, blockSize);
+}
+
+// CreateNewPrefix initializes an allocated block of prefix memory
+// for a new file.
+Status CTREncryptionProvider::CreateNewPrefix(const std::string& /*fname*/,
+                                              char* prefix,
+                                              size_t prefixLength) const {
+  if (!cipher_) {
+    return Status::InvalidArgument("Encryption Cipher is missing");
+  }
+  // Create & seed rnd.
+  Random rnd((uint32_t)SystemClock::Default()->NowMicros());
+  // Fill entire prefix block with random values.
+  for (size_t i = 0; i < prefixLength; i++) {
+    prefix[i] = rnd.Uniform(256) & 0xFF;
+  }
+  // Take random data to extract initial counter & IV
+  auto blockSize = cipher_->BlockSize();
+  uint64_t initialCounter;
+  Slice prefixIV;
+  decodeCTRParameters(prefix, blockSize, initialCounter, prefixIV);
+
+  // Now populate the rest of the prefix, starting from the third block.
+  PopulateSecretPrefixPart(prefix + (2 * blockSize),
+                           prefixLength - (2 * blockSize), blockSize);
+
+  // Encrypt the prefix, starting from block 2 (leave block 0, 1 with initial
+  // counter & IV unencrypted)
+  CTRCipherStream cipherStream(cipher_, prefixIV.data(), initialCounter);
+  Status status;
+  {
+    PERF_TIMER_GUARD(encrypt_data_nanos);
+    status = cipherStream.Encrypt(0, prefix + (2 * blockSize),
+                                  prefixLength - (2 * blockSize));
+  }
+  if (!status.ok()) {
+    return status;
+  }
+  return Status::OK();
+}
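+// Sketch of the prefix layout produced by CreateNewPrefix() above, with
+// bs = cipher block size (every region starts out as random bytes):
+//
+//   [0, 8)         initial counter (plain, as read by DecodeFixed64)
+//   [8, bs)        remainder of the counter block (plain, random padding)
+//   [bs, 2*bs)     IV (plain)
+//   [2*bs, len)    secret prefix part, CTR-encrypted with the counter/IV above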
+// PopulateSecretPrefixPart initializes the data into a new prefix block
+// in plain text.
+// Returns the amount of space (starting from the start of the prefix)
+// that has been initialized.
+size_t CTREncryptionProvider::PopulateSecretPrefixPart(
+    char* /*prefix*/, size_t /*prefixLength*/, size_t /*blockSize*/) const {
+  // Nothing to do here, put in custom data in override when needed.
+  return 0;
+}
+
+Status CTREncryptionProvider::CreateCipherStream(
+    const std::string& fname, const EnvOptions& options, Slice& prefix,
+    std::unique_ptr<BlockAccessCipherStream>* result) {
+  if (!cipher_) {
+    return Status::InvalidArgument("Encryption Cipher is missing");
+  }
+  // Read plain text part of prefix.
+  auto blockSize = cipher_->BlockSize();
+  uint64_t initialCounter;
+  Slice iv;
+  decodeCTRParameters(prefix.data(), blockSize, initialCounter, iv);
+
+  // If the prefix is smaller than twice the block size, we would below read a
+  // very large chunk of the file (and very likely read over the bounds)
+  assert(prefix.size() >= 2 * blockSize);
+  if (prefix.size() < 2 * blockSize) {
+    return Status::Corruption("Unable to read from file " + fname +
+                              ": read attempt would read beyond file bounds");
+  }
+
+  // Decrypt the encrypted part of the prefix, starting from block 2 (block 0,
+  // 1 with initial counter & IV are unencrypted)
+  CTRCipherStream cipherStream(cipher_, iv.data(), initialCounter);
+  Status status;
+  {
+    PERF_TIMER_GUARD(decrypt_data_nanos);
+    status = cipherStream.Decrypt(0, (char*)prefix.data() + (2 * blockSize),
+                                  prefix.size() - (2 * blockSize));
+  }
+  if (!status.ok()) {
+    return status;
+  }
+
+  // Create cipher stream
+  return CreateCipherStreamFromPrefix(fname, options, initialCounter, iv,
+                                      prefix, result);
+}
+
+// CreateCipherStreamFromPrefix creates a block access cipher stream for a
+// file given its name and options. The given prefix is already decrypted.
+Status CTREncryptionProvider::CreateCipherStreamFromPrefix(
+    const std::string& /*fname*/, const EnvOptions& /*options*/,
+    uint64_t initialCounter, const Slice& iv, const Slice& /*prefix*/,
+    std::unique_ptr<BlockAccessCipherStream>* result) {
+  (*result) = std::unique_ptr<BlockAccessCipherStream>(
+      new CTRCipherStream(cipher_, iv.data(), initialCounter));
+  return Status::OK();
+}
+
+namespace {
+static void RegisterEncryptionBuiltins() {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    auto lib = ObjectRegistry::Default()->AddLibrary("encryption");
+    // Match "CTR" or "CTR://test"
+    lib->AddFactory<EncryptionProvider>(
+        ObjectLibrary::PatternEntry(CTREncryptionProvider::kClassName(), true)
+            .AddSuffix("://test"),
+        [](const std::string& uri, std::unique_ptr<EncryptionProvider>* guard,
+           std::string* /*errmsg*/) {
+          if (EndsWith(uri, "://test")) {
+            std::shared_ptr<BlockCipher> cipher =
+                std::make_shared<ROT13BlockCipher>(32);
+            guard->reset(new CTREncryptionProvider(cipher));
+          } else {
+            guard->reset(new CTREncryptionProvider());
+          }
+          return guard->get();
+        });
+
+    lib->AddFactory<EncryptionProvider>(
+        "1://test", [](const std::string& /*uri*/,
+                       std::unique_ptr<EncryptionProvider>* guard,
+                       std::string* /*errmsg*/) {
+          std::shared_ptr<BlockCipher> cipher =
+              std::make_shared<ROT13BlockCipher>(32);
+          guard->reset(new CTREncryptionProvider(cipher));
+          return guard->get();
+        });
+
+    // Match "ROT13" or "ROT13:[0-9]+"
+    lib->AddFactory<BlockCipher>(
+        ObjectLibrary::PatternEntry(ROT13BlockCipher::kClassName(), true)
+            .AddNumber(":"),
+        [](const std::string& uri, std::unique_ptr<BlockCipher>* guard,
+           std::string* /* errmsg */) {
+          size_t colon = uri.find(':');
+          if (colon != std::string::npos) {
+            size_t block_size = ParseSizeT(uri.substr(colon + 1));
+            guard->reset(new ROT13BlockCipher(block_size));
+          } else {
+            guard->reset(new ROT13BlockCipher(32));
+          }
+
+          return guard->get();
+        });
+  });
+}
+}  // namespace
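+// The factories registered above make both types constructible from
+// URI-style strings via the CreateFromString entry points below. A minimal
+// sketch (these are the test-only ciphers; not safe for production data):
+//
+//   std::shared_ptr<BlockCipher> cipher;
+//   Status s = BlockCipher::CreateFromString(ConfigOptions(), "ROT13:32",
+//                                            &cipher);  // 32-byte blocks
+//   std::shared_ptr<EncryptionProvider> provider;
+//   if (s.ok()) {
+//     s = EncryptionProvider::CreateFromString(ConfigOptions(), "CTR://test",
+//                                              &provider);
+//   }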
+Status BlockCipher::CreateFromString(const ConfigOptions& config_options,
+                                     const std::string& value,
+                                     std::shared_ptr<BlockCipher>* result) {
+  RegisterEncryptionBuiltins();
+  return LoadSharedObject<BlockCipher>(config_options, value, result);
+}
+
+Status EncryptionProvider::CreateFromString(
+    const ConfigOptions& config_options, const std::string& value,
+    std::shared_ptr<EncryptionProvider>* result) {
+  RegisterEncryptionBuiltins();
+  return LoadSharedObject<EncryptionProvider>(config_options, value, result);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption_ctr.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption_ctr.h
new file mode 100644
index 0000000000..fab1da1fbf
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_encryption_ctr.h
@@ -0,0 +1,114 @@
+// Copyright (c) 2016-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/env_encryption.h"
+
+namespace ROCKSDB_NAMESPACE {
+// CTRCipherStream implements BlockAccessCipherStream using a
+// counter (CTR) mode of operation.
+// See https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation
+//
+// Note: This is a possible implementation of BlockAccessCipherStream,
+// it is considered suitable for use.
+class CTRCipherStream final : public BlockAccessCipherStream {
+ private:
+  std::shared_ptr<BlockCipher> cipher_;
+  std::string iv_;
+  uint64_t initialCounter_;
+
+ public:
+  CTRCipherStream(const std::shared_ptr<BlockCipher>& c, const char* iv,
+                  uint64_t initialCounter)
+      : cipher_(c), iv_(iv, c->BlockSize()), initialCounter_(initialCounter){};
+  virtual ~CTRCipherStream(){};
+
+  // BlockSize returns the size of each block supported by this cipher stream.
+  size_t BlockSize() override { return cipher_->BlockSize(); }
+
+ protected:
+  // Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
+  void AllocateScratch(std::string&) override;
+
+  // Encrypt a block of data at the given block index.
+  // Length of data is equal to BlockSize();
+  Status EncryptBlock(uint64_t blockIndex, char* data, char* scratch) override;
+
+  // Decrypt a block of data at the given block index.
+  // Length of data is equal to BlockSize();
+  Status DecryptBlock(uint64_t blockIndex, char* data, char* scratch) override;
+};
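+// End-to-end sketch of wiring a provider into an Env (illustrative only;
+// the ROT13-backed "CTR://test" provider exists for tests and is not safe
+// for real data):
+//
+//   std::shared_ptr<EncryptionProvider> provider;
+//   Status s = EncryptionProvider::CreateFromString(ConfigOptions(),
+//                                                   "CTR://test", &provider);
+//   Env* encrypted_env =
+//       s.ok() ? NewEncryptedEnv(Env::Default(), provider) : nullptr;
+//   // hand encrypted_env to the DB, e.g. via Options::env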
+
+// This encryption provider uses a CTR cipher stream, with a given block
+// cipher and IV.
+//
+// Note: This is a possible implementation of EncryptionProvider,
+// it is considered suitable for use, provided a safe BlockCipher is used.
+class CTREncryptionProvider : public EncryptionProvider {
+ private:
+  std::shared_ptr<BlockCipher> cipher_;
+
+ protected:
+  // For optimal performance when using direct IO, the prefix length should be
+  // a multiple of the page size. This size is to ensure the first real data
+  // byte is placed at the largest known alignment point for direct io.
+  const static size_t defaultPrefixLength = 4096;
+
+ public:
+  explicit CTREncryptionProvider(
+      const std::shared_ptr<BlockCipher>& c = nullptr);
+  virtual ~CTREncryptionProvider() {}
+
+  static const char* kClassName() { return "CTR"; }
+  const char* Name() const override { return kClassName(); }
+  bool IsInstanceOf(const std::string& name) const override;
+  // GetPrefixLength returns the length of the prefix that is added to every
+  // file and used for storing encryption options.
+  // For optimal performance when using direct IO, the prefix length should be
+  // a multiple of the page size.
+  size_t GetPrefixLength() const override;
+
+  // CreateNewPrefix initializes an allocated block of prefix memory
+  // for a new file.
+  Status CreateNewPrefix(const std::string& fname, char* prefix,
+                         size_t prefixLength) const override;
+
+  // CreateCipherStream creates a block access cipher stream for a file given
+  // its name and options.
+  Status CreateCipherStream(
+      const std::string& fname, const EnvOptions& options, Slice& prefix,
+      std::unique_ptr<BlockAccessCipherStream>* result) override;
+
+  Status AddCipher(const std::string& descriptor, const char* /*cipher*/,
+                   size_t /*len*/, bool /*for_write*/) override;
+
+ protected:
+  // PopulateSecretPrefixPart initializes the data into a new prefix block
+  // that will be encrypted. This function will store the data in plain text.
+  // It will be encrypted later (before being written to disk).
+  // Returns the amount of space (starting from the start of the prefix)
+  // that has been initialized.
+  virtual size_t PopulateSecretPrefixPart(char* prefix, size_t prefixLength,
+                                          size_t blockSize) const;
+
+  // CreateCipherStreamFromPrefix creates a block access cipher stream for a
+  // file given its name and options. The given prefix is already decrypted.
+  virtual Status CreateCipherStreamFromPrefix(
+      const std::string& fname, const EnvOptions& options,
+      uint64_t initialCounter, const Slice& iv, const Slice& prefix,
+      std::unique_ptr<BlockAccessCipherStream>* result);
+};
+
+Status NewEncryptedFileSystemImpl(
+    const std::shared_ptr<FileSystem>& base_fs,
+    const std::shared_ptr<EncryptionProvider>& provider,
+    std::unique_ptr<FileSystem>* fs);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_posix.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_posix.cc
new file mode 100644
index 0000000000..400b8ac6ee
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/env_posix.cc
@@ -0,0 +1,524 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors
+
+#include "port/lang.h"
+#if !defined(OS_WIN)
+
+#include <dirent.h>
+#ifndef ROCKSDB_NO_DYNAMIC_EXTENSION
+#include <dlfcn.h>
+#endif
+#include <errno.h>
+#include <fcntl.h>
+
+#if defined(ROCKSDB_IOURING_PRESENT)
+#include <liburing.h>
+#endif
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#if defined(OS_LINUX) || defined(OS_SOLARIS) || defined(OS_ANDROID)
+#include <sys/statfs.h>
+#endif
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#if defined(ROCKSDB_IOURING_PRESENT)
+#include <sys/uio.h>
+#endif
+#include <time.h>
+#include <unistd.h>
+
+#include <algorithm>
+// Get nano time includes
+#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD)
+#elif defined(__MACH__)
+#include <Availability.h>
+#include <mach/clock.h>
+#include <mach/mach.h>
+#else
+#include <chrono>
+#endif
+#include <deque>
+#include <set>
+#include <vector>
+
+#include "env/composite_env_wrapper.h"
+#include "env/io_posix.h"
+#include "monitoring/iostats_context_imp.h"
+#include "monitoring/thread_status_updater.h"
+#include "port/port.h"
+#include "port/sys_time.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/system_clock.h"
+#include "test_util/sync_point.h"
+#include "util/coding.h"
+#include "util/compression_context_cache.h"
+#include "util/random.h"
+#include "util/string_util.h"
+#include "util/thread_local.h"
+#include "util/threadpool_imp.h"
+
+#if !defined(TMPFS_MAGIC)
+#define TMPFS_MAGIC 0x01021994
+#endif
+#if !defined(XFS_SUPER_MAGIC)
+#define XFS_SUPER_MAGIC 0x58465342
+#endif
+#if !defined(EXT4_SUPER_MAGIC)
+#define EXT4_SUPER_MAGIC 0xEF53
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+#if defined(OS_WIN)
+static const std::string kSharedLibExt = ".dll";
+static const char kPathSeparator = ';';
+#else
+static const char kPathSeparator = ':';
+#if defined(OS_MACOSX)
+static const std::string kSharedLibExt = ".dylib";
+#else
+static const std::string kSharedLibExt = ".so";
+#endif
+#endif
+
+namespace {
+
+ThreadStatusUpdater* CreateThreadStatusUpdater() {
+  return new ThreadStatusUpdater();
+}
+
+#ifndef ROCKSDB_NO_DYNAMIC_EXTENSION
+class PosixDynamicLibrary : public DynamicLibrary {
+ public:
+  PosixDynamicLibrary(const std::string& name, void* handle)
+      : name_(name), handle_(handle) {}
+  ~PosixDynamicLibrary() override { dlclose(handle_); }
+
+  Status LoadSymbol(const std::string& sym_name, void** func) override {
+    assert(nullptr != func);
+    dlerror();  // Clear any old error
+    *func = dlsym(handle_, sym_name.c_str());
+    if (*func != nullptr) {
+      return Status::OK();
+    } else {
+      char* err = dlerror();
+      return Status::NotFound("Error finding symbol: " + sym_name, err);
+    }
+  }
+
+  const char* Name() const override { return name_.c_str(); }
+
+ private:
+  std::string name_;
+  void* handle_;
+};
+#endif  // !ROCKSDB_NO_DYNAMIC_EXTENSION
+
+class PosixClock : public SystemClock {
+ public:
+  static const char* kClassName() { return "PosixClock"; }
+  const char* Name() const override { return kDefaultName(); }
+  const char* NickName() const override { return kClassName(); }
+
+  uint64_t NowMicros() override {
+    port::TimeVal tv;
+    port::GetTimeOfDay(&tv, nullptr);
+    return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+  }
+
+  uint64_t NowNanos() override {
+#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
+    defined(OS_AIX)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
+#elif defined(OS_SOLARIS)
+    return gethrtime();
+#elif defined(__MACH__)
+    clock_serv_t cclock;
+    mach_timespec_t ts;
+    host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
clock_get_time(cclock, &ts); + mach_port_deallocate(mach_task_self(), cclock); + return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; +#else + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); +#endif + } + + uint64_t CPUMicros() override { +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \ + defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12)) + struct timespec ts; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); + return (static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec) / 1000; +#endif + return 0; + } + + uint64_t CPUNanos() override { +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \ + defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12)) + struct timespec ts; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); + return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; +#endif + return 0; + } + + void SleepForMicroseconds(int micros) override { usleep(micros); } + + Status GetCurrentTime(int64_t* unix_time) override { + time_t ret = time(nullptr); + if (ret == (time_t)-1) { + return IOError("GetCurrentTime", "", errno); + } + *unix_time = (int64_t)ret; + return Status::OK(); + } + + std::string TimeToString(uint64_t secondsSince1970) override { + const time_t seconds = (time_t)secondsSince1970; + struct tm t; + int maxsize = 64; + std::string dummy; + dummy.reserve(maxsize); + dummy.resize(maxsize); + char* p = &dummy[0]; + port::LocalTimeR(&seconds, &t); + snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900, + t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec); + return dummy; + } +}; + +class PosixEnv : public CompositeEnv { + public: + static const char* kClassName() { return "PosixEnv"; } + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kDefaultName(); } + + struct JoinThreadsOnExit { + explicit JoinThreadsOnExit(PosixEnv& _deflt) : deflt(_deflt) {} + ~JoinThreadsOnExit() { + for (const auto tid : deflt.threads_to_join_) { + pthread_join(tid, nullptr); + } + for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) { + deflt.thread_pools_[pool_id].JoinAllThreads(); + } + // Do not delete the thread_status_updater_ in order to avoid the + // free after use when Env::Default() is destructed while some other + // child threads are still trying to update thread status. All + // PosixEnv instances use the same thread_status_updater_, so never + // explicitly delete it. + } + PosixEnv& deflt; + }; + + void SetFD_CLOEXEC(int fd, const EnvOptions* options) { + if ((options == nullptr || options->set_fd_cloexec) && fd > 0) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + } + +#ifndef ROCKSDB_NO_DYNAMIC_EXTENSION + // Loads the named library into the result. + // If the input name is empty, the current executable is loaded + // On *nix systems, a "lib" prefix is added to the name if one is not supplied + // Comparably, the appropriate shared library extension is added to the name + // if not supplied. 
If search_path is not specified, the shared library will + // be loaded using the default path (LD_LIBRARY_PATH) If search_path is + // specified, the shared library will be searched for in the directories + // provided by the search path + Status LoadLibrary(const std::string& name, const std::string& path, + std::shared_ptr* result) override { + assert(result != nullptr); + if (name.empty()) { + void* hndl = dlopen(NULL, RTLD_NOW); + if (hndl != nullptr) { + result->reset(new PosixDynamicLibrary(name, hndl)); + return Status::OK(); + } + } else { + std::string library_name = name; + if (library_name.find(kSharedLibExt) == std::string::npos) { + library_name = library_name + kSharedLibExt; + } +#if !defined(OS_WIN) + if (library_name.find('/') == std::string::npos && + library_name.compare(0, 3, "lib") != 0) { + library_name = "lib" + library_name; + } +#endif + if (path.empty()) { + void* hndl = dlopen(library_name.c_str(), RTLD_NOW); + if (hndl != nullptr) { + result->reset(new PosixDynamicLibrary(library_name, hndl)); + return Status::OK(); + } + } else { + std::string local_path; + std::stringstream ss(path); + while (getline(ss, local_path, kPathSeparator)) { + if (!path.empty()) { + std::string full_name = local_path + "/" + library_name; + void* hndl = dlopen(full_name.c_str(), RTLD_NOW); + if (hndl != nullptr) { + result->reset(new PosixDynamicLibrary(full_name, hndl)); + return Status::OK(); + } + } + } + } + } + return Status::IOError( + IOErrorMsg("Failed to open shared library: xs", name), dlerror()); + } +#endif // !ROCKSDB_NO_DYNAMIC_EXTENSION + + void Schedule(void (*function)(void* arg1), void* arg, Priority pri = LOW, + void* tag = nullptr, + void (*unschedFunction)(void* arg) = nullptr) override; + + int UnSchedule(void* arg, Priority pri) override; + + void StartThread(void (*function)(void* arg), void* arg) override; + + void WaitForJoin() override; + + unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const override; + + int ReserveThreads(int threads_to_be_reserved, Priority pri) override; + + int ReleaseThreads(int threads_to_be_released, Priority pri) override; + + Status GetThreadList(std::vector* thread_list) override { + assert(thread_status_updater_); + return thread_status_updater_->GetThreadList(thread_list); + } + + uint64_t GetThreadID() const override { + uint64_t thread_id = 0; +#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 30) + thread_id = ::gettid(); +#else // __GLIBC_PREREQ(2, 30) + pthread_t tid = pthread_self(); + memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid))); +#endif // __GLIBC_PREREQ(2, 30) +#else // defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) + pthread_t tid = pthread_self(); + memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid))); +#endif // defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) + return thread_id; + } + + Status GetHostName(char* name, uint64_t len) override { + int ret = gethostname(name, static_cast(len)); + if (ret < 0) { + if (errno == EFAULT || errno == EINVAL) { + return Status::InvalidArgument(errnoStr(errno).c_str()); + } else { + return IOError("GetHostName", name, errno); + } + } + return Status::OK(); + } + + ThreadStatusUpdater* GetThreadStatusUpdater() const override { + return Env::GetThreadStatusUpdater(); + } + + std::string GenerateUniqueId() override { return Env::GenerateUniqueId(); } + + // Allow increasing the number of worker threads. 
+ void SetBackgroundThreads(int num, Priority pri) override { + assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); + thread_pools_[pri].SetBackgroundThreads(num); + } + + int GetBackgroundThreads(Priority pri) override { + assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); + return thread_pools_[pri].GetBackgroundThreads(); + } + + Status SetAllowNonOwnerAccess(bool allow_non_owner_access) override { + allow_non_owner_access_ = allow_non_owner_access; + return Status::OK(); + } + + // Allow increasing the number of worker threads. + void IncBackgroundThreadsIfNeeded(int num, Priority pri) override { + assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); + thread_pools_[pri].IncBackgroundThreadsIfNeeded(num); + } + + void LowerThreadPoolIOPriority(Priority pool) override { + assert(pool >= Priority::BOTTOM && pool <= Priority::HIGH); +#ifdef OS_LINUX + thread_pools_[pool].LowerIOPriority(); +#else + (void)pool; +#endif + } + + void LowerThreadPoolCPUPriority(Priority pool) override { + assert(pool >= Priority::BOTTOM && pool <= Priority::HIGH); + thread_pools_[pool].LowerCPUPriority(CpuPriority::kLow); + } + + Status LowerThreadPoolCPUPriority(Priority pool, CpuPriority pri) override { + assert(pool >= Priority::BOTTOM && pool <= Priority::HIGH); + thread_pools_[pool].LowerCPUPriority(pri); + return Status::OK(); + } + + private: + friend Env* Env::Default(); + // Constructs the default Env, a singleton + PosixEnv(); + + // The below 4 members are only used by the default PosixEnv instance. + // Non-default instances simply maintain references to the backing + // members in te default instance + std::vector thread_pools_storage_; + pthread_mutex_t mu_storage_; + std::vector threads_to_join_storage_; + bool allow_non_owner_access_storage_; + + std::vector& thread_pools_; + pthread_mutex_t& mu_; + std::vector& threads_to_join_; + // If true, allow non owner read access for db files. Otherwise, non-owner + // has no access to db files. + bool& allow_non_owner_access_; +}; + +PosixEnv::PosixEnv() + : CompositeEnv(FileSystem::Default(), SystemClock::Default()), + thread_pools_storage_(Priority::TOTAL), + allow_non_owner_access_storage_(true), + thread_pools_(thread_pools_storage_), + mu_(mu_storage_), + threads_to_join_(threads_to_join_storage_), + allow_non_owner_access_(allow_non_owner_access_storage_) { + ThreadPoolImpl::PthreadCall("mutex_init", pthread_mutex_init(&mu_, nullptr)); + for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) { + thread_pools_[pool_id].SetThreadPriority( + static_cast(pool_id)); + // This allows later initializing the thread-local-env of each thread. 
+ thread_pools_[pool_id].SetHostEnv(this); + } + thread_status_updater_ = CreateThreadStatusUpdater(); +} + +void PosixEnv::Schedule(void (*function)(void* arg1), void* arg, Priority pri, + void* tag, void (*unschedFunction)(void* arg)) { + assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); + thread_pools_[pri].Schedule(function, arg, tag, unschedFunction); +} + +int PosixEnv::UnSchedule(void* arg, Priority pri) { + return thread_pools_[pri].UnSchedule(arg); +} + +unsigned int PosixEnv::GetThreadPoolQueueLen(Priority pri) const { + assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); + return thread_pools_[pri].GetQueueLen(); +} + +int PosixEnv::ReserveThreads(int threads_to_reserved, Priority pri) { + assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); + return thread_pools_[pri].ReserveThreads(threads_to_reserved); +} + +int PosixEnv::ReleaseThreads(int threads_to_released, Priority pri) { + assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); + return thread_pools_[pri].ReleaseThreads(threads_to_released); +} + +struct StartThreadState { + void (*user_function)(void*); + void* arg; +}; + +static void* StartThreadWrapper(void* arg) { + StartThreadState* state = reinterpret_cast(arg); + state->user_function(state->arg); + delete state; + return nullptr; +} + +void PosixEnv::StartThread(void (*function)(void* arg), void* arg) { + pthread_t t; + StartThreadState* state = new StartThreadState; + state->user_function = function; + state->arg = arg; + ThreadPoolImpl::PthreadCall( + "start thread", pthread_create(&t, nullptr, &StartThreadWrapper, state)); + ThreadPoolImpl::PthreadCall("lock", pthread_mutex_lock(&mu_)); + threads_to_join_.push_back(t); + ThreadPoolImpl::PthreadCall("unlock", pthread_mutex_unlock(&mu_)); +} + +void PosixEnv::WaitForJoin() { + for (const auto tid : threads_to_join_) { + pthread_join(tid, nullptr); + } + threads_to_join_.clear(); +} + +} // namespace + +// +// Default Posix Env +// +Env* Env::Default() { + // The following function call initializes the singletons of ThreadLocalPtr + // right before the static default_env. This guarantees default_env will + // always being destructed before the ThreadLocalPtr singletons get + // destructed as C++ guarantees that the destructions of static variables + // is in the reverse order of their constructions. + // + // Since static members are destructed in the reverse order + // of their construction, having this call here guarantees that + // the destructor of static PosixEnv will go first, then the + // the singletons of ThreadLocalPtr. + ThreadLocalPtr::InitSingletons(); + CompressionContextCache::InitSingleton(); + INIT_SYNC_POINT_SINGLETONS(); + // Avoid problems with accessing most members of Env::Default() during + // static destruction. + STATIC_AVOID_DESTRUCTION(PosixEnv, default_env); + // This destructor must be called on exit + static PosixEnv::JoinThreadsOnExit thread_joiner(default_env); + return &default_env; +} + +// +// Default Posix SystemClock +// +const std::shared_ptr& SystemClock::Default() { + STATIC_AVOID_DESTRUCTION(std::shared_ptr, instance) + (std::make_shared()); + return instance; +} +} // namespace ROCKSDB_NAMESPACE + +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system.cc new file mode 100644 index 0000000000..71fb4d5bc7 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system.cc @@ -0,0 +1,277 @@ +// Copyright (c) 2019-present, Facebook, Inc. 
All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include "rocksdb/file_system.h" + +#include "env/composite_env_wrapper.h" +#include "env/env_chroot.h" +#include "env/env_encryption_ctr.h" +#include "env/fs_readonly.h" +#include "env/mock_env.h" +#include "logging/env_logger.h" +#include "options/db_options.h" +#include "rocksdb/convenience.h" +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "util/string_util.h" +#include "utilities/counted_fs.h" +#include "utilities/env_timed.h" + +namespace ROCKSDB_NAMESPACE { + +FileSystem::FileSystem() {} + +FileSystem::~FileSystem() {} + +static int RegisterBuiltinFileSystems(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + TimedFileSystem::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new TimedFileSystem(nullptr)); + return guard->get(); + }); + library.AddFactory( + ReadOnlyFileSystem::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new ReadOnlyFileSystem(nullptr)); + return guard->get(); + }); + library.AddFactory( + EncryptedFileSystem::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* errmsg) { + Status s = NewEncryptedFileSystemImpl(nullptr, nullptr, guard); + if (!s.ok()) { + *errmsg = s.ToString(); + } + return guard->get(); + }); + library.AddFactory( + CountedFileSystem::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new CountedFileSystem(FileSystem::Default())); + return guard->get(); + }); + library.AddFactory( + MockFileSystem::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new MockFileSystem(SystemClock::Default())); + return guard->get(); + }); +#ifndef OS_WIN + library.AddFactory( + ChrootFileSystem::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new ChrootFileSystem(nullptr, "")); + return guard->get(); + }); +#endif // OS_WIN + size_t num_types; + return static_cast(library.GetFactoryCount(&num_types)); +} + +Status FileSystem::CreateFromString(const ConfigOptions& config_options, + const std::string& value, + std::shared_ptr* result) { + auto default_fs = FileSystem::Default(); + if (default_fs->IsInstanceOf(value)) { + *result = default_fs; + return Status::OK(); + } else { + static std::once_flag once; + std::call_once(once, [&]() { + RegisterBuiltinFileSystems(*(ObjectLibrary::Default().get()), ""); + }); + return LoadSharedObject(config_options, value, result); + } +} + +IOStatus FileSystem::ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + const FileOptions& opts, + std::unique_ptr* result, + IODebugContext* dbg) { + IOStatus s = RenameFile(old_fname, fname, opts.io_options, dbg); + if (!s.ok()) { + return s; + } + return NewWritableFile(fname, opts, result, dbg); +} + +IOStatus FileSystem::NewLogger(const std::string& fname, + const IOOptions& io_opts, + std::shared_ptr* result, + IODebugContext* dbg) { + FileOptions options; + options.io_options = io_opts; + // TODO: Tune the buffer size. 
+ options.writable_file_max_buffer_size = 1024 * 1024; + std::unique_ptr writable_file; + const IOStatus status = NewWritableFile(fname, options, &writable_file, dbg); + if (!status.ok()) { + return status; + } + + *result = std::make_shared(std::move(writable_file), fname, + options, Env::Default()); + return IOStatus::OK(); +} + +FileOptions FileSystem::OptimizeForLogRead( + const FileOptions& file_options) const { + FileOptions optimized_file_options(file_options); + optimized_file_options.use_direct_reads = false; + return optimized_file_options; +} + +FileOptions FileSystem::OptimizeForManifestRead( + const FileOptions& file_options) const { + FileOptions optimized_file_options(file_options); + optimized_file_options.use_direct_reads = false; + return optimized_file_options; +} + +FileOptions FileSystem::OptimizeForLogWrite(const FileOptions& file_options, + const DBOptions& db_options) const { + FileOptions optimized_file_options(file_options); + optimized_file_options.bytes_per_sync = db_options.wal_bytes_per_sync; + optimized_file_options.writable_file_max_buffer_size = + db_options.writable_file_max_buffer_size; + return optimized_file_options; +} + +FileOptions FileSystem::OptimizeForManifestWrite( + const FileOptions& file_options) const { + return file_options; +} + +FileOptions FileSystem::OptimizeForCompactionTableWrite( + const FileOptions& file_options, + const ImmutableDBOptions& db_options) const { + FileOptions optimized_file_options(file_options); + optimized_file_options.use_direct_writes = + db_options.use_direct_io_for_flush_and_compaction; + return optimized_file_options; +} + +FileOptions FileSystem::OptimizeForCompactionTableRead( + const FileOptions& file_options, + const ImmutableDBOptions& db_options) const { + FileOptions optimized_file_options(file_options); + optimized_file_options.use_direct_reads = db_options.use_direct_reads; + return optimized_file_options; +} + +FileOptions FileSystem::OptimizeForBlobFileRead( + const FileOptions& file_options, + const ImmutableDBOptions& db_options) const { + FileOptions optimized_file_options(file_options); + optimized_file_options.use_direct_reads = db_options.use_direct_reads; + return optimized_file_options; +} + +IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, + const std::string& fname, bool should_sync) { + std::unique_ptr file; + EnvOptions soptions; + IOStatus s = fs->NewWritableFile(fname, soptions, &file, nullptr); + if (!s.ok()) { + return s; + } + s = file->Append(data, IOOptions(), nullptr); + if (s.ok() && should_sync) { + s = file->Sync(IOOptions(), nullptr); + } + if (!s.ok()) { + fs->DeleteFile(fname, IOOptions(), nullptr); + } + return s; +} + +IOStatus ReadFileToString(FileSystem* fs, const std::string& fname, + std::string* data) { + FileOptions soptions; + data->clear(); + std::unique_ptr file; + IOStatus s = status_to_io_status( + fs->NewSequentialFile(fname, soptions, &file, nullptr)); + if (!s.ok()) { + return s; + } + static const int kBufferSize = 8192; + char* space = new char[kBufferSize]; + while (true) { + Slice fragment; + s = file->Read(kBufferSize, IOOptions(), &fragment, space, nullptr); + if (!s.ok()) { + break; + } + data->append(fragment.data(), fragment.size()); + if (fragment.empty()) { + break; + } + } + delete[] space; + return s; +} + +namespace { +static std::unordered_map fs_wrapper_type_info = { + {"target", + OptionTypeInfo::AsCustomSharedPtr( + 0, OptionVerificationType::kByName, OptionTypeFlags::kDontSerialize)}, +}; +} // namespace 
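+// Usage sketch for the two helpers above (illustrative; the path is a
+// placeholder and error handling is abbreviated):
+//
+//   std::shared_ptr<FileSystem> fs = FileSystem::Default();
+//   IOStatus s = WriteStringToFile(fs.get(), Slice("hello"), "/tmp/f",
+//                                  /*should_sync=*/true);
+//   std::string read_back;
+//   if (s.ok()) {
+//     s = ReadFileToString(fs.get(), "/tmp/f", &read_back);
+//   }
+//   // on success, read_back == "hello"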
+FileSystemWrapper::FileSystemWrapper(const std::shared_ptr& t) + : target_(t) { + RegisterOptions("", &target_, &fs_wrapper_type_info); +} + +Status FileSystemWrapper::PrepareOptions(const ConfigOptions& options) { + if (target_ == nullptr) { + target_ = FileSystem::Default(); + } + return FileSystem::PrepareOptions(options); +} + +std::string FileSystemWrapper::SerializeOptions( + const ConfigOptions& config_options, const std::string& header) const { + auto parent = FileSystem::SerializeOptions(config_options, ""); + if (config_options.IsShallow() || target_ == nullptr || + target_->IsInstanceOf(FileSystem::kDefaultName())) { + return parent; + } else { + std::string result = header; + if (!StartsWith(parent, OptionTypeInfo::kIdPropName())) { + result.append(OptionTypeInfo::kIdPropName()).append("="); + } + result.append(parent); + if (!EndsWith(result, config_options.delimiter)) { + result.append(config_options.delimiter); + } + result.append("target=").append(target_->ToString(config_options)); + return result; + } +} + +DirFsyncOptions::DirFsyncOptions() { reason = kDefault; } + +DirFsyncOptions::DirFsyncOptions(std::string file_renamed_new_name) { + reason = kFileRenamed; + renamed_new_name = file_renamed_new_name; +} + +DirFsyncOptions::DirFsyncOptions(FsyncReason fsync_reason) { + assert(fsync_reason != kFileRenamed); + reason = fsync_reason; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.cc new file mode 100644 index 0000000000..d0c45c57ee --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.cc @@ -0,0 +1,564 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "env/file_system_tracer.h" + +#include "rocksdb/file_system.h" +#include "rocksdb/system_clock.h" +#include "rocksdb/trace_record.h" + +namespace ROCKSDB_NAMESPACE { + +IOStatus FileSystemTracingWrapper::NewSequentialFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->NewSequentialFile(fname, file_opts, result, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + fname.substr(fname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::NewRandomAccessFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->NewRandomAccessFile(fname, file_opts, result, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + fname.substr(fname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::NewWritableFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->NewWritableFile(fname, file_opts, result, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + fname.substr(fname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::ReopenWritableFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->ReopenWritableFile(fname, file_opts, result, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + fname.substr(fname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::ReuseWritableFile( + const std::string& fname, const std::string& old_fname, + const FileOptions& file_opts, std::unique_ptr* result, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = + target()->ReuseWritableFile(fname, old_fname, file_opts, result, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + fname.substr(fname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::NewRandomRWFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->NewRandomRWFile(fname, file_opts, result, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + fname.substr(fname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; 
+} + +IOStatus FileSystemTracingWrapper::NewDirectory( + const std::string& name, const IOOptions& io_opts, + std::unique_ptr* result, IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->NewDirectory(name, io_opts, result, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + name.substr(name.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::GetChildren(const std::string& dir, + const IOOptions& io_opts, + std::vector* r, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->GetChildren(dir, io_opts, r, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + dir.substr(dir.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::DeleteFile(const std::string& fname, + const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->DeleteFile(fname, options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + fname.substr(fname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::CreateDir(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->CreateDir(dirname, options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + dirname.substr(dirname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::CreateDirIfMissing( + const std::string& dirname, const IOOptions& options, IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->CreateDirIfMissing(dirname, options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + dirname.substr(dirname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::DeleteDir(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->DeleteDir(dirname, options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + dirname.substr(dirname.find_last_of("/\\") + 1)); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::GetFileSize(const std::string& fname, + const IOOptions& options, + uint64_t* file_size, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->GetFileSize(fname, options, file_size, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + uint64_t io_op_data = 0; + io_op_data |= (1 << IOTraceOp::kIOFileSize); + IOTraceRecord io_record( + clock_->NowNanos(), 
TraceType::kIOTracer, io_op_data, __func__, elapsed, + s.ToString(), fname.substr(fname.find_last_of("/\\") + 1), *file_size); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FileSystemTracingWrapper::Truncate(const std::string& fname, + size_t size, + const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->Truncate(fname, size, options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + uint64_t io_op_data = 0; + io_op_data |= (1 << IOTraceOp::kIOFileSize); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data, + __func__, elapsed, s.ToString(), + fname.substr(fname.find_last_of("/\\") + 1), size); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSSequentialFileTracingWrapper::Read(size_t n, + const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->Read(n, options, result, scratch, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + uint64_t io_op_data = 0; + io_op_data |= (1 << IOTraceOp::kIOLen); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data, + __func__, elapsed, s.ToString(), file_name_, + result->size(), 0 /*Offset*/); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSSequentialFileTracingWrapper::InvalidateCache(size_t offset, + size_t length) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->InvalidateCache(offset, length); + uint64_t elapsed = timer.ElapsedNanos(); + uint64_t io_op_data = 0; + io_op_data |= (1 << IOTraceOp::kIOLen); + io_op_data |= (1 << IOTraceOp::kIOOffset); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data, + __func__, elapsed, s.ToString(), file_name_, length, + offset); + io_tracer_->WriteIOOp(io_record, nullptr /*dbg*/); + return s; +} + +IOStatus FSSequentialFileTracingWrapper::PositionedRead( + uint64_t offset, size_t n, const IOOptions& options, Slice* result, + char* scratch, IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = + target()->PositionedRead(offset, n, options, result, scratch, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + uint64_t io_op_data = 0; + io_op_data |= (1 << IOTraceOp::kIOLen); + io_op_data |= (1 << IOTraceOp::kIOOffset); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data, + __func__, elapsed, s.ToString(), file_name_, + result->size(), offset); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSRandomAccessFileTracingWrapper::Read(uint64_t offset, size_t n, + const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->Read(offset, n, options, result, scratch, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + uint64_t io_op_data = 0; + io_op_data |= (1 << IOTraceOp::kIOLen); + io_op_data |= (1 << IOTraceOp::kIOOffset); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data, + __func__, elapsed, s.ToString(), file_name_, n, + offset); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSRandomAccessFileTracingWrapper::MultiRead(FSReadRequest* reqs, + size_t num_reqs, + const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->MultiRead(reqs, num_reqs, options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + 
uint64_t latency = elapsed;
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  io_op_data |= (1 << IOTraceOp::kIOOffset);
+  for (size_t i = 0; i < num_reqs; i++) {
+    IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
+                            io_op_data, __func__, latency,
+                            reqs[i].status.ToString(), file_name_, reqs[i].len,
+                            reqs[i].offset);
+    io_tracer_->WriteIOOp(io_record, dbg);
+  }
+  return s;
+}
+
+IOStatus FSRandomAccessFileTracingWrapper::Prefetch(uint64_t offset, size_t n,
+                                                    const IOOptions& options,
+                                                    IODebugContext* dbg) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  IOStatus s = target()->Prefetch(offset, n, options, dbg);
+  uint64_t elapsed = timer.ElapsedNanos();
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  io_op_data |= (1 << IOTraceOp::kIOOffset);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          __func__, elapsed, s.ToString(), file_name_, n,
+                          offset);
+  io_tracer_->WriteIOOp(io_record, dbg);
+  return s;
+}
+
+IOStatus FSRandomAccessFileTracingWrapper::InvalidateCache(size_t offset,
+                                                           size_t length) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  IOStatus s = target()->InvalidateCache(offset, length);
+  uint64_t elapsed = timer.ElapsedNanos();
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  io_op_data |= (1 << IOTraceOp::kIOOffset);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          __func__, elapsed, s.ToString(), file_name_, length,
+                          static_cast<uint64_t>(offset));
+  io_tracer_->WriteIOOp(io_record, nullptr /*dbg*/);
+  return s;
+}
+
+IOStatus FSRandomAccessFileTracingWrapper::ReadAsync(
+    FSReadRequest& req, const IOOptions& opts,
+    std::function<void(const FSReadRequest&, void*)> cb, void* cb_arg,
+    void** io_handle, IOHandleDeleter* del_fn, IODebugContext* dbg) {
+  // Create a callback and populate info.
+  auto read_async_callback =
+      std::bind(&FSRandomAccessFileTracingWrapper::ReadAsyncCallback, this,
+                std::placeholders::_1, std::placeholders::_2);
+  ReadAsyncCallbackInfo* read_async_cb_info = new ReadAsyncCallbackInfo;
+  read_async_cb_info->cb_ = cb;
+  read_async_cb_info->cb_arg_ = cb_arg;
+  read_async_cb_info->start_time_ = clock_->NowNanos();
+  read_async_cb_info->file_op_ = __func__;
+
+  IOStatus s = target()->ReadAsync(req, opts, read_async_callback,
+                                   read_async_cb_info, io_handle, del_fn, dbg);
+
+  if (!s.ok()) {
+    delete read_async_cb_info;
+  }
+  return s;
+}
+
+void FSRandomAccessFileTracingWrapper::ReadAsyncCallback(
+    const FSReadRequest& req, void* cb_arg) {
+  ReadAsyncCallbackInfo* read_async_cb_info =
+      static_cast<ReadAsyncCallbackInfo*>(cb_arg);
+  assert(read_async_cb_info);
+  assert(read_async_cb_info->cb_);
+
+  uint64_t elapsed = clock_->NowNanos() - read_async_cb_info->start_time_;
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  io_op_data |= (1 << IOTraceOp::kIOOffset);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          read_async_cb_info->file_op_, elapsed,
+                          req.status.ToString(), file_name_, req.result.size(),
+                          req.offset);
+  io_tracer_->WriteIOOp(io_record, nullptr /*dbg*/);
+
+  // call the underlying callback.
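+  // The heap-allocated ReadAsyncCallbackInfo is owned by this wrapper: it is
+  // freed here after the user callback runs, mirroring the delete on the
+  // error path in ReadAsync() above.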
+  read_async_cb_info->cb_(req, read_async_cb_info->cb_arg_);
+  delete read_async_cb_info;
+}
+
+IOStatus FSWritableFileTracingWrapper::Append(const Slice& data,
+                                              const IOOptions& options,
+                                              IODebugContext* dbg) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  IOStatus s = target()->Append(data, options, dbg);
+  uint64_t elapsed = timer.ElapsedNanos();
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          __func__, elapsed, s.ToString(), file_name_,
+                          data.size(), 0 /*Offset*/);
+  io_tracer_->WriteIOOp(io_record, dbg);
+  return s;
+}
+
+IOStatus FSWritableFileTracingWrapper::PositionedAppend(
+    const Slice& data, uint64_t offset, const IOOptions& options,
+    IODebugContext* dbg) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  IOStatus s = target()->PositionedAppend(data, offset, options, dbg);
+  uint64_t elapsed = timer.ElapsedNanos();
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  io_op_data |= (1 << IOTraceOp::kIOOffset);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          __func__, elapsed, s.ToString(), file_name_,
+                          data.size(), offset);
+  io_tracer_->WriteIOOp(io_record, dbg);
+  return s;
+}
+
+IOStatus FSWritableFileTracingWrapper::Truncate(uint64_t size,
+                                                const IOOptions& options,
+                                                IODebugContext* dbg) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  IOStatus s = target()->Truncate(size, options, dbg);
+  uint64_t elapsed = timer.ElapsedNanos();
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          __func__, elapsed, s.ToString(), file_name_, size,
+                          0 /*Offset*/);
+  io_tracer_->WriteIOOp(io_record, dbg);
+  return s;
+}
+
+IOStatus FSWritableFileTracingWrapper::Close(const IOOptions& options,
+                                             IODebugContext* dbg) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  IOStatus s = target()->Close(options, dbg);
+  uint64_t elapsed = timer.ElapsedNanos();
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
+                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
+                          file_name_);
+  io_tracer_->WriteIOOp(io_record, dbg);
+  return s;
+}
+
+uint64_t FSWritableFileTracingWrapper::GetFileSize(const IOOptions& options,
+                                                   IODebugContext* dbg) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  uint64_t file_size = target()->GetFileSize(options, dbg);
+  uint64_t elapsed = timer.ElapsedNanos();
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOFileSize);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          __func__, elapsed, "OK", file_name_, file_size);
+  io_tracer_->WriteIOOp(io_record, dbg);
+  return file_size;
+}
+
+IOStatus FSWritableFileTracingWrapper::InvalidateCache(size_t offset,
+                                                       size_t length) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  IOStatus s = target()->InvalidateCache(offset, length);
+  uint64_t elapsed = timer.ElapsedNanos();
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  io_op_data |= (1 << IOTraceOp::kIOOffset);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          __func__, elapsed, s.ToString(), file_name_, length,
+                          static_cast<uint64_t>(offset));
+  io_tracer_->WriteIOOp(io_record, nullptr /*dbg*/);
+  return s;
+}
+
+IOStatus FSRandomRWFileTracingWrapper::Write(uint64_t offset, const Slice& data,
+                                             const IOOptions& options,
+                                             IODebugContext* dbg) {
+  StopWatchNano timer(clock_);
+  timer.Start();
+  IOStatus s =
target()->Write(offset, data, options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + uint64_t io_op_data = 0; + io_op_data |= (1 << IOTraceOp::kIOLen); + io_op_data |= (1 << IOTraceOp::kIOOffset); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data, + __func__, elapsed, s.ToString(), file_name_, + data.size(), offset); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSRandomRWFileTracingWrapper::Read(uint64_t offset, size_t n, + const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->Read(offset, n, options, result, scratch, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + uint64_t io_op_data = 0; + io_op_data |= (1 << IOTraceOp::kIOLen); + io_op_data |= (1 << IOTraceOp::kIOOffset); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data, + __func__, elapsed, s.ToString(), file_name_, n, + offset); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSRandomRWFileTracingWrapper::Flush(const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->Flush(options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + file_name_); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSRandomRWFileTracingWrapper::Close(const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->Close(options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + file_name_); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSRandomRWFileTracingWrapper::Sync(const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->Sync(options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + file_name_); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} + +IOStatus FSRandomRWFileTracingWrapper::Fsync(const IOOptions& options, + IODebugContext* dbg) { + StopWatchNano timer(clock_); + timer.Start(); + IOStatus s = target()->Fsync(options, dbg); + uint64_t elapsed = timer.ElapsedNanos(); + IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, + 0 /*io_op_data*/, __func__, elapsed, s.ToString(), + file_name_); + io_tracer_->WriteIOOp(io_record, dbg); + return s; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.h new file mode 100644 index 0000000000..979a0bf120 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/file_system_tracer.h @@ -0,0 +1,461 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
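+//
+// The classes below layer an IOTracer over the FileSystem and FSFile
+// abstractions so that individual I/O calls can be timed and recorded. A
+// minimal usage sketch (illustrative only; `tracer` is a placeholder name,
+// not part of this header):
+//
+//   std::shared_ptr<IOTracer> tracer = std::make_shared<IOTracer>();
+//   FileSystemPtr fs(FileSystem::Default(), tracer);
+//   // Calls made through `fs->` are traced only while tracing is enabled:
+//   fs->CreateDirIfMissing("/tmp/example-db", IOOptions(), nullptr);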
+
+#pragma once
+
+#include "rocksdb/file_system.h"
+#include "rocksdb/system_clock.h"
+#include "trace_replay/io_tracer.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// FileSystemTracingWrapper is a wrapper class above FileSystem that forwards
+// the call to the underlying storage system. It then invokes IOTracer to
+// record file operations and other contextual information in a binary format
+// for tracing. It overrides methods we are interested in tracing and extends
+// FileSystemWrapper, which forwards all methods that are not explicitly
+// overridden.
+class FileSystemTracingWrapper : public FileSystemWrapper {
+ public:
+  FileSystemTracingWrapper(const std::shared_ptr<FileSystem>& t,
+                           const std::shared_ptr<IOTracer>& io_tracer)
+      : FileSystemWrapper(t),
+        io_tracer_(io_tracer),
+        clock_(SystemClock::Default().get()) {}
+
+  ~FileSystemTracingWrapper() override {}
+
+  static const char* kClassName() { return "FileSystemTracing"; }
+  const char* Name() const override { return kClassName(); }
+
+  IOStatus NewSequentialFile(const std::string& fname,
+                             const FileOptions& file_opts,
+                             std::unique_ptr<FSSequentialFile>* result,
+                             IODebugContext* dbg) override;
+
+  IOStatus NewRandomAccessFile(const std::string& fname,
+                               const FileOptions& file_opts,
+                               std::unique_ptr<FSRandomAccessFile>* result,
+                               IODebugContext* dbg) override;
+
+  IOStatus NewWritableFile(const std::string& fname,
+                           const FileOptions& file_opts,
+                           std::unique_ptr<FSWritableFile>* result,
+                           IODebugContext* dbg) override;
+
+  IOStatus ReopenWritableFile(const std::string& fname,
+                              const FileOptions& file_opts,
+                              std::unique_ptr<FSWritableFile>* result,
+                              IODebugContext* dbg) override;
+
+  IOStatus ReuseWritableFile(const std::string& fname,
+                             const std::string& old_fname,
+                             const FileOptions& file_opts,
+                             std::unique_ptr<FSWritableFile>* result,
+                             IODebugContext* dbg) override;
+
+  IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& options,
+                           std::unique_ptr<FSRandomRWFile>* result,
+                           IODebugContext* dbg) override;
+
+  IOStatus NewDirectory(const std::string& name, const IOOptions& io_opts,
+                        std::unique_ptr<FSDirectory>* result,
+                        IODebugContext* dbg) override;
+
+  IOStatus GetChildren(const std::string& dir, const IOOptions& io_opts,
+                       std::vector<std::string>* r,
+                       IODebugContext* dbg) override;
+
+  IOStatus DeleteFile(const std::string& fname, const IOOptions& options,
+                      IODebugContext* dbg) override;
+
+  IOStatus CreateDir(const std::string& dirname, const IOOptions& options,
+                     IODebugContext* dbg) override;
+
+  IOStatus CreateDirIfMissing(const std::string& dirname,
+                              const IOOptions& options,
+                              IODebugContext* dbg) override;
+
+  IOStatus DeleteDir(const std::string& dirname, const IOOptions& options,
+                     IODebugContext* dbg) override;
+
+  IOStatus GetFileSize(const std::string& fname, const IOOptions& options,
+                       uint64_t* file_size, IODebugContext* dbg) override;
+
+  IOStatus Truncate(const std::string& fname, size_t size,
+                    const IOOptions& options, IODebugContext* dbg) override;
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  SystemClock* clock_;
+};
+
+// The FileSystemPtr is a wrapper class that takes pointer to storage systems
+// (such as posix filesystems). It overloads operator -> and returns a pointer
+// of either FileSystem or FileSystemTracingWrapper based on whether tracing is
+// enabled or not. It is added to bypass FileSystemTracingWrapper when tracing
+// is disabled.
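+//
+// For example (illustrative): with `FileSystemPtr fs(base, tracer)`, a call
+// such as `fs->DeleteFile(...)` is routed through FileSystemTracingWrapper
+// while `tracer->is_tracing_enabled()` holds, and straight to `base`
+// otherwise, so disabled tracing costs no more than the branch below.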
+class FileSystemPtr {
+ public:
+  FileSystemPtr(std::shared_ptr<FileSystem> fs,
+                const std::shared_ptr<IOTracer>& io_tracer)
+      : fs_(fs), io_tracer_(io_tracer) {
+    fs_tracer_ = std::make_shared<FileSystemTracingWrapper>(fs_, io_tracer_);
+  }
+
+  std::shared_ptr<FileSystem> operator->() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return fs_tracer_;
+    } else {
+      return fs_;
+    }
+  }
+
+  /* Returns the underlying File System pointer */
+  FileSystem* get() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return fs_tracer_.get();
+    } else {
+      return fs_.get();
+    }
+  }
+
+ private:
+  std::shared_ptr<FileSystem> fs_;
+  std::shared_ptr<IOTracer> io_tracer_;
+  std::shared_ptr<FileSystemTracingWrapper> fs_tracer_;
+};
+
+// FSSequentialFileTracingWrapper is a wrapper class above FSSequentialFile
+// that forwards the call to the underlying storage system. It then invokes
+// IOTracer to record file operations and other contextual information in a
+// binary format for tracing. It overrides methods we are interested in
+// tracing and extends FSSequentialFileWrapper, which forwards all methods
+// that are not explicitly overridden.
+class FSSequentialFileTracingWrapper : public FSSequentialFileOwnerWrapper {
+ public:
+  FSSequentialFileTracingWrapper(std::unique_ptr<FSSequentialFile>&& t,
+                                 std::shared_ptr<IOTracer> io_tracer,
+                                 const std::string& file_name)
+      : FSSequentialFileOwnerWrapper(std::move(t)),
+        io_tracer_(io_tracer),
+        clock_(SystemClock::Default().get()),
+        file_name_(file_name) {}
+
+  ~FSSequentialFileTracingWrapper() override {}
+
+  IOStatus Read(size_t n, const IOOptions& options, Slice* result,
+                char* scratch, IODebugContext* dbg) override;
+
+  IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+  IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
+                          Slice* result, char* scratch,
+                          IODebugContext* dbg) override;
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  SystemClock* clock_;
+  std::string file_name_;
+};
+
+// The FSSequentialFilePtr is a wrapper class that takes pointer to storage
+// systems (such as posix filesystems). It overloads operator -> and returns a
+// pointer of either FSSequentialFile or FSSequentialFileTracingWrapper based
+// on whether tracing is enabled or not. It is added to bypass
+// FSSequentialFileTracingWrapper when tracing is disabled.
+class FSSequentialFilePtr {
+ public:
+  FSSequentialFilePtr() = delete;
+  FSSequentialFilePtr(std::unique_ptr<FSSequentialFile>&& fs,
+                      const std::shared_ptr<IOTracer>& io_tracer,
+                      const std::string& file_name)
+      : io_tracer_(io_tracer),
+        fs_tracer_(std::move(fs), io_tracer_,
+                   file_name.substr(file_name.find_last_of("/\\") +
+                                    1) /* pass file name */) {}
+
+  FSSequentialFile* operator->() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return const_cast<FSSequentialFileTracingWrapper*>(&fs_tracer_);
+    } else {
+      return fs_tracer_.target();
+    }
+  }
+
+  FSSequentialFile* get() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return const_cast<FSSequentialFileTracingWrapper*>(&fs_tracer_);
+    } else {
+      return fs_tracer_.target();
+    }
+  }
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  FSSequentialFileTracingWrapper fs_tracer_;
+};
+
+// FSRandomAccessFileTracingWrapper is a wrapper class above FSRandomAccessFile
+// that forwards the call to the underlying storage system. It then invokes
+// IOTracer to record file operations and other contextual information in a
+// binary format for tracing. It overrides methods we are interested in tracing
+// and extends FSRandomAccessFileWrapper, which forwards all methods that are
+// not explicitly overridden.
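+// Note that MultiRead() is recorded as one trace record per FSReadRequest,
+// and ReadAsync() is timed from submission until its completion callback
+// fires, so the recorded latency for async reads includes queueing time.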
+class FSRandomAccessFileTracingWrapper : public FSRandomAccessFileOwnerWrapper {
+ public:
+  FSRandomAccessFileTracingWrapper(std::unique_ptr<FSRandomAccessFile>&& t,
+                                   std::shared_ptr<IOTracer> io_tracer,
+                                   const std::string& file_name)
+      : FSRandomAccessFileOwnerWrapper(std::move(t)),
+        io_tracer_(io_tracer),
+        clock_(SystemClock::Default().get()),
+        file_name_(file_name) {}
+
+  ~FSRandomAccessFileTracingWrapper() override {}
+
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override;
+
+  IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
+                     const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options,
+                    IODebugContext* dbg) override;
+
+  IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+  IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
+                     std::function<void(const FSReadRequest&, void*)> cb,
+                     void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
+                     IODebugContext* dbg) override;
+
+  void ReadAsyncCallback(const FSReadRequest& req, void* cb_arg);
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  SystemClock* clock_;
+  // Stores file name instead of full path.
+  std::string file_name_;
+
+  struct ReadAsyncCallbackInfo {
+    uint64_t start_time_;
+    std::function<void(const FSReadRequest&, void*)> cb_;
+    void* cb_arg_;
+    std::string file_op_;
+  };
+};
+
+// The FSRandomAccessFilePtr is a wrapper class that takes pointer to storage
+// systems (such as posix filesystems). It overloads operator -> and returns a
+// pointer of either FSRandomAccessFile or FSRandomAccessFileTracingWrapper
+// based on whether tracing is enabled or not. It is added to bypass
+// FSRandomAccessFileTracingWrapper when tracing is disabled.
+class FSRandomAccessFilePtr {
+ public:
+  FSRandomAccessFilePtr(std::unique_ptr<FSRandomAccessFile>&& fs,
+                        const std::shared_ptr<IOTracer>& io_tracer,
+                        const std::string& file_name)
+      : io_tracer_(io_tracer),
+        fs_tracer_(std::move(fs), io_tracer_,
+                   file_name.substr(file_name.find_last_of("/\\") +
+                                    1) /* pass file name */) {}
+
+  FSRandomAccessFile* operator->() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return const_cast<FSRandomAccessFileTracingWrapper*>(&fs_tracer_);
+    } else {
+      return fs_tracer_.target();
+    }
+  }
+
+  FSRandomAccessFile* get() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return const_cast<FSRandomAccessFileTracingWrapper*>(&fs_tracer_);
+    } else {
+      return fs_tracer_.target();
+    }
+  }
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  FSRandomAccessFileTracingWrapper fs_tracer_;
+};
+
+// FSWritableFileTracingWrapper is a wrapper class above FSWritableFile that
+// forwards the call to the underlying storage system. It then invokes IOTracer
+// to record file operations and other contextual information in a binary
+// format for tracing. It overrides methods we are interested in tracing and
+// extends FSWritableFileWrapper, which forwards all methods that are not
+// explicitly overridden.
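+// Append() records only the data length (the offset field is logged as 0),
+// PositionedAppend() records both length and offset, and GetFileSize() is
+// logged with a literal "OK" status because it does not return an IOStatus.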
+class FSWritableFileTracingWrapper : public FSWritableFileOwnerWrapper {
+ public:
+  FSWritableFileTracingWrapper(std::unique_ptr<FSWritableFile>&& t,
+                               std::shared_ptr<IOTracer> io_tracer,
+                               const std::string& file_name)
+      : FSWritableFileOwnerWrapper(std::move(t)),
+        io_tracer_(io_tracer),
+        clock_(SystemClock::Default().get()),
+        file_name_(file_name) {}
+
+  ~FSWritableFileTracingWrapper() override {}
+
+  IOStatus Append(const Slice& data, const IOOptions& options,
+                  IODebugContext* dbg) override;
+  IOStatus Append(const Slice& data, const IOOptions& options,
+                  const DataVerificationInfo& /*verification_info*/,
+                  IODebugContext* dbg) override {
+    return Append(data, options, dbg);
+  }
+
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            IODebugContext* dbg) override;
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            const DataVerificationInfo& /*verification_info*/,
+                            IODebugContext* dbg) override {
+    return PositionedAppend(data, offset, options, dbg);
+  }
+
+  IOStatus Truncate(uint64_t size, const IOOptions& options,
+                    IODebugContext* dbg) override;
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
+
+  uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  SystemClock* clock_;
+  // Stores file name instead of full path.
+  std::string file_name_;
+};
+
+// The FSWritableFilePtr is a wrapper class that takes pointer to storage
+// systems (such as posix filesystems). It overloads operator -> and returns a
+// pointer of either FSWritableFile or FSWritableFileTracingWrapper based on
+// whether tracing is enabled or not. It is added to bypass
+// FSWritableFileTracingWrapper when tracing is disabled.
+class FSWritableFilePtr {
+ public:
+  FSWritableFilePtr(std::unique_ptr<FSWritableFile>&& fs,
+                    const std::shared_ptr<IOTracer>& io_tracer,
+                    const std::string& file_name)
+      : io_tracer_(io_tracer) {
+    fs_tracer_.reset(new FSWritableFileTracingWrapper(
+        std::move(fs), io_tracer_,
+        file_name.substr(file_name.find_last_of("/\\") +
+                         1) /* pass file name */));
+  }
+
+  FSWritableFile* operator->() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return fs_tracer_.get();
+    } else {
+      return fs_tracer_->target();
+    }
+  }
+
+  FSWritableFile* get() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return fs_tracer_.get();
+    } else if (fs_tracer_) {
+      return fs_tracer_->target();
+    } else {
+      return nullptr;
+    }
+  }
+
+  void reset() {
+    fs_tracer_.reset();
+    io_tracer_ = nullptr;
+  }
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  std::unique_ptr<FSWritableFileTracingWrapper> fs_tracer_;
+};
+
+// FSRandomRWFileTracingWrapper is a wrapper class above FSRandomRWFile that
+// forwards the call to the underlying storage system. It then invokes IOTracer
+// to record file operations and other contextual information in a binary
+// format for tracing. It overrides methods we are interested in tracing and
+// extends FSRandomRWFileWrapper, which forwards all methods that are not
+// explicitly overridden.
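+// Flush(), Close(), Sync() and Fsync() carry no length/offset payload, so
+// they are recorded with io_op_data == 0 and only the elapsed time and
+// status are meaningful in those records.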
+class FSRandomRWFileTracingWrapper : public FSRandomRWFileOwnerWrapper {
+ public:
+  FSRandomRWFileTracingWrapper(std::unique_ptr<FSRandomRWFile>&& t,
+                               std::shared_ptr<IOTracer> io_tracer,
+                               const std::string& file_name)
+      : FSRandomRWFileOwnerWrapper(std::move(t)),
+        io_tracer_(io_tracer),
+        clock_(SystemClock::Default().get()),
+        file_name_(file_name) {}
+
+  ~FSRandomRWFileTracingWrapper() override {}
+
+  IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
+                 IODebugContext* dbg) override;
+
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override;
+
+  IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  SystemClock* clock_;
+  // Stores file name instead of full path.
+  std::string file_name_;
+};
+
+// The FSRandomRWFilePtr is a wrapper class that takes pointer to storage
+// systems (such as posix filesystems). It overloads operator -> and returns a
+// pointer of either FSRandomRWFile or FSRandomRWFileTracingWrapper based on
+// whether tracing is enabled or not. It is added to bypass
+// FSRandomRWFileTracingWrapper when tracing is disabled.
+class FSRandomRWFilePtr {
+ public:
+  FSRandomRWFilePtr(std::unique_ptr<FSRandomRWFile>&& fs,
+                    std::shared_ptr<IOTracer> io_tracer,
+                    const std::string& file_name)
+      : io_tracer_(io_tracer),
+        fs_tracer_(std::move(fs), io_tracer_,
+                   file_name.substr(file_name.find_last_of("/\\") +
+                                    1) /* pass file name */) {}
+
+  FSRandomRWFile* operator->() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return const_cast<FSRandomRWFileTracingWrapper*>(&fs_tracer_);
+    } else {
+      return fs_tracer_.target();
+    }
+  }
+
+  FSRandomRWFile* get() const {
+    if (io_tracer_ && io_tracer_->is_tracing_enabled()) {
+      return const_cast<FSRandomRWFileTracingWrapper*>(&fs_tracer_);
+    } else {
+      return fs_tracer_.target();
+    }
+  }
+
+ private:
+  std::shared_ptr<IOTracer> io_tracer_;
+  FSRandomRWFileTracingWrapper fs_tracer_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_posix.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_posix.cc
new file mode 100644
index 0000000000..c2e8cfaf04
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_posix.cc
@@ -0,0 +1,1284 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#if !defined(OS_WIN)
+
+#include <dirent.h>
+#ifndef ROCKSDB_NO_DYNAMIC_EXTENSION
+#include <dlfcn.h>
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#if defined(OS_LINUX) || defined(OS_SOLARIS) || defined(OS_ANDROID)
+#include <sys/statfs.h>
+#include <sys/sysmacros.h>
+#endif
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <algorithm>
+// Get nano time includes
+#if defined(OS_LINUX) || defined(OS_FREEBSD)
+#elif defined(__MACH__)
+#include <Availability.h>
+#include <mach/clock.h>
+#include <mach/mach.h>
+#else
+#include <chrono>
+#endif
+#include <deque>
+#include <set>
+#include <vector>
+
+#include "env/composite_env_wrapper.h"
+#include "env/io_posix.h"
+#include "monitoring/iostats_context_imp.h"
+#include "monitoring/thread_status_updater.h"
+#include "port/lang.h"
+#include "port/port.h"
+#include "rocksdb/options.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/utilities/object_registry.h"
+#include "test_util/sync_point.h"
+#include "util/coding.h"
+#include "util/compression_context_cache.h"
+#include "util/random.h"
+#include "util/string_util.h"
+#include "util/thread_local.h"
+#include "util/threadpool_imp.h"
+
+#if !defined(TMPFS_MAGIC)
+#define TMPFS_MAGIC 0x01021994
+#endif
+#if !defined(XFS_SUPER_MAGIC)
+#define XFS_SUPER_MAGIC 0x58465342
+#endif
+#if !defined(EXT4_SUPER_MAGIC)
+#define EXT4_SUPER_MAGIC 0xEF53
+#endif
+
+extern "C" bool RocksDbIOUringEnable() __attribute__((__weak__));
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+inline mode_t GetDBFileMode(bool allow_non_owner_access) {
+  return allow_non_owner_access ? 0644 : 0600;
+}
+
+// list of pathnames that are locked
+// Only used for error message.
+struct LockHoldingInfo {
+  int64_t acquire_time;
+  uint64_t acquiring_thread;
+};
+static std::map<std::string, LockHoldingInfo> locked_files;
+static port::Mutex mutex_locked_files;
+
+static int LockOrUnlock(int fd, bool lock) {
+  errno = 0;
+  struct flock f;
+  memset(&f, 0, sizeof(f));
+  f.l_type = (lock ? F_WRLCK : F_UNLCK);
+  f.l_whence = SEEK_SET;
+  f.l_start = 0;
+  f.l_len = 0;  // Lock/unlock entire file
+  int value = fcntl(fd, F_SETLK, &f);
+
+  return value;
+}
+
+class PosixFileLock : public FileLock {
+ public:
+  int fd_ = /*invalid*/ -1;
+  std::string filename;
+
+  void Clear() {
+    fd_ = -1;
+    filename.clear();
+  }
+
+  virtual ~PosixFileLock() override {
+    // Check for destruction without UnlockFile
+    assert(fd_ == -1);
+  }
+};
+
+int cloexec_flags(int flags, const EnvOptions* options) {
+  // If the system supports opening the file with cloexec enabled,
+  // do so, as this avoids a race condition if a db is opened around
+  // the same time that a child process is forked
+#ifdef O_CLOEXEC
+  if (options == nullptr || options->set_fd_cloexec) {
+    flags |= O_CLOEXEC;
+  }
+#else
+  (void)options;
+#endif
+  return flags;
+}
+
+class PosixFileSystem : public FileSystem {
+ public:
+  PosixFileSystem();
+
+  static const char* kClassName() { return "PosixFileSystem"; }
+  const char* Name() const override { return kClassName(); }
+  const char* NickName() const override { return kDefaultName(); }
+
+  ~PosixFileSystem() override {}
+  bool IsInstanceOf(const std::string& name) const override {
+    if (name == "posix") {
+      return true;
+    } else {
+      return FileSystem::IsInstanceOf(name);
+    }
+  }
+
+  void SetFD_CLOEXEC(int fd, const EnvOptions* options) {
+    if ((options == nullptr || options->set_fd_cloexec) && fd > 0) {
+      fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+    }
+  }
+
+  IOStatus NewSequentialFile(const std::string& fname,
+                             const FileOptions& options,
+                             std::unique_ptr<FSSequentialFile>* result,
+                             IODebugContext* /*dbg*/) override {
+    result->reset();
+    int fd = -1;
+    int flags = cloexec_flags(O_RDONLY, &options);
+    FILE* file = nullptr;
+
+    if (options.use_direct_reads && !options.use_mmap_reads) {
+#if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS)
+      flags |= O_DIRECT;
+      TEST_SYNC_POINT_CALLBACK("NewSequentialFile:O_DIRECT", &flags);
+#endif
+    }
+
+    do {
+      IOSTATS_TIMER_GUARD(open_nanos);
+      fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_));
+    } while (fd < 0 && errno == EINTR);
+    if (fd < 0) {
+      return IOError("While opening a file for sequentially reading", fname,
+                     errno);
+    }
+
+    SetFD_CLOEXEC(fd, &options);
+
+    if (options.use_direct_reads && !options.use_mmap_reads) {
+#ifdef OS_MACOSX
+      if (fcntl(fd, F_NOCACHE, 1) == -1) {
+        close(fd);
+        return IOError("While fcntl NoCache", fname, errno);
+      }
+#endif
+    } else {
+      do {
+        IOSTATS_TIMER_GUARD(open_nanos);
+        file = fdopen(fd, "r");
+      } while (file == nullptr && errno == EINTR);
+      if (file == nullptr) {
+        close(fd);
+        return IOError("While opening file for sequentially read", fname,
+                       errno);
+      }
+    }
+    result->reset(new PosixSequentialFile(
+        fname, file, fd, GetLogicalBlockSizeForReadIfNeeded(options, fname, fd),
+        options));
+    return IOStatus::OK();
+  }
+
+  IOStatus NewRandomAccessFile(const std::string& fname,
+                               const FileOptions& options,
+                               std::unique_ptr<FSRandomAccessFile>* result,
+                               IODebugContext* /*dbg*/) override {
+    result->reset();
+    IOStatus s = IOStatus::OK();
+    int fd;
+    int flags = cloexec_flags(O_RDONLY, &options);
+
+    if (options.use_direct_reads && !options.use_mmap_reads) {
+#if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS)
+      flags |= O_DIRECT;
+      TEST_SYNC_POINT_CALLBACK("NewRandomAccessFile:O_DIRECT", &flags);
+#endif
+    }
+
+    do {
+      IOSTATS_TIMER_GUARD(open_nanos);
+      fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_));
+    } while (fd < 0 && errno == EINTR);
+    if (fd < 0) {
+      s = IOError("While open a file for random read", fname, errno);
+      return s;
+    }
+    SetFD_CLOEXEC(fd, &options);
+
+    if (options.use_mmap_reads) {
+      // Use of mmap for random reads has been removed because it
+      // kills performance when storage is fast.
+      // Use mmap when virtual address-space is plentiful.
+      uint64_t size;
+      IOOptions opts;
+      s = GetFileSize(fname, opts, &size, nullptr);
+      if (s.ok()) {
+        void* base = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd, 0);
+        if (base != MAP_FAILED) {
+          result->reset(
+              new PosixMmapReadableFile(fd, fname, base, size, options));
+        } else {
+          s = IOError("while mmap file for read", fname, errno);
+          close(fd);
+        }
+      } else {
+        close(fd);
+      }
+    } else {
+      if (options.use_direct_reads && !options.use_mmap_reads) {
+#ifdef OS_MACOSX
+        if (fcntl(fd, F_NOCACHE, 1) == -1) {
+          close(fd);
+          return IOError("while fcntl NoCache", fname, errno);
+        }
+#endif
+      }
+      result->reset(new PosixRandomAccessFile(
+          fname, fd, GetLogicalBlockSizeForReadIfNeeded(options, fname, fd),
+          options
+#if defined(ROCKSDB_IOURING_PRESENT)
+          ,
+          !IsIOUringEnabled() ? nullptr : thread_local_io_urings_.get()
+#endif
+          ));
+    }
+    return s;
+  }
+
+  virtual IOStatus OpenWritableFile(const std::string& fname,
+                                    const FileOptions& options, bool reopen,
+                                    std::unique_ptr<FSWritableFile>* result,
+                                    IODebugContext* /*dbg*/) {
+    result->reset();
+    IOStatus s;
+    int fd = -1;
+    int flags = (reopen) ? (O_CREAT | O_APPEND) : (O_CREAT | O_TRUNC);
+    // Direct IO mode with O_DIRECT flag or F_NOCACHE (MAC OSX)
+    if (options.use_direct_writes && !options.use_mmap_writes) {
+      // Note: we should avoid O_APPEND here due to the following bug:
+      // POSIX requires that opening a file with the O_APPEND flag should
+      // have no effect on the location at which pwrite() writes data.
+      // However, on Linux, if a file is opened with O_APPEND, pwrite()
+      // appends data to the end of the file, regardless of the value of
+      // offset.
+      // More info here: https://linux.die.net/man/2/pwrite
+      flags |= O_WRONLY;
+#if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS)
+      flags |= O_DIRECT;
+#endif
+      TEST_SYNC_POINT_CALLBACK("NewWritableFile:O_DIRECT", &flags);
+    } else if (options.use_mmap_writes) {
+      // non-direct I/O
+      flags |= O_RDWR;
+    } else {
+      flags |= O_WRONLY;
+    }
+
+    flags = cloexec_flags(flags, &options);
+
+    do {
+      IOSTATS_TIMER_GUARD(open_nanos);
+      fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_));
+    } while (fd < 0 && errno == EINTR);
+
+    if (fd < 0) {
+      s = IOError("While open a file for appending", fname, errno);
+      return s;
+    }
+    SetFD_CLOEXEC(fd, &options);
+
+    if (options.use_mmap_writes) {
+      MaybeForceDisableMmap(fd);
+    }
+    if (options.use_mmap_writes && !forceMmapOff_) {
+      result->reset(new PosixMmapFile(fname, fd, page_size_, options));
+    } else if (options.use_direct_writes && !options.use_mmap_writes) {
+#ifdef OS_MACOSX
+      if (fcntl(fd, F_NOCACHE, 1) == -1) {
+        close(fd);
+        s = IOError("While fcntl NoCache an opened file for appending", fname,
+                    errno);
+        return s;
+      }
+#elif defined(OS_SOLARIS)
+      if (directio(fd, DIRECTIO_ON) == -1) {
+        if (errno != ENOTTY) {  // ZFS filesystems don't support DIRECTIO_ON
+          close(fd);
+          s = IOError("While calling directio()", fname, errno);
+          return s;
+        }
+      }
+#endif
+      result->reset(new PosixWritableFile(
+          fname, fd, GetLogicalBlockSizeForWriteIfNeeded(options, fname, fd),
+          options));
+    } else {
+      // disable mmap writes
+      EnvOptions no_mmap_writes_options = options;
+      no_mmap_writes_options.use_mmap_writes = false;
+      result->reset(
+          new PosixWritableFile(fname, fd,
+                                GetLogicalBlockSizeForWriteIfNeeded(
+                                    no_mmap_writes_options, fname, fd),
+                                no_mmap_writes_options));
+    }
+    return s;
+  }
+
+  IOStatus NewWritableFile(const std::string& fname, const FileOptions& options,
+                           std::unique_ptr<FSWritableFile>* result,
+                           IODebugContext* dbg) override {
+    return OpenWritableFile(fname, options, false, result, dbg);
+  }
+
+  IOStatus ReopenWritableFile(const std::string& fname,
+                              const FileOptions& options,
+                              std::unique_ptr<FSWritableFile>* result,
+                              IODebugContext* dbg) override {
+    return OpenWritableFile(fname, options, true, result, dbg);
+  }
+
+  IOStatus ReuseWritableFile(const std::string& fname,
+                             const std::string& old_fname,
+                             const FileOptions& options,
+                             std::unique_ptr<FSWritableFile>* result,
+                             IODebugContext* /*dbg*/) override {
+    result->reset();
+    IOStatus s;
+    int fd = -1;
+
+    int flags = 0;
+    // Direct IO mode with O_DIRECT flag or F_NOCACHE (MAC OSX)
+    if (options.use_direct_writes && !options.use_mmap_writes) {
+      flags |= O_WRONLY;
+#if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS)
+      flags |= O_DIRECT;
+#endif
+      TEST_SYNC_POINT_CALLBACK("NewWritableFile:O_DIRECT", &flags);
+    } else if (options.use_mmap_writes) {
+      // mmap needs O_RDWR mode
+      flags |= O_RDWR;
+    } else {
+      flags |= O_WRONLY;
+    }
+
+    flags = cloexec_flags(flags, &options);
+
+    do {
+      IOSTATS_TIMER_GUARD(open_nanos);
+      fd = open(old_fname.c_str(), flags,
+                GetDBFileMode(allow_non_owner_access_));
+    } while (fd < 0 && errno == EINTR);
+    if (fd < 0) {
+      s = IOError("while reopen file for write", fname, errno);
+      return s;
+    }
+
+    SetFD_CLOEXEC(fd, &options);
+    // rename into place
+    if (rename(old_fname.c_str(), fname.c_str()) != 0) {
+      s = IOError("while rename file to " + fname, old_fname, errno);
+      close(fd);
+      return s;
+    }
+
+    if (options.use_mmap_writes) {
+      MaybeForceDisableMmap(fd);
+    }
+    if (options.use_mmap_writes && !forceMmapOff_) {
+      result->reset(new PosixMmapFile(fname, fd, page_size_, options));
+    } else if (options.use_direct_writes && !options.use_mmap_writes) {
+#ifdef OS_MACOSX
+      if (fcntl(fd, F_NOCACHE, 1) == -1) {
+        close(fd);
+        s = IOError("while fcntl NoCache for reopened file for append", fname,
+                    errno);
+        return s;
+      }
+#elif defined(OS_SOLARIS)
+      if (directio(fd, DIRECTIO_ON) == -1) {
+        if (errno != ENOTTY) {  // ZFS filesystems don't support DIRECTIO_ON
+          close(fd);
+          s = IOError("while calling directio()", fname, errno);
+          return s;
+        }
+      }
+#endif
+      result->reset(new PosixWritableFile(
+          fname, fd, GetLogicalBlockSizeForWriteIfNeeded(options, fname, fd),
+          options));
+    } else {
+      // disable mmap writes
+      FileOptions no_mmap_writes_options = options;
+      no_mmap_writes_options.use_mmap_writes = false;
+      result->reset(
+          new PosixWritableFile(fname, fd,
+                                GetLogicalBlockSizeForWriteIfNeeded(
+                                    no_mmap_writes_options, fname, fd),
+                                no_mmap_writes_options));
+    }
+    return s;
+  }
+
+  IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& options,
+                           std::unique_ptr<FSRandomRWFile>* result,
+                           IODebugContext* /*dbg*/) override {
+    int fd = -1;
+    int flags = cloexec_flags(O_RDWR, &options);
+
+    while (fd < 0) {
+      IOSTATS_TIMER_GUARD(open_nanos);
+
+      fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_));
+      if (fd < 0) {
+        // Error while opening the file
+        if (errno == EINTR) {
+          continue;
+        }
+        return IOError("While open file for random read/write", fname, errno);
+      }
+    }
+
+    SetFD_CLOEXEC(fd, &options);
+    result->reset(new PosixRandomRWFile(fname, fd, options));
+    return IOStatus::OK();
+  }
+
+  IOStatus NewMemoryMappedFileBuffer(
+      const std::string& fname,
+      std::unique_ptr<MemoryMappedFileBuffer>* result) override {
+    int fd = -1;
+    IOStatus status;
+    int flags = cloexec_flags(O_RDWR, nullptr);
+
+    while (fd < 0) {
+      IOSTATS_TIMER_GUARD(open_nanos);
+      fd = open(fname.c_str(), flags, 0644);
+      if (fd < 0) {
+        // Error while opening the file
+        if (errno == EINTR) {
+          continue;
+        }
+        status =
+            IOError("While open file for raw mmap buffer access", fname, errno);
+        break;
+      }
+    }
+    uint64_t size;
+    if (status.ok()) {
+      IOOptions opts;
+      status = GetFileSize(fname, opts, &size, nullptr);
+    }
+    void* base = nullptr;
+    if (status.ok()) {
+      base = mmap(nullptr, static_cast<size_t>(size), PROT_READ | PROT_WRITE,
+                  MAP_SHARED, fd, 0);
+      if (base == MAP_FAILED) {
+        status = IOError("while mmap file for read", fname, errno);
+      }
+    }
+    if (status.ok()) {
+      result->reset(
+          new PosixMemoryMappedFileBuffer(base, static_cast<size_t>(size)));
+    }
+    if (fd >= 0) {
+      // don't need to keep it open after mmap has been called
+      close(fd);
+    }
+    return status;
+  }
+
+  IOStatus NewDirectory(const std::string& name, const IOOptions& /*opts*/,
+                        std::unique_ptr<FSDirectory>* result,
+                        IODebugContext* /*dbg*/) override {
+    result->reset();
+    int fd;
+    int flags = cloexec_flags(0, nullptr);
+    {
+      IOSTATS_TIMER_GUARD(open_nanos);
+      fd = open(name.c_str(), flags);
+    }
+    if (fd < 0) {
+      return IOError("While open directory", name, errno);
+    } else {
+      result->reset(new PosixDirectory(fd, name));
+    }
+    return IOStatus::OK();
+  }
+
+  IOStatus FileExists(const std::string& fname, const IOOptions& /*opts*/,
+                      IODebugContext* /*dbg*/) override {
+    int result = access(fname.c_str(), F_OK);
+
+    if (result == 0) {
+      return IOStatus::OK();
+    }
+
+    int err = errno;
+    switch (err) {
+      case EACCES:
+      case ELOOP:
+      case ENAMETOOLONG:
+      case ENOENT:
+      case ENOTDIR:
+        return IOStatus::NotFound();
+      default:
+        assert(err == EIO || err == ENOMEM);
+        return IOStatus::IOError("Unexpected error(" + std::to_string(err) +
+                                 ") accessing file `" + fname + "' ");
+    }
+  }
+
+  IOStatus GetChildren(const std::string& dir, const IOOptions& opts,
+                       std::vector<std::string>* result,
+                       IODebugContext* /*dbg*/) override {
+    result->clear();
+
+    DIR* d = opendir(dir.c_str());
+    if (d == nullptr) {
+      switch (errno) {
+        case EACCES:
+        case ENOENT:
+        case ENOTDIR:
+          return IOStatus::NotFound();
+        default:
+          return IOError("While opendir", dir, errno);
+      }
+    }
+
+    // reset errno before calling readdir()
+    errno = 0;
+    struct dirent* entry;
+
+    while ((entry = readdir(d)) != nullptr) {
+      // filter out '.' and '..' directory entries
+      // which appear only on some platforms
+      const bool ignore =
+          entry->d_type == DT_DIR &&
+          (strcmp(entry->d_name, ".") == 0 ||
+           strcmp(entry->d_name, "..") == 0
+#ifndef ASSERT_STATUS_CHECKED
+           // In case of ASSERT_STATUS_CHECKED, GetChildren support older
+           // version of API for debugging purpose.
+           || opts.do_not_recurse
+#endif
+          );
+      if (!ignore) {
+        result->push_back(entry->d_name);
+      }
+      errno = 0;  // reset errno if readdir() success
+    }
+
+    // always attempt to close the dir
+    const auto pre_close_errno = errno;  // errno may be modified by closedir
+    const int close_result = closedir(d);
+
+    if (pre_close_errno != 0) {
+      // error occurred during readdir
+      return IOError("While readdir", dir, pre_close_errno);
+    }
+
+    if (close_result != 0) {
+      // error occurred during closedir
+      return IOError("While closedir", dir, errno);
+    }
+
+    return IOStatus::OK();
+  }
+
+  IOStatus DeleteFile(const std::string& fname, const IOOptions& /*opts*/,
+                      IODebugContext* /*dbg*/) override {
+    IOStatus result;
+    if (unlink(fname.c_str()) != 0) {
+      result = IOError("while unlink() file", fname, errno);
+    }
+    return result;
+  }
+
+  IOStatus CreateDir(const std::string& name, const IOOptions& /*opts*/,
+                     IODebugContext* /*dbg*/) override {
+    if (mkdir(name.c_str(), 0755) != 0) {
+      return IOError("While mkdir", name, errno);
+    }
+    return IOStatus::OK();
+  }
+
+  IOStatus CreateDirIfMissing(const std::string& name,
+                              const IOOptions& /*opts*/,
+                              IODebugContext* /*dbg*/) override {
+    if (mkdir(name.c_str(), 0755) != 0) {
+      if (errno != EEXIST) {
+        return IOError("While mkdir if missing", name, errno);
+      } else if (!DirExists(name)) {  // Check that name is actually a
+                                      // directory.
+        // Message is taken from mkdir
+        return IOStatus::IOError("`" + name +
+                                 "' exists but is not a directory");
+      }
+    }
+    return IOStatus::OK();
+  }
+
+  IOStatus DeleteDir(const std::string& name, const IOOptions& /*opts*/,
+                     IODebugContext* /*dbg*/) override {
+    if (rmdir(name.c_str()) != 0) {
+      return IOError("file rmdir", name, errno);
+    }
+    return IOStatus::OK();
+  }
+
+  IOStatus GetFileSize(const std::string& fname, const IOOptions& /*opts*/,
+                       uint64_t* size, IODebugContext* /*dbg*/) override {
+    struct stat sbuf;
+    if (stat(fname.c_str(), &sbuf) != 0) {
+      *size = 0;
+      return IOError("while stat a file for size", fname, errno);
+    } else {
+      *size = sbuf.st_size;
+    }
+    return IOStatus::OK();
+  }
+
+  IOStatus GetFileModificationTime(const std::string& fname,
+                                   const IOOptions& /*opts*/,
+                                   uint64_t* file_mtime,
+                                   IODebugContext* /*dbg*/) override {
+    struct stat s;
+    if (stat(fname.c_str(), &s) != 0) {
+      return IOError("while stat a file for modification time", fname, errno);
+    }
+    *file_mtime = static_cast<uint64_t>(s.st_mtime);
+    return IOStatus::OK();
+  }
+
+  IOStatus RenameFile(const std::string& src, const std::string& target,
+                      const IOOptions& /*opts*/,
+                      IODebugContext* /*dbg*/) override {
+    if (rename(src.c_str(), target.c_str()) != 0) {
+      return IOError("While renaming a file to " + target, src, errno);
+    }
+    return IOStatus::OK();
+  }
+
+  IOStatus LinkFile(const std::string& src, const std::string& target,
+                    const IOOptions& /*opts*/,
+                    IODebugContext* /*dbg*/) override {
+    if (link(src.c_str(), target.c_str()) != 0) {
+      if (errno == EXDEV || errno == ENOTSUP) {
+        return IOStatus::NotSupported(errno == EXDEV
+                                          ? "No cross FS links allowed"
+                                          : "Links not supported by FS");
+      }
+      return IOError("while link file to " + target, src, errno);
+    }
+    return IOStatus::OK();
+  }
+
+  IOStatus NumFileLinks(const std::string& fname, const IOOptions& /*opts*/,
+                        uint64_t* count, IODebugContext* /*dbg*/) override {
+    struct stat s;
+    if (stat(fname.c_str(), &s) != 0) {
+      return IOError("while stat a file for num file links", fname, errno);
+    }
+    *count = static_cast<uint64_t>(s.st_nlink);
+    return IOStatus::OK();
+  }
+
+  IOStatus AreFilesSame(const std::string& first, const std::string& second,
+                        const IOOptions& /*opts*/, bool* res,
+                        IODebugContext* /*dbg*/) override {
+    struct stat statbuf[2];
+    if (stat(first.c_str(), &statbuf[0]) != 0) {
+      return IOError("stat file", first, errno);
+    }
+    if (stat(second.c_str(), &statbuf[1]) != 0) {
+      return IOError("stat file", second, errno);
+    }
+
+    if (major(statbuf[0].st_dev) != major(statbuf[1].st_dev) ||
+        minor(statbuf[0].st_dev) != minor(statbuf[1].st_dev) ||
+        statbuf[0].st_ino != statbuf[1].st_ino) {
+      *res = false;
+    } else {
+      *res = true;
+    }
+    return IOStatus::OK();
+  }
+
+  IOStatus LockFile(const std::string& fname, const IOOptions& /*opts*/,
+                    FileLock** lock, IODebugContext* /*dbg*/) override {
+    *lock = nullptr;
+
+    LockHoldingInfo lhi;
+    int64_t current_time = 0;
+    // Ignore status code as the time is only used for error message.
+    SystemClock::Default()
+        ->GetCurrentTime(&current_time)
+        .PermitUncheckedError();
+    lhi.acquire_time = current_time;
+    lhi.acquiring_thread = Env::Default()->GetThreadID();
+
+    mutex_locked_files.Lock();
+    // If it already exists in the locked_files set, then it is already locked,
+    // and fail this lock attempt. Otherwise, insert it into locked_files.
+    // This check is needed because fcntl() does not detect lock conflict
+    // if the fcntl is issued by the same thread that earlier acquired
+    // this lock.
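+    // (For example, two LockFile() calls on the same path from one process
+    // would both succeed at the fcntl() level; only the locked_files
+    // bookkeeping rejects the second one.)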
+    // We must do this check *before* opening the file:
+    // Otherwise, we will open a new file descriptor. Locks are associated with
+    // a process, not a file descriptor and when *any* file descriptor is
+    // closed, all locks the process holds for that *file* are released
+    const auto it_success = locked_files.insert({fname, lhi});
+    if (it_success.second == false) {
+      LockHoldingInfo prev_info = it_success.first->second;
+      mutex_locked_files.Unlock();
+      errno = ENOLCK;
+      // Note that the thread ID printed is the same one as the one in
+      // posix logger, but posix logger prints it in hex format.
+      return IOError("lock held by current process, acquire time " +
+                         std::to_string(prev_info.acquire_time) +
+                         " acquiring thread " +
+                         std::to_string(prev_info.acquiring_thread),
+                     fname, errno);
+    }
+
+    IOStatus result = IOStatus::OK();
+    int fd;
+    int flags = cloexec_flags(O_RDWR | O_CREAT, nullptr);
+
+    {
+      IOSTATS_TIMER_GUARD(open_nanos);
+      fd = open(fname.c_str(), flags, 0644);
+    }
+    if (fd < 0) {
+      result = IOError("while open a file for lock", fname, errno);
+    } else if (LockOrUnlock(fd, true) == -1) {
+      result = IOError("While lock file", fname, errno);
+      close(fd);
+    } else {
+      SetFD_CLOEXEC(fd, nullptr);
+      PosixFileLock* my_lock = new PosixFileLock;
+      my_lock->fd_ = fd;
+      my_lock->filename = fname;
+      *lock = my_lock;
+    }
+    if (!result.ok()) {
+      // If there is an error in locking, then remove the pathname from
+      // locked_files. (If we got this far, it did not exist in locked_files
+      // before this call.)
+      locked_files.erase(fname);
+    }
+
+    mutex_locked_files.Unlock();
+    return result;
+  }
+
+  IOStatus UnlockFile(FileLock* lock, const IOOptions& /*opts*/,
+                      IODebugContext* /*dbg*/) override {
+    PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
+    IOStatus result;
+    mutex_locked_files.Lock();
+    // If we are unlocking, then verify that we had locked it earlier,
+    // it should already exist in locked_files. Remove it from locked_files.
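+    // An erase() count of 0 means this process never recorded a lock on the
+    // file, so the unlock attempt is rejected before touching fcntl().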
+ if (locked_files.erase(my_lock->filename) != 1) { + errno = ENOLCK; + result = IOError("unlock", my_lock->filename, errno); + } else if (LockOrUnlock(my_lock->fd_, false) == -1) { + result = IOError("unlock", my_lock->filename, errno); + } + close(my_lock->fd_); + my_lock->Clear(); + delete my_lock; + mutex_locked_files.Unlock(); + return result; + } + + IOStatus GetAbsolutePath(const std::string& db_path, + const IOOptions& /*opts*/, std::string* output_path, + IODebugContext* /*dbg*/) override { + if (!db_path.empty() && db_path[0] == '/') { + *output_path = db_path; + return IOStatus::OK(); + } + + char the_path[4096]; + char* ret = getcwd(the_path, 4096); + if (ret == nullptr) { + return IOStatus::IOError(errnoStr(errno).c_str()); + } + + *output_path = ret; + return IOStatus::OK(); + } + + IOStatus GetTestDirectory(const IOOptions& /*opts*/, std::string* result, + IODebugContext* /*dbg*/) override { + const char* env = getenv("TEST_TMPDIR"); + if (env && env[0] != '\0') { + *result = env; + } else { + char buf[100]; + snprintf(buf, sizeof(buf), "/tmp/rocksdbtest-%d", int(geteuid())); + *result = buf; + } + // Directory may already exist + { + IOOptions opts; + return CreateDirIfMissing(*result, opts, nullptr); + } + return IOStatus::OK(); + } + + IOStatus GetFreeSpace(const std::string& fname, const IOOptions& /*opts*/, + uint64_t* free_space, + IODebugContext* /*dbg*/) override { + struct statvfs sbuf; + + if (statvfs(fname.c_str(), &sbuf) < 0) { + return IOError("While doing statvfs", fname, errno); + } + + // sbuf.bfree is total free space available to root + // sbuf.bavail is total free space available to unprivileged user + // sbuf.bavail <= sbuf.bfree ... pick correct based upon effective user id + if (geteuid()) { + // non-zero user is unprivileged, or -1 if error. take more conservative + // size + *free_space = ((uint64_t)sbuf.f_bsize * sbuf.f_bavail); + } else { + // root user can access all disk space + *free_space = ((uint64_t)sbuf.f_bsize * sbuf.f_bfree); + } + return IOStatus::OK(); + } + + IOStatus IsDirectory(const std::string& path, const IOOptions& /*opts*/, + bool* is_dir, IODebugContext* /*dbg*/) override { + // First open + int fd = -1; + int flags = cloexec_flags(O_RDONLY, nullptr); + { + IOSTATS_TIMER_GUARD(open_nanos); + fd = open(path.c_str(), flags); + } + if (fd < 0) { + return IOError("While open for IsDirectory()", path, errno); + } + IOStatus io_s; + struct stat sbuf; + if (fstat(fd, &sbuf) < 0) { + io_s = IOError("While doing stat for IsDirectory()", path, errno); + } + close(fd); + if (io_s.ok() && nullptr != is_dir) { + *is_dir = S_ISDIR(sbuf.st_mode); + } + return io_s; + } + + FileOptions OptimizeForLogWrite(const FileOptions& file_options, + const DBOptions& db_options) const override { + FileOptions optimized = file_options; + optimized.use_mmap_writes = false; + optimized.use_direct_writes = false; + optimized.bytes_per_sync = db_options.wal_bytes_per_sync; + // TODO(icanadi) it's faster if fallocate_with_keep_size is false, but it + // breaks TransactionLogIteratorStallAtLastRecord unit test. 
Fix the unit + // test and make this false + optimized.fallocate_with_keep_size = true; + optimized.writable_file_max_buffer_size = + db_options.writable_file_max_buffer_size; + return optimized; + } + + FileOptions OptimizeForManifestWrite( + const FileOptions& file_options) const override { + FileOptions optimized = file_options; + optimized.use_mmap_writes = false; + optimized.use_direct_writes = false; + optimized.fallocate_with_keep_size = true; + return optimized; + } +#ifdef OS_LINUX + Status RegisterDbPaths(const std::vector& paths) override { + return logical_block_size_cache_.RefAndCacheLogicalBlockSize(paths); + } + Status UnregisterDbPaths(const std::vector& paths) override { + logical_block_size_cache_.UnrefAndTryRemoveCachedLogicalBlockSize(paths); + return Status::OK(); + } +#endif + private: + bool forceMmapOff_ = false; // do we override Env options? + + // Returns true iff the named directory exists and is a directory. + virtual bool DirExists(const std::string& dname) { + struct stat statbuf; + if (stat(dname.c_str(), &statbuf) == 0) { + return S_ISDIR(statbuf.st_mode); + } + return false; // stat() failed return false + } + + bool SupportsFastAllocate(int fd) { +#ifdef ROCKSDB_FALLOCATE_PRESENT + struct statfs s; + if (fstatfs(fd, &s)) { + return false; + } + switch (s.f_type) { + case EXT4_SUPER_MAGIC: + return true; + case XFS_SUPER_MAGIC: + return true; + case TMPFS_MAGIC: + return true; + default: + return false; + } +#else + (void)fd; + return false; +#endif + } + + void MaybeForceDisableMmap(int fd) { + static std::once_flag s_check_disk_for_mmap_once; + assert(this == FileSystem::Default().get()); + std::call_once( + s_check_disk_for_mmap_once, + [this](int fdesc) { + // this will be executed once in the program's lifetime. + // do not use mmapWrite on non ext-3/xfs/tmpfs systems. + if (!SupportsFastAllocate(fdesc)) { + forceMmapOff_ = true; + } + }, + fd); + } + +#ifdef ROCKSDB_IOURING_PRESENT + bool IsIOUringEnabled() { + if (RocksDbIOUringEnable && RocksDbIOUringEnable()) { + return true; + } else { + return false; + } + } +#endif // ROCKSDB_IOURING_PRESENT + + // EXPERIMENTAL + // + // TODO akankshamahajan: + // 1. Update Poll API to take into account min_completions + // and returns if number of handles in io_handles (any order) completed is + // equal to atleast min_completions. + // 2. Currently in case of direct_io, Read API is called because of which call + // to Poll API fails as it expects IOHandle to be populated. + virtual IOStatus Poll(std::vector& io_handles, + size_t /*min_completions*/) override { +#if defined(ROCKSDB_IOURING_PRESENT) + // io_uring_queue_init. + struct io_uring* iu = nullptr; + if (thread_local_io_urings_) { + iu = static_cast(thread_local_io_urings_->Get()); + } + + // Init failed, platform doesn't support io_uring. + if (iu == nullptr) { + return IOStatus::NotSupported("Poll"); + } + + for (size_t i = 0; i < io_handles.size(); i++) { + // The request has been completed in earlier runs. + if ((static_cast(io_handles[i]))->is_finished) { + continue; + } + // Loop until IO for io_handles[i] is completed. + while (true) { + // io_uring_wait_cqe. + struct io_uring_cqe* cqe = nullptr; + ssize_t ret = io_uring_wait_cqe(iu, &cqe); + if (ret) { + // abort as it shouldn't be in indeterminate state and there is no + // good way currently to handle this error. + abort(); + } + + // Step 3: Populate the request. 
+        assert(cqe != nullptr);
+        Posix_IOHandle* posix_handle =
+            static_cast<Posix_IOHandle*>(io_uring_cqe_get_data(cqe));
+        assert(posix_handle->iu == iu);
+        if (posix_handle->iu != iu) {
+          return IOStatus::IOError("");
+        }
+        // Reset cqe data to catch any stray reuse of it
+        static_cast<struct io_uring_cqe*>(cqe)->user_data = 0xd5d5d5d5d5d5d5d5;
+
+        FSReadRequest req;
+        req.scratch = posix_handle->scratch;
+        req.offset = posix_handle->offset;
+        req.len = posix_handle->len;
+
+        size_t finished_len = 0;
+        size_t bytes_read = 0;
+        bool read_again = false;
+        UpdateResult(cqe, "", req.len, posix_handle->iov.iov_len,
+                     true /*async_read*/, posix_handle->use_direct_io,
+                     posix_handle->alignment, finished_len, &req, bytes_read,
+                     read_again);
+        posix_handle->is_finished = true;
+        io_uring_cqe_seen(iu, cqe);
+        posix_handle->cb(req, posix_handle->cb_arg);
+
+        (void)finished_len;
+        (void)bytes_read;
+        (void)read_again;
+
+        if (static_cast<Posix_IOHandle*>(io_handles[i]) == posix_handle) {
+          break;
+        }
+      }
+    }
+    return IOStatus::OK();
+#else
+    (void)io_handles;
+    return IOStatus::NotSupported("Poll");
+#endif
+  }
+
+  virtual IOStatus AbortIO(std::vector<void*>& io_handles) override {
+#if defined(ROCKSDB_IOURING_PRESENT)
+    // io_uring_queue_init.
+    struct io_uring* iu = nullptr;
+    if (thread_local_io_urings_) {
+      iu = static_cast<struct io_uring*>(thread_local_io_urings_->Get());
+    }
+
+    // Init failed, platform doesn't support io_uring.
+    // If Poll is not supported then it didn't submit any request and it should
+    // return OK.
+    if (iu == nullptr) {
+      return IOStatus::OK();
+    }
+
+    for (size_t i = 0; i < io_handles.size(); i++) {
+      Posix_IOHandle* posix_handle =
+          static_cast<Posix_IOHandle*>(io_handles[i]);
+      if (posix_handle->is_finished == true) {
+        continue;
+      }
+      assert(posix_handle->iu == iu);
+      if (posix_handle->iu != iu) {
+        return IOStatus::IOError("");
+      }
+
+      // Prepare the cancel request.
+      struct io_uring_sqe* sqe;
+      sqe = io_uring_get_sqe(iu);
+
+      // In order to cancel the request, sqe->addr of cancel request should
+      // match with the read request submitted which is posix_handle->iov.
+      io_uring_prep_cancel(sqe, &posix_handle->iov, 0);
+      // Sets sqe->user_data to posix_handle.
+      io_uring_sqe_set_data(sqe, posix_handle);
+
+      // submit the request.
+      ssize_t ret = io_uring_submit(iu);
+      if (ret < 0) {
+        fprintf(stderr, "io_uring_submit error: %ld\n", long(ret));
+        return IOStatus::IOError("io_uring_submit() requested but returned " +
+                                 std::to_string(ret));
+      }
+    }
+
+    // After submitting the requests, wait for the requests.
+    for (size_t i = 0; i < io_handles.size(); i++) {
+      if ((static_cast<Posix_IOHandle*>(io_handles[i]))->is_finished) {
+        continue;
+      }
+
+      while (true) {
+        struct io_uring_cqe* cqe = nullptr;
+        ssize_t ret = io_uring_wait_cqe(iu, &cqe);
+        if (ret) {
+          // abort as it shouldn't be in indeterminate state and there is no
+          // good way currently to handle this error.
+          abort();
+        }
+        assert(cqe != nullptr);
+
+        // Returns cqe->user_data.
+        Posix_IOHandle* posix_handle =
+            static_cast<Posix_IOHandle*>(io_uring_cqe_get_data(cqe));
+        assert(posix_handle->iu == iu);
+        if (posix_handle->iu != iu) {
+          return IOStatus::IOError("");
+        }
+        posix_handle->req_count++;
+
+        // Reset cqe data to catch any stray reuse of it
+        static_cast<struct io_uring_cqe*>(cqe)->user_data = 0xd5d5d5d5d5d5d5d5;
+        io_uring_cqe_seen(iu, cqe);
+
+        // - If the request is cancelled successfully, the original request is
+        //   completed with -ECANCELED and the cancel request is completed
+        //   with a result of 0.
+        // - If the request was already running, the original may or
+        //   may not complete in error.
The cancel request will complete with + // -EALREADY for that case. + // - And finally, if the request to cancel wasn't + // found, the cancel request is completed with -ENOENT. + // + // Every handle has to wait for 2 requests completion: original one and + // the cancel request which is tracked by PosixHandle::req_count. + if (posix_handle->req_count == 2 && + static_cast(io_handles[i]) == posix_handle) { + posix_handle->is_finished = true; + FSReadRequest req; + req.status = IOStatus::Aborted(); + posix_handle->cb(req, posix_handle->cb_arg); + + break; + } + } + } + return IOStatus::OK(); +#else + // If Poll is not supported then it didn't submit any request and it should + // return OK. + (void)io_handles; + return IOStatus::OK(); +#endif + } + + bool use_async_io() override { +#if defined(ROCKSDB_IOURING_PRESENT) + return IsIOUringEnabled(); +#else + return false; +#endif + } + +#if defined(ROCKSDB_IOURING_PRESENT) + // io_uring instance + std::unique_ptr thread_local_io_urings_; +#endif + + size_t page_size_; + + // If true, allow non owner read access for db files. Otherwise, non-owner + // has no access to db files. + bool allow_non_owner_access_; + +#ifdef OS_LINUX + static LogicalBlockSizeCache logical_block_size_cache_; +#endif + static size_t GetLogicalBlockSize(const std::string& fname, int fd); + // In non-direct IO mode, this directly returns kDefaultPageSize. + // Otherwise call GetLogicalBlockSize. + static size_t GetLogicalBlockSizeForReadIfNeeded(const EnvOptions& options, + const std::string& fname, + int fd); + static size_t GetLogicalBlockSizeForWriteIfNeeded(const EnvOptions& options, + const std::string& fname, + int fd); +}; + +#ifdef OS_LINUX +LogicalBlockSizeCache PosixFileSystem::logical_block_size_cache_; +#endif + +size_t PosixFileSystem::GetLogicalBlockSize(const std::string& fname, int fd) { +#ifdef OS_LINUX + return logical_block_size_cache_.GetLogicalBlockSize(fname, fd); +#else + (void)fname; + return PosixHelper::GetLogicalBlockSizeOfFd(fd); +#endif +} + +size_t PosixFileSystem::GetLogicalBlockSizeForReadIfNeeded( + const EnvOptions& options, const std::string& fname, int fd) { + return options.use_direct_reads + ? PosixFileSystem::GetLogicalBlockSize(fname, fd) + : kDefaultPageSize; +} + +size_t PosixFileSystem::GetLogicalBlockSizeForWriteIfNeeded( + const EnvOptions& options, const std::string& fname, int fd) { + return options.use_direct_writes + ? PosixFileSystem::GetLogicalBlockSize(fname, fd) + : kDefaultPageSize; +} + +PosixFileSystem::PosixFileSystem() + : forceMmapOff_(false), + page_size_(getpagesize()), + allow_non_owner_access_(true) { +#if defined(ROCKSDB_IOURING_PRESENT) + // Test whether IOUring is supported, and if it does, create a managing + // object for thread local point so that in the future thread-local + // io_uring can be created. 
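+  // The ring created below is only a capability probe: if CreateIOUring
+  // succeeds, a ThreadLocalPtr (with DeleteIOUring as its destructor) is
+  // installed so each thread can lazily create its own ring later, and the
+  // probe ring itself is deleted immediately.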
+ struct io_uring* new_io_uring = CreateIOUring(); + if (new_io_uring != nullptr) { + thread_local_io_urings_.reset(new ThreadLocalPtr(DeleteIOUring)); + delete new_io_uring; + } +#endif +} + +} // namespace + +// +// Default Posix FileSystem +// +std::shared_ptr FileSystem::Default() { + STATIC_AVOID_DESTRUCTION(std::shared_ptr, instance) + (std::make_shared()); + return instance; +} + +static FactoryFunc posix_filesystem_reg = + ObjectLibrary::Default()->AddFactory( + ObjectLibrary::PatternEntry("posix").AddSeparator("://", false), + [](const std::string& /* uri */, std::unique_ptr* f, + std::string* /* errmsg */) { + f->reset(new PosixFileSystem()); + return f->get(); + }); + +} // namespace ROCKSDB_NAMESPACE + +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_readonly.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_readonly.h new file mode 100644 index 0000000000..7a04aea080 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_readonly.h @@ -0,0 +1,105 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include "rocksdb/file_system.h" + +namespace ROCKSDB_NAMESPACE { + +// A FileSystem wrapper that only allows read-only operation. +// +// This class has not been fully analyzed for providing strong security +// guarantees. +class ReadOnlyFileSystem : public FileSystemWrapper { + static inline IOStatus FailReadOnly() { + IOStatus s = IOStatus::IOError("Attempted write to ReadOnlyFileSystem"); + assert(s.GetRetryable() == false); + return s; + } + + public: + explicit ReadOnlyFileSystem(const std::shared_ptr& base) + : FileSystemWrapper(base) {} + + static const char* kClassName() { return "ReadOnlyFileSystem"; } + const char* Name() const override { return kClassName(); } + + IOStatus NewWritableFile(const std::string& /*fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus ReuseWritableFile(const std::string& /*fname*/, + const std::string& /*old_fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus NewRandomRWFile(const std::string& /*fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus NewDirectory(const std::string& /*dir*/, + const IOOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus DeleteFile(const std::string& /*fname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus CreateDir(const std::string& /*dirname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus CreateDirIfMissing(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) override { + // Allow if dir already exists + bool is_dir = false; + IOStatus s = IsDirectory(dirname, options, &is_dir, dbg); + if (s.ok() && is_dir) { + return s; + } else { + return FailReadOnly(); + } + } + IOStatus DeleteDir(const std::string& /*dirname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); 
+ } + IOStatus RenameFile(const std::string& /*src*/, const std::string& /*dest*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus LinkFile(const std::string& /*src*/, const std::string& /*dest*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus LockFile(const std::string& /*fname*/, const IOOptions& /*options*/, + FileLock** /*lock*/, IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus NewLogger(const std::string& /*fname*/, const IOOptions& /*options*/, + std::shared_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.cc new file mode 100644 index 0000000000..b9832e6cba --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.cc @@ -0,0 +1,341 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "env/fs_remap.h" + +namespace ROCKSDB_NAMESPACE { + +RemapFileSystem::RemapFileSystem(const std::shared_ptr& base) + : FileSystemWrapper(base) {} + +std::pair RemapFileSystem::EncodePathWithNewBasename( + const std::string& path) { + // No difference by default + return EncodePath(path); +} + +Status RemapFileSystem::RegisterDbPaths(const std::vector& paths) { + std::vector encoded_paths; + encoded_paths.reserve(paths.size()); + for (auto& path : paths) { + auto status_and_enc_path = EncodePathWithNewBasename(path); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + encoded_paths.emplace_back(status_and_enc_path.second); + } + return FileSystemWrapper::RegisterDbPaths(encoded_paths); +} + +Status RemapFileSystem::UnregisterDbPaths( + const std::vector& paths) { + std::vector encoded_paths; + encoded_paths.reserve(paths.size()); + for (auto& path : paths) { + auto status_and_enc_path = EncodePathWithNewBasename(path); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + encoded_paths.emplace_back(status_and_enc_path.second); + } + return FileSystemWrapper::UnregisterDbPaths(encoded_paths); +} + +IOStatus RemapFileSystem::NewSequentialFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewSequentialFile(status_and_enc_path.second, + options, result, dbg); +} + +IOStatus RemapFileSystem::NewRandomAccessFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewRandomAccessFile(status_and_enc_path.second, + options, result, dbg); +} + +IOStatus RemapFileSystem::NewWritableFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + 
return status_and_enc_path.first; + } + return FileSystemWrapper::NewWritableFile(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::ReuseWritableFile( + const std::string& fname, const std::string& old_fname, + const FileOptions& options, std::unique_ptr* result, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + auto status_and_old_enc_path = EncodePath(old_fname); + if (!status_and_old_enc_path.first.ok()) { + return status_and_old_enc_path.first; + } + return FileSystemWrapper::ReuseWritableFile(status_and_old_enc_path.second, + status_and_old_enc_path.second, + options, result, dbg); +} + +IOStatus RemapFileSystem::NewRandomRWFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewRandomRWFile(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::NewDirectory(const std::string& dir, + const IOOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) { + // A hassle to remap DirFsyncOptions::renamed_new_name + class RemapFSDirectory : public FSDirectoryWrapper { + public: + RemapFSDirectory(RemapFileSystem* fs, std::unique_ptr&& t) + : FSDirectoryWrapper(std::move(t)), fs_(fs) {} + IOStatus FsyncWithDirOptions( + const IOOptions& options, IODebugContext* dbg, + const DirFsyncOptions& dir_fsync_options) override { + if (dir_fsync_options.renamed_new_name.empty()) { + return FSDirectoryWrapper::FsyncWithDirOptions(options, dbg, + dir_fsync_options); + } else { + auto status_and_enc_path = + fs_->EncodePath(dir_fsync_options.renamed_new_name); + if (status_and_enc_path.first.ok()) { + DirFsyncOptions mapped_options = dir_fsync_options; + mapped_options.renamed_new_name = status_and_enc_path.second; + return FSDirectoryWrapper::FsyncWithDirOptions(options, dbg, + mapped_options); + } else { + return status_and_enc_path.first; + } + } + } + + private: + RemapFileSystem* const fs_; + }; + + auto status_and_enc_path = EncodePathWithNewBasename(dir); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + IOStatus ios = FileSystemWrapper::NewDirectory(status_and_enc_path.second, + options, result, dbg); + if (ios.ok()) { + *result = std::make_unique(this, std::move(*result)); + } + return ios; +} + +IOStatus RemapFileSystem::FileExists(const std::string& fname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::FileExists(status_and_enc_path.second, options, + dbg); +} + +IOStatus RemapFileSystem::GetChildren(const std::string& dir, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(dir); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetChildren(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::GetChildrenFileAttributes( + const std::string& dir, const IOOptions& options, + std::vector* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(dir); + if (!status_and_enc_path.first.ok()) { + return 
status_and_enc_path.first; + } + return FileSystemWrapper::GetChildrenFileAttributes( + status_and_enc_path.second, options, result, dbg); +} + +IOStatus RemapFileSystem::DeleteFile(const std::string& fname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::DeleteFile(status_and_enc_path.second, options, + dbg); +} + +IOStatus RemapFileSystem::CreateDir(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(dirname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::CreateDir(status_and_enc_path.second, options, dbg); +} + +IOStatus RemapFileSystem::CreateDirIfMissing(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(dirname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::CreateDirIfMissing(status_and_enc_path.second, + options, dbg); +} + +IOStatus RemapFileSystem::DeleteDir(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(dirname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::DeleteDir(status_and_enc_path.second, options, dbg); +} + +IOStatus RemapFileSystem::GetFileSize(const std::string& fname, + const IOOptions& options, + uint64_t* file_size, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetFileSize(status_and_enc_path.second, options, + file_size, dbg); +} + +IOStatus RemapFileSystem::GetFileModificationTime(const std::string& fname, + const IOOptions& options, + uint64_t* file_mtime, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetFileModificationTime(status_and_enc_path.second, + options, file_mtime, dbg); +} + +IOStatus RemapFileSystem::IsDirectory(const std::string& path, + const IOOptions& options, bool* is_dir, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(path); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::IsDirectory(status_and_enc_path.second, options, + is_dir, dbg); +} + +IOStatus RemapFileSystem::RenameFile(const std::string& src, + const std::string& dest, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_src_enc_path = EncodePath(src); + if (!status_and_src_enc_path.first.ok()) { + if (status_and_src_enc_path.first.IsNotFound()) { + const IOStatus& s = status_and_src_enc_path.first; + status_and_src_enc_path.first = IOStatus::PathNotFound(s.ToString()); + } + return status_and_src_enc_path.first; + } + auto status_and_dest_enc_path = EncodePathWithNewBasename(dest); + if (!status_and_dest_enc_path.first.ok()) { + return status_and_dest_enc_path.first; + } + return FileSystemWrapper::RenameFile(status_and_src_enc_path.second, + status_and_dest_enc_path.second, options, + dbg); +} + +IOStatus RemapFileSystem::LinkFile(const std::string& src, + const std::string& dest, + const IOOptions& options, + IODebugContext* dbg) { + auto 
status_and_src_enc_path = EncodePath(src); + if (!status_and_src_enc_path.first.ok()) { + return status_and_src_enc_path.first; + } + auto status_and_dest_enc_path = EncodePathWithNewBasename(dest); + if (!status_and_dest_enc_path.first.ok()) { + return status_and_dest_enc_path.first; + } + return FileSystemWrapper::LinkFile(status_and_src_enc_path.second, + status_and_dest_enc_path.second, options, + dbg); +} + +IOStatus RemapFileSystem::LockFile(const std::string& fname, + const IOOptions& options, FileLock** lock, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + // FileLock subclasses may store path (e.g., PosixFileLock stores it). We + // can skip stripping the chroot directory from this path because callers + // shouldn't use it. + return FileSystemWrapper::LockFile(status_and_enc_path.second, options, lock, + dbg); +} + +IOStatus RemapFileSystem::NewLogger(const std::string& fname, + const IOOptions& options, + std::shared_ptr* result, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewLogger(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::GetAbsolutePath(const std::string& db_path, + const IOOptions& options, + std::string* output_path, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(db_path); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetAbsolutePath(status_and_enc_path.second, options, + output_path, dbg); +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.h new file mode 100644 index 0000000000..a3c998262c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/fs_remap.h @@ -0,0 +1,137 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include + +#include "rocksdb/file_system.h" + +namespace ROCKSDB_NAMESPACE { + +// An abstract FileSystem wrapper that creates a view of an existing +// FileSystem by remapping names in some way. +// +// This class has not been fully analyzed for providing strong security +// guarantees. +class RemapFileSystem : public FileSystemWrapper { + public: + explicit RemapFileSystem(const std::shared_ptr& base); + + protected: + // Returns status and mapped-to path in the wrapped filesystem. + // If it returns non-OK status, the returned path should not be used. + virtual std::pair EncodePath( + const std::string& path) = 0; + + // Similar to EncodePath() except used in cases in which it is OK for + // no file or directory on 'path' to already exist, such as if the + // operation would create one. However, the parent of 'path' is expected + // to exist for the operation to succeed. + // Default implementation: call EncodePath + virtual std::pair EncodePathWithNewBasename( + const std::string& path); + + public: + // Left abstract: + // const char* Name() const override { ... 
} + static const char* kClassName() { return "RemapFileSystem"; } + bool IsInstanceOf(const std::string& id) const override { + if (id == kClassName()) { + return true; + } else { + return FileSystemWrapper::IsInstanceOf(id); + } + } + + Status RegisterDbPaths(const std::vector& paths) override; + + Status UnregisterDbPaths(const std::vector& paths) override; + + IOStatus NewSequentialFile(const std::string& fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewRandomAccessFile(const std::string& fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewWritableFile(const std::string& fname, const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewDirectory(const std::string& dir, const IOOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus FileExists(const std::string& fname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus GetChildren(const std::string& dir, const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override; + + IOStatus GetChildrenFileAttributes(const std::string& dir, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override; + + IOStatus DeleteFile(const std::string& fname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus CreateDir(const std::string& dirname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus CreateDirIfMissing(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus DeleteDir(const std::string& dirname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus GetFileSize(const std::string& fname, const IOOptions& options, + uint64_t* file_size, IODebugContext* dbg) override; + + IOStatus GetFileModificationTime(const std::string& fname, + const IOOptions& options, + uint64_t* file_mtime, + IODebugContext* dbg) override; + + IOStatus IsDirectory(const std::string& path, const IOOptions& options, + bool* is_dir, IODebugContext* dbg) override; + + IOStatus RenameFile(const std::string& src, const std::string& dest, + const IOOptions& options, IODebugContext* dbg) override; + + IOStatus LinkFile(const std::string& src, const std::string& dest, + const IOOptions& options, IODebugContext* dbg) override; + + IOStatus LockFile(const std::string& fname, const IOOptions& options, + FileLock** lock, IODebugContext* dbg) override; + + IOStatus NewLogger(const std::string& fname, const IOOptions& options, + std::shared_ptr* result, + IODebugContext* dbg) override; + + IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& options, + std::string* output_path, + IODebugContext* dbg) override; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.cc new file mode 100644 index 0000000000..0ec0e9c83b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.cc @@ -0,0 +1,1733 @@ +// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifdef ROCKSDB_LIB_IO_POSIX
+#include "env/io_posix.h"
+
+#include <errno.h>
+#include <fcntl.h>
+
+#include <algorithm>
+#if defined(OS_LINUX)
+#include <linux/fs.h>
+#ifndef FALLOC_FL_KEEP_SIZE
+#include <linux/falloc.h>
+#endif
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#ifdef OS_LINUX
+#include <sys/statfs.h>
+#include <sys/sysmacros.h>
+#endif
+#include "monitoring/iostats_context_imp.h"
+#include "port/port.h"
+#include "port/stack_trace.h"
+#include "rocksdb/slice.h"
+#include "test_util/sync_point.h"
+#include "util/autovector.h"
+#include "util/coding.h"
+#include "util/string_util.h"
+
+#if defined(OS_LINUX) && !defined(F_SET_RW_HINT)
+#define F_LINUX_SPECIFIC_BASE 1024
+#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+std::string IOErrorMsg(const std::string& context,
+                       const std::string& file_name) {
+  if (file_name.empty()) {
+    return context;
+  }
+  return context + ": " + file_name;
+}
+
+// file_name can be left empty if it is not known.
+IOStatus IOError(const std::string& context, const std::string& file_name,
+                 int err_number) {
+  switch (err_number) {
+    case ENOSPC: {
+      IOStatus s = IOStatus::NoSpace(IOErrorMsg(context, file_name),
+                                     errnoStr(err_number).c_str());
+      s.SetRetryable(true);
+      return s;
+    }
+    case ESTALE:
+      return IOStatus::IOError(IOStatus::kStaleFile);
+    case ENOENT:
+      return IOStatus::PathNotFound(IOErrorMsg(context, file_name),
+                                    errnoStr(err_number).c_str());
+    default:
+      return IOStatus::IOError(IOErrorMsg(context, file_name),
+                               errnoStr(err_number).c_str());
+  }
+}
+
+// A wrapper for fadvise; if the platform doesn't support fadvise,
+// it simply returns 0.
+int Fadvise(int fd, off_t offset, size_t len, int advice) {
+#ifdef OS_LINUX
+  return posix_fadvise(fd, offset, len, advice);
+#else
+  (void)fd;
+  (void)offset;
+  (void)len;
+  (void)advice;
+  return 0;  // simply do nothing.
+#endif
+}
+
+// A wrapper for madvise; if the platform doesn't support madvise,
+// it simply returns 0.
+int Madvise(void* addr, size_t len, int advice) {
+#ifdef OS_LINUX
+  return posix_madvise(addr, len, advice);
+#else
+  (void)addr;
+  (void)len;
+  (void)advice;
+  return 0;  // simply do nothing.
+#endif
+}
+
+namespace {
+
+// On MacOS (and probably *BSD), the posix write and pwrite calls do not
+// support buffers larger than 2^31-1 bytes. These two wrappers fix this
+// issue by cutting the buffer into 1GB chunks. We use this chunk size to
+// be sure to keep the writes aligned.
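+// Both wrappers also restart short and EINTR-interrupted writes until the
+// whole buffer has been written, so callers may treat them as all-or-nothing.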
+ +bool PosixWrite(int fd, const char* buf, size_t nbyte) { + const size_t kLimit1Gb = 1UL << 30; + + const char* src = buf; + size_t left = nbyte; + + while (left != 0) { + size_t bytes_to_write = std::min(left, kLimit1Gb); + + ssize_t done = write(fd, src, bytes_to_write); + if (done < 0) { + if (errno == EINTR) { + continue; + } + return false; + } + left -= done; + src += done; + } + return true; +} + +bool PosixPositionedWrite(int fd, const char* buf, size_t nbyte, off_t offset) { + const size_t kLimit1Gb = 1UL << 30; + + const char* src = buf; + size_t left = nbyte; + + while (left != 0) { + size_t bytes_to_write = std::min(left, kLimit1Gb); + + ssize_t done = pwrite(fd, src, bytes_to_write, offset); + if (done < 0) { + if (errno == EINTR) { + continue; + } + return false; + } + left -= done; + offset += done; + src += done; + } + + return true; +} + +#ifdef ROCKSDB_RANGESYNC_PRESENT + +#if !defined(ZFS_SUPER_MAGIC) +// The magic number for ZFS was not exposed until recently. It should be fixed +// forever so we can just copy the magic number here. +#define ZFS_SUPER_MAGIC 0x2fc12fc1 +#endif + +bool IsSyncFileRangeSupported(int fd) { + // This function tracks and checks for cases where we know `sync_file_range` + // definitely will not work properly despite passing the compile-time check + // (`ROCKSDB_RANGESYNC_PRESENT`). If we are unsure, or if any of the checks + // fail in unexpected ways, we allow `sync_file_range` to be used. This way + // should minimize risk of impacting existing use cases. + struct statfs buf; + int ret = fstatfs(fd, &buf); + assert(ret == 0); + if (ret == 0 && buf.f_type == ZFS_SUPER_MAGIC) { + // Testing on ZFS showed the writeback did not happen asynchronously when + // `sync_file_range` was called, even though it returned success. Avoid it + // and use `fdatasync` instead to preserve the contract of `bytes_per_sync`, + // even though this'll incur extra I/O for metadata. + return false; + } + + ret = sync_file_range(fd, 0 /* offset */, 0 /* nbytes */, 0 /* flags */); + assert(!(ret == -1 && errno != ENOSYS)); + if (ret == -1 && errno == ENOSYS) { + // `sync_file_range` is not implemented on all platforms even if + // compile-time checks pass and a supported filesystem is in-use. For + // example, using ext4 on WSL (Windows Subsystem for Linux), + // `sync_file_range()` returns `ENOSYS` + // ("Function not implemented"). + return false; + } + // None of the known cases matched, so allow `sync_file_range` use. 
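+  // Reaching this point means fstatfs did not report a known-problematic
+  // filesystem and the trial sync_file_range call did not fail with ENOSYS.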
+ return true; +} + +#undef ZFS_SUPER_MAGIC + +#endif // ROCKSDB_RANGESYNC_PRESENT + +} // anonymous namespace + +/* + * PosixSequentialFile + */ +PosixSequentialFile::PosixSequentialFile(const std::string& fname, FILE* file, + int fd, size_t logical_block_size, + const EnvOptions& options) + : filename_(fname), + file_(file), + fd_(fd), + use_direct_io_(options.use_direct_reads), + logical_sector_size_(logical_block_size) { + assert(!options.use_direct_reads || !options.use_mmap_reads); +} + +PosixSequentialFile::~PosixSequentialFile() { + if (!use_direct_io()) { + assert(file_); + fclose(file_); + } else { + assert(fd_); + close(fd_); + } +} + +IOStatus PosixSequentialFile::Read(size_t n, const IOOptions& /*opts*/, + Slice* result, char* scratch, + IODebugContext* /*dbg*/) { + assert(result != nullptr && !use_direct_io()); + IOStatus s; + size_t r = 0; + do { + clearerr(file_); + r = fread_unlocked(scratch, 1, n, file_); + } while (r == 0 && ferror(file_) && errno == EINTR); + *result = Slice(scratch, r); + if (r < n) { + if (feof(file_)) { + // We leave status as ok if we hit the end of the file + // We also clear the error so that the reads can continue + // if a new data is written to the file + clearerr(file_); + } else { + // A partial read with an error: return a non-ok status + s = IOError("While reading file sequentially", filename_, errno); + } + } + return s; +} + +IOStatus PosixSequentialFile::PositionedRead(uint64_t offset, size_t n, + const IOOptions& /*opts*/, + Slice* result, char* scratch, + IODebugContext* /*dbg*/) { + assert(use_direct_io()); + assert(IsSectorAligned(offset, GetRequiredBufferAlignment())); + assert(IsSectorAligned(n, GetRequiredBufferAlignment())); + assert(IsSectorAligned(scratch, GetRequiredBufferAlignment())); + + IOStatus s; + ssize_t r = -1; + size_t left = n; + char* ptr = scratch; + while (left > 0) { + r = pread(fd_, ptr, left, static_cast(offset)); + if (r <= 0) { + if (r == -1 && errno == EINTR) { + continue; + } + break; + } + ptr += r; + offset += r; + left -= r; + if (!IsSectorAligned(r, GetRequiredBufferAlignment())) { + // Bytes reads don't fill sectors. Should only happen at the end + // of the file. + break; + } + } + if (r < 0) { + // An error: return a non-ok status + s = IOError("While pread " + std::to_string(n) + " bytes from offset " + + std::to_string(offset), + filename_, errno); + } + *result = Slice(scratch, (r < 0) ? 
0 : n - left); + return s; +} + +IOStatus PosixSequentialFile::Skip(uint64_t n) { + if (fseek(file_, static_cast(n), SEEK_CUR)) { + return IOError("While fseek to skip " + std::to_string(n) + " bytes", + filename_, errno); + } + return IOStatus::OK(); +} + +IOStatus PosixSequentialFile::InvalidateCache(size_t offset, size_t length) { +#ifndef OS_LINUX + (void)offset; + (void)length; + return IOStatus::OK(); +#else + if (!use_direct_io()) { + // free OS pages + int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); + if (ret != 0) { + return IOError("While fadvise NotNeeded offset " + + std::to_string(offset) + " len " + + std::to_string(length), + filename_, errno); + } + } + return IOStatus::OK(); +#endif +} + +/* + * PosixRandomAccessFile + */ +#if defined(OS_LINUX) +size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) { + if (max_size < kMaxVarint64Length * 3) { + return 0; + } + + struct stat buf; + int result = fstat(fd, &buf); + if (result == -1) { + return 0; + } + + long version = 0; + result = ioctl(fd, FS_IOC_GETVERSION, &version); + TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result); + if (result == -1) { + return 0; + } + uint64_t uversion = (uint64_t)version; + + char* rid = id; + rid = EncodeVarint64(rid, buf.st_dev); + rid = EncodeVarint64(rid, buf.st_ino); + rid = EncodeVarint64(rid, uversion); + assert(rid >= id); + return static_cast(rid - id); +} +#endif + +#if defined(OS_MACOSX) || defined(OS_AIX) +size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) { + if (max_size < kMaxVarint64Length * 3) { + return 0; + } + + struct stat buf; + int result = fstat(fd, &buf); + if (result == -1) { + return 0; + } + + char* rid = id; + rid = EncodeVarint64(rid, buf.st_dev); + rid = EncodeVarint64(rid, buf.st_ino); + rid = EncodeVarint64(rid, buf.st_gen); + assert(rid >= id); + return static_cast(rid - id); +} +#endif + +#ifdef OS_LINUX +std::string RemoveTrailingSlash(const std::string& path) { + std::string p = path; + if (p.size() > 1 && p.back() == '/') { + p.pop_back(); + } + return p; +} + +Status LogicalBlockSizeCache::RefAndCacheLogicalBlockSize( + const std::vector& directories) { + std::vector dirs; + dirs.reserve(directories.size()); + for (auto& d : directories) { + dirs.emplace_back(RemoveTrailingSlash(d)); + } + + std::map dir_sizes; + { + ReadLock lock(&cache_mutex_); + for (const auto& dir : dirs) { + if (cache_.find(dir) == cache_.end()) { + dir_sizes.emplace(dir, 0); + } + } + } + + Status s; + for (auto& dir_size : dir_sizes) { + s = get_logical_block_size_of_directory_(dir_size.first, &dir_size.second); + if (!s.ok()) { + return s; + } + } + + WriteLock lock(&cache_mutex_); + for (const auto& dir : dirs) { + auto& v = cache_[dir]; + v.ref++; + auto dir_size = dir_sizes.find(dir); + if (dir_size != dir_sizes.end()) { + v.size = dir_size->second; + } + } + return s; +} + +void LogicalBlockSizeCache::UnrefAndTryRemoveCachedLogicalBlockSize( + const std::vector& directories) { + std::vector dirs; + dirs.reserve(directories.size()); + for (auto& dir : directories) { + dirs.emplace_back(RemoveTrailingSlash(dir)); + } + + WriteLock lock(&cache_mutex_); + for (const auto& dir : dirs) { + auto it = cache_.find(dir); + if (it != cache_.end() && !(--(it->second.ref))) { + cache_.erase(it); + } + } +} + +size_t LogicalBlockSizeCache::GetLogicalBlockSize(const std::string& fname, + int fd) { + std::string dir = fname.substr(0, fname.find_last_of("/")); + if (dir.empty()) { + dir = "/"; + } + { + ReadLock 
lock(&cache_mutex_); + auto it = cache_.find(dir); + if (it != cache_.end()) { + return it->second.size; + } + } + return get_logical_block_size_of_fd_(fd); +} +#endif + +Status PosixHelper::GetLogicalBlockSizeOfDirectory(const std::string& directory, + size_t* size) { + int fd = open(directory.c_str(), O_DIRECTORY | O_RDONLY); + if (fd == -1) { + close(fd); + return Status::IOError("Cannot open directory " + directory); + } + *size = PosixHelper::GetLogicalBlockSizeOfFd(fd); + close(fd); + return Status::OK(); +} + +size_t PosixHelper::GetLogicalBlockSizeOfFd(int fd) { +#ifdef OS_LINUX + struct stat buf; + int result = fstat(fd, &buf); + if (result == -1) { + return kDefaultPageSize; + } + if (major(buf.st_dev) == 0) { + // Unnamed devices (e.g. non-device mounts), reserved as null device number. + // These don't have an entry in /sys/dev/block/. Return a sensible default. + return kDefaultPageSize; + } + + // Reading queue/logical_block_size does not require special permissions. + const int kBufferSize = 100; + char path[kBufferSize]; + char real_path[PATH_MAX + 1]; + snprintf(path, kBufferSize, "/sys/dev/block/%u:%u", major(buf.st_dev), + minor(buf.st_dev)); + if (realpath(path, real_path) == nullptr) { + return kDefaultPageSize; + } + std::string device_dir(real_path); + if (!device_dir.empty() && device_dir.back() == '/') { + device_dir.pop_back(); + } + // NOTE: sda3 and nvme0n1p1 do not have a `queue/` subdir, only the parent sda + // and nvme0n1 have it. + // $ ls -al '/sys/dev/block/8:3' + // lrwxrwxrwx. 1 root root 0 Jun 26 01:38 /sys/dev/block/8:3 -> + // ../../block/sda/sda3 + // $ ls -al '/sys/dev/block/259:4' + // lrwxrwxrwx 1 root root 0 Jan 31 16:04 /sys/dev/block/259:4 -> + // ../../devices/pci0000:17/0000:17:00.0/0000:18:00.0/nvme/nvme0/nvme0n1/nvme0n1p1 + size_t parent_end = device_dir.rfind('/', device_dir.length() - 1); + if (parent_end == std::string::npos) { + return kDefaultPageSize; + } + size_t parent_begin = device_dir.rfind('/', parent_end - 1); + if (parent_begin == std::string::npos) { + return kDefaultPageSize; + } + std::string parent = + device_dir.substr(parent_begin + 1, parent_end - parent_begin - 1); + std::string child = device_dir.substr(parent_end + 1, std::string::npos); + if (parent != "block" && + (child.compare(0, 4, "nvme") || child.find('p') != std::string::npos)) { + device_dir = device_dir.substr(0, parent_end); + } + std::string fname = device_dir + "/queue/logical_block_size"; + FILE* fp; + size_t size = 0; + fp = fopen(fname.c_str(), "r"); + if (fp != nullptr) { + char* line = nullptr; + size_t len = 0; + if (getline(&line, &len, fp) != -1) { + sscanf(line, "%zu", &size); + } + free(line); + fclose(fp); + } + if (size != 0 && (size & (size - 1)) == 0) { + return size; + } +#endif + (void)fd; + return kDefaultPageSize; +} + +/* + * PosixRandomAccessFile + * + * pread() based random-access + */ +PosixRandomAccessFile::PosixRandomAccessFile( + const std::string& fname, int fd, size_t logical_block_size, + const EnvOptions& options +#if defined(ROCKSDB_IOURING_PRESENT) + , + ThreadLocalPtr* thread_local_io_urings +#endif + ) + : filename_(fname), + fd_(fd), + use_direct_io_(options.use_direct_reads), + logical_sector_size_(logical_block_size) +#if defined(ROCKSDB_IOURING_PRESENT) + , + thread_local_io_urings_(thread_local_io_urings) +#endif +{ + assert(!options.use_direct_reads || !options.use_mmap_reads); + assert(!options.use_mmap_reads); +} + +PosixRandomAccessFile::~PosixRandomAccessFile() { close(fd_); } + +IOStatus 
PosixRandomAccessFile::Read(uint64_t offset, size_t n, + const IOOptions& /*opts*/, Slice* result, + char* scratch, + IODebugContext* /*dbg*/) const { + if (use_direct_io()) { + assert(IsSectorAligned(offset, GetRequiredBufferAlignment())); + assert(IsSectorAligned(n, GetRequiredBufferAlignment())); + assert(IsSectorAligned(scratch, GetRequiredBufferAlignment())); + } + IOStatus s; + ssize_t r = -1; + size_t left = n; + char* ptr = scratch; + while (left > 0) { + r = pread(fd_, ptr, left, static_cast(offset)); + if (r <= 0) { + if (r == -1 && errno == EINTR) { + continue; + } + break; + } + ptr += r; + offset += r; + left -= r; + if (use_direct_io() && + r % static_cast(GetRequiredBufferAlignment()) != 0) { + // Bytes reads don't fill sectors. Should only happen at the end + // of the file. + break; + } + } + if (r < 0) { + // An error: return a non-ok status + s = IOError("While pread offset " + std::to_string(offset) + " len " + + std::to_string(n), + filename_, errno); + } + *result = Slice(scratch, (r < 0) ? 0 : n - left); + return s; +} + +IOStatus PosixRandomAccessFile::MultiRead(FSReadRequest* reqs, size_t num_reqs, + const IOOptions& options, + IODebugContext* dbg) { + if (use_direct_io()) { + for (size_t i = 0; i < num_reqs; i++) { + assert(IsSectorAligned(reqs[i].offset, GetRequiredBufferAlignment())); + assert(IsSectorAligned(reqs[i].len, GetRequiredBufferAlignment())); + assert(IsSectorAligned(reqs[i].scratch, GetRequiredBufferAlignment())); + } + } + +#if defined(ROCKSDB_IOURING_PRESENT) + struct io_uring* iu = nullptr; + if (thread_local_io_urings_) { + iu = static_cast(thread_local_io_urings_->Get()); + if (iu == nullptr) { + iu = CreateIOUring(); + if (iu != nullptr) { + thread_local_io_urings_->Reset(iu); + } + } + } + + // Init failed, platform doesn't support io_uring. 
Fall back to
+  // serialized reads
+  if (iu == nullptr) {
+    return FSRandomAccessFile::MultiRead(reqs, num_reqs, options, dbg);
+  }
+
+  IOStatus ios = IOStatus::OK();
+
+  struct WrappedReadRequest {
+    FSReadRequest* req;
+    struct iovec iov;
+    size_t finished_len;
+    explicit WrappedReadRequest(FSReadRequest* r) : req(r), finished_len(0) {}
+  };
+
+  autovector<WrappedReadRequest, 32> req_wraps;
+  autovector<WrappedReadRequest*, 4> incomplete_rq_list;
+  std::unordered_set<WrappedReadRequest*> wrap_cache;
+
+  for (size_t i = 0; i < num_reqs; i++) {
+    req_wraps.emplace_back(&reqs[i]);
+  }
+
+  size_t reqs_off = 0;
+  while (num_reqs > reqs_off || !incomplete_rq_list.empty()) {
+    size_t this_reqs = (num_reqs - reqs_off) + incomplete_rq_list.size();
+
+    // If the requests exceed the ring depth, split them into batches
+    if (this_reqs > kIoUringDepth) this_reqs = kIoUringDepth;
+
+    assert(incomplete_rq_list.size() <= this_reqs);
+    for (size_t i = 0; i < this_reqs; i++) {
+      WrappedReadRequest* rep_to_submit;
+      if (i < incomplete_rq_list.size()) {
+        rep_to_submit = incomplete_rq_list[i];
+      } else {
+        rep_to_submit = &req_wraps[reqs_off++];
+      }
+      assert(rep_to_submit->req->len > rep_to_submit->finished_len);
+      rep_to_submit->iov.iov_base =
+          rep_to_submit->req->scratch + rep_to_submit->finished_len;
+      rep_to_submit->iov.iov_len =
+          rep_to_submit->req->len - rep_to_submit->finished_len;
+
+      struct io_uring_sqe* sqe;
+      sqe = io_uring_get_sqe(iu);
+      io_uring_prep_readv(
+          sqe, fd_, &rep_to_submit->iov, 1,
+          rep_to_submit->req->offset + rep_to_submit->finished_len);
+      io_uring_sqe_set_data(sqe, rep_to_submit);
+      wrap_cache.emplace(rep_to_submit);
+    }
+    incomplete_rq_list.clear();
+
+    ssize_t ret =
+        io_uring_submit_and_wait(iu, static_cast<unsigned int>(this_reqs));
+    TEST_SYNC_POINT_CALLBACK(
+        "PosixRandomAccessFile::MultiRead:io_uring_submit_and_wait:return1",
+        &ret);
+    TEST_SYNC_POINT_CALLBACK(
+        "PosixRandomAccessFile::MultiRead:io_uring_submit_and_wait:return2",
+        iu);
+
+    if (static_cast<size_t>(ret) != this_reqs) {
+      fprintf(stderr, "ret = %ld this_reqs: %ld\n", (long)ret,
+              (long)this_reqs);
+      // If an error happens and we submitted fewer sqes than expected, it is
+      // an exceptional case and we don't retry here. We should still consume
+      // what is submitted in the ring.
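+      // Drain the completions of whatever was submitted so stale cqes are
+      // not left in the ring to be misattributed by a later MultiRead call.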
+ for (ssize_t i = 0; i < ret; i++) { + struct io_uring_cqe* cqe = nullptr; + io_uring_wait_cqe(iu, &cqe); + if (cqe != nullptr) { + io_uring_cqe_seen(iu, cqe); + } + } + return IOStatus::IOError("io_uring_submit_and_wait() requested " + + std::to_string(this_reqs) + " but returned " + + std::to_string(ret)); + } + + for (size_t i = 0; i < this_reqs; i++) { + struct io_uring_cqe* cqe = nullptr; + WrappedReadRequest* req_wrap; + + // We could use the peek variant here, but this seems safer in terms + // of our initial wait not reaping all completions + ret = io_uring_wait_cqe(iu, &cqe); + TEST_SYNC_POINT_CALLBACK( + "PosixRandomAccessFile::MultiRead:io_uring_wait_cqe:return", &ret); + if (ret) { + ios = IOStatus::IOError("io_uring_wait_cqe() returns " + + std::to_string(ret)); + + if (cqe != nullptr) { + io_uring_cqe_seen(iu, cqe); + } + continue; + } + + req_wrap = static_cast(io_uring_cqe_get_data(cqe)); + // Reset cqe data to catch any stray reuse of it + static_cast(cqe)->user_data = 0xd5d5d5d5d5d5d5d5; + // Check that we got a valid unique cqe data + auto wrap_check = wrap_cache.find(req_wrap); + if (wrap_check == wrap_cache.end()) { + fprintf(stderr, + "PosixRandomAccessFile::MultiRead: " + "Bad cqe data from IO uring - %p\n", + req_wrap); + port::PrintStack(); + ios = IOStatus::IOError("io_uring_cqe_get_data() returned " + + std::to_string((uint64_t)req_wrap)); + continue; + } + wrap_cache.erase(wrap_check); + + FSReadRequest* req = req_wrap->req; + size_t bytes_read = 0; + bool read_again = false; + UpdateResult(cqe, filename_, req->len, req_wrap->iov.iov_len, + false /*async_read*/, use_direct_io(), + GetRequiredBufferAlignment(), req_wrap->finished_len, req, + bytes_read, read_again); + int32_t res = cqe->res; + if (res >= 0) { + if (bytes_read == 0) { + if (read_again) { + Slice tmp_slice; + req->status = + Read(req->offset + req_wrap->finished_len, + req->len - req_wrap->finished_len, options, &tmp_slice, + req->scratch + req_wrap->finished_len, dbg); + req->result = + Slice(req->scratch, req_wrap->finished_len + tmp_slice.size()); + } + // else It means EOF so no need to do anything. 
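+          // A short read (bytes_read > 0 but less than iov.iov_len) is
+          // handled in the branch below: the wrapper is requeued so the
+          // next batch issues another readv for the remaining tail, using
+          // the finished_len that UpdateResult just advanced.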
+ } else if (bytes_read < req_wrap->iov.iov_len) { + incomplete_rq_list.push_back(req_wrap); + } + } + io_uring_cqe_seen(iu, cqe); + } + wrap_cache.clear(); + } + return ios; +#else + return FSRandomAccessFile::MultiRead(reqs, num_reqs, options, dbg); +#endif +} + +IOStatus PosixRandomAccessFile::Prefetch(uint64_t offset, size_t n, + const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { + IOStatus s; + if (!use_direct_io()) { + ssize_t r = 0; +#ifdef OS_LINUX + r = readahead(fd_, offset, n); +#endif +#ifdef OS_MACOSX + radvisory advice; + advice.ra_offset = static_cast(offset); + advice.ra_count = static_cast(n); + r = fcntl(fd_, F_RDADVISE, &advice); +#endif + if (r == -1) { + s = IOError("While prefetching offset " + std::to_string(offset) + + " len " + std::to_string(n), + filename_, errno); + } + } + return s; +} + +#if defined(OS_LINUX) || defined(OS_MACOSX) || defined(OS_AIX) +size_t PosixRandomAccessFile::GetUniqueId(char* id, size_t max_size) const { + return PosixHelper::GetUniqueIdFromFile(fd_, id, max_size); +} +#endif + +void PosixRandomAccessFile::Hint(AccessPattern pattern) { + if (use_direct_io()) { + return; + } + switch (pattern) { + case kNormal: + Fadvise(fd_, 0, 0, POSIX_FADV_NORMAL); + break; + case kRandom: + Fadvise(fd_, 0, 0, POSIX_FADV_RANDOM); + break; + case kSequential: + Fadvise(fd_, 0, 0, POSIX_FADV_SEQUENTIAL); + break; + case kWillNeed: + Fadvise(fd_, 0, 0, POSIX_FADV_WILLNEED); + break; + case kWontNeed: + Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED); + break; + default: + assert(false); + break; + } +} + +IOStatus PosixRandomAccessFile::InvalidateCache(size_t offset, size_t length) { + if (use_direct_io()) { + return IOStatus::OK(); + } +#ifndef OS_LINUX + (void)offset; + (void)length; + return IOStatus::OK(); +#else + // free OS pages + int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); + if (ret == 0) { + return IOStatus::OK(); + } + return IOError("While fadvise NotNeeded offset " + std::to_string(offset) + + " len " + std::to_string(length), + filename_, errno); +#endif +} + +IOStatus PosixRandomAccessFile::ReadAsync( + FSReadRequest& req, const IOOptions& /*opts*/, + std::function cb, void* cb_arg, + void** io_handle, IOHandleDeleter* del_fn, IODebugContext* /*dbg*/) { + if (use_direct_io()) { + assert(IsSectorAligned(req.offset, GetRequiredBufferAlignment())); + assert(IsSectorAligned(req.len, GetRequiredBufferAlignment())); + assert(IsSectorAligned(req.scratch, GetRequiredBufferAlignment())); + } + +#if defined(ROCKSDB_IOURING_PRESENT) + // io_uring_queue_init. + struct io_uring* iu = nullptr; + if (thread_local_io_urings_) { + iu = static_cast(thread_local_io_urings_->Get()); + if (iu == nullptr) { + iu = CreateIOUring(); + if (iu != nullptr) { + thread_local_io_urings_->Reset(iu); + } + } + } + + // Init failed, platform doesn't support io_uring. + if (iu == nullptr) { + return IOStatus::NotSupported("ReadAsync"); + } + + // Allocate io_handle. + IOHandleDeleter deletefn = [](void* args) -> void { + delete (static_cast(args)); + args = nullptr; + }; + + // Initialize Posix_IOHandle. 
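+  // The handle must carry everything Poll/AbortIO later need: the ring
+  // pointer, the completion callback and its argument, the request geometry,
+  // and the iovec, which has to stay alive until the kernel completes the
+  // readv.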
+ Posix_IOHandle* posix_handle = + new Posix_IOHandle(iu, cb, cb_arg, req.offset, req.len, req.scratch, + use_direct_io(), GetRequiredBufferAlignment()); + posix_handle->iov.iov_base = req.scratch; + posix_handle->iov.iov_len = req.len; + + *io_handle = static_cast(posix_handle); + *del_fn = deletefn; + + // Step 3: io_uring_sqe_set_data + struct io_uring_sqe* sqe; + sqe = io_uring_get_sqe(iu); + + io_uring_prep_readv(sqe, fd_, /*sqe->addr=*/&posix_handle->iov, + /*sqe->len=*/1, /*sqe->offset=*/posix_handle->offset); + + // Sets sqe->user_data to posix_handle. + io_uring_sqe_set_data(sqe, posix_handle); + + // Step 4: io_uring_submit + ssize_t ret = io_uring_submit(iu); + if (ret < 0) { + fprintf(stderr, "io_uring_submit error: %ld\n", long(ret)); + return IOStatus::IOError("io_uring_submit() requested but returned " + + std::to_string(ret)); + } + return IOStatus::OK(); +#else + (void)req; + (void)cb; + (void)cb_arg; + (void)io_handle; + (void)del_fn; + return IOStatus::NotSupported("ReadAsync"); +#endif +} + +/* + * PosixMmapReadableFile + * + * mmap() based random-access + */ +// base[0,length-1] contains the mmapped contents of the file. +PosixMmapReadableFile::PosixMmapReadableFile(const int fd, + const std::string& fname, + void* base, size_t length, + const EnvOptions& options) + : fd_(fd), filename_(fname), mmapped_region_(base), length_(length) { +#ifdef NDEBUG + (void)options; +#endif + fd_ = fd_ + 0; // suppress the warning for used variables + assert(options.use_mmap_reads); + assert(!options.use_direct_reads); +} + +PosixMmapReadableFile::~PosixMmapReadableFile() { + int ret = munmap(mmapped_region_, length_); + if (ret != 0) { + fprintf(stdout, "failed to munmap %p length %" ROCKSDB_PRIszt " \n", + mmapped_region_, length_); + } + close(fd_); +} + +IOStatus PosixMmapReadableFile::Read(uint64_t offset, size_t n, + const IOOptions& /*opts*/, Slice* result, + char* /*scratch*/, + IODebugContext* /*dbg*/) const { + IOStatus s; + if (offset > length_) { + *result = Slice(); + return IOError("While mmap read offset " + std::to_string(offset) + + " larger than file length " + std::to_string(length_), + filename_, EINVAL); + } else if (offset + n > length_) { + n = static_cast(length_ - offset); + } + *result = Slice(reinterpret_cast(mmapped_region_) + offset, n); + return s; +} + +void PosixMmapReadableFile::Hint(AccessPattern pattern) { + switch (pattern) { + case kNormal: + Madvise(mmapped_region_, length_, POSIX_MADV_NORMAL); + break; + case kRandom: + Madvise(mmapped_region_, length_, POSIX_MADV_RANDOM); + break; + case kSequential: + Madvise(mmapped_region_, length_, POSIX_MADV_SEQUENTIAL); + break; + case kWillNeed: + Madvise(mmapped_region_, length_, POSIX_MADV_WILLNEED); + break; + case kWontNeed: + Madvise(mmapped_region_, length_, POSIX_MADV_DONTNEED); + break; + default: + assert(false); + break; + } +} + +IOStatus PosixMmapReadableFile::InvalidateCache(size_t offset, size_t length) { +#ifndef OS_LINUX + (void)offset; + (void)length; + return IOStatus::OK(); +#else + // free OS pages + int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); + if (ret == 0) { + return IOStatus::OK(); + } + return IOError("While fadvise not needed. Offset " + std::to_string(offset) + + " len" + std::to_string(length), + filename_, errno); +#endif +} + +/* + * PosixMmapFile + * + * We preallocate up to an extra megabyte and use memcpy to append new + * data to the file. 
This is safe since we either properly close the + * file before reading from it, or for log files, the reading code + * knows enough to skip zero suffixes. + */ +IOStatus PosixMmapFile::UnmapCurrentRegion() { + TEST_KILL_RANDOM("PosixMmapFile::UnmapCurrentRegion:0"); + if (base_ != nullptr) { + int munmap_status = munmap(base_, limit_ - base_); + if (munmap_status != 0) { + return IOError("While munmap", filename_, munmap_status); + } + file_offset_ += limit_ - base_; + base_ = nullptr; + limit_ = nullptr; + last_sync_ = nullptr; + dst_ = nullptr; + + // Increase the amount we map the next time, but capped at 1MB + if (map_size_ < (1 << 20)) { + map_size_ *= 2; + } + } + return IOStatus::OK(); +} + +IOStatus PosixMmapFile::MapNewRegion() { +#ifdef ROCKSDB_FALLOCATE_PRESENT + assert(base_ == nullptr); + TEST_KILL_RANDOM("PosixMmapFile::UnmapCurrentRegion:0"); + // we can't fallocate with FALLOC_FL_KEEP_SIZE here + if (allow_fallocate_) { + IOSTATS_TIMER_GUARD(allocate_nanos); + int alloc_status = fallocate(fd_, 0, file_offset_, map_size_); + if (alloc_status != 0) { + // fallback to posix_fallocate + alloc_status = posix_fallocate(fd_, file_offset_, map_size_); + } + if (alloc_status != 0) { + return IOStatus::IOError("Error allocating space to file : " + filename_ + + "Error : " + errnoStr(alloc_status).c_str()); + } + } + + TEST_KILL_RANDOM("PosixMmapFile::Append:1"); + void* ptr = mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, + file_offset_); + if (ptr == MAP_FAILED) { + return IOStatus::IOError("MMap failed on " + filename_); + } + TEST_KILL_RANDOM("PosixMmapFile::Append:2"); + + base_ = reinterpret_cast(ptr); + limit_ = base_ + map_size_; + dst_ = base_; + last_sync_ = base_; + return IOStatus::OK(); +#else + return IOStatus::NotSupported("This platform doesn't support fallocate()"); +#endif +} + +IOStatus PosixMmapFile::Msync() { + if (dst_ == last_sync_) { + return IOStatus::OK(); + } + // Find the beginnings of the pages that contain the first and last + // bytes to be synced. 
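+  // p1 is the page-aligned offset of the first unsynced byte and p2 that of
+  // the last written byte, so the msync below covers [p1, p2 + page_size_).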
+ size_t p1 = TruncateToPageBoundary(last_sync_ - base_); + size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1); + last_sync_ = dst_; + TEST_KILL_RANDOM("PosixMmapFile::Msync:0"); + if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) { + return IOError("While msync", filename_, errno); + } + return IOStatus::OK(); +} + +PosixMmapFile::PosixMmapFile(const std::string& fname, int fd, size_t page_size, + const EnvOptions& options) + : filename_(fname), + fd_(fd), + page_size_(page_size), + map_size_(Roundup(65536, page_size)), + base_(nullptr), + limit_(nullptr), + dst_(nullptr), + last_sync_(nullptr), + file_offset_(0) { +#ifdef ROCKSDB_FALLOCATE_PRESENT + allow_fallocate_ = options.allow_fallocate; + fallocate_with_keep_size_ = options.fallocate_with_keep_size; +#else + (void)options; +#endif + assert((page_size & (page_size - 1)) == 0); + assert(options.use_mmap_writes); + assert(!options.use_direct_writes); +} + +PosixMmapFile::~PosixMmapFile() { + if (fd_ >= 0) { + IOStatus s = PosixMmapFile::Close(IOOptions(), nullptr); + s.PermitUncheckedError(); + } +} + +IOStatus PosixMmapFile::Append(const Slice& data, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { + const char* src = data.data(); + size_t left = data.size(); + while (left > 0) { + assert(base_ <= dst_); + assert(dst_ <= limit_); + size_t avail = limit_ - dst_; + if (avail == 0) { + IOStatus s = UnmapCurrentRegion(); + if (!s.ok()) { + return s; + } + s = MapNewRegion(); + if (!s.ok()) { + return s; + } + TEST_KILL_RANDOM("PosixMmapFile::Append:0"); + } + + size_t n = (left <= avail) ? left : avail; + assert(dst_); + memcpy(dst_, src, n); + dst_ += n; + src += n; + left -= n; + } + return IOStatus::OK(); +} + +IOStatus PosixMmapFile::Close(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { + IOStatus s; + size_t unused = limit_ - dst_; + + s = UnmapCurrentRegion(); + if (!s.ok()) { + s = IOError("While closing mmapped file", filename_, errno); + } else if (unused > 0) { + // Trim the extra space at the end of the file + if (ftruncate(fd_, file_offset_ - unused) < 0) { + s = IOError("While ftruncating mmaped file", filename_, errno); + } + } + + if (close(fd_) < 0) { + if (s.ok()) { + s = IOError("While closing mmapped file", filename_, errno); + } + } + + fd_ = -1; + base_ = nullptr; + limit_ = nullptr; + return s; +} + +IOStatus PosixMmapFile::Flush(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { + return IOStatus::OK(); +} + +IOStatus PosixMmapFile::Sync(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { +#ifdef HAVE_FULLFSYNC + if (::fcntl(fd_, F_FULLFSYNC) < 0) { + return IOError("while fcntl(F_FULLSYNC) mmapped file", filename_, errno); + } +#else // HAVE_FULLFSYNC + if (fdatasync(fd_) < 0) { + return IOError("While fdatasync mmapped file", filename_, errno); + } +#endif // HAVE_FULLFSYNC + + return Msync(); +} + +/** + * Flush data as well as metadata to stable storage. + */ +IOStatus PosixMmapFile::Fsync(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { +#ifdef HAVE_FULLFSYNC + if (::fcntl(fd_, F_FULLFSYNC) < 0) { + return IOError("While fcntl(F_FULLSYNC) on mmaped file", filename_, errno); + } +#else // HAVE_FULLFSYNC + if (fsync(fd_) < 0) { + return IOError("While fsync mmaped file", filename_, errno); + } +#endif // HAVE_FULLFSYNC + + return Msync(); +} + +/** + * Get the size of valid data in the file. This will not match the + * size that is returned from the filesystem because we use mmap + * to extend file by map_size every time. 
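+ * The valid size is therefore file_offset_ (bytes in regions that were
+ * already unmapped) plus dst_ - base_ (bytes appended to the currently
+ * mapped region).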
+/**
+ * Get the size of valid data in the file. This will not match the
+ * size that is returned from the filesystem because we use mmap
+ * to extend the file by map_size_ every time.
+ */
+uint64_t PosixMmapFile::GetFileSize(const IOOptions& /*opts*/,
+                                    IODebugContext* /*dbg*/) {
+  size_t used = dst_ - base_;
+  return file_offset_ + used;
+}
+
+IOStatus PosixMmapFile::InvalidateCache(size_t offset, size_t length) {
+#ifndef OS_LINUX
+  (void)offset;
+  (void)length;
+  return IOStatus::OK();
+#else
+  // free OS pages
+  int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
+  if (ret == 0) {
+    return IOStatus::OK();
+  }
+  return IOError("While fadvise NotNeeded mmapped file", filename_, errno);
+#endif
+}
+
+#ifdef ROCKSDB_FALLOCATE_PRESENT
+IOStatus PosixMmapFile::Allocate(uint64_t offset, uint64_t len,
+                                 const IOOptions& /*opts*/,
+                                 IODebugContext* /*dbg*/) {
+  assert(offset <= static_cast<uint64_t>(std::numeric_limits<off_t>::max()));
+  assert(len <= static_cast<uint64_t>(std::numeric_limits<off_t>::max()));
+  TEST_KILL_RANDOM("PosixMmapFile::Allocate:0");
+  int alloc_status = 0;
+  if (allow_fallocate_) {
+    alloc_status =
+        fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
+                  static_cast<off_t>(offset), static_cast<off_t>(len));
+  }
+  if (alloc_status == 0) {
+    return IOStatus::OK();
+  } else {
+    return IOError("While fallocate offset " + std::to_string(offset) +
+                       " len " + std::to_string(len),
+                   filename_, errno);
+  }
+}
+#endif
+
+/*
+ * PosixWritableFile
+ *
+ * Use posix write to write data to a file.
+ */
+PosixWritableFile::PosixWritableFile(const std::string& fname, int fd,
+                                     size_t logical_block_size,
+                                     const EnvOptions& options)
+    : FSWritableFile(options),
+      filename_(fname),
+      use_direct_io_(options.use_direct_writes),
+      fd_(fd),
+      filesize_(0),
+      logical_sector_size_(logical_block_size) {
+#ifdef ROCKSDB_FALLOCATE_PRESENT
+  allow_fallocate_ = options.allow_fallocate;
+  fallocate_with_keep_size_ = options.fallocate_with_keep_size;
+#endif
+#ifdef ROCKSDB_RANGESYNC_PRESENT
+  sync_file_range_supported_ = IsSyncFileRangeSupported(fd_);
+#endif  // ROCKSDB_RANGESYNC_PRESENT
+  assert(!options.use_mmap_writes);
+}
+
+PosixWritableFile::~PosixWritableFile() {
+  if (fd_ >= 0) {
+    IOStatus s = PosixWritableFile::Close(IOOptions(), nullptr);
+    s.PermitUncheckedError();
+  }
+}
+
+IOStatus PosixWritableFile::Append(const Slice& data, const IOOptions& /*opts*/,
+                                   IODebugContext* /*dbg*/) {
+  if (use_direct_io()) {
+    assert(IsSectorAligned(data.size(), GetRequiredBufferAlignment()));
+    assert(IsSectorAligned(data.data(), GetRequiredBufferAlignment()));
+  }
+  const char* src = data.data();
+  size_t nbytes = data.size();
+
+  if (!PosixWrite(fd_, src, nbytes)) {
+    return IOError("While appending to file", filename_, errno);
+  }
+
+  filesize_ += nbytes;
+  return IOStatus::OK();
+}
+
+IOStatus PosixWritableFile::PositionedAppend(const Slice& data, uint64_t offset,
+                                             const IOOptions& /*opts*/,
+                                             IODebugContext* /*dbg*/) {
+  if (use_direct_io()) {
+    assert(IsSectorAligned(offset, GetRequiredBufferAlignment()));
+    assert(IsSectorAligned(data.size(), GetRequiredBufferAlignment()));
+    assert(IsSectorAligned(data.data(), GetRequiredBufferAlignment()));
+  }
+  assert(offset <= static_cast<uint64_t>(std::numeric_limits<off_t>::max()));
+  const char* src = data.data();
+  size_t nbytes = data.size();
+  if (!PosixPositionedWrite(fd_, src, nbytes, static_cast<off_t>(offset))) {
+    return IOError("While pwrite to file at offset " + std::to_string(offset),
+                   filename_, errno);
+  }
+  filesize_ = offset + nbytes;
+  return IOStatus::OK();
+}
+
+IOStatus PosixWritableFile::Truncate(uint64_t size, const IOOptions& /*opts*/,
+                                     IODebugContext* /*dbg*/) {
+  IOStatus s;
+  int r = ftruncate(fd_, size);
+  if (r < 0) {
+    s = IOError("While ftruncate 
file to size " + std::to_string(size), + filename_, errno); + } else { + filesize_ = size; + } + return s; +} + +IOStatus PosixWritableFile::Close(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { + IOStatus s; + + size_t block_size; + size_t last_allocated_block; + GetPreallocationStatus(&block_size, &last_allocated_block); + TEST_SYNC_POINT_CALLBACK("PosixWritableFile::Close", &last_allocated_block); + if (last_allocated_block > 0) { + // trim the extra space preallocated at the end of the file + // NOTE(ljin): we probably don't want to surface failure as an IOError, + // but it will be nice to log these errors. + int dummy __attribute__((__unused__)); + dummy = ftruncate(fd_, filesize_); +#if defined(ROCKSDB_FALLOCATE_PRESENT) && defined(FALLOC_FL_PUNCH_HOLE) + // in some file systems, ftruncate only trims trailing space if the + // new file size is smaller than the current size. Calling fallocate + // with FALLOC_FL_PUNCH_HOLE flag to explicitly release these unused + // blocks. FALLOC_FL_PUNCH_HOLE is supported on at least the following + // filesystems: + // XFS (since Linux 2.6.38) + // ext4 (since Linux 3.0) + // Btrfs (since Linux 3.7) + // tmpfs (since Linux 3.5) + // We ignore error since failure of this operation does not affect + // correctness. + struct stat file_stats; + int result = fstat(fd_, &file_stats); + // After ftruncate, we check whether ftruncate has the correct behavior. + // If not, we should hack it with FALLOC_FL_PUNCH_HOLE + if (result == 0 && + (file_stats.st_size + file_stats.st_blksize - 1) / + file_stats.st_blksize != + file_stats.st_blocks / (file_stats.st_blksize / 512)) { + IOSTATS_TIMER_GUARD(allocate_nanos); + if (allow_fallocate_) { + fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, filesize_, + block_size * last_allocated_block - filesize_); + } + } +#endif + } + + if (close(fd_) < 0) { + s = IOError("While closing file after writing", filename_, errno); + } + fd_ = -1; + return s; +} + +// write out the cached data to the OS cache +IOStatus PosixWritableFile::Flush(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { + return IOStatus::OK(); +} + +IOStatus PosixWritableFile::Sync(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { +#ifdef HAVE_FULLFSYNC + if (::fcntl(fd_, F_FULLFSYNC) < 0) { + return IOError("while fcntl(F_FULLFSYNC)", filename_, errno); + } +#else // HAVE_FULLFSYNC + if (fdatasync(fd_) < 0) { + return IOError("While fdatasync", filename_, errno); + } +#endif // HAVE_FULLFSYNC + return IOStatus::OK(); +} + +IOStatus PosixWritableFile::Fsync(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { +#ifdef HAVE_FULLFSYNC + if (::fcntl(fd_, F_FULLFSYNC) < 0) { + return IOError("while fcntl(F_FULLFSYNC)", filename_, errno); + } +#else // HAVE_FULLFSYNC + if (fsync(fd_) < 0) { + return IOError("While fsync", filename_, errno); + } +#endif // HAVE_FULLFSYNC + return IOStatus::OK(); +} + +bool PosixWritableFile::IsSyncThreadSafe() const { return true; } + +uint64_t PosixWritableFile::GetFileSize(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { + return filesize_; +} + +void PosixWritableFile::SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) { +#ifdef OS_LINUX +// Suppress Valgrind "Unimplemented functionality" error. 
+#ifndef ROCKSDB_VALGRIND_RUN
+  if (hint == write_hint_) {
+    return;
+  }
+  if (fcntl(fd_, F_SET_RW_HINT, &hint) == 0) {
+    write_hint_ = hint;
+  }
+#else
+  (void)hint;
+#endif  // ROCKSDB_VALGRIND_RUN
+#else
+  (void)hint;
+#endif  // OS_LINUX
+}
+
+IOStatus PosixWritableFile::InvalidateCache(size_t offset, size_t length) {
+  if (use_direct_io()) {
+    return IOStatus::OK();
+  }
+#ifndef OS_LINUX
+  (void)offset;
+  (void)length;
+  return IOStatus::OK();
+#else
+  // free OS pages
+  int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
+  if (ret == 0) {
+    return IOStatus::OK();
+  }
+  return IOError("While fadvise NotNeeded", filename_, errno);
+#endif
+}
+
+#ifdef ROCKSDB_FALLOCATE_PRESENT
+IOStatus PosixWritableFile::Allocate(uint64_t offset, uint64_t len,
+                                     const IOOptions& /*opts*/,
+                                     IODebugContext* /*dbg*/) {
+  assert(offset <= static_cast<uint64_t>(std::numeric_limits<off_t>::max()));
+  assert(len <= static_cast<uint64_t>(std::numeric_limits<off_t>::max()));
+  TEST_KILL_RANDOM("PosixWritableFile::Allocate:0");
+  IOSTATS_TIMER_GUARD(allocate_nanos);
+  int alloc_status = 0;
+  if (allow_fallocate_) {
+    alloc_status =
+        fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
+                  static_cast<off_t>(offset), static_cast<off_t>(len));
+  }
+  if (alloc_status == 0) {
+    return IOStatus::OK();
+  } else {
+    return IOError("While fallocate offset " + std::to_string(offset) +
+                       " len " + std::to_string(len),
+                   filename_, errno);
+  }
+}
+#endif
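RangeSync() just below chooses between two sync_file_range(2) flag combinations. A Linux-only sketch of the two modes (ours, not from the vendored sources; glibc needs _GNU_SOURCE for the declaration):

#include <fcntl.h>
#include <unistd.h>

// strict == true mirrors the strict_bytes_per_sync_ branch: first wait for
// any writeback already in flight on [0, offset + nbytes), then start a new
// pass; otherwise just kick off asynchronous writeback of the new range.
int StartRangeWriteback(int fd, off_t offset, off_t nbytes, bool strict) {
  if (strict) {
    return sync_file_range(fd, 0, offset + nbytes,
                           SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE);
  }
  return sync_file_range(fd, offset, nbytes, SYNC_FILE_RANGE_WRITE);
}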
+IOStatus PosixWritableFile::RangeSync(uint64_t offset, uint64_t nbytes,
+                                      const IOOptions& opts,
+                                      IODebugContext* dbg) {
+#ifdef ROCKSDB_RANGESYNC_PRESENT
+  assert(offset <= static_cast<uint64_t>(std::numeric_limits<off_t>::max()));
+  assert(nbytes <= static_cast<uint64_t>(std::numeric_limits<off_t>::max()));
+  if (sync_file_range_supported_) {
+    int ret;
+    if (strict_bytes_per_sync_) {
+      // Specifying `SYNC_FILE_RANGE_WAIT_BEFORE` together with an offset/length
+      // that spans all bytes written so far tells `sync_file_range` to wait for
+      // any outstanding writeback requests to finish before issuing a new one.
+      ret =
+          sync_file_range(fd_, 0, static_cast<off_t>(offset + nbytes),
+                          SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE);
+    } else {
+      ret = sync_file_range(fd_, static_cast<off_t>(offset),
+                            static_cast<off_t>(nbytes), SYNC_FILE_RANGE_WRITE);
+    }
+    if (ret != 0) {
+      return IOError("While sync_file_range returned " + std::to_string(ret),
+                     filename_, errno);
+    }
+    return IOStatus::OK();
+  }
+#endif  // ROCKSDB_RANGESYNC_PRESENT
+  return FSWritableFile::RangeSync(offset, nbytes, opts, dbg);
+}
+
+#ifdef OS_LINUX
+size_t PosixWritableFile::GetUniqueId(char* id, size_t max_size) const {
+  return PosixHelper::GetUniqueIdFromFile(fd_, id, max_size);
+}
+#endif
+
+/*
+ * PosixRandomRWFile
+ */
+
+PosixRandomRWFile::PosixRandomRWFile(const std::string& fname, int fd,
+                                     const EnvOptions& /*options*/)
+    : filename_(fname), fd_(fd) {}
+
+PosixRandomRWFile::~PosixRandomRWFile() {
+  if (fd_ >= 0) {
+    IOStatus s = Close(IOOptions(), nullptr);
+    s.PermitUncheckedError();
+  }
+}
+
+IOStatus PosixRandomRWFile::Write(uint64_t offset, const Slice& data,
+                                  const IOOptions& /*opts*/,
+                                  IODebugContext* /*dbg*/) {
+  const char* src = data.data();
+  size_t nbytes = data.size();
+  if (!PosixPositionedWrite(fd_, src, nbytes, static_cast<off_t>(offset))) {
+    return IOError("While writing random read/write file at offset " +
+                       std::to_string(offset),
+                   filename_, errno);
+  }
+
+  return IOStatus::OK();
+}
+
+IOStatus PosixRandomRWFile::Read(uint64_t offset, size_t n,
+                                 const IOOptions& /*opts*/, Slice* result,
+                                 char* scratch, IODebugContext* /*dbg*/) const {
+  size_t left = n;
+  char* ptr = scratch;
+  while (left > 0) {
+    ssize_t done = pread(fd_, ptr, left, offset);
+    if (done < 0) {
+      // error while reading from file
+      if (errno == EINTR) {
+        // read was interrupted, try again.
+        continue;
+      }
+      return IOError("While reading random read/write file offset " +
+                         std::to_string(offset) + " len " + std::to_string(n),
+                     filename_, errno);
+    } else if (done == 0) {
+      // Nothing more to read
+      break;
+    }
+
+    // Read `done` bytes
+    ptr += done;
+    offset += done;
+    left -= done;
+  }
+
+  *result = Slice(scratch, n - left);
+  return IOStatus::OK();
+}
+
+IOStatus PosixRandomRWFile::Flush(const IOOptions& /*opts*/,
+                                  IODebugContext* /*dbg*/) {
+  return IOStatus::OK();
+}
+
+IOStatus PosixRandomRWFile::Sync(const IOOptions& /*opts*/,
+                                 IODebugContext* /*dbg*/) {
+#ifdef HAVE_FULLFSYNC
+  if (::fcntl(fd_, F_FULLFSYNC) < 0) {
+    return IOError("While fcntl(F_FULLFSYNC) random rw file", filename_, errno);
+  }
+#else  // HAVE_FULLFSYNC
+  if (fdatasync(fd_) < 0) {
+    return IOError("While fdatasync random read/write file", filename_, errno);
+  }
+#endif  // HAVE_FULLFSYNC
+  return IOStatus::OK();
+}
+
+IOStatus PosixRandomRWFile::Fsync(const IOOptions& /*opts*/,
+                                  IODebugContext* /*dbg*/) {
+#ifdef HAVE_FULLFSYNC
+  if (::fcntl(fd_, F_FULLFSYNC) < 0) {
+    return IOError("While fcntl(F_FULLFSYNC) random rw file", filename_, errno);
+  }
+#else  // HAVE_FULLFSYNC
+  if (fsync(fd_) < 0) {
+    return IOError("While fsync random read/write file", filename_, errno);
+  }
+#endif  // HAVE_FULLFSYNC
+  return IOStatus::OK();
+}
+
+IOStatus PosixRandomRWFile::Close(const IOOptions& /*opts*/,
+                                  IODebugContext* /*dbg*/) {
+  if (close(fd_) < 0) {
+    return IOError("While close random read/write file", filename_, errno);
+  }
+  fd_ = -1;
+  return IOStatus::OK();
+}
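PosixPositionedWrite(), used by Write() above, is defined earlier in io_posix.cc and falls outside this hunk. A sketch of the standard full-write loop such a helper implements, covering short writes and EINTR (ours; the vendored helper's exact shape may differ):

#include <unistd.h>
#include <cerrno>
#include <cstddef>

bool PWriteAll(int fd, const char* src, size_t left, off_t offset) {
  while (left > 0) {
    ssize_t done = pwrite(fd, src, left, offset);
    if (done < 0) {
      if (errno == EINTR) {
        continue;  // interrupted by a signal: retry the same range
      }
      return false;  // real error; the caller reads errno
    }
    // Short write: advance past the written prefix and keep going.
    src += done;
    offset += done;
    left -= static_cast<size_t>(done);
  }
  return true;
}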
+PosixMemoryMappedFileBuffer::~PosixMemoryMappedFileBuffer() {
+  // TODO should have error handling though not much we can do...
+  munmap(this->base_, length_);
+}
+
+/*
+ * PosixDirectory
+ */
+#if !defined(BTRFS_SUPER_MAGIC)
+// The magic number for btrfs is fixed; if it's not defined, define it here.
+#define BTRFS_SUPER_MAGIC 0x9123683E
+#endif
+PosixDirectory::PosixDirectory(int fd, const std::string& directory_name)
+    : fd_(fd), directory_name_(directory_name) {
+  is_btrfs_ = false;
+#ifdef OS_LINUX
+  struct statfs buf;
+  int ret = fstatfs(fd, &buf);
+  is_btrfs_ = (ret == 0 && buf.f_type == static_cast<decltype(buf.f_type)>(
+                               BTRFS_SUPER_MAGIC));
+#endif
+}
+
+PosixDirectory::~PosixDirectory() {
+  if (fd_ >= 0) {
+    IOStatus s = PosixDirectory::Close(IOOptions(), nullptr);
+    s.PermitUncheckedError();
+  }
+}
+
+IOStatus PosixDirectory::Fsync(const IOOptions& opts, IODebugContext* dbg) {
+  return FsyncWithDirOptions(opts, dbg, DirFsyncOptions());
+}
+
+// Users who want the directory's file entries synced must call Fsync() or
+// FsyncWithDirOptions() before Close().
+IOStatus PosixDirectory::Close(const IOOptions& /*opts*/,
+                               IODebugContext* /*dbg*/) {
+  IOStatus s = IOStatus::OK();
+  if (close(fd_) < 0) {
+    s = IOError("While closing directory ", directory_name_, errno);
+  } else {
+    fd_ = -1;
+  }
+  return s;
+}
+
+IOStatus PosixDirectory::FsyncWithDirOptions(
+    const IOOptions& /*opts*/, IODebugContext* /*dbg*/,
+    const DirFsyncOptions& dir_fsync_options) {
+  assert(fd_ >= 0);  // Check use after close
+  IOStatus s = IOStatus::OK();
+#ifndef OS_AIX
+  if (is_btrfs_) {
+    // skip dir fsync for new file creation, which is not needed for btrfs
+    if (dir_fsync_options.reason == DirFsyncOptions::kNewFileSynced) {
+      return s;
+    }
+    // skip dir fsync for file renames; only the new file needs to be synced
+    if (dir_fsync_options.reason == DirFsyncOptions::kFileRenamed) {
+      std::string new_name = dir_fsync_options.renamed_new_name;
+      assert(!new_name.empty());
+      int fd;
+      do {
+        IOSTATS_TIMER_GUARD(open_nanos);
+        fd = open(new_name.c_str(), O_RDONLY);
+      } while (fd < 0 && errno == EINTR);
+      if (fd < 0) {
+        s = IOError("While open renaming file", new_name, errno);
+      } else if (fsync(fd) < 0) {
+        s = IOError("While fsync renaming file", new_name, errno);
+      }
+      if (close(fd) < 0) {
+        s = IOError("While closing file after fsync", new_name, errno);
+      }
+      return s;
+    }
+    // fall back to dir-fsync for kDefault, kDirRenamed and kFileDeleted
+  }
+
+  // skip fsync/fcntl when fd_ == -1, since the file descriptor has then been
+  // closed in either the destructor or Close(); data must have been fsync-ed
+  // before the destructor or Close() was called
+#ifdef HAVE_FULLFSYNC
+  // btrfs is a Linux filesystem, while F_FULLFSYNC is currently only
+  // available on Mac OS.
+  assert(!is_btrfs_);
+  if (fd_ != -1 && ::fcntl(fd_, F_FULLFSYNC) < 0) {
+    return IOError("while fcntl(F_FULLFSYNC)", "a directory", errno);
+  }
+#else   // HAVE_FULLFSYNC
+  if (fd_ != -1 && fsync(fd_) == -1) {
+    s = IOError("While fsync", "a directory", errno);
+  }
+#endif  // HAVE_FULLFSYNC
+#endif  // OS_AIX
+  return s;
+}
+}  // namespace ROCKSDB_NAMESPACE
+#endif
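The PosixDirectory constructor above detects btrfs by comparing statfs::f_type against the filesystem magic number. A standalone sketch of the same probe, run on the current directory (ours, not from the vendored sources):

#include <sys/vfs.h>  // fstatfs, struct statfs (Linux)
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>

#ifndef BTRFS_SUPER_MAGIC
#define BTRFS_SUPER_MAGIC 0x9123683E
#endif

int main() {
  int fd = open(".", O_RDONLY);
  if (fd < 0) return 1;
  struct statfs buf;
  // f_type's integer type varies across platforms, hence the decltype cast.
  bool is_btrfs =
      fstatfs(fd, &buf) == 0 &&
      buf.f_type == static_cast<decltype(buf.f_type)>(BTRFS_SUPER_MAGIC);
  std::printf("btrfs: %s\n", is_btrfs ? "yes" : "no");
  close(fd);
  return 0;
}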
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.h
new file mode 100644
index 0000000000..f129668ea5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/io_posix.h
@@ -0,0 +1,523 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+#include <errno.h>
+#if defined(ROCKSDB_IOURING_PRESENT)
+#include <liburing.h>
+#include <sys/uio.h>
+#endif
+#include <unistd.h>
+
+#include <atomic>
+#include <functional>
+#include <map>
+#include <string>
+
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/io_status.h"
+#include "test_util/sync_point.h"
+#include "util/mutexlock.h"
+#include "util/thread_local.h"
+
+// For non-Linux platforms, the following macros are used only as
+// placeholders.
+#if !(defined OS_LINUX) && !(defined CYGWIN) && !(defined OS_AIX)
+#define POSIX_FADV_NORMAL 0     /* [MC1] no further special treatment */
+#define POSIX_FADV_RANDOM 1     /* [MC1] expect random page refs */
+#define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */
+#define POSIX_FADV_WILLNEED 3   /* [MC1] will need these pages */
+#define POSIX_FADV_DONTNEED 4   /* [MC1] don't need these pages */
+
+#define POSIX_MADV_NORMAL 0     /* [MC1] no further special treatment */
+#define POSIX_MADV_RANDOM 1     /* [MC1] expect random page refs */
+#define POSIX_MADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */
+#define POSIX_MADV_WILLNEED 3   /* [MC1] will need these pages */
+#define POSIX_MADV_DONTNEED 4   /* [MC1] don't need these pages */
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+std::string IOErrorMsg(const std::string& context,
+                       const std::string& file_name);
+// file_name can be left empty if it is unknown.
+IOStatus IOError(const std::string& context, const std::string& file_name,
+                 int err_number);
+
+class PosixHelper {
+ public:
+  static size_t GetUniqueIdFromFile(int fd, char* id, size_t max_size);
+  static size_t GetLogicalBlockSizeOfFd(int fd);
+  static Status GetLogicalBlockSizeOfDirectory(const std::string& directory,
+                                               size_t* size);
+};
+
+/*
+ * DirectIOHelper
+ */
+inline bool IsSectorAligned(const size_t off, size_t sector_size) {
+  assert((sector_size & (sector_size - 1)) == 0);
+  return (off & (sector_size - 1)) == 0;
+}
+
+#ifndef NDEBUG
+inline bool IsSectorAligned(const void* ptr, size_t sector_size) {
+  return uintptr_t(ptr) % sector_size == 0;
+}
+#endif
+
+#if defined(ROCKSDB_IOURING_PRESENT)
+struct Posix_IOHandle {
+  Posix_IOHandle(struct io_uring* _iu,
+                 std::function<void(const FSReadRequest&, void*)> _cb,
+                 void* _cb_arg, uint64_t _offset, size_t _len, char* _scratch,
+                 bool _use_direct_io, size_t _alignment)
+      : iu(_iu),
+        cb(_cb),
+        cb_arg(_cb_arg),
+        offset(_offset),
+        len(_len),
+        scratch(_scratch),
+        use_direct_io(_use_direct_io),
+        alignment(_alignment),
+        is_finished(false),
+        req_count(0) {}
+
+  struct iovec iov;
+  struct io_uring* iu;
+  std::function<void(const FSReadRequest&, void*)> cb;
+  void* cb_arg;
+  uint64_t offset;
+  size_t len;
+  char* scratch;
+  bool use_direct_io;
+  size_t alignment;
+  bool is_finished;
+  // req_count is used by the AbortIO API to keep track of the number of
+  // outstanding requests.
+  uint32_t req_count;
+};
+
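Posix_IOHandle above carries the state for one io_uring read whose completion is decoded by UpdateResult() below. For orientation, here is a minimal synchronous liburing read cycle (ours, not the vendored code's async path; error handling is reduced to early returns):

#include <liburing.h>
#include <fcntl.h>
#include <unistd.h>

// Returns the raw cqe->res: < 0 is -errno, 0 is EOF (or, rarely, a partial
// result -- the case UpdateResult() below has to disambiguate), > 0 is the
// number of bytes read.
int ReadOnce(const char* path, char* buf, unsigned len, off_t off) {
  struct io_uring ring;
  if (io_uring_queue_init(8, &ring, 0) != 0) return -1;
  int fd = open(path, O_RDONLY);
  if (fd < 0) { io_uring_queue_exit(&ring); return -1; }
  struct io_uring_sqe* sqe = io_uring_get_sqe(&ring);
  io_uring_prep_read(sqe, fd, buf, len, off);  // queue one read
  io_uring_submit(&ring);                      // hand it to the kernel
  struct io_uring_cqe* cqe = nullptr;
  io_uring_wait_cqe(&ring, &cqe);              // block for the completion
  int res = cqe->res;
  io_uring_cqe_seen(&ring, cqe);               // retire the completion entry
  close(fd);
  io_uring_queue_exit(&ring);
  return res;
}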
+inline void UpdateResult(struct io_uring_cqe* cqe, const std::string& file_name,
+                         size_t len, size_t iov_len, bool async_read,
+                         bool use_direct_io, size_t alignment,
+                         size_t& finished_len, FSReadRequest* req,
+                         size_t& bytes_read, bool& read_again) {
+  read_again = false;
+  if (cqe->res < 0) {
+    req->result = Slice(req->scratch, 0);
+    req->status = IOError("Req failed", file_name, cqe->res);
+  } else {
+    bytes_read = static_cast<size_t>(cqe->res);
+    TEST_SYNC_POINT_CALLBACK("UpdateResults::io_uring_result", &bytes_read);
+    if (bytes_read == iov_len) {
+      req->result = Slice(req->scratch, req->len);
+      req->status = IOStatus::OK();
+    } else if (bytes_read == 0) {
+      // cqe->res == 0 can mean EOF, or it can mean partial results; see the
+      // comment at
+      // https://github.com/facebook/rocksdb/pull/6441#issuecomment-589843435
+      // Fall back to pread in this case.
+      if (use_direct_io && !IsSectorAligned(finished_len, alignment)) {
+        // The bytes read don't fill whole sectors. This should only happen at
+        // the end of the file.
+        req->result = Slice(req->scratch, finished_len);
+        req->status = IOStatus::OK();
+      } else {
+        if (async_read) {
+          // No bytes read. This can mean EOF. In case of partial results, it
+          // is the caller's responsibility to call read/readasync again.
+          req->result = Slice(req->scratch, 0);
+          req->status = IOStatus::OK();
+        } else {
+          read_again = true;
+        }
+      }
+    } else if (bytes_read < iov_len) {
+      assert(bytes_read > 0);
+      if (async_read) {
+        req->result = Slice(req->scratch, bytes_read);
+        req->status = IOStatus::OK();
+      } else {
+        assert(bytes_read + finished_len < len);
+        finished_len += bytes_read;
+      }
+    } else {
+      req->result = Slice(req->scratch, 0);
+      req->status = IOError("Req returned more bytes than requested", file_name,
+                            cqe->res);
+    }
+  }
+#ifdef NDEBUG
+  (void)len;
+#endif
+}
+#endif
+
+#ifdef OS_LINUX
+// Files under a specific directory have the same logical block size.
+// This class caches the logical block size for the specified directories to
+// save the CPU cost of computing the size.
+// Safe for concurrent access from multiple threads without any external
+// synchronization.
+class LogicalBlockSizeCache {
+ public:
+  LogicalBlockSizeCache(
+      std::function<size_t(int)> get_logical_block_size_of_fd =
+          PosixHelper::GetLogicalBlockSizeOfFd,
+      std::function<Status(const std::string&, size_t*)>
+          get_logical_block_size_of_directory =
+              PosixHelper::GetLogicalBlockSizeOfDirectory)
+      : get_logical_block_size_of_fd_(get_logical_block_size_of_fd),
+        get_logical_block_size_of_directory_(
+            get_logical_block_size_of_directory) {}
+
+  // Takes the following actions:
+  // 1. Increases the reference count of the directories;
+  // 2. If a directory's logical block size is not cached,
+  //    computes the size and caches the result.
+  Status RefAndCacheLogicalBlockSize(
+      const std::vector<std::string>& directories);
+
+  // Takes the following actions:
+  // 1. Decreases the reference count of the directories;
+  // 2. If the reference count of a directory reaches 0, removes the
+  //    directory from the cache.
+  void UnrefAndTryRemoveCachedLogicalBlockSize(
+      const std::vector<std::string>& directories);
+
+  // Returns the logical block size for the file.
+  //
+  // If the file is under a cached directory, returns the cached size.
+  // Otherwise, the size is computed.
+  size_t GetLogicalBlockSize(const std::string& fname, int fd);
+
+  int GetRefCount(const std::string& dir) {
+    ReadLock lock(&cache_mutex_);
+    auto it = cache_.find(dir);
+    if (it == cache_.end()) {
+      return 0;
+    }
+    return it->second.ref;
+  }
+
+  size_t Size() const { return cache_.size(); }
+
+  bool Contains(const std::string& dir) {
+    ReadLock lock(&cache_mutex_);
+    return cache_.find(dir) != cache_.end();
+  }
+
+ private:
+  struct CacheValue {
+    CacheValue() : size(0), ref(0) {}
+
+    // Logical block size of the directory.
+    size_t size;
+    // Reference count of the directory.
+    int ref;
+  };
+
+  std::function<size_t(int)> get_logical_block_size_of_fd_;
+  std::function<Status(const std::string&, size_t*)>
+      get_logical_block_size_of_directory_;
+
+  std::map<std::string, CacheValue> cache_;
+  port::RWMutex cache_mutex_;
+};
+#endif
+
+class PosixSequentialFile : public FSSequentialFile {
+ private:
+  std::string filename_;
+  FILE* file_;
+  int fd_;
+  bool use_direct_io_;
+  size_t logical_sector_size_;
+
+ public:
+  PosixSequentialFile(const std::string& fname, FILE* file, int fd,
+                      size_t logical_block_size, const EnvOptions& options);
+  virtual ~PosixSequentialFile();
+
+  virtual IOStatus Read(size_t n, const IOOptions& opts, Slice* result,
+                        char* scratch, IODebugContext* dbg) override;
+  virtual IOStatus PositionedRead(uint64_t offset, size_t n,
+                                  const IOOptions& opts, Slice* result,
+                                  char* scratch, IODebugContext* dbg) override;
+  virtual IOStatus Skip(uint64_t n) override;
+  virtual IOStatus InvalidateCache(size_t offset, size_t length) override;
+  virtual bool use_direct_io() const override { return use_direct_io_; }
+  virtual size_t GetRequiredBufferAlignment() const override {
+    return logical_sector_size_;
+  }
+};
+
+#if defined(ROCKSDB_IOURING_PRESENT)
+// io_uring instance queue depth
+const unsigned int kIoUringDepth = 256;
+
+inline void DeleteIOUring(void* p) {
+  struct io_uring* iu = static_cast<struct io_uring*>(p);
+  delete iu;
+}
+
+inline struct io_uring* CreateIOUring() {
+  struct io_uring* new_io_uring = new struct io_uring;
+  int ret = io_uring_queue_init(kIoUringDepth, new_io_uring, 0);
+  if (ret) {
+    delete new_io_uring;
+    new_io_uring = nullptr;
+  }
+  return new_io_uring;
+}
+#endif  // defined(ROCKSDB_IOURING_PRESENT)
+
+class PosixRandomAccessFile : public FSRandomAccessFile {
+ protected:
+  std::string filename_;
+  int fd_;
+  bool use_direct_io_;
+  size_t logical_sector_size_;
+#if defined(ROCKSDB_IOURING_PRESENT)
+  ThreadLocalPtr* thread_local_io_urings_;
+#endif
+
+ public:
+  PosixRandomAccessFile(const std::string& fname, int fd,
+                        size_t logical_block_size, const EnvOptions& options
+#if defined(ROCKSDB_IOURING_PRESENT)
+                        ,
+                        ThreadLocalPtr* thread_local_io_urings
+#endif
+  );
+  virtual ~PosixRandomAccessFile();
+
+  virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& opts,
+                        Slice* result, char* scratch,
+                        IODebugContext* dbg) const override;
+
+  virtual IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
+                             const IOOptions& options,
+                             IODebugContext* dbg) override;
+
+  virtual IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& opts,
+                            IODebugContext* dbg) override;
+
+#if defined(OS_LINUX) || defined(OS_MACOSX) || defined(OS_AIX)
+  virtual size_t GetUniqueId(char* id, size_t max_size) const override;
+#endif
+  virtual void Hint(AccessPattern pattern) override;
+  virtual IOStatus InvalidateCache(size_t offset, size_t length) override;
+  virtual bool use_direct_io() const override { return use_direct_io_; }
+  virtual size_t GetRequiredBufferAlignment() const override {
+    return logical_sector_size_;
+  }
+  // EXPERIMENTAL
+  virtual IOStatus ReadAsync(
+      FSReadRequest& req, const IOOptions& opts,
+      std::function<void(const FSReadRequest&, void*)> cb, void* cb_arg,
+      void** io_handle, IOHandleDeleter* del_fn, IODebugContext* dbg) override;
+};
+
+class PosixWritableFile : public FSWritableFile {
+ protected:
+  const std::string filename_;
+  const bool use_direct_io_;
+  int fd_;
+  uint64_t filesize_;
+  size_t logical_sector_size_;
+#ifdef ROCKSDB_FALLOCATE_PRESENT
+  bool allow_fallocate_;
+  bool fallocate_with_keep_size_;
+#endif
+#ifdef ROCKSDB_RANGESYNC_PRESENT
+  // Even if the syscall is present, the filesystem may still not properly
+  // support it, so we need to do a dynamic check too.
+  bool sync_file_range_supported_;
+#endif  // ROCKSDB_RANGESYNC_PRESENT
+
+ public:
+  explicit PosixWritableFile(const std::string& fname, int fd,
+                             size_t logical_block_size,
+                             const EnvOptions& options);
+  virtual ~PosixWritableFile();
+
+  // Need to implement this so the file is truncated correctly
+  // with direct I/O
+  virtual IOStatus Truncate(uint64_t size, const IOOptions& opts,
+                            IODebugContext* dbg) override;
+  virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override;
+  virtual IOStatus Append(const Slice& data, const IOOptions& opts,
+                          IODebugContext* dbg) override;
+  virtual IOStatus Append(const Slice& data, const IOOptions& opts,
+                          const DataVerificationInfo& /* verification_info */,
+                          IODebugContext* dbg) override {
+    return Append(data, opts, dbg);
+  }
+  virtual IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                                    const IOOptions& opts,
+                                    IODebugContext* dbg) override;
+  virtual IOStatus PositionedAppend(
+      const Slice& data, uint64_t offset, const IOOptions& opts,
+      const DataVerificationInfo& /* verification_info */,
+      IODebugContext* dbg) override {
+    return PositionedAppend(data, offset, opts, dbg);
+  }
+  virtual IOStatus Flush(const IOOptions& opts, IODebugContext* dbg) override;
+  virtual IOStatus Sync(const IOOptions& opts, IODebugContext* dbg) override;
+  virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override;
+  virtual bool IsSyncThreadSafe() const override;
+  virtual bool use_direct_io() const override { return use_direct_io_; }
+  virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override;
+  virtual uint64_t GetFileSize(const IOOptions& opts,
+                               IODebugContext* dbg) override;
+  virtual IOStatus InvalidateCache(size_t offset, size_t length) override;
+  virtual size_t GetRequiredBufferAlignment() const override {
+    return logical_sector_size_;
+  }
+#ifdef ROCKSDB_FALLOCATE_PRESENT
+  virtual IOStatus Allocate(uint64_t offset, uint64_t len,
+                            const IOOptions& opts,
+                            IODebugContext* dbg) override;
+#endif
+  virtual IOStatus RangeSync(uint64_t offset, uint64_t nbytes,
+                             const IOOptions& opts,
+                             IODebugContext* dbg) override;
+#ifdef OS_LINUX
+  virtual size_t GetUniqueId(char* id, size_t max_size) const override;
+#endif
+};
+
+// mmap() based random-access
+class PosixMmapReadableFile : public FSRandomAccessFile {
+ private:
+  int fd_;
+  std::string filename_;
+  void* mmapped_region_;
+  size_t length_;
+
+ public:
+  PosixMmapReadableFile(const int fd, const std::string& fname, void* base,
+                        size_t length, const EnvOptions& options);
+  virtual ~PosixMmapReadableFile();
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& opts, Slice* result,
+                char* scratch, IODebugContext* dbg) const override;
+  void Hint(AccessPattern pattern) override;
+  IOStatus InvalidateCache(size_t offset, size_t length) override;
+};
+
+class PosixMmapFile : public FSWritableFile {
+ private: + std::string filename_; + int fd_; + size_t page_size_; + size_t map_size_; // How much extra memory to map at a time + char* base_; // The mapped region + char* limit_; // Limit of the mapped region + char* dst_; // Where to write next (in range [base_,limit_]) + char* last_sync_; // Where have we synced up to + uint64_t file_offset_; // Offset of base_ in file +#ifdef ROCKSDB_FALLOCATE_PRESENT + bool allow_fallocate_; // If false, fallocate calls are bypassed + bool fallocate_with_keep_size_; +#endif + + // Roundup x to a multiple of y + static size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; } + + size_t TruncateToPageBoundary(size_t s) { + s -= (s & (page_size_ - 1)); + assert((s % page_size_) == 0); + return s; + } + + IOStatus MapNewRegion(); + IOStatus UnmapCurrentRegion(); + IOStatus Msync(); + + public: + PosixMmapFile(const std::string& fname, int fd, size_t page_size, + const EnvOptions& options); + ~PosixMmapFile(); + + // Means Close() will properly take care of truncate + // and it does not need any additional information + virtual IOStatus Truncate(uint64_t /*size*/, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; + virtual IOStatus Append(const Slice& data, const IOOptions& opts, + IODebugContext* dbg) override; + virtual IOStatus Append(const Slice& data, const IOOptions& opts, + const DataVerificationInfo& /* verification_info */, + IODebugContext* dbg) override { + return Append(data, opts, dbg); + } + virtual IOStatus Flush(const IOOptions& opts, IODebugContext* dbg) override; + virtual IOStatus Sync(const IOOptions& opts, IODebugContext* dbg) override; + virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override; + virtual uint64_t GetFileSize(const IOOptions& opts, + IODebugContext* dbg) override; + virtual IOStatus InvalidateCache(size_t offset, size_t length) override; +#ifdef ROCKSDB_FALLOCATE_PRESENT + virtual IOStatus Allocate(uint64_t offset, uint64_t len, + const IOOptions& opts, + IODebugContext* dbg) override; +#endif +}; + +class PosixRandomRWFile : public FSRandomRWFile { + public: + explicit PosixRandomRWFile(const std::string& fname, int fd, + const EnvOptions& options); + virtual ~PosixRandomRWFile(); + + virtual IOStatus Write(uint64_t offset, const Slice& data, + const IOOptions& opts, IODebugContext* dbg) override; + + virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& opts, + Slice* result, char* scratch, + IODebugContext* dbg) const override; + + virtual IOStatus Flush(const IOOptions& opts, IODebugContext* dbg) override; + virtual IOStatus Sync(const IOOptions& opts, IODebugContext* dbg) override; + virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override; + virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; + + private: + const std::string filename_; + int fd_; +}; + +struct PosixMemoryMappedFileBuffer : public MemoryMappedFileBuffer { + PosixMemoryMappedFileBuffer(void* _base, size_t _length) + : MemoryMappedFileBuffer(_base, _length) {} + virtual ~PosixMemoryMappedFileBuffer(); +}; + +class PosixDirectory : public FSDirectory { + public: + explicit PosixDirectory(int fd, const std::string& directory_name); + ~PosixDirectory(); + virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override; + + virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; + + virtual IOStatus 
FsyncWithDirOptions( + const IOOptions&, IODebugContext*, + const DirFsyncOptions& dir_fsync_options) override; + + private: + int fd_; + bool is_btrfs_; + const std::string directory_name_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.cc new file mode 100644 index 0000000000..c232af61eb --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.cc @@ -0,0 +1,1058 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "env/mock_env.h" + +#include +#include + +#include "env/emulated_clock.h" +#include "file/filename.h" +#include "port/sys_time.h" +#include "rocksdb/file_system.h" +#include "rocksdb/utilities/options_type.h" +#include "test_util/sync_point.h" +#include "util/cast_util.h" +#include "util/hash.h" +#include "util/random.h" +#include "util/rate_limiter_impl.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +int64_t MaybeCurrentTime(const std::shared_ptr& clock) { + int64_t time = 1337346000; // arbitrary fallback default + clock->GetCurrentTime(&time).PermitUncheckedError(); + return time; +} + +static std::unordered_map time_elapse_type_info = { + {"time_elapse_only_sleep", + {0, OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kCompareNever, + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { + auto clock = static_cast(addr); + clock->SetTimeElapseOnlySleep(ParseBoolean("", value)); + return Status::OK(); + }, + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr, std::string* value) { + const auto clock = static_cast(addr); + *value = clock->IsTimeElapseOnlySleep() ? "true" : "false"; + return Status::OK(); + }, + nullptr}}, +}; +static std::unordered_map mock_sleep_type_info = { + {"mock_sleep", + {0, OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kCompareNever, + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { + auto clock = static_cast(addr); + clock->SetMockSleep(ParseBoolean("", value)); + return Status::OK(); + }, + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr, std::string* value) { + const auto clock = static_cast(addr); + *value = clock->IsMockSleepEnabled() ? 
"true" : "false"; + return Status::OK(); + }, + nullptr}}, +}; +} // namespace + +EmulatedSystemClock::EmulatedSystemClock( + const std::shared_ptr& base, bool time_elapse_only_sleep) + : SystemClockWrapper(base), + maybe_starting_time_(MaybeCurrentTime(base)), + time_elapse_only_sleep_(time_elapse_only_sleep), + no_slowdown_(time_elapse_only_sleep) { + RegisterOptions("", this, &time_elapse_type_info); + RegisterOptions("", this, &mock_sleep_type_info); +} + +class MemFile { + public: + explicit MemFile(SystemClock* clock, const std::string& fn, + bool _is_lock_file = false) + : clock_(clock), + fn_(fn), + refs_(0), + is_lock_file_(_is_lock_file), + locked_(false), + size_(0), + modified_time_(Now()), + rnd_(Lower32of64(GetSliceNPHash64(fn))), + fsynced_bytes_(0) {} + // No copying allowed. + MemFile(const MemFile&) = delete; + void operator=(const MemFile&) = delete; + + void Ref() { + MutexLock lock(&mutex_); + ++refs_; + } + + bool is_lock_file() const { return is_lock_file_; } + + bool Lock() { + assert(is_lock_file_); + MutexLock lock(&mutex_); + if (locked_) { + return false; + } else { + locked_ = true; + return true; + } + } + + void Unlock() { + assert(is_lock_file_); + MutexLock lock(&mutex_); + locked_ = false; + } + + void Unref() { + bool do_delete = false; + { + MutexLock lock(&mutex_); + --refs_; + assert(refs_ >= 0); + if (refs_ <= 0) { + do_delete = true; + } + } + + if (do_delete) { + delete this; + } + } + + uint64_t Size() const { return size_; } + + void Truncate(size_t size, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + MutexLock lock(&mutex_); + if (size < size_) { + data_.resize(size); + size_ = size; + } + } + + void CorruptBuffer() { + if (fsynced_bytes_ >= size_) { + return; + } + uint64_t buffered_bytes = size_ - fsynced_bytes_; + uint64_t start = + fsynced_bytes_ + rnd_.Uniform(static_cast(buffered_bytes)); + uint64_t end = std::min(start + 512, size_.load()); + MutexLock lock(&mutex_); + for (uint64_t pos = start; pos < end; ++pos) { + data_[static_cast(pos)] = static_cast(rnd_.Uniform(256)); + } + } + + IOStatus Read(uint64_t offset, size_t n, const IOOptions& /*options*/, + Slice* result, char* scratch, IODebugContext* /*dbg*/) const { + { + IOStatus s; + TEST_SYNC_POINT_CALLBACK("MemFile::Read:IOStatus", &s); + if (!s.ok()) { + // with sync point only + *result = Slice(); + return s; + } + } + MutexLock lock(&mutex_); + const uint64_t available = Size() - std::min(Size(), offset); + size_t offset_ = static_cast(offset); + if (n > available) { + n = static_cast(available); + } + if (n == 0) { + *result = Slice(); + return IOStatus::OK(); + } + if (scratch) { + memcpy(scratch, &(data_[offset_]), n); + *result = Slice(scratch, n); + } else { + *result = Slice(&(data_[offset_]), n); + } + return IOStatus::OK(); + } + + IOStatus Write(uint64_t offset, const Slice& data, + const IOOptions& /*options*/, IODebugContext* /*dbg*/) { + MutexLock lock(&mutex_); + size_t offset_ = static_cast(offset); + if (offset + data.size() > data_.size()) { + data_.resize(offset_ + data.size()); + } + data_.replace(offset_, data.size(), data.data(), data.size()); + size_ = data_.size(); + modified_time_ = Now(); + return IOStatus::OK(); + } + + IOStatus Append(const Slice& data, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + MutexLock lock(&mutex_); + data_.append(data.data(), data.size()); + size_ = data_.size(); + modified_time_ = Now(); + return IOStatus::OK(); + } + + IOStatus Fsync(const IOOptions& /*options*/, IODebugContext* /*dbg*/) { + 
fsynced_bytes_ = size_.load(); + return IOStatus::OK(); + } + + uint64_t ModifiedTime() const { return modified_time_; } + + private: + uint64_t Now() { + int64_t unix_time = 0; + auto s = clock_->GetCurrentTime(&unix_time); + assert(s.ok()); + return static_cast(unix_time); + } + + // Private since only Unref() should be used to delete it. + ~MemFile() { assert(refs_ == 0); } + + SystemClock* clock_; + const std::string fn_; + mutable port::Mutex mutex_; + int refs_; + bool is_lock_file_; + bool locked_; + + // Data written into this file, all bytes before fsynced_bytes are + // persistent. + std::string data_; + std::atomic size_; + std::atomic modified_time_; + + Random rnd_; + std::atomic fsynced_bytes_; +}; + +namespace { + +class MockSequentialFile : public FSSequentialFile { + public: + explicit MockSequentialFile(MemFile* file, const FileOptions& opts) + : file_(file), + use_direct_io_(opts.use_direct_reads), + use_mmap_read_(opts.use_mmap_reads), + pos_(0) { + file_->Ref(); + } + + ~MockSequentialFile() override { file_->Unref(); } + + IOStatus Read(size_t n, const IOOptions& options, Slice* result, + char* scratch, IODebugContext* dbg) override { + IOStatus s = file_->Read(pos_, n, options, result, + (use_mmap_read_) ? nullptr : scratch, dbg); + if (s.ok()) { + pos_ += result->size(); + } + return s; + } + + bool use_direct_io() const override { return use_direct_io_; } + IOStatus Skip(uint64_t n) override { + if (pos_ > file_->Size()) { + return IOStatus::IOError("pos_ > file_->Size()"); + } + const uint64_t available = file_->Size() - pos_; + if (n > available) { + n = available; + } + pos_ += static_cast(n); + return IOStatus::OK(); + } + + private: + MemFile* file_; + bool use_direct_io_; + bool use_mmap_read_; + size_t pos_; +}; + +class MockRandomAccessFile : public FSRandomAccessFile { + public: + explicit MockRandomAccessFile(MemFile* file, const FileOptions& opts) + : file_(file), + use_direct_io_(opts.use_direct_reads), + use_mmap_read_(opts.use_mmap_reads) { + file_->Ref(); + } + + ~MockRandomAccessFile() override { file_->Unref(); } + + bool use_direct_io() const override { return use_direct_io_; } + + IOStatus Prefetch(uint64_t /*offset*/, size_t /*n*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const override { + if (use_mmap_read_) { + return file_->Read(offset, n, options, result, nullptr, dbg); + } else { + return file_->Read(offset, n, options, result, scratch, dbg); + } + } + + private: + MemFile* file_; + bool use_direct_io_; + bool use_mmap_read_; +}; + +class MockRandomRWFile : public FSRandomRWFile { + public: + explicit MockRandomRWFile(MemFile* file) : file_(file) { file_->Ref(); } + + ~MockRandomRWFile() override { file_->Unref(); } + + IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options, + IODebugContext* dbg) override { + return file_->Write(offset, data, options, dbg); + } + + IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const override { + return file_->Read(offset, n, options, result, scratch, dbg); + } + + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override { + return file_->Fsync(options, dbg); + } + + IOStatus Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + IOStatus Sync(const IOOptions& 
options, IODebugContext* dbg) override { + return file_->Fsync(options, dbg); + } + + private: + MemFile* file_; +}; + +class MockWritableFile : public FSWritableFile { + public: + MockWritableFile(MemFile* file, const FileOptions& opts) + : file_(file), + use_direct_io_(opts.use_direct_writes), + rate_limiter_(opts.rate_limiter) { + file_->Ref(); + } + + ~MockWritableFile() override { file_->Unref(); } + + bool use_direct_io() const override { return false && use_direct_io_; } + + using FSWritableFile::Append; + IOStatus Append(const Slice& data, const IOOptions& options, + IODebugContext* dbg) override { + size_t bytes_written = 0; + while (bytes_written < data.size()) { + auto bytes = RequestToken(data.size() - bytes_written); + IOStatus s = file_->Append(Slice(data.data() + bytes_written, bytes), + options, dbg); + if (!s.ok()) { + return s; + } + bytes_written += bytes; + } + return IOStatus::OK(); + } + + using FSWritableFile::PositionedAppend; + IOStatus PositionedAppend(const Slice& data, uint64_t /*offset*/, + const IOOptions& options, + IODebugContext* dbg) override { + assert(use_direct_io_); + return Append(data, options, dbg); + } + + IOStatus Truncate(uint64_t size, const IOOptions& options, + IODebugContext* dbg) override { + file_->Truncate(static_cast(size), options, dbg); + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override { + return file_->Fsync(options, dbg); + } + + IOStatus Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override { + return file_->Fsync(options, dbg); + } + + uint64_t GetFileSize(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return file_->Size(); + } + + private: + inline size_t RequestToken(size_t bytes) { + if (rate_limiter_ && io_priority_ < Env::IO_TOTAL) { + bytes = std::min( + bytes, static_cast(rate_limiter_->GetSingleBurstBytes())); + rate_limiter_->Request(bytes, io_priority_); + } + return bytes; + } + + MemFile* file_; + bool use_direct_io_; + RateLimiter* rate_limiter_; +}; + +class MockEnvDirectory : public FSDirectory { + public: + IOStatus Fsync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } +}; + +class MockEnvFileLock : public FileLock { + public: + explicit MockEnvFileLock(const std::string& fname) : fname_(fname) {} + + std::string FileName() const { return fname_; } + + private: + const std::string fname_; +}; + +class TestMemLogger : public Logger { + private: + std::unique_ptr file_; + std::atomic_size_t log_size_; + static const uint64_t flush_every_seconds_ = 5; + std::atomic_uint_fast64_t last_flush_micros_; + SystemClock* clock_; + IOOptions options_; + IODebugContext* dbg_; + std::atomic flush_pending_; + + public: + TestMemLogger(std::unique_ptr f, SystemClock* clock, + const IOOptions& options, IODebugContext* dbg, + const InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL) + : Logger(log_level), + file_(std::move(f)), + log_size_(0), + last_flush_micros_(0), + clock_(clock), + options_(options), + dbg_(dbg), + flush_pending_(false) {} + ~TestMemLogger() override {} + + void Flush() override { + if (flush_pending_) { + flush_pending_ = false; + } + last_flush_micros_ = clock_->NowMicros(); + } + + using Logger::Logv; + void Logv(const char* format, va_list ap) override 
{ + // We try twice: the first time with a fixed-size stack allocated buffer, + // and the second time with a much larger dynamically allocated buffer. + char buffer[500]; + for (int iter = 0; iter < 2; iter++) { + char* base; + int bufsize; + if (iter == 0) { + bufsize = sizeof(buffer); + base = buffer; + } else { + bufsize = 30000; + base = new char[bufsize]; + } + char* p = base; + char* limit = base + bufsize; + + port::TimeVal now_tv; + port::GetTimeOfDay(&now_tv, nullptr); + const time_t seconds = now_tv.tv_sec; + struct tm t; + memset(&t, 0, sizeof(t)); + struct tm* ret __attribute__((__unused__)); + ret = port::LocalTimeR(&seconds, &t); + assert(ret); + p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d ", + t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, + t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec)); + + // Print the message + if (p < limit) { + va_list backup_ap; + va_copy(backup_ap, ap); + p += vsnprintf(p, limit - p, format, backup_ap); + va_end(backup_ap); + } + + // Truncate to available space if necessary + if (p >= limit) { + if (iter == 0) { + continue; // Try again with larger buffer + } else { + p = limit - 1; + } + } + + // Add newline if necessary + if (p == base || p[-1] != '\n') { + *p++ = '\n'; + } + + assert(p <= limit); + const size_t write_size = p - base; + + Status s = file_->Append(Slice(base, write_size), options_, dbg_); + if (s.ok()) { + flush_pending_ = true; + log_size_ += write_size; + } + uint64_t now_micros = + static_cast(now_tv.tv_sec) * 1000000 + now_tv.tv_usec; + if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) { + flush_pending_ = false; + last_flush_micros_ = now_micros; + } + if (base != buffer) { + delete[] base; + } + break; + } + } + size_t GetLogFileSize() const override { return log_size_; } +}; + +static std::unordered_map mock_fs_type_info = { + {"supports_direct_io", + {0, OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; +} // namespace + +MockFileSystem::MockFileSystem(const std::shared_ptr& clock, + bool supports_direct_io) + : system_clock_(clock), supports_direct_io_(supports_direct_io) { + clock_ = system_clock_.get(); + RegisterOptions("", &supports_direct_io_, &mock_fs_type_info); +} + +MockFileSystem::~MockFileSystem() { + for (auto i = file_map_.begin(); i != file_map_.end(); ++i) { + i->second->Unref(); + } +} + +Status MockFileSystem::PrepareOptions(const ConfigOptions& options) { + Status s = FileSystem::PrepareOptions(options); + if (s.ok() && system_clock_ == SystemClock::Default()) { + system_clock_ = options.env->GetSystemClock(); + clock_ = system_clock_.get(); + } + return s; +} + +IOStatus MockFileSystem::GetAbsolutePath(const std::string& db_path, + const IOOptions& /*options*/, + std::string* output_path, + IODebugContext* /*dbg*/) { + *output_path = NormalizeMockPath(db_path); + if (output_path->at(0) != '/') { + return IOStatus::NotSupported("GetAbsolutePath"); + } else { + return IOStatus::OK(); + } +} + +std::string MockFileSystem::NormalizeMockPath(const std::string& path) { + std::string p = NormalizePath(path); + if (p.back() == kFilePathSeparator && p.size() > 1) { + p.pop_back(); + } + return p; +} + +// Partial implementation of the FileSystem interface. 
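TestMemLogger::Logv() above uses the classic two-pass vsnprintf pattern: try a small stack buffer first, and only pay for a heap allocation when the message does not fit. A self-contained sketch of the same pattern (ours, not from the vendored sources); the FileSystem implementation continues right after this aside:

#include <cstdarg>
#include <cstdio>
#include <string>

std::string FormatTwoPass(const char* fmt, ...) {
  char stack_buf[500];  // same first-pass size TestMemLogger uses
  va_list ap;
  va_start(ap, fmt);
  va_list backup;
  va_copy(backup, ap);  // ap is consumed by the first vsnprintf pass
  int needed = vsnprintf(stack_buf, sizeof(stack_buf), fmt, ap);
  va_end(ap);
  if (needed < 0) {  // encoding error
    va_end(backup);
    return std::string();
  }
  if (static_cast<size_t>(needed) < sizeof(stack_buf)) {  // first pass fit
    va_end(backup);
    return std::string(stack_buf, needed);
  }
  // Second pass with an exactly-sized heap buffer (+1 for the terminator).
  std::string out(static_cast<size_t>(needed) + 1, '\0');
  vsnprintf(&out[0], out.size(), fmt, backup);
  va_end(backup);
  out.resize(needed);
  return out;
}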
+IOStatus MockFileSystem::NewSequentialFile(
+    const std::string& fname, const FileOptions& file_opts,
+    std::unique_ptr<FSSequentialFile>* result, IODebugContext* /*dbg*/) {
+  auto fn = NormalizeMockPath(fname);
+
+  MutexLock lock(&mutex_);
+  if (file_map_.find(fn) == file_map_.end()) {
+    *result = nullptr;
+    return IOStatus::PathNotFound(fn);
+  }
+  auto* f = file_map_[fn];
+  if (f->is_lock_file()) {
+    return IOStatus::InvalidArgument(fn, "Cannot open a lock file.");
+  } else if (file_opts.use_direct_reads && !supports_direct_io_) {
+    return IOStatus::NotSupported("Direct I/O Not Supported");
+  } else {
+    result->reset(new MockSequentialFile(f, file_opts));
+    return IOStatus::OK();
+  }
+}
+
+IOStatus MockFileSystem::NewRandomAccessFile(
+    const std::string& fname, const FileOptions& file_opts,
+    std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* /*dbg*/) {
+  auto fn = NormalizeMockPath(fname);
+  MutexLock lock(&mutex_);
+  if (file_map_.find(fn) == file_map_.end()) {
+    *result = nullptr;
+    return IOStatus::PathNotFound(fn);
+  }
+  auto* f = file_map_[fn];
+  if (f->is_lock_file()) {
+    return IOStatus::InvalidArgument(fn, "Cannot open a lock file.");
+  } else if (file_opts.use_direct_reads && !supports_direct_io_) {
+    return IOStatus::NotSupported("Direct I/O Not Supported");
+  } else {
+    result->reset(new MockRandomAccessFile(f, file_opts));
+    return IOStatus::OK();
+  }
+}
+
+IOStatus MockFileSystem::NewRandomRWFile(
+    const std::string& fname, const FileOptions& /*file_opts*/,
+    std::unique_ptr<FSRandomRWFile>* result, IODebugContext* /*dbg*/) {
+  auto fn = NormalizeMockPath(fname);
+  MutexLock lock(&mutex_);
+  if (file_map_.find(fn) == file_map_.end()) {
+    *result = nullptr;
+    return IOStatus::PathNotFound(fn);
+  }
+  auto* f = file_map_[fn];
+  if (f->is_lock_file()) {
+    return IOStatus::InvalidArgument(fn, "Cannot open a lock file.");
+  }
+  result->reset(new MockRandomRWFile(f));
+  return IOStatus::OK();
+}
+
+IOStatus MockFileSystem::ReuseWritableFile(
+    const std::string& fname, const std::string& old_fname,
+    const FileOptions& options, std::unique_ptr<FSWritableFile>* result,
+    IODebugContext* dbg) {
+  auto s = RenameFile(old_fname, fname, IOOptions(), dbg);
+  if (!s.ok()) {
+    return s;
+  } else {
+    result->reset();
+    return NewWritableFile(fname, options, result, dbg);
+  }
+}
+
+IOStatus MockFileSystem::NewWritableFile(
+    const std::string& fname, const FileOptions& file_opts,
+    std::unique_ptr<FSWritableFile>* result, IODebugContext* /*dbg*/) {
+  auto fn = NormalizeMockPath(fname);
+  MutexLock lock(&mutex_);
+  if (file_map_.find(fn) != file_map_.end()) {
+    DeleteFileInternal(fn);
+  }
+  MemFile* file = new MemFile(clock_, fn, false);
+  file->Ref();
+  file_map_[fn] = file;
+  if (file_opts.use_direct_writes && !supports_direct_io_) {
+    return IOStatus::NotSupported("Direct I/O Not Supported");
+  } else {
+    result->reset(new MockWritableFile(file, file_opts));
+    return IOStatus::OK();
+  }
+}
+
+IOStatus MockFileSystem::ReopenWritableFile(
+    const std::string& fname, const FileOptions& file_opts,
+    std::unique_ptr<FSWritableFile>* result, IODebugContext* /*dbg*/) {
+  auto fn = NormalizeMockPath(fname);
+  MutexLock lock(&mutex_);
+  MemFile* file = nullptr;
+  if (file_map_.find(fn) == file_map_.end()) {
+    file = new MemFile(clock_, fn, false);
+    // Only take a reference when we create the file object
+    file->Ref();
+    file_map_[fn] = file;
+  } else {
+    file = file_map_[fn];
+  }
+  if (file_opts.use_direct_writes && !supports_direct_io_) {
+    return IOStatus::NotSupported("Direct I/O Not Supported");
+  } else {
+    result->reset(new MockWritableFile(file, file_opts));
+    return 
IOStatus::OK(); + } +} + +IOStatus MockFileSystem::NewDirectory(const std::string& /*name*/, + const IOOptions& /*io_opts*/, + std::unique_ptr* result, + IODebugContext* /*dbg*/) { + result->reset(new MockEnvDirectory()); + return IOStatus::OK(); +} + +IOStatus MockFileSystem::FileExists(const std::string& fname, + const IOOptions& /*io_opts*/, + IODebugContext* /*dbg*/) { + auto fn = NormalizeMockPath(fname); + MutexLock lock(&mutex_); + if (file_map_.find(fn) != file_map_.end()) { + // File exists + return IOStatus::OK(); + } + // Now also check if fn exists as a dir + for (const auto& iter : file_map_) { + const std::string& filename = iter.first; + if (filename.size() >= fn.size() + 1 && filename[fn.size()] == '/' && + Slice(filename).starts_with(Slice(fn))) { + return IOStatus::OK(); + } + } + return IOStatus::NotFound(); +} + +bool MockFileSystem::GetChildrenInternal(const std::string& dir, + std::vector* result) { + auto d = NormalizeMockPath(dir); + bool found_dir = false; + result->clear(); + for (const auto& iter : file_map_) { + const std::string& filename = iter.first; + + if (filename == d) { + found_dir = true; + } else if (filename.size() >= d.size() + 1 && filename[d.size()] == '/' && + Slice(filename).starts_with(Slice(d))) { + found_dir = true; + size_t next_slash = filename.find('/', d.size() + 1); + if (next_slash != std::string::npos) { + result->push_back( + filename.substr(d.size() + 1, next_slash - d.size() - 1)); + } else { + result->push_back(filename.substr(d.size() + 1)); + } + } + } + result->erase(std::unique(result->begin(), result->end()), result->end()); + return found_dir; +} + +IOStatus MockFileSystem::GetChildren(const std::string& dir, + const IOOptions& /*options*/, + std::vector* result, + IODebugContext* /*dbg*/) { + MutexLock lock(&mutex_); + bool found_dir = GetChildrenInternal(dir, result); +#ifndef __clang_analyzer__ + return found_dir ? IOStatus::OK() : IOStatus::NotFound(dir); +#else + return found_dir ? 
IOStatus::OK() : IOStatus::NotFound(); +#endif +} + +void MockFileSystem::DeleteFileInternal(const std::string& fname) { + assert(fname == NormalizeMockPath(fname)); + const auto& pair = file_map_.find(fname); + if (pair != file_map_.end()) { + pair->second->Unref(); + file_map_.erase(fname); + } +} + +IOStatus MockFileSystem::DeleteFile(const std::string& fname, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + auto fn = NormalizeMockPath(fname); + MutexLock lock(&mutex_); + if (file_map_.find(fn) == file_map_.end()) { + return IOStatus::PathNotFound(fn); + } + + DeleteFileInternal(fn); + return IOStatus::OK(); +} + +IOStatus MockFileSystem::Truncate(const std::string& fname, size_t size, + const IOOptions& options, + IODebugContext* dbg) { + auto fn = NormalizeMockPath(fname); + MutexLock lock(&mutex_); + auto iter = file_map_.find(fn); + if (iter == file_map_.end()) { + return IOStatus::PathNotFound(fn); + } + iter->second->Truncate(size, options, dbg); + return IOStatus::OK(); +} + +IOStatus MockFileSystem::CreateDir(const std::string& dirname, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + auto dn = NormalizeMockPath(dirname); + MutexLock lock(&mutex_); + if (file_map_.find(dn) == file_map_.end()) { + MemFile* file = new MemFile(clock_, dn, false); + file->Ref(); + file_map_[dn] = file; + } else { + return IOStatus::IOError(); + } + return IOStatus::OK(); +} + +IOStatus MockFileSystem::CreateDirIfMissing(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + CreateDir(dirname, options, dbg).PermitUncheckedError(); + return IOStatus::OK(); +} + +IOStatus MockFileSystem::DeleteDir(const std::string& dirname, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + auto dir = NormalizeMockPath(dirname); + MutexLock lock(&mutex_); + if (file_map_.find(dir) == file_map_.end()) { + return IOStatus::PathNotFound(dir); + } else { + std::vector children; + if (GetChildrenInternal(dir, &children)) { + for (const auto& child : children) { + DeleteFileInternal(child); + } + } + DeleteFileInternal(dir); + return IOStatus::OK(); + } +} + +IOStatus MockFileSystem::GetFileSize(const std::string& fname, + const IOOptions& /*options*/, + uint64_t* file_size, + IODebugContext* /*dbg*/) { + auto fn = NormalizeMockPath(fname); + TEST_SYNC_POINT_CALLBACK("MockFileSystem::GetFileSize:CheckFileType", &fn); + MutexLock lock(&mutex_); + auto iter = file_map_.find(fn); + if (iter == file_map_.end()) { + return IOStatus::PathNotFound(fn); + } + + *file_size = iter->second->Size(); + return IOStatus::OK(); +} + +IOStatus MockFileSystem::GetFileModificationTime(const std::string& fname, + const IOOptions& /*options*/, + uint64_t* time, + IODebugContext* /*dbg*/) { + auto fn = NormalizeMockPath(fname); + MutexLock lock(&mutex_); + auto iter = file_map_.find(fn); + if (iter == file_map_.end()) { + return IOStatus::PathNotFound(fn); + } + *time = iter->second->ModifiedTime(); + return IOStatus::OK(); +} + +bool MockFileSystem::RenameFileInternal(const std::string& src, + const std::string& dest) { + if (file_map_.find(src) == file_map_.end()) { + return false; + } else { + std::vector children; + if (GetChildrenInternal(src, &children)) { + for (const auto& child : children) { + RenameFileInternal(src + "/" + child, dest + "/" + child); + } + } + DeleteFileInternal(dest); + file_map_[dest] = file_map_[src]; + file_map_.erase(src); + return true; + } +} + +IOStatus MockFileSystem::RenameFile(const std::string& src, + const std::string& dest, + const 
IOOptions& /*options*/,
+                                    IODebugContext* /*dbg*/) {
+  auto s = NormalizeMockPath(src);
+  auto t = NormalizeMockPath(dest);
+  MutexLock lock(&mutex_);
+  bool found = RenameFileInternal(s, t);
+  if (!found) {
+    return IOStatus::PathNotFound(s);
+  } else {
+    return IOStatus::OK();
+  }
+}
+
+IOStatus MockFileSystem::LinkFile(const std::string& src,
+                                  const std::string& dest,
+                                  const IOOptions& /*options*/,
+                                  IODebugContext* /*dbg*/) {
+  auto s = NormalizeMockPath(src);
+  auto t = NormalizeMockPath(dest);
+  MutexLock lock(&mutex_);
+  if (file_map_.find(s) == file_map_.end()) {
+    return IOStatus::PathNotFound(s);
+  }
+
+  DeleteFileInternal(t);
+  file_map_[t] = file_map_[s];
+  file_map_[t]->Ref();  // Otherwise it might get deleted when no one uses s
+  return IOStatus::OK();
+}
+
+IOStatus MockFileSystem::NewLogger(const std::string& fname,
+                                   const IOOptions& io_opts,
+                                   std::shared_ptr<Logger>* result,
+                                   IODebugContext* dbg) {
+  auto fn = NormalizeMockPath(fname);
+  MutexLock lock(&mutex_);
+  auto iter = file_map_.find(fn);
+  MemFile* file = nullptr;
+  if (iter == file_map_.end()) {
+    file = new MemFile(clock_, fn, false);
+    file->Ref();
+    file_map_[fn] = file;
+  } else {
+    file = iter->second;
+  }
+  std::unique_ptr<FSWritableFile> f(new MockWritableFile(file, FileOptions()));
+  result->reset(new TestMemLogger(std::move(f), clock_, io_opts, dbg));
+  return IOStatus::OK();
+}
+
+IOStatus MockFileSystem::LockFile(const std::string& fname,
+                                  const IOOptions& /*options*/,
+                                  FileLock** flock, IODebugContext* /*dbg*/) {
+  auto fn = NormalizeMockPath(fname);
+  {
+    MutexLock lock(&mutex_);
+    if (file_map_.find(fn) != file_map_.end()) {
+      if (!file_map_[fn]->is_lock_file()) {
+        return IOStatus::InvalidArgument(fname, "Not a lock file.");
+      }
+      if (!file_map_[fn]->Lock()) {
+        return IOStatus::IOError(fn, "lock is already held.");
+      }
+    } else {
+      auto* file = new MemFile(clock_, fn, true);
+      file->Ref();
+      file->Lock();
+      file_map_[fn] = file;
+    }
+  }
+  *flock = new MockEnvFileLock(fn);
+  return IOStatus::OK();
+}
+
+IOStatus MockFileSystem::UnlockFile(FileLock* flock,
+                                    const IOOptions& /*options*/,
+                                    IODebugContext* /*dbg*/) {
+  std::string fn = static_cast_with_check<MockEnvFileLock>(flock)->FileName();
+  {
+    MutexLock lock(&mutex_);
+    if (file_map_.find(fn) != file_map_.end()) {
+      if (!file_map_[fn]->is_lock_file()) {
+        return IOStatus::InvalidArgument(fn, "Not a lock file.");
+      }
+      file_map_[fn]->Unlock();
+    }
+  }
+  delete flock;
+  return IOStatus::OK();
+}
+
+IOStatus MockFileSystem::GetTestDirectory(const IOOptions& /*options*/,
+                                          std::string* path,
+                                          IODebugContext* /*dbg*/) {
+  *path = "/test";
+  return IOStatus::OK();
+}
+
+Status MockFileSystem::CorruptBuffer(const std::string& fname) {
+  auto fn = NormalizeMockPath(fname);
+  MutexLock lock(&mutex_);
+  auto iter = file_map_.find(fn);
+  if (iter == file_map_.end()) {
+    return Status::IOError(fn, "File not found");
+  }
+  iter->second->CorruptBuffer();
+  return Status::OK();
+}
+
+MockEnv::MockEnv(Env* env, const std::shared_ptr<FileSystem>& fs,
+                 const std::shared_ptr<SystemClock>& clock)
+    : CompositeEnvWrapper(env, fs, clock) {}
+
+MockEnv* MockEnv::Create(Env* env) {
+  auto clock =
+      std::make_shared<EmulatedSystemClock>(env->GetSystemClock(), true);
+  return MockEnv::Create(env, clock);
+}
+
+MockEnv* MockEnv::Create(Env* env, const std::shared_ptr<SystemClock>& clock) {
+  auto fs = std::make_shared<MockFileSystem>(clock);
+  return new MockEnv(env, fs, clock);
+}
+
+Status MockEnv::CorruptBuffer(const std::string& fname) {
+  auto mock = static_cast_with_check<MockFileSystem>(GetFileSystem().get());
+  return mock->CorruptBuffer(fname);
+}
+
+// This is to maintain the
behavior before switching from InMemoryEnv to MockEnv
+Env* NewMemEnv(Env* base_env) { return MockEnv::Create(base_env); }
+
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.h
new file mode 100644
index 0000000000..406a31f635
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/mock_env.h
@@ -0,0 +1,144 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <atomic>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "env/composite_env_wrapper.h"
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksdb/status.h"
+#include "rocksdb/system_clock.h"
+
+namespace ROCKSDB_NAMESPACE {
+class MemFile;
+class MockFileSystem : public FileSystem {
+ public:
+  explicit MockFileSystem(const std::shared_ptr<SystemClock>& clock,
+                          bool supports_direct_io = true);
+  ~MockFileSystem() override;
+
+  static const char* kClassName() { return "MemoryFileSystem"; }
+  const char* Name() const override { return kClassName(); }
+  IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts,
+                             std::unique_ptr<FSSequentialFile>* r,
+                             IODebugContext* dbg) override;
+  IOStatus NewRandomAccessFile(const std::string& f,
+                               const FileOptions& file_opts,
+                               std::unique_ptr<FSRandomAccessFile>* r,
+                               IODebugContext* dbg) override;
+
+  IOStatus NewRandomRWFile(const std::string& fname,
+                           const FileOptions& file_opts,
+                           std::unique_ptr<FSRandomRWFile>* result,
+                           IODebugContext* dbg) override;
+  IOStatus ReuseWritableFile(const std::string& fname,
+                             const std::string& old_fname,
+                             const FileOptions& file_opts,
+                             std::unique_ptr<FSWritableFile>* result,
+                             IODebugContext* dbg) override;
+  IOStatus NewWritableFile(const std::string& fname,
+                           const FileOptions& file_opts,
+                           std::unique_ptr<FSWritableFile>* result,
+                           IODebugContext* dbg) override;
+  IOStatus ReopenWritableFile(const std::string& fname,
+                              const FileOptions& options,
+                              std::unique_ptr<FSWritableFile>* result,
+                              IODebugContext* dbg) override;
+  IOStatus NewDirectory(const std::string& /*name*/, const IOOptions& io_opts,
+                        std::unique_ptr<FSDirectory>* result,
+                        IODebugContext* dbg) override;
+  IOStatus FileExists(const std::string& fname, const IOOptions& /*io_opts*/,
+                      IODebugContext* /*dbg*/) override;
+  IOStatus GetChildren(const std::string& dir, const IOOptions& options,
+                       std::vector<std::string>* result,
+                       IODebugContext* dbg) override;
+  IOStatus DeleteFile(const std::string& fname, const IOOptions& options,
+                      IODebugContext* dbg) override;
+  IOStatus Truncate(const std::string& fname, size_t size,
+                    const IOOptions& options, IODebugContext* dbg) override;
+  IOStatus CreateDir(const std::string& dirname, const IOOptions& options,
+                     IODebugContext* dbg) override;
+  IOStatus CreateDirIfMissing(const std::string& dirname,
+                              const IOOptions& options,
+                              IODebugContext* dbg) override;
+  IOStatus DeleteDir(const std::string& dirname, const IOOptions& options,
+                     IODebugContext* dbg) override;
+
+  IOStatus GetFileSize(const std::string& fname, const IOOptions& options,
+                       uint64_t* file_size, IODebugContext* dbg) override;
+
+  IOStatus GetFileModificationTime(const std::string& fname,
+                                   const IOOptions& options,
uint64_t* file_mtime,
+                                   IODebugContext* dbg) override;
+  IOStatus RenameFile(const std::string& src, const std::string& target,
+                      const IOOptions& options, IODebugContext* dbg) override;
+  IOStatus LinkFile(const std::string& /*src*/, const std::string& /*target*/,
+                    const IOOptions& /*options*/,
+                    IODebugContext* /*dbg*/) override;
+  IOStatus LockFile(const std::string& fname, const IOOptions& options,
+                    FileLock** lock, IODebugContext* dbg) override;
+  IOStatus UnlockFile(FileLock* lock, const IOOptions& options,
+                      IODebugContext* dbg) override;
+  IOStatus GetTestDirectory(const IOOptions& options, std::string* path,
+                            IODebugContext* dbg) override;
+  IOStatus NewLogger(const std::string& fname, const IOOptions& io_opts,
+                     std::shared_ptr<Logger>* result,
+                     IODebugContext* dbg) override;
+  // Get full directory name for this db.
+  IOStatus GetAbsolutePath(const std::string& db_path,
+                           const IOOptions& /*options*/,
+                           std::string* output_path,
+                           IODebugContext* /*dbg*/) override;
+  IOStatus IsDirectory(const std::string& /*path*/,
+                       const IOOptions& /*options*/, bool* /*is_dir*/,
+                       IODebugContext* /*dbg*/) override {
+    return IOStatus::NotSupported("IsDirectory");
+  }
+
+  Status CorruptBuffer(const std::string& fname);
+  Status PrepareOptions(const ConfigOptions& options) override;
+
+ private:
+  bool RenameFileInternal(const std::string& src, const std::string& dest);
+  void DeleteFileInternal(const std::string& fname);
+  bool GetChildrenInternal(const std::string& fname,
+                           std::vector<std::string>* results);
+
+  std::string NormalizeMockPath(const std::string& path);
+
+ private:
+  // Map from filenames to MemFile objects, representing a simple file system.
+  port::Mutex mutex_;
+  std::map<std::string, MemFile*> file_map_;  // Protected by mutex_.
+  std::shared_ptr<SystemClock> system_clock_;
+  SystemClock* clock_;
+  bool supports_direct_io_;
+};
+
+class MockEnv : public CompositeEnvWrapper {
+ public:
+  static MockEnv* Create(Env* base);
+  static MockEnv* Create(Env* base, const std::shared_ptr<SystemClock>& clock);
+
+  static const char* kClassName() { return "MockEnv"; }
+  const char* Name() const override { return kClassName(); }
+
+  Status CorruptBuffer(const std::string& fname);
+
+ private:
+  MockEnv(Env* env, const std::shared_ptr<FileSystem>& fs,
+          const std::shared_ptr<SystemClock>& clock);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.cc
new file mode 100644
index 0000000000..a1986fa15d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.cc
@@ -0,0 +1,164 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "env/unique_id_gen.h"
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <random>
+
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksdb/version.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+struct GenerateRawUniqueIdOpts {
+  Env* env = Env::Default();
+  bool exclude_port_uuid = false;
+  bool exclude_env_details = false;
+  bool exclude_random_device = false;
+};
+
+// Each of these "tracks" below should be sufficient for generating 128 bits
+// of entropy, after hashing the raw bytes.
The tracks are separable for +// testing purposes, but in production we combine as many tracks as possible +// to ensure quality results even if some environments have degraded +// capabilities or quality in some APIs. +// +// This approach has not been validated for use in cryptography. The goal is +// generating globally unique values with high probability without coordination +// between instances. +// +// Linux performance: EntropyTrackRandomDevice is much faster than +// EntropyTrackEnvDetails, which is much faster than EntropyTrackPortUuid. + +struct EntropyTrackPortUuid { + std::array uuid; + + void Populate(const GenerateRawUniqueIdOpts& opts) { + if (opts.exclude_port_uuid) { + return; + } + std::string s; + port::GenerateRfcUuid(&s); + if (s.size() >= uuid.size()) { + std::copy_n(s.begin(), uuid.size(), uuid.begin()); + } + } +}; + +struct EntropyTrackEnvDetails { + std::array hostname_buf; + int64_t process_id; + uint64_t thread_id; + int64_t unix_time; + uint64_t nano_time; + + void Populate(const GenerateRawUniqueIdOpts& opts) { + if (opts.exclude_env_details) { + return; + } + opts.env->GetHostName(hostname_buf.data(), hostname_buf.size()) + .PermitUncheckedError(); + process_id = port::GetProcessID(); + thread_id = opts.env->GetThreadID(); + opts.env->GetCurrentTime(&unix_time).PermitUncheckedError(); + nano_time = opts.env->NowNanos(); + } +}; + +struct EntropyTrackRandomDevice { + using RandType = std::random_device::result_type; + static constexpr size_t kNumRandVals = + /* generous bits */ 192U / (8U * sizeof(RandType)); + std::array rand_vals; + + void Populate(const GenerateRawUniqueIdOpts& opts) { + if (opts.exclude_random_device) { + return; + } + std::random_device r; + for (auto& val : rand_vals) { + val = r(); + } + } +}; + +struct Entropy { + uint64_t version_identifier; + EntropyTrackRandomDevice et1; + EntropyTrackEnvDetails et2; + EntropyTrackPortUuid et3; + + void Populate(const GenerateRawUniqueIdOpts& opts) { + // If we change the format of what goes into the entropy inputs, it's + // conceivable there could be a physical collision in the hash input + // even though they are logically different. This value should change + // if there's a change to the "schema" here, including byte order. 
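+    // As a concrete sketch (illustrative values only; this vendored tree is
+    // rocksdb-8.3.2, so MAJOR = 8, MINOR = 3, PATCH = 2), the packing below
+    // yields version_identifier = (8 << 32) + (3 << 16) + 2 = 0x800030002.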
+    version_identifier = (uint64_t{ROCKSDB_MAJOR} << 32) +
+                         (uint64_t{ROCKSDB_MINOR} << 16) +
+                         uint64_t{ROCKSDB_PATCH};
+    et1.Populate(opts);
+    et2.Populate(opts);
+    et3.Populate(opts);
+  }
+};
+
+void GenerateRawUniqueIdImpl(uint64_t* a, uint64_t* b,
+                             const GenerateRawUniqueIdOpts& opts) {
+  Entropy e;
+  std::memset(&e, 0, sizeof(e));
+  e.Populate(opts);
+  Hash2x64(reinterpret_cast<const char*>(&e), sizeof(e), a, b);
+}
+
+}  // namespace
+
+void GenerateRawUniqueId(uint64_t* a, uint64_t* b, bool exclude_port_uuid) {
+  GenerateRawUniqueIdOpts opts;
+  opts.exclude_port_uuid = exclude_port_uuid;
+  assert(!opts.exclude_env_details);
+  assert(!opts.exclude_random_device);
+  GenerateRawUniqueIdImpl(a, b, opts);
+}
+
+#ifndef NDEBUG
+void TEST_GenerateRawUniqueId(uint64_t* a, uint64_t* b, bool exclude_port_uuid,
+                              bool exclude_env_details,
+                              bool exclude_random_device) {
+  GenerateRawUniqueIdOpts opts;
+  opts.exclude_port_uuid = exclude_port_uuid;
+  opts.exclude_env_details = exclude_env_details;
+  opts.exclude_random_device = exclude_random_device;
+  GenerateRawUniqueIdImpl(a, b, opts);
+}
+#endif
+
+void SemiStructuredUniqueIdGen::Reset() {
+  saved_process_id_ = port::GetProcessID();
+  GenerateRawUniqueId(&base_upper_, &base_lower_);
+  counter_ = 0;
+}
+
+void SemiStructuredUniqueIdGen::GenerateNext(uint64_t* upper, uint64_t* lower) {
+  if (port::GetProcessID() == saved_process_id_) {
+    // Safe to increment the atomic for guaranteed uniqueness within this
+    // process lifetime. Xor slightly better than +. See
+    // https://github.com/pdillinger/unique_id
+    *lower = base_lower_ ^ counter_.fetch_add(1);
+    *upper = base_upper_;
+  } else {
+    // There must have been a fork() or something. Rather than attempting to
+    // update in a thread-safe way, simply fall back on GenerateRawUniqueId.
+    GenerateRawUniqueId(upper, lower);
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.h
new file mode 100644
index 0000000000..69033f8d3a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/env/unique_id_gen.h
@@ -0,0 +1,85 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+// This file is for functions that generate unique identifiers, at least in
+// part, by extracting novel entropy or sources of uniqueness from the
+// execution environment. (By contrast, random.h is for algorithmic
+// pseudorandomness.)
+//
+// These functions could eventually migrate to public APIs, such as in Env.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <type_traits>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Generates a new 128-bit identifier that is universally unique
+// (with high probability) for each call. The result is split into
+// two 64-bit pieces. This function has NOT been validated for use in
+// cryptography.
+//
+// This is used in generating DB session IDs and by Env::GenerateUniqueId
+// (used for DB IDENTITY) if the platform does not provide a generator of
+// RFC 4122 UUIDs or fails somehow. (Set exclude_port_uuid=true if this
+// function is used as a fallback for GenerateRfcUuid, because there is no
+// need to try it again.)
+void GenerateRawUniqueId(uint64_t* a, uint64_t* b,
+                         bool exclude_port_uuid = false);
+
+#ifndef NDEBUG
+// A version of the above with options for challenge testing
+void TEST_GenerateRawUniqueId(uint64_t* a, uint64_t* b, bool exclude_port_uuid,
+                              bool exclude_env_details,
+                              bool exclude_random_device);
+#endif
+
+// Generates globally unique ids with lower probability of any collisions
+// vs. each unique id being independently random (GenerateRawUniqueId).
+// We call this "semi-structured" because between different
+// SemiStructuredUniqueIdGen objects, the IDs are separated by random
+// intervals (unstructured), but within a single SemiStructuredUniqueIdGen
+// object, the generated IDs are trivially related (structured). See
+// https://github.com/pdillinger/unique_id for how this improves probability
+// of no collision. In short, if we have n SemiStructuredUniqueIdGen
+// objects each generating m IDs, the first collision is expected at
+// around n = sqrt(2^128 / m), equivalently n * sqrt(m) = 2^64,
+// rather than n * m = 2^64 for fully random IDs.
+class SemiStructuredUniqueIdGen {
+ public:
+  // Initializes with random starting state (from GenerateRawUniqueId)
+  SemiStructuredUniqueIdGen() { Reset(); }
+  // Re-initializes, but not thread-safe
+  void Reset();
+
+  // Assuming no fork(), `lower` is guaranteed unique from one call
+  // to the next (thread safe).
+  void GenerateNext(uint64_t* upper, uint64_t* lower);
+
+  // For generating smaller values. Will cycle through all the possibilities
+  // before repeating.
+  template <typename T>
+  T GenerateNext() {
+    static_assert(sizeof(T) <= sizeof(uint64_t));
+    static_assert(std::is_integral_v<T>);
+    uint64_t ignore, val;
+    GenerateNext(&ignore, &val);
+    return static_cast<T>(val);
+  }
+
+  uint64_t GetBaseUpper() const { return base_upper_; }
+
+ private:
+  uint64_t base_upper_;
+  uint64_t base_lower_;
+  std::atomic<uint64_t> counter_;
+  int64_t saved_process_id_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.cc
new file mode 100644
index 0000000000..8a2d1615d0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.cc
@@ -0,0 +1,409 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
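+
+// Informal sketch of the flow implemented below: DeleteFile() either unlinks
+// the file immediately (when rate limiting is off, or when trash already
+// exceeds max_trash_db_ratio of the DB size) or renames it to "<name>.trash"
+// and queues it; BackgroundEmptyTrash() then deletes queued files, sleeping
+// between deletes so that sustained deletion throughput stays close to
+// rate_bytes_per_sec.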
+ + +#include "file/delete_scheduler.h" + +#include +#include +#include + +#include "file/sst_file_manager_impl.h" +#include "logging/logging.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/system_clock.h" +#include "test_util/sync_point.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +DeleteScheduler::DeleteScheduler(SystemClock* clock, FileSystem* fs, + int64_t rate_bytes_per_sec, Logger* info_log, + SstFileManagerImpl* sst_file_manager, + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) + : clock_(clock), + fs_(fs), + total_trash_size_(0), + rate_bytes_per_sec_(rate_bytes_per_sec), + pending_files_(0), + bytes_max_delete_chunk_(bytes_max_delete_chunk), + closing_(false), + cv_(&mu_), + bg_thread_(nullptr), + info_log_(info_log), + sst_file_manager_(sst_file_manager), + max_trash_db_ratio_(max_trash_db_ratio) { + assert(sst_file_manager != nullptr); + assert(max_trash_db_ratio >= 0); + MaybeCreateBackgroundThread(); +} + +DeleteScheduler::~DeleteScheduler() { + { + InstrumentedMutexLock l(&mu_); + closing_ = true; + cv_.SignalAll(); + } + if (bg_thread_) { + bg_thread_->join(); + } + for (const auto& it : bg_errors_) { + it.second.PermitUncheckedError(); + } +} + +Status DeleteScheduler::DeleteFile(const std::string& file_path, + const std::string& dir_to_sync, + const bool force_bg) { + if (rate_bytes_per_sec_.load() <= 0 || + (!force_bg && + total_trash_size_.load() > + sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load())) { + // Rate limiting is disabled or trash size makes up more than + // max_trash_db_ratio_ (default 25%) of the total DB size + TEST_SYNC_POINT("DeleteScheduler::DeleteFile"); + Status s = fs_->DeleteFile(file_path, IOOptions(), nullptr); + if (s.ok()) { + s = sst_file_manager_->OnDeleteFile(file_path); + ROCKS_LOG_INFO(info_log_, + "Deleted file %s immediately, rate_bytes_per_sec %" PRIi64 + ", total_trash_size %" PRIu64 " max_trash_db_ratio %lf", + file_path.c_str(), rate_bytes_per_sec_.load(), + total_trash_size_.load(), max_trash_db_ratio_.load()); + InstrumentedMutexLock l(&mu_); + RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY); + } + return s; + } + + // Move file to trash + std::string trash_file; + Status s = MarkAsTrash(file_path, &trash_file); + ROCKS_LOG_INFO(info_log_, "Mark file: %s as trash -- %s", trash_file.c_str(), + s.ToString().c_str()); + + if (!s.ok()) { + ROCKS_LOG_ERROR(info_log_, "Failed to mark %s as trash -- %s", + file_path.c_str(), s.ToString().c_str()); + s = fs_->DeleteFile(file_path, IOOptions(), nullptr); + if (s.ok()) { + s = sst_file_manager_->OnDeleteFile(file_path); + ROCKS_LOG_INFO(info_log_, "Deleted file %s immediately", + trash_file.c_str()); + InstrumentedMutexLock l(&mu_); + RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY); + } + return s; + } + + // Update the total trash size + uint64_t trash_file_size = 0; + IOStatus io_s = + fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr); + if (io_s.ok()) { + total_trash_size_.fetch_add(trash_file_size); + } + //**TODO: What should we do if we failed to + // get the file size? 
+ + // Add file to delete queue + { + InstrumentedMutexLock l(&mu_); + RecordTick(stats_.get(), FILES_MARKED_TRASH); + queue_.emplace(trash_file, dir_to_sync); + pending_files_++; + if (pending_files_ == 1) { + cv_.SignalAll(); + } + } + return s; +} + +std::map DeleteScheduler::GetBackgroundErrors() { + InstrumentedMutexLock l(&mu_); + return bg_errors_; +} + +const std::string DeleteScheduler::kTrashExtension = ".trash"; +bool DeleteScheduler::IsTrashFile(const std::string& file_path) { + return (file_path.size() >= kTrashExtension.size() && + file_path.rfind(kTrashExtension) == + file_path.size() - kTrashExtension.size()); +} + +Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm, + const std::string& path) { + Status s; + // Check if there are any files marked as trash in this path + std::vector files_in_path; + const auto& fs = env->GetFileSystem(); + IOOptions io_opts; + io_opts.do_not_recurse = true; + s = fs->GetChildren(path, io_opts, &files_in_path, + /*IODebugContext*=*/nullptr); + if (!s.ok()) { + return s; + } + for (const std::string& current_file : files_in_path) { + if (!DeleteScheduler::IsTrashFile(current_file)) { + // not a trash file, skip + continue; + } + + Status file_delete; + std::string trash_file = path + "/" + current_file; + if (sfm) { + // We have an SstFileManager that will schedule the file delete + s = sfm->OnAddFile(trash_file); + file_delete = sfm->ScheduleFileDeletion(trash_file, path); + } else { + // Delete the file immediately + file_delete = env->DeleteFile(trash_file); + } + + if (s.ok() && !file_delete.ok()) { + s = file_delete; + } + } + + return s; +} + +Status DeleteScheduler::MarkAsTrash(const std::string& file_path, + std::string* trash_file) { + // Sanity check of the path + size_t idx = file_path.rfind("/"); + if (idx == std::string::npos || idx == file_path.size() - 1) { + return Status::InvalidArgument("file_path is corrupted"); + } + + if (DeleteScheduler::IsTrashFile(file_path)) { + // This is already a trash file + *trash_file = file_path; + return Status::OK(); + } + + *trash_file = file_path + kTrashExtension; + // TODO(tec) : Implement Env::RenameFileIfNotExist and remove + // file_move_mu mutex. 
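+  // For example (hypothetical path): "/db/000123.sst" is first tried as
+  // "/db/000123.sst.trash"; if that name is already taken, the loop below
+  // retries with "/db/000123.sst0.trash", "/db/000123.sst1.trash", and so on.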
+  int cnt = 0;
+  Status s;
+  InstrumentedMutexLock l(&file_move_mu_);
+  while (true) {
+    s = fs_->FileExists(*trash_file, IOOptions(), nullptr);
+    if (s.IsNotFound()) {
+      // We found a path for our file in trash
+      s = fs_->RenameFile(file_path, *trash_file, IOOptions(), nullptr);
+      break;
+    } else if (s.ok()) {
+      // Name conflict, so generate a new name with a counter suffix
+      *trash_file = file_path + std::to_string(cnt) + kTrashExtension;
+    } else {
+      // Error during FileExists call, we cannot continue
+      break;
+    }
+    cnt++;
+  }
+  if (s.ok()) {
+    s = sst_file_manager_->OnMoveFile(file_path, *trash_file);
+  }
+  return s;
+}
+
+void DeleteScheduler::BackgroundEmptyTrash() {
+  TEST_SYNC_POINT("DeleteScheduler::BackgroundEmptyTrash");
+
+  while (true) {
+    InstrumentedMutexLock l(&mu_);
+    while (queue_.empty() && !closing_) {
+      cv_.Wait();
+    }
+
+    if (closing_) {
+      return;
+    }
+
+    // Delete all files in queue_
+    uint64_t start_time = clock_->NowMicros();
+    uint64_t total_deleted_bytes = 0;
+    int64_t current_delete_rate = rate_bytes_per_sec_.load();
+    while (!queue_.empty() && !closing_) {
+      if (current_delete_rate != rate_bytes_per_sec_.load()) {
+        // User changed the delete rate
+        current_delete_rate = rate_bytes_per_sec_.load();
+        start_time = clock_->NowMicros();
+        total_deleted_bytes = 0;
+        ROCKS_LOG_INFO(info_log_, "rate_bytes_per_sec is changed to %" PRIi64,
+                       current_delete_rate);
+      }
+
+      // Get new file to delete
+      const FileAndDir& fad = queue_.front();
+      std::string path_in_trash = fad.fname;
+
+      // We don't need to hold the lock while deleting the file
+      mu_.Unlock();
+      uint64_t deleted_bytes = 0;
+      bool is_complete = true;
+      // Delete file from trash and update total_penalty value
+      Status s =
+          DeleteTrashFile(path_in_trash, fad.dir, &deleted_bytes, &is_complete);
+      total_deleted_bytes += deleted_bytes;
+      mu_.Lock();
+      if (is_complete) {
+        queue_.pop();
+      }
+
+      if (!s.ok()) {
+        bg_errors_[path_in_trash] = s;
+      }
+
+      // Apply penalty if necessary
+      uint64_t total_penalty;
+      if (current_delete_rate > 0) {
+        // rate limiting is enabled
+        total_penalty =
+            ((total_deleted_bytes * kMicrosInSecond) / current_delete_rate);
+        ROCKS_LOG_INFO(info_log_,
+                       "Rate limiting is enabled with penalty %" PRIu64
+                       " after deleting file %s",
+                       total_penalty, path_in_trash.c_str());
+        while (!closing_ && !cv_.TimedWait(start_time + total_penalty)) {
+        }
+      } else {
+        // rate limiting is disabled
+        total_penalty = 0;
+        ROCKS_LOG_INFO(info_log_,
+                       "Rate limiting is disabled after deleting file %s",
+                       path_in_trash.c_str());
+      }
+      TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
+                               &total_penalty);
+
+      if (is_complete) {
+        pending_files_--;
+      }
+      if (pending_files_ == 0) {
+        // Unblock WaitForEmptyTrash since there are no more files waiting
+        // to be deleted
+        cv_.SignalAll();
+      }
+    }
+  }
+}
+
+Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
+                                        const std::string& dir_to_sync,
+                                        uint64_t* deleted_bytes,
+                                        bool* is_complete) {
+  uint64_t file_size;
+  Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr);
+  *is_complete = true;
+  TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
+  if (s.ok()) {
+    bool need_full_delete = true;
+    if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) {
+      uint64_t num_hard_links = 2;
+      // We don't have to worry about a data race between creating a new link
+      // to the file after the link-count check and the ftruncate, because
+      // the file is now in trash and no new hard link is supposed to be
+      // created to trash
files by RocksDB. + Status my_status = fs_->NumFileLinks(path_in_trash, IOOptions(), + &num_hard_links, nullptr); + if (my_status.ok()) { + if (num_hard_links == 1) { + std::unique_ptr wf; + my_status = fs_->ReopenWritableFile(path_in_trash, FileOptions(), &wf, + nullptr); + if (my_status.ok()) { + my_status = wf->Truncate(file_size - bytes_max_delete_chunk_, + IOOptions(), nullptr); + if (my_status.ok()) { + TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:Fsync"); + my_status = wf->Fsync(IOOptions(), nullptr); + } + } + if (my_status.ok()) { + *deleted_bytes = bytes_max_delete_chunk_; + need_full_delete = false; + *is_complete = false; + } else { + ROCKS_LOG_WARN(info_log_, + "Failed to partially delete %s from trash -- %s", + path_in_trash.c_str(), my_status.ToString().c_str()); + } + } else { + ROCKS_LOG_INFO(info_log_, + "Cannot delete %s slowly through ftruncate from trash " + "as it has other links", + path_in_trash.c_str()); + } + } else if (!num_link_error_printed_) { + ROCKS_LOG_INFO( + info_log_, + "Cannot delete files slowly through ftruncate from trash " + "as Env::NumFileLinks() returns error: %s", + my_status.ToString().c_str()); + num_link_error_printed_ = true; + } + } + + if (need_full_delete) { + s = fs_->DeleteFile(path_in_trash, IOOptions(), nullptr); + if (!dir_to_sync.empty()) { + std::unique_ptr dir_obj; + if (s.ok()) { + s = fs_->NewDirectory(dir_to_sync, IOOptions(), &dir_obj, nullptr); + } + if (s.ok()) { + s = dir_obj->FsyncWithDirOptions( + IOOptions(), nullptr, + DirFsyncOptions(DirFsyncOptions::FsyncReason::kFileDeleted)); + TEST_SYNC_POINT_CALLBACK( + "DeleteScheduler::DeleteTrashFile::AfterSyncDir", + reinterpret_cast(const_cast(&dir_to_sync))); + } + } + if (s.ok()) { + *deleted_bytes = file_size; + s = sst_file_manager_->OnDeleteFile(path_in_trash); + } + } + } + if (!s.ok()) { + // Error while getting file size or while deleting + ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s", + path_in_trash.c_str(), s.ToString().c_str()); + *deleted_bytes = 0; + } else { + total_trash_size_.fetch_sub(*deleted_bytes); + } + + return s; +} + +void DeleteScheduler::WaitForEmptyTrash() { + InstrumentedMutexLock l(&mu_); + while (pending_files_ > 0 && !closing_) { + cv_.Wait(); + } +} + +void DeleteScheduler::MaybeCreateBackgroundThread() { + if (bg_thread_ == nullptr && rate_bytes_per_sec_.load() > 0) { + bg_thread_.reset( + new port::Thread(&DeleteScheduler::BackgroundEmptyTrash, this)); + ROCKS_LOG_INFO(info_log_, + "Created background thread for deletion scheduler with " + "rate_bytes_per_sec: %" PRIi64, + rate_bytes_per_sec_.load()); + } +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.h new file mode 100644 index 0000000000..da3735aed8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/delete_scheduler.h @@ -0,0 +1,147 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+
+#pragma once
+
+
+#include <map>
+#include <queue>
+#include <string>
+#include <thread>
+
+#include "monitoring/instrumented_mutex.h"
+#include "port/port.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Env;
+class FileSystem;
+class Logger;
+class SstFileManagerImpl;
+class SystemClock;
+
+// DeleteScheduler allows the DB to enforce a rate limit on file deletion.
+// Instead of deleting files immediately, files are marked as trash
+// and deleted in a background thread that applies a sleep penalty between
+// deletes if they are happening at a rate faster than rate_bytes_per_sec.
+//
+// Rate limiting can be turned off by setting rate_bytes_per_sec = 0; in this
+// case DeleteScheduler will delete files immediately.
+class DeleteScheduler {
+ public:
+  DeleteScheduler(SystemClock* clock, FileSystem* fs,
+                  int64_t rate_bytes_per_sec, Logger* info_log,
+                  SstFileManagerImpl* sst_file_manager,
+                  double max_trash_db_ratio, uint64_t bytes_max_delete_chunk);
+
+  ~DeleteScheduler();
+
+  // Return delete rate limit in bytes per second
+  int64_t GetRateBytesPerSecond() { return rate_bytes_per_sec_.load(); }
+
+  // Set delete rate limit in bytes per second
+  void SetRateBytesPerSecond(int64_t bytes_per_sec) {
+    rate_bytes_per_sec_.store(bytes_per_sec);
+    MaybeCreateBackgroundThread();
+  }
+
+  // Mark the file as trash and schedule its deletion. If force_bg is
+  // set, it forces the file to always be deleted in the background thread,
+  // except when rate limiting is disabled
+  Status DeleteFile(const std::string& fname, const std::string& dir_to_sync,
+                    const bool force_bg = false);
+
+  // Wait for all files being deleted in the background to finish, or for
+  // the destructor to be called.
+  void WaitForEmptyTrash();
+
+  // Return a map containing errors that happened in BackgroundEmptyTrash
+  // file_path => error status
+  std::map<std::string, Status> GetBackgroundErrors();
+
+  uint64_t GetTotalTrashSize() { return total_trash_size_.load(); }
+
+  // Return trash/DB size ratio where new files will be deleted immediately
+  double GetMaxTrashDBRatio() { return max_trash_db_ratio_.load(); }
+
+  // Update trash/DB size ratio where new files will be deleted immediately
+  void SetMaxTrashDBRatio(double r) {
+    assert(r >= 0);
+    max_trash_db_ratio_.store(r);
+  }
+
+  static const std::string kTrashExtension;
+  static bool IsTrashFile(const std::string& file_path);
+
+  // Check if there are any .trash files in path, and schedule their deletion,
+  // or delete them immediately if sst_file_manager is nullptr
+  static Status CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
+                                 const std::string& path);
+
+  void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) {
+    InstrumentedMutexLock l(&mu_);
+    stats_ = stats;
+  }
+
+ private:
+  Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash);
+
+  Status DeleteTrashFile(const std::string& path_in_trash,
+                         const std::string& dir_to_sync,
+                         uint64_t* deleted_bytes, bool* is_complete);
+
+  void BackgroundEmptyTrash();
+
+  void MaybeCreateBackgroundThread();
+
+  SystemClock* clock_;
+  FileSystem* fs_;
+
+  // total size of trash files
+  std::atomic<uint64_t> total_trash_size_;
+  // Maximum number of bytes that should be deleted per second
+  std::atomic<int64_t> rate_bytes_per_sec_;
+  // Mutex to protect queue_, pending_files_, bg_errors_, closing_, stats_
+  InstrumentedMutex mu_;
+
+  struct FileAndDir {
+    FileAndDir(const std::string& f, const std::string& d) : fname(f), dir(d) {}
+    std::string fname;
+    std::string dir;  // empty will be skipped.
+  };
+
+  // Queue of trash files that need to be deleted
+  std::queue<FileAndDir> queue_;
+  // Number of trash files that are waiting to be deleted
+  int32_t pending_files_;
+  uint64_t bytes_max_delete_chunk_;
+  // Errors that happened in BackgroundEmptyTrash (file_path => error)
+  std::map<std::string, Status> bg_errors_;
+
+  bool num_link_error_printed_ = false;
+  // Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop
+  bool closing_;
+  // Condition variable signaled in these conditions
+  // - pending_files_ value change from 0 => 1
+  // - pending_files_ value change from 1 => 0
+  // - closing_ value is set to true
+  InstrumentedCondVar cv_;
+  // Background thread running BackgroundEmptyTrash
+  std::unique_ptr<port::Thread> bg_thread_;
+  // Mutex to protect threads from file name conflicts
+  InstrumentedMutex file_move_mu_;
+  Logger* info_log_;
+  SstFileManagerImpl* sst_file_manager_;
+  // If the trash size constitutes more than this fraction of the total DB
+  // size we will start deleting new files passed to DeleteScheduler
+  // immediately
+  std::atomic<double> max_trash_db_ratio_;
+  static const uint64_t kMicrosInSecond = 1000 * 1000LL;
+  std::shared_ptr<Statistics> stats_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.cc
new file mode 100644
index 0000000000..050947210f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.cc
@@ -0,0 +1,955 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "file/file_prefetch_buffer.h"
+
+#include <algorithm>
+#include <cassert>
+
+#include "file/random_access_file_reader.h"
+#include "monitoring/histogram.h"
+#include "monitoring/iostats_context_imp.h"
+#include "port/port.h"
+#include "test_util/sync_point.h"
+#include "util/random.h"
+#include "util/rate_limiter_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+void FilePrefetchBuffer::CalculateOffsetAndLen(size_t alignment,
+                                               uint64_t offset,
+                                               size_t roundup_len,
+                                               uint32_t index, bool refit_tail,
+                                               uint64_t& chunk_len) {
+  uint64_t chunk_offset_in_buffer = 0;
+  bool copy_data_to_new_buffer = false;
+  // Check if requested bytes are in the existing buffer_.
+  // If only a few bytes exist -- reuse them & read only what is really needed.
+  // This is typically the case of incremental reading of data.
+  // If no bytes exist in buffer -- full pread.
+  if (DoesBufferContainData(index) && IsOffsetInBuffer(offset, index)) {
+    // Only a few requested bytes are in the buffer. memmove that chunk of
+    // bytes to the beginning, and memcpy them back into the new buffer if a
+    // new buffer is created.
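+    // A hypothetical walk-through (values chosen purely for illustration):
+    // with alignment = 4096, a buffer at offset_ = 0 currently holding 8192
+    // bytes, and a new request at offset = 6000, chunk_offset_in_buffer =
+    // Rounddown(6000 - 0, 4096) = 4096 and chunk_len = 8192 - 4096 = 4096,
+    // so the last 4096 buffered bytes are carried over.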
+ chunk_offset_in_buffer = Rounddown( + static_cast(offset - bufs_[index].offset_), alignment); + chunk_len = static_cast(bufs_[index].buffer_.CurrentSize()) - + chunk_offset_in_buffer; + assert(chunk_offset_in_buffer % alignment == 0); + assert(chunk_len % alignment == 0); + assert(chunk_offset_in_buffer + chunk_len <= + bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize()); + if (chunk_len > 0) { + copy_data_to_new_buffer = true; + } else { + // this reset is not necessary, but just to be safe. + chunk_offset_in_buffer = 0; + } + } + + // Create a new buffer only if current capacity is not sufficient, and memcopy + // bytes from old buffer if needed (i.e., if chunk_len is greater than 0). + if (bufs_[index].buffer_.Capacity() < roundup_len) { + bufs_[index].buffer_.Alignment(alignment); + bufs_[index].buffer_.AllocateNewBuffer( + static_cast(roundup_len), copy_data_to_new_buffer, + chunk_offset_in_buffer, static_cast(chunk_len)); + } else if (chunk_len > 0 && refit_tail) { + // New buffer not needed. But memmove bytes from tail to the beginning since + // chunk_len is greater than 0. + bufs_[index].buffer_.RefitTail(static_cast(chunk_offset_in_buffer), + static_cast(chunk_len)); + } else if (chunk_len > 0) { + // For async prefetching, it doesn't call RefitTail with chunk_len > 0. + // Allocate new buffer if needed because aligned buffer calculate remaining + // buffer as capacity_ - cursize_ which might not be the case in this as we + // are not refitting. + // TODO akanksha: Update the condition when asynchronous prefetching is + // stable. + bufs_[index].buffer_.Alignment(alignment); + bufs_[index].buffer_.AllocateNewBuffer( + static_cast(roundup_len), copy_data_to_new_buffer, + chunk_offset_in_buffer, static_cast(chunk_len)); + } +} + +Status FilePrefetchBuffer::Read(const IOOptions& opts, + RandomAccessFileReader* reader, + Env::IOPriority rate_limiter_priority, + uint64_t read_len, uint64_t chunk_len, + uint64_t rounddown_start, uint32_t index) { + Slice result; + Status s = reader->Read(opts, rounddown_start + chunk_len, read_len, &result, + bufs_[index].buffer_.BufferStart() + chunk_len, + /*aligned_buf=*/nullptr, rate_limiter_priority); +#ifndef NDEBUG + if (result.size() < read_len) { + // Fake an IO error to force db_stress fault injection to ignore + // truncated read errors + IGNORE_STATUS_IF_ERROR(Status::IOError()); + } +#endif + if (!s.ok()) { + return s; + } + + // Update the buffer offset and size. + bufs_[index].offset_ = rounddown_start; + bufs_[index].buffer_.Size(static_cast(chunk_len) + result.size()); + return s; +} + +Status FilePrefetchBuffer::ReadAsync(const IOOptions& opts, + RandomAccessFileReader* reader, + uint64_t read_len, + uint64_t rounddown_start, uint32_t index) { + TEST_SYNC_POINT("FilePrefetchBuffer::ReadAsync"); + // callback for async read request. 
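+  // The bound callback receives the completed FSReadRequest, plus
+  // &bufs_[index].pos_ as cb_arg so that PrefetchAsyncCallback can recover
+  // which buffer to update.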
+ auto fp = std::bind(&FilePrefetchBuffer::PrefetchAsyncCallback, this, + std::placeholders::_1, std::placeholders::_2); + FSReadRequest req; + Slice result; + req.len = read_len; + req.offset = rounddown_start; + req.result = result; + req.scratch = bufs_[index].buffer_.BufferStart(); + bufs_[index].async_req_len_ = req.len; + + Status s = + reader->ReadAsync(req, opts, fp, &(bufs_[index].pos_), + &(bufs_[index].io_handle_), &(bufs_[index].del_fn_), + /*aligned_buf=*/nullptr); + req.status.PermitUncheckedError(); + if (s.ok()) { + bufs_[index].async_read_in_progress_ = true; + } + return s; +} + +Status FilePrefetchBuffer::Prefetch(const IOOptions& opts, + RandomAccessFileReader* reader, + uint64_t offset, size_t n, + Env::IOPriority rate_limiter_priority) { + if (!enable_ || reader == nullptr) { + return Status::OK(); + } + TEST_SYNC_POINT("FilePrefetchBuffer::Prefetch:Start"); + + if (offset + n <= bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) { + // All requested bytes are already in the curr_ buffer. So no need to Read + // again. + return Status::OK(); + } + + size_t alignment = reader->file()->GetRequiredBufferAlignment(); + size_t offset_ = static_cast(offset); + uint64_t rounddown_offset = Rounddown(offset_, alignment); + uint64_t roundup_end = Roundup(offset_ + n, alignment); + uint64_t roundup_len = roundup_end - rounddown_offset; + assert(roundup_len >= alignment); + assert(roundup_len % alignment == 0); + + uint64_t chunk_len = 0; + CalculateOffsetAndLen(alignment, offset, roundup_len, curr_, + true /*refit_tail*/, chunk_len); + size_t read_len = static_cast(roundup_len - chunk_len); + + Status s = Read(opts, reader, rate_limiter_priority, read_len, chunk_len, + rounddown_offset, curr_); + if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && s.ok()) { + RecordInHistogram(stats_, TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, read_len); + } + return s; +} + +// Copy data from src to third buffer. +void FilePrefetchBuffer::CopyDataToBuffer(uint32_t src, uint64_t& offset, + size_t& length) { + if (length == 0) { + return; + } + uint64_t copy_offset = (offset - bufs_[src].offset_); + size_t copy_len = 0; + if (IsDataBlockInBuffer(offset, length, src)) { + // All the bytes are in src. + copy_len = length; + } else { + copy_len = bufs_[src].buffer_.CurrentSize() - copy_offset; + } + + memcpy(bufs_[2].buffer_.BufferStart() + bufs_[2].buffer_.CurrentSize(), + bufs_[src].buffer_.BufferStart() + copy_offset, copy_len); + + bufs_[2].buffer_.Size(bufs_[2].buffer_.CurrentSize() + copy_len); + + // Update offset and length. + offset += copy_len; + length -= copy_len; + + // length > 0 indicates it has consumed all data from the src buffer and it + // still needs to read more other buffer. + if (length > 0) { + bufs_[src].buffer_.Clear(); + } +} + +// Clear the buffers if it contains outdated data. Outdated data can be +// because previous sequential reads were read from the cache instead of these +// buffer. In that case outdated IOs should be aborted. 
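+// For instance (a hypothetical sequence): if curr_ covers [0, 8KB) with an
+// async read still in flight and the next request arrives at offset 64KB,
+// both outstanding requests target stale ranges and are aborted via
+// fs_->AbortIO().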
+void FilePrefetchBuffer::AbortIOIfNeeded(uint64_t offset) { + uint32_t second = curr_ ^ 1; + std::vector handles; + autovector buf_pos; + if (IsBufferOutdatedWithAsyncProgress(offset, curr_)) { + handles.emplace_back(bufs_[curr_].io_handle_); + buf_pos.emplace_back(curr_); + } + if (IsBufferOutdatedWithAsyncProgress(offset, second)) { + handles.emplace_back(bufs_[second].io_handle_); + buf_pos.emplace_back(second); + } + if (!handles.empty()) { + StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS); + Status s = fs_->AbortIO(handles); + assert(s.ok()); + } + + for (auto& pos : buf_pos) { + // Release io_handle. + DestroyAndClearIOHandle(pos); + } + + if (bufs_[second].io_handle_ == nullptr) { + bufs_[second].async_read_in_progress_ = false; + } + + if (bufs_[curr_].io_handle_ == nullptr) { + bufs_[curr_].async_read_in_progress_ = false; + } +} + +void FilePrefetchBuffer::AbortAllIOs() { + uint32_t second = curr_ ^ 1; + std::vector handles; + for (uint32_t i = 0; i < 2; i++) { + if (bufs_[i].async_read_in_progress_ && bufs_[i].io_handle_ != nullptr) { + handles.emplace_back(bufs_[i].io_handle_); + } + } + if (!handles.empty()) { + StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS); + Status s = fs_->AbortIO(handles); + assert(s.ok()); + } + + // Release io_handles. + if (bufs_[curr_].io_handle_ != nullptr && bufs_[curr_].del_fn_ != nullptr) { + DestroyAndClearIOHandle(curr_); + } else { + bufs_[curr_].async_read_in_progress_ = false; + } + + if (bufs_[second].io_handle_ != nullptr && bufs_[second].del_fn_ != nullptr) { + DestroyAndClearIOHandle(second); + } else { + bufs_[second].async_read_in_progress_ = false; + } +} + +// Clear the buffers if it contains outdated data. Outdated data can be +// because previous sequential reads were read from the cache instead of these +// buffer. +void FilePrefetchBuffer::UpdateBuffersIfNeeded(uint64_t offset) { + uint32_t second = curr_ ^ 1; + if (IsBufferOutdated(offset, curr_)) { + bufs_[curr_].buffer_.Clear(); + } + if (IsBufferOutdated(offset, second)) { + bufs_[second].buffer_.Clear(); + } + + { + // In case buffers do not align, reset second buffer. This can happen in + // case readahead_size is set. + if (!bufs_[second].async_read_in_progress_ && + !bufs_[curr_].async_read_in_progress_) { + if (DoesBufferContainData(curr_)) { + if (bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize() != + bufs_[second].offset_) { + bufs_[second].buffer_.Clear(); + } + } else { + if (!IsOffsetInBuffer(offset, second)) { + bufs_[second].buffer_.Clear(); + } + } + } + } + + // If data starts from second buffer, make it curr_. Second buffer can be + // either partial filled, full or async read is in progress. + if (bufs_[second].async_read_in_progress_) { + if (IsOffsetInBufferWithAsyncProgress(offset, second)) { + curr_ = curr_ ^ 1; + } + } else { + if (DoesBufferContainData(second) && IsOffsetInBuffer(offset, second)) { + assert(bufs_[curr_].async_read_in_progress_ || + bufs_[curr_].buffer_.CurrentSize() == 0); + curr_ = curr_ ^ 1; + } + } +} + +void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset) { + if (bufs_[curr_].async_read_in_progress_ && fs_ != nullptr) { + if (bufs_[curr_].io_handle_ != nullptr) { + // Wait for prefetch data to complete. + // No mutex is needed as async_read_in_progress behaves as mutex and is + // updated by main thread only. 
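+      // Poll(handles, 1) below asks the file system to wait until at least
+      // one (here, the only) outstanding request has completed.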
+ std::vector handles; + handles.emplace_back(bufs_[curr_].io_handle_); + StopWatch sw(clock_, stats_, POLL_WAIT_MICROS); + fs_->Poll(handles, 1).PermitUncheckedError(); + } + + // Reset and Release io_handle after the Poll API as request has been + // completed. + DestroyAndClearIOHandle(curr_); + } + UpdateBuffersIfNeeded(offset); +} + +Status FilePrefetchBuffer::HandleOverlappingData( + const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, + size_t length, size_t readahead_size, + Env::IOPriority /*rate_limiter_priority*/, bool& copy_to_third_buffer, + uint64_t& tmp_offset, size_t& tmp_length) { + Status s; + size_t alignment = reader->file()->GetRequiredBufferAlignment(); + uint32_t second; + + // Check if the first buffer has the required offset and the async read is + // still in progress. This should only happen if a prefetch was initiated + // by Seek, but the next access is at another offset. + if (bufs_[curr_].async_read_in_progress_ && + IsOffsetInBufferWithAsyncProgress(offset, curr_)) { + PollAndUpdateBuffersIfNeeded(offset); + } + second = curr_ ^ 1; + + // If data is overlapping over two buffers, copy the data from curr_ and + // call ReadAsync on curr_. + if (!bufs_[curr_].async_read_in_progress_ && DoesBufferContainData(curr_) && + IsOffsetInBuffer(offset, curr_) && + (/*Data extends over curr_ buffer and second buffer either has data or in + process of population=*/ + (offset + length > bufs_[second].offset_) && + (bufs_[second].async_read_in_progress_ || + DoesBufferContainData(second)))) { + // Allocate new buffer to third buffer; + bufs_[2].buffer_.Clear(); + bufs_[2].buffer_.Alignment(alignment); + bufs_[2].buffer_.AllocateNewBuffer(length); + bufs_[2].offset_ = offset; + copy_to_third_buffer = true; + + CopyDataToBuffer(curr_, tmp_offset, tmp_length); + + // Call async prefetching on curr_ since data has been consumed in curr_ + // only if data lies within second buffer. + size_t second_size = bufs_[second].async_read_in_progress_ + ? bufs_[second].async_req_len_ + : bufs_[second].buffer_.CurrentSize(); + if (tmp_offset + tmp_length <= bufs_[second].offset_ + second_size) { + uint64_t rounddown_start = bufs_[second].offset_ + second_size; + uint64_t roundup_end = + Roundup(rounddown_start + readahead_size, alignment); + uint64_t roundup_len = roundup_end - rounddown_start; + uint64_t chunk_len = 0; + CalculateOffsetAndLen(alignment, rounddown_start, roundup_len, curr_, + false, chunk_len); + assert(chunk_len == 0); + assert(roundup_len >= chunk_len); + + bufs_[curr_].offset_ = rounddown_start; + uint64_t read_len = static_cast(roundup_len - chunk_len); + s = ReadAsync(opts, reader, read_len, rounddown_start, curr_); + if (!s.ok()) { + DestroyAndClearIOHandle(curr_); + bufs_[curr_].buffer_.Clear(); + return s; + } + } + curr_ = curr_ ^ 1; + } + return s; +} +// If async_io is enabled in case of sequential reads, PrefetchAsyncInternal is +// called. When buffers are switched, we clear the curr_ buffer as we assume the +// data has been consumed because of sequential reads. +// Data in buffers will always be sequential with curr_ following second and +// not vice versa. +// +// Scenarios for prefetching asynchronously: +// Case1: If both buffers are empty, prefetch n + readahead_size_/2 bytes +// synchronously in curr_ and prefetch readahead_size_/2 async in second +// buffer. +// Case2: If second buffer has partial or full data, make it current and +// prefetch readahead_size_/2 async in second buffer. 
In case of +// partial data, prefetch remaining bytes from size n synchronously to +// fulfill the requested bytes request. +// Case3: If curr_ has partial data, prefetch remaining bytes from size n +// synchronously in curr_ to fulfill the requested bytes request and +// prefetch readahead_size_/2 bytes async in second buffer. +// Case4: (Special case) If data is in both buffers, copy requested data from +// curr_, send async request on curr_, wait for poll to fill second +// buffer (if any), and copy remaining data from second buffer to third +// buffer. +Status FilePrefetchBuffer::PrefetchAsyncInternal( + const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, + size_t length, size_t readahead_size, Env::IOPriority rate_limiter_priority, + bool& copy_to_third_buffer) { + if (!enable_) { + return Status::OK(); + } + + TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsyncInternal:Start"); + + size_t alignment = reader->file()->GetRequiredBufferAlignment(); + Status s; + uint64_t tmp_offset = offset; + size_t tmp_length = length; + + // 1. Abort IO and swap buffers if needed to point curr_ to first buffer with + // data. + if (!explicit_prefetch_submitted_) { + AbortIOIfNeeded(offset); + } + UpdateBuffersIfNeeded(offset); + + // 2. Handle overlapping data over two buffers. If data is overlapping then + // during this call: + // - data from curr_ is copied into third buffer, + // - curr_ is send for async prefetching of further data if second buffer + // contains remaining requested data or in progress for async prefetch, + // - switch buffers and curr_ now points to second buffer to copy remaining + // data. + s = HandleOverlappingData(opts, reader, offset, length, readahead_size, + rate_limiter_priority, copy_to_third_buffer, + tmp_offset, tmp_length); + if (!s.ok()) { + return s; + } + + // 3. Call Poll only if data is needed for the second buffer. + // - Return if whole data is in curr_ and second buffer is in progress or + // already full. + // - If second buffer is empty, it will go for ReadAsync for second buffer. + if (!bufs_[curr_].async_read_in_progress_ && DoesBufferContainData(curr_) && + IsDataBlockInBuffer(offset, length, curr_)) { + // Whole data is in curr_. + UpdateBuffersIfNeeded(offset); + if (!IsSecondBuffEligibleForPrefetching()) { + return s; + } + } else { + // After poll request, curr_ might be empty because of IOError in + // callback while reading or may contain required data. + PollAndUpdateBuffersIfNeeded(offset); + } + + if (copy_to_third_buffer) { + offset = tmp_offset; + length = tmp_length; + } + + // 4. After polling and swapping buffers, if all the requested bytes are in + // curr_, it will only go for async prefetching. + // copy_to_third_buffer is a special case so it will be handled separately. + if (!copy_to_third_buffer && DoesBufferContainData(curr_) && + IsDataBlockInBuffer(offset, length, curr_)) { + offset += length; + length = 0; + + // Since async request was submitted directly by calling PrefetchAsync in + // last call, we don't need to prefetch further as this call is to poll + // the data submitted in previous call. + if (explicit_prefetch_submitted_) { + return s; + } + if (!IsSecondBuffEligibleForPrefetching()) { + return s; + } + } + + uint32_t second = curr_ ^ 1; + assert(!bufs_[curr_].async_read_in_progress_); + + // In case because of some IOError curr_ got empty, abort IO for second as + // well. Otherwise data might not align if more data needs to be read in curr_ + // which might overlap with second buffer. 
+ if (!DoesBufferContainData(curr_) && bufs_[second].async_read_in_progress_) { + if (bufs_[second].io_handle_ != nullptr) { + std::vector handles; + handles.emplace_back(bufs_[second].io_handle_); + { + StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS); + Status status = fs_->AbortIO(handles); + assert(status.ok()); + } + } + DestroyAndClearIOHandle(second); + bufs_[second].buffer_.Clear(); + } + + // 5. Data is overlapping i.e. some of the data has been copied to third + // buffer and remaining will be updated below. + if (copy_to_third_buffer && DoesBufferContainData(curr_)) { + CopyDataToBuffer(curr_, offset, length); + + // Length == 0: All the requested data has been copied to third buffer and + // it has already gone for async prefetching. It can return without doing + // anything further. + // Length > 0: More data needs to be consumed so it will continue async + // and sync prefetching and copy the remaining data to third buffer in the + // end. + if (length == 0) { + return s; + } + } + + // 6. Go for ReadAsync and Read (if needed). + size_t prefetch_size = length + readahead_size; + size_t _offset = static_cast(offset); + + // offset and size alignment for curr_ buffer with synchronous prefetching + uint64_t rounddown_start1 = Rounddown(_offset, alignment); + uint64_t roundup_end1 = Roundup(_offset + prefetch_size, alignment); + uint64_t roundup_len1 = roundup_end1 - rounddown_start1; + assert(roundup_len1 >= alignment); + assert(roundup_len1 % alignment == 0); + uint64_t chunk_len1 = 0; + uint64_t read_len1 = 0; + + assert(!bufs_[second].async_read_in_progress_ && + !DoesBufferContainData(second)); + + // For length == 0, skip the synchronous prefetching. read_len1 will be 0. + if (length > 0) { + CalculateOffsetAndLen(alignment, offset, roundup_len1, curr_, + false /*refit_tail*/, chunk_len1); + assert(roundup_len1 >= chunk_len1); + read_len1 = static_cast(roundup_len1 - chunk_len1); + } + { + // offset and size alignment for second buffer for asynchronous + // prefetching + uint64_t rounddown_start2 = roundup_end1; + uint64_t roundup_end2 = + Roundup(rounddown_start2 + readahead_size, alignment); + + // For length == 0, do the asynchronous prefetching in second instead of + // synchronous prefetching in curr_. + if (length == 0) { + rounddown_start2 = + bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize(); + roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment); + } + + uint64_t roundup_len2 = roundup_end2 - rounddown_start2; + uint64_t chunk_len2 = 0; + CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, + false /*refit_tail*/, chunk_len2); + assert(chunk_len2 == 0); + // Update the buffer offset. 
+ bufs_[second].offset_ = rounddown_start2; + assert(roundup_len2 >= chunk_len2); + uint64_t read_len2 = static_cast(roundup_len2 - chunk_len2); + s = ReadAsync(opts, reader, read_len2, rounddown_start2, second); + if (!s.ok()) { + DestroyAndClearIOHandle(second); + bufs_[second].buffer_.Clear(); + return s; + } + } + + if (read_len1 > 0) { + s = Read(opts, reader, rate_limiter_priority, read_len1, chunk_len1, + rounddown_start1, curr_); + if (!s.ok()) { + if (bufs_[second].io_handle_ != nullptr) { + std::vector handles; + handles.emplace_back(bufs_[second].io_handle_); + { + StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS); + Status status = fs_->AbortIO(handles); + assert(status.ok()); + } + } + DestroyAndClearIOHandle(second); + bufs_[second].buffer_.Clear(); + bufs_[curr_].buffer_.Clear(); + return s; + } + } + // Copy remaining requested bytes to third_buffer. + if (copy_to_third_buffer && length > 0) { + CopyDataToBuffer(curr_, offset, length); + } + return s; +} + +bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts, + RandomAccessFileReader* reader, + uint64_t offset, size_t n, + Slice* result, Status* status, + Env::IOPriority rate_limiter_priority, + bool for_compaction /* = false */) { + bool ret = TryReadFromCacheUntracked(opts, reader, offset, n, result, status, + rate_limiter_priority, for_compaction); + if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && enable_) { + if (ret) { + RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT); + } else { + RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS); + } + } + return ret; +} + +bool FilePrefetchBuffer::TryReadFromCacheUntracked( + const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, + size_t n, Slice* result, Status* status, + Env::IOPriority rate_limiter_priority, bool for_compaction /* = false */) { + if (track_min_offset_ && offset < min_offset_read_) { + min_offset_read_ = static_cast(offset); + } + if (!enable_ || (offset < bufs_[curr_].offset_)) { + return false; + } + + // If the buffer contains only a few of the requested bytes: + // If readahead is enabled: prefetch the remaining bytes + readahead bytes + // and satisfy the request. + // If readahead is not enabled: return false. + TEST_SYNC_POINT_CALLBACK("FilePrefetchBuffer::TryReadFromCache", + &readahead_size_); + if (offset + n > bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) { + if (readahead_size_ > 0) { + Status s; + assert(reader != nullptr); + assert(max_readahead_size_ >= readahead_size_); + if (for_compaction) { + s = Prefetch(opts, reader, offset, std::max(n, readahead_size_), + rate_limiter_priority); + } else { + if (implicit_auto_readahead_) { + if (!IsEligibleForPrefetch(offset, n)) { + // Ignore status as Prefetch is not called. 
+ s.PermitUncheckedError(); + return false; + } + } + s = Prefetch(opts, reader, offset, n + readahead_size_, + rate_limiter_priority); + } + if (!s.ok()) { + if (status) { + *status = s; + } +#ifndef NDEBUG + IGNORE_STATUS_IF_ERROR(s); +#endif + return false; + } + readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2); + } else { + return false; + } + } + UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/); + + uint64_t offset_in_buffer = offset - bufs_[curr_].offset_; + *result = Slice(bufs_[curr_].buffer_.BufferStart() + offset_in_buffer, n); + return true; +} + +bool FilePrefetchBuffer::TryReadFromCacheAsync( + const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, + size_t n, Slice* result, Status* status, + Env::IOPriority rate_limiter_priority) { + bool ret = TryReadFromCacheAsyncUntracked(opts, reader, offset, n, result, + status, rate_limiter_priority); + if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && enable_) { + if (ret) { + RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT); + } else { + RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS); + } + } + return ret; +} + +bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked( + const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, + size_t n, Slice* result, Status* status, + Env::IOPriority rate_limiter_priority) { + if (track_min_offset_ && offset < min_offset_read_) { + min_offset_read_ = static_cast(offset); + } + + if (!enable_) { + return false; + } + + if (explicit_prefetch_submitted_) { + // explicit_prefetch_submitted_ is special case where it expects request + // submitted in PrefetchAsync should match with this request. Otherwise + // buffers will be outdated. + // Random offset called. So abort the IOs. + if (prev_offset_ != offset) { + AbortAllIOs(); + bufs_[curr_].buffer_.Clear(); + bufs_[curr_ ^ 1].buffer_.Clear(); + explicit_prefetch_submitted_ = false; + return false; + } + } + + if (!explicit_prefetch_submitted_ && offset < bufs_[curr_].offset_) { + return false; + } + + bool prefetched = false; + bool copy_to_third_buffer = false; + // If the buffer contains only a few of the requested bytes: + // If readahead is enabled: prefetch the remaining bytes + readahead bytes + // and satisfy the request. + // If readahead is not enabled: return false. + TEST_SYNC_POINT_CALLBACK("FilePrefetchBuffer::TryReadFromCache", + &readahead_size_); + + if (explicit_prefetch_submitted_ || + (bufs_[curr_].async_read_in_progress_ || + offset + n > + bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize())) { + if (readahead_size_ > 0) { + Status s; + assert(reader != nullptr); + assert(max_readahead_size_ >= readahead_size_); + + if (implicit_auto_readahead_) { + if (!IsEligibleForPrefetch(offset, n)) { + // Ignore status as Prefetch is not called. + s.PermitUncheckedError(); + return false; + } + } + // Prefetch n + readahead_size_/2 synchronously as remaining + // readahead_size_/2 will be prefetched asynchronously. + s = PrefetchAsyncInternal(opts, reader, offset, n, readahead_size_ / 2, + rate_limiter_priority, copy_to_third_buffer); + explicit_prefetch_submitted_ = false; + if (!s.ok()) { + if (status) { + *status = s; + } +#ifndef NDEBUG + IGNORE_STATUS_IF_ERROR(s); +#endif + return false; + } + prefetched = explicit_prefetch_submitted_ ? 
false : true; + } else { + return false; + } + } + + UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/); + + uint32_t index = curr_; + if (copy_to_third_buffer) { + index = 2; + } + uint64_t offset_in_buffer = offset - bufs_[index].offset_; + *result = Slice(bufs_[index].buffer_.BufferStart() + offset_in_buffer, n); + if (prefetched) { + readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2); + } + return true; +} + +void FilePrefetchBuffer::PrefetchAsyncCallback(const FSReadRequest& req, + void* cb_arg) { + uint32_t index = *(static_cast(cb_arg)); +#ifndef NDEBUG + if (req.result.size() < req.len) { + // Fake an IO error to force db_stress fault injection to ignore + // truncated read errors + IGNORE_STATUS_IF_ERROR(Status::IOError()); + } + IGNORE_STATUS_IF_ERROR(req.status); +#endif + + if (req.status.ok()) { + if (req.offset + req.result.size() <= + bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize()) { + // All requested bytes are already in the buffer or no data is read + // because of EOF. So no need to update. + return; + } + if (req.offset < bufs_[index].offset_) { + // Next block to be read has changed (Recent read was not a sequential + // read). So ignore this read. + return; + } + size_t current_size = bufs_[index].buffer_.CurrentSize(); + bufs_[index].buffer_.Size(current_size + req.result.size()); + } +} + +Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, + RandomAccessFileReader* reader, + uint64_t offset, size_t n, + Slice* result) { + assert(reader != nullptr); + if (!enable_) { + return Status::NotSupported(); + } + + TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsync:Start"); + + num_file_reads_ = 0; + explicit_prefetch_submitted_ = false; + bool is_eligible_for_prefetching = false; + if (readahead_size_ > 0 && + (!implicit_auto_readahead_ || + num_file_reads_ + 1 >= num_file_reads_for_auto_readahead_)) { + is_eligible_for_prefetching = true; + } + + // 1. Cancel any pending async read to make code simpler as buffers can be out + // of sync. + AbortAllIOs(); + + // 2. Clear outdated data. + UpdateBuffersIfNeeded(offset); + uint32_t second = curr_ ^ 1; + // Since PrefetchAsync can be called on non sequential reads. So offset can + // be less than curr_ buffers' offset. In that case also it clears both + // buffers. + if (DoesBufferContainData(curr_) && !IsOffsetInBuffer(offset, curr_)) { + bufs_[curr_].buffer_.Clear(); + bufs_[second].buffer_.Clear(); + } + + UpdateReadPattern(offset, n, /*decrease_readaheadsize=*/false); + + bool data_found = false; + + // 3. If curr_ has full data. + if (DoesBufferContainData(curr_) && IsDataBlockInBuffer(offset, n, curr_)) { + uint64_t offset_in_buffer = offset - bufs_[curr_].offset_; + *result = Slice(bufs_[curr_].buffer_.BufferStart() + offset_in_buffer, n); + data_found = true; + // Update num_file_reads_ as TryReadFromCacheAsync won't be called for + // poll and update num_file_reads_ if data is found. + num_file_reads_++; + + // 3.1 If second also has some data or is not eligible for prefetching, + // return. + if (!is_eligible_for_prefetching || DoesBufferContainData(second)) { + return Status::OK(); + } + } else { + // Partial data in curr_. + bufs_[curr_].buffer_.Clear(); + } + bufs_[second].buffer_.Clear(); + + Status s; + size_t alignment = reader->file()->GetRequiredBufferAlignment(); + size_t prefetch_size = is_eligible_for_prefetching ? 
readahead_size_ / 2 : 0; + size_t offset_to_read = static_cast(offset); + uint64_t rounddown_start1 = 0; + uint64_t roundup_end1 = 0; + uint64_t rounddown_start2 = 0; + uint64_t roundup_end2 = 0; + uint64_t chunk_len1 = 0; + uint64_t chunk_len2 = 0; + size_t read_len1 = 0; + size_t read_len2 = 0; + + // - If curr_ is empty. + // - Call async read for full data + prefetch_size on curr_. + // - Call async read for prefetch_size on second if eligible. + // - If curr_ is filled. + // - prefetch_size on second. + // Calculate length and offsets for reading. + if (!DoesBufferContainData(curr_)) { + // Prefetch full data + prefetch_size in curr_. + rounddown_start1 = Rounddown(offset_to_read, alignment); + roundup_end1 = Roundup(offset_to_read + n + prefetch_size, alignment); + uint64_t roundup_len1 = roundup_end1 - rounddown_start1; + assert(roundup_len1 >= alignment); + assert(roundup_len1 % alignment == 0); + + CalculateOffsetAndLen(alignment, rounddown_start1, roundup_len1, curr_, + false, chunk_len1); + assert(chunk_len1 == 0); + assert(roundup_len1 >= chunk_len1); + read_len1 = static_cast(roundup_len1 - chunk_len1); + bufs_[curr_].offset_ = rounddown_start1; + } + + if (is_eligible_for_prefetching) { + if (DoesBufferContainData(curr_)) { + rounddown_start2 = + bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize(); + } else { + rounddown_start2 = roundup_end1; + } + + roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment); + uint64_t roundup_len2 = roundup_end2 - rounddown_start2; + + assert(roundup_len2 >= alignment); + CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, + false, chunk_len2); + assert(chunk_len2 == 0); + assert(roundup_len2 >= chunk_len2); + read_len2 = static_cast(roundup_len2 - chunk_len2); + // Update the buffer offset. + bufs_[second].offset_ = rounddown_start2; + } + + if (read_len1) { + s = ReadAsync(opts, reader, read_len1, rounddown_start1, curr_); + if (!s.ok()) { + DestroyAndClearIOHandle(curr_); + bufs_[curr_].buffer_.Clear(); + return s; + } + explicit_prefetch_submitted_ = true; + prev_len_ = 0; + } + if (read_len2) { + s = ReadAsync(opts, reader, read_len2, rounddown_start2, second); + if (!s.ok()) { + DestroyAndClearIOHandle(second); + bufs_[second].buffer_.Clear(); + return s; + } + readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2); + } + return (data_found ? Status::OK() : Status::TryAgain()); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.h new file mode 100644 index 0000000000..ae84724960 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_prefetch_buffer.h @@ -0,0 +1,471 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
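+// [Editorial note] The readahead size in this component grows geometrically:
+// after each prefetch that is served, readahead_size_ is advanced to
+// min(max_readahead_size_, 2 * readahead_size_). A compilable sketch of that
+// schedule (values are illustrative only):
+//
+//   #include <algorithm>
+//   #include <cstddef>
+//   #include <cstdio>
+//
+//   int main() {
+//     std::size_t readahead = 8 * 1024;             // initial readahead_size
+//     const std::size_t max_readahead = 64 * 1024;  // max_readahead_size
+//     for (int io = 1; io <= 6; ++io) {
+//       std::printf("IO %d: readahead=%zu\n", io, readahead);
+//       readahead = std::min(max_readahead, readahead * 2);
+//     }
+//     return 0;  // prints 8192, 16384, 32768, 65536, 65536, 65536
+//   }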
+
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+#include "file/readahead_file_info.h"
+#include "monitoring/statistics_impl.h"
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/options.h"
+#include "util/aligned_buffer.h"
+#include "util/autovector.h"
+#include "util/stop_watch.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+#define DEFAULT_DECREMENT 8 * 1024
+
+struct IOOptions;
+class RandomAccessFileReader;
+
+struct BufferInfo {
+  AlignedBuffer buffer_;
+
+  uint64_t offset_ = 0;
+
+  // The parameters below are used in the async read flow.
+  // Length requested for in ReadAsync.
+  size_t async_req_len_ = 0;
+
+  // async_read_in_progress can be used as a mutex. The callback can update the
+  // buffer and its size, but async_read_in_progress is only set by the main
+  // thread.
+  bool async_read_in_progress_ = false;
+
+  // io_handle is allocated and used by the underlying file system in case of
+  // asynchronous reads.
+  void* io_handle_ = nullptr;
+
+  IOHandleDeleter del_fn_ = nullptr;
+
+  // pos represents the index of this buffer in the vector of BufferInfo.
+  uint32_t pos_ = 0;
+};
+
+enum class FilePrefetchBufferUsage {
+  kTableOpenPrefetchTail,
+  kUnknown,
+};
+
+// FilePrefetchBuffer is a smart buffer to store and read data from a file.
+class FilePrefetchBuffer {
+ public:
+  // Constructor.
+  //
+  // All arguments are optional.
+  // readahead_size : the initial readahead size.
+  // max_readahead_size : the maximum readahead size.
+  //   If max_readahead_size > readahead_size, the readahead size will be
+  //   doubled on every IO until max_readahead_size is hit.
+  //   Typically this is set as a multiple of readahead_size.
+  //   max_readahead_size should be greater than or equal to readahead_size.
+  // enable : controls whether reading from the buffer is enabled.
+  //   If false, TryReadFromCache() always returns false, and we only take
+  //   stats for the minimum offset if track_min_offset = true.
+  // track_min_offset : Track the minimum offset ever read and collect stats on
+  //   it. Used for adaptable readahead of the file footer/metadata.
+  // implicit_auto_readahead : Readahead is enabled implicitly by rocksdb after
+  //   two sequential scans.
+  //
+  // Automatic readahead is enabled for a file if readahead_size
+  // and max_readahead_size are passed in.
+  // A user can construct a FilePrefetchBuffer without any arguments, but use
+  // `Prefetch` to load data into the buffer.
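+  // [Editorial example] A typical construction for implicit auto-readahead,
+  // sketched below; the argument values are illustrative, not
+  // recommendations:
+  //
+  //   FilePrefetchBuffer buf(
+  //       /*readahead_size=*/8 * 1024, /*max_readahead_size=*/64 * 1024,
+  //       /*enable=*/true, /*track_min_offset=*/false,
+  //       /*implicit_auto_readahead=*/true, /*num_file_reads=*/0,
+  //       /*num_file_reads_for_auto_readahead=*/2, fs, clock, stats);
+  //
+  // With these arguments, readahead starts at 8 KiB once two sequential reads
+  // have been seen and doubles per I/O up to 64 KiB; fs, clock, and stats may
+  // be nullptr, in which case pending-I/O aborts and stats recording are
+  // skipped.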
+ FilePrefetchBuffer( + size_t readahead_size = 0, size_t max_readahead_size = 0, + bool enable = true, bool track_min_offset = false, + bool implicit_auto_readahead = false, uint64_t num_file_reads = 0, + uint64_t num_file_reads_for_auto_readahead = 0, FileSystem* fs = nullptr, + SystemClock* clock = nullptr, Statistics* stats = nullptr, + FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) + : curr_(0), + readahead_size_(readahead_size), + initial_auto_readahead_size_(readahead_size), + max_readahead_size_(max_readahead_size), + min_offset_read_(std::numeric_limits::max()), + enable_(enable), + track_min_offset_(track_min_offset), + implicit_auto_readahead_(implicit_auto_readahead), + prev_offset_(0), + prev_len_(0), + num_file_reads_for_auto_readahead_(num_file_reads_for_auto_readahead), + num_file_reads_(num_file_reads), + explicit_prefetch_submitted_(false), + fs_(fs), + clock_(clock), + stats_(stats), + usage_(usage) { + assert((num_file_reads_ >= num_file_reads_for_auto_readahead_ + 1) || + (num_file_reads_ == 0)); + // If ReadOptions.async_io is enabled, data is asynchronously filled in + // second buffer while curr_ is being consumed. If data is overlapping in + // two buffers, data is copied to third buffer to return continuous buffer. + bufs_.resize(3); + for (uint32_t i = 0; i < 2; i++) { + bufs_[i].pos_ = i; + } + } + + ~FilePrefetchBuffer() { + // Abort any pending async read request before destroying the class object. + if (fs_ != nullptr) { + std::vector handles; + for (uint32_t i = 0; i < 2; i++) { + if (bufs_[i].async_read_in_progress_ && + bufs_[i].io_handle_ != nullptr) { + handles.emplace_back(bufs_[i].io_handle_); + } + } + if (!handles.empty()) { + StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS); + Status s = fs_->AbortIO(handles); + assert(s.ok()); + } + } + + // Prefetch buffer bytes discarded. + uint64_t bytes_discarded = 0; + // Iterated over 2 buffers. + for (int i = 0; i < 2; i++) { + int first = i; + int second = i ^ 1; + + if (DoesBufferContainData(first)) { + // If last block was read completely from first and some bytes in + // first buffer are still unconsumed. + if (prev_offset_ >= bufs_[first].offset_ && + prev_offset_ + prev_len_ < + bufs_[first].offset_ + bufs_[first].buffer_.CurrentSize()) { + bytes_discarded += bufs_[first].buffer_.CurrentSize() - + (prev_offset_ + prev_len_ - bufs_[first].offset_); + } + // If data was in second buffer and some/whole block bytes were read + // from second buffer. + else if (prev_offset_ < bufs_[first].offset_ && + !DoesBufferContainData(second)) { + // If last block read was completely from different buffer, this + // buffer is unconsumed. + if (prev_offset_ + prev_len_ <= bufs_[first].offset_) { + bytes_discarded += bufs_[first].buffer_.CurrentSize(); + } + // If last block read overlaps with this buffer and some data is + // still unconsumed and previous buffer (second) is not cleared. + else if (prev_offset_ + prev_len_ > bufs_[first].offset_ && + bufs_[first].offset_ + bufs_[first].buffer_.CurrentSize() == + bufs_[second].offset_) { + bytes_discarded += bufs_[first].buffer_.CurrentSize() - + (/*bytes read from this buffer=*/prev_len_ - + (bufs_[first].offset_ - prev_offset_)); + } + } + } + } + + for (uint32_t i = 0; i < 2; i++) { + // Release io_handle. + DestroyAndClearIOHandle(i); + } + RecordInHistogram(stats_, PREFETCHED_BYTES_DISCARDED, bytes_discarded); + } + + bool Enabled() const { return enable_; } + + // Load data into the buffer from a file. + // reader : the file reader. 
+  // offset : the file offset to start reading from.
+  // n : the number of bytes to read.
+  // rate_limiter_priority : rate limiting priority, or `Env::IO_TOTAL` to
+  // bypass.
+  Status Prefetch(const IOOptions& opts, RandomAccessFileReader* reader,
+                  uint64_t offset, size_t n,
+                  Env::IOPriority rate_limiter_priority);
+
+  // Requests reading the data from a file asynchronously.
+  // If data already exists in the buffer, result will be updated.
+  // reader : the file reader.
+  // offset : the file offset to start reading from.
+  // n : the number of bytes to read.
+  // result : if data already exists in the buffer, result will
+  // be updated with the data.
+  //
+  // If data already exists in the buffer, it will return Status::OK;
+  // otherwise it will send an asynchronous request and return
+  // Status::TryAgain.
+  Status PrefetchAsync(const IOOptions& opts, RandomAccessFileReader* reader,
+                       uint64_t offset, size_t n, Slice* result);
+
+  // Tries returning the data for a file read from this buffer if that data is
+  // in the buffer.
+  // It handles tracking the minimum read offset if track_min_offset = true.
+  // It also performs exponential readahead when readahead_size is set as part
+  // of the constructor.
+  //
+  // opts : the IO options to use.
+  // reader : the file reader.
+  // offset : the file offset.
+  // n : the number of bytes.
+  // result : output buffer to put the data into.
+  // s : output status.
+  // rate_limiter_priority : rate limiting priority, or `Env::IO_TOTAL` to
+  // bypass.
+  // for_compaction : true if cache read is done for compaction read.
+  bool TryReadFromCache(const IOOptions& opts, RandomAccessFileReader* reader,
+                        uint64_t offset, size_t n, Slice* result, Status* s,
+                        Env::IOPriority rate_limiter_priority,
+                        bool for_compaction = false);
+
+  bool TryReadFromCacheAsync(const IOOptions& opts,
+                             RandomAccessFileReader* reader, uint64_t offset,
+                             size_t n, Slice* result, Status* status,
+                             Env::IOPriority rate_limiter_priority);
+
+  // The minimum `offset` ever passed to TryReadFromCache(). This will only be
+  // tracked if track_min_offset = true.
+  size_t min_offset_read() const { return min_offset_read_; }
+
+  size_t GetPrefetchOffset() const { return bufs_[curr_].offset_; }
+
+  // Called in case of implicit auto prefetching.
+  void UpdateReadPattern(const uint64_t& offset, const size_t& len,
+                         bool decrease_readaheadsize) {
+    if (decrease_readaheadsize) {
+      // This block was eligible for prefetch but was found in the cache, so
+      // check whether readahead_size can be decreased by 8KB (the default),
+      // if eligible.
+      DecreaseReadAheadIfEligible(offset, len);
+    }
+    prev_offset_ = offset;
+    prev_len_ = len;
+    explicit_prefetch_submitted_ = false;
+  }
+
+  void GetReadaheadState(ReadaheadFileInfo::ReadaheadInfo* readahead_info) {
+    readahead_info->readahead_size = readahead_size_;
+    readahead_info->num_file_reads = num_file_reads_;
+  }
+
+  void DecreaseReadAheadIfEligible(uint64_t offset, size_t size,
+                                   size_t value = DEFAULT_DECREMENT) {
+    // Decrease the readahead_size if
+    // - it is enabled internally by RocksDB (implicit_auto_readahead_) and,
+    // - readahead_size is greater than 0 and,
+    // - this block would have called the prefetch API if not found in the
+    //   cache, for which the conditions are:
+    //   - few/no bytes are in the buffer and,
+    //   - the block is sequential with the previous read and,
+    //   - num_file_reads_ + 1 (including this read) >
+    //     num_file_reads_for_auto_readahead_
+    size_t curr_size = bufs_[curr_].async_read_in_progress_
+                           ? bufs_[curr_].async_req_len_
+                           : bufs_[curr_].buffer_.CurrentSize();
+    if (implicit_auto_readahead_ && readahead_size_ > 0) {
+      if ((offset + size > bufs_[curr_].offset_ + curr_size) &&
+          IsBlockSequential(offset) &&
+          (num_file_reads_ + 1 > num_file_reads_for_auto_readahead_)) {
+        readahead_size_ =
+            std::max(initial_auto_readahead_size_,
+                     (readahead_size_ >= value ? readahead_size_ - value : 0));
+      }
+    }
+  }
+
+  // Callback function passed to the underlying FS in case of asynchronous
+  // reads.
+  void PrefetchAsyncCallback(const FSReadRequest& req, void* cb_arg);
+
+ private:
+  // Calculates the rounded-off offset and length to be prefetched based on
+  // alignment and the data present in buffer_. It also allocates a new buffer
+  // or refits the tail if required.
+  void CalculateOffsetAndLen(size_t alignment, uint64_t offset,
+                             size_t roundup_len, uint32_t index,
+                             bool refit_tail, uint64_t& chunk_len);
+
+  void AbortIOIfNeeded(uint64_t offset);
+
+  void AbortAllIOs();
+
+  void UpdateBuffersIfNeeded(uint64_t offset);
+
+  // Calls the Poll API if there is any pending asynchronous request. It then
+  // checks if the data is in any buffer, clears the outdated data, and swaps
+  // the buffers if required.
+  void PollAndUpdateBuffersIfNeeded(uint64_t offset);
+
+  Status PrefetchAsyncInternal(const IOOptions& opts,
+                               RandomAccessFileReader* reader, uint64_t offset,
+                               size_t length, size_t readahead_size,
+                               Env::IOPriority rate_limiter_priority,
+                               bool& copy_to_third_buffer);
+
+  Status Read(const IOOptions& opts, RandomAccessFileReader* reader,
+              Env::IOPriority rate_limiter_priority, uint64_t read_len,
+              uint64_t chunk_len, uint64_t rounddown_start, uint32_t index);
+
+  Status ReadAsync(const IOOptions& opts, RandomAccessFileReader* reader,
+                   uint64_t read_len, uint64_t rounddown_start, uint32_t index);
+
+  // Copy the data from src to the third buffer.
+  void CopyDataToBuffer(uint32_t src, uint64_t& offset, size_t& length);
+
+  bool IsBlockSequential(const size_t& offset) {
+    return (prev_len_ == 0 || (prev_offset_ + prev_len_ == offset));
+  }
+
+  // Called in case of implicit auto prefetching.
+  void ResetValues() {
+    num_file_reads_ = 1;
+    readahead_size_ = initial_auto_readahead_size_;
+  }
+
+  // Called in case of implicit auto prefetching.
+  bool IsEligibleForPrefetch(uint64_t offset, size_t n) {
+    // Prefetch only if this read is sequential; otherwise reset
+    // readahead_size_ to its initial value.
+    if (!IsBlockSequential(offset)) {
+      UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
+      ResetValues();
+      return false;
+    }
+    num_file_reads_++;
+
+    // Since the async request was submitted directly by the last call to
+    // PrefetchAsync, skip the num_file_reads_ check: this call only polls for
+    // the data submitted in the previous call.
+    if (explicit_prefetch_submitted_) {
+      return true;
+    }
+    if (num_file_reads_ <= num_file_reads_for_auto_readahead_) {
+      UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
+      return false;
+    }
+    return true;
+  }
+
+  // Helper functions.
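+  // [Editorial note] Worked example of the IsEligibleForPrefetch() gate
+  // above, assuming num_file_reads_for_auto_readahead_ == 2 and starting
+  // from num_file_reads_ == 0:
+  //
+  //   Read 1: offset=0,    n=100 -> sequential (prev_len_ == 0),
+  //                                 num_file_reads_ = 1 -> not eligible
+  //   Read 2: offset=100,  n=100 -> prev_offset_ + prev_len_ == offset,
+  //                                 num_file_reads_ = 2 -> not eligible
+  //   Read 3: offset=200,  n=100 -> sequential,
+  //                                 num_file_reads_ = 3 > 2 -> eligible
+  //   Read 4: offset=5000, n=100 -> non-sequential -> ResetValues(),
+  //                                 num_file_reads_ = 1 -> not eligible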
+ bool IsDataBlockInBuffer(uint64_t offset, size_t length, uint32_t index) { + return (offset >= bufs_[index].offset_ && + offset + length <= + bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize()); + } + bool IsOffsetInBuffer(uint64_t offset, uint32_t index) { + return (offset >= bufs_[index].offset_ && + offset < bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize()); + } + bool DoesBufferContainData(uint32_t index) { + return bufs_[index].buffer_.CurrentSize() > 0; + } + bool IsBufferOutdated(uint64_t offset, uint32_t index) { + return ( + !bufs_[index].async_read_in_progress_ && DoesBufferContainData(index) && + offset >= bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize()); + } + bool IsBufferOutdatedWithAsyncProgress(uint64_t offset, uint32_t index) { + return (bufs_[index].async_read_in_progress_ && + bufs_[index].io_handle_ != nullptr && + offset >= bufs_[index].offset_ + bufs_[index].async_req_len_); + } + bool IsOffsetInBufferWithAsyncProgress(uint64_t offset, uint32_t index) { + return (bufs_[index].async_read_in_progress_ && + offset >= bufs_[index].offset_ && + offset < bufs_[index].offset_ + bufs_[index].async_req_len_); + } + + bool IsSecondBuffEligibleForPrefetching() { + uint32_t second = curr_ ^ 1; + if (bufs_[second].async_read_in_progress_) { + return false; + } + assert(!bufs_[curr_].async_read_in_progress_); + + if (DoesBufferContainData(curr_) && DoesBufferContainData(second) && + (bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize() == + bufs_[second].offset_)) { + return false; + } + bufs_[second].buffer_.Clear(); + return true; + } + + void DestroyAndClearIOHandle(uint32_t index) { + if (bufs_[index].io_handle_ != nullptr && bufs_[index].del_fn_ != nullptr) { + bufs_[index].del_fn_(bufs_[index].io_handle_); + bufs_[index].io_handle_ = nullptr; + bufs_[index].del_fn_ = nullptr; + } + bufs_[index].async_read_in_progress_ = false; + } + + Status HandleOverlappingData(const IOOptions& opts, + RandomAccessFileReader* reader, uint64_t offset, + size_t length, size_t readahead_size, + Env::IOPriority rate_limiter_priority, + bool& copy_to_third_buffer, uint64_t& tmp_offset, + size_t& tmp_length); + + bool TryReadFromCacheUntracked(const IOOptions& opts, + RandomAccessFileReader* reader, + uint64_t offset, size_t n, Slice* result, + Status* s, + Env::IOPriority rate_limiter_priority, + bool for_compaction = false); + + bool TryReadFromCacheAsyncUntracked(const IOOptions& opts, + RandomAccessFileReader* reader, + uint64_t offset, size_t n, Slice* result, + Status* status, + Env::IOPriority rate_limiter_priority); + + std::vector bufs_; + // curr_ represents the index for bufs_ indicating which buffer is being + // consumed currently. + uint32_t curr_; + + size_t readahead_size_; + size_t initial_auto_readahead_size_; + // FilePrefetchBuffer object won't be created from Iterator flow if + // max_readahead_size_ = 0. + size_t max_readahead_size_; + + // The minimum `offset` ever passed to TryReadFromCache(). + size_t min_offset_read_; + // if false, TryReadFromCache() always return false, and we only take stats + // for track_min_offset_ if track_min_offset_ = true + bool enable_; + // If true, track minimum `offset` ever passed to TryReadFromCache(), which + // can be fetched from min_offset_read(). + bool track_min_offset_; + + // implicit_auto_readahead is enabled by rocksdb internally after 2 + // sequential IOs. 
+ bool implicit_auto_readahead_; + uint64_t prev_offset_; + size_t prev_len_; + // num_file_reads_ and num_file_reads_for_auto_readahead_ is only used when + // implicit_auto_readahead_ is set. + uint64_t num_file_reads_for_auto_readahead_; + uint64_t num_file_reads_; + + // If explicit_prefetch_submitted_ is set then it indicates RocksDB called + // PrefetchAsync to submit request. It needs to call TryReadFromCacheAsync to + // poll the submitted request without checking if data is sequential and + // num_file_reads_. + bool explicit_prefetch_submitted_; + + FileSystem* fs_; + SystemClock* clock_; + Statistics* stats_; + + FilePrefetchBufferUsage usage_; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.cc new file mode 100644 index 0000000000..46faac67cb --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.cc @@ -0,0 +1,277 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include "file/file_util.h" + +#include +#include + +#include "file/random_access_file_reader.h" +#include "file/sequence_file_reader.h" +#include "file/sst_file_manager_impl.h" +#include "file/writable_file_writer.h" +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { + +// Utility function to copy a file up to a specified length +IOStatus CopyFile(FileSystem* fs, const std::string& source, + std::unique_ptr& dest_writer, + uint64_t size, bool use_fsync, + const std::shared_ptr& io_tracer, + const Temperature temperature) { + FileOptions soptions; + IOStatus io_s; + std::unique_ptr src_reader; + + { + soptions.temperature = temperature; + std::unique_ptr srcfile; + io_s = fs->NewSequentialFile(source, soptions, &srcfile, nullptr); + if (!io_s.ok()) { + return io_s; + } + + if (size == 0) { + // default argument means copy everything + io_s = fs->GetFileSize(source, IOOptions(), &size, nullptr); + if (!io_s.ok()) { + return io_s; + } + } + src_reader.reset( + new SequentialFileReader(std::move(srcfile), source, io_tracer)); + } + + char buffer[4096]; + Slice slice; + while (size > 0) { + size_t bytes_to_read = std::min(sizeof(buffer), static_cast(size)); + // TODO: rate limit copy file + io_s = status_to_io_status( + src_reader->Read(bytes_to_read, &slice, buffer, + Env::IO_TOTAL /* rate_limiter_priority */)); + if (!io_s.ok()) { + return io_s; + } + if (slice.size() == 0) { + return IOStatus::Corruption("file too small"); + } + io_s = dest_writer->Append(slice); + if (!io_s.ok()) { + return io_s; + } + size -= slice.size(); + } + return dest_writer->Sync(use_fsync); +} + +IOStatus CopyFile(FileSystem* fs, const std::string& source, + const std::string& destination, uint64_t size, bool use_fsync, + const std::shared_ptr& io_tracer, + const Temperature temperature) { + FileOptions options; + IOStatus io_s; + std::unique_ptr dest_writer; + + { + options.temperature = temperature; + std::unique_ptr destfile; + io_s = fs->NewWritableFile(destination, options, &destfile, nullptr); + if (!io_s.ok()) { + return io_s; + } + + dest_writer.reset( + new WritableFileWriter(std::move(destfile), destination, options)); + } + + return CopyFile(fs, source, dest_writer, size, use_fsync, io_tracer, + temperature); +} + +// Utility function to create a file with 
the provided contents +IOStatus CreateFile(FileSystem* fs, const std::string& destination, + const std::string& contents, bool use_fsync) { + const EnvOptions soptions; + IOStatus io_s; + std::unique_ptr dest_writer; + + std::unique_ptr destfile; + io_s = fs->NewWritableFile(destination, soptions, &destfile, nullptr); + if (!io_s.ok()) { + return io_s; + } + dest_writer.reset( + new WritableFileWriter(std::move(destfile), destination, soptions)); + io_s = dest_writer->Append(Slice(contents)); + if (!io_s.ok()) { + return io_s; + } + return dest_writer->Sync(use_fsync); +} + +Status DeleteDBFile(const ImmutableDBOptions* db_options, + const std::string& fname, const std::string& dir_to_sync, + const bool force_bg, const bool force_fg) { + SstFileManagerImpl* sfm = + static_cast(db_options->sst_file_manager.get()); + if (sfm && !force_fg) { + return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg); + } else { + return db_options->env->DeleteFile(fname); + } +} + +// requested_checksum_func_name brings the function name of the checksum +// generator in checksum_factory. Empty string is permitted, in which case the +// name of the generator created by the factory is unchecked. When +// `requested_checksum_func_name` is non-empty, however, the created generator's +// name must match it, otherwise an `InvalidArgument` error is returned. +IOStatus GenerateOneFileChecksum( + FileSystem* fs, const std::string& file_path, + FileChecksumGenFactory* checksum_factory, + const std::string& requested_checksum_func_name, std::string* file_checksum, + std::string* file_checksum_func_name, + size_t verify_checksums_readahead_size, bool /*allow_mmap_reads*/, + std::shared_ptr& io_tracer, RateLimiter* rate_limiter, + Env::IOPriority rate_limiter_priority) { + if (checksum_factory == nullptr) { + return IOStatus::InvalidArgument("Checksum factory is invalid"); + } + assert(file_checksum != nullptr); + assert(file_checksum_func_name != nullptr); + + FileChecksumGenContext gen_context; + gen_context.requested_checksum_func_name = requested_checksum_func_name; + gen_context.file_name = file_path; + std::unique_ptr checksum_generator = + checksum_factory->CreateFileChecksumGenerator(gen_context); + if (checksum_generator == nullptr) { + std::string msg = + "Cannot get the file checksum generator based on the requested " + "checksum function name: " + + requested_checksum_func_name + + " from checksum factory: " + checksum_factory->Name(); + return IOStatus::InvalidArgument(msg); + } else { + // For backward compatibility and use in file ingestion clients where there + // is no stored checksum function name, `requested_checksum_func_name` can + // be empty. If we give the requested checksum function name, we expect it + // is the same name of the checksum generator. 
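+    // [Editorial note] The generator contract used below (Update over
+    // arbitrary chunk boundaries, then Finalize, then GetChecksum) can be
+    // sketched with a toy byte-sum generator; illustrative only, not a real
+    // RocksDB checksum:
+    //
+    //   struct ToySum {
+    //     uint64_t sum = 0;
+    //     const char* Name() const { return "ToySum"; }
+    //     void Update(const char* p, size_t n) {
+    //       while (n--) sum += static_cast<unsigned char>(*p++);
+    //     }
+    //     void Finalize() {}  // nothing to fold for a plain sum
+    //     std::string GetChecksum() const { return std::to_string(sum); }
+    //   };
+    //
+    //   // Feeding "abc" as "ab" + "c" gives the same result as one
+    //   // Update("abc", 3) call: 97 + 98 + 99 == 294.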
+ if (!requested_checksum_func_name.empty() && + checksum_generator->Name() != requested_checksum_func_name) { + std::string msg = "Expected file checksum generator named '" + + requested_checksum_func_name + + "', while the factory created one " + "named '" + + checksum_generator->Name() + "'"; + return IOStatus::InvalidArgument(msg); + } + } + + uint64_t size; + IOStatus io_s; + std::unique_ptr reader; + { + std::unique_ptr r_file; + io_s = fs->NewRandomAccessFile(file_path, FileOptions(), &r_file, nullptr); + if (!io_s.ok()) { + return io_s; + } + io_s = fs->GetFileSize(file_path, IOOptions(), &size, nullptr); + if (!io_s.ok()) { + return io_s; + } + reader.reset(new RandomAccessFileReader( + std::move(r_file), file_path, nullptr /*Env*/, io_tracer, nullptr, + Histograms::HISTOGRAM_ENUM_MAX, nullptr, rate_limiter)); + } + + // Found that 256 KB readahead size provides the best performance, based on + // experiments, for auto readahead. Experiment data is in PR #3282. + size_t default_max_read_ahead_size = 256 * 1024; + size_t readahead_size = (verify_checksums_readahead_size != 0) + ? verify_checksums_readahead_size + : default_max_read_ahead_size; + std::unique_ptr buf; + if (reader->use_direct_io()) { + size_t alignment = reader->file()->GetRequiredBufferAlignment(); + readahead_size = (readahead_size + alignment - 1) & ~(alignment - 1); + } + buf.reset(new char[readahead_size]); + + Slice slice; + uint64_t offset = 0; + IOOptions opts; + while (size > 0) { + size_t bytes_to_read = + static_cast(std::min(uint64_t{readahead_size}, size)); + io_s = reader->Read(opts, offset, bytes_to_read, &slice, buf.get(), nullptr, + rate_limiter_priority); + if (!io_s.ok()) { + return IOStatus::Corruption("file read failed with error: " + + io_s.ToString()); + } + if (slice.size() == 0) { + return IOStatus::Corruption("file too small"); + } + checksum_generator->Update(slice.data(), slice.size()); + size -= slice.size(); + offset += slice.size(); + + TEST_SYNC_POINT("GenerateOneFileChecksum::Chunk:0"); + } + checksum_generator->Finalize(); + *file_checksum = checksum_generator->GetChecksum(); + *file_checksum_func_name = checksum_generator->Name(); + return IOStatus::OK(); +} + +Status DestroyDir(Env* env, const std::string& dir) { + Status s; + if (env->FileExists(dir).IsNotFound()) { + return s; + } + std::vector files_in_dir; + s = env->GetChildren(dir, &files_in_dir); + if (s.ok()) { + for (auto& file_in_dir : files_in_dir) { + std::string path = dir + "/" + file_in_dir; + bool is_dir = false; + s = env->IsDirectory(path, &is_dir); + if (s.ok()) { + if (is_dir) { + s = DestroyDir(env, path); + } else { + s = env->DeleteFile(path); + } + } else if (s.IsNotSupported()) { + s = Status::OK(); + } + if (!s.ok()) { + // IsDirectory, etc. 
might not report NotFound
+        if (s.IsNotFound() || env->FileExists(path).IsNotFound()) {
+          // Allow files to be deleted externally
+          s = Status::OK();
+        } else {
+          break;
+        }
+      }
+    }
+  }
+
+  if (s.ok()) {
+    s = env->DeleteDir(dir);
+    // DeleteDir might or might not report NotFound
+    if (!s.ok() && (s.IsNotFound() || env->FileExists(dir).IsNotFound())) {
+      // Allow the directory to be deleted externally
+      s = Status::OK();
+    }
+  }
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.h
new file mode 100644
index 0000000000..e279cfba0a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/file_util.h
@@ -0,0 +1,91 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+#include 
+
+#include "file/filename.h"
+#include "options/db_options.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/sst_file_writer.h"
+#include "rocksdb/status.h"
+#include "rocksdb/system_clock.h"
+#include "rocksdb/types.h"
+#include "trace_replay/io_tracer.h"
+
+namespace ROCKSDB_NAMESPACE {
+// use_fsync maps to options.use_fsync, which determines the way that
+// the file is synced after copying.
+extern IOStatus CopyFile(FileSystem* fs, const std::string& source,
+                         std::unique_ptr<WritableFileWriter>& dest_writer,
+                         uint64_t size, bool use_fsync,
+                         const std::shared_ptr<IOTracer>& io_tracer,
+                         const Temperature temperature);
+extern IOStatus CopyFile(FileSystem* fs, const std::string& source,
+                         const std::string& destination, uint64_t size,
+                         bool use_fsync,
+                         const std::shared_ptr<IOTracer>& io_tracer,
+                         const Temperature temperature);
+inline IOStatus CopyFile(const std::shared_ptr<FileSystem>& fs,
+                         const std::string& source,
+                         const std::string& destination, uint64_t size,
+                         bool use_fsync,
+                         const std::shared_ptr<IOTracer>& io_tracer,
+                         const Temperature temperature) {
+  return CopyFile(fs.get(), source, destination, size, use_fsync, io_tracer,
+                  temperature);
+}
+extern IOStatus CreateFile(FileSystem* fs, const std::string& destination,
+                           const std::string& contents, bool use_fsync);
+
+inline IOStatus CreateFile(const std::shared_ptr<FileSystem>& fs,
+                           const std::string& destination,
+                           const std::string& contents, bool use_fsync) {
+  return CreateFile(fs.get(), destination, contents, use_fsync);
+}
+
+extern Status DeleteDBFile(const ImmutableDBOptions* db_options,
+                           const std::string& fname,
+                           const std::string& path_to_sync, const bool force_bg,
+                           const bool force_fg);
+
+extern IOStatus GenerateOneFileChecksum(
+    FileSystem* fs, const std::string& file_path,
+    FileChecksumGenFactory* checksum_factory,
+    const std::string& requested_checksum_func_name, std::string* file_checksum,
+    std::string* file_checksum_func_name,
+    size_t verify_checksums_readahead_size, bool allow_mmap_reads,
+    std::shared_ptr<IOTracer>& io_tracer, RateLimiter* rate_limiter,
+    Env::IOPriority rate_limiter_priority);
+
+inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro,
+                                         SystemClock* clock, IOOptions& opts) {
+  if (ro.deadline.count()) {
+    std::chrono::microseconds now =
+        std::chrono::microseconds(clock->NowMicros());
+    // Ensure there is at least 1us available.
We don't want to pass a value of + // 0 as that means no timeout + if (now >= ro.deadline) { + return IOStatus::TimedOut("Deadline exceeded"); + } + opts.timeout = ro.deadline - now; + } + + if (ro.io_timeout.count() && + (!opts.timeout.count() || ro.io_timeout < opts.timeout)) { + opts.timeout = ro.io_timeout; + } + + opts.rate_limiter_priority = ro.rate_limiter_priority; + opts.io_activity = ro.io_activity; + + return IOStatus::OK(); +} + +// Test method to delete the input directory and all of its contents. +// This method is destructive and is meant for use only in tests!!! +Status DestroyDir(Env* env, const std::string& dir); +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.cc new file mode 100644 index 0000000000..1e04c73395 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.cc @@ -0,0 +1,523 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "file/filename.h" + +#include +#include + +#include +#include + +#include "file/writable_file_writer.h" +#include "rocksdb/env.h" +#include "test_util/sync_point.h" +#include "util/stop_watch.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +const std::string kCurrentFileName = "CURRENT"; +const std::string kOptionsFileNamePrefix = "OPTIONS-"; +const std::string kTempFileNameSuffix = "dbtmp"; + +static const std::string kRocksDbTFileExt = "sst"; +static const std::string kLevelDbTFileExt = "ldb"; +static const std::string kRocksDBBlobFileExt = "blob"; +static const std::string kArchivalDirName = "archive"; + +// Given a path, flatten the path name by replacing all chars not in +// {[0-9,a-z,A-Z,-,_,.]} with _. And append '_LOG\0' at the end. +// Return the number of chars stored in dest not including the trailing '\0'. +static size_t GetInfoLogPrefix(const std::string& path, char* dest, int len) { + const char suffix[] = "_LOG"; + + size_t write_idx = 0; + size_t i = 0; + size_t src_len = path.size(); + + while (i < src_len && write_idx < len - sizeof(suffix)) { + if ((path[i] >= 'a' && path[i] <= 'z') || + (path[i] >= '0' && path[i] <= '9') || + (path[i] >= 'A' && path[i] <= 'Z') || path[i] == '-' || + path[i] == '.' 
|| path[i] == '_') { + dest[write_idx++] = path[i]; + } else { + if (i > 0) { + dest[write_idx++] = '_'; + } + } + i++; + } + assert(sizeof(suffix) <= len - write_idx); + // "\0" is automatically added by snprintf + snprintf(dest + write_idx, len - write_idx, suffix); + write_idx += sizeof(suffix) - 1; + return write_idx; +} + +static std::string MakeFileName(uint64_t number, const char* suffix) { + char buf[100]; + snprintf(buf, sizeof(buf), "%06llu.%s", + static_cast(number), suffix); + return buf; +} + +static std::string MakeFileName(const std::string& name, uint64_t number, + const char* suffix) { + return name + "/" + MakeFileName(number, suffix); +} + +std::string LogFileName(const std::string& name, uint64_t number) { + assert(number > 0); + return MakeFileName(name, number, "log"); +} + +std::string LogFileName(uint64_t number) { + assert(number > 0); + return MakeFileName(number, "log"); +} + +std::string BlobFileName(uint64_t number) { + assert(number > 0); + return MakeFileName(number, kRocksDBBlobFileExt.c_str()); +} + +std::string BlobFileName(const std::string& blobdirname, uint64_t number) { + assert(number > 0); + return MakeFileName(blobdirname, number, kRocksDBBlobFileExt.c_str()); +} + +std::string BlobFileName(const std::string& dbname, const std::string& blob_dir, + uint64_t number) { + assert(number > 0); + return MakeFileName(dbname + "/" + blob_dir, number, + kRocksDBBlobFileExt.c_str()); +} + +std::string ArchivalDirectory(const std::string& dir) { + return dir + "/" + kArchivalDirName; +} +std::string ArchivedLogFileName(const std::string& name, uint64_t number) { + assert(number > 0); + return MakeFileName(name + "/" + kArchivalDirName, number, "log"); +} + +std::string MakeTableFileName(const std::string& path, uint64_t number) { + return MakeFileName(path, number, kRocksDbTFileExt.c_str()); +} + +std::string MakeTableFileName(uint64_t number) { + return MakeFileName(number, kRocksDbTFileExt.c_str()); +} + +std::string Rocks2LevelTableFileName(const std::string& fullname) { + assert(fullname.size() > kRocksDbTFileExt.size() + 1); + if (fullname.size() <= kRocksDbTFileExt.size() + 1) { + return ""; + } + return fullname.substr(0, fullname.size() - kRocksDbTFileExt.size()) + + kLevelDbTFileExt; +} + +uint64_t TableFileNameToNumber(const std::string& name) { + uint64_t number = 0; + uint64_t base = 1; + int pos = static_cast(name.find_last_of('.')); + while (--pos >= 0 && name[pos] >= '0' && name[pos] <= '9') { + number += (name[pos] - '0') * base; + base *= 10; + } + return number; +} + +std::string TableFileName(const std::vector& db_paths, uint64_t number, + uint32_t path_id) { + assert(number > 0); + std::string path; + if (path_id >= db_paths.size()) { + path = db_paths.back().path; + } else { + path = db_paths[path_id].path; + } + return MakeTableFileName(path, number); +} + +void FormatFileNumber(uint64_t number, uint32_t path_id, char* out_buf, + size_t out_buf_size) { + if (path_id == 0) { + snprintf(out_buf, out_buf_size, "%" PRIu64, number); + } else { + snprintf(out_buf, out_buf_size, + "%" PRIu64 + "(path " + "%" PRIu32 ")", + number, path_id); + } +} + +std::string DescriptorFileName(uint64_t number) { + assert(number > 0); + char buf[100]; + snprintf(buf, sizeof(buf), "MANIFEST-%06llu", + static_cast(number)); + return buf; +} + +std::string DescriptorFileName(const std::string& dbname, uint64_t number) { + return dbname + "/" + DescriptorFileName(number); +} + +std::string CurrentFileName(const std::string& dbname) { + return dbname + "/" + 
kCurrentFileName; +} + +std::string LockFileName(const std::string& dbname) { return dbname + "/LOCK"; } + +std::string TempFileName(const std::string& dbname, uint64_t number) { + return MakeFileName(dbname, number, kTempFileNameSuffix.c_str()); +} + +InfoLogPrefix::InfoLogPrefix(bool has_log_dir, + const std::string& db_absolute_path) { + if (!has_log_dir) { + const char kInfoLogPrefix[] = "LOG"; + // "\0" is automatically added to the end + snprintf(buf, sizeof(buf), kInfoLogPrefix); + prefix = Slice(buf, sizeof(kInfoLogPrefix) - 1); + } else { + size_t len = + GetInfoLogPrefix(NormalizePath(db_absolute_path), buf, sizeof(buf)); + prefix = Slice(buf, len); + } +} + +std::string InfoLogFileName(const std::string& dbname, + const std::string& db_path, + const std::string& log_dir) { + if (log_dir.empty()) { + return dbname + "/LOG"; + } + + InfoLogPrefix info_log_prefix(true, db_path); + return log_dir + "/" + info_log_prefix.buf; +} + +// Return the name of the old info log file for "dbname". +std::string OldInfoLogFileName(const std::string& dbname, uint64_t ts, + const std::string& db_path, + const std::string& log_dir) { + char buf[50]; + snprintf(buf, sizeof(buf), "%llu", static_cast(ts)); + + if (log_dir.empty()) { + return dbname + "/LOG.old." + buf; + } + + InfoLogPrefix info_log_prefix(true, db_path); + return log_dir + "/" + info_log_prefix.buf + ".old." + buf; +} + +std::string OptionsFileName(uint64_t file_num) { + char buffer[256]; + snprintf(buffer, sizeof(buffer), "%s%06" PRIu64, + kOptionsFileNamePrefix.c_str(), file_num); + return buffer; +} +std::string OptionsFileName(const std::string& dbname, uint64_t file_num) { + return dbname + "/" + OptionsFileName(file_num); +} + +std::string TempOptionsFileName(const std::string& dbname, uint64_t file_num) { + char buffer[256]; + snprintf(buffer, sizeof(buffer), "%s%06" PRIu64 ".%s", + kOptionsFileNamePrefix.c_str(), file_num, + kTempFileNameSuffix.c_str()); + return dbname + "/" + buffer; +} + +std::string MetaDatabaseName(const std::string& dbname, uint64_t number) { + char buf[100]; + snprintf(buf, sizeof(buf), "/METADB-%llu", + static_cast(number)); + return dbname + buf; +} + +std::string IdentityFileName(const std::string& dbname) { + return dbname + "/IDENTITY"; +} + +// Owned filenames have the form: +// dbname/IDENTITY +// dbname/CURRENT +// dbname/LOCK +// dbname/ +// dbname/.old.[0-9]+ +// dbname/MANIFEST-[0-9]+ +// dbname/[0-9]+.(log|sst|blob) +// dbname/METADB-[0-9]+ +// dbname/OPTIONS-[0-9]+ +// dbname/OPTIONS-[0-9]+.dbtmp +// Disregards / at the beginning +bool ParseFileName(const std::string& fname, uint64_t* number, FileType* type, + WalFileType* log_type) { + return ParseFileName(fname, number, "", type, log_type); +} + +bool ParseFileName(const std::string& fname, uint64_t* number, + const Slice& info_log_name_prefix, FileType* type, + WalFileType* log_type) { + Slice rest(fname); + if (fname.length() > 1 && fname[0] == '/') { + rest.remove_prefix(1); + } + if (rest == "IDENTITY") { + *number = 0; + *type = kIdentityFile; + } else if (rest == "CURRENT") { + *number = 0; + *type = kCurrentFile; + } else if (rest == "LOCK") { + *number = 0; + *type = kDBLockFile; + } else if (info_log_name_prefix.size() > 0 && + rest.starts_with(info_log_name_prefix)) { + rest.remove_prefix(info_log_name_prefix.size()); + if (rest == "" || rest == ".old") { + *number = 0; + *type = kInfoLogFile; + } else if (rest.starts_with(".old.")) { + uint64_t ts_suffix; + // sizeof also counts the trailing '\0'. 
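+      // [Editorial note] e.g. sizeof(".old.") == 6: five characters plus the
+      // terminating '\0', hence the "- 1" below.
+      static_assert(sizeof(".old.") - 1 == 5, "prefix length without the NUL");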
+ rest.remove_prefix(sizeof(".old.") - 1); + if (!ConsumeDecimalNumber(&rest, &ts_suffix)) { + return false; + } + *number = ts_suffix; + *type = kInfoLogFile; + } + } else if (rest.starts_with("MANIFEST-")) { + rest.remove_prefix(strlen("MANIFEST-")); + uint64_t num; + if (!ConsumeDecimalNumber(&rest, &num)) { + return false; + } + if (!rest.empty()) { + return false; + } + *type = kDescriptorFile; + *number = num; + } else if (rest.starts_with("METADB-")) { + rest.remove_prefix(strlen("METADB-")); + uint64_t num; + if (!ConsumeDecimalNumber(&rest, &num)) { + return false; + } + if (!rest.empty()) { + return false; + } + *type = kMetaDatabase; + *number = num; + } else if (rest.starts_with(kOptionsFileNamePrefix)) { + uint64_t ts_suffix; + bool is_temp_file = false; + rest.remove_prefix(kOptionsFileNamePrefix.size()); + const std::string kTempFileNameSuffixWithDot = + std::string(".") + kTempFileNameSuffix; + if (rest.ends_with(kTempFileNameSuffixWithDot)) { + rest.remove_suffix(kTempFileNameSuffixWithDot.size()); + is_temp_file = true; + } + if (!ConsumeDecimalNumber(&rest, &ts_suffix)) { + return false; + } + *number = ts_suffix; + *type = is_temp_file ? kTempFile : kOptionsFile; + } else { + // Avoid strtoull() to keep filename format independent of the + // current locale + bool archive_dir_found = false; + if (rest.starts_with(kArchivalDirName)) { + if (rest.size() <= kArchivalDirName.size()) { + return false; + } + rest.remove_prefix(kArchivalDirName.size() + + 1); // Add 1 to remove / also + if (log_type) { + *log_type = kArchivedLogFile; + } + archive_dir_found = true; + } + uint64_t num; + if (!ConsumeDecimalNumber(&rest, &num)) { + return false; + } + if (rest.size() <= 1 || rest[0] != '.') { + return false; + } + rest.remove_prefix(1); + + Slice suffix = rest; + if (suffix == Slice("log")) { + *type = kWalFile; + if (log_type && !archive_dir_found) { + *log_type = kAliveLogFile; + } + } else if (archive_dir_found) { + return false; // Archive dir can contain only log files + } else if (suffix == Slice(kRocksDbTFileExt) || + suffix == Slice(kLevelDbTFileExt)) { + *type = kTableFile; + } else if (suffix == Slice(kRocksDBBlobFileExt)) { + *type = kBlobFile; + } else if (suffix == Slice(kTempFileNameSuffix)) { + *type = kTempFile; + } else { + return false; + } + *number = num; + } + return true; +} + +IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname, + uint64_t descriptor_number, + FSDirectory* dir_contains_current_file) { + // Remove leading "dbname/" and add newline to manifest file name + std::string manifest = DescriptorFileName(dbname, descriptor_number); + Slice contents = manifest; + assert(contents.starts_with(dbname + "/")); + contents.remove_prefix(dbname.size() + 1); + std::string tmp = TempFileName(dbname, descriptor_number); + IOStatus s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true); + TEST_SYNC_POINT_CALLBACK("SetCurrentFile:BeforeRename", &s); + if (s.ok()) { + TEST_KILL_RANDOM_WITH_WEIGHT("SetCurrentFile:0", REDUCE_ODDS2); + s = fs->RenameFile(tmp, CurrentFileName(dbname), IOOptions(), nullptr); + TEST_KILL_RANDOM_WITH_WEIGHT("SetCurrentFile:1", REDUCE_ODDS2); + TEST_SYNC_POINT_CALLBACK("SetCurrentFile:AfterRename", &s); + } + if (s.ok()) { + if (dir_contains_current_file != nullptr) { + s = dir_contains_current_file->FsyncWithDirOptions( + IOOptions(), nullptr, DirFsyncOptions(CurrentFileName(dbname))); + } + } else { + fs->DeleteFile(tmp, IOOptions(), nullptr) + .PermitUncheckedError(); // NOTE: PermitUncheckedError is acceptable 
+                              // here as we are already handling an error
+                              // case, and this is just a best-attempt
+                              // effort at some cleanup
+  }
+  return s;
+}
+
+Status SetIdentityFile(Env* env, const std::string& dbname,
+                       const std::string& db_id) {
+  std::string id;
+  if (db_id.empty()) {
+    id = env->GenerateUniqueId();
+  } else {
+    id = db_id;
+  }
+  assert(!id.empty());
+  // Reserve the filename dbname/000000.dbtmp for the temporary identity file
+  std::string tmp = TempFileName(dbname, 0);
+  std::string identify_file_name = IdentityFileName(dbname);
+  Status s = WriteStringToFile(env, id, tmp, true);
+  if (s.ok()) {
+    s = env->RenameFile(tmp, identify_file_name);
+  }
+  std::unique_ptr<FSDirectory> dir_obj;
+  if (s.ok()) {
+    s = env->GetFileSystem()->NewDirectory(dbname, IOOptions(), &dir_obj,
+                                           nullptr);
+  }
+  if (s.ok()) {
+    s = dir_obj->FsyncWithDirOptions(IOOptions(), nullptr,
+                                     DirFsyncOptions(identify_file_name));
+  }
+
+  // The default Close() could return "NotSupported" and we bypass it
+  // if it is not implemented. Detailed explanations can be found in
+  // db/db_impl/db_impl.h
+  if (s.ok()) {
+    Status temp_s = dir_obj->Close(IOOptions(), nullptr);
+    if (!temp_s.ok()) {
+      if (temp_s.IsNotSupported()) {
+        temp_s.PermitUncheckedError();
+      } else {
+        s = temp_s;
+      }
+    }
+  }
+  if (!s.ok()) {
+    env->DeleteFile(tmp).PermitUncheckedError();
+  }
+  return s;
+}
+
+IOStatus SyncManifest(const ImmutableDBOptions* db_options,
+                      WritableFileWriter* file) {
+  TEST_KILL_RANDOM_WITH_WEIGHT("SyncManifest:0", REDUCE_ODDS2);
+  StopWatch sw(db_options->clock, db_options->stats, MANIFEST_FILE_SYNC_MICROS);
+  return file->Sync(db_options->use_fsync);
+}
+
+Status GetInfoLogFiles(const std::shared_ptr<FileSystem>& fs,
+                       const std::string& db_log_dir, const std::string& dbname,
+                       std::string* parent_dir,
+                       std::vector<std::string>* info_log_list) {
+  assert(parent_dir != nullptr);
+  assert(info_log_list != nullptr);
+  uint64_t number = 0;
+  FileType type = kWalFile;
+
+  if (!db_log_dir.empty()) {
+    *parent_dir = db_log_dir;
+  } else {
+    *parent_dir = dbname;
+  }
+
+  InfoLogPrefix info_log_prefix(!db_log_dir.empty(), dbname);
+
+  std::vector<std::string> file_names;
+  Status s = fs->GetChildren(*parent_dir, IOOptions(), &file_names, nullptr);
+
+  if (!s.ok()) {
+    return s;
+  }
+
+  for (auto& f : file_names) {
+    if (ParseFileName(f, &number, info_log_prefix.prefix, &type) &&
+        (type == kInfoLogFile)) {
+      info_log_list->push_back(f);
+    }
+  }
+  return Status::OK();
+}
+
+std::string NormalizePath(const std::string& path) {
+  std::string dst;
+
+  if (path.length() > 2 && path[0] == kFilePathSeparator &&
+      path[1] == kFilePathSeparator) {  // Handle UNC names
+    dst.append(2, kFilePathSeparator);
+  }
+
+  for (auto c : path) {
+    if (!dst.empty() && (c == kFilePathSeparator || c == '/') &&
+        (dst.back() == kFilePathSeparator || dst.back() == '/')) {
+      continue;
+    }
+    dst.push_back(c);
+  }
+  return dst;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.h
new file mode 100644
index 0000000000..2eb125b6a1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/filename.h
@@ -0,0 +1,188 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// File names used by DB code + +#pragma once +#include + +#include +#include +#include + +#include "options/db_options.h" +#include "port/port.h" +#include "rocksdb/file_system.h" +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/transaction_log.h" + +namespace ROCKSDB_NAMESPACE { + +class Env; +class Directory; +class SystemClock; +class WritableFileWriter; + +#ifdef OS_WIN +constexpr char kFilePathSeparator = '\\'; +#else +constexpr char kFilePathSeparator = '/'; +#endif + +// Return the name of the log file with the specified number +// in the db named by "dbname". The result will be prefixed with +// "dbname". +extern std::string LogFileName(const std::string& dbname, uint64_t number); + +extern std::string LogFileName(uint64_t number); + +extern std::string BlobFileName(uint64_t number); + +extern std::string BlobFileName(const std::string& bdirname, uint64_t number); + +extern std::string BlobFileName(const std::string& dbname, + const std::string& blob_dir, uint64_t number); + +extern std::string ArchivalDirectory(const std::string& dbname); + +// Return the name of the archived log file with the specified number +// in the db named by "dbname". The result will be prefixed with "dbname". +extern std::string ArchivedLogFileName(const std::string& dbname, uint64_t num); + +extern std::string MakeTableFileName(const std::string& name, uint64_t number); + +extern std::string MakeTableFileName(uint64_t number); + +// Return the name of sstable with LevelDB suffix +// created from RocksDB sstable suffixed name +extern std::string Rocks2LevelTableFileName(const std::string& fullname); + +// the reverse function of MakeTableFileName +// TODO(yhchiang): could merge this function with ParseFileName() +extern uint64_t TableFileNameToNumber(const std::string& name); + +// Return the name of the sstable with the specified number +// in the db named by "dbname". The result will be prefixed with +// "dbname". +extern std::string TableFileName(const std::vector& db_paths, + uint64_t number, uint32_t path_id); + +// Sufficient buffer size for FormatFileNumber. +const size_t kFormatFileNumberBufSize = 38; + +extern void FormatFileNumber(uint64_t number, uint32_t path_id, char* out_buf, + size_t out_buf_size); + +// Return the name of the descriptor file for the db named by +// "dbname" and the specified incarnation number. The result will be +// prefixed with "dbname". +extern std::string DescriptorFileName(const std::string& dbname, + uint64_t number); + +extern std::string DescriptorFileName(uint64_t number); + +extern const std::string kCurrentFileName; // = "CURRENT" + +// Return the name of the current file. This file contains the name +// of the current manifest file. The result will be prefixed with +// "dbname". +extern std::string CurrentFileName(const std::string& dbname); + +// Return the name of the lock file for the db named by +// "dbname". The result will be prefixed with "dbname". +extern std::string LockFileName(const std::string& dbname); + +// Return the name of a temporary file owned by the db named "dbname". +// The result will be prefixed with "dbname". +extern std::string TempFileName(const std::string& dbname, uint64_t number); + +// A helper structure for prefix of info log names. 
+struct InfoLogPrefix {
+  char buf[260];
+  Slice prefix;
+  // Prefix with DB absolute path encoded
+  explicit InfoLogPrefix(bool has_log_dir, const std::string& db_absolute_path);
+  // Default Prefix
+  explicit InfoLogPrefix();
+};
+
+// Return the name of the info log file for "dbname".
+extern std::string InfoLogFileName(const std::string& dbname,
+                                   const std::string& db_path = "",
+                                   const std::string& log_dir = "");
+
+// Return the name of the old info log file for "dbname".
+extern std::string OldInfoLogFileName(const std::string& dbname, uint64_t ts,
+                                      const std::string& db_path = "",
+                                      const std::string& log_dir = "");
+
+extern const std::string kOptionsFileNamePrefix;  // = "OPTIONS-"
+extern const std::string kTempFileNameSuffix;     // = "dbtmp"
+
+// Return an options file name given the "dbname" and file number.
+// Format: OPTIONS-[number]
+extern std::string OptionsFileName(const std::string& dbname,
+                                   uint64_t file_num);
+extern std::string OptionsFileName(uint64_t file_num);
+
+// Return a temp options file name given the "dbname" and file number.
+// Format: OPTIONS-[number].dbtmp
+extern std::string TempOptionsFileName(const std::string& dbname,
+                                       uint64_t file_num);
+
+// Return the name to use for a metadatabase. The result will be prefixed with
+// "dbname".
+extern std::string MetaDatabaseName(const std::string& dbname, uint64_t number);
+
+// Return the name of the Identity file which stores a unique number for the db
+// that will get regenerated if the db loses all its data and is recreated
+// fresh, either from a backup-image or empty
+extern std::string IdentityFileName(const std::string& dbname);
+
+// If filename is a rocksdb file, store the type of the file in *type.
+// The number encoded in the filename is stored in *number. If the
+// filename was successfully parsed, returns true. Else return false.
+// info_log_name_prefix is the path of info logs.
+extern bool ParseFileName(const std::string& filename, uint64_t* number,
+                          const Slice& info_log_name_prefix, FileType* type,
+                          WalFileType* log_type = nullptr);
+// Same as previous function, but skip info log files.
+extern bool ParseFileName(const std::string& filename, uint64_t* number,
+                          FileType* type, WalFileType* log_type = nullptr);
+
+// Make the CURRENT file point to the descriptor file with the
+// specified number. On its success and when dir_contains_current_file is not
+// nullptr, the function will also fsync the directory containing the CURRENT
+// file.
+extern IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
+                               uint64_t descriptor_number,
+                               FSDirectory* dir_contains_current_file);
+
+// Make the IDENTITY file for the db
+extern Status SetIdentityFile(Env* env, const std::string& dbname,
+                              const std::string& db_id = {});
+
+// Sync manifest file `file`.
+extern IOStatus SyncManifest(const ImmutableDBOptions* db_options,
+                             WritableFileWriter* file);
+
+// Return the list of file names of info logs in `file_names`.
+// The list only contains file names. The parent directory name is stored
+// in `parent_dir`.
+// `db_log_dir` should be the one as in options.db_log_dir
+extern Status GetInfoLogFiles(const std::shared_ptr<FileSystem>& fs,
+                              const std::string& db_log_dir,
+                              const std::string& dbname,
+                              std::string* parent_dir,
+                              std::vector<std::string>* file_names);
+
+extern std::string NormalizePath(const std::string& path);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.cc
new file mode 100644
index 0000000000..50c415dc68
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.cc
@@ -0,0 +1,73 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "file/line_file_reader.h"
+
+#include <cstring>
+
+#include "monitoring/iostats_context_imp.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+IOStatus LineFileReader::Create(const std::shared_ptr<FileSystem>& fs,
+                                const std::string& fname,
+                                const FileOptions& file_opts,
+                                std::unique_ptr<LineFileReader>* reader,
+                                IODebugContext* dbg,
+                                RateLimiter* rate_limiter) {
+  std::unique_ptr<FSSequentialFile> file;
+  IOStatus io_s = fs->NewSequentialFile(fname, file_opts, &file, dbg);
+  if (io_s.ok()) {
+    reader->reset(new LineFileReader(
+        std::move(file), fname, nullptr,
+        std::vector<std::shared_ptr<EventListener>>{}, rate_limiter));
+  }
+  return io_s;
+}
+
+bool LineFileReader::ReadLine(std::string* out,
+                              Env::IOPriority rate_limiter_priority) {
+  assert(out);
+  if (!io_status_.ok()) {
+    // Status should be checked (or permit unchecked) any time we return false.
+    io_status_.MustCheck();
+    return false;
+  }
+  out->clear();
+  for (;;) {
+    // Look for line delimiter
+    const char* found = static_cast<const char*>(
+        std::memchr(buf_begin_, '\n', buf_end_ - buf_begin_));
+    if (found) {
+      size_t len = found - buf_begin_;
+      out->append(buf_begin_, len);
+      buf_begin_ += len + /*delim*/ 1;
+      ++line_number_;
+      return true;
+    }
+    if (at_eof_) {
+      io_status_.MustCheck();
+      return false;
+    }
+    // else flush and reload buffer
+    out->append(buf_begin_, buf_end_ - buf_begin_);
+    Slice result;
+    io_status_ =
+        sfr_.Read(buf_.size(), &result, buf_.data(), rate_limiter_priority);
+    IOSTATS_ADD(bytes_read, result.size());
+    if (!io_status_.ok()) {
+      io_status_.MustCheck();
+      return false;
+    }
+    if (result.size() != buf_.size()) {
+      // The obscure way of indicating EOF
+      at_eof_ = true;
+    }
+    buf_begin_ = result.data();
+    buf_end_ = result.data() + result.size();
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.h
new file mode 100644
index 0000000000..cc302d3115
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/line_file_reader.h
@@ -0,0 +1,60 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <array>
+
+#include "file/sequence_file_reader.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A wrapper on top of Env::SequentialFile for reading text lines from a file.
+// Lines are delimited by '\n'. The last line may or may not include a
+// trailing newline. Uses SequentialFileReader internally.
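+//
+// Illustrative usage sketch (an editor's addition, not part of the vendored
+// upstream sources; `fs` and `fname` are placeholder names):
+//
+//   std::unique_ptr<LineFileReader> reader;
+//   IOStatus io_s = LineFileReader::Create(fs, fname, FileOptions(), &reader,
+//                                          /*dbg=*/nullptr,
+//                                          /*rate_limiter=*/nullptr);
+//   std::string line;
+//   while (io_s.ok() && reader->ReadLine(&line, Env::IO_TOTAL)) {
+//     // process `line` (the '\n' delimiter is already stripped)
+//   }
+//   // ReadLine() returning false means either EOF or an I/O error;
+//   // distinguish the two via GetStatus(): an OK status means EOF.
+//   if (io_s.ok()) io_s = reader->GetStatus();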
+class LineFileReader {
+ private:
+  std::array<char, 8192> buf_;
+  SequentialFileReader sfr_;
+  IOStatus io_status_;
+  const char* buf_begin_ = buf_.data();
+  const char* buf_end_ = buf_.data();
+  size_t line_number_ = 0;
+  bool at_eof_ = false;
+
+ public:
+  // See SequentialFileReader constructors
+  template <typename... Args>
+  explicit LineFileReader(Args&&... args)
+      : sfr_(std::forward<Args>(args)...) {}
+
+  static IOStatus Create(const std::shared_ptr<FileSystem>& fs,
+                         const std::string& fname, const FileOptions& file_opts,
+                         std::unique_ptr<LineFileReader>* reader,
+                         IODebugContext* dbg, RateLimiter* rate_limiter);
+
+  LineFileReader(const LineFileReader&) = delete;
+  LineFileReader& operator=(const LineFileReader&) = delete;
+
+  // Reads another line from the file, returning true on success and saving
+  // the line to `out`, without the delimiter, or returning false on failure.
+  // You must check GetStatus() to determine whether the failure was just
+  // end-of-file (OK status) or an I/O error (another status).
+  // The internal rate limiter will be charged at the specified priority.
+  bool ReadLine(std::string* out, Env::IOPriority rate_limiter_priority);
+
+  // Returns the number of the line most recently returned from ReadLine.
+  // Return value is unspecified if ReadLine has returned false due to
+  // I/O error. After ReadLine returns false due to end-of-file, the return
+  // value is the last returned line number, or equivalently the total
+  // number of lines returned.
+  size_t GetLineNumber() const { return line_number_; }
+
+  // Returns any error encountered during read. The error is considered
+  // permanent and no retry or recovery is attempted with the same
+  // LineFileReader.
+  const IOStatus& GetStatus() const { return io_status_; }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.cc
new file mode 100644
index 0000000000..308cd3e5bb
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.cc
@@ -0,0 +1,599 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+ +#include "file/random_access_file_reader.h" + +#include +#include + +#include "file/file_util.h" +#include "monitoring/histogram.h" +#include "monitoring/iostats_context_imp.h" +#include "port/port.h" +#include "table/format.h" +#include "test_util/sync_point.h" +#include "util/random.h" +#include "util/rate_limiter_impl.h" + +namespace ROCKSDB_NAMESPACE { +const std::array + kReadHistograms{{ + FILE_READ_FLUSH_MICROS, + FILE_READ_COMPACTION_MICROS, + FILE_READ_DB_OPEN_MICROS, + }}; +inline void RecordIOStats(Statistics* stats, Temperature file_temperature, + bool is_last_level, size_t size) { + IOSTATS_ADD(bytes_read, size); + // record for last/non-last level + if (is_last_level) { + RecordTick(stats, LAST_LEVEL_READ_BYTES, size); + RecordTick(stats, LAST_LEVEL_READ_COUNT, 1); + } else { + RecordTick(stats, NON_LAST_LEVEL_READ_BYTES, size); + RecordTick(stats, NON_LAST_LEVEL_READ_COUNT, 1); + } + + // record for temperature file + if (file_temperature != Temperature::kUnknown) { + switch (file_temperature) { + case Temperature::kHot: + IOSTATS_ADD(file_io_stats_by_temperature.hot_file_bytes_read, size); + IOSTATS_ADD(file_io_stats_by_temperature.hot_file_read_count, 1); + RecordTick(stats, HOT_FILE_READ_BYTES, size); + RecordTick(stats, HOT_FILE_READ_COUNT, 1); + break; + case Temperature::kWarm: + IOSTATS_ADD(file_io_stats_by_temperature.warm_file_bytes_read, size); + IOSTATS_ADD(file_io_stats_by_temperature.warm_file_read_count, 1); + RecordTick(stats, WARM_FILE_READ_BYTES, size); + RecordTick(stats, WARM_FILE_READ_COUNT, 1); + break; + case Temperature::kCold: + IOSTATS_ADD(file_io_stats_by_temperature.cold_file_bytes_read, size); + IOSTATS_ADD(file_io_stats_by_temperature.cold_file_read_count, 1); + RecordTick(stats, COLD_FILE_READ_BYTES, size); + RecordTick(stats, COLD_FILE_READ_COUNT, 1); + break; + default: + break; + } + } +} + +IOStatus RandomAccessFileReader::Create( + const std::shared_ptr& fs, const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr* reader, IODebugContext* dbg) { + std::unique_ptr file; + IOStatus io_s = fs->NewRandomAccessFile(fname, file_opts, &file, dbg); + if (io_s.ok()) { + reader->reset(new RandomAccessFileReader(std::move(file), fname)); + } + return io_s; +} + +IOStatus RandomAccessFileReader::Read( + const IOOptions& opts, uint64_t offset, size_t n, Slice* result, + char* scratch, AlignedBuf* aligned_buf, + Env::IOPriority rate_limiter_priority) const { + (void)aligned_buf; + + TEST_SYNC_POINT_CALLBACK("RandomAccessFileReader::Read", nullptr); + + // To be paranoid: modify scratch a little bit, so in case underlying + // FileSystem doesn't fill the buffer but return success and `scratch` returns + // contains a previous block, returned value will not pass checksum. + if (n > 0 && scratch != nullptr) { + // This byte might not change anything for direct I/O case, but it's OK. + scratch[0]++; + } + + IOStatus io_s; + uint64_t elapsed = 0; + { + StopWatch sw(clock_, stats_, hist_type_, + (opts.io_activity != Env::IOActivity::kUnknown) + ? kReadHistograms[(std::size_t)(opts.io_activity)] + : Histograms::HISTOGRAM_ENUM_MAX, + (stats_ != nullptr) ? 
+                     &elapsed : nullptr, true /*overwrite*/,
+                 true /*delay_enabled*/);
+    auto prev_perf_level = GetPerfLevel();
+    IOSTATS_TIMER_GUARD(read_nanos);
+    if (use_direct_io()) {
+      size_t alignment = file_->GetRequiredBufferAlignment();
+      size_t aligned_offset =
+          TruncateToPageBoundary(alignment, static_cast<size_t>(offset));
+      size_t offset_advance = static_cast<size_t>(offset) - aligned_offset;
+      size_t read_size =
+          Roundup(static_cast<size_t>(offset + n), alignment) - aligned_offset;
+      AlignedBuffer buf;
+      buf.Alignment(alignment);
+      buf.AllocateNewBuffer(read_size);
+      while (buf.CurrentSize() < read_size) {
+        size_t allowed;
+        if (rate_limiter_priority != Env::IO_TOTAL &&
+            rate_limiter_ != nullptr) {
+          allowed = rate_limiter_->RequestToken(
+              buf.Capacity() - buf.CurrentSize(), buf.Alignment(),
+              rate_limiter_priority, stats_, RateLimiter::OpType::kRead);
+        } else {
+          assert(buf.CurrentSize() == 0);
+          allowed = read_size;
+        }
+        Slice tmp;
+
+        FileOperationInfo::StartTimePoint start_ts;
+        uint64_t orig_offset = 0;
+        if (ShouldNotifyListeners()) {
+          start_ts = FileOperationInfo::StartNow();
+          orig_offset = aligned_offset + buf.CurrentSize();
+        }
+
+        {
+          IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
+          // Only user reads are expected to specify a timeout. And user reads
+          // are not subjected to rate_limiter and should go through only
+          // one iteration of this loop, so we don't need to check and adjust
+          // the opts.timeout before calling file_->Read
+          assert(!opts.timeout.count() || allowed == read_size);
+          io_s = file_->Read(aligned_offset + buf.CurrentSize(), allowed, opts,
+                             &tmp, buf.Destination(), nullptr);
+        }
+        if (ShouldNotifyListeners()) {
+          auto finish_ts = FileOperationInfo::FinishNow();
+          NotifyOnFileReadFinish(orig_offset, tmp.size(), start_ts, finish_ts,
+                                 io_s);
+          if (!io_s.ok()) {
+            NotifyOnIOError(io_s, FileOperationType::kRead, file_name(),
+                            tmp.size(), orig_offset);
+          }
+        }
+
+        buf.Size(buf.CurrentSize() + tmp.size());
+        if (!io_s.ok() || tmp.size() < allowed) {
+          break;
+        }
+      }
+      size_t res_len = 0;
+      if (io_s.ok() && offset_advance < buf.CurrentSize()) {
+        res_len = std::min(buf.CurrentSize() - offset_advance, n);
+        if (aligned_buf == nullptr) {
+          buf.Read(scratch, offset_advance, res_len);
+        } else {
+          scratch = buf.BufferStart() + offset_advance;
+          aligned_buf->reset(buf.Release());
+        }
+      }
+      *result = Slice(scratch, res_len);
+    } else {
+      size_t pos = 0;
+      const char* res_scratch = nullptr;
+      while (pos < n) {
+        size_t allowed;
+        if (rate_limiter_priority != Env::IO_TOTAL &&
+            rate_limiter_ != nullptr) {
+          if (rate_limiter_->IsRateLimited(RateLimiter::OpType::kRead)) {
+            sw.DelayStart();
+          }
+          allowed = rate_limiter_->RequestToken(n - pos, 0 /* alignment */,
+                                                rate_limiter_priority, stats_,
+                                                RateLimiter::OpType::kRead);
+          if (rate_limiter_->IsRateLimited(RateLimiter::OpType::kRead)) {
+            sw.DelayStop();
+          }
+        } else {
+          allowed = n;
+        }
+        Slice tmp_result;
+
+        FileOperationInfo::StartTimePoint start_ts;
+        if (ShouldNotifyListeners()) {
+          start_ts = FileOperationInfo::StartNow();
+        }
+
+        {
+          IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
+          // Only user reads are expected to specify a timeout.
+          // And user reads are not subjected to rate_limiter and should go
+          // through only one iteration of this loop, so we don't need to
+          // check and adjust the opts.timeout before calling file_->Read
+          assert(!opts.timeout.count() || allowed == n);
+          io_s = file_->Read(offset + pos, allowed, opts, &tmp_result,
+                             scratch + pos, nullptr);
+        }
+        if (ShouldNotifyListeners()) {
+          auto finish_ts = FileOperationInfo::FinishNow();
+          NotifyOnFileReadFinish(offset + pos, tmp_result.size(), start_ts,
+                                 finish_ts, io_s);
+
+          if (!io_s.ok()) {
+            NotifyOnIOError(io_s, FileOperationType::kRead, file_name(),
+                            tmp_result.size(), offset + pos);
+          }
+        }
+        if (res_scratch == nullptr) {
+          // we can't simply use `scratch` because reads of mmap'd files return
+          // data in a different buffer.
+          res_scratch = tmp_result.data();
+        } else {
+          // make sure chunks are inserted contiguously into `res_scratch`.
+          assert(tmp_result.data() == res_scratch + pos);
+        }
+        pos += tmp_result.size();
+        if (!io_s.ok() || tmp_result.size() < allowed) {
+          break;
+        }
+      }
+      *result = Slice(res_scratch, io_s.ok() ? pos : 0);
+    }
+    RecordIOStats(stats_, file_temperature_, is_last_level_, result->size());
+    SetPerfLevel(prev_perf_level);
+  }
+  if (stats_ != nullptr && file_read_hist_ != nullptr) {
+    file_read_hist_->Add(elapsed);
+  }
+
+  return io_s;
+}
+
+size_t End(const FSReadRequest& r) {
+  return static_cast<size_t>(r.offset) + r.len;
+}
+
+FSReadRequest Align(const FSReadRequest& r, size_t alignment) {
+  FSReadRequest req;
+  req.offset = static_cast<uint64_t>(
+      TruncateToPageBoundary(alignment, static_cast<size_t>(r.offset)));
+  req.len = Roundup(End(r), alignment) - req.offset;
+  req.scratch = nullptr;
+  return req;
+}
+
+bool TryMerge(FSReadRequest* dest, const FSReadRequest& src) {
+  size_t dest_offset = static_cast<size_t>(dest->offset);
+  size_t src_offset = static_cast<size_t>(src.offset);
+  size_t dest_end = End(*dest);
+  size_t src_end = End(src);
+  if (std::max(dest_offset, src_offset) > std::min(dest_end, src_end)) {
+    return false;
+  }
+  dest->offset = static_cast<uint64_t>(std::min(dest_offset, src_offset));
+  dest->len = std::max(dest_end, src_end) - dest->offset;
+  return true;
+}
+
+IOStatus RandomAccessFileReader::MultiRead(
+    const IOOptions& opts, FSReadRequest* read_reqs, size_t num_reqs,
+    AlignedBuf* aligned_buf, Env::IOPriority rate_limiter_priority) const {
+  (void)aligned_buf;  // suppress warning of unused variable in LITE mode
+  assert(num_reqs > 0);
+
+#ifndef NDEBUG
+  for (size_t i = 0; i < num_reqs - 1; ++i) {
+    assert(read_reqs[i].offset <= read_reqs[i + 1].offset);
+  }
+#endif  // !NDEBUG
+
+  // To be paranoid: modify scratch a little bit, so in case underlying
+  // FileSystem doesn't fill the buffer but returns success while `scratch`
+  // still contains a previous block, the returned value will not pass the
+  // checksum.
+  // This byte might not change anything for direct I/O case, but it's OK.
+  for (size_t i = 0; i < num_reqs; i++) {
+    FSReadRequest& r = read_reqs[i];
+    if (r.len > 0 && r.scratch != nullptr) {
+      r.scratch[0]++;
+    }
+  }
+
+  IOStatus io_s;
+  uint64_t elapsed = 0;
+  {
+    StopWatch sw(clock_, stats_, hist_type_,
+                 (opts.io_activity != Env::IOActivity::kUnknown)
+                     ? kReadHistograms[(std::size_t)(opts.io_activity)]
+                     : Histograms::HISTOGRAM_ENUM_MAX,
+                 (stats_ != nullptr) ?
+                     &elapsed : nullptr, true /*overwrite*/,
+                 true /*delay_enabled*/);
+    auto prev_perf_level = GetPerfLevel();
+    IOSTATS_TIMER_GUARD(read_nanos);
+
+    FSReadRequest* fs_reqs = read_reqs;
+    size_t num_fs_reqs = num_reqs;
+    std::vector<FSReadRequest> aligned_reqs;
+    if (use_direct_io()) {
+      // num_reqs is the max possible size,
+      // this can reduce std::vector's internal resize operations.
+      aligned_reqs.reserve(num_reqs);
+      // Align and merge the read requests.
+      size_t alignment = file_->GetRequiredBufferAlignment();
+      for (size_t i = 0; i < num_reqs; i++) {
+        const auto& r = Align(read_reqs[i], alignment);
+        if (i == 0) {
+          // head
+          aligned_reqs.push_back(r);
+
+        } else if (!TryMerge(&aligned_reqs.back(), r)) {
+          // head + n
+          aligned_reqs.push_back(r);
+
+        } else {
+          // unused
+          r.status.PermitUncheckedError();
+        }
+      }
+      TEST_SYNC_POINT_CALLBACK("RandomAccessFileReader::MultiRead:AlignedReqs",
+                               &aligned_reqs);
+
+      // Allocate aligned buffer and let scratch buffers point to it.
+      size_t total_len = 0;
+      for (const auto& r : aligned_reqs) {
+        total_len += r.len;
+      }
+      AlignedBuffer buf;
+      buf.Alignment(alignment);
+      buf.AllocateNewBuffer(total_len);
+      char* scratch = buf.BufferStart();
+      for (auto& r : aligned_reqs) {
+        r.scratch = scratch;
+        scratch += r.len;
+      }
+
+      aligned_buf->reset(buf.Release());
+      fs_reqs = aligned_reqs.data();
+      num_fs_reqs = aligned_reqs.size();
+    }
+
+    FileOperationInfo::StartTimePoint start_ts;
+    if (ShouldNotifyListeners()) {
+      start_ts = FileOperationInfo::StartNow();
+    }
+
+    {
+      IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
+      if (rate_limiter_priority != Env::IO_TOTAL && rate_limiter_ != nullptr) {
+        // TODO: ideally we should call `RateLimiter::RequestToken()` for
+        // allowed bytes to multi-read and then consume those bytes by
+        // satisfying as many requests in `MultiRead()` as possible, instead of
+        // what we do here, which can cause burst when the
+        // `total_multi_read_size` is big.
+        size_t total_multi_read_size = 0;
+        assert(fs_reqs != nullptr);
+        for (size_t i = 0; i < num_fs_reqs; ++i) {
+          FSReadRequest& req = fs_reqs[i];
+          total_multi_read_size += req.len;
+        }
+        size_t remaining_bytes = total_multi_read_size;
+        size_t request_bytes = 0;
+        while (remaining_bytes > 0) {
+          request_bytes = std::min(
+              static_cast<size_t>(rate_limiter_->GetSingleBurstBytes()),
+              remaining_bytes);
+          rate_limiter_->Request(request_bytes, rate_limiter_priority,
+                                 nullptr /* stats */,
+                                 RateLimiter::OpType::kRead);
+          remaining_bytes -= request_bytes;
+        }
+      }
+      io_s = file_->MultiRead(fs_reqs, num_fs_reqs, opts, nullptr);
+      RecordInHistogram(stats_, MULTIGET_IO_BATCH_SIZE, num_fs_reqs);
+    }
+
+    if (use_direct_io()) {
+      // Populate results in the unaligned read requests.
+      size_t aligned_i = 0;
+      for (size_t i = 0; i < num_reqs; i++) {
+        auto& r = read_reqs[i];
+        if (static_cast<size_t>(r.offset) > End(aligned_reqs[aligned_i])) {
+          aligned_i++;
+        }
+        const auto& fs_r = fs_reqs[aligned_i];
+        r.status = fs_r.status;
+        if (r.status.ok()) {
+          uint64_t offset = r.offset - fs_r.offset;
+          if (fs_r.result.size() <= offset) {
+            // No byte in the read range is returned.
+            r.result = Slice();
+          } else {
+            size_t len = std::min(
+                r.len, static_cast<size_t>(fs_r.result.size() - offset));
+            r.result = Slice(fs_r.scratch + offset, len);
+          }
+        } else {
+          r.result = Slice();
+        }
+      }
+    }
+
+    for (size_t i = 0; i < num_reqs; ++i) {
+      if (ShouldNotifyListeners()) {
+        auto finish_ts = FileOperationInfo::FinishNow();
+        NotifyOnFileReadFinish(read_reqs[i].offset, read_reqs[i].result.size(),
+                               start_ts, finish_ts, read_reqs[i].status);
+      }
+      if (!read_reqs[i].status.ok()) {
+        NotifyOnIOError(read_reqs[i].status, FileOperationType::kRead,
+                        file_name(), read_reqs[i].result.size(),
+                        read_reqs[i].offset);
+      }
+
+      RecordIOStats(stats_, file_temperature_, is_last_level_,
+                    read_reqs[i].result.size());
+    }
+    SetPerfLevel(prev_perf_level);
+  }
+  if (stats_ != nullptr && file_read_hist_ != nullptr) {
+    file_read_hist_->Add(elapsed);
+  }
+
+  return io_s;
+}
+
+IOStatus RandomAccessFileReader::PrepareIOOptions(const ReadOptions& ro,
+                                                  IOOptions& opts) const {
+  if (clock_ != nullptr) {
+    return PrepareIOFromReadOptions(ro, clock_, opts);
+  } else {
+    return PrepareIOFromReadOptions(ro, SystemClock::Default().get(), opts);
+  }
+}
+
+IOStatus RandomAccessFileReader::ReadAsync(
+    FSReadRequest& req, const IOOptions& opts,
+    std::function<void(const FSReadRequest&, void*)> cb, void* cb_arg,
+    void** io_handle, IOHandleDeleter* del_fn, AlignedBuf* aligned_buf) {
+  IOStatus s;
+  // Create a callback and populate info.
+  auto read_async_callback =
+      std::bind(&RandomAccessFileReader::ReadAsyncCallback, this,
+                std::placeholders::_1, std::placeholders::_2);
+  ReadAsyncInfo* read_async_info =
+      new ReadAsyncInfo(cb, cb_arg, clock_->NowMicros());
+
+  if (ShouldNotifyListeners()) {
+    read_async_info->fs_start_ts_ = FileOperationInfo::StartNow();
+  }
+
+  size_t alignment = file_->GetRequiredBufferAlignment();
+  bool is_aligned = (req.offset & (alignment - 1)) == 0 &&
+                    (req.len & (alignment - 1)) == 0 &&
+                    (uintptr_t(req.scratch) & (alignment - 1)) == 0;
+  read_async_info->is_aligned_ = is_aligned;
+
+  uint64_t elapsed = 0;
+  if (use_direct_io() && is_aligned == false) {
+    FSReadRequest aligned_req = Align(req, alignment);
+    aligned_req.status.PermitUncheckedError();
+
+    // Allocate aligned buffer.
+    read_async_info->buf_.Alignment(alignment);
+    read_async_info->buf_.AllocateNewBuffer(aligned_req.len);
+
+    // Set the remaining fields in the aligned FSReadRequest.
+    aligned_req.scratch = read_async_info->buf_.BufferStart();
+
+    // Set user provided fields to populate back in callback.
+    read_async_info->user_scratch_ = req.scratch;
+    read_async_info->user_aligned_buf_ = aligned_buf;
+    read_async_info->user_len_ = req.len;
+    read_async_info->user_offset_ = req.offset;
+    read_async_info->user_result_ = req.result;
+
+    assert(read_async_info->buf_.CurrentSize() == 0);
+
+    StopWatch sw(clock_, nullptr /*stats*/,
+                 Histograms::HISTOGRAM_ENUM_MAX /*hist_type*/,
+                 Histograms::HISTOGRAM_ENUM_MAX, &elapsed, true /*overwrite*/,
+                 true /*delay_enabled*/);
+    s = file_->ReadAsync(aligned_req, opts, read_async_callback,
+                         read_async_info, io_handle, del_fn, nullptr /*dbg*/);
+  } else {
+    StopWatch sw(clock_, nullptr /*stats*/,
+                 Histograms::HISTOGRAM_ENUM_MAX /*hist_type*/,
+                 Histograms::HISTOGRAM_ENUM_MAX, &elapsed, true /*overwrite*/,
+                 true /*delay_enabled*/);
+    s = file_->ReadAsync(req, opts, read_async_callback, read_async_info,
+                         io_handle, del_fn, nullptr /*dbg*/);
+  }
+  RecordTick(stats_, READ_ASYNC_MICROS, elapsed);
+
+// Suppress false positive clang analyzer warnings.
+// Memory is not released if file_->ReadAsync returns !s.ok(), because
+// ReadAsyncCallback is never called in that case. If ReadAsyncCallback is
+// called then ReadAsync should always return IOStatus::OK().
+#ifndef __clang_analyzer__
+  if (!s.ok()) {
+    delete read_async_info;
+  }
+#endif  // __clang_analyzer__
+
+  return s;
+}
+
+void RandomAccessFileReader::ReadAsyncCallback(const FSReadRequest& req,
+                                               void* cb_arg) {
+  ReadAsyncInfo* read_async_info = static_cast<ReadAsyncInfo*>(cb_arg);
+  assert(read_async_info);
+  assert(read_async_info->cb_);
+
+  if (use_direct_io() && read_async_info->is_aligned_ == false) {
+    // Create FSReadRequest with user provided fields.
+    FSReadRequest user_req;
+    user_req.scratch = read_async_info->user_scratch_;
+    user_req.offset = read_async_info->user_offset_;
+    user_req.len = read_async_info->user_len_;
+
+    // Update results in user_req.
+    user_req.result = req.result;
+    user_req.status = req.status;
+
+    read_async_info->buf_.Size(read_async_info->buf_.CurrentSize() +
+                               req.result.size());
+
+    size_t offset_advance_len = static_cast<size_t>(
+        /*offset_passed_by_user=*/read_async_info->user_offset_ -
+        /*aligned_offset=*/req.offset);
+
+    size_t res_len = 0;
+    if (req.status.ok() &&
+        offset_advance_len < read_async_info->buf_.CurrentSize()) {
+      res_len =
+          std::min(read_async_info->buf_.CurrentSize() - offset_advance_len,
+                   read_async_info->user_len_);
+      if (read_async_info->user_aligned_buf_ == nullptr) {
+        // Copy the data into user's scratch.
+// Clang analyzer assumes that it will take use_direct_io() == false in
+// ReadAsync and use_direct_io() == true in Callback which cannot be true.
+#ifndef __clang_analyzer__
+        read_async_info->buf_.Read(user_req.scratch, offset_advance_len,
+                                   res_len);
+#endif  // __clang_analyzer__
+      } else {
+        // Set aligned_buf provided by user without additional copy.
+        user_req.scratch =
+            read_async_info->buf_.BufferStart() + offset_advance_len;
+        read_async_info->user_aligned_buf_->reset(
+            read_async_info->buf_.Release());
+      }
+      user_req.result = Slice(user_req.scratch, res_len);
+    } else {
+      // Either req.status is not ok or data was not read.
+      user_req.result = Slice();
+    }
+    read_async_info->cb_(user_req, read_async_info->cb_arg_);
+  } else {
+    read_async_info->cb_(req, read_async_info->cb_arg_);
+  }
+
+  // Update stats and notify listeners.
+  if (stats_ != nullptr && file_read_hist_ != nullptr) {
+    // elapsed doesn't take into account delay and overwrite as StopWatch does
+    // in Read.
+    uint64_t elapsed = clock_->NowMicros() - read_async_info->start_time_;
+    file_read_hist_->Add(elapsed);
+  }
+  if (req.status.ok()) {
+    RecordInHistogram(stats_, ASYNC_READ_BYTES, req.result.size());
+  } else if (!req.status.IsAborted()) {
+    RecordTick(stats_, ASYNC_READ_ERROR_COUNT, 1);
+  }
+  if (ShouldNotifyListeners()) {
+    auto finish_ts = FileOperationInfo::FinishNow();
+    NotifyOnFileReadFinish(req.offset, req.result.size(),
+                           read_async_info->fs_start_ts_, finish_ts,
+                           req.status);
+  }
+  if (!req.status.ok()) {
+    NotifyOnIOError(req.status, FileOperationType::kRead, file_name(),
+                    req.result.size(), req.offset);
+  }
+  RecordIOStats(stats_, file_temperature_, is_last_level_, req.result.size());
+  delete read_async_info;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.h
new file mode 100644
index 0000000000..ab4d1e7978
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/random_access_file_reader.h
@@ -0,0 +1,210 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <algorithm>
+#include <sstream>
+#include <string>
+
+#include "env/file_system_tracer.h"
+#include "port/port.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/options.h"
+#include "rocksdb/rate_limiter.h"
+#include "util/aligned_buffer.h"
+
+namespace ROCKSDB_NAMESPACE {
+class Statistics;
+class HistogramImpl;
+class SystemClock;
+
+using AlignedBuf = std::unique_ptr<char[]>;
+
+// Align the request r according to alignment and return the aligned result.
+FSReadRequest Align(const FSReadRequest& r, size_t alignment);
+
+// Try to merge src to dest if they have overlap.
+//
+// Each request represents an inclusive interval [offset, offset + len].
+// If the intervals have overlap, update offset and len to represent the
+// merged interval, and return true.
+// Otherwise, do nothing and return false.
+bool TryMerge(FSReadRequest* dest, const FSReadRequest& src);
+
+// RandomAccessFileReader is a wrapper on top of FSRandomAccessFile. It is
+// responsible for:
+// - Handling Buffered and Direct reads appropriately.
+// - Rate limiting compaction reads.
+// - Notifying any interested listeners on the completion of a read.
+// - Updating IO stats.
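+//
+// Illustrative usage sketch (an editor's addition, not part of the vendored
+// upstream sources; `fs`, `fname`, and `kBlockSize` are placeholder names):
+//
+//   std::unique_ptr<RandomAccessFileReader> reader;
+//   IOStatus io_s = RandomAccessFileReader::Create(fs, fname, FileOptions(),
+//                                                  &reader, /*dbg=*/nullptr);
+//   char scratch[kBlockSize];
+//   Slice result;
+//   if (io_s.ok()) {
+//     io_s = reader->Read(IOOptions(), /*offset=*/0, /*n=*/kBlockSize,
+//                         &result, scratch, /*aligned_buf=*/nullptr,
+//                         Env::IO_TOTAL /*bypass rate limiter*/);
+//   }
+//   // In buffered mode `result` points into `scratch` (or an mmap buffer);
+//   // in direct-I/O mode, pass an AlignedBuf* to avoid an extra copy.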
+class RandomAccessFileReader {
+ private:
+  void NotifyOnFileReadFinish(
+      uint64_t offset, size_t length,
+      const FileOperationInfo::StartTimePoint& start_ts,
+      const FileOperationInfo::FinishTimePoint& finish_ts,
+      const Status& status) const {
+    FileOperationInfo info(FileOperationType::kRead, file_name_, start_ts,
+                           finish_ts, status, file_temperature_);
+    info.offset = offset;
+    info.length = length;
+
+    for (auto& listener : listeners_) {
+      listener->OnFileReadFinish(info);
+    }
+    info.status.PermitUncheckedError();
+  }
+
+  void NotifyOnIOError(const IOStatus& io_status, FileOperationType operation,
+                       const std::string& file_path, size_t length,
+                       uint64_t offset) const {
+    if (listeners_.empty()) {
+      return;
+    }
+    IOErrorInfo io_error_info(io_status, operation, file_path, length, offset);
+
+    for (auto& listener : listeners_) {
+      listener->OnIOError(io_error_info);
+    }
+    io_status.PermitUncheckedError();
+  }
+
+  bool ShouldNotifyListeners() const { return !listeners_.empty(); }
+
+  FSRandomAccessFilePtr file_;
+  std::string file_name_;
+  SystemClock* clock_;
+  Statistics* stats_;
+  uint32_t hist_type_;
+  HistogramImpl* file_read_hist_;
+  RateLimiter* rate_limiter_;
+  std::vector<std::shared_ptr<EventListener>> listeners_;
+  const Temperature file_temperature_;
+  const bool is_last_level_;
+
+  struct ReadAsyncInfo {
+    ReadAsyncInfo(std::function<void(const FSReadRequest&, void*)> cb,
+                  void* cb_arg, uint64_t start_time)
+        : cb_(cb),
+          cb_arg_(cb_arg),
+          start_time_(start_time),
+          user_scratch_(nullptr),
+          user_aligned_buf_(nullptr),
+          user_offset_(0),
+          user_len_(0),
+          is_aligned_(false) {}
+
+    std::function<void(const FSReadRequest&, void*)> cb_;
+    void* cb_arg_;
+    uint64_t start_time_;
+    FileOperationInfo::StartTimePoint fs_start_ts_;
+    // The fields below store the parameters passed by the caller in case of
+    // direct_io.
+    char* user_scratch_;
+    AlignedBuf* user_aligned_buf_;
+    uint64_t user_offset_;
+    size_t user_len_;
+    Slice user_result_;
+    // Used in case of direct_io
+    AlignedBuffer buf_;
+    bool is_aligned_;
+  };
+
+ public:
+  explicit RandomAccessFileReader(
+      std::unique_ptr<FSRandomAccessFile>&& raf, const std::string& _file_name,
+      SystemClock* clock = nullptr,
+      const std::shared_ptr<IOTracer>& io_tracer = nullptr,
+      Statistics* stats = nullptr,
+      uint32_t hist_type = Histograms::HISTOGRAM_ENUM_MAX,
+      HistogramImpl* file_read_hist = nullptr,
+      RateLimiter* rate_limiter = nullptr,
+      const std::vector<std::shared_ptr<EventListener>>& listeners = {},
+      Temperature file_temperature = Temperature::kUnknown,
+      bool is_last_level = false)
+      : file_(std::move(raf), io_tracer, _file_name),
+        file_name_(std::move(_file_name)),
+        clock_(clock),
+        stats_(stats),
+        hist_type_(hist_type),
+        file_read_hist_(file_read_hist),
+        rate_limiter_(rate_limiter),
+        listeners_(),
+        file_temperature_(file_temperature),
+        is_last_level_(is_last_level) {
+    std::for_each(listeners.begin(), listeners.end(),
+                  [this](const std::shared_ptr<EventListener>& e) {
+                    if (e->ShouldBeNotifiedOnFileIO()) {
+                      listeners_.emplace_back(e);
+                    }
+                  });
+  }
+
+  static IOStatus Create(const std::shared_ptr<FileSystem>& fs,
+                         const std::string& fname, const FileOptions& file_opts,
+                         std::unique_ptr<RandomAccessFileReader>* reader,
+                         IODebugContext* dbg);
+  RandomAccessFileReader(const RandomAccessFileReader&) = delete;
+  RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete;
+
+  // In non-direct IO mode,
+  // 1. if using mmap, result is stored in a buffer other than scratch;
+  // 2. if not using mmap, result is stored in the buffer starting from scratch.
+  //
+  // In direct IO mode, an aligned buffer is allocated internally.
+  // 1. If aligned_buf is null, then results are copied to the buffer
+  //    starting from scratch;
+  // 2. Otherwise, scratch is not used and can be null, the aligned_buf owns
+  //    the internally allocated buffer on return, and the result refers to a
+  //    region in aligned_buf.
+  //
+  // `rate_limiter_priority` is used to charge the internal rate limiter when
+  // enabled. The special value `Env::IO_TOTAL` makes this operation bypass the
+  // rate limiter.
+  IOStatus Read(const IOOptions& opts, uint64_t offset, size_t n, Slice* result,
+                char* scratch, AlignedBuf* aligned_buf,
+                Env::IOPriority rate_limiter_priority) const;
+
+  // REQUIRES:
+  // num_reqs > 0, reqs do not overlap, and offsets in reqs are increasing.
+  // In non-direct IO mode, aligned_buf should be null;
+  // In direct IO mode, aligned_buf stores the aligned buffer allocated inside
+  // MultiRead, the result Slices in reqs refer to aligned_buf.
+  //
+  // `rate_limiter_priority` will be used to charge the internal rate limiter.
+  // It is not yet supported so the client must provide the special value
+  // `Env::IO_TOTAL` to bypass the rate limiter.
+  IOStatus MultiRead(const IOOptions& opts, FSReadRequest* reqs,
+                     size_t num_reqs, AlignedBuf* aligned_buf,
+                     Env::IOPriority rate_limiter_priority) const;
+
+  IOStatus Prefetch(uint64_t offset, size_t n,
+                    const Env::IOPriority rate_limiter_priority) const {
+    IOOptions opts;
+    opts.rate_limiter_priority = rate_limiter_priority;
+    return file_->Prefetch(offset, n, opts, nullptr);
+  }
+
+  FSRandomAccessFile* file() { return file_.get(); }
+
+  const std::string& file_name() const { return file_name_; }
+
+  bool use_direct_io() const { return file_->use_direct_io(); }
+
+  IOStatus PrepareIOOptions(const ReadOptions& ro, IOOptions& opts) const;
+
+  IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
+                     std::function<void(const FSReadRequest&, void*)> cb,
+                     void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
+                     AlignedBuf* aligned_buf);
+
+  void ReadAsyncCallback(const FSReadRequest& req, void* cb_arg);
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.cc
new file mode 100644
index 0000000000..3617a35e33
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+ +#include "file/read_write_util.h" + +#include + +#include "test_util/sync_point.h" + +namespace ROCKSDB_NAMESPACE { + +IOStatus NewWritableFile(FileSystem* fs, const std::string& fname, + std::unique_ptr* result, + const FileOptions& options) { + TEST_SYNC_POINT_CALLBACK("NewWritableFile::FileOptions.temperature", + const_cast(&options.temperature)); + IOStatus s = fs->NewWritableFile(fname, options, result, nullptr); + TEST_KILL_RANDOM_WITH_WEIGHT("NewWritableFile:0", REDUCE_ODDS2); + return s; +} + +#ifndef NDEBUG +bool IsFileSectorAligned(const size_t off, size_t sector_size) { + return off % sector_size == 0; +} +#endif // NDEBUG +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.h new file mode 100644 index 0000000000..9f034b705f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/read_write_util.h @@ -0,0 +1,31 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include "file/sequence_file_reader.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" + +namespace ROCKSDB_NAMESPACE { +// Returns a WritableFile. +// +// env : the Env. +// fname : the file name. +// result : output arg. A WritableFile based on `fname` returned. +// options : the Env Options. +extern IOStatus NewWritableFile(FileSystem* fs, const std::string& fname, + std::unique_ptr* result, + const FileOptions& options); + +#ifndef NDEBUG +bool IsFileSectorAligned(const size_t off, size_t sector_size); +#endif // NDEBUG +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_file_info.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_file_info.h new file mode 100644 index 0000000000..f0208bf2dd --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_file_info.h @@ -0,0 +1,33 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// struct ReadaheadFileInfo contains readahead information that is passed from +// one file to another file per level during iterations. This information helps +// iterators to carry forward the internal automatic prefetching readahead value +// to next file during sequential reads instead of starting from the scratch. + +struct ReadaheadFileInfo { + struct ReadaheadInfo { + size_t readahead_size = 0; + int64_t num_file_reads = 0; + }; + + // Used by Data block iterators to update readahead info. + ReadaheadInfo data_block_readahead_info; + + // Used by Index block iterators to update readahead info. 
+  ReadaheadInfo index_block_readahead_info;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.cc
new file mode 100644
index 0000000000..dd09822e3e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.cc
@@ -0,0 +1,169 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "file/readahead_raf.h"
+
+#include <algorithm>
+#include <mutex>
+
+#include "file/read_write_util.h"
+#include "rocksdb/file_system.h"
+#include "util/aligned_buffer.h"
+#include "util/rate_limiter_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+class ReadaheadRandomAccessFile : public FSRandomAccessFile {
+ public:
+  ReadaheadRandomAccessFile(std::unique_ptr<FSRandomAccessFile>&& file,
+                            size_t readahead_size)
+      : file_(std::move(file)),
+        alignment_(file_->GetRequiredBufferAlignment()),
+        readahead_size_(Roundup(readahead_size, alignment_)),
+        buffer_(),
+        buffer_offset_(0) {
+    buffer_.Alignment(alignment_);
+    buffer_.AllocateNewBuffer(readahead_size_);
+  }
+
+  ReadaheadRandomAccessFile(const ReadaheadRandomAccessFile&) = delete;
+
+  ReadaheadRandomAccessFile& operator=(const ReadaheadRandomAccessFile&) =
+      delete;
+
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override {
+    // Read-ahead only makes sense if we have some slack left after reading
+    if (n + alignment_ >= readahead_size_) {
+      return file_->Read(offset, n, options, result, scratch, dbg);
+    }
+
+    std::unique_lock<std::mutex> lk(lock_);
+
+    size_t cached_len = 0;
+    // Check if there is a cache hit, meaning that [offset, offset + n) is
+    // either completely or partially in the buffer. If it's completely cached,
+    // including the end-of-file case when offset + n is greater than EOF, then
+    // return.
+    if (TryReadFromCache(offset, n, &cached_len, scratch) &&
+        (cached_len == n || buffer_.CurrentSize() < readahead_size_)) {
+      // We read exactly what we needed, or we hit end of file - return.
+      *result = Slice(scratch, cached_len);
+      return IOStatus::OK();
+    }
+    size_t advanced_offset = static_cast<size_t>(offset + cached_len);
+    // In the case of a cache hit, advanced_offset is already aligned, meaning
+    // that chunk_offset equals advanced_offset
+    size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);
+
+    IOStatus s = ReadIntoBuffer(chunk_offset, readahead_size_, options, dbg);
+    if (s.ok()) {
+      // The data we need is now in cache, so we can safely read it
+      size_t remaining_len;
+      TryReadFromCache(advanced_offset, n - cached_len, &remaining_len,
+                       scratch + cached_len);
+      *result = Slice(scratch, cached_len + remaining_len);
+    }
+    return s;
+  }
+
+  IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options,
+                    IODebugContext* dbg) override {
+    if (n < readahead_size_) {
+      // Don't allow smaller prefetches than the configured `readahead_size_`.
+      // `Read()` assumes a smaller prefetch buffer indicates EOF was reached.
+      return IOStatus::OK();
+    }
+
+    std::unique_lock<std::mutex> lk(lock_);
+
+    size_t offset_ = static_cast<size_t>(offset);
+    size_t prefetch_offset = TruncateToPageBoundary(alignment_, offset_);
+    if (prefetch_offset == buffer_offset_) {
+      return IOStatus::OK();
+    }
+    return ReadIntoBuffer(prefetch_offset,
+                          Roundup(offset_ + n, alignment_) - prefetch_offset,
+                          options, dbg);
+  }
+
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    return file_->GetUniqueId(id, max_size);
+  }
+
+  void Hint(AccessPattern pattern) override { file_->Hint(pattern); }
+
+  IOStatus InvalidateCache(size_t offset, size_t length) override {
+    std::unique_lock<std::mutex> lk(lock_);
+    buffer_.Clear();
+    return file_->InvalidateCache(offset, length);
+  }
+
+  bool use_direct_io() const override { return file_->use_direct_io(); }
+
+ private:
+  // Tries to read n bytes from buffer_ starting at offset. If anything was
+  // read from the cache, sets cached_len to the number of bytes actually
+  // read, copies that many bytes to scratch, and returns true.
+  // If nothing was read, sets cached_len to 0 and returns false.
+  bool TryReadFromCache(uint64_t offset, size_t n, size_t* cached_len,
+                        char* scratch) const {
+    if (offset < buffer_offset_ ||
+        offset >= buffer_offset_ + buffer_.CurrentSize()) {
+      *cached_len = 0;
+      return false;
+    }
+    uint64_t offset_in_buffer = offset - buffer_offset_;
+    *cached_len = std::min(
+        buffer_.CurrentSize() - static_cast<size_t>(offset_in_buffer), n);
+    memcpy(scratch, buffer_.BufferStart() + offset_in_buffer, *cached_len);
+    return true;
+  }
+
+  // Reads into buffer_ the next n bytes from file_ starting at offset.
+  // Can actually read less if EOF was reached.
+  // Returns the status of the read operation on the file.
+  IOStatus ReadIntoBuffer(uint64_t offset, size_t n, const IOOptions& options,
+                          IODebugContext* dbg) const {
+    if (n > buffer_.Capacity()) {
+      n = buffer_.Capacity();
+    }
+    assert(IsFileSectorAligned(offset, alignment_));
+    assert(IsFileSectorAligned(n, alignment_));
+    Slice result;
+    IOStatus s =
+        file_->Read(offset, n, options, &result, buffer_.BufferStart(), dbg);
+    if (s.ok()) {
+      buffer_offset_ = offset;
+      buffer_.Size(result.size());
+      assert(result.size() == 0 || buffer_.BufferStart() == result.data());
+    }
+    return s;
+  }
+
+  const std::unique_ptr<FSRandomAccessFile> file_;
+  const size_t alignment_;
+  const size_t readahead_size_;
+
+  mutable std::mutex lock_;
+  // The buffer storing the prefetched data
+  mutable AlignedBuffer buffer_;
+  // The offset in file_, corresponding to data stored in buffer_
+  mutable uint64_t buffer_offset_;
+};
+}  // namespace
+
+std::unique_ptr<FSRandomAccessFile> NewReadaheadRandomAccessFile(
+    std::unique_ptr<FSRandomAccessFile>&& file, size_t readahead_size) {
+  std::unique_ptr<FSRandomAccessFile> result(
+      new ReadaheadRandomAccessFile(std::move(file), readahead_size));
+  return result;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.h
new file mode 100644
index 0000000000..dfaf2b4fa9
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/readahead_raf.h
@@ -0,0 +1,29 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <memory>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+class FSRandomAccessFile;
+// This file provides the following main abstractions:
+// SequentialFileReader : wrapper over Env::SequentialFile
+// RandomAccessFileReader : wrapper over Env::RandomAccessFile
+// WritableFileWriter : wrapper over Env::WritableFile
+// In addition, it also exposes the NewReadaheadRandomAccessFile,
+// NewWritableFile, and ReadOneLine primitives.
+
+// NewReadaheadRandomAccessFile provides a wrapper over RandomAccessFile to
+// always prefetch additional data with every read. This is mainly used in
+// Compaction Table Readers.
+std::unique_ptr<FSRandomAccessFile> NewReadaheadRandomAccessFile(
+    std::unique_ptr<FSRandomAccessFile>&& file, size_t readahead_size);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.cc
new file mode 100644
index 0000000000..a753c1d098
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.cc
@@ -0,0 +1,320 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "file/sequence_file_reader.h"
+
+#include <algorithm>
+#include <mutex>
+
+#include "file/read_write_util.h"
+#include "monitoring/histogram.h"
+#include "monitoring/iostats_context_imp.h"
+#include "port/port.h"
+#include "test_util/sync_point.h"
+#include "util/aligned_buffer.h"
+#include "util/random.h"
+#include "util/rate_limiter_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+IOStatus SequentialFileReader::Create(
+    const std::shared_ptr<FileSystem>& fs, const std::string& fname,
+    const FileOptions& file_opts, std::unique_ptr<SequentialFileReader>* reader,
+    IODebugContext* dbg, RateLimiter* rate_limiter) {
+  std::unique_ptr<FSSequentialFile> file;
+  IOStatus io_s = fs->NewSequentialFile(fname, file_opts, &file, dbg);
+  if (io_s.ok()) {
+    reader->reset(new SequentialFileReader(std::move(file), fname, nullptr, {},
+                                           rate_limiter));
+  }
+  return io_s;
+}
+
+IOStatus SequentialFileReader::Read(size_t n, Slice* result, char* scratch,
+                                    Env::IOPriority rate_limiter_priority) {
+  IOStatus io_s;
+  if (use_direct_io()) {
+    //
+    //    |-offset_advance-|---bytes returned--|
+    //    |----------------------buf size-------------------------|
+    //    |                |                   |                   |
+    //  aligned          offset            offset + n    Roundup(offset + n,
+    //  offset                                                 alignment)
+    //
+    size_t offset = offset_.fetch_add(n);
+    size_t alignment = file_->GetRequiredBufferAlignment();
+    size_t aligned_offset = TruncateToPageBoundary(alignment, offset);
+    size_t offset_advance = offset - aligned_offset;
+    size_t size = Roundup(offset + n, alignment) - aligned_offset;
+    size_t r = 0;
+    AlignedBuffer buf;
+    buf.Alignment(alignment);
+    buf.AllocateNewBuffer(size);
+
+    while (buf.CurrentSize() < size) {
+      size_t allowed;
+      if (rate_limiter_priority != Env::IO_TOTAL && rate_limiter_ != nullptr) {
+        allowed = rate_limiter_->RequestToken(
+            buf.Capacity() - buf.CurrentSize(), buf.Alignment(),
+            rate_limiter_priority, nullptr /* stats */,
+            RateLimiter::OpType::kRead);
+      } else {
+        assert(buf.CurrentSize() == 0);
+        allowed = size;
+      }
+
+      Slice tmp;
+      uint64_t orig_offset = 0;
+      FileOperationInfo::StartTimePoint start_ts;
+      if (ShouldNotifyListeners()) {
+        orig_offset = aligned_offset + buf.CurrentSize();
+        start_ts = FileOperationInfo::StartNow();
+      }
+      io_s = file_->PositionedRead(aligned_offset + buf.CurrentSize(), allowed,
+                                   IOOptions(), &tmp, buf.Destination(),
+                                   nullptr /* dbg */);
+      if (ShouldNotifyListeners()) {
+        auto finish_ts = FileOperationInfo::FinishNow();
+        NotifyOnFileReadFinish(orig_offset, tmp.size(), start_ts, finish_ts,
+                               io_s);
+      }
+      buf.Size(buf.CurrentSize() + tmp.size());
+      if (!io_s.ok() || tmp.size() < allowed) {
+        break;
+      }
+    }
+
+    if (io_s.ok() && offset_advance < buf.CurrentSize()) {
+      r = buf.Read(scratch, offset_advance,
+                   std::min(buf.CurrentSize() - offset_advance, n));
+    }
+    *result = Slice(scratch, r);
+  } else {
+    // To be paranoid, modify scratch a little bit, so in case underlying
+    // FileSystem doesn't fill the buffer but returns success while `scratch`
+    // still contains a previous block, the returned value will not pass the
+    // checksum.
+    // It's hard to find a useful byte for the direct I/O case, so we skip it.
+    if (n > 0 && scratch != nullptr) {
+      scratch[0]++;
+    }
+
+    size_t read = 0;
+    while (read < n) {
+      size_t allowed;
+      if (rate_limiter_priority != Env::IO_TOTAL && rate_limiter_ != nullptr) {
+        allowed = rate_limiter_->RequestToken(
+            n - read, 0 /* alignment */, rate_limiter_priority,
+            nullptr /* stats */, RateLimiter::OpType::kRead);
+      } else {
+        allowed = n;
+      }
+      FileOperationInfo::StartTimePoint start_ts;
+      if (ShouldNotifyListeners()) {
+        start_ts = FileOperationInfo::StartNow();
+      }
+      Slice tmp;
+      io_s = file_->Read(allowed, IOOptions(), &tmp, scratch + read,
+                         nullptr /* dbg */);
+      if (ShouldNotifyListeners()) {
+        auto finish_ts = FileOperationInfo::FinishNow();
+        size_t offset = offset_.fetch_add(tmp.size());
+        NotifyOnFileReadFinish(offset, tmp.size(), start_ts, finish_ts, io_s);
+      }
+      read += tmp.size();
+      if (!io_s.ok() || tmp.size() < allowed) {
+        break;
+      }
+    }
+    *result = Slice(scratch, read);
+  }
+  IOSTATS_ADD(bytes_read, result->size());
+  return io_s;
+}
+
+IOStatus SequentialFileReader::Skip(uint64_t n) {
+  if (use_direct_io()) {
+    offset_ += static_cast<size_t>(n);
+    return IOStatus::OK();
+  }
+  return file_->Skip(n);
+}
+
+namespace {
+// This class wraps a SequentialFile, exposing the same API, with the
+// difference of being able to prefetch up to readahead_size bytes and then
+// serve them from memory, avoiding the entire round-trip if, for example,
+// the data for the file is actually remote.
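+//
+// Illustrative walk-through (an editor's addition, not part of the vendored
+// upstream sources), assuming readahead_size = 8192 and alignment = 4096:
+//
+//   Read(100, ...)  -> buffer miss: one file_->Read of 8192 bytes fills
+//                      buffer_, and the first 100 bytes are returned.
+//   Read(100, ...)  -> served entirely from buffer_, no file I/O.
+//   Read(8192, ...) -> n + alignment >= readahead_size, so the request
+//                      bypasses the buffer and goes straight to file_.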
+class ReadaheadSequentialFile : public FSSequentialFile {
+ public:
+  ReadaheadSequentialFile(std::unique_ptr<FSSequentialFile>&& file,
+                          size_t readahead_size)
+      : file_(std::move(file)),
+        alignment_(file_->GetRequiredBufferAlignment()),
+        readahead_size_(Roundup(readahead_size, alignment_)),
+        buffer_(),
+        buffer_offset_(0),
+        read_offset_(0) {
+    buffer_.Alignment(alignment_);
+    buffer_.AllocateNewBuffer(readahead_size_);
+  }
+
+  ReadaheadSequentialFile(const ReadaheadSequentialFile&) = delete;
+
+  ReadaheadSequentialFile& operator=(const ReadaheadSequentialFile&) = delete;
+
+  IOStatus Read(size_t n, const IOOptions& opts, Slice* result, char* scratch,
+                IODebugContext* dbg) override {
+    std::unique_lock<std::mutex> lk(lock_);
+
+    size_t cached_len = 0;
+    // Check if there is a cache hit, meaning that [offset, offset + n) is
+    // either completely or partially in the buffer. If it's completely cached,
+    // including the end-of-file case when offset + n is greater than EOF, then
+    // return.
+    if (TryReadFromCache(n, &cached_len, scratch) &&
+        (cached_len == n || buffer_.CurrentSize() < readahead_size_)) {
+      // We read exactly what we needed, or we hit end of file - return.
+      *result = Slice(scratch, cached_len);
+      return IOStatus::OK();
+    }
+    n -= cached_len;
+
+    IOStatus s;
+    // Read-ahead only makes sense if we have some slack left after reading
+    if (n + alignment_ >= readahead_size_) {
+      s = file_->Read(n, opts, result, scratch + cached_len, dbg);
+      if (s.ok()) {
+        read_offset_ += result->size();
+        *result = Slice(scratch, cached_len + result->size());
+      }
+      buffer_.Clear();
+      return s;
+    }
+
+    s = ReadIntoBuffer(readahead_size_, opts, dbg);
+    if (s.ok()) {
+      // The data we need is now in cache, so we can safely read it
+      size_t remaining_len;
+      TryReadFromCache(n, &remaining_len, scratch + cached_len);
+      *result = Slice(scratch, cached_len + remaining_len);
+    }
+    return s;
+  }
+
+  IOStatus Skip(uint64_t n) override {
+    std::unique_lock<std::mutex> lk(lock_);
+    IOStatus s = IOStatus::OK();
+    // First check if we need to skip already cached data
+    if (buffer_.CurrentSize() > 0) {
+      // Do we need to skip beyond cached data?
+      if (read_offset_ + n >= buffer_offset_ + buffer_.CurrentSize()) {
+        // Yes. Skip whatever is in memory and adjust offset accordingly
+        n -= buffer_offset_ + buffer_.CurrentSize() - read_offset_;
+        read_offset_ = buffer_offset_ + buffer_.CurrentSize();
+      } else {
+        // No. The entire section to be skipped is in cache.
+        read_offset_ += n;
+        n = 0;
+      }
+    }
+    if (n > 0) {
+      // We still need to skip more, so call the file API for skipping
+      s = file_->Skip(n);
+      if (s.ok()) {
+        read_offset_ += n;
+      }
+      buffer_.Clear();
+    }
+    return s;
+  }
+
+  IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& opts,
+                          Slice* result, char* scratch,
+                          IODebugContext* dbg) override {
+    return file_->PositionedRead(offset, n, opts, result, scratch, dbg);
+  }
+
+  IOStatus InvalidateCache(size_t offset, size_t length) override {
+    std::unique_lock<std::mutex> lk(lock_);
+    buffer_.Clear();
+    return file_->InvalidateCache(offset, length);
+  }
+
+  bool use_direct_io() const override { return file_->use_direct_io(); }
+
+ private:
+  // Tries to read n bytes from buffer_. If anything was read from the cache,
+  // sets cached_len to the number of bytes actually read, copies that many
+  // bytes to scratch, and returns true.
+  // If nothing was read, sets cached_len to 0 and returns false.
+ bool TryReadFromCache(size_t n, size_t* cached_len, char* scratch) { + if (read_offset_ < buffer_offset_ || + read_offset_ >= buffer_offset_ + buffer_.CurrentSize()) { + *cached_len = 0; + return false; + } + uint64_t offset_in_buffer = read_offset_ - buffer_offset_; + *cached_len = std::min( + buffer_.CurrentSize() - static_cast<size_t>(offset_in_buffer), n); + memcpy(scratch, buffer_.BufferStart() + offset_in_buffer, *cached_len); + read_offset_ += *cached_len; + return true; + } + + // Reads into buffer_ the next n bytes from file_. + // Can actually read less if EOF was reached. + // Returns the status of the read operation on the file. + IOStatus ReadIntoBuffer(size_t n, const IOOptions& opts, + IODebugContext* dbg) { + if (n > buffer_.Capacity()) { + n = buffer_.Capacity(); + } + assert(IsFileSectorAligned(n, alignment_)); + Slice result; + IOStatus s = file_->Read(n, opts, &result, buffer_.BufferStart(), dbg); + if (s.ok()) { + buffer_offset_ = read_offset_; + buffer_.Size(result.size()); + assert(result.size() == 0 || buffer_.BufferStart() == result.data()); + } + return s; + } + + const std::unique_ptr<FSSequentialFile> file_; + const size_t alignment_; + const size_t readahead_size_; + + std::mutex lock_; + // The buffer storing the prefetched data + AlignedBuffer buffer_; + // The offset in file_, corresponding to data stored in buffer_ + uint64_t buffer_offset_; + // The offset up to which data was read from file_. In fact, it can be larger + // than the actual file size, since the file_->Skip(n) call doesn't return the + // actual number of bytes that were skipped, which can be less than n. + // This is not a problem since read_offset_ is monotonically increasing and + // its only use is to figure out if next piece of data should be read from + // buffer_ or file_ directly. + uint64_t read_offset_; +}; +} // namespace + +std::unique_ptr<FSSequentialFile> +SequentialFileReader::NewReadaheadSequentialFile( + std::unique_ptr<FSSequentialFile>&& file, size_t readahead_size) { + if (file->GetRequiredBufferAlignment() >= readahead_size) { + // Short-circuit and return the original file if readahead_size is + // too small and hence doesn't make sense to be used for prefetching. + return std::move(file); + } + std::unique_ptr<FSSequentialFile> result( + new ReadaheadSequentialFile(std::move(file), readahead_size)); + return result; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.h new file mode 100644 index 0000000000..14350e8de4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sequence_file_reader.h @@ -0,0 +1,119 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include <atomic> +#include <string> + +#include "env/file_system_tracer.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" + +namespace ROCKSDB_NAMESPACE { + +// SequentialFileReader is a wrapper on top of Env::SequentialFile.
It handles +// Buffered (i.e., when page cache is enabled) and Direct (with O_DIRECT / page +// cache disabled) reads appropriately, and also updates the IO stats. +class SequentialFileReader { + private: + void NotifyOnFileReadFinish( + uint64_t offset, size_t length, + const FileOperationInfo::StartTimePoint& start_ts, + const FileOperationInfo::FinishTimePoint& finish_ts, + const Status& status) const { + FileOperationInfo info(FileOperationType::kRead, file_name_, start_ts, + finish_ts, status); + info.offset = offset; + info.length = length; + + for (auto& listener : listeners_) { + listener->OnFileReadFinish(info); + } + info.status.PermitUncheckedError(); + } + + void AddFileIOListeners( + const std::vector<std::shared_ptr<EventListener>>& listeners) { + std::for_each(listeners.begin(), listeners.end(), + [this](const std::shared_ptr<EventListener>& e) { + if (e->ShouldBeNotifiedOnFileIO()) { + listeners_.emplace_back(e); + } + }); + } + + bool ShouldNotifyListeners() const { return !listeners_.empty(); } + + std::string file_name_; + FSSequentialFilePtr file_; + std::atomic<size_t> offset_{0}; // read offset + std::vector<std::shared_ptr<EventListener>> listeners_{}; + RateLimiter* rate_limiter_; + + public: + explicit SequentialFileReader( + std::unique_ptr<FSSequentialFile>&& _file, const std::string& _file_name, + const std::shared_ptr<IOTracer>& io_tracer = nullptr, + const std::vector<std::shared_ptr<EventListener>>& listeners = {}, + RateLimiter* rate_limiter = + nullptr) // TODO: migrate call sites to provide rate limiter + : file_name_(_file_name), + file_(std::move(_file), io_tracer, _file_name), + listeners_(), + rate_limiter_(rate_limiter) { + AddFileIOListeners(listeners); + } + + explicit SequentialFileReader( + std::unique_ptr<FSSequentialFile>&& _file, const std::string& _file_name, + size_t _readahead_size, + const std::shared_ptr<IOTracer>& io_tracer = nullptr, + const std::vector<std::shared_ptr<EventListener>>& listeners = {}, + RateLimiter* rate_limiter = + nullptr) // TODO: migrate call sites to provide rate limiter + : file_name_(_file_name), + file_(NewReadaheadSequentialFile(std::move(_file), _readahead_size), + io_tracer, _file_name), + listeners_(), + rate_limiter_(rate_limiter) { + AddFileIOListeners(listeners); + } + static IOStatus Create(const std::shared_ptr<FileSystem>& fs, + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr<SequentialFileReader>* reader, + IODebugContext* dbg, RateLimiter* rate_limiter); + + SequentialFileReader(const SequentialFileReader&) = delete; + SequentialFileReader& operator=(const SequentialFileReader&) = delete; + + // `rate_limiter_priority` is used to charge the internal rate limiter when + // enabled. The special value `Env::IO_TOTAL` makes this operation bypass the + // rate limiter. The amount charged to the internal rate limiter is n, even + // when less than n bytes are actually read (e.g. at end of file). To avoid + // overcharging the rate limiter, the caller can use file size to cap n to + // read until end of file. + IOStatus Read(size_t n, Slice* result, char* scratch, + Env::IOPriority rate_limiter_priority); + + IOStatus Skip(uint64_t n); + + FSSequentialFile* file() { return file_.get(); } + + std::string file_name() { return file_name_; } + + bool use_direct_io() const { return file_->use_direct_io(); } + + private: + // NewReadaheadSequentialFile provides a wrapper over SequentialFile to + // always prefetch additional data with every read.
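+ // If readahead_size is no larger than the file's required buffer + // alignment, NewReadaheadSequentialFile returns the file unwrapped (see + // the definition in sequence_file_reader.cc above). Illustrative call, + // with a hypothetical 64 KiB readahead: + // auto wrapped = NewReadaheadSequentialFile(std::move(raw_file), 64 << 10);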
+ static std::unique_ptr<FSSequentialFile> NewReadaheadSequentialFile( + std::unique_ptr<FSSequentialFile>&& file, size_t readahead_size); +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.cc new file mode 100644 index 0000000000..459ea36cdb --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.cc @@ -0,0 +1,507 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "file/sst_file_manager_impl.h" + +#include <cinttypes> +#include <vector> + +#include "db/db_impl/db_impl.h" +#include "logging/logging.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/sst_file_manager.h" +#include "test_util/sync_point.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +SstFileManagerImpl::SstFileManagerImpl( + const std::shared_ptr<SystemClock>& clock, + const std::shared_ptr<FileSystem>& fs, + const std::shared_ptr<Logger>& logger, int64_t rate_bytes_per_sec, + double max_trash_db_ratio, uint64_t bytes_max_delete_chunk) + : clock_(clock), + fs_(fs), + logger_(logger), + total_files_size_(0), + compaction_buffer_size_(0), + cur_compactions_reserved_size_(0), + max_allowed_space_(0), + delete_scheduler_(clock_.get(), fs_.get(), rate_bytes_per_sec, + logger.get(), this, max_trash_db_ratio, + bytes_max_delete_chunk), + cv_(&mu_), + closing_(false), + bg_thread_(nullptr), + reserved_disk_buffer_(0), + free_space_trigger_(0), + cur_instance_(nullptr) {} + +SstFileManagerImpl::~SstFileManagerImpl() { + Close(); + bg_err_.PermitUncheckedError(); +} + +void SstFileManagerImpl::Close() { + { + MutexLock l(&mu_); + if (closing_) { + return; + } + closing_ = true; + cv_.SignalAll(); + } + if (bg_thread_) { + bg_thread_->join(); + } +} + +Status SstFileManagerImpl::OnAddFile(const std::string& file_path) { + uint64_t file_size; + Status s = fs_->GetFileSize(file_path, IOOptions(), &file_size, nullptr); + if (s.ok()) { + MutexLock l(&mu_); + OnAddFileImpl(file_path, file_size); + } + TEST_SYNC_POINT_CALLBACK("SstFileManagerImpl::OnAddFile", + const_cast<std::string*>(&file_path)); + return s; +} + +Status SstFileManagerImpl::OnAddFile(const std::string& file_path, + uint64_t file_size) { + MutexLock l(&mu_); + OnAddFileImpl(file_path, file_size); + TEST_SYNC_POINT_CALLBACK("SstFileManagerImpl::OnAddFile", + const_cast<std::string*>(&file_path)); + return Status::OK(); +} + +Status SstFileManagerImpl::OnDeleteFile(const std::string& file_path) { + { + MutexLock l(&mu_); + OnDeleteFileImpl(file_path); + } + TEST_SYNC_POINT_CALLBACK("SstFileManagerImpl::OnDeleteFile", + const_cast<std::string*>(&file_path)); + return Status::OK(); +} + +void SstFileManagerImpl::OnCompactionCompletion(Compaction* c) { + MutexLock l(&mu_); + uint64_t size_added_by_compaction = 0; + for (size_t i = 0; i < c->num_input_levels(); i++) { + for (size_t j = 0; j < c->num_input_files(i); j++) { + FileMetaData* filemeta = c->input(i, j); + size_added_by_compaction += filemeta->fd.GetFileSize(); + } + } + cur_compactions_reserved_size_ -= size_added_by_compaction; +} + +Status SstFileManagerImpl::OnMoveFile(const std::string& old_path, + const std::string& new_path, + uint64_t* file_size) { + { + MutexLock l(&mu_); + if (file_size != nullptr) { + *file_size = tracked_files_[old_path]; + } + OnAddFileImpl(new_path,
tracked_files_[old_path]); + OnDeleteFileImpl(old_path); + } + TEST_SYNC_POINT("SstFileManagerImpl::OnMoveFile"); + return Status::OK(); +} + +void SstFileManagerImpl::SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) { + MutexLock l(&mu_); + max_allowed_space_ = max_allowed_space; +} + +void SstFileManagerImpl::SetCompactionBufferSize( + uint64_t compaction_buffer_size) { + MutexLock l(&mu_); + compaction_buffer_size_ = compaction_buffer_size; +} + +bool SstFileManagerImpl::IsMaxAllowedSpaceReached() { + MutexLock l(&mu_); + if (max_allowed_space_ <= 0) { + return false; + } + return total_files_size_ >= max_allowed_space_; +} + +bool SstFileManagerImpl::IsMaxAllowedSpaceReachedIncludingCompactions() { + MutexLock l(&mu_); + if (max_allowed_space_ <= 0) { + return false; + } + return total_files_size_ + cur_compactions_reserved_size_ >= + max_allowed_space_; +} + +bool SstFileManagerImpl::EnoughRoomForCompaction( + ColumnFamilyData* cfd, const std::vector<CompactionInputFiles>& inputs, + const Status& bg_error) { + MutexLock l(&mu_); + uint64_t size_added_by_compaction = 0; + // First check if we even have the space to do the compaction + for (size_t i = 0; i < inputs.size(); i++) { + for (size_t j = 0; j < inputs[i].size(); j++) { + FileMetaData* filemeta = inputs[i][j]; + size_added_by_compaction += filemeta->fd.GetFileSize(); + } + } + + // Update cur_compactions_reserved_size_ so concurrent compactions + // don't max out space + size_t needed_headroom = cur_compactions_reserved_size_ + + size_added_by_compaction + compaction_buffer_size_; + if (max_allowed_space_ != 0 && + (needed_headroom + total_files_size_ > max_allowed_space_)) { + return false; + } + + // Implement more aggressive checks only if this DB instance has already + // seen a NoSpace() error. This is in order to contain a single potentially + // misbehaving DB instance and prevent it from slowing down compactions of + // other DB instances + if (bg_error.IsNoSpace() && CheckFreeSpace()) { + auto fn = + TableFileName(cfd->ioptions()->cf_paths, inputs[0][0]->fd.GetNumber(), + inputs[0][0]->fd.GetPathId()); + uint64_t free_space = 0; + Status s = fs_->GetFreeSpace(fn, IOOptions(), &free_space, nullptr); + s.PermitUncheckedError(); // TODO: Check the status + // needed_headroom is based on current size reserved by compactions, + // minus any files created by running compactions as they would count + // against the reserved size. If user didn't specify any compaction + // buffer, add reserved_disk_buffer_ that's calculated by default so the + // compaction doesn't end up leaving nothing for logs and flush SSTs + if (compaction_buffer_size_ == 0) { + needed_headroom += reserved_disk_buffer_; + } + if (free_space < needed_headroom + size_added_by_compaction) { + // We hit the condition of not enough disk space + ROCKS_LOG_ERROR(logger_, + "free space [%" PRIu64 + " bytes] is less than " + "needed headroom [%" ROCKSDB_PRIszt " bytes]\n", + free_space, needed_headroom); + return false; + } + } + + cur_compactions_reserved_size_ += size_added_by_compaction; + // Take a snapshot of cur_compactions_reserved_size_ for when we encounter + // a NoSpace error.
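+ // (Illustrative note: free_space_trigger_ is compared against + // GetFreeSpace() in ClearError() below; for example, if compactions + // currently reserve 2 GB, a soft NoSpace error is only cleared once at + // least 2 GB of free disk space is available again.)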
+ free_space_trigger_ = cur_compactions_reserved_size_; + return true; +} + +uint64_t SstFileManagerImpl::GetCompactionsReservedSize() { + MutexLock l(&mu_); + return cur_compactions_reserved_size_; +} + +uint64_t SstFileManagerImpl::GetTotalSize() { + MutexLock l(&mu_); + return total_files_size_; +} + +std::unordered_map<std::string, uint64_t> +SstFileManagerImpl::GetTrackedFiles() { + MutexLock l(&mu_); + return tracked_files_; +} + +int64_t SstFileManagerImpl::GetDeleteRateBytesPerSecond() { + return delete_scheduler_.GetRateBytesPerSecond(); +} + +void SstFileManagerImpl::SetDeleteRateBytesPerSecond(int64_t delete_rate) { + return delete_scheduler_.SetRateBytesPerSecond(delete_rate); +} + +double SstFileManagerImpl::GetMaxTrashDBRatio() { + return delete_scheduler_.GetMaxTrashDBRatio(); +} + +void SstFileManagerImpl::SetMaxTrashDBRatio(double r) { + return delete_scheduler_.SetMaxTrashDBRatio(r); +} + +uint64_t SstFileManagerImpl::GetTotalTrashSize() { + return delete_scheduler_.GetTotalTrashSize(); +} + +void SstFileManagerImpl::ReserveDiskBuffer(uint64_t size, + const std::string& path) { + MutexLock l(&mu_); + + reserved_disk_buffer_ += size; + if (path_.empty()) { + path_ = path; + } +} + +void SstFileManagerImpl::ClearError() { + while (true) { + MutexLock l(&mu_); + + if (error_handler_list_.empty() || closing_) { + return; + } + + uint64_t free_space = 0; + Status s = fs_->GetFreeSpace(path_, IOOptions(), &free_space, nullptr); + free_space = max_allowed_space_ > 0 + ? std::min(max_allowed_space_, free_space) + : free_space; + if (s.ok()) { + // In case of multi-DB instances, some of them may have experienced a + // soft error and some a hard error. In the SstFileManagerImpl, a hard + // error will basically override previously reported soft errors. Once + // we clear the hard error, we don't keep track of previous errors for + // now + if (bg_err_.severity() == Status::Severity::kHardError) { + if (free_space < reserved_disk_buffer_) { + ROCKS_LOG_ERROR(logger_, + "free space [%" PRIu64 + " bytes] is less than " + "required disk buffer [%" PRIu64 " bytes]\n", + free_space, reserved_disk_buffer_); + ROCKS_LOG_ERROR(logger_, "Cannot clear hard error\n"); + s = Status::NoSpace(); + } + } else if (bg_err_.severity() == Status::Severity::kSoftError) { + if (free_space < free_space_trigger_) { + ROCKS_LOG_WARN(logger_, + "free space [%" PRIu64 + " bytes] is less than " + "free space for compaction trigger [%" PRIu64 + " bytes]\n", + free_space, free_space_trigger_); + ROCKS_LOG_WARN(logger_, "Cannot clear soft error\n"); + s = Status::NoSpace(); + } + } + } + + // Someone could have called CancelErrorRecovery() and the list could have + // become empty, so check again here + if (s.ok()) { + assert(!error_handler_list_.empty()); + auto error_handler = error_handler_list_.front(); + // Since we will release the mutex, set cur_instance_ to signal to the + // shutdown thread, if it calls CancelErrorRecovery() in the meantime, + // to indicate that this DB instance is busy.
The DB instance is + // guaranteed to not be deleted before RecoverFromBGError() returns, + // since the ErrorHandler::recovery_in_prog_ flag would be true + cur_instance_ = error_handler; + mu_.Unlock(); + s = error_handler->RecoverFromBGError(); + TEST_SYNC_POINT("SstFileManagerImpl::ErrorCleared"); + mu_.Lock(); + // The DB instance might have been deleted while we were + // waiting for the mutex, so check cur_instance_ to make sure it's + // still non-null + if (cur_instance_) { + // Check for error again, since the instance may have recovered but + // immediately got another error. If that's the case, and the new + // error is also a NoSpace() non-fatal error, leave the instance in + // the list + Status err = cur_instance_->GetBGError(); + if (s.ok() && err.subcode() == IOStatus::SubCode::kNoSpace && + err.severity() < Status::Severity::kFatalError) { + s = err; + } + cur_instance_ = nullptr; + } + + if (s.ok() || s.IsShutdownInProgress() || + (!s.ok() && s.severity() >= Status::Severity::kFatalError)) { + // If shutdown is in progress, abandon this handler instance + // and continue with the others + error_handler_list_.pop_front(); + } + } + + if (!error_handler_list_.empty()) { + // If there are more instances to be recovered, reschedule after 5 + // seconds + int64_t wait_until = clock_->NowMicros() + 5000000; + cv_.TimedWait(wait_until); + } + + // Check again for error_handler_list_ empty, as a DB instance shutdown + // could have removed it from the queue while we were in timed wait + if (error_handler_list_.empty()) { + ROCKS_LOG_INFO(logger_, "Clearing error\n"); + bg_err_ = Status::OK(); + return; + } + } +} + +void SstFileManagerImpl::StartErrorRecovery(ErrorHandler* handler, + Status bg_error) { + MutexLock l(&mu_); + if (bg_error.severity() == Status::Severity::kSoftError) { + if (bg_err_.ok()) { + // Setting bg_err_ basically means we're in degraded mode + // Assume that all pending compactions will fail similarly. The trigger + // for clearing this condition is set to current compaction reserved + // size, so we stop checking disk space available in + // EnoughRoomForCompaction once this much free space is available + bg_err_ = bg_error; + } + } else if (bg_error.severity() == Status::Severity::kHardError) { + bg_err_ = bg_error; + } else { + assert(false); + } + + // If this is the first instance of this error, kick off a thread to poll + // and recover from this condition + if (error_handler_list_.empty()) { + error_handler_list_.push_back(handler); + // Release lock before calling join. It's ok to do so because + // error_handler_list_ is now non-empty, so no other invocation of this + // function will execute this piece of code + mu_.Unlock(); + if (bg_thread_) { + bg_thread_->join(); + } + // Start a new thread. The previous one would have exited.
+ bg_thread_.reset(new port::Thread(&SstFileManagerImpl::ClearError, this)); + mu_.Lock(); + } else { + // Check if this DB instance is already in the list + for (auto iter = error_handler_list_.begin(); + iter != error_handler_list_.end(); ++iter) { + if ((*iter) == handler) { + return; + } + } + error_handler_list_.push_back(handler); + } +} + +bool SstFileManagerImpl::CancelErrorRecovery(ErrorHandler* handler) { + MutexLock l(&mu_); + + if (cur_instance_ == handler) { + // This instance is currently busy attempting to recover + // Nullify it so the recovery thread doesn't attempt to access it again + cur_instance_ = nullptr; + return false; + } + + for (auto iter = error_handler_list_.begin(); + iter != error_handler_list_.end(); ++iter) { + if ((*iter) == handler) { + error_handler_list_.erase(iter); + return true; + } + } + return false; +} + +Status SstFileManagerImpl::ScheduleFileDeletion(const std::string& file_path, + const std::string& path_to_sync, + const bool force_bg) { + TEST_SYNC_POINT_CALLBACK("SstFileManagerImpl::ScheduleFileDeletion", + const_cast<std::string*>(&file_path)); + return delete_scheduler_.DeleteFile(file_path, path_to_sync, force_bg); +} + +void SstFileManagerImpl::WaitForEmptyTrash() { + delete_scheduler_.WaitForEmptyTrash(); +} + +void SstFileManagerImpl::OnAddFileImpl(const std::string& file_path, + uint64_t file_size) { + auto tracked_file = tracked_files_.find(file_path); + if (tracked_file != tracked_files_.end()) { + // File was added before, we will just update the size + total_files_size_ -= tracked_file->second; + total_files_size_ += file_size; + cur_compactions_reserved_size_ -= file_size; + } else { + total_files_size_ += file_size; + } + tracked_files_[file_path] = file_size; +} + +void SstFileManagerImpl::OnDeleteFileImpl(const std::string& file_path) { + auto tracked_file = tracked_files_.find(file_path); + if (tracked_file == tracked_files_.end()) { + // File is not tracked + return; + } + + total_files_size_ -= tracked_file->second; + tracked_files_.erase(tracked_file); +} + +SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log, + std::string trash_dir, + int64_t rate_bytes_per_sec, + bool delete_existing_trash, Status* status, + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) { + const auto& fs = env->GetFileSystem(); + return NewSstFileManager(env, fs, info_log, trash_dir, rate_bytes_per_sec, + delete_existing_trash, status, max_trash_db_ratio, + bytes_max_delete_chunk); +} + +SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<FileSystem> fs, + std::shared_ptr<Logger> info_log, + const std::string& trash_dir, + int64_t rate_bytes_per_sec, + bool delete_existing_trash, Status* status, + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) { + const auto& clock = env->GetSystemClock(); + SstFileManagerImpl* res = + new SstFileManagerImpl(clock, fs, info_log, rate_bytes_per_sec, + max_trash_db_ratio, bytes_max_delete_chunk); + + // trash_dir is deprecated and not needed anymore, but if the user passed it + // we will still remove files in it.
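+ // Illustrative call, with hypothetical arguments: since trash_dir is + // deprecated, a typical invocation passes an empty string, e.g. + // NewSstFileManager(env, fs, info_log, "" /* trash_dir */, + // 64 << 20 /* 64 MB/s delete rate */, + // true /* delete_existing_trash */, &status, + // 0.25 /* max_trash_db_ratio */, 0);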
+ Status s = Status::OK(); + if (delete_existing_trash && trash_dir != "") { + std::vector<std::string> files_in_trash; + s = fs->GetChildren(trash_dir, IOOptions(), &files_in_trash, nullptr); + if (s.ok()) { + for (const std::string& trash_file : files_in_trash) { + std::string path_in_trash = trash_dir + "/" + trash_file; + res->OnAddFile(path_in_trash); + Status file_delete = + res->ScheduleFileDeletion(path_in_trash, trash_dir); + if (s.ok() && !file_delete.ok()) { + s = file_delete; + } + } + } + } + + if (status) { + *status = s; + } else { + // No one passed us a Status, so they must not care about the error... + s.PermitUncheckedError(); + } + + return res; +} + + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.h new file mode 100644 index 0000000000..e85376e5ce --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/sst_file_manager_impl.h @@ -0,0 +1,193 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include <string> + +#include "db/compaction/compaction.h" +#include "file/delete_scheduler.h" +#include "port/port.h" +#include "rocksdb/sst_file_manager.h" + +namespace ROCKSDB_NAMESPACE { +class ErrorHandler; +class FileSystem; +class SystemClock; +class Logger; + +// SstFileManager is used to track SST and blob files in the DB and control +// their deletion rate. All SstFileManager public functions are thread-safe. +class SstFileManagerImpl : public SstFileManager { + public: + explicit SstFileManagerImpl(const std::shared_ptr<SystemClock>& clock, + const std::shared_ptr<FileSystem>& fs, + const std::shared_ptr<Logger>& logger, + int64_t rate_bytes_per_sec, + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk); + + ~SstFileManagerImpl(); + + // DB will call OnAddFile whenever a new sst/blob file is added. + Status OnAddFile(const std::string& file_path); + + // Overload where size of the file is provided by the caller rather than + // queried from the filesystem. This is an optimization. + Status OnAddFile(const std::string& file_path, uint64_t file_size); + + // DB will call OnDeleteFile whenever a sst/blob file is deleted. + Status OnDeleteFile(const std::string& file_path); + + // DB will call OnMoveFile whenever a sst/blob file is moved to a new path. + Status OnMoveFile(const std::string& old_path, const std::string& new_path, + uint64_t* file_size = nullptr); + + // Update the maximum allowed space that should be used by RocksDB, if + // the total size of the SST and blob files exceeds max_allowed_space, writes + // to RocksDB will fail. + // + // Setting max_allowed_space to 0 will disable this feature, maximum allowed + // space will be infinite (Default value). + // + // thread-safe. + void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) override; + + void SetCompactionBufferSize(uint64_t compaction_buffer_size) override; + + // Return true if the total size of SST and blob files exceeded the maximum + // allowed space usage. + // + // thread-safe. + bool IsMaxAllowedSpaceReached() override; + + bool IsMaxAllowedSpaceReachedIncludingCompactions() override; + + // Returns true if there is enough (approximate) space for the specified + // compaction.
Space is approximate because this function conservatively + // estimates how much space is currently being used by compactions (i.e. + // if a compaction has started, this function bumps the used space by + // the full compaction size). + bool EnoughRoomForCompaction(ColumnFamilyData* cfd, + const std::vector<CompactionInputFiles>& inputs, + const Status& bg_error); + + // Bookkeeping so total_file_sizes_ goes back to normal after compaction + // finishes + void OnCompactionCompletion(Compaction* c); + + uint64_t GetCompactionsReservedSize(); + + // Return the total size of all tracked files. + uint64_t GetTotalSize() override; + + // Return a map containing all tracked files and their corresponding sizes. + std::unordered_map<std::string, uint64_t> GetTrackedFiles() override; + + // Return delete rate limit in bytes per second. + virtual int64_t GetDeleteRateBytesPerSecond() override; + + // Update the delete rate limit in bytes per second. + virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override; + + // Return trash/DB size ratio where new files will be deleted immediately + virtual double GetMaxTrashDBRatio() override; + + // Update trash/DB size ratio where new files will be deleted immediately + virtual void SetMaxTrashDBRatio(double ratio) override; + + // Return the total size of trash files + uint64_t GetTotalTrashSize() override; + + // Called by each DB instance using this sst file manager to reserve + // disk buffer space for recovery from out of space errors + void ReserveDiskBuffer(uint64_t buffer, const std::string& path); + + // Set a flag upon encountering disk full. May enqueue the ErrorHandler + // instance for background polling and recovery + void StartErrorRecovery(ErrorHandler* db, Status bg_error); + + // Remove the given Errorhandler instance from the recovery queue. It's + // not guaranteed + bool CancelErrorRecovery(ErrorHandler* db); + + // Mark file as trash and schedule its deletion. If force_bg is set, it + // forces the file to be deleted in the background regardless of DB size, + // except when rate limited delete is disabled + virtual Status ScheduleFileDeletion(const std::string& file_path, + const std::string& dir_to_sync, + const bool force_bg = false); + + // Wait for all files being deleted in the background to finish or for + // destructor to be called. + virtual void WaitForEmptyTrash(); + + DeleteScheduler* delete_scheduler() { return &delete_scheduler_; } + + // Stop the error recovery background thread.
This should be called only + // once in the object's lifetime, and before the destructor + void Close(); + + void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) override { + stats_ = stats; + delete_scheduler_.SetStatisticsPtr(stats); + } + + private: + // REQUIRES: mutex locked + void OnAddFileImpl(const std::string& file_path, uint64_t file_size); + // REQUIRES: mutex locked + void OnDeleteFileImpl(const std::string& file_path); + + void ClearError(); + bool CheckFreeSpace() { + return bg_err_.severity() == Status::Severity::kSoftError; + } + + std::shared_ptr<SystemClock> clock_; + std::shared_ptr<FileSystem> fs_; + std::shared_ptr<Logger> logger_; + // Mutex to protect tracked_files_, total_files_size_ + port::Mutex mu_; + // The summation of the sizes of all files in tracked_files_ map + uint64_t total_files_size_; + // Compactions should only execute if they can leave at least + // this amount of buffer space for logs and flushes + uint64_t compaction_buffer_size_; + // Estimated size of the current ongoing compactions + uint64_t cur_compactions_reserved_size_; + // A map containing all tracked files and their sizes + // file_path => file_size + std::unordered_map<std::string, uint64_t> tracked_files_; + // The maximum allowed space (in bytes) for sst and blob files. + uint64_t max_allowed_space_; + // DeleteScheduler used to throttle file deletion. + DeleteScheduler delete_scheduler_; + port::CondVar cv_; + // Flag to force error recovery thread to exit + bool closing_; + // Background error recovery thread + std::unique_ptr<port::Thread> bg_thread_; + // A path in the filesystem corresponding to this SFM. This is used for + // calling Env::GetFreeSpace. Posix requires a path in the filesystem + std::string path_; + // Save the current background error + Status bg_err_; + // Amount of free disk headroom before allowing recovery from hard errors + uint64_t reserved_disk_buffer_; + // For soft errors, amount of free disk space before we can allow + // compactions to run full throttle. If disk space is below this trigger, + // compactions will be gated by free disk space > input size + uint64_t free_space_trigger_; + // List of database error handler instances tracked by this SstFileManager. + std::list<ErrorHandler*> error_handler_list_; + // Pointer to ErrorHandler instance that is currently processing recovery + ErrorHandler* cur_instance_; + std::shared_ptr<Statistics> stats_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.cc new file mode 100644 index 0000000000..908878a5fa --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.cc @@ -0,0 +1,989 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors.
+ +#include "file/writable_file_writer.h" + +#include <algorithm> +#include <mutex> + +#include "db/version_edit.h" +#include "monitoring/histogram.h" +#include "monitoring/iostats_context_imp.h" +#include "port/port.h" +#include "rocksdb/io_status.h" +#include "rocksdb/system_clock.h" +#include "test_util/sync_point.h" +#include "util/crc32c.h" +#include "util/random.h" +#include "util/rate_limiter_impl.h" + +namespace ROCKSDB_NAMESPACE { +IOStatus WritableFileWriter::Create(const std::shared_ptr<FileSystem>& fs, + const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr<WritableFileWriter>* writer, + IODebugContext* dbg) { + if (file_opts.use_direct_writes && + 0 == file_opts.writable_file_max_buffer_size) { + return IOStatus::InvalidArgument( + "Direct write requires writable_file_max_buffer_size > 0"); + } + std::unique_ptr<FSWritableFile> file; + IOStatus io_s = fs->NewWritableFile(fname, file_opts, &file, dbg); + if (io_s.ok()) { + writer->reset(new WritableFileWriter(std::move(file), fname, file_opts)); + } + return io_s; +} + +IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, + Env::IOPriority op_rate_limiter_priority) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + + const char* src = data.data(); + size_t left = data.size(); + IOStatus s; + pending_sync_ = true; + + TEST_KILL_RANDOM_WITH_WEIGHT("WritableFileWriter::Append:0", REDUCE_ODDS2); + + // Calculate the checksum of appended data + UpdateFileChecksum(data); + + { + IOOptions io_options; + io_options.rate_limiter_priority = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOSTATS_TIMER_GUARD(prepare_write_nanos); + TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite"); + writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left, + io_options, nullptr); + } + + // See whether we need to enlarge the buffer to avoid the flush + if (buf_.Capacity() - buf_.CurrentSize() < left) { + for (size_t cap = buf_.Capacity(); + cap < max_buffer_size_; // There is still room to increase + cap *= 2) { + // See whether the next available size is large enough. + // Buffer will never be increased to more than max_buffer_size_. + size_t desired_capacity = std::min(cap * 2, max_buffer_size_); + if (desired_capacity - buf_.CurrentSize() >= left || + (use_direct_io() && desired_capacity == max_buffer_size_)) { + buf_.AllocateNewBuffer(desired_capacity, true); + break; + } + } + } + + // Flush only when buffered I/O + if (!use_direct_io() && (buf_.Capacity() - buf_.CurrentSize()) < left) { + if (buf_.CurrentSize() > 0) { + s = Flush(op_rate_limiter_priority); + if (!s.ok()) { + set_seen_error(); + return s; + } + } + assert(buf_.CurrentSize() == 0); + } + + if (perform_data_verification_ && buffered_data_with_checksum_ && + crc32c_checksum != 0) { + // Since we want to use the checksum of the input data, we cannot break it + // into several pieces. We will only write them in the buffer when buffer + // size is enough. Otherwise, we will directly write it down.
+ if (use_direct_io() || (buf_.Capacity() - buf_.CurrentSize()) >= left) { + if ((buf_.Capacity() - buf_.CurrentSize()) >= left) { + size_t appended = buf_.Append(src, left); + if (appended != left) { + s = IOStatus::Corruption("Write buffer append failure"); + } + buffered_data_crc32c_checksum_ = crc32c::Crc32cCombine( + buffered_data_crc32c_checksum_, crc32c_checksum, appended); + } else { + while (left > 0) { + size_t appended = buf_.Append(src, left); + buffered_data_crc32c_checksum_ = + crc32c::Extend(buffered_data_crc32c_checksum_, src, appended); + left -= appended; + src += appended; + + if (left > 0) { + s = Flush(op_rate_limiter_priority); + if (!s.ok()) { + break; + } + } + } + } + } else { + assert(buf_.CurrentSize() == 0); + buffered_data_crc32c_checksum_ = crc32c_checksum; + s = WriteBufferedWithChecksum(src, left, op_rate_limiter_priority); + } + } else { + // In this case, either we do not need to do the data verification or + // the caller does not provide the checksum of the data + // (crc32c_checksum = 0). + // + // With direct I/O on we never write directly to disk; otherwise we + // simply use the buffer for its original purpose of accumulating many + // small chunks + if (use_direct_io() || (buf_.Capacity() >= left)) { + while (left > 0) { + size_t appended = buf_.Append(src, left); + if (perform_data_verification_ && buffered_data_with_checksum_) { + buffered_data_crc32c_checksum_ = + crc32c::Extend(buffered_data_crc32c_checksum_, src, appended); + } + left -= appended; + src += appended; + + if (left > 0) { + s = Flush(op_rate_limiter_priority); + if (!s.ok()) { + break; + } + } + } + } else { + // Writing directly to file bypassing the buffer + assert(buf_.CurrentSize() == 0); + if (perform_data_verification_ && buffered_data_with_checksum_) { + buffered_data_crc32c_checksum_ = crc32c::Value(src, left); + s = WriteBufferedWithChecksum(src, left, op_rate_limiter_priority); + } else { + s = WriteBuffered(src, left, op_rate_limiter_priority); + } + } + } + + TEST_KILL_RANDOM("WritableFileWriter::Append:1"); + if (s.ok()) { + uint64_t cur_size = filesize_.load(std::memory_order_acquire); + filesize_.store(cur_size + data.size(), std::memory_order_release); + } else { + set_seen_error(); + } + return s; +} + +IOStatus WritableFileWriter::Pad(const size_t pad_bytes, + Env::IOPriority op_rate_limiter_priority) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + assert(pad_bytes < kDefaultPageSize); + size_t left = pad_bytes; + size_t cap = buf_.Capacity() - buf_.CurrentSize(); + size_t pad_start = buf_.CurrentSize(); + + // Assume pad_bytes is small compared to buf_ capacity. So we always + // use buf_ rather than writing directly to the file, as Append() does in + // certain cases.
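+ // (Illustrative numbers: with pad_bytes = 512 and only 200 bytes of free + // buffer capacity, the loop below pads 200 bytes, flushes the buffer, and + // then pads the remaining 312 bytes into the now-empty buffer.)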
+ while (left) { + size_t append_bytes = std::min(cap, left); + buf_.PadWith(append_bytes, 0); + left -= append_bytes; + if (left > 0) { + IOStatus s = Flush(op_rate_limiter_priority); + if (!s.ok()) { + set_seen_error(); + return s; + } + } + cap = buf_.Capacity() - buf_.CurrentSize(); + } + pending_sync_ = true; + uint64_t cur_size = filesize_.load(std::memory_order_acquire); + filesize_.store(cur_size + pad_bytes, std::memory_order_release); + if (perform_data_verification_) { + buffered_data_crc32c_checksum_ = + crc32c::Extend(buffered_data_crc32c_checksum_, + buf_.BufferStart() + pad_start, pad_bytes); + } + return IOStatus::OK(); +} + +IOStatus WritableFileWriter::Close() { + if (seen_error()) { + IOStatus interim; + if (writable_file_.get() != nullptr) { + interim = writable_file_->Close(IOOptions(), nullptr); + writable_file_.reset(); + } + if (interim.ok()) { + return IOStatus::IOError( + "File is closed but data not flushed as writer has previous error."); + } else { + return interim; + } + } + + // Do not quit immediately on failure; the file MUST be closed. + + // It is possible to close the file twice now, as we MUST close it + // in the destructor; simply flushing is not enough. On Windows, + // pre-allocation does not fill with zeros, and with unbuffered access + // we also need to set the end of data. + if (writable_file_.get() == nullptr) { + return IOStatus::OK(); + } + + IOStatus s; + s = Flush(); // flush cache to OS + + IOStatus interim; + IOOptions io_options; + io_options.rate_limiter_priority = writable_file_->GetIOPriority(); + // In direct I/O mode we write whole pages so + // we need to let the file know where data ends. + if (use_direct_io()) { + { + FileOperationInfo::StartTimePoint start_ts; + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + } + uint64_t filesz = filesize_.load(std::memory_order_acquire); + interim = writable_file_->Truncate(filesz, io_options, nullptr); + if (ShouldNotifyListeners()) { + auto finish_ts = FileOperationInfo::FinishNow(); + NotifyOnFileTruncateFinish(start_ts, finish_ts, s); + if (!interim.ok()) { + NotifyOnIOError(interim, FileOperationType::kTruncate, file_name(), + filesz); + } + } + } + if (interim.ok()) { + { + FileOperationInfo::StartTimePoint start_ts; + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + } + interim = writable_file_->Fsync(io_options, nullptr); + if (ShouldNotifyListeners()) { + auto finish_ts = FileOperationInfo::FinishNow(); + NotifyOnFileSyncFinish(start_ts, finish_ts, s, + FileOperationType::kFsync); + if (!interim.ok()) { + NotifyOnIOError(interim, FileOperationType::kFsync, file_name()); + } + } + } + } + if (!interim.ok() && s.ok()) { + s = interim; + } + } + + TEST_KILL_RANDOM("WritableFileWriter::Close:0"); + { + FileOperationInfo::StartTimePoint start_ts; + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + } + interim = writable_file_->Close(io_options, nullptr); + if (ShouldNotifyListeners()) { + auto finish_ts = FileOperationInfo::FinishNow(); + NotifyOnFileCloseFinish(start_ts, finish_ts, s); + if (!interim.ok()) { + NotifyOnIOError(interim, FileOperationType::kClose, file_name()); + } + } + } + if (!interim.ok() && s.ok()) { + s = interim; + } + + writable_file_.reset(); + TEST_KILL_RANDOM("WritableFileWriter::Close:1"); + + if (s.ok()) { + if (checksum_generator_ != nullptr && !checksum_finalized_) { + checksum_generator_->Finalize(); + checksum_finalized_ = true; + } + } else { + set_seen_error(); + } + + return s; +} + +// write out the
cached data to the OS cache or storage if direct I/O is +enabled +IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + + IOStatus s; + TEST_KILL_RANDOM_WITH_WEIGHT("WritableFileWriter::Flush:0", REDUCE_ODDS2); + + if (buf_.CurrentSize() > 0) { + if (use_direct_io()) { + if (pending_sync_) { + if (perform_data_verification_ && buffered_data_with_checksum_) { + s = WriteDirectWithChecksum(op_rate_limiter_priority); + } else { + s = WriteDirect(op_rate_limiter_priority); + } + } + } else { + if (perform_data_verification_ && buffered_data_with_checksum_) { + s = WriteBufferedWithChecksum(buf_.BufferStart(), buf_.CurrentSize(), + op_rate_limiter_priority); + } else { + s = WriteBuffered(buf_.BufferStart(), buf_.CurrentSize(), + op_rate_limiter_priority); + } + } + if (!s.ok()) { + set_seen_error(); + return s; + } + } + + { + FileOperationInfo::StartTimePoint start_ts; + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + } + IOOptions io_options; + io_options.rate_limiter_priority = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); + s = writable_file_->Flush(io_options, nullptr); + if (ShouldNotifyListeners()) { + auto finish_ts = std::chrono::steady_clock::now(); + NotifyOnFileFlushFinish(start_ts, finish_ts, s); + if (!s.ok()) { + NotifyOnIOError(s, FileOperationType::kFlush, file_name()); + } + } + } + + if (!s.ok()) { + set_seen_error(); + return s; + } + + // sync OS cache to disk for every bytes_per_sync_ + // TODO: give log file and sst file different options (log + // files could be potentially cached in OS for their whole + // life time, thus we might not want to flush at all). + + // We try to avoid sync to the last 1MB of data. For two reasons: + // (1) avoid rewrite the same page that is modified later. + // (2) for older versions of the OS, write can block while writing out + // the page. + // XFS does neighbor page flushing outside of the specified ranges. We + // need to make sure sync range is far from the write offset. + if (!use_direct_io() && bytes_per_sync_) { + const uint64_t kBytesNotSyncRange = + 1024 * 1024; // recent 1MB is not synced. + const uint64_t kBytesAlignWhenSync = 4 * 1024; // Align 4KB.
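+ // (Worked example with illustrative numbers: if bytes_per_sync_ is 1 MB, + // the current file size is 5,000,000 bytes, and last_sync_size_ is 0, + // then offset_sync_to = 5,000,000 - 1,048,576 = 3,951,424, rounded down + // to the 4 KB multiple 3,948,544; since that exceeds last_sync_size_ by + // more than bytes_per_sync_, RangeSync(0, 3948544) is issued and + // last_sync_size_ becomes 3,948,544.)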
+ uint64_t cur_size = filesize_.load(std::memory_order_acquire); + if (cur_size > kBytesNotSyncRange) { + uint64_t offset_sync_to = cur_size - kBytesNotSyncRange; + offset_sync_to -= offset_sync_to % kBytesAlignWhenSync; + assert(offset_sync_to >= last_sync_size_); + if (offset_sync_to > 0 && + offset_sync_to - last_sync_size_ >= bytes_per_sync_) { + s = RangeSync(last_sync_size_, offset_sync_to - last_sync_size_); + if (!s.ok()) { + set_seen_error(); + } + last_sync_size_ = offset_sync_to; + } + } + } + + return s; +} + +std::string WritableFileWriter::GetFileChecksum() { + if (checksum_generator_ != nullptr) { + assert(checksum_finalized_); + return checksum_generator_->GetChecksum(); + } else { + return kUnknownFileChecksum; + } +} + +const char* WritableFileWriter::GetFileChecksumFuncName() const { + if (checksum_generator_ != nullptr) { + return checksum_generator_->Name(); + } else { + return kUnknownFileChecksumFuncName; + } +} + +IOStatus WritableFileWriter::Sync(bool use_fsync) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + + IOStatus s = Flush(); + if (!s.ok()) { + set_seen_error(); + return s; + } + TEST_KILL_RANDOM("WritableFileWriter::Sync:0"); + if (!use_direct_io() && pending_sync_) { + s = SyncInternal(use_fsync); + if (!s.ok()) { + set_seen_error(); + return s; + } + } + TEST_KILL_RANDOM("WritableFileWriter::Sync:1"); + pending_sync_ = false; + return IOStatus::OK(); +} + +IOStatus WritableFileWriter::SyncWithoutFlush(bool use_fsync) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + if (!writable_file_->IsSyncThreadSafe()) { + return IOStatus::NotSupported( + "Can't WritableFileWriter::SyncWithoutFlush() because " + "WritableFile::IsSyncThreadSafe() is false"); + } + TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:1"); + IOStatus s = SyncInternal(use_fsync); + TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:2"); + if (!s.ok()) { +#ifndef NDEBUG + sync_without_flush_called_ = true; +#endif // NDEBUG + set_seen_error(); + } + return s; +} + +IOStatus WritableFileWriter::SyncInternal(bool use_fsync) { + // Caller is supposed to check seen_error_ + IOStatus s; + IOSTATS_TIMER_GUARD(fsync_nanos); + TEST_SYNC_POINT("WritableFileWriter::SyncInternal:0"); + auto prev_perf_level = GetPerfLevel(); + + IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, clock_); + + FileOperationInfo::StartTimePoint start_ts; + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + } + + IOOptions io_options; + io_options.rate_limiter_priority = writable_file_->GetIOPriority(); + if (use_fsync) { + s = writable_file_->Fsync(io_options, nullptr); + } else { + s = writable_file_->Sync(io_options, nullptr); + } + if (ShouldNotifyListeners()) { + auto finish_ts = std::chrono::steady_clock::now(); + NotifyOnFileSyncFinish( + start_ts, finish_ts, s, + use_fsync ? FileOperationType::kFsync : FileOperationType::kSync); + if (!s.ok()) { + NotifyOnIOError( + s, (use_fsync ? FileOperationType::kFsync : FileOperationType::kSync), + file_name()); + } + } + SetPerfLevel(prev_perf_level); + + // The caller will be responsible to call set_seen_error() if s is not OK. 
+ return s; +} + +IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + + IOSTATS_TIMER_GUARD(range_sync_nanos); + TEST_SYNC_POINT("WritableFileWriter::RangeSync:0"); + FileOperationInfo::StartTimePoint start_ts; + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + } + IOOptions io_options; + io_options.rate_limiter_priority = writable_file_->GetIOPriority(); + IOStatus s = writable_file_->RangeSync(offset, nbytes, io_options, nullptr); + if (!s.ok()) { + set_seen_error(); + } + if (ShouldNotifyListeners()) { + auto finish_ts = std::chrono::steady_clock::now(); + NotifyOnFileRangeSyncFinish(offset, nbytes, start_ts, finish_ts, s); + if (!s.ok()) { + NotifyOnIOError(s, FileOperationType::kRangeSync, file_name(), nbytes, + offset); + } + } + return s; +} + +// This method writes to disk the specified data and makes use of the rate +// limiter if available +IOStatus WritableFileWriter::WriteBuffered( + const char* data, size_t size, Env::IOPriority op_rate_limiter_priority) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + + IOStatus s; + assert(!use_direct_io()); + const char* src = data; + size_t left = size; + DataVerificationInfo v_info; + char checksum_buf[sizeof(uint32_t)]; + Env::IOPriority rate_limiter_priority_used = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOOptions io_options; + io_options.rate_limiter_priority = rate_limiter_priority_used; + + while (left > 0) { + size_t allowed = left; + if (rate_limiter_ != nullptr && + rate_limiter_priority_used != Env::IO_TOTAL) { + allowed = rate_limiter_->RequestToken(left, 0 /* alignment */, + rate_limiter_priority_used, stats_, + RateLimiter::OpType::kWrite); + } + + { + IOSTATS_TIMER_GUARD(write_nanos); + TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); + + FileOperationInfo::StartTimePoint start_ts; + uint64_t old_size = writable_file_->GetFileSize(io_options, nullptr); + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + old_size = next_write_offset_; + } + { + auto prev_perf_level = GetPerfLevel(); + + IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, clock_); + if (perform_data_verification_) { + Crc32cHandoffChecksumCalculation(src, allowed, checksum_buf); + v_info.checksum = Slice(checksum_buf, sizeof(uint32_t)); + s = writable_file_->Append(Slice(src, allowed), io_options, v_info, + nullptr); + } else { + s = writable_file_->Append(Slice(src, allowed), io_options, nullptr); + } + if (!s.ok()) { + // If writable_file_->Append() failed, then the data may or may not + // exist in the underlying memory buffer, OS page cache, remote file + // system's buffer, etc. If WritableFileWriter keeps the data in + // buf_, then a future Close() or write retry may send the data to + // the underlying file again. If the data does exist in the + // underlying buffer and gets written to the file eventually despite + // returning error, the file may end up with two duplicate pieces of + // data. Therefore, clear the buf_ at the WritableFileWriter layer + // and let caller determine error handling. 
+ buf_.Size(0); + buffered_data_crc32c_checksum_ = 0; + } + SetPerfLevel(prev_perf_level); + } + if (ShouldNotifyListeners()) { + auto finish_ts = std::chrono::steady_clock::now(); + NotifyOnFileWriteFinish(old_size, allowed, start_ts, finish_ts, s); + if (!s.ok()) { + NotifyOnIOError(s, FileOperationType::kAppend, file_name(), allowed, + old_size); + } + } + if (!s.ok()) { + set_seen_error(); + return s; + } + } + + IOSTATS_ADD(bytes_written, allowed); + TEST_KILL_RANDOM("WritableFileWriter::WriteBuffered:0"); + + left -= allowed; + src += allowed; + uint64_t cur_size = flushed_size_.load(std::memory_order_acquire); + flushed_size_.store(cur_size + allowed, std::memory_order_release); + } + buf_.Size(0); + buffered_data_crc32c_checksum_ = 0; + if (!s.ok()) { + set_seen_error(); + } + return s; +} + +IOStatus WritableFileWriter::WriteBufferedWithChecksum( + const char* data, size_t size, Env::IOPriority op_rate_limiter_priority) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + + IOStatus s; + assert(!use_direct_io()); + assert(perform_data_verification_ && buffered_data_with_checksum_); + const char* src = data; + size_t left = size; + DataVerificationInfo v_info; + char checksum_buf[sizeof(uint32_t)]; + Env::IOPriority rate_limiter_priority_used = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOOptions io_options; + io_options.rate_limiter_priority = rate_limiter_priority_used; + // Check how much is allowed. Here, we loop until the rate limiter allows to + // write the entire buffer. + // TODO: need to be improved since it sort of defeats the purpose of the rate + // limiter + size_t data_size = left; + if (rate_limiter_ != nullptr && rate_limiter_priority_used != Env::IO_TOTAL) { + while (data_size > 0) { + size_t tmp_size; + tmp_size = rate_limiter_->RequestToken(data_size, buf_.Alignment(), + rate_limiter_priority_used, stats_, + RateLimiter::OpType::kWrite); + data_size -= tmp_size; + } + } + + { + IOSTATS_TIMER_GUARD(write_nanos); + TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); + + FileOperationInfo::StartTimePoint start_ts; + uint64_t old_size = writable_file_->GetFileSize(io_options, nullptr); + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + old_size = next_write_offset_; + } + { + auto prev_perf_level = GetPerfLevel(); + + IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, clock_); + + EncodeFixed32(checksum_buf, buffered_data_crc32c_checksum_); + v_info.checksum = Slice(checksum_buf, sizeof(uint32_t)); + s = writable_file_->Append(Slice(src, left), io_options, v_info, nullptr); + SetPerfLevel(prev_perf_level); + } + if (ShouldNotifyListeners()) { + auto finish_ts = std::chrono::steady_clock::now(); + NotifyOnFileWriteFinish(old_size, left, start_ts, finish_ts, s); + if (!s.ok()) { + NotifyOnIOError(s, FileOperationType::kAppend, file_name(), left, + old_size); + } + } + if (!s.ok()) { + // If writable_file_->Append() failed, then the data may or may not + // exist in the underlying memory buffer, OS page cache, remote file + // system's buffer, etc. If WritableFileWriter keeps the data in + // buf_, then a future Close() or write retry may send the data to + // the underlying file again. If the data does exist in the + // underlying buffer and gets written to the file eventually despite + // returning error, the file may end up with two duplicate pieces of + // data. 
Therefore, clear the buf_ at the WritableFileWriter layer + // and let caller determine error handling. + buf_.Size(0); + buffered_data_crc32c_checksum_ = 0; + set_seen_error(); + return s; + } + } + + IOSTATS_ADD(bytes_written, left); + TEST_KILL_RANDOM("WritableFileWriter::WriteBuffered:0"); + + // Buffer write is successful, reset the buffer current size to 0 and reset + // the corresponding checksum value + buf_.Size(0); + buffered_data_crc32c_checksum_ = 0; + uint64_t cur_size = flushed_size_.load(std::memory_order_acquire); + flushed_size_.store(cur_size + left, std::memory_order_release); + if (!s.ok()) { + set_seen_error(); + } + return s; +} + +void WritableFileWriter::UpdateFileChecksum(const Slice& data) { + if (checksum_generator_ != nullptr) { + checksum_generator_->Update(data.data(), data.size()); + } +} + +// Currently, crc32c checksum is used to calculate the checksum value of the +// content in the input buffer for handoff. In the future, the checksum might be +// calculated from the existing crc32c checksums of the WAL and Manifest +// records, or even SST file blocks. +// TODO: effectively use the existing checksum of the data being written to +// generate the crc32c checksum instead of a raw calculation. +void WritableFileWriter::Crc32cHandoffChecksumCalculation(const char* data, + size_t size, + char* buf) { + uint32_t v_crc32c = crc32c::Extend(0, data, size); + EncodeFixed32(buf, v_crc32c); +} + +// This flushes the accumulated data in the buffer. We pad data with zeros if +// necessary to a whole page. +// However, during automatic flushes padding would not be necessary. +// We always use RateLimiter if available. We move (Refit) any buffer bytes +// that are left over beyond the whole number of pages to be written again on +// the next flush, because we can only write at aligned +// offsets. +IOStatus WritableFileWriter::WriteDirect( + Env::IOPriority op_rate_limiter_priority) { + if (seen_error()) { + assert(false); + + return IOStatus::IOError("Writer has previous error."); + } + + assert(use_direct_io()); + IOStatus s; + const size_t alignment = buf_.Alignment(); + assert((next_write_offset_ % alignment) == 0); + + // Calculate whole page final file advance if all writes succeed + const size_t file_advance = + TruncateToPageBoundary(alignment, buf_.CurrentSize()); + + // Calculate the leftover tail, we write it here padded with zeros BUT we + // will write it again in the future either on Close() OR when the current + // whole page fills out.
+ const size_t leftover_tail = buf_.CurrentSize() - file_advance; + + // Round up and pad + buf_.PadToAlignmentWith(0); + + const char* src = buf_.BufferStart(); + uint64_t write_offset = next_write_offset_; + size_t left = buf_.CurrentSize(); + DataVerificationInfo v_info; + char checksum_buf[sizeof(uint32_t)]; + Env::IOPriority rate_limiter_priority_used = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOOptions io_options; + io_options.rate_limiter_priority = rate_limiter_priority_used; + + while (left > 0) { + // Check how much is allowed + size_t size = left; + if (rate_limiter_ != nullptr && + rate_limiter_priority_used != Env::IO_TOTAL) { + size = rate_limiter_->RequestToken(left, buf_.Alignment(), + rate_limiter_priority_used, stats_, + RateLimiter::OpType::kWrite); + } + + { + IOSTATS_TIMER_GUARD(write_nanos); + TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); + FileOperationInfo::StartTimePoint start_ts; + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + } + // direct writes must be positional + if (perform_data_verification_) { + Crc32cHandoffChecksumCalculation(src, size, checksum_buf); + v_info.checksum = Slice(checksum_buf, sizeof(uint32_t)); + s = writable_file_->PositionedAppend(Slice(src, size), write_offset, + io_options, v_info, nullptr); + } else { + s = writable_file_->PositionedAppend(Slice(src, size), write_offset, + io_options, nullptr); + } + + if (ShouldNotifyListeners()) { + auto finish_ts = std::chrono::steady_clock::now(); + NotifyOnFileWriteFinish(write_offset, size, start_ts, finish_ts, s); + if (!s.ok()) { + NotifyOnIOError(s, FileOperationType::kPositionedAppend, file_name(), + size, write_offset); + } + } + if (!s.ok()) { + buf_.Size(file_advance + leftover_tail); + set_seen_error(); + return s; + } + } + + IOSTATS_ADD(bytes_written, size); + left -= size; + src += size; + write_offset += size; + uint64_t cur_size = flushed_size_.load(std::memory_order_acquire); + flushed_size_.store(cur_size + size, std::memory_order_release); + assert((next_write_offset_ % alignment) == 0); + } + + if (s.ok()) { + // Move the tail to the beginning of the buffer + // This never happens during normal Append but rather during + // explicit call to Flush()/Sync() or Close() + buf_.RefitTail(file_advance, leftover_tail); + // This is where we start writing next time which may or may not be + // the actual file size on disk. They match if the buffer size + // is a multiple of whole pages; otherwise filesize_ is leftover_tail + // behind + next_write_offset_ += file_advance; + } else { + set_seen_error(); + } + return s; +} + +IOStatus WritableFileWriter::WriteDirectWithChecksum( + Env::IOPriority op_rate_limiter_priority) { + if (seen_error()) { + return AssertFalseAndGetStatusForPrevError(); + } + + assert(use_direct_io()); + assert(perform_data_verification_ && buffered_data_with_checksum_); + IOStatus s; + const size_t alignment = buf_.Alignment(); + assert((next_write_offset_ % alignment) == 0); + + // Calculate whole page final file advance if all writes succeed + const size_t file_advance = + TruncateToPageBoundary(alignment, buf_.CurrentSize()); + + // Calculate the leftover tail, we write it here padded with zeros BUT we + // will write it again in the future either on Close() OR when the current + // whole page fills out. + const size_t leftover_tail = buf_.CurrentSize() - file_advance; + + // Round up, pad, and combine the checksum.
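+ // (Illustrative note: crc32c::Crc32cCombine(crc_a, crc_b, len_b) yields + // the crc32c of the concatenation A||B given crc32c(A), crc32c(B), and + // the length of B; below it is used to append the checksum of the zero + // padding to the checksum of the buffered data, so the combined value + // matches the padded buffer that gets written out.)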
+ size_t last_cur_size = buf_.CurrentSize(); + buf_.PadToAlignmentWith(0); + size_t padded_size = buf_.CurrentSize() - last_cur_size; + const char* padded_start = buf_.BufferStart() + last_cur_size; + uint32_t padded_checksum = crc32c::Value(padded_start, padded_size); + buffered_data_crc32c_checksum_ = crc32c::Crc32cCombine( + buffered_data_crc32c_checksum_, padded_checksum, padded_size); + + const char* src = buf_.BufferStart(); + uint64_t write_offset = next_write_offset_; + size_t left = buf_.CurrentSize(); + DataVerificationInfo v_info; + char checksum_buf[sizeof(uint32_t)]; + + Env::IOPriority rate_limiter_priority_used = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOOptions io_options; + io_options.rate_limiter_priority = rate_limiter_priority_used; + // Check how much is allowed. Here, we loop until the rate limiter allows to + // write the entire buffer. + // TODO: need to be improved since it sort of defeats the purpose of the rate + // limiter + size_t data_size = left; + if (rate_limiter_ != nullptr && rate_limiter_priority_used != Env::IO_TOTAL) { + while (data_size > 0) { + size_t size; + size = rate_limiter_->RequestToken(data_size, buf_.Alignment(), + rate_limiter_priority_used, stats_, + RateLimiter::OpType::kWrite); + data_size -= size; + } + } + + { + IOSTATS_TIMER_GUARD(write_nanos); + TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); + FileOperationInfo::StartTimePoint start_ts; + if (ShouldNotifyListeners()) { + start_ts = FileOperationInfo::StartNow(); + } + // direct writes must be positional + EncodeFixed32(checksum_buf, buffered_data_crc32c_checksum_); + v_info.checksum = Slice(checksum_buf, sizeof(uint32_t)); + s = writable_file_->PositionedAppend(Slice(src, left), write_offset, + io_options, v_info, nullptr); + + if (ShouldNotifyListeners()) { + auto finish_ts = std::chrono::steady_clock::now(); + NotifyOnFileWriteFinish(write_offset, left, start_ts, finish_ts, s); + if (!s.ok()) { + NotifyOnIOError(s, FileOperationType::kPositionedAppend, file_name(), + left, write_offset); + } + } + if (!s.ok()) { + // In this case, we do not change buffered_data_crc32c_checksum_ because + // it still aligns with the data in the buffer. + buf_.Size(file_advance + leftover_tail); + buffered_data_crc32c_checksum_ = + crc32c::Value(buf_.BufferStart(), buf_.CurrentSize()); + set_seen_error(); + return s; + } + } + + IOSTATS_ADD(bytes_written, left); + assert((next_write_offset_ % alignment) == 0); + uint64_t cur_size = flushed_size_.load(std::memory_order_acquire); + flushed_size_.store(cur_size + left, std::memory_order_release); + + if (s.ok()) { + // Move the tail to the beginning of the buffer + // This never happens during normal Append but rather during + // explicit call to Flush()/Sync() or Close(). Also the buffer checksum will + // recalculated accordingly. + buf_.RefitTail(file_advance, leftover_tail); + // Adjust the checksum value to align with the data in the buffer + buffered_data_crc32c_checksum_ = + crc32c::Value(buf_.BufferStart(), buf_.CurrentSize()); + // This is where we start writing next time which may or not be + // the actual file size on disk. 
They match if the buffer size
+    // is a multiple of whole pages, otherwise filesize_ is leftover_tail
+    // behind
+    next_write_offset_ += file_advance;
+  } else {
+    set_seen_error();
+  }
+  return s;
+}
+Env::IOPriority WritableFileWriter::DecideRateLimiterPriority(
+    Env::IOPriority writable_file_io_priority,
+    Env::IOPriority op_rate_limiter_priority) {
+  if (writable_file_io_priority == Env::IO_TOTAL &&
+      op_rate_limiter_priority == Env::IO_TOTAL) {
+    return Env::IO_TOTAL;
+  } else if (writable_file_io_priority == Env::IO_TOTAL) {
+    return op_rate_limiter_priority;
+  } else if (op_rate_limiter_priority == Env::IO_TOTAL) {
+    return writable_file_io_priority;
+  } else {
+    return op_rate_limiter_priority;
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.h
new file mode 100644
index 0000000000..aac0f59491
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/file/writable_file_writer.h
@@ -0,0 +1,320 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <atomic>
+#include <string>
+
+#include "db/version_edit.h"
+#include "env/file_system_tracer.h"
+#include "port/port.h"
+#include "rocksdb/file_checksum.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/rate_limiter.h"
+#include "test_util/sync_point.h"
+#include "util/aligned_buffer.h"
+
+namespace ROCKSDB_NAMESPACE {
+class Statistics;
+class SystemClock;
+
+// WritableFileWriter is a wrapper on top of Env::WritableFile. It provides
+// facilities to:
+// - Handle Buffered and Direct writes.
+// - Rate limit writes.
+// - Flush and Sync the data to the underlying filesystem.
+// - Notify any interested listeners on the completion of a write.
+// - Update IO stats.
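// A minimal usage sketch (illustrative only, not part of the patch; assumes
// a std::shared_ptr<FileSystem> `fs` and FileOptions `opts` are in scope):
//
//   std::unique_ptr<WritableFileWriter> writer;
//   IOStatus s = WritableFileWriter::Create(fs, "example.log", opts, &writer,
//                                           /*dbg=*/nullptr);
//   if (s.ok()) s = writer->Append(Slice("hello"));      // buffered in buf_
//   if (s.ok()) s = writer->Flush();                     // push buffer to FS
//   if (s.ok()) s = writer->Sync(/*use_fsync=*/false);   // make it durable
//   if (s.ok()) s = writer->Close();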
+class WritableFileWriter { + private: + void NotifyOnFileWriteFinish( + uint64_t offset, size_t length, + const FileOperationInfo::StartTimePoint& start_ts, + const FileOperationInfo::FinishTimePoint& finish_ts, + const IOStatus& io_status) { + FileOperationInfo info(FileOperationType::kWrite, file_name_, start_ts, + finish_ts, io_status, temperature_); + info.offset = offset; + info.length = length; + + for (auto& listener : listeners_) { + listener->OnFileWriteFinish(info); + } + info.status.PermitUncheckedError(); + } + void NotifyOnFileFlushFinish( + const FileOperationInfo::StartTimePoint& start_ts, + const FileOperationInfo::FinishTimePoint& finish_ts, + const IOStatus& io_status) { + FileOperationInfo info(FileOperationType::kFlush, file_name_, start_ts, + finish_ts, io_status, temperature_); + + for (auto& listener : listeners_) { + listener->OnFileFlushFinish(info); + } + info.status.PermitUncheckedError(); + } + void NotifyOnFileSyncFinish( + const FileOperationInfo::StartTimePoint& start_ts, + const FileOperationInfo::FinishTimePoint& finish_ts, + const IOStatus& io_status, + FileOperationType type = FileOperationType::kSync) { + FileOperationInfo info(type, file_name_, start_ts, finish_ts, io_status, + temperature_); + + for (auto& listener : listeners_) { + listener->OnFileSyncFinish(info); + } + info.status.PermitUncheckedError(); + } + void NotifyOnFileRangeSyncFinish( + uint64_t offset, size_t length, + const FileOperationInfo::StartTimePoint& start_ts, + const FileOperationInfo::FinishTimePoint& finish_ts, + const IOStatus& io_status) { + FileOperationInfo info(FileOperationType::kRangeSync, file_name_, start_ts, + finish_ts, io_status, temperature_); + info.offset = offset; + info.length = length; + + for (auto& listener : listeners_) { + listener->OnFileRangeSyncFinish(info); + } + info.status.PermitUncheckedError(); + } + void NotifyOnFileTruncateFinish( + const FileOperationInfo::StartTimePoint& start_ts, + const FileOperationInfo::FinishTimePoint& finish_ts, + const IOStatus& io_status) { + FileOperationInfo info(FileOperationType::kTruncate, file_name_, start_ts, + finish_ts, io_status, temperature_); + + for (auto& listener : listeners_) { + listener->OnFileTruncateFinish(info); + } + info.status.PermitUncheckedError(); + } + void NotifyOnFileCloseFinish( + const FileOperationInfo::StartTimePoint& start_ts, + const FileOperationInfo::FinishTimePoint& finish_ts, + const IOStatus& io_status) { + FileOperationInfo info(FileOperationType::kClose, file_name_, start_ts, + finish_ts, io_status, temperature_); + + for (auto& listener : listeners_) { + listener->OnFileCloseFinish(info); + } + info.status.PermitUncheckedError(); + } + + void NotifyOnIOError(const IOStatus& io_status, FileOperationType operation, + const std::string& file_path, size_t length = 0, + uint64_t offset = 0) { + if (listeners_.empty()) { + return; + } + IOErrorInfo io_error_info(io_status, operation, file_path, length, offset); + for (auto& listener : listeners_) { + listener->OnIOError(io_error_info); + } + io_error_info.io_status.PermitUncheckedError(); + } + + bool ShouldNotifyListeners() const { return !listeners_.empty(); } + void UpdateFileChecksum(const Slice& data); + void Crc32cHandoffChecksumCalculation(const char* data, size_t size, + char* buf); + + std::string file_name_; + FSWritableFilePtr writable_file_; + SystemClock* clock_; + AlignedBuffer buf_; + size_t max_buffer_size_; + // Actually written data size can be used for truncate + // not counting padding data + std::atomic 
<uint64_t> filesize_;
+  std::atomic<uint64_t> flushed_size_;
+  // This is necessary when we use unbuffered access
+  // and writes must happen on aligned offsets
+  // so we need to go back and write that page again
+  uint64_t next_write_offset_;
+  bool pending_sync_;
+  std::atomic<bool> seen_error_;
+#ifndef NDEBUG
+  // SyncWithoutFlush() is the function that is allowed to be called
+  // concurrently with other functions. One of the concurrent calls
+  // could set seen_error_, and the other one would hit an assertion
+  // in debug mode.
+  std::atomic<bool> sync_without_flush_called_ = false;
+#endif  // NDEBUG
+  uint64_t last_sync_size_;
+  uint64_t bytes_per_sync_;
+  RateLimiter* rate_limiter_;
+  Statistics* stats_;
+  std::vector<std::shared_ptr<EventListener>> listeners_;
+  std::unique_ptr<FileChecksumGenerator> checksum_generator_;
+  bool checksum_finalized_;
+  bool perform_data_verification_;
+  uint32_t buffered_data_crc32c_checksum_;
+  bool buffered_data_with_checksum_;
+  Temperature temperature_;
+
+ public:
+  WritableFileWriter(
+      std::unique_ptr<FSWritableFile>&& file, const std::string& _file_name,
+      const FileOptions& options, SystemClock* clock = nullptr,
+      const std::shared_ptr<IOTracer>& io_tracer = nullptr,
+      Statistics* stats = nullptr,
+      const std::vector<std::shared_ptr<EventListener>>& listeners = {},
+      FileChecksumGenFactory* file_checksum_gen_factory = nullptr,
+      bool perform_data_verification = false,
+      bool buffered_data_with_checksum = false)
+      : file_name_(_file_name),
+        writable_file_(std::move(file), io_tracer, _file_name),
+        clock_(clock),
+        buf_(),
+        max_buffer_size_(options.writable_file_max_buffer_size),
+        filesize_(0),
+        flushed_size_(0),
+        next_write_offset_(0),
+        pending_sync_(false),
+        seen_error_(false),
+        last_sync_size_(0),
+        bytes_per_sync_(options.bytes_per_sync),
+        rate_limiter_(options.rate_limiter),
+        stats_(stats),
+        listeners_(),
+        checksum_generator_(nullptr),
+        checksum_finalized_(false),
+        perform_data_verification_(perform_data_verification),
+        buffered_data_crc32c_checksum_(0),
+        buffered_data_with_checksum_(buffered_data_with_checksum) {
+    temperature_ = options.temperature;
+    assert(!use_direct_io() || max_buffer_size_ > 0);
+    TEST_SYNC_POINT_CALLBACK("WritableFileWriter::WritableFileWriter:0",
+                             reinterpret_cast<void*>(max_buffer_size_));
+    buf_.Alignment(writable_file_->GetRequiredBufferAlignment());
+    buf_.AllocateNewBuffer(std::min((size_t)65536, max_buffer_size_));
+    std::for_each(listeners.begin(), listeners.end(),
+                  [this](const std::shared_ptr<EventListener>& e) {
+                    if (e->ShouldBeNotifiedOnFileIO()) {
+                      listeners_.emplace_back(e);
+                    }
+                  });
+    if (file_checksum_gen_factory != nullptr) {
+      FileChecksumGenContext checksum_gen_context;
+      checksum_gen_context.file_name = _file_name;
+      checksum_generator_ =
+          file_checksum_gen_factory->CreateFileChecksumGenerator(
+              checksum_gen_context);
+    }
+  }
+
+  static IOStatus Create(const std::shared_ptr<FileSystem>& fs,
+                         const std::string& fname, const FileOptions& file_opts,
+                         std::unique_ptr<WritableFileWriter>* writer,
+                         IODebugContext* dbg);
+  WritableFileWriter(const WritableFileWriter&) = delete;
+
+  WritableFileWriter& operator=(const WritableFileWriter&) = delete;
+
+  ~WritableFileWriter() {
+    auto s = Close();
+    s.PermitUncheckedError();
+  }
+
+  std::string file_name() const { return file_name_; }
+
+  // When this Append API is called, if the crc32c_checksum is not provided, we
+  // will calculate the checksum internally.
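  // For example (illustrative sketch, not part of the patch; `writer` and a
  // Slice `data` assumed in scope, and the precomputed-checksum form assumes
  // the writer was constructed with buffered_data_with_checksum enabled):
  //
  //   writer->Append(data);  // checksum computed internally
  //   writer->Append(data, crc32c::Value(data.data(), data.size()));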
+ IOStatus Append(const Slice& data, uint32_t crc32c_checksum = 0, + Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL); + + IOStatus Pad(const size_t pad_bytes, + Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL); + + IOStatus Flush(Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL); + + IOStatus Close(); + + IOStatus Sync(bool use_fsync); + + // Sync only the data that was already Flush()ed. Safe to call concurrently + // with Append() and Flush(). If !writable_file_->IsSyncThreadSafe(), + // returns NotSupported status. + IOStatus SyncWithoutFlush(bool use_fsync); + + uint64_t GetFileSize() const { + return filesize_.load(std::memory_order_acquire); + } + + // Returns the size of data flushed to the underlying `FSWritableFile`. + // Expected to match `writable_file()->GetFileSize()`. + // The return value can serve as a lower-bound for the amount of data synced + // by a future call to `SyncWithoutFlush()`. + uint64_t GetFlushedSize() const { + return flushed_size_.load(std::memory_order_acquire); + } + + IOStatus InvalidateCache(size_t offset, size_t length) { + return writable_file_->InvalidateCache(offset, length); + } + + FSWritableFile* writable_file() const { return writable_file_.get(); } + + bool use_direct_io() { return writable_file_->use_direct_io(); } + + bool BufferIsEmpty() { return buf_.CurrentSize() == 0; } + + void TEST_SetFileChecksumGenerator( + FileChecksumGenerator* checksum_generator) { + checksum_generator_.reset(checksum_generator); + } + + std::string GetFileChecksum(); + + const char* GetFileChecksumFuncName() const; + + bool seen_error() const { + return seen_error_.load(std::memory_order_relaxed); + } + // For options of relaxed consistency, users might hope to continue + // operating on the file after an error happens. + void reset_seen_error() { + seen_error_.store(false, std::memory_order_relaxed); + } + void set_seen_error() { seen_error_.store(true, std::memory_order_relaxed); } + + IOStatus AssertFalseAndGetStatusForPrevError() { + // This should only happen if SyncWithoutFlush() was called. + assert(sync_without_flush_called_); + return IOStatus::IOError("Writer has previous error."); + } + + private: + // Decide the Rate Limiter priority. + static Env::IOPriority DecideRateLimiterPriority( + Env::IOPriority writable_file_io_priority, + Env::IOPriority op_rate_limiter_priority); + + // Used when os buffering is OFF and we are writing + // DMA such as in Direct I/O mode + IOStatus WriteDirect(Env::IOPriority op_rate_limiter_priority); + IOStatus WriteDirectWithChecksum(Env::IOPriority op_rate_limiter_priority); + // Normal write. + IOStatus WriteBuffered(const char* data, size_t size, + Env::IOPriority op_rate_limiter_priority); + IOStatus WriteBufferedWithChecksum(const char* data, size_t size, + Env::IOPriority op_rate_limiter_priority); + IOStatus RangeSync(uint64_t offset, uint64_t nbytes); + IOStatus SyncInternal(bool use_fsync); +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/fuzz/util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/fuzz/util.h new file mode 100644 index 0000000000..97011823a3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/fuzz/util.h @@ -0,0 +1,29 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+
+#pragma once
+
+#define CHECK_OK(expression)                       \
+  do {                                             \
+    auto status = (expression);                    \
+    if (!status.ok()) {                            \
+      std::cerr << status.ToString() << std::endl; \
+      abort();                                     \
+    }                                              \
+  } while (0)
+
+#define CHECK_EQ(a, b)                                                      \
+  if (a != b) {                                                             \
+    std::cerr << "(" << #a << "=" << a << ") != (" << #b << "=" << b << ")" \
+              << std::endl;                                                 \
+    abort();                                                                \
+  }
+
+#define CHECK_TRUE(cond)                                      \
+  if (!(cond)) {                                              \
+    std::cerr << "\"" << #cond << "\" is false" << std::endl; \
+    abort();                                                  \
+  }
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/advanced_cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/advanced_cache.h
new file mode 100644
index 0000000000..997a41499d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/advanced_cache.h
@@ -0,0 +1,623 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// APIs for customizing read caches in RocksDB.
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "rocksdb/cache.h"
+#include "rocksdb/memory_allocator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Logger;
+class SecondaryCacheResultHandle;
+class Statistics;
+
+// A Cache maps keys to objects resident in memory, tracks reference counts
+// on those key-object entries, and is able to remove unreferenced entries
+// whenever it wants. All operations are fully thread safe except as noted.
+// Inserted entries have a specified "charge" which is some quantity in
+// unspecified units, typically bytes of memory used. A Cache will typically
+// have a finite capacity in units of charge, and evict entries as needed
+// to stay at or below that capacity.
+//
+// NOTE: This API is for expert use only and is intended more for customizing
+// cache behavior than for calling into outside of RocksDB. It is subject to
+// change as RocksDB evolves, especially the RocksDB block cache. Overriding
+// CacheWrapper is the preferred way of customizing some operations on an
+// existing implementation.
+//
+// INTERNAL: See typed_cache.h for convenient wrappers on top of this API.
+// New virtual functions must also be added to CacheWrapper below.
+class Cache {
+ public:  // types hidden from API client
+  // Opaque handle to an entry stored in the cache.
+  struct Handle {};
+
+ public:  // types hidden from Cache implementation
+  // Pointer to cached object of unspecified type. (This type alias is
+  // provided for clarity, not really for type checking.)
+  using ObjectPtr = void*;
+
+  // Opaque object providing context (settings, etc.) to create objects
+  // for primary cache from saved (serialized) secondary cache entries.
+  struct CreateContext {};
+
+ public:  // type defs
+  // Depending on implementation, cache entries with higher priority levels
+  // could be less likely to get evicted than entries with lower priority
+  // levels. The "high" priority level applies to certain SST metablocks
+  // (e.g. index and filter blocks) if the option
+  // cache_index_and_filter_blocks_with_high_priority is set. The "low"
+  // priority level is used for other kinds of SST blocks (most importantly,
+  // data blocks), as well as the above metablocks in case
+  // cache_index_and_filter_blocks_with_high_priority is not set. The
+  // "bottom" priority level is for BlobDB's blob values.
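  // For example (illustrative, not part of the patch): with
  // cache_index_and_filter_blocks_with_high_priority set, index and filter
  // blocks are inserted with Priority::HIGH, data blocks with Priority::LOW,
  // and BlobDB blob values with Priority::BOTTOM.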
+ enum class Priority { HIGH, LOW, BOTTOM }; + + // A set of callbacks to allow objects in the primary block cache to be + // be persisted in a secondary cache. The purpose of the secondary cache + // is to support other ways of caching the object, such as persistent or + // compressed data, that may require the object to be parsed and transformed + // in some way. Since the primary cache holds C++ objects and the secondary + // cache may only hold flat data that doesn't need relocation, these + // callbacks need to be provided by the user of the block + // cache to do the conversion. + // The CacheItemHelper is passed to Insert() and Lookup(). It has pointers + // to callback functions for size, saving and deletion of the + // object. The callbacks are defined in C-style in order to make them + // stateless and not add to the cache metadata size. + // Saving multiple std::function objects will take up 32 bytes per + // function, even if its not bound to an object and does no capture. + // + // All the callbacks are C-style function pointers in order to simplify + // lifecycle management. Objects in the cache can outlive the parent DB, + // so anything required for these operations should be contained in the + // object itself. + // + // The SizeCallback takes a pointer to the object and returns the size + // of the persistable data. It can be used by the secondary cache to allocate + // memory if needed. + // + // RocksDB callbacks are NOT exception-safe. A callback completing with an + // exception can lead to undefined behavior in RocksDB, including data loss, + // unreported corruption, deadlocks, and more. + using SizeCallback = size_t (*)(ObjectPtr obj); + + // The SaveToCallback takes an object pointer and saves the persistable + // data into a buffer. The secondary cache may decide to not store it in a + // contiguous buffer, in which case this callback will be called multiple + // times with increasing offset + using SaveToCallback = Status (*)(ObjectPtr from_obj, size_t from_offset, + size_t length, char* out_buf); + + // A function pointer type for destruction of a cache object. This will + // typically call the destructor for the appropriate type of the object. + // The Cache is responsible for copying and reclaiming space for the key, + // but objects are managed in part using this callback. Generally a DeleterFn + // can be nullptr if the ObjectPtr does not need destruction (e.g. nullptr or + // pointer into static data). + using DeleterFn = void (*)(ObjectPtr obj, MemoryAllocator* allocator); + + // The CreateCallback is takes in a buffer from the NVM cache and constructs + // an object using it. The callback doesn't have ownership of the buffer and + // should copy the contents into its own buffer. The CreateContext* is + // provided by Lookup and may be used to follow DB- or CF-specific settings. + // In case of some error, non-OK is returned and the caller should ignore + // any result in out_obj. (The implementation must clean up after itself.) + using CreateCallback = Status (*)(const Slice& data, CreateContext* context, + MemoryAllocator* allocator, + ObjectPtr* out_obj, size_t* out_charge); + + // A struct with pointers to helper functions for spilling items from the + // cache into the secondary cache. May be extended in the future. An + // instance of this struct is expected to outlive the cache. + struct CacheItemHelper { + // Function for deleting an object on its removal from the Cache. 
+    // nullptr is only for entries that require no destruction, such as
+    // "placeholder" cache entries with nullptr object.
+    DeleterFn del_cb;  // (<- Most performance critical)
+    // Next three are used for persisting values as described above.
+    // If any is nullptr, then all three should be nullptr and persisting the
+    // entry to/from secondary cache is not supported.
+    SizeCallback size_cb;
+    SaveToCallback saveto_cb;
+    CreateCallback create_cb;
+    // Classification of the entry for monitoring purposes in block cache.
+    CacheEntryRole role;
+    // Another CacheItemHelper (or this one) without secondary cache support.
+    // This is provided so that items promoted from secondary cache into
+    // primary cache without removal from the secondary cache can be prevented
+    // from attempting re-insertion into secondary cache (for efficiency).
+    const CacheItemHelper* without_secondary_compat;
+
+    CacheItemHelper() : CacheItemHelper(CacheEntryRole::kMisc) {}
+
+    // For helpers without SecondaryCache support
+    explicit CacheItemHelper(CacheEntryRole _role, DeleterFn _del_cb = nullptr)
+        : CacheItemHelper(_role, _del_cb, nullptr, nullptr, nullptr, this) {}
+
+    // For helpers with SecondaryCache support
+    explicit CacheItemHelper(CacheEntryRole _role, DeleterFn _del_cb,
+                             SizeCallback _size_cb, SaveToCallback _saveto_cb,
+                             CreateCallback _create_cb,
+                             const CacheItemHelper* _without_secondary_compat)
+        : del_cb(_del_cb),
+          size_cb(_size_cb),
+          saveto_cb(_saveto_cb),
+          create_cb(_create_cb),
+          role(_role),
+          without_secondary_compat(_without_secondary_compat) {
+      // Either all three secondary cache callbacks are non-nullptr or
+      // all three are nullptr
+      assert((size_cb != nullptr) == (saveto_cb != nullptr));
+      assert((size_cb != nullptr) == (create_cb != nullptr));
+      // without_secondary_compat points to equivalent but without
+      // secondary support
+      assert(role == without_secondary_compat->role);
+      assert(del_cb == without_secondary_compat->del_cb);
+      assert(!without_secondary_compat->IsSecondaryCacheCompatible());
+    }
+    inline bool IsSecondaryCacheCompatible() const {
+      return size_cb != nullptr;
+    }
+  };
+
+ public:  // ctor/dtor/create
+  Cache(std::shared_ptr<MemoryAllocator> allocator = nullptr)
+      : memory_allocator_(std::move(allocator)) {}
+  // No copying allowed
+  Cache(const Cache&) = delete;
+  Cache& operator=(const Cache&) = delete;
+
+  // Destroys all remaining entries by calling the associated "deleter"
+  virtual ~Cache() {}
+
+  // Creates a new Cache based on the input value string and returns the
+  // result. Currently, this method can be used to create LRUCaches only.
+  // @param config_options
+  // @param value The value might be:
+  //   - an old-style cache ("1M") -- equivalent to NewLRUCache(1024*1024)
+  //   - Name-value option pairs -- "capacity=1M; num_shard_bits=4".
+  //     For the LRUCache, the values are defined in LRUCacheOptions.
+  // @param result The new Cache object
+  // @return OK if the cache was successfully created
+  // @return NotFound if an invalid name was specified in the value
+  // @return InvalidArgument if the options were not valid
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& value,
+                                 std::shared_ptr<Cache>* result);
+
+ public:  // functions
+  // The type of the Cache
+  virtual const char* Name() const = 0;
+
+  // The Insert and Lookup APIs below are intended to allow cached objects
+  // to be demoted/promoted between the primary block cache and a secondary
+  // cache.
The secondary cache could be a non-volatile cache, and will + // likely store the object in a different representation. They rely on a + // per object CacheItemHelper to do the conversions. + // The secondary cache may persist across process and system restarts, + // and may even be moved between hosts. Therefore, the cache key must + // be repeatable across restarts/reboots, and globally unique if + // multiple DBs share the same cache and the set of DBs can change + // over time. + + // Insert a mapping from key->object into the cache and assign it + // the specified charge against the total cache capacity. If + // strict_capacity_limit is true and cache reaches its full capacity, + // return Status::MemoryLimit. `obj` must be non-nullptr if compatible + // with secondary cache (helper->size_cb != nullptr), because Value() == + // nullptr is reserved for indicating some secondary cache failure cases. + // On success, returns OK and takes ownership of `obj`, eventually deleting + // it with helper->del_cb. On non-OK return, the caller maintains ownership + // of `obj` so will often need to delete it in such cases. + // + // The helper argument is saved by the cache and will be used when the + // inserted object is evicted or considered for promotion to the secondary + // cache. Promotion to secondary cache is only enabled if helper->size_cb + // != nullptr. The helper must outlive the cache. Callers may use + // &kNoopCacheItemHelper as a trivial helper (no deleter for the object, + // no secondary cache). `helper` must not be nullptr (efficiency). + // + // If `handle` is not nullptr and return status is OK, `handle` is set + // to a Handle* for the entry. The caller must call this->Release(handle) + // when the returned entry is no longer needed. If `handle` is nullptr, it is + // as if Release is called immediately after Insert. + // + // Regardless of whether the item was inserted into the cache, + // it will attempt to insert it into the secondary cache if one is + // configured, and the helper supports it. + // The cache implementation must support a secondary cache, otherwise + // the item is only inserted into the primary cache. It may + // defer the insertion to the secondary cache as it sees fit. + // + // When the inserted entry is no longer needed, it will be destroyed using + // helper->del_cb (if non-nullptr). + virtual Status Insert(const Slice& key, ObjectPtr obj, + const CacheItemHelper* helper, size_t charge, + Handle** handle = nullptr, + Priority priority = Priority::LOW) = 0; + + // Similar to Insert, but used for creating cache entries that cannot + // be found with Lookup, such as for memory charging purposes. The + // key is needed for cache sharding purposes. + // * If allow_uncharged==true or strict_capacity_limit=false, the operation + // always succeeds and returns a valid Handle. + // * If strict_capacity_limit=true and the requested charge cannot be freed + // up in the cache, then + // * If allow_uncharged==true, it's created anyway (GetCharge() == 0). + // * If allow_uncharged==false, returns nullptr to indicate failure. + virtual Handle* CreateStandalone(const Slice& key, ObjectPtr obj, + const CacheItemHelper* helper, size_t charge, + bool allow_uncharged) = 0; + + // Lookup the key, returning nullptr if not found. If found, returns + // a handle to the mapping that must eventually be passed to Release(). 
+  //
+  // If a non-nullptr helper argument is provided with a non-nullptr
+  // create_cb, and a secondary cache is configured, then the secondary
+  // cache is also queried if lookup in the primary cache fails. If found
+  // in the secondary cache, the provided create_cb and create_context are
+  // used to promote the entry to an object in the primary cache.
+  // In that case, the helper may be saved and used later when the object
+  // is evicted, so as usual, the pointed-to helper must outlive the cache.
+  virtual Handle* Lookup(const Slice& key,
+                         const CacheItemHelper* helper = nullptr,
+                         CreateContext* create_context = nullptr,
+                         Priority priority = Priority::LOW,
+                         Statistics* stats = nullptr) = 0;
+
+  // Convenience wrapper when secondary cache not supported
+  inline Handle* BasicLookup(const Slice& key, Statistics* stats) {
+    return Lookup(key, nullptr, nullptr, Priority::LOW, stats);
+  }
+
+  // Increments the reference count for the handle if it refers to an entry in
+  // the cache. Returns true if refcount was incremented; otherwise, returns
+  // false.
+  // REQUIRES: handle must have been returned by a method on *this.
+  virtual bool Ref(Handle* handle) = 0;
+
+  /**
+   * Release a mapping returned by a previous Lookup(). A released entry might
+   * still remain in cache in case it is later looked up by others. If
+   * erase_if_last_ref is set then it also erases it from the cache if there
+   * is no other reference to it. Erasing it should call the deleter function
+   * that was provided when the entry was inserted.
+   *
+   * Returns true if the entry was also erased.
+   */
+  // REQUIRES: handle must not have been released yet.
+  // REQUIRES: handle must have been returned by a method on *this.
+  virtual bool Release(Handle* handle, bool erase_if_last_ref = false) = 0;
+
+  // Return the object associated with a handle returned by a successful
+  // Lookup(). For historical reasons, this is also known as the "value"
+  // associated with the key.
+  // REQUIRES: handle must not have been released yet.
+  // REQUIRES: handle must have been returned by a method on *this.
+  virtual ObjectPtr Value(Handle* handle) = 0;
+
+  // If the cache contains the entry for the key, erase it. Note that the
+  // underlying entry will be kept around until all existing handles
+  // to it have been released.
+  virtual void Erase(const Slice& key) = 0;
+  // Return a new numeric id. May be used by multiple clients who are
+  // sharding the same cache to partition the key space. Typically the
+  // client will allocate a new id at startup and prepend the id to
+  // its cache keys.
+  virtual uint64_t NewId() = 0;
+
+  // Sets the maximum configured capacity of the cache. When the new
+  // capacity is less than the old capacity and the existing usage is
+  // greater than the new capacity, the implementation will do its best job to
+  // purge the released entries from the cache in order to lower the usage.
+  virtual void SetCapacity(size_t capacity) = 0;
+
+  // Set whether to return error on insertion when cache reaches its full
+  // capacity.
+  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
+
+  // Get the flag whether to return error on insertion when cache reaches its
+  // full capacity.
+  virtual bool HasStrictCapacityLimit() const = 0;
+
+  // Returns the maximum configured capacity of the cache
+  virtual size_t GetCapacity() const = 0;
+
+  // Returns the memory size for the entries residing in the cache.
+ virtual size_t GetUsage() const = 0; + + // Returns the number of entries currently tracked in the table. SIZE_MAX + // means "not supported." This is used for inspecting the load factor, along + // with GetTableAddressCount(). + virtual size_t GetOccupancyCount() const { return SIZE_MAX; } + + // Returns the number of ways the hash function is divided for addressing + // entries. Zero means "not supported." This is used for inspecting the load + // factor, along with GetOccupancyCount(). + virtual size_t GetTableAddressCount() const { return 0; } + + // Returns the memory size for a specific entry in the cache. + virtual size_t GetUsage(Handle* handle) const = 0; + + // Returns the memory size for the entries in use by the system + virtual size_t GetPinnedUsage() const = 0; + + // Returns the charge for the specific entry in the cache. + virtual size_t GetCharge(Handle* handle) const = 0; + + // Returns the helper for the specified entry. + virtual const CacheItemHelper* GetCacheItemHelper(Handle* handle) const = 0; + + // Call this on shutdown if you want to speed it up. Cache will disown + // any underlying data and will not free it on delete. This call will leak + // memory - call this only if you're shutting down the process. + // Any attempts of using cache after this call will fail terribly. + // Always delete the DB object before calling this method! + virtual void DisownData() { + // default implementation is noop + } + + struct ApplyToAllEntriesOptions { + // If the Cache uses locks, setting `average_entries_per_lock` to + // a higher value suggests iterating over more entries each time a lock + // is acquired, likely reducing the time for ApplyToAllEntries but + // increasing latency for concurrent users of the Cache. Setting + // `average_entries_per_lock` to a smaller value could be helpful if + // callback is relatively expensive, such as using large data structures. + size_t average_entries_per_lock = 256; + }; + + // Apply a callback to all entries in the cache. The Cache must ensure + // thread safety but does not guarantee that a consistent snapshot of all + // entries is iterated over if other threads are operating on the Cache + // also. + virtual void ApplyToAllEntries( + const std::function& callback, + const ApplyToAllEntriesOptions& opts) = 0; + + // Remove all entries. + // Prerequisite: no entry is referenced. + virtual void EraseUnRefEntries() = 0; + + virtual std::string GetPrintableOptions() const { return ""; } + + // Check for any warnings or errors in the operation of the cache and + // report them to the logger. This is intended only to be called + // periodically so does not need to be very efficient. (Obscure calling + // conventions for Logger inherited from env.h) + virtual void ReportProblems( + const std::shared_ptr& /*info_log*/) const {} + + MemoryAllocator* memory_allocator() const { return memory_allocator_.get(); } + + // See ShardedCacheOptions::hash_seed + virtual uint32_t GetHashSeed() const { return 0; } + + // EXPERIMENTAL + // The following APIs are experimental and might change in the future. + + // Release a mapping returned by a previous Lookup(). The "useful" + // parameter specifies whether the data was actually used or not, + // which may be used by the cache implementation to decide whether + // to consider it as a hit for retention purposes. As noted elsewhere, + // "pending" handles require Wait()/WaitAll() before Release(). 
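  // For example (illustrative, not part of the patch): a looked-up block that
  // turned out not to be needed can be released as not useful, so the cache
  // need not count the lookup as a hit for retention purposes:
  //
  //   cache->Release(handle, /*useful=*/false, /*erase_if_last_ref=*/false);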
+ virtual bool Release(Handle* handle, bool /*useful*/, + bool erase_if_last_ref) { + return Release(handle, erase_if_last_ref); + } + + // A temporary handle structure for managing async lookups, which callers + // of AsyncLookup() can allocate on the call stack for efficiency. + // An AsyncLookupHandle should not be used concurrently across threads. + struct AsyncLookupHandle { + // Inputs, populated by caller: + // NOTE: at least in case of stacked secondary caches, the underlying + // key buffer must last until handle is completely waited on. + Slice key; + const CacheItemHelper* helper = nullptr; + CreateContext* create_context = nullptr; + Priority priority = Priority::LOW; + Statistics* stats = nullptr; + + AsyncLookupHandle() {} + AsyncLookupHandle(const Slice& _key, const CacheItemHelper* _helper, + CreateContext* _create_context, + Priority _priority = Priority::LOW, + Statistics* _stats = nullptr) + : key(_key), + helper(_helper), + create_context(_create_context), + priority(_priority), + stats(_stats) {} + + // AsyncLookupHandle should only be destroyed when no longer pending + ~AsyncLookupHandle() { assert(!IsPending()); } + + // No copies or moves (StartAsyncLookup may save a pointer to this) + AsyncLookupHandle(const AsyncLookupHandle&) = delete; + AsyncLookupHandle operator=(const AsyncLookupHandle&) = delete; + AsyncLookupHandle(AsyncLookupHandle&&) = delete; + AsyncLookupHandle operator=(AsyncLookupHandle&&) = delete; + + // Determines if the handle returned by Lookup() can give a value without + // blocking, though Wait()/WaitAll() might be required to publish it to + // Value(). See secondary cache compatible Lookup() above for details. + // This call is not thread safe on "pending" handles. + // WART/TODO with stacked secondaries: might indicate ready when one + // result is ready (a miss) but the next lookup will block. + bool IsReady(); + + // Returns true if Wait/WaitAll is required before calling Result(). + bool IsPending(); + + // Returns a Lookup()-like result if this AsyncHandle is not pending. + // (Undefined behavior on a pending AsyncHandle.) Like Lookup(), the + // caller is responsible for eventually Release()ing a non-nullptr + // Handle* result. + Handle* Result(); + + // Implementation details, for RocksDB internal use only + Handle* result_handle = nullptr; + SecondaryCacheResultHandle* pending_handle = nullptr; + SecondaryCache* pending_cache = nullptr; + bool found_dummy_entry = false; + bool kept_in_sec_cache = false; + }; + + // Starts a potentially asynchronous Lookup(), based on the populated + // "input" fields of the async_handle. The caller is responsible for + // keeping the AsyncLookupHandle and the key it references alive through + // WaitAll(), and the AsyncLookupHandle alive through + // AsyncLookupHandle::Result(). WaitAll() can only be skipped if + // AsyncLookupHandle::IsPending() is already false after StartAsyncLookup. + // Calling AsyncLookupHandle::Result() is essentially required so that + // Release() can be called on non-nullptr Handle result. Wait() is a + // concise version of WaitAll()+Result() on a single handle. After an + // AsyncLookupHandle has completed this cycle, its input fields can be + // updated and re-used for another StartAsyncLookup. + // + // Handle is thread-safe while AsyncLookupHandle is not thread-safe. + // + // Default implementation is appropriate for Caches without + // true asynchronous support: defers to synchronous Lookup(). 
+ // (AsyncLookupHandles will only get into the "pending" state with + // SecondaryCache configured.) + virtual void StartAsyncLookup(AsyncLookupHandle& async_handle); + + // A convenient wrapper around WaitAll() and AsyncLookupHandle::Result() + // for a single async handle. See StartAsyncLookup(). + Handle* Wait(AsyncLookupHandle& async_handle); + + // Wait for an array of async handles to get results, so that none are left + // in the "pending" state. Not thread safe. See StartAsyncLookup(). + // Default implementation is appropriate for Caches without true + // asynchronous support: asserts that all handles are not pending (or not + // expected to be handled by this cache, in case of wrapped/stacked + // WaitAlls()). + virtual void WaitAll(AsyncLookupHandle* /*async_handles*/, size_t /*count*/); + + // For a function called on cache entries about to be evicted. The function + // returns `true` if it has taken ownership of the Value (object), or + // `false` if the cache should destroy it as usual. Regardless, Ref() and + // Release() cannot be called on this Handle that is poised for eviction. + using EvictionCallback = std::function; + // Sets an eviction callback for this Cache. Not thread safe and only + // supports being set once, so should only be used during initialization + // or destruction, guaranteed before or after any thread-shared operations. + void SetEvictionCallback(EvictionCallback&& fn); + + protected: + std::shared_ptr memory_allocator_; + EvictionCallback eviction_callback_; +}; + +// A wrapper around Cache that can easily be extended with instrumentation, +// etc. +class CacheWrapper : public Cache { + public: + explicit CacheWrapper(std::shared_ptr target) + : target_(std::move(target)) {} + + // Only function that derived class must provide + // const char* Name() const override { ... 
} + + Status Insert(const Slice& key, ObjectPtr value, + const CacheItemHelper* helper, size_t charge, + Handle** handle = nullptr, + Priority priority = Priority::LOW) override { + return target_->Insert(key, value, helper, charge, handle, priority); + } + + Handle* CreateStandalone(const Slice& key, ObjectPtr obj, + const CacheItemHelper* helper, size_t charge, + bool allow_uncharged) override { + return target_->CreateStandalone(key, obj, helper, charge, allow_uncharged); + } + + Handle* Lookup(const Slice& key, const CacheItemHelper* helper, + CreateContext* create_context, + Priority priority = Priority::LOW, + Statistics* stats = nullptr) override { + return target_->Lookup(key, helper, create_context, priority, stats); + } + + bool Ref(Handle* handle) override { return target_->Ref(handle); } + + using Cache::Release; + bool Release(Handle* handle, bool erase_if_last_ref = false) override { + return target_->Release(handle, erase_if_last_ref); + } + + ObjectPtr Value(Handle* handle) override { return target_->Value(handle); } + + void Erase(const Slice& key) override { target_->Erase(key); } + uint64_t NewId() override { return target_->NewId(); } + + void SetCapacity(size_t capacity) override { target_->SetCapacity(capacity); } + + void SetStrictCapacityLimit(bool strict_capacity_limit) override { + target_->SetStrictCapacityLimit(strict_capacity_limit); + } + + bool HasStrictCapacityLimit() const override { + return target_->HasStrictCapacityLimit(); + } + + size_t GetCapacity() const override { return target_->GetCapacity(); } + + size_t GetUsage() const override { return target_->GetUsage(); } + + size_t GetUsage(Handle* handle) const override { + return target_->GetUsage(handle); + } + + size_t GetPinnedUsage() const override { return target_->GetPinnedUsage(); } + + size_t GetCharge(Handle* handle) const override { + return target_->GetCharge(handle); + } + + const CacheItemHelper* GetCacheItemHelper(Handle* handle) const override { + return target_->GetCacheItemHelper(handle); + } + + void ApplyToAllEntries( + const std::function& callback, + const ApplyToAllEntriesOptions& opts) override { + target_->ApplyToAllEntries(callback, opts); + } + + void EraseUnRefEntries() override { target_->EraseUnRefEntries(); } + + void StartAsyncLookup(AsyncLookupHandle& async_handle) override { + target_->StartAsyncLookup(async_handle); + } + + void WaitAll(AsyncLookupHandle* async_handles, size_t count) override { + target_->WaitAll(async_handles, count); + } + + protected: + std::shared_ptr target_; +}; + +// Useful for cache entries requiring no clean-up, such as for cache +// reservations +extern const Cache::CacheItemHelper kNoopCacheItemHelper; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/advanced_options.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/advanced_options.h new file mode 100644 index 0000000000..817a720282 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/advanced_options.h @@ -0,0 +1,1177 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
See the AUTHORS file for names of contributors. + +#pragma once + +#include + +#include "rocksdb/cache.h" +#include "rocksdb/compression_type.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/universal_compaction.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; +class SliceTransform; +class TablePropertiesCollectorFactory; +class TableFactory; +struct Options; + +enum CompactionStyle : char { + // level based compaction style + kCompactionStyleLevel = 0x0, + // Universal compaction style + kCompactionStyleUniversal = 0x1, + // FIFO compaction style + kCompactionStyleFIFO = 0x2, + // Disable background compaction. Compaction jobs are submitted + // via CompactFiles(). + kCompactionStyleNone = 0x3, +}; + +// In Level-based compaction, it Determines which file from a level to be +// picked to merge to the next level. We suggest people try +// kMinOverlappingRatio first when you tune your database. +enum CompactionPri : char { + // Slightly prioritize larger files by size compensated by #deletes + kByCompensatedSize = 0x0, + // First compact files whose data's latest update time is oldest. + // Try this if you only update some hot keys in small ranges. + kOldestLargestSeqFirst = 0x1, + // First compact files whose range hasn't been compacted to the next level + // for the longest. If your updates are random across the key space, + // write amplification is slightly better with this option. + kOldestSmallestSeqFirst = 0x2, + // First compact files whose ratio between overlapping size in next level + // and its size is the smallest. It in many cases can optimize write + // amplification. + kMinOverlappingRatio = 0x3, + // Keeps a cursor(s) of the successor of the file (key range) was/were + // compacted before, and always picks the next files (key range) in that + // level. The file picking process will cycle through all the files in a + // round-robin manner. + kRoundRobin = 0x4, +}; + +// Compression options for different compression algorithms like Zlib +struct CompressionOptions { + // ==> BEGIN options that can be set by deprecated configuration syntax, <== + // ==> e.g. compression_opts=5:6:7:8:9:10:true:11:false <== + // ==> Please use compression_opts={level=6;strategy=7;} form instead. <== + + // RocksDB's generic default compression level. Internally it'll be translated + // to the default compression level specific to the library being used (see + // comment above `ColumnFamilyOptions::compression`). + // + // The default value is the max 16-bit int as it'll be written out in OPTIONS + // file, which should be portable. + static constexpr int kDefaultCompressionLevel = 32767; + + // zlib only: windowBits parameter. See https://www.zlib.net/manual.html + int window_bits = -14; + + // Compression "level" applicable to zstd, zlib, LZ4. Except for + // kDefaultCompressionLevel (see above), the meaning of each value depends + // on the compression algorithm. + int level = kDefaultCompressionLevel; + + // zlib only: strategy parameter. See https://www.zlib.net/manual.html + int strategy = 0; + + // Maximum size of dictionaries used to prime the compression library. + // Enabling dictionary can improve compression ratios when there are + // repetitions across data blocks. + // + // The dictionary is created by sampling the SST file data. If + // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's + // dictionary generator (see comments for option `use_zstd_dict_trainer` for + // detail on dictionary generator). 
If `zstd_max_train_bytes` is zero, the + // random samples are used directly as the dictionary. + // + // When compression dictionary is disabled, we compress and write each block + // before buffering data for the next one. When compression dictionary is + // enabled, we buffer SST file data in-memory so we can sample it, as data + // can only be compressed and written after the dictionary has been finalized. + // + // The amount of data buffered can be limited by `max_dict_buffer_bytes`. This + // buffered memory is charged to the block cache when there is a block cache. + // If block cache insertion fails with `Status::MemoryLimit` (i.e., it is + // full), we finalize the dictionary with whatever data we have and then stop + // buffering. + uint32_t max_dict_bytes = 0; + + // Maximum size of training data passed to zstd's dictionary trainer. Using + // zstd's dictionary trainer can achieve even better compression ratio + // improvements than using `max_dict_bytes` alone. + // + // The training data will be used to generate a dictionary of max_dict_bytes. + uint32_t zstd_max_train_bytes = 0; + + // Number of threads for parallel compression. + // Parallel compression is enabled only if threads > 1. + // THE FEATURE IS STILL EXPERIMENTAL + // + // This option is valid only when BlockBasedTable is used. + // + // When parallel compression is enabled, SST size file sizes might be + // more inflated compared to the target size, because more data of unknown + // compressed size is in flight when compression is parallelized. To be + // reasonably accurate, this inflation is also estimated by using historical + // compression ratio and current bytes inflight. + uint32_t parallel_threads = 1; + + // When the compression options are set by the user, it will be set to "true". + // For bottommost_compression_opts, to enable it, user must set enabled=true. + // Otherwise, bottommost compression will use compression_opts as default + // compression options. + // + // For compression_opts, if compression_opts.enabled=false, it is still + // used as compression options for compression process. + bool enabled = false; + + // Limit on data buffering when gathering samples to build a dictionary. Zero + // means no limit. When dictionary is disabled (`max_dict_bytes == 0`), + // enabling this limit (`max_dict_buffer_bytes != 0`) has no effect. + // + // In compaction, the buffering is limited to the target file size (see + // `target_file_size_base` and `target_file_size_multiplier`) even if this + // setting permits more buffering. Since we cannot determine where the file + // should be cut until data blocks are compressed with dictionary, buffering + // more than the target file size could lead to selecting samples that belong + // to a later output SST. + // + // Limiting too strictly may harm dictionary effectiveness since it forces + // RocksDB to pick samples from the initial portion of the output SST, which + // may not be representative of the whole file. Configuring this limit below + // `zstd_max_train_bytes` (when enabled) can restrict how many samples we can + // pass to the dictionary trainer. Configuring it below `max_dict_bytes` can + // restrict the size of the final dictionary. + uint64_t max_dict_buffer_bytes = 0; + + // Use zstd trainer to generate dictionaries. When this option is set to true, + // zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes + // buffered data will be passed to zstd dictionary trainer to generate a + // dictionary of size max_dict_bytes. 
+ // + // When this option is false, zstd's API ZDICT_finalizeDictionary() will be + // called to generate dictionaries. zstd_max_train_bytes of training sampled + // data will be passed to this API. Using this API should save CPU time on + // dictionary training, but the compression ratio may not be as good as using + // a dictionary trainer. + bool use_zstd_dict_trainer = true; + + // ===> END options that can be set by deprecated configuration syntax <=== + // ===> Use compression_opts={level=6;strategy=7;} form for below opts <=== + + // Essentially specifies a minimum acceptable compression ratio. A block is + // stored uncompressed if the compressed block does not achieve this ratio, + // because the downstream cost of decompression is not considered worth such + // a small savings (if any). + // However, the ratio is specified in a way that is efficient for checking. + // An integer from 1 to 1024 indicates the maximum allowable compressed bytes + // per 1KB of input, so the minimum acceptable ratio is 1024.0 / this value. + // For example, for a minimum ratio of 1.5:1, set to 683. See SetMinRatio(). + // Default: abandon use of compression for a specific block or entry if + // compressed by less than 12.5% (minimum ratio of 1.143:1). + int max_compressed_bytes_per_kb = 1024 * 7 / 8; + + // A convenience function for setting max_compressed_bytes_per_kb based on a + // minimum acceptable compression ratio (uncompressed size over compressed + // size). + void SetMinRatio(double min_ratio) { + max_compressed_bytes_per_kb = static_cast(1024.0 / min_ratio + 0.5); + } +}; + +// Temperature of a file. Used to pass to FileSystem for a different +// placement and/or coding. +// Reserve some numbers in the middle, in case we need to insert new tier +// there. +enum class Temperature : uint8_t { + kUnknown = 0, + kHot = 0x04, + kWarm = 0x08, + kCold = 0x0C, + kLastTemperature, +}; + +struct FileTemperatureAge { + Temperature temperature = Temperature::kUnknown; + uint64_t age = 0; +}; + +struct CompactionOptionsFIFO { + // once the total sum of table files reaches this, we will delete the oldest + // table file + // Default: 1GB + uint64_t max_table_files_size; + + // If true, try to do compaction to compact smaller files into larger ones. + // Minimum files to compact follows options.level0_file_num_compaction_trigger + // and compaction won't trigger if average compact bytes per del file is + // larger than options.write_buffer_size. This is to protect large files + // from being compacted again. + // Default: false; + bool allow_compaction = false; + + // DEPRECATED + // When not 0, if the data in the file is older than this threshold, RocksDB + // will soon move the file to warm temperature. + uint64_t age_for_warm = 0; + + // EXPERIMENTAL + // Age (in seconds) threshold for different file temperatures. + // When not empty, each element specifies an age threshold `age` and a + // temperature such that if all the data in a file is older than `age`, + // RocksDB will compact the file to the specified `temperature`. + // + // Note: + // - Flushed files will always have temperature kUnknown. + // - Compaction output files will have temperature kUnknown by default, so + // only temperatures other than kUnknown needs to be specified. + // - The elements should be in increasing order with respect to `age` field. 
+ // + // Dynamically changeable through SetOptions() API, e.g., + // SetOptions("compaction_options_fifo", + // "{file_temperature_age_thresholds={ + // {age=10;temperature=kWarm}:{age=20;temperature=kCold}}}") + // In this example, all files that are at least 20 seconds old will be + // compacted and output files will have temperature kCold. All files that are + // at least 10 seconds old but younger than 20 seconds will be compacted to + // files with temperature kWarm. + // + // Default: empty + std::vector file_temperature_age_thresholds{}; + + CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {} + CompactionOptionsFIFO(uint64_t _max_table_files_size, bool _allow_compaction) + : max_table_files_size(_max_table_files_size), + allow_compaction(_allow_compaction) {} +}; + +// The control option of how the cache tiers will be used. Currently rocksdb +// support block cache (volatile tier), secondary cache (non-volatile tier). +// In the future, we may add more caching layers. +enum class CacheTier : uint8_t { + kVolatileTier = 0, + kNonVolatileBlockTier = 0x01, +}; + +enum UpdateStatus { // Return status For inplace update callback + UPDATE_FAILED = 0, // Nothing to update + UPDATED_INPLACE = 1, // Value updated inplace + UPDATED = 2, // No inplace update. Merged value set +}; + +enum class PrepopulateBlobCache : uint8_t { + kDisable = 0x0, // Disable prepopulate blob cache + kFlushOnly = 0x1, // Prepopulate blobs during flush only +}; + +struct AdvancedColumnFamilyOptions { + // The maximum number of write buffers that are built up in memory. + // The default and the minimum number is 2, so that when 1 write buffer + // is being flushed to storage, new writes can continue to the other + // write buffer. + // If max_write_buffer_number > 3, writing will be slowed down to + // options.delayed_write_rate if we are writing to the last write buffer + // allowed. + // + // Default: 2 + // + // Dynamically changeable through SetOptions() API + int max_write_buffer_number = 2; + + // The minimum number of write buffers that will be merged together + // before writing to storage. If set to 1, then + // all write buffers are flushed to L0 as individual files and this increases + // read amplification because a get request has to check in all of these + // files. Also, an in-memory merge may result in writing lesser + // data to storage if there are duplicate records in each of these + // individual write buffers. + // If atomic flush is enabled (options.atomic_flush == true), then this + // option will be sanitized to 1. + // Default: 1 + int min_write_buffer_number_to_merge = 1; + + // DEPRECATED + // The total maximum number of write buffers to maintain in memory including + // copies of buffers that have already been flushed. Unlike + // max_write_buffer_number, this parameter does not affect flushing. + // This parameter is being replaced by max_write_buffer_size_to_maintain. + // If both parameters are set to non-zero values, this parameter will be + // ignored. + int max_write_buffer_number_to_maintain = 0; + + // The target number of write history bytes to hold in memory. Write history + // comprises the latest write buffers (memtables). To reach the target, write + // buffers that were most recently flushed to SST files may be retained in + // memory. + // + // This controls the target amount of write history that will be available + // in memory for conflict checking when Transactions are used. 
+
+// The control option of how the cache tiers will be used. Currently rocksdb
+// supports the block cache (volatile tier) and the secondary cache
+// (non-volatile tier). In the future, we may add more caching layers.
+enum class CacheTier : uint8_t {
+ kVolatileTier = 0,
+ kNonVolatileBlockTier = 0x01,
+};
+
+enum UpdateStatus { // Return status for inplace update callback
+ UPDATE_FAILED = 0, // Nothing to update
+ UPDATED_INPLACE = 1, // Value updated inplace
+ UPDATED = 2, // No inplace update. Merged value set
+};
+
+enum class PrepopulateBlobCache : uint8_t {
+ kDisable = 0x0, // Disable prepopulate blob cache
+ kFlushOnly = 0x1, // Prepopulate blobs during flush only
+};
+
+struct AdvancedColumnFamilyOptions {
+ // The maximum number of write buffers that are built up in memory.
+ // The default and the minimum number is 2, so that when 1 write buffer
+ // is being flushed to storage, new writes can continue to the other
+ // write buffer.
+ // If max_write_buffer_number > 3, writing will be slowed down to
+ // options.delayed_write_rate if we are writing to the last write buffer
+ // allowed.
+ //
+ // Default: 2
+ //
+ // Dynamically changeable through SetOptions() API
+ int max_write_buffer_number = 2;
+
+ // The minimum number of write buffers that will be merged together
+ // before writing to storage. If set to 1, then
+ // all write buffers are flushed to L0 as individual files and this increases
+ // read amplification because a get request has to check in all of these
+ // files. Also, an in-memory merge may result in writing less
+ // data to storage if there are duplicate records in each of these
+ // individual write buffers.
+ // If atomic flush is enabled (options.atomic_flush == true), then this
+ // option will be sanitized to 1.
+ // Default: 1
+ int min_write_buffer_number_to_merge = 1;
+
+ // DEPRECATED
+ // The total maximum number of write buffers to maintain in memory including
+ // copies of buffers that have already been flushed. Unlike
+ // max_write_buffer_number, this parameter does not affect flushing.
+ // This parameter is being replaced by max_write_buffer_size_to_maintain.
+ // If both parameters are set to non-zero values, this parameter will be
+ // ignored.
+ int max_write_buffer_number_to_maintain = 0;
+
+ // The target number of write history bytes to hold in memory. Write history
+ // comprises the latest write buffers (memtables). To reach the target, write
+ // buffers that were most recently flushed to SST files may be retained in
+ // memory.
+ //
+ // This controls the target amount of write history that will be available
+ // in memory for conflict checking when Transactions are used.
+ //
+ // This target may be undershot when the CF first opens and has not recovered
+ // or received enough writes to reach the target. After reaching the target
+ // once, it is guaranteed to never undershoot again. That guarantee is
+ // implemented by retaining flushed write buffers in-memory until the oldest
+ // one can be trimmed without dropping below the target.
+ //
+ // Examples with `max_write_buffer_size_to_maintain` set to 32MB:
+ //
+ // - One mutable memtable of 64MB, one unflushed immutable memtable of 64MB,
+ // and zero flushed immutable memtables. Nothing trimmable exists.
+ // - One mutable memtable of 16MB, zero unflushed immutable memtables, and
+ // one flushed immutable memtable of 64MB. Trimming is disallowed because
+ // dropping the earliest (only) flushed immutable memtable would result in
+ // write history of 16MB < 32MB.
+ // - One mutable memtable of 24MB, one unflushed immutable memtable of 16MB,
+ // and one flushed immutable memtable of 16MB. The earliest (only) flushed
+ // immutable memtable is trimmed because without it we still have
+ // 16MB + 24MB = 40MB > 32MB of write history.
+ //
+ // When using an OptimisticTransactionDB:
+ // If this value is too low, some transactions may fail at commit time due
+ // to not being able to determine whether there were any write conflicts.
+ //
+ // When using a TransactionDB:
+ // If Transaction::SetSnapshot is used, TransactionDB will read either
+ // in-memory write buffers or SST files to do write-conflict checking.
+ // Increasing this value can reduce the number of reads to SST files
+ // done for conflict detection.
+ //
+ // Setting this value to 0 will cause write buffers to be freed immediately
+ // after they are flushed. If this value is set to -1,
+ // 'max_write_buffer_number * write_buffer_size' will be used.
+ //
+ // Default:
+ // If using a TransactionDB/OptimisticTransactionDB, the default value will
+ // be set to the value of 'max_write_buffer_number * write_buffer_size'
+ // if it is not explicitly set by the user. Otherwise, the default is 0.
+ int64_t max_write_buffer_size_to_maintain = 0;
+
+ // Allows thread-safe inplace updates. If this is true, there is no way to
+ // achieve point-in-time consistency using snapshot or iterator (assuming
+ // concurrent updates). Hence iterator and multi-get will return results
+ // which are not consistent as of any point-in-time.
+ // Backward iteration on memtables will not work either.
+ // If the inplace_callback function is not set,
+ // Put(key, new_value) will update inplace the existing_value iff
+ // * key exists in current memtable
+ // * new sizeof(new_value) <= sizeof(existing_value)
+ // * existing_value for that key is a put i.e. kTypeValue
+ // If the inplace_callback function is set, check the doc for
+ // inplace_callback.
+ // Default: false.
+ bool inplace_update_support = false;
+
+ // Number of locks used for inplace update.
+ // Default: 10000, if inplace_update_support = true, else 0.
+ //
+ // Dynamically changeable through SetOptions() API
+ size_t inplace_update_num_locks = 10000;
+
+ // [experimental]
+ // Used to activate or deactivate the Mempurge feature (memtable garbage
+ // collection), which is deactivated by default. At every flush, the total
+ // useful payload (total entries minus garbage entries) is estimated as a
+ // ratio [useful payload bytes]/[size of a memtable (in bytes)]. This ratio
+ // is then compared to this `threshold` value:
+ // - if ratio < threshold: the flush is replaced by a mempurge operation
+ // - else: a regular flush operation takes place.
+ // Threshold values:
+ // 0.0: mempurge deactivated (default).
+ // 1.0: recommended threshold value.
+ // >1.0: aggressive mempurge.
+ // 0 < threshold < 1.0: mempurge triggered only for very low useful payload
+ // ratios.
+ // [experimental]
+ double experimental_mempurge_threshold = 0.0;
+
+ // existing_value - pointer to previous value (from both memtable and sst).
+ // nullptr if key doesn't exist
+ // existing_value_size - pointer to size of existing_value.
+ // nullptr if key doesn't exist
+ // delta_value - Delta value to be merged with the existing_value.
+ // Stored in transaction logs.
+ // merged_value - Set when delta is applied on the previous value.
+ //
+ // Applicable only when inplace_update_support is true,
+ // this callback function is called at the time of updating the memtable
+ // as part of a Put operation, let's say Put(key, delta_value). It allows the
+ // 'delta_value' specified as part of the Put operation to be merged with
+ // an 'existing_value' of the key in the database.
+ //
+ // If the merged value is smaller in size than the 'existing_value',
+ // then this function can update the 'existing_value' buffer inplace and
+ // the corresponding 'existing_value'_size pointer, if it wishes to.
+ // The callback should return UpdateStatus::UPDATED_INPLACE.
+ // (In this case, the snapshot-semantics of the rocksdb
+ // Iterator is not atomic anymore).
+ //
+ // If the merged value is larger in size than the 'existing_value' or the
+ // application does not wish to modify the 'existing_value' buffer inplace,
+ // then the merged value should be returned via *merged_value. It is set by
+ // merging the 'existing_value' and the Put 'delta_value'. The callback should
+ // return UpdateStatus::UPDATED in this case. This merged value will be added
+ // to the memtable.
+ //
+ // If merging fails or the application does not wish to take any action,
+ // then the callback should return UpdateStatus::UPDATE_FAILED.
+ //
+ // Please remember that the original call from the application is Put(key,
+ // delta_value). So the transaction log (if enabled) will still contain (key,
+ // delta_value). The 'merged_value' is not stored in the transaction log.
+ // Hence the inplace_callback function should be consistent across db reopens.
+ //
+ // RocksDB callbacks are NOT exception-safe. A callback completing with an
+ // exception can lead to undefined behavior in RocksDB, including data loss,
+ // unreported corruption, deadlocks, and more.
+ //
+ // Default: nullptr
+ UpdateStatus (*inplace_callback)(char* existing_value,
+ uint32_t* existing_value_size,
+ Slice delta_value,
+ std::string* merged_value) = nullptr;
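+
+ // Example (illustrative sketch of a conforming callback): treat values as
+ // fixed-width 8-byte counters and add the delta in place, per the size
+ // rules documented above.
+ //
+ //   UpdateStatus AddCounter(char* existing_value,
+ //                           uint32_t* existing_value_size,
+ //                           Slice delta_value, std::string* merged_value) {
+ //     if (existing_value != nullptr && *existing_value_size == 8 &&
+ //         delta_value.size() == 8) {
+ //       uint64_t current, delta;
+ //       memcpy(&current, existing_value, 8);
+ //       memcpy(&delta, delta_value.data(), 8);
+ //       current += delta;
+ //       memcpy(existing_value, &current, 8);  // same size: update in place
+ //       return UpdateStatus::UPDATED_INPLACE;
+ //     }
+ //     return UpdateStatus::UPDATE_FAILED;
+ //   }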
+
+ // Should really be called `memtable_bloom_size_ratio`. Enables a dynamic
+ // Bloom filter in memtable to optimize many queries that must go beyond
+ // the memtable. The size in bytes of the filter is
+ // write_buffer_size * memtable_prefix_bloom_size_ratio.
+ // * If prefix_extractor is set, the filter includes prefixes.
+ // * If memtable_whole_key_filtering, the filter includes whole keys.
+ // * If both, the filter includes both.
+ // * If neither, the feature is disabled.
+ //
+ // If this value is larger than 0.25, it is sanitized to 0.25.
+ //
+ // Default: 0 (disabled)
+ //
+ // Dynamically changeable through SetOptions() API
+ double memtable_prefix_bloom_size_ratio = 0.0;
+
+ // Enable whole key bloom filter in memtable. Note this will only take effect
+ // if memtable_prefix_bloom_size_ratio is not 0. Enabling whole key filtering
+ // can potentially reduce CPU usage for point lookups.
+ //
+ // Default: false (disabled)
+ //
+ // Dynamically changeable through SetOptions() API
+ bool memtable_whole_key_filtering = false;
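+
+ // Example (illustrative sketch): enabling the memtable Bloom filter
+ // together with a prefix extractor. With a 64MB write buffer and a 0.1
+ // ratio, the filter costs about 6.4MB. NewFixedPrefixTransform() is the
+ // stock extractor from rocksdb/slice_transform.h.
+ //
+ //   Options options;
+ //   options.prefix_extractor.reset(NewFixedPrefixTransform(8));
+ //   options.memtable_prefix_bloom_size_ratio = 0.1;
+ //   options.memtable_whole_key_filtering = true;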
+
+ // Page size for huge page for the arena used by the memtable. If <= 0, it
+ // won't allocate from huge page but from malloc.
+ // Users are responsible for reserving huge pages for it to be allocated.
+ // For example:
+ // sysctl -w vm.nr_hugepages=20
+ // See linux doc Documentation/vm/hugetlbpage.txt
+ // If there aren't enough free huge pages available, it will fall back to
+ // malloc.
+ //
+ // Dynamically changeable through SetOptions() API
+ size_t memtable_huge_page_size = 0;
+
+ // If non-nullptr, memtable will use the specified function to extract
+ // prefixes for keys, and for each prefix maintain a hint of insert location
+ // to reduce CPU usage for inserting keys with the prefix. Keys out of
+ // domain of the prefix extractor will be inserted without using hints.
+ //
+ // Currently only the default skiplist based memtable implements the feature.
+ // All other memtable implementations will ignore the option. It incurs ~250
+ // additional bytes of memory overhead to store a hint for each prefix.
+ // Also concurrent writes (when allow_concurrent_memtable_write is true) will
+ // ignore the option.
+ //
+ // The option is best suited for workloads where keys are likely to be
+ // inserted at a location close to the last inserted key with the same
+ // prefix. One example could be inserting keys of the form
+ // (prefix + timestamp), where keys of the same prefix always come in with
+ // time order. Another example would be updating the same key over and over
+ // again, in which case the prefix can be the key itself.
+ //
+ // Default: nullptr (disabled)
+ std::shared_ptr<const SliceTransform>
+ memtable_insert_with_hint_prefix_extractor = nullptr;
+
+ // Control locality of bloom filter probes to improve CPU cache hit rate.
+ // This option now only applies to plaintable prefix bloom. This
+ // optimization is turned off when set to 0, and on for any positive number.
+ // Default: 0
+ uint32_t bloom_locality = 0;
+
+ // Size of one block in arena memory allocation.
+ // If <= 0, a proper value is automatically calculated (usually 1/8 of
+ // write_buffer_size, rounded up to a multiple of 4KB, or 1MB, whichever is
+ // smaller).
+ //
+ // There are two additional restrictions on the specified size:
+ // (1) size should be in the range of [4096, 2 << 30] and
+ // (2) it should be a multiple of the CPU word size (which helps with memory
+ // alignment).
+ //
+ // We'll automatically check and adjust the size number to make sure it
+ // conforms to the restrictions.
+ //
+ // Default: 0
+ //
+ // Dynamically changeable through SetOptions() API
+ size_t arena_block_size = 0;
+
+ // Different levels can have different compression policies. There
+ // are cases where most lower levels would like to use quick compression
+ // algorithms while the higher levels (which have more data) use
+ // compression algorithms that have better compression but could
+ // be slower. This array, if non-empty, should have an entry for
+ // each level of the database; these override the value specified in
+ // the previous field 'compression'.
+ //
+ // NOTICE if level_compaction_dynamic_level_bytes=true,
+ // compression_per_level[0] still determines L0, but other elements
+ // of the array are based on the base level (the level L0 files are merged
+ // to), and may not match the level users see from the info log for metadata.
+ // If L0 files are merged to level-n, then, for i>0, compression_per_level[i]
+ // determines the compression type for level n+i-1.
+ // For example, if we have 5 levels, and we determine to merge L0
+ // data to L4 (which means L1..L3 will be empty), then the new files that go
+ // to L4 use compression type compression_per_level[1].
+ // If L0 is instead merged to L2, data that goes to L2 will be compressed
+ // according to compression_per_level[1], L3 using compression_per_level[2]
+ // and L4 using compression_per_level[3]. The compression for each level can
+ // change as data grows.
+ //
+ // NOTE: if the vector size is smaller than the level number, the undefined
+ // lower levels use the last option in the vector. For example, for a 3 level
+ // LSM tree the following settings are the same:
+ // {kNoCompression, kSnappyCompression}
+ // {kNoCompression, kSnappyCompression, kSnappyCompression}
+ //
+ // Dynamically changeable through SetOptions() API
+ std::vector<CompressionType> compression_per_level;
+
+ // Number of levels for this database
+ int num_levels = 7;
+
+ // Soft limit on number of level-0 files. We start slowing down writes at this
+ // point. A value < 0 means that no writing slow down will be triggered by
+ // the number of files in level-0.
+ //
+ // Default: 20
+ //
+ // Dynamically changeable through SetOptions() API
+ int level0_slowdown_writes_trigger = 20;
+
+ // Maximum number of level-0 files. We stop writes at this point.
+ //
+ // Default: 36
+ //
+ // Dynamically changeable through SetOptions() API
+ int level0_stop_writes_trigger = 36;
+
+ // Target file size for compaction.
+ // target_file_size_base is per-file size for level-1.
+ // Target file size for level L can be calculated by
+ // target_file_size_base * (target_file_size_multiplier ^ (L-1))
+ // For example, if target_file_size_base is 2MB and
+ // target_file_size_multiplier is 10, then each file on level-1 will
+ // be 2MB, each file on level-2 will be 20MB,
+ // and each file on level-3 will be 200MB.
+ //
+ // Default: 64MB.
+ //
+ // Dynamically changeable through SetOptions() API
+ uint64_t target_file_size_base = 64 * 1048576;
+
+ // By default target_file_size_multiplier is 1, which means
+ // by default files in different levels will have similar size.
+ //
+ // Dynamically changeable through SetOptions() API
+ int target_file_size_multiplier = 1;
+
+ // If true, RocksDB will pick the target size of each level dynamically.
+ // We will pick a base level b >= 1. L0 will be directly merged into level b,
+ // instead of always into level 1. Levels 1 to b-1 need to be empty.
+ // We try to pick b and its target size so that
+ // 1. target size is in the range of
+ // (max_bytes_for_level_base / max_bytes_for_level_multiplier,
+ // max_bytes_for_level_base]
+ // 2. target size of the last level (level num_levels-1) equals the extra
+ // size of the level.
+ // At the same time max_bytes_for_level_multiplier and
+ // max_bytes_for_level_multiplier_additional are still satisfied.
+ // (When L0 is too large, we make some adjustment. See below.)
+ //
+ // With this option on, from an empty DB, we make the last level the base
+ // level, which means merging L0 data into the last level, until it exceeds
+ // max_bytes_for_level_base. Then we make the second last level the base
+ // level, to start merging L0 data into the second last level, with its
+ // target size being 1/max_bytes_for_level_multiplier of the last level's
+ // extra size. As the data accumulates further, we move the base level to
+ // the third last level, and so on.
+ //
+ // For example, assume max_bytes_for_level_multiplier=10, num_levels=6,
+ // and max_bytes_for_level_base=10MB.
+ // Target sizes of levels 1 to 5 start at:
+ // [- - - - 10MB]
+ // with the base level being level 5. Target sizes of levels 1 to 4 are not
+ // applicable because they will not be used.
+ // Until the size of level 5 grows to more than 10MB, say 11MB, we make
+ // level 4 the base target and now the targets look like:
+ // [- - - 1.1MB 11MB]
+ // While data is accumulated, size targets are tuned based on actual data
+ // of level 5. When level 5 has 50MB of data, the targets are:
+ // [- - - 5MB 50MB]
+ // Until level 5's actual size is more than 100MB, say 101MB. Now if we keep
+ // level 4 as the base level, its target size needs to be 10.1MB, which
+ // doesn't satisfy the target size range. So now we make level 3 the base
+ // level and the target sizes of the levels look like:
+ // [- - 1.01MB 10.1MB 101MB]
+ // In the same way, while level 5 further grows, all levels' targets grow,
+ // like
+ // [- - 5MB 50MB 500MB]
+ // Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
+ // base level and make the levels' target sizes like this:
+ // [- 1.001MB 10.01MB 100.1MB 1001MB]
+ // and so on...
+ //
+ // By doing this, we give max_bytes_for_level_multiplier priority over
+ // max_bytes_for_level_base, for a more predictable LSM tree shape. It is
+ // useful to limit worst-case space amplification.
+ //
+ //
+ // If the compaction from L0 lags behind, a special mode will be turned
+ // on to prioritize write amplification over max_bytes_for_level_multiplier
+ // or max_bytes_for_level_base. L0 compaction is considered lagging based on
+ // the number of L0 files and the total L0 size. If the number of L0 files
+ // is at least double level0_file_num_compaction_trigger, or the total size
+ // is at least max_bytes_for_level_base, this mode is on. The target of L1
+ // grows to the actual data size in L0, and the target for each level is
+ // then determined so that each level has the same level multiplier.
+ //
+ // For example, when the L0 size is 100MB, the size of the last level is
+ // 1600MB, max_bytes_for_level_base = 80MB, and
+ // max_bytes_for_level_multiplier = 10.
+ // Since the L0 size is larger than max_bytes_for_level_base, this is the L0
+ // compaction backlogged mode. So the L1 size is determined to be 100MB.
+ // Based on max_bytes_for_level_multiplier = 10, at least 3 non-0 levels will
+ // be needed. The level multiplier will be calculated to be 4 and the three
+ // levels' targets will be [100MB, 400MB, 1600MB].
+ //
+ // In this mode, the number of levels will be no more than in the normal
+ // mode, and the level multiplier will be lower. The write amplification
+ // will likely be reduced.
+ //
+ //
+ // max_bytes_for_level_multiplier_additional is ignored with this flag on.
+ //
+ // To make the migration easier, when turning this feature on, files in the
+ // LSM will be trivially moved down to fill the LSM starting from the
+ // bottommost level during DB open. For example, if the LSM looks like:
+ // L0: f0, f1
+ // L1: f2, f3
+ // L2: f4
+ // L3:
+ // L4: f5
+ // and the DB is opened with num_levels = 7 with this feature turned on, the
+ // new LSM after DB open looks like the following:
+ // L0: f0, f1, (and possibly data flushed from WAL)
+ // L4: f2, f3
+ // L5: f4
+ // L6: f5
+ //
+ // If `allow_ingest_behind=true` or `preclude_last_level_data_seconds > 0`,
+ // then the last level is reserved, and we will start filling the LSM from
+ // the second last level (L5 in the above example).
+ //
+ // Note that there may be excessive levels (where the target level size is 0
+ // when computed based on this feature) in the LSM after a user migrates to
+ // turn this feature on. This is especially likely when a user migrates from
+ // leveled compaction with a smaller multiplier or from universal compaction.
+ // RocksDB will gradually drain these unnecessary levels by compacting files
+ // down the LSM.
+ //
+ // Default: false
+ bool level_compaction_dynamic_level_bytes = false;
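+
+ // Example (illustrative): with the defaults num_levels=7,
+ // max_bytes_for_level_base=256MB and max_bytes_for_level_multiplier=10,
+ // a DB holding about 50GB settles on targets of roughly 50GB (L6),
+ // 5GB (L5), 500MB (L4) and 50MB (L3), with L3 as the base level receiving
+ // L0 files and L1-L2 staying empty, per the target-size rules above.
+ //
+ //   Options options;
+ //   options.level_compaction_dynamic_level_bytes = true;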
+
+ // Allows RocksDB to generate files that are not exactly the target_file_size
+ // only for the non-bottommost files, which can reduce the write
+ // amplification from compaction. The file size could be from 0 to
+ // 2x target_file_size. Once enabled, non-bottommost compaction will try to
+ // cut the files aligned with the next level's file boundaries (grandparent
+ // level).
+ //
+ // Default: true
+ bool level_compaction_dynamic_file_size = true;
+
+ // Default: 10.
+ //
+ // Dynamically changeable through SetOptions() API
+ double max_bytes_for_level_multiplier = 10;
+
+ // Different max-size multipliers for different levels.
+ // These are multiplied by max_bytes_for_level_multiplier to arrive
+ // at the max-size of each level.
+ //
+ // Default: 1
+ //
+ // Dynamically changeable through SetOptions() API
+ std::vector<int> max_bytes_for_level_multiplier_additional =
+ std::vector<int>(num_levels, 1);
+
+ // We try to limit the number of bytes in one compaction to be lower than
+ // this threshold. But it's not guaranteed.
+ // Value 0 will be sanitized.
+ //
+ // Default: target_file_size_base * 25
+ //
+ // Dynamically changeable through SetOptions() API
+ uint64_t max_compaction_bytes = 0;
+
+ // When setting up compaction input files, we ignore the
+ // `max_compaction_bytes` limit when pulling in input files that are entirely
+ // within the output key range.
+ //
+ // Default: true
+ //
+ // Dynamically changeable through SetOptions() API
+ // We could remove this knob and always ignore the limit once it is proven
+ // safe.
+ bool ignore_max_compaction_bytes_for_input = true;
+
+ // All writes will be slowed down to at least delayed_write_rate if the
+ // estimated bytes needing compaction exceed this threshold.
+ //
+ // Default: 64GB
+ //
+ // Dynamically changeable through SetOptions() API
+ uint64_t soft_pending_compaction_bytes_limit = 64 * 1073741824ull;
+
+ // All writes are stopped if the estimated bytes needing compaction exceed
+ // this threshold.
+ //
+ // Default: 256GB
+ //
+ // Dynamically changeable through SetOptions() API
+ uint64_t hard_pending_compaction_bytes_limit = 256 * 1073741824ull;
+
+ // The compaction style. Default: kCompactionStyleLevel
+ CompactionStyle compaction_style = kCompactionStyleLevel;
+
+ // If compaction_style = kCompactionStyleLevel, this determines, for each
+ // level, which files are prioritized to be picked to compact.
+ // Default: kMinOverlappingRatio
+ CompactionPri compaction_pri = kMinOverlappingRatio;
+
+ // The options needed to support Universal Style compactions
+ //
+ // Dynamically changeable through SetOptions() API
+ // Dynamic change example:
+ // SetOptions("compaction_options_universal", "{size_ratio=2;}")
+ CompactionOptionsUniversal compaction_options_universal;
+
+ // The options for FIFO compaction style
+ //
+ // Dynamically changeable through SetOptions() API
+ // Dynamic change example:
+ // SetOptions("compaction_options_fifo", "{max_table_files_size=100;}")
+ CompactionOptionsFIFO compaction_options_fifo;
+
+ // An iterator's Next() sequentially skips over keys with the same
+ // user-key unless this option is set. This number specifies the number
+ // of keys (with the same userkey) that will be sequentially
+ // skipped before a reseek is issued.
+ //
+ // Default: 8
+ //
+ // Dynamically changeable through SetOptions() API
+ uint64_t max_sequential_skip_in_iterations = 8;
+
+ // This is a factory that provides MemTableRep objects.
+ // Default: a factory that provides a skip-list-based implementation of
+ // MemTableRep.
+ std::shared_ptr<MemTableRepFactory> memtable_factory =
+ std::shared_ptr<MemTableRepFactory>(new SkipListFactory);
+
+ // Block-based table related options are moved to BlockBasedTableOptions.
+ // Related options that were originally here but now moved include:
+ // no_block_cache
+ // block_cache
+ // block_cache_compressed (removed)
+ // block_size
+ // block_size_deviation
+ // block_restart_interval
+ // filter_policy
+ // whole_key_filtering
+ // If you'd like to customize some of these options, you will need to
+ // use NewBlockBasedTableFactory() to construct a new table factory.
+
+ // This option allows users to collect their own interested statistics of
+ // the tables.
+ // Default: empty vector -- no user-defined statistics collection will be
+ // performed.
+ using TablePropertiesCollectorFactories =
+ std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>;
+ TablePropertiesCollectorFactories table_properties_collector_factories;
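+
+ // Example (illustrative sketch of a user-defined collector; the class and
+ // property names are made up): count deletion entries per SST file and
+ // publish the count as a user property.
+ //
+ //   class DeleteCountCollector : public TablePropertiesCollector {
+ //    public:
+ //     Status AddUserKey(const Slice& /*key*/, const Slice& /*value*/,
+ //                       EntryType type, SequenceNumber /*seq*/,
+ //                       uint64_t /*file_size*/) override {
+ //       if (type == kEntryDelete) ++delete_count_;
+ //       return Status::OK();
+ //     }
+ //     Status Finish(UserCollectedProperties* props) override {
+ //       props->emplace("example.delete-count",
+ //                      std::to_string(delete_count_));
+ //       return Status::OK();
+ //     }
+ //     UserCollectedProperties GetReadableProperties() const override {
+ //       return {};
+ //     }
+ //     const char* Name() const override { return "DeleteCountCollector"; }
+ //
+ //    private:
+ //     uint64_t delete_count_ = 0;
+ //   };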
+
+ // Maximum number of successive merge operations on a key in the memtable.
+ //
+ // When a merge operation is added to the memtable and the maximum number of
+ // successive merges is reached, the value of the key will be calculated and
+ // inserted into the memtable instead of the merge operation. This will
+ // ensure that there are never more than max_successive_merges merge
+ // operations in the memtable.
+ //
+ // Default: 0 (disabled)
+ //
+ // Dynamically changeable through SetOptions() API
+ size_t max_successive_merges = 0;
+
+ // This flag specifies that the implementation should optimize the filters
+ // mainly for cases where keys are found rather than also optimize for keys
+ // missed. This would be used in cases where the application knows that
+ // there are very few misses or the performance in the case of misses is not
+ // important.
+ //
+ // For now, this flag allows us to not store filters for the last level, i.e.
+ // the largest level which contains data of the LSM store. For keys which
+ // are hits, the filters in this level are not useful because we will search
+ // for the data anyway. NOTE: the filters in other levels are still useful
+ // even for key hits because they tell us whether to look in that level or go
+ // to the higher level.
+ //
+ // Default: false
+ bool optimize_filters_for_hits = false;
+
+ // During flush or compaction, check whether keys inserted to output files
+ // are in order.
+ //
+ // Default: true
+ //
+ // Dynamically changeable through SetOptions() API
+ bool check_flush_compaction_key_order = true;
+
+ // After writing every SST file, reopen it and read all the keys.
+ // Checks the hash of all of the keys and values written versus the
+ // keys in the file and signals a corruption if they do not match.
+ //
+ // Default: false
+ //
+ // Dynamically changeable through SetOptions() API
+ bool paranoid_file_checks = false;
+
+ // In debug mode, RocksDB runs consistency checks on the LSM every time the
+ // LSM changes (Flush, Compaction, AddFile). When this option is true, these
+ // checks are also enabled in release mode. These checks were historically
+ // disabled in release mode, but are now enabled by default for proactive
+ // corruption detection. The CPU overhead is negligible for normal mixed
+ // operations but can slow down saturated writing. See
+ // Options::DisableExtraChecks().
+ // Default: true
+ bool force_consistency_checks = true;
+
+ // Measure IO stats in compactions and flushes, if true.
+ //
+ // Default: false
+ //
+ // Dynamically changeable through SetOptions() API
+ bool report_bg_io_stats = false;
+
+ // Files containing updates older than TTL will go through the compaction
+ // process. This usually happens in a cascading way so that those entries
+ // will be compacted to the bottommost level/file.
+ // The feature is used to remove stale entries that have been deleted or
+ // updated from the file system.
+ // Pre-req: This needs max_open_files to be set to -1.
+ // In Level: Non-bottom-level files older than TTL will go through the
+ // compaction process.
+ // In FIFO: Files older than TTL will be deleted.
+ // unit: seconds. Ex: 1 day = 1 * 24 * 60 * 60
+ // In FIFO, this option will have the same meaning as
+ // periodic_compaction_seconds. Whichever is stricter will be used.
+ // 0 means disabled.
+ // UINT64_MAX - 1 (0xfffffffffffffffe) is a special flag to allow RocksDB to
+ // pick the default.
+ //
+ // Default: 30 days for leveled compaction + block based table. Disabled
+ // otherwise.
+ //
+ // Dynamically changeable through SetOptions() API
+ uint64_t ttl = 0xfffffffffffffffe;
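+
+ // Example (illustrative): because this option is dynamically changeable,
+ // the TTL of a live column family can be tightened to, e.g., 7 days:
+ //
+ //   db->SetOptions(cf_handle, {{"ttl", "604800"}});  // 7 * 24 * 60 * 60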
+
+ // Files older than this value will be picked up for compaction, and
+ // re-written to the same level as they were before.
+ // One main use of the feature is to make sure a file goes through compaction
+ // filters periodically. Users can also use the feature to clear up SST
+ // files using an old format.
+ //
+ // A file's age is computed by looking at the file_creation_time or
+ // creation_time table properties, in order, if they have valid non-zero
+ // values; if not, the age is based on the file's last modified time (given
+ // by the underlying Env).
+ //
+ // Supported in all compaction styles.
+ // In Universal compaction, rocksdb will try to do a full compaction when
+ // possible, see more in UniversalCompactionBuilder::PickPeriodicCompaction().
+ // In FIFO compaction, this option has the same meaning as TTL and whichever
+ // is stricter will be used.
+ // Pre-req: max_open_files == -1.
+ // unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60
+ //
+ // Values:
+ // 0: Turn off Periodic compactions.
+ // UINT64_MAX - 1 (i.e. 0xfffffffffffffffe): Let RocksDB control this feature
+ // as needed. For now, RocksDB will change this value to 30 days
+ // (i.e. 30 * 24 * 60 * 60) so that every file goes through the compaction
+ // process at least once every 30 days if not compacted sooner.
+ // In FIFO compaction, since the option has the same meaning as ttl,
+ // when this value is left at the default, and ttl is left at 0, 30 days
+ // will be used. Otherwise, min(ttl, periodic_compaction_seconds) will be
+ // used.
+ //
+ // Default: UINT64_MAX - 1 (allow RocksDB to auto-tune)
+ //
+ // Dynamically changeable through SetOptions() API
+ uint64_t periodic_compaction_seconds = 0xfffffffffffffffe;
+
+ // If this option is set, then 1 in N blocks are compressed
+ // using a fast (lz4) and a slow (zstd) compression algorithm.
+ // The compressibility is reported as stats and the stored
+ // data is left uncompressed (unless compression is also requested).
+ uint64_t sample_for_compression = 0;
+
+ // EXPERIMENTAL
+ // The feature is still in development and is incomplete.
+ // If this option is set, when creating the last level files, pass this
+ // temperature to the FileSystem used. This should be a no-op for the default
+ // FileSystem; users need to plug in their own FileSystem to take advantage
+ // of it.
+ //
+ // Note: the feature is changed from `bottommost_temperature` to
+ // `last_level_temperature`, which now only applies to the last level files.
+ // The option name `bottommost_temperature` is kept only for migration; the
+ // behavior is the same as `last_level_temperature`. Please stop using
+ // `bottommost_temperature`; it will be removed in the next release.
+ //
+ // Dynamically changeable through the SetOptions() API
+ Temperature bottommost_temperature = Temperature::kUnknown;
+ Temperature last_level_temperature = Temperature::kUnknown;
+
+ // EXPERIMENTAL
+ // The feature is still in development and is incomplete.
+ // If this option is set, when the data insert time is within this time
+ // range, it will be precluded from the last level.
+ // 0 means no key will be precluded from the last level.
+ //
+ // Note: when enabled, universal size amplification (controlled by option
+ // `compaction_options_universal.max_size_amplification_percent`) calculation
+ // will exclude the last level. As the feature is designed for tiered storage
+ // and a typical setting is that the last level is the cold tier, which is
+ // likely not size constrained, the size amp is going to be only for
+ // non-last levels.
+ //
+ // Default: 0 (disable the feature)
+ //
+ // Not dynamically changeable; changing it requires a DB restart.
+ uint64_t preclude_last_level_data_seconds = 0;
+
+ // EXPERIMENTAL
+ // If this option is set, it will preserve the internal time information about
+ // the data until it's older than the specified time here.
+ // Internally the time information is a map between sequence number and time,
+ // the same as for `preclude_last_level_data_seconds`. But it won't
+ // preclude the data from the last level and the data in the last level won't
+ // have the sequence number zeroed out.
+ // Internally, rocksdb samples the sequence number to time pair and stores
+ // that in the SST property "rocksdb.seqno.time.map". The information is
+ // currently only used for tiered storage compaction (option
+ // `preclude_last_level_data_seconds`).
+ //
+ // Note: if both `preclude_last_level_data_seconds` and this option are set,
+ // the max time of the 2 options will be preserved, and compaction still
+ // precludes the data based on `preclude_last_level_data_seconds`.
+ // The higher the preserve time is, the lower the sampling frequency will be
+ // (which means less accuracy of the time estimation).
+ //
+ // Default: 0 (disable the feature)
+ //
+ // Not dynamically changeable; changing it requires a DB restart.
+ uint64_t preserve_internal_time_seconds = 0; + + // When set, large values (blobs) are written to separate blob files, and + // only pointers to them are stored in SST files. This can reduce write + // amplification for large-value use cases at the cost of introducing a level + // of indirection for reads. See also the options min_blob_size, + // blob_file_size, blob_compression_type, enable_blob_garbage_collection, + // blob_garbage_collection_age_cutoff, + // blob_garbage_collection_force_threshold, and blob_compaction_readahead_size + // below. + // + // Default: false + // + // Dynamically changeable through the SetOptions() API + bool enable_blob_files = false; + + // The size of the smallest value to be stored separately in a blob file. + // Values which have an uncompressed size smaller than this threshold are + // stored alongside the keys in SST files in the usual fashion. A value of + // zero for this option means that all values are stored in blob files. Note + // that enable_blob_files has to be set in order for this option to have any + // effect. + // + // Default: 0 + // + // Dynamically changeable through the SetOptions() API + uint64_t min_blob_size = 0; + + // The size limit for blob files. When writing blob files, a new file is + // opened once this limit is reached. Note that enable_blob_files has to be + // set in order for this option to have any effect. + // + // Default: 256 MB + // + // Dynamically changeable through the SetOptions() API + uint64_t blob_file_size = 1ULL << 28; + + // The compression algorithm to use for large values stored in blob files. + // Note that enable_blob_files has to be set in order for this option to have + // any effect. + // + // Default: no compression + // + // Dynamically changeable through the SetOptions() API + CompressionType blob_compression_type = kNoCompression; + + // Enables garbage collection of blobs. Blob GC is performed as part of + // compaction. Valid blobs residing in blob files older than a cutoff get + // relocated to new files as they are encountered during compaction, which + // makes it possible to clean up blob files once they contain nothing but + // obsolete/garbage blobs. See also blob_garbage_collection_age_cutoff and + // blob_garbage_collection_force_threshold below. + // + // Default: false + // + // Dynamically changeable through the SetOptions() API + bool enable_blob_garbage_collection = false; + + // The cutoff in terms of blob file age for garbage collection. Blobs in + // the oldest N blob files will be relocated when encountered during + // compaction, where N = garbage_collection_cutoff * number_of_blob_files. + // Note that enable_blob_garbage_collection has to be set in order for this + // option to have any effect. + // + // Default: 0.25 + // + // Dynamically changeable through the SetOptions() API + double blob_garbage_collection_age_cutoff = 0.25; + + // If the ratio of garbage in the oldest blob files exceeds this threshold, + // targeted compactions are scheduled in order to force garbage collecting + // the blob files in question, assuming they are all eligible based on the + // value of blob_garbage_collection_age_cutoff above. This option is + // currently only supported with leveled compactions. + // Note that enable_blob_garbage_collection has to be set in order for this + // option to have any effect. 
+ //
+ // Default: 1.0
+ //
+ // Dynamically changeable through the SetOptions() API
+ double blob_garbage_collection_force_threshold = 1.0;
+
+ // Compaction readahead for blob files.
+ //
+ // Default: 0
+ //
+ // Dynamically changeable through the SetOptions() API
+ uint64_t blob_compaction_readahead_size = 0;
+
+ // Enable blob files starting from a certain LSM tree level.
+ //
+ // For certain use cases that have a mix of short-lived and long-lived values,
+ // it might make sense to support extracting large values only during
+ // compactions whose output level is greater than or equal to a specified LSM
+ // tree level (e.g. compactions into L1/L2/... or above). This could reduce
+ // the space amplification caused by large values that are turned into garbage
+ // shortly after being written at the price of some write amplification
+ // incurred by long-lived values whose extraction to blob files is delayed.
+ //
+ // Default: 0
+ //
+ // Dynamically changeable through the SetOptions() API
+ int blob_file_starting_level = 0;
+
+ // The Cache object to use for blobs. Using a dedicated object for blobs and
+ // using the same object for the block and blob caches are both supported. In
+ // the latter case, note that blobs are less valuable from a caching
+ // perspective than SST blocks, and some cache implementations have
+ // configuration options that can be used to prioritize items accordingly (see
+ // Cache::Priority and LRUCacheOptions::{high,low}_pri_pool_ratio).
+ //
+ // Default: nullptr (disabled)
+ std::shared_ptr<Cache> blob_cache = nullptr;
+
+ // Enable/disable prepopulating the blob cache. When set to kFlushOnly, BlobDB
+ // will insert newly written blobs into the blob cache during flush. This can
+ // improve performance when reading back these blobs would otherwise be
+ // expensive (e.g. when using direct I/O or remote storage), or when the
+ // workload has a high temporal locality.
+ //
+ // Default: disabled
+ //
+ // Dynamically changeable through the SetOptions() API
+ PrepopulateBlobCache prepopulate_blob_cache = PrepopulateBlobCache::kDisable;
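+
+ // Example (illustrative sketch): separating values of at least 1KB into
+ // compressed blob files with garbage collection enabled, using the options
+ // declared above.
+ //
+ //   ColumnFamilyOptions cf_options;
+ //   cf_options.enable_blob_files = true;
+ //   cf_options.min_blob_size = 1024;
+ //   cf_options.blob_compression_type = kLZ4Compression;
+ //   cf_options.enable_blob_garbage_collection = true;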
+
+ // Enable memtable per key-value checksum protection.
+ //
+ // Each entry in the memtable will be suffixed by a per key-value checksum.
+ // This option determines the size of such checksums.
+ //
+ // It is suggested to turn on write batch per key-value
+ // checksum protection together with this option, so that the checksum
+ // computation is done outside of writer threads (the memtable kv checksum
+ // can be computed from the write batch checksum). See
+ // WriteOptions::protection_bytes_per_key for more detail.
+ //
+ // Default: 0 (no protection)
+ // Supported values: 0, 1, 2, 4, 8.
+ uint32_t memtable_protection_bytes_per_key = 0;
+
+ // UNDER CONSTRUCTION -- DO NOT USE
+ // When the user-defined timestamp feature is enabled, this flag controls
+ // whether the user-defined timestamps will be persisted.
+ //
+ // When it's false, the user-defined timestamps will be removed from the user
+ // keys when data is flushed from memtables to SST files. Other places where
+ // user keys can be persisted, like WAL and blob files, go through a similar
+ // process. Users should call `DB::IncreaseFullHistoryTsLow` to set a cutoff
+ // timestamp. RocksDB refrains, with best effort, from flushing a memtable
+ // with data still above the cutoff timestamp. When users try to read below
+ // the cutoff timestamp, an error will be returned.
+ //
+ // Default: true (user-defined timestamps are persisted)
+ // Not dynamically changeable; changing it requires a DB restart and
+ // only compatible changes are allowed.
+ bool persist_user_defined_timestamps = true;
+
+ // Enable/disable per key-value checksum protection for in memory blocks.
+ //
+ // Checksum is constructed when a block is loaded into memory and verification
+ // is done for each key read from the block. This is useful for detecting
+ // in-memory data corruption. Note that this feature has a non-trivial
+ // negative impact on read performance. Different values of the
+ // option have similar performance impact, but different memory cost and
+ // corruption detection probability (e.g. 1 byte gives a 255/256 chance of
+ // detecting a corruption).
+ //
+ // Default: 0 (no protection)
+ // Supported values: 0, 1, 2, 4, 8.
+ uint8_t block_protection_bytes_per_key = 0;
+
+ // Create ColumnFamilyOptions with default values for all fields
+ AdvancedColumnFamilyOptions();
+ // Create ColumnFamilyOptions from Options
+ explicit AdvancedColumnFamilyOptions(const Options& options);
+
+ // ---------------- OPTIONS NOT SUPPORTED ANYMORE ----------------
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/block_cache_trace_writer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/block_cache_trace_writer.h
new file mode 100644
index 0000000000..18d28685b1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/block_cache_trace_writer.h
@@ -0,0 +1,149 @@
+// Copyright (c) 2022, Meta Platforms, Inc. and affiliates. All rights
+// reserved. This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/options.h"
+#include "rocksdb/system_clock.h"
+#include "rocksdb/table_reader_caller.h"
+#include "rocksdb/trace_reader_writer.h"
+#include "rocksdb/trace_record.h"
+
+namespace ROCKSDB_NAMESPACE {
+// A record for block cache lookups/inserts. This is passed by the table
+// reader to the BlockCacheTraceWriter for every block cache op.
+struct BlockCacheTraceRecord {
+ // Required fields for all accesses.
+ uint64_t access_timestamp = 0;
+
+ // Info related to the block being looked up or inserted
+ //
+ // 1. The cache key for the block
+ std::string block_key;
+
+ // 2. The type of block
+ TraceType block_type = TraceType::kTraceMax;
+
+ // 3. Size of the block
+ uint64_t block_size = 0;
+
+ // Info about the SST file the block is in
+ //
+ // 1. Column family ID
+ uint64_t cf_id = 0;
+
+ // 2. Column family name
+ std::string cf_name;
+
+ // 3. LSM level of the file
+ uint32_t level = 0;
+
+ // 4. SST file number
+ uint64_t sst_fd_number = 0;
+
+ // Info about the calling context
+ //
+ // 1. The higher level request triggering the block cache request
+ TableReaderCaller caller = TableReaderCaller::kMaxBlockCacheLookupCaller;
+
+ // 2. Cache lookup hit/miss. Not relevant for inserts
+ bool is_cache_hit = false;
+
+ // 3. Whether this request is a lookup
+ bool no_insert = false;
+
+ // Get/MultiGet specific info
+ //
+ // 1. A unique ID for Get/MultiGet
+ uint64_t get_id = kReservedGetId;
+
+ // 2. Whether the Get/MultiGet is from a user-specified snapshot
+ bool get_from_user_specified_snapshot = false;
+
+ // 3. The target user key in the block
+ std::string referenced_key;
+
+ // Required fields for data block and user Get/Multi-Get only.
+ //
+ // 1. Size of the useful data in the block
+ uint64_t referenced_data_size = 0;
+
+ // 2. Only for MultiGet, number of keys from the batch found in the block
+ uint64_t num_keys_in_block = 0;
+
+ // 3. Whether the key was found in the block or not (false positive)
+ bool referenced_key_exist_in_block = false;
+
+ static const uint64_t kReservedGetId;
+
+ BlockCacheTraceRecord() {}
+
+ BlockCacheTraceRecord(uint64_t _access_timestamp, std::string _block_key,
+ TraceType _block_type, uint64_t _block_size,
+ uint64_t _cf_id, std::string _cf_name, uint32_t _level,
+ uint64_t _sst_fd_number, TableReaderCaller _caller,
+ bool _is_cache_hit, bool _no_insert, uint64_t _get_id,
+ bool _get_from_user_specified_snapshot = false,
+ std::string _referenced_key = "",
+ uint64_t _referenced_data_size = 0,
+ uint64_t _num_keys_in_block = 0,
+ bool _referenced_key_exist_in_block = false)
+ : access_timestamp(_access_timestamp),
+ block_key(_block_key),
+ block_type(_block_type),
+ block_size(_block_size),
+ cf_id(_cf_id),
+ cf_name(_cf_name),
+ level(_level),
+ sst_fd_number(_sst_fd_number),
+ caller(_caller),
+ is_cache_hit(_is_cache_hit),
+ no_insert(_no_insert),
+ get_id(_get_id),
+ get_from_user_specified_snapshot(_get_from_user_specified_snapshot),
+ referenced_key(_referenced_key),
+ referenced_data_size(_referenced_data_size),
+ num_keys_in_block(_num_keys_in_block),
+ referenced_key_exist_in_block(_referenced_key_exist_in_block) {}
+};
+
+// Options for tracing block cache accesses
+struct BlockCacheTraceOptions {
+ // Specify trace sampling option, i.e. capture one per how many requests.
+ // Default to 1 (capture every request).
+ uint64_t sampling_frequency = 1;
+};
+
+// Options for the built-in implementation of BlockCacheTraceWriter
+struct BlockCacheTraceWriterOptions {
+ uint64_t max_trace_file_size = uint64_t{64} * 1024 * 1024 * 1024;
+};
+
+// BlockCacheTraceWriter is an abstract class that captures all RocksDB block
+// cache accesses. Every RocksDB operation is passed to WriteBlockAccess()
+// with a BlockCacheTraceRecord.
+class BlockCacheTraceWriter {
+ public:
+ virtual ~BlockCacheTraceWriter() {}
+
+ // Pass Slice references to avoid copy.
+ virtual Status WriteBlockAccess(const BlockCacheTraceRecord& record,
+ const Slice& block_key, const Slice& cf_name,
+ const Slice& referenced_key) = 0;
+
+ // Write a trace header at the beginning, typically on initiating a trace,
+ // with some metadata like a magic number and RocksDB version.
+ virtual Status WriteHeader() = 0;
+};
+
+// Allocate an instance of the built-in BlockCacheTraceWriter implementation,
+// that traces all block cache accesses to a user-provided TraceWriter. Each
+// access is traced to a file with a timestamp and type, followed by the
+// payload.
+std::unique_ptr<BlockCacheTraceWriter> NewBlockCacheTraceWriter(
+ SystemClock* clock, const BlockCacheTraceWriterOptions& trace_options,
+ std::unique_ptr<TraceWriter>&& trace_writer);
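+
+// Example (illustrative sketch): wiring the built-in writer to a file-backed
+// TraceWriter. NewFileTraceWriter() is declared in trace_reader_writer.h and
+// DB::StartBlockCacheTrace() consumes the resulting BlockCacheTraceWriter.
+//
+//   std::unique_ptr<TraceWriter> file_writer;
+//   Status s = NewFileTraceWriter(env, EnvOptions(), trace_path,
+//                                 &file_writer);
+//   if (s.ok()) {
+//     std::unique_ptr<BlockCacheTraceWriter> writer =
+//         NewBlockCacheTraceWriter(env->GetSystemClock().get(),
+//                                  BlockCacheTraceWriterOptions(),
+//                                  std::move(file_writer));
+//     s = db->StartBlockCacheTrace(BlockCacheTraceOptions(),
+//                                  std::move(writer));
+//   }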
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/c.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/c.h
new file mode 100644
index 0000000000..6e0ad08c81
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/c.h
@@ -0,0 +1,2864 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+/* Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+ Use of this source code is governed by a BSD-style license that can be
+ found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+ C bindings for rocksdb. May be useful as a stable ABI that can be
+ used by programs that keep rocksdb in a shared library, or for
+ a JNI api.
+
+ Does not support:
+ . getters for the option types
+ . custom comparators that implement key shortening
+ . capturing post-write-snapshot
+ . custom iter, db, env, cache implementations using just the C bindings
+
+ Some conventions:
+
+ (1) We expose just opaque struct pointers and functions to clients.
+ This allows us to change internal representations without having to
+ recompile clients.
+
+ (2) For simplicity, there is no equivalent to the Slice type. Instead,
+ the caller has to pass the pointer and length as separate
+ arguments.
+
+ (3) Errors are represented by a null-terminated c string. NULL
+ means no error. All operations that can raise an error are passed
+ a "char** errptr" as the last argument. One of the following must
+ be true on entry:
+ *errptr == NULL
+ *errptr points to a malloc()ed null-terminated error message
+ On success, a leveldb routine leaves *errptr unchanged.
+ On failure, leveldb frees the old value of *errptr and
+ sets *errptr to a malloc()ed error message.
+
+ (4) Bools have the type unsigned char (0 == false; rest == true)
+
+ (5) All of the pointer arguments must be non-NULL.
+*/
+
+#pragma once
+
+#ifdef _WIN32
+#ifdef ROCKSDB_DLL
+#ifdef ROCKSDB_LIBRARY_EXPORTS
+#define ROCKSDB_LIBRARY_API __declspec(dllexport)
+#else
+#define ROCKSDB_LIBRARY_API __declspec(dllimport)
+#endif
+#else
+#define ROCKSDB_LIBRARY_API
+#endif
+#else
+#define ROCKSDB_LIBRARY_API
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Exported types */
+
+typedef struct rocksdb_t rocksdb_t;
+typedef struct rocksdb_backup_engine_t rocksdb_backup_engine_t;
+typedef struct rocksdb_backup_engine_info_t rocksdb_backup_engine_info_t;
+typedef struct rocksdb_backup_engine_options_t rocksdb_backup_engine_options_t;
+typedef struct rocksdb_restore_options_t rocksdb_restore_options_t;
+typedef struct rocksdb_memory_allocator_t rocksdb_memory_allocator_t;
+typedef struct rocksdb_lru_cache_options_t rocksdb_lru_cache_options_t;
+typedef struct rocksdb_hyper_clock_cache_options_t
+ rocksdb_hyper_clock_cache_options_t;
+typedef struct rocksdb_cache_t rocksdb_cache_t;
+typedef struct rocksdb_compactionfilter_t rocksdb_compactionfilter_t;
+typedef struct rocksdb_compactionfiltercontext_t
+ rocksdb_compactionfiltercontext_t;
+typedef struct rocksdb_compactionfilterfactory_t
+ rocksdb_compactionfilterfactory_t;
+typedef struct rocksdb_comparator_t rocksdb_comparator_t;
+typedef struct rocksdb_dbpath_t rocksdb_dbpath_t;
+typedef struct rocksdb_env_t rocksdb_env_t;
+typedef struct rocksdb_fifo_compaction_options_t
+ rocksdb_fifo_compaction_options_t;
+typedef struct rocksdb_filelock_t rocksdb_filelock_t;
+typedef struct rocksdb_filterpolicy_t rocksdb_filterpolicy_t;
+typedef struct rocksdb_flushoptions_t rocksdb_flushoptions_t;
+typedef struct rocksdb_iterator_t rocksdb_iterator_t;
+typedef struct rocksdb_logger_t rocksdb_logger_t;
+typedef struct rocksdb_mergeoperator_t rocksdb_mergeoperator_t;
+typedef 
struct rocksdb_options_t rocksdb_options_t; +typedef struct rocksdb_compactoptions_t rocksdb_compactoptions_t; +typedef struct rocksdb_block_based_table_options_t + rocksdb_block_based_table_options_t; +typedef struct rocksdb_cuckoo_table_options_t rocksdb_cuckoo_table_options_t; +typedef struct rocksdb_randomfile_t rocksdb_randomfile_t; +typedef struct rocksdb_readoptions_t rocksdb_readoptions_t; +typedef struct rocksdb_seqfile_t rocksdb_seqfile_t; +typedef struct rocksdb_slicetransform_t rocksdb_slicetransform_t; +typedef struct rocksdb_snapshot_t rocksdb_snapshot_t; +typedef struct rocksdb_writablefile_t rocksdb_writablefile_t; +typedef struct rocksdb_writebatch_t rocksdb_writebatch_t; +typedef struct rocksdb_writebatch_wi_t rocksdb_writebatch_wi_t; +typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t; +typedef struct rocksdb_universal_compaction_options_t + rocksdb_universal_compaction_options_t; +typedef struct rocksdb_livefiles_t rocksdb_livefiles_t; +typedef struct rocksdb_column_family_handle_t rocksdb_column_family_handle_t; +typedef struct rocksdb_column_family_metadata_t + rocksdb_column_family_metadata_t; +typedef struct rocksdb_level_metadata_t rocksdb_level_metadata_t; +typedef struct rocksdb_sst_file_metadata_t rocksdb_sst_file_metadata_t; +typedef struct rocksdb_envoptions_t rocksdb_envoptions_t; +typedef struct rocksdb_ingestexternalfileoptions_t + rocksdb_ingestexternalfileoptions_t; +typedef struct rocksdb_sstfilewriter_t rocksdb_sstfilewriter_t; +typedef struct rocksdb_ratelimiter_t rocksdb_ratelimiter_t; +typedef struct rocksdb_perfcontext_t rocksdb_perfcontext_t; +typedef struct rocksdb_pinnableslice_t rocksdb_pinnableslice_t; +typedef struct rocksdb_transactiondb_options_t rocksdb_transactiondb_options_t; +typedef struct rocksdb_transactiondb_t rocksdb_transactiondb_t; +typedef struct rocksdb_transaction_options_t rocksdb_transaction_options_t; +typedef struct rocksdb_optimistictransactiondb_t + rocksdb_optimistictransactiondb_t; +typedef struct rocksdb_optimistictransaction_options_t + rocksdb_optimistictransaction_options_t; +typedef struct rocksdb_transaction_t rocksdb_transaction_t; +typedef struct rocksdb_checkpoint_t rocksdb_checkpoint_t; +typedef struct rocksdb_wal_iterator_t rocksdb_wal_iterator_t; +typedef struct rocksdb_wal_readoptions_t rocksdb_wal_readoptions_t; +typedef struct rocksdb_memory_consumers_t rocksdb_memory_consumers_t; +typedef struct rocksdb_memory_usage_t rocksdb_memory_usage_t; + +/* DB operations */ + +extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open( + const rocksdb_options_t* options, const char* name, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_with_ttl( + const rocksdb_options_t* options, const char* name, int ttl, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_for_read_only( + const rocksdb_options_t* options, const char* name, + unsigned char error_if_wal_file_exists, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_as_secondary( + const rocksdb_options_t* options, const char* name, + const char* secondary_path, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_backup_engine_t* rocksdb_backup_engine_open( + const rocksdb_options_t* options, const char* path, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_backup_engine_t* +rocksdb_backup_engine_open_opts(const rocksdb_backup_engine_options_t* options, + rocksdb_env_t* env, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_create_new_backup( + rocksdb_backup_engine_t* be, 
rocksdb_t* db, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_create_new_backup_flush( + rocksdb_backup_engine_t* be, rocksdb_t* db, + unsigned char flush_before_backup, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_purge_old_backups( + rocksdb_backup_engine_t* be, uint32_t num_backups_to_keep, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_restore_options_t* +rocksdb_restore_options_create(void); +extern ROCKSDB_LIBRARY_API void rocksdb_restore_options_destroy( + rocksdb_restore_options_t* opt); +extern ROCKSDB_LIBRARY_API void rocksdb_restore_options_set_keep_log_files( + rocksdb_restore_options_t* opt, int v); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_verify_backup( + rocksdb_backup_engine_t* be, uint32_t backup_id, char** errptr); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_restore_db_from_latest_backup( + rocksdb_backup_engine_t* be, const char* db_dir, const char* wal_dir, + const rocksdb_restore_options_t* restore_options, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_restore_db_from_backup( + rocksdb_backup_engine_t* be, const char* db_dir, const char* wal_dir, + const rocksdb_restore_options_t* restore_options, const uint32_t backup_id, + char** errptr); + +extern ROCKSDB_LIBRARY_API const rocksdb_backup_engine_info_t* +rocksdb_backup_engine_get_backup_info(rocksdb_backup_engine_t* be); + +extern ROCKSDB_LIBRARY_API int rocksdb_backup_engine_info_count( + const rocksdb_backup_engine_info_t* info); + +extern ROCKSDB_LIBRARY_API int64_t rocksdb_backup_engine_info_timestamp( + const rocksdb_backup_engine_info_t* info, int index); + +extern ROCKSDB_LIBRARY_API uint32_t rocksdb_backup_engine_info_backup_id( + const rocksdb_backup_engine_info_t* info, int index); + +extern ROCKSDB_LIBRARY_API uint64_t rocksdb_backup_engine_info_size( + const rocksdb_backup_engine_info_t* info, int index); + +extern ROCKSDB_LIBRARY_API uint32_t rocksdb_backup_engine_info_number_files( + const rocksdb_backup_engine_info_t* info, int index); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_info_destroy( + const rocksdb_backup_engine_info_t* info); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_close( + rocksdb_backup_engine_t* be); + +extern ROCKSDB_LIBRARY_API void rocksdb_put_with_ts( + rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, + size_t keylen, const char* ts, size_t tslen, const char* val, size_t vallen, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_put_cf_with_ts( + rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, const char* ts, size_t tslen, const char* val, size_t vallen, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_delete_with_ts( + rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, + size_t keylen, const char* ts, size_t tslen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_delete_cf_with_ts( + rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, const char* ts, size_t tslen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_singledelete( + rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, + size_t keylen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_singledelete_cf( + rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* 
column_family, const char* key, + size_t keylen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_singledelete_with_ts( + rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, + size_t keylen, const char* ts, size_t tslen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_singledelete_cf_with_ts( + rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, const char* ts, size_t tslen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_increase_full_history_ts_low( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* ts_low, size_t ts_lowlen, char** errptr); + +extern ROCKSDB_LIBRARY_API char* rocksdb_get_full_history_ts_low( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + size_t* ts_lowlen, char** errptr); + +/* BackupEngineOptions */ + +extern ROCKSDB_LIBRARY_API rocksdb_backup_engine_options_t* +rocksdb_backup_engine_options_create(const char* backup_dir); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_options_set_backup_dir( + rocksdb_backup_engine_options_t* options, const char* backup_dir); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_options_set_env( + rocksdb_backup_engine_options_t* options, rocksdb_env_t* env); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_share_table_files( + rocksdb_backup_engine_options_t* options, unsigned char val); + +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_backup_engine_options_get_share_table_files( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_options_set_sync( + rocksdb_backup_engine_options_t* options, unsigned char val); + +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_backup_engine_options_get_sync( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_destroy_old_data( + rocksdb_backup_engine_options_t* options, unsigned char val); + +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_backup_engine_options_get_destroy_old_data( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_backup_log_files( + rocksdb_backup_engine_options_t* options, unsigned char val); + +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_backup_engine_options_get_backup_log_files( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_backup_rate_limit( + rocksdb_backup_engine_options_t* options, uint64_t limit); + +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_backup_engine_options_get_backup_rate_limit( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_restore_rate_limit( + rocksdb_backup_engine_options_t* options, uint64_t limit); + +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_backup_engine_options_get_restore_rate_limit( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_max_background_operations( + rocksdb_backup_engine_options_t* options, int val); + +extern ROCKSDB_LIBRARY_API int +rocksdb_backup_engine_options_get_max_background_operations( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_callback_trigger_interval_size( + rocksdb_backup_engine_options_t* options, uint64_t size); + +extern ROCKSDB_LIBRARY_API uint64_t 
+rocksdb_backup_engine_options_get_callback_trigger_interval_size( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_max_valid_backups_to_open( + rocksdb_backup_engine_options_t* options, int val); + +extern ROCKSDB_LIBRARY_API int +rocksdb_backup_engine_options_get_max_valid_backups_to_open( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void +rocksdb_backup_engine_options_set_share_files_with_checksum_naming( + rocksdb_backup_engine_options_t* options, int val); + +extern ROCKSDB_LIBRARY_API int +rocksdb_backup_engine_options_get_share_files_with_checksum_naming( + rocksdb_backup_engine_options_t* options); + +extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_options_destroy( + rocksdb_backup_engine_options_t*); + +/* Checkpoint */ + +extern ROCKSDB_LIBRARY_API rocksdb_checkpoint_t* +rocksdb_checkpoint_object_create(rocksdb_t* db, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_checkpoint_create( + rocksdb_checkpoint_t* checkpoint, const char* checkpoint_dir, + uint64_t log_size_for_flush, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_checkpoint_object_destroy( + rocksdb_checkpoint_t* checkpoint); + +extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_and_trim_history( + const rocksdb_options_t* options, const char* name, int num_column_families, + const char* const* column_family_names, + const rocksdb_options_t* const* column_family_options, + rocksdb_column_family_handle_t** column_family_handles, char* trim_ts, + size_t trim_tslen, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_column_families( + const rocksdb_options_t* options, const char* name, int num_column_families, + const char* const* column_family_names, + const rocksdb_options_t* const* column_family_options, + rocksdb_column_family_handle_t** column_family_handles, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_column_families_with_ttl( + const rocksdb_options_t* options, const char* name, int num_column_families, + const char* const* column_family_names, + const rocksdb_options_t* const* column_family_options, + rocksdb_column_family_handle_t** column_family_handles, const int* ttls, + char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_t* +rocksdb_open_for_read_only_column_families( + const rocksdb_options_t* options, const char* name, int num_column_families, + const char* const* column_family_names, + const rocksdb_options_t* const* column_family_options, + rocksdb_column_family_handle_t** column_family_handles, + unsigned char error_if_wal_file_exists, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_as_secondary_column_families( + const rocksdb_options_t* options, const char* name, + const char* secondary_path, int num_column_families, + const char* const* column_family_names, + const rocksdb_options_t* const* column_family_options, + rocksdb_column_family_handle_t** column_family_handles, char** errptr); + +extern ROCKSDB_LIBRARY_API char** rocksdb_list_column_families( + const rocksdb_options_t* options, const char* name, size_t* lencf, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_list_column_families_destroy( + char** list, size_t len); + +extern ROCKSDB_LIBRARY_API rocksdb_column_family_handle_t* +rocksdb_create_column_family(rocksdb_t* db, + const rocksdb_options_t* column_family_options, + const char* column_family_name, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_column_family_handle_t* 
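The column-family calls above work together: rocksdb_open_column_families must be handed the complete list of existing families (discoverable with rocksdb_list_column_families), while rocksdb_create_column_family adds one to an already-open database. A short sketch with a placeholder path and family names, reusing a single options object for every family:

    #include <rocksdb/c.h>
    #include <stdlib.h>

    /* Open a database with its "default" and "meta" column families, write
       into "meta", then add a third family at runtime. */
    static void column_family_demo(void) {
      char* err = NULL;
      rocksdb_options_t* opts = rocksdb_options_create();
      rocksdb_options_set_create_if_missing(opts, 1);
      rocksdb_options_set_create_missing_column_families(opts, 1);

      const char* names[2] = {"default", "meta"};
      const rocksdb_options_t* cf_opts[2] = {opts, opts};
      rocksdb_column_family_handle_t* handles[2];
      rocksdb_t* db = rocksdb_open_column_families(opts, "/tmp/cf-db", 2, names,
                                                   cf_opts, handles, &err);
      if (err) { free(err); rocksdb_options_destroy(opts); return; }

      rocksdb_writeoptions_t* wo = rocksdb_writeoptions_create();
      rocksdb_put_cf(db, wo, handles[1], "k", 1, "v", 1, &err);
      if (err) { free(err); err = NULL; }
      rocksdb_writeoptions_destroy(wo);

      /* A family can also be added while the database is open. */
      rocksdb_column_family_handle_t* extra =
          rocksdb_create_column_family(db, opts, "extra", &err);
      if (err) { free(err); }
      if (extra) rocksdb_column_family_handle_destroy(extra);

      rocksdb_column_family_handle_destroy(handles[0]);
      rocksdb_column_family_handle_destroy(handles[1]);
      rocksdb_options_destroy(opts);
      rocksdb_close(db);
    }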
+rocksdb_create_column_family_with_ttl(
+    rocksdb_t* db, const rocksdb_options_t* column_family_options,
+    const char* column_family_name, int ttl, char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_drop_column_family(
+    rocksdb_t* db, rocksdb_column_family_handle_t* handle, char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_column_family_handle_destroy(
+    rocksdb_column_family_handle_t*);
+
+extern ROCKSDB_LIBRARY_API uint32_t
+rocksdb_column_family_handle_get_id(rocksdb_column_family_handle_t* handle);
+
+extern ROCKSDB_LIBRARY_API char* rocksdb_column_family_handle_get_name(
+    rocksdb_column_family_handle_t* handle, size_t* name_len);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_close(rocksdb_t* db);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_put(
+    rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key,
+    size_t keylen, const char* val, size_t vallen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_put_cf(
+    rocksdb_t* db, const rocksdb_writeoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t keylen, const char* val, size_t vallen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_delete(
+    rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key,
+    size_t keylen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_delete_cf(
+    rocksdb_t* db, const rocksdb_writeoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t keylen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_delete_range_cf(
+    rocksdb_t* db, const rocksdb_writeoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* start_key,
+    size_t start_key_len, const char* end_key, size_t end_key_len,
+    char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_merge(
+    rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key,
+    size_t keylen, const char* val, size_t vallen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_merge_cf(
+    rocksdb_t* db, const rocksdb_writeoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t keylen, const char* val, size_t vallen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_write(
+    rocksdb_t* db, const rocksdb_writeoptions_t* options,
+    rocksdb_writebatch_t* batch, char** errptr);
+
+/* Returns NULL if not found. A malloc()ed array otherwise.
+   Stores the length of the array in *vallen. */
+extern ROCKSDB_LIBRARY_API char* rocksdb_get(
+    rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key,
+    size_t keylen, size_t* vallen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API char* rocksdb_get_with_ts(
+    rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key,
+    size_t keylen, size_t* vallen, char** ts, size_t* tslen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API char* rocksdb_get_cf(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t keylen, size_t* vallen, char** errptr);
+
+extern ROCKSDB_LIBRARY_API char* rocksdb_get_cf_with_ts(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t keylen, size_t* vallen, char** ts, size_t* tslen, char** errptr);
+
+// if values_list[i] == NULL and errs[i] == NULL,
+// then we got status.IsNotFound(), which we will not return.
+// all errors except status.ok() and status.IsNotFound() are returned.
+//
+// errs, values_list and values_list_sizes must be num_keys in length,
+// allocated by the caller.
+// errs is a list of strings as opposed to the conventional one error,
+// where errs[i] is the status for retrieval of keys_list[i].
+// each non-NULL errs entry is a malloc()ed, null terminated string.
+// each non-NULL values_list entry is a malloc()ed array, with
+// the length for each stored in values_list_sizes[i].
+extern ROCKSDB_LIBRARY_API void rocksdb_multi_get(
+    rocksdb_t* db, const rocksdb_readoptions_t* options, size_t num_keys,
+    const char* const* keys_list, const size_t* keys_list_sizes,
+    char** values_list, size_t* values_list_sizes, char** errs);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_multi_get_with_ts(
+    rocksdb_t* db, const rocksdb_readoptions_t* options, size_t num_keys,
+    const char* const* keys_list, const size_t* keys_list_sizes,
+    char** values_list, size_t* values_list_sizes, char** timestamp_list,
+    size_t* timestamp_list_sizes, char** errs);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_multi_get_cf(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    const rocksdb_column_family_handle_t* const* column_families,
+    size_t num_keys, const char* const* keys_list,
+    const size_t* keys_list_sizes, char** values_list,
+    size_t* values_list_sizes, char** errs);
+
+extern ROCKSDB_LIBRARY_API void rocksdb_multi_get_cf_with_ts(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    const rocksdb_column_family_handle_t* const* column_families,
+    size_t num_keys, const char* const* keys_list,
+    const size_t* keys_list_sizes, char** values_list,
+    size_t* values_list_sizes, char** timestamps_list,
+    size_t* timestamps_list_sizes, char** errs);
+
+// The MultiGet API improves performance by batching operations
+// in the read path for greater efficiency. Currently, only the block based
+// table format with full filters is supported. Other table formats such
+// as plain table, block based table with block based filters and
+// partitioned indexes will still work, but will not get any performance
+// benefits.
+//
+// Note that all the keys passed to this API are restricted to a single
+// column family.
+//
+// Parameters -
+// db - the RocksDB instance.
+// options - ReadOptions
+// column_family - ColumnFamilyHandle* that the keys belong to. All the keys
+//                 passed to the API are restricted to a single column family
+// num_keys - Number of keys to lookup
+// keys_list - Pointer to C style array of keys with num_keys elements
+// keys_list_sizes - Pointer to C style array of the size of corresponding key
+//   in key_list with num_keys elements.
+// values - Pointer to C style array of PinnableSlices with num_keys elements
+// statuses - Pointer to C style array of Status with num_keys elements
+// sorted_input - If true, it means the input keys are already sorted by key
+//                order, so the MultiGet() API doesn't have to sort them
+//                again. If false, the keys will be copied and sorted
+//                internally by the API - the input array will not be
+//                modified
+extern ROCKSDB_LIBRARY_API void rocksdb_batched_multi_get_cf(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, size_t num_keys,
+    const char* const* keys_list, const size_t* keys_list_sizes,
+    rocksdb_pinnableslice_t** values, char** errs, const bool sorted_input);
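A small driver for rocksdb_multi_get that follows the ownership contract spelled out above: a NULL value together with a NULL error means the key was not found, and every non-NULL value or error string is malloc()ed and owned by the caller. The keys here are placeholders.

    #include <rocksdb/c.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Look up three keys in one call and classify each result. */
    static void lookup3(rocksdb_t* db, const rocksdb_readoptions_t* ro) {
      const char* keys[3] = {"alpha", "beta", "gamma"};
      const size_t key_sizes[3] = {5, 4, 5};
      char* vals[3];
      size_t val_sizes[3];
      char* errs[3];

      rocksdb_multi_get(db, ro, 3, keys, key_sizes, vals, val_sizes, errs);
      for (int i = 0; i < 3; i++) {
        if (errs[i]) {
          fprintf(stderr, "%s: %s\n", keys[i], errs[i]);
          free(errs[i]); /* per-key error string, caller frees */
        } else if (vals[i] == NULL) {
          printf("%s: not found\n", keys[i]); /* NULL value + NULL error */
        } else {
          printf("%s: %.*s\n", keys[i], (int)val_sizes[i], vals[i]);
          free(vals[i]); /* malloc()ed value, caller frees */
        }
      }
    }

+// The value is only allocated (using malloc) and returned if it is found and
+// value_found isn't NULL. In that case the user is responsible for freeing it.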
+extern ROCKSDB_LIBRARY_API unsigned char rocksdb_key_may_exist( + rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, + size_t key_len, char** value, size_t* val_len, const char* timestamp, + size_t timestamp_len, unsigned char* value_found); + +// The value is only allocated (using malloc) and returned if it is found and +// value_found isn't NULL. In that case the user is responsible for freeing it. +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_key_may_exist_cf( + rocksdb_t* db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t key_len, char** value, size_t* val_len, const char* timestamp, + size_t timestamp_len, unsigned char* value_found); + +extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* rocksdb_create_iterator( + rocksdb_t* db, const rocksdb_readoptions_t* options); + +extern ROCKSDB_LIBRARY_API rocksdb_wal_iterator_t* rocksdb_get_updates_since( + rocksdb_t* db, uint64_t seq_number, + const rocksdb_wal_readoptions_t* options, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* rocksdb_create_iterator_cf( + rocksdb_t* db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family); + +extern ROCKSDB_LIBRARY_API void rocksdb_create_iterators( + rocksdb_t* db, rocksdb_readoptions_t* opts, + rocksdb_column_family_handle_t** column_families, + rocksdb_iterator_t** iterators, size_t size, char** errptr); + +extern ROCKSDB_LIBRARY_API const rocksdb_snapshot_t* rocksdb_create_snapshot( + rocksdb_t* db); + +extern ROCKSDB_LIBRARY_API void rocksdb_release_snapshot( + rocksdb_t* db, const rocksdb_snapshot_t* snapshot); + +/* Returns NULL if property name is unknown. + Else returns a pointer to a malloc()-ed null-terminated value. 
*/ +extern ROCKSDB_LIBRARY_API char* rocksdb_property_value(rocksdb_t* db, + const char* propname); +/* returns 0 on success, -1 otherwise */ +extern ROCKSDB_LIBRARY_API int rocksdb_property_int(rocksdb_t* db, + const char* propname, + uint64_t* out_val); + +/* returns 0 on success, -1 otherwise */ +extern ROCKSDB_LIBRARY_API int rocksdb_property_int_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* propname, uint64_t* out_val); + +extern ROCKSDB_LIBRARY_API char* rocksdb_property_value_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* propname); + +extern ROCKSDB_LIBRARY_API void rocksdb_approximate_sizes( + rocksdb_t* db, int num_ranges, const char* const* range_start_key, + const size_t* range_start_key_len, const char* const* range_limit_key, + const size_t* range_limit_key_len, uint64_t* sizes, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_approximate_sizes_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + int num_ranges, const char* const* range_start_key, + const size_t* range_start_key_len, const char* const* range_limit_key, + const size_t* range_limit_key_len, uint64_t* sizes, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_compact_range(rocksdb_t* db, + const char* start_key, + size_t start_key_len, + const char* limit_key, + size_t limit_key_len); + +extern ROCKSDB_LIBRARY_API void rocksdb_compact_range_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, const char* limit_key, + size_t limit_key_len); + +extern ROCKSDB_LIBRARY_API void rocksdb_suggest_compact_range( + rocksdb_t* db, const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_suggest_compact_range_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, const char* limit_key, + size_t limit_key_len, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_compact_range_opt( + rocksdb_t* db, rocksdb_compactoptions_t* opt, const char* start_key, + size_t start_key_len, const char* limit_key, size_t limit_key_len); + +extern ROCKSDB_LIBRARY_API void rocksdb_compact_range_cf_opt( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + rocksdb_compactoptions_t* opt, const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len); + +extern ROCKSDB_LIBRARY_API void rocksdb_delete_file(rocksdb_t* db, + const char* name); + +extern ROCKSDB_LIBRARY_API const rocksdb_livefiles_t* rocksdb_livefiles( + rocksdb_t* db); + +extern ROCKSDB_LIBRARY_API void rocksdb_flush( + rocksdb_t* db, const rocksdb_flushoptions_t* options, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_flush_cf( + rocksdb_t* db, const rocksdb_flushoptions_t* options, + rocksdb_column_family_handle_t* column_family, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_flush_cfs( + rocksdb_t* db, const rocksdb_flushoptions_t* options, + rocksdb_column_family_handle_t** column_family, int num_column_families, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_flush_wal(rocksdb_t* db, + unsigned char sync, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_disable_file_deletions(rocksdb_t* db, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_enable_file_deletions( + rocksdb_t* db, unsigned char force, char** errptr); + +/* Management operations */ + +extern 
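The property getters above come in string and integer flavours. A sketch using two standard property names, rocksdb.stats and rocksdb.estimate-num-keys; unknown names yield NULL for the string form and -1 for the integer form:

    #include <rocksdb/c.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Query one string-valued and one integer-valued property. */
    static void show_properties(rocksdb_t* db) {
      char* stats = rocksdb_property_value(db, "rocksdb.stats");
      if (stats) {
        printf("%s\n", stats);
        free(stats); /* malloc()ed, caller frees */
      }
      uint64_t n = 0;
      if (rocksdb_property_int(db, "rocksdb.estimate-num-keys", &n) == 0) {
        printf("~%llu keys\n", (unsigned long long)n);
      }
    }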
ROCKSDB_LIBRARY_API void rocksdb_destroy_db( + const rocksdb_options_t* options, const char* name, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_repair_db( + const rocksdb_options_t* options, const char* name, char** errptr); + +/* Iterator */ + +extern ROCKSDB_LIBRARY_API void rocksdb_iter_destroy(rocksdb_iterator_t*); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_iter_valid( + const rocksdb_iterator_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_iter_seek_to_first(rocksdb_iterator_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_iter_seek_to_last(rocksdb_iterator_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_iter_seek(rocksdb_iterator_t*, + const char* k, size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_iter_seek_for_prev(rocksdb_iterator_t*, + const char* k, + size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_iter_next(rocksdb_iterator_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_iter_prev(rocksdb_iterator_t*); +extern ROCKSDB_LIBRARY_API const char* rocksdb_iter_key( + const rocksdb_iterator_t*, size_t* klen); +extern ROCKSDB_LIBRARY_API const char* rocksdb_iter_value( + const rocksdb_iterator_t*, size_t* vlen); +extern ROCKSDB_LIBRARY_API const char* rocksdb_iter_timestamp( + const rocksdb_iterator_t*, size_t* tslen); +extern ROCKSDB_LIBRARY_API void rocksdb_iter_get_error( + const rocksdb_iterator_t*, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_wal_iter_next( + rocksdb_wal_iterator_t* iter); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_wal_iter_valid( + const rocksdb_wal_iterator_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_wal_iter_status( + const rocksdb_wal_iterator_t* iter, char** errptr); +extern ROCKSDB_LIBRARY_API rocksdb_writebatch_t* rocksdb_wal_iter_get_batch( + const rocksdb_wal_iterator_t* iter, uint64_t* seq); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_get_latest_sequence_number(rocksdb_t* db); +extern ROCKSDB_LIBRARY_API void rocksdb_wal_iter_destroy( + const rocksdb_wal_iterator_t* iter); + +/* Write batch */ + +extern ROCKSDB_LIBRARY_API rocksdb_writebatch_t* rocksdb_writebatch_create( + void); +extern ROCKSDB_LIBRARY_API rocksdb_writebatch_t* rocksdb_writebatch_create_from( + const char* rep, size_t size); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_destroy( + rocksdb_writebatch_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_clear(rocksdb_writebatch_t*); +extern ROCKSDB_LIBRARY_API int rocksdb_writebatch_count(rocksdb_writebatch_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_put(rocksdb_writebatch_t*, + const char* key, + size_t klen, + const char* val, + size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_put_cf( + rocksdb_writebatch_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* val, size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_put_cf_with_ts( + rocksdb_writebatch_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* ts, size_t tslen, const char* val, + size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_putv( + rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, int num_values, + const char* const* values_list, const size_t* values_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_putv_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, + int num_values, const char* const* values_list, + 
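The iterator calls above compose into the usual scan loop. Two details worth showing in code: rocksdb_iter_key and rocksdb_iter_value return borrowed slices that stay valid only until the next iterator operation (so they are never free()d), and rocksdb_iter_get_error is what separates normal exhaustion from a failed read.

    #include <rocksdb/c.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Full forward scan of the default column family. */
    static void scan_all(rocksdb_t* db, const rocksdb_readoptions_t* ro) {
      rocksdb_iterator_t* it = rocksdb_create_iterator(db, ro);
      for (rocksdb_iter_seek_to_first(it); rocksdb_iter_valid(it);
           rocksdb_iter_next(it)) {
        size_t klen, vlen;
        const char* k = rocksdb_iter_key(it, &klen);   /* borrowed slice */
        const char* v = rocksdb_iter_value(it, &vlen); /* borrowed slice */
        printf("%.*s => %.*s\n", (int)klen, k, (int)vlen, v);
      }
      char* err = NULL;
      rocksdb_iter_get_error(it, &err); /* end-of-data vs. real failure */
      if (err) { fprintf(stderr, "scan: %s\n", err); free(err); }
      rocksdb_iter_destroy(it);
    }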
const size_t* values_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_merge(rocksdb_writebatch_t*, + const char* key, + size_t klen, + const char* val, + size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_merge_cf( + rocksdb_writebatch_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* val, size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_mergev( + rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, int num_values, + const char* const* values_list, const size_t* values_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_mergev_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, + int num_values, const char* const* values_list, + const size_t* values_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete(rocksdb_writebatch_t*, + const char* key, + size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_singledelete( + rocksdb_writebatch_t* b, const char* key, size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_cf( + rocksdb_writebatch_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_cf_with_ts( + rocksdb_writebatch_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* ts, size_t tslen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_singledelete_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_singledelete_cf_with_ts( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* ts, size_t tslen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_deletev( + rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, + const size_t* keys_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_deletev_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* keys_list, const size_t* keys_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_range( + rocksdb_writebatch_t* b, const char* start_key, size_t start_key_len, + const char* end_key, size_t end_key_len); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_range_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, const char* end_key, + size_t end_key_len); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_rangev( + rocksdb_writebatch_t* b, int num_keys, const char* const* start_keys_list, + const size_t* start_keys_list_sizes, const char* const* end_keys_list, + const size_t* end_keys_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_rangev_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* start_keys_list, + const size_t* start_keys_list_sizes, const char* const* end_keys_list, + const size_t* end_keys_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_put_log_data( + rocksdb_writebatch_t*, const char* blob, size_t len); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_iterate( + rocksdb_writebatch_t*, void* state, + void (*put)(void*, const char* k, 
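A write batch collects several of the mutations above and applies them atomically in a single rocksdb_write call. A minimal sketch with placeholder keys; the half-open range delete covers every key with the tmp/ prefix:

    #include <rocksdb/c.h>
    #include <stdlib.h>

    /* Two puts, a point delete, and a range delete, committed as one unit. */
    static void apply_batch(rocksdb_t* db, const rocksdb_writeoptions_t* wo) {
      rocksdb_writebatch_t* wb = rocksdb_writebatch_create();
      rocksdb_writebatch_put(wb, "k1", 2, "v1", 2);
      rocksdb_writebatch_put(wb, "k2", 2, "v2", 2);
      rocksdb_writebatch_delete(wb, "old", 3);
      /* [ "tmp/", "tmp0" ) : '0' is the byte after '/', so this spans the
         whole "tmp/" prefix. */
      rocksdb_writebatch_delete_range(wb, "tmp/", 4, "tmp0", 4);

      char* err = NULL;
      rocksdb_write(db, wo, wb, &err); /* all-or-nothing */
      if (err) free(err);
      rocksdb_writebatch_destroy(wb);
    }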
size_t klen, const char* v, size_t vlen), + void (*deleted)(void*, const char* k, size_t klen)); +extern ROCKSDB_LIBRARY_API const char* rocksdb_writebatch_data( + rocksdb_writebatch_t*, size_t* size); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_set_save_point( + rocksdb_writebatch_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_rollback_to_save_point( + rocksdb_writebatch_t*, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_pop_save_point( + rocksdb_writebatch_t*, char** errptr); + +/* Write batch with index */ + +extern ROCKSDB_LIBRARY_API rocksdb_writebatch_wi_t* +rocksdb_writebatch_wi_create(size_t reserved_bytes, + unsigned char overwrite_keys); +extern ROCKSDB_LIBRARY_API rocksdb_writebatch_wi_t* +rocksdb_writebatch_wi_create_from(const char* rep, size_t size); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_destroy( + rocksdb_writebatch_wi_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_clear( + rocksdb_writebatch_wi_t*); +extern ROCKSDB_LIBRARY_API int rocksdb_writebatch_wi_count( + rocksdb_writebatch_wi_t* b); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_put( + rocksdb_writebatch_wi_t*, const char* key, size_t klen, const char* val, + size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_put_cf( + rocksdb_writebatch_wi_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* val, size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_putv( + rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, int num_values, + const char* const* values_list, const size_t* values_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_putv_cf( + rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, + int num_values, const char* const* values_list, + const size_t* values_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_merge( + rocksdb_writebatch_wi_t*, const char* key, size_t klen, const char* val, + size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_merge_cf( + rocksdb_writebatch_wi_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* val, size_t vlen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_mergev( + rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, int num_values, + const char* const* values_list, const size_t* values_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_mergev_cf( + rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, + int num_values, const char* const* values_list, + const size_t* values_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete( + rocksdb_writebatch_wi_t*, const char* key, size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_singledelete( + rocksdb_writebatch_wi_t*, const char* key, size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_cf( + rocksdb_writebatch_wi_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_singledelete_cf( + rocksdb_writebatch_wi_t*, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen); +extern ROCKSDB_LIBRARY_API void 
rocksdb_writebatch_wi_deletev( + rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list, + const size_t* keys_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_deletev_cf( + rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* keys_list, const size_t* keys_list_sizes); +// DO NOT USE - rocksdb_writebatch_wi_delete_range is not yet supported +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_range( + rocksdb_writebatch_wi_t* b, const char* start_key, size_t start_key_len, + const char* end_key, size_t end_key_len); +// DO NOT USE - rocksdb_writebatch_wi_delete_range_cf is not yet supported +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_range_cf( + rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, const char* end_key, + size_t end_key_len); +// DO NOT USE - rocksdb_writebatch_wi_delete_rangev is not yet supported +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_rangev( + rocksdb_writebatch_wi_t* b, int num_keys, + const char* const* start_keys_list, const size_t* start_keys_list_sizes, + const char* const* end_keys_list, const size_t* end_keys_list_sizes); +// DO NOT USE - rocksdb_writebatch_wi_delete_rangev_cf is not yet supported +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_rangev_cf( + rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, + int num_keys, const char* const* start_keys_list, + const size_t* start_keys_list_sizes, const char* const* end_keys_list, + const size_t* end_keys_list_sizes); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_put_log_data( + rocksdb_writebatch_wi_t*, const char* blob, size_t len); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_iterate( + rocksdb_writebatch_wi_t* b, void* state, + void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), + void (*deleted)(void*, const char* k, size_t klen)); +extern ROCKSDB_LIBRARY_API const char* rocksdb_writebatch_wi_data( + rocksdb_writebatch_wi_t* b, size_t* size); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_set_save_point( + rocksdb_writebatch_wi_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_rollback_to_save_point( + rocksdb_writebatch_wi_t*, char** errptr); +extern ROCKSDB_LIBRARY_API char* rocksdb_writebatch_wi_get_from_batch( + rocksdb_writebatch_wi_t* wbwi, const rocksdb_options_t* options, + const char* key, size_t keylen, size_t* vallen, char** errptr); +extern ROCKSDB_LIBRARY_API char* rocksdb_writebatch_wi_get_from_batch_cf( + rocksdb_writebatch_wi_t* wbwi, const rocksdb_options_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, size_t* vallen, char** errptr); +extern ROCKSDB_LIBRARY_API char* rocksdb_writebatch_wi_get_from_batch_and_db( + rocksdb_writebatch_wi_t* wbwi, rocksdb_t* db, + const rocksdb_readoptions_t* options, const char* key, size_t keylen, + size_t* vallen, char** errptr); +extern ROCKSDB_LIBRARY_API char* rocksdb_writebatch_wi_get_from_batch_and_db_cf( + rocksdb_writebatch_wi_t* wbwi, rocksdb_t* db, + const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, size_t* vallen, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_write_writebatch_wi( + rocksdb_t* db, const rocksdb_writeoptions_t* options, + rocksdb_writebatch_wi_t* wbwi, char** errptr); +extern ROCKSDB_LIBRARY_API 
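What the indexed write batch adds over a plain one is read-your-own-writes: rocksdb_writebatch_wi_get_from_batch_and_db consults the uncommitted batch first and falls back to the database. A sketch:

    #include <rocksdb/c.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Stage an update, observe it before commit, then commit it. */
    static void read_your_writes(rocksdb_t* db, const rocksdb_readoptions_t* ro,
                                 const rocksdb_writeoptions_t* wo) {
      /* reserved_bytes = 0, overwrite_keys = 1 */
      rocksdb_writebatch_wi_t* wi = rocksdb_writebatch_wi_create(0, 1);
      rocksdb_writebatch_wi_put(wi, "color", 5, "blue", 4);

      char* err = NULL;
      size_t len;
      char* val = rocksdb_writebatch_wi_get_from_batch_and_db(
          wi, db, ro, "color", 5, &len, &err);
      if (val) { printf("pending: %.*s\n", (int)len, val); free(val); }
      if (err) { free(err); err = NULL; }

      rocksdb_write_writebatch_wi(db, wo, wi, &err); /* commit the batch */
      if (err) free(err);
      rocksdb_writebatch_wi_destroy(wi);
    }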
rocksdb_iterator_t* +rocksdb_writebatch_wi_create_iterator_with_base( + rocksdb_writebatch_wi_t* wbwi, rocksdb_iterator_t* base_iterator); +extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* +rocksdb_writebatch_wi_create_iterator_with_base_cf( + rocksdb_writebatch_wi_t* wbwi, rocksdb_iterator_t* base_iterator, + rocksdb_column_family_handle_t* cf); + +/* Options utils */ + +// Load the latest rocksdb options from the specified db_path. +// +// On success, num_column_families will be updated with a non-zero +// number indicating the number of column families. +// The returned db_options, column_family_names, and column_family_options +// should be released via rocksdb_load_latest_options_destroy(). +// +// On error, a non-null errptr that includes the error message will be +// returned. db_options, column_family_names, and column_family_options +// will be set to NULL. +extern ROCKSDB_LIBRARY_API void rocksdb_load_latest_options( + const char* db_path, rocksdb_env_t* env, bool ignore_unknown_options, + rocksdb_cache_t* cache, rocksdb_options_t** db_options, + size_t* num_column_families, char*** column_family_names, + rocksdb_options_t*** column_family_options, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_load_latest_options_destroy( + rocksdb_options_t* db_options, char** list_column_family_names, + rocksdb_options_t** list_column_family_options, size_t len); + +/* Block based table options */ + +extern ROCKSDB_LIBRARY_API rocksdb_block_based_table_options_t* +rocksdb_block_based_options_create(void); +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_destroy( + rocksdb_block_based_table_options_t* options); +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_checksum( + rocksdb_block_based_table_options_t*, char); +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_block_size( + rocksdb_block_based_table_options_t* options, size_t block_size); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_block_size_deviation( + rocksdb_block_based_table_options_t* options, int block_size_deviation); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_block_restart_interval( + rocksdb_block_based_table_options_t* options, int block_restart_interval); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_index_block_restart_interval( + rocksdb_block_based_table_options_t* options, + int index_block_restart_interval); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_metadata_block_size( + rocksdb_block_based_table_options_t* options, uint64_t metadata_block_size); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_partition_filters( + rocksdb_block_based_table_options_t* options, + unsigned char partition_filters); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_optimize_filters_for_memory( + rocksdb_block_based_table_options_t* options, + unsigned char optimize_filters_for_memory); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_use_delta_encoding( + rocksdb_block_based_table_options_t* options, + unsigned char use_delta_encoding); +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_filter_policy( + rocksdb_block_based_table_options_t* options, + rocksdb_filterpolicy_t* filter_policy); +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_no_block_cache( + rocksdb_block_based_table_options_t* options, unsigned char no_block_cache); +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_block_cache( + 
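A sketch of the load/destroy pairing documented above, assuming rocksdb_create_default_env, rocksdb_cache_create_lru, and their destroy counterparts as declared elsewhere in this header:

    #include <rocksdb/c.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Recover the options an existing database was last opened with and
       report how many column families it has. */
    static void inspect_db(const char* db_path) {
      rocksdb_env_t* env = rocksdb_create_default_env();
      rocksdb_cache_t* cache = rocksdb_cache_create_lru(1 << 20);
      char* err = NULL;
      rocksdb_options_t* db_opts = NULL;
      size_t num_cfs = 0;
      char** cf_names = NULL;
      rocksdb_options_t** cf_opts = NULL;

      rocksdb_load_latest_options(db_path, env, false, cache, &db_opts,
                                  &num_cfs, &cf_names, &cf_opts, &err);
      if (err) {
        fprintf(stderr, "%s\n", err); /* outputs are NULL on error */
        free(err);
      } else {
        printf("%zu column families\n", num_cfs);
        rocksdb_load_latest_options_destroy(db_opts, cf_names, cf_opts,
                                            num_cfs);
      }
      rocksdb_cache_destroy(cache);
      rocksdb_env_destroy(env);
    }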
rocksdb_block_based_table_options_t* options, rocksdb_cache_t* block_cache); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_whole_key_filtering( + rocksdb_block_based_table_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_format_version( + rocksdb_block_based_table_options_t*, int); +enum { + rocksdb_block_based_table_index_type_binary_search = 0, + rocksdb_block_based_table_index_type_hash_search = 1, + rocksdb_block_based_table_index_type_two_level_index_search = 2, +}; +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_index_type( + rocksdb_block_based_table_options_t*, int); // uses one of the above enums +enum { + rocksdb_block_based_table_data_block_index_type_binary_search = 0, + rocksdb_block_based_table_data_block_index_type_binary_search_and_hash = 1, +}; +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_data_block_index_type( + rocksdb_block_based_table_options_t*, int); // uses one of the above enums +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_data_block_hash_ratio( + rocksdb_block_based_table_options_t* options, double v); +// rocksdb_block_based_options_set_hash_index_allow_collision() +// is removed since BlockBasedTableOptions.hash_index_allow_collision() +// is removed +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_cache_index_and_filter_blocks( + rocksdb_block_based_table_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_cache_index_and_filter_blocks_with_high_priority( + rocksdb_block_based_table_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache( + rocksdb_block_based_table_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_pin_top_level_index_and_filter( + rocksdb_block_based_table_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_block_based_table_factory( + rocksdb_options_t* opt, rocksdb_block_based_table_options_t* table_options); + +/* Cuckoo table options */ + +extern ROCKSDB_LIBRARY_API rocksdb_cuckoo_table_options_t* +rocksdb_cuckoo_options_create(void); +extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_destroy( + rocksdb_cuckoo_table_options_t* options); +extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_set_hash_ratio( + rocksdb_cuckoo_table_options_t* options, double v); +extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_set_max_search_depth( + rocksdb_cuckoo_table_options_t* options, uint32_t v); +extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_set_cuckoo_block_size( + rocksdb_cuckoo_table_options_t* options, uint32_t v); +extern ROCKSDB_LIBRARY_API void +rocksdb_cuckoo_options_set_identity_as_first_hash( + rocksdb_cuckoo_table_options_t* options, unsigned char v); +extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_set_use_module_hash( + rocksdb_cuckoo_table_options_t* options, unsigned char v); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_cuckoo_table_factory( + rocksdb_options_t* opt, rocksdb_cuckoo_table_options_t* table_options); + +/* Options */ +extern ROCKSDB_LIBRARY_API void rocksdb_set_options(rocksdb_t* db, int count, + const char* const keys[], + const char* const values[], + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_set_options_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* handle, int count, + const char* const keys[], const char* const values[], char** 
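The block-based table knobs above are usually configured as a bundle and then installed with rocksdb_options_set_block_based_table_factory. The block size, cache capacity, and 10.0 bits-per-key Bloom figure below are illustrative only; note that recent releases take bits per key as a double, while older ones took an int.

    #include <rocksdb/c.h>

    /* Build options with a tuned block-based table: bigger blocks, a Bloom
       filter, and a shared LRU block cache that also holds index and filter
       blocks. */
    static rocksdb_options_t* tuned_options(void) {
      rocksdb_block_based_table_options_t* bbto =
          rocksdb_block_based_options_create();
      rocksdb_block_based_options_set_block_size(bbto, 16 * 1024);
      rocksdb_block_based_options_set_filter_policy(
          bbto, rocksdb_filterpolicy_create_bloom(10.0));
      rocksdb_cache_t* cache = rocksdb_cache_create_lru(64 << 20);
      rocksdb_block_based_options_set_block_cache(bbto, cache);
      rocksdb_block_based_options_set_cache_index_and_filter_blocks(bbto, 1);

      rocksdb_options_t* opts = rocksdb_options_create();
      rocksdb_options_set_block_based_table_factory(opts, bbto);

      /* The factory copies what it needs; the setup objects can go. */
      rocksdb_block_based_options_destroy(bbto);
      rocksdb_cache_destroy(cache);
      return opts;
    }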
errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_options_t* rocksdb_options_create(void); +extern ROCKSDB_LIBRARY_API void rocksdb_options_destroy(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API rocksdb_options_t* rocksdb_options_create_copy( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_increase_parallelism( + rocksdb_options_t* opt, int total_threads); +extern ROCKSDB_LIBRARY_API void rocksdb_options_optimize_for_point_lookup( + rocksdb_options_t* opt, uint64_t block_cache_size_mb); +extern ROCKSDB_LIBRARY_API void rocksdb_options_optimize_level_style_compaction( + rocksdb_options_t* opt, uint64_t memtable_memory_budget); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_optimize_universal_style_compaction( + rocksdb_options_t* opt, uint64_t memtable_memory_budget); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_allow_ingest_behind( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_allow_ingest_behind(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compaction_filter( + rocksdb_options_t*, rocksdb_compactionfilter_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compaction_filter_factory( + rocksdb_options_t*, rocksdb_compactionfilterfactory_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_compaction_readahead_size( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_compaction_readahead_size(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_comparator( + rocksdb_options_t*, rocksdb_comparator_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_merge_operator( + rocksdb_options_t*, rocksdb_mergeoperator_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_uint64add_merge_operator( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compression_per_level( + rocksdb_options_t* opt, const int* level_values, size_t num_levels); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_create_if_missing( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_create_if_missing( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_create_missing_column_families(rocksdb_options_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_create_missing_column_families(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_error_if_exists( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_error_if_exists( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_paranoid_checks( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_paranoid_checks( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_db_paths( + rocksdb_options_t*, const rocksdb_dbpath_t** path_values, size_t num_paths); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_env(rocksdb_options_t*, + rocksdb_env_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_info_log(rocksdb_options_t*, + rocksdb_logger_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_info_log_level( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_info_log_level( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_write_buffer_size( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t 
+rocksdb_options_get_write_buffer_size(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_db_write_buffer_size( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_db_write_buffer_size(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_open_files( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_open_files( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_file_opening_threads( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_file_opening_threads( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_total_wal_size( + rocksdb_options_t* opt, uint64_t n); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_max_total_wal_size(rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compression_options( + rocksdb_options_t*, int, int, int, int); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_compression_options_zstd_max_train_bytes(rocksdb_options_t*, + int); +extern ROCKSDB_LIBRARY_API int +rocksdb_options_get_compression_options_zstd_max_train_bytes( + rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_compression_options_use_zstd_dict_trainer( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_compression_options_use_zstd_dict_trainer( + rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_compression_options_parallel_threads(rocksdb_options_t*, + int); +extern ROCKSDB_LIBRARY_API int +rocksdb_options_get_compression_options_parallel_threads( + rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_compression_options_max_dict_buffer_bytes( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_compression_options_max_dict_buffer_bytes( + rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_bottommost_compression_options(rocksdb_options_t*, int, int, + int, int, unsigned char); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes( + rocksdb_options_t*, int, unsigned char); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer( + rocksdb_options_t*, unsigned char, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer( + rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes( + rocksdb_options_t*, uint64_t, unsigned char); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prefix_extractor( + rocksdb_options_t*, rocksdb_slicetransform_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_num_levels( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_num_levels( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_level0_file_num_compaction_trigger(rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_options_get_level0_file_num_compaction_trigger(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_level0_slowdown_writes_trigger(rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_options_get_level0_slowdown_writes_trigger(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void 
rocksdb_options_set_level0_stop_writes_trigger( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_level0_stop_writes_trigger( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_target_file_size_base( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_target_file_size_base(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_target_file_size_multiplier( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_target_file_size_multiplier( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_bytes_for_level_base( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_max_bytes_for_level_base(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_level_compaction_dynamic_level_bytes(rocksdb_options_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_level_compaction_dynamic_level_bytes(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_max_bytes_for_level_multiplier(rocksdb_options_t*, double); +extern ROCKSDB_LIBRARY_API double +rocksdb_options_get_max_bytes_for_level_multiplier(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_max_bytes_for_level_multiplier_additional( + rocksdb_options_t*, int* level_values, size_t num_levels); +extern ROCKSDB_LIBRARY_API void rocksdb_options_enable_statistics( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_skip_stats_update_on_db_open(rocksdb_options_t* opt, + unsigned char val); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_skip_stats_update_on_db_open(rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open( + rocksdb_options_t* opt, unsigned char val); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open( + rocksdb_options_t* opt); + +/* Blob Options Settings */ +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_enable_blob_files( + rocksdb_options_t* opt, unsigned char val); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_enable_blob_files( + rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_min_blob_size( + rocksdb_options_t* opt, uint64_t val); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_min_blob_size(rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_file_size( + rocksdb_options_t* opt, uint64_t val); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_blob_file_size(rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_compression_type( + rocksdb_options_t* opt, int val); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_compression_type( + rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_enable_blob_gc( + rocksdb_options_t* opt, unsigned char val); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_enable_blob_gc( + rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_gc_age_cutoff( + rocksdb_options_t* opt, double val); +extern ROCKSDB_LIBRARY_API double rocksdb_options_get_blob_gc_age_cutoff( + rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_gc_force_threshold( + rocksdb_options_t* opt, double val); +extern ROCKSDB_LIBRARY_API double 
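The blob settings above divert large values into separate blob files instead of the LSM tree. A sketch with illustrative thresholds, not recommendations:

    #include <rocksdb/c.h>

    /* Store values of 4 KiB and up in ZSTD-compressed blob files, with
       garbage collection of the oldest quarter of those files. */
    static void enable_blobs(rocksdb_options_t* opts) {
      rocksdb_options_set_enable_blob_files(opts, 1);
      rocksdb_options_set_min_blob_size(opts, 4096);       /* >= 4 KiB */
      rocksdb_options_set_blob_file_size(opts, 256 << 20); /* 256 MiB files */
      rocksdb_options_set_blob_compression_type(opts, rocksdb_zstd_compression);
      rocksdb_options_set_enable_blob_gc(opts, 1);
      rocksdb_options_set_blob_gc_age_cutoff(opts, 0.25);  /* oldest 25% */
    }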
rocksdb_options_get_blob_gc_force_threshold( + rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_blob_compaction_readahead_size(rocksdb_options_t* opt, + uint64_t val); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_blob_compaction_readahead_size(rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_file_starting_level( + rocksdb_options_t* opt, int val); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_file_starting_level( + rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_cache( + rocksdb_options_t* opt, rocksdb_cache_t* blob_cache); + +enum { + rocksdb_prepopulate_blob_disable = 0, + rocksdb_prepopulate_blob_flush_only = 1 +}; + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prepopulate_blob_cache( + rocksdb_options_t* opt, int val); + +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_prepopulate_blob_cache( + rocksdb_options_t* opt); + +/* returns a pointer to a malloc()-ed, null terminated string */ +extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string( + rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_write_buffer_number( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_write_buffer_number( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_min_write_buffer_number_to_merge(rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_options_get_min_write_buffer_number_to_merge(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_max_write_buffer_number_to_maintain(rocksdb_options_t*, + int); +extern ROCKSDB_LIBRARY_API int +rocksdb_options_get_max_write_buffer_number_to_maintain(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_max_write_buffer_size_to_maintain(rocksdb_options_t*, + int64_t); +extern ROCKSDB_LIBRARY_API int64_t +rocksdb_options_get_max_write_buffer_size_to_maintain(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_enable_pipelined_write( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_enable_pipelined_write(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_unordered_write( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_unordered_write( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_subcompactions( + rocksdb_options_t*, uint32_t); +extern ROCKSDB_LIBRARY_API uint32_t +rocksdb_options_get_max_subcompactions(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_background_jobs( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_background_jobs( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_background_compactions( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_background_compactions( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_background_flushes( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_background_flushes( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_log_file_size( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_max_log_file_size(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_log_file_time_to_roll( 
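rocksdb_options_statistics_get_string above pairs with rocksdb_options_enable_statistics: counters only accumulate if statistics were enabled on the options before the database was opened. A sketch:

    #include <rocksdb/c.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Dump accumulated counters, assuming rocksdb_options_enable_statistics()
       was called on `opts` before rocksdb_open(). */
    static void dump_statistics(rocksdb_options_t* opts) {
      char* s = rocksdb_options_statistics_get_string(opts);
      if (s) {
        puts(s);
        free(s); /* malloc()ed, null terminated, caller frees */
      }
    }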
+ rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_log_file_time_to_roll(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_keep_log_file_num( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_keep_log_file_num(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_recycle_log_file_num( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_recycle_log_file_num(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_soft_pending_compaction_bytes_limit(rocksdb_options_t* opt, + size_t v); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_soft_pending_compaction_bytes_limit(rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_hard_pending_compaction_bytes_limit(rocksdb_options_t* opt, + size_t v); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_hard_pending_compaction_bytes_limit(rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_manifest_file_size( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_max_manifest_file_size(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_table_cache_numshardbits( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_table_cache_numshardbits( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_arena_block_size( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_arena_block_size(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_use_fsync( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_use_fsync( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_db_log_dir( + rocksdb_options_t*, const char*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_wal_dir(rocksdb_options_t*, + const char*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_WAL_ttl_seconds( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_WAL_ttl_seconds(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_WAL_size_limit_MB( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_WAL_size_limit_MB(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_manifest_preallocation_size( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_manifest_preallocation_size(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_allow_mmap_reads( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_allow_mmap_reads( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_allow_mmap_writes( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_allow_mmap_writes( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_use_direct_reads( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_use_direct_reads( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_use_direct_io_for_flush_and_compaction(rocksdb_options_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_use_direct_io_for_flush_and_compaction(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void 
rocksdb_options_set_is_fd_close_on_exec( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_is_fd_close_on_exec(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_stats_dump_period_sec( + rocksdb_options_t*, unsigned int); +extern ROCKSDB_LIBRARY_API unsigned int +rocksdb_options_get_stats_dump_period_sec(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_stats_persist_period_sec( + rocksdb_options_t*, unsigned int); +extern ROCKSDB_LIBRARY_API unsigned int +rocksdb_options_get_stats_persist_period_sec(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_advise_random_on_open( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_advise_random_on_open(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_access_hint_on_compaction_start(rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_options_get_access_hint_on_compaction_start(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_use_adaptive_mutex( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_use_adaptive_mutex( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bytes_per_sync( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_bytes_per_sync(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_wal_bytes_per_sync( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_wal_bytes_per_sync(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_writable_file_max_buffer_size(rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_writable_file_max_buffer_size(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_allow_concurrent_memtable_write(rocksdb_options_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_allow_concurrent_memtable_write(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_enable_write_thread_adaptive_yield(rocksdb_options_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_enable_write_thread_adaptive_yield(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_max_sequential_skip_in_iterations(rocksdb_options_t*, + uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_max_sequential_skip_in_iterations(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_disable_auto_compactions( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_disable_auto_compactions(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_optimize_filters_for_hits( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_optimize_filters_for_hits(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_delete_obsolete_files_period_micros(rocksdb_options_t*, + uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_delete_obsolete_files_period_micros(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_prepare_for_bulk_load( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_vector_rep( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void 
+rocksdb_options_set_memtable_prefix_bloom_size_ratio(rocksdb_options_t*, + double); +extern ROCKSDB_LIBRARY_API double +rocksdb_options_get_memtable_prefix_bloom_size_ratio(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_compaction_bytes( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_max_compaction_bytes(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_hash_skip_list_rep( + rocksdb_options_t*, size_t, int32_t, int32_t); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_hash_link_list_rep( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_plain_table_factory( + rocksdb_options_t*, uint32_t, int, double, size_t); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_min_level_to_compress( + rocksdb_options_t* opt, int level); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_huge_page_size( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_memtable_huge_page_size(rocksdb_options_t*); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_successive_merges( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_max_successive_merges(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bloom_locality( + rocksdb_options_t*, uint32_t); +extern ROCKSDB_LIBRARY_API uint32_t +rocksdb_options_get_bloom_locality(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_inplace_update_support( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_inplace_update_support(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_inplace_update_num_locks( + rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_options_get_inplace_update_num_locks(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_report_bg_io_stats( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_report_bg_io_stats( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_avoid_unnecessary_blocking_io(rocksdb_options_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_avoid_unnecessary_blocking_io(rocksdb_options_t*); + +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_experimental_mempurge_threshold(rocksdb_options_t*, double); +extern ROCKSDB_LIBRARY_API double +rocksdb_options_get_experimental_mempurge_threshold(rocksdb_options_t*); + +enum { + rocksdb_tolerate_corrupted_tail_records_recovery = 0, + rocksdb_absolute_consistency_recovery = 1, + rocksdb_point_in_time_recovery = 2, + rocksdb_skip_any_corrupted_records_recovery = 3 +}; +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_wal_recovery_mode( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_wal_recovery_mode( + rocksdb_options_t*); + +enum { + rocksdb_no_compression = 0, + rocksdb_snappy_compression = 1, + rocksdb_zlib_compression = 2, + rocksdb_bz2_compression = 3, + rocksdb_lz4_compression = 4, + rocksdb_lz4hc_compression = 5, + rocksdb_xpress_compression = 6, + rocksdb_zstd_compression = 7 +}; +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compression( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_compression( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bottommost_compression( + rocksdb_options_t*, int); +extern 
ROCKSDB_LIBRARY_API int rocksdb_options_get_bottommost_compression( + rocksdb_options_t*); + +enum { + rocksdb_level_compaction = 0, + rocksdb_universal_compaction = 1, + rocksdb_fifo_compaction = 2 +}; +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compaction_style( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_compaction_style( + rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_universal_compaction_options( + rocksdb_options_t*, rocksdb_universal_compaction_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_fifo_compaction_options( + rocksdb_options_t* opt, rocksdb_fifo_compaction_options_t* fifo); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_ratelimiter( + rocksdb_options_t* opt, rocksdb_ratelimiter_t* limiter); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_atomic_flush( + rocksdb_options_t* opt, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_atomic_flush( + rocksdb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_row_cache( + rocksdb_options_t* opt, rocksdb_cache_t* cache); + +extern ROCKSDB_LIBRARY_API void +rocksdb_options_add_compact_on_deletion_collector_factory( + rocksdb_options_t*, size_t window_size, size_t num_dels_trigger); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_manual_wal_flush( + rocksdb_options_t* opt, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_manual_wal_flush( + rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_wal_compression( + rocksdb_options_t* opt, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_wal_compression( + rocksdb_options_t* opt); + +/* RateLimiter */ +extern ROCKSDB_LIBRARY_API rocksdb_ratelimiter_t* rocksdb_ratelimiter_create( + int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness); +extern ROCKSDB_LIBRARY_API void rocksdb_ratelimiter_destroy( + rocksdb_ratelimiter_t*); + +/* PerfContext */ +enum { + rocksdb_uninitialized = 0, + rocksdb_disable = 1, + rocksdb_enable_count = 2, + rocksdb_enable_time_except_for_mutex = 3, + rocksdb_enable_time = 4, + rocksdb_out_of_bounds = 5 +}; + +enum { + rocksdb_user_key_comparison_count = 0, + rocksdb_block_cache_hit_count, + rocksdb_block_read_count, + rocksdb_block_read_byte, + rocksdb_block_read_time, + rocksdb_block_checksum_time, + rocksdb_block_decompress_time, + rocksdb_get_read_bytes, + rocksdb_multiget_read_bytes, + rocksdb_iter_read_bytes, + rocksdb_internal_key_skipped_count, + rocksdb_internal_delete_skipped_count, + rocksdb_internal_recent_skipped_count, + rocksdb_internal_merge_count, + rocksdb_get_snapshot_time, + rocksdb_get_from_memtable_time, + rocksdb_get_from_memtable_count, + rocksdb_get_post_process_time, + rocksdb_get_from_output_files_time, + rocksdb_seek_on_memtable_time, + rocksdb_seek_on_memtable_count, + rocksdb_next_on_memtable_count, + rocksdb_prev_on_memtable_count, + rocksdb_seek_child_seek_time, + rocksdb_seek_child_seek_count, + rocksdb_seek_min_heap_time, + rocksdb_seek_max_heap_time, + rocksdb_seek_internal_seek_time, + rocksdb_find_next_user_entry_time, + rocksdb_write_wal_time, + rocksdb_write_memtable_time, + rocksdb_write_delay_time, + rocksdb_write_pre_and_post_process_time, + rocksdb_db_mutex_lock_nanos, + rocksdb_db_condition_wait_nanos, + rocksdb_merge_operator_time_nanos, + rocksdb_read_index_block_nanos, + rocksdb_read_filter_block_nanos, + rocksdb_new_table_block_iter_nanos, + rocksdb_new_table_iterator_nanos, + 
rocksdb_block_seek_nanos, + rocksdb_find_table_nanos, + rocksdb_bloom_memtable_hit_count, + rocksdb_bloom_memtable_miss_count, + rocksdb_bloom_sst_hit_count, + rocksdb_bloom_sst_miss_count, + rocksdb_key_lock_wait_time, + rocksdb_key_lock_wait_count, + rocksdb_env_new_sequential_file_nanos, + rocksdb_env_new_random_access_file_nanos, + rocksdb_env_new_writable_file_nanos, + rocksdb_env_reuse_writable_file_nanos, + rocksdb_env_new_random_rw_file_nanos, + rocksdb_env_new_directory_nanos, + rocksdb_env_file_exists_nanos, + rocksdb_env_get_children_nanos, + rocksdb_env_get_children_file_attributes_nanos, + rocksdb_env_delete_file_nanos, + rocksdb_env_create_dir_nanos, + rocksdb_env_create_dir_if_missing_nanos, + rocksdb_env_delete_dir_nanos, + rocksdb_env_get_file_size_nanos, + rocksdb_env_get_file_modification_time_nanos, + rocksdb_env_rename_file_nanos, + rocksdb_env_link_file_nanos, + rocksdb_env_lock_file_nanos, + rocksdb_env_unlock_file_nanos, + rocksdb_env_new_logger_nanos, + rocksdb_number_async_seek, + rocksdb_blob_cache_hit_count, + rocksdb_blob_read_count, + rocksdb_blob_read_byte, + rocksdb_blob_read_time, + rocksdb_blob_checksum_time, + rocksdb_blob_decompress_time, + rocksdb_internal_range_del_reseek_count, + rocksdb_block_read_cpu_time, + rocksdb_total_metric_count = 79 +}; + +extern ROCKSDB_LIBRARY_API void rocksdb_set_perf_level(int); +extern ROCKSDB_LIBRARY_API rocksdb_perfcontext_t* rocksdb_perfcontext_create( + void); +extern ROCKSDB_LIBRARY_API void rocksdb_perfcontext_reset( + rocksdb_perfcontext_t* context); +extern ROCKSDB_LIBRARY_API char* rocksdb_perfcontext_report( + rocksdb_perfcontext_t* context, unsigned char exclude_zero_counters); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_perfcontext_metric(rocksdb_perfcontext_t* context, int metric); +extern ROCKSDB_LIBRARY_API void rocksdb_perfcontext_destroy( + rocksdb_perfcontext_t* context); + +/* Compaction Filter */ + +extern ROCKSDB_LIBRARY_API rocksdb_compactionfilter_t* +rocksdb_compactionfilter_create( + void* state, void (*destructor)(void*), + unsigned char (*filter)(void*, int level, const char* key, + size_t key_length, const char* existing_value, + size_t value_length, char** new_value, + size_t* new_value_length, + unsigned char* value_changed), + const char* (*name)(void*)); +extern ROCKSDB_LIBRARY_API void rocksdb_compactionfilter_set_ignore_snapshots( + rocksdb_compactionfilter_t*, unsigned char); +extern ROCKSDB_LIBRARY_API void rocksdb_compactionfilter_destroy( + rocksdb_compactionfilter_t*); + +/* Compaction Filter Context */ + +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_compactionfiltercontext_is_full_compaction( + rocksdb_compactionfiltercontext_t* context); + +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_compactionfiltercontext_is_manual_compaction( + rocksdb_compactionfiltercontext_t* context); + +/* Compaction Filter Factory */ + +extern ROCKSDB_LIBRARY_API rocksdb_compactionfilterfactory_t* +rocksdb_compactionfilterfactory_create( + void* state, void (*destructor)(void*), + rocksdb_compactionfilter_t* (*create_compaction_filter)( + void*, rocksdb_compactionfiltercontext_t* context), + const char* (*name)(void*)); +extern ROCKSDB_LIBRARY_API void rocksdb_compactionfilterfactory_destroy( + rocksdb_compactionfilterfactory_t*); + +/* Comparator */ + +extern ROCKSDB_LIBRARY_API rocksdb_comparator_t* rocksdb_comparator_create( + void* state, void (*destructor)(void*), + int (*compare)(void*, const char* a, size_t alen, const char* b, + size_t blen), + const char* (*name)(void*)); +extern 
ROCKSDB_LIBRARY_API void rocksdb_comparator_destroy( + rocksdb_comparator_t*); + +extern ROCKSDB_LIBRARY_API rocksdb_comparator_t* +rocksdb_comparator_with_ts_create( + void* state, void (*destructor)(void*), + int (*compare)(void*, const char* a, size_t alen, const char* b, + size_t blen), + int (*compare_ts)(void*, const char* a_ts, size_t a_tslen, const char* b_ts, + size_t b_tslen), + int (*compare_without_ts)(void*, const char* a, size_t alen, + unsigned char a_has_ts, const char* b, + size_t blen, unsigned char b_has_ts), + const char* (*name)(void*), size_t timestamp_size); + +/* Filter policy */ + +extern ROCKSDB_LIBRARY_API void rocksdb_filterpolicy_destroy( + rocksdb_filterpolicy_t*); + +extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* +rocksdb_filterpolicy_create_bloom(double bits_per_key); +extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* +rocksdb_filterpolicy_create_bloom_full(double bits_per_key); +extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* +rocksdb_filterpolicy_create_ribbon(double bloom_equivalent_bits_per_key); +extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* +rocksdb_filterpolicy_create_ribbon_hybrid(double bloom_equivalent_bits_per_key, + int bloom_before_level); + +/* Merge Operator */ + +extern ROCKSDB_LIBRARY_API rocksdb_mergeoperator_t* +rocksdb_mergeoperator_create( + void* state, void (*destructor)(void*), + char* (*full_merge)(void*, const char* key, size_t key_length, + const char* existing_value, + size_t existing_value_length, + const char* const* operands_list, + const size_t* operands_list_length, int num_operands, + unsigned char* success, size_t* new_value_length), + char* (*partial_merge)(void*, const char* key, size_t key_length, + const char* const* operands_list, + const size_t* operands_list_length, int num_operands, + unsigned char* success, size_t* new_value_length), + void (*delete_value)(void*, const char* value, size_t value_length), + const char* (*name)(void*)); +extern ROCKSDB_LIBRARY_API void rocksdb_mergeoperator_destroy( + rocksdb_mergeoperator_t*); + +/* Read options */ + +extern ROCKSDB_LIBRARY_API rocksdb_readoptions_t* rocksdb_readoptions_create( + void); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_destroy( + rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_verify_checksums( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_readoptions_get_verify_checksums(rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_fill_cache( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_readoptions_get_fill_cache( + rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_snapshot( + rocksdb_readoptions_t*, const rocksdb_snapshot_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_iterate_upper_bound( + rocksdb_readoptions_t*, const char* key, size_t keylen); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_iterate_lower_bound( + rocksdb_readoptions_t*, const char* key, size_t keylen); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_read_tier( + rocksdb_readoptions_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_readoptions_get_read_tier( + rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_tailing( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_readoptions_get_tailing( + rocksdb_readoptions_t*); +// The functionality that this option controlled has been 
removed. +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_managed( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_readahead_size( + rocksdb_readoptions_t*, size_t); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_readoptions_get_readahead_size(rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_prefix_same_as_start( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_readoptions_get_prefix_same_as_start(rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_pin_data( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_readoptions_get_pin_data( + rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_total_order_seek( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_readoptions_get_total_order_seek(rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_readoptions_set_max_skippable_internal_keys(rocksdb_readoptions_t*, + uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_readoptions_get_max_skippable_internal_keys(rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_readoptions_set_background_purge_on_iterator_cleanup( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_readoptions_get_background_purge_on_iterator_cleanup( + rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_ignore_range_deletions( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_readoptions_get_ignore_range_deletions(rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_deadline( + rocksdb_readoptions_t*, uint64_t microseconds); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_readoptions_get_deadline(rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_io_timeout( + rocksdb_readoptions_t*, uint64_t microseconds); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_readoptions_get_io_timeout(rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_async_io( + rocksdb_readoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_readoptions_get_async_io( + rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_timestamp( + rocksdb_readoptions_t*, const char* ts, size_t tslen); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_iter_start_ts( + rocksdb_readoptions_t*, const char* ts, size_t tslen); + +/* Write options */ + +extern ROCKSDB_LIBRARY_API rocksdb_writeoptions_t* rocksdb_writeoptions_create( + void); +extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_destroy( + rocksdb_writeoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_set_sync( + rocksdb_writeoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_writeoptions_get_sync( + rocksdb_writeoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_disable_WAL( + rocksdb_writeoptions_t* opt, int disable); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_writeoptions_get_disable_WAL( + rocksdb_writeoptions_t* opt); +extern ROCKSDB_LIBRARY_API void +rocksdb_writeoptions_set_ignore_missing_column_families(rocksdb_writeoptions_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_writeoptions_get_ignore_missing_column_families( + rocksdb_writeoptions_t*); +extern 
ROCKSDB_LIBRARY_API void rocksdb_writeoptions_set_no_slowdown( + rocksdb_writeoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_writeoptions_get_no_slowdown( + rocksdb_writeoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_set_low_pri( + rocksdb_writeoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_writeoptions_get_low_pri( + rocksdb_writeoptions_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_writeoptions_set_memtable_insert_hint_per_batch(rocksdb_writeoptions_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_writeoptions_get_memtable_insert_hint_per_batch( + rocksdb_writeoptions_t*); + +/* Compact range options */ + +extern ROCKSDB_LIBRARY_API rocksdb_compactoptions_t* +rocksdb_compactoptions_create(void); +extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_destroy( + rocksdb_compactoptions_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_compactoptions_set_exclusive_manual_compaction( + rocksdb_compactoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_compactoptions_get_exclusive_manual_compaction( + rocksdb_compactoptions_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_compactoptions_set_bottommost_level_compaction( + rocksdb_compactoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_compactoptions_get_bottommost_level_compaction( + rocksdb_compactoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_set_change_level( + rocksdb_compactoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_compactoptions_get_change_level(rocksdb_compactoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_set_target_level( + rocksdb_compactoptions_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_compactoptions_get_target_level( + rocksdb_compactoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_set_full_history_ts_low( + rocksdb_compactoptions_t*, char* ts, size_t tslen); + +/* Flush options */ + +extern ROCKSDB_LIBRARY_API rocksdb_flushoptions_t* rocksdb_flushoptions_create( + void); +extern ROCKSDB_LIBRARY_API void rocksdb_flushoptions_destroy( + rocksdb_flushoptions_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_flushoptions_set_wait( + rocksdb_flushoptions_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char rocksdb_flushoptions_get_wait( + rocksdb_flushoptions_t*); + +/* Memory allocator */ + +extern ROCKSDB_LIBRARY_API rocksdb_memory_allocator_t* +rocksdb_jemalloc_nodump_allocator_create(char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_memory_allocator_destroy( + rocksdb_memory_allocator_t*); + +/* Cache */ + +extern ROCKSDB_LIBRARY_API rocksdb_lru_cache_options_t* +rocksdb_lru_cache_options_create(void); +extern ROCKSDB_LIBRARY_API void rocksdb_lru_cache_options_destroy( + rocksdb_lru_cache_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_lru_cache_options_set_capacity( + rocksdb_lru_cache_options_t*, size_t); +extern ROCKSDB_LIBRARY_API void rocksdb_lru_cache_options_set_num_shard_bits( + rocksdb_lru_cache_options_t*, int); +extern ROCKSDB_LIBRARY_API void rocksdb_lru_cache_options_set_memory_allocator( + rocksdb_lru_cache_options_t*, rocksdb_memory_allocator_t*); + +extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru( + size_t capacity); +extern ROCKSDB_LIBRARY_API rocksdb_cache_t* +rocksdb_cache_create_lru_with_strict_capacity_limit(size_t capacity); +extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru_opts( + const 
rocksdb_lru_cache_options_t*); + +extern ROCKSDB_LIBRARY_API void rocksdb_cache_destroy(rocksdb_cache_t* cache); +extern ROCKSDB_LIBRARY_API void rocksdb_cache_disown_data( + rocksdb_cache_t* cache); +extern ROCKSDB_LIBRARY_API void rocksdb_cache_set_capacity( + rocksdb_cache_t* cache, size_t capacity); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_cache_get_capacity(const rocksdb_cache_t* cache); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_cache_get_usage(const rocksdb_cache_t* cache); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_cache_get_pinned_usage(const rocksdb_cache_t* cache); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_cache_get_table_address_count(const rocksdb_cache_t* cache); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_cache_get_occupancy_count(const rocksdb_cache_t* cache); + +/* HyperClockCache */ + +extern ROCKSDB_LIBRARY_API rocksdb_hyper_clock_cache_options_t* +rocksdb_hyper_clock_cache_options_create(size_t capacity, + size_t estimated_entry_charge); +extern ROCKSDB_LIBRARY_API void rocksdb_hyper_clock_cache_options_destroy( + rocksdb_hyper_clock_cache_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_hyper_clock_cache_options_set_capacity( + rocksdb_hyper_clock_cache_options_t*, size_t); +extern ROCKSDB_LIBRARY_API void +rocksdb_hyper_clock_cache_options_set_estimated_entry_charge( + rocksdb_hyper_clock_cache_options_t*, size_t); +extern ROCKSDB_LIBRARY_API void +rocksdb_hyper_clock_cache_options_set_num_shard_bits( + rocksdb_hyper_clock_cache_options_t*, int); +extern ROCKSDB_LIBRARY_API void +rocksdb_hyper_clock_cache_options_set_memory_allocator( + rocksdb_hyper_clock_cache_options_t*, rocksdb_memory_allocator_t*); + +extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_hyper_clock( + size_t capacity, size_t estimated_entry_charge); +extern ROCKSDB_LIBRARY_API rocksdb_cache_t* +rocksdb_cache_create_hyper_clock_opts( + const rocksdb_hyper_clock_cache_options_t*); + +/* DBPath */ + +extern ROCKSDB_LIBRARY_API rocksdb_dbpath_t* rocksdb_dbpath_create( + const char* path, uint64_t target_size); +extern ROCKSDB_LIBRARY_API void rocksdb_dbpath_destroy(rocksdb_dbpath_t*); + +/* Env */ + +extern ROCKSDB_LIBRARY_API rocksdb_env_t* rocksdb_create_default_env(void); +extern ROCKSDB_LIBRARY_API rocksdb_env_t* rocksdb_create_mem_env(void); +extern ROCKSDB_LIBRARY_API void rocksdb_env_set_background_threads( + rocksdb_env_t* env, int n); +extern ROCKSDB_LIBRARY_API int rocksdb_env_get_background_threads( + rocksdb_env_t* env); +extern ROCKSDB_LIBRARY_API void +rocksdb_env_set_high_priority_background_threads(rocksdb_env_t* env, int n); +extern ROCKSDB_LIBRARY_API int rocksdb_env_get_high_priority_background_threads( + rocksdb_env_t* env); +extern ROCKSDB_LIBRARY_API void rocksdb_env_set_low_priority_background_threads( + rocksdb_env_t* env, int n); +extern ROCKSDB_LIBRARY_API int rocksdb_env_get_low_priority_background_threads( + rocksdb_env_t* env); +extern ROCKSDB_LIBRARY_API void +rocksdb_env_set_bottom_priority_background_threads(rocksdb_env_t* env, int n); +extern ROCKSDB_LIBRARY_API int +rocksdb_env_get_bottom_priority_background_threads(rocksdb_env_t* env); +extern ROCKSDB_LIBRARY_API void rocksdb_env_join_all_threads( + rocksdb_env_t* env); +extern ROCKSDB_LIBRARY_API void rocksdb_env_lower_thread_pool_io_priority( + rocksdb_env_t* env); +extern ROCKSDB_LIBRARY_API void +rocksdb_env_lower_high_priority_thread_pool_io_priority(rocksdb_env_t* env); +extern ROCKSDB_LIBRARY_API void rocksdb_env_lower_thread_pool_cpu_priority( + rocksdb_env_t* env); +extern 
ROCKSDB_LIBRARY_API void +rocksdb_env_lower_high_priority_thread_pool_cpu_priority(rocksdb_env_t* env); + +extern ROCKSDB_LIBRARY_API void rocksdb_env_destroy(rocksdb_env_t*); + +extern ROCKSDB_LIBRARY_API rocksdb_envoptions_t* rocksdb_envoptions_create( + void); +extern ROCKSDB_LIBRARY_API void rocksdb_envoptions_destroy( + rocksdb_envoptions_t* opt); +extern ROCKSDB_LIBRARY_API void rocksdb_create_dir_if_missing( + rocksdb_env_t* env, const char* path, char** errptr); + +/* SstFile */ + +extern ROCKSDB_LIBRARY_API rocksdb_sstfilewriter_t* +rocksdb_sstfilewriter_create(const rocksdb_envoptions_t* env, + const rocksdb_options_t* io_options); +extern ROCKSDB_LIBRARY_API rocksdb_sstfilewriter_t* +rocksdb_sstfilewriter_create_with_comparator( + const rocksdb_envoptions_t* env, const rocksdb_options_t* io_options, + const rocksdb_comparator_t* comparator); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_open( + rocksdb_sstfilewriter_t* writer, const char* name, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_add( + rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, + const char* val, size_t vallen, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_put( + rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, + const char* val, size_t vallen, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_put_with_ts( + rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, + const char* ts, size_t tslen, const char* val, size_t vallen, + char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_merge( + rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, + const char* val, size_t vallen, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_delete( + rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, + char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_delete_with_ts( + rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, + const char* ts, size_t tslen, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_delete_range( + rocksdb_sstfilewriter_t* writer, const char* begin_key, size_t begin_keylen, + const char* end_key, size_t end_keylen, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_finish( + rocksdb_sstfilewriter_t* writer, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_file_size( + rocksdb_sstfilewriter_t* writer, uint64_t* file_size); +extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_destroy( + rocksdb_sstfilewriter_t* writer); +extern ROCKSDB_LIBRARY_API rocksdb_ingestexternalfileoptions_t* +rocksdb_ingestexternalfileoptions_create(void); +extern ROCKSDB_LIBRARY_API void +rocksdb_ingestexternalfileoptions_set_move_files( + rocksdb_ingestexternalfileoptions_t* opt, unsigned char move_files); +extern ROCKSDB_LIBRARY_API void +rocksdb_ingestexternalfileoptions_set_snapshot_consistency( + rocksdb_ingestexternalfileoptions_t* opt, + unsigned char snapshot_consistency); +extern ROCKSDB_LIBRARY_API void +rocksdb_ingestexternalfileoptions_set_allow_global_seqno( + rocksdb_ingestexternalfileoptions_t* opt, unsigned char allow_global_seqno); +extern ROCKSDB_LIBRARY_API void +rocksdb_ingestexternalfileoptions_set_allow_blocking_flush( + rocksdb_ingestexternalfileoptions_t* opt, + unsigned char allow_blocking_flush); +extern ROCKSDB_LIBRARY_API void +rocksdb_ingestexternalfileoptions_set_ingest_behind( + rocksdb_ingestexternalfileoptions_t* opt, 
unsigned char ingest_behind); +extern ROCKSDB_LIBRARY_API void +rocksdb_ingestexternalfileoptions_set_fail_if_not_bottommost_level( + rocksdb_ingestexternalfileoptions_t* opt, + unsigned char fail_if_not_bottommost_level); + +extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_destroy( + rocksdb_ingestexternalfileoptions_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_ingest_external_file( + rocksdb_t* db, const char* const* file_list, const size_t list_len, + const rocksdb_ingestexternalfileoptions_t* opt, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_ingest_external_file_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* handle, + const char* const* file_list, const size_t list_len, + const rocksdb_ingestexternalfileoptions_t* opt, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_try_catch_up_with_primary( + rocksdb_t* db, char** errptr); + +/* SliceTransform */ + +extern ROCKSDB_LIBRARY_API rocksdb_slicetransform_t* +rocksdb_slicetransform_create( + void* state, void (*destructor)(void*), + char* (*transform)(void*, const char* key, size_t length, + size_t* dst_length), + unsigned char (*in_domain)(void*, const char* key, size_t length), + unsigned char (*in_range)(void*, const char* key, size_t length), + const char* (*name)(void*)); +extern ROCKSDB_LIBRARY_API rocksdb_slicetransform_t* + rocksdb_slicetransform_create_fixed_prefix(size_t); +extern ROCKSDB_LIBRARY_API rocksdb_slicetransform_t* +rocksdb_slicetransform_create_noop(void); +extern ROCKSDB_LIBRARY_API void rocksdb_slicetransform_destroy( + rocksdb_slicetransform_t*); + +/* Universal Compaction options */ + +enum { + rocksdb_similar_size_compaction_stop_style = 0, + rocksdb_total_size_compaction_stop_style = 1 +}; + +extern ROCKSDB_LIBRARY_API rocksdb_universal_compaction_options_t* +rocksdb_universal_compaction_options_create(void); +extern ROCKSDB_LIBRARY_API void +rocksdb_universal_compaction_options_set_size_ratio( + rocksdb_universal_compaction_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_universal_compaction_options_get_size_ratio( + rocksdb_universal_compaction_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_universal_compaction_options_set_min_merge_width( + rocksdb_universal_compaction_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_universal_compaction_options_get_min_merge_width( + rocksdb_universal_compaction_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_universal_compaction_options_set_max_merge_width( + rocksdb_universal_compaction_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_universal_compaction_options_get_max_merge_width( + rocksdb_universal_compaction_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_universal_compaction_options_set_max_size_amplification_percent( + rocksdb_universal_compaction_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_universal_compaction_options_get_max_size_amplification_percent( + rocksdb_universal_compaction_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_universal_compaction_options_set_compression_size_percent( + rocksdb_universal_compaction_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_universal_compaction_options_get_compression_size_percent( + rocksdb_universal_compaction_options_t*); +extern ROCKSDB_LIBRARY_API void +rocksdb_universal_compaction_options_set_stop_style( + rocksdb_universal_compaction_options_t*, int); +extern ROCKSDB_LIBRARY_API int +rocksdb_universal_compaction_options_get_stop_style( + rocksdb_universal_compaction_options_t*); 
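[Usage sketch, illustration only — the following example is not part of the vendored header. The declarations above follow the C API's uniform conventions: opaque *_t handles created and destroyed in pairs, and a trailing char** errptr that receives a malloc()ed message (released with rocksdb_free) when a call fails. This is a minimal sketch of wiring the universal-compaction options into a database; the path "/tmp/example-db" is a hypothetical placeholder.]

#include <stdio.h>
#include "rocksdb/c.h"

int main(void) {
  char* err = NULL;  /* receives a malloc()ed error message on failure */
  int failed = 0;

  /* Opaque handles are created and destroyed in pairs. */
  rocksdb_options_t* opts = rocksdb_options_create();
  rocksdb_options_set_create_if_missing(opts, 1);
  rocksdb_options_set_compaction_style(opts, rocksdb_universal_compaction);

  /* Attach universal-compaction tuning via its dedicated options handle. */
  rocksdb_universal_compaction_options_t* uopts =
      rocksdb_universal_compaction_options_create();
  rocksdb_universal_compaction_options_set_size_ratio(uopts, 1);
  rocksdb_universal_compaction_options_set_stop_style(
      uopts, rocksdb_total_size_compaction_stop_style);
  rocksdb_options_set_universal_compaction_options(opts, uopts);

  /* "/tmp/example-db" is a placeholder path for this sketch. */
  rocksdb_t* db = rocksdb_open(opts, "/tmp/example-db", &err);
  if (err != NULL) {
    fprintf(stderr, "open failed: %s\n", err);
    rocksdb_free(err);  /* error strings are freed with rocksdb_free */
    failed = 1;
  } else {
    rocksdb_close(db);
  }

  /* The setter copies the values, so the options handles can be freed now. */
  rocksdb_universal_compaction_options_destroy(uopts);
  rocksdb_options_destroy(opts);
  return failed;
}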
+extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_destroy( + rocksdb_universal_compaction_options_t*); + +extern ROCKSDB_LIBRARY_API rocksdb_fifo_compaction_options_t* +rocksdb_fifo_compaction_options_create(void); +extern ROCKSDB_LIBRARY_API void +rocksdb_fifo_compaction_options_set_allow_compaction( + rocksdb_fifo_compaction_options_t* fifo_opts, unsigned char allow_compaction); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_fifo_compaction_options_get_allow_compaction( + rocksdb_fifo_compaction_options_t* fifo_opts); +extern ROCKSDB_LIBRARY_API void +rocksdb_fifo_compaction_options_set_max_table_files_size( + rocksdb_fifo_compaction_options_t* fifo_opts, uint64_t size); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_fifo_compaction_options_get_max_table_files_size( + rocksdb_fifo_compaction_options_t* fifo_opts); +extern ROCKSDB_LIBRARY_API void rocksdb_fifo_compaction_options_destroy( + rocksdb_fifo_compaction_options_t* fifo_opts); + +extern ROCKSDB_LIBRARY_API int rocksdb_livefiles_count( + const rocksdb_livefiles_t*); +extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_column_family_name( + const rocksdb_livefiles_t*, int index); +extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_name( + const rocksdb_livefiles_t*, int index); +extern ROCKSDB_LIBRARY_API int rocksdb_livefiles_level( + const rocksdb_livefiles_t*, int index); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_livefiles_size(const rocksdb_livefiles_t*, int index); +extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_smallestkey( + const rocksdb_livefiles_t*, int index, size_t* size); +extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_largestkey( + const rocksdb_livefiles_t*, int index, size_t* size); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_livefiles_entries(const rocksdb_livefiles_t*, int index); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_livefiles_deletions(const rocksdb_livefiles_t*, int index); +extern ROCKSDB_LIBRARY_API void rocksdb_livefiles_destroy( + const rocksdb_livefiles_t*); + +/* Utility Helpers */ + +extern ROCKSDB_LIBRARY_API void rocksdb_get_options_from_string( + const rocksdb_options_t* base_options, const char* opts_str, + rocksdb_options_t* new_options, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_delete_file_in_range( + rocksdb_t* db, const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_delete_file_in_range_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + const char* start_key, size_t start_key_len, const char* limit_key, + size_t limit_key_len, char** errptr); + +/* MetaData */ + +extern ROCKSDB_LIBRARY_API rocksdb_column_family_metadata_t* +rocksdb_get_column_family_metadata(rocksdb_t* db); + +/** + * Returns the rocksdb_column_family_metadata_t of the specified + * column family. + * + * Note that the caller is responsible to release the returned memory + * using rocksdb_column_family_metadata_destroy. 
+ */ +extern ROCKSDB_LIBRARY_API rocksdb_column_family_metadata_t* +rocksdb_get_column_family_metadata_cf( + rocksdb_t* db, rocksdb_column_family_handle_t* column_family); + +extern ROCKSDB_LIBRARY_API void rocksdb_column_family_metadata_destroy( + rocksdb_column_family_metadata_t* cf_meta); + +extern ROCKSDB_LIBRARY_API uint64_t rocksdb_column_family_metadata_get_size( + rocksdb_column_family_metadata_t* cf_meta); + +extern ROCKSDB_LIBRARY_API size_t rocksdb_column_family_metadata_get_file_count( + rocksdb_column_family_metadata_t* cf_meta); + +extern ROCKSDB_LIBRARY_API char* rocksdb_column_family_metadata_get_name( + rocksdb_column_family_metadata_t* cf_meta); + +extern ROCKSDB_LIBRARY_API size_t +rocksdb_column_family_metadata_get_level_count( + rocksdb_column_family_metadata_t* cf_meta); + +/** + * Returns the rocksdb_level_metadata_t of the ith level from the specified + * column family metadata. + * + * If the specified i is greater than or equal to the number of levels + * in the specified column family, then NULL will be returned. + * + * Note that the caller is responsible to release the returned memory + * using rocksdb_level_metadata_destroy before releasing its parent + * rocksdb_column_family_metadata_t. + */ +extern ROCKSDB_LIBRARY_API rocksdb_level_metadata_t* +rocksdb_column_family_metadata_get_level_metadata( + rocksdb_column_family_metadata_t* cf_meta, size_t i); + +/** + * Releases the specified rocksdb_level_metadata_t. + * + * Note that the specified rocksdb_level_metadata_t must be released + * before the release of its parent rocksdb_column_family_metadata_t. + */ +extern ROCKSDB_LIBRARY_API void rocksdb_level_metadata_destroy( + rocksdb_level_metadata_t* level_meta); + +extern ROCKSDB_LIBRARY_API int rocksdb_level_metadata_get_level( + rocksdb_level_metadata_t* level_meta); + +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_level_metadata_get_size(rocksdb_level_metadata_t* level_meta); + +extern ROCKSDB_LIBRARY_API size_t +rocksdb_level_metadata_get_file_count(rocksdb_level_metadata_t* level_meta); + +/** + * Returns the sst_file_metadata_t of the ith file from the specified level + * metadata. + * + * If the specified i is greater than or equal to the number of files + * in the specified level, then NULL will be returned. + * + * Note that the caller is responsible to release the returned memory + * using rocksdb_sst_file_metadata_destroy before releasing its + * parent rocksdb_level_metadata_t. + */ +extern ROCKSDB_LIBRARY_API rocksdb_sst_file_metadata_t* +rocksdb_level_metadata_get_sst_file_metadata( + rocksdb_level_metadata_t* level_meta, size_t i); + +/** + * Releases the specified rocksdb_sst_file_metadata_t. + * + * Note that the specified rocksdb_sst_file_metadata_t must be released + * before the release of its parent rocksdb_level_metadata_t. + */ +extern ROCKSDB_LIBRARY_API void rocksdb_sst_file_metadata_destroy( + rocksdb_sst_file_metadata_t* file_meta); + +extern ROCKSDB_LIBRARY_API char* +rocksdb_sst_file_metadata_get_relative_filename( + rocksdb_sst_file_metadata_t* file_meta); + +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_sst_file_metadata_get_size(rocksdb_sst_file_metadata_t* file_meta); + +/** + * Returns the smallest key of the specified sst file. + * The caller is responsible for releasing the returned memory. + * + * @param file_meta the metadata of an SST file to obtain its smallest key. + * @param len the out value which will contain the length of the returned key + * after the function call. 
+ */ +extern ROCKSDB_LIBRARY_API char* rocksdb_sst_file_metadata_get_smallestkey( + rocksdb_sst_file_metadata_t* file_meta, size_t* len); + +/** + * Returns the largest key of the specified sst file. + * The caller is responsible for releasing the returned memory. + * + * @param file_meta the metadata of an SST file to obtain its largest key. + * @param len the out value which will contain the length of the returned key + * after the function call. + */ +extern ROCKSDB_LIBRARY_API char* rocksdb_sst_file_metadata_get_largestkey( + rocksdb_sst_file_metadata_t* file_meta, size_t* len); + +/* Transactions */ + +extern ROCKSDB_LIBRARY_API rocksdb_column_family_handle_t* +rocksdb_transactiondb_create_column_family( + rocksdb_transactiondb_t* txn_db, + const rocksdb_options_t* column_family_options, + const char* column_family_name, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_transactiondb_t* rocksdb_transactiondb_open( + const rocksdb_options_t* options, + const rocksdb_transactiondb_options_t* txn_db_options, const char* name, + char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_transactiondb_t* +rocksdb_transactiondb_open_column_families( + const rocksdb_options_t* options, + const rocksdb_transactiondb_options_t* txn_db_options, const char* name, + int num_column_families, const char* const* column_family_names, + const rocksdb_options_t* const* column_family_options, + rocksdb_column_family_handle_t** column_family_handles, char** errptr); + +extern ROCKSDB_LIBRARY_API const rocksdb_snapshot_t* +rocksdb_transactiondb_create_snapshot(rocksdb_transactiondb_t* txn_db); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_release_snapshot( + rocksdb_transactiondb_t* txn_db, const rocksdb_snapshot_t* snapshot); + +extern ROCKSDB_LIBRARY_API char* rocksdb_transactiondb_property_value( + rocksdb_transactiondb_t* db, const char* propname); + +extern ROCKSDB_LIBRARY_API int rocksdb_transactiondb_property_int( + rocksdb_transactiondb_t* db, const char* propname, uint64_t* out_val); + +extern ROCKSDB_LIBRARY_API rocksdb_transaction_t* rocksdb_transaction_begin( + rocksdb_transactiondb_t* txn_db, + const rocksdb_writeoptions_t* write_options, + const rocksdb_transaction_options_t* txn_options, + rocksdb_transaction_t* old_txn); + +extern ROCKSDB_LIBRARY_API rocksdb_transaction_t** +rocksdb_transactiondb_get_prepared_transactions(rocksdb_transactiondb_t* txn_db, + size_t* cnt); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_set_name( + rocksdb_transaction_t* txn, const char* name, size_t name_len, + char** errptr); + +extern ROCKSDB_LIBRARY_API char* rocksdb_transaction_get_name( + rocksdb_transaction_t* txn, size_t* name_len); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_prepare( + rocksdb_transaction_t* txn, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_commit( + rocksdb_transaction_t* txn, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_rollback( + rocksdb_transaction_t* txn, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_set_savepoint( + rocksdb_transaction_t* txn); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_rollback_to_savepoint( + rocksdb_transaction_t* txn, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_destroy( + rocksdb_transaction_t* txn); + +extern ROCKSDB_LIBRARY_API rocksdb_writebatch_wi_t* +rocksdb_transaction_get_writebatch_wi(rocksdb_transaction_t* txn); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_rebuild_from_writebatch( +
rocksdb_transaction_t* txn, rocksdb_writebatch_t* writebatch, + char** errptr); + +// This rocksdb_writebatch_wi_t should be freed with rocksdb_free +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_rebuild_from_writebatch_wi( + rocksdb_transaction_t* txn, rocksdb_writebatch_wi_t* wi, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_set_commit_timestamp( + rocksdb_transaction_t* txn, uint64_t commit_timestamp); + +extern ROCKSDB_LIBRARY_API void +rocksdb_transaction_set_read_timestamp_for_validation( + rocksdb_transaction_t* txn, uint64_t read_timestamp); + +// This snapshot should be freed using rocksdb_free +extern ROCKSDB_LIBRARY_API const rocksdb_snapshot_t* +rocksdb_transaction_get_snapshot(rocksdb_transaction_t* txn); + +extern ROCKSDB_LIBRARY_API char* rocksdb_transaction_get( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + const char* key, size_t klen, size_t* vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* +rocksdb_transaction_get_pinned(rocksdb_transaction_t* txn, + const rocksdb_readoptions_t* options, + const char* key, size_t klen, char** errptr); + +extern ROCKSDB_LIBRARY_API char* rocksdb_transaction_get_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, + size_t* vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* +rocksdb_transaction_get_pinned_cf(rocksdb_transaction_t* txn, + const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, char** errptr); + +extern ROCKSDB_LIBRARY_API char* rocksdb_transaction_get_for_update( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + const char* key, size_t klen, size_t* vlen, unsigned char exclusive, + char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* +rocksdb_transaction_get_pinned_for_update(rocksdb_transaction_t* txn, + const rocksdb_readoptions_t* options, + const char* key, size_t klen, + unsigned char exclusive, + char** errptr); + +extern ROCKSDB_LIBRARY_API char* rocksdb_transaction_get_for_update_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, + size_t* vlen, unsigned char exclusive, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* +rocksdb_transaction_get_pinned_for_update_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, + unsigned char exclusive, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** values_list, + size_t* values_list_sizes, char** errs); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get_for_update( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** values_list, + size_t* values_list_sizes, char** errs); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + const rocksdb_column_family_handle_t* const* column_families, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** 
values_list, + size_t* values_list_sizes, char** errs); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_multi_get_for_update_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + const rocksdb_column_family_handle_t* const* column_families, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** values_list, + size_t* values_list_sizes, char** errs); + +extern ROCKSDB_LIBRARY_API char* rocksdb_transactiondb_get( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + const char* key, size_t klen, size_t* vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* +rocksdb_transactiondb_get_pinned(rocksdb_transactiondb_t* txn_db, + const rocksdb_readoptions_t* options, + const char* key, size_t klen, char** errptr); + +extern ROCKSDB_LIBRARY_API char* rocksdb_transactiondb_get_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, size_t* vallen, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* +rocksdb_transactiondb_get_pinned_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_multi_get( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** values_list, + size_t* values_list_sizes, char** errs); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_multi_get_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + const rocksdb_column_family_handle_t* const* column_families, + size_t num_keys, const char* const* keys_list, + const size_t* keys_list_sizes, char** values_list, + size_t* values_list_sizes, char** errs); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_put( + rocksdb_transaction_t* txn, const char* key, size_t klen, const char* val, + size_t vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_put_cf( + rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* val, size_t vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_put( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, + const char* key, size_t klen, const char* val, size_t vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_put_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, const char* val, size_t vallen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_write( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, + rocksdb_writebatch_t* batch, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_merge( + rocksdb_transaction_t* txn, const char* key, size_t klen, const char* val, + size_t vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_merge_cf( + rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, const char* val, size_t vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_merge( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* 
options, + const char* key, size_t klen, const char* val, size_t vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_merge_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, + const char* val, size_t vlen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_delete( + rocksdb_transaction_t* txn, const char* key, size_t klen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_delete_cf( + rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_delete( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, + const char* key, size_t klen, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_delete_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* +rocksdb_transaction_create_iterator(rocksdb_transaction_t* txn, + const rocksdb_readoptions_t* options); + +extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* +rocksdb_transaction_create_iterator_cf( + rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family); + +extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* +rocksdb_transactiondb_create_iterator(rocksdb_transactiondb_t* txn_db, + const rocksdb_readoptions_t* options); + +extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* +rocksdb_transactiondb_create_iterator_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_close( + rocksdb_transactiondb_t* txn_db); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush( + rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_cf( + rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options, + rocksdb_column_family_handle_t* column_family, char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_cfs( + rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options, + rocksdb_column_family_handle_t** column_families, int num_column_families, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_wal( + rocksdb_transactiondb_t* txn_db, unsigned char sync, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_checkpoint_t* +rocksdb_transactiondb_checkpoint_object_create(rocksdb_transactiondb_t* txn_db, + char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_optimistictransactiondb_t* +rocksdb_optimistictransactiondb_open(const rocksdb_options_t* options, + const char* name, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_optimistictransactiondb_t* +rocksdb_optimistictransactiondb_open_column_families( + const rocksdb_options_t* options, const char* name, int num_column_families, + const char* const* column_family_names, + const rocksdb_options_t* const* column_family_options, + rocksdb_column_family_handle_t** column_family_handles, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_t* +rocksdb_optimistictransactiondb_get_base_db( + rocksdb_optimistictransactiondb_t* otxn_db); + +extern ROCKSDB_LIBRARY_API 
void rocksdb_optimistictransactiondb_close_base_db( + rocksdb_t* base_db); + +extern ROCKSDB_LIBRARY_API rocksdb_transaction_t* +rocksdb_optimistictransaction_begin( + rocksdb_optimistictransactiondb_t* otxn_db, + const rocksdb_writeoptions_t* write_options, + const rocksdb_optimistictransaction_options_t* otxn_options, + rocksdb_transaction_t* old_txn); + +extern ROCKSDB_LIBRARY_API void rocksdb_optimistictransactiondb_write( + rocksdb_optimistictransactiondb_t* otxn_db, + const rocksdb_writeoptions_t* options, rocksdb_writebatch_t* batch, + char** errptr); + +extern ROCKSDB_LIBRARY_API void rocksdb_optimistictransactiondb_close( + rocksdb_optimistictransactiondb_t* otxn_db); + +extern ROCKSDB_LIBRARY_API rocksdb_checkpoint_t* +rocksdb_optimistictransactiondb_checkpoint_object_create( + rocksdb_optimistictransactiondb_t* otxn_db, char** errptr); + +/* Transaction Options */ + +extern ROCKSDB_LIBRARY_API rocksdb_transactiondb_options_t* +rocksdb_transactiondb_options_create(void); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_options_destroy( + rocksdb_transactiondb_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_options_set_max_num_locks( + rocksdb_transactiondb_options_t* opt, int64_t max_num_locks); + +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_options_set_num_stripes( + rocksdb_transactiondb_options_t* opt, size_t num_stripes); + +extern ROCKSDB_LIBRARY_API void +rocksdb_transactiondb_options_set_transaction_lock_timeout( + rocksdb_transactiondb_options_t* opt, int64_t txn_lock_timeout); + +extern ROCKSDB_LIBRARY_API void +rocksdb_transactiondb_options_set_default_lock_timeout( + rocksdb_transactiondb_options_t* opt, int64_t default_lock_timeout); + +extern ROCKSDB_LIBRARY_API rocksdb_transaction_options_t* +rocksdb_transaction_options_create(void); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_destroy( + rocksdb_transaction_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_set_snapshot( + rocksdb_transaction_options_t* opt, unsigned char v); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_deadlock_detect( + rocksdb_transaction_options_t* opt, unsigned char v); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_lock_timeout( + rocksdb_transaction_options_t* opt, int64_t lock_timeout); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_expiration( + rocksdb_transaction_options_t* opt, int64_t expiration); + +extern ROCKSDB_LIBRARY_API void +rocksdb_transaction_options_set_deadlock_detect_depth( + rocksdb_transaction_options_t* opt, int64_t depth); + +extern ROCKSDB_LIBRARY_API void +rocksdb_transaction_options_set_max_write_batch_size( + rocksdb_transaction_options_t* opt, size_t size); + +extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_skip_prepare( + rocksdb_transaction_options_t* opt, unsigned char v); + +extern ROCKSDB_LIBRARY_API rocksdb_optimistictransaction_options_t* +rocksdb_optimistictransaction_options_create(void); + +extern ROCKSDB_LIBRARY_API void rocksdb_optimistictransaction_options_destroy( + rocksdb_optimistictransaction_options_t* opt); + +extern ROCKSDB_LIBRARY_API void +rocksdb_optimistictransaction_options_set_set_snapshot( + rocksdb_optimistictransaction_options_t* opt, unsigned char v); + +extern ROCKSDB_LIBRARY_API char* rocksdb_optimistictransactiondb_property_value( + rocksdb_optimistictransactiondb_t* db, const char* propname); + +extern ROCKSDB_LIBRARY_API int 
rocksdb_optimistictransactiondb_property_int( + rocksdb_optimistictransactiondb_t* db, const char* propname, + uint64_t* out_val); + +// referring to convention (3), this should be used by client +// to free memory that was malloc()ed +extern ROCKSDB_LIBRARY_API void rocksdb_free(void* ptr); + +extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_get_pinned( + rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, + size_t keylen, char** errptr); +extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_get_pinned_cf( + rocksdb_t* db, const rocksdb_readoptions_t* options, + rocksdb_column_family_handle_t* column_family, const char* key, + size_t keylen, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_pinnableslice_destroy( + rocksdb_pinnableslice_t* v); +extern ROCKSDB_LIBRARY_API const char* rocksdb_pinnableslice_value( + const rocksdb_pinnableslice_t* t, size_t* vlen); + +extern ROCKSDB_LIBRARY_API rocksdb_memory_consumers_t* +rocksdb_memory_consumers_create(void); +extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_add_db( + rocksdb_memory_consumers_t* consumers, rocksdb_t* db); +extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_add_cache( + rocksdb_memory_consumers_t* consumers, rocksdb_cache_t* cache); +extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_destroy( + rocksdb_memory_consumers_t* consumers); +extern ROCKSDB_LIBRARY_API rocksdb_memory_usage_t* +rocksdb_approximate_memory_usage_create(rocksdb_memory_consumers_t* consumers, + char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_approximate_memory_usage_destroy( + rocksdb_memory_usage_t* usage); + +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_approximate_memory_usage_get_mem_table_total( + rocksdb_memory_usage_t* memory_usage); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_approximate_memory_usage_get_mem_table_unflushed( + rocksdb_memory_usage_t* memory_usage); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_approximate_memory_usage_get_mem_table_readers_total( + rocksdb_memory_usage_t* memory_usage); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_approximate_memory_usage_get_cache_total( + rocksdb_memory_usage_t* memory_usage); + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_dump_malloc_stats( + rocksdb_options_t*, unsigned char); + +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_memtable_whole_key_filtering(rocksdb_options_t*, + unsigned char); + +extern ROCKSDB_LIBRARY_API void rocksdb_cancel_all_background_work( + rocksdb_t* db, unsigned char wait); + +extern ROCKSDB_LIBRARY_API void rocksdb_disable_manual_compaction( + rocksdb_t* db); + +extern ROCKSDB_LIBRARY_API void rocksdb_enable_manual_compaction(rocksdb_t* db); + +#ifdef __cplusplus +} /* end extern "C" */ +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cache.h new file mode 100644 index 0000000000..532e6e4afd --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cache.h @@ -0,0 +1,442 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+// +// Various APIs for configuring, creating, and monitoring read caches. + +#pragma once + +#include <cstdint> +#include <memory> +#include <string> + +#include "rocksdb/compression_type.h" +#include "rocksdb/data_structure.h" +#include "rocksdb/memory_allocator.h" + +namespace ROCKSDB_NAMESPACE { + +class Cache; // defined in advanced_cache.h +struct ConfigOptions; +class SecondaryCache; + +// These definitions begin source compatibility for a future change in which +// a specific class for block cache is split away from general caches, so that +// the block cache API can continue to become more specialized and +// customizable, including in ways incompatible with a general cache. For +// example, HyperClockCache is not usable as a general cache because it expects +// only fixed-size block cache keys, but this limitation is not yet reflected +// in the API function signatures. +// * Phase 1 (done) - Make both BlockCache and GeneralCache aliases for Cache, +// and make a factory function for general caches. Encourage users of row_cache +// (not common) to switch to the factory function for general caches. +// * Phase 2 - Split off GeneralCache as its own class, removing secondary +// cache support features and more from the API to simplify it. Between Phase 1 +// and Phase 2 users of row_cache will need to update their code. Any time +// after Phase 2, the block cache API can become more specialized in ways +// incompatible with general caches. +// * Phase 3 - Move existing RocksDB uses of Cache to BlockCache, and deprecate +// (but not yet remove) Cache as an alias for BlockCache. +using BlockCache = Cache; +using GeneralCache = Cache; + +// Classifications of block cache entries. +// +// Developer notes: Adding a new enum to this class requires corresponding +// updates to `kCacheEntryRoleToCamelString` and +// `kCacheEntryRoleToHyphenString`. Do not add to this enum after `kMisc` since +// `kNumCacheEntryRoles` assumes `kMisc` comes last. +enum class CacheEntryRole { + // Block-based table data block + kDataBlock, + // Block-based table filter block (full or partitioned) + kFilterBlock, + // Block-based table metadata block for partitioned filter + kFilterMetaBlock, + // OBSOLETE / DEPRECATED: old/removed block-based filter + kDeprecatedFilterBlock, + // Block-based table index block + kIndexBlock, + // Other kinds of block-based table block + kOtherBlock, + // WriteBufferManager's charge to account for its memtable usage + kWriteBuffer, + // Compression dictionary building buffer's charge to account for + // its memory usage + kCompressionDictionaryBuildingBuffer, + // Filter's charge to account for + // (new) bloom and ribbon filter construction's memory usage + kFilterConstruction, + // BlockBasedTableReader's charge to account for its memory usage + kBlockBasedTableReader, + // FileMetadata's charge to account for its memory usage + kFileMetadata, + // Blob value (when using the same cache as block cache and blob cache) + kBlobValue, + // Blob cache's charge to account for its memory usage (when using a + // separate block cache and blob cache) + kBlobCache, + // Default bucket, for miscellaneous cache entries. Do not use for + // entries that could potentially add up to large usage. + kMisc, +}; +constexpr uint32_t kNumCacheEntryRoles = + static_cast<uint32_t>(CacheEntryRole::kMisc) + 1; + +// Obtain a hyphen-separated, lowercase name of a `CacheEntryRole`.
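+// (Illustrative of the hyphen convention, not an exhaustive list: kDataBlock +// would map to "data-block" and kFilterBlock to "filter-block".)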
+const std::string& GetCacheEntryRoleName(CacheEntryRole); + +// A fast bit set for CacheEntryRoles +using CacheEntryRoleSet = SmallEnumSet<CacheEntryRole, CacheEntryRole::kMisc>; + +// For use with `GetMapProperty()` for property +// `DB::Properties::kBlockCacheEntryStats`. On success, the map will +// be populated with all keys that can be obtained from these functions. +struct BlockCacheEntryStatsMapKeys { + static const std::string& CacheId(); + static const std::string& CacheCapacityBytes(); + static const std::string& LastCollectionDurationSeconds(); + static const std::string& LastCollectionAgeSeconds(); + + static std::string EntryCount(CacheEntryRole); + static std::string UsedBytes(CacheEntryRole); + static std::string UsedPercent(CacheEntryRole); +}; + +extern const bool kDefaultToAdaptiveMutex; + +enum CacheMetadataChargePolicy { + // Only the `charge` of each entry inserted into a Cache counts against + // the `capacity` + kDontChargeCacheMetadata, + // In addition to the `charge`, the approximate space overheads in the + // Cache (in bytes) also count against `capacity`. These space overheads + // are for supporting fast Lookup and managing the lifetime of entries. + kFullChargeCacheMetadata +}; +const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy = + kFullChargeCacheMetadata; + +// Options shared between various cache implementations that +// divide the key space into shards using hashing. +struct ShardedCacheOptions { + // Capacity of the cache, in the same units as the `charge` of each entry. + // This is typically measured in bytes, but can be a different unit if using + // kDontChargeCacheMetadata. + size_t capacity = 0; + + // Cache is sharded into 2^num_shard_bits shards, by hash of key. + // If < 0, a good default is chosen based on the capacity and the + // implementation. (Mutex-based implementations are much more reliant + // on many shards for parallel scalability.) + int num_shard_bits = -1; + + // If strict_capacity_limit is set, Insert() will fail if there is not + // enough capacity for the new entry along with all the existing referenced + // (pinned) cache entries. (Unreferenced cache entries are evicted as + // needed, sometimes immediately.) If strict_capacity_limit == false + // (default), Insert() never fails. + bool strict_capacity_limit = false; + + // If non-nullptr, RocksDB will use this allocator instead of system + // allocator when allocating memory for cache blocks. + // + // Caveat: when the cache is used as block cache, the memory allocator is + // ignored when dealing with compression libraries that allocate memory + // internally (currently only XPRESS). + std::shared_ptr<MemoryAllocator> memory_allocator; + + // See CacheMetadataChargePolicy + CacheMetadataChargePolicy metadata_charge_policy = + kDefaultCacheMetadataChargePolicy; + + // A SecondaryCache instance to use as the non-volatile tier. For a +  // GeneralCache this option must be kept as default empty. + std::shared_ptr<SecondaryCache> secondary_cache; + + // See hash_seed comments below + static constexpr int32_t kQuasiRandomHashSeed = -1; + static constexpr int32_t kHostHashSeed = -2; + + // EXPERT OPTION: Specifies how a hash seed should be determined for the + // cache, or specifies a specific seed (only recommended for diagnostics or + // testing). + // + // Background: it could be dangerous to have different cache instances + // access the same SST files with the same hash seed, as correlated unlucky + // hashing across hosts or restarts could cause a widespread issue, rather + // than an isolated one.
For example, with smaller block caches, it is + // possible for large full Bloom filters in a set of SST files to be randomly + // clustered into one cache shard, causing mutex contention or a thrashing + // condition as there's little or no space left for other entries assigned to + // the shard. If a set of SST files is broadcast and used on many hosts, we + // should ensure all have an independent chance of balanced shards. + // + // Values >= 0 will be treated as fixed hash seeds. Values < 0 are reserved + // for methods of dynamically choosing a seed, currently: + // * kQuasiRandomHashSeed - Each cache created chooses a seed mostly randomly, + // except that within a process, no seed is repeated until all have been + // issued. + // * kHostHashSeed - The seed is determined based on hashing the host name. + // Although this is arguably slightly worse for production reliability, it + // solves the essential problem of cross-host correlation while ensuring + // repeatable behavior on a host, for diagnostic purposes. + int32_t hash_seed = kHostHashSeed; + + ShardedCacheOptions() {} + ShardedCacheOptions( + size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit, + std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr, + CacheMetadataChargePolicy _metadata_charge_policy = + kDefaultCacheMetadataChargePolicy) + : capacity(_capacity), + num_shard_bits(_num_shard_bits), + strict_capacity_limit(_strict_capacity_limit), + memory_allocator(std::move(_memory_allocator)), + metadata_charge_policy(_metadata_charge_policy) {} +}; + +// LRUCache - A cache using LRU eviction to stay at or below a set capacity. +// The cache is sharded to 2^num_shard_bits shards, by hash of the key. +// The total capacity is divided and evenly assigned to each shard, and each +// shard has its own LRU list for evictions. Each shard also has a mutex for +// exclusive access during operations; even read operations need exclusive +// access in order to update the LRU list. Mutex contention is usually low +// with enough shards. +struct LRUCacheOptions : public ShardedCacheOptions { + // Ratio of cache reserved for high-priority and low-priority entries, + // respectively. (See Cache::Priority below for more information on the + // levels.) Valid values are between 0 and 1 (inclusive), and the sum of the + // two values cannot exceed 1. + // + // If high_pri_pool_ratio is greater than zero, a dedicated high-priority LRU + // list is maintained by the cache. A ratio of 0.5 means non-high-priority + // entries will use midpoint insertion. Similarly, if low_pri_pool_ratio is + // greater than zero, a dedicated low-priority LRU list is maintained. + // There is also a bottom-priority LRU list, which is always enabled and not + // explicitly configurable. Entries are spilled over to the next available + // lower-priority pool if a certain pool's capacity is exceeded. + // + // Entries with cache hits are inserted into the highest priority LRU list + // available regardless of the entry's priority. Entries without hits + // are inserted into the highest priority LRU list available whose priority + // does not exceed the entry's priority. (For example, high-priority items + // with no hits are placed in the high-priority pool if available; + // otherwise, they are placed in the low-priority pool if available; + // otherwise, they are placed in the bottom-priority pool.) This results + // in lower-priority entries without hits getting evicted from the cache + // sooner.
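+ // + // As a minimal illustration (the values here are hypothetical, not tuning + // advice), a cache with both pools enabled could be configured as: + // + // LRUCacheOptions opts; + // opts.capacity = 512 << 20; // 512 MiB total, in bytes + // opts.high_pri_pool_ratio = 0.5; // dedicated high-priority LRU list + // opts.low_pri_pool_ratio = 0.2; // dedicated low-priority LRU list + // std::shared_ptr<Cache> cache = opts.MakeSharedCache(); + //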
+ double high_pri_pool_ratio = 0.5; + double low_pri_pool_ratio = 0.0; + + // Whether to use adaptive mutexes for cache shards. Note that adaptive + // mutexes need to be supported by the platform in order for this to have any + // effect. The default value is true if RocksDB is compiled with + // -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise. + bool use_adaptive_mutex = kDefaultToAdaptiveMutex; + + LRUCacheOptions() {} + LRUCacheOptions(size_t _capacity, int _num_shard_bits, + bool _strict_capacity_limit, double _high_pri_pool_ratio, + std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr, + bool _use_adaptive_mutex = kDefaultToAdaptiveMutex, + CacheMetadataChargePolicy _metadata_charge_policy = + kDefaultCacheMetadataChargePolicy, + double _low_pri_pool_ratio = 0.0) + : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit, + std::move(_memory_allocator), + _metadata_charge_policy), + high_pri_pool_ratio(_high_pri_pool_ratio), + low_pri_pool_ratio(_low_pri_pool_ratio), + use_adaptive_mutex(_use_adaptive_mutex) {} + + // Construct an instance of LRUCache using these options + std::shared_ptr<Cache> MakeSharedCache() const; + + // Construct an instance of LRUCache for use as a general cache (e.g. for + // row_cache). Some options are not relevant to general caches. + std::shared_ptr<GeneralCache> MakeSharedGeneralCache() const; +}; + +// DEPRECATED wrapper function +inline std::shared_ptr<Cache> NewLRUCache( + size_t capacity, int num_shard_bits = -1, + bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5, + std::shared_ptr<MemoryAllocator> memory_allocator = nullptr, + bool use_adaptive_mutex = kDefaultToAdaptiveMutex, + CacheMetadataChargePolicy metadata_charge_policy = + kDefaultCacheMetadataChargePolicy, + double low_pri_pool_ratio = 0.0) { + return LRUCacheOptions(capacity, num_shard_bits, strict_capacity_limit, + high_pri_pool_ratio, memory_allocator, + use_adaptive_mutex, metadata_charge_policy, + low_pri_pool_ratio) + .MakeSharedCache(); +} + +// DEPRECATED wrapper function +inline std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) { + return cache_opts.MakeSharedCache(); +} + +// EXPERIMENTAL +// Options structure for configuring a SecondaryCache instance with in-memory +// compression. The implementation uses LRUCache so inherits its options, +// except LRUCacheOptions.secondary_cache is not used and should not be set. +struct CompressedSecondaryCacheOptions : LRUCacheOptions { + // The compression method (if any) that is used to compress data. + CompressionType compression_type = CompressionType::kLZ4Compression; + + // compress_format_version can have two values: + // compress_format_version == 1 -- decompressed size is not included in the + // block header. + // compress_format_version == 2 -- decompressed size is included in the block + // header in varint32 format. + uint32_t compress_format_version = 2; + + // Enable the custom split and merge feature, which splits the compressed + // value into chunks so that they may better fit jemalloc bins. + bool enable_custom_split_merge = false; + + // Kinds of entries that should not be compressed, but can be stored. + // (Filter blocks are essentially non-compressible but others usually are.)
+ CacheEntryRoleSet do_not_compress_roles = {CacheEntryRole::kFilterBlock}; + + CompressedSecondaryCacheOptions() {} + CompressedSecondaryCacheOptions( + size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit, + double _high_pri_pool_ratio, double _low_pri_pool_ratio = 0.0, + std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr, + bool _use_adaptive_mutex = kDefaultToAdaptiveMutex, + CacheMetadataChargePolicy _metadata_charge_policy = + kDefaultCacheMetadataChargePolicy, + CompressionType _compression_type = CompressionType::kLZ4Compression, + uint32_t _compress_format_version = 2, + bool _enable_custom_split_merge = false, + const CacheEntryRoleSet& _do_not_compress_roles = + {CacheEntryRole::kFilterBlock}) + : LRUCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit, + _high_pri_pool_ratio, std::move(_memory_allocator), + _use_adaptive_mutex, _metadata_charge_policy, + _low_pri_pool_ratio), + compression_type(_compression_type), + compress_format_version(_compress_format_version), + enable_custom_split_merge(_enable_custom_split_merge), + do_not_compress_roles(_do_not_compress_roles) {} + + // Construct an instance of CompressedSecondaryCache using these options + std::shared_ptr<SecondaryCache> MakeSharedSecondaryCache() const; + + // Avoid confusion with LRUCache + std::shared_ptr<Cache> MakeSharedCache() const = delete; +}; + +// DEPRECATED wrapper function +inline std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache( + size_t capacity, int num_shard_bits = -1, + bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5, + double low_pri_pool_ratio = 0.0, + std::shared_ptr<MemoryAllocator> memory_allocator = nullptr, + bool use_adaptive_mutex = kDefaultToAdaptiveMutex, + CacheMetadataChargePolicy metadata_charge_policy = + kDefaultCacheMetadataChargePolicy, + CompressionType compression_type = CompressionType::kLZ4Compression, + uint32_t compress_format_version = 2, + bool enable_custom_split_merge = false, + const CacheEntryRoleSet& _do_not_compress_roles = { + CacheEntryRole::kFilterBlock}) { + return CompressedSecondaryCacheOptions( + capacity, num_shard_bits, strict_capacity_limit, + high_pri_pool_ratio, low_pri_pool_ratio, memory_allocator, + use_adaptive_mutex, metadata_charge_policy, compression_type, + compress_format_version, enable_custom_split_merge, + _do_not_compress_roles) + .MakeSharedSecondaryCache(); +} + +// DEPRECATED wrapper function +inline std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache( + const CompressedSecondaryCacheOptions& opts) { + return opts.MakeSharedSecondaryCache(); +} + +// HyperClockCache - A lock-free Cache alternative for RocksDB block cache +// that offers much improved CPU efficiency vs. LRUCache under high parallel +// load or high contention, with some caveats: +// * Not a general Cache implementation: can only be used for +// BlockBasedTableOptions::block_cache, which RocksDB uses in a way that is +// compatible with HyperClockCache. +// * Requires an extra tuning parameter: see estimated_entry_charge below. +// Similarly, substantially changing the capacity with SetCapacity could +// harm efficiency. +// * Cache priorities are less aggressively enforced, which could cause +// cache dilution from long range scans (unless they use fill_cache=false). +// * Can be worse for small caches, because if almost all of a cache shard is +// pinned (more likely with non-partitioned filters), then CLOCK eviction +// becomes very CPU intensive. +// +// See internal cache/clock_cache.h for full description.
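+// +// A brief usage sketch (the charge value below is hypothetical; derive a +// real one as described for estimated_entry_charge below, and note that +// BlockBasedTableOptions is declared in rocksdb/table.h): +// +// HyperClockCacheOptions hcc_opts(1 << 30 /* capacity */, +// 4096 /* estimated_entry_charge */); +// BlockBasedTableOptions table_opts; +// table_opts.block_cache = hcc_opts.MakeSharedCache();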
+struct HyperClockCacheOptions : public ShardedCacheOptions { + // The estimated average `charge` associated with cache entries. This is a + // critical configuration parameter for good performance from the hyper + // cache, because having a table size that is fixed at creation time greatly + // reduces the required synchronization between threads. + // * If the estimate is substantially too low (e.g. less than half the true + // average) then metadata space overhead will be substantially higher (e.g. + // 200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this + // can slightly reduce cache hit rates, and slightly reduce access times due + // to the larger working memory size. + // * If the estimate is substantially too high (e.g. 25% higher than the true + // average) then there might not be sufficient slots in the hash table for + // both efficient operation and capacity utilization (hit rate). The hyper + // cache will evict entries to prevent load factors that could dramatically + // affect lookup times, instead letting the hit rate suffer by not utilizing + // the full capacity. + // + // A reasonable choice is the larger of block_size and metadata_block_size. + // When WriteBufferManager (and similar) charge memory usage to the block + // cache, this can lead to the same effect as the estimate being too low, +  // which is better than the opposite. Therefore, the general recommendation +  // is to assume that other memory charged to block cache could be negligible, +  // and ignore it in making the estimate. + // + // The best parameter choice based on a cache in use is given by + // GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as + // with kDontChargeCacheMetadata. More precisely, with +  // kFullChargeCacheMetadata it is (GetUsage() - 64 * GetTableAddressCount()) / + // GetOccupancyCount(). However, when the average value size might vary + // (e.g. balance between metadata and data blocks in cache), it is better + // to estimate toward the lower side than the higher side. + size_t estimated_entry_charge; + + HyperClockCacheOptions( + size_t _capacity, size_t _estimated_entry_charge, + int _num_shard_bits = -1, bool _strict_capacity_limit = false, + std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr, + CacheMetadataChargePolicy _metadata_charge_policy = + kDefaultCacheMetadataChargePolicy) + : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit, + std::move(_memory_allocator), + _metadata_charge_policy), + estimated_entry_charge(_estimated_entry_charge) {} + + // Construct an instance of HyperClockCache using these options + std::shared_ptr<Cache> MakeSharedCache() const; +}; + +// DEPRECATED - The old Clock Cache implementation had an unresolved bug and +// has been removed. The new HyperClockCache requires an additional +// configuration parameter that is not provided by this API. This function +// simply returns a new LRUCache for functional compatibility.
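+// A typical migration (a sketch, with a caller-chosen charge estimate) is to +// replace NewClockCache(capacity) with +// HyperClockCacheOptions(capacity, estimated_entry_charge).MakeSharedCache(), +// or to call NewLRUCache(capacity) to make the fallback behavior explicit.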
+extern std::shared_ptr<Cache> NewClockCache( + size_t capacity, int num_shard_bits = -1, + bool strict_capacity_limit = false, + CacheMetadataChargePolicy metadata_charge_policy = + kDefaultCacheMetadataChargePolicy); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cache_bench_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cache_bench_tool.h new file mode 100644 index 0000000000..413ce15937 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cache_bench_tool.h @@ -0,0 +1,14 @@ +// Copyright (c) 2013-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +int cache_bench_tool(int argc, char** argv); +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cleanable.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cleanable.h new file mode 100644 index 0000000000..afc7366732 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/cleanable.h @@ -0,0 +1,128 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class Cleanable { + public: + Cleanable(); + // No copy constructor and copy assignment allowed. + Cleanable(Cleanable&) = delete; + Cleanable& operator=(Cleanable&) = delete; + + // Executes all the registered cleanups + ~Cleanable(); + + // Move constructor and move assignment are allowed. + Cleanable(Cleanable&&) noexcept; + Cleanable& operator=(Cleanable&&) noexcept; + + // Clients are allowed to register function/arg1/arg2 triples that + // will be invoked when this Cleanable is destroyed. + // + // Note that unlike all of the preceding methods, this method is + // not abstract and therefore clients should not override it. + using CleanupFunction = void (*)(void* arg1, void* arg2); + + // Add another Cleanup to the list + void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); + + // Move the cleanups owned by this Cleanable to another Cleanable, adding to + // any existing cleanups it has + void DelegateCleanupsTo(Cleanable* other); + + // DoCleanup and also resets the pointers for reuse + inline void Reset() { + DoCleanup(); + cleanup_.function = nullptr; + cleanup_.next = nullptr; + } + + inline bool HasCleanups() { return cleanup_.function != nullptr; } + + protected: + struct Cleanup { + CleanupFunction function; + void* arg1; + void* arg2; + Cleanup* next; + }; + Cleanup cleanup_; + // It also becomes the owner of c + void RegisterCleanup(Cleanup* c); + + private: + // Performs all the cleanups. It does not reset the pointers. Making it + // private prevents misuse.
+ inline void DoCleanup() { + if (cleanup_.function != nullptr) { + (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); + for (Cleanup* c = cleanup_.next; c != nullptr;) { + (*c->function)(c->arg1, c->arg2); + Cleanup* next = c->next; + delete c; + c = next; + } + } + } +}; + +// A copyable, reference-counted pointer to a simple Cleanable that only +// performs registered cleanups after all copies are destroyed. This is like +// shared_ptr<Cleanable> but works more efficiently when wrapping the pointer +// in an outer Cleanable (see RegisterCopyWith() and MoveAsCleanupTo()). +// WARNING: if you create a reference cycle, for example: +// SharedCleanablePtr scp; +// scp.Allocate(); +// scp.RegisterCopyWith(&*scp); +// It will prevent cleanups from ever happening! +class SharedCleanablePtr { + public: + // Empty/null pointer + SharedCleanablePtr() {} + // Copy and move constructors and assignment + SharedCleanablePtr(const SharedCleanablePtr& from); + SharedCleanablePtr(SharedCleanablePtr&& from) noexcept; + SharedCleanablePtr& operator=(const SharedCleanablePtr& from); + SharedCleanablePtr& operator=(SharedCleanablePtr&& from) noexcept; + // Destructor (decrement refcount if non-null) + ~SharedCleanablePtr(); + // Create a new simple Cleanable and assign this pointer to it. + // (Reset()s first if necessary.) + void Allocate(); + // Reset to empty/null (decrement refcount if previously non-null) + void Reset(); + // Dereference to pointed-to Cleanable + Cleanable& operator*(); + Cleanable* operator->(); + // Get as raw pointer to Cleanable + Cleanable* get(); + + // Creates a (virtual) copy of this SharedCleanablePtr and registers its + // destruction with target, so that the cleanups registered with the + // Cleanable pointed to by this can only happen after the cleanups in the + // target Cleanable are run. + // No-op if this is empty (nullptr). + void RegisterCopyWith(Cleanable* target); + + // Moves (virtually) this shared pointer to a new cleanup in the target. + // This is essentially a move semantics version of RegisterCopyWith(), for + // performance optimization. No-op if this is empty (nullptr). + void MoveAsCleanupTo(Cleanable* target); + + private: + struct Impl; + Impl* ptr_ = nullptr; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compaction_filter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compaction_filter.h new file mode 100644 index 0000000000..b1b511613b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compaction_filter.h @@ -0,0 +1,363 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2013 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include <cassert> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "rocksdb/customizable.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/types.h" +#include "rocksdb/wide_columns.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; +class SliceTransform; + +// CompactionFilter allows an application to modify/delete a key-value during +// table file creation.
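+// +// For illustration only, a hypothetical filter that removes entries with +// empty values could subclass the legacy Filter() API documented below: +// +// class DropEmptyValues : public CompactionFilter { +// public: +// const char* Name() const override { return "DropEmptyValues"; } +// bool Filter(int /*level*/, const Slice& /*key*/, const Slice& value, +// std::string* /*new_value*/, +// bool* /*value_changed*/) const override { +// return value.empty(); // true => remove this key-value +// } +// };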
+// +// Some general notes: +// +// * RocksDB snapshots do not guarantee to preserve the state of the DB in the +// presence of CompactionFilter. Data seen from a snapshot might disappear after +// a table file created with a `CompactionFilter` is installed. If you use +// snapshots, think twice about whether you want to use `CompactionFilter` and +// whether you are using it in a safe way. +// +// * If multithreaded compaction is being used *and* a single CompactionFilter +// instance was supplied via Options::compaction_filter, CompactionFilter +// methods may be called from different threads concurrently. The application +// must ensure that such calls are thread-safe. If the CompactionFilter was +// created by a factory, then it will only ever be used by a single thread that +// is doing the table file creation, and this call does not need to be +// thread-safe. However, multiple filters may be in existence and operating +// concurrently. +// +// * The key passed to the filtering methods includes the timestamp if +// user-defined timestamps are enabled. +// +// * Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. +class CompactionFilter : public Customizable { + public: + // Value type of the key-value passed to the compaction filter's FilterV2/V3 + // methods. + enum ValueType { + // Plain key-value + kValue, + // Merge operand + kMergeOperand, + // Used internally by the old stacked BlobDB implementation; this value type + // is never passed to application code. Note that when using the new + // integrated BlobDB, values stored separately as blobs are retrieved and + // presented to FilterV2/V3 with the type kValue above. + kBlobIndex, + // Wide-column entity + kWideColumnEntity, + }; + + // Potential decisions that can be returned by the compaction filter's + // FilterV2/V3 and FilterBlobByKey methods. See decision-specific caveats and + // constraints below. + enum class Decision { + // Keep the current key-value as-is. + kKeep, + + // Remove the current key-value. Note that the semantics of removal are + // dependent on the value type. If the current key-value is a plain + // key-value or a wide-column entity, it is converted to a tombstone + // (Delete), resulting in the deletion of any earlier versions of the key. + // If it is a merge operand, it is simply dropped. Note: if you are using + // a TransactionDB, it is not recommended to filter out merge operands. + // If a Merge operation is filtered out, TransactionDB may not realize there + // is a write conflict and may allow a Transaction that should have failed + // to Commit. Instead, it is better to implement any Merge filtering inside + // the MergeOperator. + kRemove, + + // Change the value of the current key-value. If the current key-value is a + // plain key-value or a merge operand, its value is updated but its value + // type remains the same. If the current key-value is a wide-column entity, + // it is converted to a plain key-value with the new value specified. + kChangeValue, + + // Remove all key-values with key in [key, *skip_until). This range of keys + // will be skipped in a way that potentially avoids some IO operations + // compared to removing the keys one by one. 
Note that removal in this case + // means dropping the key-value regardless of value type; in other words, in + // contrast with kRemove, plain values and entities are not converted to + // tombstones. + // + // *skip_until <= key is treated the same as Decision::kKeep (since the + // range [key, *skip_until) is empty). + // + // Caveats: + // * The keys are skipped even if there are snapshots containing them, + // i.e. values removed by kRemoveAndSkipUntil can disappear from a + // snapshot - beware if you're using TransactionDB or DB::GetSnapshot(). + // * If value for a key was overwritten or merged into (multiple Put()s + // or Merge()s), and `CompactionFilter` skips this key with + // kRemoveAndSkipUntil, it's possible that it will remove only + // the new value, exposing the old value that was supposed to be + // overwritten. + // * Doesn't work with PlainTableFactory in prefix mode. + // * If you use kRemoveAndSkipUntil for table files created by compaction, + // consider also reducing compaction_readahead_size option. + kRemoveAndSkipUntil, + + // Used internally by the old stacked BlobDB implementation. Returning this + // decision from application code is not supported. + kChangeBlobIndex, + + // Used internally by the old stacked BlobDB implementation. Returning this + // decision from application code is not supported. + kIOError, + + // Remove the current key-value by converting it to a SingleDelete-type + // tombstone. Only supported for plain-key values and wide-column entities; + // not supported for merge operands. All the caveats related to + // SingleDeletes apply. + kPurge, + + // Change the current key-value to the wide-column entity specified. If the + // current key-value is already a wide-column entity, only its columns are + // updated; if it is a plain key-value, it is converted to a wide-column + // entity with the specified columns. Not supported for merge operands. + // Only applicable to FilterV3. + kChangeWideColumnEntity, + + // When using the integrated BlobDB implementation, it may be possible for + // applications to make a filtering decision for a given blob based on + // the key only without actually reading the blob value, which saves some + // I/O; see the FilterBlobByKey method below. Returning kUndetermined from + // FilterBlobByKey signals that making a decision solely based on the + // key is not possible; in this case, RocksDB reads the blob value and + // passes the key-value to the regular filtering method. Only applicable to + // FilterBlobByKey; returning this value from FilterV2/V3 is not supported. + kUndetermined, + }; + + // Used internally by the old stacked BlobDB implementation. + enum class BlobDecision { kKeep, kChangeValue, kCorruption, kIOError }; + + // Context information for a table file creation. + struct Context { + // Whether this table file is created as part of a compaction including all + // table files. + bool is_full_compaction; + // Whether this table file is created as part of a compaction requested by + // the client. + bool is_manual_compaction; + // The column family that will contain the created table file. + uint32_t column_family_id; + // Reason this table file is being created. 
+ TableFileCreationReason reason; + }; + + virtual ~CompactionFilter() {} + static const char* Type() { return "CompactionFilter"; } + static Status CreateFromString(const ConfigOptions& config_options, + const std::string& name, + const CompactionFilter** result); + + // The table file creation process invokes this method before adding a kv to + // the table file. A return value of false indicates that the kv should be + // preserved in the new table file and a return value of true indicates + // that this key-value should be removed (that is, converted to a tombstone). + // The application can inspect the existing value of the key and make a +  // decision based on it. + // + // Key-Values that are results of a merge operation during table file +  // creation are not passed into this function. Currently, when you have a mix +  // of Put()s and Merge()s on the same key, we only guarantee to process the +  // merge operands through the `CompactionFilter`s. Put()s might be processed, +  // or might not. + // + // When the value is to be preserved, the application has the option + // to modify the existing_value and pass it back through new_value. + // value_changed needs to be set to true in this case. + virtual bool Filter(int /*level*/, const Slice& /*key*/, + const Slice& /*existing_value*/, + std::string* /*new_value*/, + bool* /*value_changed*/) const { + return false; + } + + // The table file creation process invokes this method on every merge operand. + // If this method returns true, the merge operand will be ignored and not + // written out in the new table file. + // + // Note: If you are using a TransactionDB, it is not recommended to implement + // FilterMergeOperand(). If a Merge operation is filtered out, TransactionDB + // may not realize there is a write conflict and may allow a Transaction to + // Commit that should have failed. Instead, it is better to implement any + // Merge filtering inside the MergeOperator. + virtual bool FilterMergeOperand(int /*level*/, const Slice& /*key*/, + const Slice& /*operand*/) const { + return false; + } + + // A unified API for plain values and merge operands that may + // return a variety of decisions (see Decision above). The `value_type` + // parameter indicates the type of the key-value and the `existing_value` + // contains the current value or merge operand. The `new_value` output + // parameter can be used to set the updated value or merge operand when the + // kChangeValue decision is made by the filter. See the description of + // kRemoveAndSkipUntil above for the semantics of the `skip_until` output + // parameter, and see Decision above for more information on the semantics of + // the potential return values. + // + // The default implementation uses Filter() and FilterMergeOperand(). + // If you're overriding this method, no need to override the other two. + virtual Decision FilterV2(int level, const Slice& key, ValueType value_type, + const Slice& existing_value, std::string* new_value, + std::string* /*skip_until*/) const { + switch (value_type) { + case ValueType::kValue: { + bool value_changed = false; + bool rv = Filter(level, key, existing_value, new_value, &value_changed); + if (rv) { + return Decision::kRemove; + } + return value_changed ? Decision::kChangeValue : Decision::kKeep; + } + + case ValueType::kMergeOperand: { + bool rv = FilterMergeOperand(level, key, existing_value); + return rv ? Decision::kRemove : Decision::kKeep;
+ } + + case ValueType::kBlobIndex: + return Decision::kKeep; + + default: + assert(false); + return Decision::kKeep; + } + } + + // Wide column aware unified API. Called for plain values, merge operands, and + // wide-column entities; the `value_type` parameter indicates the type of the + // key-value. When the key-value is a plain value or a merge operand, the + // `existing_value` parameter contains the existing value and the + // `existing_columns` parameter is invalid (nullptr). When the key-value is a + // wide-column entity, the `existing_columns` parameter contains the wide + // columns of the existing entity and the `existing_value` parameter is + // invalid (nullptr). The `new_value` output parameter can be used to set the + // updated value or merge operand when the kChangeValue decision is made by + // the filter. The `new_columns` output parameter can be used to specify + // the pairs of column names and column values when the + // kChangeWideColumnEntity decision is returned. See the description of + // kRemoveAndSkipUntil above for the semantics of the `skip_until` output + // parameter, and see Decision above for more information on the semantics of + // the potential return values. + // + // For compatibility, the default implementation keeps all wide-column + // entities, and falls back to FilterV2 for plain values and merge operands. + // If you override this method, there is no need to override FilterV2 (or + // Filter/FilterMergeOperand). + virtual Decision FilterV3( + int level, const Slice& key, ValueType value_type, + const Slice* existing_value, const WideColumns* existing_columns, + std::string* new_value, + std::vector<std::pair<std::string, std::string>>* /* new_columns */, + std::string* skip_until) const { +#ifdef NDEBUG + (void)existing_columns; +#endif + + assert(!existing_value || !existing_columns); + assert(value_type == ValueType::kWideColumnEntity || existing_value); + assert(value_type != ValueType::kWideColumnEntity || existing_columns); + + if (value_type == ValueType::kWideColumnEntity) { + return Decision::kKeep; + } + + return FilterV2(level, key, value_type, *existing_value, new_value, + skip_until); + } + + // Internal (BlobDB) use only. Do not override in application code. + virtual BlobDecision PrepareBlobOutput(const Slice& /* key */, + const Slice& /* existing_value */, + std::string* /* new_value */) const { + return BlobDecision::kKeep; + } + + // This function is deprecated. Snapshots will always be ignored for + // `CompactionFilter`s, because we realized that not ignoring snapshots + // doesn't provide the guarantee we initially thought it would provide. + // Repeatable reads will not be guaranteed anyway. If you override the + // function and return false, we will fail the table file creation. + virtual bool IgnoreSnapshots() const { return true; } + + // Returns a name that identifies this `CompactionFilter`. + // The name will be printed to LOG file on start up for diagnosis. + const char* Name() const override = 0; + + // Internal (BlobDB) use only. Do not override in application code. + virtual bool IsStackedBlobDbInternalCompactionFilter() const { return false; } + + // In the case of BlobDB, it may be possible to reach a decision with only + // the key without reading the actual value, saving some I/O operations. + // Keys where the value is stored separately in a blob file will be + // passed to this method.
If the method returns a supported decision other + // than kUndetermined, it will be considered final and performed without + // reading the existing value. Returning kUndetermined will cause FilterV3() + // to be called to make a decision as usual. The output parameters + // `new_value` and `skip_until` are applicable to the decisions kChangeValue + // and kRemoveAndSkipUntil respectively, and have the same semantics as + // the corresponding parameters of FilterV2/V3. + virtual Decision FilterBlobByKey(int /*level*/, const Slice& /*key*/, + std::string* /*new_value*/, + std::string* /*skip_until*/) const { + return Decision::kUndetermined; + } +}; + +// Each thread of work involving creating table files will create a new +// `CompactionFilter` according to `ShouldFilterTableFileCreation()`. This +// allows the application to know about the different ongoing threads of work +// and makes it unnecessary for `CompactionFilter` to provide thread-safety. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. +class CompactionFilterFactory : public Customizable { + public: + virtual ~CompactionFilterFactory() {} + static const char* Type() { return "CompactionFilterFactory"; } + static Status CreateFromString( + const ConfigOptions& config_options, const std::string& name, + std::shared_ptr<CompactionFilterFactory>* result); + + // Returns whether a thread creating table files for the specified `reason` + // should invoke `CreateCompactionFilter()` and pass KVs through the returned + // filter. + virtual bool ShouldFilterTableFileCreation( + TableFileCreationReason reason) const { + // For backward compatibility, default implementation only applies + // `CompactionFilter` to files generated by compaction. + return reason == TableFileCreationReason::kCompaction; + } + + virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter( + const CompactionFilter::Context& context) = 0; + + // Returns a name that identifies this `CompactionFilter` factory. + virtual const char* Name() const override = 0; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compaction_job_stats.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compaction_job_stats.h new file mode 100644 index 0000000000..5ff8eccc8b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compaction_job_stats.h @@ -0,0 +1,109 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include <stddef.h> +#include <stdint.h> + +#include <string> + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { +struct CompactionJobStats { + CompactionJobStats() { Reset(); } + void Reset(); + // Aggregate the CompactionJobStats from another instance with this one + void Add(const CompactionJobStats& stats); + + // the elapsed time of this compaction in microseconds. + uint64_t elapsed_micros; + + // the elapsed CPU time of this compaction in microseconds. + uint64_t cpu_micros; + + // the number of compaction input records.
+ uint64_t num_input_records; + // the number of blobs read from blob files + uint64_t num_blobs_read; + // the number of compaction input files (table files) + size_t num_input_files; + // the number of compaction input files at the output level (table files) + size_t num_input_files_at_output_level; + + // the number of compaction output records. + uint64_t num_output_records; + // the number of compaction output files (table files) + size_t num_output_files; + // the number of compaction output files (blob files) + size_t num_output_files_blob; + + // true if the compaction is a full compaction (all live SST files input) + bool is_full_compaction; + // true if the compaction is a manual compaction + bool is_manual_compaction; + + // the total size of table files in the compaction input + uint64_t total_input_bytes; + // the total size of blobs read from blob files + uint64_t total_blob_bytes_read; + // the total size of table files in the compaction output + uint64_t total_output_bytes; + // the total size of blob files in the compaction output + uint64_t total_output_bytes_blob; + + // number of records being replaced by newer record associated with same key. + // this could be a new value or a deletion entry for that key so this field + // sums up all updated and deleted keys + uint64_t num_records_replaced; + + // the sum of the uncompressed input keys in bytes. + uint64_t total_input_raw_key_bytes; + // the sum of the uncompressed input values in bytes. + uint64_t total_input_raw_value_bytes; + + // the number of deletion entries before compaction. Deletion entries + // can disappear after compaction because they expired + uint64_t num_input_deletion_records; + // number of deletion records that were found obsolete and discarded + // because it is not possible to delete any more keys with this entry + // (i.e. all possible deletions resulting from it have been completed) + uint64_t num_expired_deletion_records; + + // number of corrupt keys (ParseInternalKey returned false when applied to + // the key) encountered and written out. + uint64_t num_corrupt_keys; + + // Following counters are only populated if + // options.report_bg_io_stats = true; + + // Time spent on file's Append() call. + uint64_t file_write_nanos; + + // Time spent on sync file range. + uint64_t file_range_sync_nanos; + + // Time spent on file fsync. + uint64_t file_fsync_nanos; + + // Time spent on preparing file write (fallocate, etc) + uint64_t file_prepare_write_nanos; + + // 0-terminated strings storing the first 8 bytes of the smallest and + // largest key in the output. + static const size_t kMaxPrefixLength = 8; + + std::string smallest_output_key_prefix; + std::string largest_output_key_prefix; + + // number of single-deletes which do not meet a put + uint64_t num_single_del_fallthru; + + // number of single-deletes which meet something other than a put + uint64_t num_single_del_mismatch; + + // TODO: Add output_to_penultimate_level output information +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/comparator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/comparator.h new file mode 100644 index 0000000000..ad1e71a119 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/comparator.h @@ -0,0 +1,164 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include <string> + +#include "rocksdb/customizable.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; + +// The general interface for comparing two Slices is defined for both +// Comparator and some internal data structures. +class CompareInterface { + public: + virtual ~CompareInterface() {} + + // Three-way comparison. Returns value: + // < 0 iff "a" < "b", + // == 0 iff "a" == "b", + // > 0 iff "a" > "b" + // Note that Compare(a, b) also compares timestamp if timestamp size is + // non-zero. For the same user key with different timestamps, larger (newer) + // timestamp comes first. + virtual int Compare(const Slice& a, const Slice& b) const = 0; +}; + +// A Comparator object provides a total order across slices that are +// used as keys in an sstable or a database. A Comparator implementation +// must be thread-safe since rocksdb may invoke its methods concurrently +// from multiple threads. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. +class Comparator : public Customizable, public CompareInterface { + public: + Comparator() : timestamp_size_(0) {} + + Comparator(size_t ts_sz) : timestamp_size_(ts_sz) {} + + Comparator(const Comparator& orig) : timestamp_size_(orig.timestamp_size_) {} + + Comparator& operator=(const Comparator& rhs) { + if (this != &rhs) { + timestamp_size_ = rhs.timestamp_size_; + } + return *this; + } + + ~Comparator() override {} + + static Status CreateFromString(const ConfigOptions& opts, + const std::string& id, + const Comparator** comp); + static const char* Type() { return "Comparator"; } + + // The name of the comparator. Used to check for comparator + // mismatches (i.e., a DB created with one comparator is + // accessed using a different comparator). + // + // The client of this package should switch to a new name whenever + // the comparator implementation changes in a way that will cause + // the relative ordering of any two keys to change. + // + // Names starting with "rocksdb." are reserved and should not be used + // by any clients of this package. + const char* Name() const override = 0; + + // Compares two slices for equality. The following invariant should always + // hold (and is the default implementation): + // Equal(a, b) iff Compare(a, b) == 0 + // Overwrite only if equality comparisons can be done more efficiently than + // three-way comparisons. + virtual bool Equal(const Slice& a, const Slice& b) const { + return Compare(a, b) == 0; + } + + // Advanced functions: these are used to reduce the space requirements + // for internal data structures like index blocks. + + // If *start < limit, changes *start to a short string in [start,limit). + // Simple comparator implementations may return with *start unchanged, + // i.e., an implementation of this method that does nothing is correct.
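+ // For example, with the builtin bytewise comparator, *start == "helloworld" + // and limit == "hellozoo" may shorten *start to "hellox" (an illustrative + // outcome; leaving *start unchanged would be equally correct).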
+ virtual void FindShortestSeparator(std::string* start, + const Slice& limit) const = 0; + + // Changes *key to a short string >= *key. + // Simple comparator implementations may return with *key unchanged, + // i.e., an implementation of this method that does nothing is correct. + virtual void FindShortSuccessor(std::string* key) const = 0; + + // given two keys, determine if t is the successor of s + // BUG: only return true if no other keys starting with `t` are ordered + // before `t`. Otherwise, the auto_prefix_mode can omit entries within + // iterator bounds that have the same prefix as the upper bound but a +  // different prefix from the seek key. + virtual bool IsSameLengthImmediateSuccessor(const Slice& /*s*/, + const Slice& /*t*/) const { + return false; + } + + // return true if two keys with different byte sequences can be regarded + // as equal by this comparator. + // The major use case is to determine if DataBlockHashIndex is compatible + // with the customized comparator. + virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; } + + // If it is a wrapped comparator, may return the root one. + // Returns itself if it is not wrapped. + virtual const Comparator* GetRootComparator() const { return this; } + + inline size_t timestamp_size() const { return timestamp_size_; } + + int CompareWithoutTimestamp(const Slice& a, const Slice& b) const { + return CompareWithoutTimestamp(a, /*a_has_ts=*/true, b, /*b_has_ts=*/true); + } + + // For two events e1 and e2 whose timestamps are t1 and t2 respectively, + // Returns value: + // < 0 iff t1 < t2 + // == 0 iff t1 == t2 + // > 0 iff t1 > t2 + // Note that an all-zero byte array will be the smallest (oldest) timestamp + // of the same length, and a byte array with all bits 1 will be the largest. + // In the future, we can extend Comparator so that subclasses can specify + // both largest and smallest timestamps. + virtual int CompareTimestamp(const Slice& /*ts1*/, + const Slice& /*ts2*/) const { + return 0; + } + + virtual int CompareWithoutTimestamp(const Slice& a, bool /*a_has_ts*/, + const Slice& b, bool /*b_has_ts*/) const { + return Compare(a, b); + } + + virtual bool EqualWithoutTimestamp(const Slice& a, const Slice& b) const { + return 0 == + CompareWithoutTimestamp(a, /*a_has_ts=*/true, b, /*b_has_ts=*/true); + } + + private: + size_t timestamp_size_; +}; + +// Return a builtin comparator that uses lexicographic byte-wise +// ordering. The result remains the property of this module and +// must not be deleted. +extern const Comparator* BytewiseComparator(); + +// Return a builtin comparator that uses reverse lexicographic byte-wise +// ordering. +extern const Comparator* ReverseBytewiseComparator(); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compression_type.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compression_type.h new file mode 100644 index 0000000000..bfeb00bdef --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/compression_type.h @@ -0,0 +1,40 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// DB contents are stored in a set of blocks, each of which holds a +// sequence of key,value pairs.
Each block may be compressed before +// being stored in a file. The following enum describes which +// compression method (if any) is used to compress a block. + +enum CompressionType : unsigned char { + // NOTE: do not change the values of existing entries, as these are + // part of the persistent format on disk. + kNoCompression = 0x0, + kSnappyCompression = 0x1, + kZlibCompression = 0x2, + kBZip2Compression = 0x3, + kLZ4Compression = 0x4, + kLZ4HCCompression = 0x5, + kXpressCompression = 0x6, + kZSTD = 0x7, + + // Only use kZSTDNotFinalCompression if you have to use a ZSTD lib older than + // 0.8.0 or consider a possibility of downgrading the service or copying + // the database files to another service running with an older version of + // RocksDB that doesn't have kZSTD. Otherwise, you should use kZSTD. We will + // eventually remove the option from the public API. + kZSTDNotFinalCompression = 0x40, + + // kDisableCompressionOption is used to disable some compression options. + kDisableCompressionOption = 0xff, +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/concurrent_task_limiter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/concurrent_task_limiter.h new file mode 100644 index 0000000000..9ad741f98d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/concurrent_task_limiter.h @@ -0,0 +1,51 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include <stdint.h> + +#include <string> + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// This is NOT an extensible interface but a public interface for result of +// NewConcurrentTaskLimiter. Any derived classes must be RocksDB internal. +class ConcurrentTaskLimiter { + public: + virtual ~ConcurrentTaskLimiter() {} + + // Returns a name that identifies this concurrent task limiter. + virtual const std::string& GetName() const = 0; + + // Set max concurrent tasks. + // limit = 0 means no new task allowed. + // limit < 0 means no limitation. + virtual void SetMaxOutstandingTask(int32_t limit) = 0; + + // Reset to unlimited max concurrent task. + virtual void ResetMaxOutstandingTask() = 0; + + // Returns current outstanding task count. + virtual int32_t GetOutstandingTask() const = 0; +}; + +// Create a ConcurrentTaskLimiter that can be shared with multiple CFs +// across RocksDB instances to control concurrent tasks. +// +// @param name: Name of the limiter. +// @param limit: max concurrent tasks. +// limit = 0 means no new task allowed. +// limit < 0 means no limitation. +extern ConcurrentTaskLimiter* NewConcurrentTaskLimiter(const std::string& name, + int32_t limit); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/configurable.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/configurable.h new file mode 100644 index 0000000000..a200d7e86c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/configurable.h @@ -0,0 +1,390 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <atomic>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+class Logger;
+class ObjectRegistry;
+class OptionTypeInfo;
+struct ColumnFamilyOptions;
+struct ConfigOptions;
+struct DBOptions;
+
+// Configurable is a base class used by the rocksdb that describes a
+// standard way of configuring objects. A Configurable object can:
+//   -> Populate itself given:
+//        - One or more "name/value" pair strings
+//        - A string representing the set of name=value properties
+//        - A map of name/value properties.
+//   -> Convert itself into its string representation
+//   -> Dump itself to a Logger
+//   -> Compare itself to another Configurable object to see if the two
+//      objects have equivalent options settings
+//
+// If a derived class calls RegisterOptions to register (by name) how its
+// options objects are to be processed, this functionality can typically be
+// handled by this class without additional overrides. Otherwise, the derived
+// class will need to implement the methods for handling the corresponding
+// functionality.
+class Configurable {
+ protected:
+  friend class ConfigurableHelper;
+  struct RegisteredOptions {
+    // The name of the options being registered
+    std::string name;
+    // Pointer to the object being registered
+    void* opt_ptr;
+    // The map of options being registered
+    const std::unordered_map<std::string, OptionTypeInfo>* type_map;
+  };
+
+ public:
+  virtual ~Configurable() {}
+
+  // Returns the raw pointer of the named options that is used by this
+  // object, or nullptr if this function is not supported.
+  // Since the return value is a raw pointer, the object owns the
+  // pointer and the caller should not delete the pointer.
+  //
+  // Note that changing the underlying options while the object
+  // is currently used by any open DB is undefined behavior.
+  // Developers should use DB::SetOption() instead to dynamically change
+  // options while the DB is open.
+  template <typename T>
+  const T* GetOptions() const {
+    return GetOptions<T>(T::kName());
+  }
+  template <typename T>
+  T* GetOptions() {
+    return GetOptions<T>(T::kName());
+  }
+  template <typename T>
+  const T* GetOptions(const std::string& name) const {
+    return reinterpret_cast<const T*>(GetOptionsPtr(name));
+  }
+  template <typename T>
+  T* GetOptions(const std::string& name) {
+    return reinterpret_cast<T*>(const_cast<void*>(GetOptionsPtr(name)));
+  }
+
+  // Configures the options for this class based on the input parameters.
+  // On successful completion, the object is updated with the settings from
+  // the opt_map.
+  // If this method fails, an attempt is made to revert the object to original
+  // state. Note that the revert may not be the original state but may be an
+  // equivalent. For example, if the object contains an option that is a
+  // shared_ptr, the shared_ptr may not be the original one but a copy (e.g.
+  // not the Cache object that was passed in, but a Cache object of the same
+  // size).
+  //
+  // The acceptable values of the name/value pairs are documented with the
+  // specific class/instance.
+  //
+  // @param config_options Controls how the arguments are processed.
+  // @param opt_map Name/value pairs of the options to update
+  // @param unused If specified, this value will return the name/value
+  //      pairs from opt_map that were NotFound for this object.
+  // @return OK If all values in the map were successfully updated
+  //      If invoke_prepare_options is true, OK also implies
+  //      PrepareOptions ran successfully.
+  // @return NotFound If any of the names in the opt_map were not valid
+  //      for this object. If unused is specified, it will contain the
+  //      collection of NotFound names.
+  // @return NotSupported If any of the names are valid but the object does
+  //      not know how to convert the value. This can happen if, for example,
+  //      there is some nested Configurable that cannot be created.
+  // @return InvalidArgument If any of the values cannot be successfully
+  //      parsed. This can also be returned if PrepareOptions encounters an
+  //      error.
+  // @see ConfigOptions for a description of the controls.
+  Status ConfigureFromMap(
+      const ConfigOptions& config_options,
+      const std::unordered_map<std::string, std::string>& opt_map);
+  Status ConfigureFromMap(
+      const ConfigOptions& config_options,
+      const std::unordered_map<std::string, std::string>& opt_map,
+      std::unordered_map<std::string, std::string>* unused);
+
+  // Updates the named option to the input value, returning OK if successful.
+  // Note that ConfigureOption does not cause PrepareOptions to be invoked.
+  // @param config_options Controls how the name/value is processed.
+  // @param name The name of the option to update
+  // @param value The value to set for the named option
+  // @return OK If the named field was successfully updated to value.
+  // @return NotFound If the name is not valid for this object.
+  // @return NotSupported If the name is valid but the object does
+  //      not know how to convert the value. This can happen if, for example,
+  //      there is some nested Configurable that cannot be created.
+  // @return InvalidArgument If the value cannot be successfully parsed.
+  Status ConfigureOption(const ConfigOptions& config_options,
+                         const std::string& name, const std::string& value);
+
+  // Configures the options for this class based on the input parameters.
+  // On successful completion, the object is updated with the settings from
+  // the opt_map. If this method fails, an attempt is made to revert the
+  // object to original state. Note that the revert may not be the original
+  // state but may be an equivalent.
+  // @see ConfigureFromMap for more details
+  // @param config_options Controls how the arguments are processed.
+  // @param opt_str string containing the values to update.
+  // @param unused If specified, this value will return the name/value
+  //      pairs from opt_map that were NotFound for this object.
+  // @return OK If all specified values were successfully updated
+  //      If invoke_prepare_options is true, OK also implies
+  //      PrepareOptions ran successfully.
+  // @return NotFound If any of the names were not valid for this object.
+  //      If unused is specified, it will contain the collection of NotFound
+  //      names.
+  // @return NotSupported If any of the names are valid but the object does
+  //      not know how to convert the value. This can happen if, for example,
+  //      there is some nested Configurable that cannot be created.
+  // @return InvalidArgument If any of the values cannot be successfully
+  //      parsed. This can also be returned if PrepareOptions encounters an
+  //      error.
+  Status ConfigureFromString(const ConfigOptions& config_options,
+                             const std::string& opts);
+
+  // Fills in result with the serialized options for this object.
+  // This is the inverse of ConfigureFromString.
+  // @param config_options Controls how serialization happens.
+  // @param result The string representation of this object.
+  // @return OK If the options for this object were successfully serialized.
+  // @return InvalidArgument If one or more of the options could not be
+  //      serialized.
+  Status GetOptionString(const ConfigOptions& config_options,
+                         std::string* result) const;
+  // Returns the serialized options for this object.
+  // This method is similar to GetOptionString with no errors.
+  // @param config_options Controls how serialization happens.
+  // @param prefix A string to prepend to every option.
+  // @return The serialized representation of the options for this object
+  std::string ToString(const ConfigOptions& config_options) const {
+    return ToString(config_options, "");
+  }
+  std::string ToString(const ConfigOptions& config_options,
+                       const std::string& prefix) const;
+
+  // Returns the list of option names associated with this configurable
+  // @param config_options Controls how the names are returned
+  // @param result The set of option names for this object. Note that
+  //      options that are deprecated or aliases are not returned.
+  // @return OK on success.
+  Status GetOptionNames(const ConfigOptions& config_options,
+                        std::unordered_set<std::string>* result) const;
+
+  // Returns the value of the option associated with the input name
+  // This method is the functional inverse of ConfigureOption
+  // @param config_options Controls how the value is returned
+  // @param name The name of the option to return a value for.
+  // @param value The returned value associated with the named option.
+  // @return OK If the named field was successfully updated to value.
+  // @return NotFound If the name is not valid for this object.
+  // @return InvalidArgument If the name is valid for this object but
+  //      its value cannot be serialized.
+  virtual Status GetOption(const ConfigOptions& config_options,
+                           const std::string& name, std::string* value) const;
+
+  // Checks to see if this Configurable is equivalent to other.
+  // This method assumes that the two objects are of the same class.
+  // @param config_options Controls how the options are compared.
+  // @param other The other object to compare to.
+  // @param mismatch If the objects do not match, this parameter contains
+  //      the name of the option that triggered the match failure.
+  // @return True if the objects match, false otherwise.
+  virtual bool AreEquivalent(const ConfigOptions& config_options,
+                             const Configurable* other,
+                             std::string* name) const;
+
+  // Returns a pretty-printed, human-readable version of the options.
+  // This method is typically used to dump the options to a log file.
+  // Classes should override this method.
+  virtual std::string GetPrintableOptions() const { return ""; }
+
+  // Validates that the settings are valid/consistent and performs any object
+  // initialization required by this object. This method may be called as part
+  // of Configure (if invoke_prepare_options is set), or may be invoked
+  // separately.
+  //
+  // Once an object has been prepared, non-mutable options can no longer be
+  // updated.
+  //
+  // Classes must override this method to provide any implementation-specific
+  // initialization, such as opening log files or setting up cache parameters.
+  // Implementations should be idempotent (e.g. don't re-open the log file or
+  // reconfigure the cache), as there is the potential this method can be
+  // called more than once.
+  //
+  // By default, this method will also prepare all nested (Inner and
+  // OptionType::kConfigurable) objects.
+  //
+  // @param config_options Controls how the object is prepared. Also contains
+  //      a Logger and Env that can be used to initialize this object.
+  // @return OK If the object was successfully initialized.
+  // @return InvalidArgument If this object could not be successfully
+  //      initialized.
+  virtual Status PrepareOptions(const ConfigOptions& config_options);
+
+  // Checks to see if the settings are valid for this object.
+  // This method checks to see if the input DBOptions and ColumnFamilyOptions
+  // are valid for the settings of this object. For example, an Env might not
+  // support certain mmap modes or a TableFactory might require certain
+  // settings.
+  //
+  // By default, this method will also validate all nested (Inner and
+  // OptionType::kConfigurable) objects.
+  //
+  // @param db_opts The DBOptions to validate
+  // @param cf_opts The ColumnFamilyOptions to validate
+  // @return OK if the options are valid
+  // @return InvalidArgument If the arguments are not valid for the options
+  //      of the current object.
+  virtual Status ValidateOptions(const DBOptions& db_opts,
+                                 const ColumnFamilyOptions& cf_opts) const;
+
+  // Splits the input opt_value into the ID field and the remaining options.
+  // The input opt_value can be in the form of "name" or "name=value
+  // [;name=value]". The first form uses the "name" as an id with no options.
+  // The latter form converts the input into a map of name=value pairs and
+  // sets "id" to the "id" value from the map.
+  // @param opt_value The value to split into id and options
+  // @param id The id field from the opt_value
+  // @param options The remaining name/value pairs from the opt_value
+  // @param default_id If specified and there is no id field in the map, this
+  //      value is returned as the ID
+  // @return OK if the value was converted to a map successfully and an ID was
+  //      found.
+  // @return InvalidArgument if the value could not be converted to a map or
+  //      there is no id property in the map.
+  static Status GetOptionsMap(
+      const std::string& opt_value, const std::string& default_id,
+      std::string* id, std::unordered_map<std::string, std::string>* options);
+
+ protected:
+  // Returns the raw pointer for the associated named option.
+  // The name is typically the name of an option registered via the
+  // RegisterOptions method.
+  // Classes may override this method to provide further specialization (such
+  // as returning a sub-option).
+  //
+  // The default implementation looks at the registered options. If the
+  // input name matches that of a registered option, the pointer registered
+  // with that name is returned.
+  // e.g., RegisterOptions("X", &my_ptr, ...); GetOptionsPtr("X") returns
+  // "my_ptr"
+  virtual const void* GetOptionsPtr(const std::string& name) const;
+
+  // Method for allowing options to be configured outside of the normal
+  // registered options framework. Classes may override this method if they
+  // wish to support non-standard options implementations (such as configuring
+  // themselves from constant or simple ":"-separated strings).
+  //
+  // The default implementation does nothing and returns OK
+  virtual Status ParseStringOptions(const ConfigOptions& config_options,
+                                    const std::string& opts_str);
+
+  // Internal method to configure an object from a map of name-value options.
+  // This method uses the input config_options to drive the configuration of
+  // the options in opt_map.
+  // Any option name that cannot be found from the input set will be
+  // returned in "unused".
+  //
+  // Classes may override this method to extend the functionality if required.
+  // @param config_options Controls how the options are configured and errors
+  //      handled.
+  // @param opts_map The set of options to configure
+  // @param unused Any options from opt_map that were not configured.
+  // @returns a Status based on the rules outlined in ConfigureFromMap
+  virtual Status ConfigureOptions(
+      const ConfigOptions& config_options,
+      const std::unordered_map<std::string, std::string>& opts_map,
+      std::unordered_map<std::string, std::string>* unused);
+
+  // Method that configures the specific opt_name from opt_value.
+  // By default, this method calls opt_info.ParseOption with the
+  // input parameters.
+  // Classes may override this method to extend the functionality, or
+  // change the returned Status.
+  virtual Status ParseOption(const ConfigOptions& config_options,
+                             const OptionTypeInfo& opt_info,
+                             const std::string& opt_name,
+                             const std::string& opt_value, void* opt_ptr);
+
+  // Internal method to see if the single option name/info matches for this
+  // and that object. Classes may override this method to change its behavior.
+  // @param config_options Controls how the options are being matched
+  // @param opt_info The OptionTypeInfo registered for this option name
+  //      that controls what field is matched (offset) and how (type).
+  // @param name The name associated with this opt_info.
+  // @param this_ptr The base pointer to compare to. This is the object
+  //      registered for this OptionTypeInfo.
+  // @param that_ptr The other pointer to compare to. This is the object
+  //      registered for this OptionTypeInfo.
+  // @param bad_name If the match fails, the name of the option that failed to
+  //      match.
+  virtual bool OptionsAreEqual(const ConfigOptions& config_options,
+                               const OptionTypeInfo& opt_info,
+                               const std::string& name,
+                               const void* const this_ptr,
+                               const void* const that_ptr,
+                               std::string* bad_name) const;
+
+  // Internal method to serialize options (ToString)
+  // Classes may override this method to change its behavior.
+  virtual std::string SerializeOptions(const ConfigOptions& config_options,
+                                       const std::string& header) const;
+
+  // Given a name (e.g. rocksdb.my.type.opt), returns the short name (opt)
+  virtual std::string GetOptionName(const std::string& long_name) const;
+
+  // Registers the input name with the options and associated map.
+  // When classes register their options in this manner, most of the
+  // functionality (excluding unknown options and validate/prepare) is
+  // implemented by the base class.
+  //
+  // This method should be called in the class constructor to register the
+  // option set for this object. For example, to register the options
+  // associated with the BlockBasedTableFactory, the constructor calls this
+  // method passing in:
+  // - the name of the options ("BlockBasedTableOptions");
+  // - the options object (the BlockBasedTableOptions object for this object);
+  // - the options type map for the BlockBasedTableOptions.
+  // This registration allows the Configurable class to process the option
+  // values associated with the BlockBasedTableOptions without further code in
+  // the derived class. A sketch of such a registration call follows below.
+  //
+  // @param name The name of this set of options (@see GetOptionsPtr)
+  // @param opt_ptr Pointer to the options to associate with this name
+  // @param opt_map Options map that controls how this option is configured.
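+  // [Illustrative example, not part of the upstream header] A sketch of how
+  // a derived class might register its options in its constructor;
+  // "MyFactory", "my_options_", and "my_options_type_map" are hypothetical
+  // names, not defined anywhere in RocksDB:
+  //
+  //   MyFactory::MyFactory() {
+  //     RegisterOptions("MyFactoryOptions", &my_options_,
+  //                     &my_options_type_map);
+  //   }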
+  template <typename T>
+  void RegisterOptions(
+      T* opt_ptr,
+      const std::unordered_map<std::string, OptionTypeInfo>* opt_map) {
+    RegisterOptions(T::kName(), opt_ptr, opt_map);
+  }
+  void RegisterOptions(
+      const std::string& name, void* opt_ptr,
+      const std::unordered_map<std::string, OptionTypeInfo>* opt_map);
+
+  // Returns true if there are registered options for this Configurable object
+  inline bool HasRegisteredOptions() const { return !options_.empty(); }
+
+ private:
+  // Contains the collection of options (name, opt_ptr, opt_map) associated
+  // with this object. This collection is typically set in the constructor of
+  // the Configurable option via RegisterOptions.
+  std::vector<RegisteredOptions> options_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/convenience.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/convenience.h
new file mode 100644
index 0000000000..7ce676df0c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/convenience.h
@@ -0,0 +1,466 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "rocksdb/compression_type.h"
+#include "rocksdb/db.h"
+#include "rocksdb/status.h"
+#include "rocksdb/table.h"
+
+namespace ROCKSDB_NAMESPACE {
+class Env;
+class Logger;
+class ObjectRegistry;
+
+struct ColumnFamilyOptions;
+struct DBOptions;
+struct Options;
+
+// ConfigOptions containing the parameters/controls for
+// comparing objects and converting to/from strings.
+// These settings control how the methods
+// treat errors (e.g. ignore_unknown_objects), the format
+// of the serialization (e.g. delimiter), and how to compare
+// options (sanity_level).
+struct ConfigOptions {
+  // Constructs a new ConfigOptions with a new object registry.
+  // This method should only be used when a DBOptions is not available,
+  // else registry settings may be lost
+  ConfigOptions();
+
+  // Constructs a new ConfigOptions using the settings from
+  // the input DBOptions. Currently constructs a new object registry.
+  explicit ConfigOptions(const DBOptions&);
+
+  // This enum defines the RocksDB options sanity level.
+  enum SanityLevel : unsigned char {
+    kSanityLevelNone = 0x01,  // Performs no sanity check at all.
+    // Performs minimum check to ensure the RocksDB instance can be
+    // opened without corrupting / mis-interpreting the data.
+    kSanityLevelLooselyCompatible = 0x02,
+    // Perform exact match sanity check.
+    kSanityLevelExactMatch = 0xFF,
+  };
+
+  enum Depth {
+    kDepthDefault,   // Traverse nested options that are not flagged as
+                     // "shallow"
+    kDepthShallow,   // Do not traverse into any nested options
+    kDepthDetailed,  // Traverse nested options, overriding the options
+                     // shallow setting
+  };
+
+  // When true, any unused options will be ignored and OK will be returned
+  bool ignore_unknown_options = false;
+
+  // When true, any unsupported options will be ignored and OK will be
+  // returned
+  bool ignore_unsupported_options = true;
+
+  // If the strings are escaped (old-style?)
+  bool input_strings_escaped = true;
+
+  // Whether or not to invoke PrepareOptions after configure is called.
+  bool invoke_prepare_options = true;
+
+  // Options can be marked as Mutable (OptionTypeInfo::IsMutable()) or not.
+  // When "mutable_options_only=false", all options are evaluated.
+  // When "mutable_options_only=true", any option not marked as Mutable is
+  // either ignored (in the case of string/equals methods) or results in an
+  // error (in the case of Configure).
+  bool mutable_options_only = false;
+
+  // The separator between options when converting to a string
+  std::string delimiter = ";";
+
+  // Controls how to traverse options during print/match stages
+  Depth depth = Depth::kDepthDefault;
+
+  // Controls how options are serialized
+  // Controls how pedantic the comparison must be for equivalency
+  SanityLevel sanity_level = SanityLevel::kSanityLevelExactMatch;
+  // `file_readahead_size` is used for readahead for the option file.
+  size_t file_readahead_size = 512 * 1024;
+
+  // The environment to use for this option
+  Env* env = Env::Default();
+
+  // The object registry to use for these options
+  std::shared_ptr<ObjectRegistry> registry;
+
+  bool IsShallow() const { return depth == Depth::kDepthShallow; }
+  bool IsDetailed() const { return depth == Depth::kDepthDetailed; }
+
+  bool IsCheckDisabled() const {
+    return sanity_level == SanityLevel::kSanityLevelNone;
+  }
+
+  bool IsCheckEnabled(SanityLevel level) const {
+    return (level > SanityLevel::kSanityLevelNone && level <= sanity_level);
+  }
+};
+
+// The following set of functions provide a way to construct RocksDB Options
+// from a string or a string-to-string map. Here is the general rule of
+// setting option values from strings by type. Some RocksDB types are also
+// supported in these APIs. Please refer to the comment of the function itself
+// to find more information about how to config those RocksDB types.
+//
+// * Strings:
+//   Strings will be used as values directly without any truncating or
+//   trimming.
+//
+// * Booleans:
+//   - "true" or "1" => true
+//   - "false" or "0" => false.
+//   [Example]:
+//   - {"optimize_filters_for_hits", "1"} in GetColumnFamilyOptionsFromMap, or
+//   - "optimize_filters_for_hits=true" in GetColumnFamilyOptionsFromString.
+//
+// * Integers:
+//   Integers are converted directly from string, in addition to the following
+//   units that we support:
+//   - 'k' or 'K' => 2^10
+//   - 'm' or 'M' => 2^20
+//   - 'g' or 'G' => 2^30
+//   - 't' or 'T' => 2^40  // only for unsigned int with sufficient bits.
+//   [Example]:
+//   - {"arena_block_size", "19G"} in GetColumnFamilyOptionsFromMap, or
+//   - "arena_block_size=19G" in GetColumnFamilyOptionsFromString.
+//
+// * Doubles / Floating Points:
+//   Doubles / Floating Points are converted directly from string. Note that
+//   currently we do not support units.
+//   [Example]:
+//   - {"memtable_prefix_bloom_size_ratio", "0.1"} in
+//     GetColumnFamilyOptionsFromMap, or
+//   - "memtable_prefix_bloom_size_ratio=0.1" in
+//     GetColumnFamilyOptionsFromString.
+// * Array / Vectors:
+//   An array is specified by a list of values, where ':' is used as
+//   the delimiter to separate each value.
+//   [Example]:
+//   - {"compression_per_level", "kNoCompression:kSnappyCompression"}
+//     in GetColumnFamilyOptionsFromMap, or
+//   - "compression_per_level=kNoCompression:kSnappyCompression" in
+//     GetColumnFamilyOptionsFromString.
+// * Enums:
+//   The valid values of each enum are identical to the names of its constants.
+//   [Example]:
+//   - CompressionType: valid values are "kNoCompression",
+//     "kSnappyCompression", "kZlibCompression", "kBZip2Compression", ...
+//   - CompactionStyle: valid values are "kCompactionStyleLevel",
+//     "kCompactionStyleUniversal", "kCompactionStyleFIFO", and
+//     "kCompactionStyleNone".
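+// [Illustrative example, not part of the upstream header] Putting the value
+// formats above together with GetColumnFamilyOptionsFromString (declared
+// later in this file); a sketch:
+//
+//   ConfigOptions config_options;
+//   ColumnFamilyOptions base_opts;
+//   ColumnFamilyOptions new_opts;
+//   Status s = GetColumnFamilyOptionsFromString(
+//       config_options, base_opts,
+//       "write_buffer_size=64M;compression=kZSTD;"
+//       "compression_per_level=kNoCompression:kSnappyCompression",
+//       &new_opts);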
+//
+
+// Take a ConfigOptions `config_options` and a ColumnFamilyOptions
+// "base_options" as the default option in addition to a map "opts_map" of
+// option name to option value to construct the new ColumnFamilyOptions
+// "new_options".
+//
+// Below are the instructions of how to config some non-primitive-typed
+// options in ColumnFamilyOptions:
+//
+// * table_factory:
+//   table_factory can be configured using our custom nested-option syntax.
+//
+//   {option_a=value_a; option_b=value_b; option_c=value_c; ... }
+//
+//   A nested option is enclosed by two curly braces, within which there are
+//   multiple option assignments. Each assignment is of the form
+//   "variable_name=value;".
+//
+//   Currently we support the following types of TableFactory:
+//   - BlockBasedTableFactory:
+//     Use name "block_based_table_factory" to initialize table_factory with
+//     BlockBasedTableFactory. Its BlockBasedTableFactoryOptions can be
+//     configured using the nested-option syntax.
+//     [Example]:
+//     * {"block_based_table_factory", "{block_cache=1M;block_size=4k;}"}
+//       is equivalent to assigning table_factory with a BlockBasedTableFactory
+//       that has 1M LRU block-cache with block size equal to 4k:
+//         ColumnFamilyOptions cf_opt;
+//         BlockBasedTableOptions blk_opt;
+//         blk_opt.block_cache = NewLRUCache(1 * 1024 * 1024);
+//         blk_opt.block_size = 4 * 1024;
+//         cf_opt.table_factory.reset(NewBlockBasedTableFactory(blk_opt));
+//   - PlainTableFactory:
+//     Use name "plain_table_factory" to initialize table_factory with
+//     PlainTableFactory. Its PlainTableFactoryOptions can be configured using
+//     the nested-option syntax.
+//     [Example]:
+//     * {"plain_table_factory", "{user_key_len=66;bloom_bits_per_key=20;}"}
+//
+// * memtable_factory:
+//   Use "memtable" to config memtable_factory. Here are the supported
+//   memtable factories:
+//   - SkipList:
+//     Pass "skip_list:<lookahead>" to config memtable to use SkipList,
+//     or simply "skip_list" to use the default SkipList.
+//     [Example]:
+//     * {"memtable", "skip_list:5"} is equivalent to setting
+//       memtable to SkipListFactory(5).
+//   - PrefixHash:
+//     Pass "prefix_hash:<hash_bucket_count>" to config memtable
+//     to use PrefixHash, or simply "prefix_hash" to use the default
+//     PrefixHash.
+//     [Example]:
+//     * {"memtable", "prefix_hash:1000"} is equivalent to setting
+//       memtable to NewHashSkipListRepFactory(hash_bucket_count).
+//   - HashLinkedList:
+//     Pass "hash_linkedlist:<hash_bucket_count>" to config memtable
+//     to use HashLinkedList, or simply "hash_linkedlist" to use the default
+//     HashLinkedList.
+//     [Example]:
+//     * {"memtable", "hash_linkedlist:1000"} is equivalent to
+//       setting memtable to NewHashLinkListRepFactory(1000).
+//   - VectorRepFactory:
+//     Pass "vector:<count>" to config memtable to use VectorRepFactory,
+//     or simply "vector" to use the default Vector memtable.
+//     [Example]:
+//     * {"memtable", "vector:1024"} is equivalent to setting memtable
+//       to VectorRepFactory(1024).
+//
+// * compression_opts:
+//   Use "compression_opts" to config compression_opts. The value format
+//   is of the form "<window_bits>:<level>:<strategy>:<max_dict_bytes>".
+//   [Example]:
+//   * {"compression_opts", "4:5:6:7"} is equivalent to setting:
+//       ColumnFamilyOptions cf_opt;
+//       cf_opt.compression_opts.window_bits = 4;
+//       cf_opt.compression_opts.level = 5;
+//       cf_opt.compression_opts.strategy = 6;
+//       cf_opt.compression_opts.max_dict_bytes = 7;
+//
+// @param config_options controls how the map is processed.
+// @param base_options the default options of the output "new_options".
+// @param opts_map an option name to value map for specifying how
+//     "new_options" should be set.
+// @param new_options the resulting options based on "base_options" with the
+//     change specified in "opts_map".
+// @param input_strings_escaped when set to true, each escaped character
+//     prefixed by '\' in the values of the opts_map will be further converted
+//     back to the raw string before assigning to the associated options.
+// @param ignore_unknown_options when set to true, unknown options are ignored
+//     instead of resulting in an unknown-option error.
+// @return Status::OK() on success. Otherwise, a non-ok status indicating
+//     error will be returned, and "new_options" will be set to "base_options".
+// @return Status::NotFound means one (or more) of the option names in
+//     the opts_map is not valid for this option
+// @return Status::NotSupported means we do not know how to parse one of the
+//     values for this option
+// @return Status::InvalidArgument means one of the option values is not
+//     valid for this option.
+Status GetColumnFamilyOptionsFromMap(
+    const ConfigOptions& config_options,
+    const ColumnFamilyOptions& base_options,
+    const std::unordered_map<std::string, std::string>& opts_map,
+    ColumnFamilyOptions* new_options);
+
+// Take a ConfigOptions `config_options` and a DBOptions "base_options" as
+// the default option in addition to a map "opts_map" of option name to
+// option value to construct the new DBOptions "new_options".
+//
+// Below are the instructions of how to config some non-primitive-typed
+// options in DBOptions:
+//
+// * rate_limiter_bytes_per_sec:
+//   RateLimiter can be configured directly by specifying its bytes_per_sec.
+//   [Example]:
+//   - Passing {"rate_limiter_bytes_per_sec", "1024"} is equivalent to
+//     passing NewGenericRateLimiter(1024) to rate_limiter_bytes_per_sec.
+//
+// @param config_options controls how the map is processed.
+// @param base_options the default options of the output "new_options".
+// @param opts_map an option name to value map for specifying how
+//     "new_options" should be set.
+// @param new_options the resulting options based on "base_options" with the
+//     change specified in "opts_map".
+// @param input_strings_escaped when set to true, each escaped character
+//     prefixed by '\' in the values of the opts_map will be further converted
+//     back to the raw string before assigning to the associated options.
+// @param ignore_unknown_options when set to true, unknown options are ignored
+//     instead of resulting in an unknown-option error.
+// @return Status::OK() on success. Otherwise, a non-ok status indicating
+//     error will be returned, and "new_options" will be set to "base_options".
+// @return Status::NotFound means one (or more) of the option names in
+//     the opts_map is not valid for this option
+// @return Status::NotSupported means we do not know how to parse one of the
+//     values for this option
+// @return Status::InvalidArgument means one of the option values is not
+//     valid for this option.
+Status GetDBOptionsFromMap(
+    const ConfigOptions& cfg_options, const DBOptions& base_options,
+    const std::unordered_map<std::string, std::string>& opts_map,
+    DBOptions* new_options);
+
+// Take a ConfigOptions `config_options` and a BlockBasedTableOptions
+// "table_options" as the default option in addition to a map "opts_map" of
+// option name to option value to construct the new BlockBasedTableOptions
+// "new_table_options".
+//
+// Below are the instructions of how to config some non-primitive-typed
+// options in BlockBasedTableOptions:
+//
+// * filter_policy:
+//   We currently only support the following FilterPolicy in the convenience
+//   functions:
+//   - BloomFilter: use "bloomfilter:[bits_per_key]:[use_block_based_builder]"
+//     to specify BloomFilter. The above string is equivalent to calling
+//     NewBloomFilterPolicy(bits_per_key, use_block_based_builder).
+//     [Example]:
+//     - Pass {"filter_policy", "bloomfilter:4:true"} in
+//       GetBlockBasedTableOptionsFromMap to use a BloomFilter with 4-bits
+//       per key and use_block_based_builder enabled.
+//
+// * block_cache / block_cache_compressed:
+//   We currently only support LRU cache in the GetOptions API. The LRU
+//   cache can be set by directly specifying its size.
+//   [Example]:
+//   - Passing {"block_cache", "1M"} in GetBlockBasedTableOptionsFromMap is
+//     equivalent to setting block_cache using NewLRUCache(1024 * 1024).
+//
+// @param config_options controls how the map is processed.
+// @param table_options the default options of the output "new_table_options".
+// @param opts_map an option name to value map for specifying how
+//     "new_table_options" should be set.
+// @param new_table_options the resulting options based on "table_options"
+//     with the change specified in "opts_map".
+// @param input_strings_escaped when set to true, each escaped character
+//     prefixed by '\' in the values of the opts_map will be further converted
+//     back to the raw string before assigning to the associated options.
+// @param ignore_unknown_options when set to true, unknown options are ignored
+//     instead of resulting in an unknown-option error.
+// @return Status::OK() on success. Otherwise, a non-ok status indicating
+//     error will be returned, and "new_table_options" will be set to
+//     "table_options".
+Status GetBlockBasedTableOptionsFromMap(
+    const ConfigOptions& config_options,
+    const BlockBasedTableOptions& table_options,
+    const std::unordered_map<std::string, std::string>& opts_map,
+    BlockBasedTableOptions* new_table_options);
+
+// Take a ConfigOptions `config_options` and a default PlainTableOptions
+// "table_options" as the default option in addition to a map "opts_map" of
+// option name to option value to construct the new PlainTableOptions
+// "new_table_options".
+//
+// @param config_options controls how the map is processed.
+// @param table_options the default options of the output "new_table_options".
+// @param opts_map an option name to value map for specifying how
+//     "new_table_options" should be set.
+// @param new_table_options the resulting options based on "table_options"
+//     with the change specified in "opts_map".
+// @param input_strings_escaped when set to true, each escaped character
+//     prefixed by '\' in the values of the opts_map will be further converted
+//     back to the raw string before assigning to the associated options.
+// @param ignore_unknown_options when set to true, unknown options are ignored
+//     instead of resulting in an unknown-option error.
+// @return Status::OK() on success. Otherwise, a non-ok status indicating
+//     error will be returned, and "new_table_options" will be set to
+//     "table_options".
+Status GetPlainTableOptionsFromMap(
+    const ConfigOptions& config_options, const PlainTableOptions& table_options,
+    const std::unordered_map<std::string, std::string>& opts_map,
+    PlainTableOptions* new_table_options);
+
+// Take a ConfigOptions `config_options`, a string representation of option
+// names and values, apply them into the base_options, and return the new
+// options as a result. The string has the following format:
+//   "write_buffer_size=1024;max_write_buffer_number=2"
+// Nested options config is also possible. For example, you can define
+// BlockBasedTableOptions as part of the string for block-based table factory:
+//   "write_buffer_size=1024;block_based_table_factory={block_size=4k};"
+//   "max_write_buffer_number=2"
+//
+Status GetColumnFamilyOptionsFromString(const ConfigOptions& config_options,
+                                        const ColumnFamilyOptions& base_options,
+                                        const std::string& opts_str,
+                                        ColumnFamilyOptions* new_options);
+
+Status GetDBOptionsFromString(const ConfigOptions& config_options,
+                              const DBOptions& base_options,
+                              const std::string& opts_str,
+                              DBOptions* new_options);
+
+Status GetStringFromDBOptions(const ConfigOptions& config_options,
+                              const DBOptions& db_options,
+                              std::string* opts_str);
+
+Status GetStringFromDBOptions(std::string* opts_str,
+                              const DBOptions& db_options,
+                              const std::string& delimiter = "; ");
+
+Status GetStringFromColumnFamilyOptions(const ConfigOptions& config_options,
+                                        const ColumnFamilyOptions& cf_options,
+                                        std::string* opts_str);
+Status GetStringFromColumnFamilyOptions(std::string* opts_str,
+                                        const ColumnFamilyOptions& cf_options,
+                                        const std::string& delimiter = "; ");
+Status GetStringFromCompressionType(std::string* compression_str,
+                                    CompressionType compression_type);
+
+std::vector<CompressionType> GetSupportedCompressions();
+
+Status GetBlockBasedTableOptionsFromString(
+    const ConfigOptions& config_options,
+    const BlockBasedTableOptions& table_options, const std::string& opts_str,
+    BlockBasedTableOptions* new_table_options);
+
+Status GetPlainTableOptionsFromString(const ConfigOptions& config_options,
+                                      const PlainTableOptions& table_options,
+                                      const std::string& opts_str,
+                                      PlainTableOptions* new_table_options);
+
+Status GetMemTableRepFactoryFromString(
+    const std::string& opts_str,
+    std::unique_ptr<MemTableRepFactory>* new_mem_factory);
+
+Status GetOptionsFromString(const Options& base_options,
+                            const std::string& opts_str, Options* new_options);
+Status GetOptionsFromString(const ConfigOptions& config_options,
+                            const Options& base_options,
+                            const std::string& opts_str, Options* new_options);
+
+Status StringToMap(const std::string& opts_str,
+                   std::unordered_map<std::string, std::string>* opts_map);
+
+// Request stopping background work, if wait is true wait until it's done
+void CancelAllBackgroundWork(DB* db, bool wait = false);
+
+// Delete files which are entirely in the given range
+// Could leave some keys in the range which are in files which are not
+// entirely in the range. Also leaves L0 files regardless of whether they're
+// in the range.
+// Snapshots before the delete might not see the data in the given range.
+Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
+                          const Slice* begin, const Slice* end,
+                          bool include_end = true);
+
+// Delete files in multiple ranges at once.
+// Deleting files in a lot of ranges one at a time can be slow; use this API
+// for better performance in that case, as sketched below.
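+// [Illustrative example, not part of the upstream header] Deleting files in
+// two key ranges with one call; "db" and the keys are hypothetical:
+//
+//   Slice a("a"), b("b"), c("c"), d("d");
+//   RangePtr ranges[2] = {RangePtr(&a, &b), RangePtr(&c, &d)};
+//   Status s = DeleteFilesInRanges(db, db->DefaultColumnFamily(), ranges, 2);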
+Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
+                           const RangePtr* ranges, size_t n,
+                           bool include_end = true);
+
+// Verify the checksum of file
+Status VerifySstFileChecksum(const Options& options,
+                             const EnvOptions& env_options,
+                             const std::string& file_path);
+
+// Verify the checksum of file
+Status VerifySstFileChecksum(const Options& options,
+                             const EnvOptions& env_options,
+                             const ReadOptions& read_options,
+                             const std::string& file_path,
+                             const SequenceNumber& largest_seqno = 0);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/customizable.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/customizable.h
new file mode 100644
index 0000000000..076aca6590
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/customizable.h
@@ -0,0 +1,229 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "rocksdb/configurable.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+/**
+ * Customizable is a base class used by the rocksdb that describes a
+ * standard way of configuring and creating objects. Customizable objects
+ * are configurable objects that can be created from an ObjectRegistry.
+ *
+ * Customizable classes are used when there are multiple potential
+ * implementations of a class for use by RocksDB (e.g. Table, Cache,
+ * MergeOperator, etc). The abstract base class is expected to define a method
+ * declaring its type and a factory method for creating one of these, such as:
+ *   static const char *Type() { return "Table"; }
+ *   static Status CreateFromString(const ConfigOptions& options,
+ *                                  const std::string& id,
+ *                                  std::shared_ptr<TableFactory>* result);
+ * The "Type" string is expected to be unique (no two base classes are the
+ * same type). This factory is expected, based on the options and id, to
+ * create and return the appropriate derived type of the customizable class
+ * (e.g. BlockBasedTableFactory, PlainTableFactory, etc). For extension
+ * developers, helper classes and methods are provided for writing this
+ * factory.
+ *
+ * Instances of a Customizable class need to define:
+ * - A "static const char *kClassName()" method. This method defines the name
+ *   of the class instance (e.g. BlockBasedTable, LRUCache) and is used by the
+ *   CheckedCast method.
+ * - The Name() of the object. This name is used when creating and saving
+ *   instances of this class. Typically this name will be the same as
+ *   kClassName().
+ *
+ * Additionally, Customizable classes should register any options used to
+ * configure themselves with the Configurable subsystem.
+ *
+ * When a Customizable is being created, the "name" property specifies
+ * the name of the instance being created.
+ * For custom objects, their configuration and name can be specified by:
+ *   [prop]={name=X;option 1 = value1[; option2=value2...]}
+ *
+ *   [prop].name=X
+ *   [prop].option1 = value1
+ *
+ *   [prop].name=X
+ *   X.option1 = value1
+ */
+class Customizable : public Configurable {
+ public:
+  ~Customizable() override {}
+
+  // Returns the name of this class of Customizable
+  virtual const char* Name() const = 0;
+
+  // Returns an identifier for this Customizable.
+  // This could be its name or something more complex (like its URL/pattern).
+  // Used for pretty printing.
+  virtual std::string GetId() const {
+    std::string id = Name();
+    return id;
+  }
+
+  // This is typically determined by if the input name matches the
+  // name of this object.
+  // This method is typically used in conjunction with CheckedCast to find the
+  // derived class instance from its base. For example, if you have an Env
+  // and want the "Default" env, you would call IsInstanceOf("Default") to get
+  // the default implementation. This method should be used when you need a
+  // specific derivative or implementation of a class.
+  //
+  // Intermediary caches (such as SharedCache) may wish to override this method
+  // to check for the intermediary name (SharedCache). Classes with multiple
+  // potential names (e.g. "PosixEnv", "DefaultEnv") may also wish to override
+  // this method.
+  //
+  // Note that IsInstanceOf only uses the "is-a" relationship and not "has-a".
+  // Wrapped classes that have an Inner "has-a" should not be returned.
+  //
+  // @param name The name of the instance to find.
+  // Returns true if the class is an instance of the input name.
+  virtual bool IsInstanceOf(const std::string& name) const {
+    if (name.empty()) {
+      return false;
+    } else if (name == Name()) {
+      return true;
+    } else {
+      const char* nickname = NickName();
+      if (nickname != nullptr && name == nickname) {
+        return true;
+      } else {
+        return false;
+      }
+    }
+  }
+
+  const void* GetOptionsPtr(const std::string& name) const override {
+    const void* ptr = Configurable::GetOptionsPtr(name);
+    if (ptr != nullptr) {
+      return ptr;
+    } else {
+      const auto inner = Inner();
+      if (inner != nullptr) {
+        return inner->GetOptionsPtr(name);
+      } else {
+        return nullptr;
+      }
+    }
+  }
+
+  // Returns the named instance of the Customizable as a T*, or nullptr if not
+  // found. This method uses IsInstanceOf/Inner to find the appropriate class
+  // instance and then casts it to the expected return type.
+  template <typename T>
+  const T* CheckedCast() const {
+    if (IsInstanceOf(T::kClassName())) {
+      return static_cast<const T*>(this);
+    } else {
+      const auto inner = Inner();
+      if (inner != nullptr) {
+        return inner->CheckedCast<T>();
+      } else {
+        return nullptr;
+      }
+    }
+  }
+
+  template <typename T>
+  T* CheckedCast() {
+    if (IsInstanceOf(T::kClassName())) {
+      return static_cast<T*>(this);
+    } else {
+      auto inner = const_cast<Customizable*>(Inner());
+      if (inner != nullptr) {
+        return inner->CheckedCast<T>();
+      } else {
+        return nullptr;
+      }
+    }
+  }
+
+  // Checks to see if this Customizable is equivalent to other.
+  // This method assumes that the two objects are of the same class.
+  // @param config_options Controls how the options are compared.
+  // @param other The other object to compare to.
+  // @param mismatch If the objects do not match, this parameter contains
+  //      the name of the option that triggered the match failure.
+  // @return True if the objects match, false otherwise.
+  // @see Configurable::AreEquivalent for more details
+  bool AreEquivalent(const ConfigOptions& config_options,
+                     const Configurable* other,
+                     std::string* mismatch) const override;
+
+  // Gets the value of the option associated with the input name
+  // @see Configurable::GetOption for more details
+  Status GetOption(const ConfigOptions& config_options, const std::string& name,
+                   std::string* value) const override;
+
+  // Helper method for parsing the opt_value into the corresponding
+  // options for use in potentially creating a new Customizable object (this
+  // method is primarily a support method for LoadSharedObject et al for new
+  // Customizable objects). The opt_value may be either name-value pairs
+  // separated by ";" (a=b; c=d), or a simple name (a). In order to create a
+  // new Customizable, the ID is determined by:
+  // - If the value is a simple name (e.g. "BlockBasedTable"), the id is this
+  //   name;
+  // - Otherwise, if there is a "id=value", the id is set to "value"
+  // - Otherwise, if the input customizable is not null, custom->GetId is used
+  // - Otherwise, an error is returned.
+  //
+  // If the opt_value is name-value pairs, these pairs will be returned in
+  // options (without the id pair). If the ID being returned matches the ID of
+  // the input custom object, then the options from the input object will also
+  // be added to the returned options.
+  //
+  // This method returns non-OK if the ID could not be found, or if the
+  // opt_value could not be parsed into name-value pairs.
+  static Status GetOptionsMap(
+      const ConfigOptions& config_options, const Customizable* custom,
+      const std::string& opt_value, std::string* id,
+      std::unordered_map<std::string, std::string>* options);
+
+  // Helper method to configure a new object with the supplied options.
+  // If the object is not null and invoke_prepare_options=true, the object
+  // will be configured and prepared.
+  // Returns success if the object is properly configured and (optionally)
+  // prepared.
+  // Returns InvalidArgument if the object is nullptr and there are options in
+  // the map.
+  // Otherwise, returns the result of the ConfigureFromMap or PrepareOptions.
+  static Status ConfigureNewObject(
+      const ConfigOptions& config_options, Customizable* object,
+      const std::unordered_map<std::string, std::string>& options);
+
+  // Returns the inner class when a Customizable implements a has-a (wrapped)
+  // relationship. Derived classes that implement a has-a must override this
+  // method in order to get CheckedCast to function properly.
+  virtual const Customizable* Inner() const { return nullptr; }
+
+ protected:
+  // Generates an ID specific for this instance of the customizable.
+  // The unique ID is of the form <name>:<addr>#pid, where:
+  // - name is the Name() of this object;
+  // - addr is the memory address of this object;
+  // - pid is the process ID for this process.
+  // Note that if obj1 and obj2 have the same unique IDs, they must be the
+  // same. However, if an object is deleted and recreated, it may have the
+  // same unique ID as a predecessor.
+  //
+  // This method is useful for objects (especially ManagedObjects) that
+  // wish to generate an ID that is specific for this instance and wish to
+  // override the GetId() method.
+  std::string GenerateIndividualId() const;
+
+  // Some classes have both a class name (e.g. PutOperator) and a nickname
+  // (e.g. put). Classes can override this method to return a
+  // nickname. Nicknames can be used by IsInstanceOf and object creation.
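+  // [Illustrative example, not part of the upstream header] A hypothetical
+  // operator class named "PutOperator" could expose its nickname as:
+  //
+  //   const char* NickName() const override { return "put"; }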
+  virtual const char* NickName() const { return ""; }
+
+  // Given a name (e.g. rocksdb.my.type.opt), returns the short name (opt)
+  std::string GetOptionName(const std::string& long_name) const override;
+  std::string SerializeOptions(const ConfigOptions& options,
+                               const std::string& prefix) const override;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/data_structure.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/data_structure.h
new file mode 100644
index 0000000000..ffab82c514
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/data_structure.h
@@ -0,0 +1,186 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <assert.h>
+
+#include <cstdint>
+#include <initializer_list>
+#include <type_traits>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace detail {
+int CountTrailingZeroBitsForSmallEnumSet(uint64_t);
+}  // namespace detail
+
+// Represents a set of values of some enum type with a small number of
+// possible enumerators. For now, it supports enums where no enumerator
+// exceeds 63 when converted to int.
+template <typename ENUM_TYPE, ENUM_TYPE MAX_ENUMERATOR>
+class SmallEnumSet {
+ private:
+  using StateT = uint64_t;
+  static constexpr int kStateBits = sizeof(StateT) * 8;
+  static constexpr int kMaxMax = kStateBits - 1;
+  static constexpr int kMaxValue = static_cast<int>(MAX_ENUMERATOR);
+  static_assert(kMaxValue >= 0);
+  static_assert(kMaxValue <= kMaxMax);
+
+ public:
+  // construct / create
+  SmallEnumSet() : state_(0) {}
+
+  template <class... TRest>
+  /*implicit*/ constexpr SmallEnumSet(const ENUM_TYPE e, TRest... rest) {
+    *this = SmallEnumSet(rest...).With(e);
+  }
+
+  // Return the set that includes all valid values, assuming the enum
+  // is "dense" (includes all values converting to 0 through kMaxValue)
+  static constexpr SmallEnumSet All() {
+    StateT tmp = StateT{1} << kMaxValue;
+    return SmallEnumSet(RawStateMarker(), tmp | (tmp - 1));
+  }
+
+  // equality
+  bool operator==(const SmallEnumSet& that) const {
+    return this->state_ == that.state_;
+  }
+  bool operator!=(const SmallEnumSet& that) const { return !(*this == that); }
+
+  // query
+
+  // Return true if the input enum is contained in the "Set".
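+  // [Illustrative example, not part of the upstream header] Membership test
+  // on a small set of CompressionType values (from compression_type.h);
+  // kZSTD (0x7) is used as MAX_ENUMERATOR to stay within the 63-value limit:
+  //
+  //   SmallEnumSet<CompressionType, kZSTD> set(kSnappyCompression, kZSTD);
+  //   assert(set.Contains(kZSTD));
+  //   assert(!set.Contains(kLZ4Compression));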
+  bool Contains(const ENUM_TYPE e) const {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    StateT tmp = 1;
+    return state_ & (tmp << value);
+  }
+
+  bool empty() const { return state_ == 0; }
+
+  // iterator
+  class const_iterator {
+   public:
+    // copy
+    const_iterator(const const_iterator& that) = default;
+    const_iterator& operator=(const const_iterator& that) = default;
+
+    // move
+    const_iterator(const_iterator&& that) noexcept = default;
+    const_iterator& operator=(const_iterator&& that) noexcept = default;
+
+    // equality
+    bool operator==(const const_iterator& that) const {
+      assert(set_ == that.set_);
+      return this->pos_ == that.pos_;
+    }
+
+    bool operator!=(const const_iterator& that) const {
+      return !(*this == that);
+    }
+
+    // ++iterator
+    const_iterator& operator++() {
+      if (pos_ < kMaxValue) {
+        pos_ = set_->SkipUnset(pos_ + 1);
+      } else {
+        pos_ = kStateBits;
+      }
+      return *this;
+    }
+
+    // iterator++
+    const_iterator operator++(int) {
+      auto old = *this;
+      ++*this;
+      return old;
+    }
+
+    ENUM_TYPE operator*() const {
+      assert(pos_ <= kMaxValue);
+      return static_cast<ENUM_TYPE>(pos_);
+    }
+
+   private:
+    friend class SmallEnumSet;
+    const_iterator(const SmallEnumSet* set, int pos) : set_(set), pos_(pos) {}
+    const SmallEnumSet* set_;
+    int pos_;
+  };
+
+  const_iterator begin() const { return const_iterator(this, SkipUnset(0)); }
+
+  const_iterator end() const { return const_iterator(this, kStateBits); }
+
+  // mutable ops
+
+  // Modifies the set (if needed) to include the given value. Returns true
+  // iff the set was modified.
+  bool Add(const ENUM_TYPE e) {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    StateT old_state = state_;
+    state_ |= (StateT{1} << value);
+    return old_state != state_;
+  }
+
+  // Modifies the set (if needed) not to include the given value. Returns true
+  // iff the set was modified.
+  bool Remove(const ENUM_TYPE e) {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    StateT old_state = state_;
+    state_ &= ~(StateT{1} << value);
+    return old_state != state_;
+  }
+
+  // applicative ops
+
+  // Return a new set based on this one with the additional value(s) inserted
+  constexpr SmallEnumSet With(const ENUM_TYPE e) const {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    return SmallEnumSet(RawStateMarker(), state_ | (StateT{1} << value));
+  }
+  template <class... TRest>
+  constexpr SmallEnumSet With(const ENUM_TYPE e1, const ENUM_TYPE e2,
+                              TRest... rest) const {
+    return With(e1).With(e2, rest...);
+  }
+
+  // Return a new set based on this one excluding the given value(s)
+  constexpr SmallEnumSet Without(const ENUM_TYPE e) const {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    return SmallEnumSet(RawStateMarker(), state_ & ~(StateT{1} << value));
+  }
+  template <class... TRest>
+  constexpr SmallEnumSet Without(const ENUM_TYPE e1, const ENUM_TYPE e2,
+                                 TRest... rest) const {
+    return Without(e1).Without(e2, rest...);
+  }
+
+ private:
+  int SkipUnset(int pos) const {
+    StateT tmp = state_ >> pos;
+    if (tmp == 0) {
+      return kStateBits;
+    } else {
+      return pos + detail::CountTrailingZeroBitsForSmallEnumSet(tmp);
+    }
+  }
+  struct RawStateMarker {};
+  explicit SmallEnumSet(RawStateMarker, StateT state) : state_(state) {}
+
+  StateT state_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db.h
new file mode 100644
index 0000000000..a42f5b8b6d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db.h
@@ -0,0 +1,1980 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "rocksdb/block_cache_trace_writer.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/metadata.h"
+#include "rocksdb/options.h"
+#include "rocksdb/snapshot.h"
+#include "rocksdb/sst_file_writer.h"
+#include "rocksdb/thread_status.h"
+#include "rocksdb/transaction_log.h"
+#include "rocksdb/types.h"
+#include "rocksdb/version.h"
+#include "rocksdb/wide_columns.h"
+
+#ifdef _WIN32
+// Windows API macro interference
+#undef DeleteFile
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
+#elif _WIN32
+#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+struct ColumnFamilyOptions;
+struct CompactionOptions;
+struct CompactRangeOptions;
+struct DBOptions;
+struct ExternalSstFileInfo;
+struct FlushOptions;
+struct Options;
+struct ReadOptions;
+struct TableProperties;
+struct WriteOptions;
+class Env;
+class EventListener;
+class FileSystem;
+class Replayer;
+class StatsHistoryIterator;
+class TraceReader;
+class TraceWriter;
+class WriteBatch;
+
+extern const std::string kDefaultColumnFamilyName;
+extern const std::string kPersistentStatsColumnFamilyName;
+struct ColumnFamilyDescriptor {
+  std::string name;
+  ColumnFamilyOptions options;
+  ColumnFamilyDescriptor()
+      : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
+  ColumnFamilyDescriptor(const std::string& _name,
+                         const ColumnFamilyOptions& _options)
+      : name(_name), options(_options) {}
+};
+
+class ColumnFamilyHandle {
+ public:
+  virtual ~ColumnFamilyHandle() {}
+  // Returns the name of the column family associated with the current handle.
+  virtual const std::string& GetName() const = 0;
+  // Returns the ID of the column family associated with the current handle.
+  virtual uint32_t GetID() const = 0;
+  // Fills "*desc" with the up-to-date descriptor of the column family
+  // associated with this handle. Since it fills "*desc" with the up-to-date
+  // information, this call might internally lock and release DB mutex to
+  // access the up-to-date CF options. In addition, all the pointer-typed
+  // options cannot be referenced any longer than the original options exist.
+  //
+  // Note that this function is not supported in RocksDBLite.
+  virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
+  // Returns the comparator of the column family associated with the
+  // current handle.
+  virtual const Comparator* GetComparator() const = 0;
+};
+
+static const int kMajorVersion = __ROCKSDB_MAJOR__;
+static const int kMinorVersion = __ROCKSDB_MINOR__;
+
+// A range of keys
+struct Range {
+  Slice start;
+  Slice limit;
+
+  Range() {}
+  Range(const Slice& s, const Slice& l) : start(s), limit(l) {}
+};
+
+struct RangePtr {
+  const Slice* start;
+  const Slice* limit;
+
+  RangePtr() : start(nullptr), limit(nullptr) {}
+  RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {}
+};
+
+// It is valid that files_checksums and files_checksum_func_names are both
+// empty (no checksum information is provided for ingestion). Otherwise,
+// their sizes should be the same as external_files. The file order should
+// be the same in three vectors and guaranteed by the caller.
+// Note that, we assume the temperatures of this batch of files to be
+// ingested are the same.
+struct IngestExternalFileArg {
+  ColumnFamilyHandle* column_family = nullptr;
+  std::vector<std::string> external_files;
+  IngestExternalFileOptions options;
+  std::vector<std::string> files_checksums;
+  std::vector<std::string> files_checksum_func_names;
+  Temperature file_temperature = Temperature::kUnknown;
+};
+
+struct GetMergeOperandsOptions {
+  int expected_max_number_of_operands = 0;
+};
+
+// A collection of table properties objects, where
+//  key: is the table's file name.
+//  value: the table properties object of the given table.
+using TablePropertiesCollection =
+    std::unordered_map<std::string, std::shared_ptr<const TableProperties>>;
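+
+// [Editor's note] Illustrative sketch, an editorial addition rather than
+// part of the upstream header. One plausible way to fill in
+// IngestExternalFileArg for a batch of SST files; "cf_handle" and the file
+// paths are invented, and move_files is assumed to be a field of
+// IngestExternalFileOptions as in upstream RocksDB:
+//
+//   IngestExternalFileArg arg;
+//   arg.column_family = cf_handle;
+//   arg.external_files = {"/tmp/batch/000.sst", "/tmp/batch/001.sst"};
+//   arg.options.move_files = true;  // move instead of copying the files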
+
+// A DB is a persistent, versioned ordered map from keys to values.
+// A DB is safe for concurrent access from multiple threads without
+// any external synchronization.
+// DB is an abstract base class with one primary implementation (DBImpl)
+// and a number of wrapper implementations.
+class DB {
+ public:
+  // Open the database with the specified "name" for reads and writes.
+  // Stores a pointer to a heap-allocated database in *dbptr and returns
+  // OK on success.
+  // Stores nullptr in *dbptr and returns a non-OK status on error, including
+  // if the DB is already open (read-write) by another DB object. (This
+  // guarantee depends on options.env->LockFile(), which might not provide
+  // this guarantee in a custom Env implementation.)
+  //
+  // Caller must delete *dbptr when it is no longer needed.
+  static Status Open(const Options& options, const std::string& name,
+                     DB** dbptr);
+
+  // Open DB with column families.
+  // db_options specify database specific options
+  // column_families is the vector of all column families in the database,
+  // containing column family name and options. You need to open ALL column
+  // families in the database. To get the list of column families, you can use
+  // ListColumnFamilies().
+  //
+  // The default column family name is 'default' and it's stored
+  // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName.
+  // If everything is OK, handles will on return be the same size
+  // as column_families --- handles[i] will be a handle that you
+  // will use to operate on column family column_family[i].
+  // Before deleting the DB, you have to close all column families by calling
+  // DestroyColumnFamilyHandle() with all the handles.
+  static Status Open(const DBOptions& db_options, const std::string& name,
+                     const std::vector<ColumnFamilyDescriptor>& column_families,
+                     std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
+
+  // OpenForReadOnly() creates a Read-only instance that supports reads alone.
+  //
+  // All DB interfaces that modify data, like put/delete, will return error.
+  // Automatic Flush and Compactions are disabled and any manual calls
+  // to Flush/Compaction will return error.
+  //
+  // While a given DB can be simultaneously opened via OpenForReadOnly
+  // by any number of readers, if a DB is simultaneously opened by Open
+  // and OpenForReadOnly, the read-only instance has undefined behavior
+  // (though can often succeed if quickly closed) and the read-write
+  // instance is unaffected. See also OpenAsSecondary.
+
+  // Open the database for read only.
+  //
+  static Status OpenForReadOnly(const Options& options, const std::string& name,
+                                DB** dbptr,
+                                bool error_if_wal_file_exists = false);
+
+  // Open the database for read only with column families.
+  //
+  // When opening DB with read only, you can specify only a subset of column
+  // families in the database that should be opened. However, you always need
+  // to specify default column family. The default column family name is
+  // 'default' and it's stored in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName
+  //
+  static Status OpenForReadOnly(
+      const DBOptions& db_options, const std::string& name,
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
+      bool error_if_wal_file_exists = false);
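+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. One plausible open/close sequence for a DB
+  // with one extra column family; "kDBPath" and "new_cf" are invented:
+  //
+  //   std::vector<ColumnFamilyDescriptor> cfs = {
+  //       {kDefaultColumnFamilyName, ColumnFamilyOptions()},
+  //       {"new_cf", ColumnFamilyOptions()}};
+  //   std::vector<ColumnFamilyHandle*> handles;
+  //   DB* db = nullptr;
+  //   Status s = DB::Open(DBOptions(), kDBPath, cfs, &handles, &db);
+  //   // ... use db ...
+  //   for (ColumnFamilyHandle* h : handles) db->DestroyColumnFamilyHandle(h);
+  //   delete db;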
+
+  // OpenAsSecondary() creates a secondary instance that supports read-only
+  // operations and supports dynamic catch up with the primary (through a
+  // call to TryCatchUpWithPrimary()).
+  //
+  // All DB interfaces that modify data, like put/delete, will return error.
+  // Automatic Flush and Compactions are disabled and any manual calls
+  // to Flush/Compaction will return error.
+  //
+  // Multiple secondary instances can co-exist at the same time.
+  //
+
+  // Open DB as secondary instance
+  //
+  // The options argument specifies the options to open the secondary instance.
+  // Options.max_open_files should be set to -1.
+  // The name argument specifies the name of the primary db that you have used
+  // to open the primary instance.
+  // The secondary_path argument points to a directory where the secondary
+  // instance stores its info log.
+  // The dbptr is an out-arg corresponding to the opened secondary instance.
+  // The pointer points to a heap-allocated database, and the caller should
+  // delete it after use.
+  //
+  // Return OK on success, non-OK on failures.
+  static Status OpenAsSecondary(const Options& options, const std::string& name,
+                                const std::string& secondary_path, DB** dbptr);
+
+  // Open DB as secondary instance with specified column families
+  //
+  // When opening DB in secondary mode, you can specify only a subset of column
+  // families in the database that should be opened. However, you always need
+  // to specify default column family. The default column family name is
+  // 'default' and it's stored in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName
+  //
+  // Column families created by the primary after the secondary instance starts
+  // are currently ignored by the secondary instance. Column families opened
+  // by secondary and dropped by the primary will be dropped by secondary as
+  // well (on next invocation of TryCatchUpWithPrimary()). However the user
+  // of the secondary instance can still access the data of such dropped column
+  // family as long as they do not destroy the corresponding column family
+  // handle.
+  //
+  // The options argument specifies the options to open the secondary instance.
+  // Options.max_open_files should be set to -1.
+  // The name argument specifies the name of the primary db that you have used
+  // to open the primary instance.
+  // The secondary_path argument points to a directory where the secondary
+  // instance stores its info log.
+  // The column_families argument specifies a list of column families to open.
+  // If default column family is not specified or if any specified column
+  // families does not exist, the function returns non-OK status.
+  // The handles is an out-arg corresponding to the opened database column
+  // family handles.
+  // The dbptr is an out-arg corresponding to the opened secondary instance.
+  // The pointer points to a heap-allocated database, and the caller should
+  // delete it after use. Before deleting the dbptr, the user should also
+  // delete the pointers stored in handles vector.
+  //
+  // Return OK on success, non-OK on failures.
+  static Status OpenAsSecondary(
+      const DBOptions& db_options, const std::string& name,
+      const std::string& secondary_path,
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
+
+  // Open DB and run the compaction.
+  // It's a read-only operation, the result won't be installed to the DB, it
+  // will be output to the `output_directory`. The API should only be used with
+  // `options.CompactionService` to run compaction triggered by
+  // `CompactionService`.
+  static Status OpenAndCompact(
+      const std::string& name, const std::string& output_directory,
+      const std::string& input, std::string* output,
+      const CompactionServiceOptionsOverride& override_options);
+
+  static Status OpenAndCompact(
+      const OpenAndCompactOptions& options, const std::string& name,
+      const std::string& output_directory, const std::string& input,
+      std::string* output,
+      const CompactionServiceOptionsOverride& override_options);
+
+  // Experimental and subject to change
+  // Open DB and trim data newer than specified timestamp.
+  // The trim_ts specifies the user-defined timestamp trim bound.
+  // This API should only be used at timestamp enabled column families recovery.
+  // If some input column families do not support timestamp, nothing will
+  // happen to them. The data with timestamp > trim_ts
+  // will be removed after this API returns successfully.
+  static Status OpenAndTrimHistory(
+      const DBOptions& db_options, const std::string& dbname,
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
+      std::string trim_ts);
+
+  // Manually, synchronously attempt to resume DB writes after a write failure
+  // to the underlying filesystem. See
+  // https://github.com/facebook/rocksdb/wiki/Background-Error-Handling
+  //
+  // Returns OK if writes are successfully resumed, or there was no
+  // outstanding error to recover from. Returns underlying write error if
+  // it is not recoverable.
+  //
+  // WART: Does not mix well with auto-resume. Will return Busy if an
+  // auto-resume is in progress, without waiting for it to complete.
+  // See DBOptions::max_bgerror_resume_count and
+  // EventListener::OnErrorRecoveryBegin
+  virtual Status Resume() { return Status::NotSupported(); }
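+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. A plausible secondary-instance flow;
+  // "kPrimaryPath" and "kSecondaryPath" are invented, and
+  // TryCatchUpWithPrimary() is declared elsewhere in this class:
+  //
+  //   Options options;
+  //   options.max_open_files = -1;  // required for secondary instances
+  //   DB* secondary = nullptr;
+  //   Status s = DB::OpenAsSecondary(options, kPrimaryPath, kSecondaryPath,
+  //                                  &secondary);
+  //   if (s.ok()) s = secondary->TryCatchUpWithPrimary();  // pull new state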
+
+  // Close the DB by releasing resources, closing files etc. This should be
+  // called before calling the destructor so that the caller can get back a
+  // status in case there are any errors. This will not fsync the WAL files.
+  // If syncing is required, the caller must first call SyncWAL(), or Write()
+  // using an empty write batch with WriteOptions.sync=true.
+  // Regardless of the return status, the DB must be freed.
+  // If the return status is Aborted(), closing fails because there is
+  // unreleased snapshot in the system. In this case, users can release
+  // the unreleased snapshots and try again and expect it to succeed. For
+  // other status, re-calling Close() will be no-op and return the original
+  // close status. If the return status is NotSupported(), then the DB
+  // implementation does cleanup in the destructor
+  virtual Status Close() { return Status::NotSupported(); }
+
+  // ListColumnFamilies will open the DB specified by argument name
+  // and return the list of all column families in that DB
+  // through column_families argument. The ordering of
+  // column families in column_families is unspecified.
+  static Status ListColumnFamilies(const DBOptions& db_options,
+                                   const std::string& name,
+                                   std::vector<std::string>* column_families);
+
+  // Abstract class ctor
+  DB() {}
+  // No copying allowed
+  DB(const DB&) = delete;
+  void operator=(const DB&) = delete;
+
+  virtual ~DB();
+
+  // Create a column_family and return the handle of column family
+  // through the argument handle.
+  virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
+                                    const std::string& column_family_name,
+                                    ColumnFamilyHandle** handle);
+
+  // Bulk create column families with the same column family options.
+  // Return the handles of the column families through the argument handles.
+  // In case of error, the request may succeed partially, and handles will
+  // contain column family handles that it managed to create, and have size
+  // equal to the number of created column families.
+  virtual Status CreateColumnFamilies(
+      const ColumnFamilyOptions& options,
+      const std::vector<std::string>& column_family_names,
+      std::vector<ColumnFamilyHandle*>* handles);
+
+  // Bulk create column families.
+  // Return the handles of the column families through the argument handles.
+  // In case of error, the request may succeed partially, and handles will
+  // contain column family handles that it managed to create, and have size
+  // equal to the number of created column families.
+  virtual Status CreateColumnFamilies(
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      std::vector<ColumnFamilyHandle*>* handles);
+
+  // Drop a column family specified by column_family handle. This call
+  // only records a drop record in the manifest and prevents the column
+  // family from flushing and compacting.
+  virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
+
+  // Bulk drop column families. This call only records drop records in the
+  // manifest and prevents the column families from flushing and compacting.
+  // In case of error, the request may succeed partially. User may call
+  // ListColumnFamilies to check the result.
+  virtual Status DropColumnFamilies(
+      const std::vector<ColumnFamilyHandle*>& column_families);
+
+  // Release and deallocate a column family handle. A column family is only
+  // removed once it is dropped (DropColumnFamily) and all handles have been
+  // destroyed (DestroyColumnFamilyHandle). Use this method to destroy
+  // column family handles (except for DefaultColumnFamily()!) before closing
+  // a DB.
+  virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family);
+
+  // Set the database entry for "key" to "value".
+  // If "key" already exists, it will be overwritten.
+  // Returns OK on success, and a non-OK status on error.
+  // Note: consider setting options.sync = true.
+  virtual Status Put(const WriteOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& value) = 0;
+  virtual Status Put(const WriteOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& ts, const Slice& value) = 0;
+  virtual Status Put(const WriteOptions& options, const Slice& key,
+                     const Slice& value) {
+    return Put(options, DefaultColumnFamily(), key, value);
+  }
+  virtual Status Put(const WriteOptions& options, const Slice& key,
+                     const Slice& ts, const Slice& value) {
+    return Put(options, DefaultColumnFamily(), key, ts, value);
+  }
+
+  // Set the database entry for "key" in the column family specified by
+  // "column_family" to the wide-column entity defined by "columns". If the key
+  // already exists in the column family, it will be overwritten.
+  //
+  // Returns OK on success, and a non-OK status on error.
+  virtual Status PutEntity(const WriteOptions& options,
+                           ColumnFamilyHandle* column_family, const Slice& key,
+                           const WideColumns& columns);
+
+  // Remove the database entry (if any) for "key". Returns OK on
+  // success, and a non-OK status on error. It is not an error if "key"
+  // did not exist in the database.
+  // Note: consider setting options.sync = true.
+  virtual Status Delete(const WriteOptions& options,
+                        ColumnFamilyHandle* column_family,
+                        const Slice& key) = 0;
+  virtual Status Delete(const WriteOptions& options,
+                        ColumnFamilyHandle* column_family, const Slice& key,
+                        const Slice& ts) = 0;
+  virtual Status Delete(const WriteOptions& options, const Slice& key) {
+    return Delete(options, DefaultColumnFamily(), key);
+  }
+  virtual Status Delete(const WriteOptions& options, const Slice& key,
+                        const Slice& ts) {
+    return Delete(options, DefaultColumnFamily(), key, ts);
+  }
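+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. Basic single-key writes against the
+  // default column family, assuming an opened "db":
+  //
+  //   WriteOptions wopts;
+  //   wopts.sync = true;  // as the comments above suggest considering
+  //   Status s = db->Put(wopts, "key1", "value1");
+  //   if (s.ok()) s = db->Delete(wopts, "key1");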
+
+  // Remove the database entry for "key". Requires that the key exists
+  // and was not overwritten. Returns OK on success, and a non-OK status
+  // on error. It is not an error if "key" did not exist in the database.
+  //
+  // If a key is overwritten (by calling Put() multiple times), then the result
+  // of calling SingleDelete() on this key is undefined. SingleDelete() only
+  // behaves correctly if there has been only one Put() for this key since the
+  // previous call to SingleDelete() for this key.
+  //
+  // This feature is currently an experimental performance optimization
+  // for a very specific workload. It is up to the caller to ensure that
+  // SingleDelete is only used for a key that is not deleted using Delete() or
+  // written using Merge(). Mixing SingleDelete operations with Deletes and
+  // Merges can result in undefined behavior.
+  //
+  // Note: consider setting options.sync = true.
+  virtual Status SingleDelete(const WriteOptions& options,
+                              ColumnFamilyHandle* column_family,
+                              const Slice& key) = 0;
+  virtual Status SingleDelete(const WriteOptions& options,
+                              ColumnFamilyHandle* column_family,
+                              const Slice& key, const Slice& ts) = 0;
+  virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
+    return SingleDelete(options, DefaultColumnFamily(), key);
+  }
+  virtual Status SingleDelete(const WriteOptions& options, const Slice& key,
+                              const Slice& ts) {
+    return SingleDelete(options, DefaultColumnFamily(), key, ts);
+  }
+
+  // Removes the database entries in the range ["begin_key", "end_key"), i.e.,
+  // including "begin_key" and excluding "end_key". Returns OK on success, and
+  // a non-OK status on error. It is not an error if the database does not
+  // contain any existing data in the range ["begin_key", "end_key").
+  //
+  // If "end_key" comes before "start_key" according to the user's comparator,
+  // a `Status::InvalidArgument` is returned.
+  //
+  // This feature is now usable in production, with the following caveats:
+  // 1) Accumulating too many range tombstones in the memtable will degrade read
+  // performance; this can be avoided by manually flushing occasionally.
+  // 2) Limiting the maximum number of open files in the presence of range
+  // tombstones can degrade read performance. To avoid this problem, set
+  // max_open_files to -1 whenever possible.
+  virtual Status DeleteRange(const WriteOptions& options,
+                             ColumnFamilyHandle* column_family,
+                             const Slice& begin_key, const Slice& end_key);
+  virtual Status DeleteRange(const WriteOptions& options,
+                             ColumnFamilyHandle* column_family,
+                             const Slice& begin_key, const Slice& end_key,
+                             const Slice& ts);
+
+  // Merge the database entry for "key" with "value". Returns OK on success,
+  // and a non-OK status on error. The semantics of this operation is
+  // determined by the user provided merge_operator when opening DB.
+  // Note: consider setting options.sync = true.
+  virtual Status Merge(const WriteOptions& options,
+                       ColumnFamilyHandle* column_family, const Slice& key,
+                       const Slice& value) = 0;
+  virtual Status Merge(const WriteOptions& options, const Slice& key,
+                       const Slice& value) {
+    return Merge(options, DefaultColumnFamily(), key, value);
+  }
+  virtual Status Merge(const WriteOptions& /*options*/,
+                       ColumnFamilyHandle* /*column_family*/,
+                       const Slice& /*key*/, const Slice& /*ts*/,
+                       const Slice& /*value*/);
+
+  // Apply the specified updates to the database.
+  // If `updates` contains no update, WAL will still be synced if
+  // options.sync=true.
+  // Returns OK on success, non-OK on failure.
+  // Note: consider setting options.sync = true.
+  virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
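+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. Applying several updates atomically
+  // through a WriteBatch, assuming an opened "db" and a handle "cf":
+  //
+  //   WriteBatch batch;
+  //   batch.Put(cf, "key1", "value1");
+  //   batch.Delete(cf, "obsolete_key");
+  //   Status s = db->Write(WriteOptions(), &batch);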
+
+  // If the column family specified by "column_family" contains an entry for
+  // "key", return the corresponding value in "*value". If the entry is a plain
+  // key-value, return the value as-is; if it is a wide-column entity, return
+  // the value of its default anonymous column (see kDefaultWideColumnName) if
+  // any, or an empty value otherwise.
+  //
+  // If timestamp is enabled and a non-null timestamp pointer is passed in,
+  // timestamp is returned.
+  //
+  // Returns OK on success. Returns NotFound and an empty value in "*value" if
+  // there is no entry for "key". Returns some other non-OK status on error.
+  virtual inline Status Get(const ReadOptions& options,
+                            ColumnFamilyHandle* column_family, const Slice& key,
+                            std::string* value) {
+    assert(value != nullptr);
+    PinnableSlice pinnable_val(value);
+    assert(!pinnable_val.IsPinned());
+    auto s = Get(options, column_family, key, &pinnable_val);
+    if (s.ok() && pinnable_val.IsPinned()) {
+      value->assign(pinnable_val.data(), pinnable_val.size());
+    }  // else value is already assigned
+    return s;
+  }
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value) = 0;
+  virtual Status Get(const ReadOptions& options, const Slice& key,
+                     std::string* value) {
+    return Get(options, DefaultColumnFamily(), key, value);
+  }
+
+  // Get() methods that return timestamp. Derived DB classes don't need to worry
+  // about this group of methods if they don't care about timestamp feature.
+  virtual inline Status Get(const ReadOptions& options,
+                            ColumnFamilyHandle* column_family, const Slice& key,
+                            std::string* value, std::string* timestamp) {
+    assert(value != nullptr);
+    PinnableSlice pinnable_val(value);
+    assert(!pinnable_val.IsPinned());
+    auto s = Get(options, column_family, key, &pinnable_val, timestamp);
+    if (s.ok() && pinnable_val.IsPinned()) {
+      value->assign(pinnable_val.data(), pinnable_val.size());
+    }  // else value is already assigned
+    return s;
+  }
+  virtual Status Get(const ReadOptions& /*options*/,
+                     ColumnFamilyHandle* /*column_family*/,
+                     const Slice& /*key*/, PinnableSlice* /*value*/,
+                     std::string* /*timestamp*/) {
+    return Status::NotSupported(
+        "Get() that returns timestamp is not implemented.");
+  }
+  virtual Status Get(const ReadOptions& options, const Slice& key,
+                     std::string* value, std::string* timestamp) {
+    return Get(options, DefaultColumnFamily(), key, value, timestamp);
+  }
+
+  // If the column family specified by "column_family" contains an entry for
+  // "key", return it as a wide-column entity in "*columns". If the entry is a
+  // wide-column entity, return it as-is; if it is a plain key-value, return it
+  // as an entity with a single anonymous column (see kDefaultWideColumnName)
+  // which contains the value.
+  //
+  // Returns OK on success. Returns NotFound and an empty wide-column entity in
+  // "*columns" if there is no entry for "key". Returns some other non-OK status
+  // on error.
+  virtual Status GetEntity(const ReadOptions& /* options */,
+                           ColumnFamilyHandle* /* column_family */,
+                           const Slice& /* key */,
+                           PinnableWideColumns* /* columns */) {
+    return Status::NotSupported("GetEntity not supported");
+  }
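+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. Reading a key through the PinnableSlice
+  // overload, which can avoid a copy when the value is pinned; assumes an
+  // opened "db":
+  //
+  //   PinnableSlice pinned;
+  //   Status s = db->Get(ReadOptions(), db->DefaultColumnFamily(), "key1",
+  //                      &pinned);
+  //   if (s.ok()) { /* use pinned.data(), pinned.size() */ }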
+
+  // Populates the `merge_operands` array with all the merge operands in the DB
+  // for `key`. The `merge_operands` array will be populated in the order of
+  // insertion. The number of entries populated in `merge_operands` will be
+  // assigned to `*number_of_operands`.
+  //
+  // If the number of merge operands in DB for `key` is greater than
+  // `merge_operands_options.expected_max_number_of_operands`,
+  // `merge_operands` is not populated and the return value is
+  // `Status::Incomplete`. In that case, `*number_of_operands` will be assigned
+  // the number of merge operands found in the DB for `key`.
+  //
+  // `merge_operands`- Points to an array of at-least
+  //             merge_operands_options.expected_max_number_of_operands and the
+  //             caller is responsible for allocating it.
+  //
+  // The caller should delete or `Reset()` the `merge_operands` entries when
+  // they are no longer needed. All `merge_operands` entries must be destroyed
+  // or `Reset()` before this DB is closed or destroyed.
+  virtual Status GetMergeOperands(
+      const ReadOptions& options, ColumnFamilyHandle* column_family,
+      const Slice& key, PinnableSlice* merge_operands,
+      GetMergeOperandsOptions* get_merge_operands_options,
+      int* number_of_operands) = 0;
+
+  // Consistent Get of many keys across column families without the need
+  // for an explicit snapshot. NOTE: the implementation of this MultiGet API
+  // does not have the performance benefits of the void-returning MultiGet
+  // functions.
+  //
+  // If keys[i] does not exist in the database, then the i'th returned
+  // status will be one for which Status::IsNotFound() is true, and
+  // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
+  // the i'th returned status will have Status::ok() true, and (*values)[i]
+  // will store the value associated with keys[i].
+  //
+  // (*values) will always be resized to be the same size as (keys).
+  // Similarly, the number of returned statuses will be the number of keys.
+  // Note: keys will not be "de-duplicated". Duplicate keys will return
+  // duplicate values in order.
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_family,
+      const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
+  virtual std::vector<Status> MultiGet(const ReadOptions& options,
+                                       const std::vector<Slice>& keys,
+                                       std::vector<std::string>* values) {
+    return MultiGet(
+        options,
+        std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
+        keys, values);
+  }
+
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& /*options*/,
+      const std::vector<ColumnFamilyHandle*>& /*column_family*/,
+      const std::vector<Slice>& keys, std::vector<std::string>* /*values*/,
+      std::vector<std::string>* /*timestamps*/) {
+    return std::vector<Status>(
+        keys.size(), Status::NotSupported(
+                         "MultiGet() returning timestamps not implemented."));
+  }
+  virtual std::vector<Status> MultiGet(const ReadOptions& options,
+                                       const std::vector<Slice>& keys,
+                                       std::vector<std::string>* values,
+                                       std::vector<std::string>* timestamps) {
+    return MultiGet(
+        options,
+        std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
+        keys, values, timestamps);
+  }
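+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. The vector-based MultiGet against the
+  // default column family, assuming an opened "db":
+  //
+  //   std::vector<Slice> keys = {"k1", "k2"};
+  //   std::vector<std::string> values;
+  //   std::vector<Status> statuses =
+  //       db->MultiGet(ReadOptions(), keys, &values);
+  //   // statuses[i].IsNotFound() marks missing keys; values[i] holds hits.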
+
+  // Overloaded MultiGet API that improves performance by batching operations
+  // in the read path for greater efficiency. Currently, only the block based
+  // table format with full filters are supported. Other table formats such
+  // as plain table, block based table with block based filters and
+  // partitioned indexes will still work, but will not get any performance
+  // benefits.
+  // Parameters -
+  // options - ReadOptions
+  // column_family - ColumnFamilyHandle* that the keys belong to. All the keys
+  //                 passed to the API are restricted to a single column family
+  // num_keys - Number of keys to lookup
+  // keys - Pointer to C style array of key Slices with num_keys elements
+  // values - Pointer to C style array of PinnableSlices with num_keys elements
+  // statuses - Pointer to C style array of Status with num_keys elements
+  // sorted_input - If true, it means the input keys are already sorted by key
+  //                order, so the MultiGet() API doesn't have to sort them
+  //                again. If false, the keys will be copied and sorted
+  //                internally by the API - the input array will not be
+  //                modified
+  virtual void MultiGet(const ReadOptions& options,
+                        ColumnFamilyHandle* column_family,
+                        const size_t num_keys, const Slice* keys,
+                        PinnableSlice* values, Status* statuses,
+                        const bool /*sorted_input*/ = false) {
+    std::vector<ColumnFamilyHandle*> cf;
+    std::vector<Slice> user_keys;
+    std::vector<Status> status;
+    std::vector<std::string> vals;
+
+    for (size_t i = 0; i < num_keys; ++i) {
+      cf.emplace_back(column_family);
+      user_keys.emplace_back(keys[i]);
+    }
+    status = MultiGet(options, cf, user_keys, &vals);
+    std::copy(status.begin(), status.end(), statuses);
+    for (auto& value : vals) {
+      values->PinSelf(value);
+      values++;
+    }
+  }
+
+  virtual void MultiGet(const ReadOptions& options,
+                        ColumnFamilyHandle* column_family,
+                        const size_t num_keys, const Slice* keys,
+                        PinnableSlice* values, std::string* timestamps,
+                        Status* statuses, const bool /*sorted_input*/ = false) {
+    std::vector<ColumnFamilyHandle*> cf;
+    std::vector<Slice> user_keys;
+    std::vector<Status> status;
+    std::vector<std::string> vals;
+    std::vector<std::string> tss;
+
+    for (size_t i = 0; i < num_keys; ++i) {
+      cf.emplace_back(column_family);
+      user_keys.emplace_back(keys[i]);
+    }
+    status = MultiGet(options, cf, user_keys, &vals, &tss);
+    std::copy(status.begin(), status.end(), statuses);
+    std::copy(tss.begin(), tss.end(), timestamps);
+    for (auto& value : vals) {
+      values->PinSelf(value);
+      values++;
+    }
+  }
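+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. The batched, C-style-array MultiGet for a
+  // single column family, assuming an opened "db" and a handle "cf":
+  //
+  //   constexpr size_t kNum = 2;
+  //   Slice keys[kNum] = {"k1", "k2"};
+  //   PinnableSlice values[kNum];
+  //   Status statuses[kNum];
+  //   db->MultiGet(ReadOptions(), cf, kNum, keys, values, statuses);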
+
+  // Overloaded MultiGet API that improves performance by batching operations
+  // in the read path for greater efficiency. Currently, only the block based
+  // table format with full filters are supported. Other table formats such
+  // as plain table, block based table with block based filters and
+  // partitioned indexes will still work, but will not get any performance
+  // benefits.
+  // Parameters -
+  // options - ReadOptions
+  // column_family - ColumnFamilyHandle* that the keys belong to. All the keys
+  //                 passed to the API are restricted to a single column family
+  // num_keys - Number of keys to lookup
+  // keys - Pointer to C style array of key Slices with num_keys elements
+  // values - Pointer to C style array of PinnableSlices with num_keys elements
+  // statuses - Pointer to C style array of Status with num_keys elements
+  // sorted_input - If true, it means the input keys are already sorted by key
+  //                order, so the MultiGet() API doesn't have to sort them
+  //                again. If false, the keys will be copied and sorted
+  //                internally by the API - the input array will not be
+  //                modified
+  virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
+                        ColumnFamilyHandle** column_families, const Slice* keys,
+                        PinnableSlice* values, Status* statuses,
+                        const bool /*sorted_input*/ = false) {
+    std::vector<ColumnFamilyHandle*> cf;
+    std::vector<Slice> user_keys;
+    std::vector<Status> status;
+    std::vector<std::string> vals;
+
+    for (size_t i = 0; i < num_keys; ++i) {
+      cf.emplace_back(column_families[i]);
+      user_keys.emplace_back(keys[i]);
+    }
+    status = MultiGet(options, cf, user_keys, &vals);
+    std::copy(status.begin(), status.end(), statuses);
+    for (auto& value : vals) {
+      values->PinSelf(value);
+      values++;
+    }
+  }
+  virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
+                        ColumnFamilyHandle** column_families, const Slice* keys,
+                        PinnableSlice* values, std::string* timestamps,
+                        Status* statuses, const bool /*sorted_input*/ = false) {
+    std::vector<ColumnFamilyHandle*> cf;
+    std::vector<Slice> user_keys;
+    std::vector<Status> status;
+    std::vector<std::string> vals;
+    std::vector<std::string> tss;
+
+    for (size_t i = 0; i < num_keys; ++i) {
+      cf.emplace_back(column_families[i]);
+      user_keys.emplace_back(keys[i]);
+    }
+    status = MultiGet(options, cf, user_keys, &vals, &tss);
+    std::copy(status.begin(), status.end(), statuses);
+    std::copy(tss.begin(), tss.end(), timestamps);
+    for (auto& value : vals) {
+      values->PinSelf(value);
+      values++;
+    }
+  }
+
+  // Batched MultiGet-like API that returns wide-column entities from a single
+  // column family. For any given "key[i]" in "keys" (where 0 <= "i" <
+  // "num_keys"), if the column family specified by "column_family" contains an
+  // entry, it is returned as a wide-column entity in "results[i]". If the
+  // entry is a wide-column entity, it is returned as-is; if it is a plain
+  // key-value, it is returned as an entity with a single anonymous column (see
+  // kDefaultWideColumnName) which contains the value.
+  //
+  // "statuses[i]" is set to OK if "keys[i]" is successfully retrieved. It is
+  // set to NotFound and an empty wide-column entity is returned in "results[i]"
+  // if there is no entry for "keys[i]". Finally, "statuses[i]" is set to some
+  // other non-OK status on error.
+  //
+  // If "keys" are sorted according to the column family's comparator, the
+  // "sorted_input" flag can be set for a small performance improvement.
+  //
+  // Note that it is the caller's responsibility to ensure that "keys",
+  // "results", and "statuses" point to "num_keys" number of contiguous objects
+  // (Slices, PinnableWideColumns, and Statuses respectively).
+  virtual void MultiGetEntity(const ReadOptions& /* options */,
+                              ColumnFamilyHandle* /* column_family */,
+                              size_t num_keys, const Slice* /* keys */,
+                              PinnableWideColumns* /* results */,
+                              Status* statuses,
+                              bool /* sorted_input */ = false) {
+    for (size_t i = 0; i < num_keys; ++i) {
+      statuses[i] = Status::NotSupported("MultiGetEntity not supported");
+    }
+  }
+ // + // "statuses[i]" is set to OK if "keys[i]" is successfully retrieved. It is + // set to NotFound and an empty wide-column entity is returned in "results[i]" + // if there is no entry for "keys[i]". Finally, "statuses[i]" is set to some + // other non-OK status on error. + // + // If "keys" are sorted by column family id and within each column family, + // according to the column family's comparator, the "sorted_input" flag can be + // set for a small performance improvement. + // + // Note that it is the caller's responsibility to ensure that + // "column_families", "keys", "results", and "statuses" point to "num_keys" + // number of contiguous objects (ColumnFamilyHandle pointers, Slices, + // PinnableWideColumns, and Statuses respectively). + virtual void MultiGetEntity(const ReadOptions& /* options */, size_t num_keys, + ColumnFamilyHandle** /* column_families */, + const Slice* /* keys */, + PinnableWideColumns* /* results */, + Status* statuses, + bool /* sorted_input */ = false) { + for (size_t i = 0; i < num_keys; ++i) { + statuses[i] = Status::NotSupported("MultiGetEntity not supported"); + } + } + + // If the key definitely does not exist in the database, then this method + // returns false, else true. If the caller wants to obtain value when the key + // is found in memory, a bool for 'value_found' must be passed. 'value_found' + // will be true on return if value has been set properly. + // This check is potentially lighter-weight than invoking DB::Get(). One way + // to make this lighter weight is to avoid doing any IOs. + // Default implementation here returns true and sets 'value_found' to false + virtual bool KeyMayExist(const ReadOptions& /*options*/, + ColumnFamilyHandle* /*column_family*/, + const Slice& /*key*/, std::string* /*value*/, + std::string* /*timestamp*/, + bool* value_found = nullptr) { + if (value_found != nullptr) { + *value_found = false; + } + return true; + } + + virtual bool KeyMayExist(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + std::string* value, bool* value_found = nullptr) { + return KeyMayExist(options, column_family, key, value, + /*timestamp=*/nullptr, value_found); + } + + virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, + std::string* value, bool* value_found = nullptr) { + return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found); + } + + virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, + std::string* value, std::string* timestamp, + bool* value_found = nullptr) { + return KeyMayExist(options, DefaultColumnFamily(), key, value, timestamp, + value_found); + } + + // Return a heap-allocated iterator over the contents of the database. + // The result of NewIterator() is initially invalid (caller must + // call one of the Seek methods on the iterator before using it). + // + // Caller should delete the iterator when it is no longer needed. + // The returned iterator should be deleted before this db is deleted. + virtual Iterator* NewIterator(const ReadOptions& options, + ColumnFamilyHandle* column_family) = 0; + virtual Iterator* NewIterator(const ReadOptions& options) { + return NewIterator(options, DefaultColumnFamily()); + } + // Returns iterators from a consistent database state across multiple + // column families. 
+  // Returns iterators from a consistent database state across multiple
+  // column families. Iterators are heap allocated and need to be deleted
+  // before the db is deleted
+  virtual Status NewIterators(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_families,
+      std::vector<Iterator*>* iterators) = 0;
+
+  // Return a handle to the current DB state. Iterators created with
+  // this handle will all observe a stable snapshot of the current DB
+  // state. The caller must call ReleaseSnapshot(result) when the
+  // snapshot is no longer needed.
+  //
+  // nullptr will be returned if the DB fails to take a snapshot or does
+  // not support snapshot (eg: inplace_update_support enabled).
+  virtual const Snapshot* GetSnapshot() = 0;
+
+  // Release a previously acquired snapshot. The caller must not
+  // use "snapshot" after this call.
+  virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
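+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. Reading at a fixed snapshot, assuming an
+  // opened "db":
+  //
+  //   const Snapshot* snap = db->GetSnapshot();
+  //   ReadOptions ropts;
+  //   ropts.snapshot = snap;
+  //   std::string value;
+  //   Status s = db->Get(ropts, "key1", &value);  // sees the state at snap
+  //   db->ReleaseSnapshot(snap);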
+
+  // Contains all valid property arguments for GetProperty() or
+  // GetMapProperty(). Each is a "string" property for retrieval with
+  // GetProperty() unless noted as a "map" property, for GetMapProperty().
+  //
+  // NOTE: Property names cannot end in numbers since those are interpreted as
+  // arguments, e.g., see kNumFilesAtLevelPrefix.
+  struct Properties {
+    //  "rocksdb.num-files-at-level<N>" - returns string containing the number
+    //      of files at level <N>, where <N> is an ASCII representation of a
+    //      level number (e.g., "0").
+    static const std::string kNumFilesAtLevelPrefix;
+
+    //  "rocksdb.compression-ratio-at-level<N>" - returns string containing the
+    //      compression ratio of data at level <N>, where <N> is an ASCII
+    //      representation of a level number (e.g., "0"). Here, compression
+    //      ratio is defined as uncompressed data size / compressed file size.
+    //      Returns "-1.0" if no open files at level <N>.
+    static const std::string kCompressionRatioAtLevelPrefix;
+
+    //  "rocksdb.stats" - returns a multi-line string containing the data
+    //      described by kCFStats followed by the data described by kDBStats.
+    static const std::string kStats;
+
+    //  "rocksdb.sstables" - returns a multi-line string summarizing current
+    //      SST files.
+    static const std::string kSSTables;
+
+    //  "rocksdb.cfstats" - Raw data from "rocksdb.cfstats-no-file-histogram"
+    //      and "rocksdb.cf-file-histogram" as a "map" property.
+    static const std::string kCFStats;
+
+    //  "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with
+    //      general column family stats per-level over db's lifetime ("L<n>"),
+    //      aggregated over db's lifetime ("Sum"), and aggregated over the
+    //      interval since the last retrieval ("Int").
+    static const std::string kCFStatsNoFileHistogram;
+
+    //  "rocksdb.cf-file-histogram" - print out how many file reads to every
+    //      level, as well as the histogram of latency of single requests.
+    static const std::string kCFFileHistogram;
+
+    //  "rocksdb.cf-write-stall-stats" - returns a multi-line string or
+    //      map with statistics on CF-scope write stalls for a given CF
+    //      See `WriteStallStatsMapKeys` for structured representation of keys
+    //      available in the map form.
+    static const std::string kCFWriteStallStats;
+
+    //  "rocksdb.db-write-stall-stats" - returns a multi-line string or
+    //      map with statistics on DB-scope write stalls
+    //      See `WriteStallStatsMapKeys` for structured representation of keys
+    //      available in the map form.
+    static const std::string kDBWriteStallStats;
+
+    //  "rocksdb.dbstats" - As a string property, returns a multi-line string
+    //      with general database stats, both cumulative (over the db's
+    //      lifetime) and interval (since the last retrieval of kDBStats).
+    //      As a map property, returns cumulative stats only and does not
+    //      update the baseline for the interval stats.
+    static const std::string kDBStats;
+
+    //  "rocksdb.levelstats" - returns multi-line string containing the number
+    //      of files per level and total size of each level (MB).
+    static const std::string kLevelStats;
+
+    //  "rocksdb.block-cache-entry-stats" - returns a multi-line string or
+    //      map with statistics on block cache usage. See
+    //      `BlockCacheEntryStatsMapKeys` for structured representation of keys
+    //      available in the map form.
+    static const std::string kBlockCacheEntryStats;
+
+    //  "rocksdb.fast-block-cache-entry-stats" - same as above, but returns
+    //      stale values more frequently to reduce overhead and latency.
+    static const std::string kFastBlockCacheEntryStats;
+
+    //  "rocksdb.num-immutable-mem-table" - returns number of immutable
+    //      memtables that have not yet been flushed.
+    static const std::string kNumImmutableMemTable;
+
+    //  "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
+    //      memtables that have already been flushed.
+    static const std::string kNumImmutableMemTableFlushed;
+
+    //  "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
+    //      pending; otherwise, returns 0.
+    static const std::string kMemTableFlushPending;
+
+    //  "rocksdb.num-running-flushes" - returns the number of currently running
+    //      flushes.
+    static const std::string kNumRunningFlushes;
+
+    //  "rocksdb.compaction-pending" - returns 1 if at least one compaction is
+    //      pending; otherwise, returns 0.
+    static const std::string kCompactionPending;
+
+    //  "rocksdb.num-running-compactions" - returns the number of currently
+    //      running compactions.
+    static const std::string kNumRunningCompactions;
+
+    //  "rocksdb.background-errors" - returns accumulated number of background
+    //      errors.
+    static const std::string kBackgroundErrors;
+
+    //  "rocksdb.cur-size-active-mem-table" - returns approximate size of active
+    //      memtable (bytes).
+    static const std::string kCurSizeActiveMemTable;
+
+    //  "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
+    //      and unflushed immutable memtables (bytes).
+    static const std::string kCurSizeAllMemTables;
+
+    //  "rocksdb.size-all-mem-tables" - returns approximate size of active,
+    //      unflushed immutable, and pinned immutable memtables (bytes).
+    static const std::string kSizeAllMemTables;
+
+    //  "rocksdb.num-entries-active-mem-table" - returns total number of entries
+    //      in the active memtable.
+    static const std::string kNumEntriesActiveMemTable;
+
+    //  "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
+    //      in the unflushed immutable memtables.
+    static const std::string kNumEntriesImmMemTables;
+
+    //  "rocksdb.num-deletes-active-mem-table" - returns total number of delete
+    //      entries in the active memtable.
+    static const std::string kNumDeletesActiveMemTable;
+
+    //  "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
+    //      entries in the unflushed immutable memtables.
+    static const std::string kNumDeletesImmMemTables;
+
+    //  "rocksdb.estimate-num-keys" - returns estimated number of total keys in
+    //      the active and unflushed immutable memtables and storage.
+    static const std::string kEstimateNumKeys;
+
+    //  "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
+    //      reading SST tables, excluding memory used in block cache (e.g.,
+    //      filter and index blocks).
+    static const std::string kEstimateTableReadersMem;
+
+    //  "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
+    //      files is enabled; otherwise, returns a non-zero number.
+    //      This name may be misleading because true(non-zero) means disable,
+    //      but we keep the name for backward compatibility.
+    static const std::string kIsFileDeletionsEnabled;
+
+    //  "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
+    //      database.
+    static const std::string kNumSnapshots;
+
+    //  "rocksdb.oldest-snapshot-time" - returns number representing unix
+    //      timestamp of oldest unreleased snapshot.
+    static const std::string kOldestSnapshotTime;
+
+    //  "rocksdb.oldest-snapshot-sequence" - returns number representing
+    //      sequence number of oldest unreleased snapshot.
+    static const std::string kOldestSnapshotSequence;
+
+    //  "rocksdb.num-live-versions" - returns number of live versions. `Version`
+    //      is an internal data structure. See version_set.h for details. More
+    //      live versions often mean more SST files are held from being deleted,
+    //      by iterators or unfinished compactions.
+    static const std::string kNumLiveVersions;
+
+    //  "rocksdb.current-super-version-number" - returns number of current LSM
+    //      version. It is a uint64_t integer number, incremented after there is
+    //      any change to the LSM tree. The number is not preserved after
+    //      restarting the DB. After DB restart, it will start from 0 again.
+    static const std::string kCurrentSuperVersionNumber;
+
+    //  "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
+    //      live data in bytes. For BlobDB, it also includes the exact value of
+    //      live bytes in the blob files of the version.
+    static const std::string kEstimateLiveDataSize;
+
+    //  "rocksdb.min-log-number-to-keep" - return the minimum log number of the
+    //      log files that should be kept.
+    static const std::string kMinLogNumberToKeep;
+
+    //  "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file
+    //      number for an obsolete SST to be kept. The max value of `uint64_t`
+    //      will be returned if all obsolete files can be deleted.
+    static const std::string kMinObsoleteSstNumberToKeep;
+
+    //  "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
+    //      files.
+    //      WARNING: may slow down online queries if there are too many files.
+    static const std::string kTotalSstFilesSize;
+
+    //  "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST
+    //      files belong to the latest LSM tree.
+    static const std::string kLiveSstFilesSize;
+
+    //  "rocksdb.live_sst_files_size_at_temperature" - returns total size
+    //      (bytes) of SST files at all certain file temperature
+    static const std::string kLiveSstFilesSizeAtTemperature;
+
+    //  "rocksdb.base-level" - returns number of level to which L0 data will be
+    //      compacted.
+    static const std::string kBaseLevel;
+
+    //  "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
+    //      number of bytes compaction needs to rewrite to get all levels down
+    //      to under target size. Not valid for other compactions than
+    //      level-based.
+    static const std::string kEstimatePendingCompactionBytes;
+
+    //  "rocksdb.aggregated-table-properties" - returns a string or map
+    //      representation of the aggregated table properties of the target
+    //      column family. Only properties that make sense for aggregation
+    //      are included.
+    static const std::string kAggregatedTableProperties;
+
+    //  "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
+    //      one but only returns the aggregated table properties of the
+    //      specified level "N" at the target column family.
+    static const std::string kAggregatedTablePropertiesAtLevel;
+
+    //  "rocksdb.actual-delayed-write-rate" - returns the current actual delayed
+    //      write rate. 0 means no delay.
+    static const std::string kActualDelayedWriteRate;
+
+    //  "rocksdb.is-write-stopped" - Return 1 if write has been stopped.
+    static const std::string kIsWriteStopped;
+
+    //  "rocksdb.estimate-oldest-key-time" - returns an estimation of
+    //      oldest key timestamp in the DB. Currently only available for
+    //      FIFO compaction with
+    //      compaction_options_fifo.allow_compaction = false.
+    static const std::string kEstimateOldestKeyTime;
+
+    //  "rocksdb.block-cache-capacity" - returns block cache capacity.
+    static const std::string kBlockCacheCapacity;
+
+    //  "rocksdb.block-cache-usage" - returns the memory size for the entries
+    //      residing in block cache.
+    static const std::string kBlockCacheUsage;
+
+    //  "rocksdb.block-cache-pinned-usage" - returns the memory size for the
+    //      entries being pinned.
+    static const std::string kBlockCachePinnedUsage;
+
+    //  "rocksdb.options-statistics" - returns multi-line string
+    //      of options.statistics
+    static const std::string kOptionsStatistics;
+
+    //  "rocksdb.num-blob-files" - returns number of blob files in the current
+    //      version.
+    static const std::string kNumBlobFiles;
+
+    //  "rocksdb.blob-stats" - return the total number and size of all blob
+    //      files, and total amount of garbage (bytes) in the blob files in
+    //      the current version.
+    static const std::string kBlobStats;
+
+    //  "rocksdb.total-blob-file-size" - returns the total size of all blob
+    //      files over all versions.
+    static const std::string kTotalBlobFileSize;
+
+    //  "rocksdb.live-blob-file-size" - returns the total size of all blob
+    //      files in the current version.
+    static const std::string kLiveBlobFileSize;
+
+    //  "rocksdb.live-blob-file-garbage-size" - returns the total amount of
+    //      garbage in the blob files in the current version.
+    static const std::string kLiveBlobFileGarbageSize;
+
+    //  "rocksdb.blob-cache-capacity" - returns blob cache capacity.
+    static const std::string kBlobCacheCapacity;
+
+    //  "rocksdb.blob-cache-usage" - returns the memory size for the entries
+    //      residing in blob cache.
+    static const std::string kBlobCacheUsage;
+
+    //  "rocksdb.blob-cache-pinned-usage" - returns the memory size for the
+    //      entries being pinned in blob cache.
+    static const std::string kBlobCachePinnedUsage;
+  };
+
+  // DB implementations export properties about their state via this method.
+  // If "property" is a valid "string" property understood by this DB
+  // implementation (see Properties struct above for valid options), fills
+  // "*value" with its current value and returns true. Otherwise, returns
+  // false.
+  virtual bool GetProperty(ColumnFamilyHandle* column_family,
+                           const Slice& property, std::string* value) = 0;
+  virtual bool GetProperty(const Slice& property, std::string* value) {
+    return GetProperty(DefaultColumnFamily(), property, value);
+  }
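+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. Querying a string property and an integer
+  // property (GetIntProperty is declared just below), assuming an opened
+  // "db":
+  //
+  //   std::string stats;
+  //   if (db->GetProperty(DB::Properties::kStats, &stats)) { /* log it */ }
+  //   uint64_t num_keys = 0;
+  //   db->GetIntProperty("rocksdb.estimate-num-keys", &num_keys);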
+
+  // Like GetProperty but for valid "map" properties. (Some properties can be
+  // accessed as either "string" properties or "map" properties.)
+  virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
+                              const Slice& property,
+                              std::map<std::string, std::string>* value) = 0;
+  virtual bool GetMapProperty(const Slice& property,
+                              std::map<std::string, std::string>* value) {
+    return GetMapProperty(DefaultColumnFamily(), property, value);
+  }
+
+  // Similar to GetProperty(), but only works for a subset of properties whose
+  // return value is an integer. Return the value by integer. Supported
+  // properties:
+  //  "rocksdb.num-immutable-mem-table"
+  //  "rocksdb.mem-table-flush-pending"
+  //  "rocksdb.compaction-pending"
+  //  "rocksdb.background-errors"
+  //  "rocksdb.cur-size-active-mem-table"
+  //  "rocksdb.cur-size-all-mem-tables"
+  //  "rocksdb.size-all-mem-tables"
+  //  "rocksdb.num-entries-active-mem-table"
+  //  "rocksdb.num-entries-imm-mem-tables"
+  //  "rocksdb.num-deletes-active-mem-table"
+  //  "rocksdb.num-deletes-imm-mem-tables"
+  //  "rocksdb.estimate-num-keys"
+  //  "rocksdb.estimate-table-readers-mem"
+  //  "rocksdb.is-file-deletions-enabled"
+  //  "rocksdb.num-snapshots"
+  //  "rocksdb.oldest-snapshot-time"
+  //  "rocksdb.num-live-versions"
+  //  "rocksdb.current-super-version-number"
+  //  "rocksdb.estimate-live-data-size"
+  //  "rocksdb.min-log-number-to-keep"
+  //  "rocksdb.min-obsolete-sst-number-to-keep"
+  //  "rocksdb.total-sst-files-size"
+  //  "rocksdb.live-sst-files-size"
+  //  "rocksdb.base-level"
+  //  "rocksdb.estimate-pending-compaction-bytes"
+  //  "rocksdb.num-running-compactions"
+  //  "rocksdb.num-running-flushes"
+  //  "rocksdb.actual-delayed-write-rate"
+  //  "rocksdb.is-write-stopped"
+  //  "rocksdb.estimate-oldest-key-time"
+  //  "rocksdb.block-cache-capacity"
+  //  "rocksdb.block-cache-usage"
+  //  "rocksdb.block-cache-pinned-usage"
+  //
+  //  Properties dedicated for BlobDB:
+  //  "rocksdb.num-blob-files"
+  //  "rocksdb.total-blob-file-size"
+  //  "rocksdb.live-blob-file-size"
+  //  "rocksdb.blob-cache-capacity"
+  //  "rocksdb.blob-cache-usage"
+  //  "rocksdb.blob-cache-pinned-usage"
+  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
+                              const Slice& property, uint64_t* value) = 0;
+  virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
+    return GetIntProperty(DefaultColumnFamily(), property, value);
+  }
+
+  // Reset internal stats for DB and all column families.
+  // Note this doesn't reset options.statistics as it is not owned by
+  // DB.
+  virtual Status ResetStats() {
+    return Status::NotSupported("Not implemented");
+  }
+
+  // Same as GetIntProperty(), but this one returns the aggregated int
+  // property from all column families.
+  virtual bool GetAggregatedIntProperty(const Slice& property,
+                                        uint64_t* value) = 0;
+
+  // Flags for DB::GetSizeApproximation that specify whether memtable
+  // stats should be included, or file stats approximation or both
+  enum class SizeApproximationFlags : uint8_t {
+    NONE = 0,
+    INCLUDE_MEMTABLES = 1 << 0,
+    INCLUDE_FILES = 1 << 1
+  };
+
+  // For each i in [0,n-1], store in "sizes[i]", the approximate
+  // file system space used by keys in "[range[i].start .. range[i].limit)"
+  // in a single column family.
+  //
+  // Note that the returned sizes measure file system space usage, so
+  // if the user data compresses by a factor of ten, the returned
+  // sizes will be one-tenth the size of the corresponding user data size.
+  virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
+                                     ColumnFamilyHandle* column_family,
+                                     const Range* ranges, int n,
+                                     uint64_t* sizes) = 0;
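+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. Estimating on-disk space for one key range
+  // of the default column family, assuming an opened "db":
+  //
+  //   Range r("a", "z");
+  //   uint64_t size = 0;
+  //   Status s = db->GetApproximateSizes(SizeApproximationOptions(),
+  //                                      db->DefaultColumnFamily(), &r, 1,
+  //                                      &size);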
+
+  // Simpler versions of the GetApproximateSizes() method above.
+  // The include_flags argument must be of type DB::SizeApproximationFlags
+  // and can not be NONE.
+  virtual Status GetApproximateSizes(ColumnFamilyHandle* column_family,
+                                     const Range* ranges, int n,
+                                     uint64_t* sizes,
+                                     SizeApproximationFlags include_flags =
+                                         SizeApproximationFlags::INCLUDE_FILES);
+
+  virtual Status GetApproximateSizes(
+      const Range* ranges, int n, uint64_t* sizes,
+      SizeApproximationFlags include_flags =
+          SizeApproximationFlags::INCLUDE_FILES) {
+    return GetApproximateSizes(DefaultColumnFamily(), ranges, n, sizes,
+                               include_flags);
+  }
+
+  // The method is similar to GetApproximateSizes, except it
+  // returns approximate number of records in memtables.
+  virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
+                                           const Range& range,
+                                           uint64_t* const count,
+                                           uint64_t* const size) = 0;
+  virtual void GetApproximateMemTableStats(const Range& range,
+                                           uint64_t* const count,
+                                           uint64_t* const size) {
+    GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size);
+  }
+
+  // Compact the underlying storage for the key range [*begin,*end].
+  // The actual compaction interval might be superset of [*begin, *end].
+  // In particular, deleted and overwritten versions are discarded,
+  // and the data is rearranged to reduce the cost of operations
+  // needed to access the data. This operation should typically only
+  // be invoked by users who understand the underlying implementation.
+  // This call blocks until the operation completes successfully, fails,
+  // or is aborted (Status::Incomplete). See DisableManualCompaction.
+  //
+  // begin==nullptr is treated as a key before all keys in the database.
+  // end==nullptr is treated as a key after all keys in the database.
+  // Therefore the following call will compact the entire database:
+  //    db->CompactRange(options, nullptr, nullptr);
+  // Note that after the entire database is compacted, all data are pushed
+  // down to the last level containing any data. If the total data size after
+  // compaction is reduced, that level might not be appropriate for hosting all
+  // the files. In this case, client could set options.change_level to true, to
+  // move the files back to the minimum level capable of holding the data set
+  // or a given level (specified by non-negative options.target_level).
+  virtual Status CompactRange(const CompactRangeOptions& options,
+                              ColumnFamilyHandle* column_family,
+                              const Slice* begin, const Slice* end) = 0;
+  virtual Status CompactRange(const CompactRangeOptions& options,
+                              const Slice* begin, const Slice* end) {
+    return CompactRange(options, DefaultColumnFamily(), begin, end);
+  }
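+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. Compacting the whole key space of the
+  // default column family, assuming an opened "db":
+  //
+  //   CompactRangeOptions cro;
+  //   cro.change_level = true;  // allow moving data back to a lower level
+  //   Status s = db->CompactRange(cro, nullptr, nullptr);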
+
+  // Dynamically change column family options or table factory options in a
+  // running DB, for the specified column family. Only options internally
+  // marked as "mutable" can be changed. Options not listed in `opts_map` will
+  // keep their current values. See GetColumnFamilyOptionsFromMap() in
+  // convenience.h for the details of `opts_map`. Not supported in LITE mode.
+  //
+  // USABILITY NOTE: SetOptions is intended only for expert users, and does
+  // not apply the same sanitization to options as the standard DB::Open code
+  // path does. Use with caution.
+  //
+  // RELIABILITY & PERFORMANCE NOTE: SetOptions is not fully stress-tested for
+  // reliability, and this is a slow call because a new OPTIONS file is
+  // serialized and persisted for each call. Use only infrequently.
+  //
+  // EXAMPLES:
+  //  s = db->SetOptions(cfh, {{"ttl", "36000"}});
+  //  s = db->SetOptions(cfh, {{"block_based_table_factory",
+  //                            "{prepopulate_block_cache=kDisable;}"}});
+  virtual Status SetOptions(
+      ColumnFamilyHandle* /*column_family*/,
+      const std::unordered_map<std::string, std::string>& /*opts_map*/) {
+    return Status::NotSupported("Not implemented");
+  }
+  // Shortcut for SetOptions on the default column family handle.
+  virtual Status SetOptions(
+      const std::unordered_map<std::string, std::string>& new_options) {
+    return SetOptions(DefaultColumnFamily(), new_options);
+  }
+
+  // Like SetOptions but for DBOptions, including the same caveats for
+  // usability, reliability, and performance. See GetDBOptionsFromMap() (and
+  // GetColumnFamilyOptionsFromMap()) in convenience.h for details on
+  // `opts_map`. Not supported in LITE mode.
+  //
+  // EXAMPLES:
+  //  s = db->SetDBOptions({{"max_subcompactions", "2"}});
+  //  s = db->SetDBOptions({{"stats_dump_period_sec", "0"},
+  //                        {"stats_persist_period_sec", "0"}});
+  virtual Status SetDBOptions(
+      const std::unordered_map<std::string, std::string>& new_options) = 0;
+
+  // CompactFiles() inputs a list of files specified by file numbers and
+  // compacts them to the specified level. A small difference compared to
+  // CompactRange() is that CompactFiles() performs the compaction job
+  // using the CURRENT thread, so is not considered a "background" job.
+  //
+  // @see GetDataBaseMetaData
+  // @see GetColumnFamilyMetaData
+  virtual Status CompactFiles(
+      const CompactionOptions& compact_options,
+      ColumnFamilyHandle* column_family,
+      const std::vector<std::string>& input_file_names, const int output_level,
+      const int output_path_id = -1,
+      std::vector<std::string>* const output_file_names = nullptr,
+      CompactionJobInfo* compaction_job_info = nullptr) = 0;
+
+  virtual Status CompactFiles(
+      const CompactionOptions& compact_options,
+      const std::vector<std::string>& input_file_names, const int output_level,
+      const int output_path_id = -1,
+      std::vector<std::string>* const output_file_names = nullptr,
+      CompactionJobInfo* compaction_job_info = nullptr) {
+    return CompactFiles(compact_options, DefaultColumnFamily(),
+                        input_file_names, output_level, output_path_id,
+                        output_file_names, compaction_job_info);
+  }
+
+  // This function will wait until all currently running background processes
+  // finish. After it returns, no background process will be run until
+  // ContinueBackgroundWork is called, once for each preceding OK-returning
+  // call to PauseBackgroundWork.
+  virtual Status PauseBackgroundWork() = 0;
+  virtual Status ContinueBackgroundWork() = 0;
+
+  // This function will enable automatic compactions for the given column
+  // families if they were previously disabled. The function will first set the
+  // disable_auto_compactions option for each column family to 'false', after
+  // which it will schedule a flush/compaction.
+  //
+  // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
+  // does NOT schedule a flush/compaction afterwards, and only changes the
+  // parameter itself within the column family option.
+  //
+  virtual Status EnableAutoCompaction(
+      const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;
+
+  // After this function call, CompactRange() or CompactFiles() will not
+  // run compactions and fail. Calling this function will tell outstanding
+  // manual compactions to abort and will wait for them to finish or abort
+  // before returning.
+  virtual void DisableManualCompaction() = 0;
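+
+  // [Editor's note] Illustrative sketch, an editorial addition rather than
+  // part of the upstream header. Pairing PauseBackgroundWork() with
+  // ContinueBackgroundWork(), assuming an opened "db":
+  //
+  //   Status s = db->PauseBackgroundWork();
+  //   if (s.ok()) {
+  //     // ... do work that must not race with flushes/compactions ...
+  //     s = db->ContinueBackgroundWork();  // one call per successful pause
+  //   }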
+  // Re-enable CompactRange() and CompactFiles() that are disabled by
+  // DisableManualCompaction(). This function must be called as many times
+  // as DisableManualCompaction() has been called in order to re-enable
+  // manual compactions, and must not be called more times than
+  // DisableManualCompaction() has been called.
+  virtual void EnableManualCompaction() = 0;
+
+  // Number of levels used for this DB.
+  virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
+  virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
+
+  // Maximum level to which a new compacted memtable is pushed if it
+  // does not create overlap.
+  virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
+  virtual int MaxMemCompactionLevel() {
+    return MaxMemCompactionLevel(DefaultColumnFamily());
+  }
+
+  // Number of files in level-0 that would stop writes.
+  virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
+  virtual int Level0StopWriteTrigger() {
+    return Level0StopWriteTrigger(DefaultColumnFamily());
+  }
+
+  // Get DB name -- the exact same name that was provided as an argument to
+  // DB::Open()
+  virtual const std::string& GetName() const = 0;
+
+  // Get Env object from the DB
+  virtual Env* GetEnv() const = 0;
+
+  // A shortcut for GetEnv()->GetFileSystem().get(), possibly cached for
+  // efficiency.
+  virtual FileSystem* GetFileSystem() const;
+
+  // Get DB Options that we use. During the process of opening the
+  // column family, the options provided when calling DB::Open() or
+  // DB::CreateColumnFamily() will have been "sanitized" and transformed
+  // in an implementation-defined manner.
+  virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
+  virtual Options GetOptions() const {
+    return GetOptions(DefaultColumnFamily());
+  }
+
+  virtual DBOptions GetDBOptions() const = 0;
+
+  // Flush all memtable data.
+  // Flush a single column family, even when atomic flush is enabled. To flush
+  // multiple column families, use Flush(options, column_families).
+  virtual Status Flush(const FlushOptions& options,
+                       ColumnFamilyHandle* column_family) = 0;
+  virtual Status Flush(const FlushOptions& options) {
+    return Flush(options, DefaultColumnFamily());
+  }
+  // Flushes memtables of multiple column families.
+  // If atomic flush is not enabled, Flush(options, column_families) is
+  // equivalent to calling Flush(options, column_family) multiple times.
+  // If atomic flush is enabled, Flush(options, column_families) will flush all
+  // column families specified in 'column_families' up to the latest sequence
+  // number at the time when flush is requested.
+  // Note that RocksDB 5.15 and earlier may not be able to open later versions
+  // with atomic flush enabled.
+  virtual Status Flush(
+      const FlushOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_families) = 0;
+
+  // When using the manual_wal_flush option, flushes RocksDB internal buffers
+  // of WAL data to the file, so that the data can survive process crash or be
+  // included in a Checkpoint or Backup. Without manual_wal_flush, there is no
+  // such internal buffer. If sync is true, it calls SyncWAL() afterwards.
+  virtual Status FlushWAL(bool /*sync*/) {
+    return Status::NotSupported("FlushWAL not implemented");
+  }
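+
+  // EXAMPLE (illustrative sketch; assumes an open DB* named `db`, column
+  // family handles `cf1` and `cf2`, and atomic_flush enabled in DBOptions):
+  // flushing two column families up to a single sequence number:
+  //
+  //   FlushOptions fo;
+  //   fo.wait = true;
+  //   Status s = db->Flush(fo, {cf1, cf2});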
+
+  // Ensure all WAL writes have been synced to storage, so that (assuming OS
+  // and hardware support) data will survive power loss. This function does
+  // not imply FlushWAL, so `FlushWAL(true)` is recommended if using
+  // manual_wal_flush=true. Currently only works if allow_mmap_writes = false
+  // in Options.
+  //
+  // Note that Write() followed by SyncWAL() is not exactly the same as Write()
+  // with sync=true: in the latter case the changes won't be visible until the
+  // sync is done.
+  virtual Status SyncWAL() = 0;
+
+  // Freezes the logical state of the DB (by stopping writes), and if WAL is
+  // enabled, ensures that state has been flushed to DB files (as in
+  // FlushWAL()). This can be used for taking a Checkpoint at a known DB
+  // state, though the user must use options to ensure no DB flush is invoked
+  // in this frozen state. Other operations allowed on a "read only" DB should
+  // work while frozen. Each LockWAL() call that returns OK must eventually be
+  // followed by a corresponding call to UnlockWAL(). Where supported, non-OK
+  // status is generally only possible with some kind of corruption or I/O
+  // error.
+  virtual Status LockWAL() {
+    return Status::NotSupported("LockWAL not implemented");
+  }
+
+  // Unfreeze the DB state from a successful LockWAL().
+  // The write stop on the database will be cleared when UnlockWAL() has been
+  // called for each successful LockWAL().
+  virtual Status UnlockWAL() {
+    return Status::NotSupported("UnlockWAL not implemented");
+  }
+
+  // The sequence number of the most recent transaction.
+  virtual SequenceNumber GetLatestSequenceNumber() const = 0;
+
+  // Prevent file deletions. Compactions will continue to occur,
+  // but no obsolete files will be deleted. Calling this multiple
+  // times has the same effect as calling it once.
+  virtual Status DisableFileDeletions() = 0;
+
+  // Increase the full_history_ts_low of the column family. The new ts_low
+  // value should be newer than the current full_history_ts_low value.
+  // If another thread updates full_history_ts_low concurrently to a higher
+  // timestamp than the requested ts_low, a try again error will be returned.
+  virtual Status IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family,
+                                          std::string ts_low) = 0;
+
+  // Get the current full_history_ts_low value.
+  virtual Status GetFullHistoryTsLow(ColumnFamilyHandle* column_family,
+                                     std::string* ts_low) = 0;
+
+  // Allow compactions to delete obsolete files.
+  // If force == true, the call to EnableFileDeletions() will guarantee that
+  // file deletions are enabled after the call, even if DisableFileDeletions()
+  // was called multiple times before.
+  // If force == false, EnableFileDeletions will only enable file deletion
+  // after it's been called at least as many times as DisableFileDeletions(),
+  // enabling the two methods to be called by two threads concurrently without
+  // synchronization -- i.e., file deletions will be enabled only after both
+  // threads call EnableFileDeletions()
+  virtual Status EnableFileDeletions(bool force = true) = 0;
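+
+  // EXAMPLE (illustrative sketch; assumes an open DB* named `db`): bracketing
+  // a manual live copy so obsolete files are not deleted while being copied:
+  //
+  //   Status s = db->DisableFileDeletions();
+  //   if (s.ok()) {
+  //     // ... copy live files out of the DB directory ...
+  //     s = db->EnableFileDeletions(/*force=*/false);
+  //   }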
+
+  // Retrieves the creation time of the oldest file in the DB.
+  // This API only works if max_open_files = -1; if it is not, the
+  // Status returned is Status::NotSupported().
+  // The file creation time is set using the env provided to the DB.
+  // If the DB was created from a very old release, then it's possible that
+  // the SST files might not have a file_creation_time property, and even after
+  // moving to a newer release it's possible that some files never got
+  // compacted and may not have a file_creation_time property. In both cases,
+  // file_creation_time is considered 0, which means this API will return
+  // creation_time = 0, as there wouldn't be a timestamp lower than 0.
+  virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) = 0;
+
+  // Note: this API is not yet consistent with WritePrepared transactions.
+  //
+  // Sets iter to an iterator that is positioned at a write-batch whose
+  // sequence number range [start_seq, end_seq] covers seq_number. If no such
+  // write-batch exists, then iter is positioned at the next write-batch whose
+  // start_seq > seq_number.
+  //
+  // Returns Status::OK if the iterator is valid.
+  // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
+  // use this API, else the WAL files will get
+  // cleared aggressively and the iterator might keep getting invalidated
+  // before an update is read.
+  virtual Status GetUpdatesSince(
+      SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
+      const TransactionLogIterator::ReadOptions& read_options =
+          TransactionLogIterator::ReadOptions()) = 0;
+
+// Windows API macro interference
+#undef DeleteFile
+  // WARNING: This API is planned for removal in RocksDB 7.0 since it does not
+  // operate at the proper level of abstraction for a key-value store, and its
+  // contract/restrictions are poorly documented. For example, it returns
+  // non-OK `Status` for non-bottommost files and files undergoing compaction.
+  // Since we do not plan to maintain it, the contract will likely remain
+  // underspecified until its removal. Any user is encouraged to read the
+  // implementation carefully and migrate away from it when possible.
+  //
+  // Delete the file name from the db directory and update the internal state
+  // to reflect that. Supports deletion of sst and log files only. 'name' must
+  // be path relative to the db directory. eg. 000001.sst, /archive/000003.log
+  virtual Status DeleteFile(std::string name) = 0;
+
+  // Obtains a list of all live table (SST) files and how they fit into the
+  // LSM-trees, such as column family, level, key range, etc.
+  // This builds a de-normalized form of GetAllColumnFamilyMetaData().
+  // For information about all files in a DB, use GetLiveFilesStorageInfo().
+  virtual void GetLiveFilesMetaData(
+      std::vector<LiveFileMetaData>* /*metadata*/) {}
+
+  // Return a list of all table (SST) and blob files checksum info.
+  // Note: This function might be of limited use because it cannot be
+  // synchronized with other "live files" APIs. GetLiveFilesStorageInfo()
+  // is recommended instead.
+  virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
+
+  // Get information about all live files that make up a DB, for making
+  // live copies (Checkpoint, backups, etc.) or other storage-related purposes.
+  // If creating a live copy, use DisableFileDeletions() before and
+  // EnableFileDeletions() after to prevent deletions.
+  // For LSM-tree metadata, use Get*MetaData() functions instead.
+  virtual Status GetLiveFilesStorageInfo(
+      const LiveFilesStorageInfoOptions& opts,
+      std::vector<LiveFileStorageInfo>* files) = 0;
+
+  // Obtains the LSM-tree meta data of the specified column family of the DB,
+  // including metadata for each live table (SST) file in that column family.
+  virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
+                                       ColumnFamilyMetaData* /*metadata*/) {}
+
+  // Get the metadata of the default column family.
+  void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) {
+    GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
+  }
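+
+  // EXAMPLE (illustrative sketch; assumes an open DB* named `db`): inspecting
+  // the live SST files of the default column family, level by level:
+  //
+  //   ColumnFamilyMetaData meta;
+  //   db->GetColumnFamilyMetaData(&meta);
+  //   for (const auto& level : meta.levels) {
+  //     // level.files describes each live table file on this level
+  //   }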
+
+  // Obtains the LSM-tree meta data of all column families of the DB, including
+  // metadata for each live table (SST) file and each blob file in the DB.
+  virtual void GetAllColumnFamilyMetaData(
+      std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
+
+  // Retrieve the list of all files in the database except WAL files. The files
+  // are relative to the dbname (or db_paths/cf_paths), not absolute paths.
+  // (Not recommended with db_paths/cf_paths because that information is not
+  // returned.) Despite being relative paths, the file names begin with "/".
+  // The valid size of the manifest file is returned in manifest_file_size.
+  // The manifest file is an ever growing file, but only the portion specified
+  // by manifest_file_size is valid for this snapshot. Setting flush_memtable
+  // to true does Flush before recording the live files (unless the DB is
+  // read-only). Setting flush_memtable to false is useful when we don't want
+  // to wait for a flush, which may have to wait for a compaction to complete,
+  // taking an indeterminate amount of time.
+  //
+  // NOTE: Although GetLiveFiles() followed by GetSortedWalFiles() can generate
+  // a lossless backup, GetLiveFilesStorageInfo() is strongly recommended
+  // instead, because it ensures a single consistent view of all files is
+  // captured in one call.
+  virtual Status GetLiveFiles(std::vector<std::string>&,
+                              uint64_t* manifest_file_size,
+                              bool flush_memtable = true) = 0;
+
+  // Retrieve the sorted list of all wal files with earliest file first
+  virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
+
+  // Retrieve information about the current wal file
+  //
+  // Note that the log might have rolled after this call in which case
+  // the current_log_file would not point to the current log file.
+  //
+  // Additionally, for the sake of optimization current_log_file->StartSequence
+  // would always be set to 0
+  virtual Status GetCurrentWalFile(
+      std::unique_ptr<LogFile>* current_log_file) = 0;
+
+  // IngestExternalFile() will load a list of external SST files (1) into the
+  // DB. Two primary modes are supported:
+  // - Duplicate keys in the new files will overwrite existing keys (default)
+  // - Duplicate keys will be skipped (set ingest_behind=true)
+  // In the first mode we will try to find the lowest possible level that
+  // the file can fit in, and ingest the file into this level (2). A file that
+  // has a key range that overlaps with the memtable key range will require us
+  // to Flush the memtable first before ingesting the file.
+  // In the second mode we will always ingest in the bottommost level (see
+  // docs to IngestExternalFileOptions::ingest_behind).
+  //
+  // (1) External SST files can be created using SstFileWriter
+  // (2) We will try to ingest the files to the lowest possible level
+  //     even if the file compression doesn't match the level compression
+  // (3) If IngestExternalFileOptions->ingest_behind is set to true,
+  //     we always ingest at the bottommost level, which should be reserved
+  //     for this purpose (see DBOptions::allow_ingest_behind flag).
+  // (4) If IngestExternalFileOptions->fail_if_not_bottommost_level is set to
+  //     true, then this method can return Status::TryAgain() indicating that
+  //     the files cannot be ingested to the bottommost level, and it is the
+  //     user's responsibility to clear the bottommost level in the overlapping
+  //     range before re-attempting the ingestion.
+  virtual Status IngestExternalFile(
+      ColumnFamilyHandle* column_family,
+      const std::vector<std::string>& external_files,
+      const IngestExternalFileOptions& options) = 0;
+
+  virtual Status IngestExternalFile(
+      const std::vector<std::string>& external_files,
+      const IngestExternalFileOptions& options) {
+    return IngestExternalFile(DefaultColumnFamily(), external_files, options);
+  }
+
+  // IngestExternalFiles() will ingest files for multiple column families, and
+  // record the result atomically to the MANIFEST.
+  // If this function returns OK, all column families' ingestion must succeed.
+  // If this function returns non-OK, or the process crashes, then none of the
+  // files will be ingested into the database after recovery.
+  // Note that it is possible for the application to observe a mixed state
+  // during the execution of this function. If the user performs range scans
+  // over the column families with iterators, an iterator on one column family
+  // may return ingested data, while an iterator on another column family
+  // returns old data. Users can use snapshots for a consistent view of data.
+  // If your db ingests multiple SST files using this API, i.e. args.size()
+  // > 1, then RocksDB 5.15 and earlier will not be able to open it.
+  //
+  // REQUIRES: each arg corresponds to a different column family: namely, for
+  // 0 <= i < j < len(args), args[i].column_family != args[j].column_family.
+  virtual Status IngestExternalFiles(
+      const std::vector<IngestExternalFileArg>& args) = 0;
+
+  // CreateColumnFamilyWithImport() will create a new column family with
+  // column_family_name and import external SST files specified in `metadata`
+  // into this column family.
+  // (1) External SST files can be created using SstFileWriter.
+  // (2) External SST files can be exported from a particular column family in
+  //     an existing DB using Checkpoint::ExportColumnFamily. `metadata` should
+  //     be the output from Checkpoint::ExportColumnFamily.
+  // Option in import_options specifies whether the external files are copied
+  // or moved (default is copy). When option specifies copy, managing files at
+  // external_file_path is caller's responsibility. When option specifies a
+  // move, the call makes a best effort to delete the specified files at
+  // external_file_path on successful return, logging any failure to delete
+  // rather than returning in Status. Files are not modified on any error
+  // return, and a best effort is made to remove any newly-created files.
+  // On error return, the column family handle returned will be nullptr.
+  // ColumnFamily will be present on successful return and will not be present
+  // on error return. ColumnFamily may be present on any crash during this
+  // call.
+  virtual Status CreateColumnFamilyWithImport(
+      const ColumnFamilyOptions& options, const std::string& column_family_name,
+      const ImportColumnFamilyOptions& import_options,
+      const ExportImportFilesMetaData& metadata,
+      ColumnFamilyHandle** handle) = 0;
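+
+  // EXAMPLE (illustrative sketch; assumes an open DB* named `db` and its
+  // Options `options`; SstFileWriter comes from sst_file_writer.h): creating
+  // one external SST file and ingesting it via the APIs above:
+  //
+  //   SstFileWriter writer(EnvOptions(), options);
+  //   Status s = writer.Open("/tmp/file1.sst");
+  //   if (s.ok()) s = writer.Put("key1", "value1");
+  //   if (s.ok()) s = writer.Finish();
+  //   if (s.ok()) s = db->IngestExternalFile({"/tmp/file1.sst"},
+  //                                          IngestExternalFileOptions());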
+
+  // EXPERIMENTAL
+  // ClipColumnFamily() will clip the entries in the CF according to the range
+  // [begin_key, end_key).
+  // Returns OK on success, and a non-OK status on error.
+  // Any entries outside this range will be completely deleted (including
+  // tombstones).
+  // The main difference between ClipColumnFamily(begin, end) and
+  // DeleteRange(begin, end) is that the former physically deletes all keys
+  // outside the range, but is more heavyweight than the latter.
+  // This feature is mainly used to ensure that there is no overlapping key
+  // when calling CreateColumnFamilyWithImport() to import multiple CFs.
+  // Note that concurrent updates cannot be performed during Clip.
+  virtual Status ClipColumnFamily(ColumnFamilyHandle* column_family,
+                                  const Slice& begin_key,
+                                  const Slice& end_key) = 0;
+
+  // Verify the checksums of files in db. Currently the whole-file checksum of
+  // table files is checked.
+  virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) {
+    return Status::NotSupported("File verification not supported");
+  }
+
+  // Verify the block checksums of files in db. The block checksums of table
+  // files are checked.
+  virtual Status VerifyChecksum(const ReadOptions& read_options) = 0;
+
+  virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }
+
+  // Returns the unique ID which is read from the IDENTITY file during the
+  // opening of the database, by setting it in the identity variable.
+  // Returns Status::OK if the identity could be set properly.
+  virtual Status GetDbIdentity(std::string& identity) const = 0;
+
+  // Return a unique identifier for each DB object that is opened.
+  // This DB session ID should be unique among all open DB instances on all
+  // hosts, and should be unique among re-openings of the same or other DBs.
+  // (Two open DBs can have the same identity from the other function,
+  // GetDbIdentity, when one is physically copied from the other.)
+  virtual Status GetDbSessionId(std::string& session_id) const = 0;
+
+  // Returns the default column family handle
+  virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
+
+  virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
+                                          TablePropertiesCollection* props) = 0;
+  virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
+    return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
+  }
+  virtual Status GetPropertiesOfTablesInRange(
+      ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
+      TablePropertiesCollection* props) = 0;
+
+  virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/,
+                                     const Slice* /*begin*/,
+                                     const Slice* /*end*/) {
+    return Status::NotSupported("SuggestCompactRange() is not implemented.");
+  }
+
+  virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/,
+                           int /*target_level*/) {
+    return Status::NotSupported("PromoteL0() is not implemented.");
+  }
+
+  // Trace DB operations. Use EndTrace() to stop tracing.
+  virtual Status StartTrace(const TraceOptions& /*options*/,
+                            std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
+    return Status::NotSupported("StartTrace() is not implemented.");
+  }
+
+  virtual Status EndTrace() {
+    return Status::NotSupported("EndTrace() is not implemented.");
+  }
+
+  // IO Tracing operations. Use EndIOTrace() to stop tracing.
+  virtual Status StartIOTrace(const TraceOptions& /*options*/,
+                              std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
+    return Status::NotSupported("StartIOTrace() is not implemented.");
+  }
+
+  virtual Status EndIOTrace() {
+    return Status::NotSupported("EndIOTrace() is not implemented.");
+  }
+
+  // Trace block cache accesses. Use EndBlockCacheTrace() to stop tracing.
+  virtual Status StartBlockCacheTrace(
+      const TraceOptions& /*trace_options*/,
+      std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
+    return Status::NotSupported("StartBlockCacheTrace() is not implemented.");
+  }
+
+  virtual Status StartBlockCacheTrace(
+      const BlockCacheTraceOptions& /*options*/,
+      std::unique_ptr<BlockCacheTraceWriter>&& /*trace_writer*/) {
+    return Status::NotSupported("StartBlockCacheTrace() is not implemented.");
+  }
+
+  virtual Status EndBlockCacheTrace() {
+    return Status::NotSupported("EndBlockCacheTrace() is not implemented.");
+  }
+
+  // Create a default trace replayer.
+  virtual Status NewDefaultReplayer(
+      const std::vector<ColumnFamilyHandle*>& /*handles*/,
+      std::unique_ptr<TraceReader>&& /*reader*/,
+      std::unique_ptr<Replayer>* /*replayer*/) {
+    return Status::NotSupported("NewDefaultReplayer() is not implemented.");
+  }
+
+  // Needed for StackableDB
+  virtual DB* GetRootDB() { return this; }
+
+  // Given a window [start_time, end_time), set up a StatsHistoryIterator
+  // to access stats history. Note the start_time and end_time are epoch
+  // time measured in seconds, and end_time is an exclusive bound.
+  virtual Status GetStatsHistory(
+      uint64_t /*start_time*/, uint64_t /*end_time*/,
+      std::unique_ptr<StatsHistoryIterator>* /*stats_iterator*/) {
+    return Status::NotSupported("GetStatsHistory() is not implemented.");
+  }
+
+  // Make the secondary instance catch up with the primary by tailing and
+  // replaying the MANIFEST and WAL of the primary.
+  // Column families created by the primary after the secondary instance starts
+  // will be ignored unless the secondary instance closes and restarts with the
+  // newly created column families.
+  // Column families that exist before the secondary instance starts and are
+  // dropped by the primary afterwards will be marked as dropped. However, as
+  // long as the secondary instance does not delete the corresponding column
+  // family handles, the data of the column family is still accessible to the
+  // secondary.
+  virtual Status TryCatchUpWithPrimary() {
+    return Status::NotSupported("Supported only by secondary instance");
+  }
+};
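+
+// EXAMPLE (illustrative sketch; assumes DB::OpenAsSecondary, declared earlier
+// in this header, with placeholder paths `kDbPath` and `kSecondaryPath` and an
+// application-defined flag `keep_following`): a secondary instance repeatedly
+// catching up with the primary:
+//
+//   DB* secondary = nullptr;
+//   Status s = DB::OpenAsSecondary(Options(), kDbPath, kSecondaryPath,
+//                                  &secondary);
+//   while (s.ok() && keep_following) {
+//     s = secondary->TryCatchUpWithPrimary();
+//   }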
+
+struct WriteStallStatsMapKeys {
+  static const std::string& TotalStops();
+  static const std::string& TotalDelays();
+
+  static const std::string& CFL0FileCountLimitDelaysWithOngoingCompaction();
+  static const std::string& CFL0FileCountLimitStopsWithOngoingCompaction();
+
+  // REQUIRES:
+  // `cause` isn't any of these: `WriteStallCause::kNone`,
+  // `WriteStallCause::kCFScopeWriteStallCauseEnumMax`,
+  // `WriteStallCause::kDBScopeWriteStallCauseEnumMax`
+  //
+  // REQUIRES:
+  // `condition` isn't any of these: `WriteStallCondition::kNormal`
+  static std::string CauseConditionCount(WriteStallCause cause,
+                                         WriteStallCondition condition);
+};
+
+// Overloaded operators for enum class SizeApproximationFlags.
+inline DB::SizeApproximationFlags operator&(DB::SizeApproximationFlags lhs,
+                                            DB::SizeApproximationFlags rhs) {
+  return static_cast<DB::SizeApproximationFlags>(static_cast<uint8_t>(lhs) &
+                                                 static_cast<uint8_t>(rhs));
+}
+inline DB::SizeApproximationFlags operator|(DB::SizeApproximationFlags lhs,
+                                            DB::SizeApproximationFlags rhs) {
+  return static_cast<DB::SizeApproximationFlags>(static_cast<uint8_t>(lhs) |
+                                                 static_cast<uint8_t>(rhs));
+}
+
+inline Status DB::GetApproximateSizes(ColumnFamilyHandle* column_family,
+                                      const Range* ranges, int n,
+                                      uint64_t* sizes,
+                                      SizeApproximationFlags include_flags) {
+  SizeApproximationOptions options;
+  options.include_memtables =
+      ((include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) !=
+       SizeApproximationFlags::NONE);
+  options.include_files =
+      ((include_flags & SizeApproximationFlags::INCLUDE_FILES) !=
+       SizeApproximationFlags::NONE);
+  return GetApproximateSizes(options, column_family, ranges, n, sizes);
+}
+
+// Destroy the contents of the specified database.
+// Be very careful using this method.
+Status DestroyDB(const std::string& name, const Options& options,
+                 const std::vector<ColumnFamilyDescriptor>& column_families =
+                     std::vector<ColumnFamilyDescriptor>());
+
+// If a DB cannot be opened, you may attempt to call this method to
+// resurrect as much of the contents of the database as possible.
+// Some data may be lost, so be careful when calling this function
+// on a database that contains important information.
+//
+// With this API, we will warn and skip data associated with column families
+// not specified in column_families.
+//
+// @param column_families Descriptors for known column families
+Status RepairDB(const std::string& dbname, const DBOptions& db_options,
+                const std::vector<ColumnFamilyDescriptor>& column_families);
+
+// @param unknown_cf_opts Options for column families encountered during the
+//                        repair that were not specified in column_families.
+Status RepairDB(const std::string& dbname, const DBOptions& db_options,
+                const std::vector<ColumnFamilyDescriptor>& column_families,
+                const ColumnFamilyOptions& unknown_cf_opts);
+
+// @param options These options will be used for the database and for ALL
+//                column families encountered during the repair.
+Status RepairDB(const std::string& dbname, const Options& options);
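+
+// EXAMPLE (illustrative sketch; `dbname` and `options` are those from the
+// failed DB::Open call): repairing a database and then reopening it:
+//
+//   Status s = RepairDB(dbname, options);
+//   if (s.ok()) {
+//     DB* db = nullptr;
+//     s = DB::Open(options, dbname, &db);
+//   }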
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_bench_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_bench_tool.h
new file mode 100644
index 0000000000..17f4e6bde7
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_bench_tool.h
@@ -0,0 +1,11 @@
+// Copyright (c) 2013-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+int db_bench_tool(int argc, char** argv);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_dump_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_dump_tool.h
new file mode 100644
index 0000000000..2c97bad755
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_dump_tool.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+
+#include "rocksdb/db.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct DumpOptions {
+  // Database that will be dumped
+  std::string db_path;
+  // File location that will contain dump output
+  std::string dump_location;
+  // Don't include db information header in the dump
+  bool anonymous = false;
+};
+
+class DbDumpTool {
+ public:
+  bool Run(const DumpOptions& dump_options,
+           ROCKSDB_NAMESPACE::Options options = ROCKSDB_NAMESPACE::Options());
+};
+
+struct UndumpOptions {
+  // Database that we will load the dumped file into
+  std::string db_path;
+  // File location of the dumped file that will be loaded
+  std::string dump_location;
+  // Compact the db after loading the dumped file
+  bool compact_db = false;
+};
+
+class DbUndumpTool {
+ public:
+  bool Run(const UndumpOptions& undump_options,
+           ROCKSDB_NAMESPACE::Options options = ROCKSDB_NAMESPACE::Options());
+};
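+
+// EXAMPLE (illustrative sketch; `kDbPath`, `kDumpPath`, and `kNewDbPath` are
+// placeholder paths): dumping a database to a file and loading it back:
+//
+//   DumpOptions dump_opts;
+//   dump_opts.db_path = kDbPath;
+//   dump_opts.dump_location = kDumpPath;
+//   bool ok = DbDumpTool().Run(dump_opts);
+//
+//   UndumpOptions undump_opts;
+//   undump_opts.db_path = kNewDbPath;
+//   undump_opts.dump_location = kDumpPath;
+//   ok = ok && DbUndumpTool().Run(undump_opts);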
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_stress_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_stress_tool.h
new file mode 100644
index 0000000000..7d3d42c9d6
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/db_stress_tool.h
@@ -0,0 +1,11 @@
+// Copyright (c) 2013-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+int db_stress_tool(int argc, char** argv);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/env.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/env.h
new file mode 100644
index 0000000000..e3d4146416
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/env.h
@@ -0,0 +1,1882 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// An Env is an interface used by the rocksdb implementation to access
+// operating system functionality like the filesystem etc. Callers
+// may wish to provide a custom Env object when opening a database to
+// get fine-grained control; e.g., to rate limit file system operations.
+//
+// All Env implementations are safe for concurrent access from
+// multiple threads without any external synchronization.
+
+#pragma once
+
+#include <stdint.h>
+
+#include <cstdarg>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/functor_wrapper.h"
+#include "rocksdb/port_defs.h"
+#include "rocksdb/status.h"
+#include "rocksdb/thread_status.h"
+
+#ifdef _WIN32
+// Windows API macro interference
+#undef DeleteFile
+#undef GetCurrentTime
+#undef LoadLibrary
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define ROCKSDB_PRINTF_FORMAT_ATTR(format_param, dots_param) \
+  __attribute__((__format__(__printf__, format_param, dots_param)))
+#else
+#define ROCKSDB_PRINTF_FORMAT_ATTR(format_param, dots_param)
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+class DynamicLibrary;
+class FileLock;
+class Logger;
+class RandomAccessFile;
+class SequentialFile;
+class Slice;
+struct DataVerificationInfo;
+class WritableFile;
+class RandomRWFile;
+class MemoryMappedFileBuffer;
+class Directory;
+struct DBOptions;
+struct ImmutableDBOptions;
+struct MutableDBOptions;
+class RateLimiter;
+class ThreadStatusUpdater;
+struct ThreadStatus;
+class FileSystem;
+class SystemClock;
+struct ConfigOptions;
+
+const size_t kDefaultPageSize = 4 * 1024;
+
+// Options while opening a file to read/write
+struct EnvOptions {
+  // Construct with default Options
+  EnvOptions();
+
+  // Construct from Options
+  explicit EnvOptions(const DBOptions& options);
+
+  // If true, then use mmap to read data.
+  // Not recommended for 32-bit OS.
+  bool use_mmap_reads = false;
+
+  // If true, then use mmap to write data
+  bool use_mmap_writes = true;
+
+  // If true, then use O_DIRECT for reading data
+  bool use_direct_reads = false;
+
+  // If true, then use O_DIRECT for writing data
+  bool use_direct_writes = false;
+
+  // If false, fallocate() calls are bypassed
+  bool allow_fallocate = true;
+
+  // If true, set the FD_CLOEXEC on open fd.
+  bool set_fd_cloexec = true;
+
+  // Allows OS to incrementally sync files to disk while they are being
+  // written, in the background. Issue one request for every bytes_per_sync
+  // written. 0 turns it off.
+  // Default: 0
+  uint64_t bytes_per_sync = 0;
+
+  // When true, guarantees the file has at most `bytes_per_sync` bytes submitted
+  // for writeback at any given time.
+  //
+  //  - If `sync_file_range` is supported it achieves this by waiting for any
+  //    prior `sync_file_range`s to finish before proceeding. In this way,
+  //    processing (compression, etc.) can proceed uninhibited in the gap
+  //    between `sync_file_range`s, and we block only when I/O falls behind.
+  //  - Otherwise the `WritableFile::Sync` method is used. Note this mechanism
+  //    always blocks, thus preventing the interleaving of I/O and processing.
+  //
+  // Note: Enabling this option does not provide any additional persistence
+  // guarantees, as it may use `sync_file_range`, which does not write out
+  // metadata.
+  //
+  // Default: false
+  bool strict_bytes_per_sync = false;
+
+  // If true, we will preallocate the file with FALLOC_FL_KEEP_SIZE flag, which
+  // means that file size won't change as part of preallocation.
+  // If false, preallocation will also change the file size. This option will
+  // improve the performance in workloads where you sync the data on every
+  // write.
+  // By default, we set it to true for MANIFEST writes and false for
+  // WAL writes
+  bool fallocate_with_keep_size = true;
+
+  // See DBOptions doc
+  size_t compaction_readahead_size = 0;
+
+  // See DBOptions doc
+  size_t random_access_max_buffer_size = 0;
+
+  // See DBOptions doc
+  size_t writable_file_max_buffer_size = 1024 * 1024;
+
+  // If not nullptr, write rate limiting is enabled for flush and compaction
+  RateLimiter* rate_limiter = nullptr;
+};
+
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class Env : public Customizable {
+ public:
+  static const char* kDefaultName() { return "DefaultEnv"; }
+  struct FileAttributes {
+    // File name
+    std::string name;
+
+    // Size of file in bytes
+    uint64_t size_bytes;
+  };
+
+  Env();
+  // Construct an Env with a separate FileSystem and/or SystemClock
+  // implementation
+  explicit Env(const std::shared_ptr<FileSystem>& fs);
+  Env(const std::shared_ptr<FileSystem>& fs,
+      const std::shared_ptr<SystemClock>& clock);
+  // No copying allowed
+  Env(const Env&) = delete;
+  void operator=(const Env&) = delete;
+
+  ~Env() override;
+
+  static const char* Type() { return "Environment"; }
+
+  // Deprecated. Will be removed in a major release. Derived classes
+  // should implement this method.
+  const char* Name() const override { return ""; }
+
+  // Loads the environment specified by the input value into the result
+  // @see Customizable for a more detailed description of the parameters and
+  // return codes
+  //
+  // @param config_options Controls how the environment is loaded.
+  // @param value the name and associated properties for the environment.
+  // @param result On success, the environment that was loaded.
+  // @param guard If specified and the loaded environment is not static,
+  //      this value will contain the loaded environment (guard.get() ==
+  //      result).
+  // @return OK If the environment was successfully loaded (and optionally
+  //      prepared)
+  // @return not-OK if the load failed.
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& value, Env** result);
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& value, Env** result,
+                                 std::shared_ptr<Env>* guard);
+
+  // Loads the environment specified by the env and fs uri.
+  // If both are specified, an error is returned.
+  // Otherwise, the environment is created by loading (via CreateFromString)
+  // the appropriate env/fs from the corresponding values.
+  static Status CreateFromUri(const ConfigOptions& options,
+                              const std::string& env_uri,
+                              const std::string& fs_uri, Env** result,
+                              std::shared_ptr<Env>* guard);
+
+  // Return a default environment suitable for the current operating
+  // system. Sophisticated users may wish to provide their own Env
+  // implementation instead of relying on this default environment.
+  //
+  // The result of Default() belongs to rocksdb and must never be deleted.
+  static Env* Default();
+
+  // See FileSystem::RegisterDbPaths.
+  virtual Status RegisterDbPaths(const std::vector<std::string>& /*paths*/) {
+    return Status::OK();
+  }
+  // See FileSystem::UnregisterDbPaths.
+  virtual Status UnregisterDbPaths(const std::vector<std::string>& /*paths*/) {
+    return Status::OK();
+  }
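+
+  // EXAMPLE (illustrative sketch; "custom-env://..." is a placeholder URI):
+  // loading an Env by name while keeping ownership in a guard:
+  //
+  //   Env* env = Env::Default();
+  //   std::shared_ptr<Env> env_guard;
+  //   Status s = Env::CreateFromString(ConfigOptions(), "custom-env://...",
+  //                                    &env, &env_guard);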
+
+  // Create a brand new sequentially-readable file with the specified name.
+  // On success, stores a pointer to the new file in *result and returns OK.
+  // On failure stores nullptr in *result and returns non-OK. If the file does
+  // not exist, returns a non-OK status.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  virtual Status NewSequentialFile(const std::string& fname,
+                                   std::unique_ptr<SequentialFile>* result,
+                                   const EnvOptions& options) = 0;
+
+  // Create a brand new random access read-only file with the
+  // specified name. On success, stores a pointer to the new file in
+  // *result and returns OK. On failure stores nullptr in *result and
+  // returns non-OK. If the file does not exist, returns a non-OK
+  // status.
+  //
+  // The returned file may be concurrently accessed by multiple threads.
+  virtual Status NewRandomAccessFile(const std::string& fname,
+                                     std::unique_ptr<RandomAccessFile>* result,
+                                     const EnvOptions& options) = 0;
+  // These values match Linux definition
+  // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56
+  enum WriteLifeTimeHint {
+    WLTH_NOT_SET = 0,  // No hint information set
+    WLTH_NONE,         // No hints about write life time
+    WLTH_SHORT,        // Data written has a short life time
+    WLTH_MEDIUM,       // Data written has a medium life time
+    WLTH_LONG,         // Data written has a long life time
+    WLTH_EXTREME,      // Data written has an extremely long life time
+  };
+
+  // Create an object that writes to a new file with the specified
+  // name. Deletes any existing file with the same name and creates a
+  // new file. On success, stores a pointer to the new file in
+  // *result and returns OK. On failure stores nullptr in *result and
+  // returns non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  virtual Status NewWritableFile(const std::string& fname,
+                                 std::unique_ptr<WritableFile>* result,
+                                 const EnvOptions& options) = 0;
+
+  // Create an object that writes to a file with the specified name.
+  // `WritableFile::Append()`s will append after any existing content. If the
+  // file does not already exist, creates it.
+  //
+  // On success, stores a pointer to the file in *result and returns OK. On
+  // failure stores nullptr in *result and returns non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  virtual Status ReopenWritableFile(const std::string& /*fname*/,
+                                    std::unique_ptr<WritableFile>* /*result*/,
+                                    const EnvOptions& /*options*/) {
+    return Status::NotSupported("Env::ReopenWritableFile() not supported.");
+  }
+
+  // Reuse an existing file by renaming it and opening it as writable.
+  virtual Status ReuseWritableFile(const std::string& fname,
+                                   const std::string& old_fname,
+                                   std::unique_ptr<WritableFile>* result,
+                                   const EnvOptions& options);
+
+  // Open `fname` for random read and write; if the file doesn't exist it
+  // will be created. On success, stores a pointer to the new file in
+  // *result and returns OK. On failure returns non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  virtual Status NewRandomRWFile(const std::string& /*fname*/,
+                                 std::unique_ptr<RandomRWFile>* /*result*/,
+                                 const EnvOptions& /*options*/) {
+    return Status::NotSupported("RandomRWFile is not implemented in this Env");
+  }
+
+  // Opens `fname` as a memory-mapped file for read and write (in-place updates
+  // only, i.e., no appends). On success, stores a raw buffer covering the
+  // whole file in `*result`. The file must exist prior to this call.
+  virtual Status NewMemoryMappedFileBuffer(
+      const std::string& /*fname*/,
+      std::unique_ptr<MemoryMappedFileBuffer>* /*result*/) {
+    return Status::NotSupported(
+        "MemoryMappedFileBuffer is not implemented in this Env");
+  }
+
+  // Create an object that represents a directory. Will fail if the directory
+  // doesn't exist. If the directory exists, it will open the directory
+  // and create a new Directory object.
+  //
+  // On success, stores a pointer to the new Directory in
+  // *result and returns OK. On failure stores nullptr in *result and
+  // returns non-OK.
+  virtual Status NewDirectory(const std::string& name,
+                              std::unique_ptr<Directory>* result) = 0;
+
+  // Returns OK if the named file exists.
+  //         NotFound if the named file does not exist,
+  //                  the calling process does not have permission to determine
+  //                  whether this file exists, or if the path is invalid.
+  //         IOError if an IO Error was encountered
+  virtual Status FileExists(const std::string& fname) = 0;
+
+  // Store in *result the names of the children of the specified directory.
+  // The names are relative to "dir", and shall never include the
+  // names `.` or `..`.
+  // Original contents of *result are dropped.
+  // Returns OK if "dir" exists and "*result" contains its children.
+  //         NotFound if "dir" does not exist, the calling process does not
+  //                  have permission to access "dir", or if "dir" is invalid.
+  //         IOError if an IO Error was encountered
+  virtual Status GetChildren(const std::string& dir,
+                             std::vector<std::string>* result) = 0;
+
+  // Store in *result the attributes of the children of the specified
+  // directory. In case the implementation lists the directory prior to
+  // iterating the files and files are concurrently deleted, the deleted files
+  // will be omitted from result.
+  // The name attributes are relative to "dir", and shall never include the
+  // names `.` or `..`.
+  // Original contents of *result are dropped.
+  // Returns OK if "dir" exists and "*result" contains its children.
+  //         NotFound if "dir" does not exist, the calling process does not
+  //                  have permission to access "dir", or if "dir" is invalid.
+  //         IOError if an IO Error was encountered
+  virtual Status GetChildrenFileAttributes(const std::string& dir,
+                                           std::vector<FileAttributes>* result);
+
+  // Delete the named file.
+  virtual Status DeleteFile(const std::string& fname) = 0;
+
+  // Truncate the named file to the specified size.
+  virtual Status Truncate(const std::string& /*fname*/, size_t /*size*/) {
+    return Status::NotSupported("Truncate is not supported for this Env");
+  }
+
+  // Create the specified directory. Returns an error if the directory exists.
+  virtual Status CreateDir(const std::string& dirname) = 0;
+
+  // Creates the directory if missing. Returns OK if it exists, or if it was
+  // successfully created.
+  virtual Status CreateDirIfMissing(const std::string& dirname) = 0;
+
+  // Delete the specified directory.
+  // Many implementations of this function will only delete a directory if it
+  // is empty.
+  virtual Status DeleteDir(const std::string& dirname) = 0;
+
+  // Store the size of fname in *file_size.
+  virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0;
+
+  // Store the last modification time of fname in *file_mtime.
+  virtual Status GetFileModificationTime(const std::string& fname,
+                                         uint64_t* file_mtime) = 0;
+  // Rename file src to target.
+  virtual Status RenameFile(const std::string& src,
+                            const std::string& target) = 0;
+
+  // Hard link file src to target.
+  virtual Status LinkFile(const std::string& /*src*/,
+                          const std::string& /*target*/) {
+    return Status::NotSupported("LinkFile is not supported for this Env");
+  }
+
+  virtual Status NumFileLinks(const std::string& /*fname*/,
+                              uint64_t* /*count*/) {
+    return Status::NotSupported(
+        "Getting number of file links is not supported for this Env");
+  }
+
+  virtual Status AreFilesSame(const std::string& /*first*/,
+                              const std::string& /*second*/, bool* /*res*/) {
+    return Status::NotSupported("AreFilesSame is not supported for this Env");
+  }
+
+  // Lock the specified file. Used to prevent concurrent access to
+  // the same db by multiple processes. On failure, stores nullptr in
+  // *lock and returns non-OK.
+  //
+  // On success, stores a pointer to the object that represents the
+  // acquired lock in *lock and returns OK. The caller should call
+  // UnlockFile(*lock) to release the lock. If the process exits,
+  // the lock will be automatically released.
+  //
+  // If somebody else already holds the lock, finishes immediately
+  // with a failure. I.e., this call does not wait for existing locks
+  // to go away.
+  //
+  // May create the named file if it does not already exist.
+  virtual Status LockFile(const std::string& fname, FileLock** lock) = 0;
+
+  // Release the lock acquired by a previous successful call to LockFile.
+  // REQUIRES: lock was returned by a successful LockFile() call
+  // REQUIRES: lock has not already been unlocked.
+  virtual Status UnlockFile(FileLock* lock) = 0;
+
+  // Opens `lib_name` as a dynamic library.
+  // If 'search_path' is specified, breaks the path into its components
+  // based on the appropriate platform separator (";" or ":") and looks for the
+  // library in those directories. If 'search_path' is not specified, uses the
+  // default library path search mechanism (such as LD_LIBRARY_PATH). On
+  // success, stores a dynamic library in `*result`.
+  virtual Status LoadLibrary(const std::string& /*lib_name*/,
+                             const std::string& /*search_path */,
+                             std::shared_ptr<DynamicLibrary>* /*result*/) {
+    return Status::NotSupported("LoadLibrary is not implemented in this Env");
+  }
+
+  // Priority for scheduling job in thread pool
+  enum Priority { BOTTOM, LOW, HIGH, USER, TOTAL };
+
+  static std::string PriorityToString(Priority priority);
+
+  // Priority for requesting bytes in rate limiter scheduler
+  enum IOPriority {
+    IO_LOW = 0,
+    IO_MID = 1,
+    IO_HIGH = 2,
+    IO_USER = 3,
+    IO_TOTAL = 4
+  };
+
+  // EXPERIMENTAL
+  enum class IOActivity : uint8_t {
+    kFlush = 0,
+    kCompaction = 1,
+    kDBOpen = 2,
+    kUnknown,  // Keep last for easy array of non-unknowns
+  };
+
+  // Arrange to run "(*function)(arg)" once in a background thread, in
+  // the thread pool specified by pri. By default, jobs go to the 'LOW'
+  // priority thread pool.
+
+  // "function" may run in an unspecified thread. Multiple functions
+  // added to the same Env may run concurrently in different threads.
+  // I.e., the caller may not assume that background work items are
+  // serialized.
+  // When the UnSchedule function is called, the unschedFunction
+  // registered at the time of Schedule is invoked with arg as a parameter.
+  virtual void Schedule(void (*function)(void* arg), void* arg,
+                        Priority pri = LOW, void* tag = nullptr,
+                        void (*unschedFunction)(void* arg) = nullptr) = 0;
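+
+  // EXAMPLE (illustrative sketch; `DoWork` and `CleanupWork` are hypothetical
+  // callbacks and `state` is caller-owned and outlives the job): scheduling a
+  // tagged background job and later unscheduling it (see UnSchedule() below):
+  //
+  //   env->Schedule(&DoWork, state, Env::Priority::LOW, /*tag=*/state,
+  //                 &CleanupWork);
+  //   // ...
+  //   int removed = env->UnSchedule(/*arg=*/state, Env::Priority::LOW);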
+
+  // Arrange to remove jobs for the given arg from the queue_ if they are not
+  // already scheduled. The caller is expected to have an exclusive lock on
+  // arg.
+  virtual int UnSchedule(void* /*arg*/, Priority /*pri*/) { return 0; }
+
+  // Start a new thread, invoking "function(arg)" within the new thread.
+  // When "function(arg)" returns, the thread will be destroyed.
+  virtual void StartThread(void (*function)(void* arg), void* arg) = 0;
+
+  // Start a new thread, invoking "function(args...)" within the new thread.
+  // When "function(args...)" returns, the thread will be destroyed.
+  template <typename FunctionT, typename... Args>
+  void StartThreadTyped(FunctionT function, Args&&... args) {
+    using FWType = FunctorWrapper<Args...>;
+    StartThread(
+        [](void* arg) {
+          auto* functor = static_cast<FWType*>(arg);
+          functor->invoke();
+          delete functor;
+        },
+        new FWType(std::function<void(Args...)>(function),
+                   std::forward<Args>(args)...));
+  }
+
+  // Wait for all threads started by StartThread to terminate.
+  virtual void WaitForJoin() {}
+
+  // Reserve available background threads in the specified thread pool.
+  virtual int ReserveThreads(int /*threads_to_be_reserved*/, Priority /*pri*/) {
+    return 0;
+  }
+
+  // Release a specific number of reserved threads from the specified thread
+  // pool
+  virtual int ReleaseThreads(int /*threads_to_be_released*/, Priority /*pri*/) {
+    return 0;
+  }
+
+  // Get thread pool queue length for specific thread pool.
+  virtual unsigned int GetThreadPoolQueueLen(Priority /*pri*/ = LOW) const {
+    return 0;
+  }
+
+  // *path is set to a temporary directory that can be used for testing. It may
+  // or may not have just been created. The directory may or may not differ
+  // between runs of the same process, but subsequent calls will return the
+  // same directory.
+  virtual Status GetTestDirectory(std::string* path) = 0;
+
+  // Creates and returns a default logger (an instance of EnvLogger) for
+  // storing informational messages. Derived classes can override to provide a
+  // custom logger.
+  virtual Status NewLogger(const std::string& fname,
+                           std::shared_ptr<Logger>* result);
+
+  // Returns the number of micro-seconds since some fixed point in time.
+  // It is often used as system time such as in GenericRateLimiter
+  // and other places so a port needs to return system time in order to work.
+  virtual uint64_t NowMicros() = 0;
+
+  // Returns the number of nano-seconds since some fixed point in time. Only
+  // useful for computing deltas of time in one run.
+  // Default implementation simply relies on NowMicros.
+  // In platform-specific implementations, NowNanos() should return time points
+  // that are MONOTONIC.
+  virtual uint64_t NowNanos() { return NowMicros() * 1000; }
+
+  // 0 indicates not supported.
+  virtual uint64_t NowCPUNanos() { return 0; }
+
+  // Sleep/delay the thread for the prescribed number of micro-seconds.
+  virtual void SleepForMicroseconds(int micros) = 0;
+
+  // Get the current host name as a null terminated string iff the string
+  // length is < len. The hostname should otherwise be truncated to len.
+  virtual Status GetHostName(char* name, uint64_t len) = 0;
+
+  // Get the current hostname from the given env as a std::string in result.
+  // The result may be truncated if the hostname is too long.
+  virtual Status GetHostNameString(std::string* result);
+
+  // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
+  // Only overwrites *unix_time on success.
+  virtual Status GetCurrentTime(int64_t* unix_time) = 0;
+
+  // Get full directory name for this db.
+  virtual Status GetAbsolutePath(const std::string& db_path,
+                                 std::string* output_path) = 0;
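+
+  // EXAMPLE (illustrative sketch; assumes `env` is an Env*): resolving a
+  // relative DB path and reading the current wall-clock time:
+  //
+  //   std::string abs_path;
+  //   Status s = env->GetAbsolutePath("testdb", &abs_path);
+  //   int64_t now = 0;
+  //   if (s.ok()) s = env->GetCurrentTime(&now);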
+
+  // The number of background worker threads of a specific thread pool
+  // for this environment. 'LOW' is the default pool.
+  // default number: 1
+  virtual void SetBackgroundThreads(int number, Priority pri = LOW) = 0;
+  virtual int GetBackgroundThreads(Priority pri = LOW) = 0;
+
+  virtual Status SetAllowNonOwnerAccess(bool /*allow_non_owner_access*/) {
+    return Status::NotSupported("Env::SetAllowNonOwnerAccess() not supported.");
+  }
+
+  // Enlarge number of background worker threads of a specific thread pool
+  // for this environment if it is smaller than specified. 'LOW' is the default
+  // pool.
+  virtual void IncBackgroundThreadsIfNeeded(int number, Priority pri) = 0;
+
+  // Lower IO priority for threads from the specified pool.
+  virtual void LowerThreadPoolIOPriority(Priority /*pool*/ = LOW) {}
+
+  // Lower CPU priority for threads from the specified pool.
+  virtual Status LowerThreadPoolCPUPriority(Priority /*pool*/,
+                                            CpuPriority /*pri*/) {
+    return Status::NotSupported(
+        "Env::LowerThreadPoolCPUPriority(Priority, CpuPriority) not supported");
+  }
+
+  // Lower CPU priority for threads from the specified pool.
+  virtual void LowerThreadPoolCPUPriority(Priority /*pool*/ = LOW) {}
+
+  // Converts seconds-since-Jan-01-1970 to a printable string
+  virtual std::string TimeToString(uint64_t time) = 0;
+
+  // Generates a human-readable unique ID that can be used to identify a DB.
+  // In built-in implementations, this is an RFC-4122 UUID string, but might
+  // not be in all implementations. Overriding is not recommended.
+  // NOTE: this has not been validated for use in cryptography
+  virtual std::string GenerateUniqueId();
+
+  // OptimizeForLogRead will create a new EnvOptions object that is a copy of
+  // the EnvOptions in the parameters, but is optimized for reading log files.
+  virtual EnvOptions OptimizeForLogRead(const EnvOptions& env_options) const;
+
+  // OptimizeForManifestRead will create a new EnvOptions object that is a copy
+  // of the EnvOptions in the parameters, but is optimized for reading manifest
+  // files.
+  virtual EnvOptions OptimizeForManifestRead(
+      const EnvOptions& env_options) const;
+
+  // OptimizeForLogWrite will create a new EnvOptions object that is a copy of
+  // the EnvOptions in the parameters, but is optimized for writing log files.
+  // Default implementation returns the copy of the same object.
+  virtual EnvOptions OptimizeForLogWrite(const EnvOptions& env_options,
+                                         const DBOptions& db_options) const;
+  // OptimizeForManifestWrite will create a new EnvOptions object that is a
+  // copy of the EnvOptions in the parameters, but is optimized for writing
+  // manifest files. Default implementation returns the copy of the same
+  // object.
+  virtual EnvOptions OptimizeForManifestWrite(
+      const EnvOptions& env_options) const;
+
+  // OptimizeForCompactionTableWrite will create a new EnvOptions object that
+  // is a copy of the EnvOptions in the parameters, but is optimized for
+  // writing table files.
+  virtual EnvOptions OptimizeForCompactionTableWrite(
+      const EnvOptions& env_options,
+      const ImmutableDBOptions& immutable_ops) const;
+
+  // OptimizeForCompactionTableRead will create a new EnvOptions object that
+  // is a copy of the EnvOptions in the parameters, but is optimized for
+  // reading table files.
+  virtual EnvOptions OptimizeForCompactionTableRead(
+      const EnvOptions& env_options,
+      const ImmutableDBOptions& db_options) const;
+
+  // OptimizeForBlobFileRead will create a new EnvOptions object that
+  // is a copy of the EnvOptions in the parameters, but is optimized for
+  // reading blob files.
+  virtual EnvOptions OptimizeForBlobFileRead(
+      const EnvOptions& env_options,
+      const ImmutableDBOptions& db_options) const;
+
+  // Returns the status of all threads that belong to the current Env.
+  virtual Status GetThreadList(std::vector<ThreadStatus>* /*thread_list*/) {
+    return Status::NotSupported("Env::GetThreadList() not supported.");
+  }
+
+  // Returns the pointer to ThreadStatusUpdater. This function will be
+  // used in RocksDB internally to update thread status and supports
+  // GetThreadList().
+  virtual ThreadStatusUpdater* GetThreadStatusUpdater() const {
+    return thread_status_updater_;
+  }
+
+  // Returns the ID of the current thread.
+  virtual uint64_t GetThreadID() const;
+
+// This seems to clash with a macro on Windows, so #undef it here
+#undef GetFreeSpace
+
+  // Get the amount of free disk space
+  virtual Status GetFreeSpace(const std::string& /*path*/,
+                              uint64_t* /*diskfree*/) {
+    return Status::NotSupported("Env::GetFreeSpace() not supported.");
+  }
+
+  // Check whether the specified path is a directory
+  virtual Status IsDirectory(const std::string& /*path*/, bool* /*is_dir*/) {
+    return Status::NotSupported("Env::IsDirectory() not supported.");
+  }
+
+  virtual void SanitizeEnvOptions(EnvOptions* /*env_opts*/) const {}
+
+  // Get the FileSystem implementation this Env was constructed with. It
+  // could be a fully implemented one, or a wrapper class around the Env
+  const std::shared_ptr<FileSystem>& GetFileSystem() const;
+
+  // Get the SystemClock implementation this Env was constructed with. It
+  // could be a fully implemented one, or a wrapper class around the Env
+  const std::shared_ptr<SystemClock>& GetSystemClock() const;
+
+  // If you're adding methods here, remember to add them to EnvWrapper too.
+
+ protected:
+  // The pointer to an internal structure that will update the
+  // status of each thread.
+  ThreadStatusUpdater* thread_status_updater_;
+
+  // Pointer to the underlying FileSystem implementation
+  std::shared_ptr<FileSystem> file_system_;
+
+  // Pointer to the underlying SystemClock implementation
+  std::shared_ptr<SystemClock> system_clock_;
+
+ private:
+  static const size_t kMaxHostNameLen = 256;
+};
+
+// The factory function to construct a ThreadStatusUpdater. Any Env
+// that supports the GetThreadList() feature should call this function in its
+// constructor to initialize thread_status_updater_.
+ThreadStatusUpdater* CreateThreadStatusUpdater();
+
+// A file abstraction for reading sequentially through a file
+class SequentialFile {
+ public:
+  SequentialFile() {}
+  virtual ~SequentialFile();
+
+  // Read up to "n" bytes from the file. "scratch[0..n-1]" may be
+  // written by this routine. Sets "*result" to the data that was
+  // read (including if fewer than "n" bytes were successfully read).
+  // May set "*result" to point at data in "scratch[0..n-1]", so
+  // "scratch[0..n-1]" must be live when "*result" is used.
+  // If an error was encountered, returns a non-OK status.
+  //
+  // After call, result->size() < n only if end of file has been
+  // reached (or non-OK status). Read might fail if called again after
+  // first result->size() < n.
+  //
+  // REQUIRES: External synchronization
+  virtual Status Read(size_t n, Slice* result, char* scratch) = 0;
+
+  // Skip "n" bytes from the file. This is guaranteed to be no
+  // slower than reading the same data, but may be faster.
+  //
+  // If end of file is reached, skipping will stop at the end of the
+  // file, and Skip will return OK.
+  //
+  // REQUIRES: External synchronization
+  virtual Status Skip(uint64_t n) = 0;
+
+  // Indicates to the upper layers whether the current SequentialFile
+  // implementation uses direct IO.
+  virtual bool use_direct_io() const { return false; }
+
+  // Use the returned alignment value to allocate
+  // aligned buffer for Direct I/O
+  virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  virtual Status InvalidateCache(size_t /*offset*/, size_t /*length*/) {
+    return Status::NotSupported(
+        "SequentialFile::InvalidateCache not supported.");
+  }
+
+  // Positioned Read for direct I/O
+  // If Direct I/O enabled, offset, n, and scratch should be properly aligned
+  virtual Status PositionedRead(uint64_t /*offset*/, size_t /*n*/,
+                                Slice* /*result*/, char* /*scratch*/) {
+    return Status::NotSupported(
+        "SequentialFile::PositionedRead() not supported.");
+  }
+
+  // If you're adding methods here, remember to add them to
+  // SequentialFileWrapper too.
+};
+
+// A read IO request structure for use in MultiRead
+struct ReadRequest {
+  // File offset in bytes
+  uint64_t offset;
+
+  // Length to read in bytes. `result` only returns fewer bytes if end of file
+  // is hit (or `status` is not OK).
+  size_t len;
+
+  // A buffer that MultiRead() can optionally place data in. It can
+  // ignore this and allocate its own buffer
+  char* scratch;
+
+  // Output parameter set by MultiRead() to point to the data buffer, and
+  // the number of valid bytes
+  Slice result;
+
+  // Status of read
+  Status status;
+};
+
+// A file abstraction for randomly reading the contents of a file.
+class RandomAccessFile {
+ public:
+  RandomAccessFile() {}
+  virtual ~RandomAccessFile();
+
+  // Read up to "n" bytes from the file starting at "offset".
+  // "scratch[0..n-1]" may be written by this routine. Sets "*result"
+  // to the data that was read (including if fewer than "n" bytes were
+  // successfully read). May set "*result" to point at data in
+  // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
+  // "*result" is used. If an error was encountered, returns a non-OK
+  // status.
+  //
+  // After call, result->size() < n only if end of file has been
+  // reached (or non-OK status). Read might fail if called again after
+  // first result->size() < n.
+  //
+  // Safe for concurrent use by multiple threads.
+  // If Direct I/O enabled, offset, n, and scratch should be aligned properly.
+  virtual Status Read(uint64_t offset, size_t n, Slice* result,
+                      char* scratch) const = 0;
+
+  // Readahead the file starting from offset by n bytes for caching.
+  virtual Status Prefetch(uint64_t /*offset*/, size_t /*n*/) {
+    return Status::OK();
+  }
+
+  // Read a bunch of blocks as described by reqs. The blocks can
+  // optionally be read in parallel. This is a synchronous call, i.e. it
+  // should return after all reads have completed. The reads will be
+  // non-overlapping. If the function's return Status is not ok, the status
+  // of individual requests will be ignored and the return status will be
+  // assumed for all read requests.
+  // The function's return status is only meant for any errors that occur
+  // before even processing specific read requests.
+  virtual Status MultiRead(ReadRequest* reqs, size_t num_reqs) {
+    assert(reqs != nullptr);
+    for (size_t i = 0; i < num_reqs; ++i) {
+      ReadRequest& req = reqs[i];
+      req.status = Read(req.offset, req.len, &req.result, req.scratch);
+    }
+    return Status::OK();
+  }
+
+  // Tries to get a unique ID for this file that will be the same each time
+  // the file is opened (and will stay the same while the file is open).
+  // Furthermore, it tries to make this ID at most "max_size" bytes. If such an
+  // ID can be created this function returns the length of the ID and places it
+  // in "id"; otherwise, this function returns 0, in which case "id"
+  // may not have been modified.
+  //
+  // This function guarantees, for IDs from a given environment, two unique ids
+  // cannot be made equal to each other by adding arbitrary bytes to one of
+  // them. That is, no unique ID is the prefix of another.
+  //
+  // This function guarantees that the returned ID will not be interpretable as
+  // a single varint.
+  //
+  // Note: these IDs are only valid for the duration of the process.
+  virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const {
+    return 0;  // Default implementation to prevent issues with backwards
+               // compatibility.
+  }
+
+  enum AccessPattern { NORMAL, RANDOM, SEQUENTIAL, WILLNEED, DONTNEED };
+
+  virtual void Hint(AccessPattern /*pattern*/) {}
+
+  // Indicates to the upper layers whether the current RandomAccessFile
+  // implementation uses direct IO.
+  virtual bool use_direct_io() const { return false; }
+
+  // Use the returned alignment value to allocate
+  // aligned buffer for Direct I/O
+  virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  virtual Status InvalidateCache(size_t /*offset*/, size_t /*length*/) {
+    return Status::NotSupported(
+        "RandomAccessFile::InvalidateCache not supported.");
+  }
+
+  // If you're adding methods here, remember to add them to
+  // RandomAccessFileWrapper too.
+};
+
+// A file abstraction for sequential writing. The implementation
+// must provide buffering since callers may append small fragments
+// at a time to the file.
+class WritableFile {
+ public:
+  WritableFile()
+      : last_preallocated_block_(0),
+        preallocation_block_size_(0),
+        io_priority_(Env::IO_TOTAL),
+        write_hint_(Env::WLTH_NOT_SET),
+        strict_bytes_per_sync_(false) {}
+
+  explicit WritableFile(const EnvOptions& options)
+      : last_preallocated_block_(0),
+        preallocation_block_size_(0),
+        io_priority_(Env::IO_TOTAL),
+        write_hint_(Env::WLTH_NOT_SET),
+        strict_bytes_per_sync_(options.strict_bytes_per_sync) {}
+  // No copying allowed
+  WritableFile(const WritableFile&) = delete;
+  void operator=(const WritableFile&) = delete;
+
+  virtual ~WritableFile();
+
+  // Append data to the end of the file
+  // Note: A WritableFile object must support either Append or
+  // PositionedAppend, so the users cannot mix the two.
+  virtual Status Append(const Slice& data) = 0;
+
+  // Append data with verification information.
+  // Note that this API change is experimental and it might be changed in
+  // the future. Currently, RocksDB only generates crc32c based checksum for
+  // the file writes when the checksum handoff option is set.
+  // Expected behavior: if currently ChecksumType::kCRC32C is not supported by
+  // WritableFile, the information in DataVerificationInfo can be ignored
+  // (i.e. does not perform checksum verification).
+  virtual Status Append(const Slice& data,
+                        const DataVerificationInfo& /* verification_info */) {
+    return Append(data);
+  }
+
+  // PositionedAppend data to the specified offset. The new EOF after append
+  // must be larger than the previous EOF. This is to be used when writes are
+  // not backed by OS buffers and hence have to always start from the start of
+  // the sector. The implementation thus needs to also rewrite the last
+  // partial sector.
+  // Note: PositionedAppend does not guarantee moving the file offset after the
+  // write. A WritableFile object must support either Append or
+  // PositionedAppend, so the users cannot mix the two.
+  //
+  // PositionedAppend() can only happen on the page/sector boundaries. For that
+  // reason, if the last write was an incomplete sector we still need to rewind
+  // back to the nearest sector/page and rewrite the portion of it with
+  // whatever we need to add. We need to keep track of where we stopped
+  // writing.
+  //
+  // PositionedAppend() can only write whole sectors. For that reason we have
+  // to pad with zeros for the last write and trim the file when closing
+  // according to the position we kept in the previous step.
+  //
+  // PositionedAppend() requires an aligned buffer to be passed in. The
+  // alignment required is queried via GetRequiredBufferAlignment()
+  virtual Status PositionedAppend(const Slice& /* data */,
+                                  uint64_t /* offset */) {
+    return Status::NotSupported(
+        "WritableFile::PositionedAppend() not supported.");
+  }
+
+  // PositionedAppend data with verification information.
+  // Note that this API change is experimental and it might be changed in
+  // the future. Currently, RocksDB only generates crc32c based checksum for
+  // the file writes when the checksum handoff option is set.
+  // Expected behavior: if currently ChecksumType::kCRC32C is not supported by
+  // WritableFile, the information in DataVerificationInfo can be ignored
+  // (i.e. does not perform checksum verification).
+  virtual Status PositionedAppend(
+      const Slice& /* data */, uint64_t /* offset */,
+      const DataVerificationInfo& /* verification_info */) {
+    return Status::NotSupported("PositionedAppend");
+  }
+
+  // Truncate is necessary to trim the file to the correct size
+  // before closing. It is not always possible to keep track of the file
+  // size due to whole-page writes. The behavior is undefined if called
+  // with other writes to follow.
+  virtual Status Truncate(uint64_t /*size*/) { return Status::OK(); }
+  virtual Status Close() = 0;
+  virtual Status Flush() = 0;
+  virtual Status Sync() = 0;  // sync data
+
+  /*
+   * Sync data and/or metadata as well.
+   * By default, sync only data.
+   * Override this method for environments where we need to sync
+   * metadata as well.
+   */
+  virtual Status Fsync() { return Sync(); }
+
+  // true if Sync() and Fsync() are safe to call concurrently with Append()
+  // and Flush().
+  virtual bool IsSyncThreadSafe() const { return false; }
+
+  // Indicates to the upper layers whether the current WritableFile
+  // implementation uses direct IO.
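+  //
+  // Alignment sketch (illustrative, not upstream documentation): when
+  // use_direct_io() returns true, callers are expected to size and align
+  // their buffers to GetRequiredBufferAlignment(), e.g.:
+  //
+  //   const size_t align = file->GetRequiredBufferAlignment();
+  //   const size_t len = ((n + align - 1) / align) * align;  // round up
+  //   char* buf = static_cast<char*>(std::aligned_alloc(align, len));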
+  virtual bool use_direct_io() const { return false; }
+
+  // Use the returned alignment value to allocate
+  // aligned buffer for Direct I/O
+  virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
+
+  /*
+   * If rate limiting is enabled, change the file-granularity priority used in
+   * rate-limiting writes.
+   *
+   * In the presence of finer-granularity priority such as
+   * `WriteOptions::rate_limiter_priority`, this file-granularity priority may
+   * be overridden by a non-Env::IO_TOTAL finer-granularity priority and used
+   * as a fallback for Env::IO_TOTAL finer-granularity priority.
+   *
+   * If rate limiting is not enabled, this call has no effect.
+   */
+  virtual void SetIOPriority(Env::IOPriority pri) { io_priority_ = pri; }
+
+  virtual Env::IOPriority GetIOPriority() { return io_priority_; }
+
+  virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) {
+    write_hint_ = hint;
+  }
+
+  virtual Env::WriteLifeTimeHint GetWriteLifeTimeHint() { return write_hint_; }
+  /*
+   * Get the size of valid data in the file.
+   */
+  virtual uint64_t GetFileSize() { return 0; }
+
+  /*
+   * Get and set the default pre-allocation block size for writes to
+   * this file. If non-zero, then Allocate will be used to extend the
+   * underlying storage of a file (generally via fallocate) if the Env
+   * instance supports it.
+   */
+  virtual void SetPreallocationBlockSize(size_t size) {
+    preallocation_block_size_ = size;
+  }
+
+  virtual void GetPreallocationStatus(size_t* block_size,
+                                      size_t* last_allocated_block) {
+    *last_allocated_block = last_preallocated_block_;
+    *block_size = preallocation_block_size_;
+  }
+
+  // For documentation, refer to RandomAccessFile::GetUniqueId()
+  virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const {
+    return 0;  // Default implementation to prevent issues with backwards
+               // compatibility.
+  }
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  // This call has no effect on dirty pages in the cache.
+  virtual Status InvalidateCache(size_t /*offset*/, size_t /*length*/) {
+    return Status::NotSupported("WritableFile::InvalidateCache not supported.");
+  }
+
+  // Sync a file range with disk.
+  // offset is the starting byte of the file range to be synchronized.
+  // nbytes specifies the length of the range to be synchronized.
+  // This asks the OS to initiate flushing the cached data to disk,
+  // without waiting for completion.
+  // Default implementation does nothing.
+  virtual Status RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/) {
+    if (strict_bytes_per_sync_) {
+      return Sync();
+    }
+    return Status::OK();
+  }
+
+  // PrepareWrite performs any necessary preparation for a write
+  // before the write actually occurs. This allows for pre-allocation
+  // of space on devices where it can result in less file
+  // fragmentation and/or less waste from over-zealous filesystem
+  // pre-allocation.
+  virtual void PrepareWrite(size_t offset, size_t len) {
+    if (preallocation_block_size_ == 0) {
+      return;
+    }
+    // If this write would cross one or more preallocation blocks,
+    // determine what the last preallocation block necessary to
+    // cover this write would be and Allocate to that point.
+    const auto block_size = preallocation_block_size_;
+    size_t new_last_preallocated_block =
+        (offset + len + block_size - 1) / block_size;
+    if (new_last_preallocated_block > last_preallocated_block_) {
+      size_t num_spanned_blocks =
+          new_last_preallocated_block - last_preallocated_block_;
+      // TODO: Don't ignore errors from allocate
+      Allocate(block_size * last_preallocated_block_,
+               block_size * num_spanned_blocks)
+          .PermitUncheckedError();
+      last_preallocated_block_ = new_last_preallocated_block;
+    }
+  }
+
+  // Pre-allocates space for a file.
+  virtual Status Allocate(uint64_t /*offset*/, uint64_t /*len*/) {
+    return Status::OK();
+  }
+
+  // If you're adding methods here, remember to add them to
+  // WritableFileWrapper too.
+
+ protected:
+  size_t preallocation_block_size() { return preallocation_block_size_; }
+
+ private:
+  size_t last_preallocated_block_;
+  size_t preallocation_block_size_;
+
+ protected:
+  Env::IOPriority io_priority_;
+  Env::WriteLifeTimeHint write_hint_;
+  const bool strict_bytes_per_sync_;
+};
+
+// A file abstraction for random reading and writing.
+class RandomRWFile {
+ public:
+  RandomRWFile() {}
+  // No copying allowed
+  RandomRWFile(const RandomRWFile&) = delete;
+  RandomRWFile& operator=(const RandomRWFile&) = delete;
+
+  virtual ~RandomRWFile() {}
+
+  // Indicates if the class makes use of direct I/O
+  // If false you must pass aligned buffer to Write()
+  virtual bool use_direct_io() const { return false; }
+
+  // Use the returned alignment value to allocate
+  // aligned buffer for Direct I/O
+  virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
+
+  // Write bytes in `data` at offset `offset`. Returns Status::OK() on success.
+  // Pass an aligned buffer when use_direct_io() returns true.
+  virtual Status Write(uint64_t offset, const Slice& data) = 0;
+
+  // Read up to `n` bytes starting from offset `offset` and store them in
+  // result; the provided `scratch` size should be at least `n`.
+  //
+  // After call, result->size() < n only if end of file has been
+  // reached (or non-OK status). Read might fail if called again after
+  // first result->size() < n.
+  //
+  // Returns Status::OK() on success.
+  virtual Status Read(uint64_t offset, size_t n, Slice* result,
+                      char* scratch) const = 0;
+
+  virtual Status Flush() = 0;
+
+  virtual Status Sync() = 0;
+
+  virtual Status Fsync() { return Sync(); }
+
+  virtual Status Close() = 0;
+
+  // If you're adding methods here, remember to add them to
+  // RandomRWFileWrapper too.
+};
+
+// MemoryMappedFileBuffer object represents a memory-mapped file's raw buffer.
+// Subclasses should release the mapping upon destruction.
+class MemoryMappedFileBuffer {
+ public:
+  MemoryMappedFileBuffer(void* _base, size_t _length)
+      : base_(_base), length_(_length) {}
+
+  virtual ~MemoryMappedFileBuffer() = 0;
+
+  // We do not want to unmap this twice. We could make this class movable if
+  // desired; for now, copying and assignment are disallowed.
+  MemoryMappedFileBuffer(const MemoryMappedFileBuffer&) = delete;
+  MemoryMappedFileBuffer& operator=(const MemoryMappedFileBuffer&) = delete;
+
+  void* GetBase() const { return base_; }
+  size_t GetLen() const { return length_; }
+
+ protected:
+  void* base_;
+  const size_t length_;
+};
+
+// Directory object represents collection of files and implements
+// filesystem operations that can be executed on directories.
+class Directory {
+ public:
+  virtual ~Directory() {}
+  // Fsync directory. Can be called concurrently from multiple threads.
+  virtual Status Fsync() = 0;
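+  //
+  // Typical durability pattern (an illustrative sketch, not upstream
+  // documentation): after creating or renaming a file, Fsync() its parent
+  // directory so that the directory entry itself is persisted:
+  //
+  //   std::unique_ptr<Directory> dir;
+  //   Status s = env->NewDirectory("/path/to/db", &dir);
+  //   if (s.ok()) s = dir->Fsync();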
+  // Close directory.
+  virtual Status Close() { return Status::NotSupported("Close"); }
+
+  virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const {
+    return 0;
+  }
+
+  // If you're adding methods here, remember to add them to
+  // DirectoryWrapper too.
+};
+
+enum InfoLogLevel : unsigned char {
+  DEBUG_LEVEL = 0,
+  INFO_LEVEL,
+  WARN_LEVEL,
+  ERROR_LEVEL,
+  FATAL_LEVEL,
+  HEADER_LEVEL,
+  NUM_INFO_LOG_LEVELS,
+};
+
+// An interface for writing log messages.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class Logger {
+ public:
+  size_t kDoNotSupportGetLogFileSize = (std::numeric_limits<size_t>::max)();
+
+  explicit Logger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL)
+      : closed_(false), log_level_(log_level) {}
+  // No copying allowed
+  Logger(const Logger&) = delete;
+  void operator=(const Logger&) = delete;
+
+  virtual ~Logger();
+
+  // Close the log file. Must be called before destructor. If the return
+  // status is NotSupported(), it means the implementation does cleanup in
+  // the destructor
+  virtual Status Close();
+
+  // Write a header to the log file with the specified format
+  // It is recommended that you log all header information at the start of the
+  // application. But it is not enforced.
+  virtual void LogHeader(const char* format, va_list ap) {
+    // Default implementation does a simple INFO level log write.
+    // Please override as per the logger class requirement.
+    Logv(InfoLogLevel::INFO_LEVEL, format, ap);
+  }
+
+  // Write an entry to the log file with the specified format.
+  //
+  // Users who override the `Logv()` overload taking `InfoLogLevel` do not need
+  // to implement this, unless they explicitly invoke it in
+  // `Logv(InfoLogLevel, ...)`.
+  virtual void Logv(const char* /* format */, va_list /* ap */) {
+    assert(false);
+  }
+
+  // Write an entry to the log file with the specified log level
+  // and format. Any log with level under the internal log level
+  // of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be
+  // printed.
+  virtual void Logv(const InfoLogLevel log_level, const char* format,
+                    va_list ap);
+
+  virtual size_t GetLogFileSize() const { return kDoNotSupportGetLogFileSize; }
+  // Flush to the OS buffers
+  virtual void Flush() {}
+  virtual InfoLogLevel GetInfoLogLevel() const { return log_level_; }
+  virtual void SetInfoLogLevel(const InfoLogLevel log_level) {
+    log_level_ = log_level;
+  }
+
+  // If you're adding methods here, remember to add them to LoggerWrapper too.
+
+ protected:
+  virtual Status CloseImpl();
+  bool closed_;
+
+ private:
+  InfoLogLevel log_level_;
+};
+
+// Identifies a locked file. Except in custom Env/Filesystem implementations,
+// the lifetime of a FileLock object should be managed only by LockFile() and
+// UnlockFile().
+class FileLock {
+ public:
+  FileLock() {}
+  virtual ~FileLock();
+
+ private:
+  // No copying allowed
+  FileLock(const FileLock&) = delete;
+  void operator=(const FileLock&) = delete;
+};
+
+class DynamicLibrary {
+ public:
+  virtual ~DynamicLibrary() {}
+
+  // Returns the name of the dynamic library.
+  virtual const char* Name() const = 0;
+
+  // Loads the symbol for sym_name from the library and updates the input
+  // function. Returns the loaded symbol.
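+  //
+  // Illustrative usage sketch (the library and symbol names below are
+  // hypothetical):
+  //
+  //   std::shared_ptr<DynamicLibrary> lib;
+  //   Status s = env->LoadLibrary("my_plugin", "" /* default search path */,
+  //                               &lib);
+  //   std::function<int(int)> fn;
+  //   if (s.ok()) s = lib->LoadFunction("my_symbol", &fn);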
+  template <typename T>
+  Status LoadFunction(const std::string& sym_name, std::function<T>* function) {
+    assert(nullptr != function);
+    void* ptr = nullptr;
+    Status s = LoadSymbol(sym_name, &ptr);
+    *function = reinterpret_cast<T*>(ptr);
+    return s;
+  }
+  // Loads and returns the symbol for sym_name from the library.
+  virtual Status LoadSymbol(const std::string& sym_name, void** func) = 0;
+};
+
+extern void LogFlush(const std::shared_ptr<Logger>& info_log);
+
+extern void Log(const InfoLogLevel log_level,
+                const std::shared_ptr<Logger>& info_log, const char* format,
+                ...) ROCKSDB_PRINTF_FORMAT_ATTR(3, 4);
+
+// a set of log functions with different log levels.
+extern void Header(const std::shared_ptr<Logger>& info_log, const char* format,
+                   ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Debug(const std::shared_ptr<Logger>& info_log, const char* format,
+                  ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Info(const std::shared_ptr<Logger>& info_log, const char* format,
+                 ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Warn(const std::shared_ptr<Logger>& info_log, const char* format,
+                 ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Error(const std::shared_ptr<Logger>& info_log, const char* format,
+                  ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Fatal(const std::shared_ptr<Logger>& info_log, const char* format,
+                  ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+
+// Log the specified data to *info_log if info_log is non-nullptr.
+// The default info log level is InfoLogLevel::INFO_LEVEL.
+extern void Log(const std::shared_ptr<Logger>& info_log, const char* format,
+                ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+
+extern void LogFlush(Logger* info_log);
+
+extern void Log(const InfoLogLevel log_level, Logger* info_log,
+                const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(3, 4);
+
+// The default info log level is InfoLogLevel::INFO_LEVEL.
+extern void Log(Logger* info_log, const char* format, ...)
+    ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+
+// a set of log functions with different log levels.
+extern void Header(Logger* info_log, const char* format, ...)
+    ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Debug(Logger* info_log, const char* format, ...)
+    ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Info(Logger* info_log, const char* format, ...)
+    ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Warn(Logger* info_log, const char* format, ...)
+    ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Error(Logger* info_log, const char* format, ...)
+    ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+extern void Fatal(Logger* info_log, const char* format, ...)
+    ROCKSDB_PRINTF_FORMAT_ATTR(2, 3);
+
+// A utility routine: write "data" to the named file.
+extern Status WriteStringToFile(Env* env, const Slice& data,
+                                const std::string& fname,
+                                bool should_sync = false);
+
+// A utility routine: read contents of named file into *data
+extern Status ReadFileToString(Env* env, const std::string& fname,
+                               std::string* data);
+
+// Below are helpers for wrapping most of the classes in this file.
+// They forward all calls to another instance of the class.
+// Useful when wrapping the default implementations.
+// Typical usage is to inherit your wrapper from *Wrapper, e.g.:
+//
+// class MySequentialFileWrapper : public
+// ROCKSDB_NAMESPACE::SequentialFileWrapper {
+//  public:
+//   MySequentialFileWrapper(ROCKSDB_NAMESPACE::SequentialFile* target):
+//     ROCKSDB_NAMESPACE::SequentialFileWrapper(target) {}
+//   Status Read(size_t n, Slice* result, char* scratch) override {
+//     cout << "Doing a read of size " << n << "!" << endl;
+//     return ROCKSDB_NAMESPACE::SequentialFileWrapper::Read(n, result,
+//                                                           scratch);
+//   }
+//   // All other methods are forwarded to target_ automatically.
+// };
+//
+// This is often more convenient than inheriting the class directly because
+// (a) Don't have to override and forward all methods - the Wrapper will
+//     forward everything you're not explicitly overriding.
+// (b) Don't need to update the wrapper when more methods are added to the
+//     rocksdb class. Unless you actually want to override the behavior.
+//     (And unless rocksdb people forgot to update the *Wrapper class.)
+
+// An implementation of Env that forwards all calls to another Env.
+// May be useful to clients who wish to override just part of the
+// functionality of another Env.
+class EnvWrapper : public Env {
+ public:
+  // The Target struct allows an Env to be stored as a raw (Env*) or
+  // std::shared_ptr. By using this struct, the wrapping/calling
+  // class does not need to worry about the ownership/lifetime of the
+  // wrapped target env. If the guard is set, then the Env will point
+  // to the guard.get().
+  struct Target {
+    Env* env;                    // The raw Env
+    std::shared_ptr<Env> guard;  // The guarded Env
+
+    // Creates a Target without assuming ownership of the target Env
+    explicit Target(Env* t) : env(t) {}
+
+    // Creates a Target from the guarded env, assuming ownership
+    explicit Target(std::unique_ptr<Env>&& t) : guard(t.release()) {
+      env = guard.get();
+    }
+
+    // Creates a Target from the guarded env, assuming ownership
+    explicit Target(const std::shared_ptr<Env>& t) : guard(t) {
+      env = guard.get();
+    }
+
+    // Makes sure the raw Env is not nullptr
+    void Prepare() {
+      if (guard.get() != nullptr) {
+        env = guard.get();
+      } else if (env == nullptr) {
+        env = Env::Default();
+      }
+    }
+  };
+
+  // Initialize an EnvWrapper that delegates all calls to *t
+  explicit EnvWrapper(Env* t);
+  explicit EnvWrapper(std::unique_ptr<Env>&& t);
+  explicit EnvWrapper(const std::shared_ptr<Env>& t);
+  ~EnvWrapper() override;
+
+  // Return the target to which this Env forwards all calls
+  Env* target() const { return target_.env; }
+
+  // Deprecated. Will be removed in a major release. Derived classes
+  // should implement this method.
+  const char* Name() const override { return target_.env->Name(); }
+
+  // The following text is boilerplate that forwards all methods to target()
+  Status RegisterDbPaths(const std::vector<std::string>& paths) override {
+    return target_.env->RegisterDbPaths(paths);
+  }
+
+  Status UnregisterDbPaths(const std::vector<std::string>& paths) override {
+    return target_.env->UnregisterDbPaths(paths);
+  }
+
+  Status NewSequentialFile(const std::string& f,
+                           std::unique_ptr<SequentialFile>* r,
+                           const EnvOptions& options) override {
+    return target_.env->NewSequentialFile(f, r, options);
+  }
+  Status NewRandomAccessFile(const std::string& f,
+                             std::unique_ptr<RandomAccessFile>* r,
+                             const EnvOptions& options) override {
+    return target_.env->NewRandomAccessFile(f, r, options);
+  }
+  Status NewWritableFile(const std::string& f,
+                         std::unique_ptr<WritableFile>* r,
+                         const EnvOptions& options) override {
+    return target_.env->NewWritableFile(f, r, options);
+  }
+  Status ReopenWritableFile(const std::string& fname,
+                            std::unique_ptr<WritableFile>* result,
+                            const EnvOptions& options) override {
+    return target_.env->ReopenWritableFile(fname, result, options);
+  }
+  Status ReuseWritableFile(const std::string& fname,
+                           const std::string& old_fname,
+                           std::unique_ptr<WritableFile>* r,
+                           const EnvOptions& options) override {
+    return target_.env->ReuseWritableFile(fname, old_fname, r, options);
+  }
+  Status NewRandomRWFile(const std::string& fname,
+                         std::unique_ptr<RandomRWFile>* result,
+                         const EnvOptions& options) override {
+    return target_.env->NewRandomRWFile(fname, result, options);
+  }
+  Status NewMemoryMappedFileBuffer(
+      const std::string& fname,
+      std::unique_ptr<MemoryMappedFileBuffer>* result) override {
+    return target_.env->NewMemoryMappedFileBuffer(fname, result);
+  }
+  Status NewDirectory(const std::string& name,
+                      std::unique_ptr<Directory>* result) override {
+    return target_.env->NewDirectory(name, result);
+  }
+  Status FileExists(const std::string& f) override {
+    return target_.env->FileExists(f);
+  }
+  Status GetChildren(const std::string& dir,
+                     std::vector<std::string>* r) override {
+    return target_.env->GetChildren(dir, r);
+  }
+  Status GetChildrenFileAttributes(
+      const std::string& dir, std::vector<FileAttributes>* result) override {
+    return target_.env->GetChildrenFileAttributes(dir, result);
+  }
+  Status DeleteFile(const std::string& f) override {
+    return target_.env->DeleteFile(f);
+  }
+  Status Truncate(const std::string& fname, size_t size) override {
+    return target_.env->Truncate(fname, size);
+  }
+  Status CreateDir(const std::string& d) override {
+    return target_.env->CreateDir(d);
+  }
+  Status CreateDirIfMissing(const std::string& d) override {
+    return target_.env->CreateDirIfMissing(d);
+  }
+  Status DeleteDir(const std::string& d) override {
+    return target_.env->DeleteDir(d);
+  }
+  Status GetFileSize(const std::string& f, uint64_t* s) override {
+    return target_.env->GetFileSize(f, s);
+  }
+
+  Status GetFileModificationTime(const std::string& fname,
+                                 uint64_t* file_mtime) override {
+    return target_.env->GetFileModificationTime(fname, file_mtime);
+  }
+
+  Status RenameFile(const std::string& s, const std::string& t) override {
+    return target_.env->RenameFile(s, t);
+  }
+
+  Status LinkFile(const std::string& s, const std::string& t) override {
+    return target_.env->LinkFile(s, t);
+  }
+
+  Status NumFileLinks(const std::string& fname, uint64_t* count) override {
+    return target_.env->NumFileLinks(fname, count);
+  }
+
+  Status AreFilesSame(const std::string& first, const std::string& second,
+                      bool* res) override {
+    return target_.env->AreFilesSame(first, second, res);
+  }
+
+  Status LockFile(const std::string& f, FileLock** l) override {
+    return target_.env->LockFile(f, l);
+  }
+
+  Status UnlockFile(FileLock* l) override { return target_.env->UnlockFile(l); }
+
+  Status IsDirectory(const std::string& path, bool* is_dir) override {
+    return target_.env->IsDirectory(path, is_dir);
+  }
+
+  Status LoadLibrary(const std::string& lib_name,
+                     const std::string& search_path,
+                     std::shared_ptr<DynamicLibrary>* result) override {
+    return target_.env->LoadLibrary(lib_name, search_path, result);
+  }
+
+  void Schedule(void (*f)(void* arg), void* a, Priority pri,
+                void* tag = nullptr, void (*u)(void* arg) = nullptr) override {
+    return target_.env->Schedule(f, a, pri, tag, u);
+  }
+
+  int UnSchedule(void* tag, Priority pri) override {
+    return target_.env->UnSchedule(tag, pri);
+  }
+
+  void StartThread(void (*f)(void*), void* a) override {
+    return target_.env->StartThread(f, a);
+  }
+  void WaitForJoin() override { return target_.env->WaitForJoin(); }
+  unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const override {
+    return target_.env->GetThreadPoolQueueLen(pri);
+  }
+
+  int ReserveThreads(int threads_to_be_reserved, Priority pri) override {
+    return target_.env->ReserveThreads(threads_to_be_reserved, pri);
+  }
+
+  int ReleaseThreads(int threads_to_be_released, Priority pri) override {
+    return target_.env->ReleaseThreads(threads_to_be_released, pri);
+  }
+
+  Status GetTestDirectory(std::string* path) override {
+    return target_.env->GetTestDirectory(path);
+  }
+  Status NewLogger(const std::string& fname,
+                   std::shared_ptr<Logger>* result) override {
+    return target_.env->NewLogger(fname, result);
+  }
+  uint64_t NowMicros() override { return target_.env->NowMicros(); }
+  uint64_t NowNanos() override { return target_.env->NowNanos(); }
+  uint64_t NowCPUNanos() override { return target_.env->NowCPUNanos(); }
+
+  void SleepForMicroseconds(int micros) override {
+    target_.env->SleepForMicroseconds(micros);
+  }
+  Status GetHostName(char* name, uint64_t len) override {
+    return target_.env->GetHostName(name, len);
+  }
+  Status GetCurrentTime(int64_t* unix_time) override {
+    return target_.env->GetCurrentTime(unix_time);
+  }
+  Status GetAbsolutePath(const std::string& db_path,
+                         std::string* output_path) override {
+    return target_.env->GetAbsolutePath(db_path, output_path);
+  }
+  void SetBackgroundThreads(int num, Priority pri) override {
+    return target_.env->SetBackgroundThreads(num, pri);
+  }
+  int GetBackgroundThreads(Priority pri) override {
+    return target_.env->GetBackgroundThreads(pri);
+  }
+
+  Status SetAllowNonOwnerAccess(bool allow_non_owner_access) override {
+    return target_.env->SetAllowNonOwnerAccess(allow_non_owner_access);
+  }
+
+  void IncBackgroundThreadsIfNeeded(int num, Priority pri) override {
+    return target_.env->IncBackgroundThreadsIfNeeded(num, pri);
+  }
+
+  void LowerThreadPoolIOPriority(Priority pool) override {
+    target_.env->LowerThreadPoolIOPriority(pool);
+  }
+
+  void LowerThreadPoolCPUPriority(Priority pool) override {
+    target_.env->LowerThreadPoolCPUPriority(pool);
+  }
+
+  Status LowerThreadPoolCPUPriority(Priority pool, CpuPriority pri) override {
+    return target_.env->LowerThreadPoolCPUPriority(pool, pri);
+  }
+
+  std::string TimeToString(uint64_t time) override {
+    return target_.env->TimeToString(time);
+  }
+
+  Status GetThreadList(std::vector<ThreadStatus>* thread_list) override {
+    return target_.env->GetThreadList(thread_list);
+  }
+
+  ThreadStatusUpdater* GetThreadStatusUpdater() const override {
+    return target_.env->GetThreadStatusUpdater();
+  }
+
+  uint64_t GetThreadID() const override { return target_.env->GetThreadID(); }
+
+  std::string GenerateUniqueId() override {
+    return target_.env->GenerateUniqueId();
+  }
+
+  EnvOptions OptimizeForLogRead(const EnvOptions& env_options) const override {
+    return target_.env->OptimizeForLogRead(env_options);
+  }
+  EnvOptions OptimizeForManifestRead(
+      const EnvOptions& env_options) const override {
+    return target_.env->OptimizeForManifestRead(env_options);
+  }
+  EnvOptions OptimizeForLogWrite(const EnvOptions& env_options,
+                                 const DBOptions& db_options) const override {
+    return target_.env->OptimizeForLogWrite(env_options, db_options);
+  }
+  EnvOptions OptimizeForManifestWrite(
+      const EnvOptions& env_options) const override {
+    return target_.env->OptimizeForManifestWrite(env_options);
+  }
+  EnvOptions OptimizeForCompactionTableWrite(
+      const EnvOptions& env_options,
+      const ImmutableDBOptions& immutable_ops) const override {
+    return target_.env->OptimizeForCompactionTableWrite(env_options,
+                                                        immutable_ops);
+  }
+  EnvOptions OptimizeForCompactionTableRead(
+      const EnvOptions& env_options,
+      const ImmutableDBOptions& db_options) const override {
+    return target_.env->OptimizeForCompactionTableRead(env_options,
+                                                       db_options);
+  }
+  EnvOptions OptimizeForBlobFileRead(
+      const EnvOptions& env_options,
+      const ImmutableDBOptions& db_options) const override {
+    return target_.env->OptimizeForBlobFileRead(env_options, db_options);
+  }
+  Status GetFreeSpace(const std::string& path, uint64_t* diskfree) override {
+    return target_.env->GetFreeSpace(path, diskfree);
+  }
+  void SanitizeEnvOptions(EnvOptions* env_opts) const override {
+    target_.env->SanitizeEnvOptions(env_opts);
+  }
+  Status PrepareOptions(const ConfigOptions& options) override;
+  std::string SerializeOptions(const ConfigOptions& config_options,
+                               const std::string& header) const override;
+
+ private:
+  Target target_;
+};
+
+class SequentialFileWrapper : public SequentialFile {
+ public:
+  explicit SequentialFileWrapper(SequentialFile* target) : target_(target) {}
+
+  Status Read(size_t n, Slice* result, char* scratch) override {
+    return target_->Read(n, result, scratch);
+  }
+  Status Skip(uint64_t n) override { return target_->Skip(n); }
+  bool use_direct_io() const override { return target_->use_direct_io(); }
+  size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+  Status InvalidateCache(size_t offset, size_t length) override {
+    return target_->InvalidateCache(offset, length);
+  }
+  Status PositionedRead(uint64_t offset, size_t n, Slice* result,
+                        char* scratch) override {
+    return target_->PositionedRead(offset, n, result, scratch);
+  }
+
+ private:
+  SequentialFile* target_;
+};
+
+class RandomAccessFileWrapper : public RandomAccessFile {
+ public:
+  explicit RandomAccessFileWrapper(RandomAccessFile* target)
+      : target_(target) {}
+
+  Status Read(uint64_t offset, size_t n, Slice* result,
+              char* scratch) const override {
+    return target_->Read(offset, n, result, scratch);
+  }
+  Status MultiRead(ReadRequest* reqs, size_t num_reqs) override {
+    return target_->MultiRead(reqs, num_reqs);
+  }
+  Status Prefetch(uint64_t offset, size_t n) override {
+    return target_->Prefetch(offset, n);
+  }
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    return target_->GetUniqueId(id, max_size);
+  }
+  void Hint(AccessPattern pattern) override { target_->Hint(pattern); }
+  bool use_direct_io() const override { return target_->use_direct_io(); }
+  size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+  Status InvalidateCache(size_t offset, size_t length) override {
+    return target_->InvalidateCache(offset, length);
+  }
+
+ private:
+  RandomAccessFile* target_;
+};
+
+class WritableFileWrapper : public WritableFile {
+ public:
+  explicit WritableFileWrapper(WritableFile* t) : target_(t) {}
+
+  Status Append(const Slice& data) override { return target_->Append(data); }
+  Status Append(const Slice& data,
+                const DataVerificationInfo& verification_info) override {
+    return target_->Append(data, verification_info);
+  }
+  Status PositionedAppend(const Slice& data, uint64_t offset) override {
+    return target_->PositionedAppend(data, offset);
+  }
+  Status PositionedAppend(
+      const Slice& data, uint64_t offset,
+      const DataVerificationInfo& verification_info) override {
+    return target_->PositionedAppend(data, offset, verification_info);
+  }
+  Status Truncate(uint64_t size) override { return target_->Truncate(size); }
+  Status Close() override { return target_->Close(); }
+  Status Flush() override { return target_->Flush(); }
+  Status Sync() override { return target_->Sync(); }
+  Status Fsync() override { return target_->Fsync(); }
+  bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); }
+
+  bool use_direct_io() const override { return target_->use_direct_io(); }
+
+  size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+
+  void SetIOPriority(Env::IOPriority pri) override {
+    target_->SetIOPriority(pri);
+  }
+
+  Env::IOPriority GetIOPriority() override { return target_->GetIOPriority(); }
+
+  void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {
+    target_->SetWriteLifeTimeHint(hint);
+  }
+
+  Env::WriteLifeTimeHint GetWriteLifeTimeHint() override {
+    return target_->GetWriteLifeTimeHint();
+  }
+
+  uint64_t GetFileSize() override { return target_->GetFileSize(); }
+
+  void SetPreallocationBlockSize(size_t size) override {
+    target_->SetPreallocationBlockSize(size);
+  }
+
+  void GetPreallocationStatus(size_t* block_size,
+                              size_t* last_allocated_block) override {
+    target_->GetPreallocationStatus(block_size, last_allocated_block);
+  }
+
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    return target_->GetUniqueId(id, max_size);
+  }
+
+  Status InvalidateCache(size_t offset, size_t length) override {
+    return target_->InvalidateCache(offset, length);
+  }
+
+  Status RangeSync(uint64_t offset, uint64_t nbytes) override {
+    return target_->RangeSync(offset, nbytes);
+  }
+
+  void PrepareWrite(size_t offset, size_t len) override {
+    target_->PrepareWrite(offset, len);
+  }
+
+  Status Allocate(uint64_t offset, uint64_t len) override {
+    return target_->Allocate(offset, len);
+  }
+
+ private:
+  WritableFile* target_;
+};
+
+class RandomRWFileWrapper : public RandomRWFile {
+ public:
+  explicit RandomRWFileWrapper(RandomRWFile* target) : target_(target) {}
+
+  bool use_direct_io() const override { return target_->use_direct_io(); }
+  size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+  Status Write(uint64_t offset, const Slice& data) override {
+    return target_->Write(offset, data);
+  }
+  Status Read(uint64_t offset, size_t n, Slice* result,
+              char* scratch) const override {
+    return target_->Read(offset, n, result, scratch);
+  }
+  Status Flush() override { return target_->Flush(); }
+  Status Sync() override { return target_->Sync(); }
+  Status Fsync() override { return target_->Fsync(); }
+  Status Close() override { return target_->Close(); }
+
+ private:
+  RandomRWFile* target_;
+};
+
+class DirectoryWrapper : public Directory {
+ public:
+  explicit DirectoryWrapper(Directory* target) : target_(target) {}
+
+  Status Fsync() override { return target_->Fsync(); }
+  Status Close() override { return target_->Close(); }
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    return target_->GetUniqueId(id, max_size);
+  }
+
+ private:
+  Directory* target_;
+};
+
+class LoggerWrapper : public Logger {
+ public:
+  explicit LoggerWrapper(Logger* target) : target_(target) {}
+
+  Status Close() override { return target_->Close(); }
+  void LogHeader(const char* format, va_list ap) override {
+    return target_->LogHeader(format, ap);
+  }
+  void Logv(const char* format, va_list ap) override {
+    return target_->Logv(format, ap);
+  }
+  void Logv(const InfoLogLevel log_level, const char* format,
+            va_list ap) override {
+    return target_->Logv(log_level, format, ap);
+  }
+  size_t GetLogFileSize() const override { return target_->GetLogFileSize(); }
+  void Flush() override { return target_->Flush(); }
+  InfoLogLevel GetInfoLogLevel() const override {
+    return target_->GetInfoLogLevel();
+  }
+  void SetInfoLogLevel(const InfoLogLevel log_level) override {
+    return target_->SetInfoLogLevel(log_level);
+  }
+
+ private:
+  Logger* target_;
+};
+
+// Returns a new environment that stores its data in memory and delegates
+// all non-file-storage tasks to base_env. The caller must delete the result
+// when it is no longer needed.
+// *base_env must remain live while the result is in use.
+Env* NewMemEnv(Env* base_env);
+
+// Returns a new environment that measures function call times for filesystem
+// operations, reporting results to variables in PerfContext.
+// This is a factory method for TimedEnv defined in utilities/env_timed.cc.
+Env* NewTimedEnv(Env* base_env);
+
+// Returns an instance of logger that can be used for storing informational
+// messages.
+// This is a factory method for EnvLogger declared in logging/env_logging.h
+Status NewEnvLogger(const std::string& fname, Env* env,
+                    std::shared_ptr<Logger>* result);
+
+// Creates a new Env based on Env::Default() but modified to use the specified
+// FileSystem.
+std::unique_ptr<Env> NewCompositeEnv(const std::shared_ptr<FileSystem>& fs);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/env_encryption.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/env_encryption.h
new file mode 100644
index 0000000000..3ddb1a7558
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/env_encryption.h
@@ -0,0 +1,463 @@
+// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class EncryptionProvider;
+
+struct ConfigOptions;
+
+// Returns an Env that encrypts data when stored on disk and decrypts data when
+// read from disk.
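+//
+// Illustrative wiring sketch (not upstream documentation), using the
+// test-only ROT13 cipher declared below; substitute a production-grade
+// BlockCipher outside of tests:
+//
+//   std::shared_ptr<BlockCipher> cipher = BlockCipher::NewROT13Cipher(32);
+//   std::shared_ptr<EncryptionProvider> provider =
+//       EncryptionProvider::NewCTRProvider(cipher);
+//   Env* encrypted_env = NewEncryptedEnv(Env::Default(), provider);
+//   // Pass encrypted_env via DBOptions::env when opening the DB.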
+Env* NewEncryptedEnv(Env* base_env,
+                     const std::shared_ptr<EncryptionProvider>& provider);
+std::shared_ptr<FileSystem> NewEncryptedFS(
+    const std::shared_ptr<FileSystem>& base_fs,
+    const std::shared_ptr<EncryptionProvider>& provider);
+
+// BlockAccessCipherStream is the base class for any cipher stream that
+// supports random access at block level (without requiring data from other
+// blocks). E.g. CTR (Counter operation mode) supports this requirement.
+class BlockAccessCipherStream {
+ public:
+  virtual ~BlockAccessCipherStream(){};
+
+  // BlockSize returns the size of each block supported by this cipher stream.
+  virtual size_t BlockSize() = 0;
+
+  // Encrypt one or more (partial) blocks of data at the file offset.
+  // Length of data is given in dataSize.
+  virtual Status Encrypt(uint64_t fileOffset, char* data, size_t dataSize);
+
+  // Decrypt one or more (partial) blocks of data at the file offset.
+  // Length of data is given in dataSize.
+  virtual Status Decrypt(uint64_t fileOffset, char* data, size_t dataSize);
+
+ protected:
+  // Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
+  virtual void AllocateScratch(std::string&) = 0;
+
+  // Encrypt a block of data at the given block index.
+  // Length of data is equal to BlockSize();
+  virtual Status EncryptBlock(uint64_t blockIndex, char* data,
+                              char* scratch) = 0;
+
+  // Decrypt a block of data at the given block index.
+  // Length of data is equal to BlockSize();
+  virtual Status DecryptBlock(uint64_t blockIndex, char* data,
+                              char* scratch) = 0;
+};
+
+// BlockCipher
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class BlockCipher : public Customizable {
+ public:
+  virtual ~BlockCipher(){};
+
+  // Creates a new BlockCipher from the input config_options and value.
+  // The value describes the type of cipher (and potentially optional
+  // configuration parameters) used to create this cipher.
+  // For example, if the value is "ROT13", a ROT13BlockCipher is created.
+  //
+  // @param config_options Options to control how this cipher is created
+  //                       and initialized.
+  // @param value The value might be:
+  //   - ROT13         Create a ROT13 Cipher
+  //   - ROT13:nn      Create a ROT13 Cipher with block size of nn
+  // @param result The new cipher object
+  // @return OK if the cipher was successfully created
+  // @return NotFound if an invalid name was specified in the value
+  // @return InvalidArgument if the options were not valid
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& value,
+                                 std::shared_ptr<BlockCipher>* result);
+
+  static const char* Type() { return "BlockCipher"; }
+  // Short-cut method to create a ROT13 BlockCipher.
+  // This cipher is only suitable for test purposes and should not be used in
+  // production!!!
+  static std::shared_ptr<BlockCipher> NewROT13Cipher(size_t block_size);
+
+  // BlockSize returns the size of each block supported by this cipher stream.
+  virtual size_t BlockSize() = 0;
+
+  // Encrypt a block of data.
+  // Length of data is equal to BlockSize().
+  virtual Status Encrypt(char* data) = 0;
+
+  // Decrypt a block of data.
+  // Length of data is equal to BlockSize().
+  virtual Status Decrypt(char* data) = 0;
+};
+
+// The encryption provider is used to create a cipher stream for a specific
+// file. The returned cipher stream will be used for actual
+// encryption/decryption actions.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class EncryptionProvider : public Customizable {
+ public:
+  virtual ~EncryptionProvider(){};
+
+  // Creates a new EncryptionProvider from the input config_options and value.
+  // The value describes the type of provider (and potentially optional
+  // configuration parameters) used to create this provider.
+  // For example, if the value is "CTR", a CTREncryptionProvider will be
+  // created. If the value ends with "://test" (e.g. "CTR://test"), the
+  // provider will be initialized in "TEST" mode prior to being returned.
+  //
+  // @param config_options Options to control how this provider is created
+  //                       and initialized.
+  // @param value The value might be:
+  //   - CTR          Create a CTR provider
+  //   - CTR://test   Create a CTR provider and initialize it for tests.
+  // @param result The new provider object
+  // @return OK if the provider was successfully created
+  // @return NotFound if an invalid name was specified in the value
+  // @return InvalidArgument if the options were not valid
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& value,
+                                 std::shared_ptr<EncryptionProvider>* result);
+
+  static const char* Type() { return "EncryptionProvider"; }
+
+  // Short-cut method to create a CTR-provider
+  static std::shared_ptr<EncryptionProvider> NewCTRProvider(
+      const std::shared_ptr<BlockCipher>& cipher);
+
+  // GetPrefixLength returns the length of the prefix that is added to every
+  // file and used for storing encryption options. For optimal performance, the
+  // prefix length should be a multiple of the page size.
+  virtual size_t GetPrefixLength() const = 0;
+
+  // CreateNewPrefix initializes an allocated block of prefix memory
+  // for a new file.
+  virtual Status CreateNewPrefix(const std::string& fname, char* prefix,
+                                 size_t prefixLength) const = 0;
+
+  // Method to add a new cipher key for use by the EncryptionProvider.
+  // @param descriptor Descriptor for this key.
+  // @param cipher The cryptographic key to use
+  // @param len The length of the cipher key
+  // @param for_write If true, this cipher should be used for writing files.
+  //                  If false, this cipher should only be used for reading
+  //                  files
+  // @return OK if the cipher was successfully added to the provider, non-OK
+  //         otherwise
+  virtual Status AddCipher(const std::string& descriptor, const char* cipher,
+                           size_t len, bool for_write) = 0;
+
+  // CreateCipherStream creates a block access cipher stream for a file, given
+  // its name and options.
+  virtual Status CreateCipherStream(
+      const std::string& fname, const EnvOptions& options, Slice& prefix,
+      std::unique_ptr<BlockAccessCipherStream>* result) = 0;
+
+  // Returns a string representing an encryption marker prefix for this
+  // provider. If a marker is provided, this marker can be used to tell whether
+  // or not a file is encrypted by this provider. The marker will also be part
+  // of any encryption prefix for this provider.
+  virtual std::string GetMarker() const { return ""; }
+};
+
+class EncryptedSequentialFile : public FSSequentialFile {
+ protected:
+  std::unique_ptr<FSSequentialFile> file_;
+  std::unique_ptr<BlockAccessCipherStream> stream_;
+  uint64_t offset_;
+  size_t prefixLength_;
+
+ public:
+  // Default ctor. The given underlying sequential file is assumed to be at
+  // offset == prefixLength.
+  EncryptedSequentialFile(std::unique_ptr<FSSequentialFile>&& f,
+                          std::unique_ptr<BlockAccessCipherStream>&& s,
+                          size_t prefixLength)
+      : file_(std::move(f)),
+        stream_(std::move(s)),
+        offset_(prefixLength),
+        prefixLength_(prefixLength) {}
+
+  // Read up to "n" bytes from the file. "scratch[0..n-1]" may be
+  // written by this routine. Sets "*result" to the data that was
+  // read (including if fewer than "n" bytes were successfully read).
+  // May set "*result" to point at data in "scratch[0..n-1]", so
+  // "scratch[0..n-1]" must be live when "*result" is used.
+  // If an error was encountered, returns a non-OK status.
+  //
+  // REQUIRES: External synchronization
+  IOStatus Read(size_t n, const IOOptions& options, Slice* result,
+                char* scratch, IODebugContext* dbg) override;
+
+  // Skip "n" bytes from the file. This is guaranteed to be no
+  // slower than reading the same data, but may be faster.
+  //
+  // If end of file is reached, skipping will stop at the end of the
+  // file, and Skip will return OK.
+  //
+  // REQUIRES: External synchronization
+  IOStatus Skip(uint64_t n) override;
+
+  // Indicates to the upper layers whether the current SequentialFile
+  // implementation uses direct IO.
+  bool use_direct_io() const override;
+
+  // Use the returned alignment value to allocate
+  // aligned buffer for Direct I/O
+  size_t GetRequiredBufferAlignment() const override;
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+  // Positioned Read for direct I/O
+  // If Direct I/O enabled, offset, n, and scratch should be properly aligned
+  IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
+                          Slice* result, char* scratch,
+                          IODebugContext* dbg) override;
+};
+
+// A file abstraction for randomly reading the contents of a file.
+class EncryptedRandomAccessFile : public FSRandomAccessFile {
+ protected:
+  std::unique_ptr<FSRandomAccessFile> file_;
+  std::unique_ptr<BlockAccessCipherStream> stream_;
+  size_t prefixLength_;
+
+ public:
+  EncryptedRandomAccessFile(std::unique_ptr<FSRandomAccessFile>&& f,
+                            std::unique_ptr<BlockAccessCipherStream>&& s,
+                            size_t prefixLength)
+      : file_(std::move(f)),
+        stream_(std::move(s)),
+        prefixLength_(prefixLength) {}
+
+  // Read up to "n" bytes from the file starting at "offset".
+  // "scratch[0..n-1]" may be written by this routine. Sets "*result"
+  // to the data that was read (including if fewer than "n" bytes were
+  // successfully read). May set "*result" to point at data in
+  // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
+  // "*result" is used. If an error was encountered, returns a non-OK
+  // status.
+  //
+  // Safe for concurrent use by multiple threads.
+  // If Direct I/O enabled, offset, n, and scratch should be aligned properly.
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override;
+
+  // Readahead the file starting from offset by n bytes for caching.
+  IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options,
+                    IODebugContext* dbg) override;
+
+  // Tries to get a unique ID for this file that will be the same each time
+  // the file is opened (and will stay the same while the file is open).
+  // Furthermore, it tries to make this ID at most "max_size" bytes.
+  // If such an ID can be created this function returns the length of the ID
+  // and places it in "id"; otherwise, this function returns 0, in which case
+  // "id" may not have been modified.
+  //
+  // This function guarantees, for IDs from a given environment, two unique ids
+  // cannot be made equal to each other by adding arbitrary bytes to one of
+  // them. That is, no unique ID is the prefix of another.
+  //
+  // This function guarantees that the returned ID will not be interpretable as
+  // a single varint.
+  //
+  // Note: these IDs are only valid for the duration of the process.
+  size_t GetUniqueId(char* id, size_t max_size) const override;
+
+  void Hint(AccessPattern pattern) override;
+
+  // Indicates to the upper layers whether the current RandomAccessFile
+  // implementation uses direct IO.
+  bool use_direct_io() const override;
+
+  // Use the returned alignment value to allocate
+  // aligned buffer for Direct I/O
+  size_t GetRequiredBufferAlignment() const override;
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  IOStatus InvalidateCache(size_t offset, size_t length) override;
+};
+
+// A file abstraction for sequential writing. The implementation
+// must provide buffering since callers may append small fragments
+// at a time to the file.
+class EncryptedWritableFile : public FSWritableFile {
+ protected:
+  std::unique_ptr<FSWritableFile> file_;
+  std::unique_ptr<BlockAccessCipherStream> stream_;
+  size_t prefixLength_;
+
+ public:
+  // Default ctor. Prefix is assumed to be written already.
+  EncryptedWritableFile(std::unique_ptr<FSWritableFile>&& f,
+                        std::unique_ptr<BlockAccessCipherStream>&& s,
+                        size_t prefixLength)
+      : file_(std::move(f)),
+        stream_(std::move(s)),
+        prefixLength_(prefixLength) {}
+
+  using FSWritableFile::Append;
+  IOStatus Append(const Slice& data, const IOOptions& options,
+                  IODebugContext* dbg) override;
+
+  using FSWritableFile::PositionedAppend;
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            IODebugContext* dbg) override;
+
+  // true if Sync() and Fsync() are safe to call concurrently with Append()
+  // and Flush().
+  bool IsSyncThreadSafe() const override;
+
+  // Indicates to the upper layers whether the current WritableFile
+  // implementation uses direct IO.
+  bool use_direct_io() const override;
+
+  // Use the returned alignment value to allocate
+  // aligned buffer for Direct I/O
+  size_t GetRequiredBufferAlignment() const override;
+
+  /*
+   * Get the size of valid data in the file.
+   */
+  uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
+
+  // Truncate is necessary to trim the file to the correct size
+  // before closing. It is not always possible to keep track of the file
+  // size due to whole-page writes. The behavior is undefined if called
+  // with other writes to follow.
+  IOStatus Truncate(uint64_t size, const IOOptions& options,
+                    IODebugContext* dbg) override;
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  // This call has no effect on dirty pages in the cache.
+  IOStatus InvalidateCache(size_t offset, size_t length) override;
+
+  // Sync a file range with disk.
+  // offset is the starting byte of the file range to be synchronized.
+  // nbytes specifies the length of the range to be synchronized.
+  // This asks the OS to initiate flushing the cached data to disk,
+  // without waiting for completion.
+  // Default implementation does nothing.
+  IOStatus RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& options,
+                     IODebugContext* dbg) override;
+
+  // PrepareWrite performs any necessary preparation for a write
+  // before the write actually occurs. This allows for pre-allocation
+  // of space on devices where it can result in less file
+  // fragmentation and/or less waste from over-zealous filesystem
+  // pre-allocation.
+  void PrepareWrite(size_t offset, size_t len, const IOOptions& options,
+                    IODebugContext* dbg) override;
+
+  void SetPreallocationBlockSize(size_t size) override;
+
+  void GetPreallocationStatus(size_t* block_size,
+                              size_t* last_allocated_block) override;
+
+  // Pre-allocates space for a file.
+  IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
+                    IODebugContext* dbg) override;
+
+  IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
+};
+
+// A file abstraction for random reading and writing.
+class EncryptedRandomRWFile : public FSRandomRWFile {
+ protected:
+  std::unique_ptr<FSRandomRWFile> file_;
+  std::unique_ptr<BlockAccessCipherStream> stream_;
+  size_t prefixLength_;
+
+ public:
+  EncryptedRandomRWFile(std::unique_ptr<FSRandomRWFile>&& f,
+                        std::unique_ptr<BlockAccessCipherStream>&& s,
+                        size_t prefixLength)
+      : file_(std::move(f)),
+        stream_(std::move(s)),
+        prefixLength_(prefixLength) {}
+
+  // Indicates if the class makes use of direct I/O
+  // If false you must pass an aligned buffer to Write()
+  bool use_direct_io() const override;
+
+  // Use the returned alignment value to allocate an
+  // aligned buffer for Direct I/O
+  size_t GetRequiredBufferAlignment() const override;
+
+  // Write bytes in `data` at offset `offset`. Returns Status::OK() on success.
+  // Pass an aligned buffer when use_direct_io() returns true.
+  IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
+                 IODebugContext* dbg) override;
+
+  // Read up to `n` bytes starting from offset `offset` and store them in
+  // result; the provided `scratch` size should be at least `n`.
+  // Returns Status::OK() on success.
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override;
+
+  IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
+};
+
+class EncryptedFileSystem : public FileSystemWrapper {
+ public:
+  explicit EncryptedFileSystem(const std::shared_ptr<FileSystem>& base)
+      : FileSystemWrapper(base) {}
+  // Method to add a new cipher key for use by the EncryptionProvider.
+  // @param description Descriptor for this key.
+  // @param cipher The cryptographic key to use
+  // @param len The length of the cipher key
+  // @param for_write If true, this cipher should be used for writing files.
+  //                  If false, this cipher should only be used for reading
+  //                  files
+  // @return OK if the cipher was successfully added to the provider, non-OK
+  //         otherwise
+  virtual Status AddCipher(const std::string& descriptor, const char* cipher,
+                           size_t len, bool for_write) = 0;
+  static const char* kClassName() { return "EncryptedFileSystem"; }
+  bool IsInstanceOf(const std::string& name) const override {
+    if (name == kClassName()) {
+      return true;
+    } else {
+      return FileSystemWrapper::IsInstanceOf(name);
+    }
+  }
+};
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/experimental.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/experimental.h
new file mode 100644
index 0000000000..b59395255c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/experimental.h
@@ -0,0 +1,56 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/db.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace experimental {
+
+// Supported only for Leveled compaction
+Status SuggestCompactRange(DB* db, ColumnFamilyHandle* column_family,
+                           const Slice* begin, const Slice* end);
+Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end);
+
+// Move all L0 files to target_level skipping compaction.
+// This operation succeeds only if the files in L0 have disjoint ranges; this
+// is guaranteed to happen, for instance, if keys are inserted in sorted
+// order. Furthermore, all levels between 1 and target_level must be empty.
+// If any of the above conditions is violated, InvalidArgument will be
+// returned.
+Status PromoteL0(DB* db, ColumnFamilyHandle* column_family,
+                 int target_level = 1);
+
+struct UpdateManifestForFilesStateOptions {
+  // When true, read current file temperatures from FileSystem and update in
+  // DB manifest when a temperature other than Unknown is reported and
+  // inconsistent with manifest.
+  bool update_temperatures = true;
+
+  // TODO: new_checksums: to update files to latest file checksum algorithm
+};
+
+// Utility for updating manifest of DB directory (not open) for current state
+// of files on filesystem. See UpdateManifestForFilesStateOptions.
+//
+// To minimize interference with ongoing DB operations, only the following
+// guarantee is provided, assuming no IO error encountered:
+// * Only files live in DB at start AND end of call to
+//   UpdateManifestForFilesState() are guaranteed to be updated (as needed) in
+//   manifest.
+//   * For example, new files after start of call to
+//     UpdateManifestForFilesState() might not be updated, but that is not
+//     typically required to achieve goal of manifest consistency/completeness
+//     (because current DB configuration would ensure new files get the desired
+//     consistent metadata).
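+//
+// A minimal illustrative call (the DB path and the column-family list below
+// are placeholders, not part of this header):
+//
+//   DBOptions db_opts;
+//   std::vector<ColumnFamilyDescriptor> cfs = {
+//       {kDefaultColumnFamilyName, ColumnFamilyOptions()}};
+//   Status s = experimental::UpdateManifestForFilesState(
+//       db_opts, "/path/to/db", cfs);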
+Status UpdateManifestForFilesState(
+    const DBOptions& db_opts, const std::string& db_name,
+    const std::vector<ColumnFamilyDescriptor>& column_families,
+    const UpdateManifestForFilesStateOptions& opts = {});
+
+}  // namespace experimental
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/file_checksum.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/file_checksum.h
new file mode 100644
index 0000000000..758bae4acf
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/file_checksum.h
@@ -0,0 +1,146 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2013 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// The unknown file checksum.
+constexpr char kUnknownFileChecksum[] = "";
+// The unknown sst file checksum function name.
+constexpr char kUnknownFileChecksumFuncName[] = "Unknown";
+// The standard DB file checksum function name.
+// This is the name of the checksum function returned by
+// GetFileChecksumGenCrc32cFactory();
+constexpr char kStandardDbFileChecksumFuncName[] = "FileChecksumCrc32c";
+
+struct FileChecksumGenContext {
+  std::string file_name;
+  // The name of the requested checksum generator.
+  // Checksum factories may use or ignore requested_checksum_func_name,
+  // and checksum factories written before this field was available are still
+  // compatible.
+  std::string requested_checksum_func_name;
+};
+
+// FileChecksumGenerator is the class that generates the checksum value
+// for each file when the file is written to the file system.
+// Implementations may assume that
+// * Finalize is called at most once during the life of the object
+// * All calls to Update come before Finalize
+// * All calls to GetChecksum come after Finalize
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class FileChecksumGenerator {
+ public:
+  virtual ~FileChecksumGenerator() {}
+
+  // Update the current result after processing the data. For different
+  // checksum functions, the temporal results may be stored and used in Update
+  // to include the new data.
+  virtual void Update(const char* data, size_t n) = 0;
+
+  // Generate the final results if no further new data will be updated.
+  virtual void Finalize() = 0;
+
+  // Get the checksum. The result should not be the empty string and may
+  // include arbitrary bytes, including non-printable characters.
+  virtual std::string GetChecksum() const = 0;
+
+  // Returns a name that identifies the current file checksum function.
+  virtual const char* Name() const = 0;
+};
+
+// Create the FileChecksumGenerator object for each SST file.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
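+//
+// For illustration, computing a checksum with the builtin CRC32C factory
+// might look like this (the file name and buffer contents are placeholders):
+//
+//   FileChecksumGenContext ctx;
+//   ctx.file_name = "000001.sst";
+//   std::unique_ptr<FileChecksumGenerator> gen =
+//       GetFileChecksumGenCrc32cFactory()->CreateFileChecksumGenerator(ctx);
+//   gen->Update("some file contents", 18);
+//   gen->Finalize();
+//   std::string checksum = gen->GetChecksum();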
+class FileChecksumGenFactory : public Customizable {
+ public:
+  ~FileChecksumGenFactory() override {}
+  static const char* Type() { return "FileChecksumGenFactory"; }
+  static Status CreateFromString(
+      const ConfigOptions& options, const std::string& value,
+      std::shared_ptr<FileChecksumGenFactory>* result);
+
+  // Create a new FileChecksumGenerator.
+  virtual std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
+      const FileChecksumGenContext& context) = 0;
+
+  // Return the name of this FileChecksumGenFactory.
+  const char* Name() const override = 0;
+};
+
+// FileChecksumList stores the checksum information of a list of files (e.g.,
+// SST files). The FileChecksumList can be used to store the checksum
+// information of all SST files obtained from the MANIFEST, which are
+// the checksum information of all valid SST files of a DB instance. It can
+// also be used to store the checksum information of a list of SST files to
+// be ingested.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class FileChecksumList {
+ public:
+  virtual ~FileChecksumList() {}
+
+  // Clear the previously stored file checksum information.
+  virtual void reset() = 0;
+
+  // Get the number of checksums in the checksum list
+  virtual size_t size() const = 0;
+
+  // Return all the file checksum information being stored in an unordered_map.
+  // File_number is the key, the first part of the value is the checksum value,
+  // and the second part of the value is the checksum function name.
+  virtual Status GetAllFileChecksums(
+      std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums,
+      std::vector<std::string>* checksum_func_names) = 0;
+
+  // Given the file_number, it searches if the file checksum information is
+  // stored.
+  virtual Status SearchOneFileChecksum(uint64_t file_number,
+                                       std::string* checksum,
+                                       std::string* checksum_func_name) = 0;
+
+  // Insert the checksum information of one file to the FileChecksumList.
+  virtual Status InsertOneFileChecksum(
+      uint64_t file_number, const std::string& checksum,
+      const std::string& checksum_func_name) = 0;
+
+  // Remove the checksum information of one SST file.
+  virtual Status RemoveOneFileChecksum(uint64_t file_number) = 0;
+};
+
+// Create a new file checksum list.
+extern FileChecksumList* NewFileChecksumList();
+
+// Return a shared_ptr of the builtin Crc32c based file checksum generator
+// factory object, which can be shared to create the Crc32c based checksum
+// generator object.
+// Note: this implementation is compatible with many other crc32c checksum
+// implementations and uses big-endian encoding of the result, unlike most
+// other crc32c checksums in RocksDB, which alter the result with
+// crc32c::Mask and use little-endian encoding.
+extern std::shared_ptr<FileChecksumGenFactory>
+GetFileChecksumGenCrc32cFactory();
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/file_system.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/file_system.h
new file mode 100644
index 0000000000..ae59ef8009
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/file_system.h
@@ -0,0 +1,1849 @@
+// Copyright (c) 2019-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// A FileSystem is an interface used by the rocksdb implementation to access
+// storage functionality like the filesystem etc. Callers
+// may wish to provide a custom FileSystem object when opening a database to
+// get fine-grained control; e.g., to rate limit file system operations.
+//
+// All FileSystem implementations are safe for concurrent access from
+// multiple threads without any external synchronization.
+//
+// WARNING: Since this is a new interface, it is expected that there will be
+// some changes as storage systems are ported over.
+
+#pragma once
+
+#include <stdint.h>
+
+#include <chrono>
+#include <cstdarg>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/env.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/options.h"
+#include "rocksdb/table.h"
+#include "rocksdb/thread_status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class FileLock;
+class FSDirectory;
+class FSRandomAccessFile;
+class FSRandomRWFile;
+class FSSequentialFile;
+class FSWritableFile;
+class Logger;
+class Slice;
+struct ImmutableDBOptions;
+struct MutableDBOptions;
+class RateLimiter;
+struct ConfigOptions;
+
+using AccessPattern = RandomAccessFile::AccessPattern;
+using FileAttributes = Env::FileAttributes;
+
+// DEPRECATED
+// Priority of an IO request. This is a hint and does not guarantee any
+// particular QoS.
+// IO_LOW - Typically background reads/writes such as compaction/flush
+// IO_HIGH - Typically user reads/synchronous WAL writes
+enum class IOPriority : uint8_t {
+  kIOLow,
+  kIOHigh,
+  kIOTotal,
+};
+
+// Type of the data being read/written. It can be passed down as a flag
+// for the FileSystem implementation to optionally handle different types in
+// different ways
+enum class IOType : uint8_t {
+  kData,
+  kFilter,
+  kIndex,
+  kMetadata,
+  kWAL,
+  kManifest,
+  kLog,
+  kUnknown,
+  kInvalid,
+};
+
+// Per-request options that can be passed down to the FileSystem
+// implementation. These are hints and are not necessarily guaranteed to be
+// honored. More hints can be added here in the future to indicate things like
+// storage media (HDD/SSD) to be used, replication level etc.
+struct IOOptions {
+  // Timeout for the operation in microseconds
+  std::chrono::microseconds timeout;
+
+  // DEPRECATED
+  // Priority - high or low
+  IOPriority prio;
+
+  // Priority used to charge rate limiter configured in file system level (if
+  // any)
+  // Limitation: right now RocksDB internals do not consider this
+  // rate_limiter_priority
+  Env::IOPriority rate_limiter_priority;
+
+  // Type of data being read/written
+  IOType type;
+
+  // EXPERIMENTAL
+  // An option map that's opaque to RocksDB. It can be used to implement a
+  // custom contract between a FileSystem user and the provider. This is only
+  // useful in cases where a RocksDB user directly uses the FileSystem or file
+  // object for their own purposes, and wants to pass extra options to APIs
+  // such as NewRandomAccessFile and NewWritableFile.
+  std::unordered_map<std::string, std::string> property_bag;
+
+  // Force directory fsync, some file systems like btrfs may skip directory
+  // fsync, set this to force the fsync
+  bool force_dir_fsync;
+
+  // Can be used by underlying file systems to skip recursing through sub
+  // directories and list only files in GetChildren API.
+  bool do_not_recurse;
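+
+  // For illustration, a caller issuing a low-priority metadata read with a
+  // 10ms budget might configure (the values here are arbitrary examples):
+  //
+  //   IOOptions io_opts;
+  //   io_opts.timeout = std::chrono::milliseconds(10);
+  //   io_opts.rate_limiter_priority = Env::IO_LOW;
+  //   io_opts.type = IOType::kMetadata;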
+
+  Env::IOActivity io_activity = Env::IOActivity::kUnknown;
+
+  IOOptions() : IOOptions(false) {}
+
+  explicit IOOptions(bool force_dir_fsync_)
+      : timeout(std::chrono::microseconds::zero()),
+        prio(IOPriority::kIOLow),
+        rate_limiter_priority(Env::IO_TOTAL),
+        type(IOType::kUnknown),
+        force_dir_fsync(force_dir_fsync_),
+        do_not_recurse(false) {}
+};
+
+struct DirFsyncOptions {
+  enum FsyncReason : uint8_t {
+    kNewFileSynced,
+    kFileRenamed,
+    kDirRenamed,
+    kFileDeleted,
+    kDefault,
+  } reason;
+
+  std::string renamed_new_name;  // for kFileRenamed
+  // add other options for other FsyncReason
+
+  DirFsyncOptions();
+
+  explicit DirFsyncOptions(std::string file_renamed_new_name);
+
+  explicit DirFsyncOptions(FsyncReason fsync_reason);
+};
+
+// File scope options that control how a file is opened/created and accessed
+// while it is open. We may add more options here in the future such as
+// redundancy level, media to use etc.
+struct FileOptions : EnvOptions {
+  // Embedded IOOptions to control the parameters for any IOs that need
+  // to be issued for the file open/creation
+  IOOptions io_options;
+
+  // EXPERIMENTAL
+  // The feature is in development and is subject to change.
+  // When creating a new file, set the temperature of the file so that
+  // underlying file systems can put it with appropriate storage media and/or
+  // coding.
+  Temperature temperature = Temperature::kUnknown;
+
+  // The checksum type that is used to calculate the checksum value for
+  // handoff during file writes.
+  ChecksumType handoff_checksum_type;
+
+  FileOptions() : EnvOptions(), handoff_checksum_type(ChecksumType::kCRC32c) {}
+
+  FileOptions(const DBOptions& opts)
+      : EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {}
+
+  FileOptions(const EnvOptions& opts)
+      : EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {}
+
+  FileOptions(const FileOptions& opts)
+      : EnvOptions(opts),
+        io_options(opts.io_options),
+        temperature(opts.temperature),
+        handoff_checksum_type(opts.handoff_checksum_type) {}
+
+  FileOptions& operator=(const FileOptions&) = default;
+};
+
+// A structure to pass back some debugging information from the FileSystem
+// implementation to RocksDB in case of an IO error
+struct IODebugContext {
+  // file_path to be filled in by RocksDB in case of an error
+  std::string file_path;
+
+  // A map of counter names to values - set by the FileSystem implementation
+  std::map<std::string, uint64_t> counters;
+
+  // To be set by the FileSystem implementation
+  std::string msg;
+
+  // To be set by the underlying FileSystem implementation.
+  std::string request_id;
+
+  // In order to log required information in IO tracing for different
+  // operations, each bit in trace_data stores which corresponding info from
+  // IODebugContext will be added in the trace. For example, if trace_data = 1,
+  // the bit at position 0 is set, so TraceData::kRequestID (request_id) will
+  // be logged in the trace record.
+  //
+  enum TraceData : char {
+    // The value of each enum represents the bitwise position for
+    // that information in trace_data which will be used by IOTracer for
+    // tracing. Make sure to add them sequentially.
+    kRequestID = 0,
+  };
+  uint64_t trace_data = 0;
+
+  IODebugContext() {}
+
+  void AddCounter(std::string& name, uint64_t value) {
+    counters.emplace(name, value);
+  }
+
+  // Called by the underlying file system to set request_id and log request_id
+  // in IOTracing.
+  void SetRequestId(const std::string& _request_id) {
+    request_id = _request_id;
+    trace_data |= (1 << TraceData::kRequestID);
+  }
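+
+  // For illustration, a FileSystem implementation could record a request id
+  // and a counter before reporting an error (the names here are made up):
+  //
+  //   dbg->SetRequestId("req-42");
+  //   std::string name = "retries";
+  //   dbg->AddCounter(name, 3);
+  //   return IOStatus::IOError(dbg->ToString());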
+
+  std::string ToString() {
+    std::ostringstream ss;
+    ss << file_path << ", ";
+    for (auto counter : counters) {
+      ss << counter.first << " = " << counter.second << ",";
+    }
+    ss << msg;
+    return ss.str();
+  }
+};
+
+// A function pointer type for custom destruction of the void pointer passed to
+// the ReadAsync API. RocksDB/the caller is responsible for deleting the void
+// pointer allocated by the FS in the ReadAsync API.
+using IOHandleDeleter = std::function<void(void*)>;
+
+// The FileSystem, FSSequentialFile, FSRandomAccessFile, FSWritableFile,
+// FSRandomRWFile, and FSDirectory classes define the interface between
+// RocksDB and storage systems, such as Posix filesystems,
+// remote filesystems etc.
+// The interface allows for fine grained control of individual IO operations,
+// such as setting a timeout, prioritization, hints on data placement,
+// different handling based on type of IO etc.
+// This is accomplished by passing an instance of IOOptions to every
+// API call that can potentially perform IO. Additionally, each such API is
+// passed a pointer to a IODebugContext structure that can be used by the
+// storage system to include troubleshooting information. The return values
+// of the APIs are of type IOStatus, which can indicate an error code/sub-code,
+// as well as metadata about the error such as its scope and whether it's
+// retryable.
+// NewCompositeEnv can be used to create an Env with a custom FileSystem for
+// DBOptions::env.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class FileSystem : public Customizable {
+ public:
+  FileSystem();
+
+  // No copying allowed
+  FileSystem(const FileSystem&) = delete;
+
+  virtual ~FileSystem();
+
+  static const char* Type() { return "FileSystem"; }
+  static const char* kDefaultName() { return "DefaultFileSystem"; }
+
+  // Loads the FileSystem specified by the input value into the result
+  // @see Customizable for a more detailed description of the parameters and
+  // return codes
+  // @param config_options Controls how the FileSystem is loaded
+  // @param value The name and optional properties describing the file system
+  //       to load.
+  // @param result On success, returns the loaded FileSystem
+  // @return OK if the FileSystem was successfully loaded.
+  // @return not-OK if the load failed.
+  static Status CreateFromString(const ConfigOptions& options,
+                                 const std::string& value,
+                                 std::shared_ptr<FileSystem>* result);
+
+  // Return a default FileSystem suitable for the current operating
+  // system.
+  static std::shared_ptr<FileSystem> Default();
+
+  // Handles the event when a new DB or a new ColumnFamily starts using the
+  // specified data paths.
+  //
+  // The data paths might be shared by different DBs or ColumnFamilies,
+  // so RegisterDbPaths might be called with the same data paths.
+  // For example, when CreateColumnFamily is called multiple times with the
+  // same data path, RegisterDbPaths will also be called with the same data
+  // path.
+  //
+  // If the return status is ok, then the paths must be correspondingly
+  // called in UnregisterDbPaths;
+  // otherwise this method should have no side effect, and UnregisterDbPaths
+  // does not need to be called for the paths.
+  //
+  // Different implementations may take different actions.
+  // By default, it's a no-op and returns Status::OK.
+  virtual Status RegisterDbPaths(const std::vector<std::string>& /*paths*/) {
+    return Status::OK();
+  }
+  // Handles the event a DB or a ColumnFamily stops using the specified data
+  // paths.
+  //
+  // It should be called corresponding to each successful RegisterDbPaths.
+  //
+  // Different implementations may take different actions.
+  // By default, it's a no-op and returns Status::OK.
+  virtual Status UnregisterDbPaths(const std::vector<std::string>& /*paths*/) {
+    return Status::OK();
+  }
+
+  // Create a brand new sequentially-readable file with the specified name.
+  // On success, stores a pointer to the new file in *result and returns OK.
+  // On failure stores nullptr in *result and returns non-OK. If the file does
+  // not exist, returns a non-OK status.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  virtual IOStatus NewSequentialFile(const std::string& fname,
+                                     const FileOptions& file_opts,
+                                     std::unique_ptr<FSSequentialFile>* result,
+                                     IODebugContext* dbg) = 0;
+
+  // Create a brand new random access read-only file with the
+  // specified name. On success, stores a pointer to the new file in
+  // *result and returns OK. On failure stores nullptr in *result and
+  // returns non-OK. If the file does not exist, returns a non-OK
+  // status.
+  //
+  // The returned file may be concurrently accessed by multiple threads.
+  virtual IOStatus NewRandomAccessFile(
+      const std::string& fname, const FileOptions& file_opts,
+      std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) = 0;
+  // These values match Linux definition
+  // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56
+  enum WriteLifeTimeHint {
+    kWLTHNotSet = 0,  // No hint information set
+    kWLTHNone,        // No hints about write life time
+    kWLTHShort,       // Data written has a short life time
+    kWLTHMedium,      // Data written has a medium life time
+    kWLTHLong,        // Data written has a long life time
+    kWLTHExtreme,     // Data written has an extremely long life time
+  };
+
+  // Create an object that writes to a new file with the specified
+  // name. Deletes any existing file with the same name and creates a
+  // new file. On success, stores a pointer to the new file in
+  // *result and returns OK. On failure stores nullptr in *result and
+  // returns non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  virtual IOStatus NewWritableFile(const std::string& fname,
+                                   const FileOptions& file_opts,
+                                   std::unique_ptr<FSWritableFile>* result,
+                                   IODebugContext* dbg) = 0;
+
+  // Create an object that writes to a file with the specified name.
+  // `FSWritableFile::Append()`s will append after any existing content. If the
+  // file does not already exist, creates it.
+  //
+  // On success, stores a pointer to the file in *result and returns OK. On
+  // failure stores nullptr in *result and returns non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  virtual IOStatus ReopenWritableFile(
+      const std::string& /*fname*/, const FileOptions& /*options*/,
+      std::unique_ptr<FSWritableFile>* /*result*/, IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported("ReopenWritableFile");
+  }
+
+  // Reuse an existing file by renaming it and opening it as writable.
+  virtual IOStatus ReuseWritableFile(const std::string& fname,
+                                     const std::string& old_fname,
+                                     const FileOptions& file_opts,
+                                     std::unique_ptr<FSWritableFile>* result,
+                                     IODebugContext* dbg);
+
+  // Open `fname` for random read and write; if the file doesn't exist it
+  // will be created.
+  // On success, stores a pointer to the new file in
+  // *result and returns OK. On failure returns non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  virtual IOStatus NewRandomRWFile(const std::string& /*fname*/,
+                                   const FileOptions& /*options*/,
+                                   std::unique_ptr<FSRandomRWFile>* /*result*/,
+                                   IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported(
+        "RandomRWFile is not implemented in this FileSystem");
+  }
+
+  // Opens `fname` as a memory-mapped file for read and write (in-place updates
+  // only, i.e., no appends). On success, stores a raw buffer covering the
+  // whole file in `*result`. The file must exist prior to this call.
+  virtual IOStatus NewMemoryMappedFileBuffer(
+      const std::string& /*fname*/,
+      std::unique_ptr<MemoryMappedFileBuffer>* /*result*/) {
+    return IOStatus::NotSupported(
+        "MemoryMappedFileBuffer is not implemented in this FileSystem");
+  }
+
+  // Create an object that represents a directory. Will fail if the directory
+  // doesn't exist. If the directory exists, it will open the directory
+  // and create a new Directory object.
+  //
+  // On success, stores a pointer to the new Directory in
+  // *result and returns OK. On failure stores nullptr in *result and
+  // returns non-OK.
+  virtual IOStatus NewDirectory(const std::string& name,
+                                const IOOptions& io_opts,
+                                std::unique_ptr<FSDirectory>* result,
+                                IODebugContext* dbg) = 0;
+
+  // Returns OK if the named file exists.
+  //         NotFound if the named file does not exist,
+  //                  the calling process does not have permission to determine
+  //                  whether this file exists, or if the path is invalid.
+  //         IOError if an IO Error was encountered
+  virtual IOStatus FileExists(const std::string& fname,
+                              const IOOptions& options,
+                              IODebugContext* dbg) = 0;
+
+  // Store in *result the names of the children of the specified directory.
+  // The names are relative to "dir".
+  // Original contents of *result are dropped.
+  // Returns OK if "dir" exists and "*result" contains its children.
+  //         NotFound if "dir" does not exist, the calling process does not
+  //                  have permission to access "dir", or if "dir" is invalid.
+  //         IOError if an IO Error was encountered
+  virtual IOStatus GetChildren(const std::string& dir, const IOOptions& options,
+                               std::vector<std::string>* result,
+                               IODebugContext* dbg) = 0;
+
+  // Store in *result the attributes of the children of the specified
+  // directory.
+  // In case the implementation lists the directory prior to iterating the
+  // files and files are concurrently deleted, the deleted files will be
+  // omitted from result.
+  // The name attributes are relative to "dir".
+  // Original contents of *result are dropped.
+  // Returns OK if "dir" exists and "*result" contains its children.
+  //         NotFound if "dir" does not exist, the calling process does not
+  //                  have permission to access "dir", or if "dir" is invalid.
+  //         IOError if an IO Error was encountered
+  virtual IOStatus GetChildrenFileAttributes(
+      const std::string& dir, const IOOptions& options,
+      std::vector<FileAttributes>* result, IODebugContext* dbg) {
+    assert(result != nullptr);
+    std::vector<std::string> child_fnames;
+    IOStatus s = GetChildren(dir, options, &child_fnames, dbg);
+    if (!s.ok()) {
+      return s;
+    }
+    result->resize(child_fnames.size());
+    size_t result_size = 0;
+    for (size_t i = 0; i < child_fnames.size(); ++i) {
+      const std::string path = dir + "/" + child_fnames[i];
+      if (!(s = GetFileSize(path, options, &(*result)[result_size].size_bytes,
+                            dbg))
+               .ok()) {
+        if (FileExists(path, options, dbg).IsNotFound()) {
+          // The file may have been deleted since we listed the directory
+          continue;
+        }
+        return s;
+      }
+      (*result)[result_size].name = std::move(child_fnames[i]);
+      result_size++;
+    }
+    result->resize(result_size);
+    return IOStatus::OK();
+  }
+
+// This seems to clash with a macro on Windows, so #undef it here
+#ifdef DeleteFile
+#undef DeleteFile
+#endif
+  // Delete the named file.
+  virtual IOStatus DeleteFile(const std::string& fname,
+                              const IOOptions& options,
+                              IODebugContext* dbg) = 0;
+
+  // Truncate the named file to the specified size.
+  virtual IOStatus Truncate(const std::string& /*fname*/, size_t /*size*/,
+                            const IOOptions& /*options*/,
+                            IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported(
+        "Truncate is not supported for this FileSystem");
+  }
+
+  // Create the specified directory. Returns an error if the directory exists.
+  virtual IOStatus CreateDir(const std::string& dirname,
+                             const IOOptions& options, IODebugContext* dbg) = 0;
+
+  // Creates the directory if missing. Returns OK if it exists, or if it was
+  // successfully created.
+  virtual IOStatus CreateDirIfMissing(const std::string& dirname,
+                                      const IOOptions& options,
+                                      IODebugContext* dbg) = 0;
+
+  // Delete the specified directory.
+  virtual IOStatus DeleteDir(const std::string& dirname,
+                             const IOOptions& options, IODebugContext* dbg) = 0;
+
+  // Store the size of fname in *file_size.
+  virtual IOStatus GetFileSize(const std::string& fname,
+                               const IOOptions& options, uint64_t* file_size,
+                               IODebugContext* dbg) = 0;
+
+  // Store the last modification time of fname in *file_mtime.
+  virtual IOStatus GetFileModificationTime(const std::string& fname,
+                                           const IOOptions& options,
+                                           uint64_t* file_mtime,
+                                           IODebugContext* dbg) = 0;
+  // Rename file src to target.
+  virtual IOStatus RenameFile(const std::string& src, const std::string& target,
+                              const IOOptions& options,
+                              IODebugContext* dbg) = 0;
+
+  // Hard link file src to target.
+  virtual IOStatus LinkFile(const std::string& /*src*/,
+                            const std::string& /*target*/,
+                            const IOOptions& /*options*/,
+                            IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported(
+        "LinkFile is not supported for this FileSystem");
+  }
+
+  virtual IOStatus NumFileLinks(const std::string& /*fname*/,
+                                const IOOptions& /*options*/,
+                                uint64_t* /*count*/, IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported(
+        "Getting number of file links is not supported for this FileSystem");
+  }
+
+  virtual IOStatus AreFilesSame(const std::string& /*first*/,
+                                const std::string& /*second*/,
+                                const IOOptions& /*options*/, bool* /*res*/,
+                                IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported(
+        "AreFilesSame is not supported for this FileSystem");
+  }
+
+  // Lock the specified file. Used to prevent concurrent access to
+  // the same db by multiple processes. On failure, stores nullptr in
+  // *lock and returns non-OK.
+  //
+  // On success, stores a pointer to the object that represents the
+  // acquired lock in *lock and returns OK. The caller should call
+  // UnlockFile(*lock) to release the lock. If the process exits,
+  // the lock will be automatically released.
+  //
+  // If somebody else already holds the lock, finishes immediately
+  // with a failure. I.e., this call does not wait for existing locks
+  // to go away.
+  //
+  // May create the named file if it does not already exist.
+  virtual IOStatus LockFile(const std::string& fname, const IOOptions& options,
+                            FileLock** lock, IODebugContext* dbg) = 0;
+
+  // Release the lock acquired by a previous successful call to LockFile.
+  // REQUIRES: lock was returned by a successful LockFile() call
+  // REQUIRES: lock has not already been unlocked.
+  virtual IOStatus UnlockFile(FileLock* lock, const IOOptions& options,
+                              IODebugContext* dbg) = 0;
+
+  // *path is set to a temporary directory that can be used for testing. It may
+  // or may not have just been created. The directory may or may not differ
+  // between runs of the same process, but subsequent calls will return the
+  // same directory.
+  virtual IOStatus GetTestDirectory(const IOOptions& options, std::string* path,
+                                    IODebugContext* dbg) = 0;
+
+  // Create and return a default logger (an instance of EnvLogger) for storing
+  // informational messages. Derived classes can override to provide a custom
+  // logger.
+  virtual IOStatus NewLogger(const std::string& fname, const IOOptions& io_opts,
+                             std::shared_ptr<Logger>* result,
+                             IODebugContext* dbg);
+
+  // Get full directory name for this db.
+  virtual IOStatus GetAbsolutePath(const std::string& db_path,
+                                   const IOOptions& options,
+                                   std::string* output_path,
+                                   IODebugContext* dbg) = 0;
+
+  // Sanitize the FileOptions. Typically called by a FileOptions/EnvOptions
+  // copy constructor
+  virtual void SanitizeFileOptions(FileOptions* /*opts*/) const {}
+
+  // OptimizeForLogRead will create a new FileOptions object that is a copy of
+  // the FileOptions in the parameters, but is optimized for reading log files.
+  virtual FileOptions OptimizeForLogRead(const FileOptions& file_options) const;
+
+  // OptimizeForManifestRead will create a new FileOptions object that is a
+  // copy of the FileOptions in the parameters, but is optimized for reading
+  // manifest files.
+  virtual FileOptions OptimizeForManifestRead(
+      const FileOptions& file_options) const;
+
+  // OptimizeForLogWrite will create a new FileOptions object that is a copy of
+  // the FileOptions in the parameters, but is optimized for writing log files.
+  // Default implementation returns the copy of the same object.
+  virtual FileOptions OptimizeForLogWrite(const FileOptions& file_options,
+                                          const DBOptions& db_options) const;
+
+  // OptimizeForManifestWrite will create a new FileOptions object that is a
+  // copy of the FileOptions in the parameters, but is optimized for writing
+  // manifest files. Default implementation returns the copy of the same
+  // object.
+  virtual FileOptions OptimizeForManifestWrite(
+      const FileOptions& file_options) const;
+
+  // OptimizeForCompactionTableWrite will create a new FileOptions object that
+  // is a copy of the FileOptions in the parameters, but is optimized for
+  // writing table files.
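+  //
+  // For illustration, an override might return a copy with direct writes
+  // enabled (a sketch only; whether this is appropriate depends on the
+  // storage backend):
+  //
+  //   FileOptions opts = file_options;
+  //   opts.use_direct_writes = true;
+  //   return opts;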
+  virtual FileOptions OptimizeForCompactionTableWrite(
+      const FileOptions& file_options,
+      const ImmutableDBOptions& immutable_ops) const;
+
+  // OptimizeForCompactionTableRead will create a new FileOptions object that
+  // is a copy of the FileOptions in the parameters, but is optimized for
+  // reading table files.
+  virtual FileOptions OptimizeForCompactionTableRead(
+      const FileOptions& file_options,
+      const ImmutableDBOptions& db_options) const;
+
+  // OptimizeForBlobFileRead will create a new FileOptions object that
+  // is a copy of the FileOptions in the parameters, but is optimized for
+  // reading blob files.
+  virtual FileOptions OptimizeForBlobFileRead(
+      const FileOptions& file_options,
+      const ImmutableDBOptions& db_options) const;
+
+// This seems to clash with a macro on Windows, so #undef it here
+#ifdef GetFreeSpace
+#undef GetFreeSpace
+#endif
+
+  // Get the amount of free disk space
+  virtual IOStatus GetFreeSpace(const std::string& /*path*/,
+                                const IOOptions& /*options*/,
+                                uint64_t* /*diskfree*/,
+                                IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported("GetFreeSpace");
+  }
+
+  virtual IOStatus IsDirectory(const std::string& /*path*/,
+                               const IOOptions& options, bool* is_dir,
+                               IODebugContext* /*dbg*/) = 0;
+
+  // EXPERIMENTAL
+  // Poll for completion of read IO requests. The Poll() method should call the
+  // callback functions to indicate completion of read requests.
+  // The underlying FS is required to support the Poll API. The Poll
+  // implementation should ensure that the callback gets called at IO
+  // completion, and return only after the callback has been called.
+  // If Poll returns partial results for any reads, it is the caller's
+  // responsibility to call Read or ReadAsync in order to get the remaining
+  // bytes.
+  //
+  // Default implementation is to return IOStatus::OK.
+  virtual IOStatus Poll(std::vector<void*>& /*io_handles*/,
+                        size_t /*min_completions*/) {
+    return IOStatus::OK();
+  }
+
+  // EXPERIMENTAL
+  // Abort the read IO requests submitted asynchronously. The underlying FS is
+  // required to support the AbortIO API. The AbortIO implementation should
+  // ensure that all the read requests related to io_handles are aborted and
+  // that the callback is not called for these io_handles.
+  //
+  // Default implementation is to return IOStatus::OK.
+  virtual IOStatus AbortIO(std::vector<void*>& /*io_handles*/) {
+    return IOStatus::OK();
+  }
+
+  // Indicates to upper layers whether the FileSystem supports/uses async IO
+  // or not
+  virtual bool use_async_io() { return true; }
+
+  // If you're adding methods here, remember to add them to EnvWrapper too.
+
+ private:
+  void operator=(const FileSystem&);
+};
+
+// A file abstraction for reading sequentially through a file
+class FSSequentialFile {
+ public:
+  FSSequentialFile() {}
+
+  virtual ~FSSequentialFile() {}
+
+  // Read up to "n" bytes from the file. "scratch[0..n-1]" may be
+  // written by this routine. Sets "*result" to the data that was
+  // read (including if fewer than "n" bytes were successfully read).
+  // May set "*result" to point at data in "scratch[0..n-1]", so
+  // "scratch[0..n-1]" must be live when "*result" is used.
+  // If an error was encountered, returns a non-OK status.
+  //
+  // After call, result->size() < n only if end of file has been
+  // reached (or non-OK status). Read might fail if called again after
+  // first result->size() < n.
+  //
+  // REQUIRES: External synchronization
+  virtual IOStatus Read(size_t n, const IOOptions& options, Slice* result,
+                        char* scratch, IODebugContext* dbg) = 0;
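+
+  // For illustration, a caller might drain a file like this (the buffer size
+  // and error handling are arbitrary choices, not part of this contract):
+  //
+  //   char scratch[4096];
+  //   Slice chunk;
+  //   do {
+  //     IOStatus s = file->Read(sizeof(scratch), IOOptions(), &chunk,
+  //                             scratch, nullptr);
+  //     if (!s.ok()) break;  // handle the error
+  //     // consume chunk.data(), chunk.size()
+  //   } while (chunk.size() == sizeof(scratch));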
+
+  // Skip "n" bytes from the file. This is guaranteed to be no
+  // slower than reading the same data, but may be faster.
+  //
+  // If end of file is reached, skipping will stop at the end of the
+  // file, and Skip will return OK.
+  //
+  // REQUIRES: External synchronization
+  virtual IOStatus Skip(uint64_t n) = 0;
+
+  // Indicates to the upper layers whether the current SequentialFile
+  // implementation uses direct IO.
+  virtual bool use_direct_io() const { return false; }
+
+  // Use the returned alignment value to allocate an
+  // aligned buffer for Direct I/O
+  virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  virtual IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) {
+    return IOStatus::NotSupported("InvalidateCache not supported.");
+  }
+
+  // Positioned Read for direct I/O
+  // If Direct I/O is enabled, offset, n, and scratch should be properly
+  // aligned
+  virtual IOStatus PositionedRead(uint64_t /*offset*/, size_t /*n*/,
+                                  const IOOptions& /*options*/,
+                                  Slice* /*result*/, char* /*scratch*/,
+                                  IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported("PositionedRead");
+  }
+
+  // EXPERIMENTAL
+  // When available, returns the actual temperature for the file. This is
+  // useful in case some outside process moves a file from one tier to another,
+  // though the temperature is generally expected not to change while a file is
+  // open.
+  virtual Temperature GetTemperature() const { return Temperature::kUnknown; }
+
+  // If you're adding methods here, remember to add them to
+  // SequentialFileWrapper too.
+};
+
+// A read IO request structure for use in MultiRead and asynchronous Read APIs.
+struct FSReadRequest {
+  // Input parameter that represents the file offset in bytes.
+  uint64_t offset;
+
+  // Input parameter that represents the length to read in bytes. `result` only
+  // returns fewer bytes if end of file is hit (or `status` is not OK).
+  size_t len;
+
+  // A buffer that MultiRead() can optionally place data in. It can
+  // ignore this and allocate its own buffer.
+  // The lifecycle of scratch will be until IO is completed.
+  //
+  // In case of asynchronous reads, it's an output parameter and it will be
+  // maintained until the callback has been called. Scratch is allocated by
+  // RocksDB and will be passed to the underlying FileSystem.
+  char* scratch;
+
+  // Output parameter set by MultiRead() to point to the data buffer, and
+  // the number of valid bytes
+  //
+  // In case of asynchronous reads, this output parameter is set by Async Read
+  // APIs to point to the data buffer, and
+  // the number of valid bytes.
+  // Slice result should point to scratch, i.e. the data should
+  // always be read into scratch.
+  Slice result;
+
+  // Output parameter set by underlying FileSystem that represents status of
+  // read request.
+  IOStatus status;
+};
+
+// A file abstraction for randomly reading the contents of a file.
+class FSRandomAccessFile {
+ public:
+  FSRandomAccessFile() {}
+
+  virtual ~FSRandomAccessFile() {}
+
+  // Read up to "n" bytes from the file starting at "offset".
+  // "scratch[0..n-1]" may be written by this routine. Sets "*result"
+  // to the data that was read (including if fewer than "n" bytes were
+  // successfully read). May set "*result" to point at data in
+  // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
+  // "*result" is used. If an error was encountered, returns a non-OK
+  // status.
+  //
+  // After call, result->size() < n only if end of file has been
+  // reached (or non-OK status). Read might fail if called again after
+  // first result->size() < n.
+  //
+  // Safe for concurrent use by multiple threads.
+  // If Direct I/O is enabled, offset, n, and scratch should be aligned
+  // properly.
+  virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                        Slice* result, char* scratch,
+                        IODebugContext* dbg) const = 0;
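+
+  // For illustration, a positioned read of 4 KiB at offset 8192 might look
+  // like this (the sizes and offset are arbitrary examples):
+  //
+  //   char scratch[4096];
+  //   Slice result;
+  //   IOStatus s = file->Read(8192, sizeof(scratch), IOOptions(), &result,
+  //                           scratch, nullptr);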
Sets "*result" + // to the data that was read (including if fewer than "n" bytes were + // successfully read). May set "*result" to point at data in + // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when + // "*result" is used. If an error was encountered, returns a non-OK + // status. + // + // After call, result->size() < n only if end of file has been + // reached (or non-OK status). Read might fail if called again after + // first result->size() < n. + // + // Safe for concurrent use by multiple threads. + // If Direct I/O enabled, offset, n, and scratch should be aligned properly. + virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const = 0; + + // Readahead the file starting from offset by n bytes for caching. + // If it's not implemented (default: `NotSupported`), RocksDB will create + // internal prefetch buffer to improve read performance. + virtual IOStatus Prefetch(uint64_t /*offset*/, size_t /*n*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return IOStatus::NotSupported("Prefetch"); + } + + // Read a bunch of blocks as described by reqs. The blocks can + // optionally be read in parallel. This is a synchronous call, i.e it + // should return after all reads have completed. The reads will be + // non-overlapping but can be in any order. If the function return Status + // is not ok, status of individual requests will be ignored and return + // status will be assumed for all read requests. The function return status + // is only meant for errors that occur before processing individual read + // requests. + virtual IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, + const IOOptions& options, IODebugContext* dbg) { + assert(reqs != nullptr); + for (size_t i = 0; i < num_reqs; ++i) { + FSReadRequest& req = reqs[i]; + req.status = + Read(req.offset, req.len, options, &req.result, req.scratch, dbg); + } + return IOStatus::OK(); + } + + // Tries to get an unique ID for this file that will be the same each time + // the file is opened (and will stay the same while the file is open). + // Furthermore, it tries to make this ID at most "max_size" bytes. If such an + // ID can be created this function returns the length of the ID and places it + // in "id"; otherwise, this function returns 0, in which case "id" + // may not have been modified. + // + // This function guarantees, for IDs from a given environment, two unique ids + // cannot be made equal to each other by adding arbitrary bytes to one of + // them. That is, no unique ID is the prefix of another. + // + // This function guarantees that the returned ID will not be interpretable as + // a single varint. + // + // Note: these IDs are only valid for the duration of the process. + virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { + return 0; // Default implementation to prevent issues with backwards + // compatibility. + }; + + enum AccessPattern { kNormal, kRandom, kSequential, kWillNeed, kWontNeed }; + + virtual void Hint(AccessPattern /*pattern*/) {} + + // Indicates the upper layers if the current RandomAccessFile implementation + // uses direct IO. + virtual bool use_direct_io() const { return false; } + + // Use the returned alignment value to allocate + // aligned buffer for Direct I/O + virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } + + // Remove any kind of caching of data from the offset to offset+length + // of this file. 
+  virtual IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) {
+    return IOStatus::NotSupported("InvalidateCache not supported.");
+  }
+
+  // EXPERIMENTAL
+  // This API reads the requested data in FSReadRequest asynchronously. This is
+  // an asynchronous call, i.e. it should return after submitting the request.
+  //
+  // When the read request is completed, the callback function specified in cb
+  // should be called with arguments cb_arg and the result populated in
+  // FSReadRequest, with the result and status fields updated by the
+  // FileSystem. cb_arg should be used by the callback to track the original
+  // request submitted.
+  //
+  // This API should also populate io_handle, which should be used by the
+  // underlying FileSystem to store the context in order to distinguish the
+  // read requests at their side and provide the custom deletion function in
+  // del_fn. RocksDB guarantees that the del_fn for io_handle will be called
+  // after receiving the callback. Furthermore, RocksDB guarantees that if it
+  // calls the Poll API for this io_handle, del_fn will be called after the
+  // Poll returns. RocksDB is responsible for managing the lifetime of
+  // io_handle.
+  //
+  // req contains the request offset and size passed as input parameters of the
+  // read request, and the result and status fields are output parameters set
+  // by the underlying FileSystem. The data should always be read into the
+  // scratch field.
+  //
+  // Default implementation is to read the data synchronously.
+  virtual IOStatus ReadAsync(
+      FSReadRequest& req, const IOOptions& opts,
+      std::function<void(const FSReadRequest&, void*)> cb, void* cb_arg,
+      void** /*io_handle*/, IOHandleDeleter* /*del_fn*/, IODebugContext* dbg) {
+    req.status =
+        Read(req.offset, req.len, opts, &(req.result), req.scratch, dbg);
+    cb(req, cb_arg);
+    return IOStatus::OK();
+  }
+
+  // EXPERIMENTAL
+  // When available, returns the actual temperature for the file. This is
+  // useful in case some outside process moves a file from one tier to another,
+  // though the temperature is generally expected not to change while a file is
+  // open.
+  virtual Temperature GetTemperature() const { return Temperature::kUnknown; }
+
+  // If you're adding methods here, remember to add them to
+  // RandomAccessFileWrapper too.
+};
+
+// A data structure that brings the data verification information, which is
+// used together with data being written to a file.
+struct DataVerificationInfo {
+  // checksum of the data being written.
+  Slice checksum;
+};
+
+// A file abstraction for sequential writing. The implementation
+// must provide buffering since callers may append small fragments
+// at a time to the file.
+class FSWritableFile {
+ public:
+  FSWritableFile()
+      : last_preallocated_block_(0),
+        preallocation_block_size_(0),
+        io_priority_(Env::IO_TOTAL),
+        write_hint_(Env::WLTH_NOT_SET),
+        strict_bytes_per_sync_(false) {}
+
+  explicit FSWritableFile(const FileOptions& options)
+      : last_preallocated_block_(0),
+        preallocation_block_size_(0),
+        io_priority_(Env::IO_TOTAL),
+        write_hint_(Env::WLTH_NOT_SET),
+        strict_bytes_per_sync_(options.strict_bytes_per_sync) {}
+
+  virtual ~FSWritableFile() {}
+
+  // Append data to the end of the file
+  // Note: A WritableFile object must support either Append or
+  // PositionedAppend, so the users cannot mix the two.
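+  //
+  // For illustration, a caller typically appends and then flushes (options
+  // and error handling elided):
+  //
+  //   IOStatus s = wf->Append(Slice("payload"), IOOptions(), nullptr);
+  //   if (s.ok()) s = wf->Flush(IOOptions(), nullptr);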
+  virtual IOStatus Append(const Slice& data, const IOOptions& options,
+                          IODebugContext* dbg) = 0;
+
+  // Append data with verification information.
+  // Note that this API change is experimental and it might be changed in
+  // the future. Currently, RocksDB only generates a crc32c based checksum for
+  // the file writes when the checksum handoff option is set.
+  // Expected behavior: if the handoff_checksum_type in FileOptions (currently,
+  // ChecksumType::kCRC32C is set as default) is not supported by this
+  // FSWritableFile, the information in DataVerificationInfo can be ignored
+  // (i.e. does not perform checksum verification).
+  virtual IOStatus Append(const Slice& data, const IOOptions& options,
+                          const DataVerificationInfo& /* verification_info */,
+                          IODebugContext* dbg) {
+    return Append(data, options, dbg);
+  }
+
+  // PositionedAppend data to the specified offset. The new EOF after append
+  // must be larger than the previous EOF. This is to be used when writes are
+  // not backed by OS buffers and hence have to always start from the start of
+  // the sector. The implementation thus needs to also rewrite the last
+  // partial sector.
+  // Note: PositionedAppend does not guarantee moving the file offset after the
+  // write. A WritableFile object must support either Append or
+  // PositionedAppend, so the users cannot mix the two.
+  //
+  // PositionedAppend() can only happen on the page/sector boundaries. For that
+  // reason, if the last write was an incomplete sector we still need to rewind
+  // back to the nearest sector/page and rewrite the portion of it with
+  // whatever we need to add. We need to keep where we stop writing.
+  //
+  // PositionedAppend() can only write whole sectors. For that reason we have
+  // to pad with zeros for the last write and trim the file when closing
+  // according to the position we keep in the previous step.
+  //
+  // PositionedAppend() requires an aligned buffer to be passed in. The
+  // alignment required is queried via GetRequiredBufferAlignment()
+  virtual IOStatus PositionedAppend(const Slice& /* data */,
+                                    uint64_t /* offset */,
+                                    const IOOptions& /*options*/,
+                                    IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported("PositionedAppend");
+  }
+
+  // PositionedAppend data with verification information.
+  // Note that this API change is experimental and it might be changed in
+  // the future. Currently, RocksDB only generates a crc32c based checksum for
+  // the file writes when the checksum handoff option is set.
+  // Expected behavior: if the handoff_checksum_type in FileOptions (currently,
+  // ChecksumType::kCRC32C is set as default) is not supported by this
+  // FSWritableFile, the information in DataVerificationInfo can be ignored
+  // (i.e. does not perform checksum verification).
+  virtual IOStatus PositionedAppend(
+      const Slice& /* data */, uint64_t /* offset */,
+      const IOOptions& /*options*/,
+      const DataVerificationInfo& /* verification_info */,
+      IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported("PositionedAppend");
+  }
+
+  // Truncate is necessary to trim the file to the correct size
+  // before closing. It is not always possible to keep track of the file
+  // size due to whole pages writes. The behavior is undefined if called
+  // with other writes to follow.
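+  //
+  // For illustration, a direct-IO writer that padded its final sector might
+  // finish up as follows (a sketch; `logical_size` is whatever size the
+  // caller tracked, not something this header provides):
+  //
+  //   wf->Truncate(logical_size, IOOptions(), nullptr);
+  //   wf->Close(IOOptions(), nullptr);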
+  virtual IOStatus Truncate(uint64_t /*size*/, const IOOptions& /*options*/,
+                            IODebugContext* /*dbg*/) {
+    return IOStatus::OK();
+  }
+  virtual IOStatus Close(const IOOptions& /*options*/,
+                         IODebugContext* /*dbg*/) = 0;
+
+  virtual IOStatus Flush(const IOOptions& options, IODebugContext* dbg) = 0;
+  virtual IOStatus Sync(const IOOptions& options,
+                        IODebugContext* dbg) = 0;  // sync data
+
+  /*
+   * Sync data and/or metadata as well.
+   * By default, sync only data.
+   * Override this method for environments where we need to sync
+   * metadata as well.
+   */
+  virtual IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) {
+    return Sync(options, dbg);
+  }
+
+  // true if Sync() and Fsync() are safe to call concurrently with Append()
+  // and Flush().
+  virtual bool IsSyncThreadSafe() const { return false; }
+
+  // Indicates to the upper layers whether the current WritableFile
+  // implementation uses direct IO.
+  virtual bool use_direct_io() const { return false; }
+
+  // Use the returned alignment value to allocate an
+  // aligned buffer for Direct I/O
+  virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
+
+  virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) {
+    write_hint_ = hint;
+  }
+
+  /*
+   * If rate limiting is enabled, change the file-granularity priority used in
+   * rate-limiting writes.
+   *
+   * In the presence of finer-granularity priority such as
+   * `WriteOptions::rate_limiter_priority`, this file-granularity priority may
+   * be overridden by a non-Env::IO_TOTAL finer-granularity priority and used
+   * as a fallback for Env::IO_TOTAL finer-granularity priority.
+   *
+   * If rate limiting is not enabled, this call has no effect.
+   */
+  virtual void SetIOPriority(Env::IOPriority pri) { io_priority_ = pri; }
+
+  virtual Env::IOPriority GetIOPriority() { return io_priority_; }
+
+  virtual Env::WriteLifeTimeHint GetWriteLifeTimeHint() { return write_hint_; }
+  /*
+   * Get the size of valid data in the file.
+   */
+  virtual uint64_t GetFileSize(const IOOptions& /*options*/,
+                               IODebugContext* /*dbg*/) {
+    return 0;
+  }
+
+  /*
+   * Get and set the default pre-allocation block size for writes to
+   * this file. If non-zero, then Allocate will be used to extend the
+   * underlying storage of a file (generally via fallocate) if the Env
+   * instance supports it.
+   */
+  virtual void SetPreallocationBlockSize(size_t size) {
+    preallocation_block_size_ = size;
+  }
+
+  virtual void GetPreallocationStatus(size_t* block_size,
+                                      size_t* last_allocated_block) {
+    *last_allocated_block = last_preallocated_block_;
+    *block_size = preallocation_block_size_;
+  }
+
+  // For documentation, refer to RandomAccessFile::GetUniqueId()
+  virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const {
+    return 0;  // Default implementation to prevent issues with backwards
+               // compatibility.
+  }
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  // This call has no effect on dirty pages in the cache.
+  virtual IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) {
+    return IOStatus::NotSupported("InvalidateCache not supported.");
+  }
+
+  // Sync a file range with disk.
+  // offset is the starting byte of the file range to be synchronized.
+  // nbytes specifies the length of the range to be synchronized.
+  // This asks the OS to initiate flushing the cached data to disk,
+  // without waiting for completion.
+  // Default implementation does nothing.
+  virtual IOStatus RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/,
+                             const IOOptions& options, IODebugContext* dbg) {
+    if (strict_bytes_per_sync_) {
+      return Sync(options, dbg);
+    }
+    return IOStatus::OK();
+  }
+
+  // PrepareWrite performs any necessary preparation for a write
+  // before the write actually occurs. This allows for pre-allocation
+  // of space on devices where it can result in less file
+  // fragmentation and/or less waste from over-zealous filesystem
+  // pre-allocation.
+  virtual void PrepareWrite(size_t offset, size_t len, const IOOptions& options,
+                            IODebugContext* dbg) {
+    if (preallocation_block_size_ == 0) {
+      return;
+    }
+    // If this write would cross one or more preallocation blocks,
+    // determine what the last preallocation block necessary to
+    // cover this write would be and Allocate to that point.
+    const auto block_size = preallocation_block_size_;
+    size_t new_last_preallocated_block =
+        (offset + len + block_size - 1) / block_size;
+    if (new_last_preallocated_block > last_preallocated_block_) {
+      size_t num_spanned_blocks =
+          new_last_preallocated_block - last_preallocated_block_;
+      Allocate(block_size * last_preallocated_block_,
+               block_size * num_spanned_blocks, options, dbg)
+          .PermitUncheckedError();
+      last_preallocated_block_ = new_last_preallocated_block;
+    }
+  }
+
+  // Pre-allocates space for a file.
+  virtual IOStatus Allocate(uint64_t /*offset*/, uint64_t /*len*/,
+                            const IOOptions& /*options*/,
+                            IODebugContext* /*dbg*/) {
+    return IOStatus::OK();
+  }
+
+  // If you're adding methods here, remember to add them to
+  // WritableFileWrapper too.
+
+ protected:
+  size_t preallocation_block_size() { return preallocation_block_size_; }
+
+ private:
+  size_t last_preallocated_block_;
+  size_t preallocation_block_size_;
+  // No copying allowed
+  FSWritableFile(const FSWritableFile&);
+  void operator=(const FSWritableFile&);
+
+ protected:
+  Env::IOPriority io_priority_;
+  Env::WriteLifeTimeHint write_hint_;
+  const bool strict_bytes_per_sync_;
+};
+
+// A file abstraction for random reading and writing.
+class FSRandomRWFile {
+ public:
+  FSRandomRWFile() {}
+
+  virtual ~FSRandomRWFile() {}
+
+  // Indicates if the class makes use of direct I/O.
+  // If false, you must pass an aligned buffer to Write().
+  virtual bool use_direct_io() const { return false; }
+
+  // Use the returned alignment value to allocate an
+  // aligned buffer for Direct I/O.
+  virtual size_t GetRequiredBufferAlignment() const {
+    return kDefaultPageSize;
+  }
+
+  // Write bytes in `data` at offset `offset`. Returns Status::OK() on success.
+  // Pass an aligned buffer when use_direct_io() returns true.
+  virtual IOStatus Write(uint64_t offset, const Slice& data,
+                         const IOOptions& options, IODebugContext* dbg) = 0;
+
+  // Read up to `n` bytes starting from offset `offset` and store them in
+  // result; the provided `scratch` size should be at least `n`.
+  //
+  // After call, result->size() < n only if end of file has been
+  // reached (or non-OK status). Read might fail if called again after
+  // first result->size() < n.
+  //
+  // Returns Status::OK() on success.
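+  //
+  // Illustrative use (a sketch; `file` is any open FSRandomRWFile and
+  // error handling is elided):
+  //
+  //   char scratch[4096];
+  //   Slice result;
+  //   IOStatus s = file->Read(0, sizeof(scratch), IOOptions(),
+  //                           &result, scratch, nullptr);
+  //   if (s.ok() && result.size() < sizeof(scratch)) {
+  //     // end of file reached
+  //   }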
+  virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                        Slice* result, char* scratch,
+                        IODebugContext* dbg) const = 0;
+
+  virtual IOStatus Flush(const IOOptions& options, IODebugContext* dbg) = 0;
+
+  virtual IOStatus Sync(const IOOptions& options, IODebugContext* dbg) = 0;
+
+  virtual IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) {
+    return Sync(options, dbg);
+  }
+
+  virtual IOStatus Close(const IOOptions& options, IODebugContext* dbg) = 0;
+
+  // EXPERIMENTAL
+  // When available, returns the actual temperature for the file. This is
+  // useful in case some outside process moves a file from one tier to another,
+  // though the temperature is generally expected not to change while a file is
+  // open.
+  virtual Temperature GetTemperature() const { return Temperature::kUnknown; }
+
+  // If you're adding methods here, remember to add them to
+  // RandomRWFileWrapper too.
+
+  // No copying allowed
+  FSRandomRWFile(const FSRandomRWFile&) = delete;
+  FSRandomRWFile& operator=(const FSRandomRWFile&) = delete;
+};
+
+// MemoryMappedFileBuffer object represents a memory-mapped file's raw buffer.
+// Subclasses should release the mapping upon destruction.
+class FSMemoryMappedFileBuffer {
+ public:
+  FSMemoryMappedFileBuffer(void* _base, size_t _length)
+      : base_(_base), length_(_length) {}
+
+  virtual ~FSMemoryMappedFileBuffer() = 0;
+
+  // We do not want to unmap this twice, so no copying is allowed. We could
+  // make this class movable if desired.
+  FSMemoryMappedFileBuffer(const FSMemoryMappedFileBuffer&) = delete;
+  FSMemoryMappedFileBuffer& operator=(const FSMemoryMappedFileBuffer&) = delete;
+
+  void* GetBase() const { return base_; }
+  size_t GetLen() const { return length_; }
+
+ protected:
+  void* base_;
+  const size_t length_;
+};
+
+// Directory object represents a collection of files and implements
+// filesystem operations that can be executed on directories.
+class FSDirectory {
+ public:
+  virtual ~FSDirectory() {}
+  // Fsync directory. Can be called concurrently from multiple threads.
+  virtual IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) = 0;
+
+  // FsyncWithDirOptions after renaming a file. Depending on the filesystem,
+  // it may fsync the directory or just the renamed file (e.g. btrfs). By
+  // default, it just calls directory fsync.
+  virtual IOStatus FsyncWithDirOptions(
+      const IOOptions& options, IODebugContext* dbg,
+      const DirFsyncOptions& /*dir_fsync_options*/) {
+    return Fsync(options, dbg);
+  }
+
+  // Close directory
+  virtual IOStatus Close(const IOOptions& /*options*/,
+                         IODebugContext* /*dbg*/) {
+    return IOStatus::NotSupported("Close");
+  }
+
+  virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const {
+    return 0;
+  }
+
+  // If you're adding methods here, remember to add them to
+  // DirectoryWrapper too.
+};
+
+// Below are helpers for wrapping most of the classes in this file.
+// They forward all calls to another instance of the class.
+// Useful when wrapping the default implementations.
+// Typical usage is to inherit your wrapper from *Wrapper, e.g.:
+//
+// class MySequentialFileWrapper : public
+//     ROCKSDB_NAMESPACE::FSSequentialFileWrapper {
+//  public:
+//   MySequentialFileWrapper(ROCKSDB_NAMESPACE::FSSequentialFile* target):
+//       ROCKSDB_NAMESPACE::FSSequentialFileWrapper(target) {}
+//   Status Read(size_t n, FileSystem::IOOptions& options, Slice* result,
+//               char* scratch, FileSystem::IODebugContext* dbg) override {
+//     cout << "Doing a read of size " << n << "!"
+//          << endl;
+//     return ROCKSDB_NAMESPACE::FSSequentialFileWrapper::Read(n, options,
+//                                                             result,
+//                                                             scratch, dbg);
+//   }
+//   // All other methods are forwarded to target_ automatically.
+// };
+//
+// This is often more convenient than inheriting the class directly because
+// (a) Don't have to override and forward all methods - the Wrapper will
+//     forward everything you're not explicitly overriding.
+// (b) Don't need to update the wrapper when more methods are added to the
+//     rocksdb class. Unless you actually want to override the behavior.
+//     (And unless rocksdb people forgot to update the *Wrapper class.)
+
+// An implementation of FileSystem that forwards all calls to another
+// FileSystem. May be useful to clients who wish to override just part of the
+// functionality of another FileSystem.
+class FileSystemWrapper : public FileSystem {
+ public:
+  // Initialize a FileSystemWrapper that delegates all calls to *t
+  explicit FileSystemWrapper(const std::shared_ptr<FileSystem>& t);
+  ~FileSystemWrapper() override {}
+
+  // Return the target to which this FileSystem forwards all calls
+  FileSystem* target() const { return target_.get(); }
+
+  // The following text is boilerplate that forwards all methods to target()
+  IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts,
+                             std::unique_ptr<FSSequentialFile>* r,
+                             IODebugContext* dbg) override {
+    return target_->NewSequentialFile(f, file_opts, r, dbg);
+  }
+  IOStatus NewRandomAccessFile(const std::string& f,
+                               const FileOptions& file_opts,
+                               std::unique_ptr<FSRandomAccessFile>* r,
+                               IODebugContext* dbg) override {
+    return target_->NewRandomAccessFile(f, file_opts, r, dbg);
+  }
+  IOStatus NewWritableFile(const std::string& f, const FileOptions& file_opts,
+                           std::unique_ptr<FSWritableFile>* r,
+                           IODebugContext* dbg) override {
+    return target_->NewWritableFile(f, file_opts, r, dbg);
+  }
+  IOStatus ReopenWritableFile(const std::string& fname,
+                              const FileOptions& file_opts,
+                              std::unique_ptr<FSWritableFile>* result,
+                              IODebugContext* dbg) override {
+    return target_->ReopenWritableFile(fname, file_opts, result, dbg);
+  }
+  IOStatus ReuseWritableFile(const std::string& fname,
+                             const std::string& old_fname,
+                             const FileOptions& file_opts,
+                             std::unique_ptr<FSWritableFile>* r,
+                             IODebugContext* dbg) override {
+    return target_->ReuseWritableFile(fname, old_fname, file_opts, r, dbg);
+  }
+  IOStatus NewRandomRWFile(const std::string& fname,
+                           const FileOptions& file_opts,
+                           std::unique_ptr<FSRandomRWFile>* result,
+                           IODebugContext* dbg) override {
+    return target_->NewRandomRWFile(fname, file_opts, result, dbg);
+  }
+  IOStatus NewMemoryMappedFileBuffer(
+      const std::string& fname,
+      std::unique_ptr<MemoryMappedFileBuffer>* result) override {
+    return target_->NewMemoryMappedFileBuffer(fname, result);
+  }
+  IOStatus NewDirectory(const std::string& name, const IOOptions& io_opts,
+                        std::unique_ptr<FSDirectory>* result,
+                        IODebugContext* dbg) override {
+    return target_->NewDirectory(name, io_opts, result, dbg);
+  }
+  IOStatus FileExists(const std::string& f, const IOOptions& io_opts,
+                      IODebugContext* dbg) override {
+    return target_->FileExists(f, io_opts, dbg);
+  }
+  IOStatus GetChildren(const std::string& dir, const IOOptions& io_opts,
+                       std::vector<std::string>* r,
+                       IODebugContext* dbg) override {
+    return target_->GetChildren(dir, io_opts, r, dbg);
+  }
+  IOStatus GetChildrenFileAttributes(const std::string& dir,
+                                     const IOOptions& options,
+                                     std::vector<FileAttributes>* result,
+                                     IODebugContext* dbg) override {
+    return target_->GetChildrenFileAttributes(dir, options, result, dbg);
+  }
+  IOStatus DeleteFile(const std::string& f, const IOOptions& options,
+                      IODebugContext* dbg) override {
+    return target_->DeleteFile(f,
+                               options, dbg);
+  }
+  IOStatus Truncate(const std::string& fname, size_t size,
+                    const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Truncate(fname, size, options, dbg);
+  }
+  IOStatus CreateDir(const std::string& d, const IOOptions& options,
+                     IODebugContext* dbg) override {
+    return target_->CreateDir(d, options, dbg);
+  }
+  IOStatus CreateDirIfMissing(const std::string& d, const IOOptions& options,
+                              IODebugContext* dbg) override {
+    return target_->CreateDirIfMissing(d, options, dbg);
+  }
+  IOStatus DeleteDir(const std::string& d, const IOOptions& options,
+                     IODebugContext* dbg) override {
+    return target_->DeleteDir(d, options, dbg);
+  }
+  IOStatus GetFileSize(const std::string& f, const IOOptions& options,
+                       uint64_t* s, IODebugContext* dbg) override {
+    return target_->GetFileSize(f, options, s, dbg);
+  }
+
+  IOStatus GetFileModificationTime(const std::string& fname,
+                                   const IOOptions& options,
+                                   uint64_t* file_mtime,
+                                   IODebugContext* dbg) override {
+    return target_->GetFileModificationTime(fname, options, file_mtime, dbg);
+  }
+
+  IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& options,
+                           std::string* output_path,
+                           IODebugContext* dbg) override {
+    return target_->GetAbsolutePath(db_path, options, output_path, dbg);
+  }
+
+  IOStatus RenameFile(const std::string& s, const std::string& t,
+                      const IOOptions& options, IODebugContext* dbg) override {
+    return target_->RenameFile(s, t, options, dbg);
+  }
+
+  IOStatus LinkFile(const std::string& s, const std::string& t,
+                    const IOOptions& options, IODebugContext* dbg) override {
+    return target_->LinkFile(s, t, options, dbg);
+  }
+
+  IOStatus NumFileLinks(const std::string& fname, const IOOptions& options,
+                        uint64_t* count, IODebugContext* dbg) override {
+    return target_->NumFileLinks(fname, options, count, dbg);
+  }
+
+  IOStatus AreFilesSame(const std::string& first, const std::string& second,
+                        const IOOptions& options, bool* res,
+                        IODebugContext* dbg) override {
+    return target_->AreFilesSame(first, second, options, res, dbg);
+  }
+
+  IOStatus LockFile(const std::string& f, const IOOptions& options,
+                    FileLock** l, IODebugContext* dbg) override {
+    return target_->LockFile(f, options, l, dbg);
+  }
+
+  IOStatus UnlockFile(FileLock* l, const IOOptions& options,
+                      IODebugContext* dbg) override {
+    return target_->UnlockFile(l, options, dbg);
+  }
+
+  IOStatus GetTestDirectory(const IOOptions& options, std::string* path,
+                            IODebugContext* dbg) override {
+    return target_->GetTestDirectory(options, path, dbg);
+  }
+  IOStatus NewLogger(const std::string& fname, const IOOptions& options,
+                     std::shared_ptr<Logger>* result,
+                     IODebugContext* dbg) override {
+    return target_->NewLogger(fname, options, result, dbg);
+  }
+
+  void SanitizeFileOptions(FileOptions* opts) const override {
+    target_->SanitizeFileOptions(opts);
+  }
+
+  FileOptions OptimizeForLogRead(
+      const FileOptions& file_options) const override {
+    return target_->OptimizeForLogRead(file_options);
+  }
+  FileOptions OptimizeForManifestRead(
+      const FileOptions& file_options) const override {
+    return target_->OptimizeForManifestRead(file_options);
+  }
+  FileOptions OptimizeForLogWrite(const FileOptions& file_options,
+                                  const DBOptions& db_options) const override {
+    return target_->OptimizeForLogWrite(file_options, db_options);
+  }
+  FileOptions OptimizeForManifestWrite(
+      const FileOptions& file_options) const override {
+    return target_->OptimizeForManifestWrite(file_options);
+  }
+  FileOptions OptimizeForCompactionTableWrite(
+      const FileOptions& file_options,
+      const ImmutableDBOptions& immutable_ops) const override {
+    return target_->OptimizeForCompactionTableWrite(file_options,
+                                                    immutable_ops);
+  }
+  FileOptions OptimizeForCompactionTableRead(
+      const FileOptions& file_options,
+      const ImmutableDBOptions& db_options) const override {
+    return target_->OptimizeForCompactionTableRead(file_options, db_options);
+  }
+  FileOptions OptimizeForBlobFileRead(
+      const FileOptions& file_options,
+      const ImmutableDBOptions& db_options) const override {
+    return target_->OptimizeForBlobFileRead(file_options, db_options);
+  }
+  IOStatus GetFreeSpace(const std::string& path, const IOOptions& options,
+                        uint64_t* diskfree, IODebugContext* dbg) override {
+    return target_->GetFreeSpace(path, options, diskfree, dbg);
+  }
+  IOStatus IsDirectory(const std::string& path, const IOOptions& options,
+                       bool* is_dir, IODebugContext* dbg) override {
+    return target_->IsDirectory(path, options, is_dir, dbg);
+  }
+
+  const Customizable* Inner() const override { return target_.get(); }
+  Status PrepareOptions(const ConfigOptions& options) override;
+  std::string SerializeOptions(const ConfigOptions& config_options,
+                               const std::string& header) const override;
+
+  virtual IOStatus Poll(std::vector<void*>& io_handles,
+                        size_t min_completions) override {
+    return target_->Poll(io_handles, min_completions);
+  }
+
+  virtual IOStatus AbortIO(std::vector<void*>& io_handles) override {
+    return target_->AbortIO(io_handles);
+  }
+
+  virtual bool use_async_io() override { return target_->use_async_io(); }
+
+ protected:
+  std::shared_ptr<FileSystem> target_;
+};
+
+class FSSequentialFileWrapper : public FSSequentialFile {
+ public:
+  // Creates a FileWrapper around the input File object, without
+  // taking ownership of the object
+  explicit FSSequentialFileWrapper(FSSequentialFile* t) : target_(t) {}
+
+  FSSequentialFile* target() const { return target_; }
+
+  IOStatus Read(size_t n, const IOOptions& options, Slice* result,
+                char* scratch, IODebugContext* dbg) override {
+    return target_->Read(n, options, result, scratch, dbg);
+  }
+  IOStatus Skip(uint64_t n) override { return target_->Skip(n); }
+  bool use_direct_io() const override { return target_->use_direct_io(); }
+  size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+  IOStatus InvalidateCache(size_t offset, size_t length) override {
+    return target_->InvalidateCache(offset, length);
+  }
+  IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
+                          Slice* result, char* scratch,
+                          IODebugContext* dbg) override {
+    return target_->PositionedRead(offset, n, options, result, scratch, dbg);
+  }
+  Temperature GetTemperature() const override {
+    return target_->GetTemperature();
+  }
+
+ private:
+  FSSequentialFile* target_;
+};
+
+class FSSequentialFileOwnerWrapper : public FSSequentialFileWrapper {
+ public:
+  // Creates a FileWrapper around the input File object and takes
+  // ownership of the object
+  explicit FSSequentialFileOwnerWrapper(std::unique_ptr<FSSequentialFile>&& t)
+      : FSSequentialFileWrapper(t.get()), guard_(std::move(t)) {}
+
+ private:
+  std::unique_ptr<FSSequentialFile> guard_;
+};
+
+class FSRandomAccessFileWrapper : public FSRandomAccessFile {
+ public:
+  // Creates a FileWrapper around the input File object, without
+  // taking ownership of the object
+  explicit FSRandomAccessFileWrapper(FSRandomAccessFile* t) : target_(t) {}
+
+  FSRandomAccessFile* target() const { return target_; }
+
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override {
+    return target_->Read(offset, n, options, result, scratch, dbg);
+  }
+  IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
+                     const IOOptions& options, IODebugContext* dbg) override {
+    return target_->MultiRead(reqs, num_reqs, options, dbg);
+  }
+  IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options,
+                    IODebugContext* dbg) override {
+    return target_->Prefetch(offset, n, options, dbg);
+  }
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    return target_->GetUniqueId(id, max_size);
+  }
+  void Hint(AccessPattern pattern) override { target_->Hint(pattern); }
+  bool use_direct_io() const override { return target_->use_direct_io(); }
+  size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+  IOStatus InvalidateCache(size_t offset, size_t length) override {
+    return target_->InvalidateCache(offset, length);
+  }
+  IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
+                     std::function<void(const FSReadRequest&, void*)> cb,
+                     void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
+                     IODebugContext* dbg) override {
+    return target()->ReadAsync(req, opts, cb, cb_arg, io_handle, del_fn, dbg);
+  }
+  Temperature GetTemperature() const override {
+    return target_->GetTemperature();
+  }
+
+ private:
+  std::unique_ptr<FSRandomAccessFile> guard_;
+  FSRandomAccessFile* target_;
+};
+
+class FSRandomAccessFileOwnerWrapper : public FSRandomAccessFileWrapper {
+ public:
+  // Creates a FileWrapper around the input File object and takes
+  // ownership of the object
+  explicit FSRandomAccessFileOwnerWrapper(
+      std::unique_ptr<FSRandomAccessFile>&& t)
+      : FSRandomAccessFileWrapper(t.get()), guard_(std::move(t)) {}
+
+ private:
+  std::unique_ptr<FSRandomAccessFile> guard_;
+};
+
+class FSWritableFileWrapper : public FSWritableFile {
+ public:
+  // Creates a FileWrapper around the input File object, without
+  // taking ownership of the object
+  explicit FSWritableFileWrapper(FSWritableFile* t) : target_(t) {}
+
+  FSWritableFile* target() const { return target_; }
+
+  IOStatus Append(const Slice& data, const IOOptions& options,
+                  IODebugContext* dbg) override {
+    return target_->Append(data, options, dbg);
+  }
+  IOStatus Append(const Slice& data, const IOOptions& options,
+                  const DataVerificationInfo& verification_info,
+                  IODebugContext* dbg) override {
+    return target_->Append(data, options, verification_info, dbg);
+  }
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            IODebugContext* dbg) override {
+    return target_->PositionedAppend(data, offset, options, dbg);
+  }
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            const DataVerificationInfo& verification_info,
+                            IODebugContext* dbg) override {
+    return target_->PositionedAppend(data, offset, options, verification_info,
+                                     dbg);
+  }
+  IOStatus Truncate(uint64_t size, const IOOptions& options,
+                    IODebugContext* dbg) override {
+    return target_->Truncate(size, options, dbg);
+  }
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Close(options, dbg);
+  }
+  IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Flush(options, dbg);
+  }
+  IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Sync(options, dbg);
+  }
+  IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Fsync(options, dbg);
+  }
+  bool IsSyncThreadSafe() const override {
+    return target_->IsSyncThreadSafe();
+  }
+
+  bool use_direct_io() const override { return target_->use_direct_io(); }
+
+  size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+
+  void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {
+    target_->SetWriteLifeTimeHint(hint);
+  }
+
+  Env::WriteLifeTimeHint GetWriteLifeTimeHint() override {
+    return target_->GetWriteLifeTimeHint();
+  }
+
+  uint64_t GetFileSize(const IOOptions& options,
+                       IODebugContext* dbg) override {
+    return target_->GetFileSize(options, dbg);
+  }
+
+  void SetPreallocationBlockSize(size_t size) override {
+    target_->SetPreallocationBlockSize(size);
+  }
+
+  void GetPreallocationStatus(size_t* block_size,
+                              size_t* last_allocated_block) override {
+    target_->GetPreallocationStatus(block_size, last_allocated_block);
+  }
+
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    return target_->GetUniqueId(id, max_size);
+  }
+
+  IOStatus InvalidateCache(size_t offset, size_t length) override {
+    return target_->InvalidateCache(offset, length);
+  }
+
+  IOStatus RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& options,
+                     IODebugContext* dbg) override {
+    return target_->RangeSync(offset, nbytes, options, dbg);
+  }
+
+  void PrepareWrite(size_t offset, size_t len, const IOOptions& options,
+                    IODebugContext* dbg) override {
+    target_->PrepareWrite(offset, len, options, dbg);
+  }
+
+  IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
+                    IODebugContext* dbg) override {
+    return target_->Allocate(offset, len, options, dbg);
+  }
+
+ private:
+  FSWritableFile* target_;
+};
+
+class FSWritableFileOwnerWrapper : public FSWritableFileWrapper {
+ public:
+  // Creates a FileWrapper around the input File object and takes
+  // ownership of the object
+  explicit FSWritableFileOwnerWrapper(std::unique_ptr<FSWritableFile>&& t)
+      : FSWritableFileWrapper(t.get()), guard_(std::move(t)) {}
+
+ private:
+  std::unique_ptr<FSWritableFile> guard_;
+};
+
+class FSRandomRWFileWrapper : public FSRandomRWFile {
+ public:
+  // Creates a FileWrapper around the input File object, without
+  // taking ownership of the object
+  explicit FSRandomRWFileWrapper(FSRandomRWFile* t) : target_(t) {}
+
+  FSRandomRWFile* target() const { return target_; }
+
+  bool use_direct_io() const override { return target_->use_direct_io(); }
+  size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+  IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
+                 IODebugContext* dbg) override {
+    return target_->Write(offset, data, options, dbg);
+  }
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override {
+    return target_->Read(offset, n, options, result, scratch, dbg);
+  }
+  IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Flush(options, dbg);
+  }
+  IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Sync(options, dbg);
+  }
+  IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Fsync(options, dbg);
+  }
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Close(options, dbg);
+  }
+  Temperature GetTemperature() const override {
+    return target_->GetTemperature();
+  }
+
+ private:
+  FSRandomRWFile* target_;
+};
+
+class FSRandomRWFileOwnerWrapper : public FSRandomRWFileWrapper {
+ public:
+  // Creates a FileWrapper around the input File object and takes
+  // ownership of the object
+  explicit FSRandomRWFileOwnerWrapper(std::unique_ptr<FSRandomRWFile>&& t)
+      : FSRandomRWFileWrapper(t.get()), guard_(std::move(t)) {}
+
+ private:
+  std::unique_ptr<FSRandomRWFile> guard_;
+};
+
+class FSDirectoryWrapper : public FSDirectory {
+ public:
+  // Creates a FileWrapper around the input File object and takes
+  // ownership of the object
+  explicit FSDirectoryWrapper(std::unique_ptr<FSDirectory>&& t)
+      : guard_(std::move(t)) {
+    target_ = guard_.get();
+  }
+
+  // Creates a FileWrapper around the input File object, without
+  // taking ownership of the object
+  explicit FSDirectoryWrapper(FSDirectory* t) : target_(t) {}
+
+  IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Fsync(options, dbg);
+  }
+
+  IOStatus FsyncWithDirOptions(
+      const IOOptions& options, IODebugContext* dbg,
+      const DirFsyncOptions& dir_fsync_options) override {
+    return target_->FsyncWithDirOptions(options, dbg, dir_fsync_options);
+  }
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override {
+    return target_->Close(options, dbg);
+  }
+
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    return target_->GetUniqueId(id, max_size);
+  }
+
+ private:
+  std::unique_ptr<FSDirectory> guard_;
+  FSDirectory* target_;
+};
+
+// A utility routine: write "data" to the named file.
+extern IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
+                                  const std::string& fname,
+                                  bool should_sync = false);
+
+// A utility routine: read contents of named file into *data
+extern IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,
+                                 std::string* data);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/filter_policy.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/filter_policy.h
new file mode 100644
index 0000000000..954d15b4a1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/filter_policy.h
@@ -0,0 +1,206 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A database can be configured with a custom FilterPolicy object.
+// This object is responsible for creating a small filter from a set
+// of keys. These filters are stored in rocksdb and are consulted
+// automatically by rocksdb to decide whether or not to read some
+// information from disk. In many cases, a filter can cut down the
+// number of disk seeks from a handful to a single disk seek per
+// DB::Get() call.
+//
+// Most people will want to use the builtin bloom filter support (see
+// NewBloomFilterPolicy() below).
+
+#pragma once
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "rocksdb/advanced_options.h"
+#include "rocksdb/customizable.h"
+#include "rocksdb/status.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+struct BlockBasedTableOptions;
+struct ConfigOptions;
+
+// As of RocksDB 7.0, the details of these classes are internal
+class FilterBitsBuilder;
+class FilterBitsReader;
+
+// Contextual information passed to BloomFilterPolicy at filter building time.
+// Used in overriding FilterPolicy::GetBuilderWithContext(). References other
+// structs because this is expected to be a temporary, stack-allocated object.
+struct FilterBuildingContext {
+  // This constructor is for internal use only and subject to change.
+  FilterBuildingContext(const BlockBasedTableOptions& table_options);
+
+  // Options for the table being built
+  const BlockBasedTableOptions& table_options;
+
+  // BEGIN from (DB|ColumnFamily)Options in effect at table creation time
+  CompactionStyle compaction_style = kCompactionStyleLevel;
+
+  // Number of LSM levels, or -1 if unknown
+  int num_levels = -1;
+
+  // An optional logger for reporting errors, warnings, etc.
+  Logger* info_log = nullptr;
+  // END from (DB|ColumnFamily)Options
+
+  // Name of the column family for the table (or empty string if unknown)
+  // TODO: consider changing to Slice
+  std::string column_family_name;
+
+  // The table level at time of constructing the SST file, or -1 if unknown
+  // or N/A as in SstFileWriter. (The table file could later be used at a
+  // different level.)
+  int level_at_creation = -1;
+
+  // True if known to be going into bottommost sorted run for applicable
+  // key range (which might not even be last level with data). False
+  // otherwise.
+  bool is_bottommost = false;
+
+  // Reason for creating the file with the filter
+  TableFileCreationReason reason = TableFileCreationReason::kMisc;
+};
+
+// Determines what kind of filter (if any) to generate in SST files, and under
+// which conditions. API users can create custom filter policies that
+// defer to other built-in policies (see NewBloomFilterPolicy and
+// NewRibbonFilterPolicy) based on the context provided to
+// GetBuilderWithContext.
+class FilterPolicy : public Customizable {
+ public:
+  virtual ~FilterPolicy();
+  static const char* Type() { return "FilterPolicy"; }
+
+  // The name used for identifying whether a filter on disk is readable
+  // by this FilterPolicy. If this FilterPolicy is part of a family that
+  // can read each other's filters, such as the built-in BloomFilterPolicy and
+  // RibbonFilterPolicy, the CompatibilityName is a shared family name,
+  // while kinds of filters in the family can have distinct Customizable
+  // Names. This function is pure virtual so that wrappers around built-in
+  // policies are prompted to defer to CompatibilityName() of the wrapped
+  // policy, which is important for compatibility.
+  //
+  // For custom filter policies that are not part of a read-compatible
+  // family (rare), implementations may return Name().
+  virtual const char* CompatibilityName() const = 0;
+
+  // Creates a new FilterPolicy based on the input value string and returns
+  // the result. The value might be an ID, an ID with properties, or an
+  // old-style policy string. The value describes the FilterPolicy being
+  // created.
+  // For BloomFilters, value may be a ":"-delimited value of the form:
+  //   "bloomfilter:[bits_per_key]",
+  // e.g. "bloomfilter:4"
+  // The above string is equivalent to calling NewBloomFilterPolicy(4).
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& value,
+                                 std::shared_ptr<FilterPolicy>* result);
+
+  // Return a new FilterBitsBuilder for constructing full or partitioned
+  // filter blocks, or return nullptr to indicate "no filter". Custom
+  // implementations should defer to a built-in FilterPolicy to get a
+  // new FilterBitsBuilder, but the FilterBuildingContext can be used
+  // to decide which built-in FilterPolicy to defer to.
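+  //
+  // For example (an illustrative sketch; `bloom_` and `ribbon_` are
+  // hypothetical members holding built-in policies), a custom policy
+  // might choose Ribbon for bottommost data and Bloom elsewhere:
+  //
+  //   FilterBitsBuilder* GetBuilderWithContext(
+  //       const FilterBuildingContext& ctx) const override {
+  //     const FilterPolicy* p =
+  //         ctx.is_bottommost ? ribbon_.get() : bloom_.get();
+  //     return p->GetBuilderWithContext(ctx);
+  //   }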
+  virtual FilterBitsBuilder* GetBuilderWithContext(
+      const FilterBuildingContext&) const = 0;
+
+  // Return a new FilterBitsReader for full or partitioned filter blocks.
+  // Caller retains ownership of any buffer pointed to by the input Slice.
+  // Custom implementations should defer to GetFilterBitsReader on any
+  // built-in FilterPolicy, which can read filters generated by any other
+  // built-in FilterPolicy.
+  virtual FilterBitsReader* GetFilterBitsReader(
+      const Slice& /*contents*/) const = 0;
+};
+
+// Return a new filter policy that uses a bloom filter with approximately
+// the specified number of bits per key. See
+// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
+//
+// bits_per_key: average bits allocated per key in bloom filter. A good
+// choice is 9.9, which yields a filter with ~ 1% false positive rate.
+// When format_version < 5, the value will be rounded to the nearest
+// integer. Recommend using no more than three decimal digits after the
+// decimal point, as in 6.667.
+//
+// To avoid configurations that are unlikely to produce good filtering
+// value for the CPU overhead, bits_per_key < 0.5 is rounded down to 0.0,
+// which means "generate no filter", and 0.5 <= bits_per_key < 1.0 is
+// rounded up to 1.0, for a 62% FP rate.
+//
+// The caller is responsible for eventually deleting the result, though
+// this is typically handled automatically with BlockBasedTableOptions:
+//   table_options.filter_policy.reset(NewBloomFilterPolicy(...));
+//
+// As of RocksDB 7.0, the use_block_based_builder parameter is ignored.
+// (The old, inefficient block-based filter is no longer accessible in
+// the public API.)
+//
+// Note: if you are using a custom comparator that ignores some parts
+// of the keys being compared, you must not use NewBloomFilterPolicy()
+// and must provide your own FilterPolicy that also ignores the
+// corresponding parts of the keys. For example, if the comparator
+// ignores trailing spaces, it would be incorrect to use a
+// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
+// trailing spaces in keys.
+extern const FilterPolicy* NewBloomFilterPolicy(
+    double bits_per_key, bool IGNORED_use_block_based_builder = false);
+
+// A new Bloom alternative that saves about 30% space compared to
+// Bloom filters, with similar query times but roughly 3-4x CPU time
+// and 3x temporary space usage during construction. For example, if
+// you pass in 10 for bloom_equivalent_bits_per_key, you'll get the same
+// ~0.95% FP rate as the Bloom filter, but using only about 7 bits per key.
+//
+// The space savings of Ribbon filters make sense for lower (higher
+// numbered; larger; longer-lived) levels of the LSM, whereas the speed of
+// Bloom filters makes sense for the highest levels of the LSM. Setting
+// bloom_before_level allows for this design with Level and Universal
+// compaction styles. For example, bloom_before_level=1 means that Bloom
+// filters will be used in level 0, including flushes, and Ribbon
+// filters elsewhere, including FIFO compaction and external SST files.
+// For this option, memtable flushes are considered level -1 (so that
+// flushes can be distinguished from intra-L0 compaction).
+// bloom_before_level=0 (default) -> Generate Bloom filters only for
+// flushes under Level and Universal compaction styles.
+// bloom_before_level=-1 -> Always generate Ribbon filters (except in
+// some extreme or exceptional cases).
+//
+// Ribbon filters are compatible with RocksDB >= 6.15.0.
+// Earlier versions reading the data will behave as if no filter was used
+// (degraded performance until compaction rebuilds filters). All
+// built-in FilterPolicies (Bloom or Ribbon) are able to read other
+// kinds of built-in filters.
+//
+// Note: the current Ribbon filter schema uses some extra resources
+// when constructing very large filters. For example, for 100 million
+// keys in a single filter (one SST file without partitioned filters),
+// 3GB of temporary, untracked memory is used, vs. 1GB for Bloom.
+// However, the savings in filter space from just ~60 open SST files
+// makes up for the additional temporary memory use.
+//
+// Also consider using optimize_filters_for_memory to save filter
+// memory.
+extern const FilterPolicy* NewRibbonFilterPolicy(
+    double bloom_equivalent_bits_per_key, int bloom_before_level = 0);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/flush_block_policy.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/flush_block_policy.h
new file mode 100644
index 0000000000..7a5dd957e4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/flush_block_policy.h
@@ -0,0 +1,75 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/table.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+class BlockBuilder;
+struct ConfigOptions;
+struct Options;
+
+// FlushBlockPolicy provides a configurable way to determine when to flush a
+// block in the block based tables.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class FlushBlockPolicy {
+ public:
+  // Keep track of the key/value sequences and return the boolean value to
+  // determine if the table builder should flush the current data block.
+  virtual bool Update(const Slice& key, const Slice& value) = 0;
+
+  virtual ~FlushBlockPolicy() {}
+};
+
+class FlushBlockPolicyFactory : public Customizable {
+ public:
+  static const char* Type() { return "FlushBlockPolicyFactory"; }
+
+  // Creates a FlushBlockPolicyFactory based on the input value.
+  // By default, this method can create EveryKey or BySize PolicyFactory,
+  // which take no config_options.
+  static Status CreateFromString(
+      const ConfigOptions& config_options, const std::string& value,
+      std::shared_ptr<FlushBlockPolicyFactory>* result);
+
+  // Return a new block flush policy that flushes data blocks by data size.
+  // FlushBlockPolicy may need to access the metadata of the data block
+  // builder to determine when to flush the blocks.
+  //
+  // Callers must delete the result after any database that is using the
+  // result has been closed.
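+  //
+  // As an illustrative sketch, a custom policy that flushes after every
+  // `n` keys (ignoring block size entirely) could look like:
+  //
+  //   class EveryNKeysPolicy : public FlushBlockPolicy {
+  //    public:
+  //     explicit EveryNKeysPolicy(uint64_t n) : n_(n) {}
+  //     bool Update(const Slice& /*key*/, const Slice& /*value*/) override {
+  //       return ++count_ % n_ == 0;
+  //     }
+  //
+  //    private:
+  //     const uint64_t n_;
+  //     uint64_t count_ = 0;
+  //   };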
+  virtual FlushBlockPolicy* NewFlushBlockPolicy(
+      const BlockBasedTableOptions& table_options,
+      const BlockBuilder& data_block_builder) const = 0;
+
+  virtual ~FlushBlockPolicyFactory() {}
+};
+
+class FlushBlockBySizePolicyFactory : public FlushBlockPolicyFactory {
+ public:
+  FlushBlockBySizePolicyFactory();
+
+  static const char* kClassName() { return "FlushBlockBySizePolicyFactory"; }
+  const char* Name() const override { return kClassName(); }
+
+  FlushBlockPolicy* NewFlushBlockPolicy(
+      const BlockBasedTableOptions& table_options,
+      const BlockBuilder& data_block_builder) const override;
+
+  static FlushBlockPolicy* NewFlushBlockPolicy(
+      const uint64_t size, const int deviation,
+      const BlockBuilder& data_block_builder);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/functor_wrapper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/functor_wrapper.h
new file mode 100644
index 0000000000..17b021bf73
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/functor_wrapper.h
@@ -0,0 +1,56 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <utility>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace detail {
+template <size_t... Is>
+struct IndexSequence {};
+
+template <size_t N, size_t... Next>
+struct IndexSequenceHelper
+    : public IndexSequenceHelper<N - 1U, N - 1U, Next...> {};
+
+template <size_t... Next>
+struct IndexSequenceHelper<0U, Next...> {
+  using type = IndexSequence<Next...>;
+};
+
+template <size_t N>
+using make_index_sequence = typename IndexSequenceHelper<N>::type;
+
+template <typename Function, typename Tuple, size_t... I>
+void call(Function f, Tuple t, IndexSequence<I...>) {
+  f(std::get<I>(t)...);
+}
+
+template <typename Function, typename Tuple>
+void call(Function f, Tuple t) {
+  static constexpr auto size = std::tuple_size<Tuple>::value;
+  call(f, t, make_index_sequence<size>{});
+}
+}  // namespace detail
+
+template <typename... Args>
+class FunctorWrapper {
+ public:
+  explicit FunctorWrapper(std::function<void(Args...)> functor, Args &&...args)
+      : functor_(std::move(functor)), args_(std::forward<Args>(args)...) {}
+
+  void invoke() { detail::call(functor_, args_); }
+
+ private:
+  std::function<void(Args...)> functor_;
+  std::tuple<Args...> args_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/io_status.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/io_status.h
new file mode 100644
index 0000000000..0bf5e939a6
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/io_status.h
@@ -0,0 +1,244 @@
+// Copyright (c) 2019-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// An IOStatus encapsulates the result of an operation. It may indicate
+// success, or it may indicate an error with an associated error message.
+//
+// Multiple threads can invoke const methods on an IOStatus without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same IOStatus must use
+// external synchronization.
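+//
+// For example (a sketch), a FileSystem implementation backed by remote
+// storage might tag a transient failure as retryable so that upper
+// layers can retry the operation:
+//
+//   IOStatus s = IOStatus::IOError("connection reset");
+//   s.SetRetryable(true);
+//   s.SetScope(IOStatus::kIOErrorScopeFile);
+//   return s;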
+
+#pragma once
+
+#include <memory>
+
+#include "rocksdb/slice.h"
+#ifdef OS_WIN
+#include <string.h>
+#endif
+#include <cstring>
+
+#include "status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class IOStatus : public Status {
+ public:
+  using Code = Status::Code;
+  using SubCode = Status::SubCode;
+
+  enum IOErrorScope : unsigned char {
+    kIOErrorScopeFileSystem,
+    kIOErrorScopeFile,
+    kIOErrorScopeRange,
+    kIOErrorScopeMax,
+  };
+
+  // Create a success status.
+  IOStatus() : IOStatus(kOk, kNone) {}
+  ~IOStatus() {}
+
+  // Copy the specified status.
+  IOStatus(const IOStatus& s);
+  IOStatus& operator=(const IOStatus& s);
+  IOStatus(IOStatus&& s) noexcept;
+  IOStatus& operator=(IOStatus&& s) noexcept;
+  bool operator==(const IOStatus& rhs) const;
+  bool operator!=(const IOStatus& rhs) const;
+
+  void SetRetryable(bool retryable) { retryable_ = retryable; }
+  void SetDataLoss(bool data_loss) { data_loss_ = data_loss; }
+  void SetScope(IOErrorScope scope) {
+    scope_ = static_cast<unsigned char>(scope);
+  }
+
+  bool GetRetryable() const { return retryable_; }
+  bool GetDataLoss() const { return data_loss_; }
+  IOErrorScope GetScope() const { return static_cast<IOErrorScope>(scope_); }
+
+  // Return a success status.
+  static IOStatus OK() { return IOStatus(); }
+
+  static IOStatus NotSupported(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kNotSupported, msg, msg2);
+  }
+  static IOStatus NotSupported(SubCode msg = kNone) {
+    return IOStatus(kNotSupported, msg);
+  }
+
+  // Return error status of an appropriate type.
+  static IOStatus NotFound(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kNotFound, msg, msg2);
+  }
+  // Fast path for not found without malloc.
+  static IOStatus NotFound(SubCode msg = kNone) {
+    return IOStatus(kNotFound, msg);
+  }
+
+  static IOStatus Corruption(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kCorruption, msg, msg2);
+  }
+  static IOStatus Corruption(SubCode msg = kNone) {
+    return IOStatus(kCorruption, msg);
+  }
+
+  static IOStatus InvalidArgument(const Slice& msg,
+                                  const Slice& msg2 = Slice()) {
+    return IOStatus(kInvalidArgument, msg, msg2);
+  }
+  static IOStatus InvalidArgument(SubCode msg = kNone) {
+    return IOStatus(kInvalidArgument, msg);
+  }
+
+  static IOStatus IOError(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kIOError, msg, msg2);
+  }
+  static IOStatus IOError(SubCode msg = kNone) {
+    return IOStatus(kIOError, msg);
+  }
+
+  static IOStatus Busy(SubCode msg = kNone) { return IOStatus(kBusy, msg); }
+  static IOStatus Busy(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kBusy, msg, msg2);
+  }
+
+  static IOStatus TimedOut(SubCode msg = kNone) {
+    return IOStatus(kTimedOut, msg);
+  }
+  static IOStatus TimedOut(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kTimedOut, msg, msg2);
+  }
+
+  static IOStatus NoSpace() { return IOStatus(kIOError, kNoSpace); }
+  static IOStatus NoSpace(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kIOError, kNoSpace, msg, msg2);
+  }
+
+  static IOStatus PathNotFound() { return IOStatus(kIOError, kPathNotFound); }
+  static IOStatus PathNotFound(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kIOError, kPathNotFound, msg, msg2);
+  }
+
+  static IOStatus IOFenced() { return IOStatus(kIOError, kIOFenced); }
+  static IOStatus IOFenced(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kIOError, kIOFenced, msg, msg2);
+  }
+
+  static IOStatus Aborted(SubCode msg = kNone) {
+    return IOStatus(kAborted, msg);
+  }
+  static IOStatus Aborted(const Slice& msg, const Slice& msg2 = Slice()) {
+    return IOStatus(kAborted, msg, msg2);
+  }
+
+  // Return a string representation of this status suitable for printing.
+  // Returns the string "OK" for success.
+  // std::string ToString() const;
+
+ private:
+  friend IOStatus status_to_io_status(Status&&);
+
+  explicit IOStatus(Code _code, SubCode _subcode = kNone)
+      : Status(_code, _subcode, false, false, kIOErrorScopeFileSystem) {}
+
+  IOStatus(Code _code, SubCode _subcode, const Slice& msg, const Slice& msg2);
+  IOStatus(Code _code, const Slice& msg, const Slice& msg2)
+      : IOStatus(_code, kNone, msg, msg2) {}
+};
+
+inline IOStatus::IOStatus(Code _code, SubCode _subcode, const Slice& msg,
+                          const Slice& msg2)
+    : Status(_code, _subcode, false, false, kIOErrorScopeFileSystem) {
+  assert(code_ != kOk);
+  assert(subcode_ != kMaxSubCode);
+  const size_t len1 = msg.size();
+  const size_t len2 = msg2.size();
+  const size_t size = len1 + (len2 ? (2 + len2) : 0);
+  char* const result = new char[size + 1];  // +1 for null terminator
+  memcpy(result, msg.data(), len1);
+  if (len2) {
+    result[len1] = ':';
+    result[len1 + 1] = ' ';
+    memcpy(result + len1 + 2, msg2.data(), len2);
+  }
+  result[size] = '\0';  // null terminator for C style string
+  state_.reset(result);
+}
+
+inline IOStatus::IOStatus(const IOStatus& s) : Status(s.code_, s.subcode_) {
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+  s.checked_ = true;
+#endif  // ROCKSDB_ASSERT_STATUS_CHECKED
+  retryable_ = s.retryable_;
+  data_loss_ = s.data_loss_;
+  scope_ = s.scope_;
+  state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_.get());
+}
+inline IOStatus& IOStatus::operator=(const IOStatus& s) {
+  // The following condition catches both aliasing (when this == &s),
+  // and the common case where both s and *this are ok.
+  if (this != &s) {
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+    s.checked_ = true;
+    checked_ = false;
+#endif  // ROCKSDB_ASSERT_STATUS_CHECKED
+    code_ = s.code_;
+    subcode_ = s.subcode_;
+    retryable_ = s.retryable_;
+    data_loss_ = s.data_loss_;
+    scope_ = s.scope_;
+    state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_.get());
+  }
+  return *this;
+}
+
+inline IOStatus::IOStatus(IOStatus&& s) noexcept : IOStatus() {
+  *this = std::move(s);
+}
+
+inline IOStatus& IOStatus::operator=(IOStatus&& s) noexcept {
+  if (this != &s) {
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+    s.checked_ = true;
+    checked_ = false;
+#endif  // ROCKSDB_ASSERT_STATUS_CHECKED
+    code_ = std::move(s.code_);
+    s.code_ = kOk;
+    subcode_ = std::move(s.subcode_);
+    s.subcode_ = kNone;
+    retryable_ = s.retryable_;
+    data_loss_ = s.data_loss_;
+    scope_ = s.scope_;
+    s.scope_ = kIOErrorScopeFileSystem;
+    state_ = std::move(s.state_);
+  }
+  return *this;
+}
+
+inline bool IOStatus::operator==(const IOStatus& rhs) const {
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+  checked_ = true;
+  rhs.checked_ = true;
+#endif  // ROCKSDB_ASSERT_STATUS_CHECKED
+  return (code_ == rhs.code_);
+}
+
+inline bool IOStatus::operator!=(const IOStatus& rhs) const {
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+  checked_ = true;
+  rhs.checked_ = true;
+#endif  // ROCKSDB_ASSERT_STATUS_CHECKED
+  return !(*this == rhs);
+}
+
+inline IOStatus status_to_io_status(Status&& status) {
+  IOStatus io_s;
+  Status& s = io_s;
+  s = std::move(status);
+  return io_s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/iostats_context.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/iostats_context.h
new file mode 100644
index 0000000000..559d44c57a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/iostats_context.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <stdint.h>
+
+#include <string>
+
+#include "rocksdb/perf_level.h"
+
+// A thread local context for gathering io-stats efficiently and transparently.
+// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
+
+namespace ROCKSDB_NAMESPACE {
+
+// EXPERIMENTAL: the IO statistics for tiered storage. It matches with each
+// item in the Temperature class.
+struct FileIOByTemperature {
+  // the number of bytes read from a Temperature::kHot file
+  uint64_t hot_file_bytes_read;
+  // the number of bytes read from a Temperature::kWarm file
+  uint64_t warm_file_bytes_read;
+  // the number of bytes read from a Temperature::kCold file
+  uint64_t cold_file_bytes_read;
+  // total number of reads from a Temperature::kHot file
+  uint64_t hot_file_read_count;
+  // total number of reads from a Temperature::kWarm file
+  uint64_t warm_file_read_count;
+  // total number of reads from a Temperature::kCold file
+  uint64_t cold_file_read_count;
+  // reset all the statistics to 0.
+  void Reset() {
+    hot_file_bytes_read = 0;
+    warm_file_bytes_read = 0;
+    cold_file_bytes_read = 0;
+    hot_file_read_count = 0;
+    warm_file_read_count = 0;
+    cold_file_read_count = 0;
+  }
+};
+
+struct IOStatsContext {
+  // reset all io-stats counters to zero
+  void Reset();
+
+  std::string ToString(bool exclude_zero_counters = false) const;
+
+  // the thread pool id
+  uint64_t thread_pool_id;
+
+  // number of bytes that have been written.
+  uint64_t bytes_written;
+  // number of bytes that have been read.
+  uint64_t bytes_read;
+
+  // time spent in open() and fopen().
+  uint64_t open_nanos;
+  // time spent in fallocate().
+  uint64_t allocate_nanos;
+  // time spent in write() and pwrite().
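+  // For example (an illustrative sketch), once timing is enabled these
+  // counters can be sampled around a batch of writes:
+  //
+  //   SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
+  //   get_iostats_context()->Reset();
+  //   // ... perform file writes ...
+  //   uint64_t spent = get_iostats_context()->write_nanos;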
+  uint64_t write_nanos;
+  // time spent in read() and pread()
+  uint64_t read_nanos;
+  // time spent in sync_file_range().
+  uint64_t range_sync_nanos;
+  // time spent in fsync
+  uint64_t fsync_nanos;
+  // time spent in preparing write (fallocate etc).
+  uint64_t prepare_write_nanos;
+  // time spent in Logger::Logv().
+  uint64_t logger_nanos;
+  // CPU time spent in write() and pwrite()
+  uint64_t cpu_write_nanos;
+  // CPU time spent in read() and pread()
+  uint64_t cpu_read_nanos;
+
+  FileIOByTemperature file_io_stats_by_temperature;
+
+  // Whether iostats tracking follows PerfLevel is not consistent: the timer
+  // counters follow it, but BackupEngine relies on the counter metrics to
+  // always be there. Here we create a backdoor option to disable some
+  // counters, so that some existing stats are not polluted by file
+  // operations, such as logging, by turning this off.
+  bool disable_iostats = false;
+};
+
+// If RocksDB is compiled with -DNIOSTATS_CONTEXT, then a pointer to a global,
+// non-thread-local IOStatsContext object will be returned. Attempts to update
+// this object will be ignored, and reading from it will also be a no-op.
+// Otherwise, a pointer to a thread-local IOStatsContext object will be
+// returned.
+//
+// This function never returns nullptr.
+IOStatsContext* get_iostats_context();
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/iterator.h
new file mode 100644
index 0000000000..9d4c9f73a1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/iterator.h
@@ -0,0 +1,144 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// An iterator yields a sequence of key/value pairs from a source.
+// The following class defines the interface. Multiple implementations
+// are provided by this library. In particular, iterators are provided
+// to access the contents of a Table or a DB.
+//
+// Multiple threads can invoke const methods on an Iterator without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Iterator must use
+// external synchronization.
+
+#pragma once
+
+#include <string>
+
+#include "rocksdb/cleanable.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "rocksdb/wide_columns.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Iterator : public Cleanable {
+ public:
+  Iterator() {}
+  // No copying allowed
+  Iterator(const Iterator&) = delete;
+  void operator=(const Iterator&) = delete;
+
+  virtual ~Iterator() {}
+
+  // An iterator is either positioned at a key/value pair, or
+  // not valid. This method returns true iff the iterator is valid.
+  // Always returns false if !status().ok().
+  virtual bool Valid() const = 0;
+
+  // Position at the first key in the source. The iterator is Valid()
+  // after this call iff the source is not empty.
+  virtual void SeekToFirst() = 0;
+
+  // Position at the last key in the source. The iterator is
+  // Valid() after this call iff the source is not empty.
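+  //
+  // A typical forward scan (an illustrative sketch; `it` is any Iterator
+  // and `Process` is a hypothetical consumer) combines the positioning
+  // calls with Valid()/Next():
+  //
+  //   for (it->SeekToFirst(); it->Valid(); it->Next()) {
+  //     Process(it->key(), it->value());
+  //   }
+  //   if (!it->status().ok()) { /* handle scan error */ }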
+  virtual void SeekToLast() = 0;
+
+  // Position at the first key in the source that is at or past target.
+  // The iterator is Valid() after this call iff the source contains
+  // an entry that comes at or past target.
+  // All Seek*() methods clear any error status() that the iterator had prior
+  // to the call; after the seek, status() indicates only the error (if any)
+  // that happened during the seek, not any past errors.
+  // Target does not contain timestamp.
+  virtual void Seek(const Slice& target) = 0;
+
+  // Position at the last key in the source that is at or before target.
+  // The iterator is Valid() after this call iff the source contains
+  // an entry that comes at or before target.
+  // Target does not contain timestamp.
+  virtual void SeekForPrev(const Slice& target) = 0;
+
+  // Moves to the next entry in the source. After this call, Valid() is
+  // true iff the iterator was not positioned at the last entry in the source.
+  // REQUIRES: Valid()
+  virtual void Next() = 0;
+
+  // Moves to the previous entry in the source. After this call, Valid() is
+  // true iff the iterator was not positioned at the first entry in the
+  // source.
+  // REQUIRES: Valid()
+  virtual void Prev() = 0;
+
+  // Return the key for the current entry. The underlying storage for
+  // the returned slice is valid only until the next modification of the
+  // iterator (i.e. the next SeekToFirst/SeekToLast/Seek/SeekForPrev/Next/Prev
+  // operation).
+  // REQUIRES: Valid()
+  virtual Slice key() const = 0;
+
+  // Return the value for the current entry. If the entry is a plain key-value,
+  // return the value as-is; if it is a wide-column entity, return the value of
+  // the default anonymous column (see kDefaultWideColumnName) if any, or an
+  // empty value otherwise. The underlying storage for the returned slice is
+  // valid only until the next modification of the iterator (i.e. the next
+  // SeekToFirst/SeekToLast/Seek/SeekForPrev/Next/Prev operation).
+  // REQUIRES: Valid()
+  virtual Slice value() const = 0;
+
+  // Return the wide columns for the current entry. If the entry is a
+  // wide-column entity, return it as-is; if it is a plain key-value, return it
+  // as an entity with a single anonymous column (see kDefaultWideColumnName)
+  // which contains the value. The underlying storage for the returned
+  // structure is valid only until the next modification of the iterator (i.e.
+  // the next SeekToFirst/SeekToLast/Seek/SeekForPrev/Next/Prev operation).
+  // REQUIRES: Valid()
+  virtual const WideColumns& columns() const {
+    assert(false);
+    return kNoWideColumns;
+  }
+
+  // If an error has occurred, return it. Else return an ok status.
+  // If non-blocking IO is requested and this operation cannot be
+  // satisfied without doing some IO, then this returns Status::Incomplete().
+  virtual Status status() const = 0;
+
+  // If supported, renew the iterator to represent the latest state. The
+  // iterator will be invalidated after the call. Not supported if
+  // ReadOptions.snapshot is given when creating the iterator.
+  virtual Status Refresh() {
+    return Status::NotSupported("Refresh() is not supported");
+  }
+
+  // Property "rocksdb.iterator.is-key-pinned":
+  //   If returning "1", this means that the Slice returned by key() is valid
+  //   as long as the iterator is not deleted.
+  //   It is guaranteed to always return "1" if
+  //     - Iterator created with ReadOptions::pin_data = true
+  //     - DB tables were created with
+  //       BlockBasedTableOptions::use_delta_encoding = false.
+ // Property "rocksdb.iterator.super-version-number": + // LSM version used by the iterator. The same format as DB Property + // kCurrentSuperVersionNumber. See its comment for more information. + // Property "rocksdb.iterator.internal-key": + // Get the user-key portion of the internal key at which the iteration + // stopped. + virtual Status GetProperty(std::string prop_name, std::string* prop); + + virtual Slice timestamp() const { + assert(false); + return Slice(); + } +}; + +// Return an empty iterator (yields nothing). +extern Iterator* NewEmptyIterator(); + +// Return an empty iterator with the specified status. +extern Iterator* NewErrorIterator(const Status& status); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/ldb_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/ldb_tool.h new file mode 100644 index 0000000000..b8f2e222fa --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/ldb_tool.h @@ -0,0 +1,42 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once +#include +#include + +#include "rocksdb/db.h" +#include "rocksdb/options.h" + +namespace ROCKSDB_NAMESPACE { + +// An interface for converting a slice to a readable string +class SliceFormatter { + public: + virtual ~SliceFormatter() {} + virtual std::string Format(const Slice& s) const = 0; +}; + +// Options for customizing ldb tool (beyond the DB Options) +struct LDBOptions { + // Create LDBOptions with default values for all fields + LDBOptions(); + + // Key formatter that converts a slice to a readable string. + // Default: Slice::ToString() + std::shared_ptr key_formatter; + + std::string print_help_header = "ldb - RocksDB Tool"; +}; + +class LDBTool { + public: + void Run( + int argc, char** argv, Options db_options = Options(), + const LDBOptions& ldb_options = LDBOptions(), + const std::vector* column_families = nullptr); +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/listener.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/listener.h new file mode 100644 index 0000000000..87bc678693 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/listener.h @@ -0,0 +1,840 @@ +// Copyright (c) 2014 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/listener.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/listener.h
new file mode 100644
index 0000000000..87bc678693
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/listener.h
@@ -0,0 +1,840 @@
+// Copyright (c) 2014 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+#pragma once
+
+#include <chrono>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "rocksdb/advanced_options.h"
+#include "rocksdb/compaction_job_stats.h"
+#include "rocksdb/compression_type.h"
+#include "rocksdb/customizable.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/status.h"
+#include "rocksdb/table_properties.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+using TablePropertiesCollection =
+    std::unordered_map<std::string, std::shared_ptr<const TableProperties>>;
+
+class DB;
+class ColumnFamilyHandle;
+class Status;
+struct CompactionJobStats;
+
+struct FileCreationBriefInfo {
+  FileCreationBriefInfo() = default;
+  FileCreationBriefInfo(const std::string& _db_name,
+                        const std::string& _cf_name,
+                        const std::string& _file_path, int _job_id)
+      : db_name(_db_name),
+        cf_name(_cf_name),
+        file_path(_file_path),
+        job_id(_job_id) {}
+  // the name of the database where the file was created.
+  std::string db_name;
+  // the name of the column family where the file was created.
+  std::string cf_name;
+  // the path to the created file.
+  std::string file_path;
+  // the id of the job (which could be flush or compaction) that
+  // created the file.
+  int job_id = 0;
+};
+
+struct TableFileCreationBriefInfo : public FileCreationBriefInfo {
+  // reason of creating the table.
+  TableFileCreationReason reason;
+};
+
+struct TableFileCreationInfo : public TableFileCreationBriefInfo {
+  TableFileCreationInfo() = default;
+  explicit TableFileCreationInfo(TableProperties&& prop)
+      : table_properties(prop) {}
+  // the size of the file.
+  uint64_t file_size;
+  // Detailed properties of the created file.
+  TableProperties table_properties;
+  // The status indicating whether the creation was successful or not.
+  Status status;
+  // The checksum of the table file being created
+  std::string file_checksum;
+  // The checksum function name of the checksum generator used for this table
+  // file
+  std::string file_checksum_func_name;
+};
+
+struct BlobFileCreationBriefInfo : public FileCreationBriefInfo {
+  BlobFileCreationBriefInfo(const std::string& _db_name,
+                            const std::string& _cf_name,
+                            const std::string& _file_path, int _job_id,
+                            BlobFileCreationReason _reason)
+      : FileCreationBriefInfo(_db_name, _cf_name, _file_path, _job_id),
+        reason(_reason) {}
+  // reason of creating the blob file.
+  BlobFileCreationReason reason;
+};
+
+struct BlobFileCreationInfo : public BlobFileCreationBriefInfo {
+  BlobFileCreationInfo(const std::string& _db_name, const std::string& _cf_name,
+                       const std::string& _file_path, int _job_id,
+                       BlobFileCreationReason _reason,
+                       uint64_t _total_blob_count, uint64_t _total_blob_bytes,
+                       Status _status, const std::string& _file_checksum,
+                       const std::string& _file_checksum_func_name)
+      : BlobFileCreationBriefInfo(_db_name, _cf_name, _file_path, _job_id,
+                                  _reason),
+        total_blob_count(_total_blob_count),
+        total_blob_bytes(_total_blob_bytes),
+        status(_status),
+        file_checksum(_file_checksum),
+        file_checksum_func_name(_file_checksum_func_name) {}
+
+  // the number of blobs in the file.
+  uint64_t total_blob_count;
+  // the total bytes in the file.
+  uint64_t total_blob_bytes;
+  // The status indicating whether the creation was successful or not.
+  Status status;
+  // The checksum of the blob file being created.
+  std::string file_checksum;
+  // The checksum function name of the checksum generator used for this blob
+  // file.
+  std::string file_checksum_func_name;
+};
+
+enum class CompactionReason : int {
+  kUnknown = 0,
+  // [Level] number of L0 files > level0_file_num_compaction_trigger
+  kLevelL0FilesNum,
+  // [Level] total size of level > MaxBytesForLevel()
+  kLevelMaxLevelSize,
+  // [Universal] Compacting for size amplification
+  kUniversalSizeAmplification,
+  // [Universal] Compacting for size ratio
+  kUniversalSizeRatio,
+  // [Universal] number of sorted runs > level0_file_num_compaction_trigger
+  kUniversalSortedRunNum,
+  // [FIFO] total size > max_table_files_size
+  kFIFOMaxSize,
+  // [FIFO] reduce number of files.
+  kFIFOReduceNumFiles,
+  // [FIFO] files with creation time < (current_time - interval)
+  kFIFOTtl,
+  // Manual compaction
+  kManualCompaction,
+  // DB::SuggestCompactRange() marked files for compaction
+  kFilesMarkedForCompaction,
+  // [Level] Automatic compaction within bottommost level to cleanup duplicate
+  // versions of same user key, usually due to a released snapshot.
+  kBottommostFiles,
+  // Compaction based on TTL
+  kTtl,
+  // According to the comments in flush_job.cc, RocksDB treats flush as
+  // a level 0 compaction in internal stats.
+  kFlush,
+  // [InternalOnly] External sst file ingestion treated as a compaction
+  // with placeholder input level L0 as file ingestion
+  // technically does not have an input level like other compactions.
+  // Used only for internal stats and conflict checking with other compactions
+  kExternalSstIngestion,
+  // Compaction due to SST file being too old
+  kPeriodicCompaction,
+  // Compaction in order to move files to temperature
+  kChangeTemperature,
+  // Compaction scheduled to force garbage collection of blob files
+  kForcedBlobGC,
+  // A special TTL compaction for RoundRobin policy, which is basically the
+  // same as kLevelMaxLevelSize, but the goal is to compact TTLed files.
+  kRoundRobinTtl,
+  // [InternalOnly] DBImpl::ReFitLevel treated as a compaction,
+  // Used only for internal conflict checking with other compactions
+  kRefitLevel,
+  // total number of compaction reasons; new reasons must be added above this.
+  kNumOfReasons,
+};
+
+enum class FlushReason : int {
+  kOthers = 0x00,
+  kGetLiveFiles = 0x01,
+  kShutDown = 0x02,
+  kExternalFileIngestion = 0x03,
+  kManualCompaction = 0x04,
+  kWriteBufferManager = 0x05,
+  kWriteBufferFull = 0x06,
+  kTest = 0x07,
+  kDeleteFiles = 0x08,
+  kAutoCompaction = 0x09,
+  kManualFlush = 0x0a,
+  kErrorRecovery = 0xb,
+  // When the flush reason is set to kErrorRecoveryRetryFlush, SwitchMemtable
+  // will not be called to avoid creating many small immutable memtables.
+  kErrorRecoveryRetryFlush = 0xc,
+  kWalFull = 0xd,
+};
+
+// TODO: In the future, BackgroundErrorReason will only be used to indicate
+// why the BG Error is happening (e.g., flush, compaction). We may introduce
+// other data structures to indicate other essential information such as
+// the file type (e.g., Manifest, SST) and special context.
+enum class BackgroundErrorReason {
+  kFlush,
+  kCompaction,
+  kWriteCallback,
+  kMemTable,
+  kManifestWrite,
+  kFlushNoWAL,
+  kManifestWriteNoWAL,
+};
+
+struct WriteStallInfo {
+  // the name of the column family
+  std::string cf_name;
+  // state of the write controller
+  struct {
+    WriteStallCondition cur;
+    WriteStallCondition prev;
+  } condition;
+};
+
+struct FileDeletionInfo {
+  FileDeletionInfo() = default;
+
+  FileDeletionInfo(const std::string& _db_name, const std::string& _file_path,
+                   int _job_id, Status _status)
+      : db_name(_db_name),
+        file_path(_file_path),
+        job_id(_job_id),
+        status(_status) {}
+  // The name of the database where the file was deleted.
+  std::string db_name;
+  // The path to the deleted file.
+  std::string file_path;
+  // The id of the job which deleted the file.
+  int job_id = 0;
+  // The status indicating whether the deletion was successful or not.
+  Status status;
+};
+
+struct TableFileDeletionInfo : public FileDeletionInfo {};
+
+struct BlobFileDeletionInfo : public FileDeletionInfo {
+  BlobFileDeletionInfo(const std::string& _db_name,
+                       const std::string& _file_path, int _job_id,
+                       Status _status)
+      : FileDeletionInfo(_db_name, _file_path, _job_id, _status) {}
+};
+
+enum class FileOperationType {
+  kRead,
+  kWrite,
+  kTruncate,
+  kClose,
+  kFlush,
+  kSync,
+  kFsync,
+  kRangeSync,
+  kAppend,
+  kPositionedAppend,
+  kOpen
+};
+
+struct FileOperationInfo {
+  using Duration = std::chrono::nanoseconds;
+  using SteadyTimePoint =
+      std::chrono::time_point<std::chrono::steady_clock,
+                              std::chrono::nanoseconds>;
+  using SystemTimePoint =
+      std::chrono::time_point<std::chrono::system_clock,
+                              std::chrono::nanoseconds>;
+  using StartTimePoint = std::pair<SystemTimePoint, SteadyTimePoint>;
+  using FinishTimePoint = SteadyTimePoint;
+
+  FileOperationType type;
+  const std::string& path;
+  // RocksDB tries to provide file temperature information, but it is not
+  // guaranteed.
+  Temperature temperature;
+  uint64_t offset;
+  size_t length;
+  const Duration duration;
+  const SystemTimePoint& start_ts;
+  Status status;
+
+  FileOperationInfo(const FileOperationType _type, const std::string& _path,
+                    const StartTimePoint& _start_ts,
+                    const FinishTimePoint& _finish_ts, const Status& _status,
+                    const Temperature _temperature = Temperature::kUnknown)
+      : type(_type),
+        path(_path),
+        temperature(_temperature),
+        duration(std::chrono::duration_cast<Duration>(
+            _finish_ts - _start_ts.second)),
+        start_ts(_start_ts.first),
+        status(_status) {}
+  static StartTimePoint StartNow() {
+    return std::make_pair(
+        std::chrono::system_clock::now(), std::chrono::steady_clock::now());
+  }
+  static FinishTimePoint FinishNow() {
+    return std::chrono::steady_clock::now();
+  }
+};
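+
+// A minimal timing sketch (illustrative only, not part of the upstream
+// header); do_write() and the path value are hypothetical placeholders:
+//
+//   std::string path = "/tmp/db/000001.sst";
+//   auto start = FileOperationInfo::StartNow();
+//   Status s = do_write();
+//   auto finish = FileOperationInfo::FinishNow();
+//   FileOperationInfo info(FileOperationType::kWrite, path, start, finish, s);
+//   // info.duration now holds the elapsed steady-clock nanoseconds.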
+struct BlobFileInfo {
+  BlobFileInfo(const std::string& _blob_file_path,
+               const uint64_t _blob_file_number)
+      : blob_file_path(_blob_file_path), blob_file_number(_blob_file_number) {}
+
+  std::string blob_file_path;
+  uint64_t blob_file_number;
+};
+
+struct BlobFileAdditionInfo : public BlobFileInfo {
+  BlobFileAdditionInfo(const std::string& _blob_file_path,
+                       const uint64_t _blob_file_number,
+                       const uint64_t _total_blob_count,
+                       const uint64_t _total_blob_bytes)
+      : BlobFileInfo(_blob_file_path, _blob_file_number),
+        total_blob_count(_total_blob_count),
+        total_blob_bytes(_total_blob_bytes) {}
+  uint64_t total_blob_count;
+  uint64_t total_blob_bytes;
+};
+
+struct BlobFileGarbageInfo : public BlobFileInfo {
+  BlobFileGarbageInfo(const std::string& _blob_file_path,
+                      const uint64_t _blob_file_number,
+                      const uint64_t _garbage_blob_count,
+                      const uint64_t _garbage_blob_bytes)
+      : BlobFileInfo(_blob_file_path, _blob_file_number),
+        garbage_blob_count(_garbage_blob_count),
+        garbage_blob_bytes(_garbage_blob_bytes) {}
+  uint64_t garbage_blob_count;
+  uint64_t garbage_blob_bytes;
+};
+
+struct FlushJobInfo {
+  // the id of the column family
+  uint32_t cf_id;
+  // the name of the column family
+  std::string cf_name;
+  // the path to the newly created file
+  std::string file_path;
+  // the file number of the newly created file
+  uint64_t file_number;
+  // the oldest blob file referenced by the newly created file
+  uint64_t oldest_blob_file_number;
+  // the id of the thread that completed this flush job.
+  uint64_t thread_id;
+  // the job id, which is unique in the same thread.
+  int job_id;
+  // If true, then rocksdb is currently slowing down all writes to prevent
+  // creating too many Level 0 files, as compaction seems unable to keep up
+  // with the write request speed. This indicates that there are too many
+  // files in Level 0.
+  bool triggered_writes_slowdown;
+  // If true, then rocksdb is currently blocking any writes to prevent
+  // creating more L0 files. This indicates that there are too many
+  // files in level 0. Compactions should try to compact L0 files down
+  // to lower levels as soon as possible.
+  bool triggered_writes_stop;
+  // The smallest sequence number in the newly created file
+  SequenceNumber smallest_seqno;
+  // The largest sequence number in the newly created file
+  SequenceNumber largest_seqno;
+  // Table properties of the table being flushed
+  TableProperties table_properties;
+
+  FlushReason flush_reason;
+
+  // Compression algorithm used for blob output files
+  CompressionType blob_compression_type;
+
+  // Information about blob files created during flush in Integrated BlobDB.
+  std::vector<BlobFileAdditionInfo> blob_file_addition_infos;
+};
+
+struct CompactionFileInfo {
+  // The level of the file.
+  int level;
+
+  // The file number of the file.
+  uint64_t file_number;
+
+  // The file number of the oldest blob file this SST file references.
+  uint64_t oldest_blob_file_number;
+};
+
+struct SubcompactionJobInfo {
+  ~SubcompactionJobInfo() { status.PermitUncheckedError(); }
+  // the id of the column family where the compaction happened.
+  uint32_t cf_id;
+  // the name of the column family where the compaction happened.
+  std::string cf_name;
+  // the status indicating whether the compaction was successful or not.
+  Status status;
+  // the id of the thread that completed this compaction job.
+  uint64_t thread_id;
+  // the job id, which is unique in the same thread.
+  int job_id;
+
+  // sub-compaction job id, which is only unique within the same compaction, so
+  // use both 'job_id' and 'subcompaction_job_id' to identify a subcompaction
+  // within an instance.
+  // For a non-subcompaction job, it's set to -1.
+  int subcompaction_job_id;
+  // the smallest input level of the compaction.
+  int base_input_level;
+  // the output level of the compaction.
+  int output_level;
+
+  // Reason to run the compaction
+  CompactionReason compaction_reason;
+
+  // Compression algorithm used for output files
+  CompressionType compression;
+
+  // Statistics and other additional details on the compaction
+  CompactionJobStats stats;
+
+  // Compression algorithm used for blob output files.
+  CompressionType blob_compression_type;
+};
+
+struct CompactionJobInfo {
+  ~CompactionJobInfo() { status.PermitUncheckedError(); }
+  // the id of the column family where the compaction happened.
+  uint32_t cf_id;
+  // the name of the column family where the compaction happened.
+  std::string cf_name;
+  // the status indicating whether the compaction was successful or not.
+  Status status;
+  // the id of the thread that completed this compaction job.
+  uint64_t thread_id;
+  // the job id, which is unique in the same thread.
+  int job_id;
+
+  // the smallest input level of the compaction.
+  int base_input_level;
+  // the output level of the compaction.
+  int output_level;
+
+  // The following variables contain information about compaction inputs
+  // and outputs. A file may appear in both the input and output lists
+  // if it was simply moved to a different level. The order of elements
+  // is the same across input_files and input_file_infos; similarly, it is
+  // the same across output_files and output_file_infos.
+
+  // The names of the compaction input files.
+  std::vector<std::string> input_files;
+
+  // Additional information about the compaction input files.
+  std::vector<CompactionFileInfo> input_file_infos;
+
+  // The names of the compaction output files.
+  std::vector<std::string> output_files;
+
+  // Additional information about the compaction output files.
+  std::vector<CompactionFileInfo> output_file_infos;
+
+  // Table properties for input and output tables.
+  // The map is keyed by values from input_files and output_files.
+  TablePropertiesCollection table_properties;
+
+  // Reason to run the compaction
+  CompactionReason compaction_reason;
+
+  // Compression algorithm used for output files
+  CompressionType compression;
+
+  // Statistics and other additional details on the compaction
+  CompactionJobStats stats;
+
+  // Compression algorithm used for blob output files.
+  CompressionType blob_compression_type;
+
+  // Information about blob files created during compaction in Integrated
+  // BlobDB.
+  std::vector<BlobFileAdditionInfo> blob_file_addition_infos;
+
+  // Information about blob files deleted during compaction in Integrated
+  // BlobDB.
+  std::vector<BlobFileGarbageInfo> blob_file_garbage_infos;
+};
+struct MemTableInfo {
+  // the name of the column family to which the memtable belongs
+  std::string cf_name;
+  // Sequence number of the first element that was inserted
+  // into the memtable.
+  SequenceNumber first_seqno;
+  // Sequence number that is guaranteed to be smaller than or equal
+  // to the sequence number of any key that could be inserted into this
+  // memtable. It can then be assumed that any write with a larger (or equal)
+  // sequence number will be present in this memtable or a later memtable.
+  SequenceNumber earliest_seqno;
+  // Total number of entries in memtable
+  uint64_t num_entries;
+  // Total number of deletes in memtable
+  uint64_t num_deletes;
+};
+
+struct ExternalFileIngestionInfo {
+  // the name of the column family
+  std::string cf_name;
+  // Path of the file outside the DB
+  std::string external_file_path;
+  // Path of the file inside the DB
+  std::string internal_file_path;
+  // The global sequence number assigned to keys in this file
+  SequenceNumber global_seqno;
+  // Table properties of the table being flushed
+  TableProperties table_properties;
+};
+
+// Result of auto background error recovery
+struct BackgroundErrorRecoveryInfo {
+  // The original error that triggered the recovery
+  Status old_bg_error;
+
+  // The final bg_error after all recovery attempts. Status::OK() means
+  // the recovery was successful and the database is fully operational.
+  Status new_bg_error;
+};
+
+struct IOErrorInfo {
+  IOErrorInfo(const IOStatus& _io_status, FileOperationType _operation,
+              const std::string& _file_path, size_t _length, uint64_t _offset)
+      : io_status(_io_status),
+        operation(_operation),
+        file_path(_file_path),
+        length(_length),
+        offset(_offset) {}
+
+  IOStatus io_status;
+  FileOperationType operation;
+  std::string file_path;
+  size_t length;
+  uint64_t offset;
+};
+// EventListener class contains a set of callback functions that will
+// be called when specific RocksDB events happen, such as a flush. It can
+// be used as a building block for developing custom features such as
+// stats collectors or external compaction algorithms.
+//
+// IMPORTANT
+// Because compaction is needed to resolve a "writes stopped" condition,
+// calling or waiting for any blocking DB write function (no_slowdown=false)
+// from a compaction-related listener callback can hang RocksDB. For DB
+// writes from a callback we recommend a WriteBatch and no_slowdown=true,
+// because the WriteBatch can accumulate writes for later in case DB::Write
+// returns Status::Incomplete. Similarly, calling CompactRange or similar
+// could hang by waiting for a background worker that is occupied until the
+// callback returns.
+//
+// Otherwise, callback functions should not run for an extended period of
+// time before the function returns, because this will slow RocksDB.
+//
+// [Threading] All EventListener callbacks will be called using the
+// actual thread that is involved in that specific event. For example, it
+// is the RocksDB background flush thread doing the actual flush that
+// calls EventListener::OnFlushCompleted().
+//
+// [Locking] All EventListener callbacks are designed to be called without
+// the current thread holding any DB mutex. This is to prevent potential
+// deadlock and performance issues when using EventListener callbacks
+// in a complex way.
+//
+// [Exceptions] Exceptions MUST NOT propagate out of overridden functions into
+// RocksDB, because RocksDB is not exception-safe. This could cause undefined
+// behavior including data loss, unreported corruption, deadlocks, and more.
+class EventListener : public Customizable {
+ public:
+  static const char* Type() { return "EventListener"; }
+  static Status CreateFromString(const ConfigOptions& options,
+                                 const std::string& id,
+                                 std::shared_ptr<EventListener>* result);
+  const char* Name() const override {
+    // Since EventListeners did not have a name previously, we will assume
+    // an empty name. Instances should override this method.
+    return "";
+  }
+  // A callback function to RocksDB which will be called whenever a
+  // registered RocksDB flushes a file. The default implementation is
+  // no-op.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  virtual void OnFlushCompleted(DB* /*db*/,
+                                const FlushJobInfo& /*flush_job_info*/) {}
+
+  // A callback function to RocksDB which will be called before a
+  // RocksDB starts to flush memtables. The default implementation is
+  // no-op.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  virtual void OnFlushBegin(DB* /*db*/,
+                            const FlushJobInfo& /*flush_job_info*/) {}
+
+  // A callback function for RocksDB which will be called whenever
+  // an SST file is deleted. Different from OnCompactionCompleted and
+  // OnFlushCompleted, this callback is designed for external logging
+  // services and thus only provides string parameters instead
+  // of a pointer to the DB. Applications that build logic based
+  // on file creations and deletions are suggested to implement
+  // OnFlushCompleted and OnCompactionCompleted.
+  //
+  // Note that if applications would like to use the passed reference
+  // outside this function call, they should make copies from the
+  // returned value.
+  virtual void OnTableFileDeleted(const TableFileDeletionInfo& /*info*/) {}
+
+  // A callback function to RocksDB which will be called before a
+  // RocksDB starts to compact. The default implementation is
+  // no-op.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  virtual void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& /*ci*/) {}
+
+  // A callback function for RocksDB which will be called whenever
+  // a registered RocksDB compacts a file. The default implementation
+  // is a no-op.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  //
+  // @param db a pointer to the rocksdb instance which just compacted
+  //   a file.
+  // @param ci a reference to a CompactionJobInfo struct. 'ci' is released
+  //   after this function is returned, and must be copied if it is needed
+  //   outside of this function.
+  virtual void OnCompactionCompleted(DB* /*db*/,
+                                     const CompactionJobInfo& /*ci*/) {}
+
+  // A callback function to RocksDB which will be called before a
+  // sub-compaction begins. If a compaction is split into 2 sub-compactions,
+  // it will trigger one `OnCompactionBegin()` first, then two
+  // `OnSubcompactionBegin()`.
+  // If a compaction is not split, it will still trigger one
+  // `OnSubcompactionBegin()`, as internally compaction is always handled by
+  // sub-compactions. The default implementation is a no-op.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  //
+  // @param si a reference to a SubcompactionJobInfo struct; it contains a
+  //   `subcompaction_job_id` which is only unique within the specified
+  //   compaction (which can be identified by `job_id`). 'si' is released
+  //   after this function returns, and must be copied if it's needed outside
+  //   this function.
+  // Note: `table_properties` is not set for sub-compactions; the information
+  //   can be obtained from `OnCompactionBegin()`.
+  virtual void OnSubcompactionBegin(const SubcompactionJobInfo& /*si*/) {}
+
+  // A callback function to RocksDB which will be called whenever a
+  // sub-compaction completes. The same as `OnSubcompactionBegin()`: if a
+  // compaction is split into 2 sub-compactions, it will be triggered twice. If
+  // a compaction is not split, it will still be triggered once.
+  // The default implementation is a no-op.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  //
+  // @param si a reference to a SubcompactionJobInfo struct; it contains a
+  //   `subcompaction_job_id` which is only unique within the specified
+  //   compaction (which can be identified by `job_id`). 'si' is released
+  //   after this function returns, and must be copied if it's needed outside
+  //   this function.
+  // Note: `table_properties` is not set for sub-compactions; the information
+  //   can be obtained from `OnCompactionCompleted()`.
+  virtual void OnSubcompactionCompleted(const SubcompactionJobInfo& /*si*/) {}
+
+  // A callback function for RocksDB which will be called whenever
+  // an SST file is created. Different from OnCompactionCompleted and
+  // OnFlushCompleted, this callback is designed for external logging
+  // services and thus only provides string parameters instead
+  // of a pointer to the DB. Applications that build logic based
+  // on file creations and deletions are suggested to implement
+  // OnFlushCompleted and OnCompactionCompleted.
+  //
+  // Historically it was only called if the file was successfully created.
+  // Now it will also be called in the failure case. Users can check
+  // info.status to see if it succeeded or not.
+  //
+  // Note that if applications would like to use the passed reference
+  // outside this function call, they should make copies from these
+  // returned values.
+  virtual void OnTableFileCreated(const TableFileCreationInfo& /*info*/) {}
+
+  // A callback function for RocksDB which will be called before
+  // an SST file is created. It will be followed by OnTableFileCreated after
+  // the creation finishes.
+  //
+  // Note that if applications would like to use the passed reference
+  // outside this function call, they should make copies from these
+  // returned values.
+  virtual void OnTableFileCreationStarted(
+      const TableFileCreationBriefInfo& /*info*/) {}
+
+  // A callback function for RocksDB which will be called before
+  // a memtable is made immutable.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  //
+  // Note that if applications would like to use the passed reference
+  // outside this function call, they should make copies from these
+  // returned values.
+  virtual void OnMemTableSealed(const MemTableInfo& /*info*/) {}
+
+  // A callback function for RocksDB which will be called before
+  // a column family handle is deleted.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  // @param handle is a pointer to the column family handle to be deleted
+  //   which will become a dangling pointer after the deletion.
+  virtual void OnColumnFamilyHandleDeletionStarted(
+      ColumnFamilyHandle* /*handle*/) {}
+
+  // A callback function for RocksDB which will be called after an external
+  // file is ingested using IngestExternalFile.
+  //
+  // Note that this function will run on the same thread as
+  // IngestExternalFile(); if this function is blocked, IngestExternalFile()
+  // will be blocked from finishing.
+  virtual void OnExternalFileIngested(
+      DB* /*db*/, const ExternalFileIngestionInfo& /*info*/) {}
+
+  // A callback function for RocksDB which will be called before setting the
+  // background error status to a non-OK value. The new background error status
+  // is provided in `bg_error` and can be modified by the callback. E.g., a
+  // callback can suppress errors by resetting it to Status::OK(), thus
+  // preventing the database from entering read-only mode. We do not provide
+  // any guarantee as to when failed flushes/compactions will be rescheduled
+  // if the user suppresses an error.
+  //
+  // Note that this function can run on the same threads as flush, compaction,
+  // and user writes. So, it is extremely important not to perform heavy
+  // computations or blocking calls in this function.
+  virtual void OnBackgroundError(BackgroundErrorReason /* reason */,
+                                 Status* /* bg_error */) {}
+
+  // A callback function for RocksDB which will be called whenever a change
+  // of superversion triggers a change of the stall conditions.
+  //
+  // Note that this function must be implemented in a way such that
+  // it does not run for an extended period of time before returning.
+  // Otherwise, RocksDB may be blocked.
+  virtual void OnStallConditionsChanged(const WriteStallInfo& /*info*/) {}
+
+  // A callback function for RocksDB which will be called whenever a file read
+  // operation finishes.
+  virtual void OnFileReadFinish(const FileOperationInfo& /* info */) {}
+
+  // A callback function for RocksDB which will be called whenever a file write
+  // operation finishes.
+  virtual void OnFileWriteFinish(const FileOperationInfo& /* info */) {}
+
+  // A callback function for RocksDB which will be called whenever a file flush
+  // operation finishes.
+  virtual void OnFileFlushFinish(const FileOperationInfo& /* info */) {}
+
+  // A callback function for RocksDB which will be called whenever a file sync
+  // operation finishes.
+  virtual void OnFileSyncFinish(const FileOperationInfo& /* info */) {}
+
+  // A callback function for RocksDB which will be called whenever a file
+  // rangeSync operation finishes.
+  virtual void OnFileRangeSyncFinish(const FileOperationInfo& /* info */) {}
+
+  // A callback function for RocksDB which will be called whenever a file
+  // truncate operation finishes.
+  virtual void OnFileTruncateFinish(const FileOperationInfo& /* info */) {}
+
+  // A callback function for RocksDB which will be called whenever a file close
+  // operation finishes.
+  virtual void OnFileCloseFinish(const FileOperationInfo& /* info */) {}
+
+  // If true, the OnFile*Finish functions will be called. If
+  // false, then they won't be called.
+  virtual bool ShouldBeNotifiedOnFileIO() { return false; }
+
+  // A callback function for RocksDB which will be called just before
+  // starting the automatic recovery process for recoverable background
+  // errors, such as NoSpace(). The callback can suppress the automatic
+  // recovery by setting *auto_recovery to false. The database will then
+  // have to be transitioned out of read-only mode by calling DB::Resume().
+  virtual void OnErrorRecoveryBegin(BackgroundErrorReason /* reason */,
+                                    Status /* bg_error */,
+                                    bool* /* auto_recovery */) {}
+
+  // DEPRECATED
+  // A callback function for RocksDB which will be called once the database
+  // is recovered from read-only mode after an error. When this is called, it
+  // means normal writes to the database can be issued and the user can
+  // initiate any further recovery actions needed.
+  virtual void OnErrorRecoveryCompleted(Status old_bg_error) {
+    old_bg_error.PermitUncheckedError();
+  }
+
+  // A callback function for RocksDB which will be called once the recovery
+  // attempt from a background retryable error is completed. The recovery
+  // may have been successful or not. In either case, the callback is called
+  // with the old and new error. If info.new_bg_error is Status::OK(), that
+  // means the recovery succeeded.
+  virtual void OnErrorRecoveryEnd(const BackgroundErrorRecoveryInfo& /*info*/) {
+  }
+
+  // A callback function for RocksDB which will be called before
+  // a blob file is created. It will be followed by OnBlobFileCreated after
+  // the creation finishes.
+  //
+  // Note that if applications would like to use the passed reference
+  // outside this function call, they should make copies from these
+  // returned values.
+  virtual void OnBlobFileCreationStarted(
+      const BlobFileCreationBriefInfo& /*info*/) {}
+
+  // A callback function for RocksDB which will be called whenever
+  // a blob file is created.
+  // It will be called whether the file is successfully created or not. Users
+  // can check info.status to see if it succeeded or not.
+  //
+  // Note that if applications would like to use the passed reference
+  // outside this function call, they should make copies from these
+  // returned values.
+  virtual void OnBlobFileCreated(const BlobFileCreationInfo& /*info*/) {}
+
+  // A callback function for RocksDB which will be called whenever
+  // a blob file is deleted.
+  //
+  // Note that if applications would like to use the passed reference
+  // outside this function call, they should make copies from these
+  // returned values.
+  virtual void OnBlobFileDeleted(const BlobFileDeletionInfo& /*info*/) {}
+
+  // A callback function for RocksDB which will be called whenever an IO error
+  // happens. ShouldBeNotifiedOnFileIO should be set to true to get a callback.
+  virtual void OnIOError(const IOErrorInfo& /*info*/) {}
+
+  ~EventListener() override {}
+};
+
+} // namespace ROCKSDB_NAMESPACE
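+
+// A minimal listener sketch (illustrative only, not part of the upstream
+// header): counting completed flushes. The listener is registered through
+// DBOptions::listeners (declared in rocksdb/options.h):
+//
+//   class FlushCounter : public EventListener {
+//    public:
+//     const char* Name() const override { return "FlushCounter"; }
+//     void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
+//       // info is only valid during the callback; copy anything (e.g.
+//       // info.file_path) that must outlive it.
+//       flushes_.fetch_add(1);
+//     }
+//    private:
+//     std::atomic<uint64_t> flushes_{0};
+//   };
+//
+//   Options options;
+//   options.listeners.emplace_back(std::make_shared<FlushCounter>());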
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/memory_allocator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/memory_allocator.h
new file mode 100644
index 0000000000..d126abfe6d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/memory_allocator.h
@@ -0,0 +1,87 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <memory>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// MemoryAllocator is an interface that a client can implement to supply custom
+// memory allocation and deallocation methods. See rocksdb/cache.h for more
+// information.
+// All methods should be thread-safe.
+class MemoryAllocator : public Customizable {
+ public:
+  static const char* Type() { return "MemoryAllocator"; }
+  static Status CreateFromString(const ConfigOptions& options,
+                                 const std::string& value,
+                                 std::shared_ptr<MemoryAllocator>* result);
+
+  // Allocate a block of at least size bytes. Has to be thread-safe.
+  virtual void* Allocate(size_t size) = 0;
+
+  // Deallocate a previously allocated block. Has to be thread-safe.
+  virtual void Deallocate(void* p) = 0;
+
+  // Returns the memory size of the block allocated at p. The default
+  // implementation that just returns the original allocation_size is fine.
+  virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const {
+    // default implementation just returns the allocation size
+    return allocation_size;
+  }
+
+  std::string GetId() const override { return GenerateIndividualId(); }
+};
+
+struct JemallocAllocatorOptions {
+  static const char* kName() { return "JemallocAllocatorOptions"; }
+  // Jemalloc tcache caches allocations by size class. For each size class,
+  // it caches between 20 (for large size classes) and 200 (for small size
+  // classes) allocations. To reduce tcache memory usage in case the allocator
+  // is accessed by a large number of threads, we can control whether to cache
+  // an allocation by its size.
+  bool limit_tcache_size = false;
+
+  // Lower bound of allocation size to use tcache, if limit_tcache_size=true.
+  // When used with block cache, it is recommended to set it to block_size/4.
+  size_t tcache_size_lower_bound = 1024;
+
+  // Upper bound of allocation size to use tcache, if limit_tcache_size=true.
+  // When used with block cache, it is recommended to set it to block_size.
+  size_t tcache_size_upper_bound = 16 * 1024;
+
+  // Number of arenas across which we spread allocation requests. Increasing
+  // this setting can mitigate arena mutex contention. The value must be
+  // positive.
+  size_t num_arenas = 1;
+};
+
+// Generate a memory allocator which allocates through Jemalloc and utilizes
+// MADV_DONTDUMP through madvise to exclude cache items from core dumps.
+// Applications can use the allocator with block cache to exclude block cache
+// usage from core dumps.
+//
+// Implementation details:
+// The JemallocNodumpAllocator creates a dedicated jemalloc arena, and all
+// allocations of the JemallocNodumpAllocator are through the same arena.
+// The memory allocator hooks memory allocation of the arena, and calls
+// madvise() with the MADV_DONTDUMP flag to exclude the piece of memory from
+// core dumps. A side benefit of using a single arena is a reduction of
+// jemalloc metadata for some workloads.
+//
+// To mitigate mutex contention for using one single arena (see also
+// `JemallocAllocatorOptions::num_arenas` above), jemalloc tcache
+// (thread-local cache) is enabled to cache unused allocations for future use.
+// The tcache normally incurs 0.5M extra memory usage per thread. The usage
+// can be reduced by limiting the allocation sizes to cache.
+extern Status NewJemallocNodumpAllocator(
+    JemallocAllocatorOptions& options,
+    std::shared_ptr<MemoryAllocator>* memory_allocator);
+
+} // namespace ROCKSDB_NAMESPACE
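+
+// A minimal implementation sketch (illustrative only, not part of the
+// upstream header): a pass-through allocator over malloc/free. A custom
+// allocator like this can be plugged into the block cache via
+// LRUCacheOptions::memory_allocator (declared in rocksdb/cache.h):
+//
+//   class MallocAllocator : public MemoryAllocator {
+//    public:
+//     const char* Name() const override { return "MallocAllocator"; }
+//     void* Allocate(size_t size) override { return malloc(size); }
+//     void Deallocate(void* p) override { free(p); }
+//   };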
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/memtablerep.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/memtablerep.h
new file mode 100644
index 0000000000..be0f6cd1f1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/memtablerep.h
@@ -0,0 +1,421 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file contains the interface that must be implemented by any collection
+// to be used as the backing store for a MemTable. Such a collection must
+// satisfy the following properties:
+//  (1) It does not store duplicate items.
+//  (2) It uses MemTableRep::KeyComparator to compare items for iteration and
+//      equality.
+//  (3) It can be accessed concurrently by multiple readers and must support
+//      concurrent reads. However, it needn't support multiple concurrent
+//      writes.
+//  (4) Items are never deleted.
+// The liberal use of assertions is encouraged to enforce (1).
+//
+// The factory will be passed a MemTableAllocator object when a new MemTableRep
+// is requested.
+//
+// Users can implement their own memtable representations. We include three
+// types built in:
+//  - SkipListRep: This is the default; it is backed by a skip list.
+//  - HashSkipListRep: The memtable rep that is best used for keys that are
+//    structured like "prefix:suffix" where iteration within a prefix is
+//    common and iteration across different prefixes is rare. It is backed by
+//    a hash map where each bucket is a skip list.
+//  - VectorRep: This is backed by an unordered std::vector. On iteration, the
+//    vector is sorted. It is intelligent about sorting; once MarkReadOnly()
+//    has been called, the vector will only be sorted once. It is optimized for
+//    random-write-heavy workloads.
+//
+// The last two implementations are designed for situations in which
+// iteration over the entire collection is rare since doing so requires all the
+// keys to be copied into a sorted data structure.
+
+#pragma once
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <stdexcept>
+#include <unordered_set>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/slice.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Arena;
+class Allocator;
+class LookupKey;
+class SliceTransform;
+class Logger;
+struct DBOptions;
+
+using KeyHandle = void*;
+
+extern Slice GetLengthPrefixedSlice(const char* data);
+
+class MemTableRep {
+ public:
+  // KeyComparator provides a means to compare keys, which are internal keys
+  // concatenated with values.
+  class KeyComparator {
+   public:
+    using DecodedType = ROCKSDB_NAMESPACE::Slice;
+
+    virtual DecodedType decode_key(const char* key) const {
+      // The format of key is frozen and can be treated as a part of the API
+      // contract. Refer to MemTable::Add for details.
+      return GetLengthPrefixedSlice(key);
+    }
+
+    // Compare a and b. Return a negative value if a is less than b, 0 if they
+    // are equal, and a positive value if a is greater than b
+    virtual int operator()(const char* prefix_len_key1,
+                           const char* prefix_len_key2) const = 0;
+
+    virtual int operator()(const char* prefix_len_key,
+                           const Slice& key) const = 0;
+
+    virtual ~KeyComparator() {}
+  };
+
+  explicit MemTableRep(Allocator* allocator) : allocator_(allocator) {}
+
+  // Allocate a buf of len size for storing key. The idea is that a
+  // specific memtable representation knows its underlying data structure
+  // better. By allowing it to allocate memory, it can possibly put
+  // correlated stuff in consecutive memory areas to make processor
+  // prefetching more efficient.
+  virtual KeyHandle Allocate(const size_t len, char** buf);
+
+  // Insert key into the collection. (The caller will pack key and value into a
+  // single buffer and pass that in as the parameter to Insert).
+  // REQUIRES: nothing that compares equal to key is currently in the
+  // collection, and no concurrent modifications to the table in progress
+  virtual void Insert(KeyHandle handle) = 0;
+
+  // Same as ::Insert
+  // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and
+  // the key already exists.
+  virtual bool InsertKey(KeyHandle handle) {
+    Insert(handle);
+    return true;
+  }
+
+  // Same as Insert(), but additionally pass a hint to the insert location for
+  // the key. If hint points to nullptr, a new hint will be populated;
+  // otherwise the hint will be updated to reflect the last insert location.
+  //
+  // Currently only the skip-list based memtable implements the interface.
+  // Other implementations will fall back to Insert() by default.
+  virtual void InsertWithHint(KeyHandle handle, void** /*hint*/) {
+    // Ignore the hint by default.
+    Insert(handle);
+  }
+
+  // Same as ::InsertWithHint
+  // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and
+  // the key already exists.
+  virtual bool InsertKeyWithHint(KeyHandle handle, void** hint) {
+    InsertWithHint(handle, hint);
+    return true;
+  }
+
+  // Same as ::InsertWithHint, but allows concurrent writes
+  //
+  // If hint points to nullptr, a new hint will be allocated on the heap;
+  // otherwise the hint will be updated to reflect the last insert location.
+  // The hint is owned by the caller and it is the caller's responsibility to
+  // delete the hint later.
+  //
+  // Currently only the skip-list based memtable implements the interface.
+  // Other implementations will fall back to InsertConcurrently() by default.
+  virtual void InsertWithHintConcurrently(KeyHandle handle, void** /*hint*/) {
+    // Ignore the hint by default.
+    InsertConcurrently(handle);
+  }
+
+  // Same as ::InsertWithHintConcurrently
+  // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and
+  // the key already exists.
+  virtual bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) {
+    InsertWithHintConcurrently(handle, hint);
+    return true;
+  }
+
+  // Like Insert(handle), but may be called concurrently with other calls
+  // to InsertConcurrently for other handles.
+  //
+  // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and
+  // the key already exists.
+  virtual void InsertConcurrently(KeyHandle handle);
+
+  // Same as ::InsertConcurrently
+  // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and
+  // the key already exists.
+  virtual bool InsertKeyConcurrently(KeyHandle handle) {
+    InsertConcurrently(handle);
+    return true;
+  }
+
+  // Returns true iff an entry that compares equal to key is in the collection.
+  virtual bool Contains(const char* key) const = 0;
+
+  // Notify this table rep that it will no longer be added to. By default,
+  // does nothing. After MarkReadOnly() is called, this table rep will
+  // not be written to (i.e., no more calls to Allocate(), Insert(),
+  // or any writes done directly to entries accessed through the iterator.)
+  virtual void MarkReadOnly() {}
+
+  // Notify this table rep that it has been flushed to stable storage.
+  // By default, does nothing.
+  //
+  // Invariant: MarkReadOnly() is called, before MarkFlushed().
+  // Note that this method, if overridden, should not run for an extended
+  // period of time. Otherwise, RocksDB may be blocked.
+  virtual void MarkFlushed() {}
+
+  // Look up key from the mem table. Starting from the first key in the mem
+  // table whose user_key matches the one given in k, call the function
+  // callback_func(), with callback_args directly forwarded as the first
+  // parameter and the mem table key as the second parameter. If the return
+  // value is false, terminate; otherwise go on to the next key.
+  //
+  // It's safe for Get() to terminate after having visited all the potential
+  // keys for k.user_key(), or not.
+  //
+  // Default:
+  // Dynamically construct an iterator, seek, and call the callback function.
+  virtual void Get(const LookupKey& k, void* callback_args,
+                   bool (*callback_func)(void* arg, const char* entry));
+
+  virtual uint64_t ApproximateNumEntries(const Slice& /*start_ikey*/,
+                                         const Slice& /*end_key*/) {
+    return 0;
+  }
+
+  // Returns a vector of unique random memtable entries of approximate
+  // size 'target_sample_size' (this size is not strictly enforced).
+  virtual void UniqueRandomSample(const uint64_t num_entries,
+                                  const uint64_t target_sample_size,
+                                  std::unordered_set<const char*>* entries) {
+    (void)num_entries;
+    (void)target_sample_size;
+    (void)entries;
+    assert(false);
+  }
+
+  // Report an approximation of how much memory has been used other than
+  // memory that was allocated through the allocator. Safe to call from any
+  // thread.
+  virtual size_t ApproximateMemoryUsage() = 0;
+
+  virtual ~MemTableRep() {}
+
+  // Iteration over the contents of a skip collection
+  class Iterator {
+   public:
+    // Initialize an iterator over the specified collection.
+    // The returned iterator is not valid.
+    // explicit Iterator(const MemTableRep* collection);
+    virtual ~Iterator() {}
+
+    // Returns true iff the iterator is positioned at a valid node.
+    virtual bool Valid() const = 0;
+
+    // Returns the key at the current position.
+    // REQUIRES: Valid()
+    virtual const char* key() const = 0;
+
+    // Advances to the next position.
+    // REQUIRES: Valid()
+    virtual void Next() = 0;
+
+    // Advances to the previous position.
+    // REQUIRES: Valid()
+    virtual void Prev() = 0;
+
+    // Advance to the first entry with a key >= target
+    virtual void Seek(const Slice& internal_key, const char* memtable_key) = 0;
+
+    // Retreat to the first entry with a key <= target
+    virtual void SeekForPrev(const Slice& internal_key,
+                             const char* memtable_key) = 0;
+
+    virtual void RandomSeek() {}
+
+    // Position at the first entry in collection.
+    // Final state of iterator is Valid() iff collection is not empty.
+    virtual void SeekToFirst() = 0;
+
+    // Position at the last entry in collection.
+    // Final state of iterator is Valid() iff collection is not empty.
+    virtual void SeekToLast() = 0;
+  };
+
+  // Return an iterator over the keys in this representation.
+  // arena: If not null, the arena needs to be used to allocate the Iterator.
+  //        When destroying the iterator, the caller will not call "delete"
+  //        but Iterator::~Iterator() directly. The destructor needs to destroy
+  //        all the states but those allocated in arena.
+  virtual Iterator* GetIterator(Arena* arena = nullptr) = 0;
+
+  // Return an iterator that has a special Seek semantics. The result of
+  // a Seek might only include keys with the same prefix as the target key.
+  // arena: If not null, the arena is used to allocate the Iterator.
+  //        When destroying the iterator, the caller will not call "delete"
+  //        but Iterator::~Iterator() directly. The destructor needs to destroy
+  //        all the states but those allocated in arena.
+  virtual Iterator* GetDynamicPrefixIterator(Arena* arena = nullptr) {
+    return GetIterator(arena);
+  }
+
+  // Return true if the current MemTableRep supports merge operator.
+  // Default: true
+  virtual bool IsMergeOperatorSupported() const { return true; }
+
+  // Return true if the current MemTableRep supports snapshots.
+  // Default: true
+  virtual bool IsSnapshotSupported() const { return true; }
+
+ protected:
+  // When *key is an internal key concatenated with the value, returns the
+  // user key.
+  virtual Slice UserKey(const char* key) const;
+
+  Allocator* allocator_;
+};
+
+// This is the base class for all factories that are used by RocksDB to create
+// new MemTableRep objects
+class MemTableRepFactory : public Customizable {
+ public:
+  ~MemTableRepFactory() override {}
+
+  static const char* Type() { return "MemTableRepFactory"; }
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& id,
+                                 std::unique_ptr<MemTableRepFactory>* factory);
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& id,
+                                 std::shared_ptr<MemTableRepFactory>* factory);
+
+  virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
+                                         Allocator*, const SliceTransform*,
+                                         Logger* logger) = 0;
+  virtual MemTableRep* CreateMemTableRep(
+      const MemTableRep::KeyComparator& key_cmp, Allocator* allocator,
+      const SliceTransform* slice_transform, Logger* logger,
+      uint32_t /* column_family_id */) {
+    return CreateMemTableRep(key_cmp, allocator, slice_transform, logger);
+  }
+
+  const char* Name() const override = 0;
+
+  // Return true if the current MemTableRep supports concurrent inserts
+  // Default: false
+  virtual bool IsInsertConcurrentlySupported() const { return false; }
+
+  // Return true if the current MemTableRep supports detecting duplicate keys
+  // at insertion time. If true, then MemTableRep::Insert* returns
+  // false when the key already exists.
+  // Default: false
+  virtual bool CanHandleDuplicatedKey() const { return false; }
+};
+// This uses a skip list to store keys. It is the default.
+//
+// Parameters:
+//   lookahead: If non-zero, each iterator's seek operation will start the
+//     search from the previously visited record (doing at most 'lookahead'
+//     steps). This is an optimization for the access pattern including many
+//     seeks with consecutive keys.
+class SkipListFactory : public MemTableRepFactory {
+ public:
+  explicit SkipListFactory(size_t lookahead = 0);
+
+  // Methods for Configurable/Customizable class overrides
+  static const char* kClassName() { return "SkipListFactory"; }
+  static const char* kNickName() { return "skip_list"; }
+  virtual const char* Name() const override { return kClassName(); }
+  virtual const char* NickName() const override { return kNickName(); }
+  std::string GetId() const override;
+
+  // Methods for MemTableRepFactory class overrides
+  using MemTableRepFactory::CreateMemTableRep;
+  virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
+                                         Allocator*, const SliceTransform*,
+                                         Logger* logger) override;
+
+  bool IsInsertConcurrentlySupported() const override { return true; }
+
+  bool CanHandleDuplicatedKey() const override { return true; }
+
+ private:
+  size_t lookahead_;
+};
+
+// This creates MemTableReps that are backed by an std::vector. On iteration,
+// the vector is sorted. This is useful for workloads where iteration is very
+// rare and writes are generally not issued after reads begin.
+//
+// Parameters:
+//   count: Passed to the constructor of the underlying std::vector of each
+//     VectorRep. On initialization, the underlying array will have at least
+//     count bytes reserved for usage.
+class VectorRepFactory : public MemTableRepFactory {
+  size_t count_;
+
+ public:
+  explicit VectorRepFactory(size_t count = 0);
+
+  // Methods for Configurable/Customizable class overrides
+  static const char* kClassName() { return "VectorRepFactory"; }
+  static const char* kNickName() { return "vector"; }
+  const char* Name() const override { return kClassName(); }
+  const char* NickName() const override { return kNickName(); }
+
+  // Methods for MemTableRepFactory class overrides
+  using MemTableRepFactory::CreateMemTableRep;
+  virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
+                                         Allocator*, const SliceTransform*,
+                                         Logger* logger) override;
+};
+
+// This factory creates memtables with a fixed array of buckets, each
+// pointing to a skiplist (null if the bucket is empty).
+// @bucket_count: number of fixed array buckets
+// @skiplist_height: the max height of the skiplist
+// @skiplist_branching_factor: probabilistic size ratio between adjacent
+//   link lists in the skiplist
+extern MemTableRepFactory* NewHashSkipListRepFactory(
+    size_t bucket_count = 1000000, int32_t skiplist_height = 4,
+    int32_t skiplist_branching_factor = 4);
+
+// This factory creates memtables based on a hash table:
+// it contains a fixed array of buckets, each pointing to either a linked list
+// or a skip list if the number of entries inside the bucket exceeds
+// threshold_use_skiplist.
+// @bucket_count: number of fixed array buckets
+// @huge_page_tlb_size: if <=0, allocate the hash table bytes from malloc.
+//   Otherwise from huge page TLB. The user needs to reserve
+//   huge pages for it to be allocated, like:
+//     sysctl -w vm.nr_hugepages=20
+//   See linux doc Documentation/vm/hugetlbpage.txt
+// @bucket_entries_logging_threshold: if the number of entries in one bucket
+//   exceeds this number, log about it.
+// @if_log_bucket_dist_when_flash: if true, log the distribution of the number
+//   of entries when flushing.
+// @threshold_use_skiplist: a bucket switches to a skip list if the number of
+//   entries exceeds this parameter.
+extern MemTableRepFactory* NewHashLinkListRepFactory(
+    size_t bucket_count = 50000, size_t huge_page_tlb_size = 0,
+    int bucket_entries_logging_threshold = 4096,
+    bool if_log_bucket_dist_when_flash = true,
+    uint32_t threshold_use_skiplist = 256);
+
+} // namespace ROCKSDB_NAMESPACE
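+
+// A minimal configuration sketch (illustrative only, not part of the
+// upstream header): selecting a memtable representation through Options;
+// NewFixedPrefixTransform() is declared in rocksdb/slice_transform.h:
+//
+//   Options options;
+//   // Default skip list, with a seek lookahead of 8 steps:
+//   options.memtable_factory.reset(new SkipListFactory(8));
+//   // Or a hash-skiplist memtable keyed by a fixed 4-byte prefix:
+//   options.prefix_extractor.reset(NewFixedPrefixTransform(4));
+//   options.memtable_factory.reset(NewHashSkipListRepFactory());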
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/merge_operator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/merge_operator.h
new file mode 100644
index 0000000000..077130475d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/merge_operator.h
@@ -0,0 +1,286 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <deque>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/slice.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+class Logger;
+
+// The Merge Operator
+//
+// Essentially, a MergeOperator specifies the SEMANTICS of a merge, which only
+// the client knows. It could be numeric addition, list append, string
+// concatenation, editing a data structure, ..., anything.
+// The library, on the other hand, is concerned with the exercise of this
+// interface, at the right time (during get, iteration, compaction...).
+//
+// To use merge, the client needs to provide an object implementing one of
+// the following interfaces:
+//  a) AssociativeMergeOperator - for most simple semantics (always take
+//     two values, and merge them into one value, which is then put back
+//     into rocksdb); numeric addition and string concatenation are examples;
+//
+//  b) MergeOperator - the generic class for all the more abstract / complex
+//     operations; one method (FullMergeV2) to merge a Put/Delete value with a
+//     merge operand; and another method (PartialMerge) that merges multiple
+//     operands together. This is especially useful if your key values have
+//     complex structures but you would still like to support client-specific
+//     incremental updates.
+//
+// AssociativeMergeOperator is simpler to implement. MergeOperator is simply
+// more powerful.
+//
+// Refer to the rocksdb-merge wiki for more details and example
+// implementations.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class MergeOperator : public Customizable {
+ public:
+  virtual ~MergeOperator() {}
+  static const char* Type() { return "MergeOperator"; }
+  static Status CreateFromString(const ConfigOptions& opts,
+                                 const std::string& id,
+                                 std::shared_ptr<MergeOperator>* result);
+
+  // Gives the client a way to express the read -> modify -> write semantics
+  // key:          (IN) The key that's associated with this merge operation.
+  //               Client could multiplex the merge operator based on it
+  //               if the key space is partitioned and different subspaces
+  //               refer to different types of data which have different
+  //               merge operation semantics
+  // existing:     (IN) null indicates that the key does not exist before this
+  //               op
+  // operand_list: (IN) the sequence of merge operations to apply, front()
+  //               first.
+  // new_value:    (OUT) Client is responsible for filling the merge result
+  //               here.
+  //                 The string that new_value is pointing to will be empty.
+  // logger:   (IN) Client could use this to log errors during merge.
+  //
+  // Return true on success.
+  // All values passed in will be client-specific values. So if this method
+  // returns false, it is because client specified bad data or there was
+  // internal corruption. This will be treated as an error by the library.
+  //
+  // Also make use of the *logger for error messages.
+  virtual bool FullMerge(const Slice& /*key*/, const Slice* /*existing_value*/,
+                         const std::deque<std::string>& /*operand_list*/,
+                         std::string* /*new_value*/, Logger* /*logger*/) const {
+    // deprecated, please use FullMergeV2()
+    assert(false);
+    return false;
+  }
+
+  struct MergeOperationInput {
+    // If user-defined timestamp is enabled, `_key` includes timestamp.
+    explicit MergeOperationInput(const Slice& _key,
+                                 const Slice* _existing_value,
+                                 const std::vector<Slice>& _operand_list,
+                                 Logger* _logger)
+        : key(_key),
+          existing_value(_existing_value),
+          operand_list(_operand_list),
+          logger(_logger) {}
+
+    // The key associated with the merge operation.
+    const Slice& key;
+    // The existing value of the current key, nullptr means that the
+    // value doesn't exist.
+    const Slice* existing_value;
+    // A list of operands to apply.
+    const std::vector<Slice>& operand_list;
+    // Logger could be used by client to log any errors that happen during
+    // the merge operation.
+    Logger* logger;
+  };
+
+  enum class OpFailureScope {
+    kDefault,
+    kTryMerge,
+    kMustMerge,
+    kOpFailureScopeMax,
+  };
+
+  struct MergeOperationOutput {
+    explicit MergeOperationOutput(std::string& _new_value,
+                                  Slice& _existing_operand)
+        : new_value(_new_value), existing_operand(_existing_operand) {}
+
+    // Client is responsible for filling the merge result here.
+    std::string& new_value;
+    // If the merge result is one of the existing operands (or existing_value),
+    // client can set this field to the operand (or existing_value) instead of
+    // using new_value.
+    Slice& existing_operand;
+    // Indicates the blast radius of the failure. It is only meaningful to
+    // provide a failure scope when returning `false` from the API populating
+    // the `MergeOperationOutput`. Currently RocksDB operations handle these
+    // values as follows:
+    //
+    // - `OpFailureScope::kDefault`: fallback to default
+    //   (`OpFailureScope::kTryMerge`)
+    // - `OpFailureScope::kTryMerge`: operations that try to merge that key
+    //   will fail. This includes flush and compaction, which puts the DB in
+    //   read-only mode.
+    // - `OpFailureScope::kMustMerge`: operations that must merge that key will
+    //   fail (e.g., `Get()`, `MultiGet()`, iteration). Flushes/compactions can
+    //   still proceed by copying the original input operands to the output.
+    OpFailureScope op_failure_scope = OpFailureScope::kDefault;
+  };
+
+  // This function applies a stack of merge operands in chronological order
+  // on top of an existing value. There are two ways in which this method is
+  // being used:
+  // a) During Get() operation, it is used to calculate the final value of a
+  //    key
+  // b) During compaction, in order to collapse some operands with the base
+  //    value.
+  //
+  // Note: The name of the method is somewhat misleading, as both in the cases
+  // of Get() or compaction it may be called on a subset of operands:
+  // K:    0    +1    +2    +7    +4    +5    2    +1    +2
+  //                                  ^
+  //                                  |
+  //                              snapshot
+  // In the example above, Get(K) operation will call FullMerge with a base
+  // value of 2 and operands [+1, +2].
+  // Compaction process might decide to collapse the beginning of the history
+  // up to the snapshot by performing full Merge with base value of 0 and
+  // operands [+1, +2, +7, +4].
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const;
+
+  // This function performs merge(left_op, right_op)
+  // when both the operands are themselves merge operation types
+  // that you would have passed to a DB::Merge() call in the same order
+  // (i.e.: DB::Merge(key, left_op), followed by DB::Merge(key, right_op)).
+  //
+  // PartialMerge should combine them into a single merge operation that is
+  // saved into *new_value, and then it should return true.
+  // *new_value should be constructed such that a call to
+  // DB::Merge(key, *new_value) would yield the same result as a call
+  // to DB::Merge(key, left_op) followed by DB::Merge(key, right_op).
+  //
+  // The string that new_value is pointing to will be empty.
+  //
+  // The default implementation of PartialMergeMulti will use this function
+  // as a helper, for backward compatibility. Any successor class of
+  // MergeOperator should either implement PartialMerge or PartialMergeMulti,
+  // although implementing PartialMergeMulti is suggested as it is in general
+  // more effective to merge multiple operands at a time instead of two
+  // operands at a time.
+  //
+  // If it is impossible or infeasible to combine the two operations,
+  // leave new_value unchanged and return false. The library will
+  // internally keep track of the operations, and apply them in the
+  // correct order once a base-value (a Put/Delete/End-of-Database) is seen.
+  //
+  // TODO: Presently there is no way to differentiate between error/corruption
+  // and simply "return false". For now, the client should simply return
+  // false in any case it cannot perform partial-merge, regardless of reason.
+  // If there is corruption in the data, handle it in the FullMergeV2()
+  // function and return false there. The default implementation of
+  // PartialMerge will always return false.
+  virtual bool PartialMerge(const Slice& /*key*/, const Slice& /*left_operand*/,
+                            const Slice& /*right_operand*/,
+                            std::string* /*new_value*/,
+                            Logger* /*logger*/) const {
+    return false;
+  }
+
+  // This function performs merge when all the operands are themselves merge
+  // operation types that you would have passed to a DB::Merge() call in the
+  // same order (front() first)
+  // (i.e. DB::Merge(key, operand_list[0]), followed by
+  //  DB::Merge(key, operand_list[1]), ...)
+  //
+  // PartialMergeMulti should combine them into a single merge operation that
+  // is saved into *new_value, and then it should return true. *new_value
+  // should be constructed such that a call to DB::Merge(key, *new_value)
+  // would yield the same result as sequential individual calls to
+  // DB::Merge(key, operand) for each operand in operand_list from front() to
+  // back().
+  //
+  // The string that new_value is pointing to will be empty.
+  //
+  // The PartialMergeMulti function will be called when there are at least two
+  // operands.
+  //
+  // In the default implementation, PartialMergeMulti will invoke PartialMerge
+  // multiple times, where each time it only merges two operands. Developers
+  // should either implement PartialMergeMulti, or implement PartialMerge
+  // which is served as the helper function of the default PartialMergeMulti.
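+  //
+  // Illustrative sketch (not part of the upstream header): a minimal,
+  // hypothetical counter operator whose operands are decimal strings.
+  // Collapsing ["1", "2", "7"] into the single operand "10" preserves the
+  // semantics, since applying "10" is equivalent to applying the three
+  // operands in order:
+  //
+  //   bool PartialMergeMulti(const Slice& /*key*/,
+  //                          const std::deque<Slice>& operand_list,
+  //                          std::string* new_value,
+  //                          Logger* /*logger*/) const override {
+  //     uint64_t sum = 0;
+  //     for (const Slice& op : operand_list) {
+  //       sum += std::stoull(op.ToString());  // assumes well-formed operands
+  //     }
+  //     *new_value = std::to_string(sum);
+  //     return true;
+  //   }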
+  virtual bool PartialMergeMulti(const Slice& key,
+                                 const std::deque<Slice>& operand_list,
+                                 std::string* new_value, Logger* logger) const;
+
+  // The name of the MergeOperator. Used to check for MergeOperator
+  // mismatches (i.e., a DB created with one MergeOperator is
+  // accessed using a different MergeOperator)
+  // TODO: the name is currently not stored persistently and thus
+  //       no checking is enforced. Client is responsible for providing
+  //       consistent MergeOperator between DB opens.
+  virtual const char* Name() const override = 0;
+
+  // Determines whether the PartialMerge can be called with just a single
+  // merge operand.
+  // Override and return true for allowing a single operand. PartialMerge
+  // and PartialMergeMulti should be overridden and implemented
+  // correctly to properly handle a single operand.
+  virtual bool AllowSingleOperand() const { return false; }
+
+  // Allows to control when to invoke a full merge during Get.
+  // This could be used to limit the number of merge operands that are looked
+  // at during a point lookup, thereby helping in limiting the number of
+  // levels to read from.
+  // Doesn't help with iterators.
+  //
+  // Note: the merge operands are passed to this function in the reversed order
+  // relative to how they were merged (passed to FullMerge or FullMergeV2)
+  // for performance reasons, see also:
+  // https://github.com/facebook/rocksdb/issues/3865
+  virtual bool ShouldMerge(const std::vector<Slice>& /*operands*/) const {
+    return false;
+  }
+};
+
+// The simpler, associative merge operator.
+class AssociativeMergeOperator : public MergeOperator {
+ public:
+  ~AssociativeMergeOperator() override {}
+
+  // Gives the client a way to express the read -> modify -> write semantics
+  // key:           (IN) The key that's associated with this merge operation.
+  // existing_value:(IN) null indicates the key does not exist before this op
+  // value:         (IN) the value to update/merge the existing_value with
+  // new_value:    (OUT) Client is responsible for filling the merge result
+  //                     here. The string that new_value is pointing to will
+  //                     be empty.
+  // logger:        (IN) Client could use this to log errors during merge.
+  //
+  // Return true on success.
+  // All values passed in will be client-specific values. So if this method
+  // returns false, it is because client specified bad data or there was
+  // internal corruption. The client should assume that this will be treated
+  // as an error by the library.
+  virtual bool Merge(const Slice& key, const Slice* existing_value,
+                     const Slice& value, std::string* new_value,
+                     Logger* logger) const = 0;
+
+ private:
+  // Default implementations of the MergeOperator functions
+  bool FullMergeV2(const MergeOperationInput& merge_in,
+                   MergeOperationOutput* merge_out) const override;
+
+  bool PartialMerge(const Slice& key, const Slice& left_operand,
+                    const Slice& right_operand, std::string* new_value,
+                    Logger* logger) const override;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/metadata.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/metadata.h
new file mode 100644
index 0000000000..4ab3842dda
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/metadata.h
@@ -0,0 +1,258 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/options.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Basic identifiers and metadata for a file in a DB. This only includes
+// information considered relevant for taking backups, checkpoints, or other
+// services relating to DB file storage.
+// This is only appropriate for immutable files, such as SST files or all
+// files in a backup. See also LiveFileStorageInfo.
+struct FileStorageInfo {
+  // The name of the file within its directory (e.g. "123456.sst")
+  std::string relative_filename;
+  // The directory containing the file, without a trailing '/'. This could be
+  // a DB path, wal_dir, etc.
+  std::string directory;
+
+  // The id of the file within a single DB. Set to 0 if the file does not have
+  // a number (e.g. CURRENT)
+  uint64_t file_number = 0;
+  // The type of the file as part of a DB.
+  FileType file_type = kTempFile;
+
+  // File size in bytes. See also `trim_to_size`.
+  uint64_t size = 0;
+
+  // This feature is experimental and subject to change.
+  Temperature temperature = Temperature::kUnknown;
+
+  // The checksum of a SST file, the value is decided by the file content and
+  // the checksum algorithm used for this SST file. The checksum function is
+  // identified by the file_checksum_func_name. If the checksum function is
+  // not specified, file_checksum is "0" by default.
+  std::string file_checksum;
+
+  // The name of the checksum function used to generate the file checksum
+  // value. If file checksum is not enabled (e.g., sst_file_checksum_func is
+  // null), file_checksum_func_name is UnknownFileChecksumFuncName, which is
+  // "Unknown".
+  std::string file_checksum_func_name;
+};
+
+// Adds to FileStorageInfo the ability to capture the state of files that
+// might change in a running DB.
+struct LiveFileStorageInfo : public FileStorageInfo {
+  // If non-empty, this string represents the "saved" contents of the file
+  // for the current context. (This field is used for checkpointing CURRENT
+  // file.) In that case, size == replacement_contents.size() and file on disk
+  // should be ignored. If empty string, the file on disk should still have
+  // "saved" contents. (See trim_to_size.)
+  std::string replacement_contents;
+
+  // If true, the file on disk is allowed to be larger than `size` but only
+  // the first `size` bytes should be used for the current context. If false,
+  // the file is corrupt if size on disk does not equal `size`.
+  bool trim_to_size = false;
+};
+
+// The metadata that describes an SST file. (Does not need to extend
+// LiveFileStorageInfo because SST files are always immutable.)
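+// (Illustrative, not part of the upstream header: instances of these
+// metadata structs are normally obtained from a live DB rather than
+// constructed by hand, e.g.
+//   std::vector<LiveFileMetaData> live_files;
+//   db->GetLiveFilesMetaData(&live_files);
+// using the DB-level accessors declared in db.h.)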
+struct SstFileMetaData : public FileStorageInfo {
+  SstFileMetaData() { file_type = kTableFile; }
+
+  SstFileMetaData(const std::string& _file_name, uint64_t _file_number,
+                  const std::string& _directory, uint64_t _size,
+                  SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno,
+                  const std::string& _smallestkey,
+                  const std::string& _largestkey, uint64_t _num_reads_sampled,
+                  bool _being_compacted, Temperature _temperature,
+                  uint64_t _oldest_blob_file_number,
+                  uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
+                  uint64_t _epoch_number, std::string& _file_checksum,
+                  std::string& _file_checksum_func_name)
+      : smallest_seqno(_smallest_seqno),
+        largest_seqno(_largest_seqno),
+        smallestkey(_smallestkey),
+        largestkey(_largestkey),
+        num_reads_sampled(_num_reads_sampled),
+        being_compacted(_being_compacted),
+        num_entries(0),
+        num_deletions(0),
+        oldest_blob_file_number(_oldest_blob_file_number),
+        oldest_ancester_time(_oldest_ancester_time),
+        file_creation_time(_file_creation_time),
+        epoch_number(_epoch_number) {
+    if (!_file_name.empty()) {
+      if (_file_name[0] == '/') {
+        relative_filename = _file_name.substr(1);
+        name = _file_name;  // Deprecated field
+      } else {
+        relative_filename = _file_name;
+        name = std::string("/") + _file_name;  // Deprecated field
+      }
+      assert(relative_filename.size() + 1 == name.size());
+      assert(relative_filename[0] != '/');
+      assert(name[0] == '/');
+    }
+    directory = _directory;
+    db_path = _directory;  // Deprecated field
+    file_number = _file_number;
+    file_type = kTableFile;
+    size = _size;
+    temperature = _temperature;
+    file_checksum = _file_checksum;
+    file_checksum_func_name = _file_checksum_func_name;
+  }
+
+  SequenceNumber smallest_seqno = 0;  // Smallest sequence number in file.
+  SequenceNumber largest_seqno = 0;   // Largest sequence number in file.
+  std::string smallestkey;            // Smallest user defined key in the file.
+  std::string largestkey;             // Largest user defined key in the file.
+  uint64_t num_reads_sampled = 0;     // How many times the file is read.
+  bool being_compacted =
+      false;  // true if the file is currently being compacted.
+
+  uint64_t num_entries = 0;
+  uint64_t num_deletions = 0;
+
+  uint64_t oldest_blob_file_number = 0;  // The id of the oldest blob file
+                                         // referenced by the file.
+  // An SST file may be generated by compactions whose input files may
+  // in turn be generated by earlier compactions. The creation time of the
+  // oldest SST file that is the compaction ancestor of this file.
+  // The timestamp is provided by SystemClock::GetCurrentTime().
+  // 0 if the information is not available.
+  //
+  // Note: for TTL blob files, it contains the start of the expiration range.
+  uint64_t oldest_ancester_time = 0;
+  // Timestamp when the SST file is created, provided by
+  // SystemClock::GetCurrentTime(). 0 if the information is not available.
+  uint64_t file_creation_time = 0;
+  // The order of a file being flushed or ingested/imported.
+  // Compaction output file will be assigned with the minimum `epoch_number`
+  // among input files'.
+  // For L0, larger `epoch_number` indicates newer L0 file.
+  // 0 if the information is not available.
+  uint64_t epoch_number = 0;
+
+  // These bounds define the effective key range for range tombstones
+  // in this file.
+  // Currently only used by CreateColumnFamilyWithImport().
+  std::string smallest{};  // Smallest internal key served by table
+  std::string largest{};   // Largest internal key served by table
+
+  // DEPRECATED: The name of the file within its directory with a
+  // leading slash (e.g. "/123456.sst"). Use relative_filename from base struct
+  // instead.
+  std::string name;
+
+  // DEPRECATED: replaced by `directory` in base struct
+  std::string db_path;
+};
+
+// The full set of metadata associated with each SST file.
+struct LiveFileMetaData : SstFileMetaData {
+  std::string column_family_name;  // Name of the column family
+  int level;                       // Level at which this file resides.
+  LiveFileMetaData() : column_family_name(), level(0) {}
+};
+
+// The MetaData that describes a Blob file
+struct BlobMetaData {
+  BlobMetaData()
+      : blob_file_number(0),
+        blob_file_size(0),
+        total_blob_count(0),
+        total_blob_bytes(0),
+        garbage_blob_count(0),
+        garbage_blob_bytes(0) {}
+
+  BlobMetaData(uint64_t _file_number, const std::string& _file_name,
+               const std::string& _file_path, uint64_t _file_size,
+               uint64_t _total_blob_count, uint64_t _total_blob_bytes,
+               uint64_t _garbage_blob_count, uint64_t _garbage_blob_bytes,
+               const std::string& _file_checksum,
+               const std::string& _file_checksum_func_name)
+      : blob_file_number(_file_number),
+        blob_file_name(_file_name),
+        blob_file_path(_file_path),
+        blob_file_size(_file_size),
+        total_blob_count(_total_blob_count),
+        total_blob_bytes(_total_blob_bytes),
+        garbage_blob_count(_garbage_blob_count),
+        garbage_blob_bytes(_garbage_blob_bytes),
+        checksum_method(_file_checksum),
+        checksum_value(_file_checksum_func_name) {}
+  uint64_t blob_file_number;
+  std::string blob_file_name;
+  std::string blob_file_path;
+  uint64_t blob_file_size;
+  uint64_t total_blob_count;
+  uint64_t total_blob_bytes;
+  uint64_t garbage_blob_count;
+  uint64_t garbage_blob_bytes;
+  std::string checksum_method;
+  std::string checksum_value;
+};
+
+// The metadata that describes a level.
+struct LevelMetaData {
+  LevelMetaData(int _level, uint64_t _size,
+                const std::vector<SstFileMetaData>&& _files)
+      : level(_level), size(_size), files(_files) {}
+
+  // The level which this meta data describes.
+  const int level;
+  // The size of this level in bytes, which is equal to the sum of
+  // the file size of its "files".
+  const uint64_t size;
+  // The metadata of all sst files in this level.
+  const std::vector<SstFileMetaData> files;
+};
+
+// The metadata that describes a column family.
+struct ColumnFamilyMetaData {
+  ColumnFamilyMetaData() : size(0), file_count(0), name("") {}
+  ColumnFamilyMetaData(const std::string& _name, uint64_t _size,
+                       const std::vector<LevelMetaData>&& _levels)
+      : size(_size), name(_name), levels(_levels) {}
+
+  // The size of this column family in bytes, which is equal to the sum of
+  // the file size of its "levels".
+  uint64_t size;
+  // The number of files in this column family.
+  size_t file_count;
+  // The name of the column family.
+  std::string name;
+  // The metadata of all levels in this column family.
+  std::vector<LevelMetaData> levels;
+
+  // The total size of all blob files
+  uint64_t blob_file_size = 0;
+  // The number of blob files in this column family.
+  size_t blob_file_count = 0;
+  // The metadata of the blobs in this column family.
+  std::vector<BlobMetaData> blob_files;
+};
+
+// Metadata returned as output from ExportColumnFamily() and used as input to
+// CreateColumnFamiliesWithImport().
+struct ExportImportFilesMetaData {
+  std::string db_comparator_name;       // Used to safety check at import.
+  std::vector<LiveFileMetaData> files;  // Vector of file metadata.
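+
+  // Illustrative sketch (not part of the upstream header) of the round trip
+  // this struct supports, assuming the Checkpoint utility and the
+  // single-column-family import API declared elsewhere in the public headers:
+  //
+  //   ExportImportFilesMetaData* metadata = nullptr;
+  //   checkpoint->ExportColumnFamily(cf_handle, export_dir, &metadata);
+  //   ImportColumnFamilyOptions import_opts;
+  //   ColumnFamilyHandle* imported = nullptr;
+  //   db->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "imported_cf",
+  //                                    import_opts, *metadata, &imported);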
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/options.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/options.h
new file mode 100644
index 0000000000..e9a708aa38
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/options.h
@@ -0,0 +1,2083 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "rocksdb/advanced_options.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/compression_type.h"
+#include "rocksdb/customizable.h"
+#include "rocksdb/data_structure.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_checksum.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/sst_partitioner.h"
+#include "rocksdb/types.h"
+#include "rocksdb/universal_compaction.h"
+#include "rocksdb/version.h"
+#include "rocksdb/write_buffer_manager.h"
+
+#ifdef max
+#undef max
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+class Cache;
+class CompactionFilter;
+class CompactionFilterFactory;
+class Comparator;
+class ConcurrentTaskLimiter;
+class Env;
+enum InfoLogLevel : unsigned char;
+class SstFileManager;
+class FilterPolicy;
+class Logger;
+class MergeOperator;
+class Snapshot;
+class MemTableRepFactory;
+class RateLimiter;
+class Slice;
+class Statistics;
+class InternalKeyComparator;
+class WalFilter;
+class FileSystem;
+
+struct Options;
+struct DbPath;
+
+using FileTypeSet = SmallEnumSet<FileType, FileType::kBlobFile>;
+
+struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
+  // The function recovers options to a previous version. Only 4.6 or later
+  // versions are supported.
+  // NOT MAINTAINED: This function has not been and is not maintained.
+  // DEPRECATED: This function might be removed in a future release.
+  // In general, defaults are changed to suit broad interests. Opting
+  // out of a change on upgrade should be deliberate and considered.
+  ColumnFamilyOptions* OldDefaults(int rocksdb_major_version = 4,
+                                   int rocksdb_minor_version = 6);
+
+  // Some functions that make it easier to optimize RocksDB
+  // Use this if your DB is very small (like under 1GB) and you don't want to
+  // spend lots of memory for memtables.
+  // An optional cache object is passed in to be used as the block cache
+  ColumnFamilyOptions* OptimizeForSmallDb(
+      std::shared_ptr<Cache>* cache = nullptr);
+
+  // Use this if you don't need to keep the data sorted, i.e. you'll never use
+  // an iterator, only Put() and Get() API calls
+  //
+  ColumnFamilyOptions* OptimizeForPointLookup(uint64_t block_cache_size_mb);
+
+  // Default values for some parameters in ColumnFamilyOptions are not
+  // optimized for heavy workloads and big datasets, which means you might
+  // observe write stalls under some conditions.
+  // As a starting point for tuning RocksDB options, use the following two
+  // functions:
+  // * OptimizeLevelStyleCompaction -- optimizes level style compaction
+  // * OptimizeUniversalStyleCompaction -- optimizes universal style compaction
+  // Universal style compaction is focused on reducing Write Amplification
+  // Factor for big data sets, but increases Space Amplification. You can learn
+  // more about the different styles here:
+  // https://github.com/facebook/rocksdb/wiki/Rocksdb-Architecture-Guide
+  // Make sure to also call IncreaseParallelism(), which will provide the
+  // biggest performance gains.
+  // Note: we might use more memory than memtable_memory_budget during high
+  // write rate period
+  ColumnFamilyOptions* OptimizeLevelStyleCompaction(
+      uint64_t memtable_memory_budget = 512 * 1024 * 1024);
+  ColumnFamilyOptions* OptimizeUniversalStyleCompaction(
+      uint64_t memtable_memory_budget = 512 * 1024 * 1024);
+
+  // -------------------
+  // Parameters that affect behavior
+
+  // Comparator used to define the order of keys in the table.
+  // Default: a comparator that uses lexicographic byte-wise ordering
+  //
+  // REQUIRES: The client must ensure that the comparator supplied
+  // here has the same name and orders keys *exactly* the same as the
+  // comparator provided to previous open calls on the same DB.
+  const Comparator* comparator = BytewiseComparator();
+
+  // REQUIRES: The client must provide a merge operator if Merge operation
+  // needs to be accessed. Calling Merge on a DB without a merge operator
+  // would result in Status::NotSupported. The client must ensure that the
+  // merge operator supplied here has the same name and *exactly* the same
+  // semantics as the merge operator provided to previous open calls on
+  // the same DB. The only exception is reserved for upgrade, where a DB
+  // previously without a merge operator is introduced to Merge operation
+  // for the first time. It's necessary to specify a merge operator when
+  // opening the DB in this case.
+  // Default: nullptr
+  std::shared_ptr<MergeOperator> merge_operator = nullptr;
+
+  // A single CompactionFilter instance to call into during compaction.
+  // Allows an application to modify/delete a key-value during background
+  // compaction.
+  //
+  // If the client requires a new `CompactionFilter` to be used for different
+  // compaction runs and/or requires a `CompactionFilter` for table file
+  // creations outside of compaction, it can specify compaction_filter_factory
+  // instead of this option. The client should specify only one of the two.
+  // compaction_filter takes precedence over compaction_filter_factory if
+  // client specifies both.
+  //
+  // If multithreaded compaction is being used, the supplied CompactionFilter
+  // instance may be used from different threads concurrently and so should be
+  // thread-safe.
+  //
+  // Default: nullptr
+  const CompactionFilter* compaction_filter = nullptr;
+
+  // This is a factory that provides `CompactionFilter` objects which allow
+  // an application to modify/delete a key-value during table file creation.
+  //
+  // Unlike the `compaction_filter` option, which is used when compaction
+  // creates a table file, this factory allows using a `CompactionFilter` when
+  // a table file is created for various reasons. The factory can decide what
+  // `TableFileCreationReason`s use a `CompactionFilter`. For compatibility, by
+  // default the decision is to use a `CompactionFilter` for
+  // `TableFileCreationReason::kCompaction` only.
+  //
+  // Each thread of work involving creating table files will create a new
+  // `CompactionFilter` when it will be used according to the above
+  // `TableFileCreationReason`-based decision. This allows the application to
+  // know about the different ongoing threads of work and makes it unnecessary
+  // for `CompactionFilter` to provide thread-safety.
+  //
+  // Default: nullptr
+  std::shared_ptr<CompactionFilterFactory> compaction_filter_factory = nullptr;
+
+  // -------------------
+  // Parameters that affect performance
+
+  // Amount of data to build up in memory (backed by an unsorted log
+  // on disk) before converting to a sorted on-disk file.
+  //
+  // Larger values increase performance, especially during bulk loads.
+  // Up to max_write_buffer_number write buffers may be held in memory
+  // at the same time,
+  // so you may wish to adjust this parameter to control memory usage.
+  // Also, a larger write buffer will result in a longer recovery time
+  // the next time the database is opened.
+  //
+  // Note that write_buffer_size is enforced per column family.
+  // See db_write_buffer_size for sharing memory across column families.
+  //
+  // Default: 64MB
+  //
+  // Dynamically changeable through SetOptions() API
+  size_t write_buffer_size = 64 << 20;
+
+  // Compress blocks using the specified compression algorithm.
+  //
+  // Default: kSnappyCompression, if it's supported. If snappy is not linked
+  // with the library, the default is kNoCompression.
+  //
+  // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz:
+  //    ~200-500MB/s compression
+  //    ~400-800MB/s decompression
+  //
+  // Note that these speeds are significantly faster than most
+  // persistent storage speeds, and therefore it is typically never
+  // worth switching to kNoCompression. Even if the input data is
+  // incompressible, the kSnappyCompression implementation will
+  // efficiently detect that and will switch to uncompressed mode.
+  //
+  // If you do not set `compression_opts.level`, or set it to
+  // `CompressionOptions::kDefaultCompressionLevel`, we will attempt to pick
+  // the default corresponding to `compression` as follows:
+  //
+  // - kZSTD: 3
+  // - kZlibCompression: Z_DEFAULT_COMPRESSION (currently -1)
+  // - kLZ4HCCompression: 0
+  // - For all others, we do not specify a compression level
+  //
+  // Dynamically changeable through SetOptions() API
+  CompressionType compression;
+
+  // Compression algorithm that will be used for the bottommost level that
+  // contain files. The behavior for num_levels = 1 is not well defined.
+  // Right now, with num_levels = 1, all compaction outputs will use
+  // bottommost_compression and all flush outputs still use
+  // options.compression, but the behavior is subject to change.
+  //
+  // Default: kDisableCompressionOption (Disabled)
+  CompressionType bottommost_compression = kDisableCompressionOption;
+
+  // different options for compression algorithms used by
+  // bottommost_compression if it is enabled. To enable it, please see the
+  // definition of CompressionOptions. Behavior for num_levels = 1 is the same
+  // as options.bottommost_compression.
+  CompressionOptions bottommost_compression_opts;
+
+  // different options for compression algorithms
+  CompressionOptions compression_opts;
+
+  // Number of files to trigger level-0 compaction. A value < 0 means that
+  // level-0 compaction will not be triggered by number of files at all.
+  //
+  // Default: 4
+  //
+  // Dynamically changeable through SetOptions() API
+  int level0_file_num_compaction_trigger = 4;
+
+  // If non-nullptr, use the specified function to put keys in contiguous
+  // groups called "prefixes". These prefixes are used to place one
+  // representative entry for the group into the Bloom filter
+  // rather than an entry for each key (see whole_key_filtering).
+  // Under certain conditions, this enables optimizing some range queries
+  // (Iterators) in addition to some point lookups (Get/MultiGet).
+  //
+  // Together `prefix_extractor` and `comparator` must satisfy one essential
+  // property for valid prefix filtering of range queries:
+  //
+  //   If Compare(k1, k2) <= 0 and Compare(k2, k3) <= 0 and
+  //   InDomain(k1) and InDomain(k3) and prefix(k1) == prefix(k3),
+  //   Then InDomain(k2) and prefix(k2) == prefix(k1)
+  //
+  // In other words, all keys with the same prefix must be in a contiguous
+  // group by comparator order, and cannot be interrupted by keys with no
+  // prefix ("out of domain"). (This makes it valid to conclude that no
+  // entries within some bounds are present if the upper and lower bounds
+  // have a common prefix and no entries with that same prefix are present.)
+  //
+  // Some other properties are recommended but not strictly required. Under
+  // most sensible comparators, the following will need to hold true to
+  // satisfy the essential property above:
+  // * "Prefix is a prefix": key.starts_with(prefix(key))
+  // * "Prefixes preserve ordering": If Compare(k1, k2) <= 0, then
+  //   Compare(prefix(k1), prefix(k2)) <= 0
+  //
+  // The next two properties ensure that seeking to a prefix allows
+  // enumerating all entries with that prefix:
+  // * "Prefix starts the group": Compare(prefix(key), key) <= 0
+  // * "Prefix idempotent": prefix(prefix(key)) == prefix(key)
+  //
+  // Default: nullptr
+  std::shared_ptr<const SliceTransform> prefix_extractor = nullptr;
+
+  // Control maximum total data size for a level.
+  // max_bytes_for_level_base is the max total for level-1.
+  // Maximum number of bytes for level L can be calculated as
+  // (max_bytes_for_level_base) * (max_bytes_for_level_multiplier ^ (L-1))
+  // For example, if max_bytes_for_level_base is 200MB, and if
+  // max_bytes_for_level_multiplier is 10, total data size for level-1
+  // will be 200MB, total file size for level-2 will be 2GB,
+  // and total file size for level-3 will be 20GB.
+  //
+  // Default: 256MB.
+  //
+  // Dynamically changeable through SetOptions() API
+  uint64_t max_bytes_for_level_base = 256 * 1048576;
+
+  // Deprecated.
+  uint64_t snap_refresh_nanos = 0;
+
+  // Disable automatic compactions. Manual compactions can still
+  // be issued on this column family
+  //
+  // Dynamically changeable through SetOptions() API
+  bool disable_auto_compactions = false;
+
+  // This is a factory that provides TableFactory objects.
+  // Default: a block-based table factory that provides a default
+  // implementation of TableBuilder and TableReader with default
+  // BlockBasedTableOptions.
+  std::shared_ptr<TableFactory> table_factory;
+
+  // A list of paths where SST files for this column family
+  // can be put into, with its target size. Similar to db_paths,
+  // newer data is placed into paths specified earlier in the
+  // vector while older data gradually moves to paths specified
+  // later in the vector.
+  // Note that, if a path is supplied to multiple column
+  // families, it would have files and total size from all
+  // the column families combined.
+  // User should provision for the total size (from all the column families)
+  // in such cases.
+  //
+  // If left empty, db_paths will be used.
+  // Default: empty
+  std::vector<DbPath> cf_paths;
+
+  // Compaction concurrent thread limiter for the column family.
+  // If non-nullptr, use given concurrent thread limiter to control
+  // the max outstanding compaction tasks. Limiter can be shared with
+  // multiple column families across db instances.
+  //
+  // Default: nullptr
+  std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter = nullptr;
+
+  // If non-nullptr, use the specified factory for a function to determine the
+  // partitioning of sst files. This helps compaction to split the files
+  // on interesting boundaries (key prefixes) to make propagation of sst
+  // files less write amplifying (covering the whole key space).
+  // THE FEATURE IS STILL EXPERIMENTAL
+  //
+  // Default: nullptr
+  std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory = nullptr;
+
+  // Create ColumnFamilyOptions with default values for all fields
+  ColumnFamilyOptions();
+  // Create ColumnFamilyOptions from Options
+  explicit ColumnFamilyOptions(const Options& options);
+
+  void Dump(Logger* log) const;
+};
+
+enum class WALRecoveryMode : char {
+  // Original levelDB recovery
+  //
+  // We tolerate the last record in any log to be incomplete due to a crash
+  // while writing it. Zeroed bytes from preallocation are also tolerated in
+  // the trailing data of any log.
+  //
+  // Use case: Applications for which updates, once applied, must not be
+  // rolled back even after a crash-recovery. In this recovery mode, RocksDB
+  // guarantees this as long as `WritableFile::Append()` writes are durable.
+  // In case the user needs the guarantee in more situations (e.g., when
+  // `WritableFile::Append()` writes to page cache, but the user desires this
+  // guarantee in face of power-loss crash-recovery), RocksDB offers various
+  // mechanisms to additionally invoke `WritableFile::Sync()` in order to
+  // strengthen the guarantee.
+  //
+  // This differs from `kPointInTimeRecovery` in that, in case a corruption is
+  // detected during recovery, this mode will refuse to open the DB. Whereas,
+  // `kPointInTimeRecovery` will stop recovery just before the corruption since
+  // that is a valid point-in-time to which to recover.
+  kTolerateCorruptedTailRecords = 0x00,
+  // Recover from clean shutdown
+  // We don't expect to find any corruption in the WAL
+  // Use case : This is ideal for unit tests and rare applications that
+  // can require high consistency guarantee
+  kAbsoluteConsistency = 0x01,
+  // Recover to point-in-time consistency (default)
+  // We stop the WAL playback on discovering WAL inconsistency
+  // Use case : Ideal for systems that have disk controller cache like
+  // hard disk, SSD without super capacitor that store related data
+  kPointInTimeRecovery = 0x02,
+  // Recovery after a disaster
+  // We ignore any corruption in the WAL and try to salvage as much data as
+  // possible
+  // Use case : Ideal for last ditch effort to recover data or systems that
+  // operate with low grade unrelated data
+  kSkipAnyCorruptedRecords = 0x03,
+};
+
+struct DbPath {
+  std::string path;
+  uint64_t target_size;  // Target size of total files under the path, in
+                         // bytes.
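+
+  // Illustrative values (not part of the upstream header), for a typical
+  // entry in the cf_paths option above or the db_paths option below:
+  //   DbPath("/flash_path", 10ULL << 30)  // ~10GB budget on fast storage
+  //   DbPath("/hard_drive", 2ULL << 40)   // ~2TB budget on slow storage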
+
+  DbPath() : target_size(0) {}
+  DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {}
+};
+
+extern const char* kHostnameForDbHostId;
+
+enum class CompactionServiceJobStatus : char {
+  kSuccess,
+  kFailure,
+  kUseLocal,
+};
+
+struct CompactionServiceJobInfo {
+  std::string db_name;
+  std::string db_id;
+  std::string db_session_id;
+  uint64_t job_id;  // job_id is only unique within the current DB and
+                    // session, restart DB will reset the job_id. `db_id` and
+                    // `db_session_id` could help you build unique id across
+                    // different DBs and sessions.
+
+  Env::Priority priority;
+
+  CompactionServiceJobInfo(std::string db_name_, std::string db_id_,
+                           std::string db_session_id_, uint64_t job_id_,
+                           Env::Priority priority_)
+      : db_name(std::move(db_name_)),
+        db_id(std::move(db_id_)),
+        db_session_id(std::move(db_session_id_)),
+        job_id(job_id_),
+        priority(priority_) {}
+};
+
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class CompactionService : public Customizable {
+ public:
+  static const char* Type() { return "CompactionService"; }
+
+  // Returns the name of this compaction service.
+  const char* Name() const override = 0;
+
+  // Start the remote compaction with `compaction_service_input`, which can be
+  // passed to `DB::OpenAndCompact()` on the remote side. `info` provides the
+  // information the user might want to know, which includes `job_id`.
+  virtual CompactionServiceJobStatus StartV2(
+      const CompactionServiceJobInfo& /*info*/,
+      const std::string& /*compaction_service_input*/) {
+    return CompactionServiceJobStatus::kUseLocal;
+  }
+
+  // Wait for remote compaction to finish.
+  virtual CompactionServiceJobStatus WaitForCompleteV2(
+      const CompactionServiceJobInfo& /*info*/,
+      std::string* /*compaction_service_result*/) {
+    return CompactionServiceJobStatus::kUseLocal;
+  }
+
+  ~CompactionService() override = default;
+};
+
+struct DBOptions {
+  // The function recovers options to the option as in version 4.6.
+  // NOT MAINTAINED: This function has not been and is not maintained.
+  // DEPRECATED: This function might be removed in a future release.
+  // In general, defaults are changed to suit broad interests. Opting
+  // out of a change on upgrade should be deliberate and considered.
+  DBOptions* OldDefaults(int rocksdb_major_version = 4,
+                         int rocksdb_minor_version = 6);
+
+  // Some functions that make it easier to optimize RocksDB
+
+  // Use this if your DB is very small (like under 1GB) and you don't want to
+  // spend lots of memory for memtables.
+  // An optional cache object is passed in for the memory of the
+  // memtable to cost to
+  DBOptions* OptimizeForSmallDb(std::shared_ptr<Cache>* cache = nullptr);
+
+  // By default, RocksDB uses only one background thread for flush and
+  // compaction. Calling this function will set it up such that total of
+  // `total_threads` is used. Good value for `total_threads` is the number of
+  // cores. You almost definitely want to call this function if your system is
+  // bottlenecked by RocksDB.
+  DBOptions* IncreaseParallelism(int total_threads = 16);
+
+  // If true, the database will be created if it is missing.
+  // Default: false
+  bool create_if_missing = false;
+
+  // If true, missing column families will be automatically created.
+  // Default: false
+  bool create_missing_column_families = false;
+
+  // If true, an error is raised if the database already exists.
+  // Default: false
+  bool error_if_exists = false;
+
+  // If true, RocksDB will aggressively check consistency of the data.
+  // Also, if any of the writes to the database fails (Put, Delete, Merge,
+  // Write), the database will switch to read-only mode and fail all other
+  // Write operations.
+  // In most cases you want this to be set to true.
+  // Default: true
+  bool paranoid_checks = true;
+
+  // If true, during memtable flush, RocksDB will validate total entries
+  // read in flush, and compare with counter inserted into it.
+  // The option is here to turn the feature off in case this new validation
+  // feature has a bug.
+  // Default: true
+  bool flush_verify_memtable_count = true;
+
+  // If true, the log numbers and sizes of the synced WALs are tracked
+  // in MANIFEST. During DB recovery, if a synced WAL is missing
+  // from disk, or the WAL's size does not match the recorded size in
+  // MANIFEST, an error will be reported and the recovery will be aborted.
+  //
+  // This is one additional protection against WAL corruption besides the
+  // per-WAL-entry checksum.
+  //
+  // Note that this option does not work with secondary instance.
+  // Currently, only syncing closed WALs are tracked. Calling `DB::SyncWAL()`,
+  // etc. or writing with `WriteOptions::sync=true` to sync the live WAL is not
+  // tracked for performance/efficiency reasons.
+  //
+  // Default: false
+  bool track_and_verify_wals_in_manifest = false;
+
+  // If true, verifies the SST unique id between MANIFEST and actual file
+  // each time an SST file is opened. This check ensures an SST file is not
+  // overwritten or misplaced. A corruption error will be reported if mismatch
+  // detected, but only when MANIFEST tracks the unique id, which starts from
+  // RocksDB version 7.3. Although the tracked internal unique id is related
+  // to the one returned by GetUniqueIdFromTableProperties, that is subject to
+  // change.
+  // NOTE: verification is currently only done on SST files using block-based
+  // table format.
+  //
+  // Setting to false should only be needed in case of unexpected problems.
+  //
+  // Although an early version of this option opened all SST files for
+  // verification on DB::Open, that is no longer guaranteed. However, as
+  // documented in an above option, if max_open_files is -1, DB will open all
+  // files on DB::Open().
+  //
+  // Default: true
+  bool verify_sst_unique_id_in_manifest = true;
+
+  // Use the specified object to interact with the environment,
+  // e.g. to read/write files, schedule background work, etc. In the near
+  // future, support for doing storage operations such as read/write files
+  // through env will be deprecated in favor of file_system (see below)
+  // Default: Env::Default()
+  Env* env = Env::Default();
+
+  // Limits internal file read/write bandwidth:
+  //
+  // - Flush requests write bandwidth at `Env::IOPriority::IO_HIGH`
+  // - Compaction requests read and write bandwidth at
+  //   `Env::IOPriority::IO_LOW`
+  // - Reads associated with a `ReadOptions` can be charged at
+  //   `ReadOptions::rate_limiter_priority` (see that option's API doc for
+  //   usage and limitations).
+  // - Writes associated with a `WriteOptions` can be charged at
+  //   `WriteOptions::rate_limiter_priority` (see that option's API doc for
+  //   usage and limitations).
+  //
+  // Rate limiting is disabled if nullptr.
+  // If rate limiter is enabled, bytes_per_sync is set to 1MB by default.
+  //
+  // Default: nullptr
+  std::shared_ptr<RateLimiter> rate_limiter = nullptr;
+
+  // Use to track SST files and control their file deletion rate.
+  //
+  // Features:
+  //  - Throttle the deletion rate of the SST files.
+  //  - Keep track the total size of all SST files.
+  //  - Set a maximum allowed space limit for SST files that when reached
+  //    the DB won't do any further flushes or compactions and will set the
+  //    background error.
+  //  - Can be shared between multiple dbs.
+  // Limitations:
+  //  - Only track and throttle deletes of SST files in
+  //    first db_path (db_name if db_paths is empty).
+  //
+  // Default: nullptr
+  std::shared_ptr<SstFileManager> sst_file_manager = nullptr;
+
+  // Any internal progress/error information generated by the db will
+  // be written to info_log if it is non-nullptr, or to a file stored
+  // in the same directory as the DB contents if info_log is nullptr.
+  // Default: nullptr
+  std::shared_ptr<Logger> info_log = nullptr;
+
+#ifdef NDEBUG
+  InfoLogLevel info_log_level = INFO_LEVEL;
+#else
+  InfoLogLevel info_log_level = DEBUG_LEVEL;
+#endif  // NDEBUG
+
+  // Number of open files that can be used by the DB. You may need to
+  // increase this if your database has a large working set. Value -1 means
+  // files opened are always kept open. You can estimate number of files based
+  // on target_file_size_base and target_file_size_multiplier for level-based
+  // compaction. For universal-style compaction, you can usually set it to -1.
+  //
+  // A high value or -1 for this option can cause high memory usage.
+  // See BlockBasedTableOptions::cache_usage_options to constrain
+  // memory usage in case of block based table format.
+  //
+  // Default: -1
+  //
+  // Dynamically changeable through SetDBOptions() API.
+  int max_open_files = -1;
+
+  // If max_open_files is -1, DB will open all files on DB::Open(). You can
+  // use this option to increase the number of threads used to open the files.
+  // Default: 16
+  int max_file_opening_threads = 16;
+
+  // Once write-ahead logs exceed this size, we will start forcing the flush of
+  // column families whose memtables are backed by the oldest live WAL file
+  // (i.e. the ones that are causing all the space amplification). If set to 0
+  // (default), we will dynamically choose the WAL size limit to be
+  // [sum of all write_buffer_size * max_write_buffer_number] * 4
+  //
+  // For example, with 15 column families, each with
+  //   write_buffer_size = 128 MB
+  //   max_write_buffer_number = 6
+  // max_total_wal_size will be calculated to be [15 * 128MB * 6] * 4 = 45GB
+  //
+  // The RocksDB wiki has some discussion about how the WAL interacts
+  // with memtables and flushing of column families.
+  // https://github.com/facebook/rocksdb/wiki/Column-Families
+  //
+  // This option takes effect only when there are more than one column
+  // family as otherwise the wal size is dictated by the write_buffer_size.
+  //
+  // Default: 0
+  //
+  // Dynamically changeable through SetDBOptions() API.
+  uint64_t max_total_wal_size = 0;
+
+  // If non-null, then we should collect metrics about database operations
+  std::shared_ptr<Statistics> statistics = nullptr;
+
+  // By default, writes to stable storage use fdatasync (on platforms
+  // where this function is available). If this option is true,
+  // fsync is used instead.
+  //
+  // fsync and fdatasync are equally safe for our purposes and fdatasync is
+  // faster, so it is rarely necessary to set this option.
+  // It is provided as a workaround for kernel/filesystem bugs, such as one
+  // that affected fdatasync with ext4 in kernel versions prior to 3.7.
+  bool use_fsync = false;
+
+  // A list of paths where SST files can be put into, with its target size.
+  // Newer data is placed into paths specified earlier in the vector while
+  // older data gradually moves to paths specified later in the vector.
+  //
+  // For example, you have a flash device with 10GB allocated for the DB,
+  // as well as a hard drive of 2TB, you should config it to be:
+  //   [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
+  //
+  // The system will try to guarantee data under each path is close to but
+  // not larger than the target size. But current and future file sizes used
+  // by determining where to place a file are based on best-effort estimation,
+  // which means there is a chance that the actual size under the directory
+  // is slightly more than target size under some workloads. User should give
+  // some buffer room for those cases.
+  //
+  // If none of the paths has sufficient room to place a file, the file will
+  // be placed in the last path anyway, regardless of the target size.
+  //
+  // Placing newer data to earlier paths is also best-efforts. User should
+  // expect user files to be placed in higher levels in some extreme cases.
+  //
+  // If left empty, only one path will be used, which is db_name passed when
+  // opening the DB.
+  // Default: empty
+  std::vector<DbPath> db_paths;
+
+  // This specifies the info LOG dir.
+  // If it is empty, the log files will be in the same dir as data.
+  // If it is non empty, the log files will be in the specified dir,
+  // and the db data dir's absolute path will be used as the log file
+  // name's prefix.
+  std::string db_log_dir = "";
+
+  // This specifies the absolute dir path for write-ahead logs (WAL).
+  // If it is empty, the log files will be in the same dir as data,
+  //   dbname is used as the data dir by default
+  // If it is non empty, the log files will be kept in the specified dir.
+  // When destroying the db,
+  //   all log files in wal_dir and the dir itself is deleted
+  std::string wal_dir = "";
+
+  // The periodicity when obsolete files get deleted. The default
+  // value is 6 hours. The files that get out of scope by compaction
+  // process will still get automatically deleted on every compaction,
+  // regardless of this setting
+  //
+  // Default: 6 hours
+  //
+  // Dynamically changeable through SetDBOptions() API.
+  uint64_t delete_obsolete_files_period_micros = 6ULL * 60 * 60 * 1000000;
+
+  // Maximum number of concurrent background jobs (compactions and flushes).
+  //
+  // Default: 2
+  //
+  // Dynamically changeable through SetDBOptions() API.
+  int max_background_jobs = 2;
+
+  // DEPRECATED: RocksDB automatically decides this based on the
+  // value of max_background_jobs. For backwards compatibility we will set
+  // `max_background_jobs = max_background_compactions +
+  // max_background_flushes` in the case where user sets at least one of
+  // `max_background_compactions` or `max_background_flushes` (we replace -1
+  // by 1 in case one option is unset).
+  //
+  // Maximum number of concurrent background compaction jobs, submitted to
+  // the default LOW priority thread pool.
+  //
+  // If you're increasing this, also consider increasing number of threads in
+  // LOW priority thread pool. For more information, see
+  // Env::SetBackgroundThreads
+  //
+  // Default: -1
+  //
+  // Dynamically changeable through SetDBOptions() API.
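+  //
+  // Illustrative sketch (not part of the upstream header): per the
+  // deprecation note above, new code would typically size background work
+  // through the higher-level knobs instead, e.g.
+  //   DBOptions opts;
+  //   opts.IncreaseParallelism(16);  // sizes the LOW/HIGH thread pools
+  //   opts.max_background_jobs = 8;  // RocksDB splits flushes/compactions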
+  int max_background_compactions = -1;
+
+  // This value represents the maximum number of threads that will
+  // concurrently perform a compaction job by breaking it into multiple,
+  // smaller ones that are run simultaneously.
+  // Default: 1 (i.e. no subcompactions)
+  //
+  // Dynamically changeable through SetDBOptions() API.
+  uint32_t max_subcompactions = 1;
+
+  // DEPRECATED: RocksDB automatically decides this based on the
+  // value of max_background_jobs. For backwards compatibility we will set
+  // `max_background_jobs = max_background_compactions +
+  // max_background_flushes` in the case where user sets at least one of
+  // `max_background_compactions` or `max_background_flushes`.
+  //
+  // Maximum number of concurrent background memtable flush jobs, submitted by
+  // default to the HIGH priority thread pool. If the HIGH priority thread
+  // pool is configured to have zero threads, flush jobs will share the LOW
+  // priority thread pool with compaction jobs.
+  //
+  // It is important to use both thread pools when the same Env is shared by
+  // multiple db instances. Without a separate pool, long running compaction
+  // jobs could potentially block memtable flush jobs of other db instances,
+  // leading to unnecessary Put stalls.
+  //
+  // If you're increasing this, also consider increasing number of threads in
+  // HIGH priority thread pool. For more information, see
+  // Env::SetBackgroundThreads
+  // Default: -1
+  int max_background_flushes = -1;
+
+  // Specify the maximal size of the info log file. If the log file
+  // is larger than `max_log_file_size`, a new info log file will
+  // be created.
+  // If max_log_file_size == 0, all logs will be written to one
+  // log file.
+  size_t max_log_file_size = 0;
+
+  // Time for the info log file to roll (in seconds).
+  // If specified with non-zero value, log file will be rolled
+  // if it has been active longer than `log_file_time_to_roll`.
+  // Default: 0 (disabled)
+  size_t log_file_time_to_roll = 0;
+
+  // Maximal info log files to be kept.
+  // Default: 1000
+  size_t keep_log_file_num = 1000;
+
+  // Recycle log files.
+  // If non-zero, we will reuse previously written log files for new
+  // logs, overwriting the old data. The value indicates how many
+  // such files we will keep around at any point in time for later
+  // use. This is more efficient because the blocks are already
+  // allocated and fdatasync does not need to update the inode after
+  // each write.
+  // Default: 0
+  size_t recycle_log_file_num = 0;
+
+  // The manifest file is rolled over on reaching this limit.
+  // The older manifest file will be deleted.
+  // The default value is 1GB so that the manifest file can grow, but not
+  // reach the limit of storage capacity.
+  uint64_t max_manifest_file_size = 1024 * 1024 * 1024;
+
+  // Number of shards used for table cache.
+  int table_cache_numshardbits = 6;
+
+  // The following two fields affect how archived logs will be deleted.
+  // 1. If both set to 0, logs will be deleted asap and will not get into
+  //    the archive.
+  // 2. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
+  //    WAL files will be checked every 10 min and if total size is greater
+  //    than WAL_size_limit_MB, they will be deleted starting with the
+  //    earliest until size_limit is met. All empty files will be deleted.
+  // 3. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
+  //    WAL files will be checked every WAL_ttl_seconds / 2 and those that
+  //    are older than WAL_ttl_seconds will be deleted.
+  // 4. If both are not 0, WAL files will be checked every 10 min and both
+  //    checks will be performed with ttl being first.
+  uint64_t WAL_ttl_seconds = 0;
+  uint64_t WAL_size_limit_MB = 0;
+
+  // Number of bytes to preallocate (via fallocate) the manifest
+  // files. Default is 4mb, which is reasonable to reduce random IO
+  // as well as prevent overallocation for mounts that preallocate
+  // large amounts of data (such as xfs's allocsize option).
+  size_t manifest_preallocation_size = 4 * 1024 * 1024;
+
+  // Allow the OS to mmap file for reading sst tables.
+  // Not recommended for 32-bit OS.
+  // When the option is set to true and compression is disabled, the blocks
+  // will not be copied and will be read directly from the mmap-ed memory
+  // area, and the block will not be inserted into the block cache. However,
+  // checksums will still be checked if ReadOptions.verify_checksums is set
+  // to be true. It means a checksum check every time a block is read, more
+  // than the setup where the option is set to false and the block cache is
+  // used. The common use of the options is to run RocksDB on ramfs, where
+  // checksum verification is usually not needed.
+  // Default: false
+  bool allow_mmap_reads = false;
+
+  // Allow the OS to mmap file for writing.
+  // DB::SyncWAL() only works if this is set to false.
+  // Default: false
+  bool allow_mmap_writes = false;
+
+  // Enable direct I/O mode for read/write
+  // they may or may not improve performance depending on the use case
+  //
+  // Files will be opened in "direct I/O" mode
+  // which means that data r/w from the disk will not be cached or
+  // buffered. The hardware buffer of the devices may however still
+  // be used. Memory mapped files are not impacted by these parameters.
+
+  // Use O_DIRECT for user and compaction reads.
+  // Default: false
+  bool use_direct_reads = false;
+
+  // Use O_DIRECT for writes in background flush and compactions.
+  // Default: false
+  bool use_direct_io_for_flush_and_compaction = false;
+
+  // If false, fallocate() calls are bypassed, which disables file
+  // preallocation. The file space preallocation is used to increase the file
+  // write/append performance. By default, RocksDB preallocates space for WAL,
+  // SST, Manifest files, the extra space is truncated when the file is
+  // written. Warning: if you're using btrfs, we would recommend setting
+  // `allow_fallocate=false` to disable preallocation. As on btrfs, the extra
+  // allocated space cannot be freed, which could be significant if you have
+  // lots of files. More details about this limitation:
+  // https://github.com/btrfs/btrfs-dev-docs/blob/471c5699336e043114d4bca02adcd57d9dab9c44/data-extent-reference-counts.md
+  bool allow_fallocate = true;
+
+  // Disable child process inherit open files. Default: true
+  bool is_fd_close_on_exec = true;
+
+  // if not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
+  //
+  // Default: 600 (10 min)
+  //
+  // Dynamically changeable through SetDBOptions() API.
+  unsigned int stats_dump_period_sec = 600;
+
+  // if not zero, dump rocksdb.stats to RocksDB every stats_persist_period_sec
+  // Default: 600
+  unsigned int stats_persist_period_sec = 600;
+
+  // If true, automatically persist stats to a hidden column family (column
+  // family name: ___rocksdb_stats_history___) every
+  // stats_persist_period_sec seconds; otherwise, write to an in-memory
+  // struct. User can query through `GetStatsHistory` API.
+ // If true, automatically persist stats to a hidden column family (column
+ // family name: ___rocksdb_stats_history___) every
+ // stats_persist_period_sec seconds; otherwise, write to an in-memory
+ // struct. User can query through `GetStatsHistory` API.
+ // If user attempts to create a column family with the same name on a DB
+ // which has previously set persist_stats_to_disk to true, the column family
+ // creation will fail, but the hidden column family will survive, as well as
+ // the previously persisted statistics.
+ // When persisting stats to disk, the stat name will be limited to 100 bytes.
+ // Default: false
+ bool persist_stats_to_disk = false;
+
+ // if not zero, periodically take stats snapshots and store in memory; the
+ // memory size for stats snapshots is capped at stats_history_buffer_size
+ // Default: 1MB
+ size_t stats_history_buffer_size = 1024 * 1024;
+
+ // If set to true, will hint the underlying file system that the file
+ // access pattern is random when an sst file is opened.
+ // Default: true
+ bool advise_random_on_open = true;
+
+ // Amount of data to build up in memtables across all column
+ // families before writing to disk.
+ //
+ // This is distinct from write_buffer_size, which enforces a limit
+ // for a single memtable.
+ //
+ // This feature is disabled by default. Specify a non-zero value
+ // to enable it.
+ //
+ // Default: 0 (disabled)
+ size_t db_write_buffer_size = 0;
+
+ // The memory usage of memtables will be reported to this object. The same
+ // object can be passed into multiple DBs and it will track the sum of sizes
+ // of all the DBs. If the total size of all live memtables of all the DBs
+ // exceeds a limit, a flush will be triggered in the next DB to which the
+ // next write is issued, as long as there is one or more column family not
+ // already flushing.
+ //
+ // If the object is only passed to one DB, the behavior is the same as
+ // db_write_buffer_size. When write_buffer_manager is set, the value set will
+ // override db_write_buffer_size.
+ //
+ // This feature is disabled by default. Specify a non-zero value
+ // to enable it.
+ //
+ // Default: null
+ std::shared_ptr<WriteBufferManager> write_buffer_manager = nullptr;
+
+ // Specify the file access pattern once a compaction is started.
+ // It will be applied to all input files of a compaction.
+ // Default: NORMAL
+ enum AccessHint { NONE, NORMAL, SEQUENTIAL, WILLNEED };
+ AccessHint access_hint_on_compaction_start = NORMAL;
+
+ // If non-zero, we perform bigger reads when doing compaction. If you're
+ // running RocksDB on spinning disks, you should set this to at least 2MB.
+ // That way RocksDB's compaction is doing sequential instead of random reads.
+ //
+ // Default: 0
+ //
+ // Dynamically changeable through SetDBOptions() API.
+ size_t compaction_readahead_size = 0;
+
+ // This is a maximum buffer size that is used by WinMmapReadableFile in
+ // unbuffered disk I/O mode. We need to maintain an aligned buffer for
+ // reads. We allow the buffer to grow until the specified value and then
+ // for bigger requests allocate one shot buffers. In unbuffered mode we
+ // always bypass read-ahead buffer at ReadaheadRandomAccessFile
+ // When read-ahead is required we then make use of compaction_readahead_size
+ // value and always try to read ahead. With read-ahead we always
+ // pre-allocate buffer to the size instead of growing it up to a limit.
+ //
+ // This option is currently honored only on Windows
+ //
+ // Default: 1 MB
+ //
+ // Special value: 0 - means do not maintain per instance buffer. Allocate
+ // per request buffer and avoid locking.
+ size_t random_access_max_buffer_size = 1024 * 1024;
+
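+ // Illustrative sketch (sizes hypothetical; WriteBufferManager is declared
+ // in rocksdb/write_buffer_manager.h): capping total memtable memory across
+ // two databases by sharing one manager:
+ //
+ //   auto wbm =
+ //       std::make_shared<rocksdb::WriteBufferManager>(512 * 1024 * 1024);
+ //   options_db1.write_buffer_manager = wbm;  // same object in both DBs
+ //   options_db2.write_buffer_manager = wbm;
+ //
+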
+ // This is the maximum buffer size that is used by WritableFileWriter.
+ // With direct IO, we need to maintain an aligned buffer for writes.
+ // We allow the buffer to grow until its size hits the limit in buffered
+ // IO and fix the buffer size when using direct IO to ensure alignment of
+ // write requests if the logical sector size is unusual
+ //
+ // Default: 1024 * 1024 (1 MB)
+ //
+ // Dynamically changeable through SetDBOptions() API.
+ size_t writable_file_max_buffer_size = 1024 * 1024;
+
+ // Use adaptive mutex, which spins in the user space before resorting
+ // to the kernel. This could reduce context switches when the mutex is not
+ // heavily contended. However, if the mutex is hot, we could end up
+ // wasting spin time.
+ // Default: false
+ bool use_adaptive_mutex = false;
+
+ // Create DBOptions with default values for all fields
+ DBOptions();
+ // Create DBOptions from Options
+ explicit DBOptions(const Options& options);
+
+ void Dump(Logger* log) const;
+
+ // Allows OS to incrementally sync files to disk while they are being
+ // written, asynchronously, in the background. This operation can be used
+ // to smooth out write I/Os over time. Users shouldn't rely on it for
+ // persistence guarantees.
+ // Issue one request for every bytes_per_sync written. 0 turns it off.
+ //
+ // You may consider using rate_limiter to regulate write rate to device.
+ // When rate limiter is enabled, it automatically enables bytes_per_sync
+ // to 1MB.
+ //
+ // This option applies to table files
+ //
+ // Default: 0, turned off
+ //
+ // Note: DOES NOT apply to WAL files. See wal_bytes_per_sync instead
+ // Dynamically changeable through SetDBOptions() API.
+ uint64_t bytes_per_sync = 0;
+
+ // Same as bytes_per_sync, but applies to WAL files
+ //
+ // Default: 0, turned off
+ //
+ // Dynamically changeable through SetDBOptions() API.
+ uint64_t wal_bytes_per_sync = 0;
+
+ // When true, guarantees WAL files have at most `wal_bytes_per_sync`
+ // bytes submitted for writeback at any given time, and SST files have at most
+ // `bytes_per_sync` bytes pending writeback at any given time. This can be
+ // used to handle cases where processing speed exceeds I/O speed during file
+ // generation, which can lead to a huge sync when the file is finished, even
+ // with `bytes_per_sync` / `wal_bytes_per_sync` properly configured.
+ //
+ // - If `sync_file_range` is supported it achieves this by waiting for any
+ //   prior `sync_file_range`s to finish before proceeding. In this way,
+ //   processing (compression, etc.) can proceed uninhibited in the gap
+ //   between `sync_file_range`s, and we block only when I/O falls behind.
+ // - Otherwise the `WritableFile::Sync` method is used. Note this mechanism
+ //   always blocks, thus preventing the interleaving of I/O and processing.
+ //
+ // Note: Enabling this option does not provide any additional persistence
+ // guarantees, as it may use `sync_file_range`, which does not write out
+ // metadata.
+ //
+ // Default: false
+ bool strict_bytes_per_sync = false;
+
+ // A vector of EventListeners whose callback functions will be called
+ // when specific RocksDB events happen.
+ std::vector<std::shared_ptr<EventListener>> listeners;
+
+ // If true, then the status of the threads involved in this DB will
+ // be tracked and available via GetThreadList() API.
+ //
+ // Default: false
+ bool enable_thread_tracking = false;
+
+ // The limited write rate to DB if soft_pending_compaction_bytes_limit or
+ // level0_slowdown_writes_trigger is triggered, or we are writing to the
+ // last mem table allowed and we allow more than 3 mem tables. It is
+ // calculated using the size of user write requests before compression.
+ // RocksDB may decide to slow down more if the compaction still
+ // gets behind further.
+ // If the value is 0, we will infer a value from the `rate_limiter` value
+ // if it is not empty, or 16MB if `rate_limiter` is empty. Note that
+ // if users change the rate in `rate_limiter` after DB is opened,
+ // `delayed_write_rate` won't be adjusted.
+ //
+ // Unit: byte per second.
+ //
+ // Default: 0
+ //
+ // Dynamically changeable through SetDBOptions() API.
+ uint64_t delayed_write_rate = 0;
+
+ // By default, a single write thread queue is maintained. The thread that
+ // reaches the head of the queue becomes the write batch group leader and is
+ // responsible for writing to the WAL and memtable for the batch group.
+ //
+ // If enable_pipelined_write is true, a separate write thread queue is
+ // maintained for WAL write and memtable write. A write thread first enters
+ // the WAL writer queue and then the memtable writer queue. Pending threads
+ // on the WAL writer queue thus only have to wait for previous writers to
+ // finish their WAL writing but not the memtable writing. Enabling the
+ // feature may improve write throughput and reduce latency of the prepare
+ // phase of two-phase commit.
+ //
+ // Default: false
+ bool enable_pipelined_write = false;
+
+ // Setting unordered_write to true trades the immutability guarantee of
+ // snapshots for higher write throughput. This violates the
+ // repeatability one expects from ::Get from a snapshot, as well as
+ // ::MultiGet and Iterator's consistent-point-in-time view property.
+ // If the application cannot tolerate the relaxed guarantees, it can implement
+ // its own mechanisms to work around that and yet benefit from the higher
+ // throughput. Using TransactionDB with WRITE_PREPARED write policy and
+ // two_write_queues=true is one way to achieve immutable snapshots despite
+ // unordered_write.
+ //
+ // By default, i.e., when it is false, rocksdb does not advance the sequence
+ // number for new snapshots unless all the writes with lower sequence numbers
+ // are already finished. This provides the immutability that we expect from
+ // snapshots. Moreover, since Iterator and MultiGet internally depend on
+ // snapshots, the snapshot immutability results in Iterator and MultiGet
+ // offering a consistent-point-in-time view. If set to true, although
+ // Read-Your-Own-Write property is still provided, the snapshot immutability
+ // property is relaxed: the writes issued after the snapshot is obtained (with
+ // larger sequence numbers) will still not be visible to the reads from that
+ // snapshot; however, there still might be pending writes (with lower sequence
+ // numbers) that will change the state visible to the snapshot after they
+ // land in the memtable.
+ //
+ // Default: false
+ bool unordered_write = false;
+
+ // If true, allow multi-writers to update mem tables in parallel.
+ // Only some memtable_factory-s support concurrent writes; currently it
+ // is implemented only for SkipListFactory. Concurrent memtable writes
+ // are not compatible with inplace_update_support or filter_deletes.
+ // It is strongly recommended to set enable_write_thread_adaptive_yield
+ // if you are going to use this feature.
+ //
+ // Default: true
+ bool allow_concurrent_memtable_write = true;
+
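+ // Illustrative sketch (flags hypothetical, not upstream guidance): a
+ // write-heavy workload might combine the write-path options above as:
+ //
+ //   rocksdb::DBOptions db_opts;
+ //   db_opts.enable_pipelined_write = true;
+ //   db_opts.allow_concurrent_memtable_write = true;  // SkipList memtables
+ //   db_opts.enable_write_thread_adaptive_yield = true;
+ //
+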
+ // If true, threads synchronizing with the write batch group leader will
+ // wait for up to write_thread_max_yield_usec before blocking on a mutex.
+ // This can substantially improve throughput for concurrent workloads,
+ // regardless of whether allow_concurrent_memtable_write is enabled.
+ //
+ // Default: true
+ bool enable_write_thread_adaptive_yield = true;
+
+ // The maximum limit of number of bytes that are written in a single batch
+ // of WAL or memtable write. It is enforced only when the leader's write
+ // size is larger than 1/8 of this limit.
+ //
+ // Default: 1 MB
+ uint64_t max_write_batch_group_size_bytes = 1 << 20;
+
+ // The maximum number of microseconds that a write operation will use
+ // a yielding spin loop to coordinate with other write threads before
+ // blocking on a mutex. (Assuming write_thread_slow_yield_usec is
+ // set properly) increasing this value is likely to increase RocksDB
+ // throughput at the expense of increased CPU usage.
+ //
+ // Default: 100
+ uint64_t write_thread_max_yield_usec = 100;
+
+ // The latency in microseconds after which a std::this_thread::yield
+ // call (sched_yield on Linux) is considered to be a signal that
+ // other processes or threads would like to use the current core.
+ // Increasing this makes writer threads more likely to take CPU
+ // by spinning, which will show up as an increase in the number of
+ // involuntary context switches.
+ //
+ // Default: 3
+ uint64_t write_thread_slow_yield_usec = 3;
+
+ // If true, then DB::Open() will not update the statistics used to optimize
+ // compaction decisions by loading table properties from many files.
+ // Skipping the update (i.e. setting this to true) improves DB::Open time,
+ // especially on disk-backed storage.
+ //
+ // Default: false
+ bool skip_stats_update_on_db_open = false;
+
+ // If true, then DB::Open() will not fetch and check sizes of all sst files.
+ // This may significantly speed up startup if there are many sst files,
+ // especially when using non-default Env with expensive GetFileSize().
+ // We'll still check that all required sst files exist.
+ // If paranoid_checks is false, this option is ignored, and sst files are
+ // not checked at all.
+ //
+ // Default: false
+ bool skip_checking_sst_file_sizes_on_db_open = false;
+
+ // Recovery mode to control the consistency while replaying WAL
+ // Default: kPointInTimeRecovery
+ WALRecoveryMode wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
+
+ // if set to false then recovery will fail when a prepared
+ // transaction is encountered in the WAL
+ bool allow_2pc = false;
+
+ // A global cache for table-level rows.
+ // Default: nullptr (disabled)
+ std::shared_ptr<Cache> row_cache = nullptr;
+
+ // A filter object supplied to be invoked while processing write-ahead-logs
+ // (WALs) during recovery. The filter provides a way to inspect log
+ // records, ignoring a particular record or skipping replay.
+ // The filter is invoked at startup and is currently invoked from a
+ // single thread.
+ WalFilter* wal_filter = nullptr;
+
+ // If true, then DB::Open / CreateColumnFamily / DropColumnFamily /
+ // SetOptions will fail if the options file is not properly persisted.
+ //
+ // DEFAULT: false
+ bool fail_if_options_file_error = false;
+
+ // If true, then print malloc stats together with rocksdb.stats
+ // when printing to LOG.
+ // DEFAULT: false
+ bool dump_malloc_stats = false;
+
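+ // Illustrative sketch (cache capacity hypothetical): pairing a stricter
+ // WAL recovery mode with a row cache built via NewLRUCache from
+ // rocksdb/cache.h:
+ //
+ //   rocksdb::DBOptions db_opts;
+ //   db_opts.wal_recovery_mode =
+ //       rocksdb::WALRecoveryMode::kAbsoluteConsistency;
+ //   db_opts.row_cache = rocksdb::NewLRUCache(64 * 1024 * 1024);
+ //
+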
+ // By default RocksDB replays WAL logs and flushes them on DB open, which may
+ // create very small SST files. If this option is enabled, RocksDB will try
+ // to avoid (but not guarantee not to) flushing during recovery. Also,
+ // existing WAL logs will be kept, so that if a crash happened before the
+ // flush, we still have logs to recover from.
+ //
+ // DEFAULT: false
+ bool avoid_flush_during_recovery = false;
+
+ // By default RocksDB will flush all memtables on DB close if there is
+ // unpersisted data (i.e. with WAL disabled). The flush can be skipped to
+ // speed up DB close. Unpersisted data WILL BE LOST.
+ //
+ // DEFAULT: false
+ //
+ // Dynamically changeable through SetDBOptions() API.
+ bool avoid_flush_during_shutdown = false;
+
+ // Set this option to true during creation of database if you want
+ // to be able to ingest behind (call IngestExternalFile() skipping keys
+ // that already exist, rather than overwriting matching keys).
+ // Setting this option to true will affect 2 things:
+ // 1) Disable some internal optimizations around SST file compression
+ // 2) Reserve bottom-most level for ingested files only.
+ // Note that only universal compaction supports reserving the last level
+ // for file ingestion only.
+ // `num_levels` should be >= 3 if this option is turned on.
+ //
+ // DEFAULT: false
+ // Immutable.
+ bool allow_ingest_behind = false;
+
+ // If enabled it uses two queues for writes, one for the ones with
+ // disable_memtable and one for the ones that also write to memtable. This
+ // allows the memtable writes not to lag behind other writes. It can be used
+ // to optimize MySQL 2PC in which only the commits, which are serial, write to
+ // memtable.
+ bool two_write_queues = false;
+
+ // If true, the WAL is not flushed automatically after each write. Instead
+ // it relies on manual invocation of FlushWAL to write the WAL buffer to its
+ // file.
+ bool manual_wal_flush = false;
+
+ // This feature is WORK IN PROGRESS
+ // If enabled WAL records will be compressed before they are written.
+ // Only zstd is supported. Compressed WAL records will be read in supported
+ // versions regardless of the wal_compression settings.
+ CompressionType wal_compression = kNoCompression;
+
+ // If true, RocksDB supports flushing multiple column families and committing
+ // their results atomically to MANIFEST. Note that it is not
+ // necessary to set atomic_flush to true if WAL is always enabled since WAL
+ // allows the database to be restored to the last persistent state in WAL.
+ // This option is useful when there are column families with writes NOT
+ // protected by WAL.
+ // For manual flush, application has to specify which column families to
+ // flush atomically in DB::Flush.
+ // For auto-triggered flush, RocksDB atomically flushes ALL column families.
+ //
+ // Currently, any WAL-enabled writes after atomic flush may be replayed
+ // independently if the process crashes later and tries to recover.
+ bool atomic_flush = false;
+
+ // If true, working threads may avoid doing unnecessary and long-latency
+ // operations (such as deleting obsolete files directly or deleting memtables)
+ // and will instead schedule a background job to do it.
+ // Use it if you're latency-sensitive.
+ // If set to true, takes precedence over
+ // ReadOptions::background_purge_on_iterator_cleanup.
+ bool avoid_unnecessary_blocking_io = false;
+
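+ // Illustrative sketch (error handling elided): with manual_wal_flush
+ // enabled, an application batches writes and flushes the WAL explicitly
+ // via DB::FlushWAL:
+ //
+ //   db_opts.manual_wal_flush = true;
+ //   // ... open the DB, perform a burst of writes ...
+ //   rocksdb::Status s = db->FlushWAL(/*sync=*/true);  // write and fsync WAL
+ //
+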
+ // Historically the DB ID has always been stored in the Identity file in the
+ // DB folder. If this flag is true, the DB ID is written to the Manifest file
+ // in addition to the Identity file. By doing this 2 problems are solved:
+ // 1. We don't checksum the Identity file, whereas the Manifest file is
+ //    checksummed.
+ // 2. Since the source of truth for the DB is the Manifest file, the DB ID
+ //    will sit with the source of truth. Previously the Identity file could
+ //    be copied independent of the Manifest and that can result in a wrong
+ //    DB ID.
+ // We recommend setting this flag to true.
+ // Default: false
+ bool write_dbid_to_manifest = false;
+
+ // The number of bytes to prefetch when reading the log. This is mostly useful
+ // for reading a remotely located log, as it can reduce the number of
+ // round-trips. If 0, then the prefetching is disabled.
+ //
+ // Default: 0
+ size_t log_readahead_size = 0;
+
+ // If user does NOT provide the checksum generator factory, the file checksum
+ // will NOT be used. A new file checksum generator object will be created
+ // when a SST file is created. Therefore, each created FileChecksumGenerator
+ // will only be used from a single thread and so does not need to be
+ // thread-safe.
+ //
+ // Default: nullptr
+ std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory = nullptr;
+
+ // By default, RocksDB will attempt to detect any data losses or corruptions
+ // in DB files and return an error to the user, either at DB::Open time or
+ // later during DB operation. The exception to this policy is the WAL file,
+ // whose recovery is controlled by the wal_recovery_mode option.
+ //
+ // Best-efforts recovery (this option set to true) signals a preference for
+ // opening the DB to any point-in-time valid state for each column family,
+ // including the empty/new state, versus the default of returning non-WAL
+ // data losses to the user as errors. In terms of RocksDB user data, this
+ // is like applying WALRecoveryMode::kPointInTimeRecovery to each column
+ // family rather than just the WAL.
+ //
+ // Best-efforts recovery (BER) is specifically designed to recover a DB with
+ // files that are missing or truncated to some smaller size, such as the
+ // result of an incomplete DB "physical" (FileSystem) copy. BER can also
+ // detect when an SST file has been replaced with a different one of the
+ // same size (assuming SST unique IDs are tracked in DB manifest).
+ // BER is not yet designed to produce a usable DB from other corruptions to
+ // DB files (which should generally be detectable by DB::VerifyChecksum()),
+ // and BER does not yet attempt to recover any WAL files.
+ //
+ // For example, if an SST or blob file referenced by the MANIFEST is missing,
+ // BER might be able to find a set of files corresponding to an old "point in
+ // time" version of the column family, possibly from an older MANIFEST
+ // file. Some other kinds of DB files (e.g. CURRENT, LOCK, IDENTITY) are
+ // either ignored or replaced with BER, or quietly fixed regardless of BER
+ // setting. BER does require at least one valid MANIFEST to recover to a
+ // non-trivial DB state, unlike `ldb repair`.
+ //
+ // Currently, best_efforts_recovery=true is not compatible with atomic flush.
+ //
+ // Default: false
+ bool best_efforts_recovery = false;
+
+ // It defines how many times DB::Resume() is called by a separate thread when
+ // a background retryable IO Error happens. When a background retryable IO
+ // Error happens, SetBGError is called to deal with the error. If the error
+ // can be auto-recovered (e.g., retryable IO Error during Flush or WAL write),
+ // then db resume is called in background to recover from the error. If this
+ // value is 0 or negative, DB::Resume() will not be called automatically.
+ //
+ // Default: INT_MAX
+ int max_bgerror_resume_count = INT_MAX;
+
+ // If max_bgerror_resume_count is >= 2, db resume is called multiple times.
+ // This option decides how long to wait to retry the next resume if the
+ // previous resume fails and the conditions for retrying the resume are
+ // satisfied.
+ //
+ // Default: 1000000 (microseconds).
+ uint64_t bgerror_resume_retry_interval = 1000000;
+
+ // Allows users to opt in to error messages containing corrupted
+ // keys/values. Corrupt keys and values will be logged in the
+ // messages/logs/status, giving users useful information regarding the
+ // affected data. By default this is set to false, to prevent user data
+ // from being exposed in logs, messages, etc.
+ //
+ // Default: false
+ bool allow_data_in_errors = false;
+
+ // A string identifying the machine hosting the DB. This
+ // will be written as a property in every SST file written by the DB (or
+ // by offline writers such as SstFileWriter and RepairDB). It can be useful
+ // for troubleshooting in memory corruption caused by a failing host when
+ // writing a file, by tracing back to the writing host. These corruptions
+ // may not be caught by the checksum since they happen before checksumming.
+ // If left as default, the table writer will substitute it with the actual
+ // hostname when writing the SST file. If set to an empty string, the
+ // property will not be written to the SST file.
+ //
+ // Default: hostname
+ std::string db_host_id = kHostnameForDbHostId;
+
+ // Use this if your DB wants to enable checksum handoff for writes of
+ // specific file types. Make sure that the file system you use supports
+ // crc32c checksum verification.
+ // Currently supported file types: kWALFile, kTableFile, kDescriptorFile.
+ // NOTE: currently RocksDB only generates a crc32c based checksum for the
+ // handoff. If the storage layer has different checksum support, the user
+ // should leave this set empty. Otherwise, it may cause unexpected
+ // write failures.
+ FileTypeSet checksum_handoff_file_types;
+
+ // EXPERIMENTAL
+ // CompactionService is a feature that allows the user to run compactions on
+ // a different host or process, which offloads the background load from the
+ // primary host.
+ // It's an experimental feature; the interface will be changed without
+ // backward/forward compatibility support for now. Some known issues are still
+ // under development.
+ std::shared_ptr<CompactionService> compaction_service = nullptr;
+
+ // Indicates the lowest cache tier we want to
+ // use for a certain DB. Currently we support volatile_tier and
+ // non_volatile_tier. They are layered. By setting it to kVolatileTier, only
+ // the block cache (the currently implemented volatile_tier) is used. So
+ // cache entries will not spill to the secondary cache (the currently
+ // implemented non_volatile_tier), and block cache lookup misses will not
+ // look up in the secondary cache. When kNonVolatileBlockTier is used, we use
+ // both the block cache and the secondary cache.
+ //
+ // Default: kNonVolatileBlockTier
+ CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier;
+
+ // If set to false, when compaction or flush sees a SingleDelete followed by
+ // a Delete for the same user key, the compaction job will not fail.
+ // Otherwise, the compaction job will fail.
+ // This is a temporary option to help existing use cases migrate, and
+ // will be removed in a future release.
+ // Warning: do not set to false unless you are trying to migrate existing
+ // data in which the contract of single delete
+ // (https://github.com/facebook/rocksdb/wiki/Single-Delete) is not enforced,
+ // and thus has Delete mixed with SingleDelete for the same user key.
+ // Violation of the contract leads to undefined behavior with a high
+ // possibility of data inconsistency, e.g. deleted old data becoming
+ // visible again, etc.
+ bool enforce_single_del_contracts = true;
+};
+
+// Options to control the behavior of a database (passed to DB::Open)
+struct Options : public DBOptions, public ColumnFamilyOptions {
+ // Create an Options object with default values for all fields.
+ Options() : DBOptions(), ColumnFamilyOptions() {}
+
+ Options(const DBOptions& db_options,
+         const ColumnFamilyOptions& column_family_options)
+     : DBOptions(db_options), ColumnFamilyOptions(column_family_options) {}
+
+ // Change to some default settings from an older version.
+ // NOT MAINTAINED: This function has not been and is not maintained.
+ // DEPRECATED: This function might be removed in a future release.
+ // In general, defaults are changed to suit broad interests. Opting
+ // out of a change on upgrade should be deliberate and considered.
+ Options* OldDefaults(int rocksdb_major_version = 4,
+                      int rocksdb_minor_version = 6);
+
+ void Dump(Logger* log) const;
+
+ void DumpCFOptions(Logger* log) const;
+
+ // Some functions that make it easier to optimize RocksDB
+
+ // Set appropriate parameters for bulk loading.
+ // The reason that this is a function that returns "this" instead of a
+ // constructor is to enable chaining of multiple similar calls in the future.
+ //
+ // All data will be in level 0 without any automatic compaction.
+ // It's recommended to manually call CompactRange(NULL, NULL) before reading
+ // from the database, because otherwise the read can be very slow.
+ Options* PrepareForBulkLoad();
+
+ // Use this if your DB is very small (like under 1GB) and you don't want to
+ // spend lots of memory for memtables.
+ Options* OptimizeForSmallDb();
+
+ // Disable some checks that should not be necessary in the absence of
+ // software logic errors or CPU+memory hardware errors. This can improve
+ // write speeds but is only recommended for temporary use. Does not
+ // change protection against corrupt storage (e.g. verify_checksums).
+ Options* DisableExtraChecks();
+};
+
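+// Illustrative usage (a sketch; the path is hypothetical): the helpers above
+// return "this", so they can be chained before opening the DB:
+//
+//   rocksdb::Options opts;
+//   opts.PrepareForBulkLoad()->OptimizeForSmallDb();
+//   rocksdb::DB* db = nullptr;
+//   rocksdb::Status s = rocksdb::DB::Open(opts, "/tmp/example-db", &db);
+//
+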
+// An application can issue a read request (via Get/Iterators) and specify
+// if that read should process data that ALREADY resides on a specified cache
+// level. For example, if an application specifies kBlockCacheTier then the
+// Get call will process data that is already processed in the memtable or
+// the block cache. It will not page in data from the OS cache or data that
+// resides in storage.
+enum ReadTier {
+ kReadAllTier = 0x0,     // data in memtable, block cache, OS cache or storage
+ kBlockCacheTier = 0x1,  // data in memtable or block cache
+ kPersistedTier = 0x2,   // persisted data. When WAL is disabled, this option
+                         // will skip data in memtable.
+                         // Note that this ReadTier currently only supports
+                         // Get and MultiGet and does not support iterators.
+ kMemtableTier = 0x3     // data in memtable. used for memtable-only iterators.
+};
+
+// Options that control read operations
+struct ReadOptions {
+ // *** BEGIN options relevant to point lookups as well as scans ***
+
+ // If "snapshot" is non-nullptr, read as of the supplied snapshot
+ // (which must belong to the DB that is being read and which must
+ // not have been released). If "snapshot" is nullptr, use an implicit
+ // snapshot of the state at the beginning of this read operation.
+ const Snapshot* snapshot = nullptr;
+
+ // Timestamp of operation. Read should return the latest data visible to the
+ // specified timestamp. All timestamps of the same database must be of the
+ // same length and format. The user is responsible for providing a customized
+ // compare function via Comparator to order tuples.
+ // For iterator, iter_start_ts is the lower bound (older) and timestamp
+ // serves as the upper bound. Versions of the same record that fall in
+ // the timestamp range will be returned. If iter_start_ts is nullptr,
+ // only the most recent version visible to timestamp is returned.
+ // The user-specified timestamp feature is still under active development,
+ // and the API is subject to change.
+ const Slice* timestamp = nullptr;
+ const Slice* iter_start_ts = nullptr;
+
+ // Deadline for completing an API call (Get/MultiGet/Seek/Next for now)
+ // in microseconds.
+ // It should be set to microseconds since epoch, i.e., gettimeofday or
+ // equivalent plus allowed duration in microseconds. The best way is to use
+ // env->NowMicros() + some timeout.
+ // This is best-effort. The call may exceed the deadline if there is IO
+ // involved and the file system doesn't support deadlines, or due to
+ // checking for the deadline periodically rather than for every key when
+ // processing a batch.
+ std::chrono::microseconds deadline = std::chrono::microseconds::zero();
+
+ // A timeout in microseconds to be passed to the underlying FileSystem for
+ // reads. As opposed to deadline, this determines the timeout for each
+ // individual file read request. If a MultiGet/Get/Seek/Next etc call
+ // results in multiple reads, each read can last up to io_timeout us.
+ std::chrono::microseconds io_timeout = std::chrono::microseconds::zero();
+
+ // Specify if this read request should process data that ALREADY
+ // resides on a particular cache. If the required data is not
+ // found at the specified cache, then Status::Incomplete is returned.
+ ReadTier read_tier = kReadAllTier;
+
+ // For file reads associated with this option, charge the internal rate
+ // limiter (see `DBOptions::rate_limiter`) at the specified priority. The
+ // special value `Env::IO_TOTAL` disables charging the rate limiter.
+ //
+ // The rate limiting is bypassed, no matter this option's value, for file
+ // reads on plain tables (these can exist when
+ // `ColumnFamilyOptions::table_factory` is a `PlainTableFactory`) and cuckoo
+ // tables (these can exist when `ColumnFamilyOptions::table_factory` is a
+ // `CuckooTableFactory`).
+ //
+ // The bytes charged to rate limiter may not exactly match the file read bytes
+ // since there are some seemingly insignificant reads, like for file
+ // headers/footers, that we currently do not charge to rate limiter.
+ Env::IOPriority rate_limiter_priority = Env::IO_TOTAL;
+
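+ // Illustrative usage (a sketch; error handling elided): pinning a
+ // consistent view across several reads with a snapshot:
+ //
+ //   const rocksdb::Snapshot* snap = db->GetSnapshot();
+ //   rocksdb::ReadOptions ro;
+ //   ro.snapshot = snap;
+ //   std::string v1, v2;
+ //   db->Get(ro, "key1", &v1);
+ //   db->Get(ro, "key2", &v2);  // same point-in-time view as v1
+ //   db->ReleaseSnapshot(snap);
+ //
+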
+ // It limits the maximum cumulative value size of the keys in a batch while
+ // reading through MultiGet. Once the cumulative value size exceeds this
+ // soft limit, all the remaining keys are returned with status Aborted.
+ uint64_t value_size_soft_limit = std::numeric_limits<uint64_t>::max();
+
+ // If true, all data read from underlying storage will be
+ // verified against corresponding checksums.
+ bool verify_checksums = true;
+
+ // Should the "data block"/"index block" read for this iteration be placed in
+ // block cache?
+ // Callers may wish to set this field to false for bulk scans.
+ // This would help not to change the eviction order of existing items in the
+ // block cache.
+ bool fill_cache = true;
+
+ // If true, range tombstone handling will be skipped in key lookup paths.
+ // For DB instances that don't use DeleteRange() calls, this setting can
+ // be used to optimize the read performance.
+ // Note that, if this assumption (of no previous DeleteRange() calls) is
+ // broken, stale keys could be served in read paths.
+ bool ignore_range_deletions = false;
+
+ // Experimental
+ //
+ // If async_io is enabled, RocksDB will prefetch some data asynchronously.
+ // RocksDB applies it when reads are sequential and its internal automatic
+ // prefetching is in effect.
+ bool async_io = false;
+
+ // Experimental
+ //
+ // If async_io is set, then this flag controls whether we read SST files
+ // in multiple levels asynchronously. Enabling this flag can help reduce
+ // MultiGet latency by maximizing the number of SST files read in
+ // parallel if the keys in the MultiGet batch are in different levels. It
+ // comes at the expense of slightly higher CPU overhead.
+ bool optimize_multiget_for_io = true;
+
+ // *** END options relevant to point lookups (as well as scans) ***
+ // *** BEGIN options only relevant to iterators or scans ***
+
+ // RocksDB does auto-readahead for iterators on noticing more than two reads
+ // for a table file. The readahead starts at 8KB and doubles on every
+ // additional read up to 256KB.
+ // This option can help if most of the range scans are large, and if it is
+ // determined that a larger readahead than that enabled by auto-readahead is
+ // needed.
+ // Using a large readahead size (> 2MB) can typically improve the performance
+ // of forward iteration on spinning disks.
+ size_t readahead_size = 0;
+
+ // A threshold for the number of keys that can be skipped before failing an
+ // iterator seek as incomplete. The default value of 0 should be used to
+ // never fail a request as incomplete, even on skipping too many keys.
+ uint64_t max_skippable_internal_keys = 0;
+
+ // `iterate_lower_bound` defines the smallest key at which the backward
+ // iterator can return an entry. Once the bound is passed, Valid() will be
+ // false. `iterate_lower_bound` is inclusive, i.e., the bound value is a
+ // valid entry.
+ //
+ // If prefix_extractor is not null, the Seek target and `iterate_lower_bound`
+ // need to have the same prefix. This is because ordering is not guaranteed
+ // outside of the prefix domain.
+ //
+ // In case of user_defined timestamp, if enabled, iterate_lower_bound should
+ // point to a key without the timestamp part.
+ const Slice* iterate_lower_bound = nullptr;
+
+ // "iterate_upper_bound" defines the extent up to which the forward iterator
+ // can return entries. Once the bound is reached, Valid() will be false.
+ // "iterate_upper_bound" is exclusive, i.e., the bound value is
+ // not a valid entry. If prefix_extractor is not null:
+ // 1. If options.auto_prefix_mode = true, iterate_upper_bound will be used
+ //    to infer whether prefix iterating (e.g. applying prefix bloom filter)
+ //    can be used within RocksDB. This is done by comparing
+ //    iterate_upper_bound with the seek key.
+ // 2. If options.auto_prefix_mode = false, iterate_upper_bound only takes
+ //    effect if it shares the same prefix as the seek key. If
+ //    iterate_upper_bound is outside the prefix of the seek key, then keys
+ //    returned outside the prefix range will be undefined, just as if
+ //    iterate_upper_bound = null.
+ // If iterate_upper_bound is not null, SeekToLast() will position the iterator
+ // at the first key smaller than iterate_upper_bound.
+ //
+ // In case of user_defined timestamp, if enabled, iterate_upper_bound should
+ // point to a key without the timestamp part.
+ const Slice* iterate_upper_bound = nullptr;
+
+ // Specify to create a tailing iterator -- a special iterator that has a
+ // view of the complete database (i.e. it can also be used to read newly
+ // added data) and is optimized for sequential reads. It will return records
+ // that were inserted into the database after the creation of the iterator.
+ bool tailing = false;
+
+ // This option is not used anymore. It was to turn on a functionality that
+ // has been removed. DEPRECATED
+ bool managed = false;
+
+ // Enable a total order seek regardless of index format (e.g. hash index)
+ // used in the table. Some table formats (e.g. plain table) may not support
+ // this option.
+ // If true when calling Get(), we also skip prefix bloom when reading from
+ // block based table, which only affects Get() performance.
+ bool total_order_seek = false;
+
+ // When true, by default use total_order_seek = true, and RocksDB can
+ // selectively enable prefix seek mode if it won't generate a different
+ // result from total_order_seek, based on the seek key and the iterator
+ // upper bound.
+ // BUG: Using Comparator::IsSameLengthImmediateSuccessor and
+ // SliceTransform::FullLengthEnabled to enable prefix mode in cases where
+ // the prefix of the upper bound differs from the prefix of the seek key
+ // has a flaw.
+ // If present in the DB, "short keys" (shorter than "full length" prefix)
+ // can be omitted from auto_prefix_mode iteration when they would be present
+ // in total_order_seek iteration, regardless of whether the short keys are
+ // "in domain" of the prefix extractor. This is not an issue if no short
+ // keys are added to the DB or are not expected to be returned by such
+ // iterators. (We are also assuming the new condition on
+ // IsSameLengthImmediateSuccessor is satisfied; see its BUG section).
+ // A bug example is in DBTest2::AutoPrefixMode1, search for "BUG".
+ bool auto_prefix_mode = false;
+
+ // Enforce that the iterator only iterates over the same prefix as the seek.
+ // This option is effective only for prefix seeks, i.e. prefix_extractor is
+ // non-null for the column family and total_order_seek is false. Unlike
+ // iterate_upper_bound, prefix_same_as_start only works within a prefix
+ // but in both directions.
+ bool prefix_same_as_start = false;
+
+ // Keep the blocks loaded by the iterator pinned in memory as long as the
+ // iterator is not deleted. If used when reading from tables created with
+ // BlockBasedTableOptions::use_delta_encoding = false,
+ // Iterator's property "rocksdb.iterator.is-key-pinned" is guaranteed to
+ // return 1.
+ bool pin_data = false;
+
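+ // Illustrative usage (a sketch; key names hypothetical): bounding a forward
+ // scan so the iterator stops before "user2":
+ //
+ //   rocksdb::ReadOptions ro;
+ //   rocksdb::Slice upper("user2");
+ //   ro.iterate_upper_bound = &upper;  // must outlive the iterator
+ //   std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ro));
+ //   for (it->Seek("user1"); it->Valid(); it->Next()) { /* ... */ }
+ //
+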
+ // For iterators, RocksDB does auto-readahead on noticing more than two
+ // sequential reads for a table file if user doesn't provide readahead_size.
+ // The readahead starts at 8KB and doubles on every additional read up to
+ // max_auto_readahead_size only when reads are sequential. However at each
+ // level, if the iterator moves over to the next file, readahead_size starts
+ // again from 8KB.
+ //
+ // By enabling this option, RocksDB will do some enhancements for
+ // prefetching the data.
+ bool adaptive_readahead = false;
+
+ // If true, when PurgeObsoleteFile is called in CleanupIteratorState, we
+ // schedule a background job in the flush job queue and delete obsolete files
+ // in background.
+ bool background_purge_on_iterator_cleanup = false;
+
+ // A callback to determine whether relevant keys for this scan exist in a
+ // given table based on the table's properties. The callback is passed the
+ // properties of each table during iteration. If the callback returns false,
+ // the table will not be scanned. This option only affects Iterators and has
+ // no impact on point lookups.
+ // Default: empty (every table will be scanned)
+ std::function<bool(const TableProperties&)> table_filter;
+
+ // *** END options only relevant to iterators or scans ***
+
+ // ** For RocksDB internal use only **
+ Env::IOActivity io_activity = Env::IOActivity::kUnknown;
+
+ ReadOptions() {}
+ ReadOptions(bool _verify_checksums, bool _fill_cache);
+ explicit ReadOptions(Env::IOActivity _io_activity);
+};
+
+// Options that control write operations
+struct WriteOptions {
+ // If true, the write will be flushed from the operating system
+ // buffer cache (by calling WritableFile::Sync()) before the write
+ // is considered complete. If this flag is true, writes will be
+ // slower.
+ //
+ // If this flag is false, and the machine crashes, some recent
+ // writes may be lost. Note that if it is just the process that
+ // crashes (i.e., the machine does not reboot), no writes will be
+ // lost even if sync==false.
+ //
+ // In other words, a DB write with sync==false has similar
+ // crash semantics as the "write()" system call. A DB write
+ // with sync==true has similar crash semantics to a "write()"
+ // system call followed by "fdatasync()".
+ //
+ // Default: false
+ bool sync;
+
+ // If true, writes will not first go to the write ahead log,
+ // and the write may get lost after a crash. The backup engine
+ // relies on write-ahead logs to back up the memtable, so if
+ // you disable write-ahead logs, you must create backups with
+ // flush_before_backup=true to avoid losing unflushed memtable data.
+ // Default: false
+ bool disableWAL;
+
+ // If true and if user is trying to write to column families that don't exist
+ // (they were dropped), ignore the write (don't return an error). If there
+ // are multiple writes in a WriteBatch, other writes will succeed.
+ // Default: false
+ bool ignore_missing_column_families;
+
+ // If true and we need to wait or sleep for the write request, fails
+ // immediately with Status::Incomplete().
+ // Default: false
+ bool no_slowdown;
+
+ // If true, this write request is of lower priority if compaction is
+ // behind. In this case, if no_slowdown = true, the request will be canceled
+ // immediately with Status::Incomplete() returned. Otherwise, it will be
+ // slowed down. The slowdown value is determined by RocksDB to guarantee
+ // it introduces minimum impact to high priority writes.
+ //
+ // Default: false
+ bool low_pri;
+
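+ // Illustrative usage (a sketch; keys and values hypothetical): a durable
+ // write that syncs the WAL, and a fire-and-forget write that skips the WAL
+ // entirely (unflushed data is lost on a crash):
+ //
+ //   rocksdb::WriteOptions durable;
+ //   durable.sync = true;
+ //   db->Put(durable, "balance", "100");
+ //
+ //   rocksdb::WriteOptions fast;
+ //   fast.disableWAL = true;
+ //   db->Put(fast, "metrics", "42");
+ //
+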
+ // If true, this writebatch will maintain the last insert positions of each
+ // memtable as hints in concurrent write. It can improve write performance
+ // in concurrent writes if keys in one writebatch are sequential. In
+ // non-concurrent writes (when concurrent_memtable_writes is false) this
+ // option will be ignored.
+ //
+ // Default: false
+ bool memtable_insert_hint_per_batch;
+
+ // For writes associated with this option, charge the internal rate
+ // limiter (see `DBOptions::rate_limiter`) at the specified priority. The
+ // special value `Env::IO_TOTAL` disables charging the rate limiter.
+ //
+ // Currently the support covers automatic WAL flushes, which happen during
+ // live updates (`Put()`, `Write()`, `Delete()`, etc.)
+ // when `WriteOptions::disableWAL == false`
+ // and `DBOptions::manual_wal_flush == false`.
+ //
+ // Only `Env::IO_USER` and `Env::IO_TOTAL` are allowed
+ // due to implementation constraints.
+ //
+ // Default: `Env::IO_TOTAL`
+ Env::IOPriority rate_limiter_priority;
+
+ // `protection_bytes_per_key` is the number of bytes used to store
+ // protection information for each key entry. Currently supported values are
+ // zero (disabled) and eight.
+ //
+ // Default: zero (disabled).
+ size_t protection_bytes_per_key;
+
+ WriteOptions()
+     : sync(false),
+       disableWAL(false),
+       ignore_missing_column_families(false),
+       no_slowdown(false),
+       low_pri(false),
+       memtable_insert_hint_per_batch(false),
+       rate_limiter_priority(Env::IO_TOTAL),
+       protection_bytes_per_key(0) {}
+};
+
+// Options that control flush operations
+struct FlushOptions {
+ // If true, the flush will wait until the flush is done.
+ // Default: true
+ bool wait;
+ // If true, the flush will proceed immediately even if it means writes will
+ // stall for the duration of the flush; if false the operation will wait
+ // until it's possible to do the flush without causing a stall, or until the
+ // required flush is performed by someone else (foreground call or background
+ // thread).
+ // Default: false
+ bool allow_write_stall;
+ FlushOptions() : wait(true), allow_write_stall(false) {}
+};
+
+// Create a Logger from provided DBOptions
+extern Status CreateLoggerFromOptions(const std::string& dbname,
+                                      const DBOptions& options,
+                                      std::shared_ptr<Logger>* logger);
+
+// CompactionOptions are used in CompactFiles() call.
+struct CompactionOptions {
+ // Compaction output compression type
+ // Default: snappy
+ // If set to `kDisableCompressionOption`, RocksDB will choose compression type
+ // according to the `ColumnFamilyOptions`, taking into account the output
+ // level if `compression_per_level` is specified.
+ CompressionType compression;
+ // Compaction will create files of size `output_file_size_limit`.
+ // Default: MAX, which means that compaction will create a single file
+ uint64_t output_file_size_limit;
+ // If > 0, it will replace the option in the DBOptions for this compaction.
+ uint32_t max_subcompactions;
+
+ CompactionOptions()
+     : compression(kSnappyCompression),
+       output_file_size_limit(std::numeric_limits<uint64_t>::max()),
+       max_subcompactions(0) {}
+};
+
+// For level based compaction, we can configure if we want to skip/force
+// bottommost level compaction.
+enum class BottommostLevelCompaction {
+ // Skip bottommost level compaction
+ kSkip,
+ // Only compact bottommost level if there is a compaction filter
+ // This is the default option
+ kIfHaveCompactionFilter,
+ // Always compact bottommost level
+ kForce,
+ // Always compact bottommost level but in bottommost level avoid
+ // double-compacting files created in the same compaction
+ kForceOptimized,
+};
+
+// For manual compaction, we can configure if we want to skip/force garbage
+// collection of blob files.
+enum class BlobGarbageCollectionPolicy {
+ // Force blob file garbage collection.
+ kForce,
+ // Skip blob file garbage collection.
+ kDisable,
+ // Inherit blob file garbage collection policy from ColumnFamilyOptions.
+ kUseDefault,
+};
+
+// CompactRangeOptions is used by CompactRange() call.
+struct CompactRangeOptions {
+ // If true, no other compaction will run at the same time as this
+ // manual compaction.
+ //
+ // Default: false
+ bool exclusive_manual_compaction = false;
+
+ // If true, compacted files will be moved to the minimum level capable
+ // of holding the data, or to a given level (specified by a non-negative
+ // target_level).
+ bool change_level = false;
+ // If change_level is true and target_level has a non-negative value,
+ // compacted files will be moved to target_level.
+ int target_level = -1;
+ // Compaction outputs will be placed in options.db_paths[target_path_id].
+ // Behavior is undefined if target_path_id is out of range.
+ uint32_t target_path_id = 0;
+ // By default level based compaction will only compact the bottommost level
+ // if there is a compaction filter
+ BottommostLevelCompaction bottommost_level_compaction =
+     BottommostLevelCompaction::kIfHaveCompactionFilter;
+ // If true, will execute immediately even if doing so would cause the DB to
+ // enter write stall mode. Otherwise, it'll sleep until load is low enough.
+ bool allow_write_stall = false;
+ // If > 0, it will replace the option in the DBOptions for this compaction.
+ uint32_t max_subcompactions = 0;
+ // Set the user-defined timestamp low bound; data with a timestamp older
+ // than the low bound may be GCed by compaction. Default: nullptr
+ const Slice* full_history_ts_low = nullptr;
+
+ // Allows cancellation of an in-progress manual compaction.
+ //
+ // Cancellation can be delayed waiting on automatic compactions when used
+ // together with `exclusive_manual_compaction == true`.
+ std::atomic<bool>* canceled = nullptr;
+ // NOTE: Calling DisableManualCompaction() overwrites the user-provided
+ // canceled variable in CompactRangeOptions.
+ // Typically, when CompactRange is being called in one thread (t1) with
+ // canceled = false, and DisableManualCompaction is being called in the
+ // other thread (t2), manual compaction is disabled normally, even if the
+ // compaction iterator may still scan a few items before *canceled is
+ // set to true
+
+ // If set to kForce, RocksDB will override enable_blob_file_garbage_collection
+ // to true; if set to kDisable, RocksDB will override it to false, and
+ // kUseDefault leaves the setting in effect. This enables customers to both
+ // force-enable and force-disable GC when calling CompactRange.
+ BlobGarbageCollectionPolicy blob_garbage_collection_policy =
+     BlobGarbageCollectionPolicy::kUseDefault;
+
+ // If set to < 0 or > 1, RocksDB leaves blob_garbage_collection_age_cutoff
+ // from ColumnFamilyOptions in effect. Otherwise, it will override the
+ // user-provided setting. This enables customers to selectively override the
+ // age cutoff.
+ double blob_garbage_collection_age_cutoff = -1;
+};
+
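+// Illustrative usage (a sketch; the target level is hypothetical): a manual
+// full-range compaction that also pushes data down to a given level:
+//
+//   rocksdb::CompactRangeOptions cro;
+//   cro.change_level = true;
+//   cro.target_level = 2;
+//   db->CompactRange(cro, nullptr, nullptr);  // nullptr bounds = whole range
+//
+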
+// IngestExternalFileOptions is used by IngestExternalFile()
+struct IngestExternalFileOptions {
+ // Can be set to true to move the files instead of copying them.
+ bool move_files = false;
+ // If set to true, ingestion falls back to copy when move fails.
+ bool failed_move_fall_back_to_copy = true;
+ // If set to false, keys of an ingested file could appear in existing
+ // snapshots that were created before the file was ingested.
+ bool snapshot_consistency = true;
+ // If set to false, IngestExternalFile() will fail if the file key range
+ // overlaps with existing keys or tombstones or output of ongoing compaction
+ // during file ingestion in the DB (the conditions under which a global_seqno
+ // must be assigned to the ingested file).
+ bool allow_global_seqno = true;
+ // If set to false and the file key range overlaps with the memtable key range
+ // (memtable flush required), IngestExternalFile will fail.
+ bool allow_blocking_flush = true;
+ // Set to true if you would like duplicate keys in the file being ingested
+ // to be skipped rather than overwriting existing data under that key.
+ // Use case: back-fill of some historical data in the database without
+ // over-writing existing newer version of data.
+ // This option could only be used if the DB has been running
+ // with allow_ingest_behind=true since the dawn of time.
+ // All files will be ingested at the bottommost level with seqno=0.
+ bool ingest_behind = false;
+ // DEPRECATED - Set to true if you would like to write global_seqno to
+ // the external SST file on ingestion for backward compatibility before
+ // RocksDB 5.16.0. Such old versions of RocksDB expect any global_seqno to
+ // be written to the SST file rather than recorded in the DB manifest.
+ // This functionality was deprecated because (a) random writes might be
+ // costly or unsupported on some FileSystems, and (b) the file checksum
+ // changes with such a write.
+ bool write_global_seqno = false;
+ // Set to true if you would like to verify the checksums of each block of the
+ // external SST file before ingestion.
+ // Warning: setting this to true causes slowdown in file ingestion because
+ // the external SST file has to be read.
+ bool verify_checksums_before_ingest = false;
+ // When verify_checksums_before_ingest = true, RocksDB uses the default
+ // readahead setting to scan the file while verifying checksums before
+ // ingestion.
+ // Users can override the default value using this option.
+ // Using a large readahead size (> 2MB) can typically improve the performance
+ // of forward iteration on spinning disks.
+ size_t verify_checksums_readahead_size = 0;
+ // Set to TRUE if user wants to verify the sst file checksum of ingested
+ // files. The DB checksum function will generate the checksum of each
+ // ingested file (if file_checksum_gen_factory is set) and compare the
+ // checksum function name and checksum with the ingested checksum information.
+ //
+ // If this option is set to TRUE: 1) if the DB does not enable checksums
+ // (file_checksum_gen_factory == nullptr), the ingested checksum information
+ // will be ignored; 2) if the DB enables the checksum function, we calculate
+ // the sst file checksum after the file is moved or copied and compare the
+ // checksum and checksum name. If the checksum or checksum function name does
+ // not match, ingestion will fail. If the verification is successful, the
+ // checksum and checksum function name will be stored in the Manifest.
+ // If this option is set to FALSE: 1) if the DB does not enable checksums,
+ // the ingested checksum information will be ignored; 2) if the DB enables
+ // the checksum, we only verify the ingested checksum function name and we
+ // trust the ingested checksum. If the checksum function name matches, we
+ // store the checksum in the Manifest. The DB does not calculate the checksum
+ // during ingestion. However, if no checksum information is provided with the
+ // ingested files, the DB will generate the checksum and store it in the
+ // Manifest.
+ bool verify_file_checksum = true;
+ // Set to TRUE if user wants the file to be ingested to the bottommost level.
+ // An error of Status::TryAgain() will be returned if a file cannot fit in
+ // the bottommost level when calling
+ // DB::IngestExternalFile()/DB::IngestExternalFiles(). The user should clear
+ // the bottommost level in the overlapping range before re-attempting.
+ //
+ // ingest_behind takes precedence over fail_if_not_bottommost_level.
+ bool fail_if_not_bottommost_level = false;
+};
+
+enum TraceFilterType : uint64_t {
+ // Trace all the operations
+ kTraceFilterNone = 0x0,
+ // Do not trace the get operations
+ kTraceFilterGet = 0x1 << 0,
+ // Do not trace the write operations
+ kTraceFilterWrite = 0x1 << 1,
+ // Do not trace the `Iterator::Seek()` operations
+ kTraceFilterIteratorSeek = 0x1 << 2,
+ // Do not trace the `Iterator::SeekForPrev()` operations
+ kTraceFilterIteratorSeekForPrev = 0x1 << 3,
+ // Do not trace the `MultiGet()` operations
+ kTraceFilterMultiGet = 0x1 << 4,
+};
+
+// TraceOptions is used for StartTrace
+struct TraceOptions {
+ // To prevent the trace file from growing larger than the available storage
+ // space, the user can set the max trace file size in bytes. Default is 64GB.
+ uint64_t max_trace_file_size = uint64_t{64} * 1024 * 1024 * 1024;
+ // Specify trace sampling option, i.e. capture one per how many requests.
+ // Default to 1 (capture every request).
+ uint64_t sampling_frequency = 1;
+ // Note: The filtering happens before sampling.
+ uint64_t filter = kTraceFilterNone;
+ // When true, the order of write records in the trace will match the order of
+ // the corresponding write records in the WAL and applied to the DB. There may
+ // be a performance penalty associated with preserving this ordering.
+ //
+ // Default: false. This means write records in the trace may be in an order
+ // different from the WAL's order.
+ bool preserve_write_order = false;
+};
+
+// ImportColumnFamilyOptions is used by ImportColumnFamily()
+struct ImportColumnFamilyOptions {
+ // Can be set to true to move the files instead of copying them.
+ bool move_files = false;
+};
+
+// Options used with DB::GetApproximateSizes()
+struct SizeApproximationOptions {
+ // Defines whether the returned size should include the recently written
+ // data in the memtables. If set to false, include_files must be true.
+ bool include_memtables = false;
+ // Defines whether the returned size should include data serialized to disk.
+ // If set to false, include_memtables must be true.
+ bool include_files = true;
+ // When approximating the total size of files used to store a key range
+ // using DB::GetApproximateSizes, allow approximation with an error margin of
+ // up to total_files_size * files_size_error_margin. This allows taking some
+ // shortcuts in file size approximation, resulting in better performance,
+ // while guaranteeing the resulting error is within a reasonable margin.
+ // E.g., if the value is 0.1, then the error margin of the returned file size
+ // approximation will be within 10%.
+ // If the value is non-positive, a more precise yet more CPU-intensive
+ // estimation is performed.
+ double files_size_error_margin = -1.0;
+};
+
+struct CompactionServiceOptionsOverride {
+ // Currently pointer-valued configurations are not passed to the compaction
+ // service, so the user needs to set them here. This will be removed once
+ // pointer configuration passing is supported.
+ Env* env = Env::Default();
+ std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory = nullptr;
+
+ const Comparator* comparator = BytewiseComparator();
+ std::shared_ptr<MergeOperator> merge_operator = nullptr;
+ const CompactionFilter* compaction_filter = nullptr;
+ std::shared_ptr<CompactionFilterFactory> compaction_filter_factory = nullptr;
+ std::shared_ptr<const SliceTransform> prefix_extractor = nullptr;
+ std::shared_ptr<TableFactory> table_factory;
+ std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory = nullptr;
+
+ // Only a subset of events is triggered in the remote compaction worker,
+ // e.g.: `OnTableFileCreated`, `OnTableFileCreationStarted`,
+ // `ShouldBeNotifiedOnFileIO`, `OnSubcompactionBegin`,
+ // `OnSubcompactionCompleted`, etc. Note that `OnCompactionBegin` and
+ // `OnCompactionCompleted` won't be triggered; they will be triggered on the
+ // primary DB side.
+ std::vector<std::shared_ptr<EventListener>> listeners;
+
+ // statistics is used to collect DB operation metrics. The metrics won't be
+ // returned to the CompactionService primary host; to collect them, the user
+ // needs to set it here.
+ std::shared_ptr<Statistics> statistics = nullptr;
+
+ // Only compaction-generated SST files use these user-defined table
+ // properties collectors.
+ std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
+     table_properties_collector_factories;
+};
+
+struct OpenAndCompactOptions {
+ // Allows cancellation of an in-progress compaction.
+ std::atomic<bool>* canceled = nullptr;
+};
+
+struct LiveFilesStorageInfoOptions {
+ // Whether to populate FileStorageInfo::file_checksum* or leave blank
+ bool include_checksum_info = false;
+ // Flushes memtables if total size in bytes of live WAL files is >= this
+ // number (and DB is not read-only).
+ // Default: always force a flush without checking sizes.
+ uint64_t wal_size_for_flush = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/perf_context.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/perf_context.h
new file mode 100644
index 0000000000..46266abbaa
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/perf_context.h
@@ -0,0 +1,314 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+
+#include "rocksdb/perf_level.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+/*
+ * NOTE:
+ * Please do not reorder the fields in this structure. If you plan to do that
+ * or add/remove fields to this structure, builds would fail. The way to fix
+ * the builds would be to add the appropriate fields to the
+ * DEF_PERF_CONTEXT_LEVEL_METRICS() macro in the perf_context.cc file.
+ */
+
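+// Illustrative usage (a sketch; counter names as declared below): collecting
+// per-query counters with the perf context, using SetPerfLevel from
+// rocksdb/perf_level.h and get_perf_context() from this header:
+//
+//   rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTimeExceptForMutex);
+//   rocksdb::get_perf_context()->Reset();
+//   // ... issue some reads ...
+//   uint64_t hits = rocksdb::get_perf_context()->block_cache_hit_count;
+//
+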
+ uint64_t bloom_filter_full_true_positive = 0; + + // total number of user key returned (only include keys that are found, does + // not include keys that are deleted or merged without a final put + uint64_t user_key_return_count = 0; + + // total nanos spent on reading data from SST files + uint64_t get_from_table_nanos = 0; + + uint64_t block_cache_hit_count = 0; // total number of block cache hits + uint64_t block_cache_miss_count = 0; // total number of block cache misses +}; + +// A thread local context for gathering performance counter efficiently +// and transparently. +// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats. + +// Break down performance counters by level and store per-level perf context in +// PerfContextByLevel +struct PerfContextByLevel : public PerfContextByLevelBase { + void Reset(); // reset all performance counters to zero +}; + +/* + * NOTE: + * Please do not reorder the fields in this structure. If you plan to do that or + * add/remove fields to this structure, builds would fail. The way to fix the + * builds would be to add the appropriate fields to the + * DEF_PERF_CONTEXT_METRICS() macro in the perf_context.cc file. + */ + +struct PerfContextBase { + uint64_t user_key_comparison_count; // total number of user key comparisons + uint64_t block_cache_hit_count; // total number of block cache hits + uint64_t block_read_count; // total number of block reads (with IO) + uint64_t block_read_byte; // total number of bytes from block reads + uint64_t block_read_time; // total nanos spent on block reads + // total cpu time in nanos spent on block reads + uint64_t block_read_cpu_time; + uint64_t block_cache_index_hit_count; // total number of index block hits + // total number of standalone handles lookup from secondary cache + uint64_t block_cache_standalone_handle_count; + // total number of real handles lookup from secondary cache that are inserted + // into primary cache + uint64_t block_cache_real_handle_count; + uint64_t index_block_read_count; // total number of index block reads + uint64_t block_cache_filter_hit_count; // total number of filter block hits + uint64_t filter_block_read_count; // total number of filter block reads + uint64_t compression_dict_block_read_count; // total number of compression + // dictionary block reads + + uint64_t secondary_cache_hit_count; // total number of secondary cache hits + // total number of real handles inserted into secondary cache + uint64_t compressed_sec_cache_insert_real_count; + // total number of dummy handles inserted into secondary cache + uint64_t compressed_sec_cache_insert_dummy_count; + // bytes for vals before compression in secondary cache + uint64_t compressed_sec_cache_uncompressed_bytes; + // bytes for vals after compression in secondary cache + uint64_t compressed_sec_cache_compressed_bytes; + + uint64_t block_checksum_time; // total nanos spent on block checksum + uint64_t block_decompress_time; // total nanos spent on block decompression + + uint64_t get_read_bytes; // bytes for vals returned by Get + uint64_t multiget_read_bytes; // bytes for vals returned by MultiGet + uint64_t iter_read_bytes; // bytes for keys/vals decoded by iterator + + uint64_t blob_cache_hit_count; // total number of blob cache hits + uint64_t blob_read_count; // total number of blob reads (with IO) + uint64_t blob_read_byte; // total number of bytes from blob reads + uint64_t blob_read_time; // total nanos spent on blob reads + uint64_t blob_checksum_time; // total nanos spent on blob checksum + uint64_t 
blob_decompress_time; // total nanos spent on blob decompression + + // total number of internal keys skipped over during iteration. + // There are several reasons for it: + // 1. when calling Next(), the iterator is in the position of the previous + // key, so that we'll need to skip it. It means this counter will always + // be incremented in Next(). + // 2. when calling Next(), we need to skip internal entries for the previous + // keys that are overwritten. + // 3. when calling Next(), Seek() or SeekToFirst(), after previous key + // before calling Next(), the seek key in Seek() or the beginning for + // SeekToFirst(), there may be one or more deleted keys before the next + // valid key that the operation should place the iterator to. We need + // to skip both of the tombstone and updates hidden by the tombstones. The + // tombstones are not included in this counter, while previous updates + // hidden by the tombstones will be included here. + // 4. symmetric cases for Prev() and SeekToLast() + // internal_recent_skipped_count is not included in this counter. + // + uint64_t internal_key_skipped_count; + // Total number of deletes and single deletes skipped over during iteration + // When calling Next(), Seek() or SeekToFirst(), after previous position + // before calling Next(), the seek key in Seek() or the beginning for + // SeekToFirst(), there may be one or more deleted keys before the next valid + // key. Every deleted key is counted once. We don't recount here if there are + // still older updates invalidated by the tombstones. + // + uint64_t internal_delete_skipped_count; + // How many times iterators skipped over internal keys that are more recent + // than the snapshot that iterator is using. + // + uint64_t internal_recent_skipped_count; + // How many merge operands were fed into the merge operator by iterators. + // Note: base values are not included in the count. + // + uint64_t internal_merge_count; + // How many merge operands were fed into the merge operator by point lookups. + // Note: base values are not included in the count. + // + uint64_t internal_merge_point_lookup_count; + // Number of times we reseeked inside a merging iterator, specifically to skip + // after or before a range of keys covered by a range deletion in a newer LSM + // component. 
+ uint64_t internal_range_del_reseek_count; + + uint64_t get_snapshot_time; // total nanos spent on getting snapshot + uint64_t get_from_memtable_time; // total nanos spent on querying memtables + uint64_t get_from_memtable_count; // number of mem tables queried + // total nanos spent after Get() finds a key + uint64_t get_post_process_time; + uint64_t get_from_output_files_time; // total nanos reading from output files + // total nanos spent on seeking memtable + uint64_t seek_on_memtable_time; + // number of seeks issued on memtable + // (including SeekForPrev but not SeekToFirst and SeekToLast) + uint64_t seek_on_memtable_count; + // number of Next()s issued on memtable + uint64_t next_on_memtable_count; + // number of Prev()s issued on memtable + uint64_t prev_on_memtable_count; + // total nanos spent on seeking child iters + uint64_t seek_child_seek_time; + // number of seek issued in child iterators + uint64_t seek_child_seek_count; + uint64_t seek_min_heap_time; // total nanos spent on the merge min heap + uint64_t seek_max_heap_time; // total nanos spent on the merge max heap + // total nanos spent on seeking the internal entries + uint64_t seek_internal_seek_time; + // total nanos spent on iterating internal entries to find the next user entry + uint64_t find_next_user_entry_time; + + // This group of stats provide a breakdown of time spent by Write(). + // May be inaccurate when 2PC, two_write_queues or enable_pipelined_write + // are enabled. + // + // total nanos spent on writing to WAL + uint64_t write_wal_time; + // total nanos spent on writing to mem tables + uint64_t write_memtable_time; + // total nanos spent on delaying or throttling write + uint64_t write_delay_time; + // total nanos spent on switching memtable/wal and scheduling + // flushes/compactions. + uint64_t write_scheduling_flushes_compactions_time; + // total nanos spent on writing a record, excluding the above four things + uint64_t write_pre_and_post_process_time; + + // time spent waiting for other threads of the batch group + uint64_t write_thread_wait_nanos; + + // time spent on acquiring DB mutex. + uint64_t db_mutex_lock_nanos; + // Time spent on waiting with a condition variable created with DB mutex. + uint64_t db_condition_wait_nanos; + // Time spent on merge operator. + uint64_t merge_operator_time_nanos; + + // Time spent on reading index block from block cache or SST file + uint64_t read_index_block_nanos; + // Time spent on reading filter block from block cache or SST file + uint64_t read_filter_block_nanos; + // Time spent on creating data block iterator + uint64_t new_table_block_iter_nanos; + // Time spent on creating a iterator of an SST file. + uint64_t new_table_iterator_nanos; + // Time spent on seeking a key in data/index blocks + uint64_t block_seek_nanos; + // Time spent on finding or creating a table reader + uint64_t find_table_nanos; + // total number of mem table bloom hits + uint64_t bloom_memtable_hit_count; + // total number of mem table bloom misses + uint64_t bloom_memtable_miss_count; + // total number of SST bloom hits + uint64_t bloom_sst_hit_count; + // total number of SST bloom misses + uint64_t bloom_sst_miss_count; + + // Time spent waiting on key locks in transaction lock manager. + uint64_t key_lock_wait_time; + // number of times acquiring a lock was blocked by another transaction. + uint64_t key_lock_wait_count; + + // Total time spent in Env filesystem operations. These are only populated + // when TimedEnv is used. 
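+ // (A minimal sketch of enabling these, assuming a NewTimedEnv(Env*)
+ // wrapper factory is available in this build of RocksDB:
+ //
+ //   Env* timed_env = NewTimedEnv(Env::Default());
+ //   options.env = timed_env;
+ // )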
+ uint64_t env_new_sequential_file_nanos; + uint64_t env_new_random_access_file_nanos; + uint64_t env_new_writable_file_nanos; + uint64_t env_reuse_writable_file_nanos; + uint64_t env_new_random_rw_file_nanos; + uint64_t env_new_directory_nanos; + uint64_t env_file_exists_nanos; + uint64_t env_get_children_nanos; + uint64_t env_get_children_file_attributes_nanos; + uint64_t env_delete_file_nanos; + uint64_t env_create_dir_nanos; + uint64_t env_create_dir_if_missing_nanos; + uint64_t env_delete_dir_nanos; + uint64_t env_get_file_size_nanos; + uint64_t env_get_file_modification_time_nanos; + uint64_t env_rename_file_nanos; + uint64_t env_link_file_nanos; + uint64_t env_lock_file_nanos; + uint64_t env_unlock_file_nanos; + uint64_t env_new_logger_nanos; + + uint64_t get_cpu_nanos; + uint64_t iter_next_cpu_nanos; + uint64_t iter_prev_cpu_nanos; + uint64_t iter_seek_cpu_nanos; + + // EXPERIMENTAL + // Total number of db iterator's Next(), Prev(), Seek-related APIs being + // called + uint64_t iter_next_count; + uint64_t iter_prev_count; + uint64_t iter_seek_count; + + // Time spent in encrypting data. Populated when EncryptedEnv is used. + uint64_t encrypt_data_nanos; + // Time spent in decrypting data. Populated when EncryptedEnv is used. + uint64_t decrypt_data_nanos; + + uint64_t number_async_seek; +}; + +struct PerfContext : public PerfContextBase { + ~PerfContext(); + + PerfContext() {} + + PerfContext(const PerfContext&); + PerfContext& operator=(const PerfContext&); + PerfContext(PerfContext&&) noexcept; + + void Reset(); // reset all performance counters to zero + + std::string ToString(bool exclude_zero_counters = false) const; + + // enable per level perf context and allocate storage for PerfContextByLevel + void EnablePerLevelPerfContext(); + + // temporarily disable per level perf context by setting the flag to false + void DisablePerLevelPerfContext(); + + // free the space for PerfContextByLevel, also disable per level perf context + void ClearPerLevelPerfContext(); + + std::map* level_to_perf_context = nullptr; + bool per_level_perf_context_enabled = false; + + void copyMetrics(const PerfContext* other) noexcept; +}; + +// If RocksDB is compiled with -DNPERF_CONTEXT, then a pointer to a global, +// non-thread-local PerfContext object will be returned. Attempts to update +// this object will be ignored, and reading from it will also be no-op. +// Otherwise, +// a) if thread-local is supported on the platform, then a pointer to +// a thread-local PerfContext object will be returned. +// b) if thread-local is NOT supported, then compilation will fail. +// +// This function never returns nullptr. +PerfContext* get_perf_context(); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/perf_level.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/perf_level.h new file mode 100644 index 0000000000..e7dded0e32 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/perf_level.h @@ -0,0 +1,36 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// How much perf stats to collect. Affects perf_context and iostats_context. 
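+//
+// Typical usage sketch (get_perf_context() is declared in perf_context.h):
+//
+//   SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
+//   get_perf_context()->Reset();
+//   // ... issue some reads against the DB ...
+//   std::string report = get_perf_context()->ToString();
+//   SetPerfLevel(PerfLevel::kDisable);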
+enum PerfLevel : unsigned char {
+ kUninitialized = 0, // unknown setting
+ kDisable = 1, // disable perf stats
+ kEnableCount = 2, // enable only count stats
+ kEnableTimeExceptForMutex = 3, // Other than count stats, also enable time
+ // stats except for mutexes
+ // Other than time, also measure CPU time counters. Still don't measure
+ // time (neither wall time nor CPU time) for mutexes.
+ kEnableTimeAndCPUTimeExceptForMutex = 4,
+ kEnableTime = 5, // enable count and time stats
+ kOutOfBounds = 6 // N.B. Must always be the last value!
+};
+
+// set the perf stats level for current thread
+void SetPerfLevel(PerfLevel level);
+
+// get current perf stats level for current thread
+PerfLevel GetPerfLevel();
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/persistent_cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/persistent_cache.h
new file mode 100644
index 0000000000..f14f019993
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/persistent_cache.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "rocksdb/env.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// PersistentCache
+//
+// Persistent cache interface for caching IO pages on a persistent medium. The
+// cache interface is specifically designed for persistent read cache.
+class PersistentCache {
+ public:
+ using StatsType = std::vector<std::map<std::string, double>>;
+
+ virtual ~PersistentCache() {}
+
+ // Insert to page cache
+ //
+ // page_key Identifier to identify a page uniquely across restarts
+ // data Page data to copy (caller retains ownership)
+ // size Size of the page
+ virtual Status Insert(const Slice& key, const char* data,
+ const size_t size) = 0;
+
+ // Lookup page cache by page identifier
+ //
+ // page_key Page identifier
+ // buf Buffer where the data should be copied
+ // size Size of the page
+ virtual Status Lookup(const Slice& key, std::unique_ptr<char[]>* data,
+ size_t* size) = 0;
+
+ // True if the cache is configured to store serialized blocks, which are
+ // potentially compressed and include a trailer (when SST format calls for
+ // one). False if the cache stores uncompressed blocks (no trailer).
+ virtual bool IsCompressed() = 0;
+
+ // Return stats as map of {string, double} per-tier
+ //
+ // Persistent cache can be initialized as a tier of caches. The stats are per
+ // tier, top-down
+ virtual StatsType Stats() = 0;
+
+ virtual std::string GetPrintableOptions() const = 0;
+
+ // Return a new numeric id. May be used by multiple clients who are
+ // sharding the same persistent cache to partition the key space. Typically
+ // the client will allocate a new id at startup and prepend the id to its
+ // cache keys.
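+ // For example (sketch):
+ //
+ //   uint64_t id = cache->NewId();          // once per client, at startup
+ //   std::string key = std::to_string(id);  // any stable encoding works
+ //   key.append(user_key.data(), user_key.size());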
+ virtual uint64_t NewId() = 0;
+};
+
+// Factory method to create a new persistent cache
+Status NewPersistentCache(Env* const env, const std::string& path,
+ const uint64_t size,
+ const std::shared_ptr<Logger>& log,
+ const bool optimized_for_nvm,
+ std::shared_ptr<PersistentCache>* cache);
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/port_defs.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/port_defs.h
new file mode 100644
index 0000000000..9771aacb92
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/port_defs.h
@@ -0,0 +1,22 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file includes the common definitions used in the port/,
+// the public API (this directory), and other directories
+
+#pragma once
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+enum class CpuPriority {
+ kIdle = 0,
+ kLow = 1,
+ kNormal = 2,
+ kHigh = 3,
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/rate_limiter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/rate_limiter.h
new file mode 100644
index 0000000000..9cad6edf4a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/rate_limiter.h
@@ -0,0 +1,159 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "rocksdb/env.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class RateLimiter {
+ public:
+ enum class OpType {
+ kRead,
+ kWrite,
+ };
+
+ enum class Mode {
+ kReadsOnly,
+ kWritesOnly,
+ kAllIo,
+ };
+
+ // For API compatibility, default to rate-limiting writes only.
+ explicit RateLimiter(Mode mode = Mode::kWritesOnly) : mode_(mode) {}
+
+ virtual ~RateLimiter() {}
+
+ // This API allows the user to dynamically change the rate limiter's bytes
+ // per second.
+ // REQUIRED: bytes_per_second > 0
+ virtual void SetBytesPerSecond(int64_t bytes_per_second) = 0;
+
+ // Deprecated. New RateLimiter derived classes should override
+ // Request(const int64_t, const Env::IOPriority, Statistics*) or
+ // Request(const int64_t, const Env::IOPriority, Statistics*, OpType)
+ // instead.
+ //
+ // Request a token for bytes. If this request cannot be satisfied, the call
+ // is blocked. Caller is responsible to make sure
+ // bytes <= GetSingleBurstBytes()
+ // and bytes >= 0.
+ virtual void Request(const int64_t /*bytes*/, const Env::IOPriority /*pri*/) {
+ assert(false);
+ }
+
+ // Request a token for bytes and potentially update statistics. If this
+ // request cannot be satisfied, the call is blocked.
Caller is responsible to + // make sure bytes <= GetSingleBurstBytes() + // and bytes >= 0. + virtual void Request(const int64_t bytes, const Env::IOPriority pri, + Statistics* /* stats */) { + // For API compatibility, default implementation calls the older API in + // which statistics are unsupported. + Request(bytes, pri); + } + + // Requests token to read or write bytes and potentially updates statistics. + // + // If this request can not be satisfied, the call is blocked. Caller is + // responsible to make sure bytes <= GetSingleBurstBytes() + // and bytes >= 0. + virtual void Request(const int64_t bytes, const Env::IOPriority pri, + Statistics* stats, OpType op_type) { + if (IsRateLimited(op_type)) { + Request(bytes, pri, stats); + } + } + + // Requests token to read or write bytes and potentially updates statistics. + // Takes into account GetSingleBurstBytes() and alignment (e.g., in case of + // direct I/O) to allocate an appropriate number of bytes, which may be less + // than the number of bytes requested. + virtual size_t RequestToken(size_t bytes, size_t alignment, + Env::IOPriority io_priority, Statistics* stats, + RateLimiter::OpType op_type); + + // Max bytes can be granted in a single burst + virtual int64_t GetSingleBurstBytes() const = 0; + + // Total bytes that go through rate limiter + virtual int64_t GetTotalBytesThrough( + const Env::IOPriority pri = Env::IO_TOTAL) const = 0; + + // Total # of requests that go through rate limiter + virtual int64_t GetTotalRequests( + const Env::IOPriority pri = Env::IO_TOTAL) const = 0; + + // Total # of requests that are pending for bytes in rate limiter + // For convenience, this function is supported by the RateLimiter returned + // by NewGenericRateLimiter but is not required by RocksDB. + // + // REQUIRED: total_pending_request != nullptr + virtual Status GetTotalPendingRequests( + int64_t* total_pending_requests, + const Env::IOPriority pri = Env::IO_TOTAL) const { + assert(total_pending_requests != nullptr); + (void)total_pending_requests; + (void)pri; + return Status::NotSupported(); + } + + virtual int64_t GetBytesPerSecond() const = 0; + + virtual bool IsRateLimited(OpType op_type) { + if ((mode_ == RateLimiter::Mode::kWritesOnly && + op_type == RateLimiter::OpType::kRead) || + (mode_ == RateLimiter::Mode::kReadsOnly && + op_type == RateLimiter::OpType::kWrite)) { + return false; + } + return true; + } + + protected: + Mode GetMode() { return mode_; } + + private: + const Mode mode_; +}; + +// Create a RateLimiter object, which can be shared among RocksDB instances to +// control write rate of flush and compaction. +// @rate_bytes_per_sec: this is the only parameter you want to set most of the +// time. It controls the total write rate of compaction and flush in bytes per +// second. Currently, RocksDB does not enforce rate limit for anything other +// than flush and compaction, e.g. write to WAL. +// @refill_period_us: this controls how often tokens are refilled. For example, +// when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to +// 100ms, then 1MB is refilled every 100ms internally. Larger value can lead to +// burstier writes while smaller value introduces more CPU overhead. +// The default should work for most cases. +// @fairness: RateLimiter accepts high-pri requests and low-pri requests. +// A low-pri request is usually blocked in favor of hi-pri request. Currently, +// RocksDB assigns low-pri to request from compaction and high-pri to request +// from flush. 
Low-pri requests can get blocked if flush requests come in
+// continuously. To avoid starvation, this fairness parameter grants low-pri
+// requests permission with a 1/fairness chance even when high-pri requests
+// exist. Leaving it at the default of 10 should work for most cases.
+// @mode: Mode indicates which types of operations count against the limit.
+// @auto_tuned: Enables dynamic adjustment of rate limit within the range
+// `[rate_bytes_per_sec / 20, rate_bytes_per_sec]`, according to
+// the recent demand for background I/O.
+extern RateLimiter* NewGenericRateLimiter(
+ int64_t rate_bytes_per_sec, int64_t refill_period_us = 100 * 1000,
+ int32_t fairness = 10,
+ RateLimiter::Mode mode = RateLimiter::Mode::kWritesOnly,
+ bool auto_tuned = false);
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/rocksdb_namespace.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/rocksdb_namespace.h
new file mode 100644
index 0000000000..a339ec2aa5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/rocksdb_namespace.h
@@ -0,0 +1,16 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+// For testing purposes
+#if ROCKSDB_NAMESPACE == 42
+#undef ROCKSDB_NAMESPACE
+#endif
+
+// Normal logic
+#ifndef ROCKSDB_NAMESPACE
+#define ROCKSDB_NAMESPACE rocksdb
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/secondary_cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/secondary_cache.h
new file mode 100644
index 0000000000..2aad50280c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/secondary_cache.h
@@ -0,0 +1,139 @@
+// Copyright (c) 2021, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "rocksdb/advanced_cache.h"
+#include "rocksdb/customizable.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A handle for lookup result. Immediately after SecondaryCache::Lookup() with
+// wait=false (and depending on the implementation), the handle could be in any
+// of the below states. It must not be destroyed while in the pending state.
+// * Pending state (IsReady() == false): result is not ready. Value() and Size()
+// must not be called.
+// * Ready + not found state (IsReady() == true, Value() == nullptr): the lookup
+// has completed, finding no match. Or an error occurred that prevented
+// normal completion of the Lookup.
+// * Ready + found state (IsReady() == true, Value() != nullptr): the lookup
+// has completed, finding an entry that has been loaded into an object that is
+// now owned by the caller.
+//
+// Wait() or SecondaryCache::WaitAll() may be skipped if IsReady() happens to
+// return true, but (depending on the implementation) IsReady() might never
+// return true without Wait() or SecondaryCache::WaitAll(). After the handle
+// is known ready, calling Value() is required to avoid a memory leak in case
+// of a cache hit.
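+// For example (sketch; `handle` was returned by SecondaryCache::Lookup()
+// with wait=false):
+//
+//   if (!handle->IsReady()) {
+//     handle->Wait();
+//   }
+//   Cache::ObjectPtr obj = handle->Value();
+//   if (obj != nullptr) { /* hit; the caller now owns the object */ }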
+class SecondaryCacheResultHandle { + public: + virtual ~SecondaryCacheResultHandle() = default; + + // Returns whether the handle is ready or not + virtual bool IsReady() = 0; + + // Block until handle becomes ready + virtual void Wait() = 0; + + // Return the cache entry object (also known as value). If nullptr, it means + // the lookup was unsuccessful. + virtual Cache::ObjectPtr Value() = 0; + + // Return the out_charge from the helper->create_cb used to construct the + // object. + // WART: potentially confusing name + virtual size_t Size() = 0; +}; + +// SecondaryCache +// +// Cache interface for caching blocks on a secondary tier (which can include +// non-volatile media, or alternate forms of caching such as compressed data) +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. +class SecondaryCache : public Customizable { + public: + ~SecondaryCache() override = default; + + static const char* Type() { return "SecondaryCache"; } + static Status CreateFromString(const ConfigOptions& config_options, + const std::string& id, + std::shared_ptr* result); + + // Suggest inserting an entry into this cache. The caller retains ownership + // of `obj` (also called the "value"), so is only used directly by the + // SecondaryCache during Insert(). When the cache chooses to perform the + // suggested insertion, it uses the size_cb and saveto_cb provided by + // `helper` to extract the persistable data (typically an uncompressed block) + // and writes it to this cache tier. OK may be returned even if the insertion + // is not made. + virtual Status Insert(const Slice& key, Cache::ObjectPtr obj, + const Cache::CacheItemHelper* helper) = 0; + + // Insert a value from its saved/persistable data (typically uncompressed + // block), as if generated by SaveToCallback/SizeCallback. This can be used + // in "warming up" the cache from some auxiliary source, and like Insert() + // may or may not write it to cache depending on the admission control + // policy, even if the return status is success. + // + // The default implementation only assumes the entry helper's create_cb is + // called at Lookup() time and not Insert() time, so should work for all + // foreseeable implementations. + virtual Status InsertSaved(const Slice& key, const Slice& saved); + + // Lookup the data for the given key in this cache. The create_cb + // will be used to create the object. The handle returned may not be + // ready yet, unless wait=true, in which case Lookup() will block until + // the handle is ready. + // + // advise_erase is a hint from the primary cache indicating that the handle + // will be cached there, so the secondary cache is advised to drop it from + // the cache as an optimization. To use this feature, SupportForceErase() + // needs to return true. + // This hint can also be safely ignored. + // + // kept_in_sec_cache is to indicate whether the entry will be kept in the + // secondary cache after the Lookup (rather than erased because of Lookup) + virtual std::unique_ptr Lookup( + const Slice& key, const Cache::CacheItemHelper* helper, + Cache::CreateContext* create_context, bool wait, bool advise_erase, + bool& kept_in_sec_cache) = 0; + + // Indicate whether a handle can be erased in this secondary cache. 
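+ // (When this returns true, a caller may pass advise_erase=true to Lookup()
+ // to hint that this tier can drop the entry; see Lookup() above.)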
+ [[nodiscard]] virtual bool SupportForceErase() const = 0; + + // At the discretion of the implementation, erase the data associated + // with key. + virtual void Erase(const Slice& key) = 0; + + // Wait for a collection of handles to become ready. + virtual void WaitAll(std::vector handles) = 0; + + // Set the maximum configured capacity of the cache. + // When the new capacity is less than the old capacity and the existing usage + // is greater than new capacity, the implementation will do its best job to + // purge the released entries from the cache in order to lower the usage. + // + // The derived class can make this function no-op and return NotSupported(). + virtual Status SetCapacity(size_t /* capacity */) { + return Status::NotSupported(); + } + + // The derived class can make this function no-op and return NotSupported(). + virtual Status GetCapacity(size_t& /* capacity */) { + return Status::NotSupported(); + } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/slice.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/slice.h new file mode 100644 index 0000000000..0d7eb59499 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/slice.h @@ -0,0 +1,264 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Slice is a simple structure containing a pointer into some external +// storage and a size. The user of a Slice must ensure that the slice +// is not used after the corresponding external storage has been +// deallocated. +// +// Multiple threads can invoke const methods on a Slice without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same Slice must use +// external synchronization. + +#pragma once + +#include +#include +#include +#include +#include +#include // RocksDB now requires C++17 support + +#include "rocksdb/cleanable.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice { + public: + // Create an empty slice. + Slice() : data_(""), size_(0) {} + + // Create a slice that refers to d[0,n-1]. + Slice(const char* d, size_t n) : data_(d), size_(n) {} + + // Create a slice that refers to the contents of "s" + /* implicit */ + Slice(const std::string& s) : data_(s.data()), size_(s.size()) {} + + // Create a slice that refers to the same contents as "sv" + /* implicit */ + Slice(const std::string_view& sv) : data_(sv.data()), size_(sv.size()) {} + + // Create a slice that refers to s[0,strlen(s)-1] + /* implicit */ + Slice(const char* s) : data_(s) { size_ = (s == nullptr) ? 0 : strlen(s); } + + // Create a single slice from SliceParts using buf as storage. + // buf must exist as long as the returned Slice exists. 
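+ // For example (sketch; SliceParts is defined later in this header):
+ //
+ //   Slice pieces[2] = {Slice("key_"), Slice("suffix")};
+ //   SliceParts parts(pieces, 2);
+ //   std::string storage;
+ //   Slice whole(parts, &storage);  // refers to "key_suffix" held in storage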
+ Slice(const struct SliceParts& parts, std::string* buf); + + // Return a pointer to the beginning of the referenced data + const char* data() const { return data_; } + + // Return the length (in bytes) of the referenced data + size_t size() const { return size_; } + + // Return true iff the length of the referenced data is zero + bool empty() const { return size_ == 0; } + + // Return the ith byte in the referenced data. + // REQUIRES: n < size() + char operator[](size_t n) const { + assert(n < size()); + return data_[n]; + } + + // Change this slice to refer to an empty array + void clear() { + data_ = ""; + size_ = 0; + } + + // Drop the first "n" bytes from this slice. + void remove_prefix(size_t n) { + assert(n <= size()); + data_ += n; + size_ -= n; + } + + void remove_suffix(size_t n) { + assert(n <= size()); + size_ -= n; + } + + // Return a string that contains the copy of the referenced data. + // when hex is true, returns a string of twice the length hex encoded (0-9A-F) + std::string ToString(bool hex = false) const; + + // Return a string_view that references the same data as this slice. + std::string_view ToStringView() const { + return std::string_view(data_, size_); + } + + // Decodes the current slice interpreted as an hexadecimal string into result, + // if successful returns true, if this isn't a valid hex string + // (e.g not coming from Slice::ToString(true)) DecodeHex returns false. + // This slice is expected to have an even number of 0-9A-F characters + // also accepts lowercase (a-f) + bool DecodeHex(std::string* result) const; + + // Three-way comparison. Returns value: + // < 0 iff "*this" < "b", + // == 0 iff "*this" == "b", + // > 0 iff "*this" > "b" + int compare(const Slice& b) const; + + // Return true iff "x" is a prefix of "*this" + bool starts_with(const Slice& x) const { + return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0)); + } + + bool ends_with(const Slice& x) const { + return ((size_ >= x.size_) && + (memcmp(data_ + size_ - x.size_, x.data_, x.size_) == 0)); + } + + // Compare two slices and returns the first byte where they differ + size_t difference_offset(const Slice& b) const; + + // private: make these public for rocksdbjni access + const char* data_; + size_t size_; + + // Intentionally copyable +}; + +/** + * A Slice that can be pinned with some cleanup tasks, which will be run upon + * ::Reset() or object destruction, whichever is invoked first. This can be used + * to avoid memcpy by having the PinnableSlice object referring to the data + * that is locked in the memory and release them after the data is consumed. + */ +class PinnableSlice : public Slice, public Cleanable { + public: + PinnableSlice() { buf_ = &self_space_; } + explicit PinnableSlice(std::string* buf) { buf_ = buf; } + + PinnableSlice(PinnableSlice&& other); + PinnableSlice& operator=(PinnableSlice&& other); + + // No copy constructor and copy assignment allowed. 
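+ // Typical use with point lookups (sketch; `db`, `cf`, and `key` assumed):
+ //
+ //   PinnableSlice value;
+ //   Status s = db->Get(ReadOptions(), cf, key, &value);
+ //   // consume value.data() / value.size(), then:
+ //   value.Reset();  // runs the registered cleanups and releases the pin
+ //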
+ PinnableSlice(PinnableSlice&) = delete; + PinnableSlice& operator=(PinnableSlice&) = delete; + + inline void PinSlice(const Slice& s, CleanupFunction f, void* arg1, + void* arg2) { + assert(!pinned_); + pinned_ = true; + data_ = s.data(); + size_ = s.size(); + RegisterCleanup(f, arg1, arg2); + assert(pinned_); + } + + inline void PinSlice(const Slice& s, Cleanable* cleanable) { + assert(!pinned_); + pinned_ = true; + data_ = s.data(); + size_ = s.size(); + if (cleanable != nullptr) { + cleanable->DelegateCleanupsTo(this); + } + assert(pinned_); + } + + inline void PinSelf(const Slice& slice) { + assert(!pinned_); + buf_->assign(slice.data(), slice.size()); + data_ = buf_->data(); + size_ = buf_->size(); + assert(!pinned_); + } + + inline void PinSelf() { + assert(!pinned_); + data_ = buf_->data(); + size_ = buf_->size(); + assert(!pinned_); + } + + void remove_suffix(size_t n) { + assert(n <= size()); + if (pinned_) { + size_ -= n; + } else { + buf_->erase(size() - n, n); + PinSelf(); + } + } + + void remove_prefix(size_t n) { + assert(n <= size()); + if (pinned_) { + data_ += n; + size_ -= n; + } else { + buf_->erase(0, n); + PinSelf(); + } + } + + void Reset() { + Cleanable::Reset(); + pinned_ = false; + size_ = 0; + } + + inline std::string* GetSelf() { return buf_; } + + inline bool IsPinned() const { return pinned_; } + + private: + friend class PinnableSlice4Test; + std::string self_space_; + std::string* buf_; + bool pinned_ = false; +}; + +// A set of Slices that are virtually concatenated together. 'parts' points +// to an array of Slices. The number of elements in the array is 'num_parts'. +struct SliceParts { + SliceParts(const Slice* _parts, int _num_parts) + : parts(_parts), num_parts(_num_parts) {} + SliceParts() : parts(nullptr), num_parts(0) {} + + const Slice* parts; + int num_parts; +}; + +inline bool operator==(const Slice& x, const Slice& y) { + return ((x.size() == y.size()) && + (memcmp(x.data(), y.data(), x.size()) == 0)); +} + +inline bool operator!=(const Slice& x, const Slice& y) { return !(x == y); } + +inline int Slice::compare(const Slice& b) const { + assert(data_ != nullptr && b.data_ != nullptr); + const size_t min_len = (size_ < b.size_) ? size_ : b.size_; + int r = memcmp(data_, b.data_, min_len); + if (r == 0) { + if (size_ < b.size_) + r = -1; + else if (size_ > b.size_) + r = +1; + } + return r; +} + +inline size_t Slice::difference_offset(const Slice& b) const { + size_t off = 0; + const size_t len = (size_ < b.size_) ? size_ : b.size_; + for (; off < len; off++) { + if (data_[off] != b.data_[off]) break; + } + return off; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/slice_transform.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/slice_transform.h new file mode 100644 index 0000000000..8909b9c539 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/slice_transform.h @@ -0,0 +1,135 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+// +// Class for specifying user-defined functions which perform a +// transformation on a slice. It is not required that every slice +// belong to the domain and/or range of a function. Subclasses should +// define InDomain and InRange to determine which slices are in either +// of these sets respectively. + +#pragma once + +#include +#include + +#include "rocksdb/customizable.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; +struct ConfigOptions; + +// A SliceTransform is a generic pluggable way of transforming one string +// to another. Its primary use-case is in configuring RocksDB prefix Bloom +// filters, by setting prefix_extractor in ColumnFamilyOptions. +// +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. +class SliceTransform : public Customizable { + public: + virtual ~SliceTransform(){}; + + // Return the name of this transformation. + virtual const char* Name() const override = 0; + static const char* Type() { return "SliceTransform"; } + + // Creates and configures a new SliceTransform from the input options and id. + static Status CreateFromString(const ConfigOptions& config_options, + const std::string& id, + std::shared_ptr* result); + + // Returns a string representation of this SliceTransform, representing the ID + // and any additional properties. + std::string AsString() const; + + // Extract a prefix from a specified key, partial key, iterator upper bound, + // etc. This is normally used for building and checking prefix Bloom filters + // but should accept any string for which InDomain() returns true. + // See ColumnFamilyOptions::prefix_extractor for specific properties that + // must be satisfied by prefix extractors. + virtual Slice Transform(const Slice& key) const = 0; + + // Determine whether the specified key is compatible with the logic + // specified in the Transform method. Keys for which InDomain returns + // false will not be added to or queried against prefix Bloom filters. + // + // For example, if the Transform method returns a fixed length + // prefix of size 4, then an invocation to InDomain("abc") returns + // false because the specified key length(3) is shorter than the + // prefix size of 4. + // + // Wiki documentation here: + // https://github.com/facebook/rocksdb/wiki/Prefix-Seek + // + virtual bool InDomain(const Slice& key) const = 0; + + // DEPRECATED: This is currently not used and remains here for backward + // compatibility. + virtual bool InRange(const Slice& /*dst*/) const { return false; } + + // Returns information on maximum prefix length, if there is one. + // If Transform(x).size() == n for some keys and otherwise < n, + // should return true and set *len = n. Returning false is safe but + // currently disables some auto_prefix_mode filtering. + // Specifically, if the iterate_upper_bound is the immediate successor (see + // Comparator::IsSameLengthImmediateSuccessor) of the seek key's prefix, + // we require this function return true and iterate_upper_bound.size() == n + // to recognize and optimize the prefix seek. + // Otherwise (including FullLengthEnabled returns false, or prefix length is + // less than maximum), Seek with auto_prefix_mode is only optimized if the + // iterate_upper_bound and seek key have the same prefix. 
+ // BUG: Despite all these conditions and even with the extra condition on
+ // IsSameLengthImmediateSuccessor (see its "BUG" section), it is not
+ // sufficient to ensure auto_prefix_mode returns all entries that
+ // total_order_seek would return. See auto_prefix_mode "BUG" section.
+ virtual bool FullLengthEnabled(size_t* /*len*/) const { return false; }
+
+ // Transform(s)=Transform(`prefix`) for any s with `prefix` as a prefix.
+ //
+ // This function is not used by RocksDB itself, but by users. If users pass
+ // Options by string to RocksDB, they might not know what prefix extractor
+ // they are using. This function helps users determine:
+ // if they want to iterate over all keys with the prefix `prefix`, whether it
+ // is safe to use the prefix bloom filter and seek to the key `prefix`.
+ // If this function returns true, it means a user can Seek() to a prefix
+ // using the bloom filter. Otherwise, the user needs to skip the bloom filter
+ // by setting ReadOptions.total_order_seek = true.
+ //
+ // Here is an example: Suppose we implement a slice transform that returns
+ // the first part of the string up to and including the first ",":
+ // 1. SameResultWhenAppended("abc,") should return true. If applying prefix
+ // bloom filter using it, all slices matching "abc,.*" will be extracted
+ // to "abc,", so any SST file or memtable containing any of those keys
+ // will not be filtered out.
+ // 2. SameResultWhenAppended("abc") should return false. A user will not be
+ // guaranteed to see all the keys matching "abc.*" if a user prefix
+ // seeks to "abc" against a DB with the same setting. If one SST file
+ // only contains "abcd,e", the file can be filtered out and the key will
+ // be invisible, because the prefix according to the configured extractor
+ // is "abcd,".
+ //
+ // i.e., an implementation always returning false is safe.
+ virtual bool SameResultWhenAppended(const Slice& /*prefix*/) const {
+ return false;
+ }
+};
+
+// The prefix is the first `prefix_len` bytes of the key, and keys shorter
+// than `prefix_len` are not InDomain.
+extern const SliceTransform* NewFixedPrefixTransform(size_t prefix_len);
+
+// The prefix is the first min(length(key),`cap_len`) bytes of the key, and
+// all keys are InDomain.
+extern const SliceTransform* NewCappedPrefixTransform(size_t cap_len);
+
+// Prefix is equal to key. All keys are InDomain.
+extern const SliceTransform* NewNoopTransform();
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/snapshot.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/snapshot.h
new file mode 100644
index 0000000000..1ea56e71e0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/snapshot.h
@@ -0,0 +1,53 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DB;
+
+// Abstract handle to a particular state of a DB.
+// A Snapshot is an immutable object and can therefore be safely
+// accessed from multiple threads without any external synchronization.
+//
+// To Create a Snapshot, call DB::GetSnapshot().
+// To Destroy a Snapshot, call DB::ReleaseSnapshot(snapshot).
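+//
+// For example (sketch; `db` is an open DB*):
+//
+//   const Snapshot* snap = db->GetSnapshot();
+//   ReadOptions read_opts;
+//   read_opts.snapshot = snap;
+//   // reads with read_opts now see the DB state as of the snapshot
+//   db->ReleaseSnapshot(snap);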
+class Snapshot { + public: + virtual SequenceNumber GetSequenceNumber() const = 0; + + // Returns unix time i.e. the number of seconds since the Epoch, 1970-01-01 + // 00:00:00 (UTC). + virtual int64_t GetUnixTime() const = 0; + + virtual uint64_t GetTimestamp() const = 0; + + protected: + virtual ~Snapshot(); +}; + +// Simple RAII wrapper class for Snapshot. +// Constructing this object will create a snapshot. Destructing will +// release the snapshot. +class ManagedSnapshot { + public: + explicit ManagedSnapshot(DB* db); + + // Instead of creating a snapshot, take ownership of the input snapshot. + ManagedSnapshot(DB* db, const Snapshot* _snapshot); + + ~ManagedSnapshot(); + + const Snapshot* snapshot(); + + private: + DB* db_; + const Snapshot* snapshot_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_dump_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_dump_tool.h new file mode 100644 index 0000000000..0b81833f7d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_dump_tool.h @@ -0,0 +1,17 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include "rocksdb/options.h" + +namespace ROCKSDB_NAMESPACE { + +class SSTDumpTool { + public: + int Run(int argc, char const* const* argv, Options options = Options()); +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_manager.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_manager.h new file mode 100644 index 0000000000..6132921512 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_manager.h @@ -0,0 +1,136 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "rocksdb/file_system.h" +#include "rocksdb/statistics.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +class Env; +class Logger; + +// SstFileManager is used to track SST and blob files in the DB and control +// their deletion rate. All SstFileManager public functions are thread-safe. +// SstFileManager is NOT an extensible interface but a public interface for +// result of NewSstFileManager. Any derived classes must be RocksDB internal. +class SstFileManager { + public: + virtual ~SstFileManager() {} + + // Update the maximum allowed space that should be used by RocksDB, if + // the total size of the SST and blob files exceeds max_allowed_space, writes + // to RocksDB will fail. + // + // Setting max_allowed_space to 0 will disable this feature; maximum allowed + // space will be infinite (Default value). + // + // thread-safe. + virtual void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) = 0; + + // Set the amount of buffer room each compaction should be able to leave. + // In other words, at its maximum disk space consumption, the compaction + // should still leave compaction_buffer_size available on the disk so that + // other background functions may continue, such as logging and flushing. 
+ virtual void SetCompactionBufferSize(uint64_t compaction_buffer_size) = 0;
+
+ // Return true if the total size of SST and blob files exceeded the maximum
+ // allowed space usage.
+ //
+ // thread-safe.
+ virtual bool IsMaxAllowedSpaceReached() = 0;
+
+ // Returns true if the total size of SST and blob files as well as the
+ // estimated size of ongoing compactions exceeds the maximum allowed space
+ // usage.
+ virtual bool IsMaxAllowedSpaceReachedIncludingCompactions() = 0;
+
+ // Return the total size of all tracked files.
+ // thread-safe
+ virtual uint64_t GetTotalSize() = 0;
+
+ // Return a map containing all tracked files and their corresponding sizes.
+ // thread-safe
+ virtual std::unordered_map<std::string, uint64_t> GetTrackedFiles() = 0;
+
+ // Return delete rate limit in bytes per second.
+ // thread-safe
+ virtual int64_t GetDeleteRateBytesPerSecond() = 0;
+
+ // Update the delete rate limit in bytes per second.
+ // zero means disable delete rate limiting and delete files immediately
+ // thread-safe
+ virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) = 0;
+
+ // Return trash/DB size ratio where new files will be deleted immediately
+ // thread-safe
+ virtual double GetMaxTrashDBRatio() = 0;
+
+ // Update trash/DB size ratio where new files will be deleted immediately
+ // thread-safe
+ virtual void SetMaxTrashDBRatio(double ratio) = 0;
+
+ // Return the total size of trash files
+ // thread-safe
+ virtual uint64_t GetTotalTrashSize() = 0;
+
+ // Set the statistics ptr to dump the stat information
+ virtual void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) = 0;
+};
+
+// Create a new SstFileManager that can be shared among multiple RocksDB
+// instances to track SST and blob files and control their deletion rate.
+// Even though SstFileManager doesn't track WAL files, it still controls
+// their deletion rate.
+//
+// @param env: Pointer to Env object, please see "rocksdb/env.h".
+// @param fs: Pointer to FileSystem object (see "rocksdb/file_system.h").
+// @param info_log: If not nullptr, info_log will be used to log errors.
+//
+// == Deletion rate limiting specific arguments ==
+// @param trash_dir: Deprecated, this argument has no effect
+// @param rate_bytes_per_sec: How many bytes should be deleted per second. If
+// this value is set to 1024 (1 KB/sec) and we deleted a file of size 4 KB
+// in 1 second, we will wait for another 3 seconds before we delete other
+// files. Set to 0 to disable deletion rate limiting.
+// This option also affects the delete rate of WAL files in the DB.
+// @param delete_existing_trash: Deprecated, this argument has no effect, but
+// if the user provides trash_dir we will schedule deletes for files in that
+// dir
+// @param status: If not nullptr, status will contain any errors that happened
+// during creating the missing trash_dir or deleting existing files in trash.
+// @param max_trash_db_ratio: If the trash size constitutes more than this
+// fraction of the total DB size we will start deleting new files passed to
+// DeleteScheduler immediately
+// @param bytes_max_delete_chunk: if a file to delete is larger than delete
+// chunk, ftruncate the file by this size each time, rather than dropping the
+// whole file. 0 means to always delete the whole file. If the file has more
+// than one linked name, the file will be deleted as a whole. Either way,
+// `rate_bytes_per_sec` will be respected. NOTE that with this option,
+// files already renamed as trash may be partial, so users should not
+// directly recover them without checking.
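+//
+// For example (sketch):
+//
+//   std::shared_ptr<SstFileManager> sfm(NewSstFileManager(Env::Default()));
+//   sfm->SetDeleteRateBytesPerSecond(1024 * 1024);  // throttle to 1MB/sec
+//   Options options;
+//   options.sst_file_manager = sfm;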
+extern SstFileManager* NewSstFileManager( + Env* env, std::shared_ptr fs, + std::shared_ptr info_log = nullptr, + const std::string& trash_dir = "", int64_t rate_bytes_per_sec = 0, + bool delete_existing_trash = true, Status* status = nullptr, + double max_trash_db_ratio = 0.25, + uint64_t bytes_max_delete_chunk = 64 * 1024 * 1024); + +// Same as above, but takes a pointer to a legacy Env object, instead of +// Env and FileSystem objects +extern SstFileManager* NewSstFileManager( + Env* env, std::shared_ptr info_log = nullptr, + std::string trash_dir = "", int64_t rate_bytes_per_sec = 0, + bool delete_existing_trash = true, Status* status = nullptr, + double max_trash_db_ratio = 0.25, + uint64_t bytes_max_delete_chunk = 64 * 1024 * 1024); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_reader.h new file mode 100644 index 0000000000..026ae66d03 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_reader.h @@ -0,0 +1,45 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include "rocksdb/iterator.h" +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/table_properties.h" + +namespace ROCKSDB_NAMESPACE { + +// SstFileReader is used to read sst files that are generated by DB or +// SstFileWriter. +class SstFileReader { + public: + SstFileReader(const Options& options); + + ~SstFileReader(); + + // Prepares to read from the file located at "file_path". + Status Open(const std::string& file_path); + + // Returns a new iterator over the table contents. + // Most read options provide the same control as we read from DB. + // If "snapshot" is nullptr, the iterator returns only the latest keys. + Iterator* NewIterator(const ReadOptions& options); + + std::shared_ptr GetTableProperties() const; + + // Verifies whether there is corruption in this table. + Status VerifyChecksum(const ReadOptions& /*read_options*/); + + Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } + + private: + struct Rep; + std::unique_ptr rep_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_writer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_writer.h new file mode 100644 index 0000000000..fb2806865f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_file_writer.h @@ -0,0 +1,189 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/table_properties.h"
+#include "rocksdb/types.h"
+
+#if defined(__GNUC__) || defined(__clang__)
+#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
+#elif _WIN32
+#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+class Comparator;
+
+// ExternalSstFileInfo includes information about sst files created
+// using SstFileWriter.
+struct ExternalSstFileInfo {
+ ExternalSstFileInfo()
+ : file_path(""),
+ smallest_key(""),
+ largest_key(""),
+ smallest_range_del_key(""),
+ largest_range_del_key(""),
+ file_checksum(""),
+ file_checksum_func_name(""),
+ sequence_number(0),
+ file_size(0),
+ num_entries(0),
+ num_range_del_entries(0),
+ version(0) {}
+
+ ExternalSstFileInfo(const std::string& _file_path,
+ const std::string& _smallest_key,
+ const std::string& _largest_key,
+ SequenceNumber _sequence_number, uint64_t _file_size,
+ int32_t _num_entries, int32_t _version)
+ : file_path(_file_path),
+ smallest_key(_smallest_key),
+ largest_key(_largest_key),
+ smallest_range_del_key(""),
+ largest_range_del_key(""),
+ file_checksum(""),
+ file_checksum_func_name(""),
+ sequence_number(_sequence_number),
+ file_size(_file_size),
+ num_entries(_num_entries),
+ num_range_del_entries(0),
+ version(_version) {}
+
+ std::string file_path; // external sst file path
+ std::string smallest_key; // smallest user key in file
+ std::string largest_key; // largest user key in file
+ std::string
+ smallest_range_del_key; // smallest range deletion user key in file
+ std::string largest_range_del_key; // largest range deletion user key in file
+ std::string file_checksum; // sst file checksum
+ std::string file_checksum_func_name; // The name of file checksum function
+ SequenceNumber sequence_number; // sequence number of all keys in file
+ uint64_t file_size; // file size in bytes
+ uint64_t num_entries; // number of entries in file
+ uint64_t num_range_del_entries; // number of range deletion entries in file
+ int32_t version; // file version
+};
+
+// SstFileWriter is used to create sst files that can be added to the database
+// later. All keys in files generated by SstFileWriter will have sequence
+// number = 0.
+class SstFileWriter {
+ public:
+ // User can pass `column_family` to specify that the generated file will
+ // be ingested into this column_family, note that passing nullptr means that
+ // the column_family is unknown.
+ // If invalidate_page_cache is set to true, SstFileWriter will give the OS a
+ // hint that this file's pages are not needed every time we write 1MB to the
+ // file. To use the rate limiter, an io_priority smaller than IO_TOTAL can be
+ // passed.
+ // The `skip_filters` option is DEPRECATED and could be removed in the
+ // future. Use `BlockBasedTableOptions::filter_policy` to control filter
+ // generation.
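+ //
+ // Basic usage (sketch; keys must be added in sorted order):
+ //
+ //   Options options;
+ //   SstFileWriter writer(EnvOptions(), options);
+ //   Status s = writer.Open("/tmp/example.sst");
+ //   if (s.ok()) s = writer.Put("k1", "v1");
+ //   if (s.ok()) s = writer.Put("k2", "v2");
+ //   if (s.ok()) s = writer.Finish();
+ //   // the file can then be ingested with DB::IngestExternalFile()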
+ SstFileWriter(const EnvOptions& env_options, const Options& options, + ColumnFamilyHandle* column_family = nullptr, + bool invalidate_page_cache = true, + Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL, + bool skip_filters = false) + : SstFileWriter(env_options, options, options.comparator, column_family, + invalidate_page_cache, io_priority, skip_filters) {} + + // Deprecated API + SstFileWriter(const EnvOptions& env_options, const Options& options, + const Comparator* user_comparator, + ColumnFamilyHandle* column_family = nullptr, + bool invalidate_page_cache = true, + Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL, + bool skip_filters = false); + + ~SstFileWriter(); + + // Prepare SstFileWriter to write into file located at "file_path". + Status Open(const std::string& file_path); + + // Add a Put key with value to currently opened file (deprecated) + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. + // REQUIRES: comparator is *not* timestamp-aware. + ROCKSDB_DEPRECATED_FUNC Status Add(const Slice& user_key, const Slice& value); + + // Add a Put key with value to currently opened file + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. + // REQUIRES: comparator is *not* timestamp-aware. + Status Put(const Slice& user_key, const Slice& value); + + // Add a Put (key with timestamp, value) to the currently opened file + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. + // REQUIRES: timestamp's size is equal to what is expected by the comparator. + Status Put(const Slice& user_key, const Slice& timestamp, const Slice& value); + + // Add a Merge key with value to currently opened file + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. + // REQUIRES: comparator is *not* timestamp-aware. + Status Merge(const Slice& user_key, const Slice& value); + + // Add a deletion key to currently opened file + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. + // REQUIRES: comparator is *not* timestamp-aware. + Status Delete(const Slice& user_key); + + // Add a deletion key with timestamp to the currently opened file + // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) + // key according to the comparator. + // REQUIRES: timestamp's size is equal to what is expected by the comparator. + Status Delete(const Slice& user_key, const Slice& timestamp); + + // Add a range deletion tombstone to currently opened file. Such a range + // deletion tombstone does NOT delete point (Put/Merge/Delete) keys in the + // same file. + // + // Range deletion tombstones may be added in any order, both with respect to + // each other and with respect to the point (Put/Merge/Delete) keys in the + // same file. + // + // REQUIRES: The comparator orders `begin_key` at or before `end_key` + // REQUIRES: comparator is *not* timestamp-aware. + Status DeleteRange(const Slice& begin_key, const Slice& end_key); + + // Add a range deletion tombstone to currently opened file. Such a range + // deletion tombstone does NOT delete point (Put/Merge/Delete) keys in the + // same file. + // + // Range deletion tombstones may be added in any order, both with respect to + // each other and with respect to the point (Put/Merge/Delete) keys in the + // same file. 
+  //
+  // REQUIRES: begin_key and end_key are user keys without timestamp.
+  // REQUIRES: The comparator orders `begin_key` at or before `end_key`
+  // REQUIRES: timestamp's size is equal to what is expected by the comparator.
+  Status DeleteRange(const Slice& begin_key, const Slice& end_key,
+                     const Slice& timestamp);
+
+  // Finalize writing to sst file and close file.
+  //
+  // An optional ExternalSstFileInfo pointer can be passed to the function
+  // which will be populated with information about the created sst file.
+  Status Finish(ExternalSstFileInfo* file_info = nullptr);
+
+  // Return the current file size.
+  uint64_t FileSize();
+
+ private:
+  void InvalidatePageCache(bool closing);
+  struct Rep;
+  std::unique_ptr<Rep> rep_;
+};
+}  // namespace ROCKSDB_NAMESPACE
+
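For orientation, a sketch of the typical SstFileWriter workflow (illustration only, not part of the vendored sources): keys must be added in comparator order, and the finished file is handed to the DB via `DB::IngestExternalFile`. The `BulkLoad` helper name and the file path are hypothetical.

    #include "rocksdb/db.h"
    #include "rocksdb/sst_file_writer.h"

    using namespace ROCKSDB_NAMESPACE;

    Status BulkLoad(DB* db) {  // hypothetical helper
      Options options;
      SstFileWriter writer(EnvOptions(), options);
      Status s = writer.Open("/tmp/bulk.sst");  // hypothetical path
      if (!s.ok()) return s;
      // Keys must arrive in increasing comparator order.
      for (char c = 'a'; c <= 'z' && s.ok(); ++c) {
        s = writer.Put(std::string(1, c), "value");
      }
      if (s.ok()) s = writer.Finish();
      // Ingest the finished file; all of its keys get sequence number 0.
      if (s.ok()) {
        s = db->IngestExternalFile({"/tmp/bulk.sst"},
                                   IngestExternalFileOptions());
      }
      return s;
    }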
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_partitioner.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_partitioner.h
new file mode 100644
index 0000000000..3af8e94929
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/sst_partitioner.h
@@ -0,0 +1,142 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/slice.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+
+enum PartitionerResult : char {
+  // Partitioner does not require to create new file
+  kNotRequired = 0x0,
+  // Partitioner is requesting forcefully to create new file
+  kRequired = 0x1
+  // Additional constants can be added
+};
+
+struct PartitionerRequest {
+  PartitionerRequest(const Slice& prev_user_key_,
+                     const Slice& current_user_key_,
+                     uint64_t current_output_file_size_)
+      : prev_user_key(&prev_user_key_),
+        current_user_key(&current_user_key_),
+        current_output_file_size(current_output_file_size_) {}
+  const Slice* prev_user_key;
+  const Slice* current_user_key;
+  uint64_t current_output_file_size;
+};
+
+/*
+ * A SstPartitioner is a generic pluggable way of defining the partition
+ * of SST files. Compaction job will split the SST files on partition boundary
+ * to lower the write amplification during SST file promote to higher level.
+ */
+class SstPartitioner {
+ public:
+  virtual ~SstPartitioner() {}
+
+  // Return the name of this partitioner.
+  virtual const char* Name() const = 0;
+
+  // It is called for all keys in compaction. When partitioner want to create
+  // new SST file it needs to return true. It means compaction job will finish
+  // current SST file where last key is "prev_user_key" parameter and start new
+  // SST file where first key is "current_user_key". Returns decision if
+  // partition boundary was detected and compaction should create new file.
+  virtual PartitionerResult ShouldPartition(
+      const PartitionerRequest& request) = 0;
+
+  // Called with smallest and largest keys in SST file when compaction try to do
+  // trivial move. Returns true is partitioner allows to do trivial move.
+  virtual bool CanDoTrivialMove(const Slice& smallest_user_key,
+                                const Slice& largest_user_key) = 0;
+
+  // Context information of a compaction run
+  struct Context {
+    // Does this compaction run include all data files
+    bool is_full_compaction;
+    // Is this compaction requested by the client (true),
+    // or is it occurring as an automatic compaction process
+    bool is_manual_compaction;
+    // Output level for this compaction
+    int output_level;
+    // Smallest key for compaction
+    Slice smallest_user_key;
+    // Largest key for compaction
+    Slice largest_user_key;
+  };
+};
+
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class SstPartitionerFactory : public Customizable {
+ public:
+  ~SstPartitionerFactory() override {}
+  static const char* Type() { return "SstPartitionerFactory"; }
+  static Status CreateFromString(
+      const ConfigOptions& options, const std::string& value,
+      std::shared_ptr<SstPartitionerFactory>* result);
+
+  virtual std::unique_ptr<SstPartitioner> CreatePartitioner(
+      const SstPartitioner::Context& context) const = 0;
+
+  // Returns a name that identifies this partitioner factory.
+  const char* Name() const override = 0;
+};
+
+/*
+ * Fixed key prefix partitioner. It splits the output SST files when prefix
+ * defined by size changes.
+ */
+class SstPartitionerFixedPrefix : public SstPartitioner {
+ public:
+  explicit SstPartitionerFixedPrefix(size_t len) : len_(len) {}
+
+  virtual ~SstPartitionerFixedPrefix() override {}
+
+  const char* Name() const override { return "SstPartitionerFixedPrefix"; }
+
+  PartitionerResult ShouldPartition(const PartitionerRequest& request) override;
+
+  bool CanDoTrivialMove(const Slice& smallest_user_key,
+                        const Slice& largest_user_key) override;
+
+ private:
+  size_t len_;
+};
+
+/*
+ * Factory for fixed prefix partitioner.
+ */
+class SstPartitionerFixedPrefixFactory : public SstPartitionerFactory {
+ public:
+  explicit SstPartitionerFixedPrefixFactory(size_t len);
+
+  ~SstPartitionerFixedPrefixFactory() override {}
+
+  static const char* kClassName() { return "SstPartitionerFixedPrefixFactory"; }
+  const char* Name() const override { return kClassName(); }
+
+  std::unique_ptr<SstPartitioner> CreatePartitioner(
+      const SstPartitioner::Context& /* context */) const override;
+
+ private:
+  size_t len_;
+};
+
+extern std::shared_ptr<SstPartitionerFactory>
+NewSstPartitionerFixedPrefixFactory(size_t prefix_len);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/statistics.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/statistics.h
new file mode 100644
index 0000000000..cfeaa5a53e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/statistics.h
@@ -0,0 +1,757 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+/**
+ * Keep adding tickers here. Note that the C++ enum values, unlike the values in
+ * the Java bindings, are not guaranteed to be stable; also, the C++ and Java
+ * values for any given ticker are not guaranteed to match.
+ * 1.
Add the new ticker before TICKER_ENUM_MAX. + * 2. Add a readable string in TickersNameMap below for the newly added ticker. + * 3. Add a corresponding enum value to TickerType.java in the Java API. + * 4. Add the enum conversions from/to Java/C++ to portal.h's toJavaTickerType + * and toCppTickers. + */ +enum Tickers : uint32_t { + // total block cache misses + // REQUIRES: BLOCK_CACHE_MISS == BLOCK_CACHE_INDEX_MISS + + // BLOCK_CACHE_FILTER_MISS + + // BLOCK_CACHE_DATA_MISS; + BLOCK_CACHE_MISS = 0, + // total block cache hit + // REQUIRES: BLOCK_CACHE_HIT == BLOCK_CACHE_INDEX_HIT + + // BLOCK_CACHE_FILTER_HIT + + // BLOCK_CACHE_DATA_HIT; + BLOCK_CACHE_HIT, + // # of blocks added to block cache. + BLOCK_CACHE_ADD, + // # of failures when adding blocks to block cache. + BLOCK_CACHE_ADD_FAILURES, + // # of times cache miss when accessing index block from block cache. + BLOCK_CACHE_INDEX_MISS, + // # of times cache hit when accessing index block from block cache. + BLOCK_CACHE_INDEX_HIT, + // # of index blocks added to block cache. + BLOCK_CACHE_INDEX_ADD, + // # of bytes of index blocks inserted into cache + BLOCK_CACHE_INDEX_BYTES_INSERT, + // # of times cache miss when accessing filter block from block cache. + BLOCK_CACHE_FILTER_MISS, + // # of times cache hit when accessing filter block from block cache. + BLOCK_CACHE_FILTER_HIT, + // # of filter blocks added to block cache. + BLOCK_CACHE_FILTER_ADD, + // # of bytes of bloom filter blocks inserted into cache + BLOCK_CACHE_FILTER_BYTES_INSERT, + // # of times cache miss when accessing data block from block cache. + BLOCK_CACHE_DATA_MISS, + // # of times cache hit when accessing data block from block cache. + BLOCK_CACHE_DATA_HIT, + // # of data blocks added to block cache. + BLOCK_CACHE_DATA_ADD, + // # of bytes of data blocks inserted into cache + BLOCK_CACHE_DATA_BYTES_INSERT, + // # of bytes read from cache. + BLOCK_CACHE_BYTES_READ, + // # of bytes written into cache. + BLOCK_CACHE_BYTES_WRITE, + + // # of times bloom filter has avoided file reads, i.e., negatives. + BLOOM_FILTER_USEFUL, + // # of times bloom FullFilter has not avoided the reads. + BLOOM_FILTER_FULL_POSITIVE, + // # of times bloom FullFilter has not avoided the reads and data actually + // exist. + BLOOM_FILTER_FULL_TRUE_POSITIVE, + + // # persistent cache hit + PERSISTENT_CACHE_HIT, + // # persistent cache miss + PERSISTENT_CACHE_MISS, + + // # total simulation block cache hits + SIM_BLOCK_CACHE_HIT, + // # total simulation block cache misses + SIM_BLOCK_CACHE_MISS, + + // # of memtable hits. + MEMTABLE_HIT, + // # of memtable misses. + MEMTABLE_MISS, + + // # of Get() queries served by L0 + GET_HIT_L0, + // # of Get() queries served by L1 + GET_HIT_L1, + // # of Get() queries served by L2 and up + GET_HIT_L2_AND_UP, + + /** + * COMPACTION_KEY_DROP_* count the reasons for key drop during compaction + * There are 4 reasons currently. + */ + COMPACTION_KEY_DROP_NEWER_ENTRY, // key was written with a newer value. + // Also includes keys dropped for range del. + COMPACTION_KEY_DROP_OBSOLETE, // The key is obsolete. + COMPACTION_KEY_DROP_RANGE_DEL, // key was covered by a range tombstone. + COMPACTION_KEY_DROP_USER, // user compaction function has dropped the key. + COMPACTION_RANGE_DEL_DROP_OBSOLETE, // all keys in range were deleted. + // Deletions obsoleted before bottom level due to file gap optimization. 
+ COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, + // If a compaction was canceled in sfm to prevent ENOSPC + COMPACTION_CANCELLED, + + // Number of keys written to the database via the Put and Write call's + NUMBER_KEYS_WRITTEN, + // Number of Keys read, + NUMBER_KEYS_READ, + // Number keys updated, if inplace update is enabled + NUMBER_KEYS_UPDATED, + // The number of uncompressed bytes issued by DB::Put(), DB::Delete(), + // DB::Merge(), and DB::Write(). + BYTES_WRITTEN, + // The number of uncompressed bytes read from DB::Get(). It could be + // either from memtables, cache, or table files. + // For the number of logical bytes read from DB::MultiGet(), + // please use NUMBER_MULTIGET_BYTES_READ. + BYTES_READ, + // The number of calls to seek/next/prev + NUMBER_DB_SEEK, + NUMBER_DB_NEXT, + NUMBER_DB_PREV, + // The number of calls to seek/next/prev that returned data + NUMBER_DB_SEEK_FOUND, + NUMBER_DB_NEXT_FOUND, + NUMBER_DB_PREV_FOUND, + // The number of uncompressed bytes read from an iterator. + // Includes size of key and value. + ITER_BYTES_READ, + NO_FILE_OPENS, + NO_FILE_ERRORS, + // Writer has to wait for compaction or flush to finish. + STALL_MICROS, + // The wait time for db mutex. + // Disabled by default. To enable it set stats level to kAll + DB_MUTEX_WAIT_MICROS, + + // Number of MultiGet calls, keys read, and bytes read + NUMBER_MULTIGET_CALLS, + NUMBER_MULTIGET_KEYS_READ, + NUMBER_MULTIGET_BYTES_READ, + + NUMBER_MERGE_FAILURES, + + // Prefix filter stats when used for point lookups (Get / MultiGet). + // (For prefix filter stats on iterators, see *_LEVEL_SEEK_*.) + // Checked: filter was queried + BLOOM_FILTER_PREFIX_CHECKED, + // Useful: filter returned false so prevented accessing data+index blocks + BLOOM_FILTER_PREFIX_USEFUL, + // True positive: found a key matching the point query. When another key + // with the same prefix matches, it is considered a false positive by + // these statistics even though the filter returned a true positive. + BLOOM_FILTER_PREFIX_TRUE_POSITIVE, + + // Number of times we had to reseek inside an iteration to skip + // over large number of keys with same userkey. + NUMBER_OF_RESEEKS_IN_ITERATION, + + // Record the number of calls to GetUpdatesSince. Useful to keep track of + // transaction log iterator refreshes + GET_UPDATES_SINCE_CALLS, + WAL_FILE_SYNCED, // Number of times WAL sync is done + WAL_FILE_BYTES, // Number of bytes written to WAL + + // Writes can be processed by requesting thread or by the thread at the + // head of the writers queue. + WRITE_DONE_BY_SELF, + WRITE_DONE_BY_OTHER, // Equivalent to writes done for others + WRITE_WITH_WAL, // Number of Write calls that request WAL + COMPACT_READ_BYTES, // Bytes read during compaction + COMPACT_WRITE_BYTES, // Bytes written during compaction + FLUSH_WRITE_BYTES, // Bytes written during flush + + // Compaction read and write statistics broken down by CompactionReason + COMPACT_READ_BYTES_MARKED, + COMPACT_READ_BYTES_PERIODIC, + COMPACT_READ_BYTES_TTL, + COMPACT_WRITE_BYTES_MARKED, + COMPACT_WRITE_BYTES_PERIODIC, + COMPACT_WRITE_BYTES_TTL, + + // Number of table's properties loaded directly from file, without creating + // table reader object. 
+ NUMBER_DIRECT_LOAD_TABLE_PROPERTIES, + NUMBER_SUPERVERSION_ACQUIRES, + NUMBER_SUPERVERSION_RELEASES, + NUMBER_SUPERVERSION_CLEANUPS, + + // # of compressions/decompressions executed + NUMBER_BLOCK_COMPRESSED, + NUMBER_BLOCK_DECOMPRESSED, + + // DEPRECATED / unused (see NUMBER_BLOCK_COMPRESSION_*) + NUMBER_BLOCK_NOT_COMPRESSED, + MERGE_OPERATION_TOTAL_TIME, + FILTER_OPERATION_TOTAL_TIME, + + // Row cache. + ROW_CACHE_HIT, + ROW_CACHE_MISS, + + // Read amplification statistics. + // Read amplification can be calculated using this formula + // (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES) + // + // REQUIRES: ReadOptions::read_amp_bytes_per_bit to be enabled + READ_AMP_ESTIMATE_USEFUL_BYTES, // Estimate of total bytes actually used. + READ_AMP_TOTAL_READ_BYTES, // Total size of loaded data blocks. + + // Number of refill intervals where rate limiter's bytes are fully consumed. + NUMBER_RATE_LIMITER_DRAINS, + + // Number of internal keys skipped by Iterator + NUMBER_ITER_SKIP, + + // BlobDB specific stats + // # of Put/PutTTL/PutUntil to BlobDB. Only applicable to legacy BlobDB. + BLOB_DB_NUM_PUT, + // # of Write to BlobDB. Only applicable to legacy BlobDB. + BLOB_DB_NUM_WRITE, + // # of Get to BlobDB. Only applicable to legacy BlobDB. + BLOB_DB_NUM_GET, + // # of MultiGet to BlobDB. Only applicable to legacy BlobDB. + BLOB_DB_NUM_MULTIGET, + // # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator. Only + // applicable to legacy BlobDB. + BLOB_DB_NUM_SEEK, + // # of Next to BlobDB iterator. Only applicable to legacy BlobDB. + BLOB_DB_NUM_NEXT, + // # of Prev to BlobDB iterator. Only applicable to legacy BlobDB. + BLOB_DB_NUM_PREV, + // # of keys written to BlobDB. Only applicable to legacy BlobDB. + BLOB_DB_NUM_KEYS_WRITTEN, + // # of keys read from BlobDB. Only applicable to legacy BlobDB. + BLOB_DB_NUM_KEYS_READ, + // # of bytes (key + value) written to BlobDB. Only applicable to legacy + // BlobDB. + BLOB_DB_BYTES_WRITTEN, + // # of bytes (keys + value) read from BlobDB. Only applicable to legacy + // BlobDB. + BLOB_DB_BYTES_READ, + // # of keys written by BlobDB as non-TTL inlined value. Only applicable to + // legacy BlobDB. + BLOB_DB_WRITE_INLINED, + // # of keys written by BlobDB as TTL inlined value. Only applicable to legacy + // BlobDB. + BLOB_DB_WRITE_INLINED_TTL, + // # of keys written by BlobDB as non-TTL blob value. Only applicable to + // legacy BlobDB. + BLOB_DB_WRITE_BLOB, + // # of keys written by BlobDB as TTL blob value. Only applicable to legacy + // BlobDB. + BLOB_DB_WRITE_BLOB_TTL, + // # of bytes written to blob file. + BLOB_DB_BLOB_FILE_BYTES_WRITTEN, + // # of bytes read from blob file. + BLOB_DB_BLOB_FILE_BYTES_READ, + // # of times a blob files being synced. + BLOB_DB_BLOB_FILE_SYNCED, + // # of blob index evicted from base DB by BlobDB compaction filter because + // of expiration. Only applicable to legacy BlobDB. + BLOB_DB_BLOB_INDEX_EXPIRED_COUNT, + // size of blob index evicted from base DB by BlobDB compaction filter + // because of expiration. Only applicable to legacy BlobDB. + BLOB_DB_BLOB_INDEX_EXPIRED_SIZE, + // # of blob index evicted from base DB by BlobDB compaction filter because + // of corresponding file deleted. Only applicable to legacy BlobDB. + BLOB_DB_BLOB_INDEX_EVICTED_COUNT, + // size of blob index evicted from base DB by BlobDB compaction filter + // because of corresponding file deleted. Only applicable to legacy BlobDB. 
+ BLOB_DB_BLOB_INDEX_EVICTED_SIZE, + // # of blob files that were obsoleted by garbage collection. Only applicable + // to legacy BlobDB. + BLOB_DB_GC_NUM_FILES, + // # of blob files generated by garbage collection. Only applicable to legacy + // BlobDB. + BLOB_DB_GC_NUM_NEW_FILES, + // # of BlobDB garbage collection failures. Only applicable to legacy BlobDB. + BLOB_DB_GC_FAILURES, + // # of keys relocated to new blob file by garbage collection. + BLOB_DB_GC_NUM_KEYS_RELOCATED, + // # of bytes relocated to new blob file by garbage collection. + BLOB_DB_GC_BYTES_RELOCATED, + // # of blob files evicted because of BlobDB is full. Only applicable to + // legacy BlobDB. + BLOB_DB_FIFO_NUM_FILES_EVICTED, + // # of keys in the blob files evicted because of BlobDB is full. Only + // applicable to legacy BlobDB. + BLOB_DB_FIFO_NUM_KEYS_EVICTED, + // # of bytes in the blob files evicted because of BlobDB is full. Only + // applicable to legacy BlobDB. + BLOB_DB_FIFO_BYTES_EVICTED, + + // These counters indicate a performance issue in WritePrepared transactions. + // We should not seem them ticking them much. + // # of times prepare_mutex_ is acquired in the fast path. + TXN_PREPARE_MUTEX_OVERHEAD, + // # of times old_commit_map_mutex_ is acquired in the fast path. + TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD, + // # of times we checked a batch for duplicate keys. + TXN_DUPLICATE_KEY_OVERHEAD, + // # of times snapshot_mutex_ is acquired in the fast path. + TXN_SNAPSHOT_MUTEX_OVERHEAD, + // # of times ::Get returned TryAgain due to expired snapshot seq + TXN_GET_TRY_AGAIN, + + // Number of keys actually found in MultiGet calls (vs number requested by + // caller) + // NUMBER_MULTIGET_KEYS_READ gives the number requested by caller + NUMBER_MULTIGET_KEYS_FOUND, + + NO_ITERATOR_CREATED, // number of iterators created + NO_ITERATOR_DELETED, // number of iterators deleted + + BLOCK_CACHE_COMPRESSION_DICT_MISS, + BLOCK_CACHE_COMPRESSION_DICT_HIT, + BLOCK_CACHE_COMPRESSION_DICT_ADD, + BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, + + // # of blocks redundantly inserted into block cache. + // REQUIRES: BLOCK_CACHE_ADD_REDUNDANT <= BLOCK_CACHE_ADD + BLOCK_CACHE_ADD_REDUNDANT, + // # of index blocks redundantly inserted into block cache. + // REQUIRES: BLOCK_CACHE_INDEX_ADD_REDUNDANT <= BLOCK_CACHE_INDEX_ADD + BLOCK_CACHE_INDEX_ADD_REDUNDANT, + // # of filter blocks redundantly inserted into block cache. + // REQUIRES: BLOCK_CACHE_FILTER_ADD_REDUNDANT <= BLOCK_CACHE_FILTER_ADD + BLOCK_CACHE_FILTER_ADD_REDUNDANT, + // # of data blocks redundantly inserted into block cache. + // REQUIRES: BLOCK_CACHE_DATA_ADD_REDUNDANT <= BLOCK_CACHE_DATA_ADD + BLOCK_CACHE_DATA_ADD_REDUNDANT, + // # of dict blocks redundantly inserted into block cache. + // REQUIRES: BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT + // <= BLOCK_CACHE_COMPRESSION_DICT_ADD + BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT, + + // # of files marked as trash by sst file manager and will be deleted + // later by background thread. + FILES_MARKED_TRASH, + // # of files deleted immediately by sst file manger through delete scheduler. 
+ FILES_DELETED_IMMEDIATELY, + + // The counters for error handler, not that, bg_io_error is the subset of + // bg_error and bg_retryable_io_error is the subset of bg_io_error + ERROR_HANDLER_BG_ERROR_COUNT, + ERROR_HANDLER_BG_IO_ERROR_COUNT, + ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT, + ERROR_HANDLER_AUTORESUME_COUNT, + ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT, + ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT, + + // Statistics for memtable garbage collection: + // Raw bytes of data (payload) present on memtable at flush time. + MEMTABLE_PAYLOAD_BYTES_AT_FLUSH, + // Outdated bytes of data present on memtable at flush time. + MEMTABLE_GARBAGE_BYTES_AT_FLUSH, + + // Secondary cache statistics + SECONDARY_CACHE_HITS, + + // Bytes read by `VerifyChecksum()` and `VerifyFileChecksums()` APIs. + VERIFY_CHECKSUM_READ_BYTES, + + // Bytes read/written while creating backups + BACKUP_READ_BYTES, + BACKUP_WRITE_BYTES, + + // Remote compaction read/write statistics + REMOTE_COMPACT_READ_BYTES, + REMOTE_COMPACT_WRITE_BYTES, + + // Tiered storage related statistics + HOT_FILE_READ_BYTES, + WARM_FILE_READ_BYTES, + COLD_FILE_READ_BYTES, + HOT_FILE_READ_COUNT, + WARM_FILE_READ_COUNT, + COLD_FILE_READ_COUNT, + + // Last level and non-last level read statistics + LAST_LEVEL_READ_BYTES, + LAST_LEVEL_READ_COUNT, + NON_LAST_LEVEL_READ_BYTES, + NON_LAST_LEVEL_READ_COUNT, + + // Statistics on iterator Seek() (and variants) for each sorted run. I.e. a + // single user Seek() can result in many sorted run Seek()s. + // The stats are split between last level and non-last level. + // Filtered: a filter such as prefix Bloom filter indicate the Seek() would + // not find anything relevant, so avoided a likely access to data+index + // blocks. + LAST_LEVEL_SEEK_FILTERED, + // Filter match: a filter such as prefix Bloom filter was queried but did + // not filter out the seek. + LAST_LEVEL_SEEK_FILTER_MATCH, + // At least one data block was accessed for a Seek() (or variant) on a + // sorted run. + LAST_LEVEL_SEEK_DATA, + // At least one value() was accessed for the seek (suggesting it was useful), + // and no filter such as prefix Bloom was queried. + LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER, + // At least one value() was accessed for the seek (suggesting it was useful), + // after querying a filter such as prefix Bloom. + LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH, + // The same set of stats, but for non-last level seeks. + NON_LAST_LEVEL_SEEK_FILTERED, + NON_LAST_LEVEL_SEEK_FILTER_MATCH, + NON_LAST_LEVEL_SEEK_DATA, + NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER, + NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH, + + // Number of block checksum verifications + BLOCK_CHECKSUM_COMPUTE_COUNT, + // Number of times RocksDB detected a corruption while verifying a block + // checksum. RocksDB does not remember corruptions that happened during user + // reads so the same block corruption may be detected multiple times. + BLOCK_CHECKSUM_MISMATCH_COUNT, + + MULTIGET_COROUTINE_COUNT, + + // Integrated BlobDB specific stats + // # of times cache miss when accessing blob from blob cache. + BLOB_DB_CACHE_MISS, + // # of times cache hit when accessing blob from blob cache. + BLOB_DB_CACHE_HIT, + // # of data blocks added to blob cache. + BLOB_DB_CACHE_ADD, + // # of failures when adding blobs to blob cache. + BLOB_DB_CACHE_ADD_FAILURES, + // # of bytes read from blob cache. + BLOB_DB_CACHE_BYTES_READ, + // # of bytes written into blob cache. 
+  BLOB_DB_CACHE_BYTES_WRITE,
+
+  // Time spent in the ReadAsync file system call
+  READ_ASYNC_MICROS,
+  // Number of errors returned to the async read callback
+  ASYNC_READ_ERROR_COUNT,
+
+  // Fine grained secondary cache stats
+  SECONDARY_CACHE_FILTER_HITS,
+  SECONDARY_CACHE_INDEX_HITS,
+  SECONDARY_CACHE_DATA_HITS,
+
+  // Number of lookup into the prefetched tail (see
+  // `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
+  // that can't find its data for table open
+  TABLE_OPEN_PREFETCH_TAIL_MISS,
+  // Number of lookup into the prefetched tail (see
+  // `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
+  // that finds its data for table open
+  TABLE_OPEN_PREFETCH_TAIL_HIT,
+
+  // Statistics on the filtering by user-defined timestamps
+  // # of times timestamps are checked on accessing the table
+  TIMESTAMP_FILTER_TABLE_CHECKED,
+  // # of times timestamps can successfully help skip the table access
+  TIMESTAMP_FILTER_TABLE_FILTERED,
+
+  // Number of input bytes (uncompressed) to compression for SST blocks that
+  // are stored compressed.
+  BYTES_COMPRESSED_FROM,
+  // Number of output bytes (compressed) from compression for SST blocks that
+  // are stored compressed.
+  BYTES_COMPRESSED_TO,
+  // Number of uncompressed bytes for SST blocks that are stored uncompressed
+  // because compression type is kNoCompression, or some error case caused
+  // compression not to run or produce an output. Index blocks are only counted
+  // if enable_index_compression is true.
+  BYTES_COMPRESSION_BYPASSED,
+  // Number of input bytes (uncompressed) to compression for SST blocks that
+  // are stored uncompressed because the compression result was rejected,
+  // either because the ratio was not acceptable (see
+  // CompressionOptions::max_compressed_bytes_per_kb) or found invalid by the
+  // `verify_compression` option.
+  BYTES_COMPRESSION_REJECTED,
+
+  // Like BYTES_COMPRESSION_BYPASSED but counting number of blocks
+  NUMBER_BLOCK_COMPRESSION_BYPASSED,
+  // Like BYTES_COMPRESSION_REJECTED but counting number of blocks
+  NUMBER_BLOCK_COMPRESSION_REJECTED,
+
+  // Number of input bytes (compressed) to decompression in reading compressed
+  // SST blocks from storage.
+  BYTES_DECOMPRESSED_FROM,
+  // Number of output bytes (uncompressed) from decompression in reading
+  // compressed SST blocks from storage.
+  BYTES_DECOMPRESSED_TO,
+
+  TICKER_ENUM_MAX
+};
+
+// The order of items listed in Tickers should be the same as
+// the order listed in TickersNameMap
+extern const std::vector<std::pair<Tickers, std::string>> TickersNameMap;
+
+/**
+ * Keep adding histograms here. Note that the C++ enum values, unlike the values
+ * in the Java bindings, are not guaranteed to be stable; also, the C++ and Java
+ * values for any given histogram are not guaranteed to match.
+ * 1. Add the new histogram before HISTOGRAM_ENUM_MAX.
+ * 2. Add a readable string in HistogramsNameMap below for the newly added
+ *    histogram.
+ * 3. Add a corresponding enum value to HistogramType.java in the Java API.
+ * 4. Add the enum conversions from/to Java/C++ to portal.h's
+ *    toJavaHistogramsType and toCppHistograms.
+ */ +enum Histograms : uint32_t { + DB_GET = 0, + DB_WRITE, + COMPACTION_TIME, + COMPACTION_CPU_TIME, + SUBCOMPACTION_SETUP_TIME, + TABLE_SYNC_MICROS, + COMPACTION_OUTFILE_SYNC_MICROS, + WAL_FILE_SYNC_MICROS, + MANIFEST_FILE_SYNC_MICROS, + // TIME SPENT IN IO DURING TABLE OPEN + TABLE_OPEN_IO_MICROS, + DB_MULTIGET, + READ_BLOCK_COMPACTION_MICROS, + READ_BLOCK_GET_MICROS, + WRITE_RAW_BLOCK_MICROS, + NUM_FILES_IN_SINGLE_COMPACTION, + DB_SEEK, + WRITE_STALL, + // Time spent in reading block-based or plain SST table + SST_READ_MICROS, + // Time spent in reading SST table (currently only block-based table) or blob + // file corresponding to `Env::IOActivity` + FILE_READ_FLUSH_MICROS, + FILE_READ_COMPACTION_MICROS, + FILE_READ_DB_OPEN_MICROS, + + // The number of subcompactions actually scheduled during a compaction + NUM_SUBCOMPACTIONS_SCHEDULED, + // Value size distribution in each operation + BYTES_PER_READ, + BYTES_PER_WRITE, + BYTES_PER_MULTIGET, + + BYTES_COMPRESSED, // DEPRECATED / unused (see BYTES_COMPRESSED_{FROM,TO}) + BYTES_DECOMPRESSED, // DEPRECATED / unused (see BYTES_DECOMPRESSED_{FROM,TO}) + COMPRESSION_TIMES_NANOS, + DECOMPRESSION_TIMES_NANOS, + // Number of merge operands passed to the merge operator in user read + // requests. + READ_NUM_MERGE_OPERANDS, + + // BlobDB specific stats + // Size of keys written to BlobDB. Only applicable to legacy BlobDB. + BLOB_DB_KEY_SIZE, + // Size of values written to BlobDB. Only applicable to legacy BlobDB. + BLOB_DB_VALUE_SIZE, + // BlobDB Put/PutWithTTL/PutUntil/Write latency. Only applicable to legacy + // BlobDB. + BLOB_DB_WRITE_MICROS, + // BlobDB Get latency. Only applicable to legacy BlobDB. + BLOB_DB_GET_MICROS, + // BlobDB MultiGet latency. Only applicable to legacy BlobDB. + BLOB_DB_MULTIGET_MICROS, + // BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. Only applicable to + // legacy BlobDB. + BLOB_DB_SEEK_MICROS, + // BlobDB Next latency. Only applicable to legacy BlobDB. + BLOB_DB_NEXT_MICROS, + // BlobDB Prev latency. Only applicable to legacy BlobDB. + BLOB_DB_PREV_MICROS, + // Blob file write latency. + BLOB_DB_BLOB_FILE_WRITE_MICROS, + // Blob file read latency. + BLOB_DB_BLOB_FILE_READ_MICROS, + // Blob file sync latency. + BLOB_DB_BLOB_FILE_SYNC_MICROS, + // BlobDB compression time. + BLOB_DB_COMPRESSION_MICROS, + // BlobDB decompression time. + BLOB_DB_DECOMPRESSION_MICROS, + // Time spent flushing memtable to disk + FLUSH_TIME, + SST_BATCH_SIZE, + + // MultiGet stats logged per level + // Num of index and filter blocks read from file system per level. + NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, + // Num of sst files read from file system per level. + NUM_SST_READ_PER_LEVEL, + + // Error handler statistics + ERROR_HANDLER_AUTORESUME_RETRY_COUNT, + + // Stats related to asynchronous read requests. + ASYNC_READ_BYTES, + POLL_WAIT_MICROS, + + // Number of prefetched bytes discarded by RocksDB. 
+  PREFETCHED_BYTES_DISCARDED,
+
+  // Number of IOs issued in parallel in a MultiGet batch
+  MULTIGET_IO_BATCH_SIZE,
+
+  // Number of levels requiring IO for MultiGet
+  NUM_LEVEL_READ_PER_MULTIGET,
+
+  // Wait time for aborting async read in FilePrefetchBuffer destructor
+  ASYNC_PREFETCH_ABORT_MICROS,
+
+  // Number of bytes read for RocksDB's prefetching contents (as opposed to file
+  // system's prefetch) from the end of SST table during block based table open
+  TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
+
+  HISTOGRAM_ENUM_MAX
+};
+
+extern const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap;
+
+struct HistogramData {
+  double median;
+  double percentile95;
+  double percentile99;
+  double average;
+  double standard_deviation;
+  // zero-initialize new members since old Statistics::histogramData()
+  // implementations won't write them.
+  double max = 0.0;
+  uint64_t count = 0;
+  uint64_t sum = 0;
+  double min = 0.0;
+};
+
+// StatsLevel can be used to reduce statistics overhead by skipping certain
+// types of stats in the stats collection process.
+// Usage:
+//   options.statistics->set_stats_level(StatsLevel::kExceptTimeForMutex);
enum StatsLevel : uint8_t {
+  // Disable all metrics
+  kDisableAll,
+  // Disable tickers
+  kExceptTickers = kDisableAll,
+  // Disable timer stats, and skip histogram stats
+  kExceptHistogramOrTimers,
+  // Skip timer stats
+  kExceptTimers,
+  // Collect all stats except time inside mutex lock AND time spent on
+  // compression.
+  kExceptDetailedTimers,
+  // Collect all stats except the counters requiring to get time inside the
+  // mutex lock.
+  kExceptTimeForMutex,
+  // Collect all stats, including measuring duration of mutex operations.
+  // If getting time is expensive on the platform to run, it can
+  // reduce scalability to more threads, especially for writes.
+  kAll,
+};
+
+// Analyze the performance of a db by providing cumulative stats over time.
+// Usage:
+//  Options options;
+//  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+//  Status s = DB::Open(options, kDBPath, &db);
+//  ...
+//  options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
+//  HistogramData hist;
+//  options.statistics->histogramData(FLUSH_TIME, &hist);
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class Statistics : public Customizable {
+ public:
+  ~Statistics() override {}
+  static const char* Type() { return "Statistics"; }
+  static Status CreateFromString(const ConfigOptions& opts,
+                                 const std::string& value,
+                                 std::shared_ptr<Statistics>* result);
+  // Default name of empty, for backwards compatibility. Derived classes should
+  // override this method.
+  // This default implementation will likely be removed in a future release
+  const char* Name() const override { return ""; }
+  virtual uint64_t getTickerCount(uint32_t tickerType) const = 0;
+  virtual void histogramData(uint32_t type,
+                             HistogramData* const data) const = 0;
+  virtual std::string getHistogramString(uint32_t /*type*/) const { return ""; }
+  virtual void recordTick(uint32_t tickerType, uint64_t count = 1) = 0;
+  virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0;
+  virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0;
+  virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) {
+    if (get_stats_level() <= StatsLevel::kExceptTimers) {
+      return;
+    }
+    recordInHistogram(histogramType, time);
+  }
+  // The function is here only for backward compatibility reason.
+  // Users implementing their own Statistics class should override
+  // recordInHistogram() instead and leave measureTime() as it is.
+  virtual void measureTime(uint32_t /*histogramType*/, uint64_t /*time*/) {
+    // This is not supposed to be called.
+    assert(false);
+  }
+  virtual void recordInHistogram(uint32_t histogramType, uint64_t time) {
+    // measureTime() is the old and inaccurate function name.
+    // To keep backward compatible. If users implement their own
+    // statistics, which overrides measureTime() but doesn't override
+    // this function. We forward to measureTime().
+    measureTime(histogramType, time);
+  }
+
+  // Resets all ticker and histogram stats
+  virtual Status Reset() { return Status::NotSupported("Not implemented"); }
+
+  using Customizable::ToString;
+  // String representation of the statistic object. Must be thread-safe.
+  virtual std::string ToString() const {
+    // Do nothing by default
+    return std::string("ToString(): not implemented");
+  }
+
+  virtual bool getTickerMap(std::map<std::string, uint64_t>*) const {
+    // Do nothing by default
+    return false;
+  }
+
+  // Override this function to disable particular histogram collection
+  virtual bool HistEnabledForType(uint32_t type) const {
+    return type < HISTOGRAM_ENUM_MAX;
+  }
+  void set_stats_level(StatsLevel sl) {
+    stats_level_.store(sl, std::memory_order_relaxed);
+  }
+  StatsLevel get_stats_level() const {
+    return stats_level_.load(std::memory_order_relaxed);
+  }
+
+ private:
+  std::atomic<StatsLevel> stats_level_{kExceptDetailedTimers};
+};
+
+// Create a concrete DBStatistics object
+std::shared_ptr<Statistics> CreateDBStatistics();
+
+}  // namespace ROCKSDB_NAMESPACE
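A short sketch of wiring the Statistics interface above into a DB and reading back one ticker and one histogram (illustration only, not part of the vendored sources); the DB path and `ReportStats` helper are hypothetical.

    #include <cassert>
    #include <cstdio>
    #include "rocksdb/db.h"
    #include "rocksdb/statistics.h"

    using namespace ROCKSDB_NAMESPACE;

    void ReportStats() {  // hypothetical helper
      Options options;
      options.create_if_missing = true;
      options.statistics = CreateDBStatistics();
      options.statistics->set_stats_level(StatsLevel::kExceptDetailedTimers);
      DB* db = nullptr;
      Status s = DB::Open(options, "/tmp/stats_demo", &db);  // hypothetical path
      assert(s.ok());
      // ... issue reads and writes here ...
      uint64_t hits = options.statistics->getTickerCount(BLOCK_CACHE_HIT);
      HistogramData get_latency;
      options.statistics->histogramData(DB_GET, &get_latency);
      printf("cache hits: %llu, p99 Get latency: %f us\n",
             (unsigned long long)hits, get_latency.percentile99);
      delete db;
    }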
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/stats_history.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/stats_history.h
new file mode 100644
index 0000000000..57e469295b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/stats_history.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <map>
+#include <string>
+
+#include "rocksdb/statistics.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DBImpl;
+
+// StatsHistoryIterator is the main interface for users to programmatically
+// access statistics snapshots that was automatically stored by RocksDB.
+// Depending on options, the stats can be in memory or on disk.
+// The stats snapshots are indexed by time that they were recorded, and each
+// stats snapshot contains individual stat name and value at the time of
+// recording.
+// Example:
+//   std::unique_ptr<StatsHistoryIterator> stats_iter;
+//   Status s = db->GetStatsHistory(0 /* start_time */,
+//                                  env->NowMicros() /* end_time*/,
+//                                  &stats_iter);
+//   if (s.ok()) {
+//     for (; stats_iter->Valid(); stats_iter->Next()) {
+//       uint64_t stats_time = stats_iter->GetStatsTime();
+//       const std::map<std::string, uint64_t>& stats_map =
+//           stats_iter->GetStatsMap();
+//       process(stats_time, stats_map);
+//     }
+//   }
+class StatsHistoryIterator {
+ public:
+  StatsHistoryIterator() {}
+  virtual ~StatsHistoryIterator() {}
+
+  virtual bool Valid() const = 0;
+
+  // Moves to the next stats history record. After this call, Valid() is
+  // true iff the iterator was not positioned at the last entry in the source.
+  // REQUIRES: Valid()
+  virtual void Next() = 0;
+
+  // Return the time stamp (in seconds) when stats history is recorded.
+  // REQUIRES: Valid()
+  virtual uint64_t GetStatsTime() const = 0;
+
+  // DEPRECATED (was never used)
+  virtual int GetFormatVersion() const { return -1; }
+
+  // Return the current stats history as an std::map which specifies the
+  // mapping from stats name to stats value . The underlying storage
+  // for the returned map is valid only until the next modification of
+  // the iterator.
+  // REQUIRES: Valid()
+  virtual const std::map<std::string, uint64_t>& GetStatsMap() const = 0;
+
+  // If an error has occurred, return it. Else return an ok status.
+  virtual Status status() const = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/status.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/status.h
new file mode 100644
index 0000000000..447c3b9fef
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/status.h
@@ -0,0 +1,574 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A Status encapsulates the result of an operation. It may indicate success,
+// or it may indicate an error with an associated error message.
+//
+// Multiple threads can invoke const methods on a Status without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Status must use
+// external synchronization.
+
+#pragma once
+
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+#include <memory>
+#include <string>
+
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+#include "port/stack_trace.h"
+#endif
+
+#include "rocksdb/slice.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Status {
+ public:
+  // Create a success status.
+ Status() + : code_(kOk), + subcode_(kNone), + sev_(kNoError), + retryable_(false), + data_loss_(false), + scope_(0), + state_(nullptr) {} + ~Status() { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + if (!checked_) { + fprintf(stderr, "Failed to check Status %p\n", this); + port::PrintStack(); + std::abort(); + } +#endif // ROCKSDB_ASSERT_STATUS_CHECKED + } + + // Copy the specified status. + Status(const Status& s); + Status& operator=(const Status& s); + Status(Status&& s) noexcept; + Status& operator=(Status&& s) noexcept; + bool operator==(const Status& rhs) const; + bool operator!=(const Status& rhs) const; + + // In case of intentionally swallowing an error, user must explicitly call + // this function. That way we are easily able to search the code to find where + // error swallowing occurs. + inline void PermitUncheckedError() const { MarkChecked(); } + + inline void MustCheck() const { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + checked_ = false; +#endif // ROCKSDB_ASSERT_STATUS_CHECKED + } + + enum Code : unsigned char { + kOk = 0, + kNotFound = 1, + kCorruption = 2, + kNotSupported = 3, + kInvalidArgument = 4, + kIOError = 5, + kMergeInProgress = 6, + kIncomplete = 7, + kShutdownInProgress = 8, + kTimedOut = 9, + kAborted = 10, + kBusy = 11, + kExpired = 12, + kTryAgain = 13, + kCompactionTooLarge = 14, + kColumnFamilyDropped = 15, + kMaxCode + }; + + Code code() const { + MarkChecked(); + return code_; + } + + enum SubCode : unsigned char { + kNone = 0, + kMutexTimeout = 1, + kLockTimeout = 2, + kLockLimit = 3, + kNoSpace = 4, + kDeadlock = 5, + kStaleFile = 6, + kMemoryLimit = 7, + kSpaceLimit = 8, + kPathNotFound = 9, + KMergeOperandsInsufficientCapacity = 10, + kManualCompactionPaused = 11, + kOverwritten = 12, + kTxnNotPrepared = 13, + kIOFenced = 14, + kMergeOperatorFailed = 15, + kMaxSubCode + }; + + SubCode subcode() const { + MarkChecked(); + return subcode_; + } + + enum Severity : unsigned char { + kNoError = 0, + kSoftError = 1, + kHardError = 2, + kFatalError = 3, + kUnrecoverableError = 4, + kMaxSeverity + }; + + Status(const Status& s, Severity sev); + + Status(Code _code, SubCode _subcode, Severity _sev, const Slice& msg) + : Status(_code, _subcode, msg, "", _sev) {} + + static Status CopyAppendMessage(const Status& s, const Slice& delim, + const Slice& msg); + + Severity severity() const { + MarkChecked(); + return sev_; + } + + // Returns a C style string indicating the message of the Status + const char* getState() const { + MarkChecked(); + return state_.get(); + } + + // Return a success status. + static Status OK() { return Status(); } + + // Successful, though an existing something was overwritten + // Note: using variants of OK status for program logic is discouraged, + // but it can be useful for communicating statistical information without + // changing public APIs. + static Status OkOverwritten() { return Status(kOk, kOverwritten); } + + // Return error status of an appropriate type. 
+ static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kNotFound, msg, msg2); + } + + // Fast path for not found without malloc; + static Status NotFound(SubCode msg = kNone) { return Status(kNotFound, msg); } + + static Status NotFound(SubCode sc, const Slice& msg, + const Slice& msg2 = Slice()) { + return Status(kNotFound, sc, msg, msg2); + } + + static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kCorruption, msg, msg2); + } + static Status Corruption(SubCode msg = kNone) { + return Status(kCorruption, msg); + } + + static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kNotSupported, msg, msg2); + } + static Status NotSupported(SubCode msg = kNone) { + return Status(kNotSupported, msg); + } + + static Status InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kInvalidArgument, msg, msg2); + } + static Status InvalidArgument(SubCode msg = kNone) { + return Status(kInvalidArgument, msg); + } + + static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kIOError, msg, msg2); + } + static Status IOError(SubCode msg = kNone) { return Status(kIOError, msg); } + + static Status MergeInProgress(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kMergeInProgress, msg, msg2); + } + static Status MergeInProgress(SubCode msg = kNone) { + return Status(kMergeInProgress, msg); + } + + static Status Incomplete(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kIncomplete, msg, msg2); + } + static Status Incomplete(SubCode msg = kNone) { + return Status(kIncomplete, msg); + } + + static Status ShutdownInProgress(SubCode msg = kNone) { + return Status(kShutdownInProgress, msg); + } + static Status ShutdownInProgress(const Slice& msg, + const Slice& msg2 = Slice()) { + return Status(kShutdownInProgress, msg, msg2); + } + static Status Aborted(SubCode msg = kNone) { return Status(kAborted, msg); } + static Status Aborted(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kAborted, msg, msg2); + } + + static Status Busy(SubCode msg = kNone) { return Status(kBusy, msg); } + static Status Busy(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kBusy, msg, msg2); + } + + static Status TimedOut(SubCode msg = kNone) { return Status(kTimedOut, msg); } + static Status TimedOut(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kTimedOut, msg, msg2); + } + + static Status Expired(SubCode msg = kNone) { return Status(kExpired, msg); } + static Status Expired(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kExpired, msg, msg2); + } + + static Status TryAgain(SubCode msg = kNone) { return Status(kTryAgain, msg); } + static Status TryAgain(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kTryAgain, msg, msg2); + } + + static Status CompactionTooLarge(SubCode msg = kNone) { + return Status(kCompactionTooLarge, msg); + } + static Status CompactionTooLarge(const Slice& msg, + const Slice& msg2 = Slice()) { + return Status(kCompactionTooLarge, msg, msg2); + } + + static Status ColumnFamilyDropped(SubCode msg = kNone) { + return Status(kColumnFamilyDropped, msg); + } + + static Status ColumnFamilyDropped(const Slice& msg, + const Slice& msg2 = Slice()) { + return Status(kColumnFamilyDropped, msg, msg2); + } + + static Status NoSpace() { return Status(kIOError, kNoSpace); } + static Status NoSpace(const Slice& msg, const Slice& msg2 = Slice()) { + return 
Status(kIOError, kNoSpace, msg, msg2); + } + + static Status MemoryLimit() { return Status(kAborted, kMemoryLimit); } + static Status MemoryLimit(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kAborted, kMemoryLimit, msg, msg2); + } + + static Status SpaceLimit() { return Status(kIOError, kSpaceLimit); } + static Status SpaceLimit(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kIOError, kSpaceLimit, msg, msg2); + } + + static Status PathNotFound() { return Status(kIOError, kPathNotFound); } + static Status PathNotFound(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kIOError, kPathNotFound, msg, msg2); + } + + static Status TxnNotPrepared() { + return Status(kInvalidArgument, kTxnNotPrepared); + } + static Status TxnNotPrepared(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kInvalidArgument, kTxnNotPrepared, msg, msg2); + } + + // Returns true iff the status indicates success. + bool ok() const { + MarkChecked(); + return code() == kOk; + } + + // Returns true iff the status indicates success *with* something + // overwritten + bool IsOkOverwritten() const { + MarkChecked(); + return code() == kOk && subcode() == kOverwritten; + } + + // Returns true iff the status indicates a NotFound error. + bool IsNotFound() const { + MarkChecked(); + return code() == kNotFound; + } + + // Returns true iff the status indicates a Corruption error. + bool IsCorruption() const { + MarkChecked(); + return code() == kCorruption; + } + + // Returns true iff the status indicates a NotSupported error. + bool IsNotSupported() const { + MarkChecked(); + return code() == kNotSupported; + } + + // Returns true iff the status indicates an InvalidArgument error. + bool IsInvalidArgument() const { + MarkChecked(); + return code() == kInvalidArgument; + } + + // Returns true iff the status indicates an IOError. + bool IsIOError() const { + MarkChecked(); + return code() == kIOError; + } + + // Returns true iff the status indicates an MergeInProgress. + bool IsMergeInProgress() const { + MarkChecked(); + return code() == kMergeInProgress; + } + + // Returns true iff the status indicates Incomplete + bool IsIncomplete() const { + MarkChecked(); + return code() == kIncomplete; + } + + // Returns true iff the status indicates Shutdown In progress + bool IsShutdownInProgress() const { + MarkChecked(); + return code() == kShutdownInProgress; + } + + bool IsTimedOut() const { + MarkChecked(); + return code() == kTimedOut; + } + + bool IsAborted() const { + MarkChecked(); + return code() == kAborted; + } + + bool IsLockLimit() const { + MarkChecked(); + return code() == kAborted && subcode() == kLockLimit; + } + + // Returns true iff the status indicates that a resource is Busy and + // temporarily could not be acquired. + bool IsBusy() const { + MarkChecked(); + return code() == kBusy; + } + + bool IsDeadlock() const { + MarkChecked(); + return code() == kBusy && subcode() == kDeadlock; + } + + // Returns true iff the status indicated that the operation has Expired. + bool IsExpired() const { + MarkChecked(); + return code() == kExpired; + } + + // Returns true iff the status indicates a TryAgain error. + // This usually means that the operation failed, but may succeed if + // re-attempted. 
+ bool IsTryAgain() const { + MarkChecked(); + return code() == kTryAgain; + } + + // Returns true iff the status indicates the proposed compaction is too large + bool IsCompactionTooLarge() const { + MarkChecked(); + return code() == kCompactionTooLarge; + } + + // Returns true iff the status indicates Column Family Dropped + bool IsColumnFamilyDropped() const { + MarkChecked(); + return code() == kColumnFamilyDropped; + } + + // Returns true iff the status indicates a NoSpace error + // This is caused by an I/O error returning the specific "out of space" + // error condition. Stricto sensu, an NoSpace error is an I/O error + // with a specific subcode, enabling users to take the appropriate action + // if needed + bool IsNoSpace() const { + MarkChecked(); + return (code() == kIOError) && (subcode() == kNoSpace); + } + + // Returns true iff the status indicates a memory limit error. There may be + // cases where we limit the memory used in certain operations (eg. the size + // of a write batch) in order to avoid out of memory exceptions. + bool IsMemoryLimit() const { + MarkChecked(); + return (code() == kAborted) && (subcode() == kMemoryLimit); + } + + // Returns true iff the status indicates a PathNotFound error + // This is caused by an I/O error returning the specific "no such file or + // directory" error condition. A PathNotFound error is an I/O error with + // a specific subcode, enabling users to take appropriate action if necessary + bool IsPathNotFound() const { + MarkChecked(); + return (code() == kIOError || code() == kNotFound) && + (subcode() == kPathNotFound); + } + + // Returns true iff the status indicates manual compaction paused. This + // is caused by a call to PauseManualCompaction + bool IsManualCompactionPaused() const { + MarkChecked(); + return (code() == kIncomplete) && (subcode() == kManualCompactionPaused); + } + + // Returns true iff the status indicates a TxnNotPrepared error. + bool IsTxnNotPrepared() const { + MarkChecked(); + return (code() == kInvalidArgument) && (subcode() == kTxnNotPrepared); + } + + // Returns true iff the status indicates a IOFenced error. + bool IsIOFenced() const { + MarkChecked(); + return (code() == kIOError) && (subcode() == kIOFenced); + } + + // Return a string representation of this status suitable for printing. + // Returns the string "OK" for success. + std::string ToString() const; + + protected: + Code code_; + SubCode subcode_; + Severity sev_; + bool retryable_; + bool data_loss_; + unsigned char scope_; + // A nullptr state_ (which is at least the case for OK) means the extra + // message is empty. 
+  std::unique_ptr<const char[]> state_;
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+  mutable bool checked_ = false;
+#endif  // ROCKSDB_ASSERT_STATUS_CHECKED
+
+  explicit Status(Code _code, SubCode _subcode = kNone)
+      : code_(_code),
+        subcode_(_subcode),
+        sev_(kNoError),
+        retryable_(false),
+        data_loss_(false),
+        scope_(0) {}
+
+  explicit Status(Code _code, SubCode _subcode, bool retryable, bool data_loss,
+                  unsigned char scope)
+      : code_(_code),
+        subcode_(_subcode),
+        sev_(kNoError),
+        retryable_(retryable),
+        data_loss_(data_loss),
+        scope_(scope) {}
+
+  Status(Code _code, SubCode _subcode, const Slice& msg, const Slice& msg2,
+         Severity sev = kNoError);
+  Status(Code _code, const Slice& msg, const Slice& msg2)
+      : Status(_code, kNone, msg, msg2) {}
+
+  static std::unique_ptr<const char[]> CopyState(const char* s);
+
+  inline void MarkChecked() const {
+#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
+    checked_ = true;
+#endif  // ROCKSDB_ASSERT_STATUS_CHECKED
+  }
+};
+
+inline Status::Status(const Status& s)
+    : code_(s.code_),
+      subcode_(s.subcode_),
+      sev_(s.sev_),
+      retryable_(s.retryable_),
+      data_loss_(s.data_loss_),
+      scope_(s.scope_) {
+  s.MarkChecked();
+  state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_.get());
+}
+inline Status::Status(const Status& s, Severity sev)
+    : code_(s.code_),
+      subcode_(s.subcode_),
+      sev_(sev),
+      retryable_(s.retryable_),
+      data_loss_(s.data_loss_),
+      scope_(s.scope_) {
+  s.MarkChecked();
+  state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_.get());
+}
+inline Status& Status::operator=(const Status& s) {
+  if (this != &s) {
+    s.MarkChecked();
+    MustCheck();
+    code_ = s.code_;
+    subcode_ = s.subcode_;
+    sev_ = s.sev_;
+    retryable_ = s.retryable_;
+    data_loss_ = s.data_loss_;
+    scope_ = s.scope_;
+    state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_.get());
+  }
+  return *this;
+}
+
+inline Status::Status(Status&& s) noexcept : Status() {
+  s.MarkChecked();
+  *this = std::move(s);
+}
+
+inline Status& Status::operator=(Status&& s) noexcept {
+  if (this != &s) {
+    s.MarkChecked();
+    MustCheck();
+    code_ = std::move(s.code_);
+    s.code_ = kOk;
+    subcode_ = std::move(s.subcode_);
+    s.subcode_ = kNone;
+    sev_ = std::move(s.sev_);
+    s.sev_ = kNoError;
+    retryable_ = std::move(s.retryable_);
+    s.retryable_ = false;
+    data_loss_ = std::move(s.data_loss_);
+    s.data_loss_ = false;
+    scope_ = std::move(s.scope_);
+    s.scope_ = 0;
+    state_ = std::move(s.state_);
+  }
+  return *this;
+}
+
+inline bool Status::operator==(const Status& rhs) const {
+  MarkChecked();
+  rhs.MarkChecked();
+  return (code_ == rhs.code_);
+}
+
+inline bool Status::operator!=(const Status& rhs) const {
+  MarkChecked();
+  rhs.MarkChecked();
+  return !(*this == rhs);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
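Given the Status API above, a small sketch of the intended error-handling discipline (illustration only, not part of the vendored sources), including the explicit opt-out that ROCKSDB_ASSERT_STATUS_CHECKED builds require; `DoWork` and `Caller` are hypothetical.

    #include <cstdio>
    #include "rocksdb/status.h"

    using namespace ROCKSDB_NAMESPACE;

    Status DoWork();  // hypothetical operation returning a Status

    void Caller() {
      Status s = DoWork();
      if (s.IsTryAgain()) {
        // Transient failure: the operation may succeed if re-attempted.
        s = DoWork();
      }
      if (!s.ok()) {
        // ToString() renders the code, subcode, and any attached message.
        fprintf(stderr, "rocksdb: %s\n", s.ToString().c_str());
      }
      // Intentionally swallowing an error must be explicit; under
      // ROCKSDB_ASSERT_STATUS_CHECKED an unchecked Status aborts in
      // its destructor.
      DoWork().PermitUncheckedError();
    }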
+
+#pragma once
+#include <stdint.h>
+
+#include <memory>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/status.h"
+
+#ifdef _WIN32
+// Windows API macro interference
+#undef GetCurrentTime
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+struct ConfigOptions;
+
+// A SystemClock is an interface used by the rocksdb implementation to access
+// operating system time-related functionality.
+class SystemClock : public Customizable {
+ public:
+  ~SystemClock() override {}
+
+  static const char* Type() { return "SystemClock"; }
+  static Status CreateFromString(const ConfigOptions& options,
+                                 const std::string& value,
+                                 std::shared_ptr<SystemClock>* result);
+  // The name of this system clock
+  virtual const char* Name() const override = 0;
+
+  // The name/nickname for the Default SystemClock. This name can be used
+  // to determine if the clock is the default one.
+  static const char* kDefaultName() { return "DefaultClock"; }
+
+  // Return a default SystemClock suitable for the current operating
+  // system.
+  static const std::shared_ptr<SystemClock>& Default();
+
+  // Returns the number of micro-seconds since some fixed point in time.
+  // It is often used as system time such as in GenericRateLimiter
+  // and other places so a port needs to return system time in order to work.
+  virtual uint64_t NowMicros() = 0;
+
+  // Returns the number of nano-seconds since some fixed point in time. Only
+  // useful for computing deltas of time in one run.
+  // Default implementation simply relies on NowMicros.
+  // In platform-specific implementations, NowNanos() should return time points
+  // that are MONOTONIC.
+  virtual uint64_t NowNanos() { return NowMicros() * 1000; }
+
+  // Returns the number of micro-seconds of CPU time used by the current
+  // thread. 0 indicates not supported.
+  virtual uint64_t CPUMicros() { return 0; }
+
+  // Returns the number of nano-seconds of CPU time used by the current
+  // thread. Default implementation simply relies on CPUMicros.
+  // 0 indicates not supported.
+  virtual uint64_t CPUNanos() { return CPUMicros() * 1000; }
+
+  // Sleep/delay the thread for the prescribed number of micro-seconds.
+  virtual void SleepForMicroseconds(int micros) = 0;
+
+  // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
+  // Only overwrites *unix_time on success.
+  virtual Status GetCurrentTime(int64_t* unix_time) = 0;
+
+  // Converts seconds-since-Jan-01-1970 to a printable string
+  virtual std::string TimeToString(uint64_t time) = 0;
+};
+
+// Wrapper class for a SystemClock. Redirects all methods (except Name)
+// of the SystemClock interface to the target/wrapped class.
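+//
+// For example (an illustrative sketch; `OffsetClock` is hypothetical and not
+// part of this header): a wrapper that shifts the wrapped clock forward by a
+// fixed amount could be written as
+//
+//   class OffsetClock : public SystemClockWrapper {
+//    public:
+//     OffsetClock(const std::shared_ptr<SystemClock>& base,
+//                 uint64_t offset_us)
+//         : SystemClockWrapper(base), offset_us_(offset_us) {}
+//     const char* Name() const override { return "OffsetClock"; }
+//     uint64_t NowMicros() override {
+//       // Delegate to the wrapped clock, then apply the fixed shift.
+//       return SystemClockWrapper::NowMicros() + offset_us_;
+//     }
+//
+//    private:
+//     uint64_t offset_us_;  // fixed shift applied to NowMicros()
+//   };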
+class SystemClockWrapper : public SystemClock {
+ public:
+  explicit SystemClockWrapper(const std::shared_ptr<SystemClock>& t);
+
+  uint64_t NowMicros() override { return target_->NowMicros(); }
+
+  uint64_t NowNanos() override { return target_->NowNanos(); }
+
+  uint64_t CPUMicros() override { return target_->CPUMicros(); }
+
+  uint64_t CPUNanos() override { return target_->CPUNanos(); }
+
+  virtual void SleepForMicroseconds(int micros) override {
+    return target_->SleepForMicroseconds(micros);
+  }
+
+  Status GetCurrentTime(int64_t* unix_time) override {
+    return target_->GetCurrentTime(unix_time);
+  }
+
+  std::string TimeToString(uint64_t time) override {
+    return target_->TimeToString(time);
+  }
+
+  Status PrepareOptions(const ConfigOptions& options) override;
+  std::string SerializeOptions(const ConfigOptions& config_options,
+                               const std::string& header) const override;
+  const Customizable* Inner() const override { return target_.get(); }
+
+ protected:
+  std::shared_ptr<SystemClock> target_;
+};
+
+}  // end namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table.h
new file mode 100644
index 0000000000..6e8f60577a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table.h
@@ -0,0 +1,927 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Currently we support two types of tables: plain table and block-based table.
+// 1. Block-based table: this is the default table type that we inherited from
+//    LevelDB, which was designed for storing data in hard disk or flash
+//    device.
+// 2. Plain table: it is one of RocksDB's SST file formats optimized
+//    for low query latency on pure-memory or really low-latency media.
+//
+// A tutorial of rocksdb table formats is available here:
+//   https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
+//
+// Example code is also available
+//   https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats#wiki-examples
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "rocksdb/cache.h"
+#include "rocksdb/customizable.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// -- Block-based Table
+class Cache;
+class FilterPolicy;
+class FlushBlockPolicyFactory;
+class PersistentCache;
+class RandomAccessFile;
+struct TableReaderOptions;
+struct TableBuilderOptions;
+class TableBuilder;
+class TableFactory;
+class TableReader;
+class WritableFileWriter;
+struct ConfigOptions;
+struct EnvOptions;
+
+// Types of checksums to use for checking integrity of logical blocks within
+// files. All checksums currently use 32 bits of checking power (1 in 4B
+// chance of failing to detect random corruption).
+enum ChecksumType : char {
+  kNoChecksum = 0x0,
+  kCRC32c = 0x1,
+  kxxHash = 0x2,
+  kxxHash64 = 0x3,
+  kXXH3 = 0x4,  // Supported since RocksDB 6.27
+};
+
+// `PinningTier` is used to specify which tier of block-based tables should
+// be affected by a block cache pinning setting (see
+// `MetadataCacheOptions` below).
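+//
+// For example (an illustrative configuration sketch; `table_options` and
+// `options` are assumed local variables, not part of this header):
+//
+//   BlockBasedTableOptions table_options;
+//   table_options.cache_index_and_filter_blocks = true;
+//   // Keep all index/filter partitions of all files pinned in block cache.
+//   table_options.metadata_cache_options.partition_pinning =
+//       PinningTier::kAll;
+//   Options options;
+//   options.table_factory.reset(NewBlockBasedTableFactory(table_options));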
+enum class PinningTier {
+  // For compatibility, this value specifies to fallback to the behavior
+  // indicated by the deprecated options,
+  // `pin_l0_filter_and_index_blocks_in_cache` and
+  // `pin_top_level_index_and_filter`.
+  kFallback,
+
+  // This tier contains no block-based tables.
+  kNone,
+
+  // This tier contains block-based tables that may have originated from a
+  // memtable flush. In particular, it includes tables from L0 that are smaller
+  // than 1.5 times the current `write_buffer_size`. Note these criteria imply
+  // it can include intra-L0 compaction outputs and ingested files, as long as
+  // they are not abnormally large compared to flushed files in L0.
+  kFlushedAndSimilar,
+
+  // This tier contains all block-based tables.
+  kAll,
+};
+
+// `MetadataCacheOptions` contains members indicating the desired caching
+// behavior for the different categories of metadata blocks.
+struct MetadataCacheOptions {
+  // The tier of block-based tables whose top-level index into metadata
+  // partitions will be pinned. Currently indexes and filters may be
+  // partitioned.
+  //
+  // Note `cache_index_and_filter_blocks` must be true for this option to have
+  // any effect. Otherwise any top-level index into metadata partitions would
+  // be held in table reader memory, outside the block cache.
+  PinningTier top_level_index_pinning = PinningTier::kFallback;
+
+  // The tier of block-based tables whose metadata partitions will be pinned.
+  // Currently indexes and filters may be partitioned.
+  PinningTier partition_pinning = PinningTier::kFallback;
+
+  // The tier of block-based tables whose unpartitioned metadata blocks will be
+  // pinned.
+  //
+  // Note `cache_index_and_filter_blocks` must be true for this option to have
+  // any effect. Otherwise the unpartitioned meta-blocks would be held in table
+  // reader memory, outside the block cache.
+  PinningTier unpartitioned_pinning = PinningTier::kFallback;
+};
+
+struct CacheEntryRoleOptions {
+  enum class Decision {
+    kEnabled,
+    kDisabled,
+    kFallback,
+  };
+  Decision charged = Decision::kFallback;
+  bool operator==(const CacheEntryRoleOptions& other) const {
+    return charged == other.charged;
+  }
+};
+
+struct CacheUsageOptions {
+  CacheEntryRoleOptions options;
+  std::map<CacheEntryRole, CacheEntryRoleOptions> options_overrides;
+};
+
+// For advanced user only
+struct BlockBasedTableOptions {
+  static const char* kName() { return "BlockTableOptions"; };
+  // @flush_block_policy_factory creates the instances of flush block policy,
+  // which provides a configurable way to determine when to flush a block in
+  // the block based tables. If not set, table builder will use the default
+  // block flush policy, which cuts blocks by block size (please refer to
+  // `FlushBlockBySizePolicy`).
+  std::shared_ptr<FlushBlockPolicyFactory> flush_block_policy_factory;
+
+  // TODO(kailiu) Temporarily disable this feature by making the default value
+  // to be false.
+  //
+  // TODO(ajkr) we need to update names of variables controlling meta-block
+  // caching as they should now apply to range tombstone and compression
+  // dictionary meta-blocks, in addition to index and filter meta-blocks.
+  //
+  // Whether to put index/filter blocks in the block cache. When false,
+  // each "table reader" object will pre-load index/filter blocks during
+  // table initialization. Index and filter partition blocks always use
+  // block cache regardless of this option.
+  bool cache_index_and_filter_blocks = false;
+
+  // If cache_index_and_filter_blocks is enabled, cache index and filter
+  // blocks with high priority.
If set to true, depending on implementation of + // block cache, index, filter, and other metadata blocks may be less likely + // to be evicted than data blocks. + bool cache_index_and_filter_blocks_with_high_priority = true; + + // DEPRECATED: This option will be removed in a future version. For now, this + // option still takes effect by updating each of the following variables that + // has the default value, `PinningTier::kFallback`: + // + // - `MetadataCacheOptions::partition_pinning` + // - `MetadataCacheOptions::unpartitioned_pinning` + // + // The updated value is chosen as follows: + // + // - `pin_l0_filter_and_index_blocks_in_cache == false` -> + // `PinningTier::kNone` + // - `pin_l0_filter_and_index_blocks_in_cache == true` -> + // `PinningTier::kFlushedAndSimilar` + // + // To migrate away from this flag, explicitly configure + // `MetadataCacheOptions` as described above. + // + // if cache_index_and_filter_blocks is true and the below is true, then + // filter and index blocks are stored in the cache, but a reference is + // held in the "table reader" object so the blocks are pinned and only + // evicted from cache when the table reader is freed. + bool pin_l0_filter_and_index_blocks_in_cache = false; + + // DEPRECATED: This option will be removed in a future version. For now, this + // option still takes effect by updating + // `MetadataCacheOptions::top_level_index_pinning` when it has the + // default value, `PinningTier::kFallback`. + // + // The updated value is chosen as follows: + // + // - `pin_top_level_index_and_filter == false` -> + // `PinningTier::kNone` + // - `pin_top_level_index_and_filter == true` -> + // `PinningTier::kAll` + // + // To migrate away from this flag, explicitly configure + // `MetadataCacheOptions` as described above. + // + // If cache_index_and_filter_blocks is true and the below is true, then + // the top-level index of partitioned filter and index blocks are stored in + // the cache, but a reference is held in the "table reader" object so the + // blocks are pinned and only evicted from cache when the table reader is + // freed. This is not limited to l0 in LSM tree. + bool pin_top_level_index_and_filter = true; + + // The desired block cache pinning behavior for the different categories of + // metadata blocks. While pinning can reduce block cache contention, users + // must take care not to pin excessive amounts of data, which risks + // overflowing block cache. + MetadataCacheOptions metadata_cache_options; + + // The index type that will be used for this table. + enum IndexType : char { + // A space efficient index block that is optimized for + // binary-search-based index. + kBinarySearch = 0x00, + + // The hash index, if enabled, will do the hash lookup when + // `Options.prefix_extractor` is provided. + kHashSearch = 0x01, + + // A two-level index implementation. Both levels are binary search indexes. + // Second level index blocks ("partitions") use block cache even when + // cache_index_and_filter_blocks=false. + kTwoLevelIndexSearch = 0x02, + + // Like kBinarySearch, but index also contains first key of each block. + // This allows iterators to defer reading the block until it's actually + // needed. May significantly reduce read amplification of short range scans. + // Without it, iterator seek usually reads one block from each level-0 file + // and from each level, which may be expensive. 
+    // Works best in combination with:
+    //  - IndexShorteningMode::kNoShortening,
+    //  - custom FlushBlockPolicy to cut blocks at some meaningful boundaries,
+    //    e.g. when prefix changes.
+    // Makes the index significantly bigger (2x or more), especially when keys
+    // are long.
+    kBinarySearchWithFirstKey = 0x03,
+  };
+
+  IndexType index_type = kBinarySearch;
+
+  // The index type that will be used for the data block.
+  enum DataBlockIndexType : char {
+    kDataBlockBinarySearch = 0,   // traditional block type
+    kDataBlockBinaryAndHash = 1,  // additional hash index
+  };
+
+  DataBlockIndexType data_block_index_type = kDataBlockBinarySearch;
+
+  // #entries/#buckets. It is valid only when data_block_hash_index_type is
+  // kDataBlockBinaryAndHash.
+  double data_block_hash_table_util_ratio = 0.75;
+
+  // Option hash_index_allow_collision is now deleted.
+  // It will behave as if hash_index_allow_collision=true.
+
+  // Use the specified checksum type. Newly created table files will be
+  // protected with this checksum type. Old table files will still be readable,
+  // even though they have different checksum type.
+  ChecksumType checksum = kXXH3;
+
+  // Disable block cache. If this is set to true,
+  // then no block cache should be used, and the block_cache should
+  // point to a nullptr object.
+  bool no_block_cache = false;
+
+  // If non-NULL use the specified cache for blocks.
+  // If NULL, rocksdb will automatically create and use a 32MB internal cache.
+  std::shared_ptr<Cache> block_cache = nullptr;
+
+  // If non-NULL use the specified cache for pages read from device.
+  // If NULL, no page cache is used.
+  std::shared_ptr<PersistentCache> persistent_cache = nullptr;
+
+  // Approximate size of user data packed per block. Note that the
+  // block size specified here corresponds to uncompressed data. The
+  // actual size of the unit read from disk may be smaller if
+  // compression is enabled. This parameter can be changed dynamically.
+  uint64_t block_size = 4 * 1024;
+
+  // This is used to close a block before it reaches the configured
+  // 'block_size'. If the percentage of free space in the current block is less
+  // than this specified number and adding a new record to the block will
+  // exceed the configured block size, then this block will be closed and the
+  // new record will be written to the next block.
+  int block_size_deviation = 10;
+
+  // Number of keys between restart points for delta encoding of keys.
+  // This parameter can be changed dynamically. Most clients should
+  // leave this parameter alone. The minimum value allowed is 1. Any smaller
+  // value will be silently overwritten with 1.
+  int block_restart_interval = 16;
+
+  // Same as block_restart_interval but used for the index block.
+  int index_block_restart_interval = 1;
+
+  // Block size for partitioned metadata. Currently applied to indexes when
+  // kTwoLevelIndexSearch is used and to filters when partition_filters is
+  // used. Note: Since in the current implementation the filters and index
+  // partitions are aligned, an index/filter block is created when either index
+  // or filter block size reaches the specified limit.
+  // Note: this limit is currently applied to only index blocks; a filter
+  // partition is cut right after an index block is cut
+  // TODO(myabandeh): remove the note above when filter partitions are cut
+  // separately
+  uint64_t metadata_block_size = 4096;
+
+  // `cache_usage_options` allows users to specify the default
+  // options (`cache_usage_options.options`) and the overriding
+  // options (`cache_usage_options.options_overrides`)
+  // for different `CacheEntryRole` under various features related to cache
+  // usage.
+  //
+  // For a certain `CacheEntryRole role` and a certain feature `f` of
+  // `CacheEntryRoleOptions`:
+  // 1. If `options_overrides` has an entry for `role` and
+  //    `options_overrides[role].f != kFallback`, we use
+  //    `options_overrides[role].f`
+  // 2. Otherwise, if `options[role].f != kFallback`, we use `options[role].f`
+  // 3. Otherwise, we follow the compatible existing behavior for `f` (see
+  //    each feature's comment for more)
+  //
+  // `cache_usage_options` currently supports specifying options for the
+  // following features:
+  //
+  // 1. Memory charging to block cache (`CacheEntryRoleOptions::charged`)
+  // Memory charging is a feature of accounting memory usage of a specific area
+  // (represented by `CacheEntryRole`) toward usage in block cache (if
+  // available), by updating a dynamic charge to the block cache loosely based
+  // on the actual memory usage of that area.
+  //
+  // (a) CacheEntryRole::kCompressionDictionaryBuildingBuffer
+  // (i) If kEnabled:
+  // Charge memory usage of the buffered data used as training samples for
+  // dictionary compression.
+  // If such memory usage exceeds the available space left in the block cache
+  // at some point (i.e., causing a cache full under
+  // `LRUCacheOptions::strict_capacity_limit` = true), the data will then be
+  // unbuffered.
+  // (ii) If kDisabled:
+  // Does not charge the memory usage mentioned above.
+  // (iii) Compatible existing behavior:
+  // Same as kEnabled.
+  //
+  // (b) CacheEntryRole::kFilterConstruction
+  // (i) If kEnabled:
+  // Charge memory usage of Bloom Filter
+  // (format_version >= 5) and Ribbon Filter construction.
+  // If additional temporary memory of Ribbon Filter exceeds the available
+  // space left in the block cache at some point (i.e., causing a cache full
+  // under `LRUCacheOptions::strict_capacity_limit` = true),
+  // construction will fall back to Bloom Filter.
+  // (ii) If kDisabled:
+  // Does not charge the memory usage mentioned above.
+  // (iii) Compatible existing behavior:
+  // Same as kDisabled.
+  //
+  // (c) CacheEntryRole::kBlockBasedTableReader
+  // (i) If kEnabled:
+  // Charge memory usage of table properties +
+  // index block/filter block/uncompression dictionary (when stored in table
+  // reader i.e., BlockBasedTableOptions::cache_index_and_filter_blocks ==
+  // false) + some internal data structures during table reader creation.
+  // If such a table reader exceeds
+  // the available space left in the block cache at some point (i.e., causing
+  // a cache full under `LRUCacheOptions::strict_capacity_limit` = true),
+  // creation will fail with Status::MemoryLimit().
+  // (ii) If kDisabled:
+  // Does not charge the memory usage mentioned above.
+  // (iii) Compatible existing behavior:
+  // Same as kDisabled.
+  //
+  // (d) CacheEntryRole::kFileMetadata
+  // (i) If kEnabled:
+  // Charge memory usage of file metadata. RocksDB holds one file metadata
+  // structure in-memory per on-disk table file.
+  // If such file metadata's
+  // memory exceeds the available space left in the block cache at some point
+  // (i.e., causing a cache full under `LRUCacheOptions::strict_capacity_limit`
+  // = true), creation will fail with Status::MemoryLimit().
+  // (ii) If kDisabled:
+  // Does not charge the memory usage mentioned above.
+  // (iii) Compatible existing behavior:
+  // Same as kDisabled.
+  //
+  // (e) Other CacheEntryRole
+  // Not supported.
+  // `Status::kNotSupported` will be returned if
+  // `CacheEntryRoleOptions::charged` is set to {`kEnabled`, `kDisabled`}.
+  //
+  //
+  // 2. More to come ...
+  //
+  CacheUsageOptions cache_usage_options;
+
+  // Note: currently this option requires kTwoLevelIndexSearch to be set as
+  // well.
+  // TODO(myabandeh): remove the note above once the limitation is lifted
+  // Use partitioned full filters for each SST file. This option is
+  // incompatible with block-based filters. Filter partition blocks use
+  // block cache even when cache_index_and_filter_blocks=false.
+  bool partition_filters = false;
+
+  // Option to generate Bloom/Ribbon filters that minimize memory
+  // internal fragmentation.
+  //
+  // When false, malloc_usable_size is not available, or format_version < 5,
+  // filters are generated without regard to internal fragmentation when
+  // loaded into memory (historical behavior). When true (and
+  // malloc_usable_size is available and format_version >= 5), then
+  // filters are generated to "round up" and "round down" their sizes to
+  // minimize internal fragmentation when loaded into memory, assuming the
+  // reading DB has the same memory allocation characteristics as the
+  // generating DB. This option does not break forward or backward
+  // compatibility.
+  //
+  // While individual filters will vary in bits/key and false positive rate
+  // when setting is true, the implementation attempts to maintain a weighted
+  // average FP rate for filters consistent with this option set to false.
+  //
+  // With Jemalloc for example, this setting is expected to save about 10% of
+  // the memory footprint and block cache charge of filters, while increasing
+  // disk usage of filters by about 1-2% due to encoding efficiency losses
+  // with variance in bits/key.
+  //
+  // NOTE: Because some memory counted by block cache might be unmapped pages
+  // within internal fragmentation, this option can increase observed RSS
+  // memory usage. With cache_index_and_filter_blocks=true, this option makes
+  // the block cache better at using the space it is allowed. (These issues
+  // should not arise with partitioned filters.)
+  //
+  // NOTE: Do not set to true if you do not trust malloc_usable_size. With
+  // this option, RocksDB might access an allocated memory object beyond its
+  // original size if malloc_usable_size says it is safe to do so. While this
+  // can be considered bad practice, it should not produce undefined behavior
+  // unless malloc_usable_size is buggy or broken.
+  bool optimize_filters_for_memory = false;
+
+  // Use delta encoding to compress keys in blocks.
+  // ReadOptions::pin_data requires this option to be disabled.
+  //
+  // Default: true
+  bool use_delta_encoding = true;
+
+  // If non-nullptr, use the specified filter policy to reduce disk reads.
+  // Many applications will benefit from passing the result of
+  // NewBloomFilterPolicy() here.
+  std::shared_ptr<const FilterPolicy> filter_policy = nullptr;
+
+  // If true, place whole keys in the filter (not just prefixes).
+  // This must generally be true for gets to be efficient.
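+  // For example (an illustrative sketch; `t` and `options` are assumed local
+  // variables): combining a Bloom filter with prefix-only filtering disables
+  // whole-key probes, so point lookups can only be served by the filter when
+  // the sought key's prefix is absent:
+  //
+  //   BlockBasedTableOptions t;
+  //   t.filter_policy.reset(NewBloomFilterPolicy(10));
+  //   t.whole_key_filtering = false;  // filter stores prefixes only
+  //   options.prefix_extractor.reset(NewFixedPrefixTransform(4));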
+ bool whole_key_filtering = true; + + // If true, detect corruption during Bloom Filter (format_version >= 5) + // and Ribbon Filter construction. + // + // This is an extra check that is only + // useful in detecting software bugs or CPU+memory malfunction. + // Turning on this feature increases filter construction time by 30%. + // + // This parameter can be changed dynamically by + // DB::SetOptions({{"block_based_table_factory", + // "{detect_filter_construct_corruption=true;}"}}); + // + // TODO: optimize this performance + bool detect_filter_construct_corruption = false; + + // Verify that decompressing the compressed block gives back the input. This + // is a verification mode that we use to detect bugs in compression + // algorithms. + bool verify_compression = false; + + // If used, For every data block we load into memory, we will create a bitmap + // of size ((block_size / `read_amp_bytes_per_bit`) / 8) bytes. This bitmap + // will be used to figure out the percentage we actually read of the blocks. + // + // When this feature is used Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES and + // Tickers::READ_AMP_TOTAL_READ_BYTES can be used to calculate the + // read amplification using this formula + // (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES) + // + // value => memory usage (percentage of loaded blocks memory) + // 1 => 12.50 % + // 2 => 06.25 % + // 4 => 03.12 % + // 8 => 01.56 % + // 16 => 00.78 % + // + // Note: This number must be a power of 2, if not it will be sanitized + // to be the next lowest power of 2, for example a value of 7 will be + // treated as 4, a value of 19 will be treated as 16. + // + // Default: 0 (disabled) + uint32_t read_amp_bytes_per_bit = 0; + + // We currently have these versions: + // 0 -- This version can be read by really old RocksDB's. Doesn't support + // changing checksum type (default is CRC32). + // 1 -- Can be read by RocksDB's versions since 3.0. Supports non-default + // checksum, like xxHash. It is written by RocksDB when + // BlockBasedTableOptions::checksum is something other than kCRC32c. (version + // 0 is silently upconverted) + // 2 -- Can be read by RocksDB's versions since 3.10. Changes the way we + // encode compressed blocks with LZ4, BZip2 and Zlib compression. If you + // don't plan to run RocksDB before version 3.10, you should probably use + // this. + // 3 -- Can be read by RocksDB's versions since 5.15. Changes the way we + // encode the keys in index blocks. If you don't plan to run RocksDB before + // version 5.15, you should probably use this. + // This option only affects newly written tables. When reading existing + // tables, the information about version is read from the footer. + // 4 -- Can be read by RocksDB's versions since 5.16. Changes the way we + // encode the values in index blocks. If you don't plan to run RocksDB before + // version 5.16 and you are using index_block_restart_interval > 1, you should + // probably use this as it would reduce the index size. + // This option only affects newly written tables. When reading existing + // tables, the information about version is read from the footer. + // 5 -- Can be read by RocksDB's versions since 6.6.0. Full and partitioned + // filters use a generally faster and more accurate Bloom filter + // implementation, with a different schema. + uint32_t format_version = 5; + + // Store index blocks on disk in compressed format. 
Changing this option to + // false will avoid the overhead of decompression if index blocks are evicted + // and read back + bool enable_index_compression = true; + + // Align data blocks on lesser of page size and block size + bool block_align = false; + + // This enum allows trading off increased index size for improved iterator + // seek performance in some situations, particularly when block cache is + // disabled (ReadOptions::fill_cache = false) and direct IO is + // enabled (DBOptions::use_direct_reads = true). + // The default mode is the best tradeoff for most use cases. + // This option only affects newly written tables. + // + // The index contains a key separating each pair of consecutive blocks. + // Let A be the highest key in one block, B the lowest key in the next block, + // and I the index entry separating these two blocks: + // [ ... A] I [B ...] + // I is allowed to be anywhere in [A, B). + // If an iterator is seeked to a key in (A, I], we'll unnecessarily read the + // first block, then immediately fall through to the second block. + // However, if I=A, this can't happen, and we'll read only the second block. + // In kNoShortening mode, we use I=A. In other modes, we use the shortest + // key in [A, B), which usually significantly reduces index size. + // + // There's a similar story for the last index entry, which is an upper bound + // of the highest key in the file. If it's shortened and therefore + // overestimated, iterator is likely to unnecessarily read the last data block + // from each file on each seek. + enum class IndexShorteningMode : char { + // Use full keys. + kNoShortening, + // Shorten index keys between blocks, but use full key for the last index + // key, which is the upper bound of the whole file. + kShortenSeparators, + // Shorten both keys between blocks and key after last block. + kShortenSeparatorsAndSuccessor, + }; + + IndexShorteningMode index_shortening = + IndexShorteningMode::kShortenSeparators; + + // RocksDB does auto-readahead for iterators on noticing more than two reads + // for a table file if user doesn't provide readahead_size. The readahead + // starts at BlockBasedTableOptions.initial_auto_readahead_size (default: 8KB) + // and doubles on every additional read upto max_auto_readahead_size and + // max_auto_readahead_size can be configured. + // + // Special Value: 0 - If max_auto_readahead_size is set 0 then it will disable + // the implicit auto prefetching. + // If max_auto_readahead_size provided is less + // than initial_auto_readahead_size, then RocksDB will sanitize the + // initial_auto_readahead_size and set it to max_auto_readahead_size. + // + // Value should be provided along with KB i.e. 256 * 1024 as it will prefetch + // the blocks. + // + // Found that 256 KB readahead size provides the best performance, based on + // experiments, for auto readahead. Experiment data is in PR #3282. + // + // This parameter can be changed dynamically by + // DB::SetOptions({{"block_based_table_factory", + // "{max_auto_readahead_size=0;}"}})); + // + // Changing the value dynamically will only affect files opened after the + // change. + // + // Default: 256 KB (256 * 1024). + size_t max_auto_readahead_size = 256 * 1024; + + // If enabled, prepopulate warm/hot blocks (data, uncompressed dict, index and + // filter blocks) which are already in memory into block cache at the time of + // flush. On a flush, the block that is in memory (in memtables) get flushed + // to the device. 
If using Direct IO, additional IO is incurred to read this
+  // data back into memory again, which is avoided by enabling this option.
+  // This further helps if the workload exhibits high temporal locality, where
+  // most of the reads go to recently written data. This also helps in case of
+  // a Distributed FileSystem.
+  //
+  // This parameter can be changed dynamically by
+  //     DB::SetOptions({{"block_based_table_factory",
+  //                      "{prepopulate_block_cache=kFlushOnly;}"}}));
+  enum class PrepopulateBlockCache : char {
+    // Disable prepopulate block cache.
+    kDisable,
+    // Prepopulate blocks during flush only.
+    kFlushOnly,
+  };
+
+  PrepopulateBlockCache prepopulate_block_cache =
+      PrepopulateBlockCache::kDisable;
+
+  // RocksDB does auto-readahead for iterators on noticing more than two reads
+  // for a table file if user doesn't provide readahead_size. The readahead
+  // size starts at initial_auto_readahead_size and doubles on every additional
+  // read up to BlockBasedTableOptions.max_auto_readahead_size.
+  // max_auto_readahead_size can also be configured.
+  //
+  // Scenarios:
+  // - If initial_auto_readahead_size is set to 0, it will disable the implicit
+  //   auto prefetching irrespective of max_auto_readahead_size.
+  // - If max_auto_readahead_size is set to 0, it will disable the internal
+  //   prefetching irrespective of initial_auto_readahead_size.
+  // - If initial_auto_readahead_size > max_auto_readahead_size, then RocksDB
+  //   will sanitize the value of initial_auto_readahead_size to
+  //   max_auto_readahead_size and readahead_size will be
+  //   max_auto_readahead_size.
+  //
+  // The value should be provided in bytes, e.g. 8 * 1024, as it is used to
+  // prefetch the blocks.
+  //
+  // This parameter can be changed dynamically by
+  //     DB::SetOptions({{"block_based_table_factory",
+  //                      "{initial_auto_readahead_size=0;}"}}));
+  //
+  // Changing the value dynamically will only affect files opened after the
+  // change.
+  //
+  // Default: 8 KB (8 * 1024).
+  size_t initial_auto_readahead_size = 8 * 1024;
+
+  // RocksDB does auto-readahead for iterators on noticing more than two reads
+  // for a table file if user doesn't provide readahead_size and reads are
+  // sequential.
+  // num_file_reads_for_auto_readahead indicates after how many
+  // sequential reads internal auto prefetching should start.
+  //
+  // For example, if the value is 2, then after reading 2 sequential data
+  // blocks, prefetching will start on the third data block.
+  // If set to 0, prefetching starts from the first read.
+  //
+  // This parameter can be changed dynamically by
+  //     DB::SetOptions({{"block_based_table_factory",
+  //                      "{num_file_reads_for_auto_readahead=0;}"}}));
+  //
+  // Changing the value dynamically will only affect files opened after the
+  // change.
+  //
+  // Default: 2
+  uint64_t num_file_reads_for_auto_readahead = 2;
+};
+
+// Table Properties that are specific to block-based table properties.
+struct BlockBasedTablePropertyNames {
+  // value of this property is a fixed int32 number.
+  static const std::string kIndexType;
+  // value is "1" for true and "0" for false.
+  static const std::string kWholeKeyFiltering;
+  // value is "1" for true and "0" for false.
+  static const std::string kPrefixFiltering;
+};
+
+// Create default block based table factory.
+extern TableFactory* NewBlockBasedTableFactory(
+    const BlockBasedTableOptions& table_options = BlockBasedTableOptions());
+
+enum EncodingType : char {
+  // Always write full keys without any special encoding.
+  kPlain,
+  // Find opportunity to write the same prefix once for multiple rows.
+  // In some cases, when a key follows a previous key with the same prefix,
+  // instead of writing out the full key, it just writes out the size of the
+  // shared prefix, as well as other bytes, to save some bytes.
+  //
+  // When using this option, the user is required to use the same prefix
+  // extractor to make sure the same prefix will be extracted from the same
+  // key. The Name() value of the prefix extractor will be stored in the file.
+  // When reopening the file, the name of the options.prefix_extractor given
+  // will be bitwise compared to the prefix extractors stored in the file. An
+  // error will be returned if the two don't match.
+  kPrefix,
+};
+
+// Table Properties that are specific to plain table properties.
+struct PlainTablePropertyNames {
+  static const std::string kEncodingType;
+  static const std::string kBloomVersion;
+  static const std::string kNumBloomBlocks;
+};
+
+const uint32_t kPlainTableVariableLength = 0;
+
+struct PlainTableOptions {
+  static const char* kName() { return "PlainTableOptions"; };
+  // @user_key_len: plain table has optimization for fixed-size keys, which can
+  //                be specified via user_key_len. Alternatively, you can pass
+  //                `kPlainTableVariableLength` if your keys have variable
+  //                lengths.
+  uint32_t user_key_len = kPlainTableVariableLength;
+
+  // @bloom_bits_per_key: the number of bits used for the bloom filter per
+  //                      prefix. You may disable it by passing a zero.
+  int bloom_bits_per_key = 10;
+
+  // @hash_table_ratio: the desired utilization of the hash table used for
+  //                    prefix hashing.
+  //                    hash_table_ratio = number of prefixes / #buckets in the
+  //                    hash table
+  double hash_table_ratio = 0.75;
+
+  // @index_sparseness: inside each prefix, an index record is built for every
+  //                    index_sparseness keys, for binary search inside each
+  //                    hash bucket.
+  //                    For encoding type kPrefix, the value will be used when
+  //                    writing to determine an interval to rewrite the full
+  //                    key. It will also be used as a suggestion and satisfied
+  //                    when possible.
+  size_t index_sparseness = 16;
+
+  // @huge_page_tlb_size: if <=0, allocate hash indexes and blooms from malloc.
+  //                      Otherwise from huge page TLB. The user needs to
+  //                      reserve huge pages for it to be allocated, like:
+  //                          sysctl -w vm.nr_hugepages=20
+  //                      See linux doc Documentation/vm/hugetlbpage.txt
+  size_t huge_page_tlb_size = 0;
+
+  // @encoding_type: how to encode the keys. See enum EncodingType above for
+  //                 the choices. The value will determine how to encode keys
+  //                 when writing to a new SST file. This value will be stored
+  //                 inside the SST file which will be used when reading from
+  //                 the file, which makes it possible for users to choose
+  //                 different encoding type when reopening a DB. Files with
+  //                 different encoding types can co-exist in the same DB and
+  //                 can be read.
+  EncodingType encoding_type = kPlain;
+
+  // @full_scan_mode: mode for reading the whole file one record at a time
+  //                  without using the index.
+  bool full_scan_mode = false;
+
+  // @store_index_in_file: compute plain table index and bloom filter during
+  //                       file building and store it in file. When reading
+  //                       file, index will be mapped instead of recomputation.
+  bool store_index_in_file = false;
+};
+
+// -- Plain Table with prefix-only seek
+// For this factory, you need to set Options.prefix_extractor properly to make
+// it work. Look-up starts with a prefix hash lookup for the key prefix.
+// Inside the hash bucket found, a binary search is executed for hash
+// conflicts. Finally, a linear search is used.
+
+extern TableFactory* NewPlainTableFactory(
+    const PlainTableOptions& options = PlainTableOptions());
+
+struct CuckooTablePropertyNames {
+  // The key that is used to fill empty buckets.
+  static const std::string kEmptyKey;
+  // Fixed length of value.
+  static const std::string kValueLength;
+  // Number of hash functions used in Cuckoo Hash.
+  static const std::string kNumHashFunc;
+  // It denotes the number of buckets in a Cuckoo Block. Given a key and a
+  // particular hash function, a Cuckoo Block is a set of consecutive buckets,
+  // where starting bucket id is given by the hash function on the key. In case
+  // of a collision during inserting the key, the builder tries to insert the
+  // key in other locations of the cuckoo block before using the next hash
+  // function. This reduces cache miss during read operation in case of
+  // collision.
+  static const std::string kCuckooBlockSize;
+  // Size of the hash table. Use this number to compute the modulo of hash
+  // function. The actual number of buckets will be kMaxHashTableSize +
+  // kCuckooBlockSize - 1. The last kCuckooBlockSize-1 buckets are used to
+  // accommodate the Cuckoo Block from end of hash table, due to cache friendly
+  // implementation.
+  static const std::string kHashTableSize;
+  // Denotes whether the keys sorted in the file are Internal Keys (if false)
+  // or User Keys only (if true).
+  static const std::string kIsLastLevel;
+  // Indicate if using identity function for the first hash function.
+  static const std::string kIdentityAsFirstHash;
+  // Indicate whether modulo or bitwise AND is used to calculate the hash value
+  static const std::string kUseModuleHash;
+  // Fixed user key length
+  static const std::string kUserKeyLength;
+};
+
+struct CuckooTableOptions {
+  static const char* kName() { return "CuckooTableOptions"; };
+
+  // Determines the utilization of hash tables. Smaller values
+  // result in larger hash tables with fewer collisions.
+  double hash_table_ratio = 0.9;
+  // A property used by builder to determine the depth to go to
+  // to search for a path to displace elements in case of
+  // collision. See Builder.MakeSpaceForKey method. Higher
+  // values result in more efficient hash tables with fewer
+  // lookups but take more time to build.
+  uint32_t max_search_depth = 100;
+  // In case of collision while inserting, the builder
+  // attempts to insert in the next cuckoo_block_size
+  // locations before skipping over to the next Cuckoo hash
+  // function. This makes lookups more cache friendly in case
+  // of collisions.
+  uint32_t cuckoo_block_size = 5;
+  // If this option is enabled, user key is treated as uint64_t and its value
+  // is used as hash value directly. This option changes builder's behavior.
+  // Readers ignore this option and behave according to what is specified in
+  // the table property.
+  bool identity_as_first_hash = false;
+  // If this option is set to true, modulo is used during hash calculation.
+  // This often yields better space efficiency at the cost of performance.
+  // If this option is set to false, # of entries in table is constrained to be
+  // power of two, and bitwise AND is used to calculate the hash, which is
+  // faster in general.
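+  //
+  // As an illustrative sketch of the difference (not the exact internal
+  // code): with use_module_hash=true the bucket is computed roughly as
+  //     bucket = hash % num_buckets;
+  // while with use_module_hash=false the table size is constrained to a
+  // power of two and the bucket is
+  //     bucket = hash & (num_buckets - 1);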
+  bool use_module_hash = true;
+};
+
+// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo
+// Hashing
+extern TableFactory* NewCuckooTableFactory(
+    const CuckooTableOptions& table_options = CuckooTableOptions());
+
+class RandomAccessFileReader;
+
+// A base class for table factories.
+class TableFactory : public Customizable {
+ public:
+  virtual ~TableFactory() override {}
+
+  static const char* kBlockCacheOpts() { return "BlockCache"; };
+  static const char* kBlockBasedTableName() { return "BlockBasedTable"; };
+  static const char* kPlainTableName() { return "PlainTable"; }
+  static const char* kCuckooTableName() { return "CuckooTable"; };
+
+  // Creates and configures a new TableFactory from the input options and id.
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& id,
+                                 std::shared_ptr<TableFactory>* factory);
+
+  static const char* Type() { return "TableFactory"; }
+
+  // Returns a table reader object that can fetch data from the file specified
+  // in parameter file. It's the caller's responsibility to make sure
+  // file is in the correct format.
+  //
+  // NewTableReader() is called in three places:
+  // (1) TableCache::FindTable() calls the function when table cache miss
+  // and cache the table object returned.
+  // (2) SstFileDumper (for SST Dump) opens the table and dumps the table
+  // contents using the iterator of the table.
+  // (3) DBImpl::IngestExternalFile() calls this function to read the contents
+  // of the sst file it's attempting to add
+  //
+  // table_reader_options is a TableReaderOptions which contain all the
+  //    needed parameters and configuration to open the table.
+  // file is a file handler to handle the file for the table.
+  // file_size is the physical file size of the file.
+  // table_reader is the output table reader.
+  virtual Status NewTableReader(
+      const TableReaderOptions& table_reader_options,
+      std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+      std::unique_ptr<TableReader>* table_reader,
+      bool prefetch_index_and_filter_in_cache = true) const {
+    ReadOptions ro;
+    return NewTableReader(ro, table_reader_options, std::move(file), file_size,
+                          table_reader, prefetch_index_and_filter_in_cache);
+  }
+
+  // Overload of the above function that allows the caller to pass in a
+  // ReadOptions
+  virtual Status NewTableReader(
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
+      std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+      std::unique_ptr<TableReader>* table_reader,
+      bool prefetch_index_and_filter_in_cache) const = 0;
+
+  // Return a table builder to write to a file for this table type.
+  //
+  // It is called in several places:
+  // (1) When flushing memtable to a level-0 output file, it creates a table
+  //     builder (In DBImpl::WriteLevel0Table(), by calling BuildTable())
+  // (2) During compaction, it gets the builder for writing compaction output
+  //     files in DBImpl::OpenCompactionOutputFile().
+  // (3) When recovering from transaction logs, it creates a table builder to
+  //     write to a level-0 output file (In DBImpl::WriteLevel0TableForRecovery,
+  //     by calling BuildTable())
+  // (4) When running Repairer, it creates a table builder to convert logs to
+  //     SST files (In Repairer::ConvertLogToTable() by calling BuildTable())
+  //
+  // Multiple configured options can be accessed from there, including but not
+  // limited to compression options. file is a handle of a writable file.
+  // It is the caller's responsibility to keep the file open and close the file
+  // after closing the table builder. compression_type is the compression type
+  // to use in this table.
+  virtual TableBuilder* NewTableBuilder(
+      const TableBuilderOptions& table_builder_options,
+      WritableFileWriter* file) const = 0;
+
+  // Return is delete range supported
+  virtual bool IsDeleteRangeSupported() const { return false; }
+};
+
+// Create a special table factory that can open either of the supported
+// table formats, based on setting inside the SST files. It should be used to
+// convert a DB from one table format to another.
+// @table_factory_to_write: the table factory used when writing to new files.
+// @block_based_table_factory: block based table factory to use. If NULL, use
+//                             a default one.
+// @plain_table_factory: plain table factory to use. If NULL, use a default
+//                       one.
+// @cuckoo_table_factory: cuckoo table factory to use. If NULL, use a default
+//                        one.
+extern TableFactory* NewAdaptiveTableFactory(
+    std::shared_ptr<TableFactory> table_factory_to_write = nullptr,
+    std::shared_ptr<TableFactory> block_based_table_factory = nullptr,
+    std::shared_ptr<TableFactory> plain_table_factory = nullptr,
+    std::shared_ptr<TableFactory> cuckoo_table_factory = nullptr);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table_properties.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table_properties.h
new file mode 100644
index 0000000000..ab259f930b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table_properties.h
@@ -0,0 +1,332 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#pragma once
+
+#include <stdint.h>
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/status.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// -- Table Properties
+// Other than basic table properties, each table may also have the user
+// collected properties.
+// The value of the user-collected properties are encoded as raw bytes --
+// users have to interpret these values by themselves.
+// Note: To do prefix seek/scan in `UserCollectedProperties`, you can do
+// something similar to:
+//
+// UserCollectedProperties props = ...;
+// for (auto pos = props.lower_bound(prefix);
+//      pos != props.end() && pos->first.compare(0, prefix.size(), prefix) == 0;
+//      ++pos) {
+//   ...
+// }
+using UserCollectedProperties = std::map<std::string, std::string>;
+
+// table properties' human-readable names in the property block.
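+//
+// For example (an illustrative sketch; error handling elided), the table
+// properties of all live SST files, including these named properties, can be
+// inspected through the DB handle:
+//
+//   TablePropertiesCollection props;  // file path -> properties
+//   Status s = db->GetPropertiesOfAllTables(&props);
+//   for (const auto& p : props) {
+//     // p.second is a std::shared_ptr<const TableProperties>
+//     uint64_t n = p.second->num_entries;
+//   }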
+struct TablePropertiesNames {
+  static const std::string kDbId;
+  static const std::string kDbSessionId;
+  static const std::string kDbHostId;
+  static const std::string kOriginalFileNumber;
+  static const std::string kDataSize;
+  static const std::string kIndexSize;
+  static const std::string kIndexPartitions;
+  static const std::string kTopLevelIndexSize;
+  static const std::string kIndexKeyIsUserKey;
+  static const std::string kIndexValueIsDeltaEncoded;
+  static const std::string kFilterSize;
+  static const std::string kRawKeySize;
+  static const std::string kRawValueSize;
+  static const std::string kNumDataBlocks;
+  static const std::string kNumEntries;
+  static const std::string kNumFilterEntries;
+  static const std::string kDeletedKeys;
+  static const std::string kMergeOperands;
+  static const std::string kNumRangeDeletions;
+  static const std::string kFormatVersion;
+  static const std::string kFixedKeyLen;
+  static const std::string kFilterPolicy;
+  static const std::string kColumnFamilyName;
+  static const std::string kColumnFamilyId;
+  static const std::string kComparator;
+  static const std::string kMergeOperator;
+  static const std::string kPrefixExtractorName;
+  static const std::string kPropertyCollectors;
+  static const std::string kCompression;
+  static const std::string kCompressionOptions;
+  static const std::string kCreationTime;
+  static const std::string kOldestKeyTime;
+  static const std::string kFileCreationTime;
+  static const std::string kSlowCompressionEstimatedDataSize;
+  static const std::string kFastCompressionEstimatedDataSize;
+  static const std::string kSequenceNumberTimeMapping;
+  static const std::string kTailStartOffset;
+};
+
+// `TablePropertiesCollector` provides the mechanism for users to collect
+// their own properties that they are interested in. This class is essentially
+// a collection of callback functions that will be invoked during table
+// building. It is constructed with TablePropertiesCollectorFactory. The
+// methods don't need to be thread-safe, as we will create exactly one
+// TablePropertiesCollector object per table and then call it sequentially.
+//
+// Statuses from these callbacks are currently logged when not OK, but
+// otherwise ignored by RocksDB.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class TablePropertiesCollector {
+ public:
+  virtual ~TablePropertiesCollector() {}
+
+  // DEPRECATED: user-defined collectors should implement AddUserKey(), though
+  // this old function still works for backward-compatibility reasons.
+  // Add() will be called when a new key/value pair is inserted into the table.
+  // @params key the user key that is inserted into the table.
+  // @params value the value that is inserted into the table.
+  virtual Status Add(const Slice& /*key*/, const Slice& /*value*/) {
+    return Status::InvalidArgument(
+        "TablePropertiesCollector::Add() deprecated.");
+  }
+
+  // AddUserKey() will be called when a new key/value pair is inserted into the
+  // table.
+  // @params key the user key that is inserted into the table.
+  // @params value the value that is inserted into the table.
+  virtual Status AddUserKey(const Slice& key, const Slice& value,
+                            EntryType /*type*/, SequenceNumber /*seq*/,
+                            uint64_t /*file_size*/) {
+    // For backwards-compatibility.
+    return Add(key, value);
+  }
+
+  // Called after each new block is cut
+  virtual void BlockAdd(uint64_t /* block_uncomp_bytes */,
+                        uint64_t /* block_compressed_bytes_fast */,
+                        uint64_t /* block_compressed_bytes_slow */) {
+    // Nothing to do here. Subclasses can override.
+    return;
+  }
+
+  // Finish() will be called when a table has already been built and is ready
+  // for writing the properties block.
+  // @params properties User will add their collected statistics to
+  // `properties`.
+  virtual Status Finish(UserCollectedProperties* properties) = 0;
+
+  // Return the human-readable properties, where the key is property name and
+  // the value is the human-readable form of value.
+  virtual UserCollectedProperties GetReadableProperties() const = 0;
+
+  // The name of the properties collector can be used for debugging purposes.
+  virtual const char* Name() const = 0;
+
+  // EXPERIMENTAL Return whether the output file should be further compacted
+  virtual bool NeedCompact() const { return false; }
+};
+
+// Constructs TablePropertiesCollector. Internals create a new
+// TablePropertiesCollector for each new table
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class TablePropertiesCollectorFactory : public Customizable {
+ public:
+  struct Context {
+    uint32_t column_family_id;
+    // The level at creating the SST file (i.e., table), of which the
+    // properties are being collected.
+    int level_at_creation = kUnknownLevelAtCreation;
+    static const uint32_t kUnknownColumnFamily;
+    static const int kUnknownLevelAtCreation = -1;
+  };
+
+  ~TablePropertiesCollectorFactory() override {}
+  static const char* Type() { return "TablePropertiesCollectorFactory"; }
+  static Status CreateFromString(
+      const ConfigOptions& options, const std::string& value,
+      std::shared_ptr<TablePropertiesCollectorFactory>* result);
+
+  // has to be thread-safe
+  virtual TablePropertiesCollector* CreateTablePropertiesCollector(
+      TablePropertiesCollectorFactory::Context context) = 0;
+
+  // The name of the properties collector can be used for debugging purposes.
+  const char* Name() const override = 0;
+
+  // Can be overridden by sub-classes to return the Name, followed by
+  // configuration info that will be logged to the info log when the
+  // DB is opened
+  virtual std::string ToString() const { return Name(); }
+};
+
+// TableProperties contains a bunch of read-only properties of its associated
+// table.
+struct TableProperties {
+ public:
+  // the file number at creation time, or 0 for unknown. When known,
+  // combining with db_session_id must uniquely identify an SST file.
+  uint64_t orig_file_number = 0;
+  // the total size of all data blocks.
+  uint64_t data_size = 0;
+  // the size of index block.
+  uint64_t index_size = 0;
+  // Total number of index partitions if kTwoLevelIndexSearch is used
+  uint64_t index_partitions = 0;
+  // Size of the top-level index if kTwoLevelIndexSearch is used
+  uint64_t top_level_index_size = 0;
+  // Whether the index key is user key. Otherwise it includes 8 bytes of
+  // sequence number added by internal key format.
+  uint64_t index_key_is_user_key = 0;
+  // Whether delta encoding is used to encode the index values.
+  uint64_t index_value_is_delta_encoded = 0;
+  // the size of filter block.
+ uint64_t filter_size = 0; + // total raw (uncompressed, undelineated) key size + uint64_t raw_key_size = 0; + // total raw (uncompressed, undelineated) value size + uint64_t raw_value_size = 0; + // the number of blocks in this table + uint64_t num_data_blocks = 0; + // the number of entries in this table + uint64_t num_entries = 0; + // the number of unique entries (keys or prefixes) added to filters + uint64_t num_filter_entries = 0; + // the number of deletions in the table + uint64_t num_deletions = 0; + // the number of merge operands in the table + uint64_t num_merge_operands = 0; + // the number of range deletions in this table + uint64_t num_range_deletions = 0; + // format version, reserved for backward compatibility + uint64_t format_version = 0; + // If 0, key is variable length. Otherwise number of bytes for each key. + uint64_t fixed_key_len = 0; + // ID of column family for this SST file, corresponding to the CF identified + // by column_family_name. + uint64_t column_family_id = ROCKSDB_NAMESPACE:: + TablePropertiesCollectorFactory::Context::kUnknownColumnFamily; + // Timestamp of the latest key. 0 means unknown. + // TODO(sagar0): Should be changed to latest_key_time ... but don't know the + // full implications of backward compatibility. Hence retaining for now. + uint64_t creation_time = 0; + + // Timestamp of the earliest key. 0 means unknown. + uint64_t oldest_key_time = 0; + // Actual SST file creation time. 0 means unknown. + uint64_t file_creation_time = 0; + // Estimated size of data blocks if compressed using a relatively slower + // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`). + // 0 means unknown. + uint64_t slow_compression_estimated_data_size = 0; + // Estimated size of data blocks if compressed using a relatively faster + // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`). + // 0 means unknown. + uint64_t fast_compression_estimated_data_size = 0; + // Offset of the value of the property "external sst file global seqno" in the + // file if the property exists. + // 0 means not exists. + uint64_t external_sst_file_global_seqno_offset = 0; + + // Offset where the "tail" part of SST file starts + // "Tail" refers to all blocks after data blocks till the end of the SST file + uint64_t tail_start_offset = 0; + + // DB identity + // db_id is an identifier generated the first time the DB is created + // If DB identity is unset or unassigned, `db_id` will be an empty string. + std::string db_id; + + // DB session identity + // db_session_id is an identifier that gets reset every time the DB is opened + // If DB session identity is unset or unassigned, `db_session_id` will be an + // empty string. + std::string db_session_id; + + // Location of the machine hosting the DB instance + // db_host_id identifies the location of the host in some form + // (hostname by default, but can also be any string of the user's choosing). + // It can potentially change whenever the DB is opened + std::string db_host_id; + + // Name of the column family with which this SST file is associated. + // If column family is unknown, `column_family_name` will be an empty string. + std::string column_family_name; + + // The name of the filter policy used in this table. + // If no filter policy is used, `filter_policy_name` will be an empty string. + std::string filter_policy_name; + + // The name of the comparator used in this table. + std::string comparator_name; + + // The name of the merge operator used in this table. 
+ // If no merge operator is used, `merge_operator_name` will be "nullptr". + std::string merge_operator_name; + + // The name of the prefix extractor used in this table + // If no prefix extractor is used, `prefix_extractor_name` will be "nullptr". + std::string prefix_extractor_name; + + // The names of the property collectors factories used in this table + // separated by commas + // {collector_name[1]},{collector_name[2]},{collector_name[3]} .. + std::string property_collectors_names; + + // The compression algo used to compress the SST files. + std::string compression_name; + + // Compression options used to compress the SST files. + std::string compression_options; + + // Sequence number to time mapping, delta encoded. + std::string seqno_to_time_mapping; + + // user collected properties + UserCollectedProperties user_collected_properties; + UserCollectedProperties readable_properties; + + // convert this object to a human readable form + // @prop_delim: delimiter for each property. + std::string ToString(const std::string& prop_delim = "; ", + const std::string& kv_delim = "=") const; + + // Aggregate the numerical member variables of the specified + // TableProperties. + void Add(const TableProperties& tp); + + // Subset of properties that make sense when added together + // between tables. Keys match field names in this class instead + // of using full property names. + std::map GetAggregatablePropertiesAsMap() const; + + // Return the approximated memory usage of this TableProperties object, + // including memory used by the string properties and UserCollectedProperties + std::size_t ApproximateMemoryUsage() const; +}; + +// Extra properties +// Below is a list of non-basic properties that are collected by database +// itself. Especially some properties regarding to the internal keys (which +// is unknown to `table`). +// +// DEPRECATED: these properties now belong as TableProperties members. Please +// use TableProperties::num_deletions and TableProperties::num_merge_operands, +// respectively. +extern uint64_t GetDeletedKeys(const UserCollectedProperties& props); +extern uint64_t GetMergeOperands(const UserCollectedProperties& props, + bool* property_present); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table_reader_caller.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table_reader_caller.h new file mode 100644 index 0000000000..10ec08130f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/table_reader_caller.h @@ -0,0 +1,41 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { +// A list of callers for a table reader. It is used to trace the caller that +// accesses on a block. This is only used for block cache tracing and analysis. +// A user may use kUncategorized if the caller is not interesting for analysis +// or the table reader is called in the test environment, e.g., unit test, table +// reader benchmark, etc. 
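+// As an illustrative sketch (not part of the upstream header), analysis code
+// might bucket block cache lookups by caller roughly like this:
+//
+//   bool IsUserInitiated(TableReaderCaller c) {
+//     return c == kUserGet || c == kUserMultiGet || c == kUserIterator ||
+//            c == kUserApproximateSize || c == kUserVerifyChecksum;
+//   }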
+enum TableReaderCaller : char { + kUserGet = 1, + kUserMultiGet = 2, + kUserIterator = 3, + kUserApproximateSize = 4, + kUserVerifyChecksum = 5, + kSSTDumpTool = 6, + kExternalSSTIngestion = 7, + kRepair = 8, + kPrefetch = 9, + kCompaction = 10, + // A compaction job may refill the block cache with blocks in the new SST + // files if paranoid_file_checks is true. + kCompactionRefill = 11, + // After building a table, it may load all its blocks into the block cache if + // paranoid_file_checks is true. + kFlush = 12, + // sst_file_reader. + kSSTFileReader = 13, + // A list of callers that are either not interesting for analysis or are + // calling from a test environment, e.g., unit test, benchmark, etc. + kUncategorized = 14, + // All callers should be added before kMaxBlockCacheLookupCaller. + kMaxBlockCacheLookupCaller +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/thread_status.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/thread_status.h new file mode 100644 index 0000000000..beecdfd258 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/thread_status.h @@ -0,0 +1,190 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// This file defines the structures for exposing run-time status of any +// rocksdb-related thread. Such run-time status can be obtained via +// GetThreadList() API. +// +// Note that all thread-status features are still under-development, and +// thus APIs and class definitions might subject to change at this point. +// Will remove this comment once the APIs have been finalized. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +#if !defined(NROCKSDB_THREAD_STATUS) +#define ROCKSDB_USING_THREAD_STATUS +#endif + +namespace ROCKSDB_NAMESPACE { + +// TODO(yhchiang): remove this function once c++14 is available +// as std::max will be able to cover this. +// Current MS compiler does not support constexpr +template +struct constexpr_max { + static const int result = (A > B) ? A : B; +}; + +// A structure that describes the current status of a thread. +// The status of active threads can be fetched using +// ROCKSDB_NAMESPACE::GetThreadList(). +struct ThreadStatus { + // The type of a thread. + enum ThreadType : int { + HIGH_PRIORITY = 0, // RocksDB BG thread in high-pri thread pool + LOW_PRIORITY, // RocksDB BG thread in low-pri thread pool + USER, // User thread (Non-RocksDB BG thread) + BOTTOM_PRIORITY, // RocksDB BG thread in bottom-pri thread pool + NUM_THREAD_TYPES + }; + + // The type used to refer to a thread operation. + // A thread operation describes high-level action of a thread. + // Examples include compaction and flush. 
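+  // As an illustrative sketch (not part of this header), a monitor could
+  // poll thread statuses via Env::GetThreadList() and count the background
+  // threads currently flushing or compacting:
+  //
+  //   std::vector<ThreadStatus> statuses;
+  //   db->GetEnv()->GetThreadList(&statuses);
+  //   int busy = 0;
+  //   for (const auto& ts : statuses) {
+  //     if (ts.operation_type == ThreadStatus::OP_FLUSH ||
+  //         ts.operation_type == ThreadStatus::OP_COMPACTION) {
+  //       ++busy;
+  //     }
+  //   }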
+ enum OperationType : int { + OP_UNKNOWN = 0, + OP_COMPACTION, + OP_FLUSH, + OP_DBOPEN, + NUM_OP_TYPES + }; + + enum OperationStage : int { + STAGE_UNKNOWN = 0, + STAGE_FLUSH_RUN, + STAGE_FLUSH_WRITE_L0, + STAGE_COMPACTION_PREPARE, + STAGE_COMPACTION_RUN, + STAGE_COMPACTION_PROCESS_KV, + STAGE_COMPACTION_INSTALL, + STAGE_COMPACTION_SYNC_FILE, + STAGE_PICK_MEMTABLES_TO_FLUSH, + STAGE_MEMTABLE_ROLLBACK, + STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS, + NUM_OP_STAGES + }; + + enum CompactionPropertyType : int { + COMPACTION_JOB_ID = 0, + COMPACTION_INPUT_OUTPUT_LEVEL, + COMPACTION_PROP_FLAGS, + COMPACTION_TOTAL_INPUT_BYTES, + COMPACTION_BYTES_READ, + COMPACTION_BYTES_WRITTEN, + NUM_COMPACTION_PROPERTIES + }; + + enum FlushPropertyType : int { + FLUSH_JOB_ID = 0, + FLUSH_BYTES_MEMTABLES, + FLUSH_BYTES_WRITTEN, + NUM_FLUSH_PROPERTIES + }; + + // The maximum number of properties of an operation. + // This number should be set to the biggest NUM_XXX_PROPERTIES. + static const int kNumOperationProperties = + constexpr_max::result; + + // The type used to refer to a thread state. + // A state describes lower-level action of a thread + // such as reading / writing a file or waiting for a mutex. + enum StateType : int { + STATE_UNKNOWN = 0, + STATE_MUTEX_WAIT = 1, + NUM_STATE_TYPES + }; + + ThreadStatus(const uint64_t _id, const ThreadType _thread_type, + const std::string& _db_name, const std::string& _cf_name, + const OperationType _operation_type, + const uint64_t _op_elapsed_micros, + const OperationStage _operation_stage, + const uint64_t _op_props[], const StateType _state_type) + : thread_id(_id), + thread_type(_thread_type), + db_name(_db_name), + cf_name(_cf_name), + operation_type(_operation_type), + op_elapsed_micros(_op_elapsed_micros), + operation_stage(_operation_stage), + state_type(_state_type) { + for (int i = 0; i < kNumOperationProperties; ++i) { + op_properties[i] = _op_props[i]; + } + } + + // An unique ID for the thread. + const uint64_t thread_id; + + // The type of the thread, it could be HIGH_PRIORITY, + // LOW_PRIORITY, and USER + const ThreadType thread_type; + + // The name of the DB instance where the thread is currently + // involved with. It would be set to empty string if the thread + // does not involve in any DB operation. + const std::string db_name; + + // The name of the column family where the thread is currently + // It would be set to empty string if the thread does not involve + // in any column family. + const std::string cf_name; + + // The operation (high-level action) that the current thread is involved. + const OperationType operation_type; + + // The elapsed time of the current thread operation in microseconds. + const uint64_t op_elapsed_micros; + + // An integer showing the current stage where the thread is involved + // in the current operation. + const OperationStage operation_stage; + + // A list of properties that describe some details about the current + // operation. Same field in op_properties[] might have different + // meanings for different operations. + uint64_t op_properties[kNumOperationProperties]; + + // The state (lower-level action) that the current thread is involved. + const StateType state_type; + + // The followings are a set of utility functions for interpreting + // the information of ThreadStatus + + static std::string GetThreadTypeName(ThreadType thread_type); + + // Obtain the name of an operation given its type. 
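+  // (For example, GetOperationName(ThreadStatus::OP_COMPACTION) yields a
+  // printable name along the lines of "Compaction"; the exact strings are
+  // defined by the implementation.)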
+ static const std::string& GetOperationName(OperationType op_type); + + static const std::string MicrosToString(uint64_t op_elapsed_time); + + // Obtain a human-readable string describing the specified operation stage. + static const std::string& GetOperationStageName(OperationStage stage); + + // Obtain the name of the "i"th operation property of the + // specified operation. + static const std::string& GetOperationPropertyName(OperationType op_type, + int i); + + // Translate the "i"th property of the specified operation given + // a property value. + static std::map InterpretOperationProperties( + OperationType op_type, const uint64_t* op_properties); + + // Obtain the name of a state given its type. + static const std::string& GetStateName(StateType state_type); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/threadpool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/threadpool.h new file mode 100644 index 0000000000..f1cc557524 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/threadpool.h @@ -0,0 +1,67 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +/* + * ThreadPool is a component that will spawn N background threads that will + * be used to execute scheduled work, The number of background threads could + * be modified by calling SetBackgroundThreads(). + * */ +class ThreadPool { + public: + virtual ~ThreadPool() {} + + // Wait for all threads to finish. + // Discard those threads that did not start + // executing + virtual void JoinAllThreads() = 0; + + // Set the number of background threads that will be executing the + // scheduled jobs. + virtual void SetBackgroundThreads(int num) = 0; + virtual int GetBackgroundThreads() = 0; + + // Get the number of jobs scheduled in the ThreadPool queue. + virtual unsigned int GetQueueLen() const = 0; + + // Waits for all jobs to complete those + // that already started running and those that did not + // start yet. This ensures that everything that was thrown + // on the TP runs even though + // we may not have specified enough threads for the amount + // of jobs + virtual void WaitForJobsAndJoinAllThreads() = 0; + + // Submit a fire and forget jobs + // This allows to submit the same job multiple times + virtual void SubmitJob(const std::function&) = 0; + // This moves the function in for efficiency + virtual void SubmitJob(std::function&&) = 0; + + // Reserve available background threads. This function does not ensure + // so many threads can be reserved, instead it will return the number of + // threads that can be reserved against the desired one. In other words, + // the number of available threads could be less than the input. 
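+  // For example (illustrative): with 4 threads total of which 3 are already
+  // reserved, ReserveThreads(2) would reserve the single remaining thread
+  // and return 1.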
+ virtual int ReserveThreads(int /*threads_to_be_reserved*/) { return 0; } + + // Release a specific number of reserved threads + virtual int ReleaseThreads(int /*threads_to_be_released*/) { return 0; } +}; + +// NewThreadPool() is a function that could be used to create a ThreadPool +// with `num_threads` background threads. +extern ThreadPool* NewThreadPool(int num_threads); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_reader_writer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_reader_writer.h new file mode 100644 index 0000000000..335e091dc8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_reader_writer.h @@ -0,0 +1,52 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { + +// Allow custom implementations of TraceWriter and TraceReader. +// By default, RocksDB provides a way to capture the traces to a file using the +// factory NewFileTraceWriter(). But users could also choose to export traces to +// any other system by providing custom implementations of TraceWriter and +// TraceReader. + +// TraceWriter allows exporting RocksDB traces to any system, one operation at +// a time. +class TraceWriter { + public: + virtual ~TraceWriter() = default; + + virtual Status Write(const Slice& data) = 0; + virtual Status Close() = 0; + virtual uint64_t GetFileSize() = 0; +}; + +// TraceReader allows reading RocksDB traces from any system, one operation at +// a time. A RocksDB Replayer could depend on this to replay operations. +class TraceReader { + public: + virtual ~TraceReader() = default; + + virtual Status Read(std::string* data) = 0; + virtual Status Close() = 0; + + // Seek back to the trace header. Replayer can call this method to restart + // replaying. Note this method may fail if the reader is already closed. + virtual Status Reset() = 0; +}; + +// Factory methods to write/read traces to/from a file. +// The implementations may not be thread-safe. +Status NewFileTraceWriter(Env* env, const EnvOptions& env_options, + const std::string& trace_filename, + std::unique_ptr* trace_writer); +Status NewFileTraceReader(Env* env, const EnvOptions& env_options, + const std::string& trace_filename, + std::unique_ptr* trace_reader); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_record.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_record.h new file mode 100644 index 0000000000..c00f5cafbe --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_record.h @@ -0,0 +1,248 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +class ColumnFamilyHandle; +class DB; + +// Supported trace record types. 
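+// As an illustrative sketch (not part of this header), query-level traces
+// made of these record types are typically captured with DB::StartTrace()
+// and a TraceWriter; the file path below is an example:
+//
+//   std::unique_ptr<TraceWriter> writer;
+//   NewFileTraceWriter(db->GetEnv(), EnvOptions(), "/tmp/query_trace",
+//                      &writer);
+//   db->StartTrace(TraceOptions(), std::move(writer));
+//   // ... issue the reads/writes to be traced ...
+//   db->EndTrace();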
+enum TraceType : char { + kTraceNone = 0, + kTraceBegin = 1, + kTraceEnd = 2, + // Query level tracing related trace types. + kTraceWrite = 3, + kTraceGet = 4, + kTraceIteratorSeek = 5, + kTraceIteratorSeekForPrev = 6, + // Block cache tracing related trace types. + kBlockTraceIndexBlock = 7, + // TODO: split out kinds of filter blocks? + kBlockTraceFilterBlock = 8, + kBlockTraceDataBlock = 9, + kBlockTraceUncompressionDictBlock = 10, + kBlockTraceRangeDeletionBlock = 11, + // IO tracing related trace type. + kIOTracer = 12, + // Query level tracing related trace type. + kTraceMultiGet = 13, + // All trace types should be added before kTraceMax + kTraceMax, +}; + +class GetQueryTraceRecord; +class IteratorSeekQueryTraceRecord; +class MultiGetQueryTraceRecord; +class TraceRecordResult; +class WriteQueryTraceRecord; + +// Base class for all types of trace records. +class TraceRecord { + public: + explicit TraceRecord(uint64_t timestamp); + + virtual ~TraceRecord() = default; + + // Type of the trace record. + virtual TraceType GetTraceType() const = 0; + + // Timestamp (in microseconds) of this trace. + virtual uint64_t GetTimestamp() const; + + class Handler { + public: + virtual ~Handler() = default; + + virtual Status Handle(const WriteQueryTraceRecord& record, + std::unique_ptr* result) = 0; + + virtual Status Handle(const GetQueryTraceRecord& record, + std::unique_ptr* result) = 0; + + virtual Status Handle(const IteratorSeekQueryTraceRecord& record, + std::unique_ptr* result) = 0; + + virtual Status Handle(const MultiGetQueryTraceRecord& record, + std::unique_ptr* result) = 0; + }; + + // Accept the handler and report the corresponding result in `result`. + virtual Status Accept(Handler* handler, + std::unique_ptr* result) = 0; + + // Create a handler for the exeution of TraceRecord. + static Handler* NewExecutionHandler( + DB* db, const std::vector& handles); + + private: + uint64_t timestamp_; +}; + +// Base class for all query types of trace records. +class QueryTraceRecord : public TraceRecord { + public: + explicit QueryTraceRecord(uint64_t timestamp); +}; + +// Trace record for DB::Write() operation. +class WriteQueryTraceRecord : public QueryTraceRecord { + public: + WriteQueryTraceRecord(PinnableSlice&& write_batch_rep, uint64_t timestamp); + + WriteQueryTraceRecord(const std::string& write_batch_rep, uint64_t timestamp); + + virtual ~WriteQueryTraceRecord() override; + + TraceType GetTraceType() const override { return kTraceWrite; } + + // rep string for the WriteBatch. + virtual Slice GetWriteBatchRep() const; + + Status Accept(Handler* handler, + std::unique_ptr* result) override; + + private: + PinnableSlice rep_; +}; + +// Trace record for DB::Get() operation +class GetQueryTraceRecord : public QueryTraceRecord { + public: + GetQueryTraceRecord(uint32_t column_family_id, PinnableSlice&& key, + uint64_t timestamp); + + GetQueryTraceRecord(uint32_t column_family_id, const std::string& key, + uint64_t timestamp); + + virtual ~GetQueryTraceRecord() override; + + TraceType GetTraceType() const override { return kTraceGet; } + + // Column family ID. + virtual uint32_t GetColumnFamilyID() const; + + // Key to get. + virtual Slice GetKey() const; + + Status Accept(Handler* handler, + std::unique_ptr* result) override; + + private: + uint32_t cf_id_; + PinnableSlice key_; +}; + +// Base class for all Iterator related operations. 
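+// As an illustrative sketch (not part of this header), a custom
+// TraceRecord::Handler might replay a seek record against a live DB,
+// assuming hypothetical db_ and handles_ members that map column family IDs
+// to handles (dispatch on GetSeekType() is omitted for brevity):
+//
+//   Status Handle(const IteratorSeekQueryTraceRecord& record,
+//                 std::unique_ptr<TraceRecordResult>* /*result*/) override {
+//     std::unique_ptr<Iterator> it(db_->NewIterator(
+//         ReadOptions(), handles_.at(record.GetColumnFamilyID())));
+//     it->Seek(record.GetKey());
+//     return it->status();
+//   }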
+class IteratorQueryTraceRecord : public QueryTraceRecord { + public: + explicit IteratorQueryTraceRecord(uint64_t timestamp); + + IteratorQueryTraceRecord(PinnableSlice&& lower_bound, + PinnableSlice&& upper_bound, uint64_t timestamp); + + IteratorQueryTraceRecord(const std::string& lower_bound, + const std::string& upper_bound, uint64_t timestamp); + + virtual ~IteratorQueryTraceRecord() override; + + // Get the iterator's lower/upper bound. They may be used in ReadOptions to + // create an Iterator instance. + virtual Slice GetLowerBound() const; + virtual Slice GetUpperBound() const; + + private: + PinnableSlice lower_; + PinnableSlice upper_; +}; + +// Trace record for Iterator::Seek() and Iterator::SeekForPrev() operation. +class IteratorSeekQueryTraceRecord : public IteratorQueryTraceRecord { + public: + // Currently we only support Seek() and SeekForPrev(). + enum SeekType { + kSeek = kTraceIteratorSeek, + kSeekForPrev = kTraceIteratorSeekForPrev + }; + + IteratorSeekQueryTraceRecord(SeekType seekType, uint32_t column_family_id, + PinnableSlice&& key, uint64_t timestamp); + + IteratorSeekQueryTraceRecord(SeekType seekType, uint32_t column_family_id, + const std::string& key, uint64_t timestamp); + + IteratorSeekQueryTraceRecord(SeekType seekType, uint32_t column_family_id, + PinnableSlice&& key, PinnableSlice&& lower_bound, + PinnableSlice&& upper_bound, uint64_t timestamp); + + IteratorSeekQueryTraceRecord(SeekType seekType, uint32_t column_family_id, + const std::string& key, + const std::string& lower_bound, + const std::string& upper_bound, + uint64_t timestamp); + + virtual ~IteratorSeekQueryTraceRecord() override; + + // Trace type matches the seek type. + TraceType GetTraceType() const override; + + // Type of seek, Seek or SeekForPrev. + virtual SeekType GetSeekType() const; + + // Column family ID. + virtual uint32_t GetColumnFamilyID() const; + + // Key to seek to. + virtual Slice GetKey() const; + + Status Accept(Handler* handler, + std::unique_ptr* result) override; + + private: + SeekType type_; + uint32_t cf_id_; + PinnableSlice key_; +}; + +// Trace record for DB::MultiGet() operation. +class MultiGetQueryTraceRecord : public QueryTraceRecord { + public: + MultiGetQueryTraceRecord(std::vector column_family_ids, + std::vector&& keys, + uint64_t timestamp); + + MultiGetQueryTraceRecord(std::vector column_family_ids, + const std::vector& keys, + uint64_t timestamp); + + virtual ~MultiGetQueryTraceRecord() override; + + TraceType GetTraceType() const override { return kTraceMultiGet; } + + // Column familiy IDs. + virtual std::vector GetColumnFamilyIDs() const; + + // Keys to get. + virtual std::vector GetKeys() const; + + Status Accept(Handler* handler, + std::unique_ptr* result) override; + + private: + std::vector cf_ids_; + std::vector keys_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_record_result.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_record_result.h new file mode 100644 index 0000000000..0cd0004a6a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/trace_record_result.h @@ -0,0 +1,187 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + +#include +#include + +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/trace_record.h" + +namespace ROCKSDB_NAMESPACE { + +class IteratorTraceExecutionResult; +class MultiValuesTraceExecutionResult; +class SingleValueTraceExecutionResult; +class StatusOnlyTraceExecutionResult; + +// Base class for the results of all types of trace records. +// Theses classes can be used to report the execution result of +// TraceRecord::Handler::Handle() or TraceRecord::Accept(). +class TraceRecordResult { + public: + explicit TraceRecordResult(TraceType trace_type); + + virtual ~TraceRecordResult() = default; + + // Trace type of the corresponding TraceRecord. + virtual TraceType GetTraceType() const; + + class Handler { + public: + virtual ~Handler() = default; + + virtual Status Handle(const StatusOnlyTraceExecutionResult& result) = 0; + + virtual Status Handle(const SingleValueTraceExecutionResult& result) = 0; + + virtual Status Handle(const MultiValuesTraceExecutionResult& result) = 0; + + virtual Status Handle(const IteratorTraceExecutionResult& result) = 0; + }; + + // Accept the handler. + virtual Status Accept(Handler* handler) = 0; + + private: + TraceType trace_type_; +}; + +// Base class for the results from the trace record execution handler (created +// by TraceRecord::NewExecutionHandler()). +// +// The actual execution status or returned values may be hidden from +// TraceRecord::Handler::Handle and TraceRecord::Accept. For example, a +// GetQueryTraceRecord's execution calls DB::Get() internally. DB::Get() may +// return Status::NotFound() but TraceRecord::Handler::Handle() or +// TraceRecord::Accept() will still return Status::OK(). The actual status from +// DB::Get() and the returned value string may be saved in a +// SingleValueTraceExecutionResult. +class TraceExecutionResult : public TraceRecordResult { + public: + TraceExecutionResult(uint64_t start_timestamp, uint64_t end_timestamp, + TraceType trace_type); + + // Execution start/end timestamps and request latency in microseconds. + virtual uint64_t GetStartTimestamp() const; + virtual uint64_t GetEndTimestamp() const; + inline uint64_t GetLatency() const { + return GetEndTimestamp() - GetStartTimestamp(); + } + + private: + uint64_t ts_start_; + uint64_t ts_end_; +}; + +// Result for operations that only return a single Status. +// Example operation: DB::Write() +class StatusOnlyTraceExecutionResult : public TraceExecutionResult { + public: + StatusOnlyTraceExecutionResult(Status status, uint64_t start_timestamp, + uint64_t end_timestamp, TraceType trace_type); + + virtual ~StatusOnlyTraceExecutionResult() override = default; + + // Return value of DB::Write(), etc. + virtual const Status& GetStatus() const; + + virtual Status Accept(Handler* handler) override; + + private: + Status status_; +}; + +// Result for operations that return a Status and a value. +// Example operation: DB::Get() +class SingleValueTraceExecutionResult : public TraceExecutionResult { + public: + SingleValueTraceExecutionResult(Status status, const std::string& value, + uint64_t start_timestamp, + uint64_t end_timestamp, TraceType trace_type); + + SingleValueTraceExecutionResult(Status status, std::string&& value, + uint64_t start_timestamp, + uint64_t end_timestamp, TraceType trace_type); + + virtual ~SingleValueTraceExecutionResult() override; + + // Return status of DB::Get(). + virtual const Status& GetStatus() const; + + // Value for the searched key. 
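+  // (As an illustrative sketch, not part of this header, a
+  // TraceRecordResult::Handler might consume this result roughly as:
+  //
+  //   Status Handle(const SingleValueTraceExecutionResult& result) override {
+  //     if (result.GetStatus().ok()) {
+  //       Consume(result.GetValue());  // Consume() is a hypothetical sink
+  //     }
+  //     return Status::OK();
+  //   }
+  // )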
+ virtual const std::string& GetValue() const; + + virtual Status Accept(Handler* handler) override; + + private: + Status status_; + std::string value_; +}; + +// Result for operations that return multiple Status(es) and values as vectors. +// Example operation: DB::MultiGet() +class MultiValuesTraceExecutionResult : public TraceExecutionResult { + public: + MultiValuesTraceExecutionResult(std::vector multi_status, + std::vector values, + uint64_t start_timestamp, + uint64_t end_timestamp, TraceType trace_type); + + virtual ~MultiValuesTraceExecutionResult() override; + + // Returned Status(es) of DB::MultiGet(). + virtual const std::vector& GetMultiStatus() const; + + // Returned values for the searched keys. + virtual const std::vector& GetValues() const; + + virtual Status Accept(Handler* handler) override; + + private: + std::vector multi_status_; + std::vector values_; +}; + +// Result for Iterator operations. +// Example operations: Iterator::Seek(), Iterator::SeekForPrev() +class IteratorTraceExecutionResult : public TraceExecutionResult { + public: + IteratorTraceExecutionResult(bool valid, Status status, PinnableSlice&& key, + PinnableSlice&& value, uint64_t start_timestamp, + uint64_t end_timestamp, TraceType trace_type); + + IteratorTraceExecutionResult(bool valid, Status status, + const std::string& key, const std::string& value, + uint64_t start_timestamp, uint64_t end_timestamp, + TraceType trace_type); + + virtual ~IteratorTraceExecutionResult() override; + + // Return if the Iterator is valid. + virtual bool GetValid() const; + + // Return the status of the Iterator. + virtual const Status& GetStatus() const; + + // Key of the current iterating entry, empty if GetValid() is false. + virtual Slice GetKey() const; + + // Value of the current iterating entry, empty if GetValid() is false. + virtual Slice GetValue() const; + + virtual Status Accept(Handler* handler) override; + + private: + bool valid_; + Status status_; + PinnableSlice key_; + PinnableSlice value_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/transaction_log.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/transaction_log.h new file mode 100644 index 0000000000..e13ad8f80a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/transaction_log.h @@ -0,0 +1,122 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/status.h" +#include "rocksdb/types.h" +#include "rocksdb/write_batch.h" + +namespace ROCKSDB_NAMESPACE { + +class LogFile; +using VectorLogPtr = std::vector>; + +enum WalFileType { + /* Indicates that WAL file is in archive directory. WAL files are moved from + * the main db directory to archive directory once they are not live and stay + * there until cleaned up. Files are cleaned depending on archive size + * (Options::WAL_size_limit_MB) and time since last cleaning + * (Options::WAL_ttl_seconds). + */ + kArchivedLogFile = 0, + + /* Indicates that WAL file is live and resides in the main db directory */ + kAliveLogFile = 1 +}; + +class LogFile { + public: + LogFile() {} + virtual ~LogFile() {} + + // Returns log file's pathname relative to the main db dir + // Eg. 
For a live-log-file = /000003.log + // For an archived-log-file = /archive/000003.log + virtual std::string PathName() const = 0; + + // Primary identifier for log file. + // This is directly proportional to creation time of the log file + virtual uint64_t LogNumber() const = 0; + + // Log file can be either alive or archived + virtual WalFileType Type() const = 0; + + // Starting sequence number of writebatch written in this log file + virtual SequenceNumber StartSequence() const = 0; + + // Size of log file on disk in Bytes + virtual uint64_t SizeFileBytes() const = 0; +}; + +struct BatchResult { + SequenceNumber sequence = 0; + std::unique_ptr writeBatchPtr; + + // Add empty __ctor and __dtor for the rule of five + // However, preserve the original semantics and prohibit copying + // as the std::unique_ptr member does not copy. + BatchResult() {} + + ~BatchResult() {} + + BatchResult(const BatchResult&) = delete; + + BatchResult& operator=(const BatchResult&) = delete; + + BatchResult(BatchResult&& bResult) + : sequence(std::move(bResult.sequence)), + writeBatchPtr(std::move(bResult.writeBatchPtr)) {} + + BatchResult& operator=(BatchResult&& bResult) { + sequence = std::move(bResult.sequence); + writeBatchPtr = std::move(bResult.writeBatchPtr); + return *this; + } +}; + +// A TransactionLogIterator is used to iterate over the transactions in a db. +// One run of the iterator is continuous, i.e. the iterator will stop at the +// beginning of any gap in sequences +class TransactionLogIterator { + public: + TransactionLogIterator() {} + virtual ~TransactionLogIterator() {} + + // An iterator is either positioned at a WriteBatch or not valid. + // This method returns true if the iterator is valid. + // Can read data from a valid iterator. + virtual bool Valid() = 0; + + // Moves the iterator to the next WriteBatch. + // REQUIRES: Valid() to be true. + virtual void Next() = 0; + + // Returns ok if the iterator is valid. + // Returns the Error when something has gone wrong. + virtual Status status() = 0; + + // If valid return's the current write_batch and the sequence number of the + // earliest transaction contained in the batch. + // ONLY use if Valid() is true and status() is OK. + virtual BatchResult GetBatch() = 0; + + // The read options for TransactionLogIterator. + struct ReadOptions { + // If true, all data read from underlying storage will be + // verified against corresponding checksums. + // Default: true + bool verify_checksums_; + + ReadOptions() : verify_checksums_(true) {} + + explicit ReadOptions(bool verify_checksums) + : verify_checksums_(verify_checksums) {} + }; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/types.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/types.h new file mode 100644 index 0000000000..3f8ce97959 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/types.h @@ -0,0 +1,94 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { + +// Define all public custom types here. + +using ColumnFamilyId = uint32_t; + +// Represents a sequence number in a WAL file. 
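+// As an illustrative sketch (not part of this header), sequence numbers are
+// what DB::GetUpdatesSince() takes when tailing the transaction log:
+//
+//   std::unique_ptr<TransactionLogIterator> iter;
+//   if (db->GetUpdatesSince(seq, &iter).ok()) {
+//     while (iter->Valid()) {
+//       BatchResult batch = iter->GetBatch();
+//       // batch.sequence is the earliest sequence number in the batch
+//       iter->Next();
+//     }
+//   }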
+using SequenceNumber = uint64_t; + +const SequenceNumber kMinUnCommittedSeq = 1; // 0 is always committed + +enum class TableFileCreationReason { + kFlush, + kCompaction, + kRecovery, + kMisc, +}; + +enum class BlobFileCreationReason { + kFlush, + kCompaction, + kRecovery, +}; + +// The types of files RocksDB uses in a DB directory. (Available for +// advanced options.) +enum FileType { + kWalFile, + kDBLockFile, + kTableFile, + kDescriptorFile, + kCurrentFile, + kTempFile, + kInfoLogFile, // Either the current one, or an old one + kMetaDatabase, + kIdentityFile, + kOptionsFile, + kBlobFile +}; + +// User-oriented representation of internal key types. +// Ordering of this enum entries should not change. +enum EntryType { + kEntryPut, + kEntryDelete, + kEntrySingleDelete, + kEntryMerge, + kEntryRangeDeletion, + kEntryBlobIndex, + kEntryDeleteWithTimestamp, + kEntryWideColumnEntity, + kEntryOther, +}; + +enum class WriteStallCause { + // Beginning of CF-scope write stall causes + // + // Always keep `kMemtableLimit` as the first stat in this section + kMemtableLimit, + kL0FileCountLimit, + kPendingCompactionBytes, + kCFScopeWriteStallCauseEnumMax, + // End of CF-scope write stall causes + + // Beginning of DB-scope write stall causes + // + // Always keep `kWriteBufferManagerLimit` as the first stat in this section + kWriteBufferManagerLimit, + kDBScopeWriteStallCauseEnumMax, + // End of DB-scope write stall causes + + // Always add new WriteStallCause before `kNone` + kNone, +}; + +enum class WriteStallCondition { + kDelayed, + kStopped, + // Always add new WriteStallCondition before `kNormal` + kNormal, +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/unique_id.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/unique_id.h new file mode 100644 index 0000000000..eb0c778266 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/unique_id.h @@ -0,0 +1,55 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/table_properties.h" + +namespace ROCKSDB_NAMESPACE { + +// Computes a stable, universally unique 128-bit (16 binary char) identifier +// for an SST file from TableProperties. This is supported for table (SST) +// files created with RocksDB 6.24 and later. NotSupported will be returned +// for other cases. The first 16 bytes (128 bits) is of sufficient quality +// for almost all applications, and shorter prefixes are usable as a +// hash of the full unique id. +// +// Note: .c_str() is not compatible with binary char strings, so using +// .c_str() on the result will often result in information loss and very +// poor uniqueness probability. +// +// More detail: the value is *guaranteed* unique for SST files +// generated in the same process (even different DBs, RocksDB >= 6.26), +// and first 128 bits are guaranteed not "all zeros" (RocksDB >= 6.26) +// so that the "all zeros" value can be used reliably for a null ID. +// These IDs are more than sufficient for SST uniqueness within each of +// many DBs or hosts. For an extreme example assuming random IDs, consider +// 10^9 hosts each with 10^9 live SST files being replaced at 10^6/second. 
+// Such a service would need to run for 10 million years to see an ID
+// collision among live SST files on any host.
+//
+// And assuming one generates many SST files in the lifetime of each process,
+// the probability of ID collisions is much "better than random"; see
+// https://github.com/pdillinger/unique_id
+Status GetUniqueIdFromTableProperties(const TableProperties &props,
+                                      std::string *out_id);
+
+// Computes a 192-bit (24 binary char) stable, universally unique ID
+// with an extra 64 bits of uniqueness compared to the standard ID. It is only
+// appropriate to use this ID instead of the 128-bit ID if ID collisions
+// between files among any hosts in a vast fleet are a problem, such as a
+// shared global namespace for SST file backups. Under this criterion, the
+// extreme example above would expect a global file ID collision every 4 days
+// with 128-bit IDs (using some worst-case assumptions about process
+// lifetime). It's 10^17 years with 192-bit IDs.
+Status GetExtendedUniqueIdFromTableProperties(const TableProperties &props,
+                                              std::string *out_id);
+
+// Converts a binary string (unique id) to hexadecimal, with each 64 bits
+// separated by '-', e.g. 6474DF650323BDF0-B48E64F3039308CA-17284B32E7F7444B
+// Also works on unique id prefix.
+std::string UniqueIdToHumanString(const std::string &id);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/universal_compaction.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/universal_compaction.h
new file mode 100644
index 0000000000..0b0a85e1c8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/universal_compaction.h
@@ -0,0 +1,96 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <stdint.h>
+#include <climits>
+#include <vector>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+//
+// Algorithm used to make a compaction request stop picking new files
+// into a single compaction run
+//
+enum CompactionStopStyle {
+  kCompactionStopStyleSimilarSize,  // pick files of similar size
+  kCompactionStopStyleTotalSize  // total size of picked files > next file
+};
+
+class CompactionOptionsUniversal {
+ public:
+  // Percentage flexibility while comparing file size. If the candidate file(s)
+  // size is 1% smaller than the next file's size, then include next file into
+  // this candidate set.
+  // Default: 1
+  unsigned int size_ratio;
+
+  // The minimum number of files in a single compaction run. Default: 2
+  unsigned int min_merge_width;
+
+  // The maximum number of files in a single compaction run. Default: UINT_MAX
+  unsigned int max_merge_width;
+
+  // The size amplification is defined as the amount (in percentage) of
+  // additional storage needed to store a single byte of data in the database.
+  // For example, a size amplification of 2% means that a database that
+  // contains 100 bytes of user-data may occupy up to 102 bytes of
+  // physical storage. By this definition, a fully compacted database has
+  // a size amplification of 0%. RocksDB uses the following heuristic
+  // to calculate size amplification: it assumes that all files excluding
+  // the earliest file contribute to the size amplification.
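+  // For example (illustrative): if the earliest file holds 100 bytes of user
+  // data and all newer files together hold 50 bytes, the estimated size
+  // amplification is 50%.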
+ // Default: 200, which means that a 100 byte database could require up to + // 300 bytes of storage. + unsigned int max_size_amplification_percent; + + // If this option is set to be -1 (the default value), all the output files + // will follow compression type specified. + // + // If this option is not negative, we will try to make sure compressed + // size is just above this value. In normal cases, at least this percentage + // of data will be compressed. + // When we are compacting to a new file, here is the criteria whether + // it needs to be compressed: assuming here are the list of files sorted + // by generation time: + // A1...An B1...Bm C1...Ct + // where A1 is the newest and Ct is the oldest, and we are going to compact + // B1...Bm, we calculate the total size of all the files as total_size, as + // well as the total size of C1...Ct as total_C, the compaction output file + // will be compressed iff + // total_C / total_size < this percentage + // Default: -1 + int compression_size_percent; + + // The algorithm used to stop picking files into a single compaction run + // Default: kCompactionStopStyleTotalSize + CompactionStopStyle stop_style; + + // Option to optimize the universal multi level compaction by enabling + // trivial move for non overlapping files. + // Default: false + bool allow_trivial_move; + + // EXPERIMENTAL + // If true, try to limit compaction size under max_compaction_bytes. + // This might cause higher write amplification, but can prevent some + // problem caused by large compactions. + // Default: false + bool incremental; + + // Default set of parameters + CompactionOptionsUniversal() + : size_ratio(1), + min_merge_width(2), + max_merge_width(UINT_MAX), + max_size_amplification_percent(200), + compression_size_percent(-1), + stop_style(kCompactionStopStyleTotalSize), + allow_trivial_move(false), + incremental(false) {} +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/agg_merge.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/agg_merge.h new file mode 100644 index 0000000000..4e21082db6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/agg_merge.h @@ -0,0 +1,138 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { +// The feature is still in development so the encoding format is subject +// to change. +// +// Aggregation Merge Operator is a merge operator that allows users to +// aggregate merge operands of different keys with different registered +// aggregation functions. The aggregation can also change for the same +// key if the functions store the data in the same format. +// The target application highly overlaps with merge operator in general +// but we try to provide a better interface so that users are more likely +// to use pre-implemented plug-in functions and connect with existing +// third-party aggregation functions (such as those from SQL engines). +// In this case, the need for users to write customized C++ plug-in code +// is reduced. 
+// If the idea proves to be useful, we might consider moving it to be
+// a core functionality of RocksDB, and reduce the support of merge
+// operators.
+//
+// Users can implement aggregation functions by implementing the abstract
+// class Aggregator, and register it using AddAggregator().
+// The merge operator can be retrieved from GetAggMergeOperator() and
+// it is a singleton.
+//
+// Users can push values to be updated with a merge operand encoded with
+// registered function name and payload using EncodeAggFuncAndPayload(),
+// and the merge operator will invoke the aggregation function.
+// An example:
+//
+//    // Assume class ExampleSumAggregator is implemented to do simple sum.
+//    AddAggregator("sum", std::make_unique<ExampleSumAggregator>());
+//    std::shared_ptr<MergeOperator> mp_guard = CreateAggMergeOperator();
+//    options.merge_operator = mp_guard.get();
+//    ...... // Creating DB
+//
+//
+//    std::string encoded_value;
+//    s = EncodeAggFuncAndPayload(kUnnamedFuncName, "200", encoded_value);
+//    assert(s.ok());
+//    db->Put(WriteOptions(), "foo", encoded_value);
+//    s = EncodeAggFuncAndPayload("sum", "200", encoded_value);
+//    assert(s.ok());
+//    db->Merge(WriteOptions(), "foo", encoded_value);
+//    s = EncodeAggFuncAndPayload("sum", "200", encoded_value);
+//    assert(s.ok());
+//    db->Merge(WriteOptions(), "foo", encoded_value);
+//
+//    std::string value;
+//    Status s = db->Get(ReadOptions(), "foo", &value);
+//    assert(s.ok());
+//    Slice func, aggregated_value;
+//    assert(ExtractAggFuncAndValue(value, func, aggregated_value));
+//    assert(func == "sum");
+//    assert(aggregated_value == "600");
+//
+//
+// DB::Put() can also be used to add a payload in the same way as Merge().
+//
+// kUnnamedFuncName can be used as a placeholder function name. This will
+// be aggregated with merge operands inserted later based on the function
+// name given there.
+//
+// If the aggregation function is not registered, or there is an error
+// returned by the aggregation function, the result will be encoded with a
+// fake aggregation function kErrorFuncName, with each merge operand encoded
+// into a list that can be extracted using ExtractList();
+//
+// If users add a merge operand using a different aggregation function from
+// the previous one, the merge operands for the previous one are aggregated
+// and the payload part of the result is treated as the first payload of
+// the items for the new aggregation function. For example, users can
+// Merge("plus, 1"), Merge("plus, 2"), Merge("minus, 3") and the aggregation
+// result would be "minus, 0".
+//
+
+// A class used to aggregate data per key/value. The plug-in function is
+// implemented and registered using AddAggregator(). And then use it
+// with the merge operator created using CreateAggMergeOperator().
+class Aggregator {
+ public:
+  virtual ~Aggregator() {}
+  // The input list is in reverse insertion order, with values[0] to be
+  // the one inserted last and values.back() to be the one inserted first.
+  // The oldest one might be from Get().
+  // Return whether aggregation succeeded. False for aggregation error.
+  virtual bool Aggregate(const std::vector<Slice>& values,
+                         std::string& result) const = 0;
+
+  // True if a partial aggregation should be invoked. Some aggregators
+  // might opt to skip partial aggregation if possible.
+  virtual bool DoPartialAggregate() const { return true; }
+};
+
+// The function adds an aggregation plugin by function name. It is used
+// by all the aggregation operators created using CreateAggMergeOperator().
+// It's currently not thread safe to run concurrently with the aggregation +// merge operator. It is recommended that all the aggregation function +// is added before calling CreateAggMergeOperator(). +Status AddAggregator(const std::string& function_name, + std::unique_ptr&& agg); + +// Get the singleton instance of merge operator for aggregation. +// Always the same one is returned with a shared_ptr is hold as a +// static variable by the function. +// This is done so because options.merge_operator is shared_ptr. +std::shared_ptr GetAggMergeOperator(); + +// Encode aggregation function and payload that can be consumed by aggregation +// merge operator. +Status EncodeAggFuncAndPayload(const Slice& function_name, const Slice& payload, + std::string& output); +// Helper function to extract aggregation function name and payload. +// Return false if it fails to decode. +bool ExtractAggFuncAndValue(const Slice& op, Slice& func, Slice& value); + +// Extract encoded list. This can be used to extract error merge operands when +// the returned function name is kErrorFuncName. +bool ExtractList(const Slice& encoded_list, std::vector& decoded_list); + +// Special function name that allows it to be merged to subsequent type. +extern const std::string kUnnamedFuncName; + +// Special error function name reserved for merging or aggregation error. +extern const std::string kErrorFuncName; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/backup_engine.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/backup_engine.h new file mode 100644 index 0000000000..204d12d6fe --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/backup_engine.h @@ -0,0 +1,689 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "rocksdb/env.h" +#include "rocksdb/io_status.h" +#include "rocksdb/metadata.h" +#include "rocksdb/options.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { +class BackupEngineReadOnlyBase; +class BackupEngine; + +// The default DB file checksum function name. +constexpr char kDbFileChecksumFuncName[] = "FileChecksumCrc32c"; +// The default BackupEngine file checksum function name. +constexpr char kBackupFileChecksumFuncName[] = "crc32c"; + +struct BackupEngineOptions { + // Where to keep the backup files. Has to be different than dbname_ + // Best to set this to dbname_ + "/backups" + // Required + std::string backup_dir; + + // Backup Env object. It will be used for backup file I/O. If it's + // nullptr, backups will be written out using DBs Env. If it's + // non-nullptr, backup's I/O will be performed using this object. + // Default: nullptr + Env* backup_env; + + // share_table_files supports table and blob files. + // + // If share_table_files == true, the backup directory will share table and + // blob files among backups, to save space among backups of the same DB and to + // enable incremental backups by only copying new files. 
+ // If share_table_files == false, each backup will be on its own and will not + // share any data with other backups. + // + // default: true + bool share_table_files; + + // Backup info and error messages will be written to info_log + // if non-nullptr. + // Default: nullptr + Logger* info_log; + + // If sync == true, we can guarantee you'll get consistent backup and + // restore even on a machine crash/reboot. Backup and restore processes are + // slower with sync enabled. If sync == false, we can only guarantee that + // other previously synced backups and restores are not modified while + // creating a new one. + // Default: true + bool sync; + + // If true, it will delete whatever backups there are already + // Default: false + bool destroy_old_data; + + // If false, we won't backup log files. This option can be useful for backing + // up in-memory databases where log file are persisted, but table files are in + // memory. + // Default: true + bool backup_log_files; + + // Max bytes that can be transferred in a second during backup. + // If 0, go as fast as you can + // This limit only applies to writes. To also limit reads, + // a rate limiter able to also limit reads (e.g, its mode = kAllIo) + // have to be passed in through the option "backup_rate_limiter" + // Default: 0 + uint64_t backup_rate_limit; + + // Backup rate limiter. Used to control transfer speed for backup. If this is + // not null, backup_rate_limit is ignored. + // Default: nullptr + std::shared_ptr backup_rate_limiter{nullptr}; + + // Max bytes that can be transferred in a second during restore. + // If 0, go as fast as you can + // This limit only applies to writes. To also limit reads, + // a rate limiter able to also limit reads (e.g, its mode = kAllIo) + // have to be passed in through the option "restore_rate_limiter" + // Default: 0 + uint64_t restore_rate_limit; + + // Restore rate limiter. Used to control transfer speed during restore. If + // this is not null, restore_rate_limit is ignored. + // Default: nullptr + std::shared_ptr restore_rate_limiter{nullptr}; + + // share_files_with_checksum supports table and blob files. + // + // Only used if share_table_files is set to true. Setting to false is + // DEPRECATED and potentially dangerous because in that case BackupEngine + // can lose data if backing up databases with distinct or divergent + // history, for example if restoring from a backup other than the latest, + // writing to the DB, and creating another backup. Setting to true (default) + // prevents these issues by ensuring that different table files (SSTs) and + // blob files with the same number are treated as distinct. See + // share_files_with_checksum_naming and ShareFilesNaming. + // + // Default: true + bool share_files_with_checksum; + + // Up to this many background threads will copy files for CreateNewBackup() + // and RestoreDBFromBackup() + // Default: 1 + int max_background_operations; + + // During backup user can get callback every time next + // callback_trigger_interval_size bytes being copied. + // Default: 4194304 + uint64_t callback_trigger_interval_size; + + // For BackupEngineReadOnly, Open() will open at most this many of the + // latest non-corrupted backups. + // + // Note: this setting is ignored (behaves like INT_MAX) for any kind of + // writable BackupEngine because it would inhibit accounting for shared + // files for proper backup deletion, including purging any incompletely + // created backups on creation of a new backup. 
+ // + // Default: INT_MAX + int max_valid_backups_to_open; + + // ShareFilesNaming describes possible naming schemes for backup + // table and blob file names when they are stored in the + // shared_checksum directory (i.e., both share_table_files and + // share_files_with_checksum are true). + enum ShareFilesNaming : uint32_t { + // Backup blob filenames are __.blob and + // backup SST filenames are __.sst + // where is an unsigned decimal integer. This is the + // original/legacy naming scheme for share_files_with_checksum, + // with two problems: + // * At massive scale, collisions on this triple with different file + // contents is plausible. + // * Determining the name to use requires computing the checksum, + // so generally requires reading the whole file even if the file + // is already backed up. + // + // ** ONLY RECOMMENDED FOR PRESERVING OLD BEHAVIOR ** + kLegacyCrc32cAndFileSize = 1U, + + // Backup SST filenames are _s.sst. This + // pair of values should be very strongly unique for a given SST file + // and easily determined before computing a checksum. The 's' indicates + // the value is a DB session id, not a checksum. + // + // Exceptions: + // * For blob files, kLegacyCrc32cAndFileSize is used as currently + // db_session_id is not supported by the blob file format. + // * For old SST files without a DB session id, kLegacyCrc32cAndFileSize + // will be used instead, matching the names assigned by RocksDB versions + // not supporting the newer naming scheme. + // * See also flags below. + kUseDbSessionId = 2U, + + kMaskNoNamingFlags = 0xffffU, + + // If not already part of the naming scheme, insert + // _ + // before .sst and .blob in the name. In case of user code actually parsing + // the last _ before the .sst and .blob as the file size, this + // preserves that feature of kLegacyCrc32cAndFileSize. In other words, this + // option makes official that unofficial feature of the backup metadata. + // + // We do not consider SST and blob file sizes to have sufficient entropy to + // contribute significantly to naming uniqueness. + kFlagIncludeFileSize = 1U << 31, + + kMaskNamingFlags = ~kMaskNoNamingFlags, + }; + + // Naming option for share_files_with_checksum table and blob files. See + // ShareFilesNaming for details. + // + // Modifying this option cannot introduce a downgrade compatibility issue + // because RocksDB can read, restore, and delete backups using different file + // names, and it's OK for a backup directory to use a mixture of table and + // blob files naming schemes. + // + // However, modifying this option and saving more backups to the same + // directory can lead to the same file getting saved again to that + // directory, under the new shared name in addition to the old shared + // name. + // + // Default: kUseDbSessionId | kFlagIncludeFileSize + // + // Note: This option comes into effect only if both share_files_with_checksum + // and share_table_files are true. + ShareFilesNaming share_files_with_checksum_naming; + + // Major schema version to use when writing backup meta files + // 1 (default) - compatible with very old versions of RocksDB. + // 2 - can be read by RocksDB versions >= 6.19.0. 
+  //   * (Experimental) saving and restoring file temperature metadata
+  int schema_version = 1;
+
+  // (Experimental - subject to change or removal) When taking a backup and
+  // saving file temperature info (minimum schema_version is 2), there are
+  // two potential sources of truth for the placement of files into temperature
+  // tiers: (a) the current file temperature reported by the FileSystem or
+  // (b) the expected file temperature recorded in the DB manifest. When this
+  // option is false (default), (b) overrides (a) if both are not UNKNOWN.
+  // When true, (a) overrides (b) if both are not UNKNOWN. Regardless of this
+  // setting, a known temperature overrides UNKNOWN.
+  bool current_temperatures_override_manifest = false;
+
+  void Dump(Logger* logger) const;
+
+  explicit BackupEngineOptions(
+      const std::string& _backup_dir, Env* _backup_env = nullptr,
+      bool _share_table_files = true, Logger* _info_log = nullptr,
+      bool _sync = true, bool _destroy_old_data = false,
+      bool _backup_log_files = true, uint64_t _backup_rate_limit = 0,
+      uint64_t _restore_rate_limit = 0, int _max_background_operations = 1,
+      uint64_t _callback_trigger_interval_size = 4 * 1024 * 1024,
+      int _max_valid_backups_to_open = INT_MAX,
+      ShareFilesNaming _share_files_with_checksum_naming =
+          static_cast<ShareFilesNaming>(kUseDbSessionId | kFlagIncludeFileSize))
+      : backup_dir(_backup_dir),
+        backup_env(_backup_env),
+        share_table_files(_share_table_files),
+        info_log(_info_log),
+        sync(_sync),
+        destroy_old_data(_destroy_old_data),
+        backup_log_files(_backup_log_files),
+        backup_rate_limit(_backup_rate_limit),
+        restore_rate_limit(_restore_rate_limit),
+        share_files_with_checksum(true),
+        max_background_operations(_max_background_operations),
+        callback_trigger_interval_size(_callback_trigger_interval_size),
+        max_valid_backups_to_open(_max_valid_backups_to_open),
+        share_files_with_checksum_naming(_share_files_with_checksum_naming) {
+    assert(share_table_files || !share_files_with_checksum);
+    assert((share_files_with_checksum_naming & kMaskNoNamingFlags) != 0);
+  }
+};
+
+inline BackupEngineOptions::ShareFilesNaming operator&(
+    BackupEngineOptions::ShareFilesNaming lhs,
+    BackupEngineOptions::ShareFilesNaming rhs) {
+  uint32_t l = static_cast<uint32_t>(lhs);
+  uint32_t r = static_cast<uint32_t>(rhs);
+  assert(r == BackupEngineOptions::kMaskNoNamingFlags ||
+         (r & BackupEngineOptions::kMaskNoNamingFlags) == 0);
+  return static_cast<BackupEngineOptions::ShareFilesNaming>(l & r);
+}
+
+inline BackupEngineOptions::ShareFilesNaming operator|(
+    BackupEngineOptions::ShareFilesNaming lhs,
+    BackupEngineOptions::ShareFilesNaming rhs) {
+  uint32_t l = static_cast<uint32_t>(lhs);
+  uint32_t r = static_cast<uint32_t>(rhs);
+  assert((r & BackupEngineOptions::kMaskNoNamingFlags) == 0);
+  return static_cast<BackupEngineOptions::ShareFilesNaming>(l | r);
+}
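+
+// Example (an illustrative sketch, not part of the upstream header): opening a
+// BackupEngine with mostly-default options and backing up an already-open DB.
+// The backup directory "/tmp/rocksdb_backups" and the variable `db` (a
+// rocksdb::DB*) are assumptions for illustration only.
+//
+//   rocksdb::BackupEngineOptions opts("/tmp/rocksdb_backups");
+//   rocksdb::BackupEngine* backup_engine = nullptr;
+//   rocksdb::IOStatus s = rocksdb::BackupEngine::Open(
+//       opts, rocksdb::Env::Default(), &backup_engine);
+//   if (s.ok()) {
+//     s = backup_engine->CreateNewBackup(rocksdb::CreateBackupOptions(), db);
+//   }
+//   delete backup_engine;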
+// Identifying information about a backup shared file that is (or might be)
+// excluded from a backup using exclude_files_callback.
+struct BackupExcludedFileInfo {
+  explicit BackupExcludedFileInfo(const std::string& _relative_file)
+      : relative_file(_relative_file) {}
+
+  // File name and path relative to the backup dir.
+  std::string relative_file;
+};
+
+// An auxiliary structure for exclude_files_callback
+struct MaybeExcludeBackupFile {
+  explicit MaybeExcludeBackupFile(BackupExcludedFileInfo&& _info)
+      : info(std::move(_info)) {}
+
+  // Identifying information about a backup shared file that could be excluded
+  const BackupExcludedFileInfo info;
+
+  // API user sets to true if the file should be excluded from this backup
+  bool exclude_decision = false;
+};
+
+struct CreateBackupOptions {
+  // Flush will always trigger if 2PC is enabled.
+  // If write-ahead logs are disabled, set flush_before_backup=true to
+  // avoid losing unflushed key/value pairs from the memtable.
+  bool flush_before_backup = false;
+
+  // Callback for reporting progress, based on callback_trigger_interval_size.
+  //
+  // An exception thrown from the callback will result in Status::Aborted from
+  // the operation.
+  std::function<void()> progress_callback = {};
+
+  // A callback that allows the API user to select files for exclusion, such
+  // as if the files are known to exist in an alternate backup directory.
+  // Only "shared" files can be excluded from backups. This is an advanced
+  // feature because the BackupEngine user is trusted to keep track of files
+  // such that the DB can be restored.
+  //
+  // Input to the callback is a [begin,end) range of sharable files that are
+  // live in the DB being backed up, and the callback implementation sets
+  // exclude_decision=true for files to exclude. A callback offers maximum
+  // flexibility, e.g. for when remote files are unavailable at backup time
+  // but their existence has been recorded somewhere. In case of an empty or
+  // no-op callback, all files are included in the backup.
+  //
+  // To restore the DB, RestoreOptions::alternate_dirs must be used to provide
+  // the excluded files.
+  //
+  // An exception thrown from the callback will result in Status::Aborted from
+  // the operation.
+  std::function<void(MaybeExcludeBackupFile* files_begin,
+                     MaybeExcludeBackupFile* files_end)>
+      exclude_files_callback = {};
+
+  // If false, background_thread_cpu_priority is ignored.
+  // Otherwise, the CPU priority can be decreased; if you try to increase the
+  // priority, the priority will not change.
+  // The initial priority of the threads is CpuPriority::kNormal,
+  // so you can decrease to priorities lower than kNormal.
+  bool decrease_background_thread_cpu_priority = false;
+  CpuPriority background_thread_cpu_priority = CpuPriority::kNormal;
+};
+
+struct RestoreOptions {
+  // If true, restore won't overwrite the existing log files in wal_dir. It
+  // will also move all log files from the archive directory to wal_dir. Use
+  // this option in combination with BackupEngineOptions::backup_log_files =
+  // false for persisting in-memory databases.
+  // Default: false
+  bool keep_log_files;
+
+  // For backups that were created using exclude_files_callback, this
+  // option enables restoring those backups by providing BackupEngines on
+  // directories known to contain the required files.
+  std::forward_list<BackupEngineReadOnlyBase*> alternate_dirs;
+
+  explicit RestoreOptions(bool _keep_log_files = false)
+      : keep_log_files(_keep_log_files) {}
+};
+
+using BackupID = uint32_t;
+
+using BackupFileInfo = FileStorageInfo;
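+
+// Example (an illustrative sketch, not part of the upstream header): restoring
+// the latest backup into the hypothetical directory "/tmp/restored_db", used
+// here for both db_dir and wal_dir. `backup_engine` is assumed to be an open
+// BackupEngine*.
+//
+//   rocksdb::RestoreOptions restore_opts;
+//   rocksdb::IOStatus s = backup_engine->RestoreDBFromLatestBackup(
+//       restore_opts, "/tmp/restored_db", "/tmp/restored_db");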
+struct BackupInfo {
+  BackupID backup_id = 0U;
+
+  // Creation time, according to GetCurrentTime
+  int64_t timestamp = 0;
+
+  // Total size in bytes (based on file payloads, not including filesystem
+  // overheads or backup meta file)
+  uint64_t size = 0U;
+
+  // Number of backed up files, some of which might be shared with other
+  // backups. Does not include backup meta file.
+  uint32_t number_files = 0U;
+
+  // Backup API user metadata
+  std::string app_metadata;
+
+  // Backup file details, if requested with include_file_details=true.
+  // Does not include excluded_files.
+  std::vector<BackupFileInfo> file_details;
+
+  // Identifying information about shared files that were excluded from the
+  // created backup. See exclude_files_callback and alternate_dirs.
+  // This information is only provided if include_file_details=true.
+  std::vector<BackupExcludedFileInfo> excluded_files;
+
+  // DB "name" (a directory in the backup_env) for opening this backup as a
+  // read-only DB. This should also be used as the DBOptions::wal_dir, such
+  // as by default setting wal_dir="". See also env_for_open.
+  // This field is only set if include_file_details=true
+  std::string name_for_open;
+
+  // An Env(+FileSystem) for opening this backup as a read-only DB, with
+  // DB::OpenForReadOnly or similar. This field is only set if
+  // include_file_details=true. (The FileSystem in this Env takes care
+  // of making shared backup files openable from the `name_for_open` DB
+  // directory.) See also name_for_open.
+  //
+  // This Env might or might not be shared with other backups. To work
+  // around DBOptions::env being a raw pointer, this is a shared_ptr so
+  // that keeping either this BackupInfo, the BackupEngine, or a copy of
+  // this shared_ptr alive is sufficient to keep the Env alive for use by
+  // a read-only DB.
+  std::shared_ptr<Env> env_for_open;
+
+  BackupInfo() {}
+
+  explicit BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size,
+                      uint32_t _number_files, const std::string& _app_metadata)
+      : backup_id(_backup_id),
+        timestamp(_timestamp),
+        size(_size),
+        number_files(_number_files),
+        app_metadata(_app_metadata) {}
+};
+
+class BackupStatistics {
+ public:
+  BackupStatistics() {
+    number_success_backup = 0;
+    number_fail_backup = 0;
+  }
+
+  explicit BackupStatistics(uint32_t _number_success_backup,
+                            uint32_t _number_fail_backup)
+      : number_success_backup(_number_success_backup),
+        number_fail_backup(_number_fail_backup) {}
+
+  ~BackupStatistics() {}
+
+  void IncrementNumberSuccessBackup();
+  void IncrementNumberFailBackup();
+
+  uint32_t GetNumberSuccessBackup() const;
+  uint32_t GetNumberFailBackup() const;
+
+  std::string ToString() const;
+
+ private:
+  uint32_t number_success_backup;
+  uint32_t number_fail_backup;
+};
+
+// Read-only functions of a BackupEngine. (Restore writes to another directory,
+// not the backup directory.) See BackupEngine comments for details on
+// safe concurrent operations.
+class BackupEngineReadOnlyBase {
+ public:
+  virtual ~BackupEngineReadOnlyBase() {}
+
+  // Returns info about the latest good backup in backup_info, or NotFound if
+  // no good backup exists.
+  // Setting include_file_details=true provides information about each
+  // backed-up file in BackupInfo::file_details and more.
+  virtual Status GetLatestBackupInfo(
+      BackupInfo* backup_info, bool include_file_details = false) const = 0;
+
+  // Returns info about a specific backup in backup_info, or NotFound
+  // or Corruption status if the requested backup id does not exist or is
+  // known corrupt.
+  // Setting include_file_details=true provides information about each
+  // backed-up file in BackupInfo::file_details and more.
+  virtual Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
+                               bool include_file_details = false) const = 0;
+
+  // Returns info about non-corrupt backups in backup_infos.
+  // Setting include_file_details=true provides information about each
+  // backed-up file in BackupInfo::file_details and more.
+  virtual void GetBackupInfo(std::vector<BackupInfo>* backup_infos,
+                             bool include_file_details = false) const = 0;
+
+  // Returns info about corrupt backups in corrupt_backups.
+  // WARNING: Any write to the BackupEngine could trigger automatic
+  // GarbageCollect(), which could delete files that would be needed to
+  // manually recover a corrupt backup or to preserve an unrecognized (e.g.
+  // incompatible future version) backup.
+  virtual void GetCorruptedBackups(
+      std::vector<BackupID>* corrupt_backup_ids) const = 0;
+
+  // Restore to specified db_dir and wal_dir from backup_id.
+  virtual IOStatus RestoreDBFromBackup(const RestoreOptions& options,
+                                       BackupID backup_id,
+                                       const std::string& db_dir,
+                                       const std::string& wal_dir) const = 0;
+
+  // keep for backward compatibility.
+  virtual IOStatus RestoreDBFromBackup(
+      BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
+      const RestoreOptions& options = RestoreOptions()) const {
+    return RestoreDBFromBackup(options, backup_id, db_dir, wal_dir);
+  }
+
+  // Like RestoreDBFromBackup but restores from latest non-corrupt backup_id
+  virtual IOStatus RestoreDBFromLatestBackup(
+      const RestoreOptions& options, const std::string& db_dir,
+      const std::string& wal_dir) const = 0;
+
+  // keep for backward compatibility.
+  virtual IOStatus RestoreDBFromLatestBackup(
+      const std::string& db_dir, const std::string& wal_dir,
+      const RestoreOptions& options = RestoreOptions()) const {
+    return RestoreDBFromLatestBackup(options, db_dir, wal_dir);
+  }
+
+  // If verify_with_checksum is true, this function
+  // inspects the current checksums and file sizes of backup files to see if
+  // they match our expectation.
+  //
+  // If verify_with_checksum is false, this function
+  // checks that each file exists and that the size of the file matches our
+  // expectation. It does not check file checksums.
+  //
+  // If this BackupEngine created the backup, it compares the files' current
+  // sizes (and current checksums) against the number of bytes written to
+  // them (and the checksums calculated) during creation.
+  // Otherwise, it compares the files' current sizes (and checksums) against
+  // their sizes (and checksums) when the BackupEngine was opened.
+  //
+  // Returns Status::OK() if all checks are good
+  virtual IOStatus VerifyBackup(BackupID backup_id,
+                                bool verify_with_checksum = false) const = 0;
+
+  // Internal use only
+  virtual BackupEngine* AsBackupEngine() = 0;
+};
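+
+// Example (an illustrative sketch, not part of the upstream header): listing
+// all non-corrupt backups through a read-only engine. The backup directory
+// path is an assumption.
+//
+//   rocksdb::BackupEngineReadOnly* ro_engine = nullptr;
+//   rocksdb::IOStatus s = rocksdb::BackupEngineReadOnly::Open(
+//       rocksdb::BackupEngineOptions("/tmp/rocksdb_backups"),
+//       rocksdb::Env::Default(), &ro_engine);
+//   if (s.ok()) {
+//     std::vector<rocksdb::BackupInfo> infos;
+//     ro_engine->GetBackupInfo(&infos);
+//   }
+//   delete ro_engine;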
+// Append-only functions of a BackupEngine. See BackupEngine comment for
+// details on the distinction between Append and Write operations and safe
+// concurrent operations.
+class BackupEngineAppendOnlyBase {
+ public:
+  virtual ~BackupEngineAppendOnlyBase() {}
+
+  // Same as CreateNewBackup, but stores extra application metadata.
+  virtual IOStatus CreateNewBackupWithMetadata(
+      const CreateBackupOptions& options, DB* db,
+      const std::string& app_metadata, BackupID* new_backup_id = nullptr) = 0;
+
+  // keep here for backward compatibility.
+  virtual IOStatus CreateNewBackupWithMetadata(
+      DB* db, const std::string& app_metadata, bool flush_before_backup = false,
+      std::function<void()> progress_callback = []() {}) {
+    CreateBackupOptions options;
+    options.flush_before_backup = flush_before_backup;
+    options.progress_callback = progress_callback;
+    return CreateNewBackupWithMetadata(options, db, app_metadata);
+  }
+
+  // Captures the state of the database by creating a new (latest) backup.
+  // On success (OK status), the BackupID of the new backup is saved to
+  // *new_backup_id when not nullptr.
+  // NOTE: db_paths and cf_paths are not supported for creating backups,
+  // and NotSupported will be returned when the DB (without WALs) uses more
+  // than one directory.
+  virtual IOStatus CreateNewBackup(const CreateBackupOptions& options, DB* db,
+                                   BackupID* new_backup_id = nullptr) {
+    return CreateNewBackupWithMetadata(options, db, "", new_backup_id);
+  }
+
+  // keep here for backward compatibility.
+  virtual IOStatus CreateNewBackup(
+      DB* db, bool flush_before_backup = false,
+      std::function<void()> progress_callback = []() {}) {
+    CreateBackupOptions options;
+    options.flush_before_backup = flush_before_backup;
+    options.progress_callback = progress_callback;
+    return CreateNewBackup(options, db);
+  }
+
+  // Call this from another thread if you want to stop the backup
+  // that is currently happening. It will return immediately and will
+  // not wait for the backup to stop.
+  // The backup will stop ASAP and the call to CreateNewBackup will
+  // return Status::Incomplete(). It will not clean up after itself, but
+  // the state will remain consistent. The state will be cleaned up the
+  // next time you call CreateNewBackup or GarbageCollect.
+  virtual void StopBackup() = 0;
+
+  // Will delete any files left over from incomplete creation or deletion of
+  // a backup. This is not normally needed as those operations also clean up
+  // after prior incomplete calls to the same kind of operation (create or
+  // delete). This does not delete corrupt backups but can delete files that
+  // would be needed to manually recover a corrupt backup or to preserve an
+  // unrecognized (e.g. incompatible future version) backup.
+  // NOTE: This is not designed to delete arbitrary files added to the backup
+  // directory outside of BackupEngine, and clean-up is always subject to
+  // permissions on and availability of the underlying filesystem.
+  // NOTE2: For concurrency and interference purposes (see BackupEngine
+  // comment), GarbageCollect (GC) is like other Append operations, even
+  // though it seems different. Although GC can delete physical data, it does
+  // not delete any logical data read by Read operations. GC can interfere
+  // with Append or Write operations in another BackupEngine on the same
+  // backup_dir, because temporary files will be treated as obsolete and
+  // deleted.
+  virtual IOStatus GarbageCollect() = 0;
+};
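+
+// Example (an illustrative sketch, not part of the upstream header): creating
+// a backup that flushes the memtable first and reports progress. `db` and
+// `backup_engine` are assumed to be open handles.
+//
+//   rocksdb::CreateBackupOptions create_opts;
+//   create_opts.flush_before_backup = true;
+//   create_opts.progress_callback = [] { /* e.g. tick a progress meter */ };
+//   rocksdb::BackupID new_id = 0;
+//   rocksdb::IOStatus s =
+//       backup_engine->CreateNewBackup(create_opts, db, &new_id);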
+// A backup engine for organizing and managing backups.
+// This class is not user-extensible.
+//
+// This class declaration adds "Write" operations in addition to the
+// operations from BackupEngineAppendOnlyBase and BackupEngineReadOnlyBase.
+//
+// # Concurrency between threads on the same BackupEngine* object
+//
+// As of version 6.20, BackupEngine* operations are generally thread-safe,
+// using a read-write lock, though single-thread operation is still
+// recommended to avoid TOCTOU bugs. Specifically, particular kinds of
+// concurrent operations behave like this:
+//
+// op1\op2| Read   | Append | Write
+// -------|--------|--------|--------
+// Read   | conc   | block  | block
+// Append | block  | block  | block
+// Write  | block  | block  | block
+//
+// conc = operations safely proceed concurrently
+// block = one of the operations safely blocks until the other completes.
+//         There is generally no guarantee as to which completes first.
+//
+// StopBackup is the only operation that affects an ongoing operation.
+//
+// # Interleaving operations between BackupEngine* objects open on the
+// same backup_dir
+//
+// It is recommended only to have one BackupEngine* object open for a given
+// backup_dir, but it is possible to mix / interleave some operations
+// (regardless of whether they are concurrent) with these caveats:
+//
+// op1\op2| Open   | Read   | Append | Write
+// -------|--------|--------|--------|--------
+// Open   | conc   | conc   | atomic | unspec
+// Read   | conc   | conc   | old    | unspec
+// Append | atomic | old    | unspec | unspec
+// Write  | unspec | unspec | unspec | unspec
+//
+// Special case: Open with destroy_old_data=true is really a Write
+//
+// conc   = operations safely proceed, concurrently when applicable
+// atomic = operations are effectively atomic; if a concurrent Append
+//          operation has not completed at some key point during Open, the
+//          opened BackupEngine* will never see the result of the Append op.
+// old    = Read operations do not include any state changes from other
+//          BackupEngine* objects; they return the state at their Open time.
+// unspec = Behavior is unspecified, including possibly trashing the
+//          backup_dir, but is "memory safe" (no C++ undefined behavior)
+//
+class BackupEngine : public BackupEngineReadOnlyBase,
+                     public BackupEngineAppendOnlyBase {
+ public:
+  virtual ~BackupEngine() {}
+
+  // BackupEngineOptions have to be the same as the ones used in previous
+  // BackupEngines for the same backup directory.
+  static IOStatus Open(const BackupEngineOptions& options, Env* db_env,
+                       BackupEngine** backup_engine_ptr);
+
+  // keep for backward compatibility.
+  static IOStatus Open(Env* db_env, const BackupEngineOptions& options,
+                       BackupEngine** backup_engine_ptr) {
+    return BackupEngine::Open(options, db_env, backup_engine_ptr);
+  }
+
+  // Deletes old backups, keeping the latest num_backups_to_keep alive.
+  // See also DeleteBackup.
+  virtual IOStatus PurgeOldBackups(uint32_t num_backups_to_keep) = 0;
+
+  // Deletes a specific backup. If this operation (or PurgeOldBackups)
+  // is not completed due to crash, power failure, etc. the state
+  // will be cleaned up the next time you call DeleteBackup,
+  // PurgeOldBackups, or GarbageCollect.
+  virtual IOStatus DeleteBackup(BackupID backup_id) = 0;
+};
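+
+// Example (an illustrative sketch, not part of the upstream header): retaining
+// only the five most recent backups on an open BackupEngine*.
+//
+//   rocksdb::IOStatus s = backup_engine->PurgeOldBackups(5);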
+// A variant of BackupEngine that only allows "Read" operations. See
+// BackupEngine comment for details. This class is not user-extensible.
+class BackupEngineReadOnly : public BackupEngineReadOnlyBase {
+ public:
+  virtual ~BackupEngineReadOnly() {}
+
+  static IOStatus Open(const BackupEngineOptions& options, Env* db_env,
+                       BackupEngineReadOnly** backup_engine_ptr);
+
+  // keep for backward compatibility.
+  static IOStatus Open(Env* db_env, const BackupEngineOptions& options,
+                       BackupEngineReadOnly** backup_engine_ptr) {
+    return BackupEngineReadOnly::Open(options, db_env, backup_engine_ptr);
+  }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/cache_dump_load.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/cache_dump_load.h
new file mode 100644
index 0000000000..8b91bb7e15
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/cache_dump_load.h
@@ -0,0 +1,140 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <set>
+
+#include "rocksdb/cache.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/secondary_cache.h"
+#include "rocksdb/table.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// The classes and functions in this header file are used for dumping out the
+// blocks in a block cache, storing or transferring the blocks to another
+// destination host, and loading these blocks into the secondary cache at the
+// destination host.
+// NOTE that: The classes, functions, and data structures are EXPERIMENTAL!
+// They may be changed in the future when the development continues.
+
+// The major and minor version number of the data format to be
+// stored/transferred via CacheDumpWriter and read out via CacheDumpReader
+static const int kCacheDumpMajorVersion = 0;
+static const int kCacheDumpMinorVersion = 1;
+
+// NOTE that: this class is EXPERIMENTAL! May be changed in the future!
+// This is an abstract class to write or transfer the data that is created by
+// CacheDumper. We pack one block with its block type, dump time, block key in
+// the block cache, block len, block crc32c checksum and the block itself as a
+// unit, and it is stored via WritePacket. Before we call WritePacket, we must
+// call WriteMetadata once, which stores the sequence number, block unit
+// checksum, and block unit size.
+// We provide a file-based CacheDumpWriter that stores the metadata and its
+// packets sequentially in a file as the default implementation. Users can
+// implement their own CacheDumpWriter to store/transfer the data. For example,
+// a user can create a subclass that transfers the metadata and packets on the
+// fly.
+class CacheDumpWriter {
+ public:
+  virtual ~CacheDumpWriter() = default;
+
+  // Called ONCE before the calls to WritePacket
+  virtual IOStatus WriteMetadata(const Slice& metadata) = 0;
+  virtual IOStatus WritePacket(const Slice& data) = 0;
+  virtual IOStatus Close() = 0;
+};
+
+// NOTE that: this class is EXPERIMENTAL! May be changed in the future!
+// This is an abstract class to read or receive the data that is stored
+// or transferred by CacheDumpWriter. Note that ReadMetadata must be called
+// once before calling ReadPacket.
+class CacheDumpReader {
+ public:
+  virtual ~CacheDumpReader() = default;
+  // Called ONCE before the calls to ReadPacket
+  virtual IOStatus ReadMetadata(std::string* metadata) = 0;
+  // Sets data to empty string on EOF
+  virtual IOStatus ReadPacket(std::string* data) = 0;
+  // (Close not needed)
+};
+
+// CacheDumpOptions is the option for CacheDumper and CacheDumpedLoader. Any
+// dump or load process related control variables can be added here.
+struct CacheDumpOptions {
+  SystemClock* clock;
+};
+
+// NOTE that: this class is EXPERIMENTAL! May be changed in the future!
+// This is the class to dump out the blocks in the block cache and store or
+// transfer them via a CacheDumpWriter. In order to dump out the blocks
+// belonging to a certain DB or a list of DBs (a block cache can be shared by
+// many DBs), the user needs to call SetDumpFilter to specify a list of DBs to
+// filter out the blocks that do not belong to those DBs.
+// A typical use case is migrating a DB instance from host A to host B: we need
+// to reopen the DB at host B after all the files are copied to host B. At this
+// moment, the block cache at host B does not have any blocks from this
+// migrated DB, so the read performance can be low due to cache warm up. By
+// using CacheDumper before we shut down the DB at host A and using
+// CacheDumpedLoader at host B before we reopen the DB, we can warm up the
+// cache ahead of time. This class can be used in other use cases as well.
+class CacheDumper {
+ public:
+  virtual ~CacheDumper() = default;
+  // Only dump the blocks in the block cache that belong to the DBs in this list
+  virtual Status SetDumpFilter(std::vector<DB*> db_list) {
+    (void)db_list;
+    return Status::NotSupported("SetDumpFilter is not supported");
+  }
+  // The main function to dump out all the blocks that satisfy the filter
+  // condition from block cache to a certain CacheDumpWriter in one shot. This
+  // process may take some time.
+  virtual IOStatus DumpCacheEntriesToWriter() {
+    return IOStatus::NotSupported("DumpCacheEntriesToWriter is not supported");
+  }
+};
+
+// NOTE that: this class is EXPERIMENTAL! May be changed in the future!
+// This is the class to load the dumped blocks into the destination cache. For
+// now, we only load the blocks into the SecondaryCache. In the future, we may
+// plan to support loading into the block cache.
+class CacheDumpedLoader {
+ public:
+  virtual ~CacheDumpedLoader() = default;
+  virtual IOStatus RestoreCacheEntriesToSecondaryCache() {
+    return IOStatus::NotSupported(
+        "RestoreCacheEntriesToSecondaryCache is not supported");
+  }
+};
+
+// Get the writer which stores all the metadata and data sequentially to a file
+IOStatus NewToFileCacheDumpWriter(const std::shared_ptr<FileSystem>& fs,
+                                  const FileOptions& file_opts,
+                                  const std::string& file_name,
+                                  std::unique_ptr<CacheDumpWriter>* writer);
+
+// Get the reader which reads out the metadata and data sequentially from a file
+IOStatus NewFromFileCacheDumpReader(const std::shared_ptr<FileSystem>& fs,
+                                    const FileOptions& file_opts,
+                                    const std::string& file_name,
+                                    std::unique_ptr<CacheDumpReader>* reader);
+
+// Get the default cache dumper
+Status NewDefaultCacheDumper(const CacheDumpOptions& dump_options,
+                             const std::shared_ptr<Cache>& cache,
+                             std::unique_ptr<CacheDumpWriter>&& writer,
+                             std::unique_ptr<CacheDumper>* cache_dumper);
+
+// Get the default cache dump loader
+Status NewDefaultCacheDumpedLoader(
+    const CacheDumpOptions& dump_options,
+    const BlockBasedTableOptions& toptions,
+    const std::shared_ptr<SecondaryCache>& secondary_cache,
+    std::unique_ptr<CacheDumpReader>&& reader,
+    std::unique_ptr<CacheDumpedLoader>* cache_dump_loader);
+
+}  // namespace ROCKSDB_NAMESPACE
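+
+// Example (an illustrative sketch, not part of the upstream header): dumping
+// the contents of a block cache to a file. `cache` (a std::shared_ptr<Cache>
+// shared with the DB being dumped) and the file path are assumptions.
+//
+//   rocksdb::CacheDumpOptions dump_opts;
+//   dump_opts.clock = rocksdb::SystemClock::Default().get();
+//   std::unique_ptr<rocksdb::CacheDumpWriter> writer;
+//   rocksdb::IOStatus ios = rocksdb::NewToFileCacheDumpWriter(
+//       rocksdb::FileSystem::Default(), rocksdb::FileOptions(),
+//       "/tmp/block_cache.dump", &writer);
+//   std::unique_ptr<rocksdb::CacheDumper> dumper;
+//   rocksdb::Status s = rocksdb::NewDefaultCacheDumper(
+//       dump_opts, cache, std::move(writer), &dumper);
+//   if (s.ok()) ios = dumper->DumpCacheEntriesToWriter();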
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/checkpoint.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/checkpoint.h
new file mode 100644
index 0000000000..6509f38d96
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/checkpoint.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// A checkpoint is an openable snapshot of a database at a point in time.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DB;
+class ColumnFamilyHandle;
+struct LiveFileMetaData;
+struct ExportImportFilesMetaData;
+
+class Checkpoint {
+ public:
+  // Creates a Checkpoint object to be used for creating openable snapshots
+  static Status Create(DB* db, Checkpoint** checkpoint_ptr);
+
+  // Builds an openable snapshot of RocksDB. checkpoint_dir should contain an
+  // absolute path. The specified directory should not exist, since it will be
+  // created by the API.
+  // When a checkpoint is created,
+  // (1) SST and blob files are hard linked if the output directory is on the
+  // same filesystem as the database, and copied otherwise.
+  // (2) other required files (like MANIFEST) are always copied.
+  // log_size_for_flush: if the total log file size is equal to or larger than
+  // this value, then a flush is triggered for all the column families. The
+  // default value is 0, which means flush is always triggered. If you move
+  // away from the default, the checkpoint may not contain up-to-date data
+  // if WAL writing is not always enabled.
+  // A flush will always be triggered if 2PC is enabled.
+  // sequence_number_ptr: if it is not nullptr, the value it points to will be
+  // set to a sequence number guaranteed to be part of the DB, not necessarily
+  // the latest. The default value of this parameter is nullptr.
+  // NOTE: db_paths and cf_paths are not supported for creating checkpoints
+  // and NotSupported will be returned when the DB (without WALs) uses more
+  // than one directory.
+  virtual Status CreateCheckpoint(const std::string& checkpoint_dir,
+                                  uint64_t log_size_for_flush = 0,
+                                  uint64_t* sequence_number_ptr = nullptr);
+
+  // Exports all live SST files of a specified Column Family onto export_dir,
+  // returning SST files information in metadata.
+  // - SST files will be created as hard links when the directory specified
+  //   is in the same partition as the db directory, copied otherwise.
+  // - export_dir should not already exist and will be created by this API.
+  // - Always triggers a flush.
+  virtual Status ExportColumnFamily(ColumnFamilyHandle* handle,
+                                    const std::string& export_dir,
+                                    ExportImportFilesMetaData** metadata);
+
+  virtual ~Checkpoint() {}
+};
+
+}  // namespace ROCKSDB_NAMESPACE
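+
+// Example (an illustrative sketch, not part of the upstream header): creating
+// a checkpoint of an open DB `db` in an assumed, not-yet-existing directory.
+//
+//   rocksdb::Checkpoint* checkpoint = nullptr;
+//   rocksdb::Status s = rocksdb::Checkpoint::Create(db, &checkpoint);
+//   if (s.ok()) {
+//     s = checkpoint->CreateCheckpoint("/tmp/db_checkpoint");
+//   }
+//   delete checkpoint;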
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/convenience.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/convenience.h
new file mode 100644
index 0000000000..f61afd69ef
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/convenience.h
@@ -0,0 +1,10 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+// This file was moved to rocksdb/convenience.h
+
+#include "rocksdb/convenience.h"
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/customizable_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/customizable_util.h
new file mode 100644
index 0000000000..adf2540540
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/customizable_util.h
@@ -0,0 +1,322 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// The methods in this file are used to instantiate new Customizable
+// instances of objects. These methods are most typically used by
+// the "CreateFromString" method of a customizable class.
+// If not developing a new Type of customizable class, you probably
+// do not need the methods in this file.
+//
+// See https://github.com/facebook/rocksdb/wiki/RocksDB-Configurable-Objects
+// for more information on how to develop and use customizable objects
+
+#pragma once
+#include <memory>
+#include <unordered_map>
+
+#include "options/configurable_helper.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/customizable.h"
+#include "rocksdb/status.h"
+#include "rocksdb/utilities/object_registry.h"
+
+namespace ROCKSDB_NAMESPACE {
+// Creates a new shared customizable instance object based on the
+// input parameters using the object registry.
+//
+// The id parameter specifies the instance class of the object to create.
+// The opt_map parameter specifies the configuration of the new instance.
+//
+// The config_options parameter controls the process and how errors are
+// returned. If ignore_unknown_options=true, unknown values are ignored during
+// the configuration. If ignore_unsupported_options=true, unknown instance types
+// are ignored. If invoke_prepare_options=true, the resulting instance will be
+// initialized (via PrepareOptions).
+//
+// @param config_options Controls how the instance is created and errors are
+//   handled
+// @param id The identifier of the new object being created. This string
+//   will be used by the object registry to locate the appropriate object to
+//   create.
+// @param opt_map Optional name-value pairs of properties to set for the newly
+//   created object
+// @param result The newly created and configured instance.
+template <typename T>
+static Status NewSharedObject(
+    const ConfigOptions& config_options, const std::string& id,
+    const std::unordered_map<std::string, std::string>& opt_map,
+    std::shared_ptr<T>* result) {
+  if (!id.empty()) {
+    Status status;
+    status = config_options.registry->NewSharedObject(id, result);
+    if (config_options.ignore_unsupported_options && status.IsNotSupported()) {
+      status = Status::OK();
+    } else if (status.ok()) {
+      status = Customizable::ConfigureNewObject(config_options, result->get(),
+                                                opt_map);
+    }
+    return status;
+  } else if (opt_map.empty()) {
+    // There was no ID and no map (everything empty), so reset/clear the result
+    result->reset();
+    return Status::OK();
+  } else {
+    return Status::NotSupported("Cannot reset object ");
+  }
+}
+
+// Creates a new managed customizable instance object based on the
+// input parameters using the object registry. Unlike "shared" objects,
+// managed objects are limited to a single instance per ID.
+//
+// The id parameter specifies the instance class of the object to create.
+// If an object with this id exists in the registry, the existing object
+// will be returned. If the object does not exist, a new one will be created.
+//
+// The opt_map parameter specifies the configuration of the new instance.
+// If the object already exists, the existing object is returned "as is" and
+// this parameter is ignored.
+//
+// The config_options parameter controls the process and how errors are
+// returned. If ignore_unknown_options=true, unknown values are ignored during
+// the configuration. If ignore_unsupported_options=true, unknown instance types
+// are ignored. If invoke_prepare_options=true, the resulting instance will be
+// initialized (via PrepareOptions).
+//
+// @param config_options Controls how the instance is created and errors are
+//   handled
+// @param id The identifier of the object. This string
+//   will be used by the object registry to locate the appropriate object to
+//   create or return.
+// @param opt_map Optional name-value pairs of properties to set for the newly
+//   created object
+// @param result The managed instance.
+template <typename T>
+static Status NewManagedObject(
+    const ConfigOptions& config_options, const std::string& id,
+    const std::unordered_map<std::string, std::string>& opt_map,
+    std::shared_ptr<T>* result) {
+  Status status;
+  if (!id.empty()) {
+    status = config_options.registry->GetOrCreateManagedObject(
+        id, result, [config_options, opt_map](T* object) {
+          return object->ConfigureFromMap(config_options, opt_map);
+        });
+    if (config_options.ignore_unsupported_options && status.IsNotSupported()) {
+      return Status::OK();
+    }
+  } else {
+    status = Status::NotSupported("Cannot reset object ");
+  }
+  return status;
+}
+
+// Creates a new shared Customizable object based on the input parameters.
+// This method parses the input value to determine the type of instance to
+// create. If there is an existing instance (in result) and it has the same ID
+// as the object being created, the existing configuration is stored and used
+// as the default for the new object.
+//
+// The value parameter specifies the instance class of the object to create.
+// If it is a simple string (e.g. BlockBasedTable), then the instance will be
+// created using the default settings. If the value is a set of name-value
+// pairs, then the "id" value is used to determine the instance to create and
+// the remaining parameters are used to configure the object. If name-value
+// pairs are specified, there should be an "id=value" pairing or an error may
+// result.
+//
+// The config_options parameter controls the process and how errors are
+// returned. If ignore_unknown_options=true, unknown values are ignored during
+// the configuration. If ignore_unsupported_options=true, unknown instance types
+// are ignored. If invoke_prepare_options=true, the resulting instance will be
+// initialized (via PrepareOptions).
+//
+// @param config_options Controls how the instance is created and errors are
+//   handled
+// @param value Either the simple name of the instance to create, or a set of
+//   name-value pairs to create and initialize the object
+// @param result The newly created instance.
+template <typename T>
+static Status LoadSharedObject(const ConfigOptions& config_options,
+                               const std::string& value,
+                               std::shared_ptr<T>* result) {
+  std::string id;
+  std::unordered_map<std::string, std::string> opt_map;
+
+  Status status = Customizable::GetOptionsMap(config_options, result->get(),
+                                              value, &id, &opt_map);
+  if (!status.ok()) {  // GetOptionsMap failed
+    return status;
+  } else {
+    return NewSharedObject(config_options, id, opt_map, result);
+  }
+}
+
+// Creates a new shared Customizable object based on the input parameters.
+//
+// The value parameter specifies the instance class of the object to create.
+// If it is a simple string (e.g. BlockBasedTable), then the instance will be
+// created using the default settings. If the value is a set of name-value
+// pairs, then the "id" value is used to determine the instance to create and
+// the remaining parameters are used to configure the object. If name-value
+// pairs are specified, there should be an "id=value" pairing or an error may
+// result.
+//
+// The "id" field from the value (either the whole field or "id=XX") is used
+// to determine the type/id of the object to return. For a given id, the same
+// instance of the object will be returned from this method (as opposed to
+// LoadSharedObject, which would create different objects for the same id).
+//
+// The config_options parameter controls the process and how errors are
+// returned. If ignore_unknown_options=true, unknown values are ignored during
+// the configuration. If ignore_unsupported_options=true, unknown instance types
+// are ignored. If invoke_prepare_options=true, the resulting instance will be
+// initialized (via PrepareOptions).
+//
+// @param config_options Controls how the instance is created and errors are
+//   handled
+// @param value Either the simple name of the instance to create, or a set of
+//   name-value pairs to create and initialize the object
+// @param result The newly created instance.
+template <typename T>
+static Status LoadManagedObject(const ConfigOptions& config_options,
+                                const std::string& value,
+                                std::shared_ptr<T>* result) {
+  std::string id;
+  std::unordered_map<std::string, std::string> opt_map;
+  Status status = Customizable::GetOptionsMap(config_options, nullptr, value,
+                                              &id, &opt_map);
+  if (!status.ok()) {  // GetOptionsMap failed
+    return status;
+  } else if (value.empty()) {  // No Id and no options. Clear the object
+    *result = nullptr;
+    return Status::OK();
+  } else {
+    return NewManagedObject(config_options, id, opt_map, result);
+  }
+}
+
+// Creates a new unique pointer customizable instance object based on the
+// input parameters using the object registry.
+// @see NewSharedObject for more information on the inner workings of this
+// method.
+//
+// @param config_options Controls how the instance is created and errors are
+//   handled
+// @param id The identifier of the new object being created. This string
+//   will be used by the object registry to locate the appropriate object to
+//   create.
+// @param opt_map Optional name-value pairs of properties to set for the newly
+//   created object
+// @param result The newly created and configured instance.
+template <typename T>
+static Status NewUniqueObject(
+    const ConfigOptions& config_options, const std::string& id,
+    const std::unordered_map<std::string, std::string>& opt_map,
+    std::unique_ptr<T>* result) {
+  if (!id.empty()) {
+    Status status;
+    status = config_options.registry->NewUniqueObject(id, result);
+    if (config_options.ignore_unsupported_options && status.IsNotSupported()) {
+      status = Status::OK();
+    } else if (status.ok()) {
+      status = Customizable::ConfigureNewObject(config_options, result->get(),
+                                                opt_map);
+    }
+    return status;
+  } else if (opt_map.empty()) {
+    // There was no ID and no map (everything empty), so reset/clear the result
+    result->reset();
+    return Status::OK();
+  } else {
+    return Status::NotSupported("Cannot reset object ");
+  }
+}
+
+// Creates a new unique customizable instance object based on the input
+// parameters.
+// @see LoadSharedObject for more information on the inner workings of this
+// method.
+//
+// @param config_options Controls how the instance is created and errors are
+//   handled
+// @param value Either the simple name of the instance to create, or a set of
+//   name-value pairs to create and initialize the object
+// @param result The newly created instance.
+template <typename T>
+static Status LoadUniqueObject(const ConfigOptions& config_options,
+                               const std::string& value,
+                               std::unique_ptr<T>* result) {
+  std::string id;
+  std::unordered_map<std::string, std::string> opt_map;
+  Status status = Customizable::GetOptionsMap(config_options, result->get(),
+                                              value, &id, &opt_map);
+  if (!status.ok()) {  // GetOptionsMap failed
+    return status;
+  } else {
+    return NewUniqueObject(config_options, id, opt_map, result);
+  }
+}
+
+// Creates a new static (raw pointer) customizable instance object based on the
+// input parameters using the object registry.
+// @see NewSharedObject for more information on the inner workings of this
+// method.
+//
+// @param config_options Controls how the instance is created and errors are
+//   handled
+// @param id The identifier of the new object being created. This string
+//   will be used by the object registry to locate the appropriate object to
+//   create.
+// @param opt_map Optional name-value pairs of properties to set for the newly
+//   created object
+// @param result The newly created and configured instance.
+template <typename T>
+static Status NewStaticObject(
+    const ConfigOptions& config_options, const std::string& id,
+    const std::unordered_map<std::string, std::string>& opt_map, T** result) {
+  if (!id.empty()) {
+    Status status;
+    status = config_options.registry->NewStaticObject(id, result);
+    if (config_options.ignore_unsupported_options && status.IsNotSupported()) {
+      status = Status::OK();
+    } else if (status.ok()) {
+      status =
+          Customizable::ConfigureNewObject(config_options, *result, opt_map);
+    }
+    return status;
+  } else if (opt_map.empty()) {
+    // There was no ID and no map (everything empty), so reset/clear the result
+    *result = nullptr;
+    return Status::OK();
+  } else {
+    return Status::NotSupported("Cannot reset object ");
+  }
+}
+
+// Creates a new static (raw pointer) customizable instance object based on the
+// input parameters.
+// @see LoadSharedObject for more information on the inner workings of this
+// method.
+//
+// @param config_options Controls how the instance is created and errors are
+//   handled
+// @param value Either the simple name of the instance to create, or a set of
+//   name-value pairs to create and initialize the object
+// @param result The newly created instance.
+template <typename T>
+static Status LoadStaticObject(const ConfigOptions& config_options,
+                               const std::string& value, T** result) {
+  std::string id;
+  std::unordered_map<std::string, std::string> opt_map;
+  Status status = Customizable::GetOptionsMap(config_options, *result, value,
+                                              &id, &opt_map);
+  if (!status.ok()) {  // GetOptionsMap failed
+    return status;
+  } else {
+    return NewStaticObject(config_options, id, opt_map, result);
+  }
+}
+}  // namespace ROCKSDB_NAMESPACE
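+
+// Example (an illustrative sketch, not part of the upstream header): a
+// CreateFromString helper for a hypothetical Customizable subclass `MyFilter`,
+// built on LoadSharedObject as described above.
+//
+//   Status MyFilter::CreateFromString(const ConfigOptions& config_options,
+//                                     const std::string& value,
+//                                     std::shared_ptr<MyFilter>* result) {
+//     return LoadSharedObject<MyFilter>(config_options, value, result);
+//   }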
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/db_ttl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/db_ttl.h
new file mode 100644
index 0000000000..12f5cbac0f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/db_ttl.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "rocksdb/db.h"
+#include "rocksdb/utilities/stackable_db.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Database with TTL support.
+//
+// USE-CASES:
+// This API should be used to open the db when key-values inserted are
+// meant to be removed from the db in a non-strict 'ttl' amount of time.
+// Therefore, this guarantees that key-values inserted will remain in the
+// db for >= ttl amount of time and the db will make efforts to remove the
+// key-values as soon as possible after ttl seconds of their insertion.
+//
+// BEHAVIOUR:
+// TTL is accepted in seconds
+// (int32_t)Timestamp(creation) is suffixed to values in Put internally
+// Expired TTL values are deleted in compaction only:(Timestamp+ttl<time_now)
+// Get/Iterator may return expired entries(compaction not run on them yet)
+// Different TTL may be used during different Opens
+// Example: Open1 at t=0 with ttl=4 and insert k1,v1: k1,v1 gets deleted after
+//          t>=5
+// read_only=true opens in the usual read-only mode. Compactions will not be
+// triggered (neither manual nor automatic), so no expired entries are removed
+//
+// CONSTRAINTS:
+// Not specifying/passing or non-positive TTL behaves like TTL = infinity
+//
+// !!!WARNING!!!:
+// Calling DB::Open directly to re-open a db created by this API will get
+// corrupt values (timestamp suffixed) and no ttl effect will be there
+// during the second Open, so use this API consistently to open the db
+// Be careful when passing ttl with a small positive value because the
+// whole database may be deleted in a small amount of time
+
+class DBWithTTL : public StackableDB {
+ public:
+  virtual Status CreateColumnFamilyWithTtl(
+      const ColumnFamilyOptions& options, const std::string& column_family_name,
+      ColumnFamilyHandle** handle, int ttl) = 0;
+
+  static Status Open(const Options& options, const std::string& dbname,
+                     DBWithTTL** dbptr, int32_t ttl = 0,
+                     bool read_only = false);
+
+  static Status Open(const DBOptions& db_options, const std::string& dbname,
+                     const std::vector<ColumnFamilyDescriptor>& column_families,
+                     std::vector<ColumnFamilyHandle*>* handles,
+                     DBWithTTL** dbptr, const std::vector<int32_t>& ttls,
+                     bool read_only = false);
+
+  virtual void SetTtl(int32_t ttl) = 0;
+
+  virtual void SetTtl(ColumnFamilyHandle* h, int32_t ttl) = 0;
+
+ protected:
+  explicit DBWithTTL(DB* db) : StackableDB(db) {}
+};
+
+}  // namespace ROCKSDB_NAMESPACE
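+
+// Example (an illustrative sketch, not part of the upstream header): opening a
+// TTL DB whose entries expire roughly one day after insertion. The path is an
+// assumption.
+//
+//   rocksdb::DBWithTTL* db = nullptr;
+//   rocksdb::Options options;
+//   options.create_if_missing = true;
+//   rocksdb::Status s =
+//       rocksdb::DBWithTTL::Open(options, "/tmp/ttl_db", &db, 86400);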
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/debug.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/debug.h
new file mode 100644
index 0000000000..e1fc76e3e0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/debug.h
@@ -0,0 +1,46 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/db.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Data associated with a particular version of a key. A database may
+// internally store multiple versions of the same user key due to snapshots,
+// compaction not happening yet, etc.
+struct KeyVersion {
+  KeyVersion() : user_key(""), value(""), sequence(0), type(0) {}
+
+  KeyVersion(const std::string& _user_key, const std::string& _value,
+             SequenceNumber _sequence, int _type)
+      : user_key(_user_key), value(_value), sequence(_sequence), type(_type) {}
+
+  std::string user_key;
+  std::string value;
+  SequenceNumber sequence;
+  int type;
+  std::string GetTypeName() const;
+};
+
+// Returns a listing of all versions of keys in the provided user key range:
+// inclusive-inclusive, i.e., [`begin_key`, `end_key`]. Scanning stops early
+// once `max_num_ikeys` versions have been collected. Since all the returned
+// keys are copied to memory, the memory usage may be huge if the range covers
+// too many keys; `max_num_ikeys` can be used to cap the memory usage.
+// The result is inserted into the provided vector, `key_versions`.
+Status GetAllKeyVersions(DB* db, Slice begin_key, Slice end_key,
+                         size_t max_num_ikeys,
+                         std::vector<KeyVersion>* key_versions);
+
+Status GetAllKeyVersions(DB* db, ColumnFamilyHandle* cfh, Slice begin_key,
+                         Slice end_key, size_t max_num_ikeys,
+                         std::vector<KeyVersion>* key_versions);
+
+}  // namespace ROCKSDB_NAMESPACE
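+
+// Example (an illustrative sketch, not part of the upstream header): listing
+// at most 100 internal key versions from an open DB `db`. Empty slices are
+// assumed here to denote an unbounded range.
+//
+//   std::vector<rocksdb::KeyVersion> versions;
+//   rocksdb::Status s = rocksdb::GetAllKeyVersions(
+//       db, rocksdb::Slice(), rocksdb::Slice(), 100, &versions);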
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/env_mirror.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/env_mirror.h
new file mode 100644
index 0000000000..2a12612870
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/env_mirror.h
@@ -0,0 +1,179 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2015, Red Hat, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// EnvMirror is an Env implementation that mirrors all file-related
+// operations to two backing Envs (provided at construction time).
+// Writes are mirrored. For read operations, we do the read from both
+// backends and assert that the results match.
+//
+// This is useful when implementing a new Env and ensuring that the
+// semantics and behavior are correct (in that they match that of an
+// existing, stable Env, like the default POSIX one).
+
+#pragma once
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include "rocksdb/env.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class SequentialFileMirror;
+class RandomAccessFileMirror;
+class WritableFileMirror;
+
+class EnvMirror : public EnvWrapper {
+  Env *a_, *b_;
+  bool free_a_, free_b_;
+
+ public:
+  EnvMirror(Env* a, Env* b, bool free_a = false, bool free_b = false)
+      : EnvWrapper(a), a_(a), b_(b), free_a_(free_a), free_b_(free_b) {}
+  ~EnvMirror() {
+    if (free_a_) delete a_;
+    if (free_b_) delete b_;
+  }
+
+  Status NewSequentialFile(const std::string& f,
+                           std::unique_ptr<SequentialFile>* r,
+                           const EnvOptions& options) override;
+  Status NewRandomAccessFile(const std::string& f,
+                             std::unique_ptr<RandomAccessFile>* r,
+                             const EnvOptions& options) override;
+  Status NewWritableFile(const std::string& f,
+                         std::unique_ptr<WritableFile>* r,
+                         const EnvOptions& options) override;
+  Status ReuseWritableFile(const std::string& fname,
+                           const std::string& old_fname,
+                           std::unique_ptr<WritableFile>* r,
+                           const EnvOptions& options) override;
+  virtual Status NewDirectory(const std::string& name,
+                              std::unique_ptr<Directory>* result) override {
+    std::unique_ptr<Directory> br;
+    Status as = a_->NewDirectory(name, result);
+    Status bs = b_->NewDirectory(name, &br);
+    assert(as == bs);
+    return as;
+  }
+  Status FileExists(const std::string& f) override {
+    Status as = a_->FileExists(f);
+    Status bs = b_->FileExists(f);
+    assert(as == bs);
+    return as;
+  }
+#if defined(_MSC_VER)
+#pragma warning(push)
+// logical operation on address of string constant
+#pragma warning(disable : 4130)
+#endif
+  Status GetChildren(const std::string& dir,
+                     std::vector<std::string>* r) override {
+    std::vector<std::string> ar, br;
+    Status as = a_->GetChildren(dir, &ar);
+    Status bs = b_->GetChildren(dir, &br);
+    assert(as == bs);
+    std::sort(ar.begin(), ar.end());
+    std::sort(br.begin(), br.end());
+    if (!as.ok() || ar != br) {
+      assert(0 == "getchildren results don't match");
+    }
+    *r = ar;
+    return as;
+  }
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+  Status DeleteFile(const std::string& f) override {
+    Status as = a_->DeleteFile(f);
+    Status bs = b_->DeleteFile(f);
+    assert(as == bs);
+    return as;
+  }
+  Status CreateDir(const std::string& d) override {
+    Status as = a_->CreateDir(d);
+    Status bs = b_->CreateDir(d);
+    assert(as == bs);
+    return as;
+  }
+  Status CreateDirIfMissing(const std::string& d) override {
+    Status as = a_->CreateDirIfMissing(d);
+    Status bs = b_->CreateDirIfMissing(d);
+    assert(as == bs);
+    return as;
+  }
+  Status DeleteDir(const std::string& d) override {
+    Status as = a_->DeleteDir(d);
+    Status bs = b_->DeleteDir(d);
+    assert(as == bs);
+    return as;
+  }
+  Status GetFileSize(const std::string& f, uint64_t* s) override {
+    uint64_t asize, bsize;
+    Status as = a_->GetFileSize(f, &asize);
+    Status bs = b_->GetFileSize(f, &bsize);
+    assert(as == bs);
+    assert(!as.ok() || asize == bsize);
+    *s = asize;
+    return as;
+  }
+
+  Status GetFileModificationTime(const std::string& fname,
+                                 uint64_t* file_mtime) override {
+    uint64_t amtime, bmtime;
+    Status as = a_->GetFileModificationTime(fname, &amtime);
+    Status bs = b_->GetFileModificationTime(fname, &bmtime);
+    assert(as == bs);
+    assert(!as.ok() || amtime - bmtime < 10000 || bmtime - amtime < 10000);
+    *file_mtime = amtime;
+    return as;
+  }
+
+  Status RenameFile(const std::string& s, const std::string& t) override {
+    Status as = a_->RenameFile(s, t);
+    Status bs = b_->RenameFile(s, t);
+    assert(as == bs);
+    return as;
+  }
+
+  Status LinkFile(const std::string& s, const std::string& t) override {
+    Status as = a_->LinkFile(s, t);
+    Status bs = b_->LinkFile(s, t);
+    assert(as == bs);
+    return as;
+  }
+
+  class FileLockMirror : public FileLock {
+   public:
+    FileLock *a_, *b_;
+    FileLockMirror(FileLock* a, FileLock* b) : a_(a), b_(b) {}
+  };
+
+  Status LockFile(const std::string& f, FileLock** l) override {
+    FileLock *al, *bl;
+    Status as = a_->LockFile(f, &al);
+    Status bs = b_->LockFile(f, &bl);
+    assert(as == bs);
+    if (as.ok()) *l = new FileLockMirror(al, bl);
+    return as;
+  }
+
+  Status UnlockFile(FileLock* l) override {
+    FileLockMirror* ml = static_cast<FileLockMirror*>(l);
+    Status as = a_->UnlockFile(ml->a_);
+    Status bs = b_->UnlockFile(ml->b_);
+    assert(as == bs);
+    delete ml;
+    return as;
+  }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
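+
+// Example (an illustrative sketch, not part of the upstream header): checking
+// a new Env implementation against the default POSIX Env. `my_env` is an
+// assumed Env* under test.
+//
+//   rocksdb::EnvMirror mirror_env(rocksdb::Env::Default(), my_env);
+//   rocksdb::Options options;
+//   options.env = &mirror_env;
+//   // Open a DB with these options; any divergence between the two backing
+//   // Envs trips an assert.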
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/info_log_finder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/info_log_finder.h
new file mode 100644
index 0000000000..824f8a3dfe
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/info_log_finder.h
@@ -0,0 +1,19 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "rocksdb/db.h"
+#include "rocksdb/options.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// This function can be used to list the info logs,
+// given the db pointer.
+Status GetInfoLogList(DB* db, std::vector<std::string>* info_log_list);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/ldb_cmd.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/ldb_cmd.h
new file mode 100644
index 0000000000..af5ee4ba98
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/ldb_cmd.h
@@ -0,0 +1,316 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "rocksdb/convenience.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/ldb_tool.h"
+#include "rocksdb/options.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/utilities/db_ttl.h"
+#include "rocksdb/utilities/ldb_cmd_execute_result.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class LDBCommand {
+ public:
+  // Command-line arguments
+  static const std::string ARG_ENV_URI;
+  static const std::string ARG_FS_URI;
+  static const std::string ARG_DB;
+  static const std::string ARG_PATH;
+  static const std::string ARG_SECONDARY_PATH;
+  static const std::string ARG_HEX;
+  static const std::string ARG_KEY_HEX;
+  static const std::string ARG_VALUE_HEX;
+  static const std::string ARG_CF_NAME;
+  static const std::string ARG_TTL;
+  static const std::string ARG_TTL_START;
+  static const std::string ARG_TTL_END;
+  static const std::string ARG_TIMESTAMP;
+  static const std::string ARG_TRY_LOAD_OPTIONS;
+  static const std::string ARG_IGNORE_UNKNOWN_OPTIONS;
+  static const std::string ARG_FROM;
+  static const std::string ARG_TO;
+  static const std::string ARG_MAX_KEYS;
+  static const std::string ARG_BLOOM_BITS;
+  static const std::string ARG_FIX_PREFIX_LEN;
+  static const std::string ARG_COMPRESSION_TYPE;
+  static const std::string ARG_COMPRESSION_MAX_DICT_BYTES;
+  static const std::string ARG_BLOCK_SIZE;
+  static const std::string ARG_AUTO_COMPACTION;
+  static const std::string ARG_DB_WRITE_BUFFER_SIZE;
+  static const std::string ARG_WRITE_BUFFER_SIZE;
+  static const std::string ARG_FILE_SIZE;
+  static const std::string ARG_CREATE_IF_MISSING;
+  static const std::string ARG_NO_VALUE;
+  static const std::string ARG_DISABLE_CONSISTENCY_CHECKS;
+  static const std::string ARG_ENABLE_BLOB_FILES;
+  static const std::string ARG_MIN_BLOB_SIZE;
+  static const std::string ARG_BLOB_FILE_SIZE;
+  static const std::string ARG_BLOB_COMPRESSION_TYPE;
+  static const std::string ARG_ENABLE_BLOB_GARBAGE_COLLECTION;
+  static const std::string ARG_BLOB_GARBAGE_COLLECTION_AGE_CUTOFF;
+  static const std::string ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD;
+  static const std::string ARG_BLOB_COMPACTION_READAHEAD_SIZE;
+  static const std::string ARG_BLOB_FILE_STARTING_LEVEL;
+  static const std::string ARG_PREPOPULATE_BLOB_CACHE;
+  static const std::string ARG_DECODE_BLOB_INDEX;
+  static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS;
+
+  struct ParsedParams {
+    std::string cmd;
+    std::vector<std::string> cmd_params;
+    std::map<std::string, std::string> option_map;
+    std::vector<std::string> flags;
+  };
+
+  static LDBCommand* SelectCommand(const ParsedParams& parsed_params);
+
+  static LDBCommand* InitFromCmdLineArgs(
+      const std::vector<std::string>& args, const Options& options,
+      const LDBOptions& ldb_options,
+      const std::vector<ColumnFamilyDescriptor>* column_families,
+      const std::function<LDBCommand*(const ParsedParams&)>& selector =
+          SelectCommand);
selector = + SelectCommand); + + static LDBCommand* InitFromCmdLineArgs( + int argc, char const* const* argv, const Options& options, + const LDBOptions& ldb_options, + const std::vector* column_families); + + bool ValidateCmdLineOptions(); + + virtual void PrepareOptions(); + + virtual void OverrideBaseOptions(); + + virtual void OverrideBaseCFOptions(ColumnFamilyOptions* cf_opts); + + virtual void SetDBOptions(Options options) { options_ = options; } + + virtual void SetColumnFamilies( + const std::vector* column_families) { + if (column_families != nullptr) { + column_families_ = *column_families; + } else { + column_families_.clear(); + } + } + + void SetLDBOptions(const LDBOptions& ldb_options) { + ldb_options_ = ldb_options; + } + + const std::map& TEST_GetOptionMap() { + return option_map_; + } + + const std::vector& TEST_GetFlags() { return flags_; } + + virtual bool NoDBOpen() { return false; } + + virtual ~LDBCommand() { CloseDB(); } + + /* Run the command, and return the execute result. */ + void Run(); + + virtual void DoCommand() = 0; + + LDBCommandExecuteResult GetExecuteState() { return exec_state_; } + + void ClearPreviousRunState() { exec_state_.Reset(); } + + // Consider using Slice::DecodeHex directly instead if you don't need the + // 0x prefix + static std::string HexToString(const std::string& str); + + // Consider using Slice::ToString(true) directly instead if + // you don't need the 0x prefix + static std::string StringToHex(const std::string& str); + + static const char* DELIM; + + protected: + LDBCommandExecuteResult exec_state_; + std::string env_uri_; + std::string fs_uri_; + std::string db_path_; + // If empty, open DB as primary. If non-empty, open the DB as secondary + // with this secondary path. When running against a database opened by + // another process, ldb wll leave the source directory completely intact. + std::string secondary_path_; + std::string column_family_name_; + DB* db_; + DBWithTTL* db_ttl_; + std::map cf_handles_; + + /** + * true implies that this command can work if the db is opened in read-only + * mode. + */ + bool is_read_only_; + + /** If true, the key is input/output as hex in get/put/scan/delete etc. */ + bool is_key_hex_; + + /** If true, the value is input/output as hex in get/put/scan/delete etc. */ + bool is_value_hex_; + + /** If true, the value is treated as timestamp suffixed */ + bool is_db_ttl_; + + // If true, the kvs are output with their insert/modify timestamp in a ttl db + bool timestamp_; + + // If true, try to construct options from DB's option files. + bool try_load_options_; + + // The value passed to options.force_consistency_checks. + bool force_consistency_checks_; + + bool enable_blob_files_; + + bool enable_blob_garbage_collection_; + + bool create_if_missing_; + + /** + * Map of options passed on the command-line. + */ + const std::map option_map_; + + /** + * Flags passed on the command-line. 
+ */ + const std::vector flags_; + + /** List of command-line options valid for this command */ + const std::vector valid_cmd_line_options_; + + /** Shared pointer to underlying environment if applicable **/ + std::shared_ptr env_guard_; + + bool ParseKeyValue(const std::string& line, std::string* key, + std::string* value, bool is_key_hex, bool is_value_hex); + + LDBCommand(const std::map& options, + const std::vector& flags, bool is_read_only, + const std::vector& valid_cmd_line_options); + + void OpenDB(); + + void CloseDB(); + + ColumnFamilyHandle* GetCfHandle(); + + static std::string PrintKeyValue(const std::string& key, + const std::string& value, bool is_key_hex, + bool is_value_hex); + + static std::string PrintKeyValue(const std::string& key, + const std::string& value, bool is_hex); + + /** + * Return true if the specified flag is present in the specified flags vector + */ + static bool IsFlagPresent(const std::vector& flags, + const std::string& flag) { + return (std::find(flags.begin(), flags.end(), flag) != flags.end()); + } + + static std::string HelpRangeCmdArgs(); + + /** + * A helper function that returns a list of command line options + * used by this command. It includes the common options and the ones + * passed in. + */ + static std::vector BuildCmdLineOptions( + std::vector options); + + bool ParseIntOption(const std::map& options, + const std::string& option, int& value, + LDBCommandExecuteResult& exec_state); + + bool ParseDoubleOption(const std::map& options, + const std::string& option, double& value, + LDBCommandExecuteResult& exec_state); + + bool ParseStringOption(const std::map& options, + const std::string& option, std::string* value); + + bool ParseCompressionTypeOption( + const std::map& options, + const std::string& option, CompressionType& value, + LDBCommandExecuteResult& exec_state); + + /** + * Returns the value of the specified option as a boolean. + * default_val is used if the option is not found in options. + * Throws an exception if the value of the option is not + * "true" or "false" (case insensitive). + */ + bool ParseBooleanOption(const std::map& options, + const std::string& option, bool default_val); + + Options options_; + std::vector column_families_; + ConfigOptions config_options_; + LDBOptions ldb_options_; + + private: + /** + * Interpret command line options and flags to determine if the key + * should be input/output in hex. + */ + bool IsKeyHex(const std::map& options, + const std::vector& flags); + + /** + * Interpret command line options and flags to determine if the value + * should be input/output in hex. + */ + bool IsValueHex(const std::map& options, + const std::vector& flags); + + bool IsTryLoadOptions(const std::map& options, + const std::vector& flags); + + /** + * Converts val to a boolean. + * val must be either true or false (case insensitive). + * Otherwise an exception is thrown. + */ + bool StringToBool(std::string val); +}; + +class LDBCommandRunner { + public: + static void PrintHelp(const LDBOptions& ldb_options, const char* exec_name, + bool to_stderr = true); + + // Returns the status code to return. 0 is no error. 
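[Note: RunCommand, declared just below, is the whole programmatic entry point to the ldb tooling. A minimal sketch of a custom main() that drives it; the wiring here is illustrative, not upstream code:

    #include "rocksdb/utilities/ldb_cmd.h"

    int main(int argc, char** argv) {
      ROCKSDB_NAMESPACE::Options options;         // base options for the command
      ROCKSDB_NAMESPACE::LDBOptions ldb_options;  // tool-level settings
      // Selects and runs the LDBCommand named by argv; returns 0 on success.
      return ROCKSDB_NAMESPACE::LDBCommandRunner::RunCommand(
          argc, argv, options, ldb_options, /*column_families=*/nullptr);
    }

The stock ldb binary does essentially this; passing nullptr for column_families opens only the default column family.]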
+  static int RunCommand(
+      int argc, char const* const* argv, Options options,
+      const LDBOptions& ldb_options,
+      const std::vector<ColumnFamilyDescriptor>* column_families);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/ldb_cmd_execute_result.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/ldb_cmd_execute_result.h
new file mode 100644
index 0000000000..57bac33468
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/ldb_cmd_execute_result.h
@@ -0,0 +1,75 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+
+#include <string>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+#ifdef FAILED
+#undef FAILED
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+class LDBCommandExecuteResult {
+ public:
+  enum State {
+    EXEC_NOT_STARTED = 0,
+    EXEC_SUCCEED = 1,
+    EXEC_FAILED = 2,
+  };
+
+  LDBCommandExecuteResult() : state_(EXEC_NOT_STARTED), message_("") {}
+
+  LDBCommandExecuteResult(State state, std::string& msg)
+      : state_(state), message_(msg) {}
+
+  std::string ToString() {
+    std::string ret;
+    switch (state_) {
+      case EXEC_SUCCEED:
+        break;
+      case EXEC_FAILED:
+        ret.append("Failed: ");
+        break;
+      case EXEC_NOT_STARTED:
+        ret.append("Not started: ");
+    }
+    if (!message_.empty()) {
+      ret.append(message_);
+    }
+    return ret;
+  }
+
+  void Reset() {
+    state_ = EXEC_NOT_STARTED;
+    message_ = "";
+  }
+
+  bool IsSucceed() { return state_ == EXEC_SUCCEED; }
+
+  bool IsNotStarted() { return state_ == EXEC_NOT_STARTED; }
+
+  bool IsFailed() { return state_ == EXEC_FAILED; }
+
+  static LDBCommandExecuteResult Succeed(std::string msg) {
+    return LDBCommandExecuteResult(EXEC_SUCCEED, msg);
+  }
+
+  static LDBCommandExecuteResult Failed(std::string msg) {
+    return LDBCommandExecuteResult(EXEC_FAILED, msg);
+  }
+
+ private:
+  State state_;
+  std::string message_;
+
+  bool operator==(const LDBCommandExecuteResult&);
+  bool operator!=(const LDBCommandExecuteResult&);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/leveldb_options.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/leveldb_options.h
new file mode 100644
index 0000000000..7e4a6faa4e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/leveldb_options.h
@@ -0,0 +1,145 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <stddef.h>
+
+#include "rocksdb/compression_type.h"
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Cache;
+class Comparator;
+class Env;
+class FilterPolicy;
+class Logger;
+struct Options;
+class Snapshot;
+
+// Options to control the behavior of a database (passed to
+// DB::Open).
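[Note: the LevelDBOptions struct described below mirrors LevelDB's Options field for field; the point of this header is ConvertOptions(), declared at its end. A hedged usage sketch, with arbitrary field values:

    #include "rocksdb/utilities/leveldb_options.h"

    ROCKSDB_NAMESPACE::Options MigratedOptions() {
      ROCKSDB_NAMESPACE::LevelDBOptions lopts;
      lopts.create_if_missing = true;
      lopts.write_buffer_size = 8 << 20;  // bytes; the LevelDB default is 4MB
      // Produces a RocksDB Options object with roughly equivalent behavior.
      return ROCKSDB_NAMESPACE::ConvertOptions(lopts);
    }
]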
A LevelDBOptions object can be initialized as though +// it were a LevelDB Options object, and then it can be converted into +// a RocksDB Options object. +struct LevelDBOptions { + // ------------------- + // Parameters that affect behavior + + // Comparator used to define the order of keys in the table. + // Default: a comparator that uses lexicographic byte-wise ordering + // + // REQUIRES: The client must ensure that the comparator supplied + // here has the same name and orders keys *exactly* the same as the + // comparator provided to previous open calls on the same DB. + const Comparator* comparator; + + // If true, the database will be created if it is missing. + // Default: false + bool create_if_missing; + + // If true, an error is raised if the database already exists. + // Default: false + bool error_if_exists; + + // If true, the implementation will do aggressive checking of the + // data it is processing and will stop early if it detects any + // errors. This may have unforeseen ramifications: for example, a + // corruption of one DB entry may cause a large number of entries to + // become unreadable or for the entire DB to become unopenable. + // Default: false + bool paranoid_checks; + + // Use the specified object to interact with the environment, + // e.g. to read/write files, schedule background work, etc. + // Default: Env::Default() + Env* env; + + // Any internal progress/error information generated by the db will + // be written to info_log if it is non-NULL, or to a file stored + // in the same directory as the DB contents if info_log is NULL. + // Default: NULL + Logger* info_log; + + // ------------------- + // Parameters that affect performance + + // Amount of data to build up in memory (backed by an unsorted log + // on disk) before converting to a sorted on-disk file. + // + // Larger values increase performance, especially during bulk loads. + // Up to two write buffers may be held in memory at the same time, + // so you may wish to adjust this parameter to control memory usage. + // Also, a larger write buffer will result in a longer recovery time + // the next time the database is opened. + // + // Default: 4MB + size_t write_buffer_size; + + // Number of open files that can be used by the DB. You may need to + // increase this if your database has a large working set (budget + // one open file per 2MB of working set). + // + // Default: 1000 + int max_open_files; + + // Control over blocks (user data is stored in a set of blocks, and + // a block is the unit of reading from disk). + + // If non-NULL, use the specified cache for blocks. + // If NULL, leveldb will automatically create and use an 8MB internal cache. + // Default: NULL + Cache* block_cache; + + // Approximate size of user data packed per block. Note that the + // block size specified here corresponds to uncompressed data. The + // actual size of the unit read from disk may be smaller if + // compression is enabled. This parameter can be changed dynamically. + // + // Default: 4K + size_t block_size; + + // Number of keys between restart points for delta encoding of keys. + // This parameter can be changed dynamically. Most clients should + // leave this parameter alone. + // + // Default: 16 + int block_restart_interval; + + // Compress blocks using the specified compression algorithm. This + // parameter can be changed dynamically. + // + // Default: kSnappyCompression, which gives lightweight but fast + // compression. 
+ // + // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz: + // ~200-500MB/s compression + // ~400-800MB/s decompression + // Note that these speeds are significantly faster than most + // persistent storage speeds, and therefore it is typically never + // worth switching to kNoCompression. Even if the input data is + // incompressible, the kSnappyCompression implementation will + // efficiently detect that and will switch to uncompressed mode. + CompressionType compression; + + // If non-NULL, use the specified filter policy to reduce disk reads. + // Many applications will benefit from passing the result of + // NewBloomFilterPolicy() here. + // + // Default: NULL + const FilterPolicy* filter_policy; + + // Create a LevelDBOptions object with default values for all fields. + LevelDBOptions(); +}; + +// Converts a LevelDBOptions object into a RocksDB Options object. +Options ConvertOptions(const LevelDBOptions& leveldb_options); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/lua/rocks_lua_custom_library.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/lua/rocks_lua_custom_library.h new file mode 100644 index 0000000000..f617da02be --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/lua/rocks_lua_custom_library.h @@ -0,0 +1,43 @@ +// Copyright (c) 2016, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#ifdef LUA + +// lua headers +extern "C" { +#include +#include +#include +} + +namespace ROCKSDB_NAMESPACE { +namespace lua { +// A class that used to define custom C Library that is callable +// from Lua script +class RocksLuaCustomLibrary { + public: + virtual ~RocksLuaCustomLibrary() {} + // The name of the C library. This name will also be used as the table + // (namespace) in Lua that contains the C library. + virtual const char* Name() const = 0; + + // Returns a "static const struct luaL_Reg[]", which includes a list of + // C functions. Note that the last entry of this static array must be + // {nullptr, nullptr} as required by Lua. + // + // More details about how to implement Lua C libraries can be found + // in the official Lua document http://www.lua.org/pil/26.2.html + virtual const struct luaL_Reg* Lib() const = 0; + + // A function that will be called right after the library has been created + // and pushed on the top of the lua_State. This custom setup function + // allows developers to put additional table or constant values inside + // the same table / namespace. + virtual void CustomSetup(lua_State* /*L*/) const {} +}; +} // namespace lua +} // namespace ROCKSDB_NAMESPACE +#endif // LUA diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/lua/rocks_lua_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/lua/rocks_lua_util.h new file mode 100644 index 0000000000..3427b65ef6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/lua/rocks_lua_util.h @@ -0,0 +1,55 @@ +// Copyright (c) 2016, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
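[Note: before moving on, a sketch of implementing the RocksLuaCustomLibrary interface above. It assumes the build defines LUA and links the Lua headers; the class and function names are hypothetical:

    extern "C" {
    #include <lua.h>
    #include <lauxlib.h>
    }

    namespace {
    int Hello(lua_State* L) {
      lua_pushstring(L, "hello from C");
      return 1;  // number of results pushed onto the Lua stack
    }
    }  // namespace

    class MyLuaLib : public ROCKSDB_NAMESPACE::lua::RocksLuaCustomLibrary {
     public:
      const char* Name() const override { return "mylib"; }
      const luaL_Reg* Lib() const override {
        // The last entry must be {nullptr, nullptr}, as required by Lua.
        static const luaL_Reg kLib[] = {{"hello", Hello}, {nullptr, nullptr}};
        return kLib;
      }
    };

A Lua script run through LuaStateWrapper with this library installed could then call mylib.hello().]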
+ +#pragma once +// lua headers +extern "C" { +#include +#include +#include +} + +#ifdef LUA +#include +#include + +#include "rocksdb/utilities/lua/rocks_lua_custom_library.h" + +namespace ROCKSDB_NAMESPACE { +namespace lua { +class LuaStateWrapper { + public: + explicit LuaStateWrapper(const std::string& lua_script) { + lua_state_ = luaL_newstate(); + Init(lua_script, {}); + } + LuaStateWrapper( + const std::string& lua_script, + const std::vector>& libraries) { + lua_state_ = luaL_newstate(); + Init(lua_script, libraries); + } + lua_State* GetLuaState() const { return lua_state_; } + ~LuaStateWrapper() { lua_close(lua_state_); } + + private: + void Init( + const std::string& lua_script, + const std::vector>& libraries) { + if (lua_state_) { + luaL_openlibs(lua_state_); + for (const auto& library : libraries) { + luaL_openlib(lua_state_, library->Name(), library->Lib(), 0); + library->CustomSetup(lua_state_); + } + luaL_dostring(lua_state_, lua_script.c_str()); + } + } + + lua_State* lua_state_; +}; +} // namespace lua +} // namespace ROCKSDB_NAMESPACE +#endif // LUA diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/memory_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/memory_util.h new file mode 100644 index 0000000000..b141b4ef0e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/memory_util.h @@ -0,0 +1,48 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#pragma once + +#include +#include +#include +#include + +#include "rocksdb/cache.h" +#include "rocksdb/db.h" + +namespace ROCKSDB_NAMESPACE { + +// Returns the current memory usage of the specified DB instances. +class MemoryUtil { + public: + enum UsageType : int { + // Memory usage of all the mem-tables. + kMemTableTotal = 0, + // Memory usage of those un-flushed mem-tables. + kMemTableUnFlushed = 1, + // Memory usage of all the table readers. + kTableReadersTotal = 2, + // Memory usage by Cache. + kCacheTotal = 3, + kNumUsageTypes = 4 + }; + + // Returns the approximate memory usage of different types in the input + // list of DBs and Cache set. For instance, in the output map + // usage_by_type, usage_by_type[kMemTableTotal] will store the memory + // usage of all the mem-tables from all the input rocksdb instances. + // + // Note that for memory usage inside Cache class, we will + // only report the usage of the input "cache_set" without + // including those Cache usage inside the input list "dbs" + // of DBs. + static Status GetApproximateMemoryUsageByType( + const std::vector& dbs, + const std::unordered_set cache_set, + std::map* usage_by_type); +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/object_registry.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/object_registry.h new file mode 100644 index 0000000000..613ef1cd9b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/object_registry.h @@ -0,0 +1,583 @@ +// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { +class Customizable; +class Logger; +class ObjectLibrary; + +// Returns a new T when called with a string. Populates the std::unique_ptr +// argument if granting ownership to caller. +template +using FactoryFunc = + std::function*, std::string*)>; + +// The signature of the function for loading factories +// into an object library. This method is expected to register +// factory functions in the supplied ObjectLibrary. +// The ObjectLibrary is the library in which the factories will be loaded. +// The std::string is the argument passed to the loader function. +// The RegistrarFunc should return the number of objects loaded into this +// library +using RegistrarFunc = std::function; + +template +using ConfigureFunc = std::function; + +class ObjectLibrary { + private: + // Base class for an Entry in the Registry. + class Entry { + public: + virtual ~Entry() {} + virtual bool Matches(const std::string& target) const = 0; + virtual const char* Name() const = 0; + }; + + public: + // Class for matching target strings to a pattern. + // Entries consist of a name that starts the pattern and attributes + // The following attributes can be added to the entry: + // -Suffix: Comparable to name(suffix) + // -Separator: Comparable to name(separator).+ or name(separator).* + // -Number: Comparable to name(separator).[0-9]+ + // -AltName: Comparable to (name|alt) + // -Optional: Comparable to name(separator)? + // Multiple separators can be combined and cause multiple matches. + // For example, Pattern("A").AnotherName("B").AddSeparator("@").AddNumber("#") + // is roughly equivalent to "(A|B)@.+#.+" + // + // Note that though this class does provide some regex-style matching, + // it is not a full regex parser and has some key differences: + // - Separators are matched left-most. For example, an entry + // Name("Hello").AddSeparator(" ").AddSuffix("!") would match + // "Hello world!", but not "Hello world!!" + // - No backtracking is necessary, enabling reliably efficient matching + class PatternEntry : public Entry { + private: + enum Quantifier { + kMatchZeroOrMore, // [suffix].* + kMatchAtLeastOne, // [suffix].+ + kMatchExact, // [suffix] + kMatchInteger, // [suffix][0-9]+ + kMatchDecimal, // [suffix][0-9]+[.][0-9]+ + }; + + public: + // Short-cut for creating an entry that matches to a + // Customizable::IndividualId + static PatternEntry AsIndividualId(const std::string& name) { + PatternEntry entry(name, true); + entry.AddSeparator("@"); + entry.AddSeparator("#"); + return entry; + } + + // Creates a new PatternEntry for "name". 
If optional is true, + // Matches will also return true if name==target + explicit PatternEntry(const std::string& name, bool optional = true) + : name_(name), optional_(optional), slength_(0) { + nlength_ = name_.size(); + } + + // Adds a suffix (exact match of separator with no trailing characters) to + // the separator + PatternEntry& AddSuffix(const std::string& suffix) { + separators_.emplace_back(suffix, kMatchExact); + slength_ += suffix.size(); + return *this; + } + + // Adds a separator (exact match of separator with trailing characters) to + // the entry + // If at_least_one is true, the separator must be followed by at least + // one character (e.g. separator.+). + // If at_least_one is false, the separator may be followed by zero or + // more characters (e.g. separator.*). + PatternEntry& AddSeparator(const std::string& separator, + bool at_least_one = true) { + slength_ += separator.size(); + if (at_least_one) { + separators_.emplace_back(separator, kMatchAtLeastOne); + ++slength_; + } else { + separators_.emplace_back(separator, kMatchZeroOrMore); + } + return *this; + } + + // Adds a separator (exact match of separator with trailing numbers) to the + // entry + PatternEntry& AddNumber(const std::string& separator, bool is_int = true) { + separators_.emplace_back(separator, + (is_int) ? kMatchInteger : kMatchDecimal); + slength_ += separator.size() + 1; + return *this; + } + + // Sets another name that this entry will match, similar to (name|alt) + PatternEntry& AnotherName(const std::string& alt) { + names_.emplace_back(alt); + return *this; + } + + // Sets whether the separators are required -- similar to name(separator)? + // If optional is true, then name(separator)? would match + // If optional is false, then the separators must also match + PatternEntry& SetOptional(bool optional) { + optional_ = optional; + return *this; + } + + // Checks to see if the target matches this entry + bool Matches(const std::string& target) const override; + const char* Name() const override { return name_.c_str(); } + + private: + size_t MatchSeparatorAt(size_t start, Quantifier mode, + const std::string& target, size_t tlen, + const std::string& pattern) const; + + bool MatchesTarget(const std::string& name, size_t nlen, + const std::string& target, size_t ylen) const; + std::string name_; // The base name for this entry + size_t nlength_; // The length of name_ + std::vector names_; // Alternative names for this entry + bool optional_; // Whether matching of separators is required + size_t slength_; // The minimum required length to match the separators + std::vector> + separators_; // What to match + }; // End class Entry + + private: + // An Entry containing a FactoryFunc for creating new Objects + template + class FactoryEntry : public Entry { + public: + FactoryEntry(Entry* e, FactoryFunc f) + : entry_(e), factory_(std::move(f)) {} + bool Matches(const std::string& target) const override { + return entry_->Matches(target); + } + const char* Name() const override { return entry_->Name(); } + + // Creates a new T object. 
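[Note: to make the PatternEntry rules above concrete, a hedged sketch; the identifier names are invented:

    ROCKSDB_NAMESPACE::ObjectLibrary::PatternEntry MyFilterPattern() {
      // Matches "myfilter" (optional == true) and "myfilter:10",
      // but not "myfilter:" or "myfilter:1.5" (an integer is required).
      return ROCKSDB_NAMESPACE::ObjectLibrary::PatternEntry("myfilter", true)
          .AddNumber(":", /*is_int=*/true);
    }
]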
+ T* NewFactoryObject(const std::string& target, std::unique_ptr* guard, + std::string* msg) const { + return factory_(target, guard, msg); + } + const FactoryFunc& GetFactory() const { return factory_; } + + private: + std::unique_ptr entry_; // What to match for this entry + FactoryFunc factory_; + }; // End class FactoryEntry + public: + explicit ObjectLibrary(const std::string& id) { id_ = id; } + + const std::string& GetID() const { return id_; } + + // Finds the factory function for the input target. + // @see PatternEntry for the matching rules to target + // @return If matched, the FactoryFunc for this target, else nullptr + template + FactoryFunc FindFactory(const std::string& target) const { + std::unique_lock lock(mu_); + auto factories = factories_.find(T::Type()); + if (factories != factories_.end()) { + for (const auto& e : factories->second) { + if (e->Matches(target)) { + const auto* fe = + static_cast*>(e.get()); + return fe->GetFactory(); + } + } + } + return nullptr; + } + + // Returns the total number of factories registered for this library. + // This method returns the sum of all factories registered for all types. + // @param num_types returns how many unique types are registered. + size_t GetFactoryCount(size_t* num_types) const; + + // Returns the number of factories registered for this library + // for the input type. + // @param num_types returns how many unique types are registered. + size_t GetFactoryCount(const std::string& type) const; + + // Returns the registered factory names for the input type + // names is updated to include the names for the type + void GetFactoryNames(const std::string& type, + std::vector* names) const; + + void GetFactoryTypes(std::unordered_set* types) const; + + void Dump(Logger* logger) const; + + // Registers the factory with the library for the name. + // If name==target, the factory may be used to create a new object. + template + const FactoryFunc& AddFactory(const std::string& name, + const FactoryFunc& func) { + std::unique_ptr entry( + new FactoryEntry(new PatternEntry(name), func)); + AddFactoryEntry(T::Type(), std::move(entry)); + return func; + } + + // Registers the factory with the library for the entry. + // If the entry matches the target, the factory may be used to create a new + // object. + // @see PatternEntry for the matching rules. + // NOTE: This function replaces the old ObjectLibrary::Register() + template + const FactoryFunc& AddFactory(const PatternEntry& entry, + const FactoryFunc& func) { + std::unique_ptr factory( + new FactoryEntry(new PatternEntry(entry), func)); + AddFactoryEntry(T::Type(), std::move(factory)); + return func; + } + + // Invokes the registrar function with the supplied arg for this library. + int Register(const RegistrarFunc& registrar, const std::string& arg) { + return registrar(*this, arg); + } + + // Returns the default ObjectLibrary + static std::shared_ptr& Default(); + + private: + void AddFactoryEntry(const char* type, std::unique_ptr&& entry) { + std::unique_lock lock(mu_); + auto& factories = factories_[type]; + factories.emplace_back(std::move(entry)); + } + + // Protects the entry map + mutable std::mutex mu_; + // ** FactoryFunctions for this loader, organized by type + std::unordered_map>> + factories_; + + // The name for this library + std::string id_; +}; + +// The ObjectRegistry is used to register objects that can be created by a +// name/pattern at run-time where the specific implementation of the object may +// not be known in advance. 
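[Note: before the registry itself, a sketch of registering a factory with the default ObjectLibrary via the AddFactory API above. MyTableFactory is a stand-in for a user-defined subclass; TableFactory (from rocksdb/table.h) is used because it defines T::Type():

    #include <memory>
    #include <string>

    #include "rocksdb/table.h"
    #include "rocksdb/utilities/object_registry.h"

    static void RegisterMyTableFactory() {
      ROCKSDB_NAMESPACE::ObjectLibrary::Default()->AddFactory<
          ROCKSDB_NAMESPACE::TableFactory>(
          "MyTableFactory",
          [](const std::string& /*uri*/,
             std::unique_ptr<ROCKSDB_NAMESPACE::TableFactory>* guard,
             std::string* /*errmsg*/) {
            guard->reset(new MyTableFactory());  // grant ownership via guard
            return guard->get();
          });
    }
]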
+class ObjectRegistry { + public: + static std::shared_ptr NewInstance(); + static std::shared_ptr NewInstance( + const std::shared_ptr& parent); + static std::shared_ptr Default(); + explicit ObjectRegistry(const std::shared_ptr& parent) + : parent_(parent) {} + explicit ObjectRegistry(const std::shared_ptr& library); + + std::shared_ptr AddLibrary(const std::string& id) { + auto library = std::make_shared(id); + AddLibrary(library); + return library; + } + + void AddLibrary(const std::shared_ptr& library) { + std::unique_lock lock(library_mutex_); + libraries_.push_back(library); + } + + void AddLibrary(const std::string& id, const RegistrarFunc& registrar, + const std::string& arg) { + auto library = AddLibrary(id); + library->Register(registrar, arg); + } + + // Finds the factory for target and instantiates a new T. + // Returns NotSupported if no factory is found + // Returns InvalidArgument if a factory is found but the factory failed. + template + Status NewObject(const std::string& target, T** object, + std::unique_ptr* guard) { + assert(guard != nullptr); + guard->reset(); + auto factory = FindFactory(target); + if (factory != nullptr) { + std::string errmsg; + *object = factory(target, guard, &errmsg); + if (*object != nullptr) { + return Status::OK(); + } else if (errmsg.empty()) { + return Status::InvalidArgument( + std::string("Could not load ") + T::Type(), target); + } else { + return Status::InvalidArgument(errmsg, target); + } + } else { + return Status::NotSupported(std::string("Could not load ") + T::Type(), + target); + } + } + // Creates a new unique T using the input factory functions. + // Returns OK if a new unique T was successfully created + // Returns NotSupported if the type/target could not be created + // Returns InvalidArgument if the factory return an unguarded object + // (meaning it cannot be managed by a unique ptr) + template + Status NewUniqueObject(const std::string& target, + std::unique_ptr* result) { + T* ptr = nullptr; + std::unique_ptr guard; + Status s = NewObject(target, &ptr, &guard); + if (!s.ok()) { + return s; + } else if (guard) { + result->reset(guard.release()); + return Status::OK(); + } else { + return Status::InvalidArgument(std::string("Cannot make a unique ") + + T::Type() + " from unguarded one ", + target); + } + } + + // Creates a new shared T using the input factory functions. + // Returns OK if a new shared T was successfully created + // Returns NotSupported if the type/target could not be created + // Returns InvalidArgument if the factory return an unguarded object + // (meaning it cannot be managed by a shared ptr) + template + Status NewSharedObject(const std::string& target, + std::shared_ptr* result) { + std::unique_ptr guard; + T* ptr = nullptr; + Status s = NewObject(target, &ptr, &guard); + if (!s.ok()) { + return s; + } else if (guard) { + result->reset(guard.release()); + return Status::OK(); + } else { + return Status::InvalidArgument(std::string("Cannot make a shared ") + + T::Type() + " from unguarded one ", + target); + } + } + + // Creates a new static T using the input factory functions. 
+ // Returns OK if a new static T was successfully created + // Returns NotSupported if the type/target could not be created + // Returns InvalidArgument if the factory return a guarded object + // (meaning it is managed by a unique ptr) + template + Status NewStaticObject(const std::string& target, T** result) { + std::unique_ptr guard; + T* ptr = nullptr; + Status s = NewObject(target, &ptr, &guard); + if (!s.ok()) { + return s; + } else if (guard.get()) { + return Status::InvalidArgument(std::string("Cannot make a static ") + + T::Type() + " from a guarded one ", + target); + } else { + *result = ptr; + return Status::OK(); + } + } + + // Sets the object for the given id/type to be the input object + // If the registry does not contain this id/type, the object is added and OK + // is returned. If the registry contains a different object, an error is + // returned. If the registry contains the input object, OK is returned. + template + Status SetManagedObject(const std::shared_ptr& object) { + assert(object != nullptr); + return SetManagedObject(object->GetId(), object); + } + + template + Status SetManagedObject(const std::string& id, + const std::shared_ptr& object) { + const auto c = std::static_pointer_cast(object); + return SetManagedObject(T::Type(), id, c); + } + + // Returns the object for the given id, if one exists. + // If the object is not found in the registry, a nullptr is returned + template + std::shared_ptr GetManagedObject(const std::string& id) const { + auto c = GetManagedObject(T::Type(), id); + return std::static_pointer_cast(c); + } + + // Returns the set of managed objects found in the registry matching + // the input type and ID. + // If the input id is not empty, then only objects of that class + // (IsInstanceOf(id)) will be returned (for example, only return LRUCache + // objects) If the input id is empty, then all objects of that type (all Cache + // objects) + template + Status ListManagedObjects(const std::string& id, + std::vector>* results) const { + std::vector> customizables; + results->clear(); + Status s = ListManagedObjects(T::Type(), id, &customizables); + if (s.ok()) { + for (const auto& c : customizables) { + results->push_back(std::static_pointer_cast(c)); + } + } + return s; + } + + template + Status ListManagedObjects(std::vector>* results) const { + return ListManagedObjects("", results); + } + + // Creates a new ManagedObject in the registry for the id if one does not + // currently exist. If an object with that ID already exists, the current + // object is returned. + // + // The ID is the identifier of the object to be returned/created and returned + // in result + // If a new object is created (using the object factories), the cfunc + // parameter will be invoked to configure the new object. 
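[Note: a usage sketch for the factory-based creation methods above. Whether the builtin factory names are pre-registered depends on build wiring, so treat the id string as illustrative:

    #include <memory>

    #include "rocksdb/table.h"
    #include "rocksdb/utilities/object_registry.h"

    ROCKSDB_NAMESPACE::Status MakeTableFactory(
        std::shared_ptr<ROCKSDB_NAMESPACE::TableFactory>* out) {
      auto registry = ROCKSDB_NAMESPACE::ObjectRegistry::NewInstance();
      // NotSupported if nothing matches; InvalidArgument if a factory fails.
      return registry->NewSharedObject<ROCKSDB_NAMESPACE::TableFactory>(
          "BlockBasedTable", out);
    }
]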
+ template + Status GetOrCreateManagedObject(const std::string& id, + std::shared_ptr* result, + const ConfigureFunc& cfunc = nullptr) { + if (parent_ != nullptr) { + auto object = parent_->GetManagedObject(T::Type(), id); + if (object != nullptr) { + *result = std::static_pointer_cast(object); + return Status::OK(); + } + } + { + std::unique_lock lock(objects_mutex_); + auto key = ToManagedObjectKey(T::Type(), id); + auto iter = managed_objects_.find(key); + if (iter != managed_objects_.end()) { + auto object = iter->second.lock(); + if (object != nullptr) { + *result = std::static_pointer_cast(object); + return Status::OK(); + } + } + std::shared_ptr object; + Status s = NewSharedObject(id, &object); + if (s.ok() && cfunc != nullptr) { + s = cfunc(object.get()); + } + if (s.ok()) { + auto c = std::static_pointer_cast(object); + if (id != c->Name()) { + // If the ID is not the base name of the class, add the new + // object under the input ID + managed_objects_[key] = c; + } + if (id != c->GetId() && c->GetId() != c->Name()) { + // If the input and current ID do not match, and the + // current ID is not the base bame, add the new object under + // its new ID + key = ToManagedObjectKey(T::Type(), c->GetId()); + managed_objects_[key] = c; + } + *result = object; + } + return s; + } + } + + // Returns the number of factories registered for this library + // for the input type. + // @param num_types returns how many unique types are registered. + size_t GetFactoryCount(const std::string& type) const; + + // Returns the names of registered factories for the input type. + // names is updated to include the names for the type + void GetFactoryNames(const std::string& type, + std::vector* names) const; + + void GetFactoryTypes(std::unordered_set* types) const; + + // Dump the contents of the registry to the logger + void Dump(Logger* logger) const; + + // Invokes the input function to retrieve the properties for this plugin. + int RegisterPlugin(const std::string& name, const RegistrarFunc& func); + + private: + static std::string ToManagedObjectKey(const std::string& type, + const std::string& id) { + return type + "://" + id; + } + + // Returns the Customizable managed object associated with the key (Type/ID). + // If not found, nullptr is returned. + std::shared_ptr GetManagedObject(const std::string& type, + const std::string& id) const; + Status ListManagedObjects( + const std::string& type, const std::string& pattern, + std::vector>* results) const; + // Sets the managed object associated with the key (Type/ID) to c. + // If the named managed object does not exist, the object is added and OK is + // returned If the object exists and is the same as c, OK is returned + // Otherwise, an error status is returned. + Status SetManagedObject(const std::string& type, const std::string& id, + const std::shared_ptr& c); + + // Searches (from back to front) the libraries looking for the + // factory that matches this name. + // Returns the factory if it is found, and nullptr otherwise + template + const FactoryFunc FindFactory(const std::string& name) const { + { + std::unique_lock lock(library_mutex_); + for (auto iter = libraries_.crbegin(); iter != libraries_.crend(); + ++iter) { + const auto factory = iter->get()->FindFactory(name); + if (factory != nullptr) { + return factory; + } + } + } + if (parent_ == nullptr) { + return nullptr; + } else { + return parent_->FindFactory(name); + } + } + + // The set of libraries to search for factories for this registry. 
+ // The libraries are searched in reverse order (back to front) when + // searching for entries. + std::vector> libraries_; + std::vector plugins_; + static std::unordered_map builtins_; + std::map> managed_objects_; + std::shared_ptr parent_; + mutable std::mutex objects_mutex_; // Mutex for managed objects + mutable std::mutex library_mutex_; // Mutex for managed libraries +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/optimistic_transaction_db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/optimistic_transaction_db.h new file mode 100644 index 0000000000..261883782f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/optimistic_transaction_db.h @@ -0,0 +1,98 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/comparator.h" +#include "rocksdb/db.h" +#include "rocksdb/utilities/stackable_db.h" + +namespace ROCKSDB_NAMESPACE { + +class Transaction; + +// Database with Transaction support. +// +// See optimistic_transaction.h and examples/transaction_example.cc + +// Options to use when starting an Optimistic Transaction +struct OptimisticTransactionOptions { + // Setting set_snapshot=true is the same as calling SetSnapshot(). + bool set_snapshot = false; + + // Should be set if the DB has a non-default comparator. + // See comment in WriteBatchWithIndex constructor. + const Comparator* cmp = BytewiseComparator(); +}; + +enum class OccValidationPolicy { + // Validate serially at commit stage, AFTER entering the write-group. + // Isolation validation is processed single-threaded(since in the + // write-group). + // May suffer from high mutex contention, as per this link: + // https://github.com/facebook/rocksdb/issues/4402 + kValidateSerial = 0, + // Validate parallelly before commit stage, BEFORE entering the write-group to + // reduce mutex contention. Each txn acquires locks for its write-set + // records in some well-defined order. + kValidateParallel = 1 +}; + +struct OptimisticTransactionDBOptions { + OccValidationPolicy validate_policy = OccValidationPolicy::kValidateParallel; + + // works only if validate_policy == OccValidationPolicy::kValidateParallel + uint32_t occ_lock_buckets = (1 << 20); +}; + +// Range deletions (including those in `WriteBatch`es passed to `Write()`) are +// incompatible with `OptimisticTransactionDB` and will return a non-OK `Status` +class OptimisticTransactionDB : public StackableDB { + public: + // Open an OptimisticTransactionDB similar to DB::Open(). + static Status Open(const Options& options, const std::string& dbname, + OptimisticTransactionDB** dbptr); + + static Status Open(const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, + OptimisticTransactionDB** dbptr); + + static Status Open(const DBOptions& db_options, + const OptimisticTransactionDBOptions& occ_options, + const std::string& dbname, + const std::vector& column_families, + std::vector* handles, + OptimisticTransactionDB** dbptr); + + virtual ~OptimisticTransactionDB() {} + + // Starts a new Transaction. + // + // Caller is responsible for deleting the returned transaction when no + // longer needed. 
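[Note: a usage sketch in the spirit of the examples/transaction_example.cc reference above; the path and keys are invented:

    #include "rocksdb/utilities/optimistic_transaction_db.h"
    #include "rocksdb/utilities/transaction.h"

    ROCKSDB_NAMESPACE::Status PutOnce() {
      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      ROCKSDB_NAMESPACE::OptimisticTransactionDB* txn_db = nullptr;
      ROCKSDB_NAMESPACE::Status s =
          ROCKSDB_NAMESPACE::OptimisticTransactionDB::Open(options, "/tmp/occ_db",
                                                           &txn_db);
      if (!s.ok()) return s;
      ROCKSDB_NAMESPACE::Transaction* txn =
          txn_db->BeginTransaction(ROCKSDB_NAMESPACE::WriteOptions());
      txn->Put("key", "value");
      s = txn->Commit();  // OCC: conflict validation happens at commit time
      delete txn;         // the caller owns the returned transaction
      delete txn_db;
      return s;
    }
]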
+ // + // If old_txn is not null, BeginTransaction will reuse this Transaction + // handle instead of allocating a new one. This is an optimization to avoid + // extra allocations when repeatedly creating transactions. + virtual Transaction* BeginTransaction( + const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options = + OptimisticTransactionOptions(), + Transaction* old_txn = nullptr) = 0; + + OptimisticTransactionDB(const OptimisticTransactionDB&) = delete; + void operator=(const OptimisticTransactionDB&) = delete; + + protected: + // To Create an OptimisticTransactionDB, call Open() + explicit OptimisticTransactionDB(DB* db) : StackableDB(db) {} +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/option_change_migration.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/option_change_migration.h new file mode 100644 index 0000000000..a73324a9e3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/option_change_migration.h @@ -0,0 +1,24 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/options.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { +// Try to migrate DB created with old_opts to be use new_opts. +// Multiple column families is not supported. +// It is best-effort. No guarantee to succeed. +// A full compaction may be executed. +// If the target options use FIFO compaction, the FIFO condition might be +// sacrificed: for data migrated, data inserted later might be dropped +// earlier. This is to gurantee FIFO compaction won't drop all the +// migrated data to fit max_table_files_size. +Status OptionChangeMigration(std::string dbname, const Options& old_opts, + const Options& new_opts); +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/options_type.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/options_type.h new file mode 100644 index 0000000000..cd340ed596 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/options_type.h @@ -0,0 +1,1221 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// The OptionTypeInfo and related classes provide a framework for +// configuring and validating RocksDB classes via the Options framework. +// This file is part of the public API to allow developers who wish to +// write their own extensions and plugins to take use the Options +// framework in their custom implementations. +// +// See https://github.com/facebook/rocksdb/wiki/RocksDB-Configurable-Objects +// for more information on how to develop and use custom extensions + +#pragma once + +#include +#include +#include + +#include "rocksdb/convenience.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { +class OptionTypeInfo; +struct ColumnFamilyOptions; +struct DBOptions; + +// The underlying "class/type" of the option. 
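[Note: looking back at option_change_migration.h just above, a hedged call sketch; the path and the particular option change are invented:

    #include "rocksdb/utilities/option_change_migration.h"

    ROCKSDB_NAMESPACE::Status Migrate(const ROCKSDB_NAMESPACE::Options& old_opts) {
      ROCKSDB_NAMESPACE::Options new_opts = old_opts;
      new_opts.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleUniversal;
      // Best effort; may run a full compaction. Single column family only.
      return ROCKSDB_NAMESPACE::OptionChangeMigration("/path/to/db", old_opts,
                                                      new_opts);
    }
]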
+// This enum is used to determine how the option should +// be converted to/from strings and compared. +enum class OptionType { + kBoolean, + kInt, + kInt32T, + kInt64T, + kUInt, + kUInt8T, + kUInt32T, + kUInt64T, + kSizeT, + kString, + kDouble, + kCompactionStyle, + kCompactionPri, + kCompressionType, + kCompactionStopStyle, + kChecksumType, + kEncodingType, + kEnv, + kEnum, + kStruct, + kVector, + kConfigurable, + kCustomizable, + kEncodedString, + kTemperature, + kArray, + kUnknown, +}; + +enum class OptionVerificationType { + kNormal, + kByName, // The option is pointer typed so we can only verify + // based on it's name. + kByNameAllowNull, // Same as kByName, but it also allows the case + // where one of them is a nullptr. + kByNameAllowFromNull, // Same as kByName, but it also allows the case + // where the old option is nullptr. + kDeprecated, // The option is no longer used in rocksdb. The RocksDB + // OptionsParser will still accept this option if it + // happen to exists in some Options file. However, + // the parser will not include it in serialization + // and verification processes. + kAlias, // This option represents is a name/shortcut for + // another option and should not be written or verified + // independently +}; + +// A set of modifier flags used to alter how an option is evaluated or +// processed. These flags can be combined together (e.g. kMutable | kShared). +// The kCompare flags can be used to control if/when options are compared. +// If kCompareNever is set, two related options would never be compared (always +// equal) If kCompareExact is set, the options will only be compared if the +// sanity mode +// is exact +// kMutable means the option can be changed after it is prepared +// kShared means the option is contained in a std::shared_ptr +// kUnique means the option is contained in a std::uniqued_ptr +// kRawPointer means the option is a raw pointer value. +// kAllowNull means that an option is allowed to be null for verification +// purposes. +// kDontSerialize means this option should not be serialized and included in +// the string representation. +// kDontPrepare means do not call PrepareOptions for this pointer value. +enum class OptionTypeFlags : uint32_t { + kNone = 0x00, // No flags + kCompareDefault = 0x0, + kCompareNever = ConfigOptions::kSanityLevelNone, + kCompareLoose = ConfigOptions::kSanityLevelLooselyCompatible, + kCompareExact = ConfigOptions::kSanityLevelExactMatch, + + kMutable = 0x0100, // Option is mutable + kRawPointer = 0x0200, // The option is stored as a raw pointer + kShared = 0x0400, // The option is stored as a shared_ptr + kUnique = 0x0800, // The option is stored as a unique_ptr + kAllowNull = 0x1000, // The option can be null + kDontSerialize = 0x2000, // Don't serialize the option + kDontPrepare = 0x4000, // Don't prepare or sanitize this option + kStringNameOnly = 0x8000, // The option serializes to a name only +}; + +inline OptionTypeFlags operator|(const OptionTypeFlags& a, + const OptionTypeFlags& b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline OptionTypeFlags operator&(const OptionTypeFlags& a, + const OptionTypeFlags& b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +// Converts an string into its enumerated value. +// @param type_map Mapping between strings and enum values +// @param type The string representation of the enum +// @param value Returns the enum value represented by the string +// @return true if the string was found in the enum map, false otherwise. 
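[Note: a sketch of the two enum helpers declared below; the Color enum and its map are hypothetical:

    #include <string>
    #include <unordered_map>

    #include "rocksdb/utilities/options_type.h"

    enum class Color { kRed, kBlue };
    static const std::unordered_map<std::string, Color> color_map = {
        {"red", Color::kRed}, {"blue", Color::kBlue}};

    void EnumRoundTrip() {
      Color c;
      bool ok = ROCKSDB_NAMESPACE::ParseEnum<Color>(color_map, "red", &c);
      // ok == true, c == Color::kRed
      std::string str;
      ok = ROCKSDB_NAMESPACE::SerializeEnum<Color>(color_map, Color::kBlue, &str);
      // ok == true, str == "blue"
      (void)ok;
    }
]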
+template +bool ParseEnum(const std::unordered_map& type_map, + const std::string& type, T* value) { + auto iter = type_map.find(type); + if (iter != type_map.end()) { + *value = iter->second; + return true; + } + return false; +} + +// Converts an enum into its string representation. +// @param type_map Mapping between strings and enum values +// @param type The enum +// @param value Returned as the string representation of the enum +// @return true if the enum was found in the enum map, false otherwise. +template +bool SerializeEnum(const std::unordered_map& type_map, + const T& type, std::string* value) { + for (const auto& pair : type_map) { + if (pair.second == type) { + *value = pair.first; + return true; + } + } + return false; +} + +template +Status ParseArray(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::string& value, + std::array* result); + +template +Status SerializeArray(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::array& vec, + std::string* value); + +template +bool ArraysAreEqual(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, const std::string& name, + const std::array& array1, + const std::array& array2, std::string* mismatch); + +template +Status ParseVector(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::string& value, + std::vector* result); + +template +Status SerializeVector(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::vector& vec, + std::string* value); +template +bool VectorsAreEqual(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, const std::string& name, + const std::vector& vec1, const std::vector& vec2, + std::string* mismatch); + +// Function for converting a option string value into its underlying +// representation in "addr" +// On success, Status::OK is returned and addr is set to the parsed form +// On failure, a non-OK status is returned +// @param opts The ConfigOptions controlling how the value is parsed +// @param name The name of the options being parsed +// @param value The string representation of the option +// @param addr Pointer to the object +using ParseFunc = std::function; + +// Function for converting an option "addr" into its string representation. +// On success, Status::OK is returned and value is the serialized form. +// On failure, a non-OK status is returned +// @param opts The ConfigOptions controlling how the values are serialized +// @param name The name of the options being serialized +// @param addr Pointer to the value being serialized +// @param value The result of the serialization. +using SerializeFunc = std::function; + +// Function for comparing two option values +// If they are not equal, updates "mismatch" with the name of the bad option +// @param opts The ConfigOptions controlling how the values are compared +// @param name The name of the options being compared +// @param addr1 The first address to compare +// @param addr2 The address to compare to +// @param mismatch If the values are not equal, the name of the option that +// first differs +using EqualsFunc = std::function; + +// Function for preparing/initializing an option. +using PrepareFunc = + std::function; + +// Function for validating an option. 
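[Note: as an example of the function typedefs above, a custom ParseFunc for an int-valued option might look like this sketch; error handling is elided and the target field is illustrative:

    #include <string>

    #include "rocksdb/utilities/options_type.h"

    // addr points at the option field inside the enclosing options struct.
    static const ROCKSDB_NAMESPACE::ParseFunc parse_int =
        [](const ROCKSDB_NAMESPACE::ConfigOptions& /*opts*/,
           const std::string& /*name*/, const std::string& value, void* addr) {
          *static_cast<int*>(addr) = std::stoi(value);  // error handling elided
          return ROCKSDB_NAMESPACE::Status::OK();
        };
]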
+using ValidateFunc = std::function; + +// A struct for storing constant option information such as option name, +// option type, and offset. +class OptionTypeInfo { + public: + // A simple "normal", non-mutable Type "type" at offset + OptionTypeInfo(int offset, OptionType type) + : offset_(offset), + parse_func_(nullptr), + serialize_func_(nullptr), + equals_func_(nullptr), + type_(type), + verification_(OptionVerificationType::kNormal), + flags_(OptionTypeFlags::kNone) {} + + OptionTypeInfo(int offset, OptionType type, + OptionVerificationType verification, OptionTypeFlags flags) + : offset_(offset), + parse_func_(nullptr), + serialize_func_(nullptr), + equals_func_(nullptr), + type_(type), + verification_(verification), + flags_(flags) {} + + OptionTypeInfo(int offset, OptionType type, + OptionVerificationType verification, OptionTypeFlags flags, + const ParseFunc& parse_func) + : offset_(offset), + parse_func_(parse_func), + serialize_func_(nullptr), + equals_func_(nullptr), + type_(type), + verification_(verification), + flags_(flags) {} + + OptionTypeInfo(int offset, OptionType type, + OptionVerificationType verification, OptionTypeFlags flags, + const ParseFunc& parse_func, + const SerializeFunc& serialize_func, + const EqualsFunc& equals_func) + : offset_(offset), + parse_func_(parse_func), + serialize_func_(serialize_func), + equals_func_(equals_func), + type_(type), + verification_(verification), + flags_(flags) {} + + // Creates an OptionTypeInfo for an enum type. Enums use an additional + // map to convert the enums to/from their string representation. + // To create an OptionTypeInfo that is an Enum, one should: + // - Create a static map of string values to the corresponding enum value + // - Call this method passing the static map in as a parameter. + // Note that it is not necessary to add a new OptionType or make any + // other changes -- the returned object handles parsing, serialization, and + // comparisons. + // + // @param offset The offset in the option object for this enum + // @param map The string to enum mapping for this enum + template + static OptionTypeInfo Enum( + int offset, const std::unordered_map* const map, + OptionTypeFlags flags = OptionTypeFlags::kNone) { + OptionTypeInfo info(offset, OptionType::kEnum, + OptionVerificationType::kNormal, flags); + info.SetParseFunc( + // Uses the map argument to convert the input string into + // its corresponding enum value. If value is found in the map, + // addr is updated to the corresponding map entry. + // @return OK if the value is found in the map + // @return InvalidArgument if the value is not found in the map + [map](const ConfigOptions&, const std::string& name, + const std::string& value, void* addr) { + if (map == nullptr) { + return Status::NotSupported("No enum mapping ", name); + } else if (ParseEnum(*map, value, static_cast(addr))) { + return Status::OK(); + } else { + return Status::InvalidArgument("No mapping for enum ", name); + } + }); + info.SetSerializeFunc( + // Uses the map argument to convert the input enum into + // its corresponding string value. If enum value is found in the map, + // value is updated to the corresponding string value in the map. 
+ // @return OK if the enum is found in the map + // @return InvalidArgument if the enum is not found in the map + [map](const ConfigOptions&, const std::string& name, const void* addr, + std::string* value) { + if (map == nullptr) { + return Status::NotSupported("No enum mapping ", name); + } else if (SerializeEnum(*map, (*static_cast(addr)), + value)) { + return Status::OK(); + } else { + return Status::InvalidArgument("No mapping for enum ", name); + } + }); + info.SetEqualsFunc( + // Casts addr1 and addr2 to the enum type and returns true if + // they are equal, false otherwise. + [](const ConfigOptions&, const std::string&, const void* addr1, + const void* addr2, std::string*) { + return (*static_cast(addr1) == + *static_cast(addr2)); + }); + return info; + } // End OptionTypeInfo::Enum + + // Creates an OptionTypeInfo for a Struct type. Structs have a + // map of string-OptionTypeInfo associated with them that describes how + // to process the object for parsing, serializing, and matching. + // Structs also have a struct_name, which is the name of the object + // as registered in the parent map. + // When processing a struct, the option name can be specified as: + // - Meaning to process the entire struct. + // - Meaning to process the single field + // - Process the single fields + // The CompactionOptionsFIFO, CompactionOptionsUniversal, and LRUCacheOptions + // are all examples of Struct options. + // + // To create an OptionTypeInfo that is a Struct, one should: + // - Create a static map of string-OptionTypeInfo corresponding to the + // properties of the object that can be set via the options. + // - Call this method passing the name and map in as parameters. + // Note that it is not necessary to add a new OptionType or make any + // other changes -- the returned object handles parsing, serialization, and + // comparisons. 
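[Note: a registration sketch for OptionTypeInfo::Enum above, reusing the hypothetical Color enum and color_map from the earlier note; the struct and map names are invented:

    #include <cstddef>  // offsetof

    struct MyOpts {
      Color color = Color::kRed;
    };

    static std::unordered_map<std::string, ROCKSDB_NAMESPACE::OptionTypeInfo>
        my_opts_type_info = {
            {"color", ROCKSDB_NAMESPACE::OptionTypeInfo::Enum<Color>(
                          offsetof(MyOpts, color), &color_map)}};

This is the same shape RocksDB uses internally for its own enum-typed options: a static string-to-OptionTypeInfo map keyed by option name.]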
+  static OptionTypeInfo Struct(
+      const std::string& struct_name,
+      const std::unordered_map<std::string, OptionTypeInfo>* struct_map,
+      int offset, OptionVerificationType verification, OptionTypeFlags flags,
+      const ParseFunc& parse_func) {
+    OptionTypeInfo info(
+        Struct(struct_name, struct_map, offset, verification, flags));
+    return info.SetParseFunc(parse_func);
+  }
+
+  template <typename T, size_t kSize>
+  static OptionTypeInfo Array(int _offset, OptionVerificationType _verification,
+                              OptionTypeFlags _flags,
+                              const OptionTypeInfo& elem_info,
+                              char separator = ':') {
+    OptionTypeInfo info(_offset, OptionType::kArray, _verification, _flags);
+    info.SetParseFunc([elem_info, separator](
+                          const ConfigOptions& opts, const std::string& name,
+                          const std::string& value, void* addr) {
+      auto result = static_cast<std::array<T, kSize>*>(addr);
+      return ParseArray<T, kSize>(opts, elem_info, separator, name, value,
+                                  result);
+    });
+    info.SetSerializeFunc([elem_info, separator](const ConfigOptions& opts,
+                                                 const std::string& name,
+                                                 const void* addr,
+                                                 std::string* value) {
+      const auto& array = *(static_cast<const std::array<T, kSize>*>(addr));
+      return SerializeArray<T, kSize>(opts, elem_info, separator, name, array,
+                                      value);
+    });
+    info.SetEqualsFunc([elem_info](const ConfigOptions& opts,
+                                   const std::string& name, const void* addr1,
+                                   const void* addr2, std::string* mismatch) {
+      const auto& array1 = *(static_cast<const std::array<T, kSize>*>(addr1));
+      const auto& array2 = *(static_cast<const std::array<T, kSize>*>(addr2));
+      return ArraysAreEqual<T, kSize>(opts, elem_info, name, array1, array2,
+                                      mismatch);
+    });
+    return info;
+  }
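+
+  // A minimal usage sketch (illustrative only; MyOptions and dims are
+  // hypothetical): a fixed-size array of two ints parsed from a string such
+  // as "4:8", with the elements described by a simple int OptionTypeInfo.
+  //
+  //   struct MyOptions { std::array<int, 2> dims = {0, 0}; };
+  //   static const OptionTypeInfo dims_info = OptionTypeInfo::Array<int, 2>(
+  //       offsetof(MyOptions, dims), OptionVerificationType::kNormal,
+  //       OptionTypeFlags::kNone, OptionTypeInfo(0, OptionType::kInt));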
+
+  template <typename T>
+  static OptionTypeInfo Vector(int _offset,
+                               OptionVerificationType _verification,
+                               OptionTypeFlags _flags,
+                               const OptionTypeInfo& elem_info,
+                               char separator = ':') {
+    OptionTypeInfo info(_offset, OptionType::kVector, _verification, _flags);
+    info.SetParseFunc([elem_info, separator](
+                          const ConfigOptions& opts, const std::string& name,
+                          const std::string& value, void* addr) {
+      auto result = static_cast<std::vector<T>*>(addr);
+      return ParseVector<T>(opts, elem_info, separator, name, value, result);
+    });
+    info.SetSerializeFunc([elem_info, separator](const ConfigOptions& opts,
+                                                 const std::string& name,
+                                                 const void* addr,
+                                                 std::string* value) {
+      const auto& vec = *(static_cast<const std::vector<T>*>(addr));
+      return SerializeVector<T>(opts, elem_info, separator, name, vec, value);
+    });
+    info.SetEqualsFunc([elem_info](const ConfigOptions& opts,
+                                   const std::string& name, const void* addr1,
+                                   const void* addr2, std::string* mismatch) {
+      const auto& vec1 = *(static_cast<const std::vector<T>*>(addr1));
+      const auto& vec2 = *(static_cast<const std::vector<T>*>(addr2));
+      return VectorsAreEqual<T>(opts, elem_info, name, vec1, vec2, mismatch);
+    });
+    return info;
+  }
+
+  // Create a new std::shared_ptr<Customizable> OptionTypeInfo
+  // This function will call the T::CreateFromString method to create a new
+  // std::shared_ptr<T> object.
+  //
+  // @param offset The offset for the Customizable from the base pointer
+  // @param ovt How to verify this option
+  // @param flags Extra flags specifying the behavior of this option
+  // @param serialize_func Optional function for serializing this option
+  // @param equals_func Optional function for comparing this option
+  template <typename T>
+  static OptionTypeInfo AsCustomSharedPtr(int offset,
+                                          OptionVerificationType ovt,
+                                          OptionTypeFlags flags) {
+    OptionTypeInfo info(offset, OptionType::kCustomizable, ovt,
+                        flags | OptionTypeFlags::kShared);
+    return info.SetParseFunc([](const ConfigOptions& opts,
+                                const std::string& name,
+                                const std::string& value, void* addr) {
+      auto* shared = static_cast<std::shared_ptr<T>*>(addr);
+      if (name == kIdPropName() && value.empty()) {
+        shared->reset();
+        return Status::OK();
+      } else {
+        return T::CreateFromString(opts, value, shared);
+      }
+    });
+  }
+
+  template <typename T>
+  static OptionTypeInfo AsCustomSharedPtr(int offset,
+                                          OptionVerificationType ovt,
+                                          OptionTypeFlags flags,
+                                          const SerializeFunc& serialize_func,
+                                          const EqualsFunc& equals_func) {
+    OptionTypeInfo info(AsCustomSharedPtr<T>(offset, ovt, flags));
+    info.SetSerializeFunc(serialize_func);
+    info.SetEqualsFunc(equals_func);
+    return info;
+  }
+
+  // Create a new std::unique_ptr<Customizable> OptionTypeInfo
+  // This function will call the T::CreateFromString method to create a new
+  // std::unique_ptr<T> object.
+  //
+  // @param offset The offset for the Customizable from the base pointer
+  // @param ovt How to verify this option
+  // @param flags Extra flags specifying the behavior of this option
+  // @param serialize_func Optional function for serializing this option
+  // @param equals_func Optional function for comparing this option
+  template <typename T>
+  static OptionTypeInfo AsCustomUniquePtr(int offset,
+                                          OptionVerificationType ovt,
+                                          OptionTypeFlags flags) {
+    OptionTypeInfo info(offset, OptionType::kCustomizable, ovt,
+                        flags | OptionTypeFlags::kUnique);
+    return info.SetParseFunc([](const ConfigOptions& opts,
+                                const std::string& name,
+                                const std::string& value, void* addr) {
+      auto* unique = static_cast<std::unique_ptr<T>*>(addr);
+      if (name == kIdPropName() && value.empty()) {
+        unique->reset();
+        return Status::OK();
+      } else {
+        return T::CreateFromString(opts, value, unique);
+      }
+    });
+  }
+
+  template <typename T>
+  static OptionTypeInfo AsCustomUniquePtr(int offset,
+                                          OptionVerificationType ovt,
+                                          OptionTypeFlags flags,
+                                          const SerializeFunc& serialize_func,
+                                          const EqualsFunc& equals_func) {
+    OptionTypeInfo info(AsCustomUniquePtr<T>(offset, ovt, flags));
+    info.SetSerializeFunc(serialize_func);
+    info.SetEqualsFunc(equals_func);
+    return info;
+  }
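+
+  // A minimal usage sketch (illustrative only; MyOptions and the field name
+  // are hypothetical): a shared_ptr to a Customizable subclass, created from
+  // its string id via CreateFromString.
+  //
+  //   struct MyOptions { std::shared_ptr<MemTableRepFactory> factory; };
+  //   static const OptionTypeInfo factory_info =
+  //       OptionTypeInfo::AsCustomSharedPtr<MemTableRepFactory>(
+  //           offsetof(MyOptions, factory), OptionVerificationType::kByName,
+  //           OptionTypeFlags::kNone);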
+
+  // Create a new Customizable* OptionTypeInfo
+  // This function will call the T::CreateFromString method to create a new
+  // T object.
+  //
+  // @param offset The offset for the Customizable from the base pointer
+  // @param ovt How to verify this option
+  // @param flags Extra flags specifying the behavior of this option
+  // @param serialize_func Optional function for serializing this option
+  // @param equals_func Optional function for comparing this option
+  template <typename T>
+  static OptionTypeInfo AsCustomRawPtr(int offset, OptionVerificationType ovt,
+                                       OptionTypeFlags flags) {
+    OptionTypeInfo info(offset, OptionType::kCustomizable, ovt,
+                        flags | OptionTypeFlags::kRawPointer);
+    return info.SetParseFunc([](const ConfigOptions& opts,
+                                const std::string& name,
+                                const std::string& value, void* addr) {
+      auto** pointer = static_cast<T**>(addr);
+      if (name == kIdPropName() && value.empty()) {
+        *pointer = nullptr;
+        return Status::OK();
+      } else {
+        return T::CreateFromString(opts, value, pointer);
+      }
+    });
+  }
+
+  template <typename T>
+  static OptionTypeInfo AsCustomRawPtr(int offset, OptionVerificationType ovt,
+                                       OptionTypeFlags flags,
+                                       const SerializeFunc& serialize_func,
+                                       const EqualsFunc& equals_func) {
+    OptionTypeInfo info(AsCustomRawPtr<T>(offset, ovt, flags));
+    info.SetSerializeFunc(serialize_func);
+    info.SetEqualsFunc(equals_func);
+    return info;
+  }
+
+  OptionTypeInfo& SetParseFunc(const ParseFunc& f) {
+    parse_func_ = f;
+    return *this;
+  }
+
+  OptionTypeInfo& SetSerializeFunc(const SerializeFunc& f) {
+    serialize_func_ = f;
+    return *this;
+  }
+
+  OptionTypeInfo& SetEqualsFunc(const EqualsFunc& f) {
+    equals_func_ = f;
+    return *this;
+  }
+
+  OptionTypeInfo& SetPrepareFunc(const PrepareFunc& f) {
+    prepare_func_ = f;
+    return *this;
+  }
+
+  OptionTypeInfo& SetValidateFunc(const ValidateFunc& f) {
+    validate_func_ = f;
+    return *this;
+  }
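+
+  // A minimal usage sketch (illustrative only): overriding the parse function
+  // of an existing OptionTypeInfo, e.g. to accept "unlimited" as an alias for
+  // -1 on an int option of a hypothetical MyOptions struct.
+  //
+  //   static const OptionTypeInfo limit_info =
+  //       OptionTypeInfo(offsetof(MyOptions, limit), OptionType::kInt)
+  //           .SetParseFunc([](const ConfigOptions&, const std::string&,
+  //                            const std::string& value, void* addr) {
+  //             *static_cast<int*>(addr) =
+  //                 (value == "unlimited") ? -1 : std::stoi(value);
+  //             return Status::OK();
+  //           });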
+
+  bool IsEnabled(OptionTypeFlags otf) const { return (flags_ & otf) == otf; }
+
+  bool IsEditable(const ConfigOptions& opts) const {
+    if (opts.mutable_options_only) {
+      return IsMutable();
+    } else {
+      return true;
+    }
+  }
+  bool IsMutable() const { return IsEnabled(OptionTypeFlags::kMutable); }
+
+  bool IsDeprecated() const {
+    return IsEnabled(OptionVerificationType::kDeprecated);
+  }
+
+  // Returns true if the option is marked as an Alias.
+  // Aliases are valid options that are parsed but are not converted to strings
+  // or compared.
+  bool IsAlias() const { return IsEnabled(OptionVerificationType::kAlias); }
+
+  bool IsEnabled(OptionVerificationType ovf) const {
+    return verification_ == ovf;
+  }
+
+  // Returns the sanity level for comparing the option.
+  // If the options should not be compared, returns None.
+  // If the option has a compare flag, returns it.
+  // Otherwise, returns "exact".
+  ConfigOptions::SanityLevel GetSanityLevel() const {
+    if (IsDeprecated() || IsAlias()) {
+      return ConfigOptions::SanityLevel::kSanityLevelNone;
+    } else {
+      auto match = (flags_ & OptionTypeFlags::kCompareExact);
+      if (match == OptionTypeFlags::kCompareDefault) {
+        return ConfigOptions::SanityLevel::kSanityLevelExactMatch;
+      } else {
+        return (ConfigOptions::SanityLevel)match;
+      }
+    }
+  }
+
+  // Returns true if the option should be serialized.
+  // Options should be serialized if they are not deprecated, aliases,
+  // or marked as "Don't Serialize".
+  bool ShouldSerialize() const {
+    if (IsDeprecated() || IsAlias()) {
+      return false;
+    } else if (IsEnabled(OptionTypeFlags::kDontSerialize)) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  bool ShouldPrepare() const {
+    if (IsDeprecated() || IsAlias()) {
+      return false;
+    } else if (IsEnabled(OptionTypeFlags::kDontPrepare)) {
+      return false;
+    } else {
+      return (prepare_func_ != nullptr || IsConfigurable());
+    }
+  }
+
+  bool ShouldValidate() const {
+    if (IsDeprecated() || IsAlias()) {
+      return false;
+    } else {
+      return (validate_func_ != nullptr || IsConfigurable());
+    }
+  }
+
+  // Returns true if the option is allowed to be null.
+  // Options can be null if the verification type allows null
+  // or if the flags specify allow null.
+  bool CanBeNull() const {
+    return (IsEnabled(OptionTypeFlags::kAllowNull) ||
+            IsEnabled(OptionVerificationType::kByNameAllowNull) ||
+            IsEnabled(OptionVerificationType::kByNameAllowFromNull));
+  }
+
+  bool IsSharedPtr() const { return IsEnabled(OptionTypeFlags::kShared); }
+
+  bool IsUniquePtr() const { return IsEnabled(OptionTypeFlags::kUnique); }
+
+  bool IsRawPtr() const { return IsEnabled(OptionTypeFlags::kRawPointer); }
+
+  bool IsByName() const {
+    return (verification_ == OptionVerificationType::kByName ||
+            verification_ == OptionVerificationType::kByNameAllowNull ||
+            verification_ == OptionVerificationType::kByNameAllowFromNull);
+  }
+
+  bool IsStruct() const { return (type_ == OptionType::kStruct); }
+
+  bool IsConfigurable() const {
+    return (type_ == OptionType::kConfigurable ||
+            type_ == OptionType::kCustomizable);
+  }
+
+  bool IsCustomizable() const { return (type_ == OptionType::kCustomizable); }
+
+  inline const void* GetOffset(const void* base) const {
+    return static_cast<const char*>(base) + offset_;
+  }
+
+  inline void* GetOffset(void* base) const {
+    return static_cast<char*>(base) + offset_;
+  }
+
+  template <typename T>
+  const T* GetOffsetAs(const void* base) const {
+    const void* addr = GetOffset(base);
+    return static_cast<const T*>(addr);
+  }
+
+  template <typename T>
+  T* GetOffsetAs(void* base) const {
+    void* addr = GetOffset(base);
+    return static_cast<T*>(addr);
+  }
+
+  // Returns the underlying pointer for the type at base_addr
+  // The value returned is the underlying "raw" pointer, offset from base.
+  template <typename T>
+  const T* AsRawPointer(const void* const base_addr) const {
+    if (base_addr == nullptr) {
+      return nullptr;
+    }
+    if (IsUniquePtr()) {
+      const auto ptr = GetOffsetAs<std::unique_ptr<T>>(base_addr);
+      return ptr->get();
+    } else if (IsSharedPtr()) {
+      const auto ptr = GetOffsetAs<std::shared_ptr<T>>(base_addr);
+      return ptr->get();
+    } else if (IsRawPtr()) {
+      const T* const* ptr = GetOffsetAs<const T*>(base_addr);
+      return *ptr;
+    } else {
+      return GetOffsetAs<T>(base_addr);
+    }
+  }
+
+  // Returns the underlying pointer for the type at base_addr
+  // The value returned is the underlying "raw" pointer, offset from base.
+  template <typename T>
+  T* AsRawPointer(void* base_addr) const {
+    if (base_addr == nullptr) {
+      return nullptr;
+    }
+    if (IsUniquePtr()) {
+      auto ptr = GetOffsetAs<std::unique_ptr<T>>(base_addr);
+      return ptr->get();
+    } else if (IsSharedPtr()) {
+      auto ptr = GetOffsetAs<std::shared_ptr<T>>(base_addr);
+      return ptr->get();
+    } else if (IsRawPtr()) {
+      auto ptr = GetOffsetAs<T*>(base_addr);
+      return *ptr;
+    } else {
+      return GetOffsetAs<T>(base_addr);
+    }
+  }
+
+  // Parses the option in "opt_value" according to the rules of this class
+  // and updates the value at "opt_ptr".
+  // On success, Status::OK() is returned.  On failure:
+  //   NotFound means the opt_name is not valid for this option
+  //   NotSupported means we do not know how to parse the value for this option
+  //   InvalidArgument means the opt_value is not valid for this option.
+  Status Parse(const ConfigOptions& config_options, const std::string& opt_name,
+               const std::string& opt_value, void* const opt_ptr) const;
+
+  // Serializes the option in "opt_addr" according to the rules of this class
+  // into the value at "opt_value".
+  Status Serialize(const ConfigOptions& config_options,
+                   const std::string& opt_name, const void* const opt_ptr,
+                   std::string* opt_value) const;
+
+  // Compares the "addr1" and "addr2" values according to the rules of this
+  // class and returns true if they match.  On a failed match, mismatch is the
+  // name of the option that failed to match.
+  bool AreEqual(const ConfigOptions& config_options,
+                const std::string& opt_name, const void* const addr1,
+                const void* const addr2, std::string* mismatch) const;
+
+  // Used to override the match rules for "ByName" options.
+  bool AreEqualByName(const ConfigOptions& config_options,
+                      const std::string& opt_name, const void* const this_ptr,
+                      const void* const that_ptr) const;
+  bool AreEqualByName(const ConfigOptions& config_options,
+                      const std::string& opt_name, const void* const this_ptr,
+                      const std::string& that_value) const;
+
+  Status Prepare(const ConfigOptions& config_options, const std::string& name,
+                 void* opt_ptr) const;
+  Status Validate(const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts,
+                  const std::string& name, const void* opt_ptr) const;
+
+  // Parses the input opts_map according to the type_map for the opt_addr.
+  // For each name-value pair in opts_map, find the corresponding name in
+  // type_map.  If the name is found:
+  // - set the corresponding value in opt_addr, returning the status on
+  //   failure;
+  // If the name is not found:
+  // - If unused is specified, add the name-value to unused and continue
+  // - If ignore_unknown_options is false, return NotFound
+  // Returns OK if all options were either:
+  // - Successfully set
+  // - options were not found and ignore_unknown_options=true
+  // - options were not found and unused was specified
+  // Note that this method is much less sophisticated than the comparable
+  // Configurable::Configure methods.  For example, on error, there is no
+  // attempt to return opt_addr to the initial state.  Additionally, there
+  // is no effort to initialize (Configurable::PrepareOptions) the object
+  // on success.  This method should typically only be used for simpler,
+  // standalone structures and not those that contain shared and embedded
+  // objects.
+  static Status ParseType(
+      const ConfigOptions& config_options, const std::string& opts_str,
+      const std::unordered_map<std::string, OptionTypeInfo>& type_map,
+      void* opt_addr,
+      std::unordered_map<std::string, std::string>* unused = nullptr);
+  static Status ParseType(
+      const ConfigOptions& config_options,
+      const std::unordered_map<std::string, std::string>& opts_map,
+      const std::unordered_map<std::string, OptionTypeInfo>& type_map,
+      void* opt_addr,
+      std::unordered_map<std::string, std::string>* unused = nullptr);
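+
+  // A minimal usage sketch (illustrative only; Window and window_map are the
+  // hypothetical struct and field map from the Struct example above):
+  //
+  //   Window w;
+  //   ConfigOptions cfg;
+  //   Status s = OptionTypeInfo::ParseType(cfg, "count=10;size=4096",
+  //                                        window_map, &w);
+  //   // On success, w.count == 10 and w.size == 4096.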
+
+  // Parses the input value according to the map for the struct at opt_addr.
+  // struct_name is the name of the struct option as registered.
+  // opt_name is the name of the option being evaluated.  This may
+  // be the whole struct or a sub-element of it, based on struct_name and
+  // opt_name.
+  static Status ParseStruct(
+      const ConfigOptions& config_options, const std::string& struct_name,
+      const std::unordered_map<std::string, OptionTypeInfo>* map,
+      const std::string& opt_name, const std::string& value, void* opt_addr);
+
+  // Serializes the values from opt_addr using the rules in type_map.
+  // Returns the serialized form in result.
+  // Returns OK on success or non-OK if some option could not be serialized.
+  static Status SerializeType(
+      const ConfigOptions& config_options,
+      const std::unordered_map<std::string, OptionTypeInfo>& type_map,
+      const void* opt_addr, std::string* value);
+
+  // Serializes the input addr according to the map for the struct to value.
+  // struct_name is the name of the struct option as registered.
+  // opt_name is the name of the option being evaluated.  This may
+  // be the whole struct or a sub-element of it.
+  static Status SerializeStruct(
+      const ConfigOptions& config_options, const std::string& struct_name,
+      const std::unordered_map<std::string, OptionTypeInfo>* map,
+      const std::string& opt_name, const void* opt_addr, std::string* value);
+
+  // Compares the values in this_addr and that_addr using the rules in
+  // type_map.  If the values are equal, returns true.
+  // If the values are not equal, returns false and sets mismatch to the name
+  // of the first value that did not match.
+  static bool TypesAreEqual(
+      const ConfigOptions& config_options,
+      const std::unordered_map<std::string, OptionTypeInfo>& map,
+      const void* this_addr, const void* that_addr, std::string* mismatch);
+
+  // Compares the input offsets according to the map for the struct and returns
+  // true if they are equivalent, false otherwise.
+  // struct_name is the name of the struct option as registered.
+  // opt_name is the name of the option being evaluated.  This may
+  // be the whole struct or a sub-element of it.
+  static bool StructsAreEqual(
+      const ConfigOptions& config_options, const std::string& struct_name,
+      const std::unordered_map<std::string, OptionTypeInfo>* map,
+      const std::string& opt_name, const void* this_offset,
+      const void* that_offset, std::string* mismatch);
+
+  // Finds the entry for the opt_name in the opt_map, returning
+  // nullptr if not found.
+  // If found, elem_name will be the name of the option to find.
+  // This may be opt_name, or a substring of opt_name.
+  // For "simple" options, opt_name will be equal to elem_name.  Given the
+  // opt_name "opt", elem_name will equal "opt".
+  // For "embedded" options (like structs), elem_name may be opt_name
+  // or a field within the opt_name.  For example, given the struct "struct",
+  // and opt_name of "struct.field", elem_name will be "field".
+  static const OptionTypeInfo* Find(
+      const std::string& opt_name,
+      const std::unordered_map<std::string, OptionTypeInfo>& opt_map,
+      std::string* elem_name);
"{a={b=c;}" ) -- missing closing brace + // @return InvalidArgument if an expected delimiter is not found + // e.g. "{a=b}c=d;" -- missing delimiter before "c" + static Status NextToken(const std::string& opts, char delimiter, size_t start, + size_t* end, std::string* token); + + constexpr static const char* kIdPropName() { return "id"; } + constexpr static const char* kIdPropSuffix() { return ".id"; } + + private: + int offset_; + + // The optional function to convert a string to its representation + ParseFunc parse_func_; + + // The optional function to convert a value to its string representation + SerializeFunc serialize_func_; + + // The optional function to match two option values + EqualsFunc equals_func_; + + PrepareFunc prepare_func_; + ValidateFunc validate_func_; + OptionType type_; + OptionVerificationType verification_; + OptionTypeFlags flags_; +}; + +// Parses the input value into elements of the result array, which has fixed +// array size. For example, if the value=1:2:3 and elem_info parses integers, +// the result array will be {1,2,3}. Array size is defined in the OptionTypeInfo +// the input value has to match with that. +// @param config_options Controls how the option value is parsed. +// @param elem_info Controls how individual tokens in value are parsed +// @param separator Character separating tokens in values (':' in the above +// example) +// @param name The name associated with this array option +// @param value The input string to parse into tokens +// @param result Returns the results of parsing value into its elements. +// @return OK if the value was successfully parse +// @return InvalidArgument if the value is improperly formed or element number +// doesn't match array size defined in OptionTypeInfo +// or if the token could not be parsed +// @return NotFound If the tokenized value contains unknown options for +// its type +template +Status ParseArray(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::string& value, + std::array* result) { + Status status; + + ConfigOptions copy = config_options; + copy.ignore_unsupported_options = false; + size_t i = 0, start = 0, end = 0; + for (; status.ok() && i < kSize && start < value.size() && + end != std::string::npos; + i++, start = end + 1) { + std::string token; + status = OptionTypeInfo::NextToken(value, separator, start, &end, &token); + if (status.ok()) { + status = elem_info.Parse(copy, name, token, &((*result)[i])); + if (config_options.ignore_unsupported_options && + status.IsNotSupported()) { + // If we were ignoring unsupported options and this one should be + // ignored, ignore it by setting the status to OK + status = Status::OK(); + } + } + } + if (!status.ok()) { + return status; + } + // make sure the element number matches the array size + if (i < kSize) { + return Status::InvalidArgument( + "Serialized value has less elements than array size", name); + } + if (start < value.size() && end != std::string::npos) { + return Status::InvalidArgument( + "Serialized value has more elements than array size", name); + } + return status; +} + +// Serializes the fixed size input array into its output value. Elements are +// separated by the separator character. This element will convert all of the +// elements in array into their serialized form, using elem_info to perform the +// serialization. +// For example, if the array contains the integers 1,2,3 and elem_info +// serializes the output would be 1:2:3 for separator ":". 
+
+// Serializes the fixed-size input array into its output value.  Elements are
+// separated by the separator character.  This function will convert all of
+// the elements in array into their serialized form, using elem_info to
+// perform the serialization.
+// For example, if the array contains the integers 1,2,3 and elem_info
+// serializes integers, the output would be 1:2:3 for separator ":".
+// @param config_options Controls how the option value is serialized.
+// @param elem_info Controls how individual tokens in value are serialized
+// @param separator Character separating tokens in value (':' in the above
+//   example)
+// @param name The name associated with this array option
+// @param array The input array to serialize
+// @param value The output string of serialized options
+// @return OK if the value was successfully serialized
+// @return InvalidArgument if the value is improperly formed or if a token
+//   could not be serialized
+// @return NotFound If the tokenized value contains unknown options for
+//   its type
+template <typename T, size_t kSize>
+Status SerializeArray(const ConfigOptions& config_options,
+                      const OptionTypeInfo& elem_info, char separator,
+                      const std::string& name,
+                      const std::array<T, kSize>& array, std::string* value) {
+  std::string result;
+  ConfigOptions embedded = config_options;
+  embedded.delimiter = ";";
+  int printed = 0;
+  for (const auto& elem : array) {
+    std::string elem_str;
+    Status s = elem_info.Serialize(embedded, name, &elem, &elem_str);
+    if (!s.ok()) {
+      return s;
+    } else if (!elem_str.empty()) {
+      if (printed++ > 0) {
+        result += separator;
+      }
+      // If the element contains embedded separators, put it inside of brackets
+      if (elem_str.find(separator) != std::string::npos) {
+        result += "{" + elem_str + "}";
+      } else {
+        result += elem_str;
+      }
+    }
+  }
+  if (result.find("=") != std::string::npos) {
+    *value = "{" + result + "}";
+  } else if (printed > 1 && result.at(0) == '{') {
+    *value = "{" + result + "}";
+  } else {
+    *value = result;
+  }
+  return Status::OK();
+}
+
+// Compares the input arrays array1 and array2 for equality.
+// Elements of the arrays are compared one by one using elem_info to perform
+// the comparison.
+//
+// @param config_options Controls how the arrays are compared.
+// @param elem_info Controls how individual elements in the arrays are compared
+// @param name The name associated with this array option
+// @param array1,array2 The arrays to compare.
+// @param mismatch If the arrays are not equivalent, mismatch will point to
+//   the first element of the comparison that did not match.
+// @return true If array1 and array2 are "equal", false otherwise
+template <typename T, size_t kSize>
+bool ArraysAreEqual(const ConfigOptions& config_options,
+                    const OptionTypeInfo& elem_info, const std::string& name,
+                    const std::array<T, kSize>& array1,
+                    const std::array<T, kSize>& array2, std::string* mismatch) {
+  assert(array1.size() == kSize);
+  assert(array2.size() == kSize);
+  for (size_t i = 0; i < kSize; ++i) {
+    if (!elem_info.AreEqual(config_options, name, &array1[i], &array2[i],
+                            mismatch)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Parses the input value into elements of the result vector.  This method
+// will break the input value into the individual tokens (based on the
+// separator), where each of those tokens will be parsed based on the rules of
+// elem_info.  The result vector will be populated with elements based on the
+// input tokens.  For example, if the value=1:2:3:4:5 and elem_info parses
+// integers, the result vector will contain the integers 1,2,3,4,5.
+// @param config_options Controls how the option value is parsed.
+// @param elem_info Controls how individual tokens in value are parsed
+// @param separator Character separating tokens in values (':' in the above
+//   example)
+// @param name The name associated with this vector option
+// @param value The input string to parse into tokens
+// @param result Returns the results of parsing value into its elements.
+// @return OK if the value was successfully parsed
+// @return InvalidArgument if the value is improperly formed or if a token
+//   could not be parsed
+// @return NotFound If the tokenized value contains unknown options for
+//   its type
+template <typename T>
+Status ParseVector(const ConfigOptions& config_options,
+                   const OptionTypeInfo& elem_info, char separator,
+                   const std::string& name, const std::string& value,
+                   std::vector<T>* result) {
+  result->clear();
+  Status status;
+
+  // Turn off ignore_unsupported_options so we can tell if the returned
+  // object is valid or not.
+  ConfigOptions copy = config_options;
+  copy.ignore_unsupported_options = false;
+  for (size_t start = 0, end = 0;
+       status.ok() && start < value.size() && end != std::string::npos;
+       start = end + 1) {
+    std::string token;
+    status = OptionTypeInfo::NextToken(value, separator, start, &end, &token);
+    if (status.ok()) {
+      T elem;
+      status = elem_info.Parse(copy, name, token, &elem);
+      if (status.ok()) {
+        result->emplace_back(elem);
+      } else if (config_options.ignore_unsupported_options &&
+                 status.IsNotSupported()) {
+        // If we were ignoring unsupported options and this one should be
+        // ignored, ignore it by setting the status to OK
+        status = Status::OK();
+      }
+    }
+  }
+  return status;
+}
+
+// Serializes the input vector into its output value.  Elements are
+// separated by the separator character.  This function will convert all of
+// the elements in vec into their serialized form, using elem_info to perform
+// the serialization.
+// For example, if vec contains the integers 1,2,3,4,5 and elem_info
+// serializes integers, the output would be 1:2:3:4:5 for separator ":".
+// @param config_options Controls how the option value is serialized.
+// @param elem_info Controls how individual tokens in value are serialized
+// @param separator Character separating tokens in value (':' in the above
+//   example)
+// @param name The name associated with this vector option
+// @param vec The input vector to serialize
+// @param value The output string of serialized options
+// @return OK if the value was successfully serialized
+// @return InvalidArgument if the value is improperly formed or if a token
+//   could not be serialized
+// @return NotFound If the tokenized value contains unknown options for
+//   its type
+template <typename T>
+Status SerializeVector(const ConfigOptions& config_options,
+                       const OptionTypeInfo& elem_info, char separator,
+                       const std::string& name, const std::vector<T>& vec,
+                       std::string* value) {
+  std::string result;
+  ConfigOptions embedded = config_options;
+  embedded.delimiter = ";";
+  int printed = 0;
+  for (const auto& elem : vec) {
+    std::string elem_str;
+    Status s = elem_info.Serialize(embedded, name, &elem, &elem_str);
+    if (!s.ok()) {
+      return s;
+    } else if (!elem_str.empty()) {
+      if (printed++ > 0) {
+        result += separator;
+      }
+      // If the element contains embedded separators, put it inside of brackets
+      if (elem_str.find(separator) != std::string::npos) {
+        result += "{" + elem_str + "}";
+      } else {
+        result += elem_str;
+      }
+    }
+  }
+  if (result.find("=") != std::string::npos) {
+    *value = "{" + result + "}";
+  } else if (printed > 1 && result.at(0) == '{') {
+    *value = "{" + result + "}";
+  } else {
+    *value = result;
+  }
+  return Status::OK();
+}
+
+// Compares the input vectors vec1 and vec2 for equality.
+// If the vectors are the same size, elements of the vectors are compared one
+// by one using elem_info to perform the comparison.
+//
+// @param config_options Controls how the vectors are compared.
+// @param elem_info Controls how individual elements in the vectors are
+//   compared
+// @param name The name associated with this vector option
+// @param vec1,vec2 The vectors to compare.
+// @param mismatch If the vectors are not equivalent, mismatch will point to
+//   the first element of the comparison that did not match.
+// @return true If vec1 and vec2 are "equal", false otherwise
+template <typename T>
+bool VectorsAreEqual(const ConfigOptions& config_options,
+                     const OptionTypeInfo& elem_info, const std::string& name,
+                     const std::vector<T>& vec1, const std::vector<T>& vec2,
+                     std::string* mismatch) {
+  if (vec1.size() != vec2.size()) {
+    *mismatch = name;
+    return false;
+  } else {
+    for (size_t i = 0; i < vec1.size(); ++i) {
+      if (!elem_info.AreEqual(
+              config_options, name, reinterpret_cast<const char*>(&vec1[i]),
+              reinterpret_cast<const char*>(&vec2[i]), mismatch)) {
+        return false;
+      }
+    }
+    return true;
+  }
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/options_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/options_util.h
new file mode 100644
index 0000000000..8d9488f5f8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/options_util.h
@@ -0,0 +1,105 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+// This file contains utility functions for RocksDB Options.
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "rocksdb/convenience.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+struct ConfigOptions;
+// Constructs the DBOptions and ColumnFamilyDescriptors by loading the
+// latest RocksDB options file stored in the specified rocksdb database.
+//
+// Note that all the pointer options (except table_factory, which will
+// be described in more detail below) will be initialized with the default
+// values.  Developers can further initialize them after this function call.
+// Below is an example list of pointer options which will be initialized:
+//
+// * env
+// * memtable_factory
+// * compaction_filter_factory
+// * prefix_extractor
+// * comparator
+// * merge_operator
+// * compaction_filter
+//
+// User can also choose to load customized comparator, env, and/or
+// merge_operator through the object registry:
+// * comparator needs to be registered through Registrar<const Comparator*>
+// * env needs to be registered through Registrar<Env*>
+// * merge operator needs to be registered through
+//   Registrar<std::shared_ptr<MergeOperator>>.
+//
+// For table_factory, this function further supports deserializing
+// BlockBasedTableFactory and its BlockBasedTableOptions except the
+// pointer options of BlockBasedTableOptions (flush_block_policy_factory,
+// block_cache, and block_cache_compressed), which will be initialized with
+// default values.  Developers can further specify these three options by
+// casting the return value of TableFactory::GetOptions() to
+// BlockBasedTableOptions and making necessary changes.
+//
+// config_options contains a set of options that controls the processing
+// of the options.
+//
+// config_options.ignore_unknown_options can be set to true if you want to
+// ignore options that are from a newer version of the db, essentially for
+// forward compatibility.
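+//
+// A minimal usage sketch (illustrative only; "/tmp/db" is a hypothetical
+// path):
+//
+//   ConfigOptions config;
+//   DBOptions db_opts;
+//   std::vector<ColumnFamilyDescriptor> cf_descs;
+//   Status s = LoadLatestOptions(config, "/tmp/db", &db_opts, &cf_descs);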
+//
+// examples/options_file_example.cc demonstrates how to use this function
+// to open a RocksDB instance.
+//
+// @return the function returns an OK status when it went successfully.  If
+//    the specified "dbpath" does not contain any option file, then a
+//    Status::NotFound will be returned.  A return value other than
+//    Status::OK or Status::NotFound indicates there is some error related
+//    to the options file itself.
+//
+// @see LoadOptionsFromFile
+Status LoadLatestOptions(const ConfigOptions& config_options,
+                         const std::string& dbpath, DBOptions* db_options,
+                         std::vector<ColumnFamilyDescriptor>* cf_descs,
+                         std::shared_ptr<Cache>* cache = {});
+
+// Similar to LoadLatestOptions, this function constructs the DBOptions
+// and ColumnFamilyDescriptors based on the specified RocksDB Options file.
+//
+// @see LoadLatestOptions
+Status LoadOptionsFromFile(const ConfigOptions& config_options,
+                           const std::string& options_file_name,
+                           DBOptions* db_options,
+                           std::vector<ColumnFamilyDescriptor>* cf_descs,
+                           std::shared_ptr<Cache>* cache = {});
+
+// Returns the latest options file name under the specified db path.
+Status GetLatestOptionsFileName(const std::string& dbpath, Env* env,
+                                std::string* options_file_name);
+
+// Returns Status::OK if the input DBOptions and ColumnFamilyDescriptors
+// are compatible with the latest options stored in the specified DB path.
+//
+// If the return status is non-ok, it means the specified RocksDB instance
+// might not be correctly opened with the input set of options.  Currently,
+// changing one of the following options will fail the compatibility check:
+//
+// * comparator
+// * prefix_extractor
+// * table_factory
+// * merge_operator
+// * persist_user_defined_timestamps
+Status CheckOptionsCompatibility(
+    const ConfigOptions& config_options, const std::string& dbpath,
+    const DBOptions& db_options,
+    const std::vector<ColumnFamilyDescriptor>& cf_descs);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/replayer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/replayer.h
new file mode 100644
index 0000000000..fc5319989b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/replayer.h
@@ -0,0 +1,85 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TraceRecord;
+class TraceRecordResult;
+
+struct ReplayOptions {
+  // Number of threads used for replaying.  If 0 or 1, replay using
+  // a single thread.
+  uint32_t num_threads;
+
+  // Enables fast forwarding a replay by increasing/reducing the delay between
+  // the ingested traces.
+  // If > 0.0 and < 1.0, slow down the replay by this amount.
+  // If 1.0, replay the operations at the same rate as in the trace stream.
+  // If > 1.0, speed up the replay by this amount.
+  double fast_forward;
+
+  ReplayOptions() : num_threads(1), fast_forward(1.0) {}
+
+  ReplayOptions(uint32_t num_of_threads, double fast_forward_ratio)
+      : num_threads(num_of_threads), fast_forward(fast_forward_ratio) {}
+};
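+
+// A minimal usage sketch (illustrative only): replay with four threads at
+// twice the recorded speed.
+//
+//   ReplayOptions replay_opts(4 /*num_of_threads*/, 2.0 /*fast_forward*/);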
+
+// Replayer helps to replay the captured RocksDB query-level operations.
+// The Replayer can either be created from the DB::NewReplayer method, or be
+// instantiated via db_bench by using the "replay" benchmark.
+class Replayer {
+ public:
+  virtual ~Replayer() = default;
+
+  // Make some preparation before replaying the trace.  This will also reset
+  // the replayer in order to restart replaying.
+  virtual Status Prepare() = 0;
+
+  // Return the timestamp when the trace recording was started.
+  virtual uint64_t GetHeaderTimestamp() const = 0;
+
+  // Atomically read one trace into a TraceRecord (excluding the header and
+  // footer traces).
+  // Return Status::OK() on success;
+  //        Status::Incomplete() if Prepare() was not called or there is no
+  //        more available trace;
+  //        Status::NotSupported() if the read trace type is not supported.
+  virtual Status Next(std::unique_ptr<TraceRecord>* record) = 0;
+
+  // Execute one TraceRecord.
+  // Return Status::OK() if the execution was successful.  Get/MultiGet traces
+  //        will still return Status::OK() even if they got Status::NotFound()
+  //        from DB::Get() or DB::MultiGet();
+  //        Status::Incomplete() if Prepare() was not called or there is no
+  //        more available trace;
+  //        Status::NotSupported() if the operation is not supported;
+  //        Otherwise, return the corresponding error status.
+  //
+  // The actual operation execution status and result(s) will be saved in
+  // result.  For example, a GetQueryTraceRecord will have its DB::Get() status
+  // and the returned value saved in a SingleValueTraceExecutionResult.
+  virtual Status Execute(const std::unique_ptr<TraceRecord>& record,
+                         std::unique_ptr<TraceRecordResult>* result) = 0;
+
+  // Replay all the traces from the provided trace stream, taking the delay
+  // between the traces into consideration.
+  //
+  // result_callback reports the status of executing a trace record, and the
+  // actual operation execution result (See the description for Execute()).
+  virtual Status Replay(
+      const ReplayOptions& options,
+      const std::function<void(Status, std::unique_ptr<TraceRecordResult>&&)>&
+          result_callback) = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/sim_cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/sim_cache.h
new file mode 100644
index 0000000000..6c52453e7e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/sim_cache.h
@@ -0,0 +1,93 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "rocksdb/advanced_cache.h"
+#include "rocksdb/env.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class SimCache;
+
+// For instrumentation purposes, use NewSimCache instead of the NewLRUCache
+// API.  NewSimCache is a wrapper function returning a SimCache instance that
+// provides, in addition to the Cache interface, an interface (the SimCache
+// class) to predict the block cache hit rate without actually allocating the
+// memory.  It can help users tune their current block cache size, and
+// determine how efficiently they are using the memory.
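+//
+// A minimal usage sketch (illustrative only): simulate a 1GB cache on top of
+// a real 256MB cache.
+//
+//   std::shared_ptr<Cache> real = NewLRUCache(256 << 20);
+//   std::shared_ptr<SimCache> sim =
+//       NewSimCache(real, 1 << 30 /*sim_capacity*/, 6 /*num_shard_bits*/);
+//   // Use sim as the block cache; later inspect the counters via
+//   // sim->get_hit_counter() and sim->get_miss_counter().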
+//
+// Since GetSimCapacity() returns the capacity for simulation, it differs from
+// actual memory usage, which can be estimated as:
+// sim_capacity * entry_size / (entry_size + block_size),
+// where 76 <= entry_size <= 104, and
+// BlockBasedTableOptions.block_size = 4096 by default but is configurable.
+// Therefore, the actual memory overhead of SimCache is generally less than
+// sim_capacity * 2%.
+extern std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> cache,
+                                             size_t sim_capacity,
+                                             int num_shard_bits);
+
+extern std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<SimCache> sim_cache,
+                                             std::shared_ptr<Cache> cache,
+                                             int num_shard_bits);
+
+// An abstract base class (public interface) to the SimCache implementation
+class SimCache : public CacheWrapper {
+ public:
+  using CacheWrapper::CacheWrapper;
+
+  // Returns the maximum configured capacity of the simcache for simulation
+  virtual size_t GetSimCapacity() const = 0;
+
+  // simcache doesn't provide an internal handle reference to the user, so
+  // PinnedUsage is always 0 and the behavior is not exactly consistent with
+  // the real cache.
+  // Returns the memory size for the entries residing in the simcache.
+  virtual size_t GetSimUsage() const = 0;
+
+  // Sets the maximum configured capacity of the simcache.  When the new
+  // capacity is less than the old capacity and the existing usage is
+  // greater than the new capacity, the implementation will purge old entries
+  // to fit the new capacity.
+  virtual void SetSimCapacity(size_t capacity) = 0;
+
+  // Returns the miss count of the simcache
+  virtual uint64_t get_miss_counter() const = 0;
+  // Returns the hit count of the simcache
+  virtual uint64_t get_hit_counter() const = 0;
+  // Resets the lookup and hit counters
+  virtual void reset_counter() = 0;
+  // String representation of the statistics of the simcache
+  virtual std::string ToString() const = 0;
+
+  // Start storing logs of the cache activity (Add/Lookup) into a file located
+  // at activity_log_file.  max_logging_size can be used to stop logging to
+  // the file automatically after reaching a specific size in bytes; a value
+  // of 0 disables this feature.
+  virtual Status StartActivityLogging(const std::string& activity_log_file,
+                                      Env* env,
+                                      uint64_t max_logging_size = 0) = 0;
+
+  // Stop cache activity logging if any
+  virtual void StopActivityLogging() = 0;
+
+  // Status of cache logging happening in the background
+  virtual Status GetActivityLoggingStatus() = 0;
+
+ private:
+  SimCache(const SimCache&);
+  SimCache& operator=(const SimCache&);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/stackable_db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/stackable_db.h
new file mode 100644
index 0000000000..1a87a11366
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/stackable_db.h
@@ -0,0 +1,587 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <map>
+#include <memory>
+#include <string>
+
+#include "rocksdb/db.h"
+
+#ifdef _WIN32
+// Windows API macro interference
+#undef DeleteFile
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+// This class contains APIs to stack rocksdb wrappers, e.g.
+// stacking TTL over the base db.
+class StackableDB : public DB {
+ public:
+  // StackableDB takes sole ownership of the underlying db.
+  explicit StackableDB(DB* db) : db_(db) {}
+
+  // StackableDB takes shared ownership of the underlying db.
+  explicit StackableDB(std::shared_ptr<DB> db)
+      : db_(db.get()), shared_db_ptr_(db) {}
+
+  ~StackableDB() {
+    if (shared_db_ptr_ == nullptr) {
+      delete db_;
+    } else {
+      assert(shared_db_ptr_.get() == db_);
+    }
+    db_ = nullptr;
+  }
+
+  virtual Status Close() override { return db_->Close(); }
+
+  virtual DB* GetBaseDB() { return db_; }
+
+  virtual DB* GetRootDB() override { return db_->GetRootDB(); }
+
+  virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
+                                    const std::string& column_family_name,
+                                    ColumnFamilyHandle** handle) override {
+    return db_->CreateColumnFamily(options, column_family_name, handle);
+  }
+
+  virtual Status CreateColumnFamilies(
+      const ColumnFamilyOptions& options,
+      const std::vector<std::string>& column_family_names,
+      std::vector<ColumnFamilyHandle*>* handles) override {
+    return db_->CreateColumnFamilies(options, column_family_names, handles);
+  }
+
+  virtual Status CreateColumnFamilies(
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      std::vector<ColumnFamilyHandle*>* handles) override {
+    return db_->CreateColumnFamilies(column_families, handles);
+  }
+
+  virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) override {
+    return db_->DropColumnFamily(column_family);
+  }
+
+  virtual Status DropColumnFamilies(
+      const std::vector<ColumnFamilyHandle*>& column_families) override {
+    return db_->DropColumnFamilies(column_families);
+  }
+
+  virtual Status DestroyColumnFamilyHandle(
+      ColumnFamilyHandle* column_family) override {
+    return db_->DestroyColumnFamilyHandle(column_family);
+  }
+
+  using DB::Put;
+  virtual Status Put(const WriteOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& val) override {
+    return db_->Put(options, column_family, key, val);
+  }
+  Status Put(const WriteOptions& options, ColumnFamilyHandle* column_family,
+             const Slice& key, const Slice& ts, const Slice& val) override {
+    return db_->Put(options, column_family, key, ts, val);
+  }
+
+  using DB::PutEntity;
+  Status PutEntity(const WriteOptions& options,
+                   ColumnFamilyHandle* column_family, const Slice& key,
+                   const WideColumns& columns) override {
+    return db_->PutEntity(options, column_family, key, columns);
+  }
+
+  using DB::Get;
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value) override {
+    return db_->Get(options, column_family, key, value);
+  }
+
+  using DB::GetEntity;
+  Status GetEntity(const ReadOptions& options,
+                   ColumnFamilyHandle* column_family, const Slice& key,
+                   PinnableWideColumns* columns) override {
+    return db_->GetEntity(options, column_family, key, columns);
+  }
+
+  using DB::GetMergeOperands;
+  virtual Status GetMergeOperands(
+      const ReadOptions& options, ColumnFamilyHandle* column_family,
+      const Slice& key, PinnableSlice* slice,
+      GetMergeOperandsOptions* get_merge_operands_options,
+      int* number_of_operands) override {
+    return db_->GetMergeOperands(options, column_family, key, slice,
+                                 get_merge_operands_options,
+                                 number_of_operands);
+  }
+
+  using DB::MultiGet;
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_family,
+      const std::vector<Slice>& keys,
+      std::vector<std::string>* values) override {
+    return db_->MultiGet(options, column_family, keys, values);
+  }
+
+  virtual void MultiGet(const ReadOptions& options,
+                        ColumnFamilyHandle* column_family,
+                        const
size_t num_keys, const Slice* keys, + PinnableSlice* values, Status* statuses, + const bool sorted_input = false) override { + return db_->MultiGet(options, column_family, num_keys, keys, values, + statuses, sorted_input); + } + + using DB::MultiGetEntity; + + void MultiGetEntity(const ReadOptions& options, + ColumnFamilyHandle* column_family, size_t num_keys, + const Slice* keys, PinnableWideColumns* results, + Status* statuses, bool sorted_input) override { + db_->MultiGetEntity(options, column_family, num_keys, keys, results, + statuses, sorted_input); + } + + void MultiGetEntity(const ReadOptions& options, size_t num_keys, + ColumnFamilyHandle** column_families, const Slice* keys, + PinnableWideColumns* results, Status* statuses, + bool sorted_input) override { + db_->MultiGetEntity(options, num_keys, column_families, keys, results, + statuses, sorted_input); + } + + using DB::IngestExternalFile; + virtual Status IngestExternalFile( + ColumnFamilyHandle* column_family, + const std::vector& external_files, + const IngestExternalFileOptions& options) override { + return db_->IngestExternalFile(column_family, external_files, options); + } + + using DB::IngestExternalFiles; + virtual Status IngestExternalFiles( + const std::vector& args) override { + return db_->IngestExternalFiles(args); + } + + using DB::CreateColumnFamilyWithImport; + virtual Status CreateColumnFamilyWithImport( + const ColumnFamilyOptions& options, const std::string& column_family_name, + const ImportColumnFamilyOptions& import_options, + const ExportImportFilesMetaData& metadata, + ColumnFamilyHandle** handle) override { + return db_->CreateColumnFamilyWithImport(options, column_family_name, + import_options, metadata, handle); + } + + using DB::ClipColumnFamily; + virtual Status ClipColumnFamily(ColumnFamilyHandle* column_family, + const Slice& begin_key, + const Slice& end_key) override { + return db_->ClipColumnFamily(column_family, begin_key, end_key); + } + + using DB::VerifyFileChecksums; + Status VerifyFileChecksums(const ReadOptions& read_opts) override { + return db_->VerifyFileChecksums(read_opts); + } + + virtual Status VerifyChecksum() override { return db_->VerifyChecksum(); } + + virtual Status VerifyChecksum(const ReadOptions& options) override { + return db_->VerifyChecksum(options); + } + + using DB::KeyMayExist; + virtual bool KeyMayExist(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + std::string* value, + bool* value_found = nullptr) override { + return db_->KeyMayExist(options, column_family, key, value, value_found); + } + + using DB::Delete; + virtual Status Delete(const WriteOptions& wopts, + ColumnFamilyHandle* column_family, + const Slice& key) override { + return db_->Delete(wopts, column_family, key); + } + Status Delete(const WriteOptions& wopts, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts) override { + return db_->Delete(wopts, column_family, key, ts); + } + + using DB::SingleDelete; + virtual Status SingleDelete(const WriteOptions& wopts, + ColumnFamilyHandle* column_family, + const Slice& key) override { + return db_->SingleDelete(wopts, column_family, key); + } + Status SingleDelete(const WriteOptions& wopts, + ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) override { + return db_->SingleDelete(wopts, column_family, key, ts); + } + + using DB::DeleteRange; + Status DeleteRange(const WriteOptions& wopts, + ColumnFamilyHandle* column_family, const Slice& start_key, + const Slice& end_key) override { 
+ return db_->DeleteRange(wopts, column_family, start_key, end_key); + } + + using DB::Merge; + virtual Status Merge(const WriteOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) override { + return db_->Merge(options, column_family, key, value); + } + Status Merge(const WriteOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts, const Slice& value) override { + return db_->Merge(options, column_family, key, ts, value); + } + + virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override { + return db_->Write(opts, updates); + } + + using DB::NewIterator; + virtual Iterator* NewIterator(const ReadOptions& opts, + ColumnFamilyHandle* column_family) override { + return db_->NewIterator(opts, column_family); + } + + virtual Status NewIterators( + const ReadOptions& options, + const std::vector& column_families, + std::vector* iterators) override { + return db_->NewIterators(options, column_families, iterators); + } + + virtual const Snapshot* GetSnapshot() override { return db_->GetSnapshot(); } + + virtual void ReleaseSnapshot(const Snapshot* snapshot) override { + return db_->ReleaseSnapshot(snapshot); + } + + using DB::GetMapProperty; + using DB::GetProperty; + virtual bool GetProperty(ColumnFamilyHandle* column_family, + const Slice& property, std::string* value) override { + return db_->GetProperty(column_family, property, value); + } + virtual bool GetMapProperty( + ColumnFamilyHandle* column_family, const Slice& property, + std::map* value) override { + return db_->GetMapProperty(column_family, property, value); + } + + using DB::GetIntProperty; + virtual bool GetIntProperty(ColumnFamilyHandle* column_family, + const Slice& property, uint64_t* value) override { + return db_->GetIntProperty(column_family, property, value); + } + + using DB::GetAggregatedIntProperty; + virtual bool GetAggregatedIntProperty(const Slice& property, + uint64_t* value) override { + return db_->GetAggregatedIntProperty(property, value); + } + + using DB::GetApproximateSizes; + virtual Status GetApproximateSizes(const SizeApproximationOptions& options, + ColumnFamilyHandle* column_family, + const Range* r, int n, + uint64_t* sizes) override { + return db_->GetApproximateSizes(options, column_family, r, n, sizes); + } + + using DB::GetApproximateMemTableStats; + virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, + const Range& range, + uint64_t* const count, + uint64_t* const size) override { + return db_->GetApproximateMemTableStats(column_family, range, count, size); + } + + using DB::CompactRange; + virtual Status CompactRange(const CompactRangeOptions& options, + ColumnFamilyHandle* column_family, + const Slice* begin, const Slice* end) override { + return db_->CompactRange(options, column_family, begin, end); + } + + using DB::CompactFiles; + virtual Status CompactFiles( + const CompactionOptions& compact_options, + ColumnFamilyHandle* column_family, + const std::vector& input_file_names, const int output_level, + const int output_path_id = -1, + std::vector* const output_file_names = nullptr, + CompactionJobInfo* compaction_job_info = nullptr) override { + return db_->CompactFiles(compact_options, column_family, input_file_names, + output_level, output_path_id, output_file_names, + compaction_job_info); + } + + virtual Status PauseBackgroundWork() override { + return db_->PauseBackgroundWork(); + } + virtual Status ContinueBackgroundWork() override { + return 
db_->ContinueBackgroundWork(); + } + + virtual Status EnableAutoCompaction( + const std::vector& column_family_handles) override { + return db_->EnableAutoCompaction(column_family_handles); + } + + virtual void EnableManualCompaction() override { + return db_->EnableManualCompaction(); + } + virtual void DisableManualCompaction() override { + return db_->DisableManualCompaction(); + } + + using DB::NumberLevels; + virtual int NumberLevels(ColumnFamilyHandle* column_family) override { + return db_->NumberLevels(column_family); + } + + using DB::MaxMemCompactionLevel; + virtual int MaxMemCompactionLevel( + ColumnFamilyHandle* column_family) override { + return db_->MaxMemCompactionLevel(column_family); + } + + using DB::Level0StopWriteTrigger; + virtual int Level0StopWriteTrigger( + ColumnFamilyHandle* column_family) override { + return db_->Level0StopWriteTrigger(column_family); + } + + virtual const std::string& GetName() const override { return db_->GetName(); } + + virtual Env* GetEnv() const override { return db_->GetEnv(); } + + virtual FileSystem* GetFileSystem() const override { + return db_->GetFileSystem(); + } + + using DB::GetOptions; + virtual Options GetOptions(ColumnFamilyHandle* column_family) const override { + return db_->GetOptions(column_family); + } + + using DB::GetDBOptions; + virtual DBOptions GetDBOptions() const override { + return db_->GetDBOptions(); + } + + using DB::Flush; + virtual Status Flush(const FlushOptions& fopts, + ColumnFamilyHandle* column_family) override { + return db_->Flush(fopts, column_family); + } + virtual Status Flush( + const FlushOptions& fopts, + const std::vector& column_families) override { + return db_->Flush(fopts, column_families); + } + + virtual Status SyncWAL() override { return db_->SyncWAL(); } + + virtual Status FlushWAL(bool sync) override { return db_->FlushWAL(sync); } + + virtual Status LockWAL() override { return db_->LockWAL(); } + + virtual Status UnlockWAL() override { return db_->UnlockWAL(); } + + + virtual Status DisableFileDeletions() override { + return db_->DisableFileDeletions(); + } + + virtual Status EnableFileDeletions(bool force) override { + return db_->EnableFileDeletions(force); + } + + virtual void GetLiveFilesMetaData( + std::vector* metadata) override { + db_->GetLiveFilesMetaData(metadata); + } + + virtual Status GetLiveFilesChecksumInfo( + FileChecksumList* checksum_list) override { + return db_->GetLiveFilesChecksumInfo(checksum_list); + } + + virtual Status GetLiveFilesStorageInfo( + const LiveFilesStorageInfoOptions& opts, + std::vector* files) override { + return db_->GetLiveFilesStorageInfo(opts, files); + } + + virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, + ColumnFamilyMetaData* cf_meta) override { + db_->GetColumnFamilyMetaData(column_family, cf_meta); + } + + using DB::StartBlockCacheTrace; + Status StartBlockCacheTrace( + const TraceOptions& trace_options, + std::unique_ptr&& trace_writer) override { + return db_->StartBlockCacheTrace(trace_options, std::move(trace_writer)); + } + + Status StartBlockCacheTrace( + const BlockCacheTraceOptions& options, + std::unique_ptr&& trace_writer) override { + return db_->StartBlockCacheTrace(options, std::move(trace_writer)); + } + + using DB::EndBlockCacheTrace; + Status EndBlockCacheTrace() override { return db_->EndBlockCacheTrace(); } + + using DB::StartIOTrace; + Status StartIOTrace(const TraceOptions& options, + std::unique_ptr&& trace_writer) override { + return db_->StartIOTrace(options, std::move(trace_writer)); + } + + 
using DB::EndIOTrace; + Status EndIOTrace() override { return db_->EndIOTrace(); } + + using DB::StartTrace; + Status StartTrace(const TraceOptions& options, + std::unique_ptr&& trace_writer) override { + return db_->StartTrace(options, std::move(trace_writer)); + } + + using DB::EndTrace; + Status EndTrace() override { return db_->EndTrace(); } + + using DB::NewDefaultReplayer; + Status NewDefaultReplayer(const std::vector& handles, + std::unique_ptr&& reader, + std::unique_ptr* replayer) override { + return db_->NewDefaultReplayer(handles, std::move(reader), replayer); + } + + + virtual Status GetLiveFiles(std::vector& vec, uint64_t* mfs, + bool flush_memtable = true) override { + return db_->GetLiveFiles(vec, mfs, flush_memtable); + } + + virtual SequenceNumber GetLatestSequenceNumber() const override { + return db_->GetLatestSequenceNumber(); + } + + Status IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family, + std::string ts_low) override { + return db_->IncreaseFullHistoryTsLow(column_family, ts_low); + } + + Status GetFullHistoryTsLow(ColumnFamilyHandle* column_family, + std::string* ts_low) override { + return db_->GetFullHistoryTsLow(column_family, ts_low); + } + + virtual Status GetSortedWalFiles(VectorLogPtr& files) override { + return db_->GetSortedWalFiles(files); + } + + virtual Status GetCurrentWalFile( + std::unique_ptr* current_log_file) override { + return db_->GetCurrentWalFile(current_log_file); + } + + virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) override { + return db_->GetCreationTimeOfOldestFile(creation_time); + } + + // WARNING: This API is planned for removal in RocksDB 7.0 since it does not + // operate at the proper level of abstraction for a key-value store, and its + // contract/restrictions are poorly documented. For example, it returns non-OK + // `Status` for non-bottommost files and files undergoing compaction. Since we + // do not plan to maintain it, the contract will likely remain underspecified + // until its removal. Any user is encouraged to read the implementation + // carefully and migrate away from it when possible. 
+  virtual Status DeleteFile(std::string name) override {
+    return db_->DeleteFile(name);
+  }
+
+  virtual Status GetDbIdentity(std::string& identity) const override {
+    return db_->GetDbIdentity(identity);
+  }
+
+  virtual Status GetDbSessionId(std::string& session_id) const override {
+    return db_->GetDbSessionId(session_id);
+  }
+
+  using DB::SetOptions;
+  virtual Status SetOptions(ColumnFamilyHandle* column_family_handle,
+                            const std::unordered_map<std::string, std::string>&
+                                new_options) override {
+    return db_->SetOptions(column_family_handle, new_options);
+  }
+
+  virtual Status SetDBOptions(
+      const std::unordered_map<std::string, std::string>& new_options)
+      override {
+    return db_->SetDBOptions(new_options);
+  }
+
+  using DB::ResetStats;
+  virtual Status ResetStats() override { return db_->ResetStats(); }
+
+  using DB::GetPropertiesOfAllTables;
+  virtual Status GetPropertiesOfAllTables(
+      ColumnFamilyHandle* column_family,
+      TablePropertiesCollection* props) override {
+    return db_->GetPropertiesOfAllTables(column_family, props);
+  }
+
+  using DB::GetPropertiesOfTablesInRange;
+  virtual Status GetPropertiesOfTablesInRange(
+      ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
+      TablePropertiesCollection* props) override {
+    return db_->GetPropertiesOfTablesInRange(column_family, range, n, props);
+  }
+
+  virtual Status GetUpdatesSince(
+      SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
+      const TransactionLogIterator::ReadOptions& read_options) override {
+    return db_->GetUpdatesSince(seq_number, iter, read_options);
+  }
+
+  virtual Status SuggestCompactRange(ColumnFamilyHandle* column_family,
+                                     const Slice* begin,
+                                     const Slice* end) override {
+    return db_->SuggestCompactRange(column_family, begin, end);
+  }
+
+  virtual Status PromoteL0(ColumnFamilyHandle* column_family,
+                           int target_level) override {
+    return db_->PromoteL0(column_family, target_level);
+  }
+
+  virtual ColumnFamilyHandle* DefaultColumnFamily() const override {
+    return db_->DefaultColumnFamily();
+  }
+
+  Status TryCatchUpWithPrimary() override {
+    return db_->TryCatchUpWithPrimary();
+  }
+
+ protected:
+  DB* db_;
+  std::shared_ptr<DB> shared_db_ptr_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/table_properties_collectors.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/table_properties_collectors.h
new file mode 100644
index 0000000000..f9d8d5dcdd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/table_properties_collectors.h
@@ -0,0 +1,88 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <atomic>
+#include <memory>
+
+#include "rocksdb/table_properties.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A factory of a table property collector that marks an SST
+// file as need-compaction when it observes at least "D" deletion
+// entries in any "N" consecutive entries, or when the ratio of tombstone
+// entries in the whole file is >= the specified deletion ratio.
+class CompactOnDeletionCollectorFactory
+    : public TablePropertiesCollectorFactory {
+ public:
+  // A factory of a table property collector that marks an SST
+  // file as need-compaction when it observes at least "D" deletion
+  // entries in any "N" consecutive entries, or when the ratio of tombstone
+  // entries is >= deletion_ratio.
+  //
+  // @param sliding_window_size "N"
+  // @param deletion_trigger "D"
+  // @param deletion_ratio, if <= 0 or > 1, disables triggering compaction
+  //     based on deletion ratio.
+  CompactOnDeletionCollectorFactory(size_t sliding_window_size,
+                                    size_t deletion_trigger,
+                                    double deletion_ratio);
+
+  ~CompactOnDeletionCollectorFactory() {}
+
+  TablePropertiesCollector* CreateTablePropertiesCollector(
+      TablePropertiesCollectorFactory::Context context) override;
+
+  // Change the value of sliding_window_size "N"
+  // Setting it to 0 disables the delete triggered compaction
+  void SetWindowSize(size_t sliding_window_size) {
+    sliding_window_size_.store(sliding_window_size);
+  }
+  size_t GetWindowSize() const { return sliding_window_size_.load(); }
+
+  // Change the value of deletion_trigger "D"
+  void SetDeletionTrigger(size_t deletion_trigger) {
+    deletion_trigger_.store(deletion_trigger);
+  }
+
+  size_t GetDeletionTrigger() const { return deletion_trigger_.load(); }
+  // Change deletion ratio.
+  // @param deletion_ratio, if <= 0 or > 1, disables triggering compaction
+  //     based on deletion ratio.
+  void SetDeletionRatio(double deletion_ratio) {
+    deletion_ratio_.store(deletion_ratio);
+  }
+
+  double GetDeletionRatio() const { return deletion_ratio_.load(); }
+  static const char* kClassName() { return "CompactOnDeletionCollector"; }
+  const char* Name() const override { return kClassName(); }
+
+  std::string ToString() const override;
+
+ private:
+  std::atomic<size_t> sliding_window_size_;
+  std::atomic<size_t> deletion_trigger_;
+  std::atomic<double> deletion_ratio_;
+};
+
+// Creates a factory of a table property collector that marks an SST
+// file as need-compaction when it observes at least "D" deletion
+// entries in any "N" consecutive entries, or when the ratio of tombstone
+// entries is >= deletion_ratio.
+//
+// @param sliding_window_size "N". Note that this number will be
+//     rounded up to the smallest multiple of 128 that is no less
+//     than the specified size.
+// @param deletion_trigger "D". Note that even when "N" is changed,
+//     the specified number for "D" will not be changed.
+// @param deletion_ratio, if <= 0 or > 1, disables triggering compaction
+//     based on deletion ratio. Disabled by default.
+extern std::shared_ptr<CompactOnDeletionCollectorFactory>
+NewCompactOnDeletionCollectorFactory(size_t sliding_window_size,
+                                     size_t deletion_trigger,
+                                     double deletion_ratio = 0);
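+
+// A minimal usage sketch (illustrative, not part of the upstream header).
+// It assumes an `options` object of type rocksdb::Options; the factory is
+// installed so that tombstone-heavy SST files get marked for compaction:
+//
+//   options.table_properties_collector_factories.emplace_back(
+//       NewCompactOnDeletionCollectorFactory(/*sliding_window_size=*/128,
+//                                            /*deletion_trigger=*/50,
+//                                            /*deletion_ratio=*/0.4));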
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction.h
new file mode 100644
index 0000000000..947fcec55a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction.h
@@ -0,0 +1,683 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "rocksdb/comparator.h"
+#include "rocksdb/db.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Iterator;
+class TransactionDB;
+class WriteBatchWithIndex;
+
+using TransactionName = std::string;
+
+using TransactionID = uint64_t;
+
+using TxnTimestamp = uint64_t;
+
+constexpr TxnTimestamp kMaxTxnTimestamp =
+    std::numeric_limits<TxnTimestamp>::max();
+
+/*
+  class Endpoint allows one to define prefix ranges.
+
+  Prefix ranges are introduced below.
+
+  == Basic Ranges ==
+  Let's start with basic ranges. The Key Comparator defines an ordering of
+  rowkeys. Then, one can specify finite closed ranges by just providing the
+  rowkeys of their endpoints:
+
+    lower_endpoint <= X <= upper_endpoint
+
+  However, our goal is to provide a richer set of endpoints. Read on.
+
+  == Lexicographic ordering ==
+  A lexicographic (or dictionary) ordering satisfies this criterion: if there
+  are two keys of the form
+    key_a = {prefix_a, suffix_a}
+    key_b = {prefix_b, suffix_b}
+  and
+    prefix_a < prefix_b
+  then
+    key_a < key_b.
+
+  == Prefix ranges ==
+  With lexicographic ordering, one may want to define ranges of the form
+
+     "prefix is $PREFIX"
+
+  which translates to a range of the form
+
+    {$PREFIX, -infinity} < X < {$PREFIX, +infinity}
+
+  where -infinity will compare less than any possible suffix, and +infinity
+  will compare as greater than any possible suffix.
+
+  class Endpoint allows one to define these kinds of ranges.
+
+  == Notes ==
+  BytewiseComparator and ReverseBytewiseComparator produce lexicographic
+  ordering.
+
+  The row comparison function is able to compare key prefixes. If the data
+  domain includes keys A and B, then the comparison function is able to compare
+  equal-length prefixes:
+
+    min_len = min(byte_length(A), byte_length(B));
+    cmp(Slice(A, min_len), Slice(B, min_len));  // this call is valid
+
+  == Other options ==
+  As far as MyRocks is concerned, the alternative to prefix ranges would be to
+  support both open (non-inclusive) and closed (inclusive) range endpoints.
+*/
+
+class Endpoint {
+ public:
+  Slice slice;
+
+  /*
+    true  : the key has a "+infinity" suffix, a suffix that would compare as
+            greater than any other suffix
+    false : otherwise
+  */
+  bool inf_suffix;
+
+  explicit Endpoint(const Slice& slice_arg, bool inf_suffix_arg = false)
+      : slice(slice_arg), inf_suffix(inf_suffix_arg) {}
+
+  explicit Endpoint(const char* s, bool inf_suffix_arg = false)
+      : slice(s), inf_suffix(inf_suffix_arg) {}
+
+  Endpoint(const char* s, size_t size, bool inf_suffix_arg = false)
+      : slice(s, size), inf_suffix(inf_suffix_arg) {}
+
+  Endpoint() : inf_suffix(false) {}
+};
+
+// Provides notification to the caller of SetSnapshotOnNextOperation when
+// the actual snapshot gets created
+class TransactionNotifier {
+ public:
+  virtual ~TransactionNotifier() {}
+
+  // Implement this method to receive notification when a snapshot is
+  // requested via SetSnapshotOnNextOperation.
+  // Do not take exclusive ownership of `newSnapshot` because it is shared with
+  // the underlying transaction.
+  virtual void SnapshotCreated(const Snapshot* newSnapshot) = 0;
+};
+
+// Provides BEGIN/COMMIT/ROLLBACK transactions.
+//
+// To use transactions, you must first create either an OptimisticTransactionDB
+// or a TransactionDB.  See examples/[optimistic_]transaction_example.cc for
+// more information.
+//
+// To create a transaction, use [Optimistic]TransactionDB::BeginTransaction().
+//
+// It is up to the caller to synchronize access to this object.
+//
+// See examples/transaction_example.cc for some simple examples.
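+//
+// A minimal sketch (illustrative only; `txn_db` is an assumed TransactionDB*
+// obtained from TransactionDB::Open()):
+//
+//   Transaction* txn = txn_db->BeginTransaction(WriteOptions());
+//   Status s = txn->Put("key", "value");
+//   if (s.ok()) s = txn->Commit();  // atomically applies all batched writes
+//   if (!s.ok()) txn->Rollback();   // e.g. Status::Busy() on write conflict
+//   delete txn;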
+// +// TODO(agiardullo): Not yet implemented +// -PerfContext statistics +// -Support for using Transactions with DBWithTTL +class Transaction { + public: + // No copying allowed + Transaction(const Transaction&) = delete; + void operator=(const Transaction&) = delete; + + virtual ~Transaction() {} + + // If a transaction has a snapshot set, the transaction will ensure that + // any keys successfully written(or fetched via GetForUpdate()) have not + // been modified outside of this transaction since the time the snapshot was + // set. + // If a snapshot has not been set, the transaction guarantees that keys have + // not been modified since the time each key was first written (or fetched via + // GetForUpdate()). + // + // Using SetSnapshot() will provide stricter isolation guarantees at the + // expense of potentially more transaction failures due to conflicts with + // other writes. + // + // Calling SetSnapshot() has no effect on keys written before this function + // has been called. + // + // SetSnapshot() may be called multiple times if you would like to change + // the snapshot used for different operations in this transaction. + // + // Calling SetSnapshot will not affect the version of Data returned by Get() + // methods. See Transaction::Get() for more details. + virtual void SetSnapshot() = 0; + + // Similar to SetSnapshot(), but will not change the current snapshot + // until Put/Merge/Delete/GetForUpdate/MultigetForUpdate is called. + // By calling this function, the transaction will essentially call + // SetSnapshot() for you right before performing the next write/GetForUpdate. + // + // Calling SetSnapshotOnNextOperation() will not affect what snapshot is + // returned by GetSnapshot() until the next write/GetForUpdate is executed. + // + // When the snapshot is created the notifier's SnapshotCreated method will + // be called so that the caller can get access to the snapshot. + // + // This is an optimization to reduce the likelihood of conflicts that + // could occur in between the time SetSnapshot() is called and the first + // write/GetForUpdate operation. Eg, this prevents the following + // race-condition: + // + // txn1->SetSnapshot(); + // txn2->Put("A", ...); + // txn2->Commit(); + // txn1->GetForUpdate(opts, "A", ...); // FAIL! + // + // WriteCommittedTxn only: a new snapshot will be taken upon next operation, + // and next operation can be a Commit. + // TODO(yanqin) remove the "write-committed only" limitation. + virtual void SetSnapshotOnNextOperation( + std::shared_ptr notifier = nullptr) = 0; + + // Returns the Snapshot created by the last call to SetSnapshot(). + // + // REQUIRED: The returned Snapshot is only valid up until the next time + // SetSnapshot()/SetSnapshotOnNextSavePoint() is called, ClearSnapshot() + // is called, or the Transaction is deleted. + virtual const Snapshot* GetSnapshot() const = 0; + + // Returns the Snapshot created by the last call to SetSnapshot(). + // The returned snapshot can outlive the transaction. + virtual std::shared_ptr GetTimestampedSnapshot() const = 0; + + // Clears the current snapshot (i.e. no snapshot will be 'set') + // + // This removes any snapshot that currently exists or is set to be created + // on the next update operation (SetSnapshotOnNextOperation). + // + // Calling ClearSnapshot() has no effect on keys written before this function + // has been called. 
+  //
+  // If a reference to a snapshot was retrieved via GetSnapshot(), it will no
+  // longer be valid and should be discarded after a call to ClearSnapshot().
+  virtual void ClearSnapshot() = 0;
+
+  // Prepare the current transaction for 2PC
+  virtual Status Prepare() = 0;
+
+  // Write all batched keys to the db atomically.
+  //
+  // Returns OK on success.
+  //
+  // May return any error status that could be returned by DB::Write().
+  //
+  // If this transaction was created by an OptimisticTransactionDB(),
+  // Status::Busy() may be returned if the transaction could not guarantee
+  // that there are no write conflicts. Status::TryAgain() may be returned
+  // if the memtable history size is not large enough
+  // (See max_write_buffer_size_to_maintain).
+  //
+  // If this transaction was created by a TransactionDB(), Status::Expired()
+  // may be returned if this transaction has lived for longer than
+  // TransactionOptions.expiration. Status::TxnNotPrepared() may be returned if
+  // TransactionOptions.skip_prepare is false and Prepare is not called on this
+  // transaction before Commit.
+  virtual Status Commit() = 0;
+
+  // In addition to Commit(), also creates a snapshot of the db after all
+  // writes by this txn are visible to other readers.
+  // Caller is responsible for ensuring that
+  //   snapshot1.seq < snapshot2.seq iff. snapshot1.ts < snapshot2.ts
+  // in which snapshot1 and snapshot2 are created by this API.
+  //
+  // Currently only supported by WriteCommittedTxn. Calling this method on
+  // other types of transactions will return a non-ok Status resulting from
+  // Commit() or a `NotSupported` error.
+  // This method returns OK if and only if the transaction successfully
+  // commits. It is possible that the transaction commits successfully but
+  // fails to create a timestamped snapshot. Therefore, the caller should
+  // check that the snapshot is created.
+  // notifier will be notified upon next snapshot creation. Nullable.
+  // ret non-null output argument storing a shared_ptr to the newly created
+  // snapshot.
+  Status CommitAndTryCreateSnapshot(
+      std::shared_ptr<TransactionNotifier> notifier =
+          std::shared_ptr<TransactionNotifier>(),
+      TxnTimestamp ts = kMaxTxnTimestamp,
+      std::shared_ptr<const Snapshot>* snapshot = nullptr);
+
+  // Discard all batched writes in this transaction.
+  virtual Status Rollback() = 0;
+
+  // Records the state of the transaction for future calls to
+  // RollbackToSavePoint(). May be called multiple times to set multiple save
+  // points.
+  virtual void SetSavePoint() = 0;
+
+  // Undo all operations in this transaction (Put, Merge, Delete, PutLogData)
+  // since the most recent call to SetSavePoint() and removes the most recent
+  // SetSavePoint().
+  // If there is no previous call to SetSavePoint(), returns Status::NotFound()
+  virtual Status RollbackToSavePoint() = 0;
+
+  // Pop the most recent save point.
+  // If there is no previous call to SetSavePoint(), Status::NotFound()
+  // will be returned.
+  // Otherwise returns Status::OK().
+  virtual Status PopSavePoint() = 0;
+
+  // This function is similar to DB::Get() except it will also read pending
+  // changes in this transaction. Currently, this function will return
+  // Status::MergeInProgress if the most recent write to the queried key in
+  // this batch is a Merge.
+  //
+  // If read_options.snapshot is not set, the current version of the key will
+  // be read. Calling SetSnapshot() does not affect the version of the data
+  // returned.
+ // + // Note that setting read_options.snapshot will affect what is read from the + // DB but will NOT change which keys are read from this transaction (the keys + // in this transaction do not yet belong to any snapshot and will be fetched + // regardless). + virtual Status Get(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + std::string* value) = 0; + + // An overload of the above method that receives a PinnableSlice + // For backward compatibility a default implementation is provided + virtual Status Get(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* pinnable_val) { + assert(pinnable_val != nullptr); + auto s = Get(options, column_family, key, pinnable_val->GetSelf()); + pinnable_val->PinSelf(); + return s; + } + + virtual Status Get(const ReadOptions& options, const Slice& key, + std::string* value) = 0; + virtual Status Get(const ReadOptions& options, const Slice& key, + PinnableSlice* pinnable_val) { + assert(pinnable_val != nullptr); + auto s = Get(options, key, pinnable_val->GetSelf()); + pinnable_val->PinSelf(); + return s; + } + + virtual std::vector MultiGet( + const ReadOptions& options, + const std::vector& column_family, + const std::vector& keys, std::vector* values) = 0; + + virtual std::vector MultiGet(const ReadOptions& options, + const std::vector& keys, + std::vector* values) = 0; + + // Batched version of MultiGet - see DBImpl::MultiGet(). Sub-classes are + // expected to override this with an implementation that calls + // DBImpl::MultiGet() + virtual void MultiGet(const ReadOptions& options, + ColumnFamilyHandle* column_family, + const size_t num_keys, const Slice* keys, + PinnableSlice* values, Status* statuses, + const bool /*sorted_input*/ = false) { + for (size_t i = 0; i < num_keys; ++i) { + statuses[i] = Get(options, column_family, keys[i], &values[i]); + } + } + + // Read this key and ensure that this transaction will only + // be able to be committed if this key is not written outside this + // transaction after it has first been read (or after the snapshot if a + // snapshot is set in this transaction and do_validate is true). If + // do_validate is false, ReadOptions::snapshot is expected to be nullptr so + // that GetForUpdate returns the latest committed value. The transaction + // behavior is the same regardless of whether the key exists or not. + // + // Note: Currently, this function will return Status::MergeInProgress + // if the most recent write to the queried key in this batch is a Merge. + // + // The values returned by this function are similar to Transaction::Get(). + // If value==nullptr, then this function will not read any data, but will + // still ensure that this key cannot be written to by outside of this + // transaction. + // + // If this transaction was created by an OptimisticTransaction, GetForUpdate() + // could cause commit() to fail. Otherwise, it could return any error + // that could be returned by DB::Get(). + // + // If this transaction was created by a TransactionDB, it can return + // Status::OK() on success, + // Status::Busy() if there is a write conflict, + // Status::TimedOut() if a lock could not be acquired, + // Status::TryAgain() if the memtable history size is not large enough + // (See max_write_buffer_size_to_maintain) + // Status::MergeInProgress() if merge operations cannot be resolved. + // or other errors if this key could not be read. 
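+  //
+  // A minimal sketch (illustrative; `txn` is an assumed Transaction*):
+  //
+  //   std::string value;
+  //   Status s = txn->GetForUpdate(ReadOptions(), key, &value);
+  //   // On success the key is locked (TransactionDB) or registered for
+  //   // conflict checking (OptimisticTransactionDB) until the transaction
+  //   // commits, rolls back, or UndoGetForUpdate() is called.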
+  virtual Status GetForUpdate(const ReadOptions& options,
+                              ColumnFamilyHandle* column_family,
+                              const Slice& key, std::string* value,
+                              bool exclusive = true,
+                              const bool do_validate = true) = 0;
+
+  // An overload of the above method that receives a PinnableSlice
+  // For backward compatibility a default implementation is provided
+  virtual Status GetForUpdate(const ReadOptions& options,
+                              ColumnFamilyHandle* column_family,
+                              const Slice& key, PinnableSlice* pinnable_val,
+                              bool exclusive = true,
+                              const bool do_validate = true) {
+    if (pinnable_val == nullptr) {
+      std::string* null_str = nullptr;
+      return GetForUpdate(options, column_family, key, null_str, exclusive,
+                          do_validate);
+    } else {
+      auto s = GetForUpdate(options, column_family, key,
+                            pinnable_val->GetSelf(), exclusive, do_validate);
+      pinnable_val->PinSelf();
+      return s;
+    }
+  }
+
+  // Get a range lock on [start_endpoint; end_endpoint].
+  virtual Status GetRangeLock(ColumnFamilyHandle*, const Endpoint&,
+                              const Endpoint&) {
+    return Status::NotSupported();
+  }
+
+  virtual Status GetForUpdate(const ReadOptions& options, const Slice& key,
+                              std::string* value, bool exclusive = true,
+                              const bool do_validate = true) = 0;
+
+  virtual std::vector<Status> MultiGetForUpdate(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_family,
+      const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
+
+  virtual std::vector<Status> MultiGetForUpdate(
+      const ReadOptions& options, const std::vector<Slice>& keys,
+      std::vector<std::string>* values) = 0;
+
+  // Returns an iterator that will iterate on all keys in the default
+  // column family including both keys in the DB and uncommitted keys in this
+  // transaction.
+  //
+  // Setting read_options.snapshot will affect what is read from the
+  // DB but will NOT change which keys are read from this transaction (the keys
+  // in this transaction do not yet belong to any snapshot and will be fetched
+  // regardless).
+  //
+  // Caller is responsible for deleting the returned Iterator.
+  //
+  // The returned iterator is only valid until Commit(), Rollback(), or
+  // RollbackToSavePoint() is called.
+  virtual Iterator* GetIterator(const ReadOptions& read_options) = 0;
+
+  virtual Iterator* GetIterator(const ReadOptions& read_options,
+                                ColumnFamilyHandle* column_family) = 0;
+
+  // Put, Merge, Delete, and SingleDelete behave similarly to the corresponding
+  // functions in WriteBatch, but will also do conflict checking on the
+  // keys being written.
+  //
+  // assume_tracked=true expects the key to be already tracked. More
+  // specifically, it means the key was previously tracked in the same
+  // savepoint, with the same exclusive flag, and at a lower sequence number.
+  // If valid then it skips ValidateSnapshot. Returns error otherwise.
+  //
+  // If this Transaction was created on an OptimisticTransactionDB, these
+  // functions should always return Status::OK().
+  //
+  // If this Transaction was created on a TransactionDB, the status returned
+  // can be:
+  //   Status::OK() on success,
+  //   Status::Busy() if there is a write conflict,
+  //   Status::TimedOut() if a lock could not be acquired,
+  //   Status::TryAgain() if the memtable history size is not large enough
+  //     (See max_write_buffer_size_to_maintain)
+  //   or other errors on unexpected failures.
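+  //
+  // A minimal sketch (illustrative) of handling a write conflict:
+  //
+  //   Status s = txn->Put("key", "new_value");
+  //   if (s.IsBusy()) {
+  //     // Another transaction holds a conflicting lock: give up this
+  //     // transaction's work and retry from the top.
+  //     txn->Rollback();
+  //   }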
+ virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value, const bool assume_tracked = false) = 0; + virtual Status Put(const Slice& key, const Slice& value) = 0; + virtual Status Put(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value, + const bool assume_tracked = false) = 0; + virtual Status Put(const SliceParts& key, const SliceParts& value) = 0; + + virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value, + const bool assume_tracked = false) = 0; + virtual Status Merge(const Slice& key, const Slice& value) = 0; + + virtual Status Delete(ColumnFamilyHandle* column_family, const Slice& key, + const bool assume_tracked = false) = 0; + virtual Status Delete(const Slice& key) = 0; + virtual Status Delete(ColumnFamilyHandle* column_family, + const SliceParts& key, + const bool assume_tracked = false) = 0; + virtual Status Delete(const SliceParts& key) = 0; + + virtual Status SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key, + const bool assume_tracked = false) = 0; + virtual Status SingleDelete(const Slice& key) = 0; + virtual Status SingleDelete(ColumnFamilyHandle* column_family, + const SliceParts& key, + const bool assume_tracked = false) = 0; + virtual Status SingleDelete(const SliceParts& key) = 0; + + // PutUntracked() will write a Put to the batch of operations to be committed + // in this transaction. This write will only happen if this transaction + // gets committed successfully. But unlike Transaction::Put(), + // no conflict checking will be done for this key. + // + // If this Transaction was created on a PessimisticTransactionDB, this + // function will still acquire locks necessary to make sure this write doesn't + // cause conflicts in other transactions and may return Status::Busy(). + virtual Status PutUntracked(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) = 0; + virtual Status PutUntracked(const Slice& key, const Slice& value) = 0; + virtual Status PutUntracked(ColumnFamilyHandle* column_family, + const SliceParts& key, + const SliceParts& value) = 0; + virtual Status PutUntracked(const SliceParts& key, + const SliceParts& value) = 0; + + virtual Status MergeUntracked(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) = 0; + virtual Status MergeUntracked(const Slice& key, const Slice& value) = 0; + + virtual Status DeleteUntracked(ColumnFamilyHandle* column_family, + const Slice& key) = 0; + + virtual Status DeleteUntracked(const Slice& key) = 0; + virtual Status DeleteUntracked(ColumnFamilyHandle* column_family, + const SliceParts& key) = 0; + virtual Status DeleteUntracked(const SliceParts& key) = 0; + virtual Status SingleDeleteUntracked(ColumnFamilyHandle* column_family, + const Slice& key) = 0; + + virtual Status SingleDeleteUntracked(const Slice& key) = 0; + + // Similar to WriteBatch::PutLogData + virtual void PutLogData(const Slice& blob) = 0; + + // By default, all Put/Merge/Delete operations will be indexed in the + // transaction so that Get/GetForUpdate/GetIterator can search for these + // keys. + // + // If the caller does not want to fetch the keys about to be written, + // they may want to avoid indexing as a performance optimization. + // Calling DisableIndexing() will turn off indexing for all future + // Put/Merge/Delete operations until EnableIndexing() is called. 
+ // + // If a key is Put/Merge/Deleted after DisableIndexing is called and then + // is fetched via Get/GetForUpdate/GetIterator, the result of the fetch is + // undefined. + virtual void DisableIndexing() = 0; + virtual void EnableIndexing() = 0; + + // Returns the number of distinct Keys being tracked by this transaction. + // If this transaction was created by a TransactionDB, this is the number of + // keys that are currently locked by this transaction. + // If this transaction was created by an OptimisticTransactionDB, this is the + // number of keys that need to be checked for conflicts at commit time. + virtual uint64_t GetNumKeys() const = 0; + + // Returns the number of Puts/Deletes/Merges that have been applied to this + // transaction so far. + virtual uint64_t GetNumPuts() const = 0; + virtual uint64_t GetNumDeletes() const = 0; + virtual uint64_t GetNumMerges() const = 0; + + // Returns the elapsed time in milliseconds since this Transaction began. + virtual uint64_t GetElapsedTime() const = 0; + + // Fetch the underlying write batch that contains all pending changes to be + // committed. + // + // Note: You should not write or delete anything from the batch directly and + // should only use the functions in the Transaction class to + // write to this transaction. + virtual WriteBatchWithIndex* GetWriteBatch() = 0; + + // Change the value of TransactionOptions.lock_timeout (in milliseconds) for + // this transaction. + // Has no effect on OptimisticTransactions. + virtual void SetLockTimeout(int64_t timeout) = 0; + + // Return the WriteOptions that will be used during Commit() + virtual WriteOptions* GetWriteOptions() = 0; + + // Reset the WriteOptions that will be used during Commit(). + virtual void SetWriteOptions(const WriteOptions& write_options) = 0; + + // If this key was previously fetched in this transaction using + // GetForUpdate/MultigetForUpdate(), calling UndoGetForUpdate will tell + // the transaction that it no longer needs to do any conflict checking + // for this key. + // + // If a key has been fetched N times via GetForUpdate/MultigetForUpdate(), + // then UndoGetForUpdate will only have an effect if it is also called N + // times. If this key has been written to in this transaction, + // UndoGetForUpdate() will have no effect. + // + // If SetSavePoint() has been called after the GetForUpdate(), + // UndoGetForUpdate() will not have any effect. + // + // If this Transaction was created by an OptimisticTransactionDB, + // calling UndoGetForUpdate can affect whether this key is conflict checked + // at commit time. + // If this Transaction was created by a TransactionDB, + // calling UndoGetForUpdate may release any held locks for this key. + virtual void UndoGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) = 0; + virtual void UndoGetForUpdate(const Slice& key) = 0; + + virtual Status RebuildFromWriteBatch(WriteBatch* src_batch) = 0; + + // Note: data in the commit-time-write-batch bypasses concurrency control, + // thus should be used with great caution. + // For write-prepared/write-unprepared transactions, + // GetCommitTimeWriteBatch() can be used only if the transaction is started + // with + // `TransactionOptions::use_only_the_last_commit_time_batch_for_recovery` set + // to true. Otherwise, it is possible that two uncommitted versions of the + // same key exist in the database due to the current implementation (see the + // explanation in WritePreparedTxn::CommitInternal). 
+ // During bottommost compaction, RocksDB may + // set the sequence numbers of both to zero once becoming committed, causing + // output SST file to have two identical internal keys. + virtual WriteBatch* GetCommitTimeWriteBatch() = 0; + + virtual void SetLogNumber(uint64_t log) { log_number_ = log; } + + virtual uint64_t GetLogNumber() const { return log_number_; } + + virtual Status SetName(const TransactionName& name) = 0; + + virtual TransactionName GetName() const { return name_; } + + virtual TransactionID GetID() const { return 0; } + + virtual bool IsDeadlockDetect() const { return false; } + + virtual std::vector GetWaitingTxns( + uint32_t* /*column_family_id*/, std::string* /*key*/) const { + assert(false); + return std::vector(); + } + + enum TransactionState { + STARTED = 0, + AWAITING_PREPARE = 1, + PREPARED = 2, + AWAITING_COMMIT = 3, + COMMITTED = 4, + COMMITED = COMMITTED, // old misspelled name + AWAITING_ROLLBACK = 5, + ROLLEDBACK = 6, + LOCKS_STOLEN = 7, + }; + + TransactionState GetState() const { return txn_state_; } + void SetState(TransactionState state) { txn_state_ = state; } + + // NOTE: Experimental feature + // The globally unique id with which the transaction is identified. This id + // might or might not be set depending on the implementation. Similarly the + // implementation decides the point in lifetime of a transaction at which it + // assigns the id. Although currently it is the case, the id is not guaranteed + // to remain the same across restarts. + uint64_t GetId() { return id_; } + + virtual Status SetReadTimestampForValidation(TxnTimestamp /*ts*/) { + return Status::NotSupported("timestamp not supported"); + } + + virtual Status SetCommitTimestamp(TxnTimestamp /*ts*/) { + return Status::NotSupported("timestamp not supported"); + } + + virtual TxnTimestamp GetCommitTimestamp() const { return kMaxTxnTimestamp; } + + protected: + explicit Transaction(const TransactionDB* /*db*/) {} + Transaction() : log_number_(0), txn_state_(STARTED) {} + + // the log in which the prepared section for this txn resides + // (for two phase commit) + uint64_t log_number_; + TransactionName name_; + + // Execution status of the transaction. + std::atomic txn_state_; + + uint64_t id_ = 0; + virtual void SetId(uint64_t id) { + assert(id_ == 0); + id_ = id; + } + + virtual uint64_t GetLastLogNumber() const { return log_number_; } + + private: + friend class PessimisticTransactionDB; + friend class WriteUnpreparedTxnDB; + friend class TransactionTest_TwoPhaseLogRollingTest_Test; + friend class TransactionTest_TwoPhaseLogRollingTest2_Test; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction_db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction_db.h new file mode 100644 index 0000000000..3c4b63068e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction_db.h @@ -0,0 +1,506 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "rocksdb/comparator.h" +#include "rocksdb/db.h" +#include "rocksdb/utilities/stackable_db.h" +#include "rocksdb/utilities/transaction.h" + +// Database with Transaction support. 
+// +// See transaction.h and examples/transaction_example.cc + +namespace ROCKSDB_NAMESPACE { + +class TransactionDBMutexFactory; + +enum TxnDBWritePolicy { + WRITE_COMMITTED = 0, // write only the committed data + WRITE_PREPARED, // write data after the prepare phase of 2pc + WRITE_UNPREPARED // write data before the prepare phase of 2pc +}; + +constexpr uint32_t kInitialMaxDeadlocks = 5; + +class LockManager; +struct RangeLockInfo; + +// A lock manager handle +// The workflow is as follows: +// * Use a factory method (like NewRangeLockManager()) to create a lock +// manager and get its handle. +// * A Handle for a particular kind of lock manager will have extra +// methods and parameters to control the lock manager +// * Pass the handle to RocksDB in TransactionDBOptions::lock_mgr_handle. It +// will be used to perform locking. +class LockManagerHandle { + public: + // PessimisticTransactionDB will call this to get the Lock Manager it's going + // to use. + virtual LockManager* getLockManager() = 0; + + virtual ~LockManagerHandle() {} +}; + +// Same as class Endpoint, but use std::string to manage the buffer allocation +struct EndpointWithString { + std::string slice; + bool inf_suffix; +}; + +struct RangeDeadlockInfo { + TransactionID m_txn_id; + uint32_t m_cf_id; + bool m_exclusive; + + EndpointWithString m_start; + EndpointWithString m_end; +}; + +struct RangeDeadlockPath { + std::vector path; + bool limit_exceeded; + int64_t deadlock_time; + + explicit RangeDeadlockPath(std::vector path_entry, + const int64_t& dl_time) + : path(path_entry), limit_exceeded(false), deadlock_time(dl_time) {} + + // empty path, limit exceeded constructor and default constructor + explicit RangeDeadlockPath(const int64_t& dl_time = 0, bool limit = false) + : path(0), limit_exceeded(limit), deadlock_time(dl_time) {} + + bool empty() { return path.empty() && !limit_exceeded; } +}; + +// A handle to control RangeLockManager (Range-based lock manager) from outside +// RocksDB +class RangeLockManagerHandle : public LockManagerHandle { + public: + // Set total amount of lock memory to use. + // + // @return 0 Ok + // @return EDOM Failed to set because currently using more memory than + // specified + virtual int SetMaxLockMemory(size_t max_lock_memory) = 0; + virtual size_t GetMaxLockMemory() = 0; + + using RangeLockStatus = + std::unordered_multimap; + + // Lock Escalation barrier check function. + // It is called for a couple of endpoints A and B, such that A < B. + // If escalation_barrier_check_func(A, B)==true, then there's a lock + // escalation barrier between A and B, and lock escalation is not allowed + // to bridge the gap between A and B. + // + // The function may be called from any thread that acquires or releases + // locks. It should not throw exceptions. There is currently no way to return + // an error. + using EscalationBarrierFunc = + std::function; + + // Set the user-provided barrier check function + virtual void SetEscalationBarrierFunc(EscalationBarrierFunc func) = 0; + + virtual RangeLockStatus GetRangeLockStatusData() = 0; + + class Counters { + public: + // Number of times lock escalation was triggered (for all column families) + uint64_t escalation_count; + + // Number of times lock acquisition had to wait for a conflicting lock + // to be released. This counts both successful waits (where the desired + // lock was acquired) and waits that timed out or got other error. 
+ uint64_t lock_wait_count; + + // How much memory is currently used for locks (total for all column + // families) + uint64_t current_lock_memory; + }; + + // Get the current counter values + virtual Counters GetStatus() = 0; + + // Functions for range-based Deadlock reporting. + virtual std::vector GetRangeDeadlockInfoBuffer() = 0; + virtual void SetRangeDeadlockInfoBufferSize(uint32_t target_size) = 0; + + virtual ~RangeLockManagerHandle() {} +}; + +// A factory function to create a Range Lock Manager. The created object should +// be: +// 1. Passed in TransactionDBOptions::lock_mgr_handle to open the database in +// range-locking mode +// 2. Used to control the lock manager when the DB is already open. +RangeLockManagerHandle* NewRangeLockManager( + std::shared_ptr mutex_factory); + +struct TransactionDBOptions { + // Specifies the maximum number of keys that can be locked at the same time + // per column family. + // If the number of locked keys is greater than max_num_locks, transaction + // writes (or GetForUpdate) will return an error. + // If this value is not positive, no limit will be enforced. + int64_t max_num_locks = -1; + + // Stores the number of latest deadlocks to track + uint32_t max_num_deadlocks = kInitialMaxDeadlocks; + + // Increasing this value will increase the concurrency by dividing the lock + // table (per column family) into more sub-tables, each with their own + // separate mutex. + size_t num_stripes = 16; + + // If positive, specifies the default wait timeout in milliseconds when + // a transaction attempts to lock a key if not specified by + // TransactionOptions::lock_timeout. + // + // If 0, no waiting is done if a lock cannot instantly be acquired. + // If negative, there is no timeout. Not using a timeout is not recommended + // as it can lead to deadlocks. Currently, there is no deadlock-detection to + // recover from a deadlock. + int64_t transaction_lock_timeout = 1000; // 1 second + + // If positive, specifies the wait timeout in milliseconds when writing a key + // OUTSIDE of a transaction (ie by calling DB::Put(),Merge(),Delete(),Write() + // directly). + // If 0, no waiting is done if a lock cannot instantly be acquired. + // If negative, there is no timeout and will block indefinitely when acquiring + // a lock. + // + // Not using a timeout can lead to deadlocks. Currently, there + // is no deadlock-detection to recover from a deadlock. While DB writes + // cannot deadlock with other DB writes, they can deadlock with a transaction. + // A negative timeout should only be used if all transactions have a small + // expiration set. + int64_t default_lock_timeout = 1000; // 1 second + + // If set, the TransactionDB will use this implementation of a mutex and + // condition variable for all transaction locking instead of the default + // mutex/condvar implementation. + std::shared_ptr custom_mutex_factory; + + // The policy for when to write the data into the DB. The default policy is to + // write only the committed data (WRITE_COMMITTED). The data could be written + // before the commit phase. The DB then needs to provide the mechanisms to + // tell apart committed from uncommitted data. + TxnDBWritePolicy write_policy = TxnDBWritePolicy::WRITE_COMMITTED; + + // TODO(myabandeh): remove this option + // Note: this is a temporary option as a hot fix in rollback of writeprepared + // txns in myrocks. MyRocks uses merge operands for autoinc column id without + // however obtaining locks. 
This breaks the assumption behind the rollback
+  // logic in myrocks. This hack of simply not rolling back merge operands
+  // works for the special way that myrocks uses these operands.
+  bool rollback_merge_operands = false;
+
+  // nullptr means use default lock manager.
+  // Other value means the user provides a custom lock manager.
+  std::shared_ptr<LockManagerHandle> lock_mgr_handle;
+
+  // If true, the TransactionDB implementation might skip concurrency control
+  // unless it is overridden by TransactionOptions or
+  // TransactionDBWriteOptimizations. This can be used in conjunction with
+  // DBOptions::unordered_write when the TransactionDB is used solely for write
+  // ordering rather than concurrency control.
+  bool skip_concurrency_control = false;
+
+  // This option is only valid for write unprepared. If a write batch exceeds
+  // this threshold, then the transaction will implicitly flush the currently
+  // pending writes into the database. A value of 0 or less means no limit.
+  int64_t default_write_batch_flush_threshold = 0;
+
+  // This option is valid only for write-prepared/write-unprepared. A
+  // transaction will rely on this callback to determine if a key should be
+  // rolled back with Delete or SingleDelete when necessary. If the callback
+  // returns true, then SingleDelete should be used. If the callback is not
+  // callable or the callback returns false, then a Delete is used.
+  // The application should ensure thread-safety of this callback.
+  // The callback should not throw because RocksDB is not exception-safe.
+  // The callback may be removed if we allow mixing Delete and SingleDelete in
+  // the future.
+  std::function<bool(TransactionDB*, ColumnFamilyHandle*, const Slice&)>
+      rollback_deletion_type_callback;
+
+ private:
+  // 128 entries
+  // Should the default value change, please also update wp_snapshot_cache_bits
+  // in db_stress_gflags.cc
+  size_t wp_snapshot_cache_bits = static_cast<size_t>(7);
+  // 8m entry, 64MB size
+  // Should the default value change, please also update wp_commit_cache_bits
+  // in db_stress_gflags.cc
+  size_t wp_commit_cache_bits = static_cast<size_t>(23);
+
+  // For testing, whether transaction name should be auto-generated or not.
+  // This is useful for write unprepared which requires named transactions.
+  bool autogenerate_name = false;
+
+  friend class WritePreparedTxnDB;
+  friend class WriteUnpreparedTxn;
+  friend class WritePreparedTransactionTestBase;
+  friend class TransactionTestBase;
+  friend class MySQLStyleTransactionTest;
+  friend class StressTest;
+};
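+
+// A minimal sketch (illustrative; field values are arbitrary) of tuning lock
+// behavior before opening the database:
+//
+//   TransactionDBOptions txn_db_options;
+//   txn_db_options.transaction_lock_timeout = 2000;  // wait up to 2 seconds
+//   txn_db_options.max_num_locks = 1 << 20;          // cap locked keys per CF
+//   TransactionDB* txn_db = nullptr;
+//   Status s =
+//       TransactionDB::Open(options, txn_db_options, "/tmp/db", &txn_db);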
Ie, cmp->Compare(a,b) should only + // return 0 if + // a.compare(b) returns 0. + + // If positive, specifies the wait timeout in milliseconds when + // a transaction attempts to lock a key. + // + // If 0, no waiting is done if a lock cannot instantly be acquired. + // If negative, TransactionDBOptions::transaction_lock_timeout will be used. + int64_t lock_timeout = -1; + + // Expiration duration in milliseconds. If non-negative, transactions that + // last longer than this many milliseconds will fail to commit. If not set, + // a forgotten transaction that is never committed, rolled back, or deleted + // will never relinquish any locks it holds. This could prevent keys from + // being written by other writers. + int64_t expiration = -1; + + // The number of traversals to make during deadlock detection. + int64_t deadlock_detect_depth = 50; + + // The maximum number of bytes used for the write batch. 0 means no limit. + size_t max_write_batch_size = 0; + + // Skip Concurrency Control. This could be as an optimization if the + // application knows that the transaction would not have any conflict with + // concurrent transactions. It could also be used during recovery if (i) + // application guarantees no conflict between prepared transactions in the WAL + // (ii) application guarantees that recovered transactions will be rolled + // back/commit before new transactions start. + // Default: false + bool skip_concurrency_control = false; + + // In pessimistic transaction, if this is true, then you can skip Prepare + // before Commit, otherwise, you must Prepare before Commit. + bool skip_prepare = true; + + // See TransactionDBOptions::default_write_batch_flush_threshold for + // description. If a negative value is specified, then the default value from + // TransactionDBOptions is used. + int64_t write_batch_flush_threshold = -1; +}; + +// The per-write optimizations that do not involve transactions. TransactionDB +// implementation might or might not make use of the specified optimizations. +struct TransactionDBWriteOptimizations { + // If it is true it means that the application guarantees that the + // key-set in the write batch do not conflict with any concurrent transaction + // and hence the concurrency control mechanism could be skipped for this + // write. + bool skip_concurrency_control = false; + // If true, the application guarantees that there is no duplicate in the write batch and any employed mechanism to handle + // duplicate keys could be skipped. 
+
+// The per-write optimizations that do not involve transactions. A
+// TransactionDB implementation might or might not make use of the specified
+// optimizations.
+struct TransactionDBWriteOptimizations {
+  // If true, the application guarantees that the key-set in the write batch
+  // does not conflict with any concurrent transaction, and hence the
+  // concurrency control mechanism can be skipped for this write.
+  bool skip_concurrency_control = false;
+
+  // If true, the application guarantees that there are no duplicate keys in
+  // the write batch, and any mechanism employed to handle duplicate keys can
+  // be skipped.
+  bool skip_duplicate_key_check = false;
+};
+
+struct KeyLockInfo {
+  std::string key;
+  std::vector<TransactionID> ids;
+  bool exclusive;
+};
+
+struct RangeLockInfo {
+  EndpointWithString start;
+  EndpointWithString end;
+  std::vector<TransactionID> ids;
+  bool exclusive;
+};
+
+struct DeadlockInfo {
+  TransactionID m_txn_id;
+  uint32_t m_cf_id;
+  bool m_exclusive;
+  std::string m_waiting_key;
+};
+
+struct DeadlockPath {
+  std::vector<DeadlockInfo> path;
+  bool limit_exceeded;
+  int64_t deadlock_time;
+
+  explicit DeadlockPath(std::vector<DeadlockInfo> path_entry,
+                        const int64_t& dl_time)
+      : path(path_entry), limit_exceeded(false), deadlock_time(dl_time) {}
+
+  // empty path, limit exceeded constructor and default constructor
+  explicit DeadlockPath(const int64_t& dl_time = 0, bool limit = false)
+      : path(0), limit_exceeded(limit), deadlock_time(dl_time) {}
+
+  bool empty() { return path.empty() && !limit_exceeded; }
+};
+
+class TransactionDB : public StackableDB {
+ public:
+  // Optimized version of ::Write that receives more optimization requests,
+  // such as skip_concurrency_control.
+  using StackableDB::Write;
+  virtual Status Write(const WriteOptions& opts,
+                       const TransactionDBWriteOptimizations&,
+                       WriteBatch* updates) {
+    // The default implementation ignores TransactionDBWriteOptimizations and
+    // falls back to the un-optimized version of ::Write
+    return Write(opts, updates);
+  }
+  // Transactional `DeleteRange()` is not yet supported.
+  // However, users who know their deleted range does not conflict with
+  // anything can still use it via the `Write()` API. In all cases, the
+  // `Write()` overload specifying `TransactionDBWriteOptimizations` must be
+  // used and `skip_concurrency_control` must be set. When using either
+  // WRITE_PREPARED or WRITE_UNPREPARED, `skip_duplicate_key_check` must
+  // additionally be set.
+  using StackableDB::DeleteRange;
+  virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
+                             const Slice&, const Slice&) override {
+    return Status::NotSupported();
+  }
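+
+  // A minimal sketch (illustrative) of issuing a DeleteRange through the
+  // Write() overload, as described above:
+  //
+  //   WriteBatch batch;
+  //   batch.DeleteRange(cf_handle, begin_key, end_key);
+  //   TransactionDBWriteOptimizations write_opts;
+  //   write_opts.skip_concurrency_control = true;
+  //   write_opts.skip_duplicate_key_check = true;  // WRITE_PREPARED/UNPREPARED
+  //   Status s = txn_db->Write(WriteOptions(), write_opts, &batch);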
+  // Open a TransactionDB similar to DB::Open().
+  // Internally calls PrepareWrap() and WrapDB().
+  // If the return status is not ok, then dbptr is set to nullptr.
+  static Status Open(const Options& options,
+                     const TransactionDBOptions& txn_db_options,
+                     const std::string& dbname, TransactionDB** dbptr);
+
+  static Status Open(const DBOptions& db_options,
+                     const TransactionDBOptions& txn_db_options,
+                     const std::string& dbname,
+                     const std::vector<ColumnFamilyDescriptor>& column_families,
+                     std::vector<ColumnFamilyHandle*>* handles,
+                     TransactionDB** dbptr);
+  // Note: PrepareWrap() may change parameters, make copies before the
+  // invocation if needed.
+  static void PrepareWrap(DBOptions* db_options,
+                          std::vector<ColumnFamilyDescriptor>* column_families,
+                          std::vector<size_t>* compaction_enabled_cf_indices);
+  // If the return status is not ok, then dbptr will be set to nullptr. The
+  // input db parameter might or might not be deleted as a result of the
+  // failure. If it is properly deleted it will be set to nullptr. If the
+  // return status is ok, the ownership of db is transferred to dbptr.
+  static Status WrapDB(DB* db, const TransactionDBOptions& txn_db_options,
+                       const std::vector<size_t>& compaction_enabled_cf_indices,
+                       const std::vector<ColumnFamilyHandle*>& handles,
+                       TransactionDB** dbptr);
+  // If the return status is not ok, then dbptr will be set to nullptr. The
+  // input db parameter might or might not be deleted as a result of the
+  // failure. If it is properly deleted it will be set to nullptr. If the
+  // return status is ok, the ownership of db is transferred to dbptr.
+  static Status WrapStackableDB(
+      StackableDB* db, const TransactionDBOptions& txn_db_options,
+      const std::vector<size_t>& compaction_enabled_cf_indices,
+      const std::vector<ColumnFamilyHandle*>& handles, TransactionDB** dbptr);
+  // Since the destructor in StackableDB is virtual, this destructor is virtual
+  // too. The root db will be deleted by the base's destructor.
+  ~TransactionDB() override {}
+
+  // Starts a new Transaction.
+  //
+  // Caller is responsible for deleting the returned transaction when no
+  // longer needed.
+  //
+  // If old_txn is not null, BeginTransaction will reuse this Transaction
+  // handle instead of allocating a new one. This is an optimization to avoid
+  // extra allocations when repeatedly creating transactions.
+  virtual Transaction* BeginTransaction(
+      const WriteOptions& write_options,
+      const TransactionOptions& txn_options = TransactionOptions(),
+      Transaction* old_txn = nullptr) = 0;
+
+  virtual Transaction* GetTransactionByName(const TransactionName& name) = 0;
+  virtual void GetAllPreparedTransactions(std::vector<Transaction*>* trans) = 0;
+
+  // Returns the set of all locks held.
+  //
+  // The mapping is column family id -> KeyLockInfo
+  virtual std::unordered_multimap<uint32_t, KeyLockInfo>
+  GetLockStatusData() = 0;
+
+  virtual std::vector<DeadlockPath> GetDeadlockInfoBuffer() = 0;
+  virtual void SetDeadlockInfoBufferSize(uint32_t target_size) = 0;
+
+  // Create a snapshot and assign ts to it. Return the snapshot to the caller.
+  // The snapshot-timestamp mapping is also tracked by the database.
+  // Caller must ensure there are no active writes when this API is called.
+  virtual std::pair<Status, std::shared_ptr<const Snapshot>>
+  CreateTimestampedSnapshot(TxnTimestamp ts) = 0;
+
+  // Return the latest timestamped snapshot if present.
+  std::shared_ptr<const Snapshot> GetLatestTimestampedSnapshot() const {
+    return GetTimestampedSnapshot(kMaxTxnTimestamp);
+  }
+  // Return the snapshot corresponding to the given timestamp. If ts is
+  // kMaxTxnTimestamp, then we return the latest timestamped snapshot if
+  // present. Otherwise, we return the snapshot whose timestamp is equal to
+  // `ts`. If no such snapshot exists, then we return null.
+  virtual std::shared_ptr<const Snapshot> GetTimestampedSnapshot(
+      TxnTimestamp ts) const = 0;
+  // Release timestamped snapshots whose timestamps are less than or equal to
+  // ts.
+  virtual void ReleaseTimestampedSnapshotsOlderThan(TxnTimestamp ts) = 0;
+
+  // Get all timestamped snapshots, which will be stored in
+  // timestamped_snapshots.
+  Status GetAllTimestampedSnapshots(
+      std::vector<std::shared_ptr<const Snapshot>>& timestamped_snapshots)
+      const {
+    return GetTimestampedSnapshots(/*ts_lb=*/0, /*ts_ub=*/kMaxTxnTimestamp,
+                                   timestamped_snapshots);
+  }
+
+  // Get all timestamped snapshots whose timestamps fall within [ts_lb, ts_ub).
+  // timestamped_snapshots will be cleared and contain the returned snapshots.
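+  //
+  // A minimal sketch (illustrative):
+  //
+  //   std::vector<std::shared_ptr<const Snapshot>> snapshots;
+  //   Status s = txn_db->GetTimestampedSnapshots(/*ts_lb=*/100,
+  //                                              /*ts_ub=*/200, snapshots);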
+  virtual Status GetTimestampedSnapshots(
+      TxnTimestamp ts_lb, TxnTimestamp ts_ub,
+      std::vector<std::shared_ptr<const Snapshot>>& timestamped_snapshots)
+      const = 0;
+
+ protected:
+  // To create a TransactionDB, call Open().
+  // The ownership of db is transferred to the base StackableDB.
+  explicit TransactionDB(DB* db) : StackableDB(db) {}
+  // No copying allowed
+  TransactionDB(const TransactionDB&) = delete;
+  void operator=(const TransactionDB&) = delete;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction_db_mutex.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction_db_mutex.h
new file mode 100644
index 0000000000..4ef566dad9
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/transaction_db_mutex.h
@@ -0,0 +1,89 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <memory>
+
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// The TransactionDBMutex and TransactionDBCondVar APIs allow applications to
+// implement custom mutexes and condition variables to be used by a
+// TransactionDB when locking keys.
+//
+// To open a TransactionDB with a custom TransactionDBMutexFactory, set
+// TransactionDBOptions.custom_mutex_factory.
+class TransactionDBMutex {
+ public:
+  virtual ~TransactionDBMutex() {}
+
+  // Attempt to acquire lock. Return OK on success, or other Status on failure.
+  // If returned status is OK, TransactionDB will eventually call UnLock().
+  virtual Status Lock() = 0;
+
+  // Attempt to acquire lock. If timeout is non-negative, the operation may
+  // fail after this many microseconds.
+  // Returns OK on success,
+  //         TimedOut if timed out,
+  //         or other Status on failure.
+  // If returned status is OK, TransactionDB will eventually call UnLock().
+  virtual Status TryLockFor(int64_t timeout_time) = 0;
+
+  // Unlock Mutex that was successfully locked by Lock() or TryLockUntil()
+  virtual void UnLock() = 0;
+};
+
+class TransactionDBCondVar {
+ public:
+  virtual ~TransactionDBCondVar() {}
+
+  // Block current thread until condition variable is notified by a call to
+  // Notify() or NotifyAll(). Wait() will be called with mutex locked.
+  // Returns OK if notified.
+  // Returns non-OK if TransactionDB should stop waiting and fail the
+  // operation.
+  // May return OK spuriously even if not notified.
+  virtual Status Wait(std::shared_ptr<TransactionDBMutex> mutex) = 0;
+
+  // Block current thread until condition variable is notified by a call to
+  // Notify() or NotifyAll(), or if the timeout is reached.
+  // Wait() will be called with mutex locked.
+  //
+  // If timeout is non-negative, the operation should fail after this many
+  // microseconds.
+  // If implementing a custom version of this class, the implementation may
+  // choose to ignore the timeout.
+  //
+  // Returns OK if notified.
+  // Returns TimedOut if timeout is reached.
+  // Returns other status if TransactionDB should otherwise stop waiting and
+  // fail the operation.
+  // May return OK spuriously even if not notified.
+  virtual Status WaitFor(std::shared_ptr<TransactionDBMutex> mutex,
+                         int64_t timeout_time) = 0;
+
+  // If any threads are waiting on *this, unblock at least one of the
+  // waiting threads.
+  virtual void Notify() = 0;
+
+  // Unblocks all threads waiting on *this.
+  virtual void NotifyAll() = 0;
+};
+
+// Factory class that can allocate mutexes and condition variables.
+class TransactionDBMutexFactory {
+ public:
+  // Create a TransactionDBMutex object.
+  virtual std::shared_ptr<TransactionDBMutex> AllocateMutex() = 0;
+
+  // Create a TransactionDBCondVar object.
+  virtual std::shared_ptr<TransactionDBCondVar> AllocateCondVar() = 0;
+
+  virtual ~TransactionDBMutexFactory() {}
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/write_batch_with_index.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/write_batch_with_index.h
new file mode 100644
index 0000000000..d5867567ba
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/utilities/write_batch_with_index.h
@@ -0,0 +1,307 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A WriteBatchWithIndex with a binary searchable index built for all the keys
+// inserted.
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/comparator.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "rocksdb/write_batch.h"
+#include "rocksdb/write_batch_base.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class ColumnFamilyHandle;
+class Comparator;
+class DB;
+class ReadCallback;
+struct ReadOptions;
+struct DBOptions;
+
+enum WriteType {
+  kPutRecord,
+  kMergeRecord,
+  kDeleteRecord,
+  kSingleDeleteRecord,
+  kDeleteRangeRecord,
+  kLogDataRecord,
+  kXIDRecord,
+  kUnknownRecord,
+};
+
+// An entry for Put, Merge, Delete, or SingleDelete for write batches.
+// Used in WBWIIterator.
+struct WriteEntry {
+  WriteType type = kUnknownRecord;
+  Slice key;
+  Slice value;
+};
+
+// Iterator of one column family out of a WriteBatchWithIndex.
+class WBWIIterator {
+ public:
+  virtual ~WBWIIterator() {}
+
+  virtual bool Valid() const = 0;
+
+  virtual void SeekToFirst() = 0;
+
+  virtual void SeekToLast() = 0;
+
+  virtual void Seek(const Slice& key) = 0;
+
+  virtual void SeekForPrev(const Slice& key) = 0;
+
+  virtual void Next() = 0;
+
+  virtual void Prev() = 0;
+
+  // the returned WriteEntry is only valid until the next mutation of the
+  // WriteBatchWithIndex
+  virtual WriteEntry Entry() const = 0;
+
+  virtual Status status() const = 0;
+};
+
+// A WriteBatchWithIndex with a binary searchable index built for all the keys
+// inserted.
+// In Put(), Merge(), Delete(), or SingleDelete(), the same function of the
+// wrapped WriteBatch will be called. At the same time, indexes will be built.
+// By calling GetWriteBatch(), a user will get the WriteBatch for the data
+// they inserted, which can be used for DB::Write().
+// A user can call NewIterator() to create an iterator.
+class WriteBatchWithIndex : public WriteBatchBase {
+ public:
+  // backup_index_comparator: the backup comparator used to compare keys
+  // within the same column family. If the column family is not given in the
+  // interface, or we can't find a column family for the column family handle
+  // passed in, backup_index_comparator will be used for the column family.
+ // reserved_bytes: reserved bytes in underlying WriteBatch + // max_bytes: maximum size of underlying WriteBatch in bytes + // overwrite_key: if true, overwrite the key in the index when inserting + // the same key as previously, so iterator will never + // show two entries with the same key. + explicit WriteBatchWithIndex( + const Comparator* backup_index_comparator = BytewiseComparator(), + size_t reserved_bytes = 0, bool overwrite_key = false, + size_t max_bytes = 0, size_t protection_bytes_per_key = 0); + + ~WriteBatchWithIndex() override; + WriteBatchWithIndex(WriteBatchWithIndex&&); + WriteBatchWithIndex& operator=(WriteBatchWithIndex&&); + + using WriteBatchBase::Put; + Status Put(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) override; + + Status Put(const Slice& key, const Slice& value) override; + + Status Put(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts, const Slice& value) override; + + Status PutEntity(ColumnFamilyHandle* column_family, const Slice& /* key */, + const WideColumns& /* columns */) override { + if (!column_family) { + return Status::InvalidArgument( + "Cannot call this method without a column family handle"); + } + + return Status::NotSupported( + "PutEntity not supported by WriteBatchWithIndex"); + } + + using WriteBatchBase::Merge; + Status Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) override; + + Status Merge(const Slice& key, const Slice& value) override; + Status Merge(ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, + const Slice& /*ts*/, const Slice& /*value*/) override { + return Status::NotSupported( + "Merge does not support user-defined timestamp"); + } + + using WriteBatchBase::Delete; + Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override; + Status Delete(const Slice& key) override; + Status Delete(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) override; + + using WriteBatchBase::SingleDelete; + Status SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key) override; + Status SingleDelete(const Slice& key) override; + Status SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) override; + + using WriteBatchBase::DeleteRange; + Status DeleteRange(ColumnFamilyHandle* /* column_family */, + const Slice& /* begin_key */, + const Slice& /* end_key */) override { + return Status::NotSupported( + "DeleteRange unsupported in WriteBatchWithIndex"); + } + Status DeleteRange(const Slice& /* begin_key */, + const Slice& /* end_key */) override { + return Status::NotSupported( + "DeleteRange unsupported in WriteBatchWithIndex"); + } + Status DeleteRange(ColumnFamilyHandle* /*column_family*/, + const Slice& /*begin_key*/, const Slice& /*end_key*/, + const Slice& /*ts*/) override { + return Status::NotSupported( + "DeleteRange unsupported in WriteBatchWithIndex"); + } + + using WriteBatchBase::PutLogData; + Status PutLogData(const Slice& blob) override; + + using WriteBatchBase::Clear; + void Clear() override; + + using WriteBatchBase::GetWriteBatch; + WriteBatch* GetWriteBatch() override; + + // Create an iterator of a column family. User can call iterator.Seek() to + // search to the next entry of or after a key. Keys will be iterated in the + // order given by index_comparator. For multiple updates on the same key, + // each update will be returned as a separate entry, in the order of update + // time. + // + // The returned iterator should be deleted by the caller. 
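+  //
+  // A minimal usage sketch (hypothetical application code, not part of this
+  // header; `wbwi` is a WriteBatchWithIndex and `cf` a ColumnFamilyHandle*
+  // owned by the caller):
+  //
+  //   std::unique_ptr<WBWIIterator> it(wbwi.NewIterator(cf));
+  //   for (it->SeekToFirst(); it->Valid(); it->Next()) {
+  //     WriteEntry entry = it->Entry();  // valid until the next mutation
+  //     // entry.type / entry.key / entry.value describe the buffered update
+  //   }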
+ WBWIIterator* NewIterator(ColumnFamilyHandle* column_family); + // Create an iterator of the default column family. + WBWIIterator* NewIterator(); + + // Will create a new Iterator that will use WBWIIterator as a delta and + // base_iterator as base. + // + // This function is only supported if the WriteBatchWithIndex was + // constructed with overwrite_key=true. + // + // The returned iterator should be deleted by the caller. + // The base_iterator is now 'owned' by the returned iterator. Deleting the + // returned iterator will also delete the base_iterator. + // + // Updating write batch with the current key of the iterator is not safe. + // We strongly recommend users not to do it. It will invalidate the current + // key() and value() of the iterator. This invalidation happens even before + // the write batch update finishes. The state may recover after Next() is + // called. + Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family, + Iterator* base_iterator, + const ReadOptions* opts = nullptr); + // default column family + Iterator* NewIteratorWithBase(Iterator* base_iterator); + + // Similar to DB::Get() but will only read the key from this batch. + // If the batch does not have enough data to resolve Merge operations, + // MergeInProgress status may be returned. + Status GetFromBatch(ColumnFamilyHandle* column_family, + const DBOptions& options, const Slice& key, + std::string* value); + + // Similar to previous function but does not require a column_family. + // Note: An InvalidArgument status will be returned if there are any Merge + // operators for this key. Use previous method instead. + Status GetFromBatch(const DBOptions& options, const Slice& key, + std::string* value) { + return GetFromBatch(nullptr, options, key, value); + } + + // Similar to DB::Get() but will also read writes from this batch. + // + // This function will query both this batch and the DB and then merge + // the results using the DB's merge operator (if the batch contains any + // merge requests). + // + // Setting read_options.snapshot will affect what is read from the DB + // but will NOT change which keys are read from the batch (the keys in + // this batch do not yet belong to any snapshot and will be fetched + // regardless). + Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, + const Slice& key, std::string* value); + + // An overload of the above method that receives a PinnableSlice + Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, + const Slice& key, PinnableSlice* value); + + Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + std::string* value); + + // An overload of the above method that receives a PinnableSlice + Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value); + + void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const size_t num_keys, const Slice* keys, + PinnableSlice* values, Status* statuses, + bool sorted_input); + + // Records the state of the batch for future calls to RollbackToSavePoint(). + // May be called multiple times to set multiple save points. + void SetSavePoint() override; + + // Remove all entries in this batch (Put, Merge, Delete, SingleDelete, + // PutLogData) since the most recent call to SetSavePoint() and removes the + // most recent save point. 
+  // If there is no previous call to SetSavePoint(), behaves the same as
+  // Clear().
+  //
+  // Calling RollbackToSavePoint invalidates any open iterators on this batch.
+  //
+  // Returns Status::OK() on success,
+  //         Status::NotFound() if no previous call to SetSavePoint(),
+  //         or other Status on corruption.
+  Status RollbackToSavePoint() override;
+
+  // Pop the most recent save point.
+  // If there is no previous call to SetSavePoint(), Status::NotFound()
+  // will be returned.
+  // Otherwise returns Status::OK().
+  Status PopSavePoint() override;
+
+  void SetMaxBytes(size_t max_bytes) override;
+  size_t GetDataSize() const;
+
+ private:
+  friend class PessimisticTransactionDB;
+  friend class WritePreparedTxn;
+  friend class WriteUnpreparedTxn;
+  friend class WriteBatchWithIndex_SubBatchCnt_Test;
+  friend class WriteBatchWithIndexInternal;
+  // Returns the number of sub-batches inside the write batch. A sub-batch
+  // starts right before inserting a key that is a duplicate of a key in the
+  // last sub-batch.
+  size_t SubBatchCnt();
+
+  Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
+                           ColumnFamilyHandle* column_family, const Slice& key,
+                           PinnableSlice* value, ReadCallback* callback);
+  void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options,
+                              ColumnFamilyHandle* column_family,
+                              const size_t num_keys, const Slice* keys,
+                              PinnableSlice* values, Status* statuses,
+                              bool sorted_input, ReadCallback* callback);
+  struct Rep;
+  std::unique_ptr<Rep> rep;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/version.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/version.h
new file mode 100644
index 0000000000..5cfa477f6a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/version.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <string>
+#include <unordered_map>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+// NOTE: in 'main' development branch, this should be the *next*
+// minor or major version number planned for release.
+#define ROCKSDB_MAJOR 8
+#define ROCKSDB_MINOR 3
+#define ROCKSDB_PATCH 2
+
+// Do not use these. We made the mistake of declaring macros starting with
+// double underscore. Now we have to live with our choice. We'll deprecate
+// these at some point.
+#define __ROCKSDB_MAJOR__ ROCKSDB_MAJOR
+#define __ROCKSDB_MINOR__ ROCKSDB_MINOR
+#define __ROCKSDB_PATCH__ ROCKSDB_PATCH
+
+namespace ROCKSDB_NAMESPACE {
+// Returns a set of properties indicating how/when/where this version of
+// RocksDB was created.
+const std::unordered_map<std::string, std::string>& GetRocksBuildProperties();
+
+// Returns the current version of RocksDB as a string (e.g. "6.16.0").
+// If with_patch is true, the patch is included (6.16.x).
+// Otherwise, only the major and minor version is included (6.16).
+std::string GetRocksVersionAsString(bool with_patch = true);
+
+// Gets the set of build properties (@see GetRocksBuildProperties) into a
+// string. Properties are returned one-per-line, with the first line being:
+// "<program> from RocksDB <version>".
+// If verbose is true, the full set of properties is
+// printed. If verbose is false, only the version information (@see
+// GetRocksVersionAsString) is printed.
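+// For example (hypothetical output; the exact properties depend on the
+// build), GetRocksBuildInfoAsString("chainweb-node") would start with a line
+// like: "chainweb-node from RocksDB 8.3.2".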
+std::string GetRocksBuildInfoAsString(const std::string& program,
+                                      bool verbose = false);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/wal_filter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/wal_filter.h
new file mode 100644
index 0000000000..3e66c39e46
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/wal_filter.h
@@ -0,0 +1,111 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <map>
+#include <string>
+
+#include "rocksdb/customizable.h"
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class WriteBatch;
+struct ConfigOptions;
+
+// WalFilter allows an application to inspect write-ahead-log (WAL)
+// records or modify their processing on recovery.
+// Please see the details below.
+//
+// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
+// because RocksDB is not exception-safe. This could cause undefined behavior
+// including data loss, unreported corruption, deadlocks, and more.
+class WalFilter : public Customizable {
+ public:
+  static const char* Type() { return "WalFilter"; }
+  static Status CreateFromString(const ConfigOptions& options,
+                                 const std::string& value, WalFilter** result);
+  enum class WalProcessingOption {
+    // Continue processing as usual
+    kContinueProcessing = 0,
+    // Ignore the current record but continue processing of log(s)
+    kIgnoreCurrentRecord = 1,
+    // Stop replay of logs and discard logs
+    // Logs won't be replayed on subsequent recovery
+    kStopReplay = 2,
+    // Corrupted record detected by filter
+    kCorruptedRecord = 3,
+    // Marker for enum count
+    kWalProcessingOptionMax = 4
+  };
+
+  virtual ~WalFilter() {}
+
+  // Provide ColumnFamily->LogNumber map to filter
+  // so that filter can determine whether a log number applies to a given
+  // column family (i.e. that log hasn't been flushed to SST already for the
+  // column family).
+  // We also pass in a name->id map, as only the name is known during
+  // recovery (handles are opened post-recovery), while write batch
+  // callbacks happen in terms of column family id.
+  //
+  // @params cf_lognumber_map column_family_id to lognumber map
+  // @params cf_name_id_map   column_family_name to column_family_id map
+
+  virtual void ColumnFamilyLogNumberMap(
+      const std::map<uint32_t, uint64_t>& /*cf_lognumber_map*/,
+      const std::map<std::string, uint32_t>& /*cf_name_id_map*/) {}
+
+  // LogRecordFound is invoked for each log record encountered in all the logs
+  // during replay on recovery. This method can be used to:
+  //   * inspect the record (using the batch parameter)
+  //   * ignore the current record
+  //     (by returning WalProcessingOption::kIgnoreCurrentRecord)
+  //   * report a corrupted record
+  //     (by returning WalProcessingOption::kCorruptedRecord)
+  //   * stop log replay
+  //     (by returning kStopReplay) - please note that this implies
+  //     discarding the logs from current record onwards.
+  //
+  // @params log_number      log_number of the current log.
+  //                         Filter might use this to determine if the log
+  //                         record is applicable to a certain column family.
+  // @params log_file_name   log file name - only for informational purposes
+  // @params batch           batch encountered in the log during recovery
+  // @params new_batch       new_batch to populate if filter wants to change
+  //                         the batch (for example to filter some records out,
+  //                         or alter some records).
+  //                         Please note that the new batch MUST NOT contain
+  //                         more records than the original, else recovery
+  //                         would fail.
+  // @params batch_changed   Whether batch was changed by the filter.
+  //                         It must be set to true if new_batch was populated,
+  //                         else new_batch has no effect.
+  // @returns                Processing option for the current record.
+  //                         Please see WalProcessingOption enum above for
+  //                         details.
+  virtual WalProcessingOption LogRecordFound(
+      unsigned long long /*log_number*/, const std::string& /*log_file_name*/,
+      const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed) {
+    // Default implementation falls back to older function for compatibility
+    return LogRecord(batch, new_batch, batch_changed);
+  }
+
+  // Please see the comments for LogRecordFound above. This function is for
+  // compatibility only and contains a subset of parameters.
+  // New code should use the function above.
+  virtual WalProcessingOption LogRecord(const WriteBatch& /*batch*/,
+                                        WriteBatch* /*new_batch*/,
+                                        bool* /*batch_changed*/) const {
+    return WalProcessingOption::kContinueProcessing;
+  }
+
+  // Returns a name that identifies this WAL filter.
+  // The name will be printed to LOG file on start up for diagnosis.
+  virtual const char* Name() const override = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/wide_columns.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/wide_columns.h
new file mode 100644
index 0000000000..5af3f51de8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/wide_columns.h
@@ -0,0 +1,210 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cassert>
+#include <ostream>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Class representing a wide column, which is defined as a pair of column name
+// and column value.
+class WideColumn {
+ public:
+  WideColumn() = default;
+
+  // Initializes a WideColumn object by forwarding the name and value
+  // arguments to the corresponding member Slices. This makes it possible to
+  // construct a WideColumn using combinations of const char*, const
+  // std::string&, const Slice& etc., for example:
+  //
+  // constexpr char foo[] = "foo";
+  // const std::string bar("bar");
+  // WideColumn column(foo, bar);
+  template <typename N, typename V>
+  WideColumn(N&& name, V&& value)
+      : name_(std::forward<N>(name)), value_(std::forward<V>(value)) {}
+
+  // Initializes a WideColumn object by forwarding the elements of
+  // name_tuple and value_tuple to the constructors of the corresponding member
+  // Slices. This makes it possible to initialize the Slices using the Slice
+  // constructors that take more than one argument, for example:
+  //
+  // constexpr char foo_name[] = "foo_name";
+  // constexpr char bar_value[] = "bar_value";
+  // WideColumn column(std::piecewise_construct,
+  //                   std::forward_as_tuple(foo_name, 3),
+  //                   std::forward_as_tuple(bar_value, 3));
+  template <typename NTuple, typename VTuple>
+  WideColumn(std::piecewise_construct_t, NTuple&& name_tuple,
+             VTuple&& value_tuple)
+      : name_(std::make_from_tuple<Slice>(std::forward<NTuple>(name_tuple))),
+        value_(std::make_from_tuple<Slice>(std::forward<VTuple>(value_tuple))) {
+  }
+
+  const Slice& name() const { return name_; }
+  const Slice& value() const { return value_; }
+
+  Slice& name() { return name_; }
+  Slice& value() { return value_; }
+
+ private:
+  Slice name_;
+  Slice value_;
+};
+
+// Note: column names and values are compared bytewise.
+inline bool operator==(const WideColumn& lhs, const WideColumn& rhs) {
+  return lhs.name() == rhs.name() && lhs.value() == rhs.value();
+}
+
+inline bool operator!=(const WideColumn& lhs, const WideColumn& rhs) {
+  return !(lhs == rhs);
+}
+
+inline std::ostream& operator<<(std::ostream& os, const WideColumn& column) {
+  const bool hex =
+      (os.flags() & std::ios_base::basefield) == std::ios_base::hex;
+  os << column.name().ToString(hex) << ':' << column.value().ToString(hex);
+
+  return os;
+}
+
+// A collection of wide columns.
+using WideColumns = std::vector<WideColumn>;
+
+// The anonymous default wide column (an empty Slice).
+extern const Slice kDefaultWideColumnName;
+
+// An empty set of wide columns.
+extern const WideColumns kNoWideColumns;
+
+// A self-contained collection of wide columns. Used for the results of
+// wide-column queries.
+class PinnableWideColumns {
+ public:
+  const WideColumns& columns() const { return columns_; }
+  size_t serialized_size() const { return value_.size(); }
+
+  void SetPlainValue(const Slice& value);
+  void SetPlainValue(const Slice& value, Cleanable* cleanable);
+  void SetPlainValue(PinnableSlice&& value);
+  void SetPlainValue(std::string&& value);
+
+  Status SetWideColumnValue(const Slice& value);
+  Status SetWideColumnValue(const Slice& value, Cleanable* cleanable);
+  Status SetWideColumnValue(PinnableSlice&& value);
+  Status SetWideColumnValue(std::string&& value);
+
+  void Reset();
+
+ private:
+  void CopyValue(const Slice& value);
+  void PinOrCopyValue(const Slice& value, Cleanable* cleanable);
+  void MoveValue(PinnableSlice&& value);
+  void MoveValue(std::string&& value);
+
+  void CreateIndexForPlainValue();
+  Status CreateIndexForWideColumns();
+
+  PinnableSlice value_;
+  WideColumns columns_;
+};
+
+inline void PinnableWideColumns::CopyValue(const Slice& value) {
+  value_.PinSelf(value);
+}
+
+inline void PinnableWideColumns::PinOrCopyValue(const Slice& value,
+                                                Cleanable* cleanable) {
+  if (!cleanable) {
+    CopyValue(value);
+    return;
+  }
+
+  value_.PinSlice(value, cleanable);
+}
+
+inline void PinnableWideColumns::MoveValue(PinnableSlice&& value) {
+  value_ = std::move(value);
+}
+
+inline void PinnableWideColumns::MoveValue(std::string&& value) {
+  std::string* const buf = value_.GetSelf();
+  assert(buf);
+
+  *buf = std::move(value);
+  value_.PinSelf();
+}
+
+inline void PinnableWideColumns::CreateIndexForPlainValue() {
+  columns_ = WideColumns{{kDefaultWideColumnName, value_}};
+}
+
+inline void PinnableWideColumns::SetPlainValue(const Slice& value) {
+  CopyValue(value);
+  CreateIndexForPlainValue();
+}
+
+inline void PinnableWideColumns::SetPlainValue(const Slice& value,
+                                               Cleanable* cleanable) {
PinOrCopyValue(value, cleanable); + CreateIndexForPlainValue(); +} + +inline void PinnableWideColumns::SetPlainValue(PinnableSlice&& value) { + MoveValue(std::move(value)); + CreateIndexForPlainValue(); +} + +inline void PinnableWideColumns::SetPlainValue(std::string&& value) { + MoveValue(std::move(value)); + CreateIndexForPlainValue(); +} + +inline Status PinnableWideColumns::SetWideColumnValue(const Slice& value) { + CopyValue(value); + return CreateIndexForWideColumns(); +} + +inline Status PinnableWideColumns::SetWideColumnValue(const Slice& value, + Cleanable* cleanable) { + PinOrCopyValue(value, cleanable); + return CreateIndexForWideColumns(); +} + +inline Status PinnableWideColumns::SetWideColumnValue(PinnableSlice&& value) { + MoveValue(std::move(value)); + return CreateIndexForWideColumns(); +} + +inline Status PinnableWideColumns::SetWideColumnValue(std::string&& value) { + MoveValue(std::move(value)); + return CreateIndexForWideColumns(); +} + +inline void PinnableWideColumns::Reset() { + value_.Reset(); + columns_.clear(); +} + +inline bool operator==(const PinnableWideColumns& lhs, + const PinnableWideColumns& rhs) { + return lhs.columns() == rhs.columns(); +} + +inline bool operator!=(const PinnableWideColumns& lhs, + const PinnableWideColumns& rhs) { + return !(lhs == rhs); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_batch.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_batch.h new file mode 100644 index 0000000000..61ba5a7391 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_batch.h @@ -0,0 +1,494 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// WriteBatch holds a collection of updates to apply atomically to a DB. +// +// The updates are applied in the order in which they are added +// to the WriteBatch. For example, the value of "key" will be "v3" +// after the following batch is written: +// +// batch.Put("key", "v1"); +// batch.Delete("key"); +// batch.Put("key", "v2"); +// batch.Put("key", "v3"); +// +// Multiple threads can invoke const methods on a WriteBatch without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same WriteBatch must use +// external synchronization. 
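+//
+// A minimal end-to-end sketch (hypothetical application code, not part of
+// this header; assumes a DB* named `db` opened elsewhere):
+//
+//   WriteBatch batch;
+//   batch.Put("key", "v1");
+//   batch.Delete("other_key");
+//   Status s = db->Write(WriteOptions(), &batch);  // applied atomically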
+
+#pragma once
+
+#include <stdint.h>
+
+#include <atomic>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/status.h"
+#include "rocksdb/write_batch_base.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+class ColumnFamilyHandle;
+struct SavePoints;
+struct SliceParts;
+
+struct SavePoint {
+  size_t size;  // size of rep_
+  int count;    // count of elements in rep_
+  uint32_t content_flags;
+
+  SavePoint() : size(0), count(0), content_flags(0) {}
+
+  SavePoint(size_t _size, int _count, uint32_t _flags)
+      : size(_size), count(_count), content_flags(_flags) {}
+
+  void clear() {
+    size = 0;
+    count = 0;
+    content_flags = 0;
+  }
+
+  bool is_cleared() const { return (size | count | content_flags) == 0; }
+};
+
+class WriteBatch : public WriteBatchBase {
+ public:
+  explicit WriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0)
+      : WriteBatch(reserved_bytes, max_bytes, 0, 0) {}
+
+  // `protection_bytes_per_key` is the number of bytes used to store
+  // protection information for each key entry. Currently supported values are
+  // zero (disabled) and eight.
+  explicit WriteBatch(size_t reserved_bytes, size_t max_bytes,
+                      size_t protection_bytes_per_key, size_t default_cf_ts_sz);
+  ~WriteBatch() override;
+
+  using WriteBatchBase::Put;
+  // Store the mapping "key->value" in the database.
+  // The following Put(..., const Slice& key, ...) API can also be used when
+  // user-defined timestamp is enabled as long as `key` points to a contiguous
+  // buffer with timestamp appended after user key. The caller is responsible
+  // for setting up the memory buffer pointed to by `key`.
+  Status Put(ColumnFamilyHandle* column_family, const Slice& key,
+             const Slice& value) override;
+  Status Put(const Slice& key, const Slice& value) override {
+    return Put(nullptr, key, value);
+  }
+  Status Put(ColumnFamilyHandle* column_family, const Slice& key,
+             const Slice& ts, const Slice& value) override;
+
+  // Variant of Put() that gathers output like writev(2). The key and value
+  // that will be written to the database are concatenations of arrays of
+  // slices.
+  // The following Put(..., const SliceParts& key, ...) API can be used when
+  // user-defined timestamp is enabled as long as the timestamp is the last
+  // Slice in `key`, a SliceParts (array of Slices). The caller is responsible
+  // for setting up the `key` SliceParts object.
+  Status Put(ColumnFamilyHandle* column_family, const SliceParts& key,
+             const SliceParts& value) override;
+  Status Put(const SliceParts& key, const SliceParts& value) override {
+    return Put(nullptr, key, value);
+  }
+
+  // Store the mapping "key->{column1:value1, column2:value2, ...}" in the
+  // column family specified by "column_family".
+  using WriteBatchBase::PutEntity;
+  Status PutEntity(ColumnFamilyHandle* column_family, const Slice& key,
+                   const WideColumns& columns) override;
+
+  using WriteBatchBase::Delete;
+  // If the database contains a mapping for "key", erase it. Else do nothing.
+  // The following Delete(..., const Slice& key) can be used when user-defined
+  // timestamp is enabled as long as `key` points to a contiguous buffer with
+  // timestamp appended after user key. The caller is responsible for setting
+  // up the memory buffer pointed to by `key`.
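+  //
+  // For example (an illustrative sketch, assuming the column family is
+  // configured with 8-byte timestamps): the caller lays out
+  // |user key bytes|8 timestamp bytes| in one contiguous buffer and passes a
+  // Slice spanning the whole buffer as `key`.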
+ Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override; + Status Delete(const Slice& key) override { return Delete(nullptr, key); } + Status Delete(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) override; + + // variant that takes SliceParts + // These two variants of Delete(..., const SliceParts& key) can be used when + // user-defined timestamp is enabled as long as the timestamp is the last + // Slice in `key`, a SliceParts (array of Slices). The caller is responsible + // for setting up the `key` SliceParts object. + Status Delete(ColumnFamilyHandle* column_family, + const SliceParts& key) override; + Status Delete(const SliceParts& key) override { return Delete(nullptr, key); } + + using WriteBatchBase::SingleDelete; + // WriteBatch implementation of DB::SingleDelete(). See db.h. + Status SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key) override; + Status SingleDelete(const Slice& key) override { + return SingleDelete(nullptr, key); + } + Status SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) override; + + // variant that takes SliceParts + Status SingleDelete(ColumnFamilyHandle* column_family, + const SliceParts& key) override; + Status SingleDelete(const SliceParts& key) override { + return SingleDelete(nullptr, key); + } + + using WriteBatchBase::DeleteRange; + // WriteBatch implementation of DB::DeleteRange(). See db.h. + Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key, + const Slice& end_key) override; + Status DeleteRange(const Slice& begin_key, const Slice& end_key) override { + return DeleteRange(nullptr, begin_key, end_key); + } + // begin_key and end_key should be user keys without timestamp. + Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key, + const Slice& end_key, const Slice& ts) override; + + // variant that takes SliceParts + Status DeleteRange(ColumnFamilyHandle* column_family, + const SliceParts& begin_key, + const SliceParts& end_key) override; + Status DeleteRange(const SliceParts& begin_key, + const SliceParts& end_key) override { + return DeleteRange(nullptr, begin_key, end_key); + } + + using WriteBatchBase::Merge; + // Merge "value" with the existing value of "key" in the database. + // "key->merge(existing, value)" + Status Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) override; + Status Merge(const Slice& key, const Slice& value) override { + return Merge(nullptr, key, value); + } + Status Merge(ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, + const Slice& /*ts*/, const Slice& /*value*/) override; + + // variant that takes SliceParts + Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value) override; + Status Merge(const SliceParts& key, const SliceParts& value) override { + return Merge(nullptr, key, value); + } + + using WriteBatchBase::PutLogData; + // Append a blob of arbitrary size to the records in this batch. The blob will + // be stored in the transaction log but not in any other file. In particular, + // it will not be persisted to the SST files. When iterating over this + // WriteBatch, WriteBatch::Handler::LogData will be called with the contents + // of the blob as it is encountered. Blobs, puts, deletes, and merges will be + // encountered in the same order in which they were inserted. 
The blob will
+  // NOT consume sequence number(s) and will NOT increase the count of the
+  // batch.
+  //
+  // Example application: add timestamps to the transaction log for use in
+  // replication.
+  Status PutLogData(const Slice& blob) override;
+
+  using WriteBatchBase::Clear;
+  // Clear all updates buffered in this batch.
+  void Clear() override;
+
+  // Records the state of the batch for future calls to RollbackToSavePoint().
+  // May be called multiple times to set multiple save points.
+  void SetSavePoint() override;
+
+  // Remove all entries in this batch (Put, Merge, Delete, PutLogData) since
+  // the most recent call to SetSavePoint() and removes the most recent save
+  // point.
+  // If there is no previous call to SetSavePoint(), Status::NotFound()
+  // will be returned.
+  // Otherwise returns Status::OK().
+  Status RollbackToSavePoint() override;
+
+  // Pop the most recent save point.
+  // If there is no previous call to SetSavePoint(), Status::NotFound()
+  // will be returned.
+  // Otherwise returns Status::OK().
+  Status PopSavePoint() override;
+
+  // Support for iterating over the contents of a batch.
+  // Objects of subclasses of Handler will be used by WriteBatch::Iterate().
+  class Handler {
+   public:
+    virtual ~Handler();
+    // All handler functions in this class provide default implementations so
+    // we won't break existing clients of Handler on a source code level when
+    // adding a new member function.
+
+    // The default implementation will just call Put without column family for
+    // backwards compatibility. If the column family is not default,
+    // the function is a no-op.
+    // If user-defined timestamp is enabled, then `key` includes timestamp.
+    virtual Status PutCF(uint32_t column_family_id, const Slice& key,
+                         const Slice& value) {
+      if (column_family_id == 0) {
+        // Put() historically doesn't return status. We didn't want to be
+        // backwards incompatible so we didn't change the return status
+        // (this is a public API). We do an ordinary Put and return Status::OK()
+        Put(key, value);
+        return Status::OK();
+      }
+      return Status::InvalidArgument(
+          "non-default column family and PutCF not implemented");
+    }
+    // If user-defined timestamp is enabled, then `key` includes timestamp.
+    virtual void Put(const Slice& /*key*/, const Slice& /*value*/) {}
+
+    // If user-defined timestamp is enabled, then `key` includes timestamp.
+    virtual Status PutEntityCF(uint32_t /* column_family_id */,
+                               const Slice& /* key */,
+                               const Slice& /* entity */) {
+      return Status::NotSupported("PutEntityCF not implemented");
+    }
+
+    // If user-defined timestamp is enabled, then `key` includes timestamp.
+    virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) {
+      if (column_family_id == 0) {
+        Delete(key);
+        return Status::OK();
+      }
+      return Status::InvalidArgument(
+          "non-default column family and DeleteCF not implemented");
+    }
+    // If user-defined timestamp is enabled, then `key` includes timestamp.
+    virtual void Delete(const Slice& /*key*/) {}
+
+    // If user-defined timestamp is enabled, then `key` includes timestamp.
+    virtual Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) {
+      if (column_family_id == 0) {
+        SingleDelete(key);
+        return Status::OK();
+      }
+      return Status::InvalidArgument(
+          "non-default column family and SingleDeleteCF not implemented");
+    }
+    // If user-defined timestamp is enabled, then `key` includes timestamp.
+ virtual void SingleDelete(const Slice& /*key*/) {} + + // If user-defined timestamp is enabled, then `begin_key` and `end_key` + // both include timestamp. + virtual Status DeleteRangeCF(uint32_t /*column_family_id*/, + const Slice& /*begin_key*/, + const Slice& /*end_key*/) { + return Status::InvalidArgument("DeleteRangeCF not implemented"); + } + + // If user-defined timestamp is enabled, then `key` includes timestamp. + virtual Status MergeCF(uint32_t column_family_id, const Slice& key, + const Slice& value) { + if (column_family_id == 0) { + Merge(key, value); + return Status::OK(); + } + return Status::InvalidArgument( + "non-default column family and MergeCF not implemented"); + } + // If user-defined timestamp is enabled, then `key` includes timestamp. + virtual void Merge(const Slice& /*key*/, const Slice& /*value*/) {} + + // If user-defined timestamp is enabled, then `key` includes timestamp. + virtual Status PutBlobIndexCF(uint32_t /*column_family_id*/, + const Slice& /*key*/, + const Slice& /*value*/) { + return Status::InvalidArgument("PutBlobIndexCF not implemented"); + } + + // The default implementation of LogData does nothing. + virtual void LogData(const Slice& blob); + + virtual Status MarkBeginPrepare(bool = false) { + return Status::InvalidArgument("MarkBeginPrepare() handler not defined."); + } + + virtual Status MarkEndPrepare(const Slice& /*xid*/) { + return Status::InvalidArgument("MarkEndPrepare() handler not defined."); + } + + virtual Status MarkNoop(bool /*empty_batch*/) { + return Status::InvalidArgument("MarkNoop() handler not defined."); + } + + virtual Status MarkRollback(const Slice& /*xid*/) { + return Status::InvalidArgument( + "MarkRollbackPrepare() handler not defined."); + } + + virtual Status MarkCommit(const Slice& /*xid*/) { + return Status::InvalidArgument("MarkCommit() handler not defined."); + } + + virtual Status MarkCommitWithTimestamp(const Slice& /*xid*/, + const Slice& /*commit_ts*/) { + return Status::InvalidArgument( + "MarkCommitWithTimestamp() handler not defined."); + } + + // Continue is called by WriteBatch::Iterate. If it returns false, + // iteration is halted. Otherwise, it continues iterating. The default + // implementation always returns true. + virtual bool Continue(); + + protected: + friend class WriteBatchInternal; + enum class OptionState { + kUnknown, + kDisabled, + kEnabled, + }; + virtual OptionState WriteAfterCommit() const { + return OptionState::kUnknown; + } + virtual OptionState WriteBeforePrepare() const { + return OptionState::kUnknown; + } + }; + Status Iterate(Handler* handler) const; + + // Retrieve the serialized version of this batch. + const std::string& Data() const { return rep_; } + + // Retrieve data size of the batch. 
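+  // (Equivalent to Data().size(), i.e. the size of the serialized
+  // representation.)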
+  size_t GetDataSize() const { return rep_.size(); }
+
+  // Returns the number of updates in the batch
+  uint32_t Count() const;
+
+  // Returns true if PutCF will be called during Iterate
+  bool HasPut() const;
+
+  // Returns true if PutEntityCF will be called during Iterate
+  bool HasPutEntity() const;
+
+  // Returns true if DeleteCF will be called during Iterate
+  bool HasDelete() const;
+
+  // Returns true if SingleDeleteCF will be called during Iterate
+  bool HasSingleDelete() const;
+
+  // Returns true if DeleteRangeCF will be called during Iterate
+  bool HasDeleteRange() const;
+
+  // Returns true if MergeCF will be called during Iterate
+  bool HasMerge() const;
+
+  // Returns true if MarkBeginPrepare will be called during Iterate
+  bool HasBeginPrepare() const;
+
+  // Returns true if MarkEndPrepare will be called during Iterate
+  bool HasEndPrepare() const;
+
+  // Returns true if MarkCommit will be called during Iterate
+  bool HasCommit() const;
+
+  // Returns true if MarkRollback will be called during Iterate
+  bool HasRollback() const;
+
+  // Experimental.
+  //
+  // Update timestamps of existing entries in the write batch if
+  // applicable. If a key is intended for a column family that disables
+  // timestamp, then this API won't set the timestamp for this key.
+  // This requires that all keys in the write batch that enable timestamps
+  // (possibly from multiple column families) have timestamps of the same
+  // format.
+  //
+  // ts_sz_func: callable object to obtain the timestamp sizes of column
+  // families. If ts_sz_func() accesses data structures, then the caller of
+  // this API must guarantee thread-safety. Like other parts of RocksDB, this
+  // API is not exception-safe. Therefore, ts_sz_func() must not throw.
+  //
+  // in: cf, the column family id.
+  // ret: timestamp size of the given column family. Returning
+  // std::numeric_limits<size_t>::max() indicates "don't know or column
+  // family info not found", and will cause UpdateTimestamps() to fail.
+  // size_t ts_sz_func(uint32_t cf);
+  Status UpdateTimestamps(const Slice& ts,
+                          std::function<size_t(uint32_t)> ts_sz_func);
+
+  // Verify the per-key-value checksums of this write batch.
+  // Corruption status will be returned if the verification fails.
+  // If this write batch does not have per-key-value checksum,
+  // OK status will be returned.
+  Status VerifyChecksum() const;
+
+  using WriteBatchBase::GetWriteBatch;
+  WriteBatch* GetWriteBatch() override { return this; }
+
+  // Constructor with a serialized string object
+  explicit WriteBatch(const std::string& rep);
+  explicit WriteBatch(std::string&& rep);
+
+  WriteBatch(const WriteBatch& src);
+  WriteBatch(WriteBatch&& src) noexcept;
+  WriteBatch& operator=(const WriteBatch& src);
+  WriteBatch& operator=(WriteBatch&& src);
+
+  // Marks this point in the WriteBatch as the last record to
+  // be inserted into the WAL, provided the WAL is enabled.
+  void MarkWalTerminationPoint();
+  const SavePoint& GetWalTerminationPoint() const { return wal_term_point_; }
+
+  void SetMaxBytes(size_t max_bytes) override { max_bytes_ = max_bytes; }
+
+  struct ProtectionInfo;
+  size_t GetProtectionBytesPerKey() const;
+
+ private:
+  friend class WriteBatchInternal;
+  friend class LocalSavePoint;
+  // TODO(myabandeh): this is needed for a hack to collapse the write batch and
+  // remove duplicate keys. Remove it when the hack is replaced with a proper
+  // solution.
+  friend class WriteBatchWithIndex;
+  std::unique_ptr<SavePoints> save_points_;
+
+  // When sending a WriteBatch through WriteImpl we might want to
+  // specify that only the first x records of the batch be written to
+  // the WAL.
+  SavePoint wal_term_point_;
+
+  // Is the content of the batch the application's latest state that is meant
+  // only to be used for recovery? Refer to
+  // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery for
+  // more details.
+  bool is_latest_persistent_state_ = false;
+
+  // False if all keys are from column families that disable user-defined
+  // timestamp OR UpdateTimestamps() has been called at least once.
+  // This flag will be set to true if any of the above Put(), Delete(),
+  // SingleDelete(), etc. APIs are called at least once.
+  // Calling Put(ts), Delete(ts), SingleDelete(ts), etc. will not set this flag
+  // to true because the assumption is that these APIs have already set the
+  // timestamps to desired values.
+  bool needs_in_place_update_ts_ = false;
+
+  // True if the write batch contains at least one key from a column family
+  // that enables user-defined timestamp.
+  bool has_key_with_ts_ = false;
+
+  // For HasXYZ. Mutable to allow lazy computation of results
+  mutable std::atomic<uint32_t> content_flags_;
+
+  // Performs deferred computation of content_flags if necessary
+  uint32_t ComputeContentFlags() const;
+
+  // Maximum size of rep_.
+  size_t max_bytes_;
+
+  std::unique_ptr<ProtectionInfo> prot_info_;
+
+  size_t default_cf_ts_sz_ = 0;
+
+ protected:
+  std::string rep_;  // See comment in write_batch.cc for the format of rep_
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_batch_base.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_batch_base.h
new file mode 100644
index 0000000000..f6f39ef0be
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_batch_base.h
@@ -0,0 +1,144 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <cstddef>
+
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/wide_columns.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+class Status;
+class ColumnFamilyHandle;
+class WriteBatch;
+struct SliceParts;
+
+// Abstract base class that defines the basic interface for a write batch.
+// See WriteBatch for a basic implementation and WriteBatchWithIndex for an
+// indexed implementation.
+class WriteBatchBase {
+ public:
+  virtual ~WriteBatchBase() {}
+
+  // Store the mapping "key->value" in the database.
+  virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& value) = 0;
+  virtual Status Put(const Slice& key, const Slice& value) = 0;
+  virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& ts, const Slice& value) = 0;
+
+  // Variant of Put() that gathers output like writev(2). The key and value
+  // that will be written to the database are concatenations of arrays of
+  // slices.
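+  //
+  // A small sketch (hypothetical caller code, not part of this header): a key
+  // split across two fragments can be passed without concatenating it first:
+  //
+  //   Slice key_frags[2] = {Slice("user_"), Slice("key")};
+  //   Slice val_frag("value");
+  //   batch.Put(SliceParts(key_frags, 2), SliceParts(&val_frag, 1));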
+ virtual Status Put(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value); + virtual Status Put(const SliceParts& key, const SliceParts& value); + + // Store the mapping "key->{column1:value1, column2:value2, ...}" in the + // column family specified by "column_family". + virtual Status PutEntity(ColumnFamilyHandle* column_family, const Slice& key, + const WideColumns& columns) = 0; + + // Merge "value" with the existing value of "key" in the database. + // "key->merge(existing, value)" + virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) = 0; + virtual Status Merge(const Slice& key, const Slice& value) = 0; + virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts, const Slice& value) = 0; + + // variant that takes SliceParts + virtual Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value); + virtual Status Merge(const SliceParts& key, const SliceParts& value); + + // If the database contains a mapping for "key", erase it. Else do nothing. + virtual Status Delete(ColumnFamilyHandle* column_family, + const Slice& key) = 0; + virtual Status Delete(const Slice& key) = 0; + virtual Status Delete(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& ts) = 0; + + // variant that takes SliceParts + virtual Status Delete(ColumnFamilyHandle* column_family, + const SliceParts& key); + virtual Status Delete(const SliceParts& key); + + // If the database contains a mapping for "key", erase it. Expects that the + // key was not overwritten. Else do nothing. + virtual Status SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key) = 0; + virtual Status SingleDelete(const Slice& key) = 0; + virtual Status SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& ts) = 0; + + // variant that takes SliceParts + virtual Status SingleDelete(ColumnFamilyHandle* column_family, + const SliceParts& key); + virtual Status SingleDelete(const SliceParts& key); + + // If the database contains mappings in the range ["begin_key", "end_key"), + // erase them. Else do nothing. + virtual Status DeleteRange(ColumnFamilyHandle* column_family, + const Slice& begin_key, const Slice& end_key) = 0; + virtual Status DeleteRange(const Slice& begin_key, const Slice& end_key) = 0; + virtual Status DeleteRange(ColumnFamilyHandle* column_family, + const Slice& begin_key, const Slice& end_key, + const Slice& ts) = 0; + + // variant that takes SliceParts + virtual Status DeleteRange(ColumnFamilyHandle* column_family, + const SliceParts& begin_key, + const SliceParts& end_key); + virtual Status DeleteRange(const SliceParts& begin_key, + const SliceParts& end_key); + + // Append a blob of arbitrary size to the records in this batch. The blob will + // be stored in the transaction log but not in any other file. In particular, + // it will not be persisted to the SST files. When iterating over this + // WriteBatch, WriteBatch::Handler::LogData will be called with the contents + // of the blob as it is encountered. Blobs, puts, deletes, and merges will be + // encountered in the same order in which they were inserted. The blob will + // NOT consume sequence number(s) and will NOT increase the count of the batch + // + // Example application: add timestamps to the transaction log for use in + // replication. + virtual Status PutLogData(const Slice& blob) = 0; + + // Clear all updates buffered in this batch. 
+  virtual void Clear() = 0;
+
+  // Convert this batch into a WriteBatch. This is an abstracted way of
+  // converting any WriteBatchBase (e.g. WriteBatchWithIndex) into a basic
+  // WriteBatch.
+  virtual WriteBatch* GetWriteBatch() = 0;
+
+  // Records the state of the batch for future calls to RollbackToSavePoint().
+  // May be called multiple times to set multiple save points.
+  virtual void SetSavePoint() = 0;
+
+  // Remove all entries in this batch (Put, Merge, Delete, PutLogData) since
+  // the most recent call to SetSavePoint() and removes the most recent save
+  // point.
+  // If there is no previous call to SetSavePoint(), behaves the same as
+  // Clear().
+  virtual Status RollbackToSavePoint() = 0;
+
+  // Pop the most recent save point.
+  // If there is no previous call to SetSavePoint(), Status::NotFound()
+  // will be returned.
+  // Otherwise returns Status::OK().
+  virtual Status PopSavePoint() = 0;
+
+  // Sets the maximum size of the write batch in bytes. 0 means no limit.
+  virtual void SetMaxBytes(size_t max_bytes) = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_buffer_manager.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_buffer_manager.h
new file mode 100644
index 0000000000..61e75c8888
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/include/rocksdb/write_buffer_manager.h
@@ -0,0 +1,183 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// WriteBufferManager is for managing memory allocation for one or more
+// MemTables.
+
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <list>
+#include <memory>
+#include <mutex>
+
+#include "rocksdb/cache.h"
+
+namespace ROCKSDB_NAMESPACE {
+class CacheReservationManager;
+
+// Interface to block and signal DB instances, intended for RocksDB
+// internal use only. Each DB instance contains ptr to StallInterface.
+class StallInterface {
+ public:
+  virtual ~StallInterface() {}
+
+  virtual void Block() = 0;
+
+  virtual void Signal() = 0;
+};
+
+class WriteBufferManager final {
+ public:
+  // Parameters:
+  // _buffer_size: _buffer_size = 0 indicates no limit. Memory won't be capped.
+  // memory_usage() won't be valid and ShouldFlush() will always return true.
+  //
+  // cache_: if `cache` is provided, we'll put dummy entries in the cache and
+  // cost the memory allocated to the cache. It can be used even if
+  // _buffer_size = 0.
+  //
+  // allow_stall: if set to true, it will enable stalling of writes when
+  // memory_usage() exceeds buffer_size. It will wait for flush to complete and
+  // memory usage to drop down.
+  explicit WriteBufferManager(size_t _buffer_size,
+                              std::shared_ptr<Cache> cache = {},
+                              bool allow_stall = false);
+  // No copying allowed
+  WriteBufferManager(const WriteBufferManager&) = delete;
+  WriteBufferManager& operator=(const WriteBufferManager&) = delete;
+
+  ~WriteBufferManager();
+
+  // Returns true if buffer_limit is passed to limit the total memory usage and
+  // is greater than 0.
+  bool enabled() const { return buffer_size() > 0; }
+
+  // Returns true if pointer to cache is passed.
+  bool cost_to_cache() const { return cache_res_mgr_ != nullptr; }
+
+  // Returns the total memory used by memtables.
+  // Only valid if enabled()
+  size_t memory_usage() const {
+    return memory_used_.load(std::memory_order_relaxed);
+  }
+
+  // Returns the total memory used by active memtables.
+  size_t mutable_memtable_memory_usage() const {
+    return memory_active_.load(std::memory_order_relaxed);
+  }
+
+  size_t dummy_entries_in_cache_usage() const;
+
+  // Returns the buffer_size.
+  size_t buffer_size() const {
+    return buffer_size_.load(std::memory_order_relaxed);
+  }
+
+  // REQUIRED: `new_size` > 0
+  void SetBufferSize(size_t new_size) {
+    assert(new_size > 0);
+    buffer_size_.store(new_size, std::memory_order_relaxed);
+    mutable_limit_.store(new_size * 7 / 8, std::memory_order_relaxed);
+    // Check if stall is active and can be ended.
+    MaybeEndWriteStall();
+  }
+
+  void SetAllowStall(bool new_allow_stall) {
+    allow_stall_.store(new_allow_stall, std::memory_order_relaxed);
+    MaybeEndWriteStall();
+  }
+
+  // Below functions should be called by RocksDB internally.
+
+  // Should only be called from write thread
+  bool ShouldFlush() const {
+    if (enabled()) {
+      if (mutable_memtable_memory_usage() >
+          mutable_limit_.load(std::memory_order_relaxed)) {
+        return true;
+      }
+      size_t local_size = buffer_size();
+      if (memory_usage() >= local_size &&
+          mutable_memtable_memory_usage() >= local_size / 2) {
+        // If the memory exceeds the buffer size, we trigger more aggressive
+        // flush. But if already more than half memory is being flushed,
+        // triggering more flush may not help. We will hold it instead.
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Returns true if total memory usage exceeded buffer_size.
+  // We stall the writes until memory_usage drops below buffer_size. When the
+  // function returns true, all writer threads (including one checking this
+  // condition) across all DBs will be stalled. Stall is allowed only if the
+  // user passes allow_stall = true during WriteBufferManager instance
+  // creation.
+  //
+  // Should only be called by RocksDB internally.
+  bool ShouldStall() const {
+    if (!allow_stall_.load(std::memory_order_relaxed) || !enabled()) {
+      return false;
+    }
+
+    return IsStallActive() || IsStallThresholdExceeded();
+  }
+
+  // Returns true if stall is active.
+  bool IsStallActive() const {
+    return stall_active_.load(std::memory_order_relaxed);
+  }
+
+  // Returns true if stalling condition is met.
+  bool IsStallThresholdExceeded() const {
+    return memory_usage() >= buffer_size_;
+  }
+
+  void ReserveMem(size_t mem);
+
+  // We are in the process of freeing `mem` bytes, so it is not considered
+  // when checking the soft limit.
+  void ScheduleFreeMem(size_t mem);
+
+  void FreeMem(size_t mem);
+
+  // Add the DB instance to the queue and block the DB.
+  // Should only be called by RocksDB internally.
+  void BeginWriteStall(StallInterface* wbm_stall);
+
+  // If stall conditions have resolved, remove DB instances from queue and
+  // signal them to continue.
+  void MaybeEndWriteStall();
+
+  void RemoveDBFromQueue(StallInterface* wbm_stall);
+
+ private:
+  std::atomic<size_t> buffer_size_;
+  std::atomic<size_t> mutable_limit_;
+  std::atomic<size_t> memory_used_;
+  // Memory that hasn't been scheduled to free.
+  std::atomic<size_t> memory_active_;
+  std::shared_ptr<CacheReservationManager> cache_res_mgr_;
+  // Protects cache_res_mgr_
+  std::mutex cache_res_mgr_mu_;
+
+  std::list<StallInterface*> queue_;
+  // Protects the queue_ and stall_active_.
+  std::mutex mu_;
+  std::atomic<bool> allow_stall_;
+  // Value should only be changed by BeginWriteStall() and MaybeEndWriteStall()
+  // while holding mu_, but it can be read without a lock.
+  std::atomic<bool> stall_active_;
+
+  void ReserveMemWithCache(size_t mem);
+  void FreeMemWithCache(size_t mem);
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/compaction_filter_factory_jnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/compaction_filter_factory_jnicallback.h
new file mode 100644
index 0000000000..2f26f8dbe5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/compaction_filter_factory_jnicallback.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::CompactionFilterFactory.
+
+#ifndef JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_
+#define JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_
+
+#include <jni.h>
+
+#include <memory>
+
+#include "rocksdb/compaction_filter.h"
+#include "rocksjni/jnicallback.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class CompactionFilterFactoryJniCallback : public JniCallback,
+                                           public CompactionFilterFactory {
+ public:
+  CompactionFilterFactoryJniCallback(JNIEnv* env,
+                                     jobject jcompaction_filter_factory);
+  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context);
+  virtual const char* Name() const;
+
+ private:
+  std::unique_ptr<const char[]> m_name;
+  jmethodID m_jcreate_compaction_filter_methodid;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/comparatorjnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/comparatorjnicallback.h
new file mode 100644
index 0000000000..034c0d5d7d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/comparatorjnicallback.h
@@ -0,0 +1,137 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::Comparator
+
+#ifndef JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_
+#define JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_
+
+#include <jni.h>
+
+#include <memory>
+#include <string>
+
+#include "port/port.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/slice.h"
+#include "rocksjni/jnicallback.h"
+#include "util/thread_local.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+enum ReusedSynchronisationType {
+  /**
+   * Standard mutex.
+   */
+  MUTEX,
+
+  /**
+   * Use adaptive mutex, which spins in the user space before resorting
+   * to kernel. This could reduce context switch when the mutex is not
+   * heavily contended. However, if the mutex is hot, we could end up
+   * wasting spin time.
+   */
+  ADAPTIVE_MUTEX,
+
+  /**
+   * There is a reused buffer per-thread.
+   */
+  THREAD_LOCAL
+};
+
+struct ComparatorJniCallbackOptions {
+  // Set the synchronisation type used to guard the reused buffers.
+  // Only used if max_reused_buffer_size > 0.
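+  // Editorial note (not vendored code): a hypothetical configuration sketch,
+  // with illustrative values only:
+  //
+  //   ComparatorJniCallbackOptions opts;
+  //   opts.reused_synchronisation_type = THREAD_LOCAL;  // no shared locks
+  //   opts.direct_buffer = true;   // pass Slices without on-heap copies
+  //   opts.max_reused_buffer_size = 4096;
+  //
+  // THREAD_LOCAL trades one buffer per thread for zero lock contention; the
+  // mutex variants share the five cached buffers described below.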
+  ReusedSynchronisationType reused_synchronisation_type = ADAPTIVE_MUTEX;
+
+  // Indicates if a direct byte buffer (i.e. outside of the normal
+  // garbage-collected heap) is used for the callbacks to Java,
+  // as opposed to a non-direct byte buffer which is a wrapper around
+  // an on-heap byte[].
+  bool direct_buffer = true;
+
+  // Maximum size of a buffer (in bytes) that will be reused.
+  // Comparators will use 5 of these buffers,
+  // so the retained memory size will be 5 * max_reused_buffer_size.
+  // When a buffer is needed for transferring data to a callback,
+  // if it requires less than max_reused_buffer_size, then an
+  // existing buffer will be reused, else a new buffer will be
+  // allocated just for that callback. -1 to disable.
+  int32_t max_reused_buffer_size = 64;
+};
+
+/**
+ * This class acts as a bridge between C++
+ * and Java. The methods in this class will be
+ * called back from the RocksDB storage engine (C++);
+ * we then call back to the appropriate Java method,
+ * which enables Comparators to be implemented in Java.
+ *
+ * The design of this Comparator caches the Java Slice
+ * objects that are used in the compare and findShortestSeparator
+ * method callbacks. Instead of creating new objects for each callback
+ * of those functions, reusing them via setHandle is a lot
+ * faster; unfortunately this means that we have to
+ * introduce independent locking in regions of each of those methods
+ * via the mutexes mtx_compare and mtx_findShortestSeparator respectively.
+ */
+class ComparatorJniCallback : public JniCallback, public Comparator {
+ public:
+  ComparatorJniCallback(JNIEnv* env, jobject jcomparator,
+                        const ComparatorJniCallbackOptions* options);
+  ~ComparatorJniCallback();
+  virtual const char* Name() const;
+  virtual int Compare(const Slice& a, const Slice& b) const;
+  virtual void FindShortestSeparator(std::string* start,
+                                     const Slice& limit) const;
+  virtual void FindShortSuccessor(std::string* key) const;
+  const std::unique_ptr<const ComparatorJniCallbackOptions> m_options;
+
+ private:
+  struct ThreadLocalBuf {
+    ThreadLocalBuf(JavaVM* _jvm, bool _direct_buffer, jobject _jbuf)
+        : jvm(_jvm), direct_buffer(_direct_buffer), jbuf(_jbuf) {}
+    JavaVM* jvm;
+    bool direct_buffer;
+    jobject jbuf;
+  };
+  inline void MaybeLockForReuse(const std::unique_ptr<port::Mutex>& mutex,
+                                const bool cond) const;
+  inline void MaybeUnlockForReuse(const std::unique_ptr<port::Mutex>& mutex,
+                                  const bool cond) const;
+  jobject GetBuffer(JNIEnv* env, const Slice& src, bool reuse_buffer,
+                    ThreadLocalPtr* tl_buf, jobject jreuse_buffer) const;
+  jobject ReuseBuffer(JNIEnv* env, const Slice& src,
+                      jobject jreuse_buffer) const;
+  jobject NewBuffer(JNIEnv* env, const Slice& src) const;
+  void DeleteBuffer(JNIEnv* env, jobject jbuffer) const;
+  // used for synchronisation in compare method
+  std::unique_ptr<port::Mutex> mtx_compare;
+  // used for synchronisation in findShortestSeparator method
+  std::unique_ptr<port::Mutex> mtx_shortest;
+  // used for synchronisation in findShortSuccessor method
+  std::unique_ptr<port::Mutex> mtx_short;
+  std::unique_ptr<const char[]> m_name;
+  jclass m_abstract_comparator_jni_bridge_clazz;  // TODO(AR) could we make
+                                                  // this static somehow?
+  jclass m_jbytebuffer_clazz;  // TODO(AR) we could cache this globally for the
+                               // entire VM if we switch more APIs to use
+                               // ByteBuffer
+  jmethodID m_jcompare_mid;   // TODO(AR) could we make this static somehow?
+  jmethodID m_jshortest_mid;  // TODO(AR) could we make this static somehow?
+  jmethodID m_jshort_mid;     // TODO(AR) could we make this static somehow?
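+  // Editorial note (not vendored code): jmethodIDs remain valid for as long
+  // as the defining class stays loaded, so caching them as the TODOs above
+  // suggest is safe JNI practice. The jobject buffers below, by contrast,
+  // must be created as global references in the constructor; plain local
+  // references would be invalidated when the creating native frame returns.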
+  jobject m_jcompare_buf_a;
+  jobject m_jcompare_buf_b;
+  jobject m_jshortest_buf_start;
+  jobject m_jshortest_buf_limit;
+  jobject m_jshort_buf_key;
+  ThreadLocalPtr* m_tl_buf_a;
+  ThreadLocalPtr* m_tl_buf_b;
+};
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/cplusplus_to_java_convert.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/cplusplus_to_java_convert.h
new file mode 100644
index 0000000000..0eea6fa2c4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/cplusplus_to_java_convert.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+/*
+ * This macro is used for 32 bit OS. In a 32 bit OS, the result is a
+ negative number if we use reinterpret_cast<jlong>(pointer).
+ * For example, with jlong ptr = reinterpret_cast<jlong>(pointer), ptr is a
+ negative number in a 32 bit OS.
+ * If we check ptr using ptr > 0, it fails. For example, the following code is
+ not correct.
+ * if (jblock_cache_handle > 0) {
+     std::shared_ptr<ROCKSDB_NAMESPACE::Cache> *pCache =
+         reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::Cache> *>(
+             jblock_cache_handle);
+     options.block_cache = *pCache;
+   }
+ * But the result is a positive number if we do
+ reinterpret_cast<size_t>(pointer) first and then cast it to jlong. size_t is 4
+ bytes long in a 32 bit OS and 8 bytes long in a 64 bit OS, and
+ static_cast<jlong>(reinterpret_cast<size_t>(_pointer)) also works in a 64
+ bit OS.
+ *
+ * We don't need an opposite cast because converting from jlong back to a C++
+ pointer works in both 32 bit and 64 bit OS.
+ * For example, the following code works in both 32 bit and 64 bit OS.
+ jblock_cache_handle is jlong.
+ * std::shared_ptr<ROCKSDB_NAMESPACE::Cache> *pCache =
+       reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::Cache> *>(
+           jblock_cache_handle);
+*/
+
+#define GET_CPLUSPLUS_POINTER(_pointer) \
+  static_cast<jlong>(reinterpret_cast<size_t>(_pointer))
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/event_listener_jnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/event_listener_jnicallback.h
new file mode 100644
index 0000000000..f4a235a232
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/event_listener_jnicallback.h
@@ -0,0 +1,122 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::EventListener.
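+//
+// Editorial note (not vendored code): the callback classes in this directory
+// all follow the same bridge pattern. In sketch form, with the conversion
+// helper name being hypothetical:
+//
+//   void EventListenerJniCallback::OnFlushBegin(DB* db,
+//                                               const FlushJobInfo& info) {
+//     jboolean attached_thread;
+//     JNIEnv* env = getJniEnv(&attached_thread);  // inherited, JniCallback
+//     jobject jinfo = toJavaFlushJobInfo(env, &info);  // hypothetical helper
+//     env->CallVoidMethod(m_jcallback_obj, m_on_flush_begin_proxy_mid, jinfo);
+//     env->DeleteLocalRef(jinfo);
+//     releaseJniEnv(attached_thread);  // detach only if we attached above
+//   }
+//
+// The real implementations live in the corresponding .cc files.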
+
+#ifndef JAVA_ROCKSJNI_EVENT_LISTENER_JNICALLBACK_H_
+#define JAVA_ROCKSJNI_EVENT_LISTENER_JNICALLBACK_H_
+
+#include <jni.h>
+
+#include <memory>
+#include <set>
+
+#include "rocksdb/listener.h"
+#include "rocksjni/jnicallback.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+enum EnabledEventCallback {
+  ON_FLUSH_COMPLETED = 0x0,
+  ON_FLUSH_BEGIN = 0x1,
+  ON_TABLE_FILE_DELETED = 0x2,
+  ON_COMPACTION_BEGIN = 0x3,
+  ON_COMPACTION_COMPLETED = 0x4,
+  ON_TABLE_FILE_CREATED = 0x5,
+  ON_TABLE_FILE_CREATION_STARTED = 0x6,
+  ON_MEMTABLE_SEALED = 0x7,
+  ON_COLUMN_FAMILY_HANDLE_DELETION_STARTED = 0x8,
+  ON_EXTERNAL_FILE_INGESTED = 0x9,
+  ON_BACKGROUND_ERROR = 0xA,
+  ON_STALL_CONDITIONS_CHANGED = 0xB,
+  ON_FILE_READ_FINISH = 0xC,
+  ON_FILE_WRITE_FINISH = 0xD,
+  ON_FILE_FLUSH_FINISH = 0xE,
+  ON_FILE_SYNC_FINISH = 0xF,
+  ON_FILE_RANGE_SYNC_FINISH = 0x10,
+  ON_FILE_TRUNCATE_FINISH = 0x11,
+  ON_FILE_CLOSE_FINISH = 0x12,
+  SHOULD_BE_NOTIFIED_ON_FILE_IO = 0x13,
+  ON_ERROR_RECOVERY_BEGIN = 0x14,
+  ON_ERROR_RECOVERY_COMPLETED = 0x15,
+
+  NUM_ENABLED_EVENT_CALLBACK = 0x16,
+};
+
+class EventListenerJniCallback : public JniCallback, public EventListener {
+ public:
+  EventListenerJniCallback(
+      JNIEnv* env, jobject jevent_listener,
+      const std::set<EnabledEventCallback>& enabled_event_callbacks);
+  virtual ~EventListenerJniCallback();
+  virtual void OnFlushCompleted(DB* db, const FlushJobInfo& flush_job_info);
+  virtual void OnFlushBegin(DB* db, const FlushJobInfo& flush_job_info);
+  virtual void OnTableFileDeleted(const TableFileDeletionInfo& info);
+  virtual void OnCompactionBegin(DB* db, const CompactionJobInfo& ci);
+  virtual void OnCompactionCompleted(DB* db, const CompactionJobInfo& ci);
+  virtual void OnTableFileCreated(const TableFileCreationInfo& info);
+  virtual void OnTableFileCreationStarted(
+      const TableFileCreationBriefInfo& info);
+  virtual void OnMemTableSealed(const MemTableInfo& info);
+  virtual void OnColumnFamilyHandleDeletionStarted(ColumnFamilyHandle* handle);
+  virtual void OnExternalFileIngested(DB* db,
+                                      const ExternalFileIngestionInfo& info);
+  virtual void OnBackgroundError(BackgroundErrorReason reason,
+                                 Status* bg_error);
+  virtual void OnStallConditionsChanged(const WriteStallInfo& info);
+  virtual void OnFileReadFinish(const FileOperationInfo& info);
+  virtual void OnFileWriteFinish(const FileOperationInfo& info);
+  virtual void OnFileFlushFinish(const FileOperationInfo& info);
+  virtual void OnFileSyncFinish(const FileOperationInfo& info);
+  virtual void OnFileRangeSyncFinish(const FileOperationInfo& info);
+  virtual void OnFileTruncateFinish(const FileOperationInfo& info);
+  virtual void OnFileCloseFinish(const FileOperationInfo& info);
+  virtual bool ShouldBeNotifiedOnFileIO();
+  virtual void OnErrorRecoveryBegin(BackgroundErrorReason reason,
+                                    Status bg_error, bool* auto_recovery);
+  virtual void OnErrorRecoveryCompleted(Status old_bg_error);
+
+ private:
+  inline void InitCallbackMethodId(jmethodID& mid, EnabledEventCallback eec,
+                                   JNIEnv* env,
+                                   jmethodID (*get_id)(JNIEnv* env));
+  template <class T>
+  inline jobject SetupCallbackInvocation(
+      JNIEnv*& env, jboolean& attached_thread, const T& cpp_obj,
+      jobject (*convert)(JNIEnv* env, const T* cpp_obj));
+  inline void CleanupCallbackInvocation(JNIEnv* env, jboolean attached_thread,
+                                        std::initializer_list<jobject> refs);
+  inline void OnFileOperation(const jmethodID& mid,
+                              const FileOperationInfo& info);
+
+  const std::set<EnabledEventCallback> m_enabled_event_callbacks;
+  jmethodID m_on_flush_completed_proxy_mid;
+  jmethodID m_on_flush_begin_proxy_mid;
+  jmethodID m_on_table_file_deleted_mid;
+  jmethodID m_on_compaction_begin_proxy_mid;
+  jmethodID m_on_compaction_completed_proxy_mid;
+  jmethodID m_on_table_file_created_mid;
+  jmethodID m_on_table_file_creation_started_mid;
+  jmethodID m_on_mem_table_sealed_mid;
+  jmethodID m_on_column_family_handle_deletion_started_mid;
+  jmethodID m_on_external_file_ingested_proxy_mid;
+  jmethodID m_on_background_error_proxy_mid;
+  jmethodID m_on_stall_conditions_changed_mid;
+  jmethodID m_on_file_read_finish_mid;
+  jmethodID m_on_file_write_finish_mid;
+  jmethodID m_on_file_flush_finish_mid;
+  jmethodID m_on_file_sync_finish_mid;
+  jmethodID m_on_file_range_sync_finish_mid;
+  jmethodID m_on_file_truncate_finish_mid;
+  jmethodID m_on_file_close_finish_mid;
+  jmethodID m_should_be_notified_on_file_io;
+  jmethodID m_on_error_recovery_begin_proxy_mid;
+  jmethodID m_on_error_recovery_completed_mid;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_EVENT_LISTENER_JNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/jnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/jnicallback.h
new file mode 100644
index 0000000000..a03a041282
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/jnicallback.h
@@ -0,0 +1,32 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// JNI Callbacks from C++ to sub-classes of org.rocksdb.RocksCallbackObject
+
+#ifndef JAVA_ROCKSJNI_JNICALLBACK_H_
+#define JAVA_ROCKSJNI_JNICALLBACK_H_
+
+#include <jni.h>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+class JniCallback {
+ public:
+  JniCallback(JNIEnv* env, jobject jcallback_obj);
+  virtual ~JniCallback();
+
+  const jobject& GetJavaObject() const { return m_jcallback_obj; }
+
+ protected:
+  JavaVM* m_jvm;
+  jobject m_jcallback_obj;
+  JNIEnv* getJniEnv(jboolean* attached) const;
+  void releaseJniEnv(jboolean& attached) const;
+};
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_JNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/loggerjnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/loggerjnicallback.h
new file mode 100644
index 0000000000..57774988c5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/loggerjnicallback.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::Logger
+
+#ifndef JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_
+#define JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_
+
+#include <jni.h>
+
+#include <memory>
+#include <string>
+
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksjni/jnicallback.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class LoggerJniCallback : public JniCallback, public Logger {
+ public:
+  LoggerJniCallback(JNIEnv* env, jobject jLogger);
+  ~LoggerJniCallback();
+
+  using Logger::GetInfoLogLevel;
+  using Logger::SetInfoLogLevel;
+  // Write an entry to the log file with the specified format.
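+  // Editorial note (not vendored code): the two-argument overload below
+  // logs at the default INFO level; the three-argument overload filters
+  // against the internal log level, formats into a heap buffer via
+  // format_str() (declared below), and then invokes the cached
+  // m_jLogMethodId on the Java Logger with the matching level object,
+  // e.g. m_jinfo_level.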
+  virtual void Logv(const char* format, va_list ap);
+  // Write an entry to the log file with the specified log level
+  // and format. Any log with level under the internal log level
+  // of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be
+  // printed.
+  virtual void Logv(const InfoLogLevel log_level, const char* format,
+                    va_list ap);
+
+ private:
+  jmethodID m_jLogMethodId;
+  jobject m_jdebug_level;
+  jobject m_jinfo_level;
+  jobject m_jwarn_level;
+  jobject m_jerror_level;
+  jobject m_jfatal_level;
+  jobject m_jheader_level;
+  std::unique_ptr<char[]> format_str(const char* format, va_list ap) const;
+};
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/portal.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/portal.h
new file mode 100644
index 0000000000..aa0cc61317
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/portal.h
@@ -0,0 +1,8706 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+// This file is designed for caching those frequently used IDs and provides
+// an efficient portal (i.e., a set of static functions) to access Java code
+// from C++.
+
+#ifndef JAVA_ROCKSJNI_PORTAL_H_
+#define JAVA_ROCKSJNI_PORTAL_H_
+
+#include <jni.h>
+
+#include <algorithm>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <set>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "rocksdb/convenience.h"
+#include "rocksdb/db.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/rate_limiter.h"
+#include "rocksdb/status.h"
+#include "rocksdb/table.h"
+#include "rocksdb/utilities/backup_engine.h"
+#include "rocksdb/utilities/memory_util.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "rocksdb/utilities/write_batch_with_index.h"
+#include "rocksjni/compaction_filter_factory_jnicallback.h"
+#include "rocksjni/comparatorjnicallback.h"
+#include "rocksjni/cplusplus_to_java_convert.h"
+#include "rocksjni/event_listener_jnicallback.h"
+#include "rocksjni/loggerjnicallback.h"
+#include "rocksjni/table_filter_jnicallback.h"
+#include "rocksjni/trace_writer_jnicallback.h"
+#include "rocksjni/transaction_notifier_jnicallback.h"
+#include "rocksjni/wal_filter_jnicallback.h"
+#include "rocksjni/writebatchhandlerjnicallback.h"
+
+// Remove macro on Windows
+#ifdef DELETE
+#undef DELETE
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+class JavaClass {
+ public:
+  /**
+   * Gets and initializes a Java Class
+   *
+   * @param env A pointer to the Java environment
+   * @param jclazz_name The fully qualified JNI name of the Java Class
+   *     e.g. "java/lang/String"
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env, const char* jclazz_name) {
+    jclass jclazz = env->FindClass(jclazz_name);
+    assert(jclazz != nullptr);
+    return jclazz;
+  }
+};
+
+// Native class template
+template <class PTR, class DERIVED>
+class RocksDBNativeClass : public JavaClass {};
+
+// Native class template for sub-classes of RocksMutableObject
+template <class PTR, class DERIVED>
+class NativeRocksMutableObject : public RocksDBNativeClass<PTR, DERIVED> {
+ public:
+  /**
+   * Gets the Java Method ID for the
+   * RocksMutableObject#setNativeHandle(long, boolean) method
+   *
+   * @param env A pointer to the Java environment
+   * @return The Java Method ID or nullptr if the RocksMutableObject class
+   *     cannot be accessed, or if one of the NoSuchMethodError,
+   *     ExceptionInInitializerError or OutOfMemoryError exceptions is thrown
+   */
+  static jmethodID getSetNativeHandleMethod(JNIEnv* env) {
+    static jclass jclazz = DERIVED::getJClass(env);
+    if (jclazz == nullptr) {
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "setNativeHandle", "(JZ)V");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Sets the C++ object pointer handle in the Java object
+   *
+   * @param env A pointer to the Java environment
+   * @param jobj The Java object on which to set the pointer handle
+   * @param ptr The C++ object pointer
+   * @param java_owns_handle JNI_TRUE if ownership of the C++ object is
+   *     managed by the Java object
+   *
+   * @return true if a Java exception is pending, false otherwise
+   */
+  static bool setHandle(JNIEnv* env, jobject jobj, PTR ptr,
+                        jboolean java_owns_handle) {
+    assert(jobj != nullptr);
+    static jmethodID mid = getSetNativeHandleMethod(env);
+    if (mid == nullptr) {
+      return true;  // signal exception
+    }
+
+    env->CallVoidMethod(jobj, mid, GET_CPLUSPLUS_POINTER(ptr),
+                        java_owns_handle);
+    if (env->ExceptionCheck()) {
+      return true;  // signal exception
+    }
+
+    return false;
+  }
+};
+
+// Java Exception template
+template <class DERIVED>
+class JavaException : public JavaClass {
+ public:
+  /**
+   * Create and throw a java exception with the provided message
+   *
+   * @param env A pointer to the Java environment
+   * @param msg The message for the exception
+   *
+   * @return true if an exception was thrown, false otherwise
+   */
+  static bool ThrowNew(JNIEnv* env, const std::string& msg) {
+    jclass jclazz = DERIVED::getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      std::cerr << "JavaException::ThrowNew - Error: unexpected exception!"
+                << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    const jint rs = env->ThrowNew(jclazz, msg.c_str());
+    if (rs != JNI_OK) {
+      // exception could not be thrown
+      std::cerr << "JavaException::ThrowNew - Fatal: could not throw exception!"
+                << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    return true;
+  }
+};
+
+// The portal class for java.lang.IllegalArgumentException
+class IllegalArgumentExceptionJni
+    : public JavaException<IllegalArgumentExceptionJni> {
+ public:
+  /**
+   * Get the Java Class java.lang.IllegalArgumentException
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaException::getJClass(env, "java/lang/IllegalArgumentException");
+  }
+
+  /**
+   * Create and throw a Java IllegalArgumentException with the provided status
+   *
+   * If s.ok() == true, then this function will not throw any exception.
+   *
+   * @param env A pointer to the Java environment
+   * @param s The status for the exception
+   *
+   * @return true if an exception was thrown, false otherwise
+   */
+  static bool ThrowNew(JNIEnv* env, const Status& s) {
+    assert(!s.ok());
+    if (s.ok()) {
+      return false;
+    }
+
+    // get the IllegalArgumentException class
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      std::cerr << "IllegalArgumentExceptionJni::ThrowNew/class - Error: "
+                   "unexpected exception!"
+                << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    return JavaException::ThrowNew(env, s.ToString());
+  }
+};
+
+// The portal class for org.rocksdb.Status.Code
+class CodeJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.Status.Code
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/Status$Code");
+  }
+
+  /**
+   * Get the Java Method: Status.Code#getValue
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getValueMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "getValue", "()B");
+    assert(mid != nullptr);
+    return mid;
+  }
+};
+
+// The portal class for org.rocksdb.Status.SubCode
+class SubCodeJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.Status.SubCode
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/Status$SubCode");
+  }
+
+  /**
+   * Get the Java Method: Status.SubCode#getValue
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getValueMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "getValue", "()B");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  static ROCKSDB_NAMESPACE::Status::SubCode toCppSubCode(
+      const jbyte jsub_code) {
+    switch (jsub_code) {
+      case 0x0:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kNone;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kMutexTimeout;
+      case 0x2:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout;
+      case 0x3:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kLockLimit;
+      case 0x4:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kNoSpace;
+      case 0x5:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kDeadlock;
+      case 0x6:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kStaleFile;
+      case 0x7:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kMemoryLimit;
+
+      case 0x7F:
+      default:
+        return ROCKSDB_NAMESPACE::Status::SubCode::kNone;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.Status
+class StatusJni
+    : public RocksDBNativeClass<ROCKSDB_NAMESPACE::Status*, StatusJni> {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.Status
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return RocksDBNativeClass::getJClass(env, "org/rocksdb/Status");
+  }
+
+  /**
+   * Get the Java Method: Status#getCode
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getCodeMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jclazz, "getCode", "()Lorg/rocksdb/Status$Code;");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: Status#getSubCode
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getSubCodeMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "getSubCode",
+                                            "()Lorg/rocksdb/Status$SubCode;");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: Status#getState
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getStateMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jclazz, "getState", "()Ljava/lang/String;");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Create a new Java org.rocksdb.Status object with the same properties as
+   * the provided C++ ROCKSDB_NAMESPACE::Status object
+   *
+   * @param env A pointer to the Java environment
+   * @param status The ROCKSDB_NAMESPACE::Status object
+   *
+   * @return A reference to a Java org.rocksdb.Status object, or nullptr
+   *     if an exception occurs
+   */
+  static jobject construct(JNIEnv* env, const Status& status) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid =
+        env->GetMethodID(jclazz, "<init>", "(BBLjava/lang/String;)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    // convert the Status state for Java
+    jstring jstate = nullptr;
+    if (status.getState() != nullptr) {
+      const char* const state = status.getState();
+      jstate = env->NewStringUTF(state);
+      if (env->ExceptionCheck()) {
+        if (jstate != nullptr) {
+          env->DeleteLocalRef(jstate);
+        }
+        return nullptr;
+      }
+    }
+
+    jobject jstatus =
+        env->NewObject(jclazz, mid, toJavaStatusCode(status.code()),
+                       toJavaStatusSubCode(status.subcode()), jstate);
+    if (env->ExceptionCheck()) {
+      // exception occurred
+      if (jstate != nullptr) {
+        env->DeleteLocalRef(jstate);
+      }
+      return nullptr;
+    }
+
+    if (jstate != nullptr) {
+      env->DeleteLocalRef(jstate);
+    }
+
+    return jstatus;
+  }
+
+  static jobject construct(JNIEnv* env, const Status* status) {
+    return construct(env, *status);
+  }
+
+  // Returns the equivalent org.rocksdb.Status.Code for the provided
+  // C++ ROCKSDB_NAMESPACE::Status::Code enum
+  static jbyte toJavaStatusCode(const ROCKSDB_NAMESPACE::Status::Code& code) {
+    switch (code) {
+      case ROCKSDB_NAMESPACE::Status::Code::kOk:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::Status::Code::kNotFound:
+        return 0x1;
+      case ROCKSDB_NAMESPACE::Status::Code::kCorruption:
+        return 0x2;
+      case ROCKSDB_NAMESPACE::Status::Code::kNotSupported:
+        return 0x3;
+      case ROCKSDB_NAMESPACE::Status::Code::kInvalidArgument:
+        return 0x4;
+      case ROCKSDB_NAMESPACE::Status::Code::kIOError:
+        return 0x5;
+      case ROCKSDB_NAMESPACE::Status::Code::kMergeInProgress:
+        return 0x6;
+      case ROCKSDB_NAMESPACE::Status::Code::kIncomplete:
+        return 0x7;
+      case ROCKSDB_NAMESPACE::Status::Code::kShutdownInProgress:
+        return 0x8;
+      case ROCKSDB_NAMESPACE::Status::Code::kTimedOut:
+        return 0x9;
+      case ROCKSDB_NAMESPACE::Status::Code::kAborted:
+        return 0xA;
+      case ROCKSDB_NAMESPACE::Status::Code::kBusy:
+        return 0xB;
+      case ROCKSDB_NAMESPACE::Status::Code::kExpired:
+        return 0xC;
+      case ROCKSDB_NAMESPACE::Status::Code::kTryAgain:
+        return 0xD;
+      case ROCKSDB_NAMESPACE::Status::Code::kColumnFamilyDropped:
+        return 0xE;
+      default:
+        return 0x7F;  // undefined
+    }
+  }
+
+  // Returns the equivalent org.rocksdb.Status.SubCode for the provided
+  // C++ ROCKSDB_NAMESPACE::Status::SubCode enum
+  static jbyte toJavaStatusSubCode(
+      const ROCKSDB_NAMESPACE::Status::SubCode& subCode) {
+    switch (subCode) {
+      case ROCKSDB_NAMESPACE::Status::SubCode::kNone:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::Status::SubCode::kMutexTimeout:
+        return 0x1;
+      case ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout:
+        return 0x2;
+      case ROCKSDB_NAMESPACE::Status::SubCode::kLockLimit:
+        return 0x3;
+      case ROCKSDB_NAMESPACE::Status::SubCode::kNoSpace:
+        return 0x4;
+      case ROCKSDB_NAMESPACE::Status::SubCode::kDeadlock:
+        return 0x5;
+      case ROCKSDB_NAMESPACE::Status::SubCode::kStaleFile:
+        return 0x6;
+      case ROCKSDB_NAMESPACE::Status::SubCode::kMemoryLimit:
+        return 0x7;
+      default:
+        return 0x7F;  // undefined
+    }
+  }
+
+  static std::unique_ptr<ROCKSDB_NAMESPACE::Status> toCppStatus(
+      const jbyte jcode_value, const jbyte jsub_code_value) {
+    std::unique_ptr<ROCKSDB_NAMESPACE::Status> status;
+    switch (jcode_value) {
+      case 0x0:
+        // Ok
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::OK()));
+        break;
+      case 0x1:
+        // NotFound
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::NotFound(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0x2:
+        // Corruption
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Corruption(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0x3:
+        // NotSupported
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(
+                ROCKSDB_NAMESPACE::Status::NotSupported(
+                    ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(
+                        jsub_code_value))));
+        break;
+      case 0x4:
+        // InvalidArgument
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(
+                ROCKSDB_NAMESPACE::Status::InvalidArgument(
+                    ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(
+                        jsub_code_value))));
+        break;
+      case 0x5:
+        // IOError
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::IOError(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0x6:
+        // MergeInProgress
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(
+                ROCKSDB_NAMESPACE::Status::MergeInProgress(
+                    ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(
+                        jsub_code_value))));
+        break;
+      case 0x7:
+        // Incomplete
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Incomplete(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0x8:
+        // ShutdownInProgress
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(
+                ROCKSDB_NAMESPACE::Status::ShutdownInProgress(
+                    ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(
+                        jsub_code_value))));
+        break;
+      case 0x9:
+        // TimedOut
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::TimedOut(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0xA:
+        // Aborted
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Aborted(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0xB:
+        // Busy
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Busy(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0xC:
+        // Expired
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Expired(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0xD:
+        // TryAgain
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::TryAgain(
+                ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value))));
+        break;
+      case 0xE:
+        // ColumnFamilyDropped
+        status = std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+            new ROCKSDB_NAMESPACE::Status(
+                ROCKSDB_NAMESPACE::Status::ColumnFamilyDropped(
+                    ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(
+                        jsub_code_value))));
+        break;
+      case 0x7F:
+      default:
+        return nullptr;
+    }
+    return status;
+  }
+
+  // Returns the equivalent ROCKSDB_NAMESPACE::Status for the Java
+  // org.rocksdb.Status
+  static std::unique_ptr<ROCKSDB_NAMESPACE::Status> toCppStatus(
+      JNIEnv* env, const jobject jstatus) {
+    jmethodID mid_code = getCodeMethod(env);
+    if (mid_code == nullptr) {
+      // exception occurred
+      return nullptr;
+    }
+    jobject jcode = env->CallObjectMethod(jstatus, mid_code);
+    if (env->ExceptionCheck()) {
+      // exception occurred
+      return nullptr;
+    }
+
+    jmethodID mid_code_value = ROCKSDB_NAMESPACE::CodeJni::getValueMethod(env);
+    if (mid_code_value == nullptr) {
+      // exception occurred
+      return nullptr;
+    }
+    jbyte jcode_value = env->CallByteMethod(jcode, mid_code_value);
+    if (env->ExceptionCheck()) {
+      // exception occurred
+      if (jcode != nullptr) {
+        env->DeleteLocalRef(jcode);
+      }
+      return nullptr;
+    }
+
+    jmethodID mid_subCode = getSubCodeMethod(env);
+    if (mid_subCode == nullptr) {
+      // exception occurred
+      return nullptr;
+    }
+    jobject jsubCode = env->CallObjectMethod(jstatus, mid_subCode);
+    if (env->ExceptionCheck()) {
+      // exception occurred
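+      // Editorial note (not vendored code): the early-return cleanup below
+      // illustrates a general JNI rule — local references such as jcode and
+      // jsubCode are only reclaimed automatically when the native frame
+      // returns to Java, so every exit path must DeleteLocalRef anything it
+      // has accumulated, or a long-running native call can exhaust the
+      // local-reference table.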
+      if (jcode != nullptr) {
+        env->DeleteLocalRef(jcode);
+      }
+      return nullptr;
+    }
+
+    jbyte jsub_code_value = 0x0;  // None
+    if (jsubCode != nullptr) {
+      jmethodID mid_subCode_value =
+          ROCKSDB_NAMESPACE::SubCodeJni::getValueMethod(env);
+      if (mid_subCode_value == nullptr) {
+        // exception occurred
+        return nullptr;
+      }
+      jsub_code_value = env->CallByteMethod(jsubCode, mid_subCode_value);
+      if (env->ExceptionCheck()) {
+        // exception occurred
+        if (jcode != nullptr) {
+          env->DeleteLocalRef(jcode);
+        }
+        return nullptr;
+      }
+    }
+
+    jmethodID mid_state = getStateMethod(env);
+    if (mid_state == nullptr) {
+      // exception occurred
+      return nullptr;
+    }
+    jobject jstate = env->CallObjectMethod(jstatus, mid_state);
+    if (env->ExceptionCheck()) {
+      // exception occurred
+      if (jsubCode != nullptr) {
+        env->DeleteLocalRef(jsubCode);
+      }
+      if (jcode != nullptr) {
+        env->DeleteLocalRef(jcode);
+      }
+      return nullptr;
+    }
+
+    std::unique_ptr<ROCKSDB_NAMESPACE::Status> status =
+        toCppStatus(jcode_value, jsub_code_value);
+
+    // delete all local refs
+    if (jstate != nullptr) {
+      env->DeleteLocalRef(jstate);
+    }
+    if (jsubCode != nullptr) {
+      env->DeleteLocalRef(jsubCode);
+    }
+    if (jcode != nullptr) {
+      env->DeleteLocalRef(jcode);
+    }
+
+    return status;
+  }
+};
+
+// The portal class for org.rocksdb.RocksDBException
+class RocksDBExceptionJni : public JavaException<RocksDBExceptionJni> {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.RocksDBException
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaException::getJClass(env, "org/rocksdb/RocksDBException");
+  }
+
+  /**
+   * Create and throw a Java RocksDBException with the provided message
+   *
+   * @param env A pointer to the Java environment
+   * @param msg The message for the exception
+   *
+   * @return true if an exception was thrown, false otherwise
+   */
+  static bool ThrowNew(JNIEnv* env, const std::string& msg) {
+    return JavaException::ThrowNew(env, msg);
+  }
+
+  /**
+   * Create and throw a Java RocksDBException with the provided status
+   *
+   * If s->ok() == true, then this function will not throw any exception.
+   *
+   * @param env A pointer to the Java environment
+   * @param s The status for the exception
+   *
+   * @return true if an exception was thrown, false otherwise
+   */
+  static bool ThrowNew(JNIEnv* env,
+                       std::unique_ptr<ROCKSDB_NAMESPACE::Status>& s) {
+    return ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, *(s.get()));
+  }
+
+  /**
+   * Create and throw a Java RocksDBException with the provided status
+   *
+   * If s.ok() == true, then this function will not throw any exception.
+   *
+   * @param env A pointer to the Java environment
+   * @param s The status for the exception
+   *
+   * @return true if an exception was thrown, false otherwise
+   */
+  static bool ThrowNew(JNIEnv* env, const Status& s) {
+    if (s.ok()) {
+      return false;
+    }
+
+    // get the RocksDBException class
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      std::cerr << "RocksDBExceptionJni::ThrowNew/class - Error: unexpected "
+                   "exception!"
+                << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    // get the constructor of org.rocksdb.RocksDBException
+    jmethodID mid =
+        env->GetMethodID(jclazz, "<init>", "(Lorg/rocksdb/Status;)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      std::cerr
+          << "RocksDBExceptionJni::ThrowNew/cstr - Error: unexpected exception!"
+          << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    // get the Java status object
+    jobject jstatus = StatusJni::construct(env, s);
+    if (jstatus == nullptr) {
+      // exception occurred
+      std::cerr << "RocksDBExceptionJni::ThrowNew/StatusJni - Error: "
+                   "unexpected exception!"
+                << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    // construct the RocksDBException
+    jthrowable rocksdb_exception =
+        reinterpret_cast<jthrowable>(env->NewObject(jclazz, mid, jstatus));
+    if (env->ExceptionCheck()) {
+      if (jstatus != nullptr) {
+        env->DeleteLocalRef(jstatus);
+      }
+      if (rocksdb_exception != nullptr) {
+        env->DeleteLocalRef(rocksdb_exception);
+      }
+      std::cerr << "RocksDBExceptionJni::ThrowNew/NewObject - Error: "
+                   "unexpected exception!"
+                << std::endl;
+      return true;
+    }
+
+    // throw the RocksDBException
+    const jint rs = env->Throw(rocksdb_exception);
+    if (rs != JNI_OK) {
+      // exception could not be thrown
+      std::cerr
+          << "RocksDBExceptionJni::ThrowNew - Fatal: could not throw exception!"
+          << std::endl;
+      if (jstatus != nullptr) {
+        env->DeleteLocalRef(jstatus);
+      }
+      if (rocksdb_exception != nullptr) {
+        env->DeleteLocalRef(rocksdb_exception);
+      }
+      return env->ExceptionCheck();
+    }
+
+    if (jstatus != nullptr) {
+      env->DeleteLocalRef(jstatus);
+    }
+    if (rocksdb_exception != nullptr) {
+      env->DeleteLocalRef(rocksdb_exception);
+    }
+
+    return true;
+  }
+
+  /**
+   * Create and throw a Java RocksDBException with the provided message
+   * and status
+   *
+   * If s.ok() == true, then this function will not throw any exception.
+   *
+   * @param env A pointer to the Java environment
+   * @param msg The message for the exception
+   * @param s The status for the exception
+   *
+   * @return true if an exception was thrown, false otherwise
+   */
+  static bool ThrowNew(JNIEnv* env, const std::string& msg, const Status& s) {
+    assert(!s.ok());
+    if (s.ok()) {
+      return false;
+    }
+
+    // get the RocksDBException class
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      std::cerr << "RocksDBExceptionJni::ThrowNew/class - Error: unexpected "
+                   "exception!"
+                << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    // get the constructor of org.rocksdb.RocksDBException
+    jmethodID mid = env->GetMethodID(
+        jclazz, "<init>", "(Ljava/lang/String;Lorg/rocksdb/Status;)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      std::cerr
+          << "RocksDBExceptionJni::ThrowNew/cstr - Error: unexpected exception!"
+          << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    jstring jmsg = env->NewStringUTF(msg.c_str());
+    if (jmsg == nullptr) {
+      // exception thrown: OutOfMemoryError
+      std::cerr
+          << "RocksDBExceptionJni::ThrowNew/msg - Error: unexpected exception!"
+          << std::endl;
+      return env->ExceptionCheck();
+    }
+
+    // get the Java status object
+    jobject jstatus = StatusJni::construct(env, s);
+    if (jstatus == nullptr) {
+      // exception occurred
+      std::cerr << "RocksDBExceptionJni::ThrowNew/StatusJni - Error: "
+                   "unexpected exception!"
+                << std::endl;
+      if (jmsg != nullptr) {
+        env->DeleteLocalRef(jmsg);
+      }
+      return env->ExceptionCheck();
+    }
+
+    // construct the RocksDBException
+    jthrowable rocksdb_exception = reinterpret_cast<jthrowable>(
+        env->NewObject(jclazz, mid, jmsg, jstatus));
+    if (env->ExceptionCheck()) {
+      if (jstatus != nullptr) {
+        env->DeleteLocalRef(jstatus);
+      }
+      if (jmsg != nullptr) {
+        env->DeleteLocalRef(jmsg);
+      }
+      if (rocksdb_exception != nullptr) {
+        env->DeleteLocalRef(rocksdb_exception);
+      }
+      std::cerr << "RocksDBExceptionJni::ThrowNew/NewObject - Error: "
+                   "unexpected exception!"
+                << std::endl;
+      return true;
+    }
+
+    // throw the RocksDBException
+    const jint rs = env->Throw(rocksdb_exception);
+    if (rs != JNI_OK) {
+      // exception could not be thrown
+      std::cerr
+          << "RocksDBExceptionJni::ThrowNew - Fatal: could not throw exception!"
+          << std::endl;
+      if (jstatus != nullptr) {
+        env->DeleteLocalRef(jstatus);
+      }
+      if (jmsg != nullptr) {
+        env->DeleteLocalRef(jmsg);
+      }
+      if (rocksdb_exception != nullptr) {
+        env->DeleteLocalRef(rocksdb_exception);
+      }
+      return env->ExceptionCheck();
+    }
+
+    if (jstatus != nullptr) {
+      env->DeleteLocalRef(jstatus);
+    }
+    if (jmsg != nullptr) {
+      env->DeleteLocalRef(jmsg);
+    }
+    if (rocksdb_exception != nullptr) {
+      env->DeleteLocalRef(rocksdb_exception);
+    }
+
+    return true;
+  }
+
+  /**
+   * Get the Java Method: RocksDBException#getStatus
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getStatusMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jclazz, "getStatus", "()Lorg/rocksdb/Status;");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  static std::unique_ptr<ROCKSDB_NAMESPACE::Status> toCppStatus(
+      JNIEnv* env, jthrowable jrocksdb_exception) {
+    if (!env->IsInstanceOf(jrocksdb_exception, getJClass(env))) {
+      // not an instance of RocksDBException
+      return nullptr;
+    }
+
+    // get the java status object
+    jmethodID mid = getStatusMethod(env);
+    if (mid == nullptr) {
+      // exception occurred accessing class or method
+      return nullptr;
+    }
+
+    jobject jstatus = env->CallObjectMethod(jrocksdb_exception, mid);
+    if (env->ExceptionCheck()) {
+      // exception occurred
+      return nullptr;
+    }
+
+    if (jstatus == nullptr) {
+      return nullptr;  // no status available
+    }
+
+    return ROCKSDB_NAMESPACE::StatusJni::toCppStatus(env, jstatus);
+  }
+};
+
+// The portal class for java.util.List
+class ListJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class java.util.List
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getListClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "java/util/List");
+  }
+
+  /**
+   * Get the Java Class java.util.ArrayList
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getArrayListClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "java/util/ArrayList");
+  }
+
+  /**
+   * Get the Java Class java.util.Iterator
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getIteratorClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "java/util/Iterator");
+  }
+
+  /**
+   * Get the Java Method: List#iterator
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getIteratorMethod(JNIEnv* env) {
+    jclass jlist_clazz = getListClass(env);
+    if (jlist_clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jlist_clazz, "iterator", "()Ljava/util/Iterator;");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: Iterator#hasNext
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getHasNextMethod(JNIEnv* env) {
+    jclass jiterator_clazz = getIteratorClass(env);
+    if (jiterator_clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jiterator_clazz, "hasNext", "()Z");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: Iterator#next
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getNextMethod(JNIEnv* env) {
+    jclass jiterator_clazz = getIteratorClass(env);
+    if (jiterator_clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jiterator_clazz, "next", "()Ljava/lang/Object;");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: ArrayList constructor
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getArrayListConstructorMethodId(JNIEnv* env) {
+    jclass jarray_list_clazz = getArrayListClass(env);
+    if (jarray_list_clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+    static jmethodID mid =
+        env->GetMethodID(jarray_list_clazz, "<init>", "(I)V");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: List#add
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getListAddMethodId(JNIEnv* env) {
+    jclass jlist_clazz = getListClass(env);
+    if (jlist_clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jlist_clazz, "add", "(Ljava/lang/Object;)Z");
+    assert(mid != nullptr);
+    return mid;
+  }
+};
+
+// The portal class for java.lang.Byte
+class ByteJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class java.lang.Byte
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "java/lang/Byte");
+  }
+
+  /**
+   * Get the Java Class byte[]
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getArrayJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "[B");
+  }
+
+  /**
+   * Creates a new 2-dimensional Java Byte Array byte[][]
+   *
+   * @param env A pointer to the Java environment
+   * @param len The size of the first dimension
+   *
+   * @return A reference to the Java byte[][] or nullptr if an exception occurs
+   */
+  static jobjectArray new2dByteArray(JNIEnv* env, const jsize len) {
+    jclass clazz = getArrayJClass(env);
+    if (clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    return env->NewObjectArray(len, clazz, nullptr);
+  }
+
+  /**
+   * Get the Java Method: Byte#byteValue
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getByteValueMethod(JNIEnv* env) {
+    jclass clazz = getJClass(env);
+    if (clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(clazz, "byteValue", "()B");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Calls the Java Method: Byte#valueOf, returning a constructed Byte jobject
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return A constructed Byte object or nullptr if the class or method id
+   *     could not be retrieved, or an exception occurred
+   */
+  static jobject valueOf(JNIEnv* env, jbyte jprimitive_byte) {
+    jclass clazz = getJClass(env);
+    if (clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetStaticMethodID(clazz, "valueOf", "(B)Ljava/lang/Byte;");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    const jobject jbyte_obj =
+        env->CallStaticObjectMethod(clazz, mid, jprimitive_byte);
+    if (env->ExceptionCheck()) {
+      // exception occurred
+      return nullptr;
+    }
+
+    return jbyte_obj;
+  }
+};
+
+// The portal class for java.nio.ByteBuffer
+class ByteBufferJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class java.nio.ByteBuffer
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "java/nio/ByteBuffer");
+  }
+
+  /**
+   * Get the Java Method: ByteBuffer#allocate
+   *
+   * @param env A pointer to the Java environment
+   * @param jbytebuffer_clazz if you have a reference to a ByteBuffer class, or
+   *     nullptr
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getAllocateMethodId(JNIEnv* env,
+                                       jclass jbytebuffer_clazz = nullptr) {
+    const jclass jclazz =
+        jbytebuffer_clazz == nullptr ? getJClass(env) : jbytebuffer_clazz;
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetStaticMethodID(jclazz, "allocate", "(I)Ljava/nio/ByteBuffer;");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: ByteBuffer#array
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getArrayMethodId(JNIEnv* env,
+                                    jclass jbytebuffer_clazz = nullptr) {
+    const jclass jclazz =
+        jbytebuffer_clazz == nullptr ? getJClass(env) : jbytebuffer_clazz;
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "array", "()[B");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  static jobject construct(JNIEnv* env, const bool direct,
+                           const size_t capacity,
+                           jclass jbytebuffer_clazz = nullptr) {
+    return constructWith(env, direct, nullptr, capacity, jbytebuffer_clazz);
+  }
+
+  static jobject constructWith(JNIEnv* env, const bool direct, const char* buf,
+                               const size_t capacity,
+                               jclass jbytebuffer_clazz = nullptr) {
+    if (direct) {
+      bool allocated = false;
+      if (buf == nullptr) {
+        buf = new char[capacity];
+        allocated = true;
+      }
+      jobject jbuf = env->NewDirectByteBuffer(const_cast<char*>(buf),
+                                              static_cast<jlong>(capacity));
+      if (jbuf == nullptr) {
+        // exception occurred
+        if (allocated) {
+          delete[] static_cast<const char*>(buf);
+        }
+        return nullptr;
+      }
+      return jbuf;
+    } else {
+      const jclass jclazz =
+          jbytebuffer_clazz == nullptr ? getJClass(env) : jbytebuffer_clazz;
+      if (jclazz == nullptr) {
+        // exception occurred accessing class
+        return nullptr;
+      }
+      const jmethodID jmid_allocate =
+          getAllocateMethodId(env, jbytebuffer_clazz);
+      if (jmid_allocate == nullptr) {
+        // exception occurred accessing class, or NoSuchMethodException or
+        // OutOfMemoryError
+        return nullptr;
+      }
+      const jobject jbuf = env->CallStaticObjectMethod(
+          jclazz, jmid_allocate, static_cast<jint>(capacity));
+      if (env->ExceptionCheck()) {
+        // exception occurred
+        return nullptr;
+      }
+
+      // set buffer data?
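+      // Editorial note (not vendored code): for a non-direct ByteBuffer the
+      // source bytes must be copied into the buffer's backing byte[]. Between
+      // GetPrimitiveArrayCritical and ReleasePrimitiveArrayCritical the code
+      // may not make other JNI calls or block, which is why the critical
+      // region below is kept to a single memcpy.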
+ if (buf != nullptr) { + jbyteArray jarray = array(env, jbuf, jbytebuffer_clazz); + if (jarray == nullptr) { + // exception occurred + env->DeleteLocalRef(jbuf); + return nullptr; + } + + jboolean is_copy = JNI_FALSE; + jbyte* ja = reinterpret_cast( + env->GetPrimitiveArrayCritical(jarray, &is_copy)); + if (ja == nullptr) { + // exception occurred + env->DeleteLocalRef(jarray); + env->DeleteLocalRef(jbuf); + return nullptr; + } + + memcpy(ja, const_cast(buf), capacity); + + env->ReleasePrimitiveArrayCritical(jarray, ja, 0); + + env->DeleteLocalRef(jarray); + } + + return jbuf; + } + } + + static jbyteArray array(JNIEnv* env, const jobject& jbyte_buffer, + jclass jbytebuffer_clazz = nullptr) { + const jmethodID mid = getArrayMethodId(env, jbytebuffer_clazz); + if (mid == nullptr) { + // exception occurred accessing class, or NoSuchMethodException or + // OutOfMemoryError + return nullptr; + } + const jobject jarray = env->CallObjectMethod(jbyte_buffer, mid); + if (env->ExceptionCheck()) { + // exception occurred + return nullptr; + } + return static_cast(jarray); + } +}; + +// The portal class for java.lang.Integer +class IntegerJni : public JavaClass { + public: + /** + * Get the Java Class java.lang.Integer + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "java/lang/Integer"); + } + + static jobject valueOf(JNIEnv* env, jint jprimitive_int) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID mid = + env->GetStaticMethodID(jclazz, "valueOf", "(I)Ljava/lang/Integer;"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + const jobject jinteger_obj = + env->CallStaticObjectMethod(jclazz, mid, jprimitive_int); + if (env->ExceptionCheck()) { + // exception occurred + return nullptr; + } + + return jinteger_obj; + } +}; + +// The portal class for java.lang.Long +class LongJni : public JavaClass { + public: + /** + * Get the Java Class java.lang.Long + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "java/lang/Long"); + } + + static jobject valueOf(JNIEnv* env, jlong jprimitive_long) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID mid = + env->GetStaticMethodID(jclazz, "valueOf", "(J)Ljava/lang/Long;"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + const jobject jlong_obj = + env->CallStaticObjectMethod(jclazz, mid, jprimitive_long); + if (env->ExceptionCheck()) { + // exception occurred + return nullptr; + } + + return jlong_obj; + } +}; + +// The portal class for java.lang.StringBuilder +class StringBuilderJni : public JavaClass { + public: + /** + * Get the Java Class java.lang.StringBuilder + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, 
ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "java/lang/StringBuilder"); + } + + /** + * Get the Java Method: StringBuilder#append + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getListAddMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID( + jclazz, "append", "(Ljava/lang/String;)Ljava/lang/StringBuilder;"); + assert(mid != nullptr); + return mid; + } + + /** + * Appends a C-style string to a StringBuilder + * + * @param env A pointer to the Java environment + * @param jstring_builder Reference to a java.lang.StringBuilder + * @param c_str A C-style string to append to the StringBuilder + * + * @return A reference to the updated StringBuilder, or a nullptr if + * an exception occurs + */ + static jobject append(JNIEnv* env, jobject jstring_builder, + const char* c_str) { + jmethodID mid = getListAddMethodId(env); + if (mid == nullptr) { + // exception occurred accessing class or method + return nullptr; + } + + jstring new_value_str = env->NewStringUTF(c_str); + if (new_value_str == nullptr) { + // exception thrown: OutOfMemoryError + return nullptr; + } + + jobject jresult_string_builder = + env->CallObjectMethod(jstring_builder, mid, new_value_str); + if (env->ExceptionCheck()) { + // exception occurred + env->DeleteLocalRef(new_value_str); + return nullptr; + } + + return jresult_string_builder; + } +}; + +// various utility functions for working with RocksDB and JNI +class JniUtil { + public: + /** + * Detect if jlong overflows size_t + * + * @param jvalue the jlong value + * + * @return + */ + inline static Status check_if_jlong_fits_size_t(const jlong& jvalue) { + Status s = Status::OK(); + if (static_cast(jvalue) > std::numeric_limits::max()) { + s = Status::InvalidArgument(Slice("jlong overflows 32 bit value.")); + } + return s; + } + + /** + * Obtains a reference to the JNIEnv from + * the JVM + * + * If the current thread is not attached to the JavaVM + * then it will be attached so as to retrieve the JNIEnv + * + * If a thread is attached, it must later be manually + * released by calling JavaVM::DetachCurrentThread. 
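+ *
+ * A minimal calling pattern (illustrative only; `jvm` is assumed to be a
+ * valid JavaVM*):
+ *
+ *   jboolean attached = JNI_FALSE;
+ *   JNIEnv* env = JniUtil::getJniEnv(jvm, &attached);
+ *   if (env != nullptr) {
+ *     // ... use env ...
+ *     JniUtil::releaseJniEnv(jvm, attached);
+ *   }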
+ * This can be handled by always matching calls to this
+ * function with calls to {@link JniUtil::releaseJniEnv(JavaVM*, jboolean)}
+ *
+ * @param jvm (IN) A pointer to the JavaVM instance
+ * @param attached (OUT) A pointer to a boolean which
+ *     will be set to JNI_TRUE if we had to attach the thread
+ *
+ * @return A pointer to the JNIEnv or nullptr if a fatal error
+ *     occurs and the JNIEnv cannot be retrieved
+ */
+  static JNIEnv* getJniEnv(JavaVM* jvm, jboolean* attached) {
+    assert(jvm != nullptr);
+
+    JNIEnv* env;
+    const jint env_rs =
+        jvm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6);
+
+    if (env_rs == JNI_OK) {
+      // current thread is already attached, return the JNIEnv
+      *attached = JNI_FALSE;
+      return env;
+    } else if (env_rs == JNI_EDETACHED) {
+      // current thread is not attached, attempt to attach
+      const jint rs_attach =
+          jvm->AttachCurrentThread(reinterpret_cast<void**>(&env), NULL);
+      if (rs_attach == JNI_OK) {
+        *attached = JNI_TRUE;
+        return env;
+      } else {
+        // error, could not attach the thread
+        std::cerr << "JniUtil::getJniEnv - Fatal: could not attach current "
+                     "thread to JVM!"
+                  << std::endl;
+        return nullptr;
+      }
+    } else if (env_rs == JNI_EVERSION) {
+      // error, JDK does not support JNI_VERSION_1_6+
+      std::cerr
+          << "JniUtil::getJniEnv - Fatal: JDK does not support JNI_VERSION_1_6"
+          << std::endl;
+      return nullptr;
+    } else {
+      std::cerr << "JniUtil::getJniEnv - Fatal: Unknown error: env_rs="
+                << env_rs << std::endl;
+      return nullptr;
+    }
+  }
+
+  /**
+   * Counterpart to {@link JniUtil::getJniEnv(JavaVM*, jboolean*)}
+   *
+   * Detaches the current thread from the JVM if it was previously
+   * attached
+   *
+   * @param jvm (IN) A pointer to the JavaVM instance
+   * @param attached (IN) JNI_TRUE if we previously had to attach the thread
+   *     to the JavaVM to get the JNIEnv
+   */
+  static void releaseJniEnv(JavaVM* jvm, jboolean& attached) {
+    assert(jvm != nullptr);
+    if (attached == JNI_TRUE) {
+      const jint rs_detach = jvm->DetachCurrentThread();
+      assert(rs_detach == JNI_OK);
+      if (rs_detach != JNI_OK) {
+        std::cerr << "JniUtil::releaseJniEnv - Warn: Unable to detach current "
+                     "thread from JVM!"
+ << std::endl; + } + } + } + + /** + * Copies a Java String[] to a C++ std::vector + * + * @param env (IN) A pointer to the java environment + * @param jss (IN) The Java String array to copy + * @param has_exception (OUT) will be set to JNI_TRUE + * if an OutOfMemoryError or ArrayIndexOutOfBoundsException + * exception occurs + * + * @return A std::vector containing copies of the Java strings + */ + static std::vector copyStrings(JNIEnv* env, jobjectArray jss, + jboolean* has_exception) { + return ROCKSDB_NAMESPACE::JniUtil::copyStrings( + env, jss, env->GetArrayLength(jss), has_exception); + } + + /** + * Copies a Java String[] to a C++ std::vector + * + * @param env (IN) A pointer to the java environment + * @param jss (IN) The Java String array to copy + * @param jss_len (IN) The length of the Java String array to copy + * @param has_exception (OUT) will be set to JNI_TRUE + * if an OutOfMemoryError or ArrayIndexOutOfBoundsException + * exception occurs + * + * @return A std::vector containing copies of the Java strings + */ + static std::vector copyStrings(JNIEnv* env, jobjectArray jss, + const jsize jss_len, + jboolean* has_exception) { + std::vector strs; + strs.reserve(jss_len); + for (jsize i = 0; i < jss_len; i++) { + jobject js = env->GetObjectArrayElement(jss, i); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + *has_exception = JNI_TRUE; + return strs; + } + + jstring jstr = static_cast(js); + const char* str = env->GetStringUTFChars(jstr, nullptr); + if (str == nullptr) { + // exception thrown: OutOfMemoryError + env->DeleteLocalRef(js); + *has_exception = JNI_TRUE; + return strs; + } + + strs.push_back(std::string(str)); + + env->ReleaseStringUTFChars(jstr, str); + env->DeleteLocalRef(js); + } + + *has_exception = JNI_FALSE; + return strs; + } + + /** + * Copies a jstring to a C-style null-terminated byte string + * and releases the original jstring + * + * The jstring is copied as UTF-8 + * + * If an exception occurs, then JNIEnv::ExceptionCheck() + * will have been called + * + * @param env (IN) A pointer to the java environment + * @param js (IN) The java string to copy + * @param has_exception (OUT) will be set to JNI_TRUE + * if an OutOfMemoryError exception occurs + * + * @return A pointer to the copied string, or a + * nullptr if has_exception == JNI_TRUE + */ + static std::unique_ptr copyString(JNIEnv* env, jstring js, + jboolean* has_exception) { + const char* utf = env->GetStringUTFChars(js, nullptr); + if (utf == nullptr) { + // exception thrown: OutOfMemoryError + env->ExceptionCheck(); + *has_exception = JNI_TRUE; + return nullptr; + } else if (env->ExceptionCheck()) { + // exception thrown + env->ReleaseStringUTFChars(js, utf); + *has_exception = JNI_TRUE; + return nullptr; + } + + const jsize utf_len = env->GetStringUTFLength(js); + std::unique_ptr str( + new char[utf_len + + 1]); // Note: + 1 is needed for the c_str null terminator + std::strcpy(str.get(), utf); + env->ReleaseStringUTFChars(js, utf); + *has_exception = JNI_FALSE; + return str; + } + + /** + * Copies a jstring to a std::string + * and releases the original jstring + * + * If an exception occurs, then JNIEnv::ExceptionCheck() + * will have been called + * + * @param env (IN) A pointer to the java environment + * @param js (IN) The java string to copy + * @param has_exception (OUT) will be set to JNI_TRUE + * if an OutOfMemoryError exception occurs + * + * @return A std:string copy of the jstring, or an + * empty std::string if has_exception == JNI_TRUE + */ 
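+  // Illustrative use of copyStdString (hypothetical caller; `js` is any
+  // jstring received from Java):
+  //
+  //   jboolean has_exception = JNI_FALSE;
+  //   std::string s = JniUtil::copyStdString(env, js, &has_exception);
+  //   if (has_exception == JNI_TRUE) {
+  //     return;  // a Java exception is pending; make no further JNI calls
+  //   }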
+ static std::string copyStdString(JNIEnv* env, jstring js, + jboolean* has_exception) { + const char* utf = env->GetStringUTFChars(js, nullptr); + if (utf == nullptr) { + // exception thrown: OutOfMemoryError + env->ExceptionCheck(); + *has_exception = JNI_TRUE; + return std::string(); + } else if (env->ExceptionCheck()) { + // exception thrown + env->ReleaseStringUTFChars(js, utf); + *has_exception = JNI_TRUE; + return std::string(); + } + + std::string name(utf); + env->ReleaseStringUTFChars(js, utf); + *has_exception = JNI_FALSE; + return name; + } + + /** + * Copies bytes from a std::string to a jByteArray + * + * @param env A pointer to the java environment + * @param bytes The bytes to copy + * + * @return the Java byte[], or nullptr if an exception occurs + * + * @throws RocksDBException thrown + * if memory size to copy exceeds general java specific array size + * limitation. + */ + static jbyteArray copyBytes(JNIEnv* env, std::string bytes) { + return createJavaByteArrayWithSizeCheck(env, bytes.c_str(), bytes.size()); + } + + /** + * Given a Java byte[][] which is an array of java.lang.Strings + * where each String is a byte[], the passed function `string_fn` + * will be called on each String, the result is the collected by + * calling the passed function `collector_fn` + * + * @param env (IN) A pointer to the java environment + * @param jbyte_strings (IN) A Java array of Strings expressed as bytes + * @param string_fn (IN) A transform function to call for each String + * @param collector_fn (IN) A collector which is called for the result + * of each `string_fn` + * @param has_exception (OUT) will be set to JNI_TRUE + * if an ArrayIndexOutOfBoundsException or OutOfMemoryError + * exception occurs + */ + template + static void byteStrings(JNIEnv* env, jobjectArray jbyte_strings, + std::function string_fn, + std::function collector_fn, + jboolean* has_exception) { + const jsize jlen = env->GetArrayLength(jbyte_strings); + + for (jsize i = 0; i < jlen; i++) { + jobject jbyte_string_obj = env->GetObjectArrayElement(jbyte_strings, i); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + *has_exception = JNI_TRUE; // signal error + return; + } + + jbyteArray jbyte_string_ary = + reinterpret_cast(jbyte_string_obj); + T result = byteString(env, jbyte_string_ary, string_fn, has_exception); + + env->DeleteLocalRef(jbyte_string_obj); + + if (*has_exception == JNI_TRUE) { + // exception thrown: OutOfMemoryError + return; + } + + collector_fn(i, result); + } + + *has_exception = JNI_FALSE; + } + + /** + * Given a Java String which is expressed as a Java Byte Array byte[], + * the passed function `string_fn` will be called on the String + * and the result returned + * + * @param env (IN) A pointer to the java environment + * @param jbyte_string_ary (IN) A Java String expressed in bytes + * @param string_fn (IN) A transform function to call on the String + * @param has_exception (OUT) will be set to JNI_TRUE + * if an OutOfMemoryError exception occurs + */ + template + static T byteString(JNIEnv* env, jbyteArray jbyte_string_ary, + std::function string_fn, + jboolean* has_exception) { + const jsize jbyte_string_len = env->GetArrayLength(jbyte_string_ary); + return byteString(env, jbyte_string_ary, jbyte_string_len, string_fn, + has_exception); + } + + /** + * Given a Java String which is expressed as a Java Byte Array byte[], + * the passed function `string_fn` will be called on the String + * and the result returned + * + * @param env (IN) A pointer to the 
java environment + * @param jbyte_string_ary (IN) A Java String expressed in bytes + * @param jbyte_string_len (IN) The length of the Java String + * expressed in bytes + * @param string_fn (IN) A transform function to call on the String + * @param has_exception (OUT) will be set to JNI_TRUE + * if an OutOfMemoryError exception occurs + */ + template + static T byteString(JNIEnv* env, jbyteArray jbyte_string_ary, + const jsize jbyte_string_len, + std::function string_fn, + jboolean* has_exception) { + jbyte* jbyte_string = env->GetByteArrayElements(jbyte_string_ary, nullptr); + if (jbyte_string == nullptr) { + // exception thrown: OutOfMemoryError + *has_exception = JNI_TRUE; + return nullptr; // signal error + } + + T result = + string_fn(reinterpret_cast(jbyte_string), jbyte_string_len); + + env->ReleaseByteArrayElements(jbyte_string_ary, jbyte_string, JNI_ABORT); + + *has_exception = JNI_FALSE; + return result; + } + + /** + * Converts a std::vector to a Java byte[][] where each Java String + * is expressed as a Java Byte Array byte[]. + * + * @param env A pointer to the java environment + * @param strings A vector of Strings + * + * @return A Java array of Strings expressed as bytes, + * or nullptr if an exception is thrown + */ + static jobjectArray stringsBytes(JNIEnv* env, + std::vector strings) { + jclass jcls_ba = ByteJni::getArrayJClass(env); + if (jcls_ba == nullptr) { + // exception occurred + return nullptr; + } + + const jsize len = static_cast(strings.size()); + + jobjectArray jbyte_strings = env->NewObjectArray(len, jcls_ba, nullptr); + if (jbyte_strings == nullptr) { + // exception thrown: OutOfMemoryError + return nullptr; + } + + for (jsize i = 0; i < len; i++) { + std::string* str = &strings[i]; + const jsize str_len = static_cast(str->size()); + + jbyteArray jbyte_string_ary = env->NewByteArray(str_len); + if (jbyte_string_ary == nullptr) { + // exception thrown: OutOfMemoryError + env->DeleteLocalRef(jbyte_strings); + return nullptr; + } + + env->SetByteArrayRegion( + jbyte_string_ary, 0, str_len, + const_cast(reinterpret_cast(str->c_str()))); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + env->DeleteLocalRef(jbyte_string_ary); + env->DeleteLocalRef(jbyte_strings); + return nullptr; + } + + env->SetObjectArrayElement(jbyte_strings, i, jbyte_string_ary); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + // or ArrayStoreException + env->DeleteLocalRef(jbyte_string_ary); + env->DeleteLocalRef(jbyte_strings); + return nullptr; + } + + env->DeleteLocalRef(jbyte_string_ary); + } + + return jbyte_strings; + } + + /** + * Converts a std::vector to a Java String[]. 
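+ *
+ * For example (illustrative only), a std::vector<std::string> holding
+ * {"a", "b"} becomes a Java String[] {"a", "b"}; on failure nullptr is
+ * returned with a Java exception pending.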
+ * + * @param env A pointer to the java environment + * @param strings A vector of Strings + * + * @return A Java array of Strings, + * or nullptr if an exception is thrown + */ + static jobjectArray toJavaStrings(JNIEnv* env, + const std::vector* strings) { + jclass jcls_str = env->FindClass("java/lang/String"); + if (jcls_str == nullptr) { + // exception occurred + return nullptr; + } + + const jsize len = static_cast(strings->size()); + + jobjectArray jstrings = env->NewObjectArray(len, jcls_str, nullptr); + if (jstrings == nullptr) { + // exception thrown: OutOfMemoryError + return nullptr; + } + + for (jsize i = 0; i < len; i++) { + const std::string* str = &((*strings)[i]); + jstring js = ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, str); + if (js == nullptr) { + env->DeleteLocalRef(jstrings); + return nullptr; + } + + env->SetObjectArrayElement(jstrings, i, js); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + // or ArrayStoreException + env->DeleteLocalRef(js); + env->DeleteLocalRef(jstrings); + return nullptr; + } + } + + return jstrings; + } + + /** + * Creates a Java UTF String from a C++ std::string + * + * @param env A pointer to the java environment + * @param string the C++ std::string + * @param treat_empty_as_null true if empty strings should be treated as null + * + * @return the Java UTF string, or nullptr if the provided string + * is null (or empty and treat_empty_as_null is set), or if an + * exception occurs allocating the Java String. + */ + static jstring toJavaString(JNIEnv* env, const std::string* string, + const bool treat_empty_as_null = false) { + if (string == nullptr) { + return nullptr; + } + + if (treat_empty_as_null && string->empty()) { + return nullptr; + } + + return env->NewStringUTF(string->c_str()); + } + + /** + * Copies bytes to a new jByteArray with the check of java array size + * limitation. + * + * @param bytes pointer to memory to copy to a new jByteArray + * @param size number of bytes to copy + * + * @return the Java byte[], or nullptr if an exception occurs + * + * @throws RocksDBException thrown + * if memory size to copy exceeds general java array size limitation to + * avoid overflow. 
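+ *
+ * For example (illustrative only), asking to copy a 3 GiB value exceeds
+ * the 2^31-byte guard below, so a RocksDBException is raised instead of
+ * attempting an oversized jbyteArray allocation.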
+ */ + static jbyteArray createJavaByteArrayWithSizeCheck(JNIEnv* env, + const char* bytes, + const size_t size) { + // Limitation for java array size is vm specific + // In general it cannot exceed Integer.MAX_VALUE (2^31 - 1) + // Current HotSpot VM limitation for array size is Integer.MAX_VALUE - 5 + // (2^31 - 1 - 5) It means that the next call to env->NewByteArray can still + // end with OutOfMemoryError("Requested array size exceeds VM limit") coming + // from VM + static const size_t MAX_JARRAY_SIZE = (static_cast(1)) << 31; + if (size > MAX_JARRAY_SIZE) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( + env, "Requested array size exceeds VM limit"); + return nullptr; + } + + const jsize jlen = static_cast(size); + jbyteArray jbytes = env->NewByteArray(jlen); + if (jbytes == nullptr) { + // exception thrown: OutOfMemoryError + return nullptr; + } + + env->SetByteArrayRegion( + jbytes, 0, jlen, + const_cast(reinterpret_cast(bytes))); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + env->DeleteLocalRef(jbytes); + return nullptr; + } + + return jbytes; + } + + /** + * Copies bytes from a ROCKSDB_NAMESPACE::Slice to a jByteArray + * + * @param env A pointer to the java environment + * @param bytes The bytes to copy + * + * @return the Java byte[] or nullptr if an exception occurs + * + * @throws RocksDBException thrown + * if memory size to copy exceeds general java specific array size + * limitation. + */ + static jbyteArray copyBytes(JNIEnv* env, const Slice& bytes) { + return createJavaByteArrayWithSizeCheck(env, bytes.data(), bytes.size()); + } + + /* + * Helper for operations on a key and value + * for example WriteBatch->Put + * + * TODO(AR) could be used for RocksDB->Put etc. + */ + static std::unique_ptr kv_op( + std::function + op, + JNIEnv* env, jobject /*jobj*/, jbyteArray jkey, jint jkey_len, + jbyteArray jvalue, jint jvalue_len) { + jbyte* key = env->GetByteArrayElements(jkey, nullptr); + if (env->ExceptionCheck()) { + // exception thrown: OutOfMemoryError + return nullptr; + } + + jbyte* value = env->GetByteArrayElements(jvalue, nullptr); + if (env->ExceptionCheck()) { + // exception thrown: OutOfMemoryError + if (key != nullptr) { + env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); + } + return nullptr; + } + + ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); + ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), + jvalue_len); + + auto status = op(key_slice, value_slice); + + if (value != nullptr) { + env->ReleaseByteArrayElements(jvalue, value, JNI_ABORT); + } + if (key != nullptr) { + env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); + } + + return std::unique_ptr( + new ROCKSDB_NAMESPACE::Status(status)); + } + + /* + * Helper for operations on a key + * for example WriteBatch->Delete + * + * TODO(AR) could be used for RocksDB->Delete etc. 
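+   *
+   * A sketch of the intended call shape (illustrative only; `wb` stands in
+   * for a ROCKSDB_NAMESPACE::WriteBatch* owned by the caller):
+   *
+   *   auto status = JniUtil::k_op(
+   *       [&wb](ROCKSDB_NAMESPACE::Slice key) { return wb->Delete(key); },
+   *       env, jobj, jkey, jkey_len);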
+   */
+  static std::unique_ptr<ROCKSDB_NAMESPACE::Status> k_op(
+      std::function<ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Slice)> op,
+      JNIEnv* env, jobject /*jobj*/, jbyteArray jkey, jint jkey_len) {
+    jbyte* key = env->GetByteArrayElements(jkey, nullptr);
+    if (env->ExceptionCheck()) {
+      // exception thrown: OutOfMemoryError
+      return nullptr;
+    }
+
+    ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast<char*>(key), jkey_len);
+
+    auto status = op(key_slice);
+
+    if (key != nullptr) {
+      env->ReleaseByteArrayElements(jkey, key, JNI_ABORT);
+    }
+
+    return std::unique_ptr<ROCKSDB_NAMESPACE::Status>(
+        new ROCKSDB_NAMESPACE::Status(status));
+  }
+
+  /*
+   * Helper for operations on a key which is a region of an array
+   * Used to extract the common code from seek/seekForPrev.
+   * Possible that it can be generalised from that.
+   *
+   * We use GetByteArrayRegion to copy the key region of the whole array into
+   * a char[]. We suspect this is not much slower than GetByteArrayElements,
+   * which probably copies anyway.
+   */
+  static void k_op_region(std::function<void(ROCKSDB_NAMESPACE::Slice&)> op,
+                          JNIEnv* env, jbyteArray jkey, jint jkey_off,
+                          jint jkey_len) {
+    const std::unique_ptr<char[]> key(new char[jkey_len]);
+    if (key == nullptr) {
+      jclass oom_class = env->FindClass("java/lang/OutOfMemoryError");
+      env->ThrowNew(oom_class,
+                    "Memory allocation failed in RocksDB JNI function");
+      return;
+    }
+    env->GetByteArrayRegion(jkey, jkey_off, jkey_len,
+                            reinterpret_cast<jbyte*>(key.get()));
+    if (env->ExceptionCheck()) {
+      // exception thrown: ArrayIndexOutOfBoundsException
+      return;
+    }
+
+    ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast<char*>(key.get()),
+                                       jkey_len);
+    op(key_slice);
+  }
+
+  /*
+   * Helper for operations on a value
+   * for example WriteBatchWithIndex->GetFromBatch
+   */
+  static jbyteArray v_op(std::function<ROCKSDB_NAMESPACE::Status(
+                             ROCKSDB_NAMESPACE::Slice, std::string*)>
+                             op,
+                         JNIEnv* env, jbyteArray jkey, jint jkey_len) {
+    jbyte* key = env->GetByteArrayElements(jkey, nullptr);
+    if (env->ExceptionCheck()) {
+      // exception thrown: OutOfMemoryError
+      return nullptr;
+    }
+
+    ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast<char*>(key), jkey_len);
+
+    std::string value;
+    ROCKSDB_NAMESPACE::Status s = op(key_slice, &value);
+
+    if (key != nullptr) {
+      env->ReleaseByteArrayElements(jkey, key, JNI_ABORT);
+    }
+
+    if (s.IsNotFound()) {
+      return nullptr;
+    }
+
+    if (s.ok()) {
+      jbyteArray jret_value =
+          env->NewByteArray(static_cast<jsize>(value.size()));
+      if (jret_value == nullptr) {
+        // exception thrown: OutOfMemoryError
+        return nullptr;
+      }
+
+      env->SetByteArrayRegion(
+          jret_value, 0, static_cast<jsize>(value.size()),
+          const_cast<jbyte*>(reinterpret_cast<const jbyte*>(value.c_str())));
+      if (env->ExceptionCheck()) {
+        // exception thrown: ArrayIndexOutOfBoundsException
+        if (jret_value != nullptr) {
+          env->DeleteLocalRef(jret_value);
+        }
+        return nullptr;
+      }
+
+      return jret_value;
+    }
+
+    ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s);
+    return nullptr;
+  }
+
+  /**
+   * Creates a vector of C++ pointers from
+   * a Java array of C++ pointer addresses.
+   *
+   * @param env (IN) A pointer to the java environment
+   * @param jptrs (IN) A Java array of C++ pointer addresses
+   * @param has_exception (OUT) will be set to JNI_TRUE
+   *     if an ArrayIndexOutOfBoundsException or OutOfMemoryError
+   *     exception occurs.
+   *
+   * @return A vector of C++ pointers.
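+   *
+   * For example (illustrative only; `jhandles` is a hypothetical jlongArray
+   * of ColumnFamilyHandle addresses received from Java):
+   *
+   *   jboolean has_exception = JNI_FALSE;
+   *   std::vector<ROCKSDB_NAMESPACE::ColumnFamilyHandle*> handles =
+   *       JniUtil::fromJPointers<ROCKSDB_NAMESPACE::ColumnFamilyHandle>(
+   *           env, jhandles, &has_exception);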
+ */ + template + static std::vector fromJPointers(JNIEnv* env, jlongArray jptrs, + jboolean* has_exception) { + const jsize jptrs_len = env->GetArrayLength(jptrs); + std::vector ptrs; + jlong* jptr = env->GetLongArrayElements(jptrs, nullptr); + if (jptr == nullptr) { + // exception thrown: OutOfMemoryError + *has_exception = JNI_TRUE; + return ptrs; + } + ptrs.reserve(jptrs_len); + for (jsize i = 0; i < jptrs_len; i++) { + ptrs.push_back(reinterpret_cast(jptr[i])); + } + env->ReleaseLongArrayElements(jptrs, jptr, JNI_ABORT); + return ptrs; + } + + /** + * Creates a Java array of C++ pointer addresses + * from a vector of C++ pointers. + * + * @param env (IN) A pointer to the java environment + * @param pointers (IN) A vector of C++ pointers + * @param has_exception (OUT) will be set to JNI_TRUE + * if an ArrayIndexOutOfBoundsException or OutOfMemoryError + * exception occurs + * + * @return Java array of C++ pointer addresses. + */ + template + static jlongArray toJPointers(JNIEnv* env, const std::vector& pointers, + jboolean* has_exception) { + const jsize len = static_cast(pointers.size()); + std::unique_ptr results(new jlong[len]); + std::transform( + pointers.begin(), pointers.end(), results.get(), + [](T* pointer) -> jlong { return GET_CPLUSPLUS_POINTER(pointer); }); + + jlongArray jpointers = env->NewLongArray(len); + if (jpointers == nullptr) { + // exception thrown: OutOfMemoryError + *has_exception = JNI_TRUE; + return nullptr; + } + + env->SetLongArrayRegion(jpointers, 0, len, results.get()); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + *has_exception = JNI_TRUE; + env->DeleteLocalRef(jpointers); + return nullptr; + } + + *has_exception = JNI_FALSE; + + return jpointers; + } + + /* + * Helper for operations on a key and value + * for example WriteBatch->Put + * + * TODO(AR) could be extended to cover returning ROCKSDB_NAMESPACE::Status + * from `op` and used for RocksDB->Put etc. + */ + static void kv_op_direct( + std::function + op, + JNIEnv* env, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, + jint jval_off, jint jval_len) { + char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); + if (key == nullptr || + env->GetDirectBufferCapacity(jkey) < (jkey_off + jkey_len)) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, + "Invalid key argument"); + return; + } + + char* value = reinterpret_cast(env->GetDirectBufferAddress(jval)); + if (value == nullptr || + env->GetDirectBufferCapacity(jval) < (jval_off + jval_len)) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( + env, "Invalid value argument"); + return; + } + + key += jkey_off; + value += jval_off; + + ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); + ROCKSDB_NAMESPACE::Slice value_slice(value, jval_len); + + op(key_slice, value_slice); + } + + /* + * Helper for operations on a key and value + * for example WriteBatch->Delete + * + * TODO(AR) could be extended to cover returning ROCKSDB_NAMESPACE::Status + * from `op` and used for RocksDB->Delete etc. 
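+   *
+   * A sketch of the intended call shape (illustrative only; `batch` stands
+   * in for a ROCKSDB_NAMESPACE::WriteBatch* owned by the caller; the key is
+   * read in place from the direct ByteBuffer, so no copy is made):
+   *
+   *   JniUtil::k_op_direct(
+   *       [&batch](ROCKSDB_NAMESPACE::Slice& key) { batch->Delete(key); },
+   *       env, jkey, jkey_off, jkey_len);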
+ */ + static void k_op_direct(std::function op, + JNIEnv* env, jobject jkey, jint jkey_off, + jint jkey_len) { + char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); + if (key == nullptr || + env->GetDirectBufferCapacity(jkey) < (jkey_off + jkey_len)) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, + "Invalid key argument"); + return; + } + + key += jkey_off; + + ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); + + return op(key_slice); + } + + template + static jint copyToDirect(JNIEnv* env, T& source, jobject jtarget, + jint jtarget_off, jint jtarget_len) { + char* target = + reinterpret_cast(env->GetDirectBufferAddress(jtarget)); + if (target == nullptr || + env->GetDirectBufferCapacity(jtarget) < (jtarget_off + jtarget_len)) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( + env, "Invalid target argument"); + return 0; + } + + target += jtarget_off; + + const jint cvalue_len = static_cast(source.size()); + const jint length = std::min(jtarget_len, cvalue_len); + + memcpy(target, source.data(), length); + + return cvalue_len; + } +}; + +class MapJni : public JavaClass { + public: + /** + * Get the Java Class java.util.Map + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "java/util/Map"); + } + + /** + * Get the Java Method: Map#put + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getMapPutMethodId(JNIEnv* env) { + jclass jlist_clazz = getJClass(env); + if (jlist_clazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID( + jlist_clazz, "put", + "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); + assert(mid != nullptr); + return mid; + } +}; + +class HashMapJni : public JavaClass { + public: + /** + * Get the Java Class java.util.HashMap + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "java/util/HashMap"); + } + + /** + * Create a new Java java.util.HashMap object. 
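+   *
+   * (The default initial_capacity of 16 matches java.util.HashMap's own
+   * default; the fromCppMap overloads below size the map from the source
+   * std::map up front to avoid rehashing.)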
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return A reference to a Java java.util.HashMap object, or
+   *     nullptr if an exception occurs
+   */
+  static jobject construct(JNIEnv* env, const uint32_t initial_capacity = 16) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = env->GetMethodID(jclazz, "<init>", "(I)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    jobject jhash_map =
+        env->NewObject(jclazz, mid, static_cast<jint>(initial_capacity));
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+
+    return jhash_map;
+  }
+
+  /**
+   * A function which maps a std::pair<K, V> to a std::pair<JK, JV>
+   *
+   * @return Either a pointer to a std::pair<jobject, jobject>, or nullptr
+   *     if an error occurs during the mapping
+   */
+  template <typename K, typename V, typename JK, typename JV>
+  using FnMapKV =
+      std::function<std::unique_ptr<std::pair<JK, JV>>(const std::pair<K, V>&)>;
+
+  // template <class I, typename std::enable_if<std::is_same<typename
+  // std::iterator_traits<I>::value_type, std::pair<const Slice,
+  // std::string>>::value, int32_t>::type = 0> static void putAll(JNIEnv*
+  // env, const jobject jhash_map, I iterator, const FnMapKV<const Slice,
+  // std::string> &fn_map_kv) {
+  /**
+   * Returns true if it succeeds, false if an error occurs
+   */
+  template <class iterator_type, typename K, typename V>
+  static bool putAll(JNIEnv* env, const jobject jhash_map,
+                     iterator_type iterator, iterator_type end,
+                     const FnMapKV<K, V, jobject, jobject>& fn_map_kv) {
+    const jmethodID jmid_put =
+        ROCKSDB_NAMESPACE::MapJni::getMapPutMethodId(env);
+    if (jmid_put == nullptr) {
+      return false;
+    }
+
+    for (auto it = iterator; it != end; ++it) {
+      const std::unique_ptr<std::pair<jobject, jobject>> result =
+          fn_map_kv(*it);
+      if (result == nullptr) {
+        // an error occurred during fn_map_kv
+        return false;
+      }
+      env->CallObjectMethod(jhash_map, jmid_put, result->first, result->second);
+      if (env->ExceptionCheck()) {
+        // exception occurred
+        env->DeleteLocalRef(result->second);
+        env->DeleteLocalRef(result->first);
+        return false;
+      }
+
+      // release local references
+      env->DeleteLocalRef(result->second);
+      env->DeleteLocalRef(result->first);
+    }
+
+    return true;
+  }
+
+  /**
+   * Creates a java.util.Map<String, String> from a
+   * std::map<std::string, std::string>
+   *
+   * @param env A pointer to the Java environment
+   * @param map the Cpp map
+   *
+   * @return a reference to the Java java.util.Map object, or nullptr if an
+   *     exception occurred
+   */
+  static jobject fromCppMap(JNIEnv* env,
+                            const std::map<std::string, std::string>* map) {
+    if (map == nullptr) {
+      return nullptr;
+    }
+
+    jobject jhash_map = construct(env, static_cast<uint32_t>(map->size()));
+    if (jhash_map == nullptr) {
+      // exception occurred
+      return nullptr;
+    }
+
+    const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV<
+        const std::string, const std::string, jobject, jobject>
+        fn_map_kv =
+            [env](const std::pair<const std::string, const std::string>& kv) {
+              jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+                  env, &(kv.first), false);
+              if (env->ExceptionCheck()) {
+                // an error occurred
+                return std::unique_ptr<std::pair<jobject, jobject>>(nullptr);
+              }
+
+              jstring jvalue = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+                  env, &(kv.second), true);
+              if (env->ExceptionCheck()) {
+                // an error occurred
+                env->DeleteLocalRef(jkey);
+                return std::unique_ptr<std::pair<jobject, jobject>>(nullptr);
+              }
+
+              return std::unique_ptr<std::pair<jobject, jobject>>(
+                  new std::pair<jobject, jobject>(
+                      static_cast<jobject>(jkey), static_cast<jobject>(jvalue)));
+            };
+
+    if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) {
+      // exception occurred
+      return nullptr;
+    }
+
+    return jhash_map;
+  }
+
+  /**
+   * Creates a java.util.Map<String, Integer> from a
+   * std::map<std::string, uint32_t>
+   *
+   * @param env A pointer to the Java environment
+   * @param map the Cpp map
+   *
+   * @return a reference to the Java java.util.Map object, or nullptr if an
+   *     exception occurred
+   */
+  static jobject fromCppMap(JNIEnv* env,
+                            const std::map<std::string, uint32_t>* map) {
+    if (map == nullptr) {
+      return nullptr;
+    }
+
+    jobject jhash_map = construct(env, static_cast<uint32_t>(map->size()));
+    if (jhash_map == nullptr) {
+      // exception occurred
+      return nullptr;
+    }
+
+    const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV<
+        const std::string, const uint32_t, jobject, jobject>
+        fn_map_kv =
+            [env](const std::pair<const std::string, const uint32_t>& kv) {
+              jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+                  env, &(kv.first), false);
+              if (env->ExceptionCheck()) {
+                // an error occurred
+                return std::unique_ptr<std::pair<jobject, jobject>>(nullptr);
+              }
+
+              jobject jvalue = ROCKSDB_NAMESPACE::IntegerJni::valueOf(
+                  env, static_cast<jint>(kv.second));
+              if (env->ExceptionCheck()) {
+                // an error occurred
+                env->DeleteLocalRef(jkey);
+                return std::unique_ptr<std::pair<jobject, jobject>>(nullptr);
+              }
+
+              return std::unique_ptr<std::pair<jobject, jobject>>(
+                  new std::pair<jobject, jobject>(static_cast<jobject>(jkey),
+                                                  jvalue));
+            };
+
+    if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) {
+      // exception occurred
+      return nullptr;
+    }
+
+    return jhash_map;
+  }
+
+  /**
+   * Creates a java.util.Map<String, Long> from a
+   * std::map<std::string, uint64_t>
+   *
+   * @param env A pointer to the Java environment
+   * @param map the Cpp map
+   *
+   * @return a reference to the Java java.util.Map object, or nullptr if an
+   *     exception occurred
+   */
+  static jobject fromCppMap(JNIEnv* env,
+                            const std::map<std::string, uint64_t>* map) {
+    if (map == nullptr) {
+      return nullptr;
+    }
+
+    jobject jhash_map = construct(env, static_cast<uint32_t>(map->size()));
+    if (jhash_map == nullptr) {
+      // exception occurred
+      return nullptr;
+    }
+
+    const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV<
+        const std::string, const uint64_t, jobject, jobject>
+        fn_map_kv =
+            [env](const std::pair<const std::string, const uint64_t>& kv) {
+              jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+                  env, &(kv.first), false);
+              if (env->ExceptionCheck()) {
+                // an error occurred
+                return std::unique_ptr<std::pair<jobject, jobject>>(nullptr);
+              }
+
+              jobject jvalue = ROCKSDB_NAMESPACE::LongJni::valueOf(
+                  env, static_cast<jlong>(kv.second));
+              if (env->ExceptionCheck()) {
+                // an error occurred
+                env->DeleteLocalRef(jkey);
+                return std::unique_ptr<std::pair<jobject, jobject>>(nullptr);
+              }
+
+              return std::unique_ptr<std::pair<jobject, jobject>>(
+                  new std::pair<jobject, jobject>(static_cast<jobject>(jkey),
+                                                  jvalue));
+            };
+
+    if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) {
+      // exception occurred
+      return nullptr;
+    }
+
+    return jhash_map;
+  }
+
+  /**
+   * Creates a java.util.Map<Integer, Long> from a
+   * std::map<uint32_t, uint64_t>
+   *
+   * @param env A pointer to the Java environment
+   * @param map the Cpp map
+   *
+   * @return a reference to the Java java.util.Map object, or nullptr if an
+   *     exception occurred
+   */
+  static jobject fromCppMap(JNIEnv* env,
+                            const std::map<uint32_t, uint64_t>* map) {
+    if (map == nullptr) {
+      return nullptr;
+    }
+
+    jobject jhash_map = construct(env, static_cast<uint32_t>(map->size()));
+    if (jhash_map == nullptr) {
+      // exception occurred
+      return nullptr;
+    }
+
+    const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV<const uint32_t,
+                                                 const uint64_t, jobject,
+                                                 jobject>
+        fn_map_kv = [env](const std::pair<const uint32_t, const uint64_t>& kv) {
+          jobject jkey = ROCKSDB_NAMESPACE::IntegerJni::valueOf(
+              env, static_cast<jint>(kv.first));
+          if (env->ExceptionCheck()) {
+            // an error occurred
+            return std::unique_ptr<std::pair<jobject, jobject>>(nullptr);
+          }
+
+          jobject jvalue = ROCKSDB_NAMESPACE::LongJni::valueOf(
+              env, static_cast<jlong>(kv.second));
+          if (env->ExceptionCheck()) {
+            // an error occurred
+            env->DeleteLocalRef(jkey);
+            return std::unique_ptr<std::pair<jobject, jobject>>(nullptr);
+          }
+
+          return std::unique_ptr<std::pair<jobject, jobject>>(
+              new std::pair<jobject, jobject>(jkey, jvalue));
+        };
+
+    if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) {
+      // exception occurred
+      return nullptr;
+    }
+
+    return jhash_map;
+  }
+};
+
+// The portal class
for org.rocksdb.RocksDB +class RocksDBJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.RocksDB + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/RocksDB"); + } +}; + +// The portal class for org.rocksdb.Options +class OptionsJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.Options + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/Options"); + } +}; + +// The portal class for org.rocksdb.DBOptions +class DBOptionsJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.DBOptions + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/DBOptions"); + } +}; + +// The portal class for org.rocksdb.ColumnFamilyOptions +class ColumnFamilyOptionsJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.ColumnFamilyOptions + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, + "org/rocksdb/ColumnFamilyOptions"); + } + + /** + * Create a new Java org.rocksdb.ColumnFamilyOptions object with the same + * properties as the provided C++ ROCKSDB_NAMESPACE::ColumnFamilyOptions + * object + * + * @param env A pointer to the Java environment + * @param cfoptions A pointer to ROCKSDB_NAMESPACE::ColumnFamilyOptions object + * + * @return A reference to a Java org.rocksdb.ColumnFamilyOptions object, or + * nullptr if an an exception occurs + */ + static jobject construct(JNIEnv* env, const ColumnFamilyOptions* cfoptions) { + auto* cfo = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(*cfoptions); + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID mid = env->GetMethodID(jclazz, "", "(J)V"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + jobject jcfd = env->NewObject(jclazz, mid, GET_CPLUSPLUS_POINTER(cfo)); + if (env->ExceptionCheck()) { + return nullptr; + } + + return jcfd; + } +}; + +// The portal class for org.rocksdb.WriteOptions +class WriteOptionsJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.WriteOptions + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or 
ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteOptions"); + } +}; + +// The portal class for org.rocksdb.ReadOptions +class ReadOptionsJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.ReadOptions + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/ReadOptions"); + } +}; + +// The portal class for org.rocksdb.WriteBatch +class WriteBatchJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.WriteBatch + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatch"); + } + + /** + * Create a new Java org.rocksdb.WriteBatch object + * + * @param env A pointer to the Java environment + * @param wb A pointer to ROCKSDB_NAMESPACE::WriteBatch object + * + * @return A reference to a Java org.rocksdb.WriteBatch object, or + * nullptr if an an exception occurs + */ + static jobject construct(JNIEnv* env, const WriteBatch* wb) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID mid = env->GetMethodID(jclazz, "", "(J)V"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + jobject jwb = env->NewObject(jclazz, mid, GET_CPLUSPLUS_POINTER(wb)); + if (env->ExceptionCheck()) { + return nullptr; + } + + return jwb; + } +}; + +// The portal class for org.rocksdb.WriteBatch.Handler +class WriteBatchHandlerJni + : public RocksDBNativeClass< + const ROCKSDB_NAMESPACE::WriteBatchHandlerJniCallback*, + WriteBatchHandlerJni> { + public: + /** + * Get the Java Class org.rocksdb.WriteBatch.Handler + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatch$Handler"); + } + + /** + * Get the Java Method: WriteBatch.Handler#put + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getPutCfMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "put", "(I[B[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#put + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getPutMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred 
accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "put", "([B[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#merge + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getMergeCfMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "merge", "(I[B[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#merge + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getMergeMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "merge", "([B[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#delete + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getDeleteCfMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "delete", "(I[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#delete + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getDeleteMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "delete", "([B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#singleDelete + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getSingleDeleteCfMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "singleDelete", "(I[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#singleDelete + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getSingleDeleteMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "singleDelete", "([B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#deleteRange + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getDeleteRangeCfMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + 
if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "deleteRange", "(I[B[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#deleteRange + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getDeleteRangeMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "deleteRange", "([B[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#logData + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getLogDataMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "logData", "([B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#putBlobIndex + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getPutBlobIndexCfMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "putBlobIndex", "(I[B[B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#markBeginPrepare + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getMarkBeginPrepareMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "markBeginPrepare", "()V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#markEndPrepare + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getMarkEndPrepareMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "markEndPrepare", "([B)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#markNoop + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getMarkNoopMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "markNoop", "(Z)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: WriteBatch.Handler#markRollback + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + 
+   * be retrieved
+   */
+  static jmethodID getMarkRollbackMethodId(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "markRollback", "([B)V");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: WriteBatch.Handler#markCommit
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getMarkCommitMethodId(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "markCommit", "([B)V");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: WriteBatch.Handler#markCommitWithTimestamp
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getMarkCommitWithTimestampMethodId(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jclazz, "markCommitWithTimestamp", "([B[B)V");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: WriteBatch.Handler#shouldContinue
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getContinueMethodId(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "shouldContinue", "()Z");
+    assert(mid != nullptr);
+    return mid;
+  }
+};
+
+class WriteBatchSavePointJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.WriteBatch.SavePoint
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/WriteBatch$SavePoint");
+  }
+
+  /**
+   * Get the Java Method: WriteBatch.SavePoint constructor
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getConstructorMethodId(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "<init>", "(JJJ)V");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Create a new Java org.rocksdb.WriteBatch.SavePoint object
+   *
+   * @param env A pointer to the Java environment
+   * @param save_point A reference to a
+   *     ROCKSDB_NAMESPACE::WriteBatch::SavePoint object
+   *
+   * @return A reference to a Java org.rocksdb.WriteBatch.SavePoint object, or
+   *     nullptr if an exception occurs
+   */
+  static jobject construct(JNIEnv* env, const SavePoint& save_point) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = getConstructorMethodId(env);
+    if (mid == nullptr) {
+      // exception
thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + jobject jsave_point = + env->NewObject(jclazz, mid, static_cast(save_point.size), + static_cast(save_point.count), + static_cast(save_point.content_flags)); + if (env->ExceptionCheck()) { + return nullptr; + } + + return jsave_point; + } +}; + +// The portal class for org.rocksdb.WriteBatchWithIndex +class WriteBatchWithIndexJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.WriteBatchWithIndex + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, + "org/rocksdb/WriteBatchWithIndex"); + } +}; + +// The portal class for org.rocksdb.HistogramData +class HistogramDataJni : public JavaClass { + public: + /** + * Get the Java Class org.rocksdb.HistogramData + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/HistogramData"); + } + + /** + * Get the Java Method: HistogramData constructor + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getConstructorMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "", "(DDDDDDJJD)V"); + assert(mid != nullptr); + return mid; + } +}; + +// The portal class for org.rocksdb.BackupEngineOptions +class BackupEngineOptionsJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.BackupEngineOptions + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, + "org/rocksdb/BackupEngineOptions"); + } +}; + +// The portal class for org.rocksdb.BackupEngine +class BackupEngineJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.BackupEngine + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/BackupEngine"); + } +}; + +// The portal class for org.rocksdb.RocksIterator +class IteratorJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.RocksIterator + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return 
RocksDBNativeClass::getJClass(env, "org/rocksdb/RocksIterator"); + } +}; + +// The portal class for org.rocksdb.Filter +class FilterJni + : public RocksDBNativeClass< + std::shared_ptr*, FilterJni> { + public: + /** + * Get the Java Class org.rocksdb.Filter + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/Filter"); + } +}; + +// The portal class for org.rocksdb.ColumnFamilyHandle +class ColumnFamilyHandleJni + : public RocksDBNativeClass { + public: + static jobject fromCppColumnFamilyHandle( + JNIEnv* env, const ROCKSDB_NAMESPACE::ColumnFamilyHandle* info) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + return env->NewObject(jclazz, ctor, GET_CPLUSPLUS_POINTER(info)); + } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID(clazz, "", "(J)V"); + } + + /** + * Get the Java Class org.rocksdb.ColumnFamilyHandle + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/ColumnFamilyHandle"); + } +}; + +// The portal class for org.rocksdb.FlushOptions +class FlushOptionsJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.FlushOptions + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/FlushOptions"); + } +}; + +// The portal class for org.rocksdb.ComparatorOptions +class ComparatorOptionsJni + : public RocksDBNativeClass< + ROCKSDB_NAMESPACE::ComparatorJniCallbackOptions*, + ComparatorOptionsJni> { + public: + /** + * Get the Java Class org.rocksdb.ComparatorOptions + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/ComparatorOptions"); + } +}; + +// The portal class for org.rocksdb.AbstractCompactionFilterFactory +class AbstractCompactionFilterFactoryJni + : public RocksDBNativeClass< + const ROCKSDB_NAMESPACE::CompactionFilterFactoryJniCallback*, + AbstractCompactionFilterFactoryJni> { + public: + /** + * Get the Java Class org.rocksdb.AbstractCompactionFilterFactory + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass( + env, 
"org/rocksdb/AbstractCompactionFilterFactory"); + } + + /** + * Get the Java Method: AbstractCompactionFilterFactory#name + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getNameMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = + env->GetMethodID(jclazz, "name", "()Ljava/lang/String;"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractCompactionFilterFactory#createCompactionFilter + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getCreateCompactionFilterMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = + env->GetMethodID(jclazz, "createCompactionFilter", "(ZZ)J"); + assert(mid != nullptr); + return mid; + } +}; + +// The portal class for org.rocksdb.AbstractTransactionNotifier +class AbstractTransactionNotifierJni + : public RocksDBNativeClass< + const ROCKSDB_NAMESPACE::TransactionNotifierJniCallback*, + AbstractTransactionNotifierJni> { + public: + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass( + env, "org/rocksdb/AbstractTransactionNotifier"); + } + + // Get the java method `snapshotCreated` + // of org.rocksdb.AbstractTransactionNotifier. + static jmethodID getSnapshotCreatedMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "snapshotCreated", "(J)V"); + assert(mid != nullptr); + return mid; + } +}; + +// The portal class for org.rocksdb.AbstractComparatorJniBridge +class AbstractComparatorJniBridge : public JavaClass { + public: + /** + * Get the Java Class org.rocksdb.AbstractComparatorJniBridge + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/AbstractComparatorJniBridge"); + } + + /** + * Get the Java Method: Comparator#compareInternal + * + * @param env A pointer to the Java environment + * @param jclazz the AbstractComparatorJniBridge class + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getCompareInternalMethodId(JNIEnv* env, jclass jclazz) { + static jmethodID mid = + env->GetStaticMethodID(jclazz, "compareInternal", + "(Lorg/rocksdb/AbstractComparator;Ljava/nio/" + "ByteBuffer;ILjava/nio/ByteBuffer;I)I"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: Comparator#findShortestSeparatorInternal + * + * @param env A pointer to the Java environment + * @param jclazz the AbstractComparatorJniBridge class + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getFindShortestSeparatorInternalMethodId(JNIEnv* env, + jclass jclazz) { + static jmethodID mid = + env->GetStaticMethodID(jclazz, "findShortestSeparatorInternal", + 
"(Lorg/rocksdb/AbstractComparator;Ljava/nio/" + "ByteBuffer;ILjava/nio/ByteBuffer;I)I"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: Comparator#findShortSuccessorInternal + * + * @param env A pointer to the Java environment + * @param jclazz the AbstractComparatorJniBridge class + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getFindShortSuccessorInternalMethodId(JNIEnv* env, + jclass jclazz) { + static jmethodID mid = env->GetStaticMethodID( + jclazz, "findShortSuccessorInternal", + "(Lorg/rocksdb/AbstractComparator;Ljava/nio/ByteBuffer;I)I"); + assert(mid != nullptr); + return mid; + } +}; + +// The portal class for org.rocksdb.AbstractComparator +class AbstractComparatorJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.AbstractComparator + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractComparator"); + } + + /** + * Get the Java Method: Comparator#name + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getNameMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = + env->GetMethodID(jclazz, "name", "()Ljava/lang/String;"); + assert(mid != nullptr); + return mid; + } +}; + +// The portal class for org.rocksdb.AbstractSlice +class AbstractSliceJni + : public NativeRocksMutableObject { + public: + /** + * Get the Java Class org.rocksdb.AbstractSlice + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractSlice"); + } +}; + +// The portal class for org.rocksdb.Slice +class SliceJni + : public NativeRocksMutableObject { + public: + /** + * Get the Java Class org.rocksdb.Slice + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/Slice"); + } + + /** + * Constructs a Slice object + * + * @param env A pointer to the Java environment + * + * @return A reference to a Java Slice object, or a nullptr if an + * exception occurs + */ + static jobject construct0(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "", "()V"); + if (mid == nullptr) { + // exception occurred accessing method + return nullptr; + } + + jobject jslice = env->NewObject(jclazz, mid); + if (env->ExceptionCheck()) { + return nullptr; + } + + return jslice; + } +}; + +// The portal class for org.rocksdb.DirectSlice 
+// The portal class for org.rocksdb.DirectSlice
+class DirectSliceJni
+    : public NativeRocksMutableObject<const ROCKSDB_NAMESPACE::Slice*,
+                                      DirectSliceJni> {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.DirectSlice
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return RocksDBNativeClass::getJClass(env, "org/rocksdb/DirectSlice");
+  }
+
+  /**
+   * Constructs a DirectSlice object
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return A reference to a Java DirectSlice object, or a nullptr if an
+   *     exception occurs
+   */
+  static jobject construct0(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "<init>", "()V");
+    if (mid == nullptr) {
+      // exception occurred accessing method
+      return nullptr;
+    }
+
+    jobject jdirect_slice = env->NewObject(jclazz, mid);
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+
+    return jdirect_slice;
+  }
+};
+
+// The portal class for org.rocksdb.BackupInfo
+class BackupInfoJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.BackupInfo
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/BackupInfo");
+  }
+
+  /**
+   * Constructs a BackupInfo object
+   *
+   * @param env A pointer to the Java environment
+   * @param backup_id id of the backup
+   * @param timestamp timestamp of the backup
+   * @param size size of the backup
+   * @param number_files number of files related to the backup
+   * @param app_metadata application specific metadata
+   *
+   * @return A reference to a Java BackupInfo object, or a nullptr if an
+   *     exception occurs
+   */
+  static jobject construct0(JNIEnv* env, uint32_t backup_id, int64_t timestamp,
+                            uint64_t size, uint32_t number_files,
+                            const std::string& app_metadata) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jclazz, "<init>", "(IJJILjava/lang/String;)V");
+    if (mid == nullptr) {
+      // exception occurred accessing method
+      return nullptr;
+    }
+
+    jstring japp_metadata = nullptr;
+    // a std::string cannot be compared against nullptr; only convert
+    // non-empty metadata to a Java string
+    if (!app_metadata.empty()) {
+      japp_metadata = env->NewStringUTF(app_metadata.c_str());
+      if (japp_metadata == nullptr) {
+        // exception occurred creating java string
+        return nullptr;
+      }
+    }
+
+    jobject jbackup_info = env->NewObject(jclazz, mid, backup_id, timestamp,
+                                          size, number_files, japp_metadata);
+    if (env->ExceptionCheck()) {
+      env->DeleteLocalRef(japp_metadata);
+      return nullptr;
+    }
+
+    return jbackup_info;
+  }
+};
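+
+// A minimal sketch (hypothetical, not part of the original sources) of
+// converting a single C++ BackupInfo with construct0(); BackupInfoListJni
+// below generalizes this over a whole std::vector:
+//
+//   jobject jinfo = BackupInfoJni::construct0(
+//       env, info.backup_id, info.timestamp, info.size, info.number_files,
+//       info.app_metadata);
+//   if (jinfo == nullptr) {
+//     // a Java exception (e.g. OutOfMemoryError) is already pending
+//   }
+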
+class BackupInfoListJni {
+ public:
+  /**
+   * Converts a C++ std::vector<ROCKSDB_NAMESPACE::BackupInfo> object to
+   * a Java ArrayList<org.rocksdb.BackupInfo> object
+   *
+   * @param env A pointer to the Java environment
+   * @param backup_infos A vector of BackupInfo
+   *
+   * @return Either a reference to a Java ArrayList object, or a nullptr
+   *     if an exception occurs
+   */
+  static jobject getBackupInfo(
+      JNIEnv* env, std::vector<ROCKSDB_NAMESPACE::BackupInfo> backup_infos) {
+    jclass jarray_list_clazz =
+        ROCKSDB_NAMESPACE::ListJni::getArrayListClass(env);
+    if (jarray_list_clazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID cstr_mid =
+        ROCKSDB_NAMESPACE::ListJni::getArrayListConstructorMethodId(env);
+    if (cstr_mid == nullptr) {
+      // exception occurred accessing method
+      return nullptr;
+    }
+
+    jmethodID add_mid = ROCKSDB_NAMESPACE::ListJni::getListAddMethodId(env);
+    if (add_mid == nullptr) {
+      // exception occurred accessing method
+      return nullptr;
+    }
+
+    // create java list; ArrayList(int) expects a jint capacity
+    jobject jbackup_info_handle_list =
+        env->NewObject(jarray_list_clazz, cstr_mid,
+                       static_cast<jint>(backup_infos.size()));
+    if (env->ExceptionCheck()) {
+      // exception occurred constructing object
+      return nullptr;
+    }
+
+    // insert in java list
+    auto end = backup_infos.end();
+    for (auto it = backup_infos.begin(); it != end; ++it) {
+      auto backup_info = *it;
+
+      jobject obj = ROCKSDB_NAMESPACE::BackupInfoJni::construct0(
+          env, backup_info.backup_id, backup_info.timestamp, backup_info.size,
+          backup_info.number_files, backup_info.app_metadata);
+      if (env->ExceptionCheck()) {
+        // exception occurred constructing object
+        if (obj != nullptr) {
+          env->DeleteLocalRef(obj);
+        }
+        if (jbackup_info_handle_list != nullptr) {
+          env->DeleteLocalRef(jbackup_info_handle_list);
+        }
+        return nullptr;
+      }
+
+      jboolean rs =
+          env->CallBooleanMethod(jbackup_info_handle_list, add_mid, obj);
+      if (env->ExceptionCheck() || rs == JNI_FALSE) {
+        // exception occurred calling method, or could not add
+        if (obj != nullptr) {
+          env->DeleteLocalRef(obj);
+        }
+        if (jbackup_info_handle_list != nullptr) {
+          env->DeleteLocalRef(jbackup_info_handle_list);
+        }
+        return nullptr;
+      }
+    }
+
+    return jbackup_info_handle_list;
+  }
+};
+
+// The portal class for org.rocksdb.WBWIRocksIterator
+class WBWIRocksIteratorJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.WBWIRocksIterator
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator");
+  }
+
+  /**
+   * Get the Java Field: WBWIRocksIterator#entry
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Field ID or nullptr if the class or field id could not
+   *     be retrieved
+   */
+  static jfieldID getWriteEntryField(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jfieldID fid = env->GetFieldID(
+        jclazz, "entry", "Lorg/rocksdb/WBWIRocksIterator$WriteEntry;");
+    assert(fid != nullptr);
+    return fid;
+  }
+
+  /**
+   * Gets the value of the WBWIRocksIterator#entry
+   *
+   * @param env A pointer to the Java environment
+   * @param jwbwi_rocks_iterator A reference to a WBWIIterator
+   *
+   * @return A reference to a Java WBWIRocksIterator.WriteEntry object, or
+   *     a nullptr if an exception occurs
+   */
+  static jobject getWriteEntry(JNIEnv* env, jobject jwbwi_rocks_iterator) {
+    assert(jwbwi_rocks_iterator != nullptr);
+
+    jfieldID jwrite_entry_field = getWriteEntryField(env);
+    if (jwrite_entry_field == nullptr) {
+      // exception occurred accessing the field
+      return nullptr;
+    }
+
+    jobject jwe =
+        env->GetObjectField(jwbwi_rocks_iterator, jwrite_entry_field);
+    assert(jwe != nullptr);
+    return jwe;
+  }
+};
+
+// The portal class for org.rocksdb.WBWIRocksIterator.WriteType
+class WriteTypeJni : public JavaClass {
+ public:
+  /**
+   *
Get the PUT enum field value of WBWIRocksIterator.WriteType + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject PUT(JNIEnv* env) { return getEnum(env, "PUT"); } + + /** + * Get the MERGE enum field value of WBWIRocksIterator.WriteType + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject MERGE(JNIEnv* env) { return getEnum(env, "MERGE"); } + + /** + * Get the DELETE enum field value of WBWIRocksIterator.WriteType + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject DELETE(JNIEnv* env) { return getEnum(env, "DELETE"); } + + /** + * Get the LOG enum field value of WBWIRocksIterator.WriteType + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject LOG(JNIEnv* env) { return getEnum(env, "LOG"); } + + // Returns the equivalent org.rocksdb.WBWIRocksIterator.WriteType for the + // provided C++ ROCKSDB_NAMESPACE::WriteType enum + static jbyte toJavaWriteType(const ROCKSDB_NAMESPACE::WriteType& writeType) { + switch (writeType) { + case ROCKSDB_NAMESPACE::WriteType::kPutRecord: + return 0x0; + case ROCKSDB_NAMESPACE::WriteType::kMergeRecord: + return 0x1; + case ROCKSDB_NAMESPACE::WriteType::kDeleteRecord: + return 0x2; + case ROCKSDB_NAMESPACE::WriteType::kSingleDeleteRecord: + return 0x3; + case ROCKSDB_NAMESPACE::WriteType::kDeleteRangeRecord: + return 0x4; + case ROCKSDB_NAMESPACE::WriteType::kLogDataRecord: + return 0x5; + case ROCKSDB_NAMESPACE::WriteType::kXIDRecord: + return 0x6; + default: + return 0x7F; // undefined + } + } + + private: + /** + * Get the Java Class org.rocksdb.WBWIRocksIterator.WriteType + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator$WriteType"); + } + + /** + * Get an enum field of org.rocksdb.WBWIRocksIterator.WriteType + * + * @param env A pointer to the Java environment + * @param name The name of the enum field + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject getEnum(JNIEnv* env, const char name[]) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jfieldID jfid = env->GetStaticFieldID( + jclazz, name, "Lorg/rocksdb/WBWIRocksIterator$WriteType;"); + if (env->ExceptionCheck()) { + // exception occurred while getting field + return nullptr; + } else if (jfid == nullptr) { + return nullptr; + } + + jobject jwrite_type = env->GetStaticObjectField(jclazz, jfid); + assert(jwrite_type != nullptr); + return jwrite_type; + } +}; + +// The portal class for org.rocksdb.WBWIRocksIterator.WriteEntry +class WriteEntryJni : public JavaClass { + public: + /** + * Get the Java Class org.rocksdb.WBWIRocksIterator.WriteEntry + * + * @param env A pointer to the Java 
environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, + "org/rocksdb/WBWIRocksIterator$WriteEntry"); + } +}; + +// The portal class for org.rocksdb.InfoLogLevel +class InfoLogLevelJni : public JavaClass { + public: + /** + * Get the DEBUG_LEVEL enum field value of InfoLogLevel + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject DEBUG_LEVEL(JNIEnv* env) { + return getEnum(env, "DEBUG_LEVEL"); + } + + /** + * Get the INFO_LEVEL enum field value of InfoLogLevel + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject INFO_LEVEL(JNIEnv* env) { return getEnum(env, "INFO_LEVEL"); } + + /** + * Get the WARN_LEVEL enum field value of InfoLogLevel + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject WARN_LEVEL(JNIEnv* env) { return getEnum(env, "WARN_LEVEL"); } + + /** + * Get the ERROR_LEVEL enum field value of InfoLogLevel + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject ERROR_LEVEL(JNIEnv* env) { + return getEnum(env, "ERROR_LEVEL"); + } + + /** + * Get the FATAL_LEVEL enum field value of InfoLogLevel + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject FATAL_LEVEL(JNIEnv* env) { + return getEnum(env, "FATAL_LEVEL"); + } + + /** + * Get the HEADER_LEVEL enum field value of InfoLogLevel + * + * @param env A pointer to the Java environment + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject HEADER_LEVEL(JNIEnv* env) { + return getEnum(env, "HEADER_LEVEL"); + } + + private: + /** + * Get the Java Class org.rocksdb.InfoLogLevel + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/InfoLogLevel"); + } + + /** + * Get an enum field of org.rocksdb.InfoLogLevel + * + * @param env A pointer to the Java environment + * @param name The name of the enum field + * + * @return A reference to the enum field value or a nullptr if + * the enum field value could not be retrieved + */ + static jobject getEnum(JNIEnv* env, const char name[]) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jfieldID jfid = + env->GetStaticFieldID(jclazz, name, "Lorg/rocksdb/InfoLogLevel;"); + if (env->ExceptionCheck()) { + // exception occurred while getting field + return nullptr; + } else if (jfid == nullptr) { + return nullptr; + } + + jobject jinfo_log_level = 
env->GetStaticObjectField(jclazz, jfid);
+    assert(jinfo_log_level != nullptr);
+    return jinfo_log_level;
+  }
+};
+
+// The portal class for org.rocksdb.Logger
+class LoggerJni
+    : public RocksDBNativeClass<
+          std::shared_ptr<ROCKSDB_NAMESPACE::LoggerJniCallback>*, LoggerJni> {
+ public:
+  /**
+   * Get the Java Class org/rocksdb/Logger
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return RocksDBNativeClass::getJClass(env, "org/rocksdb/Logger");
+  }
+
+  /**
+   * Get the Java Method: Logger#log
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getLogMethodId(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(
+        jclazz, "log", "(Lorg/rocksdb/InfoLogLevel;Ljava/lang/String;)V");
+    assert(mid != nullptr);
+    return mid;
+  }
+};
+
+// The portal class for org.rocksdb.TransactionLogIterator.BatchResult
+class BatchResultJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.TransactionLogIterator.BatchResult
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(
+        env, "org/rocksdb/TransactionLogIterator$BatchResult");
+  }
+
+  /**
+   * Create a new Java org.rocksdb.TransactionLogIterator.BatchResult object
+   * with the same properties as the provided C++
+   * ROCKSDB_NAMESPACE::BatchResult object
+   *
+   * @param env A pointer to the Java environment
+   * @param batch_result The ROCKSDB_NAMESPACE::BatchResult object
+   *
+   * @return A reference to a Java
+   *     org.rocksdb.TransactionLogIterator.BatchResult object,
+   *     or nullptr if an exception occurs
+   */
+  static jobject construct(JNIEnv* env,
+                           ROCKSDB_NAMESPACE::BatchResult& batch_result) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = env->GetMethodID(jclazz, "<init>", "(JJ)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    jobject jbatch_result = env->NewObject(jclazz, mid, batch_result.sequence,
+                                           batch_result.writeBatchPtr.get());
+    if (jbatch_result == nullptr) {
+      // exception thrown: InstantiationException or OutOfMemoryError
+      return nullptr;
+    }
+
+    batch_result.writeBatchPtr.release();
+    return jbatch_result;
+  }
+};
+
+// The portal class for org.rocksdb.BottommostLevelCompaction
+class BottommostLevelCompactionJni {
+ public:
+  // Returns the equivalent org.rocksdb.BottommostLevelCompaction for the
+  // provided C++ ROCKSDB_NAMESPACE::BottommostLevelCompaction enum
+  static jint toJavaBottommostLevelCompaction(
+      const ROCKSDB_NAMESPACE::BottommostLevelCompaction&
+          bottommost_level_compaction) {
+    switch (bottommost_level_compaction) {
+      case ROCKSDB_NAMESPACE::BottommostLevelCompaction::kSkip:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::BottommostLevelCompaction::
+          kIfHaveCompactionFilter:
+        return 0x1;
+      case
ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForce: + return 0x2; + case ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForceOptimized: + return 0x3; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::BottommostLevelCompaction + // enum for the provided Java org.rocksdb.BottommostLevelCompaction + static ROCKSDB_NAMESPACE::BottommostLevelCompaction + toCppBottommostLevelCompaction(jint bottommost_level_compaction) { + switch (bottommost_level_compaction) { + case 0x0: + return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kSkip; + case 0x1: + return ROCKSDB_NAMESPACE::BottommostLevelCompaction:: + kIfHaveCompactionFilter; + case 0x2: + return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForce; + case 0x3: + return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForceOptimized; + default: + // undefined/default + return ROCKSDB_NAMESPACE::BottommostLevelCompaction:: + kIfHaveCompactionFilter; + } + } +}; + +// The portal class for org.rocksdb.CompactionStopStyle +class CompactionStopStyleJni { + public: + // Returns the equivalent org.rocksdb.CompactionStopStyle for the provided + // C++ ROCKSDB_NAMESPACE::CompactionStopStyle enum + static jbyte toJavaCompactionStopStyle( + const ROCKSDB_NAMESPACE::CompactionStopStyle& compaction_stop_style) { + switch (compaction_stop_style) { + case ROCKSDB_NAMESPACE::CompactionStopStyle:: + kCompactionStopStyleSimilarSize: + return 0x0; + case ROCKSDB_NAMESPACE::CompactionStopStyle:: + kCompactionStopStyleTotalSize: + return 0x1; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionStopStyle enum for + // the provided Java org.rocksdb.CompactionStopStyle + static ROCKSDB_NAMESPACE::CompactionStopStyle toCppCompactionStopStyle( + jbyte jcompaction_stop_style) { + switch (jcompaction_stop_style) { + case 0x0: + return ROCKSDB_NAMESPACE::CompactionStopStyle:: + kCompactionStopStyleSimilarSize; + case 0x1: + return ROCKSDB_NAMESPACE::CompactionStopStyle:: + kCompactionStopStyleTotalSize; + default: + // undefined/default + return ROCKSDB_NAMESPACE::CompactionStopStyle:: + kCompactionStopStyleSimilarSize; + } + } +}; + +// The portal class for org.rocksdb.CompressionType +class CompressionTypeJni { + public: + // Returns the equivalent org.rocksdb.CompressionType for the provided + // C++ ROCKSDB_NAMESPACE::CompressionType enum + static jbyte toJavaCompressionType( + const ROCKSDB_NAMESPACE::CompressionType& compression_type) { + switch (compression_type) { + case ROCKSDB_NAMESPACE::CompressionType::kNoCompression: + return 0x0; + case ROCKSDB_NAMESPACE::CompressionType::kSnappyCompression: + return 0x1; + case ROCKSDB_NAMESPACE::CompressionType::kZlibCompression: + return 0x2; + case ROCKSDB_NAMESPACE::CompressionType::kBZip2Compression: + return 0x3; + case ROCKSDB_NAMESPACE::CompressionType::kLZ4Compression: + return 0x4; + case ROCKSDB_NAMESPACE::CompressionType::kLZ4HCCompression: + return 0x5; + case ROCKSDB_NAMESPACE::CompressionType::kXpressCompression: + return 0x6; + case ROCKSDB_NAMESPACE::CompressionType::kZSTD: + return 0x7; + case ROCKSDB_NAMESPACE::CompressionType::kDisableCompressionOption: + default: + return 0x7F; + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompressionType enum for the + // provided Java org.rocksdb.CompressionType + static ROCKSDB_NAMESPACE::CompressionType toCppCompressionType( + jbyte jcompression_type) { + switch (jcompression_type) { + case 0x0: + return 
ROCKSDB_NAMESPACE::CompressionType::kNoCompression; + case 0x1: + return ROCKSDB_NAMESPACE::CompressionType::kSnappyCompression; + case 0x2: + return ROCKSDB_NAMESPACE::CompressionType::kZlibCompression; + case 0x3: + return ROCKSDB_NAMESPACE::CompressionType::kBZip2Compression; + case 0x4: + return ROCKSDB_NAMESPACE::CompressionType::kLZ4Compression; + case 0x5: + return ROCKSDB_NAMESPACE::CompressionType::kLZ4HCCompression; + case 0x6: + return ROCKSDB_NAMESPACE::CompressionType::kXpressCompression; + case 0x7: + return ROCKSDB_NAMESPACE::CompressionType::kZSTD; + case 0x7F: + default: + return ROCKSDB_NAMESPACE::CompressionType::kDisableCompressionOption; + } + } +}; + +// The portal class for org.rocksdb.CompactionPriority +class CompactionPriorityJni { + public: + // Returns the equivalent org.rocksdb.CompactionPriority for the provided + // C++ ROCKSDB_NAMESPACE::CompactionPri enum + static jbyte toJavaCompactionPriority( + const ROCKSDB_NAMESPACE::CompactionPri& compaction_priority) { + switch (compaction_priority) { + case ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize: + return 0x0; + case ROCKSDB_NAMESPACE::CompactionPri::kOldestLargestSeqFirst: + return 0x1; + case ROCKSDB_NAMESPACE::CompactionPri::kOldestSmallestSeqFirst: + return 0x2; + case ROCKSDB_NAMESPACE::CompactionPri::kMinOverlappingRatio: + return 0x3; + case ROCKSDB_NAMESPACE::CompactionPri::kRoundRobin: + return 0x4; + default: + return 0x0; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionPri enum for the + // provided Java org.rocksdb.CompactionPriority + static ROCKSDB_NAMESPACE::CompactionPri toCppCompactionPriority( + jbyte jcompaction_priority) { + switch (jcompaction_priority) { + case 0x0: + return ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize; + case 0x1: + return ROCKSDB_NAMESPACE::CompactionPri::kOldestLargestSeqFirst; + case 0x2: + return ROCKSDB_NAMESPACE::CompactionPri::kOldestSmallestSeqFirst; + case 0x3: + return ROCKSDB_NAMESPACE::CompactionPri::kMinOverlappingRatio; + case 0x4: + return ROCKSDB_NAMESPACE::CompactionPri::kRoundRobin; + default: + // undefined/default + return ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize; + } + } +}; + +// The portal class for org.rocksdb.AccessHint +class AccessHintJni { + public: + // Returns the equivalent org.rocksdb.AccessHint for the provided + // C++ ROCKSDB_NAMESPACE::DBOptions::AccessHint enum + static jbyte toJavaAccessHint( + const ROCKSDB_NAMESPACE::DBOptions::AccessHint& access_hint) { + switch (access_hint) { + case ROCKSDB_NAMESPACE::DBOptions::AccessHint::NONE: + return 0x0; + case ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL: + return 0x1; + case ROCKSDB_NAMESPACE::DBOptions::AccessHint::SEQUENTIAL: + return 0x2; + case ROCKSDB_NAMESPACE::DBOptions::AccessHint::WILLNEED: + return 0x3; + default: + // undefined/default + return 0x1; + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::DBOptions::AccessHint enum + // for the provided Java org.rocksdb.AccessHint + static ROCKSDB_NAMESPACE::DBOptions::AccessHint toCppAccessHint( + jbyte jaccess_hint) { + switch (jaccess_hint) { + case 0x0: + return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NONE; + case 0x1: + return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL; + case 0x2: + return ROCKSDB_NAMESPACE::DBOptions::AccessHint::SEQUENTIAL; + case 0x3: + return ROCKSDB_NAMESPACE::DBOptions::AccessHint::WILLNEED; + default: + // undefined/default + return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL; + } + } +}; + +// The portal class 
for org.rocksdb.WALRecoveryMode +class WALRecoveryModeJni { + public: + // Returns the equivalent org.rocksdb.WALRecoveryMode for the provided + // C++ ROCKSDB_NAMESPACE::WALRecoveryMode enum + static jbyte toJavaWALRecoveryMode( + const ROCKSDB_NAMESPACE::WALRecoveryMode& wal_recovery_mode) { + switch (wal_recovery_mode) { + case ROCKSDB_NAMESPACE::WALRecoveryMode::kTolerateCorruptedTailRecords: + return 0x0; + case ROCKSDB_NAMESPACE::WALRecoveryMode::kAbsoluteConsistency: + return 0x1; + case ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery: + return 0x2; + case ROCKSDB_NAMESPACE::WALRecoveryMode::kSkipAnyCorruptedRecords: + return 0x3; + default: + // undefined/default + return 0x2; + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::WALRecoveryMode enum for the + // provided Java org.rocksdb.WALRecoveryMode + static ROCKSDB_NAMESPACE::WALRecoveryMode toCppWALRecoveryMode( + jbyte jwal_recovery_mode) { + switch (jwal_recovery_mode) { + case 0x0: + return ROCKSDB_NAMESPACE::WALRecoveryMode:: + kTolerateCorruptedTailRecords; + case 0x1: + return ROCKSDB_NAMESPACE::WALRecoveryMode::kAbsoluteConsistency; + case 0x2: + return ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery; + case 0x3: + return ROCKSDB_NAMESPACE::WALRecoveryMode::kSkipAnyCorruptedRecords; + default: + // undefined/default + return ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery; + } + } +}; + +// The portal class for org.rocksdb.TickerType +class TickerTypeJni { + public: + // Returns the equivalent org.rocksdb.TickerType for the provided + // C++ ROCKSDB_NAMESPACE::Tickers enum + static jbyte toJavaTickerType(const ROCKSDB_NAMESPACE::Tickers& tickers) { + switch (tickers) { + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS: + return 0x0; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_HIT: + return 0x1; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD: + return 0x2; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD_FAILURES: + return 0x3; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_MISS: + return 0x4; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_HIT: + return 0x5; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_ADD: + return 0x6; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_BYTES_INSERT: + return 0x7; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_MISS: + return 0x9; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_HIT: + return 0xA; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_ADD: + return 0xB; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_BYTES_INSERT: + return 0xC; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_MISS: + return 0xE; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_HIT: + return 0xF; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_ADD: + return 0x10; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_BYTES_INSERT: + return 0x11; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_READ: + return 0x12; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_WRITE: + return 0x13; + case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_USEFUL: + return 0x14; + case ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_HIT: + return 0x15; + case ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_MISS: + return 0x16; + case ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_HIT: + return 0x17; + case ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_MISS: + return 0x18; + case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_HIT: + return 0x19; + case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_MISS: + return 0x1A; + case 
ROCKSDB_NAMESPACE::Tickers::GET_HIT_L0: + return 0x1B; + case ROCKSDB_NAMESPACE::Tickers::GET_HIT_L1: + return 0x1C; + case ROCKSDB_NAMESPACE::Tickers::GET_HIT_L2_AND_UP: + return 0x1D; + case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_NEWER_ENTRY: + return 0x1E; + case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_OBSOLETE: + return 0x1F; + case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_RANGE_DEL: + return 0x20; + case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_USER: + return 0x21; + case ROCKSDB_NAMESPACE::Tickers::COMPACTION_RANGE_DEL_DROP_OBSOLETE: + return 0x22; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_WRITTEN: + return 0x23; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_READ: + return 0x24; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_UPDATED: + return 0x25; + case ROCKSDB_NAMESPACE::Tickers::BYTES_WRITTEN: + return 0x26; + case ROCKSDB_NAMESPACE::Tickers::BYTES_READ: + return 0x27; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK: + return 0x28; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT: + return 0x29; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV: + return 0x2A; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK_FOUND: + return 0x2B; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT_FOUND: + return 0x2C; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV_FOUND: + return 0x2D; + case ROCKSDB_NAMESPACE::Tickers::ITER_BYTES_READ: + return 0x2E; + case ROCKSDB_NAMESPACE::Tickers::NO_FILE_OPENS: + return 0x30; + case ROCKSDB_NAMESPACE::Tickers::NO_FILE_ERRORS: + return 0x31; + case ROCKSDB_NAMESPACE::Tickers::STALL_MICROS: + return 0x35; + case ROCKSDB_NAMESPACE::Tickers::DB_MUTEX_WAIT_MICROS: + return 0x36; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_CALLS: + return 0x39; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_READ: + return 0x3A; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_BYTES_READ: + return 0x3B; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_MERGE_FAILURES: + return 0x3D; + case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_CHECKED: + return 0x3E; + case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_USEFUL: + return 0x3F; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_OF_RESEEKS_IN_ITERATION: + return 0x40; + case ROCKSDB_NAMESPACE::Tickers::GET_UPDATES_SINCE_CALLS: + return 0x41; + case ROCKSDB_NAMESPACE::Tickers::WAL_FILE_SYNCED: + return 0x46; + case ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES: + return 0x47; + case ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_SELF: + return 0x48; + case ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_OTHER: + return 0x49; + case ROCKSDB_NAMESPACE::Tickers::WRITE_WITH_WAL: + return 0x4B; + case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES: + return 0x4C; + case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES: + return 0x4D; + case ROCKSDB_NAMESPACE::Tickers::FLUSH_WRITE_BYTES: + return 0x4E; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_DIRECT_LOAD_TABLE_PROPERTIES: + return 0x4F; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_ACQUIRES: + return 0x50; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_RELEASES: + return 0x51; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_CLEANUPS: + return 0x52; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_COMPRESSED: + return 0x53; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_DECOMPRESSED: + return 0x54; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_NOT_COMPRESSED: + return 0x55; + case ROCKSDB_NAMESPACE::Tickers::MERGE_OPERATION_TOTAL_TIME: + return 0x56; + case ROCKSDB_NAMESPACE::Tickers::FILTER_OPERATION_TOTAL_TIME: + return 
0x57; + case ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_HIT: + return 0x58; + case ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_MISS: + return 0x59; + case ROCKSDB_NAMESPACE::Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES: + return 0x5A; + case ROCKSDB_NAMESPACE::Tickers::READ_AMP_TOTAL_READ_BYTES: + return 0x5B; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_RATE_LIMITER_DRAINS: + return 0x5C; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_ITER_SKIP: + return 0x5D; + case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND: + return 0x5E; + case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED: + // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX). + return -0x01; + case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED: + return 0x60; + case ROCKSDB_NAMESPACE::Tickers::COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE: + return 0x61; + case ROCKSDB_NAMESPACE::Tickers::COMPACTION_CANCELLED: + return 0x62; + case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_POSITIVE: + return 0x63; + case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_TRUE_POSITIVE: + return 0x64; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PUT: + return 0x65; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_WRITE: + return 0x66; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_GET: + return 0x67; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_MULTIGET: + return 0x68; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_SEEK: + return 0x69; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_NEXT: + return 0x6A; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PREV: + return 0x6B; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_WRITTEN: + return 0x6C; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_READ: + return 0x6D; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_WRITTEN: + return 0x6E; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_READ: + return 0x6F; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED: + return 0x70; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED_TTL: + return 0x71; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB: + return 0x72; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB_TTL: + return 0x73; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_WRITTEN: + return 0x74; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_READ: + return 0x75; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_SYNCED: + return 0x76; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_COUNT: + return 0x77; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_SIZE: + return 0x78; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_COUNT: + return 0x79; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_SIZE: + return 0x7A; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_FILES: + return 0x7B; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_NEW_FILES: + return 0x7C; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_FAILURES: + return 0x7D; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_RELOCATED: + return -0x02; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_RELOCATED: + return -0x05; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_FILES_EVICTED: + return -0x06; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_KEYS_EVICTED: + return -0x07; + case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_BYTES_EVICTED: + return -0x08; + case ROCKSDB_NAMESPACE::Tickers::TXN_PREPARE_MUTEX_OVERHEAD: + return -0x09; + case ROCKSDB_NAMESPACE::Tickers::TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD: + return -0x0A; + case ROCKSDB_NAMESPACE::Tickers::TXN_DUPLICATE_KEY_OVERHEAD: + 
return -0x0B;
+      case ROCKSDB_NAMESPACE::Tickers::TXN_SNAPSHOT_MUTEX_OVERHEAD:
+        return -0x0C;
+      case ROCKSDB_NAMESPACE::Tickers::TXN_GET_TRY_AGAIN:
+        return -0x0D;
+      case ROCKSDB_NAMESPACE::Tickers::FILES_MARKED_TRASH:
+        return -0x0E;
+      case ROCKSDB_NAMESPACE::Tickers::FILES_DELETED_IMMEDIATELY:
+        return -0x0F;
+      case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_MARKED:
+        return -0x10;
+      case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_PERIODIC:
+        return -0x11;
+      case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_TTL:
+        return -0x12;
+      case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_MARKED:
+        return -0x13;
+      case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_PERIODIC:
+        return -0x14;
+      case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_TTL:
+        return -0x15;
+      case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_ERROR_COUNT:
+        return -0x16;
+      case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_IO_ERROR_COUNT:
+        return -0x17;
+      case ROCKSDB_NAMESPACE::Tickers::
+          ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT:
+        return -0x18;
+      case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_COUNT:
+        return -0x19;
+      case ROCKSDB_NAMESPACE::Tickers::
+          ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT:
+        return -0x1A;
+      case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT:
+        return -0x1B;
+      case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_PAYLOAD_BYTES_AT_FLUSH:
+        return -0x1C;
+      case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH:
+        return -0x1D;
+      case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS:
+        return -0x1E;
+      case ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES:
+        return -0x1F;
+      case ROCKSDB_NAMESPACE::Tickers::BACKUP_READ_BYTES:
+        return -0x20;
+      case ROCKSDB_NAMESPACE::Tickers::BACKUP_WRITE_BYTES:
+        return -0x21;
+      case ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_READ_BYTES:
+        return -0x22;
+      case ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES:
+        return -0x23;
+      case ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_BYTES:
+        return -0x24;
+      case ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_BYTES:
+        return -0x25;
+      case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_BYTES:
+        return -0x26;
+      case ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_COUNT:
+        return -0x27;
+      case ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT:
+        return -0x28;
+      case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT:
+        return -0x29;
+      case ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_BYTES:
+        return -0x2A;
+      case ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_COUNT:
+        return -0x2B;
+      case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES:
+        return -0x2C;
+      case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT:
+        return -0x2D;
+      case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT:
+        return -0x2E;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_MISS:
+        return -0x2F;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_HIT:
+        return -0x30;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD:
+        return -0x31;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD_FAILURES:
+        return -0x32;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_READ:
+        return -0x33;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_WRITE:
+        return -0x34;
+      case ROCKSDB_NAMESPACE::Tickers::READ_ASYNC_MICROS:
+        return -0x35;
+      case ROCKSDB_NAMESPACE::Tickers::ASYNC_READ_ERROR_COUNT:
+        return -0x36;
+      case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_FILTER_HITS:
+        return -0x37;
+      case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_INDEX_HITS:
+        return -0x38;
+      case
ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_DATA_HITS: + return -0x39; + case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS: + return -0x3A; + case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT: + return -0x3B; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_MISMATCH_COUNT: + return -0x3C; + case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: + // 0x5F was the max value in the initial copy of tickers to Java. + // Since these values are exposed directly to Java clients, we keep + // the value the same forever. + // + // TODO: This particular case seems confusing and unnecessary to pin the + // value since it's meant to be the number of tickers, not an actual + // ticker value. But we aren't yet in a position to fix it since the + // number of tickers doesn't fit in the Java representation (jbyte). + return 0x5F; + default: + // undefined/default + return 0x0; + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::Tickers enum for the + // provided Java org.rocksdb.TickerType + static ROCKSDB_NAMESPACE::Tickers toCppTickers(jbyte jticker_type) { + switch (jticker_type) { + case 0x0: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS; + case 0x1: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_HIT; + case 0x2: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD; + case 0x3: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD_FAILURES; + case 0x4: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_MISS; + case 0x5: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_HIT; + case 0x6: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_ADD; + case 0x7: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_BYTES_INSERT; + case 0x9: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_MISS; + case 0xA: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_HIT; + case 0xB: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_ADD; + case 0xC: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_BYTES_INSERT; + case 0xE: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_MISS; + case 0xF: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_HIT; + case 0x10: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_ADD; + case 0x11: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_BYTES_INSERT; + case 0x12: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_READ; + case 0x13: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_WRITE; + case 0x14: + return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_USEFUL; + case 0x15: + return ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_HIT; + case 0x16: + return ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_MISS; + case 0x17: + return ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_HIT; + case 0x18: + return ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_MISS; + case 0x19: + return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_HIT; + case 0x1A: + return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_MISS; + case 0x1B: + return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L0; + case 0x1C: + return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L1; + case 0x1D: + return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L2_AND_UP; + case 0x1E: + return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_NEWER_ENTRY; + case 0x1F: + return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_OBSOLETE; + case 0x20: + return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_RANGE_DEL; + case 0x21: + return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_USER; + case 0x22: + return ROCKSDB_NAMESPACE::Tickers::COMPACTION_RANGE_DEL_DROP_OBSOLETE; + case 0x23: + return 
ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_WRITTEN; + case 0x24: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_READ; + case 0x25: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_UPDATED; + case 0x26: + return ROCKSDB_NAMESPACE::Tickers::BYTES_WRITTEN; + case 0x27: + return ROCKSDB_NAMESPACE::Tickers::BYTES_READ; + case 0x28: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK; + case 0x29: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT; + case 0x2A: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV; + case 0x2B: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK_FOUND; + case 0x2C: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT_FOUND; + case 0x2D: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV_FOUND; + case 0x2E: + return ROCKSDB_NAMESPACE::Tickers::ITER_BYTES_READ; + case 0x30: + return ROCKSDB_NAMESPACE::Tickers::NO_FILE_OPENS; + case 0x31: + return ROCKSDB_NAMESPACE::Tickers::NO_FILE_ERRORS; + case 0x35: + return ROCKSDB_NAMESPACE::Tickers::STALL_MICROS; + case 0x36: + return ROCKSDB_NAMESPACE::Tickers::DB_MUTEX_WAIT_MICROS; + case 0x39: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_CALLS; + case 0x3A: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_READ; + case 0x3B: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_BYTES_READ; + case 0x3D: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_MERGE_FAILURES; + case 0x3E: + return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_CHECKED; + case 0x3F: + return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_USEFUL; + case 0x40: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_OF_RESEEKS_IN_ITERATION; + case 0x41: + return ROCKSDB_NAMESPACE::Tickers::GET_UPDATES_SINCE_CALLS; + case 0x46: + return ROCKSDB_NAMESPACE::Tickers::WAL_FILE_SYNCED; + case 0x47: + return ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES; + case 0x48: + return ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_SELF; + case 0x49: + return ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_OTHER; + case 0x4B: + return ROCKSDB_NAMESPACE::Tickers::WRITE_WITH_WAL; + case 0x4C: + return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES; + case 0x4D: + return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES; + case 0x4E: + return ROCKSDB_NAMESPACE::Tickers::FLUSH_WRITE_BYTES; + case 0x4F: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_DIRECT_LOAD_TABLE_PROPERTIES; + case 0x50: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_ACQUIRES; + case 0x51: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_RELEASES; + case 0x52: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_CLEANUPS; + case 0x53: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_COMPRESSED; + case 0x54: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_DECOMPRESSED; + case 0x55: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_NOT_COMPRESSED; + case 0x56: + return ROCKSDB_NAMESPACE::Tickers::MERGE_OPERATION_TOTAL_TIME; + case 0x57: + return ROCKSDB_NAMESPACE::Tickers::FILTER_OPERATION_TOTAL_TIME; + case 0x58: + return ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_HIT; + case 0x59: + return ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_MISS; + case 0x5A: + return ROCKSDB_NAMESPACE::Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES; + case 0x5B: + return ROCKSDB_NAMESPACE::Tickers::READ_AMP_TOTAL_READ_BYTES; + case 0x5C: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_RATE_LIMITER_DRAINS; + case 0x5D: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_ITER_SKIP; + case 0x5E: + return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND; + case -0x01: + // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX). 
+ return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED; + case 0x60: + return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED; + case 0x61: + return ROCKSDB_NAMESPACE::Tickers:: + COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE; + case 0x62: + return ROCKSDB_NAMESPACE::Tickers::COMPACTION_CANCELLED; + case 0x63: + return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_POSITIVE; + case 0x64: + return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_TRUE_POSITIVE; + case 0x65: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PUT; + case 0x66: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_WRITE; + case 0x67: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_GET; + case 0x68: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_MULTIGET; + case 0x69: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_SEEK; + case 0x6A: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_NEXT; + case 0x6B: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PREV; + case 0x6C: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_WRITTEN; + case 0x6D: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_READ; + case 0x6E: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_WRITTEN; + case 0x6F: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_READ; + case 0x70: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED; + case 0x71: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED_TTL; + case 0x72: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB; + case 0x73: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB_TTL; + case 0x74: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_WRITTEN; + case 0x75: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_READ; + case 0x76: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_SYNCED; + case 0x77: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_COUNT; + case 0x78: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_SIZE; + case 0x79: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_COUNT; + case 0x7A: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_SIZE; + case 0x7B: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_FILES; + case 0x7C: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_NEW_FILES; + case 0x7D: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_FAILURES; + case -0x02: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_RELOCATED; + case -0x05: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_RELOCATED; + case -0x06: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_FILES_EVICTED; + case -0x07: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_KEYS_EVICTED; + case -0x08: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_BYTES_EVICTED; + case -0x09: + return ROCKSDB_NAMESPACE::Tickers::TXN_PREPARE_MUTEX_OVERHEAD; + case -0x0A: + return ROCKSDB_NAMESPACE::Tickers::TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD; + case -0x0B: + return ROCKSDB_NAMESPACE::Tickers::TXN_DUPLICATE_KEY_OVERHEAD; + case -0x0C: + return ROCKSDB_NAMESPACE::Tickers::TXN_SNAPSHOT_MUTEX_OVERHEAD; + case -0x0D: + return ROCKSDB_NAMESPACE::Tickers::TXN_GET_TRY_AGAIN; + case -0x0E: + return ROCKSDB_NAMESPACE::Tickers::FILES_MARKED_TRASH; + case -0x0F: + return ROCKSDB_NAMESPACE::Tickers::FILES_DELETED_IMMEDIATELY; + case -0x10: + return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_MARKED; + case -0x11: + return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_PERIODIC; + case -0x12: + return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_TTL; + case -0x13: + return 
ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_MARKED; + case -0x14: + return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_PERIODIC; + case -0x15: + return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_TTL; + case -0x16: + return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_ERROR_COUNT; + case -0x17: + return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_IO_ERROR_COUNT; + case -0x18: + return ROCKSDB_NAMESPACE::Tickers:: + ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT; + case -0x19: + return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_COUNT; + case -0x1A: + return ROCKSDB_NAMESPACE::Tickers:: + ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT; + case -0x1B: + return ROCKSDB_NAMESPACE::Tickers:: + ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT; + case -0x1C: + return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_PAYLOAD_BYTES_AT_FLUSH; + case -0x1D: + return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH; + case -0x1E: + return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS; + case -0x1F: + return ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES; + case -0x20: + return ROCKSDB_NAMESPACE::Tickers::BACKUP_READ_BYTES; + case -0x21: + return ROCKSDB_NAMESPACE::Tickers::BACKUP_WRITE_BYTES; + case -0x22: + return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_READ_BYTES; + case -0x23: + return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES; + case -0x24: + return ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_BYTES; + case -0x25: + return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_BYTES; + case -0x26: + return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_BYTES; + case -0x27: + return ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_COUNT; + case -0x28: + return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT; + case -0x29: + return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT; + case -0x2A: + return ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_BYTES; + case -0x2B: + return ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_COUNT; + case -0x2C: + return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES; + case -0x2D: + return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT; + case -0x2E: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT; + case -0x2F: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_MISS; + case -0x30: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_HIT; + case -0x31: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD; + case -0x32: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD_FAILURES; + case -0x33: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_READ; + case -0x34: + return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_WRITE; + case -0x35: + return ROCKSDB_NAMESPACE::Tickers::READ_ASYNC_MICROS; + case -0x36: + return ROCKSDB_NAMESPACE::Tickers::ASYNC_READ_ERROR_COUNT; + case -0x37: + return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_FILTER_HITS; + case -0x38: + return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_INDEX_HITS; + case -0x39: + return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_DATA_HITS; + case -0x3A: + return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS; + case -0x3B: + return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT; + case -0x3C: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_MISMATCH_COUNT; + case 0x5F: + // 0x5F was the max value in the initial copy of tickers to Java. + // Since these values are exposed directly to Java clients, we keep + // the value the same forever. 
+ // + // TODO: This particular case seems confusing and unnecessary to pin the + // value since it's meant to be the number of tickers, not an actual + // ticker value. But we aren't yet in a position to fix it since the + // number of tickers doesn't fit in the Java representation (jbyte). + return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX; + + default: + // undefined/default + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS; + } + } +}; + +// The portal class for org.rocksdb.HistogramType +class HistogramTypeJni { + public: + // Returns the equivalent org.rocksdb.HistogramType for the provided + // C++ ROCKSDB_NAMESPACE::Histograms enum + static jbyte toJavaHistogramsType( + const ROCKSDB_NAMESPACE::Histograms& histograms) { + switch (histograms) { + case ROCKSDB_NAMESPACE::Histograms::DB_GET: + return 0x0; + case ROCKSDB_NAMESPACE::Histograms::DB_WRITE: + return 0x1; + case ROCKSDB_NAMESPACE::Histograms::COMPACTION_TIME: + return 0x2; + case ROCKSDB_NAMESPACE::Histograms::SUBCOMPACTION_SETUP_TIME: + return 0x3; + case ROCKSDB_NAMESPACE::Histograms::TABLE_SYNC_MICROS: + return 0x4; + case ROCKSDB_NAMESPACE::Histograms::COMPACTION_OUTFILE_SYNC_MICROS: + return 0x5; + case ROCKSDB_NAMESPACE::Histograms::WAL_FILE_SYNC_MICROS: + return 0x6; + case ROCKSDB_NAMESPACE::Histograms::MANIFEST_FILE_SYNC_MICROS: + return 0x7; + case ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_IO_MICROS: + return 0x8; + case ROCKSDB_NAMESPACE::Histograms::DB_MULTIGET: + return 0x9; + case ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_COMPACTION_MICROS: + return 0xA; + case ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_GET_MICROS: + return 0xB; + case ROCKSDB_NAMESPACE::Histograms::WRITE_RAW_BLOCK_MICROS: + return 0xC; + case ROCKSDB_NAMESPACE::Histograms::NUM_FILES_IN_SINGLE_COMPACTION: + return 0x12; + case ROCKSDB_NAMESPACE::Histograms::DB_SEEK: + return 0x13; + case ROCKSDB_NAMESPACE::Histograms::WRITE_STALL: + return 0x14; + case ROCKSDB_NAMESPACE::Histograms::SST_READ_MICROS: + return 0x15; + case ROCKSDB_NAMESPACE::Histograms::NUM_SUBCOMPACTIONS_SCHEDULED: + return 0x16; + case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_READ: + return 0x17; + case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_WRITE: + return 0x18; + case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_MULTIGET: + return 0x19; + case ROCKSDB_NAMESPACE::Histograms::BYTES_COMPRESSED: + return 0x1A; + case ROCKSDB_NAMESPACE::Histograms::BYTES_DECOMPRESSED: + return 0x1B; + case ROCKSDB_NAMESPACE::Histograms::COMPRESSION_TIMES_NANOS: + return 0x1C; + case ROCKSDB_NAMESPACE::Histograms::DECOMPRESSION_TIMES_NANOS: + return 0x1D; + case ROCKSDB_NAMESPACE::Histograms::READ_NUM_MERGE_OPERANDS: + return 0x1E; + // 0x20 to skip 0x1F so TICKER_ENUM_MAX remains unchanged for minor + // version compatibility. 
+      case ROCKSDB_NAMESPACE::Histograms::FLUSH_TIME:
+        return 0x20;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_KEY_SIZE:
+        return 0x21;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_VALUE_SIZE:
+        return 0x22;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_WRITE_MICROS:
+        return 0x23;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_GET_MICROS:
+        return 0x24;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_MULTIGET_MICROS:
+        return 0x25;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_SEEK_MICROS:
+        return 0x26;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_NEXT_MICROS:
+        return 0x27;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_PREV_MICROS:
+        return 0x28;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS:
+        return 0x29;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_READ_MICROS:
+        return 0x2A;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_SYNC_MICROS:
+        return 0x2B;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS:
+        return 0x2D;
+      case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS:
+        return 0x2E;
+      case ROCKSDB_NAMESPACE::Histograms::
+          NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL:
+        return 0x2F;
+      case ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL:
+        return 0x31;
+      case ROCKSDB_NAMESPACE::Histograms::ERROR_HANDLER_AUTORESUME_RETRY_COUNT:
+        return 0x32;
+      case ROCKSDB_NAMESPACE::Histograms::ASYNC_READ_BYTES:
+        return 0x33;
+      case ROCKSDB_NAMESPACE::Histograms::POLL_WAIT_MICROS:
+        return 0x34;
+      case ROCKSDB_NAMESPACE::Histograms::PREFETCHED_BYTES_DISCARDED:
+        return 0x35;
+      case ROCKSDB_NAMESPACE::Histograms::MULTIGET_IO_BATCH_SIZE:
+        return 0x36;
+      case ROCKSDB_NAMESPACE::Histograms::NUM_LEVEL_READ_PER_MULTIGET:
+        return 0x37;
+      case ROCKSDB_NAMESPACE::Histograms::ASYNC_PREFETCH_ABORT_MICROS:
+        return 0x38;
+      case ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_PREFETCH_TAIL_READ_BYTES:
+        return 0x39;
+      case ROCKSDB_NAMESPACE::Histograms::FILE_READ_FLUSH_MICROS:
+        return 0x3A;
+      case ROCKSDB_NAMESPACE::Histograms::FILE_READ_COMPACTION_MICROS:
+        return 0x3B;
+      case ROCKSDB_NAMESPACE::Histograms::FILE_READ_DB_OPEN_MICROS:
+        return 0x3C;
+      case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
+        // 0x1F for backwards compatibility on current minor version.
+ return 0x1F; + + default: + // undefined/default + return 0x0; + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::Histograms enum for the + // provided Java org.rocksdb.HistogramsType + static ROCKSDB_NAMESPACE::Histograms toCppHistograms(jbyte jhistograms_type) { + switch (jhistograms_type) { + case 0x0: + return ROCKSDB_NAMESPACE::Histograms::DB_GET; + case 0x1: + return ROCKSDB_NAMESPACE::Histograms::DB_WRITE; + case 0x2: + return ROCKSDB_NAMESPACE::Histograms::COMPACTION_TIME; + case 0x3: + return ROCKSDB_NAMESPACE::Histograms::SUBCOMPACTION_SETUP_TIME; + case 0x4: + return ROCKSDB_NAMESPACE::Histograms::TABLE_SYNC_MICROS; + case 0x5: + return ROCKSDB_NAMESPACE::Histograms::COMPACTION_OUTFILE_SYNC_MICROS; + case 0x6: + return ROCKSDB_NAMESPACE::Histograms::WAL_FILE_SYNC_MICROS; + case 0x7: + return ROCKSDB_NAMESPACE::Histograms::MANIFEST_FILE_SYNC_MICROS; + case 0x8: + return ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_IO_MICROS; + case 0x9: + return ROCKSDB_NAMESPACE::Histograms::DB_MULTIGET; + case 0xA: + return ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_COMPACTION_MICROS; + case 0xB: + return ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_GET_MICROS; + case 0xC: + return ROCKSDB_NAMESPACE::Histograms::WRITE_RAW_BLOCK_MICROS; + case 0x12: + return ROCKSDB_NAMESPACE::Histograms::NUM_FILES_IN_SINGLE_COMPACTION; + case 0x13: + return ROCKSDB_NAMESPACE::Histograms::DB_SEEK; + case 0x14: + return ROCKSDB_NAMESPACE::Histograms::WRITE_STALL; + case 0x15: + return ROCKSDB_NAMESPACE::Histograms::SST_READ_MICROS; + case 0x16: + return ROCKSDB_NAMESPACE::Histograms::NUM_SUBCOMPACTIONS_SCHEDULED; + case 0x17: + return ROCKSDB_NAMESPACE::Histograms::BYTES_PER_READ; + case 0x18: + return ROCKSDB_NAMESPACE::Histograms::BYTES_PER_WRITE; + case 0x19: + return ROCKSDB_NAMESPACE::Histograms::BYTES_PER_MULTIGET; + case 0x1A: + return ROCKSDB_NAMESPACE::Histograms::BYTES_COMPRESSED; + case 0x1B: + return ROCKSDB_NAMESPACE::Histograms::BYTES_DECOMPRESSED; + case 0x1C: + return ROCKSDB_NAMESPACE::Histograms::COMPRESSION_TIMES_NANOS; + case 0x1D: + return ROCKSDB_NAMESPACE::Histograms::DECOMPRESSION_TIMES_NANOS; + case 0x1E: + return ROCKSDB_NAMESPACE::Histograms::READ_NUM_MERGE_OPERANDS; + // 0x20 to skip 0x1F so TICKER_ENUM_MAX remains unchanged for minor + // version compatibility. 
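+      // (Editorial note, illustrative: the reserved 0x1F slot means a round
+      // trip through these two functions skips it entirely, e.g.
+      //   toCppHistograms(0x20) == FLUSH_TIME, and
+      //   toJavaHistogramsType(FLUSH_TIME) == 0x20, while
+      //   toCppHistograms(0x1F) == HISTOGRAM_ENUM_MAX.)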
+ case 0x20: + return ROCKSDB_NAMESPACE::Histograms::FLUSH_TIME; + case 0x21: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_KEY_SIZE; + case 0x22: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_VALUE_SIZE; + case 0x23: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_WRITE_MICROS; + case 0x24: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_GET_MICROS; + case 0x25: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_MULTIGET_MICROS; + case 0x26: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_SEEK_MICROS; + case 0x27: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_NEXT_MICROS; + case 0x28: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_PREV_MICROS; + case 0x29: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS; + case 0x2A: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_READ_MICROS; + case 0x2B: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_SYNC_MICROS; + case 0x2D: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS; + case 0x2E: + return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS; + case 0x2F: + return ROCKSDB_NAMESPACE::Histograms:: + NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL; + case 0x31: + return ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL; + case 0x32: + return ROCKSDB_NAMESPACE::Histograms:: + ERROR_HANDLER_AUTORESUME_RETRY_COUNT; + case 0x33: + return ROCKSDB_NAMESPACE::Histograms::ASYNC_READ_BYTES; + case 0x34: + return ROCKSDB_NAMESPACE::Histograms::POLL_WAIT_MICROS; + case 0x35: + return ROCKSDB_NAMESPACE::Histograms::PREFETCHED_BYTES_DISCARDED; + case 0x36: + return ROCKSDB_NAMESPACE::Histograms::MULTIGET_IO_BATCH_SIZE; + case 0x37: + return ROCKSDB_NAMESPACE::Histograms::NUM_LEVEL_READ_PER_MULTIGET; + case 0x38: + return ROCKSDB_NAMESPACE::Histograms::ASYNC_PREFETCH_ABORT_MICROS; + case 0x39: + return ROCKSDB_NAMESPACE::Histograms:: + TABLE_OPEN_PREFETCH_TAIL_READ_BYTES; + case 0x3A: + return ROCKSDB_NAMESPACE::Histograms::FILE_READ_FLUSH_MICROS; + case 0x3B: + return ROCKSDB_NAMESPACE::Histograms::FILE_READ_COMPACTION_MICROS; + case 0x3C: + return ROCKSDB_NAMESPACE::Histograms::FILE_READ_DB_OPEN_MICROS; + case 0x1F: + // 0x1F for backwards compatibility on current minor version. 
+ return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX; + + default: + // undefined/default + return ROCKSDB_NAMESPACE::Histograms::DB_GET; + } + } +}; + +// The portal class for org.rocksdb.StatsLevel +class StatsLevelJni { + public: + // Returns the equivalent org.rocksdb.StatsLevel for the provided + // C++ ROCKSDB_NAMESPACE::StatsLevel enum + static jbyte toJavaStatsLevel( + const ROCKSDB_NAMESPACE::StatsLevel& stats_level) { + switch (stats_level) { + case ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers: + return 0x0; + case ROCKSDB_NAMESPACE::StatsLevel::kExceptTimeForMutex: + return 0x1; + case ROCKSDB_NAMESPACE::StatsLevel::kAll: + return 0x2; + + default: + // undefined/default + return 0x0; + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::StatsLevel enum for the + // provided Java org.rocksdb.StatsLevel + static ROCKSDB_NAMESPACE::StatsLevel toCppStatsLevel(jbyte jstats_level) { + switch (jstats_level) { + case 0x0: + return ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers; + case 0x1: + return ROCKSDB_NAMESPACE::StatsLevel::kExceptTimeForMutex; + case 0x2: + return ROCKSDB_NAMESPACE::StatsLevel::kAll; + + default: + // undefined/default + return ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers; + } + } +}; + +// The portal class for org.rocksdb.RateLimiterMode +class RateLimiterModeJni { + public: + // Returns the equivalent org.rocksdb.RateLimiterMode for the provided + // C++ ROCKSDB_NAMESPACE::RateLimiter::Mode enum + static jbyte toJavaRateLimiterMode( + const ROCKSDB_NAMESPACE::RateLimiter::Mode& rate_limiter_mode) { + switch (rate_limiter_mode) { + case ROCKSDB_NAMESPACE::RateLimiter::Mode::kReadsOnly: + return 0x0; + case ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly: + return 0x1; + case ROCKSDB_NAMESPACE::RateLimiter::Mode::kAllIo: + return 0x2; + + default: + // undefined/default + return 0x1; + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::RateLimiter::Mode enum for + // the provided Java org.rocksdb.RateLimiterMode + static ROCKSDB_NAMESPACE::RateLimiter::Mode toCppRateLimiterMode( + jbyte jrate_limiter_mode) { + switch (jrate_limiter_mode) { + case 0x0: + return ROCKSDB_NAMESPACE::RateLimiter::Mode::kReadsOnly; + case 0x1: + return ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly; + case 0x2: + return ROCKSDB_NAMESPACE::RateLimiter::Mode::kAllIo; + + default: + // undefined/default + return ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly; + } + } +}; + +// The portal class for org.rocksdb.MemoryUsageType +class MemoryUsageTypeJni { + public: + // Returns the equivalent org.rocksdb.MemoryUsageType for the provided + // C++ ROCKSDB_NAMESPACE::MemoryUtil::UsageType enum + static jbyte toJavaMemoryUsageType( + const ROCKSDB_NAMESPACE::MemoryUtil::UsageType& usage_type) { + switch (usage_type) { + case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableTotal: + return 0x0; + case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableUnFlushed: + return 0x1; + case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kTableReadersTotal: + return 0x2; + case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kCacheTotal: + return 0x3; + default: + // undefined: use kNumUsageTypes + return 0x4; + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::MemoryUtil::UsageType enum + // for the provided Java org.rocksdb.MemoryUsageType + static ROCKSDB_NAMESPACE::MemoryUtil::UsageType toCppMemoryUsageType( + jbyte usage_type) { + switch (usage_type) { + case 0x0: + return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableTotal; + case 0x1: + return 
ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableUnFlushed;
+      case 0x2:
+        return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kTableReadersTotal;
+      case 0x3:
+        return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kCacheTotal;
+      default:
+        // undefined/default: use kNumUsageTypes
+        return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kNumUsageTypes;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.Transaction
+class TransactionJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.Transaction
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/Transaction");
+  }
+
+  /**
+   * Create a new Java org.rocksdb.Transaction.WaitingTransactions object
+   *
+   * @param env A pointer to the Java environment
+   * @param jtransaction A Java org.rocksdb.Transaction object
+   * @param column_family_id The id of the column family
+   * @param key The key
+   * @param transaction_ids The transaction ids
+   *
+   * @return A reference to a Java
+   *     org.rocksdb.Transaction.WaitingTransactions object,
+   *     or nullptr if an exception occurs
+   */
+  static jobject newWaitingTransactions(
+      JNIEnv* env, jobject jtransaction, const uint32_t column_family_id,
+      const std::string& key,
+      const std::vector<ROCKSDB_NAMESPACE::TransactionID>& transaction_ids) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = env->GetMethodID(
+        jclazz, "newWaitingTransactions",
+        "(JLjava/lang/String;[J)Lorg/rocksdb/Transaction$WaitingTransactions;");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    jstring jkey = env->NewStringUTF(key.c_str());
+    if (jkey == nullptr) {
+      // exception thrown: OutOfMemoryError
+      return nullptr;
+    }
+
+    const size_t len = transaction_ids.size();
+    jlongArray jtransaction_ids = env->NewLongArray(static_cast<jsize>(len));
+    if (jtransaction_ids == nullptr) {
+      // exception thrown: OutOfMemoryError
+      env->DeleteLocalRef(jkey);
+      return nullptr;
+    }
+
+    jboolean is_copy;
+    jlong* body = env->GetLongArrayElements(jtransaction_ids, &is_copy);
+    if (body == nullptr) {
+      // exception thrown: OutOfMemoryError
+      env->DeleteLocalRef(jkey);
+      env->DeleteLocalRef(jtransaction_ids);
+      return nullptr;
+    }
+    for (size_t i = 0; i < len; ++i) {
+      body[i] = static_cast<jlong>(transaction_ids[i]);
+    }
+    env->ReleaseLongArrayElements(jtransaction_ids, body,
+                                  is_copy == JNI_TRUE ? 0 : JNI_ABORT);
+
+    jobject jwaiting_transactions = env->CallObjectMethod(
+        jtransaction, mid, static_cast<jlong>(column_family_id), jkey,
+        jtransaction_ids);
+    if (env->ExceptionCheck()) {
+      // exception thrown: InstantiationException or OutOfMemoryError
+      env->DeleteLocalRef(jkey);
+      env->DeleteLocalRef(jtransaction_ids);
+      return nullptr;
+    }
+
+    return jwaiting_transactions;
+  }
+};
+
+// The portal class for org.rocksdb.TransactionDB
+class TransactionDBJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.TransactionDB
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/TransactionDB");
+  }
+
+  /**
+   * Create a new Java org.rocksdb.TransactionDB.DeadlockInfo object
+   *
+   * @param env A pointer to the Java environment
+   * @param jtransaction_db A Java org.rocksdb.TransactionDB object
+   * @param transaction_id The transaction id
+   * @param column_family_id The id of the column family
+   * @param waiting_key The key being waited upon
+   * @param exclusive Whether the lock is held exclusively
+   *
+   * @return A reference to a Java
+   *     org.rocksdb.TransactionDB.DeadlockInfo object,
+   *     or nullptr if an exception occurs
+   */
+  static jobject newDeadlockInfo(
+      JNIEnv* env, jobject jtransaction_db,
+      const ROCKSDB_NAMESPACE::TransactionID transaction_id,
+      const uint32_t column_family_id, const std::string& waiting_key,
+      const bool exclusive) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = env->GetMethodID(
+        jclazz, "newDeadlockInfo",
+        "(JJLjava/lang/String;Z)Lorg/rocksdb/TransactionDB$DeadlockInfo;");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    jstring jwaiting_key = env->NewStringUTF(waiting_key.c_str());
+    if (jwaiting_key == nullptr) {
+      // exception thrown: OutOfMemoryError
+      return nullptr;
+    }
+
+    // resolve the column family id to a ColumnFamilyHandle
+    jobject jdeadlock_info = env->CallObjectMethod(
+        jtransaction_db, mid, transaction_id,
+        static_cast<jlong>(column_family_id), jwaiting_key, exclusive);
+    if (env->ExceptionCheck()) {
+      // exception thrown: InstantiationException or OutOfMemoryError
+      env->DeleteLocalRef(jwaiting_key);
+      return nullptr;
+    }
+
+    return jdeadlock_info;
+  }
+};
+
+// The portal class for org.rocksdb.TxnDBWritePolicy
+class TxnDBWritePolicyJni {
+ public:
+  // Returns the equivalent org.rocksdb.TxnDBWritePolicy for the provided
+  // C++ ROCKSDB_NAMESPACE::TxnDBWritePolicy enum
+  static jbyte toJavaTxnDBWritePolicy(
+      const ROCKSDB_NAMESPACE::TxnDBWritePolicy& txndb_write_policy) {
+    switch (txndb_write_policy) {
+      case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_PREPARED:
+        return 0x1;
+      case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_UNPREPARED:
+        return 0x2;
+      default:
+        return 0x7F;  // undefined
+    }
+  }
+
+  // Returns the equivalent C++ ROCKSDB_NAMESPACE::TxnDBWritePolicy enum for
+  // the provided Java org.rocksdb.TxnDBWritePolicy
+  static ROCKSDB_NAMESPACE::TxnDBWritePolicy toCppTxnDBWritePolicy(
+      jbyte jtxndb_write_policy) {
+    switch (jtxndb_write_policy) {
+      case 0x0:
+        return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_PREPARED;
+      case 0x2:
+        return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_UNPREPARED;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.TransactionDB.KeyLockInfo
+class KeyLockInfoJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.TransactionDB.KeyLockInfo
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$KeyLockInfo");
+  }
+
+  /**
+   * Create a new Java org.rocksdb.TransactionDB.KeyLockInfo object
+   * with the same properties as the provided C++
+   * ROCKSDB_NAMESPACE::KeyLockInfo object
+   *
+   * @param env A pointer to the Java environment
+   * @param key_lock_info The ROCKSDB_NAMESPACE::KeyLockInfo object
+   *
+   * @return A reference to a Java
+   *     org.rocksdb.TransactionDB.KeyLockInfo object,
+   *     or nullptr if an exception occurs
+   */
+  static jobject construct(
+      JNIEnv* env, const ROCKSDB_NAMESPACE::KeyLockInfo& key_lock_info) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid =
+        env->GetMethodID(jclazz, "<init>", "(Ljava/lang/String;[JZ)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    jstring jkey = env->NewStringUTF(key_lock_info.key.c_str());
+    if (jkey == nullptr) {
+      // exception thrown: OutOfMemoryError
+      return nullptr;
+    }
+
+    const jsize jtransaction_ids_len =
+        static_cast<jsize>(key_lock_info.ids.size());
+    jlongArray jtransactions_ids = env->NewLongArray(jtransaction_ids_len);
+    if (jtransactions_ids == nullptr) {
+      // exception thrown: OutOfMemoryError
+      env->DeleteLocalRef(jkey);
+      return nullptr;
+    }
+
+    const jobject jkey_lock_info = env->NewObject(
+        jclazz, mid, jkey, jtransactions_ids, key_lock_info.exclusive);
+    if (jkey_lock_info == nullptr) {
+      // exception thrown: InstantiationException or OutOfMemoryError
+      env->DeleteLocalRef(jtransactions_ids);
+      env->DeleteLocalRef(jkey);
+      return nullptr;
+    }
+
+    return jkey_lock_info;
+  }
+};
+
+// The portal class for org.rocksdb.TransactionDB.DeadlockInfo
+class DeadlockInfoJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.TransactionDB.DeadlockInfo
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$DeadlockInfo");
+  }
+};
+
+// The portal class for org.rocksdb.TransactionDB.DeadlockPath
+class DeadlockPathJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.TransactionDB.DeadlockPath
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$DeadlockPath");
+  }
+
+  /**
+   * Create a new Java org.rocksdb.TransactionDB.DeadlockPath object
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return A reference to a Java
+   *     org.rocksdb.TransactionDB.DeadlockPath object,
+   *     or nullptr if an exception occurs
+   */
+  static jobject construct(JNIEnv* env, const jobjectArray jdeadlock_infos,
+                           const bool limit_exceeded) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = env->GetMethodID(jclazz, "<init>", "([LDeadlockInfo;Z)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    const jobject jdeadlock_path =
+        env->NewObject(jclazz, mid, jdeadlock_infos, limit_exceeded);
+    if (jdeadlock_path == nullptr) {
+      // exception thrown: InstantiationException or OutOfMemoryError
+      return nullptr;
+    }
+
+    return jdeadlock_path;
+  }
+};
+
+class AbstractTableFilterJni
+    : public RocksDBNativeClass<
+          const ROCKSDB_NAMESPACE::TableFilterJniCallback*,
+          AbstractTableFilterJni> {
+ public:
+  /**
+   * Get the Java Method: TableFilter#filter(TableProperties)
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getFilterMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid =
+        env->GetMethodID(jclazz, "filter", "(Lorg/rocksdb/TableProperties;)Z");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+ private:
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/TableFilter");
+  }
+};
+
+class TablePropertiesJni : public JavaClass {
+ public:
+  /**
+   * Create a new Java org.rocksdb.TableProperties object.
+   *
+   * @param env A pointer to the Java environment
+   * @param table_properties A Cpp table properties object
+   *
+   * @return A reference to a Java org.rocksdb.TableProperties object, or
+   *     nullptr if an exception occurs
+   */
+  static jobject fromCppTableProperties(
+      JNIEnv* env, const ROCKSDB_NAMESPACE::TableProperties& table_properties) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = env->GetMethodID(
+        jclazz, "<init>",
+        "(JJJJJJJJJJJJJJJJJJJJJJ[BLjava/lang/String;Ljava/lang/String;Ljava/"
+        "lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/"
+        "String;Ljava/util/Map;Ljava/util/Map;)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    jbyteArray jcolumn_family_name = ROCKSDB_NAMESPACE::JniUtil::copyBytes(
+        env, table_properties.column_family_name);
+    if (jcolumn_family_name == nullptr) {
+      // exception occurred creating java byte array
+      return nullptr;
+    }
+
+    jstring jfilter_policy_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+        env, &table_properties.filter_policy_name, true);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java string
+      env->DeleteLocalRef(jcolumn_family_name);
+      return nullptr;
+    }
+
+    jstring jcomparator_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+        env, &table_properties.comparator_name, true);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java string
+      env->DeleteLocalRef(jcolumn_family_name);
+      env->DeleteLocalRef(jfilter_policy_name);
+      return nullptr;
+    }
+
+    jstring jmerge_operator_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+        env, &table_properties.merge_operator_name, true);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java string
+      env->DeleteLocalRef(jcolumn_family_name);
+      env->DeleteLocalRef(jfilter_policy_name);
+      env->DeleteLocalRef(jcomparator_name);
+      return nullptr;
+    }
+
+    jstring jprefix_extractor_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+        env, &table_properties.prefix_extractor_name, true);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java string
+      env->DeleteLocalRef(jcolumn_family_name);
+      env->DeleteLocalRef(jfilter_policy_name);
+      env->DeleteLocalRef(jcomparator_name);
+      env->DeleteLocalRef(jmerge_operator_name);
+      return nullptr;
+    }
+
+    jstring jproperty_collectors_names =
+        ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+            env, &table_properties.property_collectors_names, true);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java string
+      env->DeleteLocalRef(jcolumn_family_name);
+      env->DeleteLocalRef(jfilter_policy_name);
+      env->DeleteLocalRef(jcomparator_name);
+      env->DeleteLocalRef(jmerge_operator_name);
+      env->DeleteLocalRef(jprefix_extractor_name);
+      return nullptr;
+    }
+
+    jstring jcompression_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString(
+        env, &table_properties.compression_name, true);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java string
+      env->DeleteLocalRef(jcolumn_family_name);
+      env->DeleteLocalRef(jfilter_policy_name);
+      env->DeleteLocalRef(jcomparator_name);
+      env->DeleteLocalRef(jmerge_operator_name);
+      env->DeleteLocalRef(jprefix_extractor_name);
+      env->DeleteLocalRef(jproperty_collectors_names);
+      return nullptr;
+    }
+
+    // Map<std::string, std::string>
+    jobject juser_collected_properties =
+        ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(
+            env, &table_properties.user_collected_properties);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java map
+      env->DeleteLocalRef(jcolumn_family_name);
+      env->DeleteLocalRef(jfilter_policy_name);
+      env->DeleteLocalRef(jcomparator_name);
+      env->DeleteLocalRef(jmerge_operator_name);
+      env->DeleteLocalRef(jprefix_extractor_name);
+      env->DeleteLocalRef(jproperty_collectors_names);
+      env->DeleteLocalRef(jcompression_name);
+      return nullptr;
+    }
+
+    // Map<std::string, std::string>
+    jobject jreadable_properties = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(
+        env, &table_properties.readable_properties);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java map
+      env->DeleteLocalRef(jcolumn_family_name);
+      env->DeleteLocalRef(jfilter_policy_name);
+      env->DeleteLocalRef(jcomparator_name);
+      env->DeleteLocalRef(jmerge_operator_name);
+      env->DeleteLocalRef(jprefix_extractor_name);
+      env->DeleteLocalRef(jproperty_collectors_names);
+      env->DeleteLocalRef(jcompression_name);
+      env->DeleteLocalRef(juser_collected_properties);
+      return nullptr;
+    }
+
+    jobject jtable_properties = env->NewObject(
+        jclazz, mid, static_cast<jlong>(table_properties.data_size),
+        static_cast<jlong>(table_properties.index_size),
+        static_cast<jlong>(table_properties.index_partitions),
+        static_cast<jlong>(table_properties.top_level_index_size),
+        static_cast<jlong>(table_properties.index_key_is_user_key),
+        static_cast<jlong>(table_properties.index_value_is_delta_encoded),
+        static_cast<jlong>(table_properties.filter_size),
+        static_cast<jlong>(table_properties.raw_key_size),
+        static_cast<jlong>(table_properties.raw_value_size),
+        static_cast<jlong>(table_properties.num_data_blocks),
+        static_cast<jlong>(table_properties.num_entries),
+        static_cast<jlong>(table_properties.num_deletions),
+        static_cast<jlong>(table_properties.num_merge_operands),
+        static_cast<jlong>(table_properties.num_range_deletions),
+        static_cast<jlong>(table_properties.format_version),
+        static_cast<jlong>(table_properties.fixed_key_len),
+        static_cast<jlong>(table_properties.column_family_id),
+        static_cast<jlong>(table_properties.creation_time),
+        static_cast<jlong>(table_properties.oldest_key_time),
+        static_cast<jlong>(
+            table_properties.slow_compression_estimated_data_size),
+        static_cast<jlong>(
+            table_properties.fast_compression_estimated_data_size),
+        static_cast<jlong>(
+            table_properties.external_sst_file_global_seqno_offset),
+        jcolumn_family_name, jfilter_policy_name, jcomparator_name,
+        jmerge_operator_name, jprefix_extractor_name,
+        jproperty_collectors_names, jcompression_name,
+        juser_collected_properties, jreadable_properties);
+
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+
+    return jtable_properties;
+  }
+
+ private:
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/TableProperties");
+  }
+};
+
+class ColumnFamilyDescriptorJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.ColumnFamilyDescriptor
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/ColumnFamilyDescriptor");
+  }
+
+  /**
+   * Create a new Java org.rocksdb.ColumnFamilyDescriptor object with the same
+   * properties as the provided C++ ROCKSDB_NAMESPACE::ColumnFamilyDescriptor
+   * object
+   *
+   * @param env A pointer to the Java environment
+   * @param cfd A pointer to ROCKSDB_NAMESPACE::ColumnFamilyDescriptor object
+   *
+   * @return A reference to a Java org.rocksdb.ColumnFamilyDescriptor object,
+   *     or nullptr if an exception occurs
+   */
+  static jobject construct(JNIEnv* env, ColumnFamilyDescriptor* cfd) {
+    jbyteArray jcf_name = JniUtil::copyBytes(env, cfd->name);
+    jobject cfopts = ColumnFamilyOptionsJni::construct(env, &(cfd->options));
+
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = env->GetMethodID(jclazz, "<init>",
+                                     "([BLorg/rocksdb/ColumnFamilyOptions;)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      env->DeleteLocalRef(jcf_name);
+      return nullptr;
+    }
+
+    jobject jcfd = env->NewObject(jclazz, mid, jcf_name, cfopts);
+    if (env->ExceptionCheck()) {
+      env->DeleteLocalRef(jcf_name);
+      return nullptr;
+    }
+
+    return jcfd;
+  }
+
+  /**
+   * Get the Java Method: ColumnFamilyDescriptor#columnFamilyName
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getColumnFamilyNameMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(jclazz, "columnFamilyName", "()[B");
+    assert(mid != nullptr);
+    return mid;
+  }
+
+  /**
+   * Get the Java Method: ColumnFamilyDescriptor#columnFamilyOptions
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Method ID or nullptr if the class or method id could not
+   *     be retrieved
+   */
+  static jmethodID getColumnFamilyOptionsMethod(JNIEnv* env) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    static jmethodID mid = env->GetMethodID(
+        jclazz, "columnFamilyOptions", "()Lorg/rocksdb/ColumnFamilyOptions;");
+    assert(mid != nullptr);
+    return mid;
+  }
+};
+
+// The portal class for org.rocksdb.IndexType
+class IndexTypeJni {
+ public:
+  // Returns the equivalent org.rocksdb.IndexType for the provided
+  // C++ ROCKSDB_NAMESPACE::IndexType enum
+  static jbyte toJavaIndexType(
+      const ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType& index_type) {
+    switch (index_type) {
+      case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::kBinarySearch:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::kHashSearch:
+        return 0x1;
+      case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::
+          kTwoLevelIndexSearch:
+        return 0x2;
+      case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::
+          kBinarySearchWithFirstKey:
+        return 0x3;
+      default:
+        return 0x7F;  // undefined
+    }
+  }
+
+  // Returns the equivalent C++ ROCKSDB_NAMESPACE::IndexType enum for the
+  // provided Java org.rocksdb.IndexType
+  static ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType toCppIndexType(
+      jbyte jindex_type) {
+    switch (jindex_type) {
+      case 0x0:
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::
+            kBinarySearch;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::
+            kHashSearch;
+      case 0x2:
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::
+            kTwoLevelIndexSearch;
+      case 0x3:
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::
+            kBinarySearchWithFirstKey;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::
+            kBinarySearch;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.DataBlockIndexType
+class DataBlockIndexTypeJni {
+ public:
+  // Returns the equivalent org.rocksdb.DataBlockIndexType for the provided
+  // C++
ROCKSDB_NAMESPACE::DataBlockIndexType enum + static jbyte toJavaDataBlockIndexType( + const ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType& + index_type) { + switch (index_type) { + case ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: + kDataBlockBinarySearch: + return 0x0; + case ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: + kDataBlockBinaryAndHash: + return 0x1; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::DataBlockIndexType enum for + // the provided Java org.rocksdb.DataBlockIndexType + static ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType + toCppDataBlockIndexType(jbyte jindex_type) { + switch (jindex_type) { + case 0x0: + return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: + kDataBlockBinarySearch; + case 0x1: + return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: + kDataBlockBinaryAndHash; + default: + // undefined/default + return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: + kDataBlockBinarySearch; + } + } +}; + +// The portal class for org.rocksdb.ChecksumType +class ChecksumTypeJni { + public: + // Returns the equivalent org.rocksdb.ChecksumType for the provided + // C++ ROCKSDB_NAMESPACE::ChecksumType enum + static jbyte toJavaChecksumType( + const ROCKSDB_NAMESPACE::ChecksumType& checksum_type) { + switch (checksum_type) { + case ROCKSDB_NAMESPACE::ChecksumType::kNoChecksum: + return 0x0; + case ROCKSDB_NAMESPACE::ChecksumType::kCRC32c: + return 0x1; + case ROCKSDB_NAMESPACE::ChecksumType::kxxHash: + return 0x2; + case ROCKSDB_NAMESPACE::ChecksumType::kxxHash64: + return 0x3; + case ROCKSDB_NAMESPACE::ChecksumType::kXXH3: + return 0x4; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::ChecksumType enum for the + // provided Java org.rocksdb.ChecksumType + static ROCKSDB_NAMESPACE::ChecksumType toCppChecksumType( + jbyte jchecksum_type) { + switch (jchecksum_type) { + case 0x0: + return ROCKSDB_NAMESPACE::ChecksumType::kNoChecksum; + case 0x1: + return ROCKSDB_NAMESPACE::ChecksumType::kCRC32c; + case 0x2: + return ROCKSDB_NAMESPACE::ChecksumType::kxxHash; + case 0x3: + return ROCKSDB_NAMESPACE::ChecksumType::kxxHash64; + case 0x4: + return ROCKSDB_NAMESPACE::ChecksumType::kXXH3; + default: + // undefined/default + return ROCKSDB_NAMESPACE::ChecksumType::kCRC32c; + } + } +}; + +// The portal class for org.rocksdb.IndexShorteningMode +class IndexShorteningModeJni { + public: + // Returns the equivalent org.rocksdb.IndexShorteningMode for the provided + // C++ ROCKSDB_NAMESPACE::IndexShorteningMode enum + static jbyte toJavaIndexShorteningMode( + const ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode& + index_shortening_mode) { + switch (index_shortening_mode) { + case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: + kNoShortening: + return 0x0; + case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: + kShortenSeparators: + return 0x1; + case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: + kShortenSeparatorsAndSuccessor: + return 0x2; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::IndexShorteningMode enum for + // the provided Java org.rocksdb.IndexShorteningMode + static ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode + toCppIndexShorteningMode(jbyte jindex_shortening_mode) { + switch (jindex_shortening_mode) { + case 0x0: 
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode::
+            kNoShortening;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode::
+            kShortenSeparators;
+      case 0x2:
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode::
+            kShortenSeparatorsAndSuccessor;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode::
+            kShortenSeparators;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.Priority
+class PriorityJni {
+ public:
+  // Returns the equivalent org.rocksdb.Priority for the provided
+  // C++ ROCKSDB_NAMESPACE::Env::Priority enum
+  static jbyte toJavaPriority(
+      const ROCKSDB_NAMESPACE::Env::Priority& priority) {
+    switch (priority) {
+      case ROCKSDB_NAMESPACE::Env::Priority::BOTTOM:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::Env::Priority::LOW:
+        return 0x1;
+      case ROCKSDB_NAMESPACE::Env::Priority::HIGH:
+        return 0x2;
+      case ROCKSDB_NAMESPACE::Env::Priority::TOTAL:
+        return 0x3;
+      default:
+        return 0x7F;  // undefined
+    }
+  }
+
+  // Returns the equivalent C++ ROCKSDB_NAMESPACE::Env::Priority enum for the
+  // provided Java org.rocksdb.Priority
+  static ROCKSDB_NAMESPACE::Env::Priority toCppPriority(jbyte jpriority) {
+    switch (jpriority) {
+      case 0x0:
+        return ROCKSDB_NAMESPACE::Env::Priority::BOTTOM;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::Env::Priority::LOW;
+      case 0x2:
+        return ROCKSDB_NAMESPACE::Env::Priority::HIGH;
+      case 0x3:
+        return ROCKSDB_NAMESPACE::Env::Priority::TOTAL;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::Env::Priority::LOW;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.ThreadType
+class ThreadTypeJni {
+ public:
+  // Returns the equivalent org.rocksdb.ThreadType for the provided
+  // C++ ROCKSDB_NAMESPACE::ThreadStatus::ThreadType enum
+  static jbyte toJavaThreadType(
+      const ROCKSDB_NAMESPACE::ThreadStatus::ThreadType& thread_type) {
+    switch (thread_type) {
+      case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::HIGH_PRIORITY:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY:
+        return 0x1;
+      case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::USER:
+        return 0x2;
+      case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::BOTTOM_PRIORITY:
+        return 0x3;
+      default:
+        return 0x7F;  // undefined
+    }
+  }
+
+  // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::ThreadType
+  // enum for the provided Java org.rocksdb.ThreadType
+  static ROCKSDB_NAMESPACE::ThreadStatus::ThreadType toCppThreadType(
+      jbyte jthread_type) {
+    switch (jthread_type) {
+      case 0x0:
+        return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::HIGH_PRIORITY;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY;
+      case 0x2:
+        return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::USER;
+      case 0x3:
+        return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::BOTTOM_PRIORITY;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.OperationType
+class OperationTypeJni {
+ public:
+  // Returns the equivalent org.rocksdb.OperationType for the provided
+  // C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationType enum
+  static jbyte toJavaOperationType(
+      const ROCKSDB_NAMESPACE::ThreadStatus::OperationType& operation_type) {
+    switch (operation_type) {
+      case ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_COMPACTION:
+        return 0x1;
+      case
ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_FLUSH: + return 0x2; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_DBOPEN: + return 0x3; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationType + // enum for the provided Java org.rocksdb.OperationType + static ROCKSDB_NAMESPACE::ThreadStatus::OperationType toCppOperationType( + jbyte joperation_type) { + switch (joperation_type) { + case 0x0: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN; + case 0x1: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_COMPACTION; + case 0x2: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_FLUSH; + case 0x3: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_DBOPEN; + default: + // undefined/default + return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN; + } + } +}; + +// The portal class for org.rocksdb.OperationStage +class OperationStageJni { + public: + // Returns the equivalent org.rocksdb.OperationStage for the provided + // C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationStage enum + static jbyte toJavaOperationStage( + const ROCKSDB_NAMESPACE::ThreadStatus::OperationStage& operation_stage) { + switch (operation_stage) { + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN: + return 0x0; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_FLUSH_RUN: + return 0x1; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_FLUSH_WRITE_L0: + return 0x2; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_PREPARE: + return 0x3; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_RUN: + return 0x4; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_PROCESS_KV: + return 0x5; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_INSTALL: + return 0x6; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_SYNC_FILE: + return 0x7; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_PICK_MEMTABLES_TO_FLUSH: + return 0x8; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_MEMTABLE_ROLLBACK: + return 0x9; + case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS: + return 0xA; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationStage + // enum for the provided Java org.rocksdb.OperationStage + static ROCKSDB_NAMESPACE::ThreadStatus::OperationStage toCppOperationStage( + jbyte joperation_stage) { + switch (joperation_stage) { + case 0x0: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN; + case 0x1: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_FLUSH_RUN; + case 0x2: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_FLUSH_WRITE_L0; + case 0x3: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_PREPARE; + case 0x4: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_RUN; + case 0x5: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_PROCESS_KV; + case 0x6: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_INSTALL; + case 0x7: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_COMPACTION_SYNC_FILE; + case 0x8: + return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: + STAGE_PICK_MEMTABLES_TO_FLUSH; + case 0x9: 
+        return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::
+            STAGE_MEMTABLE_ROLLBACK;
+      case 0xA:
+        return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::
+            STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.StateType
+class StateTypeJni {
+ public:
+  // Returns the equivalent org.rocksdb.StateType for the provided
+  // C++ ROCKSDB_NAMESPACE::ThreadStatus::StateType enum
+  static jbyte toJavaStateType(
+      const ROCKSDB_NAMESPACE::ThreadStatus::StateType& state_type) {
+    switch (state_type) {
+      case ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_MUTEX_WAIT:
+        return 0x1;
+      default:
+        return 0x7F;  // undefined
+    }
+  }
+
+  // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::StateType enum
+  // for the provided Java org.rocksdb.StateType
+  static ROCKSDB_NAMESPACE::ThreadStatus::StateType toCppStateType(
+      jbyte jstate_type) {
+    switch (jstate_type) {
+      case 0x0:
+        return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_MUTEX_WAIT;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.ThreadStatus
+class ThreadStatusJni : public JavaClass {
+ public:
+  /**
+   * Get the Java Class org.rocksdb.ThreadStatus
+   *
+   * @param env A pointer to the Java environment
+   *
+   * @return The Java Class or nullptr if one of the
+   *     ClassFormatError, ClassCircularityError, NoClassDefFoundError,
+   *     OutOfMemoryError or ExceptionInInitializerError exceptions is thrown
+   */
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/ThreadStatus");
+  }
+
+  /**
+   * Create a new Java org.rocksdb.ThreadStatus object with the same
+   * properties as the provided C++ ROCKSDB_NAMESPACE::ThreadStatus object
+   *
+   * @param env A pointer to the Java environment
+   * @param thread_status A pointer to ROCKSDB_NAMESPACE::ThreadStatus object
+   *
+   * @return A reference to a Java org.rocksdb.ThreadStatus object, or
+   *     nullptr if an exception occurs
+   */
+  static jobject construct(
+      JNIEnv* env, const ROCKSDB_NAMESPACE::ThreadStatus* thread_status) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid = env->GetMethodID(
+        jclazz, "<init>", "(JBLjava/lang/String;Ljava/lang/String;BJB[JB)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    jstring jdb_name =
+        JniUtil::toJavaString(env, &(thread_status->db_name), true);
+    if (env->ExceptionCheck()) {
+      // an error occurred
+      return nullptr;
+    }
+
+    jstring jcf_name =
+        JniUtil::toJavaString(env, &(thread_status->cf_name), true);
+    if (env->ExceptionCheck()) {
+      // an error occurred
+      env->DeleteLocalRef(jdb_name);
+      return nullptr;
+    }
+
+    // long[]
+    const jsize len = static_cast<jsize>(
+        ROCKSDB_NAMESPACE::ThreadStatus::kNumOperationProperties);
+    jlongArray joperation_properties = env->NewLongArray(len);
+    if (joperation_properties == nullptr) {
+      // an exception occurred
+      env->DeleteLocalRef(jdb_name);
+      env->DeleteLocalRef(jcf_name);
+      return nullptr;
+    }
+    jboolean is_copy;
+    jlong* body = env->GetLongArrayElements(joperation_properties, &is_copy);
+    if (body == nullptr) {
+      // exception thrown: OutOfMemoryError
+      env->DeleteLocalRef(jdb_name);
+      env->DeleteLocalRef(jcf_name);
+      env->DeleteLocalRef(joperation_properties);
+      return nullptr;
+    }
+    for (jsize i = 0; i < len; ++i) {
+      body[i] = static_cast<jlong>(thread_status->op_properties[i]);
+    }
+    env->ReleaseLongArrayElements(joperation_properties, body,
+                                  is_copy == JNI_TRUE ? 0 : JNI_ABORT);
+
+    jobject jcfd = env->NewObject(
+        jclazz, mid, static_cast<jlong>(thread_status->thread_id),
+        ThreadTypeJni::toJavaThreadType(thread_status->thread_type), jdb_name,
+        jcf_name,
+        OperationTypeJni::toJavaOperationType(thread_status->operation_type),
+        static_cast<jlong>(thread_status->op_elapsed_micros),
+        OperationStageJni::toJavaOperationStage(thread_status->operation_stage),
+        joperation_properties,
+        StateTypeJni::toJavaStateType(thread_status->state_type));
+    if (env->ExceptionCheck()) {
+      // exception occurred
+      env->DeleteLocalRef(jdb_name);
+      env->DeleteLocalRef(jcf_name);
+      env->DeleteLocalRef(joperation_properties);
+      return nullptr;
+    }
+
+    // cleanup
+    env->DeleteLocalRef(jdb_name);
+    env->DeleteLocalRef(jcf_name);
+    env->DeleteLocalRef(joperation_properties);
+
+    return jcfd;
+  }
+};
+
+// The portal class for org.rocksdb.CompactionStyle
+class CompactionStyleJni {
+ public:
+  // Returns the equivalent org.rocksdb.CompactionStyle for the provided
+  // C++ ROCKSDB_NAMESPACE::CompactionStyle enum
+  static jbyte toJavaCompactionStyle(
+      const ROCKSDB_NAMESPACE::CompactionStyle& compaction_style) {
+    switch (compaction_style) {
+      case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleUniversal:
+        return 0x1;
+      case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleFIFO:
+        return 0x2;
+      case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleNone:
+        return 0x3;
+      default:
+        return 0x7F;  // undefined
+    }
+  }
+
+  // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionStyle enum for the
+  // provided Java org.rocksdb.CompactionStyle
+  static ROCKSDB_NAMESPACE::CompactionStyle toCppCompactionStyle(
+      jbyte jcompaction_style) {
+    switch (jcompaction_style) {
+      case 0x0:
+        return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleUniversal;
+      case 0x2:
+        return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleFIFO;
+      case 0x3:
+        return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleNone;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel;
+    }
+  }
+};
+
+// The portal class for org.rocksdb.CompactionReason
+class CompactionReasonJni {
+ public:
+  // Returns the equivalent org.rocksdb.CompactionReason for the provided
+  // C++ ROCKSDB_NAMESPACE::CompactionReason enum
+  static jbyte toJavaCompactionReason(
+      const ROCKSDB_NAMESPACE::CompactionReason& compaction_reason) {
+    switch (compaction_reason) {
+      case ROCKSDB_NAMESPACE::CompactionReason::kUnknown:
+        return 0x0;
+      case ROCKSDB_NAMESPACE::CompactionReason::kLevelL0FilesNum:
+        return 0x1;
+      case ROCKSDB_NAMESPACE::CompactionReason::kLevelMaxLevelSize:
+        return 0x2;
+      case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeAmplification:
+        return 0x3;
+      case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeRatio:
+        return 0x4;
+      case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSortedRunNum:
+        return 0x5;
+      case ROCKSDB_NAMESPACE::CompactionReason::kFIFOMaxSize:
+        return 0x6;
+      case ROCKSDB_NAMESPACE::CompactionReason::kFIFOReduceNumFiles:
+        return 0x7;
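+      // (Editorial note: from here on the Java byte values are not assigned
+      // in listing order -- kFilesMarkedForCompaction maps to 0x10 while the
+      // reasons listed after it use 0x0A..0x0F. The values mirror the layout
+      // of org.rocksdb.CompactionReason, so, as with the tickers above, they
+      // are presumably pinned for compatibility and should not be
+      // renumbered.)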
case ROCKSDB_NAMESPACE::CompactionReason::kFIFOTtl: + return 0x8; + case ROCKSDB_NAMESPACE::CompactionReason::kManualCompaction: + return 0x9; + case ROCKSDB_NAMESPACE::CompactionReason::kFilesMarkedForCompaction: + return 0x10; + case ROCKSDB_NAMESPACE::CompactionReason::kBottommostFiles: + return 0x0A; + case ROCKSDB_NAMESPACE::CompactionReason::kTtl: + return 0x0B; + case ROCKSDB_NAMESPACE::CompactionReason::kFlush: + return 0x0C; + case ROCKSDB_NAMESPACE::CompactionReason::kExternalSstIngestion: + return 0x0D; + case ROCKSDB_NAMESPACE::CompactionReason::kPeriodicCompaction: + return 0x0E; + case ROCKSDB_NAMESPACE::CompactionReason::kChangeTemperature: + return 0x0F; + case ROCKSDB_NAMESPACE::CompactionReason::kForcedBlobGC: + return 0x11; + case ROCKSDB_NAMESPACE::CompactionReason::kRoundRobinTtl: + return 0x12; + case ROCKSDB_NAMESPACE::CompactionReason::kRefitLevel: + return 0x13; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionReason enum for the + // provided Java org.rocksdb.CompactionReason + static ROCKSDB_NAMESPACE::CompactionReason toCppCompactionReason( + jbyte jcompaction_reason) { + switch (jcompaction_reason) { + case 0x0: + return ROCKSDB_NAMESPACE::CompactionReason::kUnknown; + case 0x1: + return ROCKSDB_NAMESPACE::CompactionReason::kLevelL0FilesNum; + case 0x2: + return ROCKSDB_NAMESPACE::CompactionReason::kLevelMaxLevelSize; + case 0x3: + return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeAmplification; + case 0x4: + return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeRatio; + case 0x5: + return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSortedRunNum; + case 0x6: + return ROCKSDB_NAMESPACE::CompactionReason::kFIFOMaxSize; + case 0x7: + return ROCKSDB_NAMESPACE::CompactionReason::kFIFOReduceNumFiles; + case 0x8: + return ROCKSDB_NAMESPACE::CompactionReason::kFIFOTtl; + case 0x9: + return ROCKSDB_NAMESPACE::CompactionReason::kManualCompaction; + case 0x10: + return ROCKSDB_NAMESPACE::CompactionReason::kFilesMarkedForCompaction; + case 0x0A: + return ROCKSDB_NAMESPACE::CompactionReason::kBottommostFiles; + case 0x0B: + return ROCKSDB_NAMESPACE::CompactionReason::kTtl; + case 0x0C: + return ROCKSDB_NAMESPACE::CompactionReason::kFlush; + case 0x0D: + return ROCKSDB_NAMESPACE::CompactionReason::kExternalSstIngestion; + case 0x0E: + return ROCKSDB_NAMESPACE::CompactionReason::kPeriodicCompaction; + case 0x0F: + return ROCKSDB_NAMESPACE::CompactionReason::kChangeTemperature; + case 0x11: + return ROCKSDB_NAMESPACE::CompactionReason::kForcedBlobGC; + case 0x12: + return ROCKSDB_NAMESPACE::CompactionReason::kRoundRobinTtl; + case 0x13: + return ROCKSDB_NAMESPACE::CompactionReason::kRefitLevel; + default: + // undefined/default + return ROCKSDB_NAMESPACE::CompactionReason::kUnknown; + } + } +}; + +// The portal class for org.rocksdb.WalFileType +class WalFileTypeJni { + public: + // Returns the equivalent org.rocksdb.WalFileType for the provided + // C++ ROCKSDB_NAMESPACE::WalFileType enum + static jbyte toJavaWalFileType( + const ROCKSDB_NAMESPACE::WalFileType& wal_file_type) { + switch (wal_file_type) { + case ROCKSDB_NAMESPACE::WalFileType::kArchivedLogFile: + return 0x0; + case ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile: + return 0x1; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::WalFileType enum for the + // provided Java org.rocksdb.WalFileType + static ROCKSDB_NAMESPACE::WalFileType toCppWalFileType(jbyte jwal_file_type) { + switch 
(jwal_file_type) {
+      case 0x0:
+        return ROCKSDB_NAMESPACE::WalFileType::kArchivedLogFile;
+      case 0x1:
+        return ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile;
+      default:
+        // undefined/default
+        return ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile;
+    }
+  }
+};
+
+class LogFileJni : public JavaClass {
+ public:
+  /**
+   * Create a new Java org.rocksdb.LogFile object.
+   *
+   * @param env A pointer to the Java environment
+   * @param log_file A Cpp log file object
+   *
+   * @return A reference to a Java org.rocksdb.LogFile object, or
+   *     nullptr if an exception occurs
+   */
+  static jobject fromCppLogFile(JNIEnv* env,
+                                ROCKSDB_NAMESPACE::LogFile* log_file) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+
+    jmethodID mid =
+        env->GetMethodID(jclazz, "<init>", "(Ljava/lang/String;JBJJ)V");
+    if (mid == nullptr) {
+      // exception thrown: NoSuchMethodException or OutOfMemoryError
+      return nullptr;
+    }
+
+    std::string path_name = log_file->PathName();
+    jstring jpath_name =
+        ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &path_name, true);
+    if (env->ExceptionCheck()) {
+      // exception occurred creating java string
+      return nullptr;
+    }
+
+    jobject jlog_file = env->NewObject(
+        jclazz, mid, jpath_name, static_cast<jlong>(log_file->LogNumber()),
+        ROCKSDB_NAMESPACE::WalFileTypeJni::toJavaWalFileType(log_file->Type()),
+        static_cast<jlong>(log_file->StartSequence()),
+        static_cast<jlong>(log_file->SizeFileBytes()));
+
+    if (env->ExceptionCheck()) {
+      env->DeleteLocalRef(jpath_name);
+      return nullptr;
+    }
+
+    // cleanup
+    env->DeleteLocalRef(jpath_name);
+
+    return jlog_file;
+  }
+
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/LogFile");
+  }
+};
+
+class LiveFileMetaDataJni : public JavaClass {
+ public:
+  /**
+   * Create a new Java org.rocksdb.LiveFileMetaData object.
+ * + * @param env A pointer to the Java environment + * @param live_file_meta_data A Cpp live file meta data object + * + * @return A reference to a Java org.rocksdb.LiveFileMetaData object, or + * nullptr if an an exception occurs + */ + static jobject fromCppLiveFileMetaData( + JNIEnv* env, ROCKSDB_NAMESPACE::LiveFileMetaData* live_file_meta_data) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID mid = env->GetMethodID( + jclazz, "", + "([BILjava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ)V"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + jbyteArray jcolumn_family_name = ROCKSDB_NAMESPACE::JniUtil::copyBytes( + env, live_file_meta_data->column_family_name); + if (jcolumn_family_name == nullptr) { + // exception occurred creating java byte array + return nullptr; + } + + jstring jfile_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( + env, &live_file_meta_data->name, true); + if (env->ExceptionCheck()) { + // exception occurred creating java string + env->DeleteLocalRef(jcolumn_family_name); + return nullptr; + } + + jstring jpath = ROCKSDB_NAMESPACE::JniUtil::toJavaString( + env, &live_file_meta_data->db_path, true); + if (env->ExceptionCheck()) { + // exception occurred creating java string + env->DeleteLocalRef(jcolumn_family_name); + env->DeleteLocalRef(jfile_name); + return nullptr; + } + + jbyteArray jsmallest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( + env, live_file_meta_data->smallestkey); + if (jsmallest_key == nullptr) { + // exception occurred creating java byte array + env->DeleteLocalRef(jcolumn_family_name); + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + return nullptr; + } + + jbyteArray jlargest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( + env, live_file_meta_data->largestkey); + if (jlargest_key == nullptr) { + // exception occurred creating java byte array + env->DeleteLocalRef(jcolumn_family_name); + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + env->DeleteLocalRef(jsmallest_key); + return nullptr; + } + + jobject jlive_file_meta_data = env->NewObject( + jclazz, mid, jcolumn_family_name, + static_cast(live_file_meta_data->level), jfile_name, jpath, + static_cast(live_file_meta_data->size), + static_cast(live_file_meta_data->smallest_seqno), + static_cast(live_file_meta_data->largest_seqno), jsmallest_key, + jlargest_key, + static_cast(live_file_meta_data->num_reads_sampled), + static_cast(live_file_meta_data->being_compacted), + static_cast(live_file_meta_data->num_entries), + static_cast(live_file_meta_data->num_deletions)); + + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jcolumn_family_name); + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + env->DeleteLocalRef(jsmallest_key); + env->DeleteLocalRef(jlargest_key); + return nullptr; + } + + // cleanup + env->DeleteLocalRef(jcolumn_family_name); + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + env->DeleteLocalRef(jsmallest_key); + env->DeleteLocalRef(jlargest_key); + + return jlive_file_meta_data; + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/LiveFileMetaData"); + } +}; + +class SstFileMetaDataJni : public JavaClass { + public: + /** + * Create a new Java org.rocksdb.SstFileMetaData object. 
+ * + * @param env A pointer to the Java environment + * @param sst_file_meta_data A Cpp sst file meta data object + * + * @return A reference to a Java org.rocksdb.SstFileMetaData object, or + * nullptr if an an exception occurs + */ + static jobject fromCppSstFileMetaData( + JNIEnv* env, + const ROCKSDB_NAMESPACE::SstFileMetaData* sst_file_meta_data) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID mid = env->GetMethodID( + jclazz, "", "(Ljava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ)V"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + jstring jfile_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( + env, &sst_file_meta_data->name, true); + if (jfile_name == nullptr) { + // exception occurred creating java byte array + return nullptr; + } + + jstring jpath = ROCKSDB_NAMESPACE::JniUtil::toJavaString( + env, &sst_file_meta_data->db_path, true); + if (jpath == nullptr) { + // exception occurred creating java byte array + env->DeleteLocalRef(jfile_name); + return nullptr; + } + + jbyteArray jsmallest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( + env, sst_file_meta_data->smallestkey); + if (jsmallest_key == nullptr) { + // exception occurred creating java byte array + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + return nullptr; + } + + jbyteArray jlargest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( + env, sst_file_meta_data->largestkey); + if (jlargest_key == nullptr) { + // exception occurred creating java byte array + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + env->DeleteLocalRef(jsmallest_key); + return nullptr; + } + + jobject jsst_file_meta_data = env->NewObject( + jclazz, mid, jfile_name, jpath, + static_cast(sst_file_meta_data->size), + static_cast(sst_file_meta_data->smallest_seqno), + static_cast(sst_file_meta_data->largest_seqno), jsmallest_key, + jlargest_key, static_cast(sst_file_meta_data->num_reads_sampled), + static_cast(sst_file_meta_data->being_compacted), + static_cast(sst_file_meta_data->num_entries), + static_cast(sst_file_meta_data->num_deletions)); + + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + env->DeleteLocalRef(jsmallest_key); + env->DeleteLocalRef(jlargest_key); + return nullptr; + } + + // cleanup + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + env->DeleteLocalRef(jsmallest_key); + env->DeleteLocalRef(jlargest_key); + + return jsst_file_meta_data; + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/SstFileMetaData"); + } +}; + +class LevelMetaDataJni : public JavaClass { + public: + /** + * Create a new Java org.rocksdb.LevelMetaData object. 
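+   *
+   * Editorial note (not upstream): fromCppLevelMetaData keeps each element's
+   * local reference alive after SetObjectArrayElement until the enclosing
+   * JNI call returns. For levels with very many files, a sketch that
+   * releases each reference eagerly avoids pressure on the JNI local
+   * reference table:
+   * <pre>
+   *   env->SetObjectArrayElement(jfiles, i++, jfile);
+   *   env->DeleteLocalRef(jfile);  // the array now holds the only needed ref
+   * </pre>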
+ * + * @param env A pointer to the Java environment + * @param level_meta_data A Cpp level meta data object + * + * @return A reference to a Java org.rocksdb.LevelMetaData object, or + * nullptr if an an exception occurs + */ + static jobject fromCppLevelMetaData( + JNIEnv* env, const ROCKSDB_NAMESPACE::LevelMetaData* level_meta_data) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID mid = env->GetMethodID(jclazz, "", + "(IJ[Lorg/rocksdb/SstFileMetaData;)V"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + const jsize jlen = static_cast(level_meta_data->files.size()); + jobjectArray jfiles = + env->NewObjectArray(jlen, SstFileMetaDataJni::getJClass(env), nullptr); + if (jfiles == nullptr) { + // exception thrown: OutOfMemoryError + return nullptr; + } + + jsize i = 0; + for (auto it = level_meta_data->files.begin(); + it != level_meta_data->files.end(); ++it) { + jobject jfile = SstFileMetaDataJni::fromCppSstFileMetaData(env, &(*it)); + if (jfile == nullptr) { + // exception occurred + env->DeleteLocalRef(jfiles); + return nullptr; + } + env->SetObjectArrayElement(jfiles, i++, jfile); + } + + jobject jlevel_meta_data = + env->NewObject(jclazz, mid, static_cast(level_meta_data->level), + static_cast(level_meta_data->size), jfiles); + + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jfiles); + return nullptr; + } + + // cleanup + env->DeleteLocalRef(jfiles); + + return jlevel_meta_data; + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/LevelMetaData"); + } +}; + +class ColumnFamilyMetaDataJni : public JavaClass { + public: + /** + * Create a new Java org.rocksdb.ColumnFamilyMetaData object. 
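+   *
+   * Editorial illustration (not upstream), assuming a previously unwrapped
+   * DB pointer `db`: callers typically fill the C++ struct first and then
+   * convert it in one step:
+   * <pre>
+   *   ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta;
+   *   db->GetColumnFamilyMetaData(&cf_meta);
+   *   jobject jcf_meta =
+   *       ColumnFamilyMetaDataJni::fromCppColumnFamilyMetaData(env, &cf_meta);
+   * </pre>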
+ * + * @param env A pointer to the Java environment + * @param column_famly_meta_data A Cpp live file meta data object + * + * @return A reference to a Java org.rocksdb.ColumnFamilyMetaData object, or + * nullptr if an an exception occurs + */ + static jobject fromCppColumnFamilyMetaData( + JNIEnv* env, + const ROCKSDB_NAMESPACE::ColumnFamilyMetaData* column_famly_meta_data) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID mid = env->GetMethodID(jclazz, "", + "(JJ[B[Lorg/rocksdb/LevelMetaData;)V"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + jbyteArray jname = ROCKSDB_NAMESPACE::JniUtil::copyBytes( + env, column_famly_meta_data->name); + if (jname == nullptr) { + // exception occurred creating java byte array + return nullptr; + } + + const jsize jlen = + static_cast(column_famly_meta_data->levels.size()); + jobjectArray jlevels = + env->NewObjectArray(jlen, LevelMetaDataJni::getJClass(env), nullptr); + if (jlevels == nullptr) { + // exception thrown: OutOfMemoryError + env->DeleteLocalRef(jname); + return nullptr; + } + + jsize i = 0; + for (auto it = column_famly_meta_data->levels.begin(); + it != column_famly_meta_data->levels.end(); ++it) { + jobject jlevel = LevelMetaDataJni::fromCppLevelMetaData(env, &(*it)); + if (jlevel == nullptr) { + // exception occurred + env->DeleteLocalRef(jname); + env->DeleteLocalRef(jlevels); + return nullptr; + } + env->SetObjectArrayElement(jlevels, i++, jlevel); + } + + jobject jcolumn_family_meta_data = env->NewObject( + jclazz, mid, static_cast(column_famly_meta_data->size), + static_cast(column_famly_meta_data->file_count), jname, jlevels); + + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jname); + env->DeleteLocalRef(jlevels); + return nullptr; + } + + // cleanup + env->DeleteLocalRef(jname); + env->DeleteLocalRef(jlevels); + + return jcolumn_family_meta_data; + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/ColumnFamilyMetaData"); + } +}; + +// The portal class for org.rocksdb.AbstractTraceWriter +class AbstractTraceWriterJni + : public RocksDBNativeClass< + const ROCKSDB_NAMESPACE::TraceWriterJniCallback*, + AbstractTraceWriterJni> { + public: + /** + * Get the Java Class org.rocksdb.AbstractTraceWriter + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, + "org/rocksdb/AbstractTraceWriter"); + } + + /** + * Get the Java Method: AbstractTraceWriter#write + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getWriteProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "writeProxy", "(J)S"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractTraceWriter#closeWriter + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID 
getCloseWriterProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "closeWriterProxy", "()S"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractTraceWriter#getFileSize + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getGetFileSizeMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "getFileSize", "()J"); + assert(mid != nullptr); + return mid; + } +}; + +// The portal class for org.rocksdb.AbstractWalFilter +class AbstractWalFilterJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.AbstractWalFilter + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractWalFilter"); + } + + /** + * Get the Java Method: AbstractWalFilter#columnFamilyLogNumberMap + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getColumnFamilyLogNumberMapMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = + env->GetMethodID(jclazz, "columnFamilyLogNumberMap", + "(Ljava/util/Map;Ljava/util/Map;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractTraceWriter#logRecordFoundProxy + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getLogRecordFoundProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = env->GetMethodID(jclazz, "logRecordFoundProxy", + "(JLjava/lang/String;JJ)S"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractTraceWriter#name + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID or nullptr if the class or method id could not + * be retrieved + */ + static jmethodID getNameMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = + env->GetMethodID(jclazz, "name", "()Ljava/lang/String;"); + assert(mid != nullptr); + return mid; + } +}; + +// The portal class for org.rocksdb.WalProcessingOption +class WalProcessingOptionJni { + public: + // Returns the equivalent org.rocksdb.WalProcessingOption for the provided + // C++ ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption enum + static jbyte toJavaWalProcessingOption( + const ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption& + wal_processing_option) { + switch (wal_processing_option) { + case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: + kContinueProcessing: + 
return 0x0; + case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: + kIgnoreCurrentRecord: + return 0x1; + case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kStopReplay: + return 0x2; + case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kCorruptedRecord: + return 0x3; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ + // ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption enum for the provided + // Java org.rocksdb.WalProcessingOption + static ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption + toCppWalProcessingOption(jbyte jwal_processing_option) { + switch (jwal_processing_option) { + case 0x0: + return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: + kContinueProcessing; + case 0x1: + return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: + kIgnoreCurrentRecord; + case 0x2: + return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kStopReplay; + case 0x3: + return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: + kCorruptedRecord; + default: + // undefined/default + return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: + kCorruptedRecord; + } + } +}; + +// The portal class for org.rocksdb.ReusedSynchronisationType +class ReusedSynchronisationTypeJni { + public: + // Returns the equivalent org.rocksdb.ReusedSynchronisationType for the + // provided C++ ROCKSDB_NAMESPACE::ReusedSynchronisationType enum + static jbyte toJavaReusedSynchronisationType( + const ROCKSDB_NAMESPACE::ReusedSynchronisationType& + reused_synchronisation_type) { + switch (reused_synchronisation_type) { + case ROCKSDB_NAMESPACE::ReusedSynchronisationType::MUTEX: + return 0x0; + case ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX: + return 0x1; + case ROCKSDB_NAMESPACE::ReusedSynchronisationType::THREAD_LOCAL: + return 0x2; + default: + return 0x7F; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::ReusedSynchronisationType + // enum for the provided Java org.rocksdb.ReusedSynchronisationType + static ROCKSDB_NAMESPACE::ReusedSynchronisationType + toCppReusedSynchronisationType(jbyte reused_synchronisation_type) { + switch (reused_synchronisation_type) { + case 0x0: + return ROCKSDB_NAMESPACE::ReusedSynchronisationType::MUTEX; + case 0x1: + return ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX; + case 0x2: + return ROCKSDB_NAMESPACE::ReusedSynchronisationType::THREAD_LOCAL; + default: + // undefined/default + return ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX; + } + } +}; +// The portal class for org.rocksdb.SanityLevel +class SanityLevelJni { + public: + // Returns the equivalent org.rocksdb.SanityLevel for the provided + // C++ ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel enum + static jbyte toJavaSanityLevel( + const ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel& sanity_level) { + switch (sanity_level) { + case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel::kSanityLevelNone: + return 0x0; + case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel:: + kSanityLevelLooselyCompatible: + return 0x1; + case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel:: + kSanityLevelExactMatch: + return -0x01; + default: + return -0x01; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel + // enum for the provided Java org.rocksdb.SanityLevel + static ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel toCppSanityLevel( + jbyte sanity_level) { + switch (sanity_level) { + case 0x0: + return ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelNone; + case 0x1: + return 
ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelLooselyCompatible; + default: + // undefined/default + return ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelExactMatch; + } + } +}; + +// The portal class for org.rocksdb.PrepopulateBlobCache +class PrepopulateBlobCacheJni { + public: + // Returns the equivalent org.rocksdb.PrepopulateBlobCache for the provided + // C++ ROCKSDB_NAMESPACE::PrepopulateBlobCache enum + static jbyte toJavaPrepopulateBlobCache( + ROCKSDB_NAMESPACE::PrepopulateBlobCache prepopulate_blob_cache) { + switch (prepopulate_blob_cache) { + case ROCKSDB_NAMESPACE::PrepopulateBlobCache::kDisable: + return 0x0; + case ROCKSDB_NAMESPACE::PrepopulateBlobCache::kFlushOnly: + return 0x1; + default: + return 0x7f; // undefined + } + } + + // Returns the equivalent C++ ROCKSDB_NAMESPACE::PrepopulateBlobCache enum for + // the provided Java org.rocksdb.PrepopulateBlobCache + static ROCKSDB_NAMESPACE::PrepopulateBlobCache toCppPrepopulateBlobCache( + jbyte jprepopulate_blob_cache) { + switch (jprepopulate_blob_cache) { + case 0x0: + return ROCKSDB_NAMESPACE::PrepopulateBlobCache::kDisable; + case 0x1: + return ROCKSDB_NAMESPACE::PrepopulateBlobCache::kFlushOnly; + case 0x7F: + default: + // undefined/default + return ROCKSDB_NAMESPACE::PrepopulateBlobCache::kDisable; + } + } +}; + +// The portal class for org.rocksdb.AbstractListener.EnabledEventCallback +class EnabledEventCallbackJni { + public: + // Returns the set of equivalent C++ + // ROCKSDB_NAMESPACE::EnabledEventCallbackJni::EnabledEventCallback enums for + // the provided Java jenabled_event_callback_values + static std::set toCppEnabledEventCallbacks( + jlong jenabled_event_callback_values) { + std::set enabled_event_callbacks; + for (size_t i = 0; i < EnabledEventCallback::NUM_ENABLED_EVENT_CALLBACK; + ++i) { + if (((1ULL << i) & jenabled_event_callback_values) > 0) { + enabled_event_callbacks.emplace(static_cast(i)); + } + } + return enabled_event_callbacks; + } +}; + +// The portal class for org.rocksdb.AbstractEventListener +class AbstractEventListenerJni + : public RocksDBNativeClass< + const ROCKSDB_NAMESPACE::EventListenerJniCallback*, + AbstractEventListenerJni> { + public: + /** + * Get the Java Class org.rocksdb.AbstractEventListener + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, + "org/rocksdb/AbstractEventListener"); + } + + /** + * Get the Java Method: AbstractEventListener#onFlushCompletedProxy + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnFlushCompletedProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID(jclazz, "onFlushCompletedProxy", + "(JLorg/rocksdb/FlushJobInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onFlushBeginProxy + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnFlushBeginProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID(jclazz, "onFlushBeginProxy", + "(JLorg/rocksdb/FlushJobInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the 
Java Method: AbstractEventListener#onTableFileDeleted + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnTableFileDeletedMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onTableFileDeleted", "(Lorg/rocksdb/TableFileDeletionInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onCompactionBeginProxy + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnCompactionBeginProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = + env->GetMethodID(jclazz, "onCompactionBeginProxy", + "(JLorg/rocksdb/CompactionJobInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onCompactionCompletedProxy + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnCompactionCompletedProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = + env->GetMethodID(jclazz, "onCompactionCompletedProxy", + "(JLorg/rocksdb/CompactionJobInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onTableFileCreated + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnTableFileCreatedMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onTableFileCreated", "(Lorg/rocksdb/TableFileCreationInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onTableFileCreationStarted + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnTableFileCreationStartedMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = + env->GetMethodID(jclazz, "onTableFileCreationStarted", + "(Lorg/rocksdb/TableFileCreationBriefInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onMemTableSealed + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnMemTableSealedMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID(jclazz, "onMemTableSealed", + "(Lorg/rocksdb/MemTableInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: + * AbstractEventListener#onColumnFamilyHandleDeletionStarted + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnColumnFamilyHandleDeletionStartedMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = + env->GetMethodID(jclazz, "onColumnFamilyHandleDeletionStarted", + "(Lorg/rocksdb/ColumnFamilyHandle;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onExternalFileIngestedProxy + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnExternalFileIngestedProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); 
+ assert(jclazz != nullptr); + static jmethodID mid = + env->GetMethodID(jclazz, "onExternalFileIngestedProxy", + "(JLorg/rocksdb/ExternalFileIngestionInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onBackgroundError + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnBackgroundErrorProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID(jclazz, "onBackgroundErrorProxy", + "(BLorg/rocksdb/Status;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onStallConditionsChanged + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnStallConditionsChangedMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID(jclazz, "onStallConditionsChanged", + "(Lorg/rocksdb/WriteStallInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onFileReadFinish + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnFileReadFinishMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onFileReadFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onFileWriteFinish + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnFileWriteFinishMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onFileWriteFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onFileFlushFinish + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnFileFlushFinishMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onFileFlushFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onFileSyncFinish + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnFileSyncFinishMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onFileSyncFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onFileRangeSyncFinish + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnFileRangeSyncFinishMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onFileRangeSyncFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onFileTruncateFinish + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static 
jmethodID getOnFileTruncateFinishMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onFileTruncateFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onFileCloseFinish + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnFileCloseFinishMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID( + jclazz, "onFileCloseFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#shouldBeNotifiedOnFileIO + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getShouldBeNotifiedOnFileIOMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = + env->GetMethodID(jclazz, "shouldBeNotifiedOnFileIO", "()Z"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onErrorRecoveryBeginProxy + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnErrorRecoveryBeginProxyMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID(jclazz, "onErrorRecoveryBeginProxy", + "(BLorg/rocksdb/Status;)Z"); + assert(mid != nullptr); + return mid; + } + + /** + * Get the Java Method: AbstractEventListener#onErrorRecoveryCompleted + * + * @param env A pointer to the Java environment + * + * @return The Java Method ID + */ + static jmethodID getOnErrorRecoveryCompletedMethodId(JNIEnv* env) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID mid = env->GetMethodID(jclazz, "onErrorRecoveryCompleted", + "(Lorg/rocksdb/Status;)V"); + assert(mid != nullptr); + return mid; + } +}; + +class FlushJobInfoJni : public JavaClass { + public: + /** + * Create a new Java org.rocksdb.FlushJobInfo object. 
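+   *
+   * Editorial illustration (not upstream): the event-listener bridge is
+   * expected to convert and dispatch roughly as follows, where
+   * m_jcallback_obj (the JniCallback's global reference to the Java
+   * listener) and `db` are assumed to be in scope:
+   * <pre>
+   *   jobject jflush_job_info =
+   *       FlushJobInfoJni::fromCppFlushJobInfo(env, &flush_job_info);
+   *   env->CallVoidMethod(
+   *       m_jcallback_obj,
+   *       AbstractEventListenerJni::getOnFlushCompletedProxyMethodId(env),
+   *       GET_CPLUSPLUS_POINTER(db), jflush_job_info);
+   * </pre>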
+   *
+   * @param env A pointer to the Java environment
+   * @param flush_job_info A Cpp flush job info object
+   *
+   * @return A reference to a Java org.rocksdb.FlushJobInfo object, or
+   *     nullptr if an exception occurs
+   */
+  static jobject fromCppFlushJobInfo(
+      JNIEnv* env, const ROCKSDB_NAMESPACE::FlushJobInfo* flush_job_info) {
+    jclass jclazz = getJClass(env);
+    if (jclazz == nullptr) {
+      // exception occurred accessing class
+      return nullptr;
+    }
+    static jmethodID ctor = getConstructorMethodId(env, jclazz);
+    assert(ctor != nullptr);
+    jstring jcf_name = JniUtil::toJavaString(env, &flush_job_info->cf_name);
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+    jstring jfile_path =
+        JniUtil::toJavaString(env, &flush_job_info->file_path);
+    if (env->ExceptionCheck()) {
+      // release the earlier, still-valid local reference
+      env->DeleteLocalRef(jcf_name);
+      return nullptr;
+    }
+    jobject jtable_properties = TablePropertiesJni::fromCppTableProperties(
+        env, flush_job_info->table_properties);
+    if (jtable_properties == nullptr) {
+      env->DeleteLocalRef(jcf_name);
+      env->DeleteLocalRef(jfile_path);
+      return nullptr;
+    }
+    return env->NewObject(
+        jclazz, ctor, static_cast<jlong>(flush_job_info->cf_id), jcf_name,
+        jfile_path, static_cast<jlong>(flush_job_info->thread_id),
+        static_cast<jint>(flush_job_info->job_id),
+        static_cast<jboolean>(flush_job_info->triggered_writes_slowdown),
+        static_cast<jboolean>(flush_job_info->triggered_writes_stop),
+        static_cast<jlong>(flush_job_info->smallest_seqno),
+        static_cast<jlong>(flush_job_info->largest_seqno), jtable_properties,
+        static_cast<jbyte>(flush_job_info->flush_reason));
+  }
+
+  static jclass getJClass(JNIEnv* env) {
+    return JavaClass::getJClass(env, "org/rocksdb/FlushJobInfo");
+  }
+
+  static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) {
+    return env->GetMethodID(clazz, "<init>",
+                            "(JLjava/lang/String;Ljava/lang/String;JIZZJJLorg/"
+                            "rocksdb/TableProperties;B)V");
+  }
+};
+
+class TableFileDeletionInfoJni : public JavaClass {
+ public:
+  /**
+   * Create a new Java org.rocksdb.TableFileDeletionInfo object.
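+   *
+   * Editorial illustration (not upstream): paired with
+   * AbstractEventListenerJni#getOnTableFileDeletedMethodId, the conversion
+   * and dispatch look roughly like the following, with m_jcallback_obj
+   * assumed to be the JniCallback's global reference:
+   * <pre>
+   *   jobject jinfo =
+   *       TableFileDeletionInfoJni::fromCppTableFileDeletionInfo(env, &info);
+   *   env->CallVoidMethod(m_jcallback_obj,
+   *       AbstractEventListenerJni::getOnTableFileDeletedMethodId(env),
+   *       jinfo);
+   * </pre>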
+ * + * @param env A pointer to the Java environment + * @param file_del_info A Cpp table file deletion info object + * + * @return A reference to a Java org.rocksdb.TableFileDeletionInfo object, or + * nullptr if an an exception occurs + */ + static jobject fromCppTableFileDeletionInfo( + JNIEnv* env, + const ROCKSDB_NAMESPACE::TableFileDeletionInfo* file_del_info) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + jstring jdb_name = JniUtil::toJavaString(env, &file_del_info->db_name); + if (env->ExceptionCheck()) { + return nullptr; + } + jobject jstatus = StatusJni::construct(env, file_del_info->status); + if (jstatus == nullptr) { + env->DeleteLocalRef(jdb_name); + return nullptr; + } + return env->NewObject(jclazz, ctor, jdb_name, + JniUtil::toJavaString(env, &file_del_info->file_path), + static_cast(file_del_info->job_id), jstatus); + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/TableFileDeletionInfo"); + } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID( + clazz, "", + "(Ljava/lang/String;Ljava/lang/String;ILorg/rocksdb/Status;)V"); + } +}; + +class CompactionJobInfoJni : public JavaClass { + public: + static jobject fromCppCompactionJobInfo( + JNIEnv* env, + const ROCKSDB_NAMESPACE::CompactionJobInfo* compaction_job_info) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + return env->NewObject(jclazz, ctor, + GET_CPLUSPLUS_POINTER(compaction_job_info)); + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/CompactionJobInfo"); + } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID(clazz, "", "(J)V"); + } +}; + +class TableFileCreationInfoJni : public JavaClass { + public: + static jobject fromCppTableFileCreationInfo( + JNIEnv* env, const ROCKSDB_NAMESPACE::TableFileCreationInfo* info) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + jstring jdb_name = JniUtil::toJavaString(env, &info->db_name); + if (env->ExceptionCheck()) { + return nullptr; + } + jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jdb_name); + return nullptr; + } + jstring jfile_path = JniUtil::toJavaString(env, &info->file_path); + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jdb_name); + env->DeleteLocalRef(jcf_name); + return nullptr; + } + jobject jtable_properties = + TablePropertiesJni::fromCppTableProperties(env, info->table_properties); + if (jtable_properties == nullptr) { + env->DeleteLocalRef(jdb_name); + env->DeleteLocalRef(jcf_name); + return nullptr; + } + jobject jstatus = StatusJni::construct(env, info->status); + if (jstatus == nullptr) { + env->DeleteLocalRef(jdb_name); + env->DeleteLocalRef(jcf_name); + env->DeleteLocalRef(jtable_properties); + return nullptr; + } + return env->NewObject(jclazz, ctor, static_cast(info->file_size), + jtable_properties, jstatus, jdb_name, jcf_name, + jfile_path, static_cast(info->job_id), + static_cast(info->reason)); + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/TableFileCreationInfo"); 
+ } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID( + clazz, "", + "(JLorg/rocksdb/TableProperties;Lorg/rocksdb/Status;Ljava/lang/" + "String;Ljava/lang/String;Ljava/lang/String;IB)V"); + } +}; + +class TableFileCreationBriefInfoJni : public JavaClass { + public: + static jobject fromCppTableFileCreationBriefInfo( + JNIEnv* env, const ROCKSDB_NAMESPACE::TableFileCreationBriefInfo* info) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + jstring jdb_name = JniUtil::toJavaString(env, &info->db_name); + if (env->ExceptionCheck()) { + return nullptr; + } + jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jdb_name); + return nullptr; + } + jstring jfile_path = JniUtil::toJavaString(env, &info->file_path); + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jdb_name); + env->DeleteLocalRef(jcf_name); + return nullptr; + } + return env->NewObject(jclazz, ctor, jdb_name, jcf_name, jfile_path, + static_cast(info->job_id), + static_cast(info->reason)); + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/TableFileCreationBriefInfo"); + } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID( + clazz, "", + "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;IB)V"); + } +}; + +class MemTableInfoJni : public JavaClass { + public: + static jobject fromCppMemTableInfo( + JNIEnv* env, const ROCKSDB_NAMESPACE::MemTableInfo* info) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); + if (env->ExceptionCheck()) { + return nullptr; + } + return env->NewObject(jclazz, ctor, jcf_name, + static_cast(info->first_seqno), + static_cast(info->earliest_seqno), + static_cast(info->num_entries), + static_cast(info->num_deletes)); + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/MemTableInfo"); + } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID(clazz, "", "(Ljava/lang/String;JJJJ)V"); + } +}; + +class ExternalFileIngestionInfoJni : public JavaClass { + public: + static jobject fromCppExternalFileIngestionInfo( + JNIEnv* env, const ROCKSDB_NAMESPACE::ExternalFileIngestionInfo* info) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); + if (env->ExceptionCheck()) { + return nullptr; + } + jstring jexternal_file_path = + JniUtil::toJavaString(env, &info->external_file_path); + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jcf_name); + return nullptr; + } + jstring jinternal_file_path = + JniUtil::toJavaString(env, &info->internal_file_path); + if (env->ExceptionCheck()) { + env->DeleteLocalRef(jcf_name); + env->DeleteLocalRef(jexternal_file_path); + return nullptr; + } + jobject jtable_properties = + TablePropertiesJni::fromCppTableProperties(env, info->table_properties); + if (jtable_properties == nullptr) { + env->DeleteLocalRef(jcf_name); + env->DeleteLocalRef(jexternal_file_path); + env->DeleteLocalRef(jinternal_file_path); + return nullptr; + } + return env->NewObject( + 
jclazz, ctor, jcf_name, jexternal_file_path, jinternal_file_path, + static_cast(info->global_seqno), jtable_properties); + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/ExternalFileIngestionInfo"); + } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID(clazz, "", + "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/" + "String;JLorg/rocksdb/TableProperties;)V"); + } +}; + +class WriteStallInfoJni : public JavaClass { + public: + static jobject fromCppWriteStallInfo( + JNIEnv* env, const ROCKSDB_NAMESPACE::WriteStallInfo* info) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); + if (env->ExceptionCheck()) { + return nullptr; + } + return env->NewObject(jclazz, ctor, jcf_name, + static_cast(info->condition.cur), + static_cast(info->condition.prev)); + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/WriteStallInfo"); + } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID(clazz, "", "(Ljava/lang/String;BB)V"); + } +}; + +class FileOperationInfoJni : public JavaClass { + public: + static jobject fromCppFileOperationInfo( + JNIEnv* env, const ROCKSDB_NAMESPACE::FileOperationInfo* info) { + jclass jclazz = getJClass(env); + assert(jclazz != nullptr); + static jmethodID ctor = getConstructorMethodId(env, jclazz); + assert(ctor != nullptr); + jstring jpath = JniUtil::toJavaString(env, &info->path); + if (env->ExceptionCheck()) { + return nullptr; + } + jobject jstatus = StatusJni::construct(env, info->status); + if (jstatus == nullptr) { + env->DeleteLocalRef(jpath); + return nullptr; + } + return env->NewObject( + jclazz, ctor, jpath, static_cast(info->offset), + static_cast(info->length), + static_cast(info->start_ts.time_since_epoch().count()), + static_cast(info->duration.count()), jstatus); + } + + static jclass getJClass(JNIEnv* env) { + return JavaClass::getJClass(env, "org/rocksdb/FileOperationInfo"); + } + + static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { + return env->GetMethodID(clazz, "", + "(Ljava/lang/String;JJJJLorg/rocksdb/Status;)V"); + } +}; +} // namespace ROCKSDB_NAMESPACE +#endif // JAVA_ROCKSJNI_PORTAL_H_ diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/statisticsjni.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/statisticsjni.h new file mode 100644 index 0000000000..3262b296cf --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/statisticsjni.h @@ -0,0 +1,34 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::Statistics
+
+#ifndef JAVA_ROCKSJNI_STATISTICSJNI_H_
+#define JAVA_ROCKSJNI_STATISTICSJNI_H_
+
+#include <jni.h>
+#include <memory>
+#include <set>
+
+#include "monitoring/statistics_impl.h"
+#include "rocksdb/statistics.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class StatisticsJni : public StatisticsImpl {
+ public:
+  StatisticsJni(std::shared_ptr<Statistics> stats);
+  StatisticsJni(std::shared_ptr<Statistics> stats,
+                const std::set<uint32_t> ignore_histograms);
+  virtual bool HistEnabledForType(uint32_t type) const override;
+
+ private:
+  const std::set<uint32_t> m_ignore_histograms;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_STATISTICSJNI_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/table_filter_jnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/table_filter_jnicallback.h
new file mode 100644
index 0000000000..0ef404ca22
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/table_filter_jnicallback.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::TableFilter.
+
+#ifndef JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_
+#define JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_
+
+#include <jni.h>
+
+#include <functional>
+#include <memory>
+
+#include "rocksdb/table_properties.h"
+#include "rocksjni/jnicallback.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TableFilterJniCallback : public JniCallback {
+ public:
+  TableFilterJniCallback(JNIEnv* env, jobject jtable_filter);
+  std::function<bool(const ROCKSDB_NAMESPACE::TableProperties&)>
+      GetTableFilterFunction();
+
+ private:
+  jmethodID m_jfilter_methodid;
+  std::function<bool(const ROCKSDB_NAMESPACE::TableProperties&)>
+      m_table_filter_function;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/trace_writer_jnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/trace_writer_jnicallback.h
new file mode 100644
index 0000000000..c82a3a72cf
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/trace_writer_jnicallback.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::TraceWriter.
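+//
+// Editorial sketch (not part of the upstream file), assuming `env`,
+// `jtrace_writer`, `db`, and `trace_options` are in scope: a TraceWriter
+// implemented in Java is typically installed through DB::StartTrace, e.g.
+//
+//   std::unique_ptr<ROCKSDB_NAMESPACE::TraceWriter> trace_writer(
+//       new ROCKSDB_NAMESPACE::TraceWriterJniCallback(env, jtrace_writer));
+//   ROCKSDB_NAMESPACE::Status s =
+//       db->StartTrace(trace_options, std::move(trace_writer));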
+
+#ifndef JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_
+#define JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_
+
+#include <jni.h>
+
+#include <memory>
+
+#include "rocksdb/trace_reader_writer.h"
+#include "rocksjni/jnicallback.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TraceWriterJniCallback : public JniCallback, public TraceWriter {
+ public:
+  TraceWriterJniCallback(JNIEnv* env, jobject jtrace_writer);
+  virtual Status Write(const Slice& data);
+  virtual Status Close();
+  virtual uint64_t GetFileSize();
+
+ private:
+  jmethodID m_jwrite_proxy_methodid;
+  jmethodID m_jclose_writer_proxy_methodid;
+  jmethodID m_jget_file_size_methodid;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/transaction_notifier_jnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/transaction_notifier_jnicallback.h
new file mode 100644
index 0000000000..089a5ee4a5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/transaction_notifier_jnicallback.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::TransactionNotifier.
+
+#ifndef JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_
+#define JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_
+
+#include <jni.h>
+
+#include "rocksdb/utilities/transaction.h"
+#include "rocksjni/jnicallback.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+/**
+ * This class acts as a bridge between C++
+ * and Java. The methods in this class will be
+ * called back from the RocksDB TransactionDB or OptimisticTransactionDB
+ * (C++); we then call back to the appropriate Java method, which enables
+ * TransactionNotifier to be implemented in Java.
+ *
+ * Unlike RocksJava's Comparator JNI Callback, we do not attempt
+ * to reduce Java object allocations by caching the Snapshot object
+ * presented to the callback. This could be revisited in the future
+ * if performance is lacking.
+ */
+class TransactionNotifierJniCallback : public JniCallback,
+                                       public TransactionNotifier {
+ public:
+  TransactionNotifierJniCallback(JNIEnv* env, jobject jtransaction_notifier);
+  virtual void SnapshotCreated(const Snapshot* newSnapshot);
+
+ private:
+  jmethodID m_jsnapshot_created_methodID;
+};
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/wal_filter_jnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/wal_filter_jnicallback.h
new file mode 100644
index 0000000000..5cdc659786
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/wal_filter_jnicallback.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::WalFilter.
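+//
+// Editorial sketch (not upstream), assuming `env`, `jwal_filter`, and a
+// `db_options` being prepared for DB::Open: a Java WAL filter is attached
+// through DBOptions::wal_filter before the database is opened, e.g.
+//
+//   auto* wal_filter =
+//       new ROCKSDB_NAMESPACE::WalFilterJniCallback(env, jwal_filter);
+//   db_options.wal_filter = wal_filter;  // consulted during WAL replay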
+
+#ifndef JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_
+#define JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_
+
+#include <jni.h>
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "rocksdb/wal_filter.h"
+#include "rocksjni/jnicallback.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class WalFilterJniCallback : public JniCallback, public WalFilter {
+ public:
+  WalFilterJniCallback(JNIEnv* env, jobject jwal_filter);
+  virtual void ColumnFamilyLogNumberMap(
+      const std::map<uint32_t, uint64_t>& cf_lognumber_map,
+      const std::map<std::string, uint32_t>& cf_name_id_map);
+  virtual WalFilter::WalProcessingOption LogRecordFound(
+      unsigned long long log_number, const std::string& log_file_name,
+      const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed);
+  virtual const char* Name() const;
+
+ private:
+  std::unique_ptr<const char[]> m_name;
+  jmethodID m_column_family_log_number_map_mid;
+  jmethodID m_log_record_found_proxy_mid;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/writebatchhandlerjnicallback.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/writebatchhandlerjnicallback.h
new file mode 100644
index 0000000000..9629797ca7
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/java/rocksjni/writebatchhandlerjnicallback.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file implements the callback "bridge" between Java and C++ for
+// ROCKSDB_NAMESPACE::WriteBatch::Handler.
+
+#ifndef JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_
+#define JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_
+
+#include <jni.h>
+
+#include <functional>
+#include <memory>
+
+#include "rocksdb/write_batch.h"
+#include "rocksjni/jnicallback.h"
+
+namespace ROCKSDB_NAMESPACE {
+/**
+ * This class acts as a bridge between C++
+ * and Java. The methods in this class will be
+ * called back from the RocksDB storage engine (C++)
+ * which calls the appropriate Java method.
+ * This enables Write Batch Handlers to be implemented in Java.
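+ *
+ * Editorial illustration (not upstream), assuming `env`,
+ * `jwrite_batch_handler`, and a `write_batch` pointer are in scope: the
+ * handler is driven by WriteBatch::Iterate, e.g.
+ * <pre>
+ *   WriteBatchHandlerJniCallback handler(env, jwrite_batch_handler);
+ *   ROCKSDB_NAMESPACE::Status s = write_batch->Iterate(&handler);
+ * </pre>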
+ */ +class WriteBatchHandlerJniCallback : public JniCallback, + public WriteBatch::Handler { + public: + WriteBatchHandlerJniCallback(JNIEnv* env, jobject jWriteBackHandler); + Status PutCF(uint32_t column_family_id, const Slice& key, const Slice& value); + void Put(const Slice& key, const Slice& value); + Status MergeCF(uint32_t column_family_id, const Slice& key, + const Slice& value); + void Merge(const Slice& key, const Slice& value); + Status DeleteCF(uint32_t column_family_id, const Slice& key); + void Delete(const Slice& key); + Status SingleDeleteCF(uint32_t column_family_id, const Slice& key); + void SingleDelete(const Slice& key); + Status DeleteRangeCF(uint32_t column_family_id, const Slice& beginKey, + const Slice& endKey); + void DeleteRange(const Slice& beginKey, const Slice& endKey); + void LogData(const Slice& blob); + Status PutBlobIndexCF(uint32_t column_family_id, const Slice& key, + const Slice& value); + Status MarkBeginPrepare(bool); + Status MarkEndPrepare(const Slice& xid); + Status MarkNoop(bool empty_batch); + Status MarkRollback(const Slice& xid); + Status MarkCommit(const Slice& xid); + Status MarkCommitWithTimestamp(const Slice& xid, const Slice& commit_ts); + bool Continue(); + + private: + JNIEnv* m_env; + jmethodID m_jPutCfMethodId; + jmethodID m_jPutMethodId; + jmethodID m_jMergeCfMethodId; + jmethodID m_jMergeMethodId; + jmethodID m_jDeleteCfMethodId; + jmethodID m_jDeleteMethodId; + jmethodID m_jSingleDeleteCfMethodId; + jmethodID m_jSingleDeleteMethodId; + jmethodID m_jDeleteRangeCfMethodId; + jmethodID m_jDeleteRangeMethodId; + jmethodID m_jLogDataMethodId; + jmethodID m_jPutBlobIndexCfMethodId; + jmethodID m_jMarkBeginPrepareMethodId; + jmethodID m_jMarkEndPrepareMethodId; + jmethodID m_jMarkNoopMethodId; + jmethodID m_jMarkRollbackMethodId; + jmethodID m_jMarkCommitMethodId; + jmethodID m_jMarkCommitWithTimestampMethodId; + jmethodID m_jContinueMethodId; + /** + * @return A pointer to a ROCKSDB_NAMESPACE::Status or nullptr if an + * unexpected exception occurred + */ + std::unique_ptr kv_op( + const Slice& key, const Slice& value, + std::function kvFn); + /** + * @return A pointer to a ROCKSDB_NAMESPACE::Status or nullptr if an + * unexpected exception occurred + */ + std::unique_ptr k_op( + const Slice& key, std::function kFn); +}; +} // namespace ROCKSDB_NAMESPACE + +#endif // JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_ diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.cc new file mode 100644 index 0000000000..9e9ad45aee --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.cc @@ -0,0 +1,368 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
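+//
+// Editorial sketch (not upstream): the rolling behavior implemented below is
+// driven entirely by DBOptions; for example, a logger that rolls at 10 MB
+// and keeps the five most recent files (the path is illustrative):
+//
+//   ROCKSDB_NAMESPACE::DBOptions options;
+//   options.max_log_file_size = 10 * 1024 * 1024;
+//   options.keep_log_file_num = 5;
+//   std::shared_ptr<ROCKSDB_NAMESPACE::Logger> logger;
+//   ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::CreateLoggerFromOptions(
+//       "/path/to/db", options, &logger);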
+// +#include "logging/auto_roll_logger.h" + +#include + +#include "file/filename.h" +#include "logging/logging.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/system_clock.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +// -- AutoRollLogger + +AutoRollLogger::AutoRollLogger(const std::shared_ptr& fs, + const std::shared_ptr& clock, + const std::string& dbname, + const std::string& db_log_dir, + size_t log_max_size, + size_t log_file_time_to_roll, + size_t keep_log_file_num, + const InfoLogLevel log_level) + : Logger(log_level), + dbname_(dbname), + db_log_dir_(db_log_dir), + fs_(fs), + clock_(clock), + status_(Status::OK()), + kMaxLogFileSize(log_max_size), + kLogFileTimeToRoll(log_file_time_to_roll), + kKeepLogFileNum(keep_log_file_num), + cached_now(static_cast(clock_->NowMicros() * 1e-6)), + ctime_(cached_now), + cached_now_access_count(0), + call_NowMicros_every_N_records_(100), + mutex_() { + Status s = fs->GetAbsolutePath(dbname, io_options_, &db_absolute_path_, + &io_context_); + if (s.IsNotSupported()) { + db_absolute_path_ = dbname; + } else { + status_ = s; + } + log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_); + if (fs_->FileExists(log_fname_, io_options_, &io_context_).ok()) { + RollLogFile(); + } + GetExistingFiles(); + s = ResetLogger(); + if (s.ok() && status_.ok()) { + status_ = TrimOldLogFiles(); + } +} + +Status AutoRollLogger::ResetLogger() { + TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger"); + status_ = fs_->NewLogger(log_fname_, io_options_, &logger_, &io_context_); + TEST_SYNC_POINT("AutoRollLogger::ResetLogger:AfterNewLogger"); + + if (!status_.ok()) { + return status_; + } + assert(logger_); + logger_->SetInfoLogLevel(Logger::GetInfoLogLevel()); + + if (logger_->GetLogFileSize() == Logger::kDoNotSupportGetLogFileSize) { + status_ = Status::NotSupported( + "The underlying logger doesn't support GetLogFileSize()"); + } + if (status_.ok()) { + cached_now = static_cast(clock_->NowMicros() * 1e-6); + ctime_ = cached_now; + cached_now_access_count = 0; + } + + return status_; +} + +void AutoRollLogger::RollLogFile() { + // This function is called when log is rotating. Two rotations + // can happen quickly (NowMicro returns same value). To not overwrite + // previous log file we increment by one micro second and try again. + uint64_t now = clock_->NowMicros(); + std::string old_fname; + do { + old_fname = + OldInfoLogFileName(dbname_, now, db_absolute_path_, db_log_dir_); + now++; + } while (fs_->FileExists(old_fname, io_options_, &io_context_).ok()); + // Wait for logger_ reference count to turn to 1 as it might be pinned by + // Flush. Pinned Logger can't be closed till Flush is completed on that + // Logger. + while (logger_.use_count() > 1) { + } + // Close the existing logger first to release the existing handle + // before renaming the file using the file system. If this call + // fails there is nothing much we can do and we will continue with the + // rename and hence ignoring the result status. + if (logger_) { + logger_->Close().PermitUncheckedError(); + } + Status s = fs_->RenameFile(log_fname_, old_fname, io_options_, &io_context_); + if (!s.ok()) { + // What should we do on error? + } + old_log_files_.push(old_fname); +} + +void AutoRollLogger::GetExistingFiles() { + { + // Empty the queue to avoid duplicated entries in the queue. 
+    std::queue<std::string> empty;
+    std::swap(old_log_files_, empty);
+  }
+
+  std::string parent_dir;
+  std::vector<std::string> info_log_files;
+  Status s =
+      GetInfoLogFiles(fs_, db_log_dir_, dbname_, &parent_dir, &info_log_files);
+  if (status_.ok()) {
+    status_ = s;
+  }
+  // We need to sort the files before enqueuing them so that when we
+  // delete a file from the front, it is the oldest file.
+  std::sort(info_log_files.begin(), info_log_files.end());
+
+  for (const std::string& f : info_log_files) {
+    old_log_files_.push(parent_dir + "/" + f);
+  }
+}
+
+Status AutoRollLogger::TrimOldLogFiles() {
+  // Here we directly list info files and delete them through FileSystem.
+  // The deletion isn't going through DB, so there are shortcomings:
+  // 1. the deletion is not rate limited by SstFileManager
+  // 2. there is a chance that an I/O will be issued here
+  // Since it's going to be complicated to pass DB object down to
+  // here, we take a simple approach to keep the code easier to
+  // maintain.
+
+  // old_log_files_.empty() is helpful for the corner case that
+  // kKeepLogFileNum == 0. We can instead check kKeepLogFileNum != 0 but
+  // it's essentially the same thing, and checking empty before accessing
+  // the queue feels safer.
+  while (!old_log_files_.empty() && old_log_files_.size() >= kKeepLogFileNum) {
+    Status s =
+        fs_->DeleteFile(old_log_files_.front(), io_options_, &io_context_);
+    // Remove the file from the tracking anyway. It's possible that
+    // DB cleaned up the old log file, or people cleaned it up manually.
+    old_log_files_.pop();
+    // To make the file really go away, we should sync parent directory.
+    // Since there isn't any consistency issue involved here, skipping
+    // this part to avoid one I/O here.
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  return Status::OK();
+}
+
+std::string AutoRollLogger::ValistToString(const char* format,
+                                           va_list args) const {
+  // Any log messages longer than 1024 will get truncated.
+  // The user is responsible for chopping longer messages into multi-line logs.
+  static const int MAXBUFFERSIZE = 1024;
+  char buffer[MAXBUFFERSIZE];
+
+  int count = vsnprintf(buffer, MAXBUFFERSIZE, format, args);
+  (void)count;
+  assert(count >= 0);
+
+  return buffer;
+}
+
+void AutoRollLogger::LogInternal(const char* format, ...) {
+  mutex_.AssertHeld();
+
+  if (!logger_) {
+    return;
+  }
+
+  va_list args;
+  va_start(args, format);
+  logger_->Logv(format, args);
+  va_end(args);
+}
+
+void AutoRollLogger::Logv(const char* format, va_list ap) {
+  assert(GetStatus().ok());
+  if (!logger_) {
+    return;
+  }
+
+  std::shared_ptr<Logger> logger;
+  {
+    MutexLock l(&mutex_);
+    if ((kLogFileTimeToRoll > 0 && LogExpired()) ||
+        (kMaxLogFileSize > 0 && logger_->GetLogFileSize() >= kMaxLogFileSize)) {
+      RollLogFile();
+      Status s = ResetLogger();
+      Status s2 = TrimOldLogFiles();
+
+      if (!s.ok()) {
+        // can't really log the error if creating a new LOG file failed
+        return;
+      }
+
+      WriteHeaderInfo();
+
+      if (!s2.ok()) {
+        ROCKS_LOG_WARN(logger_.get(), "Failed to trim old info log file: %s",
+                       s2.ToString().c_str());
+      }
+    }
+
+    // pin down the current logger_ instance before releasing the mutex.
+    logger = logger_;
+  }
+
+  // Another thread could have put a new Logger instance into logger_ by now.
+  // However, since logger is still hanging on to the previous instance
+  // (reference count is not zero), we don't have to worry about it being
+  // deleted while we are accessing it.
+ // Note that logv itself is not mutex protected to allow maximum concurrency, + // as thread safety should have been handled by the underlying logger. + logger->Logv(format, ap); +} + +void AutoRollLogger::WriteHeaderInfo() { + mutex_.AssertHeld(); + for (auto& header : headers_) { + LogInternal("%s", header.c_str()); + } +} + +void AutoRollLogger::LogHeader(const char* format, va_list args) { + if (!logger_) { + return; + } + + // header message are to be retained in memory. Since we cannot make any + // assumptions about the data contained in va_list, we will retain them as + // strings + va_list tmp; + va_copy(tmp, args); + std::string data = ValistToString(format, tmp); + va_end(tmp); + + MutexLock l(&mutex_); + headers_.push_back(data); + + // Log the original message to the current log + logger_->Logv(format, args); +} + +bool AutoRollLogger::LogExpired() { + if (cached_now_access_count >= call_NowMicros_every_N_records_) { + cached_now = static_cast(clock_->NowMicros() * 1e-6); + cached_now_access_count = 0; + } + + ++cached_now_access_count; + return cached_now >= ctime_ + kLogFileTimeToRoll; +} + +Status CreateLoggerFromOptions(const std::string& dbname, + const DBOptions& options, + std::shared_ptr* logger) { + if (options.info_log) { + *logger = options.info_log; + return Status::OK(); + } + + Env* env = options.env; + std::string db_absolute_path; + Status s = env->GetAbsolutePath(dbname, &db_absolute_path); + TEST_SYNC_POINT_CALLBACK("rocksdb::CreateLoggerFromOptions:AfterGetPath", &s); + if (!s.ok()) { + return s; + } + std::string fname = + InfoLogFileName(dbname, db_absolute_path, options.db_log_dir); + + const auto& clock = env->GetSystemClock(); + // In case it does not exist. + s = env->CreateDirIfMissing(dbname); + if (!s.ok()) { + if (options.db_log_dir.empty()) { + return s; + } else { + // Ignore the error returned during creation of dbname because dbname and + // db_log_dir can be on different filesystems in which case dbname will + // not exist and error should be ignored. db_log_dir creation will handle + // the error in case there is any error in the creation of dbname on same + // filesystem. + s = Status::OK(); + } + } + assert(s.ok()); + + if (!options.db_log_dir.empty()) { + s = env->CreateDirIfMissing(options.db_log_dir); + if (!s.ok()) { + return s; + } + } + // Currently we only support roll by time-to-roll and log size + if (options.log_file_time_to_roll > 0 || options.max_log_file_size > 0) { + AutoRollLogger* result = new AutoRollLogger( + env->GetFileSystem(), clock, dbname, options.db_log_dir, + options.max_log_file_size, options.log_file_time_to_roll, + options.keep_log_file_num, options.info_log_level); + s = result->GetStatus(); + if (!s.ok()) { + delete result; + } else { + logger->reset(result); + } + return s; + } + // Open a log file in the same directory as the db + s = env->FileExists(fname); + if (s.ok()) { + s = env->RenameFile( + fname, OldInfoLogFileName(dbname, clock->NowMicros(), db_absolute_path, + options.db_log_dir)); + + // The operation sequence of "FileExists -> Rename" is not atomic. It's + // possible that FileExists returns OK but file gets deleted before Rename. + // This can cause Rename to return IOError with subcode PathNotFound. + // Although it may be a rare case and applications should be discouraged + // to not concurrently modifying the contents of the directories accessed + // by the database instance, it is still helpful if we can perform some + // simple handling of this case. 
Therefore, we do the following: + // 1. if Rename() returns IOError with PathNotFound subcode, then we check + // whether the source file, i.e. LOG, exists. + // 2. if LOG exists, it means Rename() failed due to something else. Then + // we report error. + // 3. if LOG does not exist, it means it may have been removed/renamed by + // someone else. Since it does not exist, we can reset Status to OK so + // that this caller can try creating a new LOG file. If this succeeds, + // we should still allow it. + if (s.IsPathNotFound()) { + s = env->FileExists(fname); + if (s.IsNotFound()) { + s = Status::OK(); + } + } + } else if (s.IsNotFound()) { + // "LOG" is not required to exist since this could be a new DB. + s = Status::OK(); + } + if (s.ok()) { + s = env->NewLogger(fname, logger); + } + if (s.ok() && logger->get() != nullptr) { + (*logger)->SetInfoLogLevel(options.info_log_level); + } + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.h new file mode 100644 index 0000000000..dca9996fea --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/auto_roll_logger.h @@ -0,0 +1,166 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Logger implementation that can be shared by all environments +// where enough posix functionality is available. + +#pragma once +#include +#include +#include + +#include "file/filename.h" +#include "port/port.h" +#include "port/util_logger.h" +#include "test_util/sync_point.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { +class FileSystem; +class SystemClock; + +// Rolls the log file by size and/or time +class AutoRollLogger : public Logger { + public: + AutoRollLogger(const std::shared_ptr& fs, + const std::shared_ptr& clock, + const std::string& dbname, const std::string& db_log_dir, + size_t log_max_size, size_t log_file_time_to_roll, + size_t keep_log_file_num, + const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL); + + using Logger::Logv; + void Logv(const char* format, va_list ap) override; + + // Write a header entry to the log. All header information will be written + // again every time the log rolls over. + virtual void LogHeader(const char* format, va_list ap) override; + + // check if the logger has encountered any problem. + Status GetStatus() { return status_; } + + size_t GetLogFileSize() const override { + if (!logger_) { + return 0; + } + + std::shared_ptr logger; + { + MutexLock l(&mutex_); + // pin down the current logger_ instance before releasing the mutex. + logger = logger_; + } + return logger->GetLogFileSize(); + } + + void Flush() override { + std::shared_ptr logger; + { + MutexLock l(&mutex_); + // pin down the current logger_ instance before releasing the mutex. 
+      logger = logger_;
+    }
+    TEST_SYNC_POINT("AutoRollLogger::Flush:PinnedLogger");
+    if (logger) {
+      logger->Flush();
+    }
+  }
+
+  virtual ~AutoRollLogger() {
+    if (logger_ && !closed_) {
+      logger_->Close().PermitUncheckedError();
+    }
+    status_.PermitUncheckedError();
+  }
+
+  using Logger::GetInfoLogLevel;
+  InfoLogLevel GetInfoLogLevel() const override {
+    MutexLock l(&mutex_);
+    if (!logger_) {
+      return Logger::GetInfoLogLevel();
+    }
+    return logger_->GetInfoLogLevel();
+  }
+
+  using Logger::SetInfoLogLevel;
+  void SetInfoLogLevel(const InfoLogLevel log_level) override {
+    MutexLock lock(&mutex_);
+    Logger::SetInfoLogLevel(log_level);
+    if (logger_) {
+      logger_->SetInfoLogLevel(log_level);
+    }
+  }
+
+  void SetCallNowMicrosEveryNRecords(uint64_t call_NowMicros_every_N_records) {
+    call_NowMicros_every_N_records_ = call_NowMicros_every_N_records;
+  }
+
+  // Expose the log file path for testing purposes
+  std::string TEST_log_fname() const { return log_fname_; }
+
+  uint64_t TEST_ctime() const { return ctime_; }
+
+  Logger* TEST_inner_logger() const { return logger_.get(); }
+
+ protected:
+  // Implementation of Close()
+  virtual Status CloseImpl() override {
+    if (logger_) {
+      return logger_->Close();
+    } else {
+      return Status::OK();
+    }
+  }
+
+ private:
+  bool LogExpired();
+  Status ResetLogger();
+  void RollLogFile();
+  // Read all names of old log files into old_log_files_
+  // If there is any error, put the error code in status_
+  void GetExistingFiles();
+  // Delete old log files if their number exceeds the limit.
+  Status TrimOldLogFiles();
+  // Log message to logger without rolling
+  void LogInternal(const char* format, ...);
+  // Serialize the va_list to a string
+  std::string ValistToString(const char* format, va_list args) const;
+  // Write the logs marked as headers to the new log file
+  void WriteHeaderInfo();
+  std::string log_fname_;  // Current active info log's file name.
+  std::string dbname_;
+  std::string db_log_dir_;
+  std::string db_absolute_path_;
+  std::shared_ptr<FileSystem> fs_;
+  std::shared_ptr<SystemClock> clock_;
+  std::shared_ptr<Logger> logger_;
+  // current status of the logger
+  Status status_;
+  const size_t kMaxLogFileSize;
+  const size_t kLogFileTimeToRoll;
+  const size_t kKeepLogFileNum;
+  // header information
+  std::list<std::string> headers_;
+  // List of all existing info log files. Used for enforcing number of
+  // info log files.
+  // Full path is stored here. It consumes significantly more memory
+  // than only storing file name. Can optimize if it causes a problem.
+  std::queue<std::string> old_log_files_;
+  // to avoid frequent clock->NowMicros() calls, we cache the current time
+  uint64_t cached_now;
+  uint64_t ctime_;
+  uint64_t cached_now_access_count;
+  uint64_t call_NowMicros_every_N_records_;
+  IOOptions io_options_;
+  IODebugContext io_context_;
+  mutable port::Mutex mutex_;
+};
+
+// Facade to create logger automatically
+Status CreateLoggerFromOptions(const std::string& dbname,
+                               const DBOptions& options,
+                               std::shared_ptr<Logger>* logger);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/env_logger.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/env_logger.h
new file mode 100644
index 0000000000..fc9b245504
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/env_logger.h
@@ -0,0 +1,195 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Logger implementation that uses custom Env object for logging. + +#pragma once + +#include + +#include +#include + +#include "file/writable_file_writer.h" +#include "monitoring/iostats_context_imp.h" +#include "port/sys_time.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/perf_level.h" +#include "rocksdb/slice.h" +#include "test_util/sync_point.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +class EnvLogger : public Logger { + public: + EnvLogger(std::unique_ptr&& writable_file, + const std::string& fname, const EnvOptions& options, Env* env, + InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL) + : Logger(log_level), + env_(env), + clock_(env_->GetSystemClock().get()), + file_(std::move(writable_file), fname, options, clock_), + last_flush_micros_(0), + flush_pending_(false) {} + + ~EnvLogger() { + if (!closed_) { + closed_ = true; + CloseHelper().PermitUncheckedError(); + } + } + + private: + // A guard to prepare file operations, such as mutex and skip + // I/O context. + class FileOpGuard { + public: + explicit FileOpGuard(EnvLogger& logger) + : logger_(logger), prev_perf_level_(GetPerfLevel()) { + // Preserve iostats not to pollute writes from user writes. We might + // need a better solution than this. + SetPerfLevel(PerfLevel::kDisable); + IOSTATS_SET_DISABLE(true); + logger.mutex_.Lock(); + } + ~FileOpGuard() { + logger_.mutex_.Unlock(); + IOSTATS_SET_DISABLE(false); + SetPerfLevel(prev_perf_level_); + } + + private: + EnvLogger& logger_; + PerfLevel prev_perf_level_; + }; + + void FlushLocked() { + mutex_.AssertHeld(); + if (flush_pending_) { + flush_pending_ = false; + file_.Flush().PermitUncheckedError(); + file_.reset_seen_error(); + } + last_flush_micros_ = clock_->NowMicros(); + } + + void Flush() override { + TEST_SYNC_POINT("EnvLogger::Flush:Begin1"); + TEST_SYNC_POINT("EnvLogger::Flush:Begin2"); + + FileOpGuard guard(*this); + FlushLocked(); + } + + Status CloseImpl() override { return CloseHelper(); } + + Status CloseHelper() { + FileOpGuard guard(*this); + const auto close_status = file_.Close(); + + if (close_status.ok()) { + return close_status; + } + return Status::IOError("Close of log file failed with error:" + + (close_status.getState() + ? std::string(close_status.getState()) + : std::string())); + } + + using Logger::Logv; + void Logv(const char* format, va_list ap) override { + IOSTATS_TIMER_GUARD(logger_nanos); + + const uint64_t thread_id = env_->GetThreadID(); + + // We try twice: the first time with a fixed-size stack allocated buffer, + // and the second time with a much larger dynamically allocated buffer. 
+ char buffer[500]; + for (int iter = 0; iter < 2; iter++) { + char* base; + int bufsize; + if (iter == 0) { + bufsize = sizeof(buffer); + base = buffer; + } else { + bufsize = 65536; + base = new char[bufsize]; + } + char* p = base; + char* limit = base + bufsize; + + port::TimeVal now_tv; + port::GetTimeOfDay(&now_tv, nullptr); + const time_t seconds = now_tv.tv_sec; + struct tm t; + port::LocalTimeR(&seconds, &t); + p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llu ", + t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, + t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec), + static_cast(thread_id)); + + // Print the message + if (p < limit) { + va_list backup_ap; + va_copy(backup_ap, ap); + p += vsnprintf(p, limit - p, format, backup_ap); + va_end(backup_ap); + } + + // Truncate to available space if necessary + if (p >= limit) { + if (iter == 0) { + continue; // Try again with larger buffer + } else { + p = limit - 1; + } + } + + // Add newline if necessary + if (p == base || p[-1] != '\n') { + *p++ = '\n'; + } + + assert(p <= limit); + { + FileOpGuard guard(*this); + // We will ignore any error returned by Append(). + file_.Append(Slice(base, p - base)).PermitUncheckedError(); + file_.reset_seen_error(); + flush_pending_ = true; + const uint64_t now_micros = clock_->NowMicros(); + if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) { + FlushLocked(); + } + } + if (base != buffer) { + delete[] base; + } + break; + } + } + + size_t GetLogFileSize() const override { + MutexLock l(&mutex_); + return file_.GetFileSize(); + } + + private: + Env* env_; + SystemClock* clock_; + WritableFileWriter file_; + mutable port::Mutex mutex_; // Mutex to protect the shared variables below. + const static uint64_t flush_every_seconds_ = 5; + std::atomic_uint_fast64_t last_flush_micros_; + std::atomic flush_pending_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.cc new file mode 100644 index 0000000000..cb9eca6871 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "logging/event_logger.h" + +#include +#include +#include +#include + +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +EventLoggerStream::EventLoggerStream(Logger* logger) + : logger_(logger), + log_buffer_(nullptr), + max_log_size_(0), + json_writer_(nullptr) {} + +EventLoggerStream::EventLoggerStream(LogBuffer* log_buffer, + const size_t max_log_size) + : logger_(nullptr), + log_buffer_(log_buffer), + max_log_size_(max_log_size), + json_writer_(nullptr) {} + +EventLoggerStream::~EventLoggerStream() { + if (json_writer_) { + json_writer_->EndObject(); +#ifdef ROCKSDB_PRINT_EVENTS_TO_STDOUT + printf("%s\n", json_writer_->Get().c_str()); +#else + if (logger_) { + EventLogger::Log(logger_, *json_writer_); + } else if (log_buffer_) { + assert(max_log_size_); + EventLogger::LogToBuffer(log_buffer_, *json_writer_, max_log_size_); + } +#endif + delete json_writer_; + } +} + +void EventLogger::Log(const JSONWriter& jwriter) { Log(logger_, jwriter); } + +void EventLogger::Log(Logger* logger, const JSONWriter& jwriter) { +#ifdef ROCKSDB_PRINT_EVENTS_TO_STDOUT + printf("%s\n", jwriter.Get().c_str()); +#else + ROCKSDB_NAMESPACE::Log(logger, "%s %s", Prefix(), jwriter.Get().c_str()); +#endif +} + +void EventLogger::LogToBuffer(LogBuffer* log_buffer, const JSONWriter& jwriter, + const size_t max_log_size) { +#ifdef ROCKSDB_PRINT_EVENTS_TO_STDOUT + printf("%s\n", jwriter.Get().c_str()); +#else + assert(log_buffer); + ROCKSDB_NAMESPACE::LogToBuffer(log_buffer, max_log_size, "%s %s", Prefix(), + jwriter.Get().c_str()); +#endif +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.h new file mode 100644 index 0000000000..9ce982f50e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/event_logger.h @@ -0,0 +1,202 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
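+//
+// Illustrative usage sketch for the classes below (`info_log` is a
+// placeholder Logger*; the values are made up to match the sample output
+// documented further down):
+//
+//   EventLogger event_logger(info_log);
+//   event_logger.Log() << "event" << "table_file_creation"
+//                      << "file_number" << 12
+//                      << "file_size" << 1909699;
+//
+// Keys and values are streamed alternately; the stream emits one JSON
+// object per log line when it goes out of scope.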
+ +#pragma once + +#include +#include +#include +#include + +#include "logging/log_buffer.h" +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { + +class JSONWriter { + public: + JSONWriter() : state_(kExpectKey), first_element_(true), in_array_(false) { + stream_ << "{"; + } + + void AddKey(const std::string& key) { + assert(state_ == kExpectKey); + if (!first_element_) { + stream_ << ", "; + } + stream_ << "\"" << key << "\": "; + state_ = kExpectValue; + first_element_ = false; + } + + void AddValue(const char* value) { + assert(state_ == kExpectValue || state_ == kInArray); + if (state_ == kInArray && !first_element_) { + stream_ << ", "; + } + stream_ << "\"" << value << "\""; + if (state_ != kInArray) { + state_ = kExpectKey; + } + first_element_ = false; + } + + template + void AddValue(const T& value) { + assert(state_ == kExpectValue || state_ == kInArray); + if (state_ == kInArray && !first_element_) { + stream_ << ", "; + } + stream_ << value; + if (state_ != kInArray) { + state_ = kExpectKey; + } + first_element_ = false; + } + + void StartArray() { + assert(state_ == kExpectValue); + state_ = kInArray; + in_array_ = true; + stream_ << "["; + first_element_ = true; + } + + void EndArray() { + assert(state_ == kInArray); + state_ = kExpectKey; + in_array_ = false; + stream_ << "]"; + first_element_ = false; + } + + void StartObject() { + assert(state_ == kExpectValue); + state_ = kExpectKey; + stream_ << "{"; + first_element_ = true; + } + + void EndObject() { + assert(state_ == kExpectKey); + stream_ << "}"; + first_element_ = false; + } + + void StartArrayedObject() { + assert(state_ == kInArray && in_array_); + state_ = kExpectValue; + if (!first_element_) { + stream_ << ", "; + } + StartObject(); + } + + void EndArrayedObject() { + assert(in_array_); + EndObject(); + state_ = kInArray; + } + + std::string Get() const { return stream_.str(); } + + JSONWriter& operator<<(const char* val) { + if (state_ == kExpectKey) { + AddKey(val); + } else { + AddValue(val); + } + return *this; + } + + JSONWriter& operator<<(const std::string& val) { + return *this << val.c_str(); + } + + template + JSONWriter& operator<<(const T& val) { + assert(state_ != kExpectKey); + AddValue(val); + return *this; + } + + private: + enum JSONWriterState { + kExpectKey, + kExpectValue, + kInArray, + kInArrayedObject, + }; + JSONWriterState state_; + bool first_element_; + bool in_array_; + std::ostringstream stream_; +}; + +class EventLoggerStream { + public: + template + EventLoggerStream& operator<<(const T& val) { + MakeStream(); + *json_writer_ << val; + return *this; + } + + void StartArray() { json_writer_->StartArray(); } + void EndArray() { json_writer_->EndArray(); } + void StartObject() { json_writer_->StartObject(); } + void EndObject() { json_writer_->EndObject(); } + + ~EventLoggerStream(); + + private: + void MakeStream() { + if (!json_writer_) { + json_writer_ = new JSONWriter(); + *this << "time_micros" + << std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + } + } + friend class EventLogger; + explicit EventLoggerStream(Logger* logger); + explicit EventLoggerStream(LogBuffer* log_buffer, const size_t max_log_size); + // exactly one is non-nullptr + Logger* const logger_; + LogBuffer* const log_buffer_; + const size_t max_log_size_; // used only for log_buffer_ + // ownership + JSONWriter* json_writer_; +}; + +// here is an example of the output that will show up in the LOG: +// 2015/01/15-14:13:25.788019 1105ef000 EVENT_LOG_v1 
{"time_micros": +// 1421360005788015, "event": "table_file_creation", "file_number": 12, +// "file_size": 1909699} +class EventLogger { + public: + static const char* Prefix() { return "EVENT_LOG_v1"; } + + explicit EventLogger(Logger* logger) : logger_(logger) {} + EventLoggerStream Log() { return EventLoggerStream(logger_); } + EventLoggerStream LogToBuffer(LogBuffer* log_buffer) { + return EventLoggerStream(log_buffer, LogBuffer::kDefaultMaxLogSize); + } + EventLoggerStream LogToBuffer(LogBuffer* log_buffer, + const size_t max_log_size) { + return EventLoggerStream(log_buffer, max_log_size); + } + void Log(const JSONWriter& jwriter); + static void Log(Logger* logger, const JSONWriter& jwriter); + static void LogToBuffer( + LogBuffer* log_buffer, const JSONWriter& jwriter, + const size_t max_log_size = LogBuffer::kDefaultMaxLogSize); + + private: + Logger* logger_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.cc new file mode 100644 index 0000000000..2763e617f4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.cc @@ -0,0 +1,91 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "logging/log_buffer.h" + +#include "port/port.h" +#include "port/sys_time.h" + +namespace ROCKSDB_NAMESPACE { + +LogBuffer::LogBuffer(const InfoLogLevel log_level, Logger* info_log) + : log_level_(log_level), info_log_(info_log) {} + +void LogBuffer::AddLogToBuffer(size_t max_log_size, const char* format, + va_list ap) { + if (!info_log_ || log_level_ < info_log_->GetInfoLogLevel()) { + // Skip the level because of its level. + return; + } + + char* alloc_mem = arena_.AllocateAligned(max_log_size); + BufferedLog* buffered_log = new (alloc_mem) BufferedLog(); + char* p = buffered_log->message; + char* limit = alloc_mem + max_log_size - 1; + + // store the time + port::GetTimeOfDay(&(buffered_log->now_tv), nullptr); + + // Print the message + if (p < limit) { + va_list backup_ap; + va_copy(backup_ap, ap); + auto n = vsnprintf(p, limit - p, format, backup_ap); +#ifndef OS_WIN + // MS reports -1 when the buffer is too short + assert(n >= 0); +#endif + if (n > 0) { + p += n; + } else { + p = limit; + } + va_end(backup_ap); + } + + if (p > limit) { + p = limit; + } + + // Add '\0' to the end + *p = '\0'; + + logs_.push_back(buffered_log); +} + +void LogBuffer::FlushBufferToLog() { + for (BufferedLog* log : logs_) { + const time_t seconds = log->now_tv.tv_sec; + struct tm t; + if (port::LocalTimeR(&seconds, &t) != nullptr) { + Log(log_level_, info_log_, + "(Original Log Time %04d/%02d/%02d-%02d:%02d:%02d.%06d) %s", + t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, + t.tm_sec, static_cast(log->now_tv.tv_usec), log->message); + } + } + logs_.clear(); +} + +void LogToBuffer(LogBuffer* log_buffer, size_t max_log_size, const char* format, + ...) { + if (log_buffer != nullptr) { + va_list ap; + va_start(ap, format); + log_buffer->AddLogToBuffer(max_log_size, format, ap); + va_end(ap); + } +} + +void LogToBuffer(LogBuffer* log_buffer, const char* format, ...) 
{
+  if (log_buffer != nullptr) {
+    va_list ap;
+    va_start(ap, format);
+    log_buffer->AddLogToBuffer(LogBuffer::kDefaultMaxLogSize, format, ap);
+    va_end(ap);
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.h
new file mode 100644
index 0000000000..92d38d10d1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/log_buffer.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <ctime>
+
+#include "memory/arena.h"
+#include "port/sys_time.h"
+#include "rocksdb/env.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Logger;
+
+// A class to buffer info log entries and flush them in the end.
+class LogBuffer {
+ public:
+  // log_level: the log level for all the logs
+  // info_log: logger to write the logs to
+  LogBuffer(const InfoLogLevel log_level, Logger* info_log);
+
+  // Add a log entry to the buffer. max_log_size indicates the maximum
+  // log size, including some metadata.
+  void AddLogToBuffer(size_t max_log_size, const char* format, va_list ap);
+
+  bool IsEmpty() const { return logs_.empty(); }
+
+  // Flush all buffered logs to the info log.
+  void FlushBufferToLog();
+  static const size_t kDefaultMaxLogSize = 512;
+
+ private:
+  // One log entry with its timestamp
+  struct BufferedLog {
+    port::TimeVal now_tv;  // Timestamp of the log
+    char message[1];       // Beginning of log message
+  };
+
+  const InfoLogLevel log_level_;
+  Logger* info_log_;
+  Arena arena_;
+  autovector<BufferedLog*> logs_;
+};
+
+// Add log to the LogBuffer for a delayed info logging. It can be used when
+// we want to add some logs inside a mutex.
+// max_log_size indicates the maximum log size, including some metadata.
+extern void LogToBuffer(LogBuffer* log_buffer, size_t max_log_size,
+                        const char* format, ...);
+// Same as previous function, but with default max log size.
+extern void LogToBuffer(LogBuffer* log_buffer, const char* format, ...);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/logging.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/logging.h
new file mode 100644
index 0000000000..0fa882a786
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/logging/logging.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Must not be included from any .h files to avoid polluting the namespace
+// with macros.
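+//
+// Illustrative usage sketch (assuming a Logger* named `info_log` and a
+// LogBuffer named `log_buffer`; both are placeholders):
+//
+//   ROCKS_LOG_INFO(info_log, "compaction finished, %d files", 4);
+//   ROCKS_LOG_BUFFER(&log_buffer, "deferred message %d", 42);
+//
+// Each macro prepends "[file:line]" to the format string via
+// ROCKS_LOG_PREPEND_FILE_LINE defined below.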
+
+#pragma once
+
+// Helper macros that include information about file name and line number
+#define ROCKS_LOG_STRINGIFY(x) #x
+#define ROCKS_LOG_TOSTRING(x) ROCKS_LOG_STRINGIFY(x)
+#define ROCKS_LOG_PREPEND_FILE_LINE(FMT) \
+  ("[%s:" ROCKS_LOG_TOSTRING(__LINE__) "] " FMT)
+
+inline const char* RocksLogShorterFileName(const char* file) {
+  // 18 is the length of "logging/logging.h".
+  // If the name of this file changes, please change this number, too.
+  return file + (sizeof(__FILE__) > 18 ? sizeof(__FILE__) - 18 : 0);
+}
+
+// Don't include file/line info in HEADER level
+#define ROCKS_LOG_HEADER(LGR, FMT, ...) \
+  ROCKSDB_NAMESPACE::Log(InfoLogLevel::HEADER_LEVEL, LGR, FMT, ##__VA_ARGS__)
+
+#define ROCKS_LOG_AT_LEVEL(LGR, LVL, FMT, ...)                           \
+  ROCKSDB_NAMESPACE::Log((LVL), (LGR), ROCKS_LOG_PREPEND_FILE_LINE(FMT), \
+                         RocksLogShorterFileName(__FILE__), ##__VA_ARGS__)
+
+#define ROCKS_LOG_DEBUG(LGR, FMT, ...) \
+  ROCKS_LOG_AT_LEVEL((LGR), InfoLogLevel::DEBUG_LEVEL, FMT, ##__VA_ARGS__)
+
+#define ROCKS_LOG_INFO(LGR, FMT, ...) \
+  ROCKS_LOG_AT_LEVEL((LGR), InfoLogLevel::INFO_LEVEL, FMT, ##__VA_ARGS__)
+
+#define ROCKS_LOG_WARN(LGR, FMT, ...) \
+  ROCKS_LOG_AT_LEVEL((LGR), InfoLogLevel::WARN_LEVEL, FMT, ##__VA_ARGS__)
+
+#define ROCKS_LOG_ERROR(LGR, FMT, ...) \
+  ROCKS_LOG_AT_LEVEL((LGR), InfoLogLevel::ERROR_LEVEL, FMT, ##__VA_ARGS__)
+
+#define ROCKS_LOG_FATAL(LGR, FMT, ...) \
+  ROCKS_LOG_AT_LEVEL((LGR), InfoLogLevel::FATAL_LEVEL, FMT, ##__VA_ARGS__)
+
+#define ROCKS_LOG_BUFFER(LOG_BUF, FMT, ...)                                 \
+  ROCKSDB_NAMESPACE::LogToBuffer(LOG_BUF, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \
+                                 RocksLogShorterFileName(__FILE__),         \
+                                 ##__VA_ARGS__)
+
+#define ROCKS_LOG_BUFFER_MAX_SZ(LOG_BUF, MAX_LOG_SIZE, FMT, ...) \
+  ROCKSDB_NAMESPACE::LogToBuffer(                                \
+      LOG_BUF, MAX_LOG_SIZE, ROCKS_LOG_PREPEND_FILE_LINE(FMT),   \
+      RocksLogShorterFileName(__FILE__), ##__VA_ARGS__)
+
+#define ROCKS_LOG_DETAILS(LGR, FMT, ...) \
+  ;  // due to overhead by default skip such lines
+// ROCKS_LOG_DEBUG(LGR, FMT, ##__VA_ARGS__)
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/allocator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/allocator.h
new file mode 100644
index 0000000000..0d7cd60a99
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/allocator.h
@@ -0,0 +1,58 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Abstract interface for allocating memory in blocks. This memory is freed
+// when the allocator object is destroyed. See the Arena class for more info.
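+//
+// Illustrative sketch: consumers take an Allocator& and stay agnostic to
+// the concrete arena behind it (`alloc` is a placeholder reference):
+//
+//   char* raw = alloc.Allocate(64);
+//   char* aligned = alloc.AllocateAligned(sizeof(std::max_align_t));
+//
+// There is no per-allocation free; everything is reclaimed when the
+// owning allocator is destroyed.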
+ +#pragma once +#include +#include + +#include "rocksdb/write_buffer_manager.h" + +namespace ROCKSDB_NAMESPACE { + +class Logger; + +class Allocator { + public: + virtual ~Allocator() {} + + virtual char* Allocate(size_t bytes) = 0; + virtual char* AllocateAligned(size_t bytes, size_t huge_page_size = 0, + Logger* logger = nullptr) = 0; + + virtual size_t BlockSize() const = 0; +}; + +class AllocTracker { + public: + explicit AllocTracker(WriteBufferManager* write_buffer_manager); + // No copying allowed + AllocTracker(const AllocTracker&) = delete; + void operator=(const AllocTracker&) = delete; + + ~AllocTracker(); + void Allocate(size_t bytes); + // Call when we're finished allocating memory so we can free it from + // the write buffer's limit. + void DoneAllocating(); + + void FreeMem(); + + bool is_freed() const { return write_buffer_manager_ == nullptr || freed_; } + + private: + WriteBufferManager* write_buffer_manager_; + std::atomic bytes_allocated_; + bool done_allocating_; + bool freed_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.cc new file mode 100644 index 0000000000..0a920203dc --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.cc @@ -0,0 +1,170 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
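+//
+// A minimal usage sketch for the Arena defined here (illustrative only):
+//
+//   Arena arena;                                 // starts on the inline block
+//   char* buf = arena.Allocate(128);             // unaligned, from block tail
+//   char* aligned = arena.AllocateAligned(256);  // aligned, from block head
+//   // all memory is released when `arena` goes out of scope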
+ +#include "memory/arena.h" + +#include + +#include "logging/logging.h" +#include "port/malloc.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "test_util/sync_point.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +size_t Arena::OptimizeBlockSize(size_t block_size) { + // Make sure block_size is in optimal range + block_size = std::max(Arena::kMinBlockSize, block_size); + block_size = std::min(Arena::kMaxBlockSize, block_size); + + // make sure block_size is the multiple of kAlignUnit + if (block_size % kAlignUnit != 0) { + block_size = (1 + block_size / kAlignUnit) * kAlignUnit; + } + + return block_size; +} + +Arena::Arena(size_t block_size, AllocTracker* tracker, size_t huge_page_size) + : kBlockSize(OptimizeBlockSize(block_size)), tracker_(tracker) { + assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize && + kBlockSize % kAlignUnit == 0); + TEST_SYNC_POINT_CALLBACK("Arena::Arena:0", const_cast(&kBlockSize)); + alloc_bytes_remaining_ = sizeof(inline_block_); + blocks_memory_ += alloc_bytes_remaining_; + aligned_alloc_ptr_ = inline_block_; + unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_; + if (MemMapping::kHugePageSupported) { + hugetlb_size_ = huge_page_size; + if (hugetlb_size_ && kBlockSize > hugetlb_size_) { + hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_; + } + } + if (tracker_ != nullptr) { + tracker_->Allocate(kInlineSize); + } +} + +Arena::~Arena() { + if (tracker_ != nullptr) { + assert(tracker_->is_freed()); + tracker_->FreeMem(); + } +} + +char* Arena::AllocateFallback(size_t bytes, bool aligned) { + if (bytes > kBlockSize / 4) { + ++irregular_block_num; + // Object is more than a quarter of our block size. Allocate it separately + // to avoid wasting too much space in leftover bytes. + return AllocateNewBlock(bytes); + } + + // We waste the remaining space in the current block. + size_t size = 0; + char* block_head = nullptr; + if (MemMapping::kHugePageSupported && hugetlb_size_ > 0) { + size = hugetlb_size_; + block_head = AllocateFromHugePage(size); + } + if (!block_head) { + size = kBlockSize; + block_head = AllocateNewBlock(size); + } + alloc_bytes_remaining_ = size - bytes; + + if (aligned) { + aligned_alloc_ptr_ = block_head + bytes; + unaligned_alloc_ptr_ = block_head + size; + return block_head; + } else { + aligned_alloc_ptr_ = block_head; + unaligned_alloc_ptr_ = block_head + size - bytes; + return unaligned_alloc_ptr_; + } +} + +char* Arena::AllocateFromHugePage(size_t bytes) { + MemMapping mm = MemMapping::AllocateHuge(bytes); + auto addr = static_cast(mm.Get()); + if (addr) { + huge_blocks_.push_back(std::move(mm)); + blocks_memory_ += bytes; + if (tracker_ != nullptr) { + tracker_->Allocate(bytes); + } + } + return addr; +} + +char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size, + Logger* logger) { + if (MemMapping::kHugePageSupported && hugetlb_size_ > 0 && + huge_page_size > 0 && bytes > 0) { + // Allocate from a huge page TLB table. + size_t reserved_size = + ((bytes - 1U) / huge_page_size + 1U) * huge_page_size; + assert(reserved_size >= bytes); + + char* addr = AllocateFromHugePage(reserved_size); + if (addr == nullptr) { + ROCKS_LOG_WARN(logger, + "AllocateAligned fail to allocate huge TLB pages: %s", + errnoStr(errno).c_str()); + // fail back to malloc + } else { + return addr; + } + } + + size_t current_mod = + reinterpret_cast(aligned_alloc_ptr_) & (kAlignUnit - 1); + size_t slop = (current_mod == 0 ? 
0 : kAlignUnit - current_mod); + size_t needed = bytes + slop; + char* result; + if (needed <= alloc_bytes_remaining_) { + result = aligned_alloc_ptr_ + slop; + aligned_alloc_ptr_ += needed; + alloc_bytes_remaining_ -= needed; + } else { + // AllocateFallback always returns aligned memory + result = AllocateFallback(bytes, true /* aligned */); + } + assert((reinterpret_cast(result) & (kAlignUnit - 1)) == 0); + return result; +} + +char* Arena::AllocateNewBlock(size_t block_bytes) { + // NOTE: std::make_unique zero-initializes the block so is not appropriate + // here + char* block = new char[block_bytes]; + blocks_.push_back(std::unique_ptr(block)); + + size_t allocated_size; +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + allocated_size = malloc_usable_size(block); +#ifndef NDEBUG + // It's hard to predict what malloc_usable_size() returns. + // A callback can allow users to change the costed size. + std::pair pair(&allocated_size, &block_bytes); + TEST_SYNC_POINT_CALLBACK("Arena::AllocateNewBlock:0", &pair); +#endif // NDEBUG +#else + allocated_size = block_bytes; +#endif // ROCKSDB_MALLOC_USABLE_SIZE + blocks_memory_ += allocated_size; + if (tracker_ != nullptr) { + tracker_->Allocate(allocated_size); + } + return block; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.h new file mode 100644 index 0000000000..39399aa71b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/arena.h @@ -0,0 +1,135 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +// Arena is an implementation of Allocator class. For a request of small size, +// it allocates a block with pre-defined block size. For a request of big +// size, it uses malloc to directly get the requested size. + +#pragma once + +#include +#include + +#include "memory/allocator.h" +#include "port/mmap.h" +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { + +class Arena : public Allocator { + public: + // No copying allowed + Arena(const Arena&) = delete; + void operator=(const Arena&) = delete; + + static constexpr size_t kInlineSize = 2048; + static constexpr size_t kMinBlockSize = 4096; + static constexpr size_t kMaxBlockSize = 2u << 30; + + static constexpr unsigned kAlignUnit = alignof(std::max_align_t); + static_assert((kAlignUnit & (kAlignUnit - 1)) == 0, + "Pointer size should be power of 2"); + + // huge_page_size: if 0, don't use huge page TLB. If > 0 (should set to the + // supported hugepage size of the system), block allocation will try huge + // page TLB first. If allocation fails, will fall back to normal case. + explicit Arena(size_t block_size = kMinBlockSize, + AllocTracker* tracker = nullptr, size_t huge_page_size = 0); + ~Arena(); + + char* Allocate(size_t bytes) override; + + // huge_page_size: if >0, will try to allocate from huage page TLB. + // The argument will be the size of the page size for huge page TLB. Bytes + // will be rounded up to multiple of the page size to allocate through mmap + // anonymous option with huge page on. 
The extra space allocated will be + // wasted. If allocation fails, will fall back to normal case. To enable it, + // need to reserve huge pages for it to be allocated, like: + // sysctl -w vm.nr_hugepages=20 + // See linux doc Documentation/vm/hugetlbpage.txt for details. + // huge page allocation can fail. In this case it will fail back to + // normal cases. The messages will be logged to logger. So when calling with + // huge_page_tlb_size > 0, we highly recommend a logger is passed in. + // Otherwise, the error message will be printed out to stderr directly. + char* AllocateAligned(size_t bytes, size_t huge_page_size = 0, + Logger* logger = nullptr) override; + + // Returns an estimate of the total memory usage of data allocated + // by the arena (exclude the space allocated but not yet used for future + // allocations). + size_t ApproximateMemoryUsage() const { + return blocks_memory_ + blocks_.size() * sizeof(char*) - + alloc_bytes_remaining_; + } + + size_t MemoryAllocatedBytes() const { return blocks_memory_; } + + size_t AllocatedAndUnused() const { return alloc_bytes_remaining_; } + + // If an allocation is too big, we'll allocate an irregular block with the + // same size of that allocation. + size_t IrregularBlockNum() const { return irregular_block_num; } + + size_t BlockSize() const override { return kBlockSize; } + + bool IsInInlineBlock() const { + return blocks_.empty() && huge_blocks_.empty(); + } + + // check and adjust the block_size so that the return value is + // 1. in the range of [kMinBlockSize, kMaxBlockSize]. + // 2. the multiple of align unit. + static size_t OptimizeBlockSize(size_t block_size); + + private: + alignas(std::max_align_t) char inline_block_[kInlineSize]; + // Number of bytes allocated in one block + const size_t kBlockSize; + // Allocated memory blocks + std::deque> blocks_; + // Huge page allocations + std::deque huge_blocks_; + size_t irregular_block_num = 0; + + // Stats for current active block. + // For each block, we allocate aligned memory chucks from one end and + // allocate unaligned memory chucks from the other end. Otherwise the + // memory waste for alignment will be higher if we allocate both types of + // memory from one direction. + char* unaligned_alloc_ptr_ = nullptr; + char* aligned_alloc_ptr_ = nullptr; + // How many bytes left in currently active block? + size_t alloc_bytes_remaining_ = 0; + + size_t hugetlb_size_ = 0; + + char* AllocateFromHugePage(size_t bytes); + char* AllocateFallback(size_t bytes, bool aligned); + char* AllocateNewBlock(size_t block_bytes); + + // Bytes of memory in blocks allocated so far + size_t blocks_memory_ = 0; + // Non-owned + AllocTracker* tracker_; +}; + +inline char* Arena::Allocate(size_t bytes) { + // The semantics of what to return are a bit messy if we allow + // 0-byte allocations, so we disallow them here (we don't need + // them for our internal use). + assert(bytes > 0); + if (bytes <= alloc_bytes_remaining_) { + unaligned_alloc_ptr_ -= bytes; + alloc_bytes_remaining_ -= bytes; + return unaligned_alloc_ptr_; + } + return AllocateFallback(bytes, false /* unaligned */); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.cc new file mode 100644 index 0000000000..1619bd93b0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2011-present, Facebook, Inc. 
+// All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "memory/concurrent_arena.h"
+
+#include <thread>
+
+#include "port/port.h"
+#include "util/random.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+thread_local size_t ConcurrentArena::tls_cpuid = 0;
+
+namespace {
+// If the shard block size is too large, in the worst case, every core
+// allocates a block without populating it. If the shard block size is
+// 1MB, 64 cores will quickly allocate 64MB, and may quickly trigger a
+// flush. Cap the size instead.
+const size_t kMaxShardBlockSize = size_t{128 * 1024};
+}  // namespace
+
+ConcurrentArena::ConcurrentArena(size_t block_size, AllocTracker* tracker,
+                                 size_t huge_page_size)
+    : shard_block_size_(std::min(kMaxShardBlockSize, block_size / 8)),
+      shards_(),
+      arena_(block_size, tracker, huge_page_size) {
+  Fixup();
+}
+
+ConcurrentArena::Shard* ConcurrentArena::Repick() {
+  auto shard_and_index = shards_.AccessElementAndIndex();
+  // even if we are cpu 0, use a non-zero tls_cpuid so we can tell we
+  // have repicked
+  tls_cpuid = shard_and_index.second | shards_.Size();
+  return shard_and_index.first;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.h
new file mode 100644
index 0000000000..f14507d302
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/concurrent_arena.h
@@ -0,0 +1,215 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <atomic>
+#include <memory>
+#include <utility>
+
+#include "memory/allocator.h"
+#include "memory/arena.h"
+#include "port/lang.h"
+#include "port/likely.h"
+#include "util/core_local.h"
+#include "util/mutexlock.h"
+#include "util/thread_local.h"
+
+// Only generate field unused warning for padding array, or build under
+// GCC 4.8.1 will fail.
+#ifdef __clang__
+#define ROCKSDB_FIELD_UNUSED __attribute__((__unused__))
+#else
+#define ROCKSDB_FIELD_UNUSED
+#endif  // __clang__
+
+namespace ROCKSDB_NAMESPACE {
+
+class Logger;
+
+// ConcurrentArena wraps an Arena. It makes it thread safe using a fast
+// inlined spinlock, and adds small per-core allocation caches to avoid
+// contention for small allocations. To avoid any memory waste from the
+// per-core shards, they are kept small, they are lazily instantiated
+// only if ConcurrentArena actually notices concurrent use, and they
+// adjust their size so that there is no fragmentation waste when the
+// shard blocks are allocated from the underlying main arena.
+class ConcurrentArena : public Allocator {
+ public:
+  // block_size and huge_page_size are the same as for Arena (and are
+  // in fact just passed to the constructor of arena_.
The core-local + // shards compute their shard_block_size as a fraction of block_size + // that varies according to the hardware concurrency level. + explicit ConcurrentArena(size_t block_size = Arena::kMinBlockSize, + AllocTracker* tracker = nullptr, + size_t huge_page_size = 0); + + char* Allocate(size_t bytes) override { + return AllocateImpl(bytes, false /*force_arena*/, + [this, bytes]() { return arena_.Allocate(bytes); }); + } + + char* AllocateAligned(size_t bytes, size_t huge_page_size = 0, + Logger* logger = nullptr) override { + size_t rounded_up = ((bytes - 1) | (sizeof(void*) - 1)) + 1; + assert(rounded_up >= bytes && rounded_up < bytes + sizeof(void*) && + (rounded_up % sizeof(void*)) == 0); + + return AllocateImpl(rounded_up, huge_page_size != 0 /*force_arena*/, + [this, rounded_up, huge_page_size, logger]() { + return arena_.AllocateAligned(rounded_up, + huge_page_size, logger); + }); + } + + size_t ApproximateMemoryUsage() const { + std::unique_lock lock(arena_mutex_, std::defer_lock); + lock.lock(); + return arena_.ApproximateMemoryUsage() - ShardAllocatedAndUnused(); + } + + size_t MemoryAllocatedBytes() const { + return memory_allocated_bytes_.load(std::memory_order_relaxed); + } + + size_t AllocatedAndUnused() const { + return arena_allocated_and_unused_.load(std::memory_order_relaxed) + + ShardAllocatedAndUnused(); + } + + size_t IrregularBlockNum() const { + return irregular_block_num_.load(std::memory_order_relaxed); + } + + size_t BlockSize() const override { return arena_.BlockSize(); } + + private: + struct Shard { + char padding[40] ROCKSDB_FIELD_UNUSED; + mutable SpinMutex mutex; + char* free_begin_; + std::atomic allocated_and_unused_; + + Shard() : free_begin_(nullptr), allocated_and_unused_(0) {} + }; + + static thread_local size_t tls_cpuid; + + char padding0[56] ROCKSDB_FIELD_UNUSED; + + size_t shard_block_size_; + + CoreLocalArray shards_; + + Arena arena_; + mutable SpinMutex arena_mutex_; + std::atomic arena_allocated_and_unused_; + std::atomic memory_allocated_bytes_; + std::atomic irregular_block_num_; + + char padding1[56] ROCKSDB_FIELD_UNUSED; + + Shard* Repick(); + + size_t ShardAllocatedAndUnused() const { + size_t total = 0; + for (size_t i = 0; i < shards_.Size(); ++i) { + total += shards_.AccessAtCore(i)->allocated_and_unused_.load( + std::memory_order_relaxed); + } + return total; + } + + template + char* AllocateImpl(size_t bytes, bool force_arena, const Func& func) { + size_t cpu; + + // Go directly to the arena if the allocation is too large, or if + // we've never needed to Repick() and the arena mutex is available + // with no waiting. This keeps the fragmentation penalty of + // concurrency zero unless it might actually confer an advantage. 
+    std::unique_lock<SpinMutex> arena_lock(arena_mutex_, std::defer_lock);
+    if (bytes > shard_block_size_ / 4 || force_arena ||
+        ((cpu = tls_cpuid) == 0 &&
+         !shards_.AccessAtCore(0)->allocated_and_unused_.load(
+             std::memory_order_relaxed) &&
+         arena_lock.try_lock())) {
+      if (!arena_lock.owns_lock()) {
+        arena_lock.lock();
+      }
+      auto rv = func();
+      Fixup();
+      return rv;
+    }
+
+    // pick a shard from which to allocate
+    Shard* s = shards_.AccessAtCore(cpu & (shards_.Size() - 1));
+    if (!s->mutex.try_lock()) {
+      s = Repick();
+      s->mutex.lock();
+    }
+    std::unique_lock<SpinMutex> lock(s->mutex, std::adopt_lock);
+
+    size_t avail = s->allocated_and_unused_.load(std::memory_order_relaxed);
+    if (avail < bytes) {
+      // reload
+      std::lock_guard<SpinMutex> reload_lock(arena_mutex_);
+
+      // If the arena's current block is within a factor of 2 of the right
+      // size, we adjust our request to avoid arena waste.
+      auto exact = arena_allocated_and_unused_.load(std::memory_order_relaxed);
+      assert(exact == arena_.AllocatedAndUnused());
+
+      if (exact >= bytes && arena_.IsInInlineBlock()) {
+        // If we haven't exhausted arena's inline block yet, allocate from
+        // arena directly. This ensures that we'll do the first few small
+        // allocations without allocating any blocks.
+        // In particular this prevents empty memtables from using a
+        // disproportionately large amount of memory: a memtable allocates on
+        // the order of 1 KB of memory when created; we wouldn't want to
+        // allocate a full arena block (typically a few megabytes) for that,
+        // especially if there are thousands of empty memtables.
+        auto rv = func();
+        Fixup();
+        return rv;
+      }
+
+      avail = exact >= shard_block_size_ / 2 && exact < shard_block_size_ * 2
+                  ? exact
+                  : shard_block_size_;
+      s->free_begin_ = arena_.AllocateAligned(avail);
+      Fixup();
+    }
+    s->allocated_and_unused_.store(avail - bytes, std::memory_order_relaxed);
+
+    char* rv;
+    if ((bytes % sizeof(void*)) == 0) {
+      // aligned allocation from the beginning
+      rv = s->free_begin_;
+      s->free_begin_ += bytes;
+    } else {
+      // unaligned from the end
+      rv = s->free_begin_ + avail - bytes;
+    }
+    return rv;
+  }
+
+  void Fixup() {
+    arena_allocated_and_unused_.store(arena_.AllocatedAndUnused(),
+                                      std::memory_order_relaxed);
+    memory_allocated_bytes_.store(arena_.MemoryAllocatedBytes(),
+                                  std::memory_order_relaxed);
+    irregular_block_num_.store(arena_.IrregularBlockNum(),
+                               std::memory_order_relaxed);
+  }
+
+  ConcurrentArena(const ConcurrentArena&) = delete;
+  ConcurrentArena& operator=(const ConcurrentArena&) = delete;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.cc
new file mode 100644
index 0000000000..d05248224d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.cc
@@ -0,0 +1,303 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
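+//
+// Illustrative usage of the NewJemallocNodumpAllocator() factory defined
+// below (the block-cache wiring is an assumption about the caller, not
+// something this file does):
+//
+//   JemallocAllocatorOptions jopts;
+//   std::shared_ptr<MemoryAllocator> allocator;
+//   Status s = NewJemallocNodumpAllocator(jopts, &allocator);
+//   // on success, `allocator` can back a block cache so that cached
+//   // blocks are excluded from core dumps, e.g. via
+//   // LRUCacheOptions::memory_allocator.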
+ +#include "memory/jemalloc_nodump_allocator.h" + +#include +#include + +#include "port/likely.h" +#include "port/port.h" +#include "rocksdb/convenience.h" +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "util/fastrange.h" +#include "util/random.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR +std::atomic JemallocNodumpAllocator::original_alloc_{nullptr}; +#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + +static std::unordered_map jemalloc_type_info = { + {"limit_tcache_size", + {offsetof(struct JemallocAllocatorOptions, limit_tcache_size), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"tcache_size_lower_bound", + {offsetof(struct JemallocAllocatorOptions, tcache_size_lower_bound), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"tcache_size_upper_bound", + {offsetof(struct JemallocAllocatorOptions, tcache_size_upper_bound), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"num_arenas", + {offsetof(struct JemallocAllocatorOptions, num_arenas), OptionType::kSizeT, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, +}; +bool JemallocNodumpAllocator::IsSupported(std::string* why) { +#ifndef ROCKSDB_JEMALLOC + *why = "Not compiled with ROCKSDB_JEMALLOC"; + return false; +#else + static const std::string unsupported = + "JemallocNodumpAllocator only available with jemalloc version >= 5 " + "and MADV_DONTDUMP is available."; + if (!HasJemalloc()) { + *why = unsupported; + return false; + } +#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + *why = unsupported; + return false; +#else + return true; +#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR +#endif // ROCKSDB_MALLOC +} + +JemallocNodumpAllocator::JemallocNodumpAllocator( + JemallocAllocatorOptions& options) + : options_(options) +#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + , + tcache_(&JemallocNodumpAllocator::DestroyThreadSpecificCache) { +#else // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR +{ +#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + RegisterOptions(&options_, &jemalloc_type_info); +} + +#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR +JemallocNodumpAllocator::~JemallocNodumpAllocator() { + // Destroy tcache before destroying arena. + autovector tcache_list; + tcache_.Scrape(&tcache_list, nullptr); + for (void* tcache_index : tcache_list) { + DestroyThreadSpecificCache(tcache_index); + } + for (auto arena_index : arena_indexes_) { + // Destroy arena. Silently ignore error. + Status s = DestroyArena(arena_index); + assert(s.ok()); + s.PermitUncheckedError(); + } +} + +size_t JemallocNodumpAllocator::UsableSize(void* p, + size_t /*allocation_size*/) const { + return malloc_usable_size(static_cast(p)); +} + +void* JemallocNodumpAllocator::Allocate(size_t size) { + int tcache_flag = GetThreadSpecificCache(size); + uint32_t arena_index = GetArenaIndex(); + return mallocx(size, MALLOCX_ARENA(arena_index) | tcache_flag); +} + +void JemallocNodumpAllocator::Deallocate(void* p) { + // Obtain tcache. + size_t size = 0; + if (options_.limit_tcache_size) { + size = malloc_usable_size(p); + } + int tcache_flag = GetThreadSpecificCache(size); + // No need to pass arena index to dallocx(). Jemalloc will find arena index + // from its own metadata. 
+  dallocx(p, tcache_flag);
+}
+
+uint32_t JemallocNodumpAllocator::GetArenaIndex() const {
+  if (arena_indexes_.size() == 1) {
+    return arena_indexes_[0];
+  }
+
+  static std::atomic<uint32_t> next_seed = 0;
+  // Core-local may work in place of `thread_local` as we should be able to
+  // tolerate occasional stale reads in thread migration cases. However we need
+  // to make Random thread-safe and prevent cacheline bouncing. Whether this is
+  // worthwhile is still an open question.
+  thread_local Random tl_random(next_seed.fetch_add(1));
+  return arena_indexes_[FastRange32(tl_random.Next(), arena_indexes_.size())];
+}
+
+Status JemallocNodumpAllocator::InitializeArenas() {
+  assert(!init_);
+  init_ = true;
+
+  for (size_t i = 0; i < options_.num_arenas; i++) {
+    // Create arena.
+    unsigned arena_index;
+    size_t arena_index_size = sizeof(arena_index);
+    int ret =
+        mallctl("arenas.create", &arena_index, &arena_index_size, nullptr, 0);
+    if (ret != 0) {
+      return Status::Incomplete(
+          "Failed to create jemalloc arena, error code: " +
+          std::to_string(ret));
+    }
+    arena_indexes_.push_back(arena_index);
+
+    // Read existing hooks.
+    std::string key =
+        "arena." + std::to_string(arena_indexes_[i]) + ".extent_hooks";
+    extent_hooks_t* hooks;
+    size_t hooks_size = sizeof(hooks);
+    ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0);
+    if (ret != 0) {
+      return Status::Incomplete("Failed to read existing hooks, error code: " +
+                                std::to_string(ret));
+    }
+
+    // Store existing alloc.
+    extent_alloc_t* original_alloc = hooks->alloc;
+    extent_alloc_t* expected = nullptr;
+    bool success =
+        JemallocNodumpAllocator::original_alloc_.compare_exchange_strong(
+            expected, original_alloc);
+    if (!success && original_alloc != expected) {
+      // This could happen if jemalloc creates new arenas with different
+      // initial values in their `alloc` function pointers. See
+      // `original_alloc_` API doc for more details.
+      return Status::Incomplete("Original alloc conflict.");
+    }
+
+    // Set the custom hook.
+    per_arena_hooks_.emplace_back();
+    per_arena_hooks_.back().reset(new extent_hooks_t(*hooks));
+    per_arena_hooks_.back()->alloc = &JemallocNodumpAllocator::Alloc;
+    extent_hooks_t* hooks_ptr = per_arena_hooks_.back().get();
+    ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr));
+    if (ret != 0) {
+      return Status::Incomplete("Failed to set custom hook, error code: " +
+                                std::to_string(ret));
+    }
+  }
+  return Status::OK();
+}
+
+#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+
+Status JemallocNodumpAllocator::PrepareOptions(
+    const ConfigOptions& config_options) {
+  std::string message;
+
+  if (!IsSupported(&message)) {
+    return Status::NotSupported(message);
+  } else if (options_.limit_tcache_size &&
+             options_.tcache_size_lower_bound >=
+                 options_.tcache_size_upper_bound) {
+    return Status::InvalidArgument(
+        "tcache_size_lower_bound is larger than or equal to "
+        "tcache_size_upper_bound.");
+  } else if (options_.num_arenas < 1) {
+    return Status::InvalidArgument("num_arenas must be a positive integer");
+  } else if (IsMutable()) {
+    Status s = MemoryAllocator::PrepareOptions(config_options);
+#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+    if (s.ok()) {
+      s = InitializeArenas();
+    }
+#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+    return s;
+  } else {
+    // Already prepared
+    return Status::OK();
+  }
+}
+
+#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+int JemallocNodumpAllocator::GetThreadSpecificCache(size_t size) {
+  // We always enable tcache. The only corner case is when there are a ton of
+  // threads accessing with low frequency, in which case it could consume a
+  // lot of memory (it may reach # threads * ~1MB) without bringing much
+  // benefit.
+  if (options_.limit_tcache_size && (size <= options_.tcache_size_lower_bound ||
+                                     size > options_.tcache_size_upper_bound)) {
+    return MALLOCX_TCACHE_NONE;
+  }
+  unsigned* tcache_index = reinterpret_cast<unsigned*>(tcache_.Get());
+  if (UNLIKELY(tcache_index == nullptr)) {
+    // Instantiate tcache.
+    tcache_index = new unsigned(0);
+    size_t tcache_index_size = sizeof(unsigned);
+    int ret =
+        mallctl("tcache.create", tcache_index, &tcache_index_size, nullptr, 0);
+    if (ret != 0) {
+      // No good way to expose the error. Silently disable tcache.
+      delete tcache_index;
+      return MALLOCX_TCACHE_NONE;
+    }
+    tcache_.Reset(static_cast<void*>(tcache_index));
+  }
+  return MALLOCX_TCACHE(*tcache_index);
+}
+void* JemallocNodumpAllocator::Alloc(extent_hooks_t* extent, void* new_addr,
+                                     size_t size, size_t alignment, bool* zero,
+                                     bool* commit, unsigned arena_ind) {
+  extent_alloc_t* original_alloc =
+      original_alloc_.load(std::memory_order_relaxed);
+  assert(original_alloc != nullptr);
+  void* result = original_alloc(extent, new_addr, size, alignment, zero, commit,
+                                arena_ind);
+  if (result != nullptr) {
+    int ret = madvise(result, size, MADV_DONTDUMP);
+    if (ret != 0) {
+      fprintf(
+          stderr,
+          "JemallocNodumpAllocator failed to set MADV_DONTDUMP, error code: %d",
+          ret);
+      assert(false);
+    }
+  }
+  return result;
+}
+
+Status JemallocNodumpAllocator::DestroyArena(uint32_t arena_index) {
+  assert(arena_index != 0);
+  std::string key = "arena." + std::to_string(arena_index) + ".destroy";
+  int ret = mallctl(key.c_str(), nullptr, 0, nullptr, 0);
+  if (ret != 0) {
+    return Status::Incomplete("Failed to destroy jemalloc arena, error code: " +
+                              std::to_string(ret));
+  }
+  return Status::OK();
+}
+
+void JemallocNodumpAllocator::DestroyThreadSpecificCache(void* ptr) {
+  assert(ptr != nullptr);
+  unsigned* tcache_index = static_cast<unsigned*>(ptr);
+  size_t tcache_index_size = sizeof(unsigned);
+  int ret __attribute__((__unused__)) =
+      mallctl("tcache.destroy", nullptr, 0, tcache_index, tcache_index_size);
+  // Silently ignore error.
+  assert(ret == 0);
+  delete tcache_index;
+}
+
+#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+
+Status NewJemallocNodumpAllocator(
+    JemallocAllocatorOptions& options,
+    std::shared_ptr<MemoryAllocator>* memory_allocator) {
+  if (memory_allocator == nullptr) {
+    return Status::InvalidArgument("memory_allocator must be non-null.");
+  }
+#ifndef ROCKSDB_JEMALLOC
+  (void)options;
+  return Status::NotSupported("Not compiled with JEMALLOC");
+#else
+  std::unique_ptr<MemoryAllocator> allocator(
+      new JemallocNodumpAllocator(options));
+  Status s = allocator->PrepareOptions(ConfigOptions());
+  if (s.ok()) {
+    memory_allocator->reset(allocator.release());
+  }
+  return s;
+#endif
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.h
new file mode 100644
index 0000000000..2bdbaeb328
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/jemalloc_nodump_allocator.h
@@ -0,0 +1,99 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
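For context, the allocator declared in this header is normally consumed through NewJemallocNodumpAllocator and plugged into a cache. A minimal wiring sketch, assuming jemalloc support at build time; MakeNodumpBlockCache is an illustrative helper, not part of this patch.

#include <memory>
#include <string>

#include "rocksdb/cache.h"
#include "rocksdb/memory_allocator.h"

// Illustrative helper: route block cache allocations through the no-dump
// allocator so cached blocks are excluded from core dumps when supported.
std::shared_ptr<rocksdb::Cache> MakeNodumpBlockCache(size_t capacity) {
  rocksdb::JemallocAllocatorOptions jopts;
  std::shared_ptr<rocksdb::MemoryAllocator> allocator;
  rocksdb::Status s = rocksdb::NewJemallocNodumpAllocator(jopts, &allocator);

  rocksdb::LRUCacheOptions copts;
  copts.capacity = capacity;
  if (s.ok()) {
    // On unsupported builds s is NotSupported, and the cache silently falls
    // back to the default allocator.
    copts.memory_allocator = allocator;
  }
  return rocksdb::NewLRUCache(copts);
}

Since IsSupported() is checked inside PrepareOptions, the sketch only has to branch on the returned Status.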
+
+#pragma once
+
+#include <atomic>
+#include <vector>
+
+#include "port/jemalloc_helper.h"
+#include "port/port.h"
+#include "rocksdb/memory_allocator.h"
+#include "util/thread_local.h"
+#include "utilities/memory_allocators.h"
+
+#if defined(ROCKSDB_JEMALLOC) && defined(ROCKSDB_PLATFORM_POSIX)
+
+#include <sys/mman.h>
+
+#if (JEMALLOC_VERSION_MAJOR >= 5) && defined(MADV_DONTDUMP)
+#define ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+#endif  // (JEMALLOC_VERSION_MAJOR >= 5) && MADV_DONTDUMP
+#endif  // ROCKSDB_JEMALLOC && ROCKSDB_PLATFORM_POSIX
+
+namespace ROCKSDB_NAMESPACE {
+
+// Allocation requests are randomly sharded across
+// `JemallocAllocatorOptions::num_arenas` arenas to reduce contention on per-
+// arena mutexes.
+class JemallocNodumpAllocator : public BaseMemoryAllocator {
+ public:
+  explicit JemallocNodumpAllocator(JemallocAllocatorOptions& options);
+#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+  ~JemallocNodumpAllocator();
+#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+
+  static const char* kClassName() { return "JemallocNodumpAllocator"; }
+  const char* Name() const override { return kClassName(); }
+  static bool IsSupported() {
+    std::string unused;
+    return IsSupported(&unused);
+  }
+  static bool IsSupported(std::string* why);
+  bool IsMutable() const { return !init_; }
+
+  Status PrepareOptions(const ConfigOptions& config_options) override;
+
+#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+  void* Allocate(size_t size) override;
+  void Deallocate(void* p) override;
+  size_t UsableSize(void* p, size_t allocation_size) const override;
+#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+
+ private:
+#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+  Status InitializeArenas();
+
+  uint32_t GetArenaIndex() const;
+
+  // Custom alloc hook to replace jemalloc default alloc.
+  static void* Alloc(extent_hooks_t* extent, void* new_addr, size_t size,
+                     size_t alignment, bool* zero, bool* commit,
+                     unsigned arena_ind);
+
+  // Destroy arena on destruction of the allocator, or on failure.
+  static Status DestroyArena(uint32_t arena_index);
+
+  // Destroy tcache on destruction of the allocator, or thread exit.
+  static void DestroyThreadSpecificCache(void* ptr);
+
+  // Get or create tcache. Return flag suitable to use with `mallocx`:
+  // either MALLOCX_TCACHE_NONE or MALLOCX_TCACHE(tc).
+  int GetThreadSpecificCache(size_t size);
+#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+  JemallocAllocatorOptions options_;
+
+#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
+  // A function pointer to jemalloc default alloc. Use atomic to make sure
+  // NewJemallocNodumpAllocator is thread-safe.
+  //
+  // Hack: original_alloc_ needs to be static for Alloc() to access it.
+  // Alloc() needs to be static to be passed to jemalloc as a function pointer.
+  // We can use a single process-wide value as long as we assume that any newly
+  // created arena has the same original value in its `alloc` function pointer.
+  static std::atomic<extent_alloc_t*> original_alloc_;
+
+  // Custom hooks have to outlive the corresponding arena.
+  std::vector<std::unique_ptr<extent_hooks_t>> per_arena_hooks_;
+
+  // Holds the thread-local tcache index.
+ ThreadLocalPtr tcache_; + + std::vector arena_indexes_; +#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + + bool init_ = false; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.cc new file mode 100644 index 0000000000..635c2210e7 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.cc @@ -0,0 +1,44 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// Copyright (c) 2019 Intel Corporation +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifdef MEMKIND +#include +#endif // MEMKIND + +#include "memory/memkind_kmem_allocator.h" + +namespace ROCKSDB_NAMESPACE { +Status MemkindKmemAllocator::PrepareOptions(const ConfigOptions& options) { + std::string message; + if (!IsSupported(&message)) { + return Status::NotSupported(message); + } else { + return MemoryAllocator::PrepareOptions(options); + } +} + +#ifdef MEMKIND +void* MemkindKmemAllocator::Allocate(size_t size) { + void* p = memkind_malloc(MEMKIND_DAX_KMEM, size); + if (p == NULL) { + throw std::bad_alloc(); + } + return p; +} + +void MemkindKmemAllocator::Deallocate(void* p) { + memkind_free(MEMKIND_DAX_KMEM, p); +} + +#ifdef ROCKSDB_MALLOC_USABLE_SIZE +size_t MemkindKmemAllocator::UsableSize(void* p, + size_t /*allocation_size*/) const { + return memkind_malloc_usable_size(MEMKIND_DAX_KMEM, p); +} +#endif // ROCKSDB_MALLOC_USABLE_SIZE +#endif // MEMKIND + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.h new file mode 100644 index 0000000000..7176f17e33 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memkind_kmem_allocator.h @@ -0,0 +1,43 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// Copyright (c) 2019 Intel Corporation +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
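The memkind-based allocator declared below is typically used the same way as the jemalloc one: construct it, let PrepareOptions verify support, and hand it to a cache. A sketch under the assumption that libmemkind is linked in; MakePmemBlockCache is an illustrative name, not part of this patch.

#include <memory>
#include <string>

#include "memory/memkind_kmem_allocator.h"
#include "rocksdb/cache.h"

// Illustrative helper: place the block cache on DAX KMEM NUMA nodes (e.g.
// persistent memory exposed by the kernel) when memkind is compiled in.
std::shared_ptr<rocksdb::Cache> MakePmemBlockCache(size_t capacity) {
  rocksdb::LRUCacheOptions opts;
  opts.capacity = capacity;
  std::string why;
  if (rocksdb::MemkindKmemAllocator::IsSupported(&why)) {
    opts.memory_allocator = std::make_shared<rocksdb::MemkindKmemAllocator>();
  }
  return rocksdb::NewLRUCache(opts);
}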
+ +#pragma once + +#include "rocksdb/memory_allocator.h" +#include "utilities/memory_allocators.h" + +namespace ROCKSDB_NAMESPACE { + +class MemkindKmemAllocator : public BaseMemoryAllocator { + public: + static const char* kClassName() { return "MemkindKmemAllocator"; } + const char* Name() const override { return kClassName(); } + static bool IsSupported() { + std::string unused; + return IsSupported(&unused); + } + + static bool IsSupported(std::string* msg) { +#ifdef MEMKIND + (void)msg; + return true; +#else + *msg = "Not compiled with MemKind"; + return false; +#endif + } + Status PrepareOptions(const ConfigOptions& options) override; + +#ifdef MEMKIND + void* Allocate(size_t size) override; + void Deallocate(void* p) override; +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + size_t UsableSize(void* p, size_t /*allocation_size*/) const override; +#endif +#endif // MEMKIND +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator.cc new file mode 100644 index 0000000000..d0de26b94d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/memory_allocator.h" + +#include "memory/jemalloc_nodump_allocator.h" +#include "memory/memkind_kmem_allocator.h" +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "utilities/memory_allocators.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +static std::unordered_map ma_wrapper_type_info = { + {"target", OptionTypeInfo::AsCustomSharedPtr( + 0, OptionVerificationType::kByName, OptionTypeFlags::kNone)}, +}; + +static int RegisterBuiltinAllocators(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + DefaultMemoryAllocator::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new DefaultMemoryAllocator()); + return guard->get(); + }); + library.AddFactory( + CountedMemoryAllocator::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new CountedMemoryAllocator( + std::make_shared())); + return guard->get(); + }); + library.AddFactory( + JemallocNodumpAllocator::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* errmsg) { + if (JemallocNodumpAllocator::IsSupported(errmsg)) { + JemallocAllocatorOptions options; + guard->reset(new JemallocNodumpAllocator(options)); + } + return guard->get(); + }); + library.AddFactory( + MemkindKmemAllocator::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* errmsg) { + if (MemkindKmemAllocator::IsSupported(errmsg)) { + guard->reset(new MemkindKmemAllocator()); + } + return guard->get(); + }); + size_t num_types; + return static_cast(library.GetFactoryCount(&num_types)); +} +} // namespace + +MemoryAllocatorWrapper::MemoryAllocatorWrapper( + const std::shared_ptr& t) + : target_(t) { + RegisterOptions("", &target_, &ma_wrapper_type_info); +} + +Status MemoryAllocator::CreateFromString( + const ConfigOptions& options, const std::string& value, + 
std::shared_ptr<MemoryAllocator>* result) {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    RegisterBuiltinAllocators(*(ObjectLibrary::Default().get()), "");
+  });
+  ConfigOptions copy = options;
+  copy.invoke_prepare_options = true;
+  return LoadManagedObject<MemoryAllocator>(copy, value, result);
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator_impl.h
new file mode 100644
index 0000000000..68aa35beb8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_allocator_impl.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+
+#include <algorithm>
+
+#include "rocksdb/memory_allocator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct CustomDeleter {
+  CustomDeleter(MemoryAllocator* a = nullptr) : allocator(a) {}
+
+  void operator()(char* ptr) const {
+    if (allocator) {
+      allocator->Deallocate(reinterpret_cast<void*>(ptr));
+    } else {
+      delete[] ptr;
+    }
+  }
+
+  MemoryAllocator* allocator;
+};
+
+using CacheAllocationPtr = std::unique_ptr<char[], CustomDeleter>;
+
+inline CacheAllocationPtr AllocateBlock(size_t size,
+                                        MemoryAllocator* allocator) {
+  if (allocator) {
+    auto block = reinterpret_cast<char*>(allocator->Allocate(size));
+    return CacheAllocationPtr(block, allocator);
+  }
+  return CacheAllocationPtr(new char[size]);
+}
+
+inline CacheAllocationPtr AllocateAndCopyBlock(const Slice& data,
+                                               MemoryAllocator* allocator) {
+  CacheAllocationPtr cap = AllocateBlock(data.size(), allocator);
+  std::copy_n(data.data(), data.size(), cap.get());
+  return cap;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_usage.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_usage.h
new file mode 100644
index 0000000000..76b9bd1305
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memory/memory_usage.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cstddef>
+#include <unordered_map>
+#ifdef USE_FOLLY
+#include <folly/container/F14Map.h>
+#endif
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Helper methods to estimate memory usage by std containers.
+
+template <class Key, class Value, class Hash>
+size_t ApproximateMemoryUsage(
+    const std::unordered_map<Key, Value, Hash>& umap) {
+  using Map = std::unordered_map<Key, Value, Hash>;
+  return sizeof(umap) +
+         // Size of all items plus a next pointer for each item.
+         (sizeof(typename Map::value_type) + sizeof(void*)) * umap.size() +
+         // Size of hash buckets.
+         umap.bucket_count() * sizeof(void*);
+}
+
+#ifdef USE_FOLLY
+template <class Key, class Value>
+size_t ApproximateMemoryUsage(const folly::F14FastMap<Key, Value>& umap) {
+  return sizeof(umap) + umap.getAllocatedMemorySize();
+}
+#endif
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/alloc_tracker.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/alloc_tracker.cc
new file mode 100644
index 0000000000..4c6d354319
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/alloc_tracker.cc
@@ -0,0 +1,63 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <cassert>
+
+#include "memory/allocator.h"
+#include "memory/arena.h"
+#include "rocksdb/write_buffer_manager.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+AllocTracker::AllocTracker(WriteBufferManager* write_buffer_manager)
+    : write_buffer_manager_(write_buffer_manager),
+      bytes_allocated_(0),
+      done_allocating_(false),
+      freed_(false) {}
+
+AllocTracker::~AllocTracker() { FreeMem(); }
+
+void AllocTracker::Allocate(size_t bytes) {
+  assert(write_buffer_manager_ != nullptr);
+  if (write_buffer_manager_->enabled() ||
+      write_buffer_manager_->cost_to_cache()) {
+    bytes_allocated_.fetch_add(bytes, std::memory_order_relaxed);
+    write_buffer_manager_->ReserveMem(bytes);
+  }
+}
+
+void AllocTracker::DoneAllocating() {
+  if (write_buffer_manager_ != nullptr && !done_allocating_) {
+    if (write_buffer_manager_->enabled() ||
+        write_buffer_manager_->cost_to_cache()) {
+      write_buffer_manager_->ScheduleFreeMem(
+          bytes_allocated_.load(std::memory_order_relaxed));
+    } else {
+      assert(bytes_allocated_.load(std::memory_order_relaxed) == 0);
+    }
+    done_allocating_ = true;
+  }
+}
+
+void AllocTracker::FreeMem() {
+  if (!done_allocating_) {
+    DoneAllocating();
+  }
+  if (write_buffer_manager_ != nullptr && !freed_) {
+    if (write_buffer_manager_->enabled() ||
+        write_buffer_manager_->cost_to_cache()) {
+      write_buffer_manager_->FreeMem(
+          bytes_allocated_.load(std::memory_order_relaxed));
+    } else {
+      assert(bytes_allocated_.load(std::memory_order_relaxed) == 0);
+    }
+    freed_ = true;
+  }
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_linklist_rep.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_linklist_rep.cc
new file mode 100644
index 0000000000..9e60f9be37
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_linklist_rep.cc
@@ -0,0 +1,924 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+
+#include <algorithm>
+#include <atomic>
+
+#include "db/memtable.h"
+#include "memory/arena.h"
+#include "memtable/skiplist.h"
+#include "monitoring/histogram.h"
+#include "port/port.h"
+#include "rocksdb/memtablerep.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+
+using Key = const char*;
+using MemtableSkipList = SkipList<const char*, const MemTableRep::KeyComparator&>;
+using Pointer = std::atomic<void*>;
+
+// A data structure used as the header of a linked list of a hash bucket.
+struct BucketHeader {
+  Pointer next;
+  std::atomic<uint32_t> num_entries;
+
+  explicit BucketHeader(void* n, uint32_t count)
+      : next(n), num_entries(count) {}
+
+  bool IsSkipListBucket() {
+    return next.load(std::memory_order_relaxed) == this;
+  }
+
+  uint32_t GetNumEntries() const {
+    return num_entries.load(std::memory_order_relaxed);
+  }
+
+  // REQUIRES: called from single-threaded Insert()
+  void IncNumEntries() {
+    // Only one thread can write at a time, so there is no need for an atomic
+    // increment. Update it with a relaxed load and store.
+    num_entries.store(GetNumEntries() + 1, std::memory_order_relaxed);
+  }
+};
+
+// A data structure used as the header of a skip list of a hash bucket.
+struct SkipListBucketHeader {
+  BucketHeader Counting_header;
+  MemtableSkipList skip_list;
+
+  explicit SkipListBucketHeader(const MemTableRep::KeyComparator& cmp,
+                                Allocator* allocator, uint32_t count)
+      : Counting_header(this,  // Pointing to itself to indicate header type.
+                        count),
+        skip_list(cmp, allocator) {}
+};
+
+struct Node {
+  // Accessors/mutators for links. Wrapped in methods so we can
+  // add the appropriate barriers as necessary.
+  Node* Next() {
+    // Use an 'acquire load' so that we observe a fully initialized
+    // version of the returned Node.
+    return next_.load(std::memory_order_acquire);
+  }
+  void SetNext(Node* x) {
+    // Use a 'release store' so that anybody who reads through this
+    // pointer observes a fully initialized version of the inserted node.
+    next_.store(x, std::memory_order_release);
+  }
+  // No-barrier variants that can be safely used in a few locations.
+  Node* NoBarrier_Next() { return next_.load(std::memory_order_relaxed); }
+
+  void NoBarrier_SetNext(Node* x) { next_.store(x, std::memory_order_relaxed); }
+
+  // Needed for the placement new below, which is fine
+  Node() {}
+
+ private:
+  std::atomic<Node*> next_;
+
+  // Prohibit copying due to the below
+  Node(const Node&) = delete;
+  Node& operator=(const Node&) = delete;
+
+ public:
+  char key[1];
+};
+
+// Memory structure of the mem table:
+// It is a hash table, where each bucket points to one entry, a linked list,
+// or a skip list. In order to track the total number of records in a bucket,
+// to determine whether it should switch to a skip list, a header is added
+// just to indicate the number of entries in the bucket.
+//
+//
+//          +-----> NULL    Case 1. Empty bucket
+//          |
+//          |
+//          | +---> +-------+
+//          |       | Next  +--> NULL
+//          |       +-------+
+//  +-----+ |       |       |  Case 2. One Entry in bucket.
+//  |     +-+       |  Data |          next pointer points to
+//  +-----+         |       |          NULL. All other cases
+//  |     |         |       |          next pointer is not NULL.
+//  +-----+         +-------+
+//  |     +---+
+//  +-----+   +->  +-------+  +>  +-------+  +-> +-------+
+//  |     |   |    | Next  +--+   | Next  +--+   | Next  +-->NULL
+//  +-----+   |    +-------+      +-------+      +-------+
+//  |     +---+    | Count |      |       |      |       |
+//  +-----+        +-------+      |  Data |      |  Data |
+//  |     |                       |       |      |       |
+//  +-----+  Case 3.              |       |      |       |
+//  |     |  A header             +-------+      +-------+
+//  +-----+  points to
+//  |     |  a linked list. Count indicates total number
+//  +-----+  of rows in this bucket.
+//  |     |
+//  +-----+   +-> +-------+ <--+
+//  |     |   |   | Next  +----+
+//  +-----+   |   +-------+        Case 4. A header points to a skip
+//  |     +---+   | Count |                list and next pointer points to
+//  +-----+       +-------+                itself, to distinguish case 3 or 4.
+//  |     |       |  Skip +-->             Count is still kept, to indicate the
+//  +-----+       |  List  |   Data        total number of entries in the
+//  |     |       |        +-->            bucket, for debugging purposes.
+//  +-----+       |        |
+//  |     |       +-------+
+//  +-----+
+//
+// We don't have a data race when changing cases because:
+// (1) When changing from case 2->3, we create a new bucket header, put the
+//     single node there first without changing the original node, and do a
+//     release store when changing the bucket pointer. In that case, a reader
+//     who sees a stale value of the bucket pointer will still read this node,
+//     while a reader who sees the new value sees a fully initialized header,
+//     because of the release store.
+// (2) When changing from case 3->4, a new header is created whose skip list
+//     points to the data, before doing a release store to change the bucket
+//     pointer. The old header and nodes are never changed, so any reader that
+//     sees any of those existing pointers is guaranteed to be able to iterate
+//     to the end of the linked list.
+// (3) The header's next pointer in case 3 might change, but it is never equal
+//     to the header itself, so no matter whether a reader sees a stale or a
+//     newer value, it can correctly distinguish case 3 from case 4.
+//
+// The reason we use case 2 is to keep the format efficient when bucket
+// utilization is relatively low. If we used case 3 for single-entry buckets,
+// we would waste 12 bytes for every entry, which can be a significant loss of
+// memory utilization.
+class HashLinkListRep : public MemTableRep {
+ public:
+  HashLinkListRep(const MemTableRep::KeyComparator& compare,
+                  Allocator* allocator, const SliceTransform* transform,
+                  size_t bucket_size, uint32_t threshold_use_skiplist,
+                  size_t huge_page_tlb_size, Logger* logger,
+                  int bucket_entries_logging_threshold,
+                  bool if_log_bucket_dist_when_flash);
+
+  KeyHandle Allocate(const size_t len, char** buf) override;
+
+  void Insert(KeyHandle handle) override;
+
+  bool Contains(const char* key) const override;
+
+  size_t ApproximateMemoryUsage() override;
+
+  void Get(const LookupKey& k, void* callback_args,
+           bool (*callback_func)(void* arg, const char* entry)) override;
+
+  ~HashLinkListRep() override;
+
+  MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override;
+
+  MemTableRep::Iterator* GetDynamicPrefixIterator(
+      Arena* arena = nullptr) override;
+
+ private:
+  friend class DynamicIterator;
+
+  size_t bucket_size_;
+
+  // Maps slices (which are transformed user keys) to buckets of keys sharing
+  // the same transform.
+  Pointer* buckets_;
+
+  const uint32_t threshold_use_skiplist_;
+
+  // The user-supplied transform whose domain is the user keys.
+  const SliceTransform* transform_;
+
+  const MemTableRep::KeyComparator& compare_;
+
+  Logger* logger_;
+  int bucket_entries_logging_threshold_;
+  bool if_log_bucket_dist_when_flash_;
+
+  bool LinkListContains(Node* head, const Slice& key) const;
+
+  bool IsEmptyBucket(Pointer& bucket_pointer) const {
+    return bucket_pointer.load(std::memory_order_acquire) == nullptr;
+  }
+
+  // Precondition: GetLinkListFirstNode() must have been called first and
+  // returned null, so this must be a skip list bucket
+  SkipListBucketHeader* GetSkipListBucketHeader(Pointer& bucket_pointer) const;
+
+  // Returning nullptr indicates it is a skip list bucket.
+ Node* GetLinkListFirstNode(Pointer& bucket_pointer) const; + + Slice GetPrefix(const Slice& internal_key) const { + return transform_->Transform(ExtractUserKey(internal_key)); + } + + size_t GetHash(const Slice& slice) const { + return GetSliceRangedNPHash(slice, bucket_size_); + } + + Pointer& GetBucket(size_t i) const { return buckets_[i]; } + + Pointer& GetBucket(const Slice& slice) const { + return GetBucket(GetHash(slice)); + } + + bool Equal(const Slice& a, const Key& b) const { + return (compare_(b, a) == 0); + } + + bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } + + bool KeyIsAfterNode(const Slice& internal_key, const Node* n) const { + // nullptr n is considered infinite + return (n != nullptr) && (compare_(n->key, internal_key) < 0); + } + + bool KeyIsAfterNode(const Key& key, const Node* n) const { + // nullptr n is considered infinite + return (n != nullptr) && (compare_(n->key, key) < 0); + } + + bool KeyIsAfterOrAtNode(const Slice& internal_key, const Node* n) const { + // nullptr n is considered infinite + return (n != nullptr) && (compare_(n->key, internal_key) <= 0); + } + + bool KeyIsAfterOrAtNode(const Key& key, const Node* n) const { + // nullptr n is considered infinite + return (n != nullptr) && (compare_(n->key, key) <= 0); + } + + Node* FindGreaterOrEqualInBucket(Node* head, const Slice& key) const; + Node* FindLessOrEqualInBucket(Node* head, const Slice& key) const; + + class FullListIterator : public MemTableRep::Iterator { + public: + explicit FullListIterator(MemtableSkipList* list, Allocator* allocator) + : iter_(list), full_list_(list), allocator_(allocator) {} + + ~FullListIterator() override {} + + // Returns true iff the iterator is positioned at a valid node. + bool Valid() const override { return iter_.Valid(); } + + // Returns the key at the current position. + // REQUIRES: Valid() + const char* key() const override { + assert(Valid()); + return iter_.key(); + } + + // Advances to the next position. + // REQUIRES: Valid() + void Next() override { + assert(Valid()); + iter_.Next(); + } + + // Advances to the previous position. + // REQUIRES: Valid() + void Prev() override { + assert(Valid()); + iter_.Prev(); + } + + // Advance to the first entry with a key >= target + void Seek(const Slice& internal_key, const char* memtable_key) override { + const char* encoded_key = (memtable_key != nullptr) + ? memtable_key + : EncodeKey(&tmp_, internal_key); + iter_.Seek(encoded_key); + } + + // Retreat to the last entry with a key <= target + void SeekForPrev(const Slice& internal_key, + const char* memtable_key) override { + const char* encoded_key = (memtable_key != nullptr) + ? memtable_key + : EncodeKey(&tmp_, internal_key); + iter_.SeekForPrev(encoded_key); + } + + // Position at the first entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToFirst() override { iter_.SeekToFirst(); } + + // Position at the last entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToLast() override { iter_.SeekToLast(); } + + private: + MemtableSkipList::Iterator iter_; + // To destruct with the iterator. 
+ std::unique_ptr full_list_; + std::unique_ptr allocator_; + std::string tmp_; // For passing to EncodeKey + }; + + class LinkListIterator : public MemTableRep::Iterator { + public: + explicit LinkListIterator(const HashLinkListRep* const hash_link_list_rep, + Node* head) + : hash_link_list_rep_(hash_link_list_rep), + head_(head), + node_(nullptr) {} + + ~LinkListIterator() override {} + + // Returns true iff the iterator is positioned at a valid node. + bool Valid() const override { return node_ != nullptr; } + + // Returns the key at the current position. + // REQUIRES: Valid() + const char* key() const override { + assert(Valid()); + return node_->key; + } + + // Advances to the next position. + // REQUIRES: Valid() + void Next() override { + assert(Valid()); + node_ = node_->Next(); + } + + // Advances to the previous position. + // REQUIRES: Valid() + void Prev() override { + // Prefix iterator does not support total order. + // We simply set the iterator to invalid state + Reset(nullptr); + } + + // Advance to the first entry with a key >= target + void Seek(const Slice& internal_key, + const char* /*memtable_key*/) override { + node_ = + hash_link_list_rep_->FindGreaterOrEqualInBucket(head_, internal_key); + } + + // Retreat to the last entry with a key <= target + void SeekForPrev(const Slice& /*internal_key*/, + const char* /*memtable_key*/) override { + // Since we do not support Prev() + // We simply do not support SeekForPrev + Reset(nullptr); + } + + // Position at the first entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToFirst() override { + // Prefix iterator does not support total order. + // We simply set the iterator to invalid state + Reset(nullptr); + } + + // Position at the last entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToLast() override { + // Prefix iterator does not support total order. 
+ // We simply set the iterator to invalid state + Reset(nullptr); + } + + protected: + void Reset(Node* head) { + head_ = head; + node_ = nullptr; + } + + private: + friend class HashLinkListRep; + const HashLinkListRep* const hash_link_list_rep_; + Node* head_; + Node* node_; + + virtual void SeekToHead() { node_ = head_; } + }; + + class DynamicIterator : public HashLinkListRep::LinkListIterator { + public: + explicit DynamicIterator(HashLinkListRep& memtable_rep) + : HashLinkListRep::LinkListIterator(&memtable_rep, nullptr), + memtable_rep_(memtable_rep) {} + + // Advance to the first entry with a key >= target + void Seek(const Slice& k, const char* memtable_key) override { + auto transformed = memtable_rep_.GetPrefix(k); + Pointer& bucket = memtable_rep_.GetBucket(transformed); + + if (memtable_rep_.IsEmptyBucket(bucket)) { + skip_list_iter_.reset(); + Reset(nullptr); + } else { + Node* first_linked_list_node = + memtable_rep_.GetLinkListFirstNode(bucket); + if (first_linked_list_node != nullptr) { + // The bucket is organized as a linked list + skip_list_iter_.reset(); + Reset(first_linked_list_node); + HashLinkListRep::LinkListIterator::Seek(k, memtable_key); + + } else { + SkipListBucketHeader* skip_list_header = + memtable_rep_.GetSkipListBucketHeader(bucket); + assert(skip_list_header != nullptr); + // The bucket is organized as a skip list + if (!skip_list_iter_) { + skip_list_iter_.reset( + new MemtableSkipList::Iterator(&skip_list_header->skip_list)); + } else { + skip_list_iter_->SetList(&skip_list_header->skip_list); + } + if (memtable_key != nullptr) { + skip_list_iter_->Seek(memtable_key); + } else { + IterKey encoded_key; + encoded_key.EncodeLengthPrefixedKey(k); + skip_list_iter_->Seek(encoded_key.GetUserKey().data()); + } + } + } + } + + bool Valid() const override { + if (skip_list_iter_) { + return skip_list_iter_->Valid(); + } + return HashLinkListRep::LinkListIterator::Valid(); + } + + const char* key() const override { + if (skip_list_iter_) { + return skip_list_iter_->key(); + } + return HashLinkListRep::LinkListIterator::key(); + } + + void Next() override { + if (skip_list_iter_) { + skip_list_iter_->Next(); + } else { + HashLinkListRep::LinkListIterator::Next(); + } + } + + private: + // the underlying memtable + const HashLinkListRep& memtable_rep_; + std::unique_ptr skip_list_iter_; + }; + + class EmptyIterator : public MemTableRep::Iterator { + // This is used when there wasn't a bucket. It is cheaper than + // instantiating an empty bucket over which to iterate. + public: + EmptyIterator() {} + bool Valid() const override { return false; } + const char* key() const override { + assert(false); + return nullptr; + } + void Next() override {} + void Prev() override {} + void Seek(const Slice& /*user_key*/, + const char* /*memtable_key*/) override {} + void SeekForPrev(const Slice& /*user_key*/, + const char* /*memtable_key*/) override {} + void SeekToFirst() override {} + void SeekToLast() override {} + + private: + }; +}; + +HashLinkListRep::HashLinkListRep( + const MemTableRep::KeyComparator& compare, Allocator* allocator, + const SliceTransform* transform, size_t bucket_size, + uint32_t threshold_use_skiplist, size_t huge_page_tlb_size, Logger* logger, + int bucket_entries_logging_threshold, bool if_log_bucket_dist_when_flash) + : MemTableRep(allocator), + bucket_size_(bucket_size), + // Threshold to use skip list doesn't make sense if less than 3, so we + // force it to be minimum of 3 to simplify implementation. 
+ threshold_use_skiplist_(std::max(threshold_use_skiplist, 3U)), + transform_(transform), + compare_(compare), + logger_(logger), + bucket_entries_logging_threshold_(bucket_entries_logging_threshold), + if_log_bucket_dist_when_flash_(if_log_bucket_dist_when_flash) { + char* mem = allocator_->AllocateAligned(sizeof(Pointer) * bucket_size, + huge_page_tlb_size, logger); + + buckets_ = new (mem) Pointer[bucket_size]; + + for (size_t i = 0; i < bucket_size_; ++i) { + buckets_[i].store(nullptr, std::memory_order_relaxed); + } +} + +HashLinkListRep::~HashLinkListRep() {} + +KeyHandle HashLinkListRep::Allocate(const size_t len, char** buf) { + char* mem = allocator_->AllocateAligned(sizeof(Node) + len); + Node* x = new (mem) Node(); + *buf = x->key; + return static_cast(x); +} + +SkipListBucketHeader* HashLinkListRep::GetSkipListBucketHeader( + Pointer& bucket_pointer) const { + Pointer* first_next_pointer = + static_cast(bucket_pointer.load(std::memory_order_acquire)); + assert(first_next_pointer != nullptr); + assert(first_next_pointer->load(std::memory_order_relaxed) != nullptr); + + // Counting header + BucketHeader* header = reinterpret_cast(first_next_pointer); + assert(header->IsSkipListBucket()); + assert(header->GetNumEntries() > threshold_use_skiplist_); + auto* skip_list_bucket_header = + reinterpret_cast(header); + assert(skip_list_bucket_header->Counting_header.next.load( + std::memory_order_relaxed) == header); + return skip_list_bucket_header; +} + +Node* HashLinkListRep::GetLinkListFirstNode(Pointer& bucket_pointer) const { + Pointer* first_next_pointer = + static_cast(bucket_pointer.load(std::memory_order_acquire)); + assert(first_next_pointer != nullptr); + if (first_next_pointer->load(std::memory_order_relaxed) == nullptr) { + // Single entry bucket + return reinterpret_cast(first_next_pointer); + } + + // It is possible that after we fetch first_next_pointer it is modified + // and the next is not null anymore. In this case, the bucket should have been + // modified to a counting header, so we should reload the first_next_pointer + // to make sure we see the update. + first_next_pointer = + static_cast(bucket_pointer.load(std::memory_order_acquire)); + // Counting header + BucketHeader* header = reinterpret_cast(first_next_pointer); + if (!header->IsSkipListBucket()) { + assert(header->GetNumEntries() <= threshold_use_skiplist_); + return reinterpret_cast( + header->next.load(std::memory_order_acquire)); + } + assert(header->GetNumEntries() > threshold_use_skiplist_); + return nullptr; +} + +void HashLinkListRep::Insert(KeyHandle handle) { + Node* x = static_cast(handle); + assert(!Contains(x->key)); + Slice internal_key = GetLengthPrefixedSlice(x->key); + auto transformed = GetPrefix(internal_key); + auto& bucket = buckets_[GetHash(transformed)]; + Pointer* first_next_pointer = + static_cast(bucket.load(std::memory_order_relaxed)); + + if (first_next_pointer == nullptr) { + // Case 1. empty bucket + // NoBarrier_SetNext() suffices since we will add a barrier when + // we publish a pointer to "x" in prev[i]. + x->NoBarrier_SetNext(nullptr); + bucket.store(x, std::memory_order_release); + return; + } + + BucketHeader* header = nullptr; + if (first_next_pointer->load(std::memory_order_relaxed) == nullptr) { + // Case 2. only one entry in the bucket + // Need to convert to a Counting bucket and turn to case 4. + Node* first = reinterpret_cast(first_next_pointer); + // Need to add a bucket header. 
+    // We have to first convert it to a bucket with a header before inserting
+    // the new node. Otherwise, we might need to change the next pointer of
+    // first. In that case, a reader might see that the next pointer is NULL
+    // and wrongly think the node is a bucket header.
+    auto* mem = allocator_->AllocateAligned(sizeof(BucketHeader));
+    header = new (mem) BucketHeader(first, 1);
+    bucket.store(header, std::memory_order_release);
+  } else {
+    header = reinterpret_cast<BucketHeader*>(first_next_pointer);
+    if (header->IsSkipListBucket()) {
+      // Case 4. Bucket is already a skip list
+      assert(header->GetNumEntries() > threshold_use_skiplist_);
+      auto* skip_list_bucket_header =
+          reinterpret_cast<SkipListBucketHeader*>(header);
+      // Only one thread can execute Insert() at a time. No need for an
+      // atomic increment.
+      skip_list_bucket_header->Counting_header.IncNumEntries();
+      skip_list_bucket_header->skip_list.Insert(x->key);
+      return;
+    }
+  }
+
+  if (bucket_entries_logging_threshold_ > 0 &&
+      header->GetNumEntries() ==
+          static_cast<uint32_t>(bucket_entries_logging_threshold_)) {
+    Info(logger_,
+         "HashLinkedList bucket %" ROCKSDB_PRIszt
+         " has more than %d "
+         "entries. Key to insert: %s",
+         GetHash(transformed), header->GetNumEntries(),
+         GetLengthPrefixedSlice(x->key).ToString(true).c_str());
+  }
+
+  if (header->GetNumEntries() == threshold_use_skiplist_) {
+    // Case 3. The number of entries reaches the threshold, so we need to
+    // convert to a skip list.
+    LinkListIterator bucket_iter(
+        this, reinterpret_cast<Node*>(
+                  first_next_pointer->load(std::memory_order_relaxed)));
+    auto mem = allocator_->AllocateAligned(sizeof(SkipListBucketHeader));
+    SkipListBucketHeader* new_skip_list_header = new (mem)
+        SkipListBucketHeader(compare_, allocator_, header->GetNumEntries() + 1);
+    auto& skip_list = new_skip_list_header->skip_list;
+
+    // Add all current entries to the skip list
+    for (bucket_iter.SeekToHead(); bucket_iter.Valid(); bucket_iter.Next()) {
+      skip_list.Insert(bucket_iter.key());
+    }
+
+    // insert the new entry
+    skip_list.Insert(x->key);
+    // Set the bucket
+    bucket.store(new_skip_list_header, std::memory_order_release);
+  } else {
+    // Case 5. Need to insert into the sorted linked list without changing
+    // the header.
+    Node* first =
+        reinterpret_cast<Node*>(header->next.load(std::memory_order_relaxed));
+    assert(first != nullptr);
+    // Advance the counter, unless the bucket needs to be converted to a skip
+    // list. In that case, we need to make sure the previous count never
+    // exceeds threshold_use_skiplist_, to prevent readers from casting the
+    // bucket to the wrong format.
+    header->IncNumEntries();
+
+    Node* cur = first;
+    Node* prev = nullptr;
+    while (true) {
+      if (cur == nullptr) {
+        break;
+      }
+      Node* next = cur->Next();
+      // Make sure the lists are sorted.
+      // If cur points to the head or next is nullptr, this is trivially
+      // satisfied.
+      assert((cur == first) || (next == nullptr) ||
+             KeyIsAfterNode(next->key, cur));
+      if (KeyIsAfterNode(internal_key, cur)) {
+        // Keep searching in this list
+        prev = cur;
+        cur = next;
+      } else {
+        break;
+      }
+    }
+
+    // Our data structure does not allow duplicate insertion
+    assert(cur == nullptr || !Equal(x->key, cur->key));
+
+    // NoBarrier_SetNext() suffices since we will add a barrier when
+    // we publish a pointer to "x" in prev[i].
+ x->NoBarrier_SetNext(cur); + + if (prev) { + prev->SetNext(x); + } else { + header->next.store(static_cast(x), std::memory_order_release); + } + } +} + +bool HashLinkListRep::Contains(const char* key) const { + Slice internal_key = GetLengthPrefixedSlice(key); + + auto transformed = GetPrefix(internal_key); + Pointer& bucket = GetBucket(transformed); + if (IsEmptyBucket(bucket)) { + return false; + } + + Node* linked_list_node = GetLinkListFirstNode(bucket); + if (linked_list_node != nullptr) { + return LinkListContains(linked_list_node, internal_key); + } + + SkipListBucketHeader* skip_list_header = GetSkipListBucketHeader(bucket); + if (skip_list_header != nullptr) { + return skip_list_header->skip_list.Contains(key); + } + return false; +} + +size_t HashLinkListRep::ApproximateMemoryUsage() { + // Memory is always allocated from the allocator. + return 0; +} + +void HashLinkListRep::Get(const LookupKey& k, void* callback_args, + bool (*callback_func)(void* arg, const char* entry)) { + auto transformed = transform_->Transform(k.user_key()); + Pointer& bucket = GetBucket(transformed); + + if (IsEmptyBucket(bucket)) { + return; + } + + auto* link_list_head = GetLinkListFirstNode(bucket); + if (link_list_head != nullptr) { + LinkListIterator iter(this, link_list_head); + for (iter.Seek(k.internal_key(), nullptr); + iter.Valid() && callback_func(callback_args, iter.key()); + iter.Next()) { + } + } else { + auto* skip_list_header = GetSkipListBucketHeader(bucket); + if (skip_list_header != nullptr) { + // Is a skip list + MemtableSkipList::Iterator iter(&skip_list_header->skip_list); + for (iter.Seek(k.memtable_key().data()); + iter.Valid() && callback_func(callback_args, iter.key()); + iter.Next()) { + } + } + } +} + +MemTableRep::Iterator* HashLinkListRep::GetIterator(Arena* alloc_arena) { + // allocate a new arena of similar size to the one currently in use + Arena* new_arena = new Arena(allocator_->BlockSize()); + auto list = new MemtableSkipList(compare_, new_arena); + HistogramImpl keys_per_bucket_hist; + + for (size_t i = 0; i < bucket_size_; ++i) { + int count = 0; + Pointer& bucket = GetBucket(i); + if (!IsEmptyBucket(bucket)) { + auto* link_list_head = GetLinkListFirstNode(bucket); + if (link_list_head != nullptr) { + LinkListIterator itr(this, link_list_head); + for (itr.SeekToHead(); itr.Valid(); itr.Next()) { + list->Insert(itr.key()); + count++; + } + } else { + auto* skip_list_header = GetSkipListBucketHeader(bucket); + assert(skip_list_header != nullptr); + // Is a skip list + MemtableSkipList::Iterator itr(&skip_list_header->skip_list); + for (itr.SeekToFirst(); itr.Valid(); itr.Next()) { + list->Insert(itr.key()); + count++; + } + } + } + if (if_log_bucket_dist_when_flash_) { + keys_per_bucket_hist.Add(count); + } + } + if (if_log_bucket_dist_when_flash_ && logger_ != nullptr) { + Info(logger_, "hashLinkedList Entry distribution among buckets: %s", + keys_per_bucket_hist.ToString().c_str()); + } + + if (alloc_arena == nullptr) { + return new FullListIterator(list, new_arena); + } else { + auto mem = alloc_arena->AllocateAligned(sizeof(FullListIterator)); + return new (mem) FullListIterator(list, new_arena); + } +} + +MemTableRep::Iterator* HashLinkListRep::GetDynamicPrefixIterator( + Arena* alloc_arena) { + if (alloc_arena == nullptr) { + return new DynamicIterator(*this); + } else { + auto mem = alloc_arena->AllocateAligned(sizeof(DynamicIterator)); + return new (mem) DynamicIterator(*this); + } +} + +bool HashLinkListRep::LinkListContains(Node* head, + const Slice& 
user_key) const { + Node* x = FindGreaterOrEqualInBucket(head, user_key); + return (x != nullptr && Equal(user_key, x->key)); +} + +Node* HashLinkListRep::FindGreaterOrEqualInBucket(Node* head, + const Slice& key) const { + Node* x = head; + while (true) { + if (x == nullptr) { + return x; + } + Node* next = x->Next(); + // Make sure the lists are sorted. + // If x points to head_ or next points nullptr, it is trivially satisfied. + assert((x == head) || (next == nullptr) || KeyIsAfterNode(next->key, x)); + if (KeyIsAfterNode(key, x)) { + // Keep searching in this list + x = next; + } else { + break; + } + } + return x; +} + +struct HashLinkListRepOptions { + static const char* kName() { return "HashLinkListRepFactoryOptions"; } + size_t bucket_count; + uint32_t threshold_use_skiplist; + size_t huge_page_tlb_size; + int bucket_entries_logging_threshold; + bool if_log_bucket_dist_when_flash; +}; + +static std::unordered_map hash_linklist_info = { + {"bucket_count", + {offsetof(struct HashLinkListRepOptions, bucket_count), OptionType::kSizeT, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"threshold", + {offsetof(struct HashLinkListRepOptions, threshold_use_skiplist), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"huge_page_size", + {offsetof(struct HashLinkListRepOptions, huge_page_tlb_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"logging_threshold", + {offsetof(struct HashLinkListRepOptions, bucket_entries_logging_threshold), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"log_when_flash", + {offsetof(struct HashLinkListRepOptions, if_log_bucket_dist_when_flash), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; + +class HashLinkListRepFactory : public MemTableRepFactory { + public: + explicit HashLinkListRepFactory(size_t bucket_count, + uint32_t threshold_use_skiplist, + size_t huge_page_tlb_size, + int bucket_entries_logging_threshold, + bool if_log_bucket_dist_when_flash) { + options_.bucket_count = bucket_count; + options_.threshold_use_skiplist = threshold_use_skiplist; + options_.huge_page_tlb_size = huge_page_tlb_size; + options_.bucket_entries_logging_threshold = + bucket_entries_logging_threshold; + options_.if_log_bucket_dist_when_flash = if_log_bucket_dist_when_flash; + RegisterOptions(&options_, &hash_linklist_info); + } + + using MemTableRepFactory::CreateMemTableRep; + virtual MemTableRep* CreateMemTableRep( + const MemTableRep::KeyComparator& compare, Allocator* allocator, + const SliceTransform* transform, Logger* logger) override; + + static const char* kClassName() { return "HashLinkListRepFactory"; } + static const char* kNickName() { return "hash_linkedlist"; } + virtual const char* Name() const override { return kClassName(); } + virtual const char* NickName() const override { return kNickName(); } + + private: + HashLinkListRepOptions options_; +}; + +} // namespace + +MemTableRep* HashLinkListRepFactory::CreateMemTableRep( + const MemTableRep::KeyComparator& compare, Allocator* allocator, + const SliceTransform* transform, Logger* logger) { + return new HashLinkListRep( + compare, allocator, transform, options_.bucket_count, + options_.threshold_use_skiplist, options_.huge_page_tlb_size, logger, + options_.bucket_entries_logging_threshold, + options_.if_log_bucket_dist_when_flash); +} + +MemTableRepFactory* NewHashLinkListRepFactory( + size_t bucket_count, size_t huge_page_tlb_size, + 
int bucket_entries_logging_threshold, bool if_log_bucket_dist_when_flash,
+    uint32_t threshold_use_skiplist) {
+  return new HashLinkListRepFactory(
+      bucket_count, threshold_use_skiplist, huge_page_tlb_size,
+      bucket_entries_logging_threshold, if_log_bucket_dist_when_flash);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_skiplist_rep.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_skiplist_rep.cc
new file mode 100644
index 0000000000..15ff4f0719
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/hash_skiplist_rep.cc
@@ -0,0 +1,391 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#include <atomic>
+
+#include "db/memtable.h"
+#include "memory/arena.h"
+#include "memtable/skiplist.h"
+#include "port/port.h"
+#include "rocksdb/memtablerep.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/murmurhash.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+
+class HashSkipListRep : public MemTableRep {
+ public:
+  HashSkipListRep(const MemTableRep::KeyComparator& compare,
+                  Allocator* allocator, const SliceTransform* transform,
+                  size_t bucket_size, int32_t skiplist_height,
+                  int32_t skiplist_branching_factor);
+
+  void Insert(KeyHandle handle) override;
+
+  bool Contains(const char* key) const override;
+
+  size_t ApproximateMemoryUsage() override;
+
+  void Get(const LookupKey& k, void* callback_args,
+           bool (*callback_func)(void* arg, const char* entry)) override;
+
+  ~HashSkipListRep() override;
+
+  MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override;
+
+  MemTableRep::Iterator* GetDynamicPrefixIterator(
+      Arena* arena = nullptr) override;
+
+ private:
+  friend class DynamicIterator;
+  using Bucket = SkipList<const char*, const MemTableRep::KeyComparator&>;
+
+  size_t bucket_size_;
+
+  const int32_t skiplist_height_;
+  const int32_t skiplist_branching_factor_;
+
+  // Maps slices (which are transformed user keys) to buckets of keys sharing
+  // the same transform.
+  std::atomic<Bucket*>* buckets_;
+
+  // The user-supplied transform whose domain is the user keys.
+  const SliceTransform* transform_;
+
+  const MemTableRep::KeyComparator& compare_;
+  // immutable after construction
+  Allocator* const allocator_;
+
+  inline size_t GetHash(const Slice& slice) const {
+    return MurmurHash(slice.data(), static_cast<int>(slice.size()), 0) %
+           bucket_size_;
+  }
+  inline Bucket* GetBucket(size_t i) const {
+    return buckets_[i].load(std::memory_order_acquire);
+  }
+  inline Bucket* GetBucket(const Slice& slice) const {
+    return GetBucket(GetHash(slice));
+  }
+  // Get a bucket from buckets_. If the bucket hasn't been initialized yet,
+  // initialize it before returning.
+  Bucket* GetInitializedBucket(const Slice& transformed);
+
+  class Iterator : public MemTableRep::Iterator {
+   public:
+    explicit Iterator(Bucket* list, bool own_list = true,
+                      Arena* arena = nullptr)
+        : list_(list), iter_(list), own_list_(own_list), arena_(arena) {}
+
+    ~Iterator() override {
+      // if we own the list, we should also delete it
+      if (own_list_) {
+        assert(list_ != nullptr);
+        delete list_;
+      }
+    }
+
+    // Returns true iff the iterator is positioned at a valid node.
+    bool Valid() const override { return list_ != nullptr && iter_.Valid(); }
+
+    // Returns the key at the current position.
+ // REQUIRES: Valid() + const char* key() const override { + assert(Valid()); + return iter_.key(); + } + + // Advances to the next position. + // REQUIRES: Valid() + void Next() override { + assert(Valid()); + iter_.Next(); + } + + // Advances to the previous position. + // REQUIRES: Valid() + void Prev() override { + assert(Valid()); + iter_.Prev(); + } + + // Advance to the first entry with a key >= target + void Seek(const Slice& internal_key, const char* memtable_key) override { + if (list_ != nullptr) { + const char* encoded_key = (memtable_key != nullptr) + ? memtable_key + : EncodeKey(&tmp_, internal_key); + iter_.Seek(encoded_key); + } + } + + // Retreat to the last entry with a key <= target + void SeekForPrev(const Slice& /*internal_key*/, + const char* /*memtable_key*/) override { + // not supported + assert(false); + } + + // Position at the first entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToFirst() override { + if (list_ != nullptr) { + iter_.SeekToFirst(); + } + } + + // Position at the last entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToLast() override { + if (list_ != nullptr) { + iter_.SeekToLast(); + } + } + + protected: + void Reset(Bucket* list) { + if (own_list_) { + assert(list_ != nullptr); + delete list_; + } + list_ = list; + iter_.SetList(list); + own_list_ = false; + } + + private: + // if list_ is nullptr, we should NEVER call any methods on iter_ + // if list_ is nullptr, this Iterator is not Valid() + Bucket* list_; + Bucket::Iterator iter_; + // here we track if we own list_. If we own it, we are also + // responsible for it's cleaning. This is a poor man's std::shared_ptr + bool own_list_; + std::unique_ptr arena_; + std::string tmp_; // For passing to EncodeKey + }; + + class DynamicIterator : public HashSkipListRep::Iterator { + public: + explicit DynamicIterator(const HashSkipListRep& memtable_rep) + : HashSkipListRep::Iterator(nullptr, false), + memtable_rep_(memtable_rep) {} + + // Advance to the first entry with a key >= target + void Seek(const Slice& k, const char* memtable_key) override { + auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k)); + Reset(memtable_rep_.GetBucket(transformed)); + HashSkipListRep::Iterator::Seek(k, memtable_key); + } + + // Position at the first entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToFirst() override { + // Prefix iterator does not support total order. + // We simply set the iterator to invalid state + Reset(nullptr); + } + + // Position at the last entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToLast() override { + // Prefix iterator does not support total order. + // We simply set the iterator to invalid state + Reset(nullptr); + } + + private: + // the underlying memtable + const HashSkipListRep& memtable_rep_; + }; + + class EmptyIterator : public MemTableRep::Iterator { + // This is used when there wasn't a bucket. It is cheaper than + // instantiating an empty bucket over which to iterate. 
+   public:
+    EmptyIterator() {}
+    bool Valid() const override { return false; }
+    const char* key() const override {
+      assert(false);
+      return nullptr;
+    }
+    void Next() override {}
+    void Prev() override {}
+    void Seek(const Slice& /*internal_key*/,
+              const char* /*memtable_key*/) override {}
+    void SeekForPrev(const Slice& /*internal_key*/,
+                     const char* /*memtable_key*/) override {}
+    void SeekToFirst() override {}
+    void SeekToLast() override {}
+
+   private:
+  };
+};
+
+HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare,
+                                 Allocator* allocator,
+                                 const SliceTransform* transform,
+                                 size_t bucket_size, int32_t skiplist_height,
+                                 int32_t skiplist_branching_factor)
+    : MemTableRep(allocator),
+      bucket_size_(bucket_size),
+      skiplist_height_(skiplist_height),
+      skiplist_branching_factor_(skiplist_branching_factor),
+      transform_(transform),
+      compare_(compare),
+      allocator_(allocator) {
+  auto mem =
+      allocator->AllocateAligned(sizeof(std::atomic<Bucket*>) * bucket_size);
+  buckets_ = new (mem) std::atomic<Bucket*>[bucket_size];
+
+  for (size_t i = 0; i < bucket_size_; ++i) {
+    buckets_[i].store(nullptr, std::memory_order_relaxed);
+  }
+}
+
+HashSkipListRep::~HashSkipListRep() {}
+
+HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket(
+    const Slice& transformed) {
+  size_t hash = GetHash(transformed);
+  auto bucket = GetBucket(hash);
+  if (bucket == nullptr) {
+    auto addr = allocator_->AllocateAligned(sizeof(Bucket));
+    bucket = new (addr) Bucket(compare_, allocator_, skiplist_height_,
+                               skiplist_branching_factor_);
+    buckets_[hash].store(bucket, std::memory_order_release);
+  }
+  return bucket;
+}
+
+void HashSkipListRep::Insert(KeyHandle handle) {
+  auto* key = static_cast<char*>(handle);
+  assert(!Contains(key));
+  auto transformed = transform_->Transform(UserKey(key));
+  auto bucket = GetInitializedBucket(transformed);
+  bucket->Insert(key);
+}
+
+bool HashSkipListRep::Contains(const char* key) const {
+  auto transformed = transform_->Transform(UserKey(key));
+  auto bucket = GetBucket(transformed);
+  if (bucket == nullptr) {
+    return false;
+  }
+  return bucket->Contains(key);
+}
+
+size_t HashSkipListRep::ApproximateMemoryUsage() { return 0; }
+
+void HashSkipListRep::Get(const LookupKey& k, void* callback_args,
+                          bool (*callback_func)(void* arg,
+                                                const char* entry)) {
+  auto transformed = transform_->Transform(k.user_key());
+  auto bucket = GetBucket(transformed);
+  if (bucket != nullptr) {
+    Bucket::Iterator iter(bucket);
+    for (iter.Seek(k.memtable_key().data());
+         iter.Valid() && callback_func(callback_args, iter.key());
+         iter.Next()) {
+    }
+  }
+}
+
+MemTableRep::Iterator* HashSkipListRep::GetIterator(Arena* arena) {
+  // allocate a new arena of similar size to the one currently in use
+  Arena* new_arena = new Arena(allocator_->BlockSize());
+  auto list = new Bucket(compare_, new_arena);
+  for (size_t i = 0; i < bucket_size_; ++i) {
+    auto bucket = GetBucket(i);
+    if (bucket != nullptr) {
+      Bucket::Iterator itr(bucket);
+      for (itr.SeekToFirst(); itr.Valid(); itr.Next()) {
+        list->Insert(itr.key());
+      }
+    }
+  }
+  if (arena == nullptr) {
+    return new Iterator(list, true, new_arena);
+  } else {
+    auto mem = arena->AllocateAligned(sizeof(Iterator));
+    return new (mem) Iterator(list, true, new_arena);
+  }
+}
+
+MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator(
+    Arena* arena) {
+  if (arena == nullptr) {
+    return new DynamicIterator(*this);
+  } else {
+    auto mem = arena->AllocateAligned(sizeof(DynamicIterator));
+    return new (mem) DynamicIterator(*this);
+  }
+}
+
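+// Rough usage sketch (not part of this file; the prefix length and the
+// factory arguments below are example values, not requirements): the factory
+// defined next is normally selected through Options together with a prefix
+// extractor, which supplies the per-bucket transform:
+//
+//   rocksdb::Options options;
+//   options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
+//   options.memtable_factory.reset(rocksdb::NewHashSkipListRepFactory(
+//       1000000 /* bucket_count */, 4 /* skiplist_height */,
+//       4 /* skiplist_branching_factor */));
+//
+// Without a prefix extractor the buckets cannot be keyed, so one must be
+// configured whenever this memtable representation is used.
+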
+struct HashSkipListRepOptions {
+  static const char* kName() { return "HashSkipListRepFactoryOptions"; }
+  size_t bucket_count;
+  int32_t skiplist_height;
+  int32_t skiplist_branching_factor;
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> hash_skiplist_info = {
+    {"bucket_count",
+     {offsetof(struct HashSkipListRepOptions, bucket_count),
+      OptionType::kSizeT, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"skiplist_height",
+     {offsetof(struct HashSkipListRepOptions, skiplist_height),
+      OptionType::kInt32T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"branching_factor",
+     {offsetof(struct HashSkipListRepOptions, skiplist_branching_factor),
+      OptionType::kInt32T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+};
+
+class HashSkipListRepFactory : public MemTableRepFactory {
+ public:
+  explicit HashSkipListRepFactory(size_t bucket_count, int32_t skiplist_height,
+                                  int32_t skiplist_branching_factor) {
+    options_.bucket_count = bucket_count;
+    options_.skiplist_height = skiplist_height;
+    options_.skiplist_branching_factor = skiplist_branching_factor;
+    RegisterOptions(&options_, &hash_skiplist_info);
+  }
+
+  using MemTableRepFactory::CreateMemTableRep;
+  virtual MemTableRep* CreateMemTableRep(
+      const MemTableRep::KeyComparator& compare, Allocator* allocator,
+      const SliceTransform* transform, Logger* logger) override;
+
+  static const char* kClassName() { return "HashSkipListRepFactory"; }
+  static const char* kNickName() { return "prefix_hash"; }
+
+  virtual const char* Name() const override { return kClassName(); }
+  virtual const char* NickName() const override { return kNickName(); }
+
+ private:
+  HashSkipListRepOptions options_;
+};
+
+}  // namespace
+
+MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
+    const MemTableRep::KeyComparator& compare, Allocator* allocator,
+    const SliceTransform* transform, Logger* /*logger*/) {
+  return new HashSkipListRep(compare, allocator, transform,
+                             options_.bucket_count, options_.skiplist_height,
+                             options_.skiplist_branching_factor);
+}
+
+MemTableRepFactory* NewHashSkipListRepFactory(
+    size_t bucket_count, int32_t skiplist_height,
+    int32_t skiplist_branching_factor) {
+  return new HashSkipListRepFactory(bucket_count, skiplist_height,
+                                    skiplist_branching_factor);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/inlineskiplist.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/inlineskiplist.h
new file mode 100644
index 0000000000..abb3c3ddb7
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/inlineskiplist.h
@@ -0,0 +1,1051 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved. Use of
+// this source code is governed by a BSD-style license that can be found
+// in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// InlineSkipList is derived from SkipList (skiplist.h), but it optimizes
+// the memory layout by requiring that the key storage be allocated through
+// the skip list instance. For the common case of SkipList<const char*,
+// Cmp> this saves 1 pointer per skip list node and gives better cache
+// locality, at the expense of wasted padding from using AllocateAligned
+// instead of Allocate for the keys. The unused padding will be from
+// 0 to sizeof(void*)-1 bytes, and the space savings are sizeof(void*)
+// bytes, so despite the padding the space used is always less than
+// SkipList<const char*, ..>.
+//
+// Thread safety
+// -------------
+//
+// Writes via Insert require external synchronization, most likely a mutex.
+// InsertConcurrently can be safely called concurrently with reads and
+// with other concurrent inserts. Reads require a guarantee that the
+// InlineSkipList will not be destroyed while the read is in progress.
+// Apart from that, reads progress without any internal locking or
+// synchronization.
+//
+// Invariants:
+//
+// (1) Allocated nodes are never deleted until the InlineSkipList is
+// destroyed. This is trivially guaranteed by the code since we never
+// delete any skip list nodes.
+//
+// (2) The contents of a Node except for the next/prev pointers are
+// immutable after the Node has been linked into the InlineSkipList.
+// Only Insert() modifies the list, and it is careful to initialize a
+// node and use release-stores to publish the nodes in one or more lists.
+//
+// ... prev vs. next pointer ordering ...
+//
+
+#pragma once
+#include <assert.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <atomic>
+#include <type_traits>
+
+#include "memory/allocator.h"
+#include "port/likely.h"
+#include "port/port.h"
+#include "rocksdb/slice.h"
+#include "util/coding.h"
+#include "util/random.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+template <class Comparator>
+class InlineSkipList {
+ private:
+  struct Node;
+  struct Splice;
+
+ public:
+  using DecodedKey =
+      typename std::remove_reference<Comparator>::type::DecodedType;
+
+  static const uint16_t kMaxPossibleHeight = 32;
+
+  // Create a new InlineSkipList object that will use "cmp" for comparing
+  // keys, and will allocate memory using "*allocator". Objects allocated
+  // in the allocator must remain allocated for the lifetime of the
+  // skiplist object.
+  explicit InlineSkipList(Comparator cmp, Allocator* allocator,
+                          int32_t max_height = 12,
+                          int32_t branching_factor = 4);
+  // No copying allowed
+  InlineSkipList(const InlineSkipList&) = delete;
+  InlineSkipList& operator=(const InlineSkipList&) = delete;
+
+  // Allocates a key and a skip-list node, returning a pointer to the key
+  // portion of the node. This method is thread-safe if the allocator
+  // is thread-safe.
+  char* AllocateKey(size_t key_size);
+
+  // Allocate a splice using allocator.
+  Splice* AllocateSplice();
+
+  // Allocate a splice on heap.
+  Splice* AllocateSpliceOnHeap();
+
+  // Inserts a key allocated by AllocateKey, after the actual key value
+  // has been filled in.
+  //
+  // REQUIRES: nothing that compares equal to key is currently in the list.
+  // REQUIRES: no concurrent calls to any of inserts.
+  bool Insert(const char* key);
+
+  // Inserts a key allocated by AllocateKey with a hint of last insert
+  // position in the skip-list. If hint points to nullptr, a new hint will be
+  // populated, which can be used in subsequent calls.
+  //
+  // It can be used to optimize the workload where there are multiple groups
+  // of keys, and each key is likely to insert to a location close to the last
+  // inserted key in the same group. One example is sequential inserts.
+  //
+  // REQUIRES: nothing that compares equal to key is currently in the list.
+  // REQUIRES: no concurrent calls to any of inserts.
+  bool InsertWithHint(const char* key, void** hint);
+
+  // Like InsertConcurrently, but with a hint
+  //
+  // REQUIRES: nothing that compares equal to key is currently in the list.
+ // REQUIRES: no concurrent calls that use same hint + bool InsertWithHintConcurrently(const char* key, void** hint); + + // Like Insert, but external synchronization is not required. + bool InsertConcurrently(const char* key); + + // Inserts a node into the skip list. key must have been allocated by + // AllocateKey and then filled in by the caller. If UseCAS is true, + // then external synchronization is not required, otherwise this method + // may not be called concurrently with any other insertions. + // + // Regardless of whether UseCAS is true, the splice must be owned + // exclusively by the current thread. If allow_partial_splice_fix is + // true, then the cost of insertion is amortized O(log D), where D is + // the distance from the splice to the inserted key (measured as the + // number of intervening nodes). Note that this bound is very good for + // sequential insertions! If allow_partial_splice_fix is false then + // the existing splice will be ignored unless the current key is being + // inserted immediately after the splice. allow_partial_splice_fix == + // false has worse running time for the non-sequential case O(log N), + // but a better constant factor. + template + bool Insert(const char* key, Splice* splice, bool allow_partial_splice_fix); + + // Returns true iff an entry that compares equal to key is in the list. + bool Contains(const char* key) const; + + // Return estimated number of entries smaller than `key`. + uint64_t EstimateCount(const char* key) const; + + // Validate correctness of the skip-list. + void TEST_Validate() const; + + // Iteration over the contents of a skip list + class Iterator { + public: + // Initialize an iterator over the specified list. + // The returned iterator is not valid. + explicit Iterator(const InlineSkipList* list); + + // Change the underlying skiplist used for this iterator + // This enables us not changing the iterator without deallocating + // an old one and then allocating a new one + void SetList(const InlineSkipList* list); + + // Returns true iff the iterator is positioned at a valid node. + bool Valid() const; + + // Returns the key at the current position. + // REQUIRES: Valid() + const char* key() const; + + // Advances to the next position. + // REQUIRES: Valid() + void Next(); + + // Advances to the previous position. + // REQUIRES: Valid() + void Prev(); + + // Advance to the first entry with a key >= target + void Seek(const char* target); + + // Retreat to the last entry with a key <= target + void SeekForPrev(const char* target); + + // Advance to a random entry in the list. + void RandomSeek(); + + // Position at the first entry in list. + // Final state of iterator is Valid() iff list is not empty. + void SeekToFirst(); + + // Position at the last entry in list. + // Final state of iterator is Valid() iff list is not empty. + void SeekToLast(); + + private: + const InlineSkipList* list_; + Node* node_; + // Intentionally copyable + }; + + private: + const uint16_t kMaxHeight_; + const uint16_t kBranching_; + const uint32_t kScaledInverseBranching_; + + Allocator* const allocator_; // Allocator used for allocations of nodes + // Immutable after construction + Comparator const compare_; + Node* const head_; + + // Modified only by Insert(). Read racily by readers, but stale + // values are ok. + std::atomic max_height_; // Height of the entire list + + // seq_splice_ is a Splice used for insertions in the non-concurrent + // case. 
It caches the prev and next found during the most recent + // non-concurrent insertion. + Splice* seq_splice_; + + inline int GetMaxHeight() const { + return max_height_.load(std::memory_order_relaxed); + } + + int RandomHeight(); + + Node* AllocateNode(size_t key_size, int height); + + bool Equal(const char* a, const char* b) const { + return (compare_(a, b) == 0); + } + + bool LessThan(const char* a, const char* b) const { + return (compare_(a, b) < 0); + } + + // Return true if key is greater than the data stored in "n". Null n + // is considered infinite. n should not be head_. + bool KeyIsAfterNode(const char* key, Node* n) const; + bool KeyIsAfterNode(const DecodedKey& key, Node* n) const; + + // Returns the earliest node with a key >= key. + // Return nullptr if there is no such node. + Node* FindGreaterOrEqual(const char* key) const; + + // Return the latest node with a key < key. + // Return head_ if there is no such node. + // Fills prev[level] with pointer to previous node at "level" for every + // level in [0..max_height_-1], if prev is non-null. + Node* FindLessThan(const char* key, Node** prev = nullptr) const; + + // Return the latest node with a key < key on bottom_level. Start searching + // from root node on the level below top_level. + // Fills prev[level] with pointer to previous node at "level" for every + // level in [bottom_level..top_level-1], if prev is non-null. + Node* FindLessThan(const char* key, Node** prev, Node* root, int top_level, + int bottom_level) const; + + // Return the last node in the list. + // Return head_ if list is empty. + Node* FindLast() const; + + // Returns a random entry. + Node* FindRandomEntry() const; + + // Traverses a single level of the list, setting *out_prev to the last + // node before the key and *out_next to the first node after. Assumes + // that the key is not present in the skip list. On entry, before should + // point to a node that is before the key, and after should point to + // a node that is after the key. after should be nullptr if a good after + // node isn't conveniently available. + template + void FindSpliceForLevel(const DecodedKey& key, Node* before, Node* after, + int level, Node** out_prev, Node** out_next); + + // Recomputes Splice levels from highest_level (inclusive) down to + // lowest_level (inclusive). + void RecomputeSpliceLevels(const DecodedKey& key, Splice* splice, + int recompute_level); +}; + +// Implementation details follow + +template +struct InlineSkipList::Splice { + // The invariant of a Splice is that prev_[i+1].key <= prev_[i].key < + // next_[i].key <= next_[i+1].key for all i. That means that if a + // key is bracketed by prev_[i] and next_[i] then it is bracketed by + // all higher levels. It is _not_ required that prev_[i]->Next(i) == + // next_[i] (it probably did at some point in the past, but intervening + // or concurrent operations might have inserted nodes in between). + int height_ = 0; + Node** prev_; + Node** next_; +}; + +// The Node data type is more of a pointer into custom-managed memory than +// a traditional C++ struct. The key is stored in the bytes immediately +// after the struct, and the next_ pointers for nodes with height > 1 are +// stored immediately _before_ the struct. This avoids the need to include +// any pointer or sizing data, which reduces per-node memory overheads. +template +struct InlineSkipList::Node { + // Stores the height of the node in the memory location normally used for + // next_[0]. This is used for passing data from AllocateKey to Insert. 
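+  // Layout sketch, for a node of height h (see AllocateNode further below):
+  //
+  //   [ next_[-(h-1)] ... next_[-1] | next_[0]  <- Node starts | key bytes ]
+  //
+  // so (&next_[0] - n) addresses the level-n link and the key begins at
+  // &next_[1].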
+  void StashHeight(const int height) {
+    assert(sizeof(int) <= sizeof(next_[0]));
+    memcpy(static_cast<void*>(&next_[0]), &height, sizeof(int));
+  }
+
+  // Retrieves the value passed to StashHeight. Undefined after a call
+  // to SetNext or NoBarrier_SetNext.
+  int UnstashHeight() const {
+    int rv;
+    memcpy(&rv, &next_[0], sizeof(int));
+    return rv;
+  }
+
+  const char* Key() const { return reinterpret_cast<const char*>(&next_[1]); }
+
+  // Accessors/mutators for links. Wrapped in methods so we can add
+  // the appropriate barriers as necessary, and perform the necessary
+  // addressing trickery for storing links below the Node in memory.
+  Node* Next(int n) {
+    assert(n >= 0);
+    // Use an 'acquire load' so that we observe a fully initialized
+    // version of the returned Node.
+    return ((&next_[0] - n)->load(std::memory_order_acquire));
+  }
+
+  void SetNext(int n, Node* x) {
+    assert(n >= 0);
+    // Use a 'release store' so that anybody who reads through this
+    // pointer observes a fully initialized version of the inserted node.
+    (&next_[0] - n)->store(x, std::memory_order_release);
+  }
+
+  bool CASNext(int n, Node* expected, Node* x) {
+    assert(n >= 0);
+    return (&next_[0] - n)->compare_exchange_strong(expected, x);
+  }
+
+  // No-barrier variants that can be safely used in a few locations.
+  Node* NoBarrier_Next(int n) {
+    assert(n >= 0);
+    return (&next_[0] - n)->load(std::memory_order_relaxed);
+  }
+
+  void NoBarrier_SetNext(int n, Node* x) {
+    assert(n >= 0);
+    (&next_[0] - n)->store(x, std::memory_order_relaxed);
+  }
+
+  // Insert node after prev on specific level.
+  void InsertAfter(Node* prev, int level) {
+    // NoBarrier_SetNext() suffices since we will add a barrier when
+    // we publish a pointer to "this" in prev.
+    NoBarrier_SetNext(level, prev->NoBarrier_Next(level));
+    prev->SetNext(level, this);
+  }
+
+ private:
+  // next_[0] is the lowest level link (level 0). Higher levels are
+  // stored _earlier_, so level 1 is at next_[-1].
+  std::atomic<Node*> next_[1];
+};
+
+template <class Comparator>
+inline InlineSkipList<Comparator>::Iterator::Iterator(
+    const InlineSkipList* list) {
+  SetList(list);
+}
+
+template <class Comparator>
+inline void InlineSkipList<Comparator>::Iterator::SetList(
+    const InlineSkipList* list) {
+  list_ = list;
+  node_ = nullptr;
+}
+
+template <class Comparator>
+inline bool InlineSkipList<Comparator>::Iterator::Valid() const {
+  return node_ != nullptr;
+}
+
+template <class Comparator>
+inline const char* InlineSkipList<Comparator>::Iterator::key() const {
+  assert(Valid());
+  return node_->Key();
+}
+
+template <class Comparator>
+inline void InlineSkipList<Comparator>::Iterator::Next() {
+  assert(Valid());
+  node_ = node_->Next(0);
+}
+
+template <class Comparator>
+inline void InlineSkipList<Comparator>::Iterator::Prev() {
+  // Instead of using explicit "prev" links, we just search for the
+  // last node that falls before key.
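+  // As a consequence, Prev() costs O(log n) here rather than the O(1) of a
+  // doubly linked iterator; that is the price of not storing back links in
+  // the nodes.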
+ assert(Valid()); + node_ = list_->FindLessThan(node_->Key()); + if (node_ == list_->head_) { + node_ = nullptr; + } +} + +template +inline void InlineSkipList::Iterator::Seek(const char* target) { + node_ = list_->FindGreaterOrEqual(target); +} + +template +inline void InlineSkipList::Iterator::SeekForPrev( + const char* target) { + Seek(target); + if (!Valid()) { + SeekToLast(); + } + while (Valid() && list_->LessThan(target, key())) { + Prev(); + } +} + +template +inline void InlineSkipList::Iterator::RandomSeek() { + node_ = list_->FindRandomEntry(); +} + +template +inline void InlineSkipList::Iterator::SeekToFirst() { + node_ = list_->head_->Next(0); +} + +template +inline void InlineSkipList::Iterator::SeekToLast() { + node_ = list_->FindLast(); + if (node_ == list_->head_) { + node_ = nullptr; + } +} + +template +int InlineSkipList::RandomHeight() { + auto rnd = Random::GetTLSInstance(); + + // Increase height with probability 1 in kBranching + int height = 1; + while (height < kMaxHeight_ && height < kMaxPossibleHeight && + rnd->Next() < kScaledInverseBranching_) { + height++; + } + assert(height > 0); + assert(height <= kMaxHeight_); + assert(height <= kMaxPossibleHeight); + return height; +} + +template +bool InlineSkipList::KeyIsAfterNode(const char* key, + Node* n) const { + // nullptr n is considered infinite + assert(n != head_); + return (n != nullptr) && (compare_(n->Key(), key) < 0); +} + +template +bool InlineSkipList::KeyIsAfterNode(const DecodedKey& key, + Node* n) const { + // nullptr n is considered infinite + assert(n != head_); + return (n != nullptr) && (compare_(n->Key(), key) < 0); +} + +template +typename InlineSkipList::Node* +InlineSkipList::FindGreaterOrEqual(const char* key) const { + // Note: It looks like we could reduce duplication by implementing + // this function as FindLessThan(key)->Next(0), but we wouldn't be able + // to exit early on equality and the result wouldn't even be correct. + // A concurrent insert might occur after FindLessThan(key) but before + // we get a chance to call Next(0). + Node* x = head_; + int level = GetMaxHeight() - 1; + Node* last_bigger = nullptr; + const DecodedKey key_decoded = compare_.decode_key(key); + while (true) { + Node* next = x->Next(level); + if (next != nullptr) { + PREFETCH(next->Next(level), 0, 1); + } + // Make sure the lists are sorted + assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x)); + // Make sure we haven't overshot during our search + assert(x == head_ || KeyIsAfterNode(key_decoded, x)); + int cmp = (next == nullptr || next == last_bigger) + ? 
1 + : compare_(next->Key(), key_decoded); + if (cmp == 0 || (cmp > 0 && level == 0)) { + return next; + } else if (cmp < 0) { + // Keep searching in this list + x = next; + } else { + // Switch to next list, reuse compare_() result + last_bigger = next; + level--; + } + } +} + +template +typename InlineSkipList::Node* +InlineSkipList::FindLessThan(const char* key, Node** prev) const { + return FindLessThan(key, prev, head_, GetMaxHeight(), 0); +} + +template +typename InlineSkipList::Node* +InlineSkipList::FindLessThan(const char* key, Node** prev, + Node* root, int top_level, + int bottom_level) const { + assert(top_level > bottom_level); + int level = top_level - 1; + Node* x = root; + // KeyIsAfter(key, last_not_after) is definitely false + Node* last_not_after = nullptr; + const DecodedKey key_decoded = compare_.decode_key(key); + while (true) { + assert(x != nullptr); + Node* next = x->Next(level); + if (next != nullptr) { + PREFETCH(next->Next(level), 0, 1); + } + assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x)); + assert(x == head_ || KeyIsAfterNode(key_decoded, x)); + if (next != last_not_after && KeyIsAfterNode(key_decoded, next)) { + // Keep searching in this list + assert(next != nullptr); + x = next; + } else { + if (prev != nullptr) { + prev[level] = x; + } + if (level == bottom_level) { + return x; + } else { + // Switch to next list, reuse KeyIsAfterNode() result + last_not_after = next; + level--; + } + } + } +} + +template +typename InlineSkipList::Node* +InlineSkipList::FindLast() const { + Node* x = head_; + int level = GetMaxHeight() - 1; + while (true) { + Node* next = x->Next(level); + if (next == nullptr) { + if (level == 0) { + return x; + } else { + // Switch to next list + level--; + } + } else { + x = next; + } + } +} + +template +typename InlineSkipList::Node* +InlineSkipList::FindRandomEntry() const { + // TODO(bjlemaire): consider adding PREFETCH calls. + Node *x = head_, *scan_node = nullptr, *limit_node = nullptr; + + // We start at the max level. + // FOr each level, we look at all the nodes at the level, and + // we randomly pick one of them. Then decrement the level + // and reiterate the process. + // eg: assume GetMaxHeight()=5, and there are #100 elements (nodes). + // level 4 nodes: lvl_nodes={#1, #15, #67, #84}. Randomly pick #15. + // We will consider all the nodes between #15 (inclusive) and #67 + // (exclusive). #67 is called 'limit_node' here. + // level 3 nodes: lvl_nodes={#15, #21, #45, #51}. Randomly choose + // #51. #67 remains 'limit_node'. + // [...] + // level 0 nodes: lvl_nodes={#56,#57,#58,#59}. Randomly pick $57. + // Return Node #57. + std::vector lvl_nodes; + Random* rnd = Random::GetTLSInstance(); + int level = GetMaxHeight() - 1; + + while (level >= 0) { + lvl_nodes.clear(); + scan_node = x; + while (scan_node != limit_node) { + lvl_nodes.push_back(scan_node); + scan_node = scan_node->Next(level); + } + uint32_t rnd_idx = rnd->Next() % lvl_nodes.size(); + x = lvl_nodes[rnd_idx]; + if (rnd_idx + 1 < lvl_nodes.size()) { + limit_node = lvl_nodes[rnd_idx + 1]; + } + level--; + } + // There is a special case where x could still be the head_ + // (note that the head_ contains no key). + return x == head_ && head_ != nullptr ? 
head_->Next(0) : x; +} + +template +uint64_t InlineSkipList::EstimateCount(const char* key) const { + uint64_t count = 0; + + Node* x = head_; + int level = GetMaxHeight() - 1; + const DecodedKey key_decoded = compare_.decode_key(key); + while (true) { + assert(x == head_ || compare_(x->Key(), key_decoded) < 0); + Node* next = x->Next(level); + if (next != nullptr) { + PREFETCH(next->Next(level), 0, 1); + } + if (next == nullptr || compare_(next->Key(), key_decoded) >= 0) { + if (level == 0) { + return count; + } else { + // Switch to next list + count *= kBranching_; + level--; + } + } else { + x = next; + count++; + } + } +} + +template +InlineSkipList::InlineSkipList(const Comparator cmp, + Allocator* allocator, + int32_t max_height, + int32_t branching_factor) + : kMaxHeight_(static_cast(max_height)), + kBranching_(static_cast(branching_factor)), + kScaledInverseBranching_((Random::kMaxNext + 1) / kBranching_), + allocator_(allocator), + compare_(cmp), + head_(AllocateNode(0, max_height)), + max_height_(1), + seq_splice_(AllocateSplice()) { + assert(max_height > 0 && kMaxHeight_ == static_cast(max_height)); + assert(branching_factor > 1 && + kBranching_ == static_cast(branching_factor)); + assert(kScaledInverseBranching_ > 0); + + for (int i = 0; i < kMaxHeight_; ++i) { + head_->SetNext(i, nullptr); + } +} + +template +char* InlineSkipList::AllocateKey(size_t key_size) { + return const_cast(AllocateNode(key_size, RandomHeight())->Key()); +} + +template +typename InlineSkipList::Node* +InlineSkipList::AllocateNode(size_t key_size, int height) { + auto prefix = sizeof(std::atomic) * (height - 1); + + // prefix is space for the height - 1 pointers that we store before + // the Node instance (next_[-(height - 1) .. -1]). Node starts at + // raw + prefix, and holds the bottom-mode (level 0) skip list pointer + // next_[0]. key_size is the bytes for the key, which comes just after + // the Node. + char* raw = allocator_->AllocateAligned(prefix + sizeof(Node) + key_size); + Node* x = reinterpret_cast(raw + prefix); + + // Once we've linked the node into the skip list we don't actually need + // to know its height, because we can implicitly use the fact that we + // traversed into a node at level h to known that h is a valid level + // for that node. We need to convey the height to the Insert step, + // however, so that it can perform the proper links. Since we're not + // using the pointers at the moment, StashHeight temporarily borrow + // storage from next_[0] for that purpose. 
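+  // For example (a sketch assuming 8-byte pointers): height == 3 gives
+  // prefix == 2 * sizeof(std::atomic<Node*>) == 16, so raw points at
+  // next_[-2] and the Node object itself (holding next_[0]) starts 16 bytes
+  // into the allocation, with the key immediately after it.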
+ x->StashHeight(height); + return x; +} + +template +typename InlineSkipList::Splice* +InlineSkipList::AllocateSplice() { + // size of prev_ and next_ + size_t array_size = sizeof(Node*) * (kMaxHeight_ + 1); + char* raw = allocator_->AllocateAligned(sizeof(Splice) + array_size * 2); + Splice* splice = reinterpret_cast(raw); + splice->height_ = 0; + splice->prev_ = reinterpret_cast(raw + sizeof(Splice)); + splice->next_ = reinterpret_cast(raw + sizeof(Splice) + array_size); + return splice; +} + +template +typename InlineSkipList::Splice* +InlineSkipList::AllocateSpliceOnHeap() { + size_t array_size = sizeof(Node*) * (kMaxHeight_ + 1); + char* raw = new char[sizeof(Splice) + array_size * 2]; + Splice* splice = reinterpret_cast(raw); + splice->height_ = 0; + splice->prev_ = reinterpret_cast(raw + sizeof(Splice)); + splice->next_ = reinterpret_cast(raw + sizeof(Splice) + array_size); + return splice; +} + +template +bool InlineSkipList::Insert(const char* key) { + return Insert(key, seq_splice_, false); +} + +template +bool InlineSkipList::InsertConcurrently(const char* key) { + Node* prev[kMaxPossibleHeight]; + Node* next[kMaxPossibleHeight]; + Splice splice; + splice.prev_ = prev; + splice.next_ = next; + return Insert(key, &splice, false); +} + +template +bool InlineSkipList::InsertWithHint(const char* key, void** hint) { + assert(hint != nullptr); + Splice* splice = reinterpret_cast(*hint); + if (splice == nullptr) { + splice = AllocateSplice(); + *hint = reinterpret_cast(splice); + } + return Insert(key, splice, true); +} + +template +bool InlineSkipList::InsertWithHintConcurrently(const char* key, + void** hint) { + assert(hint != nullptr); + Splice* splice = reinterpret_cast(*hint); + if (splice == nullptr) { + splice = AllocateSpliceOnHeap(); + *hint = reinterpret_cast(splice); + } + return Insert(key, splice, true); +} + +template +template +void InlineSkipList::FindSpliceForLevel(const DecodedKey& key, + Node* before, Node* after, + int level, Node** out_prev, + Node** out_next) { + while (true) { + Node* next = before->Next(level); + if (next != nullptr) { + PREFETCH(next->Next(level), 0, 1); + } + if (prefetch_before == true) { + if (next != nullptr && level > 0) { + PREFETCH(next->Next(level - 1), 0, 1); + } + } + assert(before == head_ || next == nullptr || + KeyIsAfterNode(next->Key(), before)); + assert(before == head_ || KeyIsAfterNode(key, before)); + if (next == after || !KeyIsAfterNode(key, next)) { + // found it + *out_prev = before; + *out_next = next; + return; + } + before = next; + } +} + +template +void InlineSkipList::RecomputeSpliceLevels(const DecodedKey& key, + Splice* splice, + int recompute_level) { + assert(recompute_level > 0); + assert(recompute_level <= splice->height_); + for (int i = recompute_level - 1; i >= 0; --i) { + FindSpliceForLevel(key, splice->prev_[i + 1], splice->next_[i + 1], i, + &splice->prev_[i], &splice->next_[i]); + } +} + +template +template +bool InlineSkipList::Insert(const char* key, Splice* splice, + bool allow_partial_splice_fix) { + Node* x = reinterpret_cast(const_cast(key)) - 1; + const DecodedKey key_decoded = compare_.decode_key(key); + int height = x->UnstashHeight(); + assert(height >= 1 && height <= kMaxHeight_); + + int max_height = max_height_.load(std::memory_order_relaxed); + while (height > max_height) { + if (max_height_.compare_exchange_weak(max_height, height)) { + // successfully updated it + max_height = height; + break; + } + // else retry, possibly exiting the loop because somebody else + // increased it + } + 
assert(max_height <= kMaxPossibleHeight); + + int recompute_height = 0; + if (splice->height_ < max_height) { + // Either splice has never been used or max_height has grown since + // last use. We could potentially fix it in the latter case, but + // that is tricky. + splice->prev_[max_height] = head_; + splice->next_[max_height] = nullptr; + splice->height_ = max_height; + recompute_height = max_height; + } else { + // Splice is a valid proper-height splice that brackets some + // key, but does it bracket this one? We need to validate it and + // recompute a portion of the splice (levels 0..recompute_height-1) + // that is a superset of all levels that don't bracket the new key. + // Several choices are reasonable, because we have to balance the work + // saved against the extra comparisons required to validate the Splice. + // + // One strategy is just to recompute all of orig_splice_height if the + // bottom level isn't bracketing. This pessimistically assumes that + // we will either get a perfect Splice hit (increasing sequential + // inserts) or have no locality. + // + // Another strategy is to walk up the Splice's levels until we find + // a level that brackets the key. This strategy lets the Splice + // hint help for other cases: it turns insertion from O(log N) into + // O(log D), where D is the number of nodes in between the key that + // produced the Splice and the current insert (insertion is aided + // whether the new key is before or after the splice). If you have + // a way of using a prefix of the key to map directly to the closest + // Splice out of O(sqrt(N)) Splices and we make it so that splices + // can also be used as hints during read, then we end up with Oshman's + // and Shavit's SkipTrie, which has O(log log N) lookup and insertion + // (compare to O(log N) for skip list). + // + // We control the pessimistic strategy with allow_partial_splice_fix. + // A good strategy is probably to be pessimistic for seq_splice_, + // optimistic if the caller actually went to the work of providing + // a Splice. + while (recompute_height < max_height) { + if (splice->prev_[recompute_height]->Next(recompute_height) != + splice->next_[recompute_height]) { + // splice isn't tight at this level, there must have been some inserts + // to this + // location that didn't update the splice. We might only be a little + // stale, but if + // the splice is very stale it would be O(N) to fix it. We haven't used + // up any of + // our budget of comparisons, so always move up even if we are + // pessimistic about + // our chances of success. + ++recompute_height; + } else if (splice->prev_[recompute_height] != head_ && + !KeyIsAfterNode(key_decoded, + splice->prev_[recompute_height])) { + // key is from before splice + if (allow_partial_splice_fix) { + // skip all levels with the same node without more comparisons + Node* bad = splice->prev_[recompute_height]; + while (splice->prev_[recompute_height] == bad) { + ++recompute_height; + } + } else { + // we're pessimistic, recompute everything + recompute_height = max_height; + } + } else if (KeyIsAfterNode(key_decoded, splice->next_[recompute_height])) { + // key is from after splice + if (allow_partial_splice_fix) { + Node* bad = splice->next_[recompute_height]; + while (splice->next_[recompute_height] == bad) { + ++recompute_height; + } + } else { + recompute_height = max_height; + } + } else { + // this level brackets the key, we won! 
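+        // (By the Splice invariant above, if this level brackets the key
+        // then every higher level does too, so only the levels below it
+        // need to be recomputed.)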
+ break; + } + } + } + assert(recompute_height <= max_height); + if (recompute_height > 0) { + RecomputeSpliceLevels(key_decoded, splice, recompute_height); + } + + bool splice_is_valid = true; + if (UseCAS) { + for (int i = 0; i < height; ++i) { + while (true) { + // Checking for duplicate keys on the level 0 is sufficient + if (UNLIKELY(i == 0 && splice->next_[i] != nullptr && + compare_(x->Key(), splice->next_[i]->Key()) >= 0)) { + // duplicate key + return false; + } + if (UNLIKELY(i == 0 && splice->prev_[i] != head_ && + compare_(splice->prev_[i]->Key(), x->Key()) >= 0)) { + // duplicate key + return false; + } + assert(splice->next_[i] == nullptr || + compare_(x->Key(), splice->next_[i]->Key()) < 0); + assert(splice->prev_[i] == head_ || + compare_(splice->prev_[i]->Key(), x->Key()) < 0); + x->NoBarrier_SetNext(i, splice->next_[i]); + if (splice->prev_[i]->CASNext(i, splice->next_[i], x)) { + // success + break; + } + // CAS failed, we need to recompute prev and next. It is unlikely + // to be helpful to try to use a different level as we redo the + // search, because it should be unlikely that lots of nodes have + // been inserted between prev[i] and next[i]. No point in using + // next[i] as the after hint, because we know it is stale. + FindSpliceForLevel(key_decoded, splice->prev_[i], nullptr, i, + &splice->prev_[i], &splice->next_[i]); + + // Since we've narrowed the bracket for level i, we might have + // violated the Splice constraint between i and i-1. Make sure + // we recompute the whole thing next time. + if (i > 0) { + splice_is_valid = false; + } + } + } + } else { + for (int i = 0; i < height; ++i) { + if (i >= recompute_height && + splice->prev_[i]->Next(i) != splice->next_[i]) { + FindSpliceForLevel(key_decoded, splice->prev_[i], nullptr, i, + &splice->prev_[i], &splice->next_[i]); + } + // Checking for duplicate keys on the level 0 is sufficient + if (UNLIKELY(i == 0 && splice->next_[i] != nullptr && + compare_(x->Key(), splice->next_[i]->Key()) >= 0)) { + // duplicate key + return false; + } + if (UNLIKELY(i == 0 && splice->prev_[i] != head_ && + compare_(splice->prev_[i]->Key(), x->Key()) >= 0)) { + // duplicate key + return false; + } + assert(splice->next_[i] == nullptr || + compare_(x->Key(), splice->next_[i]->Key()) < 0); + assert(splice->prev_[i] == head_ || + compare_(splice->prev_[i]->Key(), x->Key()) < 0); + assert(splice->prev_[i]->Next(i) == splice->next_[i]); + x->NoBarrier_SetNext(i, splice->next_[i]); + splice->prev_[i]->SetNext(i, x); + } + } + if (splice_is_valid) { + for (int i = 0; i < height; ++i) { + splice->prev_[i] = x; + } + assert(splice->prev_[splice->height_] == head_); + assert(splice->next_[splice->height_] == nullptr); + for (int i = 0; i < splice->height_; ++i) { + assert(splice->next_[i] == nullptr || + compare_(key, splice->next_[i]->Key()) < 0); + assert(splice->prev_[i] == head_ || + compare_(splice->prev_[i]->Key(), key) <= 0); + assert(splice->prev_[i + 1] == splice->prev_[i] || + splice->prev_[i + 1] == head_ || + compare_(splice->prev_[i + 1]->Key(), splice->prev_[i]->Key()) < + 0); + assert(splice->next_[i + 1] == splice->next_[i] || + splice->next_[i + 1] == nullptr || + compare_(splice->next_[i]->Key(), splice->next_[i + 1]->Key()) < + 0); + } + } else { + splice->height_ = 0; + } + return true; +} + +template +bool InlineSkipList::Contains(const char* key) const { + Node* x = FindGreaterOrEqual(key); + if (x != nullptr && Equal(key, x->Key())) { + return true; + } else { + return false; + } +} + +template +void 
InlineSkipList::TEST_Validate() const { + // Interate over all levels at the same time, and verify nodes appear in + // the right order, and nodes appear in upper level also appear in lower + // levels. + Node* nodes[kMaxPossibleHeight]; + int max_height = GetMaxHeight(); + assert(max_height > 0); + for (int i = 0; i < max_height; i++) { + nodes[i] = head_; + } + while (nodes[0] != nullptr) { + Node* l0_next = nodes[0]->Next(0); + if (l0_next == nullptr) { + break; + } + assert(nodes[0] == head_ || compare_(nodes[0]->Key(), l0_next->Key()) < 0); + nodes[0] = l0_next; + + int i = 1; + while (i < max_height) { + Node* next = nodes[i]->Next(i); + if (next == nullptr) { + break; + } + auto cmp = compare_(nodes[0]->Key(), next->Key()); + assert(cmp <= 0); + if (cmp == 0) { + assert(next == nodes[0]); + nodes[i] = next; + } else { + break; + } + i++; + } + } + for (int i = 1; i < max_height; i++) { + assert(nodes[i] != nullptr && nodes[i]->Next(i) == nullptr); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplist.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplist.h new file mode 100644 index 0000000000..e3cecd30c1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplist.h @@ -0,0 +1,498 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Thread safety +// ------------- +// +// Writes require external synchronization, most likely a mutex. +// Reads require a guarantee that the SkipList will not be destroyed +// while the read is in progress. Apart from that, reads progress +// without any internal locking or synchronization. +// +// Invariants: +// +// (1) Allocated nodes are never deleted until the SkipList is +// destroyed. This is trivially guaranteed by the code since we +// never delete any skip list nodes. +// +// (2) The contents of a Node except for the next/prev pointers are +// immutable after the Node has been linked into the SkipList. +// Only Insert() modifies the list, and it is careful to initialize +// a node and use release-stores to publish the nodes in one or +// more lists. +// +// ... prev vs. next pointer ordering ... +// + +#pragma once +#include +#include + +#include + +#include "memory/allocator.h" +#include "port/port.h" +#include "util/random.h" + +namespace ROCKSDB_NAMESPACE { + +template +class SkipList { + private: + struct Node; + + public: + // Create a new SkipList object that will use "cmp" for comparing keys, + // and will allocate memory using "*allocator". Objects allocated in the + // allocator must remain allocated for the lifetime of the skiplist object. + explicit SkipList(Comparator cmp, Allocator* allocator, + int32_t max_height = 12, int32_t branching_factor = 4); + // No copying allowed + SkipList(const SkipList&) = delete; + void operator=(const SkipList&) = delete; + + // Insert key into the list. + // REQUIRES: nothing that compares equal to key is currently in the list. + void Insert(const Key& key); + + // Returns true iff an entry that compares equal to key is in the list. 
+ bool Contains(const Key& key) const; + + // Return estimated number of entries smaller than `key`. + uint64_t EstimateCount(const Key& key) const; + + // Iteration over the contents of a skip list + class Iterator { + public: + // Initialize an iterator over the specified list. + // The returned iterator is not valid. + explicit Iterator(const SkipList* list); + + // Change the underlying skiplist used for this iterator + // This enables us not changing the iterator without deallocating + // an old one and then allocating a new one + void SetList(const SkipList* list); + + // Returns true iff the iterator is positioned at a valid node. + bool Valid() const; + + // Returns the key at the current position. + // REQUIRES: Valid() + const Key& key() const; + + // Advances to the next position. + // REQUIRES: Valid() + void Next(); + + // Advances to the previous position. + // REQUIRES: Valid() + void Prev(); + + // Advance to the first entry with a key >= target + void Seek(const Key& target); + + // Retreat to the last entry with a key <= target + void SeekForPrev(const Key& target); + + // Position at the first entry in list. + // Final state of iterator is Valid() iff list is not empty. + void SeekToFirst(); + + // Position at the last entry in list. + // Final state of iterator is Valid() iff list is not empty. + void SeekToLast(); + + private: + const SkipList* list_; + Node* node_; + // Intentionally copyable + }; + + private: + const uint16_t kMaxHeight_; + const uint16_t kBranching_; + const uint32_t kScaledInverseBranching_; + + // Immutable after construction + Comparator const compare_; + Allocator* const allocator_; // Allocator used for allocations of nodes + + Node* const head_; + + // Modified only by Insert(). Read racily by readers, but stale + // values are ok. + std::atomic max_height_; // Height of the entire list + + // Used for optimizing sequential insert patterns. Tricky. prev_[i] for + // i up to max_height_ is the predecessor of prev_[0] and prev_height_ + // is the height of prev_[0]. prev_[0] can only be equal to head before + // insertion, in which case max_height_ and prev_height_ are 1. + Node** prev_; + int32_t prev_height_; + + inline int GetMaxHeight() const { + return max_height_.load(std::memory_order_relaxed); + } + + Node* NewNode(const Key& key, int height); + int RandomHeight(); + bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } + bool LessThan(const Key& a, const Key& b) const { + return (compare_(a, b) < 0); + } + + // Return true if key is greater than the data stored in "n" + bool KeyIsAfterNode(const Key& key, Node* n) const; + + // Returns the earliest node with a key >= key. + // Return nullptr if there is no such node. + Node* FindGreaterOrEqual(const Key& key) const; + + // Return the latest node with a key < key. + // Return head_ if there is no such node. + // Fills prev[level] with pointer to previous node at "level" for every + // level in [0..max_height_-1], if prev is non-null. + Node* FindLessThan(const Key& key, Node** prev = nullptr) const; + + // Return the last node in the list. + // Return head_ if list is empty. + Node* FindLast() const; +}; + +// Implementation details follow +template +struct SkipList::Node { + explicit Node(const Key& k) : key(k) {} + + Key const key; + + // Accessors/mutators for links. Wrapped in methods so we can + // add the appropriate barriers as necessary. 
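+  // The release/acquire pair is what makes lock-free readers safe: the
+  // writer fully initializes a node and only then publishes it with a
+  // release store; a reader that observes the pointer via an acquire load
+  // is guaranteed to see the initialized node. Roughly:
+  //
+  //   writer: Node* n = NewNode(key, h); prev->SetNext(0, n);  // release
+  //   reader: Node* n = prev->Next(0); use(n->key);            // acquire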
+ Node* Next(int n) { + assert(n >= 0); + // Use an 'acquire load' so that we observe a fully initialized + // version of the returned Node. + return (next_[n].load(std::memory_order_acquire)); + } + void SetNext(int n, Node* x) { + assert(n >= 0); + // Use a 'release store' so that anybody who reads through this + // pointer observes a fully initialized version of the inserted node. + next_[n].store(x, std::memory_order_release); + } + + // No-barrier variants that can be safely used in a few locations. + Node* NoBarrier_Next(int n) { + assert(n >= 0); + return next_[n].load(std::memory_order_relaxed); + } + void NoBarrier_SetNext(int n, Node* x) { + assert(n >= 0); + next_[n].store(x, std::memory_order_relaxed); + } + + private: + // Array of length equal to the node height. next_[0] is lowest level link. + std::atomic next_[1]; +}; + +template +typename SkipList::Node* SkipList::NewNode( + const Key& key, int height) { + char* mem = allocator_->AllocateAligned( + sizeof(Node) + sizeof(std::atomic) * (height - 1)); + return new (mem) Node(key); +} + +template +inline SkipList::Iterator::Iterator(const SkipList* list) { + SetList(list); +} + +template +inline void SkipList::Iterator::SetList(const SkipList* list) { + list_ = list; + node_ = nullptr; +} + +template +inline bool SkipList::Iterator::Valid() const { + return node_ != nullptr; +} + +template +inline const Key& SkipList::Iterator::key() const { + assert(Valid()); + return node_->key; +} + +template +inline void SkipList::Iterator::Next() { + assert(Valid()); + node_ = node_->Next(0); +} + +template +inline void SkipList::Iterator::Prev() { + // Instead of using explicit "prev" links, we just search for the + // last node that falls before key. + assert(Valid()); + node_ = list_->FindLessThan(node_->key); + if (node_ == list_->head_) { + node_ = nullptr; + } +} + +template +inline void SkipList::Iterator::Seek(const Key& target) { + node_ = list_->FindGreaterOrEqual(target); +} + +template +inline void SkipList::Iterator::SeekForPrev( + const Key& target) { + Seek(target); + if (!Valid()) { + SeekToLast(); + } + while (Valid() && list_->LessThan(target, key())) { + Prev(); + } +} + +template +inline void SkipList::Iterator::SeekToFirst() { + node_ = list_->head_->Next(0); +} + +template +inline void SkipList::Iterator::SeekToLast() { + node_ = list_->FindLast(); + if (node_ == list_->head_) { + node_ = nullptr; + } +} + +template +int SkipList::RandomHeight() { + auto rnd = Random::GetTLSInstance(); + + // Increase height with probability 1 in kBranching + int height = 1; + while (height < kMaxHeight_ && rnd->Next() < kScaledInverseBranching_) { + height++; + } + assert(height > 0); + assert(height <= kMaxHeight_); + return height; +} + +template +bool SkipList::KeyIsAfterNode(const Key& key, Node* n) const { + // nullptr n is considered infinite + return (n != nullptr) && (compare_(n->key, key) < 0); +} + +template +typename SkipList::Node* +SkipList::FindGreaterOrEqual(const Key& key) const { + // Note: It looks like we could reduce duplication by implementing + // this function as FindLessThan(key)->Next(0), but we wouldn't be able + // to exit early on equality and the result wouldn't even be correct. + // A concurrent insert might occur after FindLessThan(key) but before + // we get a chance to call Next(0). 
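+  // The loop below follows the classic skip-list search pattern: at each
+  // level, move right while the next node's key is less than the target,
+  // otherwise drop one level; at level 0 the first node not less than the
+  // target is the answer (or nullptr if none exists).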
+ Node* x = head_; + int level = GetMaxHeight() - 1; + Node* last_bigger = nullptr; + while (true) { + assert(x != nullptr); + Node* next = x->Next(level); + // Make sure the lists are sorted + assert(x == head_ || next == nullptr || KeyIsAfterNode(next->key, x)); + // Make sure we haven't overshot during our search + assert(x == head_ || KeyIsAfterNode(key, x)); + int cmp = + (next == nullptr || next == last_bigger) ? 1 : compare_(next->key, key); + if (cmp == 0 || (cmp > 0 && level == 0)) { + return next; + } else if (cmp < 0) { + // Keep searching in this list + x = next; + } else { + // Switch to next list, reuse compare_() result + last_bigger = next; + level--; + } + } +} + +template +typename SkipList::Node* +SkipList::FindLessThan(const Key& key, Node** prev) const { + Node* x = head_; + int level = GetMaxHeight() - 1; + // KeyIsAfter(key, last_not_after) is definitely false + Node* last_not_after = nullptr; + while (true) { + assert(x != nullptr); + Node* next = x->Next(level); + assert(x == head_ || next == nullptr || KeyIsAfterNode(next->key, x)); + assert(x == head_ || KeyIsAfterNode(key, x)); + if (next != last_not_after && KeyIsAfterNode(key, next)) { + // Keep searching in this list + x = next; + } else { + if (prev != nullptr) { + prev[level] = x; + } + if (level == 0) { + return x; + } else { + // Switch to next list, reuse KeyIUsAfterNode() result + last_not_after = next; + level--; + } + } + } +} + +template +typename SkipList::Node* SkipList::FindLast() + const { + Node* x = head_; + int level = GetMaxHeight() - 1; + while (true) { + Node* next = x->Next(level); + if (next == nullptr) { + if (level == 0) { + return x; + } else { + // Switch to next list + level--; + } + } else { + x = next; + } + } +} + +template +uint64_t SkipList::EstimateCount(const Key& key) const { + uint64_t count = 0; + + Node* x = head_; + int level = GetMaxHeight() - 1; + while (true) { + assert(x == head_ || compare_(x->key, key) < 0); + Node* next = x->Next(level); + if (next == nullptr || compare_(next->key, key) >= 0) { + if (level == 0) { + return count; + } else { + // Switch to next list + count *= kBranching_; + level--; + } + } else { + x = next; + count++; + } + } +} + +template +SkipList::SkipList(const Comparator cmp, Allocator* allocator, + int32_t max_height, + int32_t branching_factor) + : kMaxHeight_(static_cast(max_height)), + kBranching_(static_cast(branching_factor)), + kScaledInverseBranching_((Random::kMaxNext + 1) / kBranching_), + compare_(cmp), + allocator_(allocator), + head_(NewNode(0 /* any key will do */, max_height)), + max_height_(1), + prev_height_(1) { + assert(max_height > 0 && kMaxHeight_ == static_cast(max_height)); + assert(branching_factor > 0 && + kBranching_ == static_cast(branching_factor)); + assert(kScaledInverseBranching_ > 0); + // Allocate the prev_ Node* array, directly from the passed-in allocator. + // prev_ does not need to be freed, as its life cycle is tied up with + // the allocator as a whole. 
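+  // (prev_ is the cache that powers the sequential-insert fast path in
+  // Insert() below.)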
+ prev_ = reinterpret_cast( + allocator_->AllocateAligned(sizeof(Node*) * kMaxHeight_)); + for (int i = 0; i < kMaxHeight_; i++) { + head_->SetNext(i, nullptr); + prev_[i] = head_; + } +} + +template +void SkipList::Insert(const Key& key) { + // fast path for sequential insertion + if (!KeyIsAfterNode(key, prev_[0]->NoBarrier_Next(0)) && + (prev_[0] == head_ || KeyIsAfterNode(key, prev_[0]))) { + assert(prev_[0] != head_ || (prev_height_ == 1 && GetMaxHeight() == 1)); + + // Outside of this method prev_[1..max_height_] is the predecessor + // of prev_[0], and prev_height_ refers to prev_[0]. Inside Insert + // prev_[0..max_height - 1] is the predecessor of key. Switch from + // the external state to the internal + for (int i = 1; i < prev_height_; i++) { + prev_[i] = prev_[0]; + } + } else { + // TODO(opt): we could use a NoBarrier predecessor search as an + // optimization for architectures where memory_order_acquire needs + // a synchronization instruction. Doesn't matter on x86 + FindLessThan(key, prev_); + } + + // Our data structure does not allow duplicate insertion + assert(prev_[0]->Next(0) == nullptr || !Equal(key, prev_[0]->Next(0)->key)); + + int height = RandomHeight(); + if (height > GetMaxHeight()) { + for (int i = GetMaxHeight(); i < height; i++) { + prev_[i] = head_; + } + // fprintf(stderr, "Change height from %d to %d\n", max_height_, height); + + // It is ok to mutate max_height_ without any synchronization + // with concurrent readers. A concurrent reader that observes + // the new value of max_height_ will see either the old value of + // new level pointers from head_ (nullptr), or a new value set in + // the loop below. In the former case the reader will + // immediately drop to the next level since nullptr sorts after all + // keys. In the latter case the reader will use the new node. + max_height_.store(height, std::memory_order_relaxed); + } + + Node* x = NewNode(key, height); + for (int i = 0; i < height; i++) { + // NoBarrier_SetNext() suffices since we will add a barrier when + // we publish a pointer to "x" in prev[i]. + x->NoBarrier_SetNext(i, prev_[i]->NoBarrier_Next(i)); + prev_[i]->SetNext(i, x); + } + prev_[0] = x; + prev_height_ = height; +} + +template +bool SkipList::Contains(const Key& key) const { + Node* x = FindGreaterOrEqual(key); + if (x != nullptr && Equal(key, x->key)) { + return true; + } else { + return false; + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplistrep.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplistrep.cc new file mode 100644 index 0000000000..c3b4c785d3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/skiplistrep.cc @@ -0,0 +1,368 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
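+//
+// SkipListRep (defined below) is the default memtable representation: a thin
+// MemTableRep adapter over InlineSkipList, plus an optional "lookahead"
+// iterator that scans a few nodes forward from the previously visited
+// position before falling back to a full O(log n) Seek.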
+// +#include + +#include "db/memtable.h" +#include "memory/arena.h" +#include "memtable/inlineskiplist.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/utilities/options_type.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +class SkipListRep : public MemTableRep { + InlineSkipList skip_list_; + const MemTableRep::KeyComparator& cmp_; + const SliceTransform* transform_; + const size_t lookahead_; + + friend class LookaheadIterator; + + public: + explicit SkipListRep(const MemTableRep::KeyComparator& compare, + Allocator* allocator, const SliceTransform* transform, + const size_t lookahead) + : MemTableRep(allocator), + skip_list_(compare, allocator), + cmp_(compare), + transform_(transform), + lookahead_(lookahead) {} + + KeyHandle Allocate(const size_t len, char** buf) override { + *buf = skip_list_.AllocateKey(len); + return static_cast(*buf); + } + + // Insert key into the list. + // REQUIRES: nothing that compares equal to key is currently in the list. + void Insert(KeyHandle handle) override { + skip_list_.Insert(static_cast(handle)); + } + + bool InsertKey(KeyHandle handle) override { + return skip_list_.Insert(static_cast(handle)); + } + + void InsertWithHint(KeyHandle handle, void** hint) override { + skip_list_.InsertWithHint(static_cast(handle), hint); + } + + bool InsertKeyWithHint(KeyHandle handle, void** hint) override { + return skip_list_.InsertWithHint(static_cast(handle), hint); + } + + void InsertWithHintConcurrently(KeyHandle handle, void** hint) override { + skip_list_.InsertWithHintConcurrently(static_cast(handle), hint); + } + + bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) override { + return skip_list_.InsertWithHintConcurrently(static_cast(handle), + hint); + } + + void InsertConcurrently(KeyHandle handle) override { + skip_list_.InsertConcurrently(static_cast(handle)); + } + + bool InsertKeyConcurrently(KeyHandle handle) override { + return skip_list_.InsertConcurrently(static_cast(handle)); + } + + // Returns true iff an entry that compares equal to key is in the list. + bool Contains(const char* key) const override { + return skip_list_.Contains(key); + } + + size_t ApproximateMemoryUsage() override { + // All memory is allocated through allocator; nothing to report here + return 0; + } + + void Get(const LookupKey& k, void* callback_args, + bool (*callback_func)(void* arg, const char* entry)) override { + SkipListRep::Iterator iter(&skip_list_); + Slice dummy_slice; + for (iter.Seek(dummy_slice, k.memtable_key().data()); + iter.Valid() && callback_func(callback_args, iter.key()); + iter.Next()) { + } + } + + uint64_t ApproximateNumEntries(const Slice& start_ikey, + const Slice& end_ikey) override { + std::string tmp; + uint64_t start_count = + skip_list_.EstimateCount(EncodeKey(&tmp, start_ikey)); + uint64_t end_count = skip_list_.EstimateCount(EncodeKey(&tmp, end_ikey)); + return (end_count >= start_count) ? (end_count - start_count) : 0; + } + + void UniqueRandomSample(const uint64_t num_entries, + const uint64_t target_sample_size, + std::unordered_set* entries) override { + entries->clear(); + // Avoid divide-by-0. + assert(target_sample_size > 0); + assert(num_entries > 0); + // NOTE: the size of entries is not enforced to be exactly + // target_sample_size at the end of this function, it might be slightly + // greater or smaller. 
+    SkipListRep::Iterator iter(&skip_list_);
+    // There are two methods to create the subset of samples (size m)
+    // from the table containing N elements:
+    //  1-Iterate linearly through the N memtable entries. For each entry i,
+    //    add it to the sample set with a probability
+    //    (target_sample_size - entries.size() ) / (N-i).
+    //
+    //  2-Pick m random elements without repetition.
+    // We pick Option 2 when m<sqrt(N) and
+    // Option 1 when m > sqrt(N).
+    if (target_sample_size >
+        static_cast<uint64_t>(std::sqrt(1.0 * num_entries))) {
+      Random* rnd = Random::GetTLSInstance();
+      iter.SeekToFirst();
+      uint64_t counter = 0, num_samples_left = target_sample_size;
+      for (; iter.Valid() && (num_samples_left > 0); iter.Next(), counter++) {
+        // Add entry to sample set with probability
+        // num_samples_left/(num_entries - counter).
+        if (rnd->Next() % (num_entries - counter) < num_samples_left) {
+          entries->insert(iter.key());
+          num_samples_left--;
+        }
+      }
+    } else {
+      // Option 2: pick m random elements with no duplicates.
+      // If Option 2 is picked, then target_sample_size<sqrt(N)
+      // Using a set spares the need to check for duplicates.
+      for (uint64_t i = 0; i < target_sample_size; i++) {
+        // We give it 5 attempts to find a non-duplicate
+        // With 5 attempts, the chances of returning `entries` set
+        // of size target_sample_size is:
+        // PROD_{i=1}^{target_sample_size-1} [1-(i/N)^5]
+        // which is monotonically increasing with N in the worse case
+        // of target_sample_size=sqrt(N), and is always >99.9% for N>4.
+        // At worst, for the final pick , when m=sqrt(N) there is
+        // a probability of p= 1/sqrt(N) chances to find a duplicate.
+        for (uint64_t j = 0; j < 5; j++) {
+          iter.RandomSeek();
+          // unordered_set::insert returns pair<iterator, bool>.
+          // The second element is true if an insert successfully happened.
+          // If element is already in the set, this bool will be false, and
+          // true otherwise.
+          if ((entries->insert(iter.key())).second) {
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  ~SkipListRep() override {}
+
+  // Iteration over the contents of a skip list
+  class Iterator : public MemTableRep::Iterator {
+    InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_;
+
+   public:
+    // Initialize an iterator over the specified list.
+    // The returned iterator is not valid.
+    explicit Iterator(
+        const InlineSkipList<const MemTableRep::KeyComparator&>* list)
+        : iter_(list) {}
+
+    ~Iterator() override {}
+
+    // Returns true iff the iterator is positioned at a valid node.
+    bool Valid() const override { return iter_.Valid(); }
+
+    // Returns the key at the current position.
+    // REQUIRES: Valid()
+    const char* key() const override { return iter_.key(); }
+
+    // Advances to the next position.
+    // REQUIRES: Valid()
+    void Next() override { iter_.Next(); }
+
+    // Advances to the previous position.
+    // REQUIRES: Valid()
+    void Prev() override { iter_.Prev(); }
+
+    // Advance to the first entry with a key >= target
+    void Seek(const Slice& user_key, const char* memtable_key) override {
+      if (memtable_key != nullptr) {
+        iter_.Seek(memtable_key);
+      } else {
+        iter_.Seek(EncodeKey(&tmp_, user_key));
+      }
+    }
+
+    // Retreat to the last entry with a key <= target
+    void SeekForPrev(const Slice& user_key,
+                     const char* memtable_key) override {
+      if (memtable_key != nullptr) {
+        iter_.SeekForPrev(memtable_key);
+      } else {
+        iter_.SeekForPrev(EncodeKey(&tmp_, user_key));
+      }
+    }
+
+    void RandomSeek() override { iter_.RandomSeek(); }
+
+    // Position at the first entry in list.
+    // Final state of iterator is Valid() iff list is not empty.
+    void SeekToFirst() override { iter_.SeekToFirst(); }
+
+    // Position at the last entry in list.
+    // Final state of iterator is Valid() iff list is not empty.
+    void SeekToLast() override { iter_.SeekToLast(); }
+
+   protected:
+    std::string tmp_;  // For passing to EncodeKey
+  };
+
+  // Iterator over the contents of a skip list which also keeps track of the
+  // previously visited node.
In Seek(), it examines a few nodes after it + // first, falling back to O(log n) search from the head of the list only if + // the target key hasn't been found. + class LookaheadIterator : public MemTableRep::Iterator { + public: + explicit LookaheadIterator(const SkipListRep& rep) + : rep_(rep), iter_(&rep_.skip_list_), prev_(iter_) {} + + ~LookaheadIterator() override {} + + bool Valid() const override { return iter_.Valid(); } + + const char* key() const override { + assert(Valid()); + return iter_.key(); + } + + void Next() override { + assert(Valid()); + + bool advance_prev = true; + if (prev_.Valid()) { + auto k1 = rep_.UserKey(prev_.key()); + auto k2 = rep_.UserKey(iter_.key()); + + if (k1.compare(k2) == 0) { + // same user key, don't move prev_ + advance_prev = false; + } else if (rep_.transform_) { + // only advance prev_ if it has the same prefix as iter_ + auto t1 = rep_.transform_->Transform(k1); + auto t2 = rep_.transform_->Transform(k2); + advance_prev = t1.compare(t2) == 0; + } + } + + if (advance_prev) { + prev_ = iter_; + } + iter_.Next(); + } + + void Prev() override { + assert(Valid()); + iter_.Prev(); + prev_ = iter_; + } + + void Seek(const Slice& internal_key, const char* memtable_key) override { + const char* encoded_key = (memtable_key != nullptr) + ? memtable_key + : EncodeKey(&tmp_, internal_key); + + if (prev_.Valid() && rep_.cmp_(encoded_key, prev_.key()) >= 0) { + // prev_.key() is smaller or equal to our target key; do a quick + // linear search (at most lookahead_ steps) starting from prev_ + iter_ = prev_; + + size_t cur = 0; + while (cur++ <= rep_.lookahead_ && iter_.Valid()) { + if (rep_.cmp_(encoded_key, iter_.key()) <= 0) { + return; + } + Next(); + } + } + + iter_.Seek(encoded_key); + prev_ = iter_; + } + + void SeekForPrev(const Slice& internal_key, + const char* memtable_key) override { + const char* encoded_key = (memtable_key != nullptr) + ? memtable_key + : EncodeKey(&tmp_, internal_key); + iter_.SeekForPrev(encoded_key); + prev_ = iter_; + } + + void SeekToFirst() override { + iter_.SeekToFirst(); + prev_ = iter_; + } + + void SeekToLast() override { + iter_.SeekToLast(); + prev_ = iter_; + } + + protected: + std::string tmp_; // For passing to EncodeKey + + private: + const SkipListRep& rep_; + InlineSkipList::Iterator iter_; + InlineSkipList::Iterator prev_; + }; + + MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override { + if (lookahead_ > 0) { + void* mem = + arena ? arena->AllocateAligned(sizeof(SkipListRep::LookaheadIterator)) + : + operator new(sizeof(SkipListRep::LookaheadIterator)); + return new (mem) SkipListRep::LookaheadIterator(*this); + } else { + void* mem = arena ? 
arena->AllocateAligned(sizeof(SkipListRep::Iterator)) + : + operator new(sizeof(SkipListRep::Iterator)); + return new (mem) SkipListRep::Iterator(&skip_list_); + } + } +}; +} // namespace + +static std::unordered_map skiplist_factory_info = { + {"lookahead", + {0, OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kDontSerialize /*Since it is part of the ID*/}}, +}; + +SkipListFactory::SkipListFactory(size_t lookahead) : lookahead_(lookahead) { + RegisterOptions("SkipListFactoryOptions", &lookahead_, + &skiplist_factory_info); +} + +std::string SkipListFactory::GetId() const { + std::string id = Name(); + if (lookahead_ > 0) { + id.append(":").append(std::to_string(lookahead_)); + } + return id; +} + +MemTableRep* SkipListFactory::CreateMemTableRep( + const MemTableRep::KeyComparator& compare, Allocator* allocator, + const SliceTransform* transform, Logger* /*logger*/) { + return new SkipListRep(compare, allocator, transform, lookahead_); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/stl_wrappers.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/stl_wrappers.h new file mode 100644 index 0000000000..783a8088d0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/stl_wrappers.h @@ -0,0 +1,33 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include +#include + +#include "rocksdb/comparator.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/slice.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { +namespace stl_wrappers { + +class Base { + protected: + const MemTableRep::KeyComparator& compare_; + explicit Base(const MemTableRep::KeyComparator& compare) + : compare_(compare) {} +}; + +struct Compare : private Base { + explicit Compare(const MemTableRep::KeyComparator& compare) : Base(compare) {} + inline bool operator()(const char* a, const char* b) const { + return compare_(a, b) < 0; + } +}; + +} // namespace stl_wrappers +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/vectorrep.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/vectorrep.cc new file mode 100644 index 0000000000..e42ae4439c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/vectorrep.cc @@ -0,0 +1,307 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include +#include +#include +#include +#include + +#include "db/memtable.h" +#include "memory/arena.h" +#include "memtable/stl_wrappers.h" +#include "port/port.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/utilities/options_type.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { +namespace { + +class VectorRep : public MemTableRep { + public: + VectorRep(const KeyComparator& compare, Allocator* allocator, size_t count); + + // Insert key into the collection. (The caller will pack key and value into a + // single buffer and pass that in as the parameter to Insert) + // REQUIRES: nothing that compares equal to key is currently in the + // collection. 
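The GetIterator implementation above either placement-constructs the iterator in arena-owned memory or heap-allocates it. Before the VectorRep declarations continue below, here is a minimal sketch of that allocation pattern, with MiniArena as an assumed stand-in for rocksdb::Arena; all names here are invented for illustration.

#include <cstddef>
#include <new>
#include <utility>
#include <vector>

// Minimal bump-style allocator standing in for rocksdb::Arena.
class MiniArena {
 public:
  char* AllocateAligned(size_t n) {
    blocks_.emplace_back(new char[n]);
    return blocks_.back();
  }
  ~MiniArena() {
    for (char* b : blocks_) delete[] b;
  }

 private:
  std::vector<char*> blocks_;
};

template <typename Iter, typename... Args>
Iter* MakeIterator(MiniArena* arena, Args&&... args) {
  // Arena-backed objects are constructed with placement new; their
  // memory is reclaimed when the arena dies, so callers must not call
  // delete on them (this mirrors the RocksDB convention above).
  void* mem = arena ? arena->AllocateAligned(sizeof(Iter))
                    : ::operator new(sizeof(Iter));
  return new (mem) Iter(std::forward<Args>(args)...);
}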
+ void Insert(KeyHandle handle) override; + + // Returns true iff an entry that compares equal to key is in the collection. + bool Contains(const char* key) const override; + + void MarkReadOnly() override; + + size_t ApproximateMemoryUsage() override; + + void Get(const LookupKey& k, void* callback_args, + bool (*callback_func)(void* arg, const char* entry)) override; + + ~VectorRep() override {} + + class Iterator : public MemTableRep::Iterator { + class VectorRep* vrep_; + std::shared_ptr> bucket_; + std::vector::const_iterator mutable cit_; + const KeyComparator& compare_; + std::string tmp_; // For passing to EncodeKey + bool mutable sorted_; + void DoSort() const; + + public: + explicit Iterator(class VectorRep* vrep, + std::shared_ptr> bucket, + const KeyComparator& compare); + + // Initialize an iterator over the specified collection. + // The returned iterator is not valid. + // explicit Iterator(const MemTableRep* collection); + ~Iterator() override{}; + + // Returns true iff the iterator is positioned at a valid node. + bool Valid() const override; + + // Returns the key at the current position. + // REQUIRES: Valid() + const char* key() const override; + + // Advances to the next position. + // REQUIRES: Valid() + void Next() override; + + // Advances to the previous position. + // REQUIRES: Valid() + void Prev() override; + + // Advance to the first entry with a key >= target + void Seek(const Slice& user_key, const char* memtable_key) override; + + // Advance to the first entry with a key <= target + void SeekForPrev(const Slice& user_key, const char* memtable_key) override; + + // Position at the first entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToFirst() override; + + // Position at the last entry in collection. + // Final state of iterator is Valid() iff collection is not empty. + void SeekToLast() override; + }; + + // Return an iterator over the keys in this representation. + MemTableRep::Iterator* GetIterator(Arena* arena) override; + + private: + friend class Iterator; + using Bucket = std::vector; + std::shared_ptr bucket_; + mutable port::RWMutex rwlock_; + bool immutable_; + bool sorted_; + const KeyComparator& compare_; +}; + +void VectorRep::Insert(KeyHandle handle) { + auto* key = static_cast(handle); + WriteLock l(&rwlock_); + assert(!immutable_); + bucket_->push_back(key); +} + +// Returns true iff an entry that compares equal to key is in the collection. 
+bool VectorRep::Contains(const char* key) const { + ReadLock l(&rwlock_); + return std::find(bucket_->begin(), bucket_->end(), key) != bucket_->end(); +} + +void VectorRep::MarkReadOnly() { + WriteLock l(&rwlock_); + immutable_ = true; +} + +size_t VectorRep::ApproximateMemoryUsage() { + return sizeof(bucket_) + sizeof(*bucket_) + + bucket_->size() * + sizeof( + std::remove_reference::type::value_type); +} + +VectorRep::VectorRep(const KeyComparator& compare, Allocator* allocator, + size_t count) + : MemTableRep(allocator), + bucket_(new Bucket()), + immutable_(false), + sorted_(false), + compare_(compare) { + bucket_.get()->reserve(count); +} + +VectorRep::Iterator::Iterator(class VectorRep* vrep, + std::shared_ptr> bucket, + const KeyComparator& compare) + : vrep_(vrep), + bucket_(bucket), + cit_(bucket_->end()), + compare_(compare), + sorted_(false) {} + +void VectorRep::Iterator::DoSort() const { + // vrep is non-null means that we are working on an immutable memtable + if (!sorted_ && vrep_ != nullptr) { + WriteLock l(&vrep_->rwlock_); + if (!vrep_->sorted_) { + std::sort(bucket_->begin(), bucket_->end(), + stl_wrappers::Compare(compare_)); + cit_ = bucket_->begin(); + vrep_->sorted_ = true; + } + sorted_ = true; + } + if (!sorted_) { + std::sort(bucket_->begin(), bucket_->end(), + stl_wrappers::Compare(compare_)); + cit_ = bucket_->begin(); + sorted_ = true; + } + assert(sorted_); + assert(vrep_ == nullptr || vrep_->sorted_); +} + +// Returns true iff the iterator is positioned at a valid node. +bool VectorRep::Iterator::Valid() const { + DoSort(); + return cit_ != bucket_->end(); +} + +// Returns the key at the current position. +// REQUIRES: Valid() +const char* VectorRep::Iterator::key() const { + assert(sorted_); + return *cit_; +} + +// Advances to the next position. +// REQUIRES: Valid() +void VectorRep::Iterator::Next() { + assert(sorted_); + if (cit_ == bucket_->end()) { + return; + } + ++cit_; +} + +// Advances to the previous position. +// REQUIRES: Valid() +void VectorRep::Iterator::Prev() { + assert(sorted_); + if (cit_ == bucket_->begin()) { + // If you try to go back from the first element, the iterator should be + // invalidated. So we set it to past-the-end. This means that you can + // treat the container circularly. + cit_ = bucket_->end(); + } else { + --cit_; + } +} + +// Advance to the first entry with a key >= target +void VectorRep::Iterator::Seek(const Slice& user_key, + const char* memtable_key) { + DoSort(); + // Do binary search to find first value not less than the target + const char* encoded_key = + (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, user_key); + cit_ = std::equal_range(bucket_->begin(), bucket_->end(), encoded_key, + [this](const char* a, const char* b) { + return compare_(a, b) < 0; + }) + .first; +} + +// Advance to the first entry with a key <= target +void VectorRep::Iterator::SeekForPrev(const Slice& /*user_key*/, + const char* /*memtable_key*/) { + assert(false); +} + +// Position at the first entry in collection. +// Final state of iterator is Valid() iff collection is not empty. +void VectorRep::Iterator::SeekToFirst() { + DoSort(); + cit_ = bucket_->begin(); +} + +// Position at the last entry in collection. +// Final state of iterator is Valid() iff collection is not empty. 
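DoSort() and Seek() above implement sort-on-first-read: appends stay O(1), the vector is sorted once when reads begin, and lookups then binary-search it (std::equal_range's lower bound). A reduced, illustrative sketch of the same scheme on a plain std::vector follows; VectorRep::Iterator::SeekToLast resumes after it.

#include <algorithm>
#include <string>
#include <vector>

class LazySortedVector {
 public:
  void Add(std::string key) {
    keys_.push_back(std::move(key));
    sorted_ = false;
  }

  // Returns the first element >= target, or nullptr. Sorting happens
  // lazily, so the sort cost is paid once per read phase.
  const std::string* Seek(const std::string& target) {
    if (!sorted_) {
      std::sort(keys_.begin(), keys_.end());
      sorted_ = true;
    }
    auto it = std::lower_bound(keys_.begin(), keys_.end(), target);
    return it == keys_.end() ? nullptr : &*it;
  }

 private:
  std::vector<std::string> keys_;
  bool sorted_ = false;
};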
+void VectorRep::Iterator::SeekToLast() { + DoSort(); + cit_ = bucket_->end(); + if (bucket_->size() != 0) { + --cit_; + } +} + +void VectorRep::Get(const LookupKey& k, void* callback_args, + bool (*callback_func)(void* arg, const char* entry)) { + rwlock_.ReadLock(); + VectorRep* vector_rep; + std::shared_ptr bucket; + if (immutable_) { + vector_rep = this; + } else { + vector_rep = nullptr; + bucket.reset(new Bucket(*bucket_)); // make a copy + } + VectorRep::Iterator iter(vector_rep, immutable_ ? bucket_ : bucket, compare_); + rwlock_.ReadUnlock(); + + for (iter.Seek(k.user_key(), k.memtable_key().data()); + iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) { + } +} + +MemTableRep::Iterator* VectorRep::GetIterator(Arena* arena) { + char* mem = nullptr; + if (arena != nullptr) { + mem = arena->AllocateAligned(sizeof(Iterator)); + } + ReadLock l(&rwlock_); + // Do not sort here. The sorting would be done the first time + // a Seek is performed on the iterator. + if (immutable_) { + if (arena == nullptr) { + return new Iterator(this, bucket_, compare_); + } else { + return new (mem) Iterator(this, bucket_, compare_); + } + } else { + std::shared_ptr tmp; + tmp.reset(new Bucket(*bucket_)); // make a copy + if (arena == nullptr) { + return new Iterator(nullptr, tmp, compare_); + } else { + return new (mem) Iterator(nullptr, tmp, compare_); + } + } +} +} // namespace + +static std::unordered_map vector_rep_table_info = { + {"count", + {0, OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; + +VectorRepFactory::VectorRepFactory(size_t count) : count_(count) { + RegisterOptions("VectorRepFactoryOptions", &count_, &vector_rep_table_info); +} + +MemTableRep* VectorRepFactory::CreateMemTableRep( + const MemTableRep::KeyComparator& compare, Allocator* allocator, + const SliceTransform*, Logger* /*logger*/) { + return new VectorRep(compare, allocator, count_); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/write_buffer_manager.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/write_buffer_manager.cc new file mode 100644 index 0000000000..ce1789c20d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/memtable/write_buffer_manager.cc @@ -0,0 +1,185 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
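VectorRep::Get above takes a short read lock, copies the bucket while the memtable is still mutable, and shares the original vector once it has been marked immutable; iteration then proceeds without the lock. A self-contained sketch of that copy-on-read snapshot trick, assuming nothing beyond the standard library:

#include <memory>
#include <mutex>
#include <vector>

class SnapshotVector {
 public:
  void Add(int v) {
    std::lock_guard<std::mutex> l(mu_);
    data_->push_back(v);
  }
  void Freeze() {
    std::lock_guard<std::mutex> l(mu_);
    frozen_ = true;
  }

  // Readers get either the shared immutable vector or a private copy,
  // and in both cases iterate without holding the lock.
  std::shared_ptr<const std::vector<int>> Snapshot() {
    std::lock_guard<std::mutex> l(mu_);
    if (frozen_) return data_;                           // share, no copy
    return std::make_shared<std::vector<int>>(*data_);  // copy
  }

 private:
  std::mutex mu_;
  std::shared_ptr<std::vector<int>> data_ =
      std::make_shared<std::vector<int>>();
  bool frozen_ = false;
};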
+ +#include "rocksdb/write_buffer_manager.h" + +#include + +#include "cache/cache_entry_roles.h" +#include "cache/cache_reservation_manager.h" +#include "db/db_impl/db_impl.h" +#include "rocksdb/status.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { +WriteBufferManager::WriteBufferManager(size_t _buffer_size, + std::shared_ptr cache, + bool allow_stall) + : buffer_size_(_buffer_size), + mutable_limit_(buffer_size_ * 7 / 8), + memory_used_(0), + memory_active_(0), + cache_res_mgr_(nullptr), + allow_stall_(allow_stall), + stall_active_(false) { + if (cache) { + // Memtable's memory usage tends to fluctuate frequently + // therefore we set delayed_decrease = true to save some dummy entry + // insertion on memory increase right after memory decrease + cache_res_mgr_ = std::make_shared< + CacheReservationManagerImpl>( + cache, true /* delayed_decrease */); + } +} + +WriteBufferManager::~WriteBufferManager() { +#ifndef NDEBUG + std::unique_lock lock(mu_); + assert(queue_.empty()); +#endif +} + +std::size_t WriteBufferManager::dummy_entries_in_cache_usage() const { + if (cache_res_mgr_ != nullptr) { + return cache_res_mgr_->GetTotalReservedCacheSize(); + } else { + return 0; + } +} + +void WriteBufferManager::ReserveMem(size_t mem) { + if (cache_res_mgr_ != nullptr) { + ReserveMemWithCache(mem); + } else if (enabled()) { + memory_used_.fetch_add(mem, std::memory_order_relaxed); + } + if (enabled()) { + memory_active_.fetch_add(mem, std::memory_order_relaxed); + } +} + +// Should only be called from write thread +void WriteBufferManager::ReserveMemWithCache(size_t mem) { + assert(cache_res_mgr_ != nullptr); + // Use a mutex to protect various data structures. Can be optimized to a + // lock-free solution if it ends up with a performance bottleneck. + std::lock_guard lock(cache_res_mgr_mu_); + + size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) + mem; + memory_used_.store(new_mem_used, std::memory_order_relaxed); + Status s = cache_res_mgr_->UpdateCacheReservation(new_mem_used); + + // We absorb the error since WriteBufferManager is not able to handle + // this failure properly. Ideallly we should prevent this allocation + // from happening if this cache charging fails. + // [TODO] We'll need to improve it in the future and figure out what to do on + // error + s.PermitUncheckedError(); +} + +void WriteBufferManager::ScheduleFreeMem(size_t mem) { + if (enabled()) { + memory_active_.fetch_sub(mem, std::memory_order_relaxed); + } +} + +void WriteBufferManager::FreeMem(size_t mem) { + if (cache_res_mgr_ != nullptr) { + FreeMemWithCache(mem); + } else if (enabled()) { + memory_used_.fetch_sub(mem, std::memory_order_relaxed); + } + // Check if stall is active and can be ended. + MaybeEndWriteStall(); +} + +void WriteBufferManager::FreeMemWithCache(size_t mem) { + assert(cache_res_mgr_ != nullptr); + // Use a mutex to protect various data structures. Can be optimized to a + // lock-free solution if it ends up with a performance bottleneck. + std::lock_guard lock(cache_res_mgr_mu_); + size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) - mem; + memory_used_.store(new_mem_used, std::memory_order_relaxed); + Status s = cache_res_mgr_->UpdateCacheReservation(new_mem_used); + + // We absorb the error since WriteBufferManager is not able to handle + // this failure properly. 
+ // [TODO] We'll need to improve it in the future and figure out what to do on + // error + s.PermitUncheckedError(); +} + +void WriteBufferManager::BeginWriteStall(StallInterface* wbm_stall) { + assert(wbm_stall != nullptr); + + // Allocate outside of the lock. + std::list new_node = {wbm_stall}; + + { + std::unique_lock lock(mu_); + // Verify if the stall conditions are stil active. + if (ShouldStall()) { + stall_active_.store(true, std::memory_order_relaxed); + queue_.splice(queue_.end(), std::move(new_node)); + } + } + + // If the node was not consumed, the stall has ended already and we can signal + // the caller. + if (!new_node.empty()) { + new_node.front()->Signal(); + } +} + +// Called when memory is freed in FreeMem or the buffer size has changed. +void WriteBufferManager::MaybeEndWriteStall() { + // Stall conditions have not been resolved. + if (allow_stall_.load(std::memory_order_relaxed) && + IsStallThresholdExceeded()) { + return; + } + + // Perform all deallocations outside of the lock. + std::list cleanup; + + std::unique_lock lock(mu_); + if (!stall_active_.load(std::memory_order_relaxed)) { + return; // Nothing to do. + } + + // Unblock new writers. + stall_active_.store(false, std::memory_order_relaxed); + + // Unblock the writers in the queue. + for (StallInterface* wbm_stall : queue_) { + wbm_stall->Signal(); + } + cleanup = std::move(queue_); +} + +void WriteBufferManager::RemoveDBFromQueue(StallInterface* wbm_stall) { + assert(wbm_stall != nullptr); + + // Deallocate the removed nodes outside of the lock. + std::list cleanup; + + if (enabled() && allow_stall_.load(std::memory_order_relaxed)) { + std::unique_lock lock(mu_); + for (auto it = queue_.begin(); it != queue_.end();) { + auto next = std::next(it); + if (*it == wbm_stall) { + cleanup.splice(cleanup.end(), queue_, std::move(it)); + } + it = next; + } + } + wbm_stall->Signal(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/file_read_sample.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/file_read_sample.h new file mode 100644 index 0000000000..82a933e0a1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/file_read_sample.h @@ -0,0 +1,23 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once +#include "db/version_edit.h" +#include "util/random.h" + +namespace ROCKSDB_NAMESPACE { +static const uint32_t kFileReadSampleRate = 1024; +extern bool should_sample_file_read(); +extern void sample_file_read_inc(FileMetaData*); + +inline bool should_sample_file_read() { + return (Random::GetTLSInstance()->Next() % kFileReadSampleRate == 307); +} + +inline void sample_file_read_inc(FileMetaData* meta) { + meta->stats.num_reads_sampled.fetch_add(kFileReadSampleRate, + std::memory_order_relaxed); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.cc new file mode 100644 index 0000000000..61bc6c1409 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.cc @@ -0,0 +1,270 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
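The write buffer manager vendored above tracks memtable memory across column families and databases, optionally charging it against a block cache and stalling writers past the limit. A hedged usage sketch of the typical wiring (sizes are illustrative; the constructor shape matches the one defined above, and options.write_buffer_manager is assumed to be the standard DBOptions field); the vendored histogram.cc resumes below.

#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/write_buffer_manager.h"

void ConfigureWriteBufferManager(rocksdb::Options& options) {
  // Share one cache between block cache and memtable charging.
  auto cache = rocksdb::NewLRUCache(1 << 30 /* 1 GiB */);
  options.write_buffer_manager =
      std::make_shared<rocksdb::WriteBufferManager>(
          256 << 20 /* 256 MiB across all memtables */, cache,
          true /* allow_stall: block writers instead of failing */);
}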
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "monitoring/histogram.h" + +#include + +#include +#include +#include +#include + +#include "port/port.h" +#include "util/cast_util.h" + +namespace ROCKSDB_NAMESPACE { + +HistogramBucketMapper::HistogramBucketMapper() { + // If you change this, you also need to change + // size of array buckets_ in HistogramImpl + bucketValues_ = {1, 2}; + double bucket_val = static_cast(bucketValues_.back()); + while ((bucket_val = 1.5 * bucket_val) <= + static_cast(std::numeric_limits::max())) { + bucketValues_.push_back(static_cast(bucket_val)); + // Extracts two most significant digits to make histogram buckets more + // human-readable. E.g., 172 becomes 170. + uint64_t pow_of_ten = 1; + while (bucketValues_.back() / 10 > 10) { + bucketValues_.back() /= 10; + pow_of_ten *= 10; + } + bucketValues_.back() *= pow_of_ten; + } + maxBucketValue_ = bucketValues_.back(); + minBucketValue_ = bucketValues_.front(); +} + +size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const { + auto beg = bucketValues_.begin(); + auto end = bucketValues_.end(); + if (value >= maxBucketValue_) + return end - beg - 1; // bucketValues_.size() - 1 + else + return std::lower_bound(beg, end, value) - beg; +} + +namespace { +const HistogramBucketMapper bucketMapper; +} + +HistogramStat::HistogramStat() : num_buckets_(bucketMapper.BucketCount()) { + assert(num_buckets_ == sizeof(buckets_) / sizeof(*buckets_)); + Clear(); +} + +void HistogramStat::Clear() { + min_.store(bucketMapper.LastValue(), std::memory_order_relaxed); + max_.store(0, std::memory_order_relaxed); + num_.store(0, std::memory_order_relaxed); + sum_.store(0, std::memory_order_relaxed); + sum_squares_.store(0, std::memory_order_relaxed); + for (unsigned int b = 0; b < num_buckets_; b++) { + buckets_[b].store(0, std::memory_order_relaxed); + } +}; + +bool HistogramStat::Empty() const { return num() == 0; } + +void HistogramStat::Add(uint64_t value) { + // This function is designed to be lock free, as it's in the critical path + // of any operation. Each individual value is atomic and the order of updates + // by concurrent threads is tolerable. 
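HistogramBucketMapper above grows bucket limits by roughly 1.5x and rounds each to two significant digits so the printed ranges stay readable; IndexForValue then maps a value to its bucket with std::lower_bound. A self-contained sketch of the same scheme (illustrative only; HistogramStat::Add resumes right after this aside):

#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<uint64_t> MakeBuckets(uint64_t max_value) {
  std::vector<uint64_t> b = {1, 2};
  double v = 2;
  while ((v = 1.5 * v) <= static_cast<double>(max_value)) {
    uint64_t r = static_cast<uint64_t>(v);
    uint64_t pow10 = 1;
    while (r / 10 > 10) {  // keep only the two leading digits
      r /= 10;
      pow10 *= 10;
    }
    b.push_back(r * pow10);
  }
  return b;  // e.g. ..., 110, 170, 250, 380, 580, 870, 1300, ...
}

size_t BucketFor(const std::vector<uint64_t>& b, uint64_t value) {
  if (value >= b.back()) return b.size() - 1;
  return std::lower_bound(b.begin(), b.end(), value) - b.begin();
}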
+ const size_t index = bucketMapper.IndexForValue(value); + assert(index < num_buckets_); + buckets_[index].store(buckets_[index].load(std::memory_order_relaxed) + 1, + std::memory_order_relaxed); + + uint64_t old_min = min(); + if (value < old_min) { + min_.store(value, std::memory_order_relaxed); + } + + uint64_t old_max = max(); + if (value > old_max) { + max_.store(value, std::memory_order_relaxed); + } + + num_.store(num_.load(std::memory_order_relaxed) + 1, + std::memory_order_relaxed); + sum_.store(sum_.load(std::memory_order_relaxed) + value, + std::memory_order_relaxed); + sum_squares_.store( + sum_squares_.load(std::memory_order_relaxed) + value * value, + std::memory_order_relaxed); +} + +void HistogramStat::Merge(const HistogramStat& other) { + // This function needs to be performned with the outer lock acquired + // However, atomic operation on every member is still need, since Add() + // requires no lock and value update can still happen concurrently + uint64_t old_min = min(); + uint64_t other_min = other.min(); + while (other_min < old_min && + !min_.compare_exchange_weak(old_min, other_min)) { + } + + uint64_t old_max = max(); + uint64_t other_max = other.max(); + while (other_max > old_max && + !max_.compare_exchange_weak(old_max, other_max)) { + } + + num_.fetch_add(other.num(), std::memory_order_relaxed); + sum_.fetch_add(other.sum(), std::memory_order_relaxed); + sum_squares_.fetch_add(other.sum_squares(), std::memory_order_relaxed); + for (unsigned int b = 0; b < num_buckets_; b++) { + buckets_[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed); + } +} + +double HistogramStat::Median() const { return Percentile(50.0); } + +double HistogramStat::Percentile(double p) const { + double threshold = num() * (p / 100.0); + uint64_t cumulative_sum = 0; + for (unsigned int b = 0; b < num_buckets_; b++) { + uint64_t bucket_value = bucket_at(b); + cumulative_sum += bucket_value; + if (cumulative_sum >= threshold) { + // Scale linearly within this bucket + uint64_t left_point = (b == 0) ? 
0 : bucketMapper.BucketLimit(b - 1); + uint64_t right_point = bucketMapper.BucketLimit(b); + uint64_t left_sum = cumulative_sum - bucket_value; + uint64_t right_sum = cumulative_sum; + double pos = 0; + uint64_t right_left_diff = right_sum - left_sum; + if (right_left_diff != 0) { + pos = (threshold - left_sum) / right_left_diff; + } + double r = left_point + (right_point - left_point) * pos; + uint64_t cur_min = min(); + uint64_t cur_max = max(); + if (r < cur_min) r = static_cast(cur_min); + if (r > cur_max) r = static_cast(cur_max); + return r; + } + } + return static_cast(max()); +} + +double HistogramStat::Average() const { + uint64_t cur_num = num(); + uint64_t cur_sum = sum(); + if (cur_num == 0) return 0; + return static_cast(cur_sum) / static_cast(cur_num); +} + +double HistogramStat::StandardDeviation() const { + double cur_num = + static_cast(num()); // Use double to avoid integer overflow + double cur_sum = static_cast(sum()); + double cur_sum_squares = static_cast(sum_squares()); + if (cur_num == 0.0) { + return 0.0; + } + double variance = + (cur_sum_squares * cur_num - cur_sum * cur_sum) / (cur_num * cur_num); + return std::sqrt(std::max(variance, 0.0)); +} + +std::string HistogramStat::ToString() const { + uint64_t cur_num = num(); + std::string r; + char buf[1650]; + snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f StdDev: %.2f\n", + cur_num, Average(), StandardDeviation()); + r.append(buf); + snprintf(buf, sizeof(buf), + "Min: %" PRIu64 " Median: %.4f Max: %" PRIu64 "\n", + (cur_num == 0 ? 0 : min()), Median(), (cur_num == 0 ? 0 : max())); + r.append(buf); + snprintf(buf, sizeof(buf), + "Percentiles: " + "P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n", + Percentile(50), Percentile(75), Percentile(99), Percentile(99.9), + Percentile(99.99)); + r.append(buf); + r.append("------------------------------------------------------\n"); + if (cur_num == 0) return r; // all buckets are empty + const double mult = 100.0 / cur_num; + uint64_t cumulative_sum = 0; + for (unsigned int b = 0; b < num_buckets_; b++) { + uint64_t bucket_value = bucket_at(b); + if (bucket_value <= 0.0) continue; + cumulative_sum += bucket_value; + snprintf(buf, sizeof(buf), + "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ", + (b == 0) ? '[' : '(', + (b == 0) ? 0 : bucketMapper.BucketLimit(b - 1), // left + bucketMapper.BucketLimit(b), // right + bucket_value, // count + (mult * bucket_value), // percentage + (mult * cumulative_sum)); // cumulative percentage + r.append(buf); + + // Add hash marks based on percentage; 20 marks for 100%. 
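Percentile() above finds the bucket whose cumulative count crosses the target and interpolates linearly between that bucket's lower and upper limits, then clamps to the observed min and max. A standalone sketch of the interpolation step, with invented names (the ToString loop resumes below):

#include <cstdint>
#include <vector>

double PercentileFromBuckets(const std::vector<uint64_t>& limits,
                             const std::vector<uint64_t>& counts, double p) {
  uint64_t total = 0;
  for (uint64_t c : counts) total += c;
  const double threshold = total * (p / 100.0);
  uint64_t cum = 0;
  for (size_t b = 0; b < counts.size(); b++) {
    cum += counts[b];
    if (cum >= threshold && counts[b] > 0) {
      const double left = (b == 0) ? 0.0 : static_cast<double>(limits[b - 1]);
      const double right = static_cast<double>(limits[b]);
      // Position of the threshold within this bucket, in [0, 1].
      const double pos =
          (threshold - static_cast<double>(cum - counts[b])) / counts[b];
      return left + (right - left) * pos;  // interpolate inside the bucket
    }
  }
  return limits.empty() ? 0.0 : static_cast<double>(limits.back());
}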
+ size_t marks = static_cast(mult * bucket_value / 5 + 0.5); + r.append(marks, '#'); + r.push_back('\n'); + } + return r; +} + +void HistogramStat::Data(HistogramData* const data) const { + assert(data); + data->median = Median(); + data->percentile95 = Percentile(95); + data->percentile99 = Percentile(99); + data->max = static_cast(max()); + data->average = Average(); + data->standard_deviation = StandardDeviation(); + data->count = num(); + data->sum = sum(); + data->min = static_cast(min()); +} + +void HistogramImpl::Clear() { + std::lock_guard lock(mutex_); + stats_.Clear(); +} + +bool HistogramImpl::Empty() const { return stats_.Empty(); } + +void HistogramImpl::Add(uint64_t value) { stats_.Add(value); } + +void HistogramImpl::Merge(const Histogram& other) { + if (strcmp(Name(), other.Name()) == 0) { + Merge(*static_cast_with_check(&other)); + } +} + +void HistogramImpl::Merge(const HistogramImpl& other) { + std::lock_guard lock(mutex_); + stats_.Merge(other.stats_); +} + +double HistogramImpl::Median() const { return stats_.Median(); } + +double HistogramImpl::Percentile(double p) const { + return stats_.Percentile(p); +} + +double HistogramImpl::Average() const { return stats_.Average(); } + +double HistogramImpl::StandardDeviation() const { + return stats_.StandardDeviation(); +} + +std::string HistogramImpl::ToString() const { return stats_.ToString(); } + +void HistogramImpl::Data(HistogramData* const data) const { stats_.Data(data); } + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.h new file mode 100644 index 0000000000..15fee2b4f8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram.h @@ -0,0 +1,143 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include +#include +#include +#include +#include + +#include "rocksdb/statistics.h" + +namespace ROCKSDB_NAMESPACE { + +class HistogramBucketMapper { + public: + HistogramBucketMapper(); + + // converts a value to the bucket index. + size_t IndexForValue(uint64_t value) const; + // number of buckets required. 
+ + size_t BucketCount() const { return bucketValues_.size(); } + + uint64_t LastValue() const { return maxBucketValue_; } + + uint64_t FirstValue() const { return minBucketValue_; } + + uint64_t BucketLimit(const size_t bucketNumber) const { + assert(bucketNumber < BucketCount()); + return bucketValues_[bucketNumber]; + } + + private: + std::vector bucketValues_; + uint64_t maxBucketValue_; + uint64_t minBucketValue_; +}; + +struct HistogramStat { + HistogramStat(); + ~HistogramStat() {} + + HistogramStat(const HistogramStat&) = delete; + HistogramStat& operator=(const HistogramStat&) = delete; + + void Clear(); + bool Empty() const; + void Add(uint64_t value); + void Merge(const HistogramStat& other); + + inline uint64_t min() const { return min_.load(std::memory_order_relaxed); } + inline uint64_t max() const { return max_.load(std::memory_order_relaxed); } + inline uint64_t num() const { return num_.load(std::memory_order_relaxed); } + inline uint64_t sum() const { return sum_.load(std::memory_order_relaxed); } + inline uint64_t sum_squares() const { + return sum_squares_.load(std::memory_order_relaxed); + } + inline uint64_t bucket_at(size_t b) const { + return buckets_[b].load(std::memory_order_relaxed); + } + + double Median() const; + double Percentile(double p) const; + double Average() const; + double StandardDeviation() const; + void Data(HistogramData* const data) const; + std::string ToString() const; + + // To be able to use HistogramStat as thread local variable, it + // cannot have dynamic allocated member. That's why we're + // using manually values from BucketMapper + std::atomic_uint_fast64_t min_; + std::atomic_uint_fast64_t max_; + std::atomic_uint_fast64_t num_; + std::atomic_uint_fast64_t sum_; + std::atomic_uint_fast64_t sum_squares_; + std::atomic_uint_fast64_t buckets_[109]; // 109==BucketMapper::BucketCount() + const uint64_t num_buckets_; +}; + +class Histogram { + public: + Histogram() {} + virtual ~Histogram(){}; + + virtual void Clear() = 0; + virtual bool Empty() const = 0; + virtual void Add(uint64_t value) = 0; + virtual void Merge(const Histogram&) = 0; + + virtual std::string ToString() const = 0; + virtual const char* Name() const = 0; + virtual uint64_t min() const = 0; + virtual uint64_t max() const = 0; + virtual uint64_t num() const = 0; + virtual double Median() const = 0; + virtual double Percentile(double p) const = 0; + virtual double Average() const = 0; + virtual double StandardDeviation() const = 0; + virtual void Data(HistogramData* const data) const = 0; +}; + +class HistogramImpl : public Histogram { + public: + HistogramImpl() { Clear(); } + + HistogramImpl(const HistogramImpl&) = delete; + HistogramImpl& operator=(const HistogramImpl&) = delete; + + virtual void Clear() override; + virtual bool Empty() const override; + virtual void Add(uint64_t value) override; + virtual void Merge(const Histogram& other) override; + void Merge(const HistogramImpl& other); + + virtual std::string ToString() const override; + virtual const char* Name() const override { return "HistogramImpl"; } + virtual uint64_t min() const override { return stats_.min(); } + virtual uint64_t max() const override { return stats_.max(); } + virtual uint64_t num() const override { return stats_.num(); } + virtual double Median() const override; + virtual double Percentile(double p) const override; + virtual double Average() const override; + virtual double StandardDeviation() const override; + virtual void Data(HistogramData* const data) const override; + + virtual 
~HistogramImpl() {} + + inline HistogramStat& TEST_GetStats() { return stats_; } + + private: + HistogramStat stats_; + std::mutex mutex_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.cc new file mode 100644 index 0000000000..c41ae8a03d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.cc @@ -0,0 +1,194 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "monitoring/histogram_windowing.h" + +#include + +#include "monitoring/histogram.h" +#include "rocksdb/system_clock.h" +#include "util/cast_util.h" + +namespace ROCKSDB_NAMESPACE { + +HistogramWindowingImpl::HistogramWindowingImpl() { + clock_ = SystemClock::Default(); + window_stats_.reset(new HistogramStat[static_cast(num_windows_)]); + Clear(); +} + +HistogramWindowingImpl::HistogramWindowingImpl(uint64_t num_windows, + uint64_t micros_per_window, + uint64_t min_num_per_window) + : num_windows_(num_windows), + micros_per_window_(micros_per_window), + min_num_per_window_(min_num_per_window) { + clock_ = SystemClock::Default(); + window_stats_.reset(new HistogramStat[static_cast(num_windows_)]); + Clear(); +} + +HistogramWindowingImpl::~HistogramWindowingImpl() {} + +void HistogramWindowingImpl::Clear() { + std::lock_guard lock(mutex_); + + stats_.Clear(); + for (size_t i = 0; i < num_windows_; i++) { + window_stats_[i].Clear(); + } + current_window_.store(0, std::memory_order_relaxed); + last_swap_time_.store(clock_->NowMicros(), std::memory_order_relaxed); +} + +bool HistogramWindowingImpl::Empty() const { return stats_.Empty(); } + +// This function is designed to be lock free, as it's in the critical path +// of any operation. +// Each individual value is atomic, it is just that some samples can go +// in the older bucket which is tolerable. 
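With histogram.h in place, a short usage sketch of the internal HistogramImpl API declared above; this is illustrative code against the vendored headers, not part of the patch (HistogramWindowingImpl::Add resumes below).

#include <cstdint>
#include <cstdio>

#include "monitoring/histogram.h"  // the header vendored above

void HistogramDemo() {
  ROCKSDB_NAMESPACE::HistogramImpl h;
  for (uint64_t v = 1; v <= 1000; v++) {
    h.Add(v);  // lock-free per-value update into atomic buckets
  }
  std::printf("p50=%.1f p99=%.1f avg=%.1f\n", h.Median(), h.Percentile(99),
              h.Average());
}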
+void HistogramWindowingImpl::Add(uint64_t value) { + TimerTick(); + + // Parent (global) member update + stats_.Add(value); + + // Current window update + window_stats_[static_cast(current_window())].Add(value); +} + +void HistogramWindowingImpl::Merge(const Histogram& other) { + if (strcmp(Name(), other.Name()) == 0) { + Merge(*static_cast_with_check(&other)); + } +} + +void HistogramWindowingImpl::Merge(const HistogramWindowingImpl& other) { + std::lock_guard lock(mutex_); + stats_.Merge(other.stats_); + + if (stats_.num_buckets_ != other.stats_.num_buckets_ || + micros_per_window_ != other.micros_per_window_) { + return; + } + + uint64_t cur_window = current_window(); + uint64_t other_cur_window = other.current_window(); + // going backwards for alignment + for (unsigned int i = 0; i < std::min(num_windows_, other.num_windows_); + i++) { + uint64_t window_index = (cur_window + num_windows_ - i) % num_windows_; + uint64_t other_window_index = + (other_cur_window + other.num_windows_ - i) % other.num_windows_; + size_t windex = static_cast(window_index); + size_t other_windex = static_cast(other_window_index); + + window_stats_[windex].Merge(other.window_stats_[other_windex]); + } +} + +std::string HistogramWindowingImpl::ToString() const { + return stats_.ToString(); +} + +double HistogramWindowingImpl::Median() const { return Percentile(50.0); } + +double HistogramWindowingImpl::Percentile(double p) const { + // Retry 3 times in total + for (int retry = 0; retry < 3; retry++) { + uint64_t start_num = stats_.num(); + double result = stats_.Percentile(p); + // Detect if swap buckets or Clear() was called during calculation + if (stats_.num() >= start_num) { + return result; + } + } + return 0.0; +} + +double HistogramWindowingImpl::Average() const { return stats_.Average(); } + +double HistogramWindowingImpl::StandardDeviation() const { + return stats_.StandardDeviation(); +} + +void HistogramWindowingImpl::Data(HistogramData* const data) const { + stats_.Data(data); +} + +void HistogramWindowingImpl::TimerTick() { + uint64_t curr_time = clock_->NowMicros(); + size_t curr_window_ = static_cast(current_window()); + if (curr_time - last_swap_time() > micros_per_window_ && + window_stats_[curr_window_].num() >= min_num_per_window_) { + SwapHistoryBucket(); + } +} + +void HistogramWindowingImpl::SwapHistoryBucket() { + // Threads executing Add() would be competing for this mutex, the first one + // who got the metex would take care of the bucket swap, other threads + // can skip this. + // If mutex is held by Merge() or Clear(), next Add() will take care of the + // swap, if needed. + if (mutex_.try_lock()) { + last_swap_time_.store(clock_->NowMicros(), std::memory_order_relaxed); + + uint64_t curr_window = current_window(); + uint64_t next_window = + (curr_window == num_windows_ - 1) ? 
0 : curr_window + 1; + + // subtract next buckets from totals and swap to next buckets + HistogramStat& stats_to_drop = + window_stats_[static_cast(next_window)]; + + if (!stats_to_drop.Empty()) { + for (size_t b = 0; b < stats_.num_buckets_; b++) { + stats_.buckets_[b].fetch_sub(stats_to_drop.bucket_at(b), + std::memory_order_relaxed); + } + + if (stats_.min() == stats_to_drop.min()) { + uint64_t new_min = std::numeric_limits::max(); + for (unsigned int i = 0; i < num_windows_; i++) { + if (i != next_window) { + uint64_t m = window_stats_[i].min(); + if (m < new_min) new_min = m; + } + } + stats_.min_.store(new_min, std::memory_order_relaxed); + } + + if (stats_.max() == stats_to_drop.max()) { + uint64_t new_max = 0; + for (unsigned int i = 0; i < num_windows_; i++) { + if (i != next_window) { + uint64_t m = window_stats_[i].max(); + if (m > new_max) new_max = m; + } + } + stats_.max_.store(new_max, std::memory_order_relaxed); + } + + stats_.num_.fetch_sub(stats_to_drop.num(), std::memory_order_relaxed); + stats_.sum_.fetch_sub(stats_to_drop.sum(), std::memory_order_relaxed); + stats_.sum_squares_.fetch_sub(stats_to_drop.sum_squares(), + std::memory_order_relaxed); + + stats_to_drop.Clear(); + } + + // advance to next window bucket + current_window_.store(next_window, std::memory_order_relaxed); + + mutex_.unlock(); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.h new file mode 100644 index 0000000000..9a862671f4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/histogram_windowing.h @@ -0,0 +1,84 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
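SwapHistoryBucket() above retires the oldest window by subtracting its contribution from the running totals before its slot is reused, so the aggregate always reflects only the last N windows. The same idea reduced to a windowed counter, as an illustrative standalone sketch:

#include <cstdint>
#include <vector>

class WindowedCounter {
 public:
  explicit WindowedCounter(size_t num_windows) : windows_(num_windows, 0) {}

  void Add(uint64_t v) {
    windows_[cur_] += v;
    total_ += v;
  }

  // Advance to the next window, dropping the expiring slot's
  // contribution from the running total before reusing it.
  void AdvanceWindow() {
    cur_ = (cur_ + 1) % windows_.size();
    total_ -= windows_[cur_];
    windows_[cur_] = 0;
  }

  uint64_t total() const { return total_; }

 private:
  std::vector<uint64_t> windows_;
  size_t cur_ = 0;
  uint64_t total_ = 0;
};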
+ +#pragma once + +#include "monitoring/histogram.h" + +namespace ROCKSDB_NAMESPACE { +class SystemClock; + +class HistogramWindowingImpl : public Histogram { + public: + HistogramWindowingImpl(); + HistogramWindowingImpl(uint64_t num_windows, uint64_t micros_per_window, + uint64_t min_num_per_window); + + HistogramWindowingImpl(const HistogramWindowingImpl&) = delete; + HistogramWindowingImpl& operator=(const HistogramWindowingImpl&) = delete; + + ~HistogramWindowingImpl(); + + virtual void Clear() override; + virtual bool Empty() const override; + virtual void Add(uint64_t value) override; + virtual void Merge(const Histogram& other) override; + void Merge(const HistogramWindowingImpl& other); + + virtual std::string ToString() const override; + virtual const char* Name() const override { return "HistogramWindowingImpl"; } + virtual uint64_t min() const override { return stats_.min(); } + virtual uint64_t max() const override { return stats_.max(); } + virtual uint64_t num() const override { return stats_.num(); } + virtual double Median() const override; + virtual double Percentile(double p) const override; + virtual double Average() const override; + virtual double StandardDeviation() const override; + virtual void Data(HistogramData* const data) const override; + +#ifndef NDEBUG + void TEST_UpdateClock(const std::shared_ptr& clock) { + clock_ = clock; + } +#endif // NDEBUG + + private: + void TimerTick(); + void SwapHistoryBucket(); + inline uint64_t current_window() const { + return current_window_.load(std::memory_order_relaxed); + } + inline uint64_t last_swap_time() const { + return last_swap_time_.load(std::memory_order_relaxed); + } + + std::shared_ptr clock_; + std::mutex mutex_; + + // Aggregated stats over windows_stats_, all the computation is done + // upon aggregated values + HistogramStat stats_; + + // This is a circular array representing the latest N time-windows. + // Each entry stores a time-window of data. Expiration is done + // on window-based. + std::unique_ptr window_stats_; + + std::atomic_uint_fast64_t current_window_; + std::atomic_uint_fast64_t last_swap_time_; + + // Following parameters are configuable + uint64_t num_windows_ = 5; + uint64_t micros_per_window_ = 60000000; + // By default, don't care about the number of values in current window + // when decide whether to swap windows or not. + uint64_t min_num_per_window_ = 0; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.cc new file mode 100644 index 0000000000..568d8ec134 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
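A small construction sketch for the windowed histogram declared in the header below; it assumes only the three-argument constructor shown there (the defaults are five 60-second windows with no minimum count per window).

#include <memory>

#include "monitoring/histogram_windowing.h"

std::unique_ptr<ROCKSDB_NAMESPACE::HistogramWindowingImpl>
MakeMinuteHistogram() {
  // Six 10-second windows approximate a one-minute sliding view.
  return std::make_unique<ROCKSDB_NAMESPACE::HistogramWindowingImpl>(
      /*num_windows=*/6, /*micros_per_window=*/10 * 1000 * 1000,
      /*min_num_per_window=*/0);
}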
+ +#include "monitoring/in_memory_stats_history.h" + +#include "db/db_impl/db_impl.h" + +namespace ROCKSDB_NAMESPACE { + +InMemoryStatsHistoryIterator::~InMemoryStatsHistoryIterator() {} + +bool InMemoryStatsHistoryIterator::Valid() const { return valid_; } + +Status InMemoryStatsHistoryIterator::status() const { return status_; } + +// Because of garbage collection, the next stats snapshot may or may not be +// right after the current one. When reading from DBImpl::stats_history_, this +// call will be protected by DB Mutex so it will not return partial or +// corrupted results. +void InMemoryStatsHistoryIterator::Next() { + // increment start_time by 1 to avoid infinite loop + AdvanceIteratorByTime(GetStatsTime() + 1, end_time_); +} + +uint64_t InMemoryStatsHistoryIterator::GetStatsTime() const { return time_; } + +const std::map& +InMemoryStatsHistoryIterator::GetStatsMap() const { + return stats_map_; +} + +// advance the iterator to the next time between [start_time, end_time) +// if success, update time_ and stats_map_ with new_time and stats_map +void InMemoryStatsHistoryIterator::AdvanceIteratorByTime(uint64_t start_time, + uint64_t end_time) { + // try to find next entry in stats_history_ map + if (db_impl_ != nullptr) { + valid_ = + db_impl_->FindStatsByTime(start_time, end_time, &time_, &stats_map_); + } else { + valid_ = false; + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.h new file mode 100644 index 0000000000..3be864fe26 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/in_memory_stats_history.h @@ -0,0 +1,74 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "rocksdb/stats_history.h" + +namespace ROCKSDB_NAMESPACE { + +// InMemoryStatsHistoryIterator can be used to access stats history that was +// stored by an in-memory two level std::map(DBImpl::stats_history_). It keeps +// a copy of the stats snapshot (in stats_map_) that is currently being pointed +// to, which allows the iterator to access the stats snapshot even when +// the background garbage collecting thread purges it from the source of truth +// (`DBImpl::stats_history_`). In that case, the iterator will continue to be +// valid until a call to `Next()` returns no result and invalidates it. In +// some extreme cases, the iterator may also return fragmented segments of +// stats snapshots due to long gaps between `Next()` calls and interleaved +// garbage collection. 
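A consumption sketch for stats history iterators such as the in-memory one declared below. DB::GetStatsHistory is assumed to have the shape shown here; timestamps are seconds since epoch, and Next() may invalidate the iterator as documented above.

#include <cstdio>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/stats_history.h"

void DumpStatsHistory(rocksdb::DB* db, uint64_t start, uint64_t end) {
  std::unique_ptr<rocksdb::StatsHistoryIterator> it;
  rocksdb::Status s = db->GetStatsHistory(start, end, &it);
  if (!s.ok()) return;
  for (; it->Valid(); it->Next()) {  // Next() may invalidate the iterator
    for (const auto& kv : it->GetStatsMap()) {
      std::printf("%llu %s = %llu\n",
                  (unsigned long long)it->GetStatsTime(), kv.first.c_str(),
                  (unsigned long long)kv.second);
    }
  }
}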
+class InMemoryStatsHistoryIterator final : public StatsHistoryIterator { + public: + // Setup InMemoryStatsHistoryIterator to return stats snapshots between + // seconds timestamps [start_time, end_time) + InMemoryStatsHistoryIterator(uint64_t start_time, uint64_t end_time, + DBImpl* db_impl) + : start_time_(start_time), + end_time_(end_time), + valid_(true), + db_impl_(db_impl) { + AdvanceIteratorByTime(start_time_, end_time_); + } + // no copying allowed + InMemoryStatsHistoryIterator(const InMemoryStatsHistoryIterator&) = delete; + void operator=(const InMemoryStatsHistoryIterator&) = delete; + InMemoryStatsHistoryIterator(InMemoryStatsHistoryIterator&&) = delete; + InMemoryStatsHistoryIterator& operator=(InMemoryStatsHistoryIterator&&) = + delete; + + ~InMemoryStatsHistoryIterator() override; + bool Valid() const override; + Status status() const override; + + // Move to the next stats snapshot currently available + // This function may invalidate the iterator + // REQUIRES: Valid() + void Next() override; + + // REQUIRES: Valid() + uint64_t GetStatsTime() const override; + + // This function is idempotent + // REQUIRES: Valid() + const std::map& GetStatsMap() const override; + + private: + // advance the iterator to the next stats history record with timestamp + // between [start_time, end_time) + void AdvanceIteratorByTime(uint64_t start_time, uint64_t end_time); + + uint64_t time_; + uint64_t start_time_; + uint64_t end_time_; + std::map stats_map_; + Status status_; + bool valid_; + DBImpl* db_impl_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.cc new file mode 100644 index 0000000000..699495a348 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "monitoring/instrumented_mutex.h" + +#include "monitoring/perf_context_imp.h" +#include "monitoring/thread_status_util.h" +#include "rocksdb/system_clock.h" +#include "test_util/sync_point.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +#ifndef NPERF_CONTEXT +Statistics* stats_for_report(SystemClock* clock, Statistics* stats) { + if (clock != nullptr && stats != nullptr && + stats->get_stats_level() > kExceptTimeForMutex) { + return stats; + } else { + return nullptr; + } +} +#endif // NPERF_CONTEXT +} // namespace + +void InstrumentedMutex::Lock() { + PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD( + db_mutex_lock_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS, + stats_for_report(clock_, stats_), stats_code_); + LockInternal(); +} + +void InstrumentedMutex::LockInternal() { +#ifndef NDEBUG + ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT); +#endif +#ifdef COERCE_CONTEXT_SWITCH + if (stats_code_ == DB_MUTEX_WAIT_MICROS) { + thread_local Random rnd(301); + if (rnd.OneIn(2)) { + if (bg_cv_) { + bg_cv_->SignalAll(); + } + sched_yield(); + } else { + uint32_t sleep_us = rnd.Uniform(11) * 1000; + if (bg_cv_) { + bg_cv_->SignalAll(); + } + SystemClock::Default()->SleepForMicroseconds(sleep_us); + } + } +#endif + mutex_.Lock(); +} + +void InstrumentedCondVar::Wait() { + PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD( + db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS, + stats_for_report(clock_, stats_), stats_code_); + WaitInternal(); +} + +void InstrumentedCondVar::WaitInternal() { +#ifndef NDEBUG + ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT); +#endif + cond_.Wait(); +} + +bool InstrumentedCondVar::TimedWait(uint64_t abs_time_us) { + PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD( + db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS, + stats_for_report(clock_, stats_), stats_code_); + return TimedWaitInternal(abs_time_us); +} + +bool InstrumentedCondVar::TimedWaitInternal(uint64_t abs_time_us) { +#ifndef NDEBUG + ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT); +#endif + + TEST_SYNC_POINT_CALLBACK("InstrumentedCondVar::TimedWaitInternal", + &abs_time_us); + + return cond_.TimedWait(abs_time_us); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.h new file mode 100644 index 0000000000..b97d2502e4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/instrumented_mutex.h @@ -0,0 +1,126 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "monitoring/statistics_impl.h" +#include "port/port.h" +#include "rocksdb/statistics.h" +#include "rocksdb/system_clock.h" +#include "rocksdb/thread_status.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { +class InstrumentedCondVar; + +// A wrapper class for port::Mutex that provides additional layer +// for collecting stats and instrumentation. 
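InstrumentedMutex, declared below, wraps port::Mutex so that time spent blocked in Lock() can be charged to perf context and statistics. The core idea reduced to a standard-library sketch with a plain atomic counter (RocksDB routes the measurement through PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD instead):

#include <atomic>
#include <chrono>
#include <cstdint>
#include <mutex>

class TimedMutex {
 public:
  void Lock() {
    auto t0 = std::chrono::steady_clock::now();
    mu_.lock();  // only the blocking time is measured
    auto waited = std::chrono::steady_clock::now() - t0;
    wait_nanos_ +=
        std::chrono::duration_cast<std::chrono::nanoseconds>(waited).count();
  }
  void Unlock() { mu_.unlock(); }
  uint64_t wait_nanos() const { return wait_nanos_.load(); }

 private:
  std::mutex mu_;
  std::atomic<uint64_t> wait_nanos_{0};
};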
+class InstrumentedMutex { + public: + explicit InstrumentedMutex(bool adaptive = false) + : mutex_(adaptive), stats_(nullptr), clock_(nullptr), stats_code_(0) {} + + explicit InstrumentedMutex(SystemClock* clock, bool adaptive = false) + : mutex_(adaptive), stats_(nullptr), clock_(clock), stats_code_(0) {} + + InstrumentedMutex(Statistics* stats, SystemClock* clock, int stats_code, + bool adaptive = false) + : mutex_(adaptive), + stats_(stats), + clock_(clock), + stats_code_(stats_code) {} + +#ifdef COERCE_CONTEXT_SWITCH + InstrumentedMutex(Statistics* stats, SystemClock* clock, int stats_code, + InstrumentedCondVar* bg_cv, bool adaptive = false) + : mutex_(adaptive), + stats_(stats), + clock_(clock), + stats_code_(stats_code), + bg_cv_(bg_cv) {} +#endif + + void Lock(); + + void Unlock() { mutex_.Unlock(); } + + void AssertHeld() { mutex_.AssertHeld(); } + + private: + void LockInternal(); + friend class InstrumentedCondVar; + port::Mutex mutex_; + Statistics* stats_; + SystemClock* clock_; + int stats_code_; +#ifdef COERCE_CONTEXT_SWITCH + InstrumentedCondVar* bg_cv_ = nullptr; +#endif +}; + +class ALIGN_AS(CACHE_LINE_SIZE) CacheAlignedInstrumentedMutex + : public InstrumentedMutex { + using InstrumentedMutex::InstrumentedMutex; +}; +static_assert(alignof(CacheAlignedInstrumentedMutex) != CACHE_LINE_SIZE || + sizeof(CacheAlignedInstrumentedMutex) % CACHE_LINE_SIZE == 0); + +// RAII wrapper for InstrumentedMutex +class InstrumentedMutexLock { + public: + explicit InstrumentedMutexLock(InstrumentedMutex* mutex) : mutex_(mutex) { + mutex_->Lock(); + } + + ~InstrumentedMutexLock() { mutex_->Unlock(); } + + private: + InstrumentedMutex* const mutex_; + InstrumentedMutexLock(const InstrumentedMutexLock&) = delete; + void operator=(const InstrumentedMutexLock&) = delete; +}; + +// RAII wrapper for temporary releasing InstrumentedMutex inside +// InstrumentedMutexLock +class InstrumentedMutexUnlock { + public: + explicit InstrumentedMutexUnlock(InstrumentedMutex* mutex) : mutex_(mutex) { + mutex_->Unlock(); + } + + ~InstrumentedMutexUnlock() { mutex_->Lock(); } + + private: + InstrumentedMutex* const mutex_; + InstrumentedMutexUnlock(const InstrumentedMutexUnlock&) = delete; + void operator=(const InstrumentedMutexUnlock&) = delete; +}; + +class InstrumentedCondVar { + public: + explicit InstrumentedCondVar(InstrumentedMutex* instrumented_mutex) + : cond_(&(instrumented_mutex->mutex_)), + stats_(instrumented_mutex->stats_), + clock_(instrumented_mutex->clock_), + stats_code_(instrumented_mutex->stats_code_) {} + + void Wait(); + + bool TimedWait(uint64_t abs_time_us); + + void Signal() { cond_.Signal(); } + + void SignalAll() { cond_.SignalAll(); } + + private: + void WaitInternal(); + bool TimedWaitInternal(uint64_t abs_time_us); + port::CondVar cond_; + Statistics* stats_; + SystemClock* clock_; + int stats_code_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context.cc new file mode 100644 index 0000000000..04e98914da --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
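[Reviewer note, not part of the vendored sources] The instrumented_mutex.h wrappers above are used like plain RAII guards; a minimal sketch, assuming the DB_MUTEX_WAIT_MICROS ticker from the public statistics header:

    using namespace ROCKSDB_NAMESPACE;

    void WithInstrumentedLock(Statistics* stats, SystemClock* clock) {
      InstrumentedMutex mu(stats, clock, DB_MUTEX_WAIT_MICROS);
      InstrumentedCondVar cv(&mu);
      {
        InstrumentedMutexLock l(&mu);      // Lock() records wait time when stats are enabled
        // ... critical section ...
        {
          InstrumentedMutexUnlock u(&mu);  // inverted guard: releases now, re-locks on scope exit
          // ... expensive work without the lock ...
        }
        cv.SignalAll();
      }                                    // unlocked here
    }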
+
+#include <sstream>
+
+#include "monitoring/iostats_context_imp.h"
+#include "rocksdb/env.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+#ifdef NIOSTATS_CONTEXT
+// Should not be used because the counters are not thread-safe.
+// Put here just to make get_iostats_context() simple without ifdef.
+static IOStatsContext iostats_context;
+#else
+thread_local IOStatsContext iostats_context;
+#endif
+
+IOStatsContext* get_iostats_context() { return &iostats_context; }
+
+void IOStatsContext::Reset() {
+#ifndef NIOSTATS_CONTEXT
+  thread_pool_id = Env::Priority::TOTAL;
+  bytes_read = 0;
+  bytes_written = 0;
+  open_nanos = 0;
+  allocate_nanos = 0;
+  write_nanos = 0;
+  read_nanos = 0;
+  range_sync_nanos = 0;
+  prepare_write_nanos = 0;
+  fsync_nanos = 0;
+  logger_nanos = 0;
+  cpu_write_nanos = 0;
+  cpu_read_nanos = 0;
+  file_io_stats_by_temperature.Reset();
+#endif  //! NIOSTATS_CONTEXT
+}
+
+#define IOSTATS_CONTEXT_OUTPUT(counter)         \
+  if (!exclude_zero_counters || counter > 0) {  \
+    ss << #counter << " = " << counter << ", "; \
+  }
+
+std::string IOStatsContext::ToString(bool exclude_zero_counters) const {
+#ifdef NIOSTATS_CONTEXT
+  (void)exclude_zero_counters;
+  return "";
+#else
+  std::ostringstream ss;
+  IOSTATS_CONTEXT_OUTPUT(thread_pool_id);
+  IOSTATS_CONTEXT_OUTPUT(bytes_read);
+  IOSTATS_CONTEXT_OUTPUT(bytes_written);
+  IOSTATS_CONTEXT_OUTPUT(open_nanos);
+  IOSTATS_CONTEXT_OUTPUT(allocate_nanos);
+  IOSTATS_CONTEXT_OUTPUT(write_nanos);
+  IOSTATS_CONTEXT_OUTPUT(read_nanos);
+  IOSTATS_CONTEXT_OUTPUT(range_sync_nanos);
+  IOSTATS_CONTEXT_OUTPUT(fsync_nanos);
+  IOSTATS_CONTEXT_OUTPUT(prepare_write_nanos);
+  IOSTATS_CONTEXT_OUTPUT(logger_nanos);
+  IOSTATS_CONTEXT_OUTPUT(cpu_write_nanos);
+  IOSTATS_CONTEXT_OUTPUT(cpu_read_nanos);
+  IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.hot_file_bytes_read);
+  IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.warm_file_bytes_read);
+  IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.cold_file_bytes_read);
+  IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.hot_file_read_count);
+  IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.warm_file_read_count);
+  IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.cold_file_read_count);
+  std::string str = ss.str();
+  str.erase(str.find_last_not_of(", ") + 1);
+  return str;
+#endif  //! NIOSTATS_CONTEXT
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context_imp.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context_imp.h
new file mode 100644
index 0000000000..a0b4292dfd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/iostats_context_imp.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
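As above, iostats_context is one thread-local struct per thread. A hedged sketch of how a caller inspects it (get_iostats_context() and ToString() are the public API from rocksdb/iostats_context.h):

    #include <iostream>
    #include "rocksdb/iostats_context.h"

    void DumpThreadIOStats() {
      using namespace ROCKSDB_NAMESPACE;
      IOStatsContext* ctx = get_iostats_context();
      ctx->Reset();  // zero this thread's counters
      // ... perform some reads/writes on this thread ...
      std::cout << ctx->ToString(/*exclude_zero_counters=*/true) << std::endl;
    }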
+//
+#pragma once
+#include "monitoring/perf_step_timer.h"
+#include "rocksdb/iostats_context.h"
+
+#if !defined(NIOSTATS_CONTEXT)
+namespace ROCKSDB_NAMESPACE {
+extern thread_local IOStatsContext iostats_context;
+}  // namespace ROCKSDB_NAMESPACE
+
+// increment a specific counter by the specified value
+#define IOSTATS_ADD(metric, value)        \
+  if (!iostats_context.disable_iostats) { \
+    iostats_context.metric += value;      \
+  }
+
+// reset a specific counter to zero
+#define IOSTATS_RESET(metric) (iostats_context.metric = 0)
+
+// reset all counters to zero
+#define IOSTATS_RESET_ALL() (iostats_context.Reset())
+
+#define IOSTATS_SET_THREAD_POOL_ID(value) \
+  (iostats_context.thread_pool_id = value)
+
+#define IOSTATS_THREAD_POOL_ID() (iostats_context.thread_pool_id)
+
+#define IOSTATS(metric) (iostats_context.metric)
+
+// Declare and set start time of the timer
+#define IOSTATS_TIMER_GUARD(metric)                                      \
+  PerfStepTimer iostats_step_timer_##metric(&(iostats_context.metric)); \
+  iostats_step_timer_##metric.Start();
+
+// Declare and set start time of the timer
+#define IOSTATS_CPU_TIMER_GUARD(metric, clock)          \
+  PerfStepTimer iostats_step_timer_##metric(            \
+      &(iostats_context.metric), clock, true,           \
+      PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);  \
+  iostats_step_timer_##metric.Start();
+
+#define IOSTATS_SET_DISABLE(disable) (iostats_context.disable_iostats = disable)
+
+#else  // !NIOSTATS_CONTEXT
+
+#define IOSTATS_ADD(metric, value)
+#define IOSTATS_ADD_IF_POSITIVE(metric, value)
+#define IOSTATS_RESET(metric)
+#define IOSTATS_RESET_ALL()
+#define IOSTATS_SET_THREAD_POOL_ID(value)
+#define IOSTATS_THREAD_POOL_ID()
+#define IOSTATS(metric) 0
+#define IOSTATS_SET_DISABLE(disable)
+
+#define IOSTATS_TIMER_GUARD(metric)
+#define IOSTATS_CPU_TIMER_GUARD(metric, clock) static_cast<void>(clock)
+
+#endif  // !NIOSTATS_CONTEXT
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context.cc
new file mode 100644
index 0000000000..6839e90598
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context.cc
@@ -0,0 +1,313 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#include <sstream>
+
+#include "monitoring/perf_context_imp.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+/*
+ * Please add new metrics to this macro and appropriate fields will be copied,
+ * and/or emitted when converted to string.
+ * When people need to add new metrics please add the metric to the macro below
+ * and enclose the name of the specific metric within defCmd().
+ * The position of the field will be dictated by the
+ * order in which the macros are enumerated and the offsets of the fields will
+ * be matched against ''PerfContextByLevelBase'' declared in perf_context.h.
+ */ +// clang-format off +#define DEF_PERF_CONTEXT_LEVEL_METRICS(defCmd) \ + defCmd(bloom_filter_useful) \ + defCmd(bloom_filter_full_positive) \ + defCmd(bloom_filter_full_true_positive) \ + defCmd(user_key_return_count) \ + defCmd(get_from_table_nanos) \ + defCmd(block_cache_hit_count) \ + defCmd(block_cache_miss_count) +// clang-format on + +// Break down performance counters by level and store per-level perf context in +// PerfContextByLevel +struct PerfContextByLevelInt { +#define EMIT_FIELDS(x) uint64_t x = 0; + DEF_PERF_CONTEXT_LEVEL_METRICS(EMIT_FIELDS) +#undef EMIT_FIELDS +}; + +/* + * Please add new metrics to this macro and appropriate fields will be copied, + * and/or emitted when converted to string. + * When people need to add new metrics please enclose the name of the specific + * metric within defCmd(). The position of the field will be dictated by the + * order in which the macros are enumerated and the offsets of the fields will + * be matched against ''PerfContextBase'' declared in perf_context.h. + */ + +// clang-format off +#define DEF_PERF_CONTEXT_METRICS(defCmd) \ + defCmd(user_key_comparison_count) \ + defCmd(block_cache_hit_count) \ + defCmd(block_read_count) \ + defCmd(block_read_byte) \ + defCmd(block_read_time) \ + defCmd(block_read_cpu_time) \ + defCmd(block_cache_index_hit_count) \ + defCmd(block_cache_standalone_handle_count) \ + defCmd(block_cache_real_handle_count) \ + defCmd(index_block_read_count) \ + defCmd(block_cache_filter_hit_count) \ + defCmd(filter_block_read_count) \ + defCmd(compression_dict_block_read_count) \ + defCmd(secondary_cache_hit_count) \ + defCmd(compressed_sec_cache_insert_real_count) \ + defCmd(compressed_sec_cache_insert_dummy_count) \ + defCmd(compressed_sec_cache_uncompressed_bytes) \ + defCmd(compressed_sec_cache_compressed_bytes) \ + defCmd(block_checksum_time) \ + defCmd(block_decompress_time) \ + defCmd(get_read_bytes) \ + defCmd(multiget_read_bytes) \ + defCmd(iter_read_bytes) \ + defCmd(blob_cache_hit_count) \ + defCmd(blob_read_count) \ + defCmd(blob_read_byte) \ + defCmd(blob_read_time) \ + defCmd(blob_checksum_time) \ + defCmd(blob_decompress_time) \ + defCmd(internal_key_skipped_count) \ + defCmd(internal_delete_skipped_count) \ + defCmd(internal_recent_skipped_count) \ + defCmd(internal_merge_count) \ + defCmd(internal_merge_point_lookup_count) \ + defCmd(internal_range_del_reseek_count) \ + defCmd(get_snapshot_time) \ + defCmd(get_from_memtable_time) \ + defCmd(get_from_memtable_count) \ + defCmd(get_post_process_time) \ + defCmd(get_from_output_files_time) \ + defCmd(seek_on_memtable_time) \ + defCmd(seek_on_memtable_count) \ + defCmd(next_on_memtable_count) \ + defCmd(prev_on_memtable_count) \ + defCmd(seek_child_seek_time) \ + defCmd(seek_child_seek_count) \ + defCmd(seek_min_heap_time) \ + defCmd(seek_max_heap_time) \ + defCmd(seek_internal_seek_time) \ + defCmd(find_next_user_entry_time) \ + defCmd(write_wal_time) \ + defCmd(write_memtable_time) \ + defCmd(write_delay_time) \ + defCmd(write_scheduling_flushes_compactions_time)\ + defCmd(write_pre_and_post_process_time) \ + defCmd(write_thread_wait_nanos) \ + defCmd(db_mutex_lock_nanos) \ + defCmd(db_condition_wait_nanos) \ + defCmd(merge_operator_time_nanos) \ + defCmd(read_index_block_nanos) \ + defCmd(read_filter_block_nanos) \ + defCmd(new_table_block_iter_nanos) \ + defCmd(new_table_iterator_nanos) \ + defCmd(block_seek_nanos) \ + defCmd(find_table_nanos) \ + defCmd(bloom_memtable_hit_count) \ + defCmd(bloom_memtable_miss_count) \ + 
defCmd(bloom_sst_hit_count) \ + defCmd(bloom_sst_miss_count) \ + defCmd(key_lock_wait_time) \ + defCmd(key_lock_wait_count) \ + defCmd(env_new_sequential_file_nanos) \ + defCmd(env_new_random_access_file_nanos) \ + defCmd(env_new_writable_file_nanos) \ + defCmd(env_reuse_writable_file_nanos) \ + defCmd(env_new_random_rw_file_nanos) \ + defCmd(env_new_directory_nanos) \ + defCmd(env_file_exists_nanos) \ + defCmd(env_get_children_nanos) \ + defCmd(env_get_children_file_attributes_nanos) \ + defCmd(env_delete_file_nanos) \ + defCmd(env_create_dir_nanos) \ + defCmd(env_create_dir_if_missing_nanos) \ + defCmd(env_delete_dir_nanos) \ + defCmd(env_get_file_size_nanos) \ + defCmd(env_get_file_modification_time_nanos) \ + defCmd(env_rename_file_nanos) \ + defCmd(env_link_file_nanos) \ + defCmd(env_lock_file_nanos) \ + defCmd(env_unlock_file_nanos) \ + defCmd(env_new_logger_nanos) \ + defCmd(get_cpu_nanos) \ + defCmd(iter_next_cpu_nanos) \ + defCmd(iter_prev_cpu_nanos) \ + defCmd(iter_seek_cpu_nanos) \ + defCmd(iter_next_count) \ + defCmd(iter_prev_count) \ + defCmd(iter_seek_count) \ + defCmd(encrypt_data_nanos) \ + defCmd(decrypt_data_nanos) \ + defCmd(number_async_seek) +// clang-format on + +struct PerfContextInt { +#define EMIT_FIELDS(x) uint64_t x; + DEF_PERF_CONTEXT_METRICS(EMIT_FIELDS) +#undef EMIT_FIELDS +}; + +#if defined(NPERF_CONTEXT) +// Should not be used because the counters are not thread-safe. +// Put here just to make get_perf_context() simple without ifdef. +PerfContext perf_context; +#else +thread_local PerfContext perf_context; +#endif + +PerfContext* get_perf_context() { + static_assert(sizeof(PerfContextBase) == sizeof(PerfContextInt)); + static_assert(sizeof(PerfContextByLevelBase) == + sizeof(PerfContextByLevelInt)); + /* + * Validate that we have the same fields and offsets between the external user + * facing + * ''PerfContextBase'' and ''PerfContextByLevelBase' structures with the + * internal structures that we generate from the DEF_* macros above. This way + * if people add metrics to the user-facing header file, they will have a + * build failure and need to add similar fields to the macros in this file. + * These are compile-time validations and don't impose any run-time penalties. 
+ */
+#define EMIT_OFFSET_ASSERTION(x) \
+  static_assert(offsetof(PerfContextBase, x) == offsetof(PerfContextInt, x));
+  DEF_PERF_CONTEXT_METRICS(EMIT_OFFSET_ASSERTION)
+#undef EMIT_OFFSET_ASSERTION
+#define EMIT_OFFSET_ASSERTION(x)                       \
+  static_assert(offsetof(PerfContextByLevelBase, x) == \
+                offsetof(PerfContextByLevelInt, x));
+  DEF_PERF_CONTEXT_LEVEL_METRICS(EMIT_OFFSET_ASSERTION)
+#undef EMIT_OFFSET_ASSERTION
+  return &perf_context;
+}
+
+PerfContext::~PerfContext() {
+#if !defined(NPERF_CONTEXT) && !defined(OS_SOLARIS)
+  ClearPerLevelPerfContext();
+#endif
+}
+
+PerfContext::PerfContext(const PerfContext& other) {
+#ifdef NPERF_CONTEXT
+  (void)other;
+#else
+  copyMetrics(&other);
+#endif
+}
+
+PerfContext::PerfContext(PerfContext&& other) noexcept {
+#ifdef NPERF_CONTEXT
+  (void)other;
+#else
+  copyMetrics(&other);
+#endif
+}
+
+PerfContext& PerfContext::operator=(const PerfContext& other) {
+#ifdef NPERF_CONTEXT
+  (void)other;
+#else
+  copyMetrics(&other);
+#endif
+  return *this;
+}
+
+void PerfContext::copyMetrics(const PerfContext* other) noexcept {
+#ifdef NPERF_CONTEXT
+  (void)other;
+#else
+#define EMIT_COPY_FIELDS(x) x = other->x;
+  DEF_PERF_CONTEXT_METRICS(EMIT_COPY_FIELDS)
+#undef EMIT_COPY_FIELDS
+  if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
+    ClearPerLevelPerfContext();
+  }
+  if (other->level_to_perf_context != nullptr) {
+    level_to_perf_context = new std::map<uint32_t, PerfContextByLevel>();
+    *level_to_perf_context = *other->level_to_perf_context;
+  }
+  per_level_perf_context_enabled = other->per_level_perf_context_enabled;
+#endif
+}
+
+void PerfContext::Reset() {
+#ifndef NPERF_CONTEXT
+#define EMIT_FIELDS(x) x = 0;
+  DEF_PERF_CONTEXT_METRICS(EMIT_FIELDS)
+#undef EMIT_FIELDS
+  if (per_level_perf_context_enabled && level_to_perf_context) {
+    for (auto& kv : *level_to_perf_context) {
+      kv.second.Reset();
+    }
+  }
+#endif
+}
+
+void PerfContextByLevel::Reset() {
+#ifndef NPERF_CONTEXT
+#define EMIT_FIELDS(x) x = 0;
+  DEF_PERF_CONTEXT_LEVEL_METRICS(EMIT_FIELDS)
+#undef EMIT_FIELDS
+#endif
+}
+
+std::string PerfContext::ToString(bool exclude_zero_counters) const {
+#ifdef NPERF_CONTEXT
+  (void)exclude_zero_counters;
+  return "";
+#else
+  std::ostringstream ss;
+#define PERF_CONTEXT_OUTPUT(counter)             \
+  if (!exclude_zero_counters || (counter > 0)) { \
+    ss << #counter << " = " << counter << ", ";  \
+  }
+  DEF_PERF_CONTEXT_METRICS(PERF_CONTEXT_OUTPUT)
+#undef PERF_CONTEXT_OUTPUT
+  if (per_level_perf_context_enabled && level_to_perf_context) {
+#define PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(counter)        \
+  ss << #counter << " = ";                                       \
+  for (auto& kv : *level_to_perf_context) {                      \
+    if (!exclude_zero_counters || (kv.second.counter > 0)) {     \
+      ss << kv.second.counter << "@level" << kv.first << ", ";   \
+    }                                                            \
+  }
+  DEF_PERF_CONTEXT_LEVEL_METRICS(PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER)
+#undef PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER
+  }
+  std::string str = ss.str();
+  str.erase(str.find_last_not_of(", ") + 1);
+  return str;
+#endif
+}
+
+void PerfContext::EnablePerLevelPerfContext() {
+  if (level_to_perf_context == nullptr) {
+    level_to_perf_context = new std::map<uint32_t, PerfContextByLevel>();
+  }
+  per_level_perf_context_enabled = true;
+}
+
+void PerfContext::DisablePerLevelPerfContext() {
+  per_level_perf_context_enabled = false;
+}
+
+void PerfContext::ClearPerLevelPerfContext() {
+  if (level_to_perf_context != nullptr) {
+    level_to_perf_context->clear();
+    delete level_to_perf_context;
+    level_to_perf_context = nullptr;
+  }
+  per_level_perf_context_enabled = false;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
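To make the thread-local machinery above concrete, a usage sketch (SetPerfLevel and get_perf_context are the public entry points from rocksdb/perf_context.h and rocksdb/perf_level.h):

    #include <iostream>
    #include "rocksdb/perf_context.h"
    #include "rocksdb/perf_level.h"

    void ProfileReadsOnThisThread() {
      using namespace ROCKSDB_NAMESPACE;
      SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
      get_perf_context()->Reset();
      get_perf_context()->EnablePerLevelPerfContext();  // also collect per-LSM-level counters
      // ... issue Get()/Seek() calls ...
      std::cout << get_perf_context()->ToString(/*exclude_zero_counters=*/true) << "\n";
      SetPerfLevel(PerfLevel::kDisable);
    }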
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context_imp.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context_imp.h new file mode 100644 index 0000000000..5b66ff2ff9 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_context_imp.h @@ -0,0 +1,96 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once +#include "monitoring/perf_step_timer.h" +#include "rocksdb/perf_context.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { +#if defined(NPERF_CONTEXT) +extern PerfContext perf_context; +#else +#if defined(OS_SOLARIS) +extern thread_local PerfContext perf_context_; +#define perf_context (*get_perf_context()) +#else +extern thread_local PerfContext perf_context; +#endif +#endif + +#if defined(NPERF_CONTEXT) + +#define PERF_TIMER_STOP(metric) +#define PERF_TIMER_START(metric) +#define PERF_TIMER_GUARD(metric) +#define PERF_TIMER_GUARD_WITH_CLOCK(metric, clock) +#define PERF_CPU_TIMER_GUARD(metric, clock) +#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \ + ticker_type) +#define PERF_TIMER_MEASURE(metric) +#define PERF_COUNTER_ADD(metric, value) +#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) + +#else + +// Stop the timer and update the metric +#define PERF_TIMER_STOP(metric) perf_step_timer_##metric.Stop(); + +#define PERF_TIMER_START(metric) perf_step_timer_##metric.Start(); + +// Declare and set start time of the timer +#define PERF_TIMER_GUARD(metric) \ + PerfStepTimer perf_step_timer_##metric(&(perf_context.metric)); \ + perf_step_timer_##metric.Start(); + +// Declare and set start time of the timer +#define PERF_TIMER_GUARD_WITH_CLOCK(metric, clock) \ + PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), clock); \ + perf_step_timer_##metric.Start(); + +// Declare and set start time of the timer +#define PERF_CPU_TIMER_GUARD(metric, clock) \ + PerfStepTimer perf_step_timer_##metric( \ + &(perf_context.metric), clock, true, \ + PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \ + perf_step_timer_##metric.Start(); + +#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \ + ticker_type) \ + PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), nullptr, \ + false, PerfLevel::kEnableTime, stats, \ + ticker_type); \ + if (condition) { \ + perf_step_timer_##metric.Start(); \ + } + +// Update metric with time elapsed since last START. start time is reset +// to current timestamp. 
+#define PERF_TIMER_MEASURE(metric) perf_step_timer_##metric.Measure();
+
+// Increase metric value
+#define PERF_COUNTER_ADD(metric, value)        \
+  if (perf_level >= PerfLevel::kEnableCount) { \
+    perf_context.metric += value;              \
+  }
+
+// Increase metric value
+#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level)               \
+  if (perf_level >= PerfLevel::kEnableCount &&                        \
+      perf_context.per_level_perf_context_enabled &&                  \
+      perf_context.level_to_perf_context) {                           \
+    if ((*(perf_context.level_to_perf_context)).find(level) !=        \
+        (*(perf_context.level_to_perf_context)).end()) {              \
+      (*(perf_context.level_to_perf_context))[level].metric += value; \
+    } else {                                                          \
+      PerfContextByLevel empty_context;                               \
+      (*(perf_context.level_to_perf_context))[level] = empty_context; \
+      (*(perf_context.level_to_perf_context))[level].metric += value; \
+    }                                                                 \
+  }
+
+#endif
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level.cc
new file mode 100644
index 0000000000..e3507624b6
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level.cc
@@ -0,0 +1,23 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#include <assert.h>
+
+#include "monitoring/perf_level_imp.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+thread_local PerfLevel perf_level = kEnableCount;
+
+void SetPerfLevel(PerfLevel level) {
+  assert(level > kUninitialized);
+  assert(level < kOutOfBounds);
+  perf_level = level;
+}
+
+PerfLevel GetPerfLevel() { return perf_level; }
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level_imp.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level_imp.h
new file mode 100644
index 0000000000..28bd185cd1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_level_imp.h
@@ -0,0 +1,14 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+#include "port/port.h"
+#include "rocksdb/perf_level.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+extern thread_local PerfLevel perf_level;
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_step_timer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_step_timer.h
new file mode 100644
index 0000000000..f6c45d773c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/perf_step_timer.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
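The PERF_* macros above are how call sites inside the RocksDB sources touch the context, and they compile away entirely under NPERF_CONTEXT. A sketch of typical use inside a function body (the metric names are fields from the DEF_PERF_CONTEXT_METRICS list):

    void LookupSketch() {
      PERF_TIMER_GUARD(get_from_memtable_time);    // scoped PerfStepTimer, stops in destructor
      PERF_COUNTER_ADD(user_key_comparison_count, 1);
      // ... do work ...
      PERF_TIMER_MEASURE(get_from_memtable_time);  // flush elapsed time, restart the window
      // ... more work, timed until scope exit ...
    }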
+//
+#pragma once
+#include "monitoring/perf_level_imp.h"
+#include "monitoring/statistics_impl.h"
+#include "rocksdb/system_clock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class PerfStepTimer {
+ public:
+  explicit PerfStepTimer(
+      uint64_t* metric, SystemClock* clock = nullptr, bool use_cpu_time = false,
+      PerfLevel enable_level = PerfLevel::kEnableTimeExceptForMutex,
+      Statistics* statistics = nullptr, uint32_t ticker_type = 0)
+      : perf_counter_enabled_(perf_level >= enable_level),
+        use_cpu_time_(use_cpu_time),
+        ticker_type_(ticker_type),
+        clock_((perf_counter_enabled_ || statistics != nullptr)
+                   ? (clock ? clock : SystemClock::Default().get())
+                   : nullptr),
+        start_(0),
+        metric_(metric),
+        statistics_(statistics) {}
+
+  ~PerfStepTimer() { Stop(); }
+
+  void Start() {
+    if (perf_counter_enabled_ || statistics_ != nullptr) {
+      start_ = time_now();
+    }
+  }
+
+  void Measure() {
+    if (start_) {
+      uint64_t now = time_now();
+      *metric_ += now - start_;
+      start_ = now;
+    }
+  }
+
+  void Stop() {
+    if (start_) {
+      uint64_t duration = time_now() - start_;
+      if (perf_counter_enabled_) {
+        *metric_ += duration;
+      }
+
+      if (statistics_ != nullptr) {
+        RecordTick(statistics_, ticker_type_, duration);
+      }
+      start_ = 0;
+    }
+  }
+
+ private:
+  uint64_t time_now() {
+    if (!use_cpu_time_) {
+      return clock_->NowNanos();
+    } else {
+      return clock_->CPUNanos();
+    }
+  }
+
+  const bool perf_counter_enabled_;
+  const bool use_cpu_time_;
+  uint32_t ticker_type_;
+  SystemClock* const clock_;
+  uint64_t start_;
+  uint64_t* metric_;
+  Statistics* statistics_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.cc
new file mode 100644
index 0000000000..f4c022148c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.cc
@@ -0,0 +1,170 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "monitoring/persistent_stats_history.h"
+
+#include <limits>
+#include <string>
+#include <utility>
+
+#include "db/db_impl/db_impl.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+// 10 digit seconds timestamp => [Sep 9, 2001 ~ Nov 20, 2286]
+const int kNowSecondsStringLength = 10;
+const std::string kFormatVersionKeyString =
+    "__persistent_stats_format_version__";
+const std::string kCompatibleVersionKeyString =
+    "__persistent_stats_compatible_version__";
+// Every release maintains two version numbers for persistent stats: Current
+// format version and compatible format version. Current format version
+// designates what type of encoding will be used when writing to stats CF;
+// compatible format version designates the minimum format version that
+// can decode the stats CF encoded using the current format version.
+const uint64_t kStatsCFCurrentFormatVersion = 1;
+const uint64_t kStatsCFCompatibleFormatVersion = 1;
+
+Status DecodePersistentStatsVersionNumber(DBImpl* db, StatsVersionKeyType type,
+                                          uint64_t* version_number) {
+  if (type >= StatsVersionKeyType::kKeyTypeMax) {
+    return Status::InvalidArgument("Invalid stats version key type provided");
+  }
+  std::string key;
+  if (type == StatsVersionKeyType::kFormatVersion) {
+    key = kFormatVersionKeyString;
+  } else if (type == StatsVersionKeyType::kCompatibleVersion) {
+    key = kCompatibleVersionKeyString;
+  }
+  ReadOptions options;
+  options.verify_checksums = true;
+  std::string result;
+  Status s = db->Get(options, db->PersistentStatsColumnFamily(), key, &result);
+  if (!s.ok() || result.empty()) {
+    return Status::NotFound("Persistent stats version key " + key +
+                            " not found.");
+  }
+
+  // read version_number but do nothing in current version
+  *version_number = ParseUint64(result);
+  return Status::OK();
+}
+
+int EncodePersistentStatsKey(uint64_t now_seconds, const std::string& key,
+                             int size, char* buf) {
+  char timestamp[kNowSecondsStringLength + 1];
+  // make time stamp string equal in length to allow sorting by time
+  snprintf(timestamp, sizeof(timestamp), "%010d",
+           static_cast<int>(now_seconds));
+  timestamp[kNowSecondsStringLength] = '\0';
+  return snprintf(buf, size, "%s#%s", timestamp, key.c_str());
+}
+
+void OptimizeForPersistentStats(ColumnFamilyOptions* cfo) {
+  cfo->write_buffer_size = 2 << 20;
+  cfo->target_file_size_base = 2 * 1048576;
+  cfo->max_bytes_for_level_base = 10 * 1048576;
+  cfo->soft_pending_compaction_bytes_limit = 256 * 1048576;
+  cfo->hard_pending_compaction_bytes_limit = 1073741824ul;
+  cfo->compression = kNoCompression;
+}
+
+PersistentStatsHistoryIterator::~PersistentStatsHistoryIterator() {}
+
+bool PersistentStatsHistoryIterator::Valid() const { return valid_; }
+
+Status PersistentStatsHistoryIterator::status() const { return status_; }
+
+void PersistentStatsHistoryIterator::Next() {
+  // increment start_time by 1 to avoid infinite loop
+  AdvanceIteratorByTime(GetStatsTime() + 1, end_time_);
+}
+
+uint64_t PersistentStatsHistoryIterator::GetStatsTime() const { return time_; }
+
+const std::map<std::string, uint64_t>&
+PersistentStatsHistoryIterator::GetStatsMap() const {
+  return stats_map_;
+}
+
+std::pair<uint64_t, std::string> parseKey(const Slice& key,
+                                          uint64_t start_time) {
+  std::pair<uint64_t, std::string> result;
+  std::string key_str = key.ToString();
+  std::string::size_type pos = key_str.find("#");
+  // TODO(Zhongyi): add counters to track parse failures?
+  if (pos == std::string::npos) {
+    result.first = std::numeric_limits<uint64_t>::max();
+    result.second.clear();
+  } else {
+    uint64_t parsed_time = ParseUint64(key_str.substr(0, pos));
+    // skip entries with timestamp smaller than start_time
+    if (parsed_time < start_time) {
+      result.first = std::numeric_limits<uint64_t>::max();
+      result.second = "";
+    } else {
+      result.first = parsed_time;
+      std::string key_resize = key_str.substr(pos + 1);
+      result.second = key_resize;
+    }
+  }
+  return result;
+}
+
+// advance the iterator to the next time between [start_time, end_time)
+// if success, update time_ and stats_map_ with new_time and stats_map
+void PersistentStatsHistoryIterator::AdvanceIteratorByTime(uint64_t start_time,
+                                                           uint64_t end_time) {
+  // try to find next entry in stats_history_ map
+  if (db_impl_ != nullptr) {
+    ReadOptions ro;
+    Iterator* iter =
+        db_impl_->NewIterator(ro, db_impl_->PersistentStatsColumnFamily());
+
+    char timestamp[kNowSecondsStringLength + 1];
+    snprintf(timestamp, sizeof(timestamp), "%010d",
+             static_cast<int>(std::max(time_, start_time)));
+    timestamp[kNowSecondsStringLength] = '\0';
+
+    iter->Seek(timestamp);
+    // no more entries with timestamp >= start_time is found or version key
+    // is found to be incompatible
+    if (!iter->Valid()) {
+      valid_ = false;
+      delete iter;
+      return;
+    }
+    time_ = parseKey(iter->key(), start_time).first;
+    valid_ = true;
+    // check parsed time and invalid if it exceeds end_time
+    if (time_ > end_time) {
+      valid_ = false;
+      delete iter;
+      return;
+    }
+    // find all entries with timestamp equal to time_
+    std::map<std::string, uint64_t> new_stats_map;
+    std::pair<uint64_t, std::string> kv;
+    for (; iter->Valid(); iter->Next()) {
+      kv = parseKey(iter->key(), start_time);
+      if (kv.first != time_) {
+        break;
+      }
+      if (kv.second.compare(kFormatVersionKeyString) == 0) {
+        continue;
+      }
+      new_stats_map[kv.second] = ParseUint64(iter->value().ToString());
+    }
+    stats_map_.swap(new_stats_map);
+    delete iter;
+  } else {
+    valid_ = false;
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.h
new file mode 100644
index 0000000000..7c711fe4ef
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/persistent_stats_history.h
@@ -0,0 +1,83 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
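A small worked example of the encoding implemented in the .cc above (values are illustrative): EncodePersistentStatsKey zero-pads the seconds timestamp to 10 digits so lexicographic key order equals time order, and parseKey splits on the '#' separator:

    char buf[100];
    ROCKSDB_NAMESPACE::EncodePersistentStatsKey(
        1600000000, "rocksdb.block.cache.miss", sizeof(buf), buf);
    // buf now holds "1600000000#rocksdb.block.cache.miss";
    // parseKey(Slice(buf), /*start_time=*/0) yields
    // {1600000000, "rocksdb.block.cache.miss"}.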
+
+#pragma once
+
+#include "db/db_impl/db_impl.h"
+#include "rocksdb/stats_history.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+extern const std::string kFormatVersionKeyString;
+extern const std::string kCompatibleVersionKeyString;
+extern const uint64_t kStatsCFCurrentFormatVersion;
+extern const uint64_t kStatsCFCompatibleFormatVersion;
+
+enum StatsVersionKeyType : uint32_t {
+  kFormatVersion = 1,
+  kCompatibleVersion = 2,
+  kKeyTypeMax = 3
+};
+
+// Read the version number from persistent stats cf depending on type provided
+// stores the version number in `*version_number`
+// returns Status::OK() on success, or other status code on failure
+Status DecodePersistentStatsVersionNumber(DBImpl* db, StatsVersionKeyType type,
+                                          uint64_t* version_number);
+
+// Encode timestamp and stats key into buf
+// Format: timestamp(10 digit) + '#' + key
+// Total length of encoded key will be capped at 100 bytes
+int EncodePersistentStatsKey(uint64_t timestamp, const std::string& key,
+                             int size, char* buf);
+
+void OptimizeForPersistentStats(ColumnFamilyOptions* cfo);
+
+class PersistentStatsHistoryIterator final : public StatsHistoryIterator {
+ public:
+  PersistentStatsHistoryIterator(uint64_t start_time, uint64_t end_time,
+                                 DBImpl* db_impl)
+      : time_(0),
+        start_time_(start_time),
+        end_time_(end_time),
+        valid_(true),
+        db_impl_(db_impl) {
+    AdvanceIteratorByTime(start_time_, end_time_);
+  }
+  ~PersistentStatsHistoryIterator() override;
+  bool Valid() const override;
+  Status status() const override;
+
+  void Next() override;
+  uint64_t GetStatsTime() const override;
+
+  const std::map<std::string, uint64_t>& GetStatsMap() const override;
+
+ private:
+  // advance the iterator to the next stats history record with timestamp
+  // between [start_time, end_time)
+  void AdvanceIteratorByTime(uint64_t start_time, uint64_t end_time);
+
+  // No copying allowed
+  PersistentStatsHistoryIterator(const PersistentStatsHistoryIterator&) =
+      delete;
+  void operator=(const PersistentStatsHistoryIterator&) = delete;
+  PersistentStatsHistoryIterator(PersistentStatsHistoryIterator&&) = delete;
+  PersistentStatsHistoryIterator& operator=(PersistentStatsHistoryIterator&&) =
+      delete;
+
+  uint64_t time_;
+  uint64_t start_time_;
+  uint64_t end_time_;
+  std::map<std::string, uint64_t> stats_map_;
+  Status status_;
+  bool valid_;
+  DBImpl* db_impl_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics.cc
new file mode 100644
index 0000000000..0e9c2250f2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics.cc
@@ -0,0 +1,534 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
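Both this iterator and the in-memory one earlier are consumed through DB::GetStatsHistory; a hedged sketch (error handling elided):

    #include <memory>
    #include "rocksdb/db.h"
    #include "rocksdb/stats_history.h"

    void WalkStatsHistory(ROCKSDB_NAMESPACE::DB* db, uint64_t start, uint64_t end) {
      std::unique_ptr<ROCKSDB_NAMESPACE::StatsHistoryIterator> it;
      db->GetStatsHistory(start, end, &it);  // [start, end) in unix seconds
      for (; it != nullptr && it->Valid(); it->Next()) {
        uint64_t t = it->GetStatsTime();
        for (const auto& kv : it->GetStatsMap()) {
          // kv.first: stat name, kv.second: its value at time t
          (void)t;
        }
      }
    }

Which implementation is returned depends on the persist_stats_to_disk DBOption.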
+//
+#include "rocksdb/statistics.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <cstdio>
+
+#include "monitoring/statistics_impl.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/utilities/customizable_util.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// The order of items listed in Tickers should be the same as
+// the order listed in TickersNameMap
+const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
+    {BLOCK_CACHE_MISS, "rocksdb.block.cache.miss"},
+    {BLOCK_CACHE_HIT, "rocksdb.block.cache.hit"},
+    {BLOCK_CACHE_ADD, "rocksdb.block.cache.add"},
+    {BLOCK_CACHE_ADD_FAILURES, "rocksdb.block.cache.add.failures"},
+    {BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss"},
+    {BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit"},
+    {BLOCK_CACHE_INDEX_ADD, "rocksdb.block.cache.index.add"},
+    {BLOCK_CACHE_INDEX_BYTES_INSERT, "rocksdb.block.cache.index.bytes.insert"},
+    {BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss"},
+    {BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit"},
+    {BLOCK_CACHE_FILTER_ADD, "rocksdb.block.cache.filter.add"},
+    {BLOCK_CACHE_FILTER_BYTES_INSERT,
+     "rocksdb.block.cache.filter.bytes.insert"},
+    {BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss"},
+    {BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit"},
+    {BLOCK_CACHE_DATA_ADD, "rocksdb.block.cache.data.add"},
+    {BLOCK_CACHE_DATA_BYTES_INSERT, "rocksdb.block.cache.data.bytes.insert"},
+    {BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"},
+    {BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"},
+    {BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"},
+    {BLOOM_FILTER_FULL_POSITIVE, "rocksdb.bloom.filter.full.positive"},
+    {BLOOM_FILTER_FULL_TRUE_POSITIVE,
+     "rocksdb.bloom.filter.full.true.positive"},
+    {PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"},
+    {PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"},
+    {SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"},
+    {SIM_BLOCK_CACHE_MISS, "rocksdb.sim.block.cache.miss"},
+    {MEMTABLE_HIT, "rocksdb.memtable.hit"},
+    {MEMTABLE_MISS, "rocksdb.memtable.miss"},
+    {GET_HIT_L0, "rocksdb.l0.hit"},
+    {GET_HIT_L1, "rocksdb.l1.hit"},
+    {GET_HIT_L2_AND_UP, "rocksdb.l2andup.hit"},
+    {COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new"},
+    {COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete"},
+    {COMPACTION_KEY_DROP_RANGE_DEL, "rocksdb.compaction.key.drop.range_del"},
+    {COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user"},
+    {COMPACTION_RANGE_DEL_DROP_OBSOLETE,
+     "rocksdb.compaction.range_del.drop.obsolete"},
+    {COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
+     "rocksdb.compaction.optimized.del.drop.obsolete"},
+    {COMPACTION_CANCELLED, "rocksdb.compaction.cancelled"},
+    {NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"},
+    {NUMBER_KEYS_READ, "rocksdb.number.keys.read"},
+    {NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},
+    {BYTES_WRITTEN, "rocksdb.bytes.written"},
+    {BYTES_READ, "rocksdb.bytes.read"},
+    {NUMBER_DB_SEEK, "rocksdb.number.db.seek"},
+    {NUMBER_DB_NEXT, "rocksdb.number.db.next"},
+    {NUMBER_DB_PREV, "rocksdb.number.db.prev"},
+    {NUMBER_DB_SEEK_FOUND, "rocksdb.number.db.seek.found"},
+    {NUMBER_DB_NEXT_FOUND, "rocksdb.number.db.next.found"},
+    {NUMBER_DB_PREV_FOUND, "rocksdb.number.db.prev.found"},
+    {ITER_BYTES_READ, "rocksdb.db.iter.bytes.read"},
+    {NO_FILE_OPENS, "rocksdb.no.file.opens"},
+    {NO_FILE_ERRORS, "rocksdb.no.file.errors"},
+    {STALL_MICROS, "rocksdb.stall.micros"},
+    {DB_MUTEX_WAIT_MICROS, "rocksdb.db.mutex.wait.micros"},
+
{NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get"}, + {NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read"}, + {NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read"}, + {NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures"}, + {BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked"}, + {BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful"}, + {BLOOM_FILTER_PREFIX_TRUE_POSITIVE, + "rocksdb.bloom.filter.prefix.true.positive"}, + {NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration"}, + {GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls"}, + {WAL_FILE_SYNCED, "rocksdb.wal.synced"}, + {WAL_FILE_BYTES, "rocksdb.wal.bytes"}, + {WRITE_DONE_BY_SELF, "rocksdb.write.self"}, + {WRITE_DONE_BY_OTHER, "rocksdb.write.other"}, + {WRITE_WITH_WAL, "rocksdb.write.wal"}, + {COMPACT_READ_BYTES, "rocksdb.compact.read.bytes"}, + {COMPACT_WRITE_BYTES, "rocksdb.compact.write.bytes"}, + {FLUSH_WRITE_BYTES, "rocksdb.flush.write.bytes"}, + {COMPACT_READ_BYTES_MARKED, "rocksdb.compact.read.marked.bytes"}, + {COMPACT_READ_BYTES_PERIODIC, "rocksdb.compact.read.periodic.bytes"}, + {COMPACT_READ_BYTES_TTL, "rocksdb.compact.read.ttl.bytes"}, + {COMPACT_WRITE_BYTES_MARKED, "rocksdb.compact.write.marked.bytes"}, + {COMPACT_WRITE_BYTES_PERIODIC, "rocksdb.compact.write.periodic.bytes"}, + {COMPACT_WRITE_BYTES_TTL, "rocksdb.compact.write.ttl.bytes"}, + {NUMBER_DIRECT_LOAD_TABLE_PROPERTIES, + "rocksdb.number.direct.load.table.properties"}, + {NUMBER_SUPERVERSION_ACQUIRES, "rocksdb.number.superversion_acquires"}, + {NUMBER_SUPERVERSION_RELEASES, "rocksdb.number.superversion_releases"}, + {NUMBER_SUPERVERSION_CLEANUPS, "rocksdb.number.superversion_cleanups"}, + {NUMBER_BLOCK_COMPRESSED, "rocksdb.number.block.compressed"}, + {NUMBER_BLOCK_DECOMPRESSED, "rocksdb.number.block.decompressed"}, + {NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"}, + {MERGE_OPERATION_TOTAL_TIME, "rocksdb.merge.operation.time.nanos"}, + {FILTER_OPERATION_TOTAL_TIME, "rocksdb.filter.operation.time.nanos"}, + {ROW_CACHE_HIT, "rocksdb.row.cache.hit"}, + {ROW_CACHE_MISS, "rocksdb.row.cache.miss"}, + {READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"}, + {READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"}, + {NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"}, + {NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"}, + {BLOB_DB_NUM_PUT, "rocksdb.blobdb.num.put"}, + {BLOB_DB_NUM_WRITE, "rocksdb.blobdb.num.write"}, + {BLOB_DB_NUM_GET, "rocksdb.blobdb.num.get"}, + {BLOB_DB_NUM_MULTIGET, "rocksdb.blobdb.num.multiget"}, + {BLOB_DB_NUM_SEEK, "rocksdb.blobdb.num.seek"}, + {BLOB_DB_NUM_NEXT, "rocksdb.blobdb.num.next"}, + {BLOB_DB_NUM_PREV, "rocksdb.blobdb.num.prev"}, + {BLOB_DB_NUM_KEYS_WRITTEN, "rocksdb.blobdb.num.keys.written"}, + {BLOB_DB_NUM_KEYS_READ, "rocksdb.blobdb.num.keys.read"}, + {BLOB_DB_BYTES_WRITTEN, "rocksdb.blobdb.bytes.written"}, + {BLOB_DB_BYTES_READ, "rocksdb.blobdb.bytes.read"}, + {BLOB_DB_WRITE_INLINED, "rocksdb.blobdb.write.inlined"}, + {BLOB_DB_WRITE_INLINED_TTL, "rocksdb.blobdb.write.inlined.ttl"}, + {BLOB_DB_WRITE_BLOB, "rocksdb.blobdb.write.blob"}, + {BLOB_DB_WRITE_BLOB_TTL, "rocksdb.blobdb.write.blob.ttl"}, + {BLOB_DB_BLOB_FILE_BYTES_WRITTEN, "rocksdb.blobdb.blob.file.bytes.written"}, + {BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file.bytes.read"}, + {BLOB_DB_BLOB_FILE_SYNCED, "rocksdb.blobdb.blob.file.synced"}, + {BLOB_DB_BLOB_INDEX_EXPIRED_COUNT, + "rocksdb.blobdb.blob.index.expired.count"}, + 
{BLOB_DB_BLOB_INDEX_EXPIRED_SIZE, "rocksdb.blobdb.blob.index.expired.size"}, + {BLOB_DB_BLOB_INDEX_EVICTED_COUNT, + "rocksdb.blobdb.blob.index.evicted.count"}, + {BLOB_DB_BLOB_INDEX_EVICTED_SIZE, "rocksdb.blobdb.blob.index.evicted.size"}, + {BLOB_DB_GC_NUM_FILES, "rocksdb.blobdb.gc.num.files"}, + {BLOB_DB_GC_NUM_NEW_FILES, "rocksdb.blobdb.gc.num.new.files"}, + {BLOB_DB_GC_FAILURES, "rocksdb.blobdb.gc.failures"}, + {BLOB_DB_GC_NUM_KEYS_RELOCATED, "rocksdb.blobdb.gc.num.keys.relocated"}, + {BLOB_DB_GC_BYTES_RELOCATED, "rocksdb.blobdb.gc.bytes.relocated"}, + {BLOB_DB_FIFO_NUM_FILES_EVICTED, "rocksdb.blobdb.fifo.num.files.evicted"}, + {BLOB_DB_FIFO_NUM_KEYS_EVICTED, "rocksdb.blobdb.fifo.num.keys.evicted"}, + {BLOB_DB_FIFO_BYTES_EVICTED, "rocksdb.blobdb.fifo.bytes.evicted"}, + {TXN_PREPARE_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.prepare"}, + {TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD, + "rocksdb.txn.overhead.mutex.old.commit.map"}, + {TXN_DUPLICATE_KEY_OVERHEAD, "rocksdb.txn.overhead.duplicate.key"}, + {TXN_SNAPSHOT_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.snapshot"}, + {TXN_GET_TRY_AGAIN, "rocksdb.txn.get.tryagain"}, + {NUMBER_MULTIGET_KEYS_FOUND, "rocksdb.number.multiget.keys.found"}, + {NO_ITERATOR_CREATED, "rocksdb.num.iterator.created"}, + {NO_ITERATOR_DELETED, "rocksdb.num.iterator.deleted"}, + {BLOCK_CACHE_COMPRESSION_DICT_MISS, + "rocksdb.block.cache.compression.dict.miss"}, + {BLOCK_CACHE_COMPRESSION_DICT_HIT, + "rocksdb.block.cache.compression.dict.hit"}, + {BLOCK_CACHE_COMPRESSION_DICT_ADD, + "rocksdb.block.cache.compression.dict.add"}, + {BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, + "rocksdb.block.cache.compression.dict.bytes.insert"}, + {BLOCK_CACHE_ADD_REDUNDANT, "rocksdb.block.cache.add.redundant"}, + {BLOCK_CACHE_INDEX_ADD_REDUNDANT, + "rocksdb.block.cache.index.add.redundant"}, + {BLOCK_CACHE_FILTER_ADD_REDUNDANT, + "rocksdb.block.cache.filter.add.redundant"}, + {BLOCK_CACHE_DATA_ADD_REDUNDANT, "rocksdb.block.cache.data.add.redundant"}, + {BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT, + "rocksdb.block.cache.compression.dict.add.redundant"}, + {FILES_MARKED_TRASH, "rocksdb.files.marked.trash"}, + {FILES_DELETED_IMMEDIATELY, "rocksdb.files.deleted.immediately"}, + {ERROR_HANDLER_BG_ERROR_COUNT, "rocksdb.error.handler.bg.errro.count"}, + {ERROR_HANDLER_BG_IO_ERROR_COUNT, + "rocksdb.error.handler.bg.io.errro.count"}, + {ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT, + "rocksdb.error.handler.bg.retryable.io.errro.count"}, + {ERROR_HANDLER_AUTORESUME_COUNT, "rocksdb.error.handler.autoresume.count"}, + {ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT, + "rocksdb.error.handler.autoresume.retry.total.count"}, + {ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT, + "rocksdb.error.handler.autoresume.success.count"}, + {MEMTABLE_PAYLOAD_BYTES_AT_FLUSH, + "rocksdb.memtable.payload.bytes.at.flush"}, + {MEMTABLE_GARBAGE_BYTES_AT_FLUSH, + "rocksdb.memtable.garbage.bytes.at.flush"}, + {SECONDARY_CACHE_HITS, "rocksdb.secondary.cache.hits"}, + {VERIFY_CHECKSUM_READ_BYTES, "rocksdb.verify_checksum.read.bytes"}, + {BACKUP_READ_BYTES, "rocksdb.backup.read.bytes"}, + {BACKUP_WRITE_BYTES, "rocksdb.backup.write.bytes"}, + {REMOTE_COMPACT_READ_BYTES, "rocksdb.remote.compact.read.bytes"}, + {REMOTE_COMPACT_WRITE_BYTES, "rocksdb.remote.compact.write.bytes"}, + {HOT_FILE_READ_BYTES, "rocksdb.hot.file.read.bytes"}, + {WARM_FILE_READ_BYTES, "rocksdb.warm.file.read.bytes"}, + {COLD_FILE_READ_BYTES, "rocksdb.cold.file.read.bytes"}, + {HOT_FILE_READ_COUNT, "rocksdb.hot.file.read.count"}, + {WARM_FILE_READ_COUNT, 
"rocksdb.warm.file.read.count"}, + {COLD_FILE_READ_COUNT, "rocksdb.cold.file.read.count"}, + {LAST_LEVEL_READ_BYTES, "rocksdb.last.level.read.bytes"}, + {LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"}, + {NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"}, + {NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"}, + {LAST_LEVEL_SEEK_FILTERED, "rocksdb.last.level.seek.filtered"}, + {LAST_LEVEL_SEEK_FILTER_MATCH, "rocksdb.last.level.seek.filter.match"}, + {LAST_LEVEL_SEEK_DATA, "rocksdb.last.level.seek.data"}, + {LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER, + "rocksdb.last.level.seek.data.useful.no.filter"}, + {LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH, + "rocksdb.last.level.seek.data.useful.filter.match"}, + {NON_LAST_LEVEL_SEEK_FILTERED, "rocksdb.non.last.level.seek.filtered"}, + {NON_LAST_LEVEL_SEEK_FILTER_MATCH, + "rocksdb.non.last.level.seek.filter.match"}, + {NON_LAST_LEVEL_SEEK_DATA, "rocksdb.non.last.level.seek.data"}, + {NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER, + "rocksdb.non.last.level.seek.data.useful.no.filter"}, + {NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH, + "rocksdb.non.last.level.seek.data.useful.filter.match"}, + {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}, + {BLOCK_CHECKSUM_MISMATCH_COUNT, "rocksdb.block.checksum.mismatch.count"}, + {MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"}, + {BLOB_DB_CACHE_MISS, "rocksdb.blobdb.cache.miss"}, + {BLOB_DB_CACHE_HIT, "rocksdb.blobdb.cache.hit"}, + {BLOB_DB_CACHE_ADD, "rocksdb.blobdb.cache.add"}, + {BLOB_DB_CACHE_ADD_FAILURES, "rocksdb.blobdb.cache.add.failures"}, + {BLOB_DB_CACHE_BYTES_READ, "rocksdb.blobdb.cache.bytes.read"}, + {BLOB_DB_CACHE_BYTES_WRITE, "rocksdb.blobdb.cache.bytes.write"}, + {READ_ASYNC_MICROS, "rocksdb.read.async.micros"}, + {ASYNC_READ_ERROR_COUNT, "rocksdb.async.read.error.count"}, + {SECONDARY_CACHE_FILTER_HITS, "rocksdb.secondary.cache.filter.hits"}, + {SECONDARY_CACHE_INDEX_HITS, "rocksdb.secondary.cache.index.hits"}, + {SECONDARY_CACHE_DATA_HITS, "rocksdb.secondary.cache.data.hits"}, + {TABLE_OPEN_PREFETCH_TAIL_MISS, "rocksdb.table.open.prefetch.tail.miss"}, + {TABLE_OPEN_PREFETCH_TAIL_HIT, "rocksdb.table.open.prefetch.tail.hit"}, + {TIMESTAMP_FILTER_TABLE_CHECKED, "rocksdb.timestamp.filter.table.checked"}, + {TIMESTAMP_FILTER_TABLE_FILTERED, + "rocksdb.timestamp.filter.table.filtered"}, + {BYTES_COMPRESSED_FROM, "rocksdb.bytes.compressed.from"}, + {BYTES_COMPRESSED_TO, "rocksdb.bytes.compressed.to"}, + {BYTES_COMPRESSION_BYPASSED, "rocksdb.bytes.compression_bypassed"}, + {BYTES_COMPRESSION_REJECTED, "rocksdb.bytes.compression.rejected"}, + {NUMBER_BLOCK_COMPRESSION_BYPASSED, + "rocksdb.number.block_compression_bypassed"}, + {NUMBER_BLOCK_COMPRESSION_REJECTED, + "rocksdb.number.block_compression_rejected"}, + {BYTES_DECOMPRESSED_FROM, "rocksdb.bytes.decompressed.from"}, + {BYTES_DECOMPRESSED_TO, "rocksdb.bytes.decompressed.to"}, +}; + +const std::vector> HistogramsNameMap = { + {DB_GET, "rocksdb.db.get.micros"}, + {DB_WRITE, "rocksdb.db.write.micros"}, + {COMPACTION_TIME, "rocksdb.compaction.times.micros"}, + {COMPACTION_CPU_TIME, "rocksdb.compaction.times.cpu_micros"}, + {SUBCOMPACTION_SETUP_TIME, "rocksdb.subcompaction.setup.times.micros"}, + {TABLE_SYNC_MICROS, "rocksdb.table.sync.micros"}, + {COMPACTION_OUTFILE_SYNC_MICROS, "rocksdb.compaction.outfile.sync.micros"}, + {WAL_FILE_SYNC_MICROS, "rocksdb.wal.file.sync.micros"}, + {MANIFEST_FILE_SYNC_MICROS, "rocksdb.manifest.file.sync.micros"}, + {TABLE_OPEN_IO_MICROS, 
"rocksdb.table.open.io.micros"}, + {DB_MULTIGET, "rocksdb.db.multiget.micros"}, + {READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros"}, + {READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros"}, + {WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros"}, + {NUM_FILES_IN_SINGLE_COMPACTION, "rocksdb.numfiles.in.singlecompaction"}, + {DB_SEEK, "rocksdb.db.seek.micros"}, + {WRITE_STALL, "rocksdb.db.write.stall"}, + {SST_READ_MICROS, "rocksdb.sst.read.micros"}, + {FILE_READ_FLUSH_MICROS, "rocksdb.file.read.flush.micros"}, + {FILE_READ_COMPACTION_MICROS, "rocksdb.file.read.compaction.micros"}, + {FILE_READ_DB_OPEN_MICROS, "rocksdb.file.read.db.open.micros"}, + {NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"}, + {BYTES_PER_READ, "rocksdb.bytes.per.read"}, + {BYTES_PER_WRITE, "rocksdb.bytes.per.write"}, + {BYTES_PER_MULTIGET, "rocksdb.bytes.per.multiget"}, + {BYTES_COMPRESSED, "rocksdb.bytes.compressed"}, + {BYTES_DECOMPRESSED, "rocksdb.bytes.decompressed"}, + {COMPRESSION_TIMES_NANOS, "rocksdb.compression.times.nanos"}, + {DECOMPRESSION_TIMES_NANOS, "rocksdb.decompression.times.nanos"}, + {READ_NUM_MERGE_OPERANDS, "rocksdb.read.num.merge_operands"}, + {BLOB_DB_KEY_SIZE, "rocksdb.blobdb.key.size"}, + {BLOB_DB_VALUE_SIZE, "rocksdb.blobdb.value.size"}, + {BLOB_DB_WRITE_MICROS, "rocksdb.blobdb.write.micros"}, + {BLOB_DB_GET_MICROS, "rocksdb.blobdb.get.micros"}, + {BLOB_DB_MULTIGET_MICROS, "rocksdb.blobdb.multiget.micros"}, + {BLOB_DB_SEEK_MICROS, "rocksdb.blobdb.seek.micros"}, + {BLOB_DB_NEXT_MICROS, "rocksdb.blobdb.next.micros"}, + {BLOB_DB_PREV_MICROS, "rocksdb.blobdb.prev.micros"}, + {BLOB_DB_BLOB_FILE_WRITE_MICROS, "rocksdb.blobdb.blob.file.write.micros"}, + {BLOB_DB_BLOB_FILE_READ_MICROS, "rocksdb.blobdb.blob.file.read.micros"}, + {BLOB_DB_BLOB_FILE_SYNC_MICROS, "rocksdb.blobdb.blob.file.sync.micros"}, + {BLOB_DB_COMPRESSION_MICROS, "rocksdb.blobdb.compression.micros"}, + {BLOB_DB_DECOMPRESSION_MICROS, "rocksdb.blobdb.decompression.micros"}, + {FLUSH_TIME, "rocksdb.db.flush.micros"}, + {SST_BATCH_SIZE, "rocksdb.sst.batch.size"}, + {NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, + "rocksdb.num.index.and.filter.blocks.read.per.level"}, + {NUM_SST_READ_PER_LEVEL, "rocksdb.num.sst.read.per.level"}, + {ERROR_HANDLER_AUTORESUME_RETRY_COUNT, + "rocksdb.error.handler.autoresume.retry.count"}, + {ASYNC_READ_BYTES, "rocksdb.async.read.bytes"}, + {POLL_WAIT_MICROS, "rocksdb.poll.wait.micros"}, + {PREFETCHED_BYTES_DISCARDED, "rocksdb.prefetched.bytes.discarded"}, + {MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"}, + {NUM_LEVEL_READ_PER_MULTIGET, "rocksdb.num.level.read.per.multiget"}, + {ASYNC_PREFETCH_ABORT_MICROS, "rocksdb.async.prefetch.abort.micros"}, + {TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, + "rocksdb.table.open.prefetch.tail.read.bytes"}, +}; + +std::shared_ptr CreateDBStatistics() { + return std::make_shared(nullptr); +} + +static int RegisterBuiltinStatistics(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + StatisticsImpl::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new StatisticsImpl(nullptr)); + return guard->get(); + }); + return 1; +} + +Status Statistics::CreateFromString(const ConfigOptions& config_options, + const std::string& id, + std::shared_ptr* result) { + static std::once_flag once; + std::call_once(once, [&]() { + RegisterBuiltinStatistics(*(ObjectLibrary::Default().get()), ""); + }); + Status s; + if (id == "" || id == 
StatisticsImpl::kClassName()) { + result->reset(new StatisticsImpl(nullptr)); + } else if (id == kNullptrString) { + result->reset(); + } else { + s = LoadSharedObject(config_options, id, result); + } + return s; +} + +static std::unordered_map stats_type_info = { + {"inner", OptionTypeInfo::AsCustomSharedPtr( + 0, OptionVerificationType::kByNameAllowFromNull, + OptionTypeFlags::kCompareNever)}, +}; + +StatisticsImpl::StatisticsImpl(std::shared_ptr stats) + : stats_(std::move(stats)) { + RegisterOptions("StatisticsOptions", &stats_, &stats_type_info); +} + +StatisticsImpl::~StatisticsImpl() {} + +uint64_t StatisticsImpl::getTickerCount(uint32_t tickerType) const { + MutexLock lock(&aggregate_lock_); + return getTickerCountLocked(tickerType); +} + +uint64_t StatisticsImpl::getTickerCountLocked(uint32_t tickerType) const { + assert(tickerType < TICKER_ENUM_MAX); + uint64_t res = 0; + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + res += per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType]; + } + return res; +} + +void StatisticsImpl::histogramData(uint32_t histogramType, + HistogramData* const data) const { + MutexLock lock(&aggregate_lock_); + getHistogramImplLocked(histogramType)->Data(data); +} + +std::unique_ptr StatisticsImpl::getHistogramImplLocked( + uint32_t histogramType) const { + assert(histogramType < HISTOGRAM_ENUM_MAX); + std::unique_ptr res_hist(new HistogramImpl()); + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + res_hist->Merge( + per_core_stats_.AccessAtCore(core_idx)->histograms_[histogramType]); + } + return res_hist; +} + +std::string StatisticsImpl::getHistogramString(uint32_t histogramType) const { + MutexLock lock(&aggregate_lock_); + return getHistogramImplLocked(histogramType)->ToString(); +} + +void StatisticsImpl::setTickerCount(uint32_t tickerType, uint64_t count) { + { + MutexLock lock(&aggregate_lock_); + setTickerCountLocked(tickerType, count); + } + if (stats_ && tickerType < TICKER_ENUM_MAX) { + stats_->setTickerCount(tickerType, count); + } +} + +void StatisticsImpl::setTickerCountLocked(uint32_t tickerType, uint64_t count) { + assert(tickerType < TICKER_ENUM_MAX); + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + if (core_idx == 0) { + per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType] = count; + } else { + per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType] = 0; + } + } +} + +uint64_t StatisticsImpl::getAndResetTickerCount(uint32_t tickerType) { + uint64_t sum = 0; + { + MutexLock lock(&aggregate_lock_); + assert(tickerType < TICKER_ENUM_MAX); + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + sum += + per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType].exchange( + 0, std::memory_order_relaxed); + } + } + if (stats_ && tickerType < TICKER_ENUM_MAX) { + stats_->setTickerCount(tickerType, 0); + } + return sum; +} + +void StatisticsImpl::recordTick(uint32_t tickerType, uint64_t count) { + if (get_stats_level() <= StatsLevel::kExceptTickers) { + return; + } + if (tickerType < TICKER_ENUM_MAX) { + per_core_stats_.Access()->tickers_[tickerType].fetch_add( + count, std::memory_order_relaxed); + if (stats_) { + stats_->recordTick(tickerType, count); + } + } else { + assert(false); + } +} + +void StatisticsImpl::recordInHistogram(uint32_t histogramType, uint64_t value) { + assert(histogramType < HISTOGRAM_ENUM_MAX); + if (get_stats_level() <= StatsLevel::kExceptHistogramOrTimers) { + return; + } + 
per_core_stats_.Access()->histograms_[histogramType].Add(value); + if (stats_ && histogramType < HISTOGRAM_ENUM_MAX) { + stats_->recordInHistogram(histogramType, value); + } +} + +Status StatisticsImpl::Reset() { + MutexLock lock(&aggregate_lock_); + for (uint32_t i = 0; i < TICKER_ENUM_MAX; ++i) { + setTickerCountLocked(i, 0); + } + for (uint32_t i = 0; i < HISTOGRAM_ENUM_MAX; ++i) { + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + per_core_stats_.AccessAtCore(core_idx)->histograms_[i].Clear(); + } + } + return Status::OK(); +} + +namespace { + +// a buffer size used for temp string buffers +const int kTmpStrBufferSize = 200; + +} // namespace + +std::string StatisticsImpl::ToString() const { + MutexLock lock(&aggregate_lock_); + std::string res; + res.reserve(20000); + for (const auto& t : TickersNameMap) { + assert(t.first < TICKER_ENUM_MAX); + char buffer[kTmpStrBufferSize]; + snprintf(buffer, kTmpStrBufferSize, "%s COUNT : %" PRIu64 "\n", + t.second.c_str(), getTickerCountLocked(t.first)); + res.append(buffer); + } + for (const auto& h : HistogramsNameMap) { + assert(h.first < HISTOGRAM_ENUM_MAX); + char buffer[kTmpStrBufferSize]; + HistogramData hData; + getHistogramImplLocked(h.first)->Data(&hData); + // don't handle failures - buffer should always be big enough and arguments + // should be provided correctly + int ret = + snprintf(buffer, kTmpStrBufferSize, + "%s P50 : %f P95 : %f P99 : %f P100 : %f COUNT : %" PRIu64 + " SUM : %" PRIu64 "\n", + h.second.c_str(), hData.median, hData.percentile95, + hData.percentile99, hData.max, hData.count, hData.sum); + if (ret < 0 || ret >= kTmpStrBufferSize) { + assert(false); + continue; + } + res.append(buffer); + } + res.shrink_to_fit(); + return res; +} + +bool StatisticsImpl::getTickerMap( + std::map* stats_map) const { + assert(stats_map); + if (!stats_map) return false; + stats_map->clear(); + MutexLock lock(&aggregate_lock_); + for (const auto& t : TickersNameMap) { + assert(t.first < TICKER_ENUM_MAX); + (*stats_map)[t.second.c_str()] = getTickerCountLocked(t.first); + } + return true; +} + +bool StatisticsImpl::HistEnabledForType(uint32_t type) const { + return type < HISTOGRAM_ENUM_MAX; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics_impl.h new file mode 100644 index 0000000000..e0dc29d284 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/statistics_impl.h @@ -0,0 +1,144 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
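The statistics_impl.cc code above follows a per-core sharding pattern: recordTick() bumps a core-local atomic with relaxed ordering, while reads such as getTickerCountLocked() take aggregate_lock_ and sum every core's shard. A minimal standalone sketch of the same idea, independent of RocksDB's CoreLocalArray (ShardedCounter, kShards, and the thread-id hash are illustrative assumptions, not part of this patch):

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>
#include <thread>

constexpr size_t kShards = 8;  // stands in for the core count CoreLocalArray detects

struct ShardedCounter {
  std::array<std::atomic<uint64_t>, kShards> shards{};
  std::mutex aggregate_lock;  // serializes cross-shard operations, like Reset()

  void Add(uint64_t n) {
    // Hash the calling thread onto a shard; CoreLocalArray uses the CPU id instead.
    size_t idx =
        std::hash<std::thread::id>{}(std::this_thread::get_id()) % kShards;
    shards[idx].fetch_add(n, std::memory_order_relaxed);
  }

  uint64_t Total() {
    std::lock_guard<std::mutex> lock(aggregate_lock);
    uint64_t sum = 0;
    for (auto& s : shards) sum += s.load(std::memory_order_relaxed);
    return sum;
  }
};

The point of the design is that the hot write path never takes a lock or touches shared cache lines; only the comparatively rare read/aggregate path pays for coherence.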
+// +#pragma once +#include <atomic> +#include <map> +#include <string> +#include <vector> + +#include "monitoring/histogram.h" +#include "port/likely.h" +#include "port/port.h" +#include "rocksdb/statistics.h" +#include "util/core_local.h" +#include "util/mutexlock.h" + +#ifdef __clang__ +#define ROCKSDB_FIELD_UNUSED __attribute__((__unused__)) +#else +#define ROCKSDB_FIELD_UNUSED +#endif // __clang__ + +#ifndef STRINGIFY +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) +#endif + +namespace ROCKSDB_NAMESPACE { + +enum TickersInternal : uint32_t { + INTERNAL_TICKER_ENUM_START = TICKER_ENUM_MAX, + INTERNAL_TICKER_ENUM_MAX +}; + +enum HistogramsInternal : uint32_t { + INTERNAL_HISTOGRAM_START = HISTOGRAM_ENUM_MAX, + INTERNAL_HISTOGRAM_ENUM_MAX +}; + +class StatisticsImpl : public Statistics { + public: + StatisticsImpl(std::shared_ptr<Statistics> stats); + virtual ~StatisticsImpl(); + const char* Name() const override { return kClassName(); } + static const char* kClassName() { return "BasicStatistics"; } + + virtual uint64_t getTickerCount(uint32_t ticker_type) const override; + virtual void histogramData(uint32_t histogram_type, + HistogramData* const data) const override; + std::string getHistogramString(uint32_t histogram_type) const override; + + virtual void setTickerCount(uint32_t ticker_type, uint64_t count) override; + virtual uint64_t getAndResetTickerCount(uint32_t ticker_type) override; + virtual void recordTick(uint32_t ticker_type, uint64_t count) override; + // The function is implemented for now for backward compatibility reasons. + // In case a user explicitly calls it, for example, they may have a wrapped + // Statistics object, passing the call to recordTick() into here, nothing + // will break. + void measureTime(uint32_t histogramType, uint64_t time) override { + recordInHistogram(histogramType, time); + } + virtual void recordInHistogram(uint32_t histogram_type, + uint64_t value) override; + + virtual Status Reset() override; + virtual std::string ToString() const override; + virtual bool getTickerMap(std::map<std::string, uint64_t>*) const override; + virtual bool HistEnabledForType(uint32_t type) const override; + + const Customizable* Inner() const override { return stats_.get(); } + + private: + // If non-nullptr, forwards updates to the object pointed to by `stats_`. + std::shared_ptr<Statistics> stats_; + // Synchronizes anything that operates across other cores' local data, + // such that operations like Reset() can be performed atomically. + mutable port::Mutex aggregate_lock_; + + // The ticker/histogram data are stored in this structure, which we will store + // per-core. It is cache-aligned, so tickers/histograms belonging to different + // cores can never share the same cache line.
+ // + // Alignment attributes expand to nothing depending on the platform + struct ALIGN_AS(CACHE_LINE_SIZE) StatisticsData { + std::atomic_uint_fast64_t tickers_[INTERNAL_TICKER_ENUM_MAX] = {{0}}; + HistogramImpl histograms_[INTERNAL_HISTOGRAM_ENUM_MAX]; +#ifndef HAVE_ALIGNED_NEW + char + padding[(CACHE_LINE_SIZE - + (INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) + + INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) % + CACHE_LINE_SIZE)] ROCKSDB_FIELD_UNUSED; +#endif + void* operator new(size_t s) { return port::cacheline_aligned_alloc(s); } + void* operator new[](size_t s) { return port::cacheline_aligned_alloc(s); } + void operator delete(void* p) { port::cacheline_aligned_free(p); } + void operator delete[](void* p) { port::cacheline_aligned_free(p); } + }; + +#ifndef TEST_CACHE_LINE_SIZE + static_assert(sizeof(StatisticsData) % CACHE_LINE_SIZE == 0, + "Expected " TOSTRING(CACHE_LINE_SIZE) "-byte aligned"); +#endif + + CoreLocalArray per_core_stats_; + + uint64_t getTickerCountLocked(uint32_t ticker_type) const; + std::unique_ptr getHistogramImplLocked( + uint32_t histogram_type) const; + void setTickerCountLocked(uint32_t ticker_type, uint64_t count); +}; + +// Utility functions +inline void RecordInHistogram(Statistics* statistics, uint32_t histogram_type, + uint64_t value) { + if (statistics) { + statistics->recordInHistogram(histogram_type, value); + } +} + +inline void RecordTimeToHistogram(Statistics* statistics, + uint32_t histogram_type, uint64_t value) { + if (statistics) { + statistics->reportTimeToHistogram(histogram_type, value); + } +} + +inline void RecordTick(Statistics* statistics, uint32_t ticker_type, + uint64_t count = 1) { + if (statistics) { + statistics->recordTick(ticker_type, count); + } +} + +inline void SetTickerCount(Statistics* statistics, uint32_t ticker_type, + uint64_t count) { + if (statistics) { + statistics->setTickerCount(ticker_type, count); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_impl.cc new file mode 100644 index 0000000000..9619dfd81e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_impl.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
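The StatisticsData struct above is the other half of that design: ALIGN_AS(CACHE_LINE_SIZE) plus explicit padding guarantees that two cores' shards never share a cache line, so relaxed increments never cause false sharing. The same guard in plain standard C++ (kCacheLine = 64 is an assumption here; RocksDB derives CACHE_LINE_SIZE per platform):

#include <atomic>
#include <cstddef>
#include <cstdint>

constexpr std::size_t kCacheLine = 64;  // assumed line size; see also
                                        // std::hardware_destructive_interference_size

struct alignas(kCacheLine) PaddedCounter {
  std::atomic<uint64_t> value{0};
  // alignas on the struct rounds sizeof(PaddedCounter) up to a multiple of
  // kCacheLine, so adjacent elements of `PaddedCounter counters[N];` always
  // land on distinct cache lines.
};

static_assert(sizeof(PaddedCounter) % kCacheLine == 0,
              "each counter must occupy whole cache lines");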
+// + +#include + +#include "rocksdb/env.h" +#include "rocksdb/thread_status.h" +#include "util/string_util.h" +#include "util/thread_operation.h" + +namespace ROCKSDB_NAMESPACE { + +#ifdef ROCKSDB_USING_THREAD_STATUS +std::string ThreadStatus::GetThreadTypeName( + ThreadStatus::ThreadType thread_type) { + switch (thread_type) { + case ThreadStatus::ThreadType::HIGH_PRIORITY: + return "High Pri"; + case ThreadStatus::ThreadType::LOW_PRIORITY: + return "Low Pri"; + case ThreadStatus::ThreadType::USER: + return "User"; + case ThreadStatus::ThreadType::BOTTOM_PRIORITY: + return "Bottom Pri"; + case ThreadStatus::ThreadType::NUM_THREAD_TYPES: + assert(false); + } + return "Unknown"; +} + +const std::string& ThreadStatus::GetOperationName( + ThreadStatus::OperationType op_type) { + if (op_type < 0 || op_type >= NUM_OP_TYPES) { + return global_operation_table[OP_UNKNOWN].name; + } + return global_operation_table[op_type].name; +} + +const std::string& ThreadStatus::GetOperationStageName( + ThreadStatus::OperationStage stage) { + if (stage < 0 || stage >= NUM_OP_STAGES) { + return global_op_stage_table[STAGE_UNKNOWN].name; + } + return global_op_stage_table[stage].name; +} + +const std::string& ThreadStatus::GetStateName( + ThreadStatus::StateType state_type) { + if (state_type < 0 || state_type >= NUM_STATE_TYPES) { + return global_state_table[STATE_UNKNOWN].name; + } + return global_state_table[state_type].name; +} + +const std::string ThreadStatus::MicrosToString(uint64_t micros) { + if (micros == 0) { + return ""; + } + const int kBufferLen = 100; + char buffer[kBufferLen]; + AppendHumanMicros(micros, buffer, kBufferLen, false); + return std::string(buffer); +} + +const std::string& ThreadStatus::GetOperationPropertyName( + ThreadStatus::OperationType op_type, int i) { + static const std::string empty_str = ""; + switch (op_type) { + case ThreadStatus::OP_COMPACTION: + if (i >= NUM_COMPACTION_PROPERTIES) { + return empty_str; + } + return compaction_operation_properties[i].name; + case ThreadStatus::OP_FLUSH: + if (i >= NUM_FLUSH_PROPERTIES) { + return empty_str; + } + return flush_operation_properties[i].name; + default: + return empty_str; + } +} + +std::map ThreadStatus::InterpretOperationProperties( + ThreadStatus::OperationType op_type, const uint64_t* op_properties) { + int num_properties; + switch (op_type) { + case OP_COMPACTION: + num_properties = NUM_COMPACTION_PROPERTIES; + break; + case OP_FLUSH: + num_properties = NUM_FLUSH_PROPERTIES; + break; + default: + num_properties = 0; + } + + std::map property_map; + for (int i = 0; i < num_properties; ++i) { + if (op_type == OP_COMPACTION && i == COMPACTION_INPUT_OUTPUT_LEVEL) { + property_map.insert({"BaseInputLevel", op_properties[i] >> 32}); + property_map.insert( + {"OutputLevel", op_properties[i] % (uint64_t(1) << 32U)}); + } else if (op_type == OP_COMPACTION && i == COMPACTION_PROP_FLAGS) { + property_map.insert({"IsManual", ((op_properties[i] & 2) >> 1)}); + property_map.insert({"IsDeletion", ((op_properties[i] & 4) >> 2)}); + property_map.insert({"IsTrivialMove", ((op_properties[i] & 8) >> 3)}); + } else { + property_map.insert( + {GetOperationPropertyName(op_type, i), op_properties[i]}); + } + } + return property_map; +} + +#else + +std::string ThreadStatus::GetThreadTypeName( + ThreadStatus::ThreadType /*thread_type*/) { + static std::string dummy_str = ""; + return dummy_str; +} + +const std::string& ThreadStatus::GetOperationName( + ThreadStatus::OperationType /*op_type*/) { + static std::string dummy_str = ""; + return 
dummy_str; +} + +const std::string& ThreadStatus::GetOperationStageName( + ThreadStatus::OperationStage /*stage*/) { + static std::string dummy_str = ""; + return dummy_str; +} + +const std::string& ThreadStatus::GetStateName( + ThreadStatus::StateType /*state_type*/) { + static std::string dummy_str = ""; + return dummy_str; +} + +const std::string ThreadStatus::MicrosToString(uint64_t /*op_elapsed_time*/) { + static std::string dummy_str = ""; + return dummy_str; +} + +const std::string& ThreadStatus::GetOperationPropertyName( + ThreadStatus::OperationType /*op_type*/, int /*i*/) { + static std::string dummy_str = ""; + return dummy_str; +} + +std::map ThreadStatus::InterpretOperationProperties( + ThreadStatus::OperationType /*op_type*/, + const uint64_t* /*op_properties*/) { + return std::map(); +} + +#endif // ROCKSDB_USING_THREAD_STATUS +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.cc new file mode 100644 index 0000000000..37fcef62b0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.cc @@ -0,0 +1,328 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "monitoring/thread_status_updater.h" + +#include + +#include "port/likely.h" +#include "rocksdb/env.h" +#include "rocksdb/system_clock.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +#ifdef ROCKSDB_USING_THREAD_STATUS + +thread_local ThreadStatusData* ThreadStatusUpdater::thread_status_data_ = + nullptr; + +void ThreadStatusUpdater::RegisterThread(ThreadStatus::ThreadType ttype, + uint64_t thread_id) { + if (UNLIKELY(thread_status_data_ == nullptr)) { + thread_status_data_ = new ThreadStatusData(); + thread_status_data_->thread_type = ttype; + thread_status_data_->thread_id = thread_id; + std::lock_guard lck(thread_list_mutex_); + thread_data_set_.insert(thread_status_data_); + } + + ClearThreadOperationProperties(); +} + +void ThreadStatusUpdater::UnregisterThread() { + if (thread_status_data_ != nullptr) { + std::lock_guard lck(thread_list_mutex_); + thread_data_set_.erase(thread_status_data_); + delete thread_status_data_; + thread_status_data_ = nullptr; + } +} + +void ThreadStatusUpdater::ResetThreadStatus() { + ClearThreadState(); + ClearThreadOperation(); + SetColumnFamilyInfoKey(nullptr); +} + +void ThreadStatusUpdater::SetEnableTracking(bool enable_tracking) { + auto* data = Get(); + if (data == nullptr) { + return; + } + data->enable_tracking.store(enable_tracking, std::memory_order_relaxed); +} + +void ThreadStatusUpdater::SetColumnFamilyInfoKey(const void* cf_key) { + auto* data = Get(); + if (data == nullptr) { + return; + } + data->cf_key.store(const_cast(cf_key), std::memory_order_relaxed); +} + +const void* ThreadStatusUpdater::GetColumnFamilyInfoKey() { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return nullptr; + } + return data->cf_key.load(std::memory_order_relaxed); +} + +void ThreadStatusUpdater::SetThreadOperation( + const ThreadStatus::OperationType type) { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return; + } + // NOTE: Our practice here is to set all the thread operation properties + // and stage before we set thread 
operation, and thread operation + // will be set in std::memory_order_release. This is to ensure + // whenever a thread operation is not OP_UNKNOWN, we will always + // have a consistent information on its properties. + data->operation_type.store(type, std::memory_order_release); + if (type == ThreadStatus::OP_UNKNOWN) { + data->operation_stage.store(ThreadStatus::STAGE_UNKNOWN, + std::memory_order_relaxed); + ClearThreadOperationProperties(); + } +} + +ThreadStatus::OperationType ThreadStatusUpdater::GetThreadOperation() { + ThreadStatusData* data = GetLocalThreadStatus(); + if (data == nullptr) { + return ThreadStatus::OperationType::OP_UNKNOWN; + } + return data->operation_type.load(std::memory_order_relaxed); +} + +void ThreadStatusUpdater::SetThreadOperationProperty(int i, uint64_t value) { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return; + } + data->op_properties[i].store(value, std::memory_order_relaxed); +} + +void ThreadStatusUpdater::IncreaseThreadOperationProperty(int i, + uint64_t delta) { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return; + } + data->op_properties[i].fetch_add(delta, std::memory_order_relaxed); +} + +void ThreadStatusUpdater::SetOperationStartTime(const uint64_t start_time) { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return; + } + data->op_start_time.store(start_time, std::memory_order_relaxed); +} + +void ThreadStatusUpdater::ClearThreadOperation() { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return; + } + data->operation_stage.store(ThreadStatus::STAGE_UNKNOWN, + std::memory_order_relaxed); + data->operation_type.store(ThreadStatus::OP_UNKNOWN, + std::memory_order_relaxed); + ClearThreadOperationProperties(); +} + +void ThreadStatusUpdater::ClearThreadOperationProperties() { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return; + } + for (int i = 0; i < ThreadStatus::kNumOperationProperties; ++i) { + data->op_properties[i].store(0, std::memory_order_relaxed); + } +} + +ThreadStatus::OperationStage ThreadStatusUpdater::SetThreadOperationStage( + ThreadStatus::OperationStage stage) { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return ThreadStatus::STAGE_UNKNOWN; + } + return data->operation_stage.exchange(stage, std::memory_order_relaxed); +} + +void ThreadStatusUpdater::SetThreadState(const ThreadStatus::StateType type) { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return; + } + data->state_type.store(type, std::memory_order_relaxed); +} + +void ThreadStatusUpdater::ClearThreadState() { + auto* data = GetLocalThreadStatus(); + if (data == nullptr) { + return; + } + data->state_type.store(ThreadStatus::STATE_UNKNOWN, + std::memory_order_relaxed); +} + +Status ThreadStatusUpdater::GetThreadList( + std::vector* thread_list) { + thread_list->clear(); + std::vector> valid_list; + uint64_t now_micros = SystemClock::Default()->NowMicros(); + + std::lock_guard lck(thread_list_mutex_); + for (auto* thread_data : thread_data_set_) { + assert(thread_data); + auto thread_id = thread_data->thread_id.load(std::memory_order_relaxed); + auto thread_type = thread_data->thread_type.load(std::memory_order_relaxed); + // Since any change to cf_info_map requires thread_list_mutex, + // which is currently held by GetThreadList(), here we can safely + // use "memory_order_relaxed" to load the cf_key. 
+ auto cf_key = thread_data->cf_key.load(std::memory_order_relaxed); + + ThreadStatus::OperationType op_type = ThreadStatus::OP_UNKNOWN; + ThreadStatus::OperationStage op_stage = ThreadStatus::STAGE_UNKNOWN; + ThreadStatus::StateType state_type = ThreadStatus::STATE_UNKNOWN; + uint64_t op_elapsed_micros = 0; + uint64_t op_props[ThreadStatus::kNumOperationProperties] = {0}; + + auto iter = cf_info_map_.find(cf_key); + if (iter != cf_info_map_.end()) { + op_type = thread_data->operation_type.load(std::memory_order_acquire); + // display lower-level info only when higher-level info is available. + if (op_type != ThreadStatus::OP_UNKNOWN) { + op_elapsed_micros = now_micros - thread_data->op_start_time.load( + std::memory_order_relaxed); + op_stage = thread_data->operation_stage.load(std::memory_order_relaxed); + state_type = thread_data->state_type.load(std::memory_order_relaxed); + for (int i = 0; i < ThreadStatus::kNumOperationProperties; ++i) { + op_props[i] = + thread_data->op_properties[i].load(std::memory_order_relaxed); + } + } + } + + thread_list->emplace_back( + thread_id, thread_type, + iter != cf_info_map_.end() ? iter->second.db_name : "", + iter != cf_info_map_.end() ? iter->second.cf_name : "", op_type, + op_elapsed_micros, op_stage, op_props, state_type); + } + + return Status::OK(); +} + +ThreadStatusData* ThreadStatusUpdater::GetLocalThreadStatus() { + if (thread_status_data_ == nullptr) { + return nullptr; + } + if (!thread_status_data_->enable_tracking.load(std::memory_order_relaxed)) { + return nullptr; + } + return thread_status_data_; +} + +void ThreadStatusUpdater::NewColumnFamilyInfo(const void* db_key, + const std::string& db_name, + const void* cf_key, + const std::string& cf_name) { + // Acquiring same lock as GetThreadList() to guarantee + // a consistent view of global column family table (cf_info_map). + std::lock_guard lck(thread_list_mutex_); + + cf_info_map_.emplace(std::piecewise_construct, std::make_tuple(cf_key), + std::make_tuple(db_key, db_name, cf_name)); + db_key_map_[db_key].insert(cf_key); +} + +void ThreadStatusUpdater::EraseColumnFamilyInfo(const void* cf_key) { + // Acquiring same lock as GetThreadList() to guarantee + // a consistent view of global column family table (cf_info_map). + std::lock_guard lck(thread_list_mutex_); + + auto cf_pair = cf_info_map_.find(cf_key); + if (cf_pair != cf_info_map_.end()) { + // Remove its entry from db_key_map_ by the following steps: + // 1. Obtain the entry in db_key_map_ whose set contains cf_key + // 2. Remove it from the set. + ConstantColumnFamilyInfo& cf_info = cf_pair->second; + auto db_pair = db_key_map_.find(cf_info.db_key); + assert(db_pair != db_key_map_.end()); + size_t result __attribute__((__unused__)); + result = db_pair->second.erase(cf_key); + assert(result); + cf_info_map_.erase(cf_pair); + } +} + +void ThreadStatusUpdater::EraseDatabaseInfo(const void* db_key) { + // Acquiring same lock as GetThreadList() to guarantee + // a consistent view of global column family table (cf_info_map). + std::lock_guard lck(thread_list_mutex_); + auto db_pair = db_key_map_.find(db_key); + if (UNLIKELY(db_pair == db_key_map_.end())) { + // In some occasional cases such as DB::Open fails, we won't + // register ColumnFamilyInfo for a db. 
+ return; + } + + for (auto cf_key : db_pair->second) { + auto cf_pair = cf_info_map_.find(cf_key); + if (cf_pair != cf_info_map_.end()) { + cf_info_map_.erase(cf_pair); + } + } + db_key_map_.erase(db_key); +} + +#else + +void ThreadStatusUpdater::RegisterThread(ThreadStatus::ThreadType /*ttype*/, + uint64_t /*thread_id*/) {} + +void ThreadStatusUpdater::UnregisterThread() {} + +void ThreadStatusUpdater::ResetThreadStatus() {} + +void ThreadStatusUpdater::SetColumnFamilyInfoKey(const void* /*cf_key*/) {} + +void ThreadStatusUpdater::SetThreadOperation( + const ThreadStatus::OperationType /*type*/) {} + +void ThreadStatusUpdater::ClearThreadOperation() {} + +void ThreadStatusUpdater::SetThreadState( + const ThreadStatus::StateType /*type*/) {} + +void ThreadStatusUpdater::ClearThreadState() {} + +Status ThreadStatusUpdater::GetThreadList( + std::vector<ThreadStatus>* /*thread_list*/) { + return Status::NotSupported( + "GetThreadList is not supported in the current running environment."); +} + +void ThreadStatusUpdater::NewColumnFamilyInfo(const void* /*db_key*/, + const std::string& /*db_name*/, + const void* /*cf_key*/, + const std::string& /*cf_name*/) {} + +void ThreadStatusUpdater::EraseColumnFamilyInfo(const void* /*cf_key*/) {} + +void ThreadStatusUpdater::EraseDatabaseInfo(const void* /*db_key*/) {} + +void ThreadStatusUpdater::SetThreadOperationProperty(int /*i*/, + uint64_t /*value*/) {} + +void ThreadStatusUpdater::IncreaseThreadOperationProperty(int /*i*/, + uint64_t /*delta*/) {} + +#endif // ROCKSDB_USING_THREAD_STATUS +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.h new file mode 100644 index 0000000000..696063cb46 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater.h @@ -0,0 +1,226 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// The implementation of ThreadStatus. +// +// Note that we make get and set access to ThreadStatusData lockless. +// As a result, ThreadStatusData as a whole is not atomic. However, +// we guarantee consistent ThreadStatusData all the time whenever a +// user calls GetThreadList(). This consistency guarantee is achieved +// by having the following constraints in the internal implementation +// of set and get order: +// +// 1. When resetting any information in ThreadStatusData, always start by +// clearing up the lower-level information first. +// 2. When setting any information in ThreadStatusData, always start by +// setting the higher-level information. +// 3. When returning ThreadStatusData to the user, fields are fetched from +// higher-level to lower-level. In addition, when there is a nullptr +// in one field, all fields at a lower level than that field +// should be ignored. +// +// The high to low level information would be: +// thread_id > thread_type > db > cf > operation > state +// +// This means a user might not always get full information, but whatever +// is returned by GetThreadList() is guaranteed to be consistent.
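The ordering contract spelled out in the comment above is the classic release/acquire publication pattern: lower-level fields are stored first with relaxed ordering, the higher-level field is stored last with memory_order_release, and a reader that observes it via memory_order_acquire is guaranteed to see all the earlier stores. A self-contained sketch of the pattern (the two globals and the value 42 are illustrative only, not from this patch):

#include <atomic>
#include <cstdint>

std::atomic<uint64_t> op_property{0};
std::atomic<int> op_type{0};  // 0 plays the role of OP_UNKNOWN

void Writer() {
  op_property.store(42, std::memory_order_relaxed);  // lower-level info first
  op_type.store(1, std::memory_order_release);       // publish higher-level info
}

uint64_t Reader() {
  // If the published type is visible, this acquire load synchronizes-with the
  // release store above, so the relaxed property load reads the matching value.
  if (op_type.load(std::memory_order_acquire) != 0) {
    return op_property.load(std::memory_order_relaxed);
  }
  return 0;  // higher level unknown: ignore everything below it
}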
+#pragma once +#include <atomic> +#include <list> +#include <memory> +#include <mutex> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "port/port.h" +#include "rocksdb/status.h" +#include "rocksdb/thread_status.h" +#include "util/thread_operation.h" + +namespace ROCKSDB_NAMESPACE { + +class ColumnFamilyHandle; + +// The structure that keeps constant information about a column family. +struct ConstantColumnFamilyInfo { +#ifdef ROCKSDB_USING_THREAD_STATUS + public: + ConstantColumnFamilyInfo(const void* _db_key, const std::string& _db_name, + const std::string& _cf_name) + : db_key(_db_key), db_name(_db_name), cf_name(_cf_name) {} + const void* db_key; + const std::string db_name; + const std::string cf_name; +#endif // ROCKSDB_USING_THREAD_STATUS +}; + +// The internal data structure that is used to reflect the current +// status of a thread using a set of atomic pointers. +struct ThreadStatusData { +#ifdef ROCKSDB_USING_THREAD_STATUS + explicit ThreadStatusData() { + enable_tracking.store(false); + thread_id.store(0); + thread_type.store(ThreadStatus::USER); + cf_key.store(nullptr); + operation_type.store(ThreadStatus::OP_UNKNOWN); + op_start_time.store(0); + state_type.store(ThreadStatus::STATE_UNKNOWN); + } + + // A flag to indicate whether the thread tracking is enabled + // in the current thread. + // If set to false, then SetThreadOperation and SetThreadState + // will be no-op. + std::atomic<bool> enable_tracking; + + std::atomic<uint64_t> thread_id; + std::atomic<ThreadStatus::ThreadType> thread_type; + std::atomic<void*> cf_key; + std::atomic<ThreadStatus::OperationType> operation_type; + std::atomic<uint64_t> op_start_time; + std::atomic<ThreadStatus::OperationStage> operation_stage; + std::atomic<uint64_t> op_properties[ThreadStatus::kNumOperationProperties]; + std::atomic<ThreadStatus::StateType> state_type; +#endif // ROCKSDB_USING_THREAD_STATUS +}; + +// The class that stores and updates the status of the current thread +// using a thread-local ThreadStatusData. +// +// In most cases, you should use ThreadStatusUtil to update +// the status of the current thread instead of using ThreadStatusUpdater +// directly. +// +// @see ThreadStatusUtil +class ThreadStatusUpdater { + public: + ThreadStatusUpdater() {} + + // Releases all ThreadStatusData of all active threads. + virtual ~ThreadStatusUpdater() {} + + // Unregister the current thread. + void UnregisterThread(); + + // Reset the status of the current thread. This includes resetting + // ColumnFamilyInfoKey, ThreadOperation, and ThreadState. + void ResetThreadStatus(); + + // Set the id of the current thread. + void SetThreadID(uint64_t thread_id); + + // Register the current thread for tracking. + void RegisterThread(ThreadStatus::ThreadType ttype, uint64_t thread_id); + + void SetEnableTracking(bool enable_tracking); + + // Update the column-family info of the current thread by setting + // its thread-local pointer of ThreadStatusData to the correct entry. + void SetColumnFamilyInfoKey(const void* cf_key); + + // Returns the column family info key. + const void* GetColumnFamilyInfoKey(); + + // Update the thread operation of the current thread. + void SetThreadOperation(const ThreadStatus::OperationType type); + + // Return the thread operation of the current thread. + ThreadStatus::OperationType GetThreadOperation(); + + // The start time of the current thread operation. It is in the format + // of micro-seconds since some fixed point in time. + void SetOperationStartTime(const uint64_t start_time); + + // Set the "i"th property of the current operation.
+ // + // NOTE: Our practice here is to set all the thread operation properties + // and stage before we set thread operation, and thread operation + // will be set in std::memory_order_release. This is to ensure + // whenever a thread operation is not OP_UNKNOWN, we will always + // have consistent information on its properties. + void SetThreadOperationProperty(int i, uint64_t value); + + // Increase the "i"th property of the current operation with + // the specified delta. + void IncreaseThreadOperationProperty(int i, uint64_t delta); + + // Update the thread operation stage of the current thread. + ThreadStatus::OperationStage SetThreadOperationStage( + const ThreadStatus::OperationStage stage); + + // Clear thread operation of the current thread. + void ClearThreadOperation(); + + // Reset all thread-operation-properties to 0. + void ClearThreadOperationProperties(); + + // Update the thread state of the current thread. + void SetThreadState(const ThreadStatus::StateType type); + + // Clear the thread state of the current thread. + void ClearThreadState(); + + // Obtain the status of all active registered threads. + Status GetThreadList(std::vector<ThreadStatus>* thread_list); + + // Create an entry in the global ColumnFamilyInfo table for the + // specified column family. This function should be called only + // when the current thread does not hold db_mutex. + void NewColumnFamilyInfo(const void* db_key, const std::string& db_name, + const void* cf_key, const std::string& cf_name); + + // Erase all ConstantColumnFamilyInfo that is associated with the + // specified db instance. This function should be called only when + // the current thread does not hold db_mutex. + void EraseDatabaseInfo(const void* db_key); + + // Erase the ConstantColumnFamilyInfo that is associated with the + // specified ColumnFamilyData. This function should be called only + // when the current thread does not hold db_mutex. + void EraseColumnFamilyInfo(const void* cf_key); + + // Verifies whether the input ColumnFamilyHandles match + // the information stored in the current cf_info_map. + void TEST_VerifyColumnFamilyInfoMap( + const std::vector<ColumnFamilyHandle*>& handles, bool check_exist); + + protected: +#ifdef ROCKSDB_USING_THREAD_STATUS + // The thread-local variable for storing thread status. + static thread_local ThreadStatusData* thread_status_data_; + + // Returns the pointer to the thread status data only when the + // thread status data is non-null and has enable_tracking == true. + ThreadStatusData* GetLocalThreadStatus(); + + // Directly returns the pointer to thread_status_data_ without + // checking whether enable_tracking is true or not. + ThreadStatusData* Get() { return thread_status_data_; } + + // The mutex that protects cf_info_map and db_key_map. + std::mutex thread_list_mutex_; + + // The current status data of all active threads. + std::unordered_set<ThreadStatusData*> thread_data_set_; + + // A global map that keeps the column family information. It is stored + // globally instead of inside DB to avoid the situation where DB is + // closing while the GetThreadList function has already gotten the pointer + // to its ConstantColumnFamilyInfo. + std::unordered_map<const void*, ConstantColumnFamilyInfo> cf_info_map_; + + // A db_key to cf_key map that allows erasing elements in cf_info_map + // associated to the same db_key faster.
+ std::unordered_map<const void*, std::unordered_set<const void*>> db_key_map_; + +#else + static ThreadStatusData* thread_status_data_; +#endif // ROCKSDB_USING_THREAD_STATUS +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater_debug.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater_debug.cc new file mode 100644 index 0000000000..464c23bbaa --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_updater_debug.cc @@ -0,0 +1,43 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include <mutex> + +#include "db/column_family.h" +#include "monitoring/thread_status_updater.h" +#include "util/cast_util.h" + +namespace ROCKSDB_NAMESPACE { + +#ifndef NDEBUG +#ifdef ROCKSDB_USING_THREAD_STATUS +void ThreadStatusUpdater::TEST_VerifyColumnFamilyInfoMap( + const std::vector<ColumnFamilyHandle*>& handles, bool check_exist) { + std::unique_lock<std::mutex> lock(thread_list_mutex_); + if (check_exist) { + assert(cf_info_map_.size() == handles.size()); + } + for (auto* handle : handles) { + auto* cfd = static_cast_with_check<ColumnFamilyHandleImpl>(handle)->cfd(); + auto iter __attribute__((__unused__)) = cf_info_map_.find(cfd); + if (check_exist) { + assert(iter != cf_info_map_.end()); + assert(iter->second.cf_name == cfd->GetName()); + } else { + assert(iter == cf_info_map_.end()); + } + } +} + +#else + +void ThreadStatusUpdater::TEST_VerifyColumnFamilyInfoMap( + const std::vector<ColumnFamilyHandle*>& /*handles*/, bool /*check_exist*/) { +} + +#endif // ROCKSDB_USING_THREAD_STATUS +#endif // !NDEBUG + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.cc new file mode 100644 index 0000000000..9b66dc28e8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.cc @@ -0,0 +1,208 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory).
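EraseDatabaseInfo() above stays cheap because db_key_map_ acts as a reverse index: each db key maps to the set of its cf keys, so tearing down one database touches only its own entries instead of scanning all of cf_info_map_. The two-map pattern in isolation (the map names and std::string payload here are illustrative, not from this patch):

#include <string>
#include <unordered_map>
#include <unordered_set>

std::unordered_map<const void*, std::string> cf_info;  // cf_key -> info
std::unordered_map<const void*, std::unordered_set<const void*>>
    by_db;  // db_key -> its cf_keys

void EraseDatabase(const void* db_key) {
  auto it = by_db.find(db_key);
  if (it == by_db.end()) return;  // e.g. DB::Open failed before registration
  for (const void* cf_key : it->second) {
    cf_info.erase(cf_key);  // O(#cfs of this db), not O(all cfs)
  }
  by_db.erase(it);
}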
+ +#include "monitoring/thread_status_util.h" + +#include "monitoring/thread_status_updater.h" +#include "rocksdb/env.h" +#include "rocksdb/system_clock.h" + +namespace ROCKSDB_NAMESPACE { + +#ifdef ROCKSDB_USING_THREAD_STATUS +thread_local ThreadStatusUpdater* + ThreadStatusUtil::thread_updater_local_cache_ = nullptr; +thread_local bool ThreadStatusUtil::thread_updater_initialized_ = false; + +void ThreadStatusUtil::RegisterThread(const Env* env, + ThreadStatus::ThreadType thread_type) { + if (!MaybeInitThreadLocalUpdater(env)) { + return; + } + assert(thread_updater_local_cache_); + thread_updater_local_cache_->RegisterThread(thread_type, env->GetThreadID()); +} + +void ThreadStatusUtil::UnregisterThread() { + thread_updater_initialized_ = false; + if (thread_updater_local_cache_ != nullptr) { + thread_updater_local_cache_->UnregisterThread(); + thread_updater_local_cache_ = nullptr; + } +} + +void ThreadStatusUtil::SetEnableTracking(bool enable_tracking) { + if (thread_updater_local_cache_ == nullptr) { + return; + } + thread_updater_local_cache_->SetEnableTracking(enable_tracking); +} + +void ThreadStatusUtil::SetColumnFamily(const ColumnFamilyData* cfd) { + if (thread_updater_local_cache_ == nullptr) { + return; + } + assert(cfd); + thread_updater_local_cache_->SetColumnFamilyInfoKey(cfd); +} + +void ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType op) { + if (thread_updater_local_cache_ == nullptr) { + return; + } + + if (op != ThreadStatus::OP_UNKNOWN) { + uint64_t current_time = SystemClock::Default()->NowMicros(); + thread_updater_local_cache_->SetOperationStartTime(current_time); + } else { + // TDOO(yhchiang): we could report the time when we set operation to + // OP_UNKNOWN once the whole instrumentation has been done. + thread_updater_local_cache_->SetOperationStartTime(0); + } + thread_updater_local_cache_->SetThreadOperation(op); +} + +ThreadStatus::OperationType ThreadStatusUtil::GetThreadOperation() { + if (thread_updater_local_cache_ == nullptr) { + return ThreadStatus::OperationType::OP_UNKNOWN; + } + return thread_updater_local_cache_->GetThreadOperation(); +} + +ThreadStatus::OperationStage ThreadStatusUtil::SetThreadOperationStage( + ThreadStatus::OperationStage stage) { + if (thread_updater_local_cache_ == nullptr) { + // thread_updater_local_cache_ must be set in SetColumnFamily + // or other ThreadStatusUtil functions. + return ThreadStatus::STAGE_UNKNOWN; + } + + return thread_updater_local_cache_->SetThreadOperationStage(stage); +} + +void ThreadStatusUtil::SetThreadOperationProperty(int code, uint64_t value) { + if (thread_updater_local_cache_ == nullptr) { + // thread_updater_local_cache_ must be set in SetColumnFamily + // or other ThreadStatusUtil functions. + return; + } + + thread_updater_local_cache_->SetThreadOperationProperty(code, value); +} + +void ThreadStatusUtil::IncreaseThreadOperationProperty(int code, + uint64_t delta) { + if (thread_updater_local_cache_ == nullptr) { + // thread_updater_local_cache_ must be set in SetColumnFamily + // or other ThreadStatusUtil functions. + return; + } + + thread_updater_local_cache_->IncreaseThreadOperationProperty(code, delta); +} + +void ThreadStatusUtil::SetThreadState(ThreadStatus::StateType state) { + if (thread_updater_local_cache_ == nullptr) { + // thread_updater_local_cache_ must be set in SetColumnFamily + // or other ThreadStatusUtil functions. 
+ return; + } + + thread_updater_local_cache_->SetThreadState(state); +} + +void ThreadStatusUtil::ResetThreadStatus() { + if (thread_updater_local_cache_ == nullptr) { + return; + } + thread_updater_local_cache_->ResetThreadStatus(); +} + +void ThreadStatusUtil::NewColumnFamilyInfo(const DB* db, + const ColumnFamilyData* cfd, + const std::string& cf_name, + const Env* env) { + if (!MaybeInitThreadLocalUpdater(env)) { + return; + } + assert(thread_updater_local_cache_); + if (thread_updater_local_cache_) { + thread_updater_local_cache_->NewColumnFamilyInfo(db, db->GetName(), cfd, + cf_name); + } +} + +void ThreadStatusUtil::EraseColumnFamilyInfo(const ColumnFamilyData* cfd) { + if (thread_updater_local_cache_ == nullptr) { + return; + } + thread_updater_local_cache_->EraseColumnFamilyInfo(cfd); +} + +void ThreadStatusUtil::EraseDatabaseInfo(const DB* db) { + ThreadStatusUpdater* thread_updater = db->GetEnv()->GetThreadStatusUpdater(); + if (thread_updater == nullptr) { + return; + } + thread_updater->EraseDatabaseInfo(db); +} + +bool ThreadStatusUtil::MaybeInitThreadLocalUpdater(const Env* env) { + if (!thread_updater_initialized_ && env != nullptr) { + thread_updater_initialized_ = true; + thread_updater_local_cache_ = env->GetThreadStatusUpdater(); + } + return (thread_updater_local_cache_ != nullptr); +} + +AutoThreadOperationStageUpdater::AutoThreadOperationStageUpdater( + ThreadStatus::OperationStage stage) { + prev_stage_ = ThreadStatusUtil::SetThreadOperationStage(stage); +} + +AutoThreadOperationStageUpdater::~AutoThreadOperationStageUpdater() { + ThreadStatusUtil::SetThreadOperationStage(prev_stage_); +} + +#else + +ThreadStatusUpdater* ThreadStatusUtil::thread_updater_local_cache_ = nullptr; +bool ThreadStatusUtil::thread_updater_initialized_ = false; + +bool ThreadStatusUtil::MaybeInitThreadLocalUpdater(const Env* /*env*/) { + return false; +} + +void ThreadStatusUtil::SetColumnFamily(const ColumnFamilyData* /*cfd*/) {} + +void ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType /*op*/) {} + +void ThreadStatusUtil::SetThreadOperationProperty(int /*code*/, + uint64_t /*value*/) {} + +void ThreadStatusUtil::IncreaseThreadOperationProperty(int /*code*/, + uint64_t /*delta*/) {} + +void ThreadStatusUtil::SetThreadState(ThreadStatus::StateType /*state*/) {} + +void ThreadStatusUtil::NewColumnFamilyInfo(const DB* /*db*/, + const ColumnFamilyData* /*cfd*/, + const std::string& /*cf_name*/, + const Env* env) {} + +void ThreadStatusUtil::EraseColumnFamilyInfo(const ColumnFamilyData* /*cfd*/) {} + +void ThreadStatusUtil::EraseDatabaseInfo(const DB* /*db*/) {} + +void ThreadStatusUtil::ResetThreadStatus() {} + +AutoThreadOperationStageUpdater::AutoThreadOperationStageUpdater( + ThreadStatus::OperationStage /*stage*/) {} + +AutoThreadOperationStageUpdater::~AutoThreadOperationStageUpdater() {} + +#endif // ROCKSDB_USING_THREAD_STATUS + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.h new file mode 100644 index 0000000000..df148a0395 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util.h @@ -0,0 +1,139 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
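AutoThreadOperationStageUpdater, defined at the end of thread_status_util.cc above, is a scope guard: the constructor swaps in a new stage and the destructor restores the previous one, so every early return inside a flush or compaction step still unwinds the stage correctly. Its general shape, reduced to a sketch (ScopedStage and kStagePickMemtables are illustrative names, not from this patch):

#include <utility>

// A generic swap-in/swap-out scope guard in the spirit of
// AutoThreadOperationStageUpdater; Stage is any copyable state type.
template <typename Stage>
class ScopedStage {
 public:
  ScopedStage(Stage& slot, Stage next)
      : slot_(slot), prev_(std::exchange(slot, next)) {}
  ~ScopedStage() { slot_ = prev_; }  // restore on every exit path
 private:
  Stage& slot_;
  Stage prev_;
};

// Usage: { ScopedStage<int> s(current_stage, kStagePickMemtables); ... }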
+ +#pragma once + +#include <string> + +#include "monitoring/thread_status_updater.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/thread_status.h" + +namespace ROCKSDB_NAMESPACE { + +class ColumnFamilyData; + +// The static utility class for updating thread-local status. +// +// The thread-local status is updated via the thread-local cached +// pointer thread_updater_local_cache_. During each function call, +// when ThreadStatusUtil finds thread_updater_local_cache_ is +// left uninitialized (determined by thread_updater_initialized_), +// it will try to initialize it using the return value of +// Env::GetThreadStatusUpdater(). When thread_updater_local_cache_ +// is initialized with a non-null pointer, each function call will +// then update the status of the current thread. Otherwise, +// all function calls to ThreadStatusUtil will be no-op. +class ThreadStatusUtil { + public: + // Register the current thread for tracking. + static void RegisterThread(const Env* env, + ThreadStatus::ThreadType thread_type); + + // Unregister the current thread. + static void UnregisterThread(); + + // Create an entry in the global ColumnFamilyInfo table for the + // specified column family. This function should be called only + // when the current thread does not hold db_mutex. + static void NewColumnFamilyInfo(const DB* db, const ColumnFamilyData* cfd, + const std::string& cf_name, const Env* env); + + // Erase the ConstantColumnFamilyInfo that is associated with the + // specified ColumnFamilyData. This function should be called only + // when the current thread does not hold db_mutex. + static void EraseColumnFamilyInfo(const ColumnFamilyData* cfd); + + // Erase all ConstantColumnFamilyInfo that is associated with the + // specified db instance. This function should be called only when + // the current thread does not hold db_mutex. + static void EraseDatabaseInfo(const DB* db); + + static void SetEnableTracking(bool enable_tracking); + + // Update the thread status to indicate the current thread is doing + // something related to the specified column family. + // + // REQUIRES: cfd != nullptr + static void SetColumnFamily(const ColumnFamilyData* cfd); + + static void SetThreadOperation(ThreadStatus::OperationType type); + + static ThreadStatus::OperationType GetThreadOperation(); + + static ThreadStatus::OperationStage SetThreadOperationStage( + ThreadStatus::OperationStage stage); + + static void SetThreadOperationProperty(int code, uint64_t value); + + static void IncreaseThreadOperationProperty(int code, uint64_t delta); + + static void SetThreadState(ThreadStatus::StateType type); + + static void ResetThreadStatus(); + +#ifndef NDEBUG + static void TEST_SetStateDelay(const ThreadStatus::StateType state, + int micro); + static void TEST_StateDelay(const ThreadStatus::StateType state); + + static Env::IOActivity TEST_GetExpectedIOActivity( + ThreadStatus::OperationType thread_op); +#endif + + protected: + // Initialize the thread-local ThreadStatusUpdater when it finds + // the cached value is nullptr. Returns true if it has cached + // a non-null pointer. + static bool MaybeInitThreadLocalUpdater(const Env* env); + +#ifdef ROCKSDB_USING_THREAD_STATUS + // A boolean flag indicating whether thread_updater_local_cache_ + // is initialized. It is set to true when an Env uses any + // ThreadStatusUtil functions using the current thread other + // than UnregisterThread(). It will be set to false when + // UnregisterThread() is called.
+ // + // When this variable is set to true, thread_updater_local_cache_ + // will not be updated until this variable is again set to false + // in UnregisterThread(). + static thread_local bool thread_updater_initialized_; + + // The thread-local cached ThreadStatusUpdater that caches the + // thread_status_updater_ of the first Env that uses any ThreadStatusUtil + // function other than UnregisterThread(). This variable will + // be cleared when UnregisterThread() is called. + // + // When this variable is set to a non-null pointer, then the status + // of the current thread will be updated when a function of + // ThreadStatusUtil is called. Otherwise, all functions of + // ThreadStatusUtil will be no-op. + // + // When thread_updater_initialized_ is set to true, this variable + // will not be updated until this thread_updater_initialized_ is + // again set to false in UnregisterThread(). + static thread_local ThreadStatusUpdater* thread_updater_local_cache_; +#else + static bool thread_updater_initialized_; + static ThreadStatusUpdater* thread_updater_local_cache_; +#endif +}; + +// A helper class for updating thread state. It will set the +// thread state according to the input parameter in its constructor +// and set the thread state to the previous state in its destructor. +class AutoThreadOperationStageUpdater { + public: + explicit AutoThreadOperationStageUpdater(ThreadStatus::OperationStage stage); + ~AutoThreadOperationStageUpdater(); + +#ifdef ROCKSDB_USING_THREAD_STATUS + private: + ThreadStatus::OperationStage prev_stage_; +#endif +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util_debug.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util_debug.cc new file mode 100644 index 0000000000..6e4fe8a9f6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/monitoring/thread_status_util_debug.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include + +#include "monitoring/thread_status_updater.h" +#include "monitoring/thread_status_util.h" +#include "rocksdb/system_clock.h" + +namespace ROCKSDB_NAMESPACE { + +#ifndef NDEBUG +// the delay for debugging purpose. 
+static std::atomic<int> states_delay[ThreadStatus::NUM_STATE_TYPES]; + +void ThreadStatusUtil::TEST_SetStateDelay(const ThreadStatus::StateType state, + int micro) { + states_delay[state].store(micro, std::memory_order_relaxed); +} + +void ThreadStatusUtil::TEST_StateDelay(const ThreadStatus::StateType state) { + auto delay = states_delay[state].load(std::memory_order_relaxed); + if (delay > 0) { + SystemClock::Default()->SleepForMicroseconds(delay); + } +} + +Env::IOActivity ThreadStatusUtil::TEST_GetExpectedIOActivity( + ThreadStatus::OperationType thread_op) { + switch (thread_op) { + case ThreadStatus::OperationType::OP_FLUSH: + return Env::IOActivity::kFlush; + case ThreadStatus::OperationType::OP_COMPACTION: + return Env::IOActivity::kCompaction; + case ThreadStatus::OperationType::OP_DBOPEN: + return Env::IOActivity::kDBOpen; + default: + return Env::IOActivity::kUnknown; + } +} + +#endif // !NDEBUG + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.cc new file mode 100644 index 0000000000..ad1e669df5 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.cc @@ -0,0 +1,1196 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "options/cf_options.h" + +#include <cassert> +#include <cinttypes> +#include <cstring> +#include <limits> + +#include "logging/logging.h" +#include "options/configurable_helper.h" +#include "options/db_options.h" +#include "options/options_helper.h" +#include "options/options_parser.h" +#include "port/port.h" +#include "rocksdb/advanced_cache.h" +#include "rocksdb/compaction_filter.h" +#include "rocksdb/concurrent_task_limiter.h" +#include "rocksdb/configurable.h" +#include "rocksdb/convenience.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "util/cast_util.h" + +// NOTE: in this file, many option flags that were deprecated +// and removed from the rest of the code have to be kept here +// and marked as kDeprecated in order to be able to read old +// OPTIONS files.
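The first definition below, ParseCompressionOptions, handles the legacy colon-delimited CompressionOptions string: three mandatory fields ("window_bits:level:strategy") followed by optional trailing fields appended over the years (max_dict_bytes, zstd_max_train_bytes, and so on). The core technique is just std::getline on ':'; a reduced sketch (SplitColon and the sample strings are illustrative, not from this patch):

#include <sstream>
#include <string>
#include <vector>

// Split "a:b:c:..." into tokens, as ParseCompressionOptions does below.
// "-14:32767:0" would set window_bits/level/strategy; a fourth token, as in
// "-14:32767:0:16384", would additionally set max_dict_bytes.
std::vector<std::string> SplitColon(const std::string& value) {
  std::vector<std::string> fields;
  std::istringstream in(value);
  std::string field;
  while (std::getline(in, field, ':')) {
    fields.push_back(field);
  }
  return fields;  // a caller would reject fewer than three fields
}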
+ +namespace ROCKSDB_NAMESPACE { + +static Status ParseCompressionOptions(const std::string& value, + const std::string& name, + CompressionOptions& compression_opts) { + const char kDelimiter = ':'; + std::istringstream field_stream(value); + std::string field; + + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument("unable to parse the specified CF option " + + name); + } + compression_opts.window_bits = ParseInt(field); + + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument("unable to parse the specified CF option " + + name); + } + compression_opts.level = ParseInt(field); + + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument("unable to parse the specified CF option " + + name); + } + compression_opts.strategy = ParseInt(field); + + // max_dict_bytes is optional for backwards compatibility + if (!field_stream.eof()) { + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument( + "unable to parse the specified CF option " + name); + } + compression_opts.max_dict_bytes = ParseInt(field); + } + + // zstd_max_train_bytes is optional for backwards compatibility + if (!field_stream.eof()) { + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument( + "unable to parse the specified CF option " + name); + } + compression_opts.zstd_max_train_bytes = ParseInt(field); + } + + // parallel_threads is optional for backwards compatibility + if (!field_stream.eof()) { + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument( + "unable to parse the specified CF option " + name); + } + // Since parallel_threads comes before enabled but was added optionally + // later, we need to check if this is the final token (meaning it is the + // enabled bit), or if there are more tokens (meaning this one is + // parallel_threads). 
+ if (!field_stream.eof()) { + compression_opts.parallel_threads = ParseInt(field); + } else { + // parallel_threads is not serialized with this format, but enabled is + compression_opts.enabled = ParseBoolean("", field); + } + } + + // enabled is optional for backwards compatibility + if (!field_stream.eof()) { + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument( + "unable to parse the specified CF option " + name); + } + compression_opts.enabled = ParseBoolean("", field); + } + + // max_dict_buffer_bytes is optional for backwards compatibility + if (!field_stream.eof()) { + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument( + "unable to parse the specified CF option " + name); + } + compression_opts.max_dict_buffer_bytes = ParseUint64(field); + } + + // use_zstd_dict_trainer is optional for backwards compatibility + if (!field_stream.eof()) { + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument( + "unable to parse the specified CF option " + name); + } + compression_opts.use_zstd_dict_trainer = ParseBoolean("", field); + } + + if (!field_stream.eof()) { + return Status::InvalidArgument("unable to parse the specified CF option " + + name); + } + return Status::OK(); +} + +const std::string kOptNameBMCompOpts = "bottommost_compression_opts"; +const std::string kOptNameCompOpts = "compression_opts"; + +// OptionTypeInfo map for CompressionOptions +static std::unordered_map + compression_options_type_info = { + {"window_bits", + {offsetof(struct CompressionOptions, window_bits), OptionType::kInt, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"level", + {offsetof(struct CompressionOptions, level), OptionType::kInt, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"strategy", + {offsetof(struct CompressionOptions, strategy), OptionType::kInt, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"max_compressed_bytes_per_kb", + {offsetof(struct CompressionOptions, max_compressed_bytes_per_kb), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_dict_bytes", + {offsetof(struct CompressionOptions, max_dict_bytes), OptionType::kInt, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"zstd_max_train_bytes", + {offsetof(struct CompressionOptions, zstd_max_train_bytes), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"parallel_threads", + {offsetof(struct CompressionOptions, parallel_threads), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"enabled", + {offsetof(struct CompressionOptions, enabled), OptionType::kBoolean, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"max_dict_buffer_bytes", + {offsetof(struct CompressionOptions, max_dict_buffer_bytes), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"use_zstd_dict_trainer", + {offsetof(struct CompressionOptions, use_zstd_dict_trainer), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, +}; + +static std::unordered_map + file_temperature_age_type_info = { + {"temperature", + {offsetof(struct FileTemperatureAge, temperature), + OptionType::kTemperature, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"age", + {offsetof(struct FileTemperatureAge, age), OptionType::kUInt64T, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, 
+}; + +static std::unordered_map + fifo_compaction_options_type_info = { + {"max_table_files_size", + {offsetof(struct CompactionOptionsFIFO, max_table_files_size), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"age_for_warm", + {offsetof(struct CompactionOptionsFIFO, age_for_warm), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"ttl", + {0, OptionType::kUInt64T, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"allow_compaction", + {offsetof(struct CompactionOptionsFIFO, allow_compaction), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"file_temperature_age_thresholds", + OptionTypeInfo::Vector( + offsetof(struct CompactionOptionsFIFO, + file_temperature_age_thresholds), + OptionVerificationType::kNormal, OptionTypeFlags::kMutable, + OptionTypeInfo::Struct("file_temperature_age_thresholds", + &file_temperature_age_type_info, 0, + OptionVerificationType::kNormal, + OptionTypeFlags::kMutable))}}; + +static std::unordered_map + universal_compaction_options_type_info = { + {"size_ratio", + {offsetof(class CompactionOptionsUniversal, size_ratio), + OptionType::kUInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"min_merge_width", + {offsetof(class CompactionOptionsUniversal, min_merge_width), + OptionType::kUInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_merge_width", + {offsetof(class CompactionOptionsUniversal, max_merge_width), + OptionType::kUInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_size_amplification_percent", + {offsetof(class CompactionOptionsUniversal, + max_size_amplification_percent), + OptionType::kUInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"compression_size_percent", + {offsetof(class CompactionOptionsUniversal, compression_size_percent), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"stop_style", + {offsetof(class CompactionOptionsUniversal, stop_style), + OptionType::kCompactionStopStyle, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"incremental", + {offsetof(class CompactionOptionsUniversal, incremental), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"allow_trivial_move", + {offsetof(class CompactionOptionsUniversal, allow_trivial_move), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}}; + +static std::unordered_map + cf_mutable_options_type_info = { + {"report_bg_io_stats", + {offsetof(struct MutableCFOptions, report_bg_io_stats), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"disable_auto_compactions", + {offsetof(struct MutableCFOptions, disable_auto_compactions), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"filter_deletes", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"check_flush_compaction_key_order", + {offsetof(struct MutableCFOptions, check_flush_compaction_key_order), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"paranoid_file_checks", + {offsetof(struct MutableCFOptions, paranoid_file_checks), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"verify_checksums_in_compaction", + {0, OptionType::kBoolean, 
OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"soft_pending_compaction_bytes_limit", + {offsetof(struct MutableCFOptions, + soft_pending_compaction_bytes_limit), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"hard_pending_compaction_bytes_limit", + {offsetof(struct MutableCFOptions, + hard_pending_compaction_bytes_limit), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"hard_rate_limit", + {0, OptionType::kDouble, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"soft_rate_limit", + {0, OptionType::kDouble, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"max_compaction_bytes", + {offsetof(struct MutableCFOptions, max_compaction_bytes), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"ignore_max_compaction_bytes_for_input", + {offsetof(struct MutableCFOptions, + ignore_max_compaction_bytes_for_input), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"expanded_compaction_factor", + {0, OptionType::kInt, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"level0_file_num_compaction_trigger", + {offsetof(struct MutableCFOptions, level0_file_num_compaction_trigger), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"level0_slowdown_writes_trigger", + {offsetof(struct MutableCFOptions, level0_slowdown_writes_trigger), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"level0_stop_writes_trigger", + {offsetof(struct MutableCFOptions, level0_stop_writes_trigger), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_grandparent_overlap_factor", + {0, OptionType::kInt, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"max_write_buffer_number", + {offsetof(struct MutableCFOptions, max_write_buffer_number), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"source_compaction_factor", + {0, OptionType::kInt, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"target_file_size_multiplier", + {offsetof(struct MutableCFOptions, target_file_size_multiplier), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"arena_block_size", + {offsetof(struct MutableCFOptions, arena_block_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"inplace_update_num_locks", + {offsetof(struct MutableCFOptions, inplace_update_num_locks), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_successive_merges", + {offsetof(struct MutableCFOptions, max_successive_merges), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"memtable_huge_page_size", + {offsetof(struct MutableCFOptions, memtable_huge_page_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"memtable_prefix_bloom_huge_page_tlb_size", + {0, OptionType::kSizeT, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"write_buffer_size", + {offsetof(struct MutableCFOptions, write_buffer_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"memtable_prefix_bloom_bits", + {0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + 
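+        // The kDeprecated entries in this map (offset 0, no target field)
+        // exist so that option names from older releases still parse cleanly;
+        // the supplied value is simply discarded. Sketch (illustrative):
+        //   db->SetOptions(cfh, {{"memtable_prefix_bloom_bits", "10000"}});
+        //   // -> Status::OK(), but no state changes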
{"memtable_prefix_bloom_size_ratio", + {offsetof(struct MutableCFOptions, memtable_prefix_bloom_size_ratio), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"memtable_prefix_bloom_probes", + {0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"memtable_whole_key_filtering", + {offsetof(struct MutableCFOptions, memtable_whole_key_filtering), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"min_partial_merge_operands", + {0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"max_bytes_for_level_base", + {offsetof(struct MutableCFOptions, max_bytes_for_level_base), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"snap_refresh_nanos", + {0, OptionType::kUInt64T, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"max_bytes_for_level_multiplier", + {offsetof(struct MutableCFOptions, max_bytes_for_level_multiplier), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_bytes_for_level_multiplier_additional", + OptionTypeInfo::Vector( + offsetof(struct MutableCFOptions, + max_bytes_for_level_multiplier_additional), + OptionVerificationType::kNormal, OptionTypeFlags::kMutable, + {0, OptionType::kInt})}, + {"max_sequential_skip_in_iterations", + {offsetof(struct MutableCFOptions, max_sequential_skip_in_iterations), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"target_file_size_base", + {offsetof(struct MutableCFOptions, target_file_size_base), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"compression", + {offsetof(struct MutableCFOptions, compression), + OptionType::kCompressionType, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"prefix_extractor", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct MutableCFOptions, prefix_extractor), + OptionVerificationType::kByNameAllowNull, + (OptionTypeFlags::kMutable | OptionTypeFlags::kAllowNull))}, + {"compaction_options_fifo", + OptionTypeInfo::Struct( + "compaction_options_fifo", &fifo_compaction_options_type_info, + offsetof(struct MutableCFOptions, compaction_options_fifo), + OptionVerificationType::kNormal, OptionTypeFlags::kMutable) + .SetParseFunc([](const ConfigOptions& opts, + const std::string& name, const std::string& value, + void* addr) { + // This is to handle backward compatibility, where + // compaction_options_fifo could be assigned a single scalar + // value, say, like "23", which would be assigned to + // max_table_files_size. + if (name == "compaction_options_fifo" && + value.find("=") == std::string::npos) { + // Old format. Parse just a single uint64_t value. 
+ auto options = static_cast(addr); + options->max_table_files_size = ParseUint64(value); + return Status::OK(); + } else { + return OptionTypeInfo::ParseStruct( + opts, "compaction_options_fifo", + &fifo_compaction_options_type_info, name, value, addr); + } + })}, + {"compaction_options_universal", + OptionTypeInfo::Struct( + "compaction_options_universal", + &universal_compaction_options_type_info, + offsetof(struct MutableCFOptions, compaction_options_universal), + OptionVerificationType::kNormal, OptionTypeFlags::kMutable)}, + {"ttl", + {offsetof(struct MutableCFOptions, ttl), OptionType::kUInt64T, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"periodic_compaction_seconds", + {offsetof(struct MutableCFOptions, periodic_compaction_seconds), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"bottommost_temperature", + {0, OptionType::kTemperature, OptionVerificationType::kDeprecated, + OptionTypeFlags::kMutable}}, + {"last_level_temperature", + {offsetof(struct MutableCFOptions, last_level_temperature), + OptionType::kTemperature, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"enable_blob_files", + {offsetof(struct MutableCFOptions, enable_blob_files), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"min_blob_size", + {offsetof(struct MutableCFOptions, min_blob_size), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"blob_file_size", + {offsetof(struct MutableCFOptions, blob_file_size), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"blob_compression_type", + {offsetof(struct MutableCFOptions, blob_compression_type), + OptionType::kCompressionType, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"enable_blob_garbage_collection", + {offsetof(struct MutableCFOptions, enable_blob_garbage_collection), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"blob_garbage_collection_age_cutoff", + {offsetof(struct MutableCFOptions, blob_garbage_collection_age_cutoff), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"blob_garbage_collection_force_threshold", + {offsetof(struct MutableCFOptions, + blob_garbage_collection_force_threshold), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"blob_compaction_readahead_size", + {offsetof(struct MutableCFOptions, blob_compaction_readahead_size), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"blob_file_starting_level", + {offsetof(struct MutableCFOptions, blob_file_starting_level), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"prepopulate_blob_cache", + OptionTypeInfo::Enum( + offsetof(struct MutableCFOptions, prepopulate_blob_cache), + &prepopulate_blob_cache_string_map, OptionTypeFlags::kMutable)}, + {"sample_for_compression", + {offsetof(struct MutableCFOptions, sample_for_compression), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"bottommost_compression", + {offsetof(struct MutableCFOptions, bottommost_compression), + OptionType::kCompressionType, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"compression_per_level", + OptionTypeInfo::Vector( + offsetof(struct MutableCFOptions, compression_per_level), + OptionVerificationType::kNormal, 
OptionTypeFlags::kMutable, + {0, OptionType::kCompressionType})}, + {"experimental_mempurge_threshold", + {offsetof(struct MutableCFOptions, experimental_mempurge_threshold), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"memtable_protection_bytes_per_key", + {offsetof(struct MutableCFOptions, memtable_protection_bytes_per_key), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"block_protection_bytes_per_key", + {offsetof(struct MutableCFOptions, block_protection_bytes_per_key), + OptionType::kUInt8T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {kOptNameCompOpts, + OptionTypeInfo::Struct( + kOptNameCompOpts, &compression_options_type_info, + offsetof(struct MutableCFOptions, compression_opts), + OptionVerificationType::kNormal, + (OptionTypeFlags::kMutable | OptionTypeFlags::kCompareNever), + [](const ConfigOptions& opts, const std::string& name, + const std::string& value, void* addr) { + // This is to handle backward compatibility, where + // compression_options was a ":" separated list. + if (name == kOptNameCompOpts && + value.find("=") == std::string::npos) { + auto* compression = static_cast(addr); + return ParseCompressionOptions(value, name, *compression); + } else { + return OptionTypeInfo::ParseStruct( + opts, kOptNameCompOpts, &compression_options_type_info, + name, value, addr); + } + })}, + {kOptNameBMCompOpts, + OptionTypeInfo::Struct( + kOptNameBMCompOpts, &compression_options_type_info, + offsetof(struct MutableCFOptions, bottommost_compression_opts), + OptionVerificationType::kNormal, + (OptionTypeFlags::kMutable | OptionTypeFlags::kCompareNever), + [](const ConfigOptions& opts, const std::string& name, + const std::string& value, void* addr) { + // This is to handle backward compatibility, where + // compression_options was a ":" separated list. 
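+              // A sketch of the legacy format: a value like "4:5:6:20" is
+              // consumed positionally (window_bits, level, strategy,
+              // max_dict_bytes, then the optional trailing fields); see
+              // ParseCompressionOptions above for the authoritative order.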
+              if (name == kOptNameBMCompOpts &&
+                  value.find("=") == std::string::npos) {
+                auto* compression = static_cast<CompressionOptions*>(addr);
+                return ParseCompressionOptions(value, name, *compression);
+              } else {
+                return OptionTypeInfo::ParseStruct(
+                    opts, kOptNameBMCompOpts, &compression_options_type_info,
+                    name, value, addr);
+              }
+            })},
+    // End special case properties
+};
+
+static std::unordered_map<std::string, OptionTypeInfo>
+    cf_immutable_options_type_info = {
+        /* not yet supported
+        CompressionOptions compression_opts;
+        TablePropertiesCollectorFactories table_properties_collector_factories;
+        using TablePropertiesCollectorFactories =
+            std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>;
+        UpdateStatus (*inplace_callback)(char* existing_value,
+                                         uint32_t* existing_value_size,
+                                         Slice delta_value,
+                                         std::string* merged_value);
+        std::vector<DbPath> cf_paths;
+         */
+        {"compaction_measure_io_stats",
+         {0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
+          OptionTypeFlags::kNone}},
+        {"purge_redundant_kvs_while_flush",
+         {0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
+          OptionTypeFlags::kNone}},
+        {"inplace_update_support",
+         {offsetof(struct ImmutableCFOptions, inplace_update_support),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"level_compaction_dynamic_level_bytes",
+         {offsetof(struct ImmutableCFOptions,
+                   level_compaction_dynamic_level_bytes),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"level_compaction_dynamic_file_size",
+         {offsetof(struct ImmutableCFOptions,
+                   level_compaction_dynamic_file_size),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"optimize_filters_for_hits",
+         {offsetof(struct ImmutableCFOptions, optimize_filters_for_hits),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"force_consistency_checks",
+         {offsetof(struct ImmutableCFOptions, force_consistency_checks),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"preclude_last_level_data_seconds",
+         {offsetof(struct ImmutableCFOptions,
+                   preclude_last_level_data_seconds),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"preserve_internal_time_seconds",
+         {offsetof(struct ImmutableCFOptions, preserve_internal_time_seconds),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        // Need to keep this around to be able to read old OPTIONS files.
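+        // (An OPTIONS file written by an old release may still contain a line
+        // such as "max_mem_compaction_level=2"; the kDeprecated entry below
+        // accepts and silently drops it instead of failing the load.)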
+ {"max_mem_compaction_level", + {0, OptionType::kInt, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"max_write_buffer_number_to_maintain", + {offsetof(struct ImmutableCFOptions, + max_write_buffer_number_to_maintain), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kNone, 0}}, + {"max_write_buffer_size_to_maintain", + {offsetof(struct ImmutableCFOptions, + max_write_buffer_size_to_maintain), + OptionType::kInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"min_write_buffer_number_to_merge", + {offsetof(struct ImmutableCFOptions, min_write_buffer_number_to_merge), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kNone, 0}}, + {"num_levels", + {offsetof(struct ImmutableCFOptions, num_levels), OptionType::kInt, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"bloom_locality", + {offsetof(struct ImmutableCFOptions, bloom_locality), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"rate_limit_delay_max_milliseconds", + {0, OptionType::kUInt, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"comparator", + OptionTypeInfo::AsCustomRawPtr( + offsetof(struct ImmutableCFOptions, user_comparator), + OptionVerificationType::kByName, OptionTypeFlags::kCompareLoose) + .SetSerializeFunc( + // Serializes a Comparator + [](const ConfigOptions& opts, const std::string&, + const void* addr, std::string* value) { + // it's a const pointer of const Comparator* + const auto* ptr = + static_cast(addr); + // Since the user-specified comparator will be wrapped by + // InternalKeyComparator, we should persist the + // user-specified one instead of InternalKeyComparator. + if (*ptr == nullptr) { + *value = kNullptrString; + } else if (opts.mutable_options_only) { + *value = ""; + } else { + const Comparator* root_comp = (*ptr)->GetRootComparator(); + if (root_comp == nullptr) { + root_comp = (*ptr); + } + *value = root_comp->ToString(opts); + } + return Status::OK(); + })}, + {"memtable_insert_with_hint_prefix_extractor", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct ImmutableCFOptions, + memtable_insert_with_hint_prefix_extractor), + OptionVerificationType::kByNameAllowNull, OptionTypeFlags::kNone)}, + {"memtable_factory", + {offsetof(struct ImmutableCFOptions, memtable_factory), + OptionType::kCustomizable, OptionVerificationType::kByName, + OptionTypeFlags::kShared, + [](const ConfigOptions& opts, const std::string&, + const std::string& value, void* addr) { + std::unique_ptr factory; + auto* shared = + static_cast*>(addr); + Status s = + MemTableRepFactory::CreateFromString(opts, value, shared); + return s; + }}}, + {"memtable", + {offsetof(struct ImmutableCFOptions, memtable_factory), + OptionType::kCustomizable, OptionVerificationType::kAlias, + OptionTypeFlags::kShared, + [](const ConfigOptions& opts, const std::string&, + const std::string& value, void* addr) { + std::unique_ptr factory; + auto* shared = + static_cast*>(addr); + Status s = + MemTableRepFactory::CreateFromString(opts, value, shared); + return s; + }}}, + {"table_factory", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct ImmutableCFOptions, table_factory), + OptionVerificationType::kByName, + (OptionTypeFlags::kCompareLoose | + OptionTypeFlags::kStringNameOnly | + OptionTypeFlags::kDontPrepare))}, + {"block_based_table_factory", + {offsetof(struct ImmutableCFOptions, table_factory), + OptionType::kCustomizable, OptionVerificationType::kAlias, + 
OptionTypeFlags::kShared | OptionTypeFlags::kCompareLoose, + // Parses the input value and creates a BlockBasedTableFactory + [](const ConfigOptions& opts, const std::string& name, + const std::string& value, void* addr) { + BlockBasedTableOptions* old_opts = nullptr; + auto table_factory = + static_cast*>(addr); + if (table_factory->get() != nullptr) { + old_opts = + table_factory->get()->GetOptions(); + } + if (name == "block_based_table_factory") { + std::unique_ptr new_factory; + if (old_opts != nullptr) { + new_factory.reset(NewBlockBasedTableFactory(*old_opts)); + } else { + new_factory.reset(NewBlockBasedTableFactory()); + } + Status s = new_factory->ConfigureFromString(opts, value); + if (s.ok()) { + table_factory->reset(new_factory.release()); + } + return s; + } else if (old_opts != nullptr) { + return table_factory->get()->ConfigureOption(opts, name, value); + } else { + return Status::NotFound("Mismatched table option: ", name); + } + }}}, + {"plain_table_factory", + {offsetof(struct ImmutableCFOptions, table_factory), + OptionType::kCustomizable, OptionVerificationType::kAlias, + OptionTypeFlags::kShared | OptionTypeFlags::kCompareLoose, + // Parses the input value and creates a PlainTableFactory + [](const ConfigOptions& opts, const std::string& name, + const std::string& value, void* addr) { + PlainTableOptions* old_opts = nullptr; + auto table_factory = + static_cast*>(addr); + if (table_factory->get() != nullptr) { + old_opts = table_factory->get()->GetOptions(); + } + if (name == "plain_table_factory") { + std::unique_ptr new_factory; + if (old_opts != nullptr) { + new_factory.reset(NewPlainTableFactory(*old_opts)); + } else { + new_factory.reset(NewPlainTableFactory()); + } + Status s = new_factory->ConfigureFromString(opts, value); + if (s.ok()) { + table_factory->reset(new_factory.release()); + } + return s; + } else if (old_opts != nullptr) { + return table_factory->get()->ConfigureOption(opts, name, value); + } else { + return Status::NotFound("Mismatched table option: ", name); + } + }}}, + {"table_properties_collectors", + OptionTypeInfo::Vector< + std::shared_ptr>( + offsetof(struct ImmutableCFOptions, + table_properties_collector_factories), + OptionVerificationType::kByName, OptionTypeFlags::kNone, + OptionTypeInfo::AsCustomSharedPtr( + 0, OptionVerificationType::kByName, OptionTypeFlags::kNone))}, + {"compaction_filter", + OptionTypeInfo::AsCustomRawPtr( + offsetof(struct ImmutableCFOptions, compaction_filter), + OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)}, + {"compaction_filter_factory", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct ImmutableCFOptions, compaction_filter_factory), + OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)}, + {"merge_operator", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct ImmutableCFOptions, merge_operator), + OptionVerificationType::kByNameAllowFromNull, + OptionTypeFlags::kCompareLoose | OptionTypeFlags::kAllowNull)}, + {"compaction_style", + {offsetof(struct ImmutableCFOptions, compaction_style), + OptionType::kCompactionStyle, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"compaction_pri", + {offsetof(struct ImmutableCFOptions, compaction_pri), + OptionType::kCompactionPri, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"sst_partitioner_factory", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct ImmutableCFOptions, sst_partitioner_factory), + OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)}, + {"blob_cache", + {offsetof(struct 
ImmutableCFOptions, blob_cache), OptionType::kUnknown, + OptionVerificationType::kNormal, + (OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize), + // Parses the input value as a Cache + [](const ConfigOptions& opts, const std::string&, + const std::string& value, void* addr) { + auto* cache = static_cast*>(addr); + return Cache::CreateFromString(opts, value, cache); + }}}, + {"persist_user_defined_timestamps", + {offsetof(struct ImmutableCFOptions, persist_user_defined_timestamps), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kCompareLoose}}, +}; + +const std::string OptionsHelper::kCFOptionsName = "ColumnFamilyOptions"; + +class ConfigurableMutableCFOptions : public Configurable { + public: + explicit ConfigurableMutableCFOptions(const MutableCFOptions& mcf) { + mutable_ = mcf; + RegisterOptions(&mutable_, &cf_mutable_options_type_info); + } + + protected: + MutableCFOptions mutable_; +}; + +class ConfigurableCFOptions : public ConfigurableMutableCFOptions { + public: + ConfigurableCFOptions(const ColumnFamilyOptions& opts, + const std::unordered_map* map) + : ConfigurableMutableCFOptions(MutableCFOptions(opts)), + immutable_(opts), + cf_options_(opts), + opt_map_(map) { + RegisterOptions(&immutable_, &cf_immutable_options_type_info); + } + + protected: + Status ConfigureOptions( + const ConfigOptions& config_options, + const std::unordered_map& opts_map, + std::unordered_map* unused) override { + Status s = Configurable::ConfigureOptions(config_options, opts_map, unused); + if (s.ok()) { + UpdateColumnFamilyOptions(mutable_, &cf_options_); + UpdateColumnFamilyOptions(immutable_, &cf_options_); + s = PrepareOptions(config_options); + } + return s; + } + + virtual const void* GetOptionsPtr(const std::string& name) const override { + if (name == OptionsHelper::kCFOptionsName) { + return &cf_options_; + } else { + return ConfigurableMutableCFOptions::GetOptionsPtr(name); + } + } + + bool OptionsAreEqual(const ConfigOptions& config_options, + const OptionTypeInfo& opt_info, + const std::string& opt_name, const void* const this_ptr, + const void* const that_ptr, + std::string* mismatch) const override { + bool equals = opt_info.AreEqual(config_options, opt_name, this_ptr, + that_ptr, mismatch); + if (!equals && opt_info.IsByName()) { + if (opt_map_ == nullptr) { + equals = true; + } else { + const auto& iter = opt_map_->find(opt_name); + if (iter == opt_map_->end()) { + equals = true; + } else { + equals = opt_info.AreEqualByName(config_options, opt_name, this_ptr, + iter->second); + } + } + if (equals) { // False alarm, clear mismatch + *mismatch = ""; + } + } + if (equals && opt_info.IsConfigurable() && opt_map_ != nullptr) { + const auto* this_config = opt_info.AsRawPointer(this_ptr); + if (this_config == nullptr) { + const auto& iter = opt_map_->find(opt_name); + // If the name exists in the map and is not empty/null, + // then the this_config should be set. 
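+        // Hypothetical example: if the OPTIONS file recorded
+        //   merge_operator=stringappend
+        // but this_config is still nullptr after loading, the two objects
+        // cannot be considered equal, so opt_name is reported as the
+        // mismatch.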
+ if (iter != opt_map_->end() && !iter->second.empty() && + iter->second != kNullptrString) { + *mismatch = opt_name; + equals = false; + } + } + } + return equals; + } + + private: + ImmutableCFOptions immutable_; + ColumnFamilyOptions cf_options_; + const std::unordered_map* opt_map_; +}; + +std::unique_ptr CFOptionsAsConfigurable( + const MutableCFOptions& opts) { + std::unique_ptr ptr(new ConfigurableMutableCFOptions(opts)); + return ptr; +} +std::unique_ptr CFOptionsAsConfigurable( + const ColumnFamilyOptions& opts, + const std::unordered_map* opt_map) { + std::unique_ptr ptr(new ConfigurableCFOptions(opts, opt_map)); + return ptr; +} + +ImmutableCFOptions::ImmutableCFOptions() : ImmutableCFOptions(Options()) {} + +ImmutableCFOptions::ImmutableCFOptions(const ColumnFamilyOptions& cf_options) + : compaction_style(cf_options.compaction_style), + compaction_pri(cf_options.compaction_pri), + user_comparator(cf_options.comparator), + internal_comparator(InternalKeyComparator(cf_options.comparator)), + merge_operator(cf_options.merge_operator), + compaction_filter(cf_options.compaction_filter), + compaction_filter_factory(cf_options.compaction_filter_factory), + min_write_buffer_number_to_merge( + cf_options.min_write_buffer_number_to_merge), + max_write_buffer_number_to_maintain( + cf_options.max_write_buffer_number_to_maintain), + max_write_buffer_size_to_maintain( + cf_options.max_write_buffer_size_to_maintain), + inplace_update_support(cf_options.inplace_update_support), + inplace_callback(cf_options.inplace_callback), + memtable_factory(cf_options.memtable_factory), + table_factory(cf_options.table_factory), + table_properties_collector_factories( + cf_options.table_properties_collector_factories), + bloom_locality(cf_options.bloom_locality), + level_compaction_dynamic_level_bytes( + cf_options.level_compaction_dynamic_level_bytes), + level_compaction_dynamic_file_size( + cf_options.level_compaction_dynamic_file_size), + num_levels(cf_options.num_levels), + optimize_filters_for_hits(cf_options.optimize_filters_for_hits), + force_consistency_checks(cf_options.force_consistency_checks), + preclude_last_level_data_seconds( + cf_options.preclude_last_level_data_seconds), + preserve_internal_time_seconds(cf_options.preserve_internal_time_seconds), + memtable_insert_with_hint_prefix_extractor( + cf_options.memtable_insert_with_hint_prefix_extractor), + cf_paths(cf_options.cf_paths), + compaction_thread_limiter(cf_options.compaction_thread_limiter), + sst_partitioner_factory(cf_options.sst_partitioner_factory), + blob_cache(cf_options.blob_cache), + persist_user_defined_timestamps( + cf_options.persist_user_defined_timestamps) {} + +ImmutableOptions::ImmutableOptions() : ImmutableOptions(Options()) {} + +ImmutableOptions::ImmutableOptions(const Options& options) + : ImmutableOptions(options, options) {} + +ImmutableOptions::ImmutableOptions(const DBOptions& db_options, + const ColumnFamilyOptions& cf_options) + : ImmutableDBOptions(db_options), ImmutableCFOptions(cf_options) {} + +ImmutableOptions::ImmutableOptions(const DBOptions& db_options, + const ImmutableCFOptions& cf_options) + : ImmutableDBOptions(db_options), ImmutableCFOptions(cf_options) {} + +ImmutableOptions::ImmutableOptions(const ImmutableDBOptions& db_options, + const ColumnFamilyOptions& cf_options) + : ImmutableDBOptions(db_options), ImmutableCFOptions(cf_options) {} + +ImmutableOptions::ImmutableOptions(const ImmutableDBOptions& db_options, + const ImmutableCFOptions& cf_options) + : ImmutableDBOptions(db_options), 
ImmutableCFOptions(cf_options) {}
+
+// Multiply two operands. If they overflow, return op1.
+uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) {
+  if (op1 == 0 || op2 <= 0) {
+    return 0;
+  }
+  if (std::numeric_limits<uint64_t>::max() / op1 < op2) {
+    return op1;
+  }
+  return static_cast<uint64_t>(op1 * op2);
+}
+
+// When level_compaction_dynamic_level_bytes is true and leveled compaction
+// is used, the base level is not always L1, so the precomputed max_file_size
+// can no longer be used. Recompute file_size_for_level from the base level.
+uint64_t MaxFileSizeForLevel(const MutableCFOptions& cf_options,
+    int level, CompactionStyle compaction_style, int base_level,
+    bool level_compaction_dynamic_level_bytes) {
+  if (!level_compaction_dynamic_level_bytes || level < base_level ||
+      compaction_style != kCompactionStyleLevel) {
+    assert(level >= 0);
+    assert(level < (int)cf_options.max_file_size.size());
+    return cf_options.max_file_size[level];
+  } else {
+    assert(level >= 0 && base_level >= 0);
+    assert(level - base_level < (int)cf_options.max_file_size.size());
+    return cf_options.max_file_size[level - base_level];
+  }
+}
+
+size_t MaxFileSizeForL0MetaPin(const MutableCFOptions& cf_options) {
+  // We do not want to pin meta-blocks that almost certainly came from intra-L0
+  // or a former larger `write_buffer_size` value to avoid surprising users with
+  // pinned memory usage. We use a factor of 1.5 to account for overhead
+  // introduced during flush in most cases.
+  if (std::numeric_limits<size_t>::max() / 3 <
+      cf_options.write_buffer_size / 2) {
+    return std::numeric_limits<size_t>::max();
+  }
+  return cf_options.write_buffer_size / 2 * 3;
+}
+
+void MutableCFOptions::RefreshDerivedOptions(int num_levels,
+                                             CompactionStyle compaction_style) {
+  max_file_size.resize(num_levels);
+  for (int i = 0; i < num_levels; ++i) {
+    if (i == 0 && compaction_style == kCompactionStyleUniversal) {
+      max_file_size[i] = ULLONG_MAX;
+    } else if (i > 1) {
+      max_file_size[i] = MultiplyCheckOverflow(max_file_size[i - 1],
+                                               target_file_size_multiplier);
+    } else {
+      max_file_size[i] = target_file_size_base;
+    }
+  }
+}
+
+void MutableCFOptions::Dump(Logger* log) const {
+  // Memtable related options
+  ROCKS_LOG_INFO(log,
+                 "                        write_buffer_size: %" ROCKSDB_PRIszt,
+                 write_buffer_size);
+  ROCKS_LOG_INFO(log, "                  max_write_buffer_number: %d",
+                 max_write_buffer_number);
+  ROCKS_LOG_INFO(log,
+                 "                         arena_block_size: %" ROCKSDB_PRIszt,
+                 arena_block_size);
+  ROCKS_LOG_INFO(log, "              memtable_prefix_bloom_ratio: %f",
+                 memtable_prefix_bloom_size_ratio);
+  ROCKS_LOG_INFO(log, "             memtable_whole_key_filtering: %d",
+                 memtable_whole_key_filtering);
+  ROCKS_LOG_INFO(log,
+                 "                  memtable_huge_page_size: %" ROCKSDB_PRIszt,
+                 memtable_huge_page_size);
+  ROCKS_LOG_INFO(log,
+                 "                    max_successive_merges: %" ROCKSDB_PRIszt,
+                 max_successive_merges);
+  ROCKS_LOG_INFO(log,
+                 "                 inplace_update_num_locks: %" ROCKSDB_PRIszt,
+                 inplace_update_num_locks);
+  ROCKS_LOG_INFO(log, "                         prefix_extractor: %s",
+                 prefix_extractor == nullptr
+                     ?
"nullptr" + : prefix_extractor->GetId().c_str()); + ROCKS_LOG_INFO(log, " disable_auto_compactions: %d", + disable_auto_compactions); + ROCKS_LOG_INFO(log, " soft_pending_compaction_bytes_limit: %" PRIu64, + soft_pending_compaction_bytes_limit); + ROCKS_LOG_INFO(log, " hard_pending_compaction_bytes_limit: %" PRIu64, + hard_pending_compaction_bytes_limit); + ROCKS_LOG_INFO(log, " level0_file_num_compaction_trigger: %d", + level0_file_num_compaction_trigger); + ROCKS_LOG_INFO(log, " level0_slowdown_writes_trigger: %d", + level0_slowdown_writes_trigger); + ROCKS_LOG_INFO(log, " level0_stop_writes_trigger: %d", + level0_stop_writes_trigger); + ROCKS_LOG_INFO(log, " max_compaction_bytes: %" PRIu64, + max_compaction_bytes); + ROCKS_LOG_INFO(log, " ignore_max_compaction_bytes_for_input: %s", + ignore_max_compaction_bytes_for_input ? "true" : "false"); + ROCKS_LOG_INFO(log, " target_file_size_base: %" PRIu64, + target_file_size_base); + ROCKS_LOG_INFO(log, " target_file_size_multiplier: %d", + target_file_size_multiplier); + ROCKS_LOG_INFO(log, " max_bytes_for_level_base: %" PRIu64, + max_bytes_for_level_base); + ROCKS_LOG_INFO(log, " max_bytes_for_level_multiplier: %f", + max_bytes_for_level_multiplier); + ROCKS_LOG_INFO(log, " ttl: %" PRIu64, + ttl); + ROCKS_LOG_INFO(log, " periodic_compaction_seconds: %" PRIu64, + periodic_compaction_seconds); + std::string result; + char buf[10]; + for (const auto m : max_bytes_for_level_multiplier_additional) { + snprintf(buf, sizeof(buf), "%d, ", m); + result += buf; + } + if (result.size() >= 2) { + result.resize(result.size() - 2); + } else { + result = ""; + } + + ROCKS_LOG_INFO(log, "max_bytes_for_level_multiplier_additional: %s", + result.c_str()); + ROCKS_LOG_INFO(log, " max_sequential_skip_in_iterations: %" PRIu64, + max_sequential_skip_in_iterations); + ROCKS_LOG_INFO(log, " check_flush_compaction_key_order: %d", + check_flush_compaction_key_order); + ROCKS_LOG_INFO(log, " paranoid_file_checks: %d", + paranoid_file_checks); + ROCKS_LOG_INFO(log, " report_bg_io_stats: %d", + report_bg_io_stats); + ROCKS_LOG_INFO(log, " compression: %d", + static_cast(compression)); + ROCKS_LOG_INFO(log, + " experimental_mempurge_threshold: %f", + experimental_mempurge_threshold); + + // Universal Compaction Options + ROCKS_LOG_INFO(log, "compaction_options_universal.size_ratio : %d", + compaction_options_universal.size_ratio); + ROCKS_LOG_INFO(log, "compaction_options_universal.min_merge_width : %d", + compaction_options_universal.min_merge_width); + ROCKS_LOG_INFO(log, "compaction_options_universal.max_merge_width : %d", + compaction_options_universal.max_merge_width); + ROCKS_LOG_INFO( + log, "compaction_options_universal.max_size_amplification_percent : %d", + compaction_options_universal.max_size_amplification_percent); + ROCKS_LOG_INFO(log, + "compaction_options_universal.compression_size_percent : %d", + compaction_options_universal.compression_size_percent); + ROCKS_LOG_INFO(log, "compaction_options_universal.stop_style : %d", + compaction_options_universal.stop_style); + ROCKS_LOG_INFO( + log, "compaction_options_universal.allow_trivial_move : %d", + static_cast(compaction_options_universal.allow_trivial_move)); + ROCKS_LOG_INFO(log, "compaction_options_universal.incremental : %d", + static_cast(compaction_options_universal.incremental)); + + // FIFO Compaction Options + ROCKS_LOG_INFO(log, "compaction_options_fifo.max_table_files_size : %" PRIu64, + compaction_options_fifo.max_table_files_size); + ROCKS_LOG_INFO(log, "compaction_options_fifo.allow_compaction 
: %d", + compaction_options_fifo.allow_compaction); + + // Blob file related options + ROCKS_LOG_INFO(log, " enable_blob_files: %s", + enable_blob_files ? "true" : "false"); + ROCKS_LOG_INFO(log, " min_blob_size: %" PRIu64, + min_blob_size); + ROCKS_LOG_INFO(log, " blob_file_size: %" PRIu64, + blob_file_size); + ROCKS_LOG_INFO(log, " blob_compression_type: %s", + CompressionTypeToString(blob_compression_type).c_str()); + ROCKS_LOG_INFO(log, " enable_blob_garbage_collection: %s", + enable_blob_garbage_collection ? "true" : "false"); + ROCKS_LOG_INFO(log, " blob_garbage_collection_age_cutoff: %f", + blob_garbage_collection_age_cutoff); + ROCKS_LOG_INFO(log, " blob_garbage_collection_force_threshold: %f", + blob_garbage_collection_force_threshold); + ROCKS_LOG_INFO(log, " blob_compaction_readahead_size: %" PRIu64, + blob_compaction_readahead_size); + ROCKS_LOG_INFO(log, " blob_file_starting_level: %d", + blob_file_starting_level); + ROCKS_LOG_INFO(log, " prepopulate_blob_cache: %s", + prepopulate_blob_cache == PrepopulateBlobCache::kFlushOnly + ? "flush only" + : "disable"); + ROCKS_LOG_INFO(log, " last_level_temperature: %d", + static_cast(last_level_temperature)); +} + +MutableCFOptions::MutableCFOptions(const Options& options) + : MutableCFOptions(ColumnFamilyOptions(options)) {} + +Status GetMutableOptionsFromStrings( + const MutableCFOptions& base_options, + const std::unordered_map& options_map, + Logger* /*info_log*/, MutableCFOptions* new_options) { + assert(new_options); + *new_options = base_options; + ConfigOptions config_options; + Status s = OptionTypeInfo::ParseType( + config_options, options_map, cf_mutable_options_type_info, new_options); + if (!s.ok()) { + *new_options = base_options; + } + return s; +} + +Status GetStringFromMutableCFOptions(const ConfigOptions& config_options, + const MutableCFOptions& mutable_opts, + std::string* opt_string) { + assert(opt_string); + opt_string->clear(); + return OptionTypeInfo::SerializeType( + config_options, cf_mutable_options_type_info, &mutable_opts, opt_string); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.h new file mode 100644 index 0000000000..37ef54c0cb --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/cf_options.h @@ -0,0 +1,347 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "db/dbformat.h" +#include "options/db_options.h" +#include "rocksdb/options.h" +#include "util/compression.h" + +namespace ROCKSDB_NAMESPACE { + +// ImmutableCFOptions is a data struct used by RocksDB internal. It contains a +// subset of Options that should not be changed during the entire lifetime +// of DB. Raw pointers defined in this struct do not have ownership to the data +// they point to. Options contains std::shared_ptr to these data. 
+struct ImmutableCFOptions {
+ public:
+  static const char* kName() { return "ImmutableCFOptions"; }
+  explicit ImmutableCFOptions();
+  explicit ImmutableCFOptions(const ColumnFamilyOptions& cf_options);
+
+  CompactionStyle compaction_style;
+
+  CompactionPri compaction_pri;
+
+  const Comparator* user_comparator;
+  InternalKeyComparator internal_comparator;  // Only in Immutable
+
+  std::shared_ptr<MergeOperator> merge_operator;
+
+  const CompactionFilter* compaction_filter;
+
+  std::shared_ptr<CompactionFilterFactory> compaction_filter_factory;
+
+  int min_write_buffer_number_to_merge;
+
+  int max_write_buffer_number_to_maintain;
+
+  int64_t max_write_buffer_size_to_maintain;
+
+  bool inplace_update_support;
+
+  UpdateStatus (*inplace_callback)(char* existing_value,
+                                   uint32_t* existing_value_size,
+                                   Slice delta_value,
+                                   std::string* merged_value);
+
+  std::shared_ptr<MemTableRepFactory> memtable_factory;
+
+  std::shared_ptr<TableFactory> table_factory;
+
+  Options::TablePropertiesCollectorFactories
+      table_properties_collector_factories;
+
+  // This options is required by PlainTableReader. May need to move it
+  // to PlainTableOptions just like bloom_bits_per_key
+  uint32_t bloom_locality;
+
+  bool level_compaction_dynamic_level_bytes;
+
+  bool level_compaction_dynamic_file_size;
+
+  int num_levels;
+
+  bool optimize_filters_for_hits;
+
+  bool force_consistency_checks;
+
+  uint64_t preclude_last_level_data_seconds;
+
+  uint64_t preserve_internal_time_seconds;
+
+  std::shared_ptr<const SliceTransform>
+      memtable_insert_with_hint_prefix_extractor;
+
+  std::vector<DbPath> cf_paths;
+
+  std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter;
+
+  std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory;
+
+  std::shared_ptr<Cache> blob_cache;
+
+  bool persist_user_defined_timestamps;
+};
+
+struct ImmutableOptions : public ImmutableDBOptions, public ImmutableCFOptions {
+  explicit ImmutableOptions();
+  explicit ImmutableOptions(const Options& options);
+
+  ImmutableOptions(const DBOptions& db_options,
+                   const ColumnFamilyOptions& cf_options);
+
+  ImmutableOptions(const ImmutableDBOptions& db_options,
+                   const ImmutableCFOptions& cf_options);
+
+  ImmutableOptions(const DBOptions& db_options,
+                   const ImmutableCFOptions& cf_options);
+
+  ImmutableOptions(const ImmutableDBOptions& db_options,
+                   const ColumnFamilyOptions& cf_options);
+};
+
+struct MutableCFOptions {
+  static const char* kName() { return "MutableCFOptions"; }
+  explicit MutableCFOptions(const ColumnFamilyOptions& options)
+      : write_buffer_size(options.write_buffer_size),
+        max_write_buffer_number(options.max_write_buffer_number),
+        arena_block_size(options.arena_block_size),
+        memtable_prefix_bloom_size_ratio(
+            options.memtable_prefix_bloom_size_ratio),
+        memtable_whole_key_filtering(options.memtable_whole_key_filtering),
+        memtable_huge_page_size(options.memtable_huge_page_size),
+        max_successive_merges(options.max_successive_merges),
+        inplace_update_num_locks(options.inplace_update_num_locks),
+        prefix_extractor(options.prefix_extractor),
+        experimental_mempurge_threshold(
+            options.experimental_mempurge_threshold),
+        disable_auto_compactions(options.disable_auto_compactions),
+        soft_pending_compaction_bytes_limit(
+            options.soft_pending_compaction_bytes_limit),
+        hard_pending_compaction_bytes_limit(
+            options.hard_pending_compaction_bytes_limit),
+        level0_file_num_compaction_trigger(
+            options.level0_file_num_compaction_trigger),
+        level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
+        level0_stop_writes_trigger(options.level0_stop_writes_trigger),
+        max_compaction_bytes(options.max_compaction_bytes),
+        ignore_max_compaction_bytes_for_input(
options.ignore_max_compaction_bytes_for_input), + target_file_size_base(options.target_file_size_base), + target_file_size_multiplier(options.target_file_size_multiplier), + max_bytes_for_level_base(options.max_bytes_for_level_base), + max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier), + ttl(options.ttl), + periodic_compaction_seconds(options.periodic_compaction_seconds), + max_bytes_for_level_multiplier_additional( + options.max_bytes_for_level_multiplier_additional), + compaction_options_fifo(options.compaction_options_fifo), + compaction_options_universal(options.compaction_options_universal), + enable_blob_files(options.enable_blob_files), + min_blob_size(options.min_blob_size), + blob_file_size(options.blob_file_size), + blob_compression_type(options.blob_compression_type), + enable_blob_garbage_collection(options.enable_blob_garbage_collection), + blob_garbage_collection_age_cutoff( + options.blob_garbage_collection_age_cutoff), + blob_garbage_collection_force_threshold( + options.blob_garbage_collection_force_threshold), + blob_compaction_readahead_size(options.blob_compaction_readahead_size), + blob_file_starting_level(options.blob_file_starting_level), + prepopulate_blob_cache(options.prepopulate_blob_cache), + max_sequential_skip_in_iterations( + options.max_sequential_skip_in_iterations), + check_flush_compaction_key_order( + options.check_flush_compaction_key_order), + paranoid_file_checks(options.paranoid_file_checks), + report_bg_io_stats(options.report_bg_io_stats), + compression(options.compression), + bottommost_compression(options.bottommost_compression), + compression_opts(options.compression_opts), + bottommost_compression_opts(options.bottommost_compression_opts), + last_level_temperature(options.last_level_temperature == + Temperature::kUnknown + ? options.bottommost_temperature + : options.last_level_temperature), + memtable_protection_bytes_per_key( + options.memtable_protection_bytes_per_key), + block_protection_bytes_per_key(options.block_protection_bytes_per_key), + sample_for_compression( + options.sample_for_compression), // TODO: is 0 fine here? 
+        compression_per_level(options.compression_per_level) {
+    RefreshDerivedOptions(options.num_levels, options.compaction_style);
+  }
+
+  MutableCFOptions()
+      : write_buffer_size(0),
+        max_write_buffer_number(0),
+        arena_block_size(0),
+        memtable_prefix_bloom_size_ratio(0),
+        memtable_whole_key_filtering(false),
+        memtable_huge_page_size(0),
+        max_successive_merges(0),
+        inplace_update_num_locks(0),
+        prefix_extractor(nullptr),
+        experimental_mempurge_threshold(0.0),
+        disable_auto_compactions(false),
+        soft_pending_compaction_bytes_limit(0),
+        hard_pending_compaction_bytes_limit(0),
+        level0_file_num_compaction_trigger(0),
+        level0_slowdown_writes_trigger(0),
+        level0_stop_writes_trigger(0),
+        max_compaction_bytes(0),
+        ignore_max_compaction_bytes_for_input(true),
+        target_file_size_base(0),
+        target_file_size_multiplier(0),
+        max_bytes_for_level_base(0),
+        max_bytes_for_level_multiplier(0),
+        ttl(0),
+        periodic_compaction_seconds(0),
+        compaction_options_fifo(),
+        enable_blob_files(false),
+        min_blob_size(0),
+        blob_file_size(0),
+        blob_compression_type(kNoCompression),
+        enable_blob_garbage_collection(false),
+        blob_garbage_collection_age_cutoff(0.0),
+        blob_garbage_collection_force_threshold(0.0),
+        blob_compaction_readahead_size(0),
+        blob_file_starting_level(0),
+        prepopulate_blob_cache(PrepopulateBlobCache::kDisable),
+        max_sequential_skip_in_iterations(0),
+        check_flush_compaction_key_order(true),
+        paranoid_file_checks(false),
+        report_bg_io_stats(false),
+        compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
+        bottommost_compression(kDisableCompressionOption),
+        last_level_temperature(Temperature::kUnknown),
+        memtable_protection_bytes_per_key(0),
+        block_protection_bytes_per_key(0),
+        sample_for_compression(0) {}
+
+  explicit MutableCFOptions(const Options& options);
+
+  // Must be called after any change to MutableCFOptions
+  void RefreshDerivedOptions(int num_levels, CompactionStyle compaction_style);
+
+  void RefreshDerivedOptions(const ImmutableCFOptions& ioptions) {
+    RefreshDerivedOptions(ioptions.num_levels, ioptions.compaction_style);
+  }
+
+  int MaxBytesMultiplerAdditional(int level) const {
+    if (level >=
+        static_cast<int>(max_bytes_for_level_multiplier_additional.size())) {
+      return 1;
+    }
+    return max_bytes_for_level_multiplier_additional[level];
+  }
+
+  void Dump(Logger* log) const;
+
+  // Memtable related options
+  size_t write_buffer_size;
+  int max_write_buffer_number;
+  size_t arena_block_size;
+  double memtable_prefix_bloom_size_ratio;
+  bool memtable_whole_key_filtering;
+  size_t memtable_huge_page_size;
+  size_t max_successive_merges;
+  size_t inplace_update_num_locks;
+  std::shared_ptr<const SliceTransform> prefix_extractor;
+  // [experimental]
+  // Used to activate or deactivate the mempurge feature (memtable garbage
+  // collection), which is deactivated by default. At every flush, the total
+  // useful payload (total entries minus garbage entries) is estimated as a
+  // ratio [useful payload bytes]/[size of a memtable (in bytes)]. This ratio
+  // is then compared to this `threshold` value:
+  //   - if ratio < threshold: the memtable is mempurged (garbage collected
+  //     in place) instead of being flushed;
+  //   - otherwise: a regular flush takes place.
+  // A threshold value > 1.0 : aggressive mempurge.
+  // 0 < threshold < 1.0: mempurge triggered only for very low useful payload
+  // ratios.
+ // [experimental] + double experimental_mempurge_threshold; + + // Compaction related options + bool disable_auto_compactions; + uint64_t soft_pending_compaction_bytes_limit; + uint64_t hard_pending_compaction_bytes_limit; + int level0_file_num_compaction_trigger; + int level0_slowdown_writes_trigger; + int level0_stop_writes_trigger; + uint64_t max_compaction_bytes; + bool ignore_max_compaction_bytes_for_input; + uint64_t target_file_size_base; + int target_file_size_multiplier; + uint64_t max_bytes_for_level_base; + double max_bytes_for_level_multiplier; + uint64_t ttl; + uint64_t periodic_compaction_seconds; + std::vector max_bytes_for_level_multiplier_additional; + CompactionOptionsFIFO compaction_options_fifo; + CompactionOptionsUniversal compaction_options_universal; + + // Blob file related options + bool enable_blob_files; + uint64_t min_blob_size; + uint64_t blob_file_size; + CompressionType blob_compression_type; + bool enable_blob_garbage_collection; + double blob_garbage_collection_age_cutoff; + double blob_garbage_collection_force_threshold; + uint64_t blob_compaction_readahead_size; + int blob_file_starting_level; + PrepopulateBlobCache prepopulate_blob_cache; + + // Misc options + uint64_t max_sequential_skip_in_iterations; + bool check_flush_compaction_key_order; + bool paranoid_file_checks; + bool report_bg_io_stats; + CompressionType compression; + CompressionType bottommost_compression; + CompressionOptions compression_opts; + CompressionOptions bottommost_compression_opts; + Temperature last_level_temperature; + uint32_t memtable_protection_bytes_per_key; + uint8_t block_protection_bytes_per_key; + + uint64_t sample_for_compression; + std::vector compression_per_level; + + // Derived options + // Per-level target file size. + std::vector max_file_size; +}; + +uint64_t MultiplyCheckOverflow(uint64_t op1, double op2); + +// Get the max file size in a given level. +uint64_t MaxFileSizeForLevel(const MutableCFOptions& cf_options, + int level, CompactionStyle compaction_style, int base_level = 1, + bool level_compaction_dynamic_level_bytes = false); + +// Get the max size of an L0 file for which we will pin its meta-blocks when +// `pin_l0_filter_and_index_blocks_in_cache` is set. +size_t MaxFileSizeForL0MetaPin(const MutableCFOptions& cf_options); + +Status GetStringFromMutableCFOptions(const ConfigOptions& config_options, + const MutableCFOptions& mutable_opts, + std::string* opt_string); + +Status GetMutableOptionsFromStrings( + const MutableCFOptions& base_options, + const std::unordered_map& options_map, + Logger* info_log, MutableCFOptions* new_options); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable.cc new file mode 100644 index 0000000000..5491336e0a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable.cc @@ -0,0 +1,712 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
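+
+// Overview (editorial sketch): Configurable is the base class behind
+// RocksDB's string-based options handling. An object registers one or more
+// OptionTypeInfo maps via RegisterOptions(), and the methods in this file use
+// those maps to parse, serialize, and compare its options. Assuming a
+// Configurable* `cfg` with registered options, a typical round trip is:
+//   ConfigOptions opts;
+//   Status s = cfg->ConfigureFromString(opts, "write_buffer_size=64k");
+//   std::string serialized;
+//   if (s.ok()) s = cfg->GetOptionString(opts, &serialized);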
+ +#include "rocksdb/configurable.h" + +#include "logging/logging.h" +#include "options/configurable_helper.h" +#include "options/options_helper.h" +#include "rocksdb/customizable.h" +#include "rocksdb/status.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "util/coding.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +void Configurable::RegisterOptions( + const std::string& name, void* opt_ptr, + const std::unordered_map* type_map) { + RegisteredOptions opts; + opts.name = name; + opts.type_map = type_map; + opts.opt_ptr = opt_ptr; + options_.emplace_back(opts); +} + +//************************************************************************* +// +// Methods for Initializing and Validating Configurable Objects +// +//************************************************************************* + +Status Configurable::PrepareOptions(const ConfigOptions& opts) { + // We ignore the invoke_prepare_options here intentionally, + // as if you are here, you must have called PrepareOptions explicitly. + Status status = Status::OK(); + for (auto opt_iter : options_) { + if (opt_iter.type_map != nullptr) { + for (auto map_iter : *(opt_iter.type_map)) { + auto& opt_info = map_iter.second; + if (opt_info.ShouldPrepare()) { + status = opt_info.Prepare(opts, map_iter.first, opt_iter.opt_ptr); + if (!status.ok()) { + return status; + } + } + } + } + } + return status; +} + +Status Configurable::ValidateOptions(const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts) const { + Status status; + for (auto opt_iter : options_) { + if (opt_iter.type_map != nullptr) { + for (auto map_iter : *(opt_iter.type_map)) { + auto& opt_info = map_iter.second; + if (opt_info.ShouldValidate()) { + status = opt_info.Validate(db_opts, cf_opts, map_iter.first, + opt_iter.opt_ptr); + if (!status.ok()) { + return status; + } + } + } + } + } + return status; +} + +/*********************************************************************************/ +/* */ +/* Methods for Retrieving Options from Configurables */ +/* */ +/*********************************************************************************/ + +const void* Configurable::GetOptionsPtr(const std::string& name) const { + for (auto o : options_) { + if (o.name == name) { + return o.opt_ptr; + } + } + return nullptr; +} + +std::string Configurable::GetOptionName(const std::string& opt_name) const { + return opt_name; +} + +const OptionTypeInfo* ConfigurableHelper::FindOption( + const std::vector& options, + const std::string& short_name, std::string* opt_name, void** opt_ptr) { + for (auto iter : options) { + if (iter.type_map != nullptr) { + const auto opt_info = + OptionTypeInfo::Find(short_name, *(iter.type_map), opt_name); + if (opt_info != nullptr) { + *opt_ptr = iter.opt_ptr; + return opt_info; + } + } + } + return nullptr; +} + +//************************************************************************* +// +// Methods for Configuring Options from Strings/Name-Value Pairs/Maps +// +//************************************************************************* + +Status Configurable::ConfigureFromMap( + const ConfigOptions& config_options, + const std::unordered_map& opts_map) { + Status s = ConfigureFromMap(config_options, opts_map, nullptr); + return s; +} + +Status Configurable::ConfigureFromMap( + const ConfigOptions& config_options, + const std::unordered_map& opts_map, + std::unordered_map* unused) { + return ConfigureOptions(config_options, opts_map, unused); +} + +Status 
Configurable::ConfigureOptions( + const ConfigOptions& config_options, + const std::unordered_map& opts_map, + std::unordered_map* unused) { + std::string curr_opts; + Status s; + if (!opts_map.empty()) { + // There are options in the map. + // Save the current configuration in curr_opts and then configure the + // options, but do not prepare them now. We will do all the prepare when + // the configuration is complete. + ConfigOptions copy = config_options; + copy.invoke_prepare_options = false; + if (!config_options.ignore_unknown_options) { + // If we are not ignoring unused, get the defaults in case we need to + // reset + copy.depth = ConfigOptions::kDepthDetailed; + copy.delimiter = "; "; + GetOptionString(copy, &curr_opts).PermitUncheckedError(); + } + + s = ConfigurableHelper::ConfigureOptions(copy, *this, opts_map, unused); + } + if (config_options.invoke_prepare_options && s.ok()) { + s = PrepareOptions(config_options); + } + if (!s.ok() && !curr_opts.empty()) { + ConfigOptions reset = config_options; + reset.ignore_unknown_options = true; + reset.invoke_prepare_options = true; + reset.ignore_unsupported_options = true; + // There are some options to reset from this current error + ConfigureFromString(reset, curr_opts).PermitUncheckedError(); + } + return s; +} + +Status Configurable::ParseStringOptions(const ConfigOptions& /*config_options*/, + const std::string& /*opts_str*/) { + return Status::OK(); +} + +Status Configurable::ConfigureFromString(const ConfigOptions& config_options, + const std::string& opts_str) { + Status s; + if (!opts_str.empty()) { + if (opts_str.find(';') != std::string::npos || + opts_str.find('=') != std::string::npos) { + std::unordered_map opt_map; + s = StringToMap(opts_str, &opt_map); + if (s.ok()) { + s = ConfigureFromMap(config_options, opt_map, nullptr); + } + } else { + s = ParseStringOptions(config_options, opts_str); + if (s.ok() && config_options.invoke_prepare_options) { + s = PrepareOptions(config_options); + } + } + } else if (config_options.invoke_prepare_options) { + s = PrepareOptions(config_options); + } else { + s = Status::OK(); + } + return s; +} + +/** + * Sets the value of the named property to the input value, returning OK on + * succcess. + */ +Status Configurable::ConfigureOption(const ConfigOptions& config_options, + const std::string& name, + const std::string& value) { + return ConfigurableHelper::ConfigureSingleOption(config_options, *this, name, + value); +} + +/** + * Looks for the named option amongst the options for this type and sets + * the value for it to be the input value. + * If the name was found, found_option will be set to true and the resulting + * status should be returned. + */ + +Status Configurable::ParseOption(const ConfigOptions& config_options, + const OptionTypeInfo& opt_info, + const std::string& opt_name, + const std::string& opt_value, void* opt_ptr) { + if (opt_info.IsMutable()) { + if (config_options.mutable_options_only) { + // This option is mutable. 
Treat all of its children as mutable as well + ConfigOptions copy = config_options; + copy.mutable_options_only = false; + return opt_info.Parse(copy, opt_name, opt_value, opt_ptr); + } else { + return opt_info.Parse(config_options, opt_name, opt_value, opt_ptr); + } + } else if (config_options.mutable_options_only) { + return Status::InvalidArgument("Option not changeable: " + opt_name); + } else { + return opt_info.Parse(config_options, opt_name, opt_value, opt_ptr); + } +} + + +Status ConfigurableHelper::ConfigureOptions( + const ConfigOptions& config_options, Configurable& configurable, + const std::unordered_map& opts_map, + std::unordered_map* unused) { + std::unordered_map remaining = opts_map; + Status s = Status::OK(); + if (!opts_map.empty()) { + for (const auto& iter : configurable.options_) { + if (iter.type_map != nullptr) { + s = ConfigureSomeOptions(config_options, configurable, *(iter.type_map), + &remaining, iter.opt_ptr); + if (remaining.empty()) { // Are there more options left? + break; + } else if (!s.ok()) { + break; + } + } + } + } + if (unused != nullptr && !remaining.empty()) { + unused->insert(remaining.begin(), remaining.end()); + } + if (config_options.ignore_unknown_options) { + s = Status::OK(); + } else if (s.ok() && unused == nullptr && !remaining.empty()) { + s = Status::NotFound("Could not find option: ", remaining.begin()->first); + } + return s; +} + +/** + * Updates the object with the named-value property values, returning OK on + * succcess. Any properties that were found are removed from the options list; + * upon return only options that were not found in this opt_map remain. + + * Returns: + * - OK if ignore_unknown_options is set + * - InvalidArgument, if any option was invalid + * - NotSupported, if any option is unsupported and ignore_unsupported_options + is OFF + * - OK, if no option was invalid or not supported (or ignored) + */ +Status ConfigurableHelper::ConfigureSomeOptions( + const ConfigOptions& config_options, Configurable& configurable, + const std::unordered_map& type_map, + std::unordered_map* options, void* opt_ptr) { + Status result = Status::OK(); // The last non-OK result (if any) + Status notsup = Status::OK(); // The last NotSupported result (if any) + std::string elem_name; + int found = 1; + std::unordered_set unsupported; + // While there are unused properties and we processed at least one, + // go through the remaining unused properties and attempt to configure them. + while (found > 0 && !options->empty()) { + found = 0; + notsup = Status::OK(); + for (auto it = options->begin(); it != options->end();) { + const std::string& opt_name = configurable.GetOptionName(it->first); + const std::string& opt_value = it->second; + const auto opt_info = + OptionTypeInfo::Find(opt_name, type_map, &elem_name); + if (opt_info == nullptr) { // Did not find the option. 
+
+/**
+ * Updates the object with the named-value property values, returning OK on
+ * success.  Any properties that were found are removed from the options list;
+ * upon return only options that were not found in this opt_map remain.
+ *
+ * Returns:
+ *   - OK if ignore_unknown_options is set
+ *   - InvalidArgument, if any option was invalid
+ *   - NotSupported, if any option is unsupported and
+ *     ignore_unsupported_options is OFF
+ *   - OK, if no option was invalid or not supported (or ignored)
+ */
+Status ConfigurableHelper::ConfigureSomeOptions(
+    const ConfigOptions& config_options, Configurable& configurable,
+    const std::unordered_map<std::string, OptionTypeInfo>& type_map,
+    std::unordered_map<std::string, std::string>* options, void* opt_ptr) {
+  Status result = Status::OK();  // The last non-OK result (if any)
+  Status notsup = Status::OK();  // The last NotSupported result (if any)
+  std::string elem_name;
+  int found = 1;
+  std::unordered_set<std::string> unsupported;
+  // While there are unused properties and we processed at least one,
+  // go through the remaining unused properties and attempt to configure them.
+  while (found > 0 && !options->empty()) {
+    found = 0;
+    notsup = Status::OK();
+    for (auto it = options->begin(); it != options->end();) {
+      const std::string& opt_name = configurable.GetOptionName(it->first);
+      const std::string& opt_value = it->second;
+      const auto opt_info =
+          OptionTypeInfo::Find(opt_name, type_map, &elem_name);
+      if (opt_info == nullptr) {  // Did not find the option.  Skip it
+        ++it;
+      } else {
+        Status s = ConfigureOption(config_options, configurable, *opt_info,
+                                   opt_name, elem_name, opt_value, opt_ptr);
+        if (s.IsNotFound()) {
+          ++it;
+        } else if (s.IsNotSupported()) {
+          notsup = s;
+          unsupported.insert(it->first);
+          ++it;  // Skip it for now
+        } else {
+          found++;
+          it = options->erase(it);
+          if (!s.ok()) {
+            result = s;
+          }
+        }
+      }
+    }  // End for all remaining options
+  }    // End while found one or options remain
+
+  // Now that we have been through the list, remove any unsupported
+  for (auto u : unsupported) {
+    auto it = options->find(u);
+    if (it != options->end()) {
+      options->erase(it);
+    }
+  }
+  if (config_options.ignore_unknown_options) {
+    if (!result.ok()) result.PermitUncheckedError();
+    if (!notsup.ok()) notsup.PermitUncheckedError();
+    return Status::OK();
+  } else if (!result.ok()) {
+    if (!notsup.ok()) notsup.PermitUncheckedError();
+    return result;
+  } else if (config_options.ignore_unsupported_options) {
+    if (!notsup.ok()) notsup.PermitUncheckedError();
+    return Status::OK();
+  } else {
+    return notsup;
+  }
+}
+
+Status ConfigurableHelper::ConfigureSingleOption(
+    const ConfigOptions& config_options, Configurable& configurable,
+    const std::string& name, const std::string& value) {
+  const std::string& opt_name = configurable.GetOptionName(name);
+  std::string elem_name;
+  void* opt_ptr = nullptr;
+  const auto opt_info =
+      FindOption(configurable.options_, opt_name, &elem_name, &opt_ptr);
+  if (opt_info == nullptr) {
+    return Status::NotFound("Could not find option: ", name);
+  } else {
+    return ConfigureOption(config_options, configurable, *opt_info, opt_name,
+                           elem_name, value, opt_ptr);
+  }
+}
+
+Status ConfigurableHelper::ConfigureCustomizableOption(
+    const ConfigOptions& config_options, Configurable& configurable,
+    const OptionTypeInfo& opt_info, const std::string& opt_name,
+    const std::string& name, const std::string& value, void* opt_ptr) {
+  Customizable* custom = opt_info.AsRawPointer<Customizable>(opt_ptr);
+  ConfigOptions copy = config_options;
+  if (opt_info.IsMutable()) {
+    // This option is mutable. Pass that property on to any subsequent calls
+    copy.mutable_options_only = false;
+  }
+
+  if (opt_info.IsMutable() || !config_options.mutable_options_only) {
+    // Either the option is mutable, or we are processing all of the options
+    if (opt_name == name || name == OptionTypeInfo::kIdPropName() ||
+        EndsWith(opt_name, OptionTypeInfo::kIdPropSuffix())) {
+      return configurable.ParseOption(copy, opt_info, name, value, opt_ptr);
+    } else if (value.empty()) {
+      return Status::OK();
+    } else if (custom == nullptr || !StartsWith(name, custom->GetId() + ".")) {
+      return configurable.ParseOption(copy, opt_info, name, value, opt_ptr);
+    } else if (value.find("=") != std::string::npos) {
+      return custom->ConfigureFromString(copy, value);
+    } else {
+      return custom->ConfigureOption(copy, name, value);
+    }
+  } else {
+    // We are processing immutable options, which means that we cannot change
+    // the Customizable object itself, but could change its mutable properties.
+    // Check to make sure that nothing is trying to change the Customizable
+    if (custom == nullptr) {
+      // We do not have a Customizable to configure.  This is OK if the
+      // value is empty (nothing being configured) but an error otherwise
+      if (value.empty()) {
+        return Status::OK();
+      } else {
+        return Status::InvalidArgument("Option not changeable: " + opt_name);
+      }
+    } else if (EndsWith(opt_name, OptionTypeInfo::kIdPropSuffix()) ||
+               name == OptionTypeInfo::kIdPropName()) {
+      // We have a property of the form "id=value" or "table.id=value"
+      // This is OK if the ID/value matches the current customizable object
+      if (custom->GetId() == value) {
+        return Status::OK();
+      } else {
+        return Status::InvalidArgument("Option not changeable: " + opt_name);
+      }
+    } else if (opt_name == name) {
+      // The properties are of one of the forms:
+      //    name = { id = id; prop1 = value1; ... }
+      //    name = { prop1=value1; prop2=value2; ... }
+      //    name = ID
+      // Convert the value to a map and extract the ID
+      // If the ID does not match that of the current customizable, return an
+      // error.  Otherwise, update the current customizable via the properties
+      // map
+      std::unordered_map<std::string, std::string> props;
+      std::string id;
+      Status s =
+          Configurable::GetOptionsMap(value, custom->GetId(), &id, &props);
+      if (!s.ok()) {
+        return s;
+      } else if (custom->GetId() != id) {
+        return Status::InvalidArgument("Option not changeable: " + opt_name);
+      } else if (props.empty()) {
+        return Status::OK();
+      } else {
+        return custom->ConfigureFromMap(copy, props);
+      }
+    } else {
+      // Attempting to configure one of the properties of the customizable
+      // Let it through
+      return custom->ConfigureOption(copy, name, value);
+    }
+  }
+}
+
+Status ConfigurableHelper::ConfigureOption(
+    const ConfigOptions& config_options, Configurable& configurable,
+    const OptionTypeInfo& opt_info, const std::string& opt_name,
+    const std::string& name, const std::string& value, void* opt_ptr) {
+  if (opt_info.IsCustomizable()) {
+    return ConfigureCustomizableOption(config_options, configurable, opt_info,
+                                       opt_name, name, value, opt_ptr);
+  } else if (opt_name == name) {
+    return configurable.ParseOption(config_options, opt_info, opt_name, value,
+                                    opt_ptr);
+  } else if (opt_info.IsStruct() || opt_info.IsConfigurable()) {
+    return configurable.ParseOption(config_options, opt_info, name, value,
+                                    opt_ptr);
+  } else {
+    return Status::NotFound("Could not find option: ", name);
+  }
+}
+
+//*******************************************************************************
+//
+//       Methods for Converting Options into strings
+//
+//*******************************************************************************
+
+Status Configurable::GetOptionString(const ConfigOptions& config_options,
+                                     std::string* result) const {
+  assert(result);
+  result->clear();
+  return ConfigurableHelper::SerializeOptions(config_options, *this, "",
+                                              result);
+}
+
+std::string Configurable::ToString(const ConfigOptions& config_options,
+                                   const std::string& prefix) const {
+  std::string result = SerializeOptions(config_options, prefix);
+  if (result.empty() || result.find('=') == std::string::npos) {
+    return result;
+  } else {
+    return "{" + result + "}";
+  }
+}
+
+std::string Configurable::SerializeOptions(const ConfigOptions& config_options,
+                                           const std::string& header) const {
+  std::string result;
+  Status s = ConfigurableHelper::SerializeOptions(config_options, *this,
+                                                  header, &result);
+  assert(s.ok());
+  return result;
+}
+
+Status Configurable::GetOption(const ConfigOptions& config_options,
+                               const std::string& name,
+                               std::string* value) const {
+  return ConfigurableHelper::GetOption(config_options, *this,
+                                       GetOptionName(name), value);
+}
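+
+// Illustrative sketch (hypothetical object and option names): retrieving one
+// option and the full serialized form.  Any Configurable subclass with
+// registered options behaves the same way.
+//
+//   ConfigOptions config_options;
+//   std::string value;
+//   Status s = cfg.GetOption(config_options, "window", &value);
+//   // value is only the serialized value, e.g. "100", not "window=100"
+//   std::string all;
+//   s = cfg.GetOptionString(config_options, &all);
+//   // all might yield e.g. "window=100; verify=true; "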
+
+Status ConfigurableHelper::GetOption(const ConfigOptions& config_options,
+                                     const Configurable& configurable,
+                                     const std::string& short_name,
+                                     std::string* value) {
+  // Look for option directly
+  assert(value);
+  value->clear();
+
+  std::string opt_name;
+  void* opt_ptr = nullptr;
+  const auto opt_info =
+      FindOption(configurable.options_, short_name, &opt_name, &opt_ptr);
+  if (opt_info != nullptr) {
+    ConfigOptions embedded = config_options;
+    embedded.delimiter = ";";
+    if (short_name == opt_name) {
+      return opt_info->Serialize(embedded, opt_name, opt_ptr, value);
+    } else if (opt_info->IsStruct()) {
+      return opt_info->Serialize(embedded, opt_name, opt_ptr, value);
+    } else if (opt_info->IsConfigurable()) {
+      auto const* config = opt_info->AsRawPointer<Configurable>(opt_ptr);
+      if (config != nullptr) {
+        return config->GetOption(embedded, opt_name, value);
+      }
+    }
+  }
+  return Status::NotFound("Cannot find option: ", short_name);
+}
+
+Status ConfigurableHelper::SerializeOptions(const ConfigOptions& config_options,
+                                            const Configurable& configurable,
+                                            const std::string& prefix,
+                                            std::string* result) {
+  assert(result);
+  for (auto const& opt_iter : configurable.options_) {
+    if (opt_iter.type_map != nullptr) {
+      for (const auto& map_iter : *(opt_iter.type_map)) {
+        const auto& opt_name = map_iter.first;
+        const auto& opt_info = map_iter.second;
+        if (opt_info.ShouldSerialize()) {
+          std::string value;
+          Status s;
+          if (!config_options.mutable_options_only) {
+            s = opt_info.Serialize(config_options, prefix + opt_name,
+                                   opt_iter.opt_ptr, &value);
+          } else if (opt_info.IsMutable()) {
+            ConfigOptions copy = config_options;
+            copy.mutable_options_only = false;
+            s = opt_info.Serialize(copy, prefix + opt_name, opt_iter.opt_ptr,
+                                   &value);
+          } else if (opt_info.IsConfigurable()) {
+            // If it is a Configurable and we are either printing all of the
+            // details or not printing only the name, this option should be
+            // included in the list
+            if (config_options.IsDetailed() ||
+                !opt_info.IsEnabled(OptionTypeFlags::kStringNameOnly)) {
+              s = opt_info.Serialize(config_options, prefix + opt_name,
+                                     opt_iter.opt_ptr, &value);
+            }
+          }
+          if (!s.ok()) {
+            return s;
+          } else if (!value.empty()) {
+            // <prefix><opt_name>=<value><delimiter>
+            result->append(prefix + opt_name + "=" + value +
+                           config_options.delimiter);
+          }
+        }
+      }
+    }
+  }
+  return Status::OK();
+}
+
+//********************************************************************************
+//
+// Methods for listing the options from Configurables
+//
+//********************************************************************************
+Status Configurable::GetOptionNames(
+    const ConfigOptions& config_options,
+    std::unordered_set<std::string>* result) const {
+  assert(result);
+  return ConfigurableHelper::ListOptions(config_options, *this, "", result);
+}
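+
+// Illustrative sketch (hypothetical object): enumerating the registered
+// option names.  Mutable-only listing mirrors the serialization logic above.
+//
+//   ConfigOptions config_options;
+//   std::unordered_set<std::string> names;
+//   Status s = cfg.GetOptionNames(config_options, &names);
+//   // names now holds e.g. {"window", "verify"}; with
+//   // config_options.mutable_options_only == true, only options registered
+//   // with OptionTypeFlags::kMutable are returned.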
+
+Status ConfigurableHelper::ListOptions(
+    const ConfigOptions& config_options, const Configurable& configurable,
+    const std::string& prefix, std::unordered_set<std::string>* result) {
+  Status status;
+  for (auto const& opt_iter : configurable.options_) {
+    if (opt_iter.type_map != nullptr) {
+      for (const auto& map_iter : *(opt_iter.type_map)) {
+        const auto& opt_name = map_iter.first;
+        const auto& opt_info = map_iter.second;
+        // If the option is no longer used in rocksdb and marked as deprecated,
+        // we skip it in the serialization.
+        if (!opt_info.IsDeprecated() && !opt_info.IsAlias()) {
+          if (!config_options.mutable_options_only) {
+            result->emplace(prefix + opt_name);
+          } else if (opt_info.IsMutable()) {
+            result->emplace(prefix + opt_name);
+          }
+        }
+      }
+    }
+  }
+  return status;
+}
+
+//*******************************************************************************
+//
+// Methods for Comparing Configurables
+//
+//*******************************************************************************
+
+bool Configurable::AreEquivalent(const ConfigOptions& config_options,
+                                 const Configurable* other,
+                                 std::string* name) const {
+  assert(name);
+  name->clear();
+  if (this == other || config_options.IsCheckDisabled()) {
+    return true;
+  } else if (other != nullptr) {
+    return ConfigurableHelper::AreEquivalent(config_options, *this, *other,
+                                             name);
+  } else {
+    return false;
+  }
+}
+
+bool Configurable::OptionsAreEqual(const ConfigOptions& config_options,
+                                   const OptionTypeInfo& opt_info,
+                                   const std::string& opt_name,
+                                   const void* const this_ptr,
+                                   const void* const that_ptr,
+                                   std::string* mismatch) const {
+  if (opt_info.AreEqual(config_options, opt_name, this_ptr, that_ptr,
+                        mismatch)) {
+    return true;
+  } else if (opt_info.AreEqualByName(config_options, opt_name, this_ptr,
+                                     that_ptr)) {
+    *mismatch = "";
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool ConfigurableHelper::AreEquivalent(const ConfigOptions& config_options,
+                                       const Configurable& this_one,
+                                       const Configurable& that_one,
+                                       std::string* mismatch) {
+  assert(mismatch != nullptr);
+  for (auto const& o : this_one.options_) {
+    const auto this_offset = this_one.GetOptionsPtr(o.name);
+    const auto that_offset = that_one.GetOptionsPtr(o.name);
+    if (this_offset != that_offset) {
+      if (this_offset == nullptr || that_offset == nullptr) {
+        return false;
+      } else if (o.type_map != nullptr) {
+        for (const auto& map_iter : *(o.type_map)) {
+          const auto& opt_info = map_iter.second;
+          if (config_options.IsCheckEnabled(opt_info.GetSanityLevel())) {
+            if (!config_options.mutable_options_only) {
+              if (!this_one.OptionsAreEqual(config_options, opt_info,
+                                            map_iter.first, this_offset,
+                                            that_offset, mismatch)) {
+                return false;
+              }
+            } else if (opt_info.IsMutable()) {
+              ConfigOptions copy = config_options;
+              copy.mutable_options_only = false;
+              if (!this_one.OptionsAreEqual(copy, opt_info, map_iter.first,
+                                            this_offset, that_offset,
+                                            mismatch)) {
+                return false;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
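+
+// Illustrative sketch (hypothetical objects): structural comparison of two
+// configured objects of the same class.  On a mismatch, the out-parameter
+// names the offending option.
+//
+//   ConfigOptions config_options;
+//   std::string mismatch;
+//   if (!cfg_a.AreEquivalent(config_options, &cfg_b, &mismatch)) {
+//     // mismatch holds the first option whose values differed, e.g. "window"
+//   }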
+
+Status Configurable::GetOptionsMap(
+    const std::string& value, const std::string& default_id, std::string* id,
+    std::unordered_map<std::string, std::string>* props) {
+  assert(id);
+  assert(props);
+  Status status;
+  if (value.empty() || value == kNullptrString) {
+    *id = default_id;
+  } else if (value.find('=') == std::string::npos) {
+    *id = value;
+  } else {
+    status = StringToMap(value, props);
+    if (!status.ok()) {       // There was an error creating the map.
+      *id = value;            // Treat the value as id
+      props->clear();         // Clear the properties
+      status = Status::OK();  // and ignore the error
+    } else {
+      auto iter = props->find(OptionTypeInfo::kIdPropName());
+      if (iter != props->end()) {
+        *id = iter->second;
+        props->erase(iter);
+        if (*id == kNullptrString) {
+          id->clear();
+        }
+      } else if (!default_id.empty()) {
+        *id = default_id;
+      } else {           // No id property and no default
+        *id = value;     // Treat the value as id
+        props->clear();  // Clear the properties
+      }
+    }
+  }
+  return status;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable_helper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable_helper.h
new file mode 100644
index 0000000000..5d409f82a4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable_helper.h
@@ -0,0 +1,185 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "rocksdb/configurable.h"
+#include "rocksdb/convenience.h"
+
+namespace ROCKSDB_NAMESPACE {
+// Helper class defining static methods for supporting the Configurable
+// class.  The purpose of this class is to keep the Configurable class
+// as tight as possible and provide methods for doing the actual work
+// of configuring the objects.
+class ConfigurableHelper {
+ public:
+  // Configures the input Configurable object based on the parameters.
+  // On successful completion, the Configurable is updated with the settings
+  // from the opt_map.
+  //
+  // The acceptable values of the name/value pairs are documented with the
+  // specific class/instance.
+  //
+  // @param config_options Controls how the arguments are processed.
+  // @param opt_map Name/value pairs of the options to update
+  // @param unused If specified, this value will return the name/value
+  //      pairs from opt_map that were NotFound for this object.
+  // @return OK If all values in the map were successfully updated
+  // @return NotFound If any of the names in the opt_map were not valid
+  //      for this object.  If unused is specified, it will contain the
+  //      collection of NotFound entries
+  // @return NotSupported If any of the names are valid but the object does
+  //       not know how to convert the value.  This can happen if, for example,
+  //       there is some nested Configurable that cannot be created.
+  // @return InvalidArgument If any of the values cannot be successfully
+  //       parsed.  This can also be returned if PrepareOptions encounters an
+  //       error.
+  static Status ConfigureOptions(
+      const ConfigOptions& config_options, Configurable& configurable,
+      const std::unordered_map<std::string, std::string>& options,
+      std::unordered_map<std::string, std::string>* unused);
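+
+  // Illustrative sketch (option names are assumptions): map-based
+  // configuration with collection of unrecognized entries.
+  //
+  //   std::unordered_map<std::string, std::string> opts = {
+  //       {"window", "100"}, {"no_such_option", "x"}};
+  //   std::unordered_map<std::string, std::string> unused;
+  //   Status s = ConfigurableHelper::ConfigureOptions(config_options, cfg,
+  //                                                   opts, &unused);
+  //   // s is OK; unused == {{"no_such_option", "x"}}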
+
+  // Internal method to configure a set of options for this object.
+  // Classes may override this method to change its behavior.
+  // @param config_options Controls how the options are being configured
+  // @param type_name The name that was registered for this set of options
+  // @param type_map The map of options for this name
+  // @param opt_ptr Pointer to the object being configured for this option set.
+  // @param options The option name/values being updated.  On return, any
+  //    option that was found is removed from the list.
+  // @return OK If all of the options were successfully updated.
+  // @return InvalidArgument If an option was found but the value could not
+  //       be updated.
+  // @return NotFound If an option name was not found in type_map
+  // @return NotSupported If the option was found but no rule for converting
+  //       the value could be found.
+  static Status ConfigureSomeOptions(
+      const ConfigOptions& config_options, Configurable& configurable,
+      const std::unordered_map<std::string, OptionTypeInfo>& type_map,
+      std::unordered_map<std::string, std::string>* options, void* opt_ptr);
+
+  // Configures a single option in the input Configurable.
+  // This method will look through the set of option names for this
+  // Configurable searching for one with the input name.  If such an option
+  // is found, it will be configured via the input value.
+  //
+  // @param config_options Controls how the option is being configured
+  // @param configurable The object to configure
+  // @param name For options with sub-options (like Structs or Configurables),
+  //      this value may be the name of the sub-field of the option being
+  //      updated.  For example, if the option is
+  //      "compaction_options_fifo.allow_compaction", then field name would be
+  //      "allow_compaction".  For most options, field_name and opt_name will
+  //      be equivalent.
+  // @param value The new value for this option.
+  // @return See ConfigureOptions for the possible return values
+  static Status ConfigureSingleOption(const ConfigOptions& config_options,
+                                      Configurable& configurable,
+                                      const std::string& name,
+                                      const std::string& value);
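+
+  // Illustrative sketch (reusing the dotted-name example documented above):
+  // configuring a nested sub-field of a struct option.
+  //
+  //   Status s = ConfigurableHelper::ConfigureSingleOption(
+  //       config_options, cfg,
+  //       "compaction_options_fifo.allow_compaction", "true");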
+
+  // Configures the option referenced by opt_info for this configurable
+  // This method configures the option based on opt_info for the input
+  // configurable.
+  // @param config_options Controls how the option is being configured
+  // @param configurable The object to configure
+  // @param opt_name The full option name
+  // @param name For options with sub-options (like Structs or Configurables),
+  //      this value may be the name of the sub-field of the option being
+  //      updated.  For example, if the option is
+  //      "compaction_options_fifo.allow_compaction", then field name would be
+  //      "allow_compaction".  For most options, field_name and opt_name will
+  //      be equivalent.
+  // @param value The new value for this option.
+  // @return See ConfigureOptions for the possible return values
+  static Status ConfigureOption(const ConfigOptions& config_options,
+                                Configurable& configurable,
+                                const OptionTypeInfo& opt_info,
+                                const std::string& opt_name,
+                                const std::string& name,
+                                const std::string& value, void* opt_ptr);
+
+  // Returns the value of the option associated with the input name
+  // This method is the functional inverse of ConfigureOption
+  // @param config_options Controls how the value is returned
+  // @param configurable The object from which to get the option.
+  // @param name The name of the option to return a value for.
+  // @param value The returned value associated with the named option.
+  //              Note that value will be only the serialized version
+  //              of the option and not "name=value"
+  // @return OK If the named field was successfully updated to value.
+  // @return NotFound If the name is not valid for this object.
+  // @return InvalidArgument If the name is valid for this object but
+  //      its value cannot be serialized.
+  static Status GetOption(const ConfigOptions& config_options,
+                          const Configurable& configurable,
+                          const std::string& name, std::string* value);
+
+  // Serializes the input Configurable into the output result.
+  // This is the inverse of ConfigureOptions
+  // @param config_options Controls how serialization happens.
+  // @param configurable The object to serialize
+  // @param prefix A prefix to add to each option as it is serialized.
+  // @param result The string representation of the configurable.
+  // @return OK If the options for this object were successfully serialized.
+  // @return InvalidArgument If one or more of the options could not be
+  //       serialized.
+  static Status SerializeOptions(const ConfigOptions& config_options,
+                                 const Configurable& configurable,
+                                 const std::string& prefix,
+                                 std::string* result);
+
+  // Internal method to list the option names for this object.
+  // Classes may override this method to change its behavior.
+  // @see ListOptions for more details
+  static Status ListOptions(const ConfigOptions& config_options,
+                            const Configurable& configurable,
+                            const std::string& prefix,
+                            std::unordered_set<std::string>* result);
+
+  // Checks to see if the two configurables are equivalent to one another.
+  // This method assumes that the two objects are of the same class.
+  // @param config_options Controls how the options are compared.
+  // @param this_one The object to compare to.
+  // @param that_one The other object being compared.
+  // @param mismatch If the objects do not match, this parameter contains
+  //      the name of the option that triggered the match failure.
+  // @return True if the objects match, false otherwise.
+  static bool AreEquivalent(const ConfigOptions& config_options,
+                            const Configurable& this_one,
+                            const Configurable& that_one,
+                            std::string* mismatch);
+
+ private:
+  // Looks for the option specified by name in the RegisteredOptions.
+  // This method traverses the types in the input options vector.  If an entry
+  // matching name is found, that entry, opt_name, and pointer are returned.
+  // @param options  The vector of options to search through
+  // @param name     The name of the option to search for in the OptionType map
+  // @param opt_name If the name was found, this value is set to the option
+  //                 name associated with the input name/type.
+  // @param opt_ptr  If the name was found, this value is set to the option
+  //                 pointer in the RegisteredOptions vector associated with
+  //                 this entry
+  // @return         A pointer to the OptionTypeInfo from the options if found,
+  //                 nullptr if the name was not found in the input options
+  static const OptionTypeInfo* FindOption(
+      const std::vector<Configurable::RegisteredOptions>& options,
+      const std::string& name, std::string* opt_name, void** opt_ptr);
+
+  static Status ConfigureCustomizableOption(
+      const ConfigOptions& config_options, Configurable& configurable,
+      const OptionTypeInfo& opt_info, const std::string& opt_name,
+      const std::string& name, const std::string& value, void* opt_ptr);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable_test.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable_test.h
new file mode 100644
index 0000000000..3d6fe84108
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/configurable_test.h
@@ -0,0 +1,116 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "options/configurable_helper.h"
+#include "rocksdb/configurable.h"
+#include "rocksdb/utilities/options_type.h"
+
+namespace ROCKSDB_NAMESPACE {
+struct ColumnFamilyOptions;
+struct DBOptions;
+
+namespace test {
+enum TestEnum { kTestA, kTestB };
+
+static const std::unordered_map<std::string, TestEnum> test_enum_map = {
+    {"A", TestEnum::kTestA},
+    {"B", TestEnum::kTestB},
+};
+
+struct TestOptions {
+  int i = 0;
+  bool b = false;
+  bool d = true;
+  TestEnum e = TestEnum::kTestA;
+  std::string s = "";
+  std::string u = "";
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> simple_option_info = {
+    {"int",
+     {offsetof(struct TestOptions, i), OptionType::kInt,
+      OptionVerificationType::kNormal, OptionTypeFlags::kMutable}},
+    {"bool",
+     {offsetof(struct TestOptions, b), OptionType::kBoolean,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"string",
+     {offsetof(struct TestOptions, s), OptionType::kString,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> enum_option_info = {
+    {"enum",
+     OptionTypeInfo::Enum<TestEnum>(offsetof(struct TestOptions, e),
+                                    &test_enum_map)}};
+
+static std::unordered_map<std::string, OptionTypeInfo> unique_option_info = {
+    {"unique",
+     {0, OptionType::kConfigurable, OptionVerificationType::kNormal,
+      (OptionTypeFlags::kUnique | OptionTypeFlags::kMutable)}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> shared_option_info = {
+    {"shared",
+     {0, OptionType::kConfigurable, OptionVerificationType::kNormal,
+      (OptionTypeFlags::kShared)}},
+};
+static std::unordered_map<std::string, OptionTypeInfo> pointer_option_info = {
+    {"pointer",
+     {0, OptionType::kConfigurable, OptionVerificationType::kNormal,
+      OptionTypeFlags::kRawPointer}},
+};
+
+enum TestConfigMode {
+  kEmptyMode = 0x0,            // Don't register anything
+  kMutableMode = 0x01,         // Configuration is mutable
+  kSimpleMode = 0x02,          // Use the simple options
+  kEnumMode = 0x04,            // Use the enum options
+  kDefaultMode = kSimpleMode,  // Use no inner nested configurations
+  kSharedMode = 0x10,          // Use shared configuration
+  kUniqueMode = 0x20,          // Use unique configuration
+  kRawPtrMode = 0x40,          // Use pointer configuration
+  kNestedMode = (kSharedMode | kUniqueMode | kRawPtrMode),
+  kAllOptMode = (kNestedMode | kEnumMode | kSimpleMode),
+};
+
+template <typename T>
+class TestConfigurable : public Configurable {
+ protected:
+  std::string name_;
+  std::string prefix_;
+  TestOptions options_;
+
+ public:
+  std::unique_ptr<T> unique_;
+  std::shared_ptr<T> shared_;
+  T* pointer_;
+
+  TestConfigurable(const std::string& name, int mode,
+                   const std::unordered_map<std::string, OptionTypeInfo>* map =
+                       &simple_option_info)
+      : name_(name), pointer_(nullptr) {
+    prefix_ = "test." + name + ".";
+    if ((mode & TestConfigMode::kSimpleMode) != 0) {
+      RegisterOptions(name_, &options_, map);
+    }
+    if ((mode & TestConfigMode::kEnumMode) != 0) {
+      RegisterOptions(name_ + "Enum", &options_, &enum_option_info);
+    }
+  }
+
+  ~TestConfigurable() override { delete pointer_; }
+};
+
+}  // namespace test
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/customizable.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/customizable.cc
new file mode 100644
index 0000000000..2f154d84c5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/customizable.cc
@@ -0,0 +1,133 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "rocksdb/customizable.h"
+
+#include <sstream>
+
+#include "options/options_helper.h"
+#include "port/port.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/status.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+std::string Customizable::GetOptionName(const std::string& long_name) const {
+  const std::string& name = Name();
+  size_t name_len = name.size();
+  if (long_name.size() > name_len + 1 &&
+      long_name.compare(0, name_len, name) == 0 &&
+      long_name.at(name_len) == '.') {
+    return long_name.substr(name_len + 1);
+  } else {
+    return Configurable::GetOptionName(long_name);
+  }
+}
+
+std::string Customizable::GenerateIndividualId() const {
+  std::ostringstream ostr;
+  ostr << Name() << "@" << static_cast<const void*>(this) << "#"
+       << port::GetProcessID();
+  return ostr.str();
+}
+
+Status Customizable::GetOption(const ConfigOptions& config_options,
+                               const std::string& opt_name,
+                               std::string* value) const {
+  if (opt_name == OptionTypeInfo::kIdPropName()) {
+    *value = GetId();
+    return Status::OK();
+  } else {
+    return Configurable::GetOption(config_options, opt_name, value);
+  }
+}
+
+std::string Customizable::SerializeOptions(const ConfigOptions& config_options,
+                                           const std::string& prefix) const {
+  std::string result;
+  std::string parent;
+  std::string id = GetId();
+  if (!config_options.IsShallow() && !id.empty()) {
+    parent = Configurable::SerializeOptions(config_options, "");
+  }
+  if (parent.empty()) {
+    result = id;
+  } else {
+    result.append(prefix);
+    result.append(OptionTypeInfo::kIdPropName());
+    result.append("=");
+    result.append(id);
+    result.append(config_options.delimiter);
+    result.append(parent);
+  }
+  return result;
+}
+
+bool Customizable::AreEquivalent(const ConfigOptions& config_options,
+                                 const Configurable* other,
+                                 std::string* mismatch) const {
+  if (config_options.sanity_level > ConfigOptions::kSanityLevelNone &&
+      this != other) {
+    const Customizable* custom = reinterpret_cast<const Customizable*>(other);
+    if (custom == nullptr) {  // Cast failed
+      return false;
+    } else if (GetId() != custom->GetId()) {
+      *mismatch = OptionTypeInfo::kIdPropName();
+      return false;
+    } else if (config_options.sanity_level >
+               ConfigOptions::kSanityLevelLooselyCompatible) {
+      bool matches =
+          Configurable::AreEquivalent(config_options, other, mismatch);
+      return matches;
+    }
+  }
+  return true;
+}
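+
+// Illustrative sketch (hypothetical values): the serialized form produced by
+// Customizable::SerializeOptions above, and consumed by GetOptionsMap below.
+// A customizable with id "MyListener" and one registered option might print:
+//
+//   "id=MyListener;timeout=60;"   (detailed form: id plus parent options)
+//   "MyListener"                  (shallow form: id only)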
+
+Status Customizable::GetOptionsMap(
+    const ConfigOptions& config_options, const Customizable* customizable,
+    const std::string& value, std::string* id,
+    std::unordered_map<std::string, std::string>* props) {
+  Status status;
+  if (value.empty() || value == kNullptrString) {
+    *id = "";
+    props->clear();
+  } else if (customizable != nullptr) {
+    status =
+        Configurable::GetOptionsMap(value, customizable->GetId(), id, props);
+    if (status.ok() && customizable->IsInstanceOf(*id)) {
+      // The new ID and the old ID match, so the objects are the same type.
+      // Try to get the existing options, ignoring any errors
+      ConfigOptions embedded = config_options;
+      embedded.delimiter = ";";
+      std::string curr_opts;
+      if (customizable->GetOptionString(embedded, &curr_opts).ok()) {
+        std::unordered_map<std::string, std::string> curr_props;
+        if (StringToMap(curr_opts, &curr_props).ok()) {
+          props->insert(curr_props.begin(), curr_props.end());
+        }
+      }
+    }
+  } else {
+    status = Configurable::GetOptionsMap(value, "", id, props);
+  }
+  return status;
+}
+
+Status Customizable::ConfigureNewObject(
+    const ConfigOptions& config_options, Customizable* object,
+    const std::unordered_map<std::string, std::string>& opt_map) {
+  Status status;
+  if (object != nullptr) {
+    status = object->ConfigureFromMap(config_options, opt_map);
+  } else if (!opt_map.empty()) {
+    status = Status::InvalidArgument("Cannot configure null object");
+  }
+  return status;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.cc
new file mode 100644
index 0000000000..d81e72833c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.cc
@@ -0,0 +1,1079 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "options/db_options.h"
+
+#include <cinttypes>
+
+#include "logging/logging.h"
+#include "options/configurable_helper.h"
+#include "options/options_helper.h"
+#include "options/options_parser.h"
+#include "port/port.h"
+#include "rocksdb/advanced_cache.h"
+#include "rocksdb/configurable.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/rate_limiter.h"
+#include "rocksdb/sst_file_manager.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/system_clock.h"
+#include "rocksdb/utilities/options_type.h"
+#include "rocksdb/wal_filter.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+static std::unordered_map<std::string, WALRecoveryMode>
+    wal_recovery_mode_string_map = {
+        {"kTolerateCorruptedTailRecords",
+         WALRecoveryMode::kTolerateCorruptedTailRecords},
+        {"kAbsoluteConsistency", WALRecoveryMode::kAbsoluteConsistency},
+        {"kPointInTimeRecovery", WALRecoveryMode::kPointInTimeRecovery},
+        {"kSkipAnyCorruptedRecords",
+         WALRecoveryMode::kSkipAnyCorruptedRecords}};
+
+static std::unordered_map<std::string, DBOptions::AccessHint>
+    access_hint_string_map = {{"NONE", DBOptions::AccessHint::NONE},
+                              {"NORMAL", DBOptions::AccessHint::NORMAL},
+                              {"SEQUENTIAL", DBOptions::AccessHint::SEQUENTIAL},
+                              {"WILLNEED", DBOptions::AccessHint::WILLNEED}};
+
+static std::unordered_map<std::string, CacheTier> cache_tier_string_map = {
+    {"kVolatileTier", CacheTier::kVolatileTier},
+    {"kNonVolatileBlockTier", CacheTier::kNonVolatileBlockTier}};
+
+static std::unordered_map<std::string, InfoLogLevel>
+    info_log_level_string_map = {{"DEBUG_LEVEL", InfoLogLevel::DEBUG_LEVEL},
+                                 {"INFO_LEVEL", InfoLogLevel::INFO_LEVEL},
+                                 {"WARN_LEVEL", InfoLogLevel::WARN_LEVEL},
+                                 {"ERROR_LEVEL", InfoLogLevel::ERROR_LEVEL},
+                                 {"FATAL_LEVEL", InfoLogLevel::FATAL_LEVEL},
+                                 {"HEADER_LEVEL", InfoLogLevel::HEADER_LEVEL}};
+
+static std::unordered_map<std::string, OptionTypeInfo>
+    db_mutable_options_type_info = {
+        {"allow_os_buffer",
+         {0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
+          OptionTypeFlags::kMutable}},
+        {"base_background_compactions",
+         {0, OptionType::kInt, OptionVerificationType::kDeprecated,
+          OptionTypeFlags::kMutable}},
+        {"max_background_jobs",
+         
{offsetof(struct MutableDBOptions, max_background_jobs), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_background_compactions", + {offsetof(struct MutableDBOptions, max_background_compactions), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_subcompactions", + {offsetof(struct MutableDBOptions, max_subcompactions), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"avoid_flush_during_shutdown", + {offsetof(struct MutableDBOptions, avoid_flush_during_shutdown), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"writable_file_max_buffer_size", + {offsetof(struct MutableDBOptions, writable_file_max_buffer_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"delayed_write_rate", + {offsetof(struct MutableDBOptions, delayed_write_rate), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_total_wal_size", + {offsetof(struct MutableDBOptions, max_total_wal_size), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"delete_obsolete_files_period_micros", + {offsetof(struct MutableDBOptions, + delete_obsolete_files_period_micros), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"stats_dump_period_sec", + {offsetof(struct MutableDBOptions, stats_dump_period_sec), + OptionType::kUInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"stats_persist_period_sec", + {offsetof(struct MutableDBOptions, stats_persist_period_sec), + OptionType::kUInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"stats_history_buffer_size", + {offsetof(struct MutableDBOptions, stats_history_buffer_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_open_files", + {offsetof(struct MutableDBOptions, max_open_files), OptionType::kInt, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"bytes_per_sync", + {offsetof(struct MutableDBOptions, bytes_per_sync), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"wal_bytes_per_sync", + {offsetof(struct MutableDBOptions, wal_bytes_per_sync), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"strict_bytes_per_sync", + {offsetof(struct MutableDBOptions, strict_bytes_per_sync), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"compaction_readahead_size", + {offsetof(struct MutableDBOptions, compaction_readahead_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"max_background_flushes", + {offsetof(struct MutableDBOptions, max_background_flushes), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, +}; + +static std::unordered_map + db_immutable_options_type_info = { + /* + // not yet supported + std::shared_ptr row_cache; + std::shared_ptr delete_scheduler; + std::shared_ptr info_log; + std::shared_ptr rate_limiter; + std::shared_ptr statistics; + std::vector db_paths; + FileTypeSet checksum_handoff_file_types; + */ + {"advise_random_on_open", + {offsetof(struct ImmutableDBOptions, advise_random_on_open), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"allow_mmap_reads", + {offsetof(struct ImmutableDBOptions, 
allow_mmap_reads), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"allow_fallocate", + {offsetof(struct ImmutableDBOptions, allow_fallocate), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"allow_mmap_writes", + {offsetof(struct ImmutableDBOptions, allow_mmap_writes), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"use_direct_reads", + {offsetof(struct ImmutableDBOptions, use_direct_reads), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"use_direct_writes", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"use_direct_io_for_flush_and_compaction", + {offsetof(struct ImmutableDBOptions, + use_direct_io_for_flush_and_compaction), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"allow_2pc", + {offsetof(struct ImmutableDBOptions, allow_2pc), OptionType::kBoolean, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"wal_filter", + OptionTypeInfo::AsCustomRawPtr( + offsetof(struct ImmutableDBOptions, wal_filter), + OptionVerificationType::kByName, + (OptionTypeFlags::kAllowNull | OptionTypeFlags::kCompareNever))}, + {"create_if_missing", + {offsetof(struct ImmutableDBOptions, create_if_missing), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"create_missing_column_families", + {offsetof(struct ImmutableDBOptions, create_missing_column_families), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"disableDataSync", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"disable_data_sync", // for compatibility + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"enable_thread_tracking", + {offsetof(struct ImmutableDBOptions, enable_thread_tracking), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"error_if_exists", + {offsetof(struct ImmutableDBOptions, error_if_exists), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"experimental_allow_mempurge", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"experimental_mempurge_policy", + {0, OptionType::kString, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"experimental_mempurge_threshold", + {0, OptionType::kDouble, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"is_fd_close_on_exec", + {offsetof(struct ImmutableDBOptions, is_fd_close_on_exec), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"paranoid_checks", + {offsetof(struct ImmutableDBOptions, paranoid_checks), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"flush_verify_memtable_count", + {offsetof(struct ImmutableDBOptions, flush_verify_memtable_count), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"track_and_verify_wals_in_manifest", + {offsetof(struct ImmutableDBOptions, + track_and_verify_wals_in_manifest), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"verify_sst_unique_id_in_manifest", + {offsetof(struct ImmutableDBOptions, verify_sst_unique_id_in_manifest), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + 
{"skip_log_error_on_recovery", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"skip_stats_update_on_db_open", + {offsetof(struct ImmutableDBOptions, skip_stats_update_on_db_open), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"skip_checking_sst_file_sizes_on_db_open", + {offsetof(struct ImmutableDBOptions, + skip_checking_sst_file_sizes_on_db_open), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"new_table_reader_for_compaction_inputs", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"random_access_max_buffer_size", + {offsetof(struct ImmutableDBOptions, random_access_max_buffer_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"use_adaptive_mutex", + {offsetof(struct ImmutableDBOptions, use_adaptive_mutex), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"use_fsync", + {offsetof(struct ImmutableDBOptions, use_fsync), OptionType::kBoolean, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"max_file_opening_threads", + {offsetof(struct ImmutableDBOptions, max_file_opening_threads), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"table_cache_numshardbits", + {offsetof(struct ImmutableDBOptions, table_cache_numshardbits), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"db_write_buffer_size", + {offsetof(struct ImmutableDBOptions, db_write_buffer_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"keep_log_file_num", + {offsetof(struct ImmutableDBOptions, keep_log_file_num), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"recycle_log_file_num", + {offsetof(struct ImmutableDBOptions, recycle_log_file_num), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"log_file_time_to_roll", + {offsetof(struct ImmutableDBOptions, log_file_time_to_roll), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"manifest_preallocation_size", + {offsetof(struct ImmutableDBOptions, manifest_preallocation_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"max_log_file_size", + {offsetof(struct ImmutableDBOptions, max_log_file_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"db_log_dir", + {offsetof(struct ImmutableDBOptions, db_log_dir), OptionType::kString, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"wal_dir", + {offsetof(struct ImmutableDBOptions, wal_dir), OptionType::kString, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"WAL_size_limit_MB", + {offsetof(struct ImmutableDBOptions, WAL_size_limit_MB), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"WAL_ttl_seconds", + {offsetof(struct ImmutableDBOptions, WAL_ttl_seconds), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"max_manifest_file_size", + {offsetof(struct ImmutableDBOptions, max_manifest_file_size), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"persist_stats_to_disk", + {offsetof(struct ImmutableDBOptions, persist_stats_to_disk), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + 
{"fail_if_options_file_error", + {offsetof(struct ImmutableDBOptions, fail_if_options_file_error), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"enable_pipelined_write", + {offsetof(struct ImmutableDBOptions, enable_pipelined_write), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"unordered_write", + {offsetof(struct ImmutableDBOptions, unordered_write), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"allow_concurrent_memtable_write", + {offsetof(struct ImmutableDBOptions, allow_concurrent_memtable_write), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"wal_recovery_mode", + OptionTypeInfo::Enum( + offsetof(struct ImmutableDBOptions, wal_recovery_mode), + &wal_recovery_mode_string_map)}, + {"enable_write_thread_adaptive_yield", + {offsetof(struct ImmutableDBOptions, + enable_write_thread_adaptive_yield), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"write_thread_slow_yield_usec", + {offsetof(struct ImmutableDBOptions, write_thread_slow_yield_usec), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"max_write_batch_group_size_bytes", + {offsetof(struct ImmutableDBOptions, max_write_batch_group_size_bytes), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"write_thread_max_yield_usec", + {offsetof(struct ImmutableDBOptions, write_thread_max_yield_usec), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"access_hint_on_compaction_start", + OptionTypeInfo::Enum( + offsetof(struct ImmutableDBOptions, + access_hint_on_compaction_start), + &access_hint_string_map)}, + {"info_log_level", + OptionTypeInfo::Enum( + offsetof(struct ImmutableDBOptions, info_log_level), + &info_log_level_string_map)}, + {"dump_malloc_stats", + {offsetof(struct ImmutableDBOptions, dump_malloc_stats), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"avoid_flush_during_recovery", + {offsetof(struct ImmutableDBOptions, avoid_flush_during_recovery), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"allow_ingest_behind", + {offsetof(struct ImmutableDBOptions, allow_ingest_behind), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"preserve_deletes", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"concurrent_prepare", // Deprecated by two_write_queues + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"two_write_queues", + {offsetof(struct ImmutableDBOptions, two_write_queues), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"manual_wal_flush", + {offsetof(struct ImmutableDBOptions, manual_wal_flush), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"wal_compression", + {offsetof(struct ImmutableDBOptions, wal_compression), + OptionType::kCompressionType, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"seq_per_batch", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"atomic_flush", + {offsetof(struct ImmutableDBOptions, atomic_flush), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"avoid_unnecessary_blocking_io", + 
{offsetof(struct ImmutableDBOptions, avoid_unnecessary_blocking_io), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"write_dbid_to_manifest", + {offsetof(struct ImmutableDBOptions, write_dbid_to_manifest), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"log_readahead_size", + {offsetof(struct ImmutableDBOptions, log_readahead_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"best_efforts_recovery", + {offsetof(struct ImmutableDBOptions, best_efforts_recovery), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"max_bgerror_resume_count", + {offsetof(struct ImmutableDBOptions, max_bgerror_resume_count), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"bgerror_resume_retry_interval", + {offsetof(struct ImmutableDBOptions, bgerror_resume_retry_interval), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"db_host_id", + {offsetof(struct ImmutableDBOptions, db_host_id), OptionType::kString, + OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever}}, + // Temporarily deprecated due to race conditions (examples in PR 10375). + {"rate_limiter", + {offsetof(struct ImmutableDBOptions, rate_limiter), + OptionType::kUnknown, OptionVerificationType::kDeprecated, + OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever}}, + // The following properties were handled as special cases in ParseOption + // This means that the properties could be read from the options file + // but never written to the file or compared to each other. + {"rate_limiter_bytes_per_sec", + {offsetof(struct ImmutableDBOptions, rate_limiter), + OptionType::kUnknown, OptionVerificationType::kNormal, + (OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever), + // Parse the input value as a RateLimiter + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { + auto limiter = static_cast*>(addr); + limiter->reset(NewGenericRateLimiter( + static_cast(ParseUint64(value)))); + return Status::OK(); + }}}, + {"env", //**TODO: Should this be kCustomizable? 
+ OptionTypeInfo( + offsetof(struct ImmutableDBOptions, env), OptionType::kUnknown, + OptionVerificationType::kNormal, + (OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever)) + .SetParseFunc([](const ConfigOptions& opts, + const std::string& /*name*/, + const std::string& value, void* addr) { + // Parse the input value as an Env + auto old_env = static_cast(addr); // Get the old value + Env* new_env = *old_env; // Set new to old + Status s = Env::CreateFromString(opts, value, + &new_env); // Update new value + if (s.ok()) { // It worked + *old_env = new_env; // Update the old one + } + return s; + }) + .SetPrepareFunc([](const ConfigOptions& opts, + const std::string& /*name*/, void* addr) { + auto env = static_cast(addr); + return (*env)->PrepareOptions(opts); + }) + .SetValidateFunc([](const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts, + const std::string& /*name*/, + const void* addr) { + const auto env = static_cast(addr); + return (*env)->ValidateOptions(db_opts, cf_opts); + })}, + {"allow_data_in_errors", + {offsetof(struct ImmutableDBOptions, allow_data_in_errors), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"file_checksum_gen_factory", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct ImmutableDBOptions, file_checksum_gen_factory), + OptionVerificationType::kByNameAllowFromNull, + OptionTypeFlags::kAllowNull)}, + {"statistics", + OptionTypeInfo::AsCustomSharedPtr( + // Statistics should not be compared and can be null + // Statistics are maked "don't serialize" until they can be shared + // between DBs + offsetof(struct ImmutableDBOptions, statistics), + OptionVerificationType::kNormal, + OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize | + OptionTypeFlags::kAllowNull)}, + // Allow EventListeners that have a non-empty Name() to be read/written + // as options Each listener will either be + // - A simple name (e.g. "MyEventListener") + // - A name with properties (e.g. 
"{id=MyListener1; timeout=60}" + // Multiple listeners will be separated by a ":": + // - "MyListener0;{id=MyListener1; timeout=60} + {"listeners", + {offsetof(struct ImmutableDBOptions, listeners), OptionType::kVector, + OptionVerificationType::kByNameAllowNull, + OptionTypeFlags::kCompareNever, + [](const ConfigOptions& opts, const std::string& /*name*/, + const std::string& value, void* addr) { + ConfigOptions embedded = opts; + embedded.ignore_unsupported_options = true; + std::vector> listeners; + Status s; + for (size_t start = 0, end = 0; + s.ok() && start < value.size() && end != std::string::npos; + start = end + 1) { + std::string token; + s = OptionTypeInfo::NextToken(value, ':', start, &end, &token); + if (s.ok() && !token.empty()) { + std::shared_ptr listener; + s = EventListener::CreateFromString(embedded, token, &listener); + if (s.ok() && listener != nullptr) { + listeners.push_back(listener); + } + } + } + if (s.ok()) { // It worked + *(static_cast>*>( + addr)) = listeners; + } + return s; + }, + [](const ConfigOptions& opts, const std::string& /*name*/, + const void* addr, std::string* value) { + const auto listeners = + static_cast>*>( + addr); + ConfigOptions embedded = opts; + embedded.delimiter = ";"; + int printed = 0; + for (const auto& listener : *listeners) { + auto id = listener->GetId(); + if (!id.empty()) { + std::string elem_str = listener->ToString(embedded, ""); + if (printed++ == 0) { + value->append("{"); + } else { + value->append(":"); + } + value->append(elem_str); + } + } + if (printed > 0) { + value->append("}"); + } + return Status::OK(); + }, + nullptr}}, + {"lowest_used_cache_tier", + OptionTypeInfo::Enum( + offsetof(struct ImmutableDBOptions, lowest_used_cache_tier), + &cache_tier_string_map, OptionTypeFlags::kNone)}, + {"enforce_single_del_contracts", + {offsetof(struct ImmutableDBOptions, enforce_single_del_contracts), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; + +const std::string OptionsHelper::kDBOptionsName = "DBOptions"; + +class MutableDBConfigurable : public Configurable { + public: + explicit MutableDBConfigurable( + const MutableDBOptions& mdb, + const std::unordered_map* map = nullptr) + : mutable_(mdb), opt_map_(map) { + RegisterOptions(&mutable_, &db_mutable_options_type_info); + } + + bool OptionsAreEqual(const ConfigOptions& config_options, + const OptionTypeInfo& opt_info, + const std::string& opt_name, const void* const this_ptr, + const void* const that_ptr, + std::string* mismatch) const override { + bool equals = opt_info.AreEqual(config_options, opt_name, this_ptr, + that_ptr, mismatch); + if (!equals && opt_info.IsByName()) { + if (opt_map_ == nullptr) { + equals = true; + } else { + const auto& iter = opt_map_->find(opt_name); + if (iter == opt_map_->end()) { + equals = true; + } else { + equals = opt_info.AreEqualByName(config_options, opt_name, this_ptr, + iter->second); + } + } + if (equals) { // False alarm, clear mismatch + *mismatch = ""; + } + } + if (equals && opt_info.IsConfigurable() && opt_map_ != nullptr) { + const auto* this_config = opt_info.AsRawPointer(this_ptr); + if (this_config == nullptr) { + const auto& iter = opt_map_->find(opt_name); + // If the name exists in the map and is not empty/null, + // then the this_config should be set. 
+ if (iter != opt_map_->end() && !iter->second.empty() && + iter->second != kNullptrString) { + *mismatch = opt_name; + equals = false; + } + } + } + return equals; + } + + protected: + MutableDBOptions mutable_; + const std::unordered_map* opt_map_; +}; + +class DBOptionsConfigurable : public MutableDBConfigurable { + public: + explicit DBOptionsConfigurable( + const DBOptions& opts, + const std::unordered_map* map = nullptr) + : MutableDBConfigurable(MutableDBOptions(opts), map), db_options_(opts) { + // The ImmutableDBOptions currently requires the env to be non-null. Make + // sure it is + if (opts.env != nullptr) { + immutable_ = ImmutableDBOptions(opts); + } else { + DBOptions copy = opts; + copy.env = Env::Default(); + immutable_ = ImmutableDBOptions(copy); + } + RegisterOptions(&immutable_, &db_immutable_options_type_info); + } + + protected: + Status ConfigureOptions( + const ConfigOptions& config_options, + const std::unordered_map& opts_map, + std::unordered_map* unused) override { + Status s = Configurable::ConfigureOptions(config_options, opts_map, unused); + if (s.ok()) { + db_options_ = BuildDBOptions(immutable_, mutable_); + s = PrepareOptions(config_options); + } + return s; + } + + const void* GetOptionsPtr(const std::string& name) const override { + if (name == OptionsHelper::kDBOptionsName) { + return &db_options_; + } else { + return MutableDBConfigurable::GetOptionsPtr(name); + } + } + + private: + ImmutableDBOptions immutable_; + DBOptions db_options_; +}; + +std::unique_ptr DBOptionsAsConfigurable( + const MutableDBOptions& opts) { + std::unique_ptr ptr(new MutableDBConfigurable(opts)); + return ptr; +} +std::unique_ptr DBOptionsAsConfigurable( + const DBOptions& opts, + const std::unordered_map* opt_map) { + std::unique_ptr ptr(new DBOptionsConfigurable(opts, opt_map)); + return ptr; +} + +ImmutableDBOptions::ImmutableDBOptions() : ImmutableDBOptions(Options()) {} + +ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) + : create_if_missing(options.create_if_missing), + create_missing_column_families(options.create_missing_column_families), + error_if_exists(options.error_if_exists), + paranoid_checks(options.paranoid_checks), + flush_verify_memtable_count(options.flush_verify_memtable_count), + track_and_verify_wals_in_manifest( + options.track_and_verify_wals_in_manifest), + verify_sst_unique_id_in_manifest( + options.verify_sst_unique_id_in_manifest), + env(options.env), + rate_limiter(options.rate_limiter), + sst_file_manager(options.sst_file_manager), + info_log(options.info_log), + info_log_level(options.info_log_level), + max_file_opening_threads(options.max_file_opening_threads), + statistics(options.statistics), + use_fsync(options.use_fsync), + db_paths(options.db_paths), + db_log_dir(options.db_log_dir), + wal_dir(options.wal_dir), + max_log_file_size(options.max_log_file_size), + log_file_time_to_roll(options.log_file_time_to_roll), + keep_log_file_num(options.keep_log_file_num), + recycle_log_file_num(options.recycle_log_file_num), + max_manifest_file_size(options.max_manifest_file_size), + table_cache_numshardbits(options.table_cache_numshardbits), + WAL_ttl_seconds(options.WAL_ttl_seconds), + WAL_size_limit_MB(options.WAL_size_limit_MB), + max_write_batch_group_size_bytes( + options.max_write_batch_group_size_bytes), + manifest_preallocation_size(options.manifest_preallocation_size), + allow_mmap_reads(options.allow_mmap_reads), + allow_mmap_writes(options.allow_mmap_writes), + use_direct_reads(options.use_direct_reads), + 
use_direct_io_for_flush_and_compaction( + options.use_direct_io_for_flush_and_compaction), + allow_fallocate(options.allow_fallocate), + is_fd_close_on_exec(options.is_fd_close_on_exec), + advise_random_on_open(options.advise_random_on_open), + db_write_buffer_size(options.db_write_buffer_size), + write_buffer_manager(options.write_buffer_manager), + access_hint_on_compaction_start(options.access_hint_on_compaction_start), + random_access_max_buffer_size(options.random_access_max_buffer_size), + use_adaptive_mutex(options.use_adaptive_mutex), + listeners(options.listeners), + enable_thread_tracking(options.enable_thread_tracking), + enable_pipelined_write(options.enable_pipelined_write), + unordered_write(options.unordered_write), + allow_concurrent_memtable_write(options.allow_concurrent_memtable_write), + enable_write_thread_adaptive_yield( + options.enable_write_thread_adaptive_yield), + write_thread_max_yield_usec(options.write_thread_max_yield_usec), + write_thread_slow_yield_usec(options.write_thread_slow_yield_usec), + skip_stats_update_on_db_open(options.skip_stats_update_on_db_open), + skip_checking_sst_file_sizes_on_db_open( + options.skip_checking_sst_file_sizes_on_db_open), + wal_recovery_mode(options.wal_recovery_mode), + allow_2pc(options.allow_2pc), + row_cache(options.row_cache), + wal_filter(options.wal_filter), + fail_if_options_file_error(options.fail_if_options_file_error), + dump_malloc_stats(options.dump_malloc_stats), + avoid_flush_during_recovery(options.avoid_flush_during_recovery), + allow_ingest_behind(options.allow_ingest_behind), + two_write_queues(options.two_write_queues), + manual_wal_flush(options.manual_wal_flush), + wal_compression(options.wal_compression), + atomic_flush(options.atomic_flush), + avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io), + persist_stats_to_disk(options.persist_stats_to_disk), + write_dbid_to_manifest(options.write_dbid_to_manifest), + log_readahead_size(options.log_readahead_size), + file_checksum_gen_factory(options.file_checksum_gen_factory), + best_efforts_recovery(options.best_efforts_recovery), + max_bgerror_resume_count(options.max_bgerror_resume_count), + bgerror_resume_retry_interval(options.bgerror_resume_retry_interval), + allow_data_in_errors(options.allow_data_in_errors), + db_host_id(options.db_host_id), + checksum_handoff_file_types(options.checksum_handoff_file_types), + lowest_used_cache_tier(options.lowest_used_cache_tier), + compaction_service(options.compaction_service), + enforce_single_del_contracts(options.enforce_single_del_contracts) { + fs = env->GetFileSystem(); + clock = env->GetSystemClock().get(); + logger = info_log.get(); + stats = statistics.get(); +} + +void ImmutableDBOptions::Dump(Logger* log) const { + ROCKS_LOG_HEADER(log, " Options.error_if_exists: %d", + error_if_exists); + ROCKS_LOG_HEADER(log, " Options.create_if_missing: %d", + create_if_missing); + ROCKS_LOG_HEADER(log, " Options.paranoid_checks: %d", + paranoid_checks); + ROCKS_LOG_HEADER(log, " Options.flush_verify_memtable_count: %d", + flush_verify_memtable_count); + ROCKS_LOG_HEADER(log, + " " + "Options.track_and_verify_wals_in_manifest: %d", + track_and_verify_wals_in_manifest); + ROCKS_LOG_HEADER(log, " Options.verify_sst_unique_id_in_manifest: %d", + verify_sst_unique_id_in_manifest); + ROCKS_LOG_HEADER(log, " Options.env: %p", + env); + ROCKS_LOG_HEADER(log, " Options.fs: %s", + fs->Name()); + ROCKS_LOG_HEADER(log, " Options.info_log: %p", + info_log.get()); + ROCKS_LOG_HEADER(log, " 
Options.max_file_opening_threads: %d", + max_file_opening_threads); + ROCKS_LOG_HEADER(log, " Options.statistics: %p", + stats); + ROCKS_LOG_HEADER(log, " Options.use_fsync: %d", + use_fsync); + ROCKS_LOG_HEADER( + log, " Options.max_log_file_size: %" ROCKSDB_PRIszt, + max_log_file_size); + ROCKS_LOG_HEADER(log, + " Options.max_manifest_file_size: %" PRIu64, + max_manifest_file_size); + ROCKS_LOG_HEADER( + log, " Options.log_file_time_to_roll: %" ROCKSDB_PRIszt, + log_file_time_to_roll); + ROCKS_LOG_HEADER( + log, " Options.keep_log_file_num: %" ROCKSDB_PRIszt, + keep_log_file_num); + ROCKS_LOG_HEADER( + log, " Options.recycle_log_file_num: %" ROCKSDB_PRIszt, + recycle_log_file_num); + ROCKS_LOG_HEADER(log, " Options.allow_fallocate: %d", + allow_fallocate); + ROCKS_LOG_HEADER(log, " Options.allow_mmap_reads: %d", + allow_mmap_reads); + ROCKS_LOG_HEADER(log, " Options.allow_mmap_writes: %d", + allow_mmap_writes); + ROCKS_LOG_HEADER(log, " Options.use_direct_reads: %d", + use_direct_reads); + ROCKS_LOG_HEADER(log, + " " + "Options.use_direct_io_for_flush_and_compaction: %d", + use_direct_io_for_flush_and_compaction); + ROCKS_LOG_HEADER(log, " Options.create_missing_column_families: %d", + create_missing_column_families); + ROCKS_LOG_HEADER(log, " Options.db_log_dir: %s", + db_log_dir.c_str()); + ROCKS_LOG_HEADER(log, " Options.wal_dir: %s", + wal_dir.c_str()); + ROCKS_LOG_HEADER(log, " Options.table_cache_numshardbits: %d", + table_cache_numshardbits); + ROCKS_LOG_HEADER(log, + " Options.WAL_ttl_seconds: %" PRIu64, + WAL_ttl_seconds); + ROCKS_LOG_HEADER(log, + " Options.WAL_size_limit_MB: %" PRIu64, + WAL_size_limit_MB); + ROCKS_LOG_HEADER(log, + " " + "Options.max_write_batch_group_size_bytes: %" PRIu64, + max_write_batch_group_size_bytes); + ROCKS_LOG_HEADER( + log, " Options.manifest_preallocation_size: %" ROCKSDB_PRIszt, + manifest_preallocation_size); + ROCKS_LOG_HEADER(log, " Options.is_fd_close_on_exec: %d", + is_fd_close_on_exec); + ROCKS_LOG_HEADER(log, " Options.advise_random_on_open: %d", + advise_random_on_open); + ROCKS_LOG_HEADER( + log, " Options.db_write_buffer_size: %" ROCKSDB_PRIszt, + db_write_buffer_size); + ROCKS_LOG_HEADER(log, " Options.write_buffer_manager: %p", + write_buffer_manager.get()); + ROCKS_LOG_HEADER(log, " Options.access_hint_on_compaction_start: %d", + static_cast(access_hint_on_compaction_start)); + ROCKS_LOG_HEADER( + log, " Options.random_access_max_buffer_size: %" ROCKSDB_PRIszt, + random_access_max_buffer_size); + ROCKS_LOG_HEADER(log, " Options.use_adaptive_mutex: %d", + use_adaptive_mutex); + ROCKS_LOG_HEADER(log, " Options.rate_limiter: %p", + rate_limiter.get()); + Header( + log, " Options.sst_file_manager.rate_bytes_per_sec: %" PRIi64, + sst_file_manager ? 
sst_file_manager->GetDeleteRateBytesPerSecond() : 0); + ROCKS_LOG_HEADER(log, " Options.wal_recovery_mode: %d", + static_cast(wal_recovery_mode)); + ROCKS_LOG_HEADER(log, " Options.enable_thread_tracking: %d", + enable_thread_tracking); + ROCKS_LOG_HEADER(log, " Options.enable_pipelined_write: %d", + enable_pipelined_write); + ROCKS_LOG_HEADER(log, " Options.unordered_write: %d", + unordered_write); + ROCKS_LOG_HEADER(log, " Options.allow_concurrent_memtable_write: %d", + allow_concurrent_memtable_write); + ROCKS_LOG_HEADER(log, " Options.enable_write_thread_adaptive_yield: %d", + enable_write_thread_adaptive_yield); + ROCKS_LOG_HEADER(log, + " Options.write_thread_max_yield_usec: %" PRIu64, + write_thread_max_yield_usec); + ROCKS_LOG_HEADER(log, + " Options.write_thread_slow_yield_usec: %" PRIu64, + write_thread_slow_yield_usec); + if (row_cache) { + ROCKS_LOG_HEADER( + log, + " Options.row_cache: %" ROCKSDB_PRIszt, + row_cache->GetCapacity()); + } else { + ROCKS_LOG_HEADER(log, + " Options.row_cache: None"); + } + ROCKS_LOG_HEADER(log, " Options.wal_filter: %s", + wal_filter ? wal_filter->Name() : "None"); + + ROCKS_LOG_HEADER(log, " Options.avoid_flush_during_recovery: %d", + avoid_flush_during_recovery); + ROCKS_LOG_HEADER(log, " Options.allow_ingest_behind: %d", + allow_ingest_behind); + ROCKS_LOG_HEADER(log, " Options.two_write_queues: %d", + two_write_queues); + ROCKS_LOG_HEADER(log, " Options.manual_wal_flush: %d", + manual_wal_flush); + ROCKS_LOG_HEADER(log, " Options.wal_compression: %d", + wal_compression); + ROCKS_LOG_HEADER(log, " Options.atomic_flush: %d", atomic_flush); + ROCKS_LOG_HEADER(log, + " Options.avoid_unnecessary_blocking_io: %d", + avoid_unnecessary_blocking_io); + ROCKS_LOG_HEADER(log, " Options.persist_stats_to_disk: %u", + persist_stats_to_disk); + ROCKS_LOG_HEADER(log, " Options.write_dbid_to_manifest: %d", + write_dbid_to_manifest); + ROCKS_LOG_HEADER( + log, " Options.log_readahead_size: %" ROCKSDB_PRIszt, + log_readahead_size); + ROCKS_LOG_HEADER(log, " Options.file_checksum_gen_factory: %s", + file_checksum_gen_factory ? file_checksum_gen_factory->Name() + : kUnknownFileChecksumFuncName); + ROCKS_LOG_HEADER(log, " Options.best_efforts_recovery: %d", + static_cast(best_efforts_recovery)); + ROCKS_LOG_HEADER(log, " Options.max_bgerror_resume_count: %d", + max_bgerror_resume_count); + ROCKS_LOG_HEADER(log, + " Options.bgerror_resume_retry_interval: %" PRIu64, + bgerror_resume_retry_interval); + ROCKS_LOG_HEADER(log, " Options.allow_data_in_errors: %d", + allow_data_in_errors); + ROCKS_LOG_HEADER(log, " Options.db_host_id: %s", + db_host_id.c_str()); + ROCKS_LOG_HEADER(log, " Options.enforce_single_del_contracts: %s", + enforce_single_del_contracts ? 
"true" : "false"); +} + +bool ImmutableDBOptions::IsWalDirSameAsDBPath() const { + assert(!db_paths.empty()); + return IsWalDirSameAsDBPath(db_paths[0].path); +} + +bool ImmutableDBOptions::IsWalDirSameAsDBPath( + const std::string& db_path) const { + bool same = wal_dir.empty(); + if (!same) { + Status s = env->AreFilesSame(wal_dir, db_path, &same); + if (s.IsNotSupported()) { + same = wal_dir == db_path; + } + } + return same; +} + +const std::string& ImmutableDBOptions::GetWalDir() const { + if (wal_dir.empty()) { + assert(!db_paths.empty()); + return db_paths[0].path; + } else { + return wal_dir; + } +} + +const std::string& ImmutableDBOptions::GetWalDir( + const std::string& path) const { + if (wal_dir.empty()) { + return path; + } else { + return wal_dir; + } +} + +MutableDBOptions::MutableDBOptions() + : max_background_jobs(2), + max_background_compactions(-1), + max_subcompactions(0), + avoid_flush_during_shutdown(false), + writable_file_max_buffer_size(1024 * 1024), + delayed_write_rate(2 * 1024U * 1024U), + max_total_wal_size(0), + delete_obsolete_files_period_micros(6ULL * 60 * 60 * 1000000), + stats_dump_period_sec(600), + stats_persist_period_sec(600), + stats_history_buffer_size(1024 * 1024), + max_open_files(-1), + bytes_per_sync(0), + wal_bytes_per_sync(0), + strict_bytes_per_sync(false), + compaction_readahead_size(0), + max_background_flushes(-1) {} + +MutableDBOptions::MutableDBOptions(const DBOptions& options) + : max_background_jobs(options.max_background_jobs), + max_background_compactions(options.max_background_compactions), + max_subcompactions(options.max_subcompactions), + avoid_flush_during_shutdown(options.avoid_flush_during_shutdown), + writable_file_max_buffer_size(options.writable_file_max_buffer_size), + delayed_write_rate(options.delayed_write_rate), + max_total_wal_size(options.max_total_wal_size), + delete_obsolete_files_period_micros( + options.delete_obsolete_files_period_micros), + stats_dump_period_sec(options.stats_dump_period_sec), + stats_persist_period_sec(options.stats_persist_period_sec), + stats_history_buffer_size(options.stats_history_buffer_size), + max_open_files(options.max_open_files), + bytes_per_sync(options.bytes_per_sync), + wal_bytes_per_sync(options.wal_bytes_per_sync), + strict_bytes_per_sync(options.strict_bytes_per_sync), + compaction_readahead_size(options.compaction_readahead_size), + max_background_flushes(options.max_background_flushes) {} + +void MutableDBOptions::Dump(Logger* log) const { + ROCKS_LOG_HEADER(log, " Options.max_background_jobs: %d", + max_background_jobs); + ROCKS_LOG_HEADER(log, " Options.max_background_compactions: %d", + max_background_compactions); + ROCKS_LOG_HEADER(log, " Options.max_subcompactions: %" PRIu32, + max_subcompactions); + ROCKS_LOG_HEADER(log, " Options.avoid_flush_during_shutdown: %d", + avoid_flush_during_shutdown); + ROCKS_LOG_HEADER( + log, " Options.writable_file_max_buffer_size: %" ROCKSDB_PRIszt, + writable_file_max_buffer_size); + ROCKS_LOG_HEADER(log, " Options.delayed_write_rate : %" PRIu64, + delayed_write_rate); + ROCKS_LOG_HEADER(log, " Options.max_total_wal_size: %" PRIu64, + max_total_wal_size); + ROCKS_LOG_HEADER( + log, " Options.delete_obsolete_files_period_micros: %" PRIu64, + delete_obsolete_files_period_micros); + ROCKS_LOG_HEADER(log, " Options.stats_dump_period_sec: %u", + stats_dump_period_sec); + ROCKS_LOG_HEADER(log, " Options.stats_persist_period_sec: %d", + stats_persist_period_sec); + ROCKS_LOG_HEADER( + log, + " Options.stats_history_buffer_size: %" 
ROCKSDB_PRIszt,
+      stats_history_buffer_size);
+  ROCKS_LOG_HEADER(log, " Options.max_open_files: %d",
+                   max_open_files);
+  ROCKS_LOG_HEADER(log,
+                   " Options.bytes_per_sync: %" PRIu64,
+                   bytes_per_sync);
+  ROCKS_LOG_HEADER(log,
+                   " Options.wal_bytes_per_sync: %" PRIu64,
+                   wal_bytes_per_sync);
+  ROCKS_LOG_HEADER(log,
+                   " Options.strict_bytes_per_sync: %d",
+                   strict_bytes_per_sync);
+  ROCKS_LOG_HEADER(log,
+                   " Options.compaction_readahead_size: %" ROCKSDB_PRIszt,
+                   compaction_readahead_size);
+  ROCKS_LOG_HEADER(log, " Options.max_background_flushes: %d",
+                   max_background_flushes);
+}
+
+Status GetMutableDBOptionsFromStrings(
+    const MutableDBOptions& base_options,
+    const std::unordered_map<std::string, std::string>& options_map,
+    MutableDBOptions* new_options) {
+  assert(new_options);
+  *new_options = base_options;
+  ConfigOptions config_options;
+  Status s = OptionTypeInfo::ParseType(
+      config_options, options_map, db_mutable_options_type_info, new_options);
+  if (!s.ok()) {
+    *new_options = base_options;
+  }
+  return s;
+}
+
+bool MutableDBOptionsAreEqual(const MutableDBOptions& this_options,
+                              const MutableDBOptions& that_options) {
+  ConfigOptions config_options;
+  std::string mismatch;
+  return OptionTypeInfo::StructsAreEqual(
+      config_options, "MutableDBOptions", &db_mutable_options_type_info,
+      "MutableDBOptions", &this_options, &that_options, &mismatch);
+}
+
+Status GetStringFromMutableDBOptions(const ConfigOptions& config_options,
+                                     const MutableDBOptions& mutable_opts,
+                                     std::string* opt_string) {
+  return OptionTypeInfo::SerializeType(
+      config_options, db_mutable_options_type_info, &mutable_opts, opt_string);
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.h
new file mode 100644
index 0000000000..2a9d98b250
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/db_options.h
@@ -0,0 +1,152 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "rocksdb/options.h"
+
+namespace ROCKSDB_NAMESPACE {
+class SystemClock;
+
+struct ImmutableDBOptions {
+  static const char* kName() { return "ImmutableDBOptions"; }
+  ImmutableDBOptions();
+  explicit ImmutableDBOptions(const DBOptions& options);
+
+  void Dump(Logger* log) const;
+
+  bool create_if_missing;
+  bool create_missing_column_families;
+  bool error_if_exists;
+  bool paranoid_checks;
+  bool flush_verify_memtable_count;
+  bool track_and_verify_wals_in_manifest;
+  bool verify_sst_unique_id_in_manifest;
+  Env* env;
+  std::shared_ptr<RateLimiter> rate_limiter;
+  std::shared_ptr<SstFileManager> sst_file_manager;
+  std::shared_ptr<Logger> info_log;
+  InfoLogLevel info_log_level;
+  int max_file_opening_threads;
+  std::shared_ptr<Statistics> statistics;
+  bool use_fsync;
+  std::vector<DbPath> db_paths;
+  std::string db_log_dir;
+  // The wal_dir option from the file. To determine the
+  // directory in use, the GetWalDir or IsWalDirSameAsDBPath
+  // methods should be used instead of accessing this variable directly.
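+  // When wal_dir is empty, GetWalDir() falls back to db_paths[0].path,
+  // i.e. the WAL lives in the same directory as the data files.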
+  std::string wal_dir;
+  size_t max_log_file_size;
+  size_t log_file_time_to_roll;
+  size_t keep_log_file_num;
+  size_t recycle_log_file_num;
+  uint64_t max_manifest_file_size;
+  int table_cache_numshardbits;
+  uint64_t WAL_ttl_seconds;
+  uint64_t WAL_size_limit_MB;
+  uint64_t max_write_batch_group_size_bytes;
+  size_t manifest_preallocation_size;
+  bool allow_mmap_reads;
+  bool allow_mmap_writes;
+  bool use_direct_reads;
+  bool use_direct_io_for_flush_and_compaction;
+  bool allow_fallocate;
+  bool is_fd_close_on_exec;
+  bool advise_random_on_open;
+  size_t db_write_buffer_size;
+  std::shared_ptr<WriteBufferManager> write_buffer_manager;
+  DBOptions::AccessHint access_hint_on_compaction_start;
+  size_t random_access_max_buffer_size;
+  bool use_adaptive_mutex;
+  std::vector<std::shared_ptr<EventListener>> listeners;
+  bool enable_thread_tracking;
+  bool enable_pipelined_write;
+  bool unordered_write;
+  bool allow_concurrent_memtable_write;
+  bool enable_write_thread_adaptive_yield;
+  uint64_t write_thread_max_yield_usec;
+  uint64_t write_thread_slow_yield_usec;
+  bool skip_stats_update_on_db_open;
+  bool skip_checking_sst_file_sizes_on_db_open;
+  WALRecoveryMode wal_recovery_mode;
+  bool allow_2pc;
+  std::shared_ptr<Cache> row_cache;
+  WalFilter* wal_filter;
+  bool fail_if_options_file_error;
+  bool dump_malloc_stats;
+  bool avoid_flush_during_recovery;
+  bool allow_ingest_behind;
+  bool two_write_queues;
+  bool manual_wal_flush;
+  CompressionType wal_compression;
+  bool atomic_flush;
+  bool avoid_unnecessary_blocking_io;
+  bool persist_stats_to_disk;
+  bool write_dbid_to_manifest;
+  size_t log_readahead_size;
+  std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory;
+  bool best_efforts_recovery;
+  int max_bgerror_resume_count;
+  uint64_t bgerror_resume_retry_interval;
+  bool allow_data_in_errors;
+  std::string db_host_id;
+  FileTypeSet checksum_handoff_file_types;
+  CacheTier lowest_used_cache_tier;
+  // Convenience/Helper objects that are not part of the base DBOptions
+  std::shared_ptr<FileSystem> fs;
+  SystemClock* clock;
+  Statistics* stats;
+  Logger* logger;
+  std::shared_ptr<CompactionService> compaction_service;
+  bool enforce_single_del_contracts;
+
+  bool IsWalDirSameAsDBPath() const;
+  bool IsWalDirSameAsDBPath(const std::string& path) const;
+  const std::string& GetWalDir() const;
+  const std::string& GetWalDir(const std::string& path) const;
+};
+
+struct MutableDBOptions {
+  static const char* kName() { return "MutableDBOptions"; }
+  MutableDBOptions();
+  explicit MutableDBOptions(const DBOptions& options);
+
+  void Dump(Logger* log) const;
+
+  int max_background_jobs;
+  int max_background_compactions;
+  uint32_t max_subcompactions;
+  bool avoid_flush_during_shutdown;
+  size_t writable_file_max_buffer_size;
+  uint64_t delayed_write_rate;
+  uint64_t max_total_wal_size;
+  uint64_t delete_obsolete_files_period_micros;
+  unsigned int stats_dump_period_sec;
+  unsigned int stats_persist_period_sec;
+  size_t stats_history_buffer_size;
+  int max_open_files;
+  uint64_t bytes_per_sync;
+  uint64_t wal_bytes_per_sync;
+  bool strict_bytes_per_sync;
+  size_t compaction_readahead_size;
+  int max_background_flushes;
+};
+
+Status GetStringFromMutableDBOptions(const ConfigOptions& config_options,
+                                     const MutableDBOptions& mutable_opts,
+                                     std::string* opt_string);
+
+Status GetMutableDBOptionsFromStrings(
+    const MutableDBOptions& base_options,
+    const std::unordered_map<std::string, std::string>& options_map,
+    MutableDBOptions* new_options);
+
+bool MutableDBOptionsAreEqual(const MutableDBOptions& this_options,
+                              const MutableDBOptions& that_options);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options.cc
new file mode 100644
index 0000000000..1caebdfb2f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options.cc
@@ -0,0 +1,692 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "rocksdb/options.h"
+
+#include <cinttypes>
+#include <limits>
+
+#include "logging/logging.h"
+#include "monitoring/statistics_impl.h"
+#include "options/db_options.h"
+#include "options/options_helper.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/env.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/memtablerep.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/sst_file_manager.h"
+#include "rocksdb/sst_partitioner.h"
+#include "rocksdb/table.h"
+#include "rocksdb/table_properties.h"
+#include "rocksdb/wal_filter.h"
+#include "table/block_based/block_based_table_factory.h"
+#include "util/compression.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() {
+  assert(memtable_factory.get() != nullptr);
+}
+
+AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
+    : max_write_buffer_number(options.max_write_buffer_number),
+      min_write_buffer_number_to_merge(
+          options.min_write_buffer_number_to_merge),
+      max_write_buffer_number_to_maintain(
+          options.max_write_buffer_number_to_maintain),
+      max_write_buffer_size_to_maintain(
+          options.max_write_buffer_size_to_maintain),
+      inplace_update_support(options.inplace_update_support),
+      inplace_update_num_locks(options.inplace_update_num_locks),
+      experimental_mempurge_threshold(options.experimental_mempurge_threshold),
+      inplace_callback(options.inplace_callback),
+      memtable_prefix_bloom_size_ratio(
+          options.memtable_prefix_bloom_size_ratio),
+      memtable_whole_key_filtering(options.memtable_whole_key_filtering),
+      memtable_huge_page_size(options.memtable_huge_page_size),
+      memtable_insert_with_hint_prefix_extractor(
+          options.memtable_insert_with_hint_prefix_extractor),
+      bloom_locality(options.bloom_locality),
+      arena_block_size(options.arena_block_size),
+      compression_per_level(options.compression_per_level),
+      num_levels(options.num_levels),
+      level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
+      level0_stop_writes_trigger(options.level0_stop_writes_trigger),
+      target_file_size_base(options.target_file_size_base),
+      target_file_size_multiplier(options.target_file_size_multiplier),
+      level_compaction_dynamic_level_bytes(
+          options.level_compaction_dynamic_level_bytes),
+      max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
+      max_bytes_for_level_multiplier_additional(
+          options.max_bytes_for_level_multiplier_additional),
+      max_compaction_bytes(options.max_compaction_bytes),
+      ignore_max_compaction_bytes_for_input(
+          options.ignore_max_compaction_bytes_for_input),
+      soft_pending_compaction_bytes_limit(
+          options.soft_pending_compaction_bytes_limit),
+      hard_pending_compaction_bytes_limit(
+          options.hard_pending_compaction_bytes_limit),
+      compaction_style(options.compaction_style),
+      compaction_pri(options.compaction_pri),
+      compaction_options_universal(options.compaction_options_universal),
+      compaction_options_fifo(options.compaction_options_fifo),
+      max_sequential_skip_in_iterations(
+          options.max_sequential_skip_in_iterations),
+      memtable_factory(options.memtable_factory),
+      table_properties_collector_factories(
+          options.table_properties_collector_factories),
+      max_successive_merges(options.max_successive_merges),
+      optimize_filters_for_hits(options.optimize_filters_for_hits),
+      paranoid_file_checks(options.paranoid_file_checks),
+      force_consistency_checks(options.force_consistency_checks),
+      report_bg_io_stats(options.report_bg_io_stats),
+      ttl(options.ttl),
+      periodic_compaction_seconds(options.periodic_compaction_seconds),
+      sample_for_compression(options.sample_for_compression),
+      preclude_last_level_data_seconds(
+          options.preclude_last_level_data_seconds),
+      preserve_internal_time_seconds(options.preserve_internal_time_seconds),
+      enable_blob_files(options.enable_blob_files),
+      min_blob_size(options.min_blob_size),
+      blob_file_size(options.blob_file_size),
+      blob_compression_type(options.blob_compression_type),
+      enable_blob_garbage_collection(options.enable_blob_garbage_collection),
+      blob_garbage_collection_age_cutoff(
+          options.blob_garbage_collection_age_cutoff),
+      blob_garbage_collection_force_threshold(
+          options.blob_garbage_collection_force_threshold),
+      blob_compaction_readahead_size(options.blob_compaction_readahead_size),
+      blob_file_starting_level(options.blob_file_starting_level),
+      blob_cache(options.blob_cache),
+      prepopulate_blob_cache(options.prepopulate_blob_cache),
+      persist_user_defined_timestamps(options.persist_user_defined_timestamps) {
+  assert(memtable_factory.get() != nullptr);
+  if (max_bytes_for_level_multiplier_additional.size() <
+      static_cast<unsigned int>(num_levels)) {
+    max_bytes_for_level_multiplier_additional.resize(num_levels, 1);
+  }
+}
+
+ColumnFamilyOptions::ColumnFamilyOptions()
+    : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
+      table_factory(
+          std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {}
+
+ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
+    : ColumnFamilyOptions(*static_cast<const ColumnFamilyOptions*>(&options)) {}
+
+DBOptions::DBOptions() {}
+DBOptions::DBOptions(const Options& options)
+    : DBOptions(*static_cast<const DBOptions*>(&options)) {}
+
+void DBOptions::Dump(Logger* log) const {
+  ImmutableDBOptions(*this).Dump(log);
+  MutableDBOptions(*this).Dump(log);
+}  // DBOptions::Dump
+
+void ColumnFamilyOptions::Dump(Logger* log) const {
+  ROCKS_LOG_HEADER(log, " Options.comparator: %s",
+                   comparator->Name());
+  if (comparator->timestamp_size() > 0) {
+    ROCKS_LOG_HEADER(
+        log, " Options.persist_user_defined_timestamps: %s",
+        persist_user_defined_timestamps ? "true" : "false");
+  }
+  ROCKS_LOG_HEADER(log, " Options.merge_operator: %s",
+                   merge_operator ? merge_operator->Name() : "None");
+  ROCKS_LOG_HEADER(log, " Options.compaction_filter: %s",
+                   compaction_filter ? compaction_filter->Name() : "None");
+  ROCKS_LOG_HEADER(
+      log, " Options.compaction_filter_factory: %s",
+      compaction_filter_factory ? compaction_filter_factory->Name() : "None");
+  ROCKS_LOG_HEADER(
+      log, " Options.sst_partitioner_factory: %s",
+      sst_partitioner_factory ?
sst_partitioner_factory->Name() : "None"); + ROCKS_LOG_HEADER(log, " Options.memtable_factory: %s", + memtable_factory->Name()); + ROCKS_LOG_HEADER(log, " Options.table_factory: %s", + table_factory->Name()); + ROCKS_LOG_HEADER(log, " table_factory options: %s", + table_factory->GetPrintableOptions().c_str()); + ROCKS_LOG_HEADER(log, " Options.write_buffer_size: %" ROCKSDB_PRIszt, + write_buffer_size); + ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number: %d", + max_write_buffer_number); + if (!compression_per_level.empty()) { + for (unsigned int i = 0; i < compression_per_level.size(); i++) { + ROCKS_LOG_HEADER( + log, " Options.compression[%d]: %s", i, + CompressionTypeToString(compression_per_level[i]).c_str()); + } + } else { + ROCKS_LOG_HEADER(log, " Options.compression: %s", + CompressionTypeToString(compression).c_str()); + } + ROCKS_LOG_HEADER( + log, " Options.bottommost_compression: %s", + bottommost_compression == kDisableCompressionOption + ? "Disabled" + : CompressionTypeToString(bottommost_compression).c_str()); + ROCKS_LOG_HEADER( + log, " Options.prefix_extractor: %s", + prefix_extractor == nullptr ? "nullptr" : prefix_extractor->Name()); + ROCKS_LOG_HEADER(log, + " Options.memtable_insert_with_hint_prefix_extractor: %s", + memtable_insert_with_hint_prefix_extractor == nullptr + ? "nullptr" + : memtable_insert_with_hint_prefix_extractor->Name()); + ROCKS_LOG_HEADER(log, " Options.num_levels: %d", num_levels); + ROCKS_LOG_HEADER(log, " Options.min_write_buffer_number_to_merge: %d", + min_write_buffer_number_to_merge); + ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number_to_maintain: %d", + max_write_buffer_number_to_maintain); + ROCKS_LOG_HEADER(log, + " Options.max_write_buffer_size_to_maintain: %" PRIu64, + max_write_buffer_size_to_maintain); + ROCKS_LOG_HEADER( + log, " Options.bottommost_compression_opts.window_bits: %d", + bottommost_compression_opts.window_bits); + ROCKS_LOG_HEADER( + log, " Options.bottommost_compression_opts.level: %d", + bottommost_compression_opts.level); + ROCKS_LOG_HEADER( + log, " Options.bottommost_compression_opts.strategy: %d", + bottommost_compression_opts.strategy); + ROCKS_LOG_HEADER( + log, + " Options.bottommost_compression_opts.max_dict_bytes: " + "%" PRIu32, + bottommost_compression_opts.max_dict_bytes); + ROCKS_LOG_HEADER( + log, + " Options.bottommost_compression_opts.zstd_max_train_bytes: " + "%" PRIu32, + bottommost_compression_opts.zstd_max_train_bytes); + ROCKS_LOG_HEADER( + log, + " Options.bottommost_compression_opts.parallel_threads: " + "%" PRIu32, + bottommost_compression_opts.parallel_threads); + ROCKS_LOG_HEADER( + log, " Options.bottommost_compression_opts.enabled: %s", + bottommost_compression_opts.enabled ? "true" : "false"); + ROCKS_LOG_HEADER( + log, + " Options.bottommost_compression_opts.max_dict_buffer_bytes: " + "%" PRIu64, + bottommost_compression_opts.max_dict_buffer_bytes); + ROCKS_LOG_HEADER( + log, + " Options.bottommost_compression_opts.use_zstd_dict_trainer: %s", + bottommost_compression_opts.use_zstd_dict_trainer ? 
"true" : "false"); + ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d", + compression_opts.window_bits); + ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d", + compression_opts.level); + ROCKS_LOG_HEADER(log, " Options.compression_opts.strategy: %d", + compression_opts.strategy); + ROCKS_LOG_HEADER( + log, + " Options.compression_opts.max_dict_bytes: %" PRIu32, + compression_opts.max_dict_bytes); + ROCKS_LOG_HEADER(log, + " Options.compression_opts.zstd_max_train_bytes: " + "%" PRIu32, + compression_opts.zstd_max_train_bytes); + ROCKS_LOG_HEADER( + log, " Options.compression_opts.use_zstd_dict_trainer: %s", + compression_opts.use_zstd_dict_trainer ? "true" : "false"); + ROCKS_LOG_HEADER(log, + " Options.compression_opts.parallel_threads: " + "%" PRIu32, + compression_opts.parallel_threads); + ROCKS_LOG_HEADER(log, + " Options.compression_opts.enabled: %s", + compression_opts.enabled ? "true" : "false"); + ROCKS_LOG_HEADER(log, + " Options.compression_opts.max_dict_buffer_bytes: " + "%" PRIu64, + compression_opts.max_dict_buffer_bytes); + ROCKS_LOG_HEADER(log, " Options.level0_file_num_compaction_trigger: %d", + level0_file_num_compaction_trigger); + ROCKS_LOG_HEADER(log, " Options.level0_slowdown_writes_trigger: %d", + level0_slowdown_writes_trigger); + ROCKS_LOG_HEADER(log, " Options.level0_stop_writes_trigger: %d", + level0_stop_writes_trigger); + ROCKS_LOG_HEADER( + log, " Options.target_file_size_base: %" PRIu64, + target_file_size_base); + ROCKS_LOG_HEADER(log, " Options.target_file_size_multiplier: %d", + target_file_size_multiplier); + ROCKS_LOG_HEADER( + log, " Options.max_bytes_for_level_base: %" PRIu64, + max_bytes_for_level_base); + ROCKS_LOG_HEADER(log, "Options.level_compaction_dynamic_level_bytes: %d", + level_compaction_dynamic_level_bytes); + ROCKS_LOG_HEADER(log, " Options.max_bytes_for_level_multiplier: %f", + max_bytes_for_level_multiplier); + for (size_t i = 0; i < max_bytes_for_level_multiplier_additional.size(); + i++) { + ROCKS_LOG_HEADER( + log, "Options.max_bytes_for_level_multiplier_addtl[%" ROCKSDB_PRIszt + "]: %d", + i, max_bytes_for_level_multiplier_additional[i]); + } + ROCKS_LOG_HEADER( + log, " Options.max_sequential_skip_in_iterations: %" PRIu64, + max_sequential_skip_in_iterations); + ROCKS_LOG_HEADER( + log, " Options.max_compaction_bytes: %" PRIu64, + max_compaction_bytes); + ROCKS_LOG_HEADER(log, " Options.ignore_max_compaction_bytes_for_input: %s", + ignore_max_compaction_bytes_for_input ? 
"true" : "false"); + ROCKS_LOG_HEADER( + log, + " Options.arena_block_size: %" ROCKSDB_PRIszt, + arena_block_size); + ROCKS_LOG_HEADER(log, + " Options.soft_pending_compaction_bytes_limit: %" PRIu64, + soft_pending_compaction_bytes_limit); + ROCKS_LOG_HEADER(log, + " Options.hard_pending_compaction_bytes_limit: %" PRIu64, + hard_pending_compaction_bytes_limit); + ROCKS_LOG_HEADER(log, " Options.disable_auto_compactions: %d", + disable_auto_compactions); + + const auto& it_compaction_style = + compaction_style_to_string.find(compaction_style); + std::string str_compaction_style; + if (it_compaction_style == compaction_style_to_string.end()) { + assert(false); + str_compaction_style = "unknown_" + std::to_string(compaction_style); + } else { + str_compaction_style = it_compaction_style->second; + } + ROCKS_LOG_HEADER(log, + " Options.compaction_style: %s", + str_compaction_style.c_str()); + + const auto& it_compaction_pri = + compaction_pri_to_string.find(compaction_pri); + std::string str_compaction_pri; + if (it_compaction_pri == compaction_pri_to_string.end()) { + assert(false); + str_compaction_pri = "unknown_" + std::to_string(compaction_pri); + } else { + str_compaction_pri = it_compaction_pri->second; + } + ROCKS_LOG_HEADER(log, + " Options.compaction_pri: %s", + str_compaction_pri.c_str()); + ROCKS_LOG_HEADER(log, + "Options.compaction_options_universal.size_ratio: %u", + compaction_options_universal.size_ratio); + ROCKS_LOG_HEADER(log, + "Options.compaction_options_universal.min_merge_width: %u", + compaction_options_universal.min_merge_width); + ROCKS_LOG_HEADER(log, + "Options.compaction_options_universal.max_merge_width: %u", + compaction_options_universal.max_merge_width); + ROCKS_LOG_HEADER( + log, + "Options.compaction_options_universal." 
+ "max_size_amplification_percent: %u", + compaction_options_universal.max_size_amplification_percent); + ROCKS_LOG_HEADER( + log, + "Options.compaction_options_universal.compression_size_percent: %d", + compaction_options_universal.compression_size_percent); + const auto& it_compaction_stop_style = compaction_stop_style_to_string.find( + compaction_options_universal.stop_style); + std::string str_compaction_stop_style; + if (it_compaction_stop_style == compaction_stop_style_to_string.end()) { + assert(false); + str_compaction_stop_style = + "unknown_" + std::to_string(compaction_options_universal.stop_style); + } else { + str_compaction_stop_style = it_compaction_stop_style->second; + } + ROCKS_LOG_HEADER(log, + "Options.compaction_options_universal.stop_style: %s", + str_compaction_stop_style.c_str()); + ROCKS_LOG_HEADER( + log, "Options.compaction_options_fifo.max_table_files_size: %" PRIu64, + compaction_options_fifo.max_table_files_size); + ROCKS_LOG_HEADER(log, + "Options.compaction_options_fifo.allow_compaction: %d", + compaction_options_fifo.allow_compaction); + std::ostringstream collector_info; + for (const auto& collector_factory : table_properties_collector_factories) { + collector_info << collector_factory->ToString() << ';'; + } + ROCKS_LOG_HEADER( + log, " Options.table_properties_collectors: %s", + collector_info.str().c_str()); + ROCKS_LOG_HEADER(log, + " Options.inplace_update_support: %d", + inplace_update_support); + ROCKS_LOG_HEADER( + log, + " Options.inplace_update_num_locks: %" ROCKSDB_PRIszt, + inplace_update_num_locks); + // TODO: easier config for bloom (maybe based on avg key/value size) + ROCKS_LOG_HEADER( + log, " Options.memtable_prefix_bloom_size_ratio: %f", + memtable_prefix_bloom_size_ratio); + ROCKS_LOG_HEADER(log, + " Options.memtable_whole_key_filtering: %d", + memtable_whole_key_filtering); + + ROCKS_LOG_HEADER(log, " Options.memtable_huge_page_size: %" ROCKSDB_PRIszt, + memtable_huge_page_size); + ROCKS_LOG_HEADER(log, + " Options.bloom_locality: %d", + bloom_locality); + + ROCKS_LOG_HEADER( + log, + " Options.max_successive_merges: %" ROCKSDB_PRIszt, + max_successive_merges); + ROCKS_LOG_HEADER(log, + " Options.optimize_filters_for_hits: %d", + optimize_filters_for_hits); + ROCKS_LOG_HEADER(log, " Options.paranoid_file_checks: %d", + paranoid_file_checks); + ROCKS_LOG_HEADER(log, " Options.force_consistency_checks: %d", + force_consistency_checks); + ROCKS_LOG_HEADER(log, " Options.report_bg_io_stats: %d", + report_bg_io_stats); + ROCKS_LOG_HEADER(log, " Options.ttl: %" PRIu64, + ttl); + ROCKS_LOG_HEADER(log, + " Options.periodic_compaction_seconds: %" PRIu64, + periodic_compaction_seconds); + ROCKS_LOG_HEADER(log, " Options.preclude_last_level_data_seconds: %" PRIu64, + preclude_last_level_data_seconds); + ROCKS_LOG_HEADER(log, " Options.preserve_internal_time_seconds: %" PRIu64, + preserve_internal_time_seconds); + ROCKS_LOG_HEADER(log, " Options.enable_blob_files: %s", + enable_blob_files ? "true" : "false"); + ROCKS_LOG_HEADER( + log, " Options.min_blob_size: %" PRIu64, + min_blob_size); + ROCKS_LOG_HEADER( + log, " Options.blob_file_size: %" PRIu64, + blob_file_size); + ROCKS_LOG_HEADER(log, " Options.blob_compression_type: %s", + CompressionTypeToString(blob_compression_type).c_str()); + ROCKS_LOG_HEADER(log, " Options.enable_blob_garbage_collection: %s", + enable_blob_garbage_collection ? 
"true" : "false"); + ROCKS_LOG_HEADER(log, " Options.blob_garbage_collection_age_cutoff: %f", + blob_garbage_collection_age_cutoff); + ROCKS_LOG_HEADER(log, "Options.blob_garbage_collection_force_threshold: %f", + blob_garbage_collection_force_threshold); + ROCKS_LOG_HEADER( + log, " Options.blob_compaction_readahead_size: %" PRIu64, + blob_compaction_readahead_size); + ROCKS_LOG_HEADER(log, " Options.blob_file_starting_level: %d", + blob_file_starting_level); + if (blob_cache) { + ROCKS_LOG_HEADER(log, " Options.blob_cache: %s", + blob_cache->Name()); + ROCKS_LOG_HEADER(log, " blob_cache options: %s", + blob_cache->GetPrintableOptions().c_str()); + ROCKS_LOG_HEADER( + log, " blob_cache prepopulated: %s", + prepopulate_blob_cache == PrepopulateBlobCache::kFlushOnly + ? "flush only" + : "disabled"); + } + ROCKS_LOG_HEADER(log, "Options.experimental_mempurge_threshold: %f", + experimental_mempurge_threshold); +} // ColumnFamilyOptions::Dump + +void Options::Dump(Logger* log) const { + DBOptions::Dump(log); + ColumnFamilyOptions::Dump(log); +} // Options::Dump + +void Options::DumpCFOptions(Logger* log) const { + ColumnFamilyOptions::Dump(log); +} // Options::DumpCFOptions + +// +// The goal of this method is to create a configuration that +// allows an application to write all files into L0 and +// then do a single compaction to output all files into L1. +Options* +Options::PrepareForBulkLoad() +{ + // never slowdown ingest. + level0_file_num_compaction_trigger = (1<<30); + level0_slowdown_writes_trigger = (1<<30); + level0_stop_writes_trigger = (1<<30); + soft_pending_compaction_bytes_limit = 0; + hard_pending_compaction_bytes_limit = 0; + + // no auto compactions please. The application should issue a + // manual compaction after all data is loaded into L0. + disable_auto_compactions = true; + // A manual compaction run should pick all files in L0 in + // a single compaction run. + max_compaction_bytes = (static_cast(1) << 60); + + // It is better to have only 2 levels, otherwise a manual + // compaction would compact at every possible level, thereby + // increasing the total time needed for compactions. + num_levels = 2; + + // Need to allow more write buffers to allow more parallism + // of flushes. + max_write_buffer_number = 6; + min_write_buffer_number_to_merge = 1; + + // When compaction is disabled, more parallel flush threads can + // help with write throughput. + max_background_flushes = 4; + + // Prevent a memtable flush to automatically promote files + // to L1. This is helpful so that all files that are + // input to the manual compaction are all at L0. + max_background_compactions = 2; + + // The compaction would create large files in L1. + target_file_size_base = 256 * 1024 * 1024; + return this; +} + +Options* Options::OptimizeForSmallDb() { + // 16MB block cache + std::shared_ptr cache = NewLRUCache(16 << 20); + + ColumnFamilyOptions::OptimizeForSmallDb(&cache); + DBOptions::OptimizeForSmallDb(&cache); + return this; +} + +Options* Options::DisableExtraChecks() { + // See https://github.com/facebook/rocksdb/issues/9354 + force_consistency_checks = false; + // Considered but no clear performance impact seen: + // * check_flush_compaction_key_order + // * paranoid_checks + // * flush_verify_memtable_count + // By current API contract, not including + // * verify_checksums + // because checking storage data integrity is a more standard practice. 
+  return this;
+}
+
+Options* Options::OldDefaults(int rocksdb_major_version,
+                              int rocksdb_minor_version) {
+  ColumnFamilyOptions::OldDefaults(rocksdb_major_version,
+                                   rocksdb_minor_version);
+  DBOptions::OldDefaults(rocksdb_major_version, rocksdb_minor_version);
+  return this;
+}
+
+DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
+                                  int rocksdb_minor_version) {
+  if (rocksdb_major_version < 4 ||
+      (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
+    max_file_opening_threads = 1;
+    table_cache_numshardbits = 4;
+  }
+  if (rocksdb_major_version < 5 ||
+      (rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
+    delayed_write_rate = 2 * 1024U * 1024U;
+  } else if (rocksdb_major_version < 5 ||
+             (rocksdb_major_version == 5 && rocksdb_minor_version < 6)) {
+    delayed_write_rate = 16 * 1024U * 1024U;
+  }
+  max_open_files = 5000;
+  wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
+  return this;
+}
+
+ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
+    int rocksdb_major_version, int rocksdb_minor_version) {
+  if (rocksdb_major_version < 5 ||
+      (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) {
+    compaction_pri = CompactionPri::kByCompensatedSize;
+  }
+  if (rocksdb_major_version < 4 ||
+      (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) {
+    write_buffer_size = 4 << 20;
+    target_file_size_base = 2 * 1048576;
+    max_bytes_for_level_base = 10 * 1048576;
+    soft_pending_compaction_bytes_limit = 0;
+    hard_pending_compaction_bytes_limit = 0;
+  }
+  if (rocksdb_major_version < 5) {
+    level0_stop_writes_trigger = 24;
+  } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) {
+    level0_stop_writes_trigger = 30;
+  }
+
+  return this;
+}
+
+// Optimization functions
+DBOptions* DBOptions::OptimizeForSmallDb(std::shared_ptr<Cache>* cache) {
+  max_file_opening_threads = 1;
+  max_open_files = 5000;
+
+  // Cost memtable to block cache too.
+  std::shared_ptr<WriteBufferManager> wbm =
+      std::make_shared<WriteBufferManager>(
+          0, (cache != nullptr) ? *cache : std::shared_ptr<Cache>());
+  write_buffer_manager = wbm;
+
+  return this;
+}
+
+ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb(
+    std::shared_ptr<Cache>* cache) {
+  write_buffer_size = 2 << 20;
+  target_file_size_base = 2 * 1048576;
+  max_bytes_for_level_base = 10 * 1048576;
+  soft_pending_compaction_bytes_limit = 256 * 1048576;
+  hard_pending_compaction_bytes_limit = 1073741824ul;
+
+  BlockBasedTableOptions table_options;
+  table_options.block_cache =
+      (cache != nullptr) ?
*cache : std::shared_ptr<Cache>();
+  table_options.cache_index_and_filter_blocks = true;
+  // Two level iterator to avoid LRU cache imbalance
+  table_options.index_type =
+      BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+  table_factory.reset(new BlockBasedTableFactory(table_options));
+
+  return this;
+}
+
+ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForPointLookup(
+    uint64_t block_cache_size_mb) {
+  BlockBasedTableOptions block_based_options;
+  block_based_options.data_block_index_type =
+      BlockBasedTableOptions::kDataBlockBinaryAndHash;
+  block_based_options.data_block_hash_table_util_ratio = 0.75;
+  block_based_options.filter_policy.reset(NewBloomFilterPolicy(10));
+  block_based_options.block_cache =
+      NewLRUCache(static_cast<size_t>(block_cache_size_mb * 1024 * 1024));
+  table_factory.reset(new BlockBasedTableFactory(block_based_options));
+  memtable_prefix_bloom_size_ratio = 0.02;
+  memtable_whole_key_filtering = true;
+  return this;
+}
+
+ColumnFamilyOptions* ColumnFamilyOptions::OptimizeLevelStyleCompaction(
+    uint64_t memtable_memory_budget) {
+  write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
+  // merge two memtables when flushing to L0
+  min_write_buffer_number_to_merge = 2;
+  // this means we'll use 50% extra memory in the worst case, but will reduce
+  // write stalls.
+  max_write_buffer_number = 6;
+  // start flushing L0->L1 as soon as possible. each file on level0 is
+  // (memtable_memory_budget / 2). This will flush level 0 when it's bigger
+  // than memtable_memory_budget.
+  level0_file_num_compaction_trigger = 2;
+  // doesn't really matter much, but we don't want to create too many files
+  target_file_size_base = memtable_memory_budget / 8;
+  // make Level1 size equal to Level0 size, so that L0->L1 compactions are fast
+  max_bytes_for_level_base = memtable_memory_budget;
+
+  // level style compaction
+  compaction_style = kCompactionStyleLevel;
+
+  // only compress levels >= 2
+  compression_per_level.resize(num_levels);
+  for (int i = 0; i < num_levels; ++i) {
+    if (i < 2) {
+      compression_per_level[i] = kNoCompression;
+    } else {
+      compression_per_level[i] =
+          LZ4_Supported()
+              ? kLZ4Compression
+              : (Snappy_Supported() ? kSnappyCompression : kNoCompression);
+    }
+  }
+  return this;
+}
+
+ColumnFamilyOptions* ColumnFamilyOptions::OptimizeUniversalStyleCompaction(
+    uint64_t memtable_memory_budget) {
+  write_buffer_size = static_cast<size_t>(memtable_memory_budget / 4);
+  // merge two memtables when flushing to L0
+  min_write_buffer_number_to_merge = 2;
+  // this means we'll use 50% extra memory in the worst case, but will reduce
+  // write stalls.
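+  // (Same memtable sizing rationale as in OptimizeLevelStyleCompaction
+  // above.)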
+  max_write_buffer_number = 6;
+  // universal style compaction
+  compaction_style = kCompactionStyleUniversal;
+  compaction_options_universal.compression_size_percent = 80;
+  return this;
+}
+
+DBOptions* DBOptions::IncreaseParallelism(int total_threads) {
+  max_background_jobs = total_threads;
+  env->SetBackgroundThreads(total_threads, Env::LOW);
+  env->SetBackgroundThreads(1, Env::HIGH);
+  return this;
+}
+
+ReadOptions::ReadOptions(bool _verify_checksums, bool _fill_cache)
+    : verify_checksums(_verify_checksums), fill_cache(_fill_cache) {}
+
+ReadOptions::ReadOptions(Env::IOActivity _io_activity)
+    : io_activity(_io_activity) {}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.cc
new file mode 100644
index 0000000000..abe5053d22
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.cc
@@ -0,0 +1,1424 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#include "options/options_helper.h"
+
+#include <cassert>
+#include <cctype>
+#include <cstdlib>
+#include <set>
+#include <unordered_set>
+#include <vector>
+
+#include "options/cf_options.h"
+#include "options/db_options.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/flush_block_policy.h"
+#include "rocksdb/memtablerep.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/options.h"
+#include "rocksdb/rate_limiter.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/table.h"
+#include "rocksdb/utilities/object_registry.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+ConfigOptions::ConfigOptions()
+    : registry(ObjectRegistry::NewInstance())
+{
+  env = Env::Default();
+}
+
+ConfigOptions::ConfigOptions(const DBOptions& db_opts) : env(db_opts.env) {
+  registry = ObjectRegistry::NewInstance();
+}
+
+Status ValidateOptions(const DBOptions& db_opts,
+                       const ColumnFamilyOptions& cf_opts) {
+  Status s;
+  auto db_cfg = DBOptionsAsConfigurable(db_opts);
+  auto cf_cfg = CFOptionsAsConfigurable(cf_opts);
+  s = db_cfg->ValidateOptions(db_opts, cf_opts);
+  if (s.ok()) s = cf_cfg->ValidateOptions(db_opts, cf_opts);
+  return s;
+}
+
+DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
+                         const MutableDBOptions& mutable_db_options) {
+  DBOptions options;
+
+  options.create_if_missing = immutable_db_options.create_if_missing;
+  options.create_missing_column_families =
+      immutable_db_options.create_missing_column_families;
+  options.error_if_exists = immutable_db_options.error_if_exists;
+  options.paranoid_checks = immutable_db_options.paranoid_checks;
+  options.flush_verify_memtable_count =
+      immutable_db_options.flush_verify_memtable_count;
+  options.track_and_verify_wals_in_manifest =
+      immutable_db_options.track_and_verify_wals_in_manifest;
+  options.verify_sst_unique_id_in_manifest =
+      immutable_db_options.verify_sst_unique_id_in_manifest;
+  options.env = immutable_db_options.env;
+  options.rate_limiter = immutable_db_options.rate_limiter;
+  options.sst_file_manager = immutable_db_options.sst_file_manager;
+  options.info_log = immutable_db_options.info_log;
+  options.info_log_level = immutable_db_options.info_log_level;
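+  // Mutable fields below come from mutable_db_options; everything else is
+  // copied back from immutable_db_options.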
+ options.max_open_files = mutable_db_options.max_open_files; + options.max_file_opening_threads = + immutable_db_options.max_file_opening_threads; + options.max_total_wal_size = mutable_db_options.max_total_wal_size; + options.statistics = immutable_db_options.statistics; + options.use_fsync = immutable_db_options.use_fsync; + options.db_paths = immutable_db_options.db_paths; + options.db_log_dir = immutable_db_options.db_log_dir; + options.wal_dir = immutable_db_options.wal_dir; + options.delete_obsolete_files_period_micros = + mutable_db_options.delete_obsolete_files_period_micros; + options.max_background_jobs = mutable_db_options.max_background_jobs; + options.max_background_compactions = + mutable_db_options.max_background_compactions; + options.bytes_per_sync = mutable_db_options.bytes_per_sync; + options.wal_bytes_per_sync = mutable_db_options.wal_bytes_per_sync; + options.strict_bytes_per_sync = mutable_db_options.strict_bytes_per_sync; + options.max_subcompactions = mutable_db_options.max_subcompactions; + options.max_background_flushes = mutable_db_options.max_background_flushes; + options.max_log_file_size = immutable_db_options.max_log_file_size; + options.log_file_time_to_roll = immutable_db_options.log_file_time_to_roll; + options.keep_log_file_num = immutable_db_options.keep_log_file_num; + options.recycle_log_file_num = immutable_db_options.recycle_log_file_num; + options.max_manifest_file_size = immutable_db_options.max_manifest_file_size; + options.table_cache_numshardbits = + immutable_db_options.table_cache_numshardbits; + options.WAL_ttl_seconds = immutable_db_options.WAL_ttl_seconds; + options.WAL_size_limit_MB = immutable_db_options.WAL_size_limit_MB; + options.manifest_preallocation_size = + immutable_db_options.manifest_preallocation_size; + options.allow_mmap_reads = immutable_db_options.allow_mmap_reads; + options.allow_mmap_writes = immutable_db_options.allow_mmap_writes; + options.use_direct_reads = immutable_db_options.use_direct_reads; + options.use_direct_io_for_flush_and_compaction = + immutable_db_options.use_direct_io_for_flush_and_compaction; + options.allow_fallocate = immutable_db_options.allow_fallocate; + options.is_fd_close_on_exec = immutable_db_options.is_fd_close_on_exec; + options.stats_dump_period_sec = mutable_db_options.stats_dump_period_sec; + options.stats_persist_period_sec = + mutable_db_options.stats_persist_period_sec; + options.persist_stats_to_disk = immutable_db_options.persist_stats_to_disk; + options.stats_history_buffer_size = + mutable_db_options.stats_history_buffer_size; + options.advise_random_on_open = immutable_db_options.advise_random_on_open; + options.db_write_buffer_size = immutable_db_options.db_write_buffer_size; + options.write_buffer_manager = immutable_db_options.write_buffer_manager; + options.access_hint_on_compaction_start = + immutable_db_options.access_hint_on_compaction_start; + options.compaction_readahead_size = + mutable_db_options.compaction_readahead_size; + options.random_access_max_buffer_size = + immutable_db_options.random_access_max_buffer_size; + options.writable_file_max_buffer_size = + mutable_db_options.writable_file_max_buffer_size; + options.use_adaptive_mutex = immutable_db_options.use_adaptive_mutex; + options.listeners = immutable_db_options.listeners; + options.enable_thread_tracking = immutable_db_options.enable_thread_tracking; + options.delayed_write_rate = mutable_db_options.delayed_write_rate; + options.enable_pipelined_write = immutable_db_options.enable_pipelined_write; + 
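+  // unordered_write is documented as incompatible with
+  // enable_pipelined_write; both flags are carried over verbatim here.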
options.unordered_write = immutable_db_options.unordered_write; + options.allow_concurrent_memtable_write = + immutable_db_options.allow_concurrent_memtable_write; + options.enable_write_thread_adaptive_yield = + immutable_db_options.enable_write_thread_adaptive_yield; + options.max_write_batch_group_size_bytes = + immutable_db_options.max_write_batch_group_size_bytes; + options.write_thread_max_yield_usec = + immutable_db_options.write_thread_max_yield_usec; + options.write_thread_slow_yield_usec = + immutable_db_options.write_thread_slow_yield_usec; + options.skip_stats_update_on_db_open = + immutable_db_options.skip_stats_update_on_db_open; + options.skip_checking_sst_file_sizes_on_db_open = + immutable_db_options.skip_checking_sst_file_sizes_on_db_open; + options.wal_recovery_mode = immutable_db_options.wal_recovery_mode; + options.allow_2pc = immutable_db_options.allow_2pc; + options.row_cache = immutable_db_options.row_cache; + options.wal_filter = immutable_db_options.wal_filter; + options.fail_if_options_file_error = + immutable_db_options.fail_if_options_file_error; + options.dump_malloc_stats = immutable_db_options.dump_malloc_stats; + options.avoid_flush_during_recovery = + immutable_db_options.avoid_flush_during_recovery; + options.avoid_flush_during_shutdown = + mutable_db_options.avoid_flush_during_shutdown; + options.allow_ingest_behind = immutable_db_options.allow_ingest_behind; + options.two_write_queues = immutable_db_options.two_write_queues; + options.manual_wal_flush = immutable_db_options.manual_wal_flush; + options.wal_compression = immutable_db_options.wal_compression; + options.atomic_flush = immutable_db_options.atomic_flush; + options.avoid_unnecessary_blocking_io = + immutable_db_options.avoid_unnecessary_blocking_io; + options.log_readahead_size = immutable_db_options.log_readahead_size; + options.file_checksum_gen_factory = + immutable_db_options.file_checksum_gen_factory; + options.best_efforts_recovery = immutable_db_options.best_efforts_recovery; + options.max_bgerror_resume_count = + immutable_db_options.max_bgerror_resume_count; + options.bgerror_resume_retry_interval = + immutable_db_options.bgerror_resume_retry_interval; + options.db_host_id = immutable_db_options.db_host_id; + options.allow_data_in_errors = immutable_db_options.allow_data_in_errors; + options.checksum_handoff_file_types = + immutable_db_options.checksum_handoff_file_types; + options.lowest_used_cache_tier = immutable_db_options.lowest_used_cache_tier; + options.enforce_single_del_contracts = + immutable_db_options.enforce_single_del_contracts; + return options; +} + +ColumnFamilyOptions BuildColumnFamilyOptions( + const ColumnFamilyOptions& options, + const MutableCFOptions& mutable_cf_options) { + ColumnFamilyOptions cf_opts(options); + UpdateColumnFamilyOptions(mutable_cf_options, &cf_opts); + // TODO(yhchiang): find some way to handle the following derived options + // * max_file_size + return cf_opts; +} + +void UpdateColumnFamilyOptions(const MutableCFOptions& moptions, + ColumnFamilyOptions* cf_opts) { + // Memtable related options + cf_opts->write_buffer_size = moptions.write_buffer_size; + cf_opts->max_write_buffer_number = moptions.max_write_buffer_number; + cf_opts->arena_block_size = moptions.arena_block_size; + cf_opts->memtable_prefix_bloom_size_ratio = + moptions.memtable_prefix_bloom_size_ratio; + cf_opts->memtable_whole_key_filtering = moptions.memtable_whole_key_filtering; + cf_opts->memtable_huge_page_size = moptions.memtable_huge_page_size; + 
cf_opts->max_successive_merges = moptions.max_successive_merges; + cf_opts->inplace_update_num_locks = moptions.inplace_update_num_locks; + cf_opts->prefix_extractor = moptions.prefix_extractor; + cf_opts->experimental_mempurge_threshold = + moptions.experimental_mempurge_threshold; + cf_opts->memtable_protection_bytes_per_key = + moptions.memtable_protection_bytes_per_key; + cf_opts->block_protection_bytes_per_key = + moptions.block_protection_bytes_per_key; + + // Compaction related options + cf_opts->disable_auto_compactions = moptions.disable_auto_compactions; + cf_opts->soft_pending_compaction_bytes_limit = + moptions.soft_pending_compaction_bytes_limit; + cf_opts->hard_pending_compaction_bytes_limit = + moptions.hard_pending_compaction_bytes_limit; + cf_opts->level0_file_num_compaction_trigger = + moptions.level0_file_num_compaction_trigger; + cf_opts->level0_slowdown_writes_trigger = + moptions.level0_slowdown_writes_trigger; + cf_opts->level0_stop_writes_trigger = moptions.level0_stop_writes_trigger; + cf_opts->max_compaction_bytes = moptions.max_compaction_bytes; + cf_opts->ignore_max_compaction_bytes_for_input = + moptions.ignore_max_compaction_bytes_for_input; + cf_opts->target_file_size_base = moptions.target_file_size_base; + cf_opts->target_file_size_multiplier = moptions.target_file_size_multiplier; + cf_opts->max_bytes_for_level_base = moptions.max_bytes_for_level_base; + cf_opts->max_bytes_for_level_multiplier = + moptions.max_bytes_for_level_multiplier; + cf_opts->ttl = moptions.ttl; + cf_opts->periodic_compaction_seconds = moptions.periodic_compaction_seconds; + + cf_opts->max_bytes_for_level_multiplier_additional.clear(); + for (auto value : moptions.max_bytes_for_level_multiplier_additional) { + cf_opts->max_bytes_for_level_multiplier_additional.emplace_back(value); + } + + cf_opts->compaction_options_fifo = moptions.compaction_options_fifo; + cf_opts->compaction_options_universal = moptions.compaction_options_universal; + + // Blob file related options + cf_opts->enable_blob_files = moptions.enable_blob_files; + cf_opts->min_blob_size = moptions.min_blob_size; + cf_opts->blob_file_size = moptions.blob_file_size; + cf_opts->blob_compression_type = moptions.blob_compression_type; + cf_opts->enable_blob_garbage_collection = + moptions.enable_blob_garbage_collection; + cf_opts->blob_garbage_collection_age_cutoff = + moptions.blob_garbage_collection_age_cutoff; + cf_opts->blob_garbage_collection_force_threshold = + moptions.blob_garbage_collection_force_threshold; + cf_opts->blob_compaction_readahead_size = + moptions.blob_compaction_readahead_size; + cf_opts->blob_file_starting_level = moptions.blob_file_starting_level; + cf_opts->prepopulate_blob_cache = moptions.prepopulate_blob_cache; + + // Misc options + cf_opts->max_sequential_skip_in_iterations = + moptions.max_sequential_skip_in_iterations; + cf_opts->check_flush_compaction_key_order = + moptions.check_flush_compaction_key_order; + cf_opts->paranoid_file_checks = moptions.paranoid_file_checks; + cf_opts->report_bg_io_stats = moptions.report_bg_io_stats; + cf_opts->compression = moptions.compression; + cf_opts->compression_opts = moptions.compression_opts; + cf_opts->bottommost_compression = moptions.bottommost_compression; + cf_opts->bottommost_compression_opts = moptions.bottommost_compression_opts; + cf_opts->sample_for_compression = moptions.sample_for_compression; + cf_opts->compression_per_level = moptions.compression_per_level; + cf_opts->last_level_temperature = moptions.last_level_temperature; + 
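+  // bottommost_temperature is the deprecated spelling of
+  // last_level_temperature, so the two are kept in sync.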
cf_opts->bottommost_temperature = moptions.last_level_temperature;
+}
+
+void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions,
+                               ColumnFamilyOptions* cf_opts) {
+  cf_opts->compaction_style = ioptions.compaction_style;
+  cf_opts->compaction_pri = ioptions.compaction_pri;
+  cf_opts->comparator = ioptions.user_comparator;
+  cf_opts->merge_operator = ioptions.merge_operator;
+  cf_opts->compaction_filter = ioptions.compaction_filter;
+  cf_opts->compaction_filter_factory = ioptions.compaction_filter_factory;
+  cf_opts->min_write_buffer_number_to_merge =
+      ioptions.min_write_buffer_number_to_merge;
+  cf_opts->max_write_buffer_number_to_maintain =
+      ioptions.max_write_buffer_number_to_maintain;
+  cf_opts->max_write_buffer_size_to_maintain =
+      ioptions.max_write_buffer_size_to_maintain;
+  cf_opts->inplace_update_support = ioptions.inplace_update_support;
+  cf_opts->inplace_callback = ioptions.inplace_callback;
+  cf_opts->memtable_factory = ioptions.memtable_factory;
+  cf_opts->table_factory = ioptions.table_factory;
+  cf_opts->table_properties_collector_factories =
+      ioptions.table_properties_collector_factories;
+  cf_opts->bloom_locality = ioptions.bloom_locality;
+  cf_opts->level_compaction_dynamic_level_bytes =
+      ioptions.level_compaction_dynamic_level_bytes;
+  cf_opts->level_compaction_dynamic_file_size =
+      ioptions.level_compaction_dynamic_file_size;
+  cf_opts->num_levels = ioptions.num_levels;
+  cf_opts->optimize_filters_for_hits = ioptions.optimize_filters_for_hits;
+  cf_opts->force_consistency_checks = ioptions.force_consistency_checks;
+  cf_opts->memtable_insert_with_hint_prefix_extractor =
+      ioptions.memtable_insert_with_hint_prefix_extractor;
+  cf_opts->cf_paths = ioptions.cf_paths;
+  cf_opts->compaction_thread_limiter = ioptions.compaction_thread_limiter;
+  cf_opts->sst_partitioner_factory = ioptions.sst_partitioner_factory;
+  cf_opts->blob_cache = ioptions.blob_cache;
+  cf_opts->preclude_last_level_data_seconds =
+      ioptions.preclude_last_level_data_seconds;
+  cf_opts->preserve_internal_time_seconds =
+      ioptions.preserve_internal_time_seconds;
+  cf_opts->persist_user_defined_timestamps =
+      ioptions.persist_user_defined_timestamps;
+
+  // TODO(yhchiang): find some way to handle the following derived options
+  // * max_file_size
+}
+
+std::map<CompactionStyle, std::string>
+    OptionsHelper::compaction_style_to_string = {
+        {kCompactionStyleLevel, "kCompactionStyleLevel"},
+        {kCompactionStyleUniversal, "kCompactionStyleUniversal"},
+        {kCompactionStyleFIFO, "kCompactionStyleFIFO"},
+        {kCompactionStyleNone, "kCompactionStyleNone"}};
+
+std::map<CompactionPri, std::string> OptionsHelper::compaction_pri_to_string = {
+    {kByCompensatedSize, "kByCompensatedSize"},
+    {kOldestLargestSeqFirst, "kOldestLargestSeqFirst"},
+    {kOldestSmallestSeqFirst, "kOldestSmallestSeqFirst"},
+    {kMinOverlappingRatio, "kMinOverlappingRatio"},
+    {kRoundRobin, "kRoundRobin"}};
+
+std::map<CompactionStopStyle, std::string>
+    OptionsHelper::compaction_stop_style_to_string = {
+        {kCompactionStopStyleSimilarSize, "kCompactionStopStyleSimilarSize"},
+        {kCompactionStopStyleTotalSize, "kCompactionStopStyleTotalSize"}};
+
+std::map<Temperature, std::string> OptionsHelper::temperature_to_string = {
+    {Temperature::kUnknown, "kUnknown"},
+    {Temperature::kHot, "kHot"},
+    {Temperature::kWarm, "kWarm"},
+    {Temperature::kCold, "kCold"}};
+
+std::unordered_map<std::string, ChecksumType>
+    OptionsHelper::checksum_type_string_map = {{"kNoChecksum", kNoChecksum},
+                                               {"kCRC32c", kCRC32c},
+                                               {"kxxHash", kxxHash},
+                                               {"kxxHash64", kxxHash64},
+                                               {"kXXH3", kXXH3}};
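+// The string maps above and below back the generic ParseEnum<T> /
+// SerializeEnum<T> helpers used throughout this file. A minimal sketch of the
+// round trip they enable (illustrative only, not part of the vendored file):
+//
+//   std::string name;
+//   SerializeEnum<CompactionStyle>(compaction_style_string_map,
+//                                  kCompactionStyleLevel, &name);
+//   CompactionStyle style;
+//   ParseEnum<CompactionStyle>(compaction_style_string_map, name, &style);
+//   assert(style == kCompactionStyleLevel);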
{"kSnappyCompression", kSnappyCompression}, + {"kZlibCompression", kZlibCompression}, + {"kBZip2Compression", kBZip2Compression}, + {"kLZ4Compression", kLZ4Compression}, + {"kLZ4HCCompression", kLZ4HCCompression}, + {"kXpressCompression", kXpressCompression}, + {"kZSTD", kZSTD}, + {"kZSTDNotFinalCompression", kZSTDNotFinalCompression}, + {"kDisableCompressionOption", kDisableCompressionOption}}; + +std::vector GetSupportedCompressions() { + // std::set internally to deduplicate potential name aliases + std::set supported_compressions; + for (const auto& comp_to_name : OptionsHelper::compression_type_string_map) { + CompressionType t = comp_to_name.second; + if (t != kDisableCompressionOption && CompressionTypeSupported(t)) { + supported_compressions.insert(t); + } + } + return std::vector(supported_compressions.begin(), + supported_compressions.end()); +} + +std::vector GetSupportedDictCompressions() { + std::set dict_compression_types; + for (const auto& comp_to_name : OptionsHelper::compression_type_string_map) { + CompressionType t = comp_to_name.second; + if (t != kDisableCompressionOption && DictCompressionTypeSupported(t)) { + dict_compression_types.insert(t); + } + } + return std::vector(dict_compression_types.begin(), + dict_compression_types.end()); +} + +std::vector GetSupportedChecksums() { + std::set checksum_types; + for (const auto& e : OptionsHelper::checksum_type_string_map) { + checksum_types.insert(e.second); + } + return std::vector(checksum_types.begin(), + checksum_types.end()); +} + +static bool ParseOptionHelper(void* opt_address, const OptionType& opt_type, + const std::string& value) { + switch (opt_type) { + case OptionType::kBoolean: + *static_cast(opt_address) = ParseBoolean("", value); + break; + case OptionType::kInt: + *static_cast(opt_address) = ParseInt(value); + break; + case OptionType::kInt32T: + *static_cast(opt_address) = ParseInt32(value); + break; + case OptionType::kInt64T: + PutUnaligned(static_cast(opt_address), ParseInt64(value)); + break; + case OptionType::kUInt: + *static_cast(opt_address) = ParseUint32(value); + break; + case OptionType::kUInt8T: + *static_cast(opt_address) = ParseUint8(value); + break; + case OptionType::kUInt32T: + *static_cast(opt_address) = ParseUint32(value); + break; + case OptionType::kUInt64T: + PutUnaligned(static_cast(opt_address), ParseUint64(value)); + break; + case OptionType::kSizeT: + PutUnaligned(static_cast(opt_address), ParseSizeT(value)); + break; + case OptionType::kString: + *static_cast(opt_address) = value; + break; + case OptionType::kDouble: + *static_cast(opt_address) = ParseDouble(value); + break; + case OptionType::kCompactionStyle: + return ParseEnum( + compaction_style_string_map, value, + static_cast(opt_address)); + case OptionType::kCompactionPri: + return ParseEnum(compaction_pri_string_map, value, + static_cast(opt_address)); + case OptionType::kCompressionType: + return ParseEnum( + compression_type_string_map, value, + static_cast(opt_address)); + case OptionType::kChecksumType: + return ParseEnum(checksum_type_string_map, value, + static_cast(opt_address)); + case OptionType::kEncodingType: + return ParseEnum(encoding_type_string_map, value, + static_cast(opt_address)); + case OptionType::kCompactionStopStyle: + return ParseEnum( + compaction_stop_style_string_map, value, + static_cast(opt_address)); + case OptionType::kEncodedString: { + std::string* output_addr = static_cast(opt_address); + (Slice(value)).DecodeHex(output_addr); + break; + } + case OptionType::kTemperature: { + return 
+
+bool SerializeSingleOptionHelper(const void* opt_address,
+                                 const OptionType opt_type,
+                                 std::string* value) {
+  assert(value);
+  switch (opt_type) {
+    case OptionType::kBoolean:
+      *value = *(static_cast<const bool*>(opt_address)) ? "true" : "false";
+      break;
+    case OptionType::kInt:
+      *value = std::to_string(*(static_cast<const int*>(opt_address)));
+      break;
+    case OptionType::kInt32T:
+      *value = std::to_string(*(static_cast<const int32_t*>(opt_address)));
+      break;
+    case OptionType::kInt64T: {
+      int64_t v;
+      GetUnaligned(static_cast<const int64_t*>(opt_address), &v);
+      *value = std::to_string(v);
+    } break;
+    case OptionType::kUInt:
+      *value =
+          std::to_string(*(static_cast<const unsigned int*>(opt_address)));
+      break;
+    case OptionType::kUInt8T:
+      *value = std::to_string(*(static_cast<const uint8_t*>(opt_address)));
+      break;
+    case OptionType::kUInt32T:
+      *value = std::to_string(*(static_cast<const uint32_t*>(opt_address)));
+      break;
+    case OptionType::kUInt64T: {
+      uint64_t v;
+      GetUnaligned(static_cast<const uint64_t*>(opt_address), &v);
+      *value = std::to_string(v);
+    } break;
+    case OptionType::kSizeT: {
+      size_t v;
+      GetUnaligned(static_cast<const size_t*>(opt_address), &v);
+      *value = std::to_string(v);
+    } break;
+    case OptionType::kDouble:
+      *value = std::to_string(*(static_cast<const double*>(opt_address)));
+      break;
+    case OptionType::kString:
+      *value =
+          EscapeOptionString(*(static_cast<const std::string*>(opt_address)));
+      break;
+    case OptionType::kCompactionStyle:
+      return SerializeEnum<CompactionStyle>(
+          compaction_style_string_map,
+          *(static_cast<const CompactionStyle*>(opt_address)), value);
+    case OptionType::kCompactionPri:
+      return SerializeEnum<CompactionPri>(
+          compaction_pri_string_map,
+          *(static_cast<const CompactionPri*>(opt_address)), value);
+    case OptionType::kCompressionType:
+      return SerializeEnum<CompressionType>(
+          compression_type_string_map,
+          *(static_cast<const CompressionType*>(opt_address)), value);
+      break;
+    case OptionType::kChecksumType:
+      return SerializeEnum<ChecksumType>(
+          checksum_type_string_map,
+          *static_cast<const ChecksumType*>(opt_address), value);
+    case OptionType::kEncodingType:
+      return SerializeEnum<EncodingType>(
+          encoding_type_string_map,
+          *static_cast<const EncodingType*>(opt_address), value);
+    case OptionType::kCompactionStopStyle:
+      return SerializeEnum<CompactionStopStyle>(
+          compaction_stop_style_string_map,
+          *static_cast<const CompactionStopStyle*>(opt_address), value);
+    case OptionType::kEncodedString: {
+      const auto* ptr = static_cast<const std::string*>(opt_address);
+      *value = (Slice(*ptr)).ToString(true);
+      break;
+    }
+    case OptionType::kTemperature: {
+      return SerializeEnum<Temperature>(
+          temperature_string_map,
+          *static_cast<const Temperature*>(opt_address), value);
+    }
+    default:
+      return false;
+  }
+  return true;
+}
+
+template <typename T>
+Status ConfigureFromMap(
+    const ConfigOptions& config_options,
+    const std::unordered_map<std::string, std::string>& opt_map,
+    const std::string& option_name, Configurable* config, T* new_opts) {
+  Status s = config->ConfigureFromMap(config_options, opt_map);
+  if (s.ok()) {
+    *new_opts = *(config->GetOptions<T>(option_name));
+  }
+  return s;
+}
+
+
+Status StringToMap(const std::string& opts_str,
+                   std::unordered_map<std::string, std::string>* opts_map) {
+  assert(opts_map);
+  // Example:
+  //   opts_str = "write_buffer_size=1024;max_write_buffer_number=2;"
+  //              "nested_opt={opt1=1;opt2=2};max_bytes_for_level_base=100"
+  size_t pos = 0;
+  std::string opts = trim(opts_str);
+  // If the input string starts and ends with "{...}", strip off the brackets
+  while (opts.size() > 2 && opts[0] == '{' && opts[opts.size() - 1] == '}') {
+    opts = trim(opts.substr(1, opts.size() - 2));
+  }
+
+  while (pos < opts.size()) {
+    size_t eq_pos = opts.find_first_of("={};", pos);
+    if (eq_pos == std::string::npos) {
+      return Status::InvalidArgument("Mismatched key value pair, '=' expected");
+    } else if (opts[eq_pos] != '=') {
+      return Status::InvalidArgument("Unexpected char in key");
+    }
+
+    std::string key = trim(opts.substr(pos, eq_pos - pos));
+    if (key.empty()) {
+      return Status::InvalidArgument("Empty key found");
+    }
+
+    std::string value;
+    Status s = OptionTypeInfo::NextToken(opts, ';', eq_pos + 1, &pos, &value);
+    if (!s.ok()) {
+      return s;
+    } else {
+      (*opts_map)[key] = value;
+      if (pos == std::string::npos) {
+        break;
+      } else {
+        pos++;
+      }
+    }
+  }
+
+  return Status::OK();
+}
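+// A usage sketch for StringToMap (illustrative only). Nested braces are kept
+// as a single value so the caller can recurse with another StringToMap call:
+//
+//   std::unordered_map<std::string, std::string> m;
+//   Status s = StringToMap(
+//       "write_buffer_size=1024;nested_opt={opt1=1;opt2=2}", &m);
+//   // s.ok(); m["write_buffer_size"] == "1024"
+//   // m["nested_opt"] == "opt1=1;opt2=2"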
'=' expected"); + } else if (opts[eq_pos] != '=') { + return Status::InvalidArgument("Unexpected char in key"); + } + + std::string key = trim(opts.substr(pos, eq_pos - pos)); + if (key.empty()) { + return Status::InvalidArgument("Empty key found"); + } + + std::string value; + Status s = OptionTypeInfo::NextToken(opts, ';', eq_pos + 1, &pos, &value); + if (!s.ok()) { + return s; + } else { + (*opts_map)[key] = value; + if (pos == std::string::npos) { + break; + } else { + pos++; + } + } + } + + return Status::OK(); +} + + +Status GetStringFromDBOptions(std::string* opt_string, + const DBOptions& db_options, + const std::string& delimiter) { + ConfigOptions config_options(db_options); + config_options.delimiter = delimiter; + return GetStringFromDBOptions(config_options, db_options, opt_string); +} + +Status GetStringFromDBOptions(const ConfigOptions& config_options, + const DBOptions& db_options, + std::string* opt_string) { + assert(opt_string); + opt_string->clear(); + auto config = DBOptionsAsConfigurable(db_options); + return config->GetOptionString(config_options, opt_string); +} + + +Status GetStringFromColumnFamilyOptions(std::string* opt_string, + const ColumnFamilyOptions& cf_options, + const std::string& delimiter) { + ConfigOptions config_options; + config_options.delimiter = delimiter; + return GetStringFromColumnFamilyOptions(config_options, cf_options, + opt_string); +} + +Status GetStringFromColumnFamilyOptions(const ConfigOptions& config_options, + const ColumnFamilyOptions& cf_options, + std::string* opt_string) { + const auto config = CFOptionsAsConfigurable(cf_options); + return config->GetOptionString(config_options, opt_string); +} + +Status GetStringFromCompressionType(std::string* compression_str, + CompressionType compression_type) { + bool ok = SerializeEnum(compression_type_string_map, + compression_type, compression_str); + if (ok) { + return Status::OK(); + } else { + return Status::InvalidArgument("Invalid compression types"); + } +} + +Status GetColumnFamilyOptionsFromMap( + const ConfigOptions& config_options, + const ColumnFamilyOptions& base_options, + const std::unordered_map& opts_map, + ColumnFamilyOptions* new_options) { + assert(new_options); + + *new_options = base_options; + + const auto config = CFOptionsAsConfigurable(base_options); + Status s = ConfigureFromMap( + config_options, opts_map, OptionsHelper::kCFOptionsName, config.get(), + new_options); + // Translate any errors (NotFound, NotSupported, to InvalidArgument + if (s.ok() || s.IsInvalidArgument()) { + return s; + } else { + return Status::InvalidArgument(s.getState()); + } +} + +Status GetColumnFamilyOptionsFromString(const ConfigOptions& config_options, + const ColumnFamilyOptions& base_options, + const std::string& opts_str, + ColumnFamilyOptions* new_options) { + std::unordered_map opts_map; + Status s = StringToMap(opts_str, &opts_map); + if (!s.ok()) { + *new_options = base_options; + return s; + } + return GetColumnFamilyOptionsFromMap(config_options, base_options, opts_map, + new_options); +} + +Status GetDBOptionsFromMap( + const ConfigOptions& config_options, const DBOptions& base_options, + const std::unordered_map& opts_map, + DBOptions* new_options) { + assert(new_options); + *new_options = base_options; + auto config = DBOptionsAsConfigurable(base_options); + Status s = ConfigureFromMap(config_options, opts_map, + OptionsHelper::kDBOptionsName, + config.get(), new_options); + // Translate any errors (NotFound, NotSupported, to InvalidArgument + if (s.ok() || 
+
+Status GetDBOptionsFromMap(
+    const ConfigOptions& config_options, const DBOptions& base_options,
+    const std::unordered_map<std::string, std::string>& opts_map,
+    DBOptions* new_options) {
+  assert(new_options);
+  *new_options = base_options;
+  auto config = DBOptionsAsConfigurable(base_options);
+  Status s = ConfigureFromMap<DBOptions>(config_options, opts_map,
+                                         OptionsHelper::kDBOptionsName,
+                                         config.get(), new_options);
+  // Translate any errors (NotFound, NotSupported) to InvalidArgument
+  if (s.ok() || s.IsInvalidArgument()) {
+    return s;
+  } else {
+    return Status::InvalidArgument(s.getState());
+  }
+}
+
+Status GetDBOptionsFromString(const ConfigOptions& config_options,
+                              const DBOptions& base_options,
+                              const std::string& opts_str,
+                              DBOptions* new_options) {
+  std::unordered_map<std::string, std::string> opts_map;
+  Status s = StringToMap(opts_str, &opts_map);
+  if (!s.ok()) {
+    *new_options = base_options;
+    return s;
+  }
+  return GetDBOptionsFromMap(config_options, base_options, opts_map,
+                             new_options);
+}
+
+Status GetOptionsFromString(const Options& base_options,
+                            const std::string& opts_str, Options* new_options) {
+  ConfigOptions config_options(base_options);
+  config_options.input_strings_escaped = false;
+  config_options.ignore_unknown_options = false;
+
+  return GetOptionsFromString(config_options, base_options, opts_str,
+                              new_options);
+}
+
+Status GetOptionsFromString(const ConfigOptions& config_options,
+                            const Options& base_options,
+                            const std::string& opts_str, Options* new_options) {
+  ColumnFamilyOptions new_cf_options;
+  std::unordered_map<std::string, std::string> unused_opts;
+  std::unordered_map<std::string, std::string> opts_map;
+
+  assert(new_options);
+  *new_options = base_options;
+  Status s = StringToMap(opts_str, &opts_map);
+  if (!s.ok()) {
+    return s;
+  }
+  auto config = DBOptionsAsConfigurable(base_options);
+  s = config->ConfigureFromMap(config_options, opts_map, &unused_opts);
+
+  if (s.ok()) {
+    DBOptions* new_db_options =
+        config->GetOptions<DBOptions>(OptionsHelper::kDBOptionsName);
+    if (!unused_opts.empty()) {
+      s = GetColumnFamilyOptionsFromMap(config_options, base_options,
+                                        unused_opts, &new_cf_options);
+      if (s.ok()) {
+        *new_options = Options(*new_db_options, new_cf_options);
+      }
+    } else {
+      *new_options = Options(*new_db_options, base_options);
+    }
+  }
+  // Translate any errors (NotFound, NotSupported) to InvalidArgument
+  if (s.ok() || s.IsInvalidArgument()) {
+    return s;
+  } else {
+    return Status::InvalidArgument(s.getState());
+  }
+}
+
+std::unordered_map<std::string, EncodingType>
+    OptionsHelper::encoding_type_string_map = {{"kPlain", kPlain},
+                                               {"kPrefix", kPrefix}};
+
+std::unordered_map<std::string, CompactionStyle>
+    OptionsHelper::compaction_style_string_map = {
+        {"kCompactionStyleLevel", kCompactionStyleLevel},
+        {"kCompactionStyleUniversal", kCompactionStyleUniversal},
+        {"kCompactionStyleFIFO", kCompactionStyleFIFO},
+        {"kCompactionStyleNone", kCompactionStyleNone}};
+
+std::unordered_map<std::string, CompactionPri>
+    OptionsHelper::compaction_pri_string_map = {
+        {"kByCompensatedSize", kByCompensatedSize},
+        {"kOldestLargestSeqFirst", kOldestLargestSeqFirst},
+        {"kOldestSmallestSeqFirst", kOldestSmallestSeqFirst},
+        {"kMinOverlappingRatio", kMinOverlappingRatio},
+        {"kRoundRobin", kRoundRobin}};
+
+std::unordered_map<std::string, CompactionStopStyle>
+    OptionsHelper::compaction_stop_style_string_map = {
+        {"kCompactionStopStyleSimilarSize", kCompactionStopStyleSimilarSize},
+        {"kCompactionStopStyleTotalSize", kCompactionStopStyleTotalSize}};
+
+std::unordered_map<std::string, Temperature>
+    OptionsHelper::temperature_string_map = {
+        {"kUnknown", Temperature::kUnknown},
+        {"kHot", Temperature::kHot},
+        {"kWarm", Temperature::kWarm},
+        {"kCold", Temperature::kCold}};
+
+std::unordered_map<std::string, PrepopulateBlobCache>
+    OptionsHelper::prepopulate_blob_cache_string_map = {
+        {"kDisable", PrepopulateBlobCache::kDisable},
+        {"kFlushOnly", PrepopulateBlobCache::kFlushOnly}};
+
+Status OptionTypeInfo::NextToken(const std::string& opts, char delimiter,
+                                 size_t pos, size_t* end, std::string* token) {
+  while (pos < opts.size() && isspace(opts[pos])) {
+    ++pos;
+  }
+  // Empty value at the end
+  if (pos >= opts.size()) {
+    *token = "";
+    *end = std::string::npos;
+    return Status::OK();
+
} else if (opts[pos] == '{') { + int count = 1; + size_t brace_pos = pos + 1; + while (brace_pos < opts.size()) { + if (opts[brace_pos] == '{') { + ++count; + } else if (opts[brace_pos] == '}') { + --count; + if (count == 0) { + break; + } + } + ++brace_pos; + } + // found the matching closing brace + if (count == 0) { + *token = trim(opts.substr(pos + 1, brace_pos - pos - 1)); + // skip all whitespace and move to the next delimiter + // brace_pos points to the next position after the matching '}' + pos = brace_pos + 1; + while (pos < opts.size() && isspace(opts[pos])) { + ++pos; + } + if (pos < opts.size() && opts[pos] != delimiter) { + return Status::InvalidArgument("Unexpected chars after nested options"); + } + *end = pos; + } else { + return Status::InvalidArgument( + "Mismatched curly braces for nested options"); + } + } else { + *end = opts.find(delimiter, pos); + if (*end == std::string::npos) { + // It either ends with a trailing semi-colon or the last key-value pair + *token = trim(opts.substr(pos)); + } else { + *token = trim(opts.substr(pos, *end - pos)); + } + } + return Status::OK(); +} + +Status OptionTypeInfo::Parse(const ConfigOptions& config_options, + const std::string& opt_name, + const std::string& value, void* opt_ptr) const { + if (IsDeprecated()) { + return Status::OK(); + } + try { + const std::string& opt_value = config_options.input_strings_escaped + ? UnescapeOptionString(value) + : value; + + if (opt_ptr == nullptr) { + return Status::NotFound("Could not find option", opt_name); + } else if (parse_func_ != nullptr) { + ConfigOptions copy = config_options; + copy.invoke_prepare_options = false; + void* opt_addr = GetOffset(opt_ptr); + return parse_func_(copy, opt_name, opt_value, opt_addr); + } else if (ParseOptionHelper(GetOffset(opt_ptr), type_, opt_value)) { + return Status::OK(); + } else if (IsConfigurable()) { + // The option is . 
+ Configurable* config = AsRawPointer(opt_ptr); + if (opt_value.empty()) { + return Status::OK(); + } else if (config == nullptr) { + return Status::NotFound("Could not find configurable: ", opt_name); + } else { + ConfigOptions copy = config_options; + copy.ignore_unknown_options = false; + copy.invoke_prepare_options = false; + if (opt_value.find("=") != std::string::npos) { + return config->ConfigureFromString(copy, opt_value); + } else { + return config->ConfigureOption(copy, opt_name, opt_value); + } + } + } else if (IsByName()) { + return Status::NotSupported("Deserializing the option " + opt_name + + " is not supported"); + } else { + return Status::InvalidArgument("Error parsing:", opt_name); + } + } catch (std::exception& e) { + return Status::InvalidArgument("Error parsing " + opt_name + ":" + + std::string(e.what())); + } +} + +Status OptionTypeInfo::ParseType( + const ConfigOptions& config_options, const std::string& opts_str, + const std::unordered_map& type_map, + void* opt_addr, std::unordered_map* unused) { + std::unordered_map opts_map; + Status status = StringToMap(opts_str, &opts_map); + if (!status.ok()) { + return status; + } else { + return ParseType(config_options, opts_map, type_map, opt_addr, unused); + } +} + +Status OptionTypeInfo::ParseType( + const ConfigOptions& config_options, + const std::unordered_map& opts_map, + const std::unordered_map& type_map, + void* opt_addr, std::unordered_map* unused) { + for (const auto& opts_iter : opts_map) { + std::string opt_name; + const auto* opt_info = Find(opts_iter.first, type_map, &opt_name); + if (opt_info != nullptr) { + Status status = + opt_info->Parse(config_options, opt_name, opts_iter.second, opt_addr); + if (!status.ok()) { + return status; + } + } else if (unused != nullptr) { + (*unused)[opts_iter.first] = opts_iter.second; + } else if (!config_options.ignore_unknown_options) { + return Status::NotFound("Unrecognized option", opts_iter.first); + } + } + return Status::OK(); +} + +Status OptionTypeInfo::ParseStruct( + const ConfigOptions& config_options, const std::string& struct_name, + const std::unordered_map* struct_map, + const std::string& opt_name, const std::string& opt_value, void* opt_addr) { + assert(struct_map); + Status status; + if (opt_name == struct_name || EndsWith(opt_name, "." + struct_name)) { + // This option represents the entire struct + std::unordered_map unused; + status = + ParseType(config_options, opt_value, *struct_map, opt_addr, &unused); + if (status.ok() && !unused.empty()) { + status = Status::InvalidArgument( + "Unrecognized option", struct_name + "." + unused.begin()->first); + } + } else if (StartsWith(opt_name, struct_name + ".")) { + // This option represents a nested field in the struct (e.g, struct.field) + std::string elem_name; + const auto opt_info = + Find(opt_name.substr(struct_name.size() + 1), *struct_map, &elem_name); + if (opt_info != nullptr) { + status = opt_info->Parse(config_options, elem_name, opt_value, opt_addr); + } else { + status = Status::InvalidArgument("Unrecognized option", opt_name); + } + } else { + // This option represents a field in the struct (e.g. field) + std::string elem_name; + const auto opt_info = Find(opt_name, *struct_map, &elem_name); + if (opt_info != nullptr) { + status = opt_info->Parse(config_options, elem_name, opt_value, opt_addr); + } else { + status = Status::InvalidArgument("Unrecognized option", + struct_name + "." 
+ opt_name); + } + } + return status; +} + +Status OptionTypeInfo::Serialize(const ConfigOptions& config_options, + const std::string& opt_name, + const void* const opt_ptr, + std::string* opt_value) const { + // If the option is no longer used in rocksdb and marked as deprecated, + // we skip it in the serialization. + if (opt_ptr == nullptr || IsDeprecated()) { + return Status::OK(); + } else if (IsEnabled(OptionTypeFlags::kDontSerialize)) { + return Status::NotSupported("Cannot serialize option: ", opt_name); + } else if (serialize_func_ != nullptr) { + const void* opt_addr = GetOffset(opt_ptr); + return serialize_func_(config_options, opt_name, opt_addr, opt_value); + } else if (IsCustomizable()) { + const Customizable* custom = AsRawPointer(opt_ptr); + opt_value->clear(); + if (custom == nullptr) { + // We do not have a custom object to serialize. + // If the option is not mutable and we are doing only mutable options, + // we return an empty string (which will cause the option not to be + // printed). Otherwise, we return the "nullptr" string, which will result + // in "option=nullptr" being printed. + if (IsMutable() || !config_options.mutable_options_only) { + *opt_value = kNullptrString; + } else { + *opt_value = ""; + } + } else if (IsEnabled(OptionTypeFlags::kStringNameOnly) && + !config_options.IsDetailed()) { + if (!config_options.mutable_options_only || IsMutable()) { + *opt_value = custom->GetId(); + } + } else { + ConfigOptions embedded = config_options; + embedded.delimiter = ";"; + // If this option is mutable, everything inside it should be considered + // mutable + if (IsMutable()) { + embedded.mutable_options_only = false; + } + std::string value = custom->ToString(embedded); + if (!embedded.mutable_options_only || + value.find("=") != std::string::npos) { + *opt_value = value; + } else { + *opt_value = ""; + } + } + return Status::OK(); + } else if (IsConfigurable()) { + const Configurable* config = AsRawPointer(opt_ptr); + if (config != nullptr) { + ConfigOptions embedded = config_options; + embedded.delimiter = ";"; + *opt_value = config->ToString(embedded); + } + return Status::OK(); + } else if (config_options.mutable_options_only && !IsMutable()) { + return Status::OK(); + } else if (SerializeSingleOptionHelper(GetOffset(opt_ptr), type_, + opt_value)) { + return Status::OK(); + } else { + return Status::InvalidArgument("Cannot serialize option: ", opt_name); + } +} + +Status OptionTypeInfo::SerializeType( + const ConfigOptions& config_options, + const std::unordered_map& type_map, + const void* opt_addr, std::string* result) { + Status status; + for (const auto& iter : type_map) { + std::string single; + const auto& opt_info = iter.second; + if (opt_info.ShouldSerialize()) { + status = + opt_info.Serialize(config_options, iter.first, opt_addr, &single); + if (!status.ok()) { + return status; + } else { + result->append(iter.first + "=" + single + config_options.delimiter); + } + } + } + return status; +} + +Status OptionTypeInfo::SerializeStruct( + const ConfigOptions& config_options, const std::string& struct_name, + const std::unordered_map* struct_map, + const std::string& opt_name, const void* opt_addr, std::string* value) { + assert(struct_map); + Status status; + if (EndsWith(opt_name, struct_name)) { + // We are going to write the struct as "{ prop1=value1; prop2=value2;}. + // Set the delimiter to ";" so that the everything will be on one line. 
+    ConfigOptions embedded = config_options;
+    embedded.delimiter = ";";
+
+    // This option represents the entire struct
+    std::string result;
+    status = SerializeType(embedded, *struct_map, opt_addr, &result);
+    if (!status.ok()) {
+      return status;
+    } else {
+      *value = "{" + result + "}";
+    }
+  } else if (StartsWith(opt_name, struct_name + ".")) {
+    // This option represents a nested field in the struct (e.g, struct.field)
+    std::string elem_name;
+    const auto opt_info =
+        Find(opt_name.substr(struct_name.size() + 1), *struct_map, &elem_name);
+    if (opt_info != nullptr) {
+      status = opt_info->Serialize(config_options, elem_name, opt_addr, value);
+    } else {
+      status = Status::InvalidArgument("Unrecognized option", opt_name);
+    }
+  } else {
+    // This option represents a field in the struct (e.g. field)
+    std::string elem_name;
+    const auto opt_info = Find(opt_name, *struct_map, &elem_name);
+    if (opt_info == nullptr) {
+      status = Status::InvalidArgument("Unrecognized option", opt_name);
+    } else if (opt_info->ShouldSerialize()) {
+      status = opt_info->Serialize(config_options, opt_name + "." + elem_name,
+                                   opt_addr, value);
+    }
+  }
+  return status;
+}
+
+template <typename T>
+bool IsOptionEqual(const void* offset1, const void* offset2) {
+  return (*static_cast<const T*>(offset1) == *static_cast<const T*>(offset2));
+}
+
+static bool AreEqualDoubles(const double a, const double b) {
+  return (fabs(a - b) < 0.00001);
+}
+
+static bool AreOptionsEqual(OptionType type, const void* this_offset,
+                            const void* that_offset) {
+  switch (type) {
+    case OptionType::kBoolean:
+      return IsOptionEqual<bool>(this_offset, that_offset);
+    case OptionType::kInt:
+      return IsOptionEqual<int>(this_offset, that_offset);
+    case OptionType::kUInt:
+      return IsOptionEqual<unsigned int>(this_offset, that_offset);
+    case OptionType::kInt32T:
+      return IsOptionEqual<int32_t>(this_offset, that_offset);
+    case OptionType::kInt64T: {
+      int64_t v1, v2;
+      GetUnaligned(static_cast<const int64_t*>(this_offset), &v1);
+      GetUnaligned(static_cast<const int64_t*>(that_offset), &v2);
+      return (v1 == v2);
+    }
+    case OptionType::kUInt8T:
+      return IsOptionEqual<uint8_t>(this_offset, that_offset);
+    case OptionType::kUInt32T:
+      return IsOptionEqual<uint32_t>(this_offset, that_offset);
+    case OptionType::kUInt64T: {
+      uint64_t v1, v2;
+      GetUnaligned(static_cast<const uint64_t*>(this_offset), &v1);
+      GetUnaligned(static_cast<const uint64_t*>(that_offset), &v2);
+      return (v1 == v2);
+    }
+    case OptionType::kSizeT: {
+      size_t v1, v2;
+      GetUnaligned(static_cast<const size_t*>(this_offset), &v1);
+      GetUnaligned(static_cast<const size_t*>(that_offset), &v2);
+      return (v1 == v2);
+    }
+    case OptionType::kString:
+      return IsOptionEqual<std::string>(this_offset, that_offset);
+    case OptionType::kDouble:
+      return AreEqualDoubles(*static_cast<const double*>(this_offset),
+                             *static_cast<const double*>(that_offset));
+    case OptionType::kCompactionStyle:
+      return IsOptionEqual<CompactionStyle>(this_offset, that_offset);
+    case OptionType::kCompactionStopStyle:
+      return IsOptionEqual<CompactionStopStyle>(this_offset, that_offset);
+    case OptionType::kCompactionPri:
+      return IsOptionEqual<CompactionPri>(this_offset, that_offset);
+    case OptionType::kCompressionType:
+      return IsOptionEqual<CompressionType>(this_offset, that_offset);
+    case OptionType::kChecksumType:
+      return IsOptionEqual<ChecksumType>(this_offset, that_offset);
+    case OptionType::kEncodingType:
+      return IsOptionEqual<EncodingType>(this_offset, that_offset);
+    case OptionType::kEncodedString:
+      return IsOptionEqual<std::string>(this_offset, that_offset);
+    case OptionType::kTemperature:
+      return IsOptionEqual<Temperature>(this_offset, that_offset);
+    default:
+      return false;
+  }  // End switch
+}
+
+bool OptionTypeInfo::AreEqual(const ConfigOptions& config_options,
+                              const std::string& opt_name,
+                              const void* const this_ptr,
+                              const
void* const that_ptr, + std::string* mismatch) const { + auto level = GetSanityLevel(); + if (!config_options.IsCheckEnabled(level)) { + return true; // If the sanity level is not being checked, skip it + } + if (this_ptr == nullptr || that_ptr == nullptr) { + if (this_ptr == that_ptr) { + return true; + } + } else if (equals_func_ != nullptr) { + const void* this_addr = GetOffset(this_ptr); + const void* that_addr = GetOffset(that_ptr); + if (equals_func_(config_options, opt_name, this_addr, that_addr, + mismatch)) { + return true; + } + } else { + const void* this_addr = GetOffset(this_ptr); + const void* that_addr = GetOffset(that_ptr); + if (AreOptionsEqual(type_, this_addr, that_addr)) { + return true; + } else if (IsConfigurable()) { + const auto* this_config = AsRawPointer(this_ptr); + const auto* that_config = AsRawPointer(that_ptr); + if (this_config == that_config) { + return true; + } else if (this_config != nullptr && that_config != nullptr) { + std::string bad_name; + bool matches; + if (level < config_options.sanity_level) { + ConfigOptions copy = config_options; + copy.sanity_level = level; + matches = this_config->AreEquivalent(copy, that_config, &bad_name); + } else { + matches = this_config->AreEquivalent(config_options, that_config, + &bad_name); + } + if (!matches) { + *mismatch = opt_name + "." + bad_name; + } + return matches; + } + } + } + if (mismatch->empty()) { + *mismatch = opt_name; + } + return false; +} + +bool OptionTypeInfo::TypesAreEqual( + const ConfigOptions& config_options, + const std::unordered_map& type_map, + const void* this_addr, const void* that_addr, std::string* mismatch) { + for (const auto& iter : type_map) { + const auto& opt_info = iter.second; + if (!opt_info.AreEqual(config_options, iter.first, this_addr, that_addr, + mismatch)) { + return false; + } + } + return true; +} + +bool OptionTypeInfo::StructsAreEqual( + const ConfigOptions& config_options, const std::string& struct_name, + const std::unordered_map* struct_map, + const std::string& opt_name, const void* this_addr, const void* that_addr, + std::string* mismatch) { + assert(struct_map); + bool matches = true; + std::string result; + if (EndsWith(opt_name, struct_name)) { + // This option represents the entire struct + matches = TypesAreEqual(config_options, *struct_map, this_addr, that_addr, + &result); + if (!matches) { + *mismatch = struct_name + "." + result; + return false; + } + } else if (StartsWith(opt_name, struct_name + ".")) { + // This option represents a nested field in the struct (e.g, struct.field) + std::string elem_name; + const auto opt_info = + Find(opt_name.substr(struct_name.size() + 1), *struct_map, &elem_name); + assert(opt_info); + if (opt_info == nullptr) { + *mismatch = opt_name; + matches = false; + } else if (!opt_info->AreEqual(config_options, elem_name, this_addr, + that_addr, &result)) { + matches = false; + *mismatch = struct_name + "." + result; + } + } else { + // This option represents a field in the struct (e.g. field) + std::string elem_name; + const auto opt_info = Find(opt_name, *struct_map, &elem_name); + assert(opt_info); + if (opt_info == nullptr) { + *mismatch = struct_name + "." + opt_name; + matches = false; + } else if (!opt_info->AreEqual(config_options, elem_name, this_addr, + that_addr, &result)) { + matches = false; + *mismatch = struct_name + "." 
+ result; + } + } + return matches; +} + +bool MatchesOptionsTypeFromMap( + const ConfigOptions& config_options, + const std::unordered_map& type_map, + const void* const this_ptr, const void* const that_ptr, + std::string* mismatch) { + for (auto& pair : type_map) { + // We skip checking deprecated variables as they might + // contain random values since they might not be initialized + if (config_options.IsCheckEnabled(pair.second.GetSanityLevel())) { + if (!pair.second.AreEqual(config_options, pair.first, this_ptr, that_ptr, + mismatch) && + !pair.second.AreEqualByName(config_options, pair.first, this_ptr, + that_ptr)) { + return false; + } + } + } + return true; +} + +bool OptionTypeInfo::AreEqualByName(const ConfigOptions& config_options, + const std::string& opt_name, + const void* const this_ptr, + const void* const that_ptr) const { + if (IsByName()) { + std::string that_value; + if (Serialize(config_options, opt_name, that_ptr, &that_value).ok()) { + return AreEqualByName(config_options, opt_name, this_ptr, that_value); + } + } + return false; +} + +bool OptionTypeInfo::AreEqualByName(const ConfigOptions& config_options, + const std::string& opt_name, + const void* const opt_ptr, + const std::string& that_value) const { + std::string this_value; + if (!IsByName()) { + return false; + } else if (!Serialize(config_options, opt_name, opt_ptr, &this_value).ok()) { + return false; + } else if (IsEnabled(OptionVerificationType::kByNameAllowFromNull)) { + if (that_value == kNullptrString) { + return true; + } + } else if (IsEnabled(OptionVerificationType::kByNameAllowNull)) { + if (that_value == kNullptrString) { + return true; + } + } + return (this_value == that_value); +} + +Status OptionTypeInfo::Prepare(const ConfigOptions& config_options, + const std::string& name, void* opt_ptr) const { + if (ShouldPrepare()) { + if (prepare_func_ != nullptr) { + void* opt_addr = GetOffset(opt_ptr); + return prepare_func_(config_options, name, opt_addr); + } else if (IsConfigurable()) { + Configurable* config = AsRawPointer(opt_ptr); + if (config != nullptr) { + return config->PrepareOptions(config_options); + } else if (!CanBeNull()) { + return Status::NotFound("Missing configurable object", name); + } + } + } + return Status::OK(); +} + +Status OptionTypeInfo::Validate(const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts, + const std::string& name, + const void* opt_ptr) const { + if (ShouldValidate()) { + if (validate_func_ != nullptr) { + const void* opt_addr = GetOffset(opt_ptr); + return validate_func_(db_opts, cf_opts, name, opt_addr); + } else if (IsConfigurable()) { + const Configurable* config = AsRawPointer(opt_ptr); + if (config != nullptr) { + return config->ValidateOptions(db_opts, cf_opts); + } else if (!CanBeNull()) { + return Status::NotFound("Missing configurable object", name); + } + } + } + return Status::OK(); +} + +const OptionTypeInfo* OptionTypeInfo::Find( + const std::string& opt_name, + const std::unordered_map& opt_map, + std::string* elem_name) { + const auto iter = opt_map.find(opt_name); // Look up the value in the map + if (iter != opt_map.end()) { // Found the option in the map + *elem_name = opt_name; // Return the name + return &(iter->second); // Return the contents of the iterator + } else { + auto idx = opt_name.find("."); // Look for a separator + if (idx > 0 && idx != std::string::npos) { // We found a separator + auto siter = + opt_map.find(opt_name.substr(0, idx)); // Look for the short name + if (siter != opt_map.end()) { // We found the short 
name
+        if (siter->second.IsStruct() ||      // If the object is a struct
+            siter->second.IsConfigurable()) {  // or a Configurable
+          *elem_name = opt_name.substr(idx + 1);  // Return the rest
+          return &(siter->second);  // Return the contents of the iterator
+        }
+      }
+    }
+  }
+  return nullptr;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.h
new file mode 100644
index 0000000000..76e312a63c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_helper.h
@@ -0,0 +1,116 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "rocksdb/advanced_options.h"
+#include "rocksdb/options.h"
+#include "rocksdb/status.h"
+#include "rocksdb/table.h"
+
+namespace ROCKSDB_NAMESPACE {
+struct ColumnFamilyOptions;
+struct ConfigOptions;
+struct DBOptions;
+struct ImmutableCFOptions;
+struct ImmutableDBOptions;
+struct MutableDBOptions;
+struct MutableCFOptions;
+struct Options;
+
+std::vector<CompressionType> GetSupportedCompressions();
+
+std::vector<CompressionType> GetSupportedDictCompressions();
+
+std::vector<ChecksumType> GetSupportedChecksums();
+
+inline bool IsSupportedChecksumType(ChecksumType type) {
+  // Avoid annoying compiler warning-as-error (-Werror=type-limits)
+  auto min = kNoChecksum;
+  auto max = kXXH3;
+  return type >= min && type <= max;
+}
+
+// Checks that the combination of DBOptions and ColumnFamilyOptions are valid
+Status ValidateOptions(const DBOptions& db_opts,
+                       const ColumnFamilyOptions& cf_opts);
+
+DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
+                         const MutableDBOptions& mutable_db_options);
+
+ColumnFamilyOptions BuildColumnFamilyOptions(
+    const ColumnFamilyOptions& ioptions,
+    const MutableCFOptions& mutable_cf_options);
+
+void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions,
+                               ColumnFamilyOptions* cf_opts);
+void UpdateColumnFamilyOptions(const MutableCFOptions& moptions,
+                               ColumnFamilyOptions* cf_opts);
+
+std::unique_ptr<Configurable> DBOptionsAsConfigurable(
+    const MutableDBOptions& opts);
+std::unique_ptr<Configurable> DBOptionsAsConfigurable(
+    const DBOptions& opts,
+    const std::unordered_map<std::string, std::string>* opt_map = nullptr);
+std::unique_ptr<Configurable> CFOptionsAsConfigurable(
+    const MutableCFOptions& opts);
+std::unique_ptr<Configurable> CFOptionsAsConfigurable(
+    const ColumnFamilyOptions& opts,
+    const std::unordered_map<std::string, std::string>* opt_map = nullptr);
+
+extern Status StringToMap(
+    const std::string& opts_str,
+    std::unordered_map<std::string, std::string>* opts_map);
+
+struct OptionsHelper {
+  static const std::string kCFOptionsName /*= "ColumnFamilyOptions"*/;
+  static const std::string kDBOptionsName /*= "DBOptions" */;
+  static std::map<CompactionStyle, std::string> compaction_style_to_string;
+  static std::map<CompactionPri, std::string> compaction_pri_to_string;
+  static std::map<CompactionStopStyle, std::string>
+      compaction_stop_style_to_string;
+  static std::map<Temperature, std::string> temperature_to_string;
+  static std::unordered_map<std::string, ChecksumType>
+      checksum_type_string_map;
+  static std::unordered_map<std::string, CompressionType>
+      compression_type_string_map;
+  static std::unordered_map<std::string, PrepopulateBlobCache>
+      prepopulate_blob_cache_string_map;
+  static std::unordered_map<std::string, CompactionStopStyle>
+      compaction_stop_style_string_map;
+  static std::unordered_map<std::string, EncodingType>
+      encoding_type_string_map;
+  static std::unordered_map<std::string, CompactionStyle>
+      compaction_style_string_map;
+  static std::unordered_map<std::string, CompactionPri>
+      compaction_pri_string_map;
+  static std::unordered_map<std::string, Temperature> temperature_string_map;
+};
+
+// Some aliasing
+static auto& compaction_style_to_string =
+    OptionsHelper::compaction_style_to_string;
+static auto& compaction_pri_to_string = OptionsHelper::compaction_pri_to_string;
+static auto& compaction_stop_style_to_string =
+    OptionsHelper::compaction_stop_style_to_string;
+static auto& temperature_to_string = OptionsHelper::temperature_to_string;
+static auto& checksum_type_string_map = OptionsHelper::checksum_type_string_map;
+static auto& compaction_stop_style_string_map =
+    OptionsHelper::compaction_stop_style_string_map;
+static auto& compression_type_string_map =
+    OptionsHelper::compression_type_string_map;
+static auto& encoding_type_string_map = OptionsHelper::encoding_type_string_map;
+static auto& compaction_style_string_map =
+    OptionsHelper::compaction_style_string_map;
+static auto& compaction_pri_string_map =
+    OptionsHelper::compaction_pri_string_map;
+static auto& temperature_string_map = OptionsHelper::temperature_string_map;
+static auto& prepopulate_blob_cache_string_map =
+    OptionsHelper::prepopulate_blob_cache_string_map;
+
+}  // namespace ROCKSDB_NAMESPACE
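Taken together, the declarations above expose a small string-based facade over the options structs. A minimal sketch of a caller (illustrative only; GetStringFromCompressionType is declared in rocksdb/convenience.h upstream, and error handling is elided):

  #include <cstdio>
  #include "options/options_helper.h"

  using namespace ROCKSDB_NAMESPACE;

  // Print the name of every compression type compiled into this build.
  void ListSupportedCompressions() {
    for (CompressionType t : GetSupportedCompressions()) {
      std::string name;
      if (GetStringFromCompressionType(&name, t).ok()) {
        std::printf("%s\n", name.c_str());
      }
    }
  }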
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.cc
new file mode 100644
index 0000000000..a8c855d6e2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.cc
@@ -0,0 +1,736 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+
+#include "options/options_parser.h"
+
+#include <cmath>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "file/line_file_reader.h"
+#include "file/writable_file_writer.h"
+#include "options/cf_options.h"
+#include "options/db_options.h"
+#include "options/options_helper.h"
+#include "port/port.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/db.h"
+#include "rocksdb/utilities/options_type.h"
+#include "test_util/sync_point.h"
+#include "util/cast_util.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+static const std::string option_file_header =
+    "# This is a RocksDB option file.\n"
+    "#\n"
+    "# For detailed file format spec, please refer to the example file\n"
+    "# in examples/rocksdb_option_file_example.ini\n"
+    "#\n"
+    "\n";
+
+Status PersistRocksDBOptions(const DBOptions& db_opt,
+                             const std::vector<std::string>& cf_names,
+                             const std::vector<ColumnFamilyOptions>& cf_opts,
+                             const std::string& file_name, FileSystem* fs) {
+  ConfigOptions
+      config_options;  // Use default for escaped(true) and check (exact)
+  config_options.delimiter = "\n  ";
+  // Do not invoke PrepareOptions when we are doing validation.
+  config_options.invoke_prepare_options = false;
+  // If a readahead size was set in the input options, use it
+  if (db_opt.log_readahead_size > 0) {
+    config_options.file_readahead_size = db_opt.log_readahead_size;
+  }
+  return PersistRocksDBOptions(config_options, db_opt, cf_names, cf_opts,
+                               file_name, fs);
+}
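+// A usage sketch for the convenience overload above (illustrative only; the
+// target path is an assumption):
+//
+//   DBOptions db_opt;
+//   ColumnFamilyOptions cf_opt;
+//   Status s = PersistRocksDBOptions(
+//       db_opt, {kDefaultColumnFamilyName}, {cf_opt}, "/tmp/OPTIONS-example",
+//       Env::Default()->GetFileSystem().get());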
+
+Status PersistRocksDBOptions(const ConfigOptions& config_options_in,
+                             const DBOptions& db_opt,
+                             const std::vector<std::string>& cf_names,
+                             const std::vector<ColumnFamilyOptions>& cf_opts,
+                             const std::string& file_name, FileSystem* fs) {
+  ConfigOptions config_options = config_options_in;
+  config_options.delimiter = "\n  ";  // Override the default to nl
+
+  TEST_SYNC_POINT("PersistRocksDBOptions:start");
+  if (cf_names.size() != cf_opts.size()) {
+    return Status::InvalidArgument(
+        "cf_names.size() and cf_opts.size() must be the same");
+  }
+  std::unique_ptr<FSWritableFile> wf;
+
+  Status s = fs->NewWritableFile(file_name, FileOptions(), &wf, nullptr);
+  if (!s.ok()) {
+    return s;
+  }
+  std::unique_ptr<WritableFileWriter> writable;
+  writable.reset(new WritableFileWriter(std::move(wf), file_name, EnvOptions(),
+                                        nullptr /* statistics */));
+  TEST_SYNC_POINT("PersistRocksDBOptions:create");
+
+  std::string options_file_content;
+
+  s = writable->Append(
+      option_file_header + "[" + opt_section_titles[kOptionSectionVersion] +
+      "]\n"
+      "  rocksdb_version=" +
+      std::to_string(ROCKSDB_MAJOR) + "." + std::to_string(ROCKSDB_MINOR) +
+      "." + std::to_string(ROCKSDB_PATCH) + "\n");
+  if (s.ok()) {
+    s = writable->Append(
+        "  options_file_version=" + std::to_string(ROCKSDB_OPTION_FILE_MAJOR) +
+        "." + std::to_string(ROCKSDB_OPTION_FILE_MINOR) + "\n");
+  }
+  if (s.ok()) {
+    s = writable->Append("\n[" + opt_section_titles[kOptionSectionDBOptions] +
+                         "]\n  ");
+  }
+
+  if (s.ok()) {
+    s = GetStringFromDBOptions(config_options, db_opt, &options_file_content);
+  }
+  if (s.ok()) {
+    s = writable->Append(options_file_content + "\n");
+  }
+
+  for (size_t i = 0; s.ok() && i < cf_opts.size(); ++i) {
+    // CFOptions section
+    s = writable->Append("\n[" + opt_section_titles[kOptionSectionCFOptions] +
+                         " \"" + EscapeOptionString(cf_names[i]) + "\"]\n  ");
+    if (s.ok()) {
+      s = GetStringFromColumnFamilyOptions(config_options, cf_opts[i],
+                                           &options_file_content);
+    }
+    if (s.ok()) {
+      s = writable->Append(options_file_content + "\n");
+    }
+    // TableOptions section
+    auto* tf = cf_opts[i].table_factory.get();
+    if (tf != nullptr) {
+      if (s.ok()) {
+        s = writable->Append(
+            "[" + opt_section_titles[kOptionSectionTableOptions] + tf->Name() +
+            " \"" + EscapeOptionString(cf_names[i]) + "\"]\n  ");
+      }
+      if (s.ok()) {
+        options_file_content.clear();
+        s = tf->GetOptionString(config_options, &options_file_content);
+      }
+      if (s.ok()) {
+        s = writable->Append(options_file_content + "\n");
+      }
+    }
+  }
+  if (s.ok()) {
+    s = writable->Sync(true /* use_fsync */);
+  }
+  if (s.ok()) {
+    s = writable->Close();
+  }
+  TEST_SYNC_POINT("PersistRocksDBOptions:written");
+  if (s.ok()) {
+    return RocksDBOptionsParser::VerifyRocksDBOptionsFromFile(
+        config_options, db_opt, cf_names, cf_opts, file_name, fs);
+  }
+  return s;
+}
+
+RocksDBOptionsParser::RocksDBOptionsParser() { Reset(); }
+
+void RocksDBOptionsParser::Reset() {
+  db_opt_ = DBOptions();
+  db_opt_map_.clear();
+  cf_names_.clear();
+  cf_opts_.clear();
+  cf_opt_maps_.clear();
+  has_version_section_ = false;
+  has_db_options_ = false;
+  has_default_cf_options_ = false;
+  for (int i = 0; i < 3; ++i) {
+    db_version[i] = 0;
+    opt_file_version[i] = 0;
+  }
+}
+
+bool RocksDBOptionsParser::IsSection(const std::string& line) {
+  if (line.size() < 2) {
+    return false;
+  }
+  if (line[0] != '[' || line[line.size() - 1] != ']') {
+    return false;
+  }
+  return true;
+}
+
+Status RocksDBOptionsParser::ParseSection(OptionSection* section,
+                                          std::string* title,
+                                          std::string* argument,
+                                          const std::string& line,
+                                          const int line_num) {
+  *section = kOptionSectionUnknown;
+  // A section is of the form [<SectionName> "<SectionArg>"], where
+  // "<SectionArg>" is optional.
+  size_t arg_start_pos = line.find("\"");
+  size_t arg_end_pos = line.rfind("\"");
+  // The following if-then check tries to identify whether the input
+  // section has the optional section argument.
+  if (arg_start_pos != std::string::npos && arg_start_pos != arg_end_pos) {
+    *title = TrimAndRemoveComment(line.substr(1, arg_start_pos - 1), true);
+    *argument = UnescapeOptionString(
+        line.substr(arg_start_pos + 1, arg_end_pos - arg_start_pos - 1));
+  } else {
+    *title = TrimAndRemoveComment(line.substr(1, line.size() - 2), true);
+    *argument = "";
+  }
+  for (int i = 0; i < kOptionSectionUnknown; ++i) {
+    if (title->find(opt_section_titles[i]) == 0) {
+      if (i == kOptionSectionVersion || i == kOptionSectionDBOptions ||
+          i == kOptionSectionCFOptions) {
+        if (title->size() == opt_section_titles[i].size()) {
+          // if true, then it indicates an exact match
+          *section = static_cast<OptionSection>(i);
+          return CheckSection(*section, *argument, line_num);
+        }
+      } else if (i == kOptionSectionTableOptions) {
+        // This type of section has a suffix at the end of the
+        // section title
+        if (title->size() > opt_section_titles[i].size()) {
+          *section = static_cast<OptionSection>(i);
+          return CheckSection(*section, *argument, line_num);
+        }
+      }
+    }
+  }
+  return Status::InvalidArgument(std::string("Unknown section ") + line);
+}
+
+Status RocksDBOptionsParser::InvalidArgument(const int line_num,
+                                             const std::string& message) {
+  return Status::InvalidArgument(
+      "[RocksDBOptionsParser Error] ",
+      message + " (at line " + std::to_string(line_num) + ")");
+}
+
+Status RocksDBOptionsParser::ParseStatement(std::string* name,
+                                            std::string* value,
+                                            const std::string& line,
+                                            const int line_num) {
+  size_t eq_pos = line.find("=");
+  if (eq_pos == std::string::npos) {
+    return InvalidArgument(line_num, "A valid statement must have a '='.");
+  }
+
+  *name = TrimAndRemoveComment(line.substr(0, eq_pos), true);
+  *value =
+      TrimAndRemoveComment(line.substr(eq_pos + 1, line.size() - eq_pos - 1));
+  if (name->empty()) {
+    return InvalidArgument(line_num,
+                           "A valid statement must have a variable name.");
+  }
+  return Status::OK();
+}
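+// The two line forms ParseSection/ParseStatement accept (illustrative only):
+//
+//   [CFOptions "default"]        <- section title plus optional quoted argument
+//   write_buffer_size=4194304    <- statement; '#' starts a comment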
+
+Status RocksDBOptionsParser::Parse(const std::string& file_name, FileSystem* fs,
+                                   bool ignore_unknown_options,
+                                   size_t file_readahead_size) {
+  ConfigOptions
+      config_options;  // Use default for escaped(true) and check (exact)
+  config_options.ignore_unknown_options = ignore_unknown_options;
+  if (file_readahead_size > 0) {
+    config_options.file_readahead_size = file_readahead_size;
+  }
+  return Parse(config_options, file_name, fs);
+}
+
+Status RocksDBOptionsParser::Parse(const ConfigOptions& config_options_in,
+                                   const std::string& file_name,
+                                   FileSystem* fs) {
+  Reset();
+  ConfigOptions config_options = config_options_in;
+
+  std::unique_ptr<FSSequentialFile> seq_file;
+  Status s = fs->NewSequentialFile(file_name, FileOptions(), &seq_file,
+                                   nullptr);
+  if (!s.ok()) {
+    return s;
+  }
+  LineFileReader lf_reader(std::move(seq_file), file_name,
+                           config_options.file_readahead_size);
+
+  OptionSection section = kOptionSectionUnknown;
+  std::string title;
+  std::string argument;
+  std::unordered_map<std::string, std::string> opt_map;
+  std::string line;
+  // we only support single-line statements
+  while (lf_reader.ReadLine(&line,
+                            Env::IO_TOTAL /* rate_limiter_priority */)) {
+    int line_num = static_cast<int>(lf_reader.GetLineNumber());
+    line = TrimAndRemoveComment(line);
+    if (line.empty()) {
+      continue;
+    }
+    if (IsSection(line)) {
+      s = EndSection(config_options, section, title, argument, opt_map);
+      opt_map.clear();
+      if (!s.ok()) {
+        return s;
+      }
+
+      // If the option file is not generated by a higher minor version,
+      // there shouldn't be any unknown option.
+      if (config_options.ignore_unknown_options &&
+          section == kOptionSectionVersion) {
+        if (db_version[0] < ROCKSDB_MAJOR || (db_version[0] == ROCKSDB_MAJOR &&
+                                              db_version[1] <= ROCKSDB_MINOR)) {
+          config_options.ignore_unknown_options = false;
+        }
+      }
+
+      s = ParseSection(&section, &title, &argument, line, line_num);
+      if (!s.ok()) {
+        return s;
+      }
+    } else {
+      std::string name;
+      std::string value;
+      s = ParseStatement(&name, &value, line, line_num);
+      if (!s.ok()) {
+        return s;
+      }
+      opt_map.insert({name, value});
+    }
+  }
+  s = lf_reader.GetStatus();
+  if (!s.ok()) {
+    return s;
+  }
+
+  s = EndSection(config_options, section, title, argument, opt_map);
+  opt_map.clear();
+  if (!s.ok()) {
+    return s;
+  }
+  return ValidityCheck();
+}
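+// The section ordering that CheckSection/ValidityCheck below enforce, as a
+// minimal option-file skeleton (illustrative only; the values are examples):
+//
+//   [Version]
+//     rocksdb_version=8.3.2
+//     options_file_version=1.1
+//   [DBOptions]
+//     max_open_files=-1
+//   [CFOptions "default"]
+//     compression=kZSTD
+//   [TableOptions/BlockBasedTable "default"]
+//     block_size=4096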
Column Family Name:") + + section_arg); + } + } else if (section == kOptionSectionVersion) { + if (has_version_section_) { + return InvalidArgument( + line_num, + "More than one Version section found in the option config file."); + } + has_version_section_ = true; + } + return Status::OK(); +} + +Status RocksDBOptionsParser::ParseVersionNumber(const std::string& ver_name, + const std::string& ver_string, + const int max_count, + int* version) { + int version_index = 0; + int current_number = 0; + int current_digit_count = 0; + bool has_dot = false; + for (int i = 0; i < max_count; ++i) { + version[i] = 0; + } + constexpr int kBufferSize = 200; + char buffer[kBufferSize]; + for (size_t i = 0; i < ver_string.size(); ++i) { + if (ver_string[i] == '.') { + if (version_index >= max_count - 1) { + snprintf(buffer, sizeof(buffer) - 1, + "A valid %s can only contains at most %d dots.", + ver_name.c_str(), max_count - 1); + return Status::InvalidArgument(buffer); + } + if (current_digit_count == 0) { + snprintf(buffer, sizeof(buffer) - 1, + "A valid %s must have at least one digit before each dot.", + ver_name.c_str()); + return Status::InvalidArgument(buffer); + } + version[version_index++] = current_number; + current_number = 0; + current_digit_count = 0; + has_dot = true; + } else if (isdigit(ver_string[i])) { + current_number = current_number * 10 + (ver_string[i] - '0'); + current_digit_count++; + } else { + snprintf(buffer, sizeof(buffer) - 1, + "A valid %s can only contains dots and numbers.", + ver_name.c_str()); + return Status::InvalidArgument(buffer); + } + } + version[version_index] = current_number; + if (has_dot && current_digit_count == 0) { + snprintf(buffer, sizeof(buffer) - 1, + "A valid %s must have at least one digit after each dot.", + ver_name.c_str()); + return Status::InvalidArgument(buffer); + } + return Status::OK(); +} + +Status RocksDBOptionsParser::EndSection( + const ConfigOptions& config_options, const OptionSection section, + const std::string& section_title, const std::string& section_arg, + const std::unordered_map& opt_map) { + Status s; + if (section == kOptionSectionDBOptions) { + s = GetDBOptionsFromMap(config_options, DBOptions(), opt_map, &db_opt_); + if (!s.ok()) { + return s; + } + db_opt_map_ = opt_map; + } else if (section == kOptionSectionCFOptions) { + // This condition should be ensured earlier in ParseSection + // so we make an assertion here. + assert(GetCFOptions(section_arg) == nullptr); + cf_names_.emplace_back(section_arg); + cf_opts_.emplace_back(); + s = GetColumnFamilyOptionsFromMap(config_options, ColumnFamilyOptions(), + opt_map, &cf_opts_.back()); + if (!s.ok()) { + return s; + } + // keep the parsed string. 
+ cf_opt_maps_.emplace_back(opt_map); + } else if (section == kOptionSectionTableOptions) { + assert(GetCFOptions(section_arg) != nullptr); + auto* cf_opt = GetCFOptionsImpl(section_arg); + if (cf_opt == nullptr) { + return Status::InvalidArgument( + "The specified column family must be defined before the " + "TableOptions section:", + section_arg); + } + // Ignore error as table factory deserialization is optional + cf_opt->table_factory.reset(); + s = TableFactory::CreateFromString( + config_options, + section_title.substr( + opt_section_titles[kOptionSectionTableOptions].size()), + &(cf_opt->table_factory)); + if (s.ok() && cf_opt->table_factory != nullptr) { + s = cf_opt->table_factory->ConfigureFromMap(config_options, opt_map); + // Translate any errors (NotFound, NotSupported, to InvalidArgument + if (s.ok() || s.IsInvalidArgument()) { + return s; + } else { + return Status::InvalidArgument(s.getState()); + } + } else { + // Return OK for not supported table factories as TableFactory + // Deserialization is optional. + cf_opt->table_factory.reset(); + return Status::OK(); + } + } else if (section == kOptionSectionVersion) { + for (const auto& pair : opt_map) { + if (pair.first == "rocksdb_version") { + s = ParseVersionNumber(pair.first, pair.second, 3, db_version); + if (!s.ok()) { + return s; + } + } else if (pair.first == "options_file_version") { + s = ParseVersionNumber(pair.first, pair.second, 2, opt_file_version); + if (!s.ok()) { + return s; + } + if (opt_file_version[0] < 1) { + return Status::InvalidArgument( + "A valid options_file_version must be at least 1."); + } + } + } + } + return s; +} + +Status RocksDBOptionsParser::ValidityCheck() { + if (!has_db_options_) { + return Status::Corruption( + "A RocksDB Option file must have a single DBOptions section"); + } + if (!has_default_cf_options_) { + return Status::Corruption( + "A RocksDB Option file must have a single CFOptions:default section"); + } + + return Status::OK(); +} + +std::string RocksDBOptionsParser::TrimAndRemoveComment(const std::string& line, + bool trim_only) { + size_t start = 0; + size_t end = line.size(); + + // we only support "#" style comment + if (!trim_only) { + size_t search_pos = 0; + while (search_pos < line.size()) { + size_t comment_pos = line.find('#', search_pos); + if (comment_pos == std::string::npos) { + break; + } + if (comment_pos == 0 || line[comment_pos - 1] != '\\') { + end = comment_pos; + break; + } + search_pos = comment_pos + 1; + } + } + + while (start < end && isspace(line[start]) != 0) { + ++start; + } + + // start < end implies end > 0. 
+ while (start < end && isspace(line[end - 1]) != 0) { + --end; + } + + if (start < end) { + return line.substr(start, end - start); + } + + return ""; +} + +Status RocksDBOptionsParser::VerifyRocksDBOptionsFromFile( + const ConfigOptions& config_options_in, const DBOptions& db_opt, + const std::vector& cf_names, + const std::vector& cf_opts, + const std::string& file_name, FileSystem* fs) { + RocksDBOptionsParser parser; + ConfigOptions config_options = config_options_in; + config_options.invoke_prepare_options = + false; // No need to do a prepare for verify + if (config_options.sanity_level < ConfigOptions::kSanityLevelExactMatch) { + // If we are not doing an exact comparison, we should ignore + // unsupported options, as they may cause the Parse to fail + // (if the ObjectRegistry is not initialized) + config_options.ignore_unsupported_options = true; + } + Status s = parser.Parse(config_options, file_name, fs); + if (!s.ok()) { + return s; + } + + // Verify DBOptions + s = VerifyDBOptions(config_options, db_opt, *parser.db_opt(), + parser.db_opt_map()); + if (!s.ok()) { + return s; + } + + // Verify ColumnFamily Name + if (cf_names.size() != parser.cf_names()->size()) { + if (config_options.sanity_level >= + ConfigOptions::kSanityLevelLooselyCompatible) { + return Status::InvalidArgument( + "[RocksDBOptionParser Error] The persisted options does not have " + "the same number of column family names as the db instance."); + } else if (cf_opts.size() > parser.cf_opts()->size()) { + return Status::InvalidArgument( + "[RocksDBOptionsParser Error]", + "The persisted options file has less number of column family " + "names than that of the specified one."); + } + } + for (size_t i = 0; i < cf_names.size(); ++i) { + if (cf_names[i] != parser.cf_names()->at(i)) { + return Status::InvalidArgument( + "[RocksDBOptionParser Error] The persisted options and the db" + "instance does not have the same name for column family ", + std::to_string(i)); + } + } + + // Verify Column Family Options + if (cf_opts.size() != parser.cf_opts()->size()) { + if (config_options.sanity_level >= + ConfigOptions::kSanityLevelLooselyCompatible) { + return Status::InvalidArgument( + "[RocksDBOptionsParser Error]", + "The persisted options does not have the same number of " + "column families as the db instance."); + } else if (cf_opts.size() > parser.cf_opts()->size()) { + return Status::InvalidArgument( + "[RocksDBOptionsParser Error]", + "The persisted options file has less number of column families " + "than that of the specified number."); + } + } + for (size_t i = 0; i < cf_opts.size(); ++i) { + s = VerifyCFOptions(config_options, cf_opts[i], parser.cf_opts()->at(i), + &(parser.cf_opt_maps()->at(i))); + if (!s.ok()) { + return s; + } + s = VerifyTableFactory(config_options, cf_opts[i].table_factory.get(), + parser.cf_opts()->at(i).table_factory.get()); + if (!s.ok()) { + return s; + } + } + + return Status::OK(); +} + +Status RocksDBOptionsParser::VerifyDBOptions( + const ConfigOptions& config_options, const DBOptions& base_opt, + const DBOptions& file_opt, + const std::unordered_map* opt_map) { + auto base_config = DBOptionsAsConfigurable(base_opt, opt_map); + auto file_config = DBOptionsAsConfigurable(file_opt, opt_map); + std::string mismatch; + if (!base_config->AreEquivalent(config_options, file_config.get(), + &mismatch)) { + const size_t kBufferSize = 2048; + char buffer[kBufferSize]; + std::string base_value; + std::string file_value; + int offset = snprintf(buffer, sizeof(buffer), + 
"[RocksDBOptionsParser]: " + "failed the verification on DBOptions::%s -- ", + mismatch.c_str()); + Status s = base_config->GetOption(config_options, mismatch, &base_value); + if (s.ok()) { + s = file_config->GetOption(config_options, mismatch, &file_value); + } + assert(offset >= 0); + assert(static_cast(offset) < sizeof(buffer)); + if (s.ok()) { + snprintf(buffer + offset, sizeof(buffer) - static_cast(offset), + "-- The specified one is %s while the persisted one is %s.\n", + base_value.c_str(), file_value.c_str()); + } else { + snprintf(buffer + offset, sizeof(buffer) - static_cast(offset), + "-- Unable to re-serialize an option: %s.\n", + s.ToString().c_str()); + } + return Status::InvalidArgument(Slice(buffer, strlen(buffer))); + } + return Status::OK(); +} + +Status RocksDBOptionsParser::VerifyCFOptions( + const ConfigOptions& config_options, const ColumnFamilyOptions& base_opt, + const ColumnFamilyOptions& file_opt, + const std::unordered_map* opt_map) { + auto base_config = CFOptionsAsConfigurable(base_opt, opt_map); + auto file_config = CFOptionsAsConfigurable(file_opt, opt_map); + std::string mismatch; + if (!base_config->AreEquivalent(config_options, file_config.get(), + &mismatch)) { + std::string base_value; + std::string file_value; + // The options do not match + const size_t kBufferSize = 2048; + char buffer[kBufferSize]; + Status s = base_config->GetOption(config_options, mismatch, &base_value); + if (s.ok()) { + s = file_config->GetOption(config_options, mismatch, &file_value); + // In file_opt, certain options like MergeOperator may be nullptr due to + // factor methods not available. So we use opt_map to get + // option value to use in the error message below. + if (s.ok() && file_value == kNullptrString && opt_map) { + auto const& opt_val_str = (opt_map->find(mismatch)); + if (opt_val_str != opt_map->end()) { + file_value = opt_val_str->second; + } + } + } + int offset = snprintf(buffer, sizeof(buffer), + "[RocksDBOptionsParser]: " + "failed the verification on ColumnFamilyOptions::%s", + mismatch.c_str()); + assert(offset >= 0); + assert(static_cast(offset) < sizeof(buffer)); + if (s.ok()) { + snprintf(buffer + offset, sizeof(buffer) - static_cast(offset), + "--- The specified one is %s while the persisted one is %s.\n", + base_value.c_str(), file_value.c_str()); + } else { + snprintf(buffer + offset, sizeof(buffer) - static_cast(offset), + "--- Unable to re-serialize an option: %s.\n", + s.ToString().c_str()); + } + return Status::InvalidArgument(Slice(buffer, sizeof(buffer))); + } // For each option + return Status::OK(); +} + +Status RocksDBOptionsParser::VerifyTableFactory( + const ConfigOptions& config_options, const TableFactory* base_tf, + const TableFactory* file_tf) { + std::string mismatch; + if (base_tf && file_tf) { + if (config_options.sanity_level > ConfigOptions::kSanityLevelNone && + std::string(base_tf->Name()) != std::string(file_tf->Name())) { + return Status::Corruption( + "[RocksDBOptionsParser]: " + "failed the verification on TableFactory->Name()"); + } else if (!base_tf->AreEquivalent(config_options, file_tf, &mismatch)) { + return Status::Corruption(std::string("[RocksDBOptionsParser]:" + "failed the verification on ") + + base_tf->Name() + "::", + mismatch); + } + } else { + // TODO(yhchiang): further support sanity check here + } + return Status::OK(); +} +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.h 
b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.h new file mode 100644 index 0000000000..4268051f34 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/options/options_parser.h @@ -0,0 +1,149 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "rocksdb/env.h" +#include "rocksdb/options.h" + +namespace ROCKSDB_NAMESPACE { + +struct ConfigOptions; +class OptionTypeInfo; +class TableFactory; + +#define ROCKSDB_OPTION_FILE_MAJOR 1 +#define ROCKSDB_OPTION_FILE_MINOR 1 + +enum OptionSection : char { + kOptionSectionVersion = 0, + kOptionSectionDBOptions, + kOptionSectionCFOptions, + kOptionSectionTableOptions, + kOptionSectionUnknown +}; + +static const std::string opt_section_titles[] = { + "Version", "DBOptions", "CFOptions", "TableOptions/", "Unknown"}; + +Status PersistRocksDBOptions(const DBOptions& db_opt, + const std::vector& cf_names, + const std::vector& cf_opts, + const std::string& file_name, FileSystem* fs); +Status PersistRocksDBOptions(const ConfigOptions& config_options, + const DBOptions& db_opt, + const std::vector& cf_names, + const std::vector& cf_opts, + const std::string& file_name, FileSystem* fs); + +class RocksDBOptionsParser { + public: + explicit RocksDBOptionsParser(); + ~RocksDBOptionsParser() {} + void Reset(); + + // `file_readahead_size` is used for readahead for the option file. + // If 0 is given, a default value will be used. + Status Parse(const std::string& file_name, FileSystem* fs, + bool ignore_unknown_options, size_t file_readahead_size); + + Status Parse(const ConfigOptions& config_options, + const std::string& file_name, FileSystem* fs); + + static std::string TrimAndRemoveComment(const std::string& line, + const bool trim_only = false); + + const DBOptions* db_opt() const { return &db_opt_; } + const std::unordered_map* db_opt_map() const { + return &db_opt_map_; + } + const std::vector* cf_opts() const { return &cf_opts_; } + const std::vector* cf_names() const { return &cf_names_; } + const std::vector>* cf_opt_maps() + const { + return &cf_opt_maps_; + } + + const ColumnFamilyOptions* GetCFOptions(const std::string& name) { + return GetCFOptionsImpl(name); + } + size_t NumColumnFamilies() { return cf_opts_.size(); } + static Status VerifyRocksDBOptionsFromFile( + const ConfigOptions& config_options, const DBOptions& db_opt, + const std::vector& cf_names, + const std::vector& cf_opts, + const std::string& file_name, FileSystem* fs); + static Status VerifyDBOptions( + const ConfigOptions& config_options, const DBOptions& base_opt, + const DBOptions& new_opt, + const std::unordered_map* new_opt_map = + nullptr); + + static Status VerifyCFOptions( + const ConfigOptions& config_options, const ColumnFamilyOptions& base_opt, + const ColumnFamilyOptions& new_opt, + const std::unordered_map* new_opt_map = + nullptr); + + static Status VerifyTableFactory(const ConfigOptions& config_options, + const TableFactory* base_tf, + const TableFactory* file_tf); + + static Status ExtraParserCheck(const RocksDBOptionsParser& input_parser); + + static Status ParseStatement(std::string* name, std::string* value, + const std::string& line, const int line_num); + + protected: + bool IsSection(const std::string& line); + Status ParseSection(OptionSection* section, 
std::string* title, + std::string* argument, const std::string& line, + const int line_num); + + Status CheckSection(const OptionSection section, + const std::string& section_arg, const int line_num); + + Status EndSection( + const ConfigOptions& config_options, const OptionSection section, + const std::string& title, const std::string& section_arg, + const std::unordered_map& opt_map); + + Status ValidityCheck(); + + static Status InvalidArgument(const int line_num, const std::string& message); + + Status ParseVersionNumber(const std::string& ver_name, + const std::string& ver_string, const int max_count, + int* version); + + ColumnFamilyOptions* GetCFOptionsImpl(const std::string& name) { + assert(cf_names_.size() == cf_opts_.size()); + for (size_t i = 0; i < cf_names_.size(); ++i) { + if (cf_names_[i] == name) { + return &cf_opts_[i]; + } + } + return nullptr; + } + + private: + DBOptions db_opt_; + std::unordered_map db_opt_map_; + std::vector cf_names_; + std::vector cf_opts_; + std::vector> cf_opt_maps_; + bool has_version_section_; + bool has_db_options_; + bool has_default_cf_options_; + int db_version[3]; + int opt_file_version[3]; +}; + + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/jemalloc_helper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/jemalloc_helper.h new file mode 100644 index 0000000000..f085f6226d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/jemalloc_helper.h @@ -0,0 +1,107 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#if defined(__clang__) && defined(__GLIBC__) +// glibc's `posix_memalign()` declaration specifies `throw()` while clang's +// declaration does not. There is a hack in clang to make its re-declaration +// compatible with glibc's if they are declared consecutively. That hack breaks +// if yet another `posix_memalign()` declaration comes between glibc's and +// clang's declarations. Include "mm_malloc.h" here ensures glibc's and clang's +// declarations both come before "jemalloc.h"'s `posix_memalign()` declaration. +// +// This problem could also be avoided if "jemalloc.h"'s `posix_memalign()` +// declaration did not specify `throw()` when built with clang. +#include +#endif + +#ifdef ROCKSDB_JEMALLOC +#ifdef __FreeBSD__ +#include +#define JEMALLOC_USABLE_SIZE_CONST const +#else +#define JEMALLOC_MANGLE +#include +#endif + +#ifndef JEMALLOC_CXX_THROW +#define JEMALLOC_CXX_THROW +#endif + +#if defined(OS_WIN) && defined(_MSC_VER) + +// MSVC does not have weak symbol support. As long as ROCKSDB_JEMALLOC is +// defined, Jemalloc memory allocator is used. +static inline bool HasJemalloc() { return true; } + +#else + +// definitions for compatibility with older versions of jemalloc +#if !defined(JEMALLOC_ALLOCATOR) +#define JEMALLOC_ALLOCATOR +#endif +#if !defined(JEMALLOC_RESTRICT_RETURN) +#define JEMALLOC_RESTRICT_RETURN +#endif +#if !defined(JEMALLOC_NOTHROW) +#define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +#endif +#if !defined(JEMALLOC_ALLOC_SIZE) +#ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +#define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +#else +#define JEMALLOC_ALLOC_SIZE(s) +#endif +#endif + +// Declare non-standard jemalloc APIs as weak symbols. 
We can null-check these +// symbols to detect whether jemalloc is linked with the binary. +extern "C" JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +mallocx(size_t, int) JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) + __attribute__((__weak__)); +extern "C" JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +rallocx(void *, size_t, int) JEMALLOC_ALLOC_SIZE(2) __attribute__((__weak__)); +extern "C" size_t JEMALLOC_NOTHROW xallocx(void *, size_t, size_t, int) + __attribute__((__weak__)); +extern "C" size_t JEMALLOC_NOTHROW sallocx(const void *, int) + JEMALLOC_ATTR(pure) __attribute__((__weak__)); +extern "C" void JEMALLOC_NOTHROW dallocx(void *, int) __attribute__((__weak__)); +extern "C" void JEMALLOC_NOTHROW sdallocx(void *, size_t, int) + __attribute__((__weak__)); +extern "C" size_t JEMALLOC_NOTHROW nallocx(size_t, int) JEMALLOC_ATTR(pure) + __attribute__((__weak__)); +extern "C" int JEMALLOC_NOTHROW mallctl(const char *, void *, size_t *, void *, + size_t) __attribute__((__weak__)); +extern "C" int JEMALLOC_NOTHROW mallctlnametomib(const char *, size_t *, + size_t *) + __attribute__((__weak__)); +extern "C" int JEMALLOC_NOTHROW mallctlbymib(const size_t *, size_t, void *, + size_t *, void *, size_t) + __attribute__((__weak__)); +extern "C" void JEMALLOC_NOTHROW +malloc_stats_print(void (*)(void *, const char *), void *, const char *) + __attribute__((__weak__)); +extern "C" size_t JEMALLOC_NOTHROW +malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *) JEMALLOC_CXX_THROW + __attribute__((__weak__)); + +// Check if Jemalloc is linked with the binary. Note the main program might be +// using a different memory allocator even this method return true. +// It is loosely based on folly::usingJEMalloc(), minus the check that actually +// allocate memory and see if it is through jemalloc, to handle the dlopen() +// case: +// https://github.com/facebook/folly/blob/76cf8b5841fb33137cfbf8b224f0226437c855bc/folly/memory/Malloc.h#L147 +static inline bool HasJemalloc() { + return mallocx != nullptr && rallocx != nullptr && xallocx != nullptr && + sallocx != nullptr && dallocx != nullptr && sdallocx != nullptr && + nallocx != nullptr && mallctl != nullptr && + mallctlnametomib != nullptr && mallctlbymib != nullptr && + malloc_stats_print != nullptr && malloc_usable_size != nullptr; +} + +#endif + +#endif // ROCKSDB_JEMALLOC diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/lang.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/lang.h new file mode 100644 index 0000000000..a4201ca3b2 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/lang.h @@ -0,0 +1,97 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
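// [editor's note] A minimal sketch of the weak-symbol detection pattern used
// by jemalloc_helper.h above, assuming a GCC/Clang ELF toolchain; the symbol
// optional_feature_init and the helper are hypothetical, not vendored code.
extern "C" void optional_feature_init(void) __attribute__((__weak__));

static inline bool HasOptionalFeature() {
  // A weak declaration resolves to a null address unless some linked object
  // actually defines the symbol, so a null check doubles as link detection.
  return optional_feature_init != nullptr;
}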
+ +#pragma once + +#ifndef FALLTHROUGH_INTENDED +#if defined(__clang__) +#define FALLTHROUGH_INTENDED [[clang::fallthrough]] +#elif defined(__GNUC__) && __GNUC__ >= 7 +#define FALLTHROUGH_INTENDED [[gnu::fallthrough]] +#else +#define FALLTHROUGH_INTENDED \ + do { \ + } while (0) +#endif +#endif + +#define DECLARE_DEFAULT_MOVES(Name) \ + Name(Name&&) noexcept = default; \ + Name& operator=(Name&&) = default + +// ASAN (Address sanitizer) + +#if defined(__clang__) +#if defined(__has_feature) +#if __has_feature(address_sanitizer) +#define MUST_FREE_HEAP_ALLOCATIONS 1 +#endif // __has_feature(address_sanitizer) +#endif // defined(__has_feature) +#else // __clang__ +#ifdef __SANITIZE_ADDRESS__ +#define MUST_FREE_HEAP_ALLOCATIONS 1 +#endif // __SANITIZE_ADDRESS__ +#endif // __clang__ + +#ifdef ROCKSDB_VALGRIND_RUN +#define MUST_FREE_HEAP_ALLOCATIONS 1 +#endif // ROCKSDB_VALGRIND_RUN + +// Coding guidelines say to avoid static objects with non-trivial destructors, +// because it's easy to cause trouble (UB) in static destruction. This +// macro makes it easier to define static objects that are normally never +// destructed, except are destructed when running under ASAN. This should +// avoid unexpected, unnecessary destruction behavior in production. +// Note that constructor arguments can be provided as in +// STATIC_AVOID_DESTRUCTION(Foo, foo)(arg1, arg2); +#ifdef MUST_FREE_HEAP_ALLOCATIONS +#define STATIC_AVOID_DESTRUCTION(Type, name) static Type name +constexpr bool kMustFreeHeapAllocations = true; +#else +#define STATIC_AVOID_DESTRUCTION(Type, name) static Type& name = *new Type +constexpr bool kMustFreeHeapAllocations = false; +#endif + +// TSAN (Thread sanitizer) + +// For simplicity, standardize on the GCC define +#if defined(__clang__) +#if defined(__has_feature) && __has_feature(thread_sanitizer) +#define __SANITIZE_THREAD__ 1 +#endif // __has_feature(thread_sanitizer) +#endif // __clang__ + +#ifdef __SANITIZE_THREAD__ +#define TSAN_SUPPRESSION __attribute__((no_sanitize("thread"))) +#else +#define TSAN_SUPPRESSION +#endif // TSAN_SUPPRESSION + +// Compile-time CPU feature testing compatibility +// +// A way to be extra sure these defines have been included. +#define ASSERT_FEATURE_COMPAT_HEADER() /* empty */ + +// MSVC doesn't support the same defines that gcc and clang provide +// but does some like __AVX__. Here we can infer some features from others. +#ifdef __AVX__ +#define __SSE4_2__ 1 +#define __PCLMUL__ 1 +#endif // __AVX__ + +// A way to disable PCLMUL +#ifdef NO_PCLMUL +#undef __PCLMUL__ +#endif + +// popcnt is generally implied by SSE4.2 +#if defined(__SSE4_2__) +#define __POPCNT__ 1 +#endif + +// A way to disable POPCNT +#ifdef NO_POPCNT +#undef __POPCNT__ +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/likely.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/likely.h new file mode 100644 index 0000000000..0bd90d7015 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/likely.h @@ -0,0 +1,18 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
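// [editor's note] Usage sketch for two of the lang.h helpers above; an
// editorial illustration only. CategoryOf/SharedName are hypothetical and
// <string> is assumed to be included.
static int CategoryOf(int code) {
  switch (code) {
    case 0:
      return 1;
    case 1:
      FALLTHROUGH_INTENDED;  // deliberate fallthrough, keeps compilers quiet
    case 2:
      return 2;
    default:
      return 0;
  }
}

static std::string& SharedName() {
  // Heap-allocate-and-leak in production so static destruction order can
  // never bite; ASAN/valgrind builds destruct normally to stay leak-clean.
  STATIC_AVOID_DESTRUCTION(std::string, name);
  return name;
}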
+ +#pragma once + +#if defined(__GNUC__) && __GNUC__ >= 4 +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/malloc.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/malloc.h new file mode 100644 index 0000000000..f973263e2a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/malloc.h @@ -0,0 +1,17 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#ifdef ROCKSDB_MALLOC_USABLE_SIZE +#ifdef OS_FREEBSD +#include +#else +#include +#endif // OS_FREEBSD +#endif // ROCKSDB_MALLOC_USABLE_SIZE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.cc new file mode 100644 index 0000000000..36e8f32617 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.cc @@ -0,0 +1,98 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "port/mmap.h" + +#include +#include +#include +#include +#include + +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +MemMapping::~MemMapping() { +#ifdef OS_WIN + if (addr_ != nullptr) { + (void)::UnmapViewOfFile(addr_); + } + if (page_file_handle_ != NULL) { + (void)::CloseHandle(page_file_handle_); + } +#else // OS_WIN -> !OS_WIN + if (addr_ != nullptr) { + auto status = munmap(addr_, length_); + assert(status == 0); + if (status != 0) { + // TODO: handle error? 
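// [editor's note] Typical use of the LIKELY/UNLIKELY hints from port/likely.h
// above (editorial sketch): wrap branch conditions the caller expects to be
// rare (or common) so the compiler lays out the hot path first, e.g.
//   if (UNLIKELY(ptr == nullptr)) { return HandleRareError(); }
//   if (LIKELY(cache_hit)) { return cached_value; }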
+ } + } +#endif // OS_WIN +} + +MemMapping::MemMapping(MemMapping&& other) noexcept { + *this = std::move(other); +} + +MemMapping& MemMapping::operator=(MemMapping&& other) noexcept { + if (&other == this) { + return *this; + } + this->~MemMapping(); + std::memcpy(this, &other, sizeof(*this)); + new (&other) MemMapping(); + return *this; +} + +MemMapping MemMapping::AllocateAnonymous(size_t length, bool huge) { + MemMapping mm; + mm.length_ = length; + assert(mm.addr_ == nullptr); + if (length == 0) { + // OK to leave addr as nullptr + return mm; + } + int huge_flag = 0; +#ifdef OS_WIN + if (huge) { +#ifdef FILE_MAP_LARGE_PAGES + huge_flag = FILE_MAP_LARGE_PAGES; +#endif // FILE_MAP_LARGE_PAGES + } + mm.page_file_handle_ = ::CreateFileMapping( + INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE | SEC_COMMIT, + Upper32of64(length), Lower32of64(length), nullptr); + if (mm.page_file_handle_ == NULL) { + // Failure + return mm; + } + mm.addr_ = ::MapViewOfFile(mm.page_file_handle_, FILE_MAP_WRITE | huge_flag, + 0, 0, length); +#else // OS_WIN -> !OS_WIN + if (huge) { +#ifdef MAP_HUGETLB + huge_flag = MAP_HUGETLB; +#endif // MAP_HUGE_TLB + } + mm.addr_ = mmap(nullptr, length, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | huge_flag, -1, 0); + if (mm.addr_ == MAP_FAILED) { + mm.addr_ = nullptr; + } +#endif // OS_WIN + return mm; +} + +MemMapping MemMapping::AllocateHuge(size_t length) { + return AllocateAnonymous(length, /*huge*/ true); +} + +MemMapping MemMapping::AllocateLazyZeroed(size_t length) { + return AllocateAnonymous(length, /*huge*/ false); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.h new file mode 100644 index 0000000000..7342a13f96 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/mmap.h @@ -0,0 +1,70 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#ifdef OS_WIN +#include "port/win/port_win.h" +// ^^^ For proper/safe inclusion of windows.h. Must come first. +#include +#else +#include +#endif // OS_WIN + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// An RAII wrapper for mmaped memory +class MemMapping { + public: + static constexpr bool kHugePageSupported = +#if defined(MAP_HUGETLB) || defined(FILE_MAP_LARGE_PAGES) + true; +#else + false; +#endif + + // Allocate memory requesting to be backed by huge pages + static MemMapping AllocateHuge(size_t length); + + // Allocate memory that is only lazily mapped to resident memory and + // guaranteed to be zero-initialized. Note that some platforms like + // Linux allow memory over-commit, where only the used portion of memory + // matters, while other platforms require enough swap space (page file) to + // back the full mapping. 
+ static MemMapping AllocateLazyZeroed(size_t length); + + // No copies + MemMapping(const MemMapping&) = delete; + MemMapping& operator=(const MemMapping&) = delete; + // Move + MemMapping(MemMapping&&) noexcept; + MemMapping& operator=(MemMapping&&) noexcept; + + // Releases the mapping + ~MemMapping(); + + inline void* Get() const { return addr_; } + inline size_t Length() const { return length_; } + + private: + MemMapping() {} + + // The mapped memory, or nullptr on failure / not supported + void* addr_ = nullptr; + // The known usable number of bytes starting at that address + size_t length_ = 0; + +#ifdef OS_WIN + HANDLE page_file_handle_ = NULL; +#endif // OS_WIN + + static MemMapping AllocateAnonymous(size_t length, bool huge); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port.h new file mode 100644 index 0000000000..13aa56d47b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port.h @@ -0,0 +1,21 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include + +// Include the appropriate platform specific file below. If you are +// porting to a new platform, see "port_example.h" for documentation +// of what the new port_.h file must provide. +#if defined(ROCKSDB_PLATFORM_POSIX) +#include "port/port_posix.h" +#elif defined(OS_WIN) +#include "port/win/port_win.h" +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_dirent.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_dirent.h new file mode 100644 index 0000000000..2b23e2f076 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_dirent.h @@ -0,0 +1,44 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// See port_example.h for documentation for the following types/functions. 
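// [editor's note] Usage sketch for the MemMapping RAII wrapper declared in
// port/mmap.h above (editorial illustration; AllocateTable is hypothetical):
static ROCKSDB_NAMESPACE::MemMapping AllocateTable(size_t bytes) {
  // Prefer huge pages where the platform supports them; fall back to a
  // normal lazily-zeroed anonymous mapping when the huge allocation fails.
  if (ROCKSDB_NAMESPACE::MemMapping::kHugePageSupported) {
    auto mm = ROCKSDB_NAMESPACE::MemMapping::AllocateHuge(bytes);
    if (mm.Get() != nullptr) {
      return mm;  // moved out; ~MemMapping eventually releases the mapping
    }
  }
  return ROCKSDB_NAMESPACE::MemMapping::AllocateLazyZeroed(bytes);
}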
+ +#pragma once + +#ifdef ROCKSDB_PLATFORM_POSIX +#include +#include +#elif defined(OS_WIN) + +namespace ROCKSDB_NAMESPACE { +namespace port { + +struct dirent { + char d_name[_MAX_PATH]; /* filename */ +}; + +struct DIR; + +DIR* opendir(const char* name); + +dirent* readdir(DIR* dirp); + +int closedir(DIR* dirp); + +} // namespace port + +using port::closedir; +using port::DIR; +using port::dirent; +using port::opendir; +using port::readdir; + +} // namespace ROCKSDB_NAMESPACE + +#endif // OS_WIN diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_example.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_example.h new file mode 100644 index 0000000000..794149a690 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_example.h @@ -0,0 +1,101 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// This file contains the specification, but not the implementations, +// of the types/operations/etc. that should be defined by a platform +// specific port_.h file. Use this file as a reference for +// how to port this package to a new platform. + +#pragma once + +namespace ROCKSDB_NAMESPACE { +namespace port { + +// TODO(jorlow): Many of these belong more in the environment class rather than +// here. We should try moving them and see if it affects perf. + +// The following boolean constant must be true on a little-endian machine +// and false otherwise. +static const bool kLittleEndian = true /* or some other expression */; + +// ------------------ Threading ------------------- + +// A Mutex represents an exclusive lock. +class Mutex { + public: + Mutex(); + ~Mutex(); + + // Lock the mutex. Waits until other lockers have exited. + // Will deadlock if the mutex is already locked by this thread. + void Lock(); + + // Unlock the mutex. + // REQUIRES: This mutex was locked by this thread. + void Unlock(); + + // Optionally crash if this thread does not hold this mutex. + // The implementation must be fast, especially if NDEBUG is + // defined. The implementation is allowed to skip all checks. + void AssertHeld(); +}; + +class CondVar { + public: + explicit CondVar(Mutex* mu); + ~CondVar(); + + // Atomically release *mu and block on this condition variable until + // either a call to SignalAll(), or a call to Signal() that picks + // this thread to wakeup. + // REQUIRES: this thread holds *mu + void Wait(); + + // If there are some threads waiting, wake up at least one of them. + void Signal(); + + // Wake up all waiting threads. + void SignallAll(); +}; + +// Thread-safe initialization. +// Used as follows: +// static port::OnceType init_control = LEVELDB_ONCE_INIT; +// static void Initializer() { ... do something ...; } +// ... +// port::InitOnce(&init_control, &Initializer); +using OnceType = intptr_t; +#define LEVELDB_ONCE_INIT 0 +extern void InitOnce(port::OnceType*, void (*initializer)()); + +// ------------------ Compression ------------------- + +// Store the snappy compression of "input[0,input_length-1]" in *output. +// Returns false if snappy is not supported by this port. 
+extern bool Snappy_Compress(const char* input, size_t input_length, + std::string* output); + +// If input[0,input_length-1] looks like a valid snappy compressed +// buffer, store the size of the uncompressed data in *result and +// return true. Else return false. +extern bool Snappy_GetUncompressedLength(const char* input, size_t length, + size_t* result); + +// Attempt to snappy uncompress input[0,input_length-1] into *output. +// Returns true if successful, false if the input is invalid lightweight +// compressed data. +// +// REQUIRES: at least the first "n" bytes of output[] must be writable +// where "n" is the result of a successful call to +// Snappy_GetUncompressedLength. +extern bool Snappy_Uncompress(const char* input_data, size_t input_length, + char* output); + +} // namespace port +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.cc new file mode 100644 index 0000000000..3872293b81 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.cc @@ -0,0 +1,300 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#if !defined(OS_WIN) + +#include "port/port_posix.h" + +#include +#if defined(__i386__) || defined(__x86_64__) +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +// We want to give users the opportunity to default all the mutexes to adaptive +// if not specified otherwise. This enables a quick way to conduct various +// performance-related experiments. +// +// NB! Support for adaptive mutexes is turned on by defining +// ROCKSDB_PTHREAD_ADAPTIVE_MUTEX during the compilation. If you use the +// RocksDB build environment then this happens automatically; otherwise it's up
// to the consumer to define the identifier.
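// [editor's note] Round-trip sketch for the Snappy_* interface specified in
// port_example.h above (editorial; a real port supplies the definitions):
//   std::string compressed;
//   if (port::Snappy_Compress(data, len, &compressed)) {
//     size_t ulen = 0;
//     if (port::Snappy_GetUncompressedLength(compressed.data(),
//                                            compressed.size(), &ulen)) {
//       std::string out(ulen, '\0');  // REQUIRES: ulen writable bytes
//       port::Snappy_Uncompress(compressed.data(), compressed.size(),
//                               &out[0]);
//     }
//   }  // a false return from Snappy_Compress means snappy is unsupported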
+#ifdef ROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX +extern const bool kDefaultToAdaptiveMutex = true; +#else +extern const bool kDefaultToAdaptiveMutex = false; +#endif + +namespace port { + +static int PthreadCall(const char* label, int result) { + if (result != 0 && result != ETIMEDOUT && result != EBUSY) { + fprintf(stderr, "pthread %s: %s\n", label, errnoStr(result).c_str()); + abort(); + } + return result; +} + +Mutex::Mutex(bool adaptive) { + (void)adaptive; +#ifdef ROCKSDB_PTHREAD_ADAPTIVE_MUTEX + if (!adaptive) { + PthreadCall("init mutex", pthread_mutex_init(&mu_, nullptr)); + } else { + pthread_mutexattr_t mutex_attr; + PthreadCall("init mutex attr", pthread_mutexattr_init(&mutex_attr)); + PthreadCall("set mutex attr", pthread_mutexattr_settype( + &mutex_attr, PTHREAD_MUTEX_ADAPTIVE_NP)); + PthreadCall("init mutex", pthread_mutex_init(&mu_, &mutex_attr)); + PthreadCall("destroy mutex attr", pthread_mutexattr_destroy(&mutex_attr)); + } +#else + PthreadCall("init mutex", pthread_mutex_init(&mu_, nullptr)); +#endif // ROCKSDB_PTHREAD_ADAPTIVE_MUTEX +} + +Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); } + +void Mutex::Lock() { + PthreadCall("lock", pthread_mutex_lock(&mu_)); +#ifndef NDEBUG + locked_ = true; +#endif +} + +void Mutex::Unlock() { +#ifndef NDEBUG + locked_ = false; +#endif + PthreadCall("unlock", pthread_mutex_unlock(&mu_)); +} + +bool Mutex::TryLock() { + bool ret = PthreadCall("trylock", pthread_mutex_trylock(&mu_)) == 0; +#ifndef NDEBUG + if (ret) { + locked_ = true; + } +#endif + return ret; +} + +void Mutex::AssertHeld() { +#ifndef NDEBUG + assert(locked_); +#endif +} + +CondVar::CondVar(Mutex* mu) : mu_(mu) { + PthreadCall("init cv", pthread_cond_init(&cv_, nullptr)); +} + +CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); } + +void CondVar::Wait() { +#ifndef NDEBUG + mu_->locked_ = false; +#endif + PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_)); +#ifndef NDEBUG + mu_->locked_ = true; +#endif +} + +bool CondVar::TimedWait(uint64_t abs_time_us) { + struct timespec ts; + ts.tv_sec = static_cast(abs_time_us / 1000000); + ts.tv_nsec = static_cast((abs_time_us % 1000000) * 1000); + +#ifndef NDEBUG + mu_->locked_ = false; +#endif + int err = pthread_cond_timedwait(&cv_, &mu_->mu_, &ts); +#ifndef NDEBUG + mu_->locked_ = true; +#endif + if (err == ETIMEDOUT) { + return true; + } + if (err != 0) { + PthreadCall("timedwait", err); + } + return false; +} + +void CondVar::Signal() { PthreadCall("signal", pthread_cond_signal(&cv_)); } + +void CondVar::SignalAll() { + PthreadCall("broadcast", pthread_cond_broadcast(&cv_)); +} + +RWMutex::RWMutex() { + PthreadCall("init mutex", pthread_rwlock_init(&mu_, nullptr)); +} + +RWMutex::~RWMutex() { + PthreadCall("destroy mutex", pthread_rwlock_destroy(&mu_)); +} + +void RWMutex::ReadLock() { + PthreadCall("read lock", pthread_rwlock_rdlock(&mu_)); +} + +void RWMutex::WriteLock() { + PthreadCall("write lock", pthread_rwlock_wrlock(&mu_)); +} + +void RWMutex::ReadUnlock() { + PthreadCall("read unlock", pthread_rwlock_unlock(&mu_)); +} + +void RWMutex::WriteUnlock() { + PthreadCall("write unlock", pthread_rwlock_unlock(&mu_)); +} + +int PhysicalCoreID() { +#if defined(ROCKSDB_SCHED_GETCPU_PRESENT) && defined(__x86_64__) && \ + (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 22)) + // sched_getcpu uses VDSO getcpu() syscall since 2.22. I believe Linux offers + // VDSO support only on x86_64. This is the fastest/preferred method if + // available. 
+ int cpuno = sched_getcpu(); + if (cpuno < 0) { + return -1; + } + return cpuno; +#elif defined(__x86_64__) || defined(__i386__) + // clang/gcc both provide cpuid.h, which defines __get_cpuid(), for x86_64 and + // i386. + unsigned eax, ebx = 0, ecx, edx; + if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { + return -1; + } + return ebx >> 24; +#else + // give up, the caller can generate a random number or something. + return -1; +#endif +} + +void InitOnce(OnceType* once, void (*initializer)()) { + PthreadCall("once", pthread_once(once, initializer)); +} + +void Crash(const std::string& srcfile, int srcline) { + fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline); + fflush(stdout); + kill(getpid(), SIGTERM); +} + +int GetMaxOpenFiles() { +#if defined(RLIMIT_NOFILE) + struct rlimit no_files_limit; + if (getrlimit(RLIMIT_NOFILE, &no_files_limit) != 0) { + return -1; + } + // protect against overflow + if (static_cast(no_files_limit.rlim_cur) >= + static_cast(std::numeric_limits::max())) { + return std::numeric_limits::max(); + } + return static_cast(no_files_limit.rlim_cur); +#endif + return -1; +} + +void* cacheline_aligned_alloc(size_t size) { +#if __GNUC__ < 5 && defined(__SANITIZE_ADDRESS__) + return malloc(size); +#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || defined(__APPLE__)) + void* m; + errno = posix_memalign(&m, CACHE_LINE_SIZE, size); + return errno ? nullptr : m; +#else + return malloc(size); +#endif +} + +void cacheline_aligned_free(void* memblock) { free(memblock); } + +static size_t GetPageSize() { +#if defined(OS_LINUX) || defined(_SC_PAGESIZE) + long v = sysconf(_SC_PAGESIZE); + if (v >= 1024) { + return static_cast(v); + } +#endif + // Default assume 4KB + return 4U * 1024U; +} + +const size_t kPageSize = GetPageSize(); + +void SetCpuPriority(ThreadId id, CpuPriority priority) { +#ifdef OS_LINUX + sched_param param; + param.sched_priority = 0; + switch (priority) { + case CpuPriority::kHigh: + sched_setscheduler(id, SCHED_OTHER, ¶m); + setpriority(PRIO_PROCESS, id, -20); + break; + case CpuPriority::kNormal: + sched_setscheduler(id, SCHED_OTHER, ¶m); + setpriority(PRIO_PROCESS, id, 0); + break; + case CpuPriority::kLow: + sched_setscheduler(id, SCHED_OTHER, ¶m); + setpriority(PRIO_PROCESS, id, 19); + break; + case CpuPriority::kIdle: + sched_setscheduler(id, SCHED_IDLE, ¶m); + break; + default: + assert(false); + } +#else + (void)id; + (void)priority; +#endif +} + +int64_t GetProcessID() { return getpid(); } + +bool GenerateRfcUuid(std::string* output) { + output->clear(); + std::ifstream f("/proc/sys/kernel/random/uuid"); + std::getline(f, /*&*/ *output); + if (output->size() == 36) { + return true; + } else { + output->clear(); + return false; + } +} + +} // namespace port +} // namespace ROCKSDB_NAMESPACE + +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.h new file mode 100644 index 0000000000..cdb256a6d6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/port_posix.h @@ -0,0 +1,243 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
See the AUTHORS file for names of contributors. +// +// See port_example.h for documentation for the following types/functions. + +#pragma once + +#include + +#include "rocksdb/port_defs.h" +#include "rocksdb/rocksdb_namespace.h" + +// size_t printf formatting named in the manner of C99 standard formatting +// strings such as PRIu64 +// in fact, we could use that one +#define ROCKSDB_PRIszt "zu" + +#define __declspec(S) + +#undef PLATFORM_IS_LITTLE_ENDIAN +#if defined(OS_MACOSX) +#include +#if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER) +#define PLATFORM_IS_LITTLE_ENDIAN \ + (__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN) +#endif +#elif defined(OS_SOLARIS) +#include +#ifdef _LITTLE_ENDIAN +#define PLATFORM_IS_LITTLE_ENDIAN true +#else +#define PLATFORM_IS_LITTLE_ENDIAN false +#endif +#include +#elif defined(OS_AIX) +#include +#include +#define PLATFORM_IS_LITTLE_ENDIAN (BYTE_ORDER == LITTLE_ENDIAN) +#include +#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || \ + defined(OS_DRAGONFLYBSD) || defined(OS_ANDROID) +#include +#include +#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) +#else +#include +#endif +#include +#include +#include + +#include +#include + +#ifndef PLATFORM_IS_LITTLE_ENDIAN +#define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) +#endif + +#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) || \ + defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) || \ + defined(OS_ANDROID) || defined(CYGWIN) || defined(OS_AIX) +// Use fread/fwrite/fflush on platforms without _unlocked variants +#define fread_unlocked fread +#define fwrite_unlocked fwrite +#define fflush_unlocked fflush +#endif + +#if defined(OS_MACOSX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || \ + defined(OS_DRAGONFLYBSD) +// Use fsync() on platforms without fdatasync() +#define fdatasync fsync +#endif + +#if defined(OS_ANDROID) && __ANDROID_API__ < 9 +// fdatasync() was only introduced in API level 9 on Android. Use fsync() +// when targeting older platforms. +#define fdatasync fsync +#endif + +namespace ROCKSDB_NAMESPACE { + +extern const bool kDefaultToAdaptiveMutex; + +namespace port { +constexpr bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN; +#undef PLATFORM_IS_LITTLE_ENDIAN + +class CondVar; + +class Mutex { + public: + static const char* kName() { return "pthread_mutex_t"; } + + explicit Mutex(bool adaptive = kDefaultToAdaptiveMutex); + // No copying + Mutex(const Mutex&) = delete; + void operator=(const Mutex&) = delete; + + ~Mutex(); + + void Lock(); + void Unlock(); + + bool TryLock(); + + // this will assert if the mutex is not locked + // it does NOT verify that mutex is held by a calling thread + void AssertHeld(); + + // Also implement std Lockable + inline void lock() { Lock(); } + inline void unlock() { Unlock(); } + inline bool try_lock() { return TryLock(); } + + private: + friend class CondVar; + pthread_mutex_t mu_; +#ifndef NDEBUG + bool locked_ = false; +#endif +}; + +class RWMutex { + public: + RWMutex(); + // No copying allowed + RWMutex(const RWMutex&) = delete; + void operator=(const RWMutex&) = delete; + + ~RWMutex(); + + void ReadLock(); + void WriteLock(); + void ReadUnlock(); + void WriteUnlock(); + void AssertHeld() {} + + private: + pthread_rwlock_t mu_; // the underlying platform mutex +}; + +class CondVar { + public: + explicit CondVar(Mutex* mu); + ~CondVar(); + void Wait(); + // Timed condition wait. Returns true if timeout occurred. 
+ bool TimedWait(uint64_t abs_time_us); + void Signal(); + void SignalAll(); + + private: + pthread_cond_t cv_; + Mutex* mu_; +}; + +using Thread = std::thread; + +static inline void AsmVolatilePause() { +#if defined(__i386__) || defined(__x86_64__) + asm volatile("pause"); +#elif defined(__aarch64__) + asm volatile("isb"); +#elif defined(__powerpc64__) + asm volatile("or 27,27,27"); +#elif defined(__loongarch64) + asm volatile("dbar 0"); +#endif + // it's okay for other platforms to be no-ops +} + +// Returns -1 if not available on this platform +extern int PhysicalCoreID(); + +using OnceType = pthread_once_t; +#define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT +extern void InitOnce(OnceType* once, void (*initializer)()); + +#ifndef CACHE_LINE_SIZE +// To test behavior with non-native cache line size, e.g. for +// Bloom filters, set TEST_CACHE_LINE_SIZE to the desired test size. +// This disables ALIGN_AS to keep it from failing compilation. +#ifdef TEST_CACHE_LINE_SIZE +#define CACHE_LINE_SIZE TEST_CACHE_LINE_SIZE +#define ALIGN_AS(n) /*empty*/ +#else +#if defined(__s390__) +#if defined(__GNUC__) && __GNUC__ < 7 +#define CACHE_LINE_SIZE 64U +#else +#define CACHE_LINE_SIZE 256U +#endif +#elif defined(__powerpc__) || defined(__aarch64__) +#define CACHE_LINE_SIZE 128U +#else +#define CACHE_LINE_SIZE 64U +#endif +#define ALIGN_AS(n) alignas(n) +#endif +#endif + +static_assert((CACHE_LINE_SIZE & (CACHE_LINE_SIZE - 1)) == 0, + "Cache line size must be a power of 2 number of bytes"); + +extern void* cacheline_aligned_alloc(size_t size); + +extern void cacheline_aligned_free(void* memblock); + +#if defined(__aarch64__) +// __builtin_prefetch(..., 1) turns into a prefetch into prfm pldl3keep. On +// arm64 we want this as close to the core as possible to turn it into a +// L1 prefetech unless locality == 0 in which case it will be turned into a +// non-temporal prefetch +#define PREFETCH(addr, rw, locality) \ + __builtin_prefetch(addr, rw, locality >= 1 ? 3 : locality) +#else +#define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) +#endif + +extern void Crash(const std::string& srcfile, int srcline); + +extern int GetMaxOpenFiles(); + +extern const size_t kPageSize; + +using ThreadId = pid_t; + +extern void SetCpuPriority(ThreadId id, CpuPriority priority); + +int64_t GetProcessID(); + +// Uses platform APIs to generate a 36-character RFC-4122 UUID. Returns +// true on success or false on failure. +bool GenerateRfcUuid(std::string* output); + +} // namespace port +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.cc new file mode 100644 index 0000000000..9e9d8b8b54 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.cc @@ -0,0 +1,336 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
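// [editor's note] Usage sketch for port::CondVar::TimedWait declared above:
// the argument is an *absolute* deadline in microseconds and a true return
// means the wait timed out. (Editorial; NowMicros and done are hypothetical.)
//   mu.Lock();
//   uint64_t deadline_us = NowMicros() + 100 * 1000;  // now + 100ms
//   bool timed_out = false;
//   while (!done && !timed_out) {
//     timed_out = cv.TimedWait(deadline_us);  // true on ETIMEDOUT
//   }
//   mu.Unlock();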
+// +#include "port/stack_trace.h" + +#if !(defined(ROCKSDB_BACKTRACE) || defined(OS_MACOSX)) || defined(CYGWIN) || \ + defined(OS_SOLARIS) || defined(OS_WIN) + +// noop + +namespace ROCKSDB_NAMESPACE { +namespace port { +void InstallStackTraceHandler() {} +void PrintStack(int /*first_frames_to_skip*/) {} +void PrintAndFreeStack(void* /*callstack*/, int /*num_frames*/) {} +void* SaveStack(int* /*num_frames*/, int /*first_frames_to_skip*/) { + return nullptr; +} +} // namespace port +} // namespace ROCKSDB_NAMESPACE + +#else + +#include +#include +#include +#include +#include +#include +#include + +#ifdef OS_FREEBSD +#include +#endif // OS_FREEBSD +#ifdef OS_LINUX +#include +#include +#include +#if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 30) +#include +#define gettid() syscall(SYS_gettid) +#endif // GLIBC version +#endif // OS_LINUX + +#include "port/lang.h" + +namespace ROCKSDB_NAMESPACE { +namespace port { + +namespace { + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) +const char* GetExecutableName() { + static char name[1024]; + +#if !defined(OS_FREEBSD) + char link[1024]; + snprintf(link, sizeof(link), "/proc/%d/exe", getpid()); + auto read = readlink(link, name, sizeof(name) - 1); + if (-1 == read) { + return nullptr; + } else { + name[read] = 0; + return name; + } +#else + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; + size_t namesz = sizeof(name); + + auto ret = sysctl(mib, 4, name, &namesz, nullptr, 0); + if (-1 == ret) { + return nullptr; + } else { + return name; + } +#endif +} + +void PrintStackTraceLine(const char* symbol, void* frame) { + static const char* executable = GetExecutableName(); + if (symbol) { + fprintf(stderr, "%s ", symbol); + } + if (executable) { + // out source to addr2line, for the address translation + const int kLineMax = 256; + char cmd[kLineMax]; + snprintf(cmd, kLineMax, "addr2line %p -e %s -f -C 2>&1", frame, executable); + auto f = popen(cmd, "r"); + if (f) { + char line[kLineMax]; + while (fgets(line, sizeof(line), f)) { + line[strlen(line) - 1] = 0; // remove newline + fprintf(stderr, "%s\t", line); + } + pclose(f); + } + } else { + fprintf(stderr, " %p", frame); + } + + fprintf(stderr, "\n"); +} +#elif defined(OS_MACOSX) + +void PrintStackTraceLine(const char* symbol, void* frame) { + static int pid = getpid(); + // out source to atos, for the address translation + const int kLineMax = 256; + char cmd[kLineMax]; + snprintf(cmd, kLineMax, "xcrun atos %p -p %d 2>&1", frame, pid); + auto f = popen(cmd, "r"); + if (f) { + char line[kLineMax]; + while (fgets(line, sizeof(line), f)) { + line[strlen(line) - 1] = 0; // remove newline + fprintf(stderr, "%s\t", line); + } + pclose(f); + } else if (symbol) { + fprintf(stderr, "%s ", symbol); + } + + fprintf(stderr, "\n"); +} + +#endif + +const char* GetLldbScriptSelectThread(long long tid) { + // NOTE: called from a signal handler, so no heap allocation + static char script[80]; + snprintf(script, sizeof(script), + "script -l python -- lldb.process.SetSelectedThreadByID(%lld)", tid); + return script; +} + +} // namespace + +void PrintStack(void* frames[], int num_frames) { + auto symbols = backtrace_symbols(frames, num_frames); + + for (int i = 0; i < num_frames; ++i) { + fprintf(stderr, "#%-2d ", i); + PrintStackTraceLine((symbols != nullptr) ? 
symbols[i] : nullptr, frames[i]); + } + free(symbols); +} + +void PrintStack(int first_frames_to_skip) { + // Default to getting stack traces with GDB, at least on Linux where we + // know how to attach to a particular thread. + // + // * Address space layout randomization (ASLR) interferes with getting good + // stack information from backtrace+addr2line. This is more likely to show + // up with LIB_MODE=shared builds (when kernel.randomize_va_space >= 1) + // but can also show up with LIB_MODE=static builds ((when + // kernel.randomize_va_space == 2). + // * It doesn't appear easy to detect when ASLR is in use. + // * With DEBUG_LEVEL < 2, backtrace() can skip frames that are not skipped + // in GDB. + // + // LLDB also available as an option + bool lldb_stack_trace = getenv("ROCKSDB_LLDB_STACK") != nullptr; +#if defined(OS_LINUX) + // Default true, override with ROCKSDB_BACKTRACE_STACK=1 + bool gdb_stack_trace = + !lldb_stack_trace && getenv("ROCKSDB_BACKTRACE_STACK") == nullptr; +#else + // Default false, override with ROCKSDB_GDB_STACK=1 + bool gdb_stack_trace = getenv("ROCKSDB_GDB_STACK") != nullptr; +#endif + // Also support invoking interactive debugger on stack trace, with this + // envvar set to non-empty + char* debug_env = getenv("ROCKSDB_DEBUG"); + bool debug = debug_env != nullptr && strlen(debug_env) > 0; + + if (lldb_stack_trace || gdb_stack_trace || debug) { + // Allow ouside debugger to attach, even with Yama security restrictions +#ifdef PR_SET_PTRACER_ANY + (void)prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0); +#endif + // Try to invoke GDB, either for stack trace or debugging. + long long attach_pid = getpid(); + // NOTE: we're in a signal handler, so no heap allocation + char attach_pid_str[20]; + snprintf(attach_pid_str, sizeof(attach_pid_str), "%lld", attach_pid); + + // `gdb -p PID` seems to always attach to main thread, but `gdb -p TID` + // seems to be able to attach to a particular thread in a process, which + // makes sense as the main thread TID == PID of the process. + // But I haven't found that gdb capability documented anywhere, so leave + // a back door to attach to main thread. 
+ long long gdb_attach_id = attach_pid; + // Save current thread id before fork + long long attach_tid = 0; +#ifdef OS_LINUX + attach_tid = gettid(); + if (getenv("ROCKSDB_DEBUG_USE_PID") == nullptr) { + gdb_attach_id = attach_tid; + } +#endif + + char gdb_attach_id_str[20]; + snprintf(gdb_attach_id_str, sizeof(gdb_attach_id_str), "%lld", + gdb_attach_id); + + pid_t child_pid = fork(); + if (child_pid == 0) { + // child process + if (debug) { + if (strcmp(debug_env, "lldb") == 0) { + fprintf(stderr, "Invoking LLDB for debugging (ROCKSDB_DEBUG=%s)...\n", + debug_env); + execlp(/*cmd in PATH*/ "lldb", /*arg0*/ "lldb", "-p", attach_pid_str, + /*"-Q",*/ "-o", GetLldbScriptSelectThread(attach_tid), + (char*)nullptr); + return; + } else { + fprintf(stderr, "Invoking GDB for debugging (ROCKSDB_DEBUG=%s)...\n", + debug_env); + execlp(/*cmd in PATH*/ "gdb", /*arg0*/ "gdb", "-p", gdb_attach_id_str, + (char*)nullptr); + return; + } + } else { + // Redirect child stdout to original stderr + dup2(2, 1); + // No child stdin (don't use pager) + close(0); + if (lldb_stack_trace) { + fprintf(stderr, "Invoking LLDB for stack trace...\n"); + + // Skip top ~8 frames here in PrintStack + auto bt_in_lldb = + "script -l python -- for f in lldb.thread.frames[8:]: print(f)"; + execlp(/*cmd in PATH*/ "lldb", /*arg0*/ "lldb", "-p", attach_pid_str, + "-b", "-Q", "-o", GetLldbScriptSelectThread(attach_tid), "-o", + bt_in_lldb, (char*)nullptr); + } else { + // gdb_stack_trace + fprintf(stderr, "Invoking GDB for stack trace...\n"); + + // Skip top ~4 frames here in PrintStack + // See https://stackoverflow.com/q/40991943/454544 + auto bt_in_gdb = + "frame apply level 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 " + "21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 " + "42 43 44 -q frame"; + // -n : Loading config files can apparently cause failures with the + // other options here. + // -batch : non-interactive; suppress banners as much as possible + execlp(/*cmd in PATH*/ "gdb", /*arg0*/ "gdb", "-n", "-batch", "-p", + gdb_attach_id_str, "-ex", bt_in_gdb, (char*)nullptr); + } + return; + } + } else { + // parent process; wait for child + int wstatus; + waitpid(child_pid, &wstatus, 0); + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == EXIT_SUCCESS) { + // Good + return; + } + } + fprintf(stderr, "GDB failed; falling back on backtrace+addr2line...\n"); + } + + const int kMaxFrames = 100; + void* frames[kMaxFrames]; + + auto num_frames = backtrace(frames, kMaxFrames); + PrintStack(&frames[first_frames_to_skip], num_frames - first_frames_to_skip); +} + +void PrintAndFreeStack(void* callstack, int num_frames) { + PrintStack(static_cast(callstack), num_frames); + free(callstack); +} + +void* SaveStack(int* num_frames, int first_frames_to_skip) { + const int kMaxFrames = 100; + void* frames[kMaxFrames]; + + auto count = backtrace(frames, kMaxFrames); + *num_frames = count - first_frames_to_skip; + void* callstack = malloc(sizeof(void*) * *num_frames); + memcpy(callstack, &frames[first_frames_to_skip], sizeof(void*) * *num_frames); + return callstack; +} + +static void StackTraceHandler(int sig) { + // reset to default handler + signal(sig, SIG_DFL); + fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig)); + // skip the top three signal handler related frames + PrintStack(3); + + // Efforts to fix or suppress TSAN warnings "signal-unsafe call inside of + // a signal" have failed, so just warn the user about them. 
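// [editor's note] The debugger-based path above reduces to this pattern
// (editorial sketch; pid_str is hypothetical): fork, exec gdb against the
// parent to print a backtrace, and have the parent wait for the child:
//   pid_t child = fork();
//   if (child == 0) {
//     execlp("gdb", "gdb", "-n", "-batch", "-p", pid_str,
//            "-ex", "bt", (char*)nullptr);  // only returns on exec failure
//     _exit(127);
//   } else {
//     int wstatus;
//     waitpid(child, &wstatus, 0);
//   }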
+#ifdef __SANITIZE_THREAD__
+  fprintf(stderr,
+          "==> NOTE: any above warnings about \"signal-unsafe call\" are\n"
+          "==> ignorable, as they are expected when generating a stack\n"
+          "==> trace because of a signal under TSAN. Consider why the\n"
+          "==> signal was generated to begin with, and the stack trace\n"
+          "==> in the TSAN warning can be useful for that. (The stack\n"
+          "==> trace printed by the signal handler is likely obscured\n"
+          "==> by TSAN output.)\n");
+#endif
+
+  // re-signal to default handler (so we still get core dump if needed...)
+  raise(sig);
+}
+
+void InstallStackTraceHandler() {
+  // just use the plain old signal as it's simple and sufficient
+  // for this use case
+  signal(SIGILL, StackTraceHandler);
+  signal(SIGSEGV, StackTraceHandler);
+  signal(SIGBUS, StackTraceHandler);
+  signal(SIGABRT, StackTraceHandler);
+  // Allow outside debugger to attach, even with Yama security restrictions.
+  // This is needed even outside of PrintStack() so that external mechanisms
+  // can dump stacks if they suspect that a test has hung.
+#ifdef PR_SET_PTRACER_ANY
+  (void)prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
+#endif
+}
+
+}  // namespace port
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.h
new file mode 100644
index 0000000000..5b3bf93205
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/stack_trace.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace port {
+
+// Install a signal handler to print callstack on the following signals:
+// SIGILL SIGSEGV SIGBUS SIGABRT
+// And also (Linux only for now) overrides security settings to allow outside
+// processes to attach to this one as a debugger. ONLY USE FOR NON-SECURITY
+// CRITICAL PROCESSES such as unit tests or benchmarking tools.
+// Currently supports only some POSIX implementations. No-op otherwise.
+void InstallStackTraceHandler();
+
+// Prints stack, skipping the first first_frames_to_skip frames
+void PrintStack(int first_frames_to_skip = 0);
+
+// Prints the given callstack
+void PrintAndFreeStack(void* callstack, int num_frames);
+
+// Save the current callstack
+void* SaveStack(int* num_frames, int first_frames_to_skip = 0);
+
+}  // namespace port
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/sys_time.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/sys_time.h
new file mode 100644
index 0000000000..f2137526b1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/sys_time.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// This file is a portable substitute for sys/time.h which does not exist on
+// Windows
+
+#pragma once
+
+#include "rocksdb/rocksdb_namespace.h"
+
+#if defined(OS_WIN) && (defined(_MSC_VER) || defined(__MINGW32__))
+
+#include <time.h>
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace port {
+
+struct TimeVal {
+  long tv_sec;
+  long tv_usec;
+};
+
+void GetTimeOfDay(TimeVal* tv, struct timezone* tz);
+
+inline struct tm* LocalTimeR(const time_t* timep, struct tm* result) {
+  errno_t ret = localtime_s(result, timep);
+  return (ret == 0) ? result : NULL;
+}
+
+}  // namespace port
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#else
+#include <sys/time.h>
+#include <time.h>
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace port {
+
+using TimeVal = struct timeval;
+
+inline void GetTimeOfDay(TimeVal* tv, struct timezone* tz) {
+  gettimeofday(tv, tz);
+}
+
+inline struct tm* LocalTimeR(const time_t* timep, struct tm* result) {
+  return localtime_r(timep, result);
+}
+
+}  // namespace port
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/util_logger.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/util_logger.h
new file mode 100644
index 0000000000..ce7e3a9416
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/util_logger.h
@@ -0,0 +1,18 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+// Include the appropriate platform specific file below. If you are
+// porting to a new platform, see "port_example.h" for documentation
+// of what the new port_<platform>.h file must provide.
+
+#if defined(OS_WIN)
+#include "port/win/win_logger.h"
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_default.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_default.cc
new file mode 100644
index 0000000000..48853f26e4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_default.cc
@@ -0,0 +1,45 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#if defined(OS_WIN)
+
+#include <mutex>
+
+#include "port/win/env_win.h"
+#include "rocksdb/env.h"
+#include "test_util/sync_point.h"
+#include "util/compression_context_cache.h"
+#include "util/thread_local.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace port {
+
+// We choose not to destroy the env because joining the threads from the
+// system loader, which destroys the statics (same as from DLLMain), creates
+// a system loader dead-lock. In this manner any remaining threads are
+// terminated OK.
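+// A minimal sketch of the pattern used below (illustrative only):
+//   static std::once_flag once;
+//   static Env* env = nullptr;
+//   std::call_once(once, [] { env = new port::WinEnv(); });
+// The singleton is intentionally never deleted, per the note above.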
+namespace {
+std::once_flag winenv_once_flag;
+Env* envptr;
+};  // namespace
+}  // namespace port
+
+Env* Env::Default() {
+  ThreadLocalPtr::InitSingletons();
+  CompressionContextCache::InitSingleton();
+  INIT_SYNC_POINT_SINGLETONS();
+  std::call_once(port::winenv_once_flag,
+                 []() { port::envptr = new port::WinEnv(); });
+  return port::envptr;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.cc
new file mode 100644
index 0000000000..2262eb59c4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.cc
@@ -0,0 +1,1437 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#if defined(OS_WIN)
+
+#include "port/win/env_win.h"
+
+#include <direct.h>  // _rmdir, _mkdir, _getcwd
+#include <errno.h>
+#include <io.h>  // _access
+#include <rpc.h>  // for uuid generation
+#include <shlwapi.h>
+#include <stdio.h>
+#include <string.h>
+#include <windows.h>
+#include <winioctl.h>
+
+#include <algorithm>
+#include <ctime>
+#include <thread>
+
+#include "monitoring/iostats_context_imp.h"
+#include "monitoring/thread_status_updater.h"
+#include "monitoring/thread_status_util.h"
+#include "port/lang.h"
+#include "port/port.h"
+#include "port/port_dirent.h"
+#include "port/win/io_win.h"
+#include "port/win/win_logger.h"
+#include "rocksdb/env.h"
+#include "rocksdb/slice.h"
+#include "strsafe.h"
+#include "util/string_util.h"
+
+// Undefine the functions windows might use (again)...
+#undef GetCurrentTime
+#undef DeleteFile
+#undef LoadLibrary
+
+namespace ROCKSDB_NAMESPACE {
+
+ThreadStatusUpdater* CreateThreadStatusUpdater() {
+  return new ThreadStatusUpdater();
+}
+
+namespace {
+
+// Sector size used when physical sector size cannot be obtained from device.
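+// (512 bytes is the traditional logical sector size; devices with larger
+// physical sectors report it via IOCTL_STORAGE_QUERY_PROPERTY in
+// GetSectorSize() below.)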
+static const size_t kSectorSize = 512;
+
+// RAII helpers for HANDLEs
+const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); };
+using UniqueCloseHandlePtr = std::unique_ptr<void, decltype(CloseHandleFunc)>;
+
+const auto FindCloseFunc = [](HANDLE h) { ::FindClose(h); };
+using UniqueFindClosePtr = std::unique_ptr<void, decltype(FindCloseFunc)>;
+
+void WinthreadCall(const char* label, std::error_code result) {
+  if (0 != result.value()) {
+    fprintf(stderr, "Winthread %s: %s\n", label,
+            errnoStr(result.value()).c_str());
+    abort();
+  }
+}
+
+}  // namespace
+
+namespace port {
+WinClock::WinClock()
+    : perf_counter_frequency_(0),
+      nano_seconds_per_period_(0),
+      GetSystemTimePreciseAsFileTime_(NULL) {
+  {
+    LARGE_INTEGER qpf;
+    BOOL ret __attribute__((__unused__));
+    ret = QueryPerformanceFrequency(&qpf);
+    assert(ret == TRUE);
+    perf_counter_frequency_ = qpf.QuadPart;
+
+    if (std::nano::den % perf_counter_frequency_ == 0) {
+      nano_seconds_per_period_ = std::nano::den / perf_counter_frequency_;
+    }
+  }
+
+  HMODULE module = GetModuleHandle("kernel32.dll");
+  if (module != NULL) {
+    GetSystemTimePreciseAsFileTime_ = (FnGetSystemTimePreciseAsFileTime)(
+        void*)GetProcAddress(module, "GetSystemTimePreciseAsFileTime");
+  }
+}
+
+void WinClock::SleepForMicroseconds(int micros) {
+  std::this_thread::sleep_for(std::chrono::microseconds(micros));
+}
+
+std::string WinClock::TimeToString(uint64_t secondsSince1970) {
+  std::string result;
+
+  const time_t seconds = secondsSince1970;
+  const int maxsize = 64;
+
+  struct tm t;
+  errno_t ret = localtime_s(&t, &seconds);
+
+  if (ret) {
+    result = std::to_string(seconds);
+  } else {
+    result.resize(maxsize);
+    char* p = &result[0];
+
+    int len =
+        snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900,
+                 t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec);
+    assert(len > 0);
+
+    result.resize(len);
+  }
+
+  return result;
+}
+
+uint64_t WinClock::NowMicros() {
+  if (GetSystemTimePreciseAsFileTime_ != NULL) {
+    // All std::chrono clocks on windows proved to return
+    // values that may repeat, which is not good enough for some uses.
+    const int64_t c_UnixEpochStartTicks = 116444736000000000LL;
+    const int64_t c_FtToMicroSec = 10;
+
+    // This interface needs to return system time and not
+    // just any microseconds because it is often used as an argument
+    // to TimedWait() on condition variable
+    FILETIME ftSystemTime;
+    GetSystemTimePreciseAsFileTime_(&ftSystemTime);
+
+    LARGE_INTEGER li;
+    li.LowPart = ftSystemTime.dwLowDateTime;
+    li.HighPart = ftSystemTime.dwHighDateTime;
+    // Subtract unix epoch start
+    li.QuadPart -= c_UnixEpochStartTicks;
+    // Convert to microsecs
+    li.QuadPart /= c_FtToMicroSec;
+    return li.QuadPart;
+  }
+  return std::chrono::duration_cast<std::chrono::microseconds>(
+             std::chrono::system_clock::now().time_since_epoch())
+      .count();
+}
+
+uint64_t WinClock::NowNanos() {
+  if (nano_seconds_per_period_ != 0) {
+    // All std::chrono clocks on windows have the same resolution, which is
+    // only good enough for microseconds but not nanoseconds.
+    // On Windows 8 and Windows 2012 Server
+    // GetSystemTimePreciseAsFileTime(&current_time) can be used
+    LARGE_INTEGER li;
+    QueryPerformanceCounter(&li);
+    // Convert performance counter to nanoseconds by precomputed ratio.
+    // Directly multiplying nano::den by li.QuadPart causes overflow.
+    // Only do this when nano::den is divisible by perf_counter_frequency_,
+    // which most likely is the case in reality. If it's not, fall back to
+    // high_resolution_clock, which may be less precise under old compilers.
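+    // Worked example (illustrative): with a 10 MHz performance counter,
+    // nano_seconds_per_period_ == 1000000000 / 10000000 == 100, so a raw
+    // counter reading of 42 becomes 4200 ns below.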
+    li.QuadPart *= nano_seconds_per_period_;
+    return li.QuadPart;
+  }
+  return std::chrono::duration_cast<std::chrono::nanoseconds>(
+             std::chrono::high_resolution_clock::now().time_since_epoch())
+      .count();
+}
+
+Status WinClock::GetCurrentTime(int64_t* unix_time) {
+  time_t time = std::time(nullptr);
+  if (time == (time_t)(-1)) {
+    return Status::NotSupported("Failed to get time");
+  }
+
+  *unix_time = time;
+  return Status::OK();
+}
+
+WinFileSystem::WinFileSystem(const std::shared_ptr<SystemClock>& clock)
+    : clock_(clock), page_size_(4 * 1024), allocation_granularity_(page_size_) {
+  SYSTEM_INFO sinfo;
+  GetSystemInfo(&sinfo);
+
+  page_size_ = sinfo.dwPageSize;
+  allocation_granularity_ = sinfo.dwAllocationGranularity;
+}
+
+const std::shared_ptr<WinFileSystem>& WinFileSystem::Default() {
+  STATIC_AVOID_DESTRUCTION(std::shared_ptr<WinFileSystem>, fs)
+  (std::make_shared<WinFileSystem>(WinClock::Default()));
+  return fs;
+}
+
+WinEnvIO::WinEnvIO(Env* hosted_env) : hosted_env_(hosted_env) {}
+
+WinEnvIO::~WinEnvIO() {}
+
+IOStatus WinFileSystem::DeleteFile(const std::string& fname,
+                                   const IOOptions& /*options*/,
+                                   IODebugContext* /*dbg*/) {
+  IOStatus result;
+
+  BOOL ret = RX_DeleteFile(RX_FN(fname).c_str());
+
+  if (!ret) {
+    auto lastError = GetLastError();
+    result = IOErrorFromWindowsError("Failed to delete: " + fname, lastError);
+  }
+
+  return result;
+}
+
+IOStatus WinFileSystem::Truncate(const std::string& fname, size_t size,
+                                 const IOOptions& /*options*/,
+                                 IODebugContext* /*dbg*/) {
+  IOStatus s;
+  int result = ROCKSDB_NAMESPACE::port::Truncate(fname, size);
+  if (result != 0) {
+    s = IOError("Failed to truncate: " + fname, errno);
+  }
+  return s;
+}
+
+IOStatus WinFileSystem::NewSequentialFile(
+    const std::string& fname, const FileOptions& options,
+    std::unique_ptr<FSSequentialFile>* result, IODebugContext* /*dbg*/) {
+  IOStatus s;
+
+  result->reset();
+
+  // Corruption test needs to rename and delete files of these kind
+  // while they are still open with another handle. For that reason we
+  // allow share_write and delete (allows rename).
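+  // (For example, without FILE_SHARE_DELETE below, a concurrent RenameFile()
+  // of the same path would fail with ERROR_SHARING_VIOLATION while this
+  // handle is open.)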
+  HANDLE hFile = INVALID_HANDLE_VALUE;
+
+  DWORD fileFlags = FILE_ATTRIBUTE_READONLY;
+
+  if (options.use_direct_reads && !options.use_mmap_reads) {
+    fileFlags |= FILE_FLAG_NO_BUFFERING;
+  }
+
+  {
+    IOSTATS_TIMER_GUARD(open_nanos);
+    hFile = RX_CreateFile(
+        RX_FN(fname).c_str(), GENERIC_READ,
+        FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
+        OPEN_EXISTING,  // Original fopen mode is "rb"
+        fileFlags, NULL);
+  }
+
+  if (INVALID_HANDLE_VALUE == hFile) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("Failed to open NewSequentialFile: " + fname,
+                                lastError);
+  } else {
+    result->reset(new WinSequentialFile(fname, hFile, options));
+  }
+  return s;
+}
+
+IOStatus WinFileSystem::NewRandomAccessFile(
+    const std::string& fname, const FileOptions& options,
+    std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) {
+  result->reset();
+  IOStatus s;
+
+  // Open the file for read-only random access.
+  // Random access is to disable read-ahead, as the system reads too much data.
+  DWORD fileFlags = FILE_ATTRIBUTE_READONLY;
+
+  if (options.use_direct_reads && !options.use_mmap_reads) {
+    fileFlags |= FILE_FLAG_NO_BUFFERING;
+  } else {
+    fileFlags |= FILE_FLAG_RANDOM_ACCESS;
+  }
+
+  // Shared access is necessary for corruption test to pass;
+  // almost all tests would work with a possible exception of fault_injection.
+  HANDLE hFile = 0;
+  {
+    IOSTATS_TIMER_GUARD(open_nanos);
+    hFile =
+        RX_CreateFile(RX_FN(fname).c_str(), GENERIC_READ,
+                      FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+                      NULL, OPEN_EXISTING, fileFlags, NULL);
+  }
+
+  if (INVALID_HANDLE_VALUE == hFile) {
+    auto lastError = GetLastError();
+    return IOErrorFromWindowsError(
+        "NewRandomAccessFile failed to Create/Open: " + fname, lastError);
+  }
+
+  UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc);
+
+  // CAUTION! This will map the entire file into the process address space.
+  // Not recommended for 32-bit platforms.
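+  // (Illustrative arithmetic: a 3 GB SST file cannot be mapped into the
+  // 2-4 GB user address space of a 32-bit process, so the mapping below
+  // would fail even though the file is otherwise readable.)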
+  if (options.use_mmap_reads) {
+    uint64_t fileSize;
+
+    s = GetFileSize(fname, IOOptions(), &fileSize, dbg);
+
+    if (s.ok()) {
+      // Will not map empty files
+      if (fileSize == 0) {
+        return IOError("NewRandomAccessFile failed to map empty file: " + fname,
+                       EINVAL);
+      }
+
+      HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READONLY,
+                                         0,  // At its present length
+                                         0,
+                                         NULL);  // Mapping name
+
+      if (!hMap) {
+        auto lastError = GetLastError();
+        return IOErrorFromWindowsError(
+            "Failed to create file mapping for NewRandomAccessFile: " + fname,
+            lastError);
+      }
+
+      UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc);
+
+      const void* mapped_region =
+          MapViewOfFileEx(hMap, FILE_MAP_READ,
+                          0,  // High DWORD of access start
+                          0,  // Low DWORD
+                          static_cast<SIZE_T>(fileSize),
+                          NULL);  // Let the OS choose the mapping
+
+      if (!mapped_region) {
+        auto lastError = GetLastError();
+        return IOErrorFromWindowsError(
+            "Failed to MapViewOfFile for NewRandomAccessFile: " + fname,
+            lastError);
+      }
+
+      result->reset(new WinMmapReadableFile(fname, hFile, hMap, mapped_region,
+                                            static_cast<size_t>(fileSize)));
+
+      mapGuard.release();
+      fileGuard.release();
+    }
+  } else {
+    result->reset(new WinRandomAccessFile(fname, hFile, page_size_, options));
+    fileGuard.release();
+  }
+  return s;
+}
+
+IOStatus WinFileSystem::OpenWritableFile(
+    const std::string& fname, const FileOptions& options,
+    std::unique_ptr<FSWritableFile>* result, bool reopen) {
+  const size_t c_BufferCapacity = 64 * 1024;
+
+  EnvOptions local_options(options);
+
+  result->reset();
+  IOStatus s;
+
+  DWORD fileFlags = FILE_ATTRIBUTE_NORMAL;
+
+  if (local_options.use_direct_writes && !local_options.use_mmap_writes) {
+    fileFlags = FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
+  }
+
+  // Desired access. We want write-only access here, but if we want to
+  // memory-map the file then there is no write-only mode, so we have to
+  // create it read/write. However, MapViewOfFile specifies write-only.
+  DWORD desired_access = GENERIC_WRITE;
+  DWORD shared_mode = FILE_SHARE_READ;
+
+  if (local_options.use_mmap_writes) {
+    desired_access |= GENERIC_READ;
+  } else {
+    // Adding this solely for tests to pass (fault_injection_test,
+    // wal_manager_test).
+    shared_mode |= (FILE_SHARE_WRITE | FILE_SHARE_DELETE);
+  }
+
+  // This will always truncate the file
+  DWORD creation_disposition = CREATE_ALWAYS;
+  if (reopen) {
+    creation_disposition = OPEN_ALWAYS;
+  }
+
+  HANDLE hFile = 0;
+  {
+    IOSTATS_TIMER_GUARD(open_nanos);
+    hFile = RX_CreateFile(
+        RX_FN(fname).c_str(),
+        desired_access,  // Access desired
+        shared_mode,
+        NULL,  // Security attributes
+        // Posix env says (reopen) ?
(O_CREAT | O_APPEND) : O_CREAT | O_TRUNC
+        creation_disposition,
+        fileFlags,  // Flags
+        NULL);      // Template File
+  }
+
+  if (INVALID_HANDLE_VALUE == hFile) {
+    auto lastError = GetLastError();
+    return IOErrorFromWindowsError(
+        "Failed to create a NewWritableFile: " + fname, lastError);
+  }
+
+  // We will start writing at the end, appending
+  if (reopen) {
+    LARGE_INTEGER zero_move;
+    zero_move.QuadPart = 0;
+    BOOL ret = SetFilePointerEx(hFile, zero_move, NULL, FILE_END);
+    if (!ret) {
+      auto lastError = GetLastError();
+      return IOErrorFromWindowsError(
+          "Failed to create a ReopenWritableFile move to the end: " + fname,
+          lastError);
+    }
+  }
+
+  if (options.use_mmap_writes) {
+    // We usually do not use memory mapping on SSD and thus we pass memory
+    // page_size
+    result->reset(new WinMmapFile(fname, hFile, page_size_,
+                                  allocation_granularity_, local_options));
+  } else {
+    // Here we want the buffer allocation to be aligned by the SSD page size
+    // and to be a multiple of it
+    result->reset(new WinWritableFile(fname, hFile, GetPageSize(),
+                                      c_BufferCapacity, local_options));
+  }
+  return s;
+}
+
+IOStatus WinFileSystem::NewWritableFile(const std::string& fname,
+                                        const FileOptions& options,
+                                        std::unique_ptr<FSWritableFile>* result,
+                                        IODebugContext* /*dbg*/) {
+  return OpenWritableFile(fname, options, result, false);
+}
+
+IOStatus WinFileSystem::ReopenWritableFile(
+    const std::string& fname, const FileOptions& options,
+    std::unique_ptr<FSWritableFile>* result, IODebugContext* /*dbg*/) {
+  return OpenWritableFile(fname, options, result, true);
+}
+
+IOStatus WinFileSystem::NewRandomRWFile(const std::string& fname,
+                                        const FileOptions& options,
+                                        std::unique_ptr<FSRandomRWFile>* result,
+                                        IODebugContext* /*dbg*/) {
+  IOStatus s;
+
+  // Open the file for read-write random access.
+  // Random access is to disable read-ahead, as the system reads too much data.
+  DWORD desired_access = GENERIC_READ | GENERIC_WRITE;
+  DWORD shared_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+  DWORD creation_disposition = OPEN_EXISTING;  // Fail if file does not exist
+  DWORD file_flags = FILE_FLAG_RANDOM_ACCESS;
+
+  if (options.use_direct_reads && options.use_direct_writes) {
+    file_flags |= FILE_FLAG_NO_BUFFERING;
+  }
+
+  // Shared access is necessary for corruption test to pass;
+  // almost all tests would work with a possible exception of fault_injection.
+  HANDLE hFile = 0;
+  {
+    IOSTATS_TIMER_GUARD(open_nanos);
+    hFile = RX_CreateFile(RX_FN(fname).c_str(), desired_access, shared_mode,
+                          NULL,  // Security attributes
+                          creation_disposition, file_flags, NULL);
+  }
+
+  if (INVALID_HANDLE_VALUE == hFile) {
+    auto lastError = GetLastError();
+    return IOErrorFromWindowsError(
+        "NewRandomRWFile failed to Create/Open: " + fname, lastError);
+  }
+
+  UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc);
+  result->reset(new WinRandomRWFile(fname, hFile, GetPageSize(), options));
+  fileGuard.release();
+
+  return s;
+}
+
+IOStatus WinFileSystem::NewMemoryMappedFileBuffer(
+    const std::string& fname, std::unique_ptr<MemoryMappedFileBuffer>* result) {
+  IOStatus s;
+  result->reset();
+
+  DWORD fileFlags = FILE_ATTRIBUTE_READONLY;
+
+  HANDLE hFile = INVALID_HANDLE_VALUE;
+  {
+    IOSTATS_TIMER_GUARD(open_nanos);
+    hFile = RX_CreateFile(
+        RX_FN(fname).c_str(), GENERIC_READ | GENERIC_WRITE,
+        FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
+        OPEN_EXISTING,  // Open only if it exists
+        fileFlags, NULL);
+  }
+
+  if (INVALID_HANDLE_VALUE == hFile) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError(
+        "Failed to open NewMemoryMappedFileBuffer: " + fname, lastError);
+    return s;
+  }
+  UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc);
+
+  uint64_t fileSize = 0;
+  s = GetFileSize(fname, IOOptions(), &fileSize, nullptr);
+  if (!s.ok()) {
+    return s;
+  }
+  // Will not map empty files
+  if (fileSize == 0) {
+    return IOStatus::NotSupported(
+        "NewMemoryMappedFileBuffer can not map zero length files: " + fname);
+  }
+
+  // size_t is 32-bit with 32-bit builds
+  if (fileSize > std::numeric_limits<size_t>::max()) {
+    return IOStatus::NotSupported(
+        "The specified file size does not fit into 32-bit memory addressing: " +
+        fname);
+  }
+
+  HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READWRITE,
+                                     0,  // Whole file at its present length
+                                     0,
+                                     NULL);  // Mapping name
+
+  if (!hMap) {
+    auto lastError = GetLastError();
+    return IOErrorFromWindowsError(
+        "Failed to create file mapping for: " + fname, lastError);
+  }
+  UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc);
+
+  void* base = MapViewOfFileEx(hMap, FILE_MAP_WRITE,
+                               0,  // High DWORD of access start
+                               0,  // Low DWORD
+                               static_cast<SIZE_T>(fileSize),
+                               NULL);  // Let the OS choose the mapping
+
+  if (!base) {
+    auto lastError = GetLastError();
+    return IOErrorFromWindowsError(
+        "Failed to MapViewOfFile for NewMemoryMappedFileBuffer: " + fname,
+        lastError);
+  }
+
+  result->reset(new WinMemoryMappedBuffer(hFile, hMap, base,
+                                          static_cast<size_t>(fileSize)));
+
+  mapGuard.release();
+  fileGuard.release();
+
+  return s;
+}
+
+IOStatus WinFileSystem::NewDirectory(const std::string& name,
+                                     const IOOptions& /*options*/,
+                                     std::unique_ptr<FSDirectory>* result,
+                                     IODebugContext* /*dbg*/) {
+  IOStatus s;
+  // Must be nullptr on failure
+  result->reset();
+
+  if (!DirExists(name)) {
+    s = IOErrorFromWindowsError("open folder: " + name, ERROR_DIRECTORY);
+    return s;
+  }
+
+  HANDLE handle = INVALID_HANDLE_VALUE;
+  // 0 - for access means read metadata
+  {
+    IOSTATS_TIMER_GUARD(open_nanos);
+    handle = RX_CreateFile(
+        RX_FN(name).c_str(), 0,
+        FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+        OPEN_EXISTING,
+        FILE_FLAG_BACKUP_SEMANTICS,  // make opening folders possible
+        NULL);
+  }
+
+  if (INVALID_HANDLE_VALUE == handle) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("open folder: " + name, lastError);
+    return s;
+  }
+
+  result->reset(new WinDirectory(name, handle));
+
+  return s;
+}
+
+IOStatus WinFileSystem::FileExists(const std::string& fname,
+                                   const IOOptions& /*opts*/,
+                                   IODebugContext* /*dbg*/) {
+  IOStatus s;
+  // TODO: This does not follow symbolic links at this point,
+  // which is consistent with the _access() impl on windows,
+  // but could be added.
+  WIN32_FILE_ATTRIBUTE_DATA attrs;
+  if (FALSE == RX_GetFileAttributesEx(RX_FN(fname).c_str(),
+                                      GetFileExInfoStandard, &attrs)) {
+    auto lastError = GetLastError();
+    switch (lastError) {
+      case ERROR_ACCESS_DENIED:
+      case ERROR_NOT_FOUND:
+      case ERROR_FILE_NOT_FOUND:
+      case ERROR_PATH_NOT_FOUND:
+        s = IOStatus::NotFound();
+        break;
+      default:
+        s = IOErrorFromWindowsError("Unexpected error for: " + fname,
+                                    lastError);
+        break;
+    }
+  }
+  return s;
+}
+
+IOStatus WinFileSystem::GetChildren(const std::string& dir,
+                                    const IOOptions& /*opts*/,
+                                    std::vector<std::string>* result,
+                                    IODebugContext* /*dbg*/) {
+  IOStatus status;
+  result->clear();
+
+  RX_WIN32_FIND_DATA data;
+  memset(&data, 0, sizeof(data));
+  std::string pattern(dir);
+  pattern.append("\\").append("*");
+
+  HANDLE handle =
+      RX_FindFirstFileEx(RX_FN(pattern).c_str(),
+                         // Do not want alternative name
+                         FindExInfoBasic, &data, FindExSearchNameMatch,
+                         NULL,  // lpSearchFilter
+                         0);
+
+  if (handle == INVALID_HANDLE_VALUE) {
+    auto lastError = GetLastError();
+    switch (lastError) {
+      case ERROR_NOT_FOUND:
+      case ERROR_ACCESS_DENIED:
+      case ERROR_FILE_NOT_FOUND:
+      case ERROR_PATH_NOT_FOUND:
+        status = IOStatus::NotFound();
+        break;
+      default:
+        status = IOErrorFromWindowsError("Failed to GetChildren for: " + dir,
+                                         lastError);
+    }
+    return status;
+  }
+
+  UniqueFindClosePtr fc(handle, FindCloseFunc);
+
+  // For safety
+  data.cFileName[MAX_PATH - 1] = 0;
+
+  while (true) {
+    // Filter out '.' and '..' directory entries,
+    // which appear only on some platforms.
+    const bool ignore =
+        ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) &&
+        (RX_FNCMP(data.cFileName, ".") == 0 ||
+         RX_FNCMP(data.cFileName, "..") == 0);
+    if (!ignore) {
+      auto x = RX_FILESTRING(data.cFileName, RX_FNLEN(data.cFileName));
+      result->push_back(FN_TO_RX(x));
+    }
+
+    BOOL ret = RX_FindNextFile(handle, &data);
+    // If the function fails the return value is zero
+    // and non-zero otherwise. Not TRUE or FALSE.
+    if (ret == FALSE) {
+      // Posix does not care why we stopped
+      break;
+    }
+    data.cFileName[MAX_PATH - 1] = 0;
+  }
+  return status;
+}
+
+IOStatus WinFileSystem::CreateDir(const std::string& name,
+                                  const IOOptions& /*opts*/,
+                                  IODebugContext* /*dbg*/) {
+  IOStatus result;
+  BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL);
+  if (!ret) {
+    auto lastError = GetLastError();
+    result = IOErrorFromWindowsError("Failed to create a directory: " + name,
+                                     lastError);
+  }
+
+  return result;
+}
+
+IOStatus WinFileSystem::CreateDirIfMissing(const std::string& name,
+                                           const IOOptions& /*opts*/,
+                                           IODebugContext* /*dbg*/) {
+  IOStatus result;
+
+  if (DirExists(name)) {
+    return result;
+  }
+
+  BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL);
+  if (!ret) {
+    auto lastError = GetLastError();
+    if (lastError != ERROR_ALREADY_EXISTS) {
+      result = IOErrorFromWindowsError("Failed to create a directory: " + name,
+                                       lastError);
+    } else {
+      result = IOStatus::IOError(name + ": exists but is not a directory");
+    }
+  }
+  return result;
+}
+
+IOStatus WinFileSystem::DeleteDir(const std::string& name,
+                                  const IOOptions& /*options*/,
+                                  IODebugContext* /*dbg*/) {
+  IOStatus result;
+  BOOL ret = RX_RemoveDirectory(RX_FN(name).c_str());
+  if (!ret) {
+    auto lastError = GetLastError();
+    result =
+        IOErrorFromWindowsError("Failed to remove dir: " + name, lastError);
+  }
+  return result;
+}
+
+IOStatus WinFileSystem::GetFileSize(const std::string& fname,
+                                    const IOOptions& /*opts*/, uint64_t* size,
+                                    IODebugContext* /*dbg*/) {
+  IOStatus s;
+
+  WIN32_FILE_ATTRIBUTE_DATA attrs;
+  if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard,
+                             &attrs)) {
+    ULARGE_INTEGER file_size;
+    file_size.HighPart = attrs.nFileSizeHigh;
+    file_size.LowPart = attrs.nFileSizeLow;
+    *size = file_size.QuadPart;
+  } else {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("Can not get size for: " + fname, lastError);
+  }
+  return s;
+}
+
+uint64_t WinFileSystem::FileTimeToUnixTime(const FILETIME& ftTime) {
+  const uint64_t c_FileTimePerSecond = 10000000U;
+  // UNIX epoch starts on 1970-01-01T00:00:00Z
+  // Windows FILETIME starts on 1601-01-01T00:00:00Z
+  // Therefore, we need to subtract the below number of seconds from
+  // the seconds that we obtain from FILETIME, with an obvious loss of
+  // precision.
+  const uint64_t c_SecondBeforeUnixEpoch = 11644473600U;
+
+  ULARGE_INTEGER li;
+  li.HighPart = ftTime.dwHighDateTime;
+  li.LowPart = ftTime.dwLowDateTime;
+
+  uint64_t result =
+      (li.QuadPart / c_FileTimePerSecond) - c_SecondBeforeUnixEpoch;
+  return result;
+}
+
+IOStatus WinFileSystem::GetFileModificationTime(const std::string& fname,
+                                                const IOOptions& /*opts*/,
+                                                uint64_t* file_mtime,
+                                                IODebugContext* /*dbg*/) {
+  IOStatus s;
+
+  WIN32_FILE_ATTRIBUTE_DATA attrs;
+  if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard,
+                             &attrs)) {
+    *file_mtime = FileTimeToUnixTime(attrs.ftLastWriteTime);
+  } else {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError(
+        "Can not get file modification time for: " + fname, lastError);
+    *file_mtime = 0;
+  }
+
+  return s;
+}
+
+IOStatus WinFileSystem::RenameFile(const std::string& src,
+                                   const std::string& target,
+                                   const IOOptions& /*opts*/,
+                                   IODebugContext* /*dbg*/) {
+  IOStatus result;
+
+  // rename() is not capable of replacing the existing file as on Linux
+  // so use OS API directly
+  if (!RX_MoveFileEx(RX_FN(src).c_str(), RX_FN(target).c_str(),
+                     MOVEFILE_REPLACE_EXISTING)) {
+    DWORD lastError = GetLastError();
+
+    std::string text("Failed to rename: ");
+    text.append(src).append(" to: ").append(target);
+
+    result = IOErrorFromWindowsError(text, lastError);
+  }
+
+  return result;
+}
+
+IOStatus WinFileSystem::LinkFile(const std::string& src,
+                                 const std::string& target,
+                                 const IOOptions& /*opts*/,
+                                 IODebugContext* /*dbg*/) {
+  IOStatus result;
+
+  if (!RX_CreateHardLink(RX_FN(target).c_str(), RX_FN(src).c_str(), NULL)) {
+    DWORD lastError = GetLastError();
+    if (lastError == ERROR_NOT_SAME_DEVICE) {
+      return IOStatus::NotSupported("No cross FS links allowed");
+    }
+
+    std::string text("Failed to link: ");
+    text.append(src).append(" to: ").append(target);
+
+    result = IOErrorFromWindowsError(text, lastError);
+  }
+
+  return result;
+}
+
+IOStatus WinFileSystem::NumFileLinks(const std::string& fname,
+                                     const IOOptions& /*opts*/, uint64_t* count,
+                                     IODebugContext* /*dbg*/) {
+  IOStatus s;
+  HANDLE handle =
+      RX_CreateFile(RX_FN(fname).c_str(), 0,
+                    FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+                    NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
+
+  if (INVALID_HANDLE_VALUE == handle) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("NumFileLinks: " + fname, lastError);
+    return s;
+  }
+  UniqueCloseHandlePtr handle_guard(handle, CloseHandleFunc);
+  FILE_STANDARD_INFO standard_info;
+  if (0 != GetFileInformationByHandleEx(handle, FileStandardInfo,
+                                        &standard_info,
+                                        sizeof(standard_info))) {
+    *count = standard_info.NumberOfLinks;
+  } else {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("GetFileInformationByHandleEx: " + fname,
+                                lastError);
+  }
+  return s;
+}
+
+IOStatus WinFileSystem::AreFilesSame(const std::string& first,
+                                     const std::string& second,
+                                     const IOOptions& /*opts*/, bool* res,
+                                     IODebugContext* /*dbg*/) {
+// For MinGW builds
+#if (_WIN32_WINNT == _WIN32_WINNT_VISTA)
+  IOStatus s = IOStatus::NotSupported();
+#else
+  assert(res != nullptr);
+  IOStatus s;
+  if (res == nullptr) {
+    s = IOStatus::InvalidArgument("res");
+    return s;
+  }
+
+  // 0 - for access means read metadata
+  HANDLE file_1 = RX_CreateFile(
+      RX_FN(first).c_str(), 0,
+      FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+      OPEN_EXISTING,
+      FILE_FLAG_BACKUP_SEMANTICS,  // make opening folders possible
+      NULL);
+
+  if (INVALID_HANDLE_VALUE == file_1) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("open file: " + first, lastError);
+    return s;
+  }
+  UniqueCloseHandlePtr g_1(file_1, CloseHandleFunc);
+
+  HANDLE file_2 = RX_CreateFile(
+      RX_FN(second).c_str(), 0,
+      FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+      OPEN_EXISTING,
+      FILE_FLAG_BACKUP_SEMANTICS,  // make opening folders possible
+      NULL);
+
+  if (INVALID_HANDLE_VALUE == file_2) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("open file: " + second, lastError);
+    return s;
+  }
+  UniqueCloseHandlePtr g_2(file_2, CloseHandleFunc);
+
+  FILE_ID_INFO FileInfo_1;
+  BOOL result = GetFileInformationByHandleEx(file_1, FileIdInfo, &FileInfo_1,
+                                             sizeof(FileInfo_1));
+
+  if (!result) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("stat file: " + first, lastError);
+    return s;
+  }
+
+  FILE_ID_INFO FileInfo_2;
+  result = GetFileInformationByHandleEx(file_2, FileIdInfo, &FileInfo_2,
+                                        sizeof(FileInfo_2));
+
+  if (!result) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("stat file: " + second, lastError);
+    return s;
+  }
+
+  if (FileInfo_1.VolumeSerialNumber == FileInfo_2.VolumeSerialNumber) {
+    *res =
+        (0 == memcmp(FileInfo_1.FileId.Identifier, FileInfo_2.FileId.Identifier,
+                     sizeof(FileInfo_1.FileId.Identifier)));
+  } else {
+    *res = false;
+  }
+#endif
+  return s;
+}
+
+IOStatus WinFileSystem::LockFile(const std::string& lockFname,
+                                 const IOOptions& /*opts*/, FileLock** lock,
+                                 IODebugContext* /*dbg*/) {
+  assert(lock != nullptr);
+
+  *lock = NULL;
+  IOStatus result;
+
+  // No-sharing, this is a LOCK file
+  const DWORD ExclusiveAccessON = 0;
+
+  // Obtain exclusive access to the LOCK file.
+  // Previously, instead of the NORMAL attr we set DELETE on close and that
+  // worked well, except with the fault_injection test that insists on
+  // deleting it.
+  HANDLE hFile = 0;
+  {
+    IOSTATS_TIMER_GUARD(open_nanos);
+    hFile = RX_CreateFile(RX_FN(lockFname).c_str(),
+                          (GENERIC_READ | GENERIC_WRITE), ExclusiveAccessON,
+                          NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+  }
+
+  if (INVALID_HANDLE_VALUE == hFile) {
+    auto lastError = GetLastError();
+    result = IOErrorFromWindowsError("Failed to create lock file: " + lockFname,
+                                     lastError);
+  } else {
+    *lock = new WinFileLock(hFile);
+  }
+
+  return result;
+}
+
+IOStatus WinFileSystem::UnlockFile(FileLock* lock, const IOOptions& /*opts*/,
+                                   IODebugContext* /*dbg*/) {
+  IOStatus result;
+
+  assert(lock != nullptr);
+
+  delete lock;
+
+  return result;
+}
+
+IOStatus WinFileSystem::GetTestDirectory(const IOOptions& opts,
+                                         std::string* result,
+                                         IODebugContext* dbg) {
+  std::string output;
+
+  const char* env = getenv("TEST_TMPDIR");
+  if (env && env[0] != '\0') {
+    output = env;
+  } else {
+    env = getenv("TMP");
+
+    if (env && env[0] != '\0') {
+      output = env;
+    } else {
+      output = "c:\\tmp";
+    }
+  }
+  CreateDir(output, opts, dbg);
+
+  output.append("\\testrocksdb-");
+  output.append(std::to_string(GetCurrentProcessId()));
+
+  CreateDir(output, opts, dbg);
+
+  output.swap(*result);
+
+  return IOStatus::OK();
+}
+
+IOStatus WinFileSystem::NewLogger(const std::string& fname,
+                                  const IOOptions& /*opts*/,
+                                  std::shared_ptr<Logger>* result,
+                                  IODebugContext* /*dbg*/) {
+  IOStatus s;
+
+  result->reset();
+
+  HANDLE hFile = 0;
+  {
+    IOSTATS_TIMER_GUARD(open_nanos);
+    hFile = RX_CreateFile(
+        RX_FN(fname).c_str(), GENERIC_WRITE,
+        FILE_SHARE_READ | FILE_SHARE_DELETE,  // In RocksDB log files are
+                                              // renamed and deleted before
+                                              // they are closed. This enables
+                                              // doing so.
+        NULL,
+        CREATE_ALWAYS,  // Original fopen mode is "w"
+        FILE_ATTRIBUTE_NORMAL, NULL);
+  }
+
+  if (INVALID_HANDLE_VALUE == hFile) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("Failed to open LogFile: " + fname, lastError);
+  } else {
+    {
+      // With log files we want to set the true creation time as of now,
+      // because the system for some reason caches the attributes of the
+      // previous file that has just been renamed from this name, so
+      // auto_roll_logger_test fails.
+      FILETIME ft;
+      GetSystemTimeAsFileTime(&ft);
+      // Set creation, last access and last write time to the same value
+      SetFileTime(hFile, &ft, &ft, &ft);
+    }
+    result->reset(new WinLogger(&WinEnvThreads::gettid, clock_.get(), hFile));
+  }
+  return s;
+}
+
+IOStatus WinFileSystem::IsDirectory(const std::string& path,
+                                    const IOOptions& /*opts*/, bool* is_dir,
+                                    IODebugContext* /*dbg*/) {
+  BOOL ret = RX_PathIsDirectory(RX_FN(path).c_str());
+  if (is_dir) {
+    *is_dir = ret ? true : false;
+  }
+  return IOStatus::OK();
+}
+
+Status WinEnvIO::GetHostName(char* name, uint64_t len) {
+  Status s;
+  DWORD nSize = static_cast<DWORD>(
+      std::min<uint64_t>(len, std::numeric_limits<DWORD>::max()));
+
+  if (!::GetComputerNameA(name, &nSize)) {
+    auto lastError = GetLastError();
+    s = IOErrorFromWindowsError("GetHostName", lastError);
+  } else {
+    name[nSize] = 0;
+  }
+
+  return s;
+}
+
+IOStatus WinFileSystem::GetAbsolutePath(const std::string& db_path,
+                                        const IOOptions& /*options*/,
+                                        std::string* output_path,
+                                        IODebugContext* dbg) {
+  // Check if we already have an absolute path.
+  // For test compatibility we will consider a starting slash as an
+  // absolute path.
+  if ((!db_path.empty() && (db_path[0] == '\\' || db_path[0] == '/')) ||
+      !RX_PathIsRelative(RX_FN(db_path).c_str())) {
+    *output_path = db_path;
+    return IOStatus::OK();
+  }
+
+  RX_FILESTRING result;
+  result.resize(MAX_PATH);
+
+  // Hopefully nothing changes the current directory while we do this;
+  // however _getcwd also suffers from the same limitation.
+  DWORD len = RX_GetCurrentDirectory(MAX_PATH, &result[0]);
+  if (len == 0) {
+    auto lastError = GetLastError();
+    return IOErrorFromWindowsError("Failed to get current working directory",
+                                   lastError);
+  }
+
+  result.resize(len);
+  std::string res = FN_TO_RX(result);
+
+  res.swap(*output_path);
+  return IOStatus::OK();
+}
+
+IOStatus WinFileSystem::GetFreeSpace(const std::string& path,
+                                     const IOOptions& /*options*/,
+                                     uint64_t* diskfree,
+                                     IODebugContext* /*dbg*/) {
+  assert(diskfree != nullptr);
+  ULARGE_INTEGER freeBytes;
+  BOOL f = RX_GetDiskFreeSpaceEx(RX_FN(path).c_str(), &freeBytes, NULL, NULL);
+  if (f) {
+    *diskfree = freeBytes.QuadPart;
+    return IOStatus::OK();
+  } else {
+    DWORD lastError = GetLastError();
+    return IOErrorFromWindowsError("Failed to get free space: " + path,
+                                   lastError);
+  }
+}
+
+FileOptions WinFileSystem::OptimizeForLogWrite(
+    const FileOptions& file_options, const DBOptions& db_options) const {
+  FileOptions optimized(file_options);
+  // These two are the same as the default optimizations
+  optimized.bytes_per_sync = db_options.wal_bytes_per_sync;
+  optimized.writable_file_max_buffer_size =
+      db_options.writable_file_max_buffer_size;
+
+  // This adversely affects p999 latencies on Windows
+  optimized.use_mmap_writes = false;
+  // Direct writes will produce a huge perf impact on
+  // Windows. Pre-allocate space for WAL.
+  optimized.use_direct_writes = false;
+  return optimized;
+}
+
+FileOptions WinFileSystem::OptimizeForManifestWrite(
+    const FileOptions& options) const {
+  FileOptions optimized(options);
+  optimized.use_mmap_writes = false;
+  optimized.use_direct_reads = false;
+  return optimized;
+}
+
+FileOptions WinFileSystem::OptimizeForManifestRead(
+    const FileOptions& file_options) const {
+  FileOptions optimized(file_options);
+  optimized.use_mmap_writes = false;
+  optimized.use_direct_reads = false;
+  return optimized;
+}
+
+// Returns true iff the named directory exists and is a directory.
+bool WinFileSystem::DirExists(const std::string& dname) {
+  WIN32_FILE_ATTRIBUTE_DATA attrs;
+  if (RX_GetFileAttributesEx(RX_FN(dname).c_str(), GetFileExInfoStandard,
+                             &attrs)) {
+    return 0 != (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
+  }
+  return false;
+}
+
+size_t WinFileSystem::GetSectorSize(const std::string& fname) {
+  size_t sector_size = kSectorSize;
+
+  // obtain device handle
+  char devicename[7] = "\\\\.\\";
+  int erresult = 0;
+  if (RX_PathIsRelative(RX_FN(fname).c_str())) {
+    RX_FILESTRING rx_current_dir;
+    rx_current_dir.resize(MAX_PATH);
+    DWORD len = RX_GetCurrentDirectory(MAX_PATH, &rx_current_dir[0]);
+    if (len == 0) {
+      return sector_size;
+    }
+    rx_current_dir.resize(len);
+    std::string current_dir = FN_TO_RX(rx_current_dir);
+    erresult =
+        strncat_s(devicename, sizeof(devicename), current_dir.c_str(), 2);
+  } else {
+    erresult = strncat_s(devicename, sizeof(devicename), fname.c_str(), 2);
+  }
+
+  if (erresult) {
+    assert(false);
+    return sector_size;
+  }
+
+  HANDLE hDevice = CreateFile(devicename, 0, 0, nullptr, OPEN_EXISTING,
+                              FILE_ATTRIBUTE_NORMAL, nullptr);
+
+  if (hDevice == INVALID_HANDLE_VALUE) {
+    return sector_size;
+  }
+
+  STORAGE_PROPERTY_QUERY spropertyquery;
+  spropertyquery.PropertyId = StorageAccessAlignmentProperty;
+  spropertyquery.QueryType = PropertyStandardQuery;
+
+  BYTE output_buffer[sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR)];
+  DWORD output_bytes = 0;
+
+  BOOL ret = DeviceIoControl(
+      hDevice, IOCTL_STORAGE_QUERY_PROPERTY, &spropertyquery,
+      sizeof(spropertyquery), output_buffer,
+      sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR), &output_bytes, nullptr);
+
+  if (ret) {
+    sector_size = ((STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR*)output_buffer)
+                      ->BytesPerLogicalSector;
+  } else {
+    // Many devices do not support StorageAccessAlignmentProperty. Any failure
+    // here and we fall back to logical alignment.
+
+    DISK_GEOMETRY_EX geometry = {0};
+    ret = DeviceIoControl(hDevice, IOCTL_DISK_GET_DRIVE_GEOMETRY, nullptr, 0,
+                          &geometry, sizeof(geometry), &output_bytes, nullptr);
+    if (ret) {
+      sector_size = geometry.Geometry.BytesPerSector;
+    }
+  }
+
+  if (hDevice != INVALID_HANDLE_VALUE) {
+    CloseHandle(hDevice);
+  }
+
+  return sector_size;
+}
+
+////////////////////////////////////////////////////////////////////////
+// WinEnvThreads
+
+WinEnvThreads::WinEnvThreads(Env* hosted_env)
+    : hosted_env_(hosted_env), thread_pools_(Env::Priority::TOTAL) {
+  for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) {
+    thread_pools_[pool_id].SetThreadPriority(
+        static_cast<Env::Priority>(pool_id));
+    // This allows later initializing the thread-local-env of each thread.
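+    // (Env::Priority enumerates the pools: BOTTOM, LOW, HIGH, USER, with
+    // TOTAL as the count.)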
+    thread_pools_[pool_id].SetHostEnv(hosted_env);
+  }
+}
+
+WinEnvThreads::~WinEnvThreads() {
+  WaitForJoin();
+
+  for (auto& thpool : thread_pools_) {
+    thpool.JoinAllThreads();
+  }
+}
+
+void WinEnvThreads::Schedule(void (*function)(void*), void* arg,
+                             Env::Priority pri, void* tag,
+                             void (*unschedFunction)(void* arg)) {
+  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
+  thread_pools_[pri].Schedule(function, arg, tag, unschedFunction);
+}
+
+int WinEnvThreads::UnSchedule(void* arg, Env::Priority pri) {
+  return thread_pools_[pri].UnSchedule(arg);
+}
+
+namespace {
+
+struct StartThreadState {
+  void (*user_function)(void*);
+  void* arg;
+};
+
+void* StartThreadWrapper(void* arg) {
+  std::unique_ptr<StartThreadState> state(
+      reinterpret_cast<StartThreadState*>(arg));
+  state->user_function(state->arg);
+  return nullptr;
+}
+
+}  // namespace
+
+void WinEnvThreads::StartThread(void (*function)(void* arg), void* arg) {
+  std::unique_ptr<StartThreadState> state(new StartThreadState);
+  state->user_function = function;
+  state->arg = arg;
+  try {
+    Thread th(&StartThreadWrapper, state.get());
+    state.release();
+
+    std::lock_guard<std::mutex> lg(mu_);
+    threads_to_join_.push_back(std::move(th));
+
+  } catch (const std::system_error& ex) {
+    WinthreadCall("start thread", ex.code());
+  }
+}
+
+void WinEnvThreads::WaitForJoin() {
+  for (auto& th : threads_to_join_) {
+    th.join();
+  }
+  threads_to_join_.clear();
+}
+
+unsigned int WinEnvThreads::GetThreadPoolQueueLen(Env::Priority pri) const {
+  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
+  return thread_pools_[pri].GetQueueLen();
+}
+
+int WinEnvThreads::ReserveThreads(int threads_to_reserved, Env::Priority pri) {
+  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
+  return thread_pools_[pri].ReserveThreads(threads_to_reserved);
+}
+
+int WinEnvThreads::ReleaseThreads(int threads_to_released, Env::Priority pri) {
+  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
+  return thread_pools_[pri].ReleaseThreads(threads_to_released);
+}
+
+uint64_t WinEnvThreads::gettid() {
+  uint64_t thread_id = GetCurrentThreadId();
+  return thread_id;
+}
+
+uint64_t WinEnvThreads::GetThreadID() const { return gettid(); }
+
+void WinEnvThreads::SetBackgroundThreads(int num, Env::Priority pri) {
+  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
+  thread_pools_[pri].SetBackgroundThreads(num);
+}
+
+int WinEnvThreads::GetBackgroundThreads(Env::Priority pri) {
+  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
+  return thread_pools_[pri].GetBackgroundThreads();
+}
+
+void WinEnvThreads::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) {
+  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
+  thread_pools_[pri].IncBackgroundThreadsIfNeeded(num);
+}
+
+/////////////////////////////////////////////////////////////////////////
+// WinEnv
+
+WinEnv::WinEnv()
+    : CompositeEnv(WinFileSystem::Default(), WinClock::Default()),
+      winenv_io_(this),
+      winenv_threads_(this) {
+  // Protected member of the base class
+  thread_status_updater_ = CreateThreadStatusUpdater();
+}
+
+WinEnv::~WinEnv() {
+  // All threads must be joined before the deletion of
+  // thread_status_updater_.
+  delete thread_status_updater_;
+}
+
+Status WinEnv::GetThreadList(std::vector<ThreadStatus>* thread_list) {
+  assert(thread_status_updater_);
+  return thread_status_updater_->GetThreadList(thread_list);
+}
+
+Status WinEnv::GetHostName(char* name, uint64_t len) {
+  return winenv_io_.GetHostName(name, len);
+}
+
+void WinEnv::Schedule(void (*function)(void*), void* arg, Env::Priority pri,
+                      void* tag, void (*unschedFunction)(void* arg)) {
+  return winenv_threads_.Schedule(function, arg, pri, tag, unschedFunction);
+}
+
+int WinEnv::UnSchedule(void* arg, Env::Priority pri) {
+  return winenv_threads_.UnSchedule(arg, pri);
+}
+
+void WinEnv::StartThread(void (*function)(void* arg), void* arg) {
+  return winenv_threads_.StartThread(function, arg);
+}
+
+void WinEnv::WaitForJoin() { return winenv_threads_.WaitForJoin(); }
+
+unsigned int WinEnv::GetThreadPoolQueueLen(Env::Priority pri) const {
+  return winenv_threads_.GetThreadPoolQueueLen(pri);
+}
+
+int WinEnv::ReserveThreads(int threads_to_reserved, Env::Priority pri) {
+  return winenv_threads_.ReserveThreads(threads_to_reserved, pri);
+}
+
+int WinEnv::ReleaseThreads(int threads_to_released, Env::Priority pri) {
+  return winenv_threads_.ReleaseThreads(threads_to_released, pri);
+}
+
+uint64_t WinEnv::GetThreadID() const { return winenv_threads_.GetThreadID(); }
+
+// Allow increasing the number of worker threads.
+void WinEnv::SetBackgroundThreads(int num, Env::Priority pri) {
+  return winenv_threads_.SetBackgroundThreads(num, pri);
+}
+
+int WinEnv::GetBackgroundThreads(Env::Priority pri) {
+  return winenv_threads_.GetBackgroundThreads(pri);
+}
+
+void WinEnv::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) {
+  return winenv_threads_.IncBackgroundThreadsIfNeeded(num, pri);
+}
+
+}  // namespace port
+
+std::shared_ptr<FileSystem> FileSystem::Default() {
+  return port::WinFileSystem::Default();
+}
+
+const std::shared_ptr<SystemClock>& SystemClock::Default() {
+  STATIC_AVOID_DESTRUCTION(std::shared_ptr<SystemClock>, clock)
+  (std::make_shared<port::WinClock>());
+  return clock;
+}
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.h
new file mode 100644
index 0000000000..cf04ec2fec
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/env_win.h
@@ -0,0 +1,305 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// An Env is an interface used by the rocksdb implementation to access
+// operating system functionality like the filesystem etc. Callers
+// may wish to provide a custom Env object when opening a database to
+// get fine-grained control; e.g., to rate limit file system operations.
+//
+// All Env implementations are safe for concurrent access from
+// multiple threads without any external synchronization.
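+//
+// Illustrative use from application code (hypothetical snippet, not part of
+// this header):
+//   rocksdb::Options options;
+//   options.env = rocksdb::Env::Default();  // a WinEnv on Windows builds
+//   rocksdb::DB* db = nullptr;
+//   rocksdb::Status s = rocksdb::DB::Open(options, "test.db", &db);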
+
+#pragma once
+
+#include <stdint.h>
+#include <windows.h>
+
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include "env/composite_env_wrapper.h"
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/system_clock.h"
+#include "util/threadpool_imp.h"
+
+#undef GetCurrentTime
+#undef DeleteFile
+#undef LoadLibrary
+
+namespace ROCKSDB_NAMESPACE {
+namespace port {
+
+// Currently not designed for inheritance but rather a replacement
+class WinEnvThreads {
+ public:
+  explicit WinEnvThreads(Env* hosted_env);
+
+  ~WinEnvThreads();
+
+  WinEnvThreads(const WinEnvThreads&) = delete;
+  WinEnvThreads& operator=(const WinEnvThreads&) = delete;
+
+  void Schedule(void (*function)(void*), void* arg, Env::Priority pri,
+                void* tag, void (*unschedFunction)(void* arg));
+
+  int UnSchedule(void* arg, Env::Priority pri);
+
+  void StartThread(void (*function)(void* arg), void* arg);
+
+  void WaitForJoin();
+
+  unsigned int GetThreadPoolQueueLen(Env::Priority pri) const;
+
+  int ReserveThreads(int threads_to_be_reserved, Env::Priority pri);
+
+  int ReleaseThreads(int threads_to_be_released, Env::Priority pri);
+
+  static uint64_t gettid();
+
+  uint64_t GetThreadID() const;
+
+  // Allow increasing the number of worker threads.
+  void SetBackgroundThreads(int num, Env::Priority pri);
+  int GetBackgroundThreads(Env::Priority pri);
+
+  void IncBackgroundThreadsIfNeeded(int num, Env::Priority pri);
+
+ private:
+  Env* hosted_env_;
+  mutable std::mutex mu_;
+  std::vector<ThreadPoolImpl> thread_pools_;
+  std::vector<Thread> threads_to_join_;
+};
+
+class WinClock : public SystemClock {
+ public:
+  WinClock();
+  virtual ~WinClock() {}
+
+  static const char* kClassName() { return "WindowsClock"; }
+  const char* Name() const override { return kDefaultName(); }
+  const char* NickName() const override { return kClassName(); }
+
+  uint64_t NowMicros() override;
+
+  uint64_t NowNanos() override;
+
+  // 0 indicates not supported
+  uint64_t CPUMicros() override { return 0; }
+  void SleepForMicroseconds(int micros) override;
+
+  Status GetCurrentTime(int64_t* unix_time) override;
+  // Converts seconds-since-Jan-01-1970 to a printable string
+  std::string TimeToString(uint64_t time) override;
+
+  uint64_t GetPerfCounterFrequency() const { return perf_counter_frequency_; }
+
+ private:
+  using FnGetSystemTimePreciseAsFileTime = VOID(WINAPI*)(LPFILETIME);
+
+  uint64_t perf_counter_frequency_;
+  uint64_t nano_seconds_per_period_;
+  FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_;
+};
+
+class WinFileSystem : public FileSystem {
+ public:
+  static const std::shared_ptr<WinFileSystem>& Default();
+  WinFileSystem(const std::shared_ptr<SystemClock>& clock);
+  ~WinFileSystem() {}
+  static const char* kClassName() { return "WinFS"; }
+  const char* Name() const override { return kClassName(); }
+  const char* NickName() const override { return kDefaultName(); }
+
+  static size_t GetSectorSize(const std::string& fname);
+  size_t GetPageSize() const { return page_size_; }
+  size_t GetAllocationGranularity() const { return allocation_granularity_; }
+
+  IOStatus DeleteFile(const std::string& fname, const IOOptions& options,
+                      IODebugContext* dbg) override;
+
+  // Truncate the named file to the specified size.
+  IOStatus Truncate(const std::string& /*fname*/, size_t /*size*/,
+                    const IOOptions& /*options*/,
+                    IODebugContext* /*dbg*/) override;
+  IOStatus NewSequentialFile(const std::string& fname,
+                             const FileOptions& file_opts,
+                             std::unique_ptr<FSSequentialFile>* result,
+                             IODebugContext* dbg) override;
+
+  IOStatus NewRandomAccessFile(const std::string& fname,
+                               const FileOptions& options,
+                               std::unique_ptr<FSRandomAccessFile>* result,
+                               IODebugContext* /*dbg*/) override;
+  IOStatus NewWritableFile(const std::string& f, const FileOptions& file_opts,
+                           std::unique_ptr<FSWritableFile>* r,
+                           IODebugContext* dbg) override;
+  IOStatus ReopenWritableFile(const std::string& fname,
+                              const FileOptions& options,
+                              std::unique_ptr<FSWritableFile>* result,
+                              IODebugContext* dbg) override;
+
+  IOStatus NewRandomRWFile(const std::string& fname,
+                           const FileOptions& file_opts,
+                           std::unique_ptr<FSRandomRWFile>* result,
+                           IODebugContext* dbg) override;
+  IOStatus NewMemoryMappedFileBuffer(
+      const std::string& fname,
+      std::unique_ptr<MemoryMappedFileBuffer>* result) override;
+
+  IOStatus NewDirectory(const std::string& name, const IOOptions& io_opts,
+                        std::unique_ptr<FSDirectory>* result,
+                        IODebugContext* dbg) override;
+  IOStatus FileExists(const std::string& f, const IOOptions& io_opts,
+                      IODebugContext* dbg) override;
+  IOStatus GetChildren(const std::string& dir, const IOOptions& io_opts,
+                       std::vector<std::string>* r,
+                       IODebugContext* dbg) override;
+  IOStatus CreateDir(const std::string& dirname, const IOOptions& options,
+                     IODebugContext* dbg) override;
+
+  // Creates directory if missing. Return Ok if it exists, or successful in
+  // Creating.
+  IOStatus CreateDirIfMissing(const std::string& dirname,
+                              const IOOptions& options,
+                              IODebugContext* dbg) override;
+
+  // Delete the specified directory.
+  IOStatus DeleteDir(const std::string& dirname, const IOOptions& options,
+                     IODebugContext* dbg) override;
+  // Store the size of fname in *file_size.
+  IOStatus GetFileSize(const std::string& fname, const IOOptions& options,
+                       uint64_t* file_size, IODebugContext* dbg) override;
+  // Store the last modification time of fname in *file_mtime.
+  IOStatus GetFileModificationTime(const std::string& fname,
+                                   const IOOptions& options,
+                                   uint64_t* file_mtime,
+                                   IODebugContext* dbg) override;
+  // Rename file src to target.
+  IOStatus RenameFile(const std::string& src, const std::string& target,
+                      const IOOptions& options, IODebugContext* dbg) override;
+
+  // Hard Link file src to target.
+  IOStatus LinkFile(const std::string& /*src*/, const std::string& /*target*/,
+                    const IOOptions& /*options*/,
+                    IODebugContext* /*dbg*/) override;
+  IOStatus NumFileLinks(const std::string& /*fname*/,
+                        const IOOptions& /*options*/, uint64_t* /*count*/,
+                        IODebugContext* /*dbg*/) override;
+  IOStatus AreFilesSame(const std::string& /*first*/,
+                        const std::string& /*second*/,
+                        const IOOptions& /*options*/, bool* /*res*/,
+                        IODebugContext* /*dbg*/) override;
+  IOStatus LockFile(const std::string& fname, const IOOptions& options,
+                    FileLock** lock, IODebugContext* dbg) override;
+  IOStatus UnlockFile(FileLock* lock, const IOOptions& options,
+                      IODebugContext* dbg) override;
+  IOStatus GetTestDirectory(const IOOptions& options, std::string* path,
+                            IODebugContext* dbg) override;
+
+  // Create and returns a default logger (an instance of EnvLogger) for storing
+  // informational messages. Derived classes can override to provide custom
+  // logger.
+  IOStatus NewLogger(const std::string& fname, const IOOptions& io_opts,
+                     std::shared_ptr<Logger>* result,
+                     IODebugContext* dbg) override;
+  // Get full directory name for this db.
+  IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& options,
+                           std::string* output_path,
+                           IODebugContext* dbg) override;
+  IOStatus IsDirectory(const std::string& /*path*/, const IOOptions& options,
+                       bool* is_dir, IODebugContext* /*dgb*/) override;
+  // This seems to clash with a macro on Windows, so #undef it here
+#undef GetFreeSpace
+  IOStatus GetFreeSpace(const std::string& /*path*/,
+                        const IOOptions& /*options*/, uint64_t* /*diskfree*/,
+                        IODebugContext* /*dbg*/) override;
+  FileOptions OptimizeForLogWrite(const FileOptions& file_options,
+                                  const DBOptions& db_options) const override;
+  FileOptions OptimizeForManifestRead(
+      const FileOptions& file_options) const override;
+  FileOptions OptimizeForManifestWrite(
+      const FileOptions& file_options) const override;
+  bool use_async_io() override { return false; }
+
+ protected:
+  static uint64_t FileTimeToUnixTime(const FILETIME& ftTime);
+  // Returns true iff the named directory exists and is a directory.
+  virtual bool DirExists(const std::string& dname);
+  // Helper for NewWritableFile and ReopenWritableFile
+  virtual IOStatus OpenWritableFile(const std::string& fname,
+                                    const FileOptions& options,
+                                    std::unique_ptr<FSWritableFile>* result,
+                                    bool reopen);
+
+ private:
+  std::shared_ptr<SystemClock> clock_;
+  size_t page_size_;
+  size_t allocation_granularity_;
+};
+
+// Designed for inheritance so it can be re-used,
+// but with certain parts replaced.
+class WinEnvIO {
+ public:
+  explicit WinEnvIO(Env* hosted_env);
+
+  virtual ~WinEnvIO();
+
+  virtual Status GetHostName(char* name, uint64_t len);
+
+ private:
+  Env* hosted_env_;
+};
+
+class WinEnv : public CompositeEnv {
+ public:
+  WinEnv();
+
+  ~WinEnv();
+  static const char* kClassName() { return "WinEnv"; }
+  const char* Name() const override { return kClassName(); }
+  const char* NickName() const override { return kDefaultName(); }
+
+  Status GetHostName(char* name, uint64_t len) override;
+
+  Status GetThreadList(std::vector<ThreadStatus>* thread_list) override;
+
+  void Schedule(void (*function)(void*), void* arg, Env::Priority pri,
+                void* tag, void (*unschedFunction)(void* arg)) override;
+
+  int UnSchedule(void* arg, Env::Priority pri) override;
+
+  void StartThread(void (*function)(void* arg), void* arg) override;
+
+  void WaitForJoin() override;
+
+  unsigned int GetThreadPoolQueueLen(Env::Priority pri) const override;
+
+  int ReserveThreads(int threads_to_be_reserved, Env::Priority pri) override;
+
+  int ReleaseThreads(int threads_to_be_released, Env::Priority pri) override;
+
+  uint64_t GetThreadID() const override;
+
+  // Allow increasing the number of worker threads.
+  void SetBackgroundThreads(int num, Env::Priority pri) override;
+  int GetBackgroundThreads(Env::Priority pri) override;
+
+  void IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) override;
+
+ private:
+  WinEnvIO winenv_io_;
+  WinEnvThreads winenv_threads_;
+};
+
+}  // namespace port
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.cc
new file mode 100644
index 0000000000..4fa735518f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.cc
@@ -0,0 +1,1101 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#if defined(OS_WIN)
+
+#include "port/win/io_win.h"
+
+#include "env_win.h"
+#include "monitoring/iostats_context_imp.h"
+#include "test_util/sync_point.h"
+#include "util/aligned_buffer.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace port {
+
+/*
+ * DirectIOHelper
+ */
+namespace {
+
+const size_t kSectorSize = 512;
+
+inline bool IsPowerOfTwo(const size_t alignment) {
+  return ((alignment) & (alignment - 1)) == 0;
+}
+
+inline bool IsAligned(size_t alignment, const void* ptr) {
+  return ((uintptr_t(ptr)) & (alignment - 1)) == 0;
+}
+}  // namespace
+
+std::string GetWindowsErrSz(DWORD err) {
+  std::string Err;
+  LPSTR lpMsgBuf = nullptr;
+  FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
+                     FORMAT_MESSAGE_IGNORE_INSERTS,
+                 NULL, err,
+                 0,  // Default language
+                 reinterpret_cast<LPSTR>(&lpMsgBuf), 0, NULL);
+
+  if (lpMsgBuf) {
+    Err = lpMsgBuf;
+    LocalFree(lpMsgBuf);
+  }
+  return Err;
+}
+
+// We preserve the original name of this interface to denote the original idea
+// behind it.
+// All reads happen at a specified offset, and the POSIX pwrite interface does
+// not change the position of the file pointer: judging from the man page and
+// errno, it executes the lseek atomically and returns the file pointer to
+// where it was. WriteFile() does not have this capability, so for both pread
+// and pwrite below the pointer is advanced to the next position, which is
+// fine for writes because they are (should be) sequential.
+// Because all the reads/writes happen at the specified offset, the caller in
+// theory should not rely on the current file offset.
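+//
+// A minimal usage sketch (illustrative only, not part of the vendored
+// source): given a `WinFileData* file` wrapping an open, writable HANDLE,
+// the helpers below are called as
+//
+//   size_t written = 0;
+//   IOStatus s = pwrite(file, Slice("abc", 3), /*offset=*/0, written);
+//   // On success written == 3. The Windows file pointer may have moved,
+//   // so callers must not depend on the current file position afterwards.
+//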
+IOStatus pwrite(const WinFileData* file_data, const Slice& data, + uint64_t offset, size_t& bytes_written) { + IOStatus s; + bytes_written = 0; + + size_t num_bytes = data.size(); + if (num_bytes > std::numeric_limits::max()) { + // May happen in 64-bit builds where size_t is 64-bits but + // long is still 32-bit, but that's the API here at the moment + return IOStatus::InvalidArgument( + "num_bytes is too large for a single write: " + file_data->GetName()); + } + + OVERLAPPED overlapped = {0}; + ULARGE_INTEGER offsetUnion; + offsetUnion.QuadPart = offset; + + overlapped.Offset = offsetUnion.LowPart; + overlapped.OffsetHigh = offsetUnion.HighPart; + + DWORD bytesWritten = 0; + + if (FALSE == WriteFile(file_data->GetFileHandle(), data.data(), + static_cast(num_bytes), &bytesWritten, + &overlapped)) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError("WriteFile failed: " + file_data->GetName(), + lastError); + } else { + bytes_written = bytesWritten; + } + + return s; +} + +// See comments for pwrite above +IOStatus pread(const WinFileData* file_data, char* src, size_t num_bytes, + uint64_t offset, size_t& bytes_read) { + IOStatus s; + bytes_read = 0; + + if (num_bytes > std::numeric_limits::max()) { + return IOStatus::InvalidArgument( + "num_bytes is too large for a single read: " + file_data->GetName()); + } + + OVERLAPPED overlapped = {0}; + ULARGE_INTEGER offsetUnion; + offsetUnion.QuadPart = offset; + + overlapped.Offset = offsetUnion.LowPart; + overlapped.OffsetHigh = offsetUnion.HighPart; + + DWORD bytesRead = 0; + + if (FALSE == ReadFile(file_data->GetFileHandle(), src, + static_cast(num_bytes), &bytesRead, + &overlapped)) { + auto lastError = GetLastError(); + // EOF is OK with zero bytes read + if (lastError != ERROR_HANDLE_EOF) { + s = IOErrorFromWindowsError("ReadFile failed: " + file_data->GetName(), + lastError); + } + } else { + bytes_read = bytesRead; + } + + return s; +} + +// SetFileInformationByHandle() is capable of fast pre-allocates. +// However, this does not change the file end position unless the file is +// truncated and the pre-allocated space is not considered filled with zeros. +IOStatus fallocate(const std::string& filename, HANDLE hFile, + uint64_t to_size) { + IOStatus status; + + FILE_ALLOCATION_INFO alloc_info; + alloc_info.AllocationSize.QuadPart = to_size; + + if (!SetFileInformationByHandle(hFile, FileAllocationInfo, &alloc_info, + sizeof(FILE_ALLOCATION_INFO))) { + auto lastError = GetLastError(); + status = IOErrorFromWindowsError( + "Failed to pre-allocate space: " + filename, lastError); + } + + return status; +} + +IOStatus ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize) { + IOStatus status; + + FILE_END_OF_FILE_INFO end_of_file; + end_of_file.EndOfFile.QuadPart = toSize; + + if (!SetFileInformationByHandle(hFile, FileEndOfFileInfo, &end_of_file, + sizeof(FILE_END_OF_FILE_INFO))) { + auto lastError = GetLastError(); + status = IOErrorFromWindowsError("Failed to Set end of file: " + filename, + lastError); + } + + return status; +} + +size_t GetUniqueIdFromFile(HANDLE /*hFile*/, char* /*id*/, + size_t /*max_size*/) { + // Returning 0 is safe as it causes the table reader to generate a unique ID. + // This is suboptimal for performance as it prevents multiple table readers + // for the same file from sharing cached blocks. For example, if users have + // a low value for `max_open_files`, there can be many table readers opened + // for the same file. 
+  //
+  // TODO: this is a temporary solution as it is safe but not optimal for
+  // performance. For more details see discussion in
+  // https://github.com/facebook/rocksdb/pull/5844.
+  return 0;
+}
+
+WinFileData::WinFileData(const std::string& filename, HANDLE hFile,
+                         bool direct_io)
+    : filename_(filename),
+      hFile_(hFile),
+      use_direct_io_(direct_io),
+      sector_size_(WinFileSystem::GetSectorSize(filename)) {}
+
+bool WinFileData::IsSectorAligned(const size_t off) const {
+  return (off & (sector_size_ - 1)) == 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// WinMmapReadableFile
+
+WinMmapReadableFile::WinMmapReadableFile(const std::string& fileName,
+                                         HANDLE hFile, HANDLE hMap,
+                                         const void* mapped_region,
+                                         size_t length)
+    : WinFileData(fileName, hFile, false /* use_direct_io */),
+      hMap_(hMap),
+      mapped_region_(mapped_region),
+      length_(length) {}
+
+WinMmapReadableFile::~WinMmapReadableFile() {
+  BOOL ret __attribute__((__unused__));
+  ret = ::UnmapViewOfFile(mapped_region_);
+  assert(ret);
+
+  ret = ::CloseHandle(hMap_);
+  assert(ret);
+}
+
+IOStatus WinMmapReadableFile::Read(uint64_t offset, size_t n,
+                                   const IOOptions& /*options*/, Slice* result,
+                                   char* scratch,
+                                   IODebugContext* /*dbg*/) const {
+  IOStatus s;
+
+  if (offset > length_) {
+    *result = Slice();
+    return IOError(filename_, EINVAL);
+  } else if (offset + n > length_) {
+    n = length_ - static_cast<size_t>(offset);
+  }
+  *result = Slice(reinterpret_cast<const char*>(mapped_region_) + offset, n);
+  return s;
+}
+
+IOStatus WinMmapReadableFile::InvalidateCache(size_t offset, size_t length) {
+  return IOStatus::OK();
+}
+
+size_t WinMmapReadableFile::GetUniqueId(char* id, size_t max_size) const {
+  return GetUniqueIdFromFile(hFile_, id, max_size);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// WinMmapFile
+
+// Can only truncate or reserve to a sector-size-aligned boundary if
+// used on files that are opened with unbuffered I/O
+IOStatus WinMmapFile::TruncateFile(uint64_t toSize) {
+  return ftruncate(filename_, hFile_, toSize);
+}
+
+IOStatus WinMmapFile::UnmapCurrentRegion() {
+  IOStatus status;
+
+  if (mapped_begin_ != nullptr) {
+    if (!::UnmapViewOfFile(mapped_begin_)) {
+      status = IOErrorFromWindowsError(
+          "Failed to unmap file view: " + filename_, GetLastError());
+    }
+
+    // Move on to the next portion of the file
+    file_offset_ += view_size_;
+
+    // UnmapViewOfFile automatically sends the data to disk, but not the
+    // metadata, which is good and provides some equivalent of fdatasync()
+    // on Linux; therefore, we do not need a separate flag for metadata.
+    mapped_begin_ = nullptr;
+    mapped_end_ = nullptr;
+    dst_ = nullptr;
+
+    last_sync_ = nullptr;
+    pending_sync_ = false;
+  }
+
+  return status;
+}
+
+IOStatus WinMmapFile::MapNewRegion(const IOOptions& options,
+                                   IODebugContext* dbg) {
+  IOStatus status;
+
+  assert(mapped_begin_ == nullptr);
+
+  size_t minDiskSize = static_cast<size_t>(file_offset_) + view_size_;
+
+  if (minDiskSize > reserved_size_) {
+    status = Allocate(file_offset_, view_size_, options, dbg);
+    if (!status.ok()) {
+      return status;
+    }
+  }
+
+  // Need to remap
+  if (hMap_ == NULL || reserved_size_ > mapping_size_) {
+    if (hMap_ != NULL) {
+      // Unmap the previous one
+      BOOL ret __attribute__((__unused__));
+      ret = ::CloseHandle(hMap_);
+      assert(ret);
+      hMap_ = NULL;
+    }
+
+    ULARGE_INTEGER mappingSize;
+    mappingSize.QuadPart = reserved_size_;
+
+    hMap_ = CreateFileMappingA(
+        hFile_,
+        NULL,            // Security attributes
+        PAGE_READWRITE,  // There
is not a write only mode for mapping + mappingSize.HighPart, // Enable mapping the whole file but the actual + // amount mapped is determined by MapViewOfFile + mappingSize.LowPart, + NULL); // Mapping name + + if (NULL == hMap_) { + return IOErrorFromWindowsError( + "WindowsMmapFile failed to create file mapping for: " + filename_, + GetLastError()); + } + + mapping_size_ = reserved_size_; + } + + ULARGE_INTEGER offset; + offset.QuadPart = file_offset_; + + // View must begin at the granularity aligned offset + mapped_begin_ = reinterpret_cast( + MapViewOfFileEx(hMap_, FILE_MAP_WRITE, offset.HighPart, offset.LowPart, + view_size_, NULL)); + + if (!mapped_begin_) { + status = IOErrorFromWindowsError( + "WindowsMmapFile failed to map file view: " + filename_, + GetLastError()); + } else { + mapped_end_ = mapped_begin_ + view_size_; + dst_ = mapped_begin_; + last_sync_ = mapped_begin_; + pending_sync_ = false; + } + return status; +} + +IOStatus WinMmapFile::PreallocateInternal(uint64_t spaceToReserve) { + return fallocate(filename_, hFile_, spaceToReserve); +} + +WinMmapFile::WinMmapFile(const std::string& fname, HANDLE hFile, + size_t page_size, size_t allocation_granularity, + const FileOptions& options) + : WinFileData(fname, hFile, false), + FSWritableFile(options), + hMap_(NULL), + page_size_(page_size), + allocation_granularity_(allocation_granularity), + reserved_size_(0), + mapping_size_(0), + view_size_(0), + mapped_begin_(nullptr), + mapped_end_(nullptr), + dst_(nullptr), + last_sync_(nullptr), + file_offset_(0), + pending_sync_(false) { + // Allocation granularity must be obtained from GetSystemInfo() and must be + // a power of two. + assert(allocation_granularity > 0); + assert((allocation_granularity & (allocation_granularity - 1)) == 0); + + assert(page_size > 0); + assert((page_size & (page_size - 1)) == 0); + + // Only for memory mapped writes + assert(options.use_mmap_writes); + + // View size must be both the multiple of allocation_granularity AND the + // page size and the granularity is usually a multiple of a page size. 
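+  // For example (illustrative numbers, not taken from this changeset): with
+  // the common Windows allocation granularity of 64 KiB, Roundup(32 KiB,
+  // 64 KiB) yields view_size_ == 64 KiB, so each mapped view covers exactly
+  // one allocation granule.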
+ const size_t viewSize = + 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode + view_size_ = Roundup(viewSize, allocation_granularity_); +} + +WinMmapFile::~WinMmapFile() { + if (hFile_) { + this->Close(IOOptions(), nullptr); + } +} + +IOStatus WinMmapFile::Append(const Slice& data, const IOOptions& options, + IODebugContext* dbg) { + const char* src = data.data(); + size_t left = data.size(); + + while (left > 0) { + assert(mapped_begin_ <= dst_); + size_t avail = mapped_end_ - dst_; + + if (avail == 0) { + IOStatus s = UnmapCurrentRegion(); + if (s.ok()) { + s = MapNewRegion(options, dbg); + } + + if (!s.ok()) { + return s; + } + } else { + size_t n = std::min(left, avail); + memcpy(dst_, src, n); + dst_ += n; + src += n; + left -= n; + pending_sync_ = true; + } + } + + // Now make sure that the last partial page is padded with zeros if needed + size_t bytesToPad = Roundup(size_t(dst_), page_size_) - size_t(dst_); + if (bytesToPad > 0) { + memset(dst_, 0, bytesToPad); + } + + return IOStatus::OK(); +} + +// Means Close() will properly take care of truncate +// and it does not need any additional information +IOStatus WinMmapFile::Truncate(uint64_t size, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return IOStatus::OK(); +} + +IOStatus WinMmapFile::Close(const IOOptions& options, IODebugContext* dbg) { + IOStatus s; + + assert(NULL != hFile_); + + // We truncate to the precise size so no + // uninitialized data at the end. SetEndOfFile + // which we use does not write zeros and it is good. + uint64_t targetSize = GetFileSize(options, dbg); + + if (mapped_begin_ != nullptr) { + // Sync before unmapping to make sure everything + // is on disk and there is not a lazy writing + // so we are deterministic with the tests + Sync(options, dbg); + s = UnmapCurrentRegion(); + } + + if (NULL != hMap_) { + BOOL ret = ::CloseHandle(hMap_); + if (!ret && s.ok()) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError( + "Failed to Close mapping for file: " + filename_, lastError); + } + + hMap_ = NULL; + } + + if (hFile_ != NULL) { + TruncateFile(targetSize); + + BOOL ret = ::CloseHandle(hFile_); + hFile_ = NULL; + + if (!ret && s.ok()) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError( + "Failed to close file map handle: " + filename_, lastError); + } + } + + return s; +} + +IOStatus WinMmapFile::Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return IOStatus::OK(); +} + +// Flush only data +IOStatus WinMmapFile::Sync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + IOStatus s; + + // Some writes occurred since last sync + if (dst_ > last_sync_) { + assert(mapped_begin_); + assert(dst_); + assert(dst_ > mapped_begin_); + assert(dst_ < mapped_end_); + + size_t page_begin = + TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_); + size_t page_end = + TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1); + + // Flush only the amount of that is a multiple of pages + if (!::FlushViewOfFile(mapped_begin_ + page_begin, + (page_end - page_begin) + page_size_)) { + s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_, + GetLastError()); + } else { + last_sync_ = dst_; + } + } + + return s; +} + +/** + * Flush data as well as metadata to stable storage. 
+ */ +IOStatus WinMmapFile::Fsync(const IOOptions& options, IODebugContext* dbg) { + IOStatus s = Sync(options, dbg); + + // Flush metadata + if (s.ok() && pending_sync_) { + if (!::FlushFileBuffers(hFile_)) { + s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_, + GetLastError()); + } + pending_sync_ = false; + } + + return s; +} + +/** + * Get the size of valid data in the file. This will not match the + * size that is returned from the filesystem because we use mmap + * to extend file by map_size every time. + */ +uint64_t WinMmapFile::GetFileSize(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + size_t used = dst_ - mapped_begin_; + return file_offset_ + used; +} + +IOStatus WinMmapFile::InvalidateCache(size_t offset, size_t length) { + return IOStatus::OK(); +} + +IOStatus WinMmapFile::Allocate(uint64_t offset, uint64_t len, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + IOStatus status; + TEST_KILL_RANDOM("WinMmapFile::Allocate"); + + // Make sure that we reserve an aligned amount of space + // since the reservation block size is driven outside so we want + // to check if we are ok with reservation here + size_t spaceToReserve = + Roundup(static_cast(offset + len), view_size_); + // Nothing to do + if (spaceToReserve <= reserved_size_) { + return status; + } + + IOSTATS_TIMER_GUARD(allocate_nanos); + status = PreallocateInternal(spaceToReserve); + if (status.ok()) { + reserved_size_ = spaceToReserve; + } + return status; +} + +size_t WinMmapFile::GetUniqueId(char* id, size_t max_size) const { + return GetUniqueIdFromFile(hFile_, id, max_size); +} + +////////////////////////////////////////////////////////////////////////////////// +// WinSequentialFile + +WinSequentialFile::WinSequentialFile(const std::string& fname, HANDLE f, + const FileOptions& options) + : WinFileData(fname, f, options.use_direct_reads) {} + +WinSequentialFile::~WinSequentialFile() { + assert(hFile_ != INVALID_HANDLE_VALUE); +} + +IOStatus WinSequentialFile::Read(size_t n, const IOOptions& /*opts*/, + Slice* result, char* scratch, + IODebugContext* /*dbg*/) { + IOStatus s; + size_t r = 0; + + assert(result != nullptr); + if (WinFileData::use_direct_io()) { + return IOStatus::NotSupported("Read() does not support direct_io"); + } + + // Windows ReadFile API accepts a DWORD. + // While it is possible to read in a loop if n is too big + // it is an unlikely case. 
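+  // (A DWORD is 32 bits, so a single ReadFile call is capped at 4 GiB - 1
+  // bytes; larger requests are rejected below rather than split up.)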
+ if (n > std::numeric_limits::max()) { + return IOStatus::InvalidArgument("n is too big for a single ReadFile: " + + filename_); + } + + DWORD bytesToRead = + static_cast(n); // cast is safe due to the check above + DWORD bytesRead = 0; + BOOL ret = ReadFile(hFile_, scratch, bytesToRead, &bytesRead, NULL); + if (ret != FALSE) { + r = bytesRead; + } else { + auto lastError = GetLastError(); + if (lastError != ERROR_HANDLE_EOF) { + s = IOErrorFromWindowsError("ReadFile failed: " + filename_, lastError); + } + } + + *result = Slice(scratch, r); + return s; +} + +IOStatus WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes, + uint64_t offset, + size_t& bytes_read) const { + return pread(this, src, numBytes, offset, bytes_read); +} + +IOStatus WinSequentialFile::PositionedRead(uint64_t offset, size_t n, + const IOOptions& /*opts*/, + Slice* result, char* scratch, + IODebugContext* /*dbg*/) { + if (!WinFileData::use_direct_io()) { + return IOStatus::NotSupported("This function is only used for direct_io"); + } + + assert(IsSectorAligned(static_cast(offset))); + assert(IsSectorAligned(static_cast(n))); + + size_t bytes_read = 0; // out param + IOStatus s = PositionedReadInternal(scratch, static_cast(n), offset, + bytes_read); + *result = Slice(scratch, bytes_read); + return s; +} + +IOStatus WinSequentialFile::Skip(uint64_t n) { + // Can't handle more than signed max as SetFilePointerEx accepts a signed + // 64-bit integer. As such it is a highly unlikley case to have n so large. + if (n > static_cast(std::numeric_limits::max())) { + return IOStatus::InvalidArgument( + "n is too large for a single SetFilePointerEx() call" + filename_); + } + + LARGE_INTEGER li; + li.QuadPart = static_cast(n); // cast is safe due to the check + // above + BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT); + if (ret == FALSE) { + auto lastError = GetLastError(); + return IOErrorFromWindowsError("Skip SetFilePointerEx():" + filename_, + lastError); + } + return IOStatus::OK(); +} + +IOStatus WinSequentialFile::InvalidateCache(size_t offset, size_t length) { + return IOStatus::OK(); +} + +////////////////////////////////////////////////////////////////////////////////////////////////// +/// WinRandomAccessBase + +inline IOStatus WinRandomAccessImpl::PositionedReadInternal( + char* src, size_t numBytes, uint64_t offset, size_t& bytes_read) const { + return pread(file_base_, src, numBytes, offset, bytes_read); +} + +inline WinRandomAccessImpl::WinRandomAccessImpl(WinFileData* file_base, + size_t alignment, + const FileOptions& options) + : file_base_(file_base), + alignment_(std::max(alignment, file_base->GetSectorSize())) { + assert(!options.use_mmap_reads); +} + +inline IOStatus WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, + Slice* result, + char* scratch) const { + // Check buffer alignment + if (file_base_->use_direct_io()) { + assert(file_base_->IsSectorAligned(static_cast(offset))); + assert(IsAligned(alignment_, scratch)); + } + + if (n == 0) { + *result = Slice(scratch, 0); + return IOStatus::OK(); + } + + size_t bytes_read = 0; + IOStatus s = PositionedReadInternal(scratch, n, offset, bytes_read); + *result = Slice(scratch, bytes_read); + return s; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +/// WinRandomAccessFile + +WinRandomAccessFile::WinRandomAccessFile(const std::string& fname, HANDLE hFile, + size_t alignment, + const FileOptions& options) + : WinFileData(fname, hFile, options.use_direct_reads), + 
WinRandomAccessImpl(this, alignment, options) {} + +WinRandomAccessFile::~WinRandomAccessFile() {} + +IOStatus WinRandomAccessFile::Read(uint64_t offset, size_t n, + const IOOptions& /*options*/, Slice* result, + char* scratch, + IODebugContext* /*dbg*/) const { + return ReadImpl(offset, n, result, scratch); +} + +IOStatus WinRandomAccessFile::InvalidateCache(size_t offset, size_t length) { + return IOStatus::OK(); +} + +size_t WinRandomAccessFile::GetUniqueId(char* id, size_t max_size) const { + return GetUniqueIdFromFile(GetFileHandle(), id, max_size); +} + +size_t WinRandomAccessFile::GetRequiredBufferAlignment() const { + return GetAlignment(); +} + +///////////////////////////////////////////////////////////////////////////// +// WinWritableImpl +// + +inline IOStatus WinWritableImpl::PreallocateInternal(uint64_t spaceToReserve) { + return fallocate(file_data_->GetName(), file_data_->GetFileHandle(), + spaceToReserve); +} + +inline WinWritableImpl::WinWritableImpl(WinFileData* file_data, + size_t alignment) + : file_data_(file_data), + alignment_(std::max(alignment, file_data->GetSectorSize())), + next_write_offset_(0), + reservedsize_(0) { + // Query current position in case ReopenWritableFile is called + // This position is only important for buffered writes + // for unbuffered writes we explicitely specify the position. + LARGE_INTEGER zero_move; + zero_move.QuadPart = 0; // Do not move + LARGE_INTEGER pos; + pos.QuadPart = 0; + BOOL ret = SetFilePointerEx(file_data_->GetFileHandle(), zero_move, &pos, + FILE_CURRENT); + // Querying no supped to fail + if (ret != 0) { + next_write_offset_ = pos.QuadPart; + } else { + assert(false); + } +} + +inline IOStatus WinWritableImpl::AppendImpl(const Slice& data) { + IOStatus s; + + if (data.size() > std::numeric_limits::max()) { + return IOStatus::InvalidArgument("data is too long for a single write" + + file_data_->GetName()); + } + + size_t bytes_written = 0; // out param + + if (file_data_->use_direct_io()) { + // With no offset specified we are appending + // to the end of the file + assert(file_data_->IsSectorAligned(next_write_offset_)); + assert(file_data_->IsSectorAligned(data.size())); + assert(IsAligned(static_cast(GetAlignment()), data.data())); + s = pwrite(file_data_, data, next_write_offset_, bytes_written); + } else { + DWORD bytesWritten = 0; + if (!WriteFile(file_data_->GetFileHandle(), data.data(), + static_cast(data.size()), &bytesWritten, NULL)) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError( + "Failed to WriteFile: " + file_data_->GetName(), lastError); + } else { + bytes_written = bytesWritten; + } + } + + if (s.ok()) { + if (bytes_written == data.size()) { + // This matters for direct_io cases where + // we rely on the fact that next_write_offset_ + // is sector aligned + next_write_offset_ += bytes_written; + } else { + s = IOStatus::IOError("Failed to write all bytes: " + + file_data_->GetName()); + } + } + + return s; +} + +inline IOStatus WinWritableImpl::PositionedAppendImpl(const Slice& data, + uint64_t offset) { + if (file_data_->use_direct_io()) { + assert(file_data_->IsSectorAligned(static_cast(offset))); + assert(file_data_->IsSectorAligned(data.size())); + assert(IsAligned(static_cast(GetAlignment()), data.data())); + } + + size_t bytes_written = 0; + IOStatus s = pwrite(file_data_, data, offset, bytes_written); + + if (s.ok()) { + if (bytes_written == data.size()) { + // For sequential write this would be simple + // size extension by data.size() + uint64_t write_end = offset + 
bytes_written; + if (write_end >= next_write_offset_) { + next_write_offset_ = write_end; + } + } else { + s = IOStatus::IOError("Failed to write all of the requested data: " + + file_data_->GetName()); + } + } + return s; +} + +inline IOStatus WinWritableImpl::TruncateImpl(uint64_t size) { + // It is tempting to check for the size for sector alignment + // but truncation may come at the end and there is not a requirement + // for this to be sector aligned so long as we do not attempt to write + // after that. The interface docs state that the behavior is undefined + // in that case. + IOStatus s = + ftruncate(file_data_->GetName(), file_data_->GetFileHandle(), size); + + if (s.ok()) { + next_write_offset_ = size; + } + return s; +} + +inline IOStatus WinWritableImpl::CloseImpl() { + IOStatus s; + + auto hFile = file_data_->GetFileHandle(); + assert(INVALID_HANDLE_VALUE != hFile); + + if (!::FlushFileBuffers(hFile)) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError( + "FlushFileBuffers failed at Close() for: " + file_data_->GetName(), + lastError); + } + + if (!file_data_->CloseFile() && s.ok()) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError( + "CloseHandle failed for: " + file_data_->GetName(), lastError); + } + return s; +} + +inline IOStatus WinWritableImpl::SyncImpl(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + IOStatus s; + if (!::FlushFileBuffers(file_data_->GetFileHandle())) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError( + "FlushFileBuffers failed at Sync() for: " + file_data_->GetName(), + lastError); + } + return s; +} + +inline IOStatus WinWritableImpl::AllocateImpl(uint64_t offset, uint64_t len) { + IOStatus status; + TEST_KILL_RANDOM("WinWritableFile::Allocate"); + + // Make sure that we reserve an aligned amount of space + // since the reservation block size is driven outside so we want + // to check if we are ok with reservation here + size_t spaceToReserve = Roundup(static_cast(offset + len), + static_cast(alignment_)); + // Nothing to do + if (spaceToReserve <= reservedsize_) { + return status; + } + + IOSTATS_TIMER_GUARD(allocate_nanos); + status = PreallocateInternal(spaceToReserve); + if (status.ok()) { + reservedsize_ = spaceToReserve; + } + return status; +} + +//////////////////////////////////////////////////////////////////////////////// +/// WinWritableFile + +WinWritableFile::WinWritableFile(const std::string& fname, HANDLE hFile, + size_t alignment, size_t /* capacity */, + const FileOptions& options) + : WinFileData(fname, hFile, options.use_direct_writes), + WinWritableImpl(this, alignment), + FSWritableFile(options) { + assert(!options.use_mmap_writes); +} + +WinWritableFile::~WinWritableFile() {} + +// Indicates if the class makes use of direct I/O +bool WinWritableFile::use_direct_io() const { + return WinFileData::use_direct_io(); +} + +size_t WinWritableFile::GetRequiredBufferAlignment() const { + return static_cast(GetAlignment()); +} + +IOStatus WinWritableFile::Append(const Slice& data, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return AppendImpl(data); +} + +IOStatus WinWritableFile::PositionedAppend(const Slice& data, uint64_t offset, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return PositionedAppendImpl(data, offset); +} + +// Need to implement this so the file is truncated correctly +// when buffered and unbuffered mode +IOStatus WinWritableFile::Truncate(uint64_t size, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return 
TruncateImpl(size); +} + +IOStatus WinWritableFile::Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return CloseImpl(); +} + +// write out the cached data to the OS cache +// This is now taken care of the WritableFileWriter +IOStatus WinWritableFile::Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return IOStatus::OK(); +} + +IOStatus WinWritableFile::Sync(const IOOptions& options, IODebugContext* dbg) { + return SyncImpl(options, dbg); +} + +IOStatus WinWritableFile::Fsync(const IOOptions& options, IODebugContext* dbg) { + return SyncImpl(options, dbg); +} + +bool WinWritableFile::IsSyncThreadSafe() const { return true; } + +uint64_t WinWritableFile::GetFileSize(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return GetFileNextWriteOffset(); +} + +IOStatus WinWritableFile::Allocate(uint64_t offset, uint64_t len, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return AllocateImpl(offset, len); +} + +size_t WinWritableFile::GetUniqueId(char* id, size_t max_size) const { + return GetUniqueIdFromFile(GetFileHandle(), id, max_size); +} + +///////////////////////////////////////////////////////////////////////// +/// WinRandomRWFile + +WinRandomRWFile::WinRandomRWFile(const std::string& fname, HANDLE hFile, + size_t alignment, const FileOptions& options) + : WinFileData(fname, hFile, + options.use_direct_reads && options.use_direct_writes), + WinRandomAccessImpl(this, alignment, options), + WinWritableImpl(this, alignment) {} + +bool WinRandomRWFile::use_direct_io() const { + return WinFileData::use_direct_io(); +} + +size_t WinRandomRWFile::GetRequiredBufferAlignment() const { + assert(WinRandomAccessImpl::GetAlignment() == + WinWritableImpl::GetAlignment()); + return static_cast(WinRandomAccessImpl::GetAlignment()); +} + +IOStatus WinRandomRWFile::Write(uint64_t offset, const Slice& data, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return PositionedAppendImpl(data, offset); +} + +IOStatus WinRandomRWFile::Read(uint64_t offset, size_t n, + const IOOptions& /*options*/, Slice* result, + char* scratch, IODebugContext* /*dbg*/) const { + return ReadImpl(offset, n, result, scratch); +} + +IOStatus WinRandomRWFile::Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return IOStatus::OK(); +} + +IOStatus WinRandomRWFile::Sync(const IOOptions& options, IODebugContext* dbg) { + return SyncImpl(options, dbg); +} + +IOStatus WinRandomRWFile::Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return CloseImpl(); +} + +////////////////////////////////////////////////////////////////////////// +/// WinMemoryMappedBufer +WinMemoryMappedBuffer::~WinMemoryMappedBuffer() { + BOOL ret +#if defined(_MSC_VER) + = FALSE; +#else + __attribute__((__unused__)); +#endif + if (base_ != nullptr) { + ret = ::UnmapViewOfFile(base_); + assert(ret); + base_ = nullptr; + } + if (map_handle_ != NULL && map_handle_ != INVALID_HANDLE_VALUE) { + ret = ::CloseHandle(map_handle_); + assert(ret); + map_handle_ = NULL; + } + if (file_handle_ != NULL && file_handle_ != INVALID_HANDLE_VALUE) { + ret = ::CloseHandle(file_handle_); + assert(ret); + file_handle_ = NULL; + } +} + +////////////////////////////////////////////////////////////////////////// +/// WinDirectory + +IOStatus WinDirectory::Fsync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + return IOStatus::OK(); +} + +IOStatus WinDirectory::Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + IOStatus s = IOStatus::OK(); + BOOL ret 
__attribute__((__unused__));
+  if (handle_ != INVALID_HANDLE_VALUE) {
+    ret = ::CloseHandle(handle_);
+    if (!ret) {
+      auto lastError = GetLastError();
+      s = IOErrorFromWindowsError("Directory close failed for: " + GetName(),
+                                  lastError);
+    }
+    handle_ = NULL;
+  }
+  return s;
+}
+
+size_t WinDirectory::GetUniqueId(char* id, size_t max_size) const {
+  return GetUniqueIdFromFile(handle_, id, max_size);
+}
+//////////////////////////////////////////////////////////////////////////
+/// WinFileLock
+
+WinFileLock::~WinFileLock() {
+  BOOL ret __attribute__((__unused__));
+  ret = ::CloseHandle(hFile_);
+  assert(ret);
+}
+
+}  // namespace port
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.h
new file mode 100644
index 0000000000..a4fee8346c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/io_win.h
@@ -0,0 +1,508 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <stdint.h>
+#include <windows.h>
+
+#include <mutex>
+#include <string>
+
+#include "rocksdb/file_system.h"
+#include "rocksdb/status.h"
+#include "util/aligned_buffer.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace port {
+
+std::string GetWindowsErrSz(DWORD err);
+
+inline IOStatus IOErrorFromWindowsError(const std::string& context,
+                                        DWORD err) {
+  return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
+             ? IOStatus::NoSpace(context, GetWindowsErrSz(err))
+         : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
+             ? IOStatus::PathNotFound(context, GetWindowsErrSz(err))
+             : IOStatus::IOError(context, GetWindowsErrSz(err));
+}
+
+inline IOStatus IOErrorFromLastWindowsError(const std::string& context) {
+  return IOErrorFromWindowsError(context, GetLastError());
+}
+
+inline IOStatus IOError(const std::string& context, int err_number) {
+  return (err_number == ENOSPC)
+             ? IOStatus::NoSpace(context, errnoStr(err_number).c_str())
+         : (err_number == ENOENT)
+             ? IOStatus::PathNotFound(context, errnoStr(err_number).c_str())
+             : IOStatus::IOError(context, errnoStr(err_number).c_str());
+}
+
+class WinFileData;
+
+IOStatus pwrite(const WinFileData* file_data, const Slice& data,
+                uint64_t offset, size_t& bytes_written);
+
+IOStatus pread(const WinFileData* file_data, char* src, size_t num_bytes,
+               uint64_t offset, size_t& bytes_read);
+
+IOStatus fallocate(const std::string& filename, HANDLE hFile,
+                   uint64_t to_size);
+
+IOStatus ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
+
+size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
+
+class WinFileData {
+ protected:
+  const std::string filename_;
+  HANDLE hFile_;
+  // If true, the I/O issued is direct I/O, in which case the buffer
+  // needs to be aligned (there is no guarantee that the buffer
+  // passed in is aligned).
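+  // (On Windows, direct I/O corresponds to FILE_FLAG_NO_BUFFERING, which
+  // requires offsets, transfer sizes, and buffer addresses to be multiples
+  // of the volume sector size; see IsSectorAligned() below.)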
+ const bool use_direct_io_; + const size_t sector_size_; + + public: + // We want this class be usable both for inheritance (prive + // or protected) and for containment so __ctor and __dtor public + WinFileData(const std::string& filename, HANDLE hFile, bool direct_io); + + virtual ~WinFileData() { this->CloseFile(); } + + bool CloseFile() { + bool result = true; + + if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) { + result = ::CloseHandle(hFile_); + assert(result); + hFile_ = NULL; + } + return result; + } + + const std::string& GetName() const { return filename_; } + + HANDLE GetFileHandle() const { return hFile_; } + + bool use_direct_io() const { return use_direct_io_; } + + size_t GetSectorSize() const { return sector_size_; } + + bool IsSectorAligned(const size_t off) const; + + WinFileData(const WinFileData&) = delete; + WinFileData& operator=(const WinFileData&) = delete; +}; + +class WinSequentialFile : protected WinFileData, public FSSequentialFile { + // Override for behavior change when creating a custom env + virtual IOStatus PositionedReadInternal(char* src, size_t numBytes, + uint64_t offset, + size_t& bytes_read) const; + + public: + WinSequentialFile(const std::string& fname, HANDLE f, + const FileOptions& options); + + ~WinSequentialFile(); + + WinSequentialFile(const WinSequentialFile&) = delete; + WinSequentialFile& operator=(const WinSequentialFile&) = delete; + + IOStatus Read(size_t n, const IOOptions& options, Slice* result, + char* scratch, IODebugContext* dbg) override; + IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) override; + + IOStatus Skip(uint64_t n) override; + + IOStatus InvalidateCache(size_t offset, size_t length) override; + + virtual bool use_direct_io() const override { + return WinFileData::use_direct_io(); + } +}; + +// mmap() based random-access +class WinMmapReadableFile : private WinFileData, public FSRandomAccessFile { + HANDLE hMap_; + + const void* mapped_region_; + const size_t length_; + + public: + // mapped_region_[0,length-1] contains the mmapped contents of the file. + WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap, + const void* mapped_region, size_t length); + + ~WinMmapReadableFile(); + + WinMmapReadableFile(const WinMmapReadableFile&) = delete; + WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete; + + IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const override; + + virtual IOStatus InvalidateCache(size_t offset, size_t length) override; + + virtual size_t GetUniqueId(char* id, size_t max_size) const override; +}; + +// We preallocate and use memcpy to append new +// data to the file. This is safe since we either properly close the +// file before reading from it, or for log files, the reading code +// knows enough to skip zero suffixes. +class WinMmapFile : private WinFileData, public FSWritableFile { + private: + HANDLE hMap_; + + const size_t page_size_; // We flush the mapping view in page_size + // increments. 
We may decide if this is a memory + // page size or SSD page size + const size_t + allocation_granularity_; // View must start at such a granularity + + size_t reserved_size_; // Preallocated size + + size_t mapping_size_; // The max size of the mapping object + // we want to guess the final file size to minimize the remapping + size_t view_size_; // How much memory to map into a view at a time + + char* mapped_begin_; // Must begin at the file offset that is aligned with + // allocation_granularity_ + char* mapped_end_; + char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_]) + char* last_sync_; // Where have we synced up to + + uint64_t file_offset_; // Offset of mapped_begin_ in file + + // Do we have unsynced writes? + bool pending_sync_; + + // Can only truncate or reserve to a sector size aligned if + // used on files that are opened with Unbuffered I/O + IOStatus TruncateFile(uint64_t toSize); + + IOStatus UnmapCurrentRegion(); + + IOStatus MapNewRegion(const IOOptions& options, IODebugContext* dbg); + + virtual IOStatus PreallocateInternal(uint64_t spaceToReserve); + + public: + WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size, + size_t allocation_granularity, const FileOptions& options); + + ~WinMmapFile(); + + WinMmapFile(const WinMmapFile&) = delete; + WinMmapFile& operator=(const WinMmapFile&) = delete; + + IOStatus Append(const Slice& data, const IOOptions& options, + IODebugContext* dbg) override; + IOStatus Append(const Slice& data, const IOOptions& opts, + const DataVerificationInfo& /* verification_info */, + IODebugContext* dbg) override { + return Append(data, opts, dbg); + } + + // Means Close() will properly take care of truncate + // and it does not need any additional information + IOStatus Truncate(uint64_t size, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override; + + IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override; + + // Flush only data + IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override; + + /** + * Flush data as well as metadata to stable storage. + */ + IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override; + + /** + * Get the size of valid data in the file. This will not match the + * size that is returned from the filesystem because we use mmap + * to extend file by map_size every time. 
+ */ + uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override; + + IOStatus InvalidateCache(size_t offset, size_t length) override; + + IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options, + IODebugContext* dbg) override; + + virtual size_t GetUniqueId(char* id, size_t max_size) const override; +}; + +class WinRandomAccessImpl { + protected: + WinFileData* file_base_; + size_t alignment_; + + // Override for behavior change when creating a custom env + virtual IOStatus PositionedReadInternal(char* src, size_t numBytes, + uint64_t offset, + size_t& bytes_read) const; + + WinRandomAccessImpl(WinFileData* file_base, size_t alignment, + const FileOptions& options); + + virtual ~WinRandomAccessImpl() {} + + IOStatus ReadImpl(uint64_t offset, size_t n, Slice* result, + char* scratch) const; + + size_t GetAlignment() const { return alignment_; } + + public: + WinRandomAccessImpl(const WinRandomAccessImpl&) = delete; + WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete; +}; + +// pread() based random-access +class WinRandomAccessFile + : private WinFileData, + protected WinRandomAccessImpl, // Want to be able to override + // PositionedReadInternal + public FSRandomAccessFile { + public: + WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment, + const FileOptions& options); + + ~WinRandomAccessFile(); + + IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const override; + + virtual size_t GetUniqueId(char* id, size_t max_size) const override; + + virtual bool use_direct_io() const override { + return WinFileData::use_direct_io(); + } + + IOStatus InvalidateCache(size_t offset, size_t length) override; + + virtual size_t GetRequiredBufferAlignment() const override; +}; + +// This is a sequential write class. It has been mimicked (as others) after +// the original Posix class. We add support for unbuffered I/O on windows as +// well +// we utilize the original buffer as an alignment buffer to write directly to +// file with no buffering. +// No buffering requires that the provided buffer is aligned to the physical +// sector size (SSD page size) and +// that all SetFilePointer() operations to occur with such an alignment. +// We thus always write in sector/page size increments to the drive and leave +// the tail for the next write OR for Close() at which point we pad with zeros. +// No padding is required for +// buffered access. 
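+//
+// Illustrative example (numbers assumed, not taken from this changeset): on a
+// volume with 512-byte sectors, an unbuffered writer holding 700 bytes of
+// payload writes one full 512-byte sector now and carries the remaining 188
+// bytes forward; at Close() the tail is padded with zeros to a full sector
+// before the final write.
+//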
+class WinWritableImpl { + protected: + WinFileData* file_data_; + const uint64_t alignment_; + uint64_t + next_write_offset_; // Needed because Windows does not support O_APPEND + uint64_t reservedsize_; // how far we have reserved space + + virtual IOStatus PreallocateInternal(uint64_t spaceToReserve); + + WinWritableImpl(WinFileData* file_data, size_t alignment); + + ~WinWritableImpl() {} + + uint64_t GetAlignment() const { return alignment_; } + + IOStatus AppendImpl(const Slice& data); + + // Requires that the data is aligned as specified by + // GetRequiredBufferAlignment() + IOStatus PositionedAppendImpl(const Slice& data, uint64_t offset); + + IOStatus TruncateImpl(uint64_t size); + + IOStatus CloseImpl(); + + IOStatus SyncImpl(const IOOptions& options, IODebugContext* dbg); + + uint64_t GetFileNextWriteOffset() { + // Double accounting now here with WritableFileWriter + // and this size will be wrong when unbuffered access is used + // but tests implement their own writable files and do not use + // WritableFileWrapper + // so we need to squeeze a square peg through + // a round hole here. + return next_write_offset_; + } + + IOStatus AllocateImpl(uint64_t offset, uint64_t len); + + public: + WinWritableImpl(const WinWritableImpl&) = delete; + WinWritableImpl& operator=(const WinWritableImpl&) = delete; +}; + +class WinWritableFile : private WinFileData, + protected WinWritableImpl, + public FSWritableFile { + public: + WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment, + size_t capacity, const FileOptions& options); + + ~WinWritableFile(); + + IOStatus Append(const Slice& data, const IOOptions& options, + IODebugContext* dbg) override; + IOStatus Append(const Slice& data, const IOOptions& opts, + const DataVerificationInfo& /* verification_info */, + IODebugContext* dbg) override { + return Append(data, opts, dbg); + } + + // Requires that the data is aligned as specified by + // GetRequiredBufferAlignment() + IOStatus PositionedAppend(const Slice& data, uint64_t offset, + const IOOptions& options, + IODebugContext* dbg) override; + IOStatus PositionedAppend(const Slice& data, uint64_t offset, + const IOOptions& opts, + const DataVerificationInfo& /* verification_info */, + IODebugContext* dbg) override { + return PositionedAppend(data, offset, opts, dbg); + } + + // Need to implement this so the file is truncated correctly + // when buffered and unbuffered mode + IOStatus Truncate(uint64_t size, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override; + + // write out the cached data to the OS cache + // This is now taken care of the WritableFileWriter + IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override; + + IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override; + + IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override; + + virtual bool IsSyncThreadSafe() const override; + + // Indicates if the class makes use of direct I/O + // Use PositionedAppend + virtual bool use_direct_io() const override; + + virtual size_t GetRequiredBufferAlignment() const override; + + uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override; + + IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options, + IODebugContext* dbg) override; + + virtual size_t GetUniqueId(char* id, size_t max_size) const override; +}; + +class WinRandomRWFile : private WinFileData, + protected WinRandomAccessImpl, + protected 
WinWritableImpl, + public FSRandomRWFile { + public: + WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment, + const FileOptions& options); + + ~WinRandomRWFile() {} + + // Indicates if the class makes use of direct I/O + // If false you must pass aligned buffer to Write() + virtual bool use_direct_io() const override; + + // Use the returned alignment value to allocate aligned + // buffer for Write() when use_direct_io() returns true + virtual size_t GetRequiredBufferAlignment() const override; + + // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. + // Pass aligned buffer when use_direct_io() returns true. + IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options, + IODebugContext* dbg) override; + + // Read up to `n` bytes starting from offset `offset` and store them in + // result, provided `scratch` size should be at least `n`. + // Returns Status::OK() on success. + IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const override; + + IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override; + + IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override; + + IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override { + return Sync(options, dbg); + } + + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override; +}; + +class WinMemoryMappedBuffer : public MemoryMappedFileBuffer { + private: + HANDLE file_handle_; + HANDLE map_handle_; + + public: + WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, + size_t size) + : MemoryMappedFileBuffer(base, size), + file_handle_(file_handle), + map_handle_(map_handle) {} + ~WinMemoryMappedBuffer() override; +}; + +class WinDirectory : public FSDirectory { + const std::string filename_; + HANDLE handle_; + + public: + explicit WinDirectory(const std::string& filename, HANDLE h) noexcept + : filename_(filename), handle_(h) { + assert(handle_ != INVALID_HANDLE_VALUE); + } + ~WinDirectory() { + if (handle_ != NULL) { + IOStatus s = WinDirectory::Close(IOOptions(), nullptr); + s.PermitUncheckedError(); + } + } + const std::string& GetName() const { return filename_; } + IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override; + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override; + + size_t GetUniqueId(char* id, size_t max_size) const override; +}; + +class WinFileLock : public FileLock { + public: + explicit WinFileLock(HANDLE hFile) : hFile_(hFile) { + assert(hFile != NULL); + assert(hFile != INVALID_HANDLE_VALUE); + } + + ~WinFileLock(); + + private: + HANDLE hFile_; +}; +} // namespace port +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.cc new file mode 100644 index 0000000000..37e8f655ce --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.cc @@ -0,0 +1,303 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+
+#if defined(OS_WIN)
+
+#include "port/win/port_win.h"
+
+#include <assert.h>
+#include <io.h>
+#include <rpc.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <chrono>
+#include <cstdlib>
+#include <exception>
+#include <memory>
+
+#include "port/port_dirent.h"
+#include "port/sys_time.h"
+
+#ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES
+// utf8 <-> utf16
+#include <codecvt>
+#include <locale>
+#include <string>
+#endif
+
+#include "logging/logging.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+extern const bool kDefaultToAdaptiveMutex = false;
+
+namespace port {
+
+#ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES
+std::string utf16_to_utf8(const std::wstring& utf16) {
+  std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> convert;
+  return convert.to_bytes(utf16);
+}
+
+std::wstring utf8_to_utf16(const std::string& utf8) {
+  std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
+  return converter.from_bytes(utf8);
+}
+#endif
+
+void GetTimeOfDay(TimeVal* tv, struct timezone* /* tz */) {
+  std::chrono::microseconds usNow(
+      std::chrono::duration_cast<std::chrono::microseconds>(
+          std::chrono::system_clock::now().time_since_epoch()));
+
+  std::chrono::seconds secNow(
+      std::chrono::duration_cast<std::chrono::seconds>(usNow));
+
+  tv->tv_sec = static_cast<long>(secNow.count());
+  tv->tv_usec = static_cast<long>(
+      usNow.count() -
+      std::chrono::duration_cast<std::chrono::microseconds>(secNow).count());
+}
+
+Mutex::~Mutex() {}
+
+CondVar::~CondVar() {}
+
+void CondVar::Wait() {
+  // Caller must ensure that mutex is held prior to calling this method
+  std::unique_lock<std::mutex> lk(mu_->getLock(), std::adopt_lock);
+#ifndef NDEBUG
+  mu_->locked_ = false;
+#endif
+  cv_.wait(lk);
+#ifndef NDEBUG
+  mu_->locked_ = true;
+#endif
+  // Release ownership of the lock as we don't want it to be unlocked when
+  // it goes out of scope (as we adopted the lock and didn't lock it ourselves)
+  lk.release();
+}
+
+bool CondVar::TimedWait(uint64_t abs_time_us) {
+  // MSVC++ library implements wait_until in terms of wait_for so
+  // we need to convert absolute wait into relative wait.
+  std::chrono::microseconds usAbsTime(abs_time_us);
+
+  std::chrono::microseconds usNow(
+      std::chrono::duration_cast<std::chrono::microseconds>(
+          std::chrono::system_clock::now().time_since_epoch()));
+  std::chrono::microseconds relTimeUs = (usAbsTime > usNow)
+                                            ?
(usAbsTime - usNow) + : std::chrono::microseconds::zero(); + + // Caller must ensure that mutex is held prior to calling this method + std::unique_lock lk(mu_->getLock(), std::adopt_lock); + + // Work around https://github.com/microsoft/STL/issues/369 +#if defined(_MSC_VER) && \ + (!defined(_MSVC_STL_UPDATE) || _MSVC_STL_UPDATE < 202008L) + if (relTimeUs == std::chrono::microseconds::zero()) { + lk.unlock(); + lk.lock(); + } +#endif +#ifndef NDEBUG + mu_->locked_ = false; +#endif + std::cv_status cvStatus = cv_.wait_for(lk, relTimeUs); +#ifndef NDEBUG + mu_->locked_ = true; +#endif + // Release ownership of the lock as we don't want it to be unlocked when + // it goes out of scope (as we adopted the lock and didn't lock it ourselves) + lk.release(); + + if (cvStatus == std::cv_status::timeout) { + return true; + } + + return false; +} + +void CondVar::Signal() { cv_.notify_one(); } + +void CondVar::SignalAll() { cv_.notify_all(); } + +int PhysicalCoreID() { return GetCurrentProcessorNumber(); } + +void InitOnce(OnceType* once, void (*initializer)()) { + std::call_once(once->flag_, initializer); +} + +// Private structure, exposed only by pointer +struct DIR { + HANDLE handle_; + bool firstread_; + RX_WIN32_FIND_DATA data_; + dirent entry_; + + DIR() : handle_(INVALID_HANDLE_VALUE), firstread_(true) {} + + DIR(const DIR&) = delete; + DIR& operator=(const DIR&) = delete; + + ~DIR() { + if (INVALID_HANDLE_VALUE != handle_) { + ::FindClose(handle_); + } + } +}; + +DIR* opendir(const char* name) { + if (!name || *name == 0) { + errno = ENOENT; + return nullptr; + } + + std::string pattern(name); + pattern.append("\\").append("*"); + + std::unique_ptr dir(new DIR); + + dir->handle_ = + RX_FindFirstFileEx(RX_FN(pattern).c_str(), + FindExInfoBasic, // Do not want alternative name + &dir->data_, FindExSearchNameMatch, + NULL, // lpSearchFilter + 0); + + if (dir->handle_ == INVALID_HANDLE_VALUE) { + return nullptr; + } + + RX_FILESTRING x(dir->data_.cFileName, RX_FNLEN(dir->data_.cFileName)); + strcpy_s(dir->entry_.d_name, sizeof(dir->entry_.d_name), FN_TO_RX(x).c_str()); + + return dir.release(); +} + +struct dirent* readdir(DIR* dirp) { + if (!dirp || dirp->handle_ == INVALID_HANDLE_VALUE) { + errno = EBADF; + return nullptr; + } + + if (dirp->firstread_) { + dirp->firstread_ = false; + return &dirp->entry_; + } + + auto ret = RX_FindNextFile(dirp->handle_, &dirp->data_); + + if (ret == 0) { + return nullptr; + } + + RX_FILESTRING x(dirp->data_.cFileName, RX_FNLEN(dirp->data_.cFileName)); + strcpy_s(dirp->entry_.d_name, sizeof(dirp->entry_.d_name), + FN_TO_RX(x).c_str()); + + return &dirp->entry_; +} + +int closedir(DIR* dirp) { + delete dirp; + return 0; +} + +int truncate(const char* path, int64_t length) { + if (path == nullptr) { + errno = EFAULT; + return -1; + } + return ROCKSDB_NAMESPACE::port::Truncate(path, length); +} + +int Truncate(std::string path, int64_t len) { + if (len < 0) { + errno = EINVAL; + return -1; + } + + HANDLE hFile = + RX_CreateFile(RX_FN(path).c_str(), GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, // Security attrs + OPEN_EXISTING, // Truncate existing file only + FILE_ATTRIBUTE_NORMAL, NULL); + + if (INVALID_HANDLE_VALUE == hFile) { + auto lastError = GetLastError(); + if (lastError == ERROR_FILE_NOT_FOUND) { + errno = ENOENT; + } else if (lastError == ERROR_ACCESS_DENIED) { + errno = EACCES; + } else { + errno = EIO; + } + return -1; + } + + int result = 0; + FILE_END_OF_FILE_INFO end_of_file; + 
end_of_file.EndOfFile.QuadPart = len; + + if (!SetFileInformationByHandle(hFile, FileEndOfFileInfo, &end_of_file, + sizeof(FILE_END_OF_FILE_INFO))) { + errno = EIO; + result = -1; + } + + CloseHandle(hFile); + return result; +} + +void Crash(const std::string& srcfile, int srcline) { + fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline); + fflush(stdout); + abort(); +} + +int GetMaxOpenFiles() { return -1; } + +// Assume 4KB page size +const size_t kPageSize = 4U * 1024U; + +void SetCpuPriority(ThreadId id, CpuPriority priority) { + // Not supported + (void)id; + (void)priority; +} + +int64_t GetProcessID() { return GetCurrentProcessId(); } + +bool GenerateRfcUuid(std::string* output) { + UUID uuid; + UuidCreateSequential(&uuid); + + RPC_CSTR rpc_str; + auto status = UuidToStringA(&uuid, &rpc_str); + if (status != RPC_S_OK) { + return false; + } + + // rpc_str is nul-terminated + *output = reinterpret_cast(rpc_str); + + status = RpcStringFreeA(&rpc_str); + assert(status == RPC_S_OK); + + return true; +} + +} // namespace port +} // namespace ROCKSDB_NAMESPACE + +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.h new file mode 100644 index 0000000000..4d9883b63a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/port_win.h @@ -0,0 +1,379 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// See port_example.h for documentation for the following types/functions. + +#pragma once + +// Always want minimum headers +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#include +//^^ should be included first before other system lib +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "port/win/win_thread.h" +#include "rocksdb/port_defs.h" + +#undef min +#undef max +#undef DeleteFile +#undef GetCurrentTime + +#ifndef strcasecmp +#define strcasecmp _stricmp +#endif + +#undef GetCurrentTime +#undef DeleteFile + +#ifndef _SSIZE_T_DEFINED +using ssize_t = SSIZE_T; +#endif + +// size_t printf formatting named in the manner of C99 standard formatting +// strings such as PRIu64 +// in fact, we could use that one +#ifndef ROCKSDB_PRIszt +#define ROCKSDB_PRIszt "Iu" +#endif + +#ifdef _MSC_VER +#define __attribute__(A) + +#endif + +namespace ROCKSDB_NAMESPACE { + +#define PREFETCH(addr, rw, locality) + +extern const bool kDefaultToAdaptiveMutex; + +namespace port { + +// "Windows is designed to run on little-endian computer architectures." 
+// https://docs.microsoft.com/en-us/windows/win32/sysinfo/registry-value-types +constexpr bool kLittleEndian = true; +#undef PLATFORM_IS_LITTLE_ENDIAN + +class CondVar; + +class Mutex { + public: + static const char* kName() { return "std::mutex"; } + + explicit Mutex(bool IGNORED_adaptive = kDefaultToAdaptiveMutex) +#ifndef NDEBUG + : locked_(false) +#endif + { + (void)IGNORED_adaptive; + } + + ~Mutex(); + + void Lock() { + mutex_.lock(); +#ifndef NDEBUG + locked_ = true; +#endif + } + + void Unlock() { +#ifndef NDEBUG + locked_ = false; +#endif + mutex_.unlock(); + } + + bool TryLock() { + bool ret = mutex_.try_lock(); +#ifndef NDEBUG + if (ret) { + locked_ = true; + } +#endif + return ret; + } + + // this will assert if the mutex is not locked + // it does NOT verify that mutex is held by a calling thread + void AssertHeld() { +#ifndef NDEBUG + assert(locked_); +#endif + } + + // Also implement std Lockable + inline void lock() { Lock(); } + inline void unlock() { Unlock(); } + inline bool try_lock() { return TryLock(); } + + // Mutex is move only with lock ownership transfer + Mutex(const Mutex&) = delete; + void operator=(const Mutex&) = delete; + + private: + friend class CondVar; + + std::mutex& getLock() { return mutex_; } + + std::mutex mutex_; +#ifndef NDEBUG + bool locked_; +#endif +}; + +class RWMutex { + public: + RWMutex() { InitializeSRWLock(&srwLock_); } + // No copying allowed + RWMutex(const RWMutex&) = delete; + void operator=(const RWMutex&) = delete; + + void ReadLock() { AcquireSRWLockShared(&srwLock_); } + + void WriteLock() { AcquireSRWLockExclusive(&srwLock_); } + + void ReadUnlock() { ReleaseSRWLockShared(&srwLock_); } + + void WriteUnlock() { ReleaseSRWLockExclusive(&srwLock_); } + + // Empty as in POSIX + void AssertHeld() {} + + private: + SRWLOCK srwLock_; +}; + +class CondVar { + public: + explicit CondVar(Mutex* mu) : mu_(mu) {} + + ~CondVar(); + void Wait(); + bool TimedWait(uint64_t expiration_time); + void Signal(); + void SignalAll(); + + // Condition var is not copy/move constructible + CondVar(const CondVar&) = delete; + CondVar& operator=(const CondVar&) = delete; + + CondVar(CondVar&&) = delete; + CondVar& operator=(CondVar&&) = delete; + + private: + std::condition_variable cv_; + Mutex* mu_; +}; + +#ifdef _POSIX_THREADS +using Thread = std::thread; +#else +// Wrapper around the platform efficient +// or otherwise preferrable implementation +using Thread = WindowsThread; +#endif + +// OnceInit type helps emulate +// Posix semantics with initialization +// adopted in the project +struct OnceType { + struct Init {}; + + OnceType() {} + OnceType(const Init&) {} + OnceType(const OnceType&) = delete; + OnceType& operator=(const OnceType&) = delete; + + std::once_flag flag_; +}; + +#define LEVELDB_ONCE_INIT port::OnceType::Init() +extern void InitOnce(OnceType* once, void (*initializer)()); + +#ifndef CACHE_LINE_SIZE +#define CACHE_LINE_SIZE 64U +#endif + +#ifdef ROCKSDB_JEMALLOC +// Separate inlines so they can be replaced if needed +void* jemalloc_aligned_alloc(size_t size, size_t alignment) noexcept; +void jemalloc_aligned_free(void* p) noexcept; +#endif + +inline void* cacheline_aligned_alloc(size_t size) { +#ifdef ROCKSDB_JEMALLOC + return jemalloc_aligned_alloc(size, CACHE_LINE_SIZE); +#else + return _aligned_malloc(size, CACHE_LINE_SIZE); +#endif +} + +inline void cacheline_aligned_free(void* memblock) { +#ifdef ROCKSDB_JEMALLOC + jemalloc_aligned_free(memblock); +#else + _aligned_free(memblock); +#endif +} + +extern const size_t kPageSize; + +// 
Part of C++11
+#define ALIGN_AS(n) alignas(n)
+
+static inline void AsmVolatilePause() {
+#if defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM)
+  YieldProcessor();
+#endif
+  // it would be nice to get "wfe" on ARM here
+}
+
+extern int PhysicalCoreID();
+
+// For Thread Local Storage abstraction
+using pthread_key_t = DWORD;
+
+inline int pthread_key_create(pthread_key_t* key, void (*destructor)(void*)) {
+  // Not used
+  (void)destructor;
+
+  pthread_key_t k = TlsAlloc();
+  if (TLS_OUT_OF_INDEXES == k) {
+    return ENOMEM;
+  }
+
+  *key = k;
+  return 0;
+}
+
+inline int pthread_key_delete(pthread_key_t key) {
+  if (!TlsFree(key)) {
+    return EINVAL;
+  }
+  return 0;
+}
+
+inline int pthread_setspecific(pthread_key_t key, const void* value) {
+  if (!TlsSetValue(key, const_cast<void*>(value))) {
+    return ENOMEM;
+  }
+  return 0;
+}
+
+inline void* pthread_getspecific(pthread_key_t key) {
+  void* result = TlsGetValue(key);
+  if (!result) {
+    if (GetLastError() != ERROR_SUCCESS) {
+      errno = EINVAL;
+    } else {
+      errno = NOERROR;
+    }
+  }
+  return result;
+}
+
+// UNIX equivalents, although the errno numbers will be off;
+// implemented using the C runtime. Note: this does not
+// fill the space with zeros in case the file is extended.
+int truncate(const char* path, int64_t length);
+int Truncate(std::string path, int64_t length);
+void Crash(const std::string& srcfile, int srcline);
+extern int GetMaxOpenFiles();
+std::string utf16_to_utf8(const std::wstring& utf16);
+std::wstring utf8_to_utf16(const std::string& utf8);
+
+using ThreadId = int;
+
+extern void SetCpuPriority(ThreadId id, CpuPriority priority);
+
+int64_t GetProcessID();
+
+// Uses platform APIs to generate a 36-character RFC-4122 UUID. Returns
+// true on success or false on failure.
+bool GenerateRfcUuid(std::string* output);
+
+}  // namespace port
+
+#ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES
+
+#define RX_FILESTRING std::wstring
+#define RX_FN(a) ROCKSDB_NAMESPACE::port::utf8_to_utf16(a)
+#define FN_TO_RX(a) ROCKSDB_NAMESPACE::port::utf16_to_utf8(a)
+#define RX_FNCMP(a, b) ::wcscmp(a, RX_FN(b).c_str())
+#define RX_FNLEN(a) ::wcslen(a)
+
+#define RX_DeleteFile DeleteFileW
+#define RX_CreateFile CreateFileW
+#define RX_CreateFileMapping CreateFileMappingW
+#define RX_GetFileAttributesEx GetFileAttributesExW
+#define RX_FindFirstFileEx FindFirstFileExW
+#define RX_FindNextFile FindNextFileW
+#define RX_WIN32_FIND_DATA WIN32_FIND_DATAW
+#define RX_CreateDirectory CreateDirectoryW
+#define RX_RemoveDirectory RemoveDirectoryW
+#define RX_GetFileAttributesEx GetFileAttributesExW
+#define RX_MoveFileEx MoveFileExW
+#define RX_CreateHardLink CreateHardLinkW
+#define RX_PathIsRelative PathIsRelativeW
+#define RX_GetCurrentDirectory GetCurrentDirectoryW
+#define RX_GetDiskFreeSpaceEx GetDiskFreeSpaceExW
+#define RX_PathIsDirectory PathIsDirectoryW
+
+#else
+
+#define RX_FILESTRING std::string
+#define RX_FN(a) a
+#define FN_TO_RX(a) a
+#define RX_FNCMP(a, b) strcmp(a, b)
+#define RX_FNLEN(a) strlen(a)
+
+#define RX_DeleteFile DeleteFileA
+#define RX_CreateFile CreateFileA
+#define RX_CreateFileMapping CreateFileMappingA
+#define RX_GetFileAttributesEx GetFileAttributesExA
+#define RX_FindFirstFileEx FindFirstFileExA
+#define RX_CreateDirectory CreateDirectoryA
+#define RX_FindNextFile FindNextFileA
+#define RX_WIN32_FIND_DATA WIN32_FIND_DATAA
+#define RX_CreateDirectory CreateDirectoryA
+#define RX_RemoveDirectory RemoveDirectoryA
+#define RX_GetFileAttributesEx GetFileAttributesExA
+#define RX_MoveFileEx MoveFileExA
+#define RX_CreateHardLink 
CreateHardLinkA +#define RX_PathIsRelative PathIsRelativeA +#define RX_GetCurrentDirectory GetCurrentDirectoryA +#define RX_GetDiskFreeSpaceEx GetDiskFreeSpaceExA +#define RX_PathIsDirectory PathIsDirectoryA + +#endif + +using port::pthread_getspecific; +using port::pthread_key_create; +using port::pthread_key_delete; +using port::pthread_key_t; +using port::pthread_setspecific; +using port::truncate; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.cc new file mode 100644 index 0000000000..072ea419a1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.cc @@ -0,0 +1,192 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Logger implementation that can be shared by all environments +// where enough posix functionality is available. + +#if defined(OS_WIN) + +#include "port/win/win_logger.h" + +#include +#include +#include + +#include +#include + +#include "monitoring/iostats_context_imp.h" +#include "port/sys_time.h" +#include "port/win/env_win.h" +#include "port/win/io_win.h" +#include "rocksdb/env.h" +#include "rocksdb/system_clock.h" + +namespace ROCKSDB_NAMESPACE { + +namespace port { + +WinLogger::WinLogger(uint64_t (*gettid)(), SystemClock* clock, HANDLE file, + const InfoLogLevel log_level) + : Logger(log_level), + file_(file), + gettid_(gettid), + log_size_(0), + last_flush_micros_(0), + clock_(clock), + flush_pending_(false) { + assert(file_ != NULL); + assert(file_ != INVALID_HANDLE_VALUE); +} + +void WinLogger::DebugWriter(const char* str, int len) { + assert(file_ != INVALID_HANDLE_VALUE); + DWORD bytesWritten = 0; + BOOL ret = WriteFile(file_, str, len, &bytesWritten, NULL); + if (ret == FALSE) { + std::string errSz = GetWindowsErrSz(GetLastError()); + fprintf(stderr, "%s", errSz.c_str()); + } +} + +WinLogger::~WinLogger() { CloseInternal().PermitUncheckedError(); } + +Status WinLogger::CloseImpl() { return CloseInternal(); } + +Status WinLogger::CloseInternal() { + Status s; + if (INVALID_HANDLE_VALUE != file_) { + BOOL ret = FlushFileBuffers(file_); + if (ret == 0) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError("Failed to flush LOG on Close() ", lastError); + } + ret = CloseHandle(file_); + // On error the return value is zero + if (ret == 0 && s.ok()) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError("Failed to flush LOG on Close() ", lastError); + } + file_ = INVALID_HANDLE_VALUE; + closed_ = true; + } + return s; +} + +void WinLogger::Flush() { + assert(file_ != INVALID_HANDLE_VALUE); + if (flush_pending_) { + flush_pending_ = false; + // With Windows API writes go to OS buffers directly so no fflush needed + // unlike with C runtime API. We don't flush all the way to disk + // for perf reasons. 
+ } + + last_flush_micros_ = clock_->NowMicros(); +} + +void WinLogger::Logv(const char* format, va_list ap) { + IOSTATS_TIMER_GUARD(logger_nanos); + assert(file_ != INVALID_HANDLE_VALUE); + + const uint64_t thread_id = (*gettid_)(); + + // We try twice: the first time with a fixed-size stack allocated buffer, + // and the second time with a much larger dynamically allocated buffer. + char buffer[500]; + std::unique_ptr largeBuffer; + for (int iter = 0; iter < 2; ++iter) { + char* base; + int bufsize; + if (iter == 0) { + bufsize = sizeof(buffer); + base = buffer; + } else { + bufsize = 30000; + largeBuffer.reset(new char[bufsize]); + base = largeBuffer.get(); + } + + char* p = base; + char* limit = base + bufsize; + + port::TimeVal now_tv; + port::GetTimeOfDay(&now_tv, nullptr); + const time_t seconds = now_tv.tv_sec; + struct tm t; + localtime_s(&t, &seconds); + p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", + t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, + t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec), + static_cast(thread_id)); + + // Print the message + if (p < limit) { + va_list backup_ap; + va_copy(backup_ap, ap); + int done = vsnprintf(p, limit - p, format, backup_ap); + if (done > 0) { + p += done; + } else { + continue; + } + va_end(backup_ap); + } + + // Truncate to available space if necessary + if (p >= limit) { + if (iter == 0) { + continue; // Try again with larger buffer + } else { + p = limit - 1; + } + } + + // Add newline if necessary + if (p == base || p[-1] != '\n') { + *p++ = '\n'; + } + + assert(p <= limit); + const size_t write_size = p - base; + + DWORD bytesWritten = 0; + BOOL ret = WriteFile(file_, base, static_cast(write_size), + &bytesWritten, NULL); + if (ret == FALSE) { + std::string errSz = GetWindowsErrSz(GetLastError()); + fprintf(stderr, "%s", errSz.c_str()); + } + + flush_pending_ = true; + assert((bytesWritten == write_size) || (ret == FALSE)); + if (bytesWritten > 0) { + log_size_ += write_size; + } + + uint64_t now_micros = + static_cast(now_tv.tv_sec) * 1000000 + now_tv.tv_usec; + if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) { + flush_pending_ = false; + // With Windows API writes go to OS buffers directly so no fflush needed + // unlike with C runtime API. We don't flush all the way to disk + // for perf reasons. + last_flush_micros_ = now_micros; + } + break; + } +} + +size_t WinLogger::GetLogFileSize() const { return log_size_; } + +} // namespace port + +} // namespace ROCKSDB_NAMESPACE + +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.h new file mode 100644 index 0000000000..1ca4610e9e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.h @@ -0,0 +1,64 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Logger implementation that can be shared by all environments +// where enough posix functionality is available. 
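Editor's note: WinLogger::Logv above uses a two-pass formatting strategy: it tries a small stack buffer first and only falls back to one large heap allocation (a fixed 30000 bytes) when the message does not fit. The self-contained sketch below illustrates the same pattern outside RocksDB; the names are illustrative only, and unlike the vendored code it sizes the retry buffer exactly from vsnprintf's return value.

#include <cstdarg>
#include <cstdio>
#include <memory>
#include <string>

// Format printf-style arguments, preferring a small stack buffer and
// falling back to a single heap allocation only when necessary.
static std::string FormatTwoPass(const char* format, ...) {
  char stack_buf[500];
  va_list ap;
  va_start(ap, format);
  va_list backup_ap;
  va_copy(backup_ap, ap);  // vsnprintf consumes ap; keep a copy for pass two
  int needed = vsnprintf(stack_buf, sizeof(stack_buf), format, ap);
  std::string result;
  if (needed >= 0 && static_cast<size_t>(needed) < sizeof(stack_buf)) {
    result.assign(stack_buf, static_cast<size_t>(needed));
  } else if (needed >= 0) {
    // Pass two: vsnprintf reported the exact length required.
    std::unique_ptr<char[]> heap_buf(new char[needed + 1]);
    vsnprintf(heap_buf.get(), static_cast<size_t>(needed) + 1, format,
              backup_ap);
    result.assign(heap_buf.get(), static_cast<size_t>(needed));
  }
  va_end(backup_ap);
  va_end(ap);
  return result;
}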
+ +#pragma once + +#include +#include + +#include +#include + +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { +class SystemClock; + +namespace port { +class WinLogger : public ROCKSDB_NAMESPACE::Logger { + public: + WinLogger(uint64_t (*gettid)(), SystemClock* clock, HANDLE file, + const InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL); + + virtual ~WinLogger(); + + WinLogger(const WinLogger&) = delete; + + WinLogger& operator=(const WinLogger&) = delete; + + void Flush() override; + + using ROCKSDB_NAMESPACE::Logger::Logv; + void Logv(const char* format, va_list ap) override; + + size_t GetLogFileSize() const override; + + void DebugWriter(const char* str, int len); + + protected: + Status CloseImpl() override; + + private: + HANDLE file_; + uint64_t (*gettid_)(); // Return the thread id for the current thread + std::atomic_size_t log_size_; + std::atomic_uint_fast64_t last_flush_micros_; + SystemClock* clock_; + bool flush_pending_; + + Status CloseInternal(); + + const static uint64_t flush_every_seconds_ = 5; +}; +} // namespace port + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.cc new file mode 100644 index 0000000000..3c82e736ec --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.cc @@ -0,0 +1,170 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#if defined(OS_WIN) +// Most Mingw builds support std::thread only when using posix threads. +// In that case, some of these functions will be unavailable. +// Note that we're using either WindowsThread or std::thread, depending on +// which one is available. +#ifndef _POSIX_THREADS + +#include "port/win/win_thread.h" + +#include +#include // __beginthreadex +#include + +#include +#include +#include + +namespace ROCKSDB_NAMESPACE { +namespace port { + +struct WindowsThread::Data { + std::function func_; + uintptr_t handle_; + + Data(std::function&& func) : func_(std::move(func)), handle_(0) {} + + Data(const Data&) = delete; + Data& operator=(const Data&) = delete; + + static unsigned int __stdcall ThreadProc(void* arg); +}; + +void WindowsThread::Init(std::function&& func) { + data_ = std::make_shared(std::move(func)); + // We create another instance of std::shared_ptr to get an additional ref + // since we may detach and destroy this instance before the threadproc + // may start to run. 
We choose to allocate this additional ref on the heap + // so we do not need to synchronize and allow this thread to proceed + std::unique_ptr> th_data( + new std::shared_ptr(data_)); + + data_->handle_ = _beginthreadex(NULL, + 0, // stack size + &Data::ThreadProc, th_data.get(), + 0, // init flag + &th_id_); + + if (data_->handle_ == 0) { + throw std::system_error( + std::make_error_code(std::errc::resource_unavailable_try_again), + "Unable to create a thread"); + } + th_data.release(); +} + +WindowsThread::WindowsThread() : data_(nullptr), th_id_(0) {} + +WindowsThread::~WindowsThread() { + // Must be joined or detached + // before destruction. + // This is the same as std::thread + if (data_) { + if (joinable()) { + assert(false); + std::terminate(); + } + data_.reset(); + } +} + +WindowsThread::WindowsThread(WindowsThread&& o) noexcept : WindowsThread() { + *this = std::move(o); +} + +WindowsThread& WindowsThread::operator=(WindowsThread&& o) noexcept { + if (joinable()) { + assert(false); + std::terminate(); + } + + data_ = std::move(o.data_); + + // Per spec both instances will have the same id + th_id_ = o.th_id_; + + return *this; +} + +bool WindowsThread::joinable() const { return (data_ && data_->handle_ != 0); } + +WindowsThread::native_handle_type WindowsThread::native_handle() const { + return reinterpret_cast(data_->handle_); +} + +unsigned WindowsThread::hardware_concurrency() { + return std::thread::hardware_concurrency(); +} + +void WindowsThread::join() { + if (!joinable()) { + assert(false); + throw std::system_error(std::make_error_code(std::errc::invalid_argument), + "Thread is no longer joinable"); + } + + if (GetThreadId(GetCurrentThread()) == th_id_) { + assert(false); + throw std::system_error( + std::make_error_code(std::errc::resource_deadlock_would_occur), + "Can not join itself"); + } + + auto ret = + WaitForSingleObject(reinterpret_cast(data_->handle_), INFINITE); + if (ret != WAIT_OBJECT_0) { + auto lastError = GetLastError(); + assert(false); + throw std::system_error(static_cast(lastError), std::system_category(), + "WaitForSingleObjectFailed: thread join"); + } + + BOOL rc +#if defined(_MSC_VER) + = FALSE; +#else + __attribute__((__unused__)); +#endif + rc = CloseHandle(reinterpret_cast(data_->handle_)); + assert(rc != 0); + data_->handle_ = 0; +} + +bool WindowsThread::detach() { + if (!joinable()) { + assert(false); + throw std::system_error(std::make_error_code(std::errc::invalid_argument), + "Thread is no longer available"); + } + + BOOL ret = CloseHandle(reinterpret_cast(data_->handle_)); + data_->handle_ = 0; + + return (ret != 0); +} + +void WindowsThread::swap(WindowsThread& o) { + data_.swap(o.data_); + std::swap(th_id_, o.th_id_); +} + +unsigned int __stdcall WindowsThread::Data::ThreadProc(void* arg) { + auto ptr = reinterpret_cast*>(arg); + std::unique_ptr> data(ptr); + (*data)->func_(); + return 0; +} +} // namespace port +} // namespace ROCKSDB_NAMESPACE + +#endif // !_POSIX_THREADS +#endif // OS_WIN diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.h new file mode 100644 index 0000000000..916033b77c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.h @@ -0,0 +1,117 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#ifndef _POSIX_THREADS
+
+#include <functional>
+#include <memory>
+#include <type_traits>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace port {
+
+// This class is a replacement for std::thread.
+// Two reasons we do not like std::thread:
+// - it dynamically allocates its internals, which are automatically
+//   freed when the thread terminates and not on the destruction of the
+//   object. This makes it difficult to control the source of memory
+//   allocation.
+// - this implements Pimpl so we can easily replace the guts of the
+//   object in our private version if necessary.
+class WindowsThread {
+  struct Data;
+
+  std::shared_ptr<Data> data_;
+  unsigned int th_id_;
+
+  void Init(std::function<void()>&&);
+
+ public:
+  using native_handle_type = void*;
+
+  // Construct with no thread
+  WindowsThread();
+
+  // Template constructor
+  //
+  // This templated constructor accomplishes several things
+  //
+  // - Allows the class as a whole to not be a template
+  //
+  // - take "universal" references to support both _lvalues and _rvalues
+  //
+  // - because this constructor is a catchall case in many respects it
+  //   may prevent us from using both the default __ctor and the move
+  //   __ctor. Also it may circumvent copy __ctor deletion. To work around
+  //   this we make sure this one has at least one argument and eliminate
+  //   it from the overload selection when WindowsThread is the first
+  //   argument.
+  //
+  // - construct with Fx(Ax...) with a variable number of types/arguments.
+  //
+  // - Gathers together the callable object with its arguments and
+  //   constructs a single callable entity
+  //
+  // - Makes use of std::function to convert it to a specification-template
+  //   dependent type that both checks the signature conformance to ensure
+  //   that all of the necessary arguments are provided and allows a pimpl
+  //   implementation.
+  template <class Fn, class... Args,
+            class = typename std::enable_if<!std::is_same<
+                typename std::decay<Fn>::type, WindowsThread>::value>::type>
+  explicit WindowsThread(Fn&& fx, Args&&... 
ax) : WindowsThread() { + // Use binder to create a single callable entity + auto binder = std::bind(std::forward(fx), std::forward(ax)...); + // Use std::function to take advantage of the type erasure + // so we can still hide implementation within pimpl + // This also makes sure that the binder signature is compliant + std::function target = binder; + + Init(std::move(target)); + } + + ~WindowsThread(); + + WindowsThread(const WindowsThread&) = delete; + + WindowsThread& operator=(const WindowsThread&) = delete; + + WindowsThread(WindowsThread&&) noexcept; + + WindowsThread& operator=(WindowsThread&&) noexcept; + + bool joinable() const; + + unsigned int get_id() const { return th_id_; } + + native_handle_type native_handle() const; + + static unsigned hardware_concurrency(); + + void join(); + + bool detach(); + + void swap(WindowsThread&); +}; +} // namespace port +} // namespace ROCKSDB_NAMESPACE + +namespace std { +inline void swap(ROCKSDB_NAMESPACE::port::WindowsThread& th1, + ROCKSDB_NAMESPACE::port::WindowsThread& th2) { + th1.swap(th2); +} +} // namespace std + +#endif // !_POSIX_THREADS diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/xpress_win.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/xpress_win.h new file mode 100644 index 0000000000..187adffa65 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/xpress_win.h @@ -0,0 +1,26 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { +namespace port { +namespace xpress { + +bool Compress(const char* input, size_t length, std::string* output); + +char* Decompress(const char* input_data, size_t input_length, + size_t* uncompressed_size); +} // namespace xpress +} // namespace port +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/xpress.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/xpress.h new file mode 100644 index 0000000000..457025f666 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/xpress.h @@ -0,0 +1,17 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
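Editor's note: the WindowsThread template constructor above (win_thread.h) relies on a small type-erasure idiom: std::bind packs the callable and its arguments into a single nullary callable, and std::function<void()> then erases its concrete type so that the pimpl side never needs to see it. A minimal sketch of the same idiom, with illustrative names that are not part of the vendored sources:

#include <functional>
#include <iostream>
#include <utility>

// Stores any callable plus its arguments as one type-erased
// std::function<void()> that can be invoked later.
class DeferredCall {
 public:
  template <class Fn, class... Args>
  explicit DeferredCall(Fn&& fn, Args&&... args)
      : target_(
            std::bind(std::forward<Fn>(fn), std::forward<Args>(args)...)) {}

  void Run() { target_(); }

 private:
  std::function<void()> target_;
};

int main() {
  DeferredCall call([](int x) { std::cout << x << "\n"; }, 42);
  call.Run();  // prints 42
  return 0;
}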
+
+#pragma once
+
+// Xpress on Windows is implemented using the Win API
+#if defined(ROCKSDB_PLATFORM_POSIX)
+#error "Xpress compression not implemented"
+#elif defined(OS_WIN)
+#include "port/win/xpress_win.h"
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.cc
new file mode 100644
index 0000000000..5a573ca992
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.cc
@@ -0,0 +1,125 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/adaptive/adaptive_table_factory.h"
+
+#include "port/port.h"
+#include "table/format.h"
+#include "table/table_builder.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+AdaptiveTableFactory::AdaptiveTableFactory(
+    std::shared_ptr<TableFactory> table_factory_to_write,
+    std::shared_ptr<TableFactory> block_based_table_factory,
+    std::shared_ptr<TableFactory> plain_table_factory,
+    std::shared_ptr<TableFactory> cuckoo_table_factory)
+    : table_factory_to_write_(table_factory_to_write),
+      block_based_table_factory_(block_based_table_factory),
+      plain_table_factory_(plain_table_factory),
+      cuckoo_table_factory_(cuckoo_table_factory) {
+  if (!plain_table_factory_) {
+    plain_table_factory_.reset(NewPlainTableFactory());
+  }
+  if (!block_based_table_factory_) {
+    block_based_table_factory_.reset(NewBlockBasedTableFactory());
+  }
+  if (!cuckoo_table_factory_) {
+    cuckoo_table_factory_.reset(NewCuckooTableFactory());
+  }
+  if (!table_factory_to_write_) {
+    table_factory_to_write_ = block_based_table_factory_;
+  }
+}
+
+extern const uint64_t kPlainTableMagicNumber;
+extern const uint64_t kLegacyPlainTableMagicNumber;
+extern const uint64_t kBlockBasedTableMagicNumber;
+extern const uint64_t kLegacyBlockBasedTableMagicNumber;
+extern const uint64_t kCuckooTableMagicNumber;
+
+Status AdaptiveTableFactory::NewTableReader(
+    const ReadOptions& ro, const TableReaderOptions& table_reader_options,
+    std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+    std::unique_ptr<TableReader>* table,
+    bool prefetch_index_and_filter_in_cache) const {
+  Footer footer;
+  IOOptions opts;
+  auto s =
+      ReadFooterFromFile(opts, file.get(), *table_reader_options.ioptions.fs,
+                         nullptr /* prefetch_buffer */, file_size, &footer);
+  if (!s.ok()) {
+    return s;
+  }
+  if (footer.table_magic_number() == kPlainTableMagicNumber ||
+      footer.table_magic_number() == kLegacyPlainTableMagicNumber) {
+    return plain_table_factory_->NewTableReader(
+        table_reader_options, std::move(file), file_size, table);
+  } else if (footer.table_magic_number() == kBlockBasedTableMagicNumber ||
+             footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) {
+    return block_based_table_factory_->NewTableReader(
+        ro, table_reader_options, std::move(file), file_size, table,
+        prefetch_index_and_filter_in_cache);
+  } else if (footer.table_magic_number() == kCuckooTableMagicNumber) {
+    return cuckoo_table_factory_->NewTableReader(
+        table_reader_options, std::move(file), file_size, table);
+  } else {
+    return Status::NotSupported("Unidentified table format");
+  }
+}
+
+TableBuilder* AdaptiveTableFactory::NewTableBuilder(
+    const TableBuilderOptions& table_builder_options,
+    WritableFileWriter* file) const {
+  return 
table_factory_to_write_->NewTableBuilder(table_builder_options, file); +} + +std::string AdaptiveTableFactory::GetPrintableOptions() const { + std::string ret; + ret.reserve(20000); + const int kBufferSize = 200; + char buffer[kBufferSize]; + + if (table_factory_to_write_) { + snprintf(buffer, kBufferSize, " write factory (%s) options:\n%s\n", + (table_factory_to_write_->Name() ? table_factory_to_write_->Name() + : ""), + table_factory_to_write_->GetPrintableOptions().c_str()); + ret.append(buffer); + } + if (plain_table_factory_) { + snprintf(buffer, kBufferSize, " %s options:\n%s\n", + plain_table_factory_->Name() ? plain_table_factory_->Name() : "", + plain_table_factory_->GetPrintableOptions().c_str()); + ret.append(buffer); + } + if (block_based_table_factory_) { + snprintf( + buffer, kBufferSize, " %s options:\n%s\n", + (block_based_table_factory_->Name() ? block_based_table_factory_->Name() + : ""), + block_based_table_factory_->GetPrintableOptions().c_str()); + ret.append(buffer); + } + if (cuckoo_table_factory_) { + snprintf(buffer, kBufferSize, " %s options:\n%s\n", + cuckoo_table_factory_->Name() ? cuckoo_table_factory_->Name() : "", + cuckoo_table_factory_->GetPrintableOptions().c_str()); + ret.append(buffer); + } + return ret; +} + +extern TableFactory* NewAdaptiveTableFactory( + std::shared_ptr table_factory_to_write, + std::shared_ptr block_based_table_factory, + std::shared_ptr plain_table_factory, + std::shared_ptr cuckoo_table_factory) { + return new AdaptiveTableFactory(table_factory_to_write, + block_based_table_factory, + plain_table_factory, cuckoo_table_factory); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.h new file mode 100644 index 0000000000..55c8bca1f4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/adaptive/adaptive_table_factory.h @@ -0,0 +1,56 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
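Editor's note: a short usage sketch for the factory defined above. It assumes the upstream declaration of NewAdaptiveTableFactory in include/rocksdb/table.h, where every argument defaults to nullptr so that the constructor falls back to the default plain, block-based, and cuckoo factories; treat that signature, and the database path used here, as assumptions rather than guarantees of this vendored tree.

#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

int main() {
  ROCKSDB_NAMESPACE::Options options;
  options.create_if_missing = true;
  // Writes go through the write factory (block-based by default), while
  // reads dispatch on each SST file's footer magic number, so a database
  // containing a mix of table formats remains readable.
  options.table_factory.reset(ROCKSDB_NAMESPACE::NewAdaptiveTableFactory());

  ROCKSDB_NAMESPACE::DB* db = nullptr;
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/adaptive_demo", &db);
  assert(s.ok());
  delete db;
  return 0;
}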
+
+#pragma once
+
+#include <string>
+
+#include "rocksdb/options.h"
+#include "rocksdb/table.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct EnvOptions;
+
+class Status;
+class RandomAccessFile;
+class WritableFile;
+class Table;
+class TableBuilder;
+
+class AdaptiveTableFactory : public TableFactory {
+ public:
+  ~AdaptiveTableFactory() {}
+
+  explicit AdaptiveTableFactory(
+      std::shared_ptr<TableFactory> table_factory_to_write,
+      std::shared_ptr<TableFactory> block_based_table_factory,
+      std::shared_ptr<TableFactory> plain_table_factory,
+      std::shared_ptr<TableFactory> cuckoo_table_factory);
+
+  const char* Name() const override { return "AdaptiveTableFactory"; }
+
+  using TableFactory::NewTableReader;
+  Status NewTableReader(
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
+      std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+      std::unique_ptr<TableReader>* table,
+      bool prefetch_index_and_filter_in_cache = true) const override;
+
+  TableBuilder* NewTableBuilder(
+      const TableBuilderOptions& table_builder_options,
+      WritableFileWriter* file) const override;
+
+  std::string GetPrintableOptions() const override;
+
+ private:
+  std::shared_ptr<TableFactory> table_factory_to_write_;
+  std::shared_ptr<TableFactory> block_based_table_factory_;
+  std::shared_ptr<TableFactory> plain_table_factory_;
+  std::shared_ptr<TableFactory> cuckoo_table_factory_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.cc
new file mode 100644
index 0000000000..2cf9a55318
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.cc
@@ -0,0 +1,73 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
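Editor's note: the reader implemented below delegates the actual lookup to Block's restart-point binary search. The core idea, choosing the last index entry whose first key is not greater than the target, can be sketched independently with std::upper_bound; none of the names below exist in the vendored code.

#include <algorithm>
#include <string>
#include <vector>

// Given the first key of every data block in sorted order, return the
// index of the only block that could contain `target`: the last block
// whose first key is <= target.
static size_t FindBlockIndex(const std::vector<std::string>& first_keys,
                             const std::string& target) {
  // upper_bound finds the first block whose first key is > target; the
  // candidate block is the one just before it.
  auto it = std::upper_bound(first_keys.begin(), first_keys.end(), target);
  if (it == first_keys.begin()) {
    return 0;  // target sorts before every block; a seek lands in block 0
  }
  return static_cast<size_t>(it - first_keys.begin()) - 1;
}
// e.g. with first_keys = {"b", "f", "k"}: target "g" -> 1, target "a" -> 0.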
+#include "table/block_based/binary_search_index_reader.h" + +namespace ROCKSDB_NAMESPACE { +Status BinarySearchIndexReader::Create( + const BlockBasedTable* table, const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, + bool pin, BlockCacheLookupContext* lookup_context, + std::unique_ptr* index_reader) { + assert(table != nullptr); + assert(table->get_rep()); + assert(!pin || prefetch); + assert(index_reader != nullptr); + + CachableEntry index_block; + if (prefetch || !use_cache) { + const Status s = + ReadIndexBlock(table, prefetch_buffer, ro, use_cache, + /*get_context=*/nullptr, lookup_context, &index_block); + if (!s.ok()) { + return s; + } + + if (use_cache && !pin) { + index_block.Reset(); + } + } + + index_reader->reset( + new BinarySearchIndexReader(table, std::move(index_block))); + + return Status::OK(); +} + +InternalIteratorBase* BinarySearchIndexReader::NewIterator( + const ReadOptions& read_options, bool /* disable_prefix_seek */, + IndexBlockIter* iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) { + const BlockBasedTable::Rep* rep = table()->get_rep(); + const bool no_io = (read_options.read_tier == kBlockCacheTier); + CachableEntry index_block; + const Status s = GetOrReadIndexBlock(no_io, get_context, lookup_context, + &index_block, read_options); + if (!s.ok()) { + if (iter != nullptr) { + iter->Invalidate(s); + return iter; + } + + return NewErrorInternalIterator(s); + } + + Statistics* kNullStats = nullptr; + // We don't return pinned data from index blocks, so no need + // to set `block_contents_pinned`. + auto it = index_block.GetValue()->NewIndexIterator( + internal_comparator()->user_comparator(), + rep->get_global_seqno(BlockType::kIndex), iter, kNullStats, true, + index_has_first_key(), index_key_includes_seq(), index_value_is_full()); + + assert(it != nullptr); + index_block.TransferTo(it); + + return it; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.h new file mode 100644 index 0000000000..d4a611ecca --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/binary_search_index_reader.h @@ -0,0 +1,48 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once +#include "table/block_based/index_reader_common.h" + +namespace ROCKSDB_NAMESPACE { +// Index that allows binary search lookup for the first key of each block. +// This class can be viewed as a thin wrapper for `Block` class which already +// supports binary search. +class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon { + public: + // Read index from the file and create an intance for + // `BinarySearchIndexReader`. + // On success, index_reader will be populated; otherwise it will remain + // unmodified. 
+ static Status Create(const BlockBasedTable* table, const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, bool use_cache, + bool prefetch, bool pin, + BlockCacheLookupContext* lookup_context, + std::unique_ptr* index_reader); + + InternalIteratorBase* NewIterator( + const ReadOptions& read_options, bool /* disable_prefix_seek */, + IndexBlockIter* iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) override; + + size_t ApproximateMemoryUsage() const override { + size_t usage = ApproximateIndexBlockMemoryUsage(); +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + usage += malloc_usable_size(const_cast(this)); +#else + usage += sizeof(*this); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + return usage; + } + + private: + BinarySearchIndexReader(const BlockBasedTable* t, + CachableEntry&& index_block) + : IndexReaderCommon(t, std::move(index_block)) {} +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.cc new file mode 100644 index 0000000000..136275b6c8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.cc @@ -0,0 +1,1291 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Decodes the blocks generated by block_builder.cc. + +#include "table/block_based/block.h" + +#include +#include +#include +#include + +#include "monitoring/perf_context_imp.h" +#include "port/port.h" +#include "port/stack_trace.h" +#include "rocksdb/comparator.h" +#include "table/block_based/block_prefix_index.h" +#include "table/block_based/data_block_footer.h" +#include "table/format.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +// Helper routine: decode the next block entry starting at "p", +// storing the number of shared key bytes, non_shared key bytes, +// and the length of the value in "*shared", "*non_shared", and +// "*value_length", respectively. Will not dereference past "limit". +// +// If any errors are detected, returns nullptr. Otherwise, returns a +// pointer to the key delta (just past the three decoded values). +struct DecodeEntry { + inline const char* operator()(const char* p, const char* limit, + uint32_t* shared, uint32_t* non_shared, + uint32_t* value_length) { + // We need 2 bytes for shared and non_shared size. We also need one more + // byte either for value size or the actual value in case of value delta + // encoding. 
+    assert(limit - p >= 3);
+    *shared = reinterpret_cast<const unsigned char*>(p)[0];
+    *non_shared = reinterpret_cast<const unsigned char*>(p)[1];
+    *value_length = reinterpret_cast<const unsigned char*>(p)[2];
+    if ((*shared | *non_shared | *value_length) < 128) {
+      // Fast path: all three values are encoded in one byte each
+      p += 3;
+    } else {
+      if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr;
+      if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr;
+      if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) {
+        return nullptr;
+      }
+    }
+
+    // Using an assert in place of "return null" since we should not pay the
+    // cost of checking for corruption on every single key decoding
+    assert(!(static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)));
+    return p;
+  }
+};
+
+// Helper routine: similar to DecodeEntry but does not have assertions.
+// Instead, returns nullptr so that caller can detect and report failure.
+struct CheckAndDecodeEntry {
+  inline const char* operator()(const char* p, const char* limit,
+                                uint32_t* shared, uint32_t* non_shared,
+                                uint32_t* value_length) {
+    // We need 2 bytes for shared and non_shared size. We also need one more
+    // byte either for value size or the actual value in case of value delta
+    // encoding.
+    if (limit - p < 3) {
+      return nullptr;
+    }
+    *shared = reinterpret_cast<const unsigned char*>(p)[0];
+    *non_shared = reinterpret_cast<const unsigned char*>(p)[1];
+    *value_length = reinterpret_cast<const unsigned char*>(p)[2];
+    if ((*shared | *non_shared | *value_length) < 128) {
+      // Fast path: all three values are encoded in one byte each
+      p += 3;
+    } else {
+      if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr;
+      if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr;
+      if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) {
+        return nullptr;
+      }
+    }
+
+    if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) {
+      return nullptr;
+    }
+    return p;
+  }
+};
+
+struct DecodeKey {
+  inline const char* operator()(const char* p, const char* limit,
+                                uint32_t* shared, uint32_t* non_shared) {
+    uint32_t value_length;
+    return DecodeEntry()(p, limit, shared, non_shared, &value_length);
+  }
+};
+
+// In format_version 4, which is used by index blocks, the value size is not
+// encoded before the entry, as the value is known to be the handle with the
+// known size.
+struct DecodeKeyV4 {
+  inline const char* operator()(const char* p, const char* limit,
+                                uint32_t* shared, uint32_t* non_shared) {
+    // We need 2 bytes for shared and non_shared size. We also need one more
+    // byte either for value size or the actual value in case of value delta
+    // encoding.
+    if (limit - p < 3) return nullptr;
+    *shared = reinterpret_cast<const unsigned char*>(p)[0];
+    *non_shared = reinterpret_cast<const unsigned char*>(p)[1];
+    if ((*shared | *non_shared) < 128) {
+      // Fast path: both values are encoded in one byte each
+      p += 2;
+    } else {
+      if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr;
+      if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr;
+    }
+    return p;
+  }
+};
+
+struct DecodeEntryV4 {
+  inline const char* operator()(const char* p, const char* limit,
+                                uint32_t* shared, uint32_t* non_shared,
+                                uint32_t* value_length) {
+    assert(value_length);
+
+    *value_length = 0;
+    return DecodeKeyV4()(p, limit, shared, non_shared);
+  }
+};
+
+void DataBlockIter::NextImpl() {
+#ifndef NDEBUG
+  if (TEST_Corrupt_Callback("DataBlockIter::NextImpl")) return;
+#endif
+  bool is_shared = false;
+  ParseNextDataKey(&is_shared);
+  ++cur_entry_idx_;
+}
+
+void MetaBlockIter::NextImpl() {
+  bool is_shared = false;
+  ParseNextKey<CheckAndDecodeEntry>(&is_shared);
+  ++cur_entry_idx_;
+}
+
+void IndexBlockIter::NextImpl() {
+  ParseNextIndexKey();
+  ++cur_entry_idx_;
+}
+
+void IndexBlockIter::PrevImpl() {
+  assert(Valid());
+  // Scan backwards to a restart point before current_
+  const uint32_t original = current_;
+  while (GetRestartPoint(restart_index_) >= original) {
+    if (restart_index_ == 0) {
+      // No more entries
+      current_ = restarts_;
+      restart_index_ = num_restarts_;
+      return;
+    }
+    restart_index_--;
+  }
+  SeekToRestartPoint(restart_index_);
+  // Loop until end of current entry hits the start of original entry
+  while (ParseNextIndexKey() && NextEntryOffset() < original) {
+  }
+  --cur_entry_idx_;
+}
+
+void MetaBlockIter::PrevImpl() {
+  assert(Valid());
+  // Scan backwards to a restart point before current_
+  const uint32_t original = current_;
+  while (GetRestartPoint(restart_index_) >= original) {
+    if (restart_index_ == 0) {
+      // No more entries
+      current_ = restarts_;
+      restart_index_ = num_restarts_;
+      return;
+    }
+    restart_index_--;
+  }
+  SeekToRestartPoint(restart_index_);
+  bool is_shared = false;
+  // Loop until end of current entry hits the start of original entry
+  while (ParseNextKey<CheckAndDecodeEntry>(&is_shared) &&
+         NextEntryOffset() < original) {
+  }
+  --cur_entry_idx_;
+}
+
+// Similar to IndexBlockIter::PrevImpl but also caches the prev entries
+void DataBlockIter::PrevImpl() {
+  assert(Valid());
+
+  assert(prev_entries_idx_ == -1 ||
+         static_cast<size_t>(prev_entries_idx_) < prev_entries_.size());
+  --cur_entry_idx_;
+  // Check if we can use cached prev_entries_
+  if (prev_entries_idx_ > 0 &&
+      prev_entries_[prev_entries_idx_].offset == current_) {
+    // Read cached CachedPrevEntry
+    prev_entries_idx_--;
+    const CachedPrevEntry& current_prev_entry =
+        prev_entries_[prev_entries_idx_];
+
+    const char* key_ptr = nullptr;
+    bool raw_key_cached;
+    if (current_prev_entry.key_ptr != nullptr) {
+      // The key is not delta encoded and stored in the data block
+      key_ptr = current_prev_entry.key_ptr;
+      raw_key_cached = false;
+    } else {
+      // The key is delta encoded and stored in prev_entries_keys_buff_
+      key_ptr = prev_entries_keys_buff_.data() + current_prev_entry.key_offset;
+      raw_key_cached = true;
+    }
+    const Slice current_key(key_ptr, current_prev_entry.key_size);
+
+    current_ = current_prev_entry.offset;
+    // TODO(ajkr): the copy when `raw_key_cached` is done here for convenience,
+    // not necessity. It is convenient since this class treats keys as pinned
+    // when `raw_key_` points to an outside buffer. 
So we cannot allow + // `raw_key_` point into Prev cache as it is a transient outside buffer + // (i.e., keys in it are not actually pinned). + raw_key_.SetKey(current_key, raw_key_cached /* copy */); + value_ = current_prev_entry.value; + + return; + } + + // Clear prev entries cache + prev_entries_idx_ = -1; + prev_entries_.clear(); + prev_entries_keys_buff_.clear(); + + // Scan backwards to a restart point before current_ + const uint32_t original = current_; + while (GetRestartPoint(restart_index_) >= original) { + if (restart_index_ == 0) { + // No more entries + current_ = restarts_; + restart_index_ = num_restarts_; + return; + } + restart_index_--; + } + + SeekToRestartPoint(restart_index_); + + do { + bool is_shared = false; + if (!ParseNextDataKey(&is_shared)) { + break; + } + Slice current_key = raw_key_.GetKey(); + + if (raw_key_.IsKeyPinned()) { + // The key is not delta encoded + prev_entries_.emplace_back(current_, current_key.data(), 0, + current_key.size(), value()); + } else { + // The key is delta encoded, cache decoded key in buffer + size_t new_key_offset = prev_entries_keys_buff_.size(); + prev_entries_keys_buff_.append(current_key.data(), current_key.size()); + + prev_entries_.emplace_back(current_, nullptr, new_key_offset, + current_key.size(), value()); + } + // Loop until end of current entry hits the start of original entry + } while (NextEntryOffset() < original); + prev_entries_idx_ = static_cast(prev_entries_.size()) - 1; +} + +void DataBlockIter::SeekImpl(const Slice& target) { + Slice seek_key = target; + PERF_TIMER_GUARD(block_seek_nanos); + if (data_ == nullptr) { // Not init yet + return; + } + uint32_t index = 0; + bool skip_linear_scan = false; + bool ok = BinarySeek(seek_key, &index, &skip_linear_scan); + + if (!ok) { + return; + } + FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan); +} + +void MetaBlockIter::SeekImpl(const Slice& target) { + Slice seek_key = target; + PERF_TIMER_GUARD(block_seek_nanos); + if (data_ == nullptr) { // Not init yet + return; + } + uint32_t index = 0; + bool skip_linear_scan = false; + bool ok = BinarySeek(seek_key, &index, &skip_linear_scan); + + if (!ok) { + return; + } + FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan); +} + +// Optimized Seek for point lookup for an internal key `target` +// target = "seek_user_key @ type | seqno". +// +// For any type other than kTypeValue, kTypeDeletion, kTypeSingleDeletion, +// kTypeBlobIndex, kTypeWideColumnEntity or kTypeMerge, this function behaves +// identically to Seek(). +// +// For any type in kTypeValue, kTypeDeletion, kTypeSingleDeletion, +// kTypeBlobIndex, kTypeWideColumnEntity, or kTypeMerge: +// +// If the return value is FALSE, iter location is undefined, and it means: +// 1) there is no key in this block falling into the range: +// ["seek_user_key @ type | seqno", "seek_user_key @ kTypeDeletion | 0"], +// inclusive; AND +// 2) the last key of this block has a greater user_key from seek_user_key +// +// If the return value is TRUE, iter location has two possibilities: +// 1) If iter is valid, it is set to a location as if set by SeekImpl(target). +// In this case, it points to the first key with a larger user_key or a +// matching user_key with a seqno no greater than the seeking seqno. +// 2) If the iter is invalid, it means that either all the user_key is less +// than the seek_user_key, or the block ends with a matching user_key but +// with a smaller [ type | seqno ] (i.e. a larger seqno, or the same seqno +// but larger type). 
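// Editor's note: a concrete illustration of the contract above, added for
// exposition only (keys are written user_key@seqno; internal keys order by
// ascending user_key, then descending seqno). Suppose this block holds:
//
//   app@120  app@100  axy@90
//
// - target app@110 -> returns TRUE, iter valid on app@100: the first "app"
//   entry with a seqno no greater than 110.
// - target axy@80 -> returns TRUE, iter invalid: axy@90 sorts before the
//   target (seqno 90 > 80), the scan runs off the end of the block, and a
//   matching key may still exist in the next block.
// - target amw@50 -> returns FALSE: the linear scan stops at the first key
//   greater than the target, whose user_key ("app" or "axy") differs from
//   "amw", so the key can exist neither here nor in any later block.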
+bool DataBlockIter::SeekForGetImpl(const Slice& target) { + Slice target_user_key = ExtractUserKey(target); + uint32_t map_offset = restarts_ + num_restarts_ * sizeof(uint32_t); + uint8_t entry = + data_block_hash_index_->Lookup(data_, map_offset, target_user_key); + + if (entry == kCollision) { + // HashSeek not effective, falling back + SeekImpl(target); + return true; + } + + if (entry == kNoEntry) { + // Even if we cannot find the user_key in this block, the result may + // exist in the next block. Consider this example: + // + // Block N: [aab@100, ... , app@120] + // boundary key: axy@50 (we make minimal assumption about a boundary key) + // Block N+1: [axy@10, ... ] + // + // If seek_key = axy@60, the search will start from Block N. + // Even if the user_key is not found in the hash map, the caller still + // have to continue searching the next block. + // + // In this case, we pretend the key is in the last restart interval. + // The while-loop below will search the last restart interval for the + // key. It will stop at the first key that is larger than the seek_key, + // or to the end of the block if no one is larger. + entry = static_cast(num_restarts_ - 1); + } + + uint32_t restart_index = entry; + + // check if the key is in the restart_interval + assert(restart_index < num_restarts_); + SeekToRestartPoint(restart_index); + current_ = GetRestartPoint(restart_index); + cur_entry_idx_ = + static_cast(restart_index * block_restart_interval_) - 1; + + uint32_t limit = restarts_; + if (restart_index + 1 < num_restarts_) { + limit = GetRestartPoint(restart_index + 1); + } + while (current_ < limit) { + ++cur_entry_idx_; + bool shared; + // Here we only linear seek the target key inside the restart interval. + // If a key does not exist inside a restart interval, we avoid + // further searching the block content across restart interval boundary. + // + // TODO(fwu): check the left and right boundary of the restart interval + // to avoid linear seek a target key that is out of range. + if (!ParseNextDataKey(&shared) || CompareCurrentKey(target) >= 0) { + // we stop at the first potential matching user key. + break; + } + // If the loop exits due to CompareCurrentKey(target) >= 0, then current key + // exists, and its checksum verification will be done in UpdateKey() called + // in SeekForGet(). + // TODO(cbi): If this loop exits with current_ == restart_, per key-value + // checksum will not be verified in UpdateKey() since Valid() + // will return false. + } + + if (current_ == restarts_) { + // Search reaches to the end of the block. There are three possibilities: + // 1) there is only one user_key match in the block (otherwise collision). + // the matching user_key resides in the last restart interval, and it + // is the last key of the restart interval and of the block as well. + // ParseNextKey() skipped it as its [ type | seqno ] is smaller. + // + // 2) The seek_key is not found in the HashIndex Lookup(), i.e. kNoEntry, + // AND all existing user_keys in the restart interval are smaller than + // seek_user_key. + // + // 3) The seek_key is a false positive and happens to be hashed to the + // last restart interval, AND all existing user_keys in the restart + // interval are smaller than seek_user_key. + // + // The result may exist in the next block each case, so we return true. + return true; + } + + if (icmp_->user_comparator()->Compare(raw_key_.GetUserKey(), + target_user_key) != 0) { + // the key is not in this block and cannot be at the next block either. 
+ return false; + } + + // Here we are conservative and only support a limited set of cases + ValueType value_type = ExtractValueType(raw_key_.GetInternalKey()); + if (value_type != ValueType::kTypeValue && + value_type != ValueType::kTypeDeletion && + value_type != ValueType::kTypeMerge && + value_type != ValueType::kTypeSingleDeletion && + value_type != ValueType::kTypeBlobIndex && + value_type != ValueType::kTypeWideColumnEntity) { + SeekImpl(target); + } + + // Result found, and the iter is correctly set. + return true; +} + +void IndexBlockIter::SeekImpl(const Slice& target) { +#ifndef NDEBUG + if (TEST_Corrupt_Callback("IndexBlockIter::SeekImpl")) return; +#endif + TEST_SYNC_POINT("IndexBlockIter::Seek:0"); + PERF_TIMER_GUARD(block_seek_nanos); + if (data_ == nullptr) { // Not init yet + return; + } + Slice seek_key = target; + if (raw_key_.IsUserKey()) { + seek_key = ExtractUserKey(target); + } + status_ = Status::OK(); + uint32_t index = 0; + bool skip_linear_scan = false; + bool ok = false; + if (prefix_index_) { + bool prefix_may_exist = true; + ok = PrefixSeek(target, &index, &prefix_may_exist); + if (!prefix_may_exist) { + // This is to let the caller to distinguish between non-existing prefix, + // and when key is larger than the last key, which both set Valid() to + // false. + current_ = restarts_; + status_ = Status::NotFound(); + } + // restart interval must be one when hash search is enabled so the binary + // search simply lands at the right place. + skip_linear_scan = true; + } else if (value_delta_encoded_) { + ok = BinarySeek(seek_key, &index, &skip_linear_scan); + } else { + ok = BinarySeek(seek_key, &index, &skip_linear_scan); + } + + if (!ok) { + return; + } + FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan); +} + +void DataBlockIter::SeekForPrevImpl(const Slice& target) { + PERF_TIMER_GUARD(block_seek_nanos); + Slice seek_key = target; + if (data_ == nullptr) { // Not init yet + return; + } + uint32_t index = 0; + bool skip_linear_scan = false; + bool ok = BinarySeek(seek_key, &index, &skip_linear_scan); + + if (!ok) { + return; + } + FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan); + + if (!Valid()) { + if (status_.ok()) { + SeekToLastImpl(); + } + } else { + while (Valid() && CompareCurrentKey(seek_key) > 0) { + PrevImpl(); + } + } +} + +void MetaBlockIter::SeekForPrevImpl(const Slice& target) { + PERF_TIMER_GUARD(block_seek_nanos); + Slice seek_key = target; + if (data_ == nullptr) { // Not init yet + return; + } + uint32_t index = 0; + bool skip_linear_scan = false; + bool ok = BinarySeek(seek_key, &index, &skip_linear_scan); + + if (!ok) { + return; + } + FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan); + + if (!Valid()) { + if (status_.ok()) { + SeekToLastImpl(); + } + } else { + while (Valid() && CompareCurrentKey(seek_key) > 0) { + PrevImpl(); + } + } +} + +void DataBlockIter::SeekToFirstImpl() { + if (data_ == nullptr) { // Not init yet + return; + } + SeekToRestartPoint(0); + bool is_shared = false; + ParseNextDataKey(&is_shared); + cur_entry_idx_ = 0; +} + +void MetaBlockIter::SeekToFirstImpl() { + if (data_ == nullptr) { // Not init yet + return; + } + SeekToRestartPoint(0); + bool is_shared = false; + ParseNextKey(&is_shared); + cur_entry_idx_ = 0; +} + +void IndexBlockIter::SeekToFirstImpl() { +#ifndef NDEBUG + if (TEST_Corrupt_Callback("IndexBlockIter::SeekToFirstImpl")) return; +#endif + if (data_ == nullptr) { // Not init yet + return; + } + status_ = Status::OK(); + SeekToRestartPoint(0); + ParseNextIndexKey(); + 
cur_entry_idx_ = 0;
+}
+
+void DataBlockIter::SeekToLastImpl() {
+  if (data_ == nullptr) {  // Not init yet
+    return;
+  }
+  SeekToRestartPoint(num_restarts_ - 1);
+  bool is_shared = false;
+  cur_entry_idx_ = (num_restarts_ - 1) * block_restart_interval_;
+  while (ParseNextDataKey(&is_shared) && NextEntryOffset() < restarts_) {
+    // Keep skipping
+    ++cur_entry_idx_;
+  }
+}
+
+void MetaBlockIter::SeekToLastImpl() {
+  if (data_ == nullptr) {  // Not init yet
+    return;
+  }
+  SeekToRestartPoint(num_restarts_ - 1);
+  bool is_shared = false;
+  assert(num_restarts_ >= 1);
+  cur_entry_idx_ =
+      static_cast<int32_t>((num_restarts_ - 1) * block_restart_interval_);
+  while (ParseNextKey<CheckAndDecodeEntry>(&is_shared) &&
+         NextEntryOffset() < restarts_) {
+    // Will probably never reach here since restart_interval is always 1
+    ++cur_entry_idx_;
+  }
+}
+
+void IndexBlockIter::SeekToLastImpl() {
+  if (data_ == nullptr) {  // Not init yet
+    return;
+  }
+  status_ = Status::OK();
+  SeekToRestartPoint(num_restarts_ - 1);
+  cur_entry_idx_ = (num_restarts_ - 1) * block_restart_interval_;
+  while (ParseNextIndexKey() && NextEntryOffset() < restarts_) {
+    ++cur_entry_idx_;
+  }
+}
+
+template <class TValue>
+template <typename DecodeEntryFunc>
+bool BlockIter<TValue>::ParseNextKey(bool* is_shared) {
+  current_ = NextEntryOffset();
+  const char* p = data_ + current_;
+  const char* limit = data_ + restarts_;  // Restarts come right after data
+
+  if (p >= limit) {
+    // No more entries to return. Mark as invalid.
+    current_ = restarts_;
+    restart_index_ = num_restarts_;
+    return false;
+  }
+  // Decode next entry
+  uint32_t shared, non_shared, value_length;
+  p = DecodeEntryFunc()(p, limit, &shared, &non_shared, &value_length);
+  if (p == nullptr || raw_key_.Size() < shared) {
+    CorruptionError();
+    return false;
+  } else {
+    if (shared == 0) {
+      *is_shared = false;
+      // If this key doesn't share any bytes with prev key then we don't need
+      // to decode it and can use its address in the block directly.
+      raw_key_.SetKey(Slice(p, non_shared), false /* copy */);
+    } else {
+      // This key share `shared` bytes with prev key, we need to decode it
+      *is_shared = true;
+      raw_key_.TrimAppend(shared, p, non_shared);
+    }
+    value_ = Slice(p + non_shared, value_length);
+    if (shared == 0) {
+      while (restart_index_ + 1 < num_restarts_ &&
+             GetRestartPoint(restart_index_ + 1) < current_) {
+        ++restart_index_;
+      }
+    }
+    // else we are in the middle of a restart interval and the restart_index_
+    // thus has not changed
+    return true;
+  }
+}
+
+bool DataBlockIter::ParseNextDataKey(bool* is_shared) {
+  if (ParseNextKey<DecodeEntry>(is_shared)) {
+#ifndef NDEBUG
+    if (global_seqno_ != kDisableGlobalSequenceNumber) {
+      // If we are reading a file with a global sequence number we should
+      // expect that all encoded sequence numbers are zeros and any value
+      // type is kTypeValue, kTypeMerge, kTypeDeletion,
+      // kTypeDeletionWithTimestamp, or kTypeRangeDeletion.
+      uint64_t packed = ExtractInternalKeyFooter(raw_key_.GetKey());
+      SequenceNumber seqno;
+      ValueType value_type;
+      UnPackSequenceAndType(packed, &seqno, &value_type);
+      assert(value_type == ValueType::kTypeValue ||
+             value_type == ValueType::kTypeMerge ||
+             value_type == ValueType::kTypeDeletion ||
+             value_type == ValueType::kTypeDeletionWithTimestamp ||
+             value_type == ValueType::kTypeRangeDeletion);
+      assert(seqno == 0);
+    }
+#endif  // NDEBUG
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool IndexBlockIter::ParseNextIndexKey() {
+  bool is_shared = false;
+  bool ok = (value_delta_encoded_) ? ParseNextKey<DecodeEntryV4>(&is_shared)
+                                   : ParseNextKey<DecodeEntry>(&is_shared);
+  if (ok) {
+    if (value_delta_encoded_ || global_seqno_state_ != nullptr) {
+      DecodeCurrentValue(is_shared);
+    }
+  }
+  return ok;
+}
+
+// The format:
+// restart_point 0: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz)
+// restart_point 1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz)
+// ...
+// restart_point n-1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz)
+// where, k is key, v is value, and its encoding is in parentheses.
+// The format of each key is (shared_size, non_shared_size, shared, non_shared)
+// The format of each value, i.e., block handle, is (offset, size) whenever the
+// is_shared is false, which includes the first entry in each restart point.
+// Otherwise, the format is delta-size = the size of the current block - the
+// size of the last block.
+void IndexBlockIter::DecodeCurrentValue(bool is_shared) {
+  Slice v(value_.data(), data_ + restarts_ - value_.data());
+  // Delta encoding is used if `shared` != 0.
+  Status decode_s __attribute__((__unused__)) = decoded_value_.DecodeFrom(
+      &v, have_first_key_,
+      (value_delta_encoded_ && is_shared) ? &decoded_value_.handle : nullptr);
+  assert(decode_s.ok());
+  value_ = Slice(value_.data(), v.data() - value_.data());
+
+  if (global_seqno_state_ != nullptr) {
+    // Overwrite sequence number the same way as in DataBlockIter.
+
+    IterKey& first_internal_key = global_seqno_state_->first_internal_key;
+    first_internal_key.SetInternalKey(decoded_value_.first_internal_key,
+                                      /* copy */ true);
+
+    assert(GetInternalKeySeqno(first_internal_key.GetInternalKey()) == 0);
+
+    ValueType value_type = ExtractValueType(first_internal_key.GetKey());
+    assert(value_type == ValueType::kTypeValue ||
+           value_type == ValueType::kTypeMerge ||
+           value_type == ValueType::kTypeDeletion ||
+           value_type == ValueType::kTypeRangeDeletion);
+
+    first_internal_key.UpdateInternalKey(global_seqno_state_->global_seqno,
+                                         value_type);
+    decoded_value_.first_internal_key = first_internal_key.GetKey();
+  }
+}
+
+template <class TValue>
+void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
+                                               uint32_t index,
+                                               bool skip_linear_scan) {
+  // SeekToRestartPoint() only does the lookup in the restart block. We need
+  // to follow it up with NextImpl() to position the iterator at the restart
+  // key.
+  SeekToRestartPoint(index);
+  cur_entry_idx_ = static_cast<int32_t>(index * block_restart_interval_) - 1;
+  NextImpl();
+
+  if (!skip_linear_scan) {
+    // Linear search (within restart block) for first key >= target
+    uint32_t max_offset;
+    if (index + 1 < num_restarts_) {
+      // We are in a non-last restart interval. Since `BinarySeek()` guarantees
+      // the next restart key is strictly greater than `target`, we can
+      // terminate upon reaching it without any additional key comparison.
+      max_offset = GetRestartPoint(index + 1);
+    } else {
+      // We are in the last restart interval. The while-loop will terminate by
+      // `Valid()` returning false upon advancing past the block's last key.
+      max_offset = std::numeric_limits<uint32_t>::max();
+    }
+    while (true) {
+      NextImpl();
+      if (!Valid()) {
+        // TODO(cbi): per key-value checksum will not be verified in UpdateKey()
+        // since Valid() will return false.
+        break;
+      }
+      if (current_ == max_offset) {
+        assert(CompareCurrentKey(target) > 0);
+        break;
+      } else if (CompareCurrentKey(target) >= 0) {
+        break;
+      }
+    }
+  }
+}
+
+// Binary searches in restart array to find the starting restart point for the
+// linear scan, and stores it in `*index`. Assumes restart array does not
+// contain duplicate keys. It is guaranteed that the restart key at `*index + 1`
+// is strictly greater than `target` or does not exist (this can be used to
+// elide a comparison when linear scan reaches all the way to the next restart
+// key). Furthermore, `*skip_linear_scan` is set to indicate whether the
+// `*index`th restart key is the final result so that key does not need to be
+// compared again later.
+template <class TValue>
+template <typename DecodeKeyFunc>
+bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t* index,
+                                   bool* skip_linear_scan) {
+  if (restarts_ == 0) {
+    // SST files dedicated to range tombstones are written with index blocks
+    // that have no keys while also having `num_restarts_ == 1`. This would
+    // cause a problem for `BinarySeek()` as it'd try to access the first key
+    // which does not exist. We identify such blocks by the offset at which
+    // their restarts are stored, and return false to prevent any attempted
+    // key accesses.
+    return false;
+  }
+
+  *skip_linear_scan = false;
+  // Loop invariants:
+  // - Restart key at index `left` is less than or equal to the target key. The
+  //   sentinel index `-1` is considered to have a key that is less than all
+  //   keys.
+  // - Any restart keys after index `right` are strictly greater than the
+  //   target key.
+  int64_t left = -1, right = num_restarts_ - 1;
+  while (left != right) {
+    // The `mid` is computed by rounding up so it lands in (`left`, `right`].
+    int64_t mid = left + (right - left + 1) / 2;
+    uint32_t region_offset = GetRestartPoint(static_cast<uint32_t>(mid));
+    uint32_t shared, non_shared;
+    const char* key_ptr = DecodeKeyFunc()(
+        data_ + region_offset, data_ + restarts_, &shared, &non_shared);
+    if (key_ptr == nullptr || (shared != 0)) {
+      CorruptionError();
+      return false;
+    }
+    Slice mid_key(key_ptr, non_shared);
+    raw_key_.SetKey(mid_key, false /* copy */);
+    int cmp = CompareCurrentKey(target);
+    if (cmp < 0) {
+      // Key at "mid" is smaller than "target". Therefore all
+      // blocks before "mid" are uninteresting.
+      left = mid;
+    } else if (cmp > 0) {
+      // Key at "mid" is >= "target". Therefore all blocks at or
+      // after "mid" are uninteresting.
+      right = mid - 1;
+    } else {
+      *skip_linear_scan = true;
+      left = right = mid;
+    }
+  }
+
+  if (left == -1) {
+    // All keys in the block were strictly greater than `target`. So the very
+    // first key in the block is the final seek result.
+    *skip_linear_scan = true;
+    *index = 0;
+  } else {
+    *index = static_cast<uint32_t>(left);
+  }
+  return true;
+}
+
+// Compare target key and the block key of the block of `block_index`.
+// Return -1 if error.
+int IndexBlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) {
+  uint32_t region_offset = GetRestartPoint(block_index);
+  uint32_t shared, non_shared;
+  const char* key_ptr =
+      value_delta_encoded_
+          ? 
DecodeKeyV4()(data_ + region_offset, data_ + restarts_, &shared, + &non_shared) + : DecodeKey()(data_ + region_offset, data_ + restarts_, &shared, + &non_shared); + if (key_ptr == nullptr || (shared != 0)) { + CorruptionError(); + return 1; // Return target is smaller + } + Slice block_key(key_ptr, non_shared); + raw_key_.SetKey(block_key, false /* copy */); + return CompareCurrentKey(target); +} + +// Binary search in block_ids to find the first block +// with a key >= target +bool IndexBlockIter::BinaryBlockIndexSeek(const Slice& target, + uint32_t* block_ids, uint32_t left, + uint32_t right, uint32_t* index, + bool* prefix_may_exist) { + assert(left <= right); + assert(index); + assert(prefix_may_exist); + *prefix_may_exist = true; + uint32_t left_bound = left; + + while (left <= right) { + uint32_t mid = (right + left) / 2; + + int cmp = CompareBlockKey(block_ids[mid], target); + if (!status_.ok()) { + return false; + } + if (cmp < 0) { + // Key at "target" is larger than "mid". Therefore all + // blocks before or at "mid" are uninteresting. + left = mid + 1; + } else { + // Key at "target" is <= "mid". Therefore all blocks + // after "mid" are uninteresting. + // If there is only one block left, we found it. + if (left == right) break; + right = mid; + } + } + + if (left == right) { + // In one of the two following cases: + // (1) left is the first one of block_ids + // (2) there is a gap of blocks between block of `left` and `left-1`. + // we can further distinguish the case of key in the block or key not + // existing, by comparing the target key and the key of the previous + // block to the left of the block found. + if (block_ids[left] > 0 && + (left == left_bound || block_ids[left - 1] != block_ids[left] - 1) && + CompareBlockKey(block_ids[left] - 1, target) > 0) { + current_ = restarts_; + *prefix_may_exist = false; + return false; + } + + *index = block_ids[left]; + return true; + } else { + assert(left > right); + + // If the next block key is larger than seek key, it is possible that + // no key shares the prefix with `target`, or all keys with the same + // prefix as `target` are smaller than prefix. In the latter case, + // we are mandated to set the position the same as the total order. + // In the latter case, either: + // (1) `target` falls into the range of the next block. In this case, + // we can place the iterator to the next block, or + // (2) `target` is larger than all block keys. In this case we can + // keep the iterator invalidate without setting `prefix_may_exist` + // to false. + // We might sometimes end up with setting the total order position + // while there is no key sharing the prefix as `target`, but it + // still follows the contract. + uint32_t right_index = block_ids[right]; + assert(right_index + 1 <= num_restarts_); + if (right_index + 1 < num_restarts_) { + if (CompareBlockKey(right_index + 1, target) >= 0) { + *index = right_index + 1; + return true; + } else { + // We have to set the flag here because we are not positioning + // the iterator to the total order position. 
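+          // (Callers treat prefix_may_exist == false as a definitive
+          // "no key with this prefix exists", so it is set only when the
+          // comparisons above have established that.)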
+ *prefix_may_exist = false; + } + } + + // Mark iterator invalid + current_ = restarts_; + return false; + } +} + +bool IndexBlockIter::PrefixSeek(const Slice& target, uint32_t* index, + bool* prefix_may_exist) { + assert(index); + assert(prefix_may_exist); + assert(prefix_index_); + *prefix_may_exist = true; + Slice seek_key = target; + if (raw_key_.IsUserKey()) { + seek_key = ExtractUserKey(target); + } + uint32_t* block_ids = nullptr; + uint32_t num_blocks = prefix_index_->GetBlocks(target, &block_ids); + + if (num_blocks == 0) { + current_ = restarts_; + *prefix_may_exist = false; + return false; + } else { + assert(block_ids); + return BinaryBlockIndexSeek(seek_key, block_ids, 0, num_blocks - 1, index, + prefix_may_exist); + } +} + +uint32_t Block::NumRestarts() const { + assert(size_ >= 2 * sizeof(uint32_t)); + uint32_t block_footer = DecodeFixed32(data_ + size_ - sizeof(uint32_t)); + uint32_t num_restarts = block_footer; + if (size_ > kMaxBlockSizeSupportedByHashIndex) { + // In BlockBuilder, we have ensured a block with HashIndex is less than + // kMaxBlockSizeSupportedByHashIndex (64KiB). + // + // Therefore, if we encounter a block with a size > 64KiB, the block + // cannot have HashIndex. So the footer will directly interpreted as + // num_restarts. + // + // Such check is for backward compatibility. We can ensure legacy block + // with a vary large num_restarts i.e. >= 0x80000000 can be interpreted + // correctly as no HashIndex even if the MSB of num_restarts is set. + return num_restarts; + } + BlockBasedTableOptions::DataBlockIndexType index_type; + UnPackIndexTypeAndNumRestarts(block_footer, &index_type, &num_restarts); + return num_restarts; +} + +BlockBasedTableOptions::DataBlockIndexType Block::IndexType() const { + assert(size_ >= 2 * sizeof(uint32_t)); + if (size_ > kMaxBlockSizeSupportedByHashIndex) { + // The check is for the same reason as that in NumRestarts() + return BlockBasedTableOptions::kDataBlockBinarySearch; + } + uint32_t block_footer = DecodeFixed32(data_ + size_ - sizeof(uint32_t)); + uint32_t num_restarts = block_footer; + BlockBasedTableOptions::DataBlockIndexType index_type; + UnPackIndexTypeAndNumRestarts(block_footer, &index_type, &num_restarts); + return index_type; +} + +Block::~Block() { + // This sync point can be re-enabled if RocksDB can control the + // initialization order of any/all static options created by the user. + // TEST_SYNC_POINT("Block::~Block"); + delete[] kv_checksum_; +} + +Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit, + Statistics* statistics) + : contents_(std::move(contents)), + data_(contents_.data.data()), + size_(contents_.data.size()), + restart_offset_(0), + num_restarts_(0) { + TEST_SYNC_POINT("Block::Block:0"); + if (size_ < sizeof(uint32_t)) { + size_ = 0; // Error marker + } else { + // Should only decode restart points for uncompressed blocks + num_restarts_ = NumRestarts(); + switch (IndexType()) { + case BlockBasedTableOptions::kDataBlockBinarySearch: + restart_offset_ = static_cast(size_) - + (1 + num_restarts_) * sizeof(uint32_t); + if (restart_offset_ > size_ - sizeof(uint32_t)) { + // The size is too small for NumRestarts() and therefore + // restart_offset_ wrapped around. 
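+          // (restart_offset_ and size_ are unsigned, so the subtraction
+          // above wraps to a huge value; this range check catches it.)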
+ size_ = 0; + } + break; + case BlockBasedTableOptions::kDataBlockBinaryAndHash: + if (size_ < sizeof(uint32_t) /* block footer */ + + sizeof(uint16_t) /* NUM_BUCK */) { + size_ = 0; + break; + } + + uint16_t map_offset; + data_block_hash_index_.Initialize( + contents.data.data(), + static_cast(contents.data.size() - + sizeof(uint32_t)), /*chop off + NUM_RESTARTS*/ + &map_offset); + + restart_offset_ = map_offset - num_restarts_ * sizeof(uint32_t); + + if (restart_offset_ > map_offset) { + // map_offset is too small for NumRestarts() and + // therefore restart_offset_ wrapped around. + size_ = 0; + break; + } + break; + default: + size_ = 0; // Error marker + } + } + if (read_amp_bytes_per_bit != 0 && statistics && size_ != 0) { + read_amp_bitmap_.reset(new BlockReadAmpBitmap( + restart_offset_, read_amp_bytes_per_bit, statistics)); + } +} + +void Block::InitializeDataBlockProtectionInfo(uint8_t protection_bytes_per_key, + const Comparator* raw_ucmp) { + protection_bytes_per_key_ = 0; + if (protection_bytes_per_key > 0 && num_restarts_ > 0) { + // NewDataIterator() is called with protection_bytes_per_key_ = 0. + // This is intended since checksum is not constructed yet. + // + // We do not know global_seqno yet, so checksum computation and + // verification all assume global_seqno = 0. + std::unique_ptr iter{NewDataIterator( + raw_ucmp, kDisableGlobalSequenceNumber, nullptr /* iter */, + nullptr /* stats */, true /* block_contents_pinned */)}; + if (iter->status().ok()) { + block_restart_interval_ = iter->GetRestartInterval(); + } + uint32_t num_keys = 0; + if (iter->status().ok()) { + num_keys = iter->NumberOfKeys(block_restart_interval_); + } + if (iter->status().ok()) { + checksum_size_ = num_keys * protection_bytes_per_key; + kv_checksum_ = new char[(size_t)checksum_size_]; + size_t i = 0; + iter->SeekToFirst(); + while (iter->Valid()) { + GenerateKVChecksum(kv_checksum_ + i, protection_bytes_per_key, + iter->key(), iter->value()); + iter->Next(); + i += protection_bytes_per_key; + } + assert(!iter->status().ok() || i == num_keys * protection_bytes_per_key); + } + if (!iter->status().ok()) { + size_ = 0; // Error marker + return; + } + protection_bytes_per_key_ = protection_bytes_per_key; + } +} + +void Block::InitializeIndexBlockProtectionInfo(uint8_t protection_bytes_per_key, + const Comparator* raw_ucmp, + bool value_is_full, + bool index_has_first_key) { + protection_bytes_per_key_ = 0; + if (num_restarts_ > 0 && protection_bytes_per_key > 0) { + // Note that `global_seqno` and `key_includes_seq` are hardcoded here. They + // do not impact how the index block is parsed. During checksum + // construction/verification, we use the entire key buffer from + // raw_key_.GetKey() returned by iter->key() as the `key` part of key-value + // checksum, and the content of this buffer do not change for different + // values of `global_seqno` or `key_includes_seq`. 
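+    // The full scan below emits one checksum slot of
+    // protection_bytes_per_key bytes per index entry, in iteration order.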
+ std::unique_ptr iter{NewIndexIterator( + raw_ucmp, kDisableGlobalSequenceNumber /* global_seqno */, nullptr, + nullptr /* Statistics */, true /* total_order_seek */, + index_has_first_key /* have_first_key */, false /* key_includes_seq */, + value_is_full, true /* block_contents_pinned */, + nullptr /* prefix_index */)}; + if (iter->status().ok()) { + block_restart_interval_ = iter->GetRestartInterval(); + } + uint32_t num_keys = 0; + if (iter->status().ok()) { + num_keys = iter->NumberOfKeys(block_restart_interval_); + } + if (iter->status().ok()) { + checksum_size_ = num_keys * protection_bytes_per_key; + kv_checksum_ = new char[(size_t)checksum_size_]; + iter->SeekToFirst(); + size_t i = 0; + while (iter->Valid()) { + GenerateKVChecksum(kv_checksum_ + i, protection_bytes_per_key, + iter->key(), iter->raw_value()); + iter->Next(); + i += protection_bytes_per_key; + } + assert(!iter->status().ok() || i == num_keys * protection_bytes_per_key); + } + if (!iter->status().ok()) { + size_ = 0; // Error marker + return; + } + protection_bytes_per_key_ = protection_bytes_per_key; + } +} + +void Block::InitializeMetaIndexBlockProtectionInfo( + uint8_t protection_bytes_per_key) { + protection_bytes_per_key_ = 0; + if (num_restarts_ > 0 && protection_bytes_per_key > 0) { + std::unique_ptr iter{ + NewMetaIterator(true /* block_contents_pinned */)}; + if (iter->status().ok()) { + block_restart_interval_ = iter->GetRestartInterval(); + } + uint32_t num_keys = 0; + if (iter->status().ok()) { + num_keys = iter->NumberOfKeys(block_restart_interval_); + } + if (iter->status().ok()) { + checksum_size_ = num_keys * protection_bytes_per_key; + kv_checksum_ = new char[(size_t)checksum_size_]; + iter->SeekToFirst(); + size_t i = 0; + while (iter->Valid()) { + GenerateKVChecksum(kv_checksum_ + i, protection_bytes_per_key, + iter->key(), iter->value()); + iter->Next(); + i += protection_bytes_per_key; + } + assert(!iter->status().ok() || i == num_keys * protection_bytes_per_key); + } + if (!iter->status().ok()) { + size_ = 0; // Error marker + return; + } + protection_bytes_per_key_ = protection_bytes_per_key; + } +} + +MetaBlockIter* Block::NewMetaIterator(bool block_contents_pinned) { + MetaBlockIter* iter = new MetaBlockIter(); + if (size_ < 2 * sizeof(uint32_t)) { + iter->Invalidate(Status::Corruption("bad block contents")); + return iter; + } else if (num_restarts_ == 0) { + // Empty block. + iter->Invalidate(Status::OK()); + } else { + iter->Initialize(data_, restart_offset_, num_restarts_, + block_contents_pinned, protection_bytes_per_key_, + kv_checksum_, block_restart_interval_); + } + return iter; +} + +DataBlockIter* Block::NewDataIterator(const Comparator* raw_ucmp, + SequenceNumber global_seqno, + DataBlockIter* iter, Statistics* stats, + bool block_contents_pinned) { + DataBlockIter* ret_iter; + if (iter != nullptr) { + ret_iter = iter; + } else { + ret_iter = new DataBlockIter; + } + if (size_ < 2 * sizeof(uint32_t)) { + ret_iter->Invalidate(Status::Corruption("bad block contents")); + return ret_iter; + } + if (num_restarts_ == 0) { + // Empty block. + ret_iter->Invalidate(Status::OK()); + return ret_iter; + } else { + ret_iter->Initialize( + raw_ucmp, data_, restart_offset_, num_restarts_, global_seqno, + read_amp_bitmap_.get(), block_contents_pinned, + data_block_hash_index_.Valid() ? 
&data_block_hash_index_ : nullptr,
+        protection_bytes_per_key_, kv_checksum_, block_restart_interval_);
+    if (read_amp_bitmap_) {
+      if (read_amp_bitmap_->GetStatistics() != stats) {
+        // DB changed the Statistics pointer, we need to notify read_amp_bitmap_
+        read_amp_bitmap_->SetStatistics(stats);
+      }
+    }
+  }
+
+  return ret_iter;
+}
+
+IndexBlockIter* Block::NewIndexIterator(
+    const Comparator* raw_ucmp, SequenceNumber global_seqno,
+    IndexBlockIter* iter, Statistics* /*stats*/, bool total_order_seek,
+    bool have_first_key, bool key_includes_seq, bool value_is_full,
+    bool block_contents_pinned, BlockPrefixIndex* prefix_index) {
+  IndexBlockIter* ret_iter;
+  if (iter != nullptr) {
+    ret_iter = iter;
+  } else {
+    ret_iter = new IndexBlockIter;
+  }
+  if (size_ < 2 * sizeof(uint32_t)) {
+    ret_iter->Invalidate(Status::Corruption("bad block contents"));
+    return ret_iter;
+  }
+  if (num_restarts_ == 0) {
+    // Empty block.
+    ret_iter->Invalidate(Status::OK());
+    return ret_iter;
+  } else {
+    BlockPrefixIndex* prefix_index_ptr =
+        total_order_seek ? nullptr : prefix_index;
+    ret_iter->Initialize(raw_ucmp, data_, restart_offset_, num_restarts_,
+                         global_seqno, prefix_index_ptr, have_first_key,
+                         key_includes_seq, value_is_full, block_contents_pinned,
+                         protection_bytes_per_key_, kv_checksum_,
+                         block_restart_interval_);
+  }
+
+  return ret_iter;
+}
+
+size_t Block::ApproximateMemoryUsage() const {
+  size_t usage = usable_size();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+  usage += malloc_usable_size((void*)this);
+#else
+  usage += sizeof(*this);
+#endif  // ROCKSDB_MALLOC_USABLE_SIZE
+  if (read_amp_bitmap_) {
+    usage += read_amp_bitmap_->ApproximateMemoryUsage();
+  }
+  usage += checksum_size_;
+  return usage;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.h
new file mode 100644
index 0000000000..68b6906fac
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block.h
@@ -0,0 +1,921 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "db/kv_checksum.h"
+#include "db/pinned_iterators_manager.h"
+#include "port/malloc.h"
+#include "rocksdb/advanced_cache.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/options.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/table.h"
+#include "table/block_based/block_prefix_index.h"
+#include "table/block_based/data_block_hash_index.h"
+#include "table/format.h"
+#include "table/internal_iterator.h"
+#include "test_util/sync_point.h"
+#include "util/random.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct BlockContents;
+class Comparator;
+template <class TValue>
+class BlockIter;
+class DataBlockIter;
+class IndexBlockIter;
+class MetaBlockIter;
+class BlockPrefixIndex;
+
+// BlockReadAmpBitmap is a bitmap that maps the ROCKSDB_NAMESPACE::Block data
+// bytes to a bitmap with ratio bytes_per_bit.
Whenever we access a range of +// bytes in the Block we update the bitmap and increment +// READ_AMP_ESTIMATE_USEFUL_BYTES. +class BlockReadAmpBitmap { + public: + explicit BlockReadAmpBitmap(size_t block_size, size_t bytes_per_bit, + Statistics* statistics) + : bitmap_(nullptr), + bytes_per_bit_pow_(0), + statistics_(statistics), + rnd_(Random::GetTLSInstance()->Uniform( + static_cast(bytes_per_bit))) { + TEST_SYNC_POINT_CALLBACK("BlockReadAmpBitmap:rnd", &rnd_); + assert(block_size > 0 && bytes_per_bit > 0); + + // convert bytes_per_bit to be a power of 2 + while (bytes_per_bit >>= 1) { + bytes_per_bit_pow_++; + } + + // num_bits_needed = ceil(block_size / bytes_per_bit) + size_t num_bits_needed = ((block_size - 1) >> bytes_per_bit_pow_) + 1; + assert(num_bits_needed > 0); + + // bitmap_size = ceil(num_bits_needed / kBitsPerEntry) + size_t bitmap_size = (num_bits_needed - 1) / kBitsPerEntry + 1; + + // Create bitmap and set all the bits to 0 + bitmap_ = new std::atomic[bitmap_size](); + + RecordTick(GetStatistics(), READ_AMP_TOTAL_READ_BYTES, block_size); + } + + ~BlockReadAmpBitmap() { delete[] bitmap_; } + + void Mark(uint32_t start_offset, uint32_t end_offset) { + assert(end_offset >= start_offset); + // Index of first bit in mask + uint32_t start_bit = + (start_offset + (1 << bytes_per_bit_pow_) - rnd_ - 1) >> + bytes_per_bit_pow_; + // Index of last bit in mask + 1 + uint32_t exclusive_end_bit = + (end_offset + (1 << bytes_per_bit_pow_) - rnd_) >> bytes_per_bit_pow_; + if (start_bit >= exclusive_end_bit) { + return; + } + assert(exclusive_end_bit > 0); + + if (GetAndSet(start_bit) == 0) { + uint32_t new_useful_bytes = (exclusive_end_bit - start_bit) + << bytes_per_bit_pow_; + RecordTick(GetStatistics(), READ_AMP_ESTIMATE_USEFUL_BYTES, + new_useful_bytes); + } + } + + Statistics* GetStatistics() { + return statistics_.load(std::memory_order_relaxed); + } + + void SetStatistics(Statistics* stats) { statistics_.store(stats); } + + uint32_t GetBytesPerBit() { return 1 << bytes_per_bit_pow_; } + + size_t ApproximateMemoryUsage() const { +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + return malloc_usable_size((void*)this); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + return sizeof(*this); + } + + private: + // Get the current value of bit at `bit_idx` and set it to 1 + inline bool GetAndSet(uint32_t bit_idx) { + const uint32_t byte_idx = bit_idx / kBitsPerEntry; + const uint32_t bit_mask = 1 << (bit_idx % kBitsPerEntry); + + return bitmap_[byte_idx].fetch_or(bit_mask, std::memory_order_relaxed) & + bit_mask; + } + + const uint32_t kBytesPersEntry = sizeof(uint32_t); // 4 bytes + const uint32_t kBitsPerEntry = kBytesPersEntry * 8; // 32 bits + + // Bitmap used to record the bytes that we read, use atomic to protect + // against multiple threads updating the same bit + std::atomic* bitmap_; + // (1 << bytes_per_bit_pow_) is bytes_per_bit. Use power of 2 to optimize + // muliplication and division + uint8_t bytes_per_bit_pow_; + // Pointer to DB Statistics object, Since this bitmap may outlive the DB + // this pointer maybe invalid, but the DB will update it to a valid pointer + // by using SetStatistics() before calling Mark() + std::atomic statistics_; + uint32_t rnd_; +}; + +// class Block is the uncompressed and "parsed" form for blocks containing +// key-value pairs. (See BlockContents comments for more on terminology.) 
+// This includes the in-memory representation of data blocks, index blocks +// (including partitions), range deletion blocks, properties blocks, metaindex +// blocks, as well as the top level of the partitioned filter structure (which +// is actually an index of the filter partitions). It is NOT suitable for +// compressed blocks in general, filter blocks/partitions, or compression +// dictionaries. +// +// See https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format +// for details of the format and the various block types. +// +// TODO: Rename to ParsedKvBlock? +class Block { + public: + // Initialize the block with the specified contents. + explicit Block(BlockContents&& contents, size_t read_amp_bytes_per_bit = 0, + Statistics* statistics = nullptr); + // No copying allowed + Block(const Block&) = delete; + void operator=(const Block&) = delete; + + ~Block(); + + size_t size() const { return size_; } + const char* data() const { return data_; } + // The additional memory space taken by the block data. + size_t usable_size() const { return contents_.usable_size(); } + uint32_t NumRestarts() const; + bool own_bytes() const { return contents_.own_bytes(); } + + BlockBasedTableOptions::DataBlockIndexType IndexType() const; + + // raw_ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key + // comparator. + // + // If iter is null, return new Iterator + // If iter is not null, update this one and return it as Iterator* + // + // Updates read_amp_bitmap_ if it is not nullptr. + // + // If `block_contents_pinned` is true, the caller will guarantee that when + // the cleanup functions are transferred from the iterator to other + // classes, e.g. PinnableSlice, the pointer to the bytes will still be + // valid. Either the iterator holds cache handle or ownership of some resource + // and release them in a release function, or caller is sure that the data + // will not go away (for example, it's from mmapped file which will not be + // closed). + // + // NOTE: for the hash based lookup, if a key prefix doesn't match any key, + // the iterator will simply be set as "invalid", rather than returning + // the key that is just pass the target key. + DataBlockIter* NewDataIterator(const Comparator* raw_ucmp, + SequenceNumber global_seqno, + DataBlockIter* iter = nullptr, + Statistics* stats = nullptr, + bool block_contents_pinned = false); + + // Returns an MetaBlockIter for iterating over blocks containing metadata + // (like Properties blocks). Unlike data blocks, the keys for these blocks + // do not contain sequence numbers, do not use a user-define comparator, and + // do not track read amplification/statistics. Additionally, MetaBlocks will + // not assert if the block is formatted improperly. + // + // If `block_contents_pinned` is true, the caller will guarantee that when + // the cleanup functions are transferred from the iterator to other + // classes, e.g. PinnableSlice, the pointer to the bytes will still be + // valid. Either the iterator holds cache handle or ownership of some resource + // and release them in a release function, or caller is sure that the data + // will not go away (for example, it's from mmapped file which will not be + // closed). + MetaBlockIter* NewMetaIterator(bool block_contents_pinned = false); + + // raw_ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key + // comparator. + // + // key_includes_seq, default true, means that the keys are in internal key + // format. 
+ // value_is_full, default true, means that no delta encoding is + // applied to values. + // + // If `prefix_index` is not nullptr this block will do hash lookup for the key + // prefix. If total_order_seek is true, prefix_index_ is ignored. + // + // `have_first_key` controls whether IndexValue will contain + // first_internal_key. It affects data serialization format, so the same value + // have_first_key must be used when writing and reading index. + // It is determined by IndexType property of the table. + IndexBlockIter* NewIndexIterator(const Comparator* raw_ucmp, + SequenceNumber global_seqno, + IndexBlockIter* iter, Statistics* stats, + bool total_order_seek, bool have_first_key, + bool key_includes_seq, bool value_is_full, + bool block_contents_pinned = false, + BlockPrefixIndex* prefix_index = nullptr); + + // Report an approximation of how much memory has been used. + size_t ApproximateMemoryUsage() const; + + // For TypedCacheInterface + const Slice& ContentSlice() const { return contents_.data; } + + // Initializes per key-value checksum protection. + // After this method is called, each DataBlockIterator returned + // by NewDataIterator will verify per key-value checksum for any key it read. + void InitializeDataBlockProtectionInfo(uint8_t protection_bytes_per_key, + const Comparator* raw_ucmp); + + // Initializes per key-value checksum protection. + // After this method is called, each IndexBlockIterator returned + // by NewIndexIterator will verify per key-value checksum for any key it read. + // value_is_full and index_has_first_key are needed to be able to parse + // the index block content and construct checksums. + void InitializeIndexBlockProtectionInfo(uint8_t protection_bytes_per_key, + const Comparator* raw_ucmp, + bool value_is_full, + bool index_has_first_key); + + // Initializes per key-value checksum protection. + // After this method is called, each MetaBlockIter returned + // by NewMetaIterator will verify per key-value checksum for any key it read. + void InitializeMetaIndexBlockProtectionInfo(uint8_t protection_bytes_per_key); + + static void GenerateKVChecksum(char* checksum_ptr, uint8_t checksum_len, + const Slice& key, const Slice& value) { + ProtectionInfo64().ProtectKV(key, value).Encode(checksum_len, checksum_ptr); + } + + const char* TEST_GetKVChecksum() const { return kv_checksum_; } + + private: + BlockContents contents_; + const char* data_; // contents_.data.data() + size_t size_; // contents_.data.size() + uint32_t restart_offset_; // Offset in data_ of restart array + uint32_t num_restarts_; + std::unique_ptr read_amp_bitmap_; + char* kv_checksum_{nullptr}; + uint32_t checksum_size_{0}; + // Used by block iterators to calculate current key index within a block + uint32_t block_restart_interval_{0}; + uint8_t protection_bytes_per_key_{0}; + DataBlockHashIndex data_block_hash_index_; +}; + +// A `BlockIter` iterates over the entries in a `Block`'s data buffer. The +// format of this data buffer is an uncompressed, sorted sequence of key-value +// pairs (see `Block` API for more details). +// +// Notably, the keys may either be in internal key format or user key format. +// Subclasses are responsible for configuring the key format. +// +// `BlockIter` intends to provide final overrides for all of +// `InternalIteratorBase` functions that can move the iterator. It does +// this to guarantee `UpdateKey()` is called exactly once after each key +// movement potentially visible to users. 
In this step, the key is prepared +// (e.g., serialized if global seqno is in effect) so it can be returned +// immediately when the user asks for it via calling `key() const`. +// +// For its subclasses, it provides protected variants of the above-mentioned +// final-overridden methods. They are named with the "Impl" suffix, e.g., +// `Seek()` logic would be implemented by subclasses in `SeekImpl()`. These +// "Impl" functions are responsible for positioning `raw_key_` but not +// invoking `UpdateKey()`. +// +// Per key-value checksum is enabled if relevant states are passed in during +// `InitializeBase()`. The checksum verification is done in each call to +// UpdateKey() for the current key. Each subclass is responsible for keeping +// track of cur_entry_idx_, the index of the current key within the block. +// BlockIter uses this index to get the corresponding checksum for current key. +// Additional checksum verification may be done in subclasses if they read keys +// other than the key being processed in UpdateKey(). +template +class BlockIter : public InternalIteratorBase { + public: + // Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do + // nothing. Calls cleanup functions. + virtual void Invalidate(const Status& s) { + // Assert that the BlockIter is never deleted while Pinning is Enabled. + assert(!pinned_iters_mgr_ || !pinned_iters_mgr_->PinningEnabled()); + + data_ = nullptr; + current_ = restarts_; + status_ = s; + + // Call cleanup callbacks. + Cleanable::Reset(); + } + + bool Valid() const override { + // When status_ is not ok, iter should be invalid. + assert(status_.ok() || current_ >= restarts_); + return current_ < restarts_; + } + + virtual void SeekToFirst() override final { +#ifndef NDEBUG + if (TEST_Corrupt_Callback("BlockIter::SeekToFirst")) return; +#endif + SeekToFirstImpl(); + UpdateKey(); + } + + virtual void SeekToLast() override final { + SeekToLastImpl(); + UpdateKey(); + } + + virtual void Seek(const Slice& target) override final { + SeekImpl(target); + UpdateKey(); + } + + virtual void SeekForPrev(const Slice& target) override final { + SeekForPrevImpl(target); + UpdateKey(); + } + + virtual void Next() override final { + NextImpl(); + UpdateKey(); + } + + virtual bool NextAndGetResult(IterateResult* result) override final { + // This does not need to call `UpdateKey()` as the parent class only has + // access to the `UpdateKey()`-invoking functions. + return InternalIteratorBase::NextAndGetResult(result); + } + + virtual void Prev() override final { + PrevImpl(); + UpdateKey(); + } + + Status status() const override { return status_; } + + Slice key() const override { + assert(Valid()); + return key_; + } + +#ifndef NDEBUG + ~BlockIter() override { + // Assert that the BlockIter is never deleted while Pinning is Enabled. 
+ assert(!pinned_iters_mgr_ || + (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); + status_.PermitUncheckedError(); + } + + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; + } + + PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; + + bool TEST_Corrupt_Callback(const std::string& sync_point) { + bool corrupt = false; + TEST_SYNC_POINT_CALLBACK(sync_point, static_cast(&corrupt)); + + if (corrupt) { + CorruptionError(); + } + return corrupt; + } +#endif + + bool IsKeyPinned() const override { + return block_contents_pinned_ && key_pinned_; + } + + bool IsValuePinned() const override { return block_contents_pinned_; } + + size_t TEST_CurrentEntrySize() { return NextEntryOffset() - current_; } + + uint32_t ValueOffset() const { + return static_cast(value_.data() - data_); + } + + void SetCacheHandle(Cache::Handle* handle) { cache_handle_ = handle; } + + Cache::Handle* cache_handle() { return cache_handle_; } + + protected: + std::unique_ptr icmp_; + const char* data_; // underlying block contents + uint32_t num_restarts_; // Number of uint32_t entries in restart array + + // Index of restart block in which current_ or current_-1 falls + uint32_t restart_index_; + uint32_t restarts_; // Offset of restart array (list of fixed32) + // current_ is offset in data_ of current entry. >= restarts_ if !Valid + uint32_t current_; + // Raw key from block. + IterKey raw_key_; + // Buffer for key data when global seqno assignment is enabled. + IterKey key_buf_; + Slice value_; + Status status_; + // Key to be exposed to users. + Slice key_; + SequenceNumber global_seqno_; + + // Per key-value checksum related states + const char* kv_checksum_; + int32_t cur_entry_idx_; + uint32_t block_restart_interval_; + uint8_t protection_bytes_per_key_; + + bool key_pinned_; + // Whether the block data is guaranteed to outlive this iterator, and + // as long as the cleanup functions are transferred to another class, + // e.g. PinnableSlice, the pointer to the bytes will still be valid. + bool block_contents_pinned_; + + virtual void SeekToFirstImpl() = 0; + virtual void SeekToLastImpl() = 0; + virtual void SeekImpl(const Slice& target) = 0; + virtual void SeekForPrevImpl(const Slice& target) = 0; + virtual void NextImpl() = 0; + virtual void PrevImpl() = 0; + + // Returns the restart interval of this block. + // Returns 0 if num_restarts_ <= 1 or if the BlockIter is not initialized. + virtual uint32_t GetRestartInterval() { + if (num_restarts_ <= 1 || data_ == nullptr) { + return 0; + } + SeekToFirstImpl(); + uint32_t end_index = GetRestartPoint(1); + uint32_t count = 1; + while (NextEntryOffset() < end_index && status_.ok()) { + assert(Valid()); + NextImpl(); + ++count; + } + return count; + } + + // Returns the number of keys in this block. + virtual uint32_t NumberOfKeys(uint32_t block_restart_interval) { + if (num_restarts_ == 0 || data_ == nullptr) { + return 0; + } + uint32_t count = (num_restarts_ - 1) * block_restart_interval; + // Add number of keys from the last restart interval + SeekToRestartPoint(num_restarts_ - 1); + while (NextEntryOffset() < restarts_ && status_.ok()) { + NextImpl(); + ++count; + } + return count; + } + + // Stores whether the current key has a shared bytes with prev key in + // *is_shared. + // Sets raw_key_, value_ to the current parsed key and value. + // Sets restart_index_ to point to the restart interval that contains + // the current key. 
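+  // (DecodeEntryFunc is a decoder functor type: different instantiations
+  // parse plain entries vs. delta-encoded index entries.)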
+ template + inline bool ParseNextKey(bool* is_shared); + + // protection_bytes_per_key, kv_checksum, and block_restart_interval + // are needed only for per kv checksum verification. + void InitializeBase(const Comparator* raw_ucmp, const char* data, + uint32_t restarts, uint32_t num_restarts, + SequenceNumber global_seqno, bool block_contents_pinned, + uint8_t protection_bytes_per_key, const char* kv_checksum, + uint32_t block_restart_interval) { + assert(data_ == nullptr); // Ensure it is called only once + assert(num_restarts > 0); // Ensure the param is valid + + icmp_ = std::make_unique(raw_ucmp); + data_ = data; + restarts_ = restarts; + num_restarts_ = num_restarts; + current_ = restarts_; + restart_index_ = num_restarts_; + global_seqno_ = global_seqno; + block_contents_pinned_ = block_contents_pinned; + cache_handle_ = nullptr; + cur_entry_idx_ = -1; + protection_bytes_per_key_ = protection_bytes_per_key; + kv_checksum_ = kv_checksum; + block_restart_interval_ = block_restart_interval; + // Checksum related states are either all 0/nullptr or all non-zero. + // One exception is when num_restarts == 0, block_restart_interval can be 0 + // since we are not able to compute it. + assert((protection_bytes_per_key == 0 && kv_checksum == nullptr) || + (protection_bytes_per_key > 0 && kv_checksum != nullptr && + (block_restart_interval > 0 || num_restarts == 1))); + } + + void CorruptionError(const std::string& error_msg = "bad entry in block") { + current_ = restarts_; + restart_index_ = num_restarts_; + status_ = Status::Corruption(error_msg); + raw_key_.Clear(); + value_.clear(); + } + + void PerKVChecksumCorruptionError() { + std::string error_msg{ + "Corrupted block entry: per key-value checksum verification " + "failed."}; + error_msg.append(" Offset: " + std::to_string(current_) + "."); + error_msg.append(" Entry index: " + std::to_string(cur_entry_idx_) + "."); + CorruptionError(error_msg); + } + + // Must be called every time a key is found that needs to be returned to user, + // and may be called when no key is found (as a no-op). Updates `key_`, + // `key_buf_`, and `key_pinned_` with info about the found key. + // Per key-value checksum verification is done if available for the key to be + // returned. Iterator is invalidated with corruption status if checksum + // verification fails. + void UpdateKey() { + key_buf_.Clear(); + if (!Valid()) { + return; + } + if (raw_key_.IsUserKey()) { + assert(global_seqno_ == kDisableGlobalSequenceNumber); + key_ = raw_key_.GetUserKey(); + key_pinned_ = raw_key_.IsKeyPinned(); + } else if (global_seqno_ == kDisableGlobalSequenceNumber) { + key_ = raw_key_.GetInternalKey(); + key_pinned_ = raw_key_.IsKeyPinned(); + } else { + key_buf_.SetInternalKey(raw_key_.GetUserKey(), global_seqno_, + ExtractValueType(raw_key_.GetInternalKey())); + key_ = key_buf_.GetInternalKey(); + key_pinned_ = false; + } + TEST_SYNC_POINT_CALLBACK("BlockIter::UpdateKey::value", + (void*)value_.data()); + TEST_SYNC_POINT_CALLBACK("Block::VerifyChecksum::checksum_len", + &protection_bytes_per_key_); + if (protection_bytes_per_key_ > 0) { + if (!ProtectionInfo64() + .ProtectKV(raw_key_.GetKey(), value_) + .Verify( + protection_bytes_per_key_, + kv_checksum_ + protection_bytes_per_key_ * cur_entry_idx_)) { + PerKVChecksumCorruptionError(); + } + } + } + + // Returns the result of `Comparator::Compare()`, where the appropriate + // comparator is used for the block contents, the LHS argument is the current + // key with global seqno applied, and the RHS argument is `other`. 
+ int CompareCurrentKey(const Slice& other) { + if (raw_key_.IsUserKey()) { + assert(global_seqno_ == kDisableGlobalSequenceNumber); + return icmp_->user_comparator()->Compare(raw_key_.GetUserKey(), other); + } else if (global_seqno_ == kDisableGlobalSequenceNumber) { + return icmp_->Compare(raw_key_.GetInternalKey(), other); + } + return icmp_->Compare(raw_key_.GetInternalKey(), global_seqno_, other, + kDisableGlobalSequenceNumber); + } + + private: + // Store the cache handle, if the block is cached. We need this since the + // only other place the handle is stored is as an argument to the Cleanable + // function callback, which is hard to retrieve. When multiple value + // PinnableSlices reference the block, they need the cache handle in order + // to bump up the ref count + Cache::Handle* cache_handle_; + + public: + // Return the offset in data_ just past the end of the current entry. + inline uint32_t NextEntryOffset() const { + // NOTE: We don't support blocks bigger than 2GB + return static_cast((value_.data() + value_.size()) - data_); + } + + uint32_t GetRestartPoint(uint32_t index) const { + assert(index < num_restarts_); + return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); + } + + void SeekToRestartPoint(uint32_t index) { + raw_key_.Clear(); + restart_index_ = index; + // current_ will be fixed by ParseNextKey(); + + // ParseNextKey() starts at the end of value_, so set value_ accordingly + uint32_t offset = GetRestartPoint(index); + value_ = Slice(data_ + offset, 0); + } + + protected: + template + inline bool BinarySeek(const Slice& target, uint32_t* index, + bool* is_index_key_result); + + // Find the first key in restart interval `index` that is >= `target`. + // If there is no such key, iterator is positioned at the first key in + // restart interval `index + 1`. + // If is_index_key_result is true, it positions the iterator at the first key + // in this restart interval. + // Per key-value checksum verification is done for all keys scanned + // up to but not including the last key (the key that current_ points to + // when this function returns). This key's checksum is verified in + // UpdateKey(). + void FindKeyAfterBinarySeek(const Slice& target, uint32_t index, + bool is_index_key_result); +}; + +class DataBlockIter final : public BlockIter { + public: + DataBlockIter() + : BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {} + void Initialize(const Comparator* raw_ucmp, const char* data, + uint32_t restarts, uint32_t num_restarts, + SequenceNumber global_seqno, + BlockReadAmpBitmap* read_amp_bitmap, + bool block_contents_pinned, + DataBlockHashIndex* data_block_hash_index, + uint8_t protection_bytes_per_key, const char* kv_checksum, + uint32_t block_restart_interval) { + InitializeBase(raw_ucmp, data, restarts, num_restarts, global_seqno, + block_contents_pinned, protection_bytes_per_key, kv_checksum, + block_restart_interval); + raw_key_.SetIsUserKey(false); + read_amp_bitmap_ = read_amp_bitmap; + last_bitmap_offset_ = current_ + 1; + data_block_hash_index_ = data_block_hash_index; + } + + Slice value() const override { + assert(Valid()); + if (read_amp_bitmap_ && current_ < restarts_ && + current_ != last_bitmap_offset_) { + read_amp_bitmap_->Mark(current_ /* current entry offset */, + NextEntryOffset() - 1); + last_bitmap_offset_ = current_; + } + return value_; + } + + // Returns if `target` may exist. 
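+  // Illustrative usage (hypothetical caller, not upstream code):
+  //
+  //   DataBlockIter* it = block->NewDataIterator(ucmp, seqno);
+  //   if (it->SeekForGet(key)) {   // key may exist; iterator is positioned
+  //     if (it->Valid()) { /* inspect it->key() / it->value() */ }
+  //   } else {
+  //     // hash index proved `key` is absent from this data block
+  //   }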
+ inline bool SeekForGet(const Slice& target) { +#ifndef NDEBUG + if (TEST_Corrupt_Callback("DataBlockIter::SeekForGet")) return true; +#endif + if (!data_block_hash_index_) { + SeekImpl(target); + UpdateKey(); + return true; + } + bool res = SeekForGetImpl(target); + UpdateKey(); + return res; + } + + void Invalidate(const Status& s) override { + BlockIter::Invalidate(s); + // Clear prev entries cache. + prev_entries_keys_buff_.clear(); + prev_entries_.clear(); + prev_entries_idx_ = -1; + } + + protected: + friend Block; + inline bool ParseNextDataKey(bool* is_shared); + void SeekToFirstImpl() override; + void SeekToLastImpl() override; + void SeekImpl(const Slice& target) override; + void SeekForPrevImpl(const Slice& target) override; + void NextImpl() override; + void PrevImpl() override; + + private: + // read-amp bitmap + BlockReadAmpBitmap* read_amp_bitmap_; + // last `current_` value we report to read-amp bitmp + mutable uint32_t last_bitmap_offset_; + struct CachedPrevEntry { + explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr, + size_t _key_offset, size_t _key_size, Slice _value) + : offset(_offset), + key_ptr(_key_ptr), + key_offset(_key_offset), + key_size(_key_size), + value(_value) {} + + // offset of entry in block + uint32_t offset; + // Pointer to key data in block (nullptr if key is delta-encoded) + const char* key_ptr; + // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr) + size_t key_offset; + // size of key + size_t key_size; + // value slice pointing to data in block + Slice value; + }; + std::string prev_entries_keys_buff_; + std::vector prev_entries_; + int32_t prev_entries_idx_ = -1; + + DataBlockHashIndex* data_block_hash_index_; + + bool SeekForGetImpl(const Slice& target); +}; + +// Iterator over MetaBlocks. MetaBlocks are similar to Data Blocks and +// are used to store Properties associated with table. +// Meta blocks always store user keys (no sequence number) and always +// use the BytewiseComparator. Additionally, MetaBlock accesses are +// not recorded in the Statistics or for Read-Amplification. +class MetaBlockIter final : public BlockIter { + public: + MetaBlockIter() : BlockIter() { raw_key_.SetIsUserKey(true); } + void Initialize(const char* data, uint32_t restarts, uint32_t num_restarts, + bool block_contents_pinned, uint8_t protection_bytes_per_key, + const char* kv_checksum, uint32_t block_restart_interval) { + // Initializes the iterator with a BytewiseComparator and + // the raw key being a user key. + InitializeBase(BytewiseComparator(), data, restarts, num_restarts, + kDisableGlobalSequenceNumber, block_contents_pinned, + protection_bytes_per_key, kv_checksum, + block_restart_interval); + raw_key_.SetIsUserKey(true); + } + + Slice value() const override { + assert(Valid()); + return value_; + } + + protected: + friend Block; + void SeekToFirstImpl() override; + void SeekToLastImpl() override; + void SeekImpl(const Slice& target) override; + void SeekForPrevImpl(const Slice& target) override; + void NextImpl() override; + void PrevImpl() override; + // Meta index block's restart interval is always 1. See + // MetaIndexBuilder::MetaIndexBuilder() for hard-coded restart interval. 
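+  // With a restart interval of 1, every key is its own restart point, so
+  // NumberOfKeys() below can simply return num_restarts_.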
+ uint32_t GetRestartInterval() override { return 1; } + uint32_t NumberOfKeys(uint32_t) override { return num_restarts_; } +}; + +class IndexBlockIter final : public BlockIter { + public: + IndexBlockIter() : BlockIter(), prefix_index_(nullptr) {} + + // key_includes_seq, default true, means that the keys are in internal key + // format. + // value_is_full, default true, means that no delta encoding is + // applied to values. + void Initialize(const Comparator* raw_ucmp, const char* data, + uint32_t restarts, uint32_t num_restarts, + SequenceNumber global_seqno, BlockPrefixIndex* prefix_index, + bool have_first_key, bool key_includes_seq, + bool value_is_full, bool block_contents_pinned, + uint8_t protection_bytes_per_key, const char* kv_checksum, + uint32_t block_restart_interval) { + InitializeBase(raw_ucmp, data, restarts, num_restarts, + kDisableGlobalSequenceNumber, block_contents_pinned, + protection_bytes_per_key, kv_checksum, + block_restart_interval); + raw_key_.SetIsUserKey(!key_includes_seq); + prefix_index_ = prefix_index; + value_delta_encoded_ = !value_is_full; + have_first_key_ = have_first_key; + if (have_first_key_ && global_seqno != kDisableGlobalSequenceNumber) { + global_seqno_state_.reset(new GlobalSeqnoState(global_seqno)); + } else { + global_seqno_state_.reset(); + } + } + + Slice user_key() const override { + assert(Valid()); + return raw_key_.GetUserKey(); + } + + IndexValue value() const override { + assert(Valid()); + if (value_delta_encoded_ || global_seqno_state_ != nullptr) { + return decoded_value_; + } else { + IndexValue entry; + Slice v = value_; + Status decode_s __attribute__((__unused__)) = + entry.DecodeFrom(&v, have_first_key_, nullptr); + assert(decode_s.ok()); + return entry; + } + } + + Slice raw_value() const { + assert(Valid()); + return value_; + } + + bool IsValuePinned() const override { + return global_seqno_state_ != nullptr ? false : BlockIter::IsValuePinned(); + } + + protected: + friend Block; + // IndexBlockIter follows a different contract for prefix iterator + // from data iterators. + // If prefix of the seek key `target` exists in the file, it must + // return the same result as total order seek. + // If the prefix of `target` doesn't exist in the file, it can either + // return the result of total order seek, or set both of Valid() = false + // and status() = NotFound(). + void SeekImpl(const Slice& target) override; + + void SeekForPrevImpl(const Slice&) override { + assert(false); + current_ = restarts_; + restart_index_ = num_restarts_; + status_ = Status::InvalidArgument( + "RocksDB internal error: should never call SeekForPrev() on index " + "blocks"); + raw_key_.Clear(); + value_.clear(); + } + + void PrevImpl() override; + void NextImpl() override; + void SeekToFirstImpl() override; + void SeekToLastImpl() override; + + private: + bool value_delta_encoded_; + bool have_first_key_; // value includes first_internal_key + BlockPrefixIndex* prefix_index_; + // Whether the value is delta encoded. In that case the value is assumed to be + // BlockHandle. The first value in each restart interval is the full encoded + // BlockHandle; the restart of encoded size part of the BlockHandle. The + // offset of delta encoded BlockHandles is computed by adding the size of + // previous delta encoded values in the same restart interval to the offset of + // the first value in that restart interval. 
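+  // Illustrative reading of the scheme above: within a restart interval,
+  // only the first BlockHandle is stored in full; each later entry stores a
+  // size delta, and its offset is reconstructed from the first entry's
+  // offset plus the sizes already decoded in that interval.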
+ IndexValue decoded_value_; + + // When sequence number overwriting is enabled, this struct contains the seqno + // to overwrite with, and current first_internal_key with overwritten seqno. + // This is rarely used, so we put it behind a pointer and only allocate when + // needed. + struct GlobalSeqnoState { + // First internal key according to current index entry, but with sequence + // number overwritten to global_seqno. + IterKey first_internal_key; + SequenceNumber global_seqno; + + explicit GlobalSeqnoState(SequenceNumber seqno) : global_seqno(seqno) {} + }; + + std::unique_ptr global_seqno_state_; + + // Set *prefix_may_exist to false if no key possibly share the same prefix + // as `target`. If not set, the result position should be the same as total + // order Seek. + bool PrefixSeek(const Slice& target, uint32_t* index, bool* prefix_may_exist); + // Set *prefix_may_exist to false if no key can possibly share the same + // prefix as `target`. If not set, the result position should be the same + // as total order seek. + bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids, + uint32_t left, uint32_t right, uint32_t* index, + bool* prefix_may_exist); + inline int CompareBlockKey(uint32_t block_index, const Slice& target); + + inline bool ParseNextIndexKey(); + + // When value_delta_encoded_ is enabled it decodes the value which is assumed + // to be BlockHandle and put it to decoded_value_ + inline void DecodeCurrentValue(bool is_shared); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.cc new file mode 100644 index 0000000000..ee8865a50d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.cc @@ -0,0 +1,2027 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+
+#include "table/block_based/block_based_table_builder.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#include <atomic>
+#include <list>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "block_cache.h"
+#include "cache/cache_entry_roles.h"
+#include "cache/cache_helpers.h"
+#include "cache/cache_key.h"
+#include "cache/cache_reservation_manager.h"
+#include "db/dbformat.h"
+#include "index_builder.h"
+#include "logging/logging.h"
+#include "memory/memory_allocator_impl.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/env.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/flush_block_policy.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/table.h"
+#include "rocksdb/types.h"
+#include "table/block_based/block.h"
+#include "table/block_based/block_based_table_factory.h"
+#include "table/block_based/block_based_table_reader.h"
+#include "table/block_based/block_builder.h"
+#include "table/block_based/filter_block.h"
+#include "table/block_based/filter_policy_internal.h"
+#include "table/block_based/full_filter_block.h"
+#include "table/block_based/partitioned_filter_block.h"
+#include "table/format.h"
+#include "table/meta_blocks.h"
+#include "table/table_builder.h"
+#include "util/coding.h"
+#include "util/compression.h"
+#include "util/stop_watch.h"
+#include "util/string_util.h"
+#include "util/work_queue.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+extern const std::string kHashIndexPrefixesBlock;
+extern const std::string kHashIndexPrefixesMetadataBlock;
+
+// Without anonymous namespace here, we fail the warning -Wmissing-prototypes
+namespace {
+
+constexpr size_t kBlockTrailerSize = BlockBasedTable::kBlockTrailerSize;
+
+// Create a filter block builder based on its type.
+FilterBlockBuilder* CreateFilterBlockBuilder(
+    const ImmutableCFOptions& /*opt*/, const MutableCFOptions& mopt,
+    const FilterBuildingContext& context,
+    const bool use_delta_encoding_for_index_values,
+    PartitionedIndexBuilder* const p_index_builder) {
+  const BlockBasedTableOptions& table_opt = context.table_options;
+  assert(table_opt.filter_policy);  // precondition
+
+  FilterBitsBuilder* filter_bits_builder =
+      BloomFilterPolicy::GetBuilderFromContext(context);
+  if (filter_bits_builder == nullptr) {
+    return nullptr;
+  } else {
+    if (table_opt.partition_filters) {
+      assert(p_index_builder != nullptr);
+      // Since it takes time from the filter builder's partition-cut request
+      // until the index builder actually cuts the partition (at the end of a
+      // data block, potentially with many keys), we take the lower bound as
+      // the partition size.
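+      // Worked example (illustrative values): with metadata_block_size = 4096
+      // and block_size_deviation = 10, the lower bound is
+      // ceil(4096 * 90 / 100) = 3687; the "+ 99) / 100" below implements
+      // that ceiling in integer arithmetic.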
+      assert(table_opt.block_size_deviation <= 100);
+      auto partition_size =
+          static_cast<uint32_t>(((table_opt.metadata_block_size *
+                                  (100 - table_opt.block_size_deviation)) +
+                                 99) /
+                                100);
+      partition_size = std::max(partition_size, static_cast<uint32_t>(1));
+      return new PartitionedFilterBlockBuilder(
+          mopt.prefix_extractor.get(), table_opt.whole_key_filtering,
+          filter_bits_builder, table_opt.index_block_restart_interval,
+          use_delta_encoding_for_index_values, p_index_builder, partition_size);
+    } else {
+      return new FullFilterBlockBuilder(mopt.prefix_extractor.get(),
+                                        table_opt.whole_key_filtering,
+                                        filter_bits_builder);
+    }
+  }
+}
+
+bool GoodCompressionRatio(size_t compressed_size, size_t uncomp_size,
+                          int max_compressed_bytes_per_kb) {
+  // For efficiency, avoid floating point and division
+  return compressed_size <=
+         (static_cast<uint64_t>(max_compressed_bytes_per_kb) * uncomp_size) >>
+             10;
+}
+
+}  // namespace
+
+// format_version is the block format as defined in include/rocksdb/table.h
+Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
+                    CompressionType* type, uint32_t format_version,
+                    bool allow_sample, std::string* compressed_output,
+                    std::string* sampled_output_fast,
+                    std::string* sampled_output_slow) {
+  assert(type);
+  assert(compressed_output);
+  assert(compressed_output->empty());
+
+  // If requested, we sample one in every N blocks with both a
+  // fast and a slow compression algorithm and report the stats.
+  // Users can use these stats to decide whether it is worthwhile
+  // to enable compression, and they also get a hint about which
+  // compression algorithm will be beneficial.
+  if (allow_sample && info.SampleForCompression() &&
+      Random::GetTLSInstance()->OneIn(
+          static_cast<int>(info.SampleForCompression()))) {
+    // Sampling with a fast compression algorithm
+    if (sampled_output_fast && (LZ4_Supported() || Snappy_Supported())) {
+      CompressionType c =
+          LZ4_Supported() ? kLZ4Compression : kSnappyCompression;
+      CompressionContext context(c);
+      CompressionOptions options;
+      CompressionInfo info_tmp(options, context,
+                               CompressionDict::GetEmptyDict(), c,
+                               info.SampleForCompression());
+
+      CompressData(uncompressed_data, info_tmp,
+                   GetCompressFormatForVersion(format_version),
+                   sampled_output_fast);
+    }
+
+    // Sampling with a slow but high-compression algorithm
+    if (sampled_output_slow && (ZSTD_Supported() || Zlib_Supported())) {
+      CompressionType c = ZSTD_Supported() ?
kZSTD : kZlibCompression; + CompressionContext context(c); + CompressionOptions options; + CompressionInfo info_tmp(options, context, + CompressionDict::GetEmptyDict(), c, + info.SampleForCompression()); + + CompressData(uncompressed_data, info_tmp, + GetCompressFormatForVersion(format_version), + sampled_output_slow); + } + } + + int max_compressed_bytes_per_kb = info.options().max_compressed_bytes_per_kb; + if (info.type() == kNoCompression || max_compressed_bytes_per_kb <= 0) { + *type = kNoCompression; + return uncompressed_data; + } + + // Actually compress the data; if the compression method is not supported, + // or the compression fails etc., just fall back to uncompressed + if (!CompressData(uncompressed_data, info, + GetCompressFormatForVersion(format_version), + compressed_output)) { + *type = kNoCompression; + return uncompressed_data; + } + + // Check the compression ratio; if it's not good enough, just fall back to + // uncompressed + if (!GoodCompressionRatio(compressed_output->size(), uncompressed_data.size(), + max_compressed_bytes_per_kb)) { + *type = kNoCompression; + return uncompressed_data; + } + + *type = info.type(); + return *compressed_output; +} + +// kBlockBasedTableMagicNumber was picked by running +// echo rocksdb.table.block_based | sha1sum +// and taking the leading 64 bits. +// Please note that kBlockBasedTableMagicNumber may also be accessed by other +// .cc files +// for that reason we declare it extern in the header but to get the space +// allocated +// it must be not extern in one place. +const uint64_t kBlockBasedTableMagicNumber = 0x88e241b785f4cff7ull; +// We also support reading and writing legacy block based table format (for +// backwards compatibility) +const uint64_t kLegacyBlockBasedTableMagicNumber = 0xdb4775248b80fb57ull; + +// A collector that collects properties of interest to block-based table. +// For now this class looks heavy-weight since we only write one additional +// property. +// But in the foreseeable future, we will add more and more properties that are +// specific to block-based table. +class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector + : public IntTblPropCollector { + public: + explicit BlockBasedTablePropertiesCollector( + BlockBasedTableOptions::IndexType index_type, bool whole_key_filtering, + bool prefix_filtering) + : index_type_(index_type), + whole_key_filtering_(whole_key_filtering), + prefix_filtering_(prefix_filtering) {} + + Status InternalAdd(const Slice& /*key*/, const Slice& /*value*/, + uint64_t /*file_size*/) override { + // Intentionally left blank. Have no interest in collecting stats for + // individual key/value pairs. + return Status::OK(); + } + + virtual void BlockAdd(uint64_t /* block_uncomp_bytes */, + uint64_t /* block_compressed_bytes_fast */, + uint64_t /* block_compressed_bytes_slow */) override { + // Intentionally left blank. No interest in collecting stats for + // blocks. + return; + } + + Status Finish(UserCollectedProperties* properties) override { + std::string val; + PutFixed32(&val, static_cast(index_type_)); + properties->insert({BlockBasedTablePropertyNames::kIndexType, val}); + properties->insert({BlockBasedTablePropertyNames::kWholeKeyFiltering, + whole_key_filtering_ ? kPropTrue : kPropFalse}); + properties->insert({BlockBasedTablePropertyNames::kPrefixFiltering, + prefix_filtering_ ? kPropTrue : kPropFalse}); + return Status::OK(); + } + + // The name of the properties collector can be used for debugging purpose. 
+ const char* Name() const override { + return "BlockBasedTablePropertiesCollector"; + } + + UserCollectedProperties GetReadableProperties() const override { + // Intentionally left blank. + return UserCollectedProperties(); + } + + private: + BlockBasedTableOptions::IndexType index_type_; + bool whole_key_filtering_; + bool prefix_filtering_; +}; + +struct BlockBasedTableBuilder::Rep { + const ImmutableOptions ioptions; + const MutableCFOptions moptions; + const BlockBasedTableOptions table_options; + const InternalKeyComparator& internal_comparator; + WritableFileWriter* file; + std::atomic offset; + size_t alignment; + BlockBuilder data_block; + // Buffers uncompressed data blocks to replay later. Needed when + // compression dictionary is enabled so we can finalize the dictionary before + // compressing any data blocks. + std::vector data_block_buffers; + BlockBuilder range_del_block; + + InternalKeySliceTransform internal_prefix_transform; + std::unique_ptr index_builder; + PartitionedIndexBuilder* p_index_builder_ = nullptr; + + std::string last_key; + const Slice* first_key_in_next_block = nullptr; + CompressionType compression_type; + uint64_t sample_for_compression; + std::atomic compressible_input_data_bytes; + std::atomic uncompressible_input_data_bytes; + std::atomic sampled_input_data_bytes; + std::atomic sampled_output_slow_data_bytes; + std::atomic sampled_output_fast_data_bytes; + CompressionOptions compression_opts; + std::unique_ptr compression_dict; + std::vector> compression_ctxs; + std::vector> verify_ctxs; + std::unique_ptr verify_dict; + + size_t data_begin_offset = 0; + + TableProperties props; + + // States of the builder. + // + // - `kBuffered`: This is the initial state where zero or more data blocks are + // accumulated uncompressed in-memory. From this state, call + // `EnterUnbuffered()` to finalize the compression dictionary if enabled, + // compress/write out any buffered blocks, and proceed to the `kUnbuffered` + // state. + // + // - `kUnbuffered`: This is the state when compression dictionary is finalized + // either because it wasn't enabled in the first place or it's been created + // from sampling previously buffered data. In this state, blocks are simply + // compressed/written out as they fill up. From this state, call `Finish()` + // to complete the file (write meta-blocks, etc.), or `Abandon()` to delete + // the partially created file. + // + // - `kClosed`: This indicates either `Finish()` or `Abandon()` has been + // called, so the table builder is no longer usable. We must be in this + // state by the time the destructor runs. + enum class State { + kBuffered, + kUnbuffered, + kClosed, + }; + State state; + // `kBuffered` state is allowed only as long as the buffering of uncompressed + // data blocks (see `data_block_buffers`) does not exceed `buffer_limit`. + uint64_t buffer_limit; + std::shared_ptr + compression_dict_buffer_cache_res_mgr; + const bool use_delta_encoding_for_index_values; + std::unique_ptr filter_builder; + OffsetableCacheKey base_cache_key; + const TableFileCreationReason reason; + + BlockHandle pending_handle; // Handle to add to index block + + std::string compressed_output; + std::unique_ptr flush_block_policy; + + std::vector> table_properties_collectors; + + std::unique_ptr pc_rep; + BlockCreateContext create_context; + + // The size of the "tail" part of a SST file. "Tail" refers to + // all blocks after data blocks till the end of the SST file. 
+ uint64_t tail_size; + + uint64_t get_offset() { return offset.load(std::memory_order_relaxed); } + void set_offset(uint64_t o) { offset.store(o, std::memory_order_relaxed); } + + bool IsParallelCompressionEnabled() const { + return compression_opts.parallel_threads > 1; + } + + Status GetStatus() { + // We need to make modifications of status visible when status_ok is set + // to false, and this is ensured by status_mutex, so no special memory + // order for status_ok is required. + if (status_ok.load(std::memory_order_relaxed)) { + return Status::OK(); + } else { + return CopyStatus(); + } + } + + Status CopyStatus() { + std::lock_guard lock(status_mutex); + return status; + } + + IOStatus GetIOStatus() { + // We need to make modifications of io_status visible when status_ok is set + // to false, and this is ensured by io_status_mutex, so no special memory + // order for io_status_ok is required. + if (io_status_ok.load(std::memory_order_relaxed)) { + return IOStatus::OK(); + } else { + return CopyIOStatus(); + } + } + + IOStatus CopyIOStatus() { + std::lock_guard lock(io_status_mutex); + return io_status; + } + + // Never erase an existing status that is not OK. + void SetStatus(Status s) { + if (!s.ok() && status_ok.load(std::memory_order_relaxed)) { + // Locking is an overkill for non compression_opts.parallel_threads + // case but since it's unlikely that s is not OK, we take this cost + // to be simplicity. + std::lock_guard lock(status_mutex); + status = s; + status_ok.store(false, std::memory_order_relaxed); + } + } + + // Never erase an existing I/O status that is not OK. + // Calling this will also SetStatus(ios) + void SetIOStatus(IOStatus ios) { + if (!ios.ok() && io_status_ok.load(std::memory_order_relaxed)) { + // Locking is an overkill for non compression_opts.parallel_threads + // case but since it's unlikely that s is not OK, we take this cost + // to be simplicity. + std::lock_guard lock(io_status_mutex); + io_status = ios; + io_status_ok.store(false, std::memory_order_relaxed); + } + SetStatus(ios); + } + + Rep(const BlockBasedTableOptions& table_opt, const TableBuilderOptions& tbo, + WritableFileWriter* f) + : ioptions(tbo.ioptions), + moptions(tbo.moptions), + table_options(table_opt), + internal_comparator(tbo.internal_comparator), + file(f), + offset(0), + alignment(table_options.block_align + ? std::min(static_cast(table_options.block_size), + kDefaultPageSize) + : 0), + data_block(table_options.block_restart_interval, + table_options.use_delta_encoding, + false /* use_value_delta_encoding */, + tbo.internal_comparator.user_comparator() + ->CanKeysWithDifferentByteContentsBeEqual() + ? BlockBasedTableOptions::kDataBlockBinarySearch + : table_options.data_block_index_type, + table_options.data_block_hash_table_util_ratio), + range_del_block(1 /* block_restart_interval */), + internal_prefix_transform(tbo.moptions.prefix_extractor.get()), + compression_type(tbo.compression_type), + sample_for_compression(tbo.moptions.sample_for_compression), + compressible_input_data_bytes(0), + uncompressible_input_data_bytes(0), + sampled_input_data_bytes(0), + sampled_output_slow_data_bytes(0), + sampled_output_fast_data_bytes(0), + compression_opts(tbo.compression_opts), + compression_dict(), + compression_ctxs(tbo.compression_opts.parallel_threads), + verify_ctxs(tbo.compression_opts.parallel_threads), + verify_dict(), + state((tbo.compression_opts.max_dict_bytes > 0) ? 
State::kBuffered + : State::kUnbuffered), + use_delta_encoding_for_index_values(table_opt.format_version >= 4 && + !table_opt.block_align), + reason(tbo.reason), + flush_block_policy( + table_options.flush_block_policy_factory->NewFlushBlockPolicy( + table_options, data_block)), + create_context(&table_options, ioptions.stats, + compression_type == kZSTD || + compression_type == kZSTDNotFinalCompression, + tbo.moptions.block_protection_bytes_per_key, + tbo.internal_comparator.user_comparator(), + !use_delta_encoding_for_index_values, + table_opt.index_type == + BlockBasedTableOptions::kBinarySearchWithFirstKey), + tail_size(0), + status_ok(true), + io_status_ok(true) { + if (tbo.target_file_size == 0) { + buffer_limit = compression_opts.max_dict_buffer_bytes; + } else if (compression_opts.max_dict_buffer_bytes == 0) { + buffer_limit = tbo.target_file_size; + } else { + buffer_limit = std::min(tbo.target_file_size, + compression_opts.max_dict_buffer_bytes); + } + + const auto compress_dict_build_buffer_charged = + table_options.cache_usage_options.options_overrides + .at(CacheEntryRole::kCompressionDictionaryBuildingBuffer) + .charged; + if (table_options.block_cache && + (compress_dict_build_buffer_charged == + CacheEntryRoleOptions::Decision::kEnabled || + compress_dict_build_buffer_charged == + CacheEntryRoleOptions::Decision::kFallback)) { + compression_dict_buffer_cache_res_mgr = + std::make_shared>( + table_options.block_cache); + } else { + compression_dict_buffer_cache_res_mgr = nullptr; + } + + for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) { + compression_ctxs[i].reset(new CompressionContext(compression_type)); + } + if (table_options.index_type == + BlockBasedTableOptions::kTwoLevelIndexSearch) { + p_index_builder_ = PartitionedIndexBuilder::CreateIndexBuilder( + &internal_comparator, use_delta_encoding_for_index_values, + table_options); + index_builder.reset(p_index_builder_); + } else { + index_builder.reset(IndexBuilder::CreateIndexBuilder( + table_options.index_type, &internal_comparator, + &this->internal_prefix_transform, use_delta_encoding_for_index_values, + table_options)); + } + if (ioptions.optimize_filters_for_hits && tbo.is_bottommost) { + // Apply optimize_filters_for_hits setting here when applicable by + // skipping filter generation + filter_builder.reset(); + } else if (tbo.skip_filters) { + // For SstFileWriter skip_filters + filter_builder.reset(); + } else if (!table_options.filter_policy) { + // Null filter_policy -> no filter + filter_builder.reset(); + } else { + FilterBuildingContext filter_context(table_options); + + filter_context.info_log = ioptions.logger; + filter_context.column_family_name = tbo.column_family_name; + filter_context.reason = reason; + + // Only populate other fields if known to be in LSM rather than + // generating external SST file + if (reason != TableFileCreationReason::kMisc) { + filter_context.compaction_style = ioptions.compaction_style; + filter_context.num_levels = ioptions.num_levels; + filter_context.level_at_creation = tbo.level_at_creation; + filter_context.is_bottommost = tbo.is_bottommost; + assert(filter_context.level_at_creation < filter_context.num_levels); + } + + filter_builder.reset(CreateFilterBlockBuilder( + ioptions, moptions, filter_context, + use_delta_encoding_for_index_values, p_index_builder_)); + } + + assert(tbo.int_tbl_prop_collector_factories); + for (auto& factory : *tbo.int_tbl_prop_collector_factories) { + assert(factory); + + table_properties_collectors.emplace_back( + 
factory->CreateIntTblPropCollector(tbo.column_family_id, + tbo.level_at_creation)); + } + table_properties_collectors.emplace_back( + new BlockBasedTablePropertiesCollector( + table_options.index_type, table_options.whole_key_filtering, + moptions.prefix_extractor != nullptr)); + const Comparator* ucmp = tbo.internal_comparator.user_comparator(); + assert(ucmp); + if (ucmp->timestamp_size() > 0) { + table_properties_collectors.emplace_back( + new TimestampTablePropertiesCollector(ucmp)); + } + if (table_options.verify_compression) { + for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) { + verify_ctxs[i].reset(new UncompressionContext(compression_type)); + } + } + + // These are only needed for populating table properties + props.column_family_id = tbo.column_family_id; + props.column_family_name = tbo.column_family_name; + props.oldest_key_time = tbo.oldest_key_time; + props.file_creation_time = tbo.file_creation_time; + props.orig_file_number = tbo.cur_file_num; + props.db_id = tbo.db_id; + props.db_session_id = tbo.db_session_id; + props.db_host_id = ioptions.db_host_id; + if (!ReifyDbHostIdProperty(ioptions.env, &props.db_host_id).ok()) { + ROCKS_LOG_INFO(ioptions.logger, "db_host_id property will not be set"); + } + } + + Rep(const Rep&) = delete; + Rep& operator=(const Rep&) = delete; + + private: + // Synchronize status & io_status accesses across threads from main thread, + // compression thread and write thread in parallel compression. + std::mutex status_mutex; + std::atomic status_ok; + Status status; + std::mutex io_status_mutex; + std::atomic io_status_ok; + IOStatus io_status; +}; + +struct BlockBasedTableBuilder::ParallelCompressionRep { + // Keys is a wrapper of vector of strings avoiding + // releasing string memories during vector clear() + // in order to save memory allocation overhead + class Keys { + public: + Keys() : keys_(kKeysInitSize), size_(0) {} + void PushBack(const Slice& key) { + if (size_ == keys_.size()) { + keys_.emplace_back(key.data(), key.size()); + } else { + keys_[size_].assign(key.data(), key.size()); + } + size_++; + } + void SwapAssign(std::vector& keys) { + size_ = keys.size(); + std::swap(keys_, keys); + } + void Clear() { size_ = 0; } + size_t Size() { return size_; } + std::string& Back() { return keys_[size_ - 1]; } + std::string& operator[](size_t idx) { + assert(idx < size_); + return keys_[idx]; + } + + private: + const size_t kKeysInitSize = 32; + std::vector keys_; + size_t size_; + }; + std::unique_ptr curr_block_keys; + + class BlockRepSlot; + + // BlockRep instances are fetched from and recycled to + // block_rep_pool during parallel compression. + struct BlockRep { + Slice contents; + Slice compressed_contents; + std::unique_ptr data; + std::unique_ptr compressed_data; + CompressionType compression_type; + std::unique_ptr first_key_in_next_block; + std::unique_ptr keys; + std::unique_ptr slot; + Status status; + }; + // Use a vector of BlockRep as a buffer for a determined number + // of BlockRep structures. All data referenced by pointers in + // BlockRep will be freed when this vector is destructed. + using BlockRepBuffer = std::vector; + BlockRepBuffer block_rep_buf; + // Use a thread-safe queue for concurrent access from block + // building thread and writer thread. + using BlockRepPool = WorkQueue; + BlockRepPool block_rep_pool; + + // Use BlockRepSlot to keep block order in write thread. 
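+  // The write thread pops slots from write_queue in emission order and
+  // blocks in Take() until the owning compression thread has Fill()ed the
+  // slot, so blocks are written in exactly the order they were emitted even
+  // when they finish compressing out of order.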
+  // slot_ will pass references to BlockRep.
+  class BlockRepSlot {
+   public:
+    BlockRepSlot() : slot_(1) {}
+    template <typename T>
+    void Fill(T&& rep) {
+      slot_.push(std::forward<T>(rep));
+    }
+    void Take(BlockRep*& rep) { slot_.pop(rep); }
+
+   private:
+    // slot_ will pass references to BlockRep in block_rep_buf,
+    // and those references are always valid before the destruction of
+    // block_rep_buf.
+    WorkQueue<BlockRep*> slot_;
+  };
+
+  // The compression queue will pass references to BlockRep in block_rep_buf,
+  // and those references are always valid before the destruction of
+  // block_rep_buf.
+  using CompressQueue = WorkQueue<BlockRep*>;
+  CompressQueue compress_queue;
+  std::vector<port::Thread> compress_thread_pool;
+
+  // The write queue will pass references to BlockRep::slot in block_rep_buf,
+  // and those references are always valid before the corresponding
+  // BlockRep::slot is destructed, which is before the destruction of
+  // block_rep_buf.
+  using WriteQueue = WorkQueue<BlockRepSlot*>;
+  WriteQueue write_queue;
+  std::unique_ptr<port::Thread> write_thread;
+
+  // Estimate output file size when parallel compression is enabled. This is
+  // necessary because compression & flush are no longer synchronized,
+  // and BlockBasedTableBuilder::FileSize() is no longer accurate.
+  // memory_order_relaxed suffices because accurate statistics are not
+  // required.
+  class FileSizeEstimator {
+   public:
+    explicit FileSizeEstimator()
+        : uncomp_bytes_compressed(0),
+          uncomp_bytes_curr_block(0),
+          uncomp_bytes_curr_block_set(false),
+          uncomp_bytes_inflight(0),
+          blocks_inflight(0),
+          curr_compression_ratio(0),
+          estimated_file_size(0) {}
+
+    // Estimate the file size when a block is about to be emitted to a
+    // compression thread.
+    void EmitBlock(uint64_t uncomp_block_size, uint64_t curr_file_size) {
+      uint64_t new_uncomp_bytes_inflight =
+          uncomp_bytes_inflight.fetch_add(uncomp_block_size,
+                                          std::memory_order_relaxed) +
+          uncomp_block_size;
+
+      uint64_t new_blocks_inflight =
+          blocks_inflight.fetch_add(1, std::memory_order_relaxed) + 1;
+
+      estimated_file_size.store(
+          curr_file_size +
+              static_cast<uint64_t>(
+                  static_cast<double>(new_uncomp_bytes_inflight) *
+                  curr_compression_ratio.load(std::memory_order_relaxed)) +
+              new_blocks_inflight * kBlockTrailerSize,
+          std::memory_order_relaxed);
+    }
+
+    // Estimate the file size when a block has just been reaped from a
+    // compression thread.
+    void ReapBlock(uint64_t compressed_block_size, uint64_t curr_file_size) {
+      assert(uncomp_bytes_curr_block_set);
+
+      uint64_t new_uncomp_bytes_compressed =
+          uncomp_bytes_compressed + uncomp_bytes_curr_block;
+      assert(new_uncomp_bytes_compressed > 0);
+
+      curr_compression_ratio.store(
+          (curr_compression_ratio.load(std::memory_order_relaxed) *
+               uncomp_bytes_compressed +
+           compressed_block_size) /
+              static_cast<double>(new_uncomp_bytes_compressed),
+          std::memory_order_relaxed);
+      uncomp_bytes_compressed = new_uncomp_bytes_compressed;
+
+      uint64_t new_uncomp_bytes_inflight =
+          uncomp_bytes_inflight.fetch_sub(uncomp_bytes_curr_block,
+                                          std::memory_order_relaxed) -
+          uncomp_bytes_curr_block;
+
+      uint64_t new_blocks_inflight =
+          blocks_inflight.fetch_sub(1, std::memory_order_relaxed) - 1;
+
+      estimated_file_size.store(
+          curr_file_size +
+              static_cast<uint64_t>(
+                  static_cast<double>(new_uncomp_bytes_inflight) *
+                  curr_compression_ratio.load(std::memory_order_relaxed)) +
+              new_blocks_inflight * kBlockTrailerSize,
+          std::memory_order_relaxed);
+
+      uncomp_bytes_curr_block_set = false;
+    }
+
+    void SetEstimatedFileSize(uint64_t size) {
+      estimated_file_size.store(size, std::memory_order_relaxed);
+    }
+
+    uint64_t GetEstimatedFileSize() {
+      return
estimated_file_size.load(std::memory_order_relaxed); + } + + void SetCurrBlockUncompSize(uint64_t size) { + uncomp_bytes_curr_block = size; + uncomp_bytes_curr_block_set = true; + } + + private: + // Input bytes compressed so far. + uint64_t uncomp_bytes_compressed; + // Size of current block being appended. + uint64_t uncomp_bytes_curr_block; + // Whether uncomp_bytes_curr_block has been set for next + // ReapBlock call. + bool uncomp_bytes_curr_block_set; + // Input bytes under compression and not appended yet. + std::atomic uncomp_bytes_inflight; + // Number of blocks under compression and not appended yet. + std::atomic blocks_inflight; + // Current compression ratio, maintained by BGWorkWriteMaybeCompressedBlock. + std::atomic curr_compression_ratio; + // Estimated SST file size. + std::atomic estimated_file_size; + }; + FileSizeEstimator file_size_estimator; + + // Facilities used for waiting first block completion. Need to Wait for + // the completion of first block compression and flush to get a non-zero + // compression ratio. + std::atomic first_block_processed; + std::condition_variable first_block_cond; + std::mutex first_block_mutex; + + explicit ParallelCompressionRep(uint32_t parallel_threads) + : curr_block_keys(new Keys()), + block_rep_buf(parallel_threads), + block_rep_pool(parallel_threads), + compress_queue(parallel_threads), + write_queue(parallel_threads), + first_block_processed(false) { + for (uint32_t i = 0; i < parallel_threads; i++) { + block_rep_buf[i].contents = Slice(); + block_rep_buf[i].compressed_contents = Slice(); + block_rep_buf[i].data.reset(new std::string()); + block_rep_buf[i].compressed_data.reset(new std::string()); + block_rep_buf[i].compression_type = CompressionType(); + block_rep_buf[i].first_key_in_next_block.reset(new std::string()); + block_rep_buf[i].keys.reset(new Keys()); + block_rep_buf[i].slot.reset(new BlockRepSlot()); + block_rep_buf[i].status = Status::OK(); + block_rep_pool.push(&block_rep_buf[i]); + } + } + + ~ParallelCompressionRep() { block_rep_pool.finish(); } + + // Make a block prepared to be emitted to compression thread + // Used in non-buffered mode + BlockRep* PrepareBlock(CompressionType compression_type, + const Slice* first_key_in_next_block, + BlockBuilder* data_block) { + BlockRep* block_rep = + PrepareBlockInternal(compression_type, first_key_in_next_block); + assert(block_rep != nullptr); + data_block->SwapAndReset(*(block_rep->data)); + block_rep->contents = *(block_rep->data); + std::swap(block_rep->keys, curr_block_keys); + curr_block_keys->Clear(); + return block_rep; + } + + // Used in EnterUnbuffered + BlockRep* PrepareBlock(CompressionType compression_type, + const Slice* first_key_in_next_block, + std::string* data_block, + std::vector* keys) { + BlockRep* block_rep = + PrepareBlockInternal(compression_type, first_key_in_next_block); + assert(block_rep != nullptr); + std::swap(*(block_rep->data), *data_block); + block_rep->contents = *(block_rep->data); + block_rep->keys->SwapAssign(*keys); + return block_rep; + } + + // Emit a block to compression thread + void EmitBlock(BlockRep* block_rep) { + assert(block_rep != nullptr); + assert(block_rep->status.ok()); + if (!write_queue.push(block_rep->slot.get())) { + return; + } + if (!compress_queue.push(block_rep)) { + return; + } + + if (!first_block_processed.load(std::memory_order_relaxed)) { + std::unique_lock lock(first_block_mutex); + first_block_cond.wait(lock, [this] { + return first_block_processed.load(std::memory_order_relaxed); + }); + } + } + + // 
Reap a block from compression thread + void ReapBlock(BlockRep* block_rep) { + assert(block_rep != nullptr); + block_rep->compressed_data->clear(); + block_rep_pool.push(block_rep); + + if (!first_block_processed.load(std::memory_order_relaxed)) { + std::lock_guard lock(first_block_mutex); + first_block_processed.store(true, std::memory_order_relaxed); + first_block_cond.notify_one(); + } + } + + private: + BlockRep* PrepareBlockInternal(CompressionType compression_type, + const Slice* first_key_in_next_block) { + BlockRep* block_rep = nullptr; + block_rep_pool.pop(block_rep); + assert(block_rep != nullptr); + + assert(block_rep->data); + + block_rep->compression_type = compression_type; + + if (first_key_in_next_block == nullptr) { + block_rep->first_key_in_next_block.reset(nullptr); + } else { + block_rep->first_key_in_next_block->assign( + first_key_in_next_block->data(), first_key_in_next_block->size()); + } + + return block_rep; + } +}; + +BlockBasedTableBuilder::BlockBasedTableBuilder( + const BlockBasedTableOptions& table_options, const TableBuilderOptions& tbo, + WritableFileWriter* file) { + BlockBasedTableOptions sanitized_table_options(table_options); + if (sanitized_table_options.format_version == 0 && + sanitized_table_options.checksum != kCRC32c) { + ROCKS_LOG_WARN( + tbo.ioptions.logger, + "Silently converting format_version to 1 because checksum is " + "non-default"); + // silently convert format_version to 1 to keep consistent with current + // behavior + sanitized_table_options.format_version = 1; + } + + rep_ = new Rep(sanitized_table_options, tbo, file); + + TEST_SYNC_POINT_CALLBACK( + "BlockBasedTableBuilder::BlockBasedTableBuilder:PreSetupBaseCacheKey", + const_cast(&rep_->props)); + + BlockBasedTable::SetupBaseCacheKey(&rep_->props, tbo.db_session_id, + tbo.cur_file_num, &rep_->base_cache_key); + + if (rep_->IsParallelCompressionEnabled()) { + StartParallelCompression(); + } +} + +BlockBasedTableBuilder::~BlockBasedTableBuilder() { + // Catch errors where caller forgot to call Finish() + assert(rep_->state == Rep::State::kClosed); + delete rep_; +} + +void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { + Rep* r = rep_; + assert(rep_->state != Rep::State::kClosed); + if (!ok()) return; + ValueType value_type = ExtractValueType(key); + if (IsValueType(value_type)) { +#ifndef NDEBUG + if (r->props.num_entries > r->props.num_range_deletions) { + assert(r->internal_comparator.Compare(key, Slice(r->last_key)) > 0); + } +#endif // !NDEBUG + + auto should_flush = r->flush_block_policy->Update(key, value); + if (should_flush) { + assert(!r->data_block.empty()); + r->first_key_in_next_block = &key; + Flush(); + if (r->state == Rep::State::kBuffered) { + bool exceeds_buffer_limit = + (r->buffer_limit != 0 && r->data_begin_offset > r->buffer_limit); + bool exceeds_global_block_cache_limit = false; + + // Increase cache charging for the last buffered data block + // only if the block is not going to be unbuffered immediately + // and there exists a cache reservation manager + if (!exceeds_buffer_limit && + r->compression_dict_buffer_cache_res_mgr != nullptr) { + Status s = + r->compression_dict_buffer_cache_res_mgr->UpdateCacheReservation( + r->data_begin_offset); + exceeds_global_block_cache_limit = s.IsMemoryLimit(); + } + + if (exceeds_buffer_limit || exceeds_global_block_cache_limit) { + EnterUnbuffered(); + } + } + + // Add item to index block. + // We do not emit the index entry for a block until we have seen the + // first key for the next data block. 
This allows us to use shorter + // keys in the index block. For example, consider a block boundary + // between the keys "the quick brown fox" and "the who". We can use + // "the r" as the key for the index block entry since it is >= all + // entries in the first block and < all entries in subsequent + // blocks. + if (ok() && r->state == Rep::State::kUnbuffered) { + if (r->IsParallelCompressionEnabled()) { + r->pc_rep->curr_block_keys->Clear(); + } else { + r->index_builder->AddIndexEntry(&r->last_key, &key, + r->pending_handle); + } + } + } + + // Note: PartitionedFilterBlockBuilder requires key being added to filter + // builder after being added to index builder. + if (r->state == Rep::State::kUnbuffered) { + if (r->IsParallelCompressionEnabled()) { + r->pc_rep->curr_block_keys->PushBack(key); + } else { + if (r->filter_builder != nullptr) { + size_t ts_sz = + r->internal_comparator.user_comparator()->timestamp_size(); + r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz)); + } + } + } + + r->data_block.AddWithLastKey(key, value, r->last_key); + r->last_key.assign(key.data(), key.size()); + if (r->state == Rep::State::kBuffered) { + // Buffered keys will be replayed from data_block_buffers during + // `Finish()` once compression dictionary has been finalized. + } else { + if (!r->IsParallelCompressionEnabled()) { + r->index_builder->OnKeyAdded(key); + } + } + // TODO offset passed in is not accurate for parallel compression case + NotifyCollectTableCollectorsOnAdd(key, value, r->get_offset(), + r->table_properties_collectors, + r->ioptions.logger); + + } else if (value_type == kTypeRangeDeletion) { + r->range_del_block.Add(key, value); + // TODO offset passed in is not accurate for parallel compression case + NotifyCollectTableCollectorsOnAdd(key, value, r->get_offset(), + r->table_properties_collectors, + r->ioptions.logger); + } else { + assert(false); + } + + r->props.num_entries++; + r->props.raw_key_size += key.size(); + r->props.raw_value_size += value.size(); + if (value_type == kTypeDeletion || value_type == kTypeSingleDeletion || + value_type == kTypeDeletionWithTimestamp) { + r->props.num_deletions++; + } else if (value_type == kTypeRangeDeletion) { + r->props.num_deletions++; + r->props.num_range_deletions++; + } else if (value_type == kTypeMerge) { + r->props.num_merge_operands++; + } +} + +void BlockBasedTableBuilder::Flush() { + Rep* r = rep_; + assert(rep_->state != Rep::State::kClosed); + if (!ok()) return; + if (r->data_block.empty()) return; + if (r->IsParallelCompressionEnabled() && + r->state == Rep::State::kUnbuffered) { + r->data_block.Finish(); + ParallelCompressionRep::BlockRep* block_rep = r->pc_rep->PrepareBlock( + r->compression_type, r->first_key_in_next_block, &(r->data_block)); + assert(block_rep != nullptr); + r->pc_rep->file_size_estimator.EmitBlock(block_rep->data->size(), + r->get_offset()); + r->pc_rep->EmitBlock(block_rep); + } else { + WriteBlock(&r->data_block, &r->pending_handle, BlockType::kData); + } +} + +void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block, + BlockHandle* handle, + BlockType block_type) { + block->Finish(); + std::string uncompressed_block_data; + uncompressed_block_data.reserve(rep_->table_options.block_size); + block->SwapAndReset(uncompressed_block_data); + if (rep_->state == Rep::State::kBuffered) { + assert(block_type == BlockType::kData); + rep_->data_block_buffers.emplace_back(std::move(uncompressed_block_data)); + rep_->data_begin_offset += rep_->data_block_buffers.back().size(); + return; + } + 
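+  // Unbuffered: hand the finished block to the compress-and-write path
+  // immediately.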
WriteBlock(uncompressed_block_data, handle, block_type); +} + +void BlockBasedTableBuilder::WriteBlock(const Slice& uncompressed_block_data, + BlockHandle* handle, + BlockType block_type) { + Rep* r = rep_; + assert(r->state == Rep::State::kUnbuffered); + Slice block_contents; + CompressionType type; + Status compress_status; + bool is_data_block = block_type == BlockType::kData; + CompressAndVerifyBlock(uncompressed_block_data, is_data_block, + *(r->compression_ctxs[0]), r->verify_ctxs[0].get(), + &(r->compressed_output), &(block_contents), &type, + &compress_status); + r->SetStatus(compress_status); + if (!ok()) { + return; + } + + WriteMaybeCompressedBlock(block_contents, type, handle, block_type, + &uncompressed_block_data); + r->compressed_output.clear(); + if (is_data_block) { + r->props.data_size = r->get_offset(); + ++r->props.num_data_blocks; + } +} + +void BlockBasedTableBuilder::BGWorkCompression( + const CompressionContext& compression_ctx, + UncompressionContext* verify_ctx) { + ParallelCompressionRep::BlockRep* block_rep = nullptr; + while (rep_->pc_rep->compress_queue.pop(block_rep)) { + assert(block_rep != nullptr); + CompressAndVerifyBlock(block_rep->contents, true, /* is_data_block*/ + compression_ctx, verify_ctx, + block_rep->compressed_data.get(), + &block_rep->compressed_contents, + &(block_rep->compression_type), &block_rep->status); + block_rep->slot->Fill(block_rep); + } +} + +void BlockBasedTableBuilder::CompressAndVerifyBlock( + const Slice& uncompressed_block_data, bool is_data_block, + const CompressionContext& compression_ctx, UncompressionContext* verify_ctx, + std::string* compressed_output, Slice* block_contents, + CompressionType* type, Status* out_status) { + Rep* r = rep_; + bool is_status_ok = ok(); + if (!r->IsParallelCompressionEnabled()) { + assert(is_status_ok); + } + + if (is_status_ok && uncompressed_block_data.size() < kCompressionSizeLimit) { + StopWatchNano timer( + r->ioptions.clock, + ShouldReportDetailedTime(r->ioptions.env, r->ioptions.stats)); + + if (is_data_block) { + r->compressible_input_data_bytes.fetch_add(uncompressed_block_data.size(), + std::memory_order_relaxed); + } + const CompressionDict* compression_dict; + if (!is_data_block || r->compression_dict == nullptr) { + compression_dict = &CompressionDict::GetEmptyDict(); + } else { + compression_dict = r->compression_dict.get(); + } + assert(compression_dict != nullptr); + CompressionInfo compression_info(r->compression_opts, compression_ctx, + *compression_dict, r->compression_type, + r->sample_for_compression); + + std::string sampled_output_fast; + std::string sampled_output_slow; + *block_contents = CompressBlock( + uncompressed_block_data, compression_info, type, + r->table_options.format_version, is_data_block /* allow_sample */, + compressed_output, &sampled_output_fast, &sampled_output_slow); + + if (sampled_output_slow.size() > 0 || sampled_output_fast.size() > 0) { + // Currently compression sampling is only enabled for data block. 
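+      // The sampled byte counts recorded below feed the slow/fast
+      // compression estimated data size table properties computed later in
+      // WritePropertiesBlock().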
+ assert(is_data_block); + r->sampled_input_data_bytes.fetch_add(uncompressed_block_data.size(), + std::memory_order_relaxed); + r->sampled_output_slow_data_bytes.fetch_add(sampled_output_slow.size(), + std::memory_order_relaxed); + r->sampled_output_fast_data_bytes.fetch_add(sampled_output_fast.size(), + std::memory_order_relaxed); + } + // notify collectors on block add + NotifyCollectTableCollectorsOnBlockAdd( + r->table_properties_collectors, uncompressed_block_data.size(), + sampled_output_fast.size(), sampled_output_slow.size()); + + // Some of the compression algorithms are known to be unreliable. If + // the verify_compression flag is set then try to de-compress the + // compressed data and compare to the input. + if (*type != kNoCompression && r->table_options.verify_compression) { + // Retrieve the uncompressed contents into a new buffer + const UncompressionDict* verify_dict; + if (!is_data_block || r->verify_dict == nullptr) { + verify_dict = &UncompressionDict::GetEmptyDict(); + } else { + verify_dict = r->verify_dict.get(); + } + assert(verify_dict != nullptr); + BlockContents contents; + UncompressionInfo uncompression_info(*verify_ctx, *verify_dict, + r->compression_type); + Status uncompress_status = UncompressBlockData( + uncompression_info, block_contents->data(), block_contents->size(), + &contents, r->table_options.format_version, r->ioptions); + + if (uncompress_status.ok()) { + bool data_match = contents.data.compare(uncompressed_block_data) == 0; + if (!data_match) { + // The result of the compression was invalid. abort. + const char* const msg = + "Decompressed block did not match pre-compression block"; + ROCKS_LOG_ERROR(r->ioptions.logger, "%s", msg); + *out_status = Status::Corruption(msg); + *type = kNoCompression; + } + } else { + // Decompression reported an error. abort. + *out_status = Status::Corruption(std::string("Could not decompress: ") + + uncompress_status.getState()); + *type = kNoCompression; + } + } + if (timer.IsStarted()) { + RecordTimeToHistogram(r->ioptions.stats, COMPRESSION_TIMES_NANOS, + timer.ElapsedNanos()); + } + } else { + // Status is not OK, or block is too big to be compressed. + if (is_data_block) { + r->uncompressible_input_data_bytes.fetch_add( + uncompressed_block_data.size(), std::memory_order_relaxed); + } + *type = kNoCompression; + } + if (is_data_block) { + r->uncompressible_input_data_bytes.fetch_add(kBlockTrailerSize, + std::memory_order_relaxed); + } + + // Abort compression if the block is too big, or did not pass + // verification. + if (*type == kNoCompression) { + *block_contents = uncompressed_block_data; + bool compression_attempted = !compressed_output->empty(); + RecordTick(r->ioptions.stats, compression_attempted + ? NUMBER_BLOCK_COMPRESSION_REJECTED + : NUMBER_BLOCK_COMPRESSION_BYPASSED); + RecordTick(r->ioptions.stats, + compression_attempted ? 
BYTES_COMPRESSION_REJECTED + : BYTES_COMPRESSION_BYPASSED, + uncompressed_block_data.size()); + } else { + RecordTick(r->ioptions.stats, NUMBER_BLOCK_COMPRESSED); + RecordTick(r->ioptions.stats, BYTES_COMPRESSED_FROM, + uncompressed_block_data.size()); + RecordTick(r->ioptions.stats, BYTES_COMPRESSED_TO, + compressed_output->size()); + } +} + +void BlockBasedTableBuilder::WriteMaybeCompressedBlock( + const Slice& block_contents, CompressionType comp_type, BlockHandle* handle, + BlockType block_type, const Slice* uncompressed_block_data) { + // File format contains a sequence of blocks where each block has: + // block_data: uint8[n] + // compression_type: uint8 + // checksum: uint32 + Rep* r = rep_; + bool is_data_block = block_type == BlockType::kData; + // Old, misleading name of this function: WriteRawBlock + StopWatch sw(r->ioptions.clock, r->ioptions.stats, WRITE_RAW_BLOCK_MICROS); + handle->set_offset(r->get_offset()); + handle->set_size(block_contents.size()); + assert(status().ok()); + assert(io_status().ok()); + if (uncompressed_block_data == nullptr) { + uncompressed_block_data = &block_contents; + assert(comp_type == kNoCompression); + } + + { + IOStatus io_s = r->file->Append(block_contents); + if (!io_s.ok()) { + r->SetIOStatus(io_s); + return; + } + } + + std::array trailer; + trailer[0] = comp_type; + uint32_t checksum = ComputeBuiltinChecksumWithLastByte( + r->table_options.checksum, block_contents.data(), block_contents.size(), + /*last_byte*/ comp_type); + + if (block_type == BlockType::kFilter) { + Status s = r->filter_builder->MaybePostVerifyFilter(block_contents); + if (!s.ok()) { + r->SetStatus(s); + return; + } + } + + EncodeFixed32(trailer.data() + 1, checksum); + TEST_SYNC_POINT_CALLBACK( + "BlockBasedTableBuilder::WriteMaybeCompressedBlock:TamperWithChecksum", + trailer.data()); + { + IOStatus io_s = r->file->Append(Slice(trailer.data(), trailer.size())); + if (!io_s.ok()) { + r->SetIOStatus(io_s); + return; + } + } + + { + bool warm_cache; + switch (r->table_options.prepopulate_block_cache) { + case BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly: + warm_cache = (r->reason == TableFileCreationReason::kFlush); + break; + case BlockBasedTableOptions::PrepopulateBlockCache::kDisable: + warm_cache = false; + break; + default: + // missing case + assert(false); + warm_cache = false; + } + if (warm_cache) { + Status s = InsertBlockInCacheHelper(*uncompressed_block_data, handle, + block_type); + if (!s.ok()) { + r->SetStatus(s); + return; + } + } + } + + r->set_offset(r->get_offset() + block_contents.size() + kBlockTrailerSize); + if (r->table_options.block_align && is_data_block) { + size_t pad_bytes = + (r->alignment - + ((block_contents.size() + kBlockTrailerSize) & (r->alignment - 1))) & + (r->alignment - 1); + IOStatus io_s = r->file->Pad(pad_bytes); + if (io_s.ok()) { + r->set_offset(r->get_offset() + pad_bytes); + } else { + r->SetIOStatus(io_s); + return; + } + } + + if (r->IsParallelCompressionEnabled()) { + if (is_data_block) { + r->pc_rep->file_size_estimator.ReapBlock(block_contents.size(), + r->get_offset()); + } else { + r->pc_rep->file_size_estimator.SetEstimatedFileSize(r->get_offset()); + } + } +} + +void BlockBasedTableBuilder::BGWorkWriteMaybeCompressedBlock() { + Rep* r = rep_; + ParallelCompressionRep::BlockRepSlot* slot = nullptr; + ParallelCompressionRep::BlockRep* block_rep = nullptr; + while (r->pc_rep->write_queue.pop(slot)) { + assert(slot != nullptr); + slot->Take(block_rep); + assert(block_rep != nullptr); + if 
(!block_rep->status.ok()) { + r->SetStatus(block_rep->status); + // Reap block so that blocked Flush() can finish + // if there is one, and Flush() will notice !ok() next time. + block_rep->status = Status::OK(); + r->pc_rep->ReapBlock(block_rep); + continue; + } + + for (size_t i = 0; i < block_rep->keys->Size(); i++) { + auto& key = (*block_rep->keys)[i]; + if (r->filter_builder != nullptr) { + size_t ts_sz = + r->internal_comparator.user_comparator()->timestamp_size(); + r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz)); + } + r->index_builder->OnKeyAdded(key); + } + + r->pc_rep->file_size_estimator.SetCurrBlockUncompSize( + block_rep->data->size()); + WriteMaybeCompressedBlock(block_rep->compressed_contents, + block_rep->compression_type, &r->pending_handle, + BlockType::kData, &block_rep->contents); + if (!ok()) { + break; + } + + r->props.data_size = r->get_offset(); + ++r->props.num_data_blocks; + + if (block_rep->first_key_in_next_block == nullptr) { + r->index_builder->AddIndexEntry(&(block_rep->keys->Back()), nullptr, + r->pending_handle); + } else { + Slice first_key_in_next_block = + Slice(*block_rep->first_key_in_next_block); + r->index_builder->AddIndexEntry(&(block_rep->keys->Back()), + &first_key_in_next_block, + r->pending_handle); + } + + r->pc_rep->ReapBlock(block_rep); + } +} + +void BlockBasedTableBuilder::StartParallelCompression() { + rep_->pc_rep.reset( + new ParallelCompressionRep(rep_->compression_opts.parallel_threads)); + rep_->pc_rep->compress_thread_pool.reserve( + rep_->compression_opts.parallel_threads); + for (uint32_t i = 0; i < rep_->compression_opts.parallel_threads; i++) { + rep_->pc_rep->compress_thread_pool.emplace_back([this, i] { + BGWorkCompression(*(rep_->compression_ctxs[i]), + rep_->verify_ctxs[i].get()); + }); + } + rep_->pc_rep->write_thread.reset( + new port::Thread([this] { BGWorkWriteMaybeCompressedBlock(); })); +} + +void BlockBasedTableBuilder::StopParallelCompression() { + rep_->pc_rep->compress_queue.finish(); + for (auto& thread : rep_->pc_rep->compress_thread_pool) { + thread.join(); + } + rep_->pc_rep->write_queue.finish(); + rep_->pc_rep->write_thread->join(); +} + +Status BlockBasedTableBuilder::status() const { return rep_->GetStatus(); } + +IOStatus BlockBasedTableBuilder::io_status() const { + return rep_->GetIOStatus(); +} + +Status BlockBasedTableBuilder::InsertBlockInCacheHelper( + const Slice& block_contents, const BlockHandle* handle, + BlockType block_type) { + + Cache* block_cache = rep_->table_options.block_cache.get(); + Status s; + auto helper = + GetCacheItemHelper(block_type, rep_->ioptions.lowest_used_cache_tier); + if (block_cache && helper && helper->create_cb) { + CacheKey key = BlockBasedTable::GetCacheKey(rep_->base_cache_key, *handle); + size_t charge; + s = WarmInCache(block_cache, key.AsSlice(), block_contents, + &rep_->create_context, helper, Cache::Priority::LOW, + &charge); + + if (s.ok()) { + BlockBasedTable::UpdateCacheInsertionMetrics( + block_type, nullptr /*get_context*/, charge, s.IsOkOverwritten(), + rep_->ioptions.stats); + } else { + RecordTick(rep_->ioptions.stats, BLOCK_CACHE_ADD_FAILURES); + } + } + return s; +} + +void BlockBasedTableBuilder::WriteFilterBlock( + MetaIndexBuilder* meta_index_builder) { + if (rep_->filter_builder == nullptr || rep_->filter_builder->IsEmpty()) { + // No filter block needed + return; + } + BlockHandle filter_block_handle; + bool is_partitioned_filter = rep_->table_options.partition_filters; + if (ok()) { + rep_->props.num_filter_entries += + 
rep_->filter_builder->EstimateEntriesAdded();
+    Status s = Status::Incomplete();
+    while (ok() && s.IsIncomplete()) {
+      // filter_data is used to store the transferred filter data payload
+      // from FilterBlockBuilder and to deallocate the payload by going out
+      // of scope. Otherwise, the payload would unnecessarily remain alive
+      // until the BlockBasedTableBuilder is deallocated.
+      //
+      // See FilterBlockBuilder::Finish() for more on the differences in the
+      // transferred filter data payload among FilterBlockBuilder subtypes.
+      std::unique_ptr<const char[]> filter_data;
+      Slice filter_content =
+          rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data);
+
+      assert(s.ok() || s.IsIncomplete() || s.IsCorruption());
+      if (s.IsCorruption()) {
+        rep_->SetStatus(s);
+        break;
+      }
+
+      rep_->props.filter_size += filter_content.size();
+
+      BlockType btype = is_partitioned_filter && /* last */ s.ok()
+                            ? BlockType::kFilterPartitionIndex
+                            : BlockType::kFilter;
+      WriteMaybeCompressedBlock(filter_content, kNoCompression,
+                                &filter_block_handle, btype);
+    }
+    rep_->filter_builder->ResetFilterBitsBuilder();
+  }
+  if (ok()) {
+    // Add a mapping from "<filter_block_prefix>.Name" to the location of
+    // the filter data.
+    std::string key;
+    key = is_partitioned_filter ? BlockBasedTable::kPartitionedFilterBlockPrefix
+                                : BlockBasedTable::kFullFilterBlockPrefix;
+    key.append(rep_->table_options.filter_policy->CompatibilityName());
+    meta_index_builder->Add(key, filter_block_handle);
+  }
+}
+
+void BlockBasedTableBuilder::WriteIndexBlock(
+    MetaIndexBuilder* meta_index_builder, BlockHandle* index_block_handle) {
+  if (!ok()) {
+    return;
+  }
+  IndexBuilder::IndexBlocks index_blocks;
+  auto index_builder_status = rep_->index_builder->Finish(&index_blocks);
+  if (index_builder_status.IsIncomplete()) {
+    // If we have more than one index partition then meta_blocks are not
+    // supported for the index. Currently meta_blocks are used only by
+    // HashIndexBuilder, which is not multi-partition.
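+    // An Incomplete status from Finish() means "more index partitions
+    // remain": the loop further below keeps calling Finish(), writing one
+    // partition per call, until the top-level index block is returned with
+    // an OK status.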
+ assert(index_blocks.meta_blocks.empty()); + } else if (ok() && !index_builder_status.ok()) { + rep_->SetStatus(index_builder_status); + } + if (ok()) { + for (const auto& item : index_blocks.meta_blocks) { + BlockHandle block_handle; + WriteBlock(item.second, &block_handle, BlockType::kIndex); + if (!ok()) { + break; + } + meta_index_builder->Add(item.first, block_handle); + } + } + if (ok()) { + if (rep_->table_options.enable_index_compression) { + WriteBlock(index_blocks.index_block_contents, index_block_handle, + BlockType::kIndex); + } else { + WriteMaybeCompressedBlock(index_blocks.index_block_contents, + kNoCompression, index_block_handle, + BlockType::kIndex); + } + } + // If there are more index partitions, finish them and write them out + if (index_builder_status.IsIncomplete()) { + bool index_building_finished = false; + while (ok() && !index_building_finished) { + Status s = + rep_->index_builder->Finish(&index_blocks, *index_block_handle); + if (s.ok()) { + index_building_finished = true; + } else if (s.IsIncomplete()) { + // More partitioned index after this one + assert(!index_building_finished); + } else { + // Error + rep_->SetStatus(s); + return; + } + + if (rep_->table_options.enable_index_compression) { + WriteBlock(index_blocks.index_block_contents, index_block_handle, + BlockType::kIndex); + } else { + WriteMaybeCompressedBlock(index_blocks.index_block_contents, + kNoCompression, index_block_handle, + BlockType::kIndex); + } + // The last index_block_handle will be for the partition index block + } + } +} + +void BlockBasedTableBuilder::WritePropertiesBlock( + MetaIndexBuilder* meta_index_builder) { + BlockHandle properties_block_handle; + if (ok()) { + PropertyBlockBuilder property_block_builder; + rep_->props.filter_policy_name = + rep_->table_options.filter_policy != nullptr + ? rep_->table_options.filter_policy->Name() + : ""; + rep_->props.index_size = + rep_->index_builder->IndexSize() + kBlockTrailerSize; + rep_->props.comparator_name = rep_->ioptions.user_comparator != nullptr + ? rep_->ioptions.user_comparator->Name() + : "nullptr"; + rep_->props.merge_operator_name = + rep_->ioptions.merge_operator != nullptr + ? rep_->ioptions.merge_operator->Name() + : "nullptr"; + rep_->props.compression_name = + CompressionTypeToString(rep_->compression_type); + rep_->props.compression_options = + CompressionOptionsToString(rep_->compression_opts); + rep_->props.prefix_extractor_name = + rep_->moptions.prefix_extractor != nullptr + ? 
rep_->moptions.prefix_extractor->AsString() + : "nullptr"; + std::string property_collectors_names = "["; + for (size_t i = 0; + i < rep_->ioptions.table_properties_collector_factories.size(); ++i) { + if (i != 0) { + property_collectors_names += ","; + } + property_collectors_names += + rep_->ioptions.table_properties_collector_factories[i]->Name(); + } + property_collectors_names += "]"; + rep_->props.property_collectors_names = property_collectors_names; + if (rep_->table_options.index_type == + BlockBasedTableOptions::kTwoLevelIndexSearch) { + assert(rep_->p_index_builder_ != nullptr); + rep_->props.index_partitions = rep_->p_index_builder_->NumPartitions(); + rep_->props.top_level_index_size = + rep_->p_index_builder_->TopLevelIndexSize(rep_->offset); + } + rep_->props.index_key_is_user_key = + !rep_->index_builder->seperator_is_key_plus_seq(); + rep_->props.index_value_is_delta_encoded = + rep_->use_delta_encoding_for_index_values; + if (rep_->sampled_input_data_bytes > 0) { + rep_->props.slow_compression_estimated_data_size = static_cast( + static_cast(rep_->sampled_output_slow_data_bytes) / + rep_->sampled_input_data_bytes * + rep_->compressible_input_data_bytes + + rep_->uncompressible_input_data_bytes + 0.5); + rep_->props.fast_compression_estimated_data_size = static_cast( + static_cast(rep_->sampled_output_fast_data_bytes) / + rep_->sampled_input_data_bytes * + rep_->compressible_input_data_bytes + + rep_->uncompressible_input_data_bytes + 0.5); + } else if (rep_->sample_for_compression > 0) { + // We tried to sample but none were found. Assume worst-case (compression + // ratio 1.0) so data is complete and aggregatable. + rep_->props.slow_compression_estimated_data_size = + rep_->compressible_input_data_bytes + + rep_->uncompressible_input_data_bytes; + rep_->props.fast_compression_estimated_data_size = + rep_->compressible_input_data_bytes + + rep_->uncompressible_input_data_bytes; + } + + // Add basic properties + property_block_builder.AddTableProperty(rep_->props); + + // Add use collected properties + NotifyCollectTableCollectorsOnFinish(rep_->table_properties_collectors, + rep_->ioptions.logger, + &property_block_builder); + + Slice block_data = property_block_builder.Finish(); + TEST_SYNC_POINT_CALLBACK( + "BlockBasedTableBuilder::WritePropertiesBlock:BlockData", &block_data); + WriteMaybeCompressedBlock(block_data, kNoCompression, + &properties_block_handle, BlockType::kProperties); + } + if (ok()) { +#ifndef NDEBUG + { + uint64_t props_block_offset = properties_block_handle.offset(); + uint64_t props_block_size = properties_block_handle.size(); + TEST_SYNC_POINT_CALLBACK( + "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockOffset", + &props_block_offset); + TEST_SYNC_POINT_CALLBACK( + "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockSize", + &props_block_size); + } +#endif // !NDEBUG + + const std::string* properties_block_meta = &kPropertiesBlockName; + TEST_SYNC_POINT_CALLBACK( + "BlockBasedTableBuilder::WritePropertiesBlock:Meta", + &properties_block_meta); + meta_index_builder->Add(*properties_block_meta, properties_block_handle); + } +} + +void BlockBasedTableBuilder::WriteCompressionDictBlock( + MetaIndexBuilder* meta_index_builder) { + if (rep_->compression_dict != nullptr && + rep_->compression_dict->GetRawDict().size()) { + BlockHandle compression_dict_block_handle; + if (ok()) { + WriteMaybeCompressedBlock(rep_->compression_dict->GetRawDict(), + kNoCompression, &compression_dict_block_handle, + BlockType::kCompressionDictionary); +#ifndef 
NDEBUG + Slice compression_dict = rep_->compression_dict->GetRawDict(); + TEST_SYNC_POINT_CALLBACK( + "BlockBasedTableBuilder::WriteCompressionDictBlock:RawDict", + &compression_dict); +#endif // NDEBUG + } + if (ok()) { + meta_index_builder->Add(kCompressionDictBlockName, + compression_dict_block_handle); + } + } +} + +void BlockBasedTableBuilder::WriteRangeDelBlock( + MetaIndexBuilder* meta_index_builder) { + if (ok() && !rep_->range_del_block.empty()) { + BlockHandle range_del_block_handle; + WriteMaybeCompressedBlock(rep_->range_del_block.Finish(), kNoCompression, + &range_del_block_handle, + BlockType::kRangeDeletion); + meta_index_builder->Add(kRangeDelBlockName, range_del_block_handle); + } +} + +void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle, + BlockHandle& index_block_handle) { + Rep* r = rep_; + // this is guaranteed by BlockBasedTableBuilder's constructor + assert(r->table_options.checksum == kCRC32c || + r->table_options.format_version != 0); + assert(ok()); + + FooterBuilder footer; + footer.Build(kBlockBasedTableMagicNumber, r->table_options.format_version, + r->get_offset(), r->table_options.checksum, + metaindex_block_handle, index_block_handle); + IOStatus ios = r->file->Append(footer.GetSlice()); + if (ios.ok()) { + r->set_offset(r->get_offset() + footer.GetSlice().size()); + } else { + r->SetIOStatus(ios); + } +} + +void BlockBasedTableBuilder::EnterUnbuffered() { + Rep* r = rep_; + assert(r->state == Rep::State::kBuffered); + r->state = Rep::State::kUnbuffered; + const size_t kSampleBytes = r->compression_opts.zstd_max_train_bytes > 0 + ? r->compression_opts.zstd_max_train_bytes + : r->compression_opts.max_dict_bytes; + const size_t kNumBlocksBuffered = r->data_block_buffers.size(); + if (kNumBlocksBuffered == 0) { + // The below code is neither safe nor necessary for handling zero data + // blocks. + return; + } + + // Abstract algebra teaches us that a finite cyclic group (such as the + // additive group of integers modulo N) can be generated by a number that is + // coprime with N. Since N is variable (number of buffered data blocks), we + // must then pick a prime number in order to guarantee coprimeness with any N. + // + // One downside of this approach is the spread will be poor when + // `kPrimeGeneratorRemainder` is close to zero or close to + // `kNumBlocksBuffered`. + // + // Picked a random number between one and one trillion and then chose the + // next prime number greater than or equal to it. + const uint64_t kPrimeGenerator = 545055921143ull; + // Can avoid repeated division by just adding the remainder repeatedly. + const size_t kPrimeGeneratorRemainder = static_cast( + kPrimeGenerator % static_cast(kNumBlocksBuffered)); + const size_t kInitSampleIdx = kNumBlocksBuffered / 2; + + std::string compression_dict_samples; + std::vector compression_dict_sample_lens; + size_t buffer_idx = kInitSampleIdx; + for (size_t i = 0; + i < kNumBlocksBuffered && compression_dict_samples.size() < kSampleBytes; + ++i) { + size_t copy_len = std::min(kSampleBytes - compression_dict_samples.size(), + r->data_block_buffers[buffer_idx].size()); + compression_dict_samples.append(r->data_block_buffers[buffer_idx], 0, + copy_len); + compression_dict_sample_lens.emplace_back(copy_len); + + buffer_idx += kPrimeGeneratorRemainder; + if (buffer_idx >= kNumBlocksBuffered) { + buffer_idx -= kNumBlocksBuffered; + } + } + + // final data block flushed, now we can generate dictionary from the samples. 
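+  // (With kNumBlocksBuffered = 10 the stride above is 545055921143 % 10 = 3,
+  // so starting from kInitSampleIdx = 5 the loop visits buffers
+  // 5, 8, 1, 4, 7, 0, 3, 6, 9, 2 -- each one at most once, stopping early
+  // once kSampleBytes has been collected.)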
+ // OK if compression_dict_samples is empty, we'll just get empty dictionary. + std::string dict; + if (r->compression_opts.zstd_max_train_bytes > 0) { + if (r->compression_opts.use_zstd_dict_trainer) { + dict = ZSTD_TrainDictionary(compression_dict_samples, + compression_dict_sample_lens, + r->compression_opts.max_dict_bytes); + } else { + dict = ZSTD_FinalizeDictionary( + compression_dict_samples, compression_dict_sample_lens, + r->compression_opts.max_dict_bytes, r->compression_opts.level); + } + } else { + dict = std::move(compression_dict_samples); + } + r->compression_dict.reset(new CompressionDict(dict, r->compression_type, + r->compression_opts.level)); + r->verify_dict.reset(new UncompressionDict( + dict, r->compression_type == kZSTD || + r->compression_type == kZSTDNotFinalCompression)); + + auto get_iterator_for_block = [&r](size_t i) { + auto& data_block = r->data_block_buffers[i]; + assert(!data_block.empty()); + + Block reader{BlockContents{data_block}}; + DataBlockIter* iter = reader.NewDataIterator( + r->internal_comparator.user_comparator(), kDisableGlobalSequenceNumber); + + iter->SeekToFirst(); + assert(iter->Valid()); + return std::unique_ptr(iter); + }; + + std::unique_ptr iter = nullptr, next_block_iter = nullptr; + + for (size_t i = 0; ok() && i < r->data_block_buffers.size(); ++i) { + if (iter == nullptr) { + iter = get_iterator_for_block(i); + assert(iter != nullptr); + }; + + if (i + 1 < r->data_block_buffers.size()) { + next_block_iter = get_iterator_for_block(i + 1); + } + + auto& data_block = r->data_block_buffers[i]; + if (r->IsParallelCompressionEnabled()) { + Slice first_key_in_next_block; + const Slice* first_key_in_next_block_ptr = &first_key_in_next_block; + if (i + 1 < r->data_block_buffers.size()) { + assert(next_block_iter != nullptr); + first_key_in_next_block = next_block_iter->key(); + } else { + first_key_in_next_block_ptr = r->first_key_in_next_block; + } + + std::vector keys; + for (; iter->Valid(); iter->Next()) { + keys.emplace_back(iter->key().ToString()); + } + + ParallelCompressionRep::BlockRep* block_rep = r->pc_rep->PrepareBlock( + r->compression_type, first_key_in_next_block_ptr, &data_block, &keys); + + assert(block_rep != nullptr); + r->pc_rep->file_size_estimator.EmitBlock(block_rep->data->size(), + r->get_offset()); + r->pc_rep->EmitBlock(block_rep); + } else { + for (; iter->Valid(); iter->Next()) { + Slice key = iter->key(); + if (r->filter_builder != nullptr) { + size_t ts_sz = + r->internal_comparator.user_comparator()->timestamp_size(); + r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz)); + } + r->index_builder->OnKeyAdded(key); + } + WriteBlock(Slice(data_block), &r->pending_handle, BlockType::kData); + if (ok() && i + 1 < r->data_block_buffers.size()) { + assert(next_block_iter != nullptr); + Slice first_key_in_next_block = next_block_iter->key(); + + Slice* first_key_in_next_block_ptr = &first_key_in_next_block; + + iter->SeekToLast(); + std::string last_key = iter->key().ToString(); + r->index_builder->AddIndexEntry(&last_key, first_key_in_next_block_ptr, + r->pending_handle); + } + } + std::swap(iter, next_block_iter); + } + r->data_block_buffers.clear(); + r->data_begin_offset = 0; + // Release all reserved cache for data block buffers + if (r->compression_dict_buffer_cache_res_mgr != nullptr) { + Status s = r->compression_dict_buffer_cache_res_mgr->UpdateCacheReservation( + r->data_begin_offset); + s.PermitUncheckedError(); + } +} + +Status BlockBasedTableBuilder::Finish() { + Rep* r = rep_; + 
assert(r->state != Rep::State::kClosed); + bool empty_data_block = r->data_block.empty(); + r->first_key_in_next_block = nullptr; + Flush(); + if (r->state == Rep::State::kBuffered) { + EnterUnbuffered(); + } + if (r->IsParallelCompressionEnabled()) { + StopParallelCompression(); +#ifndef NDEBUG + for (const auto& br : r->pc_rep->block_rep_buf) { + assert(br.status.ok()); + } +#endif // !NDEBUG + } else { + // To make sure properties block is able to keep the accurate size of index + // block, we will finish writing all index entries first. + if (ok() && !empty_data_block) { + r->index_builder->AddIndexEntry( + &r->last_key, nullptr /* no next data block */, r->pending_handle); + } + } + + r->props.tail_start_offset = r->offset; + + // Write meta blocks, metaindex block and footer in the following order. + // 1. [meta block: filter] + // 2. [meta block: index] + // 3. [meta block: compression dictionary] + // 4. [meta block: range deletion tombstone] + // 5. [meta block: properties] + // 6. [metaindex block] + // 7. Footer + BlockHandle metaindex_block_handle, index_block_handle; + MetaIndexBuilder meta_index_builder; + WriteFilterBlock(&meta_index_builder); + WriteIndexBlock(&meta_index_builder, &index_block_handle); + WriteCompressionDictBlock(&meta_index_builder); + WriteRangeDelBlock(&meta_index_builder); + WritePropertiesBlock(&meta_index_builder); + if (ok()) { + // flush the meta index block + WriteMaybeCompressedBlock(meta_index_builder.Finish(), kNoCompression, + &metaindex_block_handle, BlockType::kMetaIndex); + } + if (ok()) { + WriteFooter(metaindex_block_handle, index_block_handle); + } + r->state = Rep::State::kClosed; + r->SetStatus(r->CopyIOStatus()); + Status ret_status = r->CopyStatus(); + assert(!ret_status.ok() || io_status().ok()); + r->tail_size = r->offset - r->props.tail_start_offset; + return ret_status; +} + +void BlockBasedTableBuilder::Abandon() { + assert(rep_->state != Rep::State::kClosed); + if (rep_->IsParallelCompressionEnabled()) { + StopParallelCompression(); + } + rep_->state = Rep::State::kClosed; + rep_->CopyStatus().PermitUncheckedError(); + rep_->CopyIOStatus().PermitUncheckedError(); +} + +uint64_t BlockBasedTableBuilder::NumEntries() const { + return rep_->props.num_entries; +} + +bool BlockBasedTableBuilder::IsEmpty() const { + return rep_->props.num_entries == 0 && rep_->props.num_range_deletions == 0; +} + +uint64_t BlockBasedTableBuilder::FileSize() const { return rep_->offset; } + +uint64_t BlockBasedTableBuilder::EstimatedFileSize() const { + if (rep_->IsParallelCompressionEnabled()) { + // Use compression ratio so far and inflight uncompressed bytes to estimate + // final SST size. 
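Reduced to one line, such an estimate can assume the bytes still in flight compress at the ratio observed so far. A hedged sketch only (EstimateSize is a hypothetical helper; the real logic lives in ParallelCompressionRep's file size estimator):

#include <cstdint>

// written: bytes already emitted to the SST; inflight_raw: uncompressed
// bytes still queued in the compression pipeline; ratio: compressed/raw
// observed so far. Illustrative, not the vendored implementation.
uint64_t EstimateSize(uint64_t written, uint64_t inflight_raw, double ratio) {
  return written + static_cast<uint64_t>(inflight_raw * ratio);
}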
+ return rep_->pc_rep->file_size_estimator.GetEstimatedFileSize(); + } else { + return FileSize(); + } +} + +uint64_t BlockBasedTableBuilder::GetTailSize() const { return rep_->tail_size; } + +bool BlockBasedTableBuilder::NeedCompact() const { + for (const auto& collector : rep_->table_properties_collectors) { + if (collector->NeedCompact()) { + return true; + } + } + return false; +} + +TableProperties BlockBasedTableBuilder::GetTableProperties() const { + TableProperties ret = rep_->props; + for (const auto& collector : rep_->table_properties_collectors) { + for (const auto& prop : collector->GetReadableProperties()) { + ret.readable_properties.insert(prop); + } + collector->Finish(&ret.user_collected_properties).PermitUncheckedError(); + } + return ret; +} + +std::string BlockBasedTableBuilder::GetFileChecksum() const { + if (rep_->file != nullptr) { + return rep_->file->GetFileChecksum(); + } else { + return kUnknownFileChecksum; + } +} + +const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const { + if (rep_->file != nullptr) { + return rep_->file->GetFileChecksumFuncName(); + } else { + return kUnknownFileChecksumFuncName; + } +} +void BlockBasedTableBuilder::SetSeqnoTimeTableProperties( + const std::string& encoded_seqno_to_time_mapping, + uint64_t oldest_ancestor_time) { + rep_->props.seqno_to_time_mapping = encoded_seqno_to_time_mapping; + rep_->props.creation_time = oldest_ancestor_time; +} + +const std::string BlockBasedTable::kObsoleteFilterBlockPrefix = "filter."; +const std::string BlockBasedTable::kFullFilterBlockPrefix = "fullfilter."; +const std::string BlockBasedTable::kPartitionedFilterBlockPrefix = + "partitionedfilter."; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.h new file mode 100644 index 0000000000..3949474c58 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_builder.h @@ -0,0 +1,209 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include +#include +#include +#include +#include + +#include "db/version_edit.h" +#include "rocksdb/flush_block_policy.h" +#include "rocksdb/listener.h" +#include "rocksdb/options.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "table/meta_blocks.h" +#include "table/table_builder.h" +#include "util/compression.h" + +namespace ROCKSDB_NAMESPACE { + +class BlockBuilder; +class BlockHandle; +class WritableFile; +struct BlockBasedTableOptions; + +extern const uint64_t kBlockBasedTableMagicNumber; +extern const uint64_t kLegacyBlockBasedTableMagicNumber; + +class BlockBasedTableBuilder : public TableBuilder { + public: + // Create a builder that will store the contents of the table it is + // building in *file. Does not close the file. It is up to the + // caller to close the file after calling Finish(). 
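The same build-then-Finish contract surfaces through RocksDB's public SstFileWriter, which drives a table builder internally. A small usage sketch, assuming the public SstFileWriter API and an illustrative helper name:

#include <rocksdb/options.h>
#include <rocksdb/sst_file_writer.h>
#include <string>

rocksdb::Status WriteTinySst(const std::string& path) {
  rocksdb::Options options;
  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  rocksdb::Status s = writer.Open(path);
  if (!s.ok()) return s;
  // Keys must be added in increasing order, mirroring the REQUIRES
  // clause on the builder's Add() below.
  s = writer.Put("key1", "value1");
  if (s.ok()) s = writer.Put("key2", "value2");
  // Finish() flushes the index, filter, and properties blocks plus the
  // footer, like BlockBasedTableBuilder::Finish() above.
  if (s.ok()) s = writer.Finish();
  return s;
}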
+ BlockBasedTableBuilder(const BlockBasedTableOptions& table_options, + const TableBuilderOptions& table_builder_options, + WritableFileWriter* file); + + // No copying allowed + BlockBasedTableBuilder(const BlockBasedTableBuilder&) = delete; + BlockBasedTableBuilder& operator=(const BlockBasedTableBuilder&) = delete; + + // REQUIRES: Either Finish() or Abandon() has been called. + ~BlockBasedTableBuilder(); + + // Add key,value to the table being constructed. + // REQUIRES: Unless key has type kTypeRangeDeletion, key is after any + // previously added non-kTypeRangeDeletion key according to + // comparator. + // REQUIRES: Finish(), Abandon() have not been called + void Add(const Slice& key, const Slice& value) override; + + // Return non-ok iff some error has been detected. + Status status() const override; + + // Return non-ok iff some error happens during IO. + IOStatus io_status() const override; + + // Finish building the table. Stops using the file passed to the + // constructor after this function returns. + // REQUIRES: Finish(), Abandon() have not been called + Status Finish() override; + + // Indicate that the contents of this builder should be abandoned. Stops + // using the file passed to the constructor after this function returns. + // If the caller is not going to call Finish(), it must call Abandon() + // before destroying this builder. + // REQUIRES: Finish(), Abandon() have not been called + void Abandon() override; + + // Number of calls to Add() so far. + uint64_t NumEntries() const override; + + bool IsEmpty() const override; + + // Size of the file generated so far. If invoked after a successful + // Finish() call, returns the size of the final generated file. + uint64_t FileSize() const override; + + // Estimated size of the file generated so far. This is used when + // FileSize() cannot estimate final SST size, e.g. parallel compression + // is enabled. + uint64_t EstimatedFileSize() const override; + + // Get the size of the "tail" part of a SST file. "Tail" refers to + // all blocks after data blocks till the end of the SST file. + uint64_t GetTailSize() const override; + + bool NeedCompact() const override; + + // Get table properties + TableProperties GetTableProperties() const override; + + // Get file checksum + std::string GetFileChecksum() const override; + + // Get file checksum function name + const char* GetFileChecksumFuncName() const override; + + void SetSeqnoTimeTableProperties( + const std::string& encoded_seqno_to_time_mapping, + uint64_t oldest_ancestor_time) override; + + private: + bool ok() const { return status().ok(); } + + // Transition state from buffered to unbuffered. See `Rep::State` API comment + // for details of the states. + // REQUIRES: `rep_->state == kBuffered` + void EnterUnbuffered(); + + // Call block's Finish() method and then + // - in buffered mode, buffer the uncompressed block contents. + // - in unbuffered mode, write the compressed block contents to file. + void WriteBlock(BlockBuilder* block, BlockHandle* handle, + BlockType blocktype); + + // Compress and write block content to the file. + void WriteBlock(const Slice& block_contents, BlockHandle* handle, + BlockType block_type); + // Directly write data to the file. 
+ void WriteMaybeCompressedBlock( + const Slice& block_contents, CompressionType, BlockHandle* handle, + BlockType block_type, const Slice* uncompressed_block_data = nullptr); + + void SetupCacheKeyPrefix(const TableBuilderOptions& tbo); + + template + Status InsertBlockInCache(const Slice& block_contents, + const BlockHandle* handle, BlockType block_type); + + Status InsertBlockInCacheHelper(const Slice& block_contents, + const BlockHandle* handle, + BlockType block_type); + + Status InsertBlockInCompressedCache(const Slice& block_contents, + const CompressionType type, + const BlockHandle* handle); + + void WriteFilterBlock(MetaIndexBuilder* meta_index_builder); + void WriteIndexBlock(MetaIndexBuilder* meta_index_builder, + BlockHandle* index_block_handle); + void WritePropertiesBlock(MetaIndexBuilder* meta_index_builder); + void WriteCompressionDictBlock(MetaIndexBuilder* meta_index_builder); + void WriteRangeDelBlock(MetaIndexBuilder* meta_index_builder); + void WriteFooter(BlockHandle& metaindex_block_handle, + BlockHandle& index_block_handle); + + struct Rep; + class BlockBasedTablePropertiesCollectorFactory; + class BlockBasedTablePropertiesCollector; + Rep* rep_; + + struct ParallelCompressionRep; + + // Advanced operation: flush any buffered key/value pairs to file. + // Can be used to ensure that two adjacent entries never live in + // the same data block. Most clients should not need to use this method. + // REQUIRES: Finish(), Abandon() have not been called + void Flush(); + + // Some compression libraries fail when the uncompressed size is bigger than + // int. If uncompressed size is bigger than kCompressionSizeLimit, don't + // compress it + const uint64_t kCompressionSizeLimit = std::numeric_limits::max(); + + // Get blocks from mem-table walking thread, compress them and + // pass them to the write thread. Used in parallel compression mode only + void BGWorkCompression(const CompressionContext& compression_ctx, + UncompressionContext* verify_ctx); + + // Given uncompressed block content, try to compress it and return result and + // compression type + void CompressAndVerifyBlock(const Slice& uncompressed_block_data, + bool is_data_block, + const CompressionContext& compression_ctx, + UncompressionContext* verify_ctx, + std::string* compressed_output, + Slice* result_block_contents, + CompressionType* result_compression_type, + Status* out_status); + + // Get compressed blocks from BGWorkCompression and write them into SST + void BGWorkWriteMaybeCompressedBlock(); + + // Initialize parallel compression context and + // start BGWorkCompression and BGWorkWriteMaybeCompressedBlock threads + void StartParallelCompression(); + + // Stop BGWorkCompression and BGWorkWriteMaybeCompressedBlock threads + void StopParallelCompression(); +}; + +Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info, + CompressionType* type, uint32_t format_version, + bool do_sample, std::string* compressed_output, + std::string* sampled_output_fast, + std::string* sampled_output_slow); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.cc new file mode 100644 index 0000000000..653c222d5e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.cc @@ -0,0 +1,962 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "table/block_based/block_based_table_factory.h" + +#include + +#include +#include +#include + +#include "cache/cache_entry_roles.h" +#include "cache/cache_reservation_manager.h" +#include "logging/logging.h" +#include "options/options_helper.h" +#include "port/port.h" +#include "rocksdb/cache.h" +#include "rocksdb/convenience.h" +#include "rocksdb/filter_policy.h" +#include "rocksdb/flush_block_policy.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/table.h" +#include "rocksdb/utilities/options_type.h" +#include "table/block_based/block_based_table_builder.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/format.h" +#include "util/mutexlock.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +void TailPrefetchStats::RecordEffectiveSize(size_t len) { + MutexLock l(&mutex_); + if (num_records_ < kNumTracked) { + num_records_++; + } + records_[next_++] = len; + if (next_ == kNumTracked) { + next_ = 0; + } +} + +size_t TailPrefetchStats::GetSuggestedPrefetchSize() { + std::vector<size_t> sorted; + { + MutexLock l(&mutex_); + + if (num_records_ == 0) { + return 0; + } + sorted.assign(records_, records_ + num_records_); + } + + // Of the historic sizes, we find the maximum one that satisfies the condition + // that if prefetching all, less than 1/8 will be wasted. + std::sort(sorted.begin(), sorted.end()); + + // Assuming we have 5 data points, and after sorting it looks like this: + // + // +---+ + // +---+ | | + // | | | | + // | | | | + // | | | | + // | | | | + // +---+ | | | | + // | | | | | | + // +---+ | | | | | | + // | | | | | | | | + // +---+ | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // +---+ +---+ +---+ +---+ +---+ + // + // and we use each of the values as a candidate, and estimate how much we + // wasted, compared to read. For example, when we use the 3rd record + // as the candidate. This area is what we read: + // +---+ + // +---+ | | + // | | | | + // | | | | + // | | | | + // | | | | + // *** *** *** ***+ *** *** *** *** ** + // * | | | | | | + // +---+ | | | | | * + // * | | | | | | | | + // +---+ | | | | | | | * + // * | | | | X | | | | | + // | | | | | | | | | * + // * | | | | | | | | | + // | | | | | | | | | * + // * | | | | | | | | | + // *** *** ***-*** ***--*** ***--*** +**** + // which is (size of the record) X (number of records). + // + // While wasted is this area: + // +---+ + // +---+ | | + // | | | | + // | | | | + // | | | | + // | | | | + // *** *** *** ****---+ | | | | + // * * | | | | | + // * *-*** *** | | | | | + // * * | | | | | | | + // *--** *** | | | | | | | + // | | | | | X | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // +---+ +---+ +---+ +---+ +---+ + // + // Which can be calculated iteratively.
+ // The difference between wasted using the 4th and 3rd record will + // be the following area: + // +---+ + // +--+ +-+ ++ +-+ +-+ +---+ | | + // + xxxxxxxxxxxxxxxxxxxxxxxx | | | | + // xxxxxxxxxxxxxxxxxxxxxxxx | | | | + // + xxxxxxxxxxxxxxxxxxxxxxxx | | | | + // | xxxxxxxxxxxxxxxxxxxxxxxx | | | | + // +-+ +-+ +-+ ++ +---+ +--+ | | | + // | | | | | | | + // +---+ ++ | | | | | | + // | | | | | | X | | | + // +---+ ++ | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // | | | | | | | | | | + // +---+ +---+ +---+ +---+ +---+ + // + // which will be the size difference between the 4th and 3rd record, + // times 3, which is the number of records before the 4th. + // Here we assume that all data within the prefetch range will be useful. In + // reality, it may not be the case when a partial block is inside the range, + // or there is data in the middle that is not read. We ignore those cases + // for simplicity. + assert(!sorted.empty()); + size_t prev_size = sorted[0]; + size_t max_qualified_size = sorted[0]; + size_t wasted = 0; + for (size_t i = 1; i < sorted.size(); i++) { + size_t read = sorted[i] * sorted.size(); + wasted += (sorted[i] - prev_size) * i; + if (wasted <= read / 8) { + max_qualified_size = sorted[i]; + } + prev_size = sorted[i]; + } + const size_t kMaxPrefetchSize = 512 * 1024; // Never exceed 512KB + return std::min(kMaxPrefetchSize, max_qualified_size); +} + + +const std::string kOptNameMetadataCacheOpts = "metadata_cache_options"; + +static std::unordered_map<std::string, PinningTier> + pinning_tier_type_string_map = { + {"kFallback", PinningTier::kFallback}, + {"kNone", PinningTier::kNone}, + {"kFlushedAndSimilar", PinningTier::kFlushedAndSimilar}, + {"kAll", PinningTier::kAll}}; + +static std::unordered_map<std::string, BlockBasedTableOptions::IndexType> + block_base_table_index_type_string_map = { + {"kBinarySearch", BlockBasedTableOptions::IndexType::kBinarySearch}, + {"kHashSearch", BlockBasedTableOptions::IndexType::kHashSearch}, + {"kTwoLevelIndexSearch", + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch}, + {"kBinarySearchWithFirstKey", + BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey}}; + +static std::unordered_map<std::string, BlockBasedTableOptions::DataBlockIndexType> + block_base_table_data_block_index_type_string_map = { + {"kDataBlockBinarySearch", + BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch}, + {"kDataBlockBinaryAndHash", + BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash}}; + +static std::unordered_map<std::string, BlockBasedTableOptions::IndexShorteningMode> + block_base_table_index_shortening_mode_string_map = { + {"kNoShortening", + BlockBasedTableOptions::IndexShorteningMode::kNoShortening}, + {"kShortenSeparators", + BlockBasedTableOptions::IndexShorteningMode::kShortenSeparators}, + {"kShortenSeparatorsAndSuccessor", + BlockBasedTableOptions::IndexShorteningMode:: + kShortenSeparatorsAndSuccessor}}; + +static std::unordered_map<std::string, OptionTypeInfo> + metadata_cache_options_type_info = { + {"top_level_index_pinning", + OptionTypeInfo::Enum<PinningTier>( + offsetof(struct MetadataCacheOptions, top_level_index_pinning), + &pinning_tier_type_string_map)}, + {"partition_pinning", + OptionTypeInfo::Enum<PinningTier>( + offsetof(struct MetadataCacheOptions, partition_pinning), + &pinning_tier_type_string_map)}, + {"unpartitioned_pinning", + OptionTypeInfo::Enum<PinningTier>( + offsetof(struct MetadataCacheOptions, unpartitioned_pinning), + &pinning_tier_type_string_map)}}; + +static std::unordered_map<std::string, BlockBasedTableOptions::PrepopulateBlockCache> + block_base_table_prepopulate_block_cache_string_map = { + {"kDisable", BlockBasedTableOptions::PrepopulateBlockCache::kDisable}, + {"kFlushOnly",
BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly}}; + + +static std::unordered_map + block_based_table_type_info = { + /* currently not supported + std::shared_ptr block_cache = nullptr; + CacheUsageOptions cache_usage_options; + */ + {"flush_block_policy_factory", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct BlockBasedTableOptions, + flush_block_policy_factory), + OptionVerificationType::kByName, OptionTypeFlags::kCompareNever)}, + {"cache_index_and_filter_blocks", + {offsetof(struct BlockBasedTableOptions, + cache_index_and_filter_blocks), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"cache_index_and_filter_blocks_with_high_priority", + {offsetof(struct BlockBasedTableOptions, + cache_index_and_filter_blocks_with_high_priority), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"pin_l0_filter_and_index_blocks_in_cache", + {offsetof(struct BlockBasedTableOptions, + pin_l0_filter_and_index_blocks_in_cache), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"index_type", OptionTypeInfo::Enum( + offsetof(struct BlockBasedTableOptions, index_type), + &block_base_table_index_type_string_map)}, + {"hash_index_allow_collision", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"data_block_index_type", + OptionTypeInfo::Enum( + offsetof(struct BlockBasedTableOptions, data_block_index_type), + &block_base_table_data_block_index_type_string_map)}, + {"index_shortening", + OptionTypeInfo::Enum( + offsetof(struct BlockBasedTableOptions, index_shortening), + &block_base_table_index_shortening_mode_string_map)}, + {"data_block_hash_table_util_ratio", + {offsetof(struct BlockBasedTableOptions, + data_block_hash_table_util_ratio), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"checksum", + {offsetof(struct BlockBasedTableOptions, checksum), + OptionType::kChecksumType, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"no_block_cache", + {offsetof(struct BlockBasedTableOptions, no_block_cache), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"block_size", + {offsetof(struct BlockBasedTableOptions, block_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"block_size_deviation", + {offsetof(struct BlockBasedTableOptions, block_size_deviation), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"block_restart_interval", + {offsetof(struct BlockBasedTableOptions, block_restart_interval), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"index_block_restart_interval", + {offsetof(struct BlockBasedTableOptions, index_block_restart_interval), + OptionType::kInt, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"index_per_partition", + {0, OptionType::kUInt64T, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"metadata_block_size", + {offsetof(struct BlockBasedTableOptions, metadata_block_size), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"partition_filters", + {offsetof(struct BlockBasedTableOptions, partition_filters), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"optimize_filters_for_memory", + {offsetof(struct BlockBasedTableOptions, optimize_filters_for_memory), + OptionType::kBoolean, 
OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"filter_policy", + OptionTypeInfo::AsCustomSharedPtr( + offsetof(struct BlockBasedTableOptions, filter_policy), + OptionVerificationType::kByNameAllowFromNull, + OptionTypeFlags::kNone)}, + {"whole_key_filtering", + {offsetof(struct BlockBasedTableOptions, whole_key_filtering), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"detect_filter_construct_corruption", + {offsetof(struct BlockBasedTableOptions, + detect_filter_construct_corruption), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"reserve_table_builder_memory", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"reserve_table_reader_memory", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"skip_table_builder_flush", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"format_version", + {offsetof(struct BlockBasedTableOptions, format_version), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"verify_compression", + {offsetof(struct BlockBasedTableOptions, verify_compression), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"read_amp_bytes_per_bit", + {offsetof(struct BlockBasedTableOptions, read_amp_bytes_per_bit), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone, + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { + // A workaround to fix a bug in 6.10, 6.11, 6.12, 6.13 + // and 6.14. The bug will write out 8 bytes to OPTIONS file from the + // starting address of BlockBasedTableOptions.read_amp_bytes_per_bit + // which is actually a uint32. Consequently, the value of + // read_amp_bytes_per_bit written in the OPTIONS file is wrong. + // From 6.15, RocksDB will try to parse the read_amp_bytes_per_bit + // from OPTIONS file as a uint32. To be able to load OPTIONS file + // generated by affected releases before the fix, we need to + // manually parse read_amp_bytes_per_bit with this special hack. 
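Distilled, the workaround in the parse function that follows is just "read the value as 64 bits, keep the low 32". A standalone sketch of the same idea (hypothetical helper name, illustrative only):

#include <cstdint>
#include <string>

// Affected releases wrote 8 bytes for this 4-byte field, so the textual
// value may not fit in 32 bits; parse wide and truncate, as the option's
// parse function does with ParseUint64 plus a cast.
uint32_t ParseLegacyReadAmpBytesPerBit(const std::string& value) {
  const uint64_t wide = std::stoull(value);
  return static_cast<uint32_t>(wide);
}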
+ uint64_t read_amp_bytes_per_bit = ParseUint64(value); + *(static_cast(addr)) = + static_cast(read_amp_bytes_per_bit); + return Status::OK(); + }}}, + {"enable_index_compression", + {offsetof(struct BlockBasedTableOptions, enable_index_compression), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"block_align", + {offsetof(struct BlockBasedTableOptions, block_align), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"pin_top_level_index_and_filter", + {offsetof(struct BlockBasedTableOptions, + pin_top_level_index_and_filter), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {kOptNameMetadataCacheOpts, + OptionTypeInfo::Struct( + kOptNameMetadataCacheOpts, &metadata_cache_options_type_info, + offsetof(struct BlockBasedTableOptions, metadata_cache_options), + OptionVerificationType::kNormal, OptionTypeFlags::kNone)}, + {"block_cache", + {offsetof(struct BlockBasedTableOptions, block_cache), + OptionType::kUnknown, OptionVerificationType::kNormal, + (OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize), + // Parses the input value as a Cache + [](const ConfigOptions& opts, const std::string&, + const std::string& value, void* addr) { + auto* cache = static_cast*>(addr); + return Cache::CreateFromString(opts, value, cache); + }}}, + {"block_cache_compressed", + {0, OptionType::kUnknown, OptionVerificationType::kDeprecated, + OptionTypeFlags::kNone}}, + {"max_auto_readahead_size", + {offsetof(struct BlockBasedTableOptions, max_auto_readahead_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"prepopulate_block_cache", + OptionTypeInfo::Enum( + offsetof(struct BlockBasedTableOptions, prepopulate_block_cache), + &block_base_table_prepopulate_block_cache_string_map, + OptionTypeFlags::kMutable)}, + {"initial_auto_readahead_size", + {offsetof(struct BlockBasedTableOptions, initial_auto_readahead_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"num_file_reads_for_auto_readahead", + {offsetof(struct BlockBasedTableOptions, + num_file_reads_for_auto_readahead), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + +}; + +// TODO(myabandeh): We should return an error instead of silently changing the +// options +BlockBasedTableFactory::BlockBasedTableFactory( + const BlockBasedTableOptions& _table_options) + : table_options_(_table_options) { + InitializeOptions(); + RegisterOptions(&table_options_, &block_based_table_type_info); + + const auto table_reader_charged = + table_options_.cache_usage_options.options_overrides + .at(CacheEntryRole::kBlockBasedTableReader) + .charged; + if (table_options_.block_cache && + table_reader_charged == CacheEntryRoleOptions::Decision::kEnabled) { + table_reader_cache_res_mgr_.reset(new ConcurrentCacheReservationManager( + std::make_shared>( + table_options_.block_cache))); + } +} + +void BlockBasedTableFactory::InitializeOptions() { + if (table_options_.flush_block_policy_factory == nullptr) { + table_options_.flush_block_policy_factory.reset( + new FlushBlockBySizePolicyFactory()); + } + if (table_options_.no_block_cache) { + table_options_.block_cache.reset(); + } else if (table_options_.block_cache == nullptr) { + LRUCacheOptions co; + // 32MB, the recommended minimum size for 64 shards, to reduce contention + co.capacity = 32 << 20; + table_options_.block_cache = NewLRUCache(co); + } + if 
(table_options_.block_size_deviation < 0 || + table_options_.block_size_deviation > 100) { + table_options_.block_size_deviation = 0; + } + if (table_options_.block_restart_interval < 1) { + table_options_.block_restart_interval = 1; + } + if (table_options_.index_block_restart_interval < 1) { + table_options_.index_block_restart_interval = 1; + } + if (table_options_.index_type == BlockBasedTableOptions::kHashSearch && + table_options_.index_block_restart_interval != 1) { + // Currently kHashSearch is incompatible with + // index_block_restart_interval > 1 + table_options_.index_block_restart_interval = 1; + } + if (table_options_.partition_filters && + table_options_.index_type != + BlockBasedTableOptions::kTwoLevelIndexSearch) { + // We do not support partitioned filters without partitioning indexes + table_options_.partition_filters = false; + } + auto& options_overrides = + table_options_.cache_usage_options.options_overrides; + const auto options = table_options_.cache_usage_options.options; + for (std::uint32_t i = 0; i < kNumCacheEntryRoles; ++i) { + CacheEntryRole role = static_cast(i); + auto options_overrides_iter = options_overrides.find(role); + if (options_overrides_iter == options_overrides.end()) { + options_overrides.insert({role, options}); + } else if (options_overrides_iter->second.charged == + CacheEntryRoleOptions::Decision::kFallback) { + options_overrides_iter->second.charged = options.charged; + } + } +} + +Status BlockBasedTableFactory::PrepareOptions(const ConfigOptions& opts) { + InitializeOptions(); + return TableFactory::PrepareOptions(opts); +} + +namespace { +// Different cache kinds use the same keys for physically different values, so +// they must not share an underlying key space with each other. +Status CheckCacheOptionCompatibility(const BlockBasedTableOptions& bbto) { + int cache_count = (bbto.block_cache != nullptr) + + (bbto.persistent_cache != nullptr); + if (cache_count <= 1) { + // Nothing to share / overlap + return Status::OK(); + } + + // More complex test of shared key space, in case the instances are wrappers + // for some shared underlying cache. + static Cache::CacheItemHelper kHelper{CacheEntryRole::kMisc}; + CacheKey sentinel_key = CacheKey::CreateUniqueForProcessLifetime(); + struct SentinelValue { + explicit SentinelValue(char _c) : c(_c) {} + char c; + }; + static SentinelValue kRegularBlockCacheMarker{'b'}; + static char kPersistentCacheMarker{'p'}; + if (bbto.block_cache) { + bbto.block_cache + ->Insert(sentinel_key.AsSlice(), &kRegularBlockCacheMarker, &kHelper, 1) + .PermitUncheckedError(); + } + if (bbto.persistent_cache) { + // Note: persistent cache copies the data, not keeping the pointer + bbto.persistent_cache + ->Insert(sentinel_key.AsSlice(), &kPersistentCacheMarker, 1) + .PermitUncheckedError(); + } + // If we get something different from what we inserted, that indicates + // dangerously overlapping key spaces. 
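The probe that follows boils down to: write a distinct marker into each cache under one process-unique key, read both back, and treat a crossed marker as proof of a shared key space. A toy analogue with plain maps standing in for caches (illustrative only, not the RocksDB cache API):

#include <map>
#include <string>

// Returns true when the two "caches" alias the same storage: the second
// insert then overwrites the first, and cache1 hands back cache2's marker.
bool SharesKeySpace(std::map<std::string, char>& cache1,
                    std::map<std::string, char>& cache2) {
  const std::string sentinel = "process-unique-sentinel-key";
  cache1[sentinel] = '1';
  cache2[sentinel] = '2';
  return cache1[sentinel] == '2';
}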
+ if (bbto.block_cache) { + auto handle = bbto.block_cache->Lookup(sentinel_key.AsSlice()); + if (handle) { + auto v = static_cast(bbto.block_cache->Value(handle)); + char c = v->c; + bbto.block_cache->Release(handle); + if (c == kPersistentCacheMarker) { + return Status::InvalidArgument( + "block_cache and persistent_cache share the same key space, " + "which is not supported"); + } else if (v != &kRegularBlockCacheMarker) { + return Status::Corruption("Unexpected mutation to block_cache"); + } + } + } + + if (bbto.persistent_cache) { + std::unique_ptr data; + size_t size = 0; + bbto.persistent_cache->Lookup(sentinel_key.AsSlice(), &data, &size) + .PermitUncheckedError(); + if (data && size > 0) { + if (data[0] == kRegularBlockCacheMarker.c) { + return Status::InvalidArgument( + "persistent_cache and block_cache share the same key space, " + "which is not supported"); + } else if (data[0] != kPersistentCacheMarker) { + return Status::Corruption("Unexpected mutation to persistent_cache"); + } + } + } + return Status::OK(); +} + +} // namespace + +Status BlockBasedTableFactory::NewTableReader( + const ReadOptions& ro, const TableReaderOptions& table_reader_options, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table_reader, + bool prefetch_index_and_filter_in_cache) const { + return BlockBasedTable::Open( + ro, table_reader_options.ioptions, table_reader_options.env_options, + table_options_, table_reader_options.internal_comparator, std::move(file), + file_size, table_reader_options.block_protection_bytes_per_key, + table_reader, table_reader_options.tail_size, table_reader_cache_res_mgr_, + table_reader_options.prefix_extractor, prefetch_index_and_filter_in_cache, + table_reader_options.skip_filters, table_reader_options.level, + table_reader_options.immortal, table_reader_options.largest_seqno, + table_reader_options.force_direct_prefetch, &tail_prefetch_stats_, + table_reader_options.block_cache_tracer, + table_reader_options.max_file_size_for_l0_meta_pin, + table_reader_options.cur_db_session_id, table_reader_options.cur_file_num, + table_reader_options.unique_id); +} + +TableBuilder* BlockBasedTableFactory::NewTableBuilder( + const TableBuilderOptions& table_builder_options, + WritableFileWriter* file) const { + return new BlockBasedTableBuilder(table_options_, table_builder_options, + file); +} + +Status BlockBasedTableFactory::ValidateOptions( + const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts) const { + if (table_options_.index_type == BlockBasedTableOptions::kHashSearch && + cf_opts.prefix_extractor == nullptr) { + return Status::InvalidArgument( + "Hash index is specified for block-based " + "table, but prefix_extractor is not given"); + } + if (table_options_.cache_index_and_filter_blocks && + table_options_.no_block_cache) { + return Status::InvalidArgument( + "Enable cache_index_and_filter_blocks, " + ", but block cache is disabled"); + } + if (table_options_.pin_l0_filter_and_index_blocks_in_cache && + table_options_.no_block_cache) { + return Status::InvalidArgument( + "Enable pin_l0_filter_and_index_blocks_in_cache, " + ", but block cache is disabled"); + } + if (!IsSupportedFormatVersion(table_options_.format_version)) { + return Status::InvalidArgument( + "Unsupported BlockBasedTable format_version. 
Please check " + "include/rocksdb/table.h for more info"); + } + if (table_options_.block_align && (cf_opts.compression != kNoCompression)) { + return Status::InvalidArgument( + "Enable block_align, but compression " + "enabled"); + } + if (table_options_.block_align && + (table_options_.block_size & (table_options_.block_size - 1))) { + return Status::InvalidArgument( + "Block alignment requested but block size is not a power of 2"); + } + if (table_options_.block_size > std::numeric_limits::max()) { + return Status::InvalidArgument( + "block size exceeds maximum number (4GiB) allowed"); + } + if (table_options_.data_block_index_type == + BlockBasedTableOptions::kDataBlockBinaryAndHash && + table_options_.data_block_hash_table_util_ratio <= 0) { + return Status::InvalidArgument( + "data_block_hash_table_util_ratio should be greater than 0 when " + "data_block_index_type is set to kDataBlockBinaryAndHash"); + } + if (db_opts.unordered_write && cf_opts.max_successive_merges > 0) { + // TODO(myabandeh): support it + return Status::InvalidArgument( + "max_successive_merges larger than 0 is currently inconsistent with " + "unordered_write"); + } + const auto& options_overrides = + table_options_.cache_usage_options.options_overrides; + for (auto options_overrides_iter = options_overrides.cbegin(); + options_overrides_iter != options_overrides.cend(); + ++options_overrides_iter) { + const CacheEntryRole role = options_overrides_iter->first; + const CacheEntryRoleOptions options = options_overrides_iter->second; + static const std::set kMemoryChargingSupported = { + CacheEntryRole::kCompressionDictionaryBuildingBuffer, + CacheEntryRole::kFilterConstruction, + CacheEntryRole::kBlockBasedTableReader, CacheEntryRole::kFileMetadata, + CacheEntryRole::kBlobCache}; + if (options.charged != CacheEntryRoleOptions::Decision::kFallback && + kMemoryChargingSupported.count(role) == 0) { + return Status::NotSupported( + "Enable/Disable CacheEntryRoleOptions::charged" + " for CacheEntryRole " + + kCacheEntryRoleToCamelString[static_cast(role)] + + " is not supported"); + } + if (table_options_.no_block_cache && + options.charged == CacheEntryRoleOptions::Decision::kEnabled) { + return Status::InvalidArgument( + "Enable CacheEntryRoleOptions::charged" + " for CacheEntryRole " + + kCacheEntryRoleToCamelString[static_cast(role)] + + " but block cache is disabled"); + } + if (role == CacheEntryRole::kBlobCache && + options.charged == CacheEntryRoleOptions::Decision::kEnabled) { + if (cf_opts.blob_cache == nullptr) { + return Status::InvalidArgument( + "Enable CacheEntryRoleOptions::charged" + " for CacheEntryRole " + + kCacheEntryRoleToCamelString[static_cast(role)] + + " but blob cache is not configured"); + } + if (table_options_.no_block_cache) { + return Status::InvalidArgument( + "Enable CacheEntryRoleOptions::charged" + " for CacheEntryRole " + + kCacheEntryRoleToCamelString[static_cast(role)] + + " but block cache is disabled"); + } + if (table_options_.block_cache == cf_opts.blob_cache) { + return Status::InvalidArgument( + "Enable CacheEntryRoleOptions::charged" + " for CacheEntryRole " + + kCacheEntryRoleToCamelString[static_cast(role)] + + " but blob cache is the same as block cache"); + } + if (cf_opts.blob_cache->GetCapacity() > + table_options_.block_cache->GetCapacity()) { + return Status::InvalidArgument( + "Enable CacheEntryRoleOptions::charged" + " for CacheEntryRole " + + kCacheEntryRoleToCamelString[static_cast(role)] + + " but blob cache capacity is larger than block cache capacity"); + } + } 
+ } + { + Status s = CheckCacheOptionCompatibility(table_options_); + if (!s.ok()) { + return s; + } + } + std::string garbage; + if (!SerializeEnum(checksum_type_string_map, + table_options_.checksum, &garbage)) { + return Status::InvalidArgument( + "Unrecognized ChecksumType for checksum: " + + std::to_string(static_cast(table_options_.checksum))); + } + return TableFactory::ValidateOptions(db_opts, cf_opts); +} + +std::string BlockBasedTableFactory::GetPrintableOptions() const { + std::string ret; + ret.reserve(20000); + const int kBufferSize = 200; + char buffer[kBufferSize]; + + snprintf(buffer, kBufferSize, " flush_block_policy_factory: %s (%p)\n", + table_options_.flush_block_policy_factory->Name(), + static_cast(table_options_.flush_block_policy_factory.get())); + ret.append(buffer); + snprintf(buffer, kBufferSize, " cache_index_and_filter_blocks: %d\n", + table_options_.cache_index_and_filter_blocks); + ret.append(buffer); + snprintf(buffer, kBufferSize, + " cache_index_and_filter_blocks_with_high_priority: %d\n", + table_options_.cache_index_and_filter_blocks_with_high_priority); + ret.append(buffer); + snprintf(buffer, kBufferSize, + " pin_l0_filter_and_index_blocks_in_cache: %d\n", + table_options_.pin_l0_filter_and_index_blocks_in_cache); + ret.append(buffer); + snprintf(buffer, kBufferSize, " pin_top_level_index_and_filter: %d\n", + table_options_.pin_top_level_index_and_filter); + ret.append(buffer); + snprintf(buffer, kBufferSize, " index_type: %d\n", + table_options_.index_type); + ret.append(buffer); + snprintf(buffer, kBufferSize, " data_block_index_type: %d\n", + table_options_.data_block_index_type); + ret.append(buffer); + snprintf(buffer, kBufferSize, " index_shortening: %d\n", + static_cast(table_options_.index_shortening)); + ret.append(buffer); + snprintf(buffer, kBufferSize, " data_block_hash_table_util_ratio: %lf\n", + table_options_.data_block_hash_table_util_ratio); + ret.append(buffer); + snprintf(buffer, kBufferSize, " checksum: %d\n", table_options_.checksum); + ret.append(buffer); + snprintf(buffer, kBufferSize, " no_block_cache: %d\n", + table_options_.no_block_cache); + ret.append(buffer); + snprintf(buffer, kBufferSize, " block_cache: %p\n", + static_cast(table_options_.block_cache.get())); + ret.append(buffer); + if (table_options_.block_cache) { + const char* block_cache_name = table_options_.block_cache->Name(); + if (block_cache_name != nullptr) { + snprintf(buffer, kBufferSize, " block_cache_name: %s\n", + block_cache_name); + ret.append(buffer); + } + ret.append(" block_cache_options:\n"); + ret.append(table_options_.block_cache->GetPrintableOptions()); + } + snprintf(buffer, kBufferSize, " persistent_cache: %p\n", + static_cast(table_options_.persistent_cache.get())); + ret.append(buffer); + if (table_options_.persistent_cache) { + snprintf(buffer, kBufferSize, " persistent_cache_options:\n"); + ret.append(buffer); + ret.append(table_options_.persistent_cache->GetPrintableOptions()); + } + snprintf(buffer, kBufferSize, " block_size: %" PRIu64 "\n", + table_options_.block_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " block_size_deviation: %d\n", + table_options_.block_size_deviation); + ret.append(buffer); + snprintf(buffer, kBufferSize, " block_restart_interval: %d\n", + table_options_.block_restart_interval); + ret.append(buffer); + snprintf(buffer, kBufferSize, " index_block_restart_interval: %d\n", + table_options_.index_block_restart_interval); + ret.append(buffer); + snprintf(buffer, kBufferSize, " metadata_block_size: %" 
PRIu64 "\n", + table_options_.metadata_block_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " partition_filters: %d\n", + table_options_.partition_filters); + ret.append(buffer); + snprintf(buffer, kBufferSize, " use_delta_encoding: %d\n", + table_options_.use_delta_encoding); + ret.append(buffer); + snprintf(buffer, kBufferSize, " filter_policy: %s\n", + table_options_.filter_policy == nullptr + ? "nullptr" + : table_options_.filter_policy->Name()); + ret.append(buffer); + snprintf(buffer, kBufferSize, " whole_key_filtering: %d\n", + table_options_.whole_key_filtering); + ret.append(buffer); + snprintf(buffer, kBufferSize, " verify_compression: %d\n", + table_options_.verify_compression); + ret.append(buffer); + snprintf(buffer, kBufferSize, " read_amp_bytes_per_bit: %d\n", + table_options_.read_amp_bytes_per_bit); + ret.append(buffer); + snprintf(buffer, kBufferSize, " format_version: %d\n", + table_options_.format_version); + ret.append(buffer); + snprintf(buffer, kBufferSize, " enable_index_compression: %d\n", + table_options_.enable_index_compression); + ret.append(buffer); + snprintf(buffer, kBufferSize, " block_align: %d\n", + table_options_.block_align); + ret.append(buffer); + snprintf(buffer, kBufferSize, + " max_auto_readahead_size: %" ROCKSDB_PRIszt "\n", + table_options_.max_auto_readahead_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " prepopulate_block_cache: %d\n", + static_cast(table_options_.prepopulate_block_cache)); + ret.append(buffer); + snprintf(buffer, kBufferSize, + " initial_auto_readahead_size: %" ROCKSDB_PRIszt "\n", + table_options_.initial_auto_readahead_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, + " num_file_reads_for_auto_readahead: %" PRIu64 "\n", + table_options_.num_file_reads_for_auto_readahead); + ret.append(buffer); + return ret; +} + +const void* BlockBasedTableFactory::GetOptionsPtr( + const std::string& name) const { + if (name == kBlockCacheOpts()) { + if (table_options_.no_block_cache) { + return nullptr; + } else { + return table_options_.block_cache.get(); + } + } else { + return TableFactory::GetOptionsPtr(name); + } +} + +// Take a default BlockBasedTableOptions "table_options" in addition to a +// map "opts_map" of option name to option value to construct the new +// BlockBasedTableOptions "new_table_options". +// +// Below are the instructions of how to config some non-primitive-typed +// options in BlockBasedTableOptions: +// +// * filter_policy: +// We currently only support the following FilterPolicy in the convenience +// functions: +// - BloomFilter: use "bloomfilter:[bits_per_key]:[use_block_based_builder]" +// to specify BloomFilter. The above string is equivalent to calling +// NewBloomFilterPolicy(bits_per_key, use_block_based_builder). +// [Example]: +// - Pass {"filter_policy", "bloomfilter:4:true"} in +// GetBlockBasedTableOptionsFromMap to use a BloomFilter with 4-bits +// per key and use_block_based_builder enabled. +// +// * block_cache / block_cache_compressed: +// We currently only support LRU cache in the GetOptions API. The LRU +// cache can be set by directly specifying its size. +// [Example]: +// - Passing {"block_cache", "1M"} in GetBlockBasedTableOptionsFromMap is +// equivalent to setting block_cache using NewLRUCache(1024 * 1024). +// +// @param table_options the default options of the output "new_table_options". +// @param opts_map an option name to value map for specifying how +// "new_table_options" should be set. 
+// @param new_table_options the resulting options based on "table_options" +// with the change specified in "opts_map". +// @param input_strings_escaped when set to true, each escaped characters +// prefixed by '\' in the values of the opts_map will be further converted +// back to the raw string before assigning to the associated options. +// @param ignore_unknown_options when set to true, unknown options are ignored +// instead of resulting in an unknown-option error. +// @return Status::OK() on success. Otherwise, a non-ok status indicating +// error will be returned, and "new_table_options" will be set to +// "table_options". +Status BlockBasedTableFactory::ParseOption(const ConfigOptions& config_options, + const OptionTypeInfo& opt_info, + const std::string& opt_name, + const std::string& opt_value, + void* opt_ptr) { + Status status = TableFactory::ParseOption(config_options, opt_info, opt_name, + opt_value, opt_ptr); + if (config_options.input_strings_escaped && !status.ok()) { // Got an error + // !input_strings_escaped indicates the old API, where everything is + // parsable. + if (opt_info.IsByName()) { + status = Status::OK(); + } + } + return status; +} + +Status GetBlockBasedTableOptionsFromString( + const ConfigOptions& config_options, + const BlockBasedTableOptions& table_options, const std::string& opts_str, + BlockBasedTableOptions* new_table_options) { + std::unordered_map opts_map; + Status s = StringToMap(opts_str, &opts_map); + if (!s.ok()) { + return s; + } + s = GetBlockBasedTableOptionsFromMap(config_options, table_options, opts_map, + new_table_options); + // Translate any errors (NotFound, NotSupported, to InvalidArgument + if (s.ok() || s.IsInvalidArgument()) { + return s; + } else { + return Status::InvalidArgument(s.getState()); + } +} + +Status GetBlockBasedTableOptionsFromMap( + const ConfigOptions& config_options, + const BlockBasedTableOptions& table_options, + const std::unordered_map& opts_map, + BlockBasedTableOptions* new_table_options) { + assert(new_table_options); + BlockBasedTableFactory bbtf(table_options); + Status s = bbtf.ConfigureFromMap(config_options, opts_map); + if (s.ok()) { + *new_table_options = *(bbtf.GetOptions()); + } else { + *new_table_options = table_options; + } + return s; +} + +TableFactory* NewBlockBasedTableFactory( + const BlockBasedTableOptions& _table_options) { + return new BlockBasedTableFactory(_table_options); +} + +const std::string BlockBasedTablePropertyNames::kIndexType = + "rocksdb.block.based.table.index.type"; +const std::string BlockBasedTablePropertyNames::kWholeKeyFiltering = + "rocksdb.block.based.table.whole.key.filtering"; +const std::string BlockBasedTablePropertyNames::kPrefixFiltering = + "rocksdb.block.based.table.prefix.filtering"; +const std::string kHashIndexPrefixesBlock = "rocksdb.hashindex.prefixes"; +const std::string kHashIndexPrefixesMetadataBlock = + "rocksdb.hashindex.metadata"; +const std::string kPropTrue = "1"; +const std::string kPropFalse = "0"; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.h new file mode 100644 index 0000000000..1f78769777 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_factory.h @@ -0,0 +1,102 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include +#include + +#include "cache/cache_reservation_manager.h" +#include "port/port.h" +#include "rocksdb/flush_block_policy.h" +#include "rocksdb/table.h" + +namespace ROCKSDB_NAMESPACE { +struct ColumnFamilyOptions; +struct ConfigOptions; +struct DBOptions; +struct EnvOptions; + +class BlockBasedTableBuilder; +class RandomAccessFileReader; +class WritableFileWriter; + +// TODO: deprecate this class as it can be replaced with +// `FileMetaData::tail_size` +// +// A class used to track actual bytes written from the tail in the recent SST +// file opens, and provide a suggestion for the following open. +class TailPrefetchStats { + public: + void RecordEffectiveSize(size_t len); + // 0 indicates no information is available. + size_t GetSuggestedPrefetchSize(); + + private: + const static size_t kNumTracked = 32; + size_t records_[kNumTracked]; + port::Mutex mutex_; + size_t next_ = 0; + size_t num_records_ = 0; +}; + +class BlockBasedTableFactory : public TableFactory { + public: + explicit BlockBasedTableFactory( + const BlockBasedTableOptions& table_options = BlockBasedTableOptions()); + + ~BlockBasedTableFactory() {} + + // Method to allow CheckedCast to work for this class + static const char* kClassName() { return kBlockBasedTableName(); } + + const char* Name() const override { return kBlockBasedTableName(); } + + using TableFactory::NewTableReader; + Status NewTableReader( + const ReadOptions& ro, const TableReaderOptions& table_reader_options, + std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size, + std::unique_ptr<TableReader>* table_reader, + bool prefetch_index_and_filter_in_cache = true) const override; + + TableBuilder* NewTableBuilder( + const TableBuilderOptions& table_builder_options, + WritableFileWriter* file) const override; + + // Validates the specified DB Options.
+ Status ValidateOptions(const DBOptions& db_opts, + const ColumnFamilyOptions& cf_opts) const override; + Status PrepareOptions(const ConfigOptions& opts) override; + + std::string GetPrintableOptions() const override; + + bool IsDeleteRangeSupported() const override { return true; } + + TailPrefetchStats* tail_prefetch_stats() { return &tail_prefetch_stats_; } + + protected: + const void* GetOptionsPtr(const std::string& name) const override; + Status ParseOption(const ConfigOptions& config_options, + const OptionTypeInfo& opt_info, + const std::string& opt_name, const std::string& opt_value, + void* opt_ptr) override; + void InitializeOptions(); + + private: + BlockBasedTableOptions table_options_; + std::shared_ptr table_reader_cache_res_mgr_; + mutable TailPrefetchStats tail_prefetch_stats_; +}; + +extern const std::string kHashIndexPrefixesBlock; +extern const std::string kHashIndexPrefixesMetadataBlock; +extern const std::string kPropTrue; +extern const std::string kPropFalse; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.cc new file mode 100644 index 0000000000..57744a5877 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.cc @@ -0,0 +1,500 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "table/block_based/block_based_table_iterator.h" + +namespace ROCKSDB_NAMESPACE { + +void BlockBasedTableIterator::SeekToFirst() { SeekImpl(nullptr, false); } + +void BlockBasedTableIterator::Seek(const Slice& target) { + SeekImpl(&target, true); +} + +void BlockBasedTableIterator::SeekImpl(const Slice* target, + bool async_prefetch) { + bool is_first_pass = true; + if (async_read_in_progress_) { + AsyncInitDataBlock(false); + is_first_pass = false; + } + + is_out_of_bound_ = false; + is_at_first_key_from_index_ = false; + seek_stat_state_ = kNone; + bool filter_checked = false; + if (target && + !CheckPrefixMayMatch(*target, IterDirection::kForward, &filter_checked)) { + ResetDataIter(); + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_FILTERED + : NON_LAST_LEVEL_SEEK_FILTERED); + return; + } + if (filter_checked) { + seek_stat_state_ = kFilterUsed; + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_FILTER_MATCH + : NON_LAST_LEVEL_SEEK_FILTER_MATCH); + } + + bool need_seek_index = true; + if (block_iter_points_to_real_block_ && block_iter_.Valid()) { + // Reseek. + prev_block_offset_ = index_iter_->value().handle.offset(); + + if (target) { + // We can avoid an index seek if: + // 1. The new seek key is larger than the current key + // 2. The new seek key is within the upper bound of the block + // Since we don't necessarily know the internal key for either + // the current key or the upper bound, we check user keys and + // exclude the equality case. Considering internal keys can + // improve for the boundary cases, but it would complicate the + // code. 
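The condition that follows implements exactly this rule; distilled into a standalone predicate (illustrative; the real code compares user keys through user_comparator_ rather than plain strings):

#include <string>

// A reseek may stay in the current data block iff the new target lies
// strictly after the current position and strictly before the block's
// index separator (its upper bound). Equality is excluded because only
// user keys are compared, not full internal keys.
bool CanSkipIndexSeek(const std::string& target_user_key,
                      const std::string& current_user_key,
                      const std::string& block_separator_user_key) {
  return target_user_key > current_user_key &&
         target_user_key < block_separator_user_key;
}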
+      if (user_comparator_.Compare(ExtractUserKey(*target),
+                                   block_iter_.user_key()) > 0 &&
+          user_comparator_.Compare(ExtractUserKey(*target),
+                                   index_iter_->user_key()) < 0) {
+        need_seek_index = false;
+      }
+    }
+  }
+
+  if (need_seek_index) {
+    if (target) {
+      index_iter_->Seek(*target);
+    } else {
+      index_iter_->SeekToFirst();
+    }
+
+    if (!index_iter_->Valid()) {
+      ResetDataIter();
+      return;
+    }
+  }
+
+  IndexValue v = index_iter_->value();
+  const bool same_block = block_iter_points_to_real_block_ &&
+                          v.handle.offset() == prev_block_offset_;
+
+  if (!v.first_internal_key.empty() && !same_block &&
+      (!target || icomp_.Compare(*target, v.first_internal_key) <= 0) &&
+      allow_unprepared_value_) {
+    // Index contains the first key of the block, and it's >= target.
+    // We can defer reading the block.
+    is_at_first_key_from_index_ = true;
+    // ResetDataIter() will invalidate block_iter_. Thus, there is no need to
+    // call CheckDataBlockWithinUpperBound() to check for iterate_upper_bound
+    // as that will be done later when the data block is actually read.
+    ResetDataIter();
+  } else {
+    // Need to use the data block.
+    if (!same_block) {
+      if (read_options_.async_io && async_prefetch) {
+        if (is_first_pass) {
+          AsyncInitDataBlock(is_first_pass);
+        }
+        if (async_read_in_progress_) {
+          // Status::TryAgain indicates that an asynchronous request to
+          // retrieve the data block has been submitted. So return at this
+          // point; Seek should be called again to retrieve the requested
+          // block and execute the remaining code.
+          return;
+        }
+      } else {
+        InitDataBlock();
+      }
+    } else {
+      // When the user does a reseek, the iterate_upper_bound might have
+      // changed. CheckDataBlockWithinUpperBound() needs to be called
+      // explicitly if the reseek ends up in the same data block.
+      // If the reseek ends up in a different block, InitDataBlock() will do
+      // the iterator upper bound check.
+      CheckDataBlockWithinUpperBound();
+    }
+
+    if (target) {
+      block_iter_.Seek(*target);
+    } else {
+      block_iter_.SeekToFirst();
+    }
+    FindKeyForward();
+  }
+
+  CheckOutOfBound();
+
+  if (target) {
+    assert(!Valid() || icomp_.Compare(*target, key()) <= 0);
+  }
+}
+
+void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
+  is_out_of_bound_ = false;
+  is_at_first_key_from_index_ = false;
+  seek_stat_state_ = kNone;
+  bool filter_checked = false;
+  // For now totally disable prefix seek in auto prefix mode, because we don't
+  // have the logic to guarantee the same result as total order for backward
+  // iteration.
+  if (!CheckPrefixMayMatch(target, IterDirection::kBackward, &filter_checked)) {
+    ResetDataIter();
+    RecordTick(table_->GetStatistics(), is_last_level_
+                                            ? LAST_LEVEL_SEEK_FILTERED
+                                            : NON_LAST_LEVEL_SEEK_FILTERED);
+    return;
+  }
+  if (filter_checked) {
+    seek_stat_state_ = kFilterUsed;
+    RecordTick(table_->GetStatistics(), is_last_level_
+                                            ? LAST_LEVEL_SEEK_FILTER_MATCH
+                                            : NON_LAST_LEVEL_SEEK_FILTER_MATCH);
+  }
+
+  SavePrevIndexValue();
+
+  // Call Seek() rather than SeekForPrev() in the index block, because the
+  // target data block will likely contain the position for `target`, the
+  // same as Seek(), rather than the position before it.
+  // For example, if we have three data blocks, each containing two keys:
+  //   [2, 4]  [6, 8] [10, 12]
+  //  (the keys in the index block would be [4, 8, 12])
+  // and the user calls SeekForPrev(7), we need to go to the second block,
+  // just like if they call Seek(7).
+  // The only case where the block is different is when they seek to a position
+  // in the boundary.
For example, if they SeekForPrev(5), we should go to the + // first block, rather than the second. However, we don't have the information + // to distinguish the two unless we read the second block. In this case, we'll + // end up with reading two blocks. + index_iter_->Seek(target); + + if (!index_iter_->Valid()) { + auto seek_status = index_iter_->status(); + // Check for IO error + if (!seek_status.IsNotFound() && !seek_status.ok()) { + ResetDataIter(); + return; + } + + // With prefix index, Seek() returns NotFound if the prefix doesn't exist + if (seek_status.IsNotFound()) { + // Any key less than the target is fine for prefix seek + ResetDataIter(); + return; + } else { + index_iter_->SeekToLast(); + } + // Check for IO error + if (!index_iter_->Valid()) { + ResetDataIter(); + return; + } + } + + InitDataBlock(); + + block_iter_.SeekForPrev(target); + + FindKeyBackward(); + CheckDataBlockWithinUpperBound(); + assert(!block_iter_.Valid() || + icomp_.Compare(target, block_iter_.key()) >= 0); +} + +void BlockBasedTableIterator::SeekToLast() { + is_out_of_bound_ = false; + is_at_first_key_from_index_ = false; + seek_stat_state_ = kNone; + SavePrevIndexValue(); + index_iter_->SeekToLast(); + if (!index_iter_->Valid()) { + ResetDataIter(); + return; + } + InitDataBlock(); + block_iter_.SeekToLast(); + FindKeyBackward(); + CheckDataBlockWithinUpperBound(); +} + +void BlockBasedTableIterator::Next() { + if (is_at_first_key_from_index_ && !MaterializeCurrentBlock()) { + return; + } + assert(block_iter_points_to_real_block_); + block_iter_.Next(); + FindKeyForward(); + CheckOutOfBound(); +} + +bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) { + Next(); + bool is_valid = Valid(); + if (is_valid) { + result->key = key(); + result->bound_check_result = UpperBoundCheckResult(); + result->value_prepared = !is_at_first_key_from_index_; + } + return is_valid; +} + +void BlockBasedTableIterator::Prev() { + if (is_at_first_key_from_index_) { + is_at_first_key_from_index_ = false; + + index_iter_->Prev(); + if (!index_iter_->Valid()) { + return; + } + + InitDataBlock(); + block_iter_.SeekToLast(); + } else { + assert(block_iter_points_to_real_block_); + block_iter_.Prev(); + } + + FindKeyBackward(); +} + +void BlockBasedTableIterator::InitDataBlock() { + BlockHandle data_block_handle = index_iter_->value().handle; + if (!block_iter_points_to_real_block_ || + data_block_handle.offset() != prev_block_offset_ || + // if previous attempt of reading the block missed cache, try again + block_iter_.status().IsIncomplete()) { + if (block_iter_points_to_real_block_) { + ResetDataIter(); + } + auto* rep = table_->get_rep(); + + bool is_for_compaction = + lookup_context_.caller == TableReaderCaller::kCompaction; + // Prefetch additional data for range scans (iterators). + // Implicit auto readahead: + // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0. + // Explicit user requested readahead: + // Enabled from the very first IO when ReadOptions.readahead_size is set. 
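+    // (Sketch of the default behavior, as an illustration rather than a
+    // guarantee: with readahead_size == 0, the first two sequential reads do
+    // no readahead; after that the window starts at
+    // initial_auto_readahead_size and roughly doubles per read, capped at
+    // max_auto_readahead_size.)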
+    block_prefetcher_.PrefetchIfNeeded(
+        rep, data_block_handle, read_options_.readahead_size, is_for_compaction,
+        /*no_sequential_checking=*/false, read_options_.rate_limiter_priority);
+    Status s;
+    table_->NewDataBlockIterator<DataBlockIter>(
+        read_options_, data_block_handle, &block_iter_, BlockType::kData,
+        /*get_context=*/nullptr, &lookup_context_,
+        block_prefetcher_.prefetch_buffer(),
+        /*for_compaction=*/is_for_compaction, /*async_read=*/false, s);
+    block_iter_points_to_real_block_ = true;
+    CheckDataBlockWithinUpperBound();
+    if (!is_for_compaction &&
+        (seek_stat_state_ & kDataBlockReadSinceLastSeek) == 0) {
+      RecordTick(table_->GetStatistics(), is_last_level_
+                                              ? LAST_LEVEL_SEEK_DATA
+                                              : NON_LAST_LEVEL_SEEK_DATA);
+      seek_stat_state_ = static_cast<SeekStatState>(
+          seek_stat_state_ | kDataBlockReadSinceLastSeek | kReportOnUseful);
+    }
+  }
+}
+
+void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
+  BlockHandle data_block_handle = index_iter_->value().handle;
+  bool is_for_compaction =
+      lookup_context_.caller == TableReaderCaller::kCompaction;
+  if (is_first_pass) {
+    if (!block_iter_points_to_real_block_ ||
+        data_block_handle.offset() != prev_block_offset_ ||
+        // if the previous attempt to read the block missed cache, try again
+        block_iter_.status().IsIncomplete()) {
+      if (block_iter_points_to_real_block_) {
+        ResetDataIter();
+      }
+      auto* rep = table_->get_rep();
+      // Prefetch additional data for range scans (iterators).
+      // Implicit auto readahead:
+      //   Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
+      // Explicit user requested readahead:
+      //   Enabled from the very first IO when ReadOptions.readahead_size is
+      //   set.
+      // In case of async_io with implicit readahead, block_prefetcher_ will
+      // always create the prefetch buffer by setting no_sequential_checking
+      // = true.
+      block_prefetcher_.PrefetchIfNeeded(
+          rep, data_block_handle, read_options_.readahead_size,
+          is_for_compaction, /*no_sequential_checking=*/read_options_.async_io,
+          read_options_.rate_limiter_priority);
+
+      Status s;
+      table_->NewDataBlockIterator<DataBlockIter>(
+          read_options_, data_block_handle, &block_iter_, BlockType::kData,
+          /*get_context=*/nullptr, &lookup_context_,
+          block_prefetcher_.prefetch_buffer(),
+          /*for_compaction=*/is_for_compaction, /*async_read=*/true, s);
+
+      if (s.IsTryAgain()) {
+        async_read_in_progress_ = true;
+        return;
+      }
+    }
+  } else {
+    // Second pass will call Poll to get the data block that was
+    // requested asynchronously.
+    Status s;
+    table_->NewDataBlockIterator<DataBlockIter>(
+        read_options_, data_block_handle, &block_iter_, BlockType::kData,
+        /*get_context=*/nullptr, &lookup_context_,
+        block_prefetcher_.prefetch_buffer(),
+        /*for_compaction=*/is_for_compaction, /*async_read=*/false, s);
+  }
+  block_iter_points_to_real_block_ = true;
+  CheckDataBlockWithinUpperBound();
+
+  if (!is_for_compaction &&
+      (seek_stat_state_ & kDataBlockReadSinceLastSeek) == 0) {
+    RecordTick(table_->GetStatistics(), is_last_level_
+                                            ?
LAST_LEVEL_SEEK_DATA + : NON_LAST_LEVEL_SEEK_DATA); + seek_stat_state_ = static_cast( + seek_stat_state_ | kDataBlockReadSinceLastSeek | kReportOnUseful); + } + async_read_in_progress_ = false; +} + +bool BlockBasedTableIterator::MaterializeCurrentBlock() { + assert(is_at_first_key_from_index_); + assert(!block_iter_points_to_real_block_); + assert(index_iter_->Valid()); + + is_at_first_key_from_index_ = false; + InitDataBlock(); + assert(block_iter_points_to_real_block_); + + if (!block_iter_.status().ok()) { + return false; + } + + block_iter_.SeekToFirst(); + + if (!block_iter_.Valid() || + icomp_.Compare(block_iter_.key(), + index_iter_->value().first_internal_key) != 0) { + block_iter_.Invalidate(Status::Corruption( + "first key in index doesn't match first key in block")); + return false; + } + + return true; +} + +void BlockBasedTableIterator::FindKeyForward() { + // This method's code is kept short to make it likely to be inlined. + + assert(!is_out_of_bound_); + assert(block_iter_points_to_real_block_); + + if (!block_iter_.Valid()) { + // This is the only call site of FindBlockForward(), but it's extracted into + // a separate method to keep FindKeyForward() short and likely to be + // inlined. When transitioning to a different block, we call + // FindBlockForward(), which is much longer and is probably not inlined. + FindBlockForward(); + } else { + // This is the fast path that avoids a function call. + } +} + +void BlockBasedTableIterator::FindBlockForward() { + // TODO the while loop inherits from two-level-iterator. We don't know + // whether a block can be empty so it can be replaced by an "if". + do { + if (!block_iter_.status().ok()) { + return; + } + // Whether next data block is out of upper bound, if there is one. + const bool next_block_is_out_of_bound = + read_options_.iterate_upper_bound != nullptr && + block_iter_points_to_real_block_ && + block_upper_bound_check_ == BlockUpperBound::kUpperBoundInCurBlock; + assert(!next_block_is_out_of_bound || + user_comparator_.CompareWithoutTimestamp( + *read_options_.iterate_upper_bound, /*a_has_ts=*/false, + index_iter_->user_key(), /*b_has_ts=*/true) <= 0); + ResetDataIter(); + index_iter_->Next(); + if (next_block_is_out_of_bound) { + // The next block is out of bound. No need to read it. + TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr); + // We need to make sure this is not the last data block before setting + // is_out_of_bound_, since the index key for the last data block can be + // larger than smallest key of the next file on the same level. + if (index_iter_->Valid()) { + is_out_of_bound_ = true; + } + return; + } + + if (!index_iter_->Valid()) { + return; + } + + IndexValue v = index_iter_->value(); + + if (!v.first_internal_key.empty() && allow_unprepared_value_) { + // Index contains the first key of the block. Defer reading the block. + is_at_first_key_from_index_ = true; + return; + } + + InitDataBlock(); + block_iter_.SeekToFirst(); + } while (!block_iter_.Valid()); +} + +void BlockBasedTableIterator::FindKeyBackward() { + while (!block_iter_.Valid()) { + if (!block_iter_.status().ok()) { + return; + } + + ResetDataIter(); + index_iter_->Prev(); + + if (index_iter_->Valid()) { + InitDataBlock(); + block_iter_.SeekToLast(); + } else { + return; + } + } + + // We could have check lower bound here too, but we opt not to do it for + // code simplicity. 
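+  // (Likely rationale: skipping that check only costs some extra backward
+  // iteration, since iterate_lower_bound is still enforced by the DB-level
+  // iterator above this one.)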
+} + +void BlockBasedTableIterator::CheckOutOfBound() { + if (read_options_.iterate_upper_bound != nullptr && + block_upper_bound_check_ != BlockUpperBound::kUpperBoundBeyondCurBlock && + Valid()) { + is_out_of_bound_ = + user_comparator_.CompareWithoutTimestamp( + *read_options_.iterate_upper_bound, /*a_has_ts=*/false, user_key(), + /*b_has_ts=*/true) <= 0; + } +} + +void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() { + if (read_options_.iterate_upper_bound != nullptr && + block_iter_points_to_real_block_) { + block_upper_bound_check_ = (user_comparator_.CompareWithoutTimestamp( + *read_options_.iterate_upper_bound, + /*a_has_ts=*/false, index_iter_->user_key(), + /*b_has_ts=*/true) > 0) + ? BlockUpperBound::kUpperBoundBeyondCurBlock + : BlockUpperBound::kUpperBoundInCurBlock; + } +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.h new file mode 100644 index 0000000000..6ea53f3315 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_iterator.h @@ -0,0 +1,310 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/block_based_table_reader_impl.h" +#include "table/block_based/block_prefetcher.h" +#include "table/block_based/reader_common.h" + +namespace ROCKSDB_NAMESPACE { +// Iterates over the contents of BlockBasedTable. +class BlockBasedTableIterator : public InternalIteratorBase { + // compaction_readahead_size: its value will only be used if for_compaction = + // true + // @param read_options Must outlive this iterator. 
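+  //
+  // Rough usage sketch (in practice instances are obtained from
+  // BlockBasedTable::NewIterator rather than constructed directly):
+  //
+  //   iter->Seek(k);                     // first entry with key >= k
+  //   if (iter->Valid() && iter->PrepareValue()) {
+  //     Consume(iter->key(), iter->value());  // Consume() is a placeholder
+  //   }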
+ public: + BlockBasedTableIterator( + const BlockBasedTable* table, const ReadOptions& read_options, + const InternalKeyComparator& icomp, + std::unique_ptr>&& index_iter, + bool check_filter, bool need_upper_bound_check, + const SliceTransform* prefix_extractor, TableReaderCaller caller, + size_t compaction_readahead_size = 0, bool allow_unprepared_value = false) + : index_iter_(std::move(index_iter)), + table_(table), + read_options_(read_options), + icomp_(icomp), + user_comparator_(icomp.user_comparator()), + pinned_iters_mgr_(nullptr), + prefix_extractor_(prefix_extractor), + lookup_context_(caller), + block_prefetcher_( + compaction_readahead_size, + table_->get_rep()->table_options.initial_auto_readahead_size), + allow_unprepared_value_(allow_unprepared_value), + block_iter_points_to_real_block_(false), + check_filter_(check_filter), + need_upper_bound_check_(need_upper_bound_check), + async_read_in_progress_(false), + is_last_level_(table->IsLastLevel()) {} + + ~BlockBasedTableIterator() {} + + void Seek(const Slice& target) override; + void SeekForPrev(const Slice& target) override; + void SeekToFirst() override; + void SeekToLast() override; + void Next() final override; + bool NextAndGetResult(IterateResult* result) override; + void Prev() override; + bool Valid() const override { + return !is_out_of_bound_ && + (is_at_first_key_from_index_ || + (block_iter_points_to_real_block_ && block_iter_.Valid())); + } + Slice key() const override { + assert(Valid()); + if (is_at_first_key_from_index_) { + return index_iter_->value().first_internal_key; + } else { + return block_iter_.key(); + } + } + Slice user_key() const override { + assert(Valid()); + if (is_at_first_key_from_index_) { + return ExtractUserKey(index_iter_->value().first_internal_key); + } else { + return block_iter_.user_key(); + } + } + bool PrepareValue() override { + assert(Valid()); + + if (!is_at_first_key_from_index_) { + return true; + } + + return const_cast(this) + ->MaterializeCurrentBlock(); + } + Slice value() const override { + // PrepareValue() must have been called. + assert(!is_at_first_key_from_index_); + assert(Valid()); + + if (seek_stat_state_ & kReportOnUseful) { + bool filter_used = (seek_stat_state_ & kFilterUsed) != 0; + RecordTick( + table_->GetStatistics(), + filter_used + ? (is_last_level_ ? LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH + : NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH) + : (is_last_level_ ? 
LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER + : NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER)); + seek_stat_state_ = kDataBlockReadSinceLastSeek; + } + + return block_iter_.value(); + } + Status status() const override { + // Prefix index set status to NotFound when the prefix does not exist + if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) { + return index_iter_->status(); + } else if (block_iter_points_to_real_block_) { + return block_iter_.status(); + } else if (async_read_in_progress_) { + return Status::TryAgain(); + } else { + return Status::OK(); + } + } + + inline IterBoundCheck UpperBoundCheckResult() override { + if (is_out_of_bound_) { + return IterBoundCheck::kOutOfBound; + } else if (block_upper_bound_check_ == + BlockUpperBound::kUpperBoundBeyondCurBlock) { + assert(!is_out_of_bound_); + return IterBoundCheck::kInbound; + } else { + return IterBoundCheck::kUnknown; + } + } + + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; + } + bool IsKeyPinned() const override { + // Our key comes either from block_iter_'s current key + // or index_iter_'s current *value*. + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + ((is_at_first_key_from_index_ && index_iter_->IsValuePinned()) || + (block_iter_points_to_real_block_ && block_iter_.IsKeyPinned())); + } + bool IsValuePinned() const override { + assert(!is_at_first_key_from_index_); + assert(Valid()); + + // BlockIter::IsValuePinned() is always true. No need to check + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + block_iter_points_to_real_block_; + } + + void ResetDataIter() { + if (block_iter_points_to_real_block_) { + if (pinned_iters_mgr_ != nullptr && pinned_iters_mgr_->PinningEnabled()) { + block_iter_.DelegateCleanupsTo(pinned_iters_mgr_); + } + block_iter_.Invalidate(Status::OK()); + block_iter_points_to_real_block_ = false; + } + block_upper_bound_check_ = BlockUpperBound::kUnknown; + } + + void SavePrevIndexValue() { + if (block_iter_points_to_real_block_) { + // Reseek. If they end up with the same data block, we shouldn't re-fetch + // the same data block. + prev_block_offset_ = index_iter_->value().handle.offset(); + } + } + + void GetReadaheadState(ReadaheadFileInfo* readahead_file_info) override { + if (block_prefetcher_.prefetch_buffer() != nullptr && + read_options_.adaptive_readahead) { + block_prefetcher_.prefetch_buffer()->GetReadaheadState( + &(readahead_file_info->data_block_readahead_info)); + if (index_iter_) { + index_iter_->GetReadaheadState(readahead_file_info); + } + } + } + + void SetReadaheadState(ReadaheadFileInfo* readahead_file_info) override { + if (read_options_.adaptive_readahead) { + block_prefetcher_.SetReadaheadState( + &(readahead_file_info->data_block_readahead_info)); + if (index_iter_) { + index_iter_->SetReadaheadState(readahead_file_info); + } + } + } + + std::unique_ptr> index_iter_; + + private: + enum class IterDirection { + kForward, + kBackward, + }; + // This enum indicates whether the upper bound falls into current block + // or beyond. + // +-------------+ + // | cur block | <-- (1) + // +-------------+ + // <-- (2) + // --- --- + // <-- (3) + // +-------------+ + // | next block | <-- (4) + // ...... + // + // When the block is smaller than , kUpperBoundInCurBlock + // is the value to use. The examples are (1) or (2) in the graph. It means + // all keys in the next block or beyond will be out of bound. 
Keys within
+  // the current block may or may not be out of bound.
+  // When the block is larger than or equal to <upper bound>,
+  // kUpperBoundBeyondCurBlock is to be used. The examples are (3) and (4)
+  // in the graph. It means that all keys in the current block are within the
+  // upper bound and keys in the next block may or may not be within the upper
+  // bound.
+  // If the boundary key hasn't been checked against the upper bound,
+  // kUnknown can be used.
+  enum class BlockUpperBound : uint8_t {
+    kUpperBoundInCurBlock,
+    kUpperBoundBeyondCurBlock,
+    kUnknown,
+  };
+
+  // State bits for collecting stats on seeks and whether they returned useful
+  // results.
+  enum SeekStatState : uint8_t {
+    kNone = 0,
+    // Most recent seek checked prefix filter (or similar future feature)
+    kFilterUsed = 1 << 0,
+    // Already recorded that a data block was accessed since the last seek.
+    kDataBlockReadSinceLastSeek = 1 << 1,
+    // Have not yet recorded that a value() was accessed.
+    kReportOnUseful = 1 << 2,
+  };
+
+  const BlockBasedTable* table_;
+  const ReadOptions& read_options_;
+  const InternalKeyComparator& icomp_;
+  UserComparatorWrapper user_comparator_;
+  PinnedIteratorsManager* pinned_iters_mgr_;
+  DataBlockIter block_iter_;
+  const SliceTransform* prefix_extractor_;
+  uint64_t prev_block_offset_ = std::numeric_limits<uint64_t>::max();
+  BlockCacheLookupContext lookup_context_;
+
+  BlockPrefetcher block_prefetcher_;
+
+  const bool allow_unprepared_value_;
+  // True if block_iter_ is initialized and points to the same block
+  // as index iterator.
+  bool block_iter_points_to_real_block_;
+  // See InternalIteratorBase::IsOutOfBound().
+  bool is_out_of_bound_ = false;
+  // How current data block's boundary key with the next block is compared with
+  // iterate upper bound.
+  BlockUpperBound block_upper_bound_check_ = BlockUpperBound::kUnknown;
+  // True if we're standing at the first key of a block, and we haven't loaded
+  // that block yet. A call to PrepareValue() will trigger loading the block.
+  bool is_at_first_key_from_index_ = false;
+  bool check_filter_;
+  // TODO(Zhongyi): pick a better name
+  bool need_upper_bound_check_;
+
+  bool async_read_in_progress_;
+
+  mutable SeekStatState seek_stat_state_ = SeekStatState::kNone;
+  bool is_last_level_;
+
+  // If `target` is null, seek to first.
+  void SeekImpl(const Slice* target, bool async_prefetch);
+
+  void InitDataBlock();
+  void AsyncInitDataBlock(bool is_first_pass);
+  bool MaterializeCurrentBlock();
+  void FindKeyForward();
+  void FindBlockForward();
+  void FindKeyBackward();
+  void CheckOutOfBound();
+
+  // Check if data block is fully within iterate_upper_bound.
+  //
+  // Note MyRocks may update iterate bounds between seeks. To work around it,
+  // we need to check and update data_block_within_upper_bound_ accordingly.
+  void CheckDataBlockWithinUpperBound();
+
+  bool CheckPrefixMayMatch(const Slice& ikey, IterDirection direction,
+                           bool* filter_checked) {
+    if (need_upper_bound_check_ && direction == IterDirection::kBackward) {
+      // Upper bound check isn't sufficient for backward direction to
+      // guarantee the same result as total order, so disable prefix
+      // check.
+      return true;
+    }
+    if (check_filter_ &&
+        !table_->PrefixRangeMayMatch(ikey, read_options_, prefix_extractor_,
+                                     need_upper_bound_check_, &lookup_context_,
+                                     filter_checked)) {
+      // TODO remember the iterator is invalidated because of prefix
+      // match.
This can avoid the upper level file iterator to falsely + // believe the position is the end of the SST file and move to + // the first key of the next file. + ResetDataIter(); + return false; + } + return true; + } +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.cc new file mode 100644 index 0000000000..a6897ad617 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.cc @@ -0,0 +1,3043 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "table/block_based/block_based_table_reader.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "block_cache.h" +#include "cache/cache_entry_roles.h" +#include "cache/cache_key.h" +#include "db/compaction/compaction_picker.h" +#include "db/dbformat.h" +#include "db/pinned_iterators_manager.h" +#include "file/file_prefetch_buffer.h" +#include "file/file_util.h" +#include "file/random_access_file_reader.h" +#include "logging/logging.h" +#include "monitoring/perf_context_imp.h" +#include "parsed_full_filter_block.h" +#include "port/lang.h" +#include "rocksdb/cache.h" +#include "rocksdb/comparator.h" +#include "rocksdb/convenience.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/filter_policy.h" +#include "rocksdb/iterator.h" +#include "rocksdb/options.h" +#include "rocksdb/snapshot.h" +#include "rocksdb/statistics.h" +#include "rocksdb/system_clock.h" +#include "rocksdb/table.h" +#include "rocksdb/table_properties.h" +#include "rocksdb/trace_record.h" +#include "table/block_based/binary_search_index_reader.h" +#include "table/block_based/block.h" +#include "table/block_based/block_based_table_factory.h" +#include "table/block_based/block_based_table_iterator.h" +#include "table/block_based/block_prefix_index.h" +#include "table/block_based/block_type.h" +#include "table/block_based/filter_block.h" +#include "table/block_based/filter_policy_internal.h" +#include "table/block_based/full_filter_block.h" +#include "table/block_based/hash_index_reader.h" +#include "table/block_based/partitioned_filter_block.h" +#include "table/block_based/partitioned_index_reader.h" +#include "table/block_fetcher.h" +#include "table/format.h" +#include "table/get_context.h" +#include "table/internal_iterator.h" +#include "table/meta_blocks.h" +#include "table/multiget_context.h" +#include "table/persistent_cache_helper.h" +#include "table/persistent_cache_options.h" +#include "table/sst_file_writer_collectors.h" +#include "table/two_level_iterator.h" +#include "test_util/sync_point.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "util/stop_watch.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +namespace { + +CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) { + CacheAllocationPtr heap_buf; + heap_buf = AllocateBlock(buf.size(), allocator); + 
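+  // Copy the source bytes into the freshly allocated heap block; the
+  // returned CacheAllocationPtr owns the copy.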
memcpy(heap_buf.get(), buf.data(), buf.size()); + return heap_buf; +} +} // namespace + +// Explicitly instantiate templates for each "blocklike" type we use (and +// before implicit specialization). +// This makes it possible to keep the template definitions in the .cc file. +#define INSTANTIATE_RETRIEVE_BLOCK(T) \ + template Status BlockBasedTable::RetrieveBlock( \ + FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \ + const BlockHandle& handle, const UncompressionDict& uncompression_dict, \ + CachableEntry* out_parsed_block, GetContext* get_context, \ + BlockCacheLookupContext* lookup_context, bool for_compaction, \ + bool use_cache, bool async_read) const; + +INSTANTIATE_RETRIEVE_BLOCK(ParsedFullFilterBlock); +INSTANTIATE_RETRIEVE_BLOCK(UncompressionDict); +INSTANTIATE_RETRIEVE_BLOCK(Block_kData); +INSTANTIATE_RETRIEVE_BLOCK(Block_kIndex); +INSTANTIATE_RETRIEVE_BLOCK(Block_kFilterPartitionIndex); +INSTANTIATE_RETRIEVE_BLOCK(Block_kRangeDeletion); +INSTANTIATE_RETRIEVE_BLOCK(Block_kMetaIndex); + +} // namespace ROCKSDB_NAMESPACE + +// Generate the regular and coroutine versions of some methods by +// including block_based_table_reader_sync_and_async.h twice +// Macros in the header will expand differently based on whether +// WITH_COROUTINES or WITHOUT_COROUTINES is defined +// clang-format off +#define WITHOUT_COROUTINES +#include "table/block_based/block_based_table_reader_sync_and_async.h" +#undef WITHOUT_COROUTINES +#define WITH_COROUTINES +#include "table/block_based/block_based_table_reader_sync_and_async.h" +#undef WITH_COROUTINES +// clang-format on + +namespace ROCKSDB_NAMESPACE { + +extern const uint64_t kBlockBasedTableMagicNumber; +extern const std::string kHashIndexPrefixesBlock; +extern const std::string kHashIndexPrefixesMetadataBlock; + +BlockBasedTable::~BlockBasedTable() { delete rep_; } + +namespace { +// Read the block identified by "handle" from "file". +// The only relevant option is options.verify_checksums for now. +// On failure return non-OK. +// On success fill *result and return OK - caller owns *result +// @param uncompression_dict Data for presetting the compression library's +// dictionary. +template +Status ReadAndParseBlockFromFile( + RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, + const Footer& footer, const ReadOptions& options, const BlockHandle& handle, + std::unique_ptr* result, const ImmutableOptions& ioptions, + BlockCreateContext& create_context, bool maybe_compressed, + const UncompressionDict& uncompression_dict, + const PersistentCacheOptions& cache_options, + MemoryAllocator* memory_allocator, bool for_compaction, bool async_read) { + assert(result); + + BlockContents contents; + BlockFetcher block_fetcher( + file, prefetch_buffer, footer, options, handle, &contents, ioptions, + /*do_uncompress*/ maybe_compressed, maybe_compressed, + TBlocklike::kBlockType, uncompression_dict, cache_options, + memory_allocator, nullptr, for_compaction); + Status s; + // If prefetch_buffer is not allocated, it will fallback to synchronous + // reading of block contents. + if (async_read && prefetch_buffer != nullptr) { + s = block_fetcher.ReadAsyncBlockContents(); + if (!s.ok()) { + return s; + } + } else { + s = block_fetcher.ReadBlockContents(); + } + if (s.ok()) { + create_context.Create(result, std::move(contents)); + } + return s; +} + +// For hash based index, return false if table_properties->prefix_extractor_name +// and prefix_extractor both exist and match, otherwise true. 
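+// (Example: a table written with extractor "rocksdb.CappedPrefix.8" but
+// opened with "rocksdb.CappedPrefix.4" counts as changed, because the
+// comparison below is on the extractor's serialized name.)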
+inline bool PrefixExtractorChangedHelper( + const TableProperties* table_properties, + const SliceTransform* prefix_extractor) { + // BlockBasedTableOptions::kHashSearch requires prefix_extractor to be set. + // Turn off hash index in prefix_extractor is not set; if prefix_extractor + // is set but prefix_extractor_block is not set, also disable hash index + if (prefix_extractor == nullptr || table_properties == nullptr || + table_properties->prefix_extractor_name.empty()) { + return true; + } + + // prefix_extractor and prefix_extractor_block are both non-empty + if (table_properties->prefix_extractor_name != prefix_extractor->AsString()) { + return true; + } else { + return false; + } +} + +template +uint32_t GetBlockNumRestarts(const TBlocklike& block) { + if constexpr (std::is_convertible_v) { + const Block& b = block; + return b.NumRestarts(); + } else { + return 0; + } +} + +} // namespace + +void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type, + GetContext* get_context, + size_t usage) const { + Statistics* const statistics = rep_->ioptions.stats; + + PERF_COUNTER_ADD(block_cache_hit_count, 1); + PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, + static_cast(rep_->level)); + + if (get_context) { + ++get_context->get_context_stats_.num_cache_hit; + get_context->get_context_stats_.num_cache_bytes_read += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_HIT); + RecordTick(statistics, BLOCK_CACHE_BYTES_READ, usage); + } + + switch (block_type) { + case BlockType::kFilter: + case BlockType::kFilterPartitionIndex: + PERF_COUNTER_ADD(block_cache_filter_hit_count, 1); + + if (get_context) { + ++get_context->get_context_stats_.num_cache_filter_hit; + } else { + RecordTick(statistics, BLOCK_CACHE_FILTER_HIT); + } + break; + + case BlockType::kCompressionDictionary: + // TODO: introduce perf counter for compression dictionary hit count + if (get_context) { + ++get_context->get_context_stats_.num_cache_compression_dict_hit; + } else { + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_HIT); + } + break; + + case BlockType::kIndex: + PERF_COUNTER_ADD(block_cache_index_hit_count, 1); + + if (get_context) { + ++get_context->get_context_stats_.num_cache_index_hit; + } else { + RecordTick(statistics, BLOCK_CACHE_INDEX_HIT); + } + break; + + default: + // TODO: introduce dedicated tickers/statistics/counters + // for range tombstones + if (get_context) { + ++get_context->get_context_stats_.num_cache_data_hit; + } else { + RecordTick(statistics, BLOCK_CACHE_DATA_HIT); + } + break; + } +} + +void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type, + GetContext* get_context) const { + Statistics* const statistics = rep_->ioptions.stats; + + // TODO: introduce aggregate (not per-level) block cache miss count + PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 1, + static_cast(rep_->level)); + + if (get_context) { + ++get_context->get_context_stats_.num_cache_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_MISS); + } + + // TODO: introduce perf counters for misses per block type + switch (block_type) { + case BlockType::kFilter: + case BlockType::kFilterPartitionIndex: + if (get_context) { + ++get_context->get_context_stats_.num_cache_filter_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_FILTER_MISS); + } + break; + + case BlockType::kCompressionDictionary: + if (get_context) { + ++get_context->get_context_stats_.num_cache_compression_dict_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_MISS); + } + break; + + case BlockType::kIndex: 
+ if (get_context) { + ++get_context->get_context_stats_.num_cache_index_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_INDEX_MISS); + } + break; + + default: + // TODO: introduce dedicated tickers/statistics/counters + // for range tombstones + if (get_context) { + ++get_context->get_context_stats_.num_cache_data_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_DATA_MISS); + } + break; + } +} + +void BlockBasedTable::UpdateCacheInsertionMetrics( + BlockType block_type, GetContext* get_context, size_t usage, bool redundant, + Statistics* const statistics) { + // TODO: introduce perf counters for block cache insertions + if (get_context) { + ++get_context->get_context_stats_.num_cache_add; + if (redundant) { + ++get_context->get_context_stats_.num_cache_add_redundant; + } + get_context->get_context_stats_.num_cache_bytes_write += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_ADD); + if (redundant) { + RecordTick(statistics, BLOCK_CACHE_ADD_REDUNDANT); + } + RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usage); + } + + switch (block_type) { + case BlockType::kFilter: + case BlockType::kFilterPartitionIndex: + if (get_context) { + ++get_context->get_context_stats_.num_cache_filter_add; + if (redundant) { + ++get_context->get_context_stats_.num_cache_filter_add_redundant; + } + get_context->get_context_stats_.num_cache_filter_bytes_insert += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_FILTER_ADD); + if (redundant) { + RecordTick(statistics, BLOCK_CACHE_FILTER_ADD_REDUNDANT); + } + RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT, usage); + } + break; + + case BlockType::kCompressionDictionary: + if (get_context) { + ++get_context->get_context_stats_.num_cache_compression_dict_add; + if (redundant) { + ++get_context->get_context_stats_ + .num_cache_compression_dict_add_redundant; + } + get_context->get_context_stats_ + .num_cache_compression_dict_bytes_insert += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_ADD); + if (redundant) { + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT); + } + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, + usage); + } + break; + + case BlockType::kIndex: + if (get_context) { + ++get_context->get_context_stats_.num_cache_index_add; + if (redundant) { + ++get_context->get_context_stats_.num_cache_index_add_redundant; + } + get_context->get_context_stats_.num_cache_index_bytes_insert += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_INDEX_ADD); + if (redundant) { + RecordTick(statistics, BLOCK_CACHE_INDEX_ADD_REDUNDANT); + } + RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, usage); + } + break; + + default: + // TODO: introduce dedicated tickers/statistics/counters + // for range tombstones + if (get_context) { + ++get_context->get_context_stats_.num_cache_data_add; + if (redundant) { + ++get_context->get_context_stats_.num_cache_data_add_redundant; + } + get_context->get_context_stats_.num_cache_data_bytes_insert += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_DATA_ADD); + if (redundant) { + RecordTick(statistics, BLOCK_CACHE_DATA_ADD_REDUNDANT); + } + RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, usage); + } + break; + } +} + +namespace { +// Return True if table_properties has `user_prop_name` has a `true` value +// or it doesn't contain this property (for backward compatible). 
+bool IsFeatureSupported(const TableProperties& table_properties,
+                        const std::string& user_prop_name, Logger* info_log) {
+  auto& props = table_properties.user_collected_properties;
+  auto pos = props.find(user_prop_name);
+  // Older versions don't have this value set. Skip this check.
+  if (pos != props.end()) {
+    if (pos->second == kPropFalse) {
+      return false;
+    } else if (pos->second != kPropTrue) {
+      ROCKS_LOG_WARN(info_log, "Property %s has invalid value %s",
+                     user_prop_name.c_str(), pos->second.c_str());
+    }
+  }
+  return true;
+}
+
+// Caller has to ensure seqno is not nullptr.
+Status GetGlobalSequenceNumber(const TableProperties& table_properties,
+                               SequenceNumber largest_seqno,
+                               SequenceNumber* seqno) {
+  const auto& props = table_properties.user_collected_properties;
+  const auto version_pos = props.find(ExternalSstFilePropertyNames::kVersion);
+  const auto seqno_pos = props.find(ExternalSstFilePropertyNames::kGlobalSeqno);
+
+  *seqno = kDisableGlobalSequenceNumber;
+  if (version_pos == props.end()) {
+    if (seqno_pos != props.end()) {
+      std::array<char, 200> msg_buf;
+      // This is not an external sst file, global_seqno is not supported.
+      snprintf(
+          msg_buf.data(), msg_buf.max_size(),
+          "A non-external sst file has a global seqno property with value %s",
+          seqno_pos->second.c_str());
+      return Status::Corruption(msg_buf.data());
+    }
+    return Status::OK();
+  }
+
+  uint32_t version = DecodeFixed32(version_pos->second.c_str());
+  if (version < 2) {
+    if (seqno_pos != props.end() || version != 1) {
+      std::array<char, 200> msg_buf;
+      // This is a v1 external sst file, global_seqno is not supported.
+      snprintf(msg_buf.data(), msg_buf.max_size(),
+               "An external sst file with version %u has a global seqno "
+               "property with value %s",
+               version, seqno_pos->second.c_str());
+      return Status::Corruption(msg_buf.data());
+    }
+    return Status::OK();
+  }
+
+  // Since we have a plan to deprecate global_seqno, we do not return failure
+  // if seqno_pos == props.end(). We rely on version_pos to detect whether the
+  // SST is external.
+  SequenceNumber global_seqno(0);
+  if (seqno_pos != props.end()) {
+    global_seqno = DecodeFixed64(seqno_pos->second.c_str());
+  }
+  // SstTableReader opens the table reader with kMaxSequenceNumber as
+  // largest_seqno to denote that it is unknown.
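+  // (Hence the check below: a concretely known largest_seqno is always
+  // strictly less than kMaxSequenceNumber.)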
+  if (largest_seqno < kMaxSequenceNumber) {
+    if (global_seqno == 0) {
+      global_seqno = largest_seqno;
+    }
+    if (global_seqno != largest_seqno) {
+      std::array<char, 200> msg_buf;
+      snprintf(
+          msg_buf.data(), msg_buf.max_size(),
+          "An external sst file with version %u has a global seqno property "
+          "with value %s, while the largest seqno in the file is %llu",
+          version, seqno_pos->second.c_str(),
+          static_cast<unsigned long long>(largest_seqno));
+      return Status::Corruption(msg_buf.data());
+    }
+  }
+  *seqno = global_seqno;
+
+  if (global_seqno > kMaxSequenceNumber) {
+    std::array<char, 200> msg_buf;
+    snprintf(msg_buf.data(), msg_buf.max_size(),
+             "An external sst file with version %u has a global seqno property "
+             "with value %llu, which is greater than kMaxSequenceNumber",
+             version, static_cast<unsigned long long>(global_seqno));
+    return Status::Corruption(msg_buf.data());
+  }
+
+  return Status::OK();
+}
+}  // namespace
+
+void BlockBasedTable::SetupBaseCacheKey(const TableProperties* properties,
+                                        const std::string& cur_db_session_id,
+                                        uint64_t cur_file_number,
+                                        OffsetableCacheKey* out_base_cache_key,
+                                        bool* out_is_stable) {
+  // Use a stable cache key if sufficient data is in table properties
+  std::string db_session_id;
+  uint64_t file_num;
+  std::string db_id;
+  if (properties && !properties->db_session_id.empty() &&
+      properties->orig_file_number > 0) {
+    // (Newer SST file case)
+    // We must have both properties to get a stable unique id because
+    // CreateColumnFamilyWithImport or IngestExternalFiles can change the
+    // file numbers on a file.
+    db_session_id = properties->db_session_id;
+    file_num = properties->orig_file_number;
+    // Less critical, populated in an earlier release than the above
+    db_id = properties->db_id;
+    if (out_is_stable) {
+      *out_is_stable = true;
+    }
+  } else {
+    // (Old SST file case)
+    // We use (unique) cache keys based on current identifiers. These are at
+    // least stable across table file close and re-open, but not across
+    // different DBs nor DB close and re-open.
+    db_session_id = cur_db_session_id;
+    file_num = cur_file_number;
+    // Plumbing through the DB ID to here would be annoying, and of limited
+    // value because of the case of VersionSet::Recover opening some table
+    // files and later setting the DB ID. So we just rely on the uniqueness
+    // level provided by the session ID.
+    db_id = "unknown";
+    if (out_is_stable) {
+      *out_is_stable = false;
+    }
+  }
+
+  // Too many tests to update to get these working
+  // assert(file_num > 0);
+  // assert(!db_session_id.empty());
+  // assert(!db_id.empty());
+
+  // Minimum block size is 5 bytes; therefore we can trim off two lower bits
+  // from offsets. See GetCacheKey.
+  *out_base_cache_key = OffsetableCacheKey(db_id, db_session_id, file_num);
+}
+
+CacheKey BlockBasedTable::GetCacheKey(const OffsetableCacheKey& base_cache_key,
+                                      const BlockHandle& handle) {
+  // Minimum block size is 5 bytes; therefore we can trim off two lower bits
+  // from the offset.
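+  // (Worked example: handles at offsets 0, 8, and 64 yield key offsets 0, 2,
+  // and 16. Distinct blocks start at least 5 bytes apart, while two offsets
+  // can only collide after >> 2 if they are within 3 bytes of each other, so
+  // the shifted offset remains unique per block.)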
+ return base_cache_key.WithOffset(handle.offset() >> 2); +} + +Status BlockBasedTable::Open( + const ReadOptions& read_options, const ImmutableOptions& ioptions, + const EnvOptions& env_options, const BlockBasedTableOptions& table_options, + const InternalKeyComparator& internal_comparator, + std::unique_ptr&& file, uint64_t file_size, + uint8_t block_protection_bytes_per_key, + std::unique_ptr* table_reader, uint64_t tail_size, + std::shared_ptr table_reader_cache_res_mgr, + const std::shared_ptr& prefix_extractor, + const bool prefetch_index_and_filter_in_cache, const bool skip_filters, + const int level, const bool immortal_table, + const SequenceNumber largest_seqno, const bool force_direct_prefetch, + TailPrefetchStats* tail_prefetch_stats, + BlockCacheTracer* const block_cache_tracer, + size_t max_file_size_for_l0_meta_pin, const std::string& cur_db_session_id, + uint64_t cur_file_num, UniqueId64x2 expected_unique_id) { + table_reader->reset(); + + Status s; + Footer footer; + std::unique_ptr prefetch_buffer; + + // From read_options, retain deadline, io_timeout, rate_limiter_priority, and + // verify_checksums. In future, we may retain more options. + ReadOptions ro; + ro.deadline = read_options.deadline; + ro.io_timeout = read_options.io_timeout; + ro.rate_limiter_priority = read_options.rate_limiter_priority; + ro.verify_checksums = read_options.verify_checksums; + ro.io_activity = read_options.io_activity; + + // prefetch both index and filters, down to all partitions + const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0; + const bool preload_all = !table_options.cache_index_and_filter_blocks; + + if (!ioptions.allow_mmap_reads) { + s = PrefetchTail(ro, file.get(), file_size, force_direct_prefetch, + tail_prefetch_stats, prefetch_all, preload_all, + &prefetch_buffer, ioptions.stats, tail_size, + ioptions.logger); + // Return error in prefetch path to users. + if (!s.ok()) { + return s; + } + } else { + // Should not prefetch for mmap mode. + prefetch_buffer.reset(new FilePrefetchBuffer( + 0 /* readahead_size */, 0 /* max_readahead_size */, false /* enable */, + true /* track_min_offset */)); + } + + // Read in the following order: + // 1. Footer + // 2. [metaindex block] + // 3. [meta block: properties] + // 4. [meta block: range deletion tombstone] + // 5. [meta block: compression dictionary] + // 6. [meta block: index] + // 7. [meta block: filter] + IOOptions opts; + s = file->PrepareIOOptions(ro, opts); + if (s.ok()) { + s = ReadFooterFromFile(opts, file.get(), *ioptions.fs, + prefetch_buffer.get(), file_size, &footer, + kBlockBasedTableMagicNumber); + } + if (!s.ok()) { + return s; + } + if (!IsSupportedFormatVersion(footer.format_version())) { + return Status::Corruption( + "Unknown Footer version. Maybe this file was created with newer " + "version of RocksDB?"); + } + + BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch}; + Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options, + internal_comparator, skip_filters, + file_size, level, immortal_table); + rep->file = std::move(file); + rep->footer = footer; + + // For fully portable/stable cache keys, we need to read the properties + // block before setting up cache keys. TODO: consider setting up a bootstrap + // cache key for PersistentCache to use for metaindex and properties blocks. + rep->persistent_cache_options = PersistentCacheOptions(); + + // Meta-blocks are not dictionary compressed. 
Explicitly set the dictionary + // handle to null, otherwise it may be seen as uninitialized during the below + // meta-block reads. + rep->compression_dict_handle = BlockHandle::NullBlockHandle(); + + rep->create_context.protection_bytes_per_key = block_protection_bytes_per_key; + // Read metaindex + std::unique_ptr new_table( + new BlockBasedTable(rep, block_cache_tracer)); + std::unique_ptr metaindex; + std::unique_ptr metaindex_iter; + s = new_table->ReadMetaIndexBlock(ro, prefetch_buffer.get(), &metaindex, + &metaindex_iter); + if (!s.ok()) { + return s; + } + + // Populates table_properties and some fields that depend on it, + // such as index_type. + s = new_table->ReadPropertiesBlock(ro, prefetch_buffer.get(), + metaindex_iter.get(), largest_seqno); + if (!s.ok()) { + return s; + } + + // Populate BlockCreateContext + bool blocks_definitely_zstd_compressed = + rep->table_properties && + (rep->table_properties->compression_name == + CompressionTypeToString(kZSTD) || + rep->table_properties->compression_name == + CompressionTypeToString(kZSTDNotFinalCompression)); + rep->create_context = BlockCreateContext( + &rep->table_options, rep->ioptions.stats, + blocks_definitely_zstd_compressed, block_protection_bytes_per_key, + rep->internal_comparator.user_comparator(), rep->index_value_is_full, + rep->index_has_first_key); + + // Check expected unique id if provided + if (expected_unique_id != kNullUniqueId64x2) { + auto props = rep->table_properties; + if (!props) { + return Status::Corruption("Missing table properties on file " + + std::to_string(cur_file_num) + + " with known unique ID"); + } + UniqueId64x2 actual_unique_id{}; + s = GetSstInternalUniqueId(props->db_id, props->db_session_id, + props->orig_file_number, &actual_unique_id, + /*force*/ true); + assert(s.ok()); // because force=true + if (expected_unique_id != actual_unique_id) { + return Status::Corruption( + "Mismatch in unique ID on table file " + + std::to_string(cur_file_num) + + ". Expected: " + InternalUniqueIdToHumanString(&expected_unique_id) + + " Actual: " + InternalUniqueIdToHumanString(&actual_unique_id)); + } + TEST_SYNC_POINT_CALLBACK("BlockBasedTable::Open::PassedVerifyUniqueId", + &actual_unique_id); + } else { + TEST_SYNC_POINT_CALLBACK("BlockBasedTable::Open::SkippedVerifyUniqueId", + nullptr); + if (ioptions.verify_sst_unique_id_in_manifest && ioptions.logger) { + // A crude but isolated way of reporting unverified files. This should not + // be an ongoing concern so doesn't deserve a place in Statistics IMHO. 
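+      // (Effect of the counter below: log the first unverified file, then
+      // roughly one message per additional ~1000, to keep log noise bounded.)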
+      static std::atomic<size_t> unverified_count{0};
+      auto prev_count =
+          unverified_count.fetch_add(1, std::memory_order_relaxed);
+      if (prev_count == 0) {
+        ROCKS_LOG_WARN(
+            ioptions.logger,
+            "At least one SST file opened without unique ID to verify: %" PRIu64
+            ".sst",
+            cur_file_num);
+      } else if (prev_count % 1000 == 0) {
+        ROCKS_LOG_WARN(
+            ioptions.logger,
+            "Another ~1000 SST files opened without unique ID to verify");
+      }
+    }
+  }
+
+  // Set up the prefix extractor as needed
+  bool force_null_table_prefix_extractor = false;
+  TEST_SYNC_POINT_CALLBACK(
+      "BlockBasedTable::Open::ForceNullTablePrefixExtractor",
+      &force_null_table_prefix_extractor);
+  if (force_null_table_prefix_extractor) {
+    assert(!rep->table_prefix_extractor);
+  } else if (!PrefixExtractorChangedHelper(rep->table_properties.get(),
+                                           prefix_extractor.get())) {
+    // Establish fast path for unchanged prefix_extractor
+    rep->table_prefix_extractor = prefix_extractor;
+  } else {
+    // Current prefix_extractor doesn't match table
+    if (rep->table_properties) {
+      //**TODO: If/When the DBOptions has a registry in it, the ConfigOptions
+      // will need to use it
+      ConfigOptions config_options;
+      Status st = SliceTransform::CreateFromString(
+          config_options, rep->table_properties->prefix_extractor_name,
+          &(rep->table_prefix_extractor));
+      if (!st.ok()) {
+        //**TODO: Should this error be returned or swallowed?
+        ROCKS_LOG_ERROR(rep->ioptions.logger,
+                        "Failed to create prefix extractor[%s]: %s",
+                        rep->table_properties->prefix_extractor_name.c_str(),
+                        st.ToString().c_str());
+      }
+    }
+  }
+
+  // With properties loaded, we can set up portable/stable cache keys
+  SetupBaseCacheKey(rep->table_properties.get(), cur_db_session_id,
+                    cur_file_num, &rep->base_cache_key);
+
+  rep->persistent_cache_options =
+      PersistentCacheOptions(rep->table_options.persistent_cache,
+                             rep->base_cache_key, rep->ioptions.stats);
+
+  s = new_table->ReadRangeDelBlock(ro, prefetch_buffer.get(),
+                                   metaindex_iter.get(), internal_comparator,
+                                   &lookup_context);
+  if (!s.ok()) {
+    return s;
+  }
+  s = new_table->PrefetchIndexAndFilterBlocks(
+      ro, prefetch_buffer.get(), metaindex_iter.get(), new_table.get(),
+      prefetch_all, table_options, level, file_size,
+      max_file_size_for_l0_meta_pin, &lookup_context);
+
+  if (s.ok()) {
+    // Update tail prefetch stats
+    assert(prefetch_buffer.get() != nullptr);
+    if (tail_prefetch_stats != nullptr) {
+      assert(prefetch_buffer->min_offset_read() < file_size);
+      tail_prefetch_stats->RecordEffectiveSize(
+          static_cast<size_t>(file_size) - prefetch_buffer->min_offset_read());
+    }
+  }
+
+  if (s.ok() && table_reader_cache_res_mgr) {
+    std::size_t mem_usage = new_table->ApproximateMemoryUsage();
+    s = table_reader_cache_res_mgr->MakeCacheReservation(
+        mem_usage, &(rep->table_reader_cache_res_handle));
+    if (s.IsMemoryLimit()) {
+      s = Status::MemoryLimit(
+          "Can't allocate " +
+          kCacheEntryRoleToCamelString[static_cast<std::uint32_t>(
+              CacheEntryRole::kBlockBasedTableReader)] +
+          " due to memory limit based on "
+          "cache capacity for memory allocation");
+    }
+  }
+
+  if (s.ok()) {
+    *table_reader = std::move(new_table);
+  }
+  return s;
+}
+
+Status BlockBasedTable::PrefetchTail(
+    const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
+    bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
+    const bool prefetch_all, const bool preload_all,
+    std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats,
+    uint64_t tail_size, Logger* const logger) {
+  assert(tail_size <= file_size);
+
+  size_t tail_prefetch_size = 0;
+  if (tail_size != 0) {
+ tail_prefetch_size = tail_size; + } else { + if (tail_prefetch_stats != nullptr) { + // Multiple threads may get a 0 (no history) when running in parallel, + // but it will get cleared after the first of them finishes. + tail_prefetch_size = tail_prefetch_stats->GetSuggestedPrefetchSize(); + } + if (tail_prefetch_size == 0) { + // Before read footer, readahead backwards to prefetch data. Do more + // readahead if we're going to read index/filter. + // TODO: This may incorrectly select small readahead in case partitioned + // index/filter is enabled and top-level partition pinning is enabled. + // That's because we need to issue readahead before we read the + // properties, at which point we don't yet know the index type. + tail_prefetch_size = prefetch_all || preload_all ? 512 * 1024 : 4 * 1024; + + ROCKS_LOG_WARN(logger, + "Tail prefetch size %zu is calculated based on heuristics", + tail_prefetch_size); + } else { + ROCKS_LOG_WARN( + logger, + "Tail prefetch size %zu is calculated based on TailPrefetchStats", + tail_prefetch_size); + } + } + size_t prefetch_off; + size_t prefetch_len; + if (file_size < tail_prefetch_size) { + prefetch_off = 0; + prefetch_len = static_cast(file_size); + } else { + prefetch_off = static_cast(file_size - tail_prefetch_size); + prefetch_len = tail_prefetch_size; + } + TEST_SYNC_POINT_CALLBACK("BlockBasedTable::Open::TailPrefetchLen", + &tail_prefetch_size); + + // Try file system prefetch + if (!file->use_direct_io() && !force_direct_prefetch) { + if (!file->Prefetch(prefetch_off, prefetch_len, ro.rate_limiter_priority) + .IsNotSupported()) { + prefetch_buffer->reset(new FilePrefetchBuffer( + 0 /* readahead_size */, 0 /* max_readahead_size */, + false /* enable */, true /* track_min_offset */)); + return Status::OK(); + } + } + + // Use `FilePrefetchBuffer` + prefetch_buffer->reset(new FilePrefetchBuffer( + 0 /* readahead_size */, 0 /* max_readahead_size */, true /* enable */, + true /* track_min_offset */, false /* implicit_auto_readahead */, + 0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */, + nullptr /* fs */, nullptr /* clock */, stats, + FilePrefetchBufferUsage::kTableOpenPrefetchTail)); + + IOOptions opts; + Status s = file->PrepareIOOptions(ro, opts); + if (s.ok()) { + s = (*prefetch_buffer) + ->Prefetch(opts, file, prefetch_off, prefetch_len, + ro.rate_limiter_priority); + } + return s; +} + +Status BlockBasedTable::ReadPropertiesBlock( + const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, + InternalIterator* meta_iter, const SequenceNumber largest_seqno) { + Status s; + BlockHandle handle; + s = FindOptionalMetaBlock(meta_iter, kPropertiesBlockName, &handle); + + if (!s.ok()) { + ROCKS_LOG_WARN(rep_->ioptions.logger, + "Error when seeking to properties block from file: %s", + s.ToString().c_str()); + } else if (!handle.IsNull()) { + s = meta_iter->status(); + std::unique_ptr table_properties; + if (s.ok()) { + s = ReadTablePropertiesHelper( + ro, handle, rep_->file.get(), prefetch_buffer, rep_->footer, + rep_->ioptions, &table_properties, nullptr /* memory_allocator */); + } + IGNORE_STATUS_IF_ERROR(s); + + if (!s.ok()) { + ROCKS_LOG_WARN(rep_->ioptions.logger, + "Encountered error while reading data from properties " + "block %s", + s.ToString().c_str()); + } else { + assert(table_properties != nullptr); + rep_->table_properties = std::move(table_properties); + rep_->blocks_maybe_compressed = + rep_->table_properties->compression_name != + CompressionTypeToString(kNoCompression); + } + } else { + 
ROCKS_LOG_ERROR(rep_->ioptions.logger, + "Cannot find Properties block from file."); + } + + // Read the table properties, if provided. + if (rep_->table_properties) { + rep_->whole_key_filtering &= + IsFeatureSupported(*(rep_->table_properties), + BlockBasedTablePropertyNames::kWholeKeyFiltering, + rep_->ioptions.logger); + rep_->prefix_filtering &= IsFeatureSupported( + *(rep_->table_properties), + BlockBasedTablePropertyNames::kPrefixFiltering, rep_->ioptions.logger); + + rep_->index_key_includes_seq = + rep_->table_properties->index_key_is_user_key == 0; + rep_->index_value_is_full = + rep_->table_properties->index_value_is_delta_encoded == 0; + + // Update index_type with the true type. + // If table properties don't contain index type, we assume that the table + // is in very old format and has kBinarySearch index type. + auto& props = rep_->table_properties->user_collected_properties; + auto index_type_pos = props.find(BlockBasedTablePropertyNames::kIndexType); + if (index_type_pos != props.end()) { + rep_->index_type = static_cast( + DecodeFixed32(index_type_pos->second.c_str())); + } + auto min_ts_pos = props.find("rocksdb.timestamp_min"); + if (min_ts_pos != props.end()) { + rep_->min_timestamp = Slice(min_ts_pos->second); + } + auto max_ts_pos = props.find("rocksdb.timestamp_max"); + if (max_ts_pos != props.end()) { + rep_->max_timestamp = Slice(max_ts_pos->second); + } + + rep_->index_has_first_key = + rep_->index_type == BlockBasedTableOptions::kBinarySearchWithFirstKey; + + s = GetGlobalSequenceNumber(*(rep_->table_properties), largest_seqno, + &(rep_->global_seqno)); + if (!s.ok()) { + ROCKS_LOG_ERROR(rep_->ioptions.logger, "%s", s.ToString().c_str()); + } + } + return s; +} + +Status BlockBasedTable::ReadRangeDelBlock( + const ReadOptions& read_options, FilePrefetchBuffer* prefetch_buffer, + InternalIterator* meta_iter, + const InternalKeyComparator& internal_comparator, + BlockCacheLookupContext* lookup_context) { + Status s; + BlockHandle range_del_handle; + s = FindOptionalMetaBlock(meta_iter, kRangeDelBlockName, &range_del_handle); + if (!s.ok()) { + ROCKS_LOG_WARN( + rep_->ioptions.logger, + "Error when seeking to range delete tombstones block from file: %s", + s.ToString().c_str()); + } else if (!range_del_handle.IsNull()) { + Status tmp_status; + std::unique_ptr iter(NewDataBlockIterator( + read_options, range_del_handle, + /*input_iter=*/nullptr, BlockType::kRangeDeletion, + /*get_context=*/nullptr, lookup_context, prefetch_buffer, + /*for_compaction= */ false, /*async_read= */ false, tmp_status)); + assert(iter != nullptr); + s = iter->status(); + if (!s.ok()) { + ROCKS_LOG_WARN( + rep_->ioptions.logger, + "Encountered error while reading data from range del block %s", + s.ToString().c_str()); + IGNORE_STATUS_IF_ERROR(s); + } else { + rep_->fragmented_range_dels = + std::make_shared(std::move(iter), + internal_comparator); + } + } + return s; +} + +Status BlockBasedTable::PrefetchIndexAndFilterBlocks( + const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, + InternalIterator* meta_iter, BlockBasedTable* new_table, bool prefetch_all, + const BlockBasedTableOptions& table_options, const int level, + size_t file_size, size_t max_file_size_for_l0_meta_pin, + BlockCacheLookupContext* lookup_context) { + // Find filter handle and filter type + if (rep_->filter_policy) { + auto name = rep_->filter_policy->CompatibilityName(); + bool builtin_compatible = + strcmp(name, BuiltinFilterPolicy::kCompatibilityName()) == 0; + + for (const auto& [filter_type, prefix] : + 
{std::make_pair(Rep::FilterType::kFullFilter, kFullFilterBlockPrefix), + std::make_pair(Rep::FilterType::kPartitionedFilter, + kPartitionedFilterBlockPrefix), + std::make_pair(Rep::FilterType::kNoFilter, + kObsoleteFilterBlockPrefix)}) { + if (builtin_compatible) { + // This code is only here to deal with a hiccup in early 7.0.x where + // there was an unintentional name change in the SST files metadata. + // It should be OK to remove this in the future (late 2022) and just + // have the 'else' code. + // NOTE: the test:: names below are likely not needed but included + // out of caution + static const std::unordered_set kBuiltinNameAndAliases = { + BuiltinFilterPolicy::kCompatibilityName(), + test::LegacyBloomFilterPolicy::kClassName(), + test::FastLocalBloomFilterPolicy::kClassName(), + test::Standard128RibbonFilterPolicy::kClassName(), + "rocksdb.internal.DeprecatedBlockBasedBloomFilter", + BloomFilterPolicy::kClassName(), + RibbonFilterPolicy::kClassName(), + }; + + // For efficiency, do a prefix seek and see if the first match is + // good. + meta_iter->Seek(prefix); + if (meta_iter->status().ok() && meta_iter->Valid()) { + Slice key = meta_iter->key(); + if (key.starts_with(prefix)) { + key.remove_prefix(prefix.size()); + if (kBuiltinNameAndAliases.find(key.ToString()) != + kBuiltinNameAndAliases.end()) { + Slice v = meta_iter->value(); + Status s = rep_->filter_handle.DecodeFrom(&v); + if (s.ok()) { + rep_->filter_type = filter_type; + if (filter_type == Rep::FilterType::kNoFilter) { + ROCKS_LOG_WARN(rep_->ioptions.logger, + "Detected obsolete filter type in %s. Read " + "performance might suffer until DB is fully " + "re-compacted.", + rep_->file->file_name().c_str()); + } + break; + } + } + } + } + } else { + std::string filter_block_key = prefix + name; + if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle) + .ok()) { + rep_->filter_type = filter_type; + if (filter_type == Rep::FilterType::kNoFilter) { + ROCKS_LOG_WARN( + rep_->ioptions.logger, + "Detected obsolete filter type in %s. Read performance might " + "suffer until DB is fully re-compacted.", + rep_->file->file_name().c_str()); + } + break; + } + } + } + } + // Partition filters cannot be enabled without partition indexes + assert(rep_->filter_type != Rep::FilterType::kPartitionedFilter || + rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch); + + // Find compression dictionary handle + Status s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlockName, + &rep_->compression_dict_handle); + if (!s.ok()) { + return s; + } + + BlockBasedTableOptions::IndexType index_type = rep_->index_type; + + const bool use_cache = table_options.cache_index_and_filter_blocks; + + const bool maybe_flushed = + level == 0 && file_size <= max_file_size_for_l0_meta_pin; + std::function is_pinned = + [maybe_flushed, &is_pinned](PinningTier pinning_tier, + PinningTier fallback_pinning_tier) { + // Fallback to fallback would lead to infinite recursion. Disallow it. + assert(fallback_pinning_tier != PinningTier::kFallback); + + switch (pinning_tier) { + case PinningTier::kFallback: + return is_pinned(fallback_pinning_tier, + PinningTier::kNone /* fallback_pinning_tier */); + case PinningTier::kNone: + return false; + case PinningTier::kFlushedAndSimilar: + return maybe_flushed; + case PinningTier::kAll: + return true; + }; + + // In GCC, this is needed to suppress `control reaches end of non-void + // function [-Werror=return-type]`. 
+          assert(false);
+          return false;
+        };
+  const bool pin_top_level_index = is_pinned(
+      table_options.metadata_cache_options.top_level_index_pinning,
+      table_options.pin_top_level_index_and_filter ? PinningTier::kAll
+                                                   : PinningTier::kNone);
+  const bool pin_partition =
+      is_pinned(table_options.metadata_cache_options.partition_pinning,
+                table_options.pin_l0_filter_and_index_blocks_in_cache
+                    ? PinningTier::kFlushedAndSimilar
+                    : PinningTier::kNone);
+  const bool pin_unpartitioned =
+      is_pinned(table_options.metadata_cache_options.unpartitioned_pinning,
+                table_options.pin_l0_filter_and_index_blocks_in_cache
+                    ? PinningTier::kFlushedAndSimilar
+                    : PinningTier::kNone);
+
+  // pin the first level of index
+  const bool pin_index =
+      index_type == BlockBasedTableOptions::kTwoLevelIndexSearch
+          ? pin_top_level_index
+          : pin_unpartitioned;
+  // prefetch the first level of index
+  // WART: this might be redundant (unnecessary cache hit) if !pin_index,
+  // depending on prepopulate_block_cache option
+  const bool prefetch_index = prefetch_all || pin_index;
+
+  std::unique_ptr<IndexReader> index_reader;
+  s = new_table->CreateIndexReader(ro, prefetch_buffer, meta_iter, use_cache,
+                                   prefetch_index, pin_index, lookup_context,
+                                   &index_reader);
+  if (!s.ok()) {
+    return s;
+  }
+
+  rep_->index_reader = std::move(index_reader);
+
+  // The partitions of a partitioned index are always stored in the block
+  // cache. They therefore follow the configuration for pin and prefetch
+  // regardless of the value of cache_index_and_filter_blocks.
+  if (prefetch_all || pin_partition) {
+    s = rep_->index_reader->CacheDependencies(ro, pin_partition,
+                                              prefetch_buffer);
+  }
+  if (!s.ok()) {
+    return s;
+  }
+
+  // pin the first level of filter
+  const bool pin_filter =
+      rep_->filter_type == Rep::FilterType::kPartitionedFilter
+          ? pin_top_level_index
+          : pin_unpartitioned;
+  // prefetch the first level of filter
+  // WART: this might be redundant (unnecessary cache hit) if !pin_filter,
+  // depending on prepopulate_block_cache option
+  const bool prefetch_filter = prefetch_all || pin_filter;
+
+  if (rep_->filter_policy) {
+    auto filter = new_table->CreateFilterBlockReader(
+        ro, prefetch_buffer, use_cache, prefetch_filter, pin_filter,
+        lookup_context);
+
+    if (filter) {
+      // Refer to the comment above about partitioned indexes always being
+      // cached
+      if (prefetch_all || pin_partition) {
+        s = filter->CacheDependencies(ro, pin_partition, prefetch_buffer);
+        if (!s.ok()) {
+          return s;
+        }
+      }
+      rep_->filter = std::move(filter);
+    }
+  }
+
+  if (!rep_->compression_dict_handle.IsNull()) {
+    std::unique_ptr<UncompressionDictReader> uncompression_dict_reader;
+    s = UncompressionDictReader::Create(
+        this, ro, prefetch_buffer, use_cache,
+        prefetch_all || pin_unpartitioned, pin_unpartitioned, lookup_context,
+        &uncompression_dict_reader);
+    if (!s.ok()) {
+      return s;
+    }
+
+    rep_->uncompression_dict_reader = std::move(uncompression_dict_reader);
+  }
+
+  assert(s.ok());
+  return s;
+}
+
+void BlockBasedTable::SetupForCompaction() {
+  switch (rep_->ioptions.access_hint_on_compaction_start) {
+    case Options::NONE:
+      break;
+    case Options::NORMAL:
+      rep_->file->file()->Hint(FSRandomAccessFile::kNormal);
+      break;
+    case Options::SEQUENTIAL:
+      rep_->file->file()->Hint(FSRandomAccessFile::kSequential);
+      break;
+    case Options::WILLNEED:
+      rep_->file->file()->Hint(FSRandomAccessFile::kWillNeed);
+      break;
+    default:
+      assert(false);
+  }
+}
+
+std::shared_ptr<const TableProperties> BlockBasedTable::GetTableProperties()
+    const {
+  return rep_->table_properties;
+}
+
+size_t BlockBasedTable::ApproximateMemoryUsage() const {
+  size_t usage = 0;
+  if (rep_) {
+    usage += rep_->ApproximateMemoryUsage();
+  } else {
+    return usage;
+  }
+  if (rep_->filter) {
+    usage += rep_->filter->ApproximateMemoryUsage();
+  }
+  if (rep_->index_reader) {
+    usage += rep_->index_reader->ApproximateMemoryUsage();
+  }
+  if (rep_->uncompression_dict_reader) {
+    usage += rep_->uncompression_dict_reader->ApproximateMemoryUsage();
+  }
+  if (rep_->table_properties) {
+    usage += rep_->table_properties->ApproximateMemoryUsage();
+  }
+  return usage;
+}
+
+// Load the meta-index block from the file. On success, return the loaded
+// metaindex block and its iterator.
+Status BlockBasedTable::ReadMetaIndexBlock(
+    const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
+    std::unique_ptr<Block>* metaindex_block,
+    std::unique_ptr<InternalIterator>* iter) {
+  // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
+  // it is an empty block.
+  std::unique_ptr<Block> metaindex;
+  Status s = ReadAndParseBlockFromFile(
+      rep_->file.get(), prefetch_buffer, rep_->footer, ro,
+      rep_->footer.metaindex_handle(), &metaindex, rep_->ioptions,
+      rep_->create_context, true /*maybe_compressed*/,
+      UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options,
+      GetMemoryAllocator(rep_->table_options), false /* for_compaction */,
+      false /* async_read */);
+
+  if (!s.ok()) {
+    ROCKS_LOG_ERROR(rep_->ioptions.logger,
+                    "Encountered error while reading data from metaindex"
+                    " block %s",
+                    s.ToString().c_str());
+    return s;
+  }
+
+  *metaindex_block = std::move(metaindex);
+  // meta block uses bytewise comparator.
+  iter->reset(metaindex_block->get()->NewMetaIterator());
+  return Status::OK();
+}
+
+template <typename TBlocklike>
+Cache::Priority BlockBasedTable::GetCachePriority() const {
+  // Here we treat the legacy name "...index_and_filter_blocks..." to mean all
+  // metadata blocks that might go into block cache, EXCEPT only those needed
+  // for the read path (Get, etc.). TableProperties should not be needed on the
+  // read path (prefix extractor setting is an O(1) size special case that we
+  // are working not to require from TableProperties), so it is not given
+  // high-priority treatment if it should go into BlockCache.
+  if constexpr (TBlocklike::kBlockType == BlockType::kData ||
+                TBlocklike::kBlockType == BlockType::kProperties) {
+    return Cache::Priority::LOW;
+  } else if (rep_->table_options
+                 .cache_index_and_filter_blocks_with_high_priority) {
+    return Cache::Priority::HIGH;
+  } else {
+    return Cache::Priority::LOW;
+  }
+}
+
+template <typename TBlocklike>
+WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
+    const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
+    CachableEntry<TBlocklike>* out_parsed_block,
+    GetContext* get_context) const {
+  assert(out_parsed_block);
+  assert(out_parsed_block->IsEmpty());
+
+  Status s;
+  Statistics* statistics = rep_->ioptions.statistics.get();
+
+  // Lookup uncompressed cache first
+  if (block_cache) {
+    assert(!cache_key.empty());
+    auto cache_handle = block_cache.LookupFull(
+        cache_key, &rep_->create_context, GetCachePriority<TBlocklike>(),
+        statistics, rep_->ioptions.lowest_used_cache_tier);
+
+    // Avoid updating metrics here if the handle is not complete yet. This
+    // happens with MultiGet and secondary cache. So update the metrics only
+    // if it's a miss, or a hit with the value ready.
+    if (!cache_handle) {
+      UpdateCacheMissMetrics(TBlocklike::kBlockType, get_context);
+    } else {
+      TBlocklike* value = block_cache.Value(cache_handle);
+      if (value) {
+        UpdateCacheHitMetrics(TBlocklike::kBlockType, get_context,
+                              block_cache.get()->GetUsage(cache_handle));
+      }
+      out_parsed_block->SetCachedValue(value, block_cache.get(), cache_handle);
+      return s;
+    }
+  }
+
+  // Not found in the block cache; leave out_parsed_block empty so the caller
+  // falls back to reading the block from the file.
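+  // Editor's sketch (illustrative only; names match the functions defined in
+  // this file): a typical caller pairs this lookup with PutDataBlockToCache
+  // on a miss, roughly:
+  //
+  //   CachableEntry<Block> entry;
+  //   s = GetDataBlockFromCache(key, block_cache, &entry, get_context);
+  //   if (entry.IsEmpty()) {  // miss: read from file, then insert
+  //     s = PutDataBlockToCache(key, block_cache, &entry,
+  //                             std::move(contents), comp_type, dict,
+  //                             allocator, get_context);
+  //   }
+  //
+  // MaybeReadBlockAndLoadToCache below implements this sequence.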
+ assert(out_parsed_block->IsEmpty()); + + return s; +} + +template +WithBlocklikeCheck BlockBasedTable::PutDataBlockToCache( + const Slice& cache_key, BlockCacheInterface block_cache, + CachableEntry* out_parsed_block, BlockContents&& block_contents, + CompressionType block_comp_type, + const UncompressionDict& uncompression_dict, + MemoryAllocator* memory_allocator, GetContext* get_context) const { + const ImmutableOptions& ioptions = rep_->ioptions; + const uint32_t format_version = rep_->table_options.format_version; + assert(out_parsed_block); + assert(out_parsed_block->IsEmpty()); + + Status s; + Statistics* statistics = ioptions.stats; + + std::unique_ptr block_holder; + if (block_comp_type != kNoCompression) { + // Retrieve the uncompressed contents into a new buffer + BlockContents uncompressed_block_contents; + UncompressionContext context(block_comp_type); + UncompressionInfo info(context, uncompression_dict, block_comp_type); + s = UncompressBlockData(info, block_contents.data.data(), + block_contents.data.size(), + &uncompressed_block_contents, format_version, + ioptions, memory_allocator); + if (!s.ok()) { + return s; + } + rep_->create_context.Create(&block_holder, + std::move(uncompressed_block_contents)); + } else { + rep_->create_context.Create(&block_holder, std::move(block_contents)); + } + + // insert into uncompressed block cache + if (block_cache && block_holder->own_bytes()) { + size_t charge = block_holder->ApproximateMemoryUsage(); + BlockCacheTypedHandle* cache_handle = nullptr; + s = block_cache.InsertFull(cache_key, block_holder.get(), charge, + &cache_handle, GetCachePriority(), + rep_->ioptions.lowest_used_cache_tier); + + if (s.ok()) { + assert(cache_handle != nullptr); + out_parsed_block->SetCachedValue(block_holder.release(), + block_cache.get(), cache_handle); + + UpdateCacheInsertionMetrics(TBlocklike::kBlockType, get_context, charge, + s.IsOkOverwritten(), rep_->ioptions.stats); + } else { + RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); + } + } else { + out_parsed_block->SetOwnedValue(std::move(block_holder)); + } + + return s; +} + +std::unique_ptr BlockBasedTable::CreateFilterBlockReader( + const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, bool use_cache, + bool prefetch, bool pin, BlockCacheLookupContext* lookup_context) { + auto& rep = rep_; + auto filter_type = rep->filter_type; + if (filter_type == Rep::FilterType::kNoFilter) { + return std::unique_ptr(); + } + + assert(rep->filter_policy); + + switch (filter_type) { + case Rep::FilterType::kPartitionedFilter: + return PartitionedFilterBlockReader::Create( + this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context); + + case Rep::FilterType::kFullFilter: + return FullFilterBlockReader::Create(this, ro, prefetch_buffer, use_cache, + prefetch, pin, lookup_context); + + default: + // filter_type is either kNoFilter (exited the function at the first if), + // or it must be covered in this switch block + assert(false); + return std::unique_ptr(); + } +} + +// disable_prefix_seek should be set to true when prefix_extractor found in SST +// differs from the one in mutable_cf_options and index type is HashBasedIndex +InternalIteratorBase* BlockBasedTable::NewIndexIterator( + const ReadOptions& read_options, bool disable_prefix_seek, + IndexBlockIter* input_iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) const { + assert(rep_ != nullptr); + assert(rep_->index_reader != nullptr); + + // We don't return pinned data from index blocks, so no need + // to set 
`block_contents_pinned`. + return rep_->index_reader->NewIterator(read_options, disable_prefix_seek, + input_iter, get_context, + lookup_context); +} + +// TODO? +template <> +DataBlockIter* BlockBasedTable::InitBlockIterator( + const Rep* rep, Block* block, BlockType block_type, + DataBlockIter* input_iter, bool block_contents_pinned) { + return block->NewDataIterator(rep->internal_comparator.user_comparator(), + rep->get_global_seqno(block_type), input_iter, + rep->ioptions.stats, block_contents_pinned); +} + +// TODO? +template <> +IndexBlockIter* BlockBasedTable::InitBlockIterator( + const Rep* rep, Block* block, BlockType block_type, + IndexBlockIter* input_iter, bool block_contents_pinned) { + return block->NewIndexIterator( + rep->internal_comparator.user_comparator(), + rep->get_global_seqno(block_type), input_iter, rep->ioptions.stats, + /* total_order_seek */ true, rep->index_has_first_key, + rep->index_key_includes_seq, rep->index_value_is_full, + block_contents_pinned); +} + +// If contents is nullptr, this function looks up the block caches for the +// data block referenced by handle, and read the block from disk if necessary. +// If contents is non-null, it skips the cache lookup and disk read, since +// the caller has already read it. In both cases, if ro.fill_cache is true, +// it inserts the block into the block cache. +template +WithBlocklikeCheck +BlockBasedTable::MaybeReadBlockAndLoadToCache( + FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, + const BlockHandle& handle, const UncompressionDict& uncompression_dict, + bool for_compaction, CachableEntry* out_parsed_block, + GetContext* get_context, BlockCacheLookupContext* lookup_context, + BlockContents* contents, bool async_read) const { + assert(out_parsed_block != nullptr); + const bool no_io = (ro.read_tier == kBlockCacheTier); + BlockCacheInterface block_cache{ + rep_->table_options.block_cache.get()}; + + // First, try to get the block from the cache + // + // If either block cache is enabled, we'll try to read from it. + Status s; + CacheKey key_data; + Slice key; + bool is_cache_hit = false; + if (block_cache) { + // create key for block cache + key_data = GetCacheKey(rep_->base_cache_key, handle); + key = key_data.AsSlice(); + + if (!contents) { + s = GetDataBlockFromCache(key, block_cache, out_parsed_block, + get_context); + // Value could still be null at this point, so check the cache handle + // and update the read pattern for prefetching + if (out_parsed_block->GetValue() || out_parsed_block->GetCacheHandle()) { + // TODO(haoyu): Differentiate cache hit on uncompressed block cache and + // compressed block cache. + is_cache_hit = true; + if (prefetch_buffer) { + // Update the block details so that PrefetchBuffer can use the read + // pattern to determine if reads are sequential or not for + // prefetching. It should also take in account blocks read from cache. + prefetch_buffer->UpdateReadPattern( + handle.offset(), BlockSizeWithTrailer(handle), + ro.adaptive_readahead /*decrease_readahead_size*/); + } + } + } + + // Can't find the block from the cache. If I/O is allowed, read from the + // file. 
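+    // Editor's note (illustrative): the gate below means a file read happens
+    // here only when all three hold: the cache lookup missed, I/O is allowed
+    // (read_tier != kBlockCacheTier), and ro.fill_cache is set; otherwise the
+    // entry is left empty and RetrieveBlock falls back to an uncached read.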
+ if (out_parsed_block->GetValue() == nullptr && + out_parsed_block->GetCacheHandle() == nullptr && !no_io && + ro.fill_cache) { + Statistics* statistics = rep_->ioptions.stats; + const bool maybe_compressed = + TBlocklike::kBlockType != BlockType::kFilter && + TBlocklike::kBlockType != BlockType::kCompressionDictionary && + rep_->blocks_maybe_compressed; + const bool do_uncompress = maybe_compressed; + CompressionType contents_comp_type; + // Maybe serialized or uncompressed + BlockContents tmp_contents; + if (!contents) { + Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS + : READ_BLOCK_GET_MICROS; + StopWatch sw(rep_->ioptions.clock, statistics, histogram); + BlockFetcher block_fetcher( + rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, + &tmp_contents, rep_->ioptions, do_uncompress, maybe_compressed, + TBlocklike::kBlockType, uncompression_dict, + rep_->persistent_cache_options, + GetMemoryAllocator(rep_->table_options), + /*allocator=*/nullptr); + + // If prefetch_buffer is not allocated, it will fallback to synchronous + // reading of block contents. + if (async_read && prefetch_buffer != nullptr) { + s = block_fetcher.ReadAsyncBlockContents(); + if (!s.ok()) { + return s; + } + } else { + s = block_fetcher.ReadBlockContents(); + } + + contents_comp_type = block_fetcher.get_compression_type(); + contents = &tmp_contents; + if (get_context) { + switch (TBlocklike::kBlockType) { + case BlockType::kIndex: + ++get_context->get_context_stats_.num_index_read; + break; + case BlockType::kFilter: + case BlockType::kFilterPartitionIndex: + ++get_context->get_context_stats_.num_filter_read; + break; + default: + break; + } + } + } else { + contents_comp_type = GetBlockCompressionType(*contents); + } + + if (s.ok()) { + // If filling cache is allowed and a cache is configured, try to put the + // block to the cache. + s = PutDataBlockToCache( + key, block_cache, out_parsed_block, std::move(*contents), + contents_comp_type, uncompression_dict, + GetMemoryAllocator(rep_->table_options), get_context); + } + } + } + + // TODO: optimize so that lookup_context != nullptr implies the others + if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled() && + lookup_context) { + SaveLookupContextOrTraceRecord( + key, is_cache_hit, ro, out_parsed_block->GetValue(), lookup_context); + } + + assert(s.ok() || out_parsed_block->GetValue() == nullptr); + return s; +} + +template +WithBlocklikeCheck +BlockBasedTable::SaveLookupContextOrTraceRecord( + const Slice& block_key, bool is_cache_hit, const ReadOptions& ro, + const TBlocklike* parsed_block_value, + BlockCacheLookupContext* lookup_context) const { + assert(lookup_context); + size_t usage = 0; + uint64_t nkeys = 0; + if (parsed_block_value) { + // Approximate the number of keys in the block using restarts. + int interval = rep_->table_options.block_restart_interval; + nkeys = interval * GetBlockNumRestarts(*parsed_block_value); + // On average, the last restart should be just over half utilized. + // Specifically, 1..N should be N/2 + 0.5. For example, 7 -> 4, 8 -> 4.5. + // Use the get_id to alternate between rounding up vs. down. 
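+    // Worked example (editor's illustration): with
+    // block_restart_interval = 16 and 3 restarts, nkeys starts at 48;
+    // subtracting (16 - rounding) / 2 gives 40 when rounding == 0 and 41
+    // when rounding == 1, so alternating get_ids average to ~40.5 keys.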
+ if (nkeys > 0) { + bool rounding = static_cast(lookup_context->get_id) & 1; + nkeys -= (interval - rounding) / 2; + } + usage = parsed_block_value->ApproximateMemoryUsage(); + } + TraceType trace_block_type = TraceType::kTraceMax; + switch (TBlocklike::kBlockType) { + case BlockType::kData: + trace_block_type = TraceType::kBlockTraceDataBlock; + break; + case BlockType::kFilter: + case BlockType::kFilterPartitionIndex: + trace_block_type = TraceType::kBlockTraceFilterBlock; + break; + case BlockType::kCompressionDictionary: + trace_block_type = TraceType::kBlockTraceUncompressionDictBlock; + break; + case BlockType::kRangeDeletion: + trace_block_type = TraceType::kBlockTraceRangeDeletionBlock; + break; + case BlockType::kIndex: + trace_block_type = TraceType::kBlockTraceIndexBlock; + break; + default: + // This cannot happen. + assert(false); + break; + } + const bool no_io = ro.read_tier == kBlockCacheTier; + bool no_insert = no_io || !ro.fill_cache; + if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock( + trace_block_type, lookup_context->caller)) { + // Make a copy of the block key here since it will be logged later. + lookup_context->FillLookupContext(is_cache_hit, no_insert, trace_block_type, + /*block_size=*/usage, + block_key.ToString(), nkeys); + + // Defer logging the access to Get() and MultiGet() to trace additional + // information, e.g., referenced_key + } else { + // Avoid making copy of block_key if it doesn't need to be saved in + // BlockCacheLookupContext + lookup_context->FillLookupContext(is_cache_hit, no_insert, trace_block_type, + /*block_size=*/usage, + /*block_key=*/{}, nkeys); + + // Fill in default values for irrelevant/unknown fields + FinishTraceRecord(*lookup_context, block_key, + lookup_context->referenced_key, + /*does_referenced_key_exist*/ false, + /*referenced_data_size*/ 0); + } +} + +void BlockBasedTable::FinishTraceRecord( + const BlockCacheLookupContext& lookup_context, const Slice& block_key, + const Slice& referenced_key, bool does_referenced_key_exist, + uint64_t referenced_data_size) const { + // Avoid making copy of referenced_key if it doesn't need to be saved in + // BlockCacheLookupContext + BlockCacheTraceRecord access_record( + rep_->ioptions.clock->NowMicros(), + /*block_key=*/"", lookup_context.block_type, lookup_context.block_size, + rep_->cf_id_for_tracing(), + /*cf_name=*/"", rep_->level_for_tracing(), rep_->sst_number_for_tracing(), + lookup_context.caller, lookup_context.is_cache_hit, + lookup_context.no_insert, lookup_context.get_id, + lookup_context.get_from_user_specified_snapshot, + /*referenced_key=*/"", referenced_data_size, + lookup_context.num_keys_in_block, does_referenced_key_exist); + // TODO: Should handle status here? 
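+  // Editor's note: the trace write's own status is intentionally swallowed
+  // via PermitUncheckedError() below; a failed trace record should not fail
+  // the read path.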
+ block_cache_tracer_ + ->WriteBlockAccess(access_record, block_key, rep_->cf_name_for_tracing(), + referenced_key) + .PermitUncheckedError(); +} + +template +WithBlocklikeCheck BlockBasedTable::RetrieveBlock( + FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, + const BlockHandle& handle, const UncompressionDict& uncompression_dict, + CachableEntry* out_parsed_block, GetContext* get_context, + BlockCacheLookupContext* lookup_context, bool for_compaction, + bool use_cache, bool async_read) const { + assert(out_parsed_block); + assert(out_parsed_block->IsEmpty()); + + Status s; + if (use_cache) { + s = MaybeReadBlockAndLoadToCache( + prefetch_buffer, ro, handle, uncompression_dict, for_compaction, + out_parsed_block, get_context, lookup_context, + /*contents=*/nullptr, async_read); + + if (!s.ok()) { + return s; + } + + if (out_parsed_block->GetValue() != nullptr || + out_parsed_block->GetCacheHandle() != nullptr) { + assert(s.ok()); + return s; + } + } + + assert(out_parsed_block->IsEmpty()); + + const bool no_io = ro.read_tier == kBlockCacheTier; + if (no_io) { + return Status::Incomplete("no blocking io"); + } + + const bool maybe_compressed = + TBlocklike::kBlockType != BlockType::kFilter && + TBlocklike::kBlockType != BlockType::kCompressionDictionary && + rep_->blocks_maybe_compressed; + std::unique_ptr block; + + { + Histograms histogram = + for_compaction ? READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS; + StopWatch sw(rep_->ioptions.clock, rep_->ioptions.stats, histogram); + s = ReadAndParseBlockFromFile( + rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &block, + rep_->ioptions, rep_->create_context, maybe_compressed, + uncompression_dict, rep_->persistent_cache_options, + GetMemoryAllocator(rep_->table_options), for_compaction, async_read); + + if (get_context) { + switch (TBlocklike::kBlockType) { + case BlockType::kIndex: + ++(get_context->get_context_stats_.num_index_read); + break; + case BlockType::kFilter: + case BlockType::kFilterPartitionIndex: + ++(get_context->get_context_stats_.num_filter_read); + break; + default: + break; + } + } + } + + if (!s.ok()) { + return s; + } + + out_parsed_block->SetOwnedValue(std::move(block)); + + assert(s.ok()); + return s; +} + +BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState( + const BlockBasedTable* table, + UnorderedMap>* block_map) + : table_(table), block_map_(block_map) {} + +InternalIteratorBase* +BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator( + const BlockHandle& handle) { + // Return a block iterator on the index partition + auto block = block_map_->find(handle.offset()); + // block_map_ must be exhaustive + if (block == block_map_->end()) { + assert(false); + // Signal problem to caller + return nullptr; + } + const Rep* rep = table_->get_rep(); + assert(rep); + + Statistics* kNullStats = nullptr; + // We don't return pinned data from index blocks, so no need + // to set `block_contents_pinned`. + return block->second.GetValue()->NewIndexIterator( + rep->internal_comparator.user_comparator(), + rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true, + rep->index_has_first_key, rep->index_key_includes_seq, + rep->index_value_is_full); +} + +// This will be broken if the user specifies an unusual implementation +// of Options.comparator, or if the user specifies an unusual +// definition of prefixes in BlockBasedTableOptions.filter_policy. 
+// In particular, we require the following three properties: +// +// 1) key.starts_with(prefix(key)) +// 2) Compare(prefix(key), key) <= 0. +// 3) If Compare(key1, key2) <= 0, then Compare(prefix(key1), prefix(key2)) <= 0 +// +// If read_options.read_tier == kBlockCacheTier, this method will do no I/O and +// will return true if the filter block is not in memory and not found in block +// cache. +// +// REQUIRES: this method shouldn't be called while the DB lock is held. +bool BlockBasedTable::PrefixRangeMayMatch( + const Slice& internal_key, const ReadOptions& read_options, + const SliceTransform* options_prefix_extractor, + const bool need_upper_bound_check, BlockCacheLookupContext* lookup_context, + bool* filter_checked) const { + if (!rep_->filter_policy) { + return true; + } + + const SliceTransform* prefix_extractor; + + if (rep_->table_prefix_extractor == nullptr) { + if (need_upper_bound_check) { + return true; + } + prefix_extractor = options_prefix_extractor; + } else { + prefix_extractor = rep_->table_prefix_extractor.get(); + } + auto ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size(); + auto user_key_without_ts = + ExtractUserKeyAndStripTimestamp(internal_key, ts_sz); + if (!prefix_extractor->InDomain(user_key_without_ts)) { + return true; + } + + bool may_match = true; + + FilterBlockReader* const filter = rep_->filter.get(); + *filter_checked = false; + if (filter != nullptr) { + const bool no_io = read_options.read_tier == kBlockCacheTier; + + const Slice* const const_ikey_ptr = &internal_key; + may_match = filter->RangeMayExist( + read_options.iterate_upper_bound, user_key_without_ts, prefix_extractor, + rep_->internal_comparator.user_comparator(), const_ikey_ptr, + filter_checked, need_upper_bound_check, no_io, lookup_context, + read_options); + } + + return may_match; +} + +bool BlockBasedTable::PrefixExtractorChanged( + const SliceTransform* prefix_extractor) const { + if (prefix_extractor == nullptr) { + return true; + } else if (prefix_extractor == rep_->table_prefix_extractor.get()) { + return false; + } else { + return PrefixExtractorChangedHelper(rep_->table_properties.get(), + prefix_extractor); + } +} + +Statistics* BlockBasedTable::GetStatistics() const { + return rep_->ioptions.stats; +} +bool BlockBasedTable::IsLastLevel() const { + return rep_->level == rep_->ioptions.num_levels - 1; +} + +InternalIterator* BlockBasedTable::NewIterator( + const ReadOptions& read_options, const SliceTransform* prefix_extractor, + Arena* arena, bool skip_filters, TableReaderCaller caller, + size_t compaction_readahead_size, bool allow_unprepared_value) { + BlockCacheLookupContext lookup_context{caller}; + bool need_upper_bound_check = + read_options.auto_prefix_mode || PrefixExtractorChanged(prefix_extractor); + std::unique_ptr> index_iter(NewIndexIterator( + read_options, + /*disable_prefix_seek=*/need_upper_bound_check && + rep_->index_type == BlockBasedTableOptions::kHashSearch, + /*input_iter=*/nullptr, /*get_context=*/nullptr, &lookup_context)); + if (arena == nullptr) { + return new BlockBasedTableIterator( + this, read_options, rep_->internal_comparator, std::move(index_iter), + !skip_filters && !read_options.total_order_seek && + prefix_extractor != nullptr, + need_upper_bound_check, prefix_extractor, caller, + compaction_readahead_size, allow_unprepared_value); + } else { + auto* mem = arena->AllocateAligned(sizeof(BlockBasedTableIterator)); + return new (mem) BlockBasedTableIterator( + this, read_options, rep_->internal_comparator, 
std::move(index_iter), + !skip_filters && !read_options.total_order_seek && + prefix_extractor != nullptr, + need_upper_bound_check, prefix_extractor, caller, + compaction_readahead_size, allow_unprepared_value); + } +} + +FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator( + const ReadOptions& read_options) { + if (rep_->fragmented_range_dels == nullptr) { + return nullptr; + } + SequenceNumber snapshot = kMaxSequenceNumber; + if (read_options.snapshot != nullptr) { + snapshot = read_options.snapshot->GetSequenceNumber(); + } + return new FragmentedRangeTombstoneIterator(rep_->fragmented_range_dels, + rep_->internal_comparator, + snapshot, read_options.timestamp); +} + +bool BlockBasedTable::FullFilterKeyMayMatch( + FilterBlockReader* filter, const Slice& internal_key, const bool no_io, + const SliceTransform* prefix_extractor, GetContext* get_context, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) const { + if (filter == nullptr) { + return true; + } + Slice user_key = ExtractUserKey(internal_key); + const Slice* const const_ikey_ptr = &internal_key; + bool may_match = true; + size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size(); + Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz); + if (rep_->whole_key_filtering) { + may_match = filter->KeyMayMatch(user_key_without_ts, no_io, const_ikey_ptr, + get_context, lookup_context, read_options); + if (may_match) { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level); + } else { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); + } + } else if (!PrefixExtractorChanged(prefix_extractor) && + prefix_extractor->InDomain(user_key_without_ts)) { + // FIXME ^^^: there should be no reason for Get() to depend on current + // prefix_extractor at all. It should always use table_prefix_extractor. 
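+    // Editor's illustration (hypothetical extractor): with a fixed-length
+    // prefix extractor of 3 bytes, a user key "foobar" transforms to "foo",
+    // and it is "foo" (not the whole key) that is checked against the filter
+    // below.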
+    may_match = filter->PrefixMayMatch(
+        prefix_extractor->Transform(user_key_without_ts), no_io,
+        const_ikey_ptr, get_context, lookup_context, read_options);
+    RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED);
+    if (may_match) {
+      // Includes prefix stats
+      PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level);
+    } else {
+      RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_USEFUL);
+      // Includes prefix stats
+      PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
+    }
+  }
+  return may_match;
+}
+
+void BlockBasedTable::FullFilterKeysMayMatch(
+    FilterBlockReader* filter, MultiGetRange* range, const bool no_io,
+    const SliceTransform* prefix_extractor,
+    BlockCacheLookupContext* lookup_context,
+    const ReadOptions& read_options) const {
+  if (filter == nullptr) {
+    return;
+  }
+  uint64_t before_keys = range->KeysLeft();
+  assert(before_keys > 0);  // Caller should ensure
+  if (rep_->whole_key_filtering) {
+    filter->KeysMayMatch(range, no_io, lookup_context, read_options);
+    uint64_t after_keys = range->KeysLeft();
+    if (after_keys) {
+      RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys);
+      PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, after_keys,
+                                rep_->level);
+    }
+    uint64_t filtered_keys = before_keys - after_keys;
+    if (filtered_keys) {
+      RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL, filtered_keys);
+      PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys,
+                                rep_->level);
+    }
+  } else if (!PrefixExtractorChanged(prefix_extractor)) {
+    // FIXME ^^^: there should be no reason for MultiGet() to depend on current
+    // prefix_extractor at all. It should always use table_prefix_extractor.
+    filter->PrefixesMayMatch(range, prefix_extractor, false, lookup_context,
+                             read_options);
+    RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys);
+    uint64_t after_keys = range->KeysLeft();
+    if (after_keys) {
+      // Includes prefix stats
+      PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, after_keys,
+                                rep_->level);
+    }
+    uint64_t filtered_keys = before_keys - after_keys;
+    if (filtered_keys) {
+      RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_USEFUL,
+                 filtered_keys);
+      // Includes prefix stats
+      PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys,
+                                rep_->level);
+    }
+  }
+}
+
+Status BlockBasedTable::ApproximateKeyAnchors(const ReadOptions& read_options,
+                                              std::vector<Anchor>& anchors) {
+  // We iterate over the whole index block here. A more efficient
+  // implementation would be possible if we pushed this operation into
+  // IndexReader. For example, we could directly sample from restart block
+  // entries in the index block and only read the keys needed. Here we take a
+  // simple solution. Performance is likely not to be a problem. We are
+  // compacting the whole file, so all keys will be read out anyway. An extra
+  // read to the index block might be a small share of the overhead. We can
+  // try to optimize if needed.
+  //
+  // `CacheDependencies()` brings all the blocks into cache using one I/O. That
+  // way the full index scan usually finds the index data it is looking for in
+  // cache rather than doing an I/O for each "dependency" (partition).
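+  // Editor's arithmetic note (illustrative figures): with
+  // num_data_blocks = 1000 and kMaxNumAnchors = 128, the sampling loop below
+  // uses num_blocks_per_anchor = 1000 / 128 = 7 (integer division), emitting
+  // an anchor every 7th block, i.e. about 142 anchors; the nominal maximum
+  // is therefore approximate rather than a hard cap.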
+  Status s = rep_->index_reader->CacheDependencies(
+      read_options, false /* pin */, nullptr /* prefetch_buffer */);
+  if (!s.ok()) {
+    return s;
+  }
+
+  IndexBlockIter iiter_on_stack;
+  auto iiter = NewIndexIterator(
+      read_options, /*disable_prefix_seek=*/false, &iiter_on_stack,
+      /*get_context=*/nullptr, /*lookup_context=*/nullptr);
+  std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
+  if (iiter != &iiter_on_stack) {
+    iiter_unique_ptr.reset(iiter);
+  }
+
+  // If needed, the threshold could be made more adaptive. For example, it
+  // could be based on size, so that a larger file would be sampled at more
+  // anchors than a smaller file. The size might also need to be passed in by
+  // the caller based on total compaction size.
+  const uint64_t kMaxNumAnchors = uint64_t{128};
+  uint64_t num_blocks = this->GetTableProperties()->num_data_blocks;
+  uint64_t num_blocks_per_anchor = num_blocks / kMaxNumAnchors;
+  if (num_blocks_per_anchor == 0) {
+    num_blocks_per_anchor = 1;
+  }
+
+  uint64_t count = 0;
+  std::string last_key;
+  uint64_t range_size = 0;
+  uint64_t prev_offset = 0;
+  for (iiter->SeekToFirst(); iiter->Valid(); iiter->Next()) {
+    const BlockHandle& bh = iiter->value().handle;
+    range_size += bh.offset() + bh.size() - prev_offset;
+    prev_offset = bh.offset() + bh.size();
+    if (++count % num_blocks_per_anchor == 0) {
+      count = 0;
+      anchors.emplace_back(iiter->user_key(), range_size);
+      range_size = 0;
+    } else {
+      last_key = iiter->user_key().ToString();
+    }
+  }
+  if (count != 0) {
+    anchors.emplace_back(last_key, range_size);
+  }
+  return Status::OK();
+}
+
+bool BlockBasedTable::TimestampMayMatch(
+    const ReadOptions& read_options) const {
+  if (read_options.timestamp != nullptr && !rep_->min_timestamp.empty()) {
+    RecordTick(rep_->ioptions.stats, TIMESTAMP_FILTER_TABLE_CHECKED);
+    auto read_ts = read_options.timestamp;
+    auto comparator = rep_->internal_comparator.user_comparator();
+    if (comparator->CompareTimestamp(*read_ts, rep_->min_timestamp) < 0) {
+      RecordTick(rep_->ioptions.stats, TIMESTAMP_FILTER_TABLE_FILTERED);
+      return false;
+    }
+  }
+  return true;
+}
+
+Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
+                            GetContext* get_context,
+                            const SliceTransform* prefix_extractor,
+                            bool skip_filters) {
+  // Similar to Bloom filter !may_match
+  // If timestamp is beyond the range of the table, skip
+  if (!TimestampMayMatch(read_options)) {
+    return Status::OK();
+  }
+  assert(key.size() >= 8);  // key must be internal key
+  assert(get_context != nullptr);
+  Status s;
+  const bool no_io = read_options.read_tier == kBlockCacheTier;
+
+  FilterBlockReader* const filter =
+      !skip_filters ? rep_->filter.get() : nullptr;
+
+  // First check the full filter.
+  // If the full filter is not useful, then go into each block.
+  uint64_t tracing_get_id = get_context->get_tracing_get_id();
+  BlockCacheLookupContext lookup_context{
+      TableReaderCaller::kUserGet, tracing_get_id,
+      /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
+  if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
+    // Trace the key since it contains both user key and sequence number.
+    lookup_context.referenced_key = key.ToString();
+    lookup_context.get_from_user_specified_snapshot =
+        read_options.snapshot != nullptr;
+  }
+  TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
+  const bool may_match =
+      FullFilterKeyMayMatch(filter, key, no_io, prefix_extractor, get_context,
+                            &lookup_context, read_options);
+  TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
+  if (may_match) {
+    IndexBlockIter iiter_on_stack;
+    // if prefix_extractor found in block differs from options, disable
+    // BlockPrefixIndex. Only do this check when index_type is kHashSearch.
+    bool need_upper_bound_check = false;
+    if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
+      need_upper_bound_check = PrefixExtractorChanged(prefix_extractor);
+    }
+    auto iiter =
+        NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
+                         get_context, &lookup_context);
+    std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
+    if (iiter != &iiter_on_stack) {
+      iiter_unique_ptr.reset(iiter);
+    }
+
+    size_t ts_sz =
+        rep_->internal_comparator.user_comparator()->timestamp_size();
+    bool matched = false;  // if such user key matched a key in SST
+    bool done = false;
+    for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
+      IndexValue v = iiter->value();
+
+      if (!v.first_internal_key.empty() && !skip_filters &&
+          UserComparatorWrapper(rep_->internal_comparator.user_comparator())
+                  .CompareWithoutTimestamp(
+                      ExtractUserKey(key),
+                      ExtractUserKey(v.first_internal_key)) < 0) {
+        // The requested key falls between highest key in previous block and
+        // lowest key in current block.
+        break;
+      }
+
+      BlockCacheLookupContext lookup_data_block_context{
+          TableReaderCaller::kUserGet, tracing_get_id,
+          /*get_from_user_specified_snapshot=*/read_options.snapshot !=
+              nullptr};
+      bool does_referenced_key_exist = false;
+      DataBlockIter biter;
+      uint64_t referenced_data_size = 0;
+      Status tmp_status;
+      NewDataBlockIterator<DataBlockIter>(
+          read_options, v.handle, &biter, BlockType::kData, get_context,
+          &lookup_data_block_context, /*prefetch_buffer=*/nullptr,
+          /*for_compaction=*/false, /*async_read=*/false, tmp_status);
+
+      if (no_io && biter.status().IsIncomplete()) {
+        // couldn't get block from block_cache
+        // Update Saver.state to Found because we are only looking for
+        // whether we can guarantee the key is not there when "no_io" is set
+        get_context->MarkKeyMayExist();
+        s = biter.status();
+        break;
+      }
+      if (!biter.status().ok()) {
+        s = biter.status();
+        break;
+      }
+
+      bool may_exist = biter.SeekForGet(key);
+      // If user-specified timestamp is supported, we cannot end the search
+      // just because hash index lookup indicates the key+ts does not exist.
+      if (!may_exist && ts_sz == 0) {
+        // HashSeek cannot find the key in this block, and the iter is not at
+        // the end of the block, i.e. the key cannot be in the following
+        // blocks either. In this case, the seek_key cannot be found, so we
+        // break from the top level for-loop.
+        done = true;
+      } else {
+        // Call the *saver function on each entry/block until it returns false
+        for (; biter.Valid(); biter.Next()) {
+          ParsedInternalKey parsed_key;
+          Status pik_status = ParseInternalKey(
+              biter.key(), &parsed_key, false /* log_err_key */);  // TODO
+          if (!pik_status.ok()) {
+            s = pik_status;
+          }
+
+          if (!get_context->SaveValue(
+                  parsed_key, biter.value(), &matched,
+                  biter.IsValuePinned() ?
&biter : nullptr)) { + if (get_context->State() == GetContext::GetState::kFound) { + does_referenced_key_exist = true; + referenced_data_size = biter.key().size() + biter.value().size(); + } + done = true; + break; + } + } + s = biter.status(); + if (!s.ok()) { + break; + } + } + // Write the block cache access record. + if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) { + // Avoid making copy of block_key, cf_name, and referenced_key when + // constructing the access record. + Slice referenced_key; + if (does_referenced_key_exist) { + referenced_key = biter.key(); + } else { + referenced_key = key; + } + FinishTraceRecord(lookup_data_block_context, + lookup_data_block_context.block_key, referenced_key, + does_referenced_key_exist, referenced_data_size); + } + + if (done) { + // Avoid the extra Next which is expensive in two-level indexes + break; + } + } + if (matched && filter != nullptr) { + if (rep_->whole_key_filtering) { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); + } else { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_TRUE_POSITIVE); + } + // Includes prefix stats + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, + rep_->level); + } + + if (s.ok() && !iiter->status().IsNotFound()) { + s = iiter->status(); + } + } + + return s; +} + +Status BlockBasedTable::MultiGetFilter(const ReadOptions& read_options, + const SliceTransform* prefix_extractor, + MultiGetRange* mget_range) { + if (mget_range->empty()) { + // Caller should ensure non-empty (performance bug) + assert(false); + return Status::OK(); // Nothing to do + } + + FilterBlockReader* const filter = rep_->filter.get(); + if (!filter) { + return Status::OK(); + } + + // First check the full filter + // If full filter not useful, Then go into each block + const bool no_io = read_options.read_tier == kBlockCacheTier; + uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId; + if (mget_range->begin()->get_context) { + tracing_mget_id = mget_range->begin()->get_context->get_tracing_get_id(); + } + BlockCacheLookupContext lookup_context{ + TableReaderCaller::kUserMultiGet, tracing_mget_id, + /*_get_from_user_specified_snapshot=*/read_options.snapshot != nullptr}; + FullFilterKeysMayMatch(filter, mget_range, no_io, prefix_extractor, + &lookup_context, read_options); + + return Status::OK(); +} + +Status BlockBasedTable::Prefetch(const ReadOptions& read_options, + const Slice* const begin, + const Slice* const end) { + auto& comparator = rep_->internal_comparator; + UserComparatorWrapper user_comparator(comparator.user_comparator()); + // pre-condition + if (begin && end && comparator.Compare(*begin, *end) > 0) { + return Status::InvalidArgument(*begin, *end); + } + BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch}; + IndexBlockIter iiter_on_stack; + auto iiter = NewIndexIterator(read_options, /*need_upper_bound_check=*/false, + &iiter_on_stack, /*get_context=*/nullptr, + &lookup_context); + std::unique_ptr> iiter_unique_ptr; + if (iiter != &iiter_on_stack) { + iiter_unique_ptr = std::unique_ptr>(iiter); + } + + if (!iiter->status().ok()) { + // error opening index iterator + return iiter->status(); + } + + // indicates if we are on the last page that need to be pre-fetched + bool prefetching_boundary_page = false; + + for (begin ? 
iiter->Seek(*begin) : iiter->SeekToFirst(); iiter->Valid(); + iiter->Next()) { + BlockHandle block_handle = iiter->value().handle; + const bool is_user_key = !rep_->index_key_includes_seq; + if (end && + ((!is_user_key && comparator.Compare(iiter->key(), *end) >= 0) || + (is_user_key && + user_comparator.Compare(iiter->key(), ExtractUserKey(*end)) >= 0))) { + if (prefetching_boundary_page) { + break; + } + + // The index entry represents the last key in the data block. + // We should load this page into memory as well, but no more + prefetching_boundary_page = true; + } + + // Load the block specified by the block_handle into the block cache + DataBlockIter biter; + Status tmp_status; + NewDataBlockIterator( + read_options, block_handle, &biter, /*type=*/BlockType::kData, + /*get_context=*/nullptr, &lookup_context, + /*prefetch_buffer=*/nullptr, /*for_compaction=*/false, + /*async_read=*/false, tmp_status); + + if (!biter.status().ok()) { + // there was an unexpected error while pre-fetching + return biter.status(); + } + } + + return Status::OK(); +} + +Status BlockBasedTable::VerifyChecksum(const ReadOptions& read_options, + TableReaderCaller caller) { + Status s; + // Check Meta blocks + std::unique_ptr metaindex; + std::unique_ptr metaindex_iter; + s = ReadMetaIndexBlock(read_options, nullptr /* prefetch buffer */, + &metaindex, &metaindex_iter); + if (s.ok()) { + s = VerifyChecksumInMetaBlocks(read_options, metaindex_iter.get()); + if (!s.ok()) { + return s; + } + } else { + return s; + } + // Check Data blocks + IndexBlockIter iiter_on_stack; + BlockCacheLookupContext context{caller}; + InternalIteratorBase* iiter = NewIndexIterator( + read_options, /*disable_prefix_seek=*/false, &iiter_on_stack, + /*get_context=*/nullptr, &context); + std::unique_ptr> iiter_unique_ptr; + if (iiter != &iiter_on_stack) { + iiter_unique_ptr = std::unique_ptr>(iiter); + } + if (!iiter->status().ok()) { + // error opening index iterator + return iiter->status(); + } + s = VerifyChecksumInBlocks(read_options, iiter); + return s; +} + +Status BlockBasedTable::VerifyChecksumInBlocks( + const ReadOptions& read_options, + InternalIteratorBase* index_iter) { + Status s; + // We are scanning the whole file, so no need to do exponential + // increasing of the buffer size. + size_t readahead_size = (read_options.readahead_size != 0) + ? read_options.readahead_size + : rep_->table_options.max_auto_readahead_size; + // FilePrefetchBuffer doesn't work in mmap mode and readahead is not + // needed there. + FilePrefetchBuffer prefetch_buffer( + readahead_size /* readahead_size */, + readahead_size /* max_readahead_size */, + !rep_->ioptions.allow_mmap_reads /* enable */); + + for (index_iter->SeekToFirst(); index_iter->Valid(); index_iter->Next()) { + s = index_iter->status(); + if (!s.ok()) { + break; + } + BlockHandle handle = index_iter->value().handle; + BlockContents contents; + BlockFetcher block_fetcher( + rep_->file.get(), &prefetch_buffer, rep_->footer, read_options, handle, + &contents, rep_->ioptions, false /* decompress */, + false /*maybe_compressed*/, BlockType::kData, + UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options); + s = block_fetcher.ReadBlockContents(); + if (!s.ok()) { + break; + } + } + if (s.ok()) { + // In the case of two level indexes, we would have exited the above loop + // by checking index_iter->Valid(), but Valid() might have returned false + // due to an IO error. 
So check the index_iter status + s = index_iter->status(); + } + return s; +} + +BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( + const Slice& meta_block_name) { + if (meta_block_name.starts_with(kFullFilterBlockPrefix)) { + return BlockType::kFilter; + } + + if (meta_block_name.starts_with(kPartitionedFilterBlockPrefix)) { + return BlockType::kFilterPartitionIndex; + } + + if (meta_block_name == kPropertiesBlockName) { + return BlockType::kProperties; + } + + if (meta_block_name == kCompressionDictBlockName) { + return BlockType::kCompressionDictionary; + } + + if (meta_block_name == kRangeDelBlockName) { + return BlockType::kRangeDeletion; + } + + if (meta_block_name == kHashIndexPrefixesBlock) { + return BlockType::kHashIndexPrefixes; + } + + if (meta_block_name == kHashIndexPrefixesMetadataBlock) { + return BlockType::kHashIndexMetadata; + } + + if (meta_block_name.starts_with(kObsoleteFilterBlockPrefix)) { + // Obsolete but possible in old files + return BlockType::kInvalid; + } + + assert(false); + return BlockType::kInvalid; +} + +Status BlockBasedTable::VerifyChecksumInMetaBlocks( + const ReadOptions& read_options, InternalIteratorBase* index_iter) { + Status s; + for (index_iter->SeekToFirst(); index_iter->Valid(); index_iter->Next()) { + s = index_iter->status(); + if (!s.ok()) { + break; + } + BlockHandle handle; + Slice input = index_iter->value(); + s = handle.DecodeFrom(&input); + BlockContents contents; + const Slice meta_block_name = index_iter->key(); + if (meta_block_name == kPropertiesBlockName) { + // Unfortunate special handling for properties block checksum w/ + // global seqno + std::unique_ptr table_properties; + s = ReadTablePropertiesHelper(read_options, handle, rep_->file.get(), + nullptr /* prefetch_buffer */, rep_->footer, + rep_->ioptions, &table_properties, + nullptr /* memory_allocator */); + } else { + s = BlockFetcher( + rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer, + read_options, handle, &contents, rep_->ioptions, + false /* decompress */, false /*maybe_compressed*/, + GetBlockTypeForMetaBlockByName(meta_block_name), + UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options) + .ReadBlockContents(); + } + if (!s.ok()) { + break; + } + } + return s; +} + +bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const { + assert(rep_ != nullptr); + + Cache* const cache = rep_->table_options.block_cache.get(); + if (cache == nullptr) { + return false; + } + + CacheKey key = GetCacheKey(rep_->base_cache_key, handle); + + Cache::Handle* const cache_handle = cache->Lookup(key.AsSlice()); + if (cache_handle == nullptr) { + return false; + } + + cache->Release(cache_handle); + + return true; +} + +bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, + const Slice& key) { + std::unique_ptr> iiter(NewIndexIterator( + options, /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, + /*get_context=*/nullptr, /*lookup_context=*/nullptr)); + iiter->Seek(key); + assert(iiter->Valid()); + + return TEST_BlockInCache(iiter->value().handle); +} + +// REQUIRES: The following fields of rep_ should have already been populated: +// 1. file +// 2. index_handle, +// 3. options +// 4. internal_comparator +// 5. 
index_type +Status BlockBasedTable::CreateIndexReader( + const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, + InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin, + BlockCacheLookupContext* lookup_context, + std::unique_ptr* index_reader) { + switch (rep_->index_type) { + case BlockBasedTableOptions::kTwoLevelIndexSearch: { + return PartitionIndexReader::Create(this, ro, prefetch_buffer, use_cache, + prefetch, pin, lookup_context, + index_reader); + } + case BlockBasedTableOptions::kBinarySearch: + FALLTHROUGH_INTENDED; + case BlockBasedTableOptions::kBinarySearchWithFirstKey: { + return BinarySearchIndexReader::Create(this, ro, prefetch_buffer, + use_cache, prefetch, pin, + lookup_context, index_reader); + } + case BlockBasedTableOptions::kHashSearch: { + if (!rep_->table_prefix_extractor) { + ROCKS_LOG_WARN(rep_->ioptions.logger, + "Missing prefix extractor for hash index. Fall back to" + " binary search index."); + return BinarySearchIndexReader::Create(this, ro, prefetch_buffer, + use_cache, prefetch, pin, + lookup_context, index_reader); + } else { + return HashIndexReader::Create(this, ro, prefetch_buffer, meta_iter, + use_cache, prefetch, pin, lookup_context, + index_reader); + } + } + default: { + std::string error_message = + "Unrecognized index type: " + std::to_string(rep_->index_type); + return Status::InvalidArgument(error_message.c_str()); + } + } +} + +uint64_t BlockBasedTable::ApproximateDataOffsetOf( + const InternalIteratorBase& index_iter, + uint64_t data_size) const { + assert(index_iter.status().ok()); + if (index_iter.Valid()) { + BlockHandle handle = index_iter.value().handle; + return handle.offset(); + } else { + // The iterator is past the last key in the file. + return data_size; + } +} + +uint64_t BlockBasedTable::GetApproximateDataSize() { + // Should be in table properties unless super old version + if (rep_->table_properties) { + return rep_->table_properties->data_size; + } + // Fall back to rough estimate from footer + return rep_->footer.metaindex_handle().offset(); +} + +uint64_t BlockBasedTable::ApproximateOffsetOf(const ReadOptions& read_options, + const Slice& key, + TableReaderCaller caller) { + uint64_t data_size = GetApproximateDataSize(); + if (UNLIKELY(data_size == 0)) { + // Hmm. Let's just split in half to avoid skewing one way or another, + // since we don't know whether we're operating on lower bound or + // upper bound. + return rep_->file_size / 2; + } + + BlockCacheLookupContext context(caller); + IndexBlockIter iiter_on_stack; + ReadOptions ro; + ro.total_order_seek = true; + ro.io_activity = read_options.io_activity; + auto index_iter = + NewIndexIterator(ro, /*disable_prefix_seek=*/true, + /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr, + /*lookup_context=*/&context); + std::unique_ptr> iiter_unique_ptr; + if (index_iter != &iiter_on_stack) { + iiter_unique_ptr.reset(index_iter); + } + + index_iter->Seek(key); + uint64_t offset; + if (index_iter->status().ok()) { + offset = ApproximateDataOffsetOf(*index_iter, data_size); + } else { + // Split in half to avoid skewing one way or another, + // since we don't know whether we're operating on lower bound or + // upper bound. + return rep_->file_size / 2; + } + + // Pro-rate file metadata (incl filters) size-proportionally across data + // blocks. 
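+  // Editor's example (illustrative figures): with offset = 25 MB,
+  // data_size = 100 MB and file_size = 120 MB, the estimate below is
+  // (25 / 100) * 120 MB = 30 MB.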
+ double size_ratio = + static_cast(offset) / static_cast(data_size); + return static_cast(size_ratio * + static_cast(rep_->file_size)); +} + +uint64_t BlockBasedTable::ApproximateSize(const ReadOptions& read_options, + const Slice& start, const Slice& end, + TableReaderCaller caller) { + assert(rep_->internal_comparator.Compare(start, end) <= 0); + + uint64_t data_size = GetApproximateDataSize(); + if (UNLIKELY(data_size == 0)) { + // Hmm. Assume whole file is involved, since we have lower and upper + // bound. This likely skews the estimate if we consider that this function + // is typically called with `[start, end]` fully contained in the file's + // key-range. + return rep_->file_size; + } + + BlockCacheLookupContext context(caller); + IndexBlockIter iiter_on_stack; + ReadOptions ro; + ro.total_order_seek = true; + ro.io_activity = read_options.io_activity; + auto index_iter = + NewIndexIterator(ro, /*disable_prefix_seek=*/true, + /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr, + /*lookup_context=*/&context); + std::unique_ptr> iiter_unique_ptr; + if (index_iter != &iiter_on_stack) { + iiter_unique_ptr.reset(index_iter); + } + + index_iter->Seek(start); + uint64_t start_offset; + if (index_iter->status().ok()) { + start_offset = ApproximateDataOffsetOf(*index_iter, data_size); + } else { + // Assume file is involved from the start. This likely skews the estimate + // but is consistent with the above error handling. + start_offset = 0; + } + + index_iter->Seek(end); + uint64_t end_offset; + if (index_iter->status().ok()) { + end_offset = ApproximateDataOffsetOf(*index_iter, data_size); + } else { + // Assume file is involved until the end. This likely skews the estimate + // but is consistent with the above error handling. + end_offset = data_size; + } + + assert(end_offset >= start_offset); + // Pro-rate file metadata (incl filters) size-proportionally across data + // blocks. 
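+  // Editor's example (illustrative figures): with start_offset = 10 MB,
+  // end_offset = 60 MB, data_size = 100 MB and file_size = 120 MB, the
+  // estimate below is ((60 - 10) / 100) * 120 MB = 60 MB.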
+ double size_ratio = static_cast(end_offset - start_offset) / + static_cast(data_size); + return static_cast(size_ratio * + static_cast(rep_->file_size)); +} + +bool BlockBasedTable::TEST_FilterBlockInCache() const { + assert(rep_ != nullptr); + return rep_->filter_type != Rep::FilterType::kNoFilter && + TEST_BlockInCache(rep_->filter_handle); +} + +bool BlockBasedTable::TEST_IndexBlockInCache() const { + assert(rep_ != nullptr); + + return TEST_BlockInCache(rep_->footer.index_handle()); +} + +Status BlockBasedTable::GetKVPairsFromDataBlocks( + const ReadOptions& read_options, std::vector* kv_pair_blocks) { + std::unique_ptr> blockhandles_iter( + NewIndexIterator(read_options, /*need_upper_bound_check=*/false, + /*input_iter=*/nullptr, /*get_context=*/nullptr, + /*lookup_contex=*/nullptr)); + + Status s = blockhandles_iter->status(); + if (!s.ok()) { + // Cannot read Index Block + return s; + } + + for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid(); + blockhandles_iter->Next()) { + s = blockhandles_iter->status(); + + if (!s.ok()) { + break; + } + + std::unique_ptr datablock_iter; + Status tmp_status; + datablock_iter.reset(NewDataBlockIterator( + read_options, blockhandles_iter->value().handle, + /*input_iter=*/nullptr, /*type=*/BlockType::kData, + /*get_context=*/nullptr, /*lookup_context=*/nullptr, + /*prefetch_buffer=*/nullptr, /*for_compaction=*/false, + /*async_read=*/false, tmp_status)); + s = datablock_iter->status(); + + if (!s.ok()) { + // Error reading the block - Skipped + continue; + } + + KVPairBlock kv_pair_block; + for (datablock_iter->SeekToFirst(); datablock_iter->Valid(); + datablock_iter->Next()) { + s = datablock_iter->status(); + if (!s.ok()) { + // Error reading the block - Skipped + break; + } + const Slice& key = datablock_iter->key(); + const Slice& value = datablock_iter->value(); + std::string key_copy = std::string(key.data(), key.size()); + std::string value_copy = std::string(value.data(), value.size()); + + kv_pair_block.push_back( + std::make_pair(std::move(key_copy), std::move(value_copy))); + } + kv_pair_blocks->push_back(std::move(kv_pair_block)); + } + return Status::OK(); +} + +Status BlockBasedTable::DumpTable(WritableFile* out_file) { + WritableFileStringStreamAdapter out_file_wrapper(out_file); + std::ostream out_stream(&out_file_wrapper); + // Output Footer + out_stream << "Footer Details:\n" + "--------------------------------------\n"; + out_stream << " " << rep_->footer.ToString() << "\n"; + + // Output MetaIndex + out_stream << "Metaindex Details:\n" + "--------------------------------------\n"; + std::unique_ptr metaindex; + std::unique_ptr metaindex_iter; + // TODO: plumb Env::IOActivity + const ReadOptions ro; + Status s = ReadMetaIndexBlock(ro, nullptr /* prefetch_buffer */, &metaindex, + &metaindex_iter); + if (s.ok()) { + for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid(); + metaindex_iter->Next()) { + s = metaindex_iter->status(); + if (!s.ok()) { + return s; + } + if (metaindex_iter->key() == kPropertiesBlockName) { + out_stream << " Properties block handle: " + << metaindex_iter->value().ToString(true) << "\n"; + } else if (metaindex_iter->key() == kCompressionDictBlockName) { + out_stream << " Compression dictionary block handle: " + << metaindex_iter->value().ToString(true) << "\n"; + } else if (strstr(metaindex_iter->key().ToString().c_str(), + "filter.rocksdb.") != nullptr) { + out_stream << " Filter block handle: " + << metaindex_iter->value().ToString(true) << "\n"; + } else if (metaindex_iter->key() == 
kRangeDelBlockName) { + out_stream << " Range deletion block handle: " + << metaindex_iter->value().ToString(true) << "\n"; + } + } + out_stream << "\n"; + } else { + return s; + } + + // Output TableProperties + const ROCKSDB_NAMESPACE::TableProperties* table_properties; + table_properties = rep_->table_properties.get(); + + if (table_properties != nullptr) { + out_stream << "Table Properties:\n" + "--------------------------------------\n"; + out_stream << " " << table_properties->ToString("\n ", ": ") << "\n"; + } + + if (rep_->filter) { + out_stream << "Filter Details:\n" + "--------------------------------------\n"; + out_stream << " " << rep_->filter->ToString() << "\n"; + } + + // Output Index block + s = DumpIndexBlock(out_stream); + if (!s.ok()) { + return s; + } + + // Output compression dictionary + if (rep_->uncompression_dict_reader) { + CachableEntry uncompression_dict; + s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( + nullptr /* prefetch_buffer */, ro, false /* no_io */, + false, /* verify_checksums */ + nullptr /* get_context */, nullptr /* lookup_context */, + &uncompression_dict); + if (!s.ok()) { + return s; + } + + assert(uncompression_dict.GetValue()); + + const Slice& raw_dict = uncompression_dict.GetValue()->GetRawDict(); + out_stream << "Compression Dictionary:\n" + "--------------------------------------\n"; + out_stream << " size (bytes): " << raw_dict.size() << "\n\n"; + out_stream << " HEX " << raw_dict.ToString(true) << "\n\n"; + } + + // Output range deletions block + auto* range_del_iter = NewRangeTombstoneIterator(ro); + if (range_del_iter != nullptr) { + range_del_iter->SeekToFirst(); + if (range_del_iter->Valid()) { + out_stream << "Range deletions:\n" + "--------------------------------------\n"; + for (; range_del_iter->Valid(); range_del_iter->Next()) { + DumpKeyValue(range_del_iter->key(), range_del_iter->value(), + out_stream); + } + out_stream << "\n"; + } + delete range_del_iter; + } + // Output Data blocks + s = DumpDataBlocks(out_stream); + + if (!s.ok()) { + return s; + } + + if (!out_stream.good()) { + return Status::IOError("Failed to write to output file"); + } + return Status::OK(); +} + +Status BlockBasedTable::DumpIndexBlock(std::ostream& out_stream) { + out_stream << "Index Details:\n" + "--------------------------------------\n"; + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + std::unique_ptr> blockhandles_iter( + NewIndexIterator(read_options, /*need_upper_bound_check=*/false, + /*input_iter=*/nullptr, /*get_context=*/nullptr, + /*lookup_contex=*/nullptr)); + Status s = blockhandles_iter->status(); + if (!s.ok()) { + out_stream << "Can not read Index Block \n\n"; + return s; + } + + out_stream << " Block key hex dump: Data block handle\n"; + out_stream << " Block key ascii\n\n"; + for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid(); + blockhandles_iter->Next()) { + s = blockhandles_iter->status(); + if (!s.ok()) { + break; + } + Slice key = blockhandles_iter->key(); + Slice user_key; + InternalKey ikey; + if (!rep_->index_key_includes_seq) { + user_key = key; + } else { + ikey.DecodeFrom(key); + user_key = ikey.user_key(); + } + + out_stream << " HEX " << user_key.ToString(true) << ": " + << blockhandles_iter->value().ToString(true, + rep_->index_has_first_key) + << " offset " << blockhandles_iter->value().handle.offset() + << " size " << blockhandles_iter->value().handle.size() << "\n"; + + std::string str_key = user_key.ToString(); + std::string res_key(""); + char cspace = ' 
'; + for (size_t i = 0; i < str_key.size(); i++) { + res_key.append(&str_key[i], 1); + res_key.append(1, cspace); + } + out_stream << " ASCII " << res_key << "\n"; + out_stream << " ------\n"; + } + out_stream << "\n"; + return Status::OK(); +} + +Status BlockBasedTable::DumpDataBlocks(std::ostream& out_stream) { + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + std::unique_ptr> blockhandles_iter( + NewIndexIterator(read_options, /*need_upper_bound_check=*/false, + /*input_iter=*/nullptr, /*get_context=*/nullptr, + /*lookup_contex=*/nullptr)); + Status s = blockhandles_iter->status(); + if (!s.ok()) { + out_stream << "Can not read Index Block \n\n"; + return s; + } + + uint64_t datablock_size_min = std::numeric_limits::max(); + uint64_t datablock_size_max = 0; + uint64_t datablock_size_sum = 0; + + size_t block_id = 1; + for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid(); + block_id++, blockhandles_iter->Next()) { + s = blockhandles_iter->status(); + if (!s.ok()) { + break; + } + + BlockHandle bh = blockhandles_iter->value().handle; + uint64_t datablock_size = bh.size(); + datablock_size_min = std::min(datablock_size_min, datablock_size); + datablock_size_max = std::max(datablock_size_max, datablock_size); + datablock_size_sum += datablock_size; + + out_stream << "Data Block # " << block_id << " @ " + << blockhandles_iter->value().handle.ToString(true) << "\n"; + out_stream << "--------------------------------------\n"; + + std::unique_ptr datablock_iter; + Status tmp_status; + datablock_iter.reset(NewDataBlockIterator( + read_options, blockhandles_iter->value().handle, + /*input_iter=*/nullptr, /*type=*/BlockType::kData, + /*get_context=*/nullptr, /*lookup_context=*/nullptr, + /*prefetch_buffer=*/nullptr, /*for_compaction=*/false, + /*async_read=*/false, tmp_status)); + s = datablock_iter->status(); + + if (!s.ok()) { + out_stream << "Error reading the block - Skipped \n\n"; + continue; + } + + for (datablock_iter->SeekToFirst(); datablock_iter->Valid(); + datablock_iter->Next()) { + s = datablock_iter->status(); + if (!s.ok()) { + out_stream << "Error reading the block - Skipped \n"; + break; + } + DumpKeyValue(datablock_iter->key(), datablock_iter->value(), out_stream); + } + out_stream << "\n"; + } + + uint64_t num_datablocks = block_id - 1; + if (num_datablocks) { + double datablock_size_avg = + static_cast(datablock_size_sum) / num_datablocks; + out_stream << "Data Block Summary:\n"; + out_stream << "--------------------------------------\n"; + out_stream << " # data blocks: " << num_datablocks << "\n"; + out_stream << " min data block size: " << datablock_size_min << "\n"; + out_stream << " max data block size: " << datablock_size_max << "\n"; + out_stream << " avg data block size: " + << std::to_string(datablock_size_avg) << "\n"; + } + + return Status::OK(); +} + +void BlockBasedTable::DumpKeyValue(const Slice& key, const Slice& value, + std::ostream& out_stream) { + InternalKey ikey; + ikey.DecodeFrom(key); + + out_stream << " HEX " << ikey.user_key().ToString(true) << ": " + << value.ToString(true) << "\n"; + + std::string str_key = ikey.user_key().ToString(); + std::string str_value = value.ToString(); + std::string res_key(""), res_value(""); + char cspace = ' '; + for (size_t i = 0; i < str_key.size(); i++) { + if (str_key[i] == '\0') { + res_key.append("\\0", 2); + } else { + res_key.append(&str_key[i], 1); + } + res_key.append(1, cspace); + } + for (size_t i = 0; i < str_value.size(); i++) { + if (str_value[i] == '\0') { + 
      res_value.append("\\0", 2);
+    } else {
+      res_value.append(&str_value[i], 1);
+    }
+    res_value.append(1, cspace);
+  }
+
+  out_stream << "  ASCII  " << res_key << ": " << res_value << "\n";
+  out_stream << "  ------\n";
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.h
new file mode 100644
index 0000000000..8cdda06425
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader.h
@@ -0,0 +1,745 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "cache/cache_entry_roles.h"
+#include "cache/cache_key.h"
+#include "cache/cache_reservation_manager.h"
+#include "db/range_tombstone_fragmenter.h"
+#include "file/filename.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/table_properties.h"
+#include "table/block_based/block.h"
+#include "table/block_based/block_based_table_factory.h"
+#include "table/block_based/block_cache.h"
+#include "table/block_based/block_type.h"
+#include "table/block_based/cachable_entry.h"
+#include "table/block_based/filter_block.h"
+#include "table/block_based/uncompression_dict_reader.h"
+#include "table/format.h"
+#include "table/persistent_cache_options.h"
+#include "table/table_properties_internal.h"
+#include "table/table_reader.h"
+#include "table/two_level_iterator.h"
+#include "trace_replay/block_cache_tracer.h"
+#include "util/coro_utils.h"
+#include "util/hash_containers.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Cache;
+class FilterBlockReader;
+class FullFilterBlockReader;
+class Footer;
+class InternalKeyComparator;
+class Iterator;
+class FSRandomAccessFile;
+class TableCache;
+class TableReader;
+class WritableFile;
+struct BlockBasedTableOptions;
+struct EnvOptions;
+struct ReadOptions;
+class GetContext;
+
+using KVPairBlock = std::vector<std::pair<std::string, std::string>>;
+
+// Reader class for BlockBasedTable format.
+// For the format of BlockBasedTable refer to
+// https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format.
+// This is the default table type. Data is chunked into fixed size blocks and
+// each block in turn stores entries. When storing data, we can compress
+// and/or encode data efficiently within a block, which often results in a
+// much smaller data size compared with the raw data size. As for record
+// retrieval, we'll first locate the block where the target record may reside,
+// then read the block to memory, and finally search for the record within the
+// block. Of course, to avoid frequent reads of the same block, we introduced
+// the block cache to keep the loaded blocks in memory.
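+//
+// As an editorial illustration of the lookup flow described above (a sketch
+// only; `index_iter` and `key` below are hypothetical, not part of this
+// header):
+//
+//   // 1. Index lookup: find the handle of the data block that may hold key.
+//   index_iter->Seek(key);
+//   BlockHandle handle = index_iter->value().handle;
+//   // 2. Read that block, consulting the block cache before the file.
+//   // 3. Binary-search the block's restart points to find the record.
+//
+// The class below implements this flow, with caching, prefetching and filter
+// checks layered on top.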
+class BlockBasedTable : public TableReader {
+ public:
+  static const std::string kObsoleteFilterBlockPrefix;
+  static const std::string kFullFilterBlockPrefix;
+  static const std::string kPartitionedFilterBlockPrefix;
+
+  // 1-byte compression type + 32-bit checksum
+  static constexpr size_t kBlockTrailerSize = 5;
+
+  // Attempt to open the table that is stored in bytes [0..file_size)
+  // of "file", and read the metadata entries necessary to allow
+  // retrieving data from the table.
+  //
+  // If successful, returns ok and sets "*table_reader" to the newly opened
+  // table. The client should delete "*table_reader" when no longer needed.
+  // If there was an error while initializing the table, sets "*table_reader"
+  // to nullptr and returns a non-ok status.
+  //
+  // @param file must remain live while this Table is in use.
+  // @param prefetch_index_and_filter_in_cache can be used to disable
+  //    prefetching of index and filter blocks into block cache at startup
+  // @param skip_filters Disables loading/accessing the filter block. Overrides
+  //    prefetch_index_and_filter_in_cache, so filter will be skipped if both
+  //    are set.
+  // @param force_direct_prefetch if true, always prefetching to RocksDB
+  //    buffer, rather than calling RandomAccessFile::Prefetch().
+  static Status Open(
+      const ReadOptions& ro, const ImmutableOptions& ioptions,
+      const EnvOptions& env_options,
+      const BlockBasedTableOptions& table_options,
+      const InternalKeyComparator& internal_key_comparator,
+      std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+      uint8_t block_protection_bytes_per_key,
+      std::unique_ptr<TableReader>* table_reader, uint64_t tail_size,
+      std::shared_ptr<CacheReservationManager> table_reader_cache_res_mgr =
+          nullptr,
+      const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
+      bool prefetch_index_and_filter_in_cache = true, bool skip_filters = false,
+      int level = -1, const bool immortal_table = false,
+      const SequenceNumber largest_seqno = 0,
+      bool force_direct_prefetch = false,
+      TailPrefetchStats* tail_prefetch_stats = nullptr,
+      BlockCacheTracer* const block_cache_tracer = nullptr,
+      size_t max_file_size_for_l0_meta_pin = 0,
+      const std::string& cur_db_session_id = "", uint64_t cur_file_num = 0,
+      UniqueId64x2 expected_unique_id = {});
+
+  bool PrefixRangeMayMatch(const Slice& internal_key,
+                           const ReadOptions& read_options,
+                           const SliceTransform* options_prefix_extractor,
+                           const bool need_upper_bound_check,
+                           BlockCacheLookupContext* lookup_context,
+                           bool* filter_checked) const;
+
+  // Returns a new iterator over the table contents.
+  // The result of NewIterator() is initially invalid (caller must
+  // call one of the Seek methods on the iterator before using it).
+  // @param read_options Must outlive the returned iterator.
+  // @param skip_filters Disables loading/accessing the filter block
+  // compaction_readahead_size: its value will only be used if caller =
+  // kCompaction.
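+  //
+  // A minimal usage sketch (editorial; assumes `table` is an already-opened
+  // BlockBasedTable* and `start_ikey` a valid internal key):
+  //
+  //   ReadOptions ro;
+  //   std::unique_ptr<InternalIterator> it(table->NewIterator(
+  //       ro, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
+  //       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
+  //   for (it->Seek(start_ikey); it->Valid(); it->Next()) {
+  //     // it->key()/it->value() yield internal-key encoded entries.
+  //   }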
+  InternalIterator* NewIterator(const ReadOptions&,
+                                const SliceTransform* prefix_extractor,
+                                Arena* arena, bool skip_filters,
+                                TableReaderCaller caller,
+                                size_t compaction_readahead_size = 0,
+                                bool allow_unprepared_value = false) override;
+
+  FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
+      const ReadOptions& read_options) override;
+
+  // @param skip_filters Disables loading/accessing the filter block
+  Status Get(const ReadOptions& readOptions, const Slice& key,
+             GetContext* get_context, const SliceTransform* prefix_extractor,
+             bool skip_filters = false) override;
+
+  Status MultiGetFilter(const ReadOptions& read_options,
+                        const SliceTransform* prefix_extractor,
+                        MultiGetRange* mget_range) override;
+
+  DECLARE_SYNC_AND_ASYNC_OVERRIDE(void, MultiGet,
+                                  const ReadOptions& readOptions,
+                                  const MultiGetContext::Range* mget_range,
+                                  const SliceTransform* prefix_extractor,
+                                  bool skip_filters = false);
+
+  // Pre-fetch the disk blocks that correspond to the key range specified by
+  // (kbegin, kend). The call will return error status in the event of
+  // IO or iteration error.
+  Status Prefetch(const ReadOptions& read_options, const Slice* begin,
+                  const Slice* end) override;
+
+  // Given a key, return an approximate byte offset in the file where
+  // the data for that key begins (or would begin if the key were
+  // present in the file). The returned value is in terms of file
+  // bytes, and so includes effects like compression of the underlying data.
+  // E.g., the approximate offset of the last key in the table will
+  // be close to the file length.
+  uint64_t ApproximateOffsetOf(const ReadOptions& read_options,
+                               const Slice& key,
+                               TableReaderCaller caller) override;
+
+  // Given start and end keys, return the approximate data size in the file
+  // between the keys. The returned value is in terms of file bytes, and so
+  // includes effects like compression of the underlying data.
+  // The start key must not be greater than the end key.
+  uint64_t ApproximateSize(const ReadOptions& read_options, const Slice& start,
+                           const Slice& end, TableReaderCaller caller) override;
+
+  Status ApproximateKeyAnchors(const ReadOptions& read_options,
+                               std::vector<Anchor>& anchors) override;
+
+  bool TEST_BlockInCache(const BlockHandle& handle) const;
+
+  // Returns true if the block for the specified key is in cache.
+  // REQUIRES: key is in this table && block cache enabled
+  bool TEST_KeyInCache(const ReadOptions& options, const Slice& key);
+
+  // Set up the table for Compaction. Might change some parameters with
+  // posix_fadvise
+  void SetupForCompaction() override;
+
+  std::shared_ptr<const TableProperties> GetTableProperties() const override;
+
+  size_t ApproximateMemoryUsage() const override;
+
+  // convert SST file to a human readable form
+  Status DumpTable(WritableFile* out_file) override;
+
+  Status VerifyChecksum(const ReadOptions& readOptions,
+                        TableReaderCaller caller) override;
+
+  ~BlockBasedTable();
+
+  bool TEST_FilterBlockInCache() const;
+  bool TEST_IndexBlockInCache() const;
+
+  // IndexReader is the interface that provides the functionality for index
+  // access.
+  class IndexReader {
+   public:
+    virtual ~IndexReader() = default;
+
+    // Create an iterator for index access. If iter is null, then a new object
+    // is created on the heap, and the callee will have the ownership.
+    // If a non-null iter is passed in, it will be used, and the returned
+    // value is either the same as iter or a new on-heap object that wraps the
+    // passed iter.
+    // In the latter case the return value points to a different object than
+    // iter, and the callee has the ownership of the returned object.
+    virtual InternalIteratorBase<IndexValue>* NewIterator(
+        const ReadOptions& read_options, bool disable_prefix_seek,
+        IndexBlockIter* iter, GetContext* get_context,
+        BlockCacheLookupContext* lookup_context) = 0;
+
+    // Report an approximation of how much memory has been used other than
+    // memory that was allocated in block cache.
+    virtual size_t ApproximateMemoryUsage() const = 0;
+    // Cache the dependencies of the index reader (e.g. the partitions
+    // of a partitioned index).
+    virtual Status CacheDependencies(
+        const ReadOptions& /*ro*/, bool /* pin */,
+        FilePrefetchBuffer* /* tail_prefetch_buffer */) {
+      return Status::OK();
+    }
+  };
+
+  class IndexReaderCommon;
+
+  static void SetupBaseCacheKey(const TableProperties* properties,
+                                const std::string& cur_db_session_id,
+                                uint64_t cur_file_number,
+                                OffsetableCacheKey* out_base_cache_key,
+                                bool* out_is_stable = nullptr);
+
+  static CacheKey GetCacheKey(const OffsetableCacheKey& base_cache_key,
+                              const BlockHandle& handle);
+
+  static void UpdateCacheInsertionMetrics(BlockType block_type,
+                                          GetContext* get_context, size_t usage,
+                                          bool redundant,
+                                          Statistics* const statistics);
+
+  Statistics* GetStatistics() const;
+  bool IsLastLevel() const;
+
+  // Get the size to read from storage for a BlockHandle. size_t because we
+  // are about to load into memory.
+  static inline size_t BlockSizeWithTrailer(const BlockHandle& handle) {
+    return static_cast<size_t>(handle.size() + kBlockTrailerSize);
+  }
+
+  // It is the caller's responsibility to make sure that this is called with
+  // block-based table serialized block contents, which contains the
+  // compression byte in the trailer after `block_size`.
+  static inline CompressionType GetBlockCompressionType(const char* block_data,
+                                                        size_t block_size) {
+    return static_cast<CompressionType>(block_data[block_size]);
+  }
+  static inline CompressionType GetBlockCompressionType(
+      const BlockContents& contents) {
+    assert(contents.has_trailer);
+    return GetBlockCompressionType(contents.data.data(), contents.data.size());
+  }
+
+  // Retrieve all key value pairs from data blocks in the table.
+  // The keys retrieved are internal keys.
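+  //
+  // Editorial sketch of consuming the result (hypothetical caller code, not
+  // part of the upstream header):
+  //
+  //   std::vector<KVPairBlock> blocks;
+  //   Status s = table->GetKVPairsFromDataBlocks(ReadOptions(), &blocks);
+  //   if (s.ok()) {
+  //     for (const KVPairBlock& block : blocks) {
+  //       for (const auto& kv : block) {
+  //         // kv.first: internal key (user key + seqno + type);
+  //         // kv.second: raw value bytes.
+  //       }
+  //     }
+  //   }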
+ Status GetKVPairsFromDataBlocks(const ReadOptions& read_options, + std::vector* kv_pair_blocks); + + struct Rep; + + Rep* get_rep() { return rep_; } + const Rep* get_rep() const { return rep_; } + + // input_iter: if it is not null, update this one and return it as Iterator + template + TBlockIter* NewDataBlockIterator(const ReadOptions& ro, + const BlockHandle& block_handle, + TBlockIter* input_iter, BlockType block_type, + GetContext* get_context, + BlockCacheLookupContext* lookup_context, + FilePrefetchBuffer* prefetch_buffer, + bool for_compaction, bool async_read, + Status& s) const; + + // input_iter: if it is not null, update this one and return it as Iterator + template + TBlockIter* NewDataBlockIterator(const ReadOptions& ro, + CachableEntry& block, + TBlockIter* input_iter, Status s) const; + + class PartitionedIndexIteratorState; + + template + friend class FilterBlockReaderCommon; + + friend class PartitionIndexReader; + + friend class UncompressionDictReader; + + protected: + Rep* rep_; + explicit BlockBasedTable(Rep* rep, BlockCacheTracer* const block_cache_tracer) + : rep_(rep), block_cache_tracer_(block_cache_tracer) {} + // No copying allowed + explicit BlockBasedTable(const TableReader&) = delete; + void operator=(const TableReader&) = delete; + + private: + friend class MockedBlockBasedTable; + friend class BlockBasedTableReaderTestVerifyChecksum_ChecksumMismatch_Test; + BlockCacheTracer* const block_cache_tracer_; + + void UpdateCacheHitMetrics(BlockType block_type, GetContext* get_context, + size_t usage) const; + void UpdateCacheMissMetrics(BlockType block_type, + GetContext* get_context) const; + + // Either Block::NewDataIterator() or Block::NewIndexIterator(). + template + static TBlockIter* InitBlockIterator(const Rep* rep, Block* block, + BlockType block_type, + TBlockIter* input_iter, + bool block_contents_pinned); + + // If block cache enabled (compressed or uncompressed), looks for the block + // identified by handle in (1) uncompressed cache, (2) compressed cache, and + // then (3) file. If found, inserts into the cache(s) that were searched + // unsuccessfully (e.g., if found in file, will add to both uncompressed and + // compressed caches if they're enabled). + // + // @param block_entry value is set to the uncompressed block if found. If + // in uncompressed block cache, also sets cache_handle to reference that + // block. + template + WithBlocklikeCheck MaybeReadBlockAndLoadToCache( + FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, + const BlockHandle& handle, const UncompressionDict& uncompression_dict, + bool for_compaction, CachableEntry* block_entry, + GetContext* get_context, BlockCacheLookupContext* lookup_context, + BlockContents* contents, bool async_read) const; + + // Similar to the above, with one crucial difference: it will retrieve the + // block from the file even if there are no caches configured (assuming the + // read options allow I/O). 
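+  //
+  // Editorial sketch of the read-through pattern implemented by the two
+  // methods above (simplified pseudocode, not the actual control flow):
+  //
+  //   if (use_cache) {
+  //     LookupInBlockCache();            // MaybeReadBlockAndLoadToCache
+  //     if (hit) return;
+  //   }
+  //   ReadFromFileAndMaybeDecompress();  // then insert into the cache(s)
+  //                                      // so that later readers hit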
+ template + WithBlocklikeCheck RetrieveBlock( + FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, + const BlockHandle& handle, const UncompressionDict& uncompression_dict, + CachableEntry* block_entry, GetContext* get_context, + BlockCacheLookupContext* lookup_context, bool for_compaction, + bool use_cache, bool async_read) const; + + template + WithBlocklikeCheck SaveLookupContextOrTraceRecord( + const Slice& block_key, bool is_cache_hit, const ReadOptions& ro, + const TBlocklike* parsed_block_value, + BlockCacheLookupContext* lookup_context) const; + + void FinishTraceRecord(const BlockCacheLookupContext& lookup_context, + const Slice& block_key, const Slice& referenced_key, + bool does_referenced_key_exist, + uint64_t referenced_data_size) const; + + DECLARE_SYNC_AND_ASYNC_CONST( + void, RetrieveMultipleBlocks, const ReadOptions& options, + const MultiGetRange* batch, + const autovector* handles, + Status* statuses, CachableEntry* results, char* scratch, + const UncompressionDict& uncompression_dict); + + // Get the iterator from the index reader. + // + // If input_iter is not set, return a new Iterator. + // If input_iter is set, try to update it and return it as Iterator. + // However note that in some cases the returned iterator may be different + // from input_iter. In such case the returned iterator should be freed. + // + // Note: ErrorIterator with Status::Incomplete shall be returned if all the + // following conditions are met: + // 1. We enabled table_options.cache_index_and_filter_blocks. + // 2. index is not present in block cache. + // 3. We disallowed any io to be performed, that is, read_options == + // kBlockCacheTier + InternalIteratorBase* NewIndexIterator( + const ReadOptions& read_options, bool need_upper_bound_check, + IndexBlockIter* input_iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) const; + + template + Cache::Priority GetCachePriority() const; + + // Read block cache from block caches (if set): block_cache. + // On success, Status::OK with be returned and @block will be populated with + // pointer to the block as well as its block handle. + // @param uncompression_dict Data for presetting the compression library's + // dictionary. + template + WithBlocklikeCheck GetDataBlockFromCache( + const Slice& cache_key, BlockCacheInterface block_cache, + CachableEntry* block, GetContext* get_context) const; + + // Put a maybe compressed block to the corresponding block caches. + // This method will perform decompression against block_contents if needed + // and then populate the block caches. + // On success, Status::OK will be returned; also @block will be populated with + // uncompressed block and its cache handle. + // + // Allocated memory managed by block_contents will be transferred to + // PutDataBlockToCache(). After the call, the object will be invalid. + // @param uncompression_dict Data for presetting the compression library's + // dictionary. + template + WithBlocklikeCheck PutDataBlockToCache( + const Slice& cache_key, BlockCacheInterface block_cache, + CachableEntry* cached_block, BlockContents&& block_contents, + CompressionType block_comp_type, + const UncompressionDict& uncompression_dict, + MemoryAllocator* memory_allocator, GetContext* get_context) const; + + // Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found + // after a call to Seek(key), until handle_result returns false. + // May not make such a call if filter policy says that key is not present. 
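+  //
+  // Editorial note on the Status::Incomplete contract described above
+  // (hypothetical caller code):
+  //
+  //   ReadOptions ro;
+  //   ro.read_tier = kBlockCacheTier;  // forbid disk I/O
+  //   Status s = table->Get(ro, ikey, &get_context, prefix_extractor);
+  //   if (s.IsIncomplete()) {
+  //     // a required index/filter/data block was not in the block cache
+  //   }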
+ friend class TableCache; + friend class BlockBasedTableBuilder; + + // Create a index reader based on the index type stored in the table. + // Optionally, user can pass a preloaded meta_index_iter for the index that + // need to access extra meta blocks for index construction. This parameter + // helps avoid re-reading meta index block if caller already created one. + Status CreateIndexReader(const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, + InternalIterator* preloaded_meta_index_iter, + bool use_cache, bool prefetch, bool pin, + BlockCacheLookupContext* lookup_context, + std::unique_ptr* index_reader); + + bool FullFilterKeyMayMatch(FilterBlockReader* filter, const Slice& user_key, + const bool no_io, + const SliceTransform* prefix_extractor, + GetContext* get_context, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) const; + + void FullFilterKeysMayMatch(FilterBlockReader* filter, MultiGetRange* range, + const bool no_io, + const SliceTransform* prefix_extractor, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) const; + + // If force_direct_prefetch is true, always prefetching to RocksDB + // buffer, rather than calling RandomAccessFile::Prefetch(). + static Status PrefetchTail( + const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size, + bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats, + const bool prefetch_all, const bool preload_all, + std::unique_ptr* prefetch_buffer, Statistics* stats, + uint64_t tail_size, Logger* const logger); + Status ReadMetaIndexBlock(const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, + std::unique_ptr* metaindex_block, + std::unique_ptr* iter); + Status ReadPropertiesBlock(const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, + InternalIterator* meta_iter, + const SequenceNumber largest_seqno); + Status ReadRangeDelBlock(const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, + InternalIterator* meta_iter, + const InternalKeyComparator& internal_comparator, + BlockCacheLookupContext* lookup_context); + Status PrefetchIndexAndFilterBlocks( + const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, + InternalIterator* meta_iter, BlockBasedTable* new_table, + bool prefetch_all, const BlockBasedTableOptions& table_options, + const int level, size_t file_size, size_t max_file_size_for_l0_meta_pin, + BlockCacheLookupContext* lookup_context); + + static BlockType GetBlockTypeForMetaBlockByName(const Slice& meta_block_name); + + Status VerifyChecksumInMetaBlocks(const ReadOptions& read_options, + InternalIteratorBase* index_iter); + Status VerifyChecksumInBlocks(const ReadOptions& read_options, + InternalIteratorBase* index_iter); + + // Create the filter from the filter block. + std::unique_ptr CreateFilterBlockReader( + const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, + bool use_cache, bool prefetch, bool pin, + BlockCacheLookupContext* lookup_context); + + // Size of all data blocks, maybe approximate + uint64_t GetApproximateDataSize(); + + // Given an iterator return its offset in data block section of file. 
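+  // Editorial example of how this feeds the pro-rating in
+  // ApproximateOffsetOf() (illustrative numbers only): if the index places a
+  // key at data offset 40 MB, the data blocks span 80 MB, and the whole file
+  // is 100 MB, the reported estimate is 40/80 * 100 MB = 50 MB.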
+ uint64_t ApproximateDataOffsetOf( + const InternalIteratorBase& index_iter, + uint64_t data_size) const; + + // Helper functions for DumpTable() + Status DumpIndexBlock(std::ostream& out_stream); + Status DumpDataBlocks(std::ostream& out_stream); + void DumpKeyValue(const Slice& key, const Slice& value, + std::ostream& out_stream); + + // Returns false if prefix_extractor exists and is compatible with that used + // in building the table file, otherwise true. + bool PrefixExtractorChanged(const SliceTransform* prefix_extractor) const; + + bool TimestampMayMatch(const ReadOptions& read_options) const; + + // A cumulative data block file read in MultiGet lower than this size will + // use a stack buffer + static constexpr size_t kMultiGetReadStackBufSize = 8192; + + friend class PartitionedFilterBlockReader; + friend class PartitionedFilterBlockTest; + friend class DBBasicTest_MultiGetIOBufferOverrun_Test; +}; + +// Maintaining state of a two-level iteration on a partitioned index structure. +class BlockBasedTable::PartitionedIndexIteratorState + : public TwoLevelIteratorState { + public: + PartitionedIndexIteratorState( + const BlockBasedTable* table, + UnorderedMap>* block_map); + InternalIteratorBase* NewSecondaryIterator( + const BlockHandle& index_value) override; + + private: + // Don't own table_ + const BlockBasedTable* table_; + UnorderedMap>* block_map_; +}; + +// Stores all the properties associated with a BlockBasedTable. +// These are immutable. +struct BlockBasedTable::Rep { + Rep(const ImmutableOptions& _ioptions, const EnvOptions& _env_options, + const BlockBasedTableOptions& _table_opt, + const InternalKeyComparator& _internal_comparator, bool skip_filters, + uint64_t _file_size, int _level, const bool _immortal_table) + : ioptions(_ioptions), + env_options(_env_options), + table_options(_table_opt), + filter_policy(skip_filters ? nullptr : _table_opt.filter_policy.get()), + internal_comparator(_internal_comparator), + filter_type(FilterType::kNoFilter), + index_type(BlockBasedTableOptions::IndexType::kBinarySearch), + whole_key_filtering(_table_opt.whole_key_filtering), + prefix_filtering(true), + global_seqno(kDisableGlobalSequenceNumber), + file_size(_file_size), + level(_level), + immortal_table(_immortal_table) {} + ~Rep() { status.PermitUncheckedError(); } + const ImmutableOptions& ioptions; + const EnvOptions& env_options; + const BlockBasedTableOptions table_options; + const FilterPolicy* const filter_policy; + const InternalKeyComparator& internal_comparator; + Status status; + std::unique_ptr file; + OffsetableCacheKey base_cache_key; + PersistentCacheOptions persistent_cache_options; + + // Footer contains the fixed table information + Footer footer; + + std::unique_ptr index_reader; + std::unique_ptr filter; + std::unique_ptr uncompression_dict_reader; + + enum class FilterType { + kNoFilter, + kFullFilter, + kPartitionedFilter, + }; + FilterType filter_type; + BlockHandle filter_handle; + BlockHandle compression_dict_handle; + + std::shared_ptr table_properties; + BlockBasedTableOptions::IndexType index_type; + bool whole_key_filtering; + bool prefix_filtering; + std::shared_ptr table_prefix_extractor; + + std::shared_ptr fragmented_range_dels; + + // FIXME + // If true, data blocks in this file are definitely ZSTD compressed. If false + // they might not be. When false we skip creating a ZSTD digested + // uncompression dictionary. Even if we get a false negative, things should + // still work, just not as quickly. 
+ BlockCreateContext create_context; + + // If global_seqno is used, all Keys in this file will have the same + // seqno with value `global_seqno`. + // + // A value of kDisableGlobalSequenceNumber means that this feature is disabled + // and every key have it's own seqno. + SequenceNumber global_seqno; + + // Size of the table file on disk + uint64_t file_size; + + // the level when the table is opened, could potentially change when trivial + // move is involved + int level; + + // the timestamp range of table + // Points into memory owned by TableProperties. This would need to change if + // TableProperties become subject to cache eviction. + Slice min_timestamp; + Slice max_timestamp; + + // If false, blocks in this file are definitely all uncompressed. Knowing this + // before reading individual blocks enables certain optimizations. + bool blocks_maybe_compressed = true; + + // These describe how index is encoded. + bool index_has_first_key = false; + bool index_key_includes_seq = true; + bool index_value_is_full = true; + + const bool immortal_table; + + std::unique_ptr + table_reader_cache_res_handle = nullptr; + + SequenceNumber get_global_seqno(BlockType block_type) const { + return (block_type == BlockType::kFilterPartitionIndex || + block_type == BlockType::kCompressionDictionary) + ? kDisableGlobalSequenceNumber + : global_seqno; + } + + uint64_t cf_id_for_tracing() const { + return table_properties + ? table_properties->column_family_id + : ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory::Context:: + kUnknownColumnFamily; + } + + Slice cf_name_for_tracing() const { + return table_properties ? table_properties->column_family_name + : BlockCacheTraceHelper::kUnknownColumnFamilyName; + } + + uint32_t level_for_tracing() const { return level >= 0 ? level : UINT32_MAX; } + + uint64_t sst_number_for_tracing() const { + return file ? TableFileNameToNumber(file->file_name()) : UINT64_MAX; + } + void CreateFilePrefetchBuffer( + size_t readahead_size, size_t max_readahead_size, + std::unique_ptr* fpb, bool implicit_auto_readahead, + uint64_t num_file_reads, + uint64_t num_file_reads_for_auto_readahead) const { + fpb->reset(new FilePrefetchBuffer( + readahead_size, max_readahead_size, + !ioptions.allow_mmap_reads /* enable */, false /* track_min_offset */, + implicit_auto_readahead, num_file_reads, + num_file_reads_for_auto_readahead, ioptions.fs.get(), ioptions.clock, + ioptions.stats)); + } + + void CreateFilePrefetchBufferIfNotExists( + size_t readahead_size, size_t max_readahead_size, + std::unique_ptr* fpb, bool implicit_auto_readahead, + uint64_t num_file_reads, + uint64_t num_file_reads_for_auto_readahead) const { + if (!(*fpb)) { + CreateFilePrefetchBuffer(readahead_size, max_readahead_size, fpb, + implicit_auto_readahead, num_file_reads, + num_file_reads_for_auto_readahead); + } + } + + std::size_t ApproximateMemoryUsage() const { + std::size_t usage = 0; +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + usage += malloc_usable_size(const_cast(this)); +#else + usage += sizeof(*this); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + return usage; + } +}; + +// This is an adapter class for `WritableFile` to be used for `std::ostream`. +// The adapter wraps a `WritableFile`, which can be passed to a `std::ostream` +// constructor for storing streaming data. +// Note: +// * This adapter doesn't provide any buffering, each write is forwarded to +// `WritableFile->Append()` directly. 
+// * For a failed write, the user needs to check the status by `ostream.good()`
+class WritableFileStringStreamAdapter : public std::stringbuf {
+ public:
+  explicit WritableFileStringStreamAdapter(WritableFile* writable_file)
+      : file_(writable_file) {}
+
+  // Override overflow() to handle `sputc()`. There are cases that will not go
+  // through `xsputn()`, e.g. when `std::endl` or an unsigned long long is
+  // written by `os.put()` directly, which calls `sputc()`. By internal
+  // implementation:
+  //    int_type __CLR_OR_THIS_CALL sputc(_Elem _Ch) {  // put a character
+  //        return 0 < _Pnavail() ? _Traits::to_int_type(*_Pninc() = _Ch) :
+  //        overflow(_Traits::to_int_type(_Ch));
+  //    }
+  // As we explicitly disabled buffering (_Pnavail() is always 0), every write,
+  // not captured by xsputn(), becomes an overflow here.
+  int overflow(int ch = EOF) override {
+    if (ch != EOF) {
+      Status s = file_->Append(Slice((char*)&ch, 1));
+      if (s.ok()) {
+        return ch;
+      }
+    }
+    return EOF;
+  }
+
+  std::streamsize xsputn(char const* p, std::streamsize n) override {
+    Status s = file_->Append(Slice(p, n));
+    if (!s.ok()) {
+      return 0;
+    }
+    return n;
+  }
+
+ private:
+  WritableFile* file_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader_impl.h
new file mode 100644
index 0000000000..801b4614f0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader_impl.h
@@ -0,0 +1,203 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+#include <type_traits>
+
+#include "block.h"
+#include "block_cache.h"
+#include "table/block_based/block_based_table_reader.h"
+#include "table/block_based/reader_common.h"
+
+// This file contains some member functions of BlockBasedTable that
+// cannot be implemented in block_based_table_reader.cc because
+// they are called by other files (e.g. block_based_iterator.h) and
+// are templates.
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+using IterPlaceholderCacheInterface =
+    PlaceholderCacheInterface<CacheEntryRole::kMisc>;
+
+template <typename TBlockIter>
+struct IterTraits {};
+
+template <>
+struct IterTraits<DataBlockIter> {
+  using IterBlocklike = Block_kData;
+};
+
+template <>
+struct IterTraits<IndexBlockIter> {
+  using IterBlocklike = Block_kIndex;
+};
+
+}  // namespace
+
+// Convert an index iterator value (i.e., an encoded BlockHandle)
+// into an iterator over the contents of the corresponding block.
+// If input_iter is null, a new iterator is heap-allocated.
+// If input_iter is not null, this iterator is updated and returned.
+template <typename TBlockIter>
+TBlockIter* BlockBasedTable::NewDataBlockIterator(
+    const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter,
+    BlockType block_type, GetContext* get_context,
+    BlockCacheLookupContext* lookup_context,
+    FilePrefetchBuffer* prefetch_buffer, bool for_compaction, bool async_read,
+    Status& s) const {
+  using IterBlocklike = typename IterTraits<TBlockIter>::IterBlocklike;
+  PERF_TIMER_GUARD(new_table_block_iter_nanos);
+
+  TBlockIter* iter = input_iter != nullptr ?
input_iter : new TBlockIter; + if (!s.ok()) { + iter->Invalidate(s); + return iter; + } + + CachableEntry block; + if (rep_->uncompression_dict_reader && block_type == BlockType::kData) { + CachableEntry uncompression_dict; + const bool no_io = (ro.read_tier == kBlockCacheTier); + // For async scans, don't use the prefetch buffer since an async prefetch + // might already be under way and this would invalidate it. Also, the + // uncompression dict is typically at the end of the file and would + // most likely break the sequentiality of the access pattern. + s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( + ro.async_io ? nullptr : prefetch_buffer, ro, no_io, ro.verify_checksums, + get_context, lookup_context, &uncompression_dict); + if (!s.ok()) { + iter->Invalidate(s); + return iter; + } + const UncompressionDict& dict = uncompression_dict.GetValue() + ? *uncompression_dict.GetValue() + : UncompressionDict::GetEmptyDict(); + s = RetrieveBlock(prefetch_buffer, ro, handle, dict, + &block.As(), get_context, lookup_context, + for_compaction, + /* use_cache */ true, async_read); + } else { + s = RetrieveBlock( + prefetch_buffer, ro, handle, UncompressionDict::GetEmptyDict(), + &block.As(), get_context, lookup_context, for_compaction, + /* use_cache */ true, async_read); + } + + if (s.IsTryAgain() && async_read) { + return iter; + } + + if (!s.ok()) { + assert(block.IsEmpty()); + iter->Invalidate(s); + return iter; + } + + assert(block.GetValue() != nullptr); + + // Block contents are pinned and it is still pinned after the iterator + // is destroyed as long as cleanup functions are moved to another object, + // when: + // 1. block cache handle is set to be released in cleanup function, or + // 2. it's pointing to immortal source. If own_bytes is true then we are + // not reading data from the original source, whether immortal or not. + // Otherwise, the block is pinned iff the source is immortal. + const bool block_contents_pinned = + block.IsCached() || + (!block.GetValue()->own_bytes() && rep_->immortal_table); + iter = InitBlockIterator(rep_, block.GetValue(), block_type, iter, + block_contents_pinned); + + if (!block.IsCached()) { + if (!ro.fill_cache) { + IterPlaceholderCacheInterface block_cache{ + rep_->table_options.block_cache.get()}; + if (block_cache) { + // insert a dummy record to block cache to track the memory usage + Cache::Handle* cache_handle = nullptr; + CacheKey key = + CacheKey::CreateUniqueForCacheLifetime(block_cache.get()); + s = block_cache.Insert(key.AsSlice(), + block.GetValue()->ApproximateMemoryUsage(), + &cache_handle); + + if (s.ok()) { + assert(cache_handle != nullptr); + iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache.get(), + cache_handle); + } + } + } + } else { + iter->SetCacheHandle(block.GetCacheHandle()); + } + + block.TransferTo(iter); + + return iter; +} + +// Convert an uncompressed data block (i.e CachableEntry) +// into an iterator over the contents of the corresponding block. +// If input_iter is null, new a iterator +// If input_iter is not null, update this iter and return it +template +TBlockIter* BlockBasedTable::NewDataBlockIterator(const ReadOptions& ro, + CachableEntry& block, + TBlockIter* input_iter, + Status s) const { + PERF_TIMER_GUARD(new_table_block_iter_nanos); + + TBlockIter* iter = input_iter != nullptr ? 
input_iter : new TBlockIter; + if (!s.ok()) { + iter->Invalidate(s); + return iter; + } + + assert(block.GetValue() != nullptr); + // Block contents are pinned and it is still pinned after the iterator + // is destroyed as long as cleanup functions are moved to another object, + // when: + // 1. block cache handle is set to be released in cleanup function, or + // 2. it's pointing to immortal source. If own_bytes is true then we are + // not reading data from the original source, whether immortal or not. + // Otherwise, the block is pinned iff the source is immortal. + const bool block_contents_pinned = + block.IsCached() || + (!block.GetValue()->own_bytes() && rep_->immortal_table); + iter = InitBlockIterator(rep_, block.GetValue(), BlockType::kData, + iter, block_contents_pinned); + + if (!block.IsCached()) { + if (!ro.fill_cache) { + IterPlaceholderCacheInterface block_cache{ + rep_->table_options.block_cache.get()}; + if (block_cache) { + // insert a dummy record to block cache to track the memory usage + Cache::Handle* cache_handle = nullptr; + CacheKey key = + CacheKey::CreateUniqueForCacheLifetime(block_cache.get()); + s = block_cache.Insert(key.AsSlice(), + block.GetValue()->ApproximateMemoryUsage(), + &cache_handle); + + if (s.ok()) { + assert(cache_handle != nullptr); + iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache.get(), + cache_handle); + } + } + } + } else { + iter->SetCacheHandle(block.GetCacheHandle()); + } + + block.TransferTo(iter); + return iter; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader_sync_and_async.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader_sync_and_async.h new file mode 100644 index 0000000000..0cb251afa5 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_based_table_reader_sync_and_async.h @@ -0,0 +1,758 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/async_file_reader.h" +#include "util/coro_utils.h" + +#if defined(WITHOUT_COROUTINES) || \ + (defined(USE_COROUTINES) && defined(WITH_COROUTINES)) + +namespace ROCKSDB_NAMESPACE { + +// This function reads multiple data blocks from disk using Env::MultiRead() +// and optionally inserts them into the block cache. It uses the scratch +// buffer provided by the caller, which is contiguous. If scratch is a nullptr +// it allocates a separate buffer for each block. Typically, if the blocks +// need to be uncompressed and there is no compressed block cache, callers +// can allocate a temporary scratch buffer in order to minimize memory +// allocations. +// If options.fill_cache is true, it inserts the blocks into cache. If its +// false and scratch is non-null and the blocks are uncompressed, it copies +// the buffers to heap. In any case, the CachableEntry returned will +// own the data bytes. +// If compression is enabled and also there is no compressed block cache, +// the adjacent blocks are read out in one IO (combined read) +// batch - A MultiGetRange with only those keys with unique data blocks not +// found in cache +// handles - A vector of block handles. 
Some of them may be NULL handles
+// scratch - An optional contiguous buffer to read compressed blocks into
+DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
+(const ReadOptions& options, const MultiGetRange* batch,
+ const autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE>* handles,
+ Status* statuses, CachableEntry<Block>* results, char* scratch,
+ const UncompressionDict& uncompression_dict) const {
+  RandomAccessFileReader* file = rep_->file.get();
+  const Footer& footer = rep_->footer;
+  const ImmutableOptions& ioptions = rep_->ioptions;
+  size_t read_amp_bytes_per_bit = rep_->table_options.read_amp_bytes_per_bit;
+  MemoryAllocator* memory_allocator = GetMemoryAllocator(rep_->table_options);
+
+  if (ioptions.allow_mmap_reads) {
+    size_t idx_in_batch = 0;
+    for (auto mget_iter = batch->begin(); mget_iter != batch->end();
+         ++mget_iter, ++idx_in_batch) {
+      const BlockHandle& handle = (*handles)[idx_in_batch];
+      if (handle.IsNull()) {
+        continue;
+      }
+
+      // XXX: use_cache=true means double cache query?
+      statuses[idx_in_batch] =
+          RetrieveBlock(nullptr, options, handle, uncompression_dict,
+                        &results[idx_in_batch].As<Block>(),
+                        mget_iter->get_context, /* lookup_context */ nullptr,
+                        /* for_compaction */ false, /* use_cache */ true,
+                        /* async_read */ false);
+    }
+    assert(idx_in_batch == handles->size());
+    CO_RETURN;
+  }
+
+  // In direct IO mode, blocks share the direct io buffer.
+  // Otherwise, blocks share the scratch buffer.
+  const bool use_shared_buffer = file->use_direct_io() || scratch != nullptr;
+
+  autovector<FSReadRequest, MultiGetContext::MAX_BATCH_SIZE> read_reqs;
+  size_t buf_offset = 0;
+  size_t idx_in_batch = 0;
+
+  uint64_t prev_offset = 0;
+  size_t prev_len = 0;
+  autovector<size_t, MultiGetContext::MAX_BATCH_SIZE> req_idx_for_block;
+  autovector<size_t, MultiGetContext::MAX_BATCH_SIZE> req_offset_for_block;
+  for (auto mget_iter = batch->begin(); mget_iter != batch->end();
+       ++mget_iter, ++idx_in_batch) {
+    const BlockHandle& handle = (*handles)[idx_in_batch];
+    if (handle.IsNull()) {
+      continue;
+    }
+
+    size_t prev_end = static_cast<size_t>(prev_offset) + prev_len;
+
+    // If the current block is adjacent to the previous one, and at the same
+    // time compression is enabled and there is no compressed cache, we
+    // combine the two block reads into one.
+    // We don't combine block reads here in direct IO mode, because when doing
+    // direct IO read, the block requests will be realigned and merged when
+    // necessary.
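+    //
+    // Editorial illustration (hypothetical offsets): with handles at
+    // (offset=0, size=4096) and (offset=4101, size=4096) -- 4101 because the
+    // 5-byte block trailer follows the first block -- prev_end equals
+    // handle.offset(), so both blocks are folded into one FSReadRequest of
+    // length 2 * (4096 + 5) = 8202 bytes.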
+ if (use_shared_buffer && !file->use_direct_io() && + prev_end == handle.offset()) { + req_offset_for_block.emplace_back(prev_len); + prev_len += BlockSizeWithTrailer(handle); + } else { + // No compression or current block and previous one is not adjacent: + // Step 1, create a new request for previous blocks + if (prev_len != 0) { + FSReadRequest req; + req.offset = prev_offset; + req.len = prev_len; + if (file->use_direct_io()) { + req.scratch = nullptr; + } else if (use_shared_buffer) { + req.scratch = scratch + buf_offset; + buf_offset += req.len; + } else { + req.scratch = new char[req.len]; + } + read_reqs.emplace_back(req); + } + + // Step 2, remeber the previous block info + prev_offset = handle.offset(); + prev_len = BlockSizeWithTrailer(handle); + req_offset_for_block.emplace_back(0); + } + req_idx_for_block.emplace_back(read_reqs.size()); + + PERF_COUNTER_ADD(block_read_count, 1); + PERF_COUNTER_ADD(block_read_byte, BlockSizeWithTrailer(handle)); + } + // Handle the last block and process the pending last request + if (prev_len != 0) { + FSReadRequest req; + req.offset = prev_offset; + req.len = prev_len; + if (file->use_direct_io()) { + req.scratch = nullptr; + } else if (use_shared_buffer) { + req.scratch = scratch + buf_offset; + } else { + req.scratch = new char[req.len]; + } + read_reqs.emplace_back(req); + } + + AlignedBuf direct_io_buf; + { + IOOptions opts; + IOStatus s = file->PrepareIOOptions(options, opts); + if (s.ok()) { +#if defined(WITH_COROUTINES) + if (file->use_direct_io()) { +#endif // WITH_COROUTINES + s = file->MultiRead(opts, &read_reqs[0], read_reqs.size(), + &direct_io_buf, options.rate_limiter_priority); +#if defined(WITH_COROUTINES) + } else { + co_await batch->context()->reader().MultiReadAsync( + file, opts, &read_reqs[0], read_reqs.size(), &direct_io_buf); + } +#endif // WITH_COROUTINES + } + if (!s.ok()) { + // Discard all the results in this batch if there is any time out + // or overall MultiRead error + for (FSReadRequest& req : read_reqs) { + req.status = s; + } + } + } + + idx_in_batch = 0; + size_t valid_batch_idx = 0; + for (auto mget_iter = batch->begin(); mget_iter != batch->end(); + ++mget_iter, ++idx_in_batch) { + const BlockHandle& handle = (*handles)[idx_in_batch]; + + if (handle.IsNull()) { + continue; + } + + assert(valid_batch_idx < req_idx_for_block.size()); + assert(valid_batch_idx < req_offset_for_block.size()); + assert(req_idx_for_block[valid_batch_idx] < read_reqs.size()); + size_t& req_idx = req_idx_for_block[valid_batch_idx]; + size_t& req_offset = req_offset_for_block[valid_batch_idx]; + valid_batch_idx++; + FSReadRequest& req = read_reqs[req_idx]; + Status s = req.status; + if (s.ok()) { + if ((req.result.size() != req.len) || + (req_offset + BlockSizeWithTrailer(handle) > req.result.size())) { + s = Status::Corruption("truncated block read from " + + rep_->file->file_name() + " offset " + + std::to_string(handle.offset()) + ", expected " + + std::to_string(req.len) + " bytes, got " + + std::to_string(req.result.size())); + } + } + + BlockContents serialized_block; + if (s.ok()) { + if (!use_shared_buffer) { + // We allocated a buffer for this block. 
Give ownership of it to + // BlockContents so it can free the memory + assert(req.result.data() == req.scratch); + assert(req.result.size() == BlockSizeWithTrailer(handle)); + assert(req_offset == 0); + serialized_block = + BlockContents(std::unique_ptr(req.scratch), handle.size()); + } else { + // We used the scratch buffer or direct io buffer + // which are shared by the blocks. + // serialized_block does not have the ownership. + serialized_block = + BlockContents(Slice(req.result.data() + req_offset, handle.size())); + } +#ifndef NDEBUG + serialized_block.has_trailer = true; +#endif + + if (options.verify_checksums) { + PERF_TIMER_GUARD(block_checksum_time); + const char* data = req.result.data(); + // Since the scratch might be shared, the offset of the data block in + // the buffer might not be 0. req.result.data() only point to the + // begin address of each read request, we need to add the offset + // in each read request. Checksum is stored in the block trailer, + // beyond the payload size. + s = VerifyBlockChecksum(footer.checksum_type(), data + req_offset, + handle.size(), rep_->file->file_name(), + handle.offset()); + TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s); + } + } else if (!use_shared_buffer) { + // Free the allocated scratch buffer. + delete[] req.scratch; + } + + if (s.ok()) { + // When the blocks share the same underlying buffer (scratch or direct io + // buffer), we may need to manually copy the block into heap if the + // serialized block has to be inserted into a cache. That falls into the + // following cases - + // 1. serialized block is not compressed, it needs to be inserted into + // the uncompressed block cache if there is one + // 2. If the serialized block is compressed, it needs to be inserted + // into the compressed block cache if there is one + // + // In all other cases, the serialized block is either uncompressed into a + // heap buffer or there is no cache at all. + CompressionType compression_type = + GetBlockCompressionType(serialized_block); + if (use_shared_buffer && compression_type == kNoCompression) { + Slice serialized = + Slice(req.result.data() + req_offset, BlockSizeWithTrailer(handle)); + serialized_block = BlockContents( + CopyBufferToHeap(GetMemoryAllocator(rep_->table_options), + serialized), + handle.size()); +#ifndef NDEBUG + serialized_block.has_trailer = true; +#endif + } + } + + if (s.ok()) { + if (options.fill_cache) { + CachableEntry* block_entry = &results[idx_in_batch]; + // MaybeReadBlockAndLoadToCache will insert into the block caches if + // necessary. Since we're passing the serialized block contents, it + // will avoid looking up the block cache + s = MaybeReadBlockAndLoadToCache( + nullptr, options, handle, uncompression_dict, + /*for_compaction=*/false, block_entry, mget_iter->get_context, + /*lookup_context=*/nullptr, &serialized_block, + /*async_read=*/false); + + // block_entry value could be null if no block cache is present, i.e + // BlockBasedTableOptions::no_block_cache is true and no compressed + // block cache is configured. 
+ // the block explicitly
+ if (block_entry->GetValue() != nullptr) {
+ s.PermitUncheckedError();
+ continue;
+ }
+ }
+
+ CompressionType compression_type =
+ GetBlockCompressionType(serialized_block);
+ BlockContents contents;
+ if (compression_type != kNoCompression) {
+ UncompressionContext context(compression_type);
+ UncompressionInfo info(context, uncompression_dict, compression_type);
+ s = UncompressSerializedBlock(
+ info, req.result.data() + req_offset, handle.size(), &contents,
+ footer.format_version(), rep_->ioptions, memory_allocator);
+ } else {
+ // There are two cases here:
+ // 1) caller uses the shared buffer (scratch or direct io buffer);
+ // 2) we use the request buffer.
+ // If scratch buffer or direct io buffer is used, we ensure that
+ // all serialized blocks are copied to the heap as single blocks. If
+ // scratch buffer is not used, we also have no combined read, so the
+ // serialized block can be used directly.
+ contents = std::move(serialized_block);
+ }
+ if (s.ok()) {
+ results[idx_in_batch].SetOwnedValue(std::make_unique<Block_kData>(
+ std::move(contents), read_amp_bytes_per_bit, ioptions.stats));
+ }
+ }
+ statuses[idx_in_batch] = s;
+ }
+}
+
+using MultiGetRange = MultiGetContext::Range;
+DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
+(const ReadOptions& read_options, const MultiGetRange* mget_range,
+ const SliceTransform* prefix_extractor, bool skip_filters) {
+ if (mget_range->empty()) {
+ // Caller should ensure non-empty (performance bug)
+ assert(false);
+ CO_RETURN; // Nothing to do
+ }
+
+ FilterBlockReader* const filter =
+ !skip_filters ? rep_->filter.get() : nullptr;
+ MultiGetRange sst_file_range(*mget_range, mget_range->begin(),
+ mget_range->end());
+
+ // First check the full filter.
+ // If the full filter is not useful, then go into each block.
+ const bool no_io = read_options.read_tier == kBlockCacheTier;
+ uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
+ if (sst_file_range.begin()->get_context) {
+ tracing_mget_id = sst_file_range.begin()->get_context->get_tracing_get_id();
+ }
+ // TODO: need more than one lookup_context here to track individual filter
+ // and index partition hits and misses.
+ BlockCacheLookupContext metadata_lookup_context{
+ TableReaderCaller::kUserMultiGet, tracing_mget_id,
+ /*_get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
+ FullFilterKeysMayMatch(filter, &sst_file_range, no_io, prefix_extractor,
+ &metadata_lookup_context, read_options);
+
+ if (!sst_file_range.empty()) {
+ IndexBlockIter iiter_on_stack;
+ // if prefix_extractor found in block differs from options, disable
+ // BlockPrefixIndex. Only do this check when index_type is kHashSearch.
+ bool need_upper_bound_check = false;
+ if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
+ need_upper_bound_check = PrefixExtractorChanged(prefix_extractor);
+ }
+ auto iiter = NewIndexIterator(
+ read_options, need_upper_bound_check, &iiter_on_stack,
+ sst_file_range.begin()->get_context, &metadata_lookup_context);
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
+ if (iiter != &iiter_on_stack) {
+ iiter_unique_ptr.reset(iiter);
+ }
+
+ uint64_t prev_offset = std::numeric_limits<uint64_t>::max();
+ autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE> block_handles;
+ std::array<CachableEntry<Block_kData>, MultiGetContext::MAX_BATCH_SIZE>
+ results;
+ std::array<Status, MultiGetContext::MAX_BATCH_SIZE> statuses;
+ // Empty data_lookup_contexts means "unused," when block cache tracing is
+ // disabled. (Limited options as element type is not default constructible.)
+ std::vector<BlockCacheLookupContext> data_lookup_contexts;
+ MultiGetContext::Mask reused_mask = 0;
+ char stack_buf[kMultiGetReadStackBufSize];
+ std::unique_ptr<char[]> block_buf;
+ if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
+ // Awkward because BlockCacheLookupContext is not CopyAssignable
+ data_lookup_contexts.reserve(MultiGetContext::MAX_BATCH_SIZE);
+ for (size_t i = 0; i < MultiGetContext::MAX_BATCH_SIZE; ++i) {
+ data_lookup_contexts.push_back(metadata_lookup_context);
+ }
+ }
+ {
+ MultiGetRange data_block_range(sst_file_range, sst_file_range.begin(),
+ sst_file_range.end());
+ CachableEntry<UncompressionDict> uncompression_dict;
+ Status uncompression_dict_status;
+ uncompression_dict_status.PermitUncheckedError();
+ bool uncompression_dict_inited = false;
+ size_t total_len = 0;
+
+ // GetContext for any key will do, as the stats will be aggregated
+ // anyway
+ GetContext* get_context = sst_file_range.begin()->get_context;
+
+ {
+ using BCI = BlockCacheInterface<Block_kData>;
+ BCI block_cache{rep_->table_options.block_cache.get()};
+ std::array<BCI::TypedAsyncLookupHandle, MultiGetContext::MAX_BATCH_SIZE>
+ async_handles;
+ std::array<CacheKey, MultiGetContext::MAX_BATCH_SIZE> cache_keys;
+ size_t cache_lookup_count = 0;
+
+ for (auto miter = data_block_range.begin();
+ miter != data_block_range.end(); ++miter) {
+ const Slice& key = miter->ikey;
+ iiter->Seek(miter->ikey);
+
+ IndexValue v;
+ if (iiter->Valid()) {
+ v = iiter->value();
+ }
+ if (!iiter->Valid() ||
+ (!v.first_internal_key.empty() && !skip_filters &&
+ UserComparatorWrapper(
+ rep_->internal_comparator.user_comparator())
+ .CompareWithoutTimestamp(
+ ExtractUserKey(key),
+ ExtractUserKey(v.first_internal_key)) < 0)) {
+ // The requested key falls between highest key in previous block and
+ // lowest key in current block.
+ if (!iiter->status().IsNotFound()) {
+ *(miter->s) = iiter->status();
+ }
+ data_block_range.SkipKey(miter);
+ sst_file_range.SkipKey(miter);
+ continue;
+ }
+
+ if (!uncompression_dict_inited && rep_->uncompression_dict_reader) {
+ uncompression_dict_status =
+ rep_->uncompression_dict_reader
+ ->GetOrReadUncompressionDictionary(
+ nullptr /* prefetch_buffer */, read_options, no_io,
+ read_options.verify_checksums, get_context,
+ &metadata_lookup_context, &uncompression_dict);
+ uncompression_dict_inited = true;
+ }
+
+ if (!uncompression_dict_status.ok()) {
+ assert(!uncompression_dict_status.IsNotFound());
+ *(miter->s) = uncompression_dict_status;
+ data_block_range.SkipKey(miter);
+ sst_file_range.SkipKey(miter);
+ continue;
+ }
+
+ if (v.handle.offset() == prev_offset) {
+ // This key can reuse the previous block (later on).
+ // Mark previous as "reused"
+ reused_mask |= MultiGetContext::Mask{1}
+ << (block_handles.size() - 1);
+ // Use null handle to indicate this one reuses same block as
+ // previous.
+ block_handles.emplace_back(BlockHandle::NullBlockHandle());
+ continue;
+ }
+ prev_offset = v.handle.offset();
+ block_handles.emplace_back(v.handle);
+
+ if (block_cache) {
+ // Lookup the cache for the given data block referenced by an index
+ // iterator value (i.e. BlockHandle). If it exists in the cache,
+ // initialize block to the contents of the data block.
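+ // Illustrative sketch (not part of the upstream file) of the batched
+ // lookup pattern used just below, assuming `block_cache` is a populated
+ // BCI and `n` handles have already had their keys filled in:
+ //
+ //   std::array<BCI::TypedAsyncLookupHandle, kN> handles;
+ //   for (size_t j = 0; j < n; ++j) {
+ //     block_cache.StartAsyncLookupFull(
+ //         handles[j], CacheTier::kNonVolatileBlockTier);  // queue lookup
+ //   }
+ //   block_cache.get()->WaitAll(&handles[0], n);           // resolve all
+ //   for (size_t j = 0; j < n; ++j) {
+ //     BCI::TypedHandle* h = handles[j].Result();          // null on miss
+ //   }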
+
+ // An async version of MaybeReadBlockAndLoadToCache /
+ // GetDataBlockFromCache
+ BCI::TypedAsyncLookupHandle& async_handle =
+ async_handles[cache_lookup_count];
+ cache_keys[cache_lookup_count] =
+ GetCacheKey(rep_->base_cache_key, v.handle);
+ async_handle.key = cache_keys[cache_lookup_count].AsSlice();
+ // NB: StartAsyncLookupFull populates async_handle.helper
+ async_handle.create_context = &rep_->create_context;
+ async_handle.priority = GetCachePriority<Block_kData>();
+ async_handle.stats = rep_->ioptions.statistics.get();
+
+ block_cache.StartAsyncLookupFull(
+ async_handle, rep_->ioptions.lowest_used_cache_tier);
+ ++cache_lookup_count;
+ // TODO: stats?
+ }
+ }
+
+ if (block_cache) {
+ block_cache.get()->WaitAll(&async_handles[0], cache_lookup_count);
+ }
+ size_t lookup_idx = 0;
+ for (size_t i = 0; i < block_handles.size(); ++i) {
+ // If this block was a success or failure or not needed because
+ // the corresponding key is in the same block as a prior key, skip
+ if (block_handles[i] == BlockHandle::NullBlockHandle()) {
+ continue;
+ }
+ if (!block_cache) {
+ total_len += BlockSizeWithTrailer(block_handles[i]);
+ } else {
+ BCI::TypedHandle* h = async_handles[lookup_idx].Result();
+ if (h) {
+ // Cache hit
+ results[i].SetCachedValue(block_cache.Value(h), block_cache.get(),
+ h);
+ // Don't need to fetch
+ block_handles[i] = BlockHandle::NullBlockHandle();
+ UpdateCacheHitMetrics(BlockType::kData, get_context,
+ block_cache.get()->GetUsage(h));
+ } else {
+ // Cache miss
+ total_len += BlockSizeWithTrailer(block_handles[i]);
+ UpdateCacheMissMetrics(BlockType::kData, get_context);
+ }
+ if (!data_lookup_contexts.empty()) {
+ // Populate cache key before it's discarded
+ data_lookup_contexts[i].block_key =
+ async_handles[lookup_idx].key.ToString();
+ }
+ ++lookup_idx;
+ }
+ }
+ assert(lookup_idx == cache_lookup_count);
+ }
+
+ if (total_len) {
+ char* scratch = nullptr;
+ const UncompressionDict& dict = uncompression_dict.GetValue()
+ ? *uncompression_dict.GetValue()
+ : UncompressionDict::GetEmptyDict();
+ assert(uncompression_dict_inited || !rep_->uncompression_dict_reader);
+ assert(uncompression_dict_status.ok());
+ // If using direct IO, then scratch is not used, so keep it nullptr.
+ // If the blocks need to be uncompressed and we don't need the
+ // compressed blocks, then we can use a contiguous block of
+ // memory to read in all the blocks as it will be temporary
+ // storage
+ // 1. If blocks are compressed and compressed block cache is there,
+ // alloc heap bufs
+ // 2. If blocks are uncompressed, alloc heap bufs
+ // 3. If blocks are compressed and no compressed block cache, use
+ // stack buf
+ if (!rep_->file->use_direct_io() &&
+ rep_->blocks_maybe_compressed) {
+ if (total_len <= kMultiGetReadStackBufSize) {
+ scratch = stack_buf;
+ } else {
+ scratch = new char[total_len];
+ block_buf.reset(scratch);
+ }
+ }
+ CO_AWAIT(RetrieveMultipleBlocks)
+ (read_options, &data_block_range, &block_handles, &statuses[0],
+ &results[0], scratch, dict);
+ if (get_context) {
+ ++(get_context->get_context_stats_.num_sst_read);
+ }
+ }
+ }
+
+ DataBlockIter first_biter;
+ DataBlockIter next_biter;
+ size_t idx_in_batch = 0;
+ SharedCleanablePtr shared_cleanable;
+ for (auto miter = sst_file_range.begin(); miter != sst_file_range.end();
+ ++miter) {
+ Status s;
+ GetContext* get_context = miter->get_context;
+ const Slice& key = miter->ikey;
+ bool matched = false; // if such user key matched a key in SST
+ bool done = false;
+ bool first_block = true;
+ do {
+ DataBlockIter* biter = nullptr;
+ uint64_t referenced_data_size = 0;
+ Block_kData* parsed_block_value = nullptr;
+ bool reusing_prev_block;
+ bool later_reused;
+ bool does_referenced_key_exist = false;
+ bool handle_present = false;
+ BlockCacheLookupContext* lookup_data_block_context =
+ data_lookup_contexts.empty() ? nullptr
+ : &data_lookup_contexts[idx_in_batch];
+ if (first_block) {
+ handle_present = !block_handles[idx_in_batch].IsNull();
+ parsed_block_value = results[idx_in_batch].GetValue();
+ if (handle_present || parsed_block_value) {
+ first_biter.Invalidate(Status::OK());
+ NewDataBlockIterator<DataBlockIter>(
+ read_options, results[idx_in_batch].As<Block>(), &first_biter,
+ statuses[idx_in_batch]);
+ reusing_prev_block = false;
+ } else {
+ // If handle is null and result is empty, then the status is never
+ // set, which should be the initial value: ok().
+ assert(statuses[idx_in_batch].ok());
+ reusing_prev_block = true;
+ }
+ biter = &first_biter;
+ later_reused =
+ (reused_mask & (MultiGetContext::Mask{1} << idx_in_batch)) != 0;
+ idx_in_batch++;
+ } else {
+ IndexValue v = iiter->value();
+ if (!v.first_internal_key.empty() && !skip_filters &&
+ UserComparatorWrapper(rep_->internal_comparator.user_comparator())
+ .CompareWithoutTimestamp(
+ ExtractUserKey(key),
+ ExtractUserKey(v.first_internal_key)) < 0) {
+ // The requested key falls between highest key in previous block and
+ // lowest key in current block.
+ break;
+ }
+
+ next_biter.Invalidate(Status::OK());
+ Status tmp_s;
+ NewDataBlockIterator<DataBlockIter>(
+ read_options, iiter->value().handle, &next_biter,
+ BlockType::kData, get_context, lookup_data_block_context,
+ /* prefetch_buffer= */ nullptr, /* for_compaction = */ false,
+ /*async_read = */ false, tmp_s);
+ biter = &next_biter;
+ reusing_prev_block = false;
+ later_reused = false;
+ }
+
+ if (read_options.read_tier == kBlockCacheTier &&
+ biter->status().IsIncomplete()) {
+ // couldn't get block from block_cache
+ // Update Saver.state to Found because we are only looking for
+ // whether we can guarantee the key is not there when "no_io" is set
+ get_context->MarkKeyMayExist();
+ break;
+ }
+ if (!biter->status().ok()) {
+ s = biter->status();
+ break;
+ }
+
+ // Reusing blocks complicates pinning/Cleanable, because the cache
+ // entry referenced by biter can only be released once all returned
+ // pinned values are released. This code previously did an extra
+ // block_cache Ref for each reuse, but that unnecessarily increases
+ // block cache contention. Instead we can use a variant of shared_ptr
+ // to release in block cache only once.
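+ //
+ // As a rough sketch, the delegation performed below boils down to
+ // (names as in this function; no new logic):
+ //
+ //   shared_cleanable.Allocate();                    // shared record
+ //   biter->DelegateCleanupsTo(&*shared_cleanable);  // move cleanups in
+ //   shared_cleanable.RegisterCopyWith(biter);       // one shared ref
+ //
+ // Each later reuse registers another reference, and the underlying
+ // cache handle is released exactly once, when the last reference is
+ // cleaned up.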
+ //
+ // Although the biter loop below might SaveValue multiple times for
+ // merges, just one value_pinner suffices, as MultiGet will merge
+ // the operands before returning to the API user.
+ Cleanable* value_pinner;
+ if (biter->IsValuePinned()) {
+ if (reusing_prev_block) {
+ // Note that we don't yet know if the MultiGet results will need
+ // to pin this block, so we might wrap a block for sharing and
+ // still end up with 1 (or 0) pinning ref. Not ideal but OK.
+ //
+ // Here we avoid adding redundant cleanups if we didn't end up
+ // delegating the cleanup from last time around.
+ if (!biter->HasCleanups()) {
+ assert(shared_cleanable.get());
+ if (later_reused) {
+ shared_cleanable.RegisterCopyWith(biter);
+ } else {
+ shared_cleanable.MoveAsCleanupTo(biter);
+ }
+ }
+ } else if (later_reused) {
+ assert(biter->HasCleanups());
+ // Make the existing cleanups on `biter` sharable:
+ shared_cleanable.Allocate();
+ // Move existing `biter` cleanup(s) to `shared_cleanable`
+ biter->DelegateCleanupsTo(&*shared_cleanable);
+ // Reference `shared_cleanable` as new cleanup for `biter`
+ shared_cleanable.RegisterCopyWith(biter);
+ }
+ assert(biter->HasCleanups());
+ value_pinner = biter;
+ } else {
+ value_pinner = nullptr;
+ }
+
+ bool may_exist = biter->SeekForGet(key);
+ if (!may_exist) {
+ // HashSeek cannot find the key in this block and the iter is not at
+ // the end of the block, i.e. the key cannot be in the following
+ // blocks either. In this case, the seek_key cannot be found, so we
+ // break from the top level for-loop.
+ break;
+ }
+
+ // Call the *saver function on each entry/block until it returns false
+ for (; biter->Valid(); biter->Next()) {
+ ParsedInternalKey parsed_key;
+ Status pik_status = ParseInternalKey(
+ biter->key(), &parsed_key, false /* log_err_key */); // TODO
+ if (!pik_status.ok()) {
+ s = pik_status;
+ }
+ if (!get_context->SaveValue(parsed_key, biter->value(), &matched,
+ value_pinner)) {
+ if (get_context->State() == GetContext::GetState::kFound) {
+ does_referenced_key_exist = true;
+ referenced_data_size =
+ biter->key().size() + biter->value().size();
+ }
+ done = true;
+ break;
+ }
+ s = biter->status();
+ }
+ // Write the block cache access.
+ // XXX: There appear to be 'break' statements above that bypass this + // writing of the block cache trace record + if (lookup_data_block_context && !reusing_prev_block && first_block) { + Slice referenced_key; + if (does_referenced_key_exist) { + referenced_key = biter->key(); + } else { + referenced_key = key; + } + + // block_key is self-assigned here (previously assigned from + // cache_keys / async_handles, now out of scope) + SaveLookupContextOrTraceRecord(lookup_data_block_context->block_key, + /*is_cache_hit=*/!handle_present, + read_options, parsed_block_value, + lookup_data_block_context); + FinishTraceRecord( + *lookup_data_block_context, lookup_data_block_context->block_key, + referenced_key, does_referenced_key_exist, referenced_data_size); + } + s = biter->status(); + if (done) { + // Avoid the extra Next which is expensive in two-level indexes + break; + } + if (first_block) { + iiter->Seek(key); + if (!iiter->Valid()) { + break; + } + } + first_block = false; + iiter->Next(); + } while (iiter->Valid()); + + if (matched && filter != nullptr) { + if (rep_->whole_key_filtering) { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); + } else { + RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_TRUE_POSITIVE); + } + // Includes prefix stats + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, + rep_->level); + } + if (s.ok() && !iiter->status().IsNotFound()) { + s = iiter->status(); + } + *(miter->s) = s; + } +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + // Not sure why we need to do it. Should investigate more. + for (auto& st : statuses) { + st.PermitUncheckedError(); + } +#endif // ROCKSDB_ASSERT_STATUS_CHECKED + } +} +} // namespace ROCKSDB_NAMESPACE +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.cc new file mode 100644 index 0000000000..92702b17d0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.cc @@ -0,0 +1,234 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// BlockBuilder generates blocks where keys are prefix-compressed: +// +// When we store a key, we drop the prefix shared with the previous +// string. This helps reduce the space requirement significantly. +// Furthermore, once every K keys, we do not apply the prefix +// compression and store the entire key. We call this a "restart +// point". The tail end of the block stores the offsets of all of the +// restart points, and can be used to do a binary search when looking +// for a particular key. Values are stored as-is (without compression) +// immediately following the corresponding key. +// +// An entry for a particular key-value pair has the form: +// shared_bytes: varint32 +// unshared_bytes: varint32 +// value_length: varint32 +// key_delta: char[unshared_bytes] +// value: char[value_length] +// shared_bytes == 0 for restart points. 
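+//
+// For example (illustrative keys, not part of the upstream format spec),
+// adding "apple" -> "v1" and then "applet" -> "v2" with delta encoding
+// yields:
+//   entry 1 (restart): shared_bytes=0 unshared_bytes=5 value_length=2
+//                      key_delta="apple" value="v1"
+//   entry 2:           shared_bytes=5 unshared_bytes=1 value_length=2
+//                      key_delta="t" value="v2"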
+//
+// The trailer of the block has the form:
+// restarts: uint32[num_restarts]
+// num_restarts: uint32
+// restarts[i] contains the offset within the block of the ith restart point.
+
+#include "table/block_based/block_builder.h"
+
+#include <assert.h>
+
+#include <algorithm>
+
+#include "db/dbformat.h"
+#include "rocksdb/comparator.h"
+#include "table/block_based/data_block_footer.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+BlockBuilder::BlockBuilder(
+ int block_restart_interval, bool use_delta_encoding,
+ bool use_value_delta_encoding,
+ BlockBasedTableOptions::DataBlockIndexType index_type,
+ double data_block_hash_table_util_ratio)
+ : block_restart_interval_(block_restart_interval),
+ use_delta_encoding_(use_delta_encoding),
+ use_value_delta_encoding_(use_value_delta_encoding),
+ restarts_(1, 0), // First restart point is at offset 0
+ counter_(0),
+ finished_(false) {
+ switch (index_type) {
+ case BlockBasedTableOptions::kDataBlockBinarySearch:
+ break;
+ case BlockBasedTableOptions::kDataBlockBinaryAndHash:
+ data_block_hash_index_builder_.Initialize(
+ data_block_hash_table_util_ratio);
+ break;
+ default:
+ assert(0);
+ }
+ assert(block_restart_interval_ >= 1);
+ estimate_ = sizeof(uint32_t) + sizeof(uint32_t);
+}
+
+void BlockBuilder::Reset() {
+ buffer_.clear();
+ restarts_.resize(1); // First restart point is at offset 0
+ assert(restarts_[0] == 0);
+ estimate_ = sizeof(uint32_t) + sizeof(uint32_t);
+ counter_ = 0;
+ finished_ = false;
+ last_key_.clear();
+ if (data_block_hash_index_builder_.Valid()) {
+ data_block_hash_index_builder_.Reset();
+ }
+#ifndef NDEBUG
+ add_with_last_key_called_ = false;
+#endif
+}
+
+void BlockBuilder::SwapAndReset(std::string& buffer) {
+ std::swap(buffer_, buffer);
+ Reset();
+}
+
+size_t BlockBuilder::EstimateSizeAfterKV(const Slice& key,
+ const Slice& value) const {
+ size_t estimate = CurrentSizeEstimate();
+ // Note: this is an imprecise estimate as it accounts for the whole key size
+ // instead of non-shared key size.
+ estimate += key.size();
+ // In value delta encoding we estimate the value delta size as half the full
+ // value size since only the size field of block handle is encoded.
+ estimate +=
+ !use_value_delta_encoding_ || (counter_ >= block_restart_interval_)
+ ? value.size()
+ : value.size() / 2;
+
+ if (counter_ >= block_restart_interval_) {
+ estimate += sizeof(uint32_t); // a new restart entry.
+ }
+
+ estimate += sizeof(int32_t); // varint for shared prefix length.
+ // Note: this is an imprecise estimate as we will have to encode two sizes,
+ // one for the shared key and one for the non-shared key.
+ estimate += VarintLength(key.size()); // varint for key length.
+ if (!use_value_delta_encoding_ || (counter_ >= block_restart_interval_)) {
+ estimate += VarintLength(value.size()); // varint for value length.
+ }
+
+ return estimate;
+}
+
+Slice BlockBuilder::Finish() {
+ // Append restart array
+ for (size_t i = 0; i < restarts_.size(); i++) {
+ PutFixed32(&buffer_, restarts_[i]);
+ }
+
+ uint32_t num_restarts = static_cast<uint32_t>(restarts_.size());
+ BlockBasedTableOptions::DataBlockIndexType index_type =
+ BlockBasedTableOptions::kDataBlockBinarySearch;
+ if (data_block_hash_index_builder_.Valid() &&
+ CurrentSizeEstimate() <= kMaxBlockSizeSupportedByHashIndex) {
+ data_block_hash_index_builder_.Finish(buffer_);
+ index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash;
+ }
+
+ // footer is a packed format of data_block_index_type and num_restarts
+ uint32_t block_footer = PackIndexTypeAndNumRestarts(index_type, num_restarts);
+
+ PutFixed32(&buffer_, block_footer);
+ finished_ = true;
+ return Slice(buffer_);
+}
+
+void BlockBuilder::Add(const Slice& key, const Slice& value,
+ const Slice* const delta_value) {
+ // Ensure no unsafe mixing of Add and AddWithLastKey
+ assert(!add_with_last_key_called_);
+
+ AddWithLastKeyImpl(key, value, last_key_, delta_value, buffer_.size());
+ if (use_delta_encoding_) {
+ // Update state
+ // We used to just copy the changed data, but it appears to be
+ // faster to just copy the whole thing.
+ last_key_.assign(key.data(), key.size());
+ }
+}
+
+void BlockBuilder::AddWithLastKey(const Slice& key, const Slice& value,
+ const Slice& last_key_param,
+ const Slice* const delta_value) {
+ // Ensure no unsafe mixing of Add and AddWithLastKey
+ assert(last_key_.empty());
+#ifndef NDEBUG
+ add_with_last_key_called_ = true;
+#endif
+
+ // Here we make sure to use an empty `last_key` on first call after creation
+ // or Reset. This is more convenient for the caller and we can be more
+ // clever inside BlockBuilder. On this hot code path, we want to avoid
+ // conditional jumps like `buffer_.empty() ? ... : ...` so we can use a
+ // fast min operation instead, with an assertion to be sure our logic is
+ // sound.
+ size_t buffer_size = buffer_.size();
+ size_t last_key_size = last_key_param.size();
+ assert(buffer_size == 0 || buffer_size >= last_key_size);
+
+ Slice last_key(last_key_param.data(), std::min(buffer_size, last_key_size));
+
+ AddWithLastKeyImpl(key, value, last_key, delta_value, buffer_size);
+}
+
+inline void BlockBuilder::AddWithLastKeyImpl(const Slice& key,
+ const Slice& value,
+ const Slice& last_key,
+ const Slice* const delta_value,
+ size_t buffer_size) {
+ assert(!finished_);
+ assert(counter_ <= block_restart_interval_);
+ assert(!use_value_delta_encoding_ || delta_value);
+ size_t shared = 0; // number of bytes shared with prev key
+ if (counter_ >= block_restart_interval_) {
+ // Restart compression
+ restarts_.push_back(static_cast<uint32_t>(buffer_size));
+ estimate_ += sizeof(uint32_t);
+ counter_ = 0;
+ } else if (use_delta_encoding_) {
+ // See how much sharing to do with previous string
+ shared = key.difference_offset(last_key);
+ }
+
+ const size_t non_shared = key.size() - shared;
+
+ if (use_value_delta_encoding_) {
+ // Add "<shared><non_shared>" to buffer_
+ PutVarint32Varint32(&buffer_, static_cast<uint32_t>(shared),
+ static_cast<uint32_t>(non_shared));
+ } else {
+ // Add "<shared><non_shared><value_size>" to buffer_
+ PutVarint32Varint32Varint32(&buffer_, static_cast<uint32_t>(shared),
+ static_cast<uint32_t>(non_shared),
+ static_cast<uint32_t>(value.size()));
+ }
+
+ // Add string delta to buffer_ followed by value
+ buffer_.append(key.data() + shared, non_shared);
+ // Use value delta encoding only when the key has shared bytes. This would
+ // simplify the decoding, where it can figure out which decoding to use
+ // simply by looking at the shared bytes size.
+ if (shared != 0 && use_value_delta_encoding_) {
+ buffer_.append(delta_value->data(), delta_value->size());
+ } else {
+ buffer_.append(value.data(), value.size());
+ }
+
+ if (data_block_hash_index_builder_.Valid()) {
+ data_block_hash_index_builder_.Add(ExtractUserKey(key),
+ restarts_.size() - 1);
+ }
+
+ counter_++;
+ estimate_ += buffer_.size() - buffer_size;
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.h
new file mode 100644
index 0000000000..7cb94b3118
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_builder.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <stdint.h>
+
+#include <vector>
+
+#include "rocksdb/slice.h"
+#include "rocksdb/table.h"
+#include "table/block_based/data_block_hash_index.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class BlockBuilder {
+ public:
+ BlockBuilder(const BlockBuilder&) = delete;
+ void operator=(const BlockBuilder&) = delete;
+
+ explicit BlockBuilder(int block_restart_interval,
+ bool use_delta_encoding = true,
+ bool use_value_delta_encoding = false,
+ BlockBasedTableOptions::DataBlockIndexType index_type =
+ BlockBasedTableOptions::kDataBlockBinarySearch,
+ double data_block_hash_table_util_ratio = 0.75);
+
+ // Reset the contents as if the BlockBuilder was just constructed.
+ void Reset();
+
+ // Swap the contents in BlockBuilder with buffer, then reset the BlockBuilder.
+ void SwapAndReset(std::string& buffer);
+
+ // REQUIRES: Finish() has not been called since the last call to Reset().
+ // REQUIRES: Unless a range tombstone block, key is larger than any previously
+ // added key
+ // DO NOT mix with AddWithLastKey() between Resets. For efficiency, use
+ // AddWithLastKey() in contexts where the previously added key is already
+ // known and delta encoding might be used.
+ void Add(const Slice& key, const Slice& value,
+ const Slice* const delta_value = nullptr);
+
+ // A faster version of Add() if the previous key is already known for all
+ // Add()s.
+ // REQUIRES: Finish() has not been called since the last call to Reset().
+ // REQUIRES: Unless a range tombstone block, key is larger than any previously
+ // added key
+ // REQUIRES: if AddWithLastKey has been called since last Reset(), last_key
+ // is the key from most recent AddWithLastKey. (For convenience, last_key
+ // is ignored on first call after creation or Reset().)
+ // DO NOT mix with Add() between Resets.
+ void AddWithLastKey(const Slice& key, const Slice& value,
+ const Slice& last_key,
+ const Slice* const delta_value = nullptr);
+
+ // Finish building the block and return a slice that refers to the
+ // block contents. The returned slice will remain valid for the
+ // lifetime of this builder or until Reset() is called.
+ Slice Finish();
+
+ // Returns an estimate of the current (uncompressed) size of the block
+ // we are building.
+ inline size_t CurrentSizeEstimate() const {
+ return estimate_ + (data_block_hash_index_builder_.Valid()
+ ? data_block_hash_index_builder_.EstimateSize()
+ : 0);
+ }
+
+ // Returns an estimated block size after appending key and value.
+ size_t EstimateSizeAfterKV(const Slice& key, const Slice& value) const;
+
+ // Return true iff no entries have been added since the last Reset()
+ bool empty() const { return buffer_.empty(); }
+
+ private:
+ inline void AddWithLastKeyImpl(const Slice& key, const Slice& value,
+ const Slice& last_key,
+ const Slice* const delta_value,
+ size_t buffer_size);
+
+ const int block_restart_interval_;
+ // TODO(myabandeh): put it into a separate IndexBlockBuilder
+ const bool use_delta_encoding_;
+ // Refer to BlockIter::DecodeCurrentValue for format of delta encoded values
+ const bool use_value_delta_encoding_;
+
+ std::string buffer_; // Destination buffer
+ std::vector<uint32_t> restarts_; // Restart points
+ size_t estimate_;
+ int counter_; // Number of entries emitted since restart
+ bool finished_; // Has Finish() been called?
+ std::string last_key_;
+ DataBlockHashIndexBuilder data_block_hash_index_builder_;
+#ifndef NDEBUG
+ bool add_with_last_key_called_ = false;
+#endif
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.cc
new file mode 100644
index 0000000000..a252899d24
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.cc
@@ -0,0 +1,106 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "table/block_based/block_cache.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+void BlockCreateContext::Create(std::unique_ptr<Block_kData>* parsed_out,
+ BlockContents&& block) {
+ parsed_out->reset(new Block_kData(
+ std::move(block), table_options->read_amp_bytes_per_bit, statistics));
+ parsed_out->get()->InitializeDataBlockProtectionInfo(protection_bytes_per_key,
+ raw_ucmp);
+}
+void BlockCreateContext::Create(std::unique_ptr<Block_kIndex>* parsed_out,
+ BlockContents&& block) {
+ parsed_out->reset(new Block_kIndex(std::move(block),
+ /*read_amp_bytes_per_bit*/ 0, statistics));
+ parsed_out->get()->InitializeIndexBlockProtectionInfo(
+ protection_bytes_per_key, raw_ucmp, index_value_is_full,
+ index_has_first_key);
+}
+void BlockCreateContext::Create(
+ std::unique_ptr<Block_kFilterPartitionIndex>* parsed_out,
+ BlockContents&& block) {
+ parsed_out->reset(new Block_kFilterPartitionIndex(
+ std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
+ parsed_out->get()->InitializeIndexBlockProtectionInfo(
+ protection_bytes_per_key, raw_ucmp, index_value_is_full,
+ index_has_first_key);
+}
+void BlockCreateContext::Create(
+ std::unique_ptr<Block_kRangeDeletion>* parsed_out, BlockContents&& block) {
+ parsed_out->reset(new Block_kRangeDeletion(
+ std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
+}
+void BlockCreateContext::Create(std::unique_ptr<Block_kMetaIndex>* parsed_out,
+ BlockContents&& block) {
+ parsed_out->reset(new Block_kMetaIndex(
+ std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
+ parsed_out->get()->InitializeMetaIndexBlockProtectionInfo(
+ protection_bytes_per_key);
+}
+
+void BlockCreateContext::Create(
+ std::unique_ptr<ParsedFullFilterBlock>* parsed_out, BlockContents&& block) {
+ parsed_out->reset(new ParsedFullFilterBlock(
+ table_options->filter_policy.get(), std::move(block)));
+}
+
+void BlockCreateContext::Create(std::unique_ptr<UncompressionDict>* parsed_out,
+ BlockContents&& block) {
+ parsed_out->reset(new UncompressionDict(
+ block.data, std::move(block.allocation), using_zstd));
+}
+
+namespace {
+// For getting SecondaryCache-compatible helpers from a BlockType. This is
+// useful for accessing block cache in untyped contexts, such as for generic
+// cache warming in table builder.
+const std::array<const Cache::CacheItemHelper*,
+ static_cast<unsigned>(BlockType::kInvalid) + 1>
+ kCacheItemFullHelperForBlockType{{
+ BlockCacheInterface<Block_kData>::GetFullHelper(),
+ BlockCacheInterface<ParsedFullFilterBlock>::GetFullHelper(),
+ BlockCacheInterface<Block_kFilterPartitionIndex>::GetFullHelper(),
+ nullptr, // kProperties
+ BlockCacheInterface<UncompressionDict>::GetFullHelper(),
+ BlockCacheInterface<Block_kRangeDeletion>::GetFullHelper(),
+ nullptr, // kHashIndexPrefixes
+ nullptr, // kHashIndexMetadata
+ nullptr, // kMetaIndex (not yet stored in block cache)
+ BlockCacheInterface<Block_kIndex>::GetFullHelper(),
+ nullptr, // kInvalid
+ }};
+
+// For getting basic helpers from a BlockType (no SecondaryCache support)
+const std::array<const Cache::CacheItemHelper*,
+ static_cast<unsigned>(BlockType::kInvalid) + 1>
+ kCacheItemBasicHelperForBlockType{{
+ BlockCacheInterface<Block_kData>::GetBasicHelper(),
+ BlockCacheInterface<ParsedFullFilterBlock>::GetBasicHelper(),
+ BlockCacheInterface<Block_kFilterPartitionIndex>::GetBasicHelper(),
+ nullptr, // kProperties
+ BlockCacheInterface<UncompressionDict>::GetBasicHelper(),
+ BlockCacheInterface<Block_kRangeDeletion>::GetBasicHelper(),
+ nullptr, // kHashIndexPrefixes
+ nullptr, // kHashIndexMetadata
+ nullptr, // kMetaIndex (not yet stored in block cache)
+ BlockCacheInterface<Block_kIndex>::GetBasicHelper(),
+ nullptr, // kInvalid
+ }};
+} // namespace
+
+const Cache::CacheItemHelper* GetCacheItemHelper(
+ BlockType block_type, CacheTier lowest_used_cache_tier) {
+ if (lowest_used_cache_tier == CacheTier::kNonVolatileBlockTier) {
+ return kCacheItemFullHelperForBlockType[static_cast<unsigned>(block_type)];
+ } else {
+ return kCacheItemBasicHelperForBlockType[static_cast<unsigned>(block_type)];
+ }
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.h
new file mode 100644
index 0000000000..00eaface37
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_cache.h
@@ -0,0 +1,140 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+// Code supporting block cache (Cache) access for block-based table, based on
+// the convenient APIs in typed_cache.h
+
+#pragma once
+
+#include <cstdint>
+
+#include "cache/typed_cache.h"
+#include "port/lang.h"
+#include "table/block_based/block.h"
+#include "table/block_based/block_type.h"
+#include "table/block_based/parsed_full_filter_block.h"
+#include "table/format.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Metaprogramming wrappers for Block, to give each type a single role when
+// used with FullTypedCacheInterface.
+// (NOTE: previous attempts to create actual derived classes of Block with
+// virtual calls resulted in performance regression)
+
+class Block_kData : public Block {
+ public:
+ using Block::Block;
+
+ static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kDataBlock;
+ static constexpr BlockType kBlockType = BlockType::kData;
+};
+
+class Block_kIndex : public Block {
+ public:
+ using Block::Block;
+
+ static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kIndexBlock;
+ static constexpr BlockType kBlockType = BlockType::kIndex;
+};
+
+class Block_kFilterPartitionIndex : public Block {
+ public:
+ using Block::Block;
+
+ static constexpr CacheEntryRole kCacheEntryRole =
+ CacheEntryRole::kFilterMetaBlock;
+ static constexpr BlockType kBlockType = BlockType::kFilterPartitionIndex;
+};
+
+class Block_kRangeDeletion : public Block {
+ public:
+ using Block::Block;
+
+ static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kOtherBlock;
+ static constexpr BlockType kBlockType = BlockType::kRangeDeletion;
+};
+
+// Useful for creating the Block even though meta index blocks are not
+// yet stored in block cache
+class Block_kMetaIndex : public Block {
+ public:
+ using Block::Block;
+
+ static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kOtherBlock;
+ static constexpr BlockType kBlockType = BlockType::kMetaIndex;
+};
+
+struct BlockCreateContext : public Cache::CreateContext {
+ BlockCreateContext() {}
+ BlockCreateContext(const BlockBasedTableOptions* _table_options,
+ Statistics* _statistics, bool _using_zstd,
+ uint8_t _protection_bytes_per_key,
+ const Comparator* _raw_ucmp,
+ bool _index_value_is_full = false,
+ bool _index_has_first_key = false)
+ : table_options(_table_options),
+ statistics(_statistics),
+ using_zstd(_using_zstd),
+ protection_bytes_per_key(_protection_bytes_per_key),
+ raw_ucmp(_raw_ucmp),
+ index_value_is_full(_index_value_is_full),
+ index_has_first_key(_index_has_first_key) {}
+
+ const BlockBasedTableOptions* table_options = nullptr;
+ Statistics* statistics = nullptr;
+ bool using_zstd = false;
+ uint8_t protection_bytes_per_key = 0;
+ const Comparator* raw_ucmp = nullptr;
+ bool index_value_is_full;
+ bool index_has_first_key;
+
+ // For TypedCacheInterface
+ template <typename TBlocklike>
+ inline void Create(std::unique_ptr<TBlocklike>* parsed_out,
+ size_t* charge_out, const Slice& data,
+ MemoryAllocator* alloc) {
+ Create(parsed_out,
+ BlockContents(AllocateAndCopyBlock(data, alloc), data.size()));
+ *charge_out = parsed_out->get()->ApproximateMemoryUsage();
+ }
+
+ void Create(std::unique_ptr<Block_kData>* parsed_out, BlockContents&& block);
+ void Create(std::unique_ptr<Block_kIndex>* parsed_out, BlockContents&& block);
+ void Create(std::unique_ptr<Block_kFilterPartitionIndex>* parsed_out,
+ BlockContents&& block);
+ void Create(std::unique_ptr<Block_kRangeDeletion>* parsed_out,
+ BlockContents&& block);
+ void Create(std::unique_ptr<Block_kMetaIndex>* parsed_out,
+ BlockContents&& block);
+ void Create(std::unique_ptr<ParsedFullFilterBlock>* parsed_out,
+ BlockContents&& block);
+ void Create(std::unique_ptr<UncompressionDict>* parsed_out,
+ BlockContents&& block);
+};
+
+// Convenient cache interface to use for block_cache, with support for
+// SecondaryCache.
+template <typename TBlocklike>
+using BlockCacheInterface =
+ FullTypedCacheInterface<TBlocklike, BlockCreateContext>;
+
+// Shortcut name for cache handles under BlockCacheInterface
+template <typename TBlocklike>
+using BlockCacheTypedHandle =
+ typename BlockCacheInterface<TBlocklike>::TypedHandle;
+
+// Selects the right helper based on BlockType and CacheTier
+const Cache::CacheItemHelper* GetCacheItemHelper(
+ BlockType block_type,
+ CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier);
+
+// For SFINAE check that a type is "blocklike" with a kCacheEntryRole member.
+// Can get difficult compiler/linker errors without a good check like this.
+template <typename TUse, typename TBlocklike>
+using WithBlocklikeCheck = std::enable_if_t<
+ TBlocklike::kCacheEntryRole == CacheEntryRole::kMisc || true, TUse>;
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.cc
new file mode 100644
index 0000000000..83ec2cb060
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.cc
@@ -0,0 +1,120 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "table/block_based/block_prefetcher.h"
+
+#include "rocksdb/file_system.h"
+#include "table/block_based/block_based_table_reader.h"
+
+namespace ROCKSDB_NAMESPACE {
+void BlockPrefetcher::PrefetchIfNeeded(
+ const BlockBasedTable::Rep* rep, const BlockHandle& handle,
+ const size_t readahead_size, bool is_for_compaction,
+ const bool no_sequential_checking,
+ const Env::IOPriority rate_limiter_priority) {
+ // num_file_reads is used by FilePrefetchBuffer only when
+ // implicit_auto_readahead is set.
+ if (is_for_compaction) {
+ rep->CreateFilePrefetchBufferIfNotExists(
+ compaction_readahead_size_, compaction_readahead_size_,
+ &prefetch_buffer_, /*implicit_auto_readahead=*/false,
+ /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0);
+ return;
+ }
+
+ // Explicit user-requested readahead.
+ if (readahead_size > 0) {
+ rep->CreateFilePrefetchBufferIfNotExists(
+ readahead_size, readahead_size, &prefetch_buffer_,
+ /*implicit_auto_readahead=*/false, /*num_file_reads=*/0,
+ /*num_file_reads_for_auto_readahead=*/0);
+ return;
+ }
+
+ // Implicit readahead.
+
+ // If max_auto_readahead_size is set to be 0 by user, no data will be
+ // prefetched.
+ size_t max_auto_readahead_size = rep->table_options.max_auto_readahead_size;
+ if (max_auto_readahead_size == 0 || initial_auto_readahead_size_ == 0) {
+ return;
+ }
+
+ if (initial_auto_readahead_size_ > max_auto_readahead_size) {
+ initial_auto_readahead_size_ = max_auto_readahead_size;
+ }
+
+ // In case of no_sequential_checking, it will skip the num_file_reads_
+ // check and will always create the FilePrefetchBuffer.
+ if (no_sequential_checking) {
+ rep->CreateFilePrefetchBufferIfNotExists(
+ initial_auto_readahead_size_, max_auto_readahead_size,
+ &prefetch_buffer_, /*implicit_auto_readahead=*/true,
+ /*num_file_reads=*/0,
+ rep->table_options.num_file_reads_for_auto_readahead);
+ return;
+ }
+
+ size_t len = BlockBasedTable::BlockSizeWithTrailer(handle);
+ size_t offset = handle.offset();
+
+ // If FS supports prefetching (readahead_limit_ will be non-zero in that
+ // case) and the current block exists in the prefetch buffer then return.
+ if (offset + len <= readahead_limit_) {
+ UpdateReadPattern(offset, len);
+ return;
+ }
+
+ if (!IsBlockSequential(offset)) {
+ UpdateReadPattern(offset, len);
+ ResetValues(rep->table_options.initial_auto_readahead_size);
+ return;
+ }
+ UpdateReadPattern(offset, len);
+
+ // Implicit auto readahead, which will be enabled if the number of reads
+ // reached `table_options.num_file_reads_for_auto_readahead` (default: 2) and
+ // scans are sequential.
+ num_file_reads_++;
+ if (num_file_reads_ <= rep->table_options.num_file_reads_for_auto_readahead) {
+ return;
+ }
+
+ if (rep->file->use_direct_io()) {
+ rep->CreateFilePrefetchBufferIfNotExists(
+ initial_auto_readahead_size_, max_auto_readahead_size,
+ &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_,
+ rep->table_options.num_file_reads_for_auto_readahead);
+ return;
+ }
+
+ if (readahead_size_ > max_auto_readahead_size) {
+ readahead_size_ = max_auto_readahead_size;
+ }
+
+ // If prefetch is not supported, fall back to the internal prefetch buffer.
+ // Discarding other return statuses of Prefetch calls intentionally, as
+ // we can fall back to reading from disk if Prefetch fails.
+ Status s = rep->file->Prefetch(
+ handle.offset(),
+ BlockBasedTable::BlockSizeWithTrailer(handle) + readahead_size_,
+ rate_limiter_priority);
+ if (s.IsNotSupported()) {
+ rep->CreateFilePrefetchBufferIfNotExists(
+ initial_auto_readahead_size_, max_auto_readahead_size,
+ &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_,
+ rep->table_options.num_file_reads_for_auto_readahead);
+ return;
+ }
+
+ readahead_limit_ = offset + len + readahead_size_;
+ // Keep exponentially increasing readahead size until
+ // max_auto_readahead_size.
+ readahead_size_ = std::min(max_auto_readahead_size, readahead_size_ * 2);
+}
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.h
new file mode 100644
index 0000000000..518868a301
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefetcher.h
@@ -0,0 +1,72 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+#include "table/block_based/block_based_table_reader.h"
+
+namespace ROCKSDB_NAMESPACE {
+class BlockPrefetcher {
+ public:
+ explicit BlockPrefetcher(size_t compaction_readahead_size,
+ size_t initial_auto_readahead_size)
+ : compaction_readahead_size_(compaction_readahead_size),
+ readahead_size_(initial_auto_readahead_size),
+ initial_auto_readahead_size_(initial_auto_readahead_size) {}
+
+ void PrefetchIfNeeded(const BlockBasedTable::Rep* rep,
+ const BlockHandle& handle, size_t readahead_size,
+ bool is_for_compaction,
+ const bool no_sequential_checking,
+ Env::IOPriority rate_limiter_priority);
+ FilePrefetchBuffer* prefetch_buffer() { return prefetch_buffer_.get(); }
+
+ void UpdateReadPattern(const uint64_t& offset, const size_t& len) {
+ prev_offset_ = offset;
+ prev_len_ = len;
+ }
+
+ bool IsBlockSequential(const uint64_t& offset) {
+ return (prev_len_ == 0 || (prev_offset_ + prev_len_ == offset));
+ }
+
+ void ResetValues(size_t initial_auto_readahead_size) {
+ num_file_reads_ = 1;
+ // initial_auto_readahead_size_ can differ from the value passed to
+ // BlockBasedTableOptions.initial_auto_readahead_size in case of
+ // adaptive_readahead, so fall back readahead_size_ to that value on
+ // reset.
+ initial_auto_readahead_size_ = initial_auto_readahead_size;
+ readahead_size_ = initial_auto_readahead_size_;
+ readahead_limit_ = 0;
+ return;
+ }
+
+ void SetReadaheadState(ReadaheadFileInfo::ReadaheadInfo* readahead_info) {
+ num_file_reads_ = readahead_info->num_file_reads;
+ initial_auto_readahead_size_ = readahead_info->readahead_size;
+ TEST_SYNC_POINT_CALLBACK("BlockPrefetcher::SetReadaheadState",
+ &initial_auto_readahead_size_);
+ }
+
+ private:
+ // Readahead size used in compaction, its value is used only if
+ // lookup_context_.caller = kCompaction.
+ size_t compaction_readahead_size_;
+
+ // readahead_size_ is used if underlying FS supports prefetching.
+ size_t readahead_size_;
+ size_t readahead_limit_ = 0;
+ // initial_auto_readahead_size_ is used if RocksDB uses internal prefetch
+ // buffer.
+ uint64_t initial_auto_readahead_size_;
+ uint64_t num_file_reads_ = 0;
+ uint64_t prev_offset_ = 0;
+ size_t prev_len_ = 0;
+ std::unique_ptr<FilePrefetchBuffer> prefetch_buffer_;
+};
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.cc
new file mode 100644
index 0000000000..c83701d69c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.cc
@@ -0,0 +1,226 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "table/block_based/block_prefix_index.h"
+
+#include <vector>
+
+#include "memory/arena.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/slice_transform.h"
+#include "util/coding.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+inline uint32_t Hash(const Slice& s) {
+ return ROCKSDB_NAMESPACE::Hash(s.data(), s.size(), 0);
+}
+
+inline uint32_t PrefixToBucket(const Slice& prefix, uint32_t num_buckets) {
+ return Hash(prefix) % num_buckets;
+}
+
+// The prefix block index is simply a bucket array, with each entry pointing to
+// the blocks that span the prefixes hashed to this bucket.
+//
+// To reduce memory footprint, if there is only one block per bucket, the
+// entry stores the block id directly. If there is more than one block per
+// bucket, because of hash collision or a single prefix spanning multiple
+// blocks, the entry points to an array of block ids. The block array is an
+// array of uint32_t's. The first uint32_t indicates the total number of
+// blocks, followed by the block ids.
+//
+// To differentiate the two cases, the high order bit of the entry indicates
+// whether it is a 'pointer' into a separate block array.
+// 0x7FFFFFFF is reserved for empty bucket.
+
+const uint32_t kNoneBlock = 0x7FFFFFFF;
+const uint32_t kBlockArrayMask = 0x80000000;
+
+inline bool IsNone(uint32_t block_id) { return block_id == kNoneBlock; }
+
+inline bool IsBlockId(uint32_t block_id) {
+ return (block_id & kBlockArrayMask) == 0;
+}
+
+inline uint32_t DecodeIndex(uint32_t block_id) {
+ uint32_t index = block_id ^ kBlockArrayMask;
+ assert(index < kBlockArrayMask);
+ return index;
+}
+
+inline uint32_t EncodeIndex(uint32_t index) {
+ assert(index < kBlockArrayMask);
+ return index | kBlockArrayMask;
+}
+
+// temporary storage for prefix information during index building
+struct PrefixRecord {
+ Slice prefix;
+ uint32_t start_block;
+ uint32_t end_block;
+ uint32_t num_blocks;
+ PrefixRecord* next;
+};
+
+class BlockPrefixIndex::Builder {
+ public:
+ void Add(const Slice& key_prefix, uint32_t start_block, uint32_t num_blocks) {
+ PrefixRecord* record = reinterpret_cast<PrefixRecord*>(
+ arena_.AllocateAligned(sizeof(PrefixRecord)));
+ record->prefix = key_prefix;
+ record->start_block = start_block;
+ record->end_block = start_block + num_blocks - 1;
+ record->num_blocks = num_blocks;
+ prefixes_.push_back(record);
+ }
+
+ BlockPrefixIndex* Finish(const SliceTransform* prefix_extractor) {
+ // For now, use roughly 1:1 prefix to bucket ratio.
+ uint32_t num_buckets = static_cast<uint32_t>(prefixes_.size()) + 1;
+
+ // Collect prefix records that hash to the same bucket into a single
+ // linked list.
+ std::vector<PrefixRecord*> prefixes_per_bucket(num_buckets, nullptr);
+ std::vector<uint32_t> num_blocks_per_bucket(num_buckets, 0);
+ for (PrefixRecord* current : prefixes_) {
+ uint32_t bucket = PrefixToBucket(current->prefix, num_buckets);
+ // merge the prefix block span if the first block of this prefix is
+ // connected to the last block of the previous prefix.
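+ // For example (illustrative numbers, not from this code): if the
+ // previous record in this bucket covers blocks [3, 5] and the current
+ // record covers [6, 8] (distance = 6 - 5 = 1), the previous span is
+ // widened to [3, 8] instead of chaining a second record.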
+ PrefixRecord* prev = prefixes_per_bucket[bucket];
+ if (prev) {
+ assert(current->start_block >= prev->end_block);
+ auto distance = current->start_block - prev->end_block;
+ if (distance <= 1) {
+ prev->end_block = current->end_block;
+ prev->num_blocks = prev->end_block - prev->start_block + 1;
+ num_blocks_per_bucket[bucket] += (current->num_blocks + distance - 1);
+ continue;
+ }
+ }
+ current->next = prev;
+ prefixes_per_bucket[bucket] = current;
+ num_blocks_per_bucket[bucket] += current->num_blocks;
+ }
+
+ // Calculate the block array buffer size
+ uint32_t total_block_array_entries = 0;
+ for (uint32_t i = 0; i < num_buckets; i++) {
+ uint32_t num_blocks = num_blocks_per_bucket[i];
+ if (num_blocks > 1) {
+ total_block_array_entries += (num_blocks + 1);
+ }
+ }
+
+ // Populate the final prefix block index
+ uint32_t* block_array_buffer = new uint32_t[total_block_array_entries];
+ uint32_t* buckets = new uint32_t[num_buckets];
+ uint32_t offset = 0;
+ for (uint32_t i = 0; i < num_buckets; i++) {
+ uint32_t num_blocks = num_blocks_per_bucket[i];
+ if (num_blocks == 0) {
+ assert(prefixes_per_bucket[i] == nullptr);
+ buckets[i] = kNoneBlock;
+ } else if (num_blocks == 1) {
+ assert(prefixes_per_bucket[i] != nullptr);
+ assert(prefixes_per_bucket[i]->next == nullptr);
+ buckets[i] = prefixes_per_bucket[i]->start_block;
+ } else {
+ assert(total_block_array_entries > 0);
+ assert(prefixes_per_bucket[i] != nullptr);
+ buckets[i] = EncodeIndex(offset);
+ block_array_buffer[offset] = num_blocks;
+ uint32_t* last_block = &block_array_buffer[offset + num_blocks];
+ auto current = prefixes_per_bucket[i];
+ // populate block ids from largest to smallest
+ while (current != nullptr) {
+ for (uint32_t iter = 0; iter < current->num_blocks; iter++) {
+ *last_block = current->end_block - iter;
+ last_block--;
+ }
+ current = current->next;
+ }
+ assert(last_block == &block_array_buffer[offset]);
+ offset += (num_blocks + 1);
+ }
+ }
+
+ assert(offset == total_block_array_entries);
+
+ return new BlockPrefixIndex(prefix_extractor, num_buckets, buckets,
+ total_block_array_entries, block_array_buffer);
+ }
+
+ private:
+ std::vector<PrefixRecord*> prefixes_;
+ Arena arena_;
+};
+
+Status BlockPrefixIndex::Create(const SliceTransform* prefix_extractor,
+ const Slice& prefixes, const Slice& prefix_meta,
+ BlockPrefixIndex** prefix_index) {
+ uint64_t pos = 0;
+ auto meta_pos = prefix_meta;
+ Status s;
+ Builder builder;
+
+ while (!meta_pos.empty()) {
+ uint32_t prefix_size = 0;
+ uint32_t entry_index = 0;
+ uint32_t num_blocks = 0;
+ if (!GetVarint32(&meta_pos, &prefix_size) ||
+ !GetVarint32(&meta_pos, &entry_index) ||
+ !GetVarint32(&meta_pos, &num_blocks)) {
+ s = Status::Corruption(
+ "Corrupted prefix meta block: unable to read from it.");
+ break;
+ }
+ if (pos + prefix_size > prefixes.size()) {
+ s = Status::Corruption(
+ "Corrupted prefix meta block: size inconsistency.");
+ break;
+ }
+ Slice prefix(prefixes.data() + pos, prefix_size);
+ builder.Add(prefix, entry_index, num_blocks);
+
+ pos += prefix_size;
+ }
+
+ if (s.ok() && pos != prefixes.size()) {
+ s = Status::Corruption("Corrupted prefix meta block");
+ }
+
+ if (s.ok()) {
+ *prefix_index = builder.Finish(prefix_extractor);
+ }
+
+ return s;
+}
+
+uint32_t BlockPrefixIndex::GetBlocks(const Slice& key, uint32_t** blocks) {
+ Slice prefix = internal_prefix_extractor_.Transform(key);
+
+ uint32_t bucket = PrefixToBucket(prefix, num_buckets_);
+ uint32_t block_id = buckets_[bucket];
+
+ if (IsNone(block_id)) {
+ return 0;
+ } else if (IsBlockId(block_id)) {
+ *blocks = &buckets_[bucket];
+ return 1;
+ } else {
+ uint32_t index = DecodeIndex(block_id);
+ assert(index < num_block_array_buffer_entries_);
+ *blocks = &block_array_buffer_[index + 1];
+ uint32_t num_blocks = block_array_buffer_[index];
+ assert(num_blocks > 1);
+ assert(index + num_blocks < num_block_array_buffer_entries_);
+ return num_blocks;
+ }
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.h
new file mode 100644
index 0000000000..4db8e2c654
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_prefix_index.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <stdint.h>
+
+#include "db/dbformat.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Comparator;
+class Iterator;
+class Slice;
+class SliceTransform;
+
+// Build a hash-based index to speed up the lookup for "index block".
+// BlockPrefixIndex accepts a key and, if found, returns its restart index
+// within that index block.
+class BlockPrefixIndex {
+ public:
+ // Maps a key to a list of data blocks that could potentially contain
+ // the key, based on the prefix.
+ // Returns the total number of relevant blocks, 0 means the key does
+ // not exist.
+ uint32_t GetBlocks(const Slice& key, uint32_t** blocks);
+
+ size_t ApproximateMemoryUsage() const {
+ return sizeof(BlockPrefixIndex) +
+ (num_block_array_buffer_entries_ + num_buckets_) * sizeof(uint32_t);
+ }
+
+ // Create hash index by reading from the metadata blocks.
+ // Note: table reader (caller) is responsible for keeping shared_ptr to
+ // underlying prefix extractor
+ // @params prefixes: a sequence of prefixes.
+ // @params prefix_meta: contains the "metadata" of the prefixes.
+ static Status Create(const SliceTransform* hash_key_extractor,
+ const Slice& prefixes, const Slice& prefix_meta,
+ BlockPrefixIndex** prefix_index);
+
+ ~BlockPrefixIndex() {
+ delete[] buckets_;
+ delete[] block_array_buffer_;
+ }
+
+ private:
+ class Builder;
+ friend Builder;
+
+ BlockPrefixIndex(const SliceTransform* prefix_extractor, uint32_t num_buckets,
+ uint32_t* buckets, uint32_t num_block_array_buffer_entries,
+ uint32_t* block_array_buffer)
+ : internal_prefix_extractor_(prefix_extractor),
+ num_buckets_(num_buckets),
+ num_block_array_buffer_entries_(num_block_array_buffer_entries),
+ buckets_(buckets),
+ block_array_buffer_(block_array_buffer) {}
+
+ InternalKeySliceTransform internal_prefix_extractor_;
+
+ uint32_t num_buckets_;
+ uint32_t num_block_array_buffer_entries_;
+ uint32_t* buckets_;
+ uint32_t* block_array_buffer_;
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_type.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_type.h
new file mode 100644
index 0000000000..a9d6a1a773
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/block_type.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// Represents the types of blocks used in the block based table format. +// See https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format +// for details. +// For code sanity, BlockType should imply a specific TBlocklike for +// BlocklikeTraits. +enum class BlockType : uint8_t { + kData, + kFilter, // for second level partitioned filters and full filters + kFilterPartitionIndex, // for top-level index of filter partitions + kProperties, + kCompressionDictionary, + kRangeDeletion, + kHashIndexPrefixes, + kHashIndexMetadata, + kMetaIndex, + kIndex, + // Note: keep kInvalid the last value when adding new enum values. + kInvalid +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/cachable_entry.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/cachable_entry.h new file mode 100644 index 0000000000..3cd1bb807a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/cachable_entry.h @@ -0,0 +1,244 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include +#include + +#include "port/likely.h" +#include "rocksdb/advanced_cache.h" +#include "rocksdb/cleanable.h" + +namespace ROCKSDB_NAMESPACE { + +// CachableEntry is a handle to an object that may or may not be in the block +// cache. It is used in a variety of ways: +// +// 1) It may refer to an object in the block cache. In this case, cache_ and +// cache_handle_ are not nullptr, and the cache handle has to be released when +// the CachableEntry is destroyed (the lifecycle of the cached object, on the +// other hand, is managed by the cache itself). +// 2) It may uniquely own the (non-cached) object it refers to (examples include +// a block read directly from file, or uncompressed blocks when there is a +// compressed block cache but no uncompressed block cache). In such cases, the +// object has to be destroyed when the CachableEntry is destroyed. +// 3) It may point to an object (cached or not) without owning it. In this case, +// no action is needed when the CachableEntry is destroyed. +// 4) Sometimes, management of a cached or owned object (see #1 and #2 above) +// is transferred to some other object. This is used for instance with iterators +// (where cleanup is performed using a chain of cleanup functions, +// see Cleanable). +// +// Because of #1 and #2 above, copying a CachableEntry is not safe (and thus not +// allowed); hence, this is a move-only type, where a move transfers the +// management responsibilities, and leaves the source object in an empty state. 
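+//
+// Usage sketch (illustrative only; Block, block, cache, and handle are
+// placeholder names, not part of this header):
+//
+//   CachableEntry<Block> entry;
+//   entry.SetCachedValue(block, cache, handle);     // case 1: cache-owned
+//   CachableEntry<Block> moved = std::move(entry);  // handle is transferred
+//   assert(entry.IsEmpty());  // moved-from entry is reset to empty
+//   // destroying `moved` releases the cache handle via Cache::Release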
+ +template +class CachableEntry { + public: + CachableEntry() = default; + + CachableEntry(T* value, Cache* cache, Cache::Handle* cache_handle, + bool own_value) + : value_(value), + cache_(cache), + cache_handle_(cache_handle), + own_value_(own_value) { + assert(value_ != nullptr || + (cache_ == nullptr && cache_handle_ == nullptr && !own_value_)); + assert(!!cache_ == !!cache_handle_); + assert(!cache_handle_ || !own_value_); + } + + CachableEntry(const CachableEntry&) = delete; + CachableEntry& operator=(const CachableEntry&) = delete; + + CachableEntry(CachableEntry&& rhs) noexcept + : value_(rhs.value_), + cache_(rhs.cache_), + cache_handle_(rhs.cache_handle_), + own_value_(rhs.own_value_) { + assert(value_ != nullptr || + (cache_ == nullptr && cache_handle_ == nullptr && !own_value_)); + assert(!!cache_ == !!cache_handle_); + assert(!cache_handle_ || !own_value_); + + rhs.ResetFields(); + } + + CachableEntry& operator=(CachableEntry&& rhs) noexcept { + if (UNLIKELY(this == &rhs)) { + return *this; + } + + ReleaseResource(); + + value_ = rhs.value_; + cache_ = rhs.cache_; + cache_handle_ = rhs.cache_handle_; + own_value_ = rhs.own_value_; + + assert(value_ != nullptr || + (cache_ == nullptr && cache_handle_ == nullptr && !own_value_)); + assert(!!cache_ == !!cache_handle_); + assert(!cache_handle_ || !own_value_); + + rhs.ResetFields(); + + return *this; + } + + ~CachableEntry() { ReleaseResource(); } + + bool IsEmpty() const { + return value_ == nullptr && cache_ == nullptr && cache_handle_ == nullptr && + !own_value_; + } + + bool IsCached() const { + assert(!!cache_ == !!cache_handle_); + + return cache_handle_ != nullptr; + } + + T* GetValue() const { return value_; } + Cache* GetCache() const { return cache_; } + Cache::Handle* GetCacheHandle() const { return cache_handle_; } + bool GetOwnValue() const { return own_value_; } + + void Reset() { + ReleaseResource(); + ResetFields(); + } + + void TransferTo(Cleanable* cleanable) { + if (cleanable) { + if (cache_handle_ != nullptr) { + assert(cache_ != nullptr); + cleanable->RegisterCleanup(&ReleaseCacheHandle, cache_, cache_handle_); + } else if (own_value_) { + cleanable->RegisterCleanup(&DeleteValue, value_, nullptr); + } + } + + ResetFields(); + } + + void SetOwnedValue(std::unique_ptr&& value) { + assert(value.get() != nullptr); + + if (UNLIKELY(value_ == value.get() && own_value_)) { + assert(cache_ == nullptr && cache_handle_ == nullptr); + return; + } + + Reset(); + + value_ = value.release(); + own_value_ = true; + } + + void SetUnownedValue(T* value) { + assert(value != nullptr); + + if (UNLIKELY(value_ == value && cache_ == nullptr && + cache_handle_ == nullptr && !own_value_)) { + return; + } + + Reset(); + + value_ = value; + assert(!own_value_); + } + + void SetCachedValue(T* value, Cache* cache, Cache::Handle* cache_handle) { + assert(cache != nullptr); + assert(cache_handle != nullptr); + + if (UNLIKELY(value_ == value && cache_ == cache && + cache_handle_ == cache_handle && !own_value_)) { + return; + } + + Reset(); + + value_ = value; + cache_ = cache; + cache_handle_ = cache_handle; + assert(!own_value_); + } + + // Since this class is essentially an elaborate pointer, it's sometimes + // useful to be able to upcast or downcast the base type of the pointer, + // especially when interacting with typed_cache.h. 
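+  //
+  // Sketch (hypothetical types): assuming Derived inherits from Base and
+  // both have the same size, a CachableEntry<Derived> can be viewed as a
+  // CachableEntry<Base> in place, without copying or touching the cache
+  // handle:
+  //
+  //   CachableEntry<Derived> entry = /* ... */;
+  //   CachableEntry<Base>& base_view = entry.As<Base>();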
+ template + std::enable_if_t || + std::is_base_of_v), + /* Actual return type */ + CachableEntry&> + As() { + CachableEntry* result_ptr = + reinterpret_cast*>(this); + // Ensure no weirdness in template instantiations + assert(static_cast(&this->value_) == + static_cast(&result_ptr->value_)); + assert(&this->cache_handle_ == &result_ptr->cache_handle_); + // This function depends on no arithmetic involved in the pointer + // conversion, which is not statically checkable. + assert(static_cast(this->value_) == + static_cast(result_ptr->value_)); + return *result_ptr; + } + + private: + void ReleaseResource() noexcept { + if (LIKELY(cache_handle_ != nullptr)) { + assert(cache_ != nullptr); + cache_->Release(cache_handle_); + } else if (own_value_) { + delete value_; + } + } + + void ResetFields() noexcept { + value_ = nullptr; + cache_ = nullptr; + cache_handle_ = nullptr; + own_value_ = false; + } + + static void ReleaseCacheHandle(void* arg1, void* arg2) { + Cache* const cache = static_cast(arg1); + assert(cache); + + Cache::Handle* const cache_handle = static_cast(arg2); + assert(cache_handle); + + cache->Release(cache_handle); + } + + static void DeleteValue(void* arg1, void* /* arg2 */) { + delete static_cast(arg1); + } + + private: + // Have to be your own best friend + template + friend class CachableEntry; + + T* value_ = nullptr; + Cache* cache_ = nullptr; + Cache::Handle* cache_handle_ = nullptr; + bool own_value_ = false; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.cc new file mode 100644 index 0000000000..5d5d8ed55e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include "table/block_based/data_block_footer.h" + +#include "rocksdb/table.h" + +namespace ROCKSDB_NAMESPACE { + +const int kDataBlockIndexTypeBitShift = 31; + +// 0x7FFFFFFF +const uint32_t kMaxNumRestarts = (1u << kDataBlockIndexTypeBitShift) - 1u; + +// 0x7FFFFFFF +const uint32_t kNumRestartsMask = (1u << kDataBlockIndexTypeBitShift) - 1u; + +uint32_t PackIndexTypeAndNumRestarts( + BlockBasedTableOptions::DataBlockIndexType index_type, + uint32_t num_restarts) { + if (num_restarts > kMaxNumRestarts) { + assert(0); // mute travis "unused" warning + } + + uint32_t block_footer = num_restarts; + if (index_type == BlockBasedTableOptions::kDataBlockBinaryAndHash) { + block_footer |= 1u << kDataBlockIndexTypeBitShift; + } else if (index_type != BlockBasedTableOptions::kDataBlockBinarySearch) { + assert(0); + } + + return block_footer; +} + +void UnPackIndexTypeAndNumRestarts( + uint32_t block_footer, + BlockBasedTableOptions::DataBlockIndexType* index_type, + uint32_t* num_restarts) { + if (index_type) { + if (block_footer & 1u << kDataBlockIndexTypeBitShift) { + *index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash; + } else { + *index_type = BlockBasedTableOptions::kDataBlockBinarySearch; + } + } + + if (num_restarts) { + *num_restarts = block_footer & kNumRestartsMask; + assert(*num_restarts <= kMaxNumRestarts); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.h new file mode 100644 index 0000000000..c1cfd47309 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_footer.h @@ -0,0 +1,25 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "rocksdb/table.h" + +namespace ROCKSDB_NAMESPACE { + +uint32_t PackIndexTypeAndNumRestarts( + BlockBasedTableOptions::DataBlockIndexType index_type, + uint32_t num_restarts); + +void UnPackIndexTypeAndNumRestarts( + uint32_t block_footer, + BlockBasedTableOptions::DataBlockIndexType* index_type, + uint32_t* num_restarts); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.cc new file mode 100644 index 0000000000..c579dcc43d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#include "table/block_based/data_block_hash_index.h" + +#include +#include + +#include "rocksdb/slice.h" +#include "util/coding.h" +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +void DataBlockHashIndexBuilder::Add(const Slice& key, + const size_t restart_index) { + assert(Valid()); + if (restart_index > kMaxRestartSupportedByHashIndex) { + valid_ = false; + return; + } + + uint32_t hash_value = GetSliceHash(key); + hash_and_restart_pairs_.emplace_back(hash_value, + static_cast(restart_index)); + estimated_num_buckets_ += bucket_per_key_; +} + +void DataBlockHashIndexBuilder::Finish(std::string& buffer) { + assert(Valid()); + uint16_t num_buckets = static_cast(estimated_num_buckets_); + + if (num_buckets == 0) { + num_buckets = 1; // sanity check + } + + // The build-in hash cannot well distribute strings when into different + // buckets when num_buckets is power of two, resulting in high hash + // collision. + // We made the num_buckets to be odd to avoid this issue. + num_buckets |= 1; + + std::vector buckets(num_buckets, kNoEntry); + // write the restart_index array + for (auto& entry : hash_and_restart_pairs_) { + uint32_t hash_value = entry.first; + uint8_t restart_index = entry.second; + uint16_t buck_idx = static_cast(hash_value % num_buckets); + if (buckets[buck_idx] == kNoEntry) { + buckets[buck_idx] = restart_index; + } else if (buckets[buck_idx] != restart_index) { + // same bucket cannot store two different restart_index, mark collision + buckets[buck_idx] = kCollision; + } + } + + for (uint8_t restart_index : buckets) { + buffer.append( + const_cast(reinterpret_cast(&restart_index)), + sizeof(restart_index)); + } + + // write NUM_BUCK + PutFixed16(&buffer, num_buckets); + + assert(buffer.size() <= kMaxBlockSizeSupportedByHashIndex); +} + +void DataBlockHashIndexBuilder::Reset() { + estimated_num_buckets_ = 0; + valid_ = true; + hash_and_restart_pairs_.clear(); +} + +void DataBlockHashIndex::Initialize(const char* data, uint16_t size, + uint16_t* map_offset) { + assert(size >= sizeof(uint16_t)); // NUM_BUCKETS + num_buckets_ = DecodeFixed16(data + size - sizeof(uint16_t)); + assert(num_buckets_ > 0); + assert(size > num_buckets_ * sizeof(uint8_t)); + *map_offset = static_cast(size - sizeof(uint16_t) - + num_buckets_ * sizeof(uint8_t)); +} + +uint8_t DataBlockHashIndex::Lookup(const char* data, uint32_t map_offset, + const Slice& key) const { + uint32_t hash_value = GetSliceHash(key); + uint16_t idx = static_cast(hash_value % num_buckets_); + const char* bucket_table = data + map_offset; + return static_cast(*(bucket_table + idx * sizeof(uint8_t))); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.h new file mode 100644 index 0000000000..3215221755 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/data_block_hash_index.h @@ -0,0 +1,137 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { +// This is an experimental feature aiming to reduce the CPU utilization of +// point-lookup within a data-block. 
It is only used in data blocks, and not +// in meta-data blocks or per-table index blocks. +// +// It only used to support BlockBasedTable::Get(). +// +// A serialized hash index is appended to the data-block. The new block data +// format is as follows: +// +// DATA_BLOCK: [RI RI RI ... RI RI_IDX HASH_IDX FOOTER] +// +// RI: Restart Interval (the same as the default data-block format) +// RI_IDX: Restart Interval index (the same as the default data-block format) +// HASH_IDX: The new data-block hash index feature. +// FOOTER: A 32bit block footer, which is the NUM_RESTARTS with the MSB as +// the flag indicating if this hash index is in use. Note that +// given a data block < 32KB, the MSB is never used. So we can +// borrow the MSB as the hash index flag. Therefore, this format is +// compatible with the legacy data-blocks with num_restarts < 32768, +// as the MSB is 0. +// +// The format of the data-block hash index is as follows: +// +// HASH_IDX: [B B B ... B NUM_BUCK] +// +// B: bucket, an array of restart index. Each buckets is uint8_t. +// NUM_BUCK: Number of buckets, which is the length of the bucket array. +// +// We reserve two special flag: +// kNoEntry=255, +// kCollision=254. +// +// Therefore, the max number of restarts this hash index can supoport is 253. +// +// Buckets are initialized to be kNoEntry. +// +// When storing a key in the hash index, the key is first hashed to a bucket. +// If there the bucket is empty (kNoEntry), the restart index is stored in +// the bucket. If there is already a restart index there, we will update the +// existing restart index to a collision marker (kCollision). If the +// the bucket is already marked as collision, we do not store the restart +// index either. +// +// During query process, a key is first hashed to a bucket. Then we examine if +// the buckets store nothing (kNoEntry) or the bucket had a collision +// (kCollision). If either of those happens, we get the restart index of +// the key and will directly go to the restart interval to search the key. +// +// Note that we only support blocks with #restart_interval < 254. If a block +// has more restart interval than that, hash index will not be create for it. + +const uint8_t kNoEntry = 255; +const uint8_t kCollision = 254; +const uint8_t kMaxRestartSupportedByHashIndex = 253; + +// Because we use uint16_t address, we only support block no more than 64KB +const size_t kMaxBlockSizeSupportedByHashIndex = 1u << 16; +const double kDefaultUtilRatio = 0.75; + +class DataBlockHashIndexBuilder { + public: + DataBlockHashIndexBuilder() + : bucket_per_key_(-1 /*uninitialized marker*/), + estimated_num_buckets_(0), + valid_(false) {} + + void Initialize(double util_ratio) { + if (util_ratio <= 0) { + util_ratio = kDefaultUtilRatio; // sanity check + } + bucket_per_key_ = 1 / util_ratio; + valid_ = true; + } + + inline bool Valid() const { return valid_ && bucket_per_key_ > 0; } + void Add(const Slice& key, const size_t restart_index); + void Finish(std::string& buffer); + void Reset(); + inline size_t EstimateSize() const { + uint16_t estimated_num_buckets = + static_cast(estimated_num_buckets_); + + // Maching the num_buckets number in DataBlockHashIndexBuilder::Finish. 
+ estimated_num_buckets |= 1; + + return sizeof(uint16_t) + + static_cast(estimated_num_buckets * sizeof(uint8_t)); + } + + private: + double bucket_per_key_; // is the multiplicative inverse of util_ratio_ + double estimated_num_buckets_; + + // Now the only usage for `valid_` is to mark false when the inserted + // restart_index is larger than supported. In this case HashIndex is not + // appended to the block content. + bool valid_; + + std::vector> hash_and_restart_pairs_; + friend class DataBlockHashIndex_DataBlockHashTestSmall_Test; +}; + +class DataBlockHashIndex { + public: + DataBlockHashIndex() : num_buckets_(0) {} + + void Initialize(const char* data, uint16_t size, uint16_t* map_offset); + + uint8_t Lookup(const char* data, uint32_t map_offset, const Slice& key) const; + + inline bool Valid() { return num_buckets_ != 0; } + + private: + // To make the serialized hash index compact and to save the space overhead, + // here all the data fields persisted in the block are in uint16 format. + // We find that a uint16 is large enough to index every offset of a 64KiB + // block. + // So in other words, DataBlockHashIndex does not support block size equal + // or greater then 64KiB. + uint16_t num_buckets_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block.h new file mode 100644 index 0000000000..b14858c020 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block.h @@ -0,0 +1,183 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A filter block is stored near the end of a Table file. It contains +// filters (e.g., bloom filters) for all data blocks in the table combined +// into a single filter block. + +#pragma once + +#include +#include + +#include +#include +#include + +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/table.h" +#include "table/format.h" +#include "table/multiget_context.h" +#include "trace_replay/block_cache_tracer.h" +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +const uint64_t kNotValid = ULLONG_MAX; +class FilterPolicy; + +class GetContext; +using MultiGetRange = MultiGetContext::Range; + +// A FilterBlockBuilder is used to construct all of the filters for a +// particular Table. It generates a single string which is stored as +// a special block in the Table, or partitioned into smaller filters. 
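+//
+// Illustrative call sequence (hypothetical keys; the concrete builder type
+// is chosen by the table builder), matching the regexp stated below:
+//
+//   FilterBlockBuilder* b = /* ... */;
+//   b->Add("user_key_1");
+//   b->Add("user_key_2");
+//   Slice filter = b->Finish();  // serialized filter block contents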
+// +// The sequence of calls to FilterBlockBuilder must match the regexp: +// Add* Finish +class FilterBlockBuilder { + public: + explicit FilterBlockBuilder() {} + // No copying allowed + FilterBlockBuilder(const FilterBlockBuilder&) = delete; + void operator=(const FilterBlockBuilder&) = delete; + + virtual ~FilterBlockBuilder() {} + + virtual void Add( + const Slice& key_without_ts) = 0; // Add a key to current filter + virtual bool IsEmpty() const = 0; // Empty == none added + // For reporting stats on how many entries the builder considered unique + virtual size_t EstimateEntriesAdded() = 0; + Slice Finish() { // Generate Filter + const BlockHandle empty_handle; + Status dont_care_status; + auto ret = Finish(empty_handle, &dont_care_status); + assert(dont_care_status.ok()); + return ret; + } + // If filter_data is not nullptr, Finish() may transfer ownership of + // underlying filter data to the caller, so that it can be freed as soon as + // possible. BlockBasedFilterBlock will ignore this parameter. + // + virtual Slice Finish( + const BlockHandle& tmp /* only used in PartitionedFilterBlock as + last_partition_block_handle */ + , + Status* status, std::unique_ptr* filter_data = nullptr) = 0; + + // This is called when finishes using the FilterBitsBuilder + // in order to release memory usage and cache charge + // associated with it timely + virtual void ResetFilterBitsBuilder() {} + + // To optionally post-verify the filter returned from + // FilterBlockBuilder::Finish. + // Return Status::OK() if skipped. + virtual Status MaybePostVerifyFilter(const Slice& /* filter_content */) { + return Status::OK(); + } +}; + +// A FilterBlockReader is used to parse filter from SST table. +// KeyMayMatch and PrefixMayMatch would trigger filter checking +// +// BlockBased/Full FilterBlock would be called in the same way. +class FilterBlockReader { + public: + FilterBlockReader() = default; + virtual ~FilterBlockReader() = default; + + FilterBlockReader(const FilterBlockReader&) = delete; + FilterBlockReader& operator=(const FilterBlockReader&) = delete; + + /** + * If no_io is set, then it returns true if it cannot answer the query without + * reading data from disk. This is used in PartitionedFilterBlockReader to + * avoid reading partitions that are not in block cache already + * + * Normally filters are built on only the user keys and the InternalKey is not + * needed for a query. The index in PartitionedFilterBlockReader however is + * built upon InternalKey and must be provided via const_ikey_ptr when running + * queries. 
+ */ + virtual bool KeyMayMatch(const Slice& key, const bool no_io, + const Slice* const const_ikey_ptr, + GetContext* get_context, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) = 0; + + virtual void KeysMayMatch(MultiGetRange* range, const bool no_io, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) { + for (auto iter = range->begin(); iter != range->end(); ++iter) { + const Slice ukey_without_ts = iter->ukey_without_ts; + const Slice ikey = iter->ikey; + GetContext* const get_context = iter->get_context; + if (!KeyMayMatch(ukey_without_ts, no_io, &ikey, get_context, + lookup_context, read_options)) { + range->SkipKey(iter); + } + } + } + + /** + * no_io and const_ikey_ptr here means the same as in KeyMayMatch + */ + virtual bool PrefixMayMatch(const Slice& prefix, const bool no_io, + const Slice* const const_ikey_ptr, + GetContext* get_context, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) = 0; + + virtual void PrefixesMayMatch(MultiGetRange* range, + const SliceTransform* prefix_extractor, + const bool no_io, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) { + for (auto iter = range->begin(); iter != range->end(); ++iter) { + const Slice ukey_without_ts = iter->ukey_without_ts; + const Slice ikey = iter->ikey; + GetContext* const get_context = iter->get_context; + if (prefix_extractor->InDomain(ukey_without_ts) && + !PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts), no_io, + &ikey, get_context, lookup_context, read_options)) { + range->SkipKey(iter); + } + } + } + + virtual size_t ApproximateMemoryUsage() const = 0; + + // convert this object to a human readable form + virtual std::string ToString() const { + std::string error_msg("Unsupported filter \n"); + return error_msg; + } + + virtual Status CacheDependencies( + const ReadOptions& /*ro*/, bool /*pin*/, + FilePrefetchBuffer* /* tail_prefetch_buffer */) { + return Status::OK(); + } + + virtual bool RangeMayExist(const Slice* /*iterate_upper_bound*/, + const Slice& user_key_without_ts, + const SliceTransform* prefix_extractor, + const Comparator* /*comparator*/, + const Slice* const const_ikey_ptr, + bool* filter_checked, bool need_upper_bound_check, + bool no_io, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) = 0; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.cc new file mode 100644 index 0000000000..32f800db79 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
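+//
+// Reading flow in this translation unit, in brief: GetOrReadFilterBlock()
+// first serves from the pre-loaded filter_block_ member when present;
+// otherwise it calls ReadFilterBlock(), which retrieves the block through
+// the table's block cache (restricted to kBlockCacheTier when no_io is set).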
+// + +#include "table/block_based/filter_block_reader_common.h" + +#include "block_cache.h" +#include "monitoring/perf_context_imp.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/parsed_full_filter_block.h" + +namespace ROCKSDB_NAMESPACE { + +template +Status FilterBlockReaderCommon::ReadFilterBlock( + const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, + const ReadOptions& read_options, bool use_cache, GetContext* get_context, + BlockCacheLookupContext* lookup_context, + CachableEntry* filter_block) { + PERF_TIMER_GUARD(read_filter_block_nanos); + + assert(table); + assert(filter_block); + assert(filter_block->IsEmpty()); + + const BlockBasedTable::Rep* const rep = table->get_rep(); + assert(rep); + + const Status s = + table->RetrieveBlock(prefetch_buffer, read_options, rep->filter_handle, + UncompressionDict::GetEmptyDict(), filter_block, + get_context, lookup_context, + /* for_compaction */ false, use_cache, + /* async_read */ false); + + return s; +} + +template +const SliceTransform* +FilterBlockReaderCommon::table_prefix_extractor() const { + assert(table_); + + const BlockBasedTable::Rep* const rep = table_->get_rep(); + assert(rep); + + return rep->prefix_filtering ? rep->table_prefix_extractor.get() : nullptr; +} + +template +bool FilterBlockReaderCommon::whole_key_filtering() const { + assert(table_); + assert(table_->get_rep()); + + return table_->get_rep()->whole_key_filtering; +} + +template +bool FilterBlockReaderCommon::cache_filter_blocks() const { + assert(table_); + assert(table_->get_rep()); + + return table_->get_rep()->table_options.cache_index_and_filter_blocks; +} + +template +Status FilterBlockReaderCommon::GetOrReadFilterBlock( + bool no_io, GetContext* get_context, + BlockCacheLookupContext* lookup_context, + CachableEntry* filter_block, + const ReadOptions& read_options) const { + assert(filter_block); + + if (!filter_block_.IsEmpty()) { + filter_block->SetUnownedValue(filter_block_.GetValue()); + return Status::OK(); + } + + ReadOptions ro = read_options; + if (no_io) { + ro.read_tier = kBlockCacheTier; + } + + return ReadFilterBlock(table_, nullptr /* prefetch_buffer */, ro, + cache_filter_blocks(), get_context, lookup_context, + filter_block); +} + +template +size_t FilterBlockReaderCommon::ApproximateFilterBlockMemoryUsage() + const { + assert(!filter_block_.GetOwnValue() || filter_block_.GetValue() != nullptr); + return filter_block_.GetOwnValue() + ? 
filter_block_.GetValue()->ApproximateMemoryUsage() + : 0; +} + +template +bool FilterBlockReaderCommon::RangeMayExist( + const Slice* iterate_upper_bound, const Slice& user_key_without_ts, + const SliceTransform* prefix_extractor, const Comparator* comparator, + const Slice* const const_ikey_ptr, bool* filter_checked, + bool need_upper_bound_check, bool no_io, + BlockCacheLookupContext* lookup_context, const ReadOptions& read_options) { + if (!prefix_extractor || !prefix_extractor->InDomain(user_key_without_ts)) { + *filter_checked = false; + return true; + } + Slice prefix = prefix_extractor->Transform(user_key_without_ts); + if (need_upper_bound_check && + !IsFilterCompatible(iterate_upper_bound, prefix, comparator)) { + *filter_checked = false; + return true; + } else { + *filter_checked = true; + return PrefixMayMatch(prefix, no_io, const_ikey_ptr, + /* get_context */ nullptr, lookup_context, + read_options); + } +} + +template +bool FilterBlockReaderCommon::IsFilterCompatible( + const Slice* iterate_upper_bound, const Slice& prefix, + const Comparator* comparator) const { + // Try to reuse the bloom filter in the SST table if prefix_extractor in + // mutable_cf_options has changed. If range [user_key, upper_bound) all + // share the same prefix then we may still be able to use the bloom filter. + const SliceTransform* const prefix_extractor = table_prefix_extractor(); + if (iterate_upper_bound != nullptr && prefix_extractor) { + if (!prefix_extractor->InDomain(*iterate_upper_bound)) { + return false; + } + Slice upper_bound_xform = prefix_extractor->Transform(*iterate_upper_bound); + // first check if user_key and upper_bound all share the same prefix + if (comparator->CompareWithoutTimestamp(prefix, false, upper_bound_xform, + false) != 0) { + // second check if user_key's prefix is the immediate predecessor of + // upper_bound and have the same length. If so, we know for sure all + // keys in the range [user_key, upper_bound) share the same prefix. + // Also need to make sure upper_bound are full length to ensure + // correctness + if (!full_length_enabled_ || + iterate_upper_bound->size() != prefix_extractor_full_length_ || + !comparator->IsSameLengthImmediateSuccessor(prefix, + *iterate_upper_bound)) { + return false; + } + } + return true; + } else { + return false; + } +} + +// Explicitly instantiate templates for both "blocklike" types we use. +// This makes it possible to keep the template definitions in the .cc file. +template class FilterBlockReaderCommon; +template class FilterBlockReaderCommon; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.h new file mode 100644 index 0000000000..62335b30be --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_block_reader_common.h @@ -0,0 +1,76 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#pragma once + +#include + +#include "table/block_based/cachable_entry.h" +#include "table/block_based/filter_block.h" + +namespace ROCKSDB_NAMESPACE { + +class BlockBasedTable; +class FilePrefetchBuffer; + +// Encapsulates common functionality for the various filter block reader +// implementations. 
Provides access to the filter block regardless of whether +// it is owned by the reader or stored in the cache, or whether it is pinned +// in the cache or not. +template +class FilterBlockReaderCommon : public FilterBlockReader { + public: + FilterBlockReaderCommon(const BlockBasedTable* t, + CachableEntry&& filter_block) + : table_(t), filter_block_(std::move(filter_block)) { + assert(table_); + const SliceTransform* const prefix_extractor = table_prefix_extractor(); + if (prefix_extractor) { + full_length_enabled_ = + prefix_extractor->FullLengthEnabled(&prefix_extractor_full_length_); + } + } + + bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key, + const SliceTransform* prefix_extractor, + const Comparator* comparator, + const Slice* const const_ikey_ptr, bool* filter_checked, + bool need_upper_bound_check, bool no_io, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) override; + + protected: + static Status ReadFilterBlock(const BlockBasedTable* table, + FilePrefetchBuffer* prefetch_buffer, + const ReadOptions& read_options, bool use_cache, + GetContext* get_context, + BlockCacheLookupContext* lookup_context, + CachableEntry* filter_block); + + const BlockBasedTable* table() const { return table_; } + const SliceTransform* table_prefix_extractor() const; + bool whole_key_filtering() const; + bool cache_filter_blocks() const; + + Status GetOrReadFilterBlock(bool no_io, GetContext* get_context, + BlockCacheLookupContext* lookup_context, + CachableEntry* filter_block, + const ReadOptions& read_options) const; + + size_t ApproximateFilterBlockMemoryUsage() const; + + private: + bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix, + const Comparator* comparator) const; + + private: + const BlockBasedTable* table_; + CachableEntry filter_block_; + size_t prefix_extractor_full_length_ = 0; + bool full_length_enabled_ = false; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy.cc new file mode 100644 index 0000000000..36f3b16d4b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy.cc @@ -0,0 +1,1966 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
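+//
+// Layout note for the built-in filters below: each serialized filter ends
+// in a 5-byte metadata trailer (kMetadataLen). For the format_version=5
+// Bloom filter, for example, the trailer bytes are
+//   [-1 marker][0 sub-implementation][num_probes][0][0]
+// as written at the end of FastLocalBloomBitsBuilder::Finish().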
+ +#include "rocksdb/filter_policy.h" + +#include +#include +#include +#include +#include +#include + +#include "cache/cache_entry_roles.h" +#include "cache/cache_reservation_manager.h" +#include "logging/logging.h" +#include "port/lang.h" +#include "rocksdb/convenience.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/object_registry.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/filter_policy_internal.h" +#include "table/block_based/full_filter_block.h" +#include "util/bloom_impl.h" +#include "util/coding.h" +#include "util/hash.h" +#include "util/math.h" +#include "util/ribbon_config.h" +#include "util/ribbon_impl.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +// Metadata trailer size for built-in filters. (This is separate from +// block-based table block trailer.) +// +// Originally this was 1 byte for num_probes and 4 bytes for number of +// cache lines in the Bloom filter, but now the first trailer byte is +// usually an implementation marker and remaining 4 bytes have various +// meanings. +static constexpr uint32_t kMetadataLen = 5; + +Slice FinishAlwaysFalse(std::unique_ptr* /*buf*/) { + // Missing metadata, treated as zero entries + return Slice(nullptr, 0); +} + +Slice FinishAlwaysTrue(std::unique_ptr* /*buf*/) { + return Slice("\0\0\0\0\0\0", 6); +} + +// Base class for filter builders using the XXH3 preview hash, +// also known as Hash64 or GetSliceHash64. +class XXPH3FilterBitsBuilder : public BuiltinFilterBitsBuilder { + public: + explicit XXPH3FilterBitsBuilder( + std::atomic* aggregate_rounding_balance, + std::shared_ptr cache_res_mgr, + bool detect_filter_construct_corruption) + : aggregate_rounding_balance_(aggregate_rounding_balance), + cache_res_mgr_(cache_res_mgr), + detect_filter_construct_corruption_( + detect_filter_construct_corruption) {} + + ~XXPH3FilterBitsBuilder() override {} + + virtual void AddKey(const Slice& key) override { + uint64_t hash = GetSliceHash64(key); + // Especially with prefixes, it is common to have repetition, + // though only adjacent repetition, which we want to immediately + // recognize and collapse for estimating true filter space + // requirements. 
+ if (hash_entries_info_.entries.empty() || + hash != hash_entries_info_.entries.back()) { + if (detect_filter_construct_corruption_) { + hash_entries_info_.xor_checksum ^= hash; + } + hash_entries_info_.entries.push_back(hash); + if (cache_res_mgr_ && + // Traditional rounding to whole bucket size + ((hash_entries_info_.entries.size() % + kUint64tHashEntryCacheResBucketSize) == + kUint64tHashEntryCacheResBucketSize / 2)) { + hash_entries_info_.cache_res_bucket_handles.emplace_back(nullptr); + Status s = cache_res_mgr_->MakeCacheReservation( + kUint64tHashEntryCacheResBucketSize * sizeof(hash), + &hash_entries_info_.cache_res_bucket_handles.back()); + s.PermitUncheckedError(); + } + } + } + + virtual size_t EstimateEntriesAdded() override { + return hash_entries_info_.entries.size(); + } + + virtual Status MaybePostVerify(const Slice& filter_content) override; + + protected: + static constexpr uint32_t kMetadataLen = 5; + + // Number of hash entries to accumulate before charging their memory usage to + // the cache when cache charging is available + static const std::size_t kUint64tHashEntryCacheResBucketSize = + CacheReservationManagerImpl< + CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() / + sizeof(uint64_t); + + // For delegating between XXPH3FilterBitsBuilders + void SwapEntriesWith(XXPH3FilterBitsBuilder* other) { + assert(other != nullptr); + hash_entries_info_.Swap(&(other->hash_entries_info_)); + } + + void ResetEntries() { hash_entries_info_.Reset(); } + + virtual size_t RoundDownUsableSpace(size_t available_size) = 0; + + // To choose size using malloc_usable_size, we have to actually allocate. + size_t AllocateMaybeRounding(size_t target_len_with_metadata, + size_t num_entries, + std::unique_ptr* buf) { + // Return value set to a default; overwritten in some cases + size_t rv = target_len_with_metadata; +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + if (aggregate_rounding_balance_ != nullptr) { + // Do optimize_filters_for_memory, using malloc_usable_size. + // Approach: try to keep FP rate balance better than or on + // target (negative aggregate_rounding_balance_). We can then select a + // lower bound filter size (within reasonable limits) that gets us as + // close to on target as possible. We request allocation for that filter + // size and use malloc_usable_size to "round up" to the actual + // allocation size. + + // Although it can be considered bad practice to use malloc_usable_size + // to access an object beyond its original size, this approach should be + // quite general: working for all allocators that properly support + // malloc_usable_size. + + // Race condition on balance is OK because it can only cause temporary + // skew in rounding up vs. rounding down, as long as updates are atomic + // and relative. + int64_t balance = aggregate_rounding_balance_->load(); + + double target_fp_rate = + EstimatedFpRate(num_entries, target_len_with_metadata); + double rv_fp_rate = target_fp_rate; + + if (balance < 0) { + // See formula for BloomFilterPolicy::aggregate_rounding_balance_ + double for_balance_fp_rate = + -balance / double{0x100000000} + target_fp_rate; + + // To simplify, we just try a few modified smaller sizes. This also + // caps how much we vary filter size vs. target, to avoid outlier + // behavior from excessive variance. 
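+        // E.g. for a target length around 64 KiB, the candidate fractions
+        // tried below (3/4, 13/16, 7/8, 15/16) correspond to roughly 48,
+        // 52, 56, and 60 KiB before metadata.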
+ size_t target_len = target_len_with_metadata - kMetadataLen; + assert(target_len < target_len_with_metadata); // check underflow + for (uint64_t maybe_len_rough : + {uint64_t{3} * target_len / 4, uint64_t{13} * target_len / 16, + uint64_t{7} * target_len / 8, uint64_t{15} * target_len / 16}) { + size_t maybe_len_with_metadata = + RoundDownUsableSpace(maybe_len_rough + kMetadataLen); + double maybe_fp_rate = + EstimatedFpRate(num_entries, maybe_len_with_metadata); + if (maybe_fp_rate <= for_balance_fp_rate) { + rv = maybe_len_with_metadata; + rv_fp_rate = maybe_fp_rate; + break; + } + } + } + + // Filter blocks are loaded into block cache with their block trailer. + // We need to make sure that's accounted for in choosing a + // fragmentation-friendly size. + const size_t kExtraPadding = BlockBasedTable::kBlockTrailerSize; + size_t requested = rv + kExtraPadding; + + // Allocate and get usable size + buf->reset(new char[requested]); + size_t usable = malloc_usable_size(buf->get()); + + if (usable - usable / 4 > requested) { + // Ratio greater than 4/3 is too much for utilizing, if it's + // not a buggy or mislinked malloc_usable_size implementation. + // Non-linearity of FP rates with bits/key means rapidly + // diminishing returns in overall accuracy for additional + // storage on disk. + // Nothing to do, except assert that the result is accurate about + // the usable size. (Assignment never used.) + assert(((*buf)[usable - 1] = 'x')); + } else if (usable > requested) { + rv = RoundDownUsableSpace(usable - kExtraPadding); + assert(rv <= usable - kExtraPadding); + rv_fp_rate = EstimatedFpRate(num_entries, rv); + } else { + // Too small means bad malloc_usable_size + assert(usable == requested); + } + memset(buf->get(), 0, rv); + + // Update balance + int64_t diff = static_cast((rv_fp_rate - target_fp_rate) * + double{0x100000000}); + *aggregate_rounding_balance_ += diff; + } else { + buf->reset(new char[rv]()); + } +#else + (void)num_entries; + buf->reset(new char[rv]()); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + return rv; + } + + // TODO: Ideally we want to verify the hash entry + // as it is added to the filter and eliminate this function + // for speeding up and leaving fewer spaces for undetected memory/CPU + // corruption. For Ribbon Filter, it's bit harder. + // Possible solution: + // pass a custom iterator that tracks the xor checksum as + // it iterates to ResetAndFindSeedToSolve + Status MaybeVerifyHashEntriesChecksum() { + if (!detect_filter_construct_corruption_) { + return Status::OK(); + } + + uint64_t actual_hash_entries_xor_checksum = 0; + for (uint64_t h : hash_entries_info_.entries) { + actual_hash_entries_xor_checksum ^= h; + } + + if (actual_hash_entries_xor_checksum == hash_entries_info_.xor_checksum) { + return Status::OK(); + } else { + // Since these hash entries are corrupted and they will not be used + // anymore, we can reset them and release memory. + ResetEntries(); + return Status::Corruption("Filter's hash entries checksum mismatched"); + } + } + + // See BloomFilterPolicy::aggregate_rounding_balance_. If nullptr, + // always "round up" like historic behavior. 
+ std::atomic* aggregate_rounding_balance_; + + // For reserving memory used in (new) Bloom and Ribbon Filter construction + std::shared_ptr cache_res_mgr_; + + // For managing cache charge for final filter in (new) Bloom and Ribbon + // Filter construction + std::deque> + final_filter_cache_res_handles_; + + bool detect_filter_construct_corruption_; + + struct HashEntriesInfo { + // A deque avoids unnecessary copying of already-saved values + // and has near-minimal peak memory use. + std::deque entries; + + // If cache_res_mgr_ != nullptr, + // it manages cache charge for buckets of hash entries in (new) Bloom + // or Ribbon Filter construction. + // Otherwise, it is empty. + std::deque> + cache_res_bucket_handles; + + // If detect_filter_construct_corruption_ == true, + // it records the xor checksum of hash entries. + // Otherwise, it is 0. + uint64_t xor_checksum = 0; + + void Swap(HashEntriesInfo* other) { + assert(other != nullptr); + std::swap(entries, other->entries); + std::swap(cache_res_bucket_handles, other->cache_res_bucket_handles); + std::swap(xor_checksum, other->xor_checksum); + } + + void Reset() { + entries.clear(); + cache_res_bucket_handles.clear(); + xor_checksum = 0; + } + }; + + HashEntriesInfo hash_entries_info_; +}; + +// #################### FastLocalBloom implementation ################## // +// ############## also known as format_version=5 Bloom filter ########## // + +// See description in FastLocalBloomImpl +class FastLocalBloomBitsBuilder : public XXPH3FilterBitsBuilder { + public: + // Non-null aggregate_rounding_balance implies optimize_filters_for_memory + explicit FastLocalBloomBitsBuilder( + const int millibits_per_key, + std::atomic* aggregate_rounding_balance, + std::shared_ptr cache_res_mgr, + bool detect_filter_construct_corruption) + : XXPH3FilterBitsBuilder(aggregate_rounding_balance, cache_res_mgr, + detect_filter_construct_corruption), + millibits_per_key_(millibits_per_key) { + assert(millibits_per_key >= 1000); + } + + // No Copy allowed + FastLocalBloomBitsBuilder(const FastLocalBloomBitsBuilder&) = delete; + void operator=(const FastLocalBloomBitsBuilder&) = delete; + + ~FastLocalBloomBitsBuilder() override {} + + using FilterBitsBuilder::Finish; + + virtual Slice Finish(std::unique_ptr* buf) override { + return Finish(buf, nullptr); + } + + virtual Slice Finish(std::unique_ptr* buf, + Status* status) override { + size_t num_entries = hash_entries_info_.entries.size(); + size_t len_with_metadata = CalculateSpace(num_entries); + + std::unique_ptr mutable_buf; + std::unique_ptr + final_filter_cache_res_handle; + len_with_metadata = + AllocateMaybeRounding(len_with_metadata, num_entries, &mutable_buf); + // Cache charging for mutable_buf + if (cache_res_mgr_) { + Status s = cache_res_mgr_->MakeCacheReservation( + len_with_metadata * sizeof(char), &final_filter_cache_res_handle); + s.PermitUncheckedError(); + } + + assert(mutable_buf); + assert(len_with_metadata >= kMetadataLen); + + // Max size supported by implementation + assert(len_with_metadata <= 0xffffffffU); + + // Compute num_probes after any rounding / adjustments + int num_probes = GetNumProbes(num_entries, len_with_metadata); + + uint32_t len = static_cast(len_with_metadata - kMetadataLen); + if (len > 0) { + TEST_SYNC_POINT_CALLBACK( + "XXPH3FilterBitsBuilder::Finish::" + "TamperHashEntries", + &hash_entries_info_.entries); + AddAllEntries(mutable_buf.get(), len, num_probes); + Status verify_hash_entries_checksum_status = + MaybeVerifyHashEntriesChecksum(); + if 
(!verify_hash_entries_checksum_status.ok()) { + if (status) { + *status = verify_hash_entries_checksum_status; + } + return FinishAlwaysTrue(buf); + } + } + + bool keep_entries_for_postverify = detect_filter_construct_corruption_; + if (!keep_entries_for_postverify) { + ResetEntries(); + } + + // See BloomFilterPolicy::GetBloomBitsReader re: metadata + // -1 = Marker for newer Bloom implementations + mutable_buf[len] = static_cast(-1); + // 0 = Marker for this sub-implementation + mutable_buf[len + 1] = static_cast(0); + // num_probes (and 0 in upper bits for 64-byte block size) + mutable_buf[len + 2] = static_cast(num_probes); + // rest of metadata stays zero + + auto TEST_arg_pair __attribute__((__unused__)) = + std::make_pair(&mutable_buf, len_with_metadata); + TEST_SYNC_POINT_CALLBACK("XXPH3FilterBitsBuilder::Finish::TamperFilter", + &TEST_arg_pair); + + Slice rv(mutable_buf.get(), len_with_metadata); + *buf = std::move(mutable_buf); + final_filter_cache_res_handles_.push_back( + std::move(final_filter_cache_res_handle)); + if (status) { + *status = Status::OK(); + } + return rv; + } + + size_t ApproximateNumEntries(size_t bytes) override { + size_t bytes_no_meta = + bytes >= kMetadataLen ? RoundDownUsableSpace(bytes) - kMetadataLen : 0; + return static_cast(uint64_t{8000} * bytes_no_meta / + millibits_per_key_); + } + + size_t CalculateSpace(size_t num_entries) override { + // If not for cache line blocks in the filter, what would the target + // length in bytes be? + size_t raw_target_len = static_cast( + (uint64_t{num_entries} * millibits_per_key_ + 7999) / 8000); + + if (raw_target_len >= size_t{0xffffffc0}) { + // Max supported for this data structure implementation + raw_target_len = size_t{0xffffffc0}; + } + + // Round up to nearest multiple of 64 (block size). This adjustment is + // used for target FP rate only so that we don't receive complaints about + // lower FP rate vs. historic Bloom filter behavior. + return ((raw_target_len + 63) & ~size_t{63}) + kMetadataLen; + } + + double EstimatedFpRate(size_t keys, size_t len_with_metadata) override { + int num_probes = GetNumProbes(keys, len_with_metadata); + return FastLocalBloomImpl::EstimatedFpRate( + keys, len_with_metadata - kMetadataLen, num_probes, /*hash bits*/ 64); + } + + protected: + size_t RoundDownUsableSpace(size_t available_size) override { + size_t rv = available_size - kMetadataLen; + + if (rv >= size_t{0xffffffc0}) { + // Max supported for this data structure implementation + rv = size_t{0xffffffc0}; + } + + // round down to multiple of 64 (block size) + rv &= ~size_t{63}; + + return rv + kMetadataLen; + } + + private: + // Compute num_probes after any rounding / adjustments + int GetNumProbes(size_t keys, size_t len_with_metadata) { + uint64_t millibits = uint64_t{len_with_metadata - kMetadataLen} * 8000; + int actual_millibits_per_key = + static_cast(millibits / std::max(keys, size_t{1})); + // BEGIN XXX/TODO(peterd): preserving old/default behavior for now to + // minimize unit test churn. Remove this some time. 
+ if (!aggregate_rounding_balance_) { + actual_millibits_per_key = millibits_per_key_; + } + // END XXX/TODO + return FastLocalBloomImpl::ChooseNumProbes(actual_millibits_per_key); + } + + void AddAllEntries(char* data, uint32_t len, int num_probes) { + // Simple version without prefetching: + // + // for (auto h : hash_entries_info_.entries) { + // FastLocalBloomImpl::AddHash(Lower32of64(h), Upper32of64(h), len, + // num_probes, data); + // } + + const size_t num_entries = hash_entries_info_.entries.size(); + constexpr size_t kBufferMask = 7; + static_assert(((kBufferMask + 1) & kBufferMask) == 0, + "Must be power of 2 minus 1"); + + std::array hashes; + std::array byte_offsets; + + // Prime the buffer + size_t i = 0; + std::deque::iterator hash_entries_it = + hash_entries_info_.entries.begin(); + for (; i <= kBufferMask && i < num_entries; ++i) { + uint64_t h = *hash_entries_it; + FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data, + /*out*/ &byte_offsets[i]); + hashes[i] = Upper32of64(h); + ++hash_entries_it; + } + + // Process and buffer + for (; i < num_entries; ++i) { + uint32_t& hash_ref = hashes[i & kBufferMask]; + uint32_t& byte_offset_ref = byte_offsets[i & kBufferMask]; + // Process (add) + FastLocalBloomImpl::AddHashPrepared(hash_ref, num_probes, + data + byte_offset_ref); + // And buffer + uint64_t h = *hash_entries_it; + FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data, + /*out*/ &byte_offset_ref); + hash_ref = Upper32of64(h); + ++hash_entries_it; + } + + // Finish processing + for (i = 0; i <= kBufferMask && i < num_entries; ++i) { + FastLocalBloomImpl::AddHashPrepared(hashes[i], num_probes, + data + byte_offsets[i]); + } + } + + // Target allocation per added key, in thousandths of a bit. + int millibits_per_key_; +}; + +// See description in FastLocalBloomImpl +class FastLocalBloomBitsReader : public BuiltinFilterBitsReader { + public: + FastLocalBloomBitsReader(const char* data, int num_probes, uint32_t len_bytes) + : data_(data), num_probes_(num_probes), len_bytes_(len_bytes) {} + + // No Copy allowed + FastLocalBloomBitsReader(const FastLocalBloomBitsReader&) = delete; + void operator=(const FastLocalBloomBitsReader&) = delete; + + ~FastLocalBloomBitsReader() override {} + + bool MayMatch(const Slice& key) override { + uint64_t h = GetSliceHash64(key); + uint32_t byte_offset; + FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_, + /*out*/ &byte_offset); + return FastLocalBloomImpl::HashMayMatchPrepared(Upper32of64(h), num_probes_, + data_ + byte_offset); + } + + virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override { + std::array hashes; + std::array byte_offsets; + for (int i = 0; i < num_keys; ++i) { + uint64_t h = GetSliceHash64(*keys[i]); + FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_, + /*out*/ &byte_offsets[i]); + hashes[i] = Upper32of64(h); + } + for (int i = 0; i < num_keys; ++i) { + may_match[i] = FastLocalBloomImpl::HashMayMatchPrepared( + hashes[i], num_probes_, data_ + byte_offsets[i]); + } + } + + bool HashMayMatch(const uint64_t h) override { + return FastLocalBloomImpl::HashMayMatch(Lower32of64(h), Upper32of64(h), + len_bytes_, num_probes_, data_); + } + + private: + const char* data_; + const int num_probes_; + const uint32_t len_bytes_; +}; + +// ##################### Ribbon filter implementation ################### // + +// Implements concept RehasherTypesAndSettings in ribbon_impl.h +struct Standard128RibbonRehasherTypesAndSettings { + // These are schema-critical. 
Any change almost certainly changes + // underlying data. + static constexpr bool kIsFilter = true; + static constexpr bool kHomogeneous = false; + static constexpr bool kFirstCoeffAlwaysOne = true; + static constexpr bool kUseSmash = false; + using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128; + using Hash = uint64_t; + using Seed = uint32_t; + // Changing these doesn't necessarily change underlying data, + // but might affect supported scalability of those dimensions. + using Index = uint32_t; + using ResultRow = uint32_t; + // Save a conditional in Ribbon queries + static constexpr bool kAllowZeroStarts = false; +}; + +using Standard128RibbonTypesAndSettings = + ribbon::StandardRehasherAdapter; + +class Standard128RibbonBitsBuilder : public XXPH3FilterBitsBuilder { + public: + explicit Standard128RibbonBitsBuilder( + double desired_one_in_fp_rate, int bloom_millibits_per_key, + std::atomic* aggregate_rounding_balance, + std::shared_ptr cache_res_mgr, + bool detect_filter_construct_corruption, Logger* info_log) + : XXPH3FilterBitsBuilder(aggregate_rounding_balance, cache_res_mgr, + detect_filter_construct_corruption), + desired_one_in_fp_rate_(desired_one_in_fp_rate), + info_log_(info_log), + bloom_fallback_(bloom_millibits_per_key, aggregate_rounding_balance, + cache_res_mgr, detect_filter_construct_corruption) { + assert(desired_one_in_fp_rate >= 1.0); + } + + // No Copy allowed + Standard128RibbonBitsBuilder(const Standard128RibbonBitsBuilder&) = delete; + void operator=(const Standard128RibbonBitsBuilder&) = delete; + + ~Standard128RibbonBitsBuilder() override {} + + using FilterBitsBuilder::Finish; + + virtual Slice Finish(std::unique_ptr* buf) override { + return Finish(buf, nullptr); + } + + virtual Slice Finish(std::unique_ptr* buf, + Status* status) override { + if (hash_entries_info_.entries.size() > kMaxRibbonEntries) { + ROCKS_LOG_WARN( + info_log_, "Too many keys for Ribbon filter: %llu", + static_cast(hash_entries_info_.entries.size())); + SwapEntriesWith(&bloom_fallback_); + assert(hash_entries_info_.entries.empty()); + return bloom_fallback_.Finish(buf, status); + } + if (hash_entries_info_.entries.size() == 0) { + // Save a conditional in Ribbon queries by using alternate reader + // for zero entries added. 
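+      // (FinishAlwaysFalse returns an empty Slice with no metadata, which
+      // the reader side treats as a filter that matches no keys.)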
+ if (status) { + *status = Status::OK(); + } + return FinishAlwaysFalse(buf); + } + uint32_t num_entries = + static_cast(hash_entries_info_.entries.size()); + uint32_t num_slots; + size_t len_with_metadata; + + CalculateSpaceAndSlots(num_entries, &len_with_metadata, &num_slots); + + // Bloom fall-back indicator + if (num_slots == 0) { + SwapEntriesWith(&bloom_fallback_); + assert(hash_entries_info_.entries.empty()); + return bloom_fallback_.Finish(buf, status); + } + + uint32_t entropy = 0; + if (!hash_entries_info_.entries.empty()) { + entropy = Lower32of64(hash_entries_info_.entries.front()); + } + + BandingType banding; + std::size_t bytes_banding = ribbon::StandardBanding< + Standard128RibbonTypesAndSettings>::EstimateMemoryUsage(num_slots); + Status status_banding_cache_res = Status::OK(); + + // Cache charging for banding + std::unique_ptr + banding_res_handle; + if (cache_res_mgr_) { + status_banding_cache_res = cache_res_mgr_->MakeCacheReservation( + bytes_banding, &banding_res_handle); + } + + if (status_banding_cache_res.IsMemoryLimit()) { + ROCKS_LOG_WARN(info_log_, + "Cache charging for Ribbon filter banding failed due " + "to cache full"); + SwapEntriesWith(&bloom_fallback_); + assert(hash_entries_info_.entries.empty()); + // Release cache for banding since the banding won't be allocated + banding_res_handle.reset(); + return bloom_fallback_.Finish(buf, status); + } + + TEST_SYNC_POINT_CALLBACK( + "XXPH3FilterBitsBuilder::Finish::" + "TamperHashEntries", + &hash_entries_info_.entries); + + bool success = banding.ResetAndFindSeedToSolve( + num_slots, hash_entries_info_.entries.begin(), + hash_entries_info_.entries.end(), + /*starting seed*/ entropy & 255, /*seed mask*/ 255); + if (!success) { + ROCKS_LOG_WARN( + info_log_, "Too many re-seeds (256) for Ribbon filter, %llu / %llu", + static_cast(hash_entries_info_.entries.size()), + static_cast(num_slots)); + SwapEntriesWith(&bloom_fallback_); + assert(hash_entries_info_.entries.empty()); + return bloom_fallback_.Finish(buf, status); + } + + Status verify_hash_entries_checksum_status = + MaybeVerifyHashEntriesChecksum(); + if (!verify_hash_entries_checksum_status.ok()) { + ROCKS_LOG_WARN(info_log_, "Verify hash entries checksum error: %s", + verify_hash_entries_checksum_status.getState()); + if (status) { + *status = verify_hash_entries_checksum_status; + } + return FinishAlwaysTrue(buf); + } + + bool keep_entries_for_postverify = detect_filter_construct_corruption_; + if (!keep_entries_for_postverify) { + ResetEntries(); + } + + uint32_t seed = banding.GetOrdinalSeed(); + assert(seed < 256); + + std::unique_ptr mutable_buf; + std::unique_ptr + final_filter_cache_res_handle; + len_with_metadata = + AllocateMaybeRounding(len_with_metadata, num_entries, &mutable_buf); + // Cache charging for mutable_buf + if (cache_res_mgr_) { + Status s = cache_res_mgr_->MakeCacheReservation( + len_with_metadata * sizeof(char), &final_filter_cache_res_handle); + s.PermitUncheckedError(); + } + + SolnType soln(mutable_buf.get(), len_with_metadata); + soln.BackSubstFrom(banding); + uint32_t num_blocks = soln.GetNumBlocks(); + // This should be guaranteed: + // num_entries < 2^30 + // => (overhead_factor < 2.0) + // num_entries * overhead_factor == num_slots < 2^31 + // => (num_blocks = num_slots / 128) + // num_blocks < 2^24 + assert(num_blocks < 0x1000000U); + + // See BloomFilterPolicy::GetBloomBitsReader re: metadata + // -2 = Marker for Standard128 Ribbon + mutable_buf[len_with_metadata - 5] = static_cast(-2); + // Hash seed + 
mutable_buf[len_with_metadata - 4] = static_cast(seed); + // Number of blocks, in 24 bits + // (Along with bytes, we can derive other settings) + mutable_buf[len_with_metadata - 3] = static_cast(num_blocks & 255); + mutable_buf[len_with_metadata - 2] = + static_cast((num_blocks >> 8) & 255); + mutable_buf[len_with_metadata - 1] = + static_cast((num_blocks >> 16) & 255); + + auto TEST_arg_pair __attribute__((__unused__)) = + std::make_pair(&mutable_buf, len_with_metadata); + TEST_SYNC_POINT_CALLBACK("XXPH3FilterBitsBuilder::Finish::TamperFilter", + &TEST_arg_pair); + + Slice rv(mutable_buf.get(), len_with_metadata); + *buf = std::move(mutable_buf); + final_filter_cache_res_handles_.push_back( + std::move(final_filter_cache_res_handle)); + if (status) { + *status = Status::OK(); + } + return rv; + } + + // Setting num_slots to 0 means "fall back on Bloom filter." + // And note this implementation does not support num_entries or num_slots + // beyond uint32_t; see kMaxRibbonEntries. + void CalculateSpaceAndSlots(size_t num_entries, + size_t* target_len_with_metadata, + uint32_t* num_slots) { + if (num_entries > kMaxRibbonEntries) { + // More entries than supported by this Ribbon + *num_slots = 0; // use Bloom + *target_len_with_metadata = bloom_fallback_.CalculateSpace(num_entries); + return; + } + uint32_t entropy = 0; + if (!hash_entries_info_.entries.empty()) { + entropy = Upper32of64(hash_entries_info_.entries.front()); + } + + *num_slots = NumEntriesToNumSlots(static_cast(num_entries)); + *target_len_with_metadata = + SolnType::GetBytesForOneInFpRate(*num_slots, desired_one_in_fp_rate_, + /*rounding*/ entropy) + + kMetadataLen; + + // Consider possible Bloom fallback for small filters + if (*num_slots < 1024) { + size_t bloom = bloom_fallback_.CalculateSpace(num_entries); + if (bloom < *target_len_with_metadata) { + *num_slots = 0; // use Bloom + *target_len_with_metadata = bloom; + return; + } + } + } + + size_t CalculateSpace(size_t num_entries) override { + if (num_entries == 0) { + // See FinishAlwaysFalse + return 0; + } + size_t target_len_with_metadata; + uint32_t num_slots; + CalculateSpaceAndSlots(num_entries, &target_len_with_metadata, &num_slots); + (void)num_slots; + return target_len_with_metadata; + } + + // This is a somewhat ugly but reasonably fast and reasonably accurate + // reversal of CalculateSpace. + size_t ApproximateNumEntries(size_t bytes) override { + size_t len_no_metadata = + RoundDownUsableSpace(std::max(bytes, size_t{kMetadataLen})) - + kMetadataLen; + + if (!(desired_one_in_fp_rate_ > 1.0)) { + // Effectively asking for 100% FP rate, or NaN etc. + // Note that NaN is neither < 1.0 nor > 1.0 + return kMaxRibbonEntries; + } + + // Find a slight under-estimate for actual average bits per slot + double min_real_bits_per_slot; + if (desired_one_in_fp_rate_ >= 1.0 + std::numeric_limits::max()) { + // Max of 32 solution columns (result bits) + min_real_bits_per_slot = 32.0; + } else { + // Account for mix of b and b+1 solution columns being slightly + // suboptimal vs. ideal log2(1/fp_rate) bits. 
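+      // (Worked example, illustrative and not in the original source: for
+      // desired_one_in_fp_rate_ = 100, rounded = 100 and upper_bits_per_key
+      // = 1 + FloorLog2(100) = 7, so fp_rate_for_upper = 2^-7 = 1/128 and
+      // portion_lower = (1/100 - 1/128) / (1/128) = 0.28, giving
+      // min_real_bits_per_slot = 7 - 0.28 = 6.72.)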
+ uint32_t rounded = static_cast(desired_one_in_fp_rate_); + int upper_bits_per_key = 1 + FloorLog2(rounded); + double fp_rate_for_upper = std::pow(2.0, -upper_bits_per_key); + double portion_lower = + (1.0 / desired_one_in_fp_rate_ - fp_rate_for_upper) / + fp_rate_for_upper; + min_real_bits_per_slot = upper_bits_per_key - portion_lower; + assert(min_real_bits_per_slot > 0.0); + assert(min_real_bits_per_slot <= 32.0); + } + + // An overestimate, but this should only be O(1) slots away from truth. + double max_slots = len_no_metadata * 8.0 / min_real_bits_per_slot; + + // Let's not bother accounting for overflow to Bloom filter + // (Includes NaN case) + if (!(max_slots < ConfigHelper::GetNumSlots(kMaxRibbonEntries))) { + return kMaxRibbonEntries; + } + + // Set up for short iteration + uint32_t slots = static_cast(max_slots); + slots = SolnType::RoundUpNumSlots(slots); + + // Assert that we have a valid upper bound on slots + assert(SolnType::GetBytesForOneInFpRate( + SolnType::RoundUpNumSlots(slots + 1), desired_one_in_fp_rate_, + /*rounding*/ 0) > len_no_metadata); + + // Iterate up to a few times to rather precisely account for small effects + for (int i = 0; slots > 0; ++i) { + size_t reqd_bytes = + SolnType::GetBytesForOneInFpRate(slots, desired_one_in_fp_rate_, + /*rounding*/ 0); + if (reqd_bytes <= len_no_metadata) { + break; // done + } + if (i >= 2) { + // should have been enough iterations + assert(false); + break; + } + slots = SolnType::RoundDownNumSlots(slots - 1); + } + + uint32_t num_entries = ConfigHelper::GetNumToAdd(slots); + + // Consider possible Bloom fallback for small filters + if (slots < 1024) { + size_t bloom = bloom_fallback_.ApproximateNumEntries(bytes); + if (bloom > num_entries) { + return bloom; + } else { + return num_entries; + } + } else { + return std::min(num_entries, kMaxRibbonEntries); + } + } + + double EstimatedFpRate(size_t num_entries, + size_t len_with_metadata) override { + if (num_entries > kMaxRibbonEntries) { + // More entries than supported by this Ribbon + return bloom_fallback_.EstimatedFpRate(num_entries, len_with_metadata); + } + uint32_t num_slots = + NumEntriesToNumSlots(static_cast(num_entries)); + SolnType fake_soln(nullptr, len_with_metadata); + fake_soln.ConfigureForNumSlots(num_slots); + return fake_soln.ExpectedFpRate(); + } + + Status MaybePostVerify(const Slice& filter_content) override { + bool fall_back = (bloom_fallback_.EstimateEntriesAdded() > 0); + return fall_back ? bloom_fallback_.MaybePostVerify(filter_content) + : XXPH3FilterBitsBuilder::MaybePostVerify(filter_content); + } + + protected: + size_t RoundDownUsableSpace(size_t available_size) override { + size_t rv = available_size - kMetadataLen; + + // round down to multiple of 16 (segment size) + rv &= ~size_t{15}; + + return rv + kMetadataLen; + } + + private: + using TS = Standard128RibbonTypesAndSettings; + using SolnType = ribbon::SerializableInterleavedSolution; + using BandingType = ribbon::StandardBanding; + using ConfigHelper = ribbon::BandingConfigHelper1TS; + + static uint32_t NumEntriesToNumSlots(uint32_t num_entries) { + uint32_t num_slots1 = ConfigHelper::GetNumSlots(num_entries); + return SolnType::RoundUpNumSlots(num_slots1); + } + + // Approximate num_entries to ensure number of bytes fits in 32 bits, which + // is not an inherent limitation but does ensure somewhat graceful Bloom + // fallback for crazy high number of entries, since the Bloom implementation + // does not support number of bytes bigger than fits in 32 bits. 
This is
+  // within an order of magnitude of implementation limit on num_slots
+  // fitting in 32 bits, and even closer for num_blocks fitting in 24 bits
+  // (for filter metadata).
+  static constexpr uint32_t kMaxRibbonEntries = 950000000;  // ~ 1 billion
+
+  // A desired value for 1/fp_rate. For example, 100 -> 1% fp rate.
+  double desired_one_in_fp_rate_;
+
+  // For warnings, or can be nullptr
+  Logger* info_log_;
+
+  // For falling back on Bloom filter in some exceptional cases and
+  // very small filter cases
+  FastLocalBloomBitsBuilder bloom_fallback_;
+};
+
+// for the linker, at least with DEBUG_LEVEL=2
+constexpr uint32_t Standard128RibbonBitsBuilder::kMaxRibbonEntries;
+
+class Standard128RibbonBitsReader : public BuiltinFilterBitsReader {
+ public:
+  Standard128RibbonBitsReader(const char* data, size_t len_bytes,
+                              uint32_t num_blocks, uint32_t seed)
+      : soln_(const_cast<char*>(data), len_bytes) {
+    soln_.ConfigureForNumBlocks(num_blocks);
+    hasher_.SetOrdinalSeed(seed);
+  }
+
+  // No Copy allowed
+  Standard128RibbonBitsReader(const Standard128RibbonBitsReader&) = delete;
+  void operator=(const Standard128RibbonBitsReader&) = delete;
+
+  ~Standard128RibbonBitsReader() override {}
+
+  bool MayMatch(const Slice& key) override {
+    uint64_t h = GetSliceHash64(key);
+    return soln_.FilterQuery(h, hasher_);
+  }
+
+  virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
+    struct SavedData {
+      uint64_t seeded_hash;
+      uint32_t segment_num;
+      uint32_t num_columns;
+      uint32_t start_bits;
+    };
+    std::array<SavedData, MultiGetContext::MAX_BATCH_SIZE> saved;
+    for (int i = 0; i < num_keys; ++i) {
+      ribbon::InterleavedPrepareQuery(
+          GetSliceHash64(*keys[i]), hasher_, soln_, &saved[i].seeded_hash,
+          &saved[i].segment_num, &saved[i].num_columns, &saved[i].start_bits);
+    }
+    for (int i = 0; i < num_keys; ++i) {
+      may_match[i] = ribbon::InterleavedFilterQuery(
+          saved[i].seeded_hash, saved[i].segment_num, saved[i].num_columns,
+          saved[i].start_bits, hasher_, soln_);
+    }
+  }
+
+  bool HashMayMatch(const uint64_t h) override {
+    return soln_.FilterQuery(h, hasher_);
+  }
+
+ private:
+  using TS = Standard128RibbonTypesAndSettings;
+  ribbon::SerializableInterleavedSolution<TS> soln_;
+  ribbon::StandardHasher<TS> hasher_;
+};
+
+// ##################### Legacy Bloom implementation ################### //
+
+using LegacyBloomImpl = LegacyLocalityBloomImpl</*ExtraRotates*/ false>;
+
+class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
+ public:
+  explicit LegacyBloomBitsBuilder(const int bits_per_key, Logger* info_log);
+
+  // No Copy allowed
+  LegacyBloomBitsBuilder(const LegacyBloomBitsBuilder&) = delete;
+  void operator=(const LegacyBloomBitsBuilder&) = delete;
+
+  ~LegacyBloomBitsBuilder() override;
+
+  void AddKey(const Slice& key) override;
+
+  virtual size_t EstimateEntriesAdded() override {
+    return hash_entries_.size();
+  }
+
+  using FilterBitsBuilder::Finish;
+
+  Slice Finish(std::unique_ptr<const char[]>* buf) override;
+
+  size_t CalculateSpace(size_t num_entries) override {
+    uint32_t dont_care1;
+    uint32_t dont_care2;
+    return CalculateSpace(num_entries, &dont_care1, &dont_care2);
+  }
+
+  double EstimatedFpRate(size_t keys, size_t bytes) override {
+    return LegacyBloomImpl::EstimatedFpRate(keys, bytes - kMetadataLen,
+                                            num_probes_);
+  }
+
+  size_t ApproximateNumEntries(size_t bytes) override;
+
+ private:
+  int bits_per_key_;
+  int num_probes_;
+  std::vector<uint32_t> hash_entries_;
+  Logger* info_log_;
+
+  // Get total bits, optimized for CPU cache line locality
+  uint32_t GetTotalBitsForLocality(uint32_t total_bits);
+
+  // Reserve space for new filter
+  char*
ReserveSpace(size_t num_entries, uint32_t* total_bits, + uint32_t* num_lines); + + // Implementation-specific variant of public CalculateSpace + uint32_t CalculateSpace(size_t num_entries, uint32_t* total_bits, + uint32_t* num_lines); + + // Assuming single threaded access to this function. + void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits); +}; + +LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key, + Logger* info_log) + : bits_per_key_(bits_per_key), + num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)), + info_log_(info_log) { + assert(bits_per_key_); +} + +LegacyBloomBitsBuilder::~LegacyBloomBitsBuilder() {} + +void LegacyBloomBitsBuilder::AddKey(const Slice& key) { + uint32_t hash = BloomHash(key); + if (hash_entries_.size() == 0 || hash != hash_entries_.back()) { + hash_entries_.push_back(hash); + } +} + +Slice LegacyBloomBitsBuilder::Finish(std::unique_ptr* buf) { + uint32_t total_bits, num_lines; + size_t num_entries = hash_entries_.size(); + char* data = + ReserveSpace(static_cast(num_entries), &total_bits, &num_lines); + assert(data); + + if (total_bits != 0 && num_lines != 0) { + for (auto h : hash_entries_) { + AddHash(h, data, num_lines, total_bits); + } + + // Check for excessive entries for 32-bit hash function + if (num_entries >= /* minimum of 3 million */ 3000000U) { + // More specifically, we can detect that the 32-bit hash function + // is causing significant increase in FP rate by comparing current + // estimated FP rate to what we would get with a normal number of + // keys at same memory ratio. + double est_fp_rate = LegacyBloomImpl::EstimatedFpRate( + num_entries, total_bits / 8, num_probes_); + double vs_fp_rate = LegacyBloomImpl::EstimatedFpRate( + 1U << 16, (1U << 16) * bits_per_key_ / 8, num_probes_); + + if (est_fp_rate >= 1.50 * vs_fp_rate) { + // For more details, see + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter + ROCKS_LOG_WARN( + info_log_, + "Using legacy SST/BBT Bloom filter with excessive key count " + "(%.1fM @ %dbpk), causing estimated %.1fx higher filter FP rate. " + "Consider using new Bloom with format_version>=5, smaller SST " + "file size, or partitioned filters.", + num_entries / 1000000.0, bits_per_key_, est_fp_rate / vs_fp_rate); + } + } + } + // See BloomFilterPolicy::GetFilterBitsReader for metadata + data[total_bits / 8] = static_cast(num_probes_); + EncodeFixed32(data + total_bits / 8 + 1, static_cast(num_lines)); + + const char* const_data = data; + buf->reset(const_data); + hash_entries_.clear(); + + return Slice(data, total_bits / 8 + kMetadataLen); +} + +size_t LegacyBloomBitsBuilder::ApproximateNumEntries(size_t bytes) { + assert(bits_per_key_); + assert(bytes > 0); + + uint64_t total_bits_tmp = bytes * 8; + // total bits, including temporary computations, cannot exceed 2^32 + // for compatibility + total_bits_tmp = std::min(total_bits_tmp, uint64_t{0xffff0000}); + + uint32_t high = static_cast(total_bits_tmp) / + static_cast(bits_per_key_) + + 1; + uint32_t low = 1; + uint32_t n = high; + for (; n >= low; n--) { + if (CalculateSpace(n) <= bytes) { + break; + } + } + return n; +} + +uint32_t LegacyBloomBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) { + uint32_t num_lines = + (total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8); + + // Make num_lines an odd number to make sure more bits are involved + // when determining which block. 
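+  // (Worked example, illustrative and not in the original source: with
+  // 64-byte cache lines (512 bits), total_bits = 100000 gives
+  // (100000 + 511) / 512 = 196 lines; 196 is even, so it is bumped to 197
+  // and the function returns 197 * 512 = 100864 bits.)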
+  if (num_lines % 2 == 0) {
+    num_lines++;
+  }
+  return num_lines * (CACHE_LINE_SIZE * 8);
+}
+
+uint32_t LegacyBloomBitsBuilder::CalculateSpace(size_t num_entries,
+                                                uint32_t* total_bits,
+                                                uint32_t* num_lines) {
+  assert(bits_per_key_);
+  if (num_entries != 0) {
+    size_t total_bits_tmp = num_entries * bits_per_key_;
+    // total bits, including temporary computations, cannot exceed 2^32
+    // for compatibility
+    total_bits_tmp = std::min(total_bits_tmp, size_t{0xffff0000});
+
+    *total_bits =
+        GetTotalBitsForLocality(static_cast<uint32_t>(total_bits_tmp));
+    *num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
+    assert(*total_bits > 0 && *total_bits % 8 == 0);
+  } else {
+    // filter is empty, just leave space for metadata
+    *total_bits = 0;
+    *num_lines = 0;
+  }
+
+  // Reserve space for Filter
+  uint32_t sz = *total_bits / 8;
+  sz += kMetadataLen;  // 4 bytes for num_lines, 1 byte for num_probes
+  return sz;
+}
+
+char* LegacyBloomBitsBuilder::ReserveSpace(size_t num_entries,
+                                           uint32_t* total_bits,
+                                           uint32_t* num_lines) {
+  uint32_t sz = CalculateSpace(num_entries, total_bits, num_lines);
+  char* data = new char[sz];
+  memset(data, 0, sz);
+  return data;
+}
+
+inline void LegacyBloomBitsBuilder::AddHash(uint32_t h, char* data,
+                                            uint32_t num_lines,
+                                            uint32_t total_bits) {
+#ifdef NDEBUG
+  static_cast<void>(total_bits);
+#endif
+  assert(num_lines > 0 && total_bits > 0);
+
+  LegacyBloomImpl::AddHash(h, num_lines, num_probes_, data,
+                           ConstexprFloorLog2(CACHE_LINE_SIZE));
+}
+
+class LegacyBloomBitsReader : public BuiltinFilterBitsReader {
+ public:
+  LegacyBloomBitsReader(const char* data, int num_probes, uint32_t num_lines,
+                        uint32_t log2_cache_line_size)
+      : data_(data),
+        num_probes_(num_probes),
+        num_lines_(num_lines),
+        log2_cache_line_size_(log2_cache_line_size) {}
+
+  // No Copy allowed
+  LegacyBloomBitsReader(const LegacyBloomBitsReader&) = delete;
+  void operator=(const LegacyBloomBitsReader&) = delete;
+
+  ~LegacyBloomBitsReader() override {}
+
+  // "contents" contains the data built by a preceding call to
+  // FilterBitsBuilder::Finish. MayMatch must return true if the key was
+  // passed to FilterBitsBuilder::AddKey. This method may return true or false
+  // if the key was not on the list, but it should aim to return false with a
+  // high probability.
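+  // (In other words, the standard Bloom filter contract: no false negatives
+  // for keys that were added, and false positives for other keys kept to a
+  // low probability rather than ruled out.)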
+  bool MayMatch(const Slice& key) override {
+    uint32_t hash = BloomHash(key);
+    uint32_t byte_offset;
+    LegacyBloomImpl::PrepareHashMayMatch(
+        hash, num_lines_, data_, /*out*/ &byte_offset, log2_cache_line_size_);
+    return LegacyBloomImpl::HashMayMatchPrepared(
+        hash, num_probes_, data_ + byte_offset, log2_cache_line_size_);
+  }
+
+  virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
+    std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
+    std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
+    for (int i = 0; i < num_keys; ++i) {
+      hashes[i] = BloomHash(*keys[i]);
+      LegacyBloomImpl::PrepareHashMayMatch(hashes[i], num_lines_, data_,
+                                           /*out*/ &byte_offsets[i],
+                                           log2_cache_line_size_);
+    }
+    for (int i = 0; i < num_keys; ++i) {
+      may_match[i] = LegacyBloomImpl::HashMayMatchPrepared(
+          hashes[i], num_probes_, data_ + byte_offsets[i],
+          log2_cache_line_size_);
+    }
+  }
+
+  bool HashMayMatch(const uint64_t /* h */) override { return false; }
+
+ private:
+  const char* data_;
+  const int num_probes_;
+  const uint32_t num_lines_;
+  const uint32_t log2_cache_line_size_;
+};
+
+class AlwaysTrueFilter : public BuiltinFilterBitsReader {
+ public:
+  bool MayMatch(const Slice&) override { return true; }
+  using FilterBitsReader::MayMatch;  // inherit overload
+  bool HashMayMatch(const uint64_t) override { return true; }
+  using BuiltinFilterBitsReader::HashMayMatch;  // inherit overload
+};
+
+class AlwaysFalseFilter : public BuiltinFilterBitsReader {
+ public:
+  bool MayMatch(const Slice&) override { return false; }
+  using FilterBitsReader::MayMatch;  // inherit overload
+  bool HashMayMatch(const uint64_t) override { return false; }
+  using BuiltinFilterBitsReader::HashMayMatch;  // inherit overload
+};
+
+Status XXPH3FilterBitsBuilder::MaybePostVerify(const Slice& filter_content) {
+  Status s = Status::OK();
+
+  if (!detect_filter_construct_corruption_) {
+    return s;
+  }
+
+  std::unique_ptr<BuiltinFilterBitsReader> bits_reader(
+      BuiltinFilterPolicy::GetBuiltinFilterBitsReader(filter_content));
+
+  for (uint64_t h : hash_entries_info_.entries) {
+    // The current approach will not detect corruption from XXPH3Filter to
+    // AlwaysTrueFilter, which can lead to performance cost later due to
+    // AlwaysTrueFilter not filtering anything. But this cost is acceptable
+    // given the extra implementation complexity to detect such a case.
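+    // (Illustrative scenario, not in the original source: if the metadata
+    // marker byte were corrupted from -1/-2 to 0, GetBuiltinFilterBitsReader
+    // would return an AlwaysTrueFilter, every HashMayMatch(h) would return
+    // true, and this loop could not observe the corruption.)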
+ bool may_match = bits_reader->HashMayMatch(h); + if (!may_match) { + s = Status::Corruption("Corrupted filter content"); + break; + } + } + + ResetEntries(); + return s; +} +} // namespace + +const char* BuiltinFilterPolicy::kClassName() { + return "rocksdb.internal.BuiltinFilter"; +} + +bool BuiltinFilterPolicy::IsInstanceOf(const std::string& name) const { + if (name == kClassName()) { + return true; + } else { + return FilterPolicy::IsInstanceOf(name); + } +} + +static const char* kBuiltinFilterMetadataName = "rocksdb.BuiltinBloomFilter"; + +const char* BuiltinFilterPolicy::kCompatibilityName() { + return kBuiltinFilterMetadataName; +} + +const char* BuiltinFilterPolicy::CompatibilityName() const { + return kBuiltinFilterMetadataName; +} + +BloomLikeFilterPolicy::BloomLikeFilterPolicy(double bits_per_key) + : warned_(false), aggregate_rounding_balance_(0) { + // Sanitize bits_per_key + if (bits_per_key < 0.5) { + // Round down to no filter + bits_per_key = 0; + } else if (bits_per_key < 1.0) { + // Minimum 1 bit per key (equiv) when creating filter + bits_per_key = 1.0; + } else if (!(bits_per_key < 100.0)) { // including NaN + bits_per_key = 100.0; + } + + // Includes a nudge toward rounding up, to ensure on all platforms + // that doubles specified with three decimal digits after the decimal + // point are interpreted accurately. + millibits_per_key_ = static_cast(bits_per_key * 1000.0 + 0.500001); + + // For now configure Ribbon filter to match Bloom FP rate and save + // memory. (Ribbon bits per key will be ~30% less than Bloom bits per key + // for same FP rate.) + desired_one_in_fp_rate_ = + 1.0 / BloomMath::CacheLocalFpRate( + bits_per_key, + FastLocalBloomImpl::ChooseNumProbes(millibits_per_key_), + /*cache_line_bits*/ 512); + + // For better or worse, this is a rounding up of a nudged rounding up, + // e.g. 7.4999999999999 will round up to 8, but that provides more + // predictability against small arithmetic errors in floating point. 
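+  // (Worked example, illustrative and not in the original source:
+  // bits_per_key = 9.9 gives millibits_per_key_ =
+  // static_cast<int>(9900.500001) = 9900, and the line below yields
+  // whole_bits_per_key_ = (9900 + 500) / 1000 = 10.)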
+ whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000; +} + +BloomLikeFilterPolicy::~BloomLikeFilterPolicy() {} +const char* BloomLikeFilterPolicy::kClassName() { + return "rocksdb.internal.BloomLikeFilter"; +} + +bool BloomLikeFilterPolicy::IsInstanceOf(const std::string& name) const { + if (name == kClassName()) { + return true; + } else { + return BuiltinFilterPolicy::IsInstanceOf(name); + } +} + +const char* ReadOnlyBuiltinFilterPolicy::kClassName() { + return kBuiltinFilterMetadataName; +} + +std::string BloomLikeFilterPolicy::GetId() const { + return Name() + GetBitsPerKeySuffix(); +} + +BloomFilterPolicy::BloomFilterPolicy(double bits_per_key) + : BloomLikeFilterPolicy(bits_per_key) {} + +FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } else if (context.table_options.format_version < 5) { + return GetLegacyBloomBuilderWithContext(context); + } else { + return GetFastLocalBloomBuilderWithContext(context); + } +} + +const char* BloomFilterPolicy::kClassName() { return "bloomfilter"; } +const char* BloomFilterPolicy::kNickName() { return "rocksdb.BloomFilter"; } + +std::string BloomFilterPolicy::GetId() const { + // Including ":false" for better forward-compatibility with 6.29 and earlier + // which required a boolean `use_block_based_builder` parameter + return BloomLikeFilterPolicy::GetId() + ":false"; +} + +FilterBitsBuilder* BloomLikeFilterPolicy::GetFastLocalBloomBuilderWithContext( + const FilterBuildingContext& context) const { + bool offm = context.table_options.optimize_filters_for_memory; + const auto options_overrides_iter = + context.table_options.cache_usage_options.options_overrides.find( + CacheEntryRole::kFilterConstruction); + const auto filter_construction_charged = + options_overrides_iter != + context.table_options.cache_usage_options.options_overrides.end() + ? options_overrides_iter->second.charged + : context.table_options.cache_usage_options.options.charged; + + std::shared_ptr cache_res_mgr; + if (context.table_options.block_cache && + filter_construction_charged == + CacheEntryRoleOptions::Decision::kEnabled) { + cache_res_mgr = std::make_shared< + CacheReservationManagerImpl>( + context.table_options.block_cache); + } + return new FastLocalBloomBitsBuilder( + millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr, + cache_res_mgr, context.table_options.detect_filter_construct_corruption); +} + +FilterBitsBuilder* BloomLikeFilterPolicy::GetLegacyBloomBuilderWithContext( + const FilterBuildingContext& context) const { + if (whole_bits_per_key_ >= 14 && context.info_log && + !warned_.load(std::memory_order_relaxed)) { + warned_ = true; + const char* adjective; + if (whole_bits_per_key_ >= 20) { + adjective = "Dramatic"; + } else { + adjective = "Significant"; + } + // For more details, see + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter + ROCKS_LOG_WARN(context.info_log, + "Using legacy Bloom filter with high (%d) bits/key. 
" + "%s filter space and/or accuracy improvement is available " + "with format_version>=5.", + whole_bits_per_key_, adjective); + } + return new LegacyBloomBitsBuilder(whole_bits_per_key_, context.info_log); +} + +FilterBitsBuilder* +BloomLikeFilterPolicy::GetStandard128RibbonBuilderWithContext( + const FilterBuildingContext& context) const { + // FIXME: code duplication with GetFastLocalBloomBuilderWithContext + bool offm = context.table_options.optimize_filters_for_memory; + const auto options_overrides_iter = + context.table_options.cache_usage_options.options_overrides.find( + CacheEntryRole::kFilterConstruction); + const auto filter_construction_charged = + options_overrides_iter != + context.table_options.cache_usage_options.options_overrides.end() + ? options_overrides_iter->second.charged + : context.table_options.cache_usage_options.options.charged; + + std::shared_ptr cache_res_mgr; + if (context.table_options.block_cache && + filter_construction_charged == + CacheEntryRoleOptions::Decision::kEnabled) { + cache_res_mgr = std::make_shared< + CacheReservationManagerImpl>( + context.table_options.block_cache); + } + return new Standard128RibbonBitsBuilder( + desired_one_in_fp_rate_, millibits_per_key_, + offm ? &aggregate_rounding_balance_ : nullptr, cache_res_mgr, + context.table_options.detect_filter_construct_corruption, + context.info_log); +} + +std::string BloomLikeFilterPolicy::GetBitsPerKeySuffix() const { + std::string rv = ":" + std::to_string(millibits_per_key_ / 1000); + int frac = millibits_per_key_ % 1000; + if (frac > 0) { + rv.push_back('.'); + rv.push_back(static_cast('0' + (frac / 100))); + frac %= 100; + if (frac > 0) { + rv.push_back(static_cast('0' + (frac / 10))); + frac %= 10; + if (frac > 0) { + rv.push_back(static_cast('0' + frac)); + } + } + } + return rv; +} + +FilterBitsBuilder* BuiltinFilterPolicy::GetBuilderFromContext( + const FilterBuildingContext& context) { + if (context.table_options.filter_policy) { + return context.table_options.filter_policy->GetBuilderWithContext(context); + } else { + return nullptr; + } +} + +// For testing only, but always constructable with internal names +namespace test { + +const char* LegacyBloomFilterPolicy::kClassName() { + return "rocksdb.internal.LegacyBloomFilter"; +} + +FilterBitsBuilder* LegacyBloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return GetLegacyBloomBuilderWithContext(context); +} + +const char* FastLocalBloomFilterPolicy::kClassName() { + return "rocksdb.internal.FastLocalBloomFilter"; +} + +FilterBitsBuilder* FastLocalBloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return GetFastLocalBloomBuilderWithContext(context); +} + +const char* Standard128RibbonFilterPolicy::kClassName() { + return "rocksdb.internal.Standard128RibbonFilter"; +} + +FilterBitsBuilder* Standard128RibbonFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return GetStandard128RibbonBuilderWithContext(context); +} + +} // namespace test + +BuiltinFilterBitsReader* BuiltinFilterPolicy::GetBuiltinFilterBitsReader( + const Slice& contents) { + uint32_t len_with_meta = static_cast(contents.size()); + if (len_with_meta <= kMetadataLen) { + // filter is empty or 
broken. Treat like zero keys added. + return new AlwaysFalseFilter(); + } + + // Legacy Bloom filter data: + // 0 +-----------------------------------+ + // | Raw Bloom filter data | + // | ... | + // len +-----------------------------------+ + // | byte for num_probes or | + // | marker for new implementations | + // len+1 +-----------------------------------+ + // | four bytes for number of cache | + // | lines | + // len_with_meta +-----------------------------------+ + + int8_t raw_num_probes = + static_cast(contents.data()[len_with_meta - kMetadataLen]); + // NB: *num_probes > 30 and < 128 probably have not been used, because of + // BloomFilterPolicy::initialize, unless directly calling + // LegacyBloomBitsBuilder as an API, but we are leaving those cases in + // limbo with LegacyBloomBitsReader for now. + + if (raw_num_probes < 1) { + // Note: < 0 (or unsigned > 127) indicate special new implementations + // (or reserved for future use) + switch (raw_num_probes) { + case 0: + // Treat as zero probes (always FP) + return new AlwaysTrueFilter(); + case -1: + // Marker for newer Bloom implementations + return GetBloomBitsReader(contents); + case -2: + // Marker for Ribbon implementations + return GetRibbonBitsReader(contents); + default: + // Reserved (treat as zero probes, always FP, for now) + return new AlwaysTrueFilter(); + } + } + // else attempt decode for LegacyBloomBitsReader + + int num_probes = raw_num_probes; + assert(num_probes >= 1); + assert(num_probes <= 127); + + uint32_t len = len_with_meta - kMetadataLen; + assert(len > 0); + + uint32_t num_lines = DecodeFixed32(contents.data() + len_with_meta - 4); + uint32_t log2_cache_line_size; + + if (num_lines * CACHE_LINE_SIZE == len) { + // Common case + log2_cache_line_size = ConstexprFloorLog2(CACHE_LINE_SIZE); + } else if (num_lines == 0 || len % num_lines != 0) { + // Invalid (no solution to num_lines * x == len) + // Treat as zero probes (always FP) for now. + return new AlwaysTrueFilter(); + } else { + // Determine the non-native cache line size (from another system) + log2_cache_line_size = 0; + while ((num_lines << log2_cache_line_size) < len) { + ++log2_cache_line_size; + } + if ((num_lines << log2_cache_line_size) != len) { + // Invalid (block size not a power of two) + // Treat as zero probes (always FP) for now. + return new AlwaysTrueFilter(); + } + } + // if not early return + return new LegacyBloomBitsReader(contents.data(), num_probes, num_lines, + log2_cache_line_size); +} + +// Read metadata to determine what kind of FilterBitsReader is needed +// and return a new one. +FilterBitsReader* BuiltinFilterPolicy::GetFilterBitsReader( + const Slice& contents) const { + return BuiltinFilterPolicy::GetBuiltinFilterBitsReader(contents); +} + +BuiltinFilterBitsReader* BuiltinFilterPolicy::GetRibbonBitsReader( + const Slice& contents) { + uint32_t len_with_meta = static_cast(contents.size()); + uint32_t len = len_with_meta - kMetadataLen; + + assert(len > 0); // precondition + + uint32_t seed = static_cast(contents.data()[len + 1]); + uint32_t num_blocks = static_cast(contents.data()[len + 2]); + num_blocks |= static_cast(contents.data()[len + 3]) << 8; + num_blocks |= static_cast(contents.data()[len + 4]) << 16; + if (num_blocks < 2) { + // Not supported + // num_blocks == 1 is not used because num_starts == 1 is problematic + // for the hashing scheme. num_blocks == 0 is unused because there's + // already a concise encoding of an "always false" filter. 
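+    // (Worked example of the decode above, illustrative and not in the
+    // original source: if the last five bytes of the filter are
+    // [-2, 37, 0x12, 0x34, 0x00], the marker is -2 for Ribbon, seed = 37,
+    // and num_blocks = 0x12 | (0x34 << 8) | (0x00 << 16) = 13330.)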
+ // Return something safe: + return new AlwaysTrueFilter(); + } + return new Standard128RibbonBitsReader(contents.data(), len, num_blocks, + seed); +} + +// For newer Bloom filter implementations +BuiltinFilterBitsReader* BuiltinFilterPolicy::GetBloomBitsReader( + const Slice& contents) { + uint32_t len_with_meta = static_cast(contents.size()); + uint32_t len = len_with_meta - kMetadataLen; + + assert(len > 0); // precondition + + // New Bloom filter data: + // 0 +-----------------------------------+ + // | Raw Bloom filter data | + // | ... | + // len +-----------------------------------+ + // | char{-1} byte -> new Bloom filter | + // len+1 +-----------------------------------+ + // | byte for subimplementation | + // | 0: FastLocalBloom | + // | other: reserved | + // len+2 +-----------------------------------+ + // | byte for block_and_probes | + // | 0 in top 3 bits -> 6 -> 64-byte | + // | reserved: | + // | 1 in top 3 bits -> 7 -> 128-byte| + // | 2 in top 3 bits -> 8 -> 256-byte| + // | ... | + // | num_probes in bottom 5 bits, | + // | except 0 and 31 reserved | + // len+3 +-----------------------------------+ + // | two bytes reserved | + // | possibly for hash seed | + // len_with_meta +-----------------------------------+ + + // Read more metadata (see above) + char sub_impl_val = contents.data()[len_with_meta - 4]; + char block_and_probes = contents.data()[len_with_meta - 3]; + int log2_block_bytes = ((block_and_probes >> 5) & 7) + 6; + + int num_probes = (block_and_probes & 31); + if (num_probes < 1 || num_probes > 30) { + // Reserved / future safe + return new AlwaysTrueFilter(); + } + + uint16_t rest = DecodeFixed16(contents.data() + len_with_meta - 2); + if (rest != 0) { + // Reserved, possibly for hash seed + // Future safe + return new AlwaysTrueFilter(); + } + + if (sub_impl_val == 0) { // FastLocalBloom + if (log2_block_bytes == 6) { // Only block size supported for now + return new FastLocalBloomBitsReader(contents.data(), num_probes, len); + } + } + // otherwise + // Reserved / future safe + return new AlwaysTrueFilter(); +} + +const FilterPolicy* NewBloomFilterPolicy(double bits_per_key, + bool /*use_block_based_builder*/) { + // NOTE: use_block_based_builder now ignored so block-based filter is no + // longer accessible in public API. 
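+  // (Illustrative usage, not in the original source:
+  //   BlockBasedTableOptions table_options;
+  //   table_options.filter_policy.reset(NewBloomFilterPolicy(10.0));
+  //   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  // configures roughly 10 bits per key, on the order of a 1% false positive
+  // rate.)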
+ return new BloomFilterPolicy(bits_per_key); +} + +RibbonFilterPolicy::RibbonFilterPolicy(double bloom_equivalent_bits_per_key, + int bloom_before_level) + : BloomLikeFilterPolicy(bloom_equivalent_bits_per_key), + bloom_before_level_(bloom_before_level) {} + +FilterBitsBuilder* RibbonFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + // Treat unknown same as bottommost + int levelish = INT_MAX; + + switch (context.compaction_style) { + case kCompactionStyleLevel: + case kCompactionStyleUniversal: { + if (context.reason == TableFileCreationReason::kFlush) { + // Treat flush as level -1 + assert(context.level_at_creation == 0); + levelish = -1; + } else if (context.level_at_creation == -1) { + // Unknown level + assert(levelish == INT_MAX); + } else { + levelish = context.level_at_creation; + } + break; + } + case kCompactionStyleFIFO: + case kCompactionStyleNone: + // Treat as bottommost + assert(levelish == INT_MAX); + break; + } + if (levelish < bloom_before_level_) { + return GetFastLocalBloomBuilderWithContext(context); + } else { + return GetStandard128RibbonBuilderWithContext(context); + } +} + +const char* RibbonFilterPolicy::kClassName() { return "ribbonfilter"; } +const char* RibbonFilterPolicy::kNickName() { return "rocksdb.RibbonFilter"; } + +std::string RibbonFilterPolicy::GetId() const { + return BloomLikeFilterPolicy::GetId() + ":" + + std::to_string(bloom_before_level_); +} + +const FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key, + int bloom_before_level) { + return new RibbonFilterPolicy(bloom_equivalent_bits_per_key, + bloom_before_level); +} + +FilterBuildingContext::FilterBuildingContext( + const BlockBasedTableOptions& _table_options) + : table_options(_table_options) {} + +FilterPolicy::~FilterPolicy() {} + +std::shared_ptr BloomLikeFilterPolicy::Create( + const std::string& name, double bits_per_key) { + if (name == test::LegacyBloomFilterPolicy::kClassName()) { + return std::make_shared(bits_per_key); + } else if (name == test::FastLocalBloomFilterPolicy::kClassName()) { + return std::make_shared(bits_per_key); + } else if (name == test::Standard128RibbonFilterPolicy::kClassName()) { + return std::make_shared(bits_per_key); + } else if (name == BloomFilterPolicy::kClassName()) { + // For testing + return std::make_shared(bits_per_key); + } else if (name == RibbonFilterPolicy::kClassName()) { + // For testing + return std::make_shared(bits_per_key, + /*bloom_before_level*/ 0); + } else { + return nullptr; + } +} + +namespace { +static ObjectLibrary::PatternEntry FilterPatternEntryWithBits( + const char* name) { + return ObjectLibrary::PatternEntry(name, false).AddNumber(":", false); +} + +template +T* NewBuiltinFilterPolicyWithBits(const std::string& uri) { + const std::vector vals = StringSplit(uri, ':'); + double bits_per_key = ParseDouble(vals[1]); + return new T(bits_per_key); +} +static int RegisterBuiltinFilterPolicies(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + ReadOnlyBuiltinFilterPolicy::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new ReadOnlyBuiltinFilterPolicy()); + return guard->get(); + }); + + library.AddFactory( + FilterPatternEntryWithBits(BloomFilterPolicy::kClassName()) + .AnotherName(BloomFilterPolicy::kNickName()), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /* errmsg */) 
{ + guard->reset(NewBuiltinFilterPolicyWithBits(uri)); + return guard->get(); + }); + library.AddFactory( + FilterPatternEntryWithBits(BloomFilterPolicy::kClassName()) + .AnotherName(BloomFilterPolicy::kNickName()) + .AddSuffix(":false"), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(NewBuiltinFilterPolicyWithBits(uri)); + return guard->get(); + }); + library.AddFactory( + FilterPatternEntryWithBits(BloomFilterPolicy::kClassName()) + .AnotherName(BloomFilterPolicy::kNickName()) + .AddSuffix(":true"), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /* errmsg */) { + const std::vector vals = StringSplit(uri, ':'); + double bits_per_key = ParseDouble(vals[1]); + // NOTE: This case previously configured the deprecated block-based + // filter, but old ways of configuring that now map to full filter. We + // defer to the corresponding API to ensure consistency in case that + // change is reverted. + guard->reset(NewBloomFilterPolicy(bits_per_key, true)); + return guard->get(); + }); + library.AddFactory( + FilterPatternEntryWithBits(RibbonFilterPolicy::kClassName()) + .AnotherName(RibbonFilterPolicy::kNickName()), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /* errmsg */) { + const std::vector vals = StringSplit(uri, ':'); + double bits_per_key = ParseDouble(vals[1]); + guard->reset(NewRibbonFilterPolicy(bits_per_key)); + return guard->get(); + }); + library.AddFactory( + FilterPatternEntryWithBits(RibbonFilterPolicy::kClassName()) + .AnotherName(RibbonFilterPolicy::kNickName()) + .AddNumber(":", true), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /* errmsg */) { + const std::vector vals = StringSplit(uri, ':'); + double bits_per_key = ParseDouble(vals[1]); + int bloom_before_level = ParseInt(vals[2]); + guard->reset(NewRibbonFilterPolicy(bits_per_key, bloom_before_level)); + return guard->get(); + }); + library.AddFactory( + FilterPatternEntryWithBits(test::LegacyBloomFilterPolicy::kClassName()), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset( + NewBuiltinFilterPolicyWithBits(uri)); + return guard->get(); + }); + library.AddFactory( + FilterPatternEntryWithBits( + test::FastLocalBloomFilterPolicy::kClassName()), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset( + NewBuiltinFilterPolicyWithBits( + uri)); + return guard->get(); + }); + library.AddFactory( + FilterPatternEntryWithBits( + test::Standard128RibbonFilterPolicy::kClassName()), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset( + NewBuiltinFilterPolicyWithBits( + uri)); + return guard->get(); + }); + size_t num_types; + return static_cast(library.GetFactoryCount(&num_types)); +} +} // namespace + +Status FilterPolicy::CreateFromString( + const ConfigOptions& options, const std::string& value, + std::shared_ptr* policy) { + if (value == kNullptrString || value.empty()) { + policy->reset(); + return Status::OK(); + } else if (value == ReadOnlyBuiltinFilterPolicy::kClassName()) { + *policy = std::make_shared(); + return Status::OK(); + } + + std::string id; + std::unordered_map opt_map; + Status status = + Customizable::GetOptionsMap(options, policy->get(), value, &id, &opt_map); + if (!status.ok()) { // GetOptionsMap failed + return status; + } else if (id.empty()) { // We have no Id but have options. 
Not good + return Status::NotSupported("Cannot reset object ", id); + } else { + static std::once_flag loaded; + std::call_once(loaded, [&]() { + RegisterBuiltinFilterPolicies(*(ObjectLibrary::Default().get()), ""); + }); + status = options.registry->NewSharedObject(id, policy); + } + if (options.ignore_unsupported_options && status.IsNotSupported()) { + return Status::OK(); + } else if (status.ok()) { + status = Customizable::ConfigureNewObject( + options, const_cast(policy->get()), opt_map); + } + return status; +} + +const std::vector& BloomLikeFilterPolicy::GetAllFixedImpls() { + STATIC_AVOID_DESTRUCTION(std::vector, impls){ + // Match filter_bench -impl=x ordering + test::LegacyBloomFilterPolicy::kClassName(), + test::FastLocalBloomFilterPolicy::kClassName(), + test::Standard128RibbonFilterPolicy::kClassName(), + }; + return impls; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy_internal.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy_internal.h new file mode 100644 index 0000000000..9bc3a24829 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/filter_policy_internal.h @@ -0,0 +1,340 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include +#include +#include +#include + +#include "rocksdb/filter_policy.h" +#include "rocksdb/table.h" + +namespace ROCKSDB_NAMESPACE { + +// A class that takes a bunch of keys, then generates filter +class FilterBitsBuilder { + public: + virtual ~FilterBitsBuilder() {} + + // Add a key (or prefix) to the filter. Typically, a builder will keep + // a set of 64-bit key hashes and only build the filter in Finish + // when the final number of keys is known. Keys are added in sorted order + // and duplicated keys are possible, so typically, the builder will + // only add this key if its hash is different from the most recently + // added. + virtual void AddKey(const Slice& key) = 0; + + // Called by RocksDB before Finish to populate + // TableProperties::num_filter_entries, so should represent the + // number of unique keys (and/or prefixes) added, but does not have + // to be exact. `return 0;` may be used to conspicuously indicate "unknown". + virtual size_t EstimateEntriesAdded() = 0; + + // Generate the filter using the keys that are added + // The return value of this function would be the filter bits, + // The ownership of actual data is set to buf + virtual Slice Finish(std::unique_ptr* buf) = 0; + + // Similar to Finish(std::unique_ptr* buf), except that + // for a non-null status pointer argument, it will point to + // Status::Corruption() when there is any corruption during filter + // construction or Status::OK() otherwise. + // + // WARNING: do not use a filter resulted from a corrupted construction + // TODO: refactor this to have a better signature, consolidate + virtual Slice Finish(std::unique_ptr* buf, + Status* /* status */) { + return Finish(buf); + } + + // Verify the filter returned from calling FilterBitsBuilder::Finish. 
+  // The function returns Status::Corruption() if there is any corruption in
+  // the constructed filter or Status::OK() otherwise.
+  //
+  // Implementations should normally consult
+  // FilterBuildingContext::table_options.detect_filter_construct_corruption
+  // to determine whether to perform verification or to skip by returning
+  // Status::OK(). The decision is left to the FilterBitsBuilder so that
+  // verification prerequisites before PostVerify can be skipped when not
+  // configured.
+  //
+  // RocksDB internals will always call MaybePostVerify() on the filter after
+  // it is returned from calling FilterBitsBuilder::Finish,
+  // except when FilterBitsBuilder::Finish results in a corruption
+  // status, which indicates the filter is already in a corrupted state and
+  // there is no need to post-verify.
+  virtual Status MaybePostVerify(const Slice& /* filter_content */) {
+    return Status::OK();
+  }
+
+  // Approximate the number of keys that can be added and generate a filter
+  // <= the specified number of bytes. Callers (including RocksDB) should
+  // only use this result for optimizing performance and not as a guarantee.
+  virtual size_t ApproximateNumEntries(size_t bytes) = 0;
+};
+
+// A class that checks whether a key may be in the filter.
+// It should be initialized with a Slice generated by a FilterBitsBuilder.
+class FilterBitsReader {
+ public:
+  virtual ~FilterBitsReader() {}
+
+  // Check if the entry matches the bits in the filter
+  virtual bool MayMatch(const Slice& entry) = 0;
+
+  // Check if an array of entries matches the bits in the filter
+  virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
+    for (int i = 0; i < num_keys; ++i) {
+      may_match[i] = MayMatch(*keys[i]);
+    }
+  }
+};
+
+// Exposes any extra information needed for testing built-in
+// FilterBitsBuilders
+class BuiltinFilterBitsBuilder : public FilterBitsBuilder {
+ public:
+  // Calculate number of bytes needed for a new filter, including
+  // metadata. Passing the result to ApproximateNumEntries should
+  // (ideally, usually) return >= the num_entry passed in.
+  // When optimize_filters_for_memory is enabled, this function
+  // is not authoritative but represents a target size that should
+  // be close to the average size.
+  virtual size_t CalculateSpace(size_t num_entries) = 0;
+
+  // Returns an estimate of the FP rate of the returned filter if
+  // `num_entries` keys are added and the filter returned by Finish
+  // is `bytes` bytes.
+  virtual double EstimatedFpRate(size_t num_entries, size_t bytes) = 0;
+};
+
+// Base class for RocksDB built-in filter readers, with extra useful
+// functionality for internal use.
+class BuiltinFilterBitsReader : public FilterBitsReader {
+ public:
+  // Check if the hash of the entry matches the bits in the filter
+  virtual bool HashMayMatch(const uint64_t /* h */) { return true; }
+};
+
+// Base class for RocksDB built-in filter policies. This provides the
+// ability to read all kinds of built-in filters (so that old filters can
+// be used even when you change between built-in policies).
+class BuiltinFilterPolicy : public FilterPolicy {
+ public:  // overrides
+  // Read metadata to determine what kind of FilterBitsReader is needed
+  // and return a new one. This must successfully process any filter data
+  // generated by a built-in FilterBitsBuilder, regardless of the impl
+  // chosen for this BloomFilterPolicy.
+ FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override; + static const char* kClassName(); + bool IsInstanceOf(const std::string& id) const override; + // All variants of BuiltinFilterPolicy can read each others filters. + const char* CompatibilityName() const override; + static const char* kCompatibilityName(); + + public: // new + // An internal function for the implementation of + // BuiltinFilterBitsReader::GetFilterBitsReader without requiring an instance + // or working around potential virtual overrides. + static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader( + const Slice& contents); + + // Returns a new FilterBitsBuilder from the filter_policy in + // table_options of a context, or nullptr if not applicable. + // (An internal convenience function to save boilerplate.) + static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&); + + private: + // For Bloom filter implementation(s) + static BuiltinFilterBitsReader* GetBloomBitsReader(const Slice& contents); + + // For Ribbon filter implementation(s) + static BuiltinFilterBitsReader* GetRibbonBitsReader(const Slice& contents); +}; + +// A "read only" filter policy used for backward compatibility with old +// OPTIONS files, which did not specifying a Bloom configuration, just +// "rocksdb.BuiltinBloomFilter". Although this can read existing filters, +// this policy does not build new filters, so new SST files generated +// under the policy will get no filters (like nullptr FilterPolicy). +// This class is considered internal API and subject to change. +class ReadOnlyBuiltinFilterPolicy : public BuiltinFilterPolicy { + public: + const char* Name() const override { return kClassName(); } + static const char* kClassName(); + + // Does not write filters. + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext&) const override { + return nullptr; + } +}; + +// RocksDB built-in filter policy for Bloom or Bloom-like filters including +// Ribbon filters. +// This class is considered internal API and subject to change. +// See NewBloomFilterPolicy and NewRibbonFilterPolicy. +class BloomLikeFilterPolicy : public BuiltinFilterPolicy { + public: + explicit BloomLikeFilterPolicy(double bits_per_key); + + ~BloomLikeFilterPolicy() override; + static const char* kClassName(); + bool IsInstanceOf(const std::string& id) const override; + + std::string GetId() const override; + + // Essentially for testing only: configured millibits/key + int GetMillibitsPerKey() const { return millibits_per_key_; } + // Essentially for testing only: legacy whole bits/key + int GetWholeBitsPerKey() const { return whole_bits_per_key_; } + + // All the different underlying implementations that a BloomLikeFilterPolicy + // might use, as a configuration string name for a testing mode for + // "always use this implementation." Only appropriate for unit tests. 
+ static const std::vector& GetAllFixedImpls(); + + // Convenience function for creating by name for fixed impls + static std::shared_ptr Create(const std::string& name, + double bits_per_key); + + protected: + // Some implementations used by aggregating policies + FilterBitsBuilder* GetLegacyBloomBuilderWithContext( + const FilterBuildingContext& context) const; + FilterBitsBuilder* GetFastLocalBloomBuilderWithContext( + const FilterBuildingContext& context) const; + FilterBitsBuilder* GetStandard128RibbonBuilderWithContext( + const FilterBuildingContext& context) const; + + std::string GetBitsPerKeySuffix() const; + + private: + // Bits per key settings are for configuring Bloom filters. + + // Newer filters support fractional bits per key. For predictable behavior + // of 0.001-precision values across floating point implementations, we + // round to thousandths of a bit (on average) per key. + int millibits_per_key_; + + // Older filters round to whole number bits per key. (There *should* be no + // compatibility issue with fractional bits per key, but preserving old + // behavior with format_version < 5 just in case.) + int whole_bits_per_key_; + + // For configuring Ribbon filter: a desired value for 1/fp_rate. For + // example, 100 -> 1% fp rate. + double desired_one_in_fp_rate_; + + // Whether relevant warnings have been logged already. (Remember so we + // only report once per BloomFilterPolicy instance, to keep the noise down.) + mutable std::atomic warned_; + + // State for implementing optimize_filters_for_memory. Essentially, this + // tracks a surplus or deficit in total FP rate of filters generated by + // builders under this policy vs. what would have been generated without + // optimize_filters_for_memory. + // + // To avoid floating point weirdness, the actual value is + // Sum over all generated filters f: + // (predicted_fp_rate(f) - predicted_fp_rate(f|o_f_f_m=false)) * 2^32 + mutable std::atomic aggregate_rounding_balance_; +}; + +// For NewBloomFilterPolicy +// +// This is a user-facing policy that automatically choose between +// LegacyBloom and FastLocalBloom based on context at build time, +// including compatibility with format_version. +class BloomFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit BloomFilterPolicy(double bits_per_key); + + // To use this function, call BuiltinFilterPolicy::GetBuilderFromContext(). + // + // Neither the context nor any objects therein should be saved beyond + // the call to this function, unless it's shared_ptr. + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext&) const override; + + static const char* kClassName(); + const char* Name() const override { return kClassName(); } + static const char* kNickName(); + const char* NickName() const override { return kNickName(); } + std::string GetId() const override; +}; + +// For NewRibbonFilterPolicy +// +// This is a user-facing policy that chooses between Standard128Ribbon +// and FastLocalBloom based on context at build time (LSM level and other +// factors in extreme cases). 
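+// (Illustrative usage, not in the original source:
+//   table_options.filter_policy.reset(NewRibbonFilterPolicy(
+//       /*bloom_equivalent_bits_per_key=*/10.0, /*bloom_before_level=*/1));
+// builds Bloom filters for flushes and level 0 and Ribbon filters from
+// level 1 down, targeting the Bloom-10 FP rate with roughly 30% less
+// memory, per the comments in filter_policy.cc above.)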
+class RibbonFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit RibbonFilterPolicy(double bloom_equivalent_bits_per_key, + int bloom_before_level); + + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext&) const override; + + int GetBloomBeforeLevel() const { return bloom_before_level_; } + + static const char* kClassName(); + const char* Name() const override { return kClassName(); } + static const char* kNickName(); + const char* NickName() const override { return kNickName(); } + std::string GetId() const override; + + private: + const int bloom_before_level_; +}; + +// For testing only, but always constructable with internal names +namespace test { + +class LegacyBloomFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit LegacyBloomFilterPolicy(double bits_per_key) + : BloomLikeFilterPolicy(bits_per_key) {} + + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext& context) const override; + + static const char* kClassName(); + const char* Name() const override { return kClassName(); } +}; + +class FastLocalBloomFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit FastLocalBloomFilterPolicy(double bits_per_key) + : BloomLikeFilterPolicy(bits_per_key) {} + + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext& context) const override; + + static const char* kClassName(); + const char* Name() const override { return kClassName(); } +}; + +class Standard128RibbonFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit Standard128RibbonFilterPolicy(double bloom_equiv_bits_per_key) + : BloomLikeFilterPolicy(bloom_equiv_bits_per_key) {} + + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext& context) const override; + + static const char* kClassName(); + const char* Name() const override { return kClassName(); } +}; + +} // namespace test + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy.cc new file mode 100644 index 0000000000..d5cc310013 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/flush_block_policy.h" + +#include +#include + +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/customizable_util.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/block_builder.h" +#include "table/block_based/flush_block_policy_impl.h" +#include "table/format.h" + +namespace ROCKSDB_NAMESPACE { + +// Flush block by size +class FlushBlockBySizePolicy : public FlushBlockPolicy { + public: + // @params block_size: Approximate size of user data packed per + // block. 
+  // @params block_size_deviation: This is used to close a block before it
+  //                               reaches the configured block_size.
+  FlushBlockBySizePolicy(const uint64_t block_size,
+                         const uint64_t block_size_deviation, const bool align,
+                         const BlockBuilder& data_block_builder)
+      : block_size_(block_size),
+        block_size_deviation_limit_(
+            ((block_size * (100 - block_size_deviation)) + 99) / 100),
+        align_(align),
+        data_block_builder_(data_block_builder) {}
+
+  bool Update(const Slice& key, const Slice& value) override {
+    // it makes no sense to flush when the data block is empty
+    if (data_block_builder_.empty()) {
+      return false;
+    }
+
+    auto curr_size = data_block_builder_.CurrentSizeEstimate();
+
+    // Do flush if one of the below two conditions is true:
+    // 1) if the current estimated size already exceeds the block size,
+    // 2) block_size_deviation is set and the estimated size after appending
+    //    the kv will exceed the block size and the current size is already
+    //    past the deviation limit.
+    return curr_size >= block_size_ || BlockAlmostFull(key, value);
+  }
+
+ private:
+  bool BlockAlmostFull(const Slice& key, const Slice& value) const {
+    if (block_size_deviation_limit_ == 0) {
+      return false;
+    }
+
+    const auto curr_size = data_block_builder_.CurrentSizeEstimate();
+    auto estimated_size_after =
+        data_block_builder_.EstimateSizeAfterKV(key, value);
+
+    if (align_) {
+      estimated_size_after += BlockBasedTable::kBlockTrailerSize;
+      return estimated_size_after > block_size_;
+    }
+
+    return estimated_size_after > block_size_ &&
+           curr_size > block_size_deviation_limit_;
+  }
+
+  const uint64_t block_size_;
+  const uint64_t block_size_deviation_limit_;
+  const bool align_;
+  const BlockBuilder& data_block_builder_;
+};
+
+FlushBlockPolicy* FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
+    const BlockBasedTableOptions& table_options,
+    const BlockBuilder& data_block_builder) const {
+  return new FlushBlockBySizePolicy(
+      table_options.block_size, table_options.block_size_deviation,
+      table_options.block_align, data_block_builder);
+}
+
+FlushBlockPolicy* FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
+    const uint64_t size, const int deviation,
+    const BlockBuilder& data_block_builder) {
+  return new FlushBlockBySizePolicy(size, deviation, false,
+                                    data_block_builder);
+}
+
+static int RegisterFlushBlockPolicyFactories(ObjectLibrary& library,
+                                             const std::string& /*arg*/) {
+  library.AddFactory<FlushBlockPolicyFactory>(
+      FlushBlockBySizePolicyFactory::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<FlushBlockPolicyFactory>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(new FlushBlockBySizePolicyFactory());
+        return guard->get();
+      });
+  library.AddFactory<FlushBlockPolicyFactory>(
+      FlushBlockEveryKeyPolicyFactory::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<FlushBlockPolicyFactory>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(new FlushBlockEveryKeyPolicyFactory());
+        return guard->get();
+      });
+  return 2;
+}
+
+FlushBlockBySizePolicyFactory::FlushBlockBySizePolicyFactory()
+    : FlushBlockPolicyFactory() {}
+
+Status FlushBlockPolicyFactory::CreateFromString(
+    const ConfigOptions& config_options, const std::string& value,
+    std::shared_ptr<FlushBlockPolicyFactory>* factory) {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    RegisterFlushBlockPolicyFactories(*(ObjectLibrary::Default().get()), "");
+  });
+
+  if (value.empty()) {
+    factory->reset(new FlushBlockBySizePolicyFactory());
+    return Status::OK();
+  } else {
+    return LoadSharedObject<FlushBlockPolicyFactory>(config_options, value,
+                                                     factory);
+  }
+}
+}  // namespace ROCKSDB_NAMESPACE
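The rounded-up arithmetic in the deviation limit above is easy to sanity-check by hand. A standalone sketch (not from this patch), using example values of block_size = 4096 and block_size_deviation = 10 percent:

// Illustrative sketch (not part of this patch): the deviation limit
// computed by FlushBlockBySizePolicy's initializer, for sample inputs.
#include <cstdint>
#include <iostream>

int main() {
  const uint64_t block_size = 4096;
  const uint64_t block_size_deviation = 10;  // percent
  // Rounded-up 90% of the block size: blocks are closed early only once
  // they are at least this full.
  const uint64_t limit =
      ((block_size * (100 - block_size_deviation)) + 99) / 100;
  std::cout << limit << "\n";  // prints 3687
}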
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy_impl.h
new file mode 100644
index 0000000000..4f79682bc2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/flush_block_policy_impl.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "rocksdb/flush_block_policy.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// FlushBlockEveryKeyPolicy currently used only in tests.
+
+class FlushBlockEveryKeyPolicy : public FlushBlockPolicy {
+ public:
+  bool Update(const Slice& /*key*/, const Slice& /*value*/) override {
+    if (!start_) {
+      start_ = true;
+      return false;
+    }
+    return true;
+  }
+
+ private:
+  bool start_ = false;
+};
+
+class FlushBlockEveryKeyPolicyFactory : public FlushBlockPolicyFactory {
+ public:
+  explicit FlushBlockEveryKeyPolicyFactory() {}
+
+  static const char* kClassName() { return "FlushBlockEveryKeyPolicyFactory"; }
+  const char* Name() const override { return kClassName(); }
+
+  FlushBlockPolicy* NewFlushBlockPolicy(
+      const BlockBasedTableOptions& /*table_options*/,
+      const BlockBuilder& /*data_block_builder*/) const override {
+    return new FlushBlockEveryKeyPolicy;
+  }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.cc
new file mode 100644
index 0000000000..60ff7c44f3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.cc
@@ -0,0 +1,290 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "table/block_based/full_filter_block.h"
+
+#include <array>
+
+#include "block_type.h"
+#include "monitoring/perf_context_imp.h"
+#include "port/malloc.h"
+#include "port/port.h"
+#include "rocksdb/filter_policy.h"
+#include "table/block_based/block_based_table_reader.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+FullFilterBlockBuilder::FullFilterBlockBuilder(
+    const SliceTransform* _prefix_extractor, bool whole_key_filtering,
+    FilterBitsBuilder* filter_bits_builder)
+    : prefix_extractor_(_prefix_extractor),
+      whole_key_filtering_(whole_key_filtering),
+      last_whole_key_recorded_(false),
+      last_prefix_recorded_(false),
+      last_key_in_domain_(false),
+      any_added_(false) {
+  assert(filter_bits_builder != nullptr);
+  filter_bits_builder_.reset(filter_bits_builder);
+}
+
+size_t FullFilterBlockBuilder::EstimateEntriesAdded() {
+  return filter_bits_builder_->EstimateEntriesAdded();
+}
+
+void FullFilterBlockBuilder::Add(const Slice& key_without_ts) {
+  const bool add_prefix =
+      prefix_extractor_ && prefix_extractor_->InDomain(key_without_ts);
+
+  if (!last_prefix_recorded_ && last_key_in_domain_) {
+    // We can reach here when a new filter partition starts in partitioned
+    // filter. The last prefix in the previous partition should be added if
+    // necessary regardless of key_without_ts, to support prefix SeekForPrev.
+    AddKey(last_prefix_str_);
+    last_prefix_recorded_ = true;
+  }
+
+  if (whole_key_filtering_) {
+    if (!add_prefix) {
+      AddKey(key_without_ts);
+    } else {
+      // if both whole_key and prefix are added to bloom then we will have
+      // whole key_without_ts and prefix addition being interleaved and thus
+      // cannot rely on the bits builder to properly detect the duplicates by
+      // comparing with the last item.
+      Slice last_whole_key = Slice(last_whole_key_str_);
+      if (!last_whole_key_recorded_ ||
+          last_whole_key.compare(key_without_ts) != 0) {
+        AddKey(key_without_ts);
+        last_whole_key_recorded_ = true;
+        last_whole_key_str_.assign(key_without_ts.data(),
+                                   key_without_ts.size());
+      }
+    }
+  }
+  if (add_prefix) {
+    last_key_in_domain_ = true;
+    AddPrefix(key_without_ts);
+  } else {
+    last_key_in_domain_ = false;
+  }
+}
+
+// Add key to filter if needed
+inline void FullFilterBlockBuilder::AddKey(const Slice& key) {
+  filter_bits_builder_->AddKey(key);
+  any_added_ = true;
+}
+
+// Add prefix to filter if needed
+void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
+  assert(prefix_extractor_ && prefix_extractor_->InDomain(key));
+  Slice prefix = prefix_extractor_->Transform(key);
+  if (whole_key_filtering_) {
+    // if both whole_key and prefix are added to bloom then we will have
+    // whole key and prefix addition being interleaved and thus cannot rely
+    // on the bits builder to properly detect the duplicates by comparing
+    // with the last item.
+    Slice last_prefix = Slice(last_prefix_str_);
+    if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) {
+      AddKey(prefix);
+      last_prefix_recorded_ = true;
+      last_prefix_str_.assign(prefix.data(), prefix.size());
+    }
+  } else {
+    AddKey(prefix);
+  }
+}
+
+void FullFilterBlockBuilder::Reset() {
+  last_whole_key_recorded_ = false;
+  last_prefix_recorded_ = false;
+}
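To see why the builder above tracks the last whole key and the last prefix separately, consider the interleaved add stream when both whole-key and prefix filtering are on. A toy sketch (not from this patch; the fixed 3-byte "extractor" is an assumption for illustration):

// Illustrative sketch (not part of this patch): keys sharing prefix "app"
// produce an interleaved stream (key, prefix, key, prefix, ...), so a
// "compare with the last added item" dedup inside the bits builder would
// never fire; the builder dedups per category instead.
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> keys = {"apple", "apply"};
  std::string last_prefix;
  std::vector<std::string> added;
  for (const auto& k : keys) {
    added.push_back(k);                   // whole key, always added
    std::string prefix = k.substr(0, 3);  // hypothetical 3-byte extractor
    if (prefix != last_prefix) {          // dedup against last *prefix* only
      added.push_back(prefix);
      last_prefix = prefix;
    }
  }
  for (const auto& e : added) std::cout << e << "\n";
  // apple, app, apply -- "app" is added once despite the interleaving
}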
+Slice FullFilterBlockBuilder::Finish(
+    const BlockHandle& /*tmp*/, Status* status,
+    std::unique_ptr<const char[]>* filter_data) {
+  Reset();
+  // In this impl we ignore BlockHandle
+  *status = Status::OK();
+  if (any_added_) {
+    any_added_ = false;
+    Slice filter_content = filter_bits_builder_->Finish(
+        filter_data ? filter_data : &filter_data_, status);
+    return filter_content;
+  }
+  return Slice();
+}
+
+FullFilterBlockReader::FullFilterBlockReader(
+    const BlockBasedTable* t,
+    CachableEntry<ParsedFullFilterBlock>&& filter_block)
+    : FilterBlockReaderCommon(t, std::move(filter_block)) {}
+
+bool FullFilterBlockReader::KeyMayMatch(const Slice& key, const bool no_io,
+                                        const Slice* const /*const_ikey_ptr*/,
+                                        GetContext* get_context,
+                                        BlockCacheLookupContext* lookup_context,
+                                        const ReadOptions& read_options) {
+  if (!whole_key_filtering()) {
+    return true;
+  }
+  return MayMatch(key, no_io, get_context, lookup_context, read_options);
+}
+
+std::unique_ptr<FilterBlockReader> FullFilterBlockReader::Create(
+    const BlockBasedTable* table, const ReadOptions& ro,
+    FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+    bool pin, BlockCacheLookupContext* lookup_context) {
+  assert(table);
+  assert(table->get_rep());
+  assert(!pin || prefetch);
+
+  CachableEntry<ParsedFullFilterBlock> filter_block;
+  if (prefetch || !use_cache) {
+    const Status s = ReadFilterBlock(table, prefetch_buffer, ro, use_cache,
+                                     nullptr /* get_context */, lookup_context,
+                                     &filter_block);
+    if (!s.ok()) {
+      IGNORE_STATUS_IF_ERROR(s);
+      return std::unique_ptr<FilterBlockReader>();
+    }
+
+    if (use_cache && !pin) {
+      filter_block.Reset();
+    }
+  }
+
+  return std::unique_ptr<FilterBlockReader>(
+      new FullFilterBlockReader(table, std::move(filter_block)));
+}
+
+bool FullFilterBlockReader::PrefixMayMatch(
+    const Slice& prefix, const bool no_io,
+    const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
+    BlockCacheLookupContext* lookup_context, const ReadOptions& read_options) {
+  return MayMatch(prefix, no_io, get_context, lookup_context, read_options);
+}
+
+bool FullFilterBlockReader::MayMatch(const Slice& entry, bool no_io,
+                                     GetContext* get_context,
+                                     BlockCacheLookupContext* lookup_context,
+                                     const ReadOptions& read_options) const {
+  CachableEntry<ParsedFullFilterBlock> filter_block;
+
+  const Status s = GetOrReadFilterBlock(no_io, get_context, lookup_context,
+                                        &filter_block, read_options);
+  if (!s.ok()) {
+    IGNORE_STATUS_IF_ERROR(s);
+    return true;
+  }
+
+  assert(filter_block.GetValue());
+
+  FilterBitsReader* const filter_bits_reader =
+      filter_block.GetValue()->filter_bits_reader();
+
+  if (filter_bits_reader) {
+    if (filter_bits_reader->MayMatch(entry)) {
+      PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
+      return true;
+    } else {
+      PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
+      return false;
+    }
+  }
+  return true;
+}
+
+void FullFilterBlockReader::KeysMayMatch(
+    MultiGetRange* range, const bool no_io,
+    BlockCacheLookupContext* lookup_context, const ReadOptions& read_options) {
+  if (!whole_key_filtering()) {
+    // Simply return. Don't skip any key - consider all keys as likely to be
+    // present
+    return;
+  }
+  MayMatch(range, no_io, nullptr, lookup_context, read_options);
+}
+void FullFilterBlockReader::PrefixesMayMatch(
+    MultiGetRange* range, const SliceTransform* prefix_extractor,
+    const bool no_io, BlockCacheLookupContext* lookup_context,
+    const ReadOptions& read_options) {
+  MayMatch(range, no_io, prefix_extractor, lookup_context, read_options);
+}
+
+void FullFilterBlockReader::MayMatch(MultiGetRange* range, bool no_io,
+                                     const SliceTransform* prefix_extractor,
+                                     BlockCacheLookupContext* lookup_context,
+                                     const ReadOptions& read_options) const {
+  CachableEntry<ParsedFullFilterBlock> filter_block;
+
+  const Status s =
+      GetOrReadFilterBlock(no_io, range->begin()->get_context, lookup_context,
+                           &filter_block, read_options);
+  if (!s.ok()) {
+    IGNORE_STATUS_IF_ERROR(s);
+    return;
+  }
+
+  assert(filter_block.GetValue());
+
+  FilterBitsReader* const filter_bits_reader =
+      filter_block.GetValue()->filter_bits_reader();
+
+  if (!filter_bits_reader) {
+    return;
+  }
+
+  // We need to use an array instead of autovector for may_match since
+  // &may_match[0] doesn't work for autovector (compiler error). So
+  // declare both keys and may_match as arrays, which is also slightly less
+  // expensive compared to autovector
+  std::array<const Slice*, MultiGetContext::MAX_BATCH_SIZE> keys;
+  std::array<bool, MultiGetContext::MAX_BATCH_SIZE> may_match = {{true}};
+  autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> prefixes;
+  int num_keys = 0;
+  MultiGetRange filter_range(*range, range->begin(), range->end());
+  for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) {
+    if (!prefix_extractor) {
+      keys[num_keys++] = &iter->ukey_without_ts;
+    } else if (prefix_extractor->InDomain(iter->ukey_without_ts)) {
+      prefixes.emplace_back(prefix_extractor->Transform(iter->ukey_without_ts));
+      keys[num_keys++] = &prefixes.back();
+    } else {
+      filter_range.SkipKey(iter);
+    }
+  }
+
+  filter_bits_reader->MayMatch(num_keys, &keys[0], &may_match[0]);
+
+  int i = 0;
+  for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) {
+    if (!may_match[i]) {
+      // Update original MultiGet range to skip this key. The filter_range
+      // was temporarily used just to skip keys not in prefix_extractor domain
+      range->SkipKey(iter);
+      PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
+    } else {
+      // PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
+      PerfContext* perf_ctx = get_perf_context();
+      perf_ctx->bloom_sst_hit_count++;
+    }
+    ++i;
+  }
+}
+
+size_t FullFilterBlockReader::ApproximateMemoryUsage() const {
+  size_t usage = ApproximateFilterBlockMemoryUsage();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+  usage += malloc_usable_size(const_cast<FullFilterBlockReader*>(this));
+#else
+  usage += sizeof(*this);
+#endif  // ROCKSDB_MALLOC_USABLE_SIZE
+  return usage;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
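The batched MayMatch above probes the filter once for a whole MultiGet batch using parallel arrays. A self-contained sketch of that pattern (not from this patch; BatchMayMatch is an invented stand-in for FilterBitsReader::MayMatch):

// Illustrative sketch (not part of this patch): collect pointers to the
// keys that survive domain filtering, probe once for the whole batch, then
// consume the parallel result array.
#include <array>
#include <iostream>
#include <string>

constexpr int kMaxBatch = 4;

// Toy stand-in for FilterBitsReader::MayMatch(num_keys, keys, may_match).
void BatchMayMatch(int n, const std::string** keys, bool* out) {
  for (int i = 0; i < n; ++i) out[i] = (*keys[i])[0] != 'z';  // toy filter
}

int main() {
  std::array<const std::string*, kMaxBatch> keys;
  std::array<bool, kMaxBatch> may_match = {{true}};
  std::string a = "apple", b = "zebra";
  int n = 0;
  keys[n++] = &a;
  keys[n++] = &b;
  BatchMayMatch(n, keys.data(), may_match.data());
  for (int i = 0; i < n; ++i)
    std::cout << *keys[i] << " -> " << (may_match[i] ? "maybe" : "no") << "\n";
}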
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.h
new file mode 100644
index 0000000000..7b0890d10c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/full_filter_block.h
@@ -0,0 +1,147 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/options.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/slice_transform.h"
+#include "table/block_based/filter_block_reader_common.h"
+#include "table/block_based/filter_policy_internal.h"
+#include "table/block_based/parsed_full_filter_block.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class FilterPolicy;
+class FilterBitsBuilder;
+class FilterBitsReader;
+
+// A FullFilterBlockBuilder is used to construct a full filter for a
+// particular Table. It generates a single string which is stored as
+// a special block in the Table.
+// The format of full filter block is:
+// +----------------------------------------------------------------+
+// |              full filter for all keys in sst file              |
+// +----------------------------------------------------------------+
+// The full filter can be very large. At the end of it, we put
+// num_probes: how many hash functions are used in bloom filter
+//
+class FullFilterBlockBuilder : public FilterBlockBuilder {
+ public:
+  explicit FullFilterBlockBuilder(const SliceTransform* prefix_extractor,
+                                  bool whole_key_filtering,
+                                  FilterBitsBuilder* filter_bits_builder);
+  // No copying allowed
+  FullFilterBlockBuilder(const FullFilterBlockBuilder&) = delete;
+  void operator=(const FullFilterBlockBuilder&) = delete;
+
+  // bits_builder is created in filter_policy; it is passed in here directly
+  // and deleted here.
+  ~FullFilterBlockBuilder() {}
+
+  virtual void Add(const Slice& key_without_ts) override;
+  virtual bool IsEmpty() const override { return !any_added_; }
+  virtual size_t EstimateEntriesAdded() override;
+  virtual Slice Finish(
+      const BlockHandle& tmp, Status* status,
+      std::unique_ptr<const char[]>* filter_data = nullptr) override;
+  using FilterBlockBuilder::Finish;
+
+  virtual void ResetFilterBitsBuilder() override {
+    filter_bits_builder_.reset();
+  }
+
+  virtual Status MaybePostVerifyFilter(const Slice& filter_content) override {
+    return filter_bits_builder_->MaybePostVerify(filter_content);
+  }
+
+ protected:
+  virtual void AddKey(const Slice& key);
+  std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
+  virtual void Reset();
+  void AddPrefix(const Slice& key);
+  const SliceTransform* prefix_extractor() { return prefix_extractor_; }
+  const std::string& last_prefix_str() const { return last_prefix_str_; }
+
+ private:
+  // important: all of these might point to invalid addresses
+  // at the time of destruction of this filter block. destructor
+  // should NOT dereference them.
+  const SliceTransform* prefix_extractor_;
+  bool whole_key_filtering_;
+  bool last_whole_key_recorded_;
+  std::string last_whole_key_str_;
+  bool last_prefix_recorded_;
+  std::string last_prefix_str_;
+  // Whether prefix_extractor_->InDomain(last_whole_key_) is true.
+  // Used in partitioned filters so that the last prefix from the previous
+  // filter partition will be added to the current partition if
+  // last_key_in_domain_ is true, regardless of the current key.
+  bool last_key_in_domain_;
+  bool any_added_;
+  std::unique_ptr<const char[]> filter_data_;
+};
+// A FilterBlockReader is used to parse filter from SST table.
+// KeyMayMatch and PrefixMayMatch would trigger filter checking
+class FullFilterBlockReader
+    : public FilterBlockReaderCommon<ParsedFullFilterBlock> {
+ public:
+  FullFilterBlockReader(const BlockBasedTable* t,
+                        CachableEntry<ParsedFullFilterBlock>&& filter_block);
+
+  static std::unique_ptr<FilterBlockReader> Create(
+      const BlockBasedTable* table, const ReadOptions& ro,
+      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+      bool pin, BlockCacheLookupContext* lookup_context);
+
+  bool KeyMayMatch(const Slice& key, const bool no_io,
+                   const Slice* const const_ikey_ptr, GetContext* get_context,
+                   BlockCacheLookupContext* lookup_context,
+                   const ReadOptions& read_options) override;
+
+  bool PrefixMayMatch(const Slice& prefix, const bool no_io,
+                      const Slice* const const_ikey_ptr,
+                      GetContext* get_context,
+                      BlockCacheLookupContext* lookup_context,
+                      const ReadOptions& read_options) override;
+
+  void KeysMayMatch(MultiGetRange* range, const bool no_io,
+                    BlockCacheLookupContext* lookup_context,
+                    const ReadOptions& read_options) override;
+  // Used in partitioned filter code
+  void KeysMayMatch2(MultiGetRange* range,
+                     const SliceTransform* /*prefix_extractor*/,
+                     const bool no_io, BlockCacheLookupContext* lookup_context,
+                     const ReadOptions& read_options) {
+    KeysMayMatch(range, no_io, lookup_context, read_options);
+  }
+
+  void PrefixesMayMatch(MultiGetRange* range,
+                        const SliceTransform* prefix_extractor,
+                        const bool no_io,
+                        BlockCacheLookupContext* lookup_context,
+                        const ReadOptions& read_options) override;
+  size_t ApproximateMemoryUsage() const override;
+
+ private:
+  bool MayMatch(const Slice& entry, bool no_io, GetContext* get_context,
+                BlockCacheLookupContext* lookup_context,
+                const ReadOptions& read_options) const;
+  void MayMatch(MultiGetRange* range, bool no_io,
+                const SliceTransform* prefix_extractor,
+                BlockCacheLookupContext* lookup_context,
+                const ReadOptions& read_options) const;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
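For orientation, a hedged usage sketch of the public API these internals serve; these are standard RocksDB calls rather than code from this patch, and the specific parameter values are illustrative only:

// Usage sketch (standard RocksDB API, not part of this patch): a full
// filter block is built when a filter policy is configured on a
// block-based table; whole_key_filtering and the prefix extractor control
// the whole-key and prefix adds seen in FullFilterBlockBuilder::Add.
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"

using namespace ROCKSDB_NAMESPACE;

Options MakeOptions() {
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10.0));
  table_options.whole_key_filtering = true;  // the default
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  return options;
}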
+#include "table/block_based/hash_index_reader.h" + +#include "table/block_fetcher.h" +#include "table/meta_blocks.h" + +namespace ROCKSDB_NAMESPACE { +Status HashIndexReader::Create(const BlockBasedTable* table, + const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, + InternalIterator* meta_index_iter, + bool use_cache, bool prefetch, bool pin, + BlockCacheLookupContext* lookup_context, + std::unique_ptr* index_reader) { + assert(table != nullptr); + assert(index_reader != nullptr); + assert(!pin || prefetch); + + const BlockBasedTable::Rep* rep = table->get_rep(); + assert(rep != nullptr); + + CachableEntry index_block; + if (prefetch || !use_cache) { + const Status s = + ReadIndexBlock(table, prefetch_buffer, ro, use_cache, + /*get_context=*/nullptr, lookup_context, &index_block); + if (!s.ok()) { + return s; + } + + if (use_cache && !pin) { + index_block.Reset(); + } + } + + // Note, failure to create prefix hash index does not need to be a + // hard error. We can still fall back to the original binary search index. + // So, Create will succeed regardless, from this point on. + + index_reader->reset(new HashIndexReader(table, std::move(index_block))); + + // Get prefixes block + BlockHandle prefixes_handle; + Status s = + FindMetaBlock(meta_index_iter, kHashIndexPrefixesBlock, &prefixes_handle); + if (!s.ok()) { + // TODO: log error + return Status::OK(); + } + + // Get index metadata block + BlockHandle prefixes_meta_handle; + s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesMetadataBlock, + &prefixes_meta_handle); + if (!s.ok()) { + // TODO: log error + return Status::OK(); + } + + RandomAccessFileReader* const file = rep->file.get(); + const Footer& footer = rep->footer; + const ImmutableOptions& ioptions = rep->ioptions; + const PersistentCacheOptions& cache_options = rep->persistent_cache_options; + MemoryAllocator* const memory_allocator = + GetMemoryAllocator(rep->table_options); + + // Read contents for the blocks + BlockContents prefixes_contents; + BlockFetcher prefixes_block_fetcher( + file, prefetch_buffer, footer, ro, prefixes_handle, &prefixes_contents, + ioptions, true /*decompress*/, true /*maybe_compressed*/, + BlockType::kHashIndexPrefixes, UncompressionDict::GetEmptyDict(), + cache_options, memory_allocator); + s = prefixes_block_fetcher.ReadBlockContents(); + if (!s.ok()) { + return s; + } + BlockContents prefixes_meta_contents; + BlockFetcher prefixes_meta_block_fetcher( + file, prefetch_buffer, footer, ro, prefixes_meta_handle, + &prefixes_meta_contents, ioptions, true /*decompress*/, + true /*maybe_compressed*/, BlockType::kHashIndexMetadata, + UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); + s = prefixes_meta_block_fetcher.ReadBlockContents(); + if (!s.ok()) { + // TODO: log error + return Status::OK(); + } + + BlockPrefixIndex* prefix_index = nullptr; + assert(rep->table_prefix_extractor); + s = BlockPrefixIndex::Create(rep->table_prefix_extractor.get(), + prefixes_contents.data, + prefixes_meta_contents.data, &prefix_index); + // TODO: log error + if (s.ok()) { + HashIndexReader* const hash_index_reader = + static_cast(index_reader->get()); + hash_index_reader->prefix_index_.reset(prefix_index); + } + + return Status::OK(); +} + +InternalIteratorBase* HashIndexReader::NewIterator( + const ReadOptions& read_options, bool disable_prefix_seek, + IndexBlockIter* iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) { + const BlockBasedTable::Rep* rep = table()->get_rep(); + const bool no_io = 
+InternalIteratorBase<IndexValue>* HashIndexReader::NewIterator(
+    const ReadOptions& read_options, bool disable_prefix_seek,
+    IndexBlockIter* iter, GetContext* get_context,
+    BlockCacheLookupContext* lookup_context) {
+  const BlockBasedTable::Rep* rep = table()->get_rep();
+  const bool no_io = (read_options.read_tier == kBlockCacheTier);
+  CachableEntry<Block> index_block;
+  const Status s = GetOrReadIndexBlock(no_io, get_context, lookup_context,
+                                       &index_block, read_options);
+  if (!s.ok()) {
+    if (iter != nullptr) {
+      iter->Invalidate(s);
+      return iter;
+    }
+
+    return NewErrorInternalIterator<IndexValue>(s);
+  }
+
+  Statistics* kNullStats = nullptr;
+  const bool total_order_seek =
+      read_options.total_order_seek || disable_prefix_seek;
+  // We don't return pinned data from index blocks, so no need
+  // to set `block_contents_pinned`.
+  auto it = index_block.GetValue()->NewIndexIterator(
+      internal_comparator()->user_comparator(),
+      rep->get_global_seqno(BlockType::kIndex), iter, kNullStats,
+      total_order_seek, index_has_first_key(), index_key_includes_seq(),
+      index_value_is_full(), false /* block_contents_pinned */,
+      prefix_index_.get());
+
+  assert(it != nullptr);
+  index_block.TransferTo(it);
+
+  return it;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/hash_index_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/hash_index_reader.h
new file mode 100644
index 0000000000..9037efc877
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/hash_index_reader.h
@@ -0,0 +1,49 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include "table/block_based/index_reader_common.h"
+
+namespace ROCKSDB_NAMESPACE {
+// Index that leverages an internal hash table to quicken the lookup for a
+// given key.
+class HashIndexReader : public BlockBasedTable::IndexReaderCommon {
+ public:
+  static Status Create(const BlockBasedTable* table, const ReadOptions& ro,
+                       FilePrefetchBuffer* prefetch_buffer,
+                       InternalIterator* meta_index_iter, bool use_cache,
+                       bool prefetch, bool pin,
+                       BlockCacheLookupContext* lookup_context,
+                       std::unique_ptr<IndexReader>* index_reader);
+
+  InternalIteratorBase<IndexValue>* NewIterator(
+      const ReadOptions& read_options, bool disable_prefix_seek,
+      IndexBlockIter* iter, GetContext* get_context,
+      BlockCacheLookupContext* lookup_context) override;
+
+  size_t ApproximateMemoryUsage() const override {
+    size_t usage = ApproximateIndexBlockMemoryUsage();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+    usage += malloc_usable_size(const_cast<HashIndexReader*>(this));
+#else
+    if (prefix_index_) {
+      usage += prefix_index_->ApproximateMemoryUsage();
+    }
+    usage += sizeof(*this);
+#endif  // ROCKSDB_MALLOC_USABLE_SIZE
+    return usage;
+  }
+
+ private:
+  HashIndexReader(const BlockBasedTable* t, CachableEntry<Block>&& index_block)
+      : IndexReaderCommon(t, std::move(index_block)) {}
+
+  std::unique_ptr<BlockPrefixIndex> prefix_index_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.cc
new file mode 100644
index 0000000000..0247301782
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.cc
@@ -0,0 +1,282 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/block_based/index_builder.h"
+
+#include <assert.h>
+
+#include <cinttypes>
+#include <list>
+#include <string>
+
+#include "db/dbformat.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/flush_block_policy.h"
+#include "table/block_based/partitioned_filter_block.h"
+#include "table/format.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Create an index builder based on its type.
+IndexBuilder* IndexBuilder::CreateIndexBuilder(
+    BlockBasedTableOptions::IndexType index_type,
+    const InternalKeyComparator* comparator,
+    const InternalKeySliceTransform* int_key_slice_transform,
+    const bool use_value_delta_encoding,
+    const BlockBasedTableOptions& table_opt) {
+  IndexBuilder* result = nullptr;
+  switch (index_type) {
+    case BlockBasedTableOptions::kBinarySearch: {
+      result = new ShortenedIndexBuilder(
+          comparator, table_opt.index_block_restart_interval,
+          table_opt.format_version, use_value_delta_encoding,
+          table_opt.index_shortening, /* include_first_key */ false);
+      break;
+    }
+    case BlockBasedTableOptions::kHashSearch: {
+      // Currently kHashSearch is incompatible with
+      // index_block_restart_interval > 1
+      assert(table_opt.index_block_restart_interval == 1);
+      result = new HashIndexBuilder(
+          comparator, int_key_slice_transform,
+          table_opt.index_block_restart_interval, table_opt.format_version,
+          use_value_delta_encoding, table_opt.index_shortening);
+      break;
+    }
+    case BlockBasedTableOptions::kTwoLevelIndexSearch: {
+      result = PartitionedIndexBuilder::CreateIndexBuilder(
+          comparator, use_value_delta_encoding, table_opt);
+      break;
+    }
+    case BlockBasedTableOptions::kBinarySearchWithFirstKey: {
+      result = new ShortenedIndexBuilder(
+          comparator, table_opt.index_block_restart_interval,
+          table_opt.format_version, use_value_delta_encoding,
+          table_opt.index_shortening, /* include_first_key */ true);
+      break;
+    }
+    default: {
+      assert(!"Do not recognize the index type ");
+      break;
+    }
+  }
+  return result;
+}
+void ShortenedIndexBuilder::FindShortestInternalKeySeparator(
+    const Comparator& comparator, std::string* start, const Slice& limit) {
+  // Attempt to shorten the user portion of the key
+  Slice user_start = ExtractUserKey(*start);
+  Slice user_limit = ExtractUserKey(limit);
+  std::string tmp(user_start.data(), user_start.size());
+  comparator.FindShortestSeparator(&tmp, user_limit);
+  if (tmp.size() <= user_start.size() &&
+      comparator.Compare(user_start, tmp) < 0) {
+    // User key has become shorter physically, but larger logically.
+    // Tack on the earliest possible number to the shortened user key.
+    PutFixed64(&tmp,
+               PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
+    assert(InternalKeyComparator(&comparator).Compare(*start, tmp) < 0);
+    assert(InternalKeyComparator(&comparator).Compare(tmp, limit) < 0);
+    start->swap(tmp);
+  }
+}
+
+void ShortenedIndexBuilder::FindShortInternalKeySuccessor(
+    const Comparator& comparator, std::string* key) {
+  Slice user_key = ExtractUserKey(*key);
+  std::string tmp(user_key.data(), user_key.size());
+  comparator.FindShortSuccessor(&tmp);
+  if (tmp.size() <= user_key.size() && comparator.Compare(user_key, tmp) < 0) {
+    // User key has become shorter physically, but larger logically.
+    // Tack on the earliest possible number to the shortened user key.
+    PutFixed64(&tmp,
+               PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
+    assert(InternalKeyComparator(&comparator).Compare(*key, tmp) < 0);
+    key->swap(tmp);
+  }
+}
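What the separator shortening above buys is easiest to see on user keys. A standalone sketch (not from this patch; the toy comparator is a simplified bytewise FindShortestSeparator, ignoring edge cases like 0xff bytes that the real one handles):

// Illustrative sketch (not part of this patch): a shortest separator need
// only order between the bounds, not reproduce either of them, so the
// index key can be much shorter than the last key of the data block.
#include <iostream>
#include <string>

// Toy stand-in for Comparator::FindShortestSeparator on bytewise keys.
void FindShortestSeparator(std::string* start, const std::string& limit) {
  size_t i = 0;
  while (i < start->size() && i < limit.size() && (*start)[i] == limit[i]) ++i;
  if (i < start->size() && i < limit.size() &&
      static_cast<unsigned char>((*start)[i]) + 1 <
          static_cast<unsigned char>(limit[i])) {
    (*start)[i]++;
    start->resize(i + 1);
  }
}

int main() {
  std::string start = "the quick brown fox";
  FindShortestSeparator(&start, "the who");
  std::cout << start << "\n";  // "the r" -- short, >= old start, < limit
}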
+PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
+    const InternalKeyComparator* comparator,
+    const bool use_value_delta_encoding,
+    const BlockBasedTableOptions& table_opt) {
+  return new PartitionedIndexBuilder(comparator, table_opt,
+                                     use_value_delta_encoding);
+}
+
+PartitionedIndexBuilder::PartitionedIndexBuilder(
+    const InternalKeyComparator* comparator,
+    const BlockBasedTableOptions& table_opt,
+    const bool use_value_delta_encoding)
+    : IndexBuilder(comparator),
+      index_block_builder_(table_opt.index_block_restart_interval,
+                           true /*use_delta_encoding*/,
+                           use_value_delta_encoding),
+      index_block_builder_without_seq_(table_opt.index_block_restart_interval,
+                                       true /*use_delta_encoding*/,
+                                       use_value_delta_encoding),
+      sub_index_builder_(nullptr),
+      table_opt_(table_opt),
+      // We start with false. After each partition we revise the value based
+      // on what the sub_index_builder has decided. If the feature is disabled
+      // entirely, this will be set to true after switching the first
+      // sub_index_builder. Otherwise, it could be set to true even if one of
+      // the sub_index_builders could not safely exclude seq from the keys;
+      // it will then be enforced on all sub_index_builders on ::Finish.
+      seperator_is_key_plus_seq_(false),
+      use_value_delta_encoding_(use_value_delta_encoding) {}
+
+PartitionedIndexBuilder::~PartitionedIndexBuilder() {
+  delete sub_index_builder_;
+}
+
+void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
+  assert(sub_index_builder_ == nullptr);
+  sub_index_builder_ = new ShortenedIndexBuilder(
+      comparator_, table_opt_.index_block_restart_interval,
+      table_opt_.format_version, use_value_delta_encoding_,
+      table_opt_.index_shortening, /* include_first_key */ false);
+
+  // Set sub_index_builder_->seperator_is_key_plus_seq_ to true if
+  // seperator_is_key_plus_seq_ is true (internal-key mode) (set to false by
+  // default on Creation) so that flush policy can point to
+  // sub_index_builder_->index_block_builder_
+  if (seperator_is_key_plus_seq_) {
+    sub_index_builder_->seperator_is_key_plus_seq_ = true;
+  }
+
+  flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
+      table_opt_.metadata_block_size, table_opt_.block_size_deviation,
+      // Note: this is sub-optimal since sub_index_builder_ could later reset
+      // seperator_is_key_plus_seq_ but the probability of that is low.
+      sub_index_builder_->seperator_is_key_plus_seq_
+          ? sub_index_builder_->index_block_builder_
+          : sub_index_builder_->index_block_builder_without_seq_));
+  partition_cut_requested_ = false;
+}
+
+void PartitionedIndexBuilder::RequestPartitionCut() {
+  partition_cut_requested_ = true;
+}
+
+void PartitionedIndexBuilder::AddIndexEntry(
+    std::string* last_key_in_current_block,
+    const Slice* first_key_in_next_block, const BlockHandle& block_handle) {
+  // Note: to avoid two consecutive flushes in the same method call, we do not
+  // check flush policy when adding the last key
+  if (UNLIKELY(first_key_in_next_block == nullptr)) {  // no more keys
+    if (sub_index_builder_ == nullptr) {
+      MakeNewSubIndexBuilder();
+    }
+    sub_index_builder_->AddIndexEntry(last_key_in_current_block,
+                                      first_key_in_next_block, block_handle);
+    if (!seperator_is_key_plus_seq_ &&
+        sub_index_builder_->seperator_is_key_plus_seq_) {
+      // then we need to apply it to all sub-index builders and reset
+      // flush_policy to point to the Block Builder of sub_index_builder_ that
+      // stores internal keys.
+      seperator_is_key_plus_seq_ = true;
+      flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
+          table_opt_.metadata_block_size, table_opt_.block_size_deviation,
+          sub_index_builder_->index_block_builder_));
+    }
+    sub_index_last_key_ = std::string(*last_key_in_current_block);
+    entries_.push_back(
+        {sub_index_last_key_,
+         std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
+    sub_index_builder_ = nullptr;
+    cut_filter_block = true;
+  } else {
+    // apply flush policy only to non-empty sub_index_builder_
+    if (sub_index_builder_ != nullptr) {
+      std::string handle_encoding;
+      block_handle.EncodeTo(&handle_encoding);
+      bool do_flush =
+          partition_cut_requested_ ||
+          flush_policy_->Update(*last_key_in_current_block, handle_encoding);
+      if (do_flush) {
+        entries_.push_back(
+            {sub_index_last_key_,
+             std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)});
+        cut_filter_block = true;
+        sub_index_builder_ = nullptr;
+      }
+    }
+    if (sub_index_builder_ == nullptr) {
+      MakeNewSubIndexBuilder();
+    }
+    sub_index_builder_->AddIndexEntry(last_key_in_current_block,
+                                      first_key_in_next_block, block_handle);
+    sub_index_last_key_ = std::string(*last_key_in_current_block);
+    if (!seperator_is_key_plus_seq_ &&
+        sub_index_builder_->seperator_is_key_plus_seq_) {
+      // then we need to apply it to all sub-index builders and reset
+      // flush_policy to point to the Block Builder of sub_index_builder_ that
+      // stores internal keys.
+      seperator_is_key_plus_seq_ = true;
+      flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
+          table_opt_.metadata_block_size, table_opt_.block_size_deviation,
+          sub_index_builder_->index_block_builder_));
+    }
+  }
+}
+Status PartitionedIndexBuilder::Finish(
+    IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
+  if (partition_cnt_ == 0) {
+    partition_cnt_ = entries_.size();
+  }
+  // It must be set to null after last key is added
+  assert(sub_index_builder_ == nullptr);
+  if (finishing_indexes == true) {
+    Entry& last_entry = entries_.front();
+    std::string handle_encoding;
+    last_partition_block_handle.EncodeTo(&handle_encoding);
+    std::string handle_delta_encoding;
+    PutVarsignedint64(
+        &handle_delta_encoding,
+        last_partition_block_handle.size() - last_encoded_handle_.size());
+    last_encoded_handle_ = last_partition_block_handle;
+    const Slice handle_delta_encoding_slice(handle_delta_encoding);
+    index_block_builder_.Add(last_entry.key, handle_encoding,
+                             &handle_delta_encoding_slice);
+    if (!seperator_is_key_plus_seq_) {
+      index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
                                           handle_encoding,
+                                           &handle_delta_encoding_slice);
+    }
+    entries_.pop_front();
+  }
+  // If there is no sub_index left, then return the 2nd level index.
+  if (UNLIKELY(entries_.empty())) {
+    if (seperator_is_key_plus_seq_) {
+      index_blocks->index_block_contents = index_block_builder_.Finish();
+    } else {
+      index_blocks->index_block_contents =
+          index_block_builder_without_seq_.Finish();
+    }
+    top_level_index_size_ = index_blocks->index_block_contents.size();
+    index_size_ += top_level_index_size_;
+    return Status::OK();
+  } else {
+    // Finish the next partition index in line and Incomplete() to indicate we
+    // expect more calls to Finish
+    Entry& entry = entries_.front();
+    // Apply the policy to all sub-indexes
+    entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_;
+    auto s = entry.value->Finish(index_blocks);
+    index_size_ += index_blocks->index_block_contents.size();
+    finishing_indexes = true;
+    return s.ok() ? Status::Incomplete() : s;
+  }
+}
+
+size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; }
+}  // namespace ROCKSDB_NAMESPACE
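The repeated-Finish protocol above is easiest to grasp from the caller's side. A self-contained toy model (not from this patch; all names here are invented for the sketch): each call that reports "incomplete" hands back one partition block, and the final call hands back the top-level index.

// Illustrative sketch (not part of this patch): a toy model of the
// two-phase Finish protocol used by PartitionedIndexBuilder.
#include <iostream>
#include <queue>
#include <string>

struct ToyPartitionedBuilder {
  std::queue<std::string> partitions;
  // Returns true while incomplete; the last call emits the top-level index.
  bool Finish(std::string* out) {
    if (!partitions.empty()) {
      *out = partitions.front();
      partitions.pop();
      return true;
    }
    *out = "top-level-index";
    return false;
  }
};

int main() {
  ToyPartitionedBuilder b;
  b.partitions.push("partition-0");
  b.partitions.push("partition-1");
  std::string block;
  while (b.Finish(&block)) {
    std::cout << "write " << block << "\n";  // would also record its handle
  }
  std::cout << "write " << block << "\n";    // finally the 2nd-level index
}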
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.h
new file mode 100644
index 0000000000..dd3be03316
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_builder.h
@@ -0,0 +1,455 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <assert.h>
+
+#include <list>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "rocksdb/comparator.h"
+#include "table/block_based/block_based_table_factory.h"
+#include "table/block_based/block_builder.h"
+#include "table/format.h"
+
+namespace ROCKSDB_NAMESPACE {
+// The interface for building index.
+// Instruction for adding a new concrete IndexBuilder:
+//  1. Create a subclass of IndexBuilder.
+//  2. Add a new entry associated with that subclass in TableOptions::IndexType.
+//  3. Add a create function for the new subclass in CreateIndexBuilder.
+// Note: we could devise a more advanced design to simplify the process of
+// adding a new subclass, but that would increase code complexity and draw
+// unwanted attention from readers. Given that we won't add/change indexes
+// frequently, it makes sense to just embrace a more straightforward design
+// that just works.
+class IndexBuilder {
+ public:
+  static IndexBuilder* CreateIndexBuilder(
+      BlockBasedTableOptions::IndexType index_type,
+      const ROCKSDB_NAMESPACE::InternalKeyComparator* comparator,
+      const InternalKeySliceTransform* int_key_slice_transform,
+      const bool use_value_delta_encoding,
+      const BlockBasedTableOptions& table_opt);
+
+  // Index builder will construct a set of blocks which contain:
+  //  1. One primary index block.
+  //  2. (Optional) a set of metablocks that contains the metadata of the
+  //     primary index.
+  struct IndexBlocks {
+    Slice index_block_contents;
+    std::unordered_map<std::string, Slice> meta_blocks;
+  };
+  explicit IndexBuilder(const InternalKeyComparator* comparator)
+      : comparator_(comparator) {}
+
+  virtual ~IndexBuilder() {}
+
+  // Add a new index entry to index block.
+  // To allow further optimization, we provide `last_key_in_current_block` and
+  // `first_key_in_next_block`, based on which the specific implementation can
+  // determine the best index key to be used for the index block.
+  // Called before the OnKeyAdded() call for first_key_in_next_block.
+  // @last_key_in_current_block: this parameter may be overridden with the
+  //                             value "substitute key".
+  // @first_key_in_next_block: it will be nullptr if the entry being added is
+  //                           the last one in the table
+  //
+  // REQUIRES: Finish() has not yet been called.
+  virtual void AddIndexEntry(std::string* last_key_in_current_block,
+                             const Slice* first_key_in_next_block,
+                             const BlockHandle& block_handle) = 0;
+
+  // This method will be called whenever a key is added. The subclasses may
+  // override OnKeyAdded() if they need to collect additional information.
+  virtual void OnKeyAdded(const Slice& /*key*/) {}
+
+  // Inform the index builder that all entries have been written. Block
+  // builder may therefore perform any operation required for block
+  // finalization.
+  //
+  // REQUIRES: Finish() has not yet been called.
+  inline Status Finish(IndexBlocks* index_blocks) {
+    // Throw away the changes to last_partition_block_handle. It has no effect
+    // on the first call to Finish anyway.
+    BlockHandle last_partition_block_handle;
+    return Finish(index_blocks, last_partition_block_handle);
+  }
+
+  // This override of Finish can be utilized to build the 2nd level index in
+  // PartitionIndexBuilder.
+  //
+  // index_blocks will be filled with the resulting index data. If the return
+  // value is Status::Incomplete() then it means that the index is partitioned
+  // and the caller should keep calling Finish until Status::OK() is returned.
+  // In that case, last_partition_block_handle is pointer to the block written
+  // with the result of the last call to Finish. This can be utilized to build
+  // the second level index pointing to each block of partitioned indexes. The
+  // last call to Finish() that returns Status::OK() populates index_blocks
+  // with the 2nd level index content.
+  virtual Status Finish(IndexBlocks* index_blocks,
+                        const BlockHandle& last_partition_block_handle) = 0;
+  // Get the size for index block. Must be called after ::Finish.
+  virtual size_t IndexSize() const = 0;
+
+  virtual bool seperator_is_key_plus_seq() { return true; }
+
+ protected:
+  const InternalKeyComparator* comparator_;
+  // Set after ::Finish is called
+  size_t index_size_ = 0;
+};
+
+// This index builder builds space-efficient index block.
+//
+// Optimizations:
+//  1. Set the block's `block_restart_interval` to 1, which avoids a linear
+//     search when doing index lookup (can be disabled by setting
+//     index_block_restart_interval).
+//  2. Shorten the key length for index block. Other than honestly using the
+//     last key in the data block as the index key, we instead find a shortest
+//     substitute key that serves the same function.
+class ShortenedIndexBuilder : public IndexBuilder {
+ public:
+  explicit ShortenedIndexBuilder(
+      const InternalKeyComparator* comparator,
+      const int index_block_restart_interval, const uint32_t format_version,
+      const bool use_value_delta_encoding,
+      BlockBasedTableOptions::IndexShorteningMode shortening_mode,
+      bool include_first_key)
+      : IndexBuilder(comparator),
+        index_block_builder_(index_block_restart_interval,
+                             true /*use_delta_encoding*/,
+                             use_value_delta_encoding),
+        index_block_builder_without_seq_(index_block_restart_interval,
+                                         true /*use_delta_encoding*/,
+                                         use_value_delta_encoding),
+        use_value_delta_encoding_(use_value_delta_encoding),
+        include_first_key_(include_first_key),
+        shortening_mode_(shortening_mode) {
+    // Making the default true will disable the feature for old versions
+    seperator_is_key_plus_seq_ = (format_version <= 2);
+  }
+
+  virtual void OnKeyAdded(const Slice& key) override {
+    if (include_first_key_ && current_block_first_internal_key_.empty()) {
+      current_block_first_internal_key_.assign(key.data(), key.size());
+    }
+  }
+
+  virtual void AddIndexEntry(std::string* last_key_in_current_block,
+                             const Slice* first_key_in_next_block,
+                             const BlockHandle& block_handle) override {
+    if (first_key_in_next_block != nullptr) {
+      if (shortening_mode_ !=
+          BlockBasedTableOptions::IndexShorteningMode::kNoShortening) {
+        FindShortestInternalKeySeparator(*comparator_->user_comparator(),
+                                         last_key_in_current_block,
+                                         *first_key_in_next_block);
+      }
+      if (!seperator_is_key_plus_seq_ &&
+          comparator_->user_comparator()->Compare(
+              ExtractUserKey(*last_key_in_current_block),
+              ExtractUserKey(*first_key_in_next_block)) == 0) {
+        seperator_is_key_plus_seq_ = true;
+      }
+    } else {
+      if (shortening_mode_ == BlockBasedTableOptions::IndexShorteningMode::
+                                  kShortenSeparatorsAndSuccessor) {
+        FindShortInternalKeySuccessor(*comparator_->user_comparator(),
+                                      last_key_in_current_block);
+      }
+    }
+    auto sep = Slice(*last_key_in_current_block);
+
+    assert(!include_first_key_ || !current_block_first_internal_key_.empty());
+    IndexValue entry(block_handle, current_block_first_internal_key_);
+    std::string encoded_entry;
+    std::string delta_encoded_entry;
+    entry.EncodeTo(&encoded_entry, include_first_key_, nullptr);
+    if (use_value_delta_encoding_ && !last_encoded_handle_.IsNull()) {
+      entry.EncodeTo(&delta_encoded_entry, include_first_key_,
+                     &last_encoded_handle_);
+    } else {
+      // If it's the first block, or delta encoding is disabled,
+      // BlockBuilder::Add() below won't use delta-encoded slice.
+    }
+    last_encoded_handle_ = block_handle;
+    const Slice delta_encoded_entry_slice(delta_encoded_entry);
+    index_block_builder_.Add(sep, encoded_entry, &delta_encoded_entry_slice);
+    if (!seperator_is_key_plus_seq_) {
+      index_block_builder_without_seq_.Add(ExtractUserKey(sep), encoded_entry,
+                                           &delta_encoded_entry_slice);
+    }
+
+    current_block_first_internal_key_.clear();
+  }
+
+  using IndexBuilder::Finish;
+  virtual Status Finish(
+      IndexBlocks* index_blocks,
+      const BlockHandle& /*last_partition_block_handle*/) override {
+    if (seperator_is_key_plus_seq_) {
+      index_blocks->index_block_contents = index_block_builder_.Finish();
+    } else {
+      index_blocks->index_block_contents =
+          index_block_builder_without_seq_.Finish();
+    }
+    index_size_ = index_blocks->index_block_contents.size();
+    return Status::OK();
+  }
+
+  virtual size_t IndexSize() const override { return index_size_; }
+
+  virtual bool seperator_is_key_plus_seq() override {
+    return seperator_is_key_plus_seq_;
+  }
+
+  // Changes *key to a short string >= *key.
+  //
+  static void FindShortestInternalKeySeparator(const Comparator& comparator,
+                                               std::string* start,
+                                               const Slice& limit);
+
+  static void FindShortInternalKeySuccessor(const Comparator& comparator,
+                                            std::string* key);
+
+  friend class PartitionedIndexBuilder;
+
+ private:
+  BlockBuilder index_block_builder_;
+  BlockBuilder index_block_builder_without_seq_;
+  const bool use_value_delta_encoding_;
+  bool seperator_is_key_plus_seq_;
+  const bool include_first_key_;
+  BlockBasedTableOptions::IndexShorteningMode shortening_mode_;
+  BlockHandle last_encoded_handle_ = BlockHandle::NullBlockHandle();
+  std::string current_block_first_internal_key_;
+};
+// HashIndexBuilder contains a binary-searchable primary index and the
+// metadata for secondary hash index construction.
+// The metadata for hash index consists of two parts:
+// - a metablock that compactly contains a sequence of prefixes. All prefixes
+//   are stored consecutively without any metadata (like, prefix sizes) being
+//   stored, which is kept in the other metablock.
+// - a metablock that contains the metadata of the prefixes, including prefix
+//   size, restart index and number of blocks it spans. The format looks like:
+//
+// +-----------------+---------------------------+---------------------+
+// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes | <= prefix 1
+// +-----------------+---------------------------+---------------------+
+// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes | <= prefix 2
+// +-----------------+---------------------------+---------------------+
+// |                                                                   |
+// |                                ....                               |
+// |                                                                   |
+// +-----------------+---------------------------+---------------------+
+// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes | <= prefix n
+// +-----------------+---------------------------+---------------------+
+//
+// The reason for separating these two metablocks is to enable efficient
+// reuse of the first metablock during hash index construction without
+// unnecessary data copy or small heap allocations for prefixes.
+class HashIndexBuilder : public IndexBuilder {
+ public:
+  explicit HashIndexBuilder(
+      const InternalKeyComparator* comparator,
+      const SliceTransform* hash_key_extractor,
+      int index_block_restart_interval, int format_version,
+      bool use_value_delta_encoding,
+      BlockBasedTableOptions::IndexShorteningMode shortening_mode)
+      : IndexBuilder(comparator),
+        primary_index_builder_(comparator, index_block_restart_interval,
+                               format_version, use_value_delta_encoding,
+                               shortening_mode, /* include_first_key */ false),
+        hash_key_extractor_(hash_key_extractor) {}
+
+  virtual void AddIndexEntry(std::string* last_key_in_current_block,
+                             const Slice* first_key_in_next_block,
+                             const BlockHandle& block_handle) override {
+    ++current_restart_index_;
+    primary_index_builder_.AddIndexEntry(last_key_in_current_block,
+                                         first_key_in_next_block, block_handle);
+  }
+
+  virtual void OnKeyAdded(const Slice& key) override {
+    auto key_prefix = hash_key_extractor_->Transform(key);
+    bool is_first_entry = pending_block_num_ == 0;
+
+    // Keys may share the prefix
+    if (is_first_entry || pending_entry_prefix_ != key_prefix) {
+      if (!is_first_entry) {
+        FlushPendingPrefix();
+      }
+
+      // need a hard copy otherwise the underlying data changes all the time.
+      // TODO(kailiu) std::to_string() is expensive. We may be able to speed
+      // this up by avoiding the data copy.
+      pending_entry_prefix_ = key_prefix.ToString();
+      pending_block_num_ = 1;
+      pending_entry_index_ = static_cast<uint32_t>(current_restart_index_);
+    } else {
+      // The entry count increments when keys sharing the prefix reside in
+      // different data blocks.
+      auto last_restart_index = pending_entry_index_ + pending_block_num_ - 1;
+      assert(last_restart_index <= current_restart_index_);
+      if (last_restart_index != current_restart_index_) {
+        ++pending_block_num_;
+      }
+    }
+  }
+
+  virtual Status Finish(
+      IndexBlocks* index_blocks,
+      const BlockHandle& last_partition_block_handle) override {
+    if (pending_block_num_ != 0) {
+      FlushPendingPrefix();
+    }
+    Status s = primary_index_builder_.Finish(index_blocks,
+                                             last_partition_block_handle);
+    index_blocks->meta_blocks.insert(
+        {kHashIndexPrefixesBlock.c_str(), prefix_block_});
+    index_blocks->meta_blocks.insert(
+        {kHashIndexPrefixesMetadataBlock.c_str(), prefix_meta_block_});
+    return s;
+  }
+
+  virtual size_t IndexSize() const override {
+    return primary_index_builder_.IndexSize() + prefix_block_.size() +
+           prefix_meta_block_.size();
+  }
+
+  virtual bool seperator_is_key_plus_seq() override {
+    return primary_index_builder_.seperator_is_key_plus_seq();
+  }
+
+ private:
+  void FlushPendingPrefix() {
+    prefix_block_.append(pending_entry_prefix_.data(),
+                         pending_entry_prefix_.size());
+    PutVarint32Varint32Varint32(
+        &prefix_meta_block_,
+        static_cast<uint32_t>(pending_entry_prefix_.size()),
+        pending_entry_index_, pending_block_num_);
+  }
+
+  ShortenedIndexBuilder primary_index_builder_;
+  const SliceTransform* hash_key_extractor_;
+
+  // stores a sequence of prefixes
+  std::string prefix_block_;
+  // stores the metadata of prefixes
+  std::string prefix_meta_block_;
+
+  // The following 3 variables keep the unflushed prefix and its metadata.
+  // The details of block_num and entry_index can be found in
+  // "block_hash_index.{h,cc}"
+  uint32_t pending_block_num_ = 0;
+  uint32_t pending_entry_index_ = 0;
+  std::string pending_entry_prefix_;
+
+  uint64_t current_restart_index_ = 0;
+};
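The two-metablock layout documented above is simple to model. A standalone sketch (not from this patch; plain integers stand in for the varint encoding used by PutVarint32Varint32Varint32, and the names are invented):

// Illustrative sketch (not part of this patch): prefixes are concatenated
// raw in one block and described by (length, restart-index, block-count)
// triples in the other, so the prefix block can be scanned with no copies.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct PrefixMeta {
  uint32_t length;
  uint32_t restart_index;
  uint32_t num_blocks;
};

int main() {
  std::string prefix_block;            // raw prefixes, back to back
  std::vector<PrefixMeta> meta_block;  // one triple per prefix
  auto flush = [&](const std::string& p, uint32_t restart, uint32_t blocks) {
    prefix_block.append(p);
    meta_block.push_back({static_cast<uint32_t>(p.size()), restart, blocks});
  };
  flush("app", 0, 2);  // keys with prefix "app" span index entries 0..1
  flush("bar", 2, 1);
  std::cout << prefix_block << "\n";  // "appbar" -- no separators needed
  std::cout << meta_block.size() << " prefix entries\n";
}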
Internally it creates a new index for + * each partition and Finish then in order when Finish is called on it + * continiously until Status::OK() is returned. + * + * The format on the disk would be I I I I I I IP where I is block containing a + * partition of indexes built using ShortenedIndexBuilder and IP is a block + * containing a secondary index on the partitions, built using + * ShortenedIndexBuilder. + */ +class PartitionedIndexBuilder : public IndexBuilder { + public: + static PartitionedIndexBuilder* CreateIndexBuilder( + const ROCKSDB_NAMESPACE::InternalKeyComparator* comparator, + const bool use_value_delta_encoding, + const BlockBasedTableOptions& table_opt); + + explicit PartitionedIndexBuilder(const InternalKeyComparator* comparator, + const BlockBasedTableOptions& table_opt, + const bool use_value_delta_encoding); + + virtual ~PartitionedIndexBuilder(); + + virtual void AddIndexEntry(std::string* last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle) override; + + virtual Status Finish( + IndexBlocks* index_blocks, + const BlockHandle& last_partition_block_handle) override; + + virtual size_t IndexSize() const override { return index_size_; } + size_t TopLevelIndexSize(uint64_t) const { return top_level_index_size_; } + size_t NumPartitions() const; + + inline bool ShouldCutFilterBlock() { + // Current policy is to align the partitions of index and filters + if (cut_filter_block) { + cut_filter_block = false; + return true; + } + return false; + } + + std::string& GetPartitionKey() { return sub_index_last_key_; } + + // Called when an external entity (such as filter partition builder) request + // cutting the next partition + void RequestPartitionCut(); + + virtual bool seperator_is_key_plus_seq() override { + return seperator_is_key_plus_seq_; + } + + bool get_use_value_delta_encoding() { return use_value_delta_encoding_; } + + private: + // Set after ::Finish is called + size_t top_level_index_size_ = 0; + // Set after ::Finish is called + size_t partition_cnt_ = 0; + + void MakeNewSubIndexBuilder(); + + struct Entry { + std::string key; + std::unique_ptr value; + }; + std::list entries_; // list of partitioned indexes and their keys + BlockBuilder index_block_builder_; // top-level index builder + BlockBuilder index_block_builder_without_seq_; // same for user keys + // the active partition index builder + ShortenedIndexBuilder* sub_index_builder_; + // the last key in the active partition index builder + std::string sub_index_last_key_; + std::unique_ptr flush_policy_; + // true if Finish is called once but not complete yet. + bool finishing_indexes = false; + const BlockBasedTableOptions& table_opt_; + bool seperator_is_key_plus_seq_; + bool use_value_delta_encoding_; + // true if an external entity (such as filter partition builder) request + // cutting the next partition + bool partition_cut_requested_ = true; + // true if it should cut the next filter partition block + bool cut_filter_block = false; + BlockHandle last_encoded_handle_; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.cc new file mode 100644 index 0000000000..828200299d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "table/block_based/index_reader_common.h" + +#include "block_cache.h" + +namespace ROCKSDB_NAMESPACE { +Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock( + const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, + const ReadOptions& read_options, bool use_cache, GetContext* get_context, + BlockCacheLookupContext* lookup_context, + CachableEntry* index_block) { + PERF_TIMER_GUARD(read_index_block_nanos); + + assert(table != nullptr); + assert(index_block != nullptr); + assert(index_block->IsEmpty()); + + const Rep* const rep = table->get_rep(); + assert(rep != nullptr); + + const Status s = table->RetrieveBlock( + prefetch_buffer, read_options, rep->footer.index_handle(), + UncompressionDict::GetEmptyDict(), &index_block->As(), + get_context, lookup_context, /* for_compaction */ false, use_cache, + /* async_read */ false); + + return s; +} + +Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock( + bool no_io, GetContext* get_context, + BlockCacheLookupContext* lookup_context, CachableEntry* index_block, + const ReadOptions& ro) const { + assert(index_block != nullptr); + + if (!index_block_.IsEmpty()) { + index_block->SetUnownedValue(index_block_.GetValue()); + return Status::OK(); + } + + ReadOptions read_options = ro; + if (no_io) { + read_options.read_tier = kBlockCacheTier; + } + + return ReadIndexBlock(table_, /*prefetch_buffer=*/nullptr, read_options, + cache_index_blocks(), get_context, lookup_context, + index_block); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.h new file mode 100644 index 0000000000..d3827fc6bc --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/index_reader_common.h @@ -0,0 +1,85 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/reader_common.h" + +namespace ROCKSDB_NAMESPACE { +// Encapsulates common functionality for the various index reader +// implementations. Provides access to the index block regardless of whether +// it is owned by the reader or stored in the cache, or whether it is pinned +// in the cache or not. 
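+//
+// Illustrative sketch (not part of the vendored source): a derived index
+// reader typically obtains the block like this, where `ro` and `ctx` are
+// hypothetical locals:
+//
+//   CachableEntry<Block> index_block;
+//   Status s = GetOrReadIndexBlock(/*no_io=*/false, /*get_context=*/nullptr,
+//                                  &ctx, &index_block, ro);
+//
+// When the reader holds a pinned copy, the returned entry is unowned and no
+// cache lookup or IO is performed.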
+class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader { + public: + IndexReaderCommon(const BlockBasedTable* t, + CachableEntry&& index_block) + : table_(t), index_block_(std::move(index_block)) { + assert(table_ != nullptr); + } + + protected: + static Status ReadIndexBlock(const BlockBasedTable* table, + FilePrefetchBuffer* prefetch_buffer, + const ReadOptions& read_options, bool use_cache, + GetContext* get_context, + BlockCacheLookupContext* lookup_context, + CachableEntry* index_block); + + const BlockBasedTable* table() const { return table_; } + + const InternalKeyComparator* internal_comparator() const { + assert(table_ != nullptr); + assert(table_->get_rep() != nullptr); + + return &table_->get_rep()->internal_comparator; + } + + bool index_has_first_key() const { + assert(table_ != nullptr); + assert(table_->get_rep() != nullptr); + return table_->get_rep()->index_has_first_key; + } + + bool index_key_includes_seq() const { + assert(table_ != nullptr); + assert(table_->get_rep() != nullptr); + return table_->get_rep()->index_key_includes_seq; + } + + bool index_value_is_full() const { + assert(table_ != nullptr); + assert(table_->get_rep() != nullptr); + return table_->get_rep()->index_value_is_full; + } + + bool cache_index_blocks() const { + assert(table_ != nullptr); + assert(table_->get_rep() != nullptr); + return table_->get_rep()->table_options.cache_index_and_filter_blocks; + } + + Status GetOrReadIndexBlock(bool no_io, GetContext* get_context, + BlockCacheLookupContext* lookup_context, + CachableEntry* index_block, + const ReadOptions& read_options) const; + + size_t ApproximateIndexBlockMemoryUsage() const { + assert(!index_block_.GetOwnValue() || index_block_.GetValue() != nullptr); + return index_block_.GetOwnValue() + ? index_block_.GetValue()->ApproximateMemoryUsage() + : 0; + } + + private: + const BlockBasedTable* table_; + CachableEntry index_block_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/mock_block_based_table.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/mock_block_based_table.h new file mode 100644 index 0000000000..13f3dfaee1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/mock_block_based_table.h @@ -0,0 +1,62 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#pragma once + +#include + +#include "rocksdb/filter_policy.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/filter_policy_internal.h" + +namespace ROCKSDB_NAMESPACE { +namespace mock { + +class MockBlockBasedTable : public BlockBasedTable { + public: + explicit MockBlockBasedTable(Rep* rep) + : BlockBasedTable(rep, nullptr /* block_cache_tracer */) {} +}; + +class MockBlockBasedTableTester { + static constexpr int kMockLevel = 0; + + public: + Options options_; + ImmutableOptions ioptions_; + EnvOptions env_options_; + BlockBasedTableOptions table_options_; + InternalKeyComparator icomp_; + std::unique_ptr table_; + + explicit MockBlockBasedTableTester(const FilterPolicy* filter_policy) + : MockBlockBasedTableTester( + std::shared_ptr(filter_policy)){}; + + explicit MockBlockBasedTableTester( + std::shared_ptr filter_policy) + : ioptions_(options_), + env_options_(options_), + icomp_(options_.comparator) { + table_options_.filter_policy = std::move(filter_policy); + + constexpr bool skip_filters = false; + constexpr bool immortal_table = false; + table_.reset(new MockBlockBasedTable(new BlockBasedTable::Rep( + ioptions_, env_options_, table_options_, icomp_, skip_filters, + 12345 /*file_size*/, kMockLevel, immortal_table))); + } + + FilterBitsBuilder* GetBuilder() const { + FilterBuildingContext context(table_options_); + context.column_family_name = "mock_cf"; + context.compaction_style = ioptions_.compaction_style; + context.level_at_creation = kMockLevel; + context.info_log = ioptions_.logger; + return BloomFilterPolicy::GetBuilderFromContext(context); + } +}; + +} // namespace mock +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.cc new file mode 100644 index 0000000000..9184a48d23 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#include "table/block_based/parsed_full_filter_block.h" + +#include "table/block_based/filter_policy_internal.h" + +namespace ROCKSDB_NAMESPACE { + +ParsedFullFilterBlock::ParsedFullFilterBlock(const FilterPolicy* filter_policy, + BlockContents&& contents) + : block_contents_(std::move(contents)), + filter_bits_reader_( + !block_contents_.data.empty() + ? filter_policy->GetFilterBitsReader(block_contents_.data) + : nullptr) {} + +ParsedFullFilterBlock::~ParsedFullFilterBlock() = default; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.h new file mode 100644 index 0000000000..8d81868d1b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/parsed_full_filter_block.h @@ -0,0 +1,47 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + +#include + +#include "table/block_based/block_type.h" +#include "table/format.h" + +namespace ROCKSDB_NAMESPACE { + +class FilterBitsReader; +class FilterPolicy; + +// The sharable/cachable part of the full filter. +class ParsedFullFilterBlock { + public: + ParsedFullFilterBlock(const FilterPolicy* filter_policy, + BlockContents&& contents); + ~ParsedFullFilterBlock(); + + FilterBitsReader* filter_bits_reader() const { + return filter_bits_reader_.get(); + } + + // TODO: consider memory usage of the FilterBitsReader + size_t ApproximateMemoryUsage() const { + return block_contents_.ApproximateMemoryUsage(); + } + + bool own_bytes() const { return block_contents_.own_bytes(); } + + // For TypedCacheInterface + const Slice& ContentSlice() const { return block_contents_.data; } + static constexpr CacheEntryRole kCacheEntryRole = + CacheEntryRole::kFilterBlock; + static constexpr BlockType kBlockType = BlockType::kFilter; + + private: + BlockContents block_contents_; + std::unique_ptr filter_bits_reader_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.cc new file mode 100644 index 0000000000..daefa49f4e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.cc @@ -0,0 +1,554 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "table/block_based/partitioned_filter_block.h" + +#include + +#include "block_cache.h" +#include "block_type.h" +#include "file/random_access_file_reader.h" +#include "logging/logging.h" +#include "monitoring/perf_context_imp.h" +#include "port/malloc.h" +#include "port/port.h" +#include "rocksdb/filter_policy.h" +#include "table/block_based/block.h" +#include "table/block_based/block_based_table_reader.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder( + const SliceTransform* _prefix_extractor, bool whole_key_filtering, + FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval, + const bool use_value_delta_encoding, + PartitionedIndexBuilder* const p_index_builder, + const uint32_t partition_size) + : FullFilterBlockBuilder(_prefix_extractor, whole_key_filtering, + filter_bits_builder), + index_on_filter_block_builder_(index_block_restart_interval, + true /*use_delta_encoding*/, + use_value_delta_encoding), + index_on_filter_block_builder_without_seq_(index_block_restart_interval, + true /*use_delta_encoding*/, + use_value_delta_encoding), + p_index_builder_(p_index_builder), + keys_added_to_partition_(0), + total_added_in_built_(0) { + keys_per_partition_ = static_cast( + filter_bits_builder_->ApproximateNumEntries(partition_size)); + if (keys_per_partition_ < 1) { + // partition_size (minus buffer, ~10%) might be smaller than minimum + // filter size, sometimes based on cache line size. Try to find that + // minimum size without CalculateSpace (not necessarily available). 
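+    // The loop below is an exponential search for the smallest candidate
+    // size the builder accepts: it grows the candidate by 25% per iteration
+    // until ApproximateNumEntries reports at least one entry. The
+    // 100000-byte cap is a safety valve against a broken implementation;
+    // past it we fall back to assuming one key per byte.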
+    uint32_t larger = std::max(partition_size + 4, uint32_t{16});
+    for (;;) {
+      keys_per_partition_ = static_cast<uint32_t>(
+          filter_bits_builder_->ApproximateNumEntries(larger));
+      if (keys_per_partition_ >= 1) {
+        break;
+      }
+      larger += larger / 4;
+      if (larger > 100000) {
+        // Might be a broken implementation. Substitute something reasonable:
+        // 1 key / byte.
+        keys_per_partition_ = partition_size;
+        break;
+      }
+    }
+  }
+}
+
+PartitionedFilterBlockBuilder::~PartitionedFilterBlockBuilder() {
+  partitioned_filters_construction_status_.PermitUncheckedError();
+}
+
+void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
+    const Slice* next_key) {
+  // Use == to send the request only once
+  if (keys_added_to_partition_ == keys_per_partition_) {
+    // Currently only the index builder is in charge of cutting a partition.
+    // We keep requesting until it is granted.
+    p_index_builder_->RequestPartitionCut();
+  }
+  if (!p_index_builder_->ShouldCutFilterBlock()) {
+    return;
+  }
+
+  // Add the prefix of the next key before finishing the partition without
+  // updating last_prefix_str_. This hack fixes a bug with format_version=3
+  // where seeking for the prefix would lead us to the previous partition.
+  const bool maybe_add_prefix =
+      next_key && prefix_extractor() && prefix_extractor()->InDomain(*next_key);
+  if (maybe_add_prefix) {
+    const Slice next_key_prefix = prefix_extractor()->Transform(*next_key);
+    if (next_key_prefix.compare(last_prefix_str()) != 0) {
+      AddKey(next_key_prefix);
+    }
+  }
+
+  total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded();
+  std::unique_ptr<const char[]> filter_data;
+  Status filter_construction_status = Status::OK();
+  Slice filter =
+      filter_bits_builder_->Finish(&filter_data, &filter_construction_status);
+  if (filter_construction_status.ok()) {
+    filter_construction_status = filter_bits_builder_->MaybePostVerify(filter);
+  }
+  std::string& index_key = p_index_builder_->GetPartitionKey();
+  filters.push_back({index_key, std::move(filter_data), filter});
+  if (!filter_construction_status.ok() &&
+      partitioned_filters_construction_status_.ok()) {
+    partitioned_filters_construction_status_ = filter_construction_status;
+  }
+  keys_added_to_partition_ = 0;
+  Reset();
+}
+
+void PartitionedFilterBlockBuilder::Add(const Slice& key) {
+  MaybeCutAFilterBlock(&key);
+  FullFilterBlockBuilder::Add(key);
+}
+
+void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {
+  FullFilterBlockBuilder::AddKey(key);
+  keys_added_to_partition_++;
+}
+
+size_t PartitionedFilterBlockBuilder::EstimateEntriesAdded() {
+  return total_added_in_built_ + filter_bits_builder_->EstimateEntriesAdded();
+}
+
+Slice PartitionedFilterBlockBuilder::Finish(
+    const BlockHandle& last_partition_block_handle, Status* status,
+    std::unique_ptr<const char[]>* filter_data) {
+  if (finishing_filters == true) {
+    // Record the handle of the last written filter block in the index
+    std::string handle_encoding;
+    last_partition_block_handle.EncodeTo(&handle_encoding);
+    std::string handle_delta_encoding;
+    PutVarsignedint64(
+        &handle_delta_encoding,
+        last_partition_block_handle.size() - last_encoded_handle_.size());
+    last_encoded_handle_ = last_partition_block_handle;
+    const Slice handle_delta_encoding_slice(handle_delta_encoding);
+    index_on_filter_block_builder_.Add(last_filter_entry_key, handle_encoding,
+                                       &handle_delta_encoding_slice);
+    if (!p_index_builder_->seperator_is_key_plus_seq()) {
+      index_on_filter_block_builder_without_seq_.Add(
+          ExtractUserKey(last_filter_entry_key), handle_encoding,
+
&handle_delta_encoding_slice); + } + } else { + MaybeCutAFilterBlock(nullptr); + } + + if (!partitioned_filters_construction_status_.ok()) { + *status = partitioned_filters_construction_status_; + return Slice(); + } + + // If there is no filter partition left, then return the index on filter + // partitions + if (UNLIKELY(filters.empty())) { + *status = Status::OK(); + last_filter_data.reset(); + if (finishing_filters) { + // Simplest to just add them all at the end + total_added_in_built_ = 0; + if (p_index_builder_->seperator_is_key_plus_seq()) { + return index_on_filter_block_builder_.Finish(); + } else { + return index_on_filter_block_builder_without_seq_.Finish(); + } + } else { + // This is the rare case where no key was added to the filter + return Slice(); + } + } else { + // Return the next filter partition in line and set Incomplete() status to + // indicate we expect more calls to Finish + *status = Status::Incomplete(); + finishing_filters = true; + + last_filter_entry_key = filters.front().key; + Slice filter = filters.front().filter; + last_filter_data = std::move(filters.front().filter_data); + if (filter_data != nullptr) { + *filter_data = std::move(last_filter_data); + } + filters.pop_front(); + return filter; + } +} + +PartitionedFilterBlockReader::PartitionedFilterBlockReader( + const BlockBasedTable* t, + CachableEntry&& filter_block) + : FilterBlockReaderCommon(t, std::move(filter_block)) {} + +std::unique_ptr PartitionedFilterBlockReader::Create( + const BlockBasedTable* table, const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, + bool pin, BlockCacheLookupContext* lookup_context) { + assert(table); + assert(table->get_rep()); + assert(!pin || prefetch); + + CachableEntry filter_block; + if (prefetch || !use_cache) { + const Status s = ReadFilterBlock(table, prefetch_buffer, ro, use_cache, + nullptr /* get_context */, lookup_context, + &filter_block); + if (!s.ok()) { + IGNORE_STATUS_IF_ERROR(s); + return std::unique_ptr(); + } + + if (use_cache && !pin) { + filter_block.Reset(); + } + } + + return std::unique_ptr( + new PartitionedFilterBlockReader(table, std::move(filter_block))); +} + +bool PartitionedFilterBlockReader::KeyMayMatch( + const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, + GetContext* get_context, BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) { + assert(const_ikey_ptr != nullptr); + if (!whole_key_filtering()) { + return true; + } + + return MayMatch(key, no_io, const_ikey_ptr, get_context, lookup_context, + read_options, &FullFilterBlockReader::KeyMayMatch); +} + +void PartitionedFilterBlockReader::KeysMayMatch( + MultiGetRange* range, const bool no_io, + BlockCacheLookupContext* lookup_context, const ReadOptions& read_options) { + if (!whole_key_filtering()) { + return; // Any/all may match + } + + MayMatch(range, nullptr, no_io, lookup_context, read_options, + &FullFilterBlockReader::KeysMayMatch2); +} + +bool PartitionedFilterBlockReader::PrefixMayMatch( + const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, + GetContext* get_context, BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) { + assert(const_ikey_ptr != nullptr); + return MayMatch(prefix, no_io, const_ikey_ptr, get_context, lookup_context, + read_options, &FullFilterBlockReader::PrefixMayMatch); +} + +void PartitionedFilterBlockReader::PrefixesMayMatch( + MultiGetRange* range, const SliceTransform* prefix_extractor, + const bool no_io, 
BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options) { + assert(prefix_extractor); + MayMatch(range, prefix_extractor, no_io, lookup_context, read_options, + &FullFilterBlockReader::PrefixesMayMatch); +} + +BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle( + const CachableEntry& filter_block, + const Slice& entry) const { + IndexBlockIter iter; + const InternalKeyComparator* const comparator = internal_comparator(); + Statistics* kNullStats = nullptr; + filter_block.GetValue()->NewIndexIterator( + comparator->user_comparator(), + table()->get_rep()->get_global_seqno(BlockType::kFilterPartitionIndex), + &iter, kNullStats, true /* total_order_seek */, + false /* have_first_key */, index_key_includes_seq(), + index_value_is_full()); + iter.Seek(entry); + if (UNLIKELY(!iter.Valid())) { + // entry is larger than all the keys. However its prefix might still be + // present in the last partition. If this is called by PrefixMayMatch this + // is necessary for correct behavior. Otherwise it is unnecessary but safe. + // Assuming this is an unlikely case for full key search, the performance + // overhead should be negligible. + iter.SeekToLast(); + } + assert(iter.Valid()); + BlockHandle fltr_blk_handle = iter.value().handle; + return fltr_blk_handle; +} + +Status PartitionedFilterBlockReader::GetFilterPartitionBlock( + FilePrefetchBuffer* prefetch_buffer, const BlockHandle& fltr_blk_handle, + bool no_io, GetContext* get_context, + BlockCacheLookupContext* lookup_context, const ReadOptions& _read_options, + CachableEntry* filter_block) const { + assert(table()); + assert(filter_block); + assert(filter_block->IsEmpty()); + + if (!filter_map_.empty()) { + auto iter = filter_map_.find(fltr_blk_handle.offset()); + // This is a possible scenario since block cache might not have had space + // for the partition + if (iter != filter_map_.end()) { + filter_block->SetUnownedValue(iter->second.GetValue()); + return Status::OK(); + } + } + + ReadOptions read_options = _read_options; + if (no_io) { + read_options.read_tier = kBlockCacheTier; + } + + const Status s = + table()->RetrieveBlock(prefetch_buffer, read_options, fltr_blk_handle, + UncompressionDict::GetEmptyDict(), filter_block, + get_context, lookup_context, + /* for_compaction */ false, /* use_cache */ true, + /* async_read */ false); + + return s; +} + +bool PartitionedFilterBlockReader::MayMatch( + const Slice& slice, bool no_io, const Slice* const_ikey_ptr, + GetContext* get_context, BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options, FilterFunction filter_function) const { + CachableEntry filter_block; + Status s = GetOrReadFilterBlock(no_io, get_context, lookup_context, + &filter_block, read_options); + if (UNLIKELY(!s.ok())) { + IGNORE_STATUS_IF_ERROR(s); + return true; + } + + if (UNLIKELY(filter_block.GetValue()->size() == 0)) { + return true; + } + + auto filter_handle = GetFilterPartitionHandle(filter_block, *const_ikey_ptr); + if (UNLIKELY(filter_handle.size() == 0)) { // key is out of range + return false; + } + + CachableEntry filter_partition_block; + s = GetFilterPartitionBlock(nullptr /* prefetch_buffer */, filter_handle, + no_io, get_context, lookup_context, read_options, + &filter_partition_block); + if (UNLIKELY(!s.ok())) { + IGNORE_STATUS_IF_ERROR(s); + return true; + } + + FullFilterBlockReader filter_partition(table(), + std::move(filter_partition_block)); + return (filter_partition.*filter_function)( + slice, no_io, const_ikey_ptr, get_context, lookup_context, 
read_options); +} + +void PartitionedFilterBlockReader::MayMatch( + MultiGetRange* range, const SliceTransform* prefix_extractor, bool no_io, + BlockCacheLookupContext* lookup_context, const ReadOptions& read_options, + FilterManyFunction filter_function) const { + CachableEntry filter_block; + Status s = GetOrReadFilterBlock(no_io, range->begin()->get_context, + lookup_context, &filter_block, read_options); + if (UNLIKELY(!s.ok())) { + IGNORE_STATUS_IF_ERROR(s); + return; // Any/all may match + } + + if (UNLIKELY(filter_block.GetValue()->size() == 0)) { + return; // Any/all may match + } + + auto start_iter_same_handle = range->begin(); + BlockHandle prev_filter_handle = BlockHandle::NullBlockHandle(); + + // For all keys mapping to same partition (must be adjacent in sorted order) + // share block cache lookup and use full filter multiget on the partition + // filter. + for (auto iter = start_iter_same_handle; iter != range->end(); ++iter) { + // TODO: re-use one top-level index iterator + BlockHandle this_filter_handle = + GetFilterPartitionHandle(filter_block, iter->ikey); + if (!prev_filter_handle.IsNull() && + this_filter_handle != prev_filter_handle) { + MultiGetRange subrange(*range, start_iter_same_handle, iter); + MayMatchPartition(&subrange, prefix_extractor, prev_filter_handle, no_io, + lookup_context, read_options, filter_function); + range->AddSkipsFrom(subrange); + start_iter_same_handle = iter; + } + if (UNLIKELY(this_filter_handle.size() == 0)) { // key is out of range + // Not reachable with current behavior of GetFilterPartitionHandle + assert(false); + range->SkipKey(iter); + prev_filter_handle = BlockHandle::NullBlockHandle(); + } else { + prev_filter_handle = this_filter_handle; + } + } + if (!prev_filter_handle.IsNull()) { + MultiGetRange subrange(*range, start_iter_same_handle, range->end()); + MayMatchPartition(&subrange, prefix_extractor, prev_filter_handle, no_io, + lookup_context, read_options, filter_function); + range->AddSkipsFrom(subrange); + } +} + +void PartitionedFilterBlockReader::MayMatchPartition( + MultiGetRange* range, const SliceTransform* prefix_extractor, + BlockHandle filter_handle, bool no_io, + BlockCacheLookupContext* lookup_context, const ReadOptions& read_options, + FilterManyFunction filter_function) const { + CachableEntry filter_partition_block; + Status s = GetFilterPartitionBlock( + nullptr /* prefetch_buffer */, filter_handle, no_io, + range->begin()->get_context, lookup_context, read_options, + &filter_partition_block); + if (UNLIKELY(!s.ok())) { + IGNORE_STATUS_IF_ERROR(s); + return; // Any/all may match + } + + FullFilterBlockReader filter_partition(table(), + std::move(filter_partition_block)); + (filter_partition.*filter_function)(range, prefix_extractor, no_io, + lookup_context, read_options); +} + +size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const { + size_t usage = ApproximateFilterBlockMemoryUsage(); +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + usage += malloc_usable_size(const_cast(this)); +#else + usage += sizeof(*this); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + return usage; + // TODO(myabandeh): better estimation for filter_map_ size +} + +// TODO(myabandeh): merge this with the same function in IndexReader +Status PartitionedFilterBlockReader::CacheDependencies( + const ReadOptions& ro, bool pin, FilePrefetchBuffer* tail_prefetch_buffer) { + assert(table()); + + const BlockBasedTable::Rep* const rep = table()->get_rep(); + assert(rep); + + BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch}; + + 
CachableEntry<Block_kFilterPartitionIndex> filter_block;
+
+  Status s = GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */,
+                                  &lookup_context, &filter_block, ro);
+  if (!s.ok()) {
+    ROCKS_LOG_ERROR(rep->ioptions.logger,
+                    "Error retrieving top-level filter block while trying to "
+                    "cache filter partitions: %s",
+                    s.ToString().c_str());
+    return s;
+  }
+
+  // Before reading partitions, prefetch them to avoid lots of IOs
+  assert(filter_block.GetValue());
+
+  IndexBlockIter biter;
+  const InternalKeyComparator* const comparator = internal_comparator();
+  Statistics* kNullStats = nullptr;
+  filter_block.GetValue()->NewIndexIterator(
+      comparator->user_comparator(),
+      rep->get_global_seqno(BlockType::kFilterPartitionIndex), &biter,
+      kNullStats, true /* total_order_seek */, false /* have_first_key */,
+      index_key_includes_seq(), index_value_is_full());
+  // Index partitions are assumed to be consecutive. Prefetch them all.
+  // Read the first block offset
+  biter.SeekToFirst();
+  BlockHandle handle = biter.value().handle;
+  uint64_t prefetch_off = handle.offset();
+
+  // Read the last block's offset
+  biter.SeekToLast();
+  handle = biter.value().handle;
+  uint64_t last_off =
+      handle.offset() + handle.size() + BlockBasedTable::kBlockTrailerSize;
+  uint64_t prefetch_len = last_off - prefetch_off;
+  std::unique_ptr<FilePrefetchBuffer> prefetch_buffer;
+  if (tail_prefetch_buffer == nullptr || !tail_prefetch_buffer->Enabled() ||
+      tail_prefetch_buffer->GetPrefetchOffset() > prefetch_off) {
+    rep->CreateFilePrefetchBuffer(
+        0, 0, &prefetch_buffer, false /* Implicit autoreadahead */,
+        0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/);
+
+    IOOptions opts;
+    s = rep->file->PrepareIOOptions(ro, opts);
+    if (s.ok()) {
+      s = prefetch_buffer->Prefetch(opts, rep->file.get(), prefetch_off,
+                                    static_cast<size_t>(prefetch_len),
+                                    ro.rate_limiter_priority);
+    }
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  // After prefetch, read the partitions one by one
+  for (biter.SeekToFirst(); biter.Valid(); biter.Next()) {
+    handle = biter.value().handle;
+
+    CachableEntry<ParsedFullFilterBlock> block;
+    // TODO: Support counter batch update for partitioned index and
+    // filter blocks
+    s = table()->MaybeReadBlockAndLoadToCache(
+        prefetch_buffer ? prefetch_buffer.get() : tail_prefetch_buffer, ro,
+        handle, UncompressionDict::GetEmptyDict(),
+        /* for_compaction */ false, &block, nullptr /* get_context */,
+        &lookup_context, nullptr /* contents */, false);
+    if (!s.ok()) {
+      return s;
+    }
+    assert(s.ok() || block.GetValue() == nullptr);
+
+    if (block.GetValue() != nullptr) {
+      if (block.IsCached()) {
+        if (pin) {
+          filter_map_[handle.offset()] = std::move(block);
+        }
+      }
+    }
+  }
+  return biter.status();
+}
+
+const InternalKeyComparator*
+PartitionedFilterBlockReader::internal_comparator() const {
+  assert(table());
+  assert(table()->get_rep());
+
+  return &table()->get_rep()->internal_comparator;
+}
+
+bool PartitionedFilterBlockReader::index_key_includes_seq() const {
+  assert(table());
+  assert(table()->get_rep());
+
+  return table()->get_rep()->index_key_includes_seq;
+}
+
+bool PartitionedFilterBlockReader::index_value_is_full() const {
+  assert(table());
+  assert(table()->get_rep());
+
+  return table()->get_rep()->index_value_is_full;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.h
new file mode 100644
index 0000000000..0e3b4a5b34
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_filter_block.h
@@ -0,0 +1,182 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <deque>
+#include <list>
+#include <string>
+#include <unordered_map>
+
+#include "block_cache.h"
+#include "rocksdb/options.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/slice_transform.h"
+#include "table/block_based/block.h"
+#include "table/block_based/filter_block_reader_common.h"
+#include "table/block_based/full_filter_block.h"
+#include "table/block_based/index_builder.h"
+#include "util/autovector.h"
+#include "util/hash_containers.h"
+
+namespace ROCKSDB_NAMESPACE {
+class InternalKeyComparator;
+
+class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
+ public:
+  explicit PartitionedFilterBlockBuilder(
+      const SliceTransform* prefix_extractor, bool whole_key_filtering,
+      FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval,
+      const bool use_value_delta_encoding,
+      PartitionedIndexBuilder* const p_index_builder,
+      const uint32_t partition_size);
+
+  virtual ~PartitionedFilterBlockBuilder();
+
+  void AddKey(const Slice& key) override;
+  void Add(const Slice& key) override;
+  size_t EstimateEntriesAdded() override;
+
+  virtual Slice Finish(
+      const BlockHandle& last_partition_block_handle, Status* status,
+      std::unique_ptr<const char[]>* filter_data = nullptr) override;
+
+  virtual void ResetFilterBitsBuilder() override {
+    // Partitioned filters previously constructed by this to-be-reset
+    // FilterBitsBuilder can also be cleared
+    filters.clear();
+    FullFilterBlockBuilder::ResetFilterBitsBuilder();
+  }
+
+  // For PartitionFilter, optional post-verification of the filter is done
+  // as part of PartitionedFilterBlockBuilder::Finish
+  // to avoid the implementation complexity of doing it elsewhere.
+  // Therefore we are skipping it here.
+  virtual Status MaybePostVerifyFilter(
+      const Slice& /* filter_content */) override {
+    return Status::OK();
+  }
+
+ private:
+  // Filter data
+  BlockBuilder index_on_filter_block_builder_;  // top-level index builder
+  BlockBuilder
+      index_on_filter_block_builder_without_seq_;  // same for user keys
+  struct FilterEntry {
+    std::string key;
+    std::unique_ptr<const char[]> filter_data;
+    Slice filter;
+  };
+  std::deque<FilterEntry> filters;  // list of partitioned filters and keys
+                                    // used in building the index
+
+  // Set to the first non-okay status if any of the filter
+  // partitions experiences a construction error.
+  // If partitioned_filters_construction_status_ is non-okay,
+  // then the whole partitioned filter should not be used.
+  Status partitioned_filters_construction_status_;
+  std::string last_filter_entry_key;
+  std::unique_ptr<const char[]> last_filter_data;
+  std::unique_ptr value;
+  bool finishing_filters =
+      false;  // true if Finish is called once but not complete yet.
+  // The policy of when to cut a filter block and Finish it
+  void MaybeCutAFilterBlock(const Slice* next_key);
+  // Currently we keep the same number of partitions for filters and indexes.
+  // This allows for some potential optimizations in the future. If such
+  // optimizations do not materialize, we can use a different number of
+  // partitions and eliminate p_index_builder_
+  PartitionedIndexBuilder* const p_index_builder_;
+  // The desired number of keys per partition
+  uint32_t keys_per_partition_;
+  // The number of keys added to the last partition so far
+  uint32_t keys_added_to_partition_;
+  // According to the bits builders, how many keys/prefixes were added
+  // in all the filters we have fully built
+  uint64_t total_added_in_built_;
+  BlockHandle last_encoded_handle_;
+};
+
+class PartitionedFilterBlockReader
+    : public FilterBlockReaderCommon<Block_kFilterPartitionIndex> {
+ public:
+  PartitionedFilterBlockReader(
+      const BlockBasedTable* t,
+      CachableEntry<Block_kFilterPartitionIndex>&& filter_block);
+
+  static std::unique_ptr<FilterBlockReader> Create(
+      const BlockBasedTable* table, const ReadOptions& ro,
+      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+      bool pin, BlockCacheLookupContext* lookup_context);
+
+  bool KeyMayMatch(const Slice& key, const bool no_io,
+                   const Slice* const const_ikey_ptr, GetContext* get_context,
+                   BlockCacheLookupContext* lookup_context,
+                   const ReadOptions& read_options) override;
+  void KeysMayMatch(MultiGetRange* range, const bool no_io,
+                    BlockCacheLookupContext* lookup_context,
+                    const ReadOptions& read_options) override;
+
+  bool PrefixMayMatch(const Slice& prefix, const bool no_io,
+                      const Slice* const const_ikey_ptr,
+                      GetContext* get_context,
+                      BlockCacheLookupContext* lookup_context,
+                      const ReadOptions& read_options) override;
+  void PrefixesMayMatch(MultiGetRange* range,
+                        const SliceTransform* prefix_extractor,
+                        const bool no_io,
+                        BlockCacheLookupContext* lookup_context,
+                        const ReadOptions& read_options) override;
+
+  size_t ApproximateMemoryUsage() const override;
+
+ private:
+  BlockHandle GetFilterPartitionHandle(
+      const CachableEntry<Block_kFilterPartitionIndex>& filter_block,
+      const Slice& entry) const;
+  Status GetFilterPartitionBlock(
+      FilePrefetchBuffer* prefetch_buffer, const BlockHandle& handle,
+      bool no_io, GetContext* get_context,
+      BlockCacheLookupContext* lookup_context, const ReadOptions& read_options,
+      CachableEntry<ParsedFullFilterBlock>* filter_block) const;
+
+  using FilterFunction = bool (FullFilterBlockReader::*)(
+      const Slice& slice, const bool no_io, const Slice* const const_ikey_ptr,
+      GetContext* get_context, BlockCacheLookupContext* lookup_context,
+      const ReadOptions&
read_options); + bool MayMatch(const Slice& slice, bool no_io, const Slice* const_ikey_ptr, + GetContext* get_context, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options, + FilterFunction filter_function) const; + using FilterManyFunction = void (FullFilterBlockReader::*)( + MultiGetRange* range, const SliceTransform* prefix_extractor, + const bool no_io, BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options); + void MayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, + bool no_io, BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options, + FilterManyFunction filter_function) const; + void MayMatchPartition(MultiGetRange* range, + const SliceTransform* prefix_extractor, + BlockHandle filter_handle, bool no_io, + BlockCacheLookupContext* lookup_context, + const ReadOptions& read_options, + FilterManyFunction filter_function) const; + Status CacheDependencies(const ReadOptions& ro, bool pin, + FilePrefetchBuffer* tail_prefetch_buffer) override; + + const InternalKeyComparator* internal_comparator() const; + bool index_key_includes_seq() const; + bool index_value_is_full() const; + + protected: + // For partition blocks pinned in cache. Can be a subset of blocks + // in case some fail insertion on attempt to pin. + UnorderedMap> filter_map_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.cc new file mode 100644 index 0000000000..b9bc2155a1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "table/block_based/partitioned_index_iterator.h" + +namespace ROCKSDB_NAMESPACE { +void PartitionedIndexIterator::Seek(const Slice& target) { SeekImpl(&target); } + +void PartitionedIndexIterator::SeekToFirst() { SeekImpl(nullptr); } + +void PartitionedIndexIterator::SeekImpl(const Slice* target) { + SavePrevIndexValue(); + + if (target) { + index_iter_->Seek(*target); + } else { + index_iter_->SeekToFirst(); + } + + if (!index_iter_->Valid()) { + ResetPartitionedIndexIter(); + return; + } + + InitPartitionedIndexBlock(); + + if (target) { + block_iter_.Seek(*target); + } else { + block_iter_.SeekToFirst(); + } + FindKeyForward(); + + // We could check upper bound here, but that would be too complicated + // and checking index upper bound is less useful than for data blocks. + + if (target) { + assert(!Valid() || (table_->get_rep()->index_key_includes_seq + ? 
(icomp_.Compare(*target, key()) <= 0)
+                       : (user_comparator_.Compare(ExtractUserKey(*target),
+                                                   key()) <= 0)));
+  }
+}
+
+void PartitionedIndexIterator::SeekToLast() {
+  SavePrevIndexValue();
+  index_iter_->SeekToLast();
+  if (!index_iter_->Valid()) {
+    ResetPartitionedIndexIter();
+    return;
+  }
+  InitPartitionedIndexBlock();
+  block_iter_.SeekToLast();
+  FindKeyBackward();
+}
+
+void PartitionedIndexIterator::Next() {
+  assert(block_iter_points_to_real_block_);
+  block_iter_.Next();
+  FindKeyForward();
+}
+
+void PartitionedIndexIterator::Prev() {
+  assert(block_iter_points_to_real_block_);
+  block_iter_.Prev();
+
+  FindKeyBackward();
+}
+
+void PartitionedIndexIterator::InitPartitionedIndexBlock() {
+  BlockHandle partitioned_index_handle = index_iter_->value().handle;
+  if (!block_iter_points_to_real_block_ ||
+      partitioned_index_handle.offset() != prev_block_offset_ ||
+      // if the previous attempt at reading the block missed the cache, retry
+      block_iter_.status().IsIncomplete()) {
+    if (block_iter_points_to_real_block_) {
+      ResetPartitionedIndexIter();
+    }
+    auto* rep = table_->get_rep();
+    bool is_for_compaction =
+        lookup_context_.caller == TableReaderCaller::kCompaction;
+    // Prefetch additional data for range scans (iterators).
+    // Implicit auto readahead:
+    //   Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
+    // Explicit user-requested readahead:
+    //   Enabled from the very first IO when ReadOptions.readahead_size is set.
+    block_prefetcher_.PrefetchIfNeeded(
+        rep, partitioned_index_handle, read_options_.readahead_size,
+        is_for_compaction, /*no_sequential_checking=*/false,
+        read_options_.rate_limiter_priority);
+    Status s;
+    table_->NewDataBlockIterator<IndexBlockIter>(
+        read_options_, partitioned_index_handle, &block_iter_,
+        BlockType::kIndex,
+        /*get_context=*/nullptr, &lookup_context_,
+        block_prefetcher_.prefetch_buffer(),
+        /*for_compaction=*/is_for_compaction, /*async_read=*/false, s);
+    block_iter_points_to_real_block_ = true;
+    // We could check the upper bound here, but it is complicated to reason
+    // about upper bounds in the index iterator. On the other hand, in large
+    // scans, index iterators are moved much less frequently than data blocks,
+    // so the upper bound check is skipped for simplicity.
+  }
+}
+
+void PartitionedIndexIterator::FindKeyForward() {
+  // This method's code is kept short to make it likely to be inlined.
+
+  assert(block_iter_points_to_real_block_);
+
+  if (!block_iter_.Valid()) {
+    // This is the only call site of FindBlockForward(), but it's extracted
+    // into a separate method to keep FindKeyForward() short and likely to be
+    // inlined. When transitioning to a different block, we call
+    // FindBlockForward(), which is much longer and is probably not inlined.
+    FindBlockForward();
+  } else {
+    // This is the fast path that avoids a function call.
+  }
+}
+
+void PartitionedIndexIterator::FindBlockForward() {
+  // TODO the while loop inherits from two-level-iterator. We don't know
+  // whether a block can be empty, so the loop cannot simply be replaced by
+  // an "if" yet.
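+  // Each iteration advances index_iter_ to the next partition and re-seeks
+  // block_iter_; the loop exits when a non-empty partition is found, an
+  // error is encountered, or the index is exhausted.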
+ do { + if (!block_iter_.status().ok()) { + return; + } + ResetPartitionedIndexIter(); + index_iter_->Next(); + + if (!index_iter_->Valid()) { + return; + } + + InitPartitionedIndexBlock(); + block_iter_.SeekToFirst(); + } while (!block_iter_.Valid()); +} + +void PartitionedIndexIterator::FindKeyBackward() { + while (!block_iter_.Valid()) { + if (!block_iter_.status().ok()) { + return; + } + + ResetPartitionedIndexIter(); + index_iter_->Prev(); + + if (index_iter_->Valid()) { + InitPartitionedIndexBlock(); + block_iter_.SeekToLast(); + } else { + return; + } + } +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.h new file mode 100644 index 0000000000..6412fe2399 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_iterator.h @@ -0,0 +1,160 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/block_based_table_reader_impl.h" +#include "table/block_based/block_prefetcher.h" +#include "table/block_based/reader_common.h" + +namespace ROCKSDB_NAMESPACE { +// Iterator that iterates over partitioned index. +// Some upper and lower bound tricks played in block based table iterators +// could be played here, but it's too complicated to reason about index +// keys with upper or lower bound, so we skip it for simplicity. +class PartitionedIndexIterator : public InternalIteratorBase { + // compaction_readahead_size: its value will only be used if for_compaction = + // true + public: + PartitionedIndexIterator( + const BlockBasedTable* table, const ReadOptions& read_options, + const InternalKeyComparator& icomp, + std::unique_ptr>&& index_iter, + TableReaderCaller caller, size_t compaction_readahead_size = 0) + : index_iter_(std::move(index_iter)), + table_(table), + read_options_(read_options), +#ifndef NDEBUG + icomp_(icomp), +#endif + user_comparator_(icomp.user_comparator()), + block_iter_points_to_real_block_(false), + lookup_context_(caller), + block_prefetcher_( + compaction_readahead_size, + table_->get_rep()->table_options.initial_auto_readahead_size) { + } + + ~PartitionedIndexIterator() override {} + + void Seek(const Slice& target) override; + void SeekForPrev(const Slice&) override { + // Shouldn't be called. 
+ assert(false); + } + void SeekToFirst() override; + void SeekToLast() override; + void Next() final override; + bool NextAndGetResult(IterateResult*) override { + assert(false); + return false; + } + void Prev() override; + bool Valid() const override { + return block_iter_points_to_real_block_ && block_iter_.Valid(); + } + Slice key() const override { + assert(Valid()); + return block_iter_.key(); + } + Slice user_key() const override { + assert(Valid()); + return block_iter_.user_key(); + } + IndexValue value() const override { + assert(Valid()); + return block_iter_.value(); + } + Status status() const override { + // Prefix index set status to NotFound when the prefix does not exist + if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) { + return index_iter_->status(); + } else if (block_iter_points_to_real_block_) { + return block_iter_.status(); + } else { + return Status::OK(); + } + } + inline IterBoundCheck UpperBoundCheckResult() override { + // Shouldn't be called. + assert(false); + return IterBoundCheck::kUnknown; + } + void SetPinnedItersMgr(PinnedIteratorsManager*) override { + // Shouldn't be called. + assert(false); + } + bool IsKeyPinned() const override { + // Shouldn't be called. + assert(false); + return false; + } + bool IsValuePinned() const override { + // Shouldn't be called. + assert(false); + return false; + } + + void ResetPartitionedIndexIter() { + if (block_iter_points_to_real_block_) { + block_iter_.Invalidate(Status::OK()); + block_iter_points_to_real_block_ = false; + } + } + + void SavePrevIndexValue() { + if (block_iter_points_to_real_block_) { + // Reseek. If they end up with the same data block, we shouldn't re-fetch + // the same data block. + prev_block_offset_ = index_iter_->value().handle.offset(); + } + } + + void GetReadaheadState(ReadaheadFileInfo* readahead_file_info) override { + if (block_prefetcher_.prefetch_buffer() != nullptr && + read_options_.adaptive_readahead) { + block_prefetcher_.prefetch_buffer()->GetReadaheadState( + &(readahead_file_info->index_block_readahead_info)); + } + } + + void SetReadaheadState(ReadaheadFileInfo* readahead_file_info) override { + if (read_options_.adaptive_readahead) { + block_prefetcher_.SetReadaheadState( + &(readahead_file_info->index_block_readahead_info)); + } + } + + std::unique_ptr> index_iter_; + + private: + friend class BlockBasedTableReaderTestVerifyChecksum_ChecksumMismatch_Test; + const BlockBasedTable* table_; + const ReadOptions read_options_; +#ifndef NDEBUG + const InternalKeyComparator& icomp_; +#endif + UserComparatorWrapper user_comparator_; + IndexBlockIter block_iter_; + + // True if block_iter_ is initialized and points to the same block + // as index iterator. + bool block_iter_points_to_real_block_; + uint64_t prev_block_offset_ = std::numeric_limits::max(); + BlockCacheLookupContext lookup_context_; + BlockPrefetcher block_prefetcher_; + + // If `target` is null, seek to first. 
+ void SeekImpl(const Slice* target); + + void InitPartitionedIndexBlock(); + void FindKeyForward(); + void FindBlockForward(); + void FindKeyBackward(); +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.cc new file mode 100644 index 0000000000..cad3a616e7 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.cc @@ -0,0 +1,224 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "table/block_based/partitioned_index_reader.h" + +#include "block_cache.h" +#include "file/random_access_file_reader.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/partitioned_index_iterator.h" + +namespace ROCKSDB_NAMESPACE { +Status PartitionIndexReader::Create( + const BlockBasedTable* table, const ReadOptions& ro, + FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, + bool pin, BlockCacheLookupContext* lookup_context, + std::unique_ptr* index_reader) { + assert(table != nullptr); + assert(table->get_rep()); + assert(!pin || prefetch); + assert(index_reader != nullptr); + + CachableEntry index_block; + if (prefetch || !use_cache) { + const Status s = + ReadIndexBlock(table, prefetch_buffer, ro, use_cache, + /*get_context=*/nullptr, lookup_context, &index_block); + if (!s.ok()) { + return s; + } + + if (use_cache && !pin) { + index_block.Reset(); + } + } + + index_reader->reset(new PartitionIndexReader(table, std::move(index_block))); + + return Status::OK(); +} + +InternalIteratorBase* PartitionIndexReader::NewIterator( + const ReadOptions& read_options, bool /* disable_prefix_seek */, + IndexBlockIter* iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) { + const bool no_io = (read_options.read_tier == kBlockCacheTier); + CachableEntry index_block; + const Status s = GetOrReadIndexBlock(no_io, get_context, lookup_context, + &index_block, read_options); + if (!s.ok()) { + if (iter != nullptr) { + iter->Invalidate(s); + return iter; + } + + return NewErrorInternalIterator(s); + } + + const BlockBasedTable::Rep* rep = table()->rep_; + InternalIteratorBase* it = nullptr; + + Statistics* kNullStats = nullptr; + // Filters are already checked before seeking the index + if (!partition_map_.empty()) { + // We don't return pinned data from index blocks, so no need + // to set `block_contents_pinned`. 
+    it = NewTwoLevelIterator(
+        new BlockBasedTable::PartitionedIndexIteratorState(table(),
+                                                           &partition_map_),
+        index_block.GetValue()->NewIndexIterator(
+            internal_comparator()->user_comparator(),
+            rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
+            index_has_first_key(), index_key_includes_seq(),
+            index_value_is_full()));
+  } else {
+    ReadOptions ro;
+    ro.fill_cache = read_options.fill_cache;
+    ro.deadline = read_options.deadline;
+    ro.io_timeout = read_options.io_timeout;
+    ro.adaptive_readahead = read_options.adaptive_readahead;
+    ro.async_io = read_options.async_io;
+    ro.rate_limiter_priority = read_options.rate_limiter_priority;
+    ro.verify_checksums = read_options.verify_checksums;
+    ro.io_activity = read_options.io_activity;
+
+    // We don't return pinned data from index blocks, so no need
+    // to set `block_contents_pinned`.
+    std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter(
+        index_block.GetValue()->NewIndexIterator(
+            internal_comparator()->user_comparator(),
+            rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
+            index_has_first_key(), index_key_includes_seq(),
+            index_value_is_full()));
+
+    it = new PartitionedIndexIterator(
+        table(), ro, *internal_comparator(), std::move(index_iter),
+        lookup_context ? lookup_context->caller
+                       : TableReaderCaller::kUncategorized);
+  }
+
+  assert(it != nullptr);
+  index_block.TransferTo(it);
+
+  return it;
+
+  // TODO(myabandeh): Update TwoLevelIterator to be able to make use of an
+  // on-stack BlockIter while the state is on the heap. Currently it assumes
+  // the first-level iter is always on the heap and will attempt to delete it
+  // in its destructor.
+}
+Status PartitionIndexReader::CacheDependencies(
+    const ReadOptions& ro, bool pin, FilePrefetchBuffer* tail_prefetch_buffer) {
+  if (!partition_map_.empty()) {
+    // The dependencies are already cached since `partition_map_` is filled in
+    // an all-or-nothing manner.
+    return Status::OK();
+  }
+  // Before reading partitions, prefetch them to avoid lots of IOs
+  BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch};
+  const BlockBasedTable::Rep* rep = table()->rep_;
+  IndexBlockIter biter;
+  BlockHandle handle;
+  Statistics* kNullStats = nullptr;
+
+  CachableEntry<Block> index_block;
+  {
+    Status s = GetOrReadIndexBlock(false /* no_io */, nullptr /* get_context */,
+                                   &lookup_context, &index_block, ro);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  // We don't return pinned data from index blocks, so no need
+  // to set `block_contents_pinned`.
+  index_block.GetValue()->NewIndexIterator(
+      internal_comparator()->user_comparator(),
+      rep->get_global_seqno(BlockType::kIndex), &biter, kNullStats, true,
+      index_has_first_key(), index_key_includes_seq(), index_value_is_full());
+  // Index partitions are assumed to be consecutive. Prefetch them all.
+  // Read the first block offset
+  biter.SeekToFirst();
+  if (!biter.Valid()) {
+    // Empty index.
+    return biter.status();
+  }
+  handle = biter.value().handle;
+  uint64_t prefetch_off = handle.offset();
+
+  // Read the last block's offset
+  biter.SeekToLast();
+  if (!biter.Valid()) {
+    // Empty index.
+ return biter.status(); + } + handle = biter.value().handle; + uint64_t last_off = + handle.offset() + BlockBasedTable::BlockSizeWithTrailer(handle); + uint64_t prefetch_len = last_off - prefetch_off; + std::unique_ptr prefetch_buffer; + if (tail_prefetch_buffer == nullptr || !tail_prefetch_buffer->Enabled() || + tail_prefetch_buffer->GetPrefetchOffset() > prefetch_off) { + rep->CreateFilePrefetchBuffer( + 0, 0, &prefetch_buffer, false /*Implicit auto readahead*/, + 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/); + IOOptions opts; + { + Status s = rep->file->PrepareIOOptions(ro, opts); + if (s.ok()) { + s = prefetch_buffer->Prefetch(opts, rep->file.get(), prefetch_off, + static_cast(prefetch_len), + ro.rate_limiter_priority); + } + if (!s.ok()) { + return s; + } + } + } + // For saving "all or nothing" to partition_map_ + UnorderedMap> map_in_progress; + + // After prefetch, read the partitions one by one + biter.SeekToFirst(); + size_t partition_count = 0; + for (; biter.Valid(); biter.Next()) { + handle = biter.value().handle; + CachableEntry block; + ++partition_count; + // TODO: Support counter batch update for partitioned index and + // filter blocks + Status s = table()->MaybeReadBlockAndLoadToCache( + prefetch_buffer ? prefetch_buffer.get() : tail_prefetch_buffer, ro, + handle, UncompressionDict::GetEmptyDict(), + /*for_compaction=*/false, &block.As(), + /*get_context=*/nullptr, &lookup_context, /*contents=*/nullptr, + /*async_read=*/false); + + if (!s.ok()) { + return s; + } + if (block.GetValue() != nullptr) { + // Might need to "pin" some mmap-read blocks (GetOwnValue) if some + // partitions are successfully compressed (cached) and some are not + // compressed (mmap eligible) + if (block.IsCached() || block.GetOwnValue()) { + if (pin) { + map_in_progress[handle.offset()] = std::move(block); + } + } + } + } + Status s = biter.status(); + // Save (pin) them only if everything checks out + if (map_in_progress.size() == partition_count && s.ok()) { + std::swap(partition_map_, map_in_progress); + } + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.h new file mode 100644 index 0000000000..9482fd6b44 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/partitioned_index_reader.h @@ -0,0 +1,56 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once +#include "table/block_based/index_reader_common.h" +#include "util/hash_containers.h" + +namespace ROCKSDB_NAMESPACE { +// Index that allows binary search lookup in a two-level index structure. +class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { + public: + // Read the partition index from the file and create an instance for + // `PartitionIndexReader`. + // On success, index_reader will be populated; otherwise it will remain + // unmodified. 
+  static Status Create(const BlockBasedTable* table, const ReadOptions& ro,
+                       FilePrefetchBuffer* prefetch_buffer, bool use_cache,
+                       bool prefetch, bool pin,
+                       BlockCacheLookupContext* lookup_context,
+                       std::unique_ptr<IndexReader>* index_reader);
+
+  // return a two-level iterator: first level is on the partition index
+  InternalIteratorBase<IndexValue>* NewIterator(
+      const ReadOptions& read_options, bool /* disable_prefix_seek */,
+      IndexBlockIter* iter, GetContext* get_context,
+      BlockCacheLookupContext* lookup_context) override;
+
+  Status CacheDependencies(const ReadOptions& ro, bool pin,
+                           FilePrefetchBuffer* tail_prefetch_buffer) override;
+  size_t ApproximateMemoryUsage() const override {
+    size_t usage = ApproximateIndexBlockMemoryUsage();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+    usage += malloc_usable_size(const_cast<PartitionIndexReader*>(this));
+#else
+    usage += sizeof(*this);
+#endif  // ROCKSDB_MALLOC_USABLE_SIZE
+    // TODO(myabandeh): more accurate estimate of partition_map_ mem usage
+    return usage;
+  }
+
+ private:
+  PartitionIndexReader(const BlockBasedTable* t,
+                       CachableEntry<Block>&& index_block)
+      : IndexReaderCommon(t, std::move(index_block)) {}
+
+  // For partition blocks pinned in cache. This is expected to be "all or
+  // none" so that !partition_map_.empty() can use an iterator expecting
+  // all partitions to be saved here.
+  UnorderedMap<uint64_t, CachableEntry<Block>> partition_map_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.cc
new file mode 100644
index 0000000000..0ff43e9b4e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.cc
@@ -0,0 +1,52 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "table/block_based/reader_common.h"
+
+#include "monitoring/perf_context_imp.h"
+#include "rocksdb/table.h"
+#include "table/format.h"
+#include "util/coding.h"
+#include "util/crc32c.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+void ForceReleaseCachedEntry(void* arg, void* h) {
+  Cache* cache = reinterpret_cast<Cache*>(arg);
+  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
+  cache->Release(handle, true /* erase_if_last_ref */);
+}
+
+// WART: this is specific to block-based table
+Status VerifyBlockChecksum(ChecksumType type, const char* data,
+                           size_t block_size, const std::string& file_name,
+                           uint64_t offset) {
+  PERF_TIMER_GUARD(block_checksum_time);
+  // After block_size bytes is compression type (1 byte), which is part of
+  // the checksummed section.
+  size_t len = block_size + 1;
+  // And then the stored checksum value (4 bytes).
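+  //
+  // Layout sketch (restating the trailer format implied by the code around
+  // this comment; offsets are relative to `data`):
+  //
+  //   [0, block_size)               block payload
+  //   [block_size]                  compression type (1 byte, checksummed)
+  //   [block_size+1, block_size+5)  stored checksum (little-endian uint32)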
+ uint32_t stored = DecodeFixed32(data + len); + + uint32_t computed = ComputeBuiltinChecksum(type, data, len); + if (stored == computed) { + return Status::OK(); + } else { + // Unmask for people who might look for reference crc value + if (type == kCRC32c) { + stored = crc32c::Unmask(stored); + computed = crc32c::Unmask(computed); + } + return Status::Corruption( + "block checksum mismatch: stored = " + std::to_string(stored) + + ", computed = " + std::to_string(computed) + + ", type = " + std::to_string(type) + " in " + file_name + " offset " + + std::to_string(offset) + " size " + std::to_string(block_size)); + } +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.h new file mode 100644 index 0000000000..102802afec --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/reader_common.h @@ -0,0 +1,34 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include "rocksdb/advanced_cache.h" +#include "rocksdb/table.h" + +namespace ROCKSDB_NAMESPACE { +// Release the cached entry and decrement its ref count. +extern void ForceReleaseCachedEntry(void* arg, void* h); + +inline MemoryAllocator* GetMemoryAllocator( + const BlockBasedTableOptions& table_options) { + return table_options.block_cache.get() + ? table_options.block_cache->memory_allocator() + : nullptr; +} + +// Assumes block has a trailer as in format.h. file_name and offset provided +// for generating a diagnostic message in returned status. +// +// Returns Status::OK() on checksum match, or Status::Corruption() on checksum +// mismatch. +extern Status VerifyBlockChecksum(ChecksumType type, const char* data, + size_t block_size, + const std::string& file_name, + uint64_t offset); +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.cc new file mode 100644 index 0000000000..4ac442b6ba --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.cc @@ -0,0 +1,126 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+//
+
+#include "table/block_based/uncompression_dict_reader.h"
+
+#include "logging/logging.h"
+#include "monitoring/perf_context_imp.h"
+#include "table/block_based/block_based_table_reader.h"
+#include "util/compression.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status UncompressionDictReader::Create(
+    const BlockBasedTable* table, const ReadOptions& ro,
+    FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+    bool pin, BlockCacheLookupContext* lookup_context,
+    std::unique_ptr<UncompressionDictReader>* uncompression_dict_reader) {
+  assert(table);
+  assert(table->get_rep());
+  assert(!pin || prefetch);
+  assert(uncompression_dict_reader);
+
+  CachableEntry<UncompressionDict> uncompression_dict;
+  if (prefetch || !use_cache) {
+    const Status s = ReadUncompressionDictionary(
+        table, prefetch_buffer, ro, use_cache, nullptr /* get_context */,
+        lookup_context, &uncompression_dict);
+    if (!s.ok()) {
+      return s;
+    }
+
+    if (use_cache && !pin) {
+      uncompression_dict.Reset();
+    }
+  }
+
+  uncompression_dict_reader->reset(
+      new UncompressionDictReader(table, std::move(uncompression_dict)));
+
+  return Status::OK();
+}
+
+Status UncompressionDictReader::ReadUncompressionDictionary(
+    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
+    const ReadOptions& read_options, bool use_cache, GetContext* get_context,
+    BlockCacheLookupContext* lookup_context,
+    CachableEntry<UncompressionDict>* uncompression_dict) {
+  // TODO: add perf counter for compression dictionary read time
+
+  assert(table);
+  assert(uncompression_dict);
+  assert(uncompression_dict->IsEmpty());
+
+  const BlockBasedTable::Rep* const rep = table->get_rep();
+  assert(rep);
+  assert(!rep->compression_dict_handle.IsNull());
+
+  const Status s = table->RetrieveBlock(
+      prefetch_buffer, read_options, rep->compression_dict_handle,
+      UncompressionDict::GetEmptyDict(), uncompression_dict, get_context,
+      lookup_context,
+      /* for_compaction */ false, use_cache,
+      /* async_read */ false);
+
+  if (!s.ok()) {
+    ROCKS_LOG_WARN(
+        rep->ioptions.logger,
+        "Encountered error while reading data from compression dictionary "
+        "block %s",
+        s.ToString().c_str());
+  }
+
+  return s;
+}
+
+Status UncompressionDictReader::GetOrReadUncompressionDictionary(
+    FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, bool no_io,
+    bool verify_checksums, GetContext* get_context,
+    BlockCacheLookupContext* lookup_context,
+    CachableEntry<UncompressionDict>* uncompression_dict) const {
+  assert(uncompression_dict);
+
+  if (!uncompression_dict_.IsEmpty()) {
+    uncompression_dict->SetUnownedValue(uncompression_dict_.GetValue());
+    return Status::OK();
+  }
+
+  ReadOptions read_options;
+  if (no_io) {
+    read_options.read_tier = kBlockCacheTier;
+  }
+  read_options.verify_checksums = verify_checksums;
+  read_options.io_activity = ro.io_activity;
+
+  return ReadUncompressionDictionary(table_, prefetch_buffer, read_options,
+                                     cache_dictionary_blocks(), get_context,
+                                     lookup_context, uncompression_dict);
+}
+
+size_t UncompressionDictReader::ApproximateMemoryUsage() const {
+  assert(!uncompression_dict_.GetOwnValue() ||
+         uncompression_dict_.GetValue() != nullptr);
+  size_t usage = uncompression_dict_.GetOwnValue()
+                     ? uncompression_dict_.GetValue()->ApproximateMemoryUsage()
+                     : 0;
+
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+  usage += malloc_usable_size(const_cast<UncompressionDictReader*>(this));
+#else
+  usage += sizeof(*this);
+#endif  // ROCKSDB_MALLOC_USABLE_SIZE
+
+  return usage;
+}
+
+bool UncompressionDictReader::cache_dictionary_blocks() const {
+  assert(table_);
+  assert(table_->get_rep());
+
+  return table_->get_rep()->table_options.cache_index_and_filter_blocks;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.h
new file mode 100644
index 0000000000..c78800d8ac
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_based/uncompression_dict_reader.h
@@ -0,0 +1,61 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+
+#include <cassert>
+
+#include "table/block_based/cachable_entry.h"
+#include "table/format.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class BlockBasedTable;
+struct BlockCacheLookupContext;
+class FilePrefetchBuffer;
+class GetContext;
+struct ReadOptions;
+struct UncompressionDict;
+
+// Provides access to the uncompression dictionary regardless of whether
+// it is owned by the reader or stored in the cache, or whether it is pinned
+// in the cache or not.
+class UncompressionDictReader {
+ public:
+  static Status Create(
+      const BlockBasedTable* table, const ReadOptions& ro,
+      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
+      bool pin, BlockCacheLookupContext* lookup_context,
+      std::unique_ptr<UncompressionDictReader>* uncompression_dict_reader);
+
+  Status GetOrReadUncompressionDictionary(
+      FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, bool no_io,
+      bool verify_checksums, GetContext* get_context,
+      BlockCacheLookupContext* lookup_context,
+      CachableEntry<UncompressionDict>* uncompression_dict) const;
+
+  size_t ApproximateMemoryUsage() const;
+
+ private:
+  UncompressionDictReader(
+      const BlockBasedTable* t,
+      CachableEntry<UncompressionDict>&& uncompression_dict)
+      : table_(t), uncompression_dict_(std::move(uncompression_dict)) {
+    assert(table_);
+  }
+
+  bool cache_dictionary_blocks() const;
+
+  static Status ReadUncompressionDictionary(
+      const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
+      const ReadOptions& read_options, bool use_cache, GetContext* get_context,
+      BlockCacheLookupContext* lookup_context,
+      CachableEntry<UncompressionDict>* uncompression_dict);
+
+  const BlockBasedTable* table_;
+  CachableEntry<UncompressionDict> uncompression_dict_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.cc
new file mode 100644
index 0000000000..b2fa6d4b52
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.cc
@@ -0,0 +1,405 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/block_fetcher.h"
+
+#include <cassert>
+#include <cinttypes>
+#include <string>
+
+#include "logging/logging.h"
+#include "memory/memory_allocator_impl.h"
+#include "monitoring/perf_context_imp.h"
+#include "rocksdb/compression_type.h"
+#include "rocksdb/env.h"
+#include "table/block_based/block.h"
+#include "table/block_based/block_based_table_reader.h"
+#include "table/block_based/block_type.h"
+#include "table/block_based/reader_common.h"
+#include "table/format.h"
+#include "table/persistent_cache_helper.h"
+#include "util/compression.h"
+#include "util/stop_watch.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+inline void BlockFetcher::ProcessTrailerIfPresent() {
+  if (footer_.GetBlockTrailerSize() > 0) {
+    assert(footer_.GetBlockTrailerSize() ==
+           BlockBasedTable::kBlockTrailerSize);
+    if (read_options_.verify_checksums) {
+      io_status_ = status_to_io_status(VerifyBlockChecksum(
+          footer_.checksum_type(), slice_.data(), block_size_,
+          file_->file_name(), handle_.offset()));
+      RecordTick(ioptions_.stats, BLOCK_CHECKSUM_COMPUTE_COUNT);
+      if (!io_status_.ok()) {
+        assert(io_status_.IsCorruption());
+        RecordTick(ioptions_.stats, BLOCK_CHECKSUM_MISMATCH_COUNT);
+      }
+    }
+    compression_type_ =
+        BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_);
+  } else {
+    // E.g. plain table or cuckoo table
+    compression_type_ = kNoCompression;
+  }
+}
+
+inline bool BlockFetcher::TryGetUncompressBlockFromPersistentCache() {
+  if (cache_options_.persistent_cache &&
+      !cache_options_.persistent_cache->IsCompressed()) {
+    Status status = PersistentCacheHelper::LookupUncompressed(
+        cache_options_, handle_, contents_);
+    if (status.ok()) {
+      // uncompressed page is found for the block handle
+      return true;
+    } else {
+      // uncompressed page is not found
+      if (ioptions_.logger && !status.IsNotFound()) {
+        assert(!status.ok());
+        ROCKS_LOG_INFO(ioptions_.logger,
+                       "Error reading from persistent cache. %s",
%s", + status.ToString().c_str()); + } + } + } + return false; +} + +inline bool BlockFetcher::TryGetFromPrefetchBuffer() { + if (prefetch_buffer_ != nullptr) { + IOOptions opts; + IOStatus io_s = file_->PrepareIOOptions(read_options_, opts); + if (io_s.ok()) { + bool read_from_prefetch_buffer = false; + if (read_options_.async_io && !for_compaction_) { + read_from_prefetch_buffer = prefetch_buffer_->TryReadFromCacheAsync( + opts, file_, handle_.offset(), block_size_with_trailer_, &slice_, + &io_s, read_options_.rate_limiter_priority); + } else { + read_from_prefetch_buffer = prefetch_buffer_->TryReadFromCache( + opts, file_, handle_.offset(), block_size_with_trailer_, &slice_, + &io_s, read_options_.rate_limiter_priority, for_compaction_); + } + if (read_from_prefetch_buffer) { + ProcessTrailerIfPresent(); + if (!io_status_.ok()) { + return true; + } + got_from_prefetch_buffer_ = true; + used_buf_ = const_cast(slice_.data()); + } + } + if (!io_s.ok()) { + io_status_ = io_s; + return true; + } + } + return got_from_prefetch_buffer_; +} + +inline bool BlockFetcher::TryGetSerializedBlockFromPersistentCache() { + if (cache_options_.persistent_cache && + cache_options_.persistent_cache->IsCompressed()) { + std::unique_ptr buf; + io_status_ = status_to_io_status(PersistentCacheHelper::LookupSerialized( + cache_options_, handle_, &buf, block_size_with_trailer_)); + if (io_status_.ok()) { + heap_buf_ = CacheAllocationPtr(buf.release()); + used_buf_ = heap_buf_.get(); + slice_ = Slice(heap_buf_.get(), block_size_); + ProcessTrailerIfPresent(); + return true; + } else if (!io_status_.IsNotFound() && ioptions_.logger) { + assert(!io_status_.ok()); + ROCKS_LOG_INFO(ioptions_.logger, + "Error reading from persistent cache. %s", + io_status_.ToString().c_str()); + } + } + return false; +} + +inline void BlockFetcher::PrepareBufferForBlockFromFile() { + // cache miss read from device + if ((do_uncompress_ || ioptions_.allow_mmap_reads) && + block_size_with_trailer_ < kDefaultStackBufferSize) { + // If we've got a small enough chunk of data, read it in to the + // trivially allocated stack buffer instead of needing a full malloc() + // + // `GetBlockContents()` cannot return this data as its lifetime is tied to + // this `BlockFetcher`'s lifetime. That is fine because this is only used + // in cases where we do not expect the `GetBlockContents()` result to be the + // same buffer we are assigning here. If we guess incorrectly, there will be + // a heap allocation and memcpy in `GetBlockContents()` to obtain the final + // result. Considering we are eliding a heap allocation here by using the + // stack buffer, the cost of guessing incorrectly here is one extra memcpy. + // + // When `do_uncompress_` is true, we expect the uncompression step will + // allocate heap memory for the final result. However this expectation will + // be wrong if the block turns out to already be uncompressed, which we + // won't know for sure until after reading it. + // + // When `ioptions_.allow_mmap_reads` is true, we do not expect the file + // reader to use the scratch buffer at all, but instead return a pointer + // into the mapped memory. This expectation will be wrong when using a + // file reader that does not implement mmap reads properly. 
+    used_buf_ = &stack_buf_[0];
+  } else if (maybe_compressed_ && !do_uncompress_) {
+    compressed_buf_ =
+        AllocateBlock(block_size_with_trailer_, memory_allocator_compressed_);
+    used_buf_ = compressed_buf_.get();
+  } else {
+    heap_buf_ = AllocateBlock(block_size_with_trailer_, memory_allocator_);
+    used_buf_ = heap_buf_.get();
+  }
+}
+
+inline void BlockFetcher::InsertCompressedBlockToPersistentCacheIfNeeded() {
+  if (io_status_.ok() && read_options_.fill_cache &&
+      cache_options_.persistent_cache &&
+      cache_options_.persistent_cache->IsCompressed()) {
+    PersistentCacheHelper::InsertSerialized(cache_options_, handle_, used_buf_,
+                                            block_size_with_trailer_);
+  }
+}
+
+inline void BlockFetcher::InsertUncompressedBlockToPersistentCacheIfNeeded() {
+  if (io_status_.ok() && !got_from_prefetch_buffer_ &&
+      read_options_.fill_cache && cache_options_.persistent_cache &&
+      !cache_options_.persistent_cache->IsCompressed()) {
+    // insert to uncompressed cache
+    PersistentCacheHelper::InsertUncompressed(cache_options_, handle_,
+                                              *contents_);
+  }
+}
+
+inline void BlockFetcher::CopyBufferToHeapBuf() {
+  assert(used_buf_ != heap_buf_.get());
+  heap_buf_ = AllocateBlock(block_size_with_trailer_, memory_allocator_);
+  memcpy(heap_buf_.get(), used_buf_, block_size_with_trailer_);
+#ifndef NDEBUG
+  num_heap_buf_memcpy_++;
+#endif
+}
+
+inline void BlockFetcher::CopyBufferToCompressedBuf() {
+  assert(used_buf_ != compressed_buf_.get());
+  compressed_buf_ =
+      AllocateBlock(block_size_with_trailer_, memory_allocator_compressed_);
+  memcpy(compressed_buf_.get(), used_buf_, block_size_with_trailer_);
+#ifndef NDEBUG
+  num_compressed_buf_memcpy_++;
+#endif
+}
+
+// Entering this method means the block is not compressed or does not need to
+// be uncompressed. The block can be in one of the following buffers:
+// 1. prefetch buffer if prefetch is enabled and the block is prefetched before
+// 2. stack_buf_ if block size is smaller than the stack_buf_ size and block
+//    is not compressed
+// 3. heap_buf_ if the block is not compressed
+// 4. compressed_buf_ if the block is compressed
+// 5. direct_io_buf_ if direct IO is enabled
+// After this method, if the block is compressed, it should be in
+// compressed_buf_, otherwise it should be in heap_buf_.
+inline void BlockFetcher::GetBlockContents() {
+  if (slice_.data() != used_buf_) {
+    // the slice content is not the buffer provided
+    *contents_ = BlockContents(Slice(slice_.data(), block_size_));
+  } else {
+    // page can be either uncompressed or compressed, the buffer either stack
+    // or heap provided. Refer to https://github.com/facebook/rocksdb/pull/4096
+    if (got_from_prefetch_buffer_ || used_buf_ == &stack_buf_[0]) {
+      CopyBufferToHeapBuf();
+    } else if (used_buf_ == compressed_buf_.get()) {
+      if (compression_type_ == kNoCompression &&
+          memory_allocator_ != memory_allocator_compressed_) {
+        CopyBufferToHeapBuf();
+      } else {
+        heap_buf_ = std::move(compressed_buf_);
+      }
+    } else if (direct_io_buf_.get() != nullptr) {
+      if (compression_type_ == kNoCompression) {
+        CopyBufferToHeapBuf();
+      } else {
+        CopyBufferToCompressedBuf();
+        heap_buf_ = std::move(compressed_buf_);
+      }
+    }
+    *contents_ = BlockContents(std::move(heap_buf_), block_size_);
+  }
+#ifndef NDEBUG
+  contents_->has_trailer = footer_.GetBlockTrailerSize() > 0;
+#endif
+}
+
+IOStatus BlockFetcher::ReadBlockContents() {
+  if (TryGetUncompressBlockFromPersistentCache()) {
+    compression_type_ = kNoCompression;
+#ifndef NDEBUG
+    contents_->has_trailer = footer_.GetBlockTrailerSize() > 0;
+#endif  // NDEBUG
+    return IOStatus::OK();
+  }
+  if (TryGetFromPrefetchBuffer()) {
+    if (!io_status_.ok()) {
+      return io_status_;
+    }
+  } else if (!TryGetSerializedBlockFromPersistentCache()) {
+    IOOptions opts;
+    io_status_ = file_->PrepareIOOptions(read_options_, opts);
+    // Actual file read
+    if (io_status_.ok()) {
+      if (file_->use_direct_io()) {
+        PERF_TIMER_GUARD(block_read_time);
+        PERF_CPU_TIMER_GUARD(block_read_cpu_time, nullptr);
+        io_status_ = file_->Read(
+            opts, handle_.offset(), block_size_with_trailer_, &slice_, nullptr,
+            &direct_io_buf_, read_options_.rate_limiter_priority);
+        PERF_COUNTER_ADD(block_read_count, 1);
+        used_buf_ = const_cast<char*>(slice_.data());
+      } else {
+        PrepareBufferForBlockFromFile();
+        PERF_TIMER_GUARD(block_read_time);
+        PERF_CPU_TIMER_GUARD(block_read_cpu_time, nullptr);
+        io_status_ = file_->Read(opts, handle_.offset(),
+                                 block_size_with_trailer_, &slice_, used_buf_,
+                                 nullptr, read_options_.rate_limiter_priority);
+        PERF_COUNTER_ADD(block_read_count, 1);
+#ifndef NDEBUG
+        if (slice_.data() == &stack_buf_[0]) {
+          num_stack_buf_memcpy_++;
+        } else if (slice_.data() == heap_buf_.get()) {
+          num_heap_buf_memcpy_++;
+        } else if (slice_.data() == compressed_buf_.get()) {
+          num_compressed_buf_memcpy_++;
+        }
+#endif
+      }
+    }
+
+    // TODO: introduce dedicated perf counter for range tombstones
+    switch (block_type_) {
+      case BlockType::kFilter:
+      case BlockType::kFilterPartitionIndex:
+        PERF_COUNTER_ADD(filter_block_read_count, 1);
+        break;
+
+      case BlockType::kCompressionDictionary:
+        PERF_COUNTER_ADD(compression_dict_block_read_count, 1);
+        break;
+
+      case BlockType::kIndex:
+        PERF_COUNTER_ADD(index_block_read_count, 1);
+        break;
+
+      // Nothing to do here as we don't have counters for the other types.
+      default:
+        break;
+    }
+
+    PERF_COUNTER_ADD(block_read_byte, block_size_with_trailer_);
+    if (!io_status_.ok()) {
+      return io_status_;
+    }
+
+    if (slice_.size() != block_size_with_trailer_) {
+      return IOStatus::Corruption(
+          "truncated block read from " + file_->file_name() + " offset " +
+          std::to_string(handle_.offset()) + ", expected " +
+          std::to_string(block_size_with_trailer_) + " bytes, got " +
+          std::to_string(slice_.size()));
+    }
+
+    ProcessTrailerIfPresent();
+    if (io_status_.ok()) {
+      InsertCompressedBlockToPersistentCacheIfNeeded();
+    } else {
+      return io_status_;
+    }
+  }
+
+  if (do_uncompress_ && compression_type_ != kNoCompression) {
+    PERF_TIMER_GUARD(block_decompress_time);
+    // compressed page, uncompress, update cache
+    UncompressionContext context(compression_type_);
+    UncompressionInfo info(context, uncompression_dict_, compression_type_);
+    io_status_ = status_to_io_status(UncompressSerializedBlock(
+        info, slice_.data(), block_size_, contents_, footer_.format_version(),
+        ioptions_, memory_allocator_));
+#ifndef NDEBUG
+    num_heap_buf_memcpy_++;
+#endif
+    compression_type_ = kNoCompression;
+  } else {
+    GetBlockContents();
+  }
+
+  InsertUncompressedBlockToPersistentCacheIfNeeded();
+
+  return io_status_;
+}
+
+IOStatus BlockFetcher::ReadAsyncBlockContents() {
+  if (TryGetUncompressBlockFromPersistentCache()) {
+    compression_type_ = kNoCompression;
+#ifndef NDEBUG
+    contents_->has_trailer = footer_.GetBlockTrailerSize() > 0;
+#endif  // NDEBUG
+    return IOStatus::OK();
+  } else if (!TryGetSerializedBlockFromPersistentCache()) {
+    assert(prefetch_buffer_ != nullptr);
+    if (!for_compaction_) {
+      IOOptions opts;
+      IOStatus io_s = file_->PrepareIOOptions(read_options_, opts);
+      if (!io_s.ok()) {
+        return io_s;
+      }
+      io_s = status_to_io_status(prefetch_buffer_->PrefetchAsync(
+          opts, file_, handle_.offset(), block_size_with_trailer_, &slice_));
+      if (io_s.IsTryAgain()) {
+        return io_s;
+      }
+      if (io_s.ok()) {
+        // Data Block is already in prefetch.
+        got_from_prefetch_buffer_ = true;
+        ProcessTrailerIfPresent();
+        if (!io_status_.ok()) {
+          return io_status_;
+        }
+        used_buf_ = const_cast<char*>(slice_.data());
+
+        if (do_uncompress_ && compression_type_ != kNoCompression) {
+          PERF_TIMER_GUARD(block_decompress_time);
+          // compressed page, uncompress, update cache
+          UncompressionContext context(compression_type_);
+          UncompressionInfo info(context, uncompression_dict_,
+                                 compression_type_);
+          io_status_ = status_to_io_status(UncompressSerializedBlock(
+              info, slice_.data(), block_size_, contents_,
+              footer_.format_version(), ioptions_, memory_allocator_));
+#ifndef NDEBUG
+          num_heap_buf_memcpy_++;
+#endif
+          compression_type_ = kNoCompression;
+        } else {
+          GetBlockContents();
+        }
+        InsertUncompressedBlockToPersistentCacheIfNeeded();
+        return io_status_;
+      }
+    }
+    // Fall back to sequential reading of data blocks if io_s returns an
+    // error or for_compaction_ is true.
+    return ReadBlockContents();
+  }
+  return io_status_;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.h
new file mode 100644
index 0000000000..da6c352d0a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/block_fetcher.h
@@ -0,0 +1,142 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include "memory/memory_allocator_impl.h" +#include "table/block_based/block.h" +#include "table/block_based/block_type.h" +#include "table/format.h" +#include "table/persistent_cache_options.h" + +namespace ROCKSDB_NAMESPACE { + +// Retrieves a single block of a given file. Utilizes the prefetch buffer and/or +// persistent cache provided (if any) to try to avoid reading from the file +// directly. Note that both the prefetch buffer and the persistent cache are +// optional; also, note that the persistent cache may be configured to store +// either compressed or uncompressed blocks. +// +// If the retrieved block is compressed and the do_uncompress flag is set, +// BlockFetcher uncompresses the block (using the uncompression dictionary, +// if provided, to prime the compression algorithm), and returns the resulting +// uncompressed block data. Otherwise, it returns the original block. +// +// Two read options affect the behavior of BlockFetcher: if verify_checksums is +// true, the checksum of the (original) block is checked; if fill_cache is true, +// the block is added to the persistent cache if needed. +// +// Memory for uncompressed and compressed blocks is allocated as needed +// using memory_allocator and memory_allocator_compressed, respectively +// (if provided; otherwise, the default allocator is used). + +class BlockFetcher { + public: + BlockFetcher(RandomAccessFileReader* file, + FilePrefetchBuffer* prefetch_buffer, + const Footer& footer /* ref retained */, + const ReadOptions& read_options, + const BlockHandle& handle /* ref retained */, + BlockContents* contents, + const ImmutableOptions& ioptions /* ref retained */, + bool do_uncompress, bool maybe_compressed, BlockType block_type, + const UncompressionDict& uncompression_dict /* ref retained */, + const PersistentCacheOptions& cache_options /* ref retained */, + MemoryAllocator* memory_allocator = nullptr, + MemoryAllocator* memory_allocator_compressed = nullptr, + bool for_compaction = false) + : file_(file), + prefetch_buffer_(prefetch_buffer), + footer_(footer), + read_options_(read_options), + handle_(handle), + contents_(contents), + ioptions_(ioptions), + do_uncompress_(do_uncompress), + maybe_compressed_(maybe_compressed), + block_type_(block_type), + block_size_(static_cast(handle_.size())), + block_size_with_trailer_(block_size_ + footer.GetBlockTrailerSize()), + uncompression_dict_(uncompression_dict), + cache_options_(cache_options), + memory_allocator_(memory_allocator), + memory_allocator_compressed_(memory_allocator_compressed), + for_compaction_(for_compaction) { + io_status_.PermitUncheckedError(); // TODO(AR) can we improve on this? 
+ } + + IOStatus ReadBlockContents(); + IOStatus ReadAsyncBlockContents(); + + inline CompressionType get_compression_type() const { + return compression_type_; + } + inline size_t GetBlockSizeWithTrailer() const { + return block_size_with_trailer_; + } + +#ifndef NDEBUG + int TEST_GetNumStackBufMemcpy() const { return num_stack_buf_memcpy_; } + int TEST_GetNumHeapBufMemcpy() const { return num_heap_buf_memcpy_; } + int TEST_GetNumCompressedBufMemcpy() const { + return num_compressed_buf_memcpy_; + } + +#endif + private: +#ifndef NDEBUG + int num_stack_buf_memcpy_ = 0; + int num_heap_buf_memcpy_ = 0; + int num_compressed_buf_memcpy_ = 0; + +#endif + static const uint32_t kDefaultStackBufferSize = 5000; + + RandomAccessFileReader* file_; + FilePrefetchBuffer* prefetch_buffer_; + const Footer& footer_; + const ReadOptions read_options_; + const BlockHandle& handle_; + BlockContents* contents_; + const ImmutableOptions& ioptions_; + const bool do_uncompress_; + const bool maybe_compressed_; + const BlockType block_type_; + const size_t block_size_; + const size_t block_size_with_trailer_; + const UncompressionDict& uncompression_dict_; + const PersistentCacheOptions& cache_options_; + MemoryAllocator* memory_allocator_; + MemoryAllocator* memory_allocator_compressed_; + IOStatus io_status_; + Slice slice_; + char* used_buf_ = nullptr; + AlignedBuf direct_io_buf_; + CacheAllocationPtr heap_buf_; + CacheAllocationPtr compressed_buf_; + char stack_buf_[kDefaultStackBufferSize]; + bool got_from_prefetch_buffer_ = false; + CompressionType compression_type_; + bool for_compaction_ = false; + + // return true if found + bool TryGetUncompressBlockFromPersistentCache(); + // return true if found + bool TryGetFromPrefetchBuffer(); + bool TryGetSerializedBlockFromPersistentCache(); + void PrepareBufferForBlockFromFile(); + // Copy content from used_buf_ to new heap_buf_. + void CopyBufferToHeapBuf(); + // Copy content from used_buf_ to new compressed_buf_. + void CopyBufferToCompressedBuf(); + void GetBlockContents(); + void InsertCompressedBlockToPersistentCacheIfNeeded(); + void InsertUncompressedBlockToPersistentCacheIfNeeded(); + void ProcessTrailerIfPresent(); +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.cc new file mode 100644 index 0000000000..8a5c452405 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.cc @@ -0,0 +1,370 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#include "table/compaction_merging_iterator.h" + +namespace ROCKSDB_NAMESPACE { +class CompactionMergingIterator : public InternalIterator { + public: + CompactionMergingIterator( + const InternalKeyComparator* comparator, InternalIterator** children, + int n, bool is_arena_mode, + std::vector< + std::pair> + range_tombstones) + : is_arena_mode_(is_arena_mode), + comparator_(comparator), + current_(nullptr), + minHeap_(CompactionHeapItemComparator(comparator_)), + pinned_iters_mgr_(nullptr) { + children_.resize(n); + for (int i = 0; i < n; i++) { + children_[i].level = i; + children_[i].iter.Set(children[i]); + assert(children_[i].type == HeapItem::ITERATOR); + } + assert(range_tombstones.size() == static_cast(n)); + for (auto& p : range_tombstones) { + range_tombstone_iters_.push_back(p.first); + } + pinned_heap_item_.resize(n); + for (int i = 0; i < n; ++i) { + if (range_tombstones[i].second) { + // for LevelIterator + *range_tombstones[i].second = &range_tombstone_iters_[i]; + } + pinned_heap_item_[i].level = i; + pinned_heap_item_[i].type = HeapItem::DELETE_RANGE_START; + } + } + + void considerStatus(const Status& s) { + if (!s.ok() && status_.ok()) { + status_ = s; + } + } + + ~CompactionMergingIterator() override { + // TODO: use unique_ptr for range_tombstone_iters_ + for (auto child : range_tombstone_iters_) { + delete child; + } + + for (auto& child : children_) { + child.iter.DeleteIter(is_arena_mode_); + } + status_.PermitUncheckedError(); + } + + bool Valid() const override { return current_ != nullptr && status_.ok(); } + + Status status() const override { return status_; } + + void SeekToFirst() override; + + void Seek(const Slice& target) override; + + void Next() override; + + Slice key() const override { + assert(Valid()); + return current_->key(); + } + + Slice value() const override { + assert(Valid()); + if (LIKELY(current_->type == HeapItem::ITERATOR)) { + return current_->iter.value(); + } else { + return dummy_tombstone_val; + } + } + + // Here we simply relay MayBeOutOfLowerBound/MayBeOutOfUpperBound result + // from current child iterator. Potentially as long as one of child iterator + // report out of bound is not possible, we know current key is within bound. + bool MayBeOutOfLowerBound() override { + assert(Valid()); + return current_->type == HeapItem::DELETE_RANGE_START || + current_->iter.MayBeOutOfLowerBound(); + } + + IterBoundCheck UpperBoundCheckResult() override { + assert(Valid()); + return current_->type == HeapItem::DELETE_RANGE_START + ? IterBoundCheck::kUnknown + : current_->iter.UpperBoundCheckResult(); + } + + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; + for (auto& child : children_) { + child.iter.SetPinnedItersMgr(pinned_iters_mgr); + } + } + + bool IsDeleteRangeSentinelKey() const override { + assert(Valid()); + return current_->type == HeapItem::DELETE_RANGE_START; + } + + // Compaction uses the above subset of InternalIterator interface. 
+  void SeekToLast() override { assert(false); }
+
+  void SeekForPrev(const Slice&) override { assert(false); }
+
+  void Prev() override { assert(false); }
+
+  bool NextAndGetResult(IterateResult*) override {
+    assert(false);
+    return false;
+  }
+
+  bool IsKeyPinned() const override {
+    assert(false);
+    return false;
+  }
+
+  bool IsValuePinned() const override {
+    assert(false);
+    return false;
+  }
+
+  bool PrepareValue() override {
+    assert(false);
+    return false;
+  }
+
+ private:
+  struct HeapItem {
+    HeapItem() = default;
+
+    IteratorWrapper iter;
+    size_t level = 0;
+    std::string tombstone_str;
+    enum Type { ITERATOR, DELETE_RANGE_START };
+    Type type = ITERATOR;
+
+    explicit HeapItem(size_t _level, InternalIteratorBase<Slice>* _iter)
+        : level(_level), type(Type::ITERATOR) {
+      iter.Set(_iter);
+    }
+
+    void SetTombstoneForCompaction(const ParsedInternalKey&& pik) {
+      tombstone_str.clear();
+      AppendInternalKey(&tombstone_str, pik);
+    }
+
+    [[nodiscard]] Slice key() const {
+      return type == ITERATOR ? iter.key() : tombstone_str;
+    }
+  };
+
+  class CompactionHeapItemComparator {
+   public:
+    explicit CompactionHeapItemComparator(
+        const InternalKeyComparator* comparator)
+        : comparator_(comparator) {}
+
+    bool operator()(HeapItem* a, HeapItem* b) const {
+      int r = comparator_->Compare(a->key(), b->key());
+      // For each file, we assume all range tombstone start keys come before
+      // its file boundary sentinel key (file's meta.largest key).
+      // In the case when meta.smallest = meta.largest and the range tombstone
+      // start key is truncated at meta.smallest, the start key will have
+      // op_type = kMaxValid to make it smaller (see TruncatedRangeDelIterator
+      // constructor). The following assertion validates this assumption.
+      assert(a->type == b->type || r != 0);
+      return r > 0;
+    }
+
+   private:
+    const InternalKeyComparator* comparator_;
+  };
+
+  using CompactionMinHeap =
+      BinaryHeap<HeapItem*, CompactionHeapItemComparator>;
+  bool is_arena_mode_;
+  const InternalKeyComparator* comparator_;
+  // HeapItem for all child point iterators.
+  std::vector<HeapItem> children_;
+  // HeapItem for range tombstones. pinned_heap_item_[i] corresponds to the
+  // current range tombstone from range_tombstone_iters_[i].
+  std::vector<HeapItem> pinned_heap_item_;
+  // range_tombstone_iters_[i] contains range tombstones in the sorted run
+  // that corresponds to children_[i]. range_tombstone_iters_[i] ==
+  // nullptr means the sorted run of children_[i] does not have range
+  // tombstones (or the current SSTable does not have range tombstones in the
+  // case of LevelIterator).
+  std::vector<TruncatedRangeDelIterator*> range_tombstone_iters_;
+  // Used as value for range tombstone keys
+  std::string dummy_tombstone_val{};
+
+  // Skip file boundary sentinel keys.
+  void FindNextVisibleKey();
+
+  // top of minHeap_
+  HeapItem* current_;
+  // If any of the children have non-ok status, this is one of them.
+  Status status_;
+  CompactionMinHeap minHeap_;
+  PinnedIteratorsManager* pinned_iters_mgr_;
+  // Process a child that is not in the min heap.
+  // If valid, add to the min heap. Otherwise, check status.
+  void AddToMinHeapOrCheckStatus(HeapItem*);
+
+  HeapItem* CurrentForward() const {
+    return !minHeap_.empty() ? minHeap_.top() : nullptr;
+  }
+
+  void InsertRangeTombstoneAtLevel(size_t level) {
+    if (range_tombstone_iters_[level]->Valid()) {
+      pinned_heap_item_[level].SetTombstoneForCompaction(
+          range_tombstone_iters_[level]->start_key());
+      minHeap_.push(&pinned_heap_item_[level]);
+    }
+  }
+};
+
+void CompactionMergingIterator::SeekToFirst() {
+  minHeap_.clear();
+  status_ = Status::OK();
+  for (auto& child : children_) {
+    child.iter.SeekToFirst();
+    AddToMinHeapOrCheckStatus(&child);
+  }
+
+  for (size_t i = 0; i < range_tombstone_iters_.size(); ++i) {
+    if (range_tombstone_iters_[i]) {
+      range_tombstone_iters_[i]->SeekToFirst();
+      InsertRangeTombstoneAtLevel(i);
+    }
+  }
+
+  FindNextVisibleKey();
+  current_ = CurrentForward();
+}
+
+void CompactionMergingIterator::Seek(const Slice& target) {
+  minHeap_.clear();
+  status_ = Status::OK();
+  for (auto& child : children_) {
+    child.iter.Seek(target);
+    AddToMinHeapOrCheckStatus(&child);
+  }
+
+  ParsedInternalKey pik;
+  ParseInternalKey(target, &pik, false /* log_err_key */)
+      .PermitUncheckedError();
+  for (size_t i = 0; i < range_tombstone_iters_.size(); ++i) {
+    if (range_tombstone_iters_[i]) {
+      range_tombstone_iters_[i]->Seek(pik.user_key);
+      // For compaction, output keys should all be after the seek target.
+      while (range_tombstone_iters_[i]->Valid() &&
+             comparator_->Compare(range_tombstone_iters_[i]->start_key(),
+                                  pik) < 0) {
+        range_tombstone_iters_[i]->Next();
+      }
+      InsertRangeTombstoneAtLevel(i);
+    }
+  }
+
+  FindNextVisibleKey();
+  current_ = CurrentForward();
+}
+
+void CompactionMergingIterator::Next() {
+  assert(Valid());
+  // For the heap modifications below to be correct, current_ must be the
+  // current top of the heap.
+  assert(current_ == CurrentForward());
+  // current_ points to the current record; move that child iterator forward.
+  if (current_->type == HeapItem::ITERATOR) {
+    current_->iter.Next();
+    if (current_->iter.Valid()) {
+      // current is still valid after the Next() call above. Call
+      // replace_top() to restore the heap property. When the same child
+      // iterator yields a sequence of keys, this is cheap.
+      assert(current_->iter.status().ok());
+      minHeap_.replace_top(current_);
+    } else {
+      // current stopped being valid, remove it from the heap.
+      considerStatus(current_->iter.status());
+      minHeap_.pop();
+    }
+  } else {
+    assert(current_->type == HeapItem::DELETE_RANGE_START);
+    size_t level = current_->level;
+    assert(range_tombstone_iters_[level]);
+    range_tombstone_iters_[level]->Next();
+    if (range_tombstone_iters_[level]->Valid()) {
+      pinned_heap_item_[level].SetTombstoneForCompaction(
+          range_tombstone_iters_[level]->start_key());
+      minHeap_.replace_top(&pinned_heap_item_[level]);
+    } else {
+      minHeap_.pop();
+    }
+  }
+  FindNextVisibleKey();
+  current_ = CurrentForward();
+}
+
+void CompactionMergingIterator::FindNextVisibleKey() {
+  while (!minHeap_.empty()) {
+    HeapItem* current = minHeap_.top();
+    // IsDeleteRangeSentinelKey() here means file boundary sentinel keys.
+    if (current->type != HeapItem::ITERATOR ||
+        !current->iter.IsDeleteRangeSentinelKey()) {
+      return;
+    }
+    // range tombstone start keys from the same SSTable should have been
+    // exhausted
+    assert(!range_tombstone_iters_[current->level] ||
+           !range_tombstone_iters_[current->level]->Valid());
+    // current->iter is a LevelIterator, and it enters a new SST file in the
+    // Next() call here.
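+    // (Illustration: when the sentinel for file F is popped, the Next()
+    // below positions the LevelIterator at the first key of the file after
+    // F, and InsertRangeTombstoneAtLevel() re-arms that new file's range
+    // tombstones.)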
+    current->iter.Next();
+    if (current->iter.Valid()) {
+      assert(current->iter.status().ok());
+      minHeap_.replace_top(current);
+    } else {
+      minHeap_.pop();
+    }
+    if (range_tombstone_iters_[current->level]) {
+      InsertRangeTombstoneAtLevel(current->level);
+    }
+  }
+}
+
+void CompactionMergingIterator::AddToMinHeapOrCheckStatus(HeapItem* child) {
+  if (child->iter.Valid()) {
+    assert(child->iter.status().ok());
+    minHeap_.push(child);
+  } else {
+    considerStatus(child->iter.status());
+  }
+}
+
+InternalIterator* NewCompactionMergingIterator(
+    const InternalKeyComparator* comparator, InternalIterator** children,
+    int n,
+    std::vector<std::pair<TruncatedRangeDelIterator*,
+                          TruncatedRangeDelIterator**>>& range_tombstone_iters,
+    Arena* arena) {
+  assert(n >= 0);
+  if (n == 0) {
+    return NewEmptyInternalIterator<Slice>(arena);
+  } else {
+    if (arena == nullptr) {
+      return new CompactionMergingIterator(comparator, children, n,
+                                           false /* is_arena_mode */,
+                                           range_tombstone_iters);
+    } else {
+      auto mem = arena->AllocateAligned(sizeof(CompactionMergingIterator));
+      return new (mem) CompactionMergingIterator(comparator, children, n,
+                                                 true /* is_arena_mode */,
+                                                 range_tombstone_iters);
+    }
+  }
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.h
new file mode 100644
index 0000000000..e3fd7797fd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/compaction_merging_iterator.h
@@ -0,0 +1,44 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "db/range_del_aggregator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/types.h"
+#include "table/merging_iterator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+/*
+ * This is a simplified version of MergingIterator and is specifically used for
+ * compaction. It merges the input `children` iterators into a sorted stream of
+ * keys. Range tombstone start keys are also emitted to prevent oversize
+ * compactions. For example, consider an L1 file with content [a, b), y, z,
+ * where [a, b) is a range tombstone and y and z are point keys. This could
+ * cause an oversize compaction as it can overlap with a wide range of key
+ * space in L2.
+ *
+ * CompactionMergingIterator emits range tombstone start keys from each LSM
+ * level's range tombstone iterator, and for each range tombstone
+ * [start,end)@seqno, the key will be start@seqno with op_type
+ * kTypeRangeDeletion unless truncated at file boundary (see detail in
+ * TruncatedRangeDelIterator::start_key()).
+ *
+ * Caller should use CompactionMergingIterator::IsDeleteRangeSentinelKey() to
+ * check if the current key is a range tombstone key.
+ * TODO(cbi): IsDeleteRangeSentinelKey() is used for two kinds of keys at
+ * different layers: file boundary and range tombstone keys. Separate them
+ * into two APIs for clarity.
+ */
+class CompactionMergingIterator;
+
+InternalIterator* NewCompactionMergingIterator(
+    const InternalKeyComparator* comparator, InternalIterator** children,
+    int n,
+    std::vector<std::pair<TruncatedRangeDelIterator*,
+                          TruncatedRangeDelIterator**>>& range_tombstone_iters,
+    Arena* arena = nullptr);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.cc
new file mode 100644
index 0000000000..7ca72365fd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.cc
@@ -0,0 +1,551 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "table/cuckoo/cuckoo_table_builder.h"
+
+#include <assert.h>
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "db/dbformat.h"
+#include "file/writable_file_writer.h"
+#include "rocksdb/env.h"
+#include "rocksdb/table.h"
+#include "table/block_based/block_builder.h"
+#include "table/cuckoo/cuckoo_table_factory.h"
+#include "table/format.h"
+#include "table/meta_blocks.h"
+#include "util/autovector.h"
+#include "util/random.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+const std::string CuckooTablePropertyNames::kEmptyKey =
+    "rocksdb.cuckoo.bucket.empty.key";
+const std::string CuckooTablePropertyNames::kNumHashFunc =
+    "rocksdb.cuckoo.hash.num";
+const std::string CuckooTablePropertyNames::kHashTableSize =
+    "rocksdb.cuckoo.hash.size";
+const std::string CuckooTablePropertyNames::kValueLength =
+    "rocksdb.cuckoo.value.length";
+const std::string CuckooTablePropertyNames::kIsLastLevel =
+    "rocksdb.cuckoo.file.islastlevel";
+const std::string CuckooTablePropertyNames::kCuckooBlockSize =
+    "rocksdb.cuckoo.hash.cuckooblocksize";
+const std::string CuckooTablePropertyNames::kIdentityAsFirstHash =
+    "rocksdb.cuckoo.hash.identityfirst";
+const std::string CuckooTablePropertyNames::kUseModuleHash =
+    "rocksdb.cuckoo.hash.usemodule";
+const std::string CuckooTablePropertyNames::kUserKeyLength =
+    "rocksdb.cuckoo.hash.userkeylength";
+
+// Obtained by running echo rocksdb.table.cuckoo | sha1sum
+extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
+
+CuckooTableBuilder::CuckooTableBuilder(
+    WritableFileWriter* file, double max_hash_table_ratio,
+    uint32_t max_num_hash_table, uint32_t max_search_depth,
+    const Comparator* user_comparator, uint32_t cuckoo_block_size,
+    bool use_module_hash, bool identity_as_first_hash,
+    uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t),
+    uint32_t column_family_id, const std::string& column_family_name,
+    const std::string& db_id, const std::string& db_session_id,
+    uint64_t file_number)
+    : num_hash_func_(2),
+      file_(file),
+      max_hash_table_ratio_(max_hash_table_ratio),
+      max_num_hash_func_(max_num_hash_table),
+      max_search_depth_(max_search_depth),
+      cuckoo_block_size_(std::max(1U, cuckoo_block_size)),
+      hash_table_size_(use_module_hash ? 0 : 2),
+      is_last_level_file_(false),
+      has_seen_first_key_(false),
+      has_seen_first_value_(false),
+      key_size_(0),
+      value_size_(0),
+      num_entries_(0),
+      num_values_(0),
+      ucomp_(user_comparator),
+      use_module_hash_(use_module_hash),
+      identity_as_first_hash_(identity_as_first_hash),
+      get_slice_hash_(get_slice_hash),
+      closed_(false) {
+  // Data is in a huge block.
+ properties_.num_data_blocks = 1; + properties_.index_size = 0; + properties_.filter_size = 0; + properties_.column_family_id = column_family_id; + properties_.column_family_name = column_family_name; + properties_.db_id = db_id; + properties_.db_session_id = db_session_id; + properties_.orig_file_number = file_number; + status_.PermitUncheckedError(); + io_status_.PermitUncheckedError(); +} + +void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { + if (num_entries_ >= kMaxVectorIdx - 1) { + status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1"); + return; + } + ParsedInternalKey ikey; + Status pik_status = + ParseInternalKey(key, &ikey, false /* log_err_key */); // TODO + if (!pik_status.ok()) { + status_ = Status::Corruption("Unable to parse key into internal key. ", + pik_status.getState()); + return; + } + if (ikey.type != kTypeDeletion && ikey.type != kTypeValue) { + status_ = Status::NotSupported("Unsupported key type " + + std::to_string(ikey.type)); + return; + } + + // Determine if we can ignore the sequence number and value type from + // internal keys by looking at sequence number from first key. We assume + // that if first key has a zero sequence number, then all the remaining + // keys will have zero seq. no. + if (!has_seen_first_key_) { + is_last_level_file_ = ikey.sequence == 0; + has_seen_first_key_ = true; + smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); + largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); + key_size_ = is_last_level_file_ ? ikey.user_key.size() : key.size(); + } + if (key_size_ != (is_last_level_file_ ? ikey.user_key.size() : key.size())) { + status_ = Status::NotSupported("all keys have to be the same size"); + return; + } + + if (ikey.type == kTypeValue) { + if (!has_seen_first_value_) { + has_seen_first_value_ = true; + value_size_ = value.size(); + } + if (value_size_ != value.size()) { + status_ = Status::NotSupported("all values have to be the same size"); + return; + } + + if (is_last_level_file_) { + kvs_.append(ikey.user_key.data(), ikey.user_key.size()); + } else { + kvs_.append(key.data(), key.size()); + } + kvs_.append(value.data(), value.size()); + ++num_values_; + } else { + if (is_last_level_file_) { + deleted_keys_.append(ikey.user_key.data(), ikey.user_key.size()); + } else { + deleted_keys_.append(key.data(), key.size()); + } + } + ++num_entries_; + + // In order to fill the empty buckets in the hash table, we identify a + // key which is not used so far (unused_user_key). We determine this by + // maintaining smallest and largest keys inserted so far in bytewise order + // and use them to find a key outside this range in Finish() operation. + // Note that this strategy is independent of user comparator used here. 
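+  //
+  // Worked example of the Finish()-time search (hypothetical keys): if the
+  // smallest key seen is "abc", decrementing its last byte yields "abb",
+  // which is < "abc" in bytewise order and therefore unused; if every such
+  // decrement fails (e.g. the smallest key is all 0x00 bytes), Finish()
+  // falls back to incrementing bytes of the largest key instead.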
+  if (ikey.user_key.compare(smallest_user_key_) < 0) {
+    smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
+  } else if (ikey.user_key.compare(largest_user_key_) > 0) {
+    largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
+  }
+  if (!use_module_hash_) {
+    if (hash_table_size_ < num_entries_ / max_hash_table_ratio_) {
+      hash_table_size_ *= 2;
+    }
+  }
+}
+
+bool CuckooTableBuilder::IsDeletedKey(uint64_t idx) const {
+  assert(closed_);
+  return idx >= num_values_;
+}
+
+Slice CuckooTableBuilder::GetKey(uint64_t idx) const {
+  assert(closed_);
+  if (IsDeletedKey(idx)) {
+    return Slice(
+        &deleted_keys_[static_cast<size_t>((idx - num_values_) * key_size_)],
+        static_cast<size_t>(key_size_));
+  }
+  return Slice(&kvs_[static_cast<size_t>(idx * (key_size_ + value_size_))],
+               static_cast<size_t>(key_size_));
+}
+
+Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const {
+  assert(closed_);
+  return is_last_level_file_ ? GetKey(idx) : ExtractUserKey(GetKey(idx));
+}
+
+Slice CuckooTableBuilder::GetValue(uint64_t idx) const {
+  assert(closed_);
+  if (IsDeletedKey(idx)) {
+    static std::string empty_value(static_cast<size_t>(value_size_), 'a');
+    return Slice(empty_value);
+  }
+  return Slice(
+      &kvs_[static_cast<size_t>(idx * (key_size_ + value_size_) + key_size_)],
+      static_cast<size_t>(value_size_));
+}
+
+Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
+  buckets->resize(
+      static_cast<size_t>(hash_table_size_ + cuckoo_block_size_ - 1));
+  uint32_t make_space_for_key_call_id = 0;
+  for (uint32_t vector_idx = 0; vector_idx < num_entries_; vector_idx++) {
+    uint64_t bucket_id = 0;
+    bool bucket_found = false;
+    autovector<uint64_t> hash_vals;
+    Slice user_key = GetUserKey(vector_idx);
+    for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found;
+         ++hash_cnt) {
+      uint64_t hash_val =
+          CuckooHash(user_key, hash_cnt, use_module_hash_, hash_table_size_,
+                     identity_as_first_hash_, get_slice_hash_);
+      // If there is a collision, check the next cuckoo_block_size_ locations
+      // for empty locations. While checking, if we reach the end of the hash
+      // table, stop searching and proceed to the next hash function.
+      for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
+           ++block_idx, ++hash_val) {
+        if ((*buckets)[static_cast<size_t>(hash_val)].vector_idx ==
+            kMaxVectorIdx) {
+          bucket_id = hash_val;
+          bucket_found = true;
+          break;
+        } else {
+          if (ucomp_->Compare(
+                  user_key,
+                  GetUserKey((*buckets)[static_cast<size_t>(hash_val)]
+                                 .vector_idx)) == 0) {
+            return Status::NotSupported("Same key is being inserted again.");
+          }
+          hash_vals.push_back(hash_val);
+        }
+      }
+    }
+    while (!bucket_found &&
+           !MakeSpaceForKey(hash_vals, ++make_space_for_key_call_id, buckets,
+                            &bucket_id)) {
+      // Rehash by increasing the number of hash functions.
+      if (num_hash_func_ >= max_num_hash_func_) {
+        return Status::NotSupported("Too many collisions. Unable to hash.");
+      }
+      // We don't really need to rehash the entire table because old hashes
+      // are still valid and we only increased the number of hash functions.
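+      // (E.g., with hypothetical values: if num_hash_func_ was 2, the key
+      // now also gets candidate buckets from hash function 2; keys already
+      // placed via functions 0 and 1 stay where they are.)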
+      uint64_t hash_val = CuckooHash(user_key, num_hash_func_,
+                                     use_module_hash_, hash_table_size_,
+                                     identity_as_first_hash_, get_slice_hash_);
+      ++num_hash_func_;
+      for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
+           ++block_idx, ++hash_val) {
+        if ((*buckets)[static_cast<size_t>(hash_val)].vector_idx ==
+            kMaxVectorIdx) {
+          bucket_found = true;
+          bucket_id = hash_val;
+          break;
+        } else {
+          hash_vals.push_back(hash_val);
+        }
+      }
+    }
+    (*buckets)[static_cast<size_t>(bucket_id)].vector_idx = vector_idx;
+  }
+  return Status::OK();
+}
+
+Status CuckooTableBuilder::Finish() {
+  assert(!closed_);
+  closed_ = true;
+  std::vector<CuckooBucket> buckets;
+  std::string unused_bucket;
+  if (num_entries_ > 0) {
+    // Calculate the real hash size if module hash is enabled.
+    if (use_module_hash_) {
+      hash_table_size_ =
+          static_cast<uint64_t>(num_entries_ / max_hash_table_ratio_);
+    }
+    status_ = MakeHashTable(&buckets);
+    if (!status_.ok()) {
+      return status_;
+    }
+    // Determine unused_user_key to fill empty buckets.
+    std::string unused_user_key = smallest_user_key_;
+    int curr_pos = static_cast<int>(unused_user_key.size()) - 1;
+    while (curr_pos >= 0) {
+      --unused_user_key[curr_pos];
+      if (Slice(unused_user_key).compare(smallest_user_key_) < 0) {
+        break;
+      }
+      --curr_pos;
+    }
+    if (curr_pos < 0) {
+      // Try using the largest key to identify an unused key.
+      unused_user_key = largest_user_key_;
+      curr_pos = static_cast<int>(unused_user_key.size()) - 1;
+      while (curr_pos >= 0) {
+        ++unused_user_key[curr_pos];
+        if (Slice(unused_user_key).compare(largest_user_key_) > 0) {
+          break;
+        }
+        --curr_pos;
+      }
+    }
+    if (curr_pos < 0) {
+      return Status::Corruption("Unable to find unused key");
+    }
+    if (is_last_level_file_) {
+      unused_bucket = unused_user_key;
+    } else {
+      ParsedInternalKey ikey(unused_user_key, 0, kTypeValue);
+      AppendInternalKey(&unused_bucket, ikey);
+    }
+  }
+  properties_.num_entries = num_entries_;
+  properties_.num_deletions = num_entries_ - num_values_;
+  properties_.fixed_key_len = key_size_;
+  properties_.user_collected_properties[CuckooTablePropertyNames::kValueLength]
+      .assign(reinterpret_cast<const char*>(&value_size_),
+              sizeof(value_size_));
+
+  uint64_t bucket_size = key_size_ + value_size_;
+  unused_bucket.resize(static_cast<size_t>(bucket_size), 'a');
+  // Write the table.
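+  //
+  // Layout sketch of the file produced below (derived from the writes that
+  // follow):
+  //
+  //   bucket 0 .. bucket N-1   each bucket_size = key_size_ + value_size_
+  //   property block           table properties (incl. cuckoo properties)
+  //   meta-index block         points at the property block
+  //   footer                   magic number + meta-index block handle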
+  uint32_t num_added = 0;
+  for (auto& bucket : buckets) {
+    if (bucket.vector_idx == kMaxVectorIdx) {
+      io_status_ = file_->Append(Slice(unused_bucket));
+    } else {
+      ++num_added;
+      io_status_ = file_->Append(GetKey(bucket.vector_idx));
+      if (io_status_.ok()) {
+        if (value_size_ > 0) {
+          io_status_ = file_->Append(GetValue(bucket.vector_idx));
+        }
+      }
+    }
+    if (!io_status_.ok()) {
+      status_ = io_status_;
+      return status_;
+    }
+  }
+  assert(num_added == NumEntries());
+  properties_.raw_key_size = num_added * properties_.fixed_key_len;
+  properties_.raw_value_size = num_added * value_size_;
+
+  uint64_t offset = buckets.size() * bucket_size;
+  properties_.data_size = offset;
+  unused_bucket.resize(static_cast<size_t>(properties_.fixed_key_len));
+  properties_.user_collected_properties[CuckooTablePropertyNames::kEmptyKey] =
+      unused_bucket;
+  properties_.user_collected_properties[CuckooTablePropertyNames::kNumHashFunc]
+      .assign(reinterpret_cast<const char*>(&num_hash_func_),
+              sizeof(num_hash_func_));
+
+  properties_
+      .user_collected_properties[CuckooTablePropertyNames::kHashTableSize]
+      .assign(reinterpret_cast<const char*>(&hash_table_size_),
+              sizeof(hash_table_size_));
+  properties_.user_collected_properties[CuckooTablePropertyNames::kIsLastLevel]
+      .assign(reinterpret_cast<const char*>(&is_last_level_file_),
+              sizeof(is_last_level_file_));
+  properties_
+      .user_collected_properties[CuckooTablePropertyNames::kCuckooBlockSize]
+      .assign(reinterpret_cast<const char*>(&cuckoo_block_size_),
+              sizeof(cuckoo_block_size_));
+  properties_
+      .user_collected_properties[CuckooTablePropertyNames::kIdentityAsFirstHash]
+      .assign(reinterpret_cast<const char*>(&identity_as_first_hash_),
+              sizeof(identity_as_first_hash_));
+  properties_
+      .user_collected_properties[CuckooTablePropertyNames::kUseModuleHash]
+      .assign(reinterpret_cast<const char*>(&use_module_hash_),
+              sizeof(use_module_hash_));
+  uint32_t user_key_len = static_cast<uint32_t>(smallest_user_key_.size());
+  properties_
+      .user_collected_properties[CuckooTablePropertyNames::kUserKeyLength]
+      .assign(reinterpret_cast<const char*>(&user_key_len),
+              sizeof(user_key_len));
+
+  // Write meta blocks.
+  MetaIndexBuilder meta_index_builder;
+  PropertyBlockBuilder property_block_builder;
+
+  property_block_builder.AddTableProperty(properties_);
+  property_block_builder.Add(properties_.user_collected_properties);
+  Slice property_block = property_block_builder.Finish();
+  BlockHandle property_block_handle;
+  property_block_handle.set_offset(offset);
+  property_block_handle.set_size(property_block.size());
+  io_status_ = file_->Append(property_block);
+  offset += property_block.size();
+  if (!io_status_.ok()) {
+    status_ = io_status_;
+    return status_;
+  }
+
+  meta_index_builder.Add(kPropertiesBlockName, property_block_handle);
+  Slice meta_index_block = meta_index_builder.Finish();
+
+  BlockHandle meta_index_block_handle;
+  meta_index_block_handle.set_offset(offset);
+  meta_index_block_handle.set_size(meta_index_block.size());
+  io_status_ = file_->Append(meta_index_block);
+  if (!io_status_.ok()) {
+    status_ = io_status_;
+    return status_;
+  }
+
+  FooterBuilder footer;
+  footer.Build(kCuckooTableMagicNumber, /* format_version */ 1, offset,
+               kNoChecksum, meta_index_block_handle);
+  io_status_ = file_->Append(footer.GetSlice());
+  status_ = io_status_;
+  return status_;
+}
+
+void CuckooTableBuilder::Abandon() {
+  assert(!closed_);
+  closed_ = true;
+}
+
+uint64_t CuckooTableBuilder::NumEntries() const { return num_entries_; }
+
+uint64_t CuckooTableBuilder::FileSize() const {
+  if (closed_) {
+    return file_->GetFileSize();
+  } else if (num_entries_ == 0) {
+    return 0;
+  }
+
+  if (use_module_hash_) {
+    return static_cast<uint64_t>((key_size_ + value_size_) * num_entries_ /
+                                 max_hash_table_ratio_);
+  } else {
+    // Account for buckets being a power of two.
+    // As elements are added, the file size remains constant for a while and
+    // then doubles. Since the compaction algorithm stops adding elements
+    // only after it exceeds the file limit, we account for the extra element
+    // being added here.
+    uint64_t expected_hash_table_size = hash_table_size_;
+    if (expected_hash_table_size < (num_entries_ + 1) / max_hash_table_ratio_) {
+      expected_hash_table_size *= 2;
+    }
+    return (key_size_ + value_size_) * expected_hash_table_size - 1;
+  }
+}
+
+// This method is invoked when there is no place to insert the target key.
+// It searches for a set of elements that can be moved to accommodate target
+// key. The search is a BFS graph traversal with first level (hash_vals)
+// being all the buckets target key could go to.
+// Then, from each node (curr_node), we find all the buckets that curr_node
+// could go to. They form the children of curr_node in the tree.
+// We continue the traversal until we find an empty bucket, in which case, we
+// move all elements along the path from first level to this empty bucket, to
+// make space for target key which is inserted at first level (*bucket_id).
+// If tree depth exceeds max depth, we return false indicating failure.
+bool CuckooTableBuilder::MakeSpaceForKey(
+    const autovector<uint64_t>& hash_vals,
+    const uint32_t make_space_for_key_call_id,
+    std::vector<CuckooBucket>* buckets, uint64_t* bucket_id) {
+  struct CuckooNode {
+    uint64_t bucket_id;
+    uint32_t depth;
+    uint32_t parent_pos;
+    CuckooNode(uint64_t _bucket_id, uint32_t _depth, int _parent_pos)
+        : bucket_id(_bucket_id), depth(_depth), parent_pos(_parent_pos) {}
+  };
+  // This is a BFS search tree that is stored simply as a vector.
+  // Each node stores the index of its parent node in the vector.
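+  // For illustration, with num_hash_func_ = 2 the vector might evolve as
+  //
+  //   tree = [{b0, depth 0, parent 0}, {b1, depth 0, parent 0},
+  //           {b4, depth 1, parent 0}, {b9, depth 1, parent 1}, ...]
+  //
+  // where b4 and b9 are alternative buckets for the keys currently stored in
+  // b0 and b1. The bucket ids are made up for the example; only the parent
+  // links matter when the displacement path is replayed below.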
+  std::vector<CuckooNode> tree;
+  // We want to identify already visited buckets in the current method call so
+  // that we don't add the same buckets again for exploration in the tree.
+  // We do this by maintaining a count of the current method call in
+  // make_space_for_key_call_id, which acts as a unique id for this invocation
+  // of the method. We store this number into the nodes that we explore in the
+  // current method call.
+  // It is unlikely for the increment operation to overflow because the maximum
+  // no. of times this will be called is <= max_num_hash_func_ + num_entries_.
+  for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
+    uint64_t bid = hash_vals[hash_cnt];
+    (*buckets)[static_cast<size_t>(bid)].make_space_for_key_call_id =
+        make_space_for_key_call_id;
+    tree.push_back(CuckooNode(bid, 0, 0));
+  }
+  bool null_found = false;
+  uint32_t curr_pos = 0;
+  while (!null_found && curr_pos < tree.size()) {
+    CuckooNode& curr_node = tree[curr_pos];
+    uint32_t curr_depth = curr_node.depth;
+    if (curr_depth >= max_search_depth_) {
+      break;
+    }
+    CuckooBucket& curr_bucket =
+        (*buckets)[static_cast<size_t>(curr_node.bucket_id)];
+    for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !null_found;
+         ++hash_cnt) {
+      uint64_t child_bucket_id = CuckooHash(
+          GetUserKey(curr_bucket.vector_idx), hash_cnt, use_module_hash_,
+          hash_table_size_, identity_as_first_hash_, get_slice_hash_);
+      // Iterate inside Cuckoo Block.
+      for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
+           ++block_idx, ++child_bucket_id) {
+        if ((*buckets)[static_cast<size_t>(child_bucket_id)]
+                .make_space_for_key_call_id == make_space_for_key_call_id) {
+          continue;
+        }
+        (*buckets)[static_cast<size_t>(child_bucket_id)]
+            .make_space_for_key_call_id = make_space_for_key_call_id;
+        tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1, curr_pos));
+        if ((*buckets)[static_cast<size_t>(child_bucket_id)].vector_idx ==
+            kMaxVectorIdx) {
+          null_found = true;
+          break;
+        }
+      }
+    }
+    ++curr_pos;
+  }
+
+  if (null_found) {
+    // There is an empty node in tree.back(). Now, traverse the path from this
+    // empty node to top of the tree and at every node in the path, replace
+    // child with the parent. Stop when first level is reached in the tree
+    // (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return
+    // this location in first level for target key to be inserted.
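+    // Continuing the illustration above: if the empty bucket was discovered
+    // at tree position 3 whose parent is position 1, the loop below copies
+    // the occupant of tree[1]'s bucket into tree[3]'s bucket and then stops,
+    // because position 1 lies in the first level (< num_hash_func_). That
+    // leaves tree[1].bucket_id free for the target key.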
+    uint32_t bucket_to_replace_pos = static_cast<uint32_t>(tree.size()) - 1;
+    while (bucket_to_replace_pos >= num_hash_func_) {
+      CuckooNode& curr_node = tree[bucket_to_replace_pos];
+      (*buckets)[static_cast<size_t>(curr_node.bucket_id)] =
+          (*buckets)[static_cast<size_t>(tree[curr_node.parent_pos].bucket_id)];
+      bucket_to_replace_pos = curr_node.parent_pos;
+    }
+    *bucket_id = tree[bucket_to_replace_pos].bucket_id;
+  }
+  return null_found;
+}
+
+std::string CuckooTableBuilder::GetFileChecksum() const {
+  if (file_ != nullptr) {
+    return file_->GetFileChecksum();
+  } else {
+    return kUnknownFileChecksum;
+  }
+}
+
+const char* CuckooTableBuilder::GetFileChecksumFuncName() const {
+  if (file_ != nullptr) {
+    return file_->GetFileChecksumFuncName();
+  } else {
+    return kUnknownFileChecksumFuncName;
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.h
new file mode 100644
index 0000000000..3a19dd6f99
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_builder.h
@@ -0,0 +1,136 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <stdint.h>
+
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "db/version_edit.h"
+#include "port/port.h"
+#include "rocksdb/status.h"
+#include "rocksdb/table.h"
+#include "rocksdb/table_properties.h"
+#include "table/table_builder.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class CuckooTableBuilder : public TableBuilder {
+ public:
+  CuckooTableBuilder(
+      WritableFileWriter* file, double max_hash_table_ratio,
+      uint32_t max_num_hash_func, uint32_t max_search_depth,
+      const Comparator* user_comparator, uint32_t cuckoo_block_size,
+      bool use_module_hash, bool identity_as_first_hash,
+      uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t),
+      uint32_t column_family_id, const std::string& column_family_name,
+      const std::string& db_id = "", const std::string& db_session_id = "",
+      uint64_t file_number = 0);
+  // No copying allowed
+  CuckooTableBuilder(const CuckooTableBuilder&) = delete;
+  void operator=(const CuckooTableBuilder&) = delete;
+
+  // REQUIRES: Either Finish() or Abandon() has been called.
+  ~CuckooTableBuilder() {}
+
+  // Add key,value to the table being constructed.
+  // REQUIRES: key is after any previously added key according to comparator.
+  // REQUIRES: Finish(), Abandon() have not been called
+  void Add(const Slice& key, const Slice& value) override;
+
+  // Return non-ok iff some error has been detected.
+  Status status() const override { return status_; }
+
+  // Return non-ok iff some error happens during IO.
+  IOStatus io_status() const override { return io_status_; }
+
+  // Finish building the table. Stops using the file passed to the
+  // constructor after this function returns.
+  // REQUIRES: Finish(), Abandon() have not been called
+  Status Finish() override;
+
+  // Indicate that the contents of this builder should be abandoned. Stops
+  // using the file passed to the constructor after this function returns.
+  // If the caller is not going to call Finish(), it must call Abandon()
+  // before destroying this builder.
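+  // A typical lifecycle, sketched with error handling elided (`builder` and
+  // `sorted_input` are placeholders, not names from this file):
+  //
+  //   CuckooTableBuilder builder(/* ... */);
+  //   for (const auto& kv : sorted_input) builder.Add(kv.first, kv.second);
+  //   if (builder.status().ok()) {
+  //     Status s = builder.Finish();
+  //   } else {
+  //     builder.Abandon();
+  //   }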
+  // REQUIRES: Finish(), Abandon() have not been called
+  void Abandon() override;
+
+  // Number of calls to Add() so far.
+  uint64_t NumEntries() const override;
+
+  // Size of the file generated so far. If invoked after a successful
+  // Finish() call, returns the size of the final generated file.
+  uint64_t FileSize() const override;
+
+  TableProperties GetTableProperties() const override { return properties_; }
+
+  // Get file checksum
+  std::string GetFileChecksum() const override;
+
+  // Get file checksum function name
+  const char* GetFileChecksumFuncName() const override;
+
+ private:
+  struct CuckooBucket {
+    CuckooBucket() : vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
+    uint32_t vector_idx;
+    // This number will not exceed kvs_.size() + max_num_hash_func_.
+    // We assume number of items is <= 2^32.
+    uint32_t make_space_for_key_call_id;
+  };
+  static const uint32_t kMaxVectorIdx = std::numeric_limits<uint32_t>::max();
+
+  bool MakeSpaceForKey(const autovector<uint64_t>& hash_vals,
+                       const uint32_t call_id,
+                       std::vector<CuckooBucket>* buckets, uint64_t* bucket_id);
+  Status MakeHashTable(std::vector<CuckooBucket>* buckets);
+
+  inline bool IsDeletedKey(uint64_t idx) const;
+  inline Slice GetKey(uint64_t idx) const;
+  inline Slice GetUserKey(uint64_t idx) const;
+  inline Slice GetValue(uint64_t idx) const;
+
+  uint32_t num_hash_func_;
+  WritableFileWriter* file_;
+  const double max_hash_table_ratio_;
+  const uint32_t max_num_hash_func_;
+  const uint32_t max_search_depth_;
+  const uint32_t cuckoo_block_size_;
+  uint64_t hash_table_size_;
+  bool is_last_level_file_;
+  bool has_seen_first_key_;
+  bool has_seen_first_value_;
+  uint64_t key_size_;
+  uint64_t value_size_;
+  // A list of fixed-size key-value pairs concatenated into a string.
+  // Use GetKey(), GetUserKey(), and GetValue() to retrieve a specific
+  // key / value given an index
+  std::string kvs_;
+  std::string deleted_keys_;
+  // Number of key-value pairs stored in kvs_ + number of deleted keys
+  uint64_t num_entries_;
+  // Number of keys that contain a value (non-deletion op)
+  uint64_t num_values_;
+  Status status_;
+  IOStatus io_status_;
+  TableProperties properties_;
+  const Comparator* ucomp_;
+  bool use_module_hash_;
+  bool identity_as_first_hash_;
+  uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
+                              uint64_t max_num_buckets);
+  std::string largest_user_key_ = "";
+  std::string smallest_user_key_ = "";
+
+  bool closed_;  // Either Finish() or Abandon() has been called.
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.cc
new file mode 100644
index 0000000000..774e00212d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.cc
@@ -0,0 +1,100 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
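+
+// Wiring the factory into a database is a one-line change on the options
+// (an illustrative sketch; `options` is a placeholder Options instance):
+//
+//   CuckooTableOptions cuckoo_opts;
+//   cuckoo_opts.hash_table_ratio = 0.9;
+//   options.table_factory.reset(NewCuckooTableFactory(cuckoo_opts));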
+ +#include "table/cuckoo/cuckoo_table_factory.h" + +#include "db/dbformat.h" +#include "options/configurable_helper.h" +#include "rocksdb/utilities/options_type.h" +#include "table/cuckoo/cuckoo_table_builder.h" +#include "table/cuckoo/cuckoo_table_reader.h" + +namespace ROCKSDB_NAMESPACE { + +Status CuckooTableFactory::NewTableReader( + const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, + bool /*prefetch_index_and_filter_in_cache*/) const { + std::unique_ptr new_reader(new CuckooTableReader( + table_reader_options.ioptions, std::move(file), file_size, + table_reader_options.internal_comparator.user_comparator(), nullptr)); + Status s = new_reader->status(); + if (s.ok()) { + *table = std::move(new_reader); + } + return s; +} + +TableBuilder* CuckooTableFactory::NewTableBuilder( + const TableBuilderOptions& table_builder_options, + WritableFileWriter* file) const { + // TODO: change builder to take the option struct + return new CuckooTableBuilder( + file, table_options_.hash_table_ratio, 64, + table_options_.max_search_depth, + table_builder_options.internal_comparator.user_comparator(), + table_options_.cuckoo_block_size, table_options_.use_module_hash, + table_options_.identity_as_first_hash, nullptr /* get_slice_hash */, + table_builder_options.column_family_id, + table_builder_options.column_family_name, table_builder_options.db_id, + table_builder_options.db_session_id, table_builder_options.cur_file_num); +} + +std::string CuckooTableFactory::GetPrintableOptions() const { + std::string ret; + ret.reserve(2000); + const int kBufferSize = 200; + char buffer[kBufferSize]; + + snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n", + table_options_.hash_table_ratio); + ret.append(buffer); + snprintf(buffer, kBufferSize, " max_search_depth: %u\n", + table_options_.max_search_depth); + ret.append(buffer); + snprintf(buffer, kBufferSize, " cuckoo_block_size: %u\n", + table_options_.cuckoo_block_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " identity_as_first_hash: %d\n", + table_options_.identity_as_first_hash); + ret.append(buffer); + return ret; +} + +static std::unordered_map cuckoo_table_type_info = + { + {"hash_table_ratio", + {offsetof(struct CuckooTableOptions, hash_table_ratio), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"max_search_depth", + {offsetof(struct CuckooTableOptions, max_search_depth), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"cuckoo_block_size", + {offsetof(struct CuckooTableOptions, cuckoo_block_size), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"identity_as_first_hash", + {offsetof(struct CuckooTableOptions, identity_as_first_hash), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"use_module_hash", + {offsetof(struct CuckooTableOptions, use_module_hash), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; + +CuckooTableFactory::CuckooTableFactory(const CuckooTableOptions& table_options) + : table_options_(table_options) { + RegisterOptions(&table_options_, &cuckoo_table_type_info); +} + +TableFactory* NewCuckooTableFactory(const CuckooTableOptions& table_options) { + return new CuckooTableFactory(table_options); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.h 
b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.h new file mode 100644 index 0000000000..7132cec659 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_factory.h @@ -0,0 +1,80 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "util/murmurhash.h" + +namespace ROCKSDB_NAMESPACE { + +const uint32_t kCuckooMurmurSeedMultiplier = 816922183; +static inline uint64_t CuckooHash( + const Slice& user_key, uint32_t hash_cnt, bool use_module_hash, + uint64_t table_size_, bool identity_as_first_hash, + uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) { +#if !defined NDEBUG || defined OS_WIN + // This part is used only in unit tests but we have to keep it for Windows + // build as we run test in both debug and release modes under Windows. + if (get_slice_hash != nullptr) { + return get_slice_hash(user_key, hash_cnt, table_size_); + } +#else + (void)get_slice_hash; +#endif + + uint64_t value = 0; + if (hash_cnt == 0 && identity_as_first_hash) { + value = (*reinterpret_cast(user_key.data())); + } else { + value = MurmurHash(user_key.data(), static_cast(user_key.size()), + kCuckooMurmurSeedMultiplier * hash_cnt); + } + if (use_module_hash) { + return value % table_size_; + } else { + return value & (table_size_ - 1); + } +} + +// Cuckoo Table is designed for applications that require fast point lookups +// but not fast range scans. +// +// Some assumptions: +// - Key length and Value length are fixed. +// - Does not support Snapshot. +// - Does not support Merge operations. +// - Does not support prefix bloom filters. +class CuckooTableFactory : public TableFactory { + public: + explicit CuckooTableFactory( + const CuckooTableOptions& table_option = CuckooTableOptions()); + ~CuckooTableFactory() {} + + // Method to allow CheckedCast to work for this class + static const char* kClassName() { return kCuckooTableName(); } + const char* Name() const override { return kCuckooTableName(); } + + using TableFactory::NewTableReader; + Status NewTableReader( + const ReadOptions& ro, const TableReaderOptions& table_reader_options, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, + bool prefetch_index_and_filter_in_cache = true) const override; + + TableBuilder* NewTableBuilder( + const TableBuilderOptions& table_builder_options, + WritableFileWriter* file) const override; + + std::string GetPrintableOptions() const override; + + private: + CuckooTableOptions table_options_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.cc new file mode 100644 index 0000000000..d647619620 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.cc @@ -0,0 +1,412 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "table/cuckoo/cuckoo_table_reader.h" + +#include +#include +#include +#include +#include + +#include "memory/arena.h" +#include "options/cf_options.h" +#include "rocksdb/iterator.h" +#include "rocksdb/table.h" +#include "table/cuckoo/cuckoo_table_factory.h" +#include "table/get_context.h" +#include "table/internal_iterator.h" +#include "table/meta_blocks.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1); +const uint32_t kInvalidIndex = std::numeric_limits::max(); +} // namespace + +extern const uint64_t kCuckooTableMagicNumber; + +CuckooTableReader::CuckooTableReader( + const ImmutableOptions& ioptions, + std::unique_ptr&& file, uint64_t file_size, + const Comparator* comparator, + uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) + : file_(std::move(file)), + is_last_level_(false), + identity_as_first_hash_(false), + use_module_hash_(false), + num_hash_func_(0), + unused_key_(""), + key_length_(0), + user_key_length_(0), + value_length_(0), + bucket_length_(0), + cuckoo_block_size_(0), + cuckoo_block_bytes_minus_one_(0), + table_size_(0), + ucomp_(comparator), + get_slice_hash_(get_slice_hash) { + if (!ioptions.allow_mmap_reads) { + status_ = Status::InvalidArgument("File is not mmaped"); + return; + } + { + std::unique_ptr props; + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + status_ = + ReadTableProperties(file_.get(), file_size, kCuckooTableMagicNumber, + ioptions, read_options, &props); + if (!status_.ok()) { + return; + } + table_props_ = std::move(props); + } + auto& user_props = table_props_->user_collected_properties; + auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc); + if (hash_funs == user_props.end()) { + status_ = Status::Corruption("Number of hash functions not found"); + return; + } + num_hash_func_ = *reinterpret_cast(hash_funs->second.data()); + auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey); + if (unused_key == user_props.end()) { + status_ = Status::Corruption("Empty bucket value not found"); + return; + } + unused_key_ = unused_key->second; + + key_length_ = static_cast(table_props_->fixed_key_len); + auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength); + if (user_key_len == user_props.end()) { + status_ = Status::Corruption("User key length not found"); + return; + } + user_key_length_ = + *reinterpret_cast(user_key_len->second.data()); + + auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength); + if (value_length == user_props.end()) { + status_ = Status::Corruption("Value length not found"); + return; + } + value_length_ = + *reinterpret_cast(value_length->second.data()); + bucket_length_ = key_length_ + value_length_; + + auto hash_table_size = + user_props.find(CuckooTablePropertyNames::kHashTableSize); + if (hash_table_size == user_props.end()) { + status_ = Status::Corruption("Hash table size not found"); + return; + } + table_size_ = + *reinterpret_cast(hash_table_size->second.data()); + + auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel); + if (is_last_level == user_props.end()) { + status_ = Status::Corruption("Is last level not found"); + return; + } + is_last_level_ = *reinterpret_cast(is_last_level->second.data()); + + auto identity_as_first_hash = + 
user_props.find(CuckooTablePropertyNames::kIdentityAsFirstHash);
+  if (identity_as_first_hash == user_props.end()) {
+    status_ = Status::Corruption("identity as first hash not found");
+    return;
+  }
+  identity_as_first_hash_ =
+      *reinterpret_cast<const bool*>(identity_as_first_hash->second.data());
+
+  auto use_module_hash =
+      user_props.find(CuckooTablePropertyNames::kUseModuleHash);
+  if (use_module_hash == user_props.end()) {
+    status_ = Status::Corruption("hash type is not found");
+    return;
+  }
+  use_module_hash_ =
+      *reinterpret_cast<const bool*>(use_module_hash->second.data());
+  auto cuckoo_block_size =
+      user_props.find(CuckooTablePropertyNames::kCuckooBlockSize);
+  if (cuckoo_block_size == user_props.end()) {
+    status_ = Status::Corruption("Cuckoo block size not found");
+    return;
+  }
+  cuckoo_block_size_ =
+      *reinterpret_cast<const uint32_t*>(cuckoo_block_size->second.data());
+  cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1;
+  // TODO: rate limit reads of whole cuckoo tables.
+  status_ =
+      file_->Read(IOOptions(), 0, static_cast<size_t>(file_size), &file_data_,
+                  nullptr, nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
+}
+
+Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
+                              const Slice& key, GetContext* get_context,
+                              const SliceTransform* /* prefix_extractor */,
+                              bool /*skip_filters*/) {
+  assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0));
+  Slice user_key = ExtractUserKey(key);
+  for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
+    uint64_t offset =
+        bucket_length_ * CuckooHash(user_key, hash_cnt, use_module_hash_,
+                                    table_size_, identity_as_first_hash_,
+                                    get_slice_hash_);
+    const char* bucket = &file_data_.data()[offset];
+    for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
+         ++block_idx, bucket += bucket_length_) {
+      if (ucomp_->Equal(Slice(unused_key_.data(), user_key.size()),
+                        Slice(bucket, user_key.size()))) {
+        return Status::OK();
+      }
+      // Here, we compare only the user key part as we support only one entry
+      // per user key and we don't support snapshot.
+      if (ucomp_->Equal(user_key, Slice(bucket, user_key.size()))) {
+        Slice value(bucket + key_length_, value_length_);
+        if (is_last_level_) {
+          // Sequence number is not stored at the last level, so we will use
+          // kMaxSequenceNumber since it is unknown. This could cause some
+          // transactions to fail to lock a key due to known sequence number.
+          // However, it is not expected for anyone to use a CuckooTable in a
+          // TransactionDB.
+          get_context->SaveValue(value, kMaxSequenceNumber);
+        } else {
+          Slice full_key(bucket, key_length_);
+          ParsedInternalKey found_ikey;
+          Status s = ParseInternalKey(full_key, &found_ikey,
+                                      false /* log_err_key */);  // TODO
+          if (!s.ok()) return s;
+          bool dont_care __attribute__((__unused__));
+          get_context->SaveValue(found_ikey, value, &dont_care);
+        }
+        // We don't support merge operations. So, we return here.
+        return Status::OK();
+      }
+    }
+  }
+  return Status::OK();
+}
+
+void CuckooTableReader::Prepare(const Slice& key) {
+  // Prefetch the first Cuckoo Block.
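+  // The loop below rounds the bucket's address down to a cache-line boundary
+  // and touches every line the cuckoo block can span. For example, with
+  // CACHE_LINE_SIZE = 64 the mask clears the low six bits, so a block
+  // covering bytes 100..200 issues prefetches for the lines starting at 64,
+  // 128, and 192.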
+  Slice user_key = ExtractUserKey(key);
+  uint64_t addr =
+      reinterpret_cast<uint64_t>(file_data_.data()) +
+      bucket_length_ * CuckooHash(user_key, 0, use_module_hash_, table_size_,
+                                  identity_as_first_hash_, nullptr);
+  uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
+  for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
+    PREFETCH(reinterpret_cast<const char*>(addr), 0, 3);
+  }
+}
+
+class CuckooTableIterator : public InternalIterator {
+ public:
+  explicit CuckooTableIterator(CuckooTableReader* reader);
+  // No copying allowed
+  CuckooTableIterator(const CuckooTableIterator&) = delete;
+  void operator=(const CuckooTableIterator&) = delete;
+  ~CuckooTableIterator() override {}
+  bool Valid() const override;
+  void SeekToFirst() override;
+  void SeekToLast() override;
+  void Seek(const Slice& target) override;
+  void SeekForPrev(const Slice& target) override;
+  void Next() override;
+  void Prev() override;
+  Slice key() const override;
+  Slice value() const override;
+  Status status() const override { return Status::OK(); }
+  void InitIfNeeded();
+
+ private:
+  struct BucketComparator {
+    BucketComparator(const Slice& file_data, const Comparator* ucomp,
+                     uint32_t bucket_len, uint32_t user_key_len,
+                     const Slice& target = Slice())
+        : file_data_(file_data),
+          ucomp_(ucomp),
+          bucket_len_(bucket_len),
+          user_key_len_(user_key_len),
+          target_(target) {}
+    bool operator()(const uint32_t first, const uint32_t second) const {
+      const char* first_bucket = (first == kInvalidIndex)
+                                     ? target_.data()
+                                     : &file_data_.data()[first * bucket_len_];
+      const char* second_bucket =
+          (second == kInvalidIndex) ? target_.data()
+                                    : &file_data_.data()[second * bucket_len_];
+      return ucomp_->Compare(Slice(first_bucket, user_key_len_),
+                             Slice(second_bucket, user_key_len_)) < 0;
+    }
+
+   private:
+    const Slice file_data_;
+    const Comparator* ucomp_;
+    const uint32_t bucket_len_;
+    const uint32_t user_key_len_;
+    const Slice target_;
+  };
+
+  const BucketComparator bucket_comparator_;
+  void PrepareKVAtCurrIdx();
+  CuckooTableReader* reader_;
+  bool initialized_;
+  // Contains a map of keys to bucket_id sorted in key order.
+  std::vector<uint32_t> sorted_bucket_ids_;
+  // We assume that the number of items can be stored in uint32 (4 Billion).
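+  // The sorted view is built lazily by InitIfNeeded() on the first seek:
+  // buckets are laid out in hash order on disk, so ordered iteration first
+  // requires a full scan of the table plus an O(n log n) sort of the
+  // occupied bucket ids.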
+  uint32_t curr_key_idx_;
+  Slice curr_value_;
+  IterKey curr_key_;
+};
+
+CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
+    : bucket_comparator_(reader->file_data_, reader->ucomp_,
+                         reader->bucket_length_, reader->user_key_length_),
+      reader_(reader),
+      initialized_(false),
+      curr_key_idx_(kInvalidIndex) {
+  sorted_bucket_ids_.clear();
+  curr_value_.clear();
+  curr_key_.Clear();
+}
+
+void CuckooTableIterator::InitIfNeeded() {
+  if (initialized_) {
+    return;
+  }
+  sorted_bucket_ids_.reserve(
+      static_cast<size_t>(reader_->GetTableProperties()->num_entries));
+  uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
+  assert(num_buckets < kInvalidIndex);
+  const char* bucket = reader_->file_data_.data();
+  for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) {
+    if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) {
+      sorted_bucket_ids_.push_back(bucket_id);
+    }
+    bucket += reader_->bucket_length_;
+  }
+  assert(sorted_bucket_ids_.size() ==
+         reader_->GetTableProperties()->num_entries);
+  std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
+            bucket_comparator_);
+  curr_key_idx_ = kInvalidIndex;
+  initialized_ = true;
+}
+
+void CuckooTableIterator::SeekToFirst() {
+  InitIfNeeded();
+  curr_key_idx_ = 0;
+  PrepareKVAtCurrIdx();
+}
+
+void CuckooTableIterator::SeekToLast() {
+  InitIfNeeded();
+  curr_key_idx_ = static_cast<uint32_t>(sorted_bucket_ids_.size()) - 1;
+  PrepareKVAtCurrIdx();
+}
+
+void CuckooTableIterator::Seek(const Slice& target) {
+  InitIfNeeded();
+  const BucketComparator seek_comparator(
+      reader_->file_data_, reader_->ucomp_, reader_->bucket_length_,
+      reader_->user_key_length_, ExtractUserKey(target));
+  auto seek_it =
+      std::lower_bound(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
+                       kInvalidIndex, seek_comparator);
+  curr_key_idx_ =
+      static_cast<uint32_t>(std::distance(sorted_bucket_ids_.begin(), seek_it));
+  PrepareKVAtCurrIdx();
+}
+
+void CuckooTableIterator::SeekForPrev(const Slice& /*target*/) {
+  // Not supported
+  assert(false);
+}
+
+bool CuckooTableIterator::Valid() const {
+  return curr_key_idx_ < sorted_bucket_ids_.size();
+}
+
+void CuckooTableIterator::PrepareKVAtCurrIdx() {
+  if (!Valid()) {
+    curr_value_.clear();
+    curr_key_.Clear();
+    return;
+  }
+  uint32_t id = sorted_bucket_ids_[curr_key_idx_];
+  const char* offset =
+      reader_->file_data_.data() + id * reader_->bucket_length_;
+  if (reader_->is_last_level_) {
+    // Always return internal key.
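+    // Last-level files store bare user keys without sequence/type bytes, so
+    // a synthetic internal key (sequence 0, kTypeValue) is materialized here
+    // to preserve the iterator contract of always yielding internal keys.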
+ curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_), 0, + kTypeValue); + } else { + curr_key_.SetInternalKey(Slice(offset, reader_->key_length_)); + } + curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_); +} + +void CuckooTableIterator::Next() { + if (!Valid()) { + curr_value_.clear(); + curr_key_.Clear(); + return; + } + ++curr_key_idx_; + PrepareKVAtCurrIdx(); +} + +void CuckooTableIterator::Prev() { + if (curr_key_idx_ == 0) { + curr_key_idx_ = static_cast(sorted_bucket_ids_.size()); + } + if (!Valid()) { + curr_value_.clear(); + curr_key_.Clear(); + return; + } + --curr_key_idx_; + PrepareKVAtCurrIdx(); +} + +Slice CuckooTableIterator::key() const { + assert(Valid()); + return curr_key_.GetInternalKey(); +} + +Slice CuckooTableIterator::value() const { + assert(Valid()); + return curr_value_; +} + +InternalIterator* CuckooTableReader::NewIterator( + const ReadOptions& /*read_options*/, + const SliceTransform* /* prefix_extractor */, Arena* arena, + bool /*skip_filters*/, TableReaderCaller /*caller*/, + size_t /*compaction_readahead_size*/, bool /* allow_unprepared_value */) { + if (!status().ok()) { + return NewErrorInternalIterator( + Status::Corruption("CuckooTableReader status is not okay."), arena); + } + CuckooTableIterator* iter; + if (arena == nullptr) { + iter = new CuckooTableIterator(this); + } else { + auto iter_mem = arena->AllocateAligned(sizeof(CuckooTableIterator)); + iter = new (iter_mem) CuckooTableIterator(this); + } + return iter; +} + +size_t CuckooTableReader::ApproximateMemoryUsage() const { return 0; } + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.h new file mode 100644 index 0000000000..d17011ed83 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/cuckoo/cuckoo_table_reader.h @@ -0,0 +1,100 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
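+
+// Note that CuckooTableReader only works with mmap-based reads; construction
+// fails with InvalidArgument otherwise. The relevant knob (a sketch;
+// `options` is a placeholder Options instance):
+//
+//   options.allow_mmap_reads = true;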
+ +#pragma once +#include +#include +#include +#include + +#include "file/random_access_file_reader.h" +#include "rocksdb/env.h" +#include "rocksdb/options.h" +#include "table/table_reader.h" + +namespace ROCKSDB_NAMESPACE { + +class Arena; +class TableReader; +struct ImmutableOptions; + +class CuckooTableReader : public TableReader { + public: + CuckooTableReader(const ImmutableOptions& ioptions, + std::unique_ptr&& file, + uint64_t file_size, const Comparator* user_comparator, + uint64_t (*get_slice_hash)(const Slice&, uint32_t, + uint64_t)); + ~CuckooTableReader() {} + + std::shared_ptr GetTableProperties() const override { + return table_props_; + } + + Status status() const { return status_; } + + Status Get(const ReadOptions& readOptions, const Slice& key, + GetContext* get_context, const SliceTransform* prefix_extractor, + bool skip_filters = false) override; + + // Returns a new iterator over table contents + // compaction_readahead_size: its value will only be used if for_compaction = + // true + InternalIterator* NewIterator(const ReadOptions&, + const SliceTransform* prefix_extractor, + Arena* arena, bool skip_filters, + TableReaderCaller caller, + size_t compaction_readahead_size = 0, + bool allow_unprepared_value = false) override; + void Prepare(const Slice& target) override; + + // Report an approximation of how much memory has been used. + size_t ApproximateMemoryUsage() const override; + + // Following methods are not implemented for Cuckoo Table Reader + uint64_t ApproximateOffsetOf(const ReadOptions& /*read_options*/, + const Slice& /*key*/, + TableReaderCaller /*caller*/) override { + return 0; + } + + uint64_t ApproximateSize(const ReadOptions& /* read_options */, + const Slice& /*start*/, const Slice& /*end*/, + TableReaderCaller /*caller*/) override { + return 0; + } + + void SetupForCompaction() override {} + // End of methods not implemented. + + private: + friend class CuckooTableIterator; + void LoadAllKeys(std::vector>* key_to_bucket_id); + std::unique_ptr file_; + Slice file_data_; + bool is_last_level_; + bool identity_as_first_hash_; + bool use_module_hash_; + std::shared_ptr table_props_; + Status status_; + uint32_t num_hash_func_; + std::string unused_key_; + uint32_t key_length_; + uint32_t user_key_length_; + uint32_t value_length_; + uint32_t bucket_length_; + uint32_t cuckoo_block_size_; + uint32_t cuckoo_block_bytes_minus_one_; + uint64_t table_size_; + const Comparator* ucomp_; + uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, + uint64_t max_num_buckets); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.cc new file mode 100644 index 0000000000..dc077bd452 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.cc @@ -0,0 +1,573 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
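+
+// A BlockHandle serializes as two varint64s: offset, then size. As a worked
+// example, offset = 300 and size = 10 encode to three bytes in total,
+// 0xAC 0x02 for 300 followed by 0x0A for 10; the worst case is
+// 2 * 10 = 20 bytes, i.e. BlockHandle::kMaxEncodedLength.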
+ +#include "table/format.h" + +#include +#include + +#include "block_fetcher.h" +#include "file/random_access_file_reader.h" +#include "memory/memory_allocator_impl.h" +#include "monitoring/perf_context_imp.h" +#include "monitoring/statistics_impl.h" +#include "options/options_helper.h" +#include "rocksdb/env.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "table/block_based/block.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/persistent_cache_helper.h" +#include "util/cast_util.h" +#include "util/coding.h" +#include "util/compression.h" +#include "util/crc32c.h" +#include "util/hash.h" +#include "util/stop_watch.h" +#include "util/string_util.h" +#include "util/xxhash.h" + +namespace ROCKSDB_NAMESPACE { + +extern const uint64_t kLegacyBlockBasedTableMagicNumber; +extern const uint64_t kBlockBasedTableMagicNumber; + +extern const uint64_t kLegacyPlainTableMagicNumber; +extern const uint64_t kPlainTableMagicNumber; +const char* kHostnameForDbHostId = "__hostname__"; + +bool ShouldReportDetailedTime(Env* env, Statistics* stats) { + return env != nullptr && stats != nullptr && + stats->get_stats_level() > kExceptDetailedTimers; +} + +void BlockHandle::EncodeTo(std::string* dst) const { + // Sanity check that all fields have been set + assert(offset_ != ~uint64_t{0}); + assert(size_ != ~uint64_t{0}); + PutVarint64Varint64(dst, offset_, size_); +} + +char* BlockHandle::EncodeTo(char* dst) const { + // Sanity check that all fields have been set + assert(offset_ != ~uint64_t{0}); + assert(size_ != ~uint64_t{0}); + char* cur = EncodeVarint64(dst, offset_); + cur = EncodeVarint64(cur, size_); + return cur; +} + +Status BlockHandle::DecodeFrom(Slice* input) { + if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) { + return Status::OK(); + } else { + // reset in case failure after partially decoding + offset_ = 0; + size_ = 0; + return Status::Corruption("bad block handle"); + } +} + +Status BlockHandle::DecodeSizeFrom(uint64_t _offset, Slice* input) { + if (GetVarint64(input, &size_)) { + offset_ = _offset; + return Status::OK(); + } else { + // reset in case failure after partially decoding + offset_ = 0; + size_ = 0; + return Status::Corruption("bad block handle"); + } +} + +// Return a string that contains the copy of handle. 
+std::string BlockHandle::ToString(bool hex) const { + std::string handle_str; + EncodeTo(&handle_str); + if (hex) { + return Slice(handle_str).ToString(true); + } else { + return handle_str; + } +} + +const BlockHandle BlockHandle::kNullBlockHandle(0, 0); + +void IndexValue::EncodeTo(std::string* dst, bool have_first_key, + const BlockHandle* previous_handle) const { + if (previous_handle) { + // WART: this is specific to Block-based table + assert(handle.offset() == previous_handle->offset() + + previous_handle->size() + + BlockBasedTable::kBlockTrailerSize); + PutVarsignedint64(dst, handle.size() - previous_handle->size()); + } else { + handle.EncodeTo(dst); + } + assert(dst->size() != 0); + + if (have_first_key) { + PutLengthPrefixedSlice(dst, first_internal_key); + } +} + +Status IndexValue::DecodeFrom(Slice* input, bool have_first_key, + const BlockHandle* previous_handle) { + if (previous_handle) { + int64_t delta; + if (!GetVarsignedint64(input, &delta)) { + return Status::Corruption("bad delta-encoded index value"); + } + // WART: this is specific to Block-based table + handle = BlockHandle(previous_handle->offset() + previous_handle->size() + + BlockBasedTable::kBlockTrailerSize, + previous_handle->size() + delta); + } else { + Status s = handle.DecodeFrom(input); + if (!s.ok()) { + return s; + } + } + + if (!have_first_key) { + first_internal_key = Slice(); + } else if (!GetLengthPrefixedSlice(input, &first_internal_key)) { + return Status::Corruption("bad first key in block info"); + } + + return Status::OK(); +} + +std::string IndexValue::ToString(bool hex, bool have_first_key) const { + std::string s; + EncodeTo(&s, have_first_key, nullptr); + if (hex) { + return Slice(s).ToString(true); + } else { + return s; + } +} + +namespace { +inline bool IsLegacyFooterFormat(uint64_t magic_number) { + return magic_number == kLegacyBlockBasedTableMagicNumber || + magic_number == kLegacyPlainTableMagicNumber; +} +inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) { + if (magic_number == kLegacyBlockBasedTableMagicNumber) { + return kBlockBasedTableMagicNumber; + } + if (magic_number == kLegacyPlainTableMagicNumber) { + return kPlainTableMagicNumber; + } + assert(false); + return magic_number; +} +inline uint64_t DownconvertToLegacyFooterFormat(uint64_t magic_number) { + if (magic_number == kBlockBasedTableMagicNumber) { + return kLegacyBlockBasedTableMagicNumber; + } + if (magic_number == kPlainTableMagicNumber) { + return kLegacyPlainTableMagicNumber; + } + assert(false); + return magic_number; +} +inline uint8_t BlockTrailerSizeForMagicNumber(uint64_t magic_number) { + if (magic_number == kBlockBasedTableMagicNumber || + magic_number == kLegacyBlockBasedTableMagicNumber) { + return static_cast(BlockBasedTable::kBlockTrailerSize); + } else { + return 0; + } +} + +// Footer format, in three parts: +// * Part1 +// -> format_version == 0 (inferred from legacy magic number) +// (0 bytes) +// -> format_version >= 1 +// checksum type (char, 1 byte) +// * Part2 +// metaindex handle (varint64 offset, varint64 size) +// index handle (varint64 offset, varint64 size) +// for part2 size = 2 * BlockHandle::kMaxEncodedLength = 40 +// * Part3 +// -> format_version == 0 (inferred from legacy magic number) +// legacy magic number (8 bytes) +// -> format_version >= 1 (inferred from NOT legacy magic number) +// format_version (uint32LE, 4 bytes), also called "footer version" +// newer magic number (8 bytes) + +constexpr size_t kFooterPart2Size = 2 * BlockHandle::kMaxEncodedLength; +} // 
namespace + +void FooterBuilder::Build(uint64_t magic_number, uint32_t format_version, + uint64_t footer_offset, ChecksumType checksum_type, + const BlockHandle& metaindex_handle, + const BlockHandle& index_handle) { + (void)footer_offset; // Future use + + assert(magic_number != Footer::kNullTableMagicNumber); + assert(IsSupportedFormatVersion(format_version)); + + char* part2; + char* part3; + if (format_version > 0) { + slice_ = Slice(data_.data(), Footer::kNewVersionsEncodedLength); + // Generate parts 1 and 3 + char* cur = data_.data(); + // Part 1 + *(cur++) = checksum_type; + // Part 2 + part2 = cur; + // Skip over part 2 for now + cur += kFooterPart2Size; + // Part 3 + part3 = cur; + EncodeFixed32(cur, format_version); + cur += 4; + EncodeFixed64(cur, magic_number); + assert(cur + 8 == slice_.data() + slice_.size()); + } else { + slice_ = Slice(data_.data(), Footer::kVersion0EncodedLength); + // Legacy SST files use kCRC32c checksum but it's not stored in footer. + assert(checksum_type == kNoChecksum || checksum_type == kCRC32c); + // Generate part 3 (part 1 empty, skip part 2 for now) + part2 = data_.data(); + part3 = part2 + kFooterPart2Size; + char* cur = part3; + // Use legacy magic numbers to indicate format_version=0, for + // compatibility. No other cases should use format_version=0. + EncodeFixed64(cur, DownconvertToLegacyFooterFormat(magic_number)); + assert(cur + 8 == slice_.data() + slice_.size()); + } + + { + char* cur = part2; + cur = metaindex_handle.EncodeTo(cur); + cur = index_handle.EncodeTo(cur); + // Zero pad remainder + std::fill(cur, part3, char{0}); + } +} + +Status Footer::DecodeFrom(Slice input, uint64_t input_offset, + uint64_t enforce_table_magic_number) { + (void)input_offset; // Future use + + // Only decode to unused Footer + assert(table_magic_number_ == kNullTableMagicNumber); + assert(input != nullptr); + assert(input.size() >= kMinEncodedLength); + + const char* magic_ptr = input.data() + input.size() - kMagicNumberLengthByte; + uint64_t magic = DecodeFixed64(magic_ptr); + + // We check for legacy formats here and silently upconvert them + bool legacy = IsLegacyFooterFormat(magic); + if (legacy) { + magic = UpconvertLegacyFooterFormat(magic); + } + if (enforce_table_magic_number != 0 && enforce_table_magic_number != magic) { + return Status::Corruption("Bad table magic number: expected " + + std::to_string(enforce_table_magic_number) + + ", found " + std::to_string(magic)); + } + table_magic_number_ = magic; + block_trailer_size_ = BlockTrailerSizeForMagicNumber(magic); + + // Parse Part3 + if (legacy) { + // The size is already asserted to be at least kMinEncodedLength + // at the beginning of the function + input.remove_prefix(input.size() - kVersion0EncodedLength); + format_version_ = 0 /* legacy */; + checksum_type_ = kCRC32c; + } else { + const char* part3_ptr = magic_ptr - 4; + format_version_ = DecodeFixed32(part3_ptr); + if (!IsSupportedFormatVersion(format_version_)) { + return Status::Corruption("Corrupt or unsupported format_version: " + + std::to_string(format_version_)); + } + // All known format versions >= 1 occupy exactly this many bytes. 
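+    // Concretely: 1 byte of checksum type + 40 bytes for the two padded
+    // block handles + 4 bytes of format_version + 8 bytes of magic number
+    // = 53 bytes (kNewVersionsEncodedLength).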
+ if (input.size() < kNewVersionsEncodedLength) { + return Status::Corruption("Input is too short to be an SST file"); + } + uint64_t adjustment = input.size() - kNewVersionsEncodedLength; + input.remove_prefix(adjustment); + + // Parse Part1 + char chksum = input.data()[0]; + checksum_type_ = lossless_cast(chksum); + if (!IsSupportedChecksumType(checksum_type())) { + return Status::Corruption("Corrupt or unsupported checksum type: " + + std::to_string(lossless_cast(chksum))); + } + // Consume checksum type field + input.remove_prefix(1); + } + + // Parse Part2 + Status result = metaindex_handle_.DecodeFrom(&input); + if (result.ok()) { + result = index_handle_.DecodeFrom(&input); + } + return result; + // Padding in part2 is ignored +} + +std::string Footer::ToString() const { + std::string result; + result.reserve(1024); + + bool legacy = IsLegacyFooterFormat(table_magic_number_); + if (legacy) { + result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n "); + result.append("index handle: " + index_handle_.ToString() + "\n "); + result.append("table_magic_number: " + std::to_string(table_magic_number_) + + "\n "); + } else { + result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n "); + result.append("index handle: " + index_handle_.ToString() + "\n "); + result.append("table_magic_number: " + std::to_string(table_magic_number_) + + "\n "); + result.append("format version: " + std::to_string(format_version_) + + "\n "); + } + return result; +} + +Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file, + FileSystem& fs, FilePrefetchBuffer* prefetch_buffer, + uint64_t file_size, Footer* footer, + uint64_t enforce_table_magic_number) { + if (file_size < Footer::kMinEncodedLength) { + return Status::Corruption("file is too short (" + + std::to_string(file_size) + + " bytes) to be an " + "sstable: " + + file->file_name()); + } + + std::string footer_buf; + AlignedBuf internal_buf; + Slice footer_input; + uint64_t read_offset = (file_size > Footer::kMaxEncodedLength) + ? file_size - Footer::kMaxEncodedLength + : 0; + Status s; + // TODO: Need to pass appropriate deadline to TryReadFromCache(). Right now, + // there is no readahead for point lookups, so TryReadFromCache will fail if + // the required data is not in the prefetch buffer. Once deadline is enabled + // for iterator, TryReadFromCache might do a readahead. Revisit to see if we + // need to pass a timeout at that point + // TODO: rate limit footer reads. + if (prefetch_buffer == nullptr || + !prefetch_buffer->TryReadFromCache( + opts, file, read_offset, Footer::kMaxEncodedLength, &footer_input, + nullptr, opts.rate_limiter_priority)) { + if (file->use_direct_io()) { + s = file->Read(opts, read_offset, Footer::kMaxEncodedLength, + &footer_input, nullptr, &internal_buf, + opts.rate_limiter_priority); + } else { + footer_buf.reserve(Footer::kMaxEncodedLength); + s = file->Read(opts, read_offset, Footer::kMaxEncodedLength, + &footer_input, &footer_buf[0], nullptr, + opts.rate_limiter_priority); + } + if (!s.ok()) return s; + } + + // Check that we actually read the whole footer from the file. It may be + // that size isn't correct. + if (footer_input.size() < Footer::kMinEncodedLength) { + uint64_t size_on_disk = 0; + if (fs.GetFileSize(file->file_name(), IOOptions(), &size_on_disk, nullptr) + .ok()) { + // Similar to CheckConsistency message, but not completely sure the + // expected size always came from manifest. 
+ return Status::Corruption("Sst file size mismatch: " + file->file_name() + + ". Expected " + std::to_string(file_size) + + ", actual size " + + std::to_string(size_on_disk) + "\n"); + } else { + return Status::Corruption( + "Missing SST footer data in file " + file->file_name() + + " File too short? Expected size: " + std::to_string(file_size)); + } + } + + s = footer->DecodeFrom(footer_input, read_offset, enforce_table_magic_number); + if (!s.ok()) { + s = Status::CopyAppendMessage(s, " in ", file->file_name()); + return s; + } + return Status::OK(); +} + +namespace { +// Custom handling for the last byte of a block, to avoid invoking streaming +// API to get an effective block checksum. This function is its own inverse +// because it uses xor. +inline uint32_t ModifyChecksumForLastByte(uint32_t checksum, char last_byte) { + // This strategy bears some resemblance to extending a CRC checksum by one + // more byte, except we don't need to re-mix the input checksum as long as + // we do this step only once (per checksum). + const uint32_t kRandomPrime = 0x6b9083d9; + return checksum ^ lossless_cast(last_byte) * kRandomPrime; +} +} // namespace + +uint32_t ComputeBuiltinChecksum(ChecksumType type, const char* data, + size_t data_size) { + switch (type) { + case kCRC32c: + return crc32c::Mask(crc32c::Value(data, data_size)); + case kxxHash: + return XXH32(data, data_size, /*seed*/ 0); + case kxxHash64: + return Lower32of64(XXH64(data, data_size, /*seed*/ 0)); + case kXXH3: { + if (data_size == 0) { + // Special case because of special handling for last byte, not + // present in this case. Can be any value different from other + // small input size checksums. + return 0; + } else { + // See corresponding code in ComputeBuiltinChecksumWithLastByte + uint32_t v = Lower32of64(XXH3_64bits(data, data_size - 1)); + return ModifyChecksumForLastByte(v, data[data_size - 1]); + } + } + default: // including kNoChecksum + return 0; + } +} + +uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type, const char* data, + size_t data_size, char last_byte) { + switch (type) { + case kCRC32c: { + uint32_t crc = crc32c::Value(data, data_size); + // Extend to cover last byte (compression type) + crc = crc32c::Extend(crc, &last_byte, 1); + return crc32c::Mask(crc); + } + case kxxHash: { + XXH32_state_t* const state = XXH32_createState(); + XXH32_reset(state, 0); + XXH32_update(state, data, data_size); + // Extend to cover last byte (compression type) + XXH32_update(state, &last_byte, 1); + uint32_t v = XXH32_digest(state); + XXH32_freeState(state); + return v; + } + case kxxHash64: { + XXH64_state_t* const state = XXH64_createState(); + XXH64_reset(state, 0); + XXH64_update(state, data, data_size); + // Extend to cover last byte (compression type) + XXH64_update(state, &last_byte, 1); + uint32_t v = Lower32of64(XXH64_digest(state)); + XXH64_freeState(state); + return v; + } + case kXXH3: { + // XXH3 is a complicated hash function that is extremely fast on + // contiguous input, but that makes its streaming support rather + // complex. It is worth custom handling of the last byte (`type`) + // in order to avoid allocating a large state object and bringing + // that code complexity into CPU working set. 
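+      // Because ModifyChecksumForLastByte only xors in a mix of the last
+      // byte, applying it twice cancels out, which is what preserves the
+      // documented identity for data_size >= 1:
+      //   ComputeBuiltinChecksum(t, data, n) ==
+      //   ComputeBuiltinChecksumWithLastByte(t, data, n - 1, data[n - 1])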
+ uint32_t v = Lower32of64(XXH3_64bits(data, data_size)); + return ModifyChecksumForLastByte(v, last_byte); + } + default: // including kNoChecksum + return 0; + } +} + +Status UncompressBlockData(const UncompressionInfo& uncompression_info, + const char* data, size_t size, + BlockContents* out_contents, uint32_t format_version, + const ImmutableOptions& ioptions, + MemoryAllocator* allocator) { + Status ret = Status::OK(); + + assert(uncompression_info.type() != kNoCompression && + "Invalid compression type"); + + StopWatchNano timer(ioptions.clock, + ShouldReportDetailedTime(ioptions.env, ioptions.stats)); + size_t uncompressed_size = 0; + CacheAllocationPtr ubuf = + UncompressData(uncompression_info, data, size, &uncompressed_size, + GetCompressFormatForVersion(format_version), allocator); + if (!ubuf) { + if (!CompressionTypeSupported(uncompression_info.type())) { + return Status::NotSupported( + "Unsupported compression method for this build", + CompressionTypeToString(uncompression_info.type())); + } else { + return Status::Corruption( + "Corrupted compressed block contents", + CompressionTypeToString(uncompression_info.type())); + } + } + + *out_contents = BlockContents(std::move(ubuf), uncompressed_size); + + if (ShouldReportDetailedTime(ioptions.env, ioptions.stats)) { + RecordTimeToHistogram(ioptions.stats, DECOMPRESSION_TIMES_NANOS, + timer.ElapsedNanos()); + } + RecordTick(ioptions.stats, BYTES_DECOMPRESSED_FROM, size); + RecordTick(ioptions.stats, BYTES_DECOMPRESSED_TO, out_contents->data.size()); + RecordTick(ioptions.stats, NUMBER_BLOCK_DECOMPRESSED); + + TEST_SYNC_POINT_CALLBACK("UncompressBlockData:TamperWithReturnValue", + static_cast(&ret)); + TEST_SYNC_POINT_CALLBACK( + "UncompressBlockData:" + "TamperWithDecompressionOutput", + static_cast(out_contents)); + + return ret; +} + +Status UncompressSerializedBlock(const UncompressionInfo& uncompression_info, + const char* data, size_t size, + BlockContents* out_contents, + uint32_t format_version, + const ImmutableOptions& ioptions, + MemoryAllocator* allocator) { + assert(data[size] != kNoCompression); + assert(data[size] == static_cast(uncompression_info.type())); + return UncompressBlockData(uncompression_info, data, size, out_contents, + format_version, ioptions, allocator); +} + +// Replace the contents of db_host_id with the actual hostname, if db_host_id +// matches the keyword kHostnameForDbHostId +Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id) { + assert(db_host_id); + if (*db_host_id == kHostnameForDbHostId) { + Status s = env->GetHostNameString(db_host_id); + if (!s.ok()) { + db_host_id->clear(); + } + return s; + } + + return Status::OK(); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.h new file mode 100644 index 0000000000..c375165bf6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/format.h @@ -0,0 +1,378 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#pragma once + +#include +#include +#include + +#include "file/file_prefetch_buffer.h" +#include "file/random_access_file_reader.h" +#include "memory/memory_allocator_impl.h" +#include "options/cf_options.h" +#include "port/malloc.h" +#include "port/port.h" // noexcept +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +class RandomAccessFile; +struct ReadOptions; + +bool ShouldReportDetailedTime(Env* env, Statistics* stats); + +// the length of the magic number in bytes. +constexpr uint32_t kMagicNumberLengthByte = 8; + +// BlockHandle is a pointer to the extent of a file that stores a data +// block or a meta block. +class BlockHandle { + public: + // Creates a block handle with special values indicating "uninitialized," + // distinct from the "null" block handle. + BlockHandle(); + BlockHandle(uint64_t offset, uint64_t size); + + // The offset of the block in the file. + uint64_t offset() const { return offset_; } + void set_offset(uint64_t _offset) { offset_ = _offset; } + + // The size of the stored block + uint64_t size() const { return size_; } + void set_size(uint64_t _size) { size_ = _size; } + + void EncodeTo(std::string* dst) const; + char* EncodeTo(char* dst) const; + Status DecodeFrom(Slice* input); + Status DecodeSizeFrom(uint64_t offset, Slice* input); + + // Return a string that contains the copy of handle. + std::string ToString(bool hex = true) const; + + // if the block handle's offset and size are both "0", we will view it + // as a null block handle that points to no where. + bool IsNull() const { return offset_ == 0 && size_ == 0; } + + static const BlockHandle& NullBlockHandle() { return kNullBlockHandle; } + + // Maximum encoding length of a BlockHandle + static constexpr uint32_t kMaxEncodedLength = 2 * kMaxVarint64Length; + + inline bool operator==(const BlockHandle& rhs) const { + return offset_ == rhs.offset_ && size_ == rhs.size_; + } + inline bool operator!=(const BlockHandle& rhs) const { + return !(*this == rhs); + } + + private: + uint64_t offset_; + uint64_t size_; + + static const BlockHandle kNullBlockHandle; +}; + +// Value in block-based table file index. +// +// The index entry for block n is: y -> h, [x], +// where: y is some key between the last key of block n (inclusive) and the +// first key of block n+1 (exclusive); h is BlockHandle pointing to block n; +// x, if present, is the first key of block n (unshortened). +// This struct represents the "h, [x]" part. +struct IndexValue { + BlockHandle handle; + // Empty means unknown. + Slice first_internal_key; + + IndexValue() = default; + IndexValue(BlockHandle _handle, Slice _first_internal_key) + : handle(_handle), first_internal_key(_first_internal_key) {} + + // have_first_key indicates whether the `first_internal_key` is used. + // If previous_handle is not null, delta encoding is used; + // in this case, the two handles must point to consecutive blocks: + // handle.offset() == + // previous_handle->offset() + previous_handle->size() + kBlockTrailerSize + void EncodeTo(std::string* dst, bool have_first_key, + const BlockHandle* previous_handle) const; + Status DecodeFrom(Slice* input, bool have_first_key, + const BlockHandle* previous_handle); + + std::string ToString(bool hex, bool have_first_key) const; +}; + +inline uint32_t GetCompressFormatForVersion(uint32_t format_version) { + // As of format_version 2, we encode compressed block with + // compress_format_version == 2. 
+
+constexpr uint32_t kLatestFormatVersion = 5;
+
+inline bool IsSupportedFormatVersion(uint32_t version) {
+  return version <= kLatestFormatVersion;
+}
+
+// Footer encapsulates the fixed information stored at the tail end of every
+// SST file. In general, it should only include things that cannot go
+// elsewhere under the metaindex block. For example, checksum_type is
+// required for verifying metaindex block checksum (when applicable), but
+// index block handle can easily go in metaindex block (possible future).
+// See also FooterBuilder below.
+class Footer {
+ public:
+  // Create empty. Populate using DecodeFrom.
+  Footer() {}
+
+  // Deserialize a footer (populate fields) from `input` and check for various
+  // corruptions. `input_offset` is the offset within the target file of
+  // `input` buffer (future use).
+  // If enforce_table_magic_number != 0, will return corruption if table magic
+  // number is not equal to enforce_table_magic_number.
+  Status DecodeFrom(Slice input, uint64_t input_offset,
+                    uint64_t enforce_table_magic_number = 0);
+
+  // Table magic number identifies the file as a RocksDB SST file and which
+  // kind of SST format is in use.
+  uint64_t table_magic_number() const { return table_magic_number_; }
+
+  // A version (footer and more) within a kind of SST. (It would add more
+  // unnecessary complexity to separate footer versions and
+  // BBTO::format_version.)
+  uint32_t format_version() const { return format_version_; }
+
+  // Block handle for metaindex block.
+  const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
+
+  // Block handle for (top-level) index block.
+  const BlockHandle& index_handle() const { return index_handle_; }
+
+  // Checksum type used in the file.
+  ChecksumType checksum_type() const {
+    return static_cast<ChecksumType>(checksum_type_);
+  }
+
+  // Block trailer size used by file with this footer (e.g. 5 for block-based
+  // table and 0 for plain table). This is inferred from magic number so
+  // not in the serialized form.
+  inline size_t GetBlockTrailerSize() const { return block_trailer_size_; }
+
+  // Convert this object to a human readable form
+  std::string ToString() const;
+
+  // Encoded lengths of Footers. Bytes for serialized Footer will always be
+  // >= kMinEncodedLength and <= kMaxEncodedLength.
+  //
+  // Footer version 0 (legacy) will always occupy exactly this many bytes.
+  // It consists of two block handles, padding, and a magic number.
+  static constexpr uint32_t kVersion0EncodedLength =
+      2 * BlockHandle::kMaxEncodedLength + kMagicNumberLengthByte;
+  static constexpr uint32_t kMinEncodedLength = kVersion0EncodedLength;
+
+  // Footer of versions 1 and higher will always occupy exactly this many
+  // bytes. It originally consisted of the checksum type, two block handles,
+  // padding (to maximum handle encoding size), a format version number, and a
+  // magic number.
+  static constexpr uint32_t kNewVersionsEncodedLength =
+      1 + 2 * BlockHandle::kMaxEncodedLength + 4 + kMagicNumberLengthByte;
+  static constexpr uint32_t kMaxEncodedLength = kNewVersionsEncodedLength;
+
+  static constexpr uint64_t kNullTableMagicNumber = 0;
+
+  static constexpr uint32_t kInvalidFormatVersion = 0xffffffffU;
+
+ private:
+  static constexpr int kInvalidChecksumType =
+      (1 << (sizeof(ChecksumType) * 8)) | kNoChecksum;
+
+  uint64_t table_magic_number_ = kNullTableMagicNumber;
+  uint32_t format_version_ = kInvalidFormatVersion;
+  BlockHandle metaindex_handle_;
+  BlockHandle index_handle_;
+  int checksum_type_ = kInvalidChecksumType;
+  uint8_t block_trailer_size_ = 0;
+};
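Editorial note: the encoded-length constants above can be checked by hand. Assuming RocksDB's kMaxVarint64Length of 10 (so a BlockHandle encodes to at most 20 bytes) and the 8-byte magic number defined in this header:

    #include <cstdint>

    // Worked arithmetic for the footer size constants (values assumed from
    // this header and util/coding.h; illustration only).
    constexpr uint32_t kMaxVarint64Length = 10;
    constexpr uint32_t kMagicNumberLengthByte = 8;
    constexpr uint32_t kHandleMaxEncodedLength = 2 * kMaxVarint64Length;  // 20
    // Version 0: two block handles + magic number.
    static_assert(2 * kHandleMaxEncodedLength + kMagicNumberLengthByte == 48,
                  "two handles plus magic number");
    // Versions 1+: checksum type byte + two handles + 4-byte version + magic.
    static_assert(
        1 + 2 * kHandleMaxEncodedLength + 4 + kMagicNumberLengthByte == 53,
        "checksum byte, handles, version, magic");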
+
+// Builder for Footer
+class FooterBuilder {
+ public:
+  // Run the builder on its inputs. This is a single step with lots of
+  // parameters for efficiency (based on perf testing).
+  // * table_magic_number identifies the file as a RocksDB SST file and which
+  //   kind of SST format is in use.
+  // * format_version is a version for the footer and can also apply to other
+  //   aspects of the SST file (see BlockBasedTableOptions::format_version).
+  //   NOTE: To save complexity in the caller, when format_version == 0 and
+  //   there is a corresponding legacy magic number to the one specified, the
+  //   legacy magic number will be written for forward compatibility.
+  // * footer_offset is the file offset where the footer will be written
+  //   (for future use).
+  // * checksum_type is for formats using block checksums.
+  // * index_handle is optional for some kinds of SST files.
+  void Build(uint64_t table_magic_number, uint32_t format_version,
+             uint64_t footer_offset, ChecksumType checksum_type,
+             const BlockHandle& metaindex_handle,
+             const BlockHandle& index_handle = BlockHandle::NullBlockHandle());
+
+  // After Build(), get a Slice for the serialized Footer, backed by this
+  // FooterBuilder.
+  const Slice& GetSlice() const {
+    assert(slice_.size());
+    return slice_;
+  }
+
+ private:
+  Slice slice_;
+  std::array<char, Footer::kMaxEncodedLength> data_;
+};
+
+// Read the footer from file
+// If enforce_table_magic_number != 0, ReadFooterFromFile() will return
+// corruption if table_magic_number is not equal to enforce_table_magic_number
+Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
+                          FileSystem& fs, FilePrefetchBuffer* prefetch_buffer,
+                          uint64_t file_size, Footer* footer,
+                          uint64_t enforce_table_magic_number = 0);
+
+// Computes a checksum using the given ChecksumType. Sometimes we need to
+// include one more input byte logically at the end but not part of the main
+// data buffer. If data_size >= 1, then
+//   ComputeBuiltinChecksum(type, data, size)
+//   ==
+//   ComputeBuiltinChecksumWithLastByte(type, data, size - 1, data[size - 1])
+uint32_t ComputeBuiltinChecksum(ChecksumType type, const char* data,
+                                size_t size);
+uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type,
+                                            const char* data, size_t size,
+                                            char last_byte);
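Editorial note: the identity documented above can be demonstrated with a toy checksum in place of the real builtin ones (ToyChecksum is a stand-in; only the contract is being illustrated):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <string>

    // Toy rolling checksum (stand-in for the builtin checksums).
    uint32_t ToyChecksum(const char* data, size_t n) {
      uint32_t x = 0;
      for (size_t i = 0; i < n; ++i) {
        x = x * 131 + static_cast<unsigned char>(data[i]);
      }
      return x;
    }

    // Checksums `n` bytes plus one logical trailing byte, as described above.
    uint32_t ToyChecksumWithLastByte(const char* data, size_t n, char last) {
      return ToyChecksum(data, n) * 131 + static_cast<unsigned char>(last);
    }

    int main() {
      std::string s = "block-bytes";
      assert(ToyChecksum(s.data(), s.size()) ==
             ToyChecksumWithLastByte(s.data(), s.size() - 1, s.back()));
    }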
+
+// Represents the contents of a block read from an SST file. Depending on how
+// it's created, it may or may not own the actual block bytes. As an example,
+// BlockContents objects representing data read from mmapped files only point
+// into the mmapped region. Depending on context, it might be a serialized
+// (potentially compressed) block, including a trailer beyond `size`, or an
+// uncompressed block.
+//
+// Please try to use this terminology when dealing with blocks:
+// * "Serialized block" - bytes that go into storage. For block-based table
+// (usually the case) this includes the block trailer. Here the `size` does
+// not include the trailer, but other places in code might include the trailer
+// in the size.
+// * "Maybe compressed block" - like a serialized block, but without the
+// trailer (or no promise of including a trailer). Must be accompanied by a
+// CompressionType in some other variable or field.
+// * "Uncompressed block" - "payload" bytes that are either stored with no
+// compression, used as input to a compression function, or the result of a
+// decompression function.
+// * "Parsed block" - an in-memory form of a block in block cache, as it is
+// used by the table reader. Different C++ types are used depending on the
+// block type (see block_cache.h). Only trivially parsable block types
+// use BlockContents as the parsed form.
+//
+struct BlockContents {
+  // Points to block payload (without trailer)
+  Slice data;
+  CacheAllocationPtr allocation;
+
+#ifndef NDEBUG
+  // Whether there is a known trailer after what is pointed to by `data`.
+  // See BlockBasedTable::GetCompressionType.
+  bool has_trailer = false;
+#endif  // NDEBUG
+
+  BlockContents() {}
+
+  // Does not take ownership of the underlying data bytes.
+  BlockContents(const Slice& _data) : data(_data) {}
+
+  // Takes ownership of the underlying data bytes.
+  BlockContents(CacheAllocationPtr&& _data, size_t _size)
+      : data(_data.get(), _size), allocation(std::move(_data)) {}
+
+  // Takes ownership of the underlying data bytes.
+  BlockContents(std::unique_ptr<char[]>&& _data, size_t _size)
+      : data(_data.get(), _size) {
+    allocation.reset(_data.release());
+  }
+
+  // Returns whether the object has ownership of the underlying data bytes.
+  bool own_bytes() const { return allocation.get() != nullptr; }
+
+  // The additional memory space taken by the block data.
+  size_t usable_size() const {
+    if (allocation.get() != nullptr) {
+      auto allocator = allocation.get_deleter().allocator;
+      if (allocator) {
+        return allocator->UsableSize(allocation.get(), data.size());
+      }
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+      return malloc_usable_size(allocation.get());
+#else
+      return data.size();
+#endif  // ROCKSDB_MALLOC_USABLE_SIZE
+    } else {
+      return 0;  // no extra memory is occupied by the data
+    }
+  }
+
+  size_t ApproximateMemoryUsage() const {
+    return usable_size() + sizeof(*this);
+  }
+
+  BlockContents(BlockContents&& other) noexcept { *this = std::move(other); }
+
+  BlockContents& operator=(BlockContents&& other) {
+    data = std::move(other.data);
+    allocation = std::move(other.allocation);
+#ifndef NDEBUG
+    has_trailer = other.has_trailer;
+#endif  // NDEBUG
+    return *this;
+  }
+};
+
+// The `data` points to serialized block contents read in from file, which
+// must be compressed and include a trailer beyond `size`. A new buffer is
+// allocated with the given allocator (or default) and the uncompressed
+// contents are returned in `out_contents`.
+// format_version is as defined in include/rocksdb/table.h, which is
+// used to determine compression format version.
+Status UncompressSerializedBlock(const UncompressionInfo& info,
+                                 const char* data, size_t size,
+                                 BlockContents* out_contents,
+                                 uint32_t format_version,
+                                 const ImmutableOptions& ioptions,
+                                 MemoryAllocator* allocator = nullptr);
+
+// This is a variant of UncompressSerializedBlock that does not expect a
+// block trailer beyond `size`. (CompressionType is taken from `info`.)
+Status UncompressBlockData(const UncompressionInfo& info, const char* data,
+                           size_t size, BlockContents* out_contents,
+                           uint32_t format_version,
+                           const ImmutableOptions& ioptions,
+                           MemoryAllocator* allocator = nullptr);
+
+// Replace db_host_id contents with the real hostname if necessary
+Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id);
+
+// Implementation details follow. Clients should ignore them.
+
+// TODO(andrewkr): we should prefer one way of representing a
+// null/uninitialized BlockHandle. Currently we use zeros for null and use
+// negation-of-zeros for uninitialized.
+inline BlockHandle::BlockHandle() : BlockHandle(~uint64_t{0}, ~uint64_t{0}) {}
+
+inline BlockHandle::BlockHandle(uint64_t _offset, uint64_t _size)
+    : offset_(_offset), size_(_size) {}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.cc
new file mode 100644
index 0000000000..8f5cd75f15
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.cc
@@ -0,0 +1,616 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "table/get_context.h"
+
+#include "db/blob/blob_fetcher.h"
+#include "db/merge_helper.h"
+#include "db/pinned_iterators_manager.h"
+#include "db/read_callback.h"
+#include "db/wide/wide_column_serialization.h"
+#include "monitoring/file_read_sample.h"
+#include "monitoring/perf_context_imp.h"
+#include "monitoring/statistics_impl.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/system_clock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+void appendToReplayLog(std::string* replay_log, ValueType type, Slice value) {
+  if (replay_log) {
+    if (replay_log->empty()) {
+      // Optimization: in the common case of only one operation in the
+      // log, we allocate the exact amount of space needed.
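+      // Each replay-log record is one ValueType byte followed by a
+      // length-prefixed slice (varint32 length, then the value bytes);
+      // replayGetContextLog() decodes records in this same order.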
+ replay_log->reserve(1 + VarintLength(value.size()) + value.size()); + } + replay_log->push_back(type); + PutLengthPrefixedSlice(replay_log, value); + } +} + +} // namespace + +GetContext::GetContext( + const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger, + Statistics* statistics, GetState init_state, const Slice& user_key, + PinnableSlice* pinnable_val, PinnableWideColumns* columns, + std::string* timestamp, bool* value_found, MergeContext* merge_context, + bool do_merge, SequenceNumber* _max_covering_tombstone_seq, + SystemClock* clock, SequenceNumber* seq, + PinnedIteratorsManager* _pinned_iters_mgr, ReadCallback* callback, + bool* is_blob_index, uint64_t tracing_get_id, BlobFetcher* blob_fetcher) + : ucmp_(ucmp), + merge_operator_(merge_operator), + logger_(logger), + statistics_(statistics), + state_(init_state), + user_key_(user_key), + pinnable_val_(pinnable_val), + columns_(columns), + timestamp_(timestamp), + value_found_(value_found), + merge_context_(merge_context), + max_covering_tombstone_seq_(_max_covering_tombstone_seq), + clock_(clock), + seq_(seq), + replay_log_(nullptr), + pinned_iters_mgr_(_pinned_iters_mgr), + callback_(callback), + do_merge_(do_merge), + is_blob_index_(is_blob_index), + tracing_get_id_(tracing_get_id), + blob_fetcher_(blob_fetcher) { + if (seq_) { + *seq_ = kMaxSequenceNumber; + } + sample_ = should_sample_file_read(); +} + +GetContext::GetContext(const Comparator* ucmp, + const MergeOperator* merge_operator, Logger* logger, + Statistics* statistics, GetState init_state, + const Slice& user_key, PinnableSlice* pinnable_val, + PinnableWideColumns* columns, bool* value_found, + MergeContext* merge_context, bool do_merge, + SequenceNumber* _max_covering_tombstone_seq, + SystemClock* clock, SequenceNumber* seq, + PinnedIteratorsManager* _pinned_iters_mgr, + ReadCallback* callback, bool* is_blob_index, + uint64_t tracing_get_id, BlobFetcher* blob_fetcher) + : GetContext(ucmp, merge_operator, logger, statistics, init_state, user_key, + pinnable_val, columns, /*timestamp=*/nullptr, value_found, + merge_context, do_merge, _max_covering_tombstone_seq, clock, + seq, _pinned_iters_mgr, callback, is_blob_index, + tracing_get_id, blob_fetcher) {} + +// Called from TableCache::Get and Table::Get when file/block in which +// key may exist are not there in TableCache/BlockCache respectively. 
In this
+// case we can't guarantee that key does not exist and are not permitted to do
+// IO to be certain. Set the status=kFound and value_found=false to let the
+// caller know that the key may exist but is not there in memory.
+void GetContext::MarkKeyMayExist() {
+  state_ = kFound;
+  if (value_found_ != nullptr) {
+    *value_found_ = false;
+  }
+}
+
+void GetContext::SaveValue(const Slice& value, SequenceNumber /*seq*/) {
+  assert(state_ == kNotFound);
+  appendToReplayLog(replay_log_, kTypeValue, value);
+
+  state_ = kFound;
+  if (LIKELY(pinnable_val_ != nullptr)) {
+    pinnable_val_->PinSelf(value);
+  }
+}
+
+void GetContext::ReportCounters() {
+  if (get_context_stats_.num_cache_hit > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_HIT, get_context_stats_.num_cache_hit);
+  }
+  if (get_context_stats_.num_cache_index_hit > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_INDEX_HIT,
+               get_context_stats_.num_cache_index_hit);
+  }
+  if (get_context_stats_.num_cache_data_hit > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_DATA_HIT,
+               get_context_stats_.num_cache_data_hit);
+  }
+  if (get_context_stats_.num_cache_filter_hit > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_FILTER_HIT,
+               get_context_stats_.num_cache_filter_hit);
+  }
+  if (get_context_stats_.num_cache_compression_dict_hit > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_HIT,
+               get_context_stats_.num_cache_compression_dict_hit);
+  }
+  if (get_context_stats_.num_cache_index_miss > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_INDEX_MISS,
+               get_context_stats_.num_cache_index_miss);
+  }
+  if (get_context_stats_.num_cache_filter_miss > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_FILTER_MISS,
+               get_context_stats_.num_cache_filter_miss);
+  }
+  if (get_context_stats_.num_cache_data_miss > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_DATA_MISS,
+               get_context_stats_.num_cache_data_miss);
+  }
+  if (get_context_stats_.num_cache_compression_dict_miss > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_MISS,
+               get_context_stats_.num_cache_compression_dict_miss);
+  }
+  if (get_context_stats_.num_cache_bytes_read > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_BYTES_READ,
+               get_context_stats_.num_cache_bytes_read);
+  }
+  if (get_context_stats_.num_cache_miss > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_MISS,
+               get_context_stats_.num_cache_miss);
+  }
+  if (get_context_stats_.num_cache_add > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_ADD, get_context_stats_.num_cache_add);
+  }
+  if (get_context_stats_.num_cache_add_redundant > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_ADD_REDUNDANT,
+               get_context_stats_.num_cache_add_redundant);
+  }
+  if (get_context_stats_.num_cache_bytes_write > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_BYTES_WRITE,
+               get_context_stats_.num_cache_bytes_write);
+  }
+  if (get_context_stats_.num_cache_index_add > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_INDEX_ADD,
+               get_context_stats_.num_cache_index_add);
+  }
+  if (get_context_stats_.num_cache_index_add_redundant > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_INDEX_ADD_REDUNDANT,
+               get_context_stats_.num_cache_index_add_redundant);
+  }
+  if (get_context_stats_.num_cache_index_bytes_insert > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_INDEX_BYTES_INSERT,
+               get_context_stats_.num_cache_index_bytes_insert);
+  }
+  if (get_context_stats_.num_cache_data_add > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_DATA_ADD,
+               get_context_stats_.num_cache_data_add);
+  }
+  if (get_context_stats_.num_cache_data_add_redundant > 0) {
+    RecordTick(statistics_, BLOCK_CACHE_DATA_ADD_REDUNDANT,
+               
get_context_stats_.num_cache_data_add_redundant); + } + if (get_context_stats_.num_cache_data_bytes_insert > 0) { + RecordTick(statistics_, BLOCK_CACHE_DATA_BYTES_INSERT, + get_context_stats_.num_cache_data_bytes_insert); + } + if (get_context_stats_.num_cache_filter_add > 0) { + RecordTick(statistics_, BLOCK_CACHE_FILTER_ADD, + get_context_stats_.num_cache_filter_add); + } + if (get_context_stats_.num_cache_filter_add_redundant > 0) { + RecordTick(statistics_, BLOCK_CACHE_FILTER_ADD_REDUNDANT, + get_context_stats_.num_cache_filter_add_redundant); + } + if (get_context_stats_.num_cache_filter_bytes_insert > 0) { + RecordTick(statistics_, BLOCK_CACHE_FILTER_BYTES_INSERT, + get_context_stats_.num_cache_filter_bytes_insert); + } + if (get_context_stats_.num_cache_compression_dict_add > 0) { + RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_ADD, + get_context_stats_.num_cache_compression_dict_add); + } + if (get_context_stats_.num_cache_compression_dict_add_redundant > 0) { + RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT, + get_context_stats_.num_cache_compression_dict_add_redundant); + } + if (get_context_stats_.num_cache_compression_dict_bytes_insert > 0) { + RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, + get_context_stats_.num_cache_compression_dict_bytes_insert); + } +} + +bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, + const Slice& value, bool* matched, + Cleanable* value_pinner) { + assert(matched); + assert((state_ != kMerge && parsed_key.type != kTypeMerge) || + merge_context_ != nullptr); + if (ucmp_->EqualWithoutTimestamp(parsed_key.user_key, user_key_)) { + *matched = true; + // If the value is not in the snapshot, skip it + if (!CheckCallback(parsed_key.sequence)) { + return true; // to continue to the next seq + } + + appendToReplayLog(replay_log_, parsed_key.type, value); + + if (seq_ != nullptr) { + // Set the sequence number if it is uninitialized + if (*seq_ == kMaxSequenceNumber) { + *seq_ = parsed_key.sequence; + } + if (max_covering_tombstone_seq_) { + *seq_ = std::max(*seq_, *max_covering_tombstone_seq_); + } + } + + size_t ts_sz = ucmp_->timestamp_size(); + if (ts_sz > 0 && timestamp_ != nullptr) { + if (!timestamp_->empty()) { + assert(ts_sz == timestamp_->size()); + // `timestamp` can be set before `SaveValue` is ever called + // when max_covering_tombstone_seq_ was set. + // If this key has a higher sequence number than range tombstone, + // then timestamp should be updated. `ts_from_rangetombstone_` is + // set to false afterwards so that only the key with highest seqno + // updates the timestamp. + if (ts_from_rangetombstone_) { + assert(max_covering_tombstone_seq_); + if (parsed_key.sequence > *max_covering_tombstone_seq_) { + Slice ts = ExtractTimestampFromUserKey(parsed_key.user_key, ts_sz); + timestamp_->assign(ts.data(), ts.size()); + ts_from_rangetombstone_ = false; + } + } + } + // TODO optimize for small size ts + const std::string kMaxTs(ts_sz, '\xff'); + if (timestamp_->empty() || + ucmp_->CompareTimestamp(*timestamp_, kMaxTs) == 0) { + Slice ts = ExtractTimestampFromUserKey(parsed_key.user_key, ts_sz); + timestamp_->assign(ts.data(), ts.size()); + } + } + + auto type = parsed_key.type; + // Key matches. 
Process it + if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex || + type == kTypeWideColumnEntity || type == kTypeDeletion || + type == kTypeDeletionWithTimestamp || type == kTypeSingleDeletion) && + max_covering_tombstone_seq_ != nullptr && + *max_covering_tombstone_seq_ > parsed_key.sequence) { + // Note that deletion types are also considered, this is for the case + // when we need to return timestamp to user. If a range tombstone has a + // higher seqno than point tombstone, its timestamp should be returned. + type = kTypeRangeDeletion; + } + switch (type) { + case kTypeValue: + case kTypeBlobIndex: + case kTypeWideColumnEntity: + assert(state_ == kNotFound || state_ == kMerge); + if (type == kTypeBlobIndex) { + if (is_blob_index_ == nullptr) { + // Blob value not supported. Stop. + state_ = kUnexpectedBlobIndex; + return false; + } + } + + if (is_blob_index_ != nullptr) { + *is_blob_index_ = (type == kTypeBlobIndex); + } + + if (kNotFound == state_) { + state_ = kFound; + if (do_merge_) { + if (type == kTypeBlobIndex && ucmp_->timestamp_size() != 0) { + ukey_with_ts_found_.PinSelf(parsed_key.user_key); + } + if (LIKELY(pinnable_val_ != nullptr)) { + Slice value_to_use = value; + + if (type == kTypeWideColumnEntity) { + Slice value_copy = value; + + if (!WideColumnSerialization::GetValueOfDefaultColumn( + value_copy, value_to_use) + .ok()) { + state_ = kCorrupt; + return false; + } + } + + if (LIKELY(value_pinner != nullptr)) { + // If the backing resources for the value are provided, pin them + pinnable_val_->PinSlice(value_to_use, value_pinner); + } else { + TEST_SYNC_POINT_CALLBACK("GetContext::SaveValue::PinSelf", + this); + // Otherwise copy the value + pinnable_val_->PinSelf(value_to_use); + } + } else if (columns_ != nullptr) { + if (type == kTypeWideColumnEntity) { + if (!columns_->SetWideColumnValue(value, value_pinner).ok()) { + state_ = kCorrupt; + return false; + } + } else { + columns_->SetPlainValue(value, value_pinner); + } + } + } else { + // It means this function is called as part of DB GetMergeOperands + // API and the current value should be part of + // merge_context_->operand_list + if (type == kTypeBlobIndex) { + PinnableSlice pin_val; + if (GetBlobValue(parsed_key.user_key, value, &pin_val) == false) { + return false; + } + Slice blob_value(pin_val); + push_operand(blob_value, nullptr); + } else if (type == kTypeWideColumnEntity) { + Slice value_copy = value; + Slice value_of_default; + + if (!WideColumnSerialization::GetValueOfDefaultColumn( + value_copy, value_of_default) + .ok()) { + state_ = kCorrupt; + return false; + } + + push_operand(value_of_default, value_pinner); + } else { + assert(type == kTypeValue); + push_operand(value, value_pinner); + } + } + } else if (kMerge == state_) { + assert(merge_operator_ != nullptr); + if (type == kTypeBlobIndex) { + PinnableSlice pin_val; + if (GetBlobValue(parsed_key.user_key, value, &pin_val) == false) { + return false; + } + Slice blob_value(pin_val); + state_ = kFound; + if (do_merge_) { + Merge(&blob_value); + } else { + // It means this function is called as part of DB GetMergeOperands + // API and the current value should be part of + // merge_context_->operand_list + push_operand(blob_value, nullptr); + } + } else if (type == kTypeWideColumnEntity) { + state_ = kFound; + + if (do_merge_) { + MergeWithEntity(value); + } else { + // It means this function is called as part of DB GetMergeOperands + // API and the current value should be part of + // merge_context_->operand_list + Slice 
value_copy = value; + Slice value_of_default; + + if (!WideColumnSerialization::GetValueOfDefaultColumn( + value_copy, value_of_default) + .ok()) { + state_ = kCorrupt; + return false; + } + + push_operand(value_of_default, value_pinner); + } + } else { + assert(type == kTypeValue); + + state_ = kFound; + if (do_merge_) { + Merge(&value); + } else { + // It means this function is called as part of DB GetMergeOperands + // API and the current value should be part of + // merge_context_->operand_list + push_operand(value, value_pinner); + } + } + } + return false; + + case kTypeDeletion: + case kTypeDeletionWithTimestamp: + case kTypeSingleDeletion: + case kTypeRangeDeletion: + // TODO(noetzli): Verify correctness once merge of single-deletes + // is supported + assert(state_ == kNotFound || state_ == kMerge); + if (kNotFound == state_) { + state_ = kDeleted; + } else if (kMerge == state_) { + state_ = kFound; + if (do_merge_) { + Merge(nullptr); + } + // If do_merge_ = false then the current value shouldn't be part of + // merge_context_->operand_list + } + return false; + + case kTypeMerge: + assert(state_ == kNotFound || state_ == kMerge); + state_ = kMerge; + // value_pinner is not set from plain_table_reader.cc for example. + push_operand(value, value_pinner); + PERF_COUNTER_ADD(internal_merge_point_lookup_count, 1); + + if (do_merge_ && merge_operator_ != nullptr && + merge_operator_->ShouldMerge( + merge_context_->GetOperandsDirectionBackward())) { + state_ = kFound; + Merge(nullptr); + return false; + } + return true; + + default: + assert(false); + break; + } + } + + // state_ could be Corrupt, merge or notfound + return false; +} + +void GetContext::Merge(const Slice* value) { + assert(do_merge_); + assert(!pinnable_val_ || !columns_); + + std::string result; + // `op_failure_scope` (an output parameter) is not provided (set to nullptr) + // since a failure must be propagated regardless of its value. + const Status s = MergeHelper::TimedFullMerge( + merge_operator_, user_key_, value, merge_context_->GetOperands(), &result, + logger_, statistics_, clock_, /* result_operand */ nullptr, + /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr); + if (!s.ok()) { + if (s.subcode() == Status::SubCode::kMergeOperatorFailed) { + state_ = kMergeOperatorFailed; + } else { + state_ = kCorrupt; + } + return; + } + + if (LIKELY(pinnable_val_ != nullptr)) { + *(pinnable_val_->GetSelf()) = std::move(result); + pinnable_val_->PinSelf(); + return; + } + + assert(columns_); + columns_->SetPlainValue(std::move(result)); +} + +void GetContext::MergeWithEntity(Slice entity) { + assert(do_merge_); + assert(!pinnable_val_ || !columns_); + + if (LIKELY(pinnable_val_ != nullptr)) { + Slice value_of_default; + + { + const Status s = WideColumnSerialization::GetValueOfDefaultColumn( + entity, value_of_default); + if (!s.ok()) { + state_ = kCorrupt; + return; + } + } + + { + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its value. 
+      const Status s = MergeHelper::TimedFullMerge(
+          merge_operator_, user_key_, &value_of_default,
+          merge_context_->GetOperands(), pinnable_val_->GetSelf(), logger_,
+          statistics_, clock_, /* result_operand */ nullptr,
+          /* update_num_ops_stats */ true,
+          /* op_failure_scope */ nullptr);
+      if (!s.ok()) {
+        if (s.subcode() == Status::SubCode::kMergeOperatorFailed) {
+          state_ = kMergeOperatorFailed;
+        } else {
+          state_ = kCorrupt;
+        }
+        return;
+      }
+    }
+
+    pinnable_val_->PinSelf();
+    return;
+  }
+
+  std::string result;
+
+  {
+    // `op_failure_scope` (an output parameter) is not provided (set to nullptr)
+    // since a failure must be propagated regardless of its value.
+    const Status s = MergeHelper::TimedFullMergeWithEntity(
+        merge_operator_, user_key_, entity, merge_context_->GetOperands(),
+        &result, logger_, statistics_, clock_, /* update_num_ops_stats */ true,
+        /* op_failure_scope */ nullptr);
+    if (!s.ok()) {
+      if (s.subcode() == Status::SubCode::kMergeOperatorFailed) {
+        state_ = kMergeOperatorFailed;
+      } else {
+        state_ = kCorrupt;
+      }
+      return;
+    }
+  }
+
+  {
+    assert(columns_);
+    const Status s = columns_->SetWideColumnValue(std::move(result));
+    if (!s.ok()) {
+      state_ = kCorrupt;
+      return;
+    }
+  }
+}
+
+bool GetContext::GetBlobValue(const Slice& user_key, const Slice& blob_index,
+                              PinnableSlice* blob_value) {
+  constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
+  constexpr uint64_t* bytes_read = nullptr;
+
+  Status status = blob_fetcher_->FetchBlob(
+      user_key, blob_index, prefetch_buffer, blob_value, bytes_read);
+  if (!status.ok()) {
+    if (status.IsIncomplete()) {
+      // FIXME: this code is not covered by unit tests
+      MarkKeyMayExist();
+      return false;
+    }
+    state_ = kCorrupt;
+    return false;
+  }
+  *is_blob_index_ = false;
+  return true;
+}
+
+void GetContext::push_operand(const Slice& value, Cleanable* value_pinner) {
+  // TODO(yanqin) preserve timestamps information in merge_context
+  if (pinned_iters_mgr() && pinned_iters_mgr()->PinningEnabled() &&
+      value_pinner != nullptr) {
+    value_pinner->DelegateCleanupsTo(pinned_iters_mgr());
+    merge_context_->PushOperand(value, true /*value_pinned*/);
+  } else {
+    merge_context_->PushOperand(value, false);
+  }
+}
+
+void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
+                         GetContext* get_context, Cleanable* value_pinner) {
+  Slice s = replay_log;
+  while (s.size()) {
+    auto type = static_cast<ValueType>(*s.data());
+    s.remove_prefix(1);
+    Slice value;
+    bool ret = GetLengthPrefixedSlice(&s, &value);
+    assert(ret);
+    (void)ret;
+
+    bool dont_care __attribute__((__unused__));
+    // Since the SequenceNumber is not stored and unknown, we will use
+    // kMaxSequenceNumber.
+    get_context->SaveValue(
+        ParsedInternalKey(user_key, kMaxSequenceNumber, type), value,
+        &dont_care, value_pinner);
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
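Editorial note: the replay-log record layout used by appendToReplayLog()/replayGetContextLog() round-trips as shown below; the varint codec here is a simplified stand-in for RocksDB's coding utilities, not the real API:

    #include <cassert>
    #include <cstdint>
    #include <string>

    void PutVarint32(std::string* dst, uint32_t v) {
      while (v >= 0x80) {
        dst->push_back(static_cast<char>(v | 0x80));
        v >>= 7;
      }
      dst->push_back(static_cast<char>(v));
    }

    bool GetVarint32(const char** p, const char* limit, uint32_t* v) {
      uint32_t result = 0;
      for (int shift = 0; shift <= 28 && *p < limit; shift += 7) {
        uint32_t byte = static_cast<unsigned char>(*(*p)++);
        result |= (byte & 0x7f) << shift;
        if ((byte & 0x80) == 0) {
          *v = result;
          return true;
        }
      }
      return false;
    }

    int main() {
      const char kToyTypeValue = 0x1;  // stand-in for ValueType
      std::string log;
      log.push_back(kToyTypeValue);    // 1. type byte
      PutVarint32(&log, 5);            // 2. varint32 length
      log.append("hello");            // 3. value bytes

      const char* p = log.data();
      const char* limit = p + log.size();
      char type = *p++;
      uint32_t len = 0;
      assert(GetVarint32(&p, limit, &len));
      assert(type == kToyTypeValue && std::string(p, len) == "hello");
    }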
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.h
new file mode 100644
index 0000000000..528cd14fd8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/get_context.h
@@ -0,0 +1,245 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <string>
+
+#include "db/read_callback.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+class BlobFetcher;
+class Comparator;
+class Logger;
+class MergeContext;
+class MergeOperator;
+class PinnableWideColumns;
+class PinnedIteratorsManager;
+class Statistics;
+class SystemClock;
+struct ParsedInternalKey;
+
+// Data structure for accumulating statistics during a point lookup. At the
+// end of the point lookup, the corresponding ticker stats are updated. This
+// avoids the overhead of frequent ticker stats updates.
+struct GetContextStats {
+  uint64_t num_cache_hit = 0;
+  uint64_t num_cache_index_hit = 0;
+  uint64_t num_cache_data_hit = 0;
+  uint64_t num_cache_filter_hit = 0;
+  uint64_t num_cache_compression_dict_hit = 0;
+  uint64_t num_cache_index_miss = 0;
+  uint64_t num_cache_filter_miss = 0;
+  uint64_t num_cache_data_miss = 0;
+  uint64_t num_cache_compression_dict_miss = 0;
+  uint64_t num_cache_bytes_read = 0;
+  uint64_t num_cache_miss = 0;
+  uint64_t num_cache_add = 0;
+  uint64_t num_cache_add_redundant = 0;
+  uint64_t num_cache_bytes_write = 0;
+  uint64_t num_cache_index_add = 0;
+  uint64_t num_cache_index_add_redundant = 0;
+  uint64_t num_cache_index_bytes_insert = 0;
+  uint64_t num_cache_data_add = 0;
+  uint64_t num_cache_data_add_redundant = 0;
+  uint64_t num_cache_data_bytes_insert = 0;
+  uint64_t num_cache_filter_add = 0;
+  uint64_t num_cache_filter_add_redundant = 0;
+  uint64_t num_cache_filter_bytes_insert = 0;
+  uint64_t num_cache_compression_dict_add = 0;
+  uint64_t num_cache_compression_dict_add_redundant = 0;
+  uint64_t num_cache_compression_dict_bytes_insert = 0;
+  // MultiGet stats.
+  uint64_t num_filter_read = 0;
+  uint64_t num_index_read = 0;
+  uint64_t num_sst_read = 0;
+};
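Editorial note: a minimal sketch of the batching pattern this struct supports, using hypothetical stand-in names (LocalStats, RecordTickStub): counters are bumped cheaply during one lookup and flushed once at the end, which is what GetContext::ReportCounters() does against the real ticker stats.

    #include <cstdint>

    struct LocalStats {  // stand-in for GetContextStats
      uint64_t cache_hit = 0;
      uint64_t cache_miss = 0;
    };

    void RecordTickStub(uint64_t* global_counter, uint64_t delta) {
      *global_counter += delta;  // the real RecordTick updates Statistics
    }

    // Flush-once pattern: only non-zero counters touch the shared stats.
    void Publish(const LocalStats& s, uint64_t* hit, uint64_t* miss) {
      if (s.cache_hit > 0) RecordTickStub(hit, s.cache_hit);
      if (s.cache_miss > 0) RecordTickStub(miss, s.cache_miss);
    }

    int main() {
      uint64_t hit = 0, miss = 0;
      LocalStats s;
      s.cache_hit = 3;  // bumped locally during the lookup
      Publish(s, &hit, &miss);
      return (hit == 3 && miss == 0) ? 0 : 1;
    }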
+
+// A class to hold context about a point lookup, such as the pointer to the
+// value slice, the key, the merge context, and the current state of the
+// lookup. Any user using GetContext to track the lookup result must call
+// SaveValue() whenever the internal key is found. This can happen
+// repeatedly in case of merge operands. In case the key may exist with
+// high probability, but IO is required to confirm and the user doesn't allow
+// it, MarkKeyMayExist() must be called instead of SaveValue().
+class GetContext {
+ public:
+  // Current state of the point lookup. All except kNotFound and kMerge are
+  // terminal states.
+  enum GetState {
+    kNotFound,
+    kFound,
+    kDeleted,
+    kCorrupt,
+    kMerge,  // saver contains the current merge result (the operands)
+    kUnexpectedBlobIndex,
+    kMergeOperatorFailed,
+  };
+  GetContextStats get_context_stats_;
+
+  // Constructor
+  // @param value Holds the value corresponding to user_key. If it's nullptr
+  //              then return all merge operands corresponding to user_key
+  //              via merge_context
+  // @param value_found If non-nullptr, set to false if key may be present
+  //              but we can't be certain because we cannot do IO
+  // @param max_covering_tombstone_seq Pointer to the highest sequence number
+  //              of a range deletion covering the key. When an internal key
+  //              is found with a smaller sequence number, the lookup
+  //              terminates.
+  // @param seq If non-nullptr, the sequence number of the found key will be
+  //              saved here
+  // @param callback Pointer to ReadCallback to perform additional checks
+  //              for visibility of a key
+  // @param is_blob_index If non-nullptr, will be used to indicate if a found
+  //              key is of type blob index
+  // @param do_merge True if the value associated with user_key has to be
+  //              returned and false if all the merge operands associated with
+  //              user_key have to be returned. If do_merge=false then all the
+  //              merge operands are stored in merge_context and they are never
+  //              merged. The value pointer is untouched.
+  GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
+             Logger* logger, Statistics* statistics, GetState init_state,
+             const Slice& user_key, PinnableSlice* value,
+             PinnableWideColumns* columns, bool* value_found,
+             MergeContext* merge_context, bool do_merge,
+             SequenceNumber* max_covering_tombstone_seq, SystemClock* clock,
+             SequenceNumber* seq = nullptr,
+             PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
+             ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
+             uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr);
+  GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
+             Logger* logger, Statistics* statistics, GetState init_state,
+             const Slice& user_key, PinnableSlice* value,
+             PinnableWideColumns* columns, std::string* timestamp,
+             bool* value_found, MergeContext* merge_context, bool do_merge,
+             SequenceNumber* max_covering_tombstone_seq, SystemClock* clock,
+             SequenceNumber* seq = nullptr,
+             PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
+             ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
+             uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr);
+
+  GetContext() = delete;
+
+  // This can be called to indicate that a key may be present, but cannot be
+  // confirmed due to IO not being allowed.
+  void MarkKeyMayExist();
+
+  // Records this key, value, and any meta-data (such as sequence number and
+  // state) into this GetContext.
+  //
+  // If the parsed_key matches the user key that we are looking for, sets
+  // matched to true.
+  //
+  // Returns True if more keys need to be read (due to merges) or
+  // False if the complete value has been found.
+  bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
+                 bool* matched, Cleanable* value_pinner = nullptr);
+
+  // Simplified version of the previous function. Should only be used when we
+  // know that the operation is a Put.
+  void SaveValue(const Slice& value, SequenceNumber seq);
+
+  GetState State() const { return state_; }
+
+  SequenceNumber* max_covering_tombstone_seq() {
+    return max_covering_tombstone_seq_;
+  }
+
+  bool NeedTimestamp() { return timestamp_ != nullptr; }
+
+  void SetTimestampFromRangeTombstone(const Slice& timestamp) {
+    assert(timestamp_);
+    timestamp_->assign(timestamp.data(), timestamp.size());
+    ts_from_rangetombstone_ = true;
+  }
+
+  PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; }
+
+  // If a non-null string is passed, all the SaveValue calls will be
+  // logged into the string. The operations can then be replayed on
+  // another GetContext with replayGetContextLog.
+  void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; }
+
+  // Do we need to fetch the SequenceNumber for this key?
+ bool NeedToReadSequence() const { return (seq_ != nullptr); } + + bool sample() const { return sample_; } + + bool CheckCallback(SequenceNumber seq) { + if (callback_) { + return callback_->IsVisible(seq); + } + return true; + } + + void ReportCounters(); + + bool has_callback() const { return callback_ != nullptr; } + + const Slice& ukey_to_get_blob_value() const { + if (!ukey_with_ts_found_.empty()) { + return ukey_with_ts_found_; + } else { + return user_key_; + } + } + + uint64_t get_tracing_get_id() const { return tracing_get_id_; } + + void push_operand(const Slice& value, Cleanable* value_pinner); + + private: + void Merge(const Slice* value); + void MergeWithEntity(Slice entity); + bool GetBlobValue(const Slice& user_key, const Slice& blob_index, + PinnableSlice* blob_value); + + const Comparator* ucmp_; + const MergeOperator* merge_operator_; + // the merge operations encountered; + Logger* logger_; + Statistics* statistics_; + + GetState state_; + Slice user_key_; + // When a blob index is found with the user key containing timestamp, + // this copies the corresponding user key on record in the sst file + // and is later used for blob verification. + PinnableSlice ukey_with_ts_found_; + PinnableSlice* pinnable_val_; + PinnableWideColumns* columns_; + std::string* timestamp_; + bool ts_from_rangetombstone_{false}; + bool* value_found_; // Is value set correctly? Used by KeyMayExist + MergeContext* merge_context_; + SequenceNumber* max_covering_tombstone_seq_; + SystemClock* clock_; + // If a key is found, seq_ will be set to the SequenceNumber of most recent + // write to the key or kMaxSequenceNumber if unknown + SequenceNumber* seq_; + std::string* replay_log_; + // Used to temporarily pin blocks when state_ == GetContext::kMerge + PinnedIteratorsManager* pinned_iters_mgr_; + ReadCallback* callback_; + bool sample_; + // Value is true if it's called as part of DB Get API and false if it's + // called as part of DB GetMergeOperands API. When it's false merge operators + // are never merged. + bool do_merge_; + bool* is_blob_index_; + // Used for block cache tracing only. A tracing get id uniquely identifies a + // Get or a MultiGet. + const uint64_t tracing_get_id_; + BlobFetcher* blob_fetcher_; +}; + +// Call this to replay a log and bring the get_context up to date. The replay +// log must have been created by another GetContext object, whose replay log +// must have been set by calling GetContext::SetReplayLog(). +void replayGetContextLog(const Slice& replay_log, const Slice& user_key, + GetContext* get_context, + Cleanable* value_pinner = nullptr); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/internal_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/internal_iterator.h new file mode 100644 index 0000000000..8015ed6351 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/internal_iterator.h @@ -0,0 +1,224 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+//
+
+#pragma once
+
+#include <string>
+
+#include "db/dbformat.h"
+#include "file/readahead_file_info.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/status.h"
+#include "table/format.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class PinnedIteratorsManager;
+
+enum class IterBoundCheck : char {
+  kUnknown = 0,
+  kOutOfBound,
+  kInbound,
+};
+
+struct IterateResult {
+  Slice key;
+  IterBoundCheck bound_check_result = IterBoundCheck::kUnknown;
+  // If false, PrepareValue() needs to be called before value().
+  bool value_prepared = true;
+};
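Editorial note: one way a caller can use the IterBoundCheck tri-state above is to skip its own upper-bound comparison when the iterator has already classified the key; a self-contained sketch (the enum is restated locally, WithinUpperBound is hypothetical):

    #include <cassert>
    #include <string>

    enum class IterBoundCheck : char { kUnknown = 0, kOutOfBound, kInbound };

    bool WithinUpperBound(const std::string& key, const std::string& upper,
                          IterBoundCheck hint) {
      if (hint == IterBoundCheck::kInbound) return true;    // iterator checked
      if (hint == IterBoundCheck::kOutOfBound) return false;
      return key < upper;  // kUnknown: caller compares for itself
    }

    int main() {
      assert(WithinUpperBound("b", "c", IterBoundCheck::kUnknown));
      assert(!WithinUpperBound("d", "c", IterBoundCheck::kUnknown));
      assert(WithinUpperBound("zzz", "c", IterBoundCheck::kInbound));
    }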
+template <class TValue>
+class InternalIteratorBase : public Cleanable {
+ public:
+  InternalIteratorBase() {}
+
+  // No copying allowed
+  InternalIteratorBase(const InternalIteratorBase&) = delete;
+  InternalIteratorBase& operator=(const InternalIteratorBase&) = delete;
+
+  virtual ~InternalIteratorBase() {}
+
+  // An iterator is either positioned at a key/value pair, or
+  // not valid. This method returns true iff the iterator is valid.
+  // Always returns false if !status().ok().
+  virtual bool Valid() const = 0;
+
+  // Position at the first key in the source. The iterator is Valid()
+  // after this call iff the source is not empty.
+  virtual void SeekToFirst() = 0;
+
+  // Position at the last key in the source. The iterator is
+  // Valid() after this call iff the source is not empty.
+  virtual void SeekToLast() = 0;
+
+  // Position at the first key in the source that is at or past target.
+  // The iterator is Valid() after this call iff the source contains
+  // an entry that comes at or past target.
+  // All Seek*() methods clear any error status() that the iterator had prior
+  // to the call; after the seek, status() indicates only the error (if any)
+  // that happened during the seek, not any past errors.
+  // 'target' contains user timestamp if timestamp is enabled.
+  virtual void Seek(const Slice& target) = 0;
+
+  // Position at the first key in the source that is at or before target.
+  // The iterator is Valid() after this call iff the source contains
+  // an entry that comes at or before target.
+  virtual void SeekForPrev(const Slice& target) = 0;
+
+  // Moves to the next entry in the source. After this call, Valid() is
+  // true iff the iterator was not positioned at the last entry in the source.
+  // REQUIRES: Valid()
+  virtual void Next() = 0;
+
+  // Moves to the next entry in the source and returns the result. Iterator
+  // implementations should override this method to help methods inline
+  // better, or when UpperBoundCheckResult() is non-trivial.
+  // REQUIRES: Valid()
+  virtual bool NextAndGetResult(IterateResult* result) {
+    Next();
+    bool is_valid = Valid();
+    if (is_valid) {
+      result->key = key();
+      // Default may_be_out_of_upper_bound to true to avoid unnecessary virtual
+      // call. If an implementation has non-trivial UpperBoundCheckResult(),
+      // it should also override NextAndGetResult().
+      result->bound_check_result = IterBoundCheck::kUnknown;
+      result->value_prepared = false;
+      assert(UpperBoundCheckResult() != IterBoundCheck::kOutOfBound);
+    }
+    return is_valid;
+  }
+
+  // Moves to the previous entry in the source. After this call, Valid() is
+  // true iff the iterator was not positioned at the first entry in source.
+  // REQUIRES: Valid()
+  virtual void Prev() = 0;
+
+  // Return the key for the current entry. The underlying storage for
+  // the returned slice is valid only until the next modification of
+  // the iterator.
+  // REQUIRES: Valid()
+  virtual Slice key() const = 0;
+
+  // Return user key for the current entry.
+  // REQUIRES: Valid()
+  virtual Slice user_key() const { return ExtractUserKey(key()); }
+
+  // Return the value for the current entry. The underlying storage for
+  // the returned slice is valid only until the next modification of
+  // the iterator.
+  // REQUIRES: Valid()
+  // REQUIRES: PrepareValue() has been called if needed (see PrepareValue()).
+  virtual TValue value() const = 0;
+
+  // If an error has occurred, return it. Else return an ok status.
+  // If non-blocking IO is requested and this operation cannot be
+  // satisfied without doing some IO, then this returns Status::Incomplete().
+  virtual Status status() const = 0;
+
+  // For some types of iterators, sometimes Seek()/Next()/SeekForPrev()/etc may
+  // load the key but not the value (to avoid the IO cost of reading the value
+  // from disk if it won't be needed). This method loads the value in such
+  // situations.
+  //
+  // Needs to be called before value() at least once after each iterator
+  // movement (except if IterateResult::value_prepared = true), for iterators
+  // created with allow_unprepared_value = true.
+  //
+  // Returns false if an error occurred; in this case Valid() is also changed
+  // to false, and status() is changed to non-ok.
+  // REQUIRES: Valid()
+  virtual bool PrepareValue() { return true; }
+
+  // Keys returned from this iterator can be smaller than iterate_lower_bound.
+  virtual bool MayBeOutOfLowerBound() { return true; }
+
+  // If the iterator has checked the key against iterate_upper_bound, returns
+  // the result here. The function can be used by users of the iterator to
+  // skip their own checks. If Valid() = true, IterBoundCheck::kUnknown is
+  // always a valid value. If Valid() = false, IterBoundCheck::kOutOfBound
+  // indicates that the iterator is filtered out by upper bound checks.
+  virtual IterBoundCheck UpperBoundCheckResult() {
+    return IterBoundCheck::kUnknown;
+  }
+
+  // Pass the PinnedIteratorsManager to the Iterator. Most Iterators don't
+  // communicate with PinnedIteratorsManager, so the default implementation
+  // is a no-op; Iterators that do need to communicate with it implement this
+  // function and use the passed pointer to do so.
+  virtual void SetPinnedItersMgr(PinnedIteratorsManager* /*pinned_iters_mgr*/) {
+  }
+
+  // If true, this means that the Slice returned by key() is valid as long as
+  // PinnedIteratorsManager::ReleasePinnedData is not called and the
+  // Iterator is not deleted.
+  //
+  // IsKeyPinned() is guaranteed to always return true if
+  //  - Iterator is created with ReadOptions::pin_data = true
+  //  - DB tables were created with BlockBasedTableOptions::use_delta_encoding
+  //    set to false.
+  virtual bool IsKeyPinned() const { return false; }
+
+  // If true, this means that the Slice returned by value() is valid as long as
+  // PinnedIteratorsManager::ReleasePinnedData is not called and the
+  // Iterator is not deleted.
+  // REQUIRES: Same as for value().
+  virtual bool IsValuePinned() const { return false; }
+
+  virtual Status GetProperty(std::string /*prop_name*/, std::string* /*prop*/) {
+    return Status::NotSupported("");
+  }
+
+  // When the iterator moves from one file to another file at the same level,
+  // the new file's readahead state (details of the last block read) is updated
+  // with the previous file's readahead state. This way the internal
+  // readahead_size of the Prefetch Buffer doesn't start from scratch and can
+  // fall back to 8KB with no prefetch if reads are not sequential.
+  //
+  // The default implementation is a no-op; it's implemented by iterators
+  // that support it.
+  virtual void GetReadaheadState(ReadaheadFileInfo* /*readahead_file_info*/) {}
+
+  // The default implementation is a no-op; it's implemented by iterators
+  // that support it.
+  virtual void SetReadaheadState(ReadaheadFileInfo* /*readahead_file_info*/) {}
+
+  // When used under merging iterator, LevelIterator treats file boundaries
+  // as sentinel keys to prevent it from moving to the next SST file before
+  // range tombstones in the current SST file are no longer needed. This method
+  // makes it cheap to check if the current key is a sentinel key. This should
+  // only be used by MergingIterator and LevelIterator for now.
+  virtual bool IsDeleteRangeSentinelKey() const { return false; }
+
+ protected:
+  void SeekForPrevImpl(const Slice& target, const CompareInterface* cmp) {
+    Seek(target);
+    if (!Valid()) {
+      SeekToLast();
+    }
+    while (Valid() && cmp->Compare(target, key()) < 0) {
+      Prev();
+    }
+  }
+};
+
+using InternalIterator = InternalIteratorBase<Slice>;
+
+// Return an empty iterator (yields nothing).
+template <class TValue>
+extern InternalIteratorBase<TValue>* NewEmptyInternalIterator();
+
+// Return an empty iterator with the specified status.
+template <class TValue>
+extern InternalIteratorBase<TValue>* NewErrorInternalIterator(
+    const Status& status);
+
+// Return an empty iterator with the specified status, allocated from an arena.
+template <class TValue>
+extern InternalIteratorBase<TValue>* NewErrorInternalIterator(
+    const Status& status, Arena* arena);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iter_heap.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iter_heap.h
new file mode 100644
index 0000000000..6ad94be9b6
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iter_heap.h
@@ -0,0 +1,44 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+
+#include "db/dbformat.h"
+#include "table/iterator_wrapper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// When used with std::priority_queue, this comparison functor puts the
+// iterator with the max/largest key on top.
+class MaxIteratorComparator {
+ public:
+  MaxIteratorComparator(const InternalKeyComparator* comparator)
+      : comparator_(comparator) {}
+
+  bool operator()(IteratorWrapper* a, IteratorWrapper* b) const {
+    return comparator_->Compare(a->key(), b->key()) < 0;
+  }
+
+ private:
+  const InternalKeyComparator* comparator_;
+};
+
+// When used with std::priority_queue, this comparison functor puts the
+// iterator with the min/smallest key on top.
+class MinIteratorComparator {
+ public:
+  MinIteratorComparator(const InternalKeyComparator* comparator)
+      : comparator_(comparator) {}
+
+  bool operator()(IteratorWrapper* a, IteratorWrapper* b) const {
+    return comparator_->Compare(a->key(), b->key()) > 0;
+  }
+
+ private:
+  const InternalKeyComparator* comparator_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
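Editorial note: the comparator trick above follows the usual std::priority_queue convention, where the queue is a max-heap, so a greater-than comparator surfaces the smallest element. A toy analogue with strings standing in for iterator positions:

    #include <cassert>
    #include <queue>
    #include <string>
    #include <vector>

    // Greater-than comparator: smallest key on top, as in MinIteratorComparator.
    struct MinOnTop {
      bool operator()(const std::string& a, const std::string& b) const {
        return a > b;
      }
    };

    int main() {
      std::priority_queue<std::string, std::vector<std::string>, MinOnTop> heap;
      for (const char* k : {"m", "a", "z"}) heap.push(k);
      assert(heap.top() == "a");  // smallest key surfaces first
    }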
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator.cc
new file mode 100644
index 0000000000..14e280a07b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator.cc
@@ -0,0 +1,130 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "rocksdb/iterator.h"
+
+#include "memory/arena.h"
+#include "table/internal_iterator.h"
+#include "table/iterator_wrapper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status Iterator::GetProperty(std::string prop_name, std::string* prop) {
+  if (prop == nullptr) {
+    return Status::InvalidArgument("prop is nullptr");
+  }
+  if (prop_name == "rocksdb.iterator.is-key-pinned") {
+    *prop = "0";
+    return Status::OK();
+  }
+  return Status::InvalidArgument("Unidentified property.");
+}
+
+namespace {
+class EmptyIterator : public Iterator {
+ public:
+  explicit EmptyIterator(const Status& s) : status_(s) {}
+  bool Valid() const override { return false; }
+  void Seek(const Slice& /*target*/) override {}
+  void SeekForPrev(const Slice& /*target*/) override {}
+  void SeekToFirst() override {}
+  void SeekToLast() override {}
+  void Next() override { assert(false); }
+  void Prev() override { assert(false); }
+  Slice key() const override {
+    assert(false);
+    return Slice();
+  }
+  Slice value() const override {
+    assert(false);
+    return Slice();
+  }
+  Status status() const override { return status_; }
+
+ private:
+  Status status_;
+};
+
+template <class TValue>
+class EmptyInternalIterator : public InternalIteratorBase<TValue> {
+ public:
+  explicit EmptyInternalIterator(const Status& s) : status_(s) {}
+  bool Valid() const override { return false; }
+  void Seek(const Slice& /*target*/) override {}
+  void SeekForPrev(const Slice& /*target*/) override {}
+  void SeekToFirst() override {}
+  void SeekToLast() override {}
+  void Next() override { assert(false); }
+  void Prev() override { assert(false); }
+  Slice key() const override {
+    assert(false);
+    return Slice();
+  }
+  TValue value() const override {
+    assert(false);
+    return TValue();
+  }
+  Status status() const override { return status_; }
+
+ private:
+  Status status_;
+};
+}  // namespace
+
+Iterator* NewEmptyIterator() { return new EmptyIterator(Status::OK()); }
+
+Iterator* NewErrorIterator(const Status& status) {
+  return new EmptyIterator(status);
+}
+
+template <class TValue>
+InternalIteratorBase<TValue>* NewErrorInternalIterator(const Status& status) {
+  return new EmptyInternalIterator<TValue>(status);
+}
+template InternalIteratorBase<IndexValue>* NewErrorInternalIterator(
+    const Status& status);
+template InternalIteratorBase<Slice>* NewErrorInternalIterator(
+    const Status& status);
+
+template <class TValue>
+InternalIteratorBase<TValue>* NewErrorInternalIterator(const Status& status,
+                                                       Arena* arena) {
+  if (arena == nullptr) {
+    return NewErrorInternalIterator<TValue>(status);
+  } else {
+    auto mem = arena->AllocateAligned(sizeof(EmptyInternalIterator<TValue>));
+    return new (mem) EmptyInternalIterator<TValue>(status);
+  }
+}
+template InternalIteratorBase<IndexValue>* NewErrorInternalIterator(
+    const Status& status, Arena* arena);
+template InternalIteratorBase<Slice>* NewErrorInternalIterator(
+    const Status& status, Arena* arena);
+
+template <class TValue>
+InternalIteratorBase<TValue>* NewEmptyInternalIterator() {
+  return new EmptyInternalIterator<TValue>(Status::OK());
+}
+template InternalIteratorBase<IndexValue>* NewEmptyInternalIterator();
+template InternalIteratorBase<Slice>* NewEmptyInternalIterator();
+
+template <class TValue>
+InternalIteratorBase<TValue>* NewEmptyInternalIterator(Arena* arena) {
+  if (arena ==
nullptr) {
+    return NewEmptyInternalIterator<TValue>();
+  } else {
+    auto mem = arena->AllocateAligned(sizeof(EmptyInternalIterator<TValue>));
+    return new (mem) EmptyInternalIterator<TValue>(Status::OK());
+  }
+}
+template InternalIteratorBase<IndexValue>* NewEmptyInternalIterator(
+    Arena* arena);
+template InternalIteratorBase<Slice>* NewEmptyInternalIterator(Arena* arena);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator_wrapper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator_wrapper.h
new file mode 100644
index 0000000000..17abef4ac7
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/iterator_wrapper.h
@@ -0,0 +1,190 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <set>
+
+#include "table/internal_iterator.h"
+#include "test_util/sync_point.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// An internal wrapper class with an interface similar to Iterator that caches
+// the valid() and key() results for an underlying iterator.
+// This can help avoid virtual function calls and also gives better
+// cache locality.
+template <class TValue>
+class IteratorWrapperBase {
+ public:
+  IteratorWrapperBase() : iter_(nullptr), valid_(false) {}
+  explicit IteratorWrapperBase(InternalIteratorBase<TValue>* _iter)
+      : iter_(nullptr) {
+    Set(_iter);
+  }
+  ~IteratorWrapperBase() {}
+  InternalIteratorBase<TValue>* iter() const { return iter_; }
+
+  // Set the underlying Iterator to _iter and return
+  // previous underlying Iterator.
+  InternalIteratorBase<TValue>* Set(InternalIteratorBase<TValue>* _iter) {
+    InternalIteratorBase<TValue>* old_iter = iter_;
+
+    iter_ = _iter;
+    if (iter_ == nullptr) {
+      valid_ = false;
+    } else {
+      Update();
+    }
+    return old_iter;
+  }
+
+  void DeleteIter(bool is_arena_mode) {
+    if (iter_) {
+      if (!is_arena_mode) {
+        delete iter_;
+      } else {
+        iter_->~InternalIteratorBase();
+      }
+    }
+  }
+
+  // Iterator interface methods
+  bool Valid() const { return valid_; }
+  Slice key() const {
+    assert(Valid());
+    return result_.key;
+  }
+  TValue value() const {
+    assert(Valid());
+    return iter_->value();
+  }
+  // Methods below require iter() != nullptr
+  Status status() const {
+    assert(iter_);
+    return iter_->status();
+  }
+  bool PrepareValue() {
+    assert(Valid());
+    if (result_.value_prepared) {
+      return true;
+    }
+    if (iter_->PrepareValue()) {
+      result_.value_prepared = true;
+      return true;
+    }
+
+    assert(!iter_->Valid());
+    valid_ = false;
+    return false;
+  }
+  void Next() {
+    assert(iter_);
+    valid_ = iter_->NextAndGetResult(&result_);
+    assert(!valid_ || iter_->status().ok());
+  }
+  bool NextAndGetResult(IterateResult* result) {
+    assert(iter_);
+    valid_ = iter_->NextAndGetResult(&result_);
+    *result = result_;
+    assert(!valid_ || iter_->status().ok());
+    return valid_;
+  }
+  void Prev() {
+    assert(iter_);
+    iter_->Prev();
+    Update();
+  }
+  void Seek(const Slice& k) {
+    assert(iter_);
+    iter_->Seek(k);
+    Update();
+  }
+  void SeekForPrev(const Slice& k) {
+    assert(iter_);
+    iter_->SeekForPrev(k);
+    Update();
+  }
+  void SeekToFirst() {
+    assert(iter_);
+    iter_->SeekToFirst();
+    Update();
+  }
+  void SeekToLast() {
+    assert(iter_);
+    iter_->SeekToLast();
+    Update();
+  }
+
+  bool MayBeOutOfLowerBound() {
+    assert(Valid());
+    return iter_->MayBeOutOfLowerBound();
+  }
+
+  IterBoundCheck UpperBoundCheckResult() {
+    assert(Valid());
+    return result_.bound_check_result;
+  }
+
+  void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) {
+    assert(iter_);
+    iter_->SetPinnedItersMgr(pinned_iters_mgr);
+  }
+  bool IsKeyPinned() const {
+    assert(Valid());
+    return iter_->IsKeyPinned();
+  }
+  bool IsValuePinned() const {
+    assert(Valid());
+    return iter_->IsValuePinned();
+  }
+
+  bool IsValuePrepared() const { return result_.value_prepared; }
+
+  Slice user_key() const {
+    assert(Valid());
+    return iter_->user_key();
+  }
+
+  void UpdateReadaheadState(InternalIteratorBase<TValue>* old_iter) {
+    if (old_iter && iter_) {
+      ReadaheadFileInfo readahead_file_info;
+      old_iter->GetReadaheadState(&readahead_file_info);
+      iter_->SetReadaheadState(&readahead_file_info);
+    }
+  }
+
+  bool IsDeleteRangeSentinelKey() const {
+    return iter_->IsDeleteRangeSentinelKey();
+  }
+
+ private:
+  void Update() {
+    valid_ = iter_->Valid();
+    if (valid_) {
+      assert(iter_->status().ok());
+      result_.key = iter_->key();
+      result_.bound_check_result = IterBoundCheck::kUnknown;
+      result_.value_prepared = false;
+    }
+  }
+
+  InternalIteratorBase<TValue>* iter_;
+  IterateResult result_;
+  bool valid_;
+};
+
+using IteratorWrapper = IteratorWrapperBase<Slice>;
+
+class Arena;
+// Return an empty iterator (yields nothing) allocated from arena.
+template <class TValue>
+extern InternalIteratorBase<TValue>* NewEmptyInternalIterator(Arena* arena);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.cc
new file mode 100644
index 0000000000..0fa3fcd3eb
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.cc
@@ -0,0 +1,1725 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "table/merging_iterator.h"
+
+#include "db/arena_wrapped_db_iter.h"
+
+namespace ROCKSDB_NAMESPACE {
+// MergingIterator uses a min/max heap to combine data from point iterators.
+// Range tombstones can be added and keys covered by range tombstones will be
+// skipped.
+//
+// The following are implementation details and can be ignored by the user.
+// For the merging iterator to process range tombstones, it treats the start and
+// end keys of a range tombstone as two keys and puts them into minHeap_ or
+// maxHeap_ together with regular point keys. Each range tombstone is active only
+// within its internal key range [start_key, end_key). An `active_` set is used
+// to track levels that have an active range tombstone. Take forward scanning
+// for example. Level j is in active_ if its current range tombstone has its
+// start_key popped from minHeap_ and its end_key in minHeap_. If the top of
+// minHeap_ is a point key from level L, we can determine if the point key is
+// covered by any range tombstone by checking if there is an l <= L in active_.
+// The case of l == L also involves checking range tombstone's sequence number.
+//
+// The following (non-exhaustive) list of invariants are maintained by
+// MergingIterator during forward scanning. After each InternalIterator API,
+// i.e., Seek*() and Next(), and FindNextVisibleKey(), if minHeap_ is not empty:
+// (1) minHeap_.top().type == ITERATOR
+// (2) minHeap_.top()->key() is not covered by any range tombstone.
+//
+// After each call to SeekImpl() in addition to the functions mentioned above:
+// (3) For all level i and j <= i, range_tombstone_iters_[j].prev.end_key() <
+// children_[i].iter.key(). That is, range_tombstone_iters_[j] is at or before
+// the first range tombstone from level j with end_key() >
+// children_[i].iter.key().
+// (4) For all level i and j <= i, if j in active_, then
+// range_tombstone_iters_[j]->start_key() < children_[i].iter.key().
+// - When range_tombstone_iters_[j] is !Valid(), we consider its `prev` to be
+// the last range tombstone from that range tombstone iterator.
+// - When referring to range tombstone start/end keys, assume it is the value of
+// HeapItem::tombstone_pik. This value has op_type = kMaxValid, which makes
+// range tombstone keys have distinct values from point keys.
+//
+// Applicable class variables have their own (forward scanning) invariants
+// listed in the comments above their definition.
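+//
+// A small worked example of the scheme above (illustrative only; keys,
+// levels, and sequence numbers are made up): suppose level 0 has a range
+// tombstone [a, c) whose sequence number covers everything below it, and
+// level 1 has point keys "b" and "d". SeekToFirst() pushes the tombstone's
+// start key "a" and the point key "b" into minHeap_. PopDeleteRangeStart()
+// pops "a", adds level 0 to active_, and pushes the end key "c". The top is
+// now "b" from level 1; since level 0 <= 1 is in active_, "b" is covered,
+// and a cascading seek to "c" repositions level 1 at "d". Once "c" is
+// popped, level 0 leaves active_ and "d" is the first visible key.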
+class MergingIterator : public InternalIterator { + public: + MergingIterator(const InternalKeyComparator* comparator, + InternalIterator** children, int n, bool is_arena_mode, + bool prefix_seek_mode, + const Slice* iterate_upper_bound = nullptr) + : is_arena_mode_(is_arena_mode), + prefix_seek_mode_(prefix_seek_mode), + direction_(kForward), + comparator_(comparator), + current_(nullptr), + minHeap_(MinHeapItemComparator(comparator_)), + pinned_iters_mgr_(nullptr), + iterate_upper_bound_(iterate_upper_bound) { + children_.resize(n); + for (int i = 0; i < n; i++) { + children_[i].level = i; + children_[i].iter.Set(children[i]); + } + } + + void considerStatus(Status s) { + if (!s.ok() && status_.ok()) { + status_ = s; + } + } + + virtual void AddIterator(InternalIterator* iter) { + children_.emplace_back(children_.size(), iter); + if (pinned_iters_mgr_) { + iter->SetPinnedItersMgr(pinned_iters_mgr_); + } + // Invalidate to ensure `Seek*()` is called to construct the heaps before + // use. + current_ = nullptr; + } + + // There must be either no range tombstone iterator or the same number of + // range tombstone iterators as point iterators after all iters are added. + // The i-th added range tombstone iterator and the i-th point iterator + // must point to the same LSM level. + // Merging iterator takes ownership of `iter` and is responsible for freeing + // it. One exception to this is when a LevelIterator moves to a different SST + // file or when Iterator::Refresh() is called, the range tombstone iterator + // could be updated. In that case, this merging iterator is only responsible + // for freeing the new range tombstone iterator that it has pointers to in + // range_tombstone_iters_. + void AddRangeTombstoneIterator(TruncatedRangeDelIterator* iter) { + range_tombstone_iters_.emplace_back(iter); + } + + // Called by MergingIteratorBuilder when all point iterators and range + // tombstone iterators are added. Initializes HeapItems for range tombstone + // iterators. + void Finish() { + if (!range_tombstone_iters_.empty()) { + assert(range_tombstone_iters_.size() == children_.size()); + pinned_heap_item_.resize(range_tombstone_iters_.size()); + for (size_t i = 0; i < range_tombstone_iters_.size(); ++i) { + pinned_heap_item_[i].level = i; + // Range tombstone end key is exclusive. If a point internal key has the + // same user key and sequence number as the start or end key of a range + // tombstone, the order will be start < end key < internal key with the + // following op_type change. This is helpful to ensure keys popped from + // heap are in expected order since range tombstone start/end keys will + // be distinct from point internal keys. Strictly speaking, this is only + // needed for tombstone end points that are truncated in + // TruncatedRangeDelIterator since untruncated tombstone end points + // always have kMaxSequenceNumber and kTypeRangeDeletion (see + // TruncatedRangeDelIterator::start_key()/end_key()). + pinned_heap_item_[i].tombstone_pik.type = kTypeMaxValid; + } + } + } + + ~MergingIterator() override { + for (auto child : range_tombstone_iters_) { + delete child; + } + + for (auto& child : children_) { + child.iter.DeleteIter(is_arena_mode_); + } + status_.PermitUncheckedError(); + } + + bool Valid() const override { return current_ != nullptr && status_.ok(); } + + Status status() const override { return status_; } + + // Add range_tombstone_iters_[level] into min heap. + // Updates active_ if the end key of a range tombstone is inserted. 
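+  // For example (illustrative): for a tombstone [a, c) at `level`, a call
+  // with start_key == true pushes a DELETE_RANGE_START item carrying "a",
+  // while a call with start_key == false pushes a DELETE_RANGE_END item
+  // carrying "c" and adds `level` to active_.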
+ // pinned_heap_items_[level].type is updated based on `start_key`. + // + // If range_tombstone_iters_[level] is after iterate_upper_bound_, + // it is removed from the heap. + // @param start_key specifies which end point of the range tombstone to add. + void InsertRangeTombstoneToMinHeap(size_t level, bool start_key = true, + bool replace_top = false) { + assert(!range_tombstone_iters_.empty() && + range_tombstone_iters_[level]->Valid()); + // Maintains Invariant(phi) + if (start_key) { + pinned_heap_item_[level].type = HeapItem::Type::DELETE_RANGE_START; + ParsedInternalKey pik = range_tombstone_iters_[level]->start_key(); + // iterate_upper_bound does not have timestamp + if (iterate_upper_bound_ && + comparator_->user_comparator()->CompareWithoutTimestamp( + pik.user_key, true /* a_has_ts */, *iterate_upper_bound_, + false /* b_has_ts */) >= 0) { + if (replace_top) { + // replace_top implies this range tombstone iterator is still in + // minHeap_ and at the top. + minHeap_.pop(); + } + return; + } + pinned_heap_item_[level].SetTombstoneKey(std::move(pik)); + // Checks Invariant(active_) + assert(active_.count(level) == 0); + } else { + // allow end key to go over upper bound (if present) since start key is + // before upper bound and the range tombstone could still cover a + // range before upper bound. + // Maintains Invariant(active_) + pinned_heap_item_[level].SetTombstoneKey( + range_tombstone_iters_[level]->end_key()); + pinned_heap_item_[level].type = HeapItem::Type::DELETE_RANGE_END; + active_.insert(level); + } + if (replace_top) { + minHeap_.replace_top(&pinned_heap_item_[level]); + } else { + minHeap_.push(&pinned_heap_item_[level]); + } + } + + // Add range_tombstone_iters_[level] into max heap. + // Updates active_ if the start key of a range tombstone is inserted. + // @param end_key specifies which end point of the range tombstone to add. + void InsertRangeTombstoneToMaxHeap(size_t level, bool end_key = true, + bool replace_top = false) { + assert(!range_tombstone_iters_.empty() && + range_tombstone_iters_[level]->Valid()); + if (end_key) { + pinned_heap_item_[level].SetTombstoneKey( + range_tombstone_iters_[level]->end_key()); + pinned_heap_item_[level].type = HeapItem::Type::DELETE_RANGE_END; + assert(active_.count(level) == 0); + } else { + pinned_heap_item_[level].SetTombstoneKey( + range_tombstone_iters_[level]->start_key()); + pinned_heap_item_[level].type = HeapItem::Type::DELETE_RANGE_START; + active_.insert(level); + } + if (replace_top) { + maxHeap_->replace_top(&pinned_heap_item_[level]); + } else { + maxHeap_->push(&pinned_heap_item_[level]); + } + } + + // Remove HeapItems from top of minHeap_ that are of type DELETE_RANGE_START + // until minHeap_ is empty or the top of the minHeap_ is not of type + // DELETE_RANGE_START. Each such item means a range tombstone becomes active, + // so `active_` is updated accordingly. + void PopDeleteRangeStart() { + while (!minHeap_.empty() && + minHeap_.top()->type == HeapItem::Type::DELETE_RANGE_START) { + TEST_SYNC_POINT_CALLBACK("MergeIterator::PopDeleteRangeStart", nullptr); + // Invariant(rti) holds since + // range_tombstone_iters_[minHeap_.top()->level] is still valid, and + // parameter `replace_top` is set to true here to ensure only one such + // HeapItem is in minHeap_. 
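+      // Concretely (illustrative): if the popped item is the start key of
+      // tombstone [a, c) from level j, the call below swaps it for an
+      // end-key item carrying "c" and inserts j into active_, so point keys
+      // in [a, c) from levels >= j can later be recognized as deleted.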
+ InsertRangeTombstoneToMinHeap( + minHeap_.top()->level, false /* start_key */, true /* replace_top */); + } + } + + // Remove HeapItems from top of maxHeap_ that are of type DELETE_RANGE_END + // until maxHeap_ is empty or the top of the maxHeap_ is not of type + // DELETE_RANGE_END. Each such item means a range tombstone becomes active, + // so `active_` is updated accordingly. + void PopDeleteRangeEnd() { + while (!maxHeap_->empty() && + maxHeap_->top()->type == HeapItem::Type::DELETE_RANGE_END) { + // insert start key of this range tombstone and updates active_ + InsertRangeTombstoneToMaxHeap(maxHeap_->top()->level, false /* end_key */, + true /* replace_top */); + } + } + + void SeekToFirst() override { + ClearHeaps(); + status_ = Status::OK(); + for (auto& child : children_) { + child.iter.SeekToFirst(); + AddToMinHeapOrCheckStatus(&child); + } + + for (size_t i = 0; i < range_tombstone_iters_.size(); ++i) { + if (range_tombstone_iters_[i]) { + range_tombstone_iters_[i]->SeekToFirst(); + if (range_tombstone_iters_[i]->Valid()) { + // It is possible to be invalid due to snapshots. + InsertRangeTombstoneToMinHeap(i); + } + } + } + FindNextVisibleKey(); + direction_ = kForward; + current_ = CurrentForward(); + } + + void SeekToLast() override { + ClearHeaps(); + InitMaxHeap(); + status_ = Status::OK(); + for (auto& child : children_) { + child.iter.SeekToLast(); + AddToMaxHeapOrCheckStatus(&child); + } + + for (size_t i = 0; i < range_tombstone_iters_.size(); ++i) { + if (range_tombstone_iters_[i]) { + range_tombstone_iters_[i]->SeekToLast(); + if (range_tombstone_iters_[i]->Valid()) { + // It is possible to be invalid due to snapshots. + InsertRangeTombstoneToMaxHeap(i); + } + } + } + FindPrevVisibleKey(); + direction_ = kReverse; + current_ = CurrentReverse(); + } + + // Position this merging iterator at the first key >= target (internal key). + // If range tombstones are present, keys covered by range tombstones are + // skipped, and this merging iter points to the first non-range-deleted key >= + // target after Seek(). If !Valid() and status().ok() then this iterator + // reaches the end. + // + // If range tombstones are present, cascading seeks may be called (an + // optimization adapted from Pebble https://github.com/cockroachdb/pebble). + // Roughly, if there is a range tombstone [start, end) that covers the + // target user key at level L, then this range tombstone must cover the range + // [target key, end) in all levels > L. So for all levels > L, we can pretend + // the target key is `end`. This optimization is applied at each level and + // hence the name "cascading seek". + void Seek(const Slice& target) override { + // Define LevelNextVisible(i, k) to be the first key >= k in level i that is + // not covered by any range tombstone. + // After SeekImpl(target, 0), invariants (3) and (4) hold. + // For all level i, target <= children_[i].iter.key() <= LevelNextVisible(i, + // target). By the contract of FindNextVisibleKey(), Invariants (1)-(4) + // holds after this call, and minHeap_.top().iter points to the + // first key >= target among children_ that is not covered by any range + // tombstone. 
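+    // Illustrative example of the cascading seek (made-up keys): if level 1
+    // has a tombstone [b, f) that covers the target "c", then no key in
+    // [c, f) from levels > 1 can be visible, so those levels are seeked
+    // directly to "f" instead of "c", skipping keys that would be discarded
+    // anyway.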
+ SeekImpl(target); + FindNextVisibleKey(); + + direction_ = kForward; + { + PERF_TIMER_GUARD(seek_min_heap_time); + current_ = CurrentForward(); + } + } + + void SeekForPrev(const Slice& target) override { + assert(range_tombstone_iters_.empty() || + range_tombstone_iters_.size() == children_.size()); + SeekForPrevImpl(target); + FindPrevVisibleKey(); + + direction_ = kReverse; + { + PERF_TIMER_GUARD(seek_max_heap_time); + current_ = CurrentReverse(); + } + } + + void Next() override { + assert(Valid()); + // Ensure that all children are positioned after key(). + // If we are moving in the forward direction, it is already + // true for all the non-current children since current_ is + // the smallest child and key() == current_->key(). + if (direction_ != kForward) { + // The loop advanced all non-current children to be > key() so current_ + // should still be strictly the smallest key. + SwitchToForward(); + } + + // For the heap modifications below to be correct, current_ must be the + // current top of the heap. + assert(current_ == CurrentForward()); + // as the current points to the current record. move the iterator forward. + current_->Next(); + if (current_->Valid()) { + // current is still valid after the Next() call above. Call + // replace_top() to restore the heap property. When the same child + // iterator yields a sequence of keys, this is cheap. + assert(current_->status().ok()); + minHeap_.replace_top(minHeap_.top()); + } else { + // current stopped being valid, remove it from the heap. + considerStatus(current_->status()); + minHeap_.pop(); + } + // Invariants (3) and (4) hold when after advancing current_. + // Let k be the smallest key among children_[i].iter.key(). + // k <= children_[i].iter.key() <= LevelNextVisible(i, k) holds for all + // level i. After FindNextVisible(), Invariants (1)-(4) hold and + // minHeap_.top()->key() is the first key >= k from any children_ that is + // not covered by any range tombstone. + FindNextVisibleKey(); + current_ = CurrentForward(); + } + + bool NextAndGetResult(IterateResult* result) override { + Next(); + bool is_valid = Valid(); + if (is_valid) { + result->key = key(); + result->bound_check_result = UpperBoundCheckResult(); + result->value_prepared = current_->IsValuePrepared(); + } + return is_valid; + } + + void Prev() override { + assert(Valid()); + // Ensure that all children are positioned before key(). + // If we are moving in the reverse direction, it is already + // true for all the non-current children since current_ is + // the largest child and key() == current_->key(). + if (direction_ != kReverse) { + // Otherwise, retreat the non-current children. We retreat current_ + // just after the if-block. + SwitchToBackward(); + } + + // For the heap modifications below to be correct, current_ must be the + // current top of the heap. + assert(current_ == CurrentReverse()); + current_->Prev(); + if (current_->Valid()) { + // current is still valid after the Prev() call above. Call + // replace_top() to restore the heap property. When the same child + // iterator yields a sequence of keys, this is cheap. + assert(current_->status().ok()); + maxHeap_->replace_top(maxHeap_->top()); + } else { + // current stopped being valid, remove it from the heap. 
+      considerStatus(current_->status());
+      maxHeap_->pop();
+    }
+    FindPrevVisibleKey();
+    current_ = CurrentReverse();
+  }
+
+  Slice key() const override {
+    assert(Valid());
+    return current_->key();
+  }
+
+  Slice value() const override {
+    assert(Valid());
+    return current_->value();
+  }
+
+  bool PrepareValue() override {
+    assert(Valid());
+    if (current_->PrepareValue()) {
+      return true;
+    }
+
+    considerStatus(current_->status());
+    assert(!status_.ok());
+    return false;
+  }
+
+  // Here we simply relay the MayBeOutOfLowerBound/MayBeOutOfUpperBound result
+  // from the current child iterator: as long as no child iterator can be out
+  // of bound, we know the current key is within bounds.
+  bool MayBeOutOfLowerBound() override {
+    assert(Valid());
+    return current_->MayBeOutOfLowerBound();
+  }
+
+  IterBoundCheck UpperBoundCheckResult() override {
+    assert(Valid());
+    return current_->UpperBoundCheckResult();
+  }
+
+  void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
+    pinned_iters_mgr_ = pinned_iters_mgr;
+    for (auto& child : children_) {
+      child.iter.SetPinnedItersMgr(pinned_iters_mgr);
+    }
+  }
+
+  bool IsKeyPinned() const override {
+    assert(Valid());
+    return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
+           current_->IsKeyPinned();
+  }
+
+  bool IsValuePinned() const override {
+    assert(Valid());
+    return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
+           current_->IsValuePinned();
+  }
+
+ private:
+  // Represents an element in the min/max heap. Each HeapItem corresponds to a
+  // point iterator or a range tombstone iterator, differentiated by
+  // HeapItem::type.
+  struct HeapItem {
+    HeapItem() = default;
+
+    // corresponding point iterator
+    IteratorWrapper iter;
+    size_t level = 0;
+    // corresponding range tombstone iterator's start or end key value
+    // depending on value of `type`.
+    ParsedInternalKey tombstone_pik;
+    // Will be overwritten before use, initialize here so compiler does not
+    // complain.
+    enum class Type { ITERATOR, DELETE_RANGE_START, DELETE_RANGE_END };
+    Type type = Type::ITERATOR;
+
+    explicit HeapItem(size_t _level, InternalIteratorBase<Slice>* _iter)
+        : level(_level), type(Type::ITERATOR) {
+      iter.Set(_iter);
+    }
+
+    void SetTombstoneKey(ParsedInternalKey&& pik) {
+      // op_type is already initialized in MergingIterator::Finish().
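+      // Only user_key and sequence are copied: `type` keeps the kTypeMaxValid
+      // value assigned in Finish(), so tombstone keys stay distinct from
+      // point keys in heap ordering.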
+      tombstone_pik.user_key = pik.user_key;
+      tombstone_pik.sequence = pik.sequence;
+    }
+  };
+
+  class MinHeapItemComparator {
+   public:
+    explicit MinHeapItemComparator(const InternalKeyComparator* comparator)
+        : comparator_(comparator) {}
+
+    bool operator()(HeapItem* a, HeapItem* b) const {
+      if (LIKELY(a->type == HeapItem::Type::ITERATOR)) {
+        if (LIKELY(b->type == HeapItem::Type::ITERATOR)) {
+          return comparator_->Compare(a->iter.key(), b->iter.key()) > 0;
+        } else {
+          return comparator_->Compare(a->iter.key(), b->tombstone_pik) > 0;
+        }
+      } else {
+        if (LIKELY(b->type == HeapItem::Type::ITERATOR)) {
+          return comparator_->Compare(a->tombstone_pik, b->iter.key()) > 0;
+        } else {
+          return comparator_->Compare(a->tombstone_pik, b->tombstone_pik) > 0;
+        }
+      }
+    }
+
+   private:
+    const InternalKeyComparator* comparator_;
+  };
+
+  class MaxHeapItemComparator {
+   public:
+    explicit MaxHeapItemComparator(const InternalKeyComparator* comparator)
+        : comparator_(comparator) {}
+
+    bool operator()(HeapItem* a, HeapItem* b) const {
+      if (LIKELY(a->type == HeapItem::Type::ITERATOR)) {
+        if (LIKELY(b->type == HeapItem::Type::ITERATOR)) {
+          return comparator_->Compare(a->iter.key(), b->iter.key()) < 0;
+        } else {
+          return comparator_->Compare(a->iter.key(), b->tombstone_pik) < 0;
+        }
+      } else {
+        if (LIKELY(b->type == HeapItem::Type::ITERATOR)) {
+          return comparator_->Compare(a->tombstone_pik, b->iter.key()) < 0;
+        } else {
+          return comparator_->Compare(a->tombstone_pik, b->tombstone_pik) < 0;
+        }
+      }
+    }
+
+   private:
+    const InternalKeyComparator* comparator_;
+  };
+
+  using MergerMinIterHeap = BinaryHeap<HeapItem*, MinHeapItemComparator>;
+  using MergerMaxIterHeap = BinaryHeap<HeapItem*, MaxHeapItemComparator>;
+
+  friend class MergeIteratorBuilder;
+  // Clears heaps for both directions, used when changing direction or seeking
+  void ClearHeaps(bool clear_active = true);
+  // Ensures that maxHeap_ is initialized when starting to go in the reverse
+  // direction
+  void InitMaxHeap();
+  // Advance this merging iterator until the current key (minHeap_.top()) is
+  // from a point iterator and is not covered by any range tombstone,
+  // or there are no more keys (heap is empty). SeekImpl() may be called
+  // to seek to the end of a range tombstone as an optimization.
+  void FindNextVisibleKey();
+  void FindPrevVisibleKey();
+
+  // Advance this merging iterator to the first key >= `target` for all
+  // components from levels >= starting_level. All iterators before
+  // starting_level are untouched.
+  //
+  // @param range_tombstone_reseek Whether target is some range tombstone
+  // end, i.e., whether this SeekImpl() call is a part of a "cascading seek".
+  // This is used only for recording relevant perf_context.
+  void SeekImpl(const Slice& target, size_t starting_level = 0,
+                bool range_tombstone_reseek = false);
+
+  // Seek to first key <= target key (internal key) for
+  // children_[starting_level:].
+  void SeekForPrevImpl(const Slice& target, size_t starting_level = 0,
+                       bool range_tombstone_reseek = false);
+
+  bool is_arena_mode_;
+  bool prefix_seek_mode_;
+  // Which direction is the iterator moving?
+  enum Direction : uint8_t { kForward, kReverse };
+  Direction direction_;
+  const InternalKeyComparator* comparator_;
+  // HeapItem for all child point iterators.
+  // Invariant(children_): children_[i] is in minHeap_ iff
+  // children_[i].iter.Valid(), and at most one children_[i] is in minHeap_.
+  // TODO: We could use an autovector with a larger reserved size.
+  std::vector<HeapItem> children_;
+  // HeapItem for range tombstone start and end keys.
+  // pinned_heap_item_[i] corresponds to range_tombstone_iters_[i].
+  // Invariant(phi): If range_tombstone_iters_[i]->Valid(),
+  // pinned_heap_item_[i].tombstone_pik is equal to
+  // range_tombstone_iters_[i]->start_key() when
+  // pinned_heap_item_[i].type is DELETE_RANGE_START and
+  // range_tombstone_iters_[i]->end_key() when
+  // pinned_heap_item_[i].type is DELETE_RANGE_END (ignoring op_type which is
+  // kMaxValid for all pinned_heap_item_.tombstone_pik).
+  // pinned_heap_item_[i].type is either DELETE_RANGE_START or DELETE_RANGE_END.
+  std::vector<HeapItem> pinned_heap_item_;
+  // range_tombstone_iters_[i] contains range tombstones in the sorted run that
+  // corresponds to children_[i]. range_tombstone_iters_.empty() means not
+  // handling range tombstones in merging iterator. range_tombstone_iters_[i] ==
+  // nullptr means the sorted run of children_[i] does not have range
+  // tombstones.
+  // Invariant(rti): pinned_heap_item_[i] is in minHeap_ iff
+  // range_tombstone_iters_[i]->Valid() and at most one pinned_heap_item_[i] is
+  // in minHeap_.
+  std::vector<TruncatedRangeDelIterator*> range_tombstone_iters_;
+
+  // Levels (indices into range_tombstone_iters_/children_ ) that currently have
+  // "active" range tombstones. See comments above MergingIterator for meaning
+  // of "active".
+  // Invariant(active_): i is in active_ iff range_tombstone_iters_[i]->Valid()
+  // and pinned_heap_item_[i].type == DELETE_RANGE_END.
+  std::set<size_t> active_;
+
+  bool SkipNextDeleted();
+
+  bool SkipPrevDeleted();
+
+  // Invariant: at the end of each InternalIterator API,
+  // current_ points to minHeap_.top().iter (maxHeap_ if backward scanning)
+  // or nullptr if no child iterator is valid.
+  // This follows from that current_ = CurrentForward()/CurrentReverse() is
+  // called at the end of each InternalIterator API.
+  IteratorWrapper* current_;
+  // If any of the children have non-ok status, this is one of them.
+  Status status_;
+  // Invariant: min heap property is maintained (parent is always <= child).
+  // This holds by using only BinaryHeap APIs to modify heap. One
+  // exception is to modify heap top item directly (by caller iter->Next()), and
+  // it should be followed by a call to replace_top() or pop().
+  MergerMinIterHeap minHeap_;
+
+  // Max heap is used for reverse iteration, which is way less common than
+  // forward. Lazily initialize it to save memory.
+  std::unique_ptr<MergerMaxIterHeap> maxHeap_;
+  PinnedIteratorsManager* pinned_iters_mgr_;
+
+  // Used to bound range tombstones. For point keys, DBIter and SSTable iterator
+  // take care of boundary checking.
+  const Slice* iterate_upper_bound_;
+
+  // In forward direction, process a child that is not in the min heap.
+  // If valid, add to the min heap. Otherwise, check status.
+  void AddToMinHeapOrCheckStatus(HeapItem*);
+
+  // In backward direction, process a child that is not in the max heap.
+  // If valid, add to the max heap. Otherwise, check status.
+  void AddToMaxHeapOrCheckStatus(HeapItem*);
+
+  void SwitchToForward();
+
+  // Switch the direction from forward to backward without changing the
+  // position. Iterator should still be valid.
+  void SwitchToBackward();
+
+  IteratorWrapper* CurrentForward() const {
+    assert(direction_ == kForward);
+    assert(minHeap_.empty() ||
+           minHeap_.top()->type == HeapItem::Type::ITERATOR);
+    return !minHeap_.empty() ?
&minHeap_.top()->iter : nullptr; + } + + IteratorWrapper* CurrentReverse() const { + assert(direction_ == kReverse); + assert(maxHeap_); + assert(maxHeap_->empty() || + maxHeap_->top()->type == HeapItem::Type::ITERATOR); + return !maxHeap_->empty() ? &maxHeap_->top()->iter : nullptr; + } +}; + +// Pre-condition: +// - Invariants (3) and (4) hold for i < starting_level +// - For i < starting_level, range_tombstone_iters_[i].prev.end_key() < +// `target`. +// - For i < starting_level, if i in active_, then +// range_tombstone_iters_[i]->start_key() < `target`. +// +// Post-condition: +// - Invariants (3) and (4) hold for all level i. +// - (*) target <= children_[i].iter.key() <= LevelNextVisible(i, target) +// for i >= starting_level +// - (**) target < pinned_heap_item_[i].tombstone_pik if +// range_tombstone_iters_[i].Valid() for i >= starting_level +// +// Proof sketch: +// Invariant (3) holds for all level i. +// For j <= i < starting_level, it follows from Pre-condition that (3) holds +// and that SeekImpl(-, starting_level) does not update children_[i] or +// range_tombstone_iters_[j]. +// For j < starting_level and i >= starting_level, it follows from +// - Pre-condition that range_tombstone_iters_[j].prev.end_key() < `target` +// - range_tombstone_iters_[j] is not updated in SeekImpl(), and +// - children_[i].iter.Seek(current_search_key) is called with +// current_search_key >= target (shown below). +// When current_search_key is updated, it is updated to some +// range_tombstone_iter->end_key() after +// range_tombstone_iter->SeekInternalKey(current_search_key) was called. So +// current_search_key increases if updated and >= target. +// For starting_level <= j <= i: +// children_[i].iter.Seek(k1) and range_tombstone_iters_[j]->SeekInternalKey(k2) +// are called in SeekImpl(). Seek(k1) positions children_[i] at the first key >= +// k1 from level i. SeekInternalKey(k2) positions range_tombstone_iters_[j] at +// the first range tombstone from level j with end_key() > k2. It suffices to +// show that k1 >= k2. Since k1 and k2 are values of current_search_key where +// k1 = k2 or k1 is value of a later current_search_key than k2, so k1 >= k2. +// +// Invariant (4) holds for all level >= 0. +// By Pre-condition Invariant (4) holds for i < starting_level. +// Since children_[i], range_tombstone_iters_[i] and contents of active_ for +// i < starting_level do not change (4) holds for j <= i < starting_level. +// By Pre-condition: for all j < starting_level, if j in active_, then +// range_tombstone_iters_[j]->start_key() < target. For i >= starting_level, +// children_[i].iter.Seek(k) is called for k >= target. So +// children_[i].iter.key() >= target > range_tombstone_iters_[j]->start_key() +// for j < starting_level and i >= starting_level. So invariant (4) holds for +// j < starting_level and i >= starting_level. +// For starting_level <= j <= i, j is added to active_ only if +// - range_tombstone_iters_[j]->SeekInternalKey(k1) was called +// - range_tombstone_iters_[j]->start_key() <= k1 +// Since children_[i].iter.Seek(k2) is called for some k2 >= k1 and for all +// starting_level <= j <= i, (4) also holds for all starting_level <= j <= i. +// +// Post-condition (*): target <= children_[i].iter.key() <= LevelNextVisible(i, +// target) for i >= starting_level. +// target <= children_[i].iter.key() follows from that Seek() is called on some +// current_search_key >= target for children_[i].iter. 
If current_search_key +// is updated from k1 to k2 when level = i, we show that the range [k1, k2) is +// not visible for children_[j] for any j > i. When current_search_key is +// updated from k1 to k2, +// - range_tombstone_iters_[i]->SeekInternalKey(k1) was called +// - range_tombstone_iters_[i]->Valid() +// - range_tombstone_iters_[i]->start_key().user_key <= k1.user_key +// - k2 = range_tombstone_iters_[i]->end_key() +// We assume that range_tombstone_iters_[i]->start_key() has a higher sequence +// number compared to any key from levels > i that has the same user key. So no +// point key from levels > i in range [k1, k2) is visible. So +// children_[i].iter.key() <= LevelNextVisible(i, target). +// +// Post-condition (**) target < pinned_heap_item_[i].tombstone_pik for i >= +// starting_level if range_tombstone_iters_[i].Valid(). This follows from that +// SeekInternalKey() being called for each range_tombstone_iters_ with some key +// >= `target` and that we pick start/end key that is > `target` to insert to +// minHeap_. +void MergingIterator::SeekImpl(const Slice& target, size_t starting_level, + bool range_tombstone_reseek) { + // active range tombstones before `starting_level` remain active + ClearHeaps(false /* clear_active */); + ParsedInternalKey pik; + if (!range_tombstone_iters_.empty()) { + // pik is only used in InsertRangeTombstoneToMinHeap(). + ParseInternalKey(target, &pik, false).PermitUncheckedError(); + } + + // TODO: perhaps we could save some upheap cost by add all child iters first + // and then do a single heapify. + // Invariant(children_) for level < starting_level + for (size_t level = 0; level < starting_level; ++level) { + PERF_TIMER_GUARD(seek_min_heap_time); + AddToMinHeapOrCheckStatus(&children_[level]); + } + if (!range_tombstone_iters_.empty()) { + // Add range tombstones from levels < starting_level. We can insert from + // pinned_heap_item_ for the following reasons: + // - pinned_heap_item_[level] is in minHeap_ iff + // range_tombstone_iters[level]->Valid(). + // - If `level` is in active_, then range_tombstone_iters_[level]->Valid() + // and pinned_heap_item_[level] is of type RANGE_DELETION_END. + for (size_t level = 0; level < starting_level; ++level) { + // Restores Invariants(rti), (phi) and (active_) for level < + // starting_level + if (range_tombstone_iters_[level] && + range_tombstone_iters_[level]->Valid()) { + // use an iterator on active_ if performance becomes an issue here + if (active_.count(level) > 0) { + assert(pinned_heap_item_[level].type == + HeapItem::Type::DELETE_RANGE_END); + // if it was active, then start key must be within upper_bound, + // so we can add to minHeap_ directly. + minHeap_.push(&pinned_heap_item_[level]); + } else { + assert(pinned_heap_item_[level].type == + HeapItem::Type::DELETE_RANGE_START); + // this takes care of checking iterate_upper_bound, but with an extra + // key comparison if range_tombstone_iters_[level] was already out of + // bound. Consider using a new HeapItem type or some flag to remember + // boundary checking result. + InsertRangeTombstoneToMinHeap(level); + } + } else { + assert(!active_.count(level)); + } + } + // levels >= starting_level will be reseeked below, so clearing their active + // state here. 
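+    // For example (illustrative): with starting_level == 2 and
+    // active_ == {0, 3, 5}, the erase below leaves active_ == {0}; levels 3
+    // and 5 can only re-enter active_ via the reseek that follows.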
+    active_.erase(active_.lower_bound(starting_level), active_.end());
+  }
+
+  status_ = Status::OK();
+  IterKey current_search_key;
+  current_search_key.SetInternalKey(target, false /* copy */);
+  // Seek target might change to some range tombstone end key, so
+  // we need to remember them for async requests.
+  // (level, target) pairs
+  autovector<std::pair<size_t, std::string>> prefetched_target;
+  for (auto level = starting_level; level < children_.size(); ++level) {
+    {
+      PERF_TIMER_GUARD(seek_child_seek_time);
+      children_[level].iter.Seek(current_search_key.GetInternalKey());
+    }
+
+    PERF_COUNTER_ADD(seek_child_seek_count, 1);
+
+    if (!range_tombstone_iters_.empty()) {
+      if (range_tombstone_reseek) {
+        // This seek is to some range tombstone end key.
+        // Should only happen when there are range tombstones.
+        PERF_COUNTER_ADD(internal_range_del_reseek_count, 1);
+      }
+      if (children_[level].iter.status().IsTryAgain()) {
+        prefetched_target.emplace_back(
+            level, current_search_key.GetInternalKey().ToString());
+      }
+      auto range_tombstone_iter = range_tombstone_iters_[level];
+      if (range_tombstone_iter) {
+        range_tombstone_iter->SeekInternalKey(
+            current_search_key.GetInternalKey());
+        // Invariants (rti) and (phi)
+        if (range_tombstone_iter->Valid()) {
+          // If range tombstone starts after `current_search_key`,
+          // we should insert start key to heap as the range tombstone is not
+          // active yet.
+          InsertRangeTombstoneToMinHeap(
+              level, comparator_->Compare(range_tombstone_iter->start_key(),
+                                          pik) > 0 /* start_key */);
+          // current_search_key < end_key guaranteed by the SeekInternalKey()
+          // and Valid() calls above. Here we only need to compare user_key
+          // since if target.user_key ==
+          // range_tombstone_iter->start_key().user_key and target <
+          // range_tombstone_iter->start_key(), no older level would have any
+          // key in range [target, range_tombstone_iter->start_key()], so no
+          // keys in range [target, range_tombstone_iter->end_key()) from older
+          // level would be visible. So it is safe to seek to
+          // range_tombstone_iter->end_key().
+          //
+          // TODO: range_tombstone_iter->Seek() finds the max covering
+          // sequence number, can make it cheaper by not looking for max.
+          if (comparator_->user_comparator()->Compare(
+                  range_tombstone_iter->start_key().user_key,
+                  current_search_key.GetUserKey()) <= 0) {
+            range_tombstone_reseek = true;
+            // Note that for prefix seek case, it is possible that the prefix
+            // is not the same as the original target, it should not affect
+            // correctness. Besides, in most cases, range tombstone start and
+            // end key should have the same prefix?
+            current_search_key.SetInternalKey(range_tombstone_iter->end_key());
+          }
+        }
+      }
+    }
+    // child.iter.status() is set to Status::TryAgain indicating asynchronous
+    // request for retrieval of data blocks has been submitted. So it should
+    // return at this point and Seek should be called again to retrieve the
+    // requested block and add the child to min heap.
+    if (children_[level].iter.status().IsTryAgain()) {
+      continue;
+    }
+    {
+      // Strictly, we timed slightly more than min heap operation,
+      // but these operations are very cheap.
+ PERF_TIMER_GUARD(seek_min_heap_time); + AddToMinHeapOrCheckStatus(&children_[level]); + } + } + + if (range_tombstone_iters_.empty()) { + for (auto& child : children_) { + if (child.iter.status().IsTryAgain()) { + child.iter.Seek(target); + { + PERF_TIMER_GUARD(seek_min_heap_time); + AddToMinHeapOrCheckStatus(&child); + } + PERF_COUNTER_ADD(number_async_seek, 1); + } + } + } else { + for (auto& prefetch : prefetched_target) { + // (level, target) pairs + children_[prefetch.first].iter.Seek(prefetch.second); + { + PERF_TIMER_GUARD(seek_min_heap_time); + AddToMinHeapOrCheckStatus(&children_[prefetch.first]); + } + PERF_COUNTER_ADD(number_async_seek, 1); + } + } +} + +// Returns true iff the current key (min heap top) should not be returned +// to user (of the merging iterator). This can be because the current key +// is deleted by some range tombstone, the current key is some fake file +// boundary sentinel key, or the current key is an end point of a range +// tombstone. Advance the iterator at heap top if needed. Heap order is restored +// and `active_` is updated accordingly. +// See FindNextVisibleKey() for more detail on internal implementation +// of advancing child iters. +// When false is returned, if minHeap is not empty, then minHeap_.top().type +// == ITERATOR +// +// REQUIRES: +// - min heap is currently not empty, and iter is in kForward direction. +// - minHeap_ top is not DELETE_RANGE_START (so that `active_` is current). +bool MergingIterator::SkipNextDeleted() { + // 3 types of keys: + // - point key + // - file boundary sentinel keys + // - range deletion end key + auto current = minHeap_.top(); + if (current->type == HeapItem::Type::DELETE_RANGE_END) { + // Invariant(active_): range_tombstone_iters_[current->level] is about to + // become !Valid() or that its start key is going to be added to minHeap_. + active_.erase(current->level); + assert(range_tombstone_iters_[current->level] && + range_tombstone_iters_[current->level]->Valid()); + range_tombstone_iters_[current->level]->Next(); + // Maintain Invariants (rti) and (phi) + if (range_tombstone_iters_[current->level]->Valid()) { + InsertRangeTombstoneToMinHeap(current->level, true /* start_key */, + true /* replace_top */); + } else { + minHeap_.pop(); + } + return true /* current key deleted */; + } + if (current->iter.IsDeleteRangeSentinelKey()) { + // If the file boundary is defined by a range deletion, the range + // tombstone's end key must come before this sentinel key (see op_type in + // SetTombstoneKey()). + assert(ExtractValueType(current->iter.key()) != kTypeRangeDeletion || + active_.count(current->level) == 0); + // When entering a new file, range tombstone iter from the old file is + // freed, but the last key from that range tombstone iter may still be in + // the heap. We need to ensure the data underlying its corresponding key + // Slice is still alive. We do so by popping the range tombstone key from + // heap before calling iter->Next(). Technically, this change is not needed: + // if there is a range tombstone end key that is after file boundary + // sentinel key in minHeap_, the range tombstone end key must have been + // truncated at file boundary. The underlying data of the range tombstone + // end key Slice is the SST file's largest internal key stored as file + // metadata in Version. However, since there are too many implicit + // assumptions made, it is safer to just ensure range tombstone iter is + // still alive. 
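+    // Illustrative straddling case: a tombstone [a, z) and point keys split
+    // across two SST files leave both the old file's boundary sentinel and
+    // its truncated end key of [a, z) in minHeap_; the end key must be
+    // dropped before the old file's iterator is released below.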
+ minHeap_.pop(); + // Remove last SST file's range tombstone end key if there is one. + // This means file boundary is before range tombstone end key, + // which could happen when a range tombstone and a user key + // straddle two SST files. Note that in TruncatedRangeDelIterator + // constructor, parsed_largest.sequence is decremented 1 in this case. + // Maintains Invariant(rti) that at most one + // pinned_heap_item_[current->level] is in minHeap_. + if (range_tombstone_iters_[current->level] && + range_tombstone_iters_[current->level]->Valid()) { + if (!minHeap_.empty() && minHeap_.top()->level == current->level) { + assert(minHeap_.top()->type == HeapItem::Type::DELETE_RANGE_END); + minHeap_.pop(); + // Invariant(active_): we are about to enter a new SST file with new + // range_tombstone_iters[current->level]. Either it is !Valid() or its + // start key is going to be added to minHeap_. + active_.erase(current->level); + } else { + // range tombstone is still valid, but it is not on heap. + // This should only happen if the range tombstone is over iterator + // upper bound. + assert(iterate_upper_bound_ && + comparator_->user_comparator()->CompareWithoutTimestamp( + range_tombstone_iters_[current->level]->start_key().user_key, + true /* a_has_ts */, *iterate_upper_bound_, + false /* b_has_ts */) >= 0); + } + } + // LevelIterator enters a new SST file + current->iter.Next(); + // Invariant(children_): current is popped from heap and added back only if + // it is valid + if (current->iter.Valid()) { + assert(current->iter.status().ok()); + minHeap_.push(current); + } + // Invariants (rti) and (phi) + if (range_tombstone_iters_[current->level] && + range_tombstone_iters_[current->level]->Valid()) { + InsertRangeTombstoneToMinHeap(current->level); + } + return true /* current key deleted */; + } + assert(current->type == HeapItem::Type::ITERATOR); + // Point key case: check active_ for range tombstone coverage. + ParsedInternalKey pik; + ParseInternalKey(current->iter.key(), &pik, false).PermitUncheckedError(); + if (!active_.empty()) { + auto i = *active_.begin(); + if (i < current->level) { + // range tombstone is from a newer level, definitely covers + assert(comparator_->Compare(range_tombstone_iters_[i]->start_key(), + pik) <= 0); + assert(comparator_->Compare(pik, range_tombstone_iters_[i]->end_key()) < + 0); + std::string target; + AppendInternalKey(&target, range_tombstone_iters_[i]->end_key()); + SeekImpl(target, current->level, true); + return true /* current key deleted */; + } else if (i == current->level) { + // range tombstone is from the same level as current, check sequence + // number. By `active_` we know current key is between start key and end + // key. + assert(comparator_->Compare(range_tombstone_iters_[i]->start_key(), + pik) <= 0); + assert(comparator_->Compare(pik, range_tombstone_iters_[i]->end_key()) < + 0); + if (pik.sequence < range_tombstone_iters_[current->level]->seq()) { + // covered by range tombstone + current->iter.Next(); + // Invariant (children_) + if (current->iter.Valid()) { + minHeap_.replace_top(current); + } else { + minHeap_.pop(); + } + return true /* current key deleted */; + } else { + return false /* current key not deleted */; + } + } else { + return false /* current key not deleted */; + // range tombstone from an older sorted run with current key < end key. + // current key is not deleted and the older sorted run will have its range + // tombstone updated when the range tombstone's end key are popped from + // minHeap_. 
+    }
+  }
+  // we can reach here only if active_ is empty
+  assert(active_.empty());
+  assert(minHeap_.top()->type == HeapItem::Type::ITERATOR);
+  return false /* current key not deleted */;
+}
+
+void MergingIterator::SeekForPrevImpl(const Slice& target,
+                                      size_t starting_level,
+                                      bool range_tombstone_reseek) {
+  // active range tombstones before `starting_level` remain active
+  ClearHeaps(false /* clear_active */);
+  InitMaxHeap();
+  ParsedInternalKey pik;
+  if (!range_tombstone_iters_.empty()) {
+    ParseInternalKey(target, &pik, false).PermitUncheckedError();
+  }
+  for (size_t level = 0; level < starting_level; ++level) {
+    PERF_TIMER_GUARD(seek_max_heap_time);
+    AddToMaxHeapOrCheckStatus(&children_[level]);
+  }
+  if (!range_tombstone_iters_.empty()) {
+    // Add range tombstones before starting_level.
+    for (size_t level = 0; level < starting_level; ++level) {
+      if (range_tombstone_iters_[level] &&
+          range_tombstone_iters_[level]->Valid()) {
+        assert(static_cast<bool>(active_.count(level)) ==
+               (pinned_heap_item_[level].type ==
+                HeapItem::Type::DELETE_RANGE_START));
+        maxHeap_->push(&pinned_heap_item_[level]);
+      } else {
+        assert(!active_.count(level));
+      }
+    }
+    // levels >= starting_level will be reseeked below, so clear their active
+    // state here.
+    active_.erase(active_.lower_bound(starting_level), active_.end());
+  }
+
+  status_ = Status::OK();
+  IterKey current_search_key;
+  current_search_key.SetInternalKey(target, false /* copy */);
+  // Seek target might change to some range tombstone end key, so
+  // we need to remember them for async requests.
+  // (level, target) pairs
+  autovector<std::pair<size_t, std::string>> prefetched_target;
+  for (auto level = starting_level; level < children_.size(); ++level) {
+    {
+      PERF_TIMER_GUARD(seek_child_seek_time);
+      children_[level].iter.SeekForPrev(current_search_key.GetInternalKey());
+    }
+
+    PERF_COUNTER_ADD(seek_child_seek_count, 1);
+
+    if (!range_tombstone_iters_.empty()) {
+      if (range_tombstone_reseek) {
+        // This seek is to some range tombstone end key.
+        // Should only happen when there are range tombstones.
+        PERF_COUNTER_ADD(internal_range_del_reseek_count, 1);
+      }
+      if (children_[level].iter.status().IsTryAgain()) {
+        prefetched_target.emplace_back(
+            level, current_search_key.GetInternalKey().ToString());
+      }
+      auto range_tombstone_iter = range_tombstone_iters_[level];
+      if (range_tombstone_iter) {
+        range_tombstone_iter->SeekForPrev(current_search_key.GetUserKey());
+        if (range_tombstone_iter->Valid()) {
+          InsertRangeTombstoneToMaxHeap(
+              level, comparator_->Compare(range_tombstone_iter->end_key(),
+                                          pik) <= 0 /* end_key */);
+          // start key <= current_search_key guaranteed by the Seek() call above
+          // Only interested in user key coverage since older sorted runs must
+          // have smaller sequence numbers than this tombstone.
+          if (comparator_->user_comparator()->Compare(
+                  current_search_key.GetUserKey(),
+                  range_tombstone_iter->end_key().user_key) < 0) {
+            range_tombstone_reseek = true;
+            current_search_key.SetInternalKey(
+                range_tombstone_iter->start_key().user_key, kMaxSequenceNumber,
+                kValueTypeForSeekForPrev);
+          }
+        }
+      }
+    }
+    // child.iter.status() is set to Status::TryAgain indicating asynchronous
+    // request for retrieval of data blocks has been submitted. So it should
+    // return at this point and Seek should be called again to retrieve the
+    // requested block and add the child to min heap.
+    if (children_[level].iter.status().IsTryAgain()) {
+      continue;
+    }
+    {
+      // Strictly, we timed slightly more than min heap operation,
+      // but these operations are very cheap.
+      PERF_TIMER_GUARD(seek_max_heap_time);
+      AddToMaxHeapOrCheckStatus(&children_[level]);
+    }
+  }
+
+  if (range_tombstone_iters_.empty()) {
+    for (auto& child : children_) {
+      if (child.iter.status().IsTryAgain()) {
+        child.iter.SeekForPrev(target);
+        {
+          PERF_TIMER_GUARD(seek_min_heap_time);
+          AddToMaxHeapOrCheckStatus(&child);
+        }
+        PERF_COUNTER_ADD(number_async_seek, 1);
+      }
+    }
+  } else {
+    for (auto& prefetch : prefetched_target) {
+      // (level, target) pairs
+      children_[prefetch.first].iter.SeekForPrev(prefetch.second);
+      {
+        PERF_TIMER_GUARD(seek_max_heap_time);
+        AddToMaxHeapOrCheckStatus(&children_[prefetch.first]);
+      }
+      PERF_COUNTER_ADD(number_async_seek, 1);
+    }
+  }
+}
+
+// See more in comments above SkipNextDeleted().
+// REQUIRES:
+// - max heap is currently not empty, and iter is in kReverse direction.
+// - maxHeap_ top is not DELETE_RANGE_END (so that `active_` is current).
+bool MergingIterator::SkipPrevDeleted() {
+  // 3 types of keys:
+  // - point key
+  // - file boundary sentinel keys
+  // - range deletion start key
+  auto current = maxHeap_->top();
+  if (current->type == HeapItem::Type::DELETE_RANGE_START) {
+    active_.erase(current->level);
+    assert(range_tombstone_iters_[current->level] &&
+           range_tombstone_iters_[current->level]->Valid());
+    range_tombstone_iters_[current->level]->Prev();
+    if (range_tombstone_iters_[current->level]->Valid()) {
+      InsertRangeTombstoneToMaxHeap(current->level, true /* end_key */,
+                                    true /* replace_top */);
+    } else {
+      maxHeap_->pop();
+    }
+    return true /* current key deleted */;
+  }
+  if (current->iter.IsDeleteRangeSentinelKey()) {
+    // LevelIterator enters a new SST file
+    maxHeap_->pop();
+    // Remove last SST file's range tombstone key if there is one.
+    if (!maxHeap_->empty() && maxHeap_->top()->level == current->level &&
+        maxHeap_->top()->type == HeapItem::Type::DELETE_RANGE_START) {
+      maxHeap_->pop();
+      active_.erase(current->level);
+    }
+    current->iter.Prev();
+    if (current->iter.Valid()) {
+      assert(current->iter.status().ok());
+      maxHeap_->push(current);
+    }
+
+    if (range_tombstone_iters_[current->level] &&
+        range_tombstone_iters_[current->level]->Valid()) {
+      InsertRangeTombstoneToMaxHeap(current->level);
+    }
+    return true /* current key deleted */;
+  }
+  assert(current->type == HeapItem::Type::ITERATOR);
+  // Point key case: check active_ for range tombstone coverage.
+  ParsedInternalKey pik;
+  ParseInternalKey(current->iter.key(), &pik, false).PermitUncheckedError();
+  if (!active_.empty()) {
+    auto i = *active_.begin();
+    if (i < current->level) {
+      // range tombstone is from a newer level, definitely covers
+      assert(comparator_->Compare(range_tombstone_iters_[i]->start_key(),
+                                  pik) <= 0);
+      assert(comparator_->Compare(pik, range_tombstone_iters_[i]->end_key()) <
+             0);
+      std::string target;
+      AppendInternalKey(&target, range_tombstone_iters_[i]->start_key());
+      // This is different from SkipNextDeleted() which does reseek at sorted
+      // runs >= level (instead of i+1 here). With min heap, if level L is at
+      // top of the heap, then levels < L all have internal keys > level L's
+      // current internal key; the same does not hold for the max heap, so
+      // all sorted runs below the tombstone's level are reseeked here.
+      SeekForPrevImpl(target, i + 1, true);
+      return true /* current key deleted */;
+    } else if (i == current->level) {
+      // By `active_` we know current key is between start key and end key.
+ assert(comparator_->Compare(range_tombstone_iters_[i]->start_key(), + pik) <= 0); + assert(comparator_->Compare(pik, range_tombstone_iters_[i]->end_key()) < + 0); + if (pik.sequence < range_tombstone_iters_[current->level]->seq()) { + current->iter.Prev(); + if (current->iter.Valid()) { + maxHeap_->replace_top(current); + } else { + maxHeap_->pop(); + } + return true /* current key deleted */; + } else { + return false /* current key not deleted */; + } + } else { + return false /* current key not deleted */; + } + } + + assert(active_.empty()); + assert(maxHeap_->top()->type == HeapItem::Type::ITERATOR); + return false /* current key not deleted */; +} + +void MergingIterator::AddToMinHeapOrCheckStatus(HeapItem* child) { + // Invariant(children_) + if (child->iter.Valid()) { + assert(child->iter.status().ok()); + minHeap_.push(child); + } else { + considerStatus(child->iter.status()); + } +} + +void MergingIterator::AddToMaxHeapOrCheckStatus(HeapItem* child) { + if (child->iter.Valid()) { + assert(child->iter.status().ok()); + maxHeap_->push(child); + } else { + considerStatus(child->iter.status()); + } +} + +// Advance all non current_ child to > current_.key(). +// We advance current_ after the this function call as it does not require +// Seek(). +// Advance all range tombstones iters, including the one corresponding to +// current_, to the first tombstone with end_key > current_.key(). +// TODO: potentially do cascading seek here too +// TODO: show that invariants hold +void MergingIterator::SwitchToForward() { + ClearHeaps(); + Slice target = key(); + for (auto& child : children_) { + if (&child.iter != current_) { + child.iter.Seek(target); + // child.iter.status() is set to Status::TryAgain indicating asynchronous + // request for retrieval of data blocks has been submitted. So it should + // return at this point and Seek should be called again to retrieve the + // requested block and add the child to min heap. + if (child.iter.status() == Status::TryAgain()) { + continue; + } + if (child.iter.Valid() && comparator_->Equal(target, child.iter.key())) { + assert(child.iter.status().ok()); + child.iter.Next(); + } + } + AddToMinHeapOrCheckStatus(&child); + } + + for (auto& child : children_) { + if (child.iter.status() == Status::TryAgain()) { + child.iter.Seek(target); + if (child.iter.Valid() && comparator_->Equal(target, child.iter.key())) { + assert(child.iter.status().ok()); + child.iter.Next(); + } + AddToMinHeapOrCheckStatus(&child); + } + } + + // Current range tombstone iter also needs to seek for the following case: + // Previous direction is backward, so range tombstone iter may point to a + // tombstone before current_. If there is no such tombstone, then the range + // tombstone iter is !Valid(). Need to reseek here to make it valid again. + if (!range_tombstone_iters_.empty()) { + ParsedInternalKey pik; + ParseInternalKey(target, &pik, false /* log_err_key */) + .PermitUncheckedError(); + for (size_t i = 0; i < range_tombstone_iters_.size(); ++i) { + auto iter = range_tombstone_iters_[i]; + if (iter) { + iter->Seek(pik.user_key); + // The while loop is needed as the Seek() call above is only for user + // key. We could have a range tombstone with end_key covering user_key, + // but still is smaller than target. This happens when the range + // tombstone is truncated at iter.largest_. 
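+        // e.g. (illustrative): a truncated end key with the same user key as
+        // `target` but a higher sequence number compares <= `target` as an
+        // internal key, so that tombstone already ends at or before target
+        // and the loop below steps past it.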
+        while (iter->Valid() &&
+               comparator_->Compare(iter->end_key(), pik) <= 0) {
+          iter->Next();
+        }
+        if (range_tombstone_iters_[i]->Valid()) {
+          InsertRangeTombstoneToMinHeap(
+              i, comparator_->Compare(range_tombstone_iters_[i]->start_key(),
+                                      pik) > 0 /* start_key */);
+        }
+      }
+    }
+  }
+
+  direction_ = kForward;
+  assert(current_ == CurrentForward());
+}
+
+// Advance all range tombstones iters, including the one corresponding to
+// current_, to the first tombstone with start_key <= current_.key().
+void MergingIterator::SwitchToBackward() {
+  ClearHeaps();
+  InitMaxHeap();
+  Slice target = key();
+  for (auto& child : children_) {
+    if (&child.iter != current_) {
+      child.iter.SeekForPrev(target);
+      TEST_SYNC_POINT_CALLBACK("MergeIterator::Prev:BeforePrev", &child);
+      if (child.iter.Valid() && comparator_->Equal(target, child.iter.key())) {
+        assert(child.iter.status().ok());
+        child.iter.Prev();
+      }
+    }
+    AddToMaxHeapOrCheckStatus(&child);
+  }
+
+  ParsedInternalKey pik;
+  ParseInternalKey(target, &pik, false /* log_err_key */)
+      .PermitUncheckedError();
+  for (size_t i = 0; i < range_tombstone_iters_.size(); ++i) {
+    auto iter = range_tombstone_iters_[i];
+    if (iter) {
+      iter->SeekForPrev(pik.user_key);
+      // Since the SeekForPrev() call above is only for user key,
+      // we may end up with some range tombstone with start key having the
+      // same user key at current_, but with a smaller sequence number. This
+      // makes current_ not at maxHeap_ top for the CurrentReverse() call
+      // below. If there is a range tombstone start key with the same user
+      // key and the same sequence number as current_.key(), it will be fine as
+      // in InsertRangeTombstoneToMaxHeap() we change op_type to be the smallest
+      // op_type.
+      while (iter->Valid() &&
+             comparator_->Compare(iter->start_key(), pik) > 0) {
+        iter->Prev();
+      }
+      if (iter->Valid()) {
+        InsertRangeTombstoneToMaxHeap(
+            i, comparator_->Compare(range_tombstone_iters_[i]->end_key(),
+                                    pik) <= 0 /* end_key */);
+      }
+    }
+  }
+
+  direction_ = kReverse;
+  if (!prefix_seek_mode_) {
+    // Note that we don't do assert(current_ == CurrentReverse()) here
+    // because it is possible to have some keys larger than the seek-key
+    // inserted between Seek() and SeekToLast(), which makes current_ not
+    // equal to CurrentReverse().
+    current_ = CurrentReverse();
+  }
+  assert(current_ == CurrentReverse());
+}
+
+void MergingIterator::ClearHeaps(bool clear_active) {
+  minHeap_.clear();
+  if (maxHeap_) {
+    maxHeap_->clear();
+  }
+  if (clear_active) {
+    active_.clear();
+  }
+}
+
+void MergingIterator::InitMaxHeap() {
+  if (!maxHeap_) {
+    maxHeap_ = std::make_unique<MergerMaxIterHeap>(
+        MaxHeapItemComparator(comparator_));
+  }
+}
+
+// Assume there is a next key that is not covered by range tombstone.
+// Pre-condition:
+// - Invariants (3) and (4)
+// - There is some k where k <= children_[i].iter.key() <= LevelNextVisible(i,
+// k) for all levels i (LevelNextVisible() defined in Seek()).
+//
+// Define NextVisible(k) to be the first key >= k from among children_ that
+// is not covered by any range tombstone.
+// Post-condition:
+// - Invariants (1)-(4) hold
+// - (*): minHeap_->top()->key() == NextVisible(k)
+//
+// Loop invariants:
+// - Invariants (3) and (4)
+// - (*): k <= children_[i].iter.key() <= LevelNextVisible(i, k)
+//
+// Progress: minHeap_.top()->key() is non-decreasing and strictly increases in
+// a finite number of iterations.
+// TODO: it is possible to call SeekImpl(k2) after SeekImpl(k1) with
+// k2 < k1 in the same FindNextVisibleKey().
+// For example, l1 has a range tombstone [2,3) and l2 has a range tombstone
+// [1, 4). Point key 1 from l5 triggers SeekImpl(4 /* target */, 5). Then
+// point key 2 from l3 triggers SeekImpl(3 /* target */, 3).
+// Ideally we should only move iterators forward in SeekImpl(), and the
+// progress condition could be made simpler: iterators only move forward.
+//
+// Proof sketch:
+// Post-condition:
+// Invariant (1) holds when this method returns:
+// Ignoring the empty minHeap_ case, there are two cases:
+// Case 1: active_ is empty and !minHeap_.top()->iter.IsDeleteRangeSentinelKey()
+// By invariants (rti) and (active_), active_ being empty means that if a
+// pinned_heap_item_[i] is in minHeap_, it has type DELETE_RANGE_START. Note
+// that PopDeleteRangeStart() was called right before the while loop condition,
+// so minHeap_.top() is not of type DELETE_RANGE_START. So minHeap_.top() must
+// be of type ITERATOR.
+// Case 2: SkipNextDeleted() returns false. The method returns false only when
+// minHeap_.top().type == ITERATOR.
+//
+// Invariant (2) holds when this method returns:
+// From Invariant (1), minHeap_.top().type == ITERATOR. Suppose it is
+// children_[i] for some i. Suppose that children_[i].iter.key() is covered by
+// some range tombstone. This means there is a j <= i and a range tombstone from
+// level j with start_key() < children_[i].iter.key() < end_key().
+// - If range_tombstone_iters_[j]->Valid(), by Invariants (rti) and (phi),
+// pinned_heap_item_[j] is in minHeap_, and pinned_heap_item_[j].tombstone_pik
+// is either the start or the end key of this range tombstone. If
+// pinned_heap_item_[j].tombstone_pik < children_[i].iter.key(), it would be at
+// the top of minHeap_, which would contradict Invariant (1). So
+// pinned_heap_item_[j].tombstone_pik > children_[i].iter.key().
+// By Invariant (3), range_tombstone_iters_[j].prev.end_key() <
+// children_[i].iter.key(). We assume that in each level, range tombstones
+// cover non-overlapping ranges. So range_tombstone_iters_[j] is at
+// the range tombstone with start_key() < children_[i].iter.key() < end_key()
+// and has its end_key() in minHeap_. By Invariants (phi) and (active_),
+// j is in active_. From the while loop condition, SkipNextDeleted() must have
+// returned false for this method to return.
+// - If j < i, then SeekImpl(range_tombstone_iters_[j']->end_key(), i)
+// was called for some j' < i and j' in active_. Note that since j' is in
+// active_, pinned_heap_item_[j'] is in minHeap_ and has tombstone_pik =
+// range_tombstone_iters_[j']->end_key(). So
+// range_tombstone_iters_[j']->end_key() must be larger than
+// children_[i].iter.key() to not be at the top of minHeap_. This means after
+// SeekImpl(), children_[i] would be at a key > children_[i].iter.key()
+// -- contradiction.
+// - If j == i, children_[i]->Next() would have been called and children_[i]
+// would be at a key > children_[i].iter.key() -- contradiction.
+// - If !range_tombstone_iters_[j]->Valid(), then range_tombstone_iters_[j]
+// points to an SST file with all range tombstones from that file exhausted.
+// The file must come before the file containing the first
+// range tombstone with start_key() < children_[i].iter.key() < end_key().
+// Assuming files from the same level have non-overlapping ranges, the current
+// file's meta.largest is less than children_[i].iter.key(). So the file
+// boundary key, which has value meta.largest, must have been popped from
+// minHeap_ before children_[i].iter.key().
+// So range_tombstone_iters_[j] would not point to this SST file
+// -- contradiction.
+// So it is impossible for children_[i].iter.key() to be covered by a range
+// tombstone.
+//
+// Post-condition (*) holds when the function returns:
+// From loop invariant (*) that k <= children_[i].iter.key() <=
+// LevelNextVisible(i, k) and Invariant (2) above, when the function returns,
+// minHeap_.top()->key() is the smallest LevelNextVisible(i, k) among all levels
+// i. This is equal to NextVisible(k).
+//
+// Invariant (3) holds after each iteration:
+// PopDeleteRangeStart() does not change range tombstone position.
+// In SkipNextDeleted():
+// - If DELETE_RANGE_END is popped from minHeap_, it means the range
+// tombstone's end key is < all other point keys, so it is safe to advance to
+// the next range tombstone.
+// - If a file boundary is popped (current->iter.IsDeleteRangeSentinelKey()),
+// we assume that file's last range tombstone's
+// end_key <= file boundary key < all other point keys. So it is safe to
+// move to the first range tombstone in the next SST file.
+// - If children_[i]->Next() is called, then it is fine as it is advancing a
+// point iterator.
+// - If SeekImpl(target, l) is called, then (3) follows from SeekImpl()'s
+// post-condition if its pre-condition holds. The first pre-condition follows
+// from the loop invariant where Invariant (3) holds for all levels i.
+// Now we show that the second pre-condition holds. Since Invariant (3) holds
+// for all i, we have for all j <= l, range_tombstone_iters_[j].prev.end_key()
+// < children_[l].iter.key(). `target` is the value of
+// range_tombstone_iters_[j'].end_key() for some j' < l and j' in active_.
+// By Invariants (active_) and (rti), pinned_heap_item_[j'] is in minHeap_ and
+// pinned_heap_item_[j'].tombstone_pik = range_tombstone_iters_[j'].end_key().
+// This end_key must be larger than children_[l].key() since it was not at the
+// top of minHeap_. So for all levels j <= l,
+// range_tombstone_iters_[j].prev.end_key() < children_[l].iter.key() < target.
+//
+// Invariant (4) holds after each iteration:
+// A level i is inserted into active_ during calls to PopDeleteRangeStart().
+// In that case, range_tombstone_iters_[i].start_key() < all point keys
+// by the heap property and the assumption that point keys and range tombstone
+// keys are distinct.
+// If SeekImpl(target, l) is called, then there is a range_tombstone_iters_[j]
+// where target = range_tombstone_iters_[j]->end_key() and children_[l]->key()
+// < target. By the loop invariants, (3) and (4) hold for all levels.
+// Since target > children_[l]->key(), it also holds that for j < l,
+// range_tombstone_iters_[j].prev.end_key() < target and that if j in active_,
+// range_tombstone_iters_[j]->start_key() < target. So all pre-conditions of
+// SeekImpl(target, l) hold, and (4) follows from its post-condition.
+// All other places in this function either advance point iterators
+// or remove some level from active_, so (4) still holds.
+//
+// Loop invariant (*): for all levels i, k <= children_[i] <= LevelNextVisible(i,
+// k).
+// k <= children_[i] follows from the loop `progress` condition.
+// Consider when children_[i] is changed for any i. It is through
+// children_[i].iter.Next() or SeekImpl() in SkipNextDeleted().
+// If children_[i].iter.Next() is called, there is a range tombstone from level
+// i where tombstone seqno > children_[i].iter.key()'s seqno and i in active_.
+// By Invariant (4), the tombstone's start_key < children_[i].iter.key().
+// By invariants (active_), (phi), and (rti), the tombstone's end_key is in
+// minHeap_ and children_[i].iter.key() < end_key. So children_[i].iter.key()
+// is not visible, and it is safe to call Next().
+// If SeekImpl(target, l) is called, by its contract, when SeekImpl() returns,
+// target <= children_[i]->key() <= LevelNextVisible(i, target) for i >= l,
+// and `target` is range_tombstone_iters_[j]->end_key() for some j < i with
+// j in active_.
+// By Invariant (4), range_tombstone_iters_[j]->start_key() <
+// children_[i].iter.key() for all i >= l. So for each level i >= l, the range
+// [children_[i].iter.key(), target) is not visible. So after SeekImpl(),
+// children_[i].iter.key() <= LevelNextVisible(i, target) <=
+// LevelNextVisible(i, k).
+//
+// `Progress` holds for each iteration:
+// Very sloppy intuition:
+// - in PopDeleteRangeStart(): the value of a pinned_heap_item_.tombstone_pik_
+// is updated from the start key to the end key of the same range tombstone.
+// We assume that start key <= end key for the same range tombstone.
+// - in SkipNextDeleted():
+//   - If the top of the heap is DELETE_RANGE_END, the range tombstone is
+//   advanced and the relevant pinned_heap_item_.tombstone_pik is increased or
+//   popped from minHeap_.
+//   - If the top of the heap is a file boundary key, then both the point iter
+//   and the range tombstone iter are advanced to the next file.
+//   - If the top of the heap is ITERATOR and current->iter.Next() is called,
+//   it moves to a larger point key.
+//   - If the top of the heap is ITERATOR and SeekImpl(k, l) is called, then
+//   all iterators from levels >= l are advanced to some key >= k by its
+//   contract. And the top of minHeap_ before SeekImpl(k, l) was less than k.
+// There are special cases where different heap items have the same key
+// (e.g. when two range tombstone end keys share the same value). In
+// these cases, iterators are being advanced, so the minimum key should
+// increase in a finite number of steps.
+inline void MergingIterator::FindNextVisibleKey() {
+  PopDeleteRangeStart();
+  // PopDeleteRangeStart() implies that the heap top is not DELETE_RANGE_START.
+  // active_ being empty implies no DELETE_RANGE_END in the heap.
+  // So minHeap_->top() must be of type ITERATOR.
+  while (
+      !minHeap_.empty() &&
+      (!active_.empty() || minHeap_.top()->iter.IsDeleteRangeSentinelKey()) &&
+      SkipNextDeleted()) {
+    PopDeleteRangeStart();
+  }
+  // Checks Invariant (1)
+  assert(minHeap_.empty() || minHeap_.top()->type == HeapItem::Type::ITERATOR);
+}
+
+inline void MergingIterator::FindPrevVisibleKey() {
+  PopDeleteRangeEnd();
+  // PopDeleteRangeEnd() implies that the heap top is not DELETE_RANGE_END.
+  // active_ being empty implies no DELETE_RANGE_START in the heap.
+  // So maxHeap_->top() must be of type ITERATOR.
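+  // (Symmetric to FindNextVisibleKey() above; the invariants and the proof
+  // sketch there apply with the key order reversed.)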
+ while ( + !maxHeap_->empty() && + (!active_.empty() || maxHeap_->top()->iter.IsDeleteRangeSentinelKey()) && + SkipPrevDeleted()) { + PopDeleteRangeEnd(); + } +} + +InternalIterator* NewMergingIterator(const InternalKeyComparator* cmp, + InternalIterator** list, int n, + Arena* arena, bool prefix_seek_mode) { + assert(n >= 0); + if (n == 0) { + return NewEmptyInternalIterator(arena); + } else if (n == 1) { + return list[0]; + } else { + if (arena == nullptr) { + return new MergingIterator(cmp, list, n, false, prefix_seek_mode); + } else { + auto mem = arena->AllocateAligned(sizeof(MergingIterator)); + return new (mem) MergingIterator(cmp, list, n, true, prefix_seek_mode); + } + } +} + +MergeIteratorBuilder::MergeIteratorBuilder( + const InternalKeyComparator* comparator, Arena* a, bool prefix_seek_mode, + const Slice* iterate_upper_bound) + : first_iter(nullptr), use_merging_iter(false), arena(a) { + auto mem = arena->AllocateAligned(sizeof(MergingIterator)); + merge_iter = new (mem) MergingIterator(comparator, nullptr, 0, true, + prefix_seek_mode, iterate_upper_bound); +} + +MergeIteratorBuilder::~MergeIteratorBuilder() { + if (first_iter != nullptr) { + first_iter->~InternalIterator(); + } + if (merge_iter != nullptr) { + merge_iter->~MergingIterator(); + } +} + +void MergeIteratorBuilder::AddIterator(InternalIterator* iter) { + if (!use_merging_iter && first_iter != nullptr) { + merge_iter->AddIterator(first_iter); + use_merging_iter = true; + first_iter = nullptr; + } + if (use_merging_iter) { + merge_iter->AddIterator(iter); + } else { + first_iter = iter; + } +} + +void MergeIteratorBuilder::AddPointAndTombstoneIterator( + InternalIterator* point_iter, TruncatedRangeDelIterator* tombstone_iter, + TruncatedRangeDelIterator*** tombstone_iter_ptr) { + // tombstone_iter_ptr != nullptr means point_iter is a LevelIterator. + bool add_range_tombstone = tombstone_iter || + !merge_iter->range_tombstone_iters_.empty() || + tombstone_iter_ptr; + if (!use_merging_iter && (add_range_tombstone || first_iter)) { + use_merging_iter = true; + if (first_iter) { + merge_iter->AddIterator(first_iter); + first_iter = nullptr; + } + } + if (use_merging_iter) { + merge_iter->AddIterator(point_iter); + if (add_range_tombstone) { + // If there was a gap, fill in nullptr as empty range tombstone iterators. + while (merge_iter->range_tombstone_iters_.size() < + merge_iter->children_.size() - 1) { + merge_iter->AddRangeTombstoneIterator(nullptr); + } + merge_iter->AddRangeTombstoneIterator(tombstone_iter); + } + + if (tombstone_iter_ptr) { + // This is needed instead of setting to &range_tombstone_iters_[i] + // directly here since the memory address of range_tombstone_iters_[i] + // might change during vector resizing. 
+      range_del_iter_ptrs_.emplace_back(
+          merge_iter->range_tombstone_iters_.size() - 1, tombstone_iter_ptr);
+    }
+  } else {
+    first_iter = point_iter;
+  }
+}
+
+InternalIterator* MergeIteratorBuilder::Finish(ArenaWrappedDBIter* db_iter) {
+  InternalIterator* ret = nullptr;
+  if (!use_merging_iter) {
+    ret = first_iter;
+    first_iter = nullptr;
+  } else {
+    for (auto& p : range_del_iter_ptrs_) {
+      *(p.second) = &(merge_iter->range_tombstone_iters_[p.first]);
+    }
+    if (db_iter && !merge_iter->range_tombstone_iters_.empty()) {
+      // memtable is always the first level
+      db_iter->SetMemtableRangetombstoneIter(
+          &merge_iter->range_tombstone_iters_.front());
+    }
+    merge_iter->Finish();
+    ret = merge_iter;
+    merge_iter = nullptr;
+  }
+  return ret;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.h
new file mode 100644
index 0000000000..562a4e57f5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/merging_iterator.h
@@ -0,0 +1,97 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "db/range_del_aggregator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/types.h"
+#include "table/iterator_wrapper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Arena;
+class ArenaWrappedDBIter;
+class InternalKeyComparator;
+
+template <class TValue>
+class InternalIteratorBase;
+using InternalIterator = InternalIteratorBase<Slice>;
+
+// Return an iterator that provides the union of the data in
+// children[0,n-1]. Takes ownership of the child iterators and
+// will delete them when the result iterator is deleted.
+//
+// The result does no duplicate suppression. I.e., if a particular
+// key is present in K child iterators, it will be yielded K times.
+//
+// REQUIRES: n >= 0
+extern InternalIterator* NewMergingIterator(
+    const InternalKeyComparator* comparator, InternalIterator** children, int n,
+    Arena* arena = nullptr, bool prefix_seek_mode = false);
+
+// The iterator returned by NewMergingIterator() and
+// MergeIteratorBuilder::Finish(). MergingIterator handles the merging of data
+// from different point and/or range tombstone iterators.
+class MergingIterator;
+
+// A builder class for an iterator that provides the union of the data
+// of input iterators. Two APIs are provided to add input iterators. Users
+// should call exactly one of them, depending on whether range tombstones
+// need to be processed.
+class MergeIteratorBuilder {
+ public:
+  // comparator: the comparator used in merging.
+  // arena: where the merging iterator needs to be allocated from.
+  explicit MergeIteratorBuilder(const InternalKeyComparator* comparator,
+                                Arena* arena, bool prefix_seek_mode = false,
+                                const Slice* iterate_upper_bound = nullptr);
+  ~MergeIteratorBuilder();
+
+  // Add point key iterator `iter` to the merging iterator.
+  void AddIterator(InternalIterator* iter);
+
+  // Add a point key iterator and a range tombstone iterator.
+  // `tombstone_iter_ptr` should be set by, and only by, LevelIterator.
+  // *tombstone_iter_ptr will be set to where the merging iterator stores
+  // `tombstone_iter` when MergeIteratorBuilder::Finish() is called. This is
+  // used by LevelIterator to update range tombstone iters when switching to a
+  // different SST file. If a single point iterator with a nullptr range
+  // tombstone iterator is provided, and the point iterator is not a level
+  // iterator, then this builder will return the point iterator directly,
+  // instead of creating a merging iterator on top of it. Internally, if no
+  // point iterator is a LevelIterator, then a range tombstone iterator is
+  // only added to the merging iter if there is a non-null `tombstone_iter`.
+  void AddPointAndTombstoneIterator(
+      InternalIterator* point_iter, TruncatedRangeDelIterator* tombstone_iter,
+      TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr);
+
+  // Get the arena used to build the merging iterator. It is called when a
+  // child iterator needs to be allocated.
+  Arena* GetArena() { return arena; }
+
+  // Return the resulting merging iterator.
+  // If db_iter is not nullptr, then db_iter->SetMemtableRangetombstoneIter()
+  // will be called with a pointer to where the merging iterator
+  // stores the memtable range tombstone iterator.
+  // This is used by the DB iterator to refresh memtable range tombstones.
+  InternalIterator* Finish(ArenaWrappedDBIter* db_iter = nullptr);
+
+ private:
+  MergingIterator* merge_iter;
+  InternalIterator* first_iter;
+  bool use_merging_iter;
+  Arena* arena;
+  // Used to set LevelIterator.range_tombstone_iter_.
+  // See AddRangeTombstoneIterator() implementation for more detail.
+  std::vector<std::pair<size_t, TruncatedRangeDelIterator***>>
+      range_del_iter_ptrs_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.cc
new file mode 100644
index 0000000000..6fea536d62
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.cc
@@ -0,0 +1,564 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
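+// A minimal usage sketch (ours, not part of the upstream file): given a
+// RandomAccessFileReader `reader`, the file's size, and its table magic
+// number, the property block can be decoded with the helper declared in
+// meta_blocks.h:
+//
+//   std::unique_ptr<TableProperties> props;
+//   Status s = ReadTableProperties(reader, file_size, magic_number,
+//                                  ioptions, read_options, &props);
+//   if (s.ok()) {
+//     uint64_t n = props->num_entries;  // one of the decoded fields
+//   }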
+#include "table/meta_blocks.h" + +#include +#include + +#include "block_fetcher.h" +#include "db/table_properties_collector.h" +#include "file/random_access_file_reader.h" +#include "logging/logging.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "rocksdb/table_properties.h" +#include "table/block_based/block.h" +#include "table/block_based/reader_common.h" +#include "table/format.h" +#include "table/internal_iterator.h" +#include "table/persistent_cache_helper.h" +#include "table/sst_file_writer_collectors.h" +#include "table/table_properties_internal.h" +#include "test_util/sync_point.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +const std::string kPropertiesBlockName = "rocksdb.properties"; +// Old property block name for backward compatibility +const std::string kPropertiesBlockOldName = "rocksdb.stats"; +const std::string kCompressionDictBlockName = "rocksdb.compression_dict"; +const std::string kRangeDelBlockName = "rocksdb.range_del"; + +MetaIndexBuilder::MetaIndexBuilder() + : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {} + +void MetaIndexBuilder::Add(const std::string& key, const BlockHandle& handle) { + std::string handle_encoding; + handle.EncodeTo(&handle_encoding); + meta_block_handles_.insert({key, handle_encoding}); +} + +Slice MetaIndexBuilder::Finish() { + for (const auto& metablock : meta_block_handles_) { + meta_index_block_->Add(metablock.first, metablock.second); + } + return meta_index_block_->Finish(); +} + +// Property block will be read sequentially and cached in a heap located +// object, so there's no need for restart points. Thus we set the restart +// interval to infinity to save space. +PropertyBlockBuilder::PropertyBlockBuilder() + : properties_block_(new BlockBuilder( + std::numeric_limits::max() /* restart interval */)) {} + +void PropertyBlockBuilder::Add(const std::string& name, + const std::string& val) { + props_.insert({name, val}); +} + +void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) { + assert(props_.find(name) == props_.end()); + + std::string dst; + PutVarint64(&dst, val); + + Add(name, dst); +} + +void PropertyBlockBuilder::Add( + const UserCollectedProperties& user_collected_properties) { + for (const auto& prop : user_collected_properties) { + Add(prop.first, prop.second); + } +} + +void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { + TEST_SYNC_POINT_CALLBACK("PropertyBlockBuilder::AddTableProperty:Start", + const_cast(&props)); + + Add(TablePropertiesNames::kOriginalFileNumber, props.orig_file_number); + Add(TablePropertiesNames::kRawKeySize, props.raw_key_size); + Add(TablePropertiesNames::kRawValueSize, props.raw_value_size); + Add(TablePropertiesNames::kDataSize, props.data_size); + Add(TablePropertiesNames::kIndexSize, props.index_size); + if (props.index_partitions != 0) { + Add(TablePropertiesNames::kIndexPartitions, props.index_partitions); + Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size); + } + Add(TablePropertiesNames::kIndexKeyIsUserKey, props.index_key_is_user_key); + Add(TablePropertiesNames::kIndexValueIsDeltaEncoded, + props.index_value_is_delta_encoded); + Add(TablePropertiesNames::kNumEntries, props.num_entries); + Add(TablePropertiesNames::kNumFilterEntries, props.num_filter_entries); + Add(TablePropertiesNames::kDeletedKeys, props.num_deletions); + Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands); + Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions); + 
+  Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks);
+  Add(TablePropertiesNames::kFilterSize, props.filter_size);
+  Add(TablePropertiesNames::kFormatVersion, props.format_version);
+  Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len);
+  Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id);
+  Add(TablePropertiesNames::kCreationTime, props.creation_time);
+  Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time);
+  if (props.file_creation_time > 0) {
+    Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time);
+  }
+  if (props.slow_compression_estimated_data_size > 0) {
+    Add(TablePropertiesNames::kSlowCompressionEstimatedDataSize,
+        props.slow_compression_estimated_data_size);
+  }
+  if (props.fast_compression_estimated_data_size > 0) {
+    Add(TablePropertiesNames::kFastCompressionEstimatedDataSize,
+        props.fast_compression_estimated_data_size);
+  }
+  Add(TablePropertiesNames::kTailStartOffset, props.tail_start_offset);
+  if (!props.db_id.empty()) {
+    Add(TablePropertiesNames::kDbId, props.db_id);
+  }
+  if (!props.db_session_id.empty()) {
+    Add(TablePropertiesNames::kDbSessionId, props.db_session_id);
+  }
+  if (!props.db_host_id.empty()) {
+    Add(TablePropertiesNames::kDbHostId, props.db_host_id);
+  }
+
+  if (!props.filter_policy_name.empty()) {
+    Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name);
+  }
+  if (!props.comparator_name.empty()) {
+    Add(TablePropertiesNames::kComparator, props.comparator_name);
+  }
+
+  if (!props.merge_operator_name.empty()) {
+    Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name);
+  }
+  if (!props.prefix_extractor_name.empty()) {
+    Add(TablePropertiesNames::kPrefixExtractorName,
+        props.prefix_extractor_name);
+  }
+  if (!props.property_collectors_names.empty()) {
+    Add(TablePropertiesNames::kPropertyCollectors,
+        props.property_collectors_names);
+  }
+  if (!props.column_family_name.empty()) {
+    Add(TablePropertiesNames::kColumnFamilyName, props.column_family_name);
+  }
+
+  if (!props.compression_name.empty()) {
+    Add(TablePropertiesNames::kCompression, props.compression_name);
+  }
+  if (!props.compression_options.empty()) {
+    Add(TablePropertiesNames::kCompressionOptions, props.compression_options);
+  }
+  if (!props.seqno_to_time_mapping.empty()) {
+    Add(TablePropertiesNames::kSequenceNumberTimeMapping,
+        props.seqno_to_time_mapping);
+  }
+}
+
+Slice PropertyBlockBuilder::Finish() {
+  for (const auto& prop : props_) {
+    properties_block_->Add(prop.first, prop.second);
+  }
+
+  return properties_block_->Finish();
+}
+
+void LogPropertiesCollectionError(Logger* info_log, const std::string& method,
+                                  const std::string& name) {
+  assert(method == "Add" || method == "Finish");
+
+  std::string msg =
+      "Encountered error when calling TablePropertiesCollector::" + method +
+      "() with collector name: " + name;
+  ROCKS_LOG_ERROR(info_log, "%s", msg.c_str());
+}
+
+bool NotifyCollectTableCollectorsOnAdd(
+    const Slice& key, const Slice& value, uint64_t file_size,
+    const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
+    Logger* info_log) {
+  bool all_succeeded = true;
+  for (auto& collector : collectors) {
+    Status s = collector->InternalAdd(key, value, file_size);
+    all_succeeded = all_succeeded && s.ok();
+    if (!s.ok()) {
+      LogPropertiesCollectionError(info_log, "Add" /* method */,
+                                   collector->Name());
+    }
+  }
+  return all_succeeded;
+}
+
+void NotifyCollectTableCollectorsOnBlockAdd(
+    const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
+    const uint64_t block_uncomp_bytes,
+    const uint64_t block_compressed_bytes_fast,
+    const uint64_t block_compressed_bytes_slow) {
+  for (auto& collector : collectors) {
+    collector->BlockAdd(block_uncomp_bytes, block_compressed_bytes_fast,
+                        block_compressed_bytes_slow);
+  }
+}
+
+bool NotifyCollectTableCollectorsOnFinish(
+    const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
+    Logger* info_log, PropertyBlockBuilder* builder) {
+  bool all_succeeded = true;
+  for (auto& collector : collectors) {
+    UserCollectedProperties user_collected_properties;
+    Status s = collector->Finish(&user_collected_properties);
+
+    all_succeeded = all_succeeded && s.ok();
+    if (!s.ok()) {
+      LogPropertiesCollectionError(info_log, "Finish" /* method */,
+                                   collector->Name());
+    } else {
+      builder->Add(user_collected_properties);
+    }
+  }
+
+  return all_succeeded;
+}
+
+// FIXME: should whether to use the persistent cache be a parameter for
+// reading table properties?
+Status ReadTablePropertiesHelper(
+    const ReadOptions& ro, const BlockHandle& handle,
+    RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer,
+    const Footer& footer, const ImmutableOptions& ioptions,
+    std::unique_ptr<TableProperties>* table_properties,
+    MemoryAllocator* memory_allocator) {
+  assert(table_properties);
+
+  // If this is an external SST file ingested with write_global_seqno set to
+  // true, then we expect the checksum mismatch because the checksum was
+  // written by SstFileWriter, but its global seqno in the properties block
+  // may have been changed during ingestion. For this reason, we initially
+  // read and process without checksum verification, then later try checksum
+  // verification so that if it fails, we can copy to a temporary buffer with
+  // global seqno set to its original value, i.e. 0, and attempt checksum
+  // verification again.
+  ReadOptions modified_ro = ro;
+  modified_ro.verify_checksums = false;
+  BlockContents block_contents;
+  BlockFetcher block_fetcher(file, prefetch_buffer, footer, modified_ro, handle,
+                             &block_contents, ioptions, false /* decompress */,
+                             false /*maybe_compressed*/, BlockType::kProperties,
+                             UncompressionDict::GetEmptyDict(),
+                             PersistentCacheOptions::kEmpty, memory_allocator);
+  Status s = block_fetcher.ReadBlockContents();
+  if (!s.ok()) {
+    return s;
+  }
+
+  // Unfortunately, Block::size() might not equal block_contents.data.size(),
+  // and Block hides block_contents
+  uint64_t block_size = block_contents.data.size();
+  Block properties_block(std::move(block_contents));
+  std::unique_ptr<MetaBlockIter> iter(properties_block.NewMetaIterator());
+
+  std::unique_ptr<TableProperties> new_table_properties{new TableProperties};
+  // All pre-defined properties of type uint64_t
+  std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
+      {TablePropertiesNames::kOriginalFileNumber,
+       &new_table_properties->orig_file_number},
+      {TablePropertiesNames::kDataSize, &new_table_properties->data_size},
+      {TablePropertiesNames::kIndexSize, &new_table_properties->index_size},
+      {TablePropertiesNames::kIndexPartitions,
+       &new_table_properties->index_partitions},
+      {TablePropertiesNames::kTopLevelIndexSize,
+       &new_table_properties->top_level_index_size},
+      {TablePropertiesNames::kIndexKeyIsUserKey,
+       &new_table_properties->index_key_is_user_key},
+      {TablePropertiesNames::kIndexValueIsDeltaEncoded,
+       &new_table_properties->index_value_is_delta_encoded},
+      {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size},
+      {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size},
+      {TablePropertiesNames::kRawValueSize,
+       &new_table_properties->raw_value_size},
+      {TablePropertiesNames::kNumDataBlocks,
+       &new_table_properties->num_data_blocks},
+
{TablePropertiesNames::kNumEntries, &new_table_properties->num_entries}, + {TablePropertiesNames::kNumFilterEntries, + &new_table_properties->num_filter_entries}, + {TablePropertiesNames::kDeletedKeys, + &new_table_properties->num_deletions}, + {TablePropertiesNames::kMergeOperands, + &new_table_properties->num_merge_operands}, + {TablePropertiesNames::kNumRangeDeletions, + &new_table_properties->num_range_deletions}, + {TablePropertiesNames::kFormatVersion, + &new_table_properties->format_version}, + {TablePropertiesNames::kFixedKeyLen, + &new_table_properties->fixed_key_len}, + {TablePropertiesNames::kColumnFamilyId, + &new_table_properties->column_family_id}, + {TablePropertiesNames::kCreationTime, + &new_table_properties->creation_time}, + {TablePropertiesNames::kOldestKeyTime, + &new_table_properties->oldest_key_time}, + {TablePropertiesNames::kFileCreationTime, + &new_table_properties->file_creation_time}, + {TablePropertiesNames::kSlowCompressionEstimatedDataSize, + &new_table_properties->slow_compression_estimated_data_size}, + {TablePropertiesNames::kFastCompressionEstimatedDataSize, + &new_table_properties->fast_compression_estimated_data_size}, + {TablePropertiesNames::kTailStartOffset, + &new_table_properties->tail_start_offset}, + }; + + std::string last_key; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + s = iter->status(); + if (!s.ok()) { + break; + } + + auto key = iter->key().ToString(); + // properties block should be strictly sorted with no duplicate key. + if (!last_key.empty() && + BytewiseComparator()->Compare(key, last_key) <= 0) { + s = Status::Corruption("properties unsorted"); + break; + } + last_key = key; + + auto raw_val = iter->value(); + auto pos = predefined_uint64_properties.find(key); + + if (key == ExternalSstFilePropertyNames::kGlobalSeqno) { + new_table_properties->external_sst_file_global_seqno_offset = + handle.offset() + iter->ValueOffset(); + } + + if (pos != predefined_uint64_properties.end()) { + if (key == TablePropertiesNames::kDeletedKeys || + key == TablePropertiesNames::kMergeOperands) { + // Insert in user-collected properties for API backwards compatibility + new_table_properties->user_collected_properties.insert( + {key, raw_val.ToString()}); + } + // handle predefined rocksdb properties + uint64_t val; + if (!GetVarint64(&raw_val, &val)) { + // skip malformed value + auto error_msg = + "Detect malformed value in properties meta-block:" + "\tkey: " + + key + "\tval: " + raw_val.ToString(); + ROCKS_LOG_ERROR(ioptions.logger, "%s", error_msg.c_str()); + continue; + } + *(pos->second) = val; + } else if (key == TablePropertiesNames::kDbId) { + new_table_properties->db_id = raw_val.ToString(); + } else if (key == TablePropertiesNames::kDbSessionId) { + new_table_properties->db_session_id = raw_val.ToString(); + } else if (key == TablePropertiesNames::kDbHostId) { + new_table_properties->db_host_id = raw_val.ToString(); + } else if (key == TablePropertiesNames::kFilterPolicy) { + new_table_properties->filter_policy_name = raw_val.ToString(); + } else if (key == TablePropertiesNames::kColumnFamilyName) { + new_table_properties->column_family_name = raw_val.ToString(); + } else if (key == TablePropertiesNames::kComparator) { + new_table_properties->comparator_name = raw_val.ToString(); + } else if (key == TablePropertiesNames::kMergeOperator) { + new_table_properties->merge_operator_name = raw_val.ToString(); + } else if (key == TablePropertiesNames::kPrefixExtractorName) { + new_table_properties->prefix_extractor_name = 
raw_val.ToString();
+    } else if (key == TablePropertiesNames::kPropertyCollectors) {
+      new_table_properties->property_collectors_names = raw_val.ToString();
+    } else if (key == TablePropertiesNames::kCompression) {
+      new_table_properties->compression_name = raw_val.ToString();
+    } else if (key == TablePropertiesNames::kCompressionOptions) {
+      new_table_properties->compression_options = raw_val.ToString();
+    } else if (key == TablePropertiesNames::kSequenceNumberTimeMapping) {
+      new_table_properties->seqno_to_time_mapping = raw_val.ToString();
+    } else {
+      // handle user-collected properties
+      new_table_properties->user_collected_properties.insert(
+          {key, raw_val.ToString()});
+    }
+  }
+
+  // Modified version of BlockFetcher checksum verification
+  // (See write_global_seqno comment above)
+  if (s.ok() && footer.GetBlockTrailerSize() > 0) {
+    s = VerifyBlockChecksum(footer.checksum_type(), properties_block.data(),
+                            block_size, file->file_name(), handle.offset());
+    if (s.IsCorruption()) {
+      if (new_table_properties->external_sst_file_global_seqno_offset != 0) {
+        std::string tmp_buf(properties_block.data(),
+                            block_fetcher.GetBlockSizeWithTrailer());
+        uint64_t global_seqno_offset =
+            new_table_properties->external_sst_file_global_seqno_offset -
+            handle.offset();
+        EncodeFixed64(&tmp_buf[static_cast<size_t>(global_seqno_offset)], 0);
+        s = VerifyBlockChecksum(footer.checksum_type(), tmp_buf.data(),
+                                block_size, file->file_name(), handle.offset());
+      }
+    }
+  }
+
+  if (s.ok()) {
+    *table_properties = std::move(new_table_properties);
+  }
+
+  return s;
+}
+
+Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
+                           uint64_t table_magic_number,
+                           const ImmutableOptions& ioptions,
+                           const ReadOptions& read_options,
+                           std::unique_ptr<TableProperties>* properties,
+                           MemoryAllocator* memory_allocator,
+                           FilePrefetchBuffer* prefetch_buffer) {
+  BlockHandle block_handle;
+  Footer footer;
+  Status s =
+      FindMetaBlockInFile(file, file_size, table_magic_number, ioptions,
+                          read_options, kPropertiesBlockName, &block_handle,
+                          memory_allocator, prefetch_buffer, &footer);
+  if (!s.ok()) {
+    return s;
+  }
+
+  if (!block_handle.IsNull()) {
+    s = ReadTablePropertiesHelper(read_options, block_handle, file,
+                                  prefetch_buffer, footer, ioptions, properties,
+                                  memory_allocator);
+  } else {
+    s = Status::NotFound();
+  }
+  return s;
+}
+
+Status FindOptionalMetaBlock(InternalIterator* meta_index_iter,
+                             const std::string& meta_block_name,
+                             BlockHandle* block_handle) {
+  assert(block_handle != nullptr);
+  meta_index_iter->Seek(meta_block_name);
+  if (meta_index_iter->status().ok()) {
+    if (meta_index_iter->Valid() && meta_index_iter->key() == meta_block_name) {
+      Slice v = meta_index_iter->value();
+      return block_handle->DecodeFrom(&v);
+    } else if (meta_block_name == kPropertiesBlockName) {
+      // Have to try the old name for compatibility
+      meta_index_iter->Seek(kPropertiesBlockOldName);
+      if (meta_index_iter->status().ok() && meta_index_iter->Valid() &&
+          meta_index_iter->key() == kPropertiesBlockOldName) {
+        Slice v = meta_index_iter->value();
+        return block_handle->DecodeFrom(&v);
+      }
+    }
+  }
+  // else
+  *block_handle = BlockHandle::NullBlockHandle();
+  return meta_index_iter->status();
+}
+
+Status FindMetaBlock(InternalIterator* meta_index_iter,
+                     const std::string& meta_block_name,
+                     BlockHandle* block_handle) {
+  Status s =
+      FindOptionalMetaBlock(meta_index_iter, meta_block_name, block_handle);
+  if (s.ok() && block_handle->IsNull()) {
+    return Status::Corruption("Cannot find the meta block", meta_block_name);
+  } else {
+    return s;
+  }
+}
+
+Status ReadMetaIndexBlockInFile(RandomAccessFileReader* file,
+                                uint64_t file_size, uint64_t table_magic_number,
+                                const ImmutableOptions& ioptions,
+                                const ReadOptions& read_options,
+                                BlockContents* metaindex_contents,
+                                MemoryAllocator* memory_allocator,
+                                FilePrefetchBuffer* prefetch_buffer,
+                                Footer* footer_out) {
+  Footer footer;
+  IOOptions opts;
+  Status s;
+  s = file->PrepareIOOptions(read_options, opts);
+  if (!s.ok()) {
+    return s;
+  }
+  s = ReadFooterFromFile(opts, file, *ioptions.fs, prefetch_buffer, file_size,
+                         &footer, table_magic_number);
+  if (!s.ok()) {
+    return s;
+  }
+  if (footer_out) {
+    *footer_out = footer;
+  }
+
+  auto metaindex_handle = footer.metaindex_handle();
+  return BlockFetcher(file, prefetch_buffer, footer, read_options,
+                      metaindex_handle, metaindex_contents, ioptions,
+                      false /* do decompression */, false /*maybe_compressed*/,
+                      BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(),
+                      PersistentCacheOptions::kEmpty, memory_allocator)
+      .ReadBlockContents();
+}
+
+Status FindMetaBlockInFile(
+    RandomAccessFileReader* file, uint64_t file_size,
+    uint64_t table_magic_number, const ImmutableOptions& ioptions,
+    const ReadOptions& read_options, const std::string& meta_block_name,
+    BlockHandle* block_handle, MemoryAllocator* memory_allocator,
+    FilePrefetchBuffer* prefetch_buffer, Footer* footer_out) {
+  BlockContents metaindex_contents;
+  auto s = ReadMetaIndexBlockInFile(
+      file, file_size, table_magic_number, ioptions, read_options,
+      &metaindex_contents, memory_allocator, prefetch_buffer, footer_out);
+  if (!s.ok()) {
+    return s;
+  }
+  // Meta blocks are never compressed; decompression logic would be needed
+  // here if we were to compress them.
+  Block metaindex_block(std::move(metaindex_contents));
+
+  std::unique_ptr<InternalIterator> meta_iter;
+  meta_iter.reset(metaindex_block.NewMetaIterator());
+
+  return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
+}
+
+Status ReadMetaBlock(RandomAccessFileReader* file,
+                     FilePrefetchBuffer* prefetch_buffer, uint64_t file_size,
+                     uint64_t table_magic_number,
+                     const ImmutableOptions& ioptions,
+                     const ReadOptions& read_options,
+                     const std::string& meta_block_name, BlockType block_type,
+                     BlockContents* contents,
+                     MemoryAllocator* memory_allocator) {
+  // TableProperties requires special handling because of checksum issues.
+  // Call ReadTableProperties instead for that case.
+  assert(block_type != BlockType::kProperties);
+
+  BlockHandle block_handle;
+  Footer footer;
+  Status status =
+      FindMetaBlockInFile(file, file_size, table_magic_number, ioptions,
+                          read_options, meta_block_name, &block_handle,
+                          memory_allocator, prefetch_buffer, &footer);
+  if (!status.ok()) {
+    return status;
+  }
+
+  return BlockFetcher(file, prefetch_buffer, footer, read_options, block_handle,
+                      contents, ioptions, false /* decompress */,
+                      false /*maybe_compressed*/, block_type,
+                      UncompressionDict::GetEmptyDict(),
+                      PersistentCacheOptions::kEmpty, memory_allocator)
+      .ReadBlockContents();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.h
new file mode 100644
index 0000000000..962a316388
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/meta_blocks.h
@@ -0,0 +1,172 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "db/builder.h"
+#include "db/table_properties_collector.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/memory_allocator.h"
+#include "rocksdb/options.h"
+#include "rocksdb/slice.h"
+#include "table/block_based/block_builder.h"
+#include "table/block_based/block_type.h"
+#include "table/format.h"
+#include "util/kv_map.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class BlockBuilder;
+class BlockHandle;
+class Env;
+class Footer;
+class Logger;
+class RandomAccessFile;
+struct TableProperties;
+
+// Meta block names for metaindex
+extern const std::string kPropertiesBlockName;
+extern const std::string kPropertiesBlockOldName;
+extern const std::string kCompressionDictBlockName;
+extern const std::string kRangeDelBlockName;
+
+class MetaIndexBuilder {
+ public:
+  MetaIndexBuilder(const MetaIndexBuilder&) = delete;
+  MetaIndexBuilder& operator=(const MetaIndexBuilder&) = delete;
+
+  MetaIndexBuilder();
+  void Add(const std::string& key, const BlockHandle& handle);
+
+  // Write all the added key/value pairs to the block and return the contents
+  // of the block.
+  Slice Finish();
+
+ private:
+  // store the sorted key/handle of the metablocks.
+  stl_wrappers::KVMap meta_block_handles_;
+  std::unique_ptr<BlockBuilder> meta_index_block_;
+};
+
+class PropertyBlockBuilder {
+ public:
+  PropertyBlockBuilder(const PropertyBlockBuilder&) = delete;
+  PropertyBlockBuilder& operator=(const PropertyBlockBuilder&) = delete;
+
+  PropertyBlockBuilder();
+
+  void AddTableProperty(const TableProperties& props);
+  void Add(const std::string& key, uint64_t value);
+  void Add(const std::string& key, const std::string& value);
+  void Add(const UserCollectedProperties& user_collected_properties);
+
+  // Write all the added entries to the block and return the block contents
+  Slice Finish();
+
+ private:
+  std::unique_ptr<BlockBuilder> properties_block_;
+  stl_wrappers::KVMap props_;
+};
+
+// If we encounter an error during user-defined statistics collection, we
+// write a warning message to the info log.
+void LogPropertiesCollectionError(Logger* info_log, const std::string& method,
+                                  const std::string& name);
+
+// Utility functions that help the table builder trigger batch events for
+// user-defined property collectors.
+// The return value indicates whether any error occurred; if so, a warning
+// message is logged.
+// NotifyCollectTableCollectorsOnAdd() triggers the `Add` event for all
+// property collectors.
+bool NotifyCollectTableCollectorsOnAdd(
+    const Slice& key, const Slice& value, uint64_t file_size,
+    const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
+    Logger* info_log);
+
+void NotifyCollectTableCollectorsOnBlockAdd(
+    const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
+    uint64_t block_uncomp_bytes, uint64_t block_compressed_bytes_fast,
+    uint64_t block_compressed_bytes_slow);
+
+// NotifyCollectTableCollectorsOnFinish() triggers the `Finish` event for all
+// property collectors. The collected properties will be added to `builder`.
+bool NotifyCollectTableCollectorsOnFinish(
+    const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
+    Logger* info_log, PropertyBlockBuilder* builder);
+
+// Read table properties from a file using a known BlockHandle.
+// @returns a status to indicate if the operation succeeded.
+// On success, *table_properties will point to a heap-allocated
+// TableProperties object; otherwise the value of `table_properties` is not
+// modified.
+Status ReadTablePropertiesHelper(
+    const ReadOptions& ro, const BlockHandle& handle,
+    RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer,
+    const Footer& footer, const ImmutableOptions& ioptions,
+    std::unique_ptr<TableProperties>* table_properties,
+    MemoryAllocator* memory_allocator = nullptr);
+
+// Read table properties from the properties block of a plain table.
+// @returns a status to indicate if the operation succeeded. On success,
+// *table_properties will point to a heap-allocated TableProperties
+// object; otherwise the value of `table_properties` is not modified.
+Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
+                           uint64_t table_magic_number,
+                           const ImmutableOptions& ioptions,
+                           const ReadOptions& read_options,
+                           std::unique_ptr<TableProperties>* properties,
+                           MemoryAllocator* memory_allocator = nullptr,
+                           FilePrefetchBuffer* prefetch_buffer = nullptr);
+
+// Find the meta block from the meta index block. Returns OK and
+// block_handle->IsNull() if not found.
+Status FindOptionalMetaBlock(InternalIterator* meta_index_iter,
+                             const std::string& meta_block_name,
+                             BlockHandle* block_handle);
+
+// Find the meta block from the meta index block. Returns Corruption if not
+// found.
+Status FindMetaBlock(InternalIterator* meta_index_iter,
+                     const std::string& meta_block_name,
+                     BlockHandle* block_handle);
+
+// Find the meta block
+Status FindMetaBlockInFile(RandomAccessFileReader* file, uint64_t file_size,
+                           uint64_t table_magic_number,
+                           const ImmutableOptions& ioptions,
+                           const ReadOptions& read_options,
+                           const std::string& meta_block_name,
+                           BlockHandle* block_handle,
+                           MemoryAllocator* memory_allocator = nullptr,
+                           FilePrefetchBuffer* prefetch_buffer = nullptr,
+                           Footer* footer_out = nullptr);
+
+// Read meta block contents
+Status ReadMetaIndexBlockInFile(RandomAccessFileReader* file,
+                                uint64_t file_size, uint64_t table_magic_number,
+                                const ImmutableOptions& ioptions,
+                                const ReadOptions& read_options,
+                                BlockContents* block_contents,
+                                MemoryAllocator* memory_allocator = nullptr,
+                                FilePrefetchBuffer* prefetch_buffer = nullptr,
+                                Footer* footer_out = nullptr);
+
+// Read the specified meta block with name meta_block_name
+// from `file` and initialize `contents` with the contents of this block.
+// Return Status::OK in case of success.
+Status ReadMetaBlock(RandomAccessFileReader* file,
+                     FilePrefetchBuffer* prefetch_buffer, uint64_t file_size,
+                     uint64_t table_magic_number,
+                     const ImmutableOptions& ioptions,
+                     const ReadOptions& read_options,
+                     const std::string& meta_block_name, BlockType block_type,
+                     BlockContents* contents,
+                     MemoryAllocator* memory_allocator = nullptr);
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/mock_table.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/mock_table.h
new file mode 100644
index 0000000000..e4850d0606
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/mock_table.h
@@ -0,0 +1,94 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
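+// A usage sketch (ours, not part of the upstream header; `env` and the path
+// are placeholders): tests typically build a key/value vector and register
+// it with the factory, e.g.
+//
+//   mock::KVVector file = mock::MakeMockFile({{"a", "v1"}, {"b", "v2"}});
+//   auto* factory = new mock::MockTableFactory();
+//   Status s = factory->CreateMockTable(env, "/tmp/mock_sst",
+//                                       std::move(file));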
+#pragma once
+
+#include <algorithm>
+#include <atomic>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+
+#include "db/version_edit.h"
+#include "port/port.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/table.h"
+#include "table/internal_iterator.h"
+#include "table/table_builder.h"
+#include "table/table_reader.h"
+#include "test_util/testharness.h"
+#include "test_util/testutil.h"
+#include "util/kv_map.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace mock {
+using KVPair = std::pair<std::string, std::string>;
+using KVVector = std::vector<KVPair>;
+
+KVVector MakeMockFile(std::initializer_list<KVPair> l = {});
+void SortKVVector(KVVector* kv_vector,
+                  const Comparator* ucmp = BytewiseComparator());
+
+struct MockTableFileSystem {
+  port::Mutex mutex;
+  std::map<uint32_t, stl_wrappers::KVMap> files;
+};
+
+class MockTableFactory : public TableFactory {
+ public:
+  enum MockCorruptionMode {
+    kCorruptNone,
+    kCorruptKey,
+    kCorruptValue,
+    kCorruptReorderKey,
+  };
+
+  MockTableFactory();
+  static const char* kClassName() { return "MockTable"; }
+  const char* Name() const override { return kClassName(); }
+  using TableFactory::NewTableReader;
+  Status NewTableReader(
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
+      std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+      std::unique_ptr<TableReader>* table_reader,
+      bool prefetch_index_and_filter_in_cache = true) const override;
+  TableBuilder* NewTableBuilder(
+      const TableBuilderOptions& table_builder_options,
+      WritableFileWriter* file) const override;
+
+  // This function will directly create a mock table instead of going through
+  // MockTableBuilder. file_contents has to have a format of
+  // <internal_key, value>. Those key-value pairs will then be inserted into
+  // the mock table.
+  Status CreateMockTable(Env* env, const std::string& fname,
+                         KVVector file_contents);
+
+  virtual std::string GetPrintableOptions() const override {
+    return std::string();
+  }
+
+  void SetCorruptionMode(MockCorruptionMode mode) { corrupt_mode_ = mode; }
+
+  void SetKeyValueSize(size_t size) { key_value_size_ = size; }
+  // This function will assert that only a single file exists and that the
+  // contents are equal to file_contents
+  void AssertSingleFile(const KVVector& file_contents);
+  void AssertLatestFiles(const std::vector<KVVector>& files_contents);
+
+ private:
+  Status GetAndWriteNextID(WritableFileWriter* file, uint32_t* id) const;
+  Status GetIDFromFile(RandomAccessFileReader* file, uint32_t* id) const;
+
+  mutable MockTableFileSystem file_system_;
+  mutable std::atomic<uint32_t> next_id_;
+  MockCorruptionMode corrupt_mode_;
+
+  size_t key_value_size_ = 1;
+};
+
+}  // namespace mock
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/multiget_context.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/multiget_context.h
new file mode 100644
index 0000000000..54af2262cb
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/multiget_context.h
@@ -0,0 +1,405 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
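+// A rough usage sketch (ours, not from the upstream header): the MultiGet
+// code paths build a context over a sorted autovector of KeyContext pointers
+// and then carve Ranges out of it, e.g.
+//
+//   MultiGetContext ctx(&sorted_keys, 0 /* begin */, num_keys, snapshot,
+//                       read_options, fs, stats);
+//   MultiGetContext::Range range = ctx.GetMultiGetRange();
+//   for (auto it = range.begin(); it != range.end(); ++it) { /* ... */ }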
+
+#pragma once
+#include <algorithm>
+#include <array>
+#include <string>
+
+#include "db/dbformat.h"
+#include "db/lookup_key.h"
+#include "db/merge_context.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/types.h"
+#include "util/async_file_reader.h"
+#include "util/autovector.h"
+#include "util/math.h"
+#include "util/single_thread_executor.h"
+
+namespace ROCKSDB_NAMESPACE {
+class GetContext;
+class PinnableWideColumns;
+
+struct KeyContext {
+  const Slice* key;
+  LookupKey* lkey;
+  Slice ukey_with_ts;
+  Slice ukey_without_ts;
+  Slice ikey;
+  ColumnFamilyHandle* column_family;
+  Status* s;
+  MergeContext merge_context;
+  SequenceNumber max_covering_tombstone_seq;
+  bool key_exists;
+  bool is_blob_index;
+  void* cb_arg;
+  PinnableSlice* value;
+  PinnableWideColumns* columns;
+  std::string* timestamp;
+  GetContext* get_context;
+
+  KeyContext(ColumnFamilyHandle* col_family, const Slice& user_key,
+             PinnableSlice* val, PinnableWideColumns* cols, std::string* ts,
+             Status* stat)
+      : key(&user_key),
+        lkey(nullptr),
+        column_family(col_family),
+        s(stat),
+        max_covering_tombstone_seq(0),
+        key_exists(false),
+        is_blob_index(false),
+        cb_arg(nullptr),
+        value(val),
+        columns(cols),
+        timestamp(ts),
+        get_context(nullptr) {}
+};
+
+// The MultiGetContext class is a container for the sorted list of keys that
+// we need to lookup in a batch. Its main purpose is to make batch execution
+// easier by allowing various stages of the MultiGet lookups to operate on
+// subsets of keys, potentially non-contiguous. In order to accomplish this,
+// it defines the following classes -
+//
+// MultiGetContext::Range
+// MultiGetContext::Range::Iterator
+// MultiGetContext::Range::IteratorWrapper
+//
+// Here is an example of how this can be used -
+//
+// {
+//    MultiGetContext ctx(...);
+//    MultiGetContext::Range range = ctx.GetMultiGetRange();
+//
+//    // Iterate to determine some subset of the keys
+//    MultiGetContext::Range::Iterator start = range.begin();
+//    MultiGetContext::Range::Iterator end = ...;
+//
+//    // Make a new range with a subset of keys
+//    MultiGetContext::Range subrange(range, start, end);
+//
+//    // Define an auxiliary vector, if needed, to hold additional data for
+//    // each key
+//    std::array<Foo, MAX_BATCH_SIZE> aux;
+//
+//    // Iterate over the subrange and the auxiliary vector simultaneously
+//    MultiGetContext::Range::Iterator iter = subrange.begin();
+//    for (; iter != subrange.end(); ++iter) {
+//      KeyContext& key = *iter;
+//      Foo& aux_key = aux[iter.index()];
+//      ...
+//    }
+// }
+class MultiGetContext {
+ public:
+  // Limit the number of keys in a batch to this number. Benchmarks show that
+  // there is negligible benefit for batches exceeding this.
+  // Keeping this < 32 simplifies iteration, as well as reduces the amount of
+  // stack allocations that need to be performed
+  static const int MAX_BATCH_SIZE = 32;
+
+  // A bitmask of at least MAX_BATCH_SIZE - 1 bits, so that
+  // Mask{1} << MAX_BATCH_SIZE is well defined
+  using Mask = uint64_t;
+  static_assert(MAX_BATCH_SIZE < sizeof(Mask) * 8);
+
+  MultiGetContext(autovector<KeyContext*, MAX_BATCH_SIZE>* sorted_keys,
+                  size_t begin, size_t num_keys, SequenceNumber snapshot,
+                  const ReadOptions& read_opts, FileSystem* fs,
+                  Statistics* stats)
+      : num_keys_(num_keys),
+        value_mask_(0),
+        value_size_(0),
+        lookup_key_ptr_(reinterpret_cast<LookupKey*>(lookup_key_stack_buf))
+#if USE_COROUTINES
+        ,
+        reader_(fs, stats),
+        executor_(reader_)
+#endif  // USE_COROUTINES
+  {
+    (void)fs;
+    (void)stats;
+    assert(num_keys <= MAX_BATCH_SIZE);
+    if (num_keys > MAX_LOOKUP_KEYS_ON_STACK) {
+      lookup_key_heap_buf.reset(new char[sizeof(LookupKey) * num_keys]);
+      lookup_key_ptr_ =
+          reinterpret_cast<LookupKey*>(lookup_key_heap_buf.get());
+    }
+
+    for (size_t iter = 0; iter != num_keys_; ++iter) {
+      // autovector may not be contiguous storage, so make a copy
+      sorted_keys_[iter] = (*sorted_keys)[begin + iter];
+      sorted_keys_[iter]->lkey = new (&lookup_key_ptr_[iter])
+          LookupKey(*sorted_keys_[iter]->key, snapshot, read_opts.timestamp);
+      sorted_keys_[iter]->ukey_with_ts = sorted_keys_[iter]->lkey->user_key();
+      sorted_keys_[iter]->ukey_without_ts = StripTimestampFromUserKey(
+          sorted_keys_[iter]->lkey->user_key(),
+          read_opts.timestamp == nullptr ? 0 : read_opts.timestamp->size());
+      sorted_keys_[iter]->ikey = sorted_keys_[iter]->lkey->internal_key();
+      sorted_keys_[iter]->timestamp = (*sorted_keys)[begin + iter]->timestamp;
+      sorted_keys_[iter]->get_context =
+          (*sorted_keys)[begin + iter]->get_context;
+    }
+  }
+
+  ~MultiGetContext() {
+    for (size_t i = 0; i < num_keys_; ++i) {
+      lookup_key_ptr_[i].~LookupKey();
+    }
+  }
+
+#if USE_COROUTINES
+  SingleThreadExecutor& executor() { return executor_; }
+
+  AsyncFileReader& reader() { return reader_; }
+#endif  // USE_COROUTINES
+
+ private:
+  static const int MAX_LOOKUP_KEYS_ON_STACK = 16;
+  alignas(
+      alignof(LookupKey)) char lookup_key_stack_buf[sizeof(LookupKey) *
+                                                    MAX_LOOKUP_KEYS_ON_STACK];
+  std::array<KeyContext*, MAX_BATCH_SIZE> sorted_keys_;
+  size_t num_keys_;
+  Mask value_mask_;
+  uint64_t value_size_;
+  std::unique_ptr<char[]> lookup_key_heap_buf;
+  LookupKey* lookup_key_ptr_;
+#if USE_COROUTINES
+  AsyncFileReader reader_;
+  SingleThreadExecutor executor_;
+#endif  // USE_COROUTINES
+
+ public:
+  // MultiGetContext::Range - Specifies a range of keys, by start and end
+  // index, from the parent MultiGetContext. Each range contains a bit vector
+  // that indicates whether the corresponding keys need to be processed or
+  // skipped. A Range object can be copy constructed, and the new object
+  // inherits the original Range's bit vector. This is useful for progressively
+  // skipping keys as the lookup goes through various stages. For example, when
+  // looking up keys in the same SST file, a Range is created excluding keys
+  // not belonging to that file. A new Range is then copy constructed and
+  // individual keys are skipped based on bloom filter lookup.
+  class Range {
+   public:
+    // MultiGetContext::Range::Iterator - A forward iterator that iterates over
+    // non-skippable keys in a Range, as well as keys whose final value has
+    // been found.
The latter is tracked by MultiGetContext::value_mask_ + class Iterator { + public: + // -- iterator traits + using self_type = Iterator; + using value_type = KeyContext; + using reference = KeyContext&; + using pointer = KeyContext*; + using difference_type = int; + using iterator_category = std::forward_iterator_tag; + + Iterator(const Range* range, size_t idx) + : range_(range), ctx_(range->ctx_), index_(idx) { + while (index_ < range_->end_ && + (Mask{1} << index_) & + (range_->ctx_->value_mask_ | range_->skip_mask_ | + range_->invalid_mask_)) + index_++; + } + + Iterator(const Iterator&) = default; + + Iterator(const Iterator& other, const Range* range) + : range_(range), ctx_(other.ctx_), index_(other.index_) { + assert(range->ctx_ == other.ctx_); + } + Iterator& operator=(const Iterator&) = default; + + Iterator& operator++() { + while (++index_ < range_->end_ && + (Mask{1} << index_) & + (range_->ctx_->value_mask_ | range_->skip_mask_ | + range_->invalid_mask_)) + ; + return *this; + } + + bool operator==(Iterator other) const { + assert(range_->ctx_ == other.range_->ctx_); + return index_ == other.index_; + } + + bool operator!=(Iterator other) const { + assert(range_->ctx_ == other.range_->ctx_); + return index_ != other.index_; + } + + KeyContext& operator*() { + assert(index_ < range_->end_ && index_ >= range_->start_); + return *(ctx_->sorted_keys_[index_]); + } + + KeyContext* operator->() { + assert(index_ < range_->end_ && index_ >= range_->start_); + return ctx_->sorted_keys_[index_]; + } + + size_t index() { return index_; } + + private: + friend Range; + const Range* range_; + const MultiGetContext* ctx_; + size_t index_; + }; + + Range(const Range& mget_range, const Iterator& first, + const Iterator& last) { + ctx_ = mget_range.ctx_; + if (first == last) { + // This means create an empty range based on mget_range. 
So just
+      // set start_ and end_ to the same value
+      start_ = mget_range.start_;
+      end_ = start_;
+    } else {
+      start_ = first.index_;
+      end_ = last.index_;
+    }
+    skip_mask_ = mget_range.skip_mask_;
+    invalid_mask_ = mget_range.invalid_mask_;
+    assert(start_ < 64);
+    assert(end_ < 64);
+  }
+
+  Range() = default;
+
+  Iterator begin() const { return Iterator(this, start_); }
+
+  Iterator end() const { return Iterator(this, end_); }
+
+  bool empty() const { return RemainingMask() == 0; }
+
+  void SkipIndex(size_t index) { skip_mask_ |= Mask{1} << index; }
+
+  void SkipKey(const Iterator& iter) { SkipIndex(iter.index_); }
+
+  bool IsKeySkipped(const Iterator& iter) const {
+    return skip_mask_ & (Mask{1} << iter.index_);
+  }
+
+  // Update the value_mask_ in MultiGetContext so it is
+  // immediately reflected in all the Range Iterators
+  void MarkKeyDone(Iterator& iter) {
+    ctx_->value_mask_ |= (Mask{1} << iter.index_);
+  }
+
+  bool CheckKeyDone(Iterator& iter) const {
+    return ctx_->value_mask_ & (Mask{1} << iter.index_);
+  }
+
+  uint64_t KeysLeft() const { return BitsSetToOne(RemainingMask()); }
+
+  void AddSkipsFrom(const Range& other) {
+    assert(ctx_ == other.ctx_);
+    skip_mask_ |= other.skip_mask_;
+  }
+
+  uint64_t GetValueSize() { return ctx_->value_size_; }
+
+  void AddValueSize(uint64_t value_size) { ctx_->value_size_ += value_size; }
+
+  MultiGetContext* context() const { return ctx_; }
+
+  Range Suffix(const Range& other) const {
+    size_t other_last = other.FindLastRemaining();
+    size_t my_last = FindLastRemaining();
+
+    if (my_last > other_last) {
+      return Range(*this, Iterator(this, other_last),
+                   Iterator(this, my_last));
+    } else {
+      return Range(*this, begin(), begin());
+    }
+  }
+
+  // The += operator expands the number of keys in this range. The expansion
+  // is always to the right, i.e. start of the additional range >= end of
+  // current range. There should be no overlap. Any skipped keys in rhs are
+  // marked as invalid in the invalid_mask_.
+  Range& operator+=(const Range& rhs) {
+    assert(rhs.start_ >= end_);
+    // Check for non-overlapping ranges and adjust invalid_mask_ accordingly
+    if (end_ < rhs.start_) {
+      invalid_mask_ |= RangeMask(end_, rhs.start_);
+      skip_mask_ |= RangeMask(end_, rhs.start_);
+    }
+    start_ = std::min(start_, rhs.start_);
+    end_ = std::max(end_, rhs.end_);
+    skip_mask_ |= rhs.skip_mask_ & RangeMask(rhs.start_, rhs.end_);
+    invalid_mask_ |= (rhs.invalid_mask_ | rhs.skip_mask_) &
+                     RangeMask(rhs.start_, rhs.end_);
+    assert(start_ < 64);
+    assert(end_ < 64);
+    return *this;
+  }
+
+  // The -= operator removes keys from this range. The removed keys should
+  // come from a range completely overlapping the current range. The removed
+  // keys are marked invalid in the invalid_mask_.
+ Range& operator-=(const Range& rhs) { + assert(start_ <= rhs.start_ && end_ >= rhs.end_); + skip_mask_ |= (~rhs.skip_mask_ | rhs.invalid_mask_) & + RangeMask(rhs.start_, rhs.end_); + invalid_mask_ |= (~rhs.skip_mask_ | rhs.invalid_mask_) & + RangeMask(rhs.start_, rhs.end_); + return *this; + } + + // Return a complement of the current range + Range operator~() { + Range res = *this; + res.skip_mask_ = ~skip_mask_ & RangeMask(start_, end_); + return res; + } + + private: + friend MultiGetContext; + MultiGetContext* ctx_; + size_t start_; + size_t end_; + Mask skip_mask_; + Mask invalid_mask_; + + Range(MultiGetContext* ctx, size_t num_keys) + : ctx_(ctx), + start_(0), + end_(num_keys), + skip_mask_(0), + invalid_mask_(0) { + assert(num_keys < 64); + } + + static Mask RangeMask(size_t start, size_t end) { + return (((Mask{1} << (end - start)) - 1) << start); + } + + Mask RemainingMask() const { + return (((Mask{1} << end_) - 1) & ~((Mask{1} << start_) - 1) & + ~(ctx_->value_mask_ | skip_mask_)); + } + + size_t FindLastRemaining() const { + Mask mask = RemainingMask(); + size_t index = (mask >>= start_) ? start_ : 0; + while (mask >>= 1) { + index++; + } + return index; + } + }; + + // Return the initial range that encompasses all the keys in the batch + Range GetMultiGetRange() { return Range(this, num_keys_); } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.cc new file mode 100644 index 0000000000..eece8100e6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
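+
+// Illustrative usage sketch (editorial note, not part of the upstream
+// RocksDB source): a block read path would consult this helper roughly as
+// follows. `cache_options`, `handle`, and `ReadBlockFromFile` are assumed
+// names for the example; only the PersistentCacheHelper entry points below
+// are real.
+//
+//   std::unique_ptr<char[]> data;
+//   size_t expected =
+//       static_cast<size_t>(handle.size()) + BlockBasedTable::kBlockTrailerSize;
+//   Status s = PersistentCacheHelper::LookupSerialized(cache_options, handle,
+//                                                      &data, expected);
+//   if (!s.ok()) {
+//     // Cache miss: fall back to the file, then warm the cache so the
+//     // next lookup of the same block handle can hit.
+//     s = ReadBlockFromFile(handle, &data);
+//     if (s.ok()) {
+//       PersistentCacheHelper::InsertSerialized(cache_options, handle,
+//                                               data.get(), expected);
+//     }
+//   }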
+ +#include "table/persistent_cache_helper.h" + +#include "table/block_based/block_based_table_reader.h" +#include "table/format.h" + +namespace ROCKSDB_NAMESPACE { + +const PersistentCacheOptions PersistentCacheOptions::kEmpty; + +void PersistentCacheHelper::InsertSerialized( + const PersistentCacheOptions& cache_options, const BlockHandle& handle, + const char* data, const size_t size) { + assert(cache_options.persistent_cache); + assert(cache_options.persistent_cache->IsCompressed()); + + CacheKey key = + BlockBasedTable::GetCacheKey(cache_options.base_cache_key, handle); + + cache_options.persistent_cache->Insert(key.AsSlice(), data, size) + .PermitUncheckedError(); +} + +void PersistentCacheHelper::InsertUncompressed( + const PersistentCacheOptions& cache_options, const BlockHandle& handle, + const BlockContents& contents) { + assert(cache_options.persistent_cache); + assert(!cache_options.persistent_cache->IsCompressed()); + // Precondition: + // (1) content is cacheable + // (2) content is not compressed + + CacheKey key = + BlockBasedTable::GetCacheKey(cache_options.base_cache_key, handle); + + cache_options.persistent_cache + ->Insert(key.AsSlice(), contents.data.data(), contents.data.size()) + .PermitUncheckedError(); + ; +} + +Status PersistentCacheHelper::LookupSerialized( + const PersistentCacheOptions& cache_options, const BlockHandle& handle, + std::unique_ptr* out_data, const size_t expected_data_size) { +#ifdef NDEBUG + (void)expected_data_size; +#endif + assert(cache_options.persistent_cache); + assert(cache_options.persistent_cache->IsCompressed()); + + CacheKey key = + BlockBasedTable::GetCacheKey(cache_options.base_cache_key, handle); + + size_t size; + Status s = + cache_options.persistent_cache->Lookup(key.AsSlice(), out_data, &size); + if (!s.ok()) { + // cache miss + RecordTick(cache_options.statistics, PERSISTENT_CACHE_MISS); + return s; + } + + // cache hit + // Block-based table is assumed + assert(expected_data_size == + handle.size() + BlockBasedTable::kBlockTrailerSize); + assert(size == expected_data_size); + RecordTick(cache_options.statistics, PERSISTENT_CACHE_HIT); + return Status::OK(); +} + +Status PersistentCacheHelper::LookupUncompressed( + const PersistentCacheOptions& cache_options, const BlockHandle& handle, + BlockContents* contents) { + assert(cache_options.persistent_cache); + assert(!cache_options.persistent_cache->IsCompressed()); + if (!contents) { + // We shouldn't lookup in the cache. 
Either + // (1) Nowhere to store + return Status::NotFound(); + } + + CacheKey key = + BlockBasedTable::GetCacheKey(cache_options.base_cache_key, handle); + + std::unique_ptr data; + size_t size; + Status s = + cache_options.persistent_cache->Lookup(key.AsSlice(), &data, &size); + if (!s.ok()) { + // cache miss + RecordTick(cache_options.statistics, PERSISTENT_CACHE_MISS); + return s; + } + + // please note we are potentially comparing compressed data size with + // uncompressed data size + assert(handle.size() <= size); + + // update stats + RecordTick(cache_options.statistics, PERSISTENT_CACHE_HIT); + // construct result and return + *contents = BlockContents(std::move(data), size); + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.h new file mode 100644 index 0000000000..cce4092cde --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_helper.h @@ -0,0 +1,46 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include + +#include "monitoring/statistics_impl.h" +#include "table/format.h" +#include "table/persistent_cache_options.h" + +namespace ROCKSDB_NAMESPACE { + +struct BlockContents; + +// PersistentCacheHelper +// +// Encapsulates some of the helper logic for read and writing from the cache +class PersistentCacheHelper { + public: + // Insert block into cache of serialized blocks. Size includes block trailer + // (if applicable). + static void InsertSerialized(const PersistentCacheOptions& cache_options, + const BlockHandle& handle, const char* data, + const size_t size); + + // Insert block into cache of uncompressed blocks. No block trailer. + static void InsertUncompressed(const PersistentCacheOptions& cache_options, + const BlockHandle& handle, + const BlockContents& contents); + + // Lookup block from cache of serialized blocks. Size includes block trailer + // (if applicable). + static Status LookupSerialized(const PersistentCacheOptions& cache_options, + const BlockHandle& handle, + std::unique_ptr* out_data, + const size_t expected_data_size); + + // Lookup block from uncompressed cache. No block trailer. + static Status LookupUncompressed(const PersistentCacheOptions& cache_options, + const BlockHandle& handle, + BlockContents* contents); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_options.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_options.h new file mode 100644 index 0000000000..46f2687979 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/persistent_cache_options.h @@ -0,0 +1,34 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
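+
+// Illustrative sketch (editorial note, not part of the upstream RocksDB
+// source): constructing the options struct declared below. The `cache` and
+// `base_key` values are assumptions; in practice they are owned by the
+// table reader that derives per-file cache keys.
+//
+//   std::shared_ptr<PersistentCache> cache = ...;  // user-provided
+//   OffsetableCacheKey base_key = ...;             // derived per table file
+//   PersistentCacheOptions opts(cache, base_key, /*_statistics=*/nullptr);
+//   // Readers with no persistent cache configured can pass
+//   // PersistentCacheOptions::kEmpty instead.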
+#pragma once + +#include + +#include "cache/cache_key.h" +#include "monitoring/statistics_impl.h" +#include "rocksdb/persistent_cache.h" + +namespace ROCKSDB_NAMESPACE { + +// PersistentCacheOptions +// +// This describe the caching behavior for page cache +// This is used to pass the context for caching and the cache handle +struct PersistentCacheOptions { + PersistentCacheOptions() {} + explicit PersistentCacheOptions( + const std::shared_ptr& _persistent_cache, + const OffsetableCacheKey& _base_cache_key, Statistics* const _statistics) + : persistent_cache(_persistent_cache), + base_cache_key(_base_cache_key), + statistics(_statistics) {} + std::shared_ptr persistent_cache; + OffsetableCacheKey base_cache_key; + Statistics* statistics = nullptr; + + static const PersistentCacheOptions kEmpty; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.cc new file mode 100644 index 0000000000..21441f6161 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "table/plain/plain_table_bloom.h" + +#include +#include + +#include "memory/allocator.h" +#include "util/dynamic_bloom.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +uint32_t GetTotalBitsForLocality(uint32_t total_bits) { + uint32_t num_blocks = + (total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8); + + // Make num_blocks an odd number to make sure more bits are involved + // when determining which block. + if (num_blocks % 2 == 0) { + num_blocks++; + } + + return num_blocks * (CACHE_LINE_SIZE * 8); +} +} // namespace + +PlainTableBloomV1::PlainTableBloomV1(uint32_t num_probes) + : kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {} + +void PlainTableBloomV1::SetRawData(char* raw_data, uint32_t total_bits, + uint32_t num_blocks) { + data_ = raw_data; + kTotalBits = total_bits; + kNumBlocks = num_blocks; +} + +void PlainTableBloomV1::SetTotalBits(Allocator* allocator, uint32_t total_bits, + uint32_t locality, + size_t huge_page_tlb_size, + Logger* logger) { + kTotalBits = (locality > 0) ? GetTotalBitsForLocality(total_bits) + : (total_bits + 7) / 8 * 8; + kNumBlocks = (locality > 0) ? 
(kTotalBits / (CACHE_LINE_SIZE * 8)) : 0; + + assert(kNumBlocks > 0 || kTotalBits > 0); + assert(kNumProbes > 0); + + uint32_t sz = kTotalBits / 8; + if (kNumBlocks > 0) { + sz += CACHE_LINE_SIZE - 1; + } + assert(allocator); + + char* raw = allocator->AllocateAligned(sz, huge_page_tlb_size, logger); + memset(raw, 0, sz); + auto cache_line_offset = reinterpret_cast(raw) % CACHE_LINE_SIZE; + if (kNumBlocks > 0 && cache_line_offset > 0) { + raw += CACHE_LINE_SIZE - cache_line_offset; + } + data_ = raw; +} + +void BloomBlockBuilder::AddKeysHashes( + const std::vector& keys_hashes) { + for (auto hash : keys_hashes) { + bloom_.AddHash(hash); + } +} + +Slice BloomBlockBuilder::Finish() { return bloom_.GetRawData(); } + +const std::string BloomBlockBuilder::kBloomBlock = "kBloomBlock"; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.h new file mode 100644 index 0000000000..460e7ec390 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_bloom.h @@ -0,0 +1,132 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include +#include +#include + +#include "port/port.h" +#include "rocksdb/slice.h" +#include "util/bloom_impl.h" +#include "util/hash.h" +#include "util/math.h" + +namespace ROCKSDB_NAMESPACE { +class Slice; +class Allocator; +class Logger; + +// A legacy Bloom filter implementation used by Plain Table db format, for +// schema backward compatibility. Not for use in new filter applications. +class PlainTableBloomV1 { + public: + // allocator: pass allocator to bloom filter, hence trace the usage of memory + // total_bits: fixed total bits for the bloom + // num_probes: number of hash probes for a single key + // locality: If positive, optimize for cache line locality, 0 otherwise. + // hash_func: customized hash function + // huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB + // within this page size. Need to reserve huge pages for + // it to be allocated, like: + // sysctl -w vm.nr_hugepages=20 + // See linux doc Documentation/vm/hugetlbpage.txt + explicit PlainTableBloomV1(uint32_t num_probes = 6); + void SetTotalBits(Allocator* allocator, uint32_t total_bits, + uint32_t locality, size_t huge_page_tlb_size, + Logger* logger); + + ~PlainTableBloomV1() {} + + // Assuming single threaded access to this function. 
+ void AddHash(uint32_t hash); + + // Multithreaded access to this function is OK + bool MayContainHash(uint32_t hash) const; + + void Prefetch(uint32_t hash); + + uint32_t GetNumBlocks() const { return kNumBlocks; } + + Slice GetRawData() const { return Slice(data_, GetTotalBits() / 8); } + + void SetRawData(char* raw_data, uint32_t total_bits, uint32_t num_blocks = 0); + + uint32_t GetTotalBits() const { return kTotalBits; } + + bool IsInitialized() const { return kNumBlocks > 0 || kTotalBits > 0; } + + private: + uint32_t kTotalBits; + uint32_t kNumBlocks; + const uint32_t kNumProbes; + + char* data_; + + static constexpr int LOG2_CACHE_LINE_SIZE = + ConstexprFloorLog2(CACHE_LINE_SIZE); +}; + +#if defined(_MSC_VER) +#pragma warning(push) +// local variable is initialized but not referenced +#pragma warning(disable : 4189) +#endif +inline void PlainTableBloomV1::Prefetch(uint32_t h) { + if (kNumBlocks != 0) { + uint32_t ignored; + LegacyLocalityBloomImpl::PrepareHashMayMatch( + h, kNumBlocks, data_, &ignored, LOG2_CACHE_LINE_SIZE); + } +} +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +inline bool PlainTableBloomV1::MayContainHash(uint32_t h) const { + assert(IsInitialized()); + if (kNumBlocks != 0) { + return LegacyLocalityBloomImpl::HashMayMatch( + h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE); + } else { + return LegacyNoLocalityBloomImpl::HashMayMatch(h, kTotalBits, kNumProbes, + data_); + } +} + +inline void PlainTableBloomV1::AddHash(uint32_t h) { + assert(IsInitialized()); + if (kNumBlocks != 0) { + LegacyLocalityBloomImpl::AddHash(h, kNumBlocks, kNumProbes, data_, + LOG2_CACHE_LINE_SIZE); + } else { + LegacyNoLocalityBloomImpl::AddHash(h, kTotalBits, kNumProbes, data_); + } +} + +class BloomBlockBuilder { + public: + static const std::string kBloomBlock; + + explicit BloomBlockBuilder(uint32_t num_probes = 6) : bloom_(num_probes) {} + + void SetTotalBits(Allocator* allocator, uint32_t total_bits, + uint32_t locality, size_t huge_page_tlb_size, + Logger* logger) { + bloom_.SetTotalBits(allocator, total_bits, locality, huge_page_tlb_size, + logger); + } + + uint32_t GetNumBlocks() const { return bloom_.GetNumBlocks(); } + + void AddKeysHashes(const std::vector& keys_hashes); + + Slice Finish(); + + private: + PlainTableBloomV1 bloom_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.cc new file mode 100644 index 0000000000..126098a868 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.cc @@ -0,0 +1,335 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
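+
+// Illustrative sketch (editorial note, not part of the upstream RocksDB
+// source): this builder drives PlainTableBloomV1 from plain_table_bloom.h
+// in roughly this order. `arena`, `logger`, `hashes`, `num_keys`, and
+// `bits_per_key` are assumed names for the example.
+//
+//   PlainTableBloomV1 bloom(/*num_probes=*/6);
+//   bloom.SetTotalBits(&arena, /*total_bits=*/num_keys * bits_per_key,
+//                      /*locality=*/1, /*huge_page_tlb_size=*/0, logger);
+//   for (uint32_t h : hashes) {
+//     bloom.AddHash(h);  // build phase; single-threaded by contract
+//   }
+//   // Query phase may be concurrent; false positives are possible, but a
+//   // hash that was added is never reported absent.
+//   bool may_match = bloom.MayContainHash(some_hash);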
+ +#include "table/plain/plain_table_builder.h" + +#include + +#include +#include +#include + +#include "db/dbformat.h" +#include "file/writable_file_writer.h" +#include "logging/logging.h" +#include "rocksdb/comparator.h" +#include "rocksdb/env.h" +#include "rocksdb/filter_policy.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "table/block_based/block_builder.h" +#include "table/format.h" +#include "table/meta_blocks.h" +#include "table/plain/plain_table_bloom.h" +#include "table/plain/plain_table_factory.h" +#include "table/plain/plain_table_index.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +// a utility that helps writing block content to the file +// @offset will advance if @block_contents was successfully written. +// @block_handle the block handle this particular block. +IOStatus WriteBlock(const Slice& block_contents, WritableFileWriter* file, + uint64_t* offset, BlockHandle* block_handle) { + block_handle->set_offset(*offset); + block_handle->set_size(block_contents.size()); + IOStatus io_s = file->Append(block_contents); + + if (io_s.ok()) { + *offset += block_contents.size(); + } + return io_s; +} + +} // namespace + +// kPlainTableMagicNumber was picked by running +// echo rocksdb.table.plain | sha1sum +// and taking the leading 64 bits. +extern const uint64_t kPlainTableMagicNumber = 0x8242229663bf9564ull; +extern const uint64_t kLegacyPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull; + +PlainTableBuilder::PlainTableBuilder( + const ImmutableOptions& ioptions, const MutableCFOptions& moptions, + const IntTblPropCollectorFactories* int_tbl_prop_collector_factories, + uint32_t column_family_id, int level_at_creation, WritableFileWriter* file, + uint32_t user_key_len, EncodingType encoding_type, size_t index_sparseness, + uint32_t bloom_bits_per_key, const std::string& column_family_name, + uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio, + bool store_index_in_file, const std::string& db_id, + const std::string& db_session_id, uint64_t file_number) + : ioptions_(ioptions), + moptions_(moptions), + bloom_block_(num_probes), + file_(file), + bloom_bits_per_key_(bloom_bits_per_key), + huge_page_tlb_size_(huge_page_tlb_size), + encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(), + index_sparseness), + store_index_in_file_(store_index_in_file), + prefix_extractor_(moptions.prefix_extractor.get()) { + // Build index block and save it in the file if hash_table_ratio > 0 + if (store_index_in_file_) { + assert(hash_table_ratio > 0 || IsTotalOrderMode()); + index_builder_.reset(new PlainTableIndexBuilder( + &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness, + hash_table_ratio, huge_page_tlb_size_)); + properties_ + .user_collected_properties[PlainTablePropertyNames::kBloomVersion] = + "1"; // For future use + } + + properties_.fixed_key_len = user_key_len; + + // for plain table, we put all the data in a big chuck. + properties_.num_data_blocks = 1; + // Fill it later if store_index_in_file_ == true + properties_.index_size = 0; + properties_.filter_size = 0; + // To support roll-back to previous version, now still use version 0 for + // plain encoding. + properties_.format_version = (encoding_type == kPlain) ? 
0 : 1; + properties_.column_family_id = column_family_id; + properties_.column_family_name = column_family_name; + properties_.db_id = db_id; + properties_.db_session_id = db_session_id; + properties_.db_host_id = ioptions.db_host_id; + if (!ReifyDbHostIdProperty(ioptions_.env, &properties_.db_host_id).ok()) { + ROCKS_LOG_INFO(ioptions_.logger, "db_host_id property will not be set"); + } + properties_.orig_file_number = file_number; + properties_.prefix_extractor_name = + moptions_.prefix_extractor != nullptr + ? moptions_.prefix_extractor->AsString() + : "nullptr"; + + std::string val; + PutFixed32(&val, static_cast(encoder_.GetEncodingType())); + properties_ + .user_collected_properties[PlainTablePropertyNames::kEncodingType] = val; + + assert(int_tbl_prop_collector_factories); + for (auto& factory : *int_tbl_prop_collector_factories) { + assert(factory); + + table_properties_collectors_.emplace_back( + factory->CreateIntTblPropCollector(column_family_id, + level_at_creation)); + } +} + +PlainTableBuilder::~PlainTableBuilder() { + // They are supposed to have been passed to users through Finish() + // if the file succeeds. + status_.PermitUncheckedError(); + io_status_.PermitUncheckedError(); +} + +void PlainTableBuilder::Add(const Slice& key, const Slice& value) { + // temp buffer for metadata bytes between key and value. + char meta_bytes_buf[6]; + size_t meta_bytes_buf_size = 0; + + ParsedInternalKey internal_key; + if (!ParseInternalKey(key, &internal_key, false /* log_err_key */) + .ok()) { // TODO + assert(false); + return; + } + if (internal_key.type == kTypeRangeDeletion) { + status_ = Status::NotSupported("Range deletion unsupported"); + return; + } + + // Store key hash + if (store_index_in_file_) { + if (moptions_.prefix_extractor == nullptr) { + keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key)); + } else { + Slice prefix = + moptions_.prefix_extractor->Transform(internal_key.user_key); + keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix)); + } + } + + // Write value + assert(offset_ <= std::numeric_limits::max()); + auto prev_offset = static_cast(offset_); + // Write out the key + io_status_ = encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf, + &meta_bytes_buf_size); + if (SaveIndexInFile()) { + index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset); + } + + // Write value length + uint32_t value_size = static_cast(value.size()); + if (io_status_.ok()) { + char* end_ptr = + EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size); + assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf)); + meta_bytes_buf_size = end_ptr - meta_bytes_buf; + io_status_ = file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size)); + } + + // Write value + if (io_status_.ok()) { + io_status_ = file_->Append(value); + offset_ += value_size + meta_bytes_buf_size; + } + + if (io_status_.ok()) { + properties_.num_entries++; + properties_.raw_key_size += key.size(); + properties_.raw_value_size += value.size(); + if (internal_key.type == kTypeDeletion || + internal_key.type == kTypeSingleDeletion) { + properties_.num_deletions++; + } else if (internal_key.type == kTypeMerge) { + properties_.num_merge_operands++; + } + } + + // notify property collectors + NotifyCollectTableCollectorsOnAdd( + key, value, offset_, table_properties_collectors_, ioptions_.logger); + status_ = io_status_; +} + +Status PlainTableBuilder::Finish() { + assert(!closed_); + closed_ = true; + + properties_.data_size = offset_; + + // Write the following blocks + // 1. 
[meta block: bloom] - optional + // 2. [meta block: index] - optional + // 3. [meta block: properties] + // 4. [metaindex block] + // 5. [footer] + + MetaIndexBuilder meta_index_builer; + + if (store_index_in_file_ && (properties_.num_entries > 0)) { + assert(properties_.num_entries <= std::numeric_limits::max()); + BlockHandle bloom_block_handle; + if (bloom_bits_per_key_ > 0) { + bloom_block_.SetTotalBits( + &arena_, + static_cast(properties_.num_entries) * bloom_bits_per_key_, + ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.logger); + + PutVarint32(&properties_.user_collected_properties + [PlainTablePropertyNames::kNumBloomBlocks], + bloom_block_.GetNumBlocks()); + + bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_); + + Slice bloom_finish_result = bloom_block_.Finish(); + + properties_.filter_size = bloom_finish_result.size(); + io_status_ = + WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle); + + if (!io_status_.ok()) { + status_ = io_status_; + return status_; + } + meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle); + } + BlockHandle index_block_handle; + Slice index_finish_result = index_builder_->Finish(); + + properties_.index_size = index_finish_result.size(); + io_status_ = + WriteBlock(index_finish_result, file_, &offset_, &index_block_handle); + + if (!io_status_.ok()) { + status_ = io_status_; + return status_; + } + + meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock, + index_block_handle); + } + + // Calculate bloom block size and index block size + PropertyBlockBuilder property_block_builder; + // -- Add basic properties + property_block_builder.AddTableProperty(properties_); + + property_block_builder.Add(properties_.user_collected_properties); + + // -- Add user collected properties + NotifyCollectTableCollectorsOnFinish( + table_properties_collectors_, ioptions_.logger, &property_block_builder); + + // -- Write property block + BlockHandle property_block_handle; + IOStatus s = WriteBlock(property_block_builder.Finish(), file_, &offset_, + &property_block_handle); + if (!s.ok()) { + return static_cast(s); + } + meta_index_builer.Add(kPropertiesBlockName, property_block_handle); + + // -- write metaindex block + BlockHandle metaindex_block_handle; + io_status_ = WriteBlock(meta_index_builer.Finish(), file_, &offset_, + &metaindex_block_handle); + if (!io_status_.ok()) { + status_ = io_status_; + return status_; + } + + // Write Footer + // no need to write out new footer if we're using default checksum + FooterBuilder footer; + footer.Build(kPlainTableMagicNumber, /* format_version */ 0, offset_, + kNoChecksum, metaindex_block_handle); + io_status_ = file_->Append(footer.GetSlice()); + if (io_status_.ok()) { + offset_ += footer.GetSlice().size(); + } + status_ = io_status_; + return status_; +} + +void PlainTableBuilder::Abandon() { closed_ = true; } + +uint64_t PlainTableBuilder::NumEntries() const { + return properties_.num_entries; +} + +uint64_t PlainTableBuilder::FileSize() const { return offset_; } + +std::string PlainTableBuilder::GetFileChecksum() const { + if (file_ != nullptr) { + return file_->GetFileChecksum(); + } else { + return kUnknownFileChecksum; + } +} + +const char* PlainTableBuilder::GetFileChecksumFuncName() const { + if (file_ != nullptr) { + return file_->GetFileChecksumFuncName(); + } else { + return kUnknownFileChecksumFuncName; + } +} +void PlainTableBuilder::SetSeqnoTimeTableProperties(const std::string& string, + uint64_t uint_64) { + // TODO: storing seqno to time 
mapping is not yet supported for plain table.
+  TableBuilder::SetSeqnoTimeTableProperties(string, uint_64);
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.h
new file mode 100644
index 0000000000..863882baf2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_builder.h
@@ -0,0 +1,152 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "db/version_edit.h"
+#include "rocksdb/options.h"
+#include "rocksdb/status.h"
+#include "rocksdb/table.h"
+#include "rocksdb/table_properties.h"
+#include "table/plain/plain_table_bloom.h"
+#include "table/plain/plain_table_index.h"
+#include "table/plain/plain_table_key_coding.h"
+#include "table/table_builder.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class BlockBuilder;
+class BlockHandle;
+class WritableFile;
+class TableBuilder;
+
+// The builder class of PlainTable. For description of PlainTable format
+// See comments of class PlainTableFactory, where instances of
+// PlainTableReader are created.
+class PlainTableBuilder : public TableBuilder {
+ public:
+  // Create a builder that will store the contents of the table it is
+  // building in *file. Does not close the file. It is up to the
+  // caller to close the file after calling Finish(). The output file
+  // will be part of level specified by 'level'. A value of -1 means
+  // that the caller does not know which level the output file will reside in.
+  PlainTableBuilder(
+      const ImmutableOptions& ioptions, const MutableCFOptions& moptions,
+      const IntTblPropCollectorFactories* int_tbl_prop_collector_factories,
+      uint32_t column_family_id, int level_at_creation,
+      WritableFileWriter* file, uint32_t user_key_size,
+      EncodingType encoding_type, size_t index_sparseness,
+      uint32_t bloom_bits_per_key, const std::string& column_family_name,
+      uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
+      double hash_table_ratio = 0, bool store_index_in_file = false,
+      const std::string& db_id = "", const std::string& db_session_id = "",
+      uint64_t file_number = 0);
+  // No copying allowed
+  PlainTableBuilder(const PlainTableBuilder&) = delete;
+  void operator=(const PlainTableBuilder&) = delete;
+
+  // REQUIRES: Either Finish() or Abandon() has been called.
+  ~PlainTableBuilder();
+
+  // Add key,value to the table being constructed.
+  // REQUIRES: key is after any previously added key according to comparator.
+  // REQUIRES: Finish(), Abandon() have not been called
+  void Add(const Slice& key, const Slice& value) override;
+
+  // Return non-ok iff some error has been detected.
+  Status status() const override { return status_; }
+
+  // Return non-ok iff some error happens during IO.
+  IOStatus io_status() const override { return io_status_; }
+
+  // Finish building the table. Stops using the file passed to the
+  // constructor after this function returns.
+  // REQUIRES: Finish(), Abandon() have not been called
+  Status Finish() override;
+
+  // Indicate that the contents of this builder should be abandoned. Stops
+  // using the file passed to the constructor after this function returns.
+ // If the caller is not going to call Finish(), it must call Abandon() + // before destroying this builder. + // REQUIRES: Finish(), Abandon() have not been called + void Abandon() override; + + // Number of calls to Add() so far. + uint64_t NumEntries() const override; + + // Size of the file generated so far. If invoked after a successful + // Finish() call, returns the size of the final generated file. + uint64_t FileSize() const override; + + TableProperties GetTableProperties() const override { return properties_; } + + bool SaveIndexInFile() const { return store_index_in_file_; } + + // Get file checksum + std::string GetFileChecksum() const override; + + // Get file checksum function name + const char* GetFileChecksumFuncName() const override; + + void SetSeqnoTimeTableProperties(const std::string& string, + uint64_t uint_64) override; + + private: + Arena arena_; + const ImmutableOptions& ioptions_; + const MutableCFOptions& moptions_; + std::vector> + table_properties_collectors_; + + BloomBlockBuilder bloom_block_; + std::unique_ptr index_builder_; + + WritableFileWriter* file_; + uint64_t offset_ = 0; + uint32_t bloom_bits_per_key_; + size_t huge_page_tlb_size_; + Status status_; + IOStatus io_status_; + TableProperties properties_; + PlainTableKeyEncoder encoder_; + + bool store_index_in_file_; + + std::vector keys_or_prefixes_hashes_; + bool closed_ = false; // Either Finish() or Abandon() has been called. + + const SliceTransform* prefix_extractor_; + + Slice GetPrefix(const Slice& target) const { + assert(target.size() >= 8); // target is internal key + return GetPrefixFromUserKey(ExtractUserKey(target)); + } + + Slice GetPrefix(const ParsedInternalKey& target) const { + return GetPrefixFromUserKey(target.user_key); + } + + Slice GetPrefixFromUserKey(const Slice& user_key) const { + if (!IsTotalOrderMode()) { + return prefix_extractor_->Transform(user_key); + } else { + // Use empty slice as prefix if prefix_extractor is not set. + // In that case, + // it falls back to pure binary search and + // total iterator seek is supported. + return Slice(); + } + } + + bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); } +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.cc new file mode 100644 index 0000000000..80aa9cb8e8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.cc @@ -0,0 +1,296 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
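+
+// Illustrative sketch (editorial note, not part of the upstream RocksDB
+// source): the lifecycle contract of the PlainTableBuilder declared above,
+// as handed out by PlainTableFactory::NewTableBuilder() below. `factory`,
+// `opts`, `writer`, and `entries` are assumed names for the example.
+//
+//   std::unique_ptr<TableBuilder> builder(
+//       factory->NewTableBuilder(opts, writer));
+//   for (const auto& kv : entries) {  // must already be sorted by comparator
+//     builder->Add(kv.first, kv.second);
+//   }
+//   Status s = builder->status();
+//   if (s.ok()) {
+//     s = builder->Finish();  // writes bloom/index/properties/footer
+//   } else {
+//     builder->Abandon();     // required when Finish() will not be called
+//   }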
+ +#include "table/plain/plain_table_factory.h" + +#include + +#include + +#include "db/dbformat.h" +#include "port/port.h" +#include "rocksdb/convenience.h" +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "table/plain/plain_table_builder.h" +#include "table/plain/plain_table_reader.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +static std::unordered_map plain_table_type_info = { + {"user_key_len", + {offsetof(struct PlainTableOptions, user_key_len), OptionType::kUInt32T, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"bloom_bits_per_key", + {offsetof(struct PlainTableOptions, bloom_bits_per_key), OptionType::kInt, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"hash_table_ratio", + {offsetof(struct PlainTableOptions, hash_table_ratio), OptionType::kDouble, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"index_sparseness", + {offsetof(struct PlainTableOptions, index_sparseness), OptionType::kSizeT, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"huge_page_tlb_size", + {offsetof(struct PlainTableOptions, huge_page_tlb_size), + OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"encoding_type", + {offsetof(struct PlainTableOptions, encoding_type), + OptionType::kEncodingType, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"full_scan_mode", + {offsetof(struct PlainTableOptions, full_scan_mode), OptionType::kBoolean, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"store_index_in_file", + {offsetof(struct PlainTableOptions, store_index_in_file), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; + +PlainTableFactory::PlainTableFactory(const PlainTableOptions& options) + : table_options_(options) { + RegisterOptions(&table_options_, &plain_table_type_info); +} + +Status PlainTableFactory::NewTableReader( + const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, + bool /*prefetch_index_and_filter_in_cache*/) const { + return PlainTableReader::Open( + table_reader_options.ioptions, table_reader_options.env_options, + table_reader_options.internal_comparator, std::move(file), file_size, + table, table_options_.bloom_bits_per_key, table_options_.hash_table_ratio, + table_options_.index_sparseness, table_options_.huge_page_tlb_size, + table_options_.full_scan_mode, table_reader_options.immortal, + table_reader_options.prefix_extractor.get()); +} + +TableBuilder* PlainTableFactory::NewTableBuilder( + const TableBuilderOptions& table_builder_options, + WritableFileWriter* file) const { + // Ignore the skip_filters flag. PlainTable format is optimized for small + // in-memory dbs. 
The skip_filters optimization is not useful for plain + // tables + // + return new PlainTableBuilder( + table_builder_options.ioptions, table_builder_options.moptions, + table_builder_options.int_tbl_prop_collector_factories, + table_builder_options.column_family_id, + table_builder_options.level_at_creation, file, + table_options_.user_key_len, table_options_.encoding_type, + table_options_.index_sparseness, table_options_.bloom_bits_per_key, + table_builder_options.column_family_name, 6, + table_options_.huge_page_tlb_size, table_options_.hash_table_ratio, + table_options_.store_index_in_file, table_builder_options.db_id, + table_builder_options.db_session_id, table_builder_options.cur_file_num); +} + +std::string PlainTableFactory::GetPrintableOptions() const { + std::string ret; + ret.reserve(20000); + const int kBufferSize = 200; + char buffer[kBufferSize]; + + snprintf(buffer, kBufferSize, " user_key_len: %u\n", + table_options_.user_key_len); + ret.append(buffer); + snprintf(buffer, kBufferSize, " bloom_bits_per_key: %d\n", + table_options_.bloom_bits_per_key); + ret.append(buffer); + snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n", + table_options_.hash_table_ratio); + ret.append(buffer); + snprintf(buffer, kBufferSize, " index_sparseness: %" ROCKSDB_PRIszt "\n", + table_options_.index_sparseness); + ret.append(buffer); + snprintf(buffer, kBufferSize, " huge_page_tlb_size: %" ROCKSDB_PRIszt "\n", + table_options_.huge_page_tlb_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " encoding_type: %d\n", + table_options_.encoding_type); + ret.append(buffer); + snprintf(buffer, kBufferSize, " full_scan_mode: %d\n", + table_options_.full_scan_mode); + ret.append(buffer); + snprintf(buffer, kBufferSize, " store_index_in_file: %d\n", + table_options_.store_index_in_file); + ret.append(buffer); + return ret; +} + +Status GetPlainTableOptionsFromString(const ConfigOptions& config_options, + const PlainTableOptions& table_options, + const std::string& opts_str, + PlainTableOptions* new_table_options) { + std::unordered_map opts_map; + Status s = StringToMap(opts_str, &opts_map); + if (!s.ok()) { + return s; + } + + s = GetPlainTableOptionsFromMap(config_options, table_options, opts_map, + new_table_options); + // Translate any errors (NotFound, NotSupported, to InvalidArgument + if (s.ok() || s.IsInvalidArgument()) { + return s; + } else { + return Status::InvalidArgument(s.getState()); + } +} + +static int RegisterBuiltinMemTableRepFactory(ObjectLibrary& library, + const std::string& /*arg*/) { + // The MemTableRepFactory built-in classes will be either a class + // (VectorRepFactory) or a nickname (vector), followed optionally by ":#", + // where # is the "size" of the factory. 
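+  // For example (illustrative): "vector:1024" creates a VectorRepFactory
+  // reserving 1024 entries and "skip_list:16" a SkipListFactory with a
+  // lookahead of 16, while the bare names use each factory's defaults.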
+ auto AsPattern = [](const std::string& name, const std::string& alt) { + auto pattern = ObjectLibrary::PatternEntry(name, true); + pattern.AnotherName(alt); + pattern.AddNumber(":"); + return pattern; + }; + library.AddFactory( + AsPattern(VectorRepFactory::kClassName(), VectorRepFactory::kNickName()), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /*errmsg*/) { + auto colon = uri.find(":"); + if (colon != std::string::npos) { + size_t count = ParseSizeT(uri.substr(colon + 1)); + guard->reset(new VectorRepFactory(count)); + } else { + guard->reset(new VectorRepFactory()); + } + return guard->get(); + }); + library.AddFactory( + AsPattern(SkipListFactory::kClassName(), SkipListFactory::kNickName()), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /*errmsg*/) { + auto colon = uri.find(":"); + if (colon != std::string::npos) { + size_t lookahead = ParseSizeT(uri.substr(colon + 1)); + guard->reset(new SkipListFactory(lookahead)); + } else { + guard->reset(new SkipListFactory()); + } + return guard->get(); + }); + library.AddFactory( + AsPattern("HashLinkListRepFactory", "hash_linkedlist"), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /*errmsg*/) { + // Expecting format: hash_linkedlist: + auto colon = uri.find(":"); + if (colon != std::string::npos) { + size_t hash_bucket_count = ParseSizeT(uri.substr(colon + 1)); + guard->reset(NewHashLinkListRepFactory(hash_bucket_count)); + } else { + guard->reset(NewHashLinkListRepFactory()); + } + return guard->get(); + }); + library.AddFactory( + AsPattern("HashSkipListRepFactory", "prefix_hash"), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /*errmsg*/) { + // Expecting format: prefix_hash: + auto colon = uri.find(":"); + if (colon != std::string::npos) { + size_t hash_bucket_count = ParseSizeT(uri.substr(colon + 1)); + guard->reset(NewHashSkipListRepFactory(hash_bucket_count)); + } else { + guard->reset(NewHashSkipListRepFactory()); + } + return guard->get(); + }); + library.AddFactory( + "cuckoo", + [](const std::string& /*uri*/, + std::unique_ptr* /*guard*/, std::string* errmsg) { + *errmsg = "cuckoo hash memtable is not supported anymore."; + return nullptr; + }); + + size_t num_types; + return static_cast(library.GetFactoryCount(&num_types)); +} + +Status GetMemTableRepFactoryFromString( + const std::string& opts_str, std::unique_ptr* result) { + ConfigOptions config_options; + config_options.ignore_unsupported_options = false; + config_options.ignore_unknown_options = false; + return MemTableRepFactory::CreateFromString(config_options, opts_str, result); +} + +Status MemTableRepFactory::CreateFromString( + const ConfigOptions& config_options, const std::string& value, + std::unique_ptr* result) { + static std::once_flag once; + std::call_once(once, [&]() { + RegisterBuiltinMemTableRepFactory(*(ObjectLibrary::Default().get()), ""); + }); + std::string id; + std::unordered_map opt_map; + Status status = Customizable::GetOptionsMap(config_options, result->get(), + value, &id, &opt_map); + if (!status.ok()) { // GetOptionsMap failed + return status; + } else if (value.empty()) { + // No Id and no options. Clear the object + result->reset(); + return Status::OK(); + } else if (id.empty()) { // We have no Id but have options. 
Not good + return Status::NotSupported("Cannot reset object ", id); + } else { + status = NewUniqueObject(config_options, id, opt_map, + result); + } + return status; +} + +Status MemTableRepFactory::CreateFromString( + const ConfigOptions& config_options, const std::string& value, + std::shared_ptr* result) { + std::unique_ptr factory; + Status s = CreateFromString(config_options, value, &factory); + if (factory && s.ok()) { + result->reset(factory.release()); + } + return s; +} + +Status GetPlainTableOptionsFromMap( + const ConfigOptions& config_options, const PlainTableOptions& table_options, + const std::unordered_map& opts_map, + PlainTableOptions* new_table_options) { + assert(new_table_options); + PlainTableFactory ptf(table_options); + Status s = ptf.ConfigureFromMap(config_options, opts_map); + if (s.ok()) { + *new_table_options = *(ptf.GetOptions()); + } else { + // Restore "new_options" to the default "base_options". + *new_table_options = table_options; + } + return s; +} + +extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options) { + return new PlainTableFactory(options); +} + +const std::string PlainTablePropertyNames::kEncodingType = + "rocksdb.plain.table.encoding.type"; + +const std::string PlainTablePropertyNames::kBloomVersion = + "rocksdb.plain.table.bloom.version"; + +const std::string PlainTablePropertyNames::kNumBloomBlocks = + "rocksdb.plain.table.bloom.numblocks"; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.h new file mode 100644 index 0000000000..a47418af69 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_factory.h @@ -0,0 +1,180 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include + +#include +#include + +#include "rocksdb/table.h" + +namespace ROCKSDB_NAMESPACE { + +struct EnvOptions; + +class Status; +class RandomAccessFile; +class WritableFile; +class Table; +class TableBuilder; + +// PlainTableFactory is the entrance function to the PlainTable format of +// SST files. It returns instances PlainTableBuilder as the builder +// class and PlainTableReader as the reader class, where the format is +// actually implemented. +// +// The PlainTable is designed for memory-mapped file systems, e.g. tmpfs. +// Data is not organized in blocks, which allows fast access. Because of +// following downsides +// 1. Data compression is not supported. +// 2. Data is not checksumed. +// it is not recommended to use this format on other type of file systems. +// +// PlainTable requires fixed length key, configured as a constructor +// parameter of the factory class. Output file format: +// +-------------+-----------------+ +// | version | user_key_length | +// +------------++------------+-----------------+ <= key1 offset +// | encoded key1 | value_size | | +// +------------+-------------+-------------+ | +// | value1 | +// | | +// +--------------------------+-------------+---+ <= key2 offset +// | encoded key2 | value_size | | +// +------------+-------------+-------------+ | +// | value2 | +// | | +// | ...... 
| +// +-----------------+--------------------------+ +// +// When the key encoding type is kPlain. Key part is encoded as: +// +------------+--------------------+ +// | [key_size] | internal key | +// +------------+--------------------+ +// for the case of user_key_len = kPlainTableVariableLength case, +// and simply: +// +----------------------+ +// | internal key | +// +----------------------+ +// for user_key_len != kPlainTableVariableLength case. +// +// If key encoding type is kPrefix. Keys are encoding in this format. +// There are three ways to encode a key: +// (1) Full Key +// +---------------+---------------+-------------------+ +// | Full Key Flag | Full Key Size | Full Internal Key | +// +---------------+---------------+-------------------+ +// which simply encodes a full key +// +// (2) A key shared the same prefix as the previous key, which is encoded as +// format of (1). +// +-------------+-------------+-------------+-------------+------------+ +// | Prefix Flag | Prefix Size | Suffix Flag | Suffix Size | Key Suffix | +// +-------------+-------------+-------------+-------------+------------+ +// where key is the suffix part of the key, including the internal bytes. +// the actual key will be constructed by concatenating prefix part of the +// previous key, with the suffix part of the key here, with sizes given here. +// +// (3) A key shared the same prefix as the previous key, which is encoded as +// the format of (2). +// +-----------------+-----------------+------------------------+ +// | Key Suffix Flag | Key Suffix Size | Suffix of Internal Key | +// +-----------------+-----------------+------------------------+ +// The key will be constructed by concatenating previous key's prefix (which is +// also a prefix which the last key encoded in the format of (1)) and the +// key given here. +// +// For example, we for following keys (prefix and suffix are separated by +// spaces): +// 0000 0001 +// 0000 00021 +// 0000 0002 +// 00011 00 +// 0002 0001 +// Will be encoded like this: +// FK 8 00000001 +// PF 4 SF 5 00021 +// SF 4 0002 +// FK 7 0001100 +// FK 8 00020001 +// (where FK means full key flag, PF means prefix flag and SF means suffix flag) +// +// All those "key flag + key size" shown above are in this format: +// The 8 bits of the first byte: +// +----+----+----+----+----+----+----+----+ +// | Type | Size | +// +----+----+----+----+----+----+----+----+ +// Type indicates: full key, prefix, or suffix. +// The last 6 bits are for size. If the size bits are not all 1, it means the +// size of the key. Otherwise, varint32 is read after this byte. This varint +// value + 0x3F (the value of all 1) will be the key size. +// +// For example, full key with length 16 will be encoded as (binary): +// 00 010000 +// (00 means full key) +// and a prefix with 100 bytes will be encoded as: +// 01 111111 00100101 +// (63) (37) +// (01 means key suffix) +// +// All the internal keys above (including kPlain and kPrefix) are encoded in +// this format: +// There are two types: +// (1) normal internal key format +// +----------- ...... -------------+----+---+---+---+---+---+---+---+ +// | user key |type| sequence ID | +// +----------- ..... --------------+----+---+---+---+---+---+---+---+ +// (2) Special case for keys whose sequence ID is 0 and is value type +// +----------- ...... -------------+----+ +// | user key |0x80| +// +----------- ..... --------------+----+ +// To save 7 bytes for the special case where sequence ID = 0. 
+//
+//
+class PlainTableFactory : public TableFactory {
+ public:
+  ~PlainTableFactory() {}
+  // user_key_len is the length of the user key. If it is set to be
+  // kPlainTableVariableLength, then it means variable length. Otherwise, all
+  // the keys need to have the fixed length of this value. bloom_bits_per_key
+  // is the number of bits used for the bloom filter per key. hash_table_ratio
+  // is the desired utilization of the hash table used for prefix hashing.
+  // hash_table_ratio = number of prefixes / #buckets in the hash table
+  // hash_table_ratio = 0 means skip the hash table and rely only on binary
+  // search.
+  // index_sparseness determines the index interval for keys
+  // inside the same prefix. It will be the maximum number of linear search
+  // steps required after the hash and binary search.
+  // index_sparseness = 0 means index for every key.
+  // huge_page_tlb_size determines whether to allocate hash indexes from huge
+  // page TLB and the page size if allocating from there. See comments of
+  // Arena::AllocateAligned() for details.
+  explicit PlainTableFactory(
+      const PlainTableOptions& _table_options = PlainTableOptions());
+
+  // Method to allow CheckedCast to work for this class
+  static const char* kClassName() { return kPlainTableName(); }
+  const char* Name() const override { return kPlainTableName(); }
+  using TableFactory::NewTableReader;
+  Status NewTableReader(const ReadOptions& ro,
+                        const TableReaderOptions& table_reader_options,
+                        std::unique_ptr<RandomAccessFileReader>&& file,
+                        uint64_t file_size, std::unique_ptr<TableReader>* table,
+                        bool prefetch_index_and_filter_in_cache) const override;
+
+  TableBuilder* NewTableBuilder(
+      const TableBuilderOptions& table_builder_options,
+      WritableFileWriter* file) const override;
+
+  std::string GetPrintableOptions() const override;
+  static const char kValueTypeSeqId0 = char(~0);
+
+ private:
+  PlainTableOptions table_options_;
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.cc
new file mode 100644
index 0000000000..c85176d6ca
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.cc
@@ -0,0 +1,211 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
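+
+// Illustrative sketch (editorial note, not part of the upstream RocksDB
+// source): interpreting one 32-bit bucket word, matching GetOffset() below.
+// `bucket_value` is an assumed name for the example.
+//
+//   if ((bucket_value & PlainTableIndex::kSubIndexMask) ==
+//       PlainTableIndex::kSubIndexMask) {
+//     // Flag bit set: the remaining 31 bits locate this bucket's entry in
+//     // the binary-search sub-index rather than in the data file.
+//     uint32_t sub_index_offset =
+//         bucket_value ^ PlainTableIndex::kSubIndexMask;
+//   } else if (bucket_value >= PlainTableIndex::kMaxFileSize) {
+//     // No prefix hashes into this bucket.
+//   } else {
+//     // The value is the file offset of the bucket's first row.
+//   }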
+ +#include "table/plain/plain_table_index.h" + +#include + +#include "logging/logging.h" +#include "util/coding.h" +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { +inline uint32_t GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) { + assert(num_buckets > 0); + return hash % num_buckets; +} +} // namespace + +Status PlainTableIndex::InitFromRawData(Slice data) { + if (!GetVarint32(&data, &index_size_)) { + return Status::Corruption("Couldn't read the index size!"); + } + assert(index_size_ > 0); + if (!GetVarint32(&data, &num_prefixes_)) { + return Status::Corruption("Couldn't read the index size!"); + } + sub_index_size_ = + static_cast(data.size()) - index_size_ * kOffsetLen; + + char* index_data_begin = const_cast(data.data()); + index_ = reinterpret_cast(index_data_begin); + sub_index_ = reinterpret_cast(index_ + index_size_); + return Status::OK(); +} + +PlainTableIndex::IndexSearchResult PlainTableIndex::GetOffset( + uint32_t prefix_hash, uint32_t* bucket_value) const { + int bucket = GetBucketIdFromHash(prefix_hash, index_size_); + GetUnaligned(index_ + bucket, bucket_value); + if ((*bucket_value & kSubIndexMask) == kSubIndexMask) { + *bucket_value ^= kSubIndexMask; + return kSubindex; + } + if (*bucket_value >= kMaxFileSize) { + return kNoPrefixForBucket; + } else { + // point directly to the file + return kDirectToFile; + } +} + +void PlainTableIndexBuilder::IndexRecordList::AddRecord(uint32_t hash, + uint32_t offset) { + if (num_records_in_current_group_ == kNumRecordsPerGroup) { + current_group_ = AllocateNewGroup(); + num_records_in_current_group_ = 0; + } + auto& new_record = current_group_[num_records_in_current_group_++]; + new_record.hash = hash; + new_record.offset = offset; + new_record.next = nullptr; +} + +void PlainTableIndexBuilder::AddKeyPrefix(Slice key_prefix_slice, + uint32_t key_offset) { + if (is_first_record_ || prev_key_prefix_ != key_prefix_slice.ToString()) { + ++num_prefixes_; + if (!is_first_record_) { + keys_per_prefix_hist_.Add(num_keys_per_prefix_); + } + num_keys_per_prefix_ = 0; + prev_key_prefix_ = key_prefix_slice.ToString(); + prev_key_prefix_hash_ = GetSliceHash(key_prefix_slice); + due_index_ = true; + } + + if (due_index_) { + // Add an index key for every kIndexIntervalForSamePrefixKeys keys + record_list_.AddRecord(prev_key_prefix_hash_, key_offset); + due_index_ = false; + } + + num_keys_per_prefix_++; + if (index_sparseness_ == 0 || num_keys_per_prefix_ % index_sparseness_ == 0) { + due_index_ = true; + } + is_first_record_ = false; +} + +Slice PlainTableIndexBuilder::Finish() { + AllocateIndex(); + std::vector hash_to_offsets(index_size_, nullptr); + std::vector entries_per_bucket(index_size_, 0); + BucketizeIndexes(&hash_to_offsets, &entries_per_bucket); + + keys_per_prefix_hist_.Add(num_keys_per_prefix_); + ROCKS_LOG_INFO(ioptions_.logger, "Number of Keys per prefix Histogram: %s", + keys_per_prefix_hist_.ToString().c_str()); + + // From the temp data structure, populate indexes. + return FillIndexes(hash_to_offsets, entries_per_bucket); +} + +void PlainTableIndexBuilder::AllocateIndex() { + if (prefix_extractor_ == nullptr || hash_table_ratio_ <= 0) { + // Fall back to pure binary search if the user fails to specify a prefix + // extractor. 
+
+void PlainTableIndexBuilder::AllocateIndex() {
+  if (prefix_extractor_ == nullptr || hash_table_ratio_ <= 0) {
+    // Fall back to pure binary search if the user fails to specify a prefix
+    // extractor.
+    index_size_ = 1;
+  } else {
+    double hash_table_size_multiplier = 1.0 / hash_table_ratio_;
+    index_size_ =
+        static_cast<uint32_t>(num_prefixes_ * hash_table_size_multiplier) + 1;
+    assert(index_size_ > 0);
+  }
+}
+
+void PlainTableIndexBuilder::BucketizeIndexes(
+    std::vector<IndexRecord*>* hash_to_offsets,
+    std::vector<uint32_t>* entries_per_bucket) {
+  bool first = true;
+  uint32_t prev_hash = 0;
+  size_t num_records = record_list_.GetNumRecords();
+  for (size_t i = 0; i < num_records; i++) {
+    IndexRecord* index_record = record_list_.At(i);
+    uint32_t cur_hash = index_record->hash;
+    if (first || prev_hash != cur_hash) {
+      prev_hash = cur_hash;
+      first = false;
+    }
+    uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_);
+    IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket];
+    index_record->next = prev_bucket_head;
+    (*hash_to_offsets)[bucket] = index_record;
+    (*entries_per_bucket)[bucket]++;
+  }
+
+  sub_index_size_ = 0;
+  for (auto entry_count : *entries_per_bucket) {
+    if (entry_count <= 1) {
+      continue;
+    }
+    // Only buckets with more than 1 entry will have a sub-index.
+    sub_index_size_ += VarintLength(entry_count);
+    // total bytes needed to store these entries' in-file offsets.
+    sub_index_size_ += entry_count * PlainTableIndex::kOffsetLen;
+  }
+}
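+
+// Illustrative layout note (sizes are examples, not from this patch): the
+// block FillIndexes() emits below is laid out as
+//
+//   varint32  index_size_          e.g. 2 bytes for 1334 buckets
+//   varint32  num_prefixes_        e.g. 2 bytes for 1000 prefixes
+//   uint32_t  bucket[index_size_]  1334 * 4 = 5336 bytes
+//   char      sub_index_[sub_index_size_]
+//
+// which is exactly the sum computed by GetTotalSize():
+//   VarintLength(index_size_) + VarintLength(num_prefixes_) +
+//       PlainTableIndex::kOffsetLen * index_size_ + sub_index_size_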
+
+Slice PlainTableIndexBuilder::FillIndexes(
+    const std::vector<IndexRecord*>& hash_to_offsets,
+    const std::vector<uint32_t>& entries_per_bucket) {
+  ROCKS_LOG_DEBUG(ioptions_.logger,
+                  "Reserving %" PRIu32 " bytes for plain table's sub_index",
+                  sub_index_size_);
+  auto total_allocate_size = GetTotalSize();
+  char* allocated = arena_->AllocateAligned(
+      total_allocate_size, huge_page_tlb_size_, ioptions_.logger);
+
+  auto temp_ptr = EncodeVarint32(allocated, index_size_);
+  uint32_t* index =
+      reinterpret_cast<uint32_t*>(EncodeVarint32(temp_ptr, num_prefixes_));
+  char* sub_index = reinterpret_cast<char*>(index + index_size_);
+
+  uint32_t sub_index_offset = 0;
+  for (uint32_t i = 0; i < index_size_; i++) {
+    uint32_t num_keys_for_bucket = entries_per_bucket[i];
+    switch (num_keys_for_bucket) {
+      case 0:
+        // No key for bucket
+        PutUnaligned(index + i, (uint32_t)PlainTableIndex::kMaxFileSize);
+        break;
+      case 1:
+        // point directly to the file offset
+        PutUnaligned(index + i, hash_to_offsets[i]->offset);
+        break;
+      default:
+        // point to second level indexes.
+        PutUnaligned(index + i,
+                     sub_index_offset | PlainTableIndex::kSubIndexMask);
+        char* prev_ptr = &sub_index[sub_index_offset];
+        char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket);
+        sub_index_offset += static_cast<uint32_t>(cur_ptr - prev_ptr);
+        char* sub_index_pos = &sub_index[sub_index_offset];
+        IndexRecord* record = hash_to_offsets[i];
+        int j;
+        for (j = num_keys_for_bucket - 1; j >= 0 && record;
+             j--, record = record->next) {
+          EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset);
+        }
+        assert(j == -1 && record == nullptr);
+        sub_index_offset += PlainTableIndex::kOffsetLen * num_keys_for_bucket;
+        assert(sub_index_offset <= sub_index_size_);
+        break;
+    }
+  }
+  assert(sub_index_offset == sub_index_size_);
+
+  ROCKS_LOG_DEBUG(ioptions_.logger,
+                  "hash table size: %" PRIu32 ", suffix_map length %" PRIu32,
+                  index_size_, sub_index_size_);
+  return Slice(allocated, GetTotalSize());
+}
+
+const std::string PlainTableIndexBuilder::kPlainTableIndexBlock =
+    "PlainTableIndexBlock";
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.h
new file mode 100644
index 0000000000..0adb6417d9
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_index.h
@@ -0,0 +1,246 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+
+#include <string>
+#include <vector>
+
+#include "memory/arena.h"
+#include "monitoring/histogram.h"
+#include "options/cf_options.h"
+#include "rocksdb/options.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// The file contains two classes, PlainTableIndex and PlainTableIndexBuilder,
+// which implement the index format of PlainTable.
+// For a description of the PlainTable format, see the comments of class
+// PlainTableFactory.
+//
+//
+// PlainTableIndex contains index_size_ buckets, each a 32-bit integer. The
+// lower 31 bits contain an offset value (explained below) and the top bit of
+// the integer indicates the type of the offset:
+//
+// +--------------+------------------------------------------------------+
+// | Flag (1 bit) | Offset to binary search buffer or file (31 bits)     |
+// +--------------+------------------------------------------------------+
+//
+// Explanation for the "flag bit":
+//
+// 0 indicates that the bucket contains only one prefix (no conflict when
+//   hashing this prefix), whose first row starts from this offset of the
+//   file.
+// 1 indicates that the bucket contains more than one prefix, or there
+//   are too many rows for one prefix so we need a binary search for it. In
+//   this case, the offset indicates the offset of sub_index_ holding the
+//   binary search indexes of keys for those rows. Those binary search indexes
+//   are organized in this way:
+//
+// A varint32 first indicates how many entries (N) are stored after it. After
+// it, there are N 32-bit integers, each pointing to an offset in the file,
+// which is the start of a row. Those offsets are guaranteed to be in
+// ascending order, so the keys they point to are also in ascending order,
+// which lets us binary search over them. Below is a visual presentation of a
+// bucket:
+//
+//
+//   number_of_records:    varint32
+//   record 1 file offset: fixedint32
+//   record 2 file offset: fixedint32
+//   ....
+//   record N file offset: fixedint32
+//
+
+// The class loads the index block from a PlainTable SST file, and executes
+// the index lookup.
+// The class is used by the PlainTableReader class.
+class PlainTableIndex {
+ public:
+  enum IndexSearchResult {
+    kNoPrefixForBucket = 0,
+    kDirectToFile = 1,
+    kSubindex = 2
+  };
+
+  explicit PlainTableIndex(Slice data) { InitFromRawData(data); }
+
+  PlainTableIndex()
+      : index_size_(0),
+        sub_index_size_(0),
+        num_prefixes_(0),
+        index_(nullptr),
+        sub_index_(nullptr) {}
+
+  // Executes the lookup in the hash table. The hash key is `prefix_hash`.
+  // The function fills the hash bucket content in `bucket_value`, which is
+  // up to the caller to interpret.
+  IndexSearchResult GetOffset(uint32_t prefix_hash,
+                              uint32_t* bucket_value) const;
+
+  // Initialize data from `index_data`, which points to raw data for
+  // index stored in the SST file.
+  Status InitFromRawData(Slice index_data);
+
+  // Decode the sub-index for a specific hash bucket.
+  // The `offset` is the value returned as `bucket_value` by GetOffset()
+  // and is only valid when the return value is `kSubindex`.
+  // The return value is the pointer to the starting address of the
+  // sub-index. `upper_bound` is filled with the value indicating how many
+  // entries the sub-index has.
+  const char* GetSubIndexBasePtrAndUpperBound(uint32_t offset,
+                                              uint32_t* upper_bound) const {
+    const char* index_ptr = &sub_index_[offset];
+    return GetVarint32Ptr(index_ptr, index_ptr + 4, upper_bound);
+  }
+
+  uint32_t GetIndexSize() const { return index_size_; }
+
+  uint32_t GetSubIndexSize() const { return sub_index_size_; }
+
+  uint32_t GetNumPrefixes() const { return num_prefixes_; }
+
+  static const uint64_t kMaxFileSize = (1u << 31) - 1;
+  static const uint32_t kSubIndexMask = 0x80000000;
+  static const size_t kOffsetLen = sizeof(uint32_t);
+
+ private:
+  uint32_t index_size_;
+  uint32_t sub_index_size_;
+  uint32_t num_prefixes_;
+
+  uint32_t* index_;
+  char* sub_index_;
+};
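+
+// Illustrative sketch (sample value invented): how one 32-bit bucket word is
+// interpreted under the flag-bit scheme above, mirroring GetOffset():
+//
+//   uint32_t bucket_value = 0x80000010;  // flag bit set, low bits = 0x10
+//   if (bucket_value & PlainTableIndex::kSubIndexMask) {
+//     // Flag 1: the low 31 bits index into sub_index_, where a varint32
+//     // entry count is followed by that many ascending fixed32 file offsets.
+//     uint32_t sub_index_offset =
+//         bucket_value ^ PlainTableIndex::kSubIndexMask;  // == 0x10
+//     // ... binary search the offsets at sub_index_[sub_index_offset]
+//   } else if (bucket_value < PlainTableIndex::kMaxFileSize) {
+//     // Flag 0: bucket_value is the file offset of the prefix's first row.
+//   } else {
+//     // kMaxFileSize sentinel: no prefix hashed to this bucket.
+//   }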
+
+// PlainTableIndexBuilder is used to create a plain table index.
+// After calling Finish(), it returns a Slice, which is usually
+// used either to initialize a PlainTableIndex or
+// to save the index to an SST file.
+// For more details about the index, please refer to:
+// https://github.com/facebook/rocksdb/wiki/PlainTable-Format
+// #wiki-in-memory-index-format
+// The class is used by the PlainTableBuilder class.
+class PlainTableIndexBuilder {
+ public:
+  PlainTableIndexBuilder(Arena* arena, const ImmutableOptions& ioptions,
+                         const SliceTransform* prefix_extractor,
+                         size_t index_sparseness, double hash_table_ratio,
+                         size_t huge_page_tlb_size)
+      : arena_(arena),
+        ioptions_(ioptions),
+        record_list_(kRecordsPerGroup),
+        is_first_record_(true),
+        due_index_(false),
+        num_prefixes_(0),
+        num_keys_per_prefix_(0),
+        prev_key_prefix_hash_(0),
+        index_sparseness_(index_sparseness),
+        index_size_(0),
+        sub_index_size_(0),
+        prefix_extractor_(prefix_extractor),
+        hash_table_ratio_(hash_table_ratio),
+        huge_page_tlb_size_(huge_page_tlb_size) {}
+
+  void AddKeyPrefix(Slice key_prefix_slice, uint32_t key_offset);
+
+  Slice Finish();
+
+  uint32_t GetTotalSize() const {
+    return VarintLength(index_size_) + VarintLength(num_prefixes_) +
+           PlainTableIndex::kOffsetLen * index_size_ + sub_index_size_;
+  }
+
+  static const std::string kPlainTableIndexBlock;
+
+ private:
+  struct IndexRecord {
+    uint32_t hash;    // hash of the prefix
+    uint32_t offset;  // offset of a row
+    IndexRecord* next;
+  };
+
+  // Helper class to track all the index records
+  class IndexRecordList {
+   public:
+    explicit IndexRecordList(size_t num_records_per_group)
+        : kNumRecordsPerGroup(num_records_per_group),
+          current_group_(nullptr),
+          num_records_in_current_group_(num_records_per_group) {}
+
+    ~IndexRecordList() {
+      for (size_t i = 0; i < groups_.size(); i++) {
+        delete[] groups_[i];
+      }
+    }
+
+    void AddRecord(uint32_t hash, uint32_t offset);
+
+    size_t GetNumRecords() const {
+      return (groups_.size() - 1) * kNumRecordsPerGroup +
+             num_records_in_current_group_;
+    }
+    IndexRecord* At(size_t index) {
+      return &(
+          groups_[index / kNumRecordsPerGroup][index % kNumRecordsPerGroup]);
+    }
+
+   private:
+    IndexRecord* AllocateNewGroup() {
+      IndexRecord* result = new IndexRecord[kNumRecordsPerGroup];
+      groups_.push_back(result);
+      return result;
+    }
+
+    // Each group in `groups_` contains a fixed number of records (determined
+    // by kNumRecordsPerGroup), which helps minimize the cost when resizing
+    // occurs.
+    const size_t kNumRecordsPerGroup;
+    IndexRecord* current_group_;
+    // List of arrays allocated
+    std::vector<IndexRecord*> groups_;
+    size_t num_records_in_current_group_;
+  };
+
+  void AllocateIndex();
+
+  // Internal helper function to bucket the index record list into hash
+  // buckets.
+  void BucketizeIndexes(std::vector<IndexRecord*>* hash_to_offsets,
+                        std::vector<uint32_t>* entries_per_bucket);
+
+  // Internal helper function to fill the indexes and bloom filters into
+  // internal data structures.
+  Slice FillIndexes(const std::vector<IndexRecord*>& hash_to_offsets,
+                    const std::vector<uint32_t>& entries_per_bucket);
+
+  Arena* arena_;
+  const ImmutableOptions ioptions_;
+  HistogramImpl keys_per_prefix_hist_;
+  IndexRecordList record_list_;
+  bool is_first_record_;
+  bool due_index_;
+  uint32_t num_prefixes_;
+  uint32_t num_keys_per_prefix_;
+
+  uint32_t prev_key_prefix_hash_;
+  size_t index_sparseness_;
+  uint32_t index_size_;
+  uint32_t sub_index_size_;
+
+  const SliceTransform* prefix_extractor_;
+  double hash_table_ratio_;
+  size_t huge_page_tlb_size_;
+
+  std::string prev_key_prefix_;
+
+  static const size_t kRecordsPerGroup = 256;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.cc
new file mode 100644
index 0000000000..a40968a608
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.cc
@@ -0,0 +1,507 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "table/plain/plain_table_key_coding.h"
+
+#include <algorithm>
+#include <string>
+
+#include "db/dbformat.h"
+#include "file/writable_file_writer.h"
+#include "table/plain/plain_table_factory.h"
+#include "table/plain/plain_table_reader.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+enum PlainTableEntryType : unsigned char {
+  kFullKey = 0,
+  kPrefixFromPreviousKey = 1,
+  kKeySuffix = 2,
+};
+
+namespace {
+
+// Control byte:
+// The first two bits indicate the type of entry.
+// The other six bits are the inlined size. If all six bits are 1 (0x3F),
+// overflow bytes are used: key_size - 0x3F is encoded as a varint32 after
+// this byte.
+
+const unsigned char kSizeInlineLimit = 0x3F;
+
+// Return 0 for error
+size_t EncodeSize(PlainTableEntryType type, uint32_t key_size,
+                  char* out_buffer) {
+  out_buffer[0] = type << 6;
+
+  if (key_size < static_cast<uint32_t>(kSizeInlineLimit)) {
+    // size inlined
+    out_buffer[0] |= static_cast<char>(key_size);
+    return 1;
+  } else {
+    out_buffer[0] |= kSizeInlineLimit;
+    char* ptr = EncodeVarint32(out_buffer + 1, key_size - kSizeInlineLimit);
+    return ptr - out_buffer;
+  }
+}
+}  // namespace
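+
+// Worked example (values invented) of the control-byte scheme above:
+//
+//   EncodeSize(kFullKey, 17, buf)
+//     -> buf[0] = (0 << 6) | 17 = 0x11; 1 byte total (17 < 0x3F is inlined)
+//
+//   EncodeSize(kKeySuffix, 200, buf)
+//     -> buf[0] = (2 << 6) | 0x3F = 0xBF, followed by varint32(200 - 0x3F)
+//        = varint32(137) = 0x89 0x01; 3 bytes total
+//
+// DecodeSize() below reverses this: the top two bits give the entry type and
+// a low 6-bit field of 0x3F signals that a varint32 remainder follows.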
+
+// Fill bytes_read with the number of bytes read.
+inline Status PlainTableKeyDecoder::DecodeSize(uint32_t start_offset,
+                                               PlainTableEntryType* entry_type,
+                                               uint32_t* key_size,
+                                               uint32_t* bytes_read) {
+  Slice next_byte_slice;
+  bool success = file_reader_.Read(start_offset, 1, &next_byte_slice);
+  if (!success) {
+    return file_reader_.status();
+  }
+  *entry_type = static_cast<PlainTableEntryType>(
+      (static_cast<unsigned char>(next_byte_slice[0]) & ~kSizeInlineLimit) >>
+      6);
+  char inline_key_size = next_byte_slice[0] & kSizeInlineLimit;
+  if (inline_key_size < kSizeInlineLimit) {
+    *key_size = inline_key_size;
+    *bytes_read = 1;
+    return Status::OK();
+  } else {
+    uint32_t extra_size;
+    uint32_t tmp_bytes_read;
+    success = file_reader_.ReadVarint32(start_offset + 1, &extra_size,
+                                        &tmp_bytes_read);
+    if (!success) {
+      return file_reader_.status();
+    }
+    assert(tmp_bytes_read > 0);
+    *key_size = kSizeInlineLimit + extra_size;
+    *bytes_read = tmp_bytes_read + 1;
+    return Status::OK();
+  }
+}
+
+IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key,
+                                         WritableFileWriter* file,
+                                         uint64_t* offset, char* meta_bytes_buf,
+                                         size_t* meta_bytes_buf_size) {
+  ParsedInternalKey parsed_key;
+  Status pik_status =
+      ParseInternalKey(key, &parsed_key, false /* log_err_key */);  // TODO
+  if (!pik_status.ok()) {
+    return IOStatus::Corruption(pik_status.getState());
+  }
+
+  Slice key_to_write = key;  // Portion of internal key to write out.
+
+  uint32_t user_key_size = static_cast<uint32_t>(key.size() - 8);
+  if (encoding_type_ == kPlain) {
+    if (fixed_user_key_len_ == kPlainTableVariableLength) {
+      // Write key length
+      char key_size_buf[5];  // tmp buffer for key size as varint32
+      char* ptr = EncodeVarint32(key_size_buf, user_key_size);
+      assert(ptr <= key_size_buf + sizeof(key_size_buf));
+      auto len = ptr - key_size_buf;
+      IOStatus io_s = file->Append(Slice(key_size_buf, len));
+      if (!io_s.ok()) {
+        return io_s;
+      }
+      *offset += len;
+    }
+  } else {
+    assert(encoding_type_ == kPrefix);
+    char size_bytes[12];
+    size_t size_bytes_pos = 0;
+
+    Slice prefix =
+        prefix_extractor_->Transform(Slice(key.data(), user_key_size));
+    if (key_count_for_prefix_ == 0 || prefix != pre_prefix_.GetUserKey() ||
+        key_count_for_prefix_ % index_sparseness_ == 0) {
+      key_count_for_prefix_ = 1;
+      pre_prefix_.SetUserKey(prefix);
+      size_bytes_pos += EncodeSize(kFullKey, user_key_size, size_bytes);
+      IOStatus io_s = file->Append(Slice(size_bytes, size_bytes_pos));
+      if (!io_s.ok()) {
+        return io_s;
+      }
+      *offset += size_bytes_pos;
+    } else {
+      key_count_for_prefix_++;
+      if (key_count_for_prefix_ == 2) {
+        // For the second key within a prefix, we need to encode the prefix
+        // length
+        size_bytes_pos +=
+            EncodeSize(kPrefixFromPreviousKey,
+                       static_cast<uint32_t>(pre_prefix_.GetUserKey().size()),
+                       size_bytes + size_bytes_pos);
+      }
+      uint32_t prefix_len =
+          static_cast<uint32_t>(pre_prefix_.GetUserKey().size());
+      size_bytes_pos += EncodeSize(kKeySuffix, user_key_size - prefix_len,
+                                   size_bytes + size_bytes_pos);
+      IOStatus io_s = file->Append(Slice(size_bytes, size_bytes_pos));
+      if (!io_s.ok()) {
+        return io_s;
+      }
+      *offset += size_bytes_pos;
+      key_to_write = Slice(key.data() + prefix_len, key.size() - prefix_len);
+    }
+  }
+
+  // Encode full key.
+  // For value size as varint32 (up to 5 bytes).
+  // If the row is of value type with seqId 0, flush the special flag together
+  // in this buffer to save one file append call, which takes 1 byte.
+  if (parsed_key.sequence == 0 && parsed_key.type == kTypeValue) {
+    IOStatus io_s =
+        file->Append(Slice(key_to_write.data(), key_to_write.size() - 8));
+    if (!io_s.ok()) {
+      return io_s;
+    }
+    *offset += key_to_write.size() - 8;
+    meta_bytes_buf[*meta_bytes_buf_size] = PlainTableFactory::kValueTypeSeqId0;
+    *meta_bytes_buf_size += 1;
+  } else {
+    IOStatus io_s = file->Append(key_to_write);
+    if (!io_s.ok()) {
+      return io_s;
+    }
+    *offset += key_to_write.size();
+  }
+
+  return IOStatus::OK();
+}
+
+Slice PlainTableFileReader::GetFromBuffer(Buffer* buffer, uint32_t file_offset,
+                                          uint32_t len) {
+  assert(file_offset + len <= file_info_->data_end_offset);
+  return Slice(buffer->buf.get() + (file_offset - buffer->buf_start_offset),
+               len);
+}
+
+bool PlainTableFileReader::ReadNonMmap(uint32_t file_offset, uint32_t len,
+                                       Slice* out) {
+  const uint32_t kPrefetchSize = 256u;
+
+  // Try to read from buffers.
+  for (uint32_t i = 0; i < num_buf_; i++) {
+    Buffer* buffer = buffers_[num_buf_ - 1 - i].get();
+    if (file_offset >= buffer->buf_start_offset &&
+        file_offset + len <= buffer->buf_start_offset + buffer->buf_len) {
+      *out = GetFromBuffer(buffer, file_offset, len);
+      return true;
+    }
+  }
+
+  Buffer* new_buffer;
+  // The data needed is not in any of the buffers. Allocate a new buffer.
+  if (num_buf_ < buffers_.size()) {
+    // Add a new buffer
+    new_buffer = new Buffer();
+    buffers_[num_buf_++].reset(new_buffer);
+  } else {
+    // Now simply replace the last buffer. Can improve the placement policy
+    // if needed.
+    new_buffer = buffers_[num_buf_ - 1].get();
+  }
+
+  assert(file_offset + len <= file_info_->data_end_offset);
+  uint32_t size_to_read = std::min(file_info_->data_end_offset - file_offset,
+                                   std::max(kPrefetchSize, len));
+  if (size_to_read > new_buffer->buf_capacity) {
+    new_buffer->buf.reset(new char[size_to_read]);
+    new_buffer->buf_capacity = size_to_read;
+    new_buffer->buf_len = 0;
+  }
+  Slice read_result;
+  // TODO: rate limit plain table reads.
+  Status s =
+      file_info_->file->Read(IOOptions(), file_offset, size_to_read,
+                             &read_result, new_buffer->buf.get(), nullptr,
+                             Env::IO_TOTAL /* rate_limiter_priority */);
+  if (!s.ok()) {
+    status_ = s;
+    return false;
+  }
+  new_buffer->buf_start_offset = file_offset;
+  new_buffer->buf_len = size_to_read;
+  *out = GetFromBuffer(new_buffer, file_offset, len);
+  return true;
+}
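+
+// Illustrative note (offsets invented): with kPrefetchSize = 256, a 10-byte
+// Read() at file offset 100 fills a buffer with
+// min(data_end_offset - 100, 256) bytes, so a following Read() of 8 bytes at
+// offset 110 is served from memory with no further file access. Up to two
+// such buffers are kept (see plain_table_key_coding.h) to cover the
+// hash-probe-then-verify and the binary-search access patterns.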
+
+inline bool PlainTableFileReader::ReadVarint32(uint32_t offset, uint32_t* out,
+                                               uint32_t* bytes_read) {
+  if (file_info_->is_mmap_mode) {
+    const char* start = file_info_->file_data.data() + offset;
+    const char* limit =
+        file_info_->file_data.data() + file_info_->data_end_offset;
+    const char* key_ptr = GetVarint32Ptr(start, limit, out);
+    assert(key_ptr != nullptr);
+    *bytes_read = static_cast<uint32_t>(key_ptr - start);
+    return true;
+  } else {
+    return ReadVarint32NonMmap(offset, out, bytes_read);
+  }
+}
+
+bool PlainTableFileReader::ReadVarint32NonMmap(uint32_t offset, uint32_t* out,
+                                               uint32_t* bytes_read) {
+  const char* start;
+  const char* limit;
+  const uint32_t kMaxVarInt32Size = 6u;
+  uint32_t bytes_to_read =
+      std::min(file_info_->data_end_offset - offset, kMaxVarInt32Size);
+  Slice bytes;
+  if (!Read(offset, bytes_to_read, &bytes)) {
+    return false;
+  }
+  start = bytes.data();
+  limit = bytes.data() + bytes.size();
+
+  const char* key_ptr = GetVarint32Ptr(start, limit, out);
+  *bytes_read =
+      (key_ptr != nullptr) ? static_cast<uint32_t>(key_ptr - start) : 0;
+  return true;
+}
+
+Status PlainTableKeyDecoder::ReadInternalKey(
+    uint32_t file_offset, uint32_t user_key_size, ParsedInternalKey* parsed_key,
+    uint32_t* bytes_read, bool* internal_key_valid, Slice* internal_key) {
+  Slice tmp_slice;
+  bool success = file_reader_.Read(file_offset, user_key_size + 1, &tmp_slice);
+  if (!success) {
+    return file_reader_.status();
+  }
+  if (tmp_slice[user_key_size] == PlainTableFactory::kValueTypeSeqId0) {
+    // Special encoding for the row with seqID=0
+    parsed_key->user_key = Slice(tmp_slice.data(), user_key_size);
+    parsed_key->sequence = 0;
+    parsed_key->type = kTypeValue;
+    *bytes_read += user_key_size + 1;
+    *internal_key_valid = false;
+  } else {
+    success = file_reader_.Read(file_offset, user_key_size + 8, internal_key);
+    if (!success) {
+      return file_reader_.status();
+    }
+    *internal_key_valid = true;
+    Status pik_status = ParseInternalKey(*internal_key, parsed_key,
+                                         false /* log_err_key */);  // TODO
+    if (!pik_status.ok()) {
+      return Status::Corruption(
+          Slice("Corrupted key found during next key read. "),
+          pik_status.getState());
+    }
+    *bytes_read += user_key_size + 8;
+  }
+  return Status::OK();
+}
+
+Status PlainTableKeyDecoder::NextPlainEncodingKey(uint32_t start_offset,
+                                                  ParsedInternalKey* parsed_key,
+                                                  Slice* internal_key,
+                                                  uint32_t* bytes_read,
+                                                  bool* /*seekable*/) {
+  uint32_t user_key_size = 0;
+  Status s;
+  if (fixed_user_key_len_ != kPlainTableVariableLength) {
+    user_key_size = fixed_user_key_len_;
+  } else {
+    uint32_t tmp_size = 0;
+    uint32_t tmp_read;
+    bool success =
+        file_reader_.ReadVarint32(start_offset, &tmp_size, &tmp_read);
+    if (!success) {
+      return file_reader_.status();
+    }
+    assert(tmp_read > 0);
+    user_key_size = tmp_size;
+    *bytes_read = tmp_read;
+  }
+  // dummy initial value to avoid a compiler complaint
+  bool decoded_internal_key_valid = true;
+  Slice decoded_internal_key;
+  s = ReadInternalKey(start_offset + *bytes_read, user_key_size, parsed_key,
+                      bytes_read, &decoded_internal_key_valid,
+                      &decoded_internal_key);
+  if (!s.ok()) {
+    return s;
+  }
+  if (!file_reader_.file_info()->is_mmap_mode) {
+    cur_key_.SetInternalKey(*parsed_key);
+    parsed_key->user_key =
+        Slice(cur_key_.GetInternalKey().data(), user_key_size);
+    if (internal_key != nullptr) {
+      *internal_key = cur_key_.GetInternalKey();
+    }
+  } else if (internal_key != nullptr) {
+    if (decoded_internal_key_valid) {
+      *internal_key = decoded_internal_key;
+    } else {
+      // Need to copy out the internal key
+      cur_key_.SetInternalKey(*parsed_key);
+      *internal_key = cur_key_.GetInternalKey();
+    }
+  }
+  return Status::OK();
+}
+
+Status PlainTableKeyDecoder::NextPrefixEncodingKey(
+    uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key,
+    uint32_t* bytes_read, bool* seekable) {
+  PlainTableEntryType entry_type;
+
+  bool expect_suffix = false;
+  Status s;
+  do {
+    uint32_t size = 0;
+    // dummy initial value to avoid a compiler complaint
+    bool decoded_internal_key_valid = true;
+    uint32_t my_bytes_read = 0;
+    s = DecodeSize(start_offset + *bytes_read, &entry_type, &size,
+                   &my_bytes_read);
+    if (!s.ok()) {
+      return s;
+    }
+    if (my_bytes_read == 0) {
+      return Status::Corruption("Unexpected EOF when reading size of the key");
+    }
+    *bytes_read += my_bytes_read;
+
+    switch (entry_type) {
+      case kFullKey: {
+        expect_suffix = false;
+        Slice decoded_internal_key;
+        s = ReadInternalKey(start_offset + *bytes_read, size, parsed_key,
+                            bytes_read, &decoded_internal_key_valid,
+                            &decoded_internal_key);
+        if (!s.ok()) {
+          return s;
+        }
+        if (!file_reader_.file_info()->is_mmap_mode ||
+            (internal_key != nullptr && !decoded_internal_key_valid)) {
+          // In non-mmap mode, we always need to make a copy of keys returned
+          // to users, because after reading the value for the key, the key
+          // might be invalid.
+          cur_key_.SetInternalKey(*parsed_key);
+          saved_user_key_ = cur_key_.GetUserKey();
+          if (!file_reader_.file_info()->is_mmap_mode) {
+            parsed_key->user_key =
+                Slice(cur_key_.GetInternalKey().data(), size);
+          }
+          if (internal_key != nullptr) {
+            *internal_key = cur_key_.GetInternalKey();
+          }
+        } else {
+          if (internal_key != nullptr) {
+            *internal_key = decoded_internal_key;
+          }
+          saved_user_key_ = parsed_key->user_key;
+        }
+        break;
+      }
+      case kPrefixFromPreviousKey: {
+        if (seekable != nullptr) {
+          *seekable = false;
+        }
+        prefix_len_ = size;
+        assert(prefix_extractor_ == nullptr ||
+               prefix_extractor_->Transform(saved_user_key_).size() ==
+                   prefix_len_);
+        // Need to read another size flag for the suffix
+        expect_suffix = true;
+        break;
+      }
+      case kKeySuffix: {
+        expect_suffix = false;
+        if (seekable != nullptr) {
+          *seekable = false;
+        }
+
+        Slice tmp_slice;
+        s = ReadInternalKey(start_offset + *bytes_read, size, parsed_key,
+                            bytes_read, &decoded_internal_key_valid,
+                            &tmp_slice);
+        if (!s.ok()) {
+          return s;
+        }
+        if (!file_reader_.file_info()->is_mmap_mode) {
+          // In non-mmap mode, we need to make a copy of keys returned to
+          // users, because after reading the value for the key, the key might
+          // be invalid.
+          // saved_user_key_ points to cur_key_. We are making a copy of
+          // the prefix part to another string, and constructing the current
+          // key from the prefix part and the suffix part back into cur_key_.
+          std::string tmp =
+              Slice(saved_user_key_.data(), prefix_len_).ToString();
+          cur_key_.Reserve(prefix_len_ + size);
+          cur_key_.SetInternalKey(tmp, *parsed_key);
+          parsed_key->user_key =
+              Slice(cur_key_.GetInternalKey().data(), prefix_len_ + size);
+          saved_user_key_ = cur_key_.GetUserKey();
+        } else {
+          cur_key_.Reserve(prefix_len_ + size);
+          cur_key_.SetInternalKey(Slice(saved_user_key_.data(), prefix_len_),
+                                  *parsed_key);
+        }
+        parsed_key->user_key = cur_key_.GetUserKey();
+        if (internal_key != nullptr) {
+          *internal_key = cur_key_.GetInternalKey();
+        }
+        break;
+      }
+      default:
+        return Status::Corruption("Unidentified size flag.");
+    }
+  } while (expect_suffix);  // Another round if a suffix is expected.
+  return Status::OK();
+}
+
+Status PlainTableKeyDecoder::NextKey(uint32_t start_offset,
+                                     ParsedInternalKey* parsed_key,
+                                     Slice* internal_key, Slice* value,
+                                     uint32_t* bytes_read, bool* seekable) {
+  assert(value != nullptr);
+  Status s = NextKeyNoValue(start_offset, parsed_key, internal_key, bytes_read,
+                            seekable);
+  if (s.ok()) {
+    assert(bytes_read != nullptr);
+    uint32_t value_size;
+    uint32_t value_size_bytes;
+    bool success = file_reader_.ReadVarint32(start_offset + *bytes_read,
+                                             &value_size, &value_size_bytes);
+    if (!success) {
+      return file_reader_.status();
+    }
+    if (value_size_bytes == 0) {
+      return Status::Corruption(
+          "Unexpected EOF when reading the next value's size.");
+    }
+    *bytes_read += value_size_bytes;
+    success = file_reader_.Read(start_offset + *bytes_read, value_size, value);
+    if (!success) {
+      return file_reader_.status();
+    }
+    *bytes_read += value_size;
+  }
+  return s;
+}
+
+Status PlainTableKeyDecoder::NextKeyNoValue(uint32_t start_offset,
+                                            ParsedInternalKey* parsed_key,
+                                            Slice* internal_key,
+                                            uint32_t* bytes_read,
+                                            bool* seekable) {
+  *bytes_read = 0;
+  if (seekable != nullptr) {
+    *seekable = true;
+  }
+  if (encoding_type_ == kPlain) {
+    return NextPlainEncodingKey(start_offset, parsed_key, internal_key,
+                                bytes_read, seekable);
+  } else {
+    assert(encoding_type_ == kPrefix);
+    return NextPrefixEncodingKey(start_offset, parsed_key, internal_key,
+                                 bytes_read, seekable);
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.h
new file mode 100644
index 0000000000..fdef224825
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_key_coding.h
@@ -0,0 +1,199 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+
+#include <array>
+
+#include "rocksdb/slice.h"
+#include "table/plain/plain_table_reader.h"
+
+// This file contains three helper classes of the PlainTable format:
+// PlainTableKeyEncoder, PlainTableKeyDecoder and PlainTableFileReader.
+// These classes issue the lowest level of operations of PlainTable.
+// The actual data format of the key is documented in comments of class
+// PlainTableFactory.
+namespace ROCKSDB_NAMESPACE {
+
+class WritableFile;
+struct ParsedInternalKey;
+struct PlainTableReaderFileInfo;
+enum PlainTableEntryType : unsigned char;
+
+// Helper class for the PlainTable format to write out a key to an output file
+// The class is used in PlainTableBuilder.
+class PlainTableKeyEncoder {
+ public:
+  explicit PlainTableKeyEncoder(EncodingType encoding_type,
+                                uint32_t user_key_len,
+                                const SliceTransform* prefix_extractor,
+                                size_t index_sparseness)
+      : encoding_type_((prefix_extractor != nullptr) ? encoding_type : kPlain),
+        fixed_user_key_len_(user_key_len),
+        prefix_extractor_(prefix_extractor),
+        index_sparseness_((index_sparseness > 1) ? index_sparseness : 1),
+        key_count_for_prefix_(0) {}
+  // key: the key to write out, in the format of internal key.
+  // file: the output file to write out
+  // offset: offset in the file. Needs to be updated after appending bytes
+  //         for the key
+  // meta_bytes_buf: buffer for extra meta bytes
+  // meta_bytes_buf_size: offset to append extra meta bytes. Will be updated
+  //                      if meta_bytes_buf is updated.
+  IOStatus AppendKey(const Slice& key, WritableFileWriter* file,
+                     uint64_t* offset, char* meta_bytes_buf,
+                     size_t* meta_bytes_buf_size);
+
+  // Return the actual encoding type that was picked
+  EncodingType GetEncodingType() { return encoding_type_; }
+
+ private:
+  EncodingType encoding_type_;
+  uint32_t fixed_user_key_len_;
+  const SliceTransform* prefix_extractor_;
+  const size_t index_sparseness_;
+  size_t key_count_for_prefix_;
+  IterKey pre_prefix_;
+};
+
+// The class does raw file reads for PlainTableReader.
+// It hides whether it is an mmap read or a non-mmap read.
+// The class is implemented in a way that favors the performance of the mmap
+// case.
+// The class is used by PlainTableReader.
+class PlainTableFileReader {
+ public:
+  explicit PlainTableFileReader(const PlainTableReaderFileInfo* _file_info)
+      : file_info_(_file_info), num_buf_(0) {}
+
+  ~PlainTableFileReader() {
+    // Should fix.
+    status_.PermitUncheckedError();
+  }
+
+  // In mmapped mode, the results point to the mmapped area of the file, which
+  // means they are always valid before closing the file.
+  // In non-mmap mode, the results point to an internal buffer. If the caller
+  // makes another read call, the results may not be valid. So callers should
+  // make a copy when needed.
+  // In order to save read calls to files, we keep two internal buffers:
+  // the first read and the most recent read. This is efficient because it
+  // covers these two common use cases:
+  // (1) the hash index only identifies one location; we read the key to
+  //     verify the location, and read the key and value if it is the right
+  //     location.
+  // (2) after hash index checking, we identify two locations (because of
+  //     hash bucket conflicts); we binary search the two locations to see
+  //     which one is what we need and start to read from that location.
+  // These two most common use cases will be covered by the two buffers
+  // so that we don't need to re-read the same location.
+  // Currently we keep a fixed size buffer. If a read doesn't exactly fit
+  // the buffer, we replace the second buffer with the location the user
+  // reads.
+  //
+  // If this returns false, the status code is stored in status_.
+  bool Read(uint32_t file_offset, uint32_t len, Slice* out) {
+    if (file_info_->is_mmap_mode) {
+      assert(file_offset + len <= file_info_->data_end_offset);
+      *out = Slice(file_info_->file_data.data() + file_offset, len);
+      return true;
+    } else {
+      return ReadNonMmap(file_offset, len, out);
+    }
+  }
+
+  // If this returns false, the status code is stored in status_.
+  bool ReadNonMmap(uint32_t file_offset, uint32_t len, Slice* output);
+
+  // *bytes_read = 0 means EOF. false means failure, and the status is saved
+  // in status_. Not directly returning Status, to save copying the status
+  // object and match the previous performance of mmap mode.
+  inline bool ReadVarint32(uint32_t offset, uint32_t* output,
+                           uint32_t* bytes_read);
+
+  bool ReadVarint32NonMmap(uint32_t offset, uint32_t* output,
+                           uint32_t* bytes_read);
+
+  Status status() const { return status_; }
+
+  const PlainTableReaderFileInfo* file_info() { return file_info_; }
+
+ private:
+  const PlainTableReaderFileInfo* file_info_;
+
+  struct Buffer {
+    Buffer() : buf_start_offset(0), buf_len(0), buf_capacity(0) {}
+    std::unique_ptr<char[]> buf;
+    uint32_t buf_start_offset;
+    uint32_t buf_len;
+    uint32_t buf_capacity;
+  };
+
+  // Keep buffers for two recent reads.
+  std::array<std::unique_ptr<Buffer>, 2> buffers_;
+  uint32_t num_buf_;
+  Status status_;
+
+  Slice GetFromBuffer(Buffer* buf, uint32_t file_offset, uint32_t len);
+};
+
+// A helper class to decode keys from an input buffer.
+// The class is used by PlainTableReader.
+class PlainTableKeyDecoder {
+ public:
+  explicit PlainTableKeyDecoder(const PlainTableReaderFileInfo* file_info,
+                                EncodingType encoding_type,
+                                uint32_t user_key_len,
+                                const SliceTransform* prefix_extractor)
+      : file_reader_(file_info),
+        encoding_type_(encoding_type),
+        prefix_len_(0),
+        fixed_user_key_len_(user_key_len),
+        prefix_extractor_(prefix_extractor),
+        in_prefix_(false) {}
+
+  // Find the next key.
+  // start: char array where the key starts.
+  // limit: boundary of the char array
+  // parsed_key: the output of the result key
+  // internal_key: if not null, fill with the output of the result key in
+  //               un-parsed format
+  // bytes_read: how many bytes were read from start. Output.
+  // seekable: whether the key can be read from this place. Used when building
+  //           indexes. Output.
+  Status NextKey(uint32_t start_offset, ParsedInternalKey* parsed_key,
+                 Slice* internal_key, Slice* value, uint32_t* bytes_read,
+                 bool* seekable = nullptr);
+
+  Status NextKeyNoValue(uint32_t start_offset, ParsedInternalKey* parsed_key,
+                        Slice* internal_key, uint32_t* bytes_read,
+                        bool* seekable = nullptr);
+
+  PlainTableFileReader file_reader_;
+  EncodingType encoding_type_;
+  uint32_t prefix_len_;
+  uint32_t fixed_user_key_len_;
+  Slice saved_user_key_;
+  IterKey cur_key_;
+  const SliceTransform* prefix_extractor_;
+  bool in_prefix_;
+
+ private:
+  Status NextPlainEncodingKey(uint32_t start_offset,
+                              ParsedInternalKey* parsed_key,
+                              Slice* internal_key, uint32_t* bytes_read,
+                              bool* seekable = nullptr);
+  Status NextPrefixEncodingKey(uint32_t start_offset,
+                               ParsedInternalKey* parsed_key,
+                               Slice* internal_key, uint32_t* bytes_read,
+                               bool* seekable = nullptr);
+  Status ReadInternalKey(uint32_t file_offset, uint32_t user_key_size,
+                         ParsedInternalKey* parsed_key, uint32_t* bytes_read,
+                         bool* internal_key_valid, Slice* internal_key);
+  inline Status DecodeSize(uint32_t start_offset,
+                           PlainTableEntryType* entry_type, uint32_t* key_size,
+                           uint32_t* bytes_read);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
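+
+// Illustrative sketch (not part of the vendored upstream sources): how the
+// decoder above drives a sequential scan of the data region, mirroring
+// PlainTableReader::Next(). The wrapper function and its parameters are
+// invented for illustration.
+//
+//   Status ScanAll(const PlainTableReaderFileInfo* file_info,
+//                  EncodingType encoding_type, uint32_t user_key_len,
+//                  const SliceTransform* prefix_extractor,
+//                  uint32_t data_start_offset, uint32_t data_end_offset) {
+//     PlainTableKeyDecoder decoder(file_info, encoding_type, user_key_len,
+//                                  prefix_extractor);
+//     uint32_t offset = data_start_offset;
+//     while (offset < data_end_offset) {
+//       ParsedInternalKey parsed_key;
+//       Slice value;
+//       uint32_t bytes_read = 0;
+//       Status s = decoder.NextKey(offset, &parsed_key,
+//                                  nullptr /* internal_key */, &value,
+//                                  &bytes_read);
+//       if (!s.ok()) {
+//         return s;
+//       }
+//       offset += bytes_read;  // NextKey consumed key, sizes, and value
+//       // ... use parsed_key / value here
+//     }
+//     return Status::OK();
+//   }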
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.cc
new file mode 100644
index 0000000000..2f0379f72a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.cc
@@ -0,0 +1,771 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+
+#include "table/plain/plain_table_reader.h"
+
+#include <string>
+#include <vector>
+
+#include "db/dbformat.h"
+#include "memory/arena.h"
+#include "monitoring/histogram.h"
+#include "monitoring/perf_context_imp.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/env.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/options.h"
+#include "rocksdb/statistics.h"
+#include "table/block_based/block.h"
+#include "table/block_based/filter_block.h"
+#include "table/format.h"
+#include "table/get_context.h"
+#include "table/internal_iterator.h"
+#include "table/meta_blocks.h"
+#include "table/plain/plain_table_bloom.h"
+#include "table/plain/plain_table_factory.h"
+#include "table/plain/plain_table_key_coding.h"
+#include "table/two_level_iterator.h"
+#include "util/coding.h"
+#include "util/dynamic_bloom.h"
+#include "util/hash.h"
+#include "util/stop_watch.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+// Safely get a uint32_t element from a char array, where, starting from
+// `base`, every 4 bytes are considered as a fixed 32-bit integer.
+inline uint32_t GetFixed32Element(const char* base, size_t offset) {
+  return DecodeFixed32(base + offset * sizeof(uint32_t));
+}
+}  // namespace
+
+// Iterator to iterate IndexedTable
+class PlainTableIterator : public InternalIterator {
+ public:
+  explicit PlainTableIterator(PlainTableReader* table, bool use_prefix_seek);
+  // No copying allowed
+  PlainTableIterator(const PlainTableIterator&) = delete;
+  void operator=(const Iterator&) = delete;
+
+  ~PlainTableIterator() override;
+
+  bool Valid() const override;
+
+  void SeekToFirst() override;
+
+  void SeekToLast() override;
+
+  void Seek(const Slice& target) override;
+
+  void SeekForPrev(const Slice& target) override;
+
+  void Next() override;
+
+  void Prev() override;
+
+  Slice key() const override;
+
+  Slice value() const override;
+
+  Status status() const override;
+
+ private:
+  PlainTableReader* table_;
+  PlainTableKeyDecoder decoder_;
+  bool use_prefix_seek_;
+  uint32_t offset_;
+  uint32_t next_offset_;
+  Slice key_;
+  Slice value_;
+  Status status_;
+};
+
+extern const uint64_t kPlainTableMagicNumber;
+PlainTableReader::PlainTableReader(
+    const ImmutableOptions& ioptions,
+    std::unique_ptr<RandomAccessFileReader>&& file,
+    const EnvOptions& storage_options, const InternalKeyComparator& icomparator,
+    EncodingType encoding_type, uint64_t file_size,
+    const TableProperties* table_properties,
+    const SliceTransform* prefix_extractor)
+    : internal_comparator_(icomparator),
+      encoding_type_(encoding_type),
+      full_scan_mode_(false),
+      user_key_len_(static_cast<uint32_t>(table_properties->fixed_key_len)),
+      prefix_extractor_(prefix_extractor),
+      enable_bloom_(false),
+      bloom_(6),
+      file_info_(std::move(file), storage_options,
+                 static_cast<uint32_t>(table_properties->data_size)),
+      ioptions_(ioptions),
+      file_size_(file_size),
+      table_properties_(nullptr) {}
+
+PlainTableReader::~PlainTableReader() {
+  // Should fix?
+  status_.PermitUncheckedError();
+}
+
+Status PlainTableReader::Open(
+    const ImmutableOptions& ioptions, const EnvOptions& env_options,
+    const InternalKeyComparator& internal_comparator,
+    std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+    std::unique_ptr<TableReader>* table_reader, const int bloom_bits_per_key,
+    double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size,
+    bool full_scan_mode, const bool immortal_table,
+    const SliceTransform* prefix_extractor) {
+  if (file_size > PlainTableIndex::kMaxFileSize) {
+    return Status::NotSupported("File is too large for PlainTableReader!");
+  }
+
+  std::unique_ptr<TableProperties> props;
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
+                               ioptions, read_options, &props);
+  if (!s.ok()) {
+    return s;
+  }
+
+  assert(hash_table_ratio >= 0.0);
+  auto& user_props = props->user_collected_properties;
+  auto prefix_extractor_in_file = props->prefix_extractor_name;
+
+  if (!full_scan_mode &&
+      !prefix_extractor_in_file.empty() /* old version sst file*/
+      && prefix_extractor_in_file != "nullptr") {
+    if (!prefix_extractor) {
+      return Status::InvalidArgument(
+          "Prefix extractor is missing when opening a PlainTable built "
+          "using a prefix extractor");
+    } else if (prefix_extractor_in_file != prefix_extractor->AsString()) {
+      return Status::InvalidArgument(
+          "Prefix extractor given doesn't match the one used to build "
+          "PlainTable");
+    }
+  }
+
+  EncodingType encoding_type = kPlain;
+  auto encoding_type_prop =
+      user_props.find(PlainTablePropertyNames::kEncodingType);
+  if (encoding_type_prop != user_props.end()) {
+    encoding_type = static_cast<EncodingType>(
+        DecodeFixed32(encoding_type_prop->second.c_str()));
+  }
+
+  std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader(
+      ioptions, std::move(file), env_options, internal_comparator,
+      encoding_type, file_size, props.get(), prefix_extractor));
+
+  s = new_reader->MmapDataIfNeeded();
+  if (!s.ok()) {
+    return s;
+  }
+
+  if (!full_scan_mode) {
+    s = new_reader->PopulateIndex(props.get(), bloom_bits_per_key,
+                                  hash_table_ratio, index_sparseness,
+                                  huge_page_tlb_size);
+    if (!s.ok()) {
+      return s;
+    }
+  } else {
+    // Flag to indicate it is a full scan mode so that none of the indexes
+    // can be used.
+    new_reader->full_scan_mode_ = true;
+  }
+  // PopulateIndex can add to the props, so don't store them until now
+  new_reader->table_properties_ = std::move(props);
+
+  if (immortal_table && new_reader->file_info_.is_mmap_mode) {
+    new_reader->dummy_cleanable_.reset(new Cleanable());
+  }
+
+  *table_reader = std::move(new_reader);
+  return s;
+}
+
+void PlainTableReader::SetupForCompaction() {}
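+
+// Illustrative note (numbers invented): sizing used by AllocateBloom()
+// further down. With bloom_bits_per_key = 10 and 1,000,000 keys (total order
+// mode) or 1,000,000 distinct prefixes (prefix mode),
+//   bloom_total_bits = num_keys * bloom_bits_per_key = 10,000,000 bits,
+// about 1.25 MB. With bloom_bits_per_key = 0 the filter stays disabled, and
+// every MatchBloom() probe then returns true, i.e. no filtering.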
+
+InternalIterator* PlainTableReader::NewIterator(
+    const ReadOptions& options, const SliceTransform* /* prefix_extractor */,
+    Arena* arena, bool /*skip_filters*/, TableReaderCaller /*caller*/,
+    size_t /*compaction_readahead_size*/, bool /* allow_unprepared_value */) {
+  // Not necessarily used here, but make sure this has been initialized
+  assert(table_properties_);
+
+  // Auto prefix mode is not implemented in PlainTable.
+  bool use_prefix_seek = !IsTotalOrderMode() && !options.total_order_seek &&
+                         !options.auto_prefix_mode;
+  if (arena == nullptr) {
+    return new PlainTableIterator(this, use_prefix_seek);
+  } else {
+    auto mem = arena->AllocateAligned(sizeof(PlainTableIterator));
+    return new (mem) PlainTableIterator(this, use_prefix_seek);
+  }
+}
+
+Status PlainTableReader::PopulateIndexRecordList(
+    PlainTableIndexBuilder* index_builder,
+    std::vector<uint32_t>* prefix_hashes) {
+  Slice prev_key_prefix_slice;
+  std::string prev_key_prefix_buf;
+  uint32_t pos = data_start_offset_;
+
+  bool is_first_record = true;
+  Slice key_prefix_slice;
+  PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
+                               prefix_extractor_);
+  while (pos < file_info_.data_end_offset) {
+    uint32_t key_offset = pos;
+    ParsedInternalKey key;
+    Slice value_slice;
+    bool seekable = false;
+    Status s = Next(&decoder, &pos, &key, nullptr, &value_slice, &seekable);
+    if (!s.ok()) {
+      return s;
+    }
+
+    key_prefix_slice = GetPrefix(key);
+    if (enable_bloom_) {
+      bloom_.AddHash(GetSliceHash(key.user_key));
+    } else {
+      if (is_first_record || prev_key_prefix_slice != key_prefix_slice) {
+        if (!is_first_record) {
+          prefix_hashes->push_back(GetSliceHash(prev_key_prefix_slice));
+        }
+        if (file_info_.is_mmap_mode) {
+          prev_key_prefix_slice = key_prefix_slice;
+        } else {
+          prev_key_prefix_buf = key_prefix_slice.ToString();
+          prev_key_prefix_slice = prev_key_prefix_buf;
+        }
+      }
+    }
+
+    index_builder->AddKeyPrefix(GetPrefix(key), key_offset);
+
+    if (!seekable && is_first_record) {
+      return Status::Corruption("Key for a prefix is not seekable");
+    }
+
+    is_first_record = false;
+  }
+
+  prefix_hashes->push_back(GetSliceHash(key_prefix_slice));
+  auto s = index_.InitFromRawData(index_builder->Finish());
+  return s;
+}
+
+void PlainTableReader::AllocateBloom(int bloom_bits_per_key, int num_keys,
+                                     size_t huge_page_tlb_size) {
+  uint32_t bloom_total_bits = num_keys * bloom_bits_per_key;
+  if (bloom_total_bits > 0) {
+    enable_bloom_ = true;
+    bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality,
+                        huge_page_tlb_size, ioptions_.logger);
+  }
+}
+
+void PlainTableReader::FillBloom(const std::vector<uint32_t>& prefix_hashes) {
+  assert(bloom_.IsInitialized());
+  for (const auto prefix_hash : prefix_hashes) {
+    bloom_.AddHash(prefix_hash);
+  }
+}
+
+Status PlainTableReader::MmapDataIfNeeded() {
+  if (file_info_.is_mmap_mode) {
+    // Get mmapped memory.
+    return file_info_.file->Read(
+        IOOptions(), 0, static_cast<size_t>(file_size_), &file_info_.file_data,
+        nullptr, nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
+  }
+  return Status::OK();
+}
+
+Status PlainTableReader::PopulateIndex(TableProperties* props,
+                                       int bloom_bits_per_key,
+                                       double hash_table_ratio,
+                                       size_t index_sparseness,
+                                       size_t huge_page_tlb_size) {
+  assert(props != nullptr);
+
+  BlockContents index_block_contents;
+
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  Status s =
+      ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
+                    file_size_, kPlainTableMagicNumber, ioptions_, read_options,
+                    PlainTableIndexBuilder::kPlainTableIndexBlock,
+                    BlockType::kIndex, &index_block_contents);
+
+  bool index_in_file = s.ok();
+
+  BlockContents bloom_block_contents;
+  bool bloom_in_file = false;
+  // We only need to read the bloom block if the index block is in the file.
+  if (index_in_file) {
+    s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
+                      file_size_, kPlainTableMagicNumber, ioptions_,
+                      read_options, BloomBlockBuilder::kBloomBlock,
+                      BlockType::kFilter, &bloom_block_contents);
+    bloom_in_file = s.ok() && bloom_block_contents.data.size() > 0;
+  }
+
+  Slice* bloom_block;
+  if (bloom_in_file) {
+    // If bloom_block_contents.allocation is not empty (which will be the case
+    // for non-mmap mode), it holds the allocated memory for the bloom block.
+    // It needs to be kept alive to keep `bloom_block` valid.
+    bloom_block_alloc_ = std::move(bloom_block_contents.allocation);
+    bloom_block = &bloom_block_contents.data;
+  } else {
+    bloom_block = nullptr;
+  }
+
+  Slice* index_block;
+  if (index_in_file) {
+    // If index_block_contents.allocation is not empty (which will be the case
+    // for non-mmap mode), it holds the allocated memory for the index block.
+    // It needs to be kept alive to keep `index_block` valid.
+    index_block_alloc_ = std::move(index_block_contents.allocation);
+    index_block = &index_block_contents.data;
+  } else {
+    index_block = nullptr;
+  }
+
+  if ((prefix_extractor_ == nullptr) && (hash_table_ratio != 0)) {
+    // moptions.prefix_extractor is required for a hash-based look-up.
+    return Status::NotSupported(
+        "PlainTable requires a prefix extractor to enable prefix hash mode.");
+  }
+
+  // First, read the whole file: for every kIndexIntervalForSamePrefixKeys rows
+  // of a prefix (starting from the first one), generate a record of (hash,
+  // offset) and append it to IndexRecordList, which is a data structure
+  // created to store them.
+
+  if (!index_in_file) {
+    // Allocate the bloom filter here for total order mode.
+    if (IsTotalOrderMode()) {
+      AllocateBloom(bloom_bits_per_key,
+                    static_cast<int>(props->num_entries),
+                    huge_page_tlb_size);
+    }
+  } else if (bloom_in_file) {
+    enable_bloom_ = true;
+    auto num_blocks_property = props->user_collected_properties.find(
+        PlainTablePropertyNames::kNumBloomBlocks);
+
+    uint32_t num_blocks = 0;
+    if (num_blocks_property != props->user_collected_properties.end()) {
+      Slice temp_slice(num_blocks_property->second);
+      if (!GetVarint32(&temp_slice, &num_blocks)) {
+        num_blocks = 0;
+      }
+    }
+    // cast away the const qualifier, because bloom_ won't be changed
+    bloom_.SetRawData(const_cast<char*>(bloom_block->data()),
+                      static_cast<uint32_t>(bloom_block->size()) * 8,
+                      num_blocks);
+  } else {
+    // Index in file but no bloom in file. Disable the bloom filter in this
+    // case.
+    enable_bloom_ = false;
+    bloom_bits_per_key = 0;
+  }
+
+  PlainTableIndexBuilder index_builder(&arena_, ioptions_, prefix_extractor_,
+                                       index_sparseness, hash_table_ratio,
+                                       huge_page_tlb_size);
+
+  std::vector<uint32_t> prefix_hashes;
+  if (!index_in_file) {
+    // Populates bloom_ if enabled (total order mode)
+    s = PopulateIndexRecordList(&index_builder, &prefix_hashes);
+    if (!s.ok()) {
+      return s;
+    }
+  } else {
+    s = index_.InitFromRawData(*index_block);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  if (!index_in_file) {
+    if (!IsTotalOrderMode()) {
+      // Calculate the bloom filter size, allocate memory for
+      // the bloom filter based on the number of prefixes, then fill it.
+      AllocateBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
+                    huge_page_tlb_size);
+      if (enable_bloom_) {
+        FillBloom(prefix_hashes);
+      }
+    }
+  }
+
+  // Fill two table properties.
+  if (!index_in_file) {
+    props->user_collected_properties["plain_table_hash_table_size"] =
+        std::to_string(index_.GetIndexSize() * PlainTableIndex::kOffsetLen);
+    props->user_collected_properties["plain_table_sub_index_size"] =
+        std::to_string(index_.GetSubIndexSize());
+  } else {
+    props->user_collected_properties["plain_table_hash_table_size"] =
+        std::to_string(0);
+    props->user_collected_properties["plain_table_sub_index_size"] =
+        std::to_string(0);
+  }
+
+  return Status::OK();
+}
+
+Status PlainTableReader::GetOffset(PlainTableKeyDecoder* decoder,
+                                   const Slice& target, const Slice& prefix,
+                                   uint32_t prefix_hash, bool& prefix_matched,
+                                   uint32_t* offset) const {
+  prefix_matched = false;
+  uint32_t prefix_index_offset;
+  auto res = index_.GetOffset(prefix_hash, &prefix_index_offset);
+  if (res == PlainTableIndex::kNoPrefixForBucket) {
+    *offset = file_info_.data_end_offset;
+    return Status::OK();
+  } else if (res == PlainTableIndex::kDirectToFile) {
+    *offset = prefix_index_offset;
+    return Status::OK();
+  }
+
+  // point to sub-index, need to do a binary search
+  uint32_t upper_bound = 0;
+  const char* base_ptr =
+      index_.GetSubIndexBasePtrAndUpperBound(prefix_index_offset, &upper_bound);
+  uint32_t low = 0;
+  uint32_t high = upper_bound;
+  ParsedInternalKey mid_key;
+  ParsedInternalKey parsed_target;
+  Status s = ParseInternalKey(target, &parsed_target,
+                              false /* log_err_key */);  // TODO
+  if (!s.ok()) return s;
+
+  // The key is between [low, high). Do a binary search within it.
+  while (high - low > 1) {
+    uint32_t mid = (high + low) / 2;
+    uint32_t file_offset = GetFixed32Element(base_ptr, mid);
+    uint32_t tmp;
+    s = decoder->NextKeyNoValue(file_offset, &mid_key, nullptr, &tmp);
+    if (!s.ok()) {
+      return s;
+    }
+    int cmp_result = internal_comparator_.Compare(mid_key, parsed_target);
+    if (cmp_result < 0) {
+      low = mid;
+    } else {
+      if (cmp_result == 0) {
+        // Happen to have found the exact key or target is smaller than the
+        // first key after base_offset.
+        prefix_matched = true;
+        *offset = file_offset;
+        return Status::OK();
+      } else {
+        high = mid;
+      }
+    }
+  }
+  // Both the key at position low and the key at low+1 could share the same
+  // prefix as the target. We need to rule out one of them to avoid going to
+  // the wrong prefix.
+  ParsedInternalKey low_key;
+  uint32_t tmp;
+  uint32_t low_key_offset = GetFixed32Element(base_ptr, low);
+  s = decoder->NextKeyNoValue(low_key_offset, &low_key, nullptr, &tmp);
+  if (!s.ok()) {
+    return s;
+  }
+
+  if (GetPrefix(low_key) == prefix) {
+    prefix_matched = true;
+    *offset = low_key_offset;
+  } else if (low + 1 < upper_bound) {
+    // There may be a next prefix; return it
+    prefix_matched = false;
+    *offset = GetFixed32Element(base_ptr, low + 1);
+  } else {
+    // The target is larger than the keys of the last prefix in this bucket
+    // but has a different prefix. The key does not exist.
+    *offset = file_info_.data_end_offset;
+  }
+  return Status::OK();
+}
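+
+// Illustrative note (layout per plain_table_index.h; arithmetic invented):
+// when GetOffset() above lands in a kSubindex bucket, the bytes at
+// prefix_index_offset form
+//
+//   varint32 upper_bound                  // number of entries N
+//   fixed32  offset[0] ... offset[N - 1]  // ascending file offsets
+//
+// and the binary search probes entry mid with
+//   uint32_t file_offset = GetFixed32Element(base_ptr, mid);
+// where base_ptr, returned by GetSubIndexBasePtrAndUpperBound(), already
+// points one varint32 past the count.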
+
+bool PlainTableReader::MatchBloom(uint32_t hash) const {
+  if (!enable_bloom_) {
+    return true;
+  }
+
+  if (bloom_.MayContainHash(hash)) {
+    PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
+    return true;
+  } else {
+    PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
+    return false;
+  }
+}
+
+Status PlainTableReader::Next(PlainTableKeyDecoder* decoder, uint32_t* offset,
+                              ParsedInternalKey* parsed_key,
+                              Slice* internal_key, Slice* value,
+                              bool* seekable) const {
+  if (*offset == file_info_.data_end_offset) {
+    *offset = file_info_.data_end_offset;
+    return Status::OK();
+  }
+
+  if (*offset > file_info_.data_end_offset) {
+    return Status::Corruption("Offset is out of file size");
+  }
+
+  uint32_t bytes_read;
+  Status s = decoder->NextKey(*offset, parsed_key, internal_key, value,
+                              &bytes_read, seekable);
+  if (!s.ok()) {
+    return s;
+  }
+  *offset = *offset + bytes_read;
+  return Status::OK();
+}
+
+void PlainTableReader::Prepare(const Slice& target) {
+  if (enable_bloom_) {
+    uint32_t prefix_hash = GetSliceHash(GetPrefix(target));
+    bloom_.Prefetch(prefix_hash);
+  }
+}
+
+Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
+                             GetContext* get_context,
+                             const SliceTransform* /* prefix_extractor */,
+                             bool /*skip_filters*/) {
+  // Check the bloom filter first.
+  Slice prefix_slice;
+  uint32_t prefix_hash;
+  if (IsTotalOrderMode()) {
+    if (full_scan_mode_) {
+      status_ =
+          Status::InvalidArgument("Get() is not allowed in full scan mode.");
+    }
+    // Match the whole user key for the bloom filter check.
+    if (!MatchBloom(GetSliceHash(ExtractUserKey(target)))) {
+      return Status::OK();
+    }
+    // In total order mode there is only one bucket, bucket 0, and we always
+    // use the empty prefix.
+    prefix_slice = Slice();
+    prefix_hash = 0;
+  } else {
+    prefix_slice = GetPrefix(target);
+    prefix_hash = GetSliceHash(prefix_slice);
+    if (!MatchBloom(prefix_hash)) {
+      return Status::OK();
+    }
+  }
+  uint32_t offset;
+  bool prefix_match;
+  PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
+                               prefix_extractor_);
+  Status s = GetOffset(&decoder, target, prefix_slice, prefix_hash,
+                       prefix_match, &offset);
+
+  if (!s.ok()) {
+    return s;
+  }
+  ParsedInternalKey found_key;
+  ParsedInternalKey parsed_target;
+  s = ParseInternalKey(target, &parsed_target,
+                       false /* log_err_key */);  // TODO
+  if (!s.ok()) return s;
+
+  Slice found_value;
+  while (offset < file_info_.data_end_offset) {
+    s = Next(&decoder, &offset, &found_key, nullptr, &found_value);
+    if (!s.ok()) {
+      return s;
+    }
+    if (!prefix_match) {
+      // Need to verify the prefix for the first key found if it has not yet
+      // been checked.
+      if (GetPrefix(found_key) != prefix_slice) {
+        return Status::OK();
+      }
+      prefix_match = true;
+    }
+    // TODO(ljin): since we know the key comparison result here,
+    // can we enable the fast path?
+    if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
+      bool dont_care __attribute__((__unused__));
+      if (!get_context->SaveValue(found_key, found_value, &dont_care,
+                                  dummy_cleanable_.get())) {
+        break;
+      }
+    }
+  }
+  return Status::OK();
+}
+
+uint64_t PlainTableReader::ApproximateOffsetOf(
+    const ReadOptions& /*read_options*/, const Slice& /*key*/,
+    TableReaderCaller /*caller*/) {
+  return 0;
+}
+
+uint64_t PlainTableReader::ApproximateSize(const ReadOptions& /*read_options*/,
+                                           const Slice& /*start*/,
+                                           const Slice& /*end*/,
+                                           TableReaderCaller /*caller*/) {
+  return 0;
+}
+
+PlainTableIterator::PlainTableIterator(PlainTableReader* table,
+                                       bool use_prefix_seek)
+    : table_(table),
+      decoder_(&table_->file_info_, table_->encoding_type_,
+               table_->user_key_len_, table_->prefix_extractor_),
+      use_prefix_seek_(use_prefix_seek) {
+  next_offset_ = offset_ = table_->file_info_.data_end_offset;
+}
+
+PlainTableIterator::~PlainTableIterator() {}
+
+bool PlainTableIterator::Valid() const {
+  return offset_ < table_->file_info_.data_end_offset &&
+         offset_ >= table_->data_start_offset_;
+}
+
+void PlainTableIterator::SeekToFirst() {
+  status_ = Status::OK();
+  next_offset_ = table_->data_start_offset_;
+  if (next_offset_ >= table_->file_info_.data_end_offset) {
+    next_offset_ = offset_ = table_->file_info_.data_end_offset;
+  } else {
+    Next();
+  }
+}
+
+void PlainTableIterator::SeekToLast() {
+  assert(false);
+  status_ = Status::NotSupported("SeekToLast() is not supported in PlainTable");
+  next_offset_ = offset_ = table_->file_info_.data_end_offset;
+}
+
+void PlainTableIterator::Seek(const Slice& target) {
+  if (use_prefix_seek_ != !table_->IsTotalOrderMode()) {
+    // This check is done here instead of NewIterator() to permit creating an
+    // iterator with total_order_seek = true even if we won't be able to Seek()
+    // it. This is needed for compaction: it creates an iterator with
+    // total_order_seek = true but usually never does Seek() on it,
+    // only SeekToFirst().
+    status_ = Status::InvalidArgument(
+        "total_order_seek not implemented for PlainTable.");
+    offset_ = next_offset_ = table_->file_info_.data_end_offset;
+    return;
+  }
+
+  // If the user didn't set the prefix-seek option and we are not able to do a
+  // total-order Seek(), fail with an assertion.
+  if (table_->IsTotalOrderMode()) {
+    if (table_->full_scan_mode_) {
+      status_ =
+          Status::InvalidArgument("Seek() is not allowed in full scan mode.");
+      offset_ = next_offset_ = table_->file_info_.data_end_offset;
+      return;
+    } else if (table_->GetIndexSize() > 1) {
+      assert(false);
+      status_ = Status::NotSupported(
+          "PlainTable cannot issue non-prefix seek unless in total order "
+          "mode.");
+      offset_ = next_offset_ = table_->file_info_.data_end_offset;
+      return;
+    }
+  }
+
+  Slice prefix_slice = table_->GetPrefix(target);
+  uint32_t prefix_hash = 0;
+  // The bloom filter is ignored in total-order mode.
+  if (!table_->IsTotalOrderMode()) {
+    prefix_hash = GetSliceHash(prefix_slice);
+    if (!table_->MatchBloom(prefix_hash)) {
+      status_ = Status::OK();
+      offset_ = next_offset_ = table_->file_info_.data_end_offset;
+      return;
+    }
+  }
+  bool prefix_match;
+  status_ = table_->GetOffset(&decoder_, target, prefix_slice, prefix_hash,
+                              prefix_match, &next_offset_);
+  if (!status_.ok()) {
+    offset_ = next_offset_ = table_->file_info_.data_end_offset;
+    return;
+  }
+
+  if (next_offset_ < table_->file_info_.data_end_offset) {
+    for (Next(); status_.ok() && Valid(); Next()) {
+      if (!prefix_match) {
+        // Need to verify the first key's prefix
+        if (table_->GetPrefix(key()) != prefix_slice) {
+          offset_ = next_offset_ = table_->file_info_.data_end_offset;
+          break;
+        }
+        prefix_match = true;
+      }
+      if (table_->internal_comparator_.Compare(key(), target) >= 0) {
+        break;
+      }
+    }
+  } else {
+    offset_ = table_->file_info_.data_end_offset;
+  }
+}
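+
+// Illustrative sketch (not part of the vendored upstream sources): the
+// client-side pattern the Seek() above serves, using the standard RocksDB
+// iterator API; the read option and key prefix are invented.
+//
+//   #include <memory>
+//   #include "rocksdb/db.h"
+//
+//   void ScanOnePrefix(rocksdb::DB* db) {
+//     rocksdb::ReadOptions read_options;
+//     read_options.prefix_same_as_start = true;  // stay within one prefix
+//     std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(read_options));
+//     for (it->Seek("user1234");
+//          it->Valid() && it->key().starts_with("user1234"); it->Next()) {
+//       // consume it->key() / it->value()
+//     }
+//     // Note: PlainTable supports neither Prev() nor SeekForPrev(); see the
+//     // stubs that follow.
+//   }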
+ if (!table_->IsTotalOrderMode()) { + prefix_hash = GetSliceHash(prefix_slice); + if (!table_->MatchBloom(prefix_hash)) { + status_ = Status::OK(); + offset_ = next_offset_ = table_->file_info_.data_end_offset; + return; + } + } + bool prefix_match; + status_ = table_->GetOffset(&decoder_, target, prefix_slice, prefix_hash, + prefix_match, &next_offset_); + if (!status_.ok()) { + offset_ = next_offset_ = table_->file_info_.data_end_offset; + return; + } + + if (next_offset_ < table_->file_info_.data_end_offset) { + for (Next(); status_.ok() && Valid(); Next()) { + if (!prefix_match) { + // Need to verify the first key's prefix + if (table_->GetPrefix(key()) != prefix_slice) { + offset_ = next_offset_ = table_->file_info_.data_end_offset; + break; + } + prefix_match = true; + } + if (table_->internal_comparator_.Compare(key(), target) >= 0) { + break; + } + } + } else { + offset_ = table_->file_info_.data_end_offset; + } +} + +void PlainTableIterator::SeekForPrev(const Slice& /*target*/) { + assert(false); + status_ = + Status::NotSupported("SeekForPrev() is not supported in PlainTable"); + offset_ = next_offset_ = table_->file_info_.data_end_offset; +} + +void PlainTableIterator::Next() { + offset_ = next_offset_; + if (offset_ < table_->file_info_.data_end_offset) { + Slice tmp_slice; + ParsedInternalKey parsed_key; + status_ = + table_->Next(&decoder_, &next_offset_, &parsed_key, &key_, &value_); + if (!status_.ok()) { + offset_ = next_offset_ = table_->file_info_.data_end_offset; + } + } +} + +void PlainTableIterator::Prev() { assert(false); } + +Slice PlainTableIterator::key() const { + assert(Valid()); + return key_; +} + +Slice PlainTableIterator::value() const { + assert(Valid()); + return value_; +} + +Status PlainTableIterator::status() const { return status_; } + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.h new file mode 100644 index 0000000000..0f5f7f3ce0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/plain/plain_table_reader.h @@ -0,0 +1,243 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+
+#pragma once
+
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "file/random_access_file_reader.h"
+#include "memory/arena.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/table.h"
+#include "rocksdb/table_properties.h"
+#include "table/plain/plain_table_bloom.h"
+#include "table/plain/plain_table_factory.h"
+#include "table/plain/plain_table_index.h"
+#include "table/table_reader.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Block;
+struct BlockContents;
+class BlockHandle;
+class Footer;
+struct Options;
+class RandomAccessFile;
+struct ReadOptions;
+class TableCache;
+class TableReader;
+class InternalKeyComparator;
+class PlainTableKeyDecoder;
+class GetContext;
+
+extern const uint32_t kPlainTableVariableLength;
+
+struct PlainTableReaderFileInfo {
+  bool is_mmap_mode;
+  Slice file_data;
+  uint32_t data_end_offset;
+  std::unique_ptr<RandomAccessFileReader> file;
+
+  PlainTableReaderFileInfo(std::unique_ptr<RandomAccessFileReader>&& _file,
+                           const EnvOptions& storage_options,
+                           uint32_t _data_size_offset)
+      : is_mmap_mode(storage_options.use_mmap_reads),
+        data_end_offset(_data_size_offset),
+        file(std::move(_file)) {}
+};
+
+// The reader class of PlainTable. For a description of the PlainTable format,
+// see the comments of class PlainTableFactory, where instances of
+// PlainTableReader are created.
+class PlainTableReader : public TableReader {
+ public:
+  // Based on the output file format shown in plain_table_factory.h.
+  // When opening the output file, PlainTableReader creates a hash table
+  // from key prefixes to offsets into the output file. Each bucket either
+  // points directly to the data offset of the first key with that key prefix,
+  // or, if too many keys share the prefix, to a binary-searchable index from
+  // the key suffix to the offset on disk.
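+  //
+  // For orientation, a hedged sketch of how PlainTable (and hence this
+  // reader) is configured through the public API; the option values below are
+  // arbitrary examples, not recommendations:
+  //
+  //   Options options;
+  //   options.prefix_extractor.reset(NewFixedPrefixTransform(8));
+  //   options.allow_mmap_reads = true;  // PlainTable relies on mmap reads
+  //   PlainTableOptions pt;
+  //   pt.user_key_len = kPlainTableVariableLength;
+  //   pt.bloom_bits_per_key = 10;
+  //   pt.hash_table_ratio = 0.75;
+  //   options.table_factory.reset(NewPlainTableFactory(pt));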
+  static Status Open(const ImmutableOptions& ioptions,
+                     const EnvOptions& env_options,
+                     const InternalKeyComparator& internal_comparator,
+                     std::unique_ptr<RandomAccessFileReader>&& file,
+                     uint64_t file_size, std::unique_ptr<TableReader>* table,
+                     const int bloom_bits_per_key, double hash_table_ratio,
+                     size_t index_sparseness, size_t huge_page_tlb_size,
+                     bool full_scan_mode, const bool immortal_table = false,
+                     const SliceTransform* prefix_extractor = nullptr);
+
+  // Returns new iterator over table contents
+  // compaction_readahead_size: its value will only be used if for_compaction =
+  // true
+  InternalIterator* NewIterator(const ReadOptions&,
+                                const SliceTransform* prefix_extractor,
+                                Arena* arena, bool skip_filters,
+                                TableReaderCaller caller,
+                                size_t compaction_readahead_size = 0,
+                                bool allow_unprepared_value = false) override;
+
+  void Prepare(const Slice& target) override;
+
+  Status Get(const ReadOptions& readOptions, const Slice& key,
+             GetContext* get_context, const SliceTransform* prefix_extractor,
+             bool skip_filters = false) override;
+
+  uint64_t ApproximateOffsetOf(const ReadOptions& read_options,
+                               const Slice& key,
+                               TableReaderCaller caller) override;
+
+  uint64_t ApproximateSize(const ReadOptions& read_options, const Slice& start,
+                           const Slice& end, TableReaderCaller caller) override;
+
+  uint32_t GetIndexSize() const { return index_.GetIndexSize(); }
+  void SetupForCompaction() override;
+
+  std::shared_ptr<const TableProperties> GetTableProperties() const override {
+    return table_properties_;
+  }
+
+  virtual size_t ApproximateMemoryUsage() const override {
+    return arena_.MemoryAllocatedBytes();
+  }
+
+  PlainTableReader(const ImmutableOptions& ioptions,
+                   std::unique_ptr<RandomAccessFileReader>&& file,
+                   const EnvOptions& env_options,
+                   const InternalKeyComparator& internal_comparator,
+                   EncodingType encoding_type, uint64_t file_size,
+                   const TableProperties* table_properties,
+                   const SliceTransform* prefix_extractor);
+  virtual ~PlainTableReader();
+
+ protected:
+  // Check bloom filter to see whether it might contain this prefix.
+  // The hash of the prefix is given, since it can be reused for index lookup
+  // too.
+  virtual bool MatchBloom(uint32_t hash) const;
+
+  // PopulateIndex() builds an index of keys. It must be called before any
+  // query to the table.
+  //
+  // props: the table properties object that needs to be stored. Ownership of
+  // the object is passed.
+  Status PopulateIndex(TableProperties* props, int bloom_bits_per_key,
+                       double hash_table_ratio, size_t index_sparseness,
+                       size_t huge_page_tlb_size);
+
+  Status MmapDataIfNeeded();
+
+ private:
+  const InternalKeyComparator internal_comparator_;
+  EncodingType encoding_type_;
+  // Represents the plain table's current status.
+  Status status_;
+
+  PlainTableIndex index_;
+  bool full_scan_mode_;
+
+  // data_start_offset_ and data_end_offset_ define the range of the
+  // sst file that stores data.
+  const uint32_t data_start_offset_ = 0;
+  const uint32_t user_key_len_;
+  const SliceTransform* prefix_extractor_;
+
+  static const size_t kNumInternalBytes = 8;
+
+  // Bloom filter is used to rule out non-existent keys
+  bool enable_bloom_;
+  PlainTableBloomV1 bloom_;
+  PlainTableReaderFileInfo file_info_;
+  Arena arena_;
+  CacheAllocationPtr index_block_alloc_;
+  CacheAllocationPtr bloom_block_alloc_;
+
+  const ImmutableOptions& ioptions_;
+  std::unique_ptr<Cleanable> dummy_cleanable_;
+  uint64_t file_size_;
+
+ protected:  // for testing
+  std::shared_ptr<const TableProperties> table_properties_;
+
+ private:
+  bool IsFixedLength() const {
+    return user_key_len_ != kPlainTableVariableLength;
+  }
+
+  size_t GetFixedInternalKeyLength() const {
+    return user_key_len_ + kNumInternalBytes;
+  }
+
+  Slice GetPrefix(const Slice& target) const {
+    assert(target.size() >= 8);  // target is internal key
+    return GetPrefixFromUserKey(ExtractUserKey(target));
+  }
+
+  Slice GetPrefix(const ParsedInternalKey& target) const {
+    return GetPrefixFromUserKey(target.user_key);
+  }
+
+  Slice GetPrefixFromUserKey(const Slice& user_key) const {
+    if (!IsTotalOrderMode()) {
+      return prefix_extractor_->Transform(user_key);
+    } else {
+      // Use empty slice as prefix if prefix_extractor is not set.
+      // In that case, it falls back to pure binary search and
+      // total iterator seek is supported.
+      return Slice();
+    }
+  }
+
+  friend class TableCache;
+  friend class PlainTableIterator;
+
+  // Internal helper function to generate an IndexRecordList object from all
+  // the rows, which contains index records as a list.
+  // If bloom_ is not null, all the keys' full-key hashes will be added to the
+  // bloom filter.
+  Status PopulateIndexRecordList(PlainTableIndexBuilder* index_builder,
+                                 std::vector<uint32_t>* prefix_hashes);
+
+  // Internal helper function to allocate memory for the bloom filter
+  void AllocateBloom(int bloom_bits_per_key, int num_prefixes,
+                     size_t huge_page_tlb_size);
+
+  void FillBloom(const std::vector<uint32_t>& prefix_hashes);
+
+  // Read the key and value at `offset` into the output parameters.
+  // On success, `offset` will be updated to the offset of the next key.
+  // `parsed_key` will hold the key in parsed format.
+  // If `internal_key` is not null, it will be filled with the key in slice
+  // format.
+  // If `seekable` is not null, it will return whether we can directly read
+  // data using this offset.
+  Status Next(PlainTableKeyDecoder* decoder, uint32_t* offset,
+              ParsedInternalKey* parsed_key, Slice* internal_key, Slice* value,
+              bool* seekable = nullptr) const;
+  // Get the file offset for key target.
+  // The out-parameter prefix_matched is set to true if the offset is confirmed
+  // for a key with the same prefix as target.
+  Status GetOffset(PlainTableKeyDecoder* decoder, const Slice& target,
+                   const Slice& prefix, uint32_t prefix_hash,
+                   bool& prefix_matched, uint32_t* offset) const;
+
+  bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); }
+
+  // No copying allowed
+  explicit PlainTableReader(const TableReader&) = delete;
+  void operator=(const TableReader&) = delete;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/scoped_arena_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/scoped_arena_iterator.h
new file mode 100644
index 0000000000..2b8824d95e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/scoped_arena_iterator.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include "port/port.h" +#include "table/internal_iterator.h" + +namespace ROCKSDB_NAMESPACE { +class ScopedArenaIterator { + void reset(InternalIterator* iter) noexcept { + if (iter_ != nullptr) { + iter_->~InternalIterator(); + } + iter_ = iter; + } + + public: + explicit ScopedArenaIterator(InternalIterator* iter = nullptr) + : iter_(iter) {} + + ScopedArenaIterator(const ScopedArenaIterator&) = delete; + ScopedArenaIterator& operator=(const ScopedArenaIterator&) = delete; + + ScopedArenaIterator(ScopedArenaIterator&& o) noexcept { + iter_ = o.iter_; + o.iter_ = nullptr; + } + + ScopedArenaIterator& operator=(ScopedArenaIterator&& o) noexcept { + reset(o.iter_); + o.iter_ = nullptr; + return *this; + } + + InternalIterator* operator->() { return iter_; } + InternalIterator* get() { return iter_; } + + void set(InternalIterator* iter) { reset(iter); } + + InternalIterator* release() { + assert(iter_ != nullptr); + auto* res = iter_; + iter_ = nullptr; + return res; + } + + ~ScopedArenaIterator() { reset(nullptr); } + + private: + InternalIterator* iter_; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.cc new file mode 100644 index 0000000000..f6d6e195d6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.cc @@ -0,0 +1,520 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+//
+
+#include "table/sst_file_dumper.h"
+
+#include <chrono>
+#include <cinttypes>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "db/blob/blob_index.h"
+#include "db/memtable.h"
+#include "db/write_batch_internal.h"
+#include "options/cf_options.h"
+#include "port/port.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/status.h"
+#include "rocksdb/table_properties.h"
+#include "rocksdb/utilities/ldb_cmd.h"
+#include "table/block_based/block.h"
+#include "table/block_based/block_based_table_builder.h"
+#include "table/block_based/block_based_table_factory.h"
+#include "table/block_based/block_builder.h"
+#include "table/format.h"
+#include "table/meta_blocks.h"
+#include "table/plain/plain_table_factory.h"
+#include "table/table_reader.h"
+#include "util/compression.h"
+#include "util/random.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+SstFileDumper::SstFileDumper(const Options& options,
+                             const std::string& file_path,
+                             Temperature file_temp, size_t readahead_size,
+                             bool verify_checksum, bool output_hex,
+                             bool decode_blob_index, const EnvOptions& soptions,
+                             bool silent)
+    : file_name_(file_path),
+      read_num_(0),
+      file_temp_(file_temp),
+      output_hex_(output_hex),
+      decode_blob_index_(decode_blob_index),
+      soptions_(soptions),
+      silent_(silent),
+      options_(options),
+      ioptions_(options_),
+      moptions_(ColumnFamilyOptions(options_)),
+      read_options_(verify_checksum, false),
+      internal_comparator_(BytewiseComparator()) {
+  read_options_.readahead_size = readahead_size;
+  if (!silent_) {
+    fprintf(stdout, "Process %s\n", file_path.c_str());
+  }
+  init_result_ = GetTableReader(file_name_);
+}
+
+extern const uint64_t kBlockBasedTableMagicNumber;
+extern const uint64_t kLegacyBlockBasedTableMagicNumber;
+extern const uint64_t kPlainTableMagicNumber;
+extern const uint64_t kLegacyPlainTableMagicNumber;
+
+const char* testFileName = "test_file_name";
+
+Status SstFileDumper::GetTableReader(const std::string& file_path) {
+  // Warning about 'magic_number' being uninitialized shows up only in UBsan
+  // builds. Though access is guarded by 's.ok()' checks, fix the issue to
+  // avoid any warnings.
+  uint64_t magic_number = Footer::kNullTableMagicNumber;
+
+  // read table magic number
+  Footer footer;
+
+  const auto& fs = options_.env->GetFileSystem();
+  std::unique_ptr<FSRandomAccessFile> file;
+  uint64_t file_size = 0;
+  FileOptions fopts = soptions_;
+  fopts.temperature = file_temp_;
+  Status s = fs->NewRandomAccessFile(file_path, fopts, &file, nullptr);
+  if (s.ok()) {
+    s = fs->GetFileSize(file_path, IOOptions(), &file_size, nullptr);
+  }
+
+  // check empty file
+  // if true, skip further processing of this file
+  if (file_size == 0) {
+    return Status::Aborted(file_path, "Empty file");
+  }
+
+  file_.reset(new RandomAccessFileReader(std::move(file), file_path));
+
+  FilePrefetchBuffer prefetch_buffer(
+      0 /* readahead_size */, 0 /* max_readahead_size */, true /* enable */,
+      false /* track_min_offset */);
+  if (s.ok()) {
+    const uint64_t kSstDumpTailPrefetchSize = 512 * 1024;
+    uint64_t prefetch_size = (file_size > kSstDumpTailPrefetchSize)
+                                 ? kSstDumpTailPrefetchSize
+                                 : file_size;
+    uint64_t prefetch_off = file_size - prefetch_size;
+    IOOptions opts;
+    s = prefetch_buffer.Prefetch(opts, file_.get(), prefetch_off,
+                                 static_cast<size_t>(prefetch_size),
+                                 Env::IO_TOTAL /* rate_limiter_priority */);
+
+    s = ReadFooterFromFile(opts, file_.get(), *fs, &prefetch_buffer, file_size,
+                           &footer);
+  }
+  if (s.ok()) {
+    magic_number = footer.table_magic_number();
+  }
+
+  if (s.ok()) {
+    if (magic_number == kPlainTableMagicNumber ||
+        magic_number == kLegacyPlainTableMagicNumber) {
+      soptions_.use_mmap_reads = true;
+
+      fs->NewRandomAccessFile(file_path, fopts, &file, nullptr);
+      file_.reset(new RandomAccessFileReader(std::move(file), file_path));
+    }
+
+    // For old sst format, ReadTableProperties might fail but file can be read
+    if (ReadTableProperties(magic_number, file_.get(), file_size,
+                            (magic_number == kBlockBasedTableMagicNumber)
+                                ? &prefetch_buffer
+                                : nullptr)
+            .ok()) {
+      s = SetTableOptionsByMagicNumber(magic_number);
+      if (s.ok()) {
+        if (table_properties_ && !table_properties_->comparator_name.empty()) {
+          ConfigOptions config_options;
+          const Comparator* user_comparator = nullptr;
+          s = Comparator::CreateFromString(config_options,
+                                           table_properties_->comparator_name,
+                                           &user_comparator);
+          if (s.ok()) {
+            assert(user_comparator);
+            internal_comparator_ = InternalKeyComparator(user_comparator);
+          }
+        }
+      }
+    } else {
+      s = SetOldTableOptions();
+    }
+    options_.comparator = internal_comparator_.user_comparator();
+  }
+
+  if (s.ok()) {
+    s = NewTableReader(ioptions_, soptions_, internal_comparator_, file_size,
+                       &table_reader_);
+  }
+  return s;
+}
+
+Status SstFileDumper::NewTableReader(
+    const ImmutableOptions& /*ioptions*/, const EnvOptions& /*soptions*/,
+    const InternalKeyComparator& /*internal_comparator*/, uint64_t file_size,
+    std::unique_ptr<TableReader>* /*table_reader*/) {
+  auto t_opt = TableReaderOptions(
+      ioptions_, moptions_.prefix_extractor, soptions_, internal_comparator_,
+      0 /* block_protection_bytes_per_key */, false /* skip_filters */,
+      false /* immortal */, true /* force_direct_prefetch */);
+  // Allow open file with global sequence number for backward compatibility.
+  t_opt.largest_seqno = kMaxSequenceNumber;
+
+  // We need to turn off pre-fetching of index and filter nodes for
+  // BlockBasedTable
+  if (options_.table_factory->IsInstanceOf(
+          TableFactory::kBlockBasedTableName())) {
+    return options_.table_factory->NewTableReader(t_opt, std::move(file_),
+                                                  file_size, &table_reader_,
+                                                  /*enable_prefetch=*/false);
+  }
+
+  // For all other factory implementations
+  return options_.table_factory->NewTableReader(t_opt, std::move(file_),
+                                                file_size, &table_reader_);
+}
+
+Status SstFileDumper::VerifyChecksum() {
+  // We could pass a specific readahead setting into the read options if
+  // needed.
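+  // A hedged usage sketch for this entry point (the path and readahead value
+  // are examples only, not part of the vendored file):
+  //
+  //   SstFileDumper dumper(Options(), "/path/to/000123.sst",
+  //                        Temperature::kUnknown, 2 << 20 /* readahead */,
+  //                        true /* verify_checksum */, false /* output_hex */,
+  //                        false /* decode_blob_index */);
+  //   Status s = dumper.getStatus();  // result of opening/parsing the file
+  //   if (s.ok()) s = dumper.VerifyChecksum();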
+  return table_reader_->VerifyChecksum(read_options_,
+                                       TableReaderCaller::kSSTDumpTool);
+}
+
+Status SstFileDumper::DumpTable(const std::string& out_filename) {
+  std::unique_ptr<WritableFile> out_file;
+  Env* env = options_.env;
+  Status s = env->NewWritableFile(out_filename, &out_file, soptions_);
+  if (s.ok()) {
+    s = table_reader_->DumpTable(out_file.get());
+  }
+  if (!s.ok()) {
+    // close the file before returning the error; ignore any close error
+    out_file->Close().PermitUncheckedError();
+    return s;
+  }
+  return out_file->Close();
+}
+
+Status SstFileDumper::CalculateCompressedTableSize(
+    const TableBuilderOptions& tb_options, size_t block_size,
+    uint64_t* num_data_blocks, uint64_t* compressed_table_size) {
+  std::unique_ptr<Env> env(NewMemEnv(options_.env));
+  std::unique_ptr<WritableFileWriter> dest_writer;
+  Status s =
+      WritableFileWriter::Create(env->GetFileSystem(), testFileName,
+                                 FileOptions(soptions_), &dest_writer, nullptr);
+  if (!s.ok()) {
+    return s;
+  }
+  BlockBasedTableOptions table_options;
+  table_options.block_size = block_size;
+  BlockBasedTableFactory block_based_tf(table_options);
+  std::unique_ptr<TableBuilder> table_builder;
+  table_builder.reset(
+      block_based_tf.NewTableBuilder(tb_options, dest_writer.get()));
+  std::unique_ptr<InternalIterator> iter(table_reader_->NewIterator(
+      read_options_, moptions_.prefix_extractor.get(), /*arena=*/nullptr,
+      /*skip_filters=*/false, TableReaderCaller::kSSTDumpTool));
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    table_builder->Add(iter->key(), iter->value());
+  }
+  s = iter->status();
+  if (!s.ok()) {
+    return s;
+  }
+  s = table_builder->Finish();
+  if (!s.ok()) {
+    return s;
+  }
+  *compressed_table_size = table_builder->FileSize();
+  assert(num_data_blocks != nullptr);
+  *num_data_blocks = table_builder->GetTableProperties().num_data_blocks;
+  return env->DeleteFile(testFileName);
+}
+
+Status SstFileDumper::ShowAllCompressionSizes(
+    size_t block_size,
+    const std::vector<std::pair<CompressionType, const char*>>&
+        compression_types,
+    int32_t compress_level_from, int32_t compress_level_to,
+    uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes,
+    uint64_t max_dict_buffer_bytes, bool use_zstd_dict_trainer) {
+  fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
+  for (auto& i : compression_types) {
+    if (CompressionTypeSupported(i.first)) {
+      fprintf(stdout, "Compression: %-24s\n", i.second);
+      CompressionOptions compress_opt;
+      compress_opt.max_dict_bytes = max_dict_bytes;
+      compress_opt.zstd_max_train_bytes = zstd_max_train_bytes;
+      compress_opt.max_dict_buffer_bytes = max_dict_buffer_bytes;
+      compress_opt.use_zstd_dict_trainer = use_zstd_dict_trainer;
+      for (int32_t j = compress_level_from; j <= compress_level_to; j++) {
+        fprintf(stdout, "Compression level: %d", j);
+        compress_opt.level = j;
+        Status s = ShowCompressionSize(block_size, i.first, compress_opt);
+        if (!s.ok()) {
+          return s;
+        }
+      }
+    } else {
+      fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
+    }
+  }
+  return Status::OK();
+}
+
+Status SstFileDumper::ShowCompressionSize(
+    size_t block_size, CompressionType compress_type,
+    const CompressionOptions& compress_opt) {
+  Options opts;
+  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+  opts.statistics->set_stats_level(StatsLevel::kAll);
+  const ImmutableOptions imoptions(opts);
+  const ColumnFamilyOptions cfo(opts);
+  const MutableCFOptions moptions(cfo);
+  ROCKSDB_NAMESPACE::InternalKeyComparator ikc(opts.comparator);
+  IntTblPropCollectorFactories block_based_table_factories;
+
+  std::string column_family_name;
+  int unknown_level = -1;
+
+  TableBuilderOptions tb_opts(
+      imoptions, moptions, ikc, &block_based_table_factories, compress_type,
+      compress_opt,
+      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
+      column_family_name, unknown_level);
+  uint64_t num_data_blocks = 0;
+  std::chrono::steady_clock::time_point start =
+      std::chrono::steady_clock::now();
+  uint64_t file_size;
+  Status s = CalculateCompressedTableSize(tb_opts, block_size,
+                                          &num_data_blocks, &file_size);
+  if (!s.ok()) {
+    return s;
+  }
+
+  std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
+  fprintf(stdout, " Size: %10" PRIu64, file_size);
+  fprintf(stdout, " Blocks: %6" PRIu64, num_data_blocks);
+  fprintf(stdout, " Time Taken: %10s microsecs",
+          std::to_string(
+              std::chrono::duration_cast<std::chrono::microseconds>(end - start)
+                  .count())
+              .c_str());
+  const uint64_t compressed_blocks =
+      opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_COMPRESSED);
+  const uint64_t not_compressed_blocks =
+      opts.statistics->getAndResetTickerCount(
+          NUMBER_BLOCK_COMPRESSION_REJECTED);
+  // When the option enable_index_compression is true,
+  // NUMBER_BLOCK_COMPRESSED is incremented for index block(s).
+  if ((compressed_blocks + not_compressed_blocks) > num_data_blocks) {
+    num_data_blocks = compressed_blocks + not_compressed_blocks;
+  }
+
+  const uint64_t ratio_not_compressed_blocks =
+      (num_data_blocks - compressed_blocks) - not_compressed_blocks;
+  const double compressed_pcnt =
+      (0 == num_data_blocks) ? 0.0
+                             : ((static_cast<double>(compressed_blocks) /
+                                 static_cast<double>(num_data_blocks)) *
+                                100.0);
+  const double ratio_not_compressed_pcnt =
+      (0 == num_data_blocks)
+          ? 0.0
+          : ((static_cast<double>(ratio_not_compressed_blocks) /
+              static_cast<double>(num_data_blocks)) *
+             100.0);
+  const double not_compressed_pcnt =
+      (0 == num_data_blocks) ? 0.0
+                             : ((static_cast<double>(not_compressed_blocks) /
+                                 static_cast<double>(num_data_blocks)) *
+                                100.0);
+  fprintf(stdout, " Compressed: %6" PRIu64 " (%5.1f%%)", compressed_blocks,
+          compressed_pcnt);
+  fprintf(stdout, " Not compressed (ratio): %6" PRIu64 " (%5.1f%%)",
+          ratio_not_compressed_blocks, ratio_not_compressed_pcnt);
+  fprintf(stdout, " Not compressed (abort): %6" PRIu64 " (%5.1f%%)\n",
+          not_compressed_blocks, not_compressed_pcnt);
+  return Status::OK();
+}
+
+// Reads TableProperties prior to opening table reader in order to set up
+// options.
+Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number,
+                                          RandomAccessFileReader* file,
+                                          uint64_t file_size,
+                                          FilePrefetchBuffer* prefetch_buffer) {
+  // TODO: plumb Env::IOActivity
+  const ReadOptions read_options;
+  Status s = ROCKSDB_NAMESPACE::ReadTableProperties(
+      file, file_size, table_magic_number, ioptions_, read_options,
+      &table_properties_,
+      /* memory_allocator= */ nullptr, prefetch_buffer);
+  if (!s.ok()) {
+    if (!silent_) {
+      fprintf(stdout, "Not able to read table properties\n");
+    }
+  }
+  return s;
+}
+
+Status SstFileDumper::SetTableOptionsByMagicNumber(
+    uint64_t table_magic_number) {
+  assert(table_properties_);
+  if (table_magic_number == kBlockBasedTableMagicNumber ||
+      table_magic_number == kLegacyBlockBasedTableMagicNumber) {
+    BlockBasedTableFactory* bbtf = new BlockBasedTableFactory();
+    // To force tail prefetching, we fake reporting two useful reads of 512KB
+    // from the tail.
+    // It needs at least two data points to warm up the stats.
+    bbtf->tail_prefetch_stats()->RecordEffectiveSize(512 * 1024);
+    bbtf->tail_prefetch_stats()->RecordEffectiveSize(512 * 1024);
+
+    options_.table_factory.reset(bbtf);
+    if (!silent_) {
+      fprintf(stdout, "Sst file format: block-based\n");
+    }
+
+    auto& props = table_properties_->user_collected_properties;
+    auto pos = props.find(BlockBasedTablePropertyNames::kIndexType);
+    if (pos != props.end()) {
+      auto index_type_on_file = static_cast<BlockBasedTableOptions::IndexType>(
+          DecodeFixed32(pos->second.c_str()));
+      if (index_type_on_file ==
+          BlockBasedTableOptions::IndexType::kHashSearch) {
+        options_.prefix_extractor.reset(NewNoopTransform());
+      }
+    }
+  } else if (table_magic_number == kPlainTableMagicNumber ||
+             table_magic_number == kLegacyPlainTableMagicNumber) {
+    options_.allow_mmap_reads = true;
+
+    PlainTableOptions plain_table_options;
+    plain_table_options.user_key_len = kPlainTableVariableLength;
+    plain_table_options.bloom_bits_per_key = 0;
+    plain_table_options.hash_table_ratio = 0;
+    plain_table_options.index_sparseness = 1;
+    plain_table_options.huge_page_tlb_size = 0;
+    plain_table_options.encoding_type = kPlain;
+    plain_table_options.full_scan_mode = true;
+
+    options_.table_factory.reset(NewPlainTableFactory(plain_table_options));
+    if (!silent_) {
+      fprintf(stdout, "Sst file format: plain table\n");
+    }
+  } else {
+    char error_msg_buffer[80];
+    snprintf(error_msg_buffer, sizeof(error_msg_buffer) - 1,
+             "Unsupported table magic number --- %lx",
+             (long)table_magic_number);
+    return Status::InvalidArgument(error_msg_buffer);
+  }
+
+  return Status::OK();
+}
+
+Status SstFileDumper::SetOldTableOptions() {
+  assert(table_properties_ == nullptr);
+  options_.table_factory = std::make_shared<BlockBasedTableFactory>();
+  if (!silent_) {
+    fprintf(stdout, "Sst file format: block-based(old version)\n");
+  }
+
+  return Status::OK();
+}
+
+Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num,
+                                     bool has_from, const std::string& from_key,
+                                     bool has_to, const std::string& to_key,
+                                     bool use_from_as_prefix) {
+  if (!table_reader_) {
+    return init_result_;
+  }
+
+  InternalIterator* iter = table_reader_->NewIterator(
+      read_options_, moptions_.prefix_extractor.get(),
+      /*arena=*/nullptr, /*skip_filters=*/false,
+      TableReaderCaller::kSSTDumpTool);
+  uint64_t i = 0;
+  if (has_from) {
+    InternalKey ikey;
+    ikey.SetMinPossibleForUserKey(from_key);
+    iter->Seek(ikey.Encode());
+  } else {
+    iter->SeekToFirst();
+  }
+  for (; iter->Valid(); iter->Next()) {
+    Slice key = iter->key();
+    Slice value = iter->value();
+    ++i;
+    if (read_num > 0 && i > read_num) break;
+
+    ParsedInternalKey ikey;
+    Status pik_status = ParseInternalKey(key, &ikey, true /* log_err_key */);
+    if (!pik_status.ok()) {
+      std::cerr << pik_status.getState() << "\n";
+      continue;
+    }
+
+    // stop if the key returned is no longer prefixed with our 'from' key
+    if (use_from_as_prefix && !ikey.user_key.starts_with(from_key)) {
+      break;
+    }
+
+    // If an end marker was specified, we stop before it
+    if (has_to && BytewiseComparator()->Compare(ikey.user_key, to_key) >= 0) {
+      break;
+    }
+
+    if (print_kv) {
+      if (!decode_blob_index_ || ikey.type != kTypeBlobIndex) {
+        fprintf(stdout, "%s => %s\n",
+                ikey.DebugString(true, output_hex_).c_str(),
+                value.ToString(output_hex_).c_str());
+      } else {
+        BlobIndex blob_index;
+
+        const Status s = blob_index.DecodeFrom(value);
+        if (!s.ok()) {
+          fprintf(stderr, "%s => error decoding blob index\n",
+                  ikey.DebugString(true, output_hex_).c_str());
+          continue;
+        }
+
+        fprintf(stdout, "%s => %s\n",
+                ikey.DebugString(true, output_hex_).c_str(),
+                blob_index.DebugString(output_hex_).c_str());
+      }
+    }
+  }
+
+  read_num_ += i;
+
+  Status ret = iter->status();
+  delete iter;
+  return ret;
+}
+
+// Provides TableProperties to API user
+Status SstFileDumper::ReadTableProperties(
+    std::shared_ptr<const TableProperties>* table_properties) {
+  if (!table_reader_) {
+    return init_result_;
+  }
+
+  *table_properties = table_reader_->GetTableProperties();
+  return init_result_;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.h
new file mode 100644
index 0000000000..1e78959d14
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_dumper.h
@@ -0,0 +1,99 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "db/dbformat.h"
+#include "file/writable_file_writer.h"
+#include "options/cf_options.h"
+#include "rocksdb/advanced_options.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class SstFileDumper {
+ public:
+  explicit SstFileDumper(const Options& options, const std::string& file_name,
+                         Temperature file_temp, size_t readahead_size,
+                         bool verify_checksum, bool output_hex,
+                         bool decode_blob_index,
+                         const EnvOptions& soptions = EnvOptions(),
+                         bool silent = false);
+
+  Status ReadSequential(bool print_kv, uint64_t read_num, bool has_from,
+                        const std::string& from_key, bool has_to,
+                        const std::string& to_key,
+                        bool use_from_as_prefix = false);
+
+  Status ReadTableProperties(
+      std::shared_ptr<const TableProperties>* table_properties);
+  uint64_t GetReadNumber() { return read_num_; }
+  TableProperties* GetInitTableProperties() { return table_properties_.get(); }
+
+  Status VerifyChecksum();
+  Status DumpTable(const std::string& out_filename);
+  Status getStatus() { return init_result_; }
+
+  Status ShowAllCompressionSizes(
+      size_t block_size,
+      const std::vector<std::pair<CompressionType, const char*>>&
+          compression_types,
+      int32_t compress_level_from, int32_t compress_level_to,
+      uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes,
+      uint64_t max_dict_buffer_bytes, bool use_zstd_dict_trainer);
+
+  Status ShowCompressionSize(size_t block_size, CompressionType compress_type,
+                             const CompressionOptions& compress_opt);
+
+ private:
+  // Get the TableReader implementation for the sst file
+  Status GetTableReader(const std::string& file_path);
+  Status ReadTableProperties(uint64_t table_magic_number,
+                             RandomAccessFileReader* file, uint64_t file_size,
+                             FilePrefetchBuffer* prefetch_buffer);
+
+  Status CalculateCompressedTableSize(const TableBuilderOptions& tb_options,
+                                      size_t block_size,
+                                      uint64_t* num_data_blocks,
+                                      uint64_t* compressed_table_size);
+
+  Status SetTableOptionsByMagicNumber(uint64_t table_magic_number);
+  Status SetOldTableOptions();
+
+  // Helper function to call the factory with settings specific to the
+  // factory implementation
+  Status NewTableReader(const ImmutableOptions& ioptions,
+                        const EnvOptions& soptions,
+                        const InternalKeyComparator& internal_comparator,
+                        uint64_t file_size,
+                        std::unique_ptr<TableReader>* table_reader);
+
+  std::string file_name_;
+  uint64_t read_num_;
+  Temperature file_temp_;
+  bool output_hex_;
+  bool decode_blob_index_;
+  EnvOptions soptions_;
+  // less verbose in stdout/stderr
+  bool silent_;
+
+  // options_ and internal_comparator_ will also be used in ReadSequential
+  // internally (specifically, seek-related operations)
+  Options options_;
+
+  Status init_result_;
+  std::unique_ptr<TableReader> table_reader_;
+  std::unique_ptr<RandomAccessFileReader> file_;
+
+  const ImmutableOptions ioptions_;
+  const MutableCFOptions moptions_;
+  ReadOptions read_options_;
+  InternalKeyComparator internal_comparator_;
+  std::unique_ptr<TableProperties> table_properties_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_reader.cc
new file mode 100644
index 0000000000..533b7cd6ac
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_reader.cc
@@ -0,0 +1,101 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+
+#include "rocksdb/sst_file_reader.h"
+
+#include "db/arena_wrapped_db_iter.h"
+#include "db/db_iter.h"
+#include "db/dbformat.h"
+#include "file/random_access_file_reader.h"
+#include "options/cf_options.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "table/get_context.h"
+#include "table/table_builder.h"
+#include "table/table_reader.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct SstFileReader::Rep {
+  Options options;
+  EnvOptions soptions;
+  ImmutableOptions ioptions;
+  MutableCFOptions moptions;
+
+  std::unique_ptr<TableReader> table_reader;
+
+  Rep(const Options& opts)
+      : options(opts),
+        soptions(options),
+        ioptions(options),
+        moptions(ColumnFamilyOptions(options)) {}
+};
+
+SstFileReader::SstFileReader(const Options& options) : rep_(new Rep(options)) {}
+
+SstFileReader::~SstFileReader() {}
+
+Status SstFileReader::Open(const std::string& file_path) {
+  auto r = rep_.get();
+  Status s;
+  uint64_t file_size = 0;
+  std::unique_ptr<FSRandomAccessFile> file;
+  std::unique_ptr<RandomAccessFileReader> file_reader;
+  FileOptions fopts(r->soptions);
+  const auto& fs = r->options.env->GetFileSystem();
+
+  s = fs->GetFileSize(file_path, fopts.io_options, &file_size, nullptr);
+  if (s.ok()) {
+    s = fs->NewRandomAccessFile(file_path, fopts, &file, nullptr);
+  }
+  if (s.ok()) {
+    file_reader.reset(new RandomAccessFileReader(std::move(file), file_path));
+  }
+  if (s.ok()) {
+    TableReaderOptions t_opt(r->ioptions, r->moptions.prefix_extractor,
+                             r->soptions, r->ioptions.internal_comparator,
+                             r->moptions.block_protection_bytes_per_key);
+    // Allow open file with global sequence number for backward compatibility.
+    t_opt.largest_seqno = kMaxSequenceNumber;
+    s = r->options.table_factory->NewTableReader(t_opt, std::move(file_reader),
+                                                 file_size, &r->table_reader);
+  }
+  return s;
+}
+
+Iterator* SstFileReader::NewIterator(const ReadOptions& roptions) {
+  assert(roptions.io_activity == Env::IOActivity::kUnknown);
+  auto r = rep_.get();
+  auto sequence = roptions.snapshot != nullptr
+                      ? roptions.snapshot->GetSequenceNumber()
+                      : kMaxSequenceNumber;
+  ArenaWrappedDBIter* res = new ArenaWrappedDBIter();
+  res->Init(r->options.env, roptions, r->ioptions, r->moptions,
+            nullptr /* version */, sequence,
+            r->moptions.max_sequential_skip_in_iterations,
+            0 /* version_number */, nullptr /* read_callback */,
+            nullptr /* db_impl */, nullptr /* cfd */,
+            true /* expose_blob_index */, false /* allow_refresh */);
+  auto internal_iter = r->table_reader->NewIterator(
+      res->GetReadOptions(), r->moptions.prefix_extractor.get(),
+      res->GetArena(), false /* skip_filters */,
+      TableReaderCaller::kSSTFileReader);
+  res->SetIterUnderDBIter(internal_iter);
+  return res;
+}
+
+std::shared_ptr<const TableProperties> SstFileReader::GetTableProperties()
+    const {
+  return rep_->table_reader->GetTableProperties();
+}
+
+Status SstFileReader::VerifyChecksum(const ReadOptions& read_options) {
+  assert(read_options.io_activity == Env::IOActivity::kUnknown);
+  return rep_->table_reader->VerifyChecksum(read_options,
+                                            TableReaderCaller::kSSTFileReader);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer.cc
new file mode 100644
index 0000000000..d187b741e3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer.cc
@@ -0,0 +1,436 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "rocksdb/sst_file_writer.h"
+
+#include <vector>
+
+#include "db/db_impl/db_impl.h"
+#include "db/dbformat.h"
+#include "file/writable_file_writer.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/table.h"
+#include "table/block_based/block_based_table_builder.h"
+#include "table/sst_file_writer_collectors.h"
+#include "test_util/sync_point.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+const std::string ExternalSstFilePropertyNames::kVersion =
+    "rocksdb.external_sst_file.version";
+const std::string ExternalSstFilePropertyNames::kGlobalSeqno =
+    "rocksdb.external_sst_file.global_seqno";
+
+
+const size_t kFadviseTrigger = 1024 * 1024;  // 1MB
+
+struct SstFileWriter::Rep {
+  Rep(const EnvOptions& _env_options, const Options& options,
+      Env::IOPriority _io_priority, const Comparator* _user_comparator,
+      ColumnFamilyHandle* _cfh, bool _invalidate_page_cache, bool _skip_filters,
+      std::string _db_session_id)
+      : env_options(_env_options),
+        ioptions(options),
+        mutable_cf_options(options),
+        io_priority(_io_priority),
+        internal_comparator(_user_comparator),
+        cfh(_cfh),
+        invalidate_page_cache(_invalidate_page_cache),
+        skip_filters(_skip_filters),
+        db_session_id(_db_session_id) {}
+
+  std::unique_ptr<WritableFileWriter> file_writer;
+  std::unique_ptr<TableBuilder> builder;
+  EnvOptions env_options;
+  ImmutableOptions ioptions;
+  MutableCFOptions mutable_cf_options;
+  Env::IOPriority io_priority;
+  InternalKeyComparator internal_comparator;
+  ExternalSstFileInfo file_info;
+  InternalKey ikey;
+  std::string column_family_name;
+  ColumnFamilyHandle* cfh;
+  // If true, we will give the OS a hint that this file's pages are not needed
+  // every time we write 1MB to the file.
+  bool invalidate_page_cache;
+  // The size of the file during the last time we called Fadvise to remove
+  // cached pages from the page cache.
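+  // Worked example (a sketch, not normative): with kFadviseTrigger = 1 MB,
+  // suppose last_fadvise_size is 0 and FileSize() has grown to 1.5 MB by the
+  // next InvalidatePageCache() call below. Then bytes_since_last_fadvise =
+  // 1.5 MB > 1 MB, so file_writer->InvalidateCache(0, 0) is issued for the
+  // whole file and last_fadvise_size becomes 1.5 MB.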
+ uint64_t last_fadvise_size = 0; + bool skip_filters; + std::string db_session_id; + uint64_t next_file_number = 1; + + Status AddImpl(const Slice& user_key, const Slice& value, + ValueType value_type) { + if (!builder) { + return Status::InvalidArgument("File is not opened"); + } + + if (file_info.num_entries == 0) { + file_info.smallest_key.assign(user_key.data(), user_key.size()); + } else { + if (internal_comparator.user_comparator()->Compare( + user_key, file_info.largest_key) <= 0) { + // Make sure that keys are added in order + return Status::InvalidArgument( + "Keys must be added in strict ascending order."); + } + } + + assert(value_type == kTypeValue || value_type == kTypeMerge || + value_type == kTypeDeletion || + value_type == kTypeDeletionWithTimestamp); + + constexpr SequenceNumber sequence_number = 0; + + ikey.Set(user_key, sequence_number, value_type); + + builder->Add(ikey.Encode(), value); + + // update file info + file_info.num_entries++; + file_info.largest_key.assign(user_key.data(), user_key.size()); + file_info.file_size = builder->FileSize(); + + InvalidatePageCache(false /* closing */).PermitUncheckedError(); + return Status::OK(); + } + + Status Add(const Slice& user_key, const Slice& value, ValueType value_type) { + if (internal_comparator.user_comparator()->timestamp_size() != 0) { + return Status::InvalidArgument("Timestamp size mismatch"); + } + + return AddImpl(user_key, value, value_type); + } + + Status Add(const Slice& user_key, const Slice& timestamp, const Slice& value, + ValueType value_type) { + const size_t timestamp_size = timestamp.size(); + + if (internal_comparator.user_comparator()->timestamp_size() != + timestamp_size) { + return Status::InvalidArgument("Timestamp size mismatch"); + } + + const size_t user_key_size = user_key.size(); + + if (user_key.data() + user_key_size == timestamp.data()) { + Slice user_key_with_ts(user_key.data(), user_key_size + timestamp_size); + return AddImpl(user_key_with_ts, value, value_type); + } + + std::string user_key_with_ts; + user_key_with_ts.reserve(user_key_size + timestamp_size); + user_key_with_ts.append(user_key.data(), user_key_size); + user_key_with_ts.append(timestamp.data(), timestamp_size); + + return AddImpl(user_key_with_ts, value, value_type); + } + + Status DeleteRangeImpl(const Slice& begin_key, const Slice& end_key) { + if (!builder) { + return Status::InvalidArgument("File is not opened"); + } + int cmp = internal_comparator.user_comparator()->CompareWithoutTimestamp( + begin_key, end_key); + if (cmp > 0) { + // It's an empty range where endpoints appear mistaken. Don't bother + // applying it to the DB, and return an error to the user. + return Status::InvalidArgument("end key comes before start key"); + } else if (cmp == 0) { + // It's an empty range. Don't bother applying it to the DB. 
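+      // (e.g. DeleteRange("k", "k") falls here: the half-open range [k, k)
+      // cannot cover any key, so nothing needs to be recorded.)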
+      return Status::OK();
+    }
+
+    RangeTombstone tombstone(begin_key, end_key, 0 /* Sequence Number */);
+    if (file_info.num_range_del_entries == 0) {
+      file_info.smallest_range_del_key.assign(tombstone.start_key_.data(),
+                                              tombstone.start_key_.size());
+      file_info.largest_range_del_key.assign(tombstone.end_key_.data(),
+                                             tombstone.end_key_.size());
+    } else {
+      if (internal_comparator.user_comparator()->Compare(
+              tombstone.start_key_, file_info.smallest_range_del_key) < 0) {
+        file_info.smallest_range_del_key.assign(tombstone.start_key_.data(),
+                                                tombstone.start_key_.size());
+      }
+      if (internal_comparator.user_comparator()->Compare(
+              tombstone.end_key_, file_info.largest_range_del_key) > 0) {
+        file_info.largest_range_del_key.assign(tombstone.end_key_.data(),
+                                               tombstone.end_key_.size());
+      }
+    }
+
+    auto ikey_and_end_key = tombstone.Serialize();
+    builder->Add(ikey_and_end_key.first.Encode(), ikey_and_end_key.second);
+
+    // update file info
+    file_info.num_range_del_entries++;
+    file_info.file_size = builder->FileSize();
+
+    InvalidatePageCache(false /* closing */).PermitUncheckedError();
+    return Status::OK();
+  }
+
+  Status DeleteRange(const Slice& begin_key, const Slice& end_key) {
+    if (internal_comparator.user_comparator()->timestamp_size() != 0) {
+      return Status::InvalidArgument("Timestamp size mismatch");
+    }
+    return DeleteRangeImpl(begin_key, end_key);
+  }
+
+  // begin_key and end_key should be user keys without timestamp.
+  Status DeleteRange(const Slice& begin_key, const Slice& end_key,
+                     const Slice& timestamp) {
+    const size_t timestamp_size = timestamp.size();
+
+    if (internal_comparator.user_comparator()->timestamp_size() !=
+        timestamp_size) {
+      return Status::InvalidArgument("Timestamp size mismatch");
+    }
+
+    const size_t begin_key_size = begin_key.size();
+    const size_t end_key_size = end_key.size();
+    if (begin_key.data() + begin_key_size == timestamp.data() ||
+        end_key.data() + end_key_size == timestamp.data()) {
+      assert(memcmp(begin_key.data() + begin_key_size,
+                    end_key.data() + end_key_size, timestamp_size) == 0);
+      Slice begin_key_with_ts(begin_key.data(),
+                              begin_key_size + timestamp_size);
+      Slice end_key_with_ts(end_key.data(), end_key.size() + timestamp_size);
+      return DeleteRangeImpl(begin_key_with_ts, end_key_with_ts);
+    }
+    std::string begin_key_with_ts;
+    begin_key_with_ts.reserve(begin_key_size + timestamp_size);
+    begin_key_with_ts.append(begin_key.data(), begin_key_size);
+    begin_key_with_ts.append(timestamp.data(), timestamp_size);
+    std::string end_key_with_ts;
+    end_key_with_ts.reserve(end_key_size + timestamp_size);
+    end_key_with_ts.append(end_key.data(), end_key_size);
+    end_key_with_ts.append(timestamp.data(), timestamp_size);
+    return DeleteRangeImpl(begin_key_with_ts, end_key_with_ts);
+  }
+
+  Status InvalidatePageCache(bool closing) {
+    Status s = Status::OK();
+    if (invalidate_page_cache == false) {
+      // Fadvise disabled
+      return s;
+    }
+    uint64_t bytes_since_last_fadvise =
+        builder->FileSize() - last_fadvise_size;
+    if (bytes_since_last_fadvise > kFadviseTrigger || closing) {
+      TEST_SYNC_POINT_CALLBACK("SstFileWriter::Rep::InvalidatePageCache",
+                               &(bytes_since_last_fadvise));
+      // Tell the OS that we don't need this file in page cache
+      s = file_writer->InvalidateCache(0, 0);
+      if (s.IsNotSupported()) {
+        // NotSupported is fine as it could be a file type that doesn't use
+        // page cache.
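+        // (e.g. an in-memory or otherwise non-POSIX FileSystem may not
+        // implement fadvise-style hints; treating that as success is
+        // intentional.)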
+        s = Status::OK();
+      }
+      last_fadvise_size = builder->FileSize();
+    }
+    return s;
+  }
+};
+
+SstFileWriter::SstFileWriter(const EnvOptions& env_options,
+                             const Options& options,
+                             const Comparator* user_comparator,
+                             ColumnFamilyHandle* column_family,
+                             bool invalidate_page_cache,
+                             Env::IOPriority io_priority, bool skip_filters)
+    : rep_(new Rep(env_options, options, io_priority, user_comparator,
+                   column_family, invalidate_page_cache, skip_filters,
+                   DBImpl::GenerateDbSessionId(options.env))) {
+  // SstFileWriter is used to create sst files that can be added to the
+  // database later. Therefore, no real db_id and db_session_id are associated
+  // with it. Here we mimic the way db_session_id behaves by getting a
+  // db_session_id for each SstFileWriter, and (later below) assign unique
+  // file numbers in the table properties. The db_id is set to be "SST Writer"
+  // for clarity.
+
+  rep_->file_info.file_size = 0;
+}
+
+SstFileWriter::~SstFileWriter() {
+  if (rep_->builder) {
+    // User did not call Finish() or Finish() failed. We need to
+    // abandon the builder.
+    rep_->builder->Abandon();
+  }
+}
+
+Status SstFileWriter::Open(const std::string& file_path) {
+  Rep* r = rep_.get();
+  Status s;
+  std::unique_ptr<FSWritableFile> sst_file;
+  FileOptions cur_file_opts(r->env_options);
+  s = r->ioptions.env->GetFileSystem()->NewWritableFile(
+      file_path, cur_file_opts, &sst_file, nullptr);
+  if (!s.ok()) {
+    return s;
+  }
+
+  sst_file->SetIOPriority(r->io_priority);
+
+  CompressionType compression_type;
+  CompressionOptions compression_opts;
+  if (r->mutable_cf_options.bottommost_compression !=
+      kDisableCompressionOption) {
+    compression_type = r->mutable_cf_options.bottommost_compression;
+    if (r->mutable_cf_options.bottommost_compression_opts.enabled) {
+      compression_opts = r->mutable_cf_options.bottommost_compression_opts;
+    } else {
+      compression_opts = r->mutable_cf_options.compression_opts;
+    }
+  } else if (!r->mutable_cf_options.compression_per_level.empty()) {
+    // Use the compression of the last level if we have per-level compression
+    compression_type = *(r->mutable_cf_options.compression_per_level.rbegin());
+    compression_opts = r->mutable_cf_options.compression_opts;
+  } else {
+    compression_type = r->mutable_cf_options.compression;
+    compression_opts = r->mutable_cf_options.compression_opts;
+  }
+
+  IntTblPropCollectorFactories int_tbl_prop_collector_factories;
+
+  // SstFileWriter properties collector to add SstFileWriter version.
+  int_tbl_prop_collector_factories.emplace_back(
+      new SstFileWriterPropertiesCollectorFactory(2 /* version */,
+                                                  0 /* global_seqno*/));
+
+  // User collector factories
+  auto user_collector_factories =
+      r->ioptions.table_properties_collector_factories;
+  for (size_t i = 0; i < user_collector_factories.size(); i++) {
+    int_tbl_prop_collector_factories.emplace_back(
+        new UserKeyTablePropertiesCollectorFactory(
+            user_collector_factories[i]));
+  }
+  int unknown_level = -1;
+  uint32_t cf_id;
+
+  if (r->cfh != nullptr) {
+    // user explicitly specified that this file will be ingested into cfh,
+    // we can persist this information in the file.
+    cf_id = r->cfh->GetID();
+    r->column_family_name = r->cfh->GetName();
+  } else {
+    r->column_family_name = "";
+    cf_id = TablePropertiesCollectorFactory::Context::kUnknownColumnFamily;
+  }
+
+  // TODO: it would be better to set oldest_key_time to be used for getting the
+  // approximate time of ingested keys.
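+  // To make the compression selection above concrete (a hedged example,
+  // values arbitrary): with bottommost_compression unset and
+  // compression_per_level = {kNoCompression, kSnappyCompression, kZSTD},
+  // this writer picks kZSTD, the last level's setting, on the assumption
+  // that externally built files are usually ingested into the bottom level.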
+ TableBuilderOptions table_builder_options( + r->ioptions, r->mutable_cf_options, r->internal_comparator, + &int_tbl_prop_collector_factories, compression_type, compression_opts, + cf_id, r->column_family_name, unknown_level, false /* is_bottommost */, + TableFileCreationReason::kMisc, 0 /* oldest_key_time */, + 0 /* file_creation_time */, "SST Writer" /* db_id */, r->db_session_id, + 0 /* target_file_size */, r->next_file_number); + // External SST files used to each get a unique session id. Now for + // slightly better uniqueness probability in constructing cache keys, we + // assign fake file numbers to each file (into table properties) and keep + // the same session id for the life of the SstFileWriter. + r->next_file_number++; + // XXX: when we can remove skip_filters from the SstFileWriter public API + // we can remove it from TableBuilderOptions. + table_builder_options.skip_filters = r->skip_filters; + FileTypeSet tmp_set = r->ioptions.checksum_handoff_file_types; + r->file_writer.reset(new WritableFileWriter( + std::move(sst_file), file_path, r->env_options, r->ioptions.clock, + nullptr /* io_tracer */, nullptr /* stats */, r->ioptions.listeners, + r->ioptions.file_checksum_gen_factory.get(), + tmp_set.Contains(FileType::kTableFile), false)); + + // TODO(tec) : If table_factory is using compressed block cache, we will + // be adding the external sst file blocks into it, which is wasteful. + r->builder.reset(r->ioptions.table_factory->NewTableBuilder( + table_builder_options, r->file_writer.get())); + + r->file_info = ExternalSstFileInfo(); + r->file_info.file_path = file_path; + r->file_info.version = 2; + return s; +} + +Status SstFileWriter::Add(const Slice& user_key, const Slice& value) { + return rep_->Add(user_key, value, ValueType::kTypeValue); +} + +Status SstFileWriter::Put(const Slice& user_key, const Slice& value) { + return rep_->Add(user_key, value, ValueType::kTypeValue); +} + +Status SstFileWriter::Put(const Slice& user_key, const Slice& timestamp, + const Slice& value) { + return rep_->Add(user_key, timestamp, value, ValueType::kTypeValue); +} + +Status SstFileWriter::Merge(const Slice& user_key, const Slice& value) { + return rep_->Add(user_key, value, ValueType::kTypeMerge); +} + +Status SstFileWriter::Delete(const Slice& user_key) { + return rep_->Add(user_key, Slice(), ValueType::kTypeDeletion); +} + +Status SstFileWriter::Delete(const Slice& user_key, const Slice& timestamp) { + return rep_->Add(user_key, timestamp, Slice(), + ValueType::kTypeDeletionWithTimestamp); +} + +Status SstFileWriter::DeleteRange(const Slice& begin_key, + const Slice& end_key) { + return rep_->DeleteRange(begin_key, end_key); +} + +Status SstFileWriter::DeleteRange(const Slice& begin_key, const Slice& end_key, + const Slice& timestamp) { + return rep_->DeleteRange(begin_key, end_key, timestamp); +} + +Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) { + Rep* r = rep_.get(); + if (!r->builder) { + return Status::InvalidArgument("File is not opened"); + } + if (r->file_info.num_entries == 0 && + r->file_info.num_range_del_entries == 0) { + return Status::InvalidArgument("Cannot create sst file with no entries"); + } + + Status s = r->builder->Finish(); + r->file_info.file_size = r->builder->FileSize(); + + if (s.ok()) { + s = r->file_writer->Sync(r->ioptions.use_fsync); + r->InvalidatePageCache(true /* closing */).PermitUncheckedError(); + if (s.ok()) { + s = r->file_writer->Close(); + } + } + if (s.ok()) { + r->file_info.file_checksum = r->file_writer->GetFileChecksum(); + 
r->file_info.file_checksum_func_name =
+        r->file_writer->GetFileChecksumFuncName();
+  }
+  if (!s.ok()) {
+    r->ioptions.env->DeleteFile(r->file_info.file_path);
+  }
+
+  if (file_info != nullptr) {
+    *file_info = r->file_info;
+  }
+
+  r->builder.reset();
+  return s;
+}
+
+uint64_t SstFileWriter::FileSize() { return rep_->file_info.file_size; }
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer_collectors.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer_collectors.h
new file mode 100644
index 0000000000..486315fb5a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/sst_file_writer_collectors.h
@@ -0,0 +1,95 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <string>
+
+#include "db/table_properties_collector.h"
+#include "rocksdb/types.h"
+#include "util/coding.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Table Properties that are specific to tables created by SstFileWriter.
+struct ExternalSstFilePropertyNames {
+  // value of this property is a fixed uint32 number.
+  static const std::string kVersion;
+  // value of this property is a fixed uint64 number.
+  static const std::string kGlobalSeqno;
+};
+
+// PropertiesCollector used to add properties specific to tables
+// generated by SstFileWriter
+class SstFileWriterPropertiesCollector : public IntTblPropCollector {
+ public:
+  explicit SstFileWriterPropertiesCollector(int32_t version,
+                                            SequenceNumber global_seqno)
+      : version_(version), global_seqno_(global_seqno) {}
+
+  virtual Status InternalAdd(const Slice& /*key*/, const Slice& /*value*/,
+                             uint64_t /*file_size*/) override {
+    // Intentionally left blank. Have no interest in collecting stats for
+    // individual key/value pairs.
+    return Status::OK();
+  }
+
+  virtual void BlockAdd(uint64_t /* block_uncomp_bytes */,
+                        uint64_t /* block_compressed_bytes_fast */,
+                        uint64_t /* block_compressed_bytes_slow */) override {
+    // Intentionally left blank. No interest in collecting stats for
+    // blocks.
+    return;
+  }
+
+  virtual Status Finish(UserCollectedProperties* properties) override {
+    // File version
+    std::string version_val;
+    PutFixed32(&version_val, static_cast<uint32_t>(version_));
+    properties->insert({ExternalSstFilePropertyNames::kVersion, version_val});
+
+    // Global Sequence number
+    std::string seqno_val;
+    PutFixed64(&seqno_val, static_cast<uint64_t>(global_seqno_));
+    properties->insert({ExternalSstFilePropertyNames::kGlobalSeqno, seqno_val});
+
+    return Status::OK();
+  }
+
+  virtual const char* Name() const override {
+    return "SstFileWriterPropertiesCollector";
+  }
+
+  virtual UserCollectedProperties GetReadableProperties() const override {
+    return {{ExternalSstFilePropertyNames::kVersion, std::to_string(version_)}};
+  }
+
+ private:
+  int32_t version_;
+  SequenceNumber global_seqno_;
+};
+
+class SstFileWriterPropertiesCollectorFactory
+    : public IntTblPropCollectorFactory {
+ public:
+  explicit SstFileWriterPropertiesCollectorFactory(int32_t version,
+                                                   SequenceNumber global_seqno)
+      : version_(version), global_seqno_(global_seqno) {}
+
+  virtual IntTblPropCollector* CreateIntTblPropCollector(
+      uint32_t /*column_family_id*/, int /* level_at_creation */) override {
+    return new SstFileWriterPropertiesCollector(version_, global_seqno_);
+  }
+
+  virtual const char* Name() const override {
+    return "SstFileWriterPropertiesCollector";
+  }
+
+ private:
+  int32_t version_;
+  SequenceNumber global_seqno_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_builder.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_builder.h
new file mode 100644
index 0000000000..6d98bce707
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_builder.h
@@ -0,0 +1,228 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+ +#pragma once + +#include + +#include +#include +#include + +#include "db/dbformat.h" +#include "db/seqno_to_time_mapping.h" +#include "db/table_properties_collector.h" +#include "file/writable_file_writer.h" +#include "options/cf_options.h" +#include "rocksdb/options.h" +#include "rocksdb/table_properties.h" +#include "table/unique_id_impl.h" +#include "trace_replay/block_cache_tracer.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; +class Status; + +struct TableReaderOptions { + // @param skip_filters Disables loading/accessing the filter block + TableReaderOptions( + const ImmutableOptions& _ioptions, + const std::shared_ptr& _prefix_extractor, + const EnvOptions& _env_options, + const InternalKeyComparator& _internal_comparator, + uint8_t _block_protection_bytes_per_key, bool _skip_filters = false, + bool _immortal = false, bool _force_direct_prefetch = false, + int _level = -1, BlockCacheTracer* const _block_cache_tracer = nullptr, + size_t _max_file_size_for_l0_meta_pin = 0, + const std::string& _cur_db_session_id = "", uint64_t _cur_file_num = 0, + UniqueId64x2 _unique_id = {}, SequenceNumber _largest_seqno = 0, + uint64_t _tail_size = 0) + : ioptions(_ioptions), + prefix_extractor(_prefix_extractor), + env_options(_env_options), + internal_comparator(_internal_comparator), + skip_filters(_skip_filters), + immortal(_immortal), + force_direct_prefetch(_force_direct_prefetch), + level(_level), + largest_seqno(_largest_seqno), + block_cache_tracer(_block_cache_tracer), + max_file_size_for_l0_meta_pin(_max_file_size_for_l0_meta_pin), + cur_db_session_id(_cur_db_session_id), + cur_file_num(_cur_file_num), + unique_id(_unique_id), + block_protection_bytes_per_key(_block_protection_bytes_per_key), + tail_size(_tail_size) {} + + const ImmutableOptions& ioptions; + const std::shared_ptr& prefix_extractor; + const EnvOptions& env_options; + const InternalKeyComparator& internal_comparator; + // This is only used for BlockBasedTable (reader) + bool skip_filters; + // Whether the table will be valid as long as the DB is open + bool immortal; + // When data prefetching is needed, even if direct I/O is off, read data to + // fetch into RocksDB's buffer, rather than relying + // RandomAccessFile::Prefetch(). + bool force_direct_prefetch; + // What level this table/file is on, -1 for "not set, don't know." Used + // for level-specific statistics. + int level; + // largest seqno in the table (or 0 means unknown???) + SequenceNumber largest_seqno; + BlockCacheTracer* const block_cache_tracer; + // Largest L0 file size whose meta-blocks may be pinned (can be zero when + // unknown). 
+ const size_t max_file_size_for_l0_meta_pin; + + std::string cur_db_session_id; + + uint64_t cur_file_num; + + // Known unique_id or {}, kNullUniqueId64x2 means unknown + UniqueId64x2 unique_id; + + uint8_t block_protection_bytes_per_key; + + uint64_t tail_size; +}; + +struct TableBuilderOptions { + TableBuilderOptions( + const ImmutableOptions& _ioptions, const MutableCFOptions& _moptions, + const InternalKeyComparator& _internal_comparator, + const IntTblPropCollectorFactories* _int_tbl_prop_collector_factories, + CompressionType _compression_type, + const CompressionOptions& _compression_opts, uint32_t _column_family_id, + const std::string& _column_family_name, int _level, + bool _is_bottommost = false, + TableFileCreationReason _reason = TableFileCreationReason::kMisc, + const int64_t _oldest_key_time = 0, + const uint64_t _file_creation_time = 0, const std::string& _db_id = "", + const std::string& _db_session_id = "", + const uint64_t _target_file_size = 0, const uint64_t _cur_file_num = 0) + : ioptions(_ioptions), + moptions(_moptions), + internal_comparator(_internal_comparator), + int_tbl_prop_collector_factories(_int_tbl_prop_collector_factories), + compression_type(_compression_type), + compression_opts(_compression_opts), + column_family_id(_column_family_id), + column_family_name(_column_family_name), + oldest_key_time(_oldest_key_time), + target_file_size(_target_file_size), + file_creation_time(_file_creation_time), + db_id(_db_id), + db_session_id(_db_session_id), + level_at_creation(_level), + is_bottommost(_is_bottommost), + reason(_reason), + cur_file_num(_cur_file_num) {} + + const ImmutableOptions& ioptions; + const MutableCFOptions& moptions; + const InternalKeyComparator& internal_comparator; + const IntTblPropCollectorFactories* int_tbl_prop_collector_factories; + const CompressionType compression_type; + const CompressionOptions& compression_opts; + const uint32_t column_family_id; + const std::string& column_family_name; + const int64_t oldest_key_time; + const uint64_t target_file_size; + const uint64_t file_creation_time; + const std::string db_id; + const std::string db_session_id; + // BEGIN for FilterBuildingContext + const int level_at_creation; + const bool is_bottommost; + const TableFileCreationReason reason; + // END for FilterBuildingContext + + // XXX: only used by BlockBasedTableBuilder for SstFileWriter. If you + // want to skip filters, that should be (for example) null filter_policy + // in the table options of the ioptions.table_factory + bool skip_filters = false; + const uint64_t cur_file_num; +}; + +// TableBuilder provides the interface used to build a Table +// (an immutable and sorted map from keys to values). +// +// Multiple threads can invoke const methods on a TableBuilder without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same TableBuilder must use +// external synchronization. +class TableBuilder { + public: + // REQUIRES: Either Finish() or Abandon() has been called. + virtual ~TableBuilder() {} + + // Add key,value to the table being constructed. + // REQUIRES: key is after any previously added key according to comparator. + // REQUIRES: Finish(), Abandon() have not been called + virtual void Add(const Slice& key, const Slice& value) = 0; + + // Return non-ok iff some error has been detected. + virtual Status status() const = 0; + + // Return non-ok iff some error happens during IO. + virtual IOStatus io_status() const = 0; + + // Finish building the table. 
+ // REQUIRES: Finish(), Abandon() have not been called + virtual Status Finish() = 0; + + // Indicate that the contents of this builder should be abandoned. + // If the caller is not going to call Finish(), it must call Abandon() + // before destroying this builder. + // REQUIRES: Finish(), Abandon() have not been called + virtual void Abandon() = 0; + + // Number of calls to Add() so far. + virtual uint64_t NumEntries() const = 0; + + // Whether the output file is completely empty. It has neither entries + // or tombstones. + virtual bool IsEmpty() const { + return NumEntries() == 0 && GetTableProperties().num_range_deletions == 0; + } + + // Size of the file generated so far. If invoked after a successful + // Finish() call, returns the size of the final generated file. + virtual uint64_t FileSize() const = 0; + + // Estimated size of the file generated so far. This is used when + // FileSize() cannot estimate final SST size, e.g. parallel compression + // is enabled. + virtual uint64_t EstimatedFileSize() const { return FileSize(); } + + virtual uint64_t GetTailSize() const { return 0; } + + // If the user defined table properties collector suggest the file to + // be further compacted. + virtual bool NeedCompact() const { return false; } + + // Returns table properties + virtual TableProperties GetTableProperties() const = 0; + + // Return file checksum + virtual std::string GetFileChecksum() const = 0; + + // Return file checksum function name + virtual const char* GetFileChecksumFuncName() const = 0; + + // Set the sequence number to time mapping + virtual void SetSeqnoTimeTableProperties( + const std::string& /*encoded_seqno_to_time_mapping*/, + uint64_t /*oldest_ancestor_time*/){}; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_factory.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_factory.cc new file mode 100644 index 0000000000..29b6c89f81 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_factory.cc @@ -0,0 +1,52 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
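The TableBuilder interface above carries a strict lifecycle: keys must arrive in comparator order, and exactly one of Finish() or Abandon() must run before the builder is destroyed. A toy illustration of that contract, assuming plain lexicographic string keys (ToyBuilder is hypothetical, not a RocksDB type):

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Hypothetical in-memory builder obeying the TableBuilder lifecycle:
// sorted Add() calls, then exactly one of Finish() or Abandon().
class ToyBuilder {
 public:
  // Returns false instead of asserting when keys arrive out of order
  // or after the builder is done.
  bool Add(const std::string& key, const std::string& value) {
    if (done_) return false;
    if (!entries_.empty() && key <= entries_.back().first) return false;
    entries_.emplace_back(key, value);
    return true;
  }
  uint64_t NumEntries() const { return entries_.size(); }
  bool IsEmpty() const { return entries_.empty(); }
  void Finish() { done_ = true; }                     // keep the contents
  void Abandon() { done_ = true; entries_.clear(); }  // discard them
 private:
  std::vector<std::pair<std::string, std::string>> entries_;
  bool done_ = false;
};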
+
+#include <mutex>
+
+#include "rocksdb/convenience.h"
+#include "rocksdb/table.h"
+#include "rocksdb/utilities/customizable_util.h"
+#include "rocksdb/utilities/object_registry.h"
+#include "table/block_based/block_based_table_factory.h"
+#include "table/cuckoo/cuckoo_table_factory.h"
+#include "table/plain/plain_table_factory.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+static void RegisterTableFactories(const std::string& /*arg*/) {
+  static std::once_flag loaded;
+  std::call_once(loaded, []() {
+    auto library = ObjectLibrary::Default();
+    library->AddFactory<TableFactory>(
+        TableFactory::kBlockBasedTableName(),
+        [](const std::string& /*uri*/, std::unique_ptr<TableFactory>* guard,
+           std::string* /* errmsg */) {
+          guard->reset(new BlockBasedTableFactory());
+          return guard->get();
+        });
+    library->AddFactory<TableFactory>(
+        TableFactory::kPlainTableName(),
+        [](const std::string& /*uri*/, std::unique_ptr<TableFactory>* guard,
+           std::string* /* errmsg */) {
+          guard->reset(new PlainTableFactory());
+          return guard->get();
+        });
+    library->AddFactory<TableFactory>(
+        TableFactory::kCuckooTableName(),
+        [](const std::string& /*uri*/, std::unique_ptr<TableFactory>* guard,
+           std::string* /* errmsg */) {
+          guard->reset(new CuckooTableFactory());
+          return guard->get();
+        });
+  });
+}
+
+Status TableFactory::CreateFromString(const ConfigOptions& config_options,
+                                      const std::string& value,
+                                      std::shared_ptr<TableFactory>* factory) {
+  RegisterTableFactories("");
+  return LoadSharedObject<TableFactory>(config_options, value, factory);
+}
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties.cc
new file mode 100644
index 0000000000..819d4da2ad
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties.cc
@@ -0,0 +1,351 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
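A note on the registration scheme in table_factory.cc above: std::call_once guarantees the built-in factories are registered exactly once even when several threads race into CreateFromString on first use. The same pattern in miniature, with hypothetical Widget/Registry names standing in for the object-registry machinery:

#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Widget { virtual ~Widget() = default; };
struct SquareWidget : Widget {};

using WidgetFactory = std::function<std::unique_ptr<Widget>()>;
static std::map<std::string, WidgetFactory>& Registry() {
  static std::map<std::string, WidgetFactory> r;
  return r;
}

// Same shape as RegisterTableFactories: registration runs exactly once,
// no matter how many lookups race to call it first.
static void RegisterWidgets() {
  static std::once_flag loaded;
  std::call_once(loaded, [] {
    Registry()["square"] = [] { return std::make_unique<SquareWidget>(); };
  });
}

// CreateWidget("square") yields a SquareWidget; unknown names yield nullptr.
std::unique_ptr<Widget> CreateWidget(const std::string& name) {
  RegisterWidgets();
  auto it = Registry().find(name);
  return it == Registry().end() ? nullptr : it->second();
}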
+ +#include "rocksdb/table_properties.h" + +#include "db/seqno_to_time_mapping.h" +#include "port/malloc.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/unique_id.h" +#include "table/table_properties_internal.h" +#include "table/unique_id_impl.h" +#include "util/random.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +const uint32_t TablePropertiesCollectorFactory::Context::kUnknownColumnFamily = + std::numeric_limits::max(); + +namespace { +void AppendProperty(std::string& props, const std::string& key, + const std::string& value, const std::string& prop_delim, + const std::string& kv_delim) { + props.append(key); + props.append(kv_delim); + props.append(value); + props.append(prop_delim); +} + +template +void AppendProperty(std::string& props, const std::string& key, + const TValue& value, const std::string& prop_delim, + const std::string& kv_delim) { + AppendProperty(props, key, std::to_string(value), prop_delim, kv_delim); +} +} // namespace + +std::string TableProperties::ToString(const std::string& prop_delim, + const std::string& kv_delim) const { + std::string result; + result.reserve(1024); + + // Basic Info + AppendProperty(result, "# data blocks", num_data_blocks, prop_delim, + kv_delim); + AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim); + AppendProperty(result, "# deletions", num_deletions, prop_delim, kv_delim); + AppendProperty(result, "# merge operands", num_merge_operands, prop_delim, + kv_delim); + AppendProperty(result, "# range deletions", num_range_deletions, prop_delim, + kv_delim); + + AppendProperty(result, "raw key size", raw_key_size, prop_delim, kv_delim); + AppendProperty(result, "raw average key size", + num_entries != 0 ? 1.0 * raw_key_size / num_entries : 0.0, + prop_delim, kv_delim); + AppendProperty(result, "raw value size", raw_value_size, prop_delim, + kv_delim); + AppendProperty(result, "raw average value size", + num_entries != 0 ? 1.0 * raw_value_size / num_entries : 0.0, + prop_delim, kv_delim); + + AppendProperty(result, "data block size", data_size, prop_delim, kv_delim); + char index_block_size_str[80]; + snprintf(index_block_size_str, sizeof(index_block_size_str), + "index block size (user-key? %d, delta-value? %d)", + static_cast(index_key_is_user_key), + static_cast(index_value_is_delta_encoded)); + AppendProperty(result, index_block_size_str, index_size, prop_delim, + kv_delim); + if (index_partitions != 0) { + AppendProperty(result, "# index partitions", index_partitions, prop_delim, + kv_delim); + AppendProperty(result, "top-level index size", top_level_index_size, + prop_delim, kv_delim); + } + AppendProperty(result, "filter block size", filter_size, prop_delim, + kv_delim); + AppendProperty(result, "# entries for filter", num_filter_entries, prop_delim, + kv_delim); + AppendProperty(result, "(estimated) table size", + data_size + index_size + filter_size, prop_delim, kv_delim); + + AppendProperty( + result, "filter policy name", + filter_policy_name.empty() ? std::string("N/A") : filter_policy_name, + prop_delim, kv_delim); + + AppendProperty(result, "prefix extractor name", + prefix_extractor_name.empty() ? std::string("N/A") + : prefix_extractor_name, + prop_delim, kv_delim); + + AppendProperty(result, "column family ID", + column_family_id == + ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory:: + Context::kUnknownColumnFamily + ? 
std::string("N/A") + : std::to_string(column_family_id), + prop_delim, kv_delim); + AppendProperty( + result, "column family name", + column_family_name.empty() ? std::string("N/A") : column_family_name, + prop_delim, kv_delim); + + AppendProperty(result, "comparator name", + comparator_name.empty() ? std::string("N/A") : comparator_name, + prop_delim, kv_delim); + + AppendProperty( + result, "merge operator name", + merge_operator_name.empty() ? std::string("N/A") : merge_operator_name, + prop_delim, kv_delim); + + AppendProperty(result, "property collectors names", + property_collectors_names.empty() ? std::string("N/A") + : property_collectors_names, + prop_delim, kv_delim); + + AppendProperty( + result, "SST file compression algo", + compression_name.empty() ? std::string("N/A") : compression_name, + prop_delim, kv_delim); + + AppendProperty( + result, "SST file compression options", + compression_options.empty() ? std::string("N/A") : compression_options, + prop_delim, kv_delim); + + AppendProperty(result, "creation time", creation_time, prop_delim, kv_delim); + + AppendProperty(result, "time stamp of earliest key", oldest_key_time, + prop_delim, kv_delim); + + AppendProperty(result, "file creation time", file_creation_time, prop_delim, + kv_delim); + + AppendProperty(result, "slow compression estimated data size", + slow_compression_estimated_data_size, prop_delim, kv_delim); + AppendProperty(result, "fast compression estimated data size", + fast_compression_estimated_data_size, prop_delim, kv_delim); + + // DB identity and DB session ID + AppendProperty(result, "DB identity", db_id, prop_delim, kv_delim); + AppendProperty(result, "DB session identity", db_session_id, prop_delim, + kv_delim); + AppendProperty(result, "DB host id", db_host_id, prop_delim, kv_delim); + AppendProperty(result, "original file number", orig_file_number, prop_delim, + kv_delim); + + // Unique ID, when available + std::string id; + Status s = GetUniqueIdFromTableProperties(*this, &id); + AppendProperty(result, "unique ID", + s.ok() ? UniqueIdToHumanString(id) : "N/A", prop_delim, + kv_delim); + + SeqnoToTimeMapping seq_time_mapping; + s = seq_time_mapping.Add(seqno_to_time_mapping); + AppendProperty(result, "Sequence number to time mapping", + s.ok() ? 
seq_time_mapping.ToHumanString() : "N/A", prop_delim, + kv_delim); + + return result; +} + +void TableProperties::Add(const TableProperties& tp) { + data_size += tp.data_size; + index_size += tp.index_size; + index_partitions += tp.index_partitions; + top_level_index_size += tp.top_level_index_size; + index_key_is_user_key += tp.index_key_is_user_key; + index_value_is_delta_encoded += tp.index_value_is_delta_encoded; + filter_size += tp.filter_size; + raw_key_size += tp.raw_key_size; + raw_value_size += tp.raw_value_size; + num_data_blocks += tp.num_data_blocks; + num_entries += tp.num_entries; + num_filter_entries += tp.num_filter_entries; + num_deletions += tp.num_deletions; + num_merge_operands += tp.num_merge_operands; + num_range_deletions += tp.num_range_deletions; + slow_compression_estimated_data_size += + tp.slow_compression_estimated_data_size; + fast_compression_estimated_data_size += + tp.fast_compression_estimated_data_size; +} + +std::map +TableProperties::GetAggregatablePropertiesAsMap() const { + std::map rv; + rv["data_size"] = data_size; + rv["index_size"] = index_size; + rv["index_partitions"] = index_partitions; + rv["top_level_index_size"] = top_level_index_size; + rv["filter_size"] = filter_size; + rv["raw_key_size"] = raw_key_size; + rv["raw_value_size"] = raw_value_size; + rv["num_data_blocks"] = num_data_blocks; + rv["num_entries"] = num_entries; + rv["num_filter_entries"] = num_filter_entries; + rv["num_deletions"] = num_deletions; + rv["num_merge_operands"] = num_merge_operands; + rv["num_range_deletions"] = num_range_deletions; + rv["slow_compression_estimated_data_size"] = + slow_compression_estimated_data_size; + rv["fast_compression_estimated_data_size"] = + fast_compression_estimated_data_size; + return rv; +} + +// WARNING: manual update to this function is needed +// whenever a new string property is added to TableProperties +// to reduce approximation error. 
+// +// TODO: eliminate the need of manually updating this function +// for new string properties +std::size_t TableProperties::ApproximateMemoryUsage() const { + std::size_t usage = 0; +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + usage += malloc_usable_size((void*)this); +#else + usage += sizeof(*this); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + + std::size_t string_props_mem_usage = + db_id.size() + db_session_id.size() + db_host_id.size() + + column_family_name.size() + filter_policy_name.size() + + comparator_name.size() + merge_operator_name.size() + + prefix_extractor_name.size() + property_collectors_names.size() + + compression_name.size() + compression_options.size(); + usage += string_props_mem_usage; + + for (auto iter = user_collected_properties.begin(); + iter != user_collected_properties.end(); ++iter) { + usage += (iter->first.size() + iter->second.size()); + } + + return usage; +} + +const std::string TablePropertiesNames::kDbId = "rocksdb.creating.db.identity"; +const std::string TablePropertiesNames::kDbSessionId = + "rocksdb.creating.session.identity"; +const std::string TablePropertiesNames::kDbHostId = + "rocksdb.creating.host.identity"; +const std::string TablePropertiesNames::kOriginalFileNumber = + "rocksdb.original.file.number"; +const std::string TablePropertiesNames::kDataSize = "rocksdb.data.size"; +const std::string TablePropertiesNames::kIndexSize = "rocksdb.index.size"; +const std::string TablePropertiesNames::kIndexPartitions = + "rocksdb.index.partitions"; +const std::string TablePropertiesNames::kTopLevelIndexSize = + "rocksdb.top-level.index.size"; +const std::string TablePropertiesNames::kIndexKeyIsUserKey = + "rocksdb.index.key.is.user.key"; +const std::string TablePropertiesNames::kIndexValueIsDeltaEncoded = + "rocksdb.index.value.is.delta.encoded"; +const std::string TablePropertiesNames::kFilterSize = "rocksdb.filter.size"; +const std::string TablePropertiesNames::kRawKeySize = "rocksdb.raw.key.size"; +const std::string TablePropertiesNames::kRawValueSize = + "rocksdb.raw.value.size"; +const std::string TablePropertiesNames::kNumDataBlocks = + "rocksdb.num.data.blocks"; +const std::string TablePropertiesNames::kNumEntries = "rocksdb.num.entries"; +const std::string TablePropertiesNames::kNumFilterEntries = + "rocksdb.num.filter_entries"; +const std::string TablePropertiesNames::kDeletedKeys = "rocksdb.deleted.keys"; +const std::string TablePropertiesNames::kMergeOperands = + "rocksdb.merge.operands"; +const std::string TablePropertiesNames::kNumRangeDeletions = + "rocksdb.num.range-deletions"; +const std::string TablePropertiesNames::kFilterPolicy = "rocksdb.filter.policy"; +const std::string TablePropertiesNames::kFormatVersion = + "rocksdb.format.version"; +const std::string TablePropertiesNames::kFixedKeyLen = + "rocksdb.fixed.key.length"; +const std::string TablePropertiesNames::kColumnFamilyId = + "rocksdb.column.family.id"; +const std::string TablePropertiesNames::kColumnFamilyName = + "rocksdb.column.family.name"; +const std::string TablePropertiesNames::kComparator = "rocksdb.comparator"; +const std::string TablePropertiesNames::kMergeOperator = + "rocksdb.merge.operator"; +const std::string TablePropertiesNames::kPrefixExtractorName = + "rocksdb.prefix.extractor.name"; +const std::string TablePropertiesNames::kPropertyCollectors = + "rocksdb.property.collectors"; +const std::string TablePropertiesNames::kCompression = "rocksdb.compression"; +const std::string TablePropertiesNames::kCompressionOptions = + "rocksdb.compression_options"; +const 
std::string TablePropertiesNames::kCreationTime = "rocksdb.creation.time"; +const std::string TablePropertiesNames::kOldestKeyTime = + "rocksdb.oldest.key.time"; +const std::string TablePropertiesNames::kFileCreationTime = + "rocksdb.file.creation.time"; +const std::string TablePropertiesNames::kSlowCompressionEstimatedDataSize = + "rocksdb.sample_for_compression.slow.data.size"; +const std::string TablePropertiesNames::kFastCompressionEstimatedDataSize = + "rocksdb.sample_for_compression.fast.data.size"; +const std::string TablePropertiesNames::kSequenceNumberTimeMapping = + "rocksdb.seqno.time.map"; +const std::string TablePropertiesNames::kTailStartOffset = + "rocksdb.tail.start.offset"; + +#ifndef NDEBUG +// WARNING: TEST_SetRandomTableProperties assumes the following layout of +// TableProperties +// +// struct TableProperties { +// int64_t orig_file_number = 0; +// ... +// ... int64_t properties only +// ... +// std::string db_id; +// ... +// ... std::string properties only +// ... +// std::string compression_options; +// UserCollectedProperties user_collected_properties; +// ... +// ... Other extra properties: non-int64_t/non-std::string properties only +// ... +// } +void TEST_SetRandomTableProperties(TableProperties* props) { + Random* r = Random::GetTLSInstance(); + uint64_t* pu = &props->orig_file_number; + assert(static_cast(pu) == static_cast(props)); + std::string* ps = &props->db_id; + const uint64_t* const pu_end = reinterpret_cast(ps); + // Use the last string property's address instead of + // the first extra property (e.g `user_collected_properties`)'s address + // in the for-loop to avoid advancing pointer to pointing to + // potential non-zero padding bytes between these two addresses due to + // user_collected_properties's alignment requirement + const std::string* const ps_end_inclusive = &props->compression_options; + + for (; pu < pu_end; ++pu) { + *pu = r->Next64(); + } + assert(static_cast(pu) == static_cast(ps)); + for (; ps <= ps_end_inclusive; ++ps) { + *ps = r->RandomBinaryString(13); + } +} +#endif + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties_internal.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties_internal.h new file mode 100644 index 0000000000..5c2a0cb9aa --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_properties_internal.h @@ -0,0 +1,14 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/table_properties.h" + +namespace ROCKSDB_NAMESPACE { +#ifndef NDEBUG +void TEST_SetRandomTableProperties(TableProperties* props); +#endif +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_reader.h new file mode 100644 index 0000000000..53c5220529 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/table_reader.h @@ -0,0 +1,187 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
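table_properties.cc above renders every property through AppendProperty as key, kv_delim, value, prop_delim, which is how ToString produces its flat report. A compact sketch of that rendering (RenderProps is illustrative, not the vendored helper):

#include <cstdint>
#include <map>
#include <string>

// Mirrors the AppendProperty loop: "key<kv_delim>value<prop_delim>" per entry,
// e.g. kv_delim = ": " and prop_delim = "; ".
std::string RenderProps(const std::map<std::string, uint64_t>& props,
                        const std::string& prop_delim,
                        const std::string& kv_delim) {
  std::string out;
  out.reserve(128);
  for (const auto& kv : props) {
    out += kv.first + kv_delim + std::to_string(kv.second) + prop_delim;
  }
  return out;
}
// RenderProps({{"# entries", 10}, {"data size", 4096}}, "; ", ": ")
//   -> "# entries: 10; data size: 4096; "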
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include "db/range_tombstone_fragmenter.h" +#if USE_COROUTINES +#include "folly/experimental/coro/Coroutine.h" +#include "folly/experimental/coro/Task.h" +#endif +#include "rocksdb/slice_transform.h" +#include "rocksdb/table_reader_caller.h" +#include "table/get_context.h" +#include "table/internal_iterator.h" +#include "table/multiget_context.h" + +namespace ROCKSDB_NAMESPACE { + +class Iterator; +struct ParsedInternalKey; +class Slice; +class Arena; +struct ReadOptions; +struct TableProperties; +class GetContext; +class MultiGetContext; + +// A Table (also referred to as SST) is a sorted map from strings to strings. +// Tables are immutable and persistent. A Table may be safely accessed from +// multiple threads without external synchronization. Table readers are used +// for reading various types of table formats supported by rocksdb including +// BlockBasedTable, PlainTable and CuckooTable format. +class TableReader { + public: + virtual ~TableReader() {} + + // Returns a new iterator over the table contents. + // The result of NewIterator() is initially invalid (caller must + // call one of the Seek methods on the iterator before using it). + // + // read_options: Must outlive the returned iterator. + // arena: If not null, the arena needs to be used to allocate the Iterator. + // When destroying the iterator, the caller will not call "delete" + // but Iterator::~Iterator() directly. The destructor needs to destroy + // all the states but those allocated in arena. + // skip_filters: disables checking the bloom filters even if they exist. This + // option is effective only for block-based table format. + // compaction_readahead_size: its value will only be used if caller = + // kCompaction + virtual InternalIterator* NewIterator( + const ReadOptions& read_options, const SliceTransform* prefix_extractor, + Arena* arena, bool skip_filters, TableReaderCaller caller, + size_t compaction_readahead_size = 0, + bool allow_unprepared_value = false) = 0; + + virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( + const ReadOptions& /*read_options*/) { + return nullptr; + } + + // Given a key, return an approximate byte offset in the file where + // the data for that key begins (or would begin if the key were + // present in the file). The returned value is in terms of file + // bytes, and so includes effects like compression of the underlying data. + // E.g., the approximate offset of the last key in the table will + // be close to the file length. + // TODO(peterd): Since this function is only used for approximate size + // from beginning of file, reduce code duplication by removing this + // function and letting ApproximateSize take optional start and end, so + // that absolute start and end can be specified and optimized without + // key / index work. + virtual uint64_t ApproximateOffsetOf(const ReadOptions& read_options, + const Slice& key, + TableReaderCaller caller) = 0; + + // Given start and end keys, return the approximate data size in the file + // between the keys. The returned value is in terms of file bytes, and so + // includes effects like compression of the underlying data and applicable + // portions of metadata including filters and indexes. Nullptr for start or + // end (or both) indicates absolute start or end of the table. 
+ virtual uint64_t ApproximateSize(const ReadOptions& read_options, + const Slice& start, const Slice& end, + TableReaderCaller caller) = 0; + + struct Anchor { + Anchor(const Slice& _user_key, size_t _range_size) + : user_key(_user_key.ToStringView()), range_size(_range_size) {} + std::string user_key; + size_t range_size; + }; + + // Now try to return approximately 128 anchor keys. + // The last one tends to be the largest key. + virtual Status ApproximateKeyAnchors(const ReadOptions& /*read_options*/, + std::vector& /*anchors*/) { + return Status::NotSupported("ApproximateKeyAnchors() not supported."); + } + + // Set up the table for Compaction. Might change some parameters with + // posix_fadvise + virtual void SetupForCompaction() = 0; + + virtual std::shared_ptr GetTableProperties() const = 0; + + // Prepare work that can be done before the real Get() + virtual void Prepare(const Slice& /*target*/) {} + + // Report an approximation of how much memory has been used. + virtual size_t ApproximateMemoryUsage() const = 0; + + // Calls get_context->SaveValue() repeatedly, starting with + // the entry found after a call to Seek(key), until it returns false. + // May not make such a call if filter policy says that key is not present. + // + // get_context->MarkKeyMayExist needs to be called when it is configured to be + // memory only and the key is not found in the block cache. + // + // readOptions is the options for the read + // key is the key to search for + // skip_filters: disables checking the bloom filters even if they exist. This + // option is effective only for block-based table format. + virtual Status Get(const ReadOptions& readOptions, const Slice& key, + GetContext* get_context, + const SliceTransform* prefix_extractor, + bool skip_filters = false) = 0; + + // Use bloom filters in the table file, if present, to filter out keys. The + // mget_range will be updated to skip keys that get a negative result from + // the filter lookup. + virtual Status MultiGetFilter(const ReadOptions& /*readOptions*/, + const SliceTransform* /*prefix_extractor*/, + MultiGetContext::Range* /*mget_range*/) { + return Status::NotSupported(); + } + + virtual void MultiGet(const ReadOptions& readOptions, + const MultiGetContext::Range* mget_range, + const SliceTransform* prefix_extractor, + bool skip_filters = false) { + for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) { + *iter->s = Get(readOptions, iter->ikey, iter->get_context, + prefix_extractor, skip_filters); + } + } + +#if USE_COROUTINES + virtual folly::coro::Task MultiGetCoroutine( + const ReadOptions& readOptions, const MultiGetContext::Range* mget_range, + const SliceTransform* prefix_extractor, bool skip_filters = false) { + MultiGet(readOptions, mget_range, prefix_extractor, skip_filters); + co_return; + } +#endif // USE_COROUTINES + + // Prefetch data corresponding to a give range of keys + // Typically this functionality is required for table implementations that + // persists the data on a non volatile storage medium like disk/SSD + virtual Status Prefetch(const ReadOptions& /* read_options */, + const Slice* begin = nullptr, + const Slice* end = nullptr) { + (void)begin; + (void)end; + // Default implementation is NOOP. 
+ // The child class should implement functionality when applicable + return Status::OK(); + } + + // convert db file to a human readable form + virtual Status DumpTable(WritableFile* /*out_file*/) { + return Status::NotSupported("DumpTable() not supported"); + } + + // check whether there is corruption in this db file + virtual Status VerifyChecksum(const ReadOptions& /*read_options*/, + TableReaderCaller /*caller*/) { + return Status::NotSupported("VerifyChecksum() not supported"); + } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.cc new file mode 100644 index 0000000000..4b6634e5cf --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.cc @@ -0,0 +1,220 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "table/two_level_iterator.h" + +#include "db/pinned_iterators_manager.h" +#include "memory/arena.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "table/block_based/block.h" +#include "table/format.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +class TwoLevelIndexIterator : public InternalIteratorBase { + public: + explicit TwoLevelIndexIterator( + TwoLevelIteratorState* state, + InternalIteratorBase* first_level_iter); + + ~TwoLevelIndexIterator() override { + first_level_iter_.DeleteIter(false /* is_arena_mode */); + second_level_iter_.DeleteIter(false /* is_arena_mode */); + delete state_; + } + + void Seek(const Slice& target) override; + void SeekForPrev(const Slice& target) override; + void SeekToFirst() override; + void SeekToLast() override; + void Next() override; + void Prev() override; + + bool Valid() const override { return second_level_iter_.Valid(); } + Slice key() const override { + assert(Valid()); + return second_level_iter_.key(); + } + Slice user_key() const override { + assert(Valid()); + return second_level_iter_.user_key(); + } + IndexValue value() const override { + assert(Valid()); + return second_level_iter_.value(); + } + Status status() const override { + if (!first_level_iter_.status().ok()) { + assert(second_level_iter_.iter() == nullptr); + return first_level_iter_.status(); + } else if (second_level_iter_.iter() != nullptr && + !second_level_iter_.status().ok()) { + return second_level_iter_.status(); + } else { + return status_; + } + } + void SetPinnedItersMgr( + PinnedIteratorsManager* /*pinned_iters_mgr*/) override {} + bool IsKeyPinned() const override { return false; } + bool IsValuePinned() const override { return false; } + + private: + void SaveError(const Status& s) { + if (status_.ok() && !s.ok()) status_ = s; + } + void SkipEmptyDataBlocksForward(); + void SkipEmptyDataBlocksBackward(); + void SetSecondLevelIterator(InternalIteratorBase* iter); + void InitDataBlock(); + + TwoLevelIteratorState* state_; + IteratorWrapperBase first_level_iter_; + IteratorWrapperBase second_level_iter_; // May be nullptr + Status status_; + // If second_level_iter is non-nullptr, then "data_block_handle_" 
holds the + // "index_value" passed to block_function_ to create the second_level_iter. + BlockHandle data_block_handle_; +}; + +TwoLevelIndexIterator::TwoLevelIndexIterator( + TwoLevelIteratorState* state, + InternalIteratorBase* first_level_iter) + : state_(state), first_level_iter_(first_level_iter) {} + +void TwoLevelIndexIterator::Seek(const Slice& target) { + first_level_iter_.Seek(target); + + InitDataBlock(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.Seek(target); + } + SkipEmptyDataBlocksForward(); +} + +void TwoLevelIndexIterator::SeekForPrev(const Slice& target) { + first_level_iter_.Seek(target); + InitDataBlock(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekForPrev(target); + } + if (!Valid()) { + if (!first_level_iter_.Valid() && first_level_iter_.status().ok()) { + first_level_iter_.SeekToLast(); + InitDataBlock(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekForPrev(target); + } + } + SkipEmptyDataBlocksBackward(); + } +} + +void TwoLevelIndexIterator::SeekToFirst() { + first_level_iter_.SeekToFirst(); + InitDataBlock(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToFirst(); + } + SkipEmptyDataBlocksForward(); +} + +void TwoLevelIndexIterator::SeekToLast() { + first_level_iter_.SeekToLast(); + InitDataBlock(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToLast(); + } + SkipEmptyDataBlocksBackward(); +} + +void TwoLevelIndexIterator::Next() { + assert(Valid()); + second_level_iter_.Next(); + SkipEmptyDataBlocksForward(); +} + +void TwoLevelIndexIterator::Prev() { + assert(Valid()); + second_level_iter_.Prev(); + SkipEmptyDataBlocksBackward(); +} + +void TwoLevelIndexIterator::SkipEmptyDataBlocksForward() { + while (second_level_iter_.iter() == nullptr || + (!second_level_iter_.Valid() && second_level_iter_.status().ok())) { + // Move to next block + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); + return; + } + first_level_iter_.Next(); + InitDataBlock(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToFirst(); + } + } +} + +void TwoLevelIndexIterator::SkipEmptyDataBlocksBackward() { + while (second_level_iter_.iter() == nullptr || + (!second_level_iter_.Valid() && second_level_iter_.status().ok())) { + // Move to next block + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); + return; + } + first_level_iter_.Prev(); + InitDataBlock(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToLast(); + } + } +} + +void TwoLevelIndexIterator::SetSecondLevelIterator( + InternalIteratorBase* iter) { + InternalIteratorBase* old_iter = second_level_iter_.Set(iter); + delete old_iter; +} + +void TwoLevelIndexIterator::InitDataBlock() { + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); + } else { + BlockHandle handle = first_level_iter_.value().handle; + if (second_level_iter_.iter() != nullptr && + !second_level_iter_.status().IsIncomplete() && + handle.offset() == data_block_handle_.offset()) { + // second_level_iter is already constructed with this iterator, so + // no need to change anything + } else { + InternalIteratorBase* iter = + state_->NewSecondaryIterator(handle); + data_block_handle_ = handle; + SetSecondLevelIterator(iter); + if (iter == nullptr) { + status_ = Status::Corruption("Missing block for partition " + + handle.ToString()); + } + } + } +} + +} // namespace + +InternalIteratorBase* NewTwoLevelIterator( + TwoLevelIteratorState* 
state, + InternalIteratorBase* first_level_iter) { + return new TwoLevelIndexIterator(state, first_level_iter); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.h new file mode 100644 index 0000000000..1fed934175 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/two_level_iterator.h @@ -0,0 +1,43 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include "rocksdb/env.h" +#include "rocksdb/iterator.h" +#include "table/iterator_wrapper.h" + +namespace ROCKSDB_NAMESPACE { + +struct ReadOptions; +class InternalKeyComparator; + +// TwoLevelIteratorState expects iterators are not created using the arena +struct TwoLevelIteratorState { + TwoLevelIteratorState() {} + + virtual ~TwoLevelIteratorState() {} + virtual InternalIteratorBase* NewSecondaryIterator( + const BlockHandle& handle) = 0; +}; + +// Return a new two level iterator. A two-level iterator contains an +// index iterator whose values point to a sequence of blocks where +// each block is itself a sequence of key,value pairs. The returned +// two-level iterator yields the concatenation of all key/value pairs +// in the sequence of blocks. Takes ownership of "index_iter" and +// will delete it when no longer needed. +// +// Uses a supplied function to convert an index_iter value into +// an iterator over the contents of the corresponding block. +// Note: this function expects first_level_iter was not created using the arena +extern InternalIteratorBase* NewTwoLevelIterator( + TwoLevelIteratorState* state, + InternalIteratorBase* first_level_iter); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id.cc new file mode 100644 index 0000000000..fcdd756504 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id.cc @@ -0,0 +1,223 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include + +#include "table/unique_id_impl.h" +#include "util/coding_lean.h" +#include "util/hash.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +std::string EncodeSessionId(uint64_t upper, uint64_t lower) { + std::string db_session_id(20U, '\0'); + char *buf = &db_session_id[0]; + // Preserving `lower` is slightly tricky. 36^12 is slightly more than + // 62 bits, so we use 12 chars plus the bottom two bits of one more. + // (A tiny fraction of 20 digit strings go unused.) 
+ uint64_t a = (upper << 2) | (lower >> 62); + uint64_t b = lower & (UINT64_MAX >> 2); + PutBaseChars<36>(&buf, 8, a, /*uppercase*/ true); + PutBaseChars<36>(&buf, 12, b, /*uppercase*/ true); + assert(buf == &db_session_id.back() + 1); + return db_session_id; +} + +Status DecodeSessionId(const std::string &db_session_id, uint64_t *upper, + uint64_t *lower) { + const size_t len = db_session_id.size(); + if (len == 0) { + return Status::NotSupported("Missing db_session_id"); + } + // Anything from 13 to 24 chars is reasonable. We don't have to limit to + // exactly 20. + if (len < 13) { + return Status::NotSupported("Too short db_session_id"); + } + if (len > 24) { + return Status::NotSupported("Too long db_session_id"); + } + uint64_t a = 0, b = 0; + const char *buf = &db_session_id.front(); + bool success = ParseBaseChars<36>(&buf, len - 12U, &a); + if (!success) { + return Status::NotSupported("Bad digit in db_session_id"); + } + success = ParseBaseChars<36>(&buf, 12U, &b); + if (!success) { + return Status::NotSupported("Bad digit in db_session_id"); + } + assert(buf == &db_session_id.back() + 1); + *upper = a >> 2; + *lower = (b & (UINT64_MAX >> 2)) | (a << 62); + return Status::OK(); +} + +Status GetSstInternalUniqueId(const std::string &db_id, + const std::string &db_session_id, + uint64_t file_number, UniqueIdPtr out, + bool force) { + if (!force) { + if (db_id.empty()) { + return Status::NotSupported("Missing db_id"); + } + if (file_number == 0) { + return Status::NotSupported("Missing or bad file number"); + } + if (db_session_id.empty()) { + return Status::NotSupported("Missing db_session_id"); + } + } + uint64_t session_upper = 0; // Assignment to appease clang-analyze + uint64_t session_lower = 0; // Assignment to appease clang-analyze + { + Status s = DecodeSessionId(db_session_id, &session_upper, &session_lower); + if (!s.ok()) { + if (!force) { + return s; + } else { + // A reasonable fallback in case malformed + Hash2x64(db_session_id.data(), db_session_id.size(), &session_upper, + &session_lower); + if (session_lower == 0) { + session_lower = session_upper | 1; + } + } + } + } + + // Exactly preserve session lower to ensure that session ids generated + // during the same process lifetime are guaranteed unique. + // DBImpl also guarantees (in recent versions) that this is not zero, + // so that we can guarantee unique ID is never all zeros. (Can't assert + // that here because of testing and old versions.) + // We put this first in anticipation of matching a small-ish set of cache + // key prefixes to cover entries relevant to any DB. + out.ptr[0] = session_lower; + + // Hash the session upper (~39 bits entropy) and DB id (120+ bits entropy) + // for very high global uniqueness entropy. + // (It is possible that many DBs descended from one common DB id are copied + // around and proliferate, in which case session id is critical, but it is + // more common for different DBs to have different DB ids.) + uint64_t db_a, db_b; + Hash2x64(db_id.data(), db_id.size(), session_upper, &db_a, &db_b); + + // Xor in file number for guaranteed uniqueness by file number for a given + // session and DB id. (Xor slightly better than + here. 
See + // https://github.com/pdillinger/unique_id ) + out.ptr[1] = db_a ^ file_number; + + // Extra (optional) global uniqueness + if (out.extended) { + out.ptr[2] = db_b; + } + + return Status::OK(); +} + +namespace { +// For InternalUniqueIdToExternal / ExternalUniqueIdToInternal we want all +// zeros in first 128 bits to map to itself, so that excluding zero in +// internal IDs (session_lower != 0 above) does the same for external IDs. +// These values are meaningless except for making that work. +constexpr uint64_t kHiOffsetForZero = 17391078804906429400U; +constexpr uint64_t kLoOffsetForZero = 6417269962128484497U; +} // namespace + +void InternalUniqueIdToExternal(UniqueIdPtr in_out) { + uint64_t hi, lo; + BijectiveHash2x64(in_out.ptr[1] + kHiOffsetForZero, + in_out.ptr[0] + kLoOffsetForZero, &hi, &lo); + in_out.ptr[0] = lo; + in_out.ptr[1] = hi; + if (in_out.extended) { + in_out.ptr[2] += lo + hi; + } +} + +void ExternalUniqueIdToInternal(UniqueIdPtr in_out) { + uint64_t lo = in_out.ptr[0]; + uint64_t hi = in_out.ptr[1]; + if (in_out.extended) { + in_out.ptr[2] -= lo + hi; + } + BijectiveUnhash2x64(hi, lo, &hi, &lo); + in_out.ptr[0] = lo - kLoOffsetForZero; + in_out.ptr[1] = hi - kHiOffsetForZero; +} + +std::string EncodeUniqueIdBytes(UniqueIdPtr in) { + std::string ret(in.extended ? 24U : 16U, '\0'); + EncodeFixed64(&ret[0], in.ptr[0]); + EncodeFixed64(&ret[8], in.ptr[1]); + if (in.extended) { + EncodeFixed64(&ret[16], in.ptr[2]); + } + return ret; +} + +Status DecodeUniqueIdBytes(const std::string &unique_id, UniqueIdPtr out) { + if (unique_id.size() != (out.extended ? 24 : 16)) { + return Status::NotSupported("Not a valid unique_id"); + } + const char *buf = &unique_id.front(); + out.ptr[0] = DecodeFixed64(&buf[0]); + out.ptr[1] = DecodeFixed64(&buf[8]); + if (out.extended) { + out.ptr[2] = DecodeFixed64(&buf[16]); + } + return Status::OK(); +} + +template +Status GetUniqueIdFromTablePropertiesHelper(const TableProperties &props, + std::string *out_id) { + ID tmp{}; + Status s = GetSstInternalUniqueId(props.db_id, props.db_session_id, + props.orig_file_number, &tmp); + if (s.ok()) { + InternalUniqueIdToExternal(&tmp); + *out_id = EncodeUniqueIdBytes(&tmp); + } else { + out_id->clear(); + } + return s; +} + +Status GetExtendedUniqueIdFromTableProperties(const TableProperties &props, + std::string *out_id) { + return GetUniqueIdFromTablePropertiesHelper(props, out_id); +} + +Status GetUniqueIdFromTableProperties(const TableProperties &props, + std::string *out_id) { + return GetUniqueIdFromTablePropertiesHelper(props, out_id); +} + +std::string UniqueIdToHumanString(const std::string &id) { + // Not so efficient, but that's OK + std::string str = Slice(id).ToString(/*hex*/ true); + for (size_t i = 16; i < str.size(); i += 17) { + str.insert(i, "-"); + } + return str; +} + +std::string InternalUniqueIdToHumanString(UniqueIdPtr in) { + std::string str = "{"; + str += std::to_string(in.ptr[0]); + str += ","; + str += std::to_string(in.ptr[1]); + if (in.extended) { + str += ","; + str += std::to_string(in.ptr[2]); + } + str += "}"; + return str; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id_impl.h new file mode 100644 index 0000000000..6e3dc62c79 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/table/unique_id_impl.h @@ -0,0 +1,93 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
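On the session-id encoding in unique_id.cc above: the 128-bit (upper, lower) pair is repacked so that a carries upper plus the top two bits of lower, and b carries the remaining 62 bits; a is then written as 8 base-36 characters (its higher digits, i.e. part of upper, are dropped) and b as 12. A standalone sketch of the fixed-width base-36 step under those assumptions (ToBase36 is a hypothetical stand-in for PutBaseChars<36>):

#include <cassert>
#include <cstdint>
#include <string>

// Fixed-width base-36 rendering, most significant digit first; digits of v
// beyond `width` are silently dropped, mirroring how EncodeSessionId loses
// data from `upper` while preserving `lower` exactly.
std::string ToBase36(uint64_t v, int width) {
  static const char kDigits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  std::string s(static_cast<size_t>(width), '0');
  for (int i = width - 1; i >= 0 && v != 0; --i) {
    s[static_cast<size_t>(i)] = kDigits[v % 36];
    v /= 36;
  }
  return s;
}

int main() {
  uint64_t upper = 0x0123456789ABCDEFULL;
  uint64_t lower = 0xFEDCBA9876543210ULL;
  uint64_t a = (upper << 2) | (lower >> 62);  // top 2 bits of lower join a
  uint64_t b = lower & (UINT64_MAX >> 2);     // low 62 bits of lower
  std::string id = ToBase36(a, 8) + ToBase36(b, 12);
  assert(id.size() == 20);  // the 20-char DB session id shape
  return 0;
}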
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/unique_id.h" + +namespace ROCKSDB_NAMESPACE { + +// Standard size unique ID, good enough for almost all practical purposes +using UniqueId64x2 = std::array; + +// Value never used as an actual unique ID so can be used for "null" +constexpr UniqueId64x2 kNullUniqueId64x2 = {}; + +// Extended size unique ID, for extra certainty of uniqueness among SST files +// spanning many hosts over a long time (rarely if ever needed) +using UniqueId64x3 = std::array; + +// Value never used as an actual unique ID so can be used for "null" +constexpr UniqueId64x3 kNullUniqueId64x3 = {}; + +// Dynamic pointer wrapper for one of the two above +struct UniqueIdPtr { + uint64_t *ptr = nullptr; + bool extended = false; + + /*implicit*/ UniqueIdPtr(UniqueId64x2 *id) { + ptr = (*id).data(); + extended = false; + } + /*implicit*/ UniqueIdPtr(UniqueId64x3 *id) { + ptr = (*id).data(); + extended = true; + } +}; + +// Helper for GetUniqueIdFromTableProperties. This function can also be used +// for temporary ids for files without sufficient information in table +// properties. The internal unique id is more structured than the public +// unique id, so can be manipulated in more ways but very carefully. +// These must be long term stable to ensure GetUniqueIdFromTableProperties +// is long term stable. +Status GetSstInternalUniqueId(const std::string &db_id, + const std::string &db_session_id, + uint64_t file_number, UniqueIdPtr out, + bool force = false); + +// Helper for GetUniqueIdFromTableProperties. External unique ids go through +// this extra hashing layer so that prefixes of the unique id have predictable +// "full" entropy. This hashing layer is 1-to-1 on the first 128 bits and on +// the full 192 bits. +// This transformation must be long term stable to ensure +// GetUniqueIdFromTableProperties is long term stable. +void InternalUniqueIdToExternal(UniqueIdPtr in_out); + +// Reverse of InternalUniqueIdToExternal mostly for testing purposes +// (demonstrably 1-to-1 on the first 128 bits and on the full 192 bits). +void ExternalUniqueIdToInternal(UniqueIdPtr in_out); + +// Convert numerical format to byte format for public API +std::string EncodeUniqueIdBytes(UniqueIdPtr in); + +// Reverse of EncodeUniqueIdBytes. +Status DecodeUniqueIdBytes(const std::string &unique_id, UniqueIdPtr out); + +// For presenting internal IDs for debugging purposes. Visually distinct from +// UniqueIdToHumanString for external IDs. +std::string InternalUniqueIdToHumanString(UniqueIdPtr in); + +// Reformat a random value down to our "DB session id" format, +// which is intended to be compact and friendly for use in file names. +// `lower` is fully preserved and data is lost from `upper`. +// +// Detail: Encoded into 20 chars in base-36 ([0-9A-Z]), which is ~103 bits of +// entropy, which is enough to expect no collisions across a billion servers +// each opening DBs a million times (~2^50). Benefits vs. RFC-4122 unique id: +// * Save ~ dozen bytes per SST file +// * Shorter shared backup file names (some platforms have low limits) +// * Visually distinct from DB id format (usually RFC-4122) +std::string EncodeSessionId(uint64_t upper, uint64_t lower); + +// Reverse of EncodeSessionId. 
Returns NotSupported on error rather than +// Corruption because non-standard session IDs should be allowed with degraded +// functionality. +Status DecodeSessionId(const std::string &db_session_id, uint64_t *upper, + uint64_t *lower); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/mock_time_env.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/mock_time_env.h new file mode 100644 index 0000000000..7834368e03 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/mock_time_env.h @@ -0,0 +1,78 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/system_clock.h" + +namespace ROCKSDB_NAMESPACE { + +// NOTE: SpecialEnv offers most of this functionality, along with hooks +// for safe DB behavior under a mock time environment, so should be used +// instead of MockSystemClock for DB tests. +class MockSystemClock : public SystemClockWrapper { + public: + explicit MockSystemClock(const std::shared_ptr& base) + : SystemClockWrapper(base) {} + + static const char* kClassName() { return "MockSystemClock"; } + const char* Name() const override { return kClassName(); } + virtual Status GetCurrentTime(int64_t* time_sec) override { + assert(time_sec != nullptr); + *time_sec = static_cast(current_time_us_ / kMicrosInSecond); + return Status::OK(); + } + + virtual uint64_t NowSeconds() { return current_time_us_ / kMicrosInSecond; } + + virtual uint64_t NowMicros() override { return current_time_us_; } + + virtual uint64_t NowNanos() override { + assert(current_time_us_ <= std::numeric_limits::max() / 1000); + return current_time_us_ * 1000; + } + + uint64_t RealNowMicros() { return target_->NowMicros(); } + + void SetCurrentTime(uint64_t time_sec) { + assert(time_sec < std::numeric_limits::max() / kMicrosInSecond); + assert(time_sec * kMicrosInSecond >= current_time_us_); + current_time_us_ = time_sec * kMicrosInSecond; + } + + // It's a fake sleep that just updates the Env current time, which is similar + // to `NoSleepEnv.SleepForMicroseconds()` and + // `SpecialEnv.MockSleepForMicroseconds()`. + // It's also similar to `set_current_time()`, which takes an absolute time in + // seconds, vs. this one takes the sleep in microseconds. + // Note: Not thread safe. + void SleepForMicroseconds(int micros) override { + assert(micros >= 0); + assert(current_time_us_ + static_cast(micros) >= + current_time_us_); + current_time_us_.fetch_add(micros); + } + + void MockSleepForSeconds(int seconds) { + assert(seconds >= 0); + uint64_t micros = static_cast(seconds) * kMicrosInSecond; + assert(current_time_us_ + micros >= current_time_us_); + current_time_us_.fetch_add(micros); + } + + // TODO: this is a workaround for the different behavior on different platform + // for timedwait timeout. Ideally timedwait API should be moved to env. + // details: PR #7101. 
+ void InstallTimedWaitFixCallback(); + + private: + std::atomic current_time_us_{0}; + static constexpr uint64_t kMicrosInSecond = 1000U * 1000U; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/secondary_cache_test_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/secondary_cache_test_util.h new file mode 100644 index 0000000000..1cfb454b5a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/secondary_cache_test_util.h @@ -0,0 +1,119 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include + +#include "rocksdb/advanced_cache.h" + +namespace ROCKSDB_NAMESPACE { +namespace secondary_cache_test_util { + +struct TestCreateContext : public Cache::CreateContext { + void SetFailCreate(bool fail) { fail_create_ = fail; } + + bool fail_create_ = false; +}; + +class WithCacheType : public TestCreateContext { + public: + WithCacheType() {} + virtual ~WithCacheType() {} + + class TestItem { + public: + TestItem(const char* buf, size_t size) : buf_(new char[size]), size_(size) { + memcpy(buf_.get(), buf, size); + } + ~TestItem() = default; + + char* Buf() { return buf_.get(); } + [[nodiscard]] size_t Size() const { return size_; } + std::string ToString() { return std::string(Buf(), Size()); } + + private: + std::unique_ptr buf_; + size_t size_; + }; + + static constexpr auto kLRU = "lru"; + static constexpr auto kHyperClock = "hyper_clock"; + + // For options other than capacity + size_t estimated_value_size_ = 1; + + virtual const std::string& Type() = 0; + + std::shared_ptr NewCache( + size_t capacity, + std::function modify_opts_fn = {}) { + const auto& type = Type(); + if (type == kLRU) { + LRUCacheOptions lru_opts; + lru_opts.capacity = capacity; + lru_opts.hash_seed = 0; // deterministic tests + if (modify_opts_fn) { + modify_opts_fn(lru_opts); + } + return lru_opts.MakeSharedCache(); + } + if (type == kHyperClock) { + HyperClockCacheOptions hc_opts{capacity, estimated_value_size_}; + hc_opts.hash_seed = 0; // deterministic tests + if (modify_opts_fn) { + modify_opts_fn(hc_opts); + } + return hc_opts.MakeSharedCache(); + } + assert(false); + return nullptr; + } + + std::shared_ptr NewCache( + size_t capacity, int num_shard_bits, bool strict_capacity_limit, + CacheMetadataChargePolicy charge_policy = kDontChargeCacheMetadata) { + return NewCache(capacity, [=](ShardedCacheOptions& opts) { + opts.num_shard_bits = num_shard_bits; + opts.strict_capacity_limit = strict_capacity_limit; + opts.metadata_charge_policy = charge_policy; + }); + } + + std::shared_ptr NewCache( + size_t capacity, int num_shard_bits, bool strict_capacity_limit, + std::shared_ptr secondary_cache) { + return NewCache(capacity, [=](ShardedCacheOptions& opts) { + opts.num_shard_bits = num_shard_bits; + opts.strict_capacity_limit = strict_capacity_limit; + opts.metadata_charge_policy = kDontChargeCacheMetadata; + opts.secondary_cache = secondary_cache; + }); + } + + static const Cache::CacheItemHelper* GetHelper( + CacheEntryRole r = CacheEntryRole::kDataBlock, + bool secondary_compatible = true, bool fail = false); + + static const Cache::CacheItemHelper* GetHelperFail( + CacheEntryRole r = CacheEntryRole::kDataBlock); +}; + +class WithCacheTypeParam : public WithCacheType, + public 
testing::WithParamInterface { + const std::string& Type() override { return GetParam(); } +}; + +constexpr auto kLRU = WithCacheType::kLRU; +constexpr auto kHyperClock = WithCacheType::kHyperClock; + +inline auto GetTestingCacheTypes() { + return testing::Values(std::string(kLRU), std::string(kHyperClock)); +} + +} // namespace secondary_cache_test_util +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.cc new file mode 100644 index 0000000000..bec02d4f67 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "test_util/sync_point.h" + +#include + +#include "test_util/sync_point_impl.h" + +std::vector rocksdb_kill_exclude_prefixes; + +#ifndef NDEBUG +namespace ROCKSDB_NAMESPACE { + +SyncPoint* SyncPoint::GetInstance() { + static SyncPoint sync_point; + return &sync_point; +} + +SyncPoint::SyncPoint() : impl_(new Data) {} + +SyncPoint::~SyncPoint() { delete impl_; } + +void SyncPoint::LoadDependency(const std::vector& dependencies) { + impl_->LoadDependency(dependencies); +} + +void SyncPoint::LoadDependencyAndMarkers( + const std::vector& dependencies, + const std::vector& markers) { + impl_->LoadDependencyAndMarkers(dependencies, markers); +} + +void SyncPoint::SetCallBack(const std::string& point, + const std::function& callback) { + impl_->SetCallBack(point, callback); +} + +void SyncPoint::ClearCallBack(const std::string& point) { + impl_->ClearCallBack(point); +} + +void SyncPoint::ClearAllCallBacks() { impl_->ClearAllCallBacks(); } + +void SyncPoint::EnableProcessing() { impl_->EnableProcessing(); } + +void SyncPoint::DisableProcessing() { impl_->DisableProcessing(); } + +void SyncPoint::ClearTrace() { impl_->ClearTrace(); } + +void SyncPoint::Process(const Slice& point, void* cb_arg) { + impl_->Process(point, cb_arg); +} + +} // namespace ROCKSDB_NAMESPACE +#endif // NDEBUG + +namespace ROCKSDB_NAMESPACE { +void SetupSyncPointsToMockDirectIO() { +#if !defined(NDEBUG) && !defined(OS_MACOSX) && !defined(OS_WIN) && \ + !defined(OS_SOLARIS) && !defined(OS_AIX) && !defined(OS_OPENBSD) + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "NewWritableFile:O_DIRECT", [&](void* arg) { + int* val = static_cast(arg); + *val &= ~O_DIRECT; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "NewRandomAccessFile:O_DIRECT", [&](void* arg) { + int* val = static_cast(arg); + *val &= ~O_DIRECT; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "NewSequentialFile:O_DIRECT", [&](void* arg) { + int* val = static_cast(arg); + *val &= ~O_DIRECT; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); +#endif +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.h new file mode 100644 index 0000000000..65f1239ec4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point.h @@ -0,0 +1,180 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/slice.h" + +#ifdef NDEBUG +// empty in release build +#define TEST_KILL_RANDOM_WITH_WEIGHT(kill_point, rocksdb_kill_odds_weight) +#define TEST_KILL_RANDOM(kill_point) +#else + +namespace ROCKSDB_NAMESPACE { + +// To avoid crashing always at some frequently executed codepaths (during +// kill random test), use this factor to reduce odds +#define REDUCE_ODDS 2 +#define REDUCE_ODDS2 4 + +// A class used to pass when a kill point is reached. +struct KillPoint { + public: + // This is only set from db_stress.cc and for testing only. + // If non-zero, kill at various points in source code with probability 1/this + int rocksdb_kill_odds = 0; + // If kill point has a prefix on this list, will skip killing. + std::vector rocksdb_kill_exclude_prefixes; + // Kill the process with probability 1/odds for testing. + void TestKillRandom(std::string kill_point, int odds, + const std::string& srcfile, int srcline); + + static KillPoint* GetInstance(); +}; + +#define TEST_KILL_RANDOM_WITH_WEIGHT(kill_point, rocksdb_kill_odds_weight) \ + { \ + KillPoint::GetInstance()->TestKillRandom( \ + kill_point, rocksdb_kill_odds_weight, __FILE__, __LINE__); \ + } +#define TEST_KILL_RANDOM(kill_point) TEST_KILL_RANDOM_WITH_WEIGHT(kill_point, 1) +} // namespace ROCKSDB_NAMESPACE + +#endif + +#ifdef NDEBUG +#define TEST_SYNC_POINT(x) +#define TEST_IDX_SYNC_POINT(x, index) +#define TEST_SYNC_POINT_CALLBACK(x, y) +#define INIT_SYNC_POINT_SINGLETONS() +#else + +namespace ROCKSDB_NAMESPACE { + +// This class provides facility to reproduce race conditions deterministically +// in unit tests. +// Developer could specify sync points in the codebase via TEST_SYNC_POINT. +// Each sync point represents a position in the execution stream of a thread. +// In the unit test, 'Happens After' relationship among sync points could be +// setup via SyncPoint::LoadDependency, to reproduce a desired interleave of +// threads execution. +// Refer to (DBTest,TransactionLogIteratorRace), for an example use case. + +class SyncPoint { + public: + static SyncPoint* GetInstance(); + + SyncPoint(const SyncPoint&) = delete; + SyncPoint& operator=(const SyncPoint&) = delete; + ~SyncPoint(); + + struct SyncPointPair { + std::string predecessor; + std::string successor; + }; + + // call once at the beginning of a test to setup the dependency between + // sync points + void LoadDependency(const std::vector& dependencies); + + // call once at the beginning of a test to setup the dependency between + // sync points and setup markers indicating the successor is only enabled + // when it is processed on the same thread as the predecessor. + // When adding a marker, it implicitly adds a dependency for the marker pair. + void LoadDependencyAndMarkers(const std::vector& dependencies, + const std::vector& markers); + + // The argument to the callback is passed through from + // TEST_SYNC_POINT_CALLBACK(); nullptr if TEST_SYNC_POINT or + // TEST_IDX_SYNC_POINT was used. + void SetCallBack(const std::string& point, + const std::function& callback); + + // Clear callback function by point + void ClearCallBack(const std::string& point); + + // Clear all call back functions. 
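+ //
+ // Typical test wiring (a sketch; "Point:A" and "Point:B" are hypothetical
+ // sync point names that would be registered elsewhere via TEST_SYNC_POINT):
+ //
+ //   auto* sp = SyncPoint::GetInstance();
+ //   sp->LoadDependency({{"Point:A", "Point:B"}});  // B waits for A
+ //   sp->SetCallBack("Point:A", [](void* arg) { /* inspect arg */ });
+ //   sp->EnableProcessing();
+ //   ... run the threads under test ...
+ //   sp->DisableProcessing();
+ //   sp->ClearAllCallBacks();
+ //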
+ void ClearAllCallBacks(); + + // enable sync point processing (disabled on startup) + void EnableProcessing(); + + // disable sync point processing + void DisableProcessing(); + + // remove the execution trace of all sync points + void ClearTrace(); + + // triggered by TEST_SYNC_POINT, blocking execution until all predecessors + // are executed. + // And/or call registered callback function, with argument `cb_arg` + void Process(const Slice& point, void* cb_arg = nullptr); + + // template gets length of const string at compile time, + // avoiding strlen() at runtime + template + void Process(const char (&point)[kLen], void* cb_arg = nullptr) { + static_assert(kLen > 0, "Must not be empty"); + assert(point[kLen - 1] == '\0'); + Process(Slice(point, kLen - 1), cb_arg); + } + + // TODO: it might be useful to provide a function that blocks until all + // sync points are cleared. + + // We want this to be public so we can + // subclass the implementation + struct Data; + + private: + // Singleton + SyncPoint(); + Data* impl_; +}; + +// Sets up sync points to mock direct IO instead of actually issuing direct IO +// to the file system. +void SetupSyncPointsToMockDirectIO(); +} // namespace ROCKSDB_NAMESPACE + +// Use TEST_SYNC_POINT to specify sync points inside code base. +// Sync points can have happens-after dependency on other sync points, +// configured at runtime via SyncPoint::LoadDependency. This could be +// utilized to re-produce race conditions between threads. +// See TransactionLogIteratorRace in db_test.cc for an example use case. +// TEST_SYNC_POINT is no op in release build. +#define TEST_SYNC_POINT(x) \ + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->Process(x) +#define TEST_IDX_SYNC_POINT(x, index) \ + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->Process(x + \ + std::to_string(index)) +#define TEST_SYNC_POINT_CALLBACK(x, y) \ + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->Process(x, y) +#define INIT_SYNC_POINT_SINGLETONS() \ + (void)ROCKSDB_NAMESPACE::SyncPoint::GetInstance(); +#endif // NDEBUG + +// Callback sync point for any read IO errors that should be ignored by +// the fault injection framework +// Disable in release mode +#ifdef NDEBUG +#define IGNORE_STATUS_IF_ERROR(_status_) +#else +#define IGNORE_STATUS_IF_ERROR(_status_) \ + { \ + if (!_status_.ok()) { \ + TEST_SYNC_POINT("FaultInjectionIgnoreError"); \ + } \ + } +#endif // NDEBUG diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.cc new file mode 100644 index 0000000000..2a4bd3ccdf --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.cc @@ -0,0 +1,152 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
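+//
+// Implementation of the KillPoint and SyncPoint::Data facilities declared
+// in test_util/sync_point.h and test_util/sync_point_impl.h.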
+ +#include "test_util/sync_point_impl.h" + +#ifndef NDEBUG +namespace ROCKSDB_NAMESPACE { +KillPoint* KillPoint::GetInstance() { + static KillPoint kp; + return &kp; +} + +void KillPoint::TestKillRandom(std::string kill_point, int odds_weight, + const std::string& srcfile, int srcline) { + if (rocksdb_kill_odds <= 0) { + return; + } + int odds = rocksdb_kill_odds * odds_weight; + for (auto& p : rocksdb_kill_exclude_prefixes) { + if (kill_point.substr(0, p.length()) == p) { + return; + } + } + + assert(odds > 0); + if (odds % 7 == 0) { + // class Random uses multiplier 16807, which is 7^5. If odds are + // multiplier of 7, there might be limited values generated. + odds++; + } + auto* r = Random::GetTLSInstance(); + bool crash = r->OneIn(odds); + if (crash) { + port::Crash(srcfile, srcline); + } +} + +void SyncPoint::Data::LoadDependency( + const std::vector& dependencies) { + std::lock_guard lock(mutex_); + successors_.clear(); + predecessors_.clear(); + cleared_points_.clear(); + for (const auto& dependency : dependencies) { + successors_[dependency.predecessor].push_back(dependency.successor); + predecessors_[dependency.successor].push_back(dependency.predecessor); + point_filter_.Add(dependency.successor); + point_filter_.Add(dependency.predecessor); + } + cv_.notify_all(); +} + +void SyncPoint::Data::LoadDependencyAndMarkers( + const std::vector& dependencies, + const std::vector& markers) { + std::lock_guard lock(mutex_); + successors_.clear(); + predecessors_.clear(); + cleared_points_.clear(); + markers_.clear(); + marked_thread_id_.clear(); + for (const auto& dependency : dependencies) { + successors_[dependency.predecessor].push_back(dependency.successor); + predecessors_[dependency.successor].push_back(dependency.predecessor); + point_filter_.Add(dependency.successor); + point_filter_.Add(dependency.predecessor); + } + for (const auto& marker : markers) { + successors_[marker.predecessor].push_back(marker.successor); + predecessors_[marker.successor].push_back(marker.predecessor); + markers_[marker.predecessor].push_back(marker.successor); + point_filter_.Add(marker.predecessor); + point_filter_.Add(marker.successor); + } + cv_.notify_all(); +} + +bool SyncPoint::Data::PredecessorsAllCleared(const std::string& point) { + for (const auto& pred : predecessors_[point]) { + if (cleared_points_.count(pred) == 0) { + return false; + } + } + return true; +} + +void SyncPoint::Data::ClearCallBack(const std::string& point) { + std::unique_lock lock(mutex_); + while (num_callbacks_running_ > 0) { + cv_.wait(lock); + } + callbacks_.erase(point); +} + +void SyncPoint::Data::ClearAllCallBacks() { + std::unique_lock lock(mutex_); + while (num_callbacks_running_ > 0) { + cv_.wait(lock); + } + callbacks_.clear(); +} + +void SyncPoint::Data::Process(const Slice& point, void* cb_arg) { + if (!enabled_) { + return; + } + + // Use a filter to prevent mutex lock if possible. + if (!point_filter_.MayContain(point)) { + return; + } + + // Must convert to std::string for remaining work. Take + // heap hit. 
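+ // From here the processing order is: (1) register this point's marker
+ // successors for the current thread, (2) return early if a marker pins
+ // this point to a different thread, (3) block until every predecessor
+ // has cleared (re-checking the marker after each wakeup), (4) run any
+ // registered callback with the mutex released, and (5) record this
+ // point as cleared and wake all waiters.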
+ std::string point_string(point.ToString()); + std::unique_lock lock(mutex_); + auto thread_id = std::this_thread::get_id(); + + auto marker_iter = markers_.find(point_string); + if (marker_iter != markers_.end()) { + for (auto& marked_point : marker_iter->second) { + marked_thread_id_.emplace(marked_point, thread_id); + point_filter_.Add(marked_point); + } + } + + if (DisabledByMarker(point_string, thread_id)) { + return; + } + + while (!PredecessorsAllCleared(point_string)) { + cv_.wait(lock); + if (DisabledByMarker(point_string, thread_id)) { + return; + } + } + + auto callback_pair = callbacks_.find(point_string); + if (callback_pair != callbacks_.end()) { + num_callbacks_running_++; + mutex_.unlock(); + callback_pair->second(cb_arg); + mutex_.lock(); + num_callbacks_running_--; + } + cleared_points_.insert(point_string); + cv_.notify_all(); +} +} // namespace ROCKSDB_NAMESPACE +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.h new file mode 100644 index 0000000000..64cc0445e0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/sync_point_impl.h @@ -0,0 +1,96 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "memory/concurrent_arena.h" +#include "port/port.h" +#include "test_util/sync_point.h" +#include "util/dynamic_bloom.h" +#include "util/random.h" + +#pragma once + +#ifndef NDEBUG +namespace ROCKSDB_NAMESPACE { +// A hacky allocator for single use. +// Arena depends on SyncPoint and create circular dependency. +class SingleAllocator : public Allocator { + public: + char* Allocate(size_t) override { + assert(false); + return nullptr; + } + char* AllocateAligned(size_t bytes, size_t, Logger*) override { + buf_.resize(bytes); + return const_cast(buf_.data()); + } + size_t BlockSize() const override { + assert(false); + return 0; + } + + private: + std::string buf_; +}; + +struct SyncPoint::Data { + Data() : point_filter_(&alloc_, /*total_bits=*/8192), enabled_(false) {} + // Enable proper deletion by subclasses + virtual ~Data() {} + // successor/predecessor map loaded from LoadDependency + std::unordered_map> successors_; + std::unordered_map> predecessors_; + std::unordered_map> callbacks_; + std::unordered_map> markers_; + std::unordered_map marked_thread_id_; + + std::mutex mutex_; + std::condition_variable cv_; + // sync points that have been passed through + std::unordered_set cleared_points_; + SingleAllocator alloc_; + // A filter before holding mutex to speed up process. 
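+ // A Bloom-filter miss is definitive (the point was never added), so
+ // Process() can return without locking; a hit may be a false positive
+ // and still requires the locked checks in Process().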
+ DynamicBloom point_filter_; + std::atomic enabled_; + int num_callbacks_running_ = 0; + + void LoadDependency(const std::vector& dependencies); + void LoadDependencyAndMarkers(const std::vector& dependencies, + const std::vector& markers); + bool PredecessorsAllCleared(const std::string& point); + void SetCallBack(const std::string& point, + const std::function& callback) { + std::lock_guard lock(mutex_); + callbacks_[point] = callback; + point_filter_.Add(point); + } + + void ClearCallBack(const std::string& point); + void ClearAllCallBacks(); + void EnableProcessing() { enabled_ = true; } + void DisableProcessing() { enabled_ = false; } + void ClearTrace() { + std::lock_guard lock(mutex_); + cleared_points_.clear(); + } + bool DisabledByMarker(const std::string& point, std::thread::id thread_id) { + auto marked_point_iter = marked_thread_id_.find(point); + return marked_point_iter != marked_thread_id_.end() && + thread_id != marked_point_iter->second; + } + void Process(const Slice& point, void* cb_arg); +}; +} // namespace ROCKSDB_NAMESPACE +#endif // NDEBUG diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/testharness.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/testharness.h new file mode 100644 index 0000000000..d8b6c9679c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/testharness.h @@ -0,0 +1,124 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#ifdef OS_AIX +#include "gtest/gtest.h" +#else +#include +#endif + +// A "skipped" test has a specific meaning in Facebook infrastructure: the +// test is in good shape and should be run, but something about the +// compilation or execution environment means the test cannot be run. +// Specifically, there is a hole in intended testing if any +// parameterization of a test (e.g. Foo/FooTest.Bar/42) is skipped for all +// tested build configurations/platforms/etc. +// +// If GTEST_SKIP is available, use it. Otherwise, define skip as success. +// +// The GTEST macros do not seem to print the message, even with -verbose, +// so these print to stderr. Note that these do not exit the test themselves; +// calling code should 'return' or similar from the test. +#ifdef GTEST_SKIP_ +#define ROCKSDB_GTEST_SKIP(m) \ + do { \ + fputs("SKIPPED: " m "\n", stderr); \ + GTEST_SKIP_(m); \ + } while (false) /* user ; */ +#else +#define ROCKSDB_GTEST_SKIP(m) \ + do { \ + fputs("SKIPPED: " m "\n", stderr); \ + GTEST_SUCCESS_("SKIPPED: " m); \ + } while (false) /* user ; */ +#endif + +// We add "bypass" as an alternative to ROCKSDB_GTEST_SKIP that is allowed to +// be a permanent condition, e.g. for intentionally omitting or disabling some +// parameterizations for some tests. (Use _DISABLED at the end of the test +// name to disable an entire test.) 
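+//
+// Illustrative use inside a test body (a sketch; the test and condition
+// names are hypothetical):
+//
+//   TEST_F(FooTest, Bar) {
+//     if (!ConfigSupportsFeature()) {
+//       ROCKSDB_GTEST_BYPASS("feature intentionally unsupported here");
+//       return;  // the macro does not exit the test by itself
+//     }
+//     ...
+//   }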
+#define ROCKSDB_GTEST_BYPASS(m) \ + do { \ + fputs("BYPASSED: " m "\n", stderr); \ + GTEST_SUCCESS_("BYPASSED: " m); \ + } while (false) /* user ; */ + +// Avoid "loss of precision" warnings when passing in 64-bit integers +#define EXPECT_NEAR2(val1, val2, abs_error) \ + EXPECT_NEAR(static_cast(val1), static_cast(val2), \ + static_cast(abs_error)) + +#include + +#include "port/stack_trace.h" +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { +namespace test { + +// Return the directory to use for temporary storage. +std::string TmpDir(Env* env = Env::Default()); + +// A path unique within the thread +std::string PerThreadDBPath(std::string name); +std::string PerThreadDBPath(Env* env, std::string name); +std::string PerThreadDBPath(std::string dir, std::string name); + +// Return a randomization seed for this run. Typically returns the +// same number on repeated invocations of this binary, but automated +// runs may be able to vary the seed. +int RandomSeed(); + +::testing::AssertionResult AssertStatus(const char* s_expr, const Status& s); + +#define ASSERT_OK(s) \ + ASSERT_PRED_FORMAT1(ROCKSDB_NAMESPACE::test::AssertStatus, s) +#define ASSERT_NOK(s) ASSERT_FALSE((s).ok()) +#define EXPECT_OK(s) \ + EXPECT_PRED_FORMAT1(ROCKSDB_NAMESPACE::test::AssertStatus, s) +#define EXPECT_NOK(s) EXPECT_FALSE((s).ok()) + +// Useful for testing +// * No need to deal with Status like in Regex public API +// * No triggering lint reports on use of std::regex in tests +// * Available in LITE (unlike public API) +class TestRegex { + public: + // These throw on bad pattern + /*implicit*/ TestRegex(const std::string& pattern); + /*implicit*/ TestRegex(const char* pattern); + + // Checks that the whole of str is matched by this regex + bool Matches(const std::string& str) const; + + const std::string& GetPattern() const; + + private: + class Impl; + std::shared_ptr impl_; // shared_ptr for simple implementation + std::string pattern_; +}; + +::testing::AssertionResult AssertMatchesRegex(const char* str_expr, + const char* pattern_expr, + const std::string& str, + const TestRegex& pattern); + +#define ASSERT_MATCHES_REGEX(str, pattern) \ + ASSERT_PRED_FORMAT2(ROCKSDB_NAMESPACE::test::AssertMatchesRegex, str, pattern) +#define EXPECT_MATCHES_REGEX(str, pattern) \ + EXPECT_PRED_FORMAT2(ROCKSDB_NAMESPACE::test::AssertMatchesRegex, str, pattern) + +} // namespace test + +using test::TestRegex; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/testutil.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/testutil.h new file mode 100644 index 0000000000..686817c445 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/testutil.h @@ -0,0 +1,860 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#pragma once +#include +#include +#include +#include + +#include "env/composite_env_wrapper.h" +#include "file/writable_file_writer.h" +#include "rocksdb/compaction_filter.h" +#include "rocksdb/env.h" +#include "rocksdb/iterator.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/table.h" +#include "table/internal_iterator.h" +#include "util/mutexlock.h" + +#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS +extern "C" { +void RegisterCustomObjects(int argc, char** argv); +} +#else +void RegisterCustomObjects(int argc, char** argv); +#endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS + +namespace ROCKSDB_NAMESPACE { +class FileSystem; +class MemTableRepFactory; +class ObjectLibrary; +class Random; +class SequentialFile; +class SequentialFileReader; + +namespace test { + +extern const uint32_t kDefaultFormatVersion; +extern const std::set kFooterFormatVersionsToTest; + +// Return a random key with the specified length that may contain interesting +// characters (e.g. \x00, \xff, etc.). +enum RandomKeyType : char { RANDOM, LARGEST, SMALLEST, MIDDLE }; +extern std::string RandomKey(Random* rnd, int len, + RandomKeyType type = RandomKeyType::RANDOM); + +// Store in *dst a string of length "len" that will compress to +// "N*compressed_fraction" bytes and return a Slice that references +// the generated data. +extern Slice CompressibleString(Random* rnd, double compressed_fraction, + int len, std::string* dst); + +#ifndef NDEBUG +// An internal comparator that just forward comparing results from the +// user comparator in it. Can be used to test entities that have no dependency +// on internal key structure but consumes InternalKeyComparator, like +// BlockBasedTable. +class PlainInternalKeyComparator : public InternalKeyComparator { + public: + explicit PlainInternalKeyComparator(const Comparator* c) + : InternalKeyComparator(c) {} + + virtual ~PlainInternalKeyComparator() {} + + virtual int Compare(const Slice& a, const Slice& b) const override { + return user_comparator()->Compare(a, b); + } +}; +#endif + +// A test comparator which compare two strings in this way: +// (1) first compare prefix of 8 bytes in alphabet order, +// (2) if two strings share the same prefix, sort the other part of the string +// in the reverse alphabet order. +// This helps simulate the case of compounded key of [entity][timestamp] and +// latest timestamp first. +class SimpleSuffixReverseComparator : public Comparator { + public: + SimpleSuffixReverseComparator() {} + static const char* kClassName() { return "SimpleSuffixReverseComparator"; } + virtual const char* Name() const override { return kClassName(); } + + virtual int Compare(const Slice& a, const Slice& b) const override { + Slice prefix_a = Slice(a.data(), 8); + Slice prefix_b = Slice(b.data(), 8); + int prefix_comp = prefix_a.compare(prefix_b); + if (prefix_comp != 0) { + return prefix_comp; + } else { + Slice suffix_a = Slice(a.data() + 8, a.size() - 8); + Slice suffix_b = Slice(b.data() + 8, b.size() - 8); + return -(suffix_a.compare(suffix_b)); + } + } + virtual void FindShortestSeparator(std::string* /*start*/, + const Slice& /*limit*/) const override {} + + virtual void FindShortSuccessor(std::string* /*key*/) const override {} +}; + +// Returns a user key comparator that can be used for comparing two uint64_t +// slices. Instead of comparing slices byte-wise, it compares all the 8 bytes +// at once. 
Assumes same endian-ness is used though the database's lifetime. +// Symantics of comparison would differ from Bytewise comparator in little +// endian machines. +extern const Comparator* Uint64Comparator(); + +// A wrapper api for getting the ComparatorWithU64Ts +extern const Comparator* BytewiseComparatorWithU64TsWrapper(); + +class StringSink : public FSWritableFile { + public: + std::string contents_; + + explicit StringSink(Slice* reader_contents = nullptr) + : FSWritableFile(), + contents_(""), + reader_contents_(reader_contents), + last_flush_(0) { + if (reader_contents_ != nullptr) { + *reader_contents_ = Slice(contents_.data(), 0); + } + } + + const std::string& contents() const { return contents_; } + + IOStatus Truncate(uint64_t size, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + contents_.resize(static_cast(size)); + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Flush(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + if (reader_contents_ != nullptr) { + assert(reader_contents_->size() <= last_flush_); + size_t offset = last_flush_ - reader_contents_->size(); + *reader_contents_ = + Slice(contents_.data() + offset, contents_.size() - offset); + last_flush_ = contents_.size(); + } + + return IOStatus::OK(); + } + IOStatus Sync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + using FSWritableFile::Append; + IOStatus Append(const Slice& slice, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + contents_.append(slice.data(), slice.size()); + return IOStatus::OK(); + } + void Drop(size_t bytes) { + if (reader_contents_ != nullptr) { + contents_.resize(contents_.size() - bytes); + *reader_contents_ = + Slice(reader_contents_->data(), reader_contents_->size() - bytes); + last_flush_ = contents_.size(); + } + } + + private: + Slice* reader_contents_; + size_t last_flush_; +}; + +// A wrapper around a StringSink to give it a RandomRWFile interface +class RandomRWStringSink : public FSRandomRWFile { + public: + explicit RandomRWStringSink(StringSink* ss) : ss_(ss) {} + + IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + if (offset + data.size() > ss_->contents_.size()) { + ss_->contents_.resize(static_cast(offset) + data.size(), '\0'); + } + + char* pos = const_cast(ss_->contents_.data() + offset); + memcpy(pos, data.data(), data.size()); + return IOStatus::OK(); + } + + IOStatus Read(uint64_t offset, size_t n, const IOOptions& /*opts*/, + Slice* result, char* /*scratch*/, + IODebugContext* /*dbg*/) const override { + *result = Slice(nullptr, 0); + if (offset < ss_->contents_.size()) { + size_t str_res_sz = + std::min(static_cast(ss_->contents_.size() - offset), n); + *result = Slice(ss_->contents_.data() + offset, str_res_sz); + } + return IOStatus::OK(); + } + + IOStatus Flush(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + IOStatus Sync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + IOStatus Close(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + const std::string& contents() const { return ss_->contents(); } + + private: + StringSink* ss_; +}; + +// Like StringSink, this writes into a string. 
Unlink StringSink, it +// has some initial content and overwrites it, just like a recycled +// log file. +class OverwritingStringSink : public FSWritableFile { + public: + explicit OverwritingStringSink(Slice* reader_contents) + : FSWritableFile(), + contents_(""), + reader_contents_(reader_contents), + last_flush_(0) {} + + const std::string& contents() const { return contents_; } + + IOStatus Truncate(uint64_t size, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + contents_.resize(static_cast(size)); + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Flush(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + if (last_flush_ < contents_.size()) { + assert(reader_contents_->size() >= contents_.size()); + memcpy((char*)reader_contents_->data() + last_flush_, + contents_.data() + last_flush_, contents_.size() - last_flush_); + last_flush_ = contents_.size(); + } + return IOStatus::OK(); + } + IOStatus Sync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + using FSWritableFile::Append; + IOStatus Append(const Slice& slice, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + contents_.append(slice.data(), slice.size()); + return IOStatus::OK(); + } + void Drop(size_t bytes) { + contents_.resize(contents_.size() - bytes); + if (last_flush_ > contents_.size()) last_flush_ = contents_.size(); + } + + private: + std::string contents_; + Slice* reader_contents_; + size_t last_flush_; +}; + +class StringSource : public FSRandomAccessFile { + public: + explicit StringSource(const Slice& contents, uint64_t uniq_id = 0, + bool mmap = false) + : contents_(contents.data(), contents.size()), + uniq_id_(uniq_id), + mmap_(mmap), + total_reads_(0) {} + + virtual ~StringSource() {} + + uint64_t Size() const { return contents_.size(); } + + IOStatus Prefetch(uint64_t /*offset*/, size_t /*n*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + // If we are using mmap_, it is equivalent to performing a prefetch + if (mmap_) { + return IOStatus::OK(); + } else { + return IOStatus::NotSupported("Prefetch not supported"); + } + } + + IOStatus Read(uint64_t offset, size_t n, const IOOptions& /*opts*/, + Slice* result, char* scratch, + IODebugContext* /*dbg*/) const override { + total_reads_++; + if (offset > contents_.size()) { + return IOStatus::InvalidArgument("invalid Read offset"); + } + if (offset + n > contents_.size()) { + n = contents_.size() - static_cast(offset); + } + if (!mmap_) { + memcpy(scratch, &contents_[static_cast(offset)], n); + *result = Slice(scratch, n); + } else { + *result = Slice(&contents_[static_cast(offset)], n); + } + return IOStatus::OK(); + } + + size_t GetUniqueId(char* id, size_t max_size) const override { + if (max_size < 20) { + return 0; + } + + char* rid = id; + rid = EncodeVarint64(rid, uniq_id_); + rid = EncodeVarint64(rid, 0); + return static_cast(rid - id); + } + + int total_reads() const { return total_reads_; } + + void set_total_reads(int tr) { total_reads_ = tr; } + + private: + std::string contents_; + uint64_t uniq_id_; + bool mmap_; + mutable int total_reads_; +}; + +class NullLogger : public Logger { + public: + using Logger::Logv; + virtual void Logv(const char* /*format*/, va_list /*ap*/) override {} + virtual size_t GetLogFileSize() const override { return 0; } +}; + +// Corrupts key by changing the type +extern void CorruptKeyType(InternalKey* ikey); + 
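+// Build a serialized internal key from a user key, sequence number, and
+// value type; when `corrupt` is set the type is mangled (via CorruptKeyType
+// above) so parsing-error paths can be exercised. The overload taking `ts`
+// additionally encodes a 64-bit timestamp into the user key.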
+extern std::string KeyStr(const std::string& user_key, + const SequenceNumber& seq, const ValueType& t, + bool corrupt = false); + +extern std::string KeyStr(uint64_t ts, const std::string& user_key, + const SequenceNumber& seq, const ValueType& t, + bool corrupt = false); + +class SleepingBackgroundTask { + public: + SleepingBackgroundTask() + : bg_cv_(&mutex_), + should_sleep_(true), + done_with_sleep_(false), + sleeping_(false) {} + + ~SleepingBackgroundTask() { + MutexLock l(&mutex_); + should_sleep_ = false; + while (sleeping_) { + assert(!should_sleep_); + bg_cv_.SignalAll(); + bg_cv_.Wait(); + } + } + + bool IsSleeping() { + MutexLock l(&mutex_); + return sleeping_; + } + void DoSleep() { + MutexLock l(&mutex_); + sleeping_ = true; + bg_cv_.SignalAll(); + while (should_sleep_) { + bg_cv_.Wait(); + } + sleeping_ = false; + done_with_sleep_ = true; + bg_cv_.SignalAll(); + } + void WaitUntilSleeping() { + MutexLock l(&mutex_); + while (!sleeping_ || !should_sleep_) { + bg_cv_.Wait(); + } + } + // Waits for the status to change to sleeping, + // otherwise times out. + // wait_time is in microseconds. + // Returns true when times out, false otherwise. + bool TimedWaitUntilSleeping(uint64_t wait_time); + + void WakeUp() { + MutexLock l(&mutex_); + should_sleep_ = false; + bg_cv_.SignalAll(); + } + void WaitUntilDone() { + MutexLock l(&mutex_); + while (!done_with_sleep_) { + bg_cv_.Wait(); + } + } + // Similar to TimedWaitUntilSleeping. + // Waits until the task is done. + bool TimedWaitUntilDone(uint64_t wait_time); + + bool WokenUp() { + MutexLock l(&mutex_); + return should_sleep_ == false; + } + + void Reset() { + MutexLock l(&mutex_); + should_sleep_ = true; + done_with_sleep_ = false; + } + + static void DoSleepTask(void* arg) { + reinterpret_cast(arg)->DoSleep(); + } + + private: + port::Mutex mutex_; + port::CondVar bg_cv_; // Signalled when background work finishes + bool should_sleep_; + bool done_with_sleep_; + bool sleeping_; +}; + +// Filters merge operands and values that are equal to `num`. 
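+// Returning true from Filter()/FilterMergeOperand() drops the entry, so a
+// value survives only when it is a fixed 8-byte encoding that differs from
+// `num`; values of any other size are dropped as well. The key of the last
+// merge operand seen is recorded for test assertions.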
+class FilterNumber : public CompactionFilter { + public: + explicit FilterNumber(uint64_t num) : num_(num) {} + + std::string last_merge_operand_key() { return last_merge_operand_key_; } + + bool Filter(int /*level*/, const ROCKSDB_NAMESPACE::Slice& /*key*/, + const ROCKSDB_NAMESPACE::Slice& value, std::string* /*new_value*/, + bool* /*value_changed*/) const override { + if (value.size() == sizeof(uint64_t)) { + return num_ == DecodeFixed64(value.data()); + } + return true; + } + + bool FilterMergeOperand( + int /*level*/, const ROCKSDB_NAMESPACE::Slice& key, + const ROCKSDB_NAMESPACE::Slice& value) const override { + last_merge_operand_key_ = key.ToString(); + if (value.size() == sizeof(uint64_t)) { + return num_ == DecodeFixed64(value.data()); + } + return true; + } + + const char* Name() const override { return "FilterBadMergeOperand"; } + + private: + mutable std::string last_merge_operand_key_; + uint64_t num_; +}; + +inline std::string EncodeInt(uint64_t x) { + std::string result; + PutFixed64(&result, x); + return result; +} + +class SeqStringSource : public FSSequentialFile { + public: + SeqStringSource(const std::string& data, std::atomic* read_count) + : data_(data), offset_(0), read_count_(read_count) {} + ~SeqStringSource() override {} + IOStatus Read(size_t n, const IOOptions& /*opts*/, Slice* result, + char* scratch, IODebugContext* /*dbg*/) override { + std::string output; + if (offset_ < data_.size()) { + n = std::min(data_.size() - offset_, n); + memcpy(scratch, data_.data() + offset_, n); + offset_ += n; + *result = Slice(scratch, n); + } else { + return IOStatus::InvalidArgument( + "Attempt to read when it already reached eof."); + } + (*read_count_)++; + return IOStatus::OK(); + } + + IOStatus Skip(uint64_t n) override { + if (offset_ >= data_.size()) { + return IOStatus::InvalidArgument( + "Attempt to read when it already reached eof."); + } + // TODO(yhchiang): Currently doesn't handle the overflow case. 
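+ // (i.e. a very large `n` can wrap `offset_` around instead of clamping
+ // it to the end of `data_`)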
+ offset_ += static_cast(n); + return IOStatus::OK(); + } + + private: + std::string data_; + size_t offset_; + std::atomic* read_count_; +}; + +class StringFS : public FileSystemWrapper { + public: + class StringSink : public FSWritableFile { + public: + explicit StringSink(std::string* contents) + : FSWritableFile(), contents_(contents) {} + IOStatus Truncate(uint64_t size, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + contents_->resize(static_cast(size)); + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Flush(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Sync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + using FSWritableFile::Append; + IOStatus Append(const Slice& slice, const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) override { + contents_->append(slice.data(), slice.size()); + return IOStatus::OK(); + } + + private: + std::string* contents_; + }; + + explicit StringFS(const std::shared_ptr& t) + : FileSystemWrapper(t) {} + ~StringFS() override {} + + static const char* kClassName() { return "StringFS"; } + const char* Name() const override { return kClassName(); } + + const std::string& GetContent(const std::string& f) { return files_[f]; } + + const IOStatus WriteToNewFile(const std::string& file_name, + const std::string& content) { + std::unique_ptr r; + FileOptions file_opts; + IOOptions io_opts; + + auto s = NewWritableFile(file_name, file_opts, &r, nullptr); + if (s.ok()) { + s = r->Append(content, io_opts, nullptr); + } + if (s.ok()) { + s = r->Flush(io_opts, nullptr); + } + if (s.ok()) { + s = r->Close(io_opts, nullptr); + } + assert(!s.ok() || files_[file_name] == content); + return s; + } + + // The following text is boilerplate that forwards all methods to target() + IOStatus NewSequentialFile(const std::string& f, + const FileOptions& /*options*/, + std::unique_ptr* r, + IODebugContext* /*dbg*/) override { + auto iter = files_.find(f); + if (iter == files_.end()) { + return IOStatus::NotFound("The specified file does not exist", f); + } + r->reset(new SeqStringSource(iter->second, &num_seq_file_read_)); + return IOStatus::OK(); + } + + IOStatus NewRandomAccessFile(const std::string& /*f*/, + const FileOptions& /*options*/, + std::unique_ptr* /*r*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus NewWritableFile(const std::string& f, const FileOptions& /*options*/, + std::unique_ptr* r, + IODebugContext* /*dbg*/) override { + auto iter = files_.find(f); + if (iter != files_.end()) { + return IOStatus::IOError("The specified file already exists", f); + } + r->reset(new StringSink(&files_[f])); + return IOStatus::OK(); + } + IOStatus NewDirectory(const std::string& /*name*/, + const IOOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus FileExists(const std::string& f, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + if (files_.find(f) == files_.end()) { + return IOStatus::NotFound(); + } + return IOStatus::OK(); + } + + IOStatus GetChildren(const std::string& /*dir*/, const IOOptions& /*options*/, + std::vector* /*r*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus DeleteFile(const std::string& f, const IOOptions& /*options*/, + IODebugContext* 
/*dbg*/) override { + files_.erase(f); + return IOStatus::OK(); + } + + IOStatus CreateDir(const std::string& /*d*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus CreateDirIfMissing(const std::string& /*d*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus DeleteDir(const std::string& /*d*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus GetFileSize(const std::string& f, const IOOptions& /*options*/, + uint64_t* s, IODebugContext* /*dbg*/) override { + auto iter = files_.find(f); + if (iter == files_.end()) { + return IOStatus::NotFound("The specified file does not exist:", f); + } + *s = iter->second.size(); + return IOStatus::OK(); + } + + IOStatus GetFileModificationTime(const std::string& /*fname*/, + const IOOptions& /*options*/, + uint64_t* /*file_mtime*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus RenameFile(const std::string& /*s*/, const std::string& /*t*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus LinkFile(const std::string& /*s*/, const std::string& /*t*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus LockFile(const std::string& /*f*/, const IOOptions& /*options*/, + FileLock** /*l*/, IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + IOStatus UnlockFile(FileLock* /*l*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::NotSupported(); + } + + std::atomic num_seq_file_read_; + + protected: + std::unordered_map files_; +}; + +// Randomly initialize the given DBOptions +void RandomInitDBOptions(DBOptions* db_opt, Random* rnd); + +// Randomly initialize the given ColumnFamilyOptions +// Note that the caller is responsible for releasing non-null +// cf_opt->compaction_filter. +void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, DBOptions&, Random* rnd); + +// A dummy merge operator which can change its name +class ChanglingMergeOperator : public MergeOperator { + public: + explicit ChanglingMergeOperator(const std::string& name) + : name_(name + "MergeOperator") {} + ~ChanglingMergeOperator() {} + + void SetName(const std::string& name) { name_ = name; } + + virtual bool FullMergeV2(const MergeOperationInput& /*merge_in*/, + MergeOperationOutput* /*merge_out*/) const override { + return false; + } + virtual bool PartialMergeMulti(const Slice& /*key*/, + const std::deque& /*operand_list*/, + std::string* /*new_value*/, + Logger* /*logger*/) const override { + return false; + } + static const char* kClassName() { return "ChanglingMergeOperator"; } + const char* NickName() const override { return kNickName(); } + static const char* kNickName() { return "Changling"; } + bool IsInstanceOf(const std::string& id) const override { + if (id == kClassName()) { + return true; + } else { + return MergeOperator::IsInstanceOf(id); + } + } + + virtual const char* Name() const override { return name_.c_str(); } + + protected: + std::string name_; +}; + +// Returns a dummy merge operator with random name. 
+MergeOperator* RandomMergeOperator(Random* rnd); + +// A dummy compaction filter which can change its name +class ChanglingCompactionFilter : public CompactionFilter { + public: + explicit ChanglingCompactionFilter(const std::string& name) + : name_(name + "CompactionFilter") {} + ~ChanglingCompactionFilter() {} + + void SetName(const std::string& name) { name_ = name; } + + bool Filter(int /*level*/, const Slice& /*key*/, + const Slice& /*existing_value*/, std::string* /*new_value*/, + bool* /*value_changed*/) const override { + return false; + } + + static const char* kClassName() { return "ChanglingCompactionFilter"; } + const char* NickName() const override { return kNickName(); } + static const char* kNickName() { return "Changling"; } + + bool IsInstanceOf(const std::string& id) const override { + if (id == kClassName()) { + return true; + } else { + return CompactionFilter::IsInstanceOf(id); + } + } + + const char* Name() const override { return name_.c_str(); } + + private: + std::string name_; +}; + +// Returns a dummy compaction filter with a random name. +CompactionFilter* RandomCompactionFilter(Random* rnd); + +// A dummy compaction filter factory which can change its name +class ChanglingCompactionFilterFactory : public CompactionFilterFactory { + public: + explicit ChanglingCompactionFilterFactory(const std::string& name) + : name_(name + "CompactionFilterFactory") {} + ~ChanglingCompactionFilterFactory() {} + + void SetName(const std::string& name) { name_ = name; } + + std::unique_ptr CreateCompactionFilter( + const CompactionFilter::Context& /*context*/) override { + return std::unique_ptr(); + } + + // Returns a name that identifies this compaction filter factory. + const char* Name() const override { return name_.c_str(); } + static const char* kClassName() { return "ChanglingCompactionFilterFactory"; } + const char* NickName() const override { return kNickName(); } + static const char* kNickName() { return "Changling"; } + + bool IsInstanceOf(const std::string& id) const override { + if (id == kClassName()) { + return true; + } else { + return CompactionFilterFactory::IsInstanceOf(id); + } + } + + protected: + std::string name_; +}; + +// The factory for the hacky skip list mem table that triggers flush after +// number of entries exceeds a threshold. +extern MemTableRepFactory* NewSpecialSkipListFactory(int num_entries_per_flush); + +CompressionType RandomCompressionType(Random* rnd); + +void RandomCompressionTypeVector(const size_t count, + std::vector* types, + Random* rnd); + +CompactionFilterFactory* RandomCompactionFilterFactory(Random* rnd); + +const SliceTransform* RandomSliceTransform(Random* rnd, int pre_defined = -1); + +TableFactory* RandomTableFactory(Random* rnd, int pre_defined = -1); + +std::string RandomName(Random* rnd, const size_t len); + +bool IsDirectIOSupported(Env* env, const std::string& dir); + +bool IsPrefetchSupported(const std::shared_ptr& fs, + const std::string& dir); + +// Return the number of lines where a given pattern was found in a file. 
+size_t GetLinesCount(const std::string& fname, const std::string& pattern); + +Status CorruptFile(Env* env, const std::string& fname, int offset, + int bytes_to_corrupt, bool verify_checksum = true); +Status TruncateFile(Env* env, const std::string& fname, uint64_t length); + +// Try and delete a directory if it exists +Status TryDeleteDir(Env* env, const std::string& dirname); + +// Delete a directory if it exists +void DeleteDir(Env* env, const std::string& dirname); + +// Creates an Env from the system environment by looking at the system +// environment variables. +Status CreateEnvFromSystem(const ConfigOptions& options, Env** result, + std::shared_ptr* guard); + +// Registers the testutil classes with the ObjectLibrary +int RegisterTestObjects(ObjectLibrary& library, const std::string& /*arg*/); + +// Register the testutil classes with the default ObjectRegistry/Library +void RegisterTestLibrary(const std::string& arg = ""); +} // namespace test +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.cc new file mode 100644 index 0000000000..11fca6d575 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.cc @@ -0,0 +1,400 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "test_util/transaction_test_util.h" + +#include +#include +#include +#include +#include +#include + +#include "db/dbformat.h" +#include "db/snapshot_impl.h" +#include "logging/logging.h" +#include "rocksdb/db.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" +#include "rocksdb/utilities/transaction.h" +#include "rocksdb/utilities/transaction_db.h" +#include "util/random.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +RandomTransactionInserter::RandomTransactionInserter( + Random64* rand, const WriteOptions& write_options, + const ReadOptions& read_options, uint64_t num_keys, uint16_t num_sets, + const uint64_t cmt_delay_ms, const uint64_t first_id) + : rand_(rand), + write_options_(write_options), + read_options_(read_options), + num_keys_(num_keys), + num_sets_(num_sets), + txn_id_(first_id), + cmt_delay_ms_(cmt_delay_ms) {} + +RandomTransactionInserter::~RandomTransactionInserter() { + if (txn_ != nullptr) { + delete txn_; + } + if (optimistic_txn_ != nullptr) { + delete optimistic_txn_; + } +} + +bool RandomTransactionInserter::TransactionDBInsert( + TransactionDB* db, const TransactionOptions& txn_options) { + txn_ = db->BeginTransaction(write_options_, txn_options, txn_); + + std::hash hasher; + char name[64]; + snprintf(name, 64, "txn%" ROCKSDB_PRIszt "-%" PRIu64, + hasher(std::this_thread::get_id()), txn_id_++); + assert(strlen(name) < 64 - 1); + assert(txn_->SetName(name).ok()); + + // Take a snapshot if set_snapshot was not set or with 50% change otherwise + bool take_snapshot = txn_->GetSnapshot() == nullptr || rand_->OneIn(2); + if (take_snapshot) { + txn_->SetSnapshot(); + read_options_.snapshot = txn_->GetSnapshot(); + } + auto res = DoInsert(db, txn_, false); + if (take_snapshot) { + read_options_.snapshot = nullptr; + } + return res; +} + +bool RandomTransactionInserter::OptimisticTransactionDBInsert( + OptimisticTransactionDB* db, + const 
OptimisticTransactionOptions& txn_options) { + optimistic_txn_ = + db->BeginTransaction(write_options_, txn_options, optimistic_txn_); + + return DoInsert(db, optimistic_txn_, true); +} + +bool RandomTransactionInserter::DBInsert(DB* db) { + return DoInsert(db, nullptr, false); +} + +Status RandomTransactionInserter::DBGet( + DB* db, Transaction* txn, ReadOptions& read_options, uint16_t set_i, + uint64_t ikey, bool get_for_update, uint64_t* int_value, + std::string* full_key, bool* unexpected_error) { + Status s; + // Five digits (since the largest uint16_t is 65535) plus the NUL + // end char. + char prefix_buf[6] = {0}; + // Pad prefix appropriately so we can iterate over each set + assert(set_i + 1 <= 9999); + snprintf(prefix_buf, sizeof(prefix_buf), "%.4u", set_i + 1); + // key format: [SET#][random#] + std::string skey = std::to_string(ikey); + Slice base_key(skey); + *full_key = std::string(prefix_buf) + base_key.ToString(); + Slice key(*full_key); + + std::string value; + if (txn != nullptr) { + if (get_for_update) { + s = txn->GetForUpdate(read_options, key, &value); + } else { + s = txn->Get(read_options, key, &value); + } + } else { + s = db->Get(read_options, key, &value); + } + + if (s.ok()) { + // Found key, parse its value + *int_value = std::stoull(value); + if (*int_value == 0 || *int_value == ULONG_MAX) { + *unexpected_error = true; + fprintf(stderr, "Get returned unexpected value: %s\n", value.c_str()); + s = Status::Corruption(); + } + } else if (s.IsNotFound()) { + // Have not yet written to this key, so assume its value is 0 + *int_value = 0; + s = Status::OK(); + } + return s; +} + +bool RandomTransactionInserter::DoInsert(DB* db, Transaction* txn, + bool is_optimistic) { + Status s; + WriteBatch batch; + + // pick a random number to use to increment a key in each set + uint64_t incr = (rand_->Next() % 100) + 1; + bool unexpected_error = false; + + std::vector set_vec(num_sets_); + std::iota(set_vec.begin(), set_vec.end(), static_cast(0)); + RandomShuffle(set_vec.begin(), set_vec.end()); + + // For each set, pick a key at random and increment it + for (uint16_t set_i : set_vec) { + uint64_t int_value = 0; + std::string full_key; + uint64_t rand_key = rand_->Next() % num_keys_; + const bool get_for_update = txn ? rand_->OneIn(2) : false; + s = DBGet(db, txn, read_options_, set_i, rand_key, get_for_update, + &int_value, &full_key, &unexpected_error); + Slice key(full_key); + if (!s.ok()) { + // Optimistic transactions should never return non-ok status here. + // Non-optimistic transactions may return write-coflict/timeout errors. + if (is_optimistic || !(s.IsBusy() || s.IsTimedOut() || s.IsTryAgain())) { + fprintf(stderr, "Get returned an unexpected error: %s\n", + s.ToString().c_str()); + unexpected_error = true; + } + break; + } + + if (s.ok()) { + // Increment key + std::string sum = std::to_string(int_value + incr); + if (txn != nullptr) { + if ((set_i % 4) != 0) { + s = txn->SingleDelete(key); + } else { + s = txn->Delete(key); + } + if (!get_for_update && (s.IsBusy() || s.IsTimedOut())) { + // If the initial get was not for update, then the key is not locked + // before put and put could fail due to concurrent writes. + break; + } else if (!s.ok()) { + // Since we did a GetForUpdate, SingleDelete should not fail. + fprintf(stderr, "SingleDelete returned an unexpected error: %s\n", + s.ToString().c_str()); + unexpected_error = true; + } + s = txn->Put(key, sum); + if (!s.ok()) { + // Since we did a GetForUpdate, Put should not fail. 
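+ // (by this point the key is locked, either by GetForUpdate or by the
+ // successful delete above, so Put cannot hit a write conflict)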
+ fprintf(stderr, "Put returned an unexpected error: %s\n", + s.ToString().c_str()); + unexpected_error = true; + } + } else { + batch.Put(key, sum); + } + bytes_inserted_ += key.size() + sum.size(); + } + if (txn != nullptr) { + ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, + "Insert (%s) %s snap: %" PRIu64 " key:%s value: %" PRIu64 + "+%" PRIu64 "=%" PRIu64, + txn->GetName().c_str(), s.ToString().c_str(), + txn->GetSnapshot()->GetSequenceNumber(), full_key.c_str(), + int_value, incr, int_value + incr); + } + } + + if (s.ok()) { + if (txn != nullptr) { + bool with_prepare = !is_optimistic && !rand_->OneIn(10); + if (with_prepare) { + // Also try commit without prepare + s = txn->Prepare(); + if (!s.ok()) { + fprintf(stderr, "Prepare returned an unexpected error: %s\n", + s.ToString().c_str()); + } + assert(s.ok()); + ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, + "Prepare of %" PRIu64 " %s (%s)", txn->GetId(), + s.ToString().c_str(), txn->GetName().c_str()); + if (rand_->OneIn(20)) { + // This currently only tests the mechanics of writing commit time + // write batch so the exact values would not matter. + s = txn_->GetCommitTimeWriteBatch()->Put("cat", "dog"); + assert(s.ok()); + } + db->GetDBOptions().env->SleepForMicroseconds( + static_cast(cmt_delay_ms_ * 1000)); + } + if (!rand_->OneIn(20)) { + s = txn->Commit(); + assert(!with_prepare || s.ok()); + ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, + "Commit of %" PRIu64 " %s (%s)", txn->GetId(), + s.ToString().c_str(), txn->GetName().c_str()); + } else { + // Also try 5% rollback + s = txn->Rollback(); + ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, + "Rollback %" PRIu64 " %s %s", txn->GetId(), + txn->GetName().c_str(), s.ToString().c_str()); + assert(s.ok()); + } + assert(is_optimistic || s.ok()); + + if (!s.ok()) { + if (is_optimistic) { + // Optimistic transactions can have write-conflict errors on commit. + // Any other error is unexpected. + if (!(s.IsBusy() || s.IsTimedOut() || s.IsTryAgain())) { + unexpected_error = true; + } + } else { + // Non-optimistic transactions should only fail due to expiration + // or write failures. For testing purproses, we do not expect any + // write failures. + if (!s.IsExpired()) { + unexpected_error = true; + } + } + + if (unexpected_error) { + fprintf(stderr, "Commit returned an unexpected error: %s\n", + s.ToString().c_str()); + } + } + } else { + s = db->Write(write_options_, &batch); + if (!s.ok()) { + unexpected_error = true; + fprintf(stderr, "Write returned an unexpected error: %s\n", + s.ToString().c_str()); + } + } + } else { + if (txn != nullptr) { + assert(txn->Rollback().ok()); + ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, "Error %s for txn %s", + s.ToString().c_str(), txn->GetName().c_str()); + } + } + + if (s.ok()) { + success_count_++; + } else { + failure_count_++; + } + + last_status_ = s; + + // return success if we didn't get any unexpected errors + return !unexpected_error; +} + +// Verify that the sum of the keys in each set are equal +Status RandomTransactionInserter::Verify(DB* db, uint16_t num_sets, + uint64_t num_keys_per_set, + bool take_snapshot, Random64* rand, + uint64_t delay_ms) { + // delay_ms is the delay between taking a snapshot and doing the reads. It + // emulates reads from a long-running backup job. 
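+ // Every successful insert adds the same increment to one key in each of
+ // the `num_sets` key sets, so the per-set sums must all be equal at any
+ // consistent point in time; a mismatch below is reported as corruption.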
+
+// Verify that the sums of the keys in each set are equal
+Status RandomTransactionInserter::Verify(DB* db, uint16_t num_sets,
+                                         uint64_t num_keys_per_set,
+                                         bool take_snapshot, Random64* rand,
+                                         uint64_t delay_ms) {
+  // delay_ms is the delay between taking a snapshot and doing the reads. It
+  // emulates reads from a long-running backup job.
+  assert(delay_ms == 0 || take_snapshot);
+  uint64_t prev_total = 0;
+  uint32_t prev_i = 0;
+  bool prev_assigned = false;
+
+  ReadOptions roptions;
+  if (take_snapshot) {
+    roptions.snapshot = db->GetSnapshot();
+    db->GetDBOptions().env->SleepForMicroseconds(
+        static_cast<int>(delay_ms * 1000));
+  }
+
+  std::vector<uint16_t> set_vec(num_sets);
+  std::iota(set_vec.begin(), set_vec.end(), static_cast<uint16_t>(0));
+  RandomShuffle(set_vec.begin(), set_vec.end());
+
+  // For each set of keys with the same prefix, sum all the values
+  for (uint16_t set_i : set_vec) {
+    // Five digits (since the largest uint16_t is 65535) plus the NUL
+    // end char.
+    char prefix_buf[6];
+    assert(set_i + 1 <= 9999);
+    snprintf(prefix_buf, sizeof(prefix_buf), "%.4u", set_i + 1);
+    uint64_t total = 0;
+
+    // Use either point lookups or an iterator. Point lookups are slower, so
+    // we use them less often.
+    const bool use_point_lookup =
+        num_keys_per_set != 0 && rand && rand->OneIn(10);
+    if (use_point_lookup) {
+      ReadOptions read_options;
+      for (uint64_t k = 0; k < num_keys_per_set; k++) {
+        std::string dont_care;
+        uint64_t int_value = 0;
+        bool unexpected_error = false;
+        const bool FOR_UPDATE = false;
+        Status s = DBGet(db, nullptr, roptions, set_i, k, FOR_UPDATE,
+                         &int_value, &dont_care, &unexpected_error);
+        assert(s.ok());
+        assert(!unexpected_error);
+        total += int_value;
+      }
+    } else {  // use an iterator
+      Iterator* iter = db->NewIterator(roptions);
+      for (iter->Seek(Slice(prefix_buf, 4)); iter->Valid(); iter->Next()) {
+        Slice key = iter->key();
+        // stop when we reach a different prefix
+        if (key.ToString().compare(0, 4, prefix_buf) != 0) {
+          break;
+        }
+        Slice value = iter->value();
+        uint64_t int_value = std::stoull(value.ToString());
+        if (int_value == 0 || int_value == ULONG_MAX) {
+          fprintf(stderr, "Iter returned unexpected value: %s\n",
+                  value.ToString().c_str());
+          return Status::Corruption();
+        }
+        ROCKS_LOG_DEBUG(
+            db->GetDBOptions().info_log,
+            "VerifyRead at %" PRIu64 " (%" PRIu64 "): %.*s value: %" PRIu64,
+            roptions.snapshot ? roptions.snapshot->GetSequenceNumber() : 0ul,
+            roptions.snapshot
+                ? ((SnapshotImpl*)roptions.snapshot)->min_uncommitted_
+                : 0ul,
+            static_cast<int>(key.size()), key.data(), int_value);
+        total += int_value;
+      }
+      iter->status().PermitUncheckedError();
+      delete iter;
+    }
+
+    if (prev_assigned && total != prev_total) {
+      db->GetDBOptions().info_log->Flush();
+      fprintf(stdout,
+              "RandomTransactionVerify found inconsistent totals using "
+              "pointlookup? %d "
+              "Set[%" PRIu32 "]: %" PRIu64 ", Set[%" PRIu32 "]: %" PRIu64
+              " at snapshot %" PRIu64 "\n",
+              use_point_lookup, prev_i, prev_total, set_i, total,
+              roptions.snapshot ? roptions.snapshot->GetSequenceNumber() : 0ul);
+      fflush(stdout);
+      return Status::Corruption();
+    } else {
+      ROCKS_LOG_DEBUG(
+          db->GetDBOptions().info_log,
+          "RandomTransactionVerify pass pointlookup? %d total: %" PRIu64
+          " snap: %" PRIu64,
+          use_point_lookup, total,
+          roptions.snapshot ?
roptions.snapshot->GetSequenceNumber() : 0ul); + } + prev_total = total; + prev_i = set_i; + prev_assigned = true; + } + if (take_snapshot) { + db->ReleaseSnapshot(roptions.snapshot); + } + + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.h new file mode 100644 index 0000000000..284b392503 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/test_util/transaction_test_util.h @@ -0,0 +1,147 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include "port/port.h" +#include "rocksdb/options.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" +#include "rocksdb/utilities/transaction_db.h" + +namespace ROCKSDB_NAMESPACE { + +class DB; +class Random64; + +// Utility class for stress testing transactions. Can be used to write many +// transactions in parallel and then validate that the data written is logically +// consistent. This class assumes the input DB is initially empty. +// +// Each call to TransactionDBInsert()/OptimisticTransactionDBInsert() will +// increment the value of a key in #num_sets sets of keys. Regardless of +// whether the transaction succeeds, the total sum of values of keys in each +// set is an invariant that should remain equal. +// +// After calling TransactionDBInsert()/OptimisticTransactionDBInsert() many +// times, Verify() can be called to validate that the invariant holds. +// +// To test writing Transaction in parallel, multiple threads can create a +// RandomTransactionInserter with similar arguments using the same DB. +class RandomTransactionInserter { + public: + static bool RollbackDeletionTypeCallback(const Slice& key) { + // These are hard-coded atm. See how RandomTransactionInserter::DoInsert() + // determines whether to use SingleDelete or Delete for a key. + assert(key.size() >= 4); + const char* ptr = key.data(); + assert(ptr); + while (ptr && ptr < key.data() + 4 && *ptr == '0') { + ++ptr; + } + std::string prefix(ptr, 4 - (ptr - key.data())); + unsigned long set_i = std::stoul(prefix); + assert(set_i > 0); + assert(set_i <= 9999); + --set_i; + return ((set_i % 4) != 0); + } + + // num_keys is the number of keys in each set. + // num_sets is the number of sets of keys. + // cmt_delay_ms is the delay between prepare (if there is any) and commit + // first_id is the id of the first transaction + explicit RandomTransactionInserter( + Random64* rand, const WriteOptions& write_options = WriteOptions(), + const ReadOptions& read_options = ReadOptions(), uint64_t num_keys = 1000, + uint16_t num_sets = 3, const uint64_t cmt_delay_ms = 0, + const uint64_t first_id = 0); + + ~RandomTransactionInserter(); + + // Increment a key in each set using a Transaction on a TransactionDB. + // + // Returns true if the transaction succeeded OR if any error encountered was + // expected (eg a write-conflict). 
Error status may be obtained by calling + // GetLastStatus(); + bool TransactionDBInsert( + TransactionDB* db, + const TransactionOptions& txn_options = TransactionOptions()); + + // Increment a key in each set using a Transaction on an + // OptimisticTransactionDB + // + // Returns true if the transaction succeeded OR if any error encountered was + // expected (eg a write-conflict). Error status may be obtained by calling + // GetLastStatus(); + bool OptimisticTransactionDBInsert( + OptimisticTransactionDB* db, + const OptimisticTransactionOptions& txn_options = + OptimisticTransactionOptions()); + // Increment a key in each set without using a transaction. If this function + // is called in parallel, then Verify() may fail. + // + // Returns true if the write succeeds. + // Error status may be obtained by calling GetLastStatus(). + bool DBInsert(DB* db); + + // Get the ikey'th key from set set_i + static Status DBGet(DB* db, Transaction* txn, ReadOptions& read_options, + uint16_t set_i, uint64_t ikey, bool get_for_update, + uint64_t* int_value, std::string* full_key, + bool* unexpected_error); + + // Returns OK if Invariant is true. + static Status Verify(DB* db, uint16_t num_sets, uint64_t num_keys_per_set = 0, + bool take_snapshot = false, Random64* rand = nullptr, + uint64_t delay_ms = 0); + + // Returns the status of the previous Insert operation + Status GetLastStatus() { return last_status_; } + + // Returns the number of successfully written calls to + // TransactionDBInsert/OptimisticTransactionDBInsert/DBInsert + uint64_t GetSuccessCount() { return success_count_; } + + // Returns the number of calls to + // TransactionDBInsert/OptimisticTransactionDBInsert/DBInsert that did not + // write any data. + uint64_t GetFailureCount() { return failure_count_; } + + // Returns the sum of user keys/values Put() to the DB. + size_t GetBytesInserted() { return bytes_inserted_; } + + private: + // Input options + Random64* rand_; + const WriteOptions write_options_; + ReadOptions read_options_; + const uint64_t num_keys_; + const uint16_t num_sets_; + + // Number of successful insert batches performed + uint64_t success_count_ = 0; + + // Number of failed insert batches attempted + uint64_t failure_count_ = 0; + + size_t bytes_inserted_ = 0; + + // Status returned by most recent insert operation + Status last_status_; + + // optimization: re-use allocated transaction objects. + Transaction* txn_ = nullptr; + Transaction* optimistic_txn_ = nullptr; + + uint64_t txn_id_; + // The delay between ::Prepare and ::Commit + const uint64_t cmt_delay_ms_; + + bool DoInsert(DB* db, Transaction* txn, bool is_optimistic); +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/third-party/gcc/ppc-asm.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/third-party/gcc/ppc-asm.h new file mode 100644 index 0000000000..e0bce9c5ae --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/third-party/gcc/ppc-asm.h @@ -0,0 +1,390 @@ +/* PowerPC asm definitions for GNU C. + +Copyright (C) 2002-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Under winnt, 1) gas supports the following as names and 2) in particular + defining "toc" breaks the FUNC_START macro as ".toc" becomes ".2" */ + +#define r0 0 +#define sp 1 +#define toc 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + +#define f0 0 +#define f1 1 +#define f2 2 +#define f3 3 +#define f4 4 +#define f5 5 +#define f6 6 +#define f7 7 +#define f8 8 +#define f9 9 +#define f10 10 +#define f11 11 +#define f12 12 +#define f13 13 +#define f14 14 +#define f15 15 +#define f16 16 +#define f17 17 +#define f18 18 +#define f19 19 +#define f20 20 +#define f21 21 +#define f22 22 +#define f23 23 +#define f24 24 +#define f25 25 +#define f26 26 +#define f27 27 +#define f28 28 +#define f29 29 +#define f30 30 +#define f31 31 + +#ifdef __VSX__ +#define f32 32 +#define f33 33 +#define f34 34 +#define f35 35 +#define f36 36 +#define f37 37 +#define f38 38 +#define f39 39 +#define f40 40 +#define f41 41 +#define f42 42 +#define f43 43 +#define f44 44 +#define f45 45 +#define f46 46 +#define f47 47 +#define f48 48 +#define f49 49 +#define f50 50 +#define f51 51 +#define f52 52 +#define f53 53 +#define f54 54 +#define f55 55 +#define f56 56 +#define f57 57 +#define f58 58 +#define f59 59 +#define f60 60 +#define f61 61 +#define f62 62 +#define f63 63 +#endif + +#ifdef __ALTIVEC__ +#define v0 0 +#define v1 1 +#define v2 2 +#define v3 3 +#define v4 4 +#define v5 5 +#define v6 6 +#define v7 7 +#define v8 8 +#define v9 9 +#define v10 10 +#define v11 11 +#define v12 12 +#define v13 13 +#define v14 14 +#define v15 15 +#define v16 16 +#define v17 17 +#define v18 18 +#define v19 19 +#define v20 20 +#define v21 21 +#define v22 22 +#define v23 23 +#define v24 24 +#define v25 25 +#define v26 26 +#define v27 27 +#define v28 28 +#define v29 29 +#define v30 30 +#define v31 31 +#endif + +#ifdef __VSX__ +#define vs0 0 +#define vs1 1 +#define vs2 2 +#define vs3 3 +#define vs4 4 +#define vs5 5 +#define vs6 6 +#define vs7 7 +#define vs8 8 +#define vs9 9 +#define vs10 10 +#define vs11 11 +#define vs12 12 +#define vs13 13 +#define vs14 14 +#define vs15 15 +#define vs16 16 +#define vs17 17 +#define vs18 18 +#define vs19 19 +#define vs20 20 +#define vs21 21 +#define vs22 22 +#define vs23 23 +#define vs24 24 +#define vs25 25 +#define vs26 26 +#define vs27 27 +#define vs28 28 +#define vs29 29 +#define vs30 30 +#define vs31 31 +#define vs32 32 +#define vs33 33 +#define vs34 34 +#define vs35 35 +#define vs36 36 +#define vs37 37 +#define vs38 38 +#define vs39 39 +#define vs40 40 +#define vs41 41 +#define vs42 42 +#define vs43 43 +#define vs44 44 +#define 
vs45 45 +#define vs46 46 +#define vs47 47 +#define vs48 48 +#define vs49 49 +#define vs50 50 +#define vs51 51 +#define vs52 52 +#define vs53 53 +#define vs54 54 +#define vs55 55 +#define vs56 56 +#define vs57 57 +#define vs58 58 +#define vs59 59 +#define vs60 60 +#define vs61 61 +#define vs62 62 +#define vs63 63 +#endif + +/* + * Macros to glue together two tokens. + */ + +#ifdef __STDC__ +#define XGLUE(a,b) a##b +#else +#define XGLUE(a,b) a/**/b +#endif + +#define GLUE(a,b) XGLUE(a,b) + +/* + * Macros to begin and end a function written in assembler. If -mcall-aixdesc + * or -mcall-nt, create a function descriptor with the given name, and create + * the real function with one or two leading periods respectively. + */ + +#if defined(__powerpc64__) && _CALL_ELF == 2 + +/* Defining "toc" above breaks @toc in assembler code. */ +#undef toc + +#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#ifdef __PCREL__ +#define JUMP_TARGET(name) GLUE(FUNC_NAME(name),@notoc) +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): \ + .localentry FUNC_NAME(name),1 +#else +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): \ +0: addis 2,12,(.TOC.-0b)@ha; \ + addi 2,2,(.TOC.-0b)@l; \ + .localentry FUNC_NAME(name),.-FUNC_NAME(name) +#endif /* !__PCREL__ */ + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden FUNC_NAME(name); + +#define FUNC_END(name) \ + .size FUNC_NAME(name),.-FUNC_NAME(name) + +#elif defined (__powerpc64__) + +#define FUNC_NAME(name) GLUE(.,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .section ".opd","aw"; \ +name: \ + .quad GLUE(.,name); \ + .quad .TOC.@tocbase; \ + .quad 0; \ + .previous; \ + .type GLUE(.,name),@function; \ + .globl name; \ + .globl GLUE(.,name); \ +GLUE(.,name): + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden name; \ + .hidden GLUE(.,name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size GLUE(.,name),GLUE(.L,name)-GLUE(.,name) + +#elif defined(_CALL_AIXDESC) + +#ifdef _RELOCATABLE +#define DESC_SECTION ".got2" +#else +#define DESC_SECTION ".got1" +#endif + +#define FUNC_NAME(name) GLUE(.,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .section DESC_SECTION,"aw"; \ +name: \ + .long GLUE(.,name); \ + .long _GLOBAL_OFFSET_TABLE_; \ + .long 0; \ + .previous; \ + .type GLUE(.,name),@function; \ + .globl name; \ + .globl GLUE(.,name); \ +GLUE(.,name): + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden name; \ + .hidden GLUE(.,name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size GLUE(.,name),GLUE(.L,name)-GLUE(.,name) + +#else + +#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#if defined __PIC__ || defined __pic__ +#define JUMP_TARGET(name) FUNC_NAME(name@plt) +#else +#define JUMP_TARGET(name) FUNC_NAME(name) +#endif +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden FUNC_NAME(name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size FUNC_NAME(name),GLUE(.L,name)-FUNC_NAME(name) +#endif + +#ifdef IN_GCC +/* For HAVE_GAS_CFI_DIRECTIVE. 
*/ +#include "auto-host.h" + +#ifdef HAVE_GAS_CFI_DIRECTIVE +# define CFI_STARTPROC .cfi_startproc +# define CFI_ENDPROC .cfi_endproc +# define CFI_OFFSET(reg, off) .cfi_offset reg, off +# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg +# define CFI_RESTORE(reg) .cfi_restore reg +#else +# define CFI_STARTPROC +# define CFI_ENDPROC +# define CFI_OFFSET(reg, off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_RESTORE(reg) +#endif +#endif + +#if defined __linux__ && !defined __powerpc64__ + .section .note.GNU-stack + .previous +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/third-party/gtest-1.8.1/fused-src/gtest/gtest.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/third-party/gtest-1.8.1/fused-src/gtest/gtest.h new file mode 100644 index 0000000000..2d82d8e4d0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/third-party/gtest-1.8.1/fused-src/gtest/gtest.h @@ -0,0 +1,22115 @@ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +// The Google C++ Testing and Mocking Framework (Google Test) +// +// This header file defines the public API for Google Test. It should be +// included by any test program that uses Google Test. +// +// IMPORTANT NOTE: Due to limitation of the C++ language, we have to +// leave some internal implementation details in this header file. +// They are clearly marked by comments like this: +// +// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +// +// Such code is NOT meant to be used by a user directly, and is subject +// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user +// program! +// +// Acknowledgment: Google Test borrowed the idea of automatic test +// registration from Barthelemy Dagenais' (barthelemy@prologique.com) +// easyUnit framework. + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_GTEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_H_ + +#include +#include +#include +#include + +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// The Google C++ Testing and Mocking Framework (Google Test) +// +// This header file declares functions and macros used internally by +// Google Test. They are subject to change without notice. + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ + +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Low-level types and utilities for porting Google Test to various +// platforms. 
All macros ending with _ and symbols defined in an +// internal namespace are subject to change without notice. Code +// outside Google Test MUST NOT USE THEM DIRECTLY. Macros that don't +// end with _ are part of Google Test's public API and can be used by +// code outside Google Test. +// +// This file is fundamental to Google Test. All other Google Test source +// files are expected to #include this. Therefore, it cannot #include +// any other Google Test header. + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ + +// Environment-describing macros +// ----------------------------- +// +// Google Test can be used in many different environments. Macros in +// this section tell Google Test what kind of environment it is being +// used in, such that Google Test can provide environment-specific +// features and implementations. +// +// Google Test tries to automatically detect the properties of its +// environment, so users usually don't need to worry about these +// macros. However, the automatic detection is not perfect. +// Sometimes it's necessary for a user to define some of the following +// macros in the build script to override Google Test's decisions. +// +// If the user doesn't define a macro in the list, Google Test will +// provide a default definition. After this header is #included, all +// macros in this list will be defined to either 1 or 0. +// +// Notes to maintainers: +// - Each macro here is a user-tweakable knob; do not grow the list +// lightly. +// - Use #if to key off these macros. Don't use #ifdef or "#if +// defined(...)", which will not work as these macros are ALWAYS +// defined. +// +// GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2) +// is/isn't available. +// GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions +// are enabled. +// GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string +// is/isn't available +// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring +// is/isn't available +// GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular +// expressions are/aren't available. +// GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that +// is/isn't available. +// GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't +// enabled. +// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that +// std::wstring does/doesn't work (Google Test can +// be used where std::wstring is unavailable). +// GTEST_HAS_TR1_TUPLE - Define it to 1/0 to indicate tr1::tuple +// is/isn't available. +// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the +// compiler supports Microsoft's "Structured +// Exception Handling". +// GTEST_HAS_STREAM_REDIRECTION +// - Define it to 1/0 to indicate whether the +// platform supports I/O stream redirection using +// dup() and dup2(). +// GTEST_USE_OWN_TR1_TUPLE - Define it to 1/0 to indicate whether Google +// Test's own tr1 tuple implementation should be +// used. Unused when the user sets +// GTEST_HAS_TR1_TUPLE to 0. +// GTEST_LANG_CXX11 - Define it to 1/0 to indicate that Google Test +// is building in C++11/C++98 mode. +// GTEST_LINKED_AS_SHARED_LIBRARY +// - Define to 1 when compiling tests that use +// Google Test as a shared library (known as +// DLL on Windows). +// GTEST_CREATE_SHARED_LIBRARY +// - Define to 1 when compiling Google Test itself +// as a shared library. 
+// GTEST_DEFAULT_DEATH_TEST_STYLE +// - The default value of --gtest_death_test_style. +// The legacy default has been "fast" in the open +// source version since 2008. The recommended value +// is "threadsafe", and can be set in +// custom/gtest-port.h. + +// Platform-indicating macros +// -------------------------- +// +// Macros indicating the platform on which Google Test is being used +// (a macro is defined to 1 if compiled on the given platform; +// otherwise UNDEFINED -- it's never defined to 0.). Google Test +// defines these macros automatically. Code outside Google Test MUST +// NOT define them. +// +// GTEST_OS_AIX - IBM AIX +// GTEST_OS_CYGWIN - Cygwin +// GTEST_OS_FREEBSD - FreeBSD +// GTEST_OS_FUCHSIA - Fuchsia +// GTEST_OS_HPUX - HP-UX +// GTEST_OS_LINUX - Linux +// GTEST_OS_LINUX_ANDROID - Google Android +// GTEST_OS_MAC - Mac OS X +// GTEST_OS_IOS - iOS +// GTEST_OS_NACL - Google Native Client (NaCl) +// GTEST_OS_NETBSD - NetBSD +// GTEST_OS_OPENBSD - OpenBSD +// GTEST_OS_QNX - QNX +// GTEST_OS_SOLARIS - Sun Solaris +// GTEST_OS_SYMBIAN - Symbian +// GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile) +// GTEST_OS_WINDOWS_DESKTOP - Windows Desktop +// GTEST_OS_WINDOWS_MINGW - MinGW +// GTEST_OS_WINDOWS_MOBILE - Windows Mobile +// GTEST_OS_WINDOWS_PHONE - Windows Phone +// GTEST_OS_WINDOWS_RT - Windows Store App/WinRT +// GTEST_OS_ZOS - z/OS +// +// Among the platforms, Cygwin, Linux, Max OS X, and Windows have the +// most stable support. Since core members of the Google Test project +// don't have access to other platforms, support for them may be less +// stable. If you notice any problems on your platform, please notify +// googletestframework@googlegroups.com (patches for fixing them are +// even more welcome!). +// +// It is possible that none of the GTEST_OS_* macros are defined. + +// Feature-indicating macros +// ------------------------- +// +// Macros indicating which Google Test features are available (a macro +// is defined to 1 if the corresponding feature is supported; +// otherwise UNDEFINED -- it's never defined to 0.). Google Test +// defines these macros automatically. Code outside Google Test MUST +// NOT define them. +// +// These macros are public so that portable tests can be written. +// Such tests typically surround code using a feature with an #if +// which controls that code. For example: +// +// #if GTEST_HAS_DEATH_TEST +// EXPECT_DEATH(DoSomethingDeadly()); +// #endif +// +// GTEST_HAS_COMBINE - the Combine() function (for value-parameterized +// tests) +// GTEST_HAS_DEATH_TEST - death tests +// GTEST_HAS_TYPED_TEST - typed tests +// GTEST_HAS_TYPED_TEST_P - type-parameterized tests +// GTEST_IS_THREADSAFE - Google Test is thread-safe. +// GOOGLETEST_CM0007 DO NOT DELETE +// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with +// GTEST_HAS_POSIX_RE (see above) which users can +// define themselves. +// GTEST_USES_SIMPLE_RE - our own simple regex is used; +// the above RE\b(s) are mutually exclusive. +// GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ(). + +// Misc public macros +// ------------------ +// +// GTEST_FLAG(flag_name) - references the variable corresponding to +// the given Google Test flag. + +// Internal utilities +// ------------------ +// +// The following macros and utilities are for Google Test's INTERNAL +// use only. Code outside Google Test MUST NOT USE THEM DIRECTLY. +// +// Macros for basic C++ coding: +// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning. 
+// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a +// variable don't have to be used. +// GTEST_DISALLOW_ASSIGN_ - disables operator=. +// GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=. +// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used. +// GTEST_INTENTIONAL_CONST_COND_PUSH_ - start code section where MSVC C4127 is +// suppressed (constant conditional). +// GTEST_INTENTIONAL_CONST_COND_POP_ - finish code section where MSVC C4127 +// is suppressed. +// +// C++11 feature wrappers: +// +// testing::internal::forward - portability wrapper for std::forward. +// testing::internal::move - portability wrapper for std::move. +// +// Synchronization: +// Mutex, MutexLock, ThreadLocal, GetThreadCount() +// - synchronization primitives. +// +// Template meta programming: +// is_pointer - as in TR1; needed on Symbian and IBM XL C/C++ only. +// IteratorTraits - partial implementation of std::iterator_traits, which +// is not available in libCstd when compiled with Sun C++. +// +// Smart pointers: +// scoped_ptr - as in TR2. +// +// Regular expressions: +// RE - a simple regular expression class using the POSIX +// Extended Regular Expression syntax on UNIX-like platforms +// GOOGLETEST_CM0008 DO NOT DELETE +// or a reduced regular exception syntax on other +// platforms, including Windows. +// Logging: +// GTEST_LOG_() - logs messages at the specified severity level. +// LogToStderr() - directs all log messages to stderr. +// FlushInfoLog() - flushes informational log messages. +// +// Stdout and stderr capturing: +// CaptureStdout() - starts capturing stdout. +// GetCapturedStdout() - stops capturing stdout and returns the captured +// string. +// CaptureStderr() - starts capturing stderr. +// GetCapturedStderr() - stops capturing stderr and returns the captured +// string. +// +// Integer types: +// TypeWithSize - maps an integer to a int type. +// Int32, UInt32, Int64, UInt64, TimeInMillis +// - integers of known sizes. +// BiggestInt - the biggest signed integer type. +// +// Command-line utilities: +// GTEST_DECLARE_*() - declares a flag. +// GTEST_DEFINE_*() - defines a flag. +// GetInjectableArgvs() - returns the command line as a vector of strings. +// +// Environment variable utilities: +// GetEnv() - gets the value of an environment variable. +// BoolFromGTestEnv() - parses a bool environment variable. +// Int32FromGTestEnv() - parses an Int32 environment variable. +// StringFromGTestEnv() - parses a string environment variable. + +#include // for isspace, etc +#include // for ptrdiff_t +#include +#include +#include +#ifndef _WIN32_WCE +# include +# include +#endif // !_WIN32_WCE + +#if defined __APPLE__ +# include +# include +#endif + +// Brings in the definition of HAS_GLOBAL_STRING. This must be done +// BEFORE we test HAS_GLOBAL_STRING. +#include // NOLINT +#include // NOLINT +#include // NOLINT +#include // NOLINT +#include +#include // NOLINT + +// Copyright 2015, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// The Google C++ Testing and Mocking Framework (Google Test) +// +// This header file defines the GTEST_OS_* macro. +// It is separate from gtest-port.h so that custom/gtest-port.h can include it. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_ + +// Determines the platform on which Google Test is compiled. +#ifdef __CYGWIN__ +# define GTEST_OS_CYGWIN 1 +#elif defined __SYMBIAN32__ +# define GTEST_OS_SYMBIAN 1 +#elif defined _WIN32 +# define GTEST_OS_WINDOWS 1 +# ifdef _WIN32_WCE +# define GTEST_OS_WINDOWS_MOBILE 1 +# elif defined(__MINGW__) || defined(__MINGW32__) +# define GTEST_OS_WINDOWS_MINGW 1 +# elif defined(WINAPI_FAMILY) +# include +# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +# define GTEST_OS_WINDOWS_DESKTOP 1 +# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP) +# define GTEST_OS_WINDOWS_PHONE 1 +# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) +# define GTEST_OS_WINDOWS_RT 1 +# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_TV_TITLE) +# define GTEST_OS_WINDOWS_PHONE 1 +# define GTEST_OS_WINDOWS_TV_TITLE 1 +# else + // WINAPI_FAMILY defined but no known partition matched. + // Default to desktop. +# define GTEST_OS_WINDOWS_DESKTOP 1 +# endif +# else +# define GTEST_OS_WINDOWS_DESKTOP 1 +# endif // _WIN32_WCE +#elif defined __APPLE__ +# define GTEST_OS_MAC 1 +# if TARGET_OS_IPHONE +# define GTEST_OS_IOS 1 +# endif +#elif defined __FreeBSD__ +# define GTEST_OS_FREEBSD 1 +#elif defined __Fuchsia__ +# define GTEST_OS_FUCHSIA 1 +#elif defined __linux__ +# define GTEST_OS_LINUX 1 +# if defined __ANDROID__ +# define GTEST_OS_LINUX_ANDROID 1 +# endif +#elif defined __MVS__ +# define GTEST_OS_ZOS 1 +#elif defined(__sun) && defined(__SVR4) +# define GTEST_OS_SOLARIS 1 +#elif defined(_AIX) +# define GTEST_OS_AIX 1 +#elif defined(__hpux) +# define GTEST_OS_HPUX 1 +#elif defined __native_client__ +# define GTEST_OS_NACL 1 +#elif defined __NetBSD__ +# define GTEST_OS_NETBSD 1 +#elif defined __OpenBSD__ +# define GTEST_OS_OPENBSD 1 +#elif defined __QNX__ +# define GTEST_OS_QNX 1 +#endif // __CYGWIN__ + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_ +// Copyright 2015, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Injection point for custom user configurations. See README for details +// +// ** Custom implementation starts here ** + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_ + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_ + +#if !defined(GTEST_DEV_EMAIL_) +# define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com" +# define GTEST_FLAG_PREFIX_ "gtest_" +# define GTEST_FLAG_PREFIX_DASH_ "gtest-" +# define GTEST_FLAG_PREFIX_UPPER_ "GTEST_" +# define GTEST_NAME_ "Google Test" +# define GTEST_PROJECT_URL_ "https://github.com/google/googletest/" +#endif // !defined(GTEST_DEV_EMAIL_) + +#if !defined(GTEST_INIT_GOOGLE_TEST_NAME_) +# define GTEST_INIT_GOOGLE_TEST_NAME_ "testing::InitGoogleTest" +#endif // !defined(GTEST_INIT_GOOGLE_TEST_NAME_) + +// Determines the version of gcc that is used to compile this. +#ifdef __GNUC__ +// 40302 means version 4.3.2. +# define GTEST_GCC_VER_ \ + (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) +#endif // __GNUC__ + +// Macros for disabling Microsoft Visual C++ warnings. +// +// GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 4385) +// /* code that triggers warnings C4800 and C4385 */ +// GTEST_DISABLE_MSC_WARNINGS_POP_() +#if _MSC_VER >= 1400 +# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings) \ + __pragma(warning(push)) \ + __pragma(warning(disable: warnings)) +# define GTEST_DISABLE_MSC_WARNINGS_POP_() \ + __pragma(warning(pop)) +#else +// Older versions of MSVC don't have __pragma. +# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings) +# define GTEST_DISABLE_MSC_WARNINGS_POP_() +#endif + +// Clang on Windows does not understand MSVC's pragma warning. +// We need clang-specific way to disable function deprecation warning. 
+#ifdef __clang__ +# define GTEST_DISABLE_MSC_DEPRECATED_PUSH_() \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") \ + _Pragma("clang diagnostic ignored \"-Wdeprecated-implementations\"") +#define GTEST_DISABLE_MSC_DEPRECATED_POP_() \ + _Pragma("clang diagnostic pop") +#else +# define GTEST_DISABLE_MSC_DEPRECATED_PUSH_() \ + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996) +# define GTEST_DISABLE_MSC_DEPRECATED_POP_() \ + GTEST_DISABLE_MSC_WARNINGS_POP_() +#endif + +#ifndef GTEST_LANG_CXX11 +// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when +// -std={c,gnu}++{0x,11} is passed. The C++11 standard specifies a +// value for __cplusplus, and recent versions of clang, gcc, and +// probably other compilers set that too in C++11 mode. +# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L || _MSC_VER >= 1900 +// Compiling in at least C++11 mode. +# define GTEST_LANG_CXX11 1 +# else +# define GTEST_LANG_CXX11 0 +# endif +#endif + +// Distinct from C++11 language support, some environments don't provide +// proper C++11 library support. Notably, it's possible to build in +// C++11 mode when targeting Mac OS X 10.6, which has an old libstdc++ +// with no C++11 support. +// +// libstdc++ has sufficient C++11 support as of GCC 4.6.0, __GLIBCXX__ +// 20110325, but maintenance releases in the 4.4 and 4.5 series followed +// this date, so check for those versions by their date stamps. +// https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html#abi.versioning +#if GTEST_LANG_CXX11 && \ + (!defined(__GLIBCXX__) || ( \ + __GLIBCXX__ >= 20110325ul && /* GCC >= 4.6.0 */ \ + /* Blacklist of patch releases of older branches: */ \ + __GLIBCXX__ != 20110416ul && /* GCC 4.4.6 */ \ + __GLIBCXX__ != 20120313ul && /* GCC 4.4.7 */ \ + __GLIBCXX__ != 20110428ul && /* GCC 4.5.3 */ \ + __GLIBCXX__ != 20120702ul)) /* GCC 4.5.4 */ +# define GTEST_STDLIB_CXX11 1 +#endif + +// Only use C++11 library features if the library provides them. +#if GTEST_STDLIB_CXX11 +# define GTEST_HAS_STD_BEGIN_AND_END_ 1 +# define GTEST_HAS_STD_FORWARD_LIST_ 1 +# if !defined(_MSC_VER) || (_MSC_FULL_VER >= 190023824) +// works only with VS2015U2 and better +# define GTEST_HAS_STD_FUNCTION_ 1 +# endif +# define GTEST_HAS_STD_INITIALIZER_LIST_ 1 +# define GTEST_HAS_STD_MOVE_ 1 +# define GTEST_HAS_STD_UNIQUE_PTR_ 1 +# define GTEST_HAS_STD_SHARED_PTR_ 1 +# define GTEST_HAS_UNORDERED_MAP_ 1 +# define GTEST_HAS_UNORDERED_SET_ 1 +#endif + +// C++11 specifies that provides std::tuple. +// Some platforms still might not have it, however. +#if GTEST_LANG_CXX11 +# define GTEST_HAS_STD_TUPLE_ 1 +# if defined(__clang__) +// Inspired by +// https://clang.llvm.org/docs/LanguageExtensions.html#include-file-checking-macros +# if defined(__has_include) && !__has_include() +# undef GTEST_HAS_STD_TUPLE_ +# endif +# elif defined(_MSC_VER) +// Inspired by boost/config/stdlib/dinkumware.hpp +# if defined(_CPPLIB_VER) && _CPPLIB_VER < 520 +# undef GTEST_HAS_STD_TUPLE_ +# endif +# elif defined(__GLIBCXX__) +// Inspired by boost/config/stdlib/libstdcpp3.hpp, +// http://gcc.gnu.org/gcc-4.2/changes.html and +// https://web.archive.org/web/20140227044429/gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt01ch01.html#manual.intro.status.standard.200x +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2) +# undef GTEST_HAS_STD_TUPLE_ +# endif +# endif +#endif + +// Brings in definitions for functions used in the testing::internal::posix +// namespace (read, write, close, chdir, isatty, stat). 
We do not currently +// use them on Windows Mobile. +#if GTEST_OS_WINDOWS +# if !GTEST_OS_WINDOWS_MOBILE +# include +# include +# endif +// In order to avoid having to include , use forward declaration +#if GTEST_OS_WINDOWS_MINGW && !defined(__MINGW64_VERSION_MAJOR) +// MinGW defined _CRITICAL_SECTION and _RTL_CRITICAL_SECTION as two +// separate (equivalent) structs, instead of using typedef +typedef struct _CRITICAL_SECTION GTEST_CRITICAL_SECTION; +#else +// Assume CRITICAL_SECTION is a typedef of _RTL_CRITICAL_SECTION. +// This assumption is verified by +// WindowsTypesTest.CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION. +typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION; +#endif +#else +// This assumes that non-Windows OSes provide unistd.h. For OSes where this +// is not the case, we need to include headers that provide the functions +// mentioned above. +# include +# include +#endif // GTEST_OS_WINDOWS + +#if GTEST_OS_LINUX_ANDROID +// Used to define __ANDROID_API__ matching the target NDK API level. +# include // NOLINT +#endif + +// Defines this to true iff Google Test can use POSIX regular expressions. +#ifndef GTEST_HAS_POSIX_RE +# if GTEST_OS_LINUX_ANDROID +// On Android, is only available starting with Gingerbread. +# define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9) +# else +# define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS) +# endif +#endif + +#if GTEST_USES_PCRE +// The appropriate headers have already been included. + +#elif GTEST_HAS_POSIX_RE + +// On some platforms, needs someone to define size_t, and +// won't compile otherwise. We can #include it here as we already +// included , which is guaranteed to define size_t through +// . +# include // NOLINT + +# define GTEST_USES_POSIX_RE 1 + +#elif GTEST_OS_WINDOWS + +// is not available on Windows. Use our own simple regex +// implementation instead. +# define GTEST_USES_SIMPLE_RE 1 + +#else + +// may not be available on this platform. Use our own +// simple regex implementation instead. +# define GTEST_USES_SIMPLE_RE 1 + +#endif // GTEST_USES_PCRE + +#ifndef GTEST_HAS_EXCEPTIONS +// The user didn't tell us whether exceptions are enabled, so we need +// to figure it out. +# if defined(_MSC_VER) && defined(_CPPUNWIND) +// MSVC defines _CPPUNWIND to 1 iff exceptions are enabled. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__BORLANDC__) +// C++Builder's implementation of the STL uses the _HAS_EXCEPTIONS +// macro to enable exceptions, so we'll do the same. +// Assumes that exceptions are enabled by default. +# ifndef _HAS_EXCEPTIONS +# define _HAS_EXCEPTIONS 1 +# endif // _HAS_EXCEPTIONS +# define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS +# elif defined(__clang__) +// clang defines __EXCEPTIONS iff exceptions are enabled before clang 220714, +// but iff cleanups are enabled after that. In Obj-C++ files, there can be +// cleanups for ObjC exceptions which also need cleanups, even if C++ exceptions +// are disabled. clang has __has_feature(cxx_exceptions) which checks for C++ +// exceptions starting at clang r206352, but which checked for cleanups prior to +// that. To reliably check for C++ exception availability with clang, check for +// __EXCEPTIONS && __has_feature(cxx_exceptions). +# define GTEST_HAS_EXCEPTIONS (__EXCEPTIONS && __has_feature(cxx_exceptions)) +# elif defined(__GNUC__) && __EXCEPTIONS +// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__SUNPRO_CC) +// Sun Pro CC supports exceptions. 
However, there is no compile-time way of +// detecting whether they are enabled or not. Therefore, we assume that +// they are enabled unless the user tells us otherwise. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__IBMCPP__) && __EXCEPTIONS +// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__HP_aCC) +// Exception handling is in effect by default in HP aCC compiler. It has to +// be turned of by +noeh compiler option if desired. +# define GTEST_HAS_EXCEPTIONS 1 +# else +// For other compilers, we assume exceptions are disabled to be +// conservative. +# define GTEST_HAS_EXCEPTIONS 0 +# endif // defined(_MSC_VER) || defined(__BORLANDC__) +#endif // GTEST_HAS_EXCEPTIONS + +#if !defined(GTEST_HAS_STD_STRING) +// Even though we don't use this macro any longer, we keep it in case +// some clients still depend on it. +# define GTEST_HAS_STD_STRING 1 +#elif !GTEST_HAS_STD_STRING +// The user told us that ::std::string isn't available. +# error "::std::string isn't available." +#endif // !defined(GTEST_HAS_STD_STRING) + +#ifndef GTEST_HAS_GLOBAL_STRING +# define GTEST_HAS_GLOBAL_STRING 0 +#endif // GTEST_HAS_GLOBAL_STRING + +#ifndef GTEST_HAS_STD_WSTRING +// The user didn't tell us whether ::std::wstring is available, so we need +// to figure it out. +// FIXME: uses autoconf to detect whether ::std::wstring +// is available. + +// Cygwin 1.7 and below doesn't support ::std::wstring. +// Solaris' libc++ doesn't support it either. Android has +// no support for it at least as recent as Froyo (2.2). +# define GTEST_HAS_STD_WSTRING \ + (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS)) + +#endif // GTEST_HAS_STD_WSTRING + +#ifndef GTEST_HAS_GLOBAL_WSTRING +// The user didn't tell us whether ::wstring is available, so we need +// to figure it out. +# define GTEST_HAS_GLOBAL_WSTRING \ + (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING) +#endif // GTEST_HAS_GLOBAL_WSTRING + +// Determines whether RTTI is available. +#ifndef GTEST_HAS_RTTI +// The user didn't tell us whether RTTI is enabled, so we need to +// figure it out. + +# ifdef _MSC_VER + +# ifdef _CPPRTTI // MSVC defines this macro iff RTTI is enabled. +# define GTEST_HAS_RTTI 1 +# else +# define GTEST_HAS_RTTI 0 +# endif + +// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled. +# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302) + +# ifdef __GXX_RTTI +// When building against STLport with the Android NDK and with +// -frtti -fno-exceptions, the build fails at link time with undefined +// references to __cxa_bad_typeid. Note sure if STL or toolchain bug, +// so disable RTTI when detected. +# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && \ + !defined(__EXCEPTIONS) +# define GTEST_HAS_RTTI 0 +# else +# define GTEST_HAS_RTTI 1 +# endif // GTEST_OS_LINUX_ANDROID && __STLPORT_MAJOR && !__EXCEPTIONS +# else +# define GTEST_HAS_RTTI 0 +# endif // __GXX_RTTI + +// Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends +// using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the +// first version with C++ support. +# elif defined(__clang__) + +# define GTEST_HAS_RTTI __has_feature(cxx_rtti) + +// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if +// both the typeid and dynamic_cast features are present. 
+# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900) + +# ifdef __RTTI_ALL__ +# define GTEST_HAS_RTTI 1 +# else +# define GTEST_HAS_RTTI 0 +# endif + +# else + +// For all other compilers, we assume RTTI is enabled. +# define GTEST_HAS_RTTI 1 + +# endif // _MSC_VER + +#endif // GTEST_HAS_RTTI + +// It's this header's responsibility to #include when RTTI +// is enabled. +#if GTEST_HAS_RTTI +# include +#endif + +// Determines whether Google Test can use the pthreads library. +#ifndef GTEST_HAS_PTHREAD +// The user didn't tell us explicitly, so we make reasonable assumptions about +// which platforms have pthreads support. +// +// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0 +// to your compiler flags. +#define GTEST_HAS_PTHREAD \ + (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX || GTEST_OS_QNX || \ + GTEST_OS_FREEBSD || GTEST_OS_NACL || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA) +#endif // GTEST_HAS_PTHREAD + +#if GTEST_HAS_PTHREAD +// gtest-port.h guarantees to #include when GTEST_HAS_PTHREAD is +// true. +# include // NOLINT + +// For timespec and nanosleep, used below. +# include // NOLINT +#endif + +// Determines if hash_map/hash_set are available. +// Only used for testing against those containers. +#if !defined(GTEST_HAS_HASH_MAP_) +# if defined(_MSC_VER) && (_MSC_VER < 1900) +# define GTEST_HAS_HASH_MAP_ 1 // Indicates that hash_map is available. +# define GTEST_HAS_HASH_SET_ 1 // Indicates that hash_set is available. +# endif // _MSC_VER +#endif // !defined(GTEST_HAS_HASH_MAP_) + +// Determines whether Google Test can use tr1/tuple. You can define +// this macro to 0 to prevent Google Test from using tuple (any +// feature depending on tuple with be disabled in this mode). +#ifndef GTEST_HAS_TR1_TUPLE +# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) +// STLport, provided with the Android NDK, has neither or . +# define GTEST_HAS_TR1_TUPLE 0 +# elif defined(_MSC_VER) && (_MSC_VER >= 1910) +// Prevent `warning C4996: 'std::tr1': warning STL4002: +// The non-Standard std::tr1 namespace and TR1-only machinery +// are deprecated and will be REMOVED.` +# define GTEST_HAS_TR1_TUPLE 0 +# elif GTEST_LANG_CXX11 && defined(_LIBCPP_VERSION) +// libc++ doesn't support TR1. +# define GTEST_HAS_TR1_TUPLE 0 +# else +// The user didn't tell us not to do it, so we assume it's OK. +# define GTEST_HAS_TR1_TUPLE 1 +# endif +#endif // GTEST_HAS_TR1_TUPLE + +// Determines whether Google Test's own tr1 tuple implementation +// should be used. +#ifndef GTEST_USE_OWN_TR1_TUPLE +// We use our own tuple implementation on Symbian. +# if GTEST_OS_SYMBIAN +# define GTEST_USE_OWN_TR1_TUPLE 1 +# else +// The user didn't tell us, so we need to figure it out. + +// We use our own TR1 tuple if we aren't sure the user has an +// implementation of it already. At this time, libstdc++ 4.0.0+ and +// MSVC 2010 are the only mainstream standard libraries that come +// with a TR1 tuple implementation. NVIDIA's CUDA NVCC compiler +// pretends to be GCC by defining __GNUC__ and friends, but cannot +// compile GCC's tuple implementation. MSVC 2008 (9.0) provides TR1 +// tuple in a 323 MB Feature Pack download, which we cannot assume the +// user has. QNX's QCC compiler is a modified GCC but it doesn't +// support TR1 tuple. libc++ only provides std::tuple, in C++11 mode, +// and it can be used with some compilers that define __GNUC__. 
+# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \ + && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) \ + || (_MSC_VER >= 1600 && _MSC_VER < 1900) +# define GTEST_ENV_HAS_TR1_TUPLE_ 1 +# endif + +// C++11 specifies that provides std::tuple. Use that if gtest is used +// in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6 +// can build with clang but need to use gcc4.2's libstdc++). +# if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325) +# define GTEST_ENV_HAS_STD_TUPLE_ 1 +# endif + +# if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_ +# define GTEST_USE_OWN_TR1_TUPLE 0 +# else +# define GTEST_USE_OWN_TR1_TUPLE 1 +# endif +# endif // GTEST_OS_SYMBIAN +#endif // GTEST_USE_OWN_TR1_TUPLE + +// To avoid conditional compilation we make it gtest-port.h's responsibility +// to #include the header implementing tuple. +#if GTEST_HAS_STD_TUPLE_ +# include // IWYU pragma: export +# define GTEST_TUPLE_NAMESPACE_ ::std +#endif // GTEST_HAS_STD_TUPLE_ + +// We include tr1::tuple even if std::tuple is available to define printers for +// them. +#if GTEST_HAS_TR1_TUPLE +# ifndef GTEST_TUPLE_NAMESPACE_ +# define GTEST_TUPLE_NAMESPACE_ ::std::tr1 +# endif // GTEST_TUPLE_NAMESPACE_ + +# if GTEST_USE_OWN_TR1_TUPLE +// This file was GENERATED by command: +// pump.py gtest-tuple.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2009 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Implements a subset of TR1 tuple needed by Google Test and Google Mock. + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ + +#include // For ::std::pair. + +// The compiler used in Symbian has a bug that prevents us from declaring the +// tuple template as a friend (it complains that tuple is redefined). This +// bypasses the bug by declaring the members that should otherwise be +// private as public. +// Sun Studio versions < 12 also have the above bug. 
+#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
+# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
+#else
+# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
+    template <GTEST_10_TYPENAMES_(U)> friend class tuple; \
+   private:
+#endif
+
+// Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that conflict
+// with our own definitions. Therefore using our own tuple does not work on
+// those compilers.
+#if defined(_MSC_VER) && _MSC_VER >= 1600  /* 1600 is Visual Studio 2010 */
+# error "gtest's tuple doesn't compile on Visual Studio 2010 or later. \
+GTEST_USE_OWN_TR1_TUPLE must be set to 0 on those compilers."
+#endif
+
+// GTEST_n_TUPLE_(T) is the type of an n-tuple.
+#define GTEST_0_TUPLE_(T) tuple<>
+#define GTEST_1_TUPLE_(T) tuple<T##0>
+#define GTEST_2_TUPLE_(T) tuple<T##0, T##1>
+#define GTEST_3_TUPLE_(T) tuple<T##0, T##1, T##2>
+#define GTEST_4_TUPLE_(T) tuple<T##0, T##1, T##2, T##3>
+#define GTEST_5_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4>
+#define GTEST_6_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5>
+#define GTEST_7_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6>
+#define GTEST_8_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, T##7>
+#define GTEST_9_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
+    T##7, T##8>
+#define GTEST_10_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
+    T##7, T##8, T##9>
+
+// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
+#define GTEST_0_TYPENAMES_(T)
+#define GTEST_1_TYPENAMES_(T) typename T##0
+#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1
+#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2
+#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+    typename T##3
+#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+    typename T##3, typename T##4
+#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+    typename T##3, typename T##4, typename T##5
+#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+    typename T##3, typename T##4, typename T##5, typename T##6
+#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+    typename T##3, typename T##4, typename T##5, typename T##6, typename T##7
+#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+    typename T##3, typename T##4, typename T##5, typename T##6, \
+    typename T##7, typename T##8
+#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+    typename T##3, typename T##4, typename T##5, typename T##6, \
+    typename T##7, typename T##8, typename T##9
+
+// In theory, defining stuff in the ::std namespace is undefined
+// behavior.  We can do this as we are playing the role of a standard
+// library vendor.
+namespace std {
+namespace tr1 {
+
+template <typename T0 = void, typename T1 = void, typename T2 = void,
+    typename T3 = void, typename T4 = void, typename T5 = void,
+    typename T6 = void, typename T7 = void, typename T8 = void,
+    typename T9 = void>
+class tuple;
+
+// Anything in namespace gtest_internal is Google Test's INTERNAL
+// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
+namespace gtest_internal {
+
+// ByRef<T>::type is T if T is a reference; otherwise it's const T&.
+template <typename T>
+struct ByRef { typedef const T& type; };  // NOLINT
+template <typename T>
+struct ByRef<T&> { typedef T& type; };  // NOLINT
+
+// A handy wrapper for ByRef.
+#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type
+
+// AddRef<T>::type is T if T is a reference; otherwise it's T&.  This
+// is the same as tr1::add_reference<T>::type.
+template <typename T>
+struct AddRef { typedef T& type; };  // NOLINT
+template <typename T>
+struct AddRef<T&> { typedef T& type; };  // NOLINT
+
+// A handy wrapper for AddRef.
+#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type
+
+// A helper for implementing get<k>().
+template <int k> class Get;
+
+// A helper for implementing tuple_element<k, T>.  kIndexValid is true
+// iff k < the number of fields in tuple type T.
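+// (Illustrative expansion of the macros defined above, not upstream text:
+// a template header such as
+//     template <GTEST_2_TYPENAMES_(T)> class GTEST_2_TUPLE_(T);
+// preprocesses to
+//     template <typename T0, typename T1> class tuple<T0, T1>;
+// which is the pattern used by every tuple specialization below.)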
+template +struct TupleElement; + +template +struct TupleElement { + typedef T0 type; +}; + +template +struct TupleElement { + typedef T1 type; +}; + +template +struct TupleElement { + typedef T2 type; +}; + +template +struct TupleElement { + typedef T3 type; +}; + +template +struct TupleElement { + typedef T4 type; +}; + +template +struct TupleElement { + typedef T5 type; +}; + +template +struct TupleElement { + typedef T6 type; +}; + +template +struct TupleElement { + typedef T7 type; +}; + +template +struct TupleElement { + typedef T8 type; +}; + +template +struct TupleElement { + typedef T9 type; +}; + +} // namespace gtest_internal + +template <> +class tuple<> { + public: + tuple() {} + tuple(const tuple& /* t */) {} + tuple& operator=(const tuple& /* t */) { return *this; } +}; + +template +class GTEST_1_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {} + + tuple(const tuple& t) : f0_(t.f0_) {} + + template + tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_1_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) { + f0_ = t.f0_; + return *this; + } + + T0 f0_; +}; + +template +class GTEST_2_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0), + f1_(f1) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {} + + template + tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {} + template + tuple(const ::std::pair& p) : f0_(p.first), f1_(p.second) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_2_TUPLE_(U)& t) { + return CopyFrom(t); + } + template + tuple& operator=(const ::std::pair& p) { + f0_ = p.first; + f1_ = p.second; + return *this; + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + return *this; + } + + T0 f0_; + T1 f1_; +}; + +template +class GTEST_3_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} + + template + tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_3_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; +}; + +template +class GTEST_4_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {} + + template + tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_4_TUPLE_(U)& 
t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; +}; + +template +class GTEST_5_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, + GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_) {} + + template + tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_5_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; +}; + +template +class GTEST_6_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_) {} + + template + tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_6_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; +}; + +template +class GTEST_7_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3), f4_(f4), f5_(f5), f6_(f6) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} + + template + tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_7_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; +}; + +template +class GTEST_8_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + 
GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, + GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5), f6_(f6), f7_(f7) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} + + template + tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_8_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; + T7 f7_; +}; + +template +class GTEST_9_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, + GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5), f6_(f6), f7_(f7), f8_(f8) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} + + template + tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_9_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + f8_ = t.f8_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; + T7 f7_; + T8 f8_; +}; + +template +class tuple { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(), + f9_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, + GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {} + + template + tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), + f9_(t.f9_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_10_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + f8_ = t.f8_; + f9_ = t.f9_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 
f4_; + T5 f5_; + T6 f6_; + T7 f7_; + T8 f8_; + T9 f9_; +}; + +// 6.1.3.2 Tuple creation functions. + +// Known limitations: we don't support passing an +// std::tr1::reference_wrapper to make_tuple(). And we don't +// implement tie(). + +inline tuple<> make_tuple() { return tuple<>(); } + +template +inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) { + return GTEST_1_TUPLE_(T)(f0); +} + +template +inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) { + return GTEST_2_TUPLE_(T)(f0, f1); +} + +template +inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2) { + return GTEST_3_TUPLE_(T)(f0, f1, f2); +} + +template +inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3) { + return GTEST_4_TUPLE_(T)(f0, f1, f2, f3); +} + +template +inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4) { + return GTEST_5_TUPLE_(T)(f0, f1, f2, f3, f4); +} + +template +inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5) { + return GTEST_6_TUPLE_(T)(f0, f1, f2, f3, f4, f5); +} + +template +inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6) { + return GTEST_7_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6); +} + +template +inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7) { + return GTEST_8_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7); +} + +template +inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, + const T8& f8) { + return GTEST_9_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8); +} + +template +inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, + const T8& f8, const T9& f9) { + return GTEST_10_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9); +} + +// 6.1.3.3 Tuple helper classes. + +template struct tuple_size; + +template +struct tuple_size { + static const int value = 0; +}; + +template +struct tuple_size { + static const int value = 1; +}; + +template +struct tuple_size { + static const int value = 2; +}; + +template +struct tuple_size { + static const int value = 3; +}; + +template +struct tuple_size { + static const int value = 4; +}; + +template +struct tuple_size { + static const int value = 5; +}; + +template +struct tuple_size { + static const int value = 6; +}; + +template +struct tuple_size { + static const int value = 7; +}; + +template +struct tuple_size { + static const int value = 8; +}; + +template +struct tuple_size { + static const int value = 9; +}; + +template +struct tuple_size { + static const int value = 10; +}; + +template +struct tuple_element { + typedef typename gtest_internal::TupleElement< + k < (tuple_size::value), k, Tuple>::type type; +}; + +#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element::type + +// 6.1.3.4 Element access. 
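+// (Illustrative usage, not upstream text: with the declarations above,
+//     tuple<int, char> t = make_tuple(1, 'a');
+// gives get<0>(t) == 1 and get<1>(t) == 'a', matching the TR1 interface.)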
+ +namespace gtest_internal { + +template <> +class Get<0> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple)) + Field(Tuple& t) { return t.f0_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple)) + ConstField(const Tuple& t) { return t.f0_; } +}; + +template <> +class Get<1> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple)) + Field(Tuple& t) { return t.f1_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple)) + ConstField(const Tuple& t) { return t.f1_; } +}; + +template <> +class Get<2> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple)) + Field(Tuple& t) { return t.f2_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple)) + ConstField(const Tuple& t) { return t.f2_; } +}; + +template <> +class Get<3> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple)) + Field(Tuple& t) { return t.f3_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple)) + ConstField(const Tuple& t) { return t.f3_; } +}; + +template <> +class Get<4> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple)) + Field(Tuple& t) { return t.f4_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple)) + ConstField(const Tuple& t) { return t.f4_; } +}; + +template <> +class Get<5> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple)) + Field(Tuple& t) { return t.f5_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple)) + ConstField(const Tuple& t) { return t.f5_; } +}; + +template <> +class Get<6> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple)) + Field(Tuple& t) { return t.f6_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple)) + ConstField(const Tuple& t) { return t.f6_; } +}; + +template <> +class Get<7> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple)) + Field(Tuple& t) { return t.f7_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple)) + ConstField(const Tuple& t) { return t.f7_; } +}; + +template <> +class Get<8> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple)) + Field(Tuple& t) { return t.f8_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple)) + ConstField(const Tuple& t) { return t.f8_; } +}; + +template <> +class Get<9> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple)) + Field(Tuple& t) { return t.f9_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple)) + ConstField(const Tuple& t) { return t.f9_; } +}; + +} // namespace gtest_internal + +template +GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T))) +get(GTEST_10_TUPLE_(T)& t) { + return gtest_internal::Get::Field(t); +} + +template +GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T))) +get(const GTEST_10_TUPLE_(T)& t) { + return gtest_internal::Get::ConstField(t); +} + +// 6.1.3.5 Relational operators + +// We only implement == and !=, as we don't have a need for the rest yet. + +namespace gtest_internal { + +// SameSizeTuplePrefixComparator::Eq(t1, t2) returns true if the +// first k fields of t1 equals the first k fields of t2. +// SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if +// k1 != k2. 
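+// (Illustrative expansion, not upstream text: Eq<2, 2>(t1, t2) evaluates
+//     Eq<1, 1>(t1, t2) && get<1>(t1) == get<1>(t2)
+// and the recursion terminates at the Eq<0, 0> specialization, which
+// simply returns true.)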
+template <int kSize1, int kSize2>
+struct SameSizeTuplePrefixComparator;
+
+template <>
+struct SameSizeTuplePrefixComparator<0, 0> {
+  template <class Tuple1, class Tuple2>
+  static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) {
+    return true;
+  }
+};
+
+template <int k>
+struct SameSizeTuplePrefixComparator<k, k> {
+  template <class Tuple1, class Tuple2>
+  static bool Eq(const Tuple1& t1, const Tuple2& t2) {
+    return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
+        ::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
+  }
+};
+
+}  // namespace gtest_internal
+
+template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
+inline bool operator==(const GTEST_10_TUPLE_(T)& t,
+                       const GTEST_10_TUPLE_(U)& u) {
+  return gtest_internal::SameSizeTuplePrefixComparator<
+      tuple_size<GTEST_10_TUPLE_(T) >::value,
+      tuple_size<GTEST_10_TUPLE_(U) >::value>::Eq(t, u);
+}
+
+template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
+inline bool operator!=(const GTEST_10_TUPLE_(T)& t,
+                       const GTEST_10_TUPLE_(U)& u) { return !(t == u); }
+
+// 6.1.4 Pairs.
+// Unimplemented.
+
+}  // namespace tr1
+}  // namespace std
+
+#undef GTEST_0_TUPLE_
+#undef GTEST_1_TUPLE_
+#undef GTEST_2_TUPLE_
+#undef GTEST_3_TUPLE_
+#undef GTEST_4_TUPLE_
+#undef GTEST_5_TUPLE_
+#undef GTEST_6_TUPLE_
+#undef GTEST_7_TUPLE_
+#undef GTEST_8_TUPLE_
+#undef GTEST_9_TUPLE_
+#undef GTEST_10_TUPLE_
+
+#undef GTEST_0_TYPENAMES_
+#undef GTEST_1_TYPENAMES_
+#undef GTEST_2_TYPENAMES_
+#undef GTEST_3_TYPENAMES_
+#undef GTEST_4_TYPENAMES_
+#undef GTEST_5_TYPENAMES_
+#undef GTEST_6_TYPENAMES_
+#undef GTEST_7_TYPENAMES_
+#undef GTEST_8_TYPENAMES_
+#undef GTEST_9_TYPENAMES_
+#undef GTEST_10_TYPENAMES_
+
+#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
+#undef GTEST_BY_REF_
+#undef GTEST_ADD_REF_
+#undef GTEST_TUPLE_ELEMENT_
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
+# elif GTEST_OS_SYMBIAN
+
+// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to
+// use STLport's tuple implementation, which unfortunately doesn't
+// work as the copy of STLport distributed with Symbian is incomplete.
+// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to
+// use its own tuple implementation.
+#  ifdef BOOST_HAS_TR1_TUPLE
+#   undef BOOST_HAS_TR1_TUPLE
+#  endif  // BOOST_HAS_TR1_TUPLE
+
+// This prevents <boost/tr1/detail/config.hpp>, which defines
+// BOOST_HAS_TR1_TUPLE, from being #included by Boost's <tuple>.
+#  define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED
+#  include <tuple>  // IWYU pragma: export  // NOLINT
+
+# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000)
+// GCC 4.0+ implements tr1/tuple in the <tr1/tuple> header.  This does
+// not conform to the TR1 spec, which requires the header to be <tuple>.
+
+#  if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
+// Until version 4.3.2, gcc has a bug that causes <tr1/functional>,
+// which is #included by <tr1/tuple>, to not compile when RTTI is
+// disabled.  _TR1_FUNCTIONAL is the header guard for
+// <tr1/functional>.  Hence the following #define is used to prevent
+// <tr1/functional> from being included.
+#   define _TR1_FUNCTIONAL 1
+#   include <tr1/tuple>
+#   undef _TR1_FUNCTIONAL  // Allows the user to #include
+                           // <tr1/functional> if they choose to.
+#  else
+#   include <tr1/tuple>  // NOLINT
+#  endif  // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
+
+// VS 2010 now has tr1 support.
+# elif _MSC_VER >= 1600
+#  include <tuple>  // IWYU pragma: export  // NOLINT
+
+# else  // GTEST_USE_OWN_TR1_TUPLE
+#  include <tuple>  // IWYU pragma: export  // NOLINT
+# endif  // GTEST_USE_OWN_TR1_TUPLE
+
+#endif  // GTEST_HAS_TR1_TUPLE
+
+// Determines whether clone(2) is supported.
+// Usually it will only be available on Linux, excluding
+// Linux on the Itanium architecture.
+// Also see http://linux.die.net/man/2/clone.
+#ifndef GTEST_HAS_CLONE
+// The user didn't tell us, so we need to figure it out.
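+// (Illustrative, not upstream text: like the other feature-test macros in
+// this header, the detection below only runs when the user has not already
+// supplied a value, so e.g. -DGTEST_HAS_CLONE=0 on the compiler command line
+// settles the question up front.)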
+
+# if GTEST_OS_LINUX && !defined(__ia64__)
+#  if GTEST_OS_LINUX_ANDROID
+// On Android, clone() became available at different API levels for each 32-bit
+// architecture.
+#    if defined(__LP64__) || \
+        (defined(__arm__) && __ANDROID_API__ >= 9) || \
+        (defined(__mips__) && __ANDROID_API__ >= 12) || \
+        (defined(__i386__) && __ANDROID_API__ >= 17)
+#     define GTEST_HAS_CLONE 1
+#    else
+#     define GTEST_HAS_CLONE 0
+#    endif
+#  else
+#   define GTEST_HAS_CLONE 1
+#  endif
+# else
+#  define GTEST_HAS_CLONE 0
+# endif  // GTEST_OS_LINUX && !defined(__ia64__)
+
+#endif  // GTEST_HAS_CLONE
+
+// Determines whether to support stream redirection. This is used to test
+// output correctness and to implement death tests.
+#ifndef GTEST_HAS_STREAM_REDIRECTION
+// By default, we assume that stream redirection is supported on all
+// platforms except known mobile ones.
+# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || \
+    GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
+#  define GTEST_HAS_STREAM_REDIRECTION 0
+# else
+#  define GTEST_HAS_STREAM_REDIRECTION 1
+# endif  // !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_SYMBIAN
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+
+// Determines whether to support death tests.
+// Google Test does not support death tests for VC 7.1 and earlier as
+// abort() in a VC 7.1 application compiled as GUI in debug config
+// pops up a dialog window that cannot be suppressed programmatically.
+#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
+     (GTEST_OS_MAC && !GTEST_OS_IOS) || \
+     (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \
+     GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \
+     GTEST_OS_OPENBSD || GTEST_OS_QNX || GTEST_OS_FREEBSD || \
+     GTEST_OS_NETBSD || GTEST_OS_FUCHSIA)
+# define GTEST_HAS_DEATH_TEST 1
+#endif
+
+// Determines whether to support type-driven tests.
+
+// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
+// Sun Pro CC, IBM Visual Age, and HP aCC support.
+#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \
+    defined(__IBMCPP__) || defined(__HP_aCC)
+# define GTEST_HAS_TYPED_TEST 1
+# define GTEST_HAS_TYPED_TEST_P 1
+#endif
+
+// Determines whether to support Combine(). This only makes sense when
+// value-parameterized tests are enabled. The implementation doesn't
+// work on Sun Studio since it doesn't understand templated conversion
+// operators.
+#if (GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_) && !defined(__SUNPRO_CC)
+# define GTEST_HAS_COMBINE 1
+#endif
+
+// Determines whether the system compiler uses UTF-16 for encoding wide strings.
+#define GTEST_WIDE_STRING_USES_UTF16_ \
+    (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX)
+
+// Determines whether test results can be streamed to a socket.
+#if GTEST_OS_LINUX
+# define GTEST_CAN_STREAM_RESULTS_ 1
+#endif
+
+// Defines some utility macros.
+
+// The GNU compiler emits a warning if nested "if" statements are followed by
+// an "else" statement and braces are not used to explicitly disambiguate the
+// "else" binding.  This leads to problems with code like:
+//
+//   if (gate)
+//     ASSERT_*(condition) << "Some message";
+//
+// The "switch (0) case 0:" idiom is used to suppress this.
+#ifdef __INTEL_COMPILER
+# define GTEST_AMBIGUOUS_ELSE_BLOCKER_
+#else
+# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default:  // NOLINT
+#endif
+
+// Use this annotation at the end of a struct/class definition to
+// prevent the compiler from optimizing away instances that are never
+// used.
This is useful when all interesting logic happens inside the +// c'tor and / or d'tor. Example: +// +// struct Foo { +// Foo() { ... } +// } GTEST_ATTRIBUTE_UNUSED_; +// +// Also use it after a variable or parameter declaration to tell the +// compiler the variable/parameter does not have to be used. +#if defined(__GNUC__) && !defined(COMPILER_ICC) +# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused)) +#elif defined(__clang__) +# if __has_attribute(unused) +# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused)) +# endif +#endif +#ifndef GTEST_ATTRIBUTE_UNUSED_ +# define GTEST_ATTRIBUTE_UNUSED_ +#endif + +#if GTEST_LANG_CXX11 +# define GTEST_CXX11_EQUALS_DELETE_ = delete +#else // GTEST_LANG_CXX11 +# define GTEST_CXX11_EQUALS_DELETE_ +#endif // GTEST_LANG_CXX11 + +// Use this annotation before a function that takes a printf format string. +#if (defined(__GNUC__) || defined(__clang__)) && !defined(COMPILER_ICC) +# if defined(__MINGW_PRINTF_FORMAT) +// MinGW has two different printf implementations. Ensure the format macro +// matches the selected implementation. See +// https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/. +# define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \ + __attribute__((__format__(__MINGW_PRINTF_FORMAT, string_index, \ + first_to_check))) +# else +# define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \ + __attribute__((__format__(__printf__, string_index, first_to_check))) +# endif +#else +# define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) +#endif + + +// A macro to disallow operator= +// This should be used in the private: declarations for a class. +#define GTEST_DISALLOW_ASSIGN_(type) \ + void operator=(type const &) GTEST_CXX11_EQUALS_DELETE_ + +// A macro to disallow copy constructor and operator= +// This should be used in the private: declarations for a class. +#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type) \ + type(type const &) GTEST_CXX11_EQUALS_DELETE_; \ + GTEST_DISALLOW_ASSIGN_(type) + +// Tell the compiler to warn about unused return values for functions declared +// with this macro. The macro should be used on function declarations +// following the argument list: +// +// Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_; +#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC) +# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result)) +#else +# define GTEST_MUST_USE_RESULT_ +#endif // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC + +// MS C++ compiler emits warning when a conditional expression is compile time +// constant. In some contexts this warning is false positive and needs to be +// suppressed. Use the following two macros in such cases: +// +// GTEST_INTENTIONAL_CONST_COND_PUSH_() +// while (true) { +// GTEST_INTENTIONAL_CONST_COND_POP_() +// } +# define GTEST_INTENTIONAL_CONST_COND_PUSH_() \ + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4127) +# define GTEST_INTENTIONAL_CONST_COND_POP_() \ + GTEST_DISABLE_MSC_WARNINGS_POP_() + +// Determine whether the compiler supports Microsoft's Structured Exception +// Handling. This is supported by several Windows compilers but generally +// does not exist on any other system. +#ifndef GTEST_HAS_SEH +// The user didn't tell us, so we need to figure it out. + +# if defined(_MSC_VER) || defined(__BORLANDC__) +// These two compilers are known to support SEH. +# define GTEST_HAS_SEH 1 +# else +// Assume no SEH. 
+# define GTEST_HAS_SEH 0 +# endif + +#define GTEST_IS_THREADSAFE \ + (GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ \ + || (GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT) \ + || GTEST_HAS_PTHREAD) + +#endif // GTEST_HAS_SEH + +// GTEST_API_ qualifies all symbols that must be exported. The definitions below +// are guarded by #ifndef to give embedders a chance to define GTEST_API_ in +// gtest/internal/custom/gtest-port.h +#ifndef GTEST_API_ + +#ifdef _MSC_VER +# if GTEST_LINKED_AS_SHARED_LIBRARY +# define GTEST_API_ __declspec(dllimport) +# elif GTEST_CREATE_SHARED_LIBRARY +# define GTEST_API_ __declspec(dllexport) +# endif +#elif __GNUC__ >= 4 || defined(__clang__) +# define GTEST_API_ __attribute__((visibility ("default"))) +#endif // _MSC_VER + +#endif // GTEST_API_ + +#ifndef GTEST_API_ +# define GTEST_API_ +#endif // GTEST_API_ + +#ifndef GTEST_DEFAULT_DEATH_TEST_STYLE +# define GTEST_DEFAULT_DEATH_TEST_STYLE "fast" +#endif // GTEST_DEFAULT_DEATH_TEST_STYLE + +#ifdef __GNUC__ +// Ask the compiler to never inline a given function. +# define GTEST_NO_INLINE_ __attribute__((noinline)) +#else +# define GTEST_NO_INLINE_ +#endif + +// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project. +#if !defined(GTEST_HAS_CXXABI_H_) +# if defined(__GLIBCXX__) || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) +# define GTEST_HAS_CXXABI_H_ 1 +# else +# define GTEST_HAS_CXXABI_H_ 0 +# endif +#endif + +// A function level attribute to disable checking for use of uninitialized +// memory when built with MemorySanitizer. +#if defined(__clang__) +# if __has_feature(memory_sanitizer) +# define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ \ + __attribute__((no_sanitize_memory)) +# else +# define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ +# endif // __has_feature(memory_sanitizer) +#else +# define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ +#endif // __clang__ + +// A function level attribute to disable AddressSanitizer instrumentation. +#if defined(__clang__) +# if __has_feature(address_sanitizer) +# define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ \ + __attribute__((no_sanitize_address)) +# else +# define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +# endif // __has_feature(address_sanitizer) +#else +# define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +#endif // __clang__ + +// A function level attribute to disable ThreadSanitizer instrumentation. +#if defined(__clang__) +# if __has_feature(thread_sanitizer) +# define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ \ + __attribute__((no_sanitize_thread)) +# else +# define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ +# endif // __has_feature(thread_sanitizer) +#else +# define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ +#endif // __clang__ + +namespace testing { + +class Message; + +#if defined(GTEST_TUPLE_NAMESPACE_) +// Import tuple and friends into the ::testing namespace. +// It is part of our interface, having them in ::testing allows us to change +// their types as needed. +using GTEST_TUPLE_NAMESPACE_::get; +using GTEST_TUPLE_NAMESPACE_::make_tuple; +using GTEST_TUPLE_NAMESPACE_::tuple; +using GTEST_TUPLE_NAMESPACE_::tuple_size; +using GTEST_TUPLE_NAMESPACE_::tuple_element; +#endif // defined(GTEST_TUPLE_NAMESPACE_) + +namespace internal { + +// A secret type that Google Test users don't know about. It has no +// definition on purpose. Therefore it's impossible to create a +// Secret object, which is what we want. +class Secret; + +// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time +// expression is true. 
For example, you could use it to verify the
+// size of a static array:
+//
+//   GTEST_COMPILE_ASSERT_(GTEST_ARRAY_SIZE_(names) == NUM_NAMES,
+//                         names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+//   GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+
+#if GTEST_LANG_CXX11
+# define GTEST_COMPILE_ASSERT_(expr, msg) static_assert(expr, #msg)
+#else  // !GTEST_LANG_CXX11
+template <bool>
+struct CompileAssert {
+};
+
+# define GTEST_COMPILE_ASSERT_(expr, msg) \
+  typedef ::testing::internal::CompileAssert<(static_cast<bool>(expr))> \
+      msg[static_cast<bool>(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_
+#endif  // !GTEST_LANG_CXX11
+
+// Implementation details of GTEST_COMPILE_ASSERT_:
+//
+// (In C++11, we simply use static_assert instead of the following)
+//
+// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1
+//   elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+//    #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+//   does not work, as gcc supports variable-length arrays whose sizes
+//   are determined at run-time (this is gcc's extension and not part
+//   of the C++ standard).  As a result, gcc fails to reject the
+//   following code with the simple definition:
+//
+//     int foo;
+//     GTEST_COMPILE_ASSERT_(foo, msg);  // not supposed to compile as foo is
+//                                       // not a compile-time constant.
+//
+// - By using the type CompileAssert<(bool(expr))>, we ensure that
+//   expr is a compile-time constant.  (Template arguments must be
+//   determined at compile-time.)
+//
+// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
+//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
+//
+//     CompileAssert<bool(expr)>
+//
+//   instead, these compilers will refuse to compile
+//
+//     GTEST_COMPILE_ASSERT_(5 > 0, some_message);
+//
+//   (They seem to think the ">" in "5 > 0" marks the end of the
+//   template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+//     ((expr) ? 1 : -1).
+//
+//   This is to avoid running into a bug in MS VC 7.1, which
+//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+
+// StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h.
+//
+// This template is declared, but intentionally undefined.
+template <typename T1, typename T2>
+struct StaticAssertTypeEqHelper;
+
+template <typename T>
+struct StaticAssertTypeEqHelper<T, T> {
+  enum { value = true };
+};
+
+// Same as std::is_same<>.
+template <typename T, typename U>
+struct IsSame {
+  enum { value = false };
+};
+template <typename T>
+struct IsSame<T, T> {
+  enum { value = true };
+};
+
+// Evaluates to the number of elements in 'array'.
+#define GTEST_ARRAY_SIZE_(array) (sizeof(array) / sizeof(array[0]))
+
+#if GTEST_HAS_GLOBAL_STRING
+typedef ::string string;
+#else
+typedef ::std::string string;
+#endif  // GTEST_HAS_GLOBAL_STRING
+
+#if GTEST_HAS_GLOBAL_WSTRING
+typedef ::wstring wstring;
+#elif GTEST_HAS_STD_WSTRING
+typedef ::std::wstring wstring;
+#endif  // GTEST_HAS_GLOBAL_WSTRING
+
+// A helper for suppressing warnings on constant condition.  It just
+// returns 'condition'.
+GTEST_API_ bool IsTrue(bool condition);
+
+// Defines scoped_ptr.
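+// (Illustrative usage of the alias defined just below, not upstream text:
+//     scoped_ptr<int> p(new int(42));
+//     *p = 7;  // behaves like std::unique_ptr<int>; deletes on scope exit
+// )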
+
+// RocksDB: use unique_ptr to work around some clang-analyze false reports
+template <typename T>
+using scoped_ptr = std::unique_ptr<T>;
+/*
+// This implementation of scoped_ptr is PARTIAL - it only contains
+// enough stuff to satisfy Google Test's need.
+template <typename T>
+class scoped_ptr {
+ public:
+  typedef T element_type;
+
+  explicit scoped_ptr(T* p = NULL) : ptr_(p) {}
+  ~scoped_ptr() { reset(); }
+
+  T& operator*() const { return *ptr_; }
+  T* operator->() const { return ptr_; }
+  T* get() const { return ptr_; }
+
+  T* release() {
+    T* const ptr = ptr_;
+    ptr_ = NULL;
+    return ptr;
+  }
+
+  void reset(T* p = NULL) {
+    if (p != ptr_) {
+      if (IsTrue(sizeof(T) > 0)) {  // Makes sure T is a complete type.
+        delete ptr_;
+      }
+      ptr_ = p;
+    }
+  }
+
+  friend void swap(scoped_ptr& a, scoped_ptr& b) {
+    using std::swap;
+    swap(a.ptr_, b.ptr_);
+  }
+
+ private:
+  T* ptr_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr);
+};
+*/
+
+// Defines RE.
+
+#if GTEST_USES_PCRE
+// if used, PCRE is injected by custom/gtest-port.h
+#elif GTEST_USES_POSIX_RE || GTEST_USES_SIMPLE_RE
+
+// A simple C++ wrapper for <regex.h>.  It uses the POSIX Extended
+// Regular Expression syntax.
+class GTEST_API_ RE {
+ public:
+  // A copy constructor is required by the Standard to initialize object
+  // references from r-values.
+  RE(const RE& other) { Init(other.pattern()); }
+
+  // Constructs an RE from a string.
+  RE(const ::std::string& regex) { Init(regex.c_str()); }  // NOLINT
+
+# if GTEST_HAS_GLOBAL_STRING
+
+  RE(const ::string& regex) { Init(regex.c_str()); }  // NOLINT
+
+# endif  // GTEST_HAS_GLOBAL_STRING
+
+  RE(const char* regex) { Init(regex); }  // NOLINT
+  ~RE();
+
+  // Returns the string representation of the regex.
+  const char* pattern() const { return pattern_; }
+
+  // FullMatch(str, re) returns true iff regular expression re matches
+  // the entire str.
+  // PartialMatch(str, re) returns true iff regular expression re
+  // matches a substring of str (including str itself).
+  //
+  // FIXME: make FullMatch() and PartialMatch() work
+  // when str contains NUL characters.
+  static bool FullMatch(const ::std::string& str, const RE& re) {
+    return FullMatch(str.c_str(), re);
+  }
+  static bool PartialMatch(const ::std::string& str, const RE& re) {
+    return PartialMatch(str.c_str(), re);
+  }
+
+# if GTEST_HAS_GLOBAL_STRING
+
+  static bool FullMatch(const ::string& str, const RE& re) {
+    return FullMatch(str.c_str(), re);
+  }
+  static bool PartialMatch(const ::string& str, const RE& re) {
+    return PartialMatch(str.c_str(), re);
+  }
+
+# endif  // GTEST_HAS_GLOBAL_STRING
+
+  static bool FullMatch(const char* str, const RE& re);
+  static bool PartialMatch(const char* str, const RE& re);
+
+ private:
+  void Init(const char* regex);
+
+  // We use a const char* instead of an std::string, as Google Test used to be
+  // used where std::string is not available.  FIXME: change to
+  // std::string.
+  const char* pattern_;
+  bool is_valid_;
+
+# if GTEST_USES_POSIX_RE
+
+  regex_t full_regex_;     // For FullMatch().
+  regex_t partial_regex_;  // For PartialMatch().
+
+# else  // GTEST_USES_SIMPLE_RE
+
+  const char* full_pattern_;  // For FullMatch();
+
+# endif
+
+  GTEST_DISALLOW_ASSIGN_(RE);
+};
+
+#endif  // GTEST_USES_PCRE
+
+// Formats a source file path and a line number as they would appear
+// in an error message from the compiler used to compile this code.
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);
+
+// Formats a file location for compiler-independent XML output.
+// Although this function is not platform dependent, we put it next to
+// FormatFileLocation in order to contrast the two functions.
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
+                                                               int line);
+
+// Defines logging utilities:
+//   GTEST_LOG_(severity) - logs messages at the specified severity level. The
+//                          message itself is streamed into the macro.
+//   LogToStderr()  - directs all log messages to stderr.
+//   FlushInfoLog() - flushes informational log messages.
+
+enum GTestLogSeverity {
+  GTEST_INFO,
+  GTEST_WARNING,
+  GTEST_ERROR,
+  GTEST_FATAL
+};
+
+// Formats log entry severity, provides a stream object for streaming the
+// log message, and terminates the message with a newline when going out of
+// scope.
+class GTEST_API_ GTestLog {
+ public:
+  GTestLog(GTestLogSeverity severity, const char* file, int line);
+
+  // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
+  ~GTestLog();
+
+  ::std::ostream& GetStream() { return ::std::cerr; }
+
+ private:
+  const GTestLogSeverity severity_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
+};
+
+#if !defined(GTEST_LOG_)
+
+# define GTEST_LOG_(severity) \
+    ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
+                                  __FILE__, __LINE__).GetStream()
+
+inline void LogToStderr() {}
+inline void FlushInfoLog() { fflush(NULL); }
+
+#endif  // !defined(GTEST_LOG_)
+
+#if !defined(GTEST_CHECK_)
+// INTERNAL IMPLEMENTATION - DO NOT USE.
+//
+// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
+// is not satisfied.
+// Synopsis:
+//   GTEST_CHECK_(boolean_condition);
+//     or
+//   GTEST_CHECK_(boolean_condition) << "Additional message";
+//
+// This checks the condition and if the condition is not satisfied
+// it prints message about the condition violation, including the
+// condition itself, plus additional message streamed into it, if any,
+// and then it aborts the program.  It aborts the program irrespective of
+// whether it is built in the debug mode or not.
+# define GTEST_CHECK_(condition) \
+    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+    if (::testing::internal::IsTrue(condition)) \
+      ; \
+    else \
+      GTEST_LOG_(FATAL) << "Condition " #condition " failed. "
+#endif  // !defined(GTEST_CHECK_)
+
+// An all-mode assert to verify that the given POSIX-style function
+// call returns 0 (indicating success).  Known limitation: this
+// doesn't expand to a balanced 'if' statement, so enclose the macro
+// in {} if you need to use it as the only statement in an 'if'
+// branch.
+#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
+  if (const int gtest_error = (posix_call)) \
+    GTEST_LOG_(FATAL) << #posix_call << "failed with error " \
+                      << gtest_error
+
+// Adds reference to a type if it is not a reference type,
+// otherwise leaves it unchanged.  This is the same as
+// tr1::add_reference, which is not widely available yet.
+template <typename T>
+struct AddReference { typedef T& type; };  // NOLINT
+template <typename T>
+struct AddReference<T&> { typedef T& type; };  // NOLINT
+
+// A handy wrapper around AddReference that works when the argument T
+// depends on template parameters.
+#define GTEST_ADD_REFERENCE_(T) \
+    typename ::testing::internal::AddReference<T>::type
+
+// Transforms "T" into "const T&" according to standard reference collapsing
+// rules (this is only needed as a backport for C++98 compilers that do not
+// support reference collapsing).  Specifically, it transforms:
+//
+//   char         ==> const char&
+//   const char   ==> const char&
+//   char&        ==> char&
+//   const char&  ==> const char&
+//
+// Note that the non-const reference will not have "const" added. This is
+// standard, and necessary so that "T" can always bind to "const T&".
+template <typename T>
+struct ConstRef { typedef const T& type; };
+template <typename T>
+struct ConstRef<T&> { typedef T& type; };
+
+// The argument T must depend on some template parameters.
+#define GTEST_REFERENCE_TO_CONST_(T) \
+    typename ::testing::internal::ConstRef<T>::type
+
+#if GTEST_HAS_STD_MOVE_
+using std::forward;
+using std::move;
+
+template <typename T>
+struct RvalueRef {
+  typedef T&& type;
+};
+#else  // GTEST_HAS_STD_MOVE_
+template <typename T>
+const T& move(const T& t) {
+  return t;
+}
+template <typename T>
+GTEST_ADD_REFERENCE_(T) forward(GTEST_ADD_REFERENCE_(T) t) { return t; }
+
+template <typename T>
+struct RvalueRef {
+  typedef const T& type;
+};
+#endif  // GTEST_HAS_STD_MOVE_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Use ImplicitCast_ as a safe version of static_cast for upcasting in
+// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
+// const Foo*).  When you use ImplicitCast_, the compiler checks that
+// the cast is safe.  Such explicit ImplicitCast_s are necessary in
+// surprisingly many situations where C++ demands an exact type match
+// instead of an argument type convertible to a target type.
+//
+// The syntax for using ImplicitCast_ is the same as for static_cast:
+//
+//   ImplicitCast_<ToType>(expr)
+//
+// ImplicitCast_ would have been part of the C++ standard library,
+// but the proposal was submitted too late.  It will probably make
+// its way into the language in the future.
+//
+// This relatively ugly name is intentional. It prevents clashes with
+// similar functions users may have (e.g., implicit_cast). The internal
+// namespace alone is not enough because the function can be found by ADL.
+template <typename To>
+inline To ImplicitCast_(To x) { return x; }
+
+// When you upcast (that is, cast a pointer from type Foo to type
+// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
+// always succeed.  When you downcast (that is, cast a pointer from
+// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
+// how do you know the pointer is really of type SubclassOfFoo?  It
+// could be a bare Foo, or of type DifferentSubclassOfFoo.  Thus,
+// when you downcast, you should use this macro.  In debug mode, we
+// use dynamic_cast<> to double-check the downcast is legal (we die
+// if it's not).  In normal mode, we do the efficient static_cast<>
+// instead.  Thus, it's important to test in debug mode to make sure
+// the cast is legal!
+//    This is the only place in the code we should use dynamic_cast<>.
+// In particular, you SHOULDN'T be using dynamic_cast<> in order to
+// do RTTI (eg code like this:
+//    if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
+//    if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
+// You should design the code some other way not to need this.
+//
+// This relatively ugly name is intentional. It prevents clashes with
+// similar functions users may have (e.g., down_cast). The internal
+// namespace alone is not enough because the function can be found by ADL.
+template <typename To, typename From>  // use like this: DownCast_<T*>(foo);
+inline To DownCast_(From* f) {  // so we only accept pointers
+  // Ensures that To is a sub-type of From *.  This test is here only
+  // for compile-time type checking, and has no overhead in an
+  // optimized build at run-time, as it will be optimized away
+  // completely.
+  GTEST_INTENTIONAL_CONST_COND_PUSH_()
+  if (false) {
+  GTEST_INTENTIONAL_CONST_COND_POP_()
+    const To to = NULL;
+    ::testing::internal::ImplicitCast_<From*>(to);
+  }
+
+#if GTEST_HAS_RTTI
+  // RTTI: debug mode only!
+  GTEST_CHECK_(f == NULL || dynamic_cast<To>(f) != NULL);
+#endif
+  return static_cast<To>(f);
+}
+
+// Downcasts the pointer of type Base to Derived.
+// Derived must be a subclass of Base.  The parameter MUST
+// point to a class of type Derived, not any subclass of it.
+// When RTTI is available, the function performs a runtime
+// check to enforce this.
+template <class Derived, class Base>
+Derived* CheckedDowncastToActualType(Base* base) {
+#if GTEST_HAS_RTTI
+  GTEST_CHECK_(typeid(*base) == typeid(Derived));
+#endif
+
+#if GTEST_HAS_DOWNCAST_
+  return ::down_cast<Derived*>(base);
+#elif GTEST_HAS_RTTI
+  return dynamic_cast<Derived*>(base);  // NOLINT
+#else
+  return static_cast<Derived*>(base);  // Poor man's downcast.
+#endif
+}
+
+#if GTEST_HAS_STREAM_REDIRECTION
+
+// Defines the stderr capturer:
+//   CaptureStdout     - starts capturing stdout.
+//   GetCapturedStdout - stops capturing stdout and returns the captured string.
+//   CaptureStderr     - starts capturing stderr.
+//   GetCapturedStderr - stops capturing stderr and returns the captured string.
+//
+GTEST_API_ void CaptureStdout();
+GTEST_API_ std::string GetCapturedStdout();
+GTEST_API_ void CaptureStderr();
+GTEST_API_ std::string GetCapturedStderr();
+
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+
+// Returns the size (in bytes) of a file.
+GTEST_API_ size_t GetFileSize(FILE* file);
+
+// Reads the entire content of a file as a string.
+GTEST_API_ std::string ReadEntireFile(FILE* file);
+
+// All command line arguments.
+GTEST_API_ std::vector<std::string> GetArgvs();
+
+#if GTEST_HAS_DEATH_TEST
+
+std::vector<std::string> GetInjectableArgvs();
+// Deprecated: pass the args vector by value instead.
+void SetInjectableArgvs(const std::vector<std::string>* new_argvs);
+void SetInjectableArgvs(const std::vector<std::string>& new_argvs);
+#if GTEST_HAS_GLOBAL_STRING
+void SetInjectableArgvs(const std::vector< ::string>& new_argvs);
+#endif  // GTEST_HAS_GLOBAL_STRING
+void ClearInjectableArgvs();
+
+#endif  // GTEST_HAS_DEATH_TEST
+
+// Defines synchronization primitives.
+#if GTEST_IS_THREADSAFE
+# if GTEST_HAS_PTHREAD
+// Sleeps for (roughly) n milliseconds.  This function is only for testing
+// Google Test's own constructs.  Don't use it in user tests, either
+// directly or indirectly.
+inline void SleepMilliseconds(int n) {
+  const timespec time = {
+    0,                  // 0 seconds.
+    n * 1000L * 1000L,  // And n ms.
+  };
+  nanosleep(&time, NULL);
+}
+# endif  // GTEST_HAS_PTHREAD
+
+# if GTEST_HAS_NOTIFICATION_
+// Notification has already been imported into the namespace.
+// Nothing to do here.
+
+# elif GTEST_HAS_PTHREAD
+// Allows a controller thread to pause execution of newly created
+// threads until notified.  Instances of this class must be created
+// and destroyed in the controller thread.
+//
+// This class is only for testing Google Test's own constructs.  Do not
+// use it in user tests, either directly or indirectly.
+class Notification {
+ public:
+  Notification() : notified_(false) {
+    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
+  }
+  ~Notification() {
+    pthread_mutex_destroy(&mutex_);
+  }
+
+  // Notifies all threads created with this notification to start. Must
+  // be called from the controller thread.
+  void Notify() {
+    pthread_mutex_lock(&mutex_);
+    notified_ = true;
+    pthread_mutex_unlock(&mutex_);
+  }
+
+  // Blocks until the controller thread notifies.  Must be called from a test
+  // thread.
+  void WaitForNotification() {
+    for (;;) {
+      pthread_mutex_lock(&mutex_);
+      const bool notified = notified_;
+      pthread_mutex_unlock(&mutex_);
+      if (notified)
+        break;
+      SleepMilliseconds(10);
+    }
+  }
+
+ private:
+  pthread_mutex_t mutex_;
+  bool notified_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
+};
+
+# elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
+
+GTEST_API_ void SleepMilliseconds(int n);
+
+// Provides leak-safe Windows kernel handle ownership.
+// Used in death tests and in threading support.
+class GTEST_API_ AutoHandle {
+ public:
+  // Assume that Win32 HANDLE type is equivalent to void*. Doing so allows us
+  // to avoid including <windows.h> in this header file. Including <windows.h>
+  // is undesirable because it defines a lot of symbols and macros that tend to
+  // conflict with client code. This assumption is verified by
+  // WindowsTypesTest.HANDLEIsVoidStar.
+  typedef void* Handle;
+  AutoHandle();
+  explicit AutoHandle(Handle handle);
+
+  ~AutoHandle();
+
+  Handle Get() const;
+  void Reset();
+  void Reset(Handle handle);
+
+ private:
+  // Returns true iff the handle is a valid handle object that can be closed.
+  bool IsCloseable() const;
+
+  Handle handle_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
+};
+
+// Allows a controller thread to pause execution of newly created
+// threads until notified.  Instances of this class must be created
+// and destroyed in the controller thread.
+//
+// This class is only for testing Google Test's own constructs.  Do not
+// use it in user tests, either directly or indirectly.
+class GTEST_API_ Notification {
+ public:
+  Notification();
+  void Notify();
+  void WaitForNotification();
+
+ private:
+  AutoHandle event_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
+};
+# endif  // GTEST_HAS_NOTIFICATION_
+
+// On MinGW, we can have both GTEST_OS_WINDOWS and GTEST_HAS_PTHREAD
+// defined, but we don't want to use MinGW's pthreads implementation, which
+// has conformance problems with some versions of the POSIX standard.
+# if GTEST_HAS_PTHREAD && !GTEST_OS_WINDOWS_MINGW
+
+// As a C-function, ThreadFuncWithCLinkage cannot be templated itself.
+// Consequently, it cannot select a correct instantiation of ThreadWithParam
+// in order to call its Run(). Introducing ThreadWithParamBase as a
+// non-templated base class for ThreadWithParam allows us to bypass this
+// problem.
+class ThreadWithParamBase {
+ public:
+  virtual ~ThreadWithParamBase() {}
+  virtual void Run() = 0;
+};
+
+// pthread_create() accepts a pointer to a function type with the C linkage.
+// According to the Standard (7.5/1), function types with different linkages
+// are different even if they are otherwise identical.  Some compilers (for
+// example, SunStudio) treat them as different types.  Since class methods
+// cannot be defined with C-linkage we need to define a free C-function to
+// pass into pthread_create().
+extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
+  static_cast<ThreadWithParamBase*>(thread)->Run();
+  return NULL;
+}
+
+// Helper class for testing Google Test's multi-threading constructs.
+// To use it, write:
+//
+//   void ThreadFunc(int param) { /* Do things with param */ }
+//   Notification thread_can_start;
+//   ...
+//   // The thread_can_start parameter is optional; you can supply NULL.
+//   ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
+//   thread_can_start.Notify();
+//
+// These classes are only for testing Google Test's own constructs.  Do
+// not use them in user tests, either directly or indirectly.
+template <typename T>
+class ThreadWithParam : public ThreadWithParamBase {
+ public:
+  typedef void UserThreadFunc(T);
+
+  ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
+      : func_(func),
+        param_(param),
+        thread_can_start_(thread_can_start),
+        finished_(false) {
+    ThreadWithParamBase* const base = this;
+    // The thread can be created only after all fields except thread_
+    // have been initialized.
+    GTEST_CHECK_POSIX_SUCCESS_(
+        pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base));
+  }
+  ~ThreadWithParam() { Join(); }
+
+  void Join() {
+    if (!finished_) {
+      GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0));
+      finished_ = true;
+    }
+  }
+
+  virtual void Run() {
+    if (thread_can_start_ != NULL)
+      thread_can_start_->WaitForNotification();
+    func_(param_);
+  }
+
+ private:
+  UserThreadFunc* const func_;  // User-supplied thread function.
+  const T param_;  // User-supplied parameter to the thread function.
+  // When non-NULL, used to block execution until the controller thread
+  // notifies.
+  Notification* const thread_can_start_;
+  bool finished_;  // true iff we know that the thread function has finished.
+  pthread_t thread_;  // The native thread object.
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
+};
+# endif  // !GTEST_OS_WINDOWS && GTEST_HAS_PTHREAD ||
+         // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
+
+# if GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
+// Mutex and ThreadLocal have already been imported into the namespace.
+// Nothing to do here.
+
+# elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
+
+// Mutex implements mutex on Windows platforms.  It is used in conjunction
+// with class MutexLock:
+//
+//   Mutex mutex;
+//   ...
+//   MutexLock lock(&mutex);  // Acquires the mutex and releases it at the
+//                            // end of the current scope.
+//
+// A static Mutex *must* be defined or declared using one of the following
+// macros:
+//   GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
+//   GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
+//
+// (A non-static Mutex is defined/declared in the usual way).
+class GTEST_API_ Mutex {
+ public:
+  enum MutexType { kStatic = 0, kDynamic = 1 };
+  // We rely on kStaticMutex being 0 as it is to what the linker initializes
+  // type_ in static mutexes.  critical_section_ will be initialized lazily
+  // in ThreadSafeLazyInit().
+  enum StaticConstructorSelector { kStaticMutex = 0 };
+
+  // This constructor intentionally does nothing.  It relies on type_ being
+  // statically initialized to 0 (effectively setting it to kStatic) and on
+  // ThreadSafeLazyInit() to lazily initialize the rest of the members.
+  explicit Mutex(StaticConstructorSelector /*dummy*/) {}
+
+  Mutex();
+  ~Mutex();
+
+  void Lock();
+
+  void Unlock();
+
+  // Does nothing if the current thread holds the mutex. Otherwise, crashes
+  // with high probability.
+  void AssertHeld();
+
+ private:
+  // Initializes owner_thread_id_ and critical_section_ in static mutexes.
+  void ThreadSafeLazyInit();
+
+  // Per https://blogs.msdn.microsoft.com/oldnewthing/20040223-00/?p=40503,
+  // we assume that 0 is an invalid value for thread IDs.
+  unsigned int owner_thread_id_;
+
+  // For static mutexes, we rely on these members being initialized to zeros
+  // by the linker.
+ MutexType type_; + long critical_section_init_phase_; // NOLINT + GTEST_CRITICAL_SECTION* critical_section_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex); +}; + +# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ + extern ::testing::internal::Mutex mutex + +# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \ + ::testing::internal::Mutex mutex(::testing::internal::Mutex::kStaticMutex) + +// We cannot name this class MutexLock because the ctor declaration would +// conflict with a macro named MutexLock, which is defined on some +// platforms. That macro is used as a defensive measure to prevent against +// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than +// "MutexLock l(&mu)". Hence the typedef trick below. +class GTestMutexLock { + public: + explicit GTestMutexLock(Mutex* mutex) + : mutex_(mutex) { mutex_->Lock(); } + + ~GTestMutexLock() { mutex_->Unlock(); } + + private: + Mutex* const mutex_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock); +}; + +typedef GTestMutexLock MutexLock; + +// Base class for ValueHolder. Allows a caller to hold and delete a value +// without knowing its type. +class ThreadLocalValueHolderBase { + public: + virtual ~ThreadLocalValueHolderBase() {} +}; + +// Provides a way for a thread to send notifications to a ThreadLocal +// regardless of its parameter type. +class ThreadLocalBase { + public: + // Creates a new ValueHolder object holding a default value passed to + // this ThreadLocal's constructor and returns it. It is the caller's + // responsibility not to call this when the ThreadLocal instance already + // has a value on the current thread. + virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const = 0; + + protected: + ThreadLocalBase() {} + virtual ~ThreadLocalBase() {} + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocalBase); +}; + +// Maps a thread to a set of ThreadLocals that have values instantiated on that +// thread and notifies them when the thread exits. A ThreadLocal instance is +// expected to persist until all threads it has values on have terminated. +class GTEST_API_ ThreadLocalRegistry { + public: + // Registers thread_local_instance as having value on the current thread. + // Returns a value that can be used to identify the thread from other threads. + static ThreadLocalValueHolderBase* GetValueOnCurrentThread( + const ThreadLocalBase* thread_local_instance); + + // Invoked when a ThreadLocal instance is destroyed. + static void OnThreadLocalDestroyed( + const ThreadLocalBase* thread_local_instance); +}; + +class GTEST_API_ ThreadWithParamBase { + public: + void Join(); + + protected: + class Runnable { + public: + virtual ~Runnable() {} + virtual void Run() = 0; + }; + + ThreadWithParamBase(Runnable *runnable, Notification* thread_can_start); + virtual ~ThreadWithParamBase(); + + private: + AutoHandle thread_; +}; + +// Helper class for testing Google Test's multi-threading constructs. 
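+// A usage sketch, mirroring the pthreads version above (ThreadFunc and the
+// argument values are illustrative, not part of this header):
+//
+//   void ThreadFunc(int param) { /* Do things with param */ }
+//   Notification thread_can_start;
+//   ...
+//   ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
+//   thread_can_start.Notify();
+//   thread.Join();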
+template +class ThreadWithParam : public ThreadWithParamBase { + public: + typedef void UserThreadFunc(T); + + ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start) + : ThreadWithParamBase(new RunnableImpl(func, param), thread_can_start) { + } + virtual ~ThreadWithParam() {} + + private: + class RunnableImpl : public Runnable { + public: + RunnableImpl(UserThreadFunc* func, T param) + : func_(func), + param_(param) { + } + virtual ~RunnableImpl() {} + virtual void Run() { + func_(param_); + } + + private: + UserThreadFunc* const func_; + const T param_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(RunnableImpl); + }; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam); +}; + +// Implements thread-local storage on Windows systems. +// +// // Thread 1 +// ThreadLocal tl(100); // 100 is the default value for each thread. +// +// // Thread 2 +// tl.set(150); // Changes the value for thread 2 only. +// EXPECT_EQ(150, tl.get()); +// +// // Thread 1 +// EXPECT_EQ(100, tl.get()); // In thread 1, tl has the original value. +// tl.set(200); +// EXPECT_EQ(200, tl.get()); +// +// The template type argument T must have a public copy constructor. +// In addition, the default ThreadLocal constructor requires T to have +// a public default constructor. +// +// The users of a TheadLocal instance have to make sure that all but one +// threads (including the main one) using that instance have exited before +// destroying it. Otherwise, the per-thread objects managed for them by the +// ThreadLocal instance are not guaranteed to be destroyed on all platforms. +// +// Google Test only uses global ThreadLocal objects. That means they +// will die after main() has returned. Therefore, no per-thread +// object managed by Google Test will be leaked as long as all threads +// using Google Test have exited when main() returns. +template +class ThreadLocal : public ThreadLocalBase { + public: + ThreadLocal() : default_factory_(new DefaultValueHolderFactory()) {} + explicit ThreadLocal(const T& value) + : default_factory_(new InstanceValueHolderFactory(value)) {} + + ~ThreadLocal() { ThreadLocalRegistry::OnThreadLocalDestroyed(this); } + + T* pointer() { return GetOrCreateValue(); } + const T* pointer() const { return GetOrCreateValue(); } + const T& get() const { return *pointer(); } + void set(const T& value) { *pointer() = value; } + + private: + // Holds a value of T. Can be deleted via its base class without the caller + // knowing the type of T. 
+ class ValueHolder : public ThreadLocalValueHolderBase { + public: + ValueHolder() : value_() {} + explicit ValueHolder(const T& value) : value_(value) {} + + T* pointer() { return &value_; } + + private: + T value_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder); + }; + + + T* GetOrCreateValue() const { + return static_cast( + ThreadLocalRegistry::GetValueOnCurrentThread(this))->pointer(); + } + + virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const { + return default_factory_->MakeNewHolder(); + } + + class ValueHolderFactory { + public: + ValueHolderFactory() {} + virtual ~ValueHolderFactory() {} + virtual ValueHolder* MakeNewHolder() const = 0; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory); + }; + + class DefaultValueHolderFactory : public ValueHolderFactory { + public: + DefaultValueHolderFactory() {} + virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(); } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory); + }; + + class InstanceValueHolderFactory : public ValueHolderFactory { + public: + explicit InstanceValueHolderFactory(const T& value) : value_(value) {} + virtual ValueHolder* MakeNewHolder() const { + return new ValueHolder(value_); + } + + private: + const T value_; // The value for each thread. + + GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory); + }; + + scoped_ptr default_factory_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal); +}; + +# elif GTEST_HAS_PTHREAD + +// MutexBase and Mutex implement mutex on pthreads-based platforms. +class MutexBase { + public: + // Acquires this mutex. + void Lock() { + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_)); + owner_ = pthread_self(); + has_owner_ = true; + } + + // Releases this mutex. + void Unlock() { + // Since the lock is being released the owner_ field should no longer be + // considered valid. We don't protect writing to has_owner_ here, as it's + // the caller's responsibility to ensure that the current thread holds the + // mutex when this is called. + has_owner_ = false; + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_)); + } + + // Does nothing if the current thread holds the mutex. Otherwise, crashes + // with high probability. + void AssertHeld() const { + GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self())) + << "The current thread is not holding the mutex @" << this; + } + + // A static mutex may be used before main() is entered. It may even + // be used before the dynamic initialization stage. Therefore we + // must be able to initialize a static mutex object at link time. + // This means MutexBase has to be a POD and its member variables + // have to be public. + public: + pthread_mutex_t mutex_; // The underlying pthread mutex. + // has_owner_ indicates whether the owner_ field below contains a valid thread + // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All + // accesses to the owner_ field should be protected by a check of this field. + // An alternative might be to memset() owner_ to all zeros, but there's no + // guarantee that a zero'd pthread_t is necessarily invalid or even different + // from pthread_self(). + bool has_owner_; + pthread_t owner_; // The thread holding the mutex. +}; + +// Forward-declares a static mutex. +# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ + extern ::testing::internal::MutexBase mutex + +// Defines and statically (i.e. at link time) initializes a static mutex. 
+// The initialization list here does not explicitly initialize each field, +// instead relying on default initialization for the unspecified fields. In +// particular, the owner_ field (a pthread_t) is not explicitly initialized. +// This allows initialization to work whether pthread_t is a scalar or struct. +// The flag -Wmissing-field-initializers must not be specified for this to work. +#define GTEST_DEFINE_STATIC_MUTEX_(mutex) \ + ::testing::internal::MutexBase mutex = {PTHREAD_MUTEX_INITIALIZER, false, 0} + +// The Mutex class can only be used for mutexes created at runtime. It +// shares its API with MutexBase otherwise. +class Mutex : public MutexBase { + public: + Mutex() { + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL)); + has_owner_ = false; + } + ~Mutex() { + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_)); + } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex); +}; + +// We cannot name this class MutexLock because the ctor declaration would +// conflict with a macro named MutexLock, which is defined on some +// platforms. That macro is used as a defensive measure to prevent against +// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than +// "MutexLock l(&mu)". Hence the typedef trick below. +class GTestMutexLock { + public: + explicit GTestMutexLock(MutexBase* mutex) + : mutex_(mutex) { mutex_->Lock(); } + + ~GTestMutexLock() { mutex_->Unlock(); } + + private: + MutexBase* const mutex_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock); +}; + +typedef GTestMutexLock MutexLock; + +// Helpers for ThreadLocal. + +// pthread_key_create() requires DeleteThreadLocalValue() to have +// C-linkage. Therefore it cannot be templatized to access +// ThreadLocal. Hence the need for class +// ThreadLocalValueHolderBase. +class ThreadLocalValueHolderBase { + public: + virtual ~ThreadLocalValueHolderBase() {} +}; + +// Called by pthread to delete thread-local data stored by +// pthread_setspecific(). +extern "C" inline void DeleteThreadLocalValue(void* value_holder) { + delete static_cast(value_holder); +} + +// Implements thread-local storage on pthreads-based systems. +template +class GTEST_API_ ThreadLocal { + public: + ThreadLocal() + : key_(CreateKey()), default_factory_(new DefaultValueHolderFactory()) {} + explicit ThreadLocal(const T& value) + : key_(CreateKey()), + default_factory_(new InstanceValueHolderFactory(value)) {} + + ~ThreadLocal() { + // Destroys the managed object for the current thread, if any. + DeleteThreadLocalValue(pthread_getspecific(key_)); + + // Releases resources associated with the key. This will *not* + // delete managed objects for other threads. + GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_)); + } + + T* pointer() { return GetOrCreateValue(); } + const T* pointer() const { return GetOrCreateValue(); } + const T& get() const { return *pointer(); } + void set(const T& value) { *pointer() = value; } + + private: + // Holds a value of type T. + class ValueHolder : public ThreadLocalValueHolderBase { + public: + ValueHolder() : value_() {} + explicit ValueHolder(const T& value) : value_(value) {} + + T* pointer() { return &value_; } + + private: + T value_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder); + }; + + static pthread_key_t CreateKey() { + pthread_key_t key; + // When a thread exits, DeleteThreadLocalValue() will be called on + // the object managed for that thread. 
+ GTEST_CHECK_POSIX_SUCCESS_( + pthread_key_create(&key, &DeleteThreadLocalValue)); + return key; + } + + T* GetOrCreateValue() const { + ThreadLocalValueHolderBase* const holder = + static_cast(pthread_getspecific(key_)); + if (holder != NULL) { + return CheckedDowncastToActualType(holder)->pointer(); + } + + ValueHolder* const new_holder = default_factory_->MakeNewHolder(); + ThreadLocalValueHolderBase* const holder_base = new_holder; + GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base)); + return new_holder->pointer(); + } + + class ValueHolderFactory { + public: + ValueHolderFactory() {} + virtual ~ValueHolderFactory() {} + virtual ValueHolder* MakeNewHolder() const = 0; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory); + }; + + class DefaultValueHolderFactory : public ValueHolderFactory { + public: + DefaultValueHolderFactory() {} + virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(); } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory); + }; + + class InstanceValueHolderFactory : public ValueHolderFactory { + public: + explicit InstanceValueHolderFactory(const T& value) : value_(value) {} + virtual ValueHolder* MakeNewHolder() const { + return new ValueHolder(value_); + } + + private: + const T value_; // The value for each thread. + + GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory); + }; + + // A key pthreads uses for looking up per-thread values. + const pthread_key_t key_; + scoped_ptr default_factory_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal); +}; + +# endif // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ + +#else // GTEST_IS_THREADSAFE + +// A dummy implementation of synchronization primitives (mutex, lock, +// and thread-local variable). Necessary for compiling Google Test where +// mutex is not supported - using Google Test in multiple threads is not +// supported on such platforms. + +class Mutex { + public: + Mutex() {} + void Lock() {} + void Unlock() {} + void AssertHeld() const {} +}; + +# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ + extern ::testing::internal::Mutex mutex + +# define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex + +// We cannot name this class MutexLock because the ctor declaration would +// conflict with a macro named MutexLock, which is defined on some +// platforms. That macro is used as a defensive measure to prevent against +// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than +// "MutexLock l(&mu)". Hence the typedef trick below. +class GTestMutexLock { + public: + explicit GTestMutexLock(Mutex*) {} // NOLINT +}; + +typedef GTestMutexLock MutexLock; + +template +class GTEST_API_ ThreadLocal { + public: + ThreadLocal() : value_() {} + explicit ThreadLocal(const T& value) : value_(value) {} + T* pointer() { return &value_; } + const T* pointer() const { return &value_; } + const T& get() const { return value_; } + void set(const T& value) { value_ = value; } + private: + T value_; +}; + +#endif // GTEST_IS_THREADSAFE + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +GTEST_API_ size_t GetThreadCount(); + +// Passing non-POD classes through ellipsis (...) crashes the ARM +// compiler and generates a warning in Sun Studio before 12u4. The Nokia Symbian +// and the IBM XL C/C++ compiler try to instantiate a copy constructor +// for objects passed through ellipsis (...), failing for uncopyable +// objects. 
We define this to ensure that only POD is passed through
+// ellipsis on these systems.
+#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || \
+    (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5130)
+// We lose support for NULL detection where the compiler doesn't like
+// passing non-POD classes through ellipsis (...).
+# define GTEST_ELLIPSIS_NEEDS_POD_ 1
+#else
+# define GTEST_CAN_COMPARE_NULL 1
+#endif
+
+// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between
+// const T& and const T* in a function template. These compilers
+// _can_ decide between class template specializations for T and T*,
+// so a tr1::type_traits-like is_pointer works.
+#if defined(__SYMBIAN32__) || defined(__IBMCPP__)
+# define GTEST_NEEDS_IS_POINTER_ 1
+#endif
+
+template <bool bool_value>
+struct bool_constant {
+  typedef bool_constant<bool_value> type;
+  static const bool value = bool_value;
+};
+template <bool bool_value> const bool bool_constant<bool_value>::value;
+
+typedef bool_constant<false> false_type;
+typedef bool_constant<true> true_type;
+
+template <typename T, typename U>
+struct is_same : public false_type {};
+
+template <typename T>
+struct is_same<T, T> : public true_type {};
+
+
+template <typename T>
+struct is_pointer : public false_type {};
+
+template <typename T>
+struct is_pointer<T*> : public true_type {};
+
+template <typename Iterator>
+struct IteratorTraits {
+  typedef typename Iterator::value_type value_type;
+};
+
+
+template <typename T>
+struct IteratorTraits<T*> {
+  typedef T value_type;
+};
+
+template <typename T>
+struct IteratorTraits<const T*> {
+  typedef T value_type;
+};
+
+#if GTEST_OS_WINDOWS
+# define GTEST_PATH_SEP_ "\\"
+# define GTEST_HAS_ALT_PATH_SEP_ 1
+// The biggest signed integer type the compiler supports.
+typedef __int64 BiggestInt;
+#else
+# define GTEST_PATH_SEP_ "/"
+# define GTEST_HAS_ALT_PATH_SEP_ 0
+typedef long long BiggestInt;  // NOLINT
+#endif  // GTEST_OS_WINDOWS
+
+// Utilities for char.
+
+// isspace(int ch) and friends accept an unsigned char or EOF. char
+// may be signed, depending on the compiler (or compiler flags).
+// Therefore we need to cast a char to unsigned char before calling
+// isspace(), etc.
+
+inline bool IsAlpha(char ch) {
+  return isalpha(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsAlNum(char ch) {
+  return isalnum(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsDigit(char ch) {
+  return isdigit(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsLower(char ch) {
+  return islower(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsSpace(char ch) {
+  return isspace(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsUpper(char ch) {
+  return isupper(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsXDigit(char ch) {
+  return isxdigit(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsXDigit(wchar_t ch) {
+  const unsigned char low_byte = static_cast<unsigned char>(ch);
+  return ch == low_byte && isxdigit(low_byte) != 0;
+}
+
+inline char ToLower(char ch) {
+  return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
+}
+inline char ToUpper(char ch) {
+  return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
+}
+
+inline std::string StripTrailingSpaces(std::string str) {
+  std::string::iterator it = str.end();
+  while (it != str.begin() && IsSpace(*--it))
+    it = str.erase(it);
+  return str;
+}
+
+// The testing::internal::posix namespace holds wrappers for common
+// POSIX functions. These wrappers hide the differences between
+// Windows/MSVC and POSIX systems. Since some compilers define these
+// standard functions as macros, the wrapper cannot have the same name
+// as the wrapped function.
+
+namespace posix {
+
+// Functions with a different name on Windows.
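+//
+// For example (an illustrative sketch, not part of the original header),
+// portable test code can write
+//
+//   if (posix::StrCaseCmp(lhs, rhs) == 0) { ... }      // _stricmp/strcasecmp
+//   if (posix::IsATTY(posix::FileNo(stdout))) { ... }  // _isatty/isatty
+//
+// instead of selecting the MSVC or POSIX spelling by hand.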
+ +#if GTEST_OS_WINDOWS + +typedef struct _stat StatStruct; + +# ifdef __BORLANDC__ +inline int IsATTY(int fd) { return isatty(fd); } +inline int StrCaseCmp(const char* s1, const char* s2) { + return stricmp(s1, s2); +} +inline char* StrDup(const char* src) { return strdup(src); } +# else // !__BORLANDC__ +# if GTEST_OS_WINDOWS_MOBILE +inline int IsATTY(int /* fd */) { return 0; } +# else +inline int IsATTY(int fd) { return _isatty(fd); } +# endif // GTEST_OS_WINDOWS_MOBILE +inline int StrCaseCmp(const char* s1, const char* s2) { + return _stricmp(s1, s2); +} +inline char* StrDup(const char* src) { return _strdup(src); } +# endif // __BORLANDC__ + +# if GTEST_OS_WINDOWS_MOBILE +inline int FileNo(FILE* file) { return reinterpret_cast(_fileno(file)); } +// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this +// time and thus not defined there. +# else +inline int FileNo(FILE* file) { return _fileno(file); } +inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); } +inline int RmDir(const char* dir) { return _rmdir(dir); } +inline bool IsDir(const StatStruct& st) { + return (_S_IFDIR & st.st_mode) != 0; +} +# endif // GTEST_OS_WINDOWS_MOBILE + +#else + +typedef struct stat StatStruct; + +inline int FileNo(FILE* file) { return fileno(file); } +inline int IsATTY(int fd) { return isatty(fd); } +inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); } +inline int StrCaseCmp(const char* s1, const char* s2) { + return strcasecmp(s1, s2); +} +inline char* StrDup(const char* src) { return strdup(src); } +inline int RmDir(const char* dir) { return rmdir(dir); } +inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); } + +#endif // GTEST_OS_WINDOWS + +// Functions deprecated by MSVC 8.0. + +GTEST_DISABLE_MSC_DEPRECATED_PUSH_() + +inline const char* StrNCpy(char* dest, const char* src, size_t n) { + return strncpy(dest, src, n); +} + +// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and +// StrError() aren't needed on Windows CE at this time and thus not +// defined there. + +#if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT +inline int ChDir(const char* dir) { return chdir(dir); } +#endif +inline FILE* FOpen(const char* path, const char* mode) { + return fopen(path, mode); +} +#if !GTEST_OS_WINDOWS_MOBILE +inline FILE *FReopen(const char* path, const char* mode, FILE* stream) { + return freopen(path, mode, stream); +} +inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); } +#endif +inline int FClose(FILE* fp) { return fclose(fp); } +#if !GTEST_OS_WINDOWS_MOBILE +inline int Read(int fd, void* buf, unsigned int count) { + return static_cast(read(fd, buf, count)); +} +inline int Write(int fd, const void* buf, unsigned int count) { + return static_cast(write(fd, buf, count)); +} +inline int Close(int fd) { return close(fd); } +inline const char* StrError(int errnum) { return strerror(errnum); } +#endif +inline const char* GetEnv(const char* name) { +#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT + // We are on Windows CE, which has no environment variables. + static_cast(name); // To prevent 'unused argument' warning. + return NULL; +#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9) + // Environment variables which we programmatically clear will be set to the + // empty string rather than unset (NULL). Handle that case. + const char* const env = getenv(name); + return (env != NULL && env[0] != '\0') ? 
env : NULL;
+#else
+  return getenv(name);
+#endif
+}
+
+GTEST_DISABLE_MSC_DEPRECATED_POP_()
+
+#if GTEST_OS_WINDOWS_MOBILE
+// Windows CE has no C library. The abort() function is used in
+// several places in Google Test. This implementation provides a reasonable
+// imitation of standard behaviour.
+void Abort();
+#else
+inline void Abort() { abort(); }
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+}  // namespace posix
+
+// MSVC "deprecates" snprintf and issues warnings wherever it is used. In
+// order to avoid these warnings, we need to use _snprintf or _snprintf_s on
+// MSVC-based platforms. We map the GTEST_SNPRINTF_ macro to the appropriate
+// function in order to achieve that. We use macro definition here because
+// snprintf is a variadic function.
+#if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
+// MSVC 2005 and above support variadic macros.
+# define GTEST_SNPRINTF_(buffer, size, format, ...) \
+     _snprintf_s(buffer, size, size, format, __VA_ARGS__)
+#elif defined(_MSC_VER)
+// Windows CE does not define _snprintf_s and MSVC prior to 2005 doesn't
+// complain about _snprintf.
+# define GTEST_SNPRINTF_ _snprintf
+#else
+# define GTEST_SNPRINTF_ snprintf
+#endif
+
+// The maximum number a BiggestInt can represent. This definition
+// works no matter BiggestInt is represented in one's complement or
+// two's complement.
+//
+// We cannot rely on numeric_limits in STL, as __int64 and long long
+// are not part of standard C++ and numeric_limits doesn't need to be
+// defined for them.
+const BiggestInt kMaxBiggestInt =
+    ~(static_cast<BiggestInt>(1) << (8*sizeof(BiggestInt) - 1));
+
+// This template class serves as a compile-time function from size to
+// type. It maps a size in bytes to a primitive type with that
+// size. e.g.
+//
+//   TypeWithSize<4>::UInt
+//
+// is typedef-ed to be unsigned int (unsigned integer made up of 4
+// bytes).
+//
+// Such functionality should belong to STL, but I cannot find it
+// there.
+//
+// Google Test uses this class in the implementation of floating-point
+// comparison.
+//
+// For now it only handles UInt (unsigned int) as that's all Google Test
+// needs. Other types can be easily added in the future if need
+// arises.
+template <size_t size>
+class TypeWithSize {
+ public:
+  // This prevents the user from using TypeWithSize<N> with incorrect
+  // values of N.
+  typedef void UInt;
+};
+
+// The specialization for size 4.
+template <>
+class TypeWithSize<4> {
+ public:
+  // unsigned int has size 4 in both gcc and MSVC.
+  //
+  // As base/basictypes.h doesn't compile on Windows, we cannot use
+  // uint32, uint64, and etc here.
+  typedef int Int;
+  typedef unsigned int UInt;
+};
+
+// The specialization for size 8.
+template <>
+class TypeWithSize<8> {
+ public:
+#if GTEST_OS_WINDOWS
+  typedef __int64 Int;
+  typedef unsigned __int64 UInt;
+#else
+  typedef long long Int;  // NOLINT
+  typedef unsigned long long UInt;  // NOLINT
+#endif  // GTEST_OS_WINDOWS
+};
+
+// Integer types of known sizes.
+typedef TypeWithSize<4>::Int Int32;
+typedef TypeWithSize<4>::UInt UInt32;
+typedef TypeWithSize<8>::Int Int64;
+typedef TypeWithSize<8>::UInt UInt64;
+typedef TypeWithSize<8>::Int TimeInMillis;  // Represents time in milliseconds.
+
+// Utilities for command line flags and environment variables.
+
+// Macro for referencing flags.
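+//
+// For instance, with the default definition below, GTEST_FLAG(output) expands
+// to the variable name FLAGS_gtest_output (a sketch, assuming no custom
+// GTEST_FLAG macro has been injected by the embedder):
+//
+//   GTEST_DECLARE_string_(output);        // declares FLAGS_gtest_output
+//   std::string s = GTEST_FLAG(output);   // reads it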
+#if !defined(GTEST_FLAG)
+# define GTEST_FLAG(name) FLAGS_gtest_##name
+#endif  // !defined(GTEST_FLAG)
+
+#if !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
+# define GTEST_USE_OWN_FLAGFILE_FLAG_ 1
+#endif  // !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
+
+#if !defined(GTEST_DECLARE_bool_)
+# define GTEST_FLAG_SAVER_ ::testing::internal::GTestFlagSaver
+
+// Macros for declaring flags.
+# define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
+# define GTEST_DECLARE_int32_(name) \
+    GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name)
+# define GTEST_DECLARE_string_(name) \
+    GTEST_API_ extern ::std::string GTEST_FLAG(name)
+
+// Macros for defining flags.
+# define GTEST_DEFINE_bool_(name, default_val, doc) \
+    GTEST_API_ bool GTEST_FLAG(name) = (default_val)
+# define GTEST_DEFINE_int32_(name, default_val, doc) \
+    GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val)
+# define GTEST_DEFINE_string_(name, default_val, doc) \
+    GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)
+
+#endif  // !defined(GTEST_DECLARE_bool_)
+
+// Thread annotations
+#if !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
+# define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
+# define GTEST_LOCK_EXCLUDED_(locks)
+#endif  // !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
+
+// Parses 'str' for a 32-bit signed integer. If successful, writes the result
+// to *value and returns true; otherwise leaves *value unchanged and returns
+// false.
+// FIXME: Find a better way to refactor flag and environment parsing
+// out of both gtest-port.cc and gtest.cc to avoid exporting this utility
+// function.
+bool ParseInt32(const Message& src_text, const char* str, Int32* value);
+
+// Parses a bool/Int32/string from the environment variable
+// corresponding to the given Google Test flag.
+bool BoolFromGTestEnv(const char* flag, bool default_val);
+GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val);
+std::string OutputFlagAlsoCheckEnvVar();
+const char* StringFromGTestEnv(const char* flag, const char* default_val);
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+
+#if GTEST_OS_LINUX
+# include <stdlib.h>
+# include <sys/types.h>
+# include <sys/wait.h>
+# include <unistd.h>
+#endif  // GTEST_OS_LINUX
+
+#if GTEST_HAS_EXCEPTIONS
+# include <stdexcept>
+#endif
+
+#include <ctype.h>
+#include <float.h>
+#include <string.h>
+#include <iomanip>
+#include <limits>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +// The Google C++ Testing and Mocking Framework (Google Test) +// +// This header file defines the Message class. +// +// IMPORTANT NOTE: Due to limitation of the C++ language, we have to +// leave some internal implementation details in this header file. +// They are clearly marked by comments like this: +// +// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +// +// Such code is NOT meant to be used by a user directly, and is subject +// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user +// program! + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ +#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ + +#include + + +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + +// Ensures that there is at least one operator<< in the global namespace. +// See Message& operator<<(...) below for why. +void operator<<(const testing::internal::Secret&, int); + +namespace testing { + +// The Message class works like an ostream repeater. +// +// Typical usage: +// +// 1. You stream a bunch of values to a Message object. +// It will remember the text in a stringstream. +// 2. Then you stream the Message object to an ostream. +// This causes the text in the Message to be streamed +// to the ostream. +// +// For example; +// +// testing::Message foo; +// foo << 1 << " != " << 2; +// std::cout << foo; +// +// will print "1 != 2". +// +// Message is not intended to be inherited from. In particular, its +// destructor is not virtual. +// +// Note that stringstream behaves differently in gcc and in MSVC. You +// can stream a NULL char pointer to it in the former, but not in the +// latter (it causes an access violation if you do). The Message +// class hides this difference by treating a NULL char pointer as +// "(null)". +class GTEST_API_ Message { + private: + // The type of basic IO manipulators (endl, ends, and flush) for + // narrow streams. + typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&); + + public: + // Constructs an empty Message. + Message(); + + // Copy constructor. + Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT + *ss_ << msg.GetString(); + } + + // Constructs a Message from a C-string. + explicit Message(const char* str) : ss_(new ::std::stringstream) { + *ss_ << str; + } + +#if GTEST_OS_SYMBIAN + // Streams a value (either a pointer or not) to this object. + template + inline Message& operator <<(const T& value) { + StreamHelper(typename internal::is_pointer::type(), value); + return *this; + } +#else + // Streams a non-pointer value to this object. + template + inline Message& operator <<(const T& val) { + // Some libraries overload << for STL containers. These + // overloads are defined in the global namespace instead of ::std. + // + // C++'s symbol lookup rule (i.e. 
Koenig lookup) says that these + // overloads are visible in either the std namespace or the global + // namespace, but not other namespaces, including the testing + // namespace which Google Test's Message class is in. + // + // To allow STL containers (and other types that has a << operator + // defined in the global namespace) to be used in Google Test + // assertions, testing::Message must access the custom << operator + // from the global namespace. With this using declaration, + // overloads of << defined in the global namespace and those + // visible via Koenig lookup are both exposed in this function. + using ::operator <<; + *ss_ << val; + return *this; + } + + // Streams a pointer value to this object. + // + // This function is an overload of the previous one. When you + // stream a pointer to a Message, this definition will be used as it + // is more specialized. (The C++ Standard, section + // [temp.func.order].) If you stream a non-pointer, then the + // previous definition will be used. + // + // The reason for this overload is that streaming a NULL pointer to + // ostream is undefined behavior. Depending on the compiler, you + // may get "0", "(nil)", "(null)", or an access violation. To + // ensure consistent result across compilers, we always treat NULL + // as "(null)". + template + inline Message& operator <<(T* const& pointer) { // NOLINT + if (pointer == NULL) { + *ss_ << "(null)"; + } else { + *ss_ << pointer; + } + return *this; + } +#endif // GTEST_OS_SYMBIAN + + // Since the basic IO manipulators are overloaded for both narrow + // and wide streams, we have to provide this specialized definition + // of operator <<, even though its body is the same as the + // templatized version above. Without this definition, streaming + // endl or other basic IO manipulators to Message will confuse the + // compiler. + Message& operator <<(BasicNarrowIoManip val) { + *ss_ << val; + return *this; + } + + // Instead of 1/0, we want to see true/false for bool values. + Message& operator <<(bool b) { + return *this << (b ? "true" : "false"); + } + + // These two overloads allow streaming a wide C string to a Message + // using the UTF-8 encoding. + Message& operator <<(const wchar_t* wide_c_str); + Message& operator <<(wchar_t* wide_c_str); + +#if GTEST_HAS_STD_WSTRING + // Converts the given wide string to a narrow string using the UTF-8 + // encoding, and streams the result to this Message object. + Message& operator <<(const ::std::wstring& wstr); +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_WSTRING + // Converts the given wide string to a narrow string using the UTF-8 + // encoding, and streams the result to this Message object. + Message& operator <<(const ::wstring& wstr); +#endif // GTEST_HAS_GLOBAL_WSTRING + + // Gets the text streamed to this object so far as an std::string. + // Each '\0' character in the buffer is replaced with "\\0". + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + std::string GetString() const; + + private: +#if GTEST_OS_SYMBIAN + // These are needed as the Nokia Symbian Compiler cannot decide between + // const T& and const T* in a function template. The Nokia compiler _can_ + // decide between class template specializations for T and T*, so a + // tr1::type_traits-like is_pointer works, and we can overload on that. 
+ template + inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) { + if (pointer == NULL) { + *ss_ << "(null)"; + } else { + *ss_ << pointer; + } + } + template + inline void StreamHelper(internal::false_type /*is_pointer*/, + const T& value) { + // See the comments in Message& operator <<(const T&) above for why + // we need this using statement. + using ::operator <<; + *ss_ << value; + } +#endif // GTEST_OS_SYMBIAN + + // We'll hold the text streamed to this object here. + const internal::scoped_ptr< ::std::stringstream> ss_; + + // We declare (but don't implement) this to prevent the compiler + // from implementing the assignment operator. + void operator=(const Message&); +}; + +// Streams a Message to an ostream. +inline std::ostream& operator <<(std::ostream& os, const Message& sb) { + return os << sb.GetString(); +} + +namespace internal { + +// Converts a streamable value to an std::string. A NULL pointer is +// converted to "(null)". When the input value is a ::string, +// ::std::string, ::wstring, or ::std::wstring object, each NUL +// character in it is replaced with "\\0". +template +std::string StreamableToString(const T& streamable) { + return (Message() << streamable).GetString(); +} + +} // namespace internal +} // namespace testing + +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + +#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Google Test filepath utilities +// +// This header file declares classes and functions used internally by +// Google Test. They are subject to change without notice. +// +// This file is #included in gtest/internal/gtest-internal.h. +// Do not include this header file separately! + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ + +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// The Google C++ Testing and Mocking Framework (Google Test) +// +// This header file declares the String class and functions used internally by +// Google Test. They are subject to change without notice. They should not used +// by code external to Google Test. +// +// This header file is #included by gtest-internal.h. +// It should not be #included by other files. + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ + +#ifdef __BORLANDC__ +// string.h is not guaranteed to provide strcpy on C++ Builder. +# include +#endif + +#include +#include + + +namespace testing { +namespace internal { + +// String - an abstract class holding static string utilities. +class GTEST_API_ String { + public: + // Static utility methods + + // Clones a 0-terminated C string, allocating memory using new. The + // caller is responsible for deleting the return value using + // delete[]. Returns the cloned string, or NULL if the input is + // NULL. + // + // This is different from strdup() in string.h, which allocates + // memory using malloc(). + static const char* CloneCString(const char* c_str); + +#if GTEST_OS_WINDOWS_MOBILE + // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be + // able to pass strings to Win32 APIs on CE we need to convert them + // to 'Unicode', UTF-16. + + // Creates a UTF-16 wide string from the given ANSI string, allocating + // memory using new. The caller is responsible for deleting the return + // value using delete[]. Returns the wide string, or NULL if the + // input is NULL. + // + // The wide string is created using the ANSI codepage (CP_ACP) to + // match the behaviour of the ANSI versions of Win32 calls and the + // C runtime. + static LPCWSTR AnsiToUtf16(const char* c_str); + + // Creates an ANSI string from the given wide string, allocating + // memory using new. The caller is responsible for deleting the return + // value using delete[]. 
Returns the ANSI string, or NULL if the + // input is NULL. + // + // The returned string is created using the ANSI codepage (CP_ACP) to + // match the behaviour of the ANSI versions of Win32 calls and the + // C runtime. + static const char* Utf16ToAnsi(LPCWSTR utf16_str); +#endif + + // Compares two C strings. Returns true iff they have the same content. + // + // Unlike strcmp(), this function can handle NULL argument(s). A + // NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool CStringEquals(const char* lhs, const char* rhs); + + // Converts a wide C string to a String using the UTF-8 encoding. + // NULL will be converted to "(null)". If an error occurred during + // the conversion, "(failed to convert from wide string)" is + // returned. + static std::string ShowWideCString(const wchar_t* wide_c_str); + + // Compares two wide C strings. Returns true iff they have the same + // content. + // + // Unlike wcscmp(), this function can handle NULL argument(s). A + // NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs); + + // Compares two C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike strcasecmp(), this function can handle NULL argument(s). + // A NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool CaseInsensitiveCStringEquals(const char* lhs, + const char* rhs); + + // Compares two wide C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike wcscasecmp(), this function can handle NULL argument(s). + // A NULL C string is considered different to any non-NULL wide C string, + // including the empty string. + // NB: The implementations on different platforms slightly differ. + // On windows, this method uses _wcsicmp which compares according to LC_CTYPE + // environment variable. On GNU platform this method uses wcscasecmp + // which compares according to LC_CTYPE category of the current locale. + // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the + // current locale. + static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs, + const wchar_t* rhs); + + // Returns true iff the given string ends with the given suffix, ignoring + // case. Any string is considered to end with an empty suffix. + static bool EndsWithCaseInsensitive( + const std::string& str, const std::string& suffix); + + // Formats an int value as "%02d". + static std::string FormatIntWidth2(int value); // "%02d" for width == 2 + + // Formats an int value as "%X". + static std::string FormatHexInt(int value); + + // Formats a byte as "%02X". + static std::string FormatByte(unsigned char value); + + private: + String(); // Not meant to be instantiated. +}; // class String + +// Gets the content of the stringstream's buffer as an std::string. Each '\0' +// character in the buffer is replaced with "\\0". 
+GTEST_API_ std::string StringStreamToString(::std::stringstream* stream); + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ + +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + +namespace testing { +namespace internal { + +// FilePath - a class for file and directory pathname manipulation which +// handles platform-specific conventions (like the pathname separator). +// Used for helper functions for naming files in a directory for xml output. +// Except for Set methods, all methods are const or static, which provides an +// "immutable value object" -- useful for peace of mind. +// A FilePath with a value ending in a path separator ("like/this/") represents +// a directory, otherwise it is assumed to represent a file. In either case, +// it may or may not represent an actual file or directory in the file system. +// Names are NOT checked for syntax correctness -- no checking for illegal +// characters, malformed paths, etc. + +class GTEST_API_ FilePath { + public: + FilePath() : pathname_("") { } + FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { } + + explicit FilePath(const std::string& pathname) : pathname_(pathname) { + Normalize(); + } + + FilePath& operator=(const FilePath& rhs) { + Set(rhs); + return *this; + } + + void Set(const FilePath& rhs) { + pathname_ = rhs.pathname_; + } + + const std::string& string() const { return pathname_; } + const char* c_str() const { return pathname_.c_str(); } + + // Returns the current working directory, or "" if unsuccessful. + static FilePath GetCurrentDir(); + + // Given directory = "dir", base_name = "test", number = 0, + // extension = "xml", returns "dir/test.xml". If number is greater + // than zero (e.g., 12), returns "dir/test_12.xml". + // On Windows platform, uses \ as the separator rather than /. + static FilePath MakeFileName(const FilePath& directory, + const FilePath& base_name, + int number, + const char* extension); + + // Given directory = "dir", relative_path = "test.xml", + // returns "dir/test.xml". + // On Windows, uses \ as the separator rather than /. + static FilePath ConcatPaths(const FilePath& directory, + const FilePath& relative_path); + + // Returns a pathname for a file that does not currently exist. The pathname + // will be directory/base_name.extension or + // directory/base_name_.extension if directory/base_name.extension + // already exists. The number will be incremented until a pathname is found + // that does not already exist. + // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. + // There could be a race condition if two or more processes are calling this + // function at the same time -- they could both pick the same filename. + static FilePath GenerateUniqueFileName(const FilePath& directory, + const FilePath& base_name, + const char* extension); + + // Returns true iff the path is "". + bool IsEmpty() const { return pathname_.empty(); } + + // If input name has a trailing separator character, removes it and returns + // the name, otherwise return the name string unmodified. + // On Windows platform, uses \ as the separator, other platforms use /. + FilePath RemoveTrailingPathSeparator() const; + + // Returns a copy of the FilePath with the directory part removed. + // Example: FilePath("path/to/file").RemoveDirectoryName() returns + // FilePath("file"). If there is no directory part ("just_a_file"), it returns + // the FilePath unmodified. 
If there is no file part ("just_a_dir/") it + // returns an empty FilePath (""). + // On Windows platform, '\' is the path separator, otherwise it is '/'. + FilePath RemoveDirectoryName() const; + + // RemoveFileName returns the directory path with the filename removed. + // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". + // If the FilePath is "a_file" or "/a_file", RemoveFileName returns + // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does + // not have a file, like "just/a/dir/", it returns the FilePath unmodified. + // On Windows platform, '\' is the path separator, otherwise it is '/'. + FilePath RemoveFileName() const; + + // Returns a copy of the FilePath with the case-insensitive extension removed. + // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns + // FilePath("dir/file"). If a case-insensitive extension is not + // found, returns a copy of the original FilePath. + FilePath RemoveExtension(const char* extension) const; + + // Creates directories so that path exists. Returns true if successful or if + // the directories already exist; returns false if unable to create + // directories for any reason. Will also return false if the FilePath does + // not represent a directory (that is, it doesn't end with a path separator). + bool CreateDirectoriesRecursively() const; + + // Create the directory so that path exists. Returns true if successful or + // if the directory already exists; returns false if unable to create the + // directory for any reason, including if the parent directory does not + // exist. Not named "CreateDirectory" because that's a macro on Windows. + bool CreateFolder() const; + + // Returns true if FilePath describes something in the file-system, + // either a file, directory, or whatever, and that something exists. + bool FileOrDirectoryExists() const; + + // Returns true if pathname describes a directory in the file-system + // that exists. + bool DirectoryExists() const; + + // Returns true if FilePath ends with a path separator, which indicates that + // it is intended to represent a directory. Returns false otherwise. + // This does NOT check that a directory (or file) actually exists. + bool IsDirectory() const; + + // Returns true if pathname describes a root directory. (Windows has one + // root directory per disk drive.) + bool IsRootDirectory() const; + + // Returns true if pathname describes an absolute path. + bool IsAbsolutePath() const; + + private: + // Replaces multiple consecutive separators with a single separator. + // For example, "bar///foo" becomes "bar/foo". Does not eliminate other + // redundancies that might be in a pathname involving "." or "..". + // + // A pathname with multiple consecutive separators may occur either through + // user error or as a result of some scripts or APIs that generate a pathname + // with a trailing separator. On other platforms the same API or script + // may NOT generate a pathname with a trailing "/". Then elsewhere that + // pathname may have another "/" and pathname components added to it, + // without checking for the separator already being there. + // The script language and operating system may allow paths like "foo//bar" + // but some of the functions in FilePath will not handle that correctly. In + // particular, RemoveTrailingPathSeparator() only removes one separator, and + // it is called in CreateDirectoriesRecursively() assuming that it will change + // a pathname from directory syntax (trailing separator) to filename syntax. 
+ // + // On Windows this method also replaces the alternate path separator '/' with + // the primary path separator '\\', so that for example "bar\\/\\foo" becomes + // "bar\\foo". + + void Normalize(); + + // Returns a pointer to the last occurence of a valid path separator in + // the FilePath. On Windows, for example, both '/' and '\' are valid path + // separators. Returns NULL if no path separator was found. + const char* FindLastPathSeparator() const; + + std::string pathname_; +}; // class FilePath + +} // namespace internal +} // namespace testing + +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ +// This file was GENERATED by command: +// pump.py gtest-type-util.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Type utilities needed for implementing typed and type-parameterized +// tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND! +// +// Currently we support at most 50 types in a list, and at most 50 +// type-parameterized tests in one type-parameterized test case. +// Please contact googletestframework@googlegroups.com if you need +// more. + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ + + +// #ifdef __GNUC__ is too general here. It is possible to use gcc without using +// libstdc++ (which is where cxxabi.h comes from). +# if GTEST_HAS_CXXABI_H_ +# include +# elif defined(__HP_aCC) +# include +# endif // GTEST_HASH_CXXABI_H_ + +namespace testing { +namespace internal { + +// Canonicalizes a given name with respect to the Standard C++ Library. +// This handles removing the inline namespace within `std` that is +// used by various standard libraries (e.g., `std::__1`). Names outside +// of namespace std are returned unmodified. 
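+// For example, a libc++-style name such as "std::__1::vector<int>" would be
+// canonicalized to "std::vector<int>" (an illustrative input; the exact
+// versioned namespace varies by standard library).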
+inline std::string CanonicalizeForStdLibVersioning(std::string s) {
+  static const char prefix[] = "std::__";
+  if (s.compare(0, strlen(prefix), prefix) == 0) {
+    std::string::size_type end = s.find("::", strlen(prefix));
+    if (end != s.npos) {
+      // Erase everything between the initial `std` and the second `::`.
+      s.erase(strlen("std"), end - strlen("std"));
+    }
+  }
+  return s;
+}
+
+// GetTypeName<T>() returns a human-readable name of type T.
+// NB: This function is also used in Google Mock, so don't move it inside of
+// the typed-test-only section below.
+template <typename T>
+std::string GetTypeName() {
+# if GTEST_HAS_RTTI
+
+  const char* const name = typeid(T).name();
+# if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
+  int status = 0;
+  // gcc's implementation of typeid(T).name() mangles the type name,
+  // so we have to demangle it.
+# if GTEST_HAS_CXXABI_H_
+  using abi::__cxa_demangle;
+# endif  // GTEST_HAS_CXXABI_H_
+  char* const readable_name = __cxa_demangle(name, 0, 0, &status);
+  const std::string name_str(status == 0 ? readable_name : name);
+  free(readable_name);
+  return CanonicalizeForStdLibVersioning(name_str);
+# else
+  return name;
+# endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC
+
+# else
+
+  return "<type>";
+
+# endif  // GTEST_HAS_RTTI
+}
+
+#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+// AssertTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
+// type. This can be used as a compile-time assertion to ensure that
+// two types are equal.
+
+template <typename T1, typename T2>
+struct AssertTypeEq;
+
+template <typename T>
+struct AssertTypeEq<T, T> {
+  typedef bool type;
+};
+
+// A unique type used as the default value for the arguments of class
+// template Types. This allows us to simulate variadic templates
+// (e.g. Types<int>, Types<int, double>, and etc), which C++ doesn't
+// support directly.
+struct None {};
+
+// The following family of struct and struct templates are used to
+// represent type lists. In particular, TypesN<T1, T2, ..., TN>
+// represents a type list with N types (T1, T2, ..., and TN) in it.
+// Except for Types0, every struct in the family has two member types:
+// Head for the first type in the list, and Tail for the rest of the
+// list.
+
+// The empty type list.
+struct Types0 {};
+
+// Type lists of length 1, 2, 3, and so on.
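+//
+// For example (an illustrative sketch), Types2<int, double> is a list whose
+// Head is int and whose Tail is Types1<double>; recursing on Head and Tail
+// visits each type in the list in turn:
+//
+//   typedef internal::Types2<int, double> MyList;
+//   // MyList::Head == int, MyList::Tail == Types1<double>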
+ +template +struct Types1 { + typedef T1 Head; + typedef Types0 Tail; +}; +template +struct Types2 { + typedef T1 Head; + typedef Types1 Tail; +}; + +template +struct Types3 { + typedef T1 Head; + typedef Types2 Tail; +}; + +template +struct Types4 { + typedef T1 Head; + typedef Types3 Tail; +}; + +template +struct Types5 { + typedef T1 Head; + typedef Types4 Tail; +}; + +template +struct Types6 { + typedef T1 Head; + typedef Types5 Tail; +}; + +template +struct Types7 { + typedef T1 Head; + typedef Types6 Tail; +}; + +template +struct Types8 { + typedef T1 Head; + typedef Types7 Tail; +}; + +template +struct Types9 { + typedef T1 Head; + typedef Types8 Tail; +}; + +template +struct Types10 { + typedef T1 Head; + typedef Types9 Tail; +}; + +template +struct Types11 { + typedef T1 Head; + typedef Types10 Tail; +}; + +template +struct Types12 { + typedef T1 Head; + typedef Types11 Tail; +}; + +template +struct Types13 { + typedef T1 Head; + typedef Types12 Tail; +}; + +template +struct Types14 { + typedef T1 Head; + typedef Types13 Tail; +}; + +template +struct Types15 { + typedef T1 Head; + typedef Types14 Tail; +}; + +template +struct Types16 { + typedef T1 Head; + typedef Types15 Tail; +}; + +template +struct Types17 { + typedef T1 Head; + typedef Types16 Tail; +}; + +template +struct Types18 { + typedef T1 Head; + typedef Types17 Tail; +}; + +template +struct Types19 { + typedef T1 Head; + typedef Types18 Tail; +}; + +template +struct Types20 { + typedef T1 Head; + typedef Types19 Tail; +}; + +template +struct Types21 { + typedef T1 Head; + typedef Types20 Tail; +}; + +template +struct Types22 { + typedef T1 Head; + typedef Types21 Tail; +}; + +template +struct Types23 { + typedef T1 Head; + typedef Types22 Tail; +}; + +template +struct Types24 { + typedef T1 Head; + typedef Types23 Tail; +}; + +template +struct Types25 { + typedef T1 Head; + typedef Types24 Tail; +}; + +template +struct Types26 { + typedef T1 Head; + typedef Types25 Tail; +}; + +template +struct Types27 { + typedef T1 Head; + typedef Types26 Tail; +}; + +template +struct Types28 { + typedef T1 Head; + typedef Types27 Tail; +}; + +template +struct Types29 { + typedef T1 Head; + typedef Types28 Tail; +}; + +template +struct Types30 { + typedef T1 Head; + typedef Types29 Tail; +}; + +template +struct Types31 { + typedef T1 Head; + typedef Types30 Tail; +}; + +template +struct Types32 { + typedef T1 Head; + typedef Types31 Tail; +}; + +template +struct Types33 { + typedef T1 Head; + typedef Types32 Tail; +}; + +template +struct Types34 { + typedef T1 Head; + typedef Types33 Tail; +}; + +template +struct Types35 { + typedef T1 Head; + typedef Types34 Tail; +}; + +template +struct Types36 { + typedef T1 Head; + typedef Types35 Tail; +}; + +template +struct Types37 { + typedef T1 Head; + typedef Types36 Tail; +}; + +template +struct Types38 { + typedef T1 Head; + typedef Types37 Tail; +}; + +template +struct Types39 { + typedef T1 Head; + typedef Types38 Tail; +}; + +template +struct Types40 { + typedef T1 Head; + typedef Types39 Tail; +}; + +template +struct Types41 { + typedef T1 Head; + typedef Types40 Tail; +}; + +template +struct Types42 { + typedef T1 Head; + typedef Types41 Tail; +}; + +template +struct Types43 { + typedef T1 Head; + typedef Types42 Tail; +}; + +template +struct Types44 { + typedef T1 Head; + typedef Types43 Tail; +}; + +template +struct Types45 { + typedef T1 Head; + typedef Types44 Tail; +}; + +template +struct Types46 { + typedef T1 Head; + typedef Types45 Tail; +}; + +template +struct Types47 { + 
typedef T1 Head; + typedef Types46 Tail; +}; + +template +struct Types48 { + typedef T1 Head; + typedef Types47 Tail; +}; + +template +struct Types49 { + typedef T1 Head; + typedef Types48 Tail; +}; + +template +struct Types50 { + typedef T1 Head; + typedef Types49 Tail; +}; + + +} // namespace internal + +// We don't want to require the users to write TypesN<...> directly, +// as that would require them to count the length. Types<...> is much +// easier to write, but generates horrible messages when there is a +// compiler error, as gcc insists on printing out each template +// argument, even if it has the default value (this means Types +// will appear as Types in the compiler +// errors). +// +// Our solution is to combine the best part of the two approaches: a +// user would write Types, and Google Test will translate +// that to TypesN internally to make error messages +// readable. The translation is done by the 'type' member of the +// Types template. +template +struct Types { + typedef internal::Types50 type; +}; + +template <> +struct Types { + typedef internal::Types0 type; +}; +template +struct Types { + typedef internal::Types1 type; +}; +template +struct Types { + typedef internal::Types2 type; +}; +template +struct Types { + typedef internal::Types3 type; +}; +template +struct Types { + typedef internal::Types4 type; +}; +template +struct Types { + typedef internal::Types5 type; +}; +template +struct Types { + typedef internal::Types6 type; +}; +template +struct Types { + typedef internal::Types7 type; +}; +template +struct Types { + typedef internal::Types8 type; +}; +template +struct Types { + typedef internal::Types9 type; +}; +template +struct Types { + typedef internal::Types10 type; +}; +template +struct Types { + typedef internal::Types11 type; +}; +template +struct Types { + typedef internal::Types12 type; +}; +template +struct Types { + typedef internal::Types13 type; +}; +template +struct Types { + typedef internal::Types14 type; +}; +template +struct Types { + typedef internal::Types15 type; +}; +template +struct Types { + typedef internal::Types16 type; +}; +template +struct Types { + typedef internal::Types17 type; +}; +template +struct Types { + typedef internal::Types18 type; +}; +template +struct Types { + typedef internal::Types19 type; +}; +template +struct Types { + typedef internal::Types20 type; +}; +template +struct Types { + typedef internal::Types21 type; +}; +template +struct Types { + typedef internal::Types22 type; +}; +template +struct Types { + typedef internal::Types23 type; +}; +template +struct Types { + typedef internal::Types24 type; +}; +template +struct Types { + typedef internal::Types25 type; +}; +template +struct Types { + typedef internal::Types26 type; +}; +template +struct Types { + typedef internal::Types27 type; +}; +template +struct Types { + typedef internal::Types28 type; +}; +template +struct Types { + typedef internal::Types29 type; +}; +template +struct Types { + typedef internal::Types30 type; +}; +template +struct Types { + typedef internal::Types31 type; +}; +template +struct Types { + typedef internal::Types32 type; +}; +template +struct Types { + typedef internal::Types33 type; +}; +template +struct Types { + typedef internal::Types34 type; +}; +template +struct Types { + typedef internal::Types35 type; +}; +template +struct Types { + typedef internal::Types36 type; +}; +template +struct Types { + typedef internal::Types37 type; +}; +template +struct Types { + typedef internal::Types38 type; +}; +template +struct Types { 
+ typedef internal::Types39 type; +}; +template +struct Types { + typedef internal::Types40 type; +}; +template +struct Types { + typedef internal::Types41 type; +}; +template +struct Types { + typedef internal::Types42 type; +}; +template +struct Types { + typedef internal::Types43 type; +}; +template +struct Types { + typedef internal::Types44 type; +}; +template +struct Types { + typedef internal::Types45 type; +}; +template +struct Types { + typedef internal::Types46 type; +}; +template +struct Types { + typedef internal::Types47 type; +}; +template +struct Types { + typedef internal::Types48 type; +}; +template +struct Types { + typedef internal::Types49 type; +}; + +namespace internal { + +# define GTEST_TEMPLATE_ template class + +// The template "selector" struct TemplateSel is used to +// represent Tmpl, which must be a class template with one type +// parameter, as a type. TemplateSel::Bind::type is defined +// as the type Tmpl. This allows us to actually instantiate the +// template "selected" by TemplateSel. +// +// This trick is necessary for simulating typedef for class templates, +// which C++ doesn't support directly. +template +struct TemplateSel { + template + struct Bind { + typedef Tmpl type; + }; +}; + +# define GTEST_BIND_(TmplSel, T) \ + TmplSel::template Bind::type + +// A unique struct template used as the default value for the +// arguments of class template Templates. This allows us to simulate +// variadic templates (e.g. Templates, Templates, +// and etc), which C++ doesn't support directly. +template +struct NoneT {}; + +// The following family of struct and struct templates are used to +// represent template lists. In particular, TemplatesN represents a list of N templates (T1, T2, ..., and TN). Except +// for Templates0, every struct in the family has two member types: +// Head for the selector of the first template in the list, and Tail +// for the rest of the list. + +// The empty template list. +struct Templates0 {}; + +// Template lists of length 1, 2, 3, and so on. 
+ +template +struct Templates1 { + typedef TemplateSel Head; + typedef Templates0 Tail; +}; +template +struct Templates2 { + typedef TemplateSel Head; + typedef Templates1 Tail; +}; + +template +struct Templates3 { + typedef TemplateSel Head; + typedef Templates2 Tail; +}; + +template +struct Templates4 { + typedef TemplateSel Head; + typedef Templates3 Tail; +}; + +template +struct Templates5 { + typedef TemplateSel Head; + typedef Templates4 Tail; +}; + +template +struct Templates6 { + typedef TemplateSel Head; + typedef Templates5 Tail; +}; + +template +struct Templates7 { + typedef TemplateSel Head; + typedef Templates6 Tail; +}; + +template +struct Templates8 { + typedef TemplateSel Head; + typedef Templates7 Tail; +}; + +template +struct Templates9 { + typedef TemplateSel Head; + typedef Templates8 Tail; +}; + +template +struct Templates10 { + typedef TemplateSel Head; + typedef Templates9 Tail; +}; + +template +struct Templates11 { + typedef TemplateSel Head; + typedef Templates10 Tail; +}; + +template +struct Templates12 { + typedef TemplateSel Head; + typedef Templates11 Tail; +}; + +template +struct Templates13 { + typedef TemplateSel Head; + typedef Templates12 Tail; +}; + +template +struct Templates14 { + typedef TemplateSel Head; + typedef Templates13 Tail; +}; + +template +struct Templates15 { + typedef TemplateSel Head; + typedef Templates14 Tail; +}; + +template +struct Templates16 { + typedef TemplateSel Head; + typedef Templates15 Tail; +}; + +template +struct Templates17 { + typedef TemplateSel Head; + typedef Templates16 Tail; +}; + +template +struct Templates18 { + typedef TemplateSel Head; + typedef Templates17 Tail; +}; + +template +struct Templates19 { + typedef TemplateSel Head; + typedef Templates18 Tail; +}; + +template +struct Templates20 { + typedef TemplateSel Head; + typedef Templates19 Tail; +}; + +template +struct Templates21 { + typedef TemplateSel Head; + typedef Templates20 Tail; +}; + +template +struct Templates22 { + typedef TemplateSel Head; + typedef Templates21 Tail; +}; + +template +struct Templates23 { + typedef TemplateSel Head; + typedef Templates22 Tail; +}; + +template +struct Templates24 { + typedef TemplateSel Head; + typedef Templates23 Tail; +}; + +template +struct Templates25 { + typedef TemplateSel Head; + typedef Templates24 Tail; +}; + +template +struct Templates26 { + typedef TemplateSel Head; + typedef Templates25 Tail; +}; + +template +struct Templates27 { + typedef TemplateSel Head; + typedef Templates26 Tail; +}; + +template +struct Templates28 { + typedef TemplateSel Head; + typedef Templates27 Tail; +}; + +template +struct Templates29 { + typedef TemplateSel Head; + typedef Templates28 Tail; +}; + +template +struct Templates30 { + typedef TemplateSel Head; + typedef Templates29 Tail; +}; + +template +struct Templates31 { + typedef TemplateSel Head; + typedef Templates30 Tail; +}; + +template +struct Templates32 { + typedef TemplateSel Head; + typedef Templates31 Tail; +}; + +template +struct Templates33 { + typedef TemplateSel Head; + typedef Templates32 Tail; +}; + +template +struct Templates34 { + typedef TemplateSel Head; + typedef Templates33 Tail; +}; + +template +struct Templates35 { + typedef TemplateSel Head; + typedef Templates34 Tail; +}; + +template +struct Templates36 { + typedef TemplateSel Head; + typedef Templates35 Tail; +}; + +template +struct Templates37 { + typedef TemplateSel Head; + typedef Templates36 Tail; +}; + +template +struct Templates38 { + typedef TemplateSel Head; + typedef Templates37 Tail; +}; + 
+template +struct Templates39 { + typedef TemplateSel Head; + typedef Templates38 Tail; +}; + +template +struct Templates40 { + typedef TemplateSel Head; + typedef Templates39 Tail; +}; + +template +struct Templates41 { + typedef TemplateSel Head; + typedef Templates40 Tail; +}; + +template +struct Templates42 { + typedef TemplateSel Head; + typedef Templates41 Tail; +}; + +template +struct Templates43 { + typedef TemplateSel Head; + typedef Templates42 Tail; +}; + +template +struct Templates44 { + typedef TemplateSel Head; + typedef Templates43 Tail; +}; + +template +struct Templates45 { + typedef TemplateSel Head; + typedef Templates44 Tail; +}; + +template +struct Templates46 { + typedef TemplateSel Head; + typedef Templates45 Tail; +}; + +template +struct Templates47 { + typedef TemplateSel Head; + typedef Templates46 Tail; +}; + +template +struct Templates48 { + typedef TemplateSel Head; + typedef Templates47 Tail; +}; + +template +struct Templates49 { + typedef TemplateSel Head; + typedef Templates48 Tail; +}; + +template +struct Templates50 { + typedef TemplateSel Head; + typedef Templates49 Tail; +}; + + +// We don't want to require the users to write TemplatesN<...> directly, +// as that would require them to count the length. Templates<...> is much +// easier to write, but generates horrible messages when there is a +// compiler error, as gcc insists on printing out each template +// argument, even if it has the default value (this means Templates +// will appear as Templates in the compiler +// errors). +// +// Our solution is to combine the best part of the two approaches: a +// user would write Templates, and Google Test will translate +// that to TemplatesN internally to make error messages +// readable. The translation is done by the 'type' member of the +// Templates template. 
+template +struct Templates { + typedef Templates50 type; +}; + +template <> +struct Templates { + typedef Templates0 type; +}; +template +struct Templates { + typedef Templates1 type; +}; +template +struct Templates { + typedef Templates2 type; +}; +template +struct Templates { + typedef Templates3 type; +}; +template +struct Templates { + typedef Templates4 type; +}; +template +struct Templates { + typedef Templates5 type; +}; +template +struct Templates { + typedef Templates6 type; +}; +template +struct Templates { + typedef Templates7 type; +}; +template +struct Templates { + typedef Templates8 type; +}; +template +struct Templates { + typedef Templates9 type; +}; +template +struct Templates { + typedef Templates10 type; +}; +template +struct Templates { + typedef Templates11 type; +}; +template +struct Templates { + typedef Templates12 type; +}; +template +struct Templates { + typedef Templates13 type; +}; +template +struct Templates { + typedef Templates14 type; +}; +template +struct Templates { + typedef Templates15 type; +}; +template +struct Templates { + typedef Templates16 type; +}; +template +struct Templates { + typedef Templates17 type; +}; +template +struct Templates { + typedef Templates18 type; +}; +template +struct Templates { + typedef Templates19 type; +}; +template +struct Templates { + typedef Templates20 type; +}; +template +struct Templates { + typedef Templates21 type; +}; +template +struct Templates { + typedef Templates22 type; +}; +template +struct Templates { + typedef Templates23 type; +}; +template +struct Templates { + typedef Templates24 type; +}; +template +struct Templates { + typedef Templates25 type; +}; +template +struct Templates { + typedef Templates26 type; +}; +template +struct Templates { + typedef Templates27 type; +}; +template +struct Templates { + typedef Templates28 type; +}; +template +struct Templates { + typedef Templates29 type; +}; +template +struct Templates { + typedef Templates30 type; +}; +template +struct Templates { + typedef Templates31 type; +}; +template +struct Templates { + typedef Templates32 type; +}; +template +struct Templates { + typedef Templates33 type; +}; +template +struct Templates { + typedef Templates34 type; +}; +template +struct Templates { + typedef Templates35 type; +}; +template +struct Templates { + typedef Templates36 type; +}; +template +struct Templates { + typedef Templates37 type; +}; +template +struct Templates { + typedef Templates38 type; +}; +template +struct Templates { + typedef Templates39 type; +}; +template +struct Templates { + typedef Templates40 type; +}; +template +struct Templates { + typedef Templates41 type; +}; +template +struct Templates { + typedef Templates42 type; +}; +template +struct Templates { + typedef Templates43 type; +}; +template +struct Templates { + typedef Templates44 type; +}; +template +struct Templates { + typedef Templates45 type; +}; +template +struct Templates { + typedef Templates46 type; +}; +template +struct Templates { + typedef Templates47 type; +}; +template +struct Templates { + typedef Templates48 type; +}; +template +struct Templates { + typedef Templates49 type; +}; + +// The TypeList template makes it possible to use either a single type +// or a Types<...> list in TYPED_TEST_CASE() and +// INSTANTIATE_TYPED_TEST_CASE_P(). 
+
+template <typename T>
+struct TypeList {
+  typedef Types1<T> type;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43, typename T44, typename T45,
+    typename T46, typename T47, typename T48, typename T49, typename T50>
+struct TypeList<Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+    T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+    T42, T43, T44, T45, T46, T47, T48, T49, T50> > {
+  typedef typename Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>::type type;
+};
+
+#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+
+// Due to C++ preprocessor weirdness, we need double indirection to
+// concatenate two tokens when one of them is __LINE__. Writing
+//
+//   foo ## __LINE__
+//
+// will result in the token foo__LINE__, instead of foo followed by
+// the current line number. For more details, see
+// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
+#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar)
+#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar
+
+// Stringifies its argument.
+#define GTEST_STRINGIFY_(name) #name
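+// Editor's illustration (not part of the original header): the double
+// indirection above lets __LINE__ expand before token pasting, e.g. on
+// line 42:
+//
+//   GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__)       // -> gtest_label_42
+//   GTEST_CONCAT_TOKEN_IMPL_(gtest_label_, __LINE__)  // -> gtest_label___LINE__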
+
+class ProtocolMessage;
+namespace proto2 { class Message; }
+
+namespace testing {
+
+// Forward declarations.
+
+class AssertionResult;                 // Result of an assertion.
+class Message;                         // Represents a failure message.
+class Test;                            // Represents a test.
+class TestInfo;                        // Information about a test.
+class TestPartResult;                  // Result of a test part.
+class UnitTest;                        // A collection of test cases.
+
+template <typename T>
+::std::string PrintToString(const T& value);
+
+namespace internal {
+
+struct TraceInfo;                      // Information about a trace point.
+class TestInfoImpl;                    // Opaque implementation of TestInfo
+class UnitTestImpl;                    // Opaque implementation of UnitTest
+
+// The text used in failure messages to indicate the start of the
+// stack trace.
+GTEST_API_ extern const char kStackTraceMarker[];
+
+// Two overloaded helpers for checking at compile time whether an
+// expression is a null pointer literal (i.e. NULL or any 0-valued
+// compile-time integral constant). Their return values have
+// different sizes, so we can use sizeof() to test which version is
+// picked by the compiler. These helpers have no implementations, as
+// we only need their signatures.
+//
+// Given IsNullLiteralHelper(x), the compiler will pick the first
+// version if x can be implicitly converted to Secret*, and pick the
+// second version otherwise. Since Secret is a secret and incomplete
+// type, the only expression a user can write that has type Secret* is
+// a null pointer literal. Therefore, we know that x is a null
+// pointer literal if and only if the first version is picked by the
+// compiler.
+char IsNullLiteralHelper(Secret* p);
+char (&IsNullLiteralHelper(...))[2];  // NOLINT
+
+// A compile-time bool constant that is true if and only if x is a
+// null pointer literal (i.e. NULL or any 0-valued compile-time
+// integral constant).
+#ifdef GTEST_ELLIPSIS_NEEDS_POD_
+// We lose support for NULL detection where the compiler doesn't like
+// passing non-POD classes through ellipsis (...).
+# define GTEST_IS_NULL_LITERAL_(x) false
+#else
+# define GTEST_IS_NULL_LITERAL_(x) \
+    (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1)
+#endif  // GTEST_ELLIPSIS_NEEDS_POD_
+
+// Appends the user-supplied message to the Google-Test-generated message.
+GTEST_API_ std::string AppendUserMessage(
+    const std::string& gtest_msg, const Message& user_msg);
+
+#if GTEST_HAS_EXCEPTIONS
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4275 \
+/* an exported class was derived from a class that was not exported */)
+
+// This exception is thrown by (and only by) a failed Google Test
+// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions
+// are enabled). We derive it from std::runtime_error, which is for
+// errors presumably detectable only at run time. Since
+// std::runtime_error inherits from std::exception, many testing
+// frameworks know how to extract and print the message inside it.
+class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
+ public:
+  explicit GoogleTestFailureException(const TestPartResult& failure);
+};
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4275
+
+#endif  // GTEST_HAS_EXCEPTIONS
+
+namespace edit_distance {
+// Returns the optimal edits to go from 'left' to 'right'.
+// All edits cost the same, with replace having lower priority than
+// add/remove.
+// Simple implementation of the Wagner-Fischer algorithm.
+// See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm
+enum EditType { kMatch, kAdd, kRemove, kReplace };
+GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
+    const std::vector<size_t>& left, const std::vector<size_t>& right);
+
+// Same as above, but the input is represented as strings.
+GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
+    const std::vector<std::string>& left,
+    const std::vector<std::string>& right);
+
+// Create a diff of the input strings in Unified diff format.
+GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+                                         const std::vector<std::string>& right,
+                                         size_t context = 2);
+
+}  // namespace edit_distance
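+// Editor's illustration (not part of the original header), assuming the
+// edit_distance declarations above: comparing {"a","b","c"} with
+// {"a","x","c"} yields the edit sequence {kMatch, kReplace, kMatch}, and
+// CreateUnifiedDiff() renders the corresponding "-b / +x" hunk, e.g.
+//
+//   std::vector<std::string> left;   // "a", "b", "c"
+//   std::vector<std::string> right;  // "a", "x", "c"
+//   std::string diff = edit_distance::CreateUnifiedDiff(left, right);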
+
+// Calculate the diff between 'left' and 'right' and return it in unified diff
+// format.
+// If not null, stores in 'total_line_count' the total number of lines found
+// in left + right.
+GTEST_API_ std::string DiffStrings(const std::string& left,
+                                   const std::string& right,
+                                   size_t* total_line_count);
+
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
+//
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings. For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
+//
+//   expected_expression: "foo"
+//   actual_expression:   "bar"
+//   expected_value:      "5"
+//   actual_value:        "6"
+//
+// The ignoring_case parameter is true iff the assertion is a
+// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will
+// be inserted into the message.
+GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
+                                     const char* actual_expression,
+                                     const std::string& expected_value,
+                                     const std::string& actual_value,
+                                     bool ignoring_case);
+
+// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
+GTEST_API_ std::string GetBoolAssertionFailureMessage(
+    const AssertionResult& assertion_result,
+    const char* expression_text,
+    const char* actual_predicate_value,
+    const char* expected_predicate_value);
+
+// This template class represents an IEEE floating-point number
+// (either single-precision or double-precision, depending on the
+// template parameters).
+//
+// The purpose of this class is to do more sophisticated number
+// comparison. (Due to round-off error, etc, it's very unlikely that
+// two floating-points will be equal exactly. Hence a naive
+// comparison by the == operation often doesn't work.)
+//
+// Format of IEEE floating-point:
+//
+//   The most-significant bit being the leftmost, an IEEE
+//   floating-point looks like
+//
+//     sign_bit exponent_bits fraction_bits
+//
+//   Here, sign_bit is a single bit that designates the sign of the
+//   number.
+//
+//   For float, there are 8 exponent bits and 23 fraction bits.
+//
+//   For double, there are 11 exponent bits and 52 fraction bits.
+//
+// More details can be found at
+// http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
+//
+// Template parameter:
+//
+//   RawType: the raw floating-point type (either float or double)
+template <typename RawType>
+class FloatingPoint {
+ public:
+  // Defines the unsigned integer type that has the same size as the
+  // floating point number.
+  typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;
+
+  // Constants.
+
+  // # of bits in a number.
+  static const size_t kBitCount = 8*sizeof(RawType);
+
+  // # of fraction bits in a number.
+  static const size_t kFractionBitCount =
+    std::numeric_limits<RawType>::digits - 1;
+
+  // # of exponent bits in a number.
+  static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
+
+  // The mask for the sign bit.
+  static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
+
+  // The mask for the fraction bits.
+  static const Bits kFractionBitMask =
+    ~static_cast<Bits>(0) >> (kExponentBitCount + 1);
+
+  // The mask for the exponent bits.
+  static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
+
+  // How many ULP's (Units in the Last Place) we want to tolerate when
+  // comparing two numbers. The larger the value, the more error we
+  // allow. A 0 value means that two numbers must be exactly the same
+  // to be considered equal.
+  //
+  // The maximum error of a single floating-point operation is 0.5
+  // units in the last place. On Intel CPU's, all floating-point
+  // calculations are done with 80-bit precision, while double has 64
+  // bits. Therefore, 4 should be enough for ordinary use.
+  //
+  // See the following article for more details on ULP:
+  // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+  static const size_t kMaxUlps = 4;
+
+  // Constructs a FloatingPoint from a raw floating-point number.
+  //
+  // On an Intel CPU, passing a non-normalized NAN (Not a Number)
+  // around may change its bits, although the new value is guaranteed
+  // to be also a NAN. Therefore, don't expect this constructor to
+  // preserve the bits in x when x is a NAN.
+  explicit FloatingPoint(const RawType& x) { u_.value_ = x; }
+
+  // Static methods
+
+  // Reinterprets a bit pattern as a floating-point number.
+  //
+  // This function is needed to test the AlmostEquals() method.
+  static RawType ReinterpretBits(const Bits bits) {
+    FloatingPoint fp(0);
+    fp.u_.bits_ = bits;
+    return fp.u_.value_;
+  }
+
+  // Returns the floating-point number that represent positive infinity.
+  static RawType Infinity() {
+    return ReinterpretBits(kExponentBitMask);
+  }
+
+  // Returns the maximum representable finite floating-point number.
+  static RawType Max();
+
+  // Non-static methods
+
+  // Returns the bits that represents this number.
+  const Bits &bits() const { return u_.bits_; }
+
+  // Returns the exponent bits of this number.
+  Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }
+
+  // Returns the fraction bits of this number.
+  Bits fraction_bits() const { return kFractionBitMask & u_.bits_; }
+
+  // Returns the sign bit of this number.
+  Bits sign_bit() const { return kSignBitMask & u_.bits_; }
+
+  // Returns true iff this is NAN (not a number).
+  bool is_nan() const {
+    // It's a NAN if the exponent bits are all ones and the fraction
+    // bits are not entirely zeros.
+    return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
+  }
+
+  // Returns true iff this number is at most kMaxUlps ULP's away from
+  // rhs. In particular, this function:
+  //
+  //   - returns false if either number is (or both are) NAN.
+  //   - treats really large numbers as almost equal to infinity.
+  //   - thinks +0.0 and -0.0 are 0 ULP's apart.
+  bool AlmostEquals(const FloatingPoint& rhs) const {
+    // The IEEE standard says that any comparison operation involving
+    // a NAN must return false.
+    if (is_nan() || rhs.is_nan()) return false;
+
+    return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_)
+        <= kMaxUlps;
+  }
+
+ private:
+  // The data type used to store the actual floating-point number.
+  union FloatingPointUnion {
+    RawType value_;  // The raw floating-point number.
+    Bits bits_;      // The bits that represent the number.
+  };
+
+  // Converts an integer from the sign-and-magnitude representation to
+  // the biased representation. More precisely, let N be 2 to the
+  // power of (kBitCount - 1), an integer x is represented by the
+  // unsigned number x + N.
+  //
+  // For instance,
+  //
+  //   -N + 1 (the most negative number representable using
+  //          sign-and-magnitude) is represented by 1;
+  //   0      is represented by N; and
+  //   N - 1  (the biggest number representable using
+  //          sign-and-magnitude) is represented by 2N - 1.
+  //
+  // Read http://en.wikipedia.org/wiki/Signed_number_representations
+  // for more details on signed number representations.
+  static Bits SignAndMagnitudeToBiased(const Bits &sam) {
+    if (kSignBitMask & sam) {
+      // sam represents a negative number.
+      return ~sam + 1;
+    } else {
+      // sam represents a positive number.
+      return kSignBitMask | sam;
+    }
+  }
+
+  // Given two numbers in the sign-and-magnitude representation,
+  // returns the distance between them as an unsigned number.
+  static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
+                                                     const Bits &sam2) {
+    const Bits biased1 = SignAndMagnitudeToBiased(sam1);
+    const Bits biased2 = SignAndMagnitudeToBiased(sam2);
+    return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
+  }
+
+  FloatingPointUnion u_;
+};
+
+// We cannot use std::numeric_limits<T>::max() as it clashes with the max()
+// macro defined by <windows.h>.
+template <>
+inline float FloatingPoint<float>::Max() { return FLT_MAX; }
+template <>
+inline double FloatingPoint<double>::Max() { return DBL_MAX; }
+
+// Typedefs the instances of the FloatingPoint template class that we
+// care to use.
+typedef FloatingPoint<float> Float;
+typedef FloatingPoint<double> Double;
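+// Editor's illustration (not part of the original header): with the
+// kMaxUlps = 4 tolerance above, values within a few representable steps
+// of each other compare equal while clearly distinct values do not, e.g.
+//
+//   Double lhs(1.0);
+//   Double rhs(1.0 + 1e-16);                    // rounds to 1.0 in double
+//   bool near = lhs.AlmostEquals(rhs);          // true (0 ULP's apart)
+//   bool far  = lhs.AlmostEquals(Double(1.1));  // false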
+
+// In order to catch the mistake of putting tests that use different
+// test fixture classes in the same test case, we need to assign
+// unique IDs to fixture classes and compare them. The TypeId type is
+// used to hold such IDs. The user should treat TypeId as an opaque
+// type: the only operation allowed on TypeId values is to compare
+// them for equality using the == operator.
+typedef const void* TypeId;
+
+template <typename T>
+class TypeIdHelper {
+ public:
+  // dummy_ must not have a const type. Otherwise an overly eager
+  // compiler (e.g. MSVC 7.1 & 8.0) may try to merge
+  // TypeIdHelper<T>::dummy_ for different Ts as an "optimization".
+  static bool dummy_;
+};
+
+template <typename T>
+bool TypeIdHelper<T>::dummy_ = false;
+
+// GetTypeId<T>() returns the ID of type T. Different values will be
+// returned for different types. Calling the function twice with the
+// same type argument is guaranteed to return the same ID.
+template <typename T>
+TypeId GetTypeId() {
+  // The compiler is required to allocate a different
+  // TypeIdHelper<T>::dummy_ variable for each T used to instantiate
+  // the template. Therefore, the address of dummy_ is guaranteed to
+  // be unique.
+  return &(TypeIdHelper<T>::dummy_);
+}
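+// Editor's illustration (not part of the original header): each
+// instantiation of TypeIdHelper<T> owns a distinct dummy_ object, so
+//
+//   GetTypeId<int>() == GetTypeId<int>();     // same T, same ID
+//   GetTypeId<int>() != GetTypeId<double>();  // different T, different ID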
+
+// Returns the type ID of ::testing::Test. Always call this instead
+// of GetTypeId< ::testing::Test>() to get the type ID of
+// ::testing::Test, as the latter may give the wrong result due to a
+// suspected linker bug when compiling Google Test as a Mac OS X
+// framework.
+GTEST_API_ TypeId GetTestTypeId();
+
+// Defines the abstract factory interface that creates instances
+// of a Test object.
+class TestFactoryBase {
+ public:
+  virtual ~TestFactoryBase() {}
+
+  // Creates a test instance to run. The instance is both created and destroyed
+  // within TestInfoImpl::Run()
+  virtual Test* CreateTest() = 0;
+
+ protected:
+  TestFactoryBase() {}
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
+};
+
+// This class provides implementation of TestFactoryBase interface.
+// It is used in TEST and TEST_F macros.
+template <class TestClass>
+class TestFactoryImpl : public TestFactoryBase {
+ public:
+  virtual Test* CreateTest() { return new TestClass; }
+};
+
+#if GTEST_OS_WINDOWS
+
+// Predicate-formatters for implementing the HRESULT checking macros
+// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
+// We pass a long instead of HRESULT to avoid causing an
+// include dependency for the HRESULT type.
+GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
+                                            long hr);  // NOLINT
+GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
+                                            long hr);  // NOLINT
+
+#endif  // GTEST_OS_WINDOWS
+
+// Types of SetUpTestCase() and TearDownTestCase() functions.
+typedef void (*SetUpTestCaseFunc)();
+typedef void (*TearDownTestCaseFunc)();
+
+struct CodeLocation {
+  CodeLocation(const std::string& a_file, int a_line)
+      : file(a_file), line(a_line) {}
+
+  std::string file;
+  int line;
+};
+
+// Creates a new TestInfo object and registers it with Google Test;
+// returns the created object.
+//
+// Arguments:
+//
+//   test_case_name:   name of the test case
+//   name:             name of the test
+//   type_param:       the name of the test's type parameter, or NULL if
+//                     this is not a typed or a type-parameterized test.
+//   value_param:      text representation of the test's value parameter,
+//                     or NULL if this is not a value-parameterized test.
+//   code_location:    code location where the test is defined
+//   fixture_class_id: ID of the test fixture class
+//   set_up_tc:        pointer to the function that sets up the test case
+//   tear_down_tc:     pointer to the function that tears down the test case
+//   factory:          pointer to the factory that creates a test object.
+//                     The newly created TestInfo instance will assume
+//                     ownership of the factory object.
+GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
+    const char* test_case_name,
+    const char* name,
+    const char* type_param,
+    const char* value_param,
+    CodeLocation code_location,
+    TypeId fixture_class_id,
+    SetUpTestCaseFunc set_up_tc,
+    TearDownTestCaseFunc tear_down_tc,
+    TestFactoryBase* factory);
+
+// If *pstr starts with the given prefix, modifies *pstr to be right
+// past the prefix and returns true; otherwise leaves *pstr unchanged
+// and returns false. None of pstr, *pstr, and prefix can be NULL.
+GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);
+
+#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// State of the definition of a type-parameterized test case.
+class GTEST_API_ TypedTestCasePState {
+ public:
+  TypedTestCasePState() : registered_(false) {}
+
+  // Adds the given test name to defined_test_names_ and return true
+  // if the test case hasn't been registered; otherwise aborts the
+  // program.
+  bool AddTestName(const char* file, int line, const char* case_name,
+                   const char* test_name) {
+    if (registered_) {
+      fprintf(stderr, "%s Test %s must be defined before "
+              "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n",
+              FormatFileLocation(file, line).c_str(), test_name, case_name);
+      fflush(stderr);
+      posix::Abort();
+    }
+    registered_tests_.insert(
+        ::std::make_pair(test_name, CodeLocation(file, line)));
+    return true;
+  }
+
+  bool TestExists(const std::string& test_name) const {
+    return registered_tests_.count(test_name) > 0;
+  }
+
+  const CodeLocation& GetCodeLocation(const std::string& test_name) const {
+    RegisteredTestsMap::const_iterator it = registered_tests_.find(test_name);
+    GTEST_CHECK_(it != registered_tests_.end());
+    return it->second;
+  }
+
+  // Verifies that registered_tests match the test names in
+  // defined_test_names_; returns registered_tests if successful, or
+  // aborts the program otherwise.
+  const char* VerifyRegisteredTestNames(
+      const char* file, int line, const char* registered_tests);
+
+ private:
+  typedef ::std::map<std::string, CodeLocation> RegisteredTestsMap;
+
+  bool registered_;
+  RegisteredTestsMap registered_tests_;
+};
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+// Skips to the first non-space char after the first comma in 'str';
+// returns NULL if no comma is found in 'str'.
+inline const char* SkipComma(const char* str) {
+  const char* comma = strchr(str, ',');
+  if (comma == NULL) {
+    return NULL;
+  }
+  while (IsSpace(*(++comma))) {}
+  return comma;
+}
+
+// Returns the prefix of 'str' before the first comma in it; returns
+// the entire string if it contains no comma.
+inline std::string GetPrefixUntilComma(const char* str) {
+  const char* comma = strchr(str, ',');
+  return comma == NULL ? str : std::string(str, comma);
+}
+
+// Splits a given string on a given delimiter, populating a given
+// vector with the fields.
+void SplitString(const ::std::string& str, char delimiter,
+                 ::std::vector< ::std::string>* dest);
+
+// The default argument to the template below for the case when the user does
+// not provide a name generator.
+struct DefaultNameGenerator {
+  template <typename T>
+  static std::string GetName(int i) {
+    return StreamableToString(i);
+  }
+};
+
+template <typename Provided = DefaultNameGenerator>
+struct NameGeneratorSelector {
+  typedef Provided type;
+};
+
+template <typename NameGenerator>
+void GenerateNamesRecursively(Types0, std::vector<std::string>*, int) {}
+
+template <typename NameGenerator, typename Types>
+void GenerateNamesRecursively(Types, std::vector<std::string>* result, int i) {
+  result->push_back(NameGenerator::template GetName<typename Types::Head>(i));
+  GenerateNamesRecursively<NameGenerator>(typename Types::Tail(), result,
+                                          i + 1);
+}
+
+template <typename NameGenerator, typename Types>
+std::vector<std::string> GenerateNames() {
+  std::vector<std::string> result;
+  GenerateNamesRecursively<NameGenerator>(Types(), &result, 0);
+  return result;
+}
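+// Editor's illustration (not part of the original header): for a type list
+// Types2<int, double>, GenerateNames<DefaultNameGenerator, ...>() walks the
+// Head/Tail chain and produces one StreamableToString(i) entry per type,
+// i.e. the default instantiation names "0" and "1".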
+
+// TypeParameterizedTest<Fixture, TestSel, Types>::Register()
+// registers a list of type-parameterized tests with Google Test. The
+// return value is insignificant - we just need to return something
+// such that we can call this function in a namespace scope.
+//
+// Implementation note: The GTEST_TEMPLATE_ macro declares a template
+// template parameter. It's defined in gtest-type-util.h.
+template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types>
+class TypeParameterizedTest {
+ public:
+  // 'index' is the index of the test in the type list 'Types'
+  // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase,
+  // Types). Valid values for 'index' are [0, N - 1] where N is the
+  // length of Types.
+  static bool Register(const char* prefix, const CodeLocation& code_location,
+                       const char* case_name, const char* test_names,
+                       int index,
+                       const std::vector<std::string>& type_names =
+                           GenerateNames<DefaultNameGenerator, Types>()) {
+    typedef typename Types::Head Type;
+    typedef Fixture<Type> FixtureClass;
+    typedef typename GTEST_BIND_(TestSel, Type) TestClass;
+
+    // First, registers the first type-parameterized test in the type
+    // list.
+    MakeAndRegisterTestInfo(
+        (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name +
+         "/" + type_names[index])
+            .c_str(),
+        StripTrailingSpaces(GetPrefixUntilComma(test_names)).c_str(),
+        GetTypeName<Type>().c_str(),
+        NULL,  // No value parameter.
+        code_location, GetTypeId<FixtureClass>(), TestClass::SetUpTestCase,
+        TestClass::TearDownTestCase, new TestFactoryImpl<TestClass>);
+
+    // Next, recurses (at compile time) with the tail of the type list.
+    return TypeParameterizedTest<Fixture, TestSel,
+                                 typename Types::Tail>::Register(prefix,
+                                                                 code_location,
+                                                                 case_name,
+                                                                 test_names,
+                                                                 index + 1,
+                                                                 type_names);
+  }
+};
+
+// The base case for the compile time recursion.
+template <GTEST_TEMPLATE_ Fixture, class TestSel>
+class TypeParameterizedTest<Fixture, TestSel, Types0> {
+ public:
+  static bool Register(const char* /*prefix*/, const CodeLocation&,
+                       const char* /*case_name*/, const char* /*test_names*/,
+                       int /*index*/,
+                       const std::vector<std::string>& =
+                           std::vector<std::string>() /*type_names*/) {
+    return true;
+  }
+};
+
+// TypeParameterizedTestCase<Fixture, Tests, Types>::Register()
+// registers *all combinations* of 'Tests' and 'Types' with Google
+// Test. The return value is insignificant - we just need to return
+// something such that we can call this function in a namespace scope.
+template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
+class TypeParameterizedTestCase {
+ public:
+  static bool Register(const char* prefix, CodeLocation code_location,
+                       const TypedTestCasePState* state, const char* case_name,
+                       const char* test_names,
+                       const std::vector<std::string>& type_names =
+                           GenerateNames<DefaultNameGenerator, Types>()) {
+    std::string test_name = StripTrailingSpaces(
+        GetPrefixUntilComma(test_names));
+    if (!state->TestExists(test_name)) {
+      fprintf(stderr, "Failed to get code location for test %s.%s at %s.",
+              case_name, test_name.c_str(),
+              FormatFileLocation(code_location.file.c_str(),
+                                 code_location.line).c_str());
+      fflush(stderr);
+      posix::Abort();
+    }
+    const CodeLocation& test_location = state->GetCodeLocation(test_name);
+
+    typedef typename Tests::Head Head;
+
+    // First, register the first test in 'Test' for each type in 'Types'.
+    TypeParameterizedTest<Fixture, Head, Types>::Register(
+        prefix, test_location, case_name, test_names, 0, type_names);
+
+    // Next, recurses (at compile time) with the tail of the test list.
+    return TypeParameterizedTestCase<Fixture, typename Tests::Tail,
+                                     Types>::Register(prefix, code_location,
+                                                      state, case_name,
+                                                      SkipComma(test_names),
+                                                      type_names);
+  }
+};
+
+// The base case for the compile time recursion.
+template <GTEST_TEMPLATE_ Fixture, typename Types>
+class TypeParameterizedTestCase<Fixture, Templates0, Types> {
+ public:
+  static bool Register(const char* /*prefix*/, const CodeLocation&,
+                       const TypedTestCasePState* /*state*/,
+                       const char* /*case_name*/, const char* /*test_names*/,
+                       const std::vector<std::string>& =
+                           std::vector<std::string>() /*type_names*/) {
+    return true;
+  }
+};
+
+#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
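+// Editor's illustration (not part of the original header): the Register()
+// recursion above peels one type per step, exactly like this sketch that
+// counts the length of a type list at compile time:
+//
+//   template <typename List>
+//   struct Length {
+//     enum { value = 1 + Length<typename List::Tail>::value };
+//   };
+//   template <>
+//   struct Length<Types0> { enum { value = 0 }; };
+//
+//   // Length<Types2<int, double> >::value == 2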
+
+// Returns the current OS stack trace as an std::string.
+//
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag. The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
+// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
+GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(
+    UnitTest* unit_test, int skip_count);
+
+// Helpers for suppressing warnings on unreachable code or constant
+// condition.
+
+// Always returns true.
+GTEST_API_ bool AlwaysTrue();
+
+// Always returns false.
+inline bool AlwaysFalse() { return !AlwaysTrue(); }
+
+// Helper for suppressing false warning from Clang on a const char*
+// variable declared in a conditional expression always being NULL in
+// the else branch.
+struct GTEST_API_ ConstCharPtr {
+  ConstCharPtr(const char* str) : value(str) {}
+  operator bool() const { return true; }
+  const char* value;
+};
+
+// A simple Linear Congruential Generator for generating random
+// numbers with a uniform distribution. Unlike rand() and srand(), it
+// doesn't use global state (and therefore can't interfere with user
+// code). Unlike rand_r(), it's portable. An LCG isn't very random,
+// but it's good enough for our purposes.
+class GTEST_API_ Random {
+ public:
+  static const UInt32 kMaxRange = 1u << 31;
+
+  explicit Random(UInt32 seed) : state_(seed) {}
+
+  void Reseed(UInt32 seed) { state_ = seed; }
+
+  // Generates a random number from [0, range). Crashes if 'range' is
+  // 0 or greater than kMaxRange.
+  UInt32 Generate(UInt32 range);
+
+ private:
+  UInt32 state_;
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
+};
+
+// Defining a variable of type CompileAssertTypesEqual will cause a
+// compiler error iff T1 and T2 are different types.
+template <typename T1, typename T2>
+struct CompileAssertTypesEqual;
+
+template <typename T>
+struct CompileAssertTypesEqual<T, T> {
+};
+
+// Removes the reference from a type if it is a reference type,
+// otherwise leaves it unchanged. This is the same as
+// tr1::remove_reference, which is not widely available yet.
+template <typename T>
+struct RemoveReference { typedef T type; };  // NOLINT
+template <typename T>
+struct RemoveReference<T&> { typedef T type; };  // NOLINT
+
+// A handy wrapper around RemoveReference that works when the argument
+// T depends on template parameters.
+#define GTEST_REMOVE_REFERENCE_(T) \
+    typename ::testing::internal::RemoveReference<T>::type
+
+// Removes const from a type if it is a const type, otherwise leaves
+// it unchanged. This is the same as tr1::remove_const, which is not
+// widely available yet.
+template <typename T>
+struct RemoveConst { typedef T type; };  // NOLINT
+template <typename T>
+struct RemoveConst<const T> { typedef T type; };  // NOLINT
+
+// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above
+// definition to fail to remove the const in 'const int[3]' and 'const
+// char[3][4]'. The following specialization works around the bug.
+template <typename T, size_t N>
+struct RemoveConst<const T[N]> {
+  typedef typename RemoveConst<T>::type type[N];
+};
+
+#if defined(_MSC_VER) && _MSC_VER < 1400
+// This is the only specialization that allows VC++ 7.1 to remove const in
+// 'const int[3] and 'const int[3][4]'. However, it causes trouble with GCC
+// and thus needs to be conditionally compiled.
+template <typename T, size_t N>
+struct RemoveConst<T[N]> {
+  typedef typename RemoveConst<T>::type type[N];
+};
+#endif
+
+// A handy wrapper around RemoveConst that works when the argument
+// T depends on template parameters.
+#define GTEST_REMOVE_CONST_(T) \
+    typename ::testing::internal::RemoveConst<T>::type
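+// Editor's illustration (not part of the original header): these traits
+// strip one layer of reference/const, which CompileAssertTypesEqual can
+// verify at compile time, e.g.
+//
+//   CompileAssertTypesEqual<RemoveReference<int&>::type, int> a;
+//   CompileAssertTypesEqual<RemoveConst<const int>::type, int> b;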
+
+// Turns const U&, U&, const U, and U all into U.
+#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
+    GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T))
+
+// ImplicitlyConvertible<From, To>::value is a compile-time bool
+// constant that's true iff type From can be implicitly converted to
+// type To.
+template <typename From, typename To>
+class ImplicitlyConvertible {
+ private:
+  // We need the following helper functions only for their types.
+  // They have no implementations.
+
+  // MakeFrom() is an expression whose type is From. We cannot simply
+  // use From(), as the type From may not have a public default
+  // constructor.
+  static typename AddReference<From>::type MakeFrom();
+
+  // These two functions are overloaded. Given an expression
+  // Helper(x), the compiler will pick the first version if x can be
+  // implicitly converted to type To; otherwise it will pick the
+  // second version.
+  //
+  // The first version returns a value of size 1, and the second
+  // version returns a value of size 2. Therefore, by checking the
+  // size of Helper(x), which can be done at compile time, we can tell
+  // which version of Helper() is used, and hence whether x can be
+  // implicitly converted to type To.
+  static char Helper(To);
+  static char (&Helper(...))[2];  // NOLINT
+
+  // We have to put the 'public' section after the 'private' section,
+  // or MSVC refuses to compile the code.
+ public:
+#if defined(__BORLANDC__)
+  // C++Builder cannot use member overload resolution during template
+  // instantiation. The simplest workaround is to use its C++0x type traits
+  // functions (C++Builder 2009 and above only).
+  static const bool value = __is_convertible(From, To);
+#else
+  // MSVC warns about implicitly converting from double to int for
+  // possible loss of data, so we need to temporarily disable the
+  // warning.
+  GTEST_DISABLE_MSC_WARNINGS_PUSH_(4244)
+  static const bool value =
+      sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
+  GTEST_DISABLE_MSC_WARNINGS_POP_()
+#endif  // __BORLANDC__
+};
+template <typename From, typename To>
+const bool ImplicitlyConvertible<From, To>::value;
+
+// IsAProtocolMessage<T>::value is a compile-time bool constant that's
+// true iff T is type ProtocolMessage, proto2::Message, or a subclass
+// of those.
+template <typename T>
+struct IsAProtocolMessage
+    : public bool_constant<
+  ImplicitlyConvertible<const T*, const ::ProtocolMessage*>::value ||
+  ImplicitlyConvertible<const T*, const ::proto2::Message*>::value> {
+};
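+// Editor's illustration (not part of the original header): the sizeof
+// trick above evaluates entirely at compile time, e.g.
+//
+//   bool b1 = ImplicitlyConvertible<int, double>::value;    // true
+//   bool b2 = ImplicitlyConvertible<int*, double*>::value;  // false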
+
+// When the compiler sees expression IsContainerTest<C>(0), if C is an
+// STL-style container class, the first overload of IsContainerTest
+// will be viable (since both C::iterator* and C::const_iterator* are
+// valid types and NULL can be implicitly converted to them). It will
+// be picked over the second overload as 'int' is a perfect match for
+// the type of argument 0. If C::iterator or C::const_iterator is not
+// a valid type, the first overload is not viable, and the second
+// overload will be picked. Therefore, we can determine whether C is
+// a container class by checking the type of IsContainerTest<C>(0).
+// The value of the expression is insignificant.
+//
+// In C++11 mode we check the existence of a const_iterator and that an
+// iterator is properly implemented for the container.
+//
+// For pre-C++11 that we look for both C::iterator and C::const_iterator.
+// The reason is that C++ injects the name of a class as a member of the
+// class itself (e.g. you can refer to class iterator as either
+// 'iterator' or 'iterator::iterator'). If we look for C::iterator
+// only, for example, we would mistakenly think that a class named
+// iterator is an STL container.
+//
+// Also note that the simpler approach of overloading
+// IsContainerTest(typename C::const_iterator*) and
+// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
+typedef int IsContainer;
+#if GTEST_LANG_CXX11
+template <class C,
+          class Iterator = decltype(::std::declval<const C&>().begin()),
+          class = decltype(::std::declval<const C&>().end()),
+          class = decltype(++::std::declval<Iterator&>()),
+          class = decltype(*::std::declval<Iterator>()),
+          class = typename C::const_iterator>
+IsContainer IsContainerTest(int /* dummy */) {
+  return 0;
+}
+#else
+template <class C>
+IsContainer IsContainerTest(int /* dummy */,
+                            typename C::iterator* /* it */ = NULL,
+                            typename C::const_iterator* /* const_it */ = NULL) {
+  return 0;
+}
+#endif  // GTEST_LANG_CXX11
+
+typedef char IsNotContainer;
+template <class C>
+IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; }
+
+// Trait to detect whether a type T is a hash table.
+// The heuristic used is that the type contains an inner type `hasher` and does
+// not contain an inner type `reverse_iterator`.
+// If the container is iterable in reverse, then order might actually matter.
+template <typename T>
+struct IsHashTable {
+ private:
+  template <typename U>
+  static char test(typename U::hasher*, typename U::reverse_iterator*);
+  template <typename U>
+  static int test(typename U::hasher*, ...);
+  template <typename U>
+  static char test(...);
+
+ public:
+  static const bool value = sizeof(test<T>(0, 0)) == sizeof(int);
+};
+
+template <typename T>
+const bool IsHashTable<T>::value;
+
+template <typename T>
+struct VoidT {
+  typedef void value_type;
+};
+
+template <typename T, typename = void>
+struct HasValueType : false_type {};
+template <typename T>
+struct HasValueType<T, VoidT<typename T::value_type> > : true_type {
+};
+
+template <typename C,
+          bool = sizeof(IsContainerTest<C>(0)) == sizeof(IsContainer),
+          bool = HasValueType<C>::value>
+struct IsRecursiveContainerImpl;
+
+template <typename C, bool HV>
+struct IsRecursiveContainerImpl<C, false, HV> : public false_type {};
+
+// Since the IsRecursiveContainerImpl depends on the IsContainerTest we need to
+// obey the same inconsistencies as the IsContainerTest, namely check if
+// something is a container is relying on only const_iterator in C++11 and
+// is relying on both const_iterator and iterator otherwise
+template <typename C>
+struct IsRecursiveContainerImpl<C, true, false> : public false_type {};
+
+template <typename C>
+struct IsRecursiveContainerImpl<C, true, true> {
+  #if GTEST_LANG_CXX11
+  typedef typename IteratorTraits<typename C::const_iterator>::value_type
+      value_type;
+#else
+  typedef typename IteratorTraits<typename C::iterator>::value_type value_type;
+#endif
+  typedef is_same<value_type, C> type;
+};
+
+// IsRecursiveContainer<Type> is a unary compile-time predicate that
+// evaluates whether C is a recursive container type. A recursive container
+// type is a container type whose value_type is equal to the container type
+// itself. An example for a recursive container type is
+// boost::filesystem::path, whose iterator has a value_type that is equal to
+// boost::filesystem::path.
+template <typename C>
+struct IsRecursiveContainer : public IsRecursiveContainerImpl<C>::type {};
+
+// EnableIf<bool>::type is void when 'Cond' is true, and
+// undefined when 'Cond' is false. To use SFINAE to make a function
+// overload only apply when a particular expression is true, add
+// "typename EnableIf<expression>::type* = 0" as the last parameter.
+template<bool> struct EnableIf;
+template<> struct EnableIf<true> { typedef void type; };  // NOLINT
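+// Editor's illustration (not part of the original header): EnableIf can
+// SFINAE-out an overload, e.g. this function template only exists for
+// even N:
+//
+//   template <int N>
+//   typename EnableIf<(N % 2) == 0>::type OnlyForEvenN() {}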
+
+// Utilities for native arrays.
+
+// ArrayEq() compares two k-dimensional native arrays using the
+// elements' operator==, where k can be any integer >= 0. When k is
+// 0, ArrayEq() degenerates into comparing a single pair of values.
+
+template <typename T, typename U>
+bool ArrayEq(const T* lhs, size_t size, const U* rhs);
+
+// This generic version is used when k is 0.
+template <typename T, typename U>
+inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; }
+
+// This overload is used when k >= 1.
+template <typename T, typename U, size_t N>
+inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) {
+  return internal::ArrayEq(lhs, N, rhs);
+}
+
+// This helper reduces code bloat. If we instead put its logic inside
+// the previous ArrayEq() function, arrays with different sizes would
+// lead to different copies of the template code.
+template <typename T, typename U>
+bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
+  for (size_t i = 0; i != size; i++) {
+    if (!internal::ArrayEq(lhs[i], rhs[i]))
+      return false;
+  }
+  return true;
+}
+
+// Finds the first element in the iterator range [begin, end) that
+// equals elem. Element may be a native array type itself.
+template <typename Iter, typename Element>
+Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
+  for (Iter it = begin; it != end; ++it) {
+    if (internal::ArrayEq(*it, elem))
+      return it;
+  }
+  return end;
+}
+
+// CopyArray() copies a k-dimensional native array using the elements'
+// operator=, where k can be any integer >= 0. When k is 0,
+// CopyArray() degenerates into copying a single value.
+
+template <typename T, typename U>
+void CopyArray(const T* from, size_t size, U* to);
+
+// This generic version is used when k is 0.
+template <typename T, typename U>
+inline void CopyArray(const T& from, U* to) { *to = from; }
+
+// This overload is used when k >= 1.
+template <typename T, typename U, size_t N>
+inline void CopyArray(const T(&from)[N], U(*to)[N]) {
+  internal::CopyArray(from, N, *to);
+}
+
+// This helper reduces code bloat. If we instead put its logic inside
+// the previous CopyArray() function, arrays with different sizes
+// would lead to different copies of the template code.
+template <typename T, typename U>
+void CopyArray(const T* from, size_t size, U* to) {
+  for (size_t i = 0; i != size; i++) {
+    internal::CopyArray(from[i], to + i);
+  }
+}
+
+// The relation between an NativeArray object (see below) and the
+// native array it represents.
+// We use 2 different structs to allow non-copyable types to be used, as long
+// as RelationToSourceReference() is passed.
+struct RelationToSourceReference {};
+struct RelationToSourceCopy {};
+
+// Adapts a native array to a read-only STL-style container. Instead
+// of the complete STL container concept, this adaptor only implements
+// members useful for Google Mock's container matchers. New members
+// should be added as needed. To simplify the implementation, we only
+// support Element being a raw type (i.e. having no top-level const or
+// reference modifier). It's the client's responsibility to satisfy
+// this requirement. Element can be an array type itself (hence
+// multi-dimensional arrays are supported).
+template <typename Element>
+class NativeArray {
+ public:
+  // STL-style container typedefs.
+  typedef Element value_type;
+  typedef Element* iterator;
+  typedef const Element* const_iterator;
+
+  // Constructs from a native array. References the source.
+  NativeArray(const Element* array, size_t count, RelationToSourceReference) {
+    InitRef(array, count);
+  }
+
+  // Constructs from a native array. Copies the source.
+  NativeArray(const Element* array, size_t count, RelationToSourceCopy) {
+    InitCopy(array, count);
+  }
+
+  // Copy constructor.
+  NativeArray(const NativeArray& rhs) {
+    (this->*rhs.clone_)(rhs.array_, rhs.size_);
+  }
+
+  ~NativeArray() {
+    if (clone_ != &NativeArray::InitRef)
+      delete[] array_;
+  }
+
+  // STL-style container methods.
+  size_t size() const { return size_; }
+  const_iterator begin() const { return array_; }
+  const_iterator end() const { return array_ + size_; }
+  bool operator==(const NativeArray& rhs) const {
+    return size() == rhs.size() &&
+        ArrayEq(begin(), size(), rhs.begin());
+  }
+
+ private:
+  enum {
+    kCheckTypeIsNotConstOrAReference = StaticAssertTypeEqHelper<
+        Element, GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>::value
+  };
+
+  // Initializes this object with a copy of the input.
+  void InitCopy(const Element* array, size_t a_size) {
+    Element* const copy = new Element[a_size];
+    CopyArray(array, a_size, copy);
+    array_ = copy;
+    size_ = a_size;
+    clone_ = &NativeArray::InitCopy;
+  }
+
+  // Initializes this object with a reference of the input.
+  void InitRef(const Element* array, size_t a_size) {
+    array_ = array;
+    size_ = a_size;
+    clone_ = &NativeArray::InitRef;
+  }
+
+  const Element* array_;
+  size_t size_;
+  void (NativeArray::*clone_)(const Element*, size_t);
+
+  GTEST_DISALLOW_ASSIGN_(NativeArray);
+};
+
+}  // namespace internal
+}  // namespace testing
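+// Editor's illustration (not part of the original header): adapting a
+// native array without copying it, then iterating STL-style:
+//
+//   const int a[3] = {1, 2, 3};
+//   ::testing::internal::NativeArray<int> view(
+//       a, 3, ::testing::internal::RelationToSourceReference());
+//   // view.size() == 3, *view.begin() == 1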
{ \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \ + fail("Expected: " #statement " doesn't throw an exception.\n" \ + " Actual: it throws.") + +#define GTEST_TEST_ANY_THROW_(statement, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + bool gtest_caught_any = false; \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } \ + catch (...) { \ + gtest_caught_any = true; \ + } \ + if (!gtest_caught_any) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \ + fail("Expected: " #statement " throws an exception.\n" \ + " Actual: it doesn't.") + + +// Implements Boolean test assertions such as EXPECT_TRUE. expression can be +// either a boolean expression or an AssertionResult. text is a textual +// represenation of expression as it was passed into the EXPECT_TRUE. +#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (const ::testing::AssertionResult gtest_ar_ = \ + ::testing::AssertionResult(expression)) \ + ; \ + else \ + fail(::testing::internal::GetBoolAssertionFailureMessage(\ + gtest_ar_, text, #actual, #expected).c_str()) + +#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \ + fail("Expected: " #statement " doesn't generate new fatal " \ + "failures in the current thread.\n" \ + " Actual: it does.") + +// Expands to the name of the class that implements the given test. +#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ + test_case_name##_##test_name##_Test + +// Helper macro for defining tests. +#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\ +class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\ + public:\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\ + private:\ + virtual void TestBody();\ + static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\ + GTEST_DISALLOW_COPY_AND_ASSIGN_(\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ +};\ +\ +::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\ + ::test_info_ =\ + ::testing::internal::MakeAndRegisterTestInfo(\ + #test_case_name, #test_name, NULL, NULL, \ + ::testing::internal::CodeLocation(__FILE__, __LINE__), \ + (parent_id), \ + parent_class::SetUpTestCase, \ + parent_class::TearDownTestCase, \ + new ::testing::internal::TestFactoryImpl<\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\ +void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +// The Google C++ Testing and Mocking Framework (Google Test) +// +// This header file defines the public API for death tests. It is +// #included by gtest.h so a user doesn't need to include this +// directly. +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ + +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// The Google C++ Testing and Mocking Framework (Google Test) +// +// This header file defines internal utilities needed for implementing +// death tests. They are subject to change without notice. 
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+
+
+#include <stdio.h>
+
+namespace testing {
+namespace internal {
+
+GTEST_DECLARE_string_(internal_run_death_test);
+
+// Names of the flags (needed for parsing Google Test flags).
+const char kDeathTestStyleFlag[] = "death_test_style";
+const char kDeathTestUseFork[] = "death_test_use_fork";
+const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
+
+#if GTEST_HAS_DEATH_TEST
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// DeathTest is a class that hides much of the complexity of the
+// GTEST_DEATH_TEST_ macro. It is abstract; its static Create method
+// returns a concrete class that depends on the prevailing death test
+// style, as defined by the --gtest_death_test_style and/or
+// --gtest_internal_run_death_test flags.
+
+// In describing the results of death tests, these terms are used with
+// the corresponding definitions:
+//
+// exit status: The integer exit information in the format specified
+//              by wait(2)
+// exit code:   The integer code passed to exit(3), _exit(2), or
+//              returned from main()
+class GTEST_API_ DeathTest {
+ public:
+  // Create returns false if there was an error determining the
+  // appropriate action to take for the current death test; for example,
+  // if the gtest_death_test_style flag is set to an invalid value.
+  // The LastMessage method will return a more detailed message in that
+  // case. Otherwise, the DeathTest pointer pointed to by the "test"
+  // argument is set. If the death test should be skipped, the pointer
+  // is set to NULL; otherwise, it is set to the address of a new concrete
+  // DeathTest object that controls the execution of the current test.
+  static bool Create(const char* statement, const RE* regex,
+                     const char* file, int line, DeathTest** test);
+  DeathTest();
+  virtual ~DeathTest() { }
+
+  // A helper class that aborts a death test when it's deleted.
+  class ReturnSentinel {
+   public:
+    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
+    ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
+   private:
+    DeathTest* const test_;
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
+  } GTEST_ATTRIBUTE_UNUSED_;
+
+  // An enumeration of possible roles that may be taken when a death
+  // test is encountered. EXECUTE means that the death test logic should
+  // be executed immediately. OVERSEE means that the program should prepare
+  // the appropriate environment for a child process to execute the death
+  // test, then wait for it to complete.
+  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };
+
+  // An enumeration of the three reasons that a test might be aborted.
+  enum AbortReason {
+    TEST_ENCOUNTERED_RETURN_STATEMENT,
+    TEST_THREW_EXCEPTION,
+    TEST_DID_NOT_DIE
+  };
+
+  // Assumes one of the above roles.
+  virtual TestRole AssumeRole() = 0;
+
+  // Waits for the death test to finish and returns its status.
+  virtual int Wait() = 0;
+
+  // Returns true if the death test passed; that is, the test process
+  // exited during the test, its exit status matches a user-supplied
+  // predicate, and its stderr output matches a user-supplied regular
+  // expression.
+  // The user-supplied predicate may be a macro expression rather
+  // than a function pointer or functor, or else Wait and Passed could
+  // be combined.
+ virtual bool Passed(bool exit_status_ok) = 0; + + // Signals that the death test did not die as expected. + virtual void Abort(AbortReason reason) = 0; + + // Returns a human-readable outcome message regarding the outcome of + // the last death test. + static const char* LastMessage(); + + static void set_last_death_test_message(const std::string& message); + + private: + // A string containing a description of the outcome of the last death test. + static std::string last_death_test_message_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest); +}; + +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + +// Factory interface for death tests. May be mocked out for testing. +class DeathTestFactory { + public: + virtual ~DeathTestFactory() { } + virtual bool Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test) = 0; +}; + +// A concrete DeathTestFactory implementation for normal use. +class DefaultDeathTestFactory : public DeathTestFactory { + public: + virtual bool Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test); +}; + +// Returns true if exit_status describes a process that was terminated +// by a signal, or exited normally with a nonzero exit code. +GTEST_API_ bool ExitedUnsuccessfully(int exit_status); + +// Traps C++ exceptions escaping statement and reports them as test +// failures. Note that trapping SEH exceptions is not implemented here. +# if GTEST_HAS_EXCEPTIONS +# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } catch (const ::std::exception& gtest_exception) { \ + fprintf(\ + stderr, \ + "\n%s: Caught std::exception-derived exception escaping the " \ + "death test statement. Exception message: %s\n", \ + ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \ + gtest_exception.what()); \ + fflush(stderr); \ + death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ + } catch (...) { \ + death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ + } + +# else +# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) + +# endif + +// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*, +// ASSERT_EXIT*, and EXPECT_EXIT*. 
+# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (::testing::internal::AlwaysTrue()) { \
+    const ::testing::internal::RE& gtest_regex = (regex); \
+    ::testing::internal::DeathTest* gtest_dt; \
+    if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \
+        __FILE__, __LINE__, &gtest_dt)) { \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
+    } \
+    if (gtest_dt != NULL) { \
+      ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \
+          gtest_dt_ptr(gtest_dt); \
+      switch (gtest_dt->AssumeRole()) { \
+        case ::testing::internal::DeathTest::OVERSEE_TEST: \
+          if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
+            goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
+          } \
+          break; \
+        case ::testing::internal::DeathTest::EXECUTE_TEST: { \
+          ::testing::internal::DeathTest::ReturnSentinel \
+              gtest_sentinel(gtest_dt); \
+          GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
+          gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
+          break; \
+        } \
+        default: \
+          break; \
+      } \
+    } \
+  } else \
+    GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \
+      fail(::testing::internal::DeathTest::LastMessage())
+// The symbol "fail" here expands to something into which a message
+// can be streamed.
+
+// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
+// NDEBUG mode. In this case we need the statements to be executed and the macro
+// must accept a streamed message even though the message is never printed.
+// The regex object is not evaluated, but it is used to prevent "unused"
+// warnings and to avoid an expression that doesn't compile in debug mode.
+#define GTEST_EXECUTE_STATEMENT_(statement, regex) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (::testing::internal::AlwaysTrue()) { \
+    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+  } else if (!::testing::internal::AlwaysTrue()) { \
+    const ::testing::internal::RE& gtest_regex = (regex); \
+    static_cast<void>(gtest_regex); \
+  } else \
+    ::testing::Message()
+
+// A class representing the parsed contents of the
+// --gtest_internal_run_death_test flag, as it existed when
+// RUN_ALL_TESTS was called.
+class InternalRunDeathTestFlag {
+ public:
+  InternalRunDeathTestFlag(const std::string& a_file,
+                           int a_line,
+                           int an_index,
+                           int a_write_fd)
+      : file_(a_file), line_(a_line), index_(an_index),
+        write_fd_(a_write_fd) {}
+
+  ~InternalRunDeathTestFlag() {
+    if (write_fd_ >= 0)
+      posix::Close(write_fd_);
+  }
+
+  const std::string& file() const { return file_; }
+  int line() const { return line_; }
+  int index() const { return index_; }
+  int write_fd() const { return write_fd_; }
+
+ private:
+  std::string file_;
+  int line_;
+  int index_;
+  int write_fd_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
+};
+
+// Returns a newly created InternalRunDeathTestFlag object with fields
+// initialized from the GTEST_FLAG(internal_run_death_test) flag if
+// the flag is specified; otherwise returns NULL.
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
+
+#endif  // GTEST_HAS_DEATH_TEST
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+
+namespace testing {
+
+// This flag controls the style of death tests. Valid values are "threadsafe",
+// meaning that the death test child process will re-execute the test binary
+// from the start, running only a single death test, or "fast",
+// meaning that the child process will execute the test logic immediately
+// after forking.
+GTEST_DECLARE_string_(death_test_style);
+
+#if GTEST_HAS_DEATH_TEST
+
+namespace internal {
+
+// Returns a Boolean value indicating whether the caller is currently
+// executing in the context of the death test child process. Tools such as
+// Valgrind heap checkers may need this to modify their behavior in death
+// tests. IMPORTANT: This is an internal utility. Using it may break the
+// implementation of death tests. User code MUST NOT use it.
+GTEST_API_ bool InDeathTestChild();
+
+}  // namespace internal
+
+// The following macros are useful for writing death tests.
+
+// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
+// executed:
+//
+// 1. It generates a warning if there is more than one active
+// thread. This is because it's safe to fork() or clone() only
+// when there is a single thread.
+//
+// 2. The parent process clone()s a sub-process and runs the death
+// test in it; the sub-process exits with code 0 at the end of the
+// death test, if it hasn't exited already.
+//
+// 3. The parent process waits for the sub-process to terminate.
+//
+// 4. The parent process checks the exit code and error message of
+// the sub-process.
+//
+// Examples:
+//
+//   ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
+//   for (int i = 0; i < 5; i++) {
+//     EXPECT_DEATH(server.ProcessRequest(i),
+//                  "Invalid request .* in ProcessRequest()")
+//         << "Failed to die on request " << i;
+//   }
+//
+//   ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
+//
+//   bool KilledBySIGHUP(int exit_code) {
+//     return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
+//   }
+//
+//   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
+//
+// On the regular expressions used in death tests:
+//
+// GOOGLETEST_CM0005 DO NOT DELETE
+// On POSIX-compliant systems (*nix), we use the <regex.h> library,
+// which uses the POSIX extended regex syntax.
+//
+// On other platforms (e.g. Windows or Mac), we only support a simple regex
+// syntax implemented as part of Google Test. This limited
+// implementation should be enough most of the time when writing
+// death tests; though it lacks many features you can find in PCRE
+// or POSIX extended regex syntax. For example, we don't support
+// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
+// repetition count ("x{5,7}"), among others.
+//
+// Below is the syntax that we do support. We chose it to be a
+// subset of both PCRE and POSIX extended regex, so it's easy to
+// learn wherever you come from. In the following: 'A' denotes a
+// literal character, period (.), or a single \\ escape sequence;
+// 'x' and 'y' denote regular expressions; 'm' and 'n' are for
+// natural numbers.
+//
+//   c     matches any literal character c
+//   \\d   matches any decimal digit
+//   \\D   matches any character that's not a decimal digit
+//   \\f   matches \f
+//   \\n   matches \n
+//   \\r   matches \r
+//   \\s   matches any ASCII whitespace, including \n
+//   \\S   matches any character that's not a whitespace
+//   \\t   matches \t
+//   \\v   matches \v
+//   \\w   matches any letter, _, or decimal digit
+//   \\W   matches any character that \\w doesn't match
+//   \\c   matches any literal character c, which must be a punctuation
+//   .     matches any single character except \n
+//   A?
matches 0 or 1 occurrences of A +// A* matches 0 or many occurrences of A +// A+ matches 1 or many occurrences of A +// ^ matches the beginning of a string (not that of each line) +// $ matches the end of a string (not that of each line) +// xy matches x followed by y +// +// If you accidentally use PCRE or POSIX extended regex features +// not implemented by us, you will get a run-time failure. In that +// case, please try to rewrite your regular expression within the +// above syntax. +// +// This implementation is *not* meant to be as highly tuned or robust +// as a compiled regex library, but should perform well enough for a +// death test, which already incurs significant overhead by launching +// a child process. +// +// Known caveats: +// +// A "threadsafe" style death test obtains the path to the test +// program from argv[0] and re-executes it in the sub-process. For +// simplicity, the current implementation doesn't search the PATH +// when launching the sub-process. This means that the user must +// invoke the test program via a path that contains at least one +// path separator (e.g. path/to/foo_test and +// /absolute/path/to/bar_test are fine, but foo_test is not). This +// is rarely a problem as people usually don't put the test binary +// directory in PATH. +// +// FIXME: make thread-safe death tests search the PATH. + +// Asserts that a given statement causes the program to exit, with an +// integer exit status that satisfies predicate, and emitting error output +// that matches regex. +# define ASSERT_EXIT(statement, predicate, regex) \ + GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_) + +// Like ASSERT_EXIT, but continues on to successive tests in the +// test case, if any: +# define EXPECT_EXIT(statement, predicate, regex) \ + GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_) + +// Asserts that a given statement causes the program to exit, either by +// explicitly exiting with a nonzero exit code or being killed by a +// signal, and emitting error output that matches regex. +# define ASSERT_DEATH(statement, regex) \ + ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) + +// Like ASSERT_DEATH, but continues on to successive tests in the +// test case, if any: +# define EXPECT_DEATH(statement, regex) \ + EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) + +// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*: + +// Tests that an exit code describes a normal exit with a given exit code. +class GTEST_API_ ExitedWithCode { + public: + explicit ExitedWithCode(int exit_code); + bool operator()(int exit_status) const; + private: + // No implementation - assignment is unsupported. + void operator=(const ExitedWithCode& other); + + const int exit_code_; +}; + +# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA +// Tests that an exit code describes an exit due to termination by a +// given signal. +// GOOGLETEST_CM0006 DO NOT DELETE +class GTEST_API_ KilledBySignal { + public: + explicit KilledBySignal(int signum); + bool operator()(int exit_status) const; + private: + const int signum_; +}; +# endif // !GTEST_OS_WINDOWS + +// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode. +// The death testing framework causes this to have interesting semantics, +// since the sideeffects of the call are only visible in opt mode, and not +// in debug mode. 
+// +// In practice, this can be used to test functions that utilize the +// LOG(DFATAL) macro using the following style: +// +// int DieInDebugOr12(int* sideeffect) { +// if (sideeffect) { +// *sideeffect = 12; +// } +// LOG(DFATAL) << "death"; +// return 12; +// } +// +// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) { +// int sideeffect = 0; +// // Only asserts in dbg. +// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death"); +// +// #ifdef NDEBUG +// // opt-mode has sideeffect visible. +// EXPECT_EQ(12, sideeffect); +// #else +// // dbg-mode no visible sideeffect. +// EXPECT_EQ(0, sideeffect); +// #endif +// } +// +// This will assert that DieInDebugReturn12InOpt() crashes in debug +// mode, usually due to a DCHECK or LOG(DFATAL), but returns the +// appropriate fallback value (12 in this case) in opt mode. If you +// need to test that a function has appropriate side-effects in opt +// mode, include assertions against the side-effects. A general +// pattern for this is: +// +// EXPECT_DEBUG_DEATH({ +// // Side-effects here will have an effect after this statement in +// // opt mode, but none in debug mode. +// EXPECT_EQ(12, DieInDebugOr12(&sideeffect)); +// }, "death"); +// +# ifdef NDEBUG + +# define EXPECT_DEBUG_DEATH(statement, regex) \ + GTEST_EXECUTE_STATEMENT_(statement, regex) + +# define ASSERT_DEBUG_DEATH(statement, regex) \ + GTEST_EXECUTE_STATEMENT_(statement, regex) + +# else + +# define EXPECT_DEBUG_DEATH(statement, regex) \ + EXPECT_DEATH(statement, regex) + +# define ASSERT_DEBUG_DEATH(statement, regex) \ + ASSERT_DEATH(statement, regex) + +# endif // NDEBUG for EXPECT_DEBUG_DEATH +#endif // GTEST_HAS_DEATH_TEST + +// This macro is used for implementing macros such as +// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where +// death tests are not supported. Those macros must compile on such systems +// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on +// systems that support death tests. This allows one to write such a macro +// on a system that does not support death tests and be sure that it will +// compile on a death-test supporting system. It is exposed publicly so that +// systems that have death-tests with stricter requirements than +// GTEST_HAS_DEATH_TEST can write their own equivalent of +// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED. +// +// Parameters: +// statement - A statement that a macro such as EXPECT_DEATH would test +// for program termination. This macro has to make sure this +// statement is compiled but not executed, to ensure that +// EXPECT_DEATH_IF_SUPPORTED compiles with a certain +// parameter iff EXPECT_DEATH compiles with it. +// regex - A regex that a macro such as EXPECT_DEATH would use to test +// the output of statement. This parameter has to be +// compiled but not evaluated by this macro, to ensure that +// this macro only accepts expressions that a macro such as +// EXPECT_DEATH would accept. +// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED +// and a return statement for ASSERT_DEATH_IF_SUPPORTED. +// This ensures that ASSERT_DEATH_IF_SUPPORTED will not +// compile inside functions where ASSERT_DEATH doesn't +// compile. +// +// The branch that has an always false condition is used to ensure that +// statement and regex are compiled (and thus syntactically correct) but +// never executed. The unreachable code macro protects the terminator +// statement from generating an 'unreachable code' warning in case +// statement unconditionally returns or throws. 
The Message constructor at +// the end allows the syntax of streaming additional messages into the +// macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH. +# define GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, terminator) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + GTEST_LOG_(WARNING) \ + << "Death tests are not supported on this platform.\n" \ + << "Statement '" #statement "' cannot be verified."; \ + } else if (::testing::internal::AlwaysFalse()) { \ + ::testing::internal::RE::PartialMatch(".*", (regex)); \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + terminator; \ + } else \ + ::testing::Message() + +// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and +// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if +// death tests are supported; otherwise they just issue a warning. This is +// useful when you are combining death test assertions with normal test +// assertions in one test. +#if GTEST_HAS_DEATH_TEST +# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ + EXPECT_DEATH(statement, regex) +# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ + ASSERT_DEATH(statement, regex) +#else +# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ + GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, ) +# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ + GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, return) +#endif + +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ +// This file was GENERATED by command: +// pump.py gtest-param-test.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Macros and functions for implementing parameterized tests +// in Google C++ Testing and Mocking Framework (Google Test) +// +// This file is generated by a SCRIPT. DO NOT EDIT BY HAND! 
+//
+// GOOGLETEST_CM0001 DO NOT DELETE
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+
+
+// Value-parameterized tests allow you to test your code with different
+// parameters without writing multiple copies of the same test.
+//
+// Here is how you use value-parameterized tests:
+
+#if 0
+
+// To write value-parameterized tests, first you should define a fixture
+// class. It is usually derived from testing::TestWithParam<T> (see below for
+// another inheritance scheme that's sometimes useful in more complicated
+// class hierarchies), where T is the type of your parameter values.
+// TestWithParam<T> is itself derived from testing::Test. T can be any
+// copyable type. If it's a raw pointer, you are responsible for managing the
+// lifespan of the pointed values.
+
+class FooTest : public ::testing::TestWithParam<const char*> {
+  // You can implement all the usual class fixture members here.
+};
+
+// Then, use the TEST_P macro to define as many parameterized tests
+// for this fixture as you want. The _P suffix is for "parameterized"
+// or "pattern", whichever you prefer to think.
+
+TEST_P(FooTest, DoesBlah) {
+  // Inside a test, access the test parameter with the GetParam() method
+  // of the TestWithParam<T> class:
+  EXPECT_TRUE(foo.Blah(GetParam()));
+  ...
+}
+
+TEST_P(FooTest, HasBlahBlah) {
+  ...
+}
+
+// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test
+// case with any set of parameters you want. Google Test defines a number
+// of functions for generating test parameters. They return what we call
+// (surprise!) parameter generators. Here is a summary of them, which
+// are all in the testing namespace:
+//
+//
+//  Range(begin, end [, step]) - Yields values {begin, begin+step,
+//                               begin+step+step, ...}. The values do not
+//                               include end. step defaults to 1.
+//  Values(v1, v2, ..., vN)    - Yields values {v1, v2, ..., vN}.
+//  ValuesIn(container)        - Yields values from a C-style array, an STL
+//  ValuesIn(begin,end)          container, or an iterator range [begin, end).
+//  Bool()                     - Yields sequence {false, true}.
+//  Combine(g1, g2, ..., gN)   - Yields all combinations (the Cartesian product
+//                               for the math savvy) of the values generated
+//                               by the N generators.
+//
+// For more details, see comments at the definitions of these functions below
+// in this file.
+//
+// The following statement will instantiate tests from the FooTest test case
+// each with parameter values "meeny", "miny", and "moe".
+
+INSTANTIATE_TEST_CASE_P(InstantiationName,
+                        FooTest,
+                        Values("meeny", "miny", "moe"));
+
+// To distinguish different instances of the pattern, (yes, you
+// can instantiate it more than once) the first argument to the
+// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the
+// actual test case name. Remember to pick unique prefixes for different
+// instantiations. The tests from the instantiation above will have
+// these names:
+//
+// * InstantiationName/FooTest.DoesBlah/0 for "meeny"
+// * InstantiationName/FooTest.DoesBlah/1 for "miny"
+// * InstantiationName/FooTest.DoesBlah/2 for "moe"
+// * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
+// * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
+// * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
+//
+// You can use these names in --gtest_filter.
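+//
+// For example (an illustrative sketch, not part of this header; the test
+// binary name "foo_test" is hypothetical), the following invocation would
+// run only the first DoesBlah instance listed above:
+//
+//   ./foo_test --gtest_filter=InstantiationName/FooTest.DoesBlah/0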
+//
+// This statement will instantiate all tests from FooTest again, each
+// with parameter values "cat" and "dog":
+
+const char* pets[] = {"cat", "dog"};
+INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));
+
+// The tests from the instantiation above will have these names:
+//
+// * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
+// * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
+// * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
+// * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
+//
+// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests
+// in the given test case, whether their definitions come before or
+// AFTER the INSTANTIATE_TEST_CASE_P statement.
+//
+// Please also note that generator expressions (including parameters to the
+// generators) are evaluated in InitGoogleTest(), after main() has started.
+// This allows the user, on one hand, to adjust generator parameters in order
+// to dynamically determine a set of tests to run and, on the other hand, to
+// inspect the generated tests with the Google Test reflection API before
+// RUN_ALL_TESTS() is executed.
+//
+// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
+// for more examples.
+//
+// In the future, we plan to publish the API for defining new parameter
+// generators. But for now this interface remains part of the internal
+// implementation and is subject to change.
+//
+//
+// A parameterized test fixture must be derived from testing::Test and from
+// testing::WithParamInterface<T>, where T is the type of the parameter
+// values. Inheriting from TestWithParam<T> satisfies that requirement because
+// TestWithParam<T> inherits from both Test and WithParamInterface<T>. In more
+// complicated hierarchies, however, it is occasionally useful to inherit
+// separately from Test and WithParamInterface. For example:
+
+class BaseTest : public ::testing::Test {
+  // You can inherit all the usual members for a non-parameterized test
+  // fixture here.
+};
+
+class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
+  // The usual test fixture members go here too.
+};
+
+TEST_F(BaseTest, HasFoo) {
+  // This is an ordinary non-parameterized test.
+}
+
+TEST_P(DerivedTest, DoesBlah) {
+  // GetParam works just the same here as if you inherit from TestWithParam.
+  EXPECT_TRUE(foo.Blah(GetParam()));
+}
+
+#endif  // 0
+
+
+#if !GTEST_OS_SYMBIAN
+# include <utility>
+#endif
+
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Type and function utilities for implementing parameterized tests.
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+
+#include <ctype.h>
+
+#include <iterator>
+#include <set>
+#include <utility>
+#include <vector>
+
+// Copyright 2003 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// A "smart" pointer type with reference tracking. Every pointer to a
+// particular object is kept on a circular linked list. When the last pointer
+// to an object is destroyed or reassigned, the object is deleted.
+//
+// Used properly, this deletes the object when the last reference goes away.
+// There are several caveats:
+// - Like all reference counting schemes, cycles lead to leaks.
+// - Each smart pointer is actually two pointers (8 bytes instead of 4).
+// - Every time a pointer is assigned, the entire list of pointers to that
+//   object is traversed. This class is therefore NOT SUITABLE when there
+//   will often be more than two or three pointers to a particular object.
+// - References are only tracked as long as linked_ptr<> objects are copied.
+//   If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
+//   will happen (double deletion).
+//
+// A good use of this class is storing object references in STL containers.
+// You can safely put linked_ptr<> in a vector<>.
+// Other uses may not be as good.
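+//
+// A minimal usage sketch (hypothetical user code, assuming a user-defined
+// type Foo; not part of this header). Copies are reference-tracked, so the
+// Foo object below is deleted exactly once, when the vector goes away:
+//
+//   std::vector<linked_ptr<Foo> > v;
+//   v.push_back(make_linked_ptr(new Foo));
+//   v.push_back(v.back());  // Both elements now share the same Foo.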
+//
+// Note: If you use an incomplete type with linked_ptr<>, the class
+// *containing* linked_ptr<> must have a constructor and destructor (even
+// if they do nothing!).
+//
+// Bill Gibbons suggested we use something like this.
+//
+// Thread Safety:
+// Unlike other linked_ptr implementations, in this implementation
+// a linked_ptr object is thread-safe in the sense that:
+// - it's safe to copy linked_ptr objects concurrently,
+// - it's safe to copy *from* a linked_ptr and read its underlying
+//   raw pointer (e.g. via get()) concurrently, and
+// - it's safe to write to two linked_ptrs that point to the same
+//   shared object concurrently.
+// FIXME: rename this to safe_linked_ptr to avoid
+// confusion with normal linked_ptr.
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+
+#include <stdlib.h>
+#include <assert.h>
+
+
+namespace testing {
+namespace internal {
+
+// Protects copying of all linked_ptr objects.
+GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
+
+// This is used internally by all instances of linked_ptr<>. It needs to be
+// a non-template class because different types of linked_ptr<> can refer to
+// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
+// So, it needs to be possible for different types of linked_ptr to participate
+// in the same circular linked list, so we need a single class type here.
+//
+// DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr<T>.
+class linked_ptr_internal {
+ public:
+  // Create a new circle that includes only this instance.
+  void join_new() {
+    next_ = this;
+  }
+
+  // Many linked_ptr operations may change p.link_ for some linked_ptr
+  // variable p in the same circle as this object. Therefore we need
+  // to prevent two such operations from occurring concurrently.
+  //
+  // Note that different types of linked_ptr objects can coexist in a
+  // circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
+  // linked_ptr<Derived2>). Therefore we must use a single mutex to
+  // protect all linked_ptr objects. This can create serious
+  // contention in production code, but is acceptable in a testing
+  // framework.
+
+  // Join an existing circle.
+  void join(linked_ptr_internal const* ptr)
+      GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
+    MutexLock lock(&g_linked_ptr_mutex);
+
+    linked_ptr_internal const* p = ptr;
+    while (p->next_ != ptr) {
+      assert(p->next_ != this &&
+             "Trying to join() a linked ring we are already in. "
+             "Is GMock thread safety enabled?");
+      p = p->next_;
+    }
+    p->next_ = this;
+    next_ = ptr;
+  }
+
+  // Leave whatever circle we're part of. Returns true if we were the
+  // last member of the circle. Once this is done, you can join() another.
+  bool depart()
+      GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
+    MutexLock lock(&g_linked_ptr_mutex);
+
+    if (next_ == this) return true;
+    linked_ptr_internal const* p = next_;
+    while (p->next_ != this) {
+      assert(p->next_ != next_ &&
+             "Trying to depart() a linked ring we are not in. "
+             "Is GMock thread safety enabled?");
+      p = p->next_;
+    }
+    p->next_ = next_;
+    return false;
+  }
+
+ private:
+  mutable linked_ptr_internal const* next_;
+};
+
+template <typename T>
+class linked_ptr {
+ public:
+  typedef T element_type;
+
+  // Take over ownership of a raw pointer. This should happen as soon as
+  // possible after the object is created.
+  explicit linked_ptr(T* ptr = NULL) { capture(ptr); }
+  ~linked_ptr() { depart(); }
+
+  // Copy an existing linked_ptr<>, adding ourselves to the list of references.
+  template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
+  linked_ptr(linked_ptr const& ptr) {  // NOLINT
+    assert(&ptr != this);
+    copy(&ptr);
+  }
+
+  // Assignment releases the old value and acquires the new.
+  template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
+    depart();
+    copy(&ptr);
+    return *this;
+  }
+
+  linked_ptr& operator=(linked_ptr const& ptr) {
+    if (&ptr != this) {
+      depart();
+      copy(&ptr);
+    }
+    return *this;
+  }
+
+  // Smart pointer members.
+  void reset(T* ptr = NULL) {
+    depart();
+    capture(ptr);
+  }
+  T* get() const { return value_; }
+  T* operator->() const { return value_; }
+  T& operator*() const { return *value_; }
+
+  bool operator==(T* p) const { return value_ == p; }
+  bool operator!=(T* p) const { return value_ != p; }
+  template <typename U>
+  bool operator==(linked_ptr<U> const& ptr) const {
+    return value_ == ptr.get();
+  }
+  template <typename U>
+  bool operator!=(linked_ptr<U> const& ptr) const {
+    return value_ != ptr.get();
+  }
+
+ private:
+  template <typename U>
+  friend class linked_ptr;
+
+  T* value_;
+  linked_ptr_internal link_;
+
+  void depart() {
+    if (link_.depart()) delete value_;
+  }
+
+  void capture(T* ptr) {
+    value_ = ptr;
+    link_.join_new();
+  }
+
+  template <typename U> void copy(linked_ptr<U> const* ptr) {
+    value_ = ptr->get();
+    if (value_)
+      link_.join(&ptr->link_);
+    else
+      link_.join_new();
+  }
+};
+
+template <typename T> inline
+bool operator==(T* ptr, const linked_ptr<T>& x) {
+  return ptr == x.get();
+}
+
+template <typename T> inline
+bool operator!=(T* ptr, const linked_ptr<T>& x) {
+  return ptr != x.get();
+}
+
+// A function to convert T* into linked_ptr<T>
+// Doing e.g. make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter notation
+// for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg))
+template <typename T>
+linked_ptr<T> make_linked_ptr(T* ptr) {
+  return linked_ptr<T>(ptr);
+}
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Google Test - The Google C++ Testing and Mocking Framework
+//
+// This file implements a universal value printer that can print a
+// value of any type T:
+//
+//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
+//
+// A user can teach this function how to print a class type T by
+// defining either operator<<() or PrintTo() in the namespace that
+// defines T. More specifically, the FIRST defined function in the
+// following list will be used (assuming T is defined in namespace
+// foo):
+//
+// 1. foo::PrintTo(const T&, ostream*)
+// 2. operator<<(ostream&, const T&) defined in either foo or the
+//    global namespace.
+//
+// However if T is an STL-style container then it is printed element-wise
+// unless foo::PrintTo(const T&, ostream*) is defined. Note that
+// operator<<() is ignored for container types.
+//
+// If none of the above is defined, it will print the debug string of
+// the value if it is a protocol buffer, or print the raw bytes in the
+// value otherwise.
+//
+// To aid debugging: when T is a reference type, the address of the
+// value is also printed; when T is a (const) char pointer, both the
+// pointer value and the NUL-terminated string it points to are
+// printed.
+//
+// We also provide some convenient wrappers:
+//
+//   // Prints a value to a string. For a (const or not) char
+//   // pointer, the NUL-terminated string (but not the pointer) is
+//   // printed.
+//   std::string ::testing::PrintToString(const T& value);
+//
+//   // Prints a value tersely: for a reference type, the referenced
+//   // value (but not the address) is printed; for a (const or not) char
+//   // pointer, the NUL-terminated string (but not the pointer) is
+//   // printed.
+//   void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
+//
+//   // Prints value using the type inferred by the compiler. The difference
+//   // from UniversalTersePrint() is that this function prints both the
+//   // pointer and the NUL-terminated string for a (const or not) char pointer.
+//   void ::testing::internal::UniversalPrint(const T& value, ostream*);
+//
+//   // Prints the fields of a tuple tersely to a string vector, one
+//   // element for each field. Tuple support must be enabled in
+//   // gtest-port.h.
+//   std::vector<string> UniversalTersePrintTupleFieldsToStrings(
+//       const Tuple& value);
+//
+// Known limitation:
+//
+// The print primitives print the elements of an STL-style container
+// using the compiler-inferred type of *iter where iter is a
+// const_iterator of the container. When const_iterator is an input
+// iterator but not a forward iterator, this inferred type may not
+// match value_type, and the print output may be incorrect. In
+// practice, this is rarely a problem as for most containers
+// const_iterator is a forward iterator. We'll fix this if there's an
+// actual need for it. Note that this fix cannot rely on value_type
+// being defined as many user-defined container types don't have
+// value_type.
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+
+#include <ostream>  // NOLINT
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#if GTEST_HAS_STD_TUPLE_
+# include <tuple>
+#endif
+
+#if GTEST_HAS_ABSL
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "absl/types/variant.h"
+#endif  // GTEST_HAS_ABSL
+
+namespace testing {
+
+// Definitions in the 'internal' and 'internal2' name spaces are
+// subject to change without notice. DO NOT USE THEM IN USER CODE!
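+//
+// A minimal sketch of the customization point documented above (hypothetical
+// user code, not part of this header): defining PrintTo() in the namespace
+// that defines the type makes failure messages print it readably.
+//
+//   namespace bar {
+//   struct Bar { int x; };
+//   void PrintTo(const Bar& b, ::std::ostream* os) {
+//     *os << "Bar(" << b.x << ")";
+//   }
+//   }  // namespace bar
+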
+namespace internal2 {
+
+// Prints the given number of bytes in the given object to the given
+// ostream.
+GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
+                                     size_t count,
+                                     ::std::ostream* os);
+
+// For selecting which printer to use when a given type has neither <<
+// nor PrintTo().
+enum TypeKind {
+  kProtobuf,              // a protobuf type
+  kConvertibleToInteger,  // a type implicitly convertible to BiggestInt
+                          // (e.g. a named or unnamed enum type)
+#if GTEST_HAS_ABSL
+  kConvertibleToStringView,  // a type implicitly convertible to
+                             // absl::string_view
+#endif
+  kOtherType  // anything else
+};
+
+// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is called
+// by the universal printer to print a value of type T when neither
+// operator<< nor PrintTo() is defined for T, where kTypeKind is the
+// "kind" of T as defined by enum TypeKind.
+template <typename T, TypeKind kTypeKind>
+class TypeWithoutFormatter {
+ public:
+  // This default version is called when kTypeKind is kOtherType.
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    PrintBytesInObjectTo(static_cast<const unsigned char*>(
+                             reinterpret_cast<const void*>(&value)),
+                         sizeof(value), os);
+  }
+};
+
+// We print a protobuf using its ShortDebugString() when the string
+// doesn't exceed this many characters; otherwise we print it using
+// DebugString() for better readability.
+const size_t kProtobufOneLinerMaxLength = 50;
+
+template <typename T>
+class TypeWithoutFormatter<T, kProtobuf> {
+ public:
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    std::string pretty_str = value.ShortDebugString();
+    if (pretty_str.length() > kProtobufOneLinerMaxLength) {
+      pretty_str = "\n" + value.DebugString();
+    }
+    *os << ("<" + pretty_str + ">");
+  }
+};
+
+template <typename T>
+class TypeWithoutFormatter<T, kConvertibleToInteger> {
+ public:
+  // Since T has no << operator or PrintTo() but can be implicitly
+  // converted to BiggestInt, we print it as a BiggestInt.
+  //
+  // Most likely T is an enum type (either named or unnamed), in which
+  // case printing it as an integer is the desired behavior. In case
+  // T is not an enum, printing it as an integer is the best we can do
+  // given that it has no user-defined printer.
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    const internal::BiggestInt kBigInt = value;
+    *os << kBigInt;
+  }
+};
+
+#if GTEST_HAS_ABSL
+template <typename T>
+class TypeWithoutFormatter<T, kConvertibleToStringView> {
+ public:
+  // Since T has neither operator<< nor PrintTo() but can be implicitly
+  // converted to absl::string_view, we print it as an absl::string_view.
+  //
+  // Note: the implementation is further below, as it depends on
+  // internal::PrintTo symbol which is defined later in the file.
+  static void PrintValue(const T& value, ::std::ostream* os);
+};
+#endif
+
+// Prints the given value to the given ostream. If the value is a
+// protocol message, its debug string is printed; if it's an enum or
+// of a type implicitly convertible to BiggestInt, it's printed as an
+// integer; otherwise the bytes in the value are printed. This is
+// what UniversalPrinter<T>::Print() does when it knows nothing about
+// type T and T has neither << operator nor PrintTo().
+//
+// A user can override this behavior for a class type Foo by defining
+// a << operator in the namespace where Foo is defined.
+//
+// We put this operator in namespace 'internal2' instead of 'internal'
+// to simplify the implementation, as much code in 'internal' needs to
+// use << in STL, which would conflict with our own << were it defined
+// in 'internal'.
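+//
+// For instance (an illustrative sketch, not part of this header): a value
+// of an enum type that has no PrintTo() or operator<< of its own is
+// implicitly convertible to BiggestInt, so the kConvertibleToInteger
+// branch in the conditional below formats it as a plain integer:
+//
+//   enum Color { kRed = 1, kGreen = 2 };
+//   // PrintToString(kGreen) would yield "2".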
+//
+// Note that this operator<< takes a generic std::basic_ostream<Char,
+// CharTraits> type instead of the more restricted std::ostream. If
+// we define it to take an std::ostream instead, we'll get an
+// "ambiguous overloads" compiler error when trying to print a type
+// Foo that supports streaming to std::basic_ostream<Char, CharTraits>,
+// as the compiler cannot tell whether
+// operator<<(std::ostream&, const T&) or
+// operator<<(std::basic_stream<Char, CharTraits>, const Foo&) is more
+// specific.
+template <typename Char, typename CharTraits, typename T>
+::std::basic_ostream<Char, CharTraits>& operator<<(
+    ::std::basic_ostream<Char, CharTraits>& os, const T& x) {
+  TypeWithoutFormatter<T, (internal::IsAProtocolMessage<T>::value
+                               ? kProtobuf
+                               : internal::ImplicitlyConvertible<
+                                     const T&, internal::BiggestInt>::value
+                                     ? kConvertibleToInteger
+                                     :
+#if GTEST_HAS_ABSL
+                                     internal::ImplicitlyConvertible<
+                                         const T&, absl::string_view>::value
+                                         ? kConvertibleToStringView
+                                         :
+#endif
+                                     kOtherType)>::PrintValue(x, &os);
+  return os;
+}
+
+}  // namespace internal2
+}  // namespace testing
+
+// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
+// magic needed for implementing UniversalPrinter won't work.
+namespace testing_internal {
+
+// Used to print a value that is not an STL-style container when the
+// user doesn't define PrintTo() for it.
+template <typename T>
+void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
+  // With the following statement, during unqualified name lookup,
+  // testing::internal2::operator<< appears as if it was declared in
+  // the nearest enclosing namespace that contains both
+  // ::testing_internal and ::testing::internal2, i.e. the global
+  // namespace. For more details, refer to the C++ Standard section
+  // 7.3.4-1 [namespace.udir]. This allows us to fall back onto
+  // testing::internal2::operator<< in case T doesn't come with a <<
+  // operator.
+  //
+  // We cannot write 'using ::testing::internal2::operator<<;', which
+  // gcc 3.3 fails to compile due to a compiler bug.
+  using namespace ::testing::internal2;  // NOLINT
+
+  // Assuming T is defined in namespace foo, in the next statement,
+  // the compiler will consider all of:
+  //
+  //   1. foo::operator<< (thanks to Koenig look-up),
+  //   2. ::operator<< (as the current namespace is enclosed in ::),
+  //   3. testing::internal2::operator<< (thanks to the using statement above).
+  //
+  // The operator<< whose type matches T best will be picked.
+  //
+  // We deliberately allow #2 to be a candidate, as sometimes it's
+  // impossible to define #1 (e.g. when foo is ::std, defining
+  // anything in it is undefined behavior unless you are a compiler
+  // vendor.).
+  *os << value;
+}
+
+}  // namespace testing_internal
+
+namespace testing {
+namespace internal {
+
+// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
+// value of type ToPrint that is an operand of a comparison assertion
+// (e.g. ASSERT_EQ). OtherOperand is the type of the other operand in
+// the comparison, and is used to help determine the best way to
+// format the value. In particular, when the value is a C string
+// (char pointer) and the other operand is an STL string object, we
+// want to format the C string as a string, since we know it is
+// compared by value with the string object. If the value is a char
+// pointer but the other operand is not an STL string object, we don't
+// know whether the pointer is supposed to point to a NUL-terminated
+// string, and thus want to print it as a pointer to be safe.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+
+// The default case.
+// The default case.
+template <typename ToPrint, typename OtherOperand>
+class FormatForComparison {
+ public:
+  static ::std::string Format(const ToPrint& value) {
+    return ::testing::PrintToString(value);
+  }
+};
+
+// Array.
+template <typename ToPrint, size_t N, typename OtherOperand>
+class FormatForComparison<ToPrint[N], OtherOperand> {
+ public:
+  static ::std::string Format(const ToPrint* value) {
+    return FormatForComparison<const ToPrint*, OtherOperand>::Format(value);
+  }
+};
+
+// By default, print C strings as pointers to be safe, as we don't know
+// whether they actually point to a NUL-terminated string.
+
+#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType)                 \
+  template <typename OtherOperand>                                       \
+  class FormatForComparison<CharType*, OtherOperand> {                   \
+   public:                                                               \
+    static ::std::string Format(CharType* value) {                       \
+      return ::testing::PrintToString(static_cast<const void*>(value));  \
+    }                                                                    \
+  }
+
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
+
+#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_
+
+// If a C string is compared with an STL string object, we know it's meant
+// to point to a NUL-terminated string, and thus can print it as a string.
+
+#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType)  \
+  template <>                                                             \
+  class FormatForComparison<CharType*, OtherStringType> {                 \
+   public:                                                                \
+    static ::std::string Format(CharType* value) {                        \
+      return ::testing::PrintToString(value);                             \
+    }                                                                     \
+  }
+
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);
+
+#if GTEST_HAS_GLOBAL_STRING
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::string);
+#endif
+
+#if GTEST_HAS_GLOBAL_WSTRING
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::wstring);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::wstring);
+#endif
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
+#endif
+
+#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_
+
+// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, etc.)
+// operand to be used in a failure message.  The type (but not value)
+// of the other operand may affect the format.  This allows us to
+// print a char* as a raw pointer when it is compared against another
+// char* or void*, and print it as a C string when it is compared
+// against an std::string object, for example.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename T1, typename T2>
+std::string FormatForComparisonFailureMessage(
+    const T1& value, const T2& /* other_operand */) {
+  return FormatForComparison<T1, T2>::Format(value);
+}
+
+// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
+// value to the given ostream.  The caller must ensure that
+// 'ostream_ptr' is not NULL, or the behavior is undefined.
+//
+// We define UniversalPrinter as a class template (as opposed to a
+// function template), as we need to partially specialize it for
+// reference types, which cannot be done with function templates.
+template <typename T>
+class UniversalPrinter;
+
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os);
+
+enum DefaultPrinterType {
+  kPrintContainer,
+  kPrintPointer,
+  kPrintFunctionPointer,
+  kPrintOther,
+};
+template <DefaultPrinterType type> struct WrapPrinterType {};
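+
+// Illustration (not part of the original header): WrapPrinterType turns a
+// DefaultPrinterType value computed at compile time into a distinct *type*,
+// so ordinary overload resolution can route to the right printer.  A minimal
+// sketch of the same tag-dispatch idea, with hypothetical names:
+//
+//   template <DefaultPrinterType type> struct Tag {};
+//   void Dispatch(Tag<kPrintContainer>) { /* container path */ }
+//   void Dispatch(Tag<kPrintOther>)     { /* fallback path  */ }
+//
+//   Dispatch(Tag<(true ? kPrintContainer : kPrintOther)>());  // container path
+//
+// The enum value must be a compile-time constant; the conditional inside the
+// template argument is evaluated by the compiler, not at run time.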
+// Used to print an STL-style container when the user doesn't define
+// a PrintTo() for it.
+template <typename C>
+void DefaultPrintTo(WrapPrinterType<kPrintContainer> /* dummy */,
+                    const C& container, ::std::ostream* os) {
+  const size_t kMaxCount = 32;  // The maximum number of elements to print.
+  *os << '{';
+  size_t count = 0;
+  for (typename C::const_iterator it = container.begin();
+       it != container.end(); ++it, ++count) {
+    if (count > 0) {
+      *os << ',';
+      if (count == kMaxCount) {  // Enough has been printed.
+        *os << " ...";
+        break;
+      }
+    }
+    *os << ' ';
+    // We cannot call PrintTo(*it, os) here as PrintTo() doesn't
+    // handle *it being a native array.
+    internal::UniversalPrint(*it, os);
+  }
+
+  if (count > 0) {
+    *os << ' ';
+  }
+  *os << '}';
+}
+
+// Used to print a pointer that is neither a char pointer nor a member
+// pointer, when the user doesn't define PrintTo() for it.  (A member
+// variable pointer or member function pointer doesn't really point to
+// a location in the address space.  Their representation is
+// implementation-defined.  Therefore they will be printed as raw
+// bytes.)
+template <typename T>
+void DefaultPrintTo(WrapPrinterType<kPrintPointer> /* dummy */,
+                    T* p, ::std::ostream* os) {
+  if (p == NULL) {
+    *os << "NULL";
+  } else {
+    // T is not a function type.  We just call << to print p,
+    // relying on ADL to pick up user-defined << for their pointer
+    // types, if any.
+    *os << p;
+  }
+}
+template <typename T>
+void DefaultPrintTo(WrapPrinterType<kPrintFunctionPointer> /* dummy */,
+                    T* p, ::std::ostream* os) {
+  if (p == NULL) {
+    *os << "NULL";
+  } else {
+    // T is a function type, so '*os << p' doesn't do what we want
+    // (it just prints p as bool).  We want to print p as a const
+    // void*.
+    *os << reinterpret_cast<const void*>(p);
+  }
+}
+
+// Used to print a non-container, non-pointer value when the user
+// doesn't define PrintTo() for it.
+template <typename T>
+void DefaultPrintTo(WrapPrinterType<kPrintOther> /* dummy */,
+                    const T& value, ::std::ostream* os) {
+  ::testing_internal::DefaultPrintNonContainerTo(value, os);
+}
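+
+// Illustration (not part of the original header): the container printer
+// above emits at most kMaxCount (32) elements.  With this hypothetical
+// vector:
+//
+//   std::vector<int> v;
+//   for (int i = 0; i < 40; i++) v.push_back(i);
+//
+// the printer emits elements 0 through 31 and then " ..." to mark the
+// truncation, so the output is "{ 0, 1, 2, ... , 31, ... }" with the
+// trailing ", ..." standing in for the eight elements that were dropped.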
+// Prints the given value using the << operator if it has one;
+// otherwise prints the bytes in it.  This is what
+// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
+// or overloaded for type T.
+//
+// A user can override this behavior for a class type Foo by defining
+// an overload of PrintTo() in the namespace where Foo is defined.  We
+// give the user this option as sometimes defining a << operator for
+// Foo is not desirable (e.g. the coding style may prevent doing it,
+// or there is already a << operator but it doesn't do what the user
+// wants).
+template <typename T>
+void PrintTo(const T& value, ::std::ostream* os) {
+  // DefaultPrintTo() is overloaded.  The type of its first argument
+  // determines which version will be picked.
+  //
+  // Note that we check for container types here, before we check
+  // for protocol message types in our operator<<.  The rationale is:
+  //
+  // For protocol messages, we want to give people a chance to
+  // override Google Mock's format by defining a PrintTo() or
+  // operator<<.  For STL containers, other formats can be
+  // incompatible with Google Mock's format for the container
+  // elements; therefore we check for container types here to ensure
+  // that our format is used.
+  //
+  // Note that MSVC and clang-cl do allow an implicit conversion from
+  // pointer-to-function to pointer-to-object, but clang-cl warns on it.
+  // So don't use ImplicitlyConvertible if it can be helped since it will
+  // cause this warning, and use a separate overload of DefaultPrintTo for
+  // function pointers so that the `*os << p` in the object pointer overload
+  // doesn't cause that warning either.
+  DefaultPrintTo(
+      WrapPrinterType <
+          (sizeof(IsContainerTest<T>(0)) == sizeof(IsContainer)) &&
+                  !IsRecursiveContainer<T>::value
+              ? kPrintContainer
+              : !is_pointer<T>::value
+                    ? kPrintOther
+#if GTEST_LANG_CXX11
+                    : std::is_function<
+                          typename std::remove_pointer<T>::type>::value
+#else
+                    : !internal::ImplicitlyConvertible<T, const void*>::value
+#endif
+                          ? kPrintFunctionPointer
+                          : kPrintPointer > (),
+      value, os);
+}
+
+// The following list of PrintTo() overloads tells
+// UniversalPrinter<T>::Print() how to print standard types (built-in
+// types, strings, plain arrays, and pointers).
+
+// Overloads for various char types.
+GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
+GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
+inline void PrintTo(char c, ::std::ostream* os) {
+  // When printing a plain char, we always treat it as unsigned.  This
+  // way, the output won't be affected by whether the compiler thinks
+  // char is signed or not.
+  PrintTo(static_cast<unsigned char>(c), os);
+}
+
+// Overloads for other simple built-in types.
+inline void PrintTo(bool x, ::std::ostream* os) {
+  *os << (x ? "true" : "false");
+}
+
+// Overload for wchar_t type.
+// Prints a wchar_t as a symbol if it is printable or as its internal
+// code otherwise and also as its decimal code (except for L'\0').
+// The L'\0' char is printed as "L'\\0'".  The decimal code is printed
+// as signed integer when wchar_t is implemented by the compiler
+// as a signed type and is printed as an unsigned integer when wchar_t
+// is implemented as an unsigned type.
+GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
+
+// Overloads for C strings.
+GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
+inline void PrintTo(char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const char*>(s), os);
+}
+
+// signed/unsigned char is often used for representing binary data, so
+// we print pointers to it as void* to be safe.
+inline void PrintTo(const signed char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(signed char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(unsigned char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+
+// MSVC can be configured to define wchar_t as a typedef of unsigned
+// short.  It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
+// type.  When wchar_t is a typedef, defining an overload for const
+// wchar_t* would cause unsigned short* to be printed as a wide string,
+// possibly causing invalid memory accesses.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Overloads for wide C strings
+GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
+inline void PrintTo(wchar_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const wchar_t*>(s), os);
+}
+#endif
+
+// Overload for C arrays.  Multi-dimensional arrays are printed
+// properly.
+
+// Prints the given number of elements in an array, without printing
+// the curly braces.
+template <typename T>
+void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
+  UniversalPrint(a[0], os);
+  for (size_t i = 1; i != count; i++) {
+    *os << ", ";
+    UniversalPrint(a[i], os);
+  }
+}
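+
+// Illustration (not part of the original header): because the char overload
+// above forwards through unsigned char, a plain char prints the same way
+// regardless of the platform's char signedness, typically as the symbol plus
+// its numeric code:
+//
+//   char c = 'a';
+//   // PrintToString(c) is expected to yield something like "'a' (97, 0x61)"
+//
+// whereas a char* goes through the C-string overload and is printed as a
+// NUL-terminated string, not character by character.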
+// Overloads for ::string and ::std::string.
+#if GTEST_HAS_GLOBAL_STRING
+GTEST_API_ void PrintStringTo(const ::string& s, ::std::ostream* os);
+inline void PrintTo(const ::string& s, ::std::ostream* os) {
+  PrintStringTo(s, os);
+}
+#endif  // GTEST_HAS_GLOBAL_STRING
+
+GTEST_API_ void PrintStringTo(const ::std::string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
+  PrintStringTo(s, os);
+}
+
+// Overloads for ::wstring and ::std::wstring.
+#if GTEST_HAS_GLOBAL_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::wstring& s, ::std::ostream* os);
+inline void PrintTo(const ::wstring& s, ::std::ostream* os) {
+  PrintWideStringTo(s, os);
+}
+#endif  // GTEST_HAS_GLOBAL_WSTRING
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::std::wstring& s, ::std::ostream* os);
+inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
+  PrintWideStringTo(s, os);
+}
+#endif  // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_ABSL
+// Overload for absl::string_view.
+inline void PrintTo(absl::string_view sp, ::std::ostream* os) {
+  PrintTo(::std::string(sp), os);
+}
+#endif  // GTEST_HAS_ABSL
+
+#if GTEST_LANG_CXX11
+inline void PrintTo(std::nullptr_t, ::std::ostream* os) { *os << "(nullptr)"; }
+#endif  // GTEST_LANG_CXX11
+
+#if GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_
+// Helper function for printing a tuple.  T must be instantiated with
+// a tuple type.
+template <typename T>
+void PrintTupleTo(const T& t, ::std::ostream* os);
+#endif  // GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_
+
+#if GTEST_HAS_TR1_TUPLE
+// Overload for ::std::tr1::tuple.  Needed for printing function arguments,
+// which are packed as tuples.
+
+// Overloaded PrintTo() for tuples of various arities.  We support
+// tuples of up to 10 fields.  The following implementation works
+// regardless of whether tr1::tuple is implemented using the
+// non-standard variadic template feature or not.
+
+inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1>
+void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2>
+void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8, typename T9>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8, typename T9, typename T10>
+void PrintTo(
+    const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
+    ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+#endif  // GTEST_HAS_TR1_TUPLE
+
+#if GTEST_HAS_STD_TUPLE_
+template <typename... Types>
+void PrintTo(const ::std::tuple<Types...>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+#endif  // GTEST_HAS_STD_TUPLE_
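+
+// Illustration (not part of the original header): PrintTupleTo() (defined
+// further below) renders a tuple as a parenthesized, comma-separated list,
+// with each field printed by UniversalPrinter.  With hypothetical values:
+//
+//   std::tuple<int, bool> t(1, true);
+//   // PrintToString(t) is expected to yield "(1, true)"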
+// Overload for std::pair.
+template <typename T1, typename T2>
+void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
+  *os << '(';
+  // We cannot use UniversalPrint(value.first, os) here, as T1 may be
+  // a reference type.  The same for printing value.second.
+  UniversalPrinter<T1>::Print(value.first, os);
+  *os << ", ";
+  UniversalPrinter<T2>::Print(value.second, os);
+  *os << ')';
+}
+
+// Implements printing a non-reference type T by letting the compiler
+// pick the right overload of PrintTo() for T.
+template <typename T>
+class UniversalPrinter {
+ public:
+  // MSVC warns about adding const to a function type, so we want to
+  // disable the warning.
+  GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
+
+  // Note: we deliberately don't call this PrintTo(), as that name
+  // conflicts with ::testing::internal::PrintTo in the body of the
+  // function.
+  static void Print(const T& value, ::std::ostream* os) {
+    // By default, ::testing::internal::PrintTo() is used for printing
+    // the value.
+    //
+    // Thanks to Koenig look-up, if T is a class and has its own
+    // PrintTo() function defined in its namespace, that function will
+    // be visible here.  Since it is more specific than the generic ones
+    // in ::testing::internal, it will be picked by the compiler in the
+    // following statement - exactly what we want.
+    PrintTo(value, os);
+  }
+
+  GTEST_DISABLE_MSC_WARNINGS_POP_()
+};
+
+#if GTEST_HAS_ABSL
+
+// Printer for absl::optional
+
+template <typename T>
+class UniversalPrinter<::absl::optional<T>> {
+ public:
+  static void Print(const ::absl::optional<T>& value, ::std::ostream* os) {
+    *os << '(';
+    if (!value) {
+      *os << "nullopt";
+    } else {
+      UniversalPrint(*value, os);
+    }
+    *os << ')';
+  }
+};
+
+// Printer for absl::variant
+
+template <typename... T>
+class UniversalPrinter<::absl::variant<T...>> {
+ public:
+  static void Print(const ::absl::variant<T...>& value, ::std::ostream* os) {
+    *os << '(';
+    absl::visit(Visitor{os}, value);
+    *os << ')';
+  }
+
+ private:
+  struct Visitor {
+    template <typename U>
+    void operator()(const U& u) const {
+      *os << "'" << GetTypeName<U>() << "' with value ";
+      UniversalPrint(u, os);
+    }
+    ::std::ostream* os;
+  };
+};
+
+#endif  // GTEST_HAS_ABSL
+
+// UniversalPrintArray(begin, len, os) prints an array of 'len'
+// elements, starting at address 'begin'.
+template <typename T>
+void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
+  if (len == 0) {
+    *os << "{}";
+  } else {
+    *os << "{ ";
+    const size_t kThreshold = 18;
+    const size_t kChunkSize = 8;
+    // If the array has more than kThreshold elements, we'll have to
+    // omit some details by printing only the first and the last
+    // kChunkSize elements.
+    // FIXME: let the user control the threshold using a flag.
+    if (len <= kThreshold) {
+      PrintRawArrayTo(begin, len, os);
+    } else {
+      PrintRawArrayTo(begin, kChunkSize, os);
+      *os << ", ..., ";
+      PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os);
+    }
+    *os << " }";
+  }
+}
+// This overload prints a (const) char array compactly.
+GTEST_API_ void UniversalPrintArray(
+    const char* begin, size_t len, ::std::ostream* os);
+
+// This overload prints a (const) wchar_t array compactly.
+GTEST_API_ void UniversalPrintArray(
+    const wchar_t* begin, size_t len, ::std::ostream* os);
+
+// Implements printing an array type T[N].
+template <typename T, size_t N>
+class UniversalPrinter<T[N]> {
+ public:
+  // Prints the given array, omitting some elements when there are too
+  // many.
+  static void Print(const T (&a)[N], ::std::ostream* os) {
+    UniversalPrintArray(a, N, os);
+  }
+};
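+
+// Illustration (not part of the original header): arrays longer than
+// kThreshold (18) are elided down to the first and last kChunkSize (8)
+// elements.  With a hypothetical 20-element array:
+//
+//   int a[20];
+//   for (int i = 0; i < 20; i++) a[i] = i;
+//   // PrintToString(a) is expected to yield
+//   // "{ 0, 1, 2, 3, 4, 5, 6, 7, ..., 12, 13, 14, 15, 16, 17, 18, 19 }"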
+// Implements printing a reference type T&.
+template <typename T>
+class UniversalPrinter<T&> {
+ public:
+  // MSVC warns about adding const to a function type, so we want to
+  // disable the warning.
+  GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
+
+  static void Print(const T& value, ::std::ostream* os) {
+    // Prints the address of the value.  We use reinterpret_cast here
+    // as static_cast doesn't compile when T is a function type.
+    *os << "@" << reinterpret_cast<const void*>(&value) << " ";
+
+    // Then prints the value itself.
+    UniversalPrint(value, os);
+  }
+
+  GTEST_DISABLE_MSC_WARNINGS_POP_()
+};
+
+// Prints a value tersely: for a reference type, the referenced value
+// (but not the address) is printed; for a (const) char pointer, the
+// NUL-terminated string (but not the pointer) is printed.
+
+template <typename T>
+class UniversalTersePrinter {
+ public:
+  static void Print(const T& value, ::std::ostream* os) {
+    UniversalPrint(value, os);
+  }
+};
+template <typename T>
+class UniversalTersePrinter<T&> {
+ public:
+  static void Print(const T& value, ::std::ostream* os) {
+    UniversalPrint(value, os);
+  }
+};
+template <typename T, size_t N>
+class UniversalTersePrinter<T[N]> {
+ public:
+  static void Print(const T (&value)[N], ::std::ostream* os) {
+    UniversalPrinter<T[N]>::Print(value, os);
+  }
+};
+template <>
+class UniversalTersePrinter<const char*> {
+ public:
+  static void Print(const char* str, ::std::ostream* os) {
+    if (str == NULL) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(std::string(str), os);
+    }
+  }
+};
+template <>
+class UniversalTersePrinter<char*> {
+ public:
+  static void Print(char* str, ::std::ostream* os) {
+    UniversalTersePrinter<const char*>::Print(str, os);
+  }
+};
+
+#if GTEST_HAS_STD_WSTRING
+template <>
+class UniversalTersePrinter<const wchar_t*> {
+ public:
+  static void Print(const wchar_t* str, ::std::ostream* os) {
+    if (str == NULL) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(::std::wstring(str), os);
+    }
+  }
+};
+#endif
+
+template <>
+class UniversalTersePrinter<wchar_t*> {
+ public:
+  static void Print(wchar_t* str, ::std::ostream* os) {
+    UniversalTersePrinter<const wchar_t*>::Print(str, os);
+  }
+};
+
+template <typename T>
+void UniversalTersePrint(const T& value, ::std::ostream* os) {
+  UniversalTersePrinter<T>::Print(value, os);
+}
+
+// Prints a value using the type inferred by the compiler.  The
+// difference between this and UniversalTersePrint() is that for a
+// (const) char pointer, this prints both the pointer and the
+// NUL-terminated string.
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os) {
+  // A workaround for the bug in VC++ 7.1 that prevents us from instantiating
+  // UniversalPrinter with T directly.
+  typedef T T1;
+  UniversalPrinter<T1>::Print(value, os);
+}
+
+typedef ::std::vector< ::std::string> Strings;
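+
+// Illustration (not part of the original header): terse vs. full printing of
+// a C string, with a hypothetical pointer value:
+//
+//   const char* s = "hi";
+//   // UniversalTersePrint(s, &os)  ->  "hi"          (just the quoted string)
+//   // UniversalPrint(s, &os)       ->  something like 0x7ffe... pointing to "hi"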
+// TuplePolicy<TupleT> must provide:
+// - tuple_size
+//     size of tuple TupleT.
+// - get<size_t I>(const TupleT& t)
+//     static function extracting element I of tuple TupleT.
+// - tuple_element<size_t I>::type
+//     type of element I of tuple TupleT.
+template <typename TupleT>
+struct TuplePolicy;
+
+#if GTEST_HAS_TR1_TUPLE
+template <typename TupleT>
+struct TuplePolicy {
+  typedef TupleT Tuple;
+  static const size_t tuple_size = ::std::tr1::tuple_size<Tuple>::value;
+
+  template <size_t I>
+  struct tuple_element
+      : ::std::tr1::tuple_element<static_cast<int>(I), Tuple> {
+  };
+
+  template <size_t I>
+  static typename AddReference<const typename ::std::tr1::tuple_element<
+      static_cast<int>(I), Tuple>::type>::type
+  get(const Tuple& tuple) {
+    return ::std::tr1::get<I>(tuple);
+  }
+};
+template <typename TupleT>
+const size_t TuplePolicy<TupleT>::tuple_size;
+#endif  // GTEST_HAS_TR1_TUPLE
+
+#if GTEST_HAS_STD_TUPLE_
+template <typename... Types>
+struct TuplePolicy< ::std::tuple<Types...> > {
+  typedef ::std::tuple<Types...> Tuple;
+  static const size_t tuple_size = ::std::tuple_size<Tuple>::value;
+
+  template <size_t I>
+  struct tuple_element : ::std::tuple_element<I, Tuple> {};
+
+  template <size_t I>
+  static const typename ::std::tuple_element<I, Tuple>::type& get(
+      const Tuple& tuple) {
+    return ::std::get<I>(tuple);
+  }
+};
+template <typename... Types>
+const size_t TuplePolicy< ::std::tuple<Types...> >::tuple_size;
+#endif  // GTEST_HAS_STD_TUPLE_
+
+#if GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_
+// This helper template allows PrintTo() for tuples and
+// UniversalTersePrintTupleFieldsToStrings() to be defined by
+// induction on the number of tuple fields.  The idea is that
+// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
+// fields in tuple t, and can be defined in terms of
+// TuplePrefixPrinter<N - 1>.
+//
+// The inductive case.
+template <size_t N>
+struct TuplePrefixPrinter {
+  // Prints the first N fields of a tuple.
+  template <typename Tuple>
+  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
+    TuplePrefixPrinter<N - 1>::PrintPrefixTo(t, os);
+    GTEST_INTENTIONAL_CONST_COND_PUSH_()
+    if (N > 1) {
+      GTEST_INTENTIONAL_CONST_COND_POP_()
+      *os << ", ";
+    }
+    UniversalPrinter<
+        typename TuplePolicy<Tuple>::template tuple_element<N - 1>::type>
+        ::Print(TuplePolicy<Tuple>::template get<N - 1>(t), os);
+  }
+
+  // Tersely prints the first N fields of a tuple to a string vector,
+  // one element for each field.
+  template <typename Tuple>
+  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
+    TuplePrefixPrinter<N - 1>::TersePrintPrefixToStrings(t, strings);
+    ::std::stringstream ss;
+    UniversalTersePrint(TuplePolicy<Tuple>::template get<N - 1>(t), &ss);
+    strings->push_back(ss.str());
+  }
+};
+
+// Base case.
+template <>
+struct TuplePrefixPrinter<0> {
+  template <typename Tuple>
+  static void PrintPrefixTo(const Tuple&, ::std::ostream*) {}
+
+  template <typename Tuple>
+  static void TersePrintPrefixToStrings(const Tuple&, Strings*) {}
+};
+
+// Helper function for printing a tuple.
+// Tuple must be either std::tr1::tuple or std::tuple type.
+template <typename Tuple>
+void PrintTupleTo(const Tuple& t, ::std::ostream* os) {
+  *os << "(";
+  TuplePrefixPrinter<TuplePolicy<Tuple>::tuple_size>::PrintPrefixTo(t, os);
+  *os << ")";
+}
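+
+// Illustration (not part of the original header): for a 3-tuple the
+// induction unrolls as
+//
+//   TuplePrefixPrinter<3>::PrintPrefixTo(t, os)
+//     -> TuplePrefixPrinter<2>::PrintPrefixTo(t, os)
+//          -> TuplePrefixPrinter<1>::PrintPrefixTo(t, os)
+//               -> TuplePrefixPrinter<0>   // base case, prints nothing
+//
+// and each level prints ", " (except the first) followed by field N - 1 on
+// the way back up, so the fields come out in order, separated by ", ".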
+// Prints the fields of a tuple tersely to a string vector, one
+// element for each field.  See the comment before
+// UniversalTersePrint() for how we define "tersely".
+template <typename Tuple>
+Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
+  Strings result;
+  TuplePrefixPrinter<TuplePolicy<Tuple>::tuple_size>::
+      TersePrintPrefixToStrings(value, &result);
+  return result;
+}
+#endif  // GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_
+
+}  // namespace internal
+
+#if GTEST_HAS_ABSL
+namespace internal2 {
+template <typename T>
+void TypeWithoutFormatter<T, kConvertibleToStringView>::PrintValue(
+    const T& value, ::std::ostream* os) {
+  internal::PrintTo(absl::string_view(value), os);
+}
+}  // namespace internal2
+#endif
+
+template <typename T>
+::std::string PrintToString(const T& value) {
+  ::std::stringstream ss;
+  internal::UniversalTersePrinter<T>::Print(value, &ss);
+  return ss.str();
+}
+
+}  // namespace testing
+
+// Include any custom printer added by the local installation.
+// We must include this header at the end to make sure it can use the
+// declarations from this file.
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// This file provides an injection point for custom printers in a local
+// installation of gTest.
+// It will be included from gtest-printers.h and the overrides in this file
+// will be visible to everyone.
+//
+// Injection point for custom user configurations. See README for details
+//
+// ** Custom implementation starts here **
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+
+namespace testing {
+
+// Input to a parameterized test name generator, describing a test parameter.
+// Consists of the parameter value and the integer parameter index.
+template <class ParamType>
+struct TestParamInfo {
+  TestParamInfo(const ParamType& a_param, size_t an_index) :
+    param(a_param),
+    index(an_index) {}
+  ParamType param;
+  size_t index;
+};
+
+// A builtin parameterized test name generator which returns the result of
+// testing::PrintToString.
+struct PrintToStringParamName { + template + std::string operator()(const TestParamInfo& info) const { + return PrintToString(info.param); + } +}; + +namespace internal { + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Outputs a message explaining invalid registration of different +// fixture class for the same test case. This may happen when +// TEST_P macro is used to define two tests with the same name +// but in different namespaces. +GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name, + CodeLocation code_location); + +template class ParamGeneratorInterface; +template class ParamGenerator; + +// Interface for iterating over elements provided by an implementation +// of ParamGeneratorInterface. +template +class ParamIteratorInterface { + public: + virtual ~ParamIteratorInterface() {} + // A pointer to the base generator instance. + // Used only for the purposes of iterator comparison + // to make sure that two iterators belong to the same generator. + virtual const ParamGeneratorInterface* BaseGenerator() const = 0; + // Advances iterator to point to the next element + // provided by the generator. The caller is responsible + // for not calling Advance() on an iterator equal to + // BaseGenerator()->End(). + virtual void Advance() = 0; + // Clones the iterator object. Used for implementing copy semantics + // of ParamIterator. + virtual ParamIteratorInterface* Clone() const = 0; + // Dereferences the current iterator and provides (read-only) access + // to the pointed value. It is the caller's responsibility not to call + // Current() on an iterator equal to BaseGenerator()->End(). + // Used for implementing ParamGenerator::operator*(). + virtual const T* Current() const = 0; + // Determines whether the given iterator and other point to the same + // element in the sequence generated by the generator. + // Used for implementing ParamGenerator::operator==(). + virtual bool Equals(const ParamIteratorInterface& other) const = 0; +}; + +// Class iterating over elements provided by an implementation of +// ParamGeneratorInterface. It wraps ParamIteratorInterface +// and implements the const forward iterator concept. +template +class ParamIterator { + public: + typedef T value_type; + typedef const T& reference; + typedef ptrdiff_t difference_type; + + // ParamIterator assumes ownership of the impl_ pointer. + ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {} + ParamIterator& operator=(const ParamIterator& other) { + if (this != &other) + impl_.reset(other.impl_->Clone()); + return *this; + } + + const T& operator*() const { return *impl_->Current(); } + const T* operator->() const { return impl_->Current(); } + // Prefix version of operator++. + ParamIterator& operator++() { + impl_->Advance(); + return *this; + } + // Postfix version of operator++. + ParamIterator operator++(int /*unused*/) { + ParamIteratorInterface* clone = impl_->Clone(); + impl_->Advance(); + return ParamIterator(clone); + } + bool operator==(const ParamIterator& other) const { + return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_); + } + bool operator!=(const ParamIterator& other) const { + return !(*this == other); + } + + private: + friend class ParamGenerator; + explicit ParamIterator(ParamIteratorInterface* impl) : impl_(impl) {} + scoped_ptr > impl_; +}; + +// ParamGeneratorInterface is the binary interface to access generators +// defined in other translation units. 
+template +class ParamGeneratorInterface { + public: + typedef T ParamType; + + virtual ~ParamGeneratorInterface() {} + + // Generator interface definition + virtual ParamIteratorInterface* Begin() const = 0; + virtual ParamIteratorInterface* End() const = 0; +}; + +// Wraps ParamGeneratorInterface and provides general generator syntax +// compatible with the STL Container concept. +// This class implements copy initialization semantics and the contained +// ParamGeneratorInterface instance is shared among all copies +// of the original object. This is possible because that instance is immutable. +template +class ParamGenerator { + public: + typedef ParamIterator iterator; + + explicit ParamGenerator(ParamGeneratorInterface* impl) : impl_(impl) {} + ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {} + + ParamGenerator& operator=(const ParamGenerator& other) { + impl_ = other.impl_; + return *this; + } + + iterator begin() const { return iterator(impl_->Begin()); } + iterator end() const { return iterator(impl_->End()); } + + private: + linked_ptr > impl_; +}; + +// Generates values from a range of two comparable values. Can be used to +// generate sequences of user-defined types that implement operator+() and +// operator<(). +// This class is used in the Range() function. +template +class RangeGenerator : public ParamGeneratorInterface { + public: + RangeGenerator(T begin, T end, IncrementT step) + : begin_(begin), end_(end), + step_(step), end_index_(CalculateEndIndex(begin, end, step)) {} + virtual ~RangeGenerator() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, begin_, 0, step_); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, end_, end_index_, step_); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, T value, int index, + IncrementT step) + : base_(base), value_(value), index_(index), step_(step) {} + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + virtual void Advance() { + value_ = static_cast(value_ + step_); + index_++; + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const T* Current() const { return &value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const int other_index = + CheckedDowncastToActualType(&other)->index_; + return index_ == other_index; + } + + private: + Iterator(const Iterator& other) + : ParamIteratorInterface(), + base_(other.base_), value_(other.value_), index_(other.index_), + step_(other.step_) {} + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + T value_; + int index_; + const IncrementT step_; + }; // class RangeGenerator::Iterator + + static int CalculateEndIndex(const T& begin, + const T& end, + const IncrementT& step) { + int end_index = 0; + for (T i = begin; i < end; i = static_cast(i + step)) + end_index++; + return end_index; + } + + // No implementation - assignment is unsupported. 
+ void operator=(const RangeGenerator& other); + + const T begin_; + const T end_; + const IncrementT step_; + // The index for the end() iterator. All the elements in the generated + // sequence are indexed (0-based) to aid iterator comparison. + const int end_index_; +}; // class RangeGenerator + + +// Generates values from a pair of STL-style iterators. Used in the +// ValuesIn() function. The elements are copied from the source range +// since the source can be located on the stack, and the generator +// is likely to persist beyond that stack frame. +template +class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface { + public: + template + ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end) + : container_(begin, end) {} + virtual ~ValuesInIteratorRangeGenerator() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, container_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, container_.end()); + } + + private: + typedef typename ::std::vector ContainerType; + + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + typename ContainerType::const_iterator iterator) + : base_(base), iterator_(iterator) {} + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + virtual void Advance() { + ++iterator_; + value_.reset(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + // We need to use cached value referenced by iterator_ because *iterator_ + // can return a temporary object (and of type other then T), so just + // having "return &*iterator_;" doesn't work. + // value_ is updated here and not in Advance() because Advance() + // can advance iterator_ beyond the end of the range, and we cannot + // detect that fact. The client code, on the other hand, is + // responsible for not calling Current() on an out-of-range iterator. + virtual const T* Current() const { + if (value_.get() == NULL) + value_.reset(new T(*iterator_)); + return value_.get(); + } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + return iterator_ == + CheckedDowncastToActualType(&other)->iterator_; + } + + private: + Iterator(const Iterator& other) + // The explicit constructor call suppresses a false warning + // emitted by gcc when supplied with the -Wextra option. + : ParamIteratorInterface(), + base_(other.base_), + iterator_(other.iterator_) {} + + const ParamGeneratorInterface* const base_; + typename ContainerType::const_iterator iterator_; + // A cached value of *iterator_. We keep it here to allow access by + // pointer in the wrapping iterator's operator->(). + // value_ needs to be mutable to be accessed in Current(). + // Use of scoped_ptr helps manage cached value's lifetime, + // which is bound by the lifespan of the iterator itself. + mutable scoped_ptr value_; + }; // class ValuesInIteratorRangeGenerator::Iterator + + // No implementation - assignment is unsupported. + void operator=(const ValuesInIteratorRangeGenerator& other); + + const ContainerType container_; +}; // class ValuesInIteratorRangeGenerator + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. 
+// +// Default parameterized test name generator, returns a string containing the +// integer test parameter index. +template +std::string DefaultParamName(const TestParamInfo& info) { + Message name_stream; + name_stream << info.index; + return name_stream.GetString(); +} + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Parameterized test name overload helpers, which help the +// INSTANTIATE_TEST_CASE_P macro choose between the default parameterized +// test name generator and user param name generator. +template +ParamNameGenFunctor GetParamNameGen(ParamNameGenFunctor func) { + return func; +} + +template +struct ParamNameGenFunc { + typedef std::string Type(const TestParamInfo&); +}; + +template +typename ParamNameGenFunc::Type *GetParamNameGen() { + return DefaultParamName; +} + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Stores a parameter value and later creates tests parameterized with that +// value. +template +class ParameterizedTestFactory : public TestFactoryBase { + public: + typedef typename TestClass::ParamType ParamType; + explicit ParameterizedTestFactory(ParamType parameter) : + parameter_(parameter) {} + virtual Test* CreateTest() { + TestClass::SetParam(¶meter_); + return new TestClass(); + } + + private: + const ParamType parameter_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory); +}; + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// TestMetaFactoryBase is a base class for meta-factories that create +// test factories for passing into MakeAndRegisterTestInfo function. +template +class TestMetaFactoryBase { + public: + virtual ~TestMetaFactoryBase() {} + + virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0; +}; + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// TestMetaFactory creates test factories for passing into +// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives +// ownership of test factory pointer, same factory object cannot be passed +// into that method twice. But ParameterizedTestCaseInfo is going to call +// it for each Test/Parameter value combination. Thus it needs meta factory +// creator class. +template +class TestMetaFactory + : public TestMetaFactoryBase { + public: + typedef typename TestCase::ParamType ParamType; + + TestMetaFactory() {} + + virtual TestFactoryBase* CreateTestFactory(ParamType parameter) { + return new ParameterizedTestFactory(parameter); + } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory); +}; + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// ParameterizedTestCaseInfoBase is a generic interface +// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase +// accumulates test information provided by TEST_P macro invocations +// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations +// and uses that information to register all resulting test instances +// in RegisterTests method. The ParameterizeTestCaseRegistry class holds +// a collection of pointers to the ParameterizedTestCaseInfo objects +// and calls RegisterTests() on each of them when asked. +class ParameterizedTestCaseInfoBase { + public: + virtual ~ParameterizedTestCaseInfoBase() {} + + // Base part of test case name for display purposes. + virtual const std::string& GetTestCaseName() const = 0; + // Test case id to verify identity. 
+  virtual TypeId GetTestCaseTypeId() const = 0;
+  // UnitTest class invokes this method to register tests in this
+  // test case right before running them in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestCaseInfoBase derived class.
+  virtual void RegisterTests() = 0;
+
+ protected:
+  ParameterizedTestCaseInfoBase() {}
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
+// macro invocations for a particular test case and generators
+// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
+// test case.  It registers tests with all values generated by all
+// generators when asked.
+template <class TestCase>
+class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
+ public:
+  // ParamType and GeneratorCreationFunc are private types but are required
+  // for declarations of public methods AddTestPattern() and
+  // AddTestCaseInstantiation().
+  typedef typename TestCase::ParamType ParamType;
+  // A function that returns an instance of appropriate generator type.
+  typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
+  typedef typename ParamNameGenFunc<ParamType>::Type ParamNameGeneratorFunc;
+
+  explicit ParameterizedTestCaseInfo(
+      const char* name, CodeLocation code_location)
+      : test_case_name_(name), code_location_(code_location) {}
+
+  // Test case base name for display purposes.
+  virtual const std::string& GetTestCaseName() const {
+    return test_case_name_;
+  }
+  // Test case id to verify identity.
+  virtual TypeId GetTestCaseTypeId() const { return GetTypeId<TestCase>(); }
+  // TEST_P macro uses AddTestPattern() to record information
+  // about a single test in a LocalTestInfo structure.
+  // test_case_name is the base name of the test case (without invocation
+  // prefix). test_base_name is the name of an individual test without
+  // parameter index. For the test SequenceA/FooTest.DoBar/1, FooTest is
+  // the test case base name and DoBar is the test base name.
+  void AddTestPattern(const char* test_case_name,
+                      const char* test_base_name,
+                      TestMetaFactoryBase<ParamType>* meta_factory) {
+    tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
+                                                       test_base_name,
+                                                       meta_factory)));
+  }
+  // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information
+  // about a generator.
+  int AddTestCaseInstantiation(const std::string& instantiation_name,
+                               GeneratorCreationFunc* func,
+                               ParamNameGeneratorFunc* name_func,
+                               const char* file, int line) {
+    instantiations_.push_back(
+        InstantiationInfo(instantiation_name, func, name_func, file, line));
+    return 0;  // Return value used only to run this method in namespace scope.
+  }
+  // UnitTest class invokes this method to register tests in this test case
+  // right before running tests in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestCaseInfoBase derived class.
+  // UnitTest has a guard to prevent from calling this method more than once.
+ virtual void RegisterTests() { + for (typename TestInfoContainer::iterator test_it = tests_.begin(); + test_it != tests_.end(); ++test_it) { + linked_ptr test_info = *test_it; + for (typename InstantiationContainer::iterator gen_it = + instantiations_.begin(); gen_it != instantiations_.end(); + ++gen_it) { + const std::string& instantiation_name = gen_it->name; + ParamGenerator generator((*gen_it->generator)()); + ParamNameGeneratorFunc* name_func = gen_it->name_func; + const char* file = gen_it->file; + int line = gen_it->line; + + std::string test_case_name; + if ( !instantiation_name.empty() ) + test_case_name = instantiation_name + "/"; + test_case_name += test_info->test_case_base_name; + + size_t i = 0; + std::set test_param_names; + for (typename ParamGenerator::iterator param_it = + generator.begin(); + param_it != generator.end(); ++param_it, ++i) { + Message test_name_stream; + + std::string param_name = name_func( + TestParamInfo(*param_it, i)); + + GTEST_CHECK_(IsValidParamName(param_name)) + << "Parameterized test name '" << param_name + << "' is invalid, in " << file + << " line " << line << std::endl; + + GTEST_CHECK_(test_param_names.count(param_name) == 0) + << "Duplicate parameterized test name '" << param_name + << "', in " << file << " line " << line << std::endl; + + test_param_names.insert(param_name); + + test_name_stream << test_info->test_base_name << "/" << param_name; + MakeAndRegisterTestInfo( + test_case_name.c_str(), + test_name_stream.GetString().c_str(), + NULL, // No type parameter. + PrintToString(*param_it).c_str(), + code_location_, + GetTestCaseTypeId(), + TestCase::SetUpTestCase, + TestCase::TearDownTestCase, + test_info->test_meta_factory->CreateTestFactory(*param_it)); + } // for param_it + } // for gen_it + } // for test_it + } // RegisterTests + + private: + // LocalTestInfo structure keeps information about a single test registered + // with TEST_P macro. + struct TestInfo { + TestInfo(const char* a_test_case_base_name, + const char* a_test_base_name, + TestMetaFactoryBase* a_test_meta_factory) : + test_case_base_name(a_test_case_base_name), + test_base_name(a_test_base_name), + test_meta_factory(a_test_meta_factory) {} + + const std::string test_case_base_name; + const std::string test_base_name; + const scoped_ptr > test_meta_factory; + }; + typedef ::std::vector > TestInfoContainer; + // Records data received from INSTANTIATE_TEST_CASE_P macros: + // + struct InstantiationInfo { + InstantiationInfo(const std::string &name_in, + GeneratorCreationFunc* generator_in, + ParamNameGeneratorFunc* name_func_in, + const char* file_in, + int line_in) + : name(name_in), + generator(generator_in), + name_func(name_func_in), + file(file_in), + line(line_in) {} + + std::string name; + GeneratorCreationFunc* generator; + ParamNameGeneratorFunc* name_func; + const char* file; + int line; + }; + typedef ::std::vector InstantiationContainer; + + static bool IsValidParamName(const std::string& name) { + // Check for empty string + if (name.empty()) + return false; + + // Check for invalid characters + for (std::string::size_type index = 0; index < name.size(); ++index) { + if (!isalnum(name[index]) && name[index] != '_') + return false; + } + + return true; + } + + const std::string test_case_name_; + CodeLocation code_location_; + TestInfoContainer tests_; + InstantiationContainer instantiations_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo); +}; // class ParameterizedTestCaseInfo + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. 
+// +// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase +// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P +// macros use it to locate their corresponding ParameterizedTestCaseInfo +// descriptors. +class ParameterizedTestCaseRegistry { + public: + ParameterizedTestCaseRegistry() {} + ~ParameterizedTestCaseRegistry() { + for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); + it != test_case_infos_.end(); ++it) { + delete *it; + } + } + + // Looks up or creates and returns a structure containing information about + // tests and instantiations of a particular test case. + template + ParameterizedTestCaseInfo* GetTestCasePatternHolder( + const char* test_case_name, + CodeLocation code_location) { + ParameterizedTestCaseInfo* typed_test_info = NULL; + for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); + it != test_case_infos_.end(); ++it) { + if ((*it)->GetTestCaseName() == test_case_name) { + if ((*it)->GetTestCaseTypeId() != GetTypeId()) { + // Complain about incorrect usage of Google Test facilities + // and terminate the program since we cannot guaranty correct + // test case setup and tear-down in this case. + ReportInvalidTestCaseType(test_case_name, code_location); + posix::Abort(); + } else { + // At this point we are sure that the object we found is of the same + // type we are looking for, so we downcast it to that type + // without further checks. + typed_test_info = CheckedDowncastToActualType< + ParameterizedTestCaseInfo >(*it); + } + break; + } + } + if (typed_test_info == NULL) { + typed_test_info = new ParameterizedTestCaseInfo( + test_case_name, code_location); + test_case_infos_.push_back(typed_test_info); + } + return typed_test_info; + } + void RegisterTests() { + for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); + it != test_case_infos_.end(); ++it) { + (*it)->RegisterTests(); + } + } + + private: + typedef ::std::vector TestCaseInfoContainer; + + TestCaseInfoContainer test_case_infos_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry); +}; + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ +// This file was GENERATED by command: +// pump.py gtest-param-util-generated.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Type and function utilities for implementing parameterized tests. +// This file is generated by a SCRIPT. DO NOT EDIT BY HAND! +// +// Currently Google Test supports at most 50 arguments in Values, +// and at most 10 arguments in Combine. Please contact +// googletestframework@googlegroups.com if you need more. +// Please note that the number of arguments to Combine is limited +// by the maximum arity of the implementation of tuple which is +// currently set at 10. + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ + + +namespace testing { + +// Forward declarations of ValuesIn(), which is implemented in +// include/gtest/gtest-param-test.h. +template +internal::ParamGenerator< + typename ::testing::internal::IteratorTraits::value_type> +ValuesIn(ForwardIterator begin, ForwardIterator end); + +template +internal::ParamGenerator ValuesIn(const T (&array)[N]); + +template +internal::ParamGenerator ValuesIn( + const Container& container); + +namespace internal { + +// Used in the Values() function to provide polymorphic capabilities. +template +class ValueArray1 { + public: + explicit ValueArray1(T1 v1) : v1_(v1) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_)}; + return ValuesIn(array); + } + + ValueArray1(const ValueArray1& other) : v1_(other.v1_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray1& other); + + const T1 v1_; +}; + +template +class ValueArray2 { + public: + ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_)}; + return ValuesIn(array); + } + + ValueArray2(const ValueArray2& other) : v1_(other.v1_), v2_(other.v2_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray2& other); + + const T1 v1_; + const T2 v2_; +}; + +template +class ValueArray3 { + public: + ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_)}; + return ValuesIn(array); + } + + ValueArray3(const ValueArray3& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray3& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; +}; + +template +class ValueArray4 { + public: + ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_)}; + return ValuesIn(array); + } + + ValueArray4(const ValueArray4& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray4& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; +}; + +template +class ValueArray5 { + public: + ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_)}; + return ValuesIn(array); + } + + ValueArray5(const ValueArray5& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray5& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; +}; + +template +class ValueArray6 { + public: + ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_)}; + return ValuesIn(array); + } + + ValueArray6(const ValueArray6& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray6& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; +}; + +template +class ValueArray7 { + public: + ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_)}; + return ValuesIn(array); + } + + ValueArray7(const ValueArray7& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray7& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; +}; + +template +class ValueArray8 { + public: + ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_)}; + return ValuesIn(array); + } + + ValueArray8(const ValueArray8& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray8& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; +}; + +template +class ValueArray9 { + public: + ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_)}; + return ValuesIn(array); + } + + ValueArray9(const ValueArray9& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray9& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; +}; + +template +class ValueArray10 { + public: + ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_)}; + return ValuesIn(array); + } + + ValueArray10(const ValueArray10& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_) {} + + private: + // No implementation - assignment is unsupported. 
+  void operator=(const ValueArray10& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11>
+class ValueArray11 {
+ public:
+  ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+      T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray11(const ValueArray11& other) : v1_(other.v1_), v2_(other.v2_),
+      v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_),
+      v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_),
+      v11_(other.v11_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray11& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12>
+class ValueArray12 {
+ public:
+  ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+      T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
+      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+        static_cast<T>(v12_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray12(const ValueArray12& other) : v1_(other.v1_), v2_(other.v2_),
+      v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_),
+      v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_),
+      v11_(other.v11_), v12_(other.v12_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray12& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13>
+class ValueArray13 {
+ public:
+  ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+      T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
+      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
+      v12_(v12), v13_(v13) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+        static_cast<T>(v12_), static_cast<T>(v13_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray13(const ValueArray13& other) : v1_(other.v1_), v2_(other.v2_),
+      v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_),
+      v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_),
+      v11_(other.v11_), v12_(other.v12_), v13_(other.v13_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray13& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14>
+class ValueArray14 {
+ public:
+  ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3),
+      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+      v11_(v11), v12_(v12), v13_(v13), v14_(v14) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray14(const ValueArray14& other) : v1_(other.v1_), v2_(other.v2_),
+      v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_),
+      v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_),
+      v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray14& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15>
+class ValueArray15 {
+ public:
+  ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2),
+      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+        static_cast<T>(v15_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray15(const ValueArray15& other) : v1_(other.v1_), v2_(other.v2_),
+      v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_),
+      v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_),
+      v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_),
+      v15_(other.v15_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray15& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16>
+class ValueArray16 {
+ public:
+  ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1),
+      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
+      v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
+      v16_(v16) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+        static_cast<T>(v15_), static_cast<T>(v16_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray16(const ValueArray16& other) : v1_(other.v1_), v2_(other.v2_),
+      v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_),
+      v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_),
+      v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_),
+      v15_(other.v15_), v16_(other.v16_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray16& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17>
+class ValueArray17 {
+ public:
+  ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+      v15_(v15), v16_(v16), v17_(v17) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray17(const ValueArray17& other) : v1_(other.v1_), v2_(other.v2_),
+      v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_),
+      v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_),
+      v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_),
+      v15_(other.v15_), v16_(other.v16_), v17_(other.v17_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+ void operator=(const ValueArray17& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; +}; + +template +class ValueArray18 { + public: + ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_)}; + return ValuesIn(array); + } + + ValueArray18(const ValueArray18& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray18& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; +}; + +template +class ValueArray19 { + public: + ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_)}; + return ValuesIn(array); + } + + ValueArray19(const ValueArray19& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray19& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; +}; + +template +class ValueArray20 { + public: + ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_)}; + return ValuesIn(array); + } + + ValueArray20(const ValueArray20& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_) {} + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray20& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; +}; + +template +class ValueArray21 { + public: + ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_)}; + return ValuesIn(array); + } + + ValueArray21(const ValueArray21& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_) {} + + private: + // No 
implementation - assignment is unsupported. + void operator=(const ValueArray21& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; +}; + +template +class ValueArray22 { + public: + ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_)}; + return ValuesIn(array); + } + + ValueArray22(const ValueArray22& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray22& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; +}; + +template +class ValueArray23 { + public: + ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_)}; + return ValuesIn(array); + } + + ValueArray23(const ValueArray23& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray23& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; +}; + +template +class ValueArray24 { + public: + ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_)}; + return ValuesIn(array); + } + + ValueArray24(const ValueArray24& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray24& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; +}; + +template +class ValueArray25 { + public: + ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_)}; + return ValuesIn(array); + } + + ValueArray25(const ValueArray25& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray25& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; +}; + +template +class ValueArray26 { + public: + ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_)}; + return ValuesIn(array); + } + + ValueArray26(const ValueArray26& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray26& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; +}; + +template +class ValueArray27 { + public: + ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), + v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), + v26_(v26), v27_(v27) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_)}; + return ValuesIn(array); + } + + ValueArray27(const ValueArray27& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray27& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; +}; + +template +class ValueArray28 { + public: + ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), + v25_(v25), v26_(v26), v27_(v27), v28_(v28) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_)}; + return ValuesIn(array); + } + + ValueArray28(const ValueArray28& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray28& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; +}; + +template +class ValueArray29 { + public: + ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), + v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_)}; + return ValuesIn(array); + } + + ValueArray29(const ValueArray29& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray29& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; +}; + +template +class ValueArray30 { + public: + ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_)}; + return ValuesIn(array); + } + + ValueArray30(const ValueArray30& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray30& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; +}; + +template +class ValueArray31 { + public: + ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_)}; + return ValuesIn(array); + } + + ValueArray31(const ValueArray31& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray31& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; +}; + +template +class ValueArray32 { + public: + ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), + v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_)}; + return ValuesIn(array); + } + + ValueArray32(const ValueArray32& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray32& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; +}; + +template +class ValueArray33 { + public: + ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, + T33 v33) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_)}; + return ValuesIn(array); + } + + ValueArray33(const ValueArray33& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray33& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; +}; + +template +class ValueArray34 { + public: + ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_)}; + return ValuesIn(array); + } + + ValueArray34(const ValueArray34& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray34& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; +}; + +template +class ValueArray35 { + public: + ValueArray35(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), + v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), + v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), + v32_(v32), v33_(v33), v34_(v34), v35_(v35) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_)}; + return ValuesIn(array); + } + + ValueArray35(const ValueArray35& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray35& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; +}; + +template +class ValueArray36 { + public: + ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), + v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), + v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_)}; + return ValuesIn(array); + } + + ValueArray36(const ValueArray36& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray36& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; +}; + +template +class ValueArray37 { + public: + ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), + v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), + v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), + v36_(v36), v37_(v37) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_)}; + return ValuesIn(array); + } + + ValueArray37(const ValueArray37& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_) {} + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray37& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; +}; + +template +class ValueArray38 { + public: + ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), + v35_(v35), v36_(v36), v37_(v37), v38_(v38) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_)}; + return ValuesIn(array); + } + + ValueArray38(const ValueArray38& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_) {} + + private: + // No implementation - assignment is unsupported. 
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39>
+class ValueArray39 {
+ public:
+  ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray39(const ValueArray39& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray39& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40>
+class ValueArray40 {
+ public:
+  ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray40(const ValueArray40& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_),
+        v40_(other.v40_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray40& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41>
+class ValueArray41 {
+ public:
+  ValueArray41(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray41(const ValueArray41& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_),
+        v41_(other.v41_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray41& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42>
+class ValueArray42 {
+ public:
+  ValueArray42(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray42(const ValueArray42& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_),
+        v41_(other.v41_), v42_(other.v42_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray42& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43>
+class ValueArray43 {
+ public:
+  ValueArray43(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42, T43 v43)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+        v43_(v43) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
+        static_cast<T>(v43_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray43(const ValueArray43& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_),
+        v41_(other.v41_), v42_(other.v42_), v43_(other.v43_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray43& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+  const T43 v43_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43, typename T44>
+class ValueArray44 {
+ public:
+  ValueArray44(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42, T43 v43, T44 v44)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+        v43_(v43), v44_(v44) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
+        static_cast<T>(v43_), static_cast<T>(v44_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray44(const ValueArray44& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_),
+        v41_(other.v41_), v42_(other.v42_), v43_(other.v43_),
+        v44_(other.v44_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray44& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+  const T43 v43_;
+  const T44 v44_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43, typename T44, typename T45>
+class ValueArray45 {
+ public:
+  ValueArray45(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42, T43 v43, T44 v44, T45 v45)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+        v43_(v43), v44_(v44), v45_(v45) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
+        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray45(const ValueArray45& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_),
+        v39_(other.v39_), v40_(other.v40_), v41_(other.v41_),
+        v42_(other.v42_), v43_(other.v43_), v44_(other.v44_),
+        v45_(other.v45_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray45& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+  const T43 v43_;
+  const T44 v44_;
+  const T45 v45_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43, typename T44, typename T45,
+    typename T46>
+class ValueArray46 {
+ public:
+  ValueArray46(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+        v43_(v43), v44_(v44), v45_(v45), v46_(v46) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
+        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
+        static_cast<T>(v46_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray46(const ValueArray46& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_),
+        v32_(other.v32_), v33_(other.v33_), v34_(other.v34_),
+        v35_(other.v35_), v36_(other.v36_), v37_(other.v37_),
+        v38_(other.v38_), v39_(other.v39_), v40_(other.v40_),
+        v41_(other.v41_), v42_(other.v42_), v43_(other.v43_),
+        v44_(other.v44_), v45_(other.v45_), v46_(other.v46_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray46& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+  const T43 v43_;
+  const T44 v44_;
+  const T45 v45_;
+  const T46 v46_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43, typename T44, typename T45,
+    typename T46, typename T47>
+class ValueArray47 {
+ public:
+  ValueArray47(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
+        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
+        static_cast<T>(v46_), static_cast<T>(v47_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray47(const ValueArray47& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_),
+        v15_(other.v15_), v16_(other.v16_), v17_(other.v17_),
+        v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_),
+        v24_(other.v24_), v25_(other.v25_), v26_(other.v26_),
+        v27_(other.v27_), v28_(other.v28_), v29_(other.v29_),
+        v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_),
+        v36_(other.v36_), v37_(other.v37_), v38_(other.v38_),
+        v39_(other.v39_), v40_(other.v40_), v41_(other.v41_),
+        v42_(other.v42_), v43_(other.v43_), v44_(other.v44_),
+        v45_(other.v45_), v46_(other.v46_), v47_(other.v47_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray47& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+  const T43 v43_;
+  const T44 v44_;
+  const T45 v45_;
+  const T46 v46_;
+  const T47 v47_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43, typename T44, typename T45,
+    typename T46, typename T47, typename T48>
+class ValueArray48 {
+ public:
+  ValueArray48(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
+        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
+        static_cast<T>(v46_), static_cast<T>(v47_),
+        static_cast<T>(v48_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray48(const ValueArray48& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_),
+        v41_(other.v41_), v42_(other.v42_), v43_(other.v43_), v44_(other.v44_),
+        v45_(other.v45_), v46_(other.v46_), v47_(other.v47_),
+        v48_(other.v48_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray48& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+  const T43 v43_;
+  const T44 v44_;
+  const T45 v45_;
+  const T46 v46_;
+  const T47 v47_;
+  const T48 v48_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43, typename T44, typename T45,
+    typename T46, typename T47, typename T48, typename T49>
+class ValueArray49 {
+ public:
+  ValueArray49(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
+      T49 v49)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48),
+        v49_(v49) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
+        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
+        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
+        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
+        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
+        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
+        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
+        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
+        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
+        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
+        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
+        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
+        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
+        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
+        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
+        static_cast<T>(v46_), static_cast<T>(v47_), static_cast<T>(v48_),
+        static_cast<T>(v49_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray49(const ValueArray49& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_),
+        v41_(other.v41_), v42_(other.v42_), v43_(other.v43_), v44_(other.v44_),
+        v45_(other.v45_), v46_(other.v46_), v47_(other.v47_), v48_(other.v48_),
+        v49_(other.v49_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray49& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+  const T43 v43_;
+  const T44 v44_;
+  const T45 v45_;
+  const T46 v46_;
+  const T47 v47_;
+  const T48 v48_;
+  const T49 v49_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10,
+    typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17, typename T18, typename T19, typename T20,
+    typename T21, typename T22, typename T23, typename T24, typename T25,
+    typename T26, typename T27, typename T28, typename T29, typename T30,
+    typename T31, typename T32, typename T33, typename T34, typename T35,
+    typename T36, typename T37, typename T38, typename T39, typename T40,
+    typename T41, typename T42, typename T43, typename T44, typename T45,
+    typename T46, typename T47, typename T48, typename T49, typename T50>
+class ValueArray50 {
+ public:
+  ValueArray50(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+      T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+      T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+      T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+      T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
+      T49 v49, T50 v50)
+      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48),
+        v49_(v49), v50_(v50) {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {
+        static_cast<T>(v1_), static_cast<T>(v2_),
+        static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+        static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+        static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+        static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+        static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+        static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+        static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+        static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+        static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+        static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+        static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+        static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+        static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+        static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
+        static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
+        static_cast<T>(v48_), static_cast<T>(v49_), static_cast<T>(v50_)};
+    return ValuesIn(array);
+  }
+
+  ValueArray50(const ValueArray50& other)
+      : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_),
+        v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_),
+        v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_),
+        v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_),
+        v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_),
+        v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_),
+        v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_),
+        v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_),
+        v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_),
+        v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_),
+        v41_(other.v41_), v42_(other.v42_), v43_(other.v43_), v44_(other.v44_),
+        v45_(other.v45_), v46_(other.v46_), v47_(other.v47_), v48_(other.v48_),
+        v49_(other.v49_), v50_(other.v50_) {}
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ValueArray50& other);
+
+  const T1 v1_;
+  const T2 v2_;
+  const T3 v3_;
+  const T4 v4_;
+  const T5 v5_;
+  const T6 v6_;
+  const T7 v7_;
+  const T8 v8_;
+  const T9 v9_;
+  const T10 v10_;
+  const T11 v11_;
+  const T12 v12_;
+  const T13 v13_;
+  const T14 v14_;
+  const T15 v15_;
+  const T16 v16_;
+  const T17 v17_;
+  const T18 v18_;
+  const T19 v19_;
+  const T20 v20_;
+  const T21 v21_;
+  const T22 v22_;
+  const T23 v23_;
+  const T24 v24_;
+  const T25 v25_;
+  const T26 v26_;
+  const T27 v27_;
+  const T28 v28_;
+  const T29 v29_;
+  const T30 v30_;
+  const T31 v31_;
+  const T32 v32_;
+  const T33 v33_;
+  const T34 v34_;
+  const T35 v35_;
+  const T36 v36_;
+  const T37 v37_;
+  const T38 v38_;
+  const T39 v39_;
+  const T40 v40_;
+  const T41 v41_;
+  const T42 v42_;
+  const T43 v43_;
+  const T44 v44_;
+  const T45 v45_;
+  const T46 v46_;
+  const T47 v47_;
+  const T48 v48_;
+  const T49 v49_;
+  const T50 v50_;
+};
+
+# if GTEST_HAS_COMBINE
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Generates values from the Cartesian product of values produced
+// by the argument generators.
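+// For example, Combine(Values(1, 2), Values("A", "B")) produces the four
+// tuples (1, "A"), (1, "B"), (2, "A"), (2, "B"), with the last generator
+// varying fastest.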
+//
+template <typename T1, typename T2>
+class CartesianProductGenerator2
+    : public ParamGeneratorInterface< ::testing::tuple<T1, T2> > {
+ public:
+  typedef ::testing::tuple<T1, T2> ParamType;
+
+  CartesianProductGenerator2(const ParamGenerator<T1>& g1,
+      const ParamGenerator<T2>& g2)
+      : g1_(g1), g2_(g2) {}
+  virtual ~CartesianProductGenerator2() {}
+
+  virtual ParamIteratorInterface<ParamType>* Begin() const {
+    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin());
+  }
+  virtual ParamIteratorInterface<ParamType>* End() const {
+    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end());
+  }
+
+ private:
+  class Iterator : public ParamIteratorInterface<ParamType> {
+   public:
+    Iterator(const ParamGeneratorInterface<ParamType>* base,
+        const ParamGenerator<T1>& g1,
+        const typename ParamGenerator<T1>::iterator& current1,
+        const ParamGenerator<T2>& g2,
+        const typename ParamGenerator<T2>::iterator& current2)
+        : base_(base),
+          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+          begin2_(g2.begin()), end2_(g2.end()), current2_(current2) {
+      ComputeCurrentValue();
+    }
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+      return base_;
+    }
+    // Advance should not be called on beyond-of-range iterators
+    // so no component iterators must be beyond end of range, either.
+    virtual void Advance() {
+      assert(!AtEnd());
+      ++current2_;
+      if (current2_ == end2_) {
+        current2_ = begin2_;
+        ++current1_;
+      }
+      ComputeCurrentValue();
+    }
+    virtual ParamIteratorInterface<ParamType>* Clone() const {
+      return new Iterator(*this);
+    }
+    virtual const ParamType* Current() const { return current_value_.get(); }
+    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+      // Having the same base generator guarantees that the other
+      // iterator is of the same type and we can downcast.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* typed_other =
+          CheckedDowncastToActualType<const Iterator>(&other);
+      // We must report iterators equal if they both point beyond their
+      // respective ranges. That can happen in a variety of fashions,
+      // so we have to consult AtEnd().
+      return (AtEnd() && typed_other->AtEnd()) ||
+          (current1_ == typed_other->current1_ &&
+           current2_ == typed_other->current2_);
+    }
+
+   private:
+    Iterator(const Iterator& other)
+        : base_(other.base_),
+          begin1_(other.begin1_),
+          end1_(other.end1_),
+          current1_(other.current1_),
+          begin2_(other.begin2_),
+          end2_(other.end2_),
+          current2_(other.current2_) {
+      ComputeCurrentValue();
+    }
+
+    void ComputeCurrentValue() {
+      if (!AtEnd())
+        current_value_.reset(new ParamType(*current1_, *current2_));
+    }
+    bool AtEnd() const {
+      // We must report iterator past the end of the range when either of the
+      // component iterators has reached the end of its range.
+      return
+          current1_ == end1_ ||
+          current2_ == end2_;
+    }
+
+    // No implementation - assignment is unsupported.
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<ParamType>* const base_;
+    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+    // current[i]_ is the actual traversing iterator.
+    const typename ParamGenerator<T1>::iterator begin1_;
+    const typename ParamGenerator<T1>::iterator end1_;
+    typename ParamGenerator<T1>::iterator current1_;
+    const typename ParamGenerator<T2>::iterator begin2_;
+    const typename ParamGenerator<T2>::iterator end2_;
+    typename ParamGenerator<T2>::iterator current2_;
+    linked_ptr<ParamType> current_value_;
+  };  // class CartesianProductGenerator2::Iterator
+
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductGenerator2& other);
+
+  const ParamGenerator<T1> g1_;
+  const ParamGenerator<T2> g2_;
+};  // class CartesianProductGenerator2
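+
+// Example use (illustrative): given a test fixture FooTest deriving from
+// ::testing::TestWithParam< ::testing::tuple<int, bool> >, one might write
+//
+//   INSTANTIATE_TEST_CASE_P(Ranges, FooTest,
+//       ::testing::Combine(::testing::Values(1, 2), ::testing::Bool()));
+//
+// which instantiates FooTest for the four (int, bool) combinations.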
+
+
+template <typename T1, typename T2, typename T3>
+class CartesianProductGenerator3
+    : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3> > {
+ public:
+  typedef ::testing::tuple<T1, T2, T3> ParamType;
+
+  CartesianProductGenerator3(const ParamGenerator<T1>& g1,
+      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3)
+      : g1_(g1), g2_(g2), g3_(g3) {}
+  virtual ~CartesianProductGenerator3() {}
+
+  virtual ParamIteratorInterface<ParamType>* Begin() const {
+    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+        g3_.begin());
+  }
+  virtual ParamIteratorInterface<ParamType>* End() const {
+    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end());
+  }
+
+ private:
+  class Iterator : public ParamIteratorInterface<ParamType> {
+   public:
+    Iterator(const ParamGeneratorInterface<ParamType>* base,
+        const ParamGenerator<T1>& g1,
+        const typename ParamGenerator<T1>::iterator& current1,
+        const ParamGenerator<T2>& g2,
+        const typename ParamGenerator<T2>::iterator& current2,
+        const ParamGenerator<T3>& g3,
+        const typename ParamGenerator<T3>::iterator& current3)
+        : base_(base),
+          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+          begin3_(g3.begin()), end3_(g3.end()), current3_(current3) {
+      ComputeCurrentValue();
+    }
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+      return base_;
+    }
+    // Advance should not be called on beyond-of-range iterators
+    // so no component iterators must be beyond end of range, either.
+    virtual void Advance() {
+      assert(!AtEnd());
+      ++current3_;
+      if (current3_ == end3_) {
+        current3_ = begin3_;
+        ++current2_;
+      }
+      if (current2_ == end2_) {
+        current2_ = begin2_;
+        ++current1_;
+      }
+      ComputeCurrentValue();
+    }
+    virtual ParamIteratorInterface<ParamType>* Clone() const {
+      return new Iterator(*this);
+    }
+    virtual const ParamType* Current() const { return current_value_.get(); }
+    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+      // Having the same base generator guarantees that the other
+      // iterator is of the same type and we can downcast.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* typed_other =
+          CheckedDowncastToActualType<const Iterator>(&other);
+      // We must report iterators equal if they both point beyond their
+      // respective ranges. That can happen in a variety of fashions,
+      // so we have to consult AtEnd().
+      return (AtEnd() && typed_other->AtEnd()) ||
+          (current1_ == typed_other->current1_ &&
+           current2_ == typed_other->current2_ &&
+           current3_ == typed_other->current3_);
+    }
+
+   private:
+    Iterator(const Iterator& other)
+        : base_(other.base_),
+          begin1_(other.begin1_),
+          end1_(other.end1_),
+          current1_(other.current1_),
+          begin2_(other.begin2_),
+          end2_(other.end2_),
+          current2_(other.current2_),
+          begin3_(other.begin3_),
+          end3_(other.end3_),
+          current3_(other.current3_) {
+      ComputeCurrentValue();
+    }
+
+    void ComputeCurrentValue() {
+      if (!AtEnd())
+        current_value_.reset(new ParamType(*current1_, *current2_,
+            *current3_));
+    }
+    bool AtEnd() const {
+      // We must report iterator past the end of the range when either of the
+      // component iterators has reached the end of its range.
+      return
+          current1_ == end1_ ||
+          current2_ == end2_ ||
+          current3_ == end3_;
+    }
+
+    // No implementation - assignment is unsupported.
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<ParamType>* const base_;
+    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+    // current[i]_ is the actual traversing iterator.
+    const typename ParamGenerator<T1>::iterator begin1_;
+    const typename ParamGenerator<T1>::iterator end1_;
+    typename ParamGenerator<T1>::iterator current1_;
+    const typename ParamGenerator<T2>::iterator begin2_;
+    const typename ParamGenerator<T2>::iterator end2_;
+    typename ParamGenerator<T2>::iterator current2_;
+    const typename ParamGenerator<T3>::iterator begin3_;
+    const typename ParamGenerator<T3>::iterator end3_;
+    typename ParamGenerator<T3>::iterator current3_;
+    linked_ptr<ParamType> current_value_;
+  };  // class CartesianProductGenerator3::Iterator
+
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductGenerator3& other);
+
+  const ParamGenerator<T1> g1_;
+  const ParamGenerator<T2> g2_;
+  const ParamGenerator<T3> g3_;
+};  // class CartesianProductGenerator3
+
+
+template <typename T1, typename T2, typename T3, typename T4>
+class CartesianProductGenerator4
+    : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3, T4> > {
+ public:
+  typedef ::testing::tuple<T1, T2, T3, T4> ParamType;
+
+  CartesianProductGenerator4(const ParamGenerator<T1>& g1,
+      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+      const ParamGenerator<T4>& g4)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}
+  virtual ~CartesianProductGenerator4() {}
+
+  virtual ParamIteratorInterface<ParamType>* Begin() const {
+    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+        g3_.begin(), g4_, g4_.begin());
+  }
+  virtual ParamIteratorInterface<ParamType>* End() const {
+    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+        g4_, g4_.end());
+  }
+
+ private:
+  class Iterator : public ParamIteratorInterface<ParamType> {
+   public:
+    Iterator(const ParamGeneratorInterface<ParamType>* base,
+        const ParamGenerator<T1>& g1,
+        const typename ParamGenerator<T1>::iterator& current1,
+        const ParamGenerator<T2>& g2,
+        const typename ParamGenerator<T2>::iterator& current2,
+        const ParamGenerator<T3>& g3,
+        const typename ParamGenerator<T3>::iterator& current3,
+        const ParamGenerator<T4>& g4,
+        const typename ParamGenerator<T4>::iterator& current4)
+        : base_(base),
+          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+          begin4_(g4.begin()), end4_(g4.end()), current4_(current4) {
+      ComputeCurrentValue();
+    }
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+      return base_;
+    }
+    // Advance should not be called on beyond-of-range iterators
+    // so no component iterators must be beyond end of range, either.
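+// Note: a CartesianProductGeneratorN sequence visits every combination
+// exactly once, so its length is the product of the component generators'
+// lengths; Advance() increments the last component iterator fastest,
+// odometer-style.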
+    virtual void Advance() {
+      assert(!AtEnd());
+      ++current4_;
+      if (current4_ == end4_) {
+        current4_ = begin4_;
+        ++current3_;
+      }
+      if (current3_ == end3_) {
+        current3_ = begin3_;
+        ++current2_;
+      }
+      if (current2_ == end2_) {
+        current2_ = begin2_;
+        ++current1_;
+      }
+      ComputeCurrentValue();
+    }
+    virtual ParamIteratorInterface<ParamType>* Clone() const {
+      return new Iterator(*this);
+    }
+    virtual const ParamType* Current() const { return current_value_.get(); }
+    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+      // Having the same base generator guarantees that the other
+      // iterator is of the same type and we can downcast.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* typed_other =
+          CheckedDowncastToActualType<const Iterator>(&other);
+      // We must report iterators equal if they both point beyond their
+      // respective ranges. That can happen in a variety of fashions,
+      // so we have to consult AtEnd().
+      return (AtEnd() && typed_other->AtEnd()) ||
+          (current1_ == typed_other->current1_ &&
+           current2_ == typed_other->current2_ &&
+           current3_ == typed_other->current3_ &&
+           current4_ == typed_other->current4_);
+    }
+
+   private:
+    Iterator(const Iterator& other)
+        : base_(other.base_),
+          begin1_(other.begin1_),
+          end1_(other.end1_),
+          current1_(other.current1_),
+          begin2_(other.begin2_),
+          end2_(other.end2_),
+          current2_(other.current2_),
+          begin3_(other.begin3_),
+          end3_(other.end3_),
+          current3_(other.current3_),
+          begin4_(other.begin4_),
+          end4_(other.end4_),
+          current4_(other.current4_) {
+      ComputeCurrentValue();
+    }
+
+    void ComputeCurrentValue() {
+      if (!AtEnd())
+        current_value_.reset(new ParamType(*current1_, *current2_, *current3_,
+            *current4_));
+    }
+    bool AtEnd() const {
+      // We must report iterator past the end of the range when either of the
+      // component iterators has reached the end of its range.
+      return
+          current1_ == end1_ ||
+          current2_ == end2_ ||
+          current3_ == end3_ ||
+          current4_ == end4_;
+    }
+
+    // No implementation - assignment is unsupported.
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<ParamType>* const base_;
+    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+    // current[i]_ is the actual traversing iterator.
+    const typename ParamGenerator<T1>::iterator begin1_;
+    const typename ParamGenerator<T1>::iterator end1_;
+    typename ParamGenerator<T1>::iterator current1_;
+    const typename ParamGenerator<T2>::iterator begin2_;
+    const typename ParamGenerator<T2>::iterator end2_;
+    typename ParamGenerator<T2>::iterator current2_;
+    const typename ParamGenerator<T3>::iterator begin3_;
+    const typename ParamGenerator<T3>::iterator end3_;
+    typename ParamGenerator<T3>::iterator current3_;
+    const typename ParamGenerator<T4>::iterator begin4_;
+    const typename ParamGenerator<T4>::iterator end4_;
+    typename ParamGenerator<T4>::iterator current4_;
+    linked_ptr<ParamType> current_value_;
+  };  // class CartesianProductGenerator4::Iterator
+
+  // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator4& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; +}; // class CartesianProductGenerator4 + + +template +class CartesianProductGenerator5 + : public ParamGeneratorInterface< ::testing::tuple > { + public: + typedef ::testing::tuple ParamType; + + CartesianProductGenerator5(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} + virtual ~CartesianProductGenerator5() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current5_; + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return current_value_.get(); } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_)); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + linked_ptr current_value_; + }; // class CartesianProductGenerator5::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator5& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; +}; // class CartesianProductGenerator5 + + +template +class CartesianProductGenerator6 + : public ParamGeneratorInterface< ::testing::tuple > { + public: + typedef ::testing::tuple ParamType; + + CartesianProductGenerator6(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} + virtual ~CartesianProductGenerator6() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current6_; + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return current_value_.get(); } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_)); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + const typename ParamGenerator::iterator begin6_; + const typename ParamGenerator::iterator end6_; + typename ParamGenerator::iterator current6_; + linked_ptr current_value_; + }; // class CartesianProductGenerator6::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator6& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; + const ParamGenerator g6_; +}; // class CartesianProductGenerator6 + + +template +class CartesianProductGenerator7 + : public ParamGeneratorInterface< ::testing::tuple > { + public: + typedef ::testing::tuple ParamType; + + CartesianProductGenerator7(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6, const ParamGenerator& g7) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} + virtual ~CartesianProductGenerator7() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6, + const ParamGenerator& g7, + const typename ParamGenerator::iterator& current7) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current7_; + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return current_value_.get(); } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. 
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_)); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + const typename ParamGenerator::iterator begin6_; + const typename ParamGenerator::iterator end6_; + typename ParamGenerator::iterator current6_; + const typename ParamGenerator::iterator begin7_; + const typename ParamGenerator::iterator end7_; + typename ParamGenerator::iterator current7_; + linked_ptr current_value_; + }; // class CartesianProductGenerator7::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator7& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; + const ParamGenerator g6_; + const ParamGenerator g7_; +}; // class CartesianProductGenerator7 + + +template +class CartesianProductGenerator8 + : public ParamGeneratorInterface< ::testing::tuple > { + public: + typedef ::testing::tuple ParamType; + + CartesianProductGenerator8(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6, const ParamGenerator& g7, + const ParamGenerator& g8) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), + g8_(g8) {} + virtual ~CartesianProductGenerator8() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin(), g8_, g8_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, + g8_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6, + const ParamGenerator& g7, + const typename ParamGenerator::iterator& current7, + const ParamGenerator& g8, + const typename ParamGenerator::iterator& current8) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7), + begin8_(g8.begin()), end8_(g8.end()), current8_(current8) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. 
+ virtual void Advance() { + assert(!AtEnd()); + ++current8_; + if (current8_ == end8_) { + current8_ = begin8_; + ++current7_; + } + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return current_value_.get(); } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_ && + current8_ == typed_other->current8_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_), + begin8_(other.begin8_), + end8_(other.end8_), + current8_(other.current8_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_)); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_ || + current8_ == end8_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. 
+ const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + const typename ParamGenerator::iterator begin6_; + const typename ParamGenerator::iterator end6_; + typename ParamGenerator::iterator current6_; + const typename ParamGenerator::iterator begin7_; + const typename ParamGenerator::iterator end7_; + typename ParamGenerator::iterator current7_; + const typename ParamGenerator::iterator begin8_; + const typename ParamGenerator::iterator end8_; + typename ParamGenerator::iterator current8_; + linked_ptr current_value_; + }; // class CartesianProductGenerator8::Iterator + + // No implementation - assignment is unsupported. + void operator=(const CartesianProductGenerator8& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; + const ParamGenerator g6_; + const ParamGenerator g7_; + const ParamGenerator g8_; +}; // class CartesianProductGenerator8 + + +template +class CartesianProductGenerator9 + : public ParamGeneratorInterface< ::testing::tuple > { + public: + typedef ::testing::tuple ParamType; + + CartesianProductGenerator9(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6, const ParamGenerator& g7, + const ParamGenerator& g8, const ParamGenerator& g9) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9) {} + virtual ~CartesianProductGenerator9() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, + g8_.end(), g9_, g9_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6, + const ParamGenerator& g7, + const typename ParamGenerator::iterator& current7, + const ParamGenerator& g8, + const typename ParamGenerator::iterator& current8, + const ParamGenerator& g9, + const typename ParamGenerator::iterator& current9) + : base_(base), + 
begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7), + begin8_(g8.begin()), end8_(g8.end()), current8_(current8), + begin9_(g9.begin()), end9_(g9.end()), current9_(current9) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current9_; + if (current9_ == end9_) { + current9_ = begin9_; + ++current8_; + } + if (current8_ == end8_) { + current8_ = begin8_; + ++current7_; + } + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return current_value_.get(); } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_ && + current8_ == typed_other->current8_ && + current9_ == typed_other->current9_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_), + begin8_(other.begin8_), + end8_(other.end8_), + current8_(other.current8_), + begin9_(other.begin9_), + end9_(other.end9_), + current9_(other.current9_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_, + *current9_)); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_ || + current8_ == end8_ || + current9_ == end9_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + const typename ParamGenerator::iterator begin6_; + const typename ParamGenerator::iterator end6_; + typename ParamGenerator::iterator current6_; + const typename ParamGenerator::iterator begin7_; + const typename ParamGenerator::iterator end7_; + typename ParamGenerator::iterator current7_; + const typename ParamGenerator::iterator begin8_; + const typename ParamGenerator::iterator end8_; + typename ParamGenerator::iterator current8_; + const typename ParamGenerator::iterator begin9_; + const typename ParamGenerator::iterator end9_; + typename ParamGenerator::iterator current9_; + linked_ptr current_value_; + }; // class CartesianProductGenerator9::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator9& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; + const ParamGenerator g6_; + const ParamGenerator g7_; + const ParamGenerator g8_; + const ParamGenerator g9_; +}; // class CartesianProductGenerator9 + + +template +class CartesianProductGenerator10 + : public ParamGeneratorInterface< ::testing::tuple > { + public: + typedef ::testing::tuple ParamType; + + CartesianProductGenerator10(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6, const ParamGenerator& g7, + const ParamGenerator& g8, const ParamGenerator& g9, + const ParamGenerator& g10) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9), g10_(g10) {} + virtual ~CartesianProductGenerator10() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, + g8_.end(), g9_, g9_.end(), g10_, g10_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6, + const ParamGenerator& g7, + const typename ParamGenerator::iterator& current7, + const ParamGenerator& g8, + const typename ParamGenerator::iterator& current8, + const ParamGenerator& g9, + const typename ParamGenerator::iterator& current9, + const ParamGenerator& g10, + const typename ParamGenerator::iterator& current10) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7), + begin8_(g8.begin()), end8_(g8.end()), current8_(current8), + begin9_(g9.begin()), end9_(g9.end()), current9_(current9), + begin10_(g10.begin()), end10_(g10.end()), current10_(current10) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. 
+ virtual void Advance() { + assert(!AtEnd()); + ++current10_; + if (current10_ == end10_) { + current10_ = begin10_; + ++current9_; + } + if (current9_ == end9_) { + current9_ = begin9_; + ++current8_; + } + if (current8_ == end8_) { + current8_ = begin8_; + ++current7_; + } + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return current_value_.get(); } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_ && + current8_ == typed_other->current8_ && + current9_ == typed_other->current9_ && + current10_ == typed_other->current10_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_), + begin8_(other.begin8_), + end8_(other.end8_), + current8_(other.current8_), + begin9_(other.begin9_), + end9_(other.end9_), + current9_(other.current9_), + begin10_(other.begin10_), + end10_(other.end10_), + current10_(other.current10_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_, + *current9_, *current10_)); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_ || + current8_ == end8_ || + current9_ == end9_ || + current10_ == end10_; + } + + // No implementation - assignment is unsupported. 
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<ParamType>* const base_;
+    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+    // current[i]_ is the actual traversing iterator.
+    const typename ParamGenerator<T1>::iterator begin1_;
+    const typename ParamGenerator<T1>::iterator end1_;
+    typename ParamGenerator<T1>::iterator current1_;
+    const typename ParamGenerator<T2>::iterator begin2_;
+    const typename ParamGenerator<T2>::iterator end2_;
+    typename ParamGenerator<T2>::iterator current2_;
+    const typename ParamGenerator<T3>::iterator begin3_;
+    const typename ParamGenerator<T3>::iterator end3_;
+    typename ParamGenerator<T3>::iterator current3_;
+    const typename ParamGenerator<T4>::iterator begin4_;
+    const typename ParamGenerator<T4>::iterator end4_;
+    typename ParamGenerator<T4>::iterator current4_;
+    const typename ParamGenerator<T5>::iterator begin5_;
+    const typename ParamGenerator<T5>::iterator end5_;
+    typename ParamGenerator<T5>::iterator current5_;
+    const typename ParamGenerator<T6>::iterator begin6_;
+    const typename ParamGenerator<T6>::iterator end6_;
+    typename ParamGenerator<T6>::iterator current6_;
+    const typename ParamGenerator<T7>::iterator begin7_;
+    const typename ParamGenerator<T7>::iterator end7_;
+    typename ParamGenerator<T7>::iterator current7_;
+    const typename ParamGenerator<T8>::iterator begin8_;
+    const typename ParamGenerator<T8>::iterator end8_;
+    typename ParamGenerator<T8>::iterator current8_;
+    const typename ParamGenerator<T9>::iterator begin9_;
+    const typename ParamGenerator<T9>::iterator end9_;
+    typename ParamGenerator<T9>::iterator current9_;
+    const typename ParamGenerator<T10>::iterator begin10_;
+    const typename ParamGenerator<T10>::iterator end10_;
+    typename ParamGenerator<T10>::iterator current10_;
+    linked_ptr<ParamType> current_value_;
+  };  // class CartesianProductGenerator10::Iterator
+
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductGenerator10& other);
+
+  const ParamGenerator<T1> g1_;
+  const ParamGenerator<T2> g2_;
+  const ParamGenerator<T3> g3_;
+  const ParamGenerator<T4> g4_;
+  const ParamGenerator<T5> g5_;
+  const ParamGenerator<T6> g6_;
+  const ParamGenerator<T7> g7_;
+  const ParamGenerator<T8> g8_;
+  const ParamGenerator<T9> g9_;
+  const ParamGenerator<T10> g10_;
+};  // class CartesianProductGenerator10
+
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Helper classes providing Combine() with polymorphic features. They allow
+// casting CartesianProductGeneratorN<T1, ..., TN> to ParamGenerator<U> if T is
+// convertible to U.
+//
+template <class Generator1, class Generator2>
+class CartesianProductHolder2 {
+ public:
+CartesianProductHolder2(const Generator1& g1, const Generator2& g2)
+      : g1_(g1), g2_(g2) {}
+  template <typename T1, typename T2>
+  operator ParamGenerator< ::testing::tuple<T1, T2> >() const {
+    return ParamGenerator< ::testing::tuple<T1, T2> >(
+        new CartesianProductGenerator2<T1, T2>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder2& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+};  // class CartesianProductHolder2
+
+template <class Generator1, class Generator2, class Generator3>
+class CartesianProductHolder3 {
+ public:
+CartesianProductHolder3(const Generator1& g1, const Generator2& g2,
+    const Generator3& g3)
+      : g1_(g1), g2_(g2), g3_(g3) {}
+  template <typename T1, typename T2, typename T3>
+  operator ParamGenerator< ::testing::tuple<T1, T2, T3> >() const {
+    return ParamGenerator< ::testing::tuple<T1, T2, T3> >(
+        new CartesianProductGenerator3<T1, T2, T3>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder3& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; +}; // class CartesianProductHolder3 + +template +class CartesianProductHolder4 { + public: +CartesianProductHolder4(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {} + template + operator ParamGenerator< ::testing::tuple >() const { + return ParamGenerator< ::testing::tuple >( + new CartesianProductGenerator4( + static_cast >(g1_), + static_cast >(g2_), + static_cast >(g3_), + static_cast >(g4_))); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const CartesianProductHolder4& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; +}; // class CartesianProductHolder4 + +template +class CartesianProductHolder5 { + public: +CartesianProductHolder5(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} + template + operator ParamGenerator< ::testing::tuple >() const { + return ParamGenerator< ::testing::tuple >( + new CartesianProductGenerator5( + static_cast >(g1_), + static_cast >(g2_), + static_cast >(g3_), + static_cast >(g4_), + static_cast >(g5_))); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const CartesianProductHolder5& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; +}; // class CartesianProductHolder5 + +template +class CartesianProductHolder6 { + public: +CartesianProductHolder6(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} + template + operator ParamGenerator< ::testing::tuple >() const { + return ParamGenerator< ::testing::tuple >( + new CartesianProductGenerator6( + static_cast >(g1_), + static_cast >(g2_), + static_cast >(g3_), + static_cast >(g4_), + static_cast >(g5_), + static_cast >(g6_))); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const CartesianProductHolder6& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; +}; // class CartesianProductHolder6 + +template +class CartesianProductHolder7 { + public: +CartesianProductHolder7(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6, const Generator7& g7) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} + template + operator ParamGenerator< ::testing::tuple >() const { + return ParamGenerator< ::testing::tuple >( + new CartesianProductGenerator7( + static_cast >(g1_), + static_cast >(g2_), + static_cast >(g3_), + static_cast >(g4_), + static_cast >(g5_), + static_cast >(g6_), + static_cast >(g7_))); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductHolder7& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; + const Generator7 g7_; +}; // class CartesianProductHolder7 + +template +class CartesianProductHolder8 { + public: +CartesianProductHolder8(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6, const Generator7& g7, const Generator8& g8) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), + g8_(g8) {} + template + operator ParamGenerator< ::testing::tuple >() const { + return ParamGenerator< ::testing::tuple >( + new CartesianProductGenerator8( + static_cast >(g1_), + static_cast >(g2_), + static_cast >(g3_), + static_cast >(g4_), + static_cast >(g5_), + static_cast >(g6_), + static_cast >(g7_), + static_cast >(g8_))); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const CartesianProductHolder8& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; + const Generator7 g7_; + const Generator8 g8_; +}; // class CartesianProductHolder8 + +template +class CartesianProductHolder9 { + public: +CartesianProductHolder9(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6, const Generator7& g7, const Generator8& g8, + const Generator9& g9) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9) {} + template + operator ParamGenerator< ::testing::tuple >() const { + return ParamGenerator< ::testing::tuple >( + new CartesianProductGenerator9( + static_cast >(g1_), + static_cast >(g2_), + static_cast >(g3_), + static_cast >(g4_), + static_cast >(g5_), + static_cast >(g6_), + static_cast >(g7_), + static_cast >(g8_), + static_cast >(g9_))); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const CartesianProductHolder9& other); + + const Generator1 g1_; + const Generator2 g2_; + const Generator3 g3_; + const Generator4 g4_; + const Generator5 g5_; + const Generator6 g6_; + const Generator7 g7_; + const Generator8 g8_; + const Generator9 g9_; +}; // class CartesianProductHolder9 + +template +class CartesianProductHolder10 { + public: +CartesianProductHolder10(const Generator1& g1, const Generator2& g2, + const Generator3& g3, const Generator4& g4, const Generator5& g5, + const Generator6& g6, const Generator7& g7, const Generator8& g8, + const Generator9& g9, const Generator10& g10) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9), g10_(g10) {} + template + operator ParamGenerator< ::testing::tuple >() const { + return ParamGenerator< ::testing::tuple >( + new CartesianProductGenerator10( + static_cast >(g1_), + static_cast >(g2_), + static_cast >(g3_), + static_cast >(g4_), + static_cast >(g5_), + static_cast >(g6_), + static_cast >(g7_), + static_cast >(g8_), + static_cast >(g9_), + static_cast >(g10_))); + } + + private: + // No implementation - assignment is unsupported. 
+  void operator=(const CartesianProductHolder10& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+  const Generator4 g4_;
+  const Generator5 g5_;
+  const Generator6 g6_;
+  const Generator7 g7_;
+  const Generator8 g8_;
+  const Generator9 g9_;
+  const Generator10 g10_;
+};  // class CartesianProductHolder10
+
+# endif  // GTEST_HAS_COMBINE
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
+
+namespace testing {
+
+// Functions producing parameter generators.
+//
+// Google Test uses these generators to produce parameters for value-
+// parameterized tests. When a parameterized test case is instantiated
+// with a particular generator, Google Test creates and runs tests
+// for each element in the sequence produced by the generator.
+//
+// In the following sample, tests from test case FooTest are instantiated
+// three times each, with parameter values 3, 5, and 8:
+//
+// class FooTest : public TestWithParam<int> { ... };
+//
+// TEST_P(FooTest, TestThis) {
+// }
+// TEST_P(FooTest, TestThat) {
+// }
+// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8));
+//
+
+// Range() returns generators providing sequences of values in a range.
+//
+// Synopsis:
+// Range(start, end)
+//   - returns a generator producing a sequence of values {start, start+1,
+//     start+2, ...} up to but not including end.
+// Range(start, end, step)
+//   - returns a generator producing a sequence of values {start, start+step,
+//     start+step+step, ...} up to but not including end.
+// Notes:
+//   * The generated sequences never include end. For example, Range(1, 5)
+//     returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
+//     returns a generator producing {1, 3, 5, 7}.
+//   * start and end must have the same type. That type may be any integral or
+//     floating-point type or a user-defined type satisfying these conditions:
+//     * It must be assignable (have operator=() defined).
+//     * It must have operator+() (operator+(int-compatible type) for
+//       the two-operand version).
+//     * It must have operator<() defined.
+//     Elements in the resulting sequences will also have that type.
+//   * The condition start < end must be satisfied in order for resulting
+//     sequences to contain any elements.
+//
+template <typename T, typename IncrementT>
+internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
+  return internal::ParamGenerator<T>(
+      new internal::RangeGenerator<T, IncrementT>(start, end, step));
+}
+
+template <typename T>
+internal::ParamGenerator<T> Range(T start, T end) {
+  return Range(start, end, 1);
+}
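+
+// For orientation, a minimal usage sketch of the Range() overloads above.
+// Hedged: the ScaleTest fixture and its assertions are illustrative
+// assumptions, not part of this header; only TestWithParam, TEST_P,
+// GetParam, INSTANTIATE_TEST_CASE_P, and Range itself come from Google Test.
+//
+// #include "gtest/gtest.h"
+//
+// // ScaleTest is a hypothetical fixture; its parameter type matches T.
+// class ScaleTest : public ::testing::TestWithParam<int> {};
+//
+// TEST_P(ScaleTest, ParamStaysBelowEnd) {
+//   // Range(0, 10, 2) yields 0, 2, 4, 6, 8; the end value is never produced.
+//   EXPECT_GE(GetParam(), 0);
+//   EXPECT_LT(GetParam(), 10);
+// }
+//
+// INSTANTIATE_TEST_CASE_P(EvenValues, ScaleTest, ::testing::Range(0, 10, 2));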
+
+// ValuesIn() allows generation of tests with parameters coming from
+// a container.
+//
+// Synopsis:
+// ValuesIn(const T (&array)[N])
+//   - returns a generator producing sequences with elements from
+//     a C-style array.
+// ValuesIn(const Container& container)
+//   - returns a generator producing sequences with elements from
+//     an STL-style container.
+// ValuesIn(Iterator begin, Iterator end)
+//   - returns a generator producing sequences with elements from
+//     a range [begin, end) defined by a pair of STL-style iterators. These
+//     iterators can also be plain C pointers.
+//
+// Please note that ValuesIn copies the values from the containers
+// passed in and keeps them to generate tests in RUN_ALL_TESTS().
+//
+// Examples:
+//
+// This instantiates tests from test case StringTest
+// each with C-string values of "foo", "bar", and "baz":
+//
+// const char* strings[] = {"foo", "bar", "baz"};
+// INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings));
+//
+// This instantiates tests from test case StlStringTest
+// each with the STL strings "a" and "b":
+//
+// ::std::vector< ::std::string> GetParameterStrings() {
+//   ::std::vector< ::std::string> v;
+//   v.push_back("a");
+//   v.push_back("b");
+//   return v;
+// }
+//
+// INSTANTIATE_TEST_CASE_P(CharSequence,
+//                         StlStringTest,
+//                         ValuesIn(GetParameterStrings()));
+//
+// This will also instantiate tests from CharTest
+// each with parameter values 'a' and 'b':
+//
+// ::std::list<char> GetParameterChars() {
+//   ::std::list<char> list;
+//   list.push_back('a');
+//   list.push_back('b');
+//   return list;
+// }
+// ::std::list<char> l = GetParameterChars();
+// INSTANTIATE_TEST_CASE_P(CharSequence2,
+//                         CharTest,
+//                         ValuesIn(l.begin(), l.end()));
+//
+template <typename ForwardIterator>
+internal::ParamGenerator<
+  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
+ValuesIn(ForwardIterator begin, ForwardIterator end) {
+  typedef typename ::testing::internal::IteratorTraits<ForwardIterator>
+      ::value_type ParamType;
+  return internal::ParamGenerator<ParamType>(
+      new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
+}
+
+template <typename T, size_t N>
+internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
+  return ValuesIn(array, array + N);
+}
+
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+    const Container& container) {
+  return ValuesIn(container.begin(), container.end());
+}
+
+// Values() allows generating tests from an explicitly specified list of
+// parameters.
+//
+// Synopsis:
+// Values(T v1, T v2, ..., T vN)
+//   - returns a generator producing sequences with elements v1, v2, ..., vN.
+//
+// For example, this instantiates tests from test case BarTest each
+// with values "one", "two", and "three":
+//
+// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three"));
+//
+// This instantiates tests from test case BazTest each with values 1, 2, 3.5.
+// The exact type of values will depend on the type of parameter in BazTest.
+//
+// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
+//
+// Currently, Values() supports from 1 to 50 parameters.
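+
+// Tying together the ValuesIn() overloads and the Values() synopsis above,
+// a short hedged sketch; the ParserTest fixture and its inputs are invented
+// for illustration and are not part of this header.
+//
+// #include <string>
+// #include <vector>
+// #include "gtest/gtest.h"
+//
+// // Hypothetical fixture parameterized over std::string.
+// class ParserTest : public ::testing::TestWithParam< ::std::string> {};
+//
+// TEST_P(ParserTest, InputIsNonEmpty) {
+//   EXPECT_FALSE(GetParam().empty());
+// }
+//
+// // Container overload: ValuesIn copies the elements, so the temporary
+// // returned by ParserInputs() is safe to use here.
+// ::std::vector< ::std::string> ParserInputs() {
+//   ::std::vector< ::std::string> v;
+//   v.push_back("foo");
+//   v.push_back("bar");
+//   return v;
+// }
+// INSTANTIATE_TEST_CASE_P(FromContainer, ParserTest,
+//                         ::testing::ValuesIn(ParserInputs()));
+//
+// // Values(): each literal is converted to the fixture's parameter type.
+// INSTANTIATE_TEST_CASE_P(Explicit, ParserTest,
+//                         ::testing::Values("alpha", "beta"));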
+// +template +internal::ValueArray1 Values(T1 v1) { + return internal::ValueArray1(v1); +} + +template +internal::ValueArray2 Values(T1 v1, T2 v2) { + return internal::ValueArray2(v1, v2); +} + +template +internal::ValueArray3 Values(T1 v1, T2 v2, T3 v3) { + return internal::ValueArray3(v1, v2, v3); +} + +template +internal::ValueArray4 Values(T1 v1, T2 v2, T3 v3, T4 v4) { + return internal::ValueArray4(v1, v2, v3, v4); +} + +template +internal::ValueArray5 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5) { + return internal::ValueArray5(v1, v2, v3, v4, v5); +} + +template +internal::ValueArray6 Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6) { + return internal::ValueArray6(v1, v2, v3, v4, v5, v6); +} + +template +internal::ValueArray7 Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7) { + return internal::ValueArray7(v1, v2, v3, v4, v5, + v6, v7); +} + +template +internal::ValueArray8 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) { + return internal::ValueArray8(v1, v2, v3, v4, + v5, v6, v7, v8); +} + +template +internal::ValueArray9 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) { + return internal::ValueArray9(v1, v2, v3, + v4, v5, v6, v7, v8, v9); +} + +template +internal::ValueArray10 Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) { + return internal::ValueArray10(v1, + v2, v3, v4, v5, v6, v7, v8, v9, v10); +} + +template +internal::ValueArray11 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11) { + return internal::ValueArray11(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11); +} + +template +internal::ValueArray12 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12) { + return internal::ValueArray12(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12); +} + +template +internal::ValueArray13 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13) { + return internal::ValueArray13(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13); +} + +template +internal::ValueArray14 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) { + return internal::ValueArray14(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14); +} + +template +internal::ValueArray15 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) { + return internal::ValueArray15(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); +} + +template +internal::ValueArray16 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16) { + return internal::ValueArray16(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16); +} + +template +internal::ValueArray17 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17) { + return internal::ValueArray17(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17); +} + +template +internal::ValueArray18 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18) { + return internal::ValueArray18(v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18); +} + +template +internal::ValueArray19 
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) { + return internal::ValueArray19(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19); +} + +template +internal::ValueArray20 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) { + return internal::ValueArray20(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20); +} + +template +internal::ValueArray21 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21) { + return internal::ValueArray21(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21); +} + +template +internal::ValueArray22 Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22) { + return internal::ValueArray22(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22); +} + +template +internal::ValueArray23 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23) { + return internal::ValueArray23(v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23); +} + +template +internal::ValueArray24 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24) { + return internal::ValueArray24(v1, v2, + v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24); +} + +template +internal::ValueArray25 Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, + T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, + T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) { + return internal::ValueArray25(v1, + v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, + v18, v19, v20, v21, v22, v23, v24, v25); +} + +template +internal::ValueArray26 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26) { + return internal::ValueArray26(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26); +} + +template +internal::ValueArray27 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27) { + return internal::ValueArray27(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, + v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27); +} + +template +internal::ValueArray28 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 
v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28) { + return internal::ValueArray28(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, + v28); +} + +template +internal::ValueArray29 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29) { + return internal::ValueArray29(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, + v27, v28, v29); +} + +template +internal::ValueArray30 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, + T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) { + return internal::ValueArray30(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, + v26, v27, v28, v29, v30); +} + +template +internal::ValueArray31 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) { + return internal::ValueArray31(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, + v25, v26, v27, v28, v29, v30, v31); +} + +template +internal::ValueArray32 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32) { + return internal::ValueArray32(v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32); +} + +template +internal::ValueArray33 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33) { + return internal::ValueArray33(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33); +} + +template +internal::ValueArray34 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, + T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, + T31 v31, T32 v32, T33 v33, T34 v34) { + return internal::ValueArray34(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, + v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34); +} + +template +internal::ValueArray35 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, 
T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) { + return internal::ValueArray35(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35); +} + +template +internal::ValueArray36 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) { + return internal::ValueArray36(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, v35, v36); +} + +template +internal::ValueArray37 Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37) { + return internal::ValueArray37(v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, v35, v36, v37); +} + +template +internal::ValueArray38 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37, T38 v38) { + return internal::ValueArray38(v1, v2, + v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, + v33, v34, v35, v36, v37, v38); +} + +template +internal::ValueArray39 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37, T38 v38, T39 v39) { + return internal::ValueArray39(v1, + v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, + v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39); +} + +template +internal::ValueArray40 Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, + T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, + T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, + T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, + T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) { + return internal::ValueArray40(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, + v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40); +} + +template +internal::ValueArray41 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + 
T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) { + return internal::ValueArray41(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, + v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, + v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41); +} + +template +internal::ValueArray42 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42) { + return internal::ValueArray42(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, + v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, + v42); +} + +template +internal::ValueArray43 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43) { + return internal::ValueArray43(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, + v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, + v41, v42, v43); +} + +template +internal::ValueArray44 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44) { + return internal::ValueArray44(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, + v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44); +} + +template +internal::ValueArray45 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, + T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, + T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, + T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) { + return internal::ValueArray45(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, + v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, + v39, v40, v41, v42, v43, v44, v45); +} + +template +internal::ValueArray46 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 
v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) { + return internal::ValueArray46(v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, + v38, v39, v40, v41, v42, v43, v44, v45, v46); +} + +template +internal::ValueArray47 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) { + return internal::ValueArray47(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, + v38, v39, v40, v41, v42, v43, v44, v45, v46, v47); +} + +template +internal::ValueArray48 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, + T48 v48) { + return internal::ValueArray48(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, + v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, + v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48); +} + +template +internal::ValueArray49 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, + T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, + T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, + T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, + T47 v47, T48 v48, T49 v49) { + return internal::ValueArray49(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, + v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49); +} + +template +internal::ValueArray50 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, + T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, + T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) { + return internal::ValueArray50(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50); +} + +// Bool() allows generating tests with parameters in a set of (false, true). +// +// Synopsis: +// Bool() +// - returns a generator producing sequences with elements {false, true}. 
+//
+// It is useful when testing code that depends on Boolean flags. Combinations
+// of multiple flags can be tested when several Bool()'s are combined using
+// the Combine() function.
+//
+// In the following example all tests in the test case FlagDependentTest
+// will be instantiated twice with parameters false and true.
+//
+// class FlagDependentTest : public testing::TestWithParam<bool> {
+//   virtual void SetUp() {
+//     external_flag = GetParam();
+//   }
+// }
+// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool());
+//
+inline internal::ParamGenerator<bool> Bool() {
+  return Values(false, true);
+}
+
+# if GTEST_HAS_COMBINE
+// Combine() allows the user to combine two or more sequences to produce
+// values of a Cartesian product of those sequences' elements.
+//
+// Synopsis:
+// Combine(gen1, gen2, ..., genN)
+//   - returns a generator producing sequences with elements coming from
+//     the Cartesian product of elements from the sequences generated by
+//     gen1, gen2, ..., genN. The sequence elements will have a type of
+//     tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
+//     of elements from sequences produced by gen1, gen2, ..., genN.
+//
+// Combine can have up to 10 arguments. This number is currently limited
+// by the maximum number of elements in the tuple implementation used by Google
+// Test.
+//
+// Example:
+//
+// This will instantiate tests in test case AnimalTest each one with
+// the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
+// tuple("dog", BLACK), and tuple("dog", WHITE):
+//
+// enum Color { BLACK, GRAY, WHITE };
+// class AnimalTest
+//     : public testing::TestWithParam<tuple<const char*, Color> > {...};
+//
+// TEST_P(AnimalTest, AnimalLooksNice) {...}
+//
+// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest,
+//                         Combine(Values("cat", "dog"),
+//                                 Values(BLACK, WHITE)));
+//
+// This will instantiate tests in FlagDependentTest with all variations of two
+// Boolean flags:
+//
+// class FlagDependentTest
+//     : public testing::TestWithParam<tuple<bool, bool> > {
+//   virtual void SetUp() {
+//     // Assigns external_flag_1 and external_flag_2 values from the tuple.
+//     tie(external_flag_1, external_flag_2) = GetParam();
+//   }
+// };
+//
+// TEST_P(FlagDependentTest, TestFeature1) {
+//   // Test your code using external_flag_1 and external_flag_2 here.
+// }
+// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest,
+//                         Combine(Bool(), Bool()));
+//
+template <typename Generator1, typename Generator2>
+internal::CartesianProductHolder2<Generator1, Generator2> Combine(
+    const Generator1& g1, const Generator2& g2) {
+  return internal::CartesianProductHolder2<Generator1, Generator2>(
+      g1, g2);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3>
+internal::CartesianProductHolder3<Generator1, Generator2, Generator3> Combine(
+    const Generator1& g1, const Generator2& g2, const Generator3& g3) {
+  return internal::CartesianProductHolder3<Generator1, Generator2, Generator3>(
+      g1, g2, g3);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+    typename Generator4>
+internal::CartesianProductHolder4<Generator1, Generator2, Generator3,
+    Generator4> Combine(
+    const Generator1& g1, const Generator2& g2, const Generator3& g3,
+    const Generator4& g4) {
+  return internal::CartesianProductHolder4<Generator1, Generator2, Generator3,
+      Generator4>(
+      g1, g2, g3, g4);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+    typename Generator4, typename Generator5>
+internal::CartesianProductHolder5<Generator1, Generator2, Generator3,
+    Generator4, Generator5> Combine(
+    const Generator1& g1, const Generator2& g2, const Generator3& g3,
+    const Generator4& g4, const Generator5& g5) {
+  return internal::CartesianProductHolder5<Generator1, Generator2, Generator3,
+      Generator4, Generator5>(
+      g1, g2, g3, g4, g5);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+    typename Generator4, typename Generator5, typename Generator6>
+internal::CartesianProductHolder6<Generator1, Generator2, Generator3,
+    Generator4, Generator5, Generator6> Combine(
+    const Generator1& g1, const Generator2& g2, const Generator3& g3,
+    const Generator4& g4, const Generator5& g5, const Generator6& g6) {
+  return internal::CartesianProductHolder6<Generator1, Generator2, Generator3,
+      Generator4, Generator5, Generator6>(
+      g1, g2, g3, g4, g5, g6);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+    typename Generator4, typename Generator5, typename Generator6,
+    typename Generator7>
+internal::CartesianProductHolder7<Generator1, Generator2, Generator3,
+    Generator4, Generator5, Generator6, Generator7> Combine(
+    const Generator1& g1, const Generator2& g2, const Generator3& g3,
+    const Generator4& g4, const Generator5& g5, const Generator6& g6,
+    const Generator7& g7) {
+  return internal::CartesianProductHolder7<Generator1, Generator2, Generator3,
+      Generator4, Generator5, Generator6, Generator7>(
+      g1, g2, g3, g4, g5, g6, g7);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+    typename Generator4, typename Generator5, typename Generator6,
+    typename Generator7, typename Generator8>
+internal::CartesianProductHolder8<Generator1, Generator2, Generator3,
+    Generator4, Generator5, Generator6, Generator7, Generator8> Combine(
+    const Generator1& g1, const Generator2& g2, const Generator3& g3,
+    const Generator4& g4, const Generator5& g5, const Generator6& g6,
+    const Generator7& g7, const Generator8& g8) {
+  return internal::CartesianProductHolder8<Generator1, Generator2, Generator3,
+      Generator4, Generator5, Generator6, Generator7, Generator8>(
+      g1, g2, g3, g4, g5, g6, g7, g8);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+    typename Generator4, typename Generator5, typename Generator6,
+    typename Generator7, typename Generator8, typename Generator9>
+internal::CartesianProductHolder9<Generator1, Generator2, Generator3,
+    Generator4, Generator5, Generator6, Generator7, Generator8, Generator9>
+Combine(
+    const Generator1& g1, const Generator2& g2, const Generator3& g3,
+    const Generator4& g4, const Generator5& g5, const Generator6& g6,
+    const Generator7& g7, const Generator8& g8, const Generator9& g9) {
+  return internal::CartesianProductHolder9<Generator1, Generator2, Generator3,
+      Generator4, Generator5, Generator6, Generator7, Generator8, Generator9>(
+      g1, g2, g3, g4, g5, g6, g7, g8, g9);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+    typename Generator4, typename Generator5, typename Generator6,
+    typename Generator7, typename Generator8, typename Generator9,
+    typename Generator10>
+internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
+    Generator4, Generator5, Generator6, Generator7, Generator8, Generator9,
+    Generator10> Combine(
+    const Generator1& g1, const Generator2& g2, const Generator3& g3,
+    const Generator4& g4, const Generator5& g5, const Generator6& g6,
+    const Generator7& g7, const Generator8& g8, const Generator9& g9,
+    const Generator10& g10) {
+  return internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
+      Generator4, Generator5, Generator6, Generator7, Generator8, Generator9,
+      Generator10>(
+      g1, g2, g3, g4, g5, g6, g7, g8, g9, g10);
+}
+# endif // GTEST_HAS_COMBINE
+
+# define TEST_P(test_case_name, test_name) \
+  class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
+      : public test_case_name { \
+   public: \
+    GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
+    virtual void TestBody(); \
+   private: \
+    static int AddToRegistry() { \
+      ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
+          GetTestCasePatternHolder<test_case_name>(\
+              #test_case_name, \
+              ::testing::internal::CodeLocation(\
+                  __FILE__, __LINE__))->AddTestPattern(\
+                      GTEST_STRINGIFY_(test_case_name), \
+                      GTEST_STRINGIFY_(test_name), \
+                      new ::testing::internal::TestMetaFactory< \
+                          GTEST_TEST_CLASS_NAME_(\
+                              test_case_name, test_name)>()); \
+      return 0; \
+    } \
+    static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(\
+        GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
+  }; \
+  int GTEST_TEST_CLASS_NAME_(test_case_name, \
+                             test_name)::gtest_registering_dummy_ = \
+      GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
+  void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
+
+// The optional last argument to INSTANTIATE_TEST_CASE_P allows the user
+// to specify a function or functor that generates custom test name suffixes
+// based on the test parameters. The function should accept one argument of
+// type testing::TestParamInfo<class ParamType>, and return std::string.
+//
+// testing::PrintToStringParamName is a builtin test suffix generator that
+// returns the value of testing::PrintToString(GetParam()).
+//
+// Note: test names must be non-empty, unique, and may only contain ASCII
+// alphanumeric characters or underscore. Because PrintToString adds quotes
+// to std::string and C strings, it won't work for these types.
+
+# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator, ...) \
+  static ::testing::internal::ParamGenerator<test_case_name::ParamType> \
+      gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
+  static ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \
+      const ::testing::TestParamInfo<test_case_name::ParamType>& info) { \
+    return ::testing::internal::GetParamNameGen<test_case_name::ParamType> \
+        (__VA_ARGS__)(info); \
+  } \
+  static int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \
+      ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
+          GetTestCasePatternHolder<test_case_name>(\
+              #test_case_name, \
+              ::testing::internal::CodeLocation(\
+                  __FILE__, __LINE__))->AddTestCaseInstantiation(\
+                      #prefix, \
+                      &gtest_##prefix##test_case_name##_EvalGenerator_, \
+                      &gtest_##prefix##test_case_name##_EvalGenerateName_, \
+                      __FILE__, __LINE__)
+
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +// Google C++ Testing and Mocking Framework definitions useful in production code. +// GOOGLETEST_CM0003 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_ + +// When you need to test the private or protected members of a class, +// use the FRIEND_TEST macro to declare your tests as friends of the +// class. For example: +// +// class MyClass { +// private: +// void PrivateMethod(); +// FRIEND_TEST(MyClassTest, PrivateMethodWorks); +// }; +// +// class MyClassTest : public testing::Test { +// // ... +// }; +// +// TEST_F(MyClassTest, PrivateMethodWorks) { +// // Can call MyClass::PrivateMethod() here. +// } +// +// Note: The test class must be in the same namespace as the class being tested. +// For example, putting MyClassTest in an anonymous namespace will not work. + +#define FRIEND_TEST(test_case_name, test_name)\ +friend class test_case_name##_##test_name##_Test + +#endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ +#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ + +#include +#include + +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + +namespace testing { + +// A copyable object representing the result of a test part (i.e. an +// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCESS()). 
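+//
+// For example (a sketch; FailureLogger is a hypothetical name): a custom
+// listener derived from testing::EmptyTestEventListener receives one
+// TestPartResult per assertion through OnTestPartResult, and would be
+// registered via testing::UnitTest::GetInstance()->listeners().Append(...):
+//
+// class FailureLogger : public testing::EmptyTestEventListener {
+//   virtual void OnTestPartResult(const testing::TestPartResult& result) {
+//     if (result.failed()) {
+//       // file_name() may be NULL when the location is unknown.
+//       printf("%s:%d: %s\n",
+//              result.file_name() ? result.file_name() : "unknown",
+//              result.line_number(), result.summary());
+//     }
+//   }
+// };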
+// +// Don't inherit from TestPartResult as its destructor is not virtual. +class GTEST_API_ TestPartResult { + public: + // The possible outcomes of a test part (i.e. an assertion or an + // explicit SUCCEED(), FAIL(), or ADD_FAILURE()). + enum Type { + kSuccess, // Succeeded. + kNonFatalFailure, // Failed but the test can continue. + kFatalFailure // Failed and the test should be terminated. + }; + + // C'tor. TestPartResult does NOT have a default constructor. + // Always use this constructor (with parameters) to create a + // TestPartResult object. + TestPartResult(Type a_type, + const char* a_file_name, + int a_line_number, + const char* a_message) + : type_(a_type), + file_name_(a_file_name == NULL ? "" : a_file_name), + line_number_(a_line_number), + summary_(ExtractSummary(a_message)), + message_(a_message) { + } + + // Gets the outcome of the test part. + Type type() const { return type_; } + + // Gets the name of the source file where the test part took place, or + // NULL if it's unknown. + const char* file_name() const { + return file_name_.empty() ? NULL : file_name_.c_str(); + } + + // Gets the line in the source file where the test part took place, + // or -1 if it's unknown. + int line_number() const { return line_number_; } + + // Gets the summary of the failure message. + const char* summary() const { return summary_.c_str(); } + + // Gets the message associated with the test part. + const char* message() const { return message_.c_str(); } + + // Returns true iff the test part passed. + bool passed() const { return type_ == kSuccess; } + + // Returns true iff the test part failed. + bool failed() const { return type_ != kSuccess; } + + // Returns true iff the test part non-fatally failed. + bool nonfatally_failed() const { return type_ == kNonFatalFailure; } + + // Returns true iff the test part fatally failed. + bool fatally_failed() const { return type_ == kFatalFailure; } + + private: + Type type_; + + // Gets the summary of the failure message by omitting the stack + // trace in it. + static std::string ExtractSummary(const char* message); + + // The name of the source file where the test part took place, or + // "" if the source file is unknown. + std::string file_name_; + // The line in the source file where the test part took place, or -1 + // if the line number is unknown. + int line_number_; + std::string summary_; // The test failure summary. + std::string message_; // The test failure message. +}; + +// Prints a TestPartResult object. +std::ostream& operator<<(std::ostream& os, const TestPartResult& result); + +// An array of TestPartResult objects. +// +// Don't inherit from TestPartResultArray as its destructor is not +// virtual. +class GTEST_API_ TestPartResultArray { + public: + TestPartResultArray() {} + + // Appends the given TestPartResult to the array. + void Append(const TestPartResult& result); + + // Returns the TestPartResult at the given index (0-based). + const TestPartResult& GetTestPartResult(int index) const; + + // Returns the number of TestPartResult objects in the array. + int size() const; + + private: + std::vector array_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray); +}; + +// This interface knows how to report a test part result. 
+class GTEST_API_ TestPartResultReporterInterface { + public: + virtual ~TestPartResultReporterInterface() {} + + virtual void ReportTestPartResult(const TestPartResult& result) = 0; +}; + +namespace internal { + +// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a +// statement generates new fatal failures. To do so it registers itself as the +// current test part result reporter. Besides checking if fatal failures were +// reported, it only delegates the reporting to the former result reporter. +// The original result reporter is restored in the destructor. +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +class GTEST_API_ HasNewFatalFailureHelper + : public TestPartResultReporterInterface { + public: + HasNewFatalFailureHelper(); + virtual ~HasNewFatalFailureHelper(); + virtual void ReportTestPartResult(const TestPartResult& result); + bool has_new_fatal_failure() const { return has_new_fatal_failure_; } + private: + bool has_new_fatal_failure_; + TestPartResultReporterInterface* original_reporter_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper); +}; + +} // namespace internal + +} // namespace testing + +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + +#endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// GOOGLETEST_CM0001 DO NOT DELETE + +#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ + +// This header implements typed tests and type-parameterized tests. + +// Typed (aka type-driven) tests repeat the same test for types in a +// list. You must know which types you want to test with when writing +// typed tests. Here's how you do it: + +#if 0 + +// First, define a fixture class template. It should be parameterized +// by a type. Remember to derive it from testing::Test. +template +class FooTest : public testing::Test { + public: + ... 
+ typedef std::list List; + static T shared_; + T value_; +}; + +// Next, associate a list of types with the test case, which will be +// repeated for each type in the list. The typedef is necessary for +// the macro to parse correctly. +typedef testing::Types MyTypes; +TYPED_TEST_CASE(FooTest, MyTypes); + +// If the type list contains only one type, you can write that type +// directly without Types<...>: +// TYPED_TEST_CASE(FooTest, int); + +// Then, use TYPED_TEST() instead of TEST_F() to define as many typed +// tests for this test case as you want. +TYPED_TEST(FooTest, DoesBlah) { + // Inside a test, refer to TypeParam to get the type parameter. + // Since we are inside a derived class template, C++ requires use to + // visit the members of FooTest via 'this'. + TypeParam n = this->value_; + + // To visit static members of the fixture, add the TestFixture:: + // prefix. + n += TestFixture::shared_; + + // To refer to typedefs in the fixture, add the "typename + // TestFixture::" prefix. + typename TestFixture::List values; + values.push_back(n); + ... +} + +TYPED_TEST(FooTest, HasPropertyA) { ... } + +// TYPED_TEST_CASE takes an optional third argument which allows to specify a +// class that generates custom test name suffixes based on the type. This should +// be a class which has a static template function GetName(int index) returning +// a string for each type. The provided integer index equals the index of the +// type in the provided type list. In many cases the index can be ignored. +// +// For example: +// class MyTypeNames { +// public: +// template +// static std::string GetName(int) { +// if (std::is_same()) return "char"; +// if (std::is_same()) return "int"; +// if (std::is_same()) return "unsignedInt"; +// } +// }; +// TYPED_TEST_CASE(FooTest, MyTypes, MyTypeNames); + +#endif // 0 + +// Type-parameterized tests are abstract test patterns parameterized +// by a type. Compared with typed tests, type-parameterized tests +// allow you to define the test pattern without knowing what the type +// parameters are. The defined pattern can be instantiated with +// different types any number of times, in any number of translation +// units. +// +// If you are designing an interface or concept, you can define a +// suite of type-parameterized tests to verify properties that any +// valid implementation of the interface/concept should have. Then, +// each implementation can easily instantiate the test suite to verify +// that it conforms to the requirements, without having to write +// similar tests repeatedly. Here's an example: + +#if 0 + +// First, define a fixture class template. It should be parameterized +// by a type. Remember to derive it from testing::Test. +template +class FooTest : public testing::Test { + ... +}; + +// Next, declare that you will define a type-parameterized test case +// (the _P suffix is for "parameterized" or "pattern", whichever you +// prefer): +TYPED_TEST_CASE_P(FooTest); + +// Then, use TYPED_TEST_P() to define as many type-parameterized tests +// for this type-parameterized test case as you want. +TYPED_TEST_P(FooTest, DoesBlah) { + // Inside a test, refer to TypeParam to get the type parameter. + TypeParam n = 0; + ... +} + +TYPED_TEST_P(FooTest, HasPropertyA) { ... } + +// Now the tricky part: you need to register all test patterns before +// you can instantiate them. The first argument of the macro is the +// test case name; the rest are the names of the tests in this test +// case. 
+REGISTER_TYPED_TEST_CASE_P(FooTest, + DoesBlah, HasPropertyA); + +// Finally, you are free to instantiate the pattern with the types you +// want. If you put the above code in a header file, you can #include +// it in multiple C++ source files and instantiate it multiple times. +// +// To distinguish different instances of the pattern, the first +// argument to the INSTANTIATE_* macro is a prefix that will be added +// to the actual test case name. Remember to pick unique prefixes for +// different instances. +typedef testing::Types MyTypes; +INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes); + +// If the type list contains only one type, you can write that type +// directly without Types<...>: +// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int); +// +// Similar to the optional argument of TYPED_TEST_CASE above, +// INSTANTIATE_TEST_CASE_P takes an optional fourth argument which allows to +// generate custom names. +// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes, MyTypeNames); + +#endif // 0 + + +// Implements typed tests. + +#if GTEST_HAS_TYPED_TEST + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the name of the typedef for the type parameters of the +// given test case. +# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_ + +// Expands to the name of the typedef for the NameGenerator, responsible for +// creating the suffixes of the name. +#define GTEST_NAME_GENERATOR_(TestCaseName) \ + gtest_type_params_##TestCaseName##_NameGenerator + +// The 'Types' template argument below must have spaces around it +// since some compilers may choke on '>>' when passing a template +// instance (e.g. Types) +# define TYPED_TEST_CASE(CaseName, Types, ...) \ + typedef ::testing::internal::TypeList< Types >::type GTEST_TYPE_PARAMS_( \ + CaseName); \ + typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type \ + GTEST_NAME_GENERATOR_(CaseName) + +# define TYPED_TEST(CaseName, TestName) \ + template \ + class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \ + : public CaseName { \ + private: \ + typedef CaseName TestFixture; \ + typedef gtest_TypeParam_ TypeParam; \ + virtual void TestBody(); \ + }; \ + static bool gtest_##CaseName##_##TestName##_registered_ \ + GTEST_ATTRIBUTE_UNUSED_ = \ + ::testing::internal::TypeParameterizedTest< \ + CaseName, \ + ::testing::internal::TemplateSel, \ + GTEST_TYPE_PARAMS_( \ + CaseName)>::Register("", \ + ::testing::internal::CodeLocation( \ + __FILE__, __LINE__), \ + #CaseName, #TestName, 0, \ + ::testing::internal::GenerateNames< \ + GTEST_NAME_GENERATOR_(CaseName), \ + GTEST_TYPE_PARAMS_(CaseName)>()); \ + template \ + void GTEST_TEST_CLASS_NAME_(CaseName, \ + TestName)::TestBody() + +#endif // GTEST_HAS_TYPED_TEST + +// Implements type-parameterized tests. + +#if GTEST_HAS_TYPED_TEST_P + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the namespace name that the type-parameterized tests for +// the given type-parameterized test case are defined in. The exact +// name of the namespace is subject to change without notice. +# define GTEST_CASE_NAMESPACE_(TestCaseName) \ + gtest_case_##TestCaseName##_ + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the name of the variable used to remember the names of +// the defined tests in the given test case. +# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \ + gtest_typed_test_case_p_state_##TestCaseName##_ + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY. 
+//
+// Expands to the name of the variable used to remember the names of
+// the registered tests in the given test case.
+# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
+  gtest_registered_test_names_##TestCaseName##_
+
+// The variables defined in the type-parameterized test macros are
+// static as typically these macros are used in a .h file that can be
+// #included in multiple translation units linked together.
+# define TYPED_TEST_CASE_P(CaseName) \
+  static ::testing::internal::TypedTestCasePState \
+      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)
+
+# define TYPED_TEST_P(CaseName, TestName) \
+  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
+  template <typename gtest_TypeParam_> \
+  class TestName : public CaseName<gtest_TypeParam_> { \
+   private: \
+    typedef CaseName<gtest_TypeParam_> TestFixture; \
+    typedef gtest_TypeParam_ TypeParam; \
+    virtual void TestBody(); \
+  }; \
+  static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
+      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
+          __FILE__, __LINE__, #CaseName, #TestName); \
+  } \
+  template <typename gtest_TypeParam_> \
+  void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()
+
+# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
+  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
+  typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
+  } \
+  static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) \
+      GTEST_ATTRIBUTE_UNUSED_ = \
+          GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames( \
+              __FILE__, __LINE__, #__VA_ARGS__)
+
+// The 'Types' template argument below must have spaces around it
+// since some compilers may choke on '>>' when passing a template
+// instance (e.g. Types<int>)
+# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types, ...) \
+  static bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \
+      ::testing::internal::TypeParameterizedTestCase< \
+          CaseName, GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \
+          ::testing::internal::TypeList< Types >::type>:: \
+              Register(#Prefix, \
+                       ::testing::internal::CodeLocation(__FILE__, __LINE__), \
+                       &GTEST_TYPED_TEST_CASE_P_STATE_(CaseName), #CaseName, \
+                       GTEST_REGISTERED_TEST_NAMES_(CaseName), \
+                       ::testing::internal::GenerateNames< \
+                           ::testing::internal::NameGeneratorSelector< \
+                               __VA_ARGS__>::type, \
+                           ::testing::internal::TypeList< Types >::type>())
+
+#endif // GTEST_HAS_TYPED_TEST_P
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// Depending on the platform, different string classes are available.
+// On Linux, in addition to ::std::string, Google also makes use of
+// class ::string, which has the same interface as ::std::string, but
+// has a different implementation.
+//
+// You can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that
+// ::string is available AND is a distinct type from ::std::string, or
+// define it to 0 to indicate otherwise.
+//
+// If ::std::string and ::string are the same class on your platform
+// due to aliasing, you should define GTEST_HAS_GLOBAL_STRING to 0.
+//
+// If you do not define GTEST_HAS_GLOBAL_STRING, it is defined
+// heuristically.
+
+namespace testing {
+
+// Silence C4100 (unreferenced formal parameter) and C4805
+// (unsafe mix of type 'const int' and type 'const bool').
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable:4805)
+# pragma warning(disable:4100)
+#endif
+
+
+// Declares the flags.
+
+// This flag temporarily enables the disabled tests.
+GTEST_DECLARE_bool_(also_run_disabled_tests); + +// This flag brings the debugger on an assertion failure. +GTEST_DECLARE_bool_(break_on_failure); + +// This flag controls whether Google Test catches all test-thrown exceptions +// and logs them as failures. +GTEST_DECLARE_bool_(catch_exceptions); + +// This flag enables using colors in terminal output. Available values are +// "yes" to enable colors, "no" (disable colors), or "auto" (the default) +// to let Google Test decide. +GTEST_DECLARE_string_(color); + +// This flag sets up the filter to select by name using a glob pattern +// the tests to run. If the filter is not given all tests are executed. +GTEST_DECLARE_string_(filter); + +// This flag controls whether Google Test installs a signal handler that dumps +// debugging information when fatal signals are raised. +GTEST_DECLARE_bool_(install_failure_signal_handler); + +// This flag causes the Google Test to list tests. None of the tests listed +// are actually run if the flag is provided. +GTEST_DECLARE_bool_(list_tests); + +// This flag controls whether Google Test emits a detailed XML report to a file +// in addition to its normal textual output. +GTEST_DECLARE_string_(output); + +// This flags control whether Google Test prints the elapsed time for each +// test. +GTEST_DECLARE_bool_(print_time); + +// This flags control whether Google Test prints UTF8 characters as text. +GTEST_DECLARE_bool_(print_utf8); + +// This flag specifies the random number seed. +GTEST_DECLARE_int32_(random_seed); + +// This flag sets how many times the tests are repeated. The default value +// is 1. If the value is -1 the tests are repeating forever. +GTEST_DECLARE_int32_(repeat); + +// This flag controls whether Google Test includes Google Test internal +// stack frames in failure stack traces. +GTEST_DECLARE_bool_(show_internal_stack_frames); + +// When this flag is specified, tests' order is randomized on every iteration. +GTEST_DECLARE_bool_(shuffle); + +// This flag specifies the maximum number of stack frames to be +// printed in a failure message. +GTEST_DECLARE_int32_(stack_trace_depth); + +// When this flag is specified, a failed assertion will throw an +// exception if exceptions are enabled, or exit the program with a +// non-zero code otherwise. For use with an external test framework. +GTEST_DECLARE_bool_(throw_on_failure); + +// When this flag is set with a "host:port" string, on supported +// platforms test results are streamed to the specified port on +// the specified host machine. +GTEST_DECLARE_string_(stream_result_to); + +#if GTEST_USE_OWN_FLAGFILE_FLAG_ +GTEST_DECLARE_string_(flagfile); +#endif // GTEST_USE_OWN_FLAGFILE_FLAG_ + +// The upper limit for valid stack trace depths. +const int kMaxStackTraceDepth = 100; + +namespace internal { + +class AssertHelper; +class DefaultGlobalTestPartResultReporter; +class ExecDeathTest; +class NoExecDeathTest; +class FinalSuccessChecker; +class GTestFlagSaver; +class StreamingListenerTest; +class TestResultAccessor; +class TestEventListenersAccessor; +class TestEventRepeater; +class UnitTestRecordPropertyTestHelper; +class WindowsDeathTest; +class FuchsiaDeathTest; +class UnitTestImpl* GetUnitTestImpl(); +void ReportFailureInUnknownLocation(TestPartResult::Type result_type, + const std::string& message); + +} // namespace internal + +// The friend relationship of some of these classes is cyclic. +// If we don't forward declare them the compiler might confuse the classes +// in friendship clauses with same named classes on the scope. 
+class Test; +class TestCase; +class TestInfo; +class UnitTest; + +// A class for indicating whether an assertion was successful. When +// the assertion wasn't successful, the AssertionResult object +// remembers a non-empty message that describes how it failed. +// +// To create an instance of this class, use one of the factory functions +// (AssertionSuccess() and AssertionFailure()). +// +// This class is useful for two purposes: +// 1. Defining predicate functions to be used with Boolean test assertions +// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts +// 2. Defining predicate-format functions to be +// used with predicate assertions (ASSERT_PRED_FORMAT*, etc). +// +// For example, if you define IsEven predicate: +// +// testing::AssertionResult IsEven(int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess(); +// else +// return testing::AssertionFailure() << n << " is odd"; +// } +// +// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5))) +// will print the message +// +// Value of: IsEven(Fib(5)) +// Actual: false (5 is odd) +// Expected: true +// +// instead of a more opaque +// +// Value of: IsEven(Fib(5)) +// Actual: false +// Expected: true +// +// in case IsEven is a simple Boolean predicate. +// +// If you expect your predicate to be reused and want to support informative +// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up +// about half as often as positive ones in our tests), supply messages for +// both success and failure cases: +// +// testing::AssertionResult IsEven(int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess() << n << " is even"; +// else +// return testing::AssertionFailure() << n << " is odd"; +// } +// +// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print +// +// Value of: IsEven(Fib(6)) +// Actual: true (8 is even) +// Expected: false +// +// NB: Predicates that support negative Boolean assertions have reduced +// performance in positive ones so be careful not to use them in tests +// that have lots (tens of thousands) of positive Boolean assertions. +// +// To use this class with EXPECT_PRED_FORMAT assertions such as: +// +// // Verifies that Foo() returns an even number. +// EXPECT_PRED_FORMAT1(IsEven, Foo()); +// +// you need to define: +// +// testing::AssertionResult IsEven(const char* expr, int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess(); +// else +// return testing::AssertionFailure() +// << "Expected: " << expr << " is even\n Actual: it's " << n; +// } +// +// If Foo() returns 5, you will see the following message: +// +// Expected: Foo() is even +// Actual: it's 5 +// +class GTEST_API_ AssertionResult { + public: + // Copy constructor. + // Used in EXPECT_TRUE/FALSE(assertion_result). + AssertionResult(const AssertionResult& other); + +#if defined(_MSC_VER) && _MSC_VER < 1910 + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */) +#endif + + // Used in the EXPECT_TRUE/FALSE(bool_expression). + // + // T must be contextually convertible to bool. + // + // The second parameter prevents this overload from being considered if + // the argument is implicitly convertible to AssertionResult. In that case + // we want AssertionResult's copy constructor to be used. 
+ template + explicit AssertionResult( + const T& success, + typename internal::EnableIf< + !internal::ImplicitlyConvertible::value>::type* + /*enabler*/ = NULL) + : success_(success) {} + +#if defined(_MSC_VER) && _MSC_VER < 1910 + GTEST_DISABLE_MSC_WARNINGS_POP_() +#endif + + // Assignment operator. + AssertionResult& operator=(AssertionResult other) { + swap(other); + return *this; + } + + // Returns true iff the assertion succeeded. + operator bool() const { return success_; } // NOLINT + + // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. + AssertionResult operator!() const; + + // Returns the text streamed into this AssertionResult. Test assertions + // use it when they fail (i.e., the predicate's outcome doesn't match the + // assertion's expectation). When nothing has been streamed into the + // object, returns an empty string. + const char* message() const { + return message_.get() != NULL ? message_->c_str() : ""; + } + // FIXME: Remove this after making sure no clients use it. + // Deprecated; please use message() instead. + const char* failure_message() const { return message(); } + + // Streams a custom failure message into this object. + template AssertionResult& operator<<(const T& value) { + AppendMessage(Message() << value); + return *this; + } + + // Allows streaming basic output manipulators such as endl or flush into + // this object. + AssertionResult& operator<<( + ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) { + AppendMessage(Message() << basic_manipulator); + return *this; + } + + private: + // Appends the contents of message to message_. + void AppendMessage(const Message& a_message) { + if (message_.get() == NULL) + message_.reset(new ::std::string); + message_->append(a_message.GetString().c_str()); + } + + // Swap the contents of this AssertionResult with other. + void swap(AssertionResult& other); + + // Stores result of the assertion predicate. + bool success_; + // Stores the message describing the condition in case the expectation + // construct is not satisfied with the predicate's outcome. + // Referenced via a pointer to avoid taking too much stack frame space + // with test assertions. + internal::scoped_ptr< ::std::string> message_; +}; + +// Makes a successful assertion result. +GTEST_API_ AssertionResult AssertionSuccess(); + +// Makes a failed assertion result. +GTEST_API_ AssertionResult AssertionFailure(); + +// Makes a failed assertion result with the given failure message. +// Deprecated; use AssertionFailure() << msg. +GTEST_API_ AssertionResult AssertionFailure(const Message& msg); + +} // namespace testing + +// Includes the auto-generated header that implements a family of generic +// predicate assertion macros. This include comes late because it relies on +// APIs declared above. +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
+
+// Includes the auto-generated header that implements a family of generic
+// predicate assertion macros. This include comes late because it relies on
+// APIs declared above.
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is AUTOMATICALLY GENERATED on 01/02/2018 by command
+// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND!
+//
+// Implements a family of generic predicate assertion macros.
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+
+
+namespace testing {
+
+// This header implements a family of generic predicate assertion
+// macros:
+//
+//   ASSERT_PRED_FORMAT1(pred_format, v1)
+//   ASSERT_PRED_FORMAT2(pred_format, v1, v2)
+//   ...
+//
+// where pred_format is a function or functor that takes n (in the
+// case of ASSERT_PRED_FORMATn) values and their source expression
+// text, and returns a testing::AssertionResult. See the definition
+// of ASSERT_EQ in gtest.h for an example.
+//
+// If you don't care about formatting, you can use the more
+// restrictive version:
+//
+//   ASSERT_PRED1(pred, v1)
+//   ASSERT_PRED2(pred, v1, v2)
+//   ...
+//
+// where pred is an n-ary function or functor that returns bool,
+// and the values v1, v2, ..., must support the << operator for
+// streaming to std::ostream.
+//
+// We also define the EXPECT_* variations.
+//
+// For now we only support predicates whose arity is at most 5.
+
+// GTEST_ASSERT_ is the basic statement to which all of the assertions
+// in this file reduce. Don't use this in your code.
+
+#define GTEST_ASSERT_(expression, on_failure) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (const ::testing::AssertionResult gtest_ar = (expression)) \
+    ; \
+  else \
+    on_failure(gtest_ar.failure_message())
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use
+// this in your code.
+template <typename Pred, typename T1>
+AssertionResult AssertPred1Helper(const char* pred_text,
+                                  const char* e1,
+                                  Pred pred,
+                                  const T1& v1) {
+  if (pred(v1)) return AssertionSuccess();
+
+  return AssertionFailure() << pred_text << "("
+                            << e1 << ") evaluates to false, where"
+                            << "\n" << e1 << " evaluates to " << v1;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, v1), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use
+// this in your code.
+#define GTEST_PRED1_(pred, v1, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \
+                                             #v1, \
+                                             pred, \
+                                             v1), on_failure)
+
+// Unary predicate assertion macros.
+#define EXPECT_PRED_FORMAT1(pred_format, v1) \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED1(pred, v1) \
+  GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT1(pred_format, v1) \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED1(pred, v1) \
+  GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
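+
+// Usage sketch (illustrative only; IsPositive and PredTest are hypothetical
+// names):
+//
+//   bool IsPositive(int n) { return n > 0; }
+//
+//   TEST(PredTest, UnaryPredicate) {
+//     EXPECT_PRED1(IsPositive, 5);
+//     // A failing EXPECT_PRED1(IsPositive, -2) would report:
+//     //   IsPositive(-2) evaluates to false, where
+//     //   -2 evaluates to -2
+//   }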
+
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use
+// this in your code.
+template <typename Pred, typename T1, typename T2>
+AssertionResult AssertPred2Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2) {
+  if (pred(v1, v2)) return AssertionSuccess();
+
+  return AssertionFailure() << pred_text << "("
+                            << e1 << ", "
+                            << e2 << ") evaluates to false, where"
+                            << "\n" << e1 << " evaluates to " << v1
+                            << "\n" << e2 << " evaluates to " << v2;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use
+// this in your code.
+#define GTEST_PRED2_(pred, v1, v2, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             pred, \
+                                             v1, \
+                                             v2), on_failure)
+
+// Binary predicate assertion macros.
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED2(pred, v1, v2) \
+  GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED2(pred, v1, v2) \
+  GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
+
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use
+// this in your code.
+template <typename Pred, typename T1, typename T2, typename T3>
+AssertionResult AssertPred3Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3) {
+  if (pred(v1, v2, v3)) return AssertionSuccess();
+
+  return AssertionFailure() << pred_text << "("
+                            << e1 << ", "
+                            << e2 << ", "
+                            << e3 << ") evaluates to false, where"
+                            << "\n" << e1 << " evaluates to " << v1
+                            << "\n" << e2 << " evaluates to " << v2
+                            << "\n" << e3 << " evaluates to " << v3;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use
+// this in your code.
+#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3), on_failure)
+
+// Ternary predicate assertion macros.
+#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED3(pred, v1, v2, v3) \
+  GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED3(pred, v1, v2, v3) \
+  GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
+
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use
+// this in your code.
+template <typename Pred, typename T1, typename T2, typename T3, typename T4>
+AssertionResult AssertPred4Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  const char* e4,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3,
+                                  const T4& v4) {
+  if (pred(v1, v2, v3, v4)) return AssertionSuccess();
+
+  return AssertionFailure() << pred_text << "("
+                            << e1 << ", "
+                            << e2 << ", "
+                            << e3 << ", "
+                            << e4 << ") evaluates to false, where"
+                            << "\n" << e1 << " evaluates to " << v1
+                            << "\n" << e2 << " evaluates to " << v2
+                            << "\n" << e3 << " evaluates to " << v3
+                            << "\n" << e4 << " evaluates to " << v4;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use
+// this in your code.
+#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             #v4, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3, \
+                                             v4), on_failure)
+
+// 4-ary predicate assertion macros.
+#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
+  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
+  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use
+// this in your code.
+template <typename Pred, typename T1, typename T2, typename T3, typename T4,
+          typename T5>
+AssertionResult AssertPred5Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  const char* e4,
+                                  const char* e5,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3,
+                                  const T4& v4,
+                                  const T5& v5) {
+  if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();
+
+  return AssertionFailure() << pred_text << "("
+                            << e1 << ", "
+                            << e2 << ", "
+                            << e3 << ", "
+                            << e4 << ", "
+                            << e5 << ") evaluates to false, where"
+                            << "\n" << e1 << " evaluates to " << v1
+                            << "\n" << e2 << " evaluates to " << v2
+                            << "\n" << e3 << " evaluates to " << v3
+                            << "\n" << e4 << " evaluates to " << v4
+                            << "\n" << e5 << " evaluates to " << v5;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED5. Don't use
+// this in your code.
+#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             #v4, \
+                                             #v5, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3, \
+                                             v4, \
+                                             v5), on_failure)
+
+// 5-ary predicate assertion macros.
+#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
+  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
+  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+
+
+
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
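+
+// Usage sketch of a predicate-formatter (illustrative only; AssertDivisible
+// and PredFormatTest are hypothetical names): the *_PRED_FORMAT* macros pass
+// both the argument expressions and their values, so the formatter can
+// produce a precise failure message.
+//
+//   testing::AssertionResult AssertDivisible(const char* m_expr,
+//                                            const char* n_expr,
+//                                            int m, int n) {
+//     if (n != 0 && m % n == 0) return testing::AssertionSuccess();
+//     return testing::AssertionFailure()
+//         << m_expr << " is not divisible by " << n_expr
+//         << " (" << m << " vs " << n << ")";
+//   }
+//
+//   TEST(PredFormatTest, DivisibilityHolds) {
+//     EXPECT_PRED_FORMAT2(AssertDivisible, 10, 5);
+//   }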
+
+namespace testing {
+
+// The abstract class that all tests inherit from.
+//
+// In Google Test, a unit test program contains one or many TestCases, and
+// each TestCase contains one or many Tests.
+//
+// When you define a test using the TEST macro, you don't need to
+// explicitly derive from Test - the TEST macro automatically does
+// this for you.
+//
+// The only time you derive from Test is when defining a test fixture
+// to be used in a TEST_F. For example:
+//
+//   class FooTest : public testing::Test {
+//    protected:
+//     void SetUp() override { ... }
+//     void TearDown() override { ... }
+//     ...
+//   };
+//
+//   TEST_F(FooTest, Bar) { ... }
+//   TEST_F(FooTest, Baz) { ... }
+//
+// Test is not copyable.
+class GTEST_API_ Test {
+ public:
+  friend class TestInfo;
+
+  // Defines types for pointers to functions that set up and tear down
+  // a test case.
+  typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc;
+  typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc;
+
+  // The d'tor is virtual as we intend to inherit from Test.
+  virtual ~Test();
+
+  // Sets up the stuff shared by all tests in this test case.
+  //
+  // Google Test will call Foo::SetUpTestCase() before running the first
+  // test in test case Foo. Hence a sub-class can define its own
+  // SetUpTestCase() method to shadow the one defined in the super
+  // class.
+  static void SetUpTestCase() {}
+
+  // Tears down the stuff shared by all tests in this test case.
+  //
+  // Google Test will call Foo::TearDownTestCase() after running the last
+  // test in test case Foo. Hence a sub-class can define its own
+  // TearDownTestCase() method to shadow the one defined in the super
+  // class.
+  static void TearDownTestCase() {}
+
+  // Returns true iff the current test has a fatal failure.
+  static bool HasFatalFailure();
+
+  // Returns true iff the current test has a non-fatal failure.
+  static bool HasNonfatalFailure();
+
+  // Returns true iff the current test has a (either fatal or
+  // non-fatal) failure.
+  static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }
+
+  // Logs a property for the current test, test case, or for the entire
+  // invocation of the test program when used outside of the context of a
+  // test case. Only the last value for a given key is remembered. These
+  // are public static so they can be called from utility functions that are
+  // not members of the test fixture. Calls to RecordProperty made during
+  // lifespan of the test (from the moment its constructor starts to the
+  // moment its destructor finishes) will be output in XML as attributes of
+  // the <testcase> element. Properties recorded from fixture's
+  // SetUpTestCase or TearDownTestCase are logged as attributes of the
+  // corresponding <testsuite> element. Calls to RecordProperty made in the
+  // global context (before or after invocation of RUN_ALL_TESTS and from
+  // SetUp/TearDown method of Environment objects registered with Google
+  // Test) will be output as attributes of the <testsuites> element.
+  static void RecordProperty(const std::string& key, const std::string& value);
+  static void RecordProperty(const std::string& key, int value);
+
+ protected:
+  // Creates a Test object.
+  Test();
+
+  // Sets up the test fixture.
+  virtual void SetUp();
+
+  // Tears down the test fixture.
+  virtual void TearDown();
+
+ private:
+  // Returns true iff the current test has the same fixture class as
+  // the first test in the current test case.
+  static bool HasSameFixtureClass();
+
+  // Runs the test after the test fixture has been set up.
+  //
+  // A sub-class must implement this to define the test logic.
+  //
+  // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
+  // Instead, use the TEST or TEST_F macro.
+  virtual void TestBody() = 0;
+
+  // Sets up, executes, and tears down the test.
+  void Run();
+
+  // Deletes self. We deliberately pick an unusual name for this
+  // internal method to avoid clashing with names used in user TESTs.
+  void DeleteSelf_() { delete this; }
+
+  const internal::scoped_ptr< GTEST_FLAG_SAVER_ > gtest_flag_saver_;
+
+  // Often a user misspells SetUp() as Setup() and spends a long time
+  // wondering why it is never called by Google Test. The declaration of
+  // the following method is solely for catching such an error at
+  // compile time:
+  //
+  //   - The return type is deliberately chosen to be not void, so it
+  //   will be a conflict if void Setup() is declared in the user's
+  //   test fixture.
+  //
+  //   - This method is private, so it will be another compiler error
+  //   if the method is called from the user's test fixture.
+  //
+  // DO NOT OVERRIDE THIS FUNCTION.
+  //
+  // If you see an error about overriding the following function or
+  // about it being private, you have mis-spelled SetUp() as Setup().
+  struct Setup_should_be_spelled_SetUp {};
+  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
+
+  // We disallow copying Tests.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(Test);
+};
+
+typedef internal::TimeInMillis TimeInMillis;
+
+// A copyable object representing a user specified test property which can be
+// output as a key/value string pair.
+//
+// Don't inherit from TestProperty as its destructor is not virtual.
+class TestProperty {
+ public:
+  // C'tor. TestProperty does NOT have a default constructor.
+  // Always use this constructor (with parameters) to create a
+  // TestProperty object.
+  TestProperty(const std::string& a_key, const std::string& a_value) :
+    key_(a_key), value_(a_value) {
+  }
+
+  // Gets the user supplied key.
+  const char* key() const {
+    return key_.c_str();
+  }
+
+  // Gets the user supplied value.
+  const char* value() const {
+    return value_.c_str();
+  }
+
+  // Sets a new value, overriding the one supplied in the constructor.
+  void SetValue(const std::string& new_value) {
+    value_ = new_value;
+  }
+
+ private:
+  // The key supplied by the user.
+  std::string key_;
+  // The value supplied by the user.
+  std::string value_;
+};
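+
+// Usage sketch (illustrative only; ReportTest is a hypothetical name):
+// RecordProperty attaches key/value pairs to the XML report, and only the
+// last value recorded for a given key is kept.
+//
+//   TEST(ReportTest, EmitsProperties) {
+//     RecordProperty("build_id", 1234);
+//     RecordProperty("owner", "infra-team");
+//   }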
+
+// The result of a single Test. This includes a list of
+// TestPartResults, a list of TestProperties, a count of how many
+// death tests there are in the Test, and how much time it took to run
+// the Test.
+//
+// TestResult is not copyable.
+class GTEST_API_ TestResult {
+ public:
+  // Creates an empty TestResult.
+  TestResult();
+
+  // D'tor. Do not inherit from TestResult.
+  ~TestResult();
+
+  // Gets the number of all test parts. This is the sum of the number
+  // of successful test parts and the number of failed test parts.
+  int total_part_count() const;
+
+  // Returns the number of the test properties.
+  int test_property_count() const;
+
+  // Returns true iff the test passed (i.e. no test part failed).
+  bool Passed() const { return !Failed(); }
+
+  // Returns true iff the test failed.
+  bool Failed() const;
+
+  // Returns true iff the test fatally failed.
+  bool HasFatalFailure() const;
+
+  // Returns true iff the test has a non-fatal failure.
+  bool HasNonfatalFailure() const;
+
+  // Returns the elapsed time, in milliseconds.
+  TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+  // Returns the i-th test part result among all the results. i can range
+  // from 0 to total_part_count() - 1. If i is not in that range, aborts
+  // the program.
+  const TestPartResult& GetTestPartResult(int i) const;
+
+  // Returns the i-th test property. i can range from 0 to
+  // test_property_count() - 1. If i is not in that range, aborts the
+  // program.
+  const TestProperty& GetTestProperty(int i) const;
+
+ private:
+  friend class TestInfo;
+  friend class TestCase;
+  friend class UnitTest;
+  friend class internal::DefaultGlobalTestPartResultReporter;
+  friend class internal::ExecDeathTest;
+  friend class internal::TestResultAccessor;
+  friend class internal::UnitTestImpl;
+  friend class internal::WindowsDeathTest;
+  friend class internal::FuchsiaDeathTest;
+
+  // Gets the vector of TestPartResults.
+  const std::vector<TestPartResult>& test_part_results() const {
+    return test_part_results_;
+  }
+
+  // Gets the vector of TestProperties.
+  const std::vector<TestProperty>& test_properties() const {
+    return test_properties_;
+  }
+
+  // Sets the elapsed time.
+  void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; }
+
+  // Adds a test property to the list. The property is validated and may add
+  // a non-fatal failure if invalid (e.g., if it conflicts with reserved
+  // key names). If a property is already recorded for the same key, the
+  // value will be updated, rather than storing multiple values for the same
+  // key. xml_element specifies the element for which the property is being
+  // recorded and is used for validation.
+  void RecordProperty(const std::string& xml_element,
+                      const TestProperty& test_property);
+
+  // Adds a failure if the key is a reserved attribute of Google Test
+  // testcase tags. Returns true if the property is valid.
+  // FIXME: Validate attribute names are legal and human readable.
+  static bool ValidateTestProperty(const std::string& xml_element,
+                                   const TestProperty& test_property);
+
+  // Adds a test part result to the list.
+  void AddTestPartResult(const TestPartResult& test_part_result);
+
+  // Returns the death test count.
+  int death_test_count() const { return death_test_count_; }
+
+  // Increments the death test count, returning the new count.
+  int increment_death_test_count() { return ++death_test_count_; }
+
+  // Clears the test part results.
+  void ClearTestPartResults();
+
+  // Clears the object.
+  void Clear();
+
+  // Protects mutable state of the property vector and of owned
+  // properties, whose values may be updated.
+  internal::Mutex test_properites_mutex_;
+
+  // The vector of TestPartResults
+  std::vector<TestPartResult> test_part_results_;
+  // The vector of TestProperties
+  std::vector<TestProperty> test_properties_;
+  // Running count of death tests.
+  int death_test_count_;
+  // The elapsed time, in milliseconds.
+ TimeInMillis elapsed_time_; + + // We disallow copying TestResult. + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult); +}; // class TestResult + +// A TestInfo object stores the following information about a test: +// +// Test case name +// Test name +// Whether the test should be run +// A function pointer that creates the test object when invoked +// Test result +// +// The constructor of TestInfo registers itself with the UnitTest +// singleton such that the RUN_ALL_TESTS() macro knows which tests to +// run. +class GTEST_API_ TestInfo { + public: + // Destructs a TestInfo object. This function is not virtual, so + // don't inherit from TestInfo. + ~TestInfo(); + + // Returns the test case name. + const char* test_case_name() const { return test_case_name_.c_str(); } + + // Returns the test name. + const char* name() const { return name_.c_str(); } + + // Returns the name of the parameter type, or NULL if this is not a typed + // or a type-parameterized test. + const char* type_param() const { + if (type_param_.get() != NULL) + return type_param_->c_str(); + return NULL; + } + + // Returns the text representation of the value parameter, or NULL if this + // is not a value-parameterized test. + const char* value_param() const { + if (value_param_.get() != NULL) + return value_param_->c_str(); + return NULL; + } + + // Returns the file name where this test is defined. + const char* file() const { return location_.file.c_str(); } + + // Returns the line where this test is defined. + int line() const { return location_.line; } + + // Return true if this test should not be run because it's in another shard. + bool is_in_another_shard() const { return is_in_another_shard_; } + + // Returns true if this test should run, that is if the test is not + // disabled (or it is disabled but the also_run_disabled_tests flag has + // been specified) and its full name matches the user-specified filter. + // + // Google Test allows the user to filter the tests by their full names. + // The full name of a test Bar in test case Foo is defined as + // "Foo.Bar". Only the tests that match the filter will run. + // + // A filter is a colon-separated list of glob (not regex) patterns, + // optionally followed by a '-' and a colon-separated list of + // negative patterns (tests to exclude). A test is run if it + // matches one of the positive patterns and does not match any of + // the negative patterns. + // + // For example, *A*:Foo.* is a filter that matches any string that + // contains the character 'A' or starts with "Foo.". + bool should_run() const { return should_run_; } + + // Returns true iff this test will appear in the XML report. + bool is_reportable() const { + // The XML report includes tests matching the filter, excluding those + // run in other shards. + return matches_filter_ && !is_in_another_shard_; + } + + // Returns the result of the test. 
+  const TestResult* result() const { return &result_; }
+
+ private:
+#if GTEST_HAS_DEATH_TEST
+  friend class internal::DefaultDeathTestFactory;
+#endif  // GTEST_HAS_DEATH_TEST
+  friend class Test;
+  friend class TestCase;
+  friend class internal::UnitTestImpl;
+  friend class internal::StreamingListenerTest;
+  friend TestInfo* internal::MakeAndRegisterTestInfo(
+      const char* test_case_name,
+      const char* name,
+      const char* type_param,
+      const char* value_param,
+      internal::CodeLocation code_location,
+      internal::TypeId fixture_class_id,
+      Test::SetUpTestCaseFunc set_up_tc,
+      Test::TearDownTestCaseFunc tear_down_tc,
+      internal::TestFactoryBase* factory);
+
+  // Constructs a TestInfo object. The newly constructed instance assumes
+  // ownership of the factory object.
+  TestInfo(const std::string& test_case_name,
+           const std::string& name,
+           const char* a_type_param,   // NULL if not a type-parameterized test
+           const char* a_value_param,  // NULL if not a value-parameterized test
+           internal::CodeLocation a_code_location,
+           internal::TypeId fixture_class_id,
+           internal::TestFactoryBase* factory);
+
+  // Increments the number of death tests encountered in this test so
+  // far.
+  int increment_death_test_count() {
+    return result_.increment_death_test_count();
+  }
+
+  // Creates the test object, runs it, records its result, and then
+  // deletes it.
+  void Run();
+
+  static void ClearTestResult(TestInfo* test_info) {
+    test_info->result_.Clear();
+  }
+
+  // These fields are immutable properties of the test.
+  const std::string test_case_name_;    // Test case name
+  const std::string name_;              // Test name
+  // Name of the parameter type, or NULL if this is not a typed or a
+  // type-parameterized test.
+  const internal::scoped_ptr<const ::std::string> type_param_;
+  // Text representation of the value parameter, or NULL if this is not a
+  // value-parameterized test.
+  const internal::scoped_ptr<const ::std::string> value_param_;
+  internal::CodeLocation location_;
+  const internal::TypeId fixture_class_id_;  // ID of the test fixture class
+  bool should_run_;           // True iff this test should run
+  bool is_disabled_;          // True iff this test is disabled
+  bool matches_filter_;       // True if this test matches the
+                              // user-specified filter.
+  bool is_in_another_shard_;  // Will be run in another shard.
+  internal::TestFactoryBase* const factory_;  // The factory that creates
+                                              // the test object
+
+  // This field is mutable and needs to be reset before running the
+  // test for the second time.
+  TestResult result_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo);
+};
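+
+// Illustrative filter examples (my_test is a placeholder binary name): the
+// filter format described for should_run() above is given on the command
+// line, e.g.
+//
+//   ./my_test --gtest_filter=FooTest.*             // all tests in FooTest
+//   ./my_test --gtest_filter=*Null*:*Constructor*  // either pattern matches
+//   ./my_test --gtest_filter=-*DeathTest.*         // everything but these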
+
+// A test case, which consists of a vector of TestInfos.
+//
+// TestCase is not copyable.
+class GTEST_API_ TestCase {
+ public:
+  // Creates a TestCase with the given name.
+  //
+  // TestCase does NOT have a default constructor. Always use this
+  // constructor to create a TestCase object.
+  //
+  // Arguments:
+  //
+  //   name:          name of the test case
+  //   a_type_param:  the name of the test's type parameter, or NULL if
+  //                  this is not a type-parameterized test.
+  //   set_up_tc:     pointer to the function that sets up the test case
+  //   tear_down_tc:  pointer to the function that tears down the test case
+  TestCase(const char* name, const char* a_type_param,
+           Test::SetUpTestCaseFunc set_up_tc,
+           Test::TearDownTestCaseFunc tear_down_tc);
+
+  // Destructor of TestCase.
+  virtual ~TestCase();
+
+  // Gets the name of the TestCase.
+  const char* name() const { return name_.c_str(); }
+
+  // Returns the name of the parameter type, or NULL if this is not a
+  // type-parameterized test case.
+  const char* type_param() const {
+    if (type_param_.get() != NULL)
+      return type_param_->c_str();
+    return NULL;
+  }
+
+  // Returns true if any test in this test case should run.
+  bool should_run() const { return should_run_; }
+
+  // Gets the number of successful tests in this test case.
+  int successful_test_count() const;
+
+  // Gets the number of failed tests in this test case.
+  int failed_test_count() const;
+
+  // Gets the number of disabled tests that will be reported in the XML report.
+  int reportable_disabled_test_count() const;
+
+  // Gets the number of disabled tests in this test case.
+  int disabled_test_count() const;
+
+  // Gets the number of tests to be printed in the XML report.
+  int reportable_test_count() const;
+
+  // Get the number of tests in this test case that should run.
+  int test_to_run_count() const;
+
+  // Gets the number of all tests in this test case.
+  int total_test_count() const;
+
+  // Returns true iff the test case passed.
+  bool Passed() const { return !Failed(); }
+
+  // Returns true iff the test case failed.
+  bool Failed() const { return failed_test_count() > 0; }
+
+  // Returns the elapsed time, in milliseconds.
+  TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+  // Returns the i-th test among all the tests. i can range from 0 to
+  // total_test_count() - 1. If i is not in that range, returns NULL.
+  const TestInfo* GetTestInfo(int i) const;
+
+  // Returns the TestResult that holds test properties recorded during
+  // execution of SetUpTestCase and TearDownTestCase.
+  const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; }
+
+ private:
+  friend class Test;
+  friend class internal::UnitTestImpl;
+
+  // Gets the (mutable) vector of TestInfos in this TestCase.
+  std::vector<TestInfo*>& test_info_list() { return test_info_list_; }
+
+  // Gets the (immutable) vector of TestInfos in this TestCase.
+  const std::vector<TestInfo*>& test_info_list() const {
+    return test_info_list_;
+  }
+
+  // Returns the i-th test among all the tests. i can range from 0 to
+  // total_test_count() - 1. If i is not in that range, returns NULL.
+  TestInfo* GetMutableTestInfo(int i);
+
+  // Sets the should_run member.
+  void set_should_run(bool should) { should_run_ = should; }
+
+  // Adds a TestInfo to this test case. Will delete the TestInfo upon
+  // destruction of the TestCase object.
+  void AddTestInfo(TestInfo * test_info);
+
+  // Clears the results of all tests in this test case.
+  void ClearResult();
+
+  // Clears the results of all tests in the given test case.
+  static void ClearTestCaseResult(TestCase* test_case) {
+    test_case->ClearResult();
+  }
+
+  // Runs every test in this TestCase.
+  void Run();
+
+  // Runs SetUpTestCase() for this TestCase. This wrapper is needed
+  // for catching exceptions thrown from SetUpTestCase().
+  void RunSetUpTestCase() { (*set_up_tc_)(); }
+
+  // Runs TearDownTestCase() for this TestCase. This wrapper is
+  // needed for catching exceptions thrown from TearDownTestCase().
+  void RunTearDownTestCase() { (*tear_down_tc_)(); }
+
+  // Returns true iff test passed.
+  static bool TestPassed(const TestInfo* test_info) {
+    return test_info->should_run() && test_info->result()->Passed();
+  }
+
+  // Returns true iff test failed.
+  static bool TestFailed(const TestInfo* test_info) {
+    return test_info->should_run() && test_info->result()->Failed();
+  }
+
+  // Returns true iff the test is disabled and will be reported in the XML
+  // report.
+  static bool TestReportableDisabled(const TestInfo* test_info) {
+    return test_info->is_reportable() && test_info->is_disabled_;
+  }
+
+  // Returns true iff test is disabled.
+  static bool TestDisabled(const TestInfo* test_info) {
+    return test_info->is_disabled_;
+  }
+
+  // Returns true iff this test will appear in the XML report.
+  static bool TestReportable(const TestInfo* test_info) {
+    return test_info->is_reportable();
+  }
+
+  // Returns true if the given test should run.
+  static bool ShouldRunTest(const TestInfo* test_info) {
+    return test_info->should_run();
+  }
+
+  // Shuffles the tests in this test case.
+  void ShuffleTests(internal::Random* random);
+
+  // Restores the test order to before the first shuffle.
+  void UnshuffleTests();
+
+  // Name of the test case.
+  std::string name_;
+  // Name of the parameter type, or NULL if this is not a typed or a
+  // type-parameterized test.
+  const internal::scoped_ptr<const ::std::string> type_param_;
+  // The vector of TestInfos in their original order. It owns the
+  // elements in the vector.
+  std::vector<TestInfo*> test_info_list_;
+  // Provides a level of indirection for the test list to allow easy
+  // shuffling and restoring the test order. The i-th element in this
+  // vector is the index of the i-th test in the shuffled test list.
+  std::vector<int> test_indices_;
+  // Pointer to the function that sets up the test case.
+  Test::SetUpTestCaseFunc set_up_tc_;
+  // Pointer to the function that tears down the test case.
+  Test::TearDownTestCaseFunc tear_down_tc_;
+  // True iff any test in this test case should run.
+  bool should_run_;
+  // Elapsed time, in milliseconds.
+  TimeInMillis elapsed_time_;
+  // Holds test properties recorded during execution of SetUpTestCase and
+  // TearDownTestCase.
+  TestResult ad_hoc_test_result_;
+
+  // We disallow copying TestCases.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase);
+};
+
+// An Environment object is capable of setting up and tearing down an
+// environment. You should subclass this to define your own
+// environment(s).
+//
+// An Environment object does the set-up and tear-down in virtual
+// methods SetUp() and TearDown() instead of the constructor and the
+// destructor, as:
+//
+//   1. You cannot safely throw from a destructor. This is a problem
+//      as in some cases Google Test is used where exceptions are
+//      enabled, and we may want to implement ASSERT_* using exceptions
+//      where they are available.
+//   2. You cannot use ASSERT_* directly in a constructor or
+//      destructor.
+class Environment {
+ public:
+  // The d'tor is virtual as we need to subclass Environment.
+  virtual ~Environment() {}
+
+  // Override this to define how to set up the environment.
+  virtual void SetUp() {}
+
+  // Override this to define how to tear down the environment.
+  virtual void TearDown() {}
+ private:
+  // If you see an error about overriding the following function or
+  // about it being private, you have mis-spelled SetUp() as Setup().
+  struct Setup_should_be_spelled_SetUp {};
+  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
+};
+
+#if GTEST_HAS_EXCEPTIONS
+
+// Exception which can be thrown from TestEventListener::OnTestPartResult.
+class GTEST_API_ AssertionException
+    : public internal::GoogleTestFailureException {
+ public:
+  explicit AssertionException(const TestPartResult& result)
+      : GoogleTestFailureException(result) {}
+};
+
+#endif  // GTEST_HAS_EXCEPTIONS
+
+// The interface for tracing execution of tests. The methods are organized in
+// the order the corresponding events are fired.
+class TestEventListener { + public: + virtual ~TestEventListener() {} + + // Fired before any test activity starts. + virtual void OnTestProgramStart(const UnitTest& unit_test) = 0; + + // Fired before each iteration of tests starts. There may be more than + // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration + // index, starting from 0. + virtual void OnTestIterationStart(const UnitTest& unit_test, + int iteration) = 0; + + // Fired before environment set-up for each iteration of tests starts. + virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0; + + // Fired after environment set-up for each iteration of tests ends. + virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0; + + // Fired before the test case starts. + virtual void OnTestCaseStart(const TestCase& test_case) = 0; + + // Fired before the test starts. + virtual void OnTestStart(const TestInfo& test_info) = 0; + + // Fired after a failed assertion or a SUCCEED() invocation. + // If you want to throw an exception from this function to skip to the next + // TEST, it must be AssertionException defined above, or inherited from it. + virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0; + + // Fired after the test ends. + virtual void OnTestEnd(const TestInfo& test_info) = 0; + + // Fired after the test case ends. + virtual void OnTestCaseEnd(const TestCase& test_case) = 0; + + // Fired before environment tear-down for each iteration of tests starts. + virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0; + + // Fired after environment tear-down for each iteration of tests ends. + virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0; + + // Fired after each iteration of tests finishes. + virtual void OnTestIterationEnd(const UnitTest& unit_test, + int iteration) = 0; + + // Fired after all test activities have ended. + virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0; +}; + +// The convenience class for users who need to override just one or two +// methods and are not concerned that a possible change to a signature of +// the methods they override will not be caught during the build. For +// comments about each method please see the definition of TestEventListener +// above. +class EmptyTestEventListener : public TestEventListener { + public: + virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationStart(const UnitTest& /*unit_test*/, + int /*iteration*/) {} + virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {} + virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestCaseStart(const TestCase& /*test_case*/) {} + virtual void OnTestStart(const TestInfo& /*test_info*/) {} + virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {} + virtual void OnTestEnd(const TestInfo& /*test_info*/) {} + virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {} + virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {} + virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/, + int /*iteration*/) {} + virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} +}; + +// TestEventListeners lets users add listeners to track events in Google Test. +class GTEST_API_ TestEventListeners { + public: + TestEventListeners(); + ~TestEventListeners(); + + // Appends an event listener to the end of the list. 
Google Test assumes + // the ownership of the listener (i.e. it will delete the listener when + // the test program finishes). + void Append(TestEventListener* listener); + + // Removes the given event listener from the list and returns it. It then + // becomes the caller's responsibility to delete the listener. Returns + // NULL if the listener is not found in the list. + TestEventListener* Release(TestEventListener* listener); + + // Returns the standard listener responsible for the default console + // output. Can be removed from the listeners list to shut down default + // console output. Note that removing this object from the listener list + // with Release transfers its ownership to the caller and makes this + // function return NULL the next time. + TestEventListener* default_result_printer() const { + return default_result_printer_; + } + + // Returns the standard listener responsible for the default XML output + // controlled by the --gtest_output=xml flag. Can be removed from the + // listeners list by users who want to shut down the default XML output + // controlled by this flag and substitute it with custom one. Note that + // removing this object from the listener list with Release transfers its + // ownership to the caller and makes this function return NULL the next + // time. + TestEventListener* default_xml_generator() const { + return default_xml_generator_; + } + + private: + friend class TestCase; + friend class TestInfo; + friend class internal::DefaultGlobalTestPartResultReporter; + friend class internal::NoExecDeathTest; + friend class internal::TestEventListenersAccessor; + friend class internal::UnitTestImpl; + + // Returns repeater that broadcasts the TestEventListener events to all + // subscribers. + TestEventListener* repeater(); + + // Sets the default_result_printer attribute to the provided listener. + // The listener is also added to the listener list and previous + // default_result_printer is removed from it and deleted. The listener can + // also be NULL in which case it will not be added to the list. Does + // nothing if the previous and the current listener objects are the same. + void SetDefaultResultPrinter(TestEventListener* listener); + + // Sets the default_xml_generator attribute to the provided listener. The + // listener is also added to the listener list and previous + // default_xml_generator is removed from it and deleted. The listener can + // also be NULL in which case it will not be added to the list. Does + // nothing if the previous and the current listener objects are the same. + void SetDefaultXmlGenerator(TestEventListener* listener); + + // Controls whether events will be forwarded by the repeater to the + // listeners in the list. + bool EventForwardingEnabled() const; + void SuppressEventForwarding(); + + // The actual list of listeners. + internal::TestEventRepeater* repeater_; + // Listener responsible for the standard result output. + TestEventListener* default_result_printer_; + // Listener responsible for the creation of the XML output file. + TestEventListener* default_xml_generator_; + + // We disallow copying TestEventListeners. + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners); +}; + +// A UnitTest consists of a vector of TestCases. +// +// This is a singleton class. The only instance of UnitTest is +// created when UnitTest::GetInstance() is first called. This +// instance is never deleted. +// +// UnitTest is not copyable. 
+// +// This class is thread-safe as long as the methods are called +// according to their specification. +class GTEST_API_ UnitTest { + public: + // Gets the singleton UnitTest object. The first time this method + // is called, a UnitTest object is constructed and returned. + // Consecutive calls will return the same object. + static UnitTest* GetInstance(); + + // Runs all tests in this UnitTest object and prints the result. + // Returns 0 if successful, or 1 otherwise. + // + // This method can only be called from the main thread. + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + int Run() GTEST_MUST_USE_RESULT_; + + // Returns the working directory when the first TEST() or TEST_F() + // was executed. The UnitTest object owns the string. + const char* original_working_dir() const; + + // Returns the TestCase object for the test that's currently running, + // or NULL if no test is running. + const TestCase* current_test_case() const + GTEST_LOCK_EXCLUDED_(mutex_); + + // Returns the TestInfo object for the test that's currently running, + // or NULL if no test is running. + const TestInfo* current_test_info() const + GTEST_LOCK_EXCLUDED_(mutex_); + + // Returns the random seed used at the start of the current test run. + int random_seed() const; + + // Returns the ParameterizedTestCaseRegistry object used to keep track of + // value-parameterized tests and instantiate and register them. + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + internal::ParameterizedTestCaseRegistry& parameterized_test_registry() + GTEST_LOCK_EXCLUDED_(mutex_); + + // Gets the number of successful test cases. + int successful_test_case_count() const; + + // Gets the number of failed test cases. + int failed_test_case_count() const; + + // Gets the number of all test cases. + int total_test_case_count() const; + + // Gets the number of all test cases that contain at least one test + // that should run. + int test_case_to_run_count() const; + + // Gets the number of successful tests. + int successful_test_count() const; + + // Gets the number of failed tests. + int failed_test_count() const; + + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + + // Gets the number of disabled tests. + int disabled_test_count() const; + + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + + // Gets the number of all tests. + int total_test_count() const; + + // Gets the number of tests that should run. + int test_to_run_count() const; + + // Gets the time of the test program start, in ms from the start of the + // UNIX epoch. + TimeInMillis start_timestamp() const; + + // Gets the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const; + + // Returns true iff the unit test passed (i.e. all test cases passed). + bool Passed() const; + + // Returns true iff the unit test failed (i.e. some test case failed + // or something outside of all tests failed). + bool Failed() const; + + // Gets the i-th test case among all the test cases. i can range from 0 to + // total_test_case_count() - 1. If i is not in that range, returns NULL. + const TestCase* GetTestCase(int i) const; + + // Returns the TestResult containing information on test failures and + // properties logged outside of individual test cases. + const TestResult& ad_hoc_test_result() const; + + // Returns the list of event listeners that can be used to track events + // inside Google Test. 
+ TestEventListeners& listeners(); + + private: + // Registers and returns a global test environment. When a test + // program is run, all global test environments will be set-up in + // the order they were registered. After all tests in the program + // have finished, all global test environments will be torn-down in + // the *reverse* order they were registered. + // + // The UnitTest object takes ownership of the given environment. + // + // This method can only be called from the main thread. + Environment* AddEnvironment(Environment* env); + + // Adds a TestPartResult to the current TestResult object. All + // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) + // eventually call this to report their results. The user code + // should use the assertion macros instead of calling this directly. + void AddTestPartResult(TestPartResult::Type result_type, + const char* file_name, + int line_number, + const std::string& message, + const std::string& os_stack_trace) + GTEST_LOCK_EXCLUDED_(mutex_); + + // Adds a TestProperty to the current TestResult object when invoked from + // inside a test, to current TestCase's ad_hoc_test_result_ when invoked + // from SetUpTestCase or TearDownTestCase, or to the global property set + // when invoked elsewhere. If the result already contains a property with + // the same key, the value will be updated. + void RecordProperty(const std::string& key, const std::string& value); + + // Gets the i-th test case among all the test cases. i can range from 0 to + // total_test_case_count() - 1. If i is not in that range, returns NULL. + TestCase* GetMutableTestCase(int i); + + // Accessors for the implementation object. + internal::UnitTestImpl* impl() { return impl_; } + const internal::UnitTestImpl* impl() const { return impl_; } + + // These classes and functions are friends as they need to access private + // members of UnitTest. + friend class ScopedTrace; + friend class Test; + friend class internal::AssertHelper; + friend class internal::StreamingListenerTest; + friend class internal::UnitTestRecordPropertyTestHelper; + friend Environment* AddGlobalTestEnvironment(Environment* env); + friend internal::UnitTestImpl* internal::GetUnitTestImpl(); + friend void internal::ReportFailureInUnknownLocation( + TestPartResult::Type result_type, + const std::string& message); + + // Creates an empty UnitTest. + UnitTest(); + + // D'tor + virtual ~UnitTest(); + + // Pushes a trace defined by SCOPED_TRACE() on to the per-thread + // Google Test trace stack. + void PushGTestTrace(const internal::TraceInfo& trace) + GTEST_LOCK_EXCLUDED_(mutex_); + + // Pops a trace from the per-thread Google Test trace stack. + void PopGTestTrace() + GTEST_LOCK_EXCLUDED_(mutex_); + + // Protects mutable state in *impl_. This is mutable as some const + // methods need to lock it too. + mutable internal::Mutex mutex_; + + // Opaque implementation object. This field is never changed once + // the object is constructed. We don't mark it as const here, as + // doing so will cause a warning in the constructor of UnitTest. + // Mutable state in *impl_ is protected by mutex_. + internal::UnitTestImpl* impl_; + + // We disallow copying UnitTest. + GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest); +}; + +// A convenient wrapper for adding an environment for the test +// program. +// +// You should call this before RUN_ALL_TESTS() is called, probably in +// main(). If you use gtest_main, you need to call this before main() +// starts for it to take effect. 
For example, you can define a global
+// variable like this:
+//
+//   testing::Environment* const foo_env =
+//       testing::AddGlobalTestEnvironment(new FooEnvironment);
+//
+// However, we strongly recommend you to write your own main() and
+// call AddGlobalTestEnvironment() there, as relying on initialization
+// of global variables makes the code harder to read and may cause
+// problems when you register multiple environments from different
+// translation units and the environments have dependencies among them
+// (remember that the compiler doesn't guarantee the order in which
+// global variables from different translation units are initialized).
+inline Environment* AddGlobalTestEnvironment(Environment* env) {
+  return UnitTest::GetInstance()->AddEnvironment(env);
+}
+
+// Initializes Google Test. This must be called before calling
+// RUN_ALL_TESTS(). In particular, it parses a command line for the
+// flags that Google Test recognizes. Whenever a Google Test flag is
+// seen, it is removed from argv, and *argc is decremented.
+//
+// No value is returned. Instead, the Google Test flag variables are
+// updated.
+//
+// Calling the function for the second time has no user-visible effect.
+GTEST_API_ void InitGoogleTest(int* argc, char** argv);
+
+// This overloaded version can be used in Windows programs compiled in
+// UNICODE mode.
+GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);
+
+namespace internal {
+
+// Separate the error generating code from the code path to reduce the stack
+// frame size of CmpHelperEQ. This helps reduce the overhead of some sanitizers
+// when calling EXPECT_* in a tight loop.
+template <typename T1, typename T2>
+AssertionResult CmpHelperEQFailure(const char* lhs_expression,
+                                   const char* rhs_expression,
+                                   const T1& lhs, const T2& rhs) {
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   FormatForComparisonFailureMessage(lhs, rhs),
+                   FormatForComparisonFailureMessage(rhs, lhs),
+                   false);
+}
+
+// The helper function for {ASSERT|EXPECT}_EQ.
+template <typename T1, typename T2>
+AssertionResult CmpHelperEQ(const char* lhs_expression,
+                            const char* rhs_expression,
+                            const T1& lhs,
+                            const T2& rhs) {
+  if (lhs == rhs) {
+    return AssertionSuccess();
+  }
+
+  return CmpHelperEQFailure(lhs_expression, rhs_expression, lhs, rhs);
+}
+
+// With this overloaded version, we allow anonymous enums to be used
+// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums
+// can be implicitly cast to BiggestInt.
+GTEST_API_ AssertionResult CmpHelperEQ(const char* lhs_expression,
+                                       const char* rhs_expression,
+                                       BiggestInt lhs,
+                                       BiggestInt rhs);
+
+// The helper class for {ASSERT|EXPECT}_EQ. The template argument
+// lhs_is_null_literal is true iff the first argument to ASSERT_EQ()
+// is a null pointer literal. The following default implementation is
+// for lhs_is_null_literal being false.
+template <bool lhs_is_null_literal>
+class EqHelper {
+ public:
+  // This templatized version is for the general case.
+  template <typename T1, typename T2>
+  static AssertionResult Compare(const char* lhs_expression,
+                                 const char* rhs_expression,
+                                 const T1& lhs,
+                                 const T2& rhs) {
+    return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
+  }
+
+  // With this overloaded version, we allow anonymous enums to be used
+  // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
+  // enums can be implicitly cast to BiggestInt.
+  //
+  // Even though its body looks the same as the above version, we
+  // cannot merge the two, as it will make anonymous enums unhappy.
+  static AssertionResult Compare(const char* lhs_expression,
+                                 const char* rhs_expression,
+                                 BiggestInt lhs,
+                                 BiggestInt rhs) {
+    return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
+  }
+};
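+
+// Sketch of how the two layers resolve (illustrative only; n is an int and
+// p is an int*): ASSERT_EQ/EXPECT_EQ instantiate EqHelper with a
+// compile-time flag saying whether the left-hand argument is a null
+// literal.
+//
+//   ASSERT_EQ(n, 0);     // EqHelper<false>, general template above
+//   ASSERT_EQ(0, n);     // EqHelper<true>, non-pointer overload below
+//   ASSERT_EQ(NULL, p);  // EqHelper<true>, Secret* overload below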
+
+// This specialization is used when the first argument to ASSERT_EQ()
+// is a null pointer literal, like NULL, false, or 0.
+template <>
+class EqHelper<true> {
+ public:
+  // We define two overloaded versions of Compare(). The first
+  // version will be picked when the second argument to ASSERT_EQ() is
+  // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or
+  // EXPECT_EQ(false, a_bool).
+  template <typename T1, typename T2>
+  static AssertionResult Compare(
+      const char* lhs_expression,
+      const char* rhs_expression,
+      const T1& lhs,
+      const T2& rhs,
+      // The following line prevents this overload from being considered if T2
+      // is not a pointer type. We need this because ASSERT_EQ(NULL, my_ptr)
+      // expands to Compare("", "", NULL, my_ptr), which requires a conversion
+      // to match the Secret* in the other overload, which would otherwise make
+      // this template match better.
+      typename EnableIf<!is_pointer<T2>::value>::type* = 0) {
+    return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
+  }
+
+  // This version will be picked when the second argument to ASSERT_EQ() is a
+  // pointer, e.g. ASSERT_EQ(NULL, a_pointer).
+  template <typename T>
+  static AssertionResult Compare(
+      const char* lhs_expression,
+      const char* rhs_expression,
+      // We used to have a second template parameter instead of Secret*. That
+      // template parameter would deduce to 'long', making this a better match
+      // than the first overload even without the first overload's EnableIf.
+      // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to
+      // non-pointer argument" (even a deduced integral argument), so the old
+      // implementation caused warnings in user code.
+      Secret* /* lhs (NULL) */,
+      T* rhs) {
+    // We already know that 'lhs' is a null pointer.
+    return CmpHelperEQ(lhs_expression, rhs_expression,
+                       static_cast<T*>(NULL), rhs);
+  }
+};
+
+// Separate the error generating code from the code path to reduce the stack
+// frame size of CmpHelperOP. This helps reduce the overhead of some sanitizers
+// when calling EXPECT_OP in a tight loop.
+template <typename T1, typename T2>
+AssertionResult CmpHelperOpFailure(const char* expr1, const char* expr2,
+                                   const T1& val1, const T2& val2,
+                                   const char* op) {
+  return AssertionFailure()
+         << "Expected: (" << expr1 << ") " << op << " (" << expr2
+         << "), actual: " << FormatForComparisonFailureMessage(val1, val2)
+         << " vs " << FormatForComparisonFailureMessage(val2, val1);
+}
+
+// A macro for implementing the helper functions needed to implement
+// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste
+// of similar code.
+//
+// For each templatized helper function, we also define an overloaded
+// version for BiggestInt in order to reduce code bloat and allow
+// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled
+// with gcc 4.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+
+#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
+template <typename T1, typename T2>\
+AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
+                                   const T1& val1, const T2& val2) {\
+  if (val1 op val2) {\
+    return AssertionSuccess();\
+  } else {\
+    return CmpHelperOpFailure(expr1, expr2, val1, val2, #op);\
+  }\
+}\
+GTEST_API_ AssertionResult CmpHelper##op_name(\
+    const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2)
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+ +// Implements the helper function for {ASSERT|EXPECT}_NE +GTEST_IMPL_CMP_HELPER_(NE, !=); +// Implements the helper function for {ASSERT|EXPECT}_LE +GTEST_IMPL_CMP_HELPER_(LE, <=); +// Implements the helper function for {ASSERT|EXPECT}_LT +GTEST_IMPL_CMP_HELPER_(LT, <); +// Implements the helper function for {ASSERT|EXPECT}_GE +GTEST_IMPL_CMP_HELPER_(GE, >=); +// Implements the helper function for {ASSERT|EXPECT}_GT +GTEST_IMPL_CMP_HELPER_(GT, >); + +#undef GTEST_IMPL_CMP_HELPER_ + +// The helper function for {ASSERT|EXPECT}_STREQ. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2); + +// The helper function for {ASSERT|EXPECT}_STRCASEEQ. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2); + +// The helper function for {ASSERT|EXPECT}_STRNE. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2); + +// The helper function for {ASSERT|EXPECT}_STRCASENE. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2); + + +// Helper function for *_STREQ on wide strings. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression, + const char* s2_expression, + const wchar_t* s1, + const wchar_t* s2); + +// Helper function for *_STRNE on wide strings. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const wchar_t* s1, + const wchar_t* s2); + +} // namespace internal + +// IsSubstring() and IsNotSubstring() are intended to be used as the +// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by +// themselves. They check whether needle is a substring of haystack +// (NULL is considered a substring of itself only), and return an +// appropriate error message when they fail. +// +// The {needle,haystack}_expr arguments are the stringified +// expressions that generated the two real arguments. 
+GTEST_API_ AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const char* needle, const char* haystack);
+GTEST_API_ AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const wchar_t* needle, const wchar_t* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const char* needle, const char* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const wchar_t* needle, const wchar_t* haystack);
+GTEST_API_ AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::string& needle, const ::std::string& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::string& needle, const ::std::string& haystack);
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::wstring& needle, const ::std::wstring& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::wstring& needle, const ::std::wstring& haystack);
+#endif  // GTEST_HAS_STD_WSTRING
+
+namespace internal {
+
+// Helper template function for comparing floating-points.
+//
+// Template parameter:
+//
+//   RawType: the raw floating-point type (either float or double)
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename RawType>
+AssertionResult CmpHelperFloatingPointEQ(const char* lhs_expression,
+                                         const char* rhs_expression,
+                                         RawType lhs_value,
+                                         RawType rhs_value) {
+  const FloatingPoint<RawType> lhs(lhs_value), rhs(rhs_value);
+
+  if (lhs.AlmostEquals(rhs)) {
+    return AssertionSuccess();
+  }
+
+  ::std::stringstream lhs_ss;
+  lhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+         << lhs_value;
+
+  ::std::stringstream rhs_ss;
+  rhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+         << rhs_value;
+
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   StringStreamToString(&lhs_ss),
+                   StringStreamToString(&rhs_ss),
+                   false);
+}
+
+// Helper function for implementing ASSERT_NEAR.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
+                                                const char* expr2,
+                                                const char* abs_error_expr,
+                                                double val1,
+                                                double val2,
+                                                double abs_error);
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// A class that enables one to stream messages to assertion macros
+class GTEST_API_ AssertHelper {
+ public:
+  // Constructor.
+  AssertHelper(TestPartResult::Type type,
+               const char* file,
+               int line,
+               const char* message);
+  ~AssertHelper();
+
+  // Message assignment is a semantic trick to enable assertion
+  // streaming; see the GTEST_MESSAGE_ macro below.
+  void operator=(const Message& message) const;
+
+ private:
+  // We put our data in a struct so that the size of the AssertHelper class can
+  // be as small as possible.  This is important because gcc is incapable of
+  // re-using stack space even for temporary variables, so every EXPECT_EQ
+  // reserves stack space for another AssertHelper.
+  struct AssertHelperData {
+    AssertHelperData(TestPartResult::Type t,
+                     const char* srcfile,
+                     int line_num,
+                     const char* msg)
+        : type(t), file(srcfile), line(line_num), message(msg) { }
+
+    TestPartResult::Type const type;
+    const char* const file;
+    int const line;
+    std::string const message;
+
+   private:
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData);
+  };
+
+  AssertHelperData* const data_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
+};
+
+}  // namespace internal
+
+// The pure interface class that all value-parameterized tests inherit from.
+// A value-parameterized class must inherit from both ::testing::Test and
+// ::testing::WithParamInterface. In most cases that just means inheriting
+// from ::testing::TestWithParam<T>, but more complicated test hierarchies
+// may need to inherit from Test and WithParamInterface<T> at different levels.
+//
+// This interface has support for accessing the test parameter value via
+// the GetParam() method.
+//
+// Use it with one of the parameter generator defining functions, like Range(),
+// Values(), ValuesIn(), Bool(), and Combine().
+//
+// class FooTest : public ::testing::TestWithParam<int> {
+//  protected:
+//   FooTest() {
+//     // Can use GetParam() here.
+//   }
+//   virtual ~FooTest() {
+//     // Can use GetParam() here.
+//   }
+//   virtual void SetUp() {
+//     // Can use GetParam() here.
+//   }
+//   virtual void TearDown() {
+//     // Can use GetParam() here.
+//   }
+// };
+// TEST_P(FooTest, DoesBar) {
+//   // Can use GetParam() method here.
+//   Foo foo;
+//   ASSERT_TRUE(foo.DoesBar(GetParam()));
+// }
+// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10));
+
+template <typename T>
+class WithParamInterface {
+ public:
+  typedef T ParamType;
+  virtual ~WithParamInterface() {}
+
+  // The current parameter value. Is also available in the test fixture's
+  // constructor. This member function is non-static, even though it only
+  // references static data, to reduce the opportunity for incorrect uses
+  // like writing 'WithParamInterface<bool>::GetParam()' for a test that
+  // uses a fixture whose parameter type is int.
+  const ParamType& GetParam() const {
+    GTEST_CHECK_(parameter_ != NULL)
+        << "GetParam() can only be called inside a value-parameterized test "
+        << "-- did you intend to write TEST_P instead of TEST_F?";
+    return *parameter_;
+  }
+
+ private:
+  // Sets parameter value. The caller is responsible for making sure the value
+  // remains alive and unchanged throughout the current test.
+  static void SetParam(const ParamType* parameter) {
+    parameter_ = parameter;
+  }
+
+  // Static value used for accessing parameter during a test lifetime.
+  static const ParamType* parameter_;
+
+  // TestClass must be a subclass of WithParamInterface<T> and Test.
+  template <class TestClass> friend class internal::ParameterizedTestFactory;
+};
+
+template <typename T>
+const T* WithParamInterface<T>::parameter_ = NULL;
+
+// Most value-parameterized classes can ignore the existence of
+// WithParamInterface, and can just inherit from ::testing::TestWithParam.
+
+template <typename T>
+class TestWithParam : public Test, public WithParamInterface<T> {
+};
+
+// Macros for indicating success/failure in test code.
+
+// ADD_FAILURE unconditionally adds a failure to the current test.
+// SUCCEED generates a success - it doesn't automatically make the
+// current test successful, as a test is only successful when it has
+// no failure.
+//
+// EXPECT_* verifies that a certain condition is satisfied.  If not,
+// it behaves like ADD_FAILURE.  In particular:
+//
+//   EXPECT_TRUE  verifies that a Boolean condition is true.
+//   EXPECT_FALSE verifies that a Boolean condition is false.
+//
+// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except
+// that they will also abort the current function on failure.  People
+// usually want the fail-fast behavior of FAIL and ASSERT_*, but those
+// writing data-driven tests often find themselves using ADD_FAILURE
+// and EXPECT_* more.
+
+// Generates a nonfatal failure with a generic message.
+#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")
+
+// Generates a nonfatal failure at the given source file location with
+// a generic message.
+#define ADD_FAILURE_AT(file, line) \
+  GTEST_MESSAGE_AT_(file, line, "Failed", \
+                    ::testing::TestPartResult::kNonFatalFailure)
+
+// Generates a fatal failure with a generic message.
+#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")
+
+// Define this macro to 1 to omit the definition of FAIL(), which is a
+// generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_FAIL
+# define FAIL() GTEST_FAIL()
+#endif
+
+// Generates a success with a generic message.
+#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")
+
+// Define this macro to 1 to omit the definition of SUCCEED(), which
+// is a generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_SUCCEED
+# define SUCCEED() GTEST_SUCCEED()
+#endif
+
+// Macros for testing exceptions.
+//
+//    * {ASSERT|EXPECT}_THROW(statement, expected_exception):
+//         Tests that the statement throws the expected exception.
+//    * {ASSERT|EXPECT}_NO_THROW(statement):
+//         Tests that the statement doesn't throw any exception.
+//    * {ASSERT|EXPECT}_ANY_THROW(statement):
+//         Tests that the statement throws an exception.
+
+#define EXPECT_THROW(statement, expected_exception) \
+  GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_NO_THROW(statement) \
+  GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_ANY_THROW(statement) \
+  GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_THROW(statement, expected_exception) \
+  GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)
+#define ASSERT_NO_THROW(statement) \
+  GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)
+#define ASSERT_ANY_THROW(statement) \
+  GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)
+
+// Boolean assertions. Condition can be either a Boolean expression or an
+// AssertionResult. For more information on how to use AssertionResult with
+// these macros see comments on that class.
+#define EXPECT_TRUE(condition) \
+  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
+                      GTEST_NONFATAL_FAILURE_)
+#define EXPECT_FALSE(condition) \
+  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
+                      GTEST_NONFATAL_FAILURE_)
+#define ASSERT_TRUE(condition) \
+  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
+                      GTEST_FATAL_FAILURE_)
+#define ASSERT_FALSE(condition) \
+  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
+                      GTEST_FATAL_FAILURE_)
+
+// Macros for testing equalities and inequalities.
+//
+//    * {ASSERT|EXPECT}_EQ(v1, v2): Tests that v1 == v2
+//    * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2
+//    * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2
+//    * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2
+//    * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2
+//    * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2
+//
+// When they are not, Google Test prints both the tested expressions and
+// their actual values.  The values must be compatible built-in types,
+// or you will get a compiler error.  By "compatible" we mean that the
+// values can be compared by the respective operator.
+//
+// Note:
+//
+//   1. It is possible to make a user-defined type work with
+//   {ASSERT|EXPECT}_??(), but that requires overloading the
+//   comparison operators and is thus discouraged by the Google C++
+//   Usage Guide.  Therefore, you are advised to use the
+//   {ASSERT|EXPECT}_TRUE() macro to assert that two objects are
+//   equal.
+//
+//   2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on
+//   pointers (in particular, C strings).  Therefore, if you use it
+//   with two C strings, you are testing how their locations in memory
+//   are related, not how their content is related.  To compare two C
+//   strings by content, use {ASSERT|EXPECT}_STR*().
+//
+//   3. {ASSERT|EXPECT}_EQ(v1, v2) is preferred to
+//   {ASSERT|EXPECT}_TRUE(v1 == v2), as the former tells you
+//   what the actual value is when it fails, and similarly for the
+//   other comparisons.
+//
+//   4. Do not depend on the order in which {ASSERT|EXPECT}_??()
+//   evaluate their arguments, which is undefined.
+//
+//   5. These macros evaluate their arguments exactly once.
+//
+// Examples:
+//
+//   EXPECT_NE(Foo(), 5);
+//   EXPECT_EQ(a_pointer, NULL);
+//   ASSERT_LT(i, array_size);
+//   ASSERT_GT(records.size(), 0) << "There is no record left.";
+
+#define EXPECT_EQ(val1, val2) \
+  EXPECT_PRED_FORMAT2(::testing::internal:: \
+                      EqHelper<GTEST_IS_NULL_LITERAL_(val1)>::Compare, \
+                      val1, val2)
+#define EXPECT_NE(val1, val2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
+#define EXPECT_LE(val1, val2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
+#define EXPECT_LT(val1, val2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
+#define EXPECT_GE(val1, val2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
+#define EXPECT_GT(val1, val2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
+
+#define GTEST_ASSERT_EQ(val1, val2) \
+  ASSERT_PRED_FORMAT2(::testing::internal:: \
+                      EqHelper<GTEST_IS_NULL_LITERAL_(val1)>::Compare, \
+                      val1, val2)
+#define GTEST_ASSERT_NE(val1, val2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
+#define GTEST_ASSERT_LE(val1, val2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
+#define GTEST_ASSERT_LT(val1, val2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
+#define GTEST_ASSERT_GE(val1, val2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
+#define GTEST_ASSERT_GT(val1, val2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
+
+// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of
+// ASSERT_XY(), which clashes with some users' own code.
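// Illustrative sketch of that opt-out: a project whose own headers already
// define ASSERT_EQ can suppress gtest's short alias and use the prefixed
// spelling instead. The build flag is the documented knob; the test body is
// hypothetical.
//
//   compile with: -DGTEST_DONT_DEFINE_ASSERT_EQ=1
//
//   TEST(AliasFreeSketch, UsesPrefixedMacro) {
//     GTEST_ASSERT_EQ(4, 2 + 2);  // ASSERT_EQ itself is left undefined
//   }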
+
+#if !GTEST_DONT_DEFINE_ASSERT_EQ
+# define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_NE
+# define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_LE
+# define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_LT
+# define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_GE
+# define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_GT
+# define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2)
+#endif
+
+// C-string Comparisons.  All tests treat NULL and any non-NULL string
+// as different.  Two NULLs are equal.
+//
+//    * {ASSERT|EXPECT}_STREQ(s1, s2):     Tests that s1 == s2
+//    * {ASSERT|EXPECT}_STRNE(s1, s2):     Tests that s1 != s2
+//    * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case
+//    * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case
+//
+// For wide or narrow string objects, you can use the
+// {ASSERT|EXPECT}_??() macros.
+//
+// Don't depend on the order in which the arguments are evaluated,
+// which is undefined.
+//
+// These macros evaluate their arguments exactly once.
+
+#define EXPECT_STREQ(s1, s2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2)
+#define EXPECT_STRNE(s1, s2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
+#define EXPECT_STRCASEEQ(s1, s2) \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2)
+#define EXPECT_STRCASENE(s1, s2)\
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
+
+#define ASSERT_STREQ(s1, s2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2)
+#define ASSERT_STRNE(s1, s2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
+#define ASSERT_STRCASEEQ(s1, s2) \
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2)
+#define ASSERT_STRCASENE(s1, s2)\
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
+
+// Macros for comparing floating-point numbers.
+//
+//    * {ASSERT|EXPECT}_FLOAT_EQ(val1, val2):
+//         Tests that two float values are almost equal.
+//    * {ASSERT|EXPECT}_DOUBLE_EQ(val1, val2):
+//         Tests that two double values are almost equal.
+//    * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error):
+//         Tests that v1 and v2 are within the given distance to each other.
+//
+// Google Test uses ULP-based comparison to automatically pick a default
+// error bound that is appropriate for the operands.  See the
+// FloatingPoint template class in gtest-internal.h if you are
+// interested in the implementation details.
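// Usage sketch for the floating-point assertions defined just below. The
// accumulated float sum drifts from 1.0f by roughly one ULP, which the
// ULP-based EXPECT_FLOAT_EQ tolerates; EXPECT_NEAR takes an explicit
// absolute error bound instead. The test name is illustrative.
TEST(FloatSketch, AlmostEqual) {
  float sum = 0.0f;
  for (int i = 0; i < 10; ++i) sum += 0.1f;  // rounding error accumulates
  EXPECT_FLOAT_EQ(1.0f, sum);                // ULP-based comparison passes
  EXPECT_NEAR(1.0, static_cast<double>(sum), 1e-6);  // explicit bound
}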
+
+#define EXPECT_FLOAT_EQ(val1, val2)\
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
+                      val1, val2)
+
+#define EXPECT_DOUBLE_EQ(val1, val2)\
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
+                      val1, val2)
+
+#define ASSERT_FLOAT_EQ(val1, val2)\
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
+                      val1, val2)
+
+#define ASSERT_DOUBLE_EQ(val1, val2)\
+  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
+                      val1, val2)
+
+#define EXPECT_NEAR(val1, val2, abs_error)\
+  EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
+                      val1, val2, abs_error)
+
+#define ASSERT_NEAR(val1, val2, abs_error)\
+  ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
+                      val1, val2, abs_error)
+
+// These predicate format functions work on floating-point values, and
+// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g.
+//
+//   EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0);
+
+// Asserts that val1 is less than, or almost equal to, val2.  Fails
+// otherwise.  In particular, it fails if either val1 or val2 is NaN.
+GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2,
+                                   float val1, float val2);
+GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
+                                    double val1, double val2);
+
+
+#if GTEST_OS_WINDOWS
+
+// Macros that test for HRESULT failure and success, these are only useful
+// on Windows, and rely on Windows SDK macros and APIs to compile.
+//
+//    * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr)
+//
+// When expr unexpectedly fails or succeeds, Google Test prints the
+// expected result and the actual result with both a human-readable
+// string representation of the error, if available, as well as the
+// hex result code.
+# define EXPECT_HRESULT_SUCCEEDED(expr) \
+    EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))
+
+# define ASSERT_HRESULT_SUCCEEDED(expr) \
+    ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))
+
+# define EXPECT_HRESULT_FAILED(expr) \
+    EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))
+
+# define ASSERT_HRESULT_FAILED(expr) \
+    ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))
+
+#endif  // GTEST_OS_WINDOWS
+
+// Macros that execute statement and check that it doesn't generate new fatal
+// failures in the current thread.
+//
+//   * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement);
+//
+// Examples:
+//
+//   EXPECT_NO_FATAL_FAILURE(Process());
+//   ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed";
+//
+#define ASSERT_NO_FATAL_FAILURE(statement) \
+    GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_)
+#define EXPECT_NO_FATAL_FAILURE(statement) \
+    GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_)
+
+// Causes a trace (including the given source file path and line number,
+// and the given message) to be included in every test failure message generated
+// by code in the scope of the lifetime of an instance of this class. The effect
+// is undone with the destruction of the instance.
+//
+// The message argument can be anything streamable to std::ostream.
+//
+// Example:
+//   testing::ScopedTrace trace("file.cc", 123, "message");
+//
+class GTEST_API_ ScopedTrace {
+ public:
+  // The c'tor pushes the given source file location and message onto
+  // a trace stack maintained by Google Test.
+
+  // Template version. Uses Message() to convert the values into strings.
+  // Slow, but flexible.
+  template <typename T>
+  ScopedTrace(const char* file, int line, const T& message) {
+    PushTrace(file, line, (Message() << message).GetString());
+  }
+
+  // Optimize for some known types.
+  ScopedTrace(const char* file, int line, const char* message) {
+    PushTrace(file, line, message ? message : "(null)");
+  }
+
+#if GTEST_HAS_GLOBAL_STRING
+  ScopedTrace(const char* file, int line, const ::string& message) {
+    PushTrace(file, line, message);
+  }
+#endif
+
+  ScopedTrace(const char* file, int line, const std::string& message) {
+    PushTrace(file, line, message);
+  }
+
+  // The d'tor pops the info pushed by the c'tor.
+  //
+  // Note that the d'tor is not virtual in order to be efficient.
+  // Don't inherit from ScopedTrace!
+  ~ScopedTrace();
+
+ private:
+  void PushTrace(const char* file, int line, std::string message);
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace);
+} GTEST_ATTRIBUTE_UNUSED_;  // A ScopedTrace object does its job in its
+                            // c'tor and d'tor.  Therefore it doesn't
+                            // need to be used otherwise.
+
+// Causes a trace (including the source file path, the current line
+// number, and the given message) to be included in every test failure
+// message generated by code in the current scope.  The effect is
+// undone when the control leaves the current scope.
+//
+// The message argument can be anything streamable to std::ostream.
+//
+// In the implementation, we include the current line number as part
+// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s
+// to appear in the same block - as long as they are on different
+// lines.
+//
+// Assuming that each thread maintains its own stack of traces.
+// Therefore, a SCOPED_TRACE() would (correctly) only affect the
+// assertions in its own thread.
+#define SCOPED_TRACE(message) \
+  ::testing::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
+    __FILE__, __LINE__, (message))
+
+
+// Compile-time assertion for type equality.
+// StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
+// the same type.  The value it returns is not interesting.
+//
+// Instead of making StaticAssertTypeEq a class template, we make it a
+// function template that invokes a helper class template.  This
+// prevents a user from misusing StaticAssertTypeEq by
+// defining objects of that type.
+//
+// CAVEAT:
+//
+// When used inside a method of a class template,
+// StaticAssertTypeEq<T1, T2>() is effective ONLY IF the method is
+// instantiated.  For example, given:
+//
+//   template <typename T> class Foo {
+//    public:
+//     void Bar() { testing::StaticAssertTypeEq<int, T>(); }
+//   };
+//
+// the code:
+//
+//   void Test1() { Foo<bool> foo; }
+//
+// will NOT generate a compiler error, as Foo<bool>::Bar() is never
+// actually instantiated.  Instead, you need:
+//
+//   void Test2() { Foo<bool> foo; foo.Bar(); }
+//
+// to cause a compiler error.
+template <typename T1, typename T2>
+bool StaticAssertTypeEq() {
+  (void)internal::StaticAssertTypeEqHelper<T1, T2>();
+  return true;
+}
+
+// Defines a test.
+//
+// The first parameter is the name of the test case, and the second
+// parameter is the name of the test within the test case.
+//
+// The convention is to end the test case name with "Test".  For
+// example, a test case for the Foo class can be named FooTest.
+//
+// Test code should appear between braces after an invocation of
+// this macro.  Example:
+//
+//   TEST(FooTest, InitializesCorrectly) {
+//     Foo foo;
+//     EXPECT_TRUE(foo.StatusIsOK());
+//   }
+
+// Note that we call GetTestTypeId() instead of GetTypeId<
+// ::testing::Test>() here to get the type ID of testing::Test.  This
+// is to work around a suspected linker bug when using Google Test as
+// a framework on Mac OS X.  The bug causes GetTypeId<
+// ::testing::Test>() to return different values depending on whether
+// the call is from the Google Test framework itself or from user test
+// code.  GetTestTypeId() is guaranteed to always return the same
+// value, as it always calls GetTypeId<>() from the Google Test
+// framework.
+#define GTEST_TEST(test_case_name, test_name)\
+  GTEST_TEST_(test_case_name, test_name, \
+              ::testing::Test, ::testing::internal::GetTestTypeId())
+
+// Define this macro to 1 to omit the definition of TEST(), which
+// is a generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_TEST
+# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name)
+#endif
+
+// Defines a test that uses a test fixture.
+//
+// The first parameter is the name of the test fixture class, which
+// also doubles as the test case name.  The second parameter is the
+// name of the test within the test case.
+//
+// A test fixture class must be declared earlier.  The user should put
+// the test code between braces after using this macro.  Example:
+//
+//   class FooTest : public testing::Test {
+//    protected:
+//     virtual void SetUp() { b_.AddElement(3); }
+//
+//     Foo a_;
+//     Foo b_;
+//   };
+//
+//   TEST_F(FooTest, InitializesCorrectly) {
+//     EXPECT_TRUE(a_.StatusIsOK());
+//   }
+//
+//   TEST_F(FooTest, ReturnsElementCountCorrectly) {
+//     EXPECT_EQ(a_.size(), 0);
+//     EXPECT_EQ(b_.size(), 1);
+//   }
+
+#define TEST_F(test_fixture, test_name)\
+  GTEST_TEST_(test_fixture, test_name, test_fixture, \
+              ::testing::internal::GetTypeId<test_fixture>())
+
+// Returns a path to temporary directory.
+// Tries to determine an appropriate directory for the platform.
+GTEST_API_ std::string TempDir();
+
+#ifdef _MSC_VER
+# pragma warning(pop)
+#endif
+
+}  // namespace testing
+
+// Use this function in main() to run all tests.  It returns 0 if all
+// tests are successful, or 1 otherwise.
+//
+// RUN_ALL_TESTS() should be invoked after the command line has been
+// parsed by InitGoogleTest().
+//
+// This function was formerly a macro; thus, it is in the global
+// namespace and has an all-caps name.
+int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_;
+
+inline int RUN_ALL_TESTS() {
+  return ::testing::UnitTest::GetInstance()->Run();
+}
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_H_
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/block_cache_analyzer/block_cache_trace_analyzer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/block_cache_analyzer/block_cache_trace_analyzer.h
new file mode 100644
index 0000000000..2f1ebd139b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/block_cache_analyzer/block_cache_trace_analyzer.h
@@ -0,0 +1,397 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "db/dbformat.h"
+#include "rocksdb/env.h"
+#include "rocksdb/trace_record.h"
+#include "rocksdb/utilities/sim_cache.h"
+#include "trace_replay/block_cache_tracer.h"
+#include "utilities/simulator_cache/cache_simulator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Statistics of a key referenced by a Get.
+struct GetKeyInfo {
+  uint64_t key_id = 0;
+  std::vector<uint64_t> access_sequence_number_timeline;
+  std::vector<uint64_t> access_timeline;
+
+  void AddAccess(const BlockCacheTraceRecord& access,
+                 uint64_t access_sequnce_number) {
+    access_sequence_number_timeline.push_back(access_sequnce_number);
+    access_timeline.push_back(access.access_timestamp);
+  }
+};
+
+// Statistics of a block.
+struct BlockAccessInfo {
+  uint64_t block_id = 0;
+  uint64_t table_id = 0;
+  uint64_t block_offset = 0;
+  uint64_t num_accesses = 0;
+  uint64_t block_size = 0;
+  uint64_t first_access_time = 0;
+  uint64_t last_access_time = 0;
+  uint64_t num_keys = 0;
+  std::map<std::string, std::map<TableReaderCaller, uint64_t>>
+      key_num_access_map;  // for keys exist in this block.
+  std::map<std::string, std::map<TableReaderCaller, uint64_t>>
+      non_exist_key_num_access_map;  // for keys do not exist in this block.
+  uint64_t num_referenced_key_exist_in_block = 0;
+  uint64_t referenced_data_size = 0;
+  std::map<TableReaderCaller, uint64_t> caller_num_access_map;
+  // caller:timestamp:number_of_accesses. The granularity of the timestamp is
+  // seconds.
+  std::map<TableReaderCaller, std::map<uint64_t, uint64_t>>
+      caller_num_accesses_timeline;
+  // Unique blocks since the last access.
+  std::set<std::string> unique_blocks_since_last_access;
+  // Number of reuses grouped by reuse distance.
+  std::map<uint64_t, uint64_t> reuse_distance_count;
+
+  // The access sequence numbers of this block.
+  std::vector<uint64_t> access_sequence_number_timeline;
+  std::map<TableReaderCaller, std::vector<uint64_t>>
+      caller_access_sequence__number_timeline;
+  // The access timestamp in microseconds of this block.
+  std::vector<uint64_t> access_timeline;
+  std::map<TableReaderCaller, std::vector<uint64_t>> caller_access_timeline;
+
+  void AddAccess(const BlockCacheTraceRecord& access,
+                 uint64_t access_sequnce_number) {
+    if (block_size != 0 && access.block_size != 0) {
+      assert(block_size == access.block_size);
+    }
+    if (num_keys != 0 && access.num_keys_in_block != 0) {
+      assert(num_keys == access.num_keys_in_block);
+    }
+    if (first_access_time == 0) {
+      first_access_time = access.access_timestamp;
+    }
+    table_id = BlockCacheTraceHelper::GetTableId(access);
+    block_offset = BlockCacheTraceHelper::GetBlockOffsetInFile(access);
+    last_access_time = access.access_timestamp;
+    block_size = access.block_size;
+    caller_num_access_map[access.caller]++;
+    num_accesses++;
+    // access.access_timestamp is in microsecond.
+    const uint64_t timestamp_in_seconds =
+        access.access_timestamp / kMicrosInSecond;
+    caller_num_accesses_timeline[access.caller][timestamp_in_seconds] += 1;
+    // Populate the feature vectors.
+    access_sequence_number_timeline.push_back(access_sequnce_number);
+    caller_access_sequence__number_timeline[access.caller].push_back(
+        access_sequnce_number);
+    access_timeline.push_back(access.access_timestamp);
+    caller_access_timeline[access.caller].push_back(access.access_timestamp);
+    if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(access.block_type,
+                                                          access.caller)) {
+      num_keys = access.num_keys_in_block;
+      if (access.referenced_key_exist_in_block) {
+        if (key_num_access_map.find(access.referenced_key) ==
+            key_num_access_map.end()) {
+          referenced_data_size += access.referenced_data_size;
+        }
+        key_num_access_map[access.referenced_key][access.caller]++;
+        num_referenced_key_exist_in_block++;
+        if (referenced_data_size > block_size && block_size != 0) {
+          ParsedInternalKey internal_key;
+          Status s = ParseInternalKey(access.referenced_key, &internal_key,
+                                      false /* log_err_key */);  // TODO
+          assert(s.ok());  // TODO
+        }
+      } else {
+        non_exist_key_num_access_map[access.referenced_key][access.caller]++;
+      }
+    }
+  }
+};
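// A simplified standalone sketch (hypothetical, not part of this header) of
// the reuse-distance bookkeeping implied by unique_blocks_since_last_access
// and reuse_distance_count above. For brevity it counts the distinct blocks
// seen between two accesses to the same block; the analyzer accumulates
// their sizes instead.
#include <cstdint>
#include <map>
#include <set>
#include <string>

struct ReuseDistanceSketch {
  // Per-block set of distinct other blocks seen since its last access.
  std::map<std::string, std::set<std::string>> seen_since_last_access;
  std::map<uint64_t, uint64_t> reuse_distance_count;  // distance -> frequency

  void OnAccess(const std::string& block_key) {
    auto it = seen_since_last_access.find(block_key);
    if (it != seen_since_last_access.end()) {
      reuse_distance_count[it->second.size()]++;  // record this reuse
      it->second.clear();                         // restart the window
    } else {
      seen_since_last_access[block_key];          // first access: start tracking
    }
    for (auto& entry : seen_since_last_access) {
      if (entry.first != block_key) entry.second.insert(block_key);
    }
  }
};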
+
+// Aggregates stats of a block given a block type.
+struct BlockTypeAccessInfoAggregate {
+  std::map<std::string, BlockAccessInfo> block_access_info_map;
+};
+
+// Aggregates BlockTypeAggregate given a SST file.
+struct SSTFileAccessInfoAggregate {
+  uint32_t level;
+  std::map<TraceType, BlockTypeAccessInfoAggregate> block_type_aggregates_map;
+};
+
+// Aggregates SSTFileAggregate given a column family.
+struct ColumnFamilyAccessInfoAggregate {
+  std::map<uint64_t, SSTFileAccessInfoAggregate> fd_aggregates_map;
+};
+
+struct Features {
+  std::vector<uint64_t> elapsed_time_since_last_access;
+  std::vector<uint64_t> num_accesses_since_last_access;
+  std::vector<uint64_t> num_past_accesses;
+};
+
+struct Predictions {
+  std::vector<uint64_t> elapsed_time_till_next_access;
+  std::vector<uint64_t> num_accesses_till_next_access;
+};
+
+class BlockCacheTraceAnalyzer {
+ public:
+  BlockCacheTraceAnalyzer(
+      const std::string& trace_file_path, const std::string& output_dir,
+      const std::string& human_readable_trace_file_path,
+      bool compute_reuse_distance, bool mrc_only,
+      bool is_human_readable_trace_file,
+      std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator);
+  ~BlockCacheTraceAnalyzer() = default;
+  // No copy and move.
+  BlockCacheTraceAnalyzer(const BlockCacheTraceAnalyzer&) = delete;
+  BlockCacheTraceAnalyzer& operator=(const BlockCacheTraceAnalyzer&) = delete;
+  BlockCacheTraceAnalyzer(BlockCacheTraceAnalyzer&&) = delete;
+  BlockCacheTraceAnalyzer& operator=(BlockCacheTraceAnalyzer&&) = delete;
+
+  // Read all access records in the given trace_file, maintains the stats of
+  // a block, and aggregates the information by block type, sst file, and
+  // column family. Subsequently, the caller may call Print* functions to
+  // print statistics.
+  Status Analyze();
+
+  // Print a summary of statistics of the trace, e.g.,
+  // Number of files: 2 Number of blocks: 50 Number of accesses: 50
+  // Number of Index blocks: 10
+  // Number of Filter blocks: 10
+  // Number of Data blocks: 10
+  // Number of UncompressionDict blocks: 10
+  // Number of RangeDeletion blocks: 10
+  // ***************************************************************
+  // Caller Get: Number of accesses 10
+  // Caller Get: Number of accesses per level break down
+  //   Level 0: Number of accesses: 10
+  // Caller Get: Number of accesses per block type break down
+  //   Block Type Index: Number of accesses: 2
+  //   Block Type Filter: Number of accesses: 2
+  //   Block Type Data: Number of accesses: 2
+  //   Block Type UncompressionDict: Number of accesses: 2
+  //   Block Type RangeDeletion: Number of accesses: 2
+  void PrintStatsSummary() const;
+
+  // Print block size distribution and the distribution break down by block
+  // type and column family.
+  void PrintBlockSizeStats() const;
+
+  // Print access count distribution and the distribution break down by block
+  // type and column family.
+  void PrintAccessCountStats(bool user_access_only, uint32_t bottom_k,
+                             uint32_t top_k) const;
+
+  // Print data block accesses by user Get and Multi-Get.
+  // It prints out 1) A histogram on the percentage of keys accessed in a data
+  // block break down by if a referenced key exists in the data block and the
+  // histogram break down by column family. 2) A histogram on the percentage
+  // of accesses on keys exist in a data block and its break down by column
+  // family.
+  void PrintDataBlockAccessStats() const;
+
+  // Write the percentage of accesses break down by column family into a csv
+  // file saved in 'output_dir'.
+  //
+  // The file is named "percentage_of_accesses_summary". The file format is
+  // caller,cf_0,cf_1,...,cf_n where the cf_i is the column family name found
+  // in the trace.
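// Illustrative contents of that file, assuming two hypothetical column
// families "default" and "meta" (values are percentages of accesses):
//
//   caller,default,meta
//   Get,62.5,37.5
//   Iterator,80.0,20.0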
+  void WritePercentAccessSummaryStats() const;
+
+  // Write the percentage of accesses for the given caller break down by
+  // column family, level, and block type into a csv file saved in
+  // 'output_dir'.
+  //
+  // It generates two files: 1) caller_level_percentage_of_accesses_summary
+  // and 2) caller_bt_percentage_of_accesses_summary which break down by the
+  // level and block type, respectively. The file format is
+  // level/bt,cf_0,cf_1,...,cf_n where cf_i is the column family name found in
+  // the trace.
+  void WriteDetailedPercentAccessSummaryStats(TableReaderCaller caller) const;
+
+  // Write the access count summary into a csv file saved in 'output_dir'.
+  // It groups blocks by their access count.
+  //
+  // It generates two files: 1) cf_access_count_summary and 2)
+  // bt_access_count_summary which break down the access count by column
+  // family and block type, respectively. The file format is
+  // cf/bt,bucket_0,bucket_1,...,bucket_N.
+  void WriteAccessCountSummaryStats(
+      const std::vector<uint64_t>& access_count_buckets,
+      bool user_access_only) const;
+
+  // Write miss ratio curves of simulated cache configurations into a csv file
+  // named "mrc" saved in 'output_dir'.
+  //
+  // The file format is
+  // "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses".
+  void WriteMissRatioCurves() const;
+
+  // Write miss ratio timeline of simulated cache configurations into several
+  // csv files, one per cache capacity saved in 'output_dir'.
+  //
+  // The file format is
+  // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second"
+  // where N is the number of unique cache names
+  // (cache_name+num_shard_bits+ghost_capacity).
+  void WriteMissRatioTimeline(uint64_t time_unit) const;
+
+  // Write misses timeline of simulated cache configurations into several
+  // csv files, one per cache capacity saved in 'output_dir'.
+  //
+  // The file format is
+  // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second"
+  // where N is the number of unique cache names
+  // (cache_name+num_shard_bits+ghost_capacity).
+  void WriteMissTimeline(uint64_t time_unit) const;
+
+  // Write the access timeline into a csv file saved in 'output_dir'.
+  //
+  // The file is named "label_access_timeline". The file format is
+  // "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second"
+  // where N is the number of unique labels found in the trace.
+  void WriteAccessTimeline(const std::string& label, uint64_t time_unit,
+                           bool user_access_only) const;
+
+  // Write the reuse distance into a csv file saved in 'output_dir'. Reuse
+  // distance is defined as the cumulated size of unique blocks read between
+  // two consecutive accesses on the same block.
+  //
+  // The file is named "label_reuse_distance". The file format is
+  // bucket,label_1,label_2,...,label_N.
+  void WriteReuseDistance(const std::string& label_str,
+                          const std::vector<uint64_t>& distance_buckets) const;
+
+  // Write the reuse interval into a csv file saved in 'output_dir'. Reuse
+  // interval is defined as the time between two consecutive accesses on the
+  // same block.
+  //
+  // The file is named "label_reuse_interval". The file format is
+  // bucket,label_1,label_2,...,label_N.
+  void WriteReuseInterval(const std::string& label_str,
+                          const std::vector<uint64_t>& time_buckets) const;
+
+  // Write the reuse lifetime into a csv file saved in 'output_dir'. Reuse
+  // lifetime is defined as the time interval between the first access of a
+  // block and its last access.
+ // + // The file is named "label_reuse_lifetime". The file format is + // bucket,label_1,label_2,...,label_N. + void WriteReuseLifetime(const std::string& label_str, + const std::vector& time_buckets) const; + + // Write the reuse timeline into a csv file saved in 'output_dir'. + // + // The file is named + // "block_type_user_access_only_reuse_window_reuse_timeline". The file format + // is start_time,0,1,...,N where N equals trace_duration / reuse_window. + void WriteBlockReuseTimeline(const uint64_t reuse_window, + bool user_access_only, + TraceType block_type) const; + + // Write the Get spatical locality into csv files saved in 'output_dir'. + // + // It generates three csv files. label_percent_ref_keys, + // label_percent_accesses_on_ref_keys, and + // label_percent_data_size_on_ref_keys. + void WriteGetSpatialLocality( + const std::string& label_str, + const std::vector& percent_buckets) const; + + void WriteCorrelationFeatures(const std::string& label_str, + uint32_t max_number_of_values) const; + + void WriteCorrelationFeaturesForGet(uint32_t max_number_of_values) const; + + void WriteSkewness(const std::string& label_str, + const std::vector& percent_buckets, + TraceType target_block_type) const; + + const std::map& + TEST_cf_aggregates_map() const { + return cf_aggregates_map_; + } + + private: + std::set ParseLabelStr(const std::string& label_str) const; + + std::string BuildLabel(const std::set& labels, + const std::string& cf_name, uint64_t fd, + uint32_t level, TraceType type, + TableReaderCaller caller, uint64_t block_key, + const BlockAccessInfo& block) const; + + void ComputeReuseDistance(BlockAccessInfo* info) const; + + Status RecordAccess(const BlockCacheTraceRecord& access); + + void UpdateReuseIntervalStats( + const std::string& label, const std::vector& time_buckets, + const std::map timeline, + std::map>* + label_time_num_reuses, + uint64_t* total_num_reuses) const; + + std::string OutputPercentAccessStats( + uint64_t total_accesses, + const std::map& cf_access_count) const; + + void WriteStatsToFile( + const std::string& label_str, const std::vector& time_buckets, + const std::string& filename_suffix, + const std::map>& label_data, + uint64_t ntotal) const; + + void TraverseBlocks( + std::function + block_callback, + std::set* labels = nullptr) const; + + void UpdateFeatureVectors( + const std::vector& access_sequence_number_timeline, + const std::vector& access_timeline, const std::string& label, + std::map* label_features, + std::map* label_predictions) const; + + void WriteCorrelationFeaturesToFile( + const std::string& label, + const std::map& label_features, + const std::map& label_predictions, + uint32_t max_number_of_values) const; + + ROCKSDB_NAMESPACE::Env* env_; + const std::string trace_file_path_; + const std::string output_dir_; + std::string human_readable_trace_file_path_; + const bool compute_reuse_distance_; + const bool mrc_only_; + const bool is_human_readable_trace_file_; + + BlockCacheTraceHeader header_; + std::unique_ptr cache_simulator_; + std::map cf_aggregates_map_; + std::map block_info_map_; + std::unordered_map get_key_info_map_; + uint64_t access_sequence_number_ = 0; + uint64_t trace_start_timestamp_in_seconds_ = 0; + uint64_t trace_end_timestamp_in_seconds_ = 0; + MissRatioStats miss_ratio_stats_; + uint64_t unique_block_id_ = 1; + uint64_t unique_get_key_id_ = 1; + BlockCacheHumanReadableTraceWriter human_readable_trace_writer_; +}; + +int block_cache_trace_analyzer_tool(int argc, char** argv); + +} // namespace ROCKSDB_NAMESPACE 
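// A minimal driver sketch for the analyzer declared above. The paths are
// hypothetical, and passing a null cache simulator (i.e. skipping cache
// simulation) is an assumption; block_cache_trace_analyzer_tool is the real
// command-line entry point.
#include <memory>

int AnalyzeTraceSketch() {
  ROCKSDB_NAMESPACE::BlockCacheTraceAnalyzer analyzer(
      "/tmp/block_cache_trace",      // trace_file_path (hypothetical)
      "/tmp/block_cache_analysis",   // output_dir (hypothetical)
      "",                            // no human-readable trace output
      /*compute_reuse_distance=*/true,
      /*mrc_only=*/false,
      /*is_human_readable_trace_file=*/false,
      /*cache_simulator=*/nullptr);  // assumed optional: no simulation
  ROCKSDB_NAMESPACE::Status s = analyzer.Analyze();
  if (!s.ok()) return 1;
  analyzer.PrintStatsSummary();
  return 0;
}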
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/dump/db_dump_tool.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/dump/db_dump_tool.cc new file mode 100644 index 0000000000..535e70c433 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/dump/db_dump_tool.cc @@ -0,0 +1,258 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "rocksdb/db_dump_tool.h" + +#include +#include + +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +bool DbDumpTool::Run(const DumpOptions& dump_options, + ROCKSDB_NAMESPACE::Options options) { + ROCKSDB_NAMESPACE::DB* dbptr; + ROCKSDB_NAMESPACE::Status status; + std::unique_ptr dumpfile; + char hostname[1024]; + int64_t timesec = 0; + std::string abspath; + char json[4096]; + + static const char* magicstr = "ROCKDUMP"; + static const char versionstr[8] = {0, 0, 0, 0, 0, 0, 0, 1}; + + ROCKSDB_NAMESPACE::Env* env = ROCKSDB_NAMESPACE::Env::Default(); + + // Open the database + options.create_if_missing = false; + status = ROCKSDB_NAMESPACE::DB::OpenForReadOnly(options, dump_options.db_path, + &dbptr); + if (!status.ok()) { + std::cerr << "Unable to open database '" << dump_options.db_path + << "' for reading: " << status.ToString() << std::endl; + return false; + } + + const std::unique_ptr db(dbptr); + + status = env->NewWritableFile(dump_options.dump_location, &dumpfile, + ROCKSDB_NAMESPACE::EnvOptions()); + if (!status.ok()) { + std::cerr << "Unable to open dump file '" << dump_options.dump_location + << "' for writing: " << status.ToString() << std::endl; + return false; + } + + ROCKSDB_NAMESPACE::Slice magicslice(magicstr, 8); + status = dumpfile->Append(magicslice); + if (!status.ok()) { + std::cerr << "Append failed: " << status.ToString() << std::endl; + return false; + } + + ROCKSDB_NAMESPACE::Slice versionslice(versionstr, 8); + status = dumpfile->Append(versionslice); + if (!status.ok()) { + std::cerr << "Append failed: " << status.ToString() << std::endl; + return false; + } + + if (dump_options.anonymous) { + snprintf(json, sizeof(json), "{}"); + } else { + status = env->GetHostName(hostname, sizeof(hostname)); + status = env->GetCurrentTime(×ec); + status = env->GetAbsolutePath(dump_options.db_path, &abspath); + snprintf(json, sizeof(json), + "{ \"database-path\": \"%s\", \"hostname\": \"%s\", " + "\"creation-time\": %" PRIi64 " }", + abspath.c_str(), hostname, timesec); + } + + ROCKSDB_NAMESPACE::Slice infoslice(json, strlen(json)); + char infosize[4]; + ROCKSDB_NAMESPACE::EncodeFixed32(infosize, (uint32_t)infoslice.size()); + ROCKSDB_NAMESPACE::Slice infosizeslice(infosize, 4); + status = dumpfile->Append(infosizeslice); + if (!status.ok()) { + std::cerr << "Append failed: " << status.ToString() << std::endl; + return false; + } + status = dumpfile->Append(infoslice); + if (!status.ok()) { + std::cerr << "Append failed: " << status.ToString() << std::endl; + return false; + } + + const std::unique_ptr it( + db->NewIterator(ROCKSDB_NAMESPACE::ReadOptions())); + for (it->SeekToFirst(); it->Valid(); it->Next()) { + char keysize[4]; + ROCKSDB_NAMESPACE::EncodeFixed32(keysize, (uint32_t)it->key().size()); + ROCKSDB_NAMESPACE::Slice keysizeslice(keysize, 4); + status = dumpfile->Append(keysizeslice); + if (!status.ok()) { + std::cerr 
<< "Append failed: " << status.ToString() << std::endl; + return false; + } + status = dumpfile->Append(it->key()); + if (!status.ok()) { + std::cerr << "Append failed: " << status.ToString() << std::endl; + return false; + } + + char valsize[4]; + ROCKSDB_NAMESPACE::EncodeFixed32(valsize, (uint32_t)it->value().size()); + ROCKSDB_NAMESPACE::Slice valsizeslice(valsize, 4); + status = dumpfile->Append(valsizeslice); + if (!status.ok()) { + std::cerr << "Append failed: " << status.ToString() << std::endl; + return false; + } + status = dumpfile->Append(it->value()); + if (!status.ok()) { + std::cerr << "Append failed: " << status.ToString() << std::endl; + return false; + } + } + if (!it->status().ok()) { + std::cerr << "Database iteration failed: " << status.ToString() + << std::endl; + return false; + } + return true; +} + +bool DbUndumpTool::Run(const UndumpOptions& undump_options, + ROCKSDB_NAMESPACE::Options options) { + ROCKSDB_NAMESPACE::DB* dbptr; + ROCKSDB_NAMESPACE::Status status; + ROCKSDB_NAMESPACE::Env* env; + std::unique_ptr dumpfile; + ROCKSDB_NAMESPACE::Slice slice; + char scratch8[8]; + + static const char* magicstr = "ROCKDUMP"; + static const char versionstr[8] = {0, 0, 0, 0, 0, 0, 0, 1}; + + env = ROCKSDB_NAMESPACE::Env::Default(); + + status = env->NewSequentialFile(undump_options.dump_location, &dumpfile, + ROCKSDB_NAMESPACE::EnvOptions()); + if (!status.ok()) { + std::cerr << "Unable to open dump file '" << undump_options.dump_location + << "' for reading: " << status.ToString() << std::endl; + return false; + } + + status = dumpfile->Read(8, &slice, scratch8); + if (!status.ok() || slice.size() != 8 || + memcmp(slice.data(), magicstr, 8) != 0) { + std::cerr << "File '" << undump_options.dump_location + << "' is not a recognizable dump file." << std::endl; + return false; + } + + status = dumpfile->Read(8, &slice, scratch8); + if (!status.ok() || slice.size() != 8 || + memcmp(slice.data(), versionstr, 8) != 0) { + std::cerr << "File '" << undump_options.dump_location + << "' version not recognized." << std::endl; + return false; + } + + status = dumpfile->Read(4, &slice, scratch8); + if (!status.ok() || slice.size() != 4) { + std::cerr << "Unable to read info blob size." 
<< std::endl; + return false; + } + uint32_t infosize = ROCKSDB_NAMESPACE::DecodeFixed32(slice.data()); + status = dumpfile->Skip(infosize); + if (!status.ok()) { + std::cerr << "Unable to skip info blob: " << status.ToString() << std::endl; + return false; + } + + options.create_if_missing = true; + status = ROCKSDB_NAMESPACE::DB::Open(options, undump_options.db_path, &dbptr); + if (!status.ok()) { + std::cerr << "Unable to open database '" << undump_options.db_path + << "' for writing: " << status.ToString() << std::endl; + return false; + } + + const std::unique_ptr db(dbptr); + + uint32_t last_keysize = 64; + size_t last_valsize = 1 << 20; + std::unique_ptr keyscratch(new char[last_keysize]); + std::unique_ptr valscratch(new char[last_valsize]); + + while (1) { + uint32_t keysize, valsize; + ROCKSDB_NAMESPACE::Slice keyslice; + ROCKSDB_NAMESPACE::Slice valslice; + + status = dumpfile->Read(4, &slice, scratch8); + if (!status.ok() || slice.size() != 4) break; + keysize = ROCKSDB_NAMESPACE::DecodeFixed32(slice.data()); + if (keysize > last_keysize) { + while (keysize > last_keysize) last_keysize *= 2; + keyscratch = std::unique_ptr(new char[last_keysize]); + } + + status = dumpfile->Read(keysize, &keyslice, keyscratch.get()); + if (!status.ok() || keyslice.size() != keysize) { + std::cerr << "Key read failure: " + << (status.ok() ? "insufficient data" : status.ToString()) + << std::endl; + return false; + } + + status = dumpfile->Read(4, &slice, scratch8); + if (!status.ok() || slice.size() != 4) { + std::cerr << "Unable to read value size: " + << (status.ok() ? "insufficient data" : status.ToString()) + << std::endl; + return false; + } + valsize = ROCKSDB_NAMESPACE::DecodeFixed32(slice.data()); + if (valsize > last_valsize) { + while (valsize > last_valsize) last_valsize *= 2; + valscratch = std::unique_ptr(new char[last_valsize]); + } + + status = dumpfile->Read(valsize, &valslice, valscratch.get()); + if (!status.ok() || valslice.size() != valsize) { + std::cerr << "Unable to read value: " + << (status.ok() ? "insufficient data" : status.ToString()) + << std::endl; + return false; + } + + status = db->Put(ROCKSDB_NAMESPACE::WriteOptions(), keyslice, valslice); + if (!status.ok()) { + fprintf(stderr, "Unable to write database entry\n"); + return false; + } + } + + if (undump_options.compact_db) { + status = db->CompactRange(ROCKSDB_NAMESPACE::CompactRangeOptions(), nullptr, + nullptr); + if (!status.ok()) { + fprintf(stderr, + "Unable to compact the database after loading the dumped file\n"); + return false; + } + } + return true; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/io_tracer_parser_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/io_tracer_parser_tool.h new file mode 100644 index 0000000000..c79d4c510c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/io_tracer_parser_tool.h @@ -0,0 +1,38 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/env.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +struct IOTraceHeader; +struct IOTraceRecord; + +// IOTraceRecordParser class reads the IO trace file (in binary format) and +// dumps the human readable records in output_file_. 
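// Minimal usage sketch for the parser declared just below. The trace path is
// hypothetical, and treating the returned int as a process exit status
// (0 on success) is an assumption based on the tool wrapper.
int ParseIOTraceSketch() {
  ROCKSDB_NAMESPACE::IOTraceRecordParser parser("/tmp/io_trace");
  return parser.ReadIOTraceRecords();
}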
+class IOTraceRecordParser { + public: + explicit IOTraceRecordParser(const std::string& input_file); + + // ReadIOTraceRecords reads the binary trace file records one by one and + // invoke PrintHumanReadableIOTraceRecord to dump the records in output_file_. + int ReadIOTraceRecords(); + + private: + void PrintHumanReadableHeader(const IOTraceHeader& header); + void PrintHumanReadableIOTraceRecord(const IOTraceRecord& record); + + // Binary file that contains IO trace records. + std::string input_file_; +}; + +int io_tracer_parser(int argc, char** argv); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/ldb_cmd_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/ldb_cmd_impl.h new file mode 100644 index 0000000000..97de981b1a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/ldb_cmd_impl.h @@ -0,0 +1,744 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "rocksdb/utilities/ldb_cmd.h" + +namespace ROCKSDB_NAMESPACE { + +class CompactorCommand : public LDBCommand { + public: + static std::string Name() { return "compact"; } + + CompactorCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + private: + bool null_from_; + std::string from_; + bool null_to_; + std::string to_; +}; + +class DBFileDumperCommand : public LDBCommand { + public: + static std::string Name() { return "dump_live_files"; } + + DBFileDumperCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + private: + bool decode_blob_index_; + bool dump_uncompressed_blobs_; +}; + +class DBLiveFilesMetadataDumperCommand : public LDBCommand { + public: + static std::string Name() { return "list_live_files_metadata"; } + + DBLiveFilesMetadataDumperCommand( + const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + private: + bool sort_by_filename_; + + static const std::string ARG_SORT_BY_FILENAME; +}; + +class DBDumperCommand : public LDBCommand { + public: + static std::string Name() { return "dump"; } + + DBDumperCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + private: + /** + * Extract file name from the full path. We handle both the forward slash (/) + * and backslash (\) to make sure that different OS-s are supported. 
+ */ + static std::string GetFileNameFromPath(const std::string& s) { + std::size_t n = s.find_last_of("/\\"); + + if (std::string::npos == n) { + return s; + } else { + return s.substr(n + 1); + } + } + + void DoDumpCommand(); + + bool null_from_; + std::string from_; + bool null_to_; + std::string to_; + int max_keys_; + std::string delim_; + bool count_only_; + bool count_delim_; + bool print_stats_; + std::string path_; + bool decode_blob_index_; + bool dump_uncompressed_blobs_; + + static const std::string ARG_COUNT_ONLY; + static const std::string ARG_COUNT_DELIM; + static const std::string ARG_STATS; + static const std::string ARG_TTL_BUCKET; +}; + +class InternalDumpCommand : public LDBCommand { + public: + static std::string Name() { return "idump"; } + + InternalDumpCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + private: + bool has_from_; + std::string from_; + bool has_to_; + std::string to_; + int max_keys_; + std::string delim_; + bool count_only_; + bool count_delim_; + bool print_stats_; + bool is_input_key_hex_; + bool decode_blob_index_; + + static const std::string ARG_DELIM; + static const std::string ARG_COUNT_ONLY; + static const std::string ARG_COUNT_DELIM; + static const std::string ARG_STATS; + static const std::string ARG_INPUT_KEY_HEX; +}; + +class DBLoaderCommand : public LDBCommand { + public: + static std::string Name() { return "load"; } + + DBLoaderCommand(std::string& db_name, std::vector& args); + + DBLoaderCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + void OverrideBaseOptions() override; + + private: + bool disable_wal_; + bool bulk_load_; + bool compact_; + + static const std::string ARG_DISABLE_WAL; + static const std::string ARG_BULK_LOAD; + static const std::string ARG_COMPACT; +}; + +class ManifestDumpCommand : public LDBCommand { + public: + static std::string Name() { return "manifest_dump"; } + + ManifestDumpCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + bool NoDBOpen() override { return true; } + + private: + bool verbose_; + bool json_; + std::string path_; + + static const std::string ARG_VERBOSE; + static const std::string ARG_JSON; + static const std::string ARG_PATH; +}; + +class UpdateManifestCommand : public LDBCommand { + public: + static std::string Name() { return "update_manifest"; } + + UpdateManifestCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + virtual void DoCommand() override; + + virtual bool NoDBOpen() override { return true; } + + private: + bool verbose_; + bool update_temperatures_; + // TODO future: checksum_func for populating checksums + + static const std::string ARG_VERBOSE; + static const std::string ARG_UPDATE_TEMPERATURES; +}; + +class FileChecksumDumpCommand : public LDBCommand { + public: + static std::string Name() { return "file_checksum_dump"; } + + FileChecksumDumpCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + void DoCommand() override; + + bool NoDBOpen() override { return true; } + + private: + std::string path_; + bool is_checksum_hex_; + + static const std::string ARG_PATH; +}; + +class 
GetPropertyCommand : public LDBCommand { + public: + static std::string Name() { return "get_property"; } + + GetPropertyCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + void DoCommand() override; + + private: + std::string property_; +}; + +class ListColumnFamiliesCommand : public LDBCommand { + public: + static std::string Name() { return "list_column_families"; } + + ListColumnFamiliesCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + bool NoDBOpen() override { return true; } +}; + +class CreateColumnFamilyCommand : public LDBCommand { + public: + static std::string Name() { return "create_column_family"; } + + CreateColumnFamilyCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + bool NoDBOpen() override { return false; } + + private: + std::string new_cf_name_; +}; + +class DropColumnFamilyCommand : public LDBCommand { + public: + static std::string Name() { return "drop_column_family"; } + + DropColumnFamilyCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + bool NoDBOpen() override { return false; } + + private: + std::string cf_name_to_drop_; +}; + +class ReduceDBLevelsCommand : public LDBCommand { + public: + static std::string Name() { return "reduce_levels"; } + + ReduceDBLevelsCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void OverrideBaseCFOptions(ColumnFamilyOptions* cf_opts) override; + + void DoCommand() override; + + bool NoDBOpen() override { return true; } + + static void Help(std::string& msg); + + static std::vector PrepareArgs(const std::string& db_path, + int new_levels, + bool print_old_level = false); + + private: + int old_levels_; + int new_levels_; + bool print_old_levels_; + + static const std::string ARG_NEW_LEVELS; + static const std::string ARG_PRINT_OLD_LEVELS; + + Status GetOldNumOfLevels(Options& opt, int* levels); +}; + +class ChangeCompactionStyleCommand : public LDBCommand { + public: + static std::string Name() { return "change_compaction_style"; } + + ChangeCompactionStyleCommand( + const std::vector& params, + const std::map& options, + const std::vector& flags); + + void OverrideBaseCFOptions(ColumnFamilyOptions* cf_opts) override; + + void DoCommand() override; + + static void Help(std::string& msg); + + private: + int old_compaction_style_; + int new_compaction_style_; + + static const std::string ARG_OLD_COMPACTION_STYLE; + static const std::string ARG_NEW_COMPACTION_STYLE; +}; + +class WALDumperCommand : public LDBCommand { + public: + static std::string Name() { return "dump_wal"; } + + WALDumperCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + bool NoDBOpen() override { return true; } + + static void Help(std::string& ret); + + void DoCommand() override; + + private: + bool print_header_; + std::string wal_file_; + bool print_values_; + bool is_write_committed_; // default will be set to true + + static const std::string ARG_WAL_FILE; + static const std::string ARG_WRITE_COMMITTED; + static const std::string ARG_PRINT_HEADER; + static const std::string ARG_PRINT_VALUE; +}; + +class GetCommand : public LDBCommand { + public: + static 
std::string Name() { return "get"; } + + GetCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + private: + std::string key_; +}; + +class ApproxSizeCommand : public LDBCommand { + public: + static std::string Name() { return "approxsize"; } + + ApproxSizeCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + private: + std::string start_key_; + std::string end_key_; +}; + +class BatchPutCommand : public LDBCommand { + public: + static std::string Name() { return "batchput"; } + + BatchPutCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + void OverrideBaseOptions() override; + + private: + /** + * The key-values to be inserted. + */ + std::vector> key_values_; +}; + +class ScanCommand : public LDBCommand { + public: + static std::string Name() { return "scan"; } + + ScanCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + private: + std::string start_key_; + std::string end_key_; + bool start_key_specified_; + bool end_key_specified_; + int max_keys_scanned_; + bool no_value_; +}; + +class DeleteCommand : public LDBCommand { + public: + static std::string Name() { return "delete"; } + + DeleteCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + private: + std::string key_; +}; + +class SingleDeleteCommand : public LDBCommand { + public: + static std::string Name() { return "singledelete"; } + + SingleDeleteCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + private: + std::string key_; +}; + +class DeleteRangeCommand : public LDBCommand { + public: + static std::string Name() { return "deleterange"; } + + DeleteRangeCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + private: + std::string begin_key_; + std::string end_key_; +}; + +class PutCommand : public LDBCommand { + public: + static std::string Name() { return "put"; } + + PutCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + void OverrideBaseOptions() override; + + private: + std::string key_; + std::string value_; +}; + +/** + * Command that starts up a REPL shell that allows + * get/put/delete. 
+ */ +class DBQuerierCommand : public LDBCommand { + public: + static std::string Name() { return "query"; } + + DBQuerierCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + static void Help(std::string& ret); + + void DoCommand() override; + + private: + static const char* HELP_CMD; + static const char* GET_CMD; + static const char* PUT_CMD; + static const char* DELETE_CMD; +}; + +class CheckConsistencyCommand : public LDBCommand { + public: + static std::string Name() { return "checkconsistency"; } + + CheckConsistencyCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + bool NoDBOpen() override { return true; } + + static void Help(std::string& ret); +}; + +class CheckPointCommand : public LDBCommand { + public: + static std::string Name() { return "checkpoint"; } + + CheckPointCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + std::string checkpoint_dir_; + + private: + static const std::string ARG_CHECKPOINT_DIR; +}; + +class RepairCommand : public LDBCommand { + public: + static std::string Name() { return "repair"; } + + RepairCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + bool NoDBOpen() override { return true; } + + void OverrideBaseOptions() override; + + static void Help(std::string& ret); + + protected: + bool verbose_; + + private: + static const std::string ARG_VERBOSE; +}; + +class BackupEngineCommand : public LDBCommand { + public: + BackupEngineCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + protected: + static void Help(const std::string& name, std::string& ret); + std::string backup_env_uri_; + std::string backup_fs_uri_; + std::string backup_dir_; + int num_threads_; + std::unique_ptr logger_; + std::shared_ptr backup_env_guard_; + + private: + static const std::string ARG_BACKUP_DIR; + static const std::string ARG_BACKUP_ENV_URI; + static const std::string ARG_BACKUP_FS_URI; + static const std::string ARG_NUM_THREADS; + static const std::string ARG_STDERR_LOG_LEVEL; +}; + +class BackupCommand : public BackupEngineCommand { + public: + static std::string Name() { return "backup"; } + BackupCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + void DoCommand() override; + static void Help(std::string& ret); +}; + +class RestoreCommand : public BackupEngineCommand { + public: + static std::string Name() { return "restore"; } + RestoreCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + void DoCommand() override; + bool NoDBOpen() override { return true; } + static void Help(std::string& ret); +}; + +class WriteExternalSstFilesCommand : public LDBCommand { + public: + static std::string Name() { return "write_extern_sst"; } + WriteExternalSstFilesCommand( + const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + bool NoDBOpen() override { return false; } + + void OverrideBaseOptions() override; + + static void Help(std::string& ret); + + private: + std::string output_sst_path_; +}; + +class IngestExternalSstFilesCommand : public LDBCommand { + public: + static std::string Name() { return "ingest_extern_sst"; } + IngestExternalSstFilesCommand( + const std::vector& params, + const 
std::map<std::string, std::string>& options,
+                                const std::vector<std::string>& flags);
+
+  void DoCommand() override;
+
+  bool NoDBOpen() override { return false; }
+
+  void OverrideBaseOptions() override;
+
+  static void Help(std::string& ret);
+
+ private:
+  std::string input_sst_path_;
+  bool move_files_;
+  bool snapshot_consistency_;
+  bool allow_global_seqno_;
+  bool allow_blocking_flush_;
+  bool ingest_behind_;
+  bool write_global_seqno_;
+
+  static const std::string ARG_MOVE_FILES;
+  static const std::string ARG_SNAPSHOT_CONSISTENCY;
+  static const std::string ARG_ALLOW_GLOBAL_SEQNO;
+  static const std::string ARG_ALLOW_BLOCKING_FLUSH;
+  static const std::string ARG_INGEST_BEHIND;
+  static const std::string ARG_WRITE_GLOBAL_SEQNO;
+};
+
+// Command that prints out range delete tombstones in SST files.
+class ListFileRangeDeletesCommand : public LDBCommand {
+ public:
+  static std::string Name() { return "list_file_range_deletes"; }
+
+  ListFileRangeDeletesCommand(const std::map<std::string, std::string>& options,
+                              const std::vector<std::string>& flags);
+
+  void DoCommand() override;
+
+  static void Help(std::string& ret);
+
+ private:
+  int max_keys_ = 1000;
+};
+
+// Command that removes the SST file forcibly from the manifest.
+class UnsafeRemoveSstFileCommand : public LDBCommand {
+ public:
+  static std::string Name() { return "unsafe_remove_sst_file"; }
+
+  UnsafeRemoveSstFileCommand(const std::vector<std::string>& params,
+                             const std::map<std::string, std::string>& options,
+                             const std::vector<std::string>& flags);
+
+  static void Help(std::string& ret);
+
+  void DoCommand() override;
+
+  bool NoDBOpen() override { return true; }
+
+ private:
+  uint64_t sst_file_number_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/simulated_hybrid_file_system.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/simulated_hybrid_file_system.h
new file mode 100644
index 0000000000..44b37eadeb
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/simulated_hybrid_file_system.h
@@ -0,0 +1,124 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+
+#include
+
+#include "rocksdb/file_system.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A FileSystem that simulates a hybrid file system by injecting latency and
+// limiting IOPS.
+// This class is only used for development purposes and should not be used
+// in production.
+// Right now we inject 15ms of latency and allow 100 requests per second when
+// the file is at the warm temperature.
+// When the object is destroyed, the list of warm files is written to a
+// file, which can be used to reopen a FileSystem and still recover the
+// list. This allows the information to be preserved across db_bench runs.
+class SimulatedHybridFileSystem : public FileSystemWrapper {
+ public:
+  // metadata_file_name stores metadata of the files, so that it can be
+  // loaded after the process restarts. If the file doesn't exist, create
+  // one. The file is written when the class is destroyed.
+  // throughput_multiplier: multiplier of throughput. For example, 1 simulates
+  // a single disk spindle, while 4 simulates 4 disk spindles.
+  // is_full_fs_warm: if true, all files are included in the slow I/O
+  // simulation.
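+  //
+  // An illustrative construction (editorial sketch; the values shown are
+  // hypothetical, only the signature below comes from this header):
+  //   auto fs = std::make_shared<SimulatedHybridFileSystem>(
+  //       FileSystem::Default(), "/tmp/hybrid_fs_metadata",
+  //       /*throughput_multiplier=*/1, /*is_full_fs_warm=*/false);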
+  SimulatedHybridFileSystem(const std::shared_ptr<FileSystem>& base,
+                            const std::string& metadata_file_name,
+                            int throughput_multiplier, bool is_full_fs_warm);
+
+  ~SimulatedHybridFileSystem() override;
+
+ public:
+  IOStatus NewRandomAccessFile(const std::string& fname,
+                               const FileOptions& file_opts,
+                               std::unique_ptr<FSRandomAccessFile>* result,
+                               IODebugContext* dbg) override;
+  IOStatus NewWritableFile(const std::string& fname,
+                           const FileOptions& file_opts,
+                           std::unique_ptr<FSWritableFile>* result,
+                           IODebugContext* dbg) override;
+  IOStatus DeleteFile(const std::string& fname, const IOOptions& options,
+                      IODebugContext* dbg) override;
+
+  const char* Name() const override { return name_.c_str(); }
+
+ private:
+  // Limit to 100 requests per second. The rate limiter operates on bytes, so
+  // we use it as if one request were a fixed number of bytes.
+  std::shared_ptr<RateLimiter> rate_limiter_;
+  std::mutex mutex_;
+  std::unordered_set<std::string> warm_file_set_;
+  std::string metadata_file_name_;
+  std::string name_;
+  bool is_full_fs_warm_;
+};
+
+// Simulated random access file that can control IOPS and latency to simulate
+// specific storage media.
+class SimulatedHybridRaf : public FSRandomAccessFileOwnerWrapper {
+ public:
+  SimulatedHybridRaf(std::unique_ptr<FSRandomAccessFile>&& t,
+                     std::shared_ptr<RateLimiter> rate_limiter,
+                     Temperature temperature)
+      : FSRandomAccessFileOwnerWrapper(std::move(t)),
+        rate_limiter_(rate_limiter),
+        temperature_(temperature) {}
+
+  ~SimulatedHybridRaf() override {}
+
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override;
+
+  IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
+                     const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options,
+                    IODebugContext* dbg) override;
+
+ private:
+  std::shared_ptr<RateLimiter> rate_limiter_;
+  Temperature temperature_;
+
+  void SimulateIOWait(int64_t num_requests) const;
+};
+
+class SimulatedWritableFile : public FSWritableFileWrapper {
+ public:
+  SimulatedWritableFile(std::unique_ptr<FSWritableFile>&& t,
+                        std::shared_ptr<RateLimiter> rate_limiter)
+      : FSWritableFileWrapper(t.get()),
+        file_guard_(std::move(t)),
+        rate_limiter_(rate_limiter) {}
+  IOStatus Append(const Slice& data, const IOOptions&,
+                  IODebugContext*) override;
+  IOStatus Append(const Slice& data, const IOOptions& options,
+                  const DataVerificationInfo& verification_info,
+                  IODebugContext* dbg) override;
+  IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            IODebugContext* dbg) override;
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            const DataVerificationInfo& verification_info,
+                            IODebugContext* dbg) override;
+
+ private:
+  std::unique_ptr<FSWritableFile> file_guard_;
+  std::shared_ptr<RateLimiter> rate_limiter_;
+  size_t unsynced_bytes = 0;
+
+  void SimulateIOWait(int64_t num_requests) const;
+};
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/trace_analyzer_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/trace_analyzer_tool.h
new file mode 100644
index 0000000000..2c3042bdc5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/tools/trace_analyzer_tool.h
@@ -0,0 +1,329 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "rocksdb/env.h" +#include "rocksdb/trace_reader_writer.h" +#include "rocksdb/trace_record.h" +#include "rocksdb/write_batch.h" +#include "trace_replay/trace_replay.h" + +namespace ROCKSDB_NAMESPACE { + +// Value sizes may be used as denominators. Replacing 0 value sizes with this +// positive integer avoids division error. +extern const size_t kShadowValueSize /* = 10*/; + +enum TraceOperationType : int { + kGet = 0, + kPut = 1, + kDelete = 2, + kSingleDelete = 3, + kRangeDelete = 4, + kMerge = 5, + kIteratorSeek = 6, + kIteratorSeekForPrev = 7, + kMultiGet = 8, + kPutEntity = 9, + kTaTypeNum = 10 +}; + +struct TraceUnit { + uint64_t ts; + uint32_t type; + uint32_t cf_id; + size_t value_size; + std::string key; +}; + +struct TypeCorrelation { + uint64_t count; + uint64_t total_ts; +}; + +struct StatsUnit { + uint64_t key_id; + uint64_t access_count; + uint64_t latest_ts; + uint64_t succ_count; // current only used to count Get if key found + uint32_t cf_id; + size_t value_size; + std::vector v_correlation; +}; + +class AnalyzerOptions { + public: + std::vector> correlation_map; + std::vector> correlation_list; + + AnalyzerOptions(); + + ~AnalyzerOptions(); + + void SparseCorrelationInput(const std::string& in_str); +}; + +// Note that, for the variable names in the trace_analyzer, +// Starting with 'a_' means the variable is used for 'accessed_keys'. +// Starting with 'w_' means it is used for 'the whole key space'. +// Ending with '_f' means a file write or reader pointer. +// For example, 'a_count' means 'accessed_keys_count', +// 'w_key_f' means 'whole_key_space_file'. + +struct TraceStats { + uint32_t cf_id; + std::string cf_name; + uint64_t a_count; + uint64_t a_succ_count; + uint64_t a_key_id; + uint64_t a_key_size_sqsum; + uint64_t a_key_size_sum; + uint64_t a_key_mid; + uint64_t a_value_size_sqsum; + uint64_t a_value_size_sum; + uint64_t a_value_mid; + uint32_t a_peak_qps; + double a_ave_qps; + std::map a_key_stats; + std::map a_count_stats; + std::map a_key_size_stats; + std::map a_value_size_stats; + std::map a_qps_stats; + std::map> a_qps_prefix_stats; + std::priority_queue, + std::vector>, + std::greater>> + top_k_queue; + std::priority_queue, + std::vector>, + std::greater>> + top_k_prefix_access; + std::priority_queue, + std::vector>, + std::greater>> + top_k_prefix_ave; + std::priority_queue, + std::vector>, + std::greater>> + top_k_qps_sec; + std::list time_series; + std::vector> correlation_output; + std::map uni_key_num; + + std::unique_ptr time_series_f; + std::unique_ptr a_key_f; + std::unique_ptr a_count_dist_f; + std::unique_ptr a_prefix_cut_f; + std::unique_ptr a_value_size_f; + std::unique_ptr a_key_size_f; + std::unique_ptr a_key_num_f; + std::unique_ptr a_qps_f; + std::unique_ptr a_top_qps_prefix_f; + std::unique_ptr w_key_f; + std::unique_ptr w_prefix_cut_f; + + TraceStats(); + ~TraceStats(); + TraceStats(const TraceStats&) = delete; + TraceStats& operator=(const TraceStats&) = delete; + TraceStats(TraceStats&&) = default; + TraceStats& operator=(TraceStats&&) = default; +}; + +struct TypeUnit { + std::string type_name; + bool enabled; + uint64_t total_keys; + uint64_t total_access; + uint64_t total_succ_access; + uint32_t sample_count; + std::map stats; + TypeUnit() = default; + ~TypeUnit() = default; + TypeUnit(const TypeUnit&) = delete; + TypeUnit& operator=(const TypeUnit&) = delete; + TypeUnit(TypeUnit&&) = default; + TypeUnit& operator=(TypeUnit&&) = default; +}; + 
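+// Illustrative helper (an editorial sketch, not part of the upstream file):
+// it shows how a per-second QPS histogram shaped like TraceStats::a_qps_stats
+// (assumed here to map second -> request count) can be folded into the peak
+// and average values that TraceStats keeps in a_peak_qps / a_ave_qps. The
+// name ReduceQpsSketch and its placement are hypothetical.
+inline void ReduceQpsSketch(const std::map<uint32_t, uint32_t>& qps_stats,
+                            uint32_t* peak_qps, double* ave_qps) {
+  uint64_t total = 0;
+  *peak_qps = 0;
+  for (const auto& second_and_count : qps_stats) {
+    if (second_and_count.second > *peak_qps) {
+      *peak_qps = second_and_count.second;  // busiest observed second
+    }
+    total += second_and_count.second;  // accumulate for the mean
+  }
+  *ave_qps = qps_stats.empty() ? 0.0
+                               : static_cast<double>(total) /
+                                     static_cast<double>(qps_stats.size());
+}
+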
+struct CfUnit {
+  uint32_t cf_id;
+  uint64_t w_count;  // total keys in this cf if we use the whole key space
+  uint64_t a_count;  // the total keys in this cf that are accessed
+  std::map w_key_size_stats;  // whole key space key size
+                              // statistics for this cf
+  std::map cf_qps;
+};
+
+class TraceAnalyzer : private TraceRecord::Handler,
+                      private WriteBatch::Handler {
+ public:
+  TraceAnalyzer(std::string& trace_path, std::string& output_path,
+                AnalyzerOptions _analyzer_opts);
+  ~TraceAnalyzer();
+
+  Status PrepareProcessing();
+
+  Status StartProcessing();
+
+  Status MakeStatistics();
+
+  Status ReProcessing();
+
+  Status EndProcessing();
+
+  Status WriteTraceUnit(TraceUnit& unit);
+
+  std::vector<TypeUnit>& GetTaVector() { return ta_; }
+
+ private:
+  using TraceRecord::Handler::Handle;
+  Status Handle(const WriteQueryTraceRecord& record,
+                std::unique_ptr<TraceRecordResult>* result) override;
+  Status Handle(const GetQueryTraceRecord& record,
+                std::unique_ptr<TraceRecordResult>* result) override;
+  Status Handle(const IteratorSeekQueryTraceRecord& record,
+                std::unique_ptr<TraceRecordResult>* result) override;
+  Status Handle(const MultiGetQueryTraceRecord& record,
+                std::unique_ptr<TraceRecordResult>* result) override;
+
+  using WriteBatch::Handler::PutCF;
+  Status PutCF(uint32_t column_family_id, const Slice& key,
+               const Slice& value) override;
+
+  using WriteBatch::Handler::PutEntityCF;
+  Status PutEntityCF(uint32_t column_family_id, const Slice& key,
+                     const Slice& value) override;
+
+  using WriteBatch::Handler::DeleteCF;
+  Status DeleteCF(uint32_t column_family_id, const Slice& key) override;
+
+  using WriteBatch::Handler::SingleDeleteCF;
+  Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) override;
+
+  using WriteBatch::Handler::DeleteRangeCF;
+  Status DeleteRangeCF(uint32_t column_family_id, const Slice& begin_key,
+                       const Slice& end_key) override;
+
+  using WriteBatch::Handler::MergeCF;
+  Status MergeCF(uint32_t column_family_id, const Slice& key,
+                 const Slice& value) override;
+
+  // The following handlers are not implemented; they return Status::OK() to
+  // avoid the runtime assertion and other irrelevant failures.
+  using WriteBatch::Handler::PutBlobIndexCF;
+  Status PutBlobIndexCF(uint32_t /*column_family_id*/, const Slice& /*key*/,
+                        const Slice& /*value*/) override {
+    return Status::OK();
+  }
+
+  // The default implementation of LogData does nothing.
+  using WriteBatch::Handler::LogData;
+  void LogData(const Slice& /*blob*/) override {}
+
+  using WriteBatch::Handler::MarkBeginPrepare;
+  Status MarkBeginPrepare(bool = false) override { return Status::OK(); }
+
+  using WriteBatch::Handler::MarkEndPrepare;
+  Status MarkEndPrepare(const Slice& /*xid*/) override { return Status::OK(); }
+
+  using WriteBatch::Handler::MarkNoop;
+  Status MarkNoop(bool /*empty_batch*/) override { return Status::OK(); }
+
+  using WriteBatch::Handler::MarkRollback;
+  Status MarkRollback(const Slice& /*xid*/) override { return Status::OK(); }
+
+  using WriteBatch::Handler::MarkCommit;
+  Status MarkCommit(const Slice& /*xid*/) override { return Status::OK(); }
+
+  using WriteBatch::Handler::MarkCommitWithTimestamp;
+  Status MarkCommitWithTimestamp(const Slice& /*xid*/,
+                                 const Slice& /*commit_ts*/) override {
+    return Status::OK();
+  }
+
+  // Process each trace operation and output the analysis result to
+  // stdout/files.
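+  // (The first overload below takes the batched form used for MultiGet-style
+  // accesses, with parallel vectors of cf_ids, keys, and value sizes; the
+  // second takes a single key access.)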
+ Status OutputAnalysisResult(TraceOperationType op_type, uint64_t timestamp, + std::vector cf_ids, + std::vector keys, + std::vector value_sizes); + + Status OutputAnalysisResult(TraceOperationType op_type, uint64_t timestamp, + uint32_t cf_id, const Slice& key, + size_t value_size); + + ROCKSDB_NAMESPACE::Env* env_; + EnvOptions env_options_; + std::unique_ptr trace_reader_; + size_t offset_; + char buffer_[1024]; + // Timestamp of a WriteBatch, used in its iteration. + uint64_t write_batch_ts_; + std::string trace_name_; + std::string output_path_; + AnalyzerOptions analyzer_opts_; + uint64_t total_requests_; + uint64_t total_access_keys_; + uint64_t total_gets_; + uint64_t total_writes_; + uint64_t total_seeks_; + uint64_t total_seek_prevs_; + uint64_t total_multigets_; + uint64_t trace_create_time_; + uint64_t begin_time_; + uint64_t end_time_; + uint64_t time_series_start_; + uint32_t sample_max_; + uint32_t cur_time_sec_; + std::unique_ptr + trace_sequence_f_; // readable trace + std::unique_ptr qps_f_; // overall qps + std::unique_ptr + cf_qps_f_; // The qps of each CF> + std::vector ta_; // The main statistic collecting data structure + std::map cfs_; // All the cf_id appears in this trace; + std::vector qps_peak_; + std::vector qps_ave_; + + Status ReadTraceHeader(Trace* header); + Status ReadTraceFooter(Trace* footer); + Status ReadTraceRecord(Trace* trace); + Status KeyStatsInsertion(const uint32_t& type, const uint32_t& cf_id, + const std::string& key, const size_t value_size, + const uint64_t ts); + Status StatsUnitCorrelationUpdate(StatsUnit& unit, const uint32_t& type, + const uint64_t& ts, const std::string& key); + Status OpenStatsOutputFiles(const std::string& type, TraceStats& new_stats); + Status CreateOutputFile( + const std::string& type, const std::string& cf_name, + const std::string& ending, + std::unique_ptr* f_ptr); + Status CloseOutputFiles(); + + void PrintStatistics(); + Status TraceUnitWriter( + std::unique_ptr& f_ptr, TraceUnit& unit); + Status WriteTraceSequence(const uint32_t& type, const uint32_t& cf_id, + const Slice& key, const size_t value_size, + const uint64_t ts); + Status MakeStatisticKeyStatsOrPrefix(TraceStats& stats); + Status MakeStatisticCorrelation(TraceStats& stats, StatsUnit& unit); + Status MakeStatisticQPS(); + int db_version_; +}; + +int trace_analyzer_tool(int argc, char** argv); + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.cc new file mode 100644 index 0000000000..6a3442c5b3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.cc @@ -0,0 +1,509 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "trace_replay/block_cache_tracer.h" + +#include +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "db/dbformat.h" +#include "rocksdb/slice.h" +#include "rocksdb/trace_record.h" +#include "util/coding.h" +#include "util/hash.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { +bool ShouldTrace(const Slice& block_key, + const BlockCacheTraceOptions& trace_options) { + if (trace_options.sampling_frequency == 0 || + trace_options.sampling_frequency == 1) { + return true; + } + // We use spatial downsampling so that we have a complete access history for a + // block. + return 0 == GetSliceRangedNPHash(block_key, trace_options.sampling_frequency); +} +} // namespace + +const uint64_t kMicrosInSecond = 1000 * 1000; +const uint64_t kSecondInMinute = 60; +const uint64_t kSecondInHour = 3600; +const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName = + "UnknownColumnFamily"; +const uint64_t BlockCacheTraceRecord::kReservedGetId = 0; +const uint64_t BlockCacheTraceHelper::kReservedGetId = 0; + +bool BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock( + TraceType block_type, TableReaderCaller caller) { + return (block_type == TraceType::kBlockTraceDataBlock) && + IsGetOrMultiGet(caller); +} + +bool BlockCacheTraceHelper::IsGetOrMultiGet(TableReaderCaller caller) { + return caller == TableReaderCaller::kUserGet || + caller == TableReaderCaller::kUserMultiGet; +} + +bool BlockCacheTraceHelper::IsUserAccess(TableReaderCaller caller) { + return caller == TableReaderCaller::kUserGet || + caller == TableReaderCaller::kUserMultiGet || + caller == TableReaderCaller::kUserIterator || + caller == TableReaderCaller::kUserApproximateSize || + caller == TableReaderCaller::kUserVerifyChecksum; +} + +std::string BlockCacheTraceHelper::ComputeRowKey( + const BlockCacheTraceRecord& access) { + if (!IsGetOrMultiGet(access.caller)) { + return ""; + } + Slice key = ExtractUserKey(access.referenced_key); + return std::to_string(access.sst_fd_number) + "_" + key.ToString(); +} + +uint64_t BlockCacheTraceHelper::GetTableId( + const BlockCacheTraceRecord& access) { + if (!IsGetOrMultiGet(access.caller) || access.referenced_key.size() < 4) { + return 0; + } + return static_cast(DecodeFixed32(access.referenced_key.data())) + 1; +} + +uint64_t BlockCacheTraceHelper::GetSequenceNumber( + const BlockCacheTraceRecord& access) { + if (!IsGetOrMultiGet(access.caller)) { + return 0; + } + if (access.caller == TableReaderCaller::kUserMultiGet && + access.referenced_key.size() < 4) { + return 0; + } + return access.get_from_user_specified_snapshot + ? 
1 + GetInternalKeySeqno(access.referenced_key) + : 0; +} + +uint64_t BlockCacheTraceHelper::GetBlockOffsetInFile( + const BlockCacheTraceRecord& access) { + Slice input(access.block_key); + uint64_t offset = 0; + while (true) { + uint64_t tmp = 0; + if (GetVarint64(&input, &tmp)) { + offset = tmp; + } else { + break; + } + } + return offset; +} + +BlockCacheTraceWriterImpl::BlockCacheTraceWriterImpl( + SystemClock* clock, const BlockCacheTraceWriterOptions& trace_options, + std::unique_ptr&& trace_writer) + : clock_(clock), + trace_options_(trace_options), + trace_writer_(std::move(trace_writer)) {} + +Status BlockCacheTraceWriterImpl::WriteBlockAccess( + const BlockCacheTraceRecord& record, const Slice& block_key, + const Slice& cf_name, const Slice& referenced_key) { + uint64_t trace_file_size = trace_writer_->GetFileSize(); + if (trace_file_size > trace_options_.max_trace_file_size) { + return Status::OK(); + } + Trace trace; + trace.ts = record.access_timestamp; + trace.type = record.block_type; + PutLengthPrefixedSlice(&trace.payload, block_key); + PutFixed64(&trace.payload, record.block_size); + PutFixed64(&trace.payload, record.cf_id); + PutLengthPrefixedSlice(&trace.payload, cf_name); + PutFixed32(&trace.payload, record.level); + PutFixed64(&trace.payload, record.sst_fd_number); + trace.payload.push_back(record.caller); + trace.payload.push_back(record.is_cache_hit); + trace.payload.push_back(record.no_insert); + if (BlockCacheTraceHelper::IsGetOrMultiGet(record.caller)) { + PutFixed64(&trace.payload, record.get_id); + trace.payload.push_back(record.get_from_user_specified_snapshot); + PutLengthPrefixedSlice(&trace.payload, referenced_key); + } + if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(record.block_type, + record.caller)) { + PutFixed64(&trace.payload, record.referenced_data_size); + PutFixed64(&trace.payload, record.num_keys_in_block); + trace.payload.push_back(record.referenced_key_exist_in_block); + } + std::string encoded_trace; + TracerHelper::EncodeTrace(trace, &encoded_trace); + return trace_writer_->Write(encoded_trace); +} + +Status BlockCacheTraceWriterImpl::WriteHeader() { + Trace trace; + trace.ts = clock_->NowMicros(); + trace.type = TraceType::kTraceBegin; + PutLengthPrefixedSlice(&trace.payload, kTraceMagic); + PutFixed32(&trace.payload, kMajorVersion); + PutFixed32(&trace.payload, kMinorVersion); + std::string encoded_trace; + TracerHelper::EncodeTrace(trace, &encoded_trace); + return trace_writer_->Write(encoded_trace); +} + +BlockCacheTraceReader::BlockCacheTraceReader( + std::unique_ptr&& reader) + : trace_reader_(std::move(reader)) {} + +Status BlockCacheTraceReader::ReadHeader(BlockCacheTraceHeader* header) { + assert(header != nullptr); + std::string encoded_trace; + Status s = trace_reader_->Read(&encoded_trace); + if (!s.ok()) { + return s; + } + Trace trace; + s = TracerHelper::DecodeTrace(encoded_trace, &trace); + if (!s.ok()) { + return s; + } + header->start_time = trace.ts; + Slice enc_slice = Slice(trace.payload); + Slice magnic_number; + if (!GetLengthPrefixedSlice(&enc_slice, &magnic_number)) { + return Status::Corruption( + "Corrupted header in the trace file: Failed to read the magic number."); + } + if (magnic_number.ToString() != kTraceMagic) { + return Status::Corruption( + "Corrupted header in the trace file: Magic number does not match."); + } + if (!GetFixed32(&enc_slice, &header->rocksdb_major_version)) { + return Status::Corruption( + "Corrupted header in the trace file: Failed to read rocksdb major " + "version number."); + } 
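+  // (Per WriteHeader above, the payload is a length-prefixed magic string
+  // followed by two fixed32 version fields; the minor version is read next.)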
+ if (!GetFixed32(&enc_slice, &header->rocksdb_minor_version)) { + return Status::Corruption( + "Corrupted header in the trace file: Failed to read rocksdb minor " + "version number."); + } + // We should have retrieved all information in the header. + if (!enc_slice.empty()) { + return Status::Corruption( + "Corrupted header in the trace file: The length of header is too " + "long."); + } + return Status::OK(); +} + +Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) { + assert(record); + std::string encoded_trace; + Status s = trace_reader_->Read(&encoded_trace); + if (!s.ok()) { + return s; + } + Trace trace; + s = TracerHelper::DecodeTrace(encoded_trace, &trace); + if (!s.ok()) { + return s; + } + record->access_timestamp = trace.ts; + record->block_type = trace.type; + Slice enc_slice = Slice(trace.payload); + + const unsigned int kCharSize = 1; + + Slice block_key; + if (!GetLengthPrefixedSlice(&enc_slice, &block_key)) { + return Status::Incomplete( + "Incomplete access record: Failed to read block key."); + } + record->block_key = block_key.ToString(); + if (!GetFixed64(&enc_slice, &record->block_size)) { + return Status::Incomplete( + "Incomplete access record: Failed to read block size."); + } + if (!GetFixed64(&enc_slice, &record->cf_id)) { + return Status::Incomplete( + "Incomplete access record: Failed to read column family ID."); + } + Slice cf_name; + if (!GetLengthPrefixedSlice(&enc_slice, &cf_name)) { + return Status::Incomplete( + "Incomplete access record: Failed to read column family name."); + } + record->cf_name = cf_name.ToString(); + if (!GetFixed32(&enc_slice, &record->level)) { + return Status::Incomplete( + "Incomplete access record: Failed to read level."); + } + if (!GetFixed64(&enc_slice, &record->sst_fd_number)) { + return Status::Incomplete( + "Incomplete access record: Failed to read SST file number."); + } + if (enc_slice.empty()) { + return Status::Incomplete( + "Incomplete access record: Failed to read caller."); + } + record->caller = static_cast(enc_slice[0]); + enc_slice.remove_prefix(kCharSize); + if (enc_slice.empty()) { + return Status::Incomplete( + "Incomplete access record: Failed to read is_cache_hit."); + } + record->is_cache_hit = static_cast(enc_slice[0]); + enc_slice.remove_prefix(kCharSize); + if (enc_slice.empty()) { + return Status::Incomplete( + "Incomplete access record: Failed to read no_insert."); + } + record->no_insert = static_cast(enc_slice[0]); + enc_slice.remove_prefix(kCharSize); + if (BlockCacheTraceHelper::IsGetOrMultiGet(record->caller)) { + if (!GetFixed64(&enc_slice, &record->get_id)) { + return Status::Incomplete( + "Incomplete access record: Failed to read the get id."); + } + if (enc_slice.empty()) { + return Status::Incomplete( + "Incomplete access record: Failed to read " + "get_from_user_specified_snapshot."); + } + record->get_from_user_specified_snapshot = static_cast(enc_slice[0]); + enc_slice.remove_prefix(kCharSize); + Slice referenced_key; + if (!GetLengthPrefixedSlice(&enc_slice, &referenced_key)) { + return Status::Incomplete( + "Incomplete access record: Failed to read the referenced key."); + } + record->referenced_key = referenced_key.ToString(); + } + if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(record->block_type, + record->caller)) { + if (!GetFixed64(&enc_slice, &record->referenced_data_size)) { + return Status::Incomplete( + "Incomplete access record: Failed to read the referenced data size."); + } + if (!GetFixed64(&enc_slice, &record->num_keys_in_block)) { + return 
Status::Incomplete( + "Incomplete access record: Failed to read the number of keys in the " + "block."); + } + if (enc_slice.empty()) { + return Status::Incomplete( + "Incomplete access record: Failed to read " + "referenced_key_exist_in_block."); + } + record->referenced_key_exist_in_block = static_cast(enc_slice[0]); + } + return Status::OK(); +} + +BlockCacheHumanReadableTraceWriter::~BlockCacheHumanReadableTraceWriter() { + if (human_readable_trace_file_writer_) { + human_readable_trace_file_writer_->Flush().PermitUncheckedError(); + human_readable_trace_file_writer_->Close().PermitUncheckedError(); + } +} + +Status BlockCacheHumanReadableTraceWriter::NewWritableFile( + const std::string& human_readable_trace_file_path, + ROCKSDB_NAMESPACE::Env* env) { + if (human_readable_trace_file_path.empty()) { + return Status::InvalidArgument( + "The provided human_readable_trace_file_path is null."); + } + return env->NewWritableFile(human_readable_trace_file_path, + &human_readable_trace_file_writer_, EnvOptions()); +} + +Status BlockCacheHumanReadableTraceWriter::WriteHumanReadableTraceRecord( + const BlockCacheTraceRecord& access, uint64_t block_id, + uint64_t get_key_id) { + if (!human_readable_trace_file_writer_) { + return Status::OK(); + } + int ret = snprintf( + trace_record_buffer_, sizeof(trace_record_buffer_), + "%" PRIu64 ",%" PRIu64 ",%u,%" PRIu64 ",%" PRIu64 ",%s,%" PRIu32 + ",%" PRIu64 ",%u,%u,%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%u,%u,%" PRIu64 + ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n", + access.access_timestamp, block_id, access.block_type, access.block_size, + access.cf_id, access.cf_name.c_str(), access.level, access.sst_fd_number, + access.caller, access.no_insert, access.get_id, get_key_id, + access.referenced_data_size, access.is_cache_hit, + access.referenced_key_exist_in_block, access.num_keys_in_block, + BlockCacheTraceHelper::GetTableId(access), + BlockCacheTraceHelper::GetSequenceNumber(access), + static_cast(access.block_key.size()), + static_cast(access.referenced_key.size()), + BlockCacheTraceHelper::GetBlockOffsetInFile(access)); + if (ret < 0) { + return Status::IOError("failed to format the output"); + } + std::string printout(trace_record_buffer_); + return human_readable_trace_file_writer_->Append(printout); +} + +BlockCacheHumanReadableTraceReader::BlockCacheHumanReadableTraceReader( + const std::string& trace_file_path) + : BlockCacheTraceReader(/*trace_reader=*/nullptr) { + human_readable_trace_reader_.open(trace_file_path, std::ifstream::in); +} + +BlockCacheHumanReadableTraceReader::~BlockCacheHumanReadableTraceReader() { + human_readable_trace_reader_.close(); +} + +Status BlockCacheHumanReadableTraceReader::ReadHeader( + BlockCacheTraceHeader* /*header*/) { + return Status::OK(); +} + +Status BlockCacheHumanReadableTraceReader::ReadAccess( + BlockCacheTraceRecord* record) { + std::string line; + if (!std::getline(human_readable_trace_reader_, line)) { + return Status::Incomplete("No more records to read."); + } + std::stringstream ss(line); + std::vector record_strs; + while (ss.good()) { + std::string substr; + getline(ss, substr, ','); + record_strs.push_back(substr); + } + if (record_strs.size() != 21) { + return Status::Incomplete("Records format is wrong."); + } + + record->access_timestamp = ParseUint64(record_strs[0]); + uint64_t block_key = ParseUint64(record_strs[1]); + record->block_type = static_cast(ParseUint64(record_strs[2])); + record->block_size = ParseUint64(record_strs[3]); + record->cf_id = 
ParseUint64(record_strs[4]); + record->cf_name = record_strs[5]; + record->level = static_cast(ParseUint64(record_strs[6])); + record->sst_fd_number = ParseUint64(record_strs[7]); + record->caller = static_cast(ParseUint64(record_strs[8])); + record->no_insert = static_cast(ParseUint64(record_strs[9])); + record->get_id = ParseUint64(record_strs[10]); + uint64_t get_key_id = ParseUint64(record_strs[11]); + + record->referenced_data_size = ParseUint64(record_strs[12]); + record->is_cache_hit = static_cast(ParseUint64(record_strs[13])); + record->referenced_key_exist_in_block = + static_cast(ParseUint64(record_strs[14])); + record->num_keys_in_block = ParseUint64(record_strs[15]); + uint64_t table_id = ParseUint64(record_strs[16]); + if (table_id > 0) { + // Decrement since valid table id in the trace file equals traced table id + // + 1. + table_id -= 1; + } + uint64_t get_sequence_number = ParseUint64(record_strs[17]); + if (get_sequence_number > 0) { + record->get_from_user_specified_snapshot = true; + // Decrement since valid seq number in the trace file equals traced seq + // number + 1. + get_sequence_number -= 1; + } + uint64_t block_key_size = ParseUint64(record_strs[18]); + uint64_t get_key_size = ParseUint64(record_strs[19]); + uint64_t block_offset = ParseUint64(record_strs[20]); + + std::string tmp_block_key; + PutVarint64(&tmp_block_key, block_key); + PutVarint64(&tmp_block_key, block_offset); + // Append 1 until the size is the same as traced block key size. + while (record->block_key.size() < block_key_size - tmp_block_key.size()) { + record->block_key += "1"; + } + record->block_key += tmp_block_key; + + if (get_key_id != 0) { + std::string tmp_get_key; + PutFixed64(&tmp_get_key, get_key_id); + PutFixed64(&tmp_get_key, get_sequence_number << 8); + PutFixed32(&record->referenced_key, static_cast(table_id)); + // Append 1 until the size is the same as traced key size. + while (record->referenced_key.size() < get_key_size - tmp_get_key.size()) { + record->referenced_key += "1"; + } + record->referenced_key += tmp_get_key; + } + return Status::OK(); +} + +BlockCacheTracer::BlockCacheTracer() { writer_.store(nullptr); } + +BlockCacheTracer::~BlockCacheTracer() { EndTrace(); } + +Status BlockCacheTracer::StartTrace( + const BlockCacheTraceOptions& trace_options, + std::unique_ptr&& trace_writer) { + InstrumentedMutexLock lock_guard(&trace_writer_mutex_); + if (writer_.load()) { + return Status::Busy(); + } + get_id_counter_.store(1); + trace_options_ = trace_options; + writer_.store(trace_writer.release()); + return writer_.load()->WriteHeader(); +} + +void BlockCacheTracer::EndTrace() { + InstrumentedMutexLock lock_guard(&trace_writer_mutex_); + if (!writer_.load()) { + return; + } + delete writer_.load(); + writer_.store(nullptr); +} + +Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record, + const Slice& block_key, + const Slice& cf_name, + const Slice& referenced_key) { + if (!writer_.load() || !ShouldTrace(block_key, trace_options_)) { + return Status::OK(); + } + InstrumentedMutexLock lock_guard(&trace_writer_mutex_); + if (!writer_.load()) { + return Status::OK(); + } + return writer_.load()->WriteBlockAccess(record, block_key, cf_name, + referenced_key); +} + +uint64_t BlockCacheTracer::NextGetId() { + if (!writer_.load(std::memory_order_relaxed)) { + return BlockCacheTraceHelper::kReservedGetId; + } + uint64_t prev_value = get_id_counter_.fetch_add(1); + if (prev_value == BlockCacheTraceHelper::kReservedGetId) { + // fetch and add again. 
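+    // (The counter has wrapped around to kReservedGetId (0), which callers
+    // interpret as "tracing disabled", so skip it by incrementing once more.)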
+ return get_id_counter_.fetch_add(1); + } + return prev_value; +} + +std::unique_ptr NewBlockCacheTraceWriter( + SystemClock* clock, const BlockCacheTraceWriterOptions& trace_options, + std::unique_ptr&& trace_writer) { + return std::unique_ptr(new BlockCacheTraceWriterImpl( + clock, trace_options, std::move(trace_writer))); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.h new file mode 100644 index 0000000000..301c7d95ee --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/block_cache_tracer.h @@ -0,0 +1,239 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "monitoring/instrumented_mutex.h" +#include "rocksdb/block_cache_trace_writer.h" +#include "rocksdb/options.h" +#include "rocksdb/table_reader_caller.h" +#include "rocksdb/trace_reader_writer.h" +#include "trace_replay/trace_replay.h" + +namespace ROCKSDB_NAMESPACE { +class Env; +class SystemClock; + +extern const uint64_t kMicrosInSecond; +extern const uint64_t kSecondInMinute; +extern const uint64_t kSecondInHour; + +struct BlockCacheTraceRecord; + +class BlockCacheTraceHelper { + public: + static bool IsGetOrMultiGetOnDataBlock(TraceType block_type, + TableReaderCaller caller); + static bool IsGetOrMultiGet(TableReaderCaller caller); + static bool IsUserAccess(TableReaderCaller caller); + // Row key is a concatenation of the access's fd_number and the referenced + // user key. + static std::string ComputeRowKey(const BlockCacheTraceRecord& access); + // The first four bytes of the referenced key in a Get request is the table + // id. + static uint64_t GetTableId(const BlockCacheTraceRecord& access); + // The sequence number of a get request is the last part of the referenced + // key. + static uint64_t GetSequenceNumber(const BlockCacheTraceRecord& access); + // Block offset in a file is the last varint64 in the block key. + static uint64_t GetBlockOffsetInFile(const BlockCacheTraceRecord& access); + + static const std::string kUnknownColumnFamilyName; + static const uint64_t kReservedGetId; +}; + +// Lookup context for tracing block cache accesses. +// We trace block accesses at five places: +// 1. BlockBasedTable::GetFilter +// 2. BlockBasedTable::GetUncompressedDict. +// 3. BlockBasedTable::MaybeReadAndLoadToCache. (To trace access on data, index, +// and range deletion block.) +// 4. BlockBasedTable::Get. (To trace the referenced key and whether the +// referenced key exists in a fetched data block.) +// 5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the +// referenced key exists in a fetched data block.) +// The context is created at: +// 1. BlockBasedTable::Get. (kUserGet) +// 2. BlockBasedTable::MultiGet. (kUserMGet) +// 3. BlockBasedTable::NewIterator. (either kUserIterator, kCompaction, or +// external SST ingestion calls this function.) +// 4. BlockBasedTable::Open. (kPrefetch) +// 5. Index/Filter::CacheDependencies. (kPrefetch) +// 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or +// kUserApproximateSize). 
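+//
+// A rough usage sketch (editorial illustration; the local names here are
+// hypothetical):
+//   BlockCacheLookupContext ctx(TableReaderCaller::kUserGet,
+//                               tracer->NextGetId(),
+//                               /*_get_from_user_specified_snapshot=*/false);
+//   ctx.FillLookupContext(/*_is_cache_hit=*/true, /*_no_insert=*/false,
+//                         TraceType::kBlockTraceDataBlock, block_size,
+//                         block_key, num_keys_in_block);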
+struct BlockCacheLookupContext {
+  BlockCacheLookupContext(const TableReaderCaller& _caller) : caller(_caller) {}
+  BlockCacheLookupContext(const TableReaderCaller& _caller, uint64_t _get_id,
+                          bool _get_from_user_specified_snapshot)
+      : caller(_caller),
+        get_id(_get_id),
+        get_from_user_specified_snapshot(_get_from_user_specified_snapshot) {}
+  const TableReaderCaller caller;
+  // These are populated when we perform a lookup/insert on the block cache.
+  // The block cache tracer uses this information when logging the block
+  // access at BlockBasedTable::Get and BlockBasedTable::MultiGet.
+  bool is_cache_hit = false;
+  bool no_insert = false;
+  TraceType block_type = TraceType::kTraceMax;
+  uint64_t block_size = 0;
+  std::string block_key;
+  uint64_t num_keys_in_block = 0;
+  // The unique id associated with Get and MultiGet. This enables us to track
+  // how many blocks a Get/MultiGet request accesses. We can also measure the
+  // impact of row cache vs block cache.
+  uint64_t get_id = 0;
+  std::string referenced_key;
+  bool get_from_user_specified_snapshot = false;
+
+  void FillLookupContext(bool _is_cache_hit, bool _no_insert,
+                         TraceType _block_type, uint64_t _block_size,
+                         const std::string& _block_key,
+                         uint64_t _num_keys_in_block) {
+    is_cache_hit = _is_cache_hit;
+    no_insert = _no_insert;
+    block_type = _block_type;
+    block_size = _block_size;
+    block_key = _block_key;
+    num_keys_in_block = _num_keys_in_block;
+  }
+};
+
+struct BlockCacheTraceHeader {
+  uint64_t start_time;
+  uint32_t rocksdb_major_version;
+  uint32_t rocksdb_minor_version;
+};
+
+// BlockCacheTraceWriter captures all RocksDB block cache accesses using a
+// user-provided TraceWriter. Every RocksDB operation is written as a single
+// trace. Each trace will have a timestamp and type, followed by the trace
+// payload.
+class BlockCacheTraceWriterImpl : public BlockCacheTraceWriter {
+ public:
+  BlockCacheTraceWriterImpl(SystemClock* clock,
+                            const BlockCacheTraceWriterOptions& trace_options,
+                            std::unique_ptr<TraceWriter>&& trace_writer);
+  ~BlockCacheTraceWriterImpl() = default;
+  // No copy and move.
+  BlockCacheTraceWriterImpl(const BlockCacheTraceWriterImpl&) = delete;
+  BlockCacheTraceWriterImpl& operator=(const BlockCacheTraceWriterImpl&) =
+      delete;
+  BlockCacheTraceWriterImpl(BlockCacheTraceWriterImpl&&) = delete;
+  BlockCacheTraceWriterImpl& operator=(BlockCacheTraceWriterImpl&&) = delete;
+
+  // Pass Slice references to avoid copy.
+  Status WriteBlockAccess(const BlockCacheTraceRecord& record,
+                          const Slice& block_key, const Slice& cf_name,
+                          const Slice& referenced_key) override;
+
+  // Write a trace header at the beginning, typically on initiating a trace,
+  // with some metadata like a magic number and RocksDB version.
+  Status WriteHeader() override;
+
+ private:
+  SystemClock* clock_;
+  BlockCacheTraceWriterOptions trace_options_;
+  std::unique_ptr<TraceWriter> trace_writer_;
+};
+
+// Write a trace record in human readable format, see
+// https://github.com/facebook/rocksdb/wiki/Block-cache-analysis-and-simulation-tools#trace-format
+// for details.
+class BlockCacheHumanReadableTraceWriter { + public: + ~BlockCacheHumanReadableTraceWriter(); + + Status NewWritableFile(const std::string& human_readable_trace_file_path, + ROCKSDB_NAMESPACE::Env* env); + + Status WriteHumanReadableTraceRecord(const BlockCacheTraceRecord& access, + uint64_t block_id, uint64_t get_key_id); + + private: + char trace_record_buffer_[1024 * 1024]; + std::unique_ptr + human_readable_trace_file_writer_; +}; + +// BlockCacheTraceReader helps read the trace file generated by +// BlockCacheTraceWriter using a user provided TraceReader. +class BlockCacheTraceReader { + public: + BlockCacheTraceReader(std::unique_ptr&& reader); + virtual ~BlockCacheTraceReader() = default; + // No copy and move. + BlockCacheTraceReader(const BlockCacheTraceReader&) = delete; + BlockCacheTraceReader& operator=(const BlockCacheTraceReader&) = delete; + BlockCacheTraceReader(BlockCacheTraceReader&&) = delete; + BlockCacheTraceReader& operator=(BlockCacheTraceReader&&) = delete; + + Status ReadHeader(BlockCacheTraceHeader* header); + + Status ReadAccess(BlockCacheTraceRecord* record); + + private: + std::unique_ptr trace_reader_; +}; + +// Read a trace record in human readable format, see +// https://github.com/facebook/rocksdb/wiki/Block-cache-analysis-and-simulation-tools#trace-format +// for detailed. +class BlockCacheHumanReadableTraceReader : public BlockCacheTraceReader { + public: + BlockCacheHumanReadableTraceReader(const std::string& trace_file_path); + + ~BlockCacheHumanReadableTraceReader(); + + Status ReadHeader(BlockCacheTraceHeader* header); + + Status ReadAccess(BlockCacheTraceRecord* record); + + private: + std::ifstream human_readable_trace_reader_; +}; + +// A block cache tracer. It downsamples the accesses according to +// trace_options and uses BlockCacheTraceWriter to write the access record to +// the trace file. +class BlockCacheTracer { + public: + BlockCacheTracer(); + ~BlockCacheTracer(); + // No copy and move. + BlockCacheTracer(const BlockCacheTracer&) = delete; + BlockCacheTracer& operator=(const BlockCacheTracer&) = delete; + BlockCacheTracer(BlockCacheTracer&&) = delete; + BlockCacheTracer& operator=(BlockCacheTracer&&) = delete; + + // Start writing block cache accesses to the trace_writer. + Status StartTrace(const BlockCacheTraceOptions& trace_options, + std::unique_ptr&& trace_writer); + + // Stop writing block cache accesses to the trace_writer. + void EndTrace(); + + bool is_tracing_enabled() const { + return writer_.load(std::memory_order_relaxed); + } + + Status WriteBlockAccess(const BlockCacheTraceRecord& record, + const Slice& block_key, const Slice& cf_name, + const Slice& referenced_key); + + // GetId cycles from 1 to std::numeric_limits::max(). + uint64_t NextGetId(); + + private: + BlockCacheTraceOptions trace_options_; + // A mutex protects the writer_. + InstrumentedMutex trace_writer_mutex_; + std::atomic writer_; + std::atomic get_id_counter_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.cc new file mode 100644 index 0000000000..a860130f85 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.cc @@ -0,0 +1,303 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "trace_replay/io_tracer.h"
+
+#include
+#include
+#include
+
+#include "db/db_impl/db_impl.h"
+#include "db/dbformat.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/system_clock.h"
+#include "rocksdb/trace_reader_writer.h"
+#include "util/coding.h"
+#include "util/hash.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+IOTraceWriter::IOTraceWriter(SystemClock* clock,
+                             const TraceOptions& trace_options,
+                             std::unique_ptr<TraceWriter>&& trace_writer)
+    : clock_(clock),
+      trace_options_(trace_options),
+      trace_writer_(std::move(trace_writer)) {}
+
+Status IOTraceWriter::WriteIOOp(const IOTraceRecord& record,
+                                IODebugContext* dbg) {
+  uint64_t trace_file_size = trace_writer_->GetFileSize();
+  if (trace_file_size > trace_options_.max_trace_file_size) {
+    return Status::OK();
+  }
+  Trace trace;
+  trace.ts = record.access_timestamp;
+  trace.type = record.trace_type;
+  PutFixed64(&trace.payload, record.io_op_data);
+  Slice file_operation(record.file_operation);
+  PutLengthPrefixedSlice(&trace.payload, file_operation);
+  PutFixed64(&trace.payload, record.latency);
+  Slice io_status(record.io_status);
+  PutLengthPrefixedSlice(&trace.payload, io_status);
+  Slice file_name(record.file_name);
+  PutLengthPrefixedSlice(&trace.payload, file_name);
+
+  // Each bit in io_op_data stores which corresponding info from IOTraceOp will
+  // be added in the trace. For example, if the bit at position 1 is set then
+  // IOTraceOp::kIOLen (length) will be logged in the record (since
+  // IOTraceOp::kIOLen = 1 in the enum). So find all the set positions in
+  // io_op_data one by one, update the corresponding info in the trace record,
+  // and unset that bit to find other set bits, until io_op_data = 0.
+  /* Write remaining options based on io_op_data set by file operation */
+  int64_t io_op_data = static_cast<int64_t>(record.io_op_data);
+  while (io_op_data) {
+    // Find the rightmost set bit.
+    uint32_t set_pos = static_cast<uint32_t>(log2(io_op_data & -io_op_data));
+    switch (set_pos) {
+      case IOTraceOp::kIOFileSize:
+        PutFixed64(&trace.payload, record.file_size);
+        break;
+      case IOTraceOp::kIOLen:
+        PutFixed64(&trace.payload, record.len);
+        break;
+      case IOTraceOp::kIOOffset:
+        PutFixed64(&trace.payload, record.offset);
+        break;
+      default:
+        assert(false);
+    }
+    // unset the rightmost bit.
+    io_op_data &= (io_op_data - 1);
+  }
+
+  int64_t trace_data = 0;
+  if (dbg) {
+    trace_data = static_cast<int64_t>(dbg->trace_data);
+  }
+  PutFixed64(&trace.payload, trace_data);
+  while (trace_data) {
+    // Find the rightmost set bit.
+    uint32_t set_pos = static_cast<uint32_t>(log2(trace_data & -trace_data));
+    switch (set_pos) {
+      case IODebugContext::TraceData::kRequestID: {
+        Slice request_id(dbg->request_id);
+        PutLengthPrefixedSlice(&trace.payload, request_id);
+      } break;
+      default:
+        assert(false);
+    }
+    // unset the rightmost bit.
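+    // ((trace_data - 1) flips the lowest set bit and everything below it, so
+    // the AND clears exactly that bit; each flagged field is visited once.)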
+ trace_data &= (trace_data - 1); + } + + std::string encoded_trace; + TracerHelper::EncodeTrace(trace, &encoded_trace); + return trace_writer_->Write(encoded_trace); +} + +Status IOTraceWriter::WriteHeader() { + Trace trace; + trace.ts = clock_->NowMicros(); + trace.type = TraceType::kTraceBegin; + PutLengthPrefixedSlice(&trace.payload, kTraceMagic); + PutFixed32(&trace.payload, kMajorVersion); + PutFixed32(&trace.payload, kMinorVersion); + std::string encoded_trace; + TracerHelper::EncodeTrace(trace, &encoded_trace); + return trace_writer_->Write(encoded_trace); +} + +IOTraceReader::IOTraceReader(std::unique_ptr&& reader) + : trace_reader_(std::move(reader)) {} + +Status IOTraceReader::ReadHeader(IOTraceHeader* header) { + assert(header != nullptr); + std::string encoded_trace; + Status s = trace_reader_->Read(&encoded_trace); + if (!s.ok()) { + return s; + } + Trace trace; + s = TracerHelper::DecodeTrace(encoded_trace, &trace); + if (!s.ok()) { + return s; + } + header->start_time = trace.ts; + Slice enc_slice = Slice(trace.payload); + Slice magic_number; + if (!GetLengthPrefixedSlice(&enc_slice, &magic_number)) { + return Status::Corruption( + "Corrupted header in the trace file: Failed to read the magic number."); + } + if (magic_number.ToString() != kTraceMagic) { + return Status::Corruption( + "Corrupted header in the trace file: Magic number does not match."); + } + if (!GetFixed32(&enc_slice, &header->rocksdb_major_version)) { + return Status::Corruption( + "Corrupted header in the trace file: Failed to read rocksdb major " + "version number."); + } + if (!GetFixed32(&enc_slice, &header->rocksdb_minor_version)) { + return Status::Corruption( + "Corrupted header in the trace file: Failed to read rocksdb minor " + "version number."); + } + // We should have retrieved all information in the header. + if (!enc_slice.empty()) { + return Status::Corruption( + "Corrupted header in the trace file: The length of header is too " + "long."); + } + return Status::OK(); +} + +Status IOTraceReader::ReadIOOp(IOTraceRecord* record) { + assert(record); + std::string encoded_trace; + Status s = trace_reader_->Read(&encoded_trace); + if (!s.ok()) { + return s; + } + Trace trace; + s = TracerHelper::DecodeTrace(encoded_trace, &trace); + if (!s.ok()) { + return s; + } + record->access_timestamp = trace.ts; + record->trace_type = trace.type; + Slice enc_slice = Slice(trace.payload); + + if (!GetFixed64(&enc_slice, &record->io_op_data)) { + return Status::Incomplete( + "Incomplete access record: Failed to read trace data."); + } + Slice file_operation; + if (!GetLengthPrefixedSlice(&enc_slice, &file_operation)) { + return Status::Incomplete( + "Incomplete access record: Failed to read file operation."); + } + record->file_operation = file_operation.ToString(); + if (!GetFixed64(&enc_slice, &record->latency)) { + return Status::Incomplete( + "Incomplete access record: Failed to read latency."); + } + Slice io_status; + if (!GetLengthPrefixedSlice(&enc_slice, &io_status)) { + return Status::Incomplete( + "Incomplete access record: Failed to read IO status."); + } + record->io_status = io_status.ToString(); + Slice file_name; + if (!GetLengthPrefixedSlice(&enc_slice, &file_name)) { + return Status::Incomplete( + "Incomplete access record: Failed to read file name."); + } + record->file_name = file_name.ToString(); + + // Each bit in io_op_data stores which corresponding info from IOTraceOp will + // be added in the trace. 
For example, if the bit at position 1 is set then
+  // IOTraceOp::kIOLen (length) will be logged in the record (since
+  // IOTraceOp::kIOLen = 1 in the enum). So find all the set positions in
+  // io_op_data one by one, update the corresponding info in the trace record,
+  // and unset that bit to find other set bits, until io_op_data = 0.
+  /* Read remaining options based on io_op_data set by file operation */
+  // Assuming 63 bits will be used at max.
+  int64_t io_op_data = static_cast<int64_t>(record->io_op_data);
+  while (io_op_data) {
+    // Find the rightmost set bit.
+    uint32_t set_pos = static_cast<uint32_t>(log2(io_op_data & -io_op_data));
+    switch (set_pos) {
+      case IOTraceOp::kIOFileSize:
+        if (!GetFixed64(&enc_slice, &record->file_size)) {
+          return Status::Incomplete(
+              "Incomplete access record: Failed to read file size.");
+        }
+        break;
+      case IOTraceOp::kIOLen:
+        if (!GetFixed64(&enc_slice, &record->len)) {
+          return Status::Incomplete(
+              "Incomplete access record: Failed to read length.");
+        }
+        break;
+      case IOTraceOp::kIOOffset:
+        if (!GetFixed64(&enc_slice, &record->offset)) {
+          return Status::Incomplete(
+              "Incomplete access record: Failed to read offset.");
+        }
+        break;
+      default:
+        assert(false);
+    }
+    // unset the rightmost bit.
+    io_op_data &= (io_op_data - 1);
+  }
+
+  if (!GetFixed64(&enc_slice, &record->trace_data)) {
+    return Status::Incomplete(
+        "Incomplete access record: Failed to read trace op.");
+  }
+  int64_t trace_data = static_cast<int64_t>(record->trace_data);
+  while (trace_data) {
+    // Find the rightmost set bit.
+    uint32_t set_pos = static_cast<uint32_t>(log2(trace_data & -trace_data));
+    switch (set_pos) {
+      case IODebugContext::TraceData::kRequestID: {
+        Slice request_id;
+        if (!GetLengthPrefixedSlice(&enc_slice, &request_id)) {
+          return Status::Incomplete(
+              "Incomplete access record: Failed to read request id.");
+        }
+        record->request_id = request_id.ToString();
+      } break;
+      default:
+        assert(false);
+    }
+    // unset the rightmost bit.
+    trace_data &= (trace_data - 1);
+  }
+
+  return Status::OK();
+}
+
+IOTracer::IOTracer() : tracing_enabled(false) { writer_.store(nullptr); }
+
+IOTracer::~IOTracer() { EndIOTrace(); }
+
+Status IOTracer::StartIOTrace(SystemClock* clock,
+                              const TraceOptions& trace_options,
+                              std::unique_ptr<TraceWriter>&& trace_writer) {
+  InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
+  if (writer_.load()) {
+    return Status::Busy();
+  }
+  trace_options_ = trace_options;
+  writer_.store(
+      new IOTraceWriter(clock, trace_options, std::move(trace_writer)));
+  tracing_enabled = true;
+  return writer_.load()->WriteHeader();
+}
+
+void IOTracer::EndIOTrace() {
+  InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
+  if (!writer_.load()) {
+    return;
+  }
+  delete writer_.load();
+  writer_.store(nullptr);
+  tracing_enabled = false;
+}
+
+void IOTracer::WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg) {
+  if (!writer_.load()) {
+    return;
+  }
+  InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
+  if (!writer_.load()) {
+    return;
+  }
+  writer_.load()->WriteIOOp(record, dbg).PermitUncheckedError();
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.h
new file mode 100644
index 0000000000..3fc7cdba0a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/io_tracer.h
@@ -0,0 +1,185 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "monitoring/instrumented_mutex.h" +#include "port/lang.h" +#include "rocksdb/file_system.h" +#include "rocksdb/options.h" +#include "rocksdb/trace_record.h" +#include "trace_replay/trace_replay.h" + +namespace ROCKSDB_NAMESPACE { +class SystemClock; +class TraceReader; +class TraceWriter; + +/* In order to log new data in trace record for specified operations, do + following: + 1. Add new data in IOTraceOP (say kIONewData= 3) + 2. Log it in IOTraceWriter::WriteIOOp, and read that in + IOTraceReader::ReadIOOp and + IOTraceRecordParser::PrintHumanReadableIOTraceRecord in the switch case. + 3. In the FileSystemTracer APIs where this data will be logged with, update + io_op_data |= (1 << IOTraceOp::kIONewData). +*/ +enum IOTraceOp : char { + // The value of each enum represents the bitwise position for + // IOTraceRecord.io_op_data. + kIOFileSize = 0, + kIOLen = 1, + kIOOffset = 2, +}; + +struct IOTraceRecord { + // Required fields for all accesses. + uint64_t access_timestamp = 0; + TraceType trace_type = TraceType::kTraceMax; + // Each bit in io_op_data stores which corresponding info from IOTraceOp will + // be added in the trace. Foreg, if bit at position 1 is set then + // IOTraceOp::kIOLen (length) will be logged in the record. + uint64_t io_op_data = 0; + std::string file_operation; + uint64_t latency = 0; + std::string io_status; + // Stores file name instead of full path. + std::string file_name; + + // Fields added to record based on IO operation. + uint64_t len = 0; + uint64_t offset = 0; + uint64_t file_size = 0; + + // Additional information passed in IODebugContext. + uint64_t trace_data = 0; + std::string request_id; + + IOTraceRecord() {} + + IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, + const uint64_t& _io_op_data, const std::string& _file_operation, + const uint64_t& _latency, const std::string& _io_status, + const std::string& _file_name, const uint64_t& _file_size = 0) + : access_timestamp(_access_timestamp), + trace_type(_trace_type), + io_op_data(_io_op_data), + file_operation(_file_operation), + latency(_latency), + io_status(_io_status), + file_name(_file_name), + file_size(_file_size) {} + + IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, + const uint64_t& _io_op_data, const std::string& _file_operation, + const uint64_t& _latency, const std::string& _io_status, + const std::string& _file_name, const uint64_t& _len, + const uint64_t& _offset) + : access_timestamp(_access_timestamp), + trace_type(_trace_type), + io_op_data(_io_op_data), + file_operation(_file_operation), + latency(_latency), + io_status(_io_status), + file_name(_file_name), + len(_len), + offset(_offset) {} +}; + +struct IOTraceHeader { + uint64_t start_time; + uint32_t rocksdb_major_version; + uint32_t rocksdb_minor_version; +}; + +// IOTraceWriter writes IO operation as a single trace. Each trace will have a +// timestamp and type, followed by the trace payload. +class IOTraceWriter { + public: + IOTraceWriter(SystemClock* clock, const TraceOptions& trace_options, + std::unique_ptr&& trace_writer); + ~IOTraceWriter() = default; + // No copy and move. 
+  IOTraceWriter(const IOTraceWriter&) = delete;
+  IOTraceWriter& operator=(const IOTraceWriter&) = delete;
+  IOTraceWriter(IOTraceWriter&&) = delete;
+  IOTraceWriter& operator=(IOTraceWriter&&) = delete;
+
+  Status WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg);
+
+  // Write a trace header at the beginning, typically on initiating a trace,
+  // with some metadata like a magic number and RocksDB version.
+  Status WriteHeader();
+
+ private:
+  SystemClock* clock_;
+  TraceOptions trace_options_;
+  std::unique_ptr<TraceWriter> trace_writer_;
+};
+
+// IOTraceReader helps read the trace file generated by IOTraceWriter.
+class IOTraceReader {
+ public:
+  explicit IOTraceReader(std::unique_ptr<TraceReader>&& reader);
+  ~IOTraceReader() = default;
+  // No copy and move.
+  IOTraceReader(const IOTraceReader&) = delete;
+  IOTraceReader& operator=(const IOTraceReader&) = delete;
+  IOTraceReader(IOTraceReader&&) = delete;
+  IOTraceReader& operator=(IOTraceReader&&) = delete;
+
+  Status ReadHeader(IOTraceHeader* header);
+
+  Status ReadIOOp(IOTraceRecord* record);
+
+ private:
+  std::unique_ptr<TraceReader> trace_reader_;
+};
+
+// An IO tracer. It uses IOTraceWriter to write the access record to the
+// trace file.
+class IOTracer {
+ public:
+  IOTracer();
+  ~IOTracer();
+  // No copy and move.
+  IOTracer(const IOTracer&) = delete;
+  IOTracer& operator=(const IOTracer&) = delete;
+  IOTracer(IOTracer&&) = delete;
+  IOTracer& operator=(IOTracer&&) = delete;
+
+  // no_sanitize is added for tracing_enabled. writer_ is protected under the
+  // mutex, so even if a user calls Start/EndIOTrace while tracing_enabled has
+  // not yet been updated, WriteIOOp will still check writer_ under the mutex
+  // and ignore the operation if writer_ is null. So it is ok if
+  // tracing_enabled briefly shows a stale value.
+
+  // Start writing IO operations to the trace_writer.
+  TSAN_SUPPRESSION Status
+  StartIOTrace(SystemClock* clock, const TraceOptions& trace_options,
+               std::unique_ptr<TraceWriter>&& trace_writer);
+
+  // Stop writing IO operations to the trace_writer.
+  TSAN_SUPPRESSION void EndIOTrace();
+
+  TSAN_SUPPRESSION bool is_tracing_enabled() const { return tracing_enabled; }
+
+  void WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg);
+
+ private:
+  TraceOptions trace_options_;
+  // A mutex protects the writer_.
+  InstrumentedMutex trace_writer_mutex_;
+  std::atomic<IOTraceWriter*> writer_;
+  // bool tracing_enabled is added to avoid the costly operation of checking
+  // whether the atomic variable 'writer_' is nullptr in is_tracing_enabled(),
+  // which is invoked many times by the FileSystem classes.
+  bool tracing_enabled;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record.cc
new file mode 100644
index 0000000000..21df0275dd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record.cc
@@ -0,0 +1,206 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
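For reference, the bitmask convention used throughout the io_tracer sources above (WriteIOOp sets one bit per optional field; ReadIOOp walks the set bits from least significant to most significant, clearing each as it goes) boils down to the following standalone sketch. It is illustrative only, not part of the vendored sources; the enum and field names are simplified stand-ins.

    // Sketch of the io_op_data bit-walking scheme (assumed names, not vendored).
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    enum OpField : char { kFileSize = 0, kLen = 1, kOffset = 2 };

    int main() {
      uint64_t io_op_data = 0;
      io_op_data |= (uint64_t{1} << kLen);     // writer logged a length
      io_op_data |= (uint64_t{1} << kOffset);  // writer logged an offset

      int64_t bits = static_cast<int64_t>(io_op_data);
      while (bits) {
        // Isolate the rightmost set bit and recover its position.
        uint32_t set_pos = static_cast<uint32_t>(log2(bits & -bits));
        printf("field %u is present in the record\n", set_pos);
        bits &= (bits - 1);  // clear the rightmost set bit
      }
      return 0;
    }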
+ +#include "rocksdb/trace_record.h" + +#include + +#include "rocksdb/db.h" +#include "rocksdb/iterator.h" +#include "rocksdb/options.h" +#include "rocksdb/status.h" +#include "rocksdb/trace_record_result.h" +#include "trace_replay/trace_record_handler.h" + +namespace ROCKSDB_NAMESPACE { + +// TraceRecord +TraceRecord::TraceRecord(uint64_t timestamp) : timestamp_(timestamp) {} + +uint64_t TraceRecord::GetTimestamp() const { return timestamp_; } + +TraceRecord::Handler* TraceRecord::NewExecutionHandler( + DB* db, const std::vector& handles) { + return new TraceExecutionHandler(db, handles); +} + +// QueryTraceRecord +QueryTraceRecord::QueryTraceRecord(uint64_t timestamp) + : TraceRecord(timestamp) {} + +// WriteQueryTraceRecord +WriteQueryTraceRecord::WriteQueryTraceRecord(PinnableSlice&& write_batch_rep, + uint64_t timestamp) + : QueryTraceRecord(timestamp), rep_(std::move(write_batch_rep)) {} + +WriteQueryTraceRecord::WriteQueryTraceRecord(const std::string& write_batch_rep, + uint64_t timestamp) + : QueryTraceRecord(timestamp) { + rep_.PinSelf(write_batch_rep); +} + +WriteQueryTraceRecord::~WriteQueryTraceRecord() { rep_.clear(); } + +Slice WriteQueryTraceRecord::GetWriteBatchRep() const { return Slice(rep_); } + +Status WriteQueryTraceRecord::Accept( + Handler* handler, std::unique_ptr* result) { + assert(handler != nullptr); + return handler->Handle(*this, result); +} + +// GetQueryTraceRecord +GetQueryTraceRecord::GetQueryTraceRecord(uint32_t column_family_id, + PinnableSlice&& key, + uint64_t timestamp) + : QueryTraceRecord(timestamp), + cf_id_(column_family_id), + key_(std::move(key)) {} + +GetQueryTraceRecord::GetQueryTraceRecord(uint32_t column_family_id, + const std::string& key, + uint64_t timestamp) + : QueryTraceRecord(timestamp), cf_id_(column_family_id) { + key_.PinSelf(key); +} + +GetQueryTraceRecord::~GetQueryTraceRecord() { key_.clear(); } + +uint32_t GetQueryTraceRecord::GetColumnFamilyID() const { return cf_id_; } + +Slice GetQueryTraceRecord::GetKey() const { return Slice(key_); } + +Status GetQueryTraceRecord::Accept(Handler* handler, + std::unique_ptr* result) { + assert(handler != nullptr); + return handler->Handle(*this, result); +} + +// IteratorQueryTraceRecord +IteratorQueryTraceRecord::IteratorQueryTraceRecord(uint64_t timestamp) + : QueryTraceRecord(timestamp) {} + +IteratorQueryTraceRecord::IteratorQueryTraceRecord(PinnableSlice&& lower_bound, + PinnableSlice&& upper_bound, + uint64_t timestamp) + : QueryTraceRecord(timestamp), + lower_(std::move(lower_bound)), + upper_(std::move(upper_bound)) {} + +IteratorQueryTraceRecord::IteratorQueryTraceRecord( + const std::string& lower_bound, const std::string& upper_bound, + uint64_t timestamp) + : QueryTraceRecord(timestamp) { + lower_.PinSelf(lower_bound); + upper_.PinSelf(upper_bound); +} + +IteratorQueryTraceRecord::~IteratorQueryTraceRecord() {} + +Slice IteratorQueryTraceRecord::GetLowerBound() const { return Slice(lower_); } + +Slice IteratorQueryTraceRecord::GetUpperBound() const { return Slice(upper_); } + +// IteratorSeekQueryTraceRecord +IteratorSeekQueryTraceRecord::IteratorSeekQueryTraceRecord( + SeekType seek_type, uint32_t column_family_id, PinnableSlice&& key, + uint64_t timestamp) + : IteratorQueryTraceRecord(timestamp), + type_(seek_type), + cf_id_(column_family_id), + key_(std::move(key)) {} + +IteratorSeekQueryTraceRecord::IteratorSeekQueryTraceRecord( + SeekType seek_type, uint32_t column_family_id, const std::string& key, + uint64_t timestamp) + : IteratorQueryTraceRecord(timestamp), + 
type_(seek_type), + cf_id_(column_family_id) { + key_.PinSelf(key); +} + +IteratorSeekQueryTraceRecord::IteratorSeekQueryTraceRecord( + SeekType seek_type, uint32_t column_family_id, PinnableSlice&& key, + PinnableSlice&& lower_bound, PinnableSlice&& upper_bound, + uint64_t timestamp) + : IteratorQueryTraceRecord(std::move(lower_bound), std::move(upper_bound), + timestamp), + type_(seek_type), + cf_id_(column_family_id), + key_(std::move(key)) {} + +IteratorSeekQueryTraceRecord::IteratorSeekQueryTraceRecord( + SeekType seek_type, uint32_t column_family_id, const std::string& key, + const std::string& lower_bound, const std::string& upper_bound, + uint64_t timestamp) + : IteratorQueryTraceRecord(lower_bound, upper_bound, timestamp), + type_(seek_type), + cf_id_(column_family_id) { + key_.PinSelf(key); +} + +IteratorSeekQueryTraceRecord::~IteratorSeekQueryTraceRecord() { key_.clear(); } + +TraceType IteratorSeekQueryTraceRecord::GetTraceType() const { + return static_cast(type_); +} + +IteratorSeekQueryTraceRecord::SeekType +IteratorSeekQueryTraceRecord::GetSeekType() const { + return type_; +} + +uint32_t IteratorSeekQueryTraceRecord::GetColumnFamilyID() const { + return cf_id_; +} + +Slice IteratorSeekQueryTraceRecord::GetKey() const { return Slice(key_); } + +Status IteratorSeekQueryTraceRecord::Accept( + Handler* handler, std::unique_ptr* result) { + assert(handler != nullptr); + return handler->Handle(*this, result); +} + +// MultiGetQueryTraceRecord +MultiGetQueryTraceRecord::MultiGetQueryTraceRecord( + std::vector column_family_ids, std::vector&& keys, + uint64_t timestamp) + : QueryTraceRecord(timestamp), + cf_ids_(column_family_ids), + keys_(std::move(keys)) {} + +MultiGetQueryTraceRecord::MultiGetQueryTraceRecord( + std::vector column_family_ids, + const std::vector& keys, uint64_t timestamp) + : QueryTraceRecord(timestamp), cf_ids_(column_family_ids) { + keys_.reserve(keys.size()); + for (const std::string& key : keys) { + PinnableSlice ps; + ps.PinSelf(key); + keys_.push_back(std::move(ps)); + } +} + +MultiGetQueryTraceRecord::~MultiGetQueryTraceRecord() { + cf_ids_.clear(); + keys_.clear(); +} + +std::vector MultiGetQueryTraceRecord::GetColumnFamilyIDs() const { + return cf_ids_; +} + +std::vector MultiGetQueryTraceRecord::GetKeys() const { + return std::vector(keys_.begin(), keys_.end()); +} + +Status MultiGetQueryTraceRecord::Accept( + Handler* handler, std::unique_ptr* result) { + assert(handler != nullptr); + return handler->Handle(*this, result); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.cc new file mode 100644 index 0000000000..ca179e8700 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.cc @@ -0,0 +1,190 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
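The record classes just added above dispatch through an Accept/Handle pair, i.e. the classic visitor double dispatch: a virtual Accept picks the dynamic record type, and overload resolution inside Accept picks the matching Handle. A minimal standalone illustration of the pattern follows (simplified stand-in names, not the vendored API):

    #include <cstdio>
    #include <memory>

    struct GetRecord;
    struct WriteRecord;

    // One Handle overload per concrete record type.
    struct Handler {
      virtual ~Handler() = default;
      virtual void Handle(const GetRecord&);
      virtual void Handle(const WriteRecord&);
    };

    struct Record {
      virtual ~Record() = default;
      virtual void Accept(Handler* h) = 0;
    };

    // Each subclass's Accept selects the matching Handle overload.
    struct GetRecord : Record {
      void Accept(Handler* h) override { h->Handle(*this); }
    };
    struct WriteRecord : Record {
      void Accept(Handler* h) override { h->Handle(*this); }
    };

    void Handler::Handle(const GetRecord&) { std::printf("get\n"); }
    void Handler::Handle(const WriteRecord&) { std::printf("write\n"); }

    int main() {
      Handler h;
      std::unique_ptr<Record> r(new GetRecord());
      r->Accept(&h);  // prints "get" without the caller naming the subtype
      return 0;
    }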
+ +#include "trace_replay/trace_record_handler.h" + +#include "rocksdb/iterator.h" +#include "rocksdb/trace_record_result.h" +#include "rocksdb/write_batch.h" + +namespace ROCKSDB_NAMESPACE { + +// TraceExecutionHandler +TraceExecutionHandler::TraceExecutionHandler( + DB* db, const std::vector& handles) + : TraceRecord::Handler(), + db_(db), + write_opts_(WriteOptions()), + read_opts_(ReadOptions()) { + assert(db != nullptr); + assert(!handles.empty()); + cf_map_.reserve(handles.size()); + for (ColumnFamilyHandle* handle : handles) { + assert(handle != nullptr); + cf_map_.insert({handle->GetID(), handle}); + } + clock_ = db_->GetEnv()->GetSystemClock().get(); +} + +TraceExecutionHandler::~TraceExecutionHandler() { cf_map_.clear(); } + +Status TraceExecutionHandler::Handle( + const WriteQueryTraceRecord& record, + std::unique_ptr* result) { + if (result != nullptr) { + result->reset(nullptr); + } + uint64_t start = clock_->NowMicros(); + + WriteBatch batch(record.GetWriteBatchRep().ToString()); + Status s = db_->Write(write_opts_, &batch); + + uint64_t end = clock_->NowMicros(); + + if (s.ok() && result != nullptr) { + result->reset(new StatusOnlyTraceExecutionResult(s, start, end, + record.GetTraceType())); + } + + return s; +} + +Status TraceExecutionHandler::Handle( + const GetQueryTraceRecord& record, + std::unique_ptr* result) { + if (result != nullptr) { + result->reset(nullptr); + } + auto it = cf_map_.find(record.GetColumnFamilyID()); + if (it == cf_map_.end()) { + return Status::Corruption("Invalid Column Family ID."); + } + + uint64_t start = clock_->NowMicros(); + + std::string value; + Status s = db_->Get(read_opts_, it->second, record.GetKey(), &value); + + uint64_t end = clock_->NowMicros(); + + // Treat not found as ok, return other errors. 
+ if (!s.ok() && !s.IsNotFound()) { + return s; + } + + if (result != nullptr) { + // Report the actual opetation status in TraceExecutionResult + result->reset(new SingleValueTraceExecutionResult( + std::move(s), std::move(value), start, end, record.GetTraceType())); + } + return Status::OK(); +} + +Status TraceExecutionHandler::Handle( + const IteratorSeekQueryTraceRecord& record, + std::unique_ptr* result) { + if (result != nullptr) { + result->reset(nullptr); + } + auto it = cf_map_.find(record.GetColumnFamilyID()); + if (it == cf_map_.end()) { + return Status::Corruption("Invalid Column Family ID."); + } + + ReadOptions r_opts = read_opts_; + Slice lower = record.GetLowerBound(); + if (!lower.empty()) { + r_opts.iterate_lower_bound = &lower; + } + Slice upper = record.GetUpperBound(); + if (!upper.empty()) { + r_opts.iterate_upper_bound = &upper; + } + Iterator* single_iter = db_->NewIterator(r_opts, it->second); + + uint64_t start = clock_->NowMicros(); + + switch (record.GetSeekType()) { + case IteratorSeekQueryTraceRecord::kSeekForPrev: { + single_iter->SeekForPrev(record.GetKey()); + break; + } + default: { + single_iter->Seek(record.GetKey()); + break; + } + } + + uint64_t end = clock_->NowMicros(); + + Status s = single_iter->status(); + if (s.ok() && result != nullptr) { + if (single_iter->Valid()) { + PinnableSlice ps_key; + ps_key.PinSelf(single_iter->key()); + PinnableSlice ps_value; + ps_value.PinSelf(single_iter->value()); + result->reset(new IteratorTraceExecutionResult( + true, s, std::move(ps_key), std::move(ps_value), start, end, + record.GetTraceType())); + } else { + result->reset(new IteratorTraceExecutionResult( + false, s, "", "", start, end, record.GetTraceType())); + } + } + delete single_iter; + + return s; +} + +Status TraceExecutionHandler::Handle( + const MultiGetQueryTraceRecord& record, + std::unique_ptr* result) { + if (result != nullptr) { + result->reset(nullptr); + } + std::vector handles; + handles.reserve(record.GetColumnFamilyIDs().size()); + for (uint32_t cf_id : record.GetColumnFamilyIDs()) { + auto it = cf_map_.find(cf_id); + if (it == cf_map_.end()) { + return Status::Corruption("Invalid Column Family ID."); + } + handles.push_back(it->second); + } + + std::vector keys = record.GetKeys(); + + if (handles.empty() || keys.empty()) { + return Status::InvalidArgument("Empty MultiGet cf_ids or keys."); + } + if (handles.size() != keys.size()) { + return Status::InvalidArgument("MultiGet cf_ids and keys size mismatch."); + } + + uint64_t start = clock_->NowMicros(); + + std::vector values; + std::vector ss = db_->MultiGet(read_opts_, handles, keys, &values); + + uint64_t end = clock_->NowMicros(); + + // Treat not found as ok, return other errors. + for (const Status& s : ss) { + if (!s.ok() && !s.IsNotFound()) { + return s; + } + } + + if (result != nullptr) { + // Report the actual opetation status in TraceExecutionResult + result->reset(new MultiValuesTraceExecutionResult( + std::move(ss), std::move(values), start, end, record.GetTraceType())); + } + + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.h new file mode 100644 index 0000000000..88cf317dda --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_handler.h @@ -0,0 +1,46 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "rocksdb/db.h"
+#include "rocksdb/options.h"
+#include "rocksdb/status.h"
+#include "rocksdb/system_clock.h"
+#include "rocksdb/trace_record.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Handler to execute TraceRecord.
+class TraceExecutionHandler : public TraceRecord::Handler {
+ public:
+  TraceExecutionHandler(DB* db,
+                        const std::vector<ColumnFamilyHandle*>& handles);
+  virtual ~TraceExecutionHandler() override;
+
+  virtual Status Handle(const WriteQueryTraceRecord& record,
+                        std::unique_ptr<TraceRecordResult>* result) override;
+  virtual Status Handle(const GetQueryTraceRecord& record,
+                        std::unique_ptr<TraceRecordResult>* result) override;
+  virtual Status Handle(const IteratorSeekQueryTraceRecord& record,
+                        std::unique_ptr<TraceRecordResult>* result) override;
+  virtual Status Handle(const MultiGetQueryTraceRecord& record,
+                        std::unique_ptr<TraceRecordResult>* result) override;
+
+ private:
+  DB* db_;
+  std::unordered_map<uint32_t, ColumnFamilyHandle*> cf_map_;
+  WriteOptions write_opts_;
+  ReadOptions read_opts_;
+  SystemClock* clock_;
+};
+
+// TODO: Handler for trace_analyzer.
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_result.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_result.cc
new file mode 100644
index 0000000000..9c0cb43ad9
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_record_result.cc
@@ -0,0 +1,146 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
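Taken together with the decoder in trace_replay.cc, the handler above is what turns a stored trace back into live DB calls. A hedged sketch (not vendored code) of how the pieces compose for a single record; it assumes an already-decoded Trace with format version >= 2, an open DB, and its column family handles:

    #include "rocksdb/db.h"
    #include "rocksdb/trace_record_result.h"
    #include "trace_replay/trace_record_handler.h"
    #include "trace_replay/trace_replay.h"

    using namespace ROCKSDB_NAMESPACE;

    Status ReplayOne(DB* db, const std::vector<ColumnFamilyHandle*>& handles,
                     Trace* trace) {
      std::unique_ptr<TraceRecord::Handler> handler(
          TraceRecord::NewExecutionHandler(db, handles));
      std::unique_ptr<TraceRecord> record;
      Status s = TracerHelper::DecodeTraceRecord(trace, /*trace_file_version=*/2,
                                                 &record);
      if (!s.ok() || record == nullptr) {
        return s;  // e.g. Status::NotSupported for unknown trace types
      }
      std::unique_ptr<TraceRecordResult> result;
      return record->Accept(handler.get(), &result);  // re-executes against db
    }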
+ +#include "rocksdb/trace_record_result.h" + +namespace ROCKSDB_NAMESPACE { + +// TraceRecordResult +TraceRecordResult::TraceRecordResult(TraceType trace_type) + : trace_type_(trace_type) {} + +TraceType TraceRecordResult::GetTraceType() const { return trace_type_; } + +// TraceExecutionResult +TraceExecutionResult::TraceExecutionResult(uint64_t start_timestamp, + uint64_t end_timestamp, + TraceType trace_type) + : TraceRecordResult(trace_type), + ts_start_(start_timestamp), + ts_end_(end_timestamp) { + assert(ts_start_ <= ts_end_); +} + +uint64_t TraceExecutionResult::GetStartTimestamp() const { return ts_start_; } + +uint64_t TraceExecutionResult::GetEndTimestamp() const { return ts_end_; } + +// StatusOnlyTraceExecutionResult +StatusOnlyTraceExecutionResult::StatusOnlyTraceExecutionResult( + Status status, uint64_t start_timestamp, uint64_t end_timestamp, + TraceType trace_type) + : TraceExecutionResult(start_timestamp, end_timestamp, trace_type), + status_(std::move(status)) {} + +const Status& StatusOnlyTraceExecutionResult::GetStatus() const { + return status_; +} + +Status StatusOnlyTraceExecutionResult::Accept(Handler* handler) { + assert(handler != nullptr); + return handler->Handle(*this); +} + +// SingleValueTraceExecutionResult +SingleValueTraceExecutionResult::SingleValueTraceExecutionResult( + Status status, const std::string& value, uint64_t start_timestamp, + uint64_t end_timestamp, TraceType trace_type) + : TraceExecutionResult(start_timestamp, end_timestamp, trace_type), + status_(std::move(status)), + value_(value) {} + +SingleValueTraceExecutionResult::SingleValueTraceExecutionResult( + Status status, std::string&& value, uint64_t start_timestamp, + uint64_t end_timestamp, TraceType trace_type) + : TraceExecutionResult(start_timestamp, end_timestamp, trace_type), + status_(std::move(status)), + value_(std::move(value)) {} + +SingleValueTraceExecutionResult::~SingleValueTraceExecutionResult() { + value_.clear(); +} + +const Status& SingleValueTraceExecutionResult::GetStatus() const { + return status_; +} + +const std::string& SingleValueTraceExecutionResult::GetValue() const { + return value_; +} + +Status SingleValueTraceExecutionResult::Accept(Handler* handler) { + assert(handler != nullptr); + return handler->Handle(*this); +} + +// MultiValuesTraceExecutionResult +MultiValuesTraceExecutionResult::MultiValuesTraceExecutionResult( + std::vector multi_status, std::vector values, + uint64_t start_timestamp, uint64_t end_timestamp, TraceType trace_type) + : TraceExecutionResult(start_timestamp, end_timestamp, trace_type), + multi_status_(std::move(multi_status)), + values_(std::move(values)) {} + +MultiValuesTraceExecutionResult::~MultiValuesTraceExecutionResult() { + multi_status_.clear(); + values_.clear(); +} + +const std::vector& MultiValuesTraceExecutionResult::GetMultiStatus() + const { + return multi_status_; +} + +const std::vector& MultiValuesTraceExecutionResult::GetValues() + const { + return values_; +} + +Status MultiValuesTraceExecutionResult::Accept(Handler* handler) { + assert(handler != nullptr); + return handler->Handle(*this); +} + +// IteratorTraceExecutionResult +IteratorTraceExecutionResult::IteratorTraceExecutionResult( + bool valid, Status status, PinnableSlice&& key, PinnableSlice&& value, + uint64_t start_timestamp, uint64_t end_timestamp, TraceType trace_type) + : TraceExecutionResult(start_timestamp, end_timestamp, trace_type), + valid_(valid), + status_(std::move(status)), + key_(std::move(key)), + value_(std::move(value)) {} + 
+IteratorTraceExecutionResult::IteratorTraceExecutionResult( + bool valid, Status status, const std::string& key, const std::string& value, + uint64_t start_timestamp, uint64_t end_timestamp, TraceType trace_type) + : TraceExecutionResult(start_timestamp, end_timestamp, trace_type), + valid_(valid), + status_(std::move(status)) { + key_.PinSelf(key); + value_.PinSelf(value); +} + +IteratorTraceExecutionResult::~IteratorTraceExecutionResult() { + key_.clear(); + value_.clear(); +} + +bool IteratorTraceExecutionResult::GetValid() const { return valid_; } + +const Status& IteratorTraceExecutionResult::GetStatus() const { + return status_; +} + +Slice IteratorTraceExecutionResult::GetKey() const { return Slice(key_); } + +Slice IteratorTraceExecutionResult::GetValue() const { return Slice(value_); } + +Status IteratorTraceExecutionResult::Accept(Handler* handler) { + assert(handler != nullptr); + return handler->Handle(*this); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.cc new file mode 100644 index 0000000000..c681e374c4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.cc @@ -0,0 +1,622 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "trace_replay/trace_replay.h" + +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "rocksdb/env.h" +#include "rocksdb/iterator.h" +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/system_clock.h" +#include "rocksdb/trace_reader_writer.h" +#include "rocksdb/write_batch.h" +#include "util/coding.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +const std::string kTraceMagic = "feedcafedeadbeef"; + +namespace { +void DecodeCFAndKey(std::string& buffer, uint32_t* cf_id, Slice* key) { + Slice buf(buffer); + GetFixed32(&buf, cf_id); + GetLengthPrefixedSlice(&buf, key); +} +} // namespace + +Status TracerHelper::ParseVersionStr(std::string& v_string, int* v_num) { + if (v_string.find_first_of('.') == std::string::npos || + v_string.find_first_of('.') != v_string.find_last_of('.')) { + return Status::Corruption( + "Corrupted trace file. Incorrect version format."); + } + int tmp_num = 0; + for (int i = 0; i < static_cast(v_string.size()); i++) { + if (v_string[i] == '.') { + continue; + } else if (isdigit(v_string[i])) { + tmp_num = tmp_num * 10 + (v_string[i] - '0'); + } else { + return Status::Corruption( + "Corrupted trace file. 
Incorrect version format"); + } + } + *v_num = tmp_num; + return Status::OK(); +} + +Status TracerHelper::ParseTraceHeader(const Trace& header, int* trace_version, + int* db_version) { + std::vector s_vec; + int begin = 0, end; + for (int i = 0; i < 3; i++) { + assert(header.payload.find("\t", begin) != std::string::npos); + end = static_cast(header.payload.find("\t", begin)); + s_vec.push_back(header.payload.substr(begin, end - begin)); + begin = end + 1; + } + + std::string t_v_str, db_v_str; + assert(s_vec.size() == 3); + assert(s_vec[1].find("Trace Version: ") != std::string::npos); + t_v_str = s_vec[1].substr(15); + assert(s_vec[2].find("RocksDB Version: ") != std::string::npos); + db_v_str = s_vec[2].substr(17); + + Status s; + s = ParseVersionStr(t_v_str, trace_version); + if (s != Status::OK()) { + return s; + } + s = ParseVersionStr(db_v_str, db_version); + return s; +} + +void TracerHelper::EncodeTrace(const Trace& trace, std::string* encoded_trace) { + assert(encoded_trace); + PutFixed64(encoded_trace, trace.ts); + encoded_trace->push_back(trace.type); + PutFixed32(encoded_trace, static_cast(trace.payload.size())); + encoded_trace->append(trace.payload); +} + +Status TracerHelper::DecodeTrace(const std::string& encoded_trace, + Trace* trace) { + assert(trace != nullptr); + Slice enc_slice = Slice(encoded_trace); + if (!GetFixed64(&enc_slice, &trace->ts)) { + return Status::Incomplete("Decode trace string failed"); + } + if (enc_slice.size() < kTraceTypeSize + kTracePayloadLengthSize) { + return Status::Incomplete("Decode trace string failed"); + } + trace->type = static_cast(enc_slice[0]); + enc_slice.remove_prefix(kTraceTypeSize + kTracePayloadLengthSize); + trace->payload = enc_slice.ToString(); + return Status::OK(); +} + +Status TracerHelper::DecodeHeader(const std::string& encoded_trace, + Trace* header) { + Status s = TracerHelper::DecodeTrace(encoded_trace, header); + + if (header->type != kTraceBegin) { + return Status::Corruption("Corrupted trace file. Incorrect header."); + } + if (header->payload.substr(0, kTraceMagic.length()) != kTraceMagic) { + return Status::Corruption("Corrupted trace file. Incorrect magic."); + } + + return s; +} + +bool TracerHelper::SetPayloadMap(uint64_t& payload_map, + const TracePayloadType payload_type) { + uint64_t old_state = payload_map; + uint64_t tmp = 1; + payload_map |= (tmp << payload_type); + return old_state != payload_map; +} + +Status TracerHelper::DecodeTraceRecord(Trace* trace, int trace_file_version, + std::unique_ptr* record) { + assert(trace != nullptr); + + if (record != nullptr) { + record->reset(nullptr); + } + + switch (trace->type) { + // Write + case kTraceWrite: { + PinnableSlice rep; + if (trace_file_version < 2) { + rep.PinSelf(trace->payload); + } else { + Slice buf(trace->payload); + GetFixed64(&buf, &trace->payload_map); + int64_t payload_map = static_cast(trace->payload_map); + Slice write_batch_data; + while (payload_map) { + // Find the rightmost set bit. + uint32_t set_pos = + static_cast(log2(payload_map & -payload_map)); + switch (set_pos) { + case TracePayloadType::kWriteBatchData: { + GetLengthPrefixedSlice(&buf, &write_batch_data); + break; + } + default: { + assert(false); + } + } + // unset the rightmost bit. 
+ payload_map &= (payload_map - 1); + } + rep.PinSelf(write_batch_data); + } + + if (record != nullptr) { + record->reset(new WriteQueryTraceRecord(std::move(rep), trace->ts)); + } + + return Status::OK(); + } + // Get + case kTraceGet: { + uint32_t cf_id = 0; + Slice get_key; + + if (trace_file_version < 2) { + DecodeCFAndKey(trace->payload, &cf_id, &get_key); + } else { + Slice buf(trace->payload); + GetFixed64(&buf, &trace->payload_map); + int64_t payload_map = static_cast(trace->payload_map); + while (payload_map) { + // Find the rightmost set bit. + uint32_t set_pos = + static_cast(log2(payload_map & -payload_map)); + switch (set_pos) { + case TracePayloadType::kGetCFID: { + GetFixed32(&buf, &cf_id); + break; + } + case TracePayloadType::kGetKey: { + GetLengthPrefixedSlice(&buf, &get_key); + break; + } + default: { + assert(false); + } + } + // unset the rightmost bit. + payload_map &= (payload_map - 1); + } + } + + if (record != nullptr) { + PinnableSlice ps; + ps.PinSelf(get_key); + record->reset(new GetQueryTraceRecord(cf_id, std::move(ps), trace->ts)); + } + + return Status::OK(); + } + // Iterator Seek and SeekForPrev + case kTraceIteratorSeek: + case kTraceIteratorSeekForPrev: { + uint32_t cf_id = 0; + Slice iter_key; + Slice lower_bound; + Slice upper_bound; + + if (trace_file_version < 2) { + DecodeCFAndKey(trace->payload, &cf_id, &iter_key); + } else { + Slice buf(trace->payload); + GetFixed64(&buf, &trace->payload_map); + int64_t payload_map = static_cast(trace->payload_map); + while (payload_map) { + // Find the rightmost set bit. + uint32_t set_pos = + static_cast(log2(payload_map & -payload_map)); + switch (set_pos) { + case TracePayloadType::kIterCFID: { + GetFixed32(&buf, &cf_id); + break; + } + case TracePayloadType::kIterKey: { + GetLengthPrefixedSlice(&buf, &iter_key); + break; + } + case TracePayloadType::kIterLowerBound: { + GetLengthPrefixedSlice(&buf, &lower_bound); + break; + } + case TracePayloadType::kIterUpperBound: { + GetLengthPrefixedSlice(&buf, &upper_bound); + break; + } + default: { + assert(false); + } + } + // unset the rightmost bit. + payload_map &= (payload_map - 1); + } + } + + if (record != nullptr) { + PinnableSlice ps_key; + ps_key.PinSelf(iter_key); + PinnableSlice ps_lower; + ps_lower.PinSelf(lower_bound); + PinnableSlice ps_upper; + ps_upper.PinSelf(upper_bound); + record->reset(new IteratorSeekQueryTraceRecord( + static_cast(trace->type), + cf_id, std::move(ps_key), std::move(ps_lower), std::move(ps_upper), + trace->ts)); + } + + return Status::OK(); + } + // MultiGet + case kTraceMultiGet: { + if (trace_file_version < 2) { + return Status::Corruption("MultiGet is not supported."); + } + + uint32_t multiget_size = 0; + std::vector cf_ids; + std::vector multiget_keys; + + Slice cfids_payload; + Slice keys_payload; + Slice buf(trace->payload); + GetFixed64(&buf, &trace->payload_map); + int64_t payload_map = static_cast(trace->payload_map); + while (payload_map) { + // Find the rightmost set bit. + uint32_t set_pos = + static_cast(log2(payload_map & -payload_map)); + switch (set_pos) { + case TracePayloadType::kMultiGetSize: { + GetFixed32(&buf, &multiget_size); + break; + } + case TracePayloadType::kMultiGetCFIDs: { + GetLengthPrefixedSlice(&buf, &cfids_payload); + break; + } + case TracePayloadType::kMultiGetKeys: { + GetLengthPrefixedSlice(&buf, &keys_payload); + break; + } + default: { + assert(false); + } + } + // unset the rightmost bit. 
+ payload_map &= (payload_map - 1); + } + if (multiget_size == 0) { + return Status::InvalidArgument("Empty MultiGet cf_ids or keys."); + } + + // Decode the cfids_payload and keys_payload + cf_ids.reserve(multiget_size); + multiget_keys.reserve(multiget_size); + for (uint32_t i = 0; i < multiget_size; i++) { + uint32_t tmp_cfid = 0; + Slice tmp_key; + GetFixed32(&cfids_payload, &tmp_cfid); + GetLengthPrefixedSlice(&keys_payload, &tmp_key); + cf_ids.push_back(tmp_cfid); + Slice s(tmp_key); + PinnableSlice ps; + ps.PinSelf(s); + multiget_keys.push_back(std::move(ps)); + } + + if (record != nullptr) { + record->reset(new MultiGetQueryTraceRecord( + std::move(cf_ids), std::move(multiget_keys), trace->ts)); + } + + return Status::OK(); + } + default: + return Status::NotSupported("Unsupported trace type."); + } +} + +Tracer::Tracer(SystemClock* clock, const TraceOptions& trace_options, + std::unique_ptr&& trace_writer) + : clock_(clock), + trace_options_(trace_options), + trace_writer_(std::move(trace_writer)), + trace_request_count_(0) { + // TODO: What if this fails? + WriteHeader().PermitUncheckedError(); +} + +Tracer::~Tracer() { trace_writer_.reset(); } + +Status Tracer::Write(WriteBatch* write_batch) { + TraceType trace_type = kTraceWrite; + if (ShouldSkipTrace(trace_type)) { + return Status::OK(); + } + Trace trace; + trace.ts = clock_->NowMicros(); + trace.type = trace_type; + TracerHelper::SetPayloadMap(trace.payload_map, + TracePayloadType::kWriteBatchData); + PutFixed64(&trace.payload, trace.payload_map); + PutLengthPrefixedSlice(&trace.payload, Slice(write_batch->Data())); + return WriteTrace(trace); +} + +Status Tracer::Get(ColumnFamilyHandle* column_family, const Slice& key) { + TraceType trace_type = kTraceGet; + if (ShouldSkipTrace(trace_type)) { + return Status::OK(); + } + Trace trace; + trace.ts = clock_->NowMicros(); + trace.type = trace_type; + // Set the payloadmap of the struct member that will be encoded in the + // payload. + TracerHelper::SetPayloadMap(trace.payload_map, TracePayloadType::kGetCFID); + TracerHelper::SetPayloadMap(trace.payload_map, TracePayloadType::kGetKey); + // Encode the Get struct members into payload. Make sure add them in order. + PutFixed64(&trace.payload, trace.payload_map); + PutFixed32(&trace.payload, column_family->GetID()); + PutLengthPrefixedSlice(&trace.payload, key); + return WriteTrace(trace); +} + +Status Tracer::IteratorSeek(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, const Slice upper_bound) { + TraceType trace_type = kTraceIteratorSeek; + if (ShouldSkipTrace(trace_type)) { + return Status::OK(); + } + Trace trace; + trace.ts = clock_->NowMicros(); + trace.type = trace_type; + // Set the payloadmap of the struct member that will be encoded in the + // payload. + TracerHelper::SetPayloadMap(trace.payload_map, TracePayloadType::kIterCFID); + TracerHelper::SetPayloadMap(trace.payload_map, TracePayloadType::kIterKey); + if (lower_bound.size() > 0) { + TracerHelper::SetPayloadMap(trace.payload_map, + TracePayloadType::kIterLowerBound); + } + if (upper_bound.size() > 0) { + TracerHelper::SetPayloadMap(trace.payload_map, + TracePayloadType::kIterUpperBound); + } + // Encode the Iterator struct members into payload. Make sure add them in + // order. 
+ PutFixed64(&trace.payload, trace.payload_map); + PutFixed32(&trace.payload, cf_id); + PutLengthPrefixedSlice(&trace.payload, key); + if (lower_bound.size() > 0) { + PutLengthPrefixedSlice(&trace.payload, lower_bound); + } + if (upper_bound.size() > 0) { + PutLengthPrefixedSlice(&trace.payload, upper_bound); + } + return WriteTrace(trace); +} + +Status Tracer::IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, + const Slice upper_bound) { + TraceType trace_type = kTraceIteratorSeekForPrev; + if (ShouldSkipTrace(trace_type)) { + return Status::OK(); + } + Trace trace; + trace.ts = clock_->NowMicros(); + trace.type = trace_type; + // Set the payloadmap of the struct member that will be encoded in the + // payload. + TracerHelper::SetPayloadMap(trace.payload_map, TracePayloadType::kIterCFID); + TracerHelper::SetPayloadMap(trace.payload_map, TracePayloadType::kIterKey); + if (lower_bound.size() > 0) { + TracerHelper::SetPayloadMap(trace.payload_map, + TracePayloadType::kIterLowerBound); + } + if (upper_bound.size() > 0) { + TracerHelper::SetPayloadMap(trace.payload_map, + TracePayloadType::kIterUpperBound); + } + // Encode the Iterator struct members into payload. Make sure add them in + // order. + PutFixed64(&trace.payload, trace.payload_map); + PutFixed32(&trace.payload, cf_id); + PutLengthPrefixedSlice(&trace.payload, key); + if (lower_bound.size() > 0) { + PutLengthPrefixedSlice(&trace.payload, lower_bound); + } + if (upper_bound.size() > 0) { + PutLengthPrefixedSlice(&trace.payload, upper_bound); + } + return WriteTrace(trace); +} + +Status Tracer::MultiGet(const size_t num_keys, + ColumnFamilyHandle** column_families, + const Slice* keys) { + if (num_keys == 0) { + return Status::OK(); + } + std::vector v_column_families; + std::vector v_keys; + v_column_families.resize(num_keys); + v_keys.resize(num_keys); + for (size_t i = 0; i < num_keys; i++) { + v_column_families[i] = column_families[i]; + v_keys[i] = keys[i]; + } + return MultiGet(v_column_families, v_keys); +} + +Status Tracer::MultiGet(const size_t num_keys, + ColumnFamilyHandle* column_family, const Slice* keys) { + if (num_keys == 0) { + return Status::OK(); + } + std::vector column_families; + std::vector v_keys; + column_families.resize(num_keys); + v_keys.resize(num_keys); + for (size_t i = 0; i < num_keys; i++) { + column_families[i] = column_family; + v_keys[i] = keys[i]; + } + return MultiGet(column_families, v_keys); +} + +Status Tracer::MultiGet(const std::vector& column_families, + const std::vector& keys) { + if (column_families.size() != keys.size()) { + return Status::Corruption("the CFs size and keys size does not match!"); + } + TraceType trace_type = kTraceMultiGet; + if (ShouldSkipTrace(trace_type)) { + return Status::OK(); + } + uint32_t multiget_size = static_cast(keys.size()); + Trace trace; + trace.ts = clock_->NowMicros(); + trace.type = trace_type; + // Set the payloadmap of the struct member that will be encoded in the + // payload. 
+ TracerHelper::SetPayloadMap(trace.payload_map, + TracePayloadType::kMultiGetSize); + TracerHelper::SetPayloadMap(trace.payload_map, + TracePayloadType::kMultiGetCFIDs); + TracerHelper::SetPayloadMap(trace.payload_map, + TracePayloadType::kMultiGetKeys); + // Encode the CFIDs inorder + std::string cfids_payload; + std::string keys_payload; + for (uint32_t i = 0; i < multiget_size; i++) { + assert(i < column_families.size()); + assert(i < keys.size()); + PutFixed32(&cfids_payload, column_families[i]->GetID()); + PutLengthPrefixedSlice(&keys_payload, keys[i]); + } + // Encode the Get struct members into payload. Make sure add them in order. + PutFixed64(&trace.payload, trace.payload_map); + PutFixed32(&trace.payload, multiget_size); + PutLengthPrefixedSlice(&trace.payload, cfids_payload); + PutLengthPrefixedSlice(&trace.payload, keys_payload); + return WriteTrace(trace); +} + +bool Tracer::ShouldSkipTrace(const TraceType& trace_type) { + if (IsTraceFileOverMax()) { + return true; + } + + TraceFilterType filter_mask = kTraceFilterNone; + switch (trace_type) { + case kTraceNone: + case kTraceBegin: + case kTraceEnd: + filter_mask = kTraceFilterNone; + break; + case kTraceWrite: + filter_mask = kTraceFilterWrite; + break; + case kTraceGet: + filter_mask = kTraceFilterGet; + break; + case kTraceIteratorSeek: + filter_mask = kTraceFilterIteratorSeek; + break; + case kTraceIteratorSeekForPrev: + filter_mask = kTraceFilterIteratorSeekForPrev; + break; + case kBlockTraceIndexBlock: + case kBlockTraceFilterBlock: + case kBlockTraceDataBlock: + case kBlockTraceUncompressionDictBlock: + case kBlockTraceRangeDeletionBlock: + case kIOTracer: + filter_mask = kTraceFilterNone; + break; + case kTraceMultiGet: + filter_mask = kTraceFilterMultiGet; + break; + case kTraceMax: + assert(false); + filter_mask = kTraceFilterNone; + break; + } + if (filter_mask != kTraceFilterNone && trace_options_.filter & filter_mask) { + return true; + } + + ++trace_request_count_; + if (trace_request_count_ < trace_options_.sampling_frequency) { + return true; + } + trace_request_count_ = 0; + return false; +} + +bool Tracer::IsTraceFileOverMax() { + uint64_t trace_file_size = trace_writer_->GetFileSize(); + return (trace_file_size > trace_options_.max_trace_file_size); +} + +Status Tracer::WriteHeader() { + std::ostringstream s; + s << kTraceMagic << "\t" + << "Trace Version: " << kTraceFileMajorVersion << "." + << kTraceFileMinorVersion << "\t" + << "RocksDB Version: " << kMajorVersion << "." 
<< kMinorVersion << "\t"
+    << "Format: Timestamp OpType Payload\n";
+  std::string header(s.str());
+
+  Trace trace;
+  trace.ts = clock_->NowMicros();
+  trace.type = kTraceBegin;
+  trace.payload = header;
+  return WriteTrace(trace);
+}
+
+Status Tracer::WriteFooter() {
+  Trace trace;
+  trace.ts = clock_->NowMicros();
+  trace.type = kTraceEnd;
+  TracerHelper::SetPayloadMap(trace.payload_map,
+                              TracePayloadType::kEmptyPayload);
+  trace.payload = "";
+  return WriteTrace(trace);
+}
+
+Status Tracer::WriteTrace(const Trace& trace) {
+  std::string encoded_trace;
+  TracerHelper::EncodeTrace(trace, &encoded_trace);
+  return trace_writer_->Write(Slice(encoded_trace));
+}
+
+Status Tracer::Close() { return WriteFooter(); }
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.h
new file mode 100644
index 0000000000..9aba5ceb72
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/trace_replay/trace_replay.h
@@ -0,0 +1,183 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "rocksdb/options.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/status.h"
+#include "rocksdb/trace_record.h"
+#include "rocksdb/utilities/replayer.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// This file contains Tracer and Replayer classes that enable capturing and
+// replaying RocksDB traces.
+
+class ColumnFamilyHandle;
+class ColumnFamilyData;
+class DB;
+class DBImpl;
+class Env;
+class Slice;
+class SystemClock;
+class TraceReader;
+class TraceWriter;
+class WriteBatch;
+
+struct ReadOptions;
+struct TraceOptions;
+struct WriteOptions;
+
+extern const std::string kTraceMagic;
+const unsigned int kTraceTimestampSize = 8;
+const unsigned int kTraceTypeSize = 1;
+const unsigned int kTracePayloadLengthSize = 4;
+const unsigned int kTraceMetadataSize =
+    kTraceTimestampSize + kTraceTypeSize + kTracePayloadLengthSize;
+
+static const int kTraceFileMajorVersion = 0;
+static const int kTraceFileMinorVersion = 2;
+
+// The data structure that defines a single trace.
+struct Trace {
+  uint64_t ts;  // timestamp
+  TraceType type;
+  // Each bit in payload_map records whether the corresponding struct member
+  // is added in the payload. Each TraceType has its corresponding payload
+  // struct. For example, if the bit at position 0 is set in a write payload,
+  // then the write batch will be added.
+  uint64_t payload_map = 0;
+  // Each trace type has its own payload struct, which will be serialized in
+  // the payload.
+  std::string payload;
+
+  void reset() {
+    ts = 0;
+    type = kTraceMax;
+    payload_map = 0;
+    payload.clear();
+  }
+};
+
+enum TracePayloadType : char {
+  // Each member of all query payload structs should have a corresponding flag
+  // here. Make sure to add them sequentially, in the order they are added.
+ kEmptyPayload = 0, + kWriteBatchData = 1, + kGetCFID = 2, + kGetKey = 3, + kIterCFID = 4, + kIterKey = 5, + kIterLowerBound = 6, + kIterUpperBound = 7, + kMultiGetSize = 8, + kMultiGetCFIDs = 9, + kMultiGetKeys = 10, +}; + +class TracerHelper { + public: + // Parse the string with major and minor version only + static Status ParseVersionStr(std::string& v_string, int* v_num); + + // Parse the trace file version and db version in trace header + static Status ParseTraceHeader(const Trace& header, int* trace_version, + int* db_version); + + // Encode a version 0.1 trace object into the given string. + static void EncodeTrace(const Trace& trace, std::string* encoded_trace); + + // Decode a string into the given trace object. + static Status DecodeTrace(const std::string& encoded_trace, Trace* trace); + + // Decode a string into the given trace header. + static Status DecodeHeader(const std::string& encoded_trace, Trace* header); + + // Set the payload map based on the payload type + static bool SetPayloadMap(uint64_t& payload_map, + const TracePayloadType payload_type); + + // Decode a Trace object into the corresponding TraceRecord. + // Return Status::OK() if nothing is wrong, record will be set accordingly. + // Return Status::NotSupported() if the trace type is not support, or the + // corresponding error status, record will be set to nullptr. + static Status DecodeTraceRecord(Trace* trace, int trace_file_version, + std::unique_ptr* record); +}; + +// Tracer captures all RocksDB operations using a user-provided TraceWriter. +// Every RocksDB operation is written as a single trace. Each trace will have a +// timestamp and type, followed by the trace payload. +class Tracer { + public: + Tracer(SystemClock* clock, const TraceOptions& trace_options, + std::unique_ptr&& trace_writer); + ~Tracer(); + + // Trace all write operations -- Put, Merge, Delete, SingleDelete, Write + Status Write(WriteBatch* write_batch); + + // Trace Get operations. + Status Get(ColumnFamilyHandle* cfname, const Slice& key); + + // Trace Iterators. + Status IteratorSeek(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, const Slice upper_bound); + Status IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, const Slice upper_bound); + + // Trace MultiGet + + Status MultiGet(const size_t num_keys, ColumnFamilyHandle** column_families, + const Slice* keys); + + Status MultiGet(const size_t num_keys, ColumnFamilyHandle* column_family, + const Slice* keys); + + Status MultiGet(const std::vector& column_family, + const std::vector& keys); + + // Returns true if the trace is over the configured max trace file limit. + // False otherwise. + bool IsTraceFileOverMax(); + + // Returns true if the order of write trace records must match the order of + // the corresponding records logged to WAL and applied to the DB. + bool IsWriteOrderPreserved() { return trace_options_.preserve_write_order; } + + // Writes a trace footer at the end of the tracing + Status Close(); + + private: + // Write a trace header at the beginning, typically on initiating a trace, + // with some metadata like a magic number, trace version, RocksDB version, and + // trace format. + Status WriteHeader(); + + // Write a trace footer, typically on ending a trace, with some metadata. + Status WriteFooter(); + + // Write a single trace using the provided TraceWriter to the underlying + // system, say, a filesystem or a streaming service. 
+  Status WriteTrace(const Trace& trace);
+
+  // Helps in filtering and sampling of traces.
+  // Returns true if a trace should be skipped, false otherwise.
+  bool ShouldSkipTrace(const TraceType& type);
+
+  SystemClock* clock_;
+  TraceOptions trace_options_;
+  std::unique_ptr<TraceWriter> trace_writer_;
+  uint64_t trace_request_count_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/aligned_buffer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/aligned_buffer.h
new file mode 100644
index 0000000000..acab56c215
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/aligned_buffer.h
@@ -0,0 +1,235 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+
+#include "port/port.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// This file contains utilities to handle the alignment of pages and buffers.
+
+// Truncate to a multiple of page_size, which is also a page boundary. This
+// helps in figuring out the right alignment.
+// Example:
+//   TruncateToPageBoundary(4096, 5000)  => 4096
+//   TruncateToPageBoundary(4096, 10000) => 8192
+inline size_t TruncateToPageBoundary(size_t page_size, size_t s) {
+  s -= (s & (page_size - 1));
+  assert((s % page_size) == 0);
+  return s;
+}
+
+// Round up x to a multiple of y.
+// Example:
+//   Roundup(13, 5)   => 15
+//   Roundup(201, 16) => 208
+inline size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; }
+
+// Round down x to a multiple of y.
+// Example:
+//   Rounddown(13, 5)   => 10
+//   Rounddown(201, 16) => 192
+inline size_t Rounddown(size_t x, size_t y) { return (x / y) * y; }
+
+// AlignedBuffer manages a buffer by taking alignment into consideration, and
+// aligns the buffer start and end positions. It is mainly used for direct I/O,
+// though it can be used for other purposes as well.
+// It also supports expanding the managed buffer, and copying whole or part of
+// the data from the old buffer into the new expanded buffer. Such a copy
+// especially helps avoid an IO to re-fetch the data from disk.
+//
+// Example:
+//   AlignedBuffer buf;
+//   buf.Alignment(alignment);
+//   buf.AllocateNewBuffer(user_requested_buf_size);
+//   ...
+// buf.AllocateNewBuffer(2*user_requested_buf_size, /*copy_data*/ true, +// copy_offset, copy_len); +class AlignedBuffer { + size_t alignment_; + std::unique_ptr buf_; + size_t capacity_; + size_t cursize_; + char* bufstart_; + + public: + AlignedBuffer() + : alignment_(), capacity_(0), cursize_(0), bufstart_(nullptr) {} + + AlignedBuffer(AlignedBuffer&& o) noexcept { *this = std::move(o); } + + AlignedBuffer& operator=(AlignedBuffer&& o) noexcept { + alignment_ = std::move(o.alignment_); + buf_ = std::move(o.buf_); + capacity_ = std::move(o.capacity_); + cursize_ = std::move(o.cursize_); + bufstart_ = std::move(o.bufstart_); + return *this; + } + + AlignedBuffer(const AlignedBuffer&) = delete; + + AlignedBuffer& operator=(const AlignedBuffer&) = delete; + + static bool isAligned(const void* ptr, size_t alignment) { + return reinterpret_cast(ptr) % alignment == 0; + } + + static bool isAligned(size_t n, size_t alignment) { + return n % alignment == 0; + } + + size_t Alignment() const { return alignment_; } + + size_t Capacity() const { return capacity_; } + + size_t CurrentSize() const { return cursize_; } + + const char* BufferStart() const { return bufstart_; } + + char* BufferStart() { return bufstart_; } + + void Clear() { cursize_ = 0; } + + char* Release() { + cursize_ = 0; + capacity_ = 0; + bufstart_ = nullptr; + return buf_.release(); + } + + void Alignment(size_t alignment) { + assert(alignment > 0); + assert((alignment & (alignment - 1)) == 0); + alignment_ = alignment; + } + + // Allocates a new buffer and sets the start position to the first aligned + // byte. + // + // requested_capacity: requested new buffer capacity. This capacity will be + // rounded up based on alignment. + // copy_data: Copy data from old buffer to new buffer. If copy_offset and + // copy_len are not passed in and the new requested capacity is bigger + // than the existing buffer's capacity, the data in the exising buffer is + // fully copied over to the new buffer. + // copy_offset: Copy data from this offset in old buffer. + // copy_len: Number of bytes to copy. + // + // The function does nothing if the new requested_capacity is smaller than + // the current buffer capacity and copy_data is true i.e. the old buffer is + // retained as is. + void AllocateNewBuffer(size_t requested_capacity, bool copy_data = false, + uint64_t copy_offset = 0, size_t copy_len = 0) { + assert(alignment_ > 0); + assert((alignment_ & (alignment_ - 1)) == 0); + + copy_len = copy_len > 0 ? copy_len : cursize_; + if (copy_data && requested_capacity < copy_len) { + // If we are downsizing to a capacity that is smaller than the current + // data in the buffer -- Ignore the request. + return; + } + + size_t new_capacity = Roundup(requested_capacity, alignment_); + char* new_buf = new char[new_capacity + alignment_]; + char* new_bufstart = reinterpret_cast( + (reinterpret_cast(new_buf) + (alignment_ - 1)) & + ~static_cast(alignment_ - 1)); + + if (copy_data) { + assert(bufstart_ + copy_offset + copy_len <= bufstart_ + cursize_); + memcpy(new_bufstart, bufstart_ + copy_offset, copy_len); + cursize_ = copy_len; + } else { + cursize_ = 0; + } + + bufstart_ = new_bufstart; + capacity_ = new_capacity; + buf_.reset(new_buf); + } + + // Append to the buffer. + // + // src : source to copy the data from. + // append_size : number of bytes to copy from src. + // Returns the number of bytes appended. + // + // If append_size is more than the remaining buffer size only the + // remaining-size worth of bytes are copied. 
+ size_t Append(const char* src, size_t append_size) { + size_t buffer_remaining = capacity_ - cursize_; + size_t to_copy = std::min(append_size, buffer_remaining); + + if (to_copy > 0) { + memcpy(bufstart_ + cursize_, src, to_copy); + cursize_ += to_copy; + } + return to_copy; + } + + // Read from the buffer. + // + // dest : destination buffer to copy the data to. + // offset : the buffer offset to start reading from. + // read_size : the number of bytes to copy from the buffer to dest. + // Returns the number of bytes read/copied to dest. + size_t Read(char* dest, size_t offset, size_t read_size) const { + assert(offset < cursize_); + + size_t to_read = 0; + if (offset < cursize_) { + to_read = std::min(cursize_ - offset, read_size); + } + if (to_read > 0) { + memcpy(dest, bufstart_ + offset, to_read); + } + return to_read; + } + + // Pad to the end of alignment with "padding" + void PadToAlignmentWith(int padding) { + size_t total_size = Roundup(cursize_, alignment_); + size_t pad_size = total_size - cursize_; + + if (pad_size > 0) { + assert((pad_size + cursize_) <= capacity_); + memset(bufstart_ + cursize_, padding, pad_size); + cursize_ += pad_size; + } + } + + void PadWith(size_t pad_size, int padding) { + assert((pad_size + cursize_) <= capacity_); + memset(bufstart_ + cursize_, padding, pad_size); + cursize_ += pad_size; + } + + // After a partial flush move the tail to the beginning of the buffer. + void RefitTail(size_t tail_offset, size_t tail_size) { + if (tail_size > 0) { + memmove(bufstart_, bufstart_ + tail_offset, tail_size); + } + cursize_ = tail_size; + } + + // Returns a place to start appending. + // WARNING: Note that it is possible to write past the end of the buffer if + // the buffer is modified without using the write APIs or encapsulation + // offered by AlignedBuffer. It is up to the user to guard against such + // errors. + char* Destination() { return bufstart_ + cursize_; } + + void Size(size_t cursize) { cursize_ = cursize; } +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.cc new file mode 100644 index 0000000000..080c1ae966 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.cc @@ -0,0 +1,81 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
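Returning to the AlignedBuffer added above: a short usage sketch (illustrative, not vendored; `src` is a caller-provided pointer with at least 5000 readable bytes) showing how 5000 bytes are staged for a direct-I/O style write with 4096-byte alignment. The sizes follow directly from the Roundup semantics in the header:

    #include <cassert>
    #include "util/aligned_buffer.h"  // the vendored header added above

    void StageForDirectIO(const char* src) {
      ROCKSDB_NAMESPACE::AlignedBuffer buf;
      buf.Alignment(4096);          // must be a power of two
      buf.AllocateNewBuffer(5000);  // capacity rounds up to 8192
      size_t copied = buf.Append(src, 5000);
      assert(copied == 5000);
      buf.PadToAlignmentWith(0);    // zero-fill up to the 8192 boundary
      // buf.BufferStart() is aligned and buf.CurrentSize() is now 8192.
    }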
+//
+#if USE_COROUTINES
+#include "util/async_file_reader.h"
+
+namespace ROCKSDB_NAMESPACE {
+bool AsyncFileReader::MultiReadAsyncImpl(ReadAwaiter* awaiter) {
+  if (tail_) {
+    tail_->next_ = awaiter;
+  }
+  tail_ = awaiter;
+  if (!head_) {
+    head_ = awaiter;
+  }
+  num_reqs_ += awaiter->num_reqs_;
+  awaiter->io_handle_.resize(awaiter->num_reqs_);
+  awaiter->del_fn_.resize(awaiter->num_reqs_);
+  for (size_t i = 0; i < awaiter->num_reqs_; ++i) {
+    IOStatus s = awaiter->file_->ReadAsync(
+        awaiter->read_reqs_[i], awaiter->opts_,
+        [](const FSReadRequest& req, void* cb_arg) {
+          FSReadRequest* read_req = static_cast<FSReadRequest*>(cb_arg);
+          read_req->status = req.status;
+          read_req->result = req.result;
+        },
+        &awaiter->read_reqs_[i], &awaiter->io_handle_[i], &awaiter->del_fn_[i],
+        /*aligned_buf=*/nullptr);
+    if (!s.ok()) {
+      // For any non-ok status, the FileSystem will not call the callback,
+      // so let's update the status ourselves.
+      awaiter->read_reqs_[i].status = s;
+    }
+  }
+  return true;
+}
+
+void AsyncFileReader::Wait() {
+  if (!head_) {
+    return;
+  }
+  ReadAwaiter* waiter;
+  std::vector<void*> io_handles;
+  IOStatus s;
+  io_handles.reserve(num_reqs_);
+  waiter = head_;
+  do {
+    for (size_t i = 0; i < waiter->num_reqs_; ++i) {
+      if (waiter->io_handle_[i]) {
+        io_handles.push_back(waiter->io_handle_[i]);
+      }
+    }
+  } while (waiter != tail_ && (waiter = waiter->next_));
+  if (io_handles.size() > 0) {
+    StopWatch sw(SystemClock::Default().get(), stats_, POLL_WAIT_MICROS);
+    s = fs_->Poll(io_handles, io_handles.size());
+  }
+  do {
+    waiter = head_;
+    head_ = waiter->next_;
+
+    for (size_t i = 0; i < waiter->num_reqs_; ++i) {
+      if (waiter->io_handle_[i] && waiter->del_fn_[i]) {
+        waiter->del_fn_[i](waiter->io_handle_[i]);
+      }
+      if (waiter->read_reqs_[i].status.ok() && !s.ok()) {
+        // Override the request status with the Poll error
+        waiter->read_reqs_[i].status = s;
+      }
+    }
+    waiter->awaiting_coro_.resume();
+  } while (waiter != tail_);
+  head_ = tail_ = nullptr;
+  RecordInHistogram(stats_, MULTIGET_IO_BATCH_SIZE, num_reqs_);
+  num_reqs_ = 0;
+}
+} // namespace ROCKSDB_NAMESPACE
+#endif // USE_COROUTINES
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.h
new file mode 100644
index 0000000000..df69a840eb
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/async_file_reader.h
@@ -0,0 +1,144 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#if USE_COROUTINES
+#include "file/random_access_file_reader.h"
+#include "folly/experimental/coro/ViaIfAsync.h"
+#include "port/port.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/statistics.h"
+#include "util/autovector.h"
+#include "util/stop_watch.h"
+
+namespace ROCKSDB_NAMESPACE {
+class SingleThreadExecutor;
+
+// AsyncFileReader implements the Awaitable concept, which allows calling
+// coroutines to co_await it. When the AsyncFileReader Awaitable is
+// resumed, it initiates the file reads requested by the awaiting caller
+// by calling RandomAccessFileReader's ReadAsync. It then suspends the
+// awaiting coroutine. The suspended awaiter is later resumed by Wait().
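+// [Editorial aside, not upstream code: stripped of the folly plumbing, the
+//  Awaitable protocol the class below implements has this minimal C++20
+//  shape (illustrative sketch only):
+//
+//    struct MiniAwaiter {
+//      bool await_ready() noexcept { return false; }  // always suspend
+//      bool await_suspend(std::coroutine_handle<> h) noexcept {
+//        handle_ = h;  // remember the caller so it can be resumed later
+//        return true;  // true => remain suspended
+//      }
+//      void await_resume() noexcept {}
+//      std::coroutine_handle<> handle_;
+//    };
+//
+//  ReadAwaiter plays this role here, with Wait() performing the deferred
+//  resume via awaiting_coro_.resume().]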
+class AsyncFileReader { + class ReadAwaiter; + template + class ReadOperation; + + public: + AsyncFileReader(FileSystem* fs, Statistics* stats) : fs_(fs), stats_(stats) {} + + ~AsyncFileReader() {} + + ReadOperation MultiReadAsync(RandomAccessFileReader* file, + const IOOptions& opts, + FSReadRequest* read_reqs, + size_t num_reqs, + AlignedBuf* aligned_buf) noexcept { + return ReadOperation{*this, file, opts, + read_reqs, num_reqs, aligned_buf}; + } + + private: + friend SingleThreadExecutor; + + // Implementation of the Awaitable concept + class ReadAwaiter { + public: + explicit ReadAwaiter(AsyncFileReader& reader, RandomAccessFileReader* file, + const IOOptions& opts, FSReadRequest* read_reqs, + size_t num_reqs, AlignedBuf* /*aligned_buf*/) noexcept + : reader_(reader), + file_(file), + opts_(opts), + read_reqs_(read_reqs), + num_reqs_(num_reqs), + next_(nullptr) {} + + bool await_ready() noexcept { return false; } + + // A return value of true means suspend the awaiter (calling coroutine). The + // awaiting_coro parameter is the handle of the awaiter. The handle can be + // resumed later, so we cache it here. + bool await_suspend( + folly::coro::impl::coroutine_handle<> awaiting_coro) noexcept { + awaiting_coro_ = awaiting_coro; + // MultiReadAsyncImpl always returns true, so caller will be suspended + return reader_.MultiReadAsyncImpl(this); + } + + void await_resume() noexcept {} + + private: + friend AsyncFileReader; + + // The parameters passed to MultiReadAsync are cached here when the caller + // calls MultiReadAsync. Later, when the execution of this awaitable is + // started, these are used to do the actual IO + AsyncFileReader& reader_; + RandomAccessFileReader* file_; + const IOOptions& opts_; + FSReadRequest* read_reqs_; + size_t num_reqs_; + autovector io_handle_; + autovector del_fn_; + folly::coro::impl::coroutine_handle<> awaiting_coro_; + // Use this to link to the next ReadAwaiter in the suspended coroutine + // list. The head and tail of the list are tracked by AsyncFileReader. + // We use this approach rather than an STL container in order to avoid + // extra memory allocations. The coroutine call already allocates a + // ReadAwaiter object. + ReadAwaiter* next_; + }; + + // An instance of ReadOperation is returned to the caller of MultiGetAsync. + // This represents an awaitable that can be started later. + template + class ReadOperation { + public: + explicit ReadOperation(AsyncFileReader& reader, + RandomAccessFileReader* file, const IOOptions& opts, + FSReadRequest* read_reqs, size_t num_reqs, + AlignedBuf* aligned_buf) noexcept + : reader_(reader), + file_(file), + opts_(opts), + read_reqs_(read_reqs), + num_reqs_(num_reqs), + aligned_buf_(aligned_buf) {} + + auto viaIfAsync(folly::Executor::KeepAlive<> executor) const { + return folly::coro::co_viaIfAsync( + std::move(executor), + Awaiter{reader_, file_, opts_, read_reqs_, num_reqs_, aligned_buf_}); + } + + private: + AsyncFileReader& reader_; + RandomAccessFileReader* file_; + const IOOptions& opts_; + FSReadRequest* read_reqs_; + size_t num_reqs_; + AlignedBuf* aligned_buf_; + }; + + // This function does the actual work when this awaitable starts execution + bool MultiReadAsyncImpl(ReadAwaiter* awaiter); + + // Called by the SingleThreadExecutor to poll for async IO completion. + // This also resumes the awaiting coroutines. 
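+  // [Editorial aside, not upstream code: the head_/tail_/next_ fields below
+  //  form an intrusive FIFO; appending an awaiter, as MultiReadAsyncImpl
+  //  does, needs no allocation at all:
+  //
+  //    if (tail_) tail_->next_ = awaiter;
+  //    tail_ = awaiter;
+  //    if (!head_) head_ = awaiter;
+  //
+  //  The nodes are the ReadAwaiter objects themselves, which the coroutine
+  //  frames already own, hence the "no extra memory allocations" note
+  //  above. Wait(), declared next, walks this list and resumes each
+  //  suspended coroutine.]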
+ void Wait(); + + // Head of the queue of awaiters waiting for async IO completion + ReadAwaiter* head_ = nullptr; + // Tail of the awaiter queue + ReadAwaiter* tail_ = nullptr; + // Total number of pending async IOs + size_t num_reqs_ = 0; + FileSystem* fs_; + Statistics* stats_; +}; +} // namespace ROCKSDB_NAMESPACE +#endif // USE_COROUTINES diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/autovector.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/autovector.h new file mode 100644 index 0000000000..79ee5de572 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/autovector.h @@ -0,0 +1,393 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "port/lang.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// A vector that leverages pre-allocated stack-based array to achieve better +// performance for array with small amount of items. +// +// The interface resembles that of vector, but with less features since we aim +// to solve the problem that we have in hand, rather than implementing a +// full-fledged generic container. +// +// Currently we don't support: +// * shrink_to_fit() +// If used correctly, in most cases, people should not touch the +// underlying vector at all. +// * random insert()/erase(), please only use push_back()/pop_back(). +// * No move/swap operations. Each autovector instance has a +// stack-allocated array and if we want support move/swap operations, we +// need to copy the arrays other than just swapping the pointers. In this +// case we'll just explicitly forbid these operations since they may +// lead users to make false assumption by thinking they are inexpensive +// operations. +// +// Naming style of public methods almost follows that of the STL's. +template +class autovector { + public: + // General STL-style container member types. 
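+  // [Editorial aside, not upstream code: the container's core indexing rule
+  //  is worth stating up front. Element i lives in the in-object array when
+  //  i < kSize and in the overflow std::vector otherwise:
+  //
+  //    reference operator[](size_type n) {
+  //      return n < kSize ? values_[n] : vect_[n - kSize];
+  //    }
+  //
+  //  e.g. with kSize == 8, index 10 resolves to vect_[2]. The real
+  //  operator[], defined further down, adds an assert(n < size()).]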
+ using value_type = T; + using difference_type = typename std::vector::difference_type; + using size_type = typename std::vector::size_type; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + + // This class is the base for regular/const iterator + template + class iterator_impl { + public: + // -- iterator traits + using self_type = iterator_impl; + using value_type = TValueType; + using reference = TValueType&; + using pointer = TValueType*; + using difference_type = typename TAutoVector::difference_type; + using iterator_category = std::random_access_iterator_tag; + + iterator_impl(TAutoVector* vect, size_t index) + : vect_(vect), index_(index){}; + iterator_impl(const iterator_impl&) = default; + ~iterator_impl() {} + iterator_impl& operator=(const iterator_impl&) = default; + + // -- Advancement + // ++iterator + self_type& operator++() { + ++index_; + return *this; + } + + // iterator++ + self_type operator++(int) { + auto old = *this; + ++index_; + return old; + } + + // --iterator + self_type& operator--() { + --index_; + return *this; + } + + // iterator-- + self_type operator--(int) { + auto old = *this; + --index_; + return old; + } + + self_type operator-(difference_type len) const { + return self_type(vect_, index_ - len); + } + + difference_type operator-(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ - other.index_; + } + + self_type operator+(difference_type len) const { + return self_type(vect_, index_ + len); + } + + self_type& operator+=(difference_type len) { + index_ += len; + return *this; + } + + self_type& operator-=(difference_type len) { + index_ -= len; + return *this; + } + + // -- Reference + reference operator*() const { + assert(vect_->size() >= index_); + return (*vect_)[index_]; + } + + pointer operator->() const { + assert(vect_->size() >= index_); + return &(*vect_)[index_]; + } + + reference operator[](difference_type len) const { return *(*this + len); } + + // -- Logical Operators + bool operator==(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ == other.index_; + } + + bool operator!=(const self_type& other) const { return !(*this == other); } + + bool operator>(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ > other.index_; + } + + bool operator<(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ < other.index_; + } + + bool operator>=(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ >= other.index_; + } + + bool operator<=(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ <= other.index_; + } + + private: + TAutoVector* vect_ = nullptr; + size_t index_ = 0; + }; + + using iterator = iterator_impl; + using const_iterator = iterator_impl; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + + autovector() : values_(reinterpret_cast(buf_)) {} + + autovector(std::initializer_list init_list) + : values_(reinterpret_cast(buf_)) { + for (const T& item : init_list) { + push_back(item); + } + } + + ~autovector() { clear(); } + + // -- Immutable operations + // Indicate if all data resides in in-stack data structure. + bool only_in_stack() const { + // If no element was inserted at all, the vector's capacity will be `0`. 
+ return vect_.capacity() == 0; + } + + size_type size() const { return num_stack_items_ + vect_.size(); } + + // resize does not guarantee anything about the contents of the newly + // available elements + void resize(size_type n) { + if (n > kSize) { + vect_.resize(n - kSize); + while (num_stack_items_ < kSize) { + new ((void*)(&values_[num_stack_items_++])) value_type(); + } + num_stack_items_ = kSize; + } else { + vect_.clear(); + while (num_stack_items_ < n) { + new ((void*)(&values_[num_stack_items_++])) value_type(); + } + while (num_stack_items_ > n) { + values_[--num_stack_items_].~value_type(); + } + } + } + + bool empty() const { return size() == 0; } + + size_type capacity() const { return kSize + vect_.capacity(); } + + void reserve(size_t cap) { + if (cap > kSize) { + vect_.reserve(cap - kSize); + } + + assert(cap <= capacity()); + } + + const_reference operator[](size_type n) const { + assert(n < size()); + if (n < kSize) { + return values_[n]; + } + return vect_[n - kSize]; + } + + reference operator[](size_type n) { + assert(n < size()); + if (n < kSize) { + return values_[n]; + } + return vect_[n - kSize]; + } + + const_reference at(size_type n) const { + assert(n < size()); + return (*this)[n]; + } + + reference at(size_type n) { + assert(n < size()); + return (*this)[n]; + } + + reference front() { + assert(!empty()); + return *begin(); + } + + const_reference front() const { + assert(!empty()); + return *begin(); + } + + reference back() { + assert(!empty()); + return *(end() - 1); + } + + const_reference back() const { + assert(!empty()); + return *(end() - 1); + } + + // -- Mutable Operations + void push_back(T&& item) { + if (num_stack_items_ < kSize) { + new ((void*)(&values_[num_stack_items_])) value_type(); + values_[num_stack_items_++] = std::move(item); + } else { + vect_.push_back(item); + } + } + + void push_back(const T& item) { + if (num_stack_items_ < kSize) { + new ((void*)(&values_[num_stack_items_])) value_type(); + values_[num_stack_items_++] = item; + } else { + vect_.push_back(item); + } + } + + template +#if _LIBCPP_STD_VER > 14 + reference emplace_back(Args&&... args) { + if (num_stack_items_ < kSize) { + return *(new ((void*)(&values_[num_stack_items_++])) + value_type(std::forward(args)...)); + } else { + return vect_.emplace_back(std::forward(args)...); + } + } +#else + void emplace_back(Args&&... 
args) { + if (num_stack_items_ < kSize) { + new ((void*)(&values_[num_stack_items_++])) + value_type(std::forward(args)...); + } else { + vect_.emplace_back(std::forward(args)...); + } + } +#endif + + void pop_back() { + assert(!empty()); + if (!vect_.empty()) { + vect_.pop_back(); + } else { + values_[--num_stack_items_].~value_type(); + } + } + + void clear() { + while (num_stack_items_ > 0) { + values_[--num_stack_items_].~value_type(); + } + vect_.clear(); + } + + // -- Copy and Assignment + autovector& assign(const autovector& other); + + autovector(const autovector& other) { assign(other); } + + autovector& operator=(const autovector& other) { return assign(other); } + + autovector(autovector&& other) noexcept { *this = std::move(other); } + autovector& operator=(autovector&& other); + + // -- Iterator Operations + iterator begin() { return iterator(this, 0); } + + const_iterator begin() const { return const_iterator(this, 0); } + + iterator end() { return iterator(this, this->size()); } + + const_iterator end() const { return const_iterator(this, this->size()); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + + reverse_iterator rend() { return reverse_iterator(begin()); } + + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + private: + size_type num_stack_items_ = 0; // current number of items + alignas(alignof( + value_type)) char buf_[kSize * + sizeof(value_type)]; // the first `kSize` items + pointer values_; + // used only if there are more than `kSize` items. + std::vector vect_; +}; + +template +autovector& autovector::assign( + const autovector& other) { + values_ = reinterpret_cast(buf_); + // copy the internal vector + vect_.assign(other.vect_.begin(), other.vect_.end()); + + // copy array + num_stack_items_ = other.num_stack_items_; + std::copy(other.values_, other.values_ + num_stack_items_, values_); + + return *this; +} + +template +autovector& autovector::operator=( + autovector&& other) { + values_ = reinterpret_cast(buf_); + vect_ = std::move(other.vect_); + size_t n = other.num_stack_items_; + num_stack_items_ = n; + other.num_stack_items_ = 0; + for (size_t i = 0; i < n; ++i) { + values_[i] = std::move(other.values_[i]); + } + return *this; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/bloom_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/bloom_impl.h new file mode 100644 index 0000000000..53b70aa681 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/bloom_impl.h @@ -0,0 +1,489 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Implementation details of various Bloom filter implementations used in +// RocksDB. (DynamicBloom is in a separate file for now because it +// supports concurrent write.) + +#pragma once +#include +#include + +#include + +#include "port/port.h" // for PREFETCH +#include "rocksdb/slice.h" +#include "util/hash.h" + +#ifdef __AVX2__ +#include +#endif + +namespace ROCKSDB_NAMESPACE { + +class BloomMath { + public: + // False positive rate of a standard Bloom filter, for given ratio of + // filter memory bits to added keys, and number of probes per operation. 
+  // (The false positive rate is effectively independent of scale, assuming
+  // the implementation scales OK.)
+  static double StandardFpRate(double bits_per_key, int num_probes) {
+    // Standard very-good-estimate formula. See
+    // https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
+    return std::pow(1.0 - std::exp(-num_probes / bits_per_key), num_probes);
+  }
+
+  // False positive rate of a "blocked"/"sharded"/"cache-local" Bloom filter,
+  // for given ratio of filter memory bits to added keys, number of probes per
+  // operation (all within the given block or cache line size), and block or
+  // cache line size.
+  static double CacheLocalFpRate(double bits_per_key, int num_probes,
+                                 int cache_line_bits) {
+    if (bits_per_key <= 0.0) {
+      // Fix a discontinuity
+      return 1.0;
+    }
+    double keys_per_cache_line = cache_line_bits / bits_per_key;
+    // A reasonable estimate is the average of the FP rates for one standard
+    // deviation above and below the mean bucket occupancy. See
+    // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#the-math
+    double keys_stddev = std::sqrt(keys_per_cache_line);
+    double crowded_fp = StandardFpRate(
+        cache_line_bits / (keys_per_cache_line + keys_stddev), num_probes);
+    double uncrowded_fp = StandardFpRate(
+        cache_line_bits / (keys_per_cache_line - keys_stddev), num_probes);
+    return (crowded_fp + uncrowded_fp) / 2;
+  }
+
+  // False positive rate of querying a new item against `num_keys` items, all
+  // hashed to `fingerprint_bits` bits. (This assumes the fingerprint hashes
+  // themselves are stored losslessly. See Section 4 of
+  // http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf)
+  static double FingerprintFpRate(size_t num_keys, int fingerprint_bits) {
+    double inv_fingerprint_space = std::pow(0.5, fingerprint_bits);
+    // Base estimate assumes each key maps to a unique fingerprint.
+    // Could be > 1 in extreme cases.
+    double base_estimate = num_keys * inv_fingerprint_space;
+    // To account for potential overlap, we choose between two formulas
+    if (base_estimate > 0.0001) {
+      // A very good formula assuming we don't construct a floating point
+      // number extremely close to 1. Always produces a probability < 1.
+      return 1.0 - std::exp(-base_estimate);
+    } else {
+      // A very good formula when base_estimate is far below 1. (Subtract
+      // away the integral-approximated sum that some key has same hash as
+      // one coming before it in a list.)
+      return base_estimate - (base_estimate * base_estimate * 0.5);
+    }
+  }
+
+  // Returns the probability of either of two independent(-ish) events
+  // happening, given their probabilities. (This is useful for combining
+  // results from StandardFpRate or CacheLocalFpRate with FingerprintFpRate
+  // for a hash-efficient Bloom filter's FP rate. See Section 4 of
+  // http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf)
+  static double IndependentProbabilitySum(double rate1, double rate2) {
+    // Use formula that avoids floating point extremely close to 1 if
+    // rates are extremely small.
+    return rate1 + rate2 - (rate1 * rate2);
+  }
+};
+
+// A fast, flexible, and accurate cache-local Bloom implementation with
+// SIMD-optimized query performance (currently using AVX2 on Intel). Write
+// performance and non-SIMD read are very good, benefiting from FastRange32
+// used in place of % and single-cycle multiplication on recent processors.
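+// [Editorial aside, not upstream code: plugging typical numbers into the
+//  BloomMath helpers above. At 10 bits/key with 6 probes,
+//
+//    StandardFpRate(10.0, 6) == pow(1 - exp(-0.6), 6) ~= 0.0084  (0.84%)
+//
+//  and the overall estimate for a cache-local filter combines the two
+//  error sources:
+//
+//    IndependentProbabilitySum(CacheLocalFpRate(10.0, 6, 512),
+//                              FingerprintFpRate(num_keys, 32));
+//
+//  which is exactly the shape of FastLocalBloomImpl::EstimatedFpRate
+//  defined below.]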
+// +// Most other SIMD Bloom implementations sacrifice flexibility and/or +// accuracy by requiring num_probes to be a power of two and restricting +// where each probe can occur in a cache line. This implementation sacrifices +// SIMD-optimization for add (might still be possible, especially with AVX512) +// in favor of allowing any num_probes, not crossing cache line boundary, +// and accuracy close to theoretical best accuracy for a cache-local Bloom. +// E.g. theoretical best for 10 bits/key, num_probes=6, and 512-bit bucket +// (Intel cache line size) is 0.9535% FP rate. This implementation yields +// about 0.957%. (Compare to LegacyLocalityBloomImpl at 1.138%, or +// about 0.951% for 1024-bit buckets, cache line size for some ARM CPUs.) +// +// This implementation can use a 32-bit hash (let h2 be h1 * 0x9e3779b9) or +// a 64-bit hash (split into two uint32s). With many millions of keys, the +// false positive rate associated with using a 32-bit hash can dominate the +// false positive rate of the underlying filter. At 10 bits/key setting, the +// inflection point is about 40 million keys, so 32-bit hash is a bad idea +// with 10s of millions of keys or more. +// +// Despite accepting a 64-bit hash, this implementation uses 32-bit fastrange +// to pick a cache line, which can be faster than 64-bit in some cases. +// This only hurts accuracy as you get into 10s of GB for a single filter, +// and accuracy abruptly breaks down at 256GB (2^32 cache lines). Switch to +// 64-bit fastrange if you need filters so big. ;) +// +// Using only a 32-bit input hash within each cache line has negligible +// impact for any reasonable cache line / bucket size, for arbitrary filter +// size, and potentially saves intermediate data size in some cases vs. +// tracking full 64 bits. (Even in an implementation using 64-bit arithmetic +// to generate indices, I might do the same, as a single multiplication +// suffices to generate a sufficiently mixed 64 bits from 32 bits.) +// +// This implementation is currently tied to Intel cache line size, 64 bytes == +// 512 bits. If there's sufficient demand for other cache line sizes, this is +// a pretty good implementation to extend, but slight performance enhancements +// are possible with an alternate implementation (probably not very compatible +// with SIMD): +// (1) Use rotation in addition to multiplication for remixing +// (like murmur hash). (Using multiplication alone *slightly* hurts accuracy +// because lower bits never depend on original upper bits.) +// (2) Extract more than one bit index from each re-mix. (Only if rotation +// or similar is part of remix, because otherwise you're making the +// multiplication-only problem worse.) +// (3) Re-mix full 64 bit hash, to get maximum number of bit indices per +// re-mix. +// +class FastLocalBloomImpl { + public: + // NOTE: this has only been validated to enough accuracy for producing + // reasonable warnings / user feedback, not for making functional decisions. + static double EstimatedFpRate(size_t keys, size_t bytes, int num_probes, + int hash_bits) { + return BloomMath::IndependentProbabilitySum( + BloomMath::CacheLocalFpRate(8.0 * bytes / keys, num_probes, + /*cache line bits*/ 512), + BloomMath::FingerprintFpRate(keys, hash_bits)); + } + + static inline int ChooseNumProbes(int millibits_per_key) { + // Since this implementation can (with AVX2) make up to 8 probes + // for the same cost, we pick the most accurate num_probes, based + // on actual tests of the implementation. 
Note that for higher + // bits/key, the best choice for cache-local Bloom can be notably + // smaller than standard bloom, e.g. 9 instead of 11 @ 16 b/k. + if (millibits_per_key <= 2080) { + return 1; + } else if (millibits_per_key <= 3580) { + return 2; + } else if (millibits_per_key <= 5100) { + return 3; + } else if (millibits_per_key <= 6640) { + return 4; + } else if (millibits_per_key <= 8300) { + return 5; + } else if (millibits_per_key <= 10070) { + return 6; + } else if (millibits_per_key <= 11720) { + return 7; + } else if (millibits_per_key <= 14001) { + // Would be something like <= 13800 but sacrificing *slightly* for + // more settings using <= 8 probes. + return 8; + } else if (millibits_per_key <= 16050) { + return 9; + } else if (millibits_per_key <= 18300) { + return 10; + } else if (millibits_per_key <= 22001) { + return 11; + } else if (millibits_per_key <= 25501) { + return 12; + } else if (millibits_per_key > 50000) { + // Top out at 24 probes (three sets of 8) + return 24; + } else { + // Roughly optimal choices for remaining range + // e.g. + // 28000 -> 12, 28001 -> 13 + // 50000 -> 23, 50001 -> 24 + return (millibits_per_key - 1) / 2000 - 1; + } + } + + static inline void AddHash(uint32_t h1, uint32_t h2, uint32_t len_bytes, + int num_probes, char *data) { + uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + AddHashPrepared(h2, num_probes, data + bytes_to_cache_line); + } + + static inline void AddHashPrepared(uint32_t h2, int num_probes, + char *data_at_cache_line) { + uint32_t h = h2; + for (int i = 0; i < num_probes; ++i, h *= uint32_t{0x9e3779b9}) { + // 9-bit address within 512 bit cache line + int bitpos = h >> (32 - 9); + data_at_cache_line[bitpos >> 3] |= (uint8_t{1} << (bitpos & 7)); + } + } + + static inline void PrepareHash(uint32_t h1, uint32_t len_bytes, + const char *data, + uint32_t /*out*/ *byte_offset) { + uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + PREFETCH(data + bytes_to_cache_line, 0 /* rw */, 1 /* locality */); + PREFETCH(data + bytes_to_cache_line + 63, 0 /* rw */, 1 /* locality */); + *byte_offset = bytes_to_cache_line; + } + + static inline bool HashMayMatch(uint32_t h1, uint32_t h2, uint32_t len_bytes, + int num_probes, const char *data) { + uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + return HashMayMatchPrepared(h2, num_probes, data + bytes_to_cache_line); + } + + static inline bool HashMayMatchPrepared(uint32_t h2, int num_probes, + const char *data_at_cache_line) { + uint32_t h = h2; +#ifdef __AVX2__ + int rem_probes = num_probes; + + // NOTE: For better performance for num_probes in {1, 2, 9, 10, 17, 18, + // etc.} one can insert specialized code for rem_probes <= 2, bypassing + // the SIMD code in those cases. There is a detectable but minor overhead + // applied to other values of num_probes (when not statically determined), + // but smoother performance curve vs. num_probes. But for now, when + // in doubt, don't add unnecessary code. + + // Powers of 32-bit golden ratio, mod 2**32. + const __m256i multipliers = + _mm256_setr_epi32(0x00000001, 0x9e3779b9, 0xe35e67b1, 0x734297e9, + 0x35fbe861, 0xdeb7c719, 0x448b211, 0x3459b749); + + for (;;) { + // Eight copies of hash + __m256i hash_vector = _mm256_set1_epi32(h); + + // Same effect as repeated multiplication by 0x9e3779b9 thanks to + // associativity of multiplication. 
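+      // [Editorial aside, not upstream code: the scalar equivalent of the
+      //  vector multiply below is the remix loop used elsewhere in this
+      //  file (see AddHashPrepared above):
+      //
+      //    for (int i = 0; i < 8; ++i, h *= uint32_t{0x9e3779b9}) {
+      //      int bitpos = h >> (32 - 9);  // top 9 bits pick a bit of 512
+      //      // ... test or set that bit ...
+      //    }
+      //
+      //  Multiplying one hash by the precomputed powers 0x9e3779b9^0..^7
+      //  yields all eight probe hashes in a single vector instruction.]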
+ hash_vector = _mm256_mullo_epi32(hash_vector, multipliers); + + // Now the top 9 bits of each of the eight 32-bit values in + // hash_vector are bit addresses for probes within the cache line. + // While the platform-independent code uses byte addressing (6 bits + // to pick a byte + 3 bits to pick a bit within a byte), here we work + // with 32-bit words (4 bits to pick a word + 5 bits to pick a bit + // within a word) because that works well with AVX2 and is equivalent + // under little-endian. + + // Shift each right by 28 bits to get 4-bit word addresses. + const __m256i word_addresses = _mm256_srli_epi32(hash_vector, 28); + + // Gather 32-bit values spread over 512 bits by 4-bit address. In + // essence, we are dereferencing eight pointers within the cache + // line. + // + // Option 1: AVX2 gather (seems to be a little slow - understandable) + // const __m256i value_vector = + // _mm256_i32gather_epi32(static_cast(data_at_cache_line), + // word_addresses, + // /*bytes / i32*/ 4); + // END Option 1 + // Potentially unaligned as we're not *always* cache-aligned -> loadu + const __m256i *mm_data = + reinterpret_cast(data_at_cache_line); + __m256i lower = _mm256_loadu_si256(mm_data); + __m256i upper = _mm256_loadu_si256(mm_data + 1); + // Option 2: AVX512VL permute hack + // Only negligibly faster than Option 3, so not yet worth supporting + // const __m256i value_vector = + // _mm256_permutex2var_epi32(lower, word_addresses, upper); + // END Option 2 + // Option 3: AVX2 permute+blend hack + // Use lowest three bits to order probing values, as if all from same + // 256 bit piece. + lower = _mm256_permutevar8x32_epi32(lower, word_addresses); + upper = _mm256_permutevar8x32_epi32(upper, word_addresses); + // Just top 1 bit of address, to select between lower and upper. + const __m256i upper_lower_selector = _mm256_srai_epi32(hash_vector, 31); + // Finally: the next 8 probed 32-bit values, in probing sequence order. + const __m256i value_vector = + _mm256_blendv_epi8(lower, upper, upper_lower_selector); + // END Option 3 + + // We might not need to probe all 8, so build a mask for selecting only + // what we need. (The k_selector(s) could be pre-computed but that + // doesn't seem to make a noticeable performance difference.) + const __m256i zero_to_seven = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + // Subtract rem_probes from each of those constants + __m256i k_selector = + _mm256_sub_epi32(zero_to_seven, _mm256_set1_epi32(rem_probes)); + // Negative after subtract -> use/select + // Keep only high bit (logical shift right each by 31). + k_selector = _mm256_srli_epi32(k_selector, 31); + + // Strip off the 4 bit word address (shift left) + __m256i bit_addresses = _mm256_slli_epi32(hash_vector, 4); + // And keep only 5-bit (32 - 27) bit-within-32-bit-word addresses. + bit_addresses = _mm256_srli_epi32(bit_addresses, 27); + // Build a bit mask + const __m256i bit_mask = _mm256_sllv_epi32(k_selector, bit_addresses); + + // Like ((~value_vector) & bit_mask) == 0) + bool match = _mm256_testc_si256(value_vector, bit_mask) != 0; + + // This check first so that it's easy for branch predictor to optimize + // num_probes <= 8 case, making it free of unpredictable branches. + if (rem_probes <= 8) { + return match; + } else if (!match) { + return false; + } + // otherwise + // Need another iteration. 
0xab25f4c1 == golden ratio to the 8th power + h *= 0xab25f4c1; + rem_probes -= 8; + } +#else + for (int i = 0; i < num_probes; ++i, h *= uint32_t{0x9e3779b9}) { + // 9-bit address within 512 bit cache line + int bitpos = h >> (32 - 9); + if ((data_at_cache_line[bitpos >> 3] & (char(1) << (bitpos & 7))) == 0) { + return false; + } + } + return true; +#endif + } +}; + +// A legacy Bloom filter implementation with no locality of probes (slow). +// It uses double hashing to generate a sequence of hash values. +// Asymptotic analysis is in [Kirsch,Mitzenmacher 2006], but known to have +// subtle accuracy flaws for practical sizes [Dillinger,Manolios 2004]. +// +// DO NOT REUSE +// +class LegacyNoLocalityBloomImpl { + public: + static inline int ChooseNumProbes(int bits_per_key) { + // We intentionally round down to reduce probing cost a little bit + int num_probes = static_cast(bits_per_key * 0.69); // 0.69 =~ ln(2) + if (num_probes < 1) num_probes = 1; + if (num_probes > 30) num_probes = 30; + return num_probes; + } + + static inline void AddHash(uint32_t h, uint32_t total_bits, int num_probes, + char *data) { + const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits + for (int i = 0; i < num_probes; i++) { + const uint32_t bitpos = h % total_bits; + data[bitpos / 8] |= (1 << (bitpos % 8)); + h += delta; + } + } + + static inline bool HashMayMatch(uint32_t h, uint32_t total_bits, + int num_probes, const char *data) { + const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits + for (int i = 0; i < num_probes; i++) { + const uint32_t bitpos = h % total_bits; + if ((data[bitpos / 8] & (1 << (bitpos % 8))) == 0) { + return false; + } + h += delta; + } + return true; + } +}; + +// A legacy Bloom filter implementation with probes local to a single +// cache line (fast). Because SST files might be transported between +// platforms, the cache line size is a parameter rather than hard coded. +// (But if specified as a constant parameter, an optimizing compiler +// should take advantage of that.) +// +// When ExtraRotates is false, this implementation is notably deficient in +// accuracy. Specifically, it uses double hashing with a 1/512 chance of the +// increment being zero (when cache line size is 512 bits). Thus, there's a +// 1/512 chance of probing only one index, which we'd expect to incur about +// a 1/2 * 1/512 or absolute 0.1% FP rate penalty. More detail at +// https://github.com/facebook/rocksdb/issues/4120 +// +// DO NOT REUSE +// +template +class LegacyLocalityBloomImpl { + private: + static inline uint32_t GetLine(uint32_t h, uint32_t num_lines) { + uint32_t offset_h = ExtraRotates ? (h >> 11) | (h << 21) : h; + return offset_h % num_lines; + } + + public: + // NOTE: this has only been validated to enough accuracy for producing + // reasonable warnings / user feedback, not for making functional decisions. + static double EstimatedFpRate(size_t keys, size_t bytes, int num_probes) { + double bits_per_key = 8.0 * bytes / keys; + double filter_rate = BloomMath::CacheLocalFpRate(bits_per_key, num_probes, + /*cache line bits*/ 512); + if (!ExtraRotates) { + // Good estimate of impact of flaw in index computation. + // Adds roughly 0.002 around 50 bits/key and 0.001 around 100 bits/key. + // The + 22 shifts it nicely to fit for lower bits/key. 
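+      // [Editorial aside, not upstream code: checking the claim above
+      //  against the correction term on the next line:
+      //    bits_per_key = 50  -> 0.1 / (37.5 + 22) ~= 0.0017
+      //    bits_per_key = 100 -> 0.1 / (75.0 + 22) ~= 0.0010
+      //  which matches the quoted ~0.002 and ~0.001 penalties.]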
+ filter_rate += 0.1 / (bits_per_key * 0.75 + 22); + } else { + // Not yet validated + assert(false); + } + // Always uses 32-bit hash + double fingerprint_rate = BloomMath::FingerprintFpRate(keys, 32); + return BloomMath::IndependentProbabilitySum(filter_rate, fingerprint_rate); + } + + static inline void AddHash(uint32_t h, uint32_t num_lines, int num_probes, + char *data, int log2_cache_line_bytes) { + const int log2_cache_line_bits = log2_cache_line_bytes + 3; + + char *data_at_offset = + data + (GetLine(h, num_lines) << log2_cache_line_bytes); + const uint32_t delta = (h >> 17) | (h << 15); + for (int i = 0; i < num_probes; ++i) { + // Mask to bit-within-cache-line address + const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1); + data_at_offset[bitpos / 8] |= (1 << (bitpos % 8)); + if (ExtraRotates) { + h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits)); + } + h += delta; + } + } + + static inline void PrepareHashMayMatch(uint32_t h, uint32_t num_lines, + const char *data, + uint32_t /*out*/ *byte_offset, + int log2_cache_line_bytes) { + uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes; + PREFETCH(data + b, 0 /* rw */, 1 /* locality */); + PREFETCH(data + b + ((1 << log2_cache_line_bytes) - 1), 0 /* rw */, + 1 /* locality */); + *byte_offset = b; + } + + static inline bool HashMayMatch(uint32_t h, uint32_t num_lines, + int num_probes, const char *data, + int log2_cache_line_bytes) { + uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes; + return HashMayMatchPrepared(h, num_probes, data + b, log2_cache_line_bytes); + } + + static inline bool HashMayMatchPrepared(uint32_t h, int num_probes, + const char *data_at_offset, + int log2_cache_line_bytes) { + const int log2_cache_line_bits = log2_cache_line_bytes + 3; + + const uint32_t delta = (h >> 17) | (h << 15); + for (int i = 0; i < num_probes; ++i) { + // Mask to bit-within-cache-line address + const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1); + if (((data_at_offset[bitpos / 8]) & (1 << (bitpos % 8))) == 0) { + return false; + } + if (ExtraRotates) { + h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits)); + } + h += delta; + } + return true; + } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc new file mode 100644 index 0000000000..67ef5d3495 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc @@ -0,0 +1,79 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + +#include + +#include "rocksdb/version.h" +#include "rocksdb/utilities/object_registry.h" +#include "util/string_util.h" + +// The build script may replace these values with real values based +// on whether or not GIT is available and the platform settings +static const std::string rocksdb_build_git_sha = "rocksdb_build_git_sha:"; +static const std::string rocksdb_build_git_tag = "rocksdb_build_git_tag:"""; +#define HAS_GIT_CHANGES 0 +#if HAS_GIT_CHANGES == 0 +// If HAS_GIT_CHANGES is 0, the GIT date is used. +// Use the time the branch/tag was last modified +static const std::string rocksdb_build_date = "rocksdb_build_date:"""; +#else +// If HAS_GIT_CHANGES is > 0, the branch/tag has modifications. +// Use the time the build was created. 
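+// [Editorial aside, not upstream code: each of these strings is a
+//  "name:value" pair that AddProperty below splits at the first ':'.
+//  A successful build-time substitution, for example (date value is
+//  illustrative),
+//
+//    "rocksdb_build_date:2024-06-26"  ->  {"rocksdb_build_date",
+//                                          "2024-06-26"}
+//
+//  while a failed substitution such as "rocksdb_build_date:@BUILD_DATE@"
+//  still has '@' immediately after the colon and is skipped.]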
+static const std::string rocksdb_build_date = "rocksdb_build_date:"""; +#endif + +extern "C" { + +} // extern "C" + +std::unordered_map ROCKSDB_NAMESPACE::ObjectRegistry::builtins_ = { + +}; + +namespace ROCKSDB_NAMESPACE { +static void AddProperty(std::unordered_map *props, const std::string& name) { + size_t colon = name.find(":"); + if (colon != std::string::npos && colon > 0 && colon < name.length() - 1) { + // If we found a "@:", then this property was a build-time substitution that failed. Skip it + size_t at = name.find("@", colon); + if (at != colon + 1) { + // Everything before the colon is the name, after is the value + (*props)[name.substr(0, colon)] = name.substr(colon + 1); + } + } +} + +static std::unordered_map* LoadPropertiesSet() { + auto * properties = new std::unordered_map(); + AddProperty(properties, rocksdb_build_git_sha); + AddProperty(properties, rocksdb_build_git_tag); + AddProperty(properties, rocksdb_build_date); + return properties; +} + +const std::unordered_map& GetRocksBuildProperties() { + static std::unique_ptr> props(LoadPropertiesSet()); + return *props; +} + +std::string GetRocksVersionAsString(bool with_patch) { + std::string version = std::to_string(ROCKSDB_MAJOR) + "." + std::to_string(ROCKSDB_MINOR); + if (with_patch) { + return version + "." + std::to_string(ROCKSDB_PATCH); + } else { + return version; + } +} + +std::string GetRocksBuildInfoAsString(const std::string& program, bool verbose) { + std::string info = program + " (RocksDB) " + GetRocksVersionAsString(true); + if (verbose) { + for (const auto& it : GetRocksBuildProperties()) { + info.append("\n "); + info.append(it.first); + info.append(": "); + info.append(it.second); + } + } + return info; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc.in b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc.in new file mode 100644 index 0000000000..56bc878562 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/build_version.cc.in @@ -0,0 +1,79 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + +#include + +#include "rocksdb/version.h" +#include "rocksdb/utilities/object_registry.h" +#include "util/string_util.h" + +// The build script may replace these values with real values based +// on whether or not GIT is available and the platform settings +static const std::string rocksdb_build_git_sha = "rocksdb_build_git_sha:@GIT_SHA@"; +static const std::string rocksdb_build_git_tag = "rocksdb_build_git_tag:@GIT_TAG@"; +#define HAS_GIT_CHANGES @GIT_MOD@ +#if HAS_GIT_CHANGES == 0 +// If HAS_GIT_CHANGES is 0, the GIT date is used. +// Use the time the branch/tag was last modified +static const std::string rocksdb_build_date = "rocksdb_build_date:@GIT_DATE@"; +#else +// If HAS_GIT_CHANGES is > 0, the branch/tag has modifications. +// Use the time the build was created. +static const std::string rocksdb_build_date = "rocksdb_build_date:@BUILD_DATE@"; +#endif + +extern "C" { +@ROCKSDB_PLUGIN_EXTERNS@ +} // extern "C" + +std::unordered_map ROCKSDB_NAMESPACE::ObjectRegistry::builtins_ = { + @ROCKSDB_PLUGIN_BUILTINS@ +}; + +namespace ROCKSDB_NAMESPACE { +static void AddProperty(std::unordered_map *props, const std::string& name) { + size_t colon = name.find(":"); + if (colon != std::string::npos && colon > 0 && colon < name.length() - 1) { + // If we found a "@:", then this property was a build-time substitution that failed. 
Skip it + size_t at = name.find("@", colon); + if (at != colon + 1) { + // Everything before the colon is the name, after is the value + (*props)[name.substr(0, colon)] = name.substr(colon + 1); + } + } +} + +static std::unordered_map* LoadPropertiesSet() { + auto * properties = new std::unordered_map(); + AddProperty(properties, rocksdb_build_git_sha); + AddProperty(properties, rocksdb_build_git_tag); + AddProperty(properties, rocksdb_build_date); + return properties; +} + +const std::unordered_map& GetRocksBuildProperties() { + static std::unique_ptr> props(LoadPropertiesSet()); + return *props; +} + +std::string GetRocksVersionAsString(bool with_patch) { + std::string version = std::to_string(ROCKSDB_MAJOR) + "." + std::to_string(ROCKSDB_MINOR); + if (with_patch) { + return version + "." + std::to_string(ROCKSDB_PATCH); + } else { + return version; + } +} + +std::string GetRocksBuildInfoAsString(const std::string& program, bool verbose) { + std::string info = program + " (RocksDB) " + GetRocksVersionAsString(true); + if (verbose) { + for (const auto& it : GetRocksBuildProperties()) { + info.append("\n "); + info.append(it.first); + info.append(": "); + info.append(it.second); + } + } + return info; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cast_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cast_util.h new file mode 100644 index 0000000000..c91b6ff1ee --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cast_util.h @@ -0,0 +1,42 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { +// The helper function to assert the move from dynamic_cast<> to +// static_cast<> is correct. This function is to deal with legacy code. +// It is not recommended to add new code to issue class casting. The preferred +// solution is to implement the functionality without a need of casting. +template +inline DestClass* static_cast_with_check(SrcClass* x) { + DestClass* ret = static_cast(x); +#ifdef ROCKSDB_USE_RTTI + assert(ret == dynamic_cast(x)); +#endif + return ret; +} + +// A wrapper around static_cast for lossless conversion between integral +// types, including enum types. For example, this can be used for converting +// between signed/unsigned or enum type and underlying type without fear of +// stripping away data, now or in the future. +template +inline To lossless_cast(From x) { + using FromValue = typename std::remove_reference::type; + static_assert( + std::is_integral::value || std::is_enum::value, + "Only works on integral types"); + static_assert(std::is_integral::value || std::is_enum::value, + "Only works on integral types"); + static_assert(sizeof(To) >= sizeof(FromValue), "Must be lossless"); + return static_cast(x); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/channel.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/channel.h new file mode 100644 index 0000000000..19b9562977 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/channel.h @@ -0,0 +1,69 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
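+// [Editorial aside, not upstream code: a quick note on lossless_cast from
+//  util/cast_util.h above. It is an ordinary static_cast guarded by
+//  compile-time width/type checks, so widening conversions compile and
+//  narrowing ones do not (assuming the destination type is the first
+//  template parameter):
+//
+//    uint64_t a = lossless_cast<uint64_t>(uint32_t{7});  // ok, widening
+//    // uint32_t b = lossless_cast<uint32_t>(uint64_t{7});  // rejected:
+//    //   static_assert(sizeof(To) >= sizeof(FromValue), "Must be lossless")
+//  ]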
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <utility>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+template <class T>
+class channel {
+ public:
+  explicit channel() : eof_(false) {}
+
+  channel(const channel&) = delete;
+  void operator=(const channel&) = delete;
+
+  void sendEof() {
+    std::lock_guard<std::mutex> lk(lock_);
+    eof_ = true;
+    cv_.notify_all();
+  }
+
+  bool eof() {
+    std::lock_guard<std::mutex> lk(lock_);
+    return buffer_.empty() && eof_;
+  }
+
+  size_t size() const {
+    std::lock_guard<std::mutex> lk(lock_);
+    return buffer_.size();
+  }
+
+  // writes elem to the queue
+  void write(T&& elem) {
+    std::unique_lock<std::mutex> lk(lock_);
+    buffer_.emplace(std::forward<T>(elem));
+    cv_.notify_one();
+  }
+
+  /// Moves a dequeued element onto elem, blocking until an element
+  /// is available.
+  // returns false if EOF
+  bool read(T& elem) {
+    std::unique_lock<std::mutex> lk(lock_);
+    cv_.wait(lk, [&] { return eof_ || !buffer_.empty(); });
+    if (eof_ && buffer_.empty()) {
+      return false;
+    }
+    elem = std::move(buffer_.front());
+    buffer_.pop();
+    cv_.notify_one();
+    return true;
+  }
+
+ private:
+  std::condition_variable cv_;
+  mutable std::mutex lock_;
+  std::queue<T> buffer_;
+  bool eof_;
+};
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cleanable.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cleanable.cc
new file mode 100644
index 0000000000..89a7ab9be3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/cleanable.cc
@@ -0,0 +1,181 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "rocksdb/cleanable.h"
+
+#include <atomic>
+#include <cassert>
+#include <utility>
+
+namespace ROCKSDB_NAMESPACE {
+
+Cleanable::Cleanable() {
+  cleanup_.function = nullptr;
+  cleanup_.next = nullptr;
+}
+
+Cleanable::~Cleanable() { DoCleanup(); }
+
+Cleanable::Cleanable(Cleanable&& other) noexcept { *this = std::move(other); }
+
+Cleanable& Cleanable::operator=(Cleanable&& other) noexcept {
+  assert(this != &other);  // https://stackoverflow.com/a/9322542/454544
+  cleanup_ = other.cleanup_;
+  other.cleanup_.function = nullptr;
+  other.cleanup_.next = nullptr;
+  return *this;
+}
+
+// If the entire linked list were on the heap, we could simply attach one
+// linked list to another. However, the head is an embedded object, to avoid
+// the cost of creating objects for most of the use cases where the Cleanable
+// has only one Cleanup to do. We could put everything on the heap if
+// benchmarks show no negative impact on performance.
+// Also we need to iterate on the linked list since there is no pointer to the
+// tail. We could add a tail pointer, but maintaining it might negatively
+// impact performance for the common case of one cleanup, where the tail
+// pointer is not needed. Again, benchmarks could clarify that.
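+// [Editorial aside, not upstream code: concretely, the first registered
+//  cleanup fills the embedded head node and costs no allocation; only the
+//  second and later ones heap-allocate a Cleanup and link it after the
+//  head, as RegisterCleanup below shows (names are placeholders):
+//
+//    Cleanable c;
+//    c.RegisterCleanup(f1, a1, b1);  // stored in cleanup_ (embedded head)
+//    c.RegisterCleanup(f2, a2, b2);  // new Cleanup, linked after the head
+//
+//  Both callbacks run when c is destroyed.]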
+// Even without a tail pointer we could iterate on the list, find the tail, and +// have only that node updated without the need to insert the Cleanups one by +// one. This however would be redundant when the source Cleanable has one or a +// few Cleanups which is the case most of the time. +// TODO(myabandeh): if the list is too long we should maintain a tail pointer +// and have the entire list (minus the head that has to be inserted separately) +// merged with the target linked list at once. +void Cleanable::DelegateCleanupsTo(Cleanable* other) { + assert(other != nullptr); + if (cleanup_.function == nullptr) { + return; + } + Cleanup* c = &cleanup_; + other->RegisterCleanup(c->function, c->arg1, c->arg2); + c = c->next; + while (c != nullptr) { + Cleanup* next = c->next; + other->RegisterCleanup(c); + c = next; + } + cleanup_.function = nullptr; + cleanup_.next = nullptr; +} + +void Cleanable::RegisterCleanup(Cleanable::Cleanup* c) { + assert(c != nullptr); + if (cleanup_.function == nullptr) { + cleanup_.function = c->function; + cleanup_.arg1 = c->arg1; + cleanup_.arg2 = c->arg2; + delete c; + } else { + c->next = cleanup_.next; + cleanup_.next = c; + } +} + +void Cleanable::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { + assert(func != nullptr); + Cleanup* c; + if (cleanup_.function == nullptr) { + c = &cleanup_; + } else { + c = new Cleanup; + c->next = cleanup_.next; + cleanup_.next = c; + } + c->function = func; + c->arg1 = arg1; + c->arg2 = arg2; +} + +struct SharedCleanablePtr::Impl : public Cleanable { + std::atomic ref_count{1}; // Start with 1 ref + void Ref() { ref_count.fetch_add(1, std::memory_order_relaxed); } + void Unref() { + if (ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + // Last ref + delete this; + } + } + static void UnrefWrapper(void* arg1, void* /*arg2*/) { + static_cast(arg1)->Unref(); + } +}; + +void SharedCleanablePtr::Reset() { + if (ptr_) { + ptr_->Unref(); + ptr_ = nullptr; + } +} + +void SharedCleanablePtr::Allocate() { + Reset(); + ptr_ = new Impl(); +} + +SharedCleanablePtr::SharedCleanablePtr(const SharedCleanablePtr& from) { + *this = from; +} + +SharedCleanablePtr::SharedCleanablePtr(SharedCleanablePtr&& from) noexcept { + *this = std::move(from); +} + +SharedCleanablePtr& SharedCleanablePtr::operator=( + const SharedCleanablePtr& from) { + if (this != &from) { + Reset(); + ptr_ = from.ptr_; + if (ptr_) { + ptr_->Ref(); + } + } + return *this; +} + +SharedCleanablePtr& SharedCleanablePtr::operator=( + SharedCleanablePtr&& from) noexcept { + assert(this != &from); // https://stackoverflow.com/a/9322542/454544 + Reset(); + ptr_ = from.ptr_; + from.ptr_ = nullptr; + return *this; +} + +SharedCleanablePtr::~SharedCleanablePtr() { Reset(); } + +Cleanable& SharedCleanablePtr::operator*() { + return *ptr_; // implicit upcast +} + +Cleanable* SharedCleanablePtr::operator->() { + return ptr_; // implicit upcast +} + +Cleanable* SharedCleanablePtr::get() { + return ptr_; // implicit upcast +} + +void SharedCleanablePtr::RegisterCopyWith(Cleanable* target) { + if (ptr_) { + // "Virtual" copy of the pointer + ptr_->Ref(); + target->RegisterCleanup(&Impl::UnrefWrapper, ptr_, nullptr); + } +} + +void SharedCleanablePtr::MoveAsCleanupTo(Cleanable* target) { + if (ptr_) { + // "Virtual" move of the pointer + target->RegisterCleanup(&Impl::UnrefWrapper, ptr_, nullptr); + ptr_ = nullptr; + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.cc 
b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.cc
new file mode 100644
index 0000000000..3da8afaa27
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.cc
@@ -0,0 +1,90 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/coding.h"
+
+#include <algorithm>
+
+#include "rocksdb/slice.h"
+#include "rocksdb/slice_transform.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// conversion' conversion from 'type1' to 'type2', possible loss of data
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4244)
+#endif
+char* EncodeVarint32(char* dst, uint32_t v) {
+  // Operate on characters as unsigneds
+  unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+  static const int B = 128;
+  if (v < (1 << 7)) {
+    *(ptr++) = v;
+  } else if (v < (1 << 14)) {
+    *(ptr++) = v | B;
+    *(ptr++) = v >> 7;
+  } else if (v < (1 << 21)) {
+    *(ptr++) = v | B;
+    *(ptr++) = (v >> 7) | B;
+    *(ptr++) = v >> 14;
+  } else if (v < (1 << 28)) {
+    *(ptr++) = v | B;
+    *(ptr++) = (v >> 7) | B;
+    *(ptr++) = (v >> 14) | B;
+    *(ptr++) = v >> 21;
+  } else {
+    *(ptr++) = v | B;
+    *(ptr++) = (v >> 7) | B;
+    *(ptr++) = (v >> 14) | B;
+    *(ptr++) = (v >> 21) | B;
+    *(ptr++) = v >> 28;
+  }
+  return reinterpret_cast<char*>(ptr);
+}
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+const char* GetVarint32PtrFallback(const char* p, const char* limit,
+                                   uint32_t* value) {
+  uint32_t result = 0;
+  for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) {
+    uint32_t byte = *(reinterpret_cast<const unsigned char*>(p));
+    p++;
+    if (byte & 128) {
+      // More bytes are present
+      result |= ((byte & 127) << shift);
+    } else {
+      result |= (byte << shift);
+      *value = result;
+      return reinterpret_cast<const char*>(p);
+    }
+  }
+  return nullptr;
+}
+
+const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) {
+  uint64_t result = 0;
+  for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) {
+    uint64_t byte = *(reinterpret_cast<const unsigned char*>(p));
+    p++;
+    if (byte & 128) {
+      // More bytes are present
+      result |= ((byte & 127) << shift);
+    } else {
+      result |= (byte << shift);
+      *value = result;
+      return reinterpret_cast<const char*>(p);
+    }
+  }
+  return nullptr;
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.h
new file mode 100644
index 0000000000..3168fd2fd1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding.h
@@ -0,0 +1,389 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
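+// [Editorial aside, not upstream code: a worked varint32 example to go
+//  with EncodeVarint32 above. Values are emitted 7 bits at a time, low
+//  bits first, with the top bit of each byte flagging "more bytes follow":
+//
+//    v = 300 (binary 1_0010_1100)
+//    byte 0: (300 & 0x7f) | 0x80 == 0xAC  // low 7 bits, continuation set
+//    byte 1: 300 >> 7             == 0x02  // remaining bits, top bit clear
+//
+//  so 300 encodes as AC 02, and GetVarint32Ptr reassembles it by or-ing
+//  each 7-bit group back in at shifts 0, 7, 14, ...]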
+//
+// Encoding independent of machine byte order:
+// * Fixed-length numbers are encoded with least-significant byte first
+//   (little endian, native order on Intel and others)
+// * In addition we support variable length "varint" encoding
+// * Strings are encoded prefixed by their length in varint format
+//
+// Some related functions are provided in coding_lean.h
+
+#pragma once
+#include <algorithm>
+#include <string>
+
+#include "port/port.h"
+#include "rocksdb/slice.h"
+#include "util/coding_lean.h"
+
+// Some processors do not allow unaligned access to memory
+#if defined(__sparc)
+#define PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+// The maximum length of a varint in bytes for 64-bit.
+const uint32_t kMaxVarint64Length = 10;
+
+// Standard Put... routines append to a string
+extern void PutFixed16(std::string* dst, uint16_t value);
+extern void PutFixed32(std::string* dst, uint32_t value);
+extern void PutFixed64(std::string* dst, uint64_t value);
+extern void PutVarint32(std::string* dst, uint32_t value);
+extern void PutVarint32Varint32(std::string* dst, uint32_t value1,
+                                uint32_t value2);
+extern void PutVarint32Varint32Varint32(std::string* dst, uint32_t value1,
+                                        uint32_t value2, uint32_t value3);
+extern void PutVarint64(std::string* dst, uint64_t value);
+extern void PutVarint64Varint64(std::string* dst, uint64_t value1,
+                                uint64_t value2);
+extern void PutVarint32Varint64(std::string* dst, uint32_t value1,
+                                uint64_t value2);
+extern void PutVarint32Varint32Varint64(std::string* dst, uint32_t value1,
+                                        uint32_t value2, uint64_t value3);
+extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
+extern void PutLengthPrefixedSliceParts(std::string* dst,
+                                        const SliceParts& slice_parts);
+extern void PutLengthPrefixedSlicePartsWithPadding(
+    std::string* dst, const SliceParts& slice_parts, size_t pad_sz);
+
+// Standard Get... routines parse a value from the beginning of a Slice
+// and advance the slice past the parsed value.
+extern bool GetFixed64(Slice* input, uint64_t* value);
+extern bool GetFixed32(Slice* input, uint32_t* value);
+extern bool GetFixed16(Slice* input, uint16_t* value);
+extern bool GetVarint32(Slice* input, uint32_t* value);
+extern bool GetVarint64(Slice* input, uint64_t* value);
+extern bool GetVarsignedint64(Slice* input, int64_t* value);
+extern bool GetLengthPrefixedSlice(Slice* input, Slice* result);
+// This function assumes data is well-formed.
+extern Slice GetLengthPrefixedSlice(const char* data);
+
+extern Slice GetSliceUntil(Slice* slice, char delimiter);
+
+// Borrowed from
+// https://github.com/facebook/fbthrift/blob/449a5f77f9f9bae72c9eb5e78093247eef185c04/thrift/lib/cpp/util/VarintUtils-inl.h#L202-L208
+constexpr inline uint64_t i64ToZigzag(const int64_t l) {
+  return (static_cast<uint64_t>(l) << 1) ^ static_cast<uint64_t>(l >> 63);
+}
+inline int64_t zigzagToI64(uint64_t n) {
+  return (n >> 1) ^ -static_cast<int64_t>(n & 1);
+}
+
+// Pointer-based variants of GetVarint... These either store a value
+// in *v and return a pointer just past the parsed value, or return
+// nullptr on error. These routines only look at bytes in the range
+// [p..limit-1]
+extern const char* GetVarint32Ptr(const char* p, const char* limit,
+                                  uint32_t* v);
+extern const char* GetVarint64Ptr(const char* p, const char* limit,
+                                  uint64_t* v);
+inline const char* GetVarsignedint64Ptr(const char* p, const char* limit,
+                                        int64_t* value) {
+  uint64_t u = 0;
+  const char* ret = GetVarint64Ptr(p, limit, &u);
+  *value = zigzagToI64(u);
+  return ret;
+}
+
+// Returns the length of the varint32 or varint64 encoding of "v"
+extern int VarintLength(uint64_t v);
+
+// Lower-level versions of Put... that write directly into a character buffer
+// and return a pointer just past the last byte written.
+// REQUIRES: dst has enough space for the value being written
+extern char* EncodeVarint32(char* dst, uint32_t value);
+extern char* EncodeVarint64(char* dst, uint64_t value);
+
+// Internal routine for use by fallback path of GetVarint32Ptr
+extern const char* GetVarint32PtrFallback(const char* p, const char* limit,
+                                          uint32_t* value);
+inline const char* GetVarint32Ptr(const char* p, const char* limit,
+                                  uint32_t* value) {
+  if (p < limit) {
+    uint32_t result = *(reinterpret_cast<const unsigned char*>(p));
+    if ((result & 128) == 0) {
+      *value = result;
+      return p + 1;
+    }
+  }
+  return GetVarint32PtrFallback(p, limit, value);
+}
+
+// Pull the last 8 bits and cast it to a character
+inline void PutFixed16(std::string* dst, uint16_t value) {
+  if (port::kLittleEndian) {
+    dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
+                sizeof(value));
+  } else {
+    char buf[sizeof(value)];
+    EncodeFixed16(buf, value);
+    dst->append(buf, sizeof(buf));
+  }
+}
+
+inline void PutFixed32(std::string* dst, uint32_t value) {
+  if (port::kLittleEndian) {
+    dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
+                sizeof(value));
+  } else {
+    char buf[sizeof(value)];
+    EncodeFixed32(buf, value);
+    dst->append(buf, sizeof(buf));
+  }
+}
+
+inline void PutFixed64(std::string* dst, uint64_t value) {
+  if (port::kLittleEndian) {
+    dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
+                sizeof(value));
+  } else {
+    char buf[sizeof(value)];
+    EncodeFixed64(buf, value);
+    dst->append(buf, sizeof(buf));
+  }
+}
+
+inline void PutVarint32(std::string* dst, uint32_t v) {
+  char buf[5];
+  char* ptr = EncodeVarint32(buf, v);
+  dst->append(buf, static_cast<size_t>(ptr - buf));
+}
+
+inline void PutVarint32Varint32(std::string* dst, uint32_t v1, uint32_t v2) {
+  char buf[10];
+  char* ptr = EncodeVarint32(buf, v1);
+  ptr = EncodeVarint32(ptr, v2);
+  dst->append(buf, static_cast<size_t>(ptr - buf));
+}
+
+inline void PutVarint32Varint32Varint32(std::string* dst, uint32_t v1,
+                                        uint32_t v2, uint32_t v3) {
+  char buf[15];
+  char* ptr = EncodeVarint32(buf, v1);
+  ptr = EncodeVarint32(ptr, v2);
+  ptr = EncodeVarint32(ptr, v3);
+  dst->append(buf, static_cast<size_t>(ptr - buf));
+}
+
+inline char* EncodeVarint64(char* dst, uint64_t v) {
+  static const unsigned int B = 128;
+  unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+  while (v >= B) {
+    *(ptr++) = (v & (B - 1)) | B;
+    v >>= 7;
+  }
+  *(ptr++) = static_cast<unsigned char>(v);
+  return reinterpret_cast<char*>(ptr);
+}
+
+inline void PutVarint64(std::string* dst, uint64_t v) {
+  char buf[kMaxVarint64Length];
+  char* ptr = EncodeVarint64(buf, v);
+  dst->append(buf, static_cast<size_t>(ptr - buf));
+}
+
+inline void PutVarsignedint64(std::string* dst, int64_t v) {
+  char buf[kMaxVarint64Length];
+  // Using Zigzag format to convert signed to unsigned
+  char* ptr = EncodeVarint64(buf, i64ToZigzag(v));
+  dst->append(buf, static_cast<size_t>(ptr - buf));
+}
+
+inline void PutVarint64Varint64(std::string* dst,
uint64_t v1, uint64_t v2) { + char buf[20]; + char* ptr = EncodeVarint64(buf, v1); + ptr = EncodeVarint64(ptr, v2); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutVarint32Varint64(std::string* dst, uint32_t v1, uint64_t v2) { + char buf[15]; + char* ptr = EncodeVarint32(buf, v1); + ptr = EncodeVarint64(ptr, v2); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutVarint32Varint32Varint64(std::string* dst, uint32_t v1, + uint32_t v2, uint64_t v3) { + char buf[20]; + char* ptr = EncodeVarint32(buf, v1); + ptr = EncodeVarint32(ptr, v2); + ptr = EncodeVarint64(ptr, v3); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutLengthPrefixedSlice(std::string* dst, const Slice& value) { + PutVarint32(dst, static_cast(value.size())); + dst->append(value.data(), value.size()); +} + +inline void PutLengthPrefixedSliceParts(std::string* dst, size_t total_bytes, + const SliceParts& slice_parts) { + for (int i = 0; i < slice_parts.num_parts; ++i) { + total_bytes += slice_parts.parts[i].size(); + } + PutVarint32(dst, static_cast(total_bytes)); + for (int i = 0; i < slice_parts.num_parts; ++i) { + dst->append(slice_parts.parts[i].data(), slice_parts.parts[i].size()); + } +} + +inline void PutLengthPrefixedSliceParts(std::string* dst, + const SliceParts& slice_parts) { + PutLengthPrefixedSliceParts(dst, /*total_bytes=*/0, slice_parts); +} + +inline void PutLengthPrefixedSlicePartsWithPadding( + std::string* dst, const SliceParts& slice_parts, size_t pad_sz) { + PutLengthPrefixedSliceParts(dst, /*total_bytes=*/pad_sz, slice_parts); + dst->append(pad_sz, '\0'); +} + +inline int VarintLength(uint64_t v) { + int len = 1; + while (v >= 128) { + v >>= 7; + len++; + } + return len; +} + +inline bool GetFixed64(Slice* input, uint64_t* value) { + if (input->size() < sizeof(uint64_t)) { + return false; + } + *value = DecodeFixed64(input->data()); + input->remove_prefix(sizeof(uint64_t)); + return true; +} + +inline bool GetFixed32(Slice* input, uint32_t* value) { + if (input->size() < sizeof(uint32_t)) { + return false; + } + *value = DecodeFixed32(input->data()); + input->remove_prefix(sizeof(uint32_t)); + return true; +} + +inline bool GetFixed16(Slice* input, uint16_t* value) { + if (input->size() < sizeof(uint16_t)) { + return false; + } + *value = DecodeFixed16(input->data()); + input->remove_prefix(sizeof(uint16_t)); + return true; +} + +inline bool GetVarint32(Slice* input, uint32_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint32Ptr(p, limit, value); + if (q == nullptr) { + return false; + } else { + *input = Slice(q, static_cast(limit - q)); + return true; + } +} + +inline bool GetVarint64(Slice* input, uint64_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint64Ptr(p, limit, value); + if (q == nullptr) { + return false; + } else { + *input = Slice(q, static_cast(limit - q)); + return true; + } +} + +inline bool GetVarsignedint64(Slice* input, int64_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarsignedint64Ptr(p, limit, value); + if (q == nullptr) { + return false; + } else { + *input = Slice(q, static_cast(limit - q)); + return true; + } +} + +inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) { + uint32_t len = 0; + if (GetVarint32(input, &len) && input->size() >= len) { + *result = Slice(input->data(), len); + input->remove_prefix(len); + return true; + } else { + 
return false; + } +} + +inline Slice GetLengthPrefixedSlice(const char* data) { + uint32_t len = 0; + // +5: we assume "data" is not corrupted + // unsigned char is 7 bits, uint32_t is 32 bits, need 5 unsigned char + auto p = GetVarint32Ptr(data, data + 5 /* limit */, &len); + return Slice(p, len); +} + +inline Slice GetSliceUntil(Slice* slice, char delimiter) { + uint32_t len = 0; + for (len = 0; len < slice->size() && slice->data()[len] != delimiter; ++len) { + // nothing + } + + Slice ret(slice->data(), len); + slice->remove_prefix(len + ((len < slice->size()) ? 1 : 0)); + return ret; +} + +template +#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +inline void +PutUnaligned(T* memory, const T& value) { +#if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED) + char* nonAlignedMemory = reinterpret_cast(memory); + memcpy(nonAlignedMemory, reinterpret_cast(&value), sizeof(T)); +#else + *memory = value; +#endif +} + +template +#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +inline void +GetUnaligned(const T* memory, T* value) { +#if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED) + char* nonAlignedMemory = reinterpret_cast(value); + memcpy(nonAlignedMemory, reinterpret_cast(memory), sizeof(T)); +#else + *value = *memory; +#endif +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding_lean.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding_lean.h new file mode 100644 index 0000000000..6966f7a668 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coding_lean.h @@ -0,0 +1,101 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Encoding independent of machine byte order: +// * Fixed-length numbers are encoded with least-significant byte first +// (little endian, native order on Intel and others) +// +// More functions in coding.h + +#pragma once + +#include +#include + +#include "port/port.h" // for port::kLittleEndian + +namespace ROCKSDB_NAMESPACE { + +// Lower-level versions of Put... that write directly into a character buffer +// REQUIRES: dst has enough space for the value being written +// -- Implementation of the functions declared above +inline void EncodeFixed16(char* buf, uint16_t value) { + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + } +} + +inline void EncodeFixed32(char* buf, uint32_t value) { + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + } +} + +inline void EncodeFixed64(char* buf, uint64_t value) { + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + buf[4] = (value >> 32) & 0xff; + buf[5] = (value >> 40) & 0xff; + buf[6] = (value >> 48) & 0xff; + buf[7] = (value >> 56) & 0xff; + } +} + +// Lower-level versions of Get... 
that read directly from a character buffer
+// without any bounds checking.
+
+inline uint16_t DecodeFixed16(const char* ptr) {
+  if (port::kLittleEndian) {
+    // Load the raw bytes
+    uint16_t result;
+    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
+    return result;
+  } else {
+    return ((static_cast<uint16_t>(static_cast<unsigned char>(ptr[0]))) |
+            (static_cast<uint16_t>(static_cast<unsigned char>(ptr[1])) << 8));
+  }
+}
+
+inline uint32_t DecodeFixed32(const char* ptr) {
+  if (port::kLittleEndian) {
+    // Load the raw bytes
+    uint32_t result;
+    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
+    return result;
+  } else {
+    return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0]))) |
+            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8) |
+            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16) |
+            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
+  }
+}
+
+inline uint64_t DecodeFixed64(const char* ptr) {
+  if (port::kLittleEndian) {
+    // Load the raw bytes
+    uint64_t result;
+    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
+    return result;
+  } else {
+    uint64_t lo = DecodeFixed32(ptr);
+    uint64_t hi = DecodeFixed32(ptr + 4);
+    return (hi << 32) | lo;
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compaction_job_stats_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compaction_job_stats_impl.cc
new file mode 100644
index 0000000000..587a26f247
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compaction_job_stats_impl.cc
@@ -0,0 +1,92 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
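+//
+// (Editor's aside on the coding_lean.h helpers above, illustrative only:
+// the encoded form is little-endian regardless of host byte order, so
+//
+//   char buf[4];
+//   EncodeFixed32(buf, 0x01020304u);  // buf[0] == 0x04, buf[3] == 0x01
+//   assert(DecodeFixed32(buf) == 0x01020304u);
+//
+// round-trips identically on little- and big-endian machines.)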
+ +#include "rocksdb/compaction_job_stats.h" + +namespace ROCKSDB_NAMESPACE { + + +void CompactionJobStats::Reset() { + elapsed_micros = 0; + cpu_micros = 0; + + num_input_records = 0; + num_blobs_read = 0; + num_input_files = 0; + num_input_files_at_output_level = 0; + + num_output_records = 0; + num_output_files = 0; + num_output_files_blob = 0; + + is_full_compaction = false; + is_manual_compaction = false; + + total_input_bytes = 0; + total_blob_bytes_read = 0; + total_output_bytes = 0; + total_output_bytes_blob = 0; + + num_records_replaced = 0; + + total_input_raw_key_bytes = 0; + total_input_raw_value_bytes = 0; + + num_input_deletion_records = 0; + num_expired_deletion_records = 0; + + num_corrupt_keys = 0; + + file_write_nanos = 0; + file_range_sync_nanos = 0; + file_fsync_nanos = 0; + file_prepare_write_nanos = 0; + + smallest_output_key_prefix.clear(); + largest_output_key_prefix.clear(); + + num_single_del_fallthru = 0; + num_single_del_mismatch = 0; +} + +void CompactionJobStats::Add(const CompactionJobStats& stats) { + elapsed_micros += stats.elapsed_micros; + cpu_micros += stats.cpu_micros; + + num_input_records += stats.num_input_records; + num_blobs_read += stats.num_blobs_read; + num_input_files += stats.num_input_files; + num_input_files_at_output_level += stats.num_input_files_at_output_level; + + num_output_records += stats.num_output_records; + num_output_files += stats.num_output_files; + num_output_files_blob += stats.num_output_files_blob; + + total_input_bytes += stats.total_input_bytes; + total_blob_bytes_read += stats.total_blob_bytes_read; + total_output_bytes += stats.total_output_bytes; + total_output_bytes_blob += stats.total_output_bytes_blob; + + num_records_replaced += stats.num_records_replaced; + + total_input_raw_key_bytes += stats.total_input_raw_key_bytes; + total_input_raw_value_bytes += stats.total_input_raw_value_bytes; + + num_input_deletion_records += stats.num_input_deletion_records; + num_expired_deletion_records += stats.num_expired_deletion_records; + + num_corrupt_keys += stats.num_corrupt_keys; + + file_write_nanos += stats.file_write_nanos; + file_range_sync_nanos += stats.file_range_sync_nanos; + file_fsync_nanos += stats.file_fsync_nanos; + file_prepare_write_nanos += stats.file_prepare_write_nanos; + + num_single_del_fallthru += stats.num_single_del_fallthru; + num_single_del_mismatch += stats.num_single_del_mismatch; +} + + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/comparator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/comparator.cc new file mode 100644 index 0000000000..19fd47387e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/comparator.cc @@ -0,0 +1,383 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+
+#include "rocksdb/comparator.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <sstream>
+
+#include "db/dbformat.h"
+#include "port/lang.h"
+#include "port/port.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/utilities/customizable_util.h"
+#include "rocksdb/utilities/object_registry.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+class BytewiseComparatorImpl : public Comparator {
+ public:
+  BytewiseComparatorImpl() {}
+  static const char* kClassName() { return "leveldb.BytewiseComparator"; }
+  const char* Name() const override { return kClassName(); }
+
+  int Compare(const Slice& a, const Slice& b) const override {
+    return a.compare(b);
+  }
+
+  bool Equal(const Slice& a, const Slice& b) const override { return a == b; }
+
+  void FindShortestSeparator(std::string* start,
+                             const Slice& limit) const override {
+    // Find length of common prefix
+    size_t min_length = std::min(start->size(), limit.size());
+    size_t diff_index = 0;
+    while ((diff_index < min_length) &&
+           ((*start)[diff_index] == limit[diff_index])) {
+      diff_index++;
+    }
+
+    if (diff_index >= min_length) {
+      // Do not shorten if one string is a prefix of the other
+    } else {
+      uint8_t start_byte = static_cast<uint8_t>((*start)[diff_index]);
+      uint8_t limit_byte = static_cast<uint8_t>(limit[diff_index]);
+      if (start_byte >= limit_byte) {
+        // Cannot shorten since limit is smaller than start or start is
+        // already the shortest possible.
+        return;
+      }
+      assert(start_byte < limit_byte);
+
+      if (diff_index < limit.size() - 1 || start_byte + 1 < limit_byte) {
+        (*start)[diff_index]++;
+        start->resize(diff_index + 1);
+      } else {
+        //     v
+        // A A 1 A A A
+        // A A 2
+        //
+        // Incrementing the current byte will make start bigger than limit, we
+        // will skip this byte, and find the first non 0xFF byte in start and
+        // increment it.
+        diff_index++;
+
+        while (diff_index < start->size()) {
+          // Keep moving until we find the first non 0xFF byte to
+          // increment it
+          if (static_cast<uint8_t>((*start)[diff_index]) <
+              static_cast<uint8_t>(0xff)) {
+            (*start)[diff_index]++;
+            start->resize(diff_index + 1);
+            break;
+          }
+          diff_index++;
+        }
+      }
+      assert(Compare(*start, limit) < 0);
+    }
+  }
+
+  void FindShortSuccessor(std::string* key) const override {
+    // Find first character that can be incremented
+    size_t n = key->size();
+    for (size_t i = 0; i < n; i++) {
+      const uint8_t byte = (*key)[i];
+      if (byte != static_cast<uint8_t>(0xff)) {
+        (*key)[i] = byte + 1;
+        key->resize(i + 1);
+        return;
+      }
+    }
+    // *key is a run of 0xffs. Leave it alone.
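+    // (Editor's illustration: "abc" -> "b", "a\xff\xff" -> "b", and a key
+    // consisting entirely of 0xff bytes is returned unchanged.)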
+ } + + bool IsSameLengthImmediateSuccessor(const Slice& s, + const Slice& t) const override { + if (s.size() != t.size() || s.size() == 0) { + return false; + } + size_t diff_ind = s.difference_offset(t); + // same slice + if (diff_ind >= s.size()) return false; + uint8_t byte_s = static_cast(s[diff_ind]); + uint8_t byte_t = static_cast(t[diff_ind]); + // first different byte must be consecutive, and remaining bytes must be + // 0xff for s and 0x00 for t + if (byte_s != uint8_t{0xff} && byte_s + 1 == byte_t) { + for (size_t i = diff_ind + 1; i < s.size(); ++i) { + byte_s = static_cast(s[i]); + byte_t = static_cast(t[i]); + if (byte_s != uint8_t{0xff} || byte_t != uint8_t{0x00}) { + return false; + } + } + return true; + } else { + return false; + } + } + + bool CanKeysWithDifferentByteContentsBeEqual() const override { + return false; + } + + using Comparator::CompareWithoutTimestamp; + int CompareWithoutTimestamp(const Slice& a, bool /*a_has_ts*/, const Slice& b, + bool /*b_has_ts*/) const override { + return a.compare(b); + } + + bool EqualWithoutTimestamp(const Slice& a, const Slice& b) const override { + return a == b; + } +}; + +class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl { + public: + ReverseBytewiseComparatorImpl() {} + + static const char* kClassName() { + return "rocksdb.ReverseBytewiseComparator"; + } + const char* Name() const override { return kClassName(); } + + int Compare(const Slice& a, const Slice& b) const override { + return -a.compare(b); + } + + void FindShortestSeparator(std::string* start, + const Slice& limit) const override { + // Find length of common prefix + size_t min_length = std::min(start->size(), limit.size()); + size_t diff_index = 0; + while ((diff_index < min_length) && + ((*start)[diff_index] == limit[diff_index])) { + diff_index++; + } + + assert(diff_index <= min_length); + if (diff_index == min_length) { + // Do not shorten if one string is a prefix of the other + // + // We could handle cases like: + // V + // A A 2 X Y + // A A 2 + // in a similar way as BytewiseComparator::FindShortestSeparator(). + // We keep it simple by not implementing it. We can come back to it + // later when needed. + } else { + uint8_t start_byte = static_cast((*start)[diff_index]); + uint8_t limit_byte = static_cast(limit[diff_index]); + if (start_byte > limit_byte && diff_index < start->size() - 1) { + // Case like + // V + // A A 3 A A + // A A 1 B B + // + // or + // v + // A A 2 A A + // A A 1 B B + // In this case "AA2" will be good. +#ifndef NDEBUG + std::string old_start = *start; +#endif + start->resize(diff_index + 1); +#ifndef NDEBUG + assert(old_start >= *start); +#endif + assert(Slice(*start).compare(limit) > 0); + } + } + } + + void FindShortSuccessor(std::string* /*key*/) const override { + // Don't do anything for simplicity. + } + + bool IsSameLengthImmediateSuccessor(const Slice& s, + const Slice& t) const override { + // Always returning false to prevent surfacing design flaws in + // auto_prefix_mode + (void)s, (void)t; + return false; + // "Correct" implementation: + // return BytewiseComparatorImpl::IsSameLengthImmediateSuccessor(t, s); + } + + bool CanKeysWithDifferentByteContentsBeEqual() const override { + return false; + } + + using Comparator::CompareWithoutTimestamp; + int CompareWithoutTimestamp(const Slice& a, bool /*a_has_ts*/, const Slice& b, + bool /*b_has_ts*/) const override { + return -a.compare(b); + } +}; + +// EXPERIMENTAL +// Comparator with 64-bit integer timestamp. +// We did not performance test this yet. 
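+//
+// (Editor's illustration, not upstream code: with BytewiseComparatorImpl as
+// the base comparator, keys order by user key first and then by *descending*
+// timestamp, so ("foo", ts=7) sorts before ("foo", ts=3), while
+// ("bar", ts=1) still sorts before ("foo", ts=9).)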
+template <typename TComparator>
+class ComparatorWithU64TsImpl : public Comparator {
+  static_assert(std::is_base_of<Comparator, TComparator>::value,
+                "template type must be an inherited type of comparator");
+
+ public:
+  explicit ComparatorWithU64TsImpl() : Comparator(/*ts_sz=*/sizeof(uint64_t)) {
+    assert(cmp_without_ts_.timestamp_size() == 0);
+  }
+
+  static const char* kClassName() {
+    static std::string class_name = kClassNameInternal();
+    return class_name.c_str();
+  }
+
+  const char* Name() const override { return kClassName(); }
+
+  void FindShortSuccessor(std::string*) const override {}
+  void FindShortestSeparator(std::string*, const Slice&) const override {}
+  int Compare(const Slice& a, const Slice& b) const override {
+    int ret = CompareWithoutTimestamp(a, b);
+    size_t ts_sz = timestamp_size();
+    if (ret != 0) {
+      return ret;
+    }
+    // Compare timestamp.
+    // For the same user key with different timestamps, larger (newer)
+    // timestamp comes first.
+    return -CompareTimestamp(ExtractTimestampFromUserKey(a, ts_sz),
+                             ExtractTimestampFromUserKey(b, ts_sz));
+  }
+  using Comparator::CompareWithoutTimestamp;
+  int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b,
+                              bool b_has_ts) const override {
+    const size_t ts_sz = timestamp_size();
+    assert(!a_has_ts || a.size() >= ts_sz);
+    assert(!b_has_ts || b.size() >= ts_sz);
+    Slice lhs = a_has_ts ? StripTimestampFromUserKey(a, ts_sz) : a;
+    Slice rhs = b_has_ts ? StripTimestampFromUserKey(b, ts_sz) : b;
+    return cmp_without_ts_.Compare(lhs, rhs);
+  }
+  int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override {
+    assert(ts1.size() == sizeof(uint64_t));
+    assert(ts2.size() == sizeof(uint64_t));
+    uint64_t lhs = DecodeFixed64(ts1.data());
+    uint64_t rhs = DecodeFixed64(ts2.data());
+    if (lhs < rhs) {
+      return -1;
+    } else if (lhs > rhs) {
+      return 1;
+    } else {
+      return 0;
+    }
+  }
+
+ private:
+  static std::string kClassNameInternal() {
+    std::stringstream ss;
+    ss << TComparator::kClassName() << ".u64ts";
+    return ss.str();
+  }
+
+  TComparator cmp_without_ts_;
+};
+
+}  // namespace
+
+const Comparator* BytewiseComparator() {
+  STATIC_AVOID_DESTRUCTION(BytewiseComparatorImpl, bytewise);
+  return &bytewise;
+}
+
+const Comparator* ReverseBytewiseComparator() {
+  STATIC_AVOID_DESTRUCTION(ReverseBytewiseComparatorImpl, rbytewise);
+  return &rbytewise;
+}
+
+const Comparator* BytewiseComparatorWithU64Ts() {
+  STATIC_AVOID_DESTRUCTION(ComparatorWithU64TsImpl<BytewiseComparatorImpl>,
+                           comp_with_u64_ts);
+  return &comp_with_u64_ts;
+}
+
+static int RegisterBuiltinComparators(ObjectLibrary& library,
+                                      const std::string& /*arg*/) {
+  library.AddFactory<const Comparator>(
+      BytewiseComparatorImpl::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<const Comparator>* /*guard */,
+         std::string* /* errmsg */) { return BytewiseComparator(); });
+  library.AddFactory<const Comparator>(
+      ReverseBytewiseComparatorImpl::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<const Comparator>* /*guard */,
+         std::string* /* errmsg */) { return ReverseBytewiseComparator(); });
+  library.AddFactory<const Comparator>(
+      ComparatorWithU64TsImpl<BytewiseComparatorImpl>::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<const Comparator>* /*guard */,
+         std::string* /* errmsg */) { return BytewiseComparatorWithU64Ts(); });
+  return 3;
+}
+
+Status Comparator::CreateFromString(const ConfigOptions& config_options,
+                                    const std::string& value,
+                                    const Comparator** result) {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    RegisterBuiltinComparators(*(ObjectLibrary::Default().get()), "");
+  });
+  std::string id;
+  std::unordered_map<std::string, std::string> opt_map;
+  Status status =
Customizable::GetOptionsMap(config_options, *result, value, + &id, &opt_map); + if (!status.ok()) { // GetOptionsMap failed + return status; + } + if (id == BytewiseComparatorImpl::kClassName()) { + *result = BytewiseComparator(); + } else if (id == ReverseBytewiseComparatorImpl::kClassName()) { + *result = ReverseBytewiseComparator(); + } else if (id == + ComparatorWithU64TsImpl::kClassName()) { + *result = BytewiseComparatorWithU64Ts(); + } else if (value.empty()) { + // No Id and no options. Clear the object + *result = nullptr; + return Status::OK(); + } else if (id.empty()) { // We have no Id but have options. Not good + return Status::NotSupported("Cannot reset object ", id); + } else { + status = config_options.registry->NewStaticObject(id, result); + if (!status.ok()) { + if (config_options.ignore_unsupported_options && + status.IsNotSupported()) { + return Status::OK(); + } else { + return status; + } + } else { + Comparator* comparator = const_cast(*result); + status = + Customizable::ConfigureNewObject(config_options, comparator, opt_map); + } + } + return status; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.cc new file mode 100644 index 0000000000..712d333ee6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/compression.h" + +namespace ROCKSDB_NAMESPACE { + +StreamingCompress* StreamingCompress::Create(CompressionType compression_type, + const CompressionOptions& opts, + uint32_t compress_format_version, + size_t max_output_len) { + switch (compression_type) { + case kZSTD: { + if (!ZSTD_Streaming_Supported()) { + return nullptr; + } + return new ZSTDStreamingCompress(opts, compress_format_version, + max_output_len); + } + default: + return nullptr; + } +} + +StreamingUncompress* StreamingUncompress::Create( + CompressionType compression_type, uint32_t compress_format_version, + size_t max_output_len) { + switch (compression_type) { + case kZSTD: { + if (!ZSTD_Streaming_Supported()) { + return nullptr; + } + return new ZSTDStreamingUncompress(compress_format_version, + max_output_len); + } + default: + return nullptr; + } +} + +int ZSTDStreamingCompress::Compress(const char* input, size_t input_size, + char* output, size_t* output_pos) { + assert(input != nullptr && output != nullptr && output_pos != nullptr); + *output_pos = 0; + // Don't need to compress an empty input + if (input_size == 0) { + return 0; + } +#ifndef ZSTD_STREAMING + (void)input; + (void)input_size; + (void)output; + return -1; +#else + if (input_buffer_.src == nullptr || input_buffer_.src != input) { + // New input + // Catch errors where the previous input was not fully decompressed. + assert(input_buffer_.pos == input_buffer_.size); + input_buffer_ = {input, input_size, /*pos=*/0}; + } else if (input_buffer_.src == input) { + // Same input, not fully compressed. 
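+    // (Editor's note: nothing to do here; the retained input_buffer_ lets
+    // ZSTD_compressStream2 below resume where the previous call stopped.)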
+ } + ZSTD_outBuffer output_buffer = {output, max_output_len_, /*pos=*/0}; + const size_t remaining = + ZSTD_compressStream2(cctx_, &output_buffer, &input_buffer_, ZSTD_e_end); + if (ZSTD_isError(remaining)) { + // Failure + Reset(); + return -1; + } + // Success + *output_pos = output_buffer.pos; + return (int)remaining; +#endif +} + +void ZSTDStreamingCompress::Reset() { +#ifdef ZSTD_STREAMING + ZSTD_CCtx_reset(cctx_, ZSTD_ResetDirective::ZSTD_reset_session_only); + input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; +#endif +} + +int ZSTDStreamingUncompress::Uncompress(const char* input, size_t input_size, + char* output, size_t* output_pos) { + assert(output != nullptr && output_pos != nullptr); + *output_pos = 0; + // Don't need to uncompress an empty input + if (input_size == 0) { + return 0; + } +#ifdef ZSTD_STREAMING + if (input) { + // New input + input_buffer_ = {input, input_size, /*pos=*/0}; + } + ZSTD_outBuffer output_buffer = {output, max_output_len_, /*pos=*/0}; + size_t ret = ZSTD_decompressStream(dctx_, &output_buffer, &input_buffer_); + if (ZSTD_isError(ret)) { + Reset(); + return -1; + } + *output_pos = output_buffer.pos; + return (int)(input_buffer_.size - input_buffer_.pos); +#else + (void)input; + (void)input_size; + (void)output; + return -1; +#endif +} + +void ZSTDStreamingUncompress::Reset() { +#ifdef ZSTD_STREAMING + ZSTD_DCtx_reset(dctx_, ZSTD_ResetDirective::ZSTD_reset_session_only); + input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; +#endif +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.h new file mode 100644 index 0000000000..eb5d1c7bbc --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression.h @@ -0,0 +1,1795 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
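+//
+// (Editor's sketch, not upstream code: one way to drive the streaming API
+// implemented in compression.cc above; buffer sizes are illustrative.
+//
+//   CompressionOptions opts;
+//   std::unique_ptr<StreamingCompress> sc(StreamingCompress::Create(
+//       kZSTD, opts, /*compress_format_version=*/2, /*max_output_len=*/4096));
+//   if (sc != nullptr) {
+//     char out[4096];
+//     size_t out_pos = 0;
+//     int remaining = sc->Compress(input, input_size, out, &out_pos);
+//     // > 0: call again with the same input to drain; 0: done; < 0: error.
+//   }
+//
+// Create() returns nullptr when streaming ZSTD support is not compiled in.)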
+// +#pragma once + +#include +#include +#ifdef ROCKSDB_MALLOC_USABLE_SIZE +#ifdef OS_FREEBSD +#include +#else // OS_FREEBSD +#include +#endif // OS_FREEBSD +#endif // ROCKSDB_MALLOC_USABLE_SIZE +#include + +#include "memory/memory_allocator_impl.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "table/block_based/block_type.h" +#include "test_util/sync_point.h" +#include "util/coding.h" +#include "util/compression_context_cache.h" +#include "util/string_util.h" + +#ifdef SNAPPY +#include +#endif + +#ifdef ZLIB +#include +#endif + +#ifdef BZIP2 +#include +#endif + +#if defined(LZ4) +#include +#include +#endif + +#if defined(ZSTD) +#include +// v1.1.3+ +#if ZSTD_VERSION_NUMBER >= 10103 +#include +#endif // ZSTD_VERSION_NUMBER >= 10103 +// v1.4.0+ +#if ZSTD_VERSION_NUMBER >= 10400 +#define ZSTD_STREAMING +#endif // ZSTD_VERSION_NUMBER >= 10400 +namespace ROCKSDB_NAMESPACE { +// Need this for the context allocation override +// On windows we need to do this explicitly +#if (ZSTD_VERSION_NUMBER >= 500) +#if defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && \ + defined(ZSTD_STATIC_LINKING_ONLY) +#define ROCKSDB_ZSTD_CUSTOM_MEM +namespace port { +ZSTD_customMem GetJeZstdAllocationOverrides(); +} // namespace port +#endif // defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && + // defined(ZSTD_STATIC_LINKING_ONLY) + +// We require `ZSTD_sizeof_DDict` and `ZSTD_createDDict_byReference` to use +// `ZSTD_DDict`. The former was introduced in v1.0.0 and the latter was +// introduced in v1.1.3. But an important bug fix for `ZSTD_sizeof_DDict` came +// in v1.1.4, so that is the version we require. As of today's latest version +// (v1.3.8), they are both still in the experimental API, which means they are +// only exported when the compiler flag `ZSTD_STATIC_LINKING_ONLY` is set. 
+#if defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104 +#define ROCKSDB_ZSTD_DDICT +#endif // defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104 + +// Cached data represents a portion that can be re-used +// If, in the future we have more than one native context to +// cache we can arrange this as a tuple +class ZSTDUncompressCachedData { + public: + using ZSTDNativeContext = ZSTD_DCtx*; + ZSTDUncompressCachedData() {} + // Init from cache + ZSTDUncompressCachedData(const ZSTDUncompressCachedData& o) = delete; + ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete; + ZSTDUncompressCachedData(ZSTDUncompressCachedData&& o) noexcept + : ZSTDUncompressCachedData() { + *this = std::move(o); + } + ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&& o) noexcept { + assert(zstd_ctx_ == nullptr); + std::swap(zstd_ctx_, o.zstd_ctx_); + std::swap(cache_idx_, o.cache_idx_); + return *this; + } + ZSTDNativeContext Get() const { return zstd_ctx_; } + int64_t GetCacheIndex() const { return cache_idx_; } + void CreateIfNeeded() { + if (zstd_ctx_ == nullptr) { +#ifdef ROCKSDB_ZSTD_CUSTOM_MEM + zstd_ctx_ = + ZSTD_createDCtx_advanced(port::GetJeZstdAllocationOverrides()); +#else // ROCKSDB_ZSTD_CUSTOM_MEM + zstd_ctx_ = ZSTD_createDCtx(); +#endif // ROCKSDB_ZSTD_CUSTOM_MEM + cache_idx_ = -1; + } + } + void InitFromCache(const ZSTDUncompressCachedData& o, int64_t idx) { + zstd_ctx_ = o.zstd_ctx_; + cache_idx_ = idx; + } + ~ZSTDUncompressCachedData() { + if (zstd_ctx_ != nullptr && cache_idx_ == -1) { + ZSTD_freeDCtx(zstd_ctx_); + } + } + + private: + ZSTDNativeContext zstd_ctx_ = nullptr; + int64_t cache_idx_ = -1; // -1 means this instance owns the context +}; +#endif // (ZSTD_VERSION_NUMBER >= 500) +} // namespace ROCKSDB_NAMESPACE +#endif // ZSTD + +#if !(defined ZSTD) || !(ZSTD_VERSION_NUMBER >= 500) +namespace ROCKSDB_NAMESPACE { +class ZSTDUncompressCachedData { + void* padding; // unused + public: + using ZSTDNativeContext = void*; + ZSTDUncompressCachedData() {} + ZSTDUncompressCachedData(const ZSTDUncompressCachedData&) {} + ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete; + ZSTDUncompressCachedData(ZSTDUncompressCachedData&&) noexcept = default; + ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&&) noexcept = + default; + ZSTDNativeContext Get() const { return nullptr; } + int64_t GetCacheIndex() const { return -1; } + void CreateIfNeeded() {} + void InitFromCache(const ZSTDUncompressCachedData&, int64_t) {} + + private: + void ignore_padding__() { padding = nullptr; } +}; +} // namespace ROCKSDB_NAMESPACE +#endif + +#if defined(XPRESS) +#include "port/xpress.h" +#endif + +namespace ROCKSDB_NAMESPACE { + +// Holds dictionary and related data, like ZSTD's digested compression +// dictionary. 
+struct CompressionDict { +#if ZSTD_VERSION_NUMBER >= 700 + ZSTD_CDict* zstd_cdict_ = nullptr; +#endif // ZSTD_VERSION_NUMBER >= 700 + std::string dict_; + + public: +#if ZSTD_VERSION_NUMBER >= 700 + CompressionDict(std::string dict, CompressionType type, int level) { +#else // ZSTD_VERSION_NUMBER >= 700 + CompressionDict(std::string dict, CompressionType /*type*/, int /*level*/) { +#endif // ZSTD_VERSION_NUMBER >= 700 + dict_ = std::move(dict); +#if ZSTD_VERSION_NUMBER >= 700 + zstd_cdict_ = nullptr; + if (!dict_.empty() && (type == kZSTD || type == kZSTDNotFinalCompression)) { + if (level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } + // Should be safe (but slower) if below call fails as we'll use the + // raw dictionary to compress. + zstd_cdict_ = ZSTD_createCDict(dict_.data(), dict_.size(), level); + assert(zstd_cdict_ != nullptr); + } +#endif // ZSTD_VERSION_NUMBER >= 700 + } + + ~CompressionDict() { +#if ZSTD_VERSION_NUMBER >= 700 + size_t res = 0; + if (zstd_cdict_ != nullptr) { + res = ZSTD_freeCDict(zstd_cdict_); + } + assert(res == 0); // Last I checked they can't fail + (void)res; // prevent unused var warning +#endif // ZSTD_VERSION_NUMBER >= 700 + } + +#if ZSTD_VERSION_NUMBER >= 700 + const ZSTD_CDict* GetDigestedZstdCDict() const { return zstd_cdict_; } +#endif // ZSTD_VERSION_NUMBER >= 700 + + Slice GetRawDict() const { return dict_; } + + static const CompressionDict& GetEmptyDict() { + static CompressionDict empty_dict{}; + return empty_dict; + } + + CompressionDict() = default; + // Disable copy/move + CompressionDict(const CompressionDict&) = delete; + CompressionDict& operator=(const CompressionDict&) = delete; + CompressionDict(CompressionDict&&) = delete; + CompressionDict& operator=(CompressionDict&&) = delete; +}; + +// Holds dictionary and related data, like ZSTD's digested uncompression +// dictionary. +struct UncompressionDict { + // Block containing the data for the compression dictionary in case the + // constructor that takes a string parameter is used. + std::string dict_; + + // Block containing the data for the compression dictionary in case the + // constructor that takes a Slice parameter is used and the passed in + // CacheAllocationPtr is not nullptr. + CacheAllocationPtr allocation_; + + // Slice pointing to the compression dictionary data. Can point to + // dict_, allocation_, or some other memory location, depending on how + // the object was constructed. + Slice slice_; + +#ifdef ROCKSDB_ZSTD_DDICT + // Processed version of the contents of slice_ for ZSTD compression. 
+ ZSTD_DDict* zstd_ddict_ = nullptr; +#endif // ROCKSDB_ZSTD_DDICT + +#ifdef ROCKSDB_ZSTD_DDICT + UncompressionDict(std::string dict, bool using_zstd) +#else // ROCKSDB_ZSTD_DDICT + UncompressionDict(std::string dict, bool /* using_zstd */) +#endif // ROCKSDB_ZSTD_DDICT + : dict_(std::move(dict)), slice_(dict_) { +#ifdef ROCKSDB_ZSTD_DDICT + if (!slice_.empty() && using_zstd) { + zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size()); + assert(zstd_ddict_ != nullptr); + } +#endif // ROCKSDB_ZSTD_DDICT + } + +#ifdef ROCKSDB_ZSTD_DDICT + UncompressionDict(Slice slice, CacheAllocationPtr&& allocation, + bool using_zstd) +#else // ROCKSDB_ZSTD_DDICT + UncompressionDict(Slice slice, CacheAllocationPtr&& allocation, + bool /* using_zstd */) +#endif // ROCKSDB_ZSTD_DDICT + : allocation_(std::move(allocation)), slice_(std::move(slice)) { +#ifdef ROCKSDB_ZSTD_DDICT + if (!slice_.empty() && using_zstd) { + zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size()); + assert(zstd_ddict_ != nullptr); + } +#endif // ROCKSDB_ZSTD_DDICT + } + + UncompressionDict(UncompressionDict&& rhs) + : dict_(std::move(rhs.dict_)), + allocation_(std::move(rhs.allocation_)), + slice_(std::move(rhs.slice_)) +#ifdef ROCKSDB_ZSTD_DDICT + , + zstd_ddict_(rhs.zstd_ddict_) +#endif + { +#ifdef ROCKSDB_ZSTD_DDICT + rhs.zstd_ddict_ = nullptr; +#endif + } + + ~UncompressionDict() { +#ifdef ROCKSDB_ZSTD_DDICT + size_t res = 0; + if (zstd_ddict_ != nullptr) { + res = ZSTD_freeDDict(zstd_ddict_); + } + assert(res == 0); // Last I checked they can't fail + (void)res; // prevent unused var warning +#endif // ROCKSDB_ZSTD_DDICT + } + + UncompressionDict& operator=(UncompressionDict&& rhs) { + if (this == &rhs) { + return *this; + } + + dict_ = std::move(rhs.dict_); + allocation_ = std::move(rhs.allocation_); + slice_ = std::move(rhs.slice_); + +#ifdef ROCKSDB_ZSTD_DDICT + zstd_ddict_ = rhs.zstd_ddict_; + rhs.zstd_ddict_ = nullptr; +#endif + + return *this; + } + + // The object is self-contained if the string constructor is used, or the + // Slice constructor is invoked with a non-null allocation. Otherwise, it + // is the caller's responsibility to ensure that the underlying storage + // outlives this object. 
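+  // (Editor's note: e.g. the std::string constructor above always owns its
+  // bytes, while the Slice constructor with a null allocation only borrows
+  // the caller's buffer.)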
+ bool own_bytes() const { return !dict_.empty() || allocation_; } + + const Slice& GetRawDict() const { return slice_; } + + // For TypedCacheInterface + const Slice& ContentSlice() const { return slice_; } + static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kOtherBlock; + static constexpr BlockType kBlockType = BlockType::kCompressionDictionary; + +#ifdef ROCKSDB_ZSTD_DDICT + const ZSTD_DDict* GetDigestedZstdDDict() const { return zstd_ddict_; } +#endif // ROCKSDB_ZSTD_DDICT + + static const UncompressionDict& GetEmptyDict() { + static UncompressionDict empty_dict{}; + return empty_dict; + } + + size_t ApproximateMemoryUsage() const { + size_t usage = sizeof(struct UncompressionDict); + usage += dict_.size(); + if (allocation_) { + auto allocator = allocation_.get_deleter().allocator; + if (allocator) { + usage += allocator->UsableSize(allocation_.get(), slice_.size()); + } else { + usage += slice_.size(); + } + } +#ifdef ROCKSDB_ZSTD_DDICT + usage += ZSTD_sizeof_DDict(zstd_ddict_); +#endif // ROCKSDB_ZSTD_DDICT + return usage; + } + + UncompressionDict() = default; + // Disable copy + UncompressionDict(const CompressionDict&) = delete; + UncompressionDict& operator=(const CompressionDict&) = delete; +}; + +class CompressionContext { + private: +#if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500) + ZSTD_CCtx* zstd_ctx_ = nullptr; + void CreateNativeContext(CompressionType type) { + if (type == kZSTD || type == kZSTDNotFinalCompression) { +#ifdef ROCKSDB_ZSTD_CUSTOM_MEM + zstd_ctx_ = + ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides()); +#else // ROCKSDB_ZSTD_CUSTOM_MEM + zstd_ctx_ = ZSTD_createCCtx(); +#endif // ROCKSDB_ZSTD_CUSTOM_MEM + } + } + void DestroyNativeContext() { + if (zstd_ctx_ != nullptr) { + ZSTD_freeCCtx(zstd_ctx_); + } + } + + public: + // callable inside ZSTD_Compress + ZSTD_CCtx* ZSTDPreallocCtx() const { + assert(zstd_ctx_ != nullptr); + return zstd_ctx_; + } + +#else // ZSTD && (ZSTD_VERSION_NUMBER >= 500) + private: + void CreateNativeContext(CompressionType /* type */) {} + void DestroyNativeContext() {} +#endif // ZSTD && (ZSTD_VERSION_NUMBER >= 500) + public: + explicit CompressionContext(CompressionType type) { + CreateNativeContext(type); + } + ~CompressionContext() { DestroyNativeContext(); } + CompressionContext(const CompressionContext&) = delete; + CompressionContext& operator=(const CompressionContext&) = delete; +}; + +class CompressionInfo { + const CompressionOptions& opts_; + const CompressionContext& context_; + const CompressionDict& dict_; + const CompressionType type_; + const uint64_t sample_for_compression_; + + public: + CompressionInfo(const CompressionOptions& _opts, + const CompressionContext& _context, + const CompressionDict& _dict, CompressionType _type, + uint64_t _sample_for_compression) + : opts_(_opts), + context_(_context), + dict_(_dict), + type_(_type), + sample_for_compression_(_sample_for_compression) {} + + const CompressionOptions& options() const { return opts_; } + const CompressionContext& context() const { return context_; } + const CompressionDict& dict() const { return dict_; } + CompressionType type() const { return type_; } + uint64_t SampleForCompression() const { return sample_for_compression_; } +}; + +class UncompressionContext { + private: + CompressionContextCache* ctx_cache_ = nullptr; + ZSTDUncompressCachedData uncomp_cached_data_; + + public: + explicit UncompressionContext(CompressionType type) { + if (type == kZSTD || type == kZSTDNotFinalCompression) { + ctx_cache_ = 
CompressionContextCache::Instance(); + uncomp_cached_data_ = ctx_cache_->GetCachedZSTDUncompressData(); + } + } + ~UncompressionContext() { + if (uncomp_cached_data_.GetCacheIndex() != -1) { + assert(ctx_cache_ != nullptr); + ctx_cache_->ReturnCachedZSTDUncompressData( + uncomp_cached_data_.GetCacheIndex()); + } + } + UncompressionContext(const UncompressionContext&) = delete; + UncompressionContext& operator=(const UncompressionContext&) = delete; + + ZSTDUncompressCachedData::ZSTDNativeContext GetZSTDContext() const { + return uncomp_cached_data_.Get(); + } +}; + +class UncompressionInfo { + const UncompressionContext& context_; + const UncompressionDict& dict_; + const CompressionType type_; + + public: + UncompressionInfo(const UncompressionContext& _context, + const UncompressionDict& _dict, CompressionType _type) + : context_(_context), dict_(_dict), type_(_type) {} + + const UncompressionContext& context() const { return context_; } + const UncompressionDict& dict() const { return dict_; } + CompressionType type() const { return type_; } +}; + +inline bool Snappy_Supported() { +#ifdef SNAPPY + return true; +#else + return false; +#endif +} + +inline bool Zlib_Supported() { +#ifdef ZLIB + return true; +#else + return false; +#endif +} + +inline bool BZip2_Supported() { +#ifdef BZIP2 + return true; +#else + return false; +#endif +} + +inline bool LZ4_Supported() { +#ifdef LZ4 + return true; +#else + return false; +#endif +} + +inline bool XPRESS_Supported() { +#ifdef XPRESS + return true; +#else + return false; +#endif +} + +inline bool ZSTD_Supported() { +#ifdef ZSTD + // ZSTD format is finalized since version 0.8.0. + return (ZSTD_versionNumber() >= 800); +#else + return false; +#endif +} + +inline bool ZSTDNotFinal_Supported() { +#ifdef ZSTD + return true; +#else + return false; +#endif +} + +inline bool ZSTD_Streaming_Supported() { +#if defined(ZSTD) && defined(ZSTD_STREAMING) + return true; +#else + return false; +#endif +} + +inline bool StreamingCompressionTypeSupported( + CompressionType compression_type) { + switch (compression_type) { + case kNoCompression: + return true; + case kZSTD: + return ZSTD_Streaming_Supported(); + default: + return false; + } +} + +inline bool CompressionTypeSupported(CompressionType compression_type) { + switch (compression_type) { + case kNoCompression: + return true; + case kSnappyCompression: + return Snappy_Supported(); + case kZlibCompression: + return Zlib_Supported(); + case kBZip2Compression: + return BZip2_Supported(); + case kLZ4Compression: + return LZ4_Supported(); + case kLZ4HCCompression: + return LZ4_Supported(); + case kXpressCompression: + return XPRESS_Supported(); + case kZSTDNotFinalCompression: + return ZSTDNotFinal_Supported(); + case kZSTD: + return ZSTD_Supported(); + default: + assert(false); + return false; + } +} + +inline bool DictCompressionTypeSupported(CompressionType compression_type) { + switch (compression_type) { + case kNoCompression: + return false; + case kSnappyCompression: + return false; + case kZlibCompression: + return Zlib_Supported(); + case kBZip2Compression: + return false; + case kLZ4Compression: + case kLZ4HCCompression: +#if LZ4_VERSION_NUMBER >= 10400 // r124+ + return LZ4_Supported(); +#else + return false; +#endif + case kXpressCompression: + return false; + case kZSTDNotFinalCompression: +#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ + return ZSTDNotFinal_Supported(); +#else + return false; +#endif + case kZSTD: +#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ + return ZSTD_Supported(); +#else + return 
false; +#endif + default: + assert(false); + return false; + } +} + +inline std::string CompressionTypeToString(CompressionType compression_type) { + switch (compression_type) { + case kNoCompression: + return "NoCompression"; + case kSnappyCompression: + return "Snappy"; + case kZlibCompression: + return "Zlib"; + case kBZip2Compression: + return "BZip2"; + case kLZ4Compression: + return "LZ4"; + case kLZ4HCCompression: + return "LZ4HC"; + case kXpressCompression: + return "Xpress"; + case kZSTD: + return "ZSTD"; + case kZSTDNotFinalCompression: + return "ZSTDNotFinal"; + case kDisableCompressionOption: + return "DisableOption"; + default: + assert(false); + return ""; + } +} + +inline std::string CompressionOptionsToString( + CompressionOptions& compression_options) { + std::string result; + result.reserve(512); + result.append("window_bits=") + .append(std::to_string(compression_options.window_bits)) + .append("; "); + result.append("level=") + .append(std::to_string(compression_options.level)) + .append("; "); + result.append("strategy=") + .append(std::to_string(compression_options.strategy)) + .append("; "); + result.append("max_dict_bytes=") + .append(std::to_string(compression_options.max_dict_bytes)) + .append("; "); + result.append("zstd_max_train_bytes=") + .append(std::to_string(compression_options.zstd_max_train_bytes)) + .append("; "); + result.append("enabled=") + .append(std::to_string(compression_options.enabled)) + .append("; "); + result.append("max_dict_buffer_bytes=") + .append(std::to_string(compression_options.max_dict_buffer_bytes)) + .append("; "); + result.append("use_zstd_dict_trainer=") + .append(std::to_string(compression_options.use_zstd_dict_trainer)) + .append("; "); + return result; +} + +// compress_format_version can have two values: +// 1 -- decompressed sizes for BZip2 and Zlib are not included in the compressed +// block. Also, decompressed sizes for LZ4 are encoded in platform-dependent +// way. +// 2 -- Zlib, BZip2 and LZ4 encode decompressed size as Varint32 just before the +// start of compressed block. Snappy format is the same as version 1. 
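+//
+// (Editor's illustration: under format version 2, a block whose decompressed
+// size is 300 bytes begins with the varint32 bytes 0xAC 0x02, immediately
+// followed by the compressed payload; under version 1, BZip2 and Zlib blocks
+// carry no size header at all.)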
+ +inline bool Snappy_Compress(const CompressionInfo& /*info*/, const char* input, + size_t length, ::std::string* output) { +#ifdef SNAPPY + output->resize(snappy::MaxCompressedLength(length)); + size_t outlen; + snappy::RawCompress(input, length, &(*output)[0], &outlen); + output->resize(outlen); + return true; +#else + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +inline CacheAllocationPtr Snappy_Uncompress( + const char* input, size_t length, size_t* uncompressed_size, + MemoryAllocator* allocator = nullptr) { +#ifdef SNAPPY + size_t uncompressed_length = 0; + if (!snappy::GetUncompressedLength(input, length, &uncompressed_length)) { + return nullptr; + } + + CacheAllocationPtr output = AllocateBlock(uncompressed_length, allocator); + + if (!snappy::RawUncompress(input, length, output.get())) { + return nullptr; + } + + *uncompressed_size = uncompressed_length; + + return output; +#else + (void)input; + (void)length; + (void)uncompressed_size; + (void)allocator; + return nullptr; +#endif +} + +namespace compression { +// returns size +inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) { + PutVarint32(output, length); + return output->size(); +} + +inline bool GetDecompressedSizeInfo(const char** input_data, + size_t* input_length, + uint32_t* output_len) { + auto new_input_data = + GetVarint32Ptr(*input_data, *input_data + *input_length, output_len); + if (new_input_data == nullptr) { + return false; + } + *input_length -= (new_input_data - *input_data); + *input_data = new_input_data; + return true; +} +} // namespace compression + +// compress_format_version == 1 -- decompressed size is not included in the +// block header +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. +inline bool Zlib_Compress(const CompressionInfo& info, + uint32_t compress_format_version, const char* input, + size_t length, ::std::string* output) { +#ifdef ZLIB + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + + size_t output_header_len = 0; + if (compress_format_version == 2) { + output_header_len = compression::PutDecompressedSizeInfo( + output, static_cast(length)); + } + + // The memLevel parameter specifies how much memory should be allocated for + // the internal compression state. + // memLevel=1 uses minimum memory but is slow and reduces compression ratio. + // memLevel=9 uses maximum memory for optimal speed. + // The default value is 8. See zconf.h for more details. + static const int memLevel = 8; + int level; + if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { + level = Z_DEFAULT_COMPRESSION; + } else { + level = info.options().level; + } + z_stream _stream; + memset(&_stream, 0, sizeof(z_stream)); + int st = deflateInit2(&_stream, level, Z_DEFLATED, info.options().window_bits, + memLevel, info.options().strategy); + if (st != Z_OK) { + return false; + } + + Slice compression_dict = info.dict().GetRawDict(); + if (compression_dict.size()) { + // Initialize the compression library's dictionary + st = deflateSetDictionary( + &_stream, reinterpret_cast(compression_dict.data()), + static_cast(compression_dict.size())); + if (st != Z_OK) { + deflateEnd(&_stream); + return false; + } + } + + // Get an upper bound on the compressed size. 
+ size_t upper_bound = + deflateBound(&_stream, static_cast(length)); + output->resize(output_header_len + upper_bound); + + // Compress the input, and put compressed data in output. + _stream.next_in = (Bytef*)input; + _stream.avail_in = static_cast(length); + + // Initialize the output size. + _stream.avail_out = static_cast(upper_bound); + _stream.next_out = reinterpret_cast(&(*output)[output_header_len]); + + bool compressed = false; + st = deflate(&_stream, Z_FINISH); + if (st == Z_STREAM_END) { + compressed = true; + output->resize(output->size() - _stream.avail_out); + } + // The only return value we really care about is Z_STREAM_END. + // Z_OK means insufficient output space. This means the compression is + // bigger than decompressed size. Just fail the compression in that case. + + deflateEnd(&_stream); + return compressed; +#else + (void)info; + (void)compress_format_version; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +// compress_format_version == 1 -- decompressed size is not included in the +// block header +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. +inline CacheAllocationPtr Zlib_Uncompress( + const UncompressionInfo& info, const char* input_data, size_t input_length, + size_t* uncompressed_size, uint32_t compress_format_version, + MemoryAllocator* allocator = nullptr, int windowBits = -14) { +#ifdef ZLIB + uint32_t output_len = 0; + if (compress_format_version == 2) { + if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, + &output_len)) { + return nullptr; + } + } else { + // Assume the decompressed data size will 5x of compressed size, but round + // to the page size + size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096; + output_len = static_cast( + std::min(proposed_output_len, + static_cast(std::numeric_limits::max()))); + } + + z_stream _stream; + memset(&_stream, 0, sizeof(z_stream)); + + // For raw inflate, the windowBits should be -8..-15. + // If windowBits is bigger than zero, it will use either zlib + // header or gzip header. Adding 32 to it will do automatic detection. + int st = + inflateInit2(&_stream, windowBits > 0 ? windowBits + 32 : windowBits); + if (st != Z_OK) { + return nullptr; + } + + const Slice& compression_dict = info.dict().GetRawDict(); + if (compression_dict.size()) { + // Initialize the compression library's dictionary + st = inflateSetDictionary( + &_stream, reinterpret_cast(compression_dict.data()), + static_cast(compression_dict.size())); + if (st != Z_OK) { + return nullptr; + } + } + + _stream.next_in = (Bytef*)input_data; + _stream.avail_in = static_cast(input_length); + + auto output = AllocateBlock(output_len, allocator); + + _stream.next_out = (Bytef*)output.get(); + _stream.avail_out = static_cast(output_len); + + bool done = false; + while (!done) { + st = inflate(&_stream, Z_SYNC_FLUSH); + switch (st) { + case Z_STREAM_END: + done = true; + break; + case Z_OK: { + // No output space. Increase the output space by 20%. + // We should never run out of output space if + // compress_format_version == 2 + assert(compress_format_version != 2); + size_t old_sz = output_len; + uint32_t output_len_delta = output_len / 5; + output_len += output_len_delta < 10 ? 
10 : output_len_delta; + auto tmp = AllocateBlock(output_len, allocator); + memcpy(tmp.get(), output.get(), old_sz); + output = std::move(tmp); + + // Set more output. + _stream.next_out = (Bytef*)(output.get() + old_sz); + _stream.avail_out = static_cast(output_len - old_sz); + break; + } + case Z_BUF_ERROR: + default: + inflateEnd(&_stream); + return nullptr; + } + } + + // If we encoded decompressed block size, we should have no bytes left + assert(compress_format_version != 2 || _stream.avail_out == 0); + assert(output_len >= _stream.avail_out); + *uncompressed_size = output_len - _stream.avail_out; + inflateEnd(&_stream); + return output; +#else + (void)info; + (void)input_data; + (void)input_length; + (void)uncompressed_size; + (void)compress_format_version; + (void)allocator; + (void)windowBits; + return nullptr; +#endif +} + +// compress_format_version == 1 -- decompressed size is not included in the +// block header +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +inline bool BZip2_Compress(const CompressionInfo& /*info*/, + uint32_t compress_format_version, const char* input, + size_t length, ::std::string* output) { +#ifdef BZIP2 + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + size_t output_header_len = 0; + if (compress_format_version == 2) { + output_header_len = compression::PutDecompressedSizeInfo( + output, static_cast(length)); + } + // Resize output to be the plain data length. + // This may not be big enough if the compression actually expands data. + output->resize(output_header_len + length); + + bz_stream _stream; + memset(&_stream, 0, sizeof(bz_stream)); + + // Block size 1 is 100K. + // 0 is for silent. + // 30 is the default workFactor + int st = BZ2_bzCompressInit(&_stream, 1, 0, 30); + if (st != BZ_OK) { + return false; + } + + // Compress the input, and put compressed data in output. + _stream.next_in = (char*)input; + _stream.avail_in = static_cast(length); + + // Initialize the output size. + _stream.avail_out = static_cast(length); + _stream.next_out = reinterpret_cast(&(*output)[output_header_len]); + + bool compressed = false; + st = BZ2_bzCompress(&_stream, BZ_FINISH); + if (st == BZ_STREAM_END) { + compressed = true; + output->resize(output->size() - _stream.avail_out); + } + // The only return value we really care about is BZ_STREAM_END. + // BZ_FINISH_OK means insufficient output space. This means the compression + // is bigger than decompressed size. Just fail the compression in that case. 
+
+  BZ2_bzCompressEnd(&_stream);
+  return compressed;
+#else
+  (void)compress_format_version;
+  (void)input;
+  (void)length;
+  (void)output;
+  return false;
+#endif
+}
+
+// compress_format_version == 1 -- decompressed size is not included in the
+// block header
+// compress_format_version == 2 -- decompressed size is included in the block
+// header in varint32 format
+inline CacheAllocationPtr BZip2_Uncompress(
+    const char* input_data, size_t input_length, size_t* uncompressed_size,
+    uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) {
+#ifdef BZIP2
+  uint32_t output_len = 0;
+  if (compress_format_version == 2) {
+    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
+                                              &output_len)) {
+      return nullptr;
+    }
+  } else {
+    // Assume the decompressed data size will be 5x the compressed size, but
+    // round up to the next page size
+    size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
+    output_len = static_cast<uint32_t>(
+        std::min(proposed_output_len,
+                 static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
+  }
+
+  bz_stream _stream;
+  memset(&_stream, 0, sizeof(bz_stream));
+
+  int st = BZ2_bzDecompressInit(&_stream, 0, 0);
+  if (st != BZ_OK) {
+    return nullptr;
+  }
+
+  _stream.next_in = (char*)input_data;
+  _stream.avail_in = static_cast<unsigned int>(input_length);
+
+  auto output = AllocateBlock(output_len, allocator);
+
+  _stream.next_out = (char*)output.get();
+  _stream.avail_out = static_cast<unsigned int>(output_len);
+
+  bool done = false;
+  while (!done) {
+    st = BZ2_bzDecompress(&_stream);
+    switch (st) {
+      case BZ_STREAM_END:
+        done = true;
+        break;
+      case BZ_OK: {
+        // No output space. Increase the output space by 20%.
+        // We should never run out of output space if
+        // compress_format_version == 2
+        assert(compress_format_version != 2);
+        uint32_t old_sz = output_len;
+        output_len = output_len * 1.2;
+        auto tmp = AllocateBlock(output_len, allocator);
+        memcpy(tmp.get(), output.get(), old_sz);
+        output = std::move(tmp);
+
+        // Set more output.
+        _stream.next_out = (char*)(output.get() + old_sz);
+        _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
+        break;
+      }
+      default:
+        BZ2_bzDecompressEnd(&_stream);
+        return nullptr;
+    }
+  }
+
+  // If we encoded decompressed block size, we should have no bytes left
+  assert(compress_format_version != 2 || _stream.avail_out == 0);
+  assert(output_len >= _stream.avail_out);
+  *uncompressed_size = output_len - _stream.avail_out;
+  BZ2_bzDecompressEnd(&_stream);
+  return output;
+#else
+  (void)input_data;
+  (void)input_length;
+  (void)uncompressed_size;
+  (void)compress_format_version;
+  (void)allocator;
+  return nullptr;
+#endif
+}
+
+// compress_format_version == 1 -- decompressed size is included in the
+// block header using memcpy, which makes the database non-portable
+// compress_format_version == 2 -- decompressed size is included in the block
+// header in varint32 format
+// @param compression_dict Data for presetting the compression library's
+// dictionary.
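+// Illustrative layout note (an editorial sketch, not upstream documentation):
+// with compress_format_version == 1 the header is the raw bytes of `length`
+// (a size_t), so a 300-byte block on a little-endian 64-bit build is laid
+// out as
+//   [2c 01 00 00 00 00 00 00][lz4 payload ...]
+// while compress_format_version == 2 stores the same size as a varint32:
+//   [ac 02][lz4 payload ...]
+// which is both smaller and independent of host endianness.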
+inline bool LZ4_Compress(const CompressionInfo& info, + uint32_t compress_format_version, const char* input, + size_t length, ::std::string* output) { +#ifdef LZ4 + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + + size_t output_header_len = 0; + if (compress_format_version == 2) { + // new encoding, using varint32 to store size information + output_header_len = compression::PutDecompressedSizeInfo( + output, static_cast(length)); + } else { + // legacy encoding, which is not really portable (depends on big/little + // endianness) + output_header_len = 8; + output->resize(output_header_len); + char* p = const_cast(output->c_str()); + memcpy(p, &length, sizeof(length)); + } + int compress_bound = LZ4_compressBound(static_cast(length)); + output->resize(static_cast(output_header_len + compress_bound)); + + int outlen; +#if LZ4_VERSION_NUMBER >= 10400 // r124+ + LZ4_stream_t* stream = LZ4_createStream(); + Slice compression_dict = info.dict().GetRawDict(); + if (compression_dict.size()) { + LZ4_loadDict(stream, compression_dict.data(), + static_cast(compression_dict.size())); + } +#if LZ4_VERSION_NUMBER >= 10700 // r129+ + outlen = + LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len], + static_cast(length), compress_bound, 1); +#else // up to r128 + outlen = LZ4_compress_limitedOutput_continue( + stream, input, &(*output)[output_header_len], static_cast(length), + compress_bound); +#endif + LZ4_freeStream(stream); +#else // up to r123 + outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len], + static_cast(length), compress_bound); +#endif // LZ4_VERSION_NUMBER >= 10400 + + if (outlen == 0) { + return false; + } + output->resize(static_cast(output_header_len + outlen)); + return true; +#else // LZ4 + (void)info; + (void)compress_format_version; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +// compress_format_version == 1 -- decompressed size is included in the +// block header using memcpy, which makes database non-portable) +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. 
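+// A minimal round-trip sketch (illustrative only; construction of the
+// CompressionInfo/UncompressionInfo arguments `info` and `uncomp_info` is
+// elided, and `raw` is a hypothetical Slice):
+//   std::string compressed;
+//   if (LZ4_Compress(info, 2, raw.data(), raw.size(), &compressed)) {
+//     size_t out_size = 0;
+//     CacheAllocationPtr out = LZ4_Uncompress(
+//         uncomp_info, compressed.data(), compressed.size(), &out_size, 2);
+//     assert(out != nullptr && out_size == raw.size());
+//   }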
+inline CacheAllocationPtr LZ4_Uncompress(const UncompressionInfo& info, + const char* input_data, + size_t input_length, + size_t* uncompressed_size, + uint32_t compress_format_version, + MemoryAllocator* allocator = nullptr) { +#ifdef LZ4 + uint32_t output_len = 0; + if (compress_format_version == 2) { + // new encoding, using varint32 to store size information + if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, + &output_len)) { + return nullptr; + } + } else { + // legacy encoding, which is not really portable (depends on big/little + // endianness) + if (input_length < 8) { + return nullptr; + } + if (port::kLittleEndian) { + memcpy(&output_len, input_data, sizeof(output_len)); + } else { + memcpy(&output_len, input_data + 4, sizeof(output_len)); + } + input_length -= 8; + input_data += 8; + } + + auto output = AllocateBlock(output_len, allocator); + + int decompress_bytes = 0; + +#if LZ4_VERSION_NUMBER >= 10400 // r124+ + LZ4_streamDecode_t* stream = LZ4_createStreamDecode(); + const Slice& compression_dict = info.dict().GetRawDict(); + if (compression_dict.size()) { + LZ4_setStreamDecode(stream, compression_dict.data(), + static_cast(compression_dict.size())); + } + decompress_bytes = LZ4_decompress_safe_continue( + stream, input_data, output.get(), static_cast(input_length), + static_cast(output_len)); + LZ4_freeStreamDecode(stream); +#else // up to r123 + decompress_bytes = LZ4_decompress_safe(input_data, output.get(), + static_cast(input_length), + static_cast(output_len)); +#endif // LZ4_VERSION_NUMBER >= 10400 + + if (decompress_bytes < 0) { + return nullptr; + } + assert(decompress_bytes == static_cast(output_len)); + *uncompressed_size = decompress_bytes; + return output; +#else // LZ4 + (void)info; + (void)input_data; + (void)input_length; + (void)uncompressed_size; + (void)compress_format_version; + (void)allocator; + return nullptr; +#endif +} + +// compress_format_version == 1 -- decompressed size is included in the +// block header using memcpy, which makes database non-portable) +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. 
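+// Dictionary note (illustrative): the compressor and the decompressor must
+// preset the same raw dictionary bytes -- LZ4_loadDict()/LZ4_loadDictHC() on
+// the compress side and LZ4_setStreamDecode() on the uncompress side, as in
+// LZ4_Uncompress() above. With mismatched dictionaries,
+// LZ4_decompress_safe_continue() fails (returns a negative value).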
+inline bool LZ4HC_Compress(const CompressionInfo& info, + uint32_t compress_format_version, const char* input, + size_t length, ::std::string* output) { +#ifdef LZ4 + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + + size_t output_header_len = 0; + if (compress_format_version == 2) { + // new encoding, using varint32 to store size information + output_header_len = compression::PutDecompressedSizeInfo( + output, static_cast(length)); + } else { + // legacy encoding, which is not really portable (depends on big/little + // endianness) + output_header_len = 8; + output->resize(output_header_len); + char* p = const_cast(output->c_str()); + memcpy(p, &length, sizeof(length)); + } + int compress_bound = LZ4_compressBound(static_cast(length)); + output->resize(static_cast(output_header_len + compress_bound)); + + int outlen; + int level; + if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { + level = 0; // lz4hc.h says any value < 1 will be sanitized to default + } else { + level = info.options().level; + } +#if LZ4_VERSION_NUMBER >= 10400 // r124+ + LZ4_streamHC_t* stream = LZ4_createStreamHC(); + LZ4_resetStreamHC(stream, level); + Slice compression_dict = info.dict().GetRawDict(); + const char* compression_dict_data = + compression_dict.size() > 0 ? compression_dict.data() : nullptr; + size_t compression_dict_size = compression_dict.size(); + if (compression_dict_data != nullptr) { + LZ4_loadDictHC(stream, compression_dict_data, + static_cast(compression_dict_size)); + } + +#if LZ4_VERSION_NUMBER >= 10700 // r129+ + outlen = + LZ4_compress_HC_continue(stream, input, &(*output)[output_header_len], + static_cast(length), compress_bound); +#else // r124-r128 + outlen = LZ4_compressHC_limitedOutput_continue( + stream, input, &(*output)[output_header_len], static_cast(length), + compress_bound); +#endif // LZ4_VERSION_NUMBER >= 10700 + LZ4_freeStreamHC(stream); + +#elif LZ4_VERSION_MAJOR // r113-r123 + outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len], + static_cast(length), + compress_bound, level); +#else // up to r112 + outlen = + LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len], + static_cast(length), compress_bound); +#endif // LZ4_VERSION_NUMBER >= 10400 + + if (outlen == 0) { + return false; + } + output->resize(static_cast(output_header_len + outlen)); + return true; +#else // LZ4 + (void)info; + (void)compress_format_version; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +#ifdef XPRESS +inline bool XPRESS_Compress(const char* input, size_t length, + std::string* output) { + return port::xpress::Compress(input, length, output); +} +#else +inline bool XPRESS_Compress(const char* /*input*/, size_t /*length*/, + std::string* /*output*/) { + return false; +} +#endif + +#ifdef XPRESS +inline char* XPRESS_Uncompress(const char* input_data, size_t input_length, + size_t* uncompressed_size) { + return port::xpress::Decompress(input_data, input_length, uncompressed_size); +} +#else +inline char* XPRESS_Uncompress(const char* /*input_data*/, + size_t /*input_length*/, + size_t* /*uncompressed_size*/) { + return nullptr; +} +#endif + +inline bool ZSTD_Compress(const CompressionInfo& info, const char* input, + size_t length, ::std::string* output) { +#ifdef ZSTD + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + + size_t output_header_len = compression::PutDecompressedSizeInfo( + output, 
static_cast(length)); + + size_t compressBound = ZSTD_compressBound(length); + output->resize(static_cast(output_header_len + compressBound)); + size_t outlen = 0; + int level; + if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } else { + level = info.options().level; + } +#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ + ZSTD_CCtx* context = info.context().ZSTDPreallocCtx(); + assert(context != nullptr); +#if ZSTD_VERSION_NUMBER >= 700 // v0.7.0+ + if (info.dict().GetDigestedZstdCDict() != nullptr) { + outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len], + compressBound, input, length, + info.dict().GetDigestedZstdCDict()); + } +#endif // ZSTD_VERSION_NUMBER >= 700 + if (outlen == 0) { + outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len], + compressBound, input, length, + info.dict().GetRawDict().data(), + info.dict().GetRawDict().size(), level); + } +#else // up to v0.4.x + outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input, + length, level); +#endif // ZSTD_VERSION_NUMBER >= 500 + if (outlen == 0) { + return false; + } + output->resize(output_header_len + outlen); + return true; +#else // ZSTD + (void)info; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +// @param compression_dict Data for presetting the compression library's +// dictionary. +inline CacheAllocationPtr ZSTD_Uncompress( + const UncompressionInfo& info, const char* input_data, size_t input_length, + size_t* uncompressed_size, MemoryAllocator* allocator = nullptr) { +#ifdef ZSTD + uint32_t output_len = 0; + if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, + &output_len)) { + return nullptr; + } + + auto output = AllocateBlock(output_len, allocator); + size_t actual_output_length = 0; +#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ + ZSTD_DCtx* context = info.context().GetZSTDContext(); + assert(context != nullptr); +#ifdef ROCKSDB_ZSTD_DDICT + if (info.dict().GetDigestedZstdDDict() != nullptr) { + actual_output_length = ZSTD_decompress_usingDDict( + context, output.get(), output_len, input_data, input_length, + info.dict().GetDigestedZstdDDict()); + } +#endif // ROCKSDB_ZSTD_DDICT + if (actual_output_length == 0) { + actual_output_length = ZSTD_decompress_usingDict( + context, output.get(), output_len, input_data, input_length, + info.dict().GetRawDict().data(), info.dict().GetRawDict().size()); + } +#else // up to v0.4.x + (void)info; + actual_output_length = + ZSTD_decompress(output.get(), output_len, input_data, input_length); +#endif // ZSTD_VERSION_NUMBER >= 500 + assert(actual_output_length == output_len); + *uncompressed_size = actual_output_length; + return output; +#else // ZSTD + (void)info; + (void)input_data; + (void)input_length; + (void)uncompressed_size; + (void)allocator; + return nullptr; +#endif +} + +inline bool ZSTD_TrainDictionarySupported() { +#ifdef ZSTD + // Dictionary trainer is available since v0.6.1 for static linking, but not + // available for dynamic linking until v1.1.3. For now we enable the feature + // in v1.1.3+ only. 
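+  // (Illustrative note: ZSTD_versionNumber() encodes the version as
+  // MAJOR*10000 + MINOR*100 + RELEASE, so 10103 below corresponds to v1.1.3.)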
+ return (ZSTD_versionNumber() >= 10103); +#else + return false; +#endif +} + +inline std::string ZSTD_TrainDictionary(const std::string& samples, + const std::vector& sample_lens, + size_t max_dict_bytes) { + // Dictionary trainer is available since v0.6.1 for static linking, but not + // available for dynamic linking until v1.1.3. For now we enable the feature + // in v1.1.3+ only. +#if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+ + assert(samples.empty() == sample_lens.empty()); + if (samples.empty()) { + return ""; + } + std::string dict_data(max_dict_bytes, '\0'); + size_t dict_len = ZDICT_trainFromBuffer( + &dict_data[0], max_dict_bytes, &samples[0], &sample_lens[0], + static_cast(sample_lens.size())); + if (ZDICT_isError(dict_len)) { + return ""; + } + assert(dict_len <= max_dict_bytes); + dict_data.resize(dict_len); + return dict_data; +#else // up to v1.1.2 + assert(false); + (void)samples; + (void)sample_lens; + (void)max_dict_bytes; + return ""; +#endif // ZSTD_VERSION_NUMBER >= 10103 +} + +inline std::string ZSTD_TrainDictionary(const std::string& samples, + size_t sample_len_shift, + size_t max_dict_bytes) { + // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable + // only since v0.8.0. For now we enable the feature in stable versions only. +#if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+ + // skips potential partial sample at the end of "samples" + size_t num_samples = samples.size() >> sample_len_shift; + std::vector sample_lens(num_samples, size_t(1) << sample_len_shift); + return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes); +#else // up to v1.1.2 + assert(false); + (void)samples; + (void)sample_len_shift; + (void)max_dict_bytes; + return ""; +#endif // ZSTD_VERSION_NUMBER >= 10103 +} + +inline bool ZSTD_FinalizeDictionarySupported() { +#ifdef ZSTD + // ZDICT_finalizeDictionary API is stable since v1.4.5 + return (ZSTD_versionNumber() >= 10405); +#else + return false; +#endif +} + +inline std::string ZSTD_FinalizeDictionary( + const std::string& samples, const std::vector& sample_lens, + size_t max_dict_bytes, int level) { + // ZDICT_finalizeDictionary is stable since version v1.4.5 +#if ZSTD_VERSION_NUMBER >= 10405 // v1.4.5+ + assert(samples.empty() == sample_lens.empty()); + if (samples.empty()) { + return ""; + } + if (level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } + std::string dict_data(max_dict_bytes, '\0'); + size_t dict_len = ZDICT_finalizeDictionary( + dict_data.data(), max_dict_bytes, samples.data(), + std::min(static_cast(samples.size()), max_dict_bytes), + samples.data(), sample_lens.data(), + static_cast(sample_lens.size()), + {level, 0 /* notificationLevel */, 0 /* dictID */}); + if (ZDICT_isError(dict_len)) { + return ""; + } else { + assert(dict_len <= max_dict_bytes); + dict_data.resize(dict_len); + return dict_data; + } +#else // up to v1.4.4 + (void)samples; + (void)sample_lens; + (void)max_dict_bytes; + (void)level; + return ""; +#endif // ZSTD_VERSION_NUMBER >= 10405 +} + +inline bool CompressData(const Slice& raw, + const CompressionInfo& compression_info, + uint32_t compress_format_version, + std::string* compressed_output) { + bool ret = false; + + // Will return compressed block contents if (1) the compression method is + // supported in this platform and (2) the compression rate is "good enough". 
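+  // Illustrative caller-side sketch (not part of this function; names are
+  // hypothetical): callers implement the "good enough" test, roughly
+  //   std::string compressed;
+  //   if (CompressData(raw, info, /*compress_format_version=*/2,
+  //                    &compressed) &&
+  //       compressed.size() < raw.size() - (raw.size() / 8u)) {
+  //     // emit the compressed block
+  //   } else {
+  //     // emit the raw block
+  //   }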
+ switch (compression_info.type()) { + case kSnappyCompression: + ret = Snappy_Compress(compression_info, raw.data(), raw.size(), + compressed_output); + break; + case kZlibCompression: + ret = Zlib_Compress(compression_info, compress_format_version, raw.data(), + raw.size(), compressed_output); + break; + case kBZip2Compression: + ret = BZip2_Compress(compression_info, compress_format_version, + raw.data(), raw.size(), compressed_output); + break; + case kLZ4Compression: + ret = LZ4_Compress(compression_info, compress_format_version, raw.data(), + raw.size(), compressed_output); + break; + case kLZ4HCCompression: + ret = LZ4HC_Compress(compression_info, compress_format_version, + raw.data(), raw.size(), compressed_output); + break; + case kXpressCompression: + ret = XPRESS_Compress(raw.data(), raw.size(), compressed_output); + break; + case kZSTD: + case kZSTDNotFinalCompression: + ret = ZSTD_Compress(compression_info, raw.data(), raw.size(), + compressed_output); + break; + default: + // Do not recognize this compression type + break; + } + + TEST_SYNC_POINT_CALLBACK("CompressData:TamperWithReturnValue", + static_cast(&ret)); + + return ret; +} + +inline CacheAllocationPtr UncompressData( + const UncompressionInfo& uncompression_info, const char* data, size_t n, + size_t* uncompressed_size, uint32_t compress_format_version, + MemoryAllocator* allocator = nullptr) { + switch (uncompression_info.type()) { + case kSnappyCompression: + return Snappy_Uncompress(data, n, uncompressed_size, allocator); + case kZlibCompression: + return Zlib_Uncompress(uncompression_info, data, n, uncompressed_size, + compress_format_version, allocator); + case kBZip2Compression: + return BZip2_Uncompress(data, n, uncompressed_size, + compress_format_version, allocator); + case kLZ4Compression: + case kLZ4HCCompression: + return LZ4_Uncompress(uncompression_info, data, n, uncompressed_size, + compress_format_version, allocator); + case kXpressCompression: + // XPRESS allocates memory internally, thus no support for custom + // allocator. + return CacheAllocationPtr(XPRESS_Uncompress(data, n, uncompressed_size)); + case kZSTD: + case kZSTDNotFinalCompression: + return ZSTD_Uncompress(uncompression_info, data, n, uncompressed_size, + allocator); + default: + return CacheAllocationPtr(); + } +} + +// Records the compression type for subsequent WAL records. +class CompressionTypeRecord { + public: + explicit CompressionTypeRecord(CompressionType compression_type) + : compression_type_(compression_type) {} + + CompressionType GetCompressionType() const { return compression_type_; } + + inline void EncodeTo(std::string* dst) const { + assert(dst != nullptr); + PutFixed32(dst, compression_type_); + } + + inline Status DecodeFrom(Slice* src) { + constexpr char class_name[] = "CompressionTypeRecord"; + + uint32_t val; + if (!GetFixed32(src, &val)) { + return Status::Corruption(class_name, + "Error decoding WAL compression type"); + } + CompressionType compression_type = static_cast(val); + if (!StreamingCompressionTypeSupported(compression_type)) { + return Status::Corruption(class_name, + "WAL compression type not supported"); + } + compression_type_ = compression_type; + return Status::OK(); + } + + inline std::string DebugString() const { + return "compression_type: " + CompressionTypeToString(compression_type_); + } + + private: + CompressionType compression_type_; +}; + +// Base class to implement compression for a stream of buffers. 
+// Instantiate an implementation of the class using Create() with the
+// compression type and use Compress() repeatedly.
+// The output buffer needs to be at least max_output_len.
+// Call Reset() in between frame boundaries or in case of an error.
+// NOTE: This class is not thread safe.
+class StreamingCompress {
+ public:
+  StreamingCompress(CompressionType compression_type,
+                    const CompressionOptions& opts,
+                    uint32_t compress_format_version, size_t max_output_len)
+      : compression_type_(compression_type),
+        opts_(opts),
+        compress_format_version_(compress_format_version),
+        max_output_len_(max_output_len) {}
+  virtual ~StreamingCompress() = default;
+  // Compress should be called repeatedly with the same input until the method
+  // returns 0.
+  // Parameters:
+  // input - buffer to compress
+  // input_size - size of the input buffer
+  // output - compressed buffer allocated by the caller, should be at least
+  // max_output_len
+  // output_pos - number of bytes written to the output buffer
+  // Returns -1 on error; otherwise, the number of input bytes that still
+  // need to be compressed.
+  virtual int Compress(const char* input, size_t input_size, char* output,
+                       size_t* output_pos) = 0;
+  // Static factory method that creates an object of a class derived from
+  // StreamingCompress for the given compression type.
+  static StreamingCompress* Create(CompressionType compression_type,
+                                   const CompressionOptions& opts,
+                                   uint32_t compress_format_version,
+                                   size_t max_output_len);
+  virtual void Reset() = 0;
+
+ protected:
+  const CompressionType compression_type_;
+  const CompressionOptions opts_;
+  const uint32_t compress_format_version_;
+  const size_t max_output_len_;
+};
+
+// Base class to uncompress a stream of compressed buffers.
+// Instantiate an implementation of the class using Create() with the
+// compression type and use Uncompress() repeatedly.
+// The output buffer needs to be at least max_output_len.
+// Call Reset() in between frame boundaries or in case of an error.
+// NOTE: This class is not thread safe.
+class StreamingUncompress {
+ public:
+  StreamingUncompress(CompressionType compression_type,
+                      uint32_t compress_format_version, size_t max_output_len)
+      : compression_type_(compression_type),
+        compress_format_version_(compress_format_version),
+        max_output_len_(max_output_len) {}
+  virtual ~StreamingUncompress() = default;
+  // Uncompress can be called repeatedly to progressively process the same
+  // input buffer, or can be called with a new input buffer. When the input
+  // buffer is not fully consumed, the return value is > 0 or output_pos
+  // == max_output_len. When calling Uncompress to continue processing the
+  // same input buffer, the input argument should be nullptr.
+  // Parameters:
+  // input - buffer to uncompress
+  // input_size - size of the input buffer
+  // output - uncompressed buffer allocated by the caller, should be at least
+  // max_output_len
+  // output_pos - number of bytes written to the output buffer
+  // Returns -1 on error; otherwise, the number of input bytes still to be
+  // processed.
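+  // Illustrative driver loop for a concrete implementation (sketch only;
+  // `uncompress`, `in`, `in_size`, and `out` are hypothetical caller
+  // variables):
+  //   size_t out_pos = 0;
+  //   int remaining = uncompress->Uncompress(in, in_size, out, &out_pos);
+  //   while (remaining > 0 || out_pos == max_output_len) {
+  //     /* consume out[0..out_pos) */
+  //     out_pos = 0;
+  //     remaining = uncompress->Uncompress(nullptr, 0, out, &out_pos);
+  //   }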
+ virtual int Uncompress(const char* input, size_t input_size, char* output, + size_t* output_pos) = 0; + static StreamingUncompress* Create(CompressionType compression_type, + uint32_t compress_format_version, + size_t max_output_len); + virtual void Reset() = 0; + + protected: + CompressionType compression_type_; + uint32_t compress_format_version_; + size_t max_output_len_; +}; + +class ZSTDStreamingCompress final : public StreamingCompress { + public: + explicit ZSTDStreamingCompress(const CompressionOptions& opts, + uint32_t compress_format_version, + size_t max_output_len) + : StreamingCompress(kZSTD, opts, compress_format_version, + max_output_len) { +#ifdef ZSTD_STREAMING + cctx_ = ZSTD_createCCtx(); + // Each compressed frame will have a checksum + ZSTD_CCtx_setParameter(cctx_, ZSTD_c_checksumFlag, 1); + assert(cctx_ != nullptr); + input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; +#endif + } + ~ZSTDStreamingCompress() override { +#ifdef ZSTD_STREAMING + ZSTD_freeCCtx(cctx_); +#endif + } + int Compress(const char* input, size_t input_size, char* output, + size_t* output_pos) override; + void Reset() override; +#ifdef ZSTD_STREAMING + ZSTD_CCtx* cctx_; + ZSTD_inBuffer input_buffer_; +#endif +}; + +class ZSTDStreamingUncompress final : public StreamingUncompress { + public: + explicit ZSTDStreamingUncompress(uint32_t compress_format_version, + size_t max_output_len) + : StreamingUncompress(kZSTD, compress_format_version, max_output_len) { +#ifdef ZSTD_STREAMING + dctx_ = ZSTD_createDCtx(); + assert(dctx_ != nullptr); + input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; +#endif + } + ~ZSTDStreamingUncompress() override { +#ifdef ZSTD_STREAMING + ZSTD_freeDCtx(dctx_); +#endif + } + int Uncompress(const char* input, size_t input_size, char* output, + size_t* output_size) override; + void Reset() override; + + private: +#ifdef ZSTD_STREAMING + ZSTD_DCtx* dctx_; + ZSTD_inBuffer input_buffer_; +#endif +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.cc new file mode 100644 index 0000000000..52c3fac72a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// + +#include "util/compression_context_cache.h" + +#include + +#include "util/compression.h" +#include "util/core_local.h" + +namespace ROCKSDB_NAMESPACE { +namespace compression_cache { + +void* const SentinelValue = nullptr; +// Cache ZSTD uncompression contexts for reads +// if needed we can add ZSTD compression context caching +// which is currently is not done since BlockBasedTableBuilder +// simply creates one compression context per new SST file. 
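+//
+// Borrow/return protocol of the struct below (illustrative summary, not
+// upstream documentation): while the cached instance is free, the sentinel
+// atomic holds &uncomp_cached_data_; a reader borrows it by CAS-ing the
+// sentinel to nullptr (SentinelValue), and returns it by exchange()-ing the
+// pointer back. A thread that loses the race simply creates a one-time-use
+// context on the heap instead of blocking.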
+struct ZSTDCachedData {
+  // We choose to cache the structure below instead of a pointer because we
+  // want to a) avoid leaking native types and b) keep cache use transparent
+  // to the user
+  ZSTDUncompressCachedData uncomp_cached_data_;
+  std::atomic<void*> zstd_uncomp_sentinel_;
+
+  char
+      padding[(CACHE_LINE_SIZE -
+               (sizeof(ZSTDUncompressCachedData) +
+                sizeof(std::atomic<void*>)) %
+                   CACHE_LINE_SIZE)];  // unused padding field
+
+  ZSTDCachedData() : zstd_uncomp_sentinel_(&uncomp_cached_data_) {}
+  ZSTDCachedData(const ZSTDCachedData&) = delete;
+  ZSTDCachedData& operator=(const ZSTDCachedData&) = delete;
+
+  ZSTDUncompressCachedData GetUncompressData(int64_t idx) {
+    ZSTDUncompressCachedData result;
+    void* expected = &uncomp_cached_data_;
+    if (zstd_uncomp_sentinel_.compare_exchange_strong(expected,
+                                                      SentinelValue)) {
+      uncomp_cached_data_.CreateIfNeeded();
+      result.InitFromCache(uncomp_cached_data_, idx);
+    } else {
+      // Creates one-time-use data
+      result.CreateIfNeeded();
+    }
+    return result;
+  }
+  // Return the entry back into circulation.
+  // This is executed only when we successfully obtained the cached entry
+  // in the first place.
+  void ReturnUncompressData() {
+    if (zstd_uncomp_sentinel_.exchange(&uncomp_cached_data_) !=
+        SentinelValue) {
+      // Means we are returning while not having it acquired.
+      assert(false);
+    }
+  }
+};
+static_assert(sizeof(ZSTDCachedData) % CACHE_LINE_SIZE == 0,
+              "Expected CACHE_LINE_SIZE alignment");
+}  // namespace compression_cache
+
+class CompressionContextCache::Rep {
+ public:
+  Rep() {}
+  ZSTDUncompressCachedData GetZSTDUncompressData() {
+    auto p = per_core_uncompr_.AccessElementAndIndex();
+    int64_t idx = static_cast<int64_t>(p.second);
+    return p.first->GetUncompressData(idx);
+  }
+  void ReturnZSTDUncompressData(int64_t idx) {
+    assert(idx >= 0);
+    auto* cn = per_core_uncompr_.AccessAtCore(static_cast<size_t>(idx));
+    cn->ReturnUncompressData();
+  }
+
+ private:
+  CoreLocalArray<compression_cache::ZSTDCachedData> per_core_uncompr_;
+};
+
+CompressionContextCache::CompressionContextCache() : rep_(new Rep()) {}
+
+CompressionContextCache* CompressionContextCache::Instance() {
+  static CompressionContextCache instance;
+  return &instance;
+}
+
+void CompressionContextCache::InitSingleton() { Instance(); }
+
+ZSTDUncompressCachedData
+CompressionContextCache::GetCachedZSTDUncompressData() {
+  return rep_->GetZSTDUncompressData();
+}
+
+void CompressionContextCache::ReturnCachedZSTDUncompressData(int64_t idx) {
+  rep_->ReturnZSTDUncompressData(idx);
+}
+
+CompressionContextCache::~CompressionContextCache() { delete rep_; }
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.h
new file mode 100644
index 0000000000..7b7b2d507a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/compression_context_cache.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+
+// The compression context cache allows caching compression/uncompression
+// contexts. This helps with random-read latencies and reduces CPU
+// utilization. Caching is implemented using the CoreLocal facility:
+// compression/uncompression instances are cached on a per-core basis using
+// CoreLocalArray. A borrowed instance is atomically replaced with a sentinel
+// value for the time it is in use. If it turns out that another thread is
+// already making use of the instance, we still create one on the heap, which
+// is destroyed later.
+
+#pragma once
+
+#include <stdint.h>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+class ZSTDUncompressCachedData;
+
+class CompressionContextCache {
+ public:
+  // Singleton
+  static CompressionContextCache* Instance();
+  static void InitSingleton();
+  CompressionContextCache(const CompressionContextCache&) = delete;
+  CompressionContextCache& operator=(const CompressionContextCache&) = delete;
+
+  ZSTDUncompressCachedData GetCachedZSTDUncompressData();
+  void ReturnCachedZSTDUncompressData(int64_t idx);
+
+ private:
+  // Singleton
+  CompressionContextCache();
+  ~CompressionContextCache();
+
+  class Rep;
+  Rep* rep_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.cc
new file mode 100644
index 0000000000..a0fc7331f5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.cc
@@ -0,0 +1,64 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/concurrent_task_limiter_impl.h"
+
+#include "rocksdb/concurrent_task_limiter.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+ConcurrentTaskLimiterImpl::ConcurrentTaskLimiterImpl(
+    const std::string& name, int32_t max_outstanding_task)
+    : name_(name),
+      max_outstanding_tasks_{max_outstanding_task},
+      outstanding_tasks_{0} {}
+
+ConcurrentTaskLimiterImpl::~ConcurrentTaskLimiterImpl() {
+  assert(outstanding_tasks_ == 0);
+}
+
+const std::string& ConcurrentTaskLimiterImpl::GetName() const { return name_; }
+
+void ConcurrentTaskLimiterImpl::SetMaxOutstandingTask(int32_t limit) {
+  max_outstanding_tasks_.store(limit, std::memory_order_relaxed);
+}
+
+void ConcurrentTaskLimiterImpl::ResetMaxOutstandingTask() {
+  max_outstanding_tasks_.store(-1, std::memory_order_relaxed);
+}
+
+int32_t ConcurrentTaskLimiterImpl::GetOutstandingTask() const {
+  return outstanding_tasks_.load(std::memory_order_relaxed);
+}
+
+std::unique_ptr<TaskLimiterToken> ConcurrentTaskLimiterImpl::GetToken(
+    bool force) {
+  int32_t limit = max_outstanding_tasks_.load(std::memory_order_relaxed);
+  int32_t tasks = outstanding_tasks_.load(std::memory_order_relaxed);
+  // force = true, bypass the throttle.
+  // limit < 0 means unlimited tasks.
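+  // (Illustrative note: on failure, compare_exchange_weak reloads `tasks`
+  // with the current value, so each retry below re-evaluates the limit
+  // check against fresh state before attempting the increment again.)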
+  while (force || limit < 0 || tasks < limit) {
+    if (outstanding_tasks_.compare_exchange_weak(tasks, tasks + 1)) {
+      return std::unique_ptr<TaskLimiterToken>(new TaskLimiterToken(this));
+    }
+  }
+  return nullptr;
+}
+
+ConcurrentTaskLimiter* NewConcurrentTaskLimiter(const std::string& name,
+                                                int32_t limit) {
+  return new ConcurrentTaskLimiterImpl(name, limit);
+}
+
+TaskLimiterToken::~TaskLimiterToken() {
+  --limiter_->outstanding_tasks_;
+  assert(limiter_->outstanding_tasks_ >= 0);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.h
new file mode 100644
index 0000000000..4952ae23aa
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/concurrent_task_limiter_impl.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <atomic>
+#include <memory>
+
+#include "rocksdb/concurrent_task_limiter.h"
+#include "rocksdb/env.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TaskLimiterToken;
+
+class ConcurrentTaskLimiterImpl : public ConcurrentTaskLimiter {
+ public:
+  explicit ConcurrentTaskLimiterImpl(const std::string& name,
+                                     int32_t max_outstanding_task);
+  // No copying allowed
+  ConcurrentTaskLimiterImpl(const ConcurrentTaskLimiterImpl&) = delete;
+  ConcurrentTaskLimiterImpl& operator=(const ConcurrentTaskLimiterImpl&) =
+      delete;
+
+  virtual ~ConcurrentTaskLimiterImpl();
+
+  virtual const std::string& GetName() const override;
+
+  virtual void SetMaxOutstandingTask(int32_t limit) override;
+
+  virtual void ResetMaxOutstandingTask() override;
+
+  virtual int32_t GetOutstandingTask() const override;
+
+  // Request token for adding a new task.
+  // If force == true, it requests a token bypassing throttle.
+  // Returns nullptr if it got throttled.
+  virtual std::unique_ptr<TaskLimiterToken> GetToken(bool force);
+
+ private:
+  friend class TaskLimiterToken;
+
+  std::string name_;
+  std::atomic<int32_t> max_outstanding_tasks_;
+  std::atomic<int32_t> outstanding_tasks_;
+};
+
+class TaskLimiterToken {
+ public:
+  explicit TaskLimiterToken(ConcurrentTaskLimiterImpl* limiter)
+      : limiter_(limiter) {}
+  ~TaskLimiterToken();
+
+ private:
+  ConcurrentTaskLimiterImpl* limiter_;
+
+  // no copying allowed
+  TaskLimiterToken(const TaskLimiterToken&) = delete;
+  void operator=(const TaskLimiterToken&) = delete;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/core_local.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/core_local.h
new file mode 100644
index 0000000000..25174aef84
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/core_local.h
@@ -0,0 +1,84 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
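+//
+// Usage sketch for the class defined below (illustrative only, not part of
+// the original header; `counters` is a hypothetical variable):
+//   CoreLocalArray<std::atomic<uint64_t>> counters;
+//   counters.Access()->fetch_add(1, std::memory_order_relaxed);
+//   uint64_t total = 0;
+//   for (size_t i = 0; i < counters.Size(); ++i) {
+//     total += counters.AccessAtCore(i)->load(std::memory_order_relaxed);
+//   }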
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <thread>
+#include <utility>
+#include <vector>
+
+#include "port/likely.h"
+#include "port/port.h"
+#include "util/random.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// An array of core-local values. Ideally the value type, T, is cache aligned
+// to prevent false sharing.
+template <typename T>
+class CoreLocalArray {
+ public:
+  CoreLocalArray();
+
+  size_t Size() const;
+  // returns pointer to the element corresponding to the core that the thread
+  // currently runs on.
+  T* Access() const;
+  // same as above, but also returns the core index, which the client can cache
+  // to reduce how often core ID needs to be retrieved. Only do this if some
+  // inaccuracy is tolerable, as the thread may migrate to a different core.
+  std::pair<T*, size_t> AccessElementAndIndex() const;
+  // returns pointer to element for the specified core index. This can be used,
+  // e.g., for aggregation, or if the client caches core index.
+  T* AccessAtCore(size_t core_idx) const;
+
+ private:
+  std::unique_ptr<T[]> data_;
+  int size_shift_;
+};
+
+template <typename T>
+CoreLocalArray<T>::CoreLocalArray() {
+  int num_cpus = static_cast<int>(std::thread::hardware_concurrency());
+  // find a power of two >= num_cpus and >= 8
+  size_shift_ = 3;
+  while (1 << size_shift_ < num_cpus) {
+    ++size_shift_;
+  }
+  data_.reset(new T[static_cast<size_t>(1) << size_shift_]);
+}
+
+template <typename T>
+size_t CoreLocalArray<T>::Size() const {
+  return static_cast<size_t>(1) << size_shift_;
+}
+
+template <typename T>
+T* CoreLocalArray<T>::Access() const {
+  return AccessElementAndIndex().first;
+}
+
+template <typename T>
+std::pair<T*, size_t> CoreLocalArray<T>::AccessElementAndIndex() const {
+  int cpuid = port::PhysicalCoreID();
+  size_t core_idx;
+  if (UNLIKELY(cpuid < 0)) {
+    // cpu id unavailable, just pick randomly
+    core_idx = Random::GetTLSInstance()->Uniform(1 << size_shift_);
+  } else {
+    core_idx = static_cast<size_t>(cpuid & ((1 << size_shift_) - 1));
+  }
+  return {AccessAtCore(core_idx), core_idx};
+}
+
+template <typename T>
+T* CoreLocalArray<T>::AccessAtCore(size_t core_idx) const {
+  assert(core_idx < static_cast<size_t>(1) << size_shift_);
+  return &data_[core_idx];
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coro_utils.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coro_utils.h
new file mode 100644
index 0000000000..5b4211135e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/coro_utils.h
@@ -0,0 +1,112 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#if defined(USE_COROUTINES)
+#include "folly/experimental/coro/Coroutine.h"
+#include "folly/experimental/coro/Task.h"
+#endif
+#include "rocksdb/rocksdb_namespace.h"
+
+// This file has two sections. The first section applies to all instances of
+// header file inclusion and has an include guard. The second section is
+// meant for multiple inclusions in the same source file, and is idempotent.
+namespace ROCKSDB_NAMESPACE {
+
+#ifndef UTIL_CORO_UTILS_H_
+#define UTIL_CORO_UTILS_H_
+
+#if defined(USE_COROUTINES)
+
+// The following macros expand to regular and coroutine function
+// declarations for a given function
+#define DECLARE_SYNC_AND_ASYNC(__ret_type__, __func_name__, ...) \
+  __ret_type__ __func_name__(__VA_ARGS__);                       \
+  folly::coro::Task<__ret_type__> __func_name__##Coroutine(__VA_ARGS__);
+
+#define DECLARE_SYNC_AND_ASYNC_OVERRIDE(__ret_type__, __func_name__, ...) 
\ + __ret_type__ __func_name__(__VA_ARGS__) override; \ + folly::coro::Task<__ret_type__> __func_name__##Coroutine(__VA_ARGS__) \ + override; + +#define DECLARE_SYNC_AND_ASYNC_CONST(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__) const; \ + folly::coro::Task<__ret_type__> __func_name__##Coroutine(__VA_ARGS__) const; + +constexpr bool using_coroutines() { return true; } +#else // !USE_COROUTINES + +// The follwoing macros expand to a regular function declaration for a given +// function +#define DECLARE_SYNC_AND_ASYNC(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__); + +#define DECLARE_SYNC_AND_ASYNC_OVERRIDE(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__) override; + +#define DECLARE_SYNC_AND_ASYNC_CONST(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__) const; + +constexpr bool using_coroutines() { return false; } +#endif // USE_COROUTINES +#endif // UTIL_CORO_UTILS_H_ + +// The following section of the file is meant to be included twice in a +// source file - once defining WITH_COROUTINES and once defining +// WITHOUT_COROUTINES +#undef DEFINE_SYNC_AND_ASYNC +#undef CO_AWAIT +#undef CO_RETURN + +#if defined(WITH_COROUTINES) && defined(USE_COROUTINES) + +// This macro should be used in the beginning of the function +// definition. The declaration should have been done using one of the +// DECLARE_SYNC_AND_ASYNC* macros. It expands to the return type and +// the function name with the Coroutine suffix. For example - +// DEFINE_SYNC_AND_ASYNC(int, foo)(bool bar) {} +// would expand to - +// folly::coro::Task fooCoroutine(bool bar) {} +#define DEFINE_SYNC_AND_ASYNC(__ret_type__, __func_name__) \ + folly::coro::Task<__ret_type__> __func_name__##Coroutine + +// This macro should be used to call a function that might be a +// coroutine. It expands to the correct function name and prefixes +// the co_await operator if necessary. For example - +// s = CO_AWAIT(foo)(true); +// if the code is compiled WITH_COROUTINES, would expand to +// s = co_await fooCoroutine(true); +// if compiled WITHOUT_COROUTINES, would expand to +// s = foo(true); +#define CO_AWAIT(__func_name__) co_await __func_name__##Coroutine + +#define CO_RETURN co_return + +#elif defined(WITHOUT_COROUTINES) + +// This macro should be used in the beginning of the function +// definition. The declaration should have been done using one of the +// DECLARE_SYNC_AND_ASYNC* macros. It expands to the return type and +// the function name without the Coroutine suffix. For example - +// DEFINE_SYNC_AND_ASYNC(int, foo)(bool bar) {} +// would expand to - +// int foo(bool bar) {} +#define DEFINE_SYNC_AND_ASYNC(__ret_type__, __func_name__) \ + __ret_type__ __func_name__ + +// This macro should be used to call a function that might be a +// coroutine. It expands to the correct function name and prefixes +// the co_await operator if necessary. 
For example - +// s = CO_AWAIT(foo)(true); +// if the code is compiled WITH_COROUTINES, would expand to +// s = co_await fooCoroutine(true); +// if compiled WITHOUT_COROUTINES, would expand to +// s = foo(true); +#define CO_AWAIT(__func_name__) __func_name__ + +#define CO_RETURN return + +#endif // DO_NOT_USE_COROUTINES +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.cc new file mode 100644 index 0000000000..d4cd78b52e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.cc @@ -0,0 +1,1292 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A portable implementation of crc32c, optimized to handle +// four bytes at a time. +#include "util/crc32c.h" + +#include + +#include +#include + +#include "port/lang.h" +#include "util/coding.h" +#include "util/crc32c_arm64.h" +#include "util/math.h" + +#ifdef __powerpc64__ +#include "util/crc32c_ppc.h" +#include "util/crc32c_ppc_constants.h" + +#if __linux__ +#ifdef ROCKSDB_AUXV_GETAUXVAL_PRESENT +#include +#endif + +#ifndef PPC_FEATURE2_VEC_CRYPTO +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#elif __FreeBSD__ +#include +#include +#include +#endif /* __linux__ */ + +#endif + +ASSERT_FEATURE_COMPAT_HEADER(); + +#ifdef __SSE4_2__ +#include +#include +#endif + +#if defined(HAVE_ARM64_CRC) +bool pmull_runtime_flag = false; +#endif + +namespace ROCKSDB_NAMESPACE { +namespace crc32c { + +#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) +#ifdef __powerpc64__ +static int arch_ppc_crc32 = 0; +#endif /* __powerpc64__ */ +#endif + +static const uint32_t table0_[256] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, + 0x26a1e7e8, 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c, + 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, + 0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 0xaa64d611, 0x580f5512, + 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, + 0x1642ae59, 0xe4292d5a, 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 0x417b1dbc, 0xb3109ebf, + 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, + 0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f, + 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, + 0x4767748a, 0xb50cf789, 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 
0x7198540d, 0x83f3d70e, + 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, + 0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 0x082f63b7, 0xfa44e0b4, + 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, + 0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6, 0x502036a5, + 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, + 0x0e330a81, 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905, + 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, + 0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8, + 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, + 0x7fab5e8c, 0x8dc0dd8f, 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 0x69e9f0d5, 0x9b8273d6, + 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, + 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351}; +#ifndef __SSE4_2__ +static const uint32_t table1_[256] = { + 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, 0x4e8a61dc, 0x5d28f9ab, + 0x69cf5132, 0x7a6dc945, 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, + 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, 0x3fc5f181, 0x2c6769f6, + 0x1880c16f, 0x0b225918, 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, + 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, 0xec5b53e5, 0xfff9cb92, + 0xcb1e630b, 0xd8bcfb7c, 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, + 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, 0xe29f20ba, 0xf13db8cd, + 0xc5da1054, 0xd6788823, 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, + 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, 0x0ec4735f, 0x1d66eb28, + 0x298143b1, 0x3a23dbc6, 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, + 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, 0xff17c604, 0xecb55e73, + 0xd852f6ea, 0xcbf06e9d, 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, + 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, 0x2c896460, 0x3f2bfc17, + 0x0bcc548e, 0x186eccf9, 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, + 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, 0x5dc6f43d, 0x4e646c4a, + 0x7a83c4d3, 0x69215ca4, 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, + 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, 0xce1644da, 0xddb4dcad, + 0xe9537434, 0xfaf1ec43, 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, + 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, 0xbf59d487, 0xacfb4cf0, + 0x981ce469, 0x8bbe7c1e, 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, + 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, 0x6cc776e3, 0x7f65ee94, + 0x4b82460d, 0x5820de7a, 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, + 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, 0x66d73941, 0x7575a136, + 0x419209af, 
0x523091d8, 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, + 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, 0x8a8c6aa4, 0x992ef2d3, + 0xadc95a4a, 0xbe6bc23d, 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, + 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, 0x844819fb, 0x97ea818c, + 0xa30d2915, 0xb0afb162, 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, + 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, 0x57d6bb9f, 0x447423e8, + 0x70938b71, 0x63311306, 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, + 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, 0x26992bc2, 0x353bb3b5, + 0x01dc1b2c, 0x127e835b, 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, + 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, 0x4a5e5d21, 0x59fcc556, + 0x6d1b6dcf, 0x7eb9f5b8, 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, + 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, 0x3b11cd7c, 0x28b3550b, + 0x1c54fd92, 0x0ff665e5, 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, + 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, 0xe88f6f18, 0xfb2df76f, + 0xcfca5ff6, 0xdc68c781, 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, + 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, 0xe64b1c47, 0xf5e98430, + 0xc10e2ca9, 0xd2acb4de, 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, + 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, 0x0a104fa2, 0x19b2d7d5, + 0x2d557f4c, 0x3ef7e73b, 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, + 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483}; +static const uint32_t table2_[256] = { + 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, 0x9edea41a, 0x3b9f3664, + 0xd1b1f617, 0x74f06469, 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, + 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, 0x70a27d8a, 0xd5e3eff4, + 0x3fcd2f87, 0x9a8cbdf9, 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, + 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, 0xd62de755, 0x736c752b, + 0x9942b558, 0x3c032726, 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, + 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, 0xd915c5d1, 0x7c5457af, + 0x967a97dc, 0x333b05a2, 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, + 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, 0x0f382284, 0xaa79b0fa, + 0x40577089, 0xe516e2f7, 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, + 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, 0xc76580d9, 0x622412a7, + 0x880ad2d4, 0x2d4b40aa, 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, + 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, 0x61ea1a06, 0xc4ab8878, + 0x2e85480b, 0x8bc4da75, 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, + 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, 0x8f96c396, 0x2ad751e8, + 0xc0f9919b, 0x65b803e5, 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, + 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, 0xb8ffdfd7, 0x1dbe4da9, + 0xf7908dda, 0x52d11fa4, 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, + 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, 0x56830647, 0xf3c29439, + 0x19ec544a, 0xbcadc634, 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, + 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, 0xf00c9c98, 0x554d0ee6, + 0xbf63ce95, 0x1a225ceb, 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, + 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, 0xb3764986, 0x1637dbf8, + 0xfc191b8b, 0x595889f5, 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, + 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, 0x655baed3, 0xc01a3cad, + 0x2a34fcde, 0x8f756ea0, 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, + 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, 0x6a638c57, 0xcf221e29, + 0x250cde5a, 0x804d4c24, 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, 
+ 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, 0xccec1688, 0x69ad84f6, + 0x83834485, 0x26c2d6fb, 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, + 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, 0x2290cf18, 0x87d15d66, + 0x6dff9d15, 0xc8be0f6b, 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, + 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, 0xd29c5380, 0x77ddc1fe, + 0x9df3018d, 0x38b293f3, 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, + 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, 0x3ce08a10, 0x99a1186e, + 0x738fd81d, 0xd6ce4a63, 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, + 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, 0x9a6f10cf, 0x3f2e82b1, + 0xd50042c2, 0x7041d0bc, 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, + 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, 0x9557324b, 0x3016a035, + 0xda386046, 0x7f79f238, 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, + 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, 0x437ad51e, 0xe63b4760, + 0x0c158713, 0xa954156d, 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, + 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8}; +static const uint32_t table3_[256] = { + 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, 0x7b2231f3, 0xa6679b4b, + 0xc4451272, 0x1900b8ca, 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, + 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, 0xe964b13d, 0x34211b85, + 0x560392bc, 0x8b463804, 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, + 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, 0x6402e328, 0xb9474990, + 0xdb65c0a9, 0x06206a11, 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, + 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, 0x2161776d, 0xfc24ddd5, + 0x9e0654ec, 0x4343fe54, 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, + 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, 0x45639445, 0x98263efd, + 0xfa04b7c4, 0x27411d7c, 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, + 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, 0xaba65fe7, 0x76e3f55f, + 0x14c17c66, 0xc984d6de, 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, + 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, 0x26c00df2, 0xfb85a74a, + 0x99a72e73, 0x44e284cb, 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, + 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, 0xb4868d3c, 0x69c32784, + 0x0be1aebd, 0xd6a40405, 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, + 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, 0x07a17a9f, 0xdae4d027, + 0xb8c6591e, 0x6583f3a6, 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, + 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, 0x95e7fa51, 0x48a250e9, + 0x2a80d9d0, 0xf7c57368, 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, + 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, 0x1881a844, 0xc5c402fc, + 0xa7e68bc5, 0x7aa3217d, 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, + 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, 0xa4e4aad9, 0x79a10061, + 0x1b838958, 0xc6c623e0, 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, + 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, 0xc0e649f1, 0x1da3e349, + 0x7f816a70, 0xa2c4c0c8, 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, + 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, 0x8585ddb4, 0x58c0770c, + 0x3ae2fe35, 0xe7a7548d, 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, + 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, 0x08e38fa1, 0xd5a62519, + 0xb784ac20, 0x6ac10698, 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, + 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, 0x9aa50f6f, 0x47e0a5d7, + 0x25c22cee, 0xf8878656, 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, + 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, 
0x8224a72b, 0x5f610d93,
+    0x3d4384aa, 0xe0062e12, 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07,
+    0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, 0x106227e5, 0xcd278d5d,
+    0xaf050464, 0x7240aedc, 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f,
+    0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, 0x9d0475f0, 0x4041df48,
+    0x22635671, 0xff26fcc9, 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a,
+    0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, 0xd867e1b5, 0x05224b0d,
+    0x6700c234, 0xba45688c, 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f,
+    0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, 0xbc65029d, 0x6120a825,
+    0x0302211c, 0xde478ba4, 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1,
+    0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842};
+
+// Used to fetch a naturally-aligned 32-bit word in little endian byte-order
+static inline uint32_t LE_LOAD32(const uint8_t* p) {
+  return DecodeFixed32(reinterpret_cast<const char*>(p));
+}
+#endif  // !__SSE4_2__
+
+static inline void DefaultCRC32(uint64_t* l, uint8_t const** p) {
+#ifndef __SSE4_2__
+  uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
+  *p += 4;
+  *l = table3_[c & 0xff] ^ table2_[(c >> 8) & 0xff] ^
+       table1_[(c >> 16) & 0xff] ^ table0_[c >> 24];
+  // Do it twice.
+  c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
+  *p += 4;
+  *l = table3_[c & 0xff] ^ table2_[(c >> 8) & 0xff] ^
+       table1_[(c >> 16) & 0xff] ^ table0_[c >> 24];
+#elif defined(__LP64__) || defined(_WIN64)
+  *l = _mm_crc32_u64(*l, DecodeFixed64(reinterpret_cast<const char*>(*p)));
+  *p += 8;
+#else
+  *l = _mm_crc32_u32(static_cast<uint32_t>(*l), LE_LOAD32(*p));
+  *p += 4;
+  *l = _mm_crc32_u32(static_cast<uint32_t>(*l), LE_LOAD32(*p));
+  *p += 4;
+#endif
+}
+
+template <void (*CRC32)(uint64_t*, uint8_t const**)>
+uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
+  const uint8_t* p = reinterpret_cast<const uint8_t*>(buf);
+  const uint8_t* e = p + size;
+  uint64_t l = crc ^ 0xffffffffu;
+
+// Align n to (1 << m) byte boundary
+#define ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1))
+
+#define STEP1                    \
+  do {                           \
+    int c = (l & 0xff) ^ *p++;   \
+    l = table0_[c] ^ (l >> 8);   \
+  } while (0)
+
+  // Point x at first 16-byte aligned byte in string. This might be
+  // just past the end of the string.
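+  // (For example, with m == 4 the ALIGN macro rounds up to the next 16-byte
+  // boundary: ALIGN(0x1003, 4) == (0x1003 + 15) & ~15 == 0x1010, while an
+  // already-aligned value such as 0x1010 is left unchanged.)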
+  const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
+  const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
+  if (x <= e) {
+    // Process bytes until finished or p is 16-byte aligned
+    while (p != x) {
+      STEP1;
+    }
+  }
+  // Process bytes 16 at a time
+  while ((e - p) >= 16) {
+    CRC32(&l, &p);
+    CRC32(&l, &p);
+  }
+  // Process bytes 8 at a time
+  while ((e - p) >= 8) {
+    CRC32(&l, &p);
+  }
+  // Process the last few bytes
+  while (p != e) {
+    STEP1;
+  }
+#undef STEP1
+#undef ALIGN
+  return static_cast<uint32_t>(l ^ 0xffffffffu);
+}
+
+using Function = uint32_t (*)(uint32_t, const char*, size_t);
+
+#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
+uint32_t ExtendPPCImpl(uint32_t crc, const char* buf, size_t size) {
+  return crc32c_ppc(crc, (const unsigned char*)buf, size);
+}
+
+#if __linux__
+static int arch_ppc_probe(void) {
+  arch_ppc_crc32 = 0;
+
+#if defined(__powerpc64__) && defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT)
+  if (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) arch_ppc_crc32 = 1;
+#endif /* __powerpc64__ */
+
+  return arch_ppc_crc32;
+}
+#elif __FreeBSD__
+static int arch_ppc_probe(void) {
+  unsigned long cpufeatures;
+  arch_ppc_crc32 = 0;
+
+#if defined(__powerpc64__)
+  elf_aux_info(AT_HWCAP2, &cpufeatures, sizeof(cpufeatures));
+  if (cpufeatures & PPC_FEATURE2_HAS_VEC_CRYPTO) arch_ppc_crc32 = 1;
+#endif /* __powerpc64__ */
+
+  return arch_ppc_crc32;
+}
+#endif  // __linux__
+
+static bool isAltiVec() {
+  if (arch_ppc_probe()) {
+    return true;
+  } else {
+    return false;
+  }
+}
+#endif
+
+#if defined(HAVE_ARM64_CRC)
+uint32_t ExtendARMImpl(uint32_t crc, const char* buf, size_t size) {
+  return crc32c_arm64(crc, (const unsigned char*)buf, size);
+}
+#endif
+
+std::string IsFastCrc32Supported() {
+  bool has_fast_crc = false;
+  std::string fast_zero_msg;
+  std::string arch;
+#ifdef HAVE_POWER8
+#ifdef HAS_ALTIVEC
+  if (arch_ppc_probe()) {
+    has_fast_crc = true;
+    arch = "PPC";
+  }
+#else
+  has_fast_crc = false;
+  arch = "PPC";
+#endif
+#elif defined(HAVE_ARM64_CRC)
+  if (crc32c_runtime_check()) {
+    has_fast_crc = true;
+    arch = "Arm64";
+    pmull_runtime_flag = crc32c_pmull_runtime_check();
+  } else {
+    has_fast_crc = false;
+    arch = "Arm64";
+  }
+#else
+#ifdef __SSE4_2__
+  has_fast_crc = true;
+#endif  // __SSE4_2__
+  arch = "x86";
+#endif
+  if (has_fast_crc) {
+    fast_zero_msg.append("Supported on " + arch);
+  } else {
+    fast_zero_msg.append("Not supported on " + arch);
+  }
+  return fast_zero_msg;
+}
+
+/*
+ * Copyright 2016 Ferry Toth, Exalon Delft BV, The Netherlands
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the author be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ *   claim that you wrote the original software. If you use this software
+ *   in a product, an acknowledgment in the product documentation would be
+ *   appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *   misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ * Ferry Toth + * ftoth@exalondelft.nl + * + * https://github.com/htot/crc32c + * + * Modified by Facebook + * + * Original intel whitepaper: + * "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction" + * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf + * + * This version is from the folly library, created by Dave Watson + * + * + */ +#if defined(__SSE4_2__) && defined(__PCLMUL__) + +#define CRCtriplet(crc, buf, offset) \ + crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \ + crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \ + crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset)); + +#define CRCduplet(crc, buf, offset) \ + crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \ + crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); + +#define CRCsinglet(crc, buf, offset) \ + crc = _mm_crc32_u64(crc, *(uint64_t*)(buf + offset)); + +// Numbers taken directly from intel whitepaper. +// clang-format off +const uint64_t clmul_constants[] = { + 0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6, + 0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e, + 0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da, + 0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8, + 0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296, + 0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2, + 0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6, + 0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092, + 0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0, + 0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456, + 0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e, + 0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a, + 0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574, + 0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832, + 0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124, + 0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86, + 0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e, + 0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a, + 0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46, + 0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a, + 0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a, + 0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4, + 0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56, + 0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2, + 0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c, + 0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac, + 0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64, + 0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e, + 0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c, + 0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28, + 0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26, + 0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c, + 0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c, + 0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c, + 0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4, + 0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844, + 0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c, + 0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730, + 0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c, + 0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2, + 0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2, + 0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e, + 0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a, + 0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a, + 0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a, + 0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768, + 0x1ed5a407a, 0x0e78eb416, 
0x0d2c3ed1a, 0x13c2b89c4,
+    0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c,
+    0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba,
+    0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312,
+    0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544,
+    0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a,
+    0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e,
+    0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a,
+    0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c,
+    0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a,
+    0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6,
+    0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca,
+    0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888,
+    0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e,
+    0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528,
+    0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a,
+    0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e,
+    0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa,
+};
+
+// Compute the crc32c value for buffers smaller than 8 bytes
+#ifdef ROCKSDB_UBSAN_RUN
+#if defined(__clang__)
+__attribute__((__no_sanitize__("alignment")))
+#elif defined(__GNUC__)
+__attribute__((__no_sanitize_undefined__))
+#endif
+#endif
+inline void align_to_8(
+    size_t len,
+    uint64_t& crc0,                // crc so far, updated on return
+    const unsigned char*& next) {  // next data pointer, updated on return
+  uint32_t crc32bit = static_cast<uint32_t>(crc0);
+  if (len & 0x04) {
+    crc32bit = _mm_crc32_u32(crc32bit, *(uint32_t*)next);
+    next += sizeof(uint32_t);
+  }
+  if (len & 0x02) {
+    crc32bit = _mm_crc32_u16(crc32bit, *(uint16_t*)next);
+    next += sizeof(uint16_t);
+  }
+  if (len & 0x01) {
+    crc32bit = _mm_crc32_u8(crc32bit, *(next));
+    next++;
+  }
+  crc0 = crc32bit;
+}
+
+//
+// CombineCRC performs pclmulqdq multiplication of 2 partial CRCs and a
+// well-chosen constant and xors these with the remaining CRC.
+//
+inline uint64_t CombineCRC(
+    size_t block_size,
+    uint64_t crc0,
+    uint64_t crc1,
+    uint64_t crc2,
+    const uint64_t* next2) {
+  const auto multiplier =
+      *(reinterpret_cast<const __m128i*>(clmul_constants) + block_size - 1);
+  const auto crc0_xmm = _mm_set_epi64x(0, crc0);
+  const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00);
+  const auto crc1_xmm = _mm_set_epi64x(0, crc1);
+  const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10);
+  const auto res = _mm_xor_si128(res0, res1);
+  crc0 = _mm_cvtsi128_si64(res);
+  crc0 = crc0 ^ *((uint64_t*)next2 - 1);
+  crc2 = _mm_crc32_u64(crc2, crc0);
+  return crc2;
+}
+
+// Compute CRC-32C using the Intel hardware instruction.
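+//
+// A summary of the structure below: the input is walked as three interleaved
+// streams so that three _mm_crc32_u64 operations can be in flight at once, a
+// Duff's-device switch consumes the remaining triplet count, and CombineCRC
+// folds the three partial CRCs back into a single value with carry-less
+// multiplication (_mm_clmulepi64_si128) against the precomputed
+// clmul_constants table.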
+#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +uint32_t crc32c_3way(uint32_t crc, const char* buf, size_t len) { + const unsigned char* next = (const unsigned char*)buf; + uint64_t count; + uint64_t crc0, crc1, crc2; + crc0 = crc ^ 0xffffffffu; + + + if (len >= 8) { + // if len > 216 then align and use triplets + if (len > 216) { + { + // Work on the bytes (< 8) before the first 8-byte alignment addr starts + uint64_t align_bytes = (8 - (uintptr_t)next) & 7; + len -= align_bytes; + align_to_8(align_bytes, crc0, next); + } + + // Now work on the remaining blocks + count = len / 24; // number of triplets + len %= 24; // bytes remaining + uint64_t n = count >> 7; // #blocks = first block + full blocks + uint64_t block_size = count & 127; + if (block_size == 0) { + block_size = 128; + } else { + n++; + } + // points to the first byte of the next block + const uint64_t* next0 = (uint64_t*)next + block_size; + const uint64_t* next1 = next0 + block_size; + const uint64_t* next2 = next1 + block_size; + + crc1 = crc2 = 0; + // Use Duff's device, a for() loop inside a switch() + // statement. This needs to execute at least once, round len + // down to nearest triplet multiple + switch (block_size) { + case 128: + do { + // jumps here for a full block of len 128 + CRCtriplet(crc, next, -128); + FALLTHROUGH_INTENDED; + case 127: + // jumps here or below for the first block smaller + CRCtriplet(crc, next, -127); + FALLTHROUGH_INTENDED; + case 126: + CRCtriplet(crc, next, -126); // than 128 + FALLTHROUGH_INTENDED; + case 125: + CRCtriplet(crc, next, -125); + FALLTHROUGH_INTENDED; + case 124: + CRCtriplet(crc, next, -124); + FALLTHROUGH_INTENDED; + case 123: + CRCtriplet(crc, next, -123); + FALLTHROUGH_INTENDED; + case 122: + CRCtriplet(crc, next, -122); + FALLTHROUGH_INTENDED; + case 121: + CRCtriplet(crc, next, -121); + FALLTHROUGH_INTENDED; + case 120: + CRCtriplet(crc, next, -120); + FALLTHROUGH_INTENDED; + case 119: + CRCtriplet(crc, next, -119); + FALLTHROUGH_INTENDED; + case 118: + CRCtriplet(crc, next, -118); + FALLTHROUGH_INTENDED; + case 117: + CRCtriplet(crc, next, -117); + FALLTHROUGH_INTENDED; + case 116: + CRCtriplet(crc, next, -116); + FALLTHROUGH_INTENDED; + case 115: + CRCtriplet(crc, next, -115); + FALLTHROUGH_INTENDED; + case 114: + CRCtriplet(crc, next, -114); + FALLTHROUGH_INTENDED; + case 113: + CRCtriplet(crc, next, -113); + FALLTHROUGH_INTENDED; + case 112: + CRCtriplet(crc, next, -112); + FALLTHROUGH_INTENDED; + case 111: + CRCtriplet(crc, next, -111); + FALLTHROUGH_INTENDED; + case 110: + CRCtriplet(crc, next, -110); + FALLTHROUGH_INTENDED; + case 109: + CRCtriplet(crc, next, -109); + FALLTHROUGH_INTENDED; + case 108: + CRCtriplet(crc, next, -108); + FALLTHROUGH_INTENDED; + case 107: + CRCtriplet(crc, next, -107); + FALLTHROUGH_INTENDED; + case 106: + CRCtriplet(crc, next, -106); + FALLTHROUGH_INTENDED; + case 105: + CRCtriplet(crc, next, -105); + FALLTHROUGH_INTENDED; + case 104: + CRCtriplet(crc, next, -104); + FALLTHROUGH_INTENDED; + case 103: + CRCtriplet(crc, next, -103); + FALLTHROUGH_INTENDED; + case 102: + CRCtriplet(crc, next, -102); + FALLTHROUGH_INTENDED; + case 101: + CRCtriplet(crc, next, -101); + FALLTHROUGH_INTENDED; + case 100: + CRCtriplet(crc, next, -100); + FALLTHROUGH_INTENDED; + case 99: + CRCtriplet(crc, next, -99); + FALLTHROUGH_INTENDED; + case 98: + CRCtriplet(crc, next, -98); + FALLTHROUGH_INTENDED; + case 97: + 
CRCtriplet(crc, next, -97); + FALLTHROUGH_INTENDED; + case 96: + CRCtriplet(crc, next, -96); + FALLTHROUGH_INTENDED; + case 95: + CRCtriplet(crc, next, -95); + FALLTHROUGH_INTENDED; + case 94: + CRCtriplet(crc, next, -94); + FALLTHROUGH_INTENDED; + case 93: + CRCtriplet(crc, next, -93); + FALLTHROUGH_INTENDED; + case 92: + CRCtriplet(crc, next, -92); + FALLTHROUGH_INTENDED; + case 91: + CRCtriplet(crc, next, -91); + FALLTHROUGH_INTENDED; + case 90: + CRCtriplet(crc, next, -90); + FALLTHROUGH_INTENDED; + case 89: + CRCtriplet(crc, next, -89); + FALLTHROUGH_INTENDED; + case 88: + CRCtriplet(crc, next, -88); + FALLTHROUGH_INTENDED; + case 87: + CRCtriplet(crc, next, -87); + FALLTHROUGH_INTENDED; + case 86: + CRCtriplet(crc, next, -86); + FALLTHROUGH_INTENDED; + case 85: + CRCtriplet(crc, next, -85); + FALLTHROUGH_INTENDED; + case 84: + CRCtriplet(crc, next, -84); + FALLTHROUGH_INTENDED; + case 83: + CRCtriplet(crc, next, -83); + FALLTHROUGH_INTENDED; + case 82: + CRCtriplet(crc, next, -82); + FALLTHROUGH_INTENDED; + case 81: + CRCtriplet(crc, next, -81); + FALLTHROUGH_INTENDED; + case 80: + CRCtriplet(crc, next, -80); + FALLTHROUGH_INTENDED; + case 79: + CRCtriplet(crc, next, -79); + FALLTHROUGH_INTENDED; + case 78: + CRCtriplet(crc, next, -78); + FALLTHROUGH_INTENDED; + case 77: + CRCtriplet(crc, next, -77); + FALLTHROUGH_INTENDED; + case 76: + CRCtriplet(crc, next, -76); + FALLTHROUGH_INTENDED; + case 75: + CRCtriplet(crc, next, -75); + FALLTHROUGH_INTENDED; + case 74: + CRCtriplet(crc, next, -74); + FALLTHROUGH_INTENDED; + case 73: + CRCtriplet(crc, next, -73); + FALLTHROUGH_INTENDED; + case 72: + CRCtriplet(crc, next, -72); + FALLTHROUGH_INTENDED; + case 71: + CRCtriplet(crc, next, -71); + FALLTHROUGH_INTENDED; + case 70: + CRCtriplet(crc, next, -70); + FALLTHROUGH_INTENDED; + case 69: + CRCtriplet(crc, next, -69); + FALLTHROUGH_INTENDED; + case 68: + CRCtriplet(crc, next, -68); + FALLTHROUGH_INTENDED; + case 67: + CRCtriplet(crc, next, -67); + FALLTHROUGH_INTENDED; + case 66: + CRCtriplet(crc, next, -66); + FALLTHROUGH_INTENDED; + case 65: + CRCtriplet(crc, next, -65); + FALLTHROUGH_INTENDED; + case 64: + CRCtriplet(crc, next, -64); + FALLTHROUGH_INTENDED; + case 63: + CRCtriplet(crc, next, -63); + FALLTHROUGH_INTENDED; + case 62: + CRCtriplet(crc, next, -62); + FALLTHROUGH_INTENDED; + case 61: + CRCtriplet(crc, next, -61); + FALLTHROUGH_INTENDED; + case 60: + CRCtriplet(crc, next, -60); + FALLTHROUGH_INTENDED; + case 59: + CRCtriplet(crc, next, -59); + FALLTHROUGH_INTENDED; + case 58: + CRCtriplet(crc, next, -58); + FALLTHROUGH_INTENDED; + case 57: + CRCtriplet(crc, next, -57); + FALLTHROUGH_INTENDED; + case 56: + CRCtriplet(crc, next, -56); + FALLTHROUGH_INTENDED; + case 55: + CRCtriplet(crc, next, -55); + FALLTHROUGH_INTENDED; + case 54: + CRCtriplet(crc, next, -54); + FALLTHROUGH_INTENDED; + case 53: + CRCtriplet(crc, next, -53); + FALLTHROUGH_INTENDED; + case 52: + CRCtriplet(crc, next, -52); + FALLTHROUGH_INTENDED; + case 51: + CRCtriplet(crc, next, -51); + FALLTHROUGH_INTENDED; + case 50: + CRCtriplet(crc, next, -50); + FALLTHROUGH_INTENDED; + case 49: + CRCtriplet(crc, next, -49); + FALLTHROUGH_INTENDED; + case 48: + CRCtriplet(crc, next, -48); + FALLTHROUGH_INTENDED; + case 47: + CRCtriplet(crc, next, -47); + FALLTHROUGH_INTENDED; + case 46: + CRCtriplet(crc, next, -46); + FALLTHROUGH_INTENDED; + case 45: + CRCtriplet(crc, next, -45); + FALLTHROUGH_INTENDED; + case 44: + CRCtriplet(crc, next, -44); + FALLTHROUGH_INTENDED; + case 43: + CRCtriplet(crc, next, -43); + 
            FALLTHROUGH_INTENDED;
+          case 42:
+            CRCtriplet(crc, next, -42);
+            FALLTHROUGH_INTENDED;
+          case 41:
+            CRCtriplet(crc, next, -41);
+            FALLTHROUGH_INTENDED;
+          case 40:
+            CRCtriplet(crc, next, -40);
+            FALLTHROUGH_INTENDED;
+          case 39:
+            CRCtriplet(crc, next, -39);
+            FALLTHROUGH_INTENDED;
+          case 38:
+            CRCtriplet(crc, next, -38);
+            FALLTHROUGH_INTENDED;
+          case 37:
+            CRCtriplet(crc, next, -37);
+            FALLTHROUGH_INTENDED;
+          case 36:
+            CRCtriplet(crc, next, -36);
+            FALLTHROUGH_INTENDED;
+          case 35:
+            CRCtriplet(crc, next, -35);
+            FALLTHROUGH_INTENDED;
+          case 34:
+            CRCtriplet(crc, next, -34);
+            FALLTHROUGH_INTENDED;
+          case 33:
+            CRCtriplet(crc, next, -33);
+            FALLTHROUGH_INTENDED;
+          case 32:
+            CRCtriplet(crc, next, -32);
+            FALLTHROUGH_INTENDED;
+          case 31:
+            CRCtriplet(crc, next, -31);
+            FALLTHROUGH_INTENDED;
+          case 30:
+            CRCtriplet(crc, next, -30);
+            FALLTHROUGH_INTENDED;
+          case 29:
+            CRCtriplet(crc, next, -29);
+            FALLTHROUGH_INTENDED;
+          case 28:
+            CRCtriplet(crc, next, -28);
+            FALLTHROUGH_INTENDED;
+          case 27:
+            CRCtriplet(crc, next, -27);
+            FALLTHROUGH_INTENDED;
+          case 26:
+            CRCtriplet(crc, next, -26);
+            FALLTHROUGH_INTENDED;
+          case 25:
+            CRCtriplet(crc, next, -25);
+            FALLTHROUGH_INTENDED;
+          case 24:
+            CRCtriplet(crc, next, -24);
+            FALLTHROUGH_INTENDED;
+          case 23:
+            CRCtriplet(crc, next, -23);
+            FALLTHROUGH_INTENDED;
+          case 22:
+            CRCtriplet(crc, next, -22);
+            FALLTHROUGH_INTENDED;
+          case 21:
+            CRCtriplet(crc, next, -21);
+            FALLTHROUGH_INTENDED;
+          case 20:
+            CRCtriplet(crc, next, -20);
+            FALLTHROUGH_INTENDED;
+          case 19:
+            CRCtriplet(crc, next, -19);
+            FALLTHROUGH_INTENDED;
+          case 18:
+            CRCtriplet(crc, next, -18);
+            FALLTHROUGH_INTENDED;
+          case 17:
+            CRCtriplet(crc, next, -17);
+            FALLTHROUGH_INTENDED;
+          case 16:
+            CRCtriplet(crc, next, -16);
+            FALLTHROUGH_INTENDED;
+          case 15:
+            CRCtriplet(crc, next, -15);
+            FALLTHROUGH_INTENDED;
+          case 14:
+            CRCtriplet(crc, next, -14);
+            FALLTHROUGH_INTENDED;
+          case 13:
+            CRCtriplet(crc, next, -13);
+            FALLTHROUGH_INTENDED;
+          case 12:
+            CRCtriplet(crc, next, -12);
+            FALLTHROUGH_INTENDED;
+          case 11:
+            CRCtriplet(crc, next, -11);
+            FALLTHROUGH_INTENDED;
+          case 10:
+            CRCtriplet(crc, next, -10);
+            FALLTHROUGH_INTENDED;
+          case 9:
+            CRCtriplet(crc, next, -9);
+            FALLTHROUGH_INTENDED;
+          case 8:
+            CRCtriplet(crc, next, -8);
+            FALLTHROUGH_INTENDED;
+          case 7:
+            CRCtriplet(crc, next, -7);
+            FALLTHROUGH_INTENDED;
+          case 6:
+            CRCtriplet(crc, next, -6);
+            FALLTHROUGH_INTENDED;
+          case 5:
+            CRCtriplet(crc, next, -5);
+            FALLTHROUGH_INTENDED;
+          case 4:
+            CRCtriplet(crc, next, -4);
+            FALLTHROUGH_INTENDED;
+          case 3:
+            CRCtriplet(crc, next, -3);
+            FALLTHROUGH_INTENDED;
+          case 2:
+            CRCtriplet(crc, next, -2);
+            FALLTHROUGH_INTENDED;
+          case 1:
+            CRCduplet(crc, next, -1);  // the final triplet is actually only 2
+            //{ CombineCRC(); }
+            crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2);
+            if (--n > 0) {
+              crc1 = crc2 = 0;
+              block_size = 128;
+              // points to the first byte of the next block
+              next0 = next2 + 128;
+              next1 = next0 + 128;  // from here on all blocks are 128 long
+              next2 = next1 + 128;
+            }
+            FALLTHROUGH_INTENDED;
+          case 0:;
+        } while (n > 0);
+      }
+      next = (const unsigned char*)next2;
+    }
+    uint64_t count2 = len >> 3;  // 216 or fewer bytes is 27 or fewer singlets
+    len = len & 7;
+    next += (count2 * 8);
+    switch (count2) {
+      case 27:
+        CRCsinglet(crc0, next, -27 * 8);
+        FALLTHROUGH_INTENDED;
+      case 26:
+        CRCsinglet(crc0, next, -26 * 8);
+        FALLTHROUGH_INTENDED;
+      case 25:
+        CRCsinglet(crc0, next, -25 * 8);
+        FALLTHROUGH_INTENDED;
+      case 24:
+        CRCsinglet(crc0, next, -24 * 8);
+        FALLTHROUGH_INTENDED;
+      case 23:
+        CRCsinglet(crc0, next, -23 * 8);
+        FALLTHROUGH_INTENDED;
+      case 22:
+        CRCsinglet(crc0, next, -22 * 8);
+        FALLTHROUGH_INTENDED;
+      case 21:
+        CRCsinglet(crc0, next, -21 * 8);
+        FALLTHROUGH_INTENDED;
+      case 20:
+        CRCsinglet(crc0, next, -20 * 8);
+        FALLTHROUGH_INTENDED;
+      case 19:
+        CRCsinglet(crc0, next, -19 * 8);
+        FALLTHROUGH_INTENDED;
+      case 18:
+        CRCsinglet(crc0, next, -18 * 8);
+        FALLTHROUGH_INTENDED;
+      case 17:
+        CRCsinglet(crc0, next, -17 * 8);
+        FALLTHROUGH_INTENDED;
+      case 16:
+        CRCsinglet(crc0, next, -16 * 8);
+        FALLTHROUGH_INTENDED;
+      case 15:
+        CRCsinglet(crc0, next, -15 * 8);
+        FALLTHROUGH_INTENDED;
+      case 14:
+        CRCsinglet(crc0, next, -14 * 8);
+        FALLTHROUGH_INTENDED;
+      case 13:
+        CRCsinglet(crc0, next, -13 * 8);
+        FALLTHROUGH_INTENDED;
+      case 12:
+        CRCsinglet(crc0, next, -12 * 8);
+        FALLTHROUGH_INTENDED;
+      case 11:
+        CRCsinglet(crc0, next, -11 * 8);
+        FALLTHROUGH_INTENDED;
+      case 10:
+        CRCsinglet(crc0, next, -10 * 8);
+        FALLTHROUGH_INTENDED;
+      case 9:
+        CRCsinglet(crc0, next, -9 * 8);
+        FALLTHROUGH_INTENDED;
+      case 8:
+        CRCsinglet(crc0, next, -8 * 8);
+        FALLTHROUGH_INTENDED;
+      case 7:
+        CRCsinglet(crc0, next, -7 * 8);
+        FALLTHROUGH_INTENDED;
+      case 6:
+        CRCsinglet(crc0, next, -6 * 8);
+        FALLTHROUGH_INTENDED;
+      case 5:
+        CRCsinglet(crc0, next, -5 * 8);
+        FALLTHROUGH_INTENDED;
+      case 4:
+        CRCsinglet(crc0, next, -4 * 8);
+        FALLTHROUGH_INTENDED;
+      case 3:
+        CRCsinglet(crc0, next, -3 * 8);
+        FALLTHROUGH_INTENDED;
+      case 2:
+        CRCsinglet(crc0, next, -2 * 8);
+        FALLTHROUGH_INTENDED;
+      case 1:
+        CRCsinglet(crc0, next, -1 * 8);
+        FALLTHROUGH_INTENDED;
+      case 0:;
+    }
+  }
+  {
+    align_to_8(len, crc0, next);
+    return (uint32_t)crc0 ^ 0xffffffffu;
+  }
+}
+
+#endif  //__SSE4_2__ && __PCLMUL__
+
+static inline Function Choose_Extend() {
+#ifdef HAVE_POWER8
+  return isAltiVec() ? ExtendPPCImpl : ExtendImpl<DefaultCRC32>;
+#elif defined(HAVE_ARM64_CRC)
+  if (crc32c_runtime_check()) {
+    pmull_runtime_flag = crc32c_pmull_runtime_check();
+    return ExtendARMImpl;
+  } else {
+    return ExtendImpl<DefaultCRC32>;
+  }
+#elif defined(__SSE4_2__) && defined(__PCLMUL__) && !defined NO_THREEWAY_CRC32C
+  // NOTE: runtime detection no longer supported on x86
+  (void)ExtendImpl<DefaultCRC32>;  // suppress unused warning
+  return crc32c_3way;
+#else
+  return ExtendImpl<DefaultCRC32>;
+#endif
+}
+
+static Function ChosenExtend = Choose_Extend();
+uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
+  return ChosenExtend(crc, buf, size);
+}
+
+// The code for crc32c combine, copied with permission from folly
+
+// Standard galois-field multiply. The only modification is that a,
+// b, m, and p are all bit-reflected.
+//
+// https://en.wikipedia.org/wiki/Finite_field_arithmetic
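+//
+// In this bit-reflected form the recursion below is plain shift-and-add over
+// GF(2): each of the 32 steps xors `a` into the product `p` when the current
+// top bit of `b` is set, then steps `a` once "times x" modulo the polynomial
+// `m` (a right shift with a conditional xor of `m`).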
+static constexpr uint32_t gf_multiply_sw_1(
+    size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) {
+  // clang-format off
+  return i == 32 ? p : gf_multiply_sw_1(
+      /* i = */ i + 1,
+      /* p = */ p ^ ((0u-((b >> 31) & 1)) & a),
+      /* a = */ (a >> 1) ^ ((0u-(a & 1)) & m),
+      /* b = */ b << 1,
+      /* m = */ m);
+  // clang-format on
+}
+static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
+  return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
+}
+
+static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
+  return gf_multiply_sw(a, a, m);
+}
+
+template <size_t i, uint32_t m>
+struct gf_powers_memo {
+  static constexpr uint32_t value =
+      gf_square_sw(gf_powers_memo<i - 1, m>::value, m);
+};
+template <uint32_t m>
+struct gf_powers_memo<0, m> {
+  static constexpr uint32_t value = m;
+};
+
+template <typename T, T... Ints>
+struct integer_sequence {
+  using value_type = T;
+  static constexpr size_t size() { return sizeof...(Ints); }
+};
+
+template <typename T, size_t N, T... Is>
+struct make_integer_sequence : make_integer_sequence<T, N - 1, N - 1, Is...> {};
+
+template <typename T, T... Is>
+struct make_integer_sequence<T, 0, Is...> : integer_sequence<T, Is...> {};
+
+template <size_t N>
+using make_index_sequence = make_integer_sequence<size_t, N>;
+
+template <uint32_t m>
+struct gf_powers_make {
+  template <size_t... i>
+  using index_sequence = integer_sequence<size_t, i...>;
+  template <size_t... i>
+  constexpr std::array<uint32_t, sizeof...(i)> operator()(
+      index_sequence<i...>) const {
+    return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
+  }
+};
+
+static constexpr uint32_t crc32c_m = 0x82f63b78;
+
+static constexpr std::array<uint32_t, 62> const crc32c_powers =
+    gf_powers_make<crc32c_m>{}(make_index_sequence<62>{});
+
+// Expects a "pure" crc (see Crc32cCombine)
+static uint32_t Crc32AppendZeroes(
+    uint32_t crc, size_t len_over_4, uint32_t polynomial,
+    std::array<uint32_t, 62> const& powers_array) {
+  auto powers = powers_array.data();
+  // Append by multiplying by consecutive powers of two of the zeroes
+  // array
+  size_t len_bits = len_over_4;
+
+  while (len_bits) {
+    // Advance directly to next bit set.
+    auto r = CountTrailingZeroBits(len_bits);
+    len_bits >>= r;
+    powers += r;
+
+    crc = gf_multiply_sw(crc, *powers, polynomial);
+
+    len_bits >>= 1;
+    powers++;
+  }
+
+  return crc;
+}
+
+static inline uint32_t InvertedToPure(uint32_t crc) { return ~crc; }
+
+static inline uint32_t PureToInverted(uint32_t crc) { return ~crc; }
+
+static inline uint32_t PureExtend(uint32_t crc, const char* buf, size_t size) {
+  return InvertedToPure(Extend(PureToInverted(crc), buf, size));
+}
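+
+// A quick sanity check of the "pure" property relied on below: a pure CRC is
+// linear, crc(a ^ b) == crc(a) ^ crc(b), and maps an all-zero string to zero,
+// which is exactly what lets Crc32cCombine extend crc1 over virtual zeros and
+// then xor in crc2.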
+// Background:
+// RocksDB uses two kinds of crc32c values: masked and unmasked. Neither is
+// a "pure" CRC because a pure CRC satisfies (^ for xor)
+//   crc(a ^ b) = crc(a) ^ crc(b)
+// The unmasked is closest, and this function takes unmasked crc32c values.
+// The unmasked values are impure in two ways:
+// * The initial setting at the start of CRC computation is all 1 bits
+//   (like -1) instead of zero.
+// * The result has all bits inverted.
+// Note that together, these result in the empty string having a crc32c of
+// zero. See
+// https://en.wikipedia.org/wiki/Computation_of_cyclic_redundancy_checks#CRC_variants
+//
+// Simplified version of strategy, using xor through pure CRCs (+ for concat):
+//
+// pure_crc(str1 + str2) = pure_crc(str1 + zeros(len(str2))) ^
+//                         pure_crc(zeros(len(str1)) + str2)
+//
+// because the xor of these two zero-padded strings is str1 + str2. For pure
+// CRC, leading zeros don't affect the result, so we only need
+//
+// pure_crc(str1 + str2) = pure_crc(str1 + zeros(len(str2))) ^
+//                         pure_crc(str2)
+//
+// Considering we aren't working with pure CRCs, what is actually in the input?
+//
+// crc1 = PureToInverted(PureExtendCrc32c(-1, zeros, crc1len) ^
+//                       PureCrc32c(str1, crc1len))
+// crc2 = PureToInverted(PureExtendCrc32c(-1, zeros, crc2len) ^
+//                       PureCrc32c(str2, crc2len))
+//
+// The result we want to compute is
+// combined = PureToInverted(PureExtendCrc32c(PureExtendCrc32c(-1, zeros,
+//                                                             crc1len) ^
+//                                            PureCrc32c(str1, crc1len),
+//                                            zeros, crc2len) ^
+//                           PureCrc32c(str2, crc2len))
+//
+// Thus, in addition to extending crc1 over the length of str2 in (virtual)
+// zeros, we need to cancel out the -1 initializer that was used in computing
+// crc2. To cancel it out, we also need to extend it over crc2len in zeros.
+// To simplify, since the end of str1 and that -1 initializer for crc2 are at
+// the same logical position, we can combine them before we extend over the
+// zeros.
+uint32_t Crc32cCombine(uint32_t crc1, uint32_t crc2, size_t crc2len) {
+  uint32_t pure_crc1_with_init = InvertedToPure(crc1);
+  uint32_t pure_crc2_with_init = InvertedToPure(crc2);
+  uint32_t pure_crc2_init = static_cast<uint32_t>(-1);
+
+  // Append up to 32 bits of zeroes in the normal way
+  char zeros[4] = {0, 0, 0, 0};
+  auto len = crc2len & 3;
+  uint32_t tmp = pure_crc1_with_init ^ pure_crc2_init;
+  if (len) {
+    tmp = PureExtend(tmp, zeros, len);
+  }
+  return PureToInverted(
+      Crc32AppendZeroes(tmp, crc2len / 4, crc32c_m, crc32c_powers) ^
+      pure_crc2_with_init);
+}
+
+}  // namespace crc32c
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.h
new file mode 100644
index 0000000000..a08ad60af3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c.h
@@ -0,0 +1,56 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace crc32c {
+
+extern std::string IsFastCrc32Supported();
+
+// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the
+// crc32c of some string A. Extend() is often used to maintain the
+// crc32c of a stream of data.
+extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n);
+
+// Takes two unmasked crc32c values, and the length of the string from
+// which `crc2` was computed, and computes a crc32c value for the
+// concatenation of the original two input strings. Running time is
+// ~ log(crc2len).
+extern uint32_t Crc32cCombine(uint32_t crc1, uint32_t crc2, size_t crc2len);
+
+// Return the crc32c of data[0,n-1]
+inline uint32_t Value(const char* data, size_t n) { return Extend(0, data, n); }
+
+static const uint32_t kMaskDelta = 0xa282ead8ul;
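+
+// As a worked example of the masking below: Mask(0) == 0xa282ead8 (rotating
+// zero is still zero, so only kMaskDelta remains), and Unmask(Mask(c)) == c
+// for every c, since rotate-right-15 is undone by rotate-right-17 (their sum
+// is a full 32-bit rotation) and the kMaskDelta add/subtract cancel mod 2^32.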
+// Return a masked representation of crc.
+//
+// Motivation: it is problematic to compute the CRC of a string that
+// contains embedded CRCs. Therefore we recommend that CRCs stored
+// somewhere (e.g., in files) should be masked before being stored.
+inline uint32_t Mask(uint32_t crc) {
+  // Rotate right by 15 bits and add a constant.
+  return ((crc >> 15) | (crc << 17)) + kMaskDelta;
+}
+
+// Return the crc whose masked representation is masked_crc.
+inline uint32_t Unmask(uint32_t masked_crc) {
+  uint32_t rot = masked_crc - kMaskDelta;
+  return ((rot >> 17) | (rot << 15));
+}
+
+}  // namespace crc32c
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.cc
new file mode 100644
index 0000000000..98d1c307db
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.cc
@@ -0,0 +1,213 @@
+// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "util/crc32c_arm64.h"
+
+#if defined(HAVE_ARM64_CRC)
+
+#if defined(__linux__)
+#include <asm/hwcap.h>
+#endif
+#ifdef ROCKSDB_AUXV_GETAUXVAL_PRESENT
+#include <sys/auxv.h>
+#endif
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1 << 7)
+#endif
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1 << 4)
+#endif
+#if defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
+#if defined(__OpenBSD__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+#include <machine/armreg.h>
+#endif
+
+#ifdef HAVE_ARM64_CRYPTO
+/* unfolding to compute 8 * 3 = 24 bytes in parallel */
+#define CRC32C24BYTES(ITR)                                    \
+  crc1 = crc32c_u64(crc1, *(buf64 + BLK_LENGTH + (ITR)));     \
+  crc2 = crc32c_u64(crc2, *(buf64 + BLK_LENGTH * 2 + (ITR))); \
+  crc0 = crc32c_u64(crc0, *(buf64 + (ITR)));
+
+/* unfolding to compute 24 * 7 = 168 bytes in parallel */
+#define CRC32C7X24BYTES(ITR)   \
+  do {                         \
+    CRC32C24BYTES((ITR)*7 + 0) \
+    CRC32C24BYTES((ITR)*7 + 1) \
+    CRC32C24BYTES((ITR)*7 + 2) \
+    CRC32C24BYTES((ITR)*7 + 3) \
+    CRC32C24BYTES((ITR)*7 + 4) \
+    CRC32C24BYTES((ITR)*7 + 5) \
+    CRC32C24BYTES((ITR)*7 + 6) \
+  } while (0)
+#endif
+
+extern bool pmull_runtime_flag;
+
+uint32_t crc32c_runtime_check(void) {
+#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) || defined(__FreeBSD__)
+  uint64_t auxv = 0;
+#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT)
+  auxv = getauxval(AT_HWCAP);
+#elif defined(__FreeBSD__)
+  elf_aux_info(AT_HWCAP, &auxv, sizeof(auxv));
+#endif
+  return (auxv & HWCAP_CRC32) != 0;
+#elif defined(__APPLE__)
+  int r;
+  size_t l = sizeof(r);
+  if (sysctlbyname("hw.optional.armv8_crc32", &r, &l, NULL, 0) == -1) return 0;
+  return r == 1;
+#elif defined(__OpenBSD__)
+  int r = 0;
+  const int isar0_mib[] = {CTL_MACHDEP, CPU_ID_AA64ISAR0};
+  uint64_t isar0;
+  size_t len = sizeof(isar0);
+
+  if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) {
+    if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE) r = 1;
+  }
+  return r;
+#else
+  return 0;
+#endif
+}
+
+bool crc32c_pmull_runtime_check(void) {
+#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) || defined(__FreeBSD__)
+  uint64_t auxv = 0;
+#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT)
+  auxv = getauxval(AT_HWCAP);
+#elif defined(__FreeBSD__)
+  elf_aux_info(AT_HWCAP, &auxv, sizeof(auxv));
+#endif
+  return (auxv & HWCAP_PMULL) != 0;
+#elif defined(__APPLE__)
+  return true;
+#elif defined(__OpenBSD__)
+  bool r = false;
+  const int isar0_mib[] = {CTL_MACHDEP, CPU_ID_AA64ISAR0};
+  uint64_t isar0;
+  size_t len = sizeof(isar0);
+
+  if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) {
+    if (ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL) r = true;
+  }
+  return r;
+#else
+  return false;
+#endif
+}
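+
+// The sanitizer attributes below turn off UBSan's alignment checking for
+// crc32c_arm64: the function intentionally reads the buffer through
+// (uint64_t*)/(uint32_t*)/(uint16_t*) casts of byte pointers, which AArch64
+// handles fine but UBSan would flag as misaligned loads.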
+#ifdef ROCKSDB_UBSAN_RUN
+#if defined(__clang__)
+__attribute__((__no_sanitize__("alignment")))
+#elif defined(__GNUC__)
+__attribute__((__no_sanitize_undefined__))
+#endif
+#endif
+uint32_t
+crc32c_arm64(uint32_t crc, unsigned char const *data, size_t len) {
+  const uint8_t *buf8;
+  const uint64_t *buf64 = (uint64_t *)data;
+  int length = (int)len;
+  crc ^= 0xffffffff;
+
+  /*
+   * Pmull runtime check here.
+   * Raspberry Pi supports crc32 but doesn't support pmull.
+   * Skip Crc32c Parallel computation if no crypto extension available.
+   */
+  if (pmull_runtime_flag) {
+/* Macro (HAVE_ARM64_CRYPTO) is used for compiling check */
+#ifdef HAVE_ARM64_CRYPTO
+/* Crc32c Parallel computation
+ *   Algorithm comes from Intel whitepaper:
+ *   crc-iscsi-polynomial-crc32-instruction-paper
+ *
+ * Input data is divided into three equal-sized blocks
+ *   Three parallel blocks (crc0, crc1, crc2) for 1024 Bytes
+ *   One Block: 42(BLK_LENGTH) * 8(step length: crc32c_u64) bytes
+ */
+#define BLK_LENGTH 42
+    while (length >= 1024) {
+      uint64_t t0, t1;
+      uint32_t crc0 = 0, crc1 = 0, crc2 = 0;
+
+      /* Parallel Param:
+       *   k0 = CRC32(x ^ (42 * 8 * 8 * 2 - 1));
+       *   k1 = CRC32(x ^ (42 * 8 * 8 - 1));
+       */
+      uint32_t k0 = 0xe417f38a, k1 = 0x8f158014;
+
+      /* Prefetch data for following block to avoid cache miss */
+      PREF1KL1((uint8_t *)buf64, 1024);
+
+      /* First 8 byte for better pipelining */
+      crc0 = crc32c_u64(crc, *buf64++);
+
+      /* 3 blocks crc32c parallel computation
+       * Macro unfolding to compute in parallel
+       *  168 * 6 = 1008 (bytes)
+       */
+      CRC32C7X24BYTES(0);
+      CRC32C7X24BYTES(1);
+      CRC32C7X24BYTES(2);
+      CRC32C7X24BYTES(3);
+      CRC32C7X24BYTES(4);
+      CRC32C7X24BYTES(5);
+      buf64 += (BLK_LENGTH * 3);
+
+      /* Last 8 bytes */
+      crc = crc32c_u64(crc2, *buf64++);
+
+      t0 = (uint64_t)vmull_p64(crc0, k0);
+      t1 = (uint64_t)vmull_p64(crc1, k1);
+
+      /* Merge (crc0, crc1, crc2) -> crc */
+      crc1 = crc32c_u64(0, t1);
+      crc ^= crc1;
+      crc0 = crc32c_u64(0, t0);
+      crc ^= crc0;
+
+      length -= 1024;
+    }
+
+    if (length == 0) return crc ^ (0xffffffffU);
+#endif
+  }  // if Pmull runtime check here
+
+  buf8 = (const uint8_t *)buf64;
+  while (length >= 8) {
+    crc = crc32c_u64(crc, *(const uint64_t *)buf8);
+    buf8 += 8;
+    length -= 8;
+  }
+
+  /* The following is more efficient than the straight loop */
+  if (length >= 4) {
+    crc = crc32c_u32(crc, *(const uint32_t *)buf8);
+    buf8 += 4;
+    length -= 4;
+  }
+
+  if (length >= 2) {
+    crc = crc32c_u16(crc, *(const uint16_t *)buf8);
+    buf8 += 2;
+    length -= 2;
+  }
+
+  if (length >= 1) crc = crc32c_u8(crc, *buf8);
+
+  crc ^= 0xffffffff;
+  return crc;
+}
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.h
new file mode 100644
index 0000000000..4b27fe8710
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_arm64.h
@@ -0,0 +1,52 @@
+// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#ifndef UTIL_CRC32C_ARM64_H
+#define UTIL_CRC32C_ARM64_H
+
+#include <cinttypes>
+#include <cstddef>
+
+#if defined(__aarch64__) || defined(__AARCH64__)
+
+#ifdef __ARM_FEATURE_CRC32
+#define HAVE_ARM64_CRC
+#include <arm_acle.h>
+#define crc32c_u8(crc, v) __crc32cb(crc, v)
+#define crc32c_u16(crc, v) __crc32ch(crc, v)
+#define crc32c_u32(crc, v) __crc32cw(crc, v)
+#define crc32c_u64(crc, v) __crc32cd(crc, v)
+// clang-format off
+#define PREF4X64L1(buffer, PREF_OFFSET, ITR)                 \
+  __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer),  \
+          [c] "I"((PREF_OFFSET) + ((ITR) + 0) * 64));        \
+  __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer),  \
+          [c] "I"((PREF_OFFSET) + ((ITR) + 1) * 64));        \
+  __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer),  \
+          [c] "I"((PREF_OFFSET) + ((ITR) + 2) * 64));        \
+  __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer),  \
+          [c] "I"((PREF_OFFSET) + ((ITR) + 3) * 64));
+// clang-format on
+
+#define PREF1KL1(buffer, PREF_OFFSET)  \
+  PREF4X64L1(buffer, (PREF_OFFSET), 0) \
+  PREF4X64L1(buffer, (PREF_OFFSET), 4) \
+  PREF4X64L1(buffer, (PREF_OFFSET), 8) \
+  PREF4X64L1(buffer, (PREF_OFFSET), 12)
+
+extern uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data,
+                             size_t len);
+extern uint32_t crc32c_runtime_check(void);
+extern bool crc32c_pmull_runtime_check(void);
+
+#ifdef __ARM_FEATURE_CRYPTO
+#define HAVE_ARM64_CRYPTO
+#include <arm_neon.h>
+#endif  // __ARM_FEATURE_CRYPTO
+#endif  // __ARM_FEATURE_CRC32
+
+#endif  // defined(__aarch64__) || defined(__AARCH64__)
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_ppc.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_ppc.h
new file mode 100644
index 0000000000..f0b0b66d5d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_ppc.h
@@ -0,0 +1,22 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2017 International Business Machines Corp.
+// All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern uint32_t crc32c_ppc(uint32_t crc, unsigned char const *buffer,
+                           size_t len);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_ppc_constants.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_ppc_constants.h
new file mode 100644
index 0000000000..f6494cd01c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/crc32c_ppc_constants.h
@@ -0,0 +1,900 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (C) 2015, 2017 International Business Machines Corp.
+// All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+ +#pragma once + +#define CRC 0x1edc6f41 +#define REFLECT +#define CRC_XOR + +#ifndef __ASSEMBLY__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, + 0x26a1e7e8, 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c, + 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, + 0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 0xaa64d611, 0x580f5512, + 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, + 0x1642ae59, 0xe4292d5a, 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 0x417b1dbc, 0xb3109ebf, + 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, + 0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f, + 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, + 0x4767748a, 0xb50cf789, 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 0x7198540d, 0x83f3d70e, + 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, + 0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 0x082f63b7, 0xfa44e0b4, + 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, + 0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6, 0x502036a5, + 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, + 0x0e330a81, 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905, + 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, + 0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8, + 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, + 0x7fab5e8c, 0x8dc0dd8f, 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 0x69e9f0d5, 0x9b8273d6, + 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, + 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, +}; + +#endif + +#else +#define MAX_SIZE 32768 +.constants : + + /* Reduce 262144 kbits to 1024 bits */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + .octa 0x00000000b6ca9e20000000009c37c408 + + /* x^260096 mod p(x)` << 1, 
x^260160 mod p(x)` << 1 */ + .octa 0x00000000350249a800000001b51df26c + + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + .octa 0x00000001862dac54000000000724b9d0 + + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + .octa 0x00000001d87fb48c00000001c00532fe + + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + .octa 0x00000001f39b699e00000000f05a9362 + + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + .octa 0x0000000101da11b400000001e1007970 + + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + .octa 0x00000001cab571e000000000a57366ee + + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + .octa 0x00000000c7020cfe0000000192011284 + + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + .octa 0x00000000cdaed1ae0000000162716d9a + + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + .octa 0x00000001e804effc00000000cd97ecde + + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + .octa 0x0000000077c3ea3a0000000058812bc0 + + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + .octa 0x0000000068df31b40000000088b8c12e + + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + .octa 0x00000000b059b6c200000001230b234c + + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + .octa 0x0000000145fb8ed800000001120b416e + + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + .octa 0x00000000cbc0916800000001974aecb0 + + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + .octa 0x000000005ceeedc2000000008ee3f226 + + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + .octa 0x0000000047d74e8600000001089aba9a + + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + .octa 0x00000001407e9e220000000065113872 + + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + .octa 0x00000001da967bda000000005c07ec10 + + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + .octa 0x000000006c8983680000000187590924 + + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + .octa 0x00000000f2d14c9800000000e35da7c6 + + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + .octa 0x00000001993c6ad4000000000415855a + + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + .octa 0x000000014683d1ac0000000073617758 + + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + .octa 0x00000001a7c93e6c0000000176021d28 + + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + .octa 0x000000010211e90a00000001c358fd0a + + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + .octa 0x000000001119403e00000001ff7a2c18 + + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + .octa 0x000000001c3261aa00000000f2d9f7e4 + + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + .octa 0x000000014e37a634000000016cf1f9c8 + + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + .octa 0x0000000073786c0c000000010af9279a + + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + .octa 0x000000011dc037f80000000004f101e8 + + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + .octa 0x0000000031433dfc0000000070bcf184 + + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + .octa 0x000000009cde8348000000000a8de642 + + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + .octa 0x0000000038d3c2a60000000062ea130c + + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + .octa 0x000000011b25f26000000001eb31cbb2 + + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + .octa 0x000000001629e6f00000000170783448 + + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + .octa 0x0000000160838b4c00000001a684b4c6 + + /* x^224256 
mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + .octa 0x000000007a44011c00000000253ca5b4 + + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + .octa 0x00000000226f417a0000000057b4b1e2 + + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + .octa 0x0000000045eb2eb400000000b6bd084c + + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + .octa 0x000000014459d70c0000000123c2d592 + + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + .octa 0x00000001d406ed8200000000159dafce + + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + .octa 0x0000000160c8e1a80000000127e1a64e + + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + .octa 0x0000000027ba80980000000056860754 + + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + .octa 0x000000006d92d01800000001e661aae8 + + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + .octa 0x000000012ed7e3f200000000f82c6166 + + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + .octa 0x000000002dc8778800000000c4f9c7ae + + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + .octa 0x0000000018240bb80000000074203d20 + + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + .octa 0x000000001ad381580000000198173052 + + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + .octa 0x00000001396b78f200000001ce8aba54 + + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + .octa 0x000000011a68133400000001850d5d94 + + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + .octa 0x000000012104732e00000001d609239c + + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + .octa 0x00000000a140d90c000000001595f048 + + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + .octa 0x00000001b7215eda0000000042ccee08 + + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + .octa 0x00000001aaf1df3c000000010a389d74 + + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + .octa 0x0000000029d15b8a000000012a840da6 + + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + .octa 0x00000000f1a96922000000001d181c0c + + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + .octa 0x00000001ac80d03c0000000068b7d1f6 + + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + .octa 0x000000000f11d56a000000005b0f14fc + + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + .octa 0x00000001f1c022a20000000179e9e730 + + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + .octa 0x0000000173d00ae200000001ce1368d6 + + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + .octa 0x00000001d4ffe4ac0000000112c3a84c + + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + .octa 0x000000016edc5ae400000000de940fee + + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + .octa 0x00000001f1a0214000000000fe896b7e + + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + .octa 0x00000000ca0b28a000000001f797431c + + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + .octa 0x00000001928e30a20000000053e989ba + + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + .octa 0x0000000097b1b002000000003920cd16 + + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + .octa 0x00000000b15bf90600000001e6f579b8 + + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + .octa 0x00000000411c5d52000000007493cb0a + + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + .octa 0x00000001c36f330000000001bdd376d8 + + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + .octa 0x00000001119227e0000000016badfee6 + + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + .octa 0x00000000114d47020000000071de5c58 
+ + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + .octa 0x00000000458b5b9800000000453f317c + + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + .octa 0x000000012e31fb8e0000000121675cce + + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + .octa 0x000000005cf619d800000001f409ee92 + + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + .octa 0x0000000063f4d8b200000000f36b9c88 + + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + .octa 0x000000004138dc8a0000000036b398f4 + + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + .octa 0x00000001d29ee8e000000001748f9adc + + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + .octa 0x000000006a08ace800000001be94ec00 + + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + .octa 0x0000000127d4201000000000b74370d6 + + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + .octa 0x0000000019d76b6200000001174d0b98 + + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + .octa 0x00000001b1471f6e00000000befc06a4 + + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + .octa 0x00000001f64c19cc00000001ae125288 + + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + .octa 0x00000000003c0ea00000000095c19b34 + + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + .octa 0x000000014d73abf600000001a78496f2 + + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + .octa 0x00000001620eb84400000001ac5390a0 + + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + .octa 0x0000000147655048000000002a80ed6e + + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + .octa 0x0000000067b5077e00000001fa9b0128 + + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + .octa 0x0000000010ffe20600000001ea94929e + + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + .octa 0x000000000fee8f1e0000000125f4305c + + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + .octa 0x00000001da26fbae00000001471e2002 + + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + .octa 0x00000001b3a8bd880000000132d2253a + + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + .octa 0x00000000e8f3898e00000000f26b3592 + + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + .octa 0x00000000b0d0d28c00000000bc8b67b0 + + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + .octa 0x0000000030f2a798000000013a826ef2 + + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + .octa 0x000000000fba10020000000081482c84 + + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + .octa 0x00000000bdb9bd7200000000e77307c2 + + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + .octa 0x0000000075d3bf5a00000000d4a07ec8 + + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + .octa 0x00000000ef1f98a00000000017102100 + + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + .octa 0x00000000689c760200000000db406486 + + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + .octa 0x000000016d5fa5fe0000000192db7f88 + + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + .octa 0x00000001d0d2b9ca000000018bf67b1e + + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + .octa 0x0000000041e7b470000000007c09163e + + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + .octa 0x00000001cbb6495e000000000adac060 + + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + .octa 0x000000010052a0b000000000bd8316ae + + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + .octa 0x00000001d8effb5c000000019f09ab54 + + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + .octa 
0x00000001d969853c0000000125155542 + + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + .octa 0x00000000523ccce2000000018fdb5882 + + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + .octa 0x000000001e2436bc00000000e794b3f4 + + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + .octa 0x00000000ddd1c3a2000000016f9bb022 + + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + .octa 0x0000000019fcfe3800000000290c9978 + + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + .octa 0x00000001ce95db640000000083c0f350 + + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + .octa 0x00000000af5828060000000173ea6628 + + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + .octa 0x00000001006388f600000001c8b4e00a + + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + .octa 0x0000000179eca00a00000000de95d6aa + + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + .octa 0x0000000122410a6a000000010b7f7248 + + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + .octa 0x000000004288e87c00000001326e3a06 + + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + .octa 0x000000016c5490da00000000bb62c2e6 + + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + .octa 0x00000000d1c71f6e0000000156a4b2c2 + + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + .octa 0x00000001b4ce08a6000000011dfe763a + + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + .octa 0x00000001466ba60c000000007bcca8e2 + + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + .octa 0x00000001f6c488a40000000186118faa + + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + .octa 0x000000013bfb06820000000111a65a88 + + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + .octa 0x00000000690e9e54000000003565e1c4 + + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + .octa 0x00000000281346b6000000012ed02a82 + + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + .octa 0x000000015646402400000000c486ecfc + + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + .octa 0x000000016063a8dc0000000001b951b2 + + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + .octa 0x0000000116a663620000000048143916 + + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + .octa 0x000000017e8aa4d200000001dc2ae124 + + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + .octa 0x00000001728eb10c00000001416c58d6 + + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + .octa 0x00000001b08fd7fa00000000a479744a + + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + .octa 0x00000001092a16e80000000096ca3a26 + + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + .octa 0x00000000a505637c00000000ff223d4e + + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + .octa 0x00000000d94869b2000000010e84da42 + + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + .octa 0x00000001c8b203ae00000001b61ba3d0 + + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + .octa 0x000000005704aea000000000680f2de8 + + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + .octa 0x000000012e295fa2000000008772a9a8 + + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + .octa 0x000000011d0908bc0000000155f295bc + + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + .octa 0x0000000193ed97ea00000000595f9282 + + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + .octa 0x000000013a0f1c520000000164b1c25a + + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + .octa 0x000000010c2c40c000000000fbd67c50 + + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` 
<< 1 */ + .octa 0x00000000ff6fac3e0000000096076268 + + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + .octa 0x000000017b3609c000000001d288e4cc + + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + .octa 0x0000000088c8c92200000001eaac1bdc + + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + .octa 0x00000001751baae600000001f1ea39e2 + + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + .octa 0x000000010795297200000001eb6506fc + + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + .octa 0x0000000162b00abe000000010f806ffe + + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + .octa 0x000000000d7b404c000000010408481e + + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + .octa 0x00000000763b13d40000000188260534 + + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + .octa 0x00000000f6dc22d80000000058fc73e0 + + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + .octa 0x000000007daae06000000000391c59b8 + + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + .octa 0x000000013359ab7c000000018b638400 + + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + .octa 0x000000008add438a000000011738f5c4 + + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + .octa 0x00000001edbefdea000000008cf7c6da + + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + .octa 0x000000004104e0f800000001ef97fb16 + + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + .octa 0x00000000b48a82220000000102130e20 + + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + .octa 0x00000001bcb4684400000000db968898 + + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + .octa 0x000000013293ce0a00000000b5047b5e + + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + .octa 0x00000001710d0844000000010b90fdb2 + + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + .octa 0x0000000117907f6e000000004834a32e + + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + .octa 0x0000000087ddf93e0000000059c8f2b0 + + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + .octa 0x000000005970e9b00000000122cec508 + + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + .octa 0x0000000185b2b7d0000000000a330cda + + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + .octa 0x00000001dcee0efc000000014a47148c + + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + .octa 0x0000000030da27220000000042c61cb8 + + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + .octa 0x000000012f925a180000000012fe6960 + + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + .octa 0x00000000dd2e357c00000000dbda2c20 + + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + .octa 0x00000000071c80de000000011122410c + + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + .octa 0x000000011513140a00000000977b2070 + + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + .octa 0x00000001df876e8e000000014050438e + + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + .octa 0x000000015f81d6ce0000000147c840e8 + + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + .octa 0x000000019dd94dbe00000001cc7c88ce + + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + .octa 0x00000001373d206e00000001476b35a4 + + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + .octa 0x00000000668ccade000000013d52d508 + + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + .octa 0x00000001b192d268000000008e4be32e + + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + .octa 0x00000000e30f3a7800000000024120fe + + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + .octa 
0x000000010ef1f7bc00000000ddecddb4 + + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + .octa 0x00000001f5ac738000000000d4d403bc + + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + .octa 0x000000011822ea7000000001734b89aa + + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + .octa 0x00000000c3a33848000000010e7a58d6 + + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + .octa 0x00000001bd151c2400000001f9f04e9c + + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + .octa 0x0000000056002d7600000000b692225e + + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + .octa 0x000000014657c4f4000000019b8d3f3e + + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + .octa 0x0000000113742d7c00000001a874f11e + + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + .octa 0x000000019c5920ba000000010d5a4254 + + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + .octa 0x000000005216d2d600000000bbb2f5d6 + + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + .octa 0x0000000136f5ad8a0000000179cc0e36 + + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + .octa 0x000000018b07beb600000001dca1da4a + + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + .octa 0x00000000db1e93b000000000feb1a192 + + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + .octa 0x000000000b96fa3a00000000d1eeedd6 + + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + .octa 0x00000001d9968af0000000008fad9bb4 + + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + .octa 0x000000000e4a77a200000001884938e4 + + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + .octa 0x00000000508c2ac800000001bc2e9bc0 + + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + .octa 0x0000000021572a8000000001f9658a68 + + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + .octa 0x00000001b859daf2000000001b9224fc + + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + .octa 0x000000016f7884740000000055b2fb84 + + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + .octa 0x00000001b438810e000000018b090348 + + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + .octa 0x0000000095ddc6f2000000011ccbd5ea + + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + .octa 0x00000001d977c20c0000000007ae47f8 + + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + .octa 0x00000000ebedb99a0000000172acbec0 + + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + .octa 0x00000001df9e9e9200000001c6e3ff20 + + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + .octa 0x00000001a4a3f95200000000e1b38744 + + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + .octa 0x00000000e2f5122000000000791585b2 + + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + .octa 0x000000004aa01f3e00000000ac53b894 + + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + .octa 0x00000000b3e90a5800000001ed5f2cf4 + + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + .octa 0x000000000c9ca2aa00000001df48b2e0 + + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + .octa 0x000000015168231600000000049c1c62 + + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + .octa 0x0000000036fce78c000000017c460c12 + + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + .octa 0x000000009037dc10000000015be4da7e + + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + .octa 0x00000000d3298582000000010f38f668 + + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + .octa 0x00000001b42e8ad60000000039f40a00 + + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + .octa 0x00000000142a983800000000bd4c10c4 + + /* x^45056 mod 
p(x)` << 1, x^45120 mod p(x)` << 1 */ + .octa 0x0000000109c7f1900000000042db1d98 + + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + .octa 0x0000000056ff931000000001c905bae6 + + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + .octa 0x00000001594513aa00000000069d40ea + + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + .octa 0x00000001e3b5b1e8000000008e4fbad0 + + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + .octa 0x000000011dd5fc080000000047bedd46 + + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + .octa 0x00000001675f0cc20000000026396bf8 + + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + .octa 0x00000000d1c8dd4400000000379beb92 + + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + .octa 0x0000000115ebd3d8000000000abae54a + + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + .octa 0x00000001ecbd0dac0000000007e6a128 + + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + .octa 0x00000000cdf67af2000000000ade29d2 + + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + .octa 0x000000004c01ff4c00000000f974c45c + + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + .octa 0x00000000f2d8657e00000000e77ac60a + + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + .octa 0x000000006bae74c40000000145895816 + + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + .octa 0x0000000152af8aa00000000038e362be + + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + .octa 0x0000000004663802000000007f991a64 + + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + .octa 0x00000001ab2f5afc00000000fa366d3a + + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + .octa 0x0000000074a4ebd400000001a2bb34f0 + + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + .octa 0x00000001d7ab3a4c0000000028a9981e + + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + .octa 0x00000001a8da60c600000001dbc672be + + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + .octa 0x000000013cf6382000000000b04d77f6 + + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + .octa 0x00000000bec12e1e0000000124400d96 + + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + .octa 0x00000001c6368010000000014ca4b414 + + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + .octa 0x00000001e6e78758000000012fe2c938 + + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + .octa 0x000000008d7f2b3c00000001faed01e6 + + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + .octa 0x000000016b4a156e000000007e80ecfe + + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + .octa 0x00000001c63cfeb60000000098daee94 + + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + .octa 0x000000015f902670000000010a04edea + + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + .octa 0x00000001cd5de11e00000001c00b4524 + + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + .octa 0x000000001acaec540000000170296550 + + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + .octa 0x000000002bd0ca780000000181afaa48 + + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + .octa 0x0000000032d63d5c0000000185a31ffa + + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + .octa 0x000000001c6d4e4c000000002469f608 + + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + .octa 0x0000000106a60b92000000006980102a + + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + .octa 0x00000000d3855e120000000111ea9ca8 + + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + .octa 0x00000000e312563600000001bd1d29ce + + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + .octa 
0x000000009e8f7ea400000001b34b9580 + + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + .octa 0x00000001c82e562c000000003076054e + + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + .octa 0x00000000ca9f09ce000000012a608ea4 + + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + .octa 0x00000000c63764e600000000784d05fe + + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + .octa 0x0000000168d2e49e000000016ef0d82a + + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + .octa 0x00000000e986c1480000000075bda454 + + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + .octa 0x00000000cfb65894000000003dc0a1c4 + + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + .octa 0x0000000111cadee400000000e9a5d8be + + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + .octa 0x0000000171fb63ce00000001609bc4b4 + + .short_constants : + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include + the trailing 32 bits of zeros */ + /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod + p(x)` */ + .octa 0x7fec2963e5bf80485cf015c388e56f72 + + /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod + p(x)` */ + .octa 0x38e888d4844752a9963a18920246e2e6 + + /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod + p(x)` */ + .octa 0x42316c00730206ad419a441956993a31 + + /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod + p(x)` */ + .octa 0x543d5c543e65ddf9924752ba2b830011 + + /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod + p(x)` */ + .octa 0x78e87aaf56767c9255bd7f9518e4a304 + + /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod + p(x)` */ + .octa 0x8f68fcec1903da7f6d76739fe0553f1e + + /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod + p(x)` */ + .octa 0x3f4840246791d588c133722b1fe0b5c3 + + /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod + p(x)` */ + .octa 0x34c96751b04de25a64b67ee0e55ef1f3 + + /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` + */ + .octa 0x156c8e180b4a395b069db049b8fdb1e7 + + /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */ + .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e + + /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */ + .octa 0x041d37768cd75659817cdc5119b29a35 + + /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */ + .octa 0x3a0777818cfaa9651ce9d94b36c41f1c + + /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */ + .octa 0x0e148e8252377a554f256efcb82be955 + + /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */ + .octa 0x9c25531d19e65ddeec1631edb2dea967 + + /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */ + .octa 0x790606ff9957c0a65d27e147510ac59a + + /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */ + .octa 0x82f63b786ea2d55ca66805eb18b8ea18 + + .barrett_constants : + /* 33 bit reflected Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */ + /* 33 bit reflected Barrett constant n */ + .octa 0x00000000000000000000000105ec76f1 +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/data_structure.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/data_structure.cc new file mode 100644 index 0000000000..d647df5d5b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/data_structure.cc @@ -0,0 +1,18 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/data_structure.h" + +#include "util/math.h" + +namespace ROCKSDB_NAMESPACE { +namespace detail { + +int CountTrailingZeroBitsForSmallEnumSet(uint64_t v) { + return CountTrailingZeroBits(v); +} + +} // namespace detail +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/defer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/defer.h new file mode 100644 index 0000000000..f71e67ba9c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/defer.h @@ -0,0 +1,82 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// Defers the execution of the provided function until the Defer +// object goes out of scope. +// +// Usage example: +// +// Status DeferTest() { +// Status s; +// Defer defer([&s]() { +// if (!s.ok()) { +// // do cleanups ... +// } +// }); +// // do something ... +// if (!s.ok()) return; +// // do some other things ... +// return s; +// } +// +// The above code ensures that cleanups will always happen on returning. +// +// Without the help of Defer, you can +// 1. every time when !s.ok(), do the cleanup; +// 2. instead of returning when !s.ok(), continue the work only when s.ok(), +// but sometimes, this might lead to nested blocks of "if (s.ok()) {...}". +// +// With the help of Defer, you can centralize the cleanup logic inside the +// lambda passed to Defer, and you can return immediately on failure when +// necessary. +class Defer final { + public: + explicit Defer(std::function&& fn) : fn_(std::move(fn)) {} + ~Defer() { fn_(); } + + // Disallow copy. + Defer(const Defer&) = delete; + Defer& operator=(const Defer&) = delete; + + private: + std::function fn_; +}; + +// An RAII utility object that saves the current value of an object so that +// it can be overwritten, and restores it to the saved value when the +// SaveAndRestore object goes out of scope. +template +class SaveAndRestore { + public: + // obj is non-null pointer to value to be saved and later restored. + explicit SaveAndRestore(T* obj) : obj_(obj), saved_(*obj) {} + // new_value is stored in *obj + SaveAndRestore(T* obj, const T& new_value) + : obj_(obj), saved_(std::move(*obj)) { + *obj = new_value; + } + SaveAndRestore(T* obj, T&& new_value) : obj_(obj), saved_(std::move(*obj)) { + *obj = std::move(new_value); + } + ~SaveAndRestore() { *obj_ = std::move(saved_); } + + // No copies + SaveAndRestore(const SaveAndRestore&) = delete; + SaveAndRestore& operator=(const SaveAndRestore&) = delete; + + private: + T* const obj_; + T saved_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/distributed_mutex.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/distributed_mutex.h new file mode 100644 index 0000000000..1734269cc9 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/distributed_mutex.h @@ -0,0 +1,50 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
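// ---------------------------------------------------------------------------
// Editorial aside, not part of the vendored sources: a minimal, runnable
// sketch of the SaveAndRestore guard defined in defer.h above. The class
// body mirrors the shape of the vendored one (one of its constructors);
// the name SaveAndRestoreSketch and main() are illustrative only.
#include <cassert>
#include <utility>

template <typename T>
class SaveAndRestoreSketch {
 public:
  // Save *obj and install new_value; the old value comes back at scope exit.
  SaveAndRestoreSketch(T* obj, const T& new_value)
      : obj_(obj), saved_(std::move(*obj)) {
    *obj = new_value;
  }
  ~SaveAndRestoreSketch() { *obj_ = std::move(saved_); }
  SaveAndRestoreSketch(const SaveAndRestoreSketch&) = delete;
  SaveAndRestoreSketch& operator=(const SaveAndRestoreSketch&) = delete;

 private:
  T* const obj_;
  T saved_;
};

int main() {
  int verbosity = 1;
  {
    SaveAndRestoreSketch<int> guard(&verbosity, 5);  // temporarily raise
    assert(verbosity == 5);
  }
  assert(verbosity == 1);  // restored when the guard left scope
  return 0;
}
// ---------------------------------------------------------------------------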
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/rocksdb_namespace.h" + +// This file declares a wrapper around the efficient folly DistributedMutex +// that falls back on a standard mutex when not available. See +// https://github.com/facebook/folly/blob/main/folly/synchronization/DistributedMutex.h +// for benefits and limitations. + +// At the moment, only scoped locking is supported using DMutexLock +// RAII wrapper, because lock/unlock APIs will vary. + +#ifdef USE_FOLLY + +#include + +namespace ROCKSDB_NAMESPACE { + +class DMutex : public folly::DistributedMutex { + public: + static const char* kName() { return "folly::DistributedMutex"; } + + explicit DMutex(bool IGNORED_adaptive = false) { (void)IGNORED_adaptive; } + + // currently no-op + void AssertHeld() {} +}; +using DMutexLock = std::lock_guard; + +} // namespace ROCKSDB_NAMESPACE + +#else + +#include + +#include "port/port.h" + +namespace ROCKSDB_NAMESPACE { + +using DMutex = port::Mutex; +using DMutexLock = std::lock_guard; + +} // namespace ROCKSDB_NAMESPACE + +#endif diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/duplicate_detector.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/duplicate_detector.h new file mode 100644 index 0000000000..aa42e950e4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/duplicate_detector.h @@ -0,0 +1,69 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "db/db_impl/db_impl.h" +#include "logging/logging.h" +#include "util/set_comparator.h" + +namespace ROCKSDB_NAMESPACE { +// During recovery if the memtable is flushed we cannot rely on its help on +// duplicate key detection and as key insert will not be attempted. This class +// will be used as a emulator of memtable to tell if insertion of a key/seq +// would have resulted in duplication. +class DuplicateDetector { + public: + explicit DuplicateDetector(DBImpl* db) : db_(db) {} + bool IsDuplicateKeySeq(uint32_t cf, const Slice& key, SequenceNumber seq) { + assert(seq >= batch_seq_); + if (batch_seq_ != seq) { // it is a new batch + keys_.clear(); + } + batch_seq_ = seq; + CFKeys& cf_keys = keys_[cf]; + if (cf_keys.size() == 0) { // just inserted + InitWithComp(cf); + } + auto it = cf_keys.insert(key); + if (it.second == false) { // second is false if a element already existed. + keys_.clear(); + InitWithComp(cf); + keys_[cf].insert(key); + return true; + } + return false; + } + + private: + SequenceNumber batch_seq_ = 0; + DBImpl* db_; + using CFKeys = std::set; + std::map keys_; + void InitWithComp(const uint32_t cf) { + auto h = db_->GetColumnFamilyHandle(cf); + if (!h) { + // TODO(myabandeh): This is not a concern in MyRocks as drop cf is not + // implemented yet. When it does, we should return proper error instead + // of throwing exception. + ROCKS_LOG_FATAL( + db_->immutable_db_options().info_log, + "Recovering an entry from the dropped column family %" PRIu32 + ". WAL must must have been emptied before dropping the column " + "family", + cf); + throw std::runtime_error( + "Recovering an entry from a dropped column family. 
" + "WAL must must have been flushed before dropping the column " + "family"); + return; + } + auto cmp = h->GetComparator(); + keys_[cf] = CFKeys(SetComparator(cmp)); + } +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.cc new file mode 100644 index 0000000000..0ff3b4a758 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "dynamic_bloom.h" + +#include + +#include "memory/allocator.h" +#include "port/port.h" +#include "rocksdb/slice.h" +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +uint32_t roundUpToPow2(uint32_t x) { + uint32_t rv = 1; + while (rv < x) { + rv <<= 1; + } + return rv; +} +} // namespace + +DynamicBloom::DynamicBloom(Allocator* allocator, uint32_t total_bits, + uint32_t num_probes, size_t huge_page_tlb_size, + Logger* logger) + // Round down, except round up with 1 + : kNumDoubleProbes((num_probes + (num_probes == 1)) / 2) { + assert(num_probes % 2 == 0); // limitation of current implementation + assert(num_probes <= 10); // limitation of current implementation + assert(kNumDoubleProbes > 0); + + // Determine how much to round off + align by so that x ^ i (that's xor) is + // a valid u64 index if x is a valid u64 index and 0 <= i < kNumDoubleProbes. + uint32_t block_bytes = /*bytes/u64*/ 8 * + /*u64s*/ std::max(1U, roundUpToPow2(kNumDoubleProbes)); + uint32_t block_bits = block_bytes * 8; + uint32_t blocks = (total_bits + block_bits - 1) / block_bits; + uint32_t sz = blocks * block_bytes; + kLen = sz / /*bytes/u64*/ 8; + assert(kLen > 0); +#ifndef NDEBUG + for (uint32_t i = 0; i < kNumDoubleProbes; ++i) { + // Ensure probes starting at last word are in range + assert(((kLen - 1) ^ i) < kLen); + } +#endif + + // Padding to correct for allocation not originally aligned on block_bytes + // boundary + sz += block_bytes - 1; + assert(allocator); + + char* raw = allocator->AllocateAligned(sz, huge_page_tlb_size, logger); + memset(raw, 0, sz); + auto block_offset = reinterpret_cast(raw) % block_bytes; + if (block_offset > 0) { + // Align on block_bytes boundary + raw += block_bytes - block_offset; + } + static_assert(sizeof(std::atomic) == sizeof(uint64_t), + "Expecting zero-space-overhead atomic"); + data_ = reinterpret_cast*>(raw); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.h new file mode 100644 index 0000000000..40cd294044 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/dynamic_bloom.h @@ -0,0 +1,214 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + +#include +#include +#include +#include + +#include "port/port.h" +#include "rocksdb/slice.h" +#include "table/multiget_context.h" +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; +class Allocator; +class Logger; + +// A Bloom filter intended only to be used in memory, never serialized in a way +// that could lead to schema incompatibility. Supports opt-in lock-free +// concurrent access. +// +// This implementation is also intended for applications generally preferring +// speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate. +// For 1% FP rate, that means that the latency of a look-up triggered by an FP +// should be less than roughly 100x the cost of a Bloom filter op. +// +// For simplicity and performance, the current implementation requires +// num_probes to be a multiple of two and <= 10. +// +class DynamicBloom { + public: + // allocator: pass allocator to bloom filter, hence trace the usage of memory + // total_bits: fixed total bits for the bloom + // num_probes: number of hash probes for a single key + // hash_func: customized hash function + // huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB + // within this page size. Need to reserve huge pages for + // it to be allocated, like: + // sysctl -w vm.nr_hugepages=20 + // See linux doc Documentation/vm/hugetlbpage.txt + explicit DynamicBloom(Allocator* allocator, uint32_t total_bits, + uint32_t num_probes = 6, size_t huge_page_tlb_size = 0, + Logger* logger = nullptr); + + ~DynamicBloom() {} + + // Assuming single threaded access to this function. + void Add(const Slice& key); + + // Like Add, but may be called concurrent with other functions. + void AddConcurrently(const Slice& key); + + // Assuming single threaded access to this function. + void AddHash(uint32_t hash); + + // Like AddHash, but may be called concurrent with other functions. + void AddHashConcurrently(uint32_t hash); + + // Multithreaded access to this function is OK + bool MayContain(const Slice& key) const; + + void MayContain(int num_keys, Slice* keys, bool* may_match) const; + + // Multithreaded access to this function is OK + bool MayContainHash(uint32_t hash) const; + + void Prefetch(uint32_t h); + + private: + // Length of the structure, in 64-bit words. For this structure, "word" + // will always refer to 64-bit words. + uint32_t kLen; + // We make the k probes in pairs, two for each 64-bit read/write. Thus, + // this stores k/2, the number of words to double-probe. + const uint32_t kNumDoubleProbes; + + std::atomic* data_; + + // or_func(ptr, mask) should effect *ptr |= mask with the appropriate + // concurrency safety, working with bytes. 
+ template + void AddHash(uint32_t hash, const OrFunc& or_func); + + bool DoubleProbe(uint32_t h32, size_t a) const; +}; + +inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); } + +inline void DynamicBloom::AddConcurrently(const Slice& key) { + AddHashConcurrently(BloomHash(key)); +} + +inline void DynamicBloom::AddHash(uint32_t hash) { + AddHash(hash, [](std::atomic* ptr, uint64_t mask) { + ptr->store(ptr->load(std::memory_order_relaxed) | mask, + std::memory_order_relaxed); + }); +} + +inline void DynamicBloom::AddHashConcurrently(uint32_t hash) { + AddHash(hash, [](std::atomic* ptr, uint64_t mask) { + // Happens-before between AddHash and MaybeContains is handled by + // access to versions_->LastSequence(), so all we have to do here is + // avoid races (so we don't give the compiler a license to mess up + // our code) and not lose bits. std::memory_order_relaxed is enough + // for that. + if ((mask & ptr->load(std::memory_order_relaxed)) != mask) { + ptr->fetch_or(mask, std::memory_order_relaxed); + } + }); +} + +inline bool DynamicBloom::MayContain(const Slice& key) const { + return (MayContainHash(BloomHash(key))); +} + +inline void DynamicBloom::MayContain(int num_keys, Slice* keys, + bool* may_match) const { + std::array hashes; + std::array byte_offsets; + for (int i = 0; i < num_keys; ++i) { + hashes[i] = BloomHash(keys[i]); + size_t a = FastRange32(kLen, hashes[i]); + PREFETCH(data_ + a, 0, 3); + byte_offsets[i] = a; + } + + for (int i = 0; i < num_keys; i++) { + may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]); + } +} + +#if defined(_MSC_VER) +#pragma warning(push) +// local variable is initialized but not referenced +#pragma warning(disable : 4189) +#endif +inline void DynamicBloom::Prefetch(uint32_t h32) { + size_t a = FastRange32(kLen, h32); + PREFETCH(data_ + a, 0, 3); +} +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +// Speed hacks in this implementation: +// * Uses fastrange instead of % +// * Minimum logic to determine first (and all) probed memory addresses. +// (Uses constant bit-xor offsets from the starting probe address.) +// * (Major) Two probes per 64-bit memory fetch/write. +// Code simplification / optimization: only allow even number of probes. +// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At +// least on recent CPUs, integer multiplication is very cheap. Each 64-bit +// remix provides five pairs of bit addresses within a uint64_t.) +// Code simplification / optimization: only allow up to 10 probes, from a +// single 64-bit remix. +// +// The FP rate penalty for this implementation, vs. standard Bloom filter, is +// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom. +// This implementation does not explicitly use the cache line size, but is +// effectively cache-local (up to 16 probes) because of the bit-xor offsetting. +// +// NB: could easily be upgraded to support a 64-bit hash and +// total_bits > 2^32 (512MB). (The latter is a bad idea without the former, +// because of false positives.) 
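// ---------------------------------------------------------------------------
// Editorial aside, not part of the vendored sources: the probe-address
// scheme described above, in isolation. One 64-bit golden-ratio multiply
// remixes the 32-bit hash, and each 12-bit rotation step exposes a fresh
// pair of bit positions within a 64-bit word -- five pairs, i.e. up to
// ten probes, exactly as the vendored AddHash/DoubleProbe loops consume.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t h32 = 0xdeadbeef;                 // arbitrary example hash
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;  // expand/remix, as in AddHash
  for (unsigned i = 0; i < 5; ++i) {         // 5 pairs = 10 probes max
    unsigned bit1 = h & 63;                  // first probe bit in the word
    unsigned bit2 = (h >> 6) & 63;           // second probe bit
    std::printf("pair %u: bits %u and %u\n", i, bit1, bit2);
    h = (h >> 12) | (h << 52);               // rotate to expose the next pair
  }
  return 0;
}
// ---------------------------------------------------------------------------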
+ +inline bool DynamicBloom::MayContainHash(uint32_t h32) const { + size_t a = FastRange32(kLen, h32); + PREFETCH(data_ + a, 0, 3); + return DoubleProbe(h32, a); +} + +inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const { + // Expand/remix with 64-bit golden ratio + uint64_t h = 0x9e3779b97f4a7c13ULL * h32; + for (unsigned i = 0;; ++i) { + // Two bit probes per uint64_t probe + uint64_t mask = + ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63)); + uint64_t val = data_[byte_offset ^ i].load(std::memory_order_relaxed); + if (i + 1 >= kNumDoubleProbes) { + return (val & mask) == mask; + } else if ((val & mask) != mask) { + return false; + } + h = (h >> 12) | (h << 52); + } +} + +template +inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) { + size_t a = FastRange32(kLen, h32); + PREFETCH(data_ + a, 0, 3); + // Expand/remix with 64-bit golden ratio + uint64_t h = 0x9e3779b97f4a7c13ULL * h32; + for (unsigned i = 0;; ++i) { + // Two bit probes per uint64_t probe + uint64_t mask = + ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63)); + or_func(&data_[a ^ i], mask); + if (i + 1 >= kNumDoubleProbes) { + return; + } + h = (h >> 12) | (h << 52); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/fastrange.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/fastrange.h new file mode 100644 index 0000000000..a70a980f67 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/fastrange.h @@ -0,0 +1,114 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// fastrange/FastRange: A faster alternative to % for mapping a hash value +// to an arbitrary range. See https://github.com/lemire/fastrange +// +// Generally recommended are FastRange32 for mapping results of 32-bit +// hash functions and FastRange64 for mapping results of 64-bit hash +// functions. FastRange is less forgiving than % if the input hashes are +// not well distributed over the full range of the type (32 or 64 bits). +// +// Also included is a templated implementation FastRangeGeneric for use +// in generic algorithms, but not otherwise recommended because of +// potential ambiguity. Unlike with %, it is critical to use the right +// FastRange variant for the output size of your hash function. 
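// ---------------------------------------------------------------------------
// Editorial aside, not part of the vendored sources: the 32-bit fastrange
// mapping described above, shown in isolation. Widen to 64 bits, multiply,
// and keep the high 32 bits; the result is proportional to the hash and is
// always in [0, range). FastRange32Sketch is an illustrative name.
#include <cassert>
#include <cstdint>

static inline uint32_t FastRange32Sketch(uint32_t hash, uint32_t range) {
  return static_cast<uint32_t>((uint64_t{hash} * uint64_t{range}) >> 32);
}

int main() {
  // hash * range < 2^32 * range, so the high word is strictly below range.
  for (uint32_t h : {0u, 1u, 0x7fffffffu, 0xffffffffu}) {
    assert(FastRange32Sketch(h, 100) < 100);
  }
  return 0;
}
// ---------------------------------------------------------------------------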
+ +#pragma once + +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +#ifdef TEST_UINT128_COMPAT +#undef HAVE_UINT128_EXTENSION +#endif + +namespace ROCKSDB_NAMESPACE { + +namespace detail { + +// Using a class template to support partial specialization +template +struct FastRangeGenericImpl { + // only reach this on no supported specialization +}; + +template +struct FastRangeGenericImpl { + static inline Range Fn(uint32_t hash, Range range) { + static_assert(std::is_unsigned::value, "must be unsigned"); + static_assert(sizeof(Range) <= sizeof(uint32_t), + "cannot be larger than hash (32 bits)"); + + uint64_t product = uint64_t{range} * hash; + return static_cast(product >> 32); + } +}; + +template +struct FastRangeGenericImpl { + static inline Range Fn(uint64_t hash, Range range) { + static_assert(std::is_unsigned::value, "must be unsigned"); + static_assert(sizeof(Range) <= sizeof(uint64_t), + "cannot be larger than hash (64 bits)"); + +#ifdef HAVE_UINT128_EXTENSION + // Can use compiler's 128-bit type. Trust it to do the right thing. + __uint128_t wide = __uint128_t{range} * hash; + return static_cast(wide >> 64); +#else + // Fall back: full decomposition. + // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit + // -> 128-bit multiplication and optimize it appropriately + uint64_t range64 = range; // ok to shift by 32, even if Range is 32-bit + uint64_t tmp = uint64_t{range64 & 0xffffFFFF} * uint64_t{hash & 0xffffFFFF}; + tmp >>= 32; + tmp += uint64_t{range64 & 0xffffFFFF} * uint64_t{hash >> 32}; + // Avoid overflow: first add lower 32 of tmp2, and later upper 32 + uint64_t tmp2 = uint64_t{range64 >> 32} * uint64_t{hash & 0xffffFFFF}; + tmp += static_cast(tmp2); + tmp >>= 32; + tmp += (tmp2 >> 32); + tmp += uint64_t{range64 >> 32} * uint64_t{hash >> 32}; + return static_cast(tmp); +#endif + } +}; + +} // namespace detail + +// Now an omnibus templated function (yay parameter inference). +// +// NOTICE: +// This templated version is not recommended for typical use because +// of the potential to mix a 64-bit FastRange with a 32-bit bit hash, +// most likely because you put your 32-bit hash in an "unsigned long" +// which is 64 bits on some platforms. That doesn't really matter for +// an operation like %, but 64-bit FastRange gives extremely bad results, +// mostly zero, on 32-bit hash values. And because good hashing is not +// generally required for correctness, this kind of mistake could go +// unnoticed with just unit tests. Plus it could vary by platform. +template +inline Range FastRangeGeneric(Hash hash, Range range) { + return detail::FastRangeGenericImpl::Fn(hash, range); +} + +// The most popular / convenient / recommended variants: + +// Map a quality 64-bit hash value down to an arbitrary size_t range. +// (size_t is standard for mapping to things in memory.) +inline size_t FastRange64(uint64_t hash, size_t range) { + return FastRangeGeneric(hash, range); +} + +// Map a quality 32-bit hash value down to an arbitrary uint32_t range. +inline uint32_t FastRange32(uint32_t hash, uint32_t range) { + return FastRangeGeneric(hash, range); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.cc new file mode 100644 index 0000000000..b8c4099b80 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.cc @@ -0,0 +1,170 @@ +// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/file_checksum_helper.h" + +#include + +#include "db/log_reader.h" +#include "db/version_edit.h" +#include "db/version_edit_handler.h" +#include "file/sequence_file_reader.h" +#include "rocksdb/utilities/customizable_util.h" + +namespace ROCKSDB_NAMESPACE { + +void FileChecksumListImpl::reset() { checksum_map_.clear(); } + +size_t FileChecksumListImpl::size() const { return checksum_map_.size(); } + +Status FileChecksumListImpl::GetAllFileChecksums( + std::vector* file_numbers, std::vector* checksums, + std::vector* checksum_func_names) { + if (file_numbers == nullptr || checksums == nullptr || + checksum_func_names == nullptr) { + return Status::InvalidArgument("Pointer has not been initiated"); + } + + for (auto i : checksum_map_) { + file_numbers->push_back(i.first); + checksums->push_back(i.second.first); + checksum_func_names->push_back(i.second.second); + } + return Status::OK(); +} + +Status FileChecksumListImpl::SearchOneFileChecksum( + uint64_t file_number, std::string* checksum, + std::string* checksum_func_name) { + if (checksum == nullptr || checksum_func_name == nullptr) { + return Status::InvalidArgument("Pointer has not been initiated"); + } + + auto it = checksum_map_.find(file_number); + if (it == checksum_map_.end()) { + return Status::NotFound(); + } else { + *checksum = it->second.first; + *checksum_func_name = it->second.second; + } + return Status::OK(); +} + +Status FileChecksumListImpl::InsertOneFileChecksum( + uint64_t file_number, const std::string& checksum, + const std::string& checksum_func_name) { + auto it = checksum_map_.find(file_number); + if (it == checksum_map_.end()) { + checksum_map_.insert(std::make_pair( + file_number, std::make_pair(checksum, checksum_func_name))); + } else { + it->second.first = checksum; + it->second.second = checksum_func_name; + } + return Status::OK(); +} + +Status FileChecksumListImpl::RemoveOneFileChecksum(uint64_t file_number) { + auto it = checksum_map_.find(file_number); + if (it == checksum_map_.end()) { + return Status::NotFound(); + } else { + checksum_map_.erase(it); + } + return Status::OK(); +} + +FileChecksumList* NewFileChecksumList() { + FileChecksumListImpl* checksum_list = new FileChecksumListImpl(); + return checksum_list; +} + +std::shared_ptr GetFileChecksumGenCrc32cFactory() { + static std::shared_ptr default_crc32c_gen_factory( + new FileChecksumGenCrc32cFactory()); + return default_crc32c_gen_factory; +} + +Status GetFileChecksumsFromManifest(Env* src_env, const std::string& abs_path, + uint64_t manifest_file_size, + FileChecksumList* checksum_list) { + if (checksum_list == nullptr) { + return Status::InvalidArgument("checksum_list is nullptr"); + } + assert(checksum_list); + // TODO: plumb Env::IOActivity + const ReadOptions read_options; + checksum_list->reset(); + Status s; + + std::unique_ptr file_reader; + { + std::unique_ptr file; + const std::shared_ptr& fs = src_env->GetFileSystem(); + s = fs->NewSequentialFile(abs_path, + fs->OptimizeForManifestRead(FileOptions()), &file, + nullptr /* dbg */); + if (!s.ok()) { + return s; + } + file_reader.reset(new 
SequentialFileReader(std::move(file), abs_path)); + } + + struct LogReporter : public log::Reader::Reporter { + Status* status_ptr; + virtual void Corruption(size_t /*bytes*/, const Status& st) override { + if (status_ptr->ok()) { + *status_ptr = st; + } + } + } reporter; + reporter.status_ptr = &s; + log::Reader reader(nullptr, std::move(file_reader), &reporter, + true /* checksum */, 0 /* log_number */); + FileChecksumRetriever retriever(read_options, manifest_file_size, + *checksum_list); + retriever.Iterate(reader, &s); + assert(!retriever.status().ok() || + manifest_file_size == std::numeric_limits::max() || + reader.LastRecordEnd() == manifest_file_size); + + return retriever.status(); +} + +namespace { +static int RegisterFileChecksumGenFactories(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + FileChecksumGenCrc32cFactory::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new FileChecksumGenCrc32cFactory()); + return guard->get(); + }); + return 1; +} +} // namespace + +Status FileChecksumGenFactory::CreateFromString( + const ConfigOptions& options, const std::string& value, + std::shared_ptr* result) { + static std::once_flag once; + std::call_once(once, [&]() { + RegisterFileChecksumGenFactories(*(ObjectLibrary::Default().get()), ""); + }); + if (value == FileChecksumGenCrc32cFactory::kClassName()) { + *result = GetFileChecksumGenCrc32cFactory(); + return Status::OK(); + } else { + Status s = LoadSharedObject(options, value, result); + return s; + } +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.h new file mode 100644 index 0000000000..52469cf9f9 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/file_checksum_helper.h @@ -0,0 +1,101 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include +#include + +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/file_checksum.h" +#include "rocksdb/status.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "util/math.h" + +namespace ROCKSDB_NAMESPACE { + +// This is the class to generate the file checksum based on Crc32. 
It +// will be used as the default checksum method for SST file checksum +class FileChecksumGenCrc32c : public FileChecksumGenerator { + public: + FileChecksumGenCrc32c(const FileChecksumGenContext& /*context*/) { + checksum_ = 0; + } + + void Update(const char* data, size_t n) override { + checksum_ = crc32c::Extend(checksum_, data, n); + } + + void Finalize() override { + assert(checksum_str_.empty()); + // Store as big endian raw bytes + PutFixed32(&checksum_str_, EndianSwapValue(checksum_)); + } + + std::string GetChecksum() const override { + assert(!checksum_str_.empty()); + return checksum_str_; + } + + const char* Name() const override { return "FileChecksumCrc32c"; } + + private: + uint32_t checksum_; + std::string checksum_str_; +}; + +class FileChecksumGenCrc32cFactory : public FileChecksumGenFactory { + public: + std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) override { + if (context.requested_checksum_func_name.empty() || + context.requested_checksum_func_name == "FileChecksumCrc32c") { + return std::unique_ptr( + new FileChecksumGenCrc32c(context)); + } else { + return nullptr; + } + } + + static const char* kClassName() { return "FileChecksumGenCrc32cFactory"; } + const char* Name() const override { return kClassName(); } +}; + +// The default implementaion of FileChecksumList +class FileChecksumListImpl : public FileChecksumList { + public: + FileChecksumListImpl() {} + void reset() override; + + size_t size() const override; + + Status GetAllFileChecksums( + std::vector* file_numbers, std::vector* checksums, + std::vector* checksum_func_names) override; + + Status SearchOneFileChecksum(uint64_t file_number, std::string* checksum, + std::string* checksum_func_name) override; + + Status InsertOneFileChecksum(uint64_t file_number, + const std::string& checksum, + const std::string& checksum_func_name) override; + + Status RemoveOneFileChecksum(uint64_t file_number) override; + + private: + // Key is the file number, the first portion of the value is checksum, the + // second portion of the value is checksum function name. + std::unordered_map> + checksum_map_; +}; + +// If manifest_file_size < std::numeric_limits::max(), only use +// that length prefix of the manifest file. +Status GetFileChecksumsFromManifest(Env* src_env, const std::string& abs_path, + uint64_t manifest_file_size, + FileChecksumList* checksum_list); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/gflags_compat.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/gflags_compat.h new file mode 100644 index 0000000000..8f4a30b0d6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/gflags_compat.h @@ -0,0 +1,29 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include + +#include + +#ifndef GFLAGS_NAMESPACE +// in case it's not defined in old versions, that's probably because it was +// still google by default. +#define GFLAGS_NAMESPACE google +#endif + +#ifndef DEFINE_uint32 +// DEFINE_uint32 / DECLARE_uint32 do not appear in older versions of gflags. +// These should be sane definitions for those versions. 
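// ---------------------------------------------------------------------------
// Editorial aside, not part of the vendored header: what the DEFINE_uint32
// shim defined just below does for gflags versions without native uint32
// support. A hypothetical invocation such as
//
//   DEFINE_uint32(max_jobs, 4, "maximum background jobs");
//
// defines an int32 flag inside namespace gflags_compat and then exposes it
// to callers under the expected name via a reinterpreted reference:
//
//   uint32_t &FLAGS_max_jobs =
//       *reinterpret_cast<uint32_t *>(&gflags_compat::FLAGS_max_jobs);
//
// so code reads and writes FLAGS_max_jobs as an unsigned value while the
// storage is the old library's int32 flag. (max_jobs is illustrative only.)
// ---------------------------------------------------------------------------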
+#include +#define DEFINE_uint32(name, val, txt) \ + namespace gflags_compat { \ + DEFINE_int32(name, val, txt); \ + } \ + uint32_t &FLAGS_##name = \ + *reinterpret_cast(&gflags_compat::FLAGS_##name); + +#define DECLARE_uint32(name) extern uint32_t &FLAGS_##name; +#endif // !DEFINE_uint32 diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.cc new file mode 100644 index 0000000000..0f7f2edc13 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.cc @@ -0,0 +1,201 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/hash.h" + +#include + +#include "port/lang.h" +#include "util/coding.h" +#include "util/hash128.h" +#include "util/math128.h" +#include "util/xxhash.h" +#include "util/xxph3.h" + +namespace ROCKSDB_NAMESPACE { + +uint64_t (*kGetSliceNPHash64UnseededFnPtr)(const Slice&) = &GetSliceHash64; + +uint32_t Hash(const char* data, size_t n, uint32_t seed) { + // MurmurHash1 - fast but mediocre quality + // https://github.com/aappleby/smhasher/wiki/MurmurHash1 + // + const uint32_t m = 0xc6a4a793; + const uint32_t r = 24; + const char* limit = data + n; + uint32_t h = static_cast(seed ^ (n * m)); + + // Pick up four bytes at a time + while (data + 4 <= limit) { + uint32_t w = DecodeFixed32(data); + data += 4; + h += w; + h *= m; + h ^= (h >> 16); + } + + // Pick up remaining bytes + switch (limit - data) { + // Note: The original hash implementation used data[i] << shift, which + // promotes the char to int and then performs the shift. If the char is + // negative, the shift is undefined behavior in C++. The hash algorithm is + // part of the format definition, so we cannot change it; to obtain the same + // behavior in a legal way we just cast to uint32_t, which will do + // sign-extension. To guarantee compatibility with architectures where chars + // are unsigned we first cast the char to int8_t. + case 3: + h += static_cast(static_cast(data[2])) << 16; + FALLTHROUGH_INTENDED; + case 2: + h += static_cast(static_cast(data[1])) << 8; + FALLTHROUGH_INTENDED; + case 1: + h += static_cast(static_cast(data[0])); + h *= m; + h ^= (h >> r); + break; + } + return h; +} + +// We are standardizing on a preview release of XXH3, because that's +// the best available at time of standardizing. +// +// In testing (mostly Intel Skylake), this hash function is much more +// thorough than Hash32 and is almost universally faster. Hash() only +// seems faster when passing runtime-sized keys of the same small size +// (less than about 24 bytes) thousands of times in a row; this seems +// to allow the branch predictor to work some magic. XXH3's speed is +// much less dependent on branch prediction. +// +// Hashing with a prefix extractor is potentially a common case of +// hashing objects of small, predictable size. We could consider +// bundling hash functions specialized for particular lengths with +// the prefix extractors. 
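// ---------------------------------------------------------------------------
// Editorial aside, not part of the vendored sources: the sign-extension
// cast chain from the tail of Hash() above, in isolation. Casting char ->
// int8_t -> uint32_t pins down the value the on-disk format requires, even
// on platforms where plain char is unsigned, without shifting a negative
// int (undefined behavior). 0xAB is an arbitrary example byte.
#include <cstdint>
#include <cstdio>

int main() {
  char c = static_cast<char>(0xAB);  // high bit set
  uint32_t h = static_cast<uint32_t>(static_cast<int8_t>(c));
  std::printf("0x%08x\n", h);  // prints 0xffffffab: sign-extended as required
  return 0;
}
// ---------------------------------------------------------------------------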
+uint64_t Hash64(const char* data, size_t n, uint64_t seed) { + return XXPH3_64bits_withSeed(data, n, seed); +} + +uint64_t Hash64(const char* data, size_t n) { + // Same as seed = 0 + return XXPH3_64bits(data, n); +} + +uint64_t GetSlicePartsNPHash64(const SliceParts& data, uint64_t seed) { + // TODO(ajkr): use XXH3 streaming APIs to avoid the copy/allocation. + size_t concat_len = 0; + for (int i = 0; i < data.num_parts; ++i) { + concat_len += data.parts[i].size(); + } + std::string concat_data; + concat_data.reserve(concat_len); + for (int i = 0; i < data.num_parts; ++i) { + concat_data.append(data.parts[i].data(), data.parts[i].size()); + } + assert(concat_data.size() == concat_len); + return NPHash64(concat_data.data(), concat_len, seed); +} + +Unsigned128 Hash128(const char* data, size_t n, uint64_t seed) { + auto h = XXH3_128bits_withSeed(data, n, seed); + return (Unsigned128{h.high64} << 64) | (h.low64); +} + +Unsigned128 Hash128(const char* data, size_t n) { + // Same as seed = 0 + auto h = XXH3_128bits(data, n); + return (Unsigned128{h.high64} << 64) | (h.low64); +} + +void Hash2x64(const char* data, size_t n, uint64_t* high64, uint64_t* low64) { + // Same as seed = 0 + auto h = XXH3_128bits(data, n); + *high64 = h.high64; + *low64 = h.low64; +} + +void Hash2x64(const char* data, size_t n, uint64_t seed, uint64_t* high64, + uint64_t* low64) { + auto h = XXH3_128bits_withSeed(data, n, seed); + *high64 = h.high64; + *low64 = h.low64; +} + +namespace { + +inline uint64_t XXH3_avalanche(uint64_t h64) { + h64 ^= h64 >> 37; + h64 *= 0x165667919E3779F9U; + h64 ^= h64 >> 32; + return h64; +} + +inline uint64_t XXH3_unavalanche(uint64_t h64) { + h64 ^= h64 >> 32; + h64 *= 0x8da8ee41d6df849U; // inverse of 0x165667919E3779F9U + h64 ^= h64 >> 37; + return h64; +} + +} // namespace + +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64) { + // Adapted from XXH3_len_9to16_128b + const uint64_t bitflipl = /*secret part*/ 0x59973f0033362349U - seed; + const uint64_t bitfliph = /*secret part*/ 0xc202797692d63d58U + seed; + Unsigned128 tmp128 = + Multiply64to128(in_low64 ^ in_high64 ^ bitflipl, 0x9E3779B185EBCA87U); + uint64_t lo = Lower64of128(tmp128); + uint64_t hi = Upper64of128(tmp128); + lo += 0x3c0000000000000U; // (len - 1) << 54 + in_high64 ^= bitfliph; + hi += in_high64 + (Lower32of64(in_high64) * uint64_t{0x85EBCA76}); + lo ^= EndianSwapValue(hi); + tmp128 = Multiply64to128(lo, 0xC2B2AE3D27D4EB4FU); + lo = Lower64of128(tmp128); + hi = Upper64of128(tmp128) + (hi * 0xC2B2AE3D27D4EB4FU); + *out_low64 = XXH3_avalanche(lo); + *out_high64 = XXH3_avalanche(hi); +} + +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64) { + // Inverted above (also consulting XXH3_len_9to16_128b) + const uint64_t bitflipl = /*secret part*/ 0x59973f0033362349U - seed; + const uint64_t bitfliph = /*secret part*/ 0xc202797692d63d58U + seed; + uint64_t lo = XXH3_unavalanche(in_low64); + uint64_t hi = XXH3_unavalanche(in_high64); + lo *= 0xba79078168d4baf; // inverse of 0xC2B2AE3D27D4EB4FU + hi -= Upper64of128(Multiply64to128(lo, 0xC2B2AE3D27D4EB4FU)); + hi *= 0xba79078168d4baf; // inverse of 0xC2B2AE3D27D4EB4FU + lo ^= EndianSwapValue(hi); + lo -= 0x3c0000000000000U; + lo *= 0x887493432badb37U; // inverse of 0x9E3779B185EBCA87U + hi -= Upper64of128(Multiply64to128(lo, 0x9E3779B185EBCA87U)); + uint32_t tmp32 = Lower32of64(hi) * 0xb6c92f47; // inverse of 0x85EBCA77 + hi -= tmp32; 
+ hi = (hi & 0xFFFFFFFF00000000U) - + ((tmp32 * uint64_t{0x85EBCA76}) & 0xFFFFFFFF00000000U) + tmp32; + hi ^= bitfliph; + lo ^= hi ^ bitflipl; + *out_high64 = hi; + *out_low64 = lo; +} + +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64) { + BijectiveHash2x64(in_high64, in_low64, /*seed*/ 0, out_high64, out_low64); +} + +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64) { + BijectiveUnhash2x64(in_high64, in_low64, /*seed*/ 0, out_high64, out_low64); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.h new file mode 100644 index 0000000000..eafa47f346 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash.h @@ -0,0 +1,137 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Common hash functions with convenient interfaces. If hashing a +// statically-sized input in a performance-critical context, consider +// calling a specific hash implementation directly, such as +// XXH3_64bits from xxhash.h. +// +// Since this is a very common header, implementation details are kept +// out-of-line. Out-of-lining also aids in tracking the time spent in +// hashing functions. Inlining is of limited benefit for runtime-sized +// hash inputs. + +#pragma once + +#include +#include + +#include "rocksdb/slice.h" +#include "util/fastrange.h" + +namespace ROCKSDB_NAMESPACE { + +// Stable/persistent 64-bit hash. Higher quality and generally faster than +// Hash(), especially for inputs > 24 bytes. +// KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent +// results from previous seed. Recommend incrementing by a large odd number. +extern uint64_t Hash64(const char* data, size_t n, uint64_t seed); + +// Specific optimization without seed (same as seed = 0) +extern uint64_t Hash64(const char* data, size_t n); + +// Non-persistent hash. Must only used for in-memory data structures. +// The hash results are thus subject to change between releases, +// architectures, build configuration, etc. (Thus, it rarely makes sense +// to specify a seed for this function, except for a "rolling" hash.) +// KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent +// results from previous seed. Recommend incrementing by a large odd number. 
+inline uint64_t NPHash64(const char* data, size_t n, uint64_t seed) { +#ifdef ROCKSDB_MODIFY_NPHASH + // For testing "subject to change" + return Hash64(data, n, seed + 123456789); +#else + // Currently same as Hash64 + return Hash64(data, n, seed); +#endif +} + +// Specific optimization without seed (same as seed = 0) +inline uint64_t NPHash64(const char* data, size_t n) { +#ifdef ROCKSDB_MODIFY_NPHASH + // For testing "subject to change" + return Hash64(data, n, 123456789); +#else + // Currently same as Hash64 + return Hash64(data, n); +#endif +} + +// Convenient and equivalent version of Hash128 without depending on 128-bit +// scalars +void Hash2x64(const char* data, size_t n, uint64_t* high64, uint64_t* low64); +void Hash2x64(const char* data, size_t n, uint64_t seed, uint64_t* high64, + uint64_t* low64); + +// Hash 128 bits to 128 bits, guaranteed not to lose data (equivalent to +// Hash2x64 on 16 bytes little endian) +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64); +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64); + +// Inverse of above (mostly for testing) +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64); +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64); + +// Stable/persistent 32-bit hash. Moderate quality and high speed on +// small inputs. +// TODO: consider rename to Hash32 +// KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent +// results from previous seed. Recommend pseudorandom or hashed seeds. +extern uint32_t Hash(const char* data, size_t n, uint32_t seed); + +// TODO: consider rename to LegacyBloomHash32 +inline uint32_t BloomHash(const Slice& key) { + return Hash(key.data(), key.size(), 0xbc9f1d34); +} + +inline uint64_t GetSliceHash64(const Slice& key) { + return Hash64(key.data(), key.size()); +} +// Provided for convenience for use with template argument deduction, where a +// specific overload needs to be used. +extern uint64_t (*kGetSliceNPHash64UnseededFnPtr)(const Slice&); + +inline uint64_t GetSliceNPHash64(const Slice& s) { + return NPHash64(s.data(), s.size()); +} + +inline uint64_t GetSliceNPHash64(const Slice& s, uint64_t seed) { + return NPHash64(s.data(), s.size(), seed); +} + +// Similar to `GetSliceNPHash64()` with `seed`, but input comes from +// concatenation of `Slice`s in `data`. +extern uint64_t GetSlicePartsNPHash64(const SliceParts& data, uint64_t seed); + +inline size_t GetSliceRangedNPHash(const Slice& s, size_t range) { + return FastRange64(NPHash64(s.data(), s.size()), range); +} + +// TODO: consider rename to GetSliceHash32 +inline uint32_t GetSliceHash(const Slice& s) { + return Hash(s.data(), s.size(), 397); +} + +// Useful for splitting up a 64-bit hash +inline uint32_t Upper32of64(uint64_t v) { + return static_cast(v >> 32); +} +inline uint32_t Lower32of64(uint64_t v) { return static_cast(v); } + +// std::hash compatible interface. 
+// std::hash compatible interface.
+// TODO: consider rename to SliceHasher32
+struct SliceHasher {
+  uint32_t operator()(const Slice& s) const { return GetSliceHash(s); }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash128.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash128.h
new file mode 100644
index 0000000000..305caa14ae
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash128.h
@@ -0,0 +1,26 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+// 128-bit hash gets its own header so that the more popular hash.h doesn't
+// depend on math128.h
+
+#include "rocksdb/slice.h"
+#include "util/math128.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Stable/persistent 128-bit hash for non-cryptographic applications.
+Unsigned128 Hash128(const char* data, size_t n, uint64_t seed);
+
+// Specific optimization without seed (same as seed = 0)
+Unsigned128 Hash128(const char* data, size_t n);
+
+inline Unsigned128 GetSliceHash128(const Slice& key) {
+  return Hash128(key.data(), key.size());
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash_containers.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash_containers.h
new file mode 100644
index 0000000000..52be3718c0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash_containers.h
@@ -0,0 +1,51 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+// This header establishes compile-time pluggable implementations of hashed
+// container structures, so that deployments have the option of minimal
+// dependencies with ok performance (e.g. std::unordered_map) or more
+// dependencies with optimized performance (e.g. folly::F14FastMap).
+
+#pragma once
+
+#include "rocksdb/rocksdb_namespace.h"
+
+#ifdef USE_FOLLY
+
+#include <folly/container/F14Map.h>
+#include <folly/container/F14Set.h>
+
+namespace ROCKSDB_NAMESPACE {
+
+template <typename K, typename V>
+using UnorderedMap = folly::F14FastMap<K, V>;
+
+template <typename K, typename V, typename H>
+using UnorderedMapH = folly::F14FastMap<K, V, H>;
+
+template <typename K>
+using UnorderedSet = folly::F14FastSet<K>;
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#else
+
+#include <unordered_map>
+#include <unordered_set>
+
+namespace ROCKSDB_NAMESPACE {
+
+template <typename K, typename V>
+using UnorderedMap = std::unordered_map<K, V>;
+
+template <typename K, typename V, typename H>
+using UnorderedMapH = std::unordered_map<K, V, H>;
+
+template <typename K>
+using UnorderedSet = std::unordered_set<K>;
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif
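+
+// Usage sketch (illustrative): client code names only the alias and gets
+// folly::F14FastMap or std::unordered_map depending on the USE_FOLLY build
+// flag, e.g.
+//
+//   ROCKSDB_NAMESPACE::UnorderedMap<std::string, int> counts;
+//   counts["key"] += 1;  // same code under either implementation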
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash_map.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash_map.h
new file mode 100644
index 0000000000..e3ad2584f7
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/hash_map.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <utility>
+
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// This is similar to std::unordered_map, except that it tries to avoid
+// allocating or deallocating memory as much as possible. With
+// std::unordered_map, an allocation/deallocation is made for every insertion
+// or deletion because of the requirement that iterators remain valid even
+// with insertions or deletions. This means that the hash chains will be
+// implemented as linked lists.
+//
+// This implementation uses autovector as hash chains instead.
+//
+template <class K, class V, size_t size = 131>
+class HashMap {
+  std::array<autovector<std::pair<K, V>, 1>, size> table_;
+
+ public:
+  bool Contains(K key) {
+    auto& bucket = table_[key % size];
+    auto it = std::find_if(
+        bucket.begin(), bucket.end(),
+        [key](const std::pair<K, V>& p) { return p.first == key; });
+    return it != bucket.end();
+  }
+
+  void Insert(K key, const V& value) {
+    auto& bucket = table_[key % size];
+    bucket.push_back({key, value});
+  }
+
+  void Delete(K key) {
+    auto& bucket = table_[key % size];
+    auto it = std::find_if(
+        bucket.begin(), bucket.end(),
+        [key](const std::pair<K, V>& p) { return p.first == key; });
+    if (it != bucket.end()) {
+      auto last = bucket.end() - 1;
+      if (it != last) {
+        *it = *last;
+      }
+      bucket.pop_back();
+    }
+  }
+
+  V& Get(K key) {
+    auto& bucket = table_[key % size];
+    auto it = std::find_if(
+        bucket.begin(), bucket.end(),
+        [key](const std::pair<K, V>& p) { return p.first == key; });
+    return it->second;
+  }
+};
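+
+// Usage sketch (illustrative; assumes an integral key so that `key % size`
+// is well-formed, as the class itself requires). Get() on a missing key is
+// undefined, so check Contains() first:
+//
+//   HashMap<uint64_t, std::string> m;
+//   m.Insert(42, "hello");
+//   if (m.Contains(42)) {
+//     m.Get(42) += ", world";
+//   }
+//   m.Delete(42);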
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/heap.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/heap.h
new file mode 100644
index 0000000000..f221fc7327
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/heap.h
@@ -0,0 +1,174 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+
+#include "port/port.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Binary heap implementation optimized for use in multi-way merge sort.
+// Comparison to std::priority_queue:
+// - In libstdc++, std::priority_queue::pop() usually performs just over logN
+//   comparisons but never fewer.
+// - std::priority_queue does not have a replace-top operation, requiring a
+//   pop+push. If the replacement element is the new top, this requires
+//   around 2logN comparisons.
+// - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN
+//   comparisons.
+// - This heap provides a replace_top() operation which requires [1, 2logN]
+//   comparisons. When the replacement element is also the new top, this
+//   takes just 1 or 2 comparisons.
+//
+// The last property can yield an order-of-magnitude performance improvement
+// when merge-sorting real-world non-random data. If the merge operation is
+// likely to take chunks of elements from the same input stream, only 1
+// comparison per element is needed. In RocksDB-land, this happens when
+// compacting a database where keys are not randomly distributed across L0
+// files but nearby keys are likely to be in the same L0 file.
+//
+// The container uses the same counterintuitive ordering as
+// std::priority_queue: the comparison operator is expected to provide the
+// less-than relation, but top() will return the maximum.
+
+template <typename T, typename Compare = std::less<T>>
+class BinaryHeap {
+ public:
+  BinaryHeap() {}
+  explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) {}
+
+  void push(const T& value) {
+    data_.push_back(value);
+    upheap(data_.size() - 1);
+  }
+
+  void push(T&& value) {
+    data_.push_back(std::move(value));
+    upheap(data_.size() - 1);
+  }
+
+  const T& top() const {
+    assert(!empty());
+    return data_.front();
+  }
+
+  void replace_top(const T& value) {
+    assert(!empty());
+    data_.front() = value;
+    downheap(get_root());
+  }
+
+  void replace_top(T&& value) {
+    assert(!empty());
+    data_.front() = std::move(value);
+    downheap(get_root());
+  }
+
+  void pop() {
+    assert(!empty());
+    if (data_.size() > 1) {
+      // Avoid self-move-assign, because it could cause problems with
+      // classes which are not prepared for this and it trips up the
+      // STL debugger when activated.
+      data_.front() = std::move(data_.back());
+    }
+    data_.pop_back();
+    if (!empty()) {
+      downheap(get_root());
+    } else {
+      reset_root_cmp_cache();
+    }
+  }
+
+  void swap(BinaryHeap& other) {
+    std::swap(cmp_, other.cmp_);
+    data_.swap(other.data_);
+    std::swap(root_cmp_cache_, other.root_cmp_cache_);
+  }
+
+  void clear() {
+    data_.clear();
+    reset_root_cmp_cache();
+  }
+
+  bool empty() const { return data_.empty(); }
+
+  size_t size() const { return data_.size(); }
+
+  void reset_root_cmp_cache() {
+    root_cmp_cache_ = std::numeric_limits<size_t>::max();
+  }
+
+ private:
+  static inline size_t get_root() { return 0; }
+  static inline size_t get_parent(size_t index) { return (index - 1) / 2; }
+  static inline size_t get_left(size_t index) { return 2 * index + 1; }
+  static inline size_t get_right(size_t index) { return 2 * index + 2; }
+
+  void upheap(size_t index) {
+    T v = std::move(data_[index]);
+    while (index > get_root()) {
+      const size_t parent = get_parent(index);
+      if (!cmp_(data_[parent], v)) {
+        break;
+      }
+      data_[index] = std::move(data_[parent]);
+      index = parent;
+    }
+    data_[index] = std::move(v);
+    reset_root_cmp_cache();
+  }
+
+  void downheap(size_t index) {
+    T v = std::move(data_[index]);
+
+    size_t picked_child = std::numeric_limits<size_t>::max();
+    while (1) {
+      const size_t left_child = get_left(index);
+      if (get_left(index) >= data_.size()) {
+        break;
+      }
+      const size_t right_child = left_child + 1;
+      assert(right_child == get_right(index));
+      picked_child = left_child;
+      if (index == 0 && root_cmp_cache_ < data_.size()) {
+        picked_child = root_cmp_cache_;
+      } else if (right_child < data_.size() &&
+                 cmp_(data_[left_child], data_[right_child])) {
+        picked_child = right_child;
+      }
+      if (!cmp_(v, data_[picked_child])) {
+        break;
+      }
+      data_[index] = std::move(data_[picked_child]);
+      index = picked_child;
+    }
+
+    if (index == 0) {
+      // We did not change anything in the tree except for the value
+      // of the root node, left and right child did not change, we can
+      // cache that `picked_child` is the smallest child
+      // so next time we compare against it directly
+      root_cmp_cache_ = picked_child;
+    } else {
+      // the tree changed, reset cache
+      reset_root_cmp_cache();
+    }
+
+    data_[index] = std::move(v);
+  }
+
+  Compare cmp_;
+  autovector<T> data_;
+  // Used to reduce number of cmp_ calls in downheap()
+  size_t root_cmp_cache_ = std::numeric_limits<size_t>::max();
+};
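+
+// Usage sketch (illustrative): the replace_top() fast path in a k-way merge.
+// With Compare = std::greater<T> the heap becomes a min-heap, as in a merge
+// of sorted streams. Instead of pop() + push() when advancing the stream
+// that currently owns the top element, replace_top() often needs only 1-2
+// comparisons:
+//
+//   BinaryHeap<int, std::greater<int>> min_heap;
+//   min_heap.push(3);
+//   min_heap.push(1);
+//   min_heap.push(2);
+//   int smallest = min_heap.top();        // 1
+//   min_heap.replace_top(smallest + 10);  // advance the stream that won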
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/kv_map.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/kv_map.h
new file mode 100644
index 0000000000..62be6d18e3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/kv_map.h
@@ -0,0 +1,33 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <map>
+#include <string>
+
+#include "rocksdb/comparator.h"
+#include "rocksdb/slice.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace stl_wrappers {
+
+struct LessOfComparator {
+  explicit LessOfComparator(const Comparator* c = BytewiseComparator())
+      : cmp(c) {}
+
+  bool operator()(const std::string& a, const std::string& b) const {
+    return cmp->Compare(Slice(a), Slice(b)) < 0;
+  }
+  bool operator()(const Slice& a, const Slice& b) const {
+    return cmp->Compare(a, b) < 0;
+  }
+
+  const Comparator* cmp;
+};
+
+using KVMap = std::map<std::string, std::string, LessOfComparator>;
+}  // namespace stl_wrappers
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/math.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/math.h
new file mode 100644
index 0000000000..39f3083287
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/math.h
@@ -0,0 +1,299 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <assert.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#include <cstdint>
+#include <type_traits>
+
+#include "port/lang.h"
+#include "rocksdb/rocksdb_namespace.h"
+
+ASSERT_FEATURE_COMPAT_HEADER();
+
+namespace ROCKSDB_NAMESPACE {
+
+// Fast implementation of floor(log2(v)). Undefined for 0 or negative
+// numbers (in case of signed type).
+template <typename T>
+inline int FloorLog2(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+  assert(v > 0);
+#ifdef _MSC_VER
+  static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
+  unsigned long idx = 0;
+  if (sizeof(T) <= sizeof(uint32_t)) {
+    _BitScanReverse(&idx, static_cast<uint32_t>(v));
+  } else {
+#if defined(_M_X64) || defined(_M_ARM64)
+    _BitScanReverse64(&idx, static_cast<uint64_t>(v));
+#else
+    const auto vh = static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32);
+    if (vh != 0) {
+      _BitScanReverse(&idx, static_cast<unsigned long>(vh));
+      idx += 32;
+    } else {
+      _BitScanReverse(&idx, static_cast<uint32_t>(v));
+    }
+#endif
+  }
+  return idx;
+#else
+  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
+  if (sizeof(T) <= sizeof(unsigned int)) {
+    int lz = __builtin_clz(static_cast<unsigned int>(v));
+    return int{sizeof(unsigned int)} * 8 - 1 - lz;
+  } else if (sizeof(T) <= sizeof(unsigned long)) {
+    int lz = __builtin_clzl(static_cast<unsigned long>(v));
+    return int{sizeof(unsigned long)} * 8 - 1 - lz;
+  } else {
+    int lz = __builtin_clzll(static_cast<unsigned long long>(v));
+    return int{sizeof(unsigned long long)} * 8 - 1 - lz;
+  }
+#endif
+}
+
+// Constexpr version of FloorLog2
+template <typename T>
+constexpr int ConstexprFloorLog2(T v) {
+  int rv = 0;
+  while (v > T{1}) {
+    ++rv;
+    v >>= 1;
+  }
+  return rv;
+}
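+
+// Sketch of the intended behavior (illustrative): ConstexprFloorLog2 can be
+// checked at compile time, and FloorLog2 computes the same values at runtime.
+//
+//   static_assert(ConstexprFloorLog2(1u) == 0, "");
+//   static_assert(ConstexprFloorLog2(8u) == 3, "");
+//   static_assert(ConstexprFloorLog2(1000u) == 9, "");  // 512 <= 1000 < 1024
+//   // e.g. FloorLog2(uint64_t{1} << 40) == 40 at runtime.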
+
+// Number of low-order zero bits before the first 1 bit. Undefined for 0.
+template <typename T>
+inline int CountTrailingZeroBits(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+  assert(v != 0);
+#ifdef _MSC_VER
+  static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
+  unsigned long tz = 0;
+  if (sizeof(T) <= sizeof(uint32_t)) {
+    _BitScanForward(&tz, static_cast<uint32_t>(v));
+  } else {
+#if defined(_M_X64) || defined(_M_ARM64)
+    _BitScanForward64(&tz, static_cast<uint64_t>(v));
+#else
+    // Scan the low half if it has any set bit; otherwise scan the high half
+    // and offset the result.
+    if (static_cast<uint32_t>(v) != 0) {
+      _BitScanForward(&tz, static_cast<uint32_t>(v));
+    } else {
+      _BitScanForward(&tz,
+                      static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32));
+      tz += 32;
+    }
+#endif
+  }
+  return static_cast<int>(tz);
+#else
+  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
+  if (sizeof(T) <= sizeof(unsigned int)) {
+    return __builtin_ctz(static_cast<unsigned int>(v));
+  } else if (sizeof(T) <= sizeof(unsigned long)) {
+    return __builtin_ctzl(static_cast<unsigned long>(v));
+  } else {
+    return __builtin_ctzll(static_cast<unsigned long long>(v));
+  }
+#endif
+}
+
+// Not all MSVC compile settings will use `BitsSetToOneFallback()`. We include
+// the following code at coarse granularity for simpler macros. It's important
+// to exclude at least so our non-MSVC unit test coverage tool doesn't see it.
+#ifdef _MSC_VER
+
+namespace detail {
+
+template <typename T>
+int BitsSetToOneFallback(T v) {
+  const int kBits = static_cast<int>(sizeof(T)) * 8;
+  static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
+  // we static_cast these bit patterns in order to truncate them to the correct
+  // size. Warning C4309 dislikes this technique, so disable it here.
+#pragma warning(disable : 4309)
+  v = static_cast<T>(v - ((v >> 1) & static_cast<T>(0x5555555555555555ull)));
+  v = static_cast<T>((v & static_cast<T>(0x3333333333333333ull)) +
+                     ((v >> 2) & static_cast<T>(0x3333333333333333ull)));
+  v = static_cast<T>((v + (v >> 4)) & static_cast<T>(0x0F0F0F0F0F0F0F0Full));
+#pragma warning(default : 4309)
+  for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) {
+    v += static_cast<T>(v >> shift_bits);
+  }
+  // we want the bottom "slot" that's big enough to represent a value up to
+  // (and including) kBits.
+  return static_cast<int>(v & static_cast<T>(kBits | (kBits - 1)));
+}
+
+}  // namespace detail
+
+#endif  // _MSC_VER
+
+// Number of bits set to 1. Also known as "population count".
+template <typename T>
+inline int BitsSetToOne(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+#ifdef _MSC_VER
+  static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
+  if (sizeof(T) < sizeof(uint32_t)) {
+    // This bit mask is to avoid a compiler warning on unused path
+    constexpr auto mm = 8 * sizeof(uint32_t) - 1;
+    // The bit mask is to neutralize sign extension on small signed types
+    constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
+#if __POPCNT__
+    return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
+#else
+    return static_cast<int>(detail::BitsSetToOneFallback(v) & m);
+#endif  // __POPCNT__
+  } else if (sizeof(T) == sizeof(uint32_t)) {
+#if __POPCNT__
+    return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
+#else
+    return detail::BitsSetToOneFallback(static_cast<uint32_t>(v));
+#endif  // __POPCNT__
+  } else {
+#if __POPCNT__
+#ifdef _M_X64
+    return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
+#else
+    // Sum the popcounts of the two 32-bit halves.
+    return static_cast<int>(
+        __popcnt(static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32)) +
+        __popcnt(static_cast<uint32_t>(v)));
+#endif  // _M_X64
+#else
+    return detail::BitsSetToOneFallback(static_cast<uint64_t>(v));
+#endif  // __POPCNT__
+  }
+#else
+  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
+  if (sizeof(T) < sizeof(unsigned int)) {
+    // This bit mask is to avoid a compiler warning on unused path
+    constexpr auto mm = 8 * sizeof(unsigned int) - 1;
+    // This bit mask is to neutralize sign extension on small signed types
+    constexpr unsigned int m = (1U << ((8 * sizeof(T)) & mm)) - 1;
+    return __builtin_popcount(static_cast<unsigned int>(v) & m);
+  } else if (sizeof(T) == sizeof(unsigned int)) {
+    return __builtin_popcount(static_cast<unsigned int>(v));
+  } else if (sizeof(T) <= sizeof(unsigned long)) {
+    return __builtin_popcountl(static_cast<unsigned long>(v));
+  } else {
+    return __builtin_popcountll(static_cast<unsigned long long>(v));
+  }
+#endif
+}
+
+template <typename T>
+inline int BitParity(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+#ifdef _MSC_VER
+  // bit parity == oddness of popcount
+  return BitsSetToOne(v) & 1;
+#else
+  static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
+  if (sizeof(T) <= sizeof(unsigned int)) {
+    // On any sane system, potential sign extension here won't change parity
+    return __builtin_parity(static_cast<unsigned int>(v));
+  } else if (sizeof(T) <= sizeof(unsigned long)) {
+    return __builtin_parityl(static_cast<unsigned long>(v));
+  } else {
+    return __builtin_parityll(static_cast<unsigned long long>(v));
+  }
+#endif
+}
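+
+// Sketch of the intended behavior (illustrative):
+//
+//   // 0b1011 has three 1-bits, so popcount is 3 and parity is odd (1).
+//   assert(BitsSetToOne(uint32_t{0b1011}) == 3);
+//   assert(BitParity(uint32_t{0b1011}) == 1);
+//   assert(BitParity(uint64_t{0b1001}) == 0);  // two 1-bits -> even parity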
+
+// Swaps between big and little endian. Can be used in combination with the
+// little-endian encoding/decoding functions in coding_lean.h and coding.h to
+// encode/decode big endian.
+template <typename T>
+inline T EndianSwapValue(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+
+#ifdef _MSC_VER
+  if (sizeof(T) == 2) {
+    return static_cast<T>(_byteswap_ushort(static_cast<uint16_t>(v)));
+  } else if (sizeof(T) == 4) {
+    return static_cast<T>(_byteswap_ulong(static_cast<uint32_t>(v)));
+  } else if (sizeof(T) == 8) {
+    return static_cast<T>(_byteswap_uint64(static_cast<uint64_t>(v)));
+  }
+#else
+  if (sizeof(T) == 2) {
+    return static_cast<T>(__builtin_bswap16(static_cast<uint16_t>(v)));
+  } else if (sizeof(T) == 4) {
+    return static_cast<T>(__builtin_bswap32(static_cast<uint32_t>(v)));
+  } else if (sizeof(T) == 8) {
+    return static_cast<T>(__builtin_bswap64(static_cast<uint64_t>(v)));
+  }
+#endif
+  // Recognized by clang as bswap, but not by gcc :(
+  T ret_val = 0;
+  for (std::size_t i = 0; i < sizeof(T); ++i) {
+    ret_val |= ((v >> (8 * i)) & 0xff) << (8 * (sizeof(T) - 1 - i));
+  }
+  return ret_val;
+}
+
+// Reverses the order of bits in an integral value
+template <typename T>
+inline T ReverseBits(T v) {
+  T r = EndianSwapValue(v);
+  const T kHighestByte = T{1} << ((sizeof(T) - 1) * 8);
+  const T kEveryByte = kHighestByte | (kHighestByte / 255);
+
+  r = ((r & (kEveryByte * 0x0f)) << 4) | ((r >> 4) & (kEveryByte * 0x0f));
+  r = ((r & (kEveryByte * 0x33)) << 2) | ((r >> 2) & (kEveryByte * 0x33));
+  r = ((r & (kEveryByte * 0x55)) << 1) | ((r >> 1) & (kEveryByte * 0x55));
+
+  return r;
+}
+
+// Every output bit depends on many input bits in the same and higher
+// positions, but not lower positions. Specifically, this function
+// * Output highest bit set to 1 is same as input (same FloorLog2, or
+//   equivalently, same number of leading zeros)
+// * Is its own inverse (an involution)
+// * Guarantees that b bottom bits of v and c bottom bits of
+//   DownwardInvolution(v) uniquely identify b + c bottom bits of v
+//   (which is all of v if v < 2**(b + c)).
+// ** A notable special case is that modifying c adjacent bits at
+//    some chosen position in the input is bijective with the bottom c
+//    output bits.
+// * Distributes over xor, as in DI(a ^ b) == DI(a) ^ DI(b)
+//
+// This transformation is equivalent to a matrix*vector multiplication in
+// GF(2) where the matrix is recursively defined by the pattern matrix
+// P = | 1 1 |
+//     | 0 1 |
+// and replacing 1's with P and 0's with 2x2 zero matrices to some depth,
+// e.g. depth of 6 for 64-bit T. An essential feature of this matrix
+// is that all square sub-matrices that include the top row are invertible.
template <typename T>
+inline T DownwardInvolution(T v) {
+  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(sizeof(T) <= 8, "only supported up to 64 bits");
+
+  uint64_t r = static_cast<uint64_t>(v);
+  if constexpr (sizeof(T) > 4) {
+    r ^= r >> 32;
+  }
+  if constexpr (sizeof(T) > 2) {
+    r ^= (r & 0xffff0000ffff0000U) >> 16;
+  }
+  if constexpr (sizeof(T) > 1) {
+    r ^= (r & 0xff00ff00ff00ff00U) >> 8;
+  }
+  r ^= (r & 0xf0f0f0f0f0f0f0f0U) >> 4;
+  r ^= (r & 0xccccccccccccccccU) >> 2;
+  r ^= (r & 0xaaaaaaaaaaaaaaaaU) >> 1;
+  return static_cast<T>(r);
+}
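+
+// Properties sketch (illustrative), restating the two key guarantees from the
+// comment above: DownwardInvolution is self-inverse and distributes over xor.
+//
+//   uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
+//   assert(DownwardInvolution(DownwardInvolution(a)) == a);
+//   assert(DownwardInvolution(a ^ b) ==
+//          (DownwardInvolution(a) ^ DownwardInvolution(b)));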
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/math128.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/math128.h
new file mode 100644
index 0000000000..ae490051a7
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/math128.h
@@ -0,0 +1,316 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "util/coding_lean.h"
+#include "util/math.h"
+
+#ifdef TEST_UINT128_COMPAT
+#undef HAVE_UINT128_EXTENSION
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+// Unsigned128 is a 128 bit value supporting (at least) bitwise operators,
+// shifts, and comparisons. __uint128_t is not always available.
+
+#ifdef HAVE_UINT128_EXTENSION
+using Unsigned128 = __uint128_t;
+#else
+struct Unsigned128 {
+  uint64_t lo;
+  uint64_t hi;
+
+  inline Unsigned128() {
+    static_assert(sizeof(Unsigned128) == 2 * sizeof(uint64_t),
+                  "unexpected overhead in representation");
+    lo = 0;
+    hi = 0;
+  }
+
+  inline Unsigned128(uint64_t lower) {
+    lo = lower;
+    hi = 0;
+  }
+
+  inline Unsigned128(uint64_t lower, uint64_t upper) {
+    lo = lower;
+    hi = upper;
+  }
+
+  explicit operator uint64_t() { return lo; }
+
+  explicit operator uint32_t() { return static_cast<uint32_t>(lo); }
+
+  explicit operator uint16_t() { return static_cast<uint16_t>(lo); }
+
+  explicit operator uint8_t() { return static_cast<uint8_t>(lo); }
+};
+
+inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
+  shift &= 127;
+  Unsigned128 rv;
+  if (shift >= 64) {
+    rv.lo = 0;
+    rv.hi = lhs.lo << (shift & 63);
+  } else {
+    uint64_t tmp = lhs.lo;
+    rv.lo = tmp << shift;
+    // Ensure shift==0 shifts away everything. (This avoids another
+    // conditional branch on shift == 0.)
+    tmp = tmp >> 1 >> (63 - shift);
+    rv.hi = tmp | (lhs.hi << shift);
+  }
+  return rv;
+}
+
+inline Unsigned128& operator<<=(Unsigned128& lhs, unsigned shift) {
+  lhs = lhs << shift;
+  return lhs;
+}
+
+inline Unsigned128 operator>>(const Unsigned128& lhs, unsigned shift) {
+  shift &= 127;
+  Unsigned128 rv;
+  if (shift >= 64) {
+    rv.hi = 0;
+    rv.lo = lhs.hi >> (shift & 63);
+  } else {
+    uint64_t tmp = lhs.hi;
+    rv.hi = tmp >> shift;
+    // Ensure shift==0 shifts away everything
+    tmp = tmp << 1 << (63 - shift);
+    rv.lo = tmp | (lhs.lo >> shift);
+  }
+  return rv;
+}
+
+inline Unsigned128& operator>>=(Unsigned128& lhs, unsigned shift) {
+  lhs = lhs >> shift;
+  return lhs;
+}
+
+inline Unsigned128 operator&(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return Unsigned128(lhs.lo & rhs.lo, lhs.hi & rhs.hi);
+}
+
+inline Unsigned128& operator&=(Unsigned128& lhs, const Unsigned128& rhs) {
+  lhs = lhs & rhs;
+  return lhs;
+}
+
+inline Unsigned128 operator|(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return Unsigned128(lhs.lo | rhs.lo, lhs.hi | rhs.hi);
+}
+
+inline Unsigned128& operator|=(Unsigned128& lhs, const Unsigned128& rhs) {
+  lhs = lhs | rhs;
+  return lhs;
+}
+
+inline Unsigned128 operator^(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return Unsigned128(lhs.lo ^ rhs.lo, lhs.hi ^ rhs.hi);
+}
+
+inline Unsigned128& operator^=(Unsigned128& lhs, const Unsigned128& rhs) {
+  lhs = lhs ^ rhs;
+  return lhs;
+}
+
+inline Unsigned128 operator~(const Unsigned128& v) {
+  return Unsigned128(~v.lo, ~v.hi);
+}
+
+inline bool operator==(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
+}
+
+inline bool operator!=(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.lo != rhs.lo || lhs.hi != rhs.hi;
+}
+
+inline bool operator>(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo > rhs.lo);
+}
+
+inline bool operator<(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo < rhs.lo);
+}
+
+inline bool operator>=(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo >= rhs.lo);
+}
+
+inline bool operator<=(const Unsigned128& lhs, const Unsigned128& rhs) {
+  return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo <= rhs.lo);
+}
+#endif
+
+inline uint64_t Lower64of128(Unsigned128 v) {
+#ifdef HAVE_UINT128_EXTENSION
+  return static_cast<uint64_t>(v);
+#else
+  return v.lo;
+#endif
+}
+
+inline uint64_t Upper64of128(Unsigned128 v) {
+#ifdef HAVE_UINT128_EXTENSION
+  return static_cast<uint64_t>(v >> 64);
+#else
+  return v.hi;
+#endif
+}
+
+// This generally compiles down to a single fast instruction on 64-bit.
+// This doesn't really make sense as operator* because it's not a
+// general 128x128 multiply and provides more output than 64x64 multiply.
+inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
+#ifdef HAVE_UINT128_EXTENSION
+  return Unsigned128{a} * Unsigned128{b};
+#else
+  // Full decomposition
+  // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit
+  // -> 128-bit multiplication and optimize it appropriately.
+  uint64_t tmp = uint64_t{b & 0xffffFFFF} * uint64_t{a & 0xffffFFFF};
+  uint64_t lower = tmp & 0xffffFFFF;
+  tmp >>= 32;
+  tmp += uint64_t{b & 0xffffFFFF} * uint64_t{a >> 32};
+  // Avoid overflow: first add lower 32 of tmp2, and later upper 32
+  uint64_t tmp2 = uint64_t{b >> 32} * uint64_t{a & 0xffffFFFF};
+  tmp += tmp2 & 0xffffFFFF;
+  lower |= tmp << 32;
+  tmp >>= 32;
+  tmp += tmp2 >> 32;
+  tmp += uint64_t{b >> 32} * uint64_t{a >> 32};
+  return Unsigned128(lower, tmp);
+#endif
+}
+
+template <>
+inline int FloorLog2(Unsigned128 v) {
+  if (Upper64of128(v) == 0) {
+    return FloorLog2(Lower64of128(v));
+  } else {
+    return FloorLog2(Upper64of128(v)) + 64;
+  }
+}
+
+template <>
+inline int CountTrailingZeroBits(Unsigned128 v) {
+  if (Lower64of128(v) != 0) {
+    return CountTrailingZeroBits(Lower64of128(v));
+  } else {
+    return CountTrailingZeroBits(Upper64of128(v)) + 64;
+  }
+}
+
+template <>
+inline int BitsSetToOne(Unsigned128 v) {
+  return BitsSetToOne(Lower64of128(v)) + BitsSetToOne(Upper64of128(v));
+}
+
+template <>
+inline int BitParity(Unsigned128 v) {
+  return BitParity(Lower64of128(v) ^ Upper64of128(v));
+}
+
+template <>
+inline Unsigned128 EndianSwapValue(Unsigned128 v) {
+  return (Unsigned128{EndianSwapValue(Lower64of128(v))} << 64) |
+         EndianSwapValue(Upper64of128(v));
+}
+
+template <>
+inline Unsigned128 ReverseBits(Unsigned128 v) {
+  return (Unsigned128{ReverseBits(Lower64of128(v))} << 64) |
+         ReverseBits(Upper64of128(v));
+}
+
+template <>
+inline Unsigned128 DownwardInvolution(Unsigned128 v) {
+  return (Unsigned128{DownwardInvolution(Upper64of128(v))} << 64) |
+         DownwardInvolution(Upper64of128(v) ^ Lower64of128(v));
+}
+
+template <typename T>
+struct IsUnsignedUpTo128
+    : std::integral_constant<bool, std::is_integral<T>::value ||
+                                       std::is_same<T, Unsigned128>::value> {};
+
+inline void EncodeFixed128(char* dst, Unsigned128 value) {
+  EncodeFixed64(dst, Lower64of128(value));
+  EncodeFixed64(dst + 8, Upper64of128(value));
+}
+
+inline Unsigned128 DecodeFixed128(const char* ptr) {
+  Unsigned128 rv = DecodeFixed64(ptr + 8);
+  return (rv << 64) | DecodeFixed64(ptr);
+}
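+
+// Usage sketch (illustrative): a full 64x64 -> 128-bit multiply followed by
+// a fixed little-endian round-trip through a 16-byte buffer:
+//
+//   Unsigned128 p = Multiply64to128(0xdeadbeefULL, 0x12345678ULL);
+//   uint64_t hi = Upper64of128(p);
+//   uint64_t lo = Lower64of128(p);
+//   char buf[16];
+//   EncodeFixed128(buf, p);
+//   assert(DecodeFixed128(buf) == p);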
+
+// A version of EncodeFixed* for generic algorithms. Likely to be used
+// with Unsigned128, so lives here for now.
+template <typename T>
+inline void EncodeFixedGeneric(char* /*dst*/, T /*value*/) {
+  // Unfortunately, GCC does not appear to optimize this simple code down
+  // to a trivial load on Intel:
+  //
+  // T ret_val = 0;
+  // for (size_t i = 0; i < sizeof(T); ++i) {
+  //   ret_val |= (static_cast<T>(static_cast<unsigned char>(ptr[i]))
+  //               << (8 * i));
+  // }
+  // return ret_val;
+  //
+  // But does unroll the loop, and does optimize manually unrolled version
+  // for specific sizes down to a trivial load. I have no idea why it doesn't
+  // do both on this code.
+
+  // So instead, we rely on specializations
+  static_assert(sizeof(T) == 0, "No specialization provided for this type");
+}
+
+template <>
+inline void EncodeFixedGeneric(char* dst, uint16_t value) {
+  return EncodeFixed16(dst, value);
+}
+template <>
+inline void EncodeFixedGeneric(char* dst, uint32_t value) {
+  return EncodeFixed32(dst, value);
+}
+template <>
+inline void EncodeFixedGeneric(char* dst, uint64_t value) {
+  return EncodeFixed64(dst, value);
+}
+template <>
+inline void EncodeFixedGeneric(char* dst, Unsigned128 value) {
+  return EncodeFixed128(dst, value);
+}
+
+// A version of DecodeFixed* for generic algorithms.
+template <typename T>
+inline T DecodeFixedGeneric(const char* /*dst*/) {
+  static_assert(sizeof(T) == 0, "No specialization provided for this type");
+}
+
+template <>
+inline uint16_t DecodeFixedGeneric(const char* dst) {
+  return DecodeFixed16(dst);
+}
+template <>
+inline uint32_t DecodeFixedGeneric(const char* dst) {
+  return DecodeFixed32(dst);
+}
+template <>
+inline uint64_t DecodeFixedGeneric(const char* dst) {
+  return DecodeFixed64(dst);
+}
+template <>
+inline Unsigned128 DecodeFixedGeneric(const char* dst) {
+  return DecodeFixed128(dst);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.cc
new file mode 100644
index 0000000000..a69f3918ab
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.cc
@@ -0,0 +1,196 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+/*
+  Murmurhash from http://sites.google.com/site/murmurhash/
+
+  All code is released to the public domain. For business purposes, Murmurhash
+  is under the MIT license.
+*/
+#include "murmurhash.h"
+
+#include "port/lang.h"
+
+#if defined(__x86_64__)
+
+// -------------------------------------------------------------------
+//
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
+// and endian-ness issues if used across multiple platforms.
+// +// 64-bit hash for 64-bit platforms + +#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +// clang-format off +uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed ) +{ + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len/8); + + while(data != end) + { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char * data2 = (const unsigned char*)data; + + switch(len & 7) + { + case 7: h ^= ((uint64_t)data2[6]) << 48; FALLTHROUGH_INTENDED; + case 6: h ^= ((uint64_t)data2[5]) << 40; FALLTHROUGH_INTENDED; + case 5: h ^= ((uint64_t)data2[4]) << 32; FALLTHROUGH_INTENDED; + case 4: h ^= ((uint64_t)data2[3]) << 24; FALLTHROUGH_INTENDED; + case 3: h ^= ((uint64_t)data2[2]) << 16; FALLTHROUGH_INTENDED; + case 2: h ^= ((uint64_t)data2[1]) << 8; FALLTHROUGH_INTENDED; + case 1: h ^= ((uint64_t)data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} +// clang-format on + +#elif defined(__i386__) + +// ------------------------------------------------------------------- +// +// Note - This code makes a few assumptions about how your machine behaves - +// +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 +// +// And it has a few limitations - +// +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. +// clang-format off +unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const unsigned int m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + unsigned int h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + unsigned int k = *(unsigned int *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED; + case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} +// clang-format on + +#else + +// ------------------------------------------------------------------- +// +// Same as MurmurHash2, but endian- and alignment-neutral. +// Half the speed though, alas. 
+// clang-format off
+unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed )
+{
+  const unsigned int m = 0x5bd1e995;
+  const int r = 24;
+
+  unsigned int h = seed ^ len;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    unsigned int k;
+
+    k  = data[0];
+    k |= data[1] << 8;
+    k |= data[2] << 16;
+    k |= data[3] << 24;
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED;
+  case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED;
+  case 1: h ^= data[0];
+          h *= m;
+  };
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
+// clang-format on
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.h
new file mode 100644
index 0000000000..7ef4cbbec8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/murmurhash.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+/*
+  Murmurhash from http://sites.google.com/site/murmurhash/
+
+  All code is released to the public domain. For business purposes, Murmurhash
+  is under the MIT license.
+*/
+#pragma once
+#include <stdint.h>
+
+#include "rocksdb/slice.h"
+
+#if defined(__x86_64__)
+#define MURMUR_HASH MurmurHash64A
+uint64_t MurmurHash64A(const void* key, int len, unsigned int seed);
+#define MurmurHash MurmurHash64A
+using murmur_t = uint64_t;
+
+#elif defined(__i386__)
+#define MURMUR_HASH MurmurHash2
+unsigned int MurmurHash2(const void* key, int len, unsigned int seed);
+#define MurmurHash MurmurHash2
+using murmur_t = unsigned int;
+
+#else
+#define MURMUR_HASH MurmurHashNeutral2
+unsigned int MurmurHashNeutral2(const void* key, int len, unsigned int seed);
+#define MurmurHash MurmurHashNeutral2
+using murmur_t = unsigned int;
+#endif
+
+// Allow slice to be hashable by murmur hash.
+namespace ROCKSDB_NAMESPACE {
+struct murmur_hash {
+  size_t operator()(const Slice& slice) const {
+    return MurmurHash(slice.data(), static_cast<int>(slice.size()), 0);
+  }
+};
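+
+// Usage sketch (illustrative): murmur_hash lets Slice be used as the key of
+// a standard hashed container (Slice provides operator==). Note Slice does
+// not own its bytes, so the keyed data must outlive the map:
+//
+//   #include <unordered_map>
+//   std::unordered_map<Slice, int, murmur_hash> m;
+//   m[Slice("key")] = 1;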
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/mutexlock.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/mutexlock.h
new file mode 100644
index 0000000000..a5b7581538
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/mutexlock.h
@@ -0,0 +1,181 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <assert.h>
+
+#include <atomic>
+#include <functional>
+#include <memory>
+#include <thread>
+
+#include "port/port.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Helper class that locks a mutex on construction and unlocks the mutex when
+// the destructor of the MutexLock object is invoked.
+//
+// Typical usage:
+//
+//   void MyClass::MyMethod() {
+//     MutexLock l(&mu_);       // mu_ is an instance variable
+//     ... some complex code, possibly with multiple return paths ...
+//   }
+
+class MutexLock {
+ public:
+  explicit MutexLock(port::Mutex *mu) : mu_(mu) { this->mu_->Lock(); }
+  // No copying allowed
+  MutexLock(const MutexLock &) = delete;
+  void operator=(const MutexLock &) = delete;
+
+  ~MutexLock() { this->mu_->Unlock(); }
+
+ private:
+  port::Mutex *const mu_;
+};
+
+//
+// Acquire a ReadLock on the specified RWMutex.
+// The Lock will be automatically released when the
+// object goes out of scope.
+//
+class ReadLock {
+ public:
+  explicit ReadLock(port::RWMutex *mu) : mu_(mu) { this->mu_->ReadLock(); }
+  // No copying allowed
+  ReadLock(const ReadLock &) = delete;
+  void operator=(const ReadLock &) = delete;
+
+  ~ReadLock() { this->mu_->ReadUnlock(); }
+
+ private:
+  port::RWMutex *const mu_;
+};
+
+//
+// Automatically unlock a locked mutex when the object is destroyed
+//
+class ReadUnlock {
+ public:
+  explicit ReadUnlock(port::RWMutex *mu) : mu_(mu) { mu->AssertHeld(); }
+  // No copying allowed
+  ReadUnlock(const ReadUnlock &) = delete;
+  ReadUnlock &operator=(const ReadUnlock &) = delete;
+
+  ~ReadUnlock() { mu_->ReadUnlock(); }
+
+ private:
+  port::RWMutex *const mu_;
+};
+
+//
+// Acquire a WriteLock on the specified RWMutex.
+// The Lock will be automatically released when the
+// object goes out of scope.
+//
+class WriteLock {
+ public:
+  explicit WriteLock(port::RWMutex *mu) : mu_(mu) { this->mu_->WriteLock(); }
+  // No copying allowed
+  WriteLock(const WriteLock &) = delete;
+  void operator=(const WriteLock &) = delete;
+
+  ~WriteLock() { this->mu_->WriteUnlock(); }
+
+ private:
+  port::RWMutex *const mu_;
+};
+
+//
+// SpinMutex has very low overhead for low-contention cases. Method names
+// are chosen so you can use std::unique_lock or std::lock_guard with it.
+//
+class SpinMutex {
+ public:
+  SpinMutex() : locked_(false) {}
+
+  bool try_lock() {
+    auto currently_locked = locked_.load(std::memory_order_relaxed);
+    return !currently_locked &&
+           locked_.compare_exchange_weak(currently_locked, true,
+                                         std::memory_order_acquire,
+                                         std::memory_order_relaxed);
+  }
+
+  void lock() {
+    for (size_t tries = 0;; ++tries) {
+      if (try_lock()) {
+        // success
+        break;
+      }
+      port::AsmVolatilePause();
+      if (tries > 100) {
+        std::this_thread::yield();
+      }
+    }
+  }
+
+  void unlock() { locked_.store(false, std::memory_order_release); }
+
+ private:
+  std::atomic<bool> locked_;
+};
+
+// We want to prevent false sharing
+template <class T>
+struct ALIGN_AS(CACHE_LINE_SIZE) LockData {
+  T lock_;
+};
+
+//
+// Inspired by Guava: https://github.com/google/guava/wiki/StripedExplained
+// A striped Lock. This offers the underlying lock striping similar
+// to that of ConcurrentHashMap in a reusable form, and extends it for
+// semaphores and read-write locks. Conceptually, lock striping is the
+// technique of dividing a lock into many stripes, increasing the granularity
+// of a single lock and allowing independent operations to lock different
+// stripes and proceed concurrently, instead of creating contention for a
+// single lock.
+//
+template <class T, class P>
+class Striped {
+ public:
+  Striped(size_t stripes, std::function<uint64_t(const P &)> hash)
+      : stripes_(stripes), hash_(hash) {
+    locks_ = reinterpret_cast<LockData<T> *>(
+        port::cacheline_aligned_alloc(sizeof(LockData<T>) * stripes));
+    for (size_t i = 0; i < stripes; i++) {
+      new (&locks_[i]) LockData<T>();
+    }
+  }
+
+  virtual ~Striped() {
+    if (locks_ != nullptr) {
+      assert(stripes_ > 0);
+      for (size_t i = 0; i < stripes_; i++) {
+        locks_[i].~LockData();
+      }
+      port::cacheline_aligned_free(locks_);
+    }
+  }
+
+  T *get(const P &key) {
+    uint64_t h = hash_(key);
+    size_t index = h % stripes_;
+    return &reinterpret_cast<LockData<T> *>(&locks_[index])->lock_;
+  }
+
+ private:
+  size_t stripes_;
+  LockData<T> *locks_;
+  std::function<uint64_t(const P &)> hash_;
+};
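+
+// Usage sketch (illustrative; the hash callback can be any function mapping
+// the key to uint64_t -- here NPHash64 from util/hash.h): one mutex per
+// stripe guards many keys with low contention.
+//
+//   Striped<port::Mutex, std::string> striped(
+//       16, [](const std::string &k) { return NPHash64(k.data(), k.size()); });
+//   port::Mutex *m = striped.get("some-key");
+//   m->Lock();
+//   // ... critical section for all keys hashing to this stripe ...
+//   m->Unlock();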
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ppc-opcode.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ppc-opcode.h
new file mode 100644
index 0000000000..5cc5af0e30
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ppc-opcode.h
@@ -0,0 +1,27 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2017 International Business Machines Corp.
+// All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#define __PPC_RA(a) (((a)&0x1f) << 16)
+#define __PPC_RB(b) (((b)&0x1f) << 11)
+#define __PPC_XA(a) ((((a)&0x1f) << 16) | (((a)&0x20) >> 3))
+#define __PPC_XB(b) ((((b)&0x1f) << 11) | (((b)&0x20) >> 4))
+#define __PPC_XS(s) ((((s)&0x1f) << 21) | (((s)&0x20) >> 5))
+#define __PPC_XT(s) __PPC_XS(s)
+#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
+#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
+
+#define PPC_INST_VPMSUMW 0x10000488
+#define PPC_INST_VPMSUMD 0x100004c8
+#define PPC_INST_MFVSRD 0x7c000066
+#define PPC_INST_MTVSRD 0x7c000166
+
+#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
+#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
+#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t) + 32, a, 0)
+#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t) + 32, a, 0)
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.cc
new file mode 100644
index 0000000000..7ac6ee19a1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.cc
@@ -0,0 +1,63 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#include "util/random.h"
+
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <thread>
+#include <utility>
+
+#include "port/likely.h"
+#include "util/thread_local.h"
+
+#define STORAGE_DECL static thread_local
+
+namespace ROCKSDB_NAMESPACE {
+
+Random* Random::GetTLSInstance() {
+  STORAGE_DECL Random* tls_instance;
+  STORAGE_DECL std::aligned_storage<sizeof(Random)>::type tls_instance_bytes;
+
+  auto rv = tls_instance;
+  if (UNLIKELY(rv == nullptr)) {
+    size_t seed = std::hash<std::thread::id>()(std::this_thread::get_id());
+    rv = new (&tls_instance_bytes) Random((uint32_t)seed);
+    tls_instance = rv;
+  }
+  return rv;
+}
+
+std::string Random::HumanReadableString(int len) {
+  std::string ret;
+  ret.resize(len);
+  for (int i = 0; i < len; ++i) {
+    ret[i] = static_cast<char>('a' + Uniform(26));
+  }
+  return ret;
+}
+
+std::string Random::RandomString(int len) {
+  std::string ret;
+  ret.resize(len);
+  for (int i = 0; i < len; i++) {
+    ret[i] = static_cast<char>(' ' + Uniform(95));  // ' ' .. '~'
+  }
+  return ret;
+}
+
+std::string Random::RandomBinaryString(int len) {
+  std::string ret;
+  ret.resize(len);
+  for (int i = 0; i < len; i++) {
+    ret[i] = static_cast<char>(Uniform(CHAR_MAX));
+  }
+  return ret;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.h
new file mode 100644
index 0000000000..8923bdc4f0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/random.h
@@ -0,0 +1,190 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <stdint.h>
+
+#include <algorithm>
+#include <random>
+#include <string>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A very simple random number generator. Not especially good at
+// generating truly random bits, but good enough for our needs in this
+// package.
+class Random {
+ private:
+  enum : uint32_t {
+    M = 2147483647L  // 2^31-1
+  };
+  enum : uint64_t {
+    A = 16807  // bits 14, 8, 7, 5, 2, 1, 0
+  };
+
+  uint32_t seed_;
+
+  static uint32_t GoodSeed(uint32_t s) { return (s & M) != 0 ? (s & M) : 1; }
+
+ public:
+  // This is the largest value that can be returned from Next()
+  enum : uint32_t { kMaxNext = M };
+
+  explicit Random(uint32_t s) : seed_(GoodSeed(s)) {}
+
+  void Reset(uint32_t s) { seed_ = GoodSeed(s); }
+
+  uint32_t Next() {
+    // We are computing
+    //       seed_ = (seed_ * A) % M,    where M = 2^31-1
+    //
+    // seed_ must not be zero or M, or else all subsequent computed values
+    // will be zero or M respectively. For all other values, seed_ will end
+    // up cycling through every number in [1,M-1]
+    uint64_t product = seed_ * A;
+
+    // Compute (product % M) using the fact that ((x << 31) % M) == x.
+    seed_ = static_cast<uint32_t>((product >> 31) + (product & M));
+    // The first reduction may overflow by 1 bit, so we may need to
+    // repeat. mod == M is not possible; using > allows the faster
+    // sign-bit-based test.
+    if (seed_ > M) {
+      seed_ -= M;
+    }
+    return seed_;
+  }
+
+  uint64_t Next64() { return (uint64_t{Next()} << 32) | Next(); }
+
+  // Returns a uniformly distributed value in the range [0..n-1]
+  // REQUIRES: n > 0
+  uint32_t Uniform(int n) { return Next() % n; }
+
+  // Randomly returns true ~"1/n" of the time, and false otherwise.
+  // REQUIRES: n > 0
+  bool OneIn(int n) { return Uniform(n) == 0; }
+
+  // "Optional" one-in-n, where 0 or negative always returns false
+  // (may or may not consume a random value)
+  bool OneInOpt(int n) { return n > 0 && OneIn(n); }
+
+  // Returns random bool that is true for the given percentage of
+  // calls on average. Zero or less is always false and 100 or more
+  // is always true (may or may not consume a random value)
+  bool PercentTrue(int percentage) {
+    return static_cast<int>(Uniform(100)) < percentage;
+  }
+
+  // Skewed: pick "base" uniformly from range [0,max_log] and then
+  // return "base" random bits. The effect is to pick a number in the
+  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
+  uint32_t Skewed(int max_log) { return Uniform(1 << Uniform(max_log + 1)); }
+
+  // Returns a random string of length "len"
+  std::string RandomString(int len);
+
+  // Generates a random string of len bytes using human-readable characters
+  std::string HumanReadableString(int len);
+
+  // Generates random binary data
+  std::string RandomBinaryString(int len);
+
+  // Returns a Random instance for use by the current thread without
+  // additional locking
+  static Random* GetTLSInstance();
+};
+
+// A good 32-bit random number generator based on std::mt19937.
+// This exists in part to avoid compiler variance in warning about coercing
+// uint_fast32_t from mt19937 to uint32_t.
+class Random32 {
+ private:
+  std::mt19937 generator_;
+
+ public:
+  explicit Random32(uint32_t s) : generator_(s) {}
+
+  // Generates the next random number
+  uint32_t Next() { return static_cast<uint32_t>(generator_()); }
+
+  // Returns a uniformly distributed value in the range [0..n-1]
+  // REQUIRES: n > 0
+  uint32_t Uniform(uint32_t n) {
+    return static_cast<uint32_t>(
+        std::uniform_int_distribution<std::mt19937::result_type>(
+            0, n - 1)(generator_));
+  }
+
+  // Returns an *almost* uniformly distributed value in the range [0..n-1].
+  // Much faster than Uniform().
+  // REQUIRES: n > 0
+  uint32_t Uniformish(uint32_t n) {
+    // fastrange (without the header)
+    return static_cast<uint32_t>((uint64_t(generator_()) * uint64_t(n)) >> 32);
+  }
+
+  // Randomly returns true ~"1/n" of the time, and false otherwise.
+  // REQUIRES: n > 0
+  bool OneIn(uint32_t n) { return Uniform(n) == 0; }
+
+  // Skewed: pick "base" uniformly from range [0,max_log] and then
+  // return "base" random bits. The effect is to pick a number in the
+  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
+  uint32_t Skewed(int max_log) {
+    return Uniform(uint32_t{1} << Uniform(max_log + 1));
+  }
+
+  // Reset the seed of the generator to the given value
+  void Seed(uint32_t new_seed) { generator_.seed(new_seed); }
+};
+
+// A good 64-bit random number generator based on std::mt19937_64
+class Random64 {
+ private:
+  std::mt19937_64 generator_;
+
+ public:
+  explicit Random64(uint64_t s) : generator_(s) {}
+
+  // Generates the next random number
+  uint64_t Next() { return generator_(); }
+
+  // Returns a uniformly distributed value in the range [0..n-1]
+  // REQUIRES: n > 0
+  uint64_t Uniform(uint64_t n) {
+    return std::uniform_int_distribution<uint64_t>(0, n - 1)(generator_);
+  }
+
+  // Randomly returns true ~"1/n" of the time, and false otherwise.
+  // REQUIRES: n > 0
+  bool OneIn(uint64_t n) { return Uniform(n) == 0; }
+
+  // Skewed: pick "base" uniformly from range [0,max_log] and then
+  // return "base" random bits. The effect is to pick a number in the
+  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
+  uint64_t Skewed(int max_log) {
+    return Uniform(uint64_t(1) << Uniform(max_log + 1));
+  }
+};
+
+// A seeded replacement for removed std::random_shuffle
+template <class RandomIt>
+void RandomShuffle(RandomIt first, RandomIt last, uint32_t seed) {
+  std::mt19937 rng(seed);
+  std::shuffle(first, last, rng);
+}
+
+// A replacement for removed std::random_shuffle
+template <class RandomIt>
+void RandomShuffle(RandomIt first, RandomIt last) {
+  RandomShuffle(first, last, std::random_device{}());
+}
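+
+// Usage sketch (illustrative): the seeded overload gives a deterministic
+// order for reproducible tests; the unseeded one draws its seed from
+// std::random_device.
+//
+//   std::vector<int> v = {1, 2, 3, 4, 5};
+//   RandomShuffle(v.begin(), v.end(), /*seed=*/42);  // same order every run
+//   RandomShuffle(v.begin(), v.end());               // nondeterministic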
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter.cc
new file mode 100644
index 0000000000..be54138d94
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter.cc
@@ -0,0 +1,376 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <algorithm>
+
+#include "monitoring/statistics_impl.h"
+#include "port/port.h"
+#include "rocksdb/system_clock.h"
+#include "test_util/sync_point.h"
+#include "util/aligned_buffer.h"
+#include "util/rate_limiter_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+size_t RateLimiter::RequestToken(size_t bytes, size_t alignment,
+                                 Env::IOPriority io_priority, Statistics* stats,
+                                 RateLimiter::OpType op_type) {
+  if (io_priority < Env::IO_TOTAL && IsRateLimited(op_type)) {
+    bytes = std::min(bytes, static_cast<size_t>(GetSingleBurstBytes()));
+
+    if (alignment > 0) {
+      // Here we may actually require more than burst and block
+      // as we can not write/read less than one page at a time on direct I/O
+      // thus we do not want to be strictly constrained by burst
+      bytes = std::max(alignment, TruncateToPageBoundary(alignment, bytes));
+    }
+    Request(bytes, io_priority, stats, op_type);
+  }
+  return bytes;
+}
+
+// Pending request
+struct GenericRateLimiter::Req {
+  explicit Req(int64_t _bytes, port::Mutex* _mu)
+      : request_bytes(_bytes), bytes(_bytes), cv(_mu) {}
+  int64_t request_bytes;
+  int64_t bytes;
+  port::CondVar cv;
+};
+
+GenericRateLimiter::GenericRateLimiter(
+    int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness,
+    RateLimiter::Mode mode, const std::shared_ptr<SystemClock>& clock,
+    bool auto_tuned)
+    : RateLimiter(mode),
+      refill_period_us_(refill_period_us),
+      rate_bytes_per_sec_(auto_tuned ? rate_bytes_per_sec / 2
+                                     : rate_bytes_per_sec),
+      refill_bytes_per_period_(
+          CalculateRefillBytesPerPeriodLocked(rate_bytes_per_sec_)),
+      clock_(clock),
+      stop_(false),
+      exit_cv_(&request_mutex_),
+      requests_to_wait_(0),
+      available_bytes_(0),
+      next_refill_us_(NowMicrosMonotonicLocked()),
+      fairness_(fairness > 100 ? 100 : fairness),
+      rnd_((uint32_t)time(nullptr)),
+      wait_until_refill_pending_(false),
+      auto_tuned_(auto_tuned),
+      num_drains_(0),
+      max_bytes_per_sec_(rate_bytes_per_sec),
+      tuned_time_(NowMicrosMonotonicLocked()) {
+  for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
+    total_requests_[i] = 0;
+    total_bytes_through_[i] = 0;
+  }
+}
+
+GenericRateLimiter::~GenericRateLimiter() {
+  MutexLock g(&request_mutex_);
+  stop_ = true;
+  std::deque<Req*>::size_type queues_size_sum = 0;
+  for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
+    queues_size_sum += queue_[i].size();
+  }
+  requests_to_wait_ = static_cast<int32_t>(queues_size_sum);
+
+  for (int i = Env::IO_TOTAL - 1; i >= Env::IO_LOW; --i) {
+    std::deque<Req*> queue = queue_[i];
+    for (auto& r : queue) {
+      r->cv.Signal();
+    }
+  }
+
+  while (requests_to_wait_ > 0) {
+    exit_cv_.Wait();
+  }
+}
+
+// This API allows user to dynamically change rate limiter's bytes per second.
+void GenericRateLimiter::SetBytesPerSecond(int64_t bytes_per_second) {
+  MutexLock g(&request_mutex_);
+  SetBytesPerSecondLocked(bytes_per_second);
+}
+
+void GenericRateLimiter::SetBytesPerSecondLocked(int64_t bytes_per_second) {
+  assert(bytes_per_second > 0);
+  rate_bytes_per_sec_.store(bytes_per_second, std::memory_order_relaxed);
+  refill_bytes_per_period_.store(
+      CalculateRefillBytesPerPeriodLocked(bytes_per_second),
+      std::memory_order_relaxed);
+}
+
+void GenericRateLimiter::Request(int64_t bytes, const Env::IOPriority pri,
+                                 Statistics* stats) {
+  assert(bytes <= refill_bytes_per_period_.load(std::memory_order_relaxed));
+  bytes = std::max(static_cast<int64_t>(0), bytes);
+  TEST_SYNC_POINT("GenericRateLimiter::Request");
+  TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:1",
+                           &rate_bytes_per_sec_);
+  MutexLock g(&request_mutex_);
+
+  if (auto_tuned_) {
+    static const int kRefillsPerTune = 100;
+    std::chrono::microseconds now(NowMicrosMonotonicLocked());
+    if (now - tuned_time_ >=
+        kRefillsPerTune * std::chrono::microseconds(refill_period_us_)) {
+      Status s = TuneLocked();
+      s.PermitUncheckedError();  //**TODO: What to do on error?
+    }
+  }
+
+  if (stop_) {
+    // It is now in the clean-up of ~GenericRateLimiter().
+    // Therefore any new incoming request will exit from here
+    // and not get satisfied.
+    return;
+  }
+
+  ++total_requests_[pri];
+
+  if (available_bytes_ > 0) {
+    int64_t bytes_through = std::min(available_bytes_, bytes);
+    total_bytes_through_[pri] += bytes_through;
+    available_bytes_ -= bytes_through;
+    bytes -= bytes_through;
+  }
+
+  if (bytes == 0) {
+    return;
+  }
+
+  // Request cannot be satisfied at this moment, enqueue
+  Req r(bytes, &request_mutex_);
+  queue_[pri].push_back(&r);
+  TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:PostEnqueueRequest",
+                           &request_mutex_);
+  // A thread representing a queued request coordinates with other such
+  // threads. There are two main duties.
+  //
+  // (1) Waiting for the next refill time.
+  // (2) Refilling the bytes and granting requests.
+  do {
+    int64_t time_until_refill_us = next_refill_us_ - NowMicrosMonotonicLocked();
+    if (time_until_refill_us > 0) {
+      if (wait_until_refill_pending_) {
+        // Somebody is performing (1). Trust we'll be woken up when our request
+        // is granted or we are needed for future duties.
+        r.cv.Wait();
+      } else {
+        // Whichever thread reaches here first performs duty (1) as described
+        // above.
+
+void GenericRateLimiter::Request(int64_t bytes, const Env::IOPriority pri,
+                                 Statistics* stats) {
+  assert(bytes <= refill_bytes_per_period_.load(std::memory_order_relaxed));
+  bytes = std::max(static_cast<int64_t>(0), bytes);
+  TEST_SYNC_POINT("GenericRateLimiter::Request");
+  TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:1",
+                           &rate_bytes_per_sec_);
+  MutexLock g(&request_mutex_);
+
+  if (auto_tuned_) {
+    static const int kRefillsPerTune = 100;
+    std::chrono::microseconds now(NowMicrosMonotonicLocked());
+    if (now - tuned_time_ >=
+        kRefillsPerTune * std::chrono::microseconds(refill_period_us_)) {
+      Status s = TuneLocked();
+      s.PermitUncheckedError();  //**TODO: What to do on error?
+    }
+  }
+
+  if (stop_) {
+    // It is now in the clean-up of ~GenericRateLimiter().
+    // Therefore any new incoming request will exit from here
+    // and not get satisfied.
+    return;
+  }
+
+  ++total_requests_[pri];
+
+  if (available_bytes_ > 0) {
+    int64_t bytes_through = std::min(available_bytes_, bytes);
+    total_bytes_through_[pri] += bytes_through;
+    available_bytes_ -= bytes_through;
+    bytes -= bytes_through;
+  }
+
+  if (bytes == 0) {
+    return;
+  }
+
+  // Request cannot be satisfied at this moment, enqueue
+  Req r(bytes, &request_mutex_);
+  queue_[pri].push_back(&r);
+  TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:PostEnqueueRequest",
+                           &request_mutex_);
+  // A thread representing a queued request coordinates with other such
+  // threads. There are two main duties.
+  //
+  // (1) Waiting for the next refill time.
+  // (2) Refilling the bytes and granting requests.
+  do {
+    int64_t time_until_refill_us = next_refill_us_ - NowMicrosMonotonicLocked();
+    if (time_until_refill_us > 0) {
+      if (wait_until_refill_pending_) {
+        // Somebody is performing (1). Trust we'll be woken up when our
+        // request is granted or we are needed for future duties.
+        r.cv.Wait();
+      } else {
+        // Whichever thread reaches here first performs duty (1) as described
+        // above.
+        int64_t wait_until = clock_->NowMicros() + time_until_refill_us;
+        RecordTick(stats, NUMBER_RATE_LIMITER_DRAINS);
+        ++num_drains_;
+        wait_until_refill_pending_ = true;
+        r.cv.TimedWait(wait_until);
+        TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:PostTimedWait",
+                                 &time_until_refill_us);
+        wait_until_refill_pending_ = false;
+      }
+    } else {
+      // Whichever thread reaches here first performs duty (2) as described
+      // above.
+      RefillBytesAndGrantRequestsLocked();
+      if (r.request_bytes == 0) {
+        // If there are any remaining requests, make sure at least one
+        // candidate is awake for future duties by signaling the front
+        // request of a queue.
+        for (int i = Env::IO_TOTAL - 1; i >= Env::IO_LOW; --i) {
+          std::deque<Req*> queue = queue_[i];
+          if (!queue.empty()) {
+            queue.front()->cv.Signal();
+            break;
+          }
+        }
+      }
+    }
+    // Invariant: a non-granted request is always in exactly one queue, and a
+    // granted request is in no queue.
+#ifndef NDEBUG
+    int num_found = 0;
+    for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
+      if (std::find(queue_[i].begin(), queue_[i].end(), &r) !=
+          queue_[i].end()) {
+        ++num_found;
+      }
+    }
+    if (r.request_bytes == 0) {
+      assert(num_found == 0);
+    } else {
+      assert(num_found == 1);
+    }
+#endif  // NDEBUG
+  } while (!stop_ && r.request_bytes > 0);
+
+  if (stop_) {
+    // It is now in the clean-up of ~GenericRateLimiter().
+    // Therefore any woken-up request will have come out of the loop and then
+    // exit here. It might or might not have been satisfied.
+    --requests_to_wait_;
+    exit_cv_.Signal();
+  }
+}
+
+std::vector<Env::IOPriority>
+GenericRateLimiter::GeneratePriorityIterationOrderLocked() {
+  std::vector<Env::IOPriority> pri_iteration_order(Env::IO_TOTAL /* 4 */);
+  // We make Env::IO_USER a superior priority by always iterating its queue
+  // first
+  pri_iteration_order[0] = Env::IO_USER;
+
+  bool high_pri_iterated_after_mid_low_pri = rnd_.OneIn(fairness_);
+  TEST_SYNC_POINT_CALLBACK(
+      "GenericRateLimiter::GeneratePriorityIterationOrderLocked::"
+      "PostRandomOneInFairnessForHighPri",
+      &high_pri_iterated_after_mid_low_pri);
+  bool mid_pri_iterated_after_low_pri = rnd_.OneIn(fairness_);
+  TEST_SYNC_POINT_CALLBACK(
+      "GenericRateLimiter::GeneratePriorityIterationOrderLocked::"
+      "PostRandomOneInFairnessForMidPri",
+      &mid_pri_iterated_after_low_pri);
+
+  if (high_pri_iterated_after_mid_low_pri) {
+    pri_iteration_order[3] = Env::IO_HIGH;
+    pri_iteration_order[2] =
+        mid_pri_iterated_after_low_pri ? Env::IO_MID : Env::IO_LOW;
+    pri_iteration_order[1] =
+        (pri_iteration_order[2] == Env::IO_MID) ? Env::IO_LOW : Env::IO_MID;
+  } else {
+    pri_iteration_order[1] = Env::IO_HIGH;
+    pri_iteration_order[3] =
+        mid_pri_iterated_after_low_pri ? Env::IO_MID : Env::IO_LOW;
+    pri_iteration_order[2] =
+        (pri_iteration_order[3] == Env::IO_MID) ? Env::IO_LOW : Env::IO_MID;
+  }
+
+  TEST_SYNC_POINT_CALLBACK(
+      "GenericRateLimiter::GeneratePriorityIterationOrderLocked::"
+      "PreReturnPriIterationOrder",
+      &pri_iteration_order);
+  return pri_iteration_order;
+}
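+
+// Editor's illustration (not part of upstream): with the default
+// fairness = 10, rnd_.OneIn(10) is true with probability 1/10, so roughly
+// one refill in ten iterates the low/mid queues ahead of IO_HIGH,
+// preventing starvation while still favoring high-priority traffic.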
+
+void GenericRateLimiter::RefillBytesAndGrantRequestsLocked() {
+  TEST_SYNC_POINT_CALLBACK(
+      "GenericRateLimiter::RefillBytesAndGrantRequestsLocked", &request_mutex_);
+  next_refill_us_ = NowMicrosMonotonicLocked() + refill_period_us_;
+  // Carry over the left over quota from the last period
+  auto refill_bytes_per_period =
+      refill_bytes_per_period_.load(std::memory_order_relaxed);
+  assert(available_bytes_ == 0);
+  available_bytes_ = refill_bytes_per_period;
+
+  std::vector<Env::IOPriority> pri_iteration_order =
+      GeneratePriorityIterationOrderLocked();
+
+  for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
+    assert(!pri_iteration_order.empty());
+    Env::IOPriority current_pri = pri_iteration_order[i];
+    auto* queue = &queue_[current_pri];
+    while (!queue->empty()) {
+      auto* next_req = queue->front();
+      if (available_bytes_ < next_req->request_bytes) {
+        // Grant partial request_bytes to avoid starving requests that end up
+        // asking for more bytes than available_bytes_, which can happen when
+        // a dynamically reduced bytes_per_second lowers
+        // refill_bytes_per_period and hence available_bytes_.
+        next_req->request_bytes -= available_bytes_;
+        available_bytes_ = 0;
+        break;
+      }
+      available_bytes_ -= next_req->request_bytes;
+      next_req->request_bytes = 0;
+      total_bytes_through_[current_pri] += next_req->bytes;
+      queue->pop_front();
+
+      // Quota granted, signal the thread to exit
+      next_req->cv.Signal();
+    }
+  }
+}
+
+int64_t GenericRateLimiter::CalculateRefillBytesPerPeriodLocked(
+    int64_t rate_bytes_per_sec) {
+  if (std::numeric_limits<int64_t>::max() / rate_bytes_per_sec <
+      refill_period_us_) {
+    // Avoid unexpected result in the overflow case. The result now is still
+    // inaccurate but is a number that is large enough.
+    return std::numeric_limits<int64_t>::max() / 1000000;
+  } else {
+    return rate_bytes_per_sec * refill_period_us_ / 1000000;
+  }
+}
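+
+// Editor's illustration (not part of upstream): with
+// rate_bytes_per_sec = 10 MiB/s (10485760) and refill_period_us = 100000,
+// the per-period budget is 10485760 * 100000 / 1000000 = 1048576 bytes
+// (1 MiB), which is also what GetSingleBurstBytes() reports.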
+
+Status GenericRateLimiter::TuneLocked() {
+  const int kLowWatermarkPct = 50;
+  const int kHighWatermarkPct = 90;
+  const int kAdjustFactorPct = 5;
+  // computed rate limit will be in
+  // `[max_bytes_per_sec_ / kAllowedRangeFactor, max_bytes_per_sec_]`.
+  const int kAllowedRangeFactor = 20;
+
+  std::chrono::microseconds prev_tuned_time = tuned_time_;
+  tuned_time_ = std::chrono::microseconds(NowMicrosMonotonicLocked());
+
+  int64_t elapsed_intervals = (tuned_time_ - prev_tuned_time +
+                               std::chrono::microseconds(refill_period_us_) -
+                               std::chrono::microseconds(1)) /
+                              std::chrono::microseconds(refill_period_us_);
+  // We tune every kRefillsPerTune intervals, so the overflow and division-by-
+  // zero conditions should never happen.
+  assert(num_drains_ <= std::numeric_limits<int64_t>::max() / 100);
+  assert(elapsed_intervals > 0);
+  int64_t drained_pct = num_drains_ * 100 / elapsed_intervals;
+
+  int64_t prev_bytes_per_sec = GetBytesPerSecond();
+  int64_t new_bytes_per_sec;
+  if (drained_pct == 0) {
+    new_bytes_per_sec = max_bytes_per_sec_ / kAllowedRangeFactor;
+  } else if (drained_pct < kLowWatermarkPct) {
+    // sanitize to prevent overflow
+    int64_t sanitized_prev_bytes_per_sec =
+        std::min(prev_bytes_per_sec, std::numeric_limits<int64_t>::max() / 100);
+    new_bytes_per_sec =
+        std::max(max_bytes_per_sec_ / kAllowedRangeFactor,
+                 sanitized_prev_bytes_per_sec * 100 / (100 + kAdjustFactorPct));
+  } else if (drained_pct > kHighWatermarkPct) {
+    // sanitize to prevent overflow
+    int64_t sanitized_prev_bytes_per_sec =
+        std::min(prev_bytes_per_sec, std::numeric_limits<int64_t>::max() /
+                                         (100 + kAdjustFactorPct));
+    new_bytes_per_sec =
+        std::min(max_bytes_per_sec_,
+                 sanitized_prev_bytes_per_sec * (100 + kAdjustFactorPct) / 100);
+  } else {
+    new_bytes_per_sec = prev_bytes_per_sec;
+  }
+  if (new_bytes_per_sec != prev_bytes_per_sec) {
+    SetBytesPerSecondLocked(new_bytes_per_sec);
+  }
+  num_drains_ = 0;
+  return Status::OK();
+}
+
+RateLimiter* NewGenericRateLimiter(
+    int64_t rate_bytes_per_sec, int64_t refill_period_us /* = 100 * 1000 */,
+    int32_t fairness /* = 10 */,
+    RateLimiter::Mode mode /* = RateLimiter::Mode::kWritesOnly */,
+    bool auto_tuned /* = false */) {
+  assert(rate_bytes_per_sec > 0);
+  assert(refill_period_us > 0);
+  assert(fairness > 0);
+  std::unique_ptr<RateLimiter> limiter(
+      new GenericRateLimiter(rate_bytes_per_sec, refill_period_us, fairness,
+                             mode, SystemClock::Default(), auto_tuned));
+  return limiter.release();
+}
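+
+// Editor's usage sketch (not part of upstream; the surrounding DB options
+// wiring is assumed): share one limiter across compactions and flushes:
+//   std::shared_ptr<RateLimiter> limiter(
+//       NewGenericRateLimiter(10 << 20));  // 10 MiB/s, default fairness
+//   db_options.rate_limiter = limiter;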
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter_impl.h
new file mode 100644
index 0000000000..4c078f5a0e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/rate_limiter_impl.h
@@ -0,0 +1,146 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <deque>
+
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksdb/rate_limiter.h"
+#include "rocksdb/status.h"
+#include "rocksdb/system_clock.h"
+#include "util/mutexlock.h"
+#include "util/random.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class GenericRateLimiter : public RateLimiter {
+ public:
+  GenericRateLimiter(int64_t refill_bytes, int64_t refill_period_us,
+                     int32_t fairness, RateLimiter::Mode mode,
+                     const std::shared_ptr<SystemClock>& clock,
+                     bool auto_tuned);
+
+  virtual ~GenericRateLimiter();
+
+  // This API allows the user to dynamically change the rate limiter's bytes
+  // per second.
+  virtual void SetBytesPerSecond(int64_t bytes_per_second) override;
+
+  // Request a token to write `bytes`. If this request can not be satisfied,
+  // the call is blocked. The caller is responsible for making sure
+  // bytes <= GetSingleBurstBytes(); negative bytes passed in will be
+  // rounded up to 0.
+  using RateLimiter::Request;
+  virtual void Request(const int64_t bytes, const Env::IOPriority pri,
+                       Statistics* stats) override;
+
+  virtual int64_t GetSingleBurstBytes() const override {
+    return refill_bytes_per_period_.load(std::memory_order_relaxed);
+  }
+
+  virtual int64_t GetTotalBytesThrough(
+      const Env::IOPriority pri = Env::IO_TOTAL) const override {
+    MutexLock g(&request_mutex_);
+    if (pri == Env::IO_TOTAL) {
+      int64_t total_bytes_through_sum = 0;
+      for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
+        total_bytes_through_sum += total_bytes_through_[i];
+      }
+      return total_bytes_through_sum;
+    }
+    return total_bytes_through_[pri];
+  }
+
+  virtual int64_t GetTotalRequests(
+      const Env::IOPriority pri = Env::IO_TOTAL) const override {
+    MutexLock g(&request_mutex_);
+    if (pri == Env::IO_TOTAL) {
+      int64_t total_requests_sum = 0;
+      for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
+        total_requests_sum += total_requests_[i];
+      }
+      return total_requests_sum;
+    }
+    return total_requests_[pri];
+  }
+
+  virtual Status GetTotalPendingRequests(
+      int64_t* total_pending_requests,
+      const Env::IOPriority pri = Env::IO_TOTAL) const override {
+    assert(total_pending_requests != nullptr);
+    MutexLock g(&request_mutex_);
+    if (pri == Env::IO_TOTAL) {
+      int64_t total_pending_requests_sum = 0;
+      for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
+        total_pending_requests_sum += static_cast<int64_t>(queue_[i].size());
+      }
+      *total_pending_requests = total_pending_requests_sum;
+    } else {
+      *total_pending_requests = static_cast<int64_t>(queue_[pri].size());
+    }
+    return Status::OK();
+  }
+
+  virtual int64_t GetBytesPerSecond() const override {
+    return rate_bytes_per_sec_.load(std::memory_order_relaxed);
+  }
+
+  virtual void TEST_SetClock(std::shared_ptr<SystemClock> clock) {
+    MutexLock g(&request_mutex_);
+    clock_ = std::move(clock);
+    next_refill_us_ = NowMicrosMonotonicLocked();
+  }
+
+ private:
+  void RefillBytesAndGrantRequestsLocked();
+  std::vector<Env::IOPriority> GeneratePriorityIterationOrderLocked();
+  int64_t CalculateRefillBytesPerPeriodLocked(int64_t rate_bytes_per_sec);
+  Status TuneLocked();
+  void SetBytesPerSecondLocked(int64_t bytes_per_second);
+
+  uint64_t NowMicrosMonotonicLocked() {
+    return clock_->NowNanos() / std::milli::den;
+  }
+
+  // This mutex guards all internal state
+  mutable port::Mutex request_mutex_;
+
+  const int64_t refill_period_us_;
+
+  std::atomic<int64_t> rate_bytes_per_sec_;
+  std::atomic<int64_t> refill_bytes_per_period_;
+  std::shared_ptr<SystemClock> clock_;
+
+  bool stop_;
+  port::CondVar exit_cv_;
+  int32_t requests_to_wait_;
+
+  int64_t total_requests_[Env::IO_TOTAL];
+  int64_t total_bytes_through_[Env::IO_TOTAL];
+  int64_t available_bytes_;
+  int64_t next_refill_us_;
+
+  int32_t fairness_;
+  Random rnd_;
+
+  struct Req;
+  std::deque<Req*> queue_[Env::IO_TOTAL];
+  bool wait_until_refill_pending_;
+
+  bool auto_tuned_;
+  int64_t num_drains_;
+  const int64_t max_bytes_per_sec_;
+  std::chrono::microseconds tuned_time_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/repeatable_thread.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/repeatable_thread.h
new file mode 100644
index 0000000000..c75ad7c49f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/repeatable_thread.h
@@ -0,0 +1,149 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <functional>
+#include <string>
+
+#include "monitoring/instrumented_mutex.h"
+#include "port/port.h"
+#include "rocksdb/system_clock.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Simple wrapper around port::Thread that supports calling a callback every
+// X seconds. If you pass in 0, then it will call your callback repeatedly
+// without delay.
+class RepeatableThread {
+ public:
+  RepeatableThread(std::function<void()> function,
+                   const std::string& thread_name, SystemClock* clock,
+                   uint64_t delay_us, uint64_t initial_delay_us = 0)
+      : function_(function),
+        thread_name_("rocksdb:" + thread_name),
+        clock_(clock),
+        delay_us_(delay_us),
+        initial_delay_us_(initial_delay_us),
+        mutex_(clock),
+        cond_var_(&mutex_),
+        running_(true),
+#ifndef NDEBUG
+        waiting_(false),
+        run_count_(0),
+#endif
+        thread_([this] { thread(); }) {
+  }
+
+  void cancel() {
+    {
+      InstrumentedMutexLock l(&mutex_);
+      if (!running_) {
+        return;
+      }
+      running_ = false;
+      cond_var_.SignalAll();
+    }
+    thread_.join();
+  }
+
+  bool IsRunning() { return running_; }
+
+  ~RepeatableThread() { cancel(); }
+
+#ifndef NDEBUG
+  // Wait until RepeatableThread starts waiting, call the optional callback,
+  // then wait for one run of RepeatableThread. Tests can provide a custom
+  // clock object to mock time, and use the callback here to bump current
+  // time and trigger RepeatableThread. See repeatable_thread_test for an
+  // example.
+  //
+  // Note: only supports one caller of this method.
+  void TEST_WaitForRun(std::function<void()> callback = nullptr) {
+    InstrumentedMutexLock l(&mutex_);
+    while (!waiting_) {
+      cond_var_.Wait();
+    }
+    uint64_t prev_count = run_count_;
+    if (callback != nullptr) {
+      callback();
+    }
+    cond_var_.SignalAll();
+    while (!(run_count_ > prev_count)) {
+      cond_var_.Wait();
+    }
+  }
+#endif
+
+ private:
+  bool wait(uint64_t delay) {
+    InstrumentedMutexLock l(&mutex_);
+    if (running_ && delay > 0) {
+      uint64_t wait_until = clock_->NowMicros() + delay;
+#ifndef NDEBUG
+      waiting_ = true;
+      cond_var_.SignalAll();
+#endif
+      while (running_) {
+        cond_var_.TimedWait(wait_until);
+        if (clock_->NowMicros() >= wait_until) {
+          break;
+        }
+      }
+#ifndef NDEBUG
+      waiting_ = false;
+#endif
+    }
+    return running_;
+  }
+
+  void thread() {
+#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 12)
+    // Set thread name.
+    auto thread_handle = thread_.native_handle();
+    int ret __attribute__((__unused__)) =
+        pthread_setname_np(thread_handle, thread_name_.c_str());
+    assert(ret == 0);
+#endif
+#endif
+
+    assert(delay_us_ > 0);
+    if (!wait(initial_delay_us_)) {
+      return;
+    }
+    do {
+      function_();
+#ifndef NDEBUG
+      {
+        InstrumentedMutexLock l(&mutex_);
+        run_count_++;
+        cond_var_.SignalAll();
+      }
+#endif
+    } while (wait(delay_us_));
+  }
+
+  const std::function<void()> function_;
+  const std::string thread_name_;
+  SystemClock* clock_;
+  const uint64_t delay_us_;
+  const uint64_t initial_delay_us_;
+
+  // Mutex lock should be held when accessing running_, waiting_
+  // and run_count_.
+  InstrumentedMutex mutex_;
+  InstrumentedCondVar cond_var_;
+  bool running_;
+#ifndef NDEBUG
+  // RepeatableThread waiting for timeout.
+  bool waiting_;
+  // Number of times function_ has run.
+  uint64_t run_count_;
+#endif
+  port::Thread thread_;
+};
+
+// Editor's usage sketch (not part of upstream; PersistStats is a
+// hypothetical callback): run a task every 10 seconds until cancelled.
+//   RepeatableThread thread([] { PersistStats(); }, "pst_st",
+//                           SystemClock::Default().get(), 10 * 1000000);
+//   ...
+//   thread.cancel();  // also invoked by the destructor
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_alg.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_alg.h
new file mode 100644
index 0000000000..f9afefc237
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_alg.h
@@ -0,0 +1,1225 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <array>
+#include <memory>
+
+#include "rocksdb/rocksdb_namespace.h"
+#include "util/math128.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace ribbon {
+
+// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly)
+//
+// ribbon_alg.h: generic versions of core algorithms.
+//
+// Ribbon is a Perfect Hash Static Function construction useful as a compact
+// static Bloom filter alternative. It combines (a) a boolean (GF(2)) linear
+// system construction that approximates a Band Matrix with hashing,
+// (b) an incremental, on-the-fly Gaussian Elimination algorithm that is
+// remarkably efficient and adaptable at constructing an upper-triangular
+// band matrix from a set of band-approximating inputs from (a), and
+// (c) a storage layout that is fast and adaptable as a filter.
+//
+// Footnotes: (a) "Efficient Gauss Elimination for Near-Quadratic Matrices
+// with One Short Random Block per Row, with Applications" by Stefan
+// Walzer and Martin Dietzfelbinger ("DW paper")
+// (b) developed by Peter C. Dillinger, though not the first on-the-fly
+// GE algorithm. See "On the fly Gaussian Elimination for LT codes" by
+// Bioglio, Grangetto, Gaeta, and Sereno.
+// (c) see "interleaved" solution storage below.
+//
+// See ribbon_impl.h for high-level behavioral summary. This file focuses
+// on the core design details.
+//
+// ######################################################################
+// ################# PHSF -> static filter reduction ####################
+//
+// A Perfect Hash Static Function is a data structure representing a
+// map from anything hashable (a "key") to values of some fixed size.
+// Crucially, it is allowed to return garbage values for anything not in
+// the original set of map keys, and it is a "static" structure: entries
+// cannot be added or deleted after construction. PHSFs representing n
+// mappings to b-bit values (assume uniformly distributed) require at least
+// n * b bits to represent, or at least b bits per entry. We typically
+// describe the compactness of a PHSF by typical bits per entry as some
+// function of b. For example, the MWHC construction (k=3 "peeling")
+// requires about 1.0222*b and a variant called Xor+ requires about
+// 1.08*b + 0.5 bits per entry.
+//
+// With more hashing, a PHSF can over-approximate a set as a Bloom filter
+// does, with no FN queries and predictable false positive (FP) query
+// rate. Instead of the user providing a value to map each input key to,
+// a hash function provides the value. Keys in the original set will
+// return a positive membership query because the underlying PHSF returns
+// the same value as hashing the key. When a key is not in the original set,
+// the PHSF returns a "garbage" value, which is only equal to the key's
+// hash with (false positive) probability 1 in 2^b.
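(The RepeatableThread usage sketch above, placed just after the class, is an editor's addition under the stated assumptions; it is not part of the vendored header's upstream text.)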
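+//
+// (Editor's illustration, not from upstream: for b = 7 result bits the FP
+// rate is 2^-7, i.e. about 0.78%; in code, double fp = std::ldexp(1.0, -b);
+// computes 2^-b.)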
+// +// For a matching false positive rate, standard Bloom filters require +// 1.44*b bits per entry. Cache-local Bloom filters (like bloom_impl.h) +// require a bit more, around 1.5*b bits per entry. Thus, a Bloom +// alternative could save up to or nearly 1/3rd of memory and storage +// that RocksDB uses for SST (static) Bloom filters. (Memtable Bloom filter +// is dynamic.) +// +// Recommended reading: +// "Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters" +// by Graf and Lemire +// First three sections of "Fast Scalable Construction of (Minimal +// Perfect Hash) Functions" by Genuzio, Ottaviano, and Vigna +// +// ###################################################################### +// ################## PHSF vs. hash table vs. Bloom ##################### +// +// You can think of traditional hash tables and related filter variants +// such as Cuckoo filters as utilizing an "OR" construction: a hash +// function associates a key with some slots and the data is returned if +// the data is found in any one of those slots. The collision resolution +// is visible in the final data structure and requires extra information. +// For example, Cuckoo filter uses roughly 1.05b + 2 bits per entry, and +// Golomb-Rice code (aka "GCS") as little as b + 1.5. When the data +// structure associates each input key with data in one slot, the +// structure implicitly constructs a (near-)minimal (near-)perfect hash +// (MPH) of the keys, which requires at least 1.44 bits per key to +// represent. This is why approaches with visible collision resolution +// have a fixed + 1.5 or more in storage overhead per entry, often in +// addition to an overhead multiplier on b. +// +// By contrast Bloom filters utilize an "AND" construction: a query only +// returns true if all bit positions associated with a key are set to 1. +// There is no collision resolution, so Bloom filters do not suffer a +// fixed bits per entry overhead like the above structures. +// +// PHSFs typically use a bitwise XOR construction: the data you want is +// not in a single slot, but in a linear combination of several slots. +// For static data, this gives the best of "AND" and "OR" constructions: +// avoids the +1.44 or more fixed overhead by not approximating a MPH and +// can do much better than Bloom's 1.44 factor on b with collision +// resolution, which here is done ahead of time and invisible at query +// time. +// +// ###################################################################### +// ######################## PHSF construction ########################### +// +// For a typical PHSF, construction is solving a linear system of +// equations, typically in GF(2), which is to say that values are boolean +// and XOR serves both as addition and subtraction. We can use matrices to +// represent the problem: +// +// C * S = R +// (n x m) (m x b) (n x b) +// where C = coefficients, S = solution, R = results +// and solving for S given C and R. +// +// Note that C and R each have n rows, one for each input entry for the +// PHSF. A row in C is given by a hash function on the PHSF input key, +// and the corresponding row in R is the b-bit value to associate with +// that input key. (In a filter, rows of R are given by another hash +// function on the input key.) +// +// On solving, the matrix S (solution) is the final PHSF data, as it +// maps any row from the original C to its corresponding desired result +// in R. We just have to hash our query inputs and compute a linear +// combination of rows in S. 
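+//
+// (Editor's illustration, not from upstream: take n = 2, m = 3, b = 1 with
+// coefficient rows [1 1 0] -> 1 and [0 1 1] -> 0. Then S = [1 0 0]^T is a
+// solution, since 1 XOR 0 = 1 and 0 XOR 0 = 0; a query simply XORs the
+// rows of S selected by its key's coefficient row.)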
+// +// In theory, we could chose m = n and let a hash function associate +// each input key with random rows in C. A solution exists with high +// probability, and uses essentially minimum space, b bits per entry +// (because we set m = n) but this has terrible scaling, something +// like O(n^2) space and O(n^3) time during construction (Gaussian +// elimination) and O(n) query time. But computational efficiency is +// key, and the core of this is avoiding scanning all of S to answer +// each query. +// +// The traditional approach (MWHC, aka Xor filter) starts with setting +// only some small fixed number of columns (typically k=3) to 1 for each +// row of C, with remaining entries implicitly 0. This is implemented as +// three hash functions over [0,m), and S can be implemented as a vector +// of b-bit values. Now, a query only involves looking up k rows +// (values) in S and computing their bitwise XOR. Additionally, this +// construction can use a linear time algorithm called "peeling" for +// finding a solution in many cases of one existing, but peeling +// generally requires a larger space overhead factor in the solution +// (m/n) than is required with Gaussian elimination. +// +// Recommended reading: +// "Peeling Close to the Orientability Threshold - Spatial Coupling in +// Hashing-Based Data Structures" by Stefan Walzer +// +// ###################################################################### +// ##################### Ribbon PHSF construction ####################### +// +// Ribbon constructs coefficient rows essentially the same as in the +// Walzer/Dietzfelbinger paper cited above: for some chosen fixed width +// r (kCoeffBits in code), each key is hashed to a starting column in +// [0, m - r] (GetStart() in code) and an r-bit sequence of boolean +// coefficients (GetCoeffRow() in code). If you sort the rows by start, +// the C matrix would look something like this: +// +// [####00000000000000000000] +// [####00000000000000000000] +// [000####00000000000000000] +// [0000####0000000000000000] +// [0000000####0000000000000] +// [000000000####00000000000] +// [000000000####00000000000] +// [0000000000000####0000000] +// [0000000000000000####0000] +// [00000000000000000####000] +// [00000000000000000000####] +// +// where each # could be a 0 or 1, chosen uniformly by a hash function. +// (Except we typically set the start column value to 1.) This scheme +// uses hashing to approximate a band matrix, and it has a solution iff +// it reduces to an upper-triangular boolean r-band matrix, like this: +// +// [1###00000000000000000000] +// [01##00000000000000000000] +// [000000000000000000000000] +// [0001###00000000000000000] +// [000000000000000000000000] +// [000001##0000000000000000] +// [000000000000000000000000] +// [00000001###0000000000000] +// [000000001###000000000000] +// [0000000001##000000000000] +// ... +// [00000000000000000000001#] +// [000000000000000000000001] +// +// where we have expanded to an m x m matrix by filling with rows of +// all zeros as needed. As in Gaussian elimination, this form is ready for +// generating a solution through back-substitution. +// +// The awesome thing about the Ribbon construction (from the DW paper) is +// how row reductions keep each row representable as a start column and +// r coefficients, because row reductions are only needed when two rows +// have the same number of leading zero columns. 
Thus, the combination +// of those rows, the bitwise XOR of the r-bit coefficient rows, cancels +// out the leading 1s, so starts (at least) one column later and only +// needs (at most) r - 1 coefficients. +// +// ###################################################################### +// ###################### Ribbon PHSF scalability ####################### +// +// Although more practical detail is in ribbon_impl.h, it's worth +// understanding some of the overall benefits and limitations of the +// Ribbon PHSFs. +// +// High-end scalability is a primary issue for Ribbon PHSFs, because in +// a single Ribbon linear system with fixed r and fixed m/n ratio, the +// solution probability approaches zero as n approaches infinity. +// For a given n, solution probability improves with larger r and larger +// m/n. +// +// By contrast, peeling-based PHSFs have somewhat worse storage ratio +// or solution probability for small n (less than ~1000). This is +// especially true with spatial-coupling, where benefits are only +// notable for n on the order of 100k or 1m or more. +// +// To make best use of current hardware, r=128 seems to be closest to +// a "generally good" choice for Ribbon, at least in RocksDB where SST +// Bloom filters typically hold around 10-100k keys, and almost always +// less than 10m keys. r=128 ribbon has a high chance of encoding success +// (with first hash seed) when storage overhead is around 5% (m/n ~ 1.05) +// for roughly 10k - 10m keys in a single linear system. r=64 only scales +// up to about 10k keys with the same storage overhead. Construction and +// access times for r=128 are similar to r=64. r=128 tracks nearly +// twice as much data during construction, but in most cases we expect +// the scalability benefits of r=128 vs. r=64 to make it preferred. +// +// A natural approach to scaling Ribbon beyond ~10m keys is splitting +// (or "sharding") the inputs into multiple linear systems with their +// own hash seeds. This can also help to control peak memory consumption. +// TODO: much more to come +// +// ###################################################################### +// #################### Ribbon on-the-fly banding ####################### +// +// "Banding" is what we call the process of reducing the inputs to an +// upper-triangular r-band matrix ready for finishing a solution with +// back-substitution. Although the DW paper presents an algorithm for +// this ("SGauss"), the awesome properties of their construction enable +// an even simpler, faster, and more backtrackable algorithm. In simplest +// terms, the SGauss algorithm requires sorting the inputs by start +// columns, but it's possible to make Gaussian elimination resemble hash +// table insertion! +// +// The enhanced algorithm is based on these observations: +// - When processing a coefficient row with first 1 in column j, +// - If it's the first at column j to be processed, it can be part of +// the banding at row j. (And that decision never overwritten, with +// no loss of generality!) +// - Else, it can be combined with existing row j and re-processed, +// which will look for a later "empty" row or reach "no solution". +// +// We call our banding algorithm "incremental" and "on-the-fly" because +// (like hash table insertion) we are "finished" after each input +// processed, with respect to all inputs processed so far. Although the +// band matrix is an intermediate step to the solution structure, we have +// eliminated intermediate steps and unnecessary data tracking for +// banding. 
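+//
+// (Editor's illustration, not from upstream: with r = 4, suppose rows
+// A = 1011 and B = 1101 both have their first 1 in column j. A ^ B = 0110
+// starts at column j+1 with at most r-1 coefficients, so the combined row
+// is simply re-processed as if freshly inserted at j+1.)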
+// +// Building on "incremental" and "on-the-fly", the banding algorithm is +// easily backtrackable because no (non-empty) rows are overwritten in +// the banding. Thus, if we want to "try" adding an additional set of +// inputs to the banding, we only have to record which rows were written +// in order to efficiently backtrack to our state before considering +// the additional set. (TODO: how this can mitigate scalability and +// reach sub-1% overheads) +// +// Like in a linear-probed hash table, as the occupancy approaches and +// surpasses 90-95%, collision resolution dominates the construction +// time. (Ribbon doesn't usually pay at query time; see solution +// storage below.) This means that we can speed up construction time +// by using a higher m/n ratio, up to negative returns around 1.2. +// At m/n ~= 1.2, which still saves memory substantially vs. Bloom +// filter's 1.5, construction speed (including back-substitution) is not +// far from sorting speed, but still a few times slower than cache-local +// Bloom construction speed. +// +// Back-substitution from an upper-triangular boolean band matrix is +// especially fast and easy. All the memory accesses are sequential or at +// least local, no random. If the number of result bits (b) is a +// compile-time constant, the back-substitution state can even be tracked +// in CPU registers. Regardless of the solution representation, we prefer +// column-major representation for tracking back-substitution state, as +// r (the band width) will typically be much larger than b (result bits +// or columns), so better to handle r-bit values b times (per solution +// row) than b-bit values r times. +// +// ###################################################################### +// ##################### Ribbon solution storage ######################## +// +// Row-major layout is typical for boolean (bit) matrices, including for +// MWHC (Xor) filters where a query combines k b-bit values, and k is +// typically smaller than b. Even for k=4 and b=2, at least k=4 random +// look-ups are required regardless of layout. +// +// Ribbon PHSFs are quite different, however, because +// (a) all of the solution rows relevant to a query are within a single +// range of r rows, and +// (b) the number of solution rows involved (r/2 on average, or r if +// avoiding conditional accesses) is typically much greater than +// b, the number of solution columns. +// +// Row-major for Ribbon PHSFs therefore tends to incur undue CPU overhead +// by processing (up to) r entries of b bits each, where b is typically +// less than 10 for filter applications. +// +// Column-major layout has poor locality because of accessing up to b +// memory locations in different pages (and obviously cache lines). Note +// that negative filter queries do not typically need to access all +// solution columns, as they can return when a mismatch is found in any +// result/solution column. This optimization doesn't always pay off on +// recent hardware, where the penalty for unpredictable conditional +// branching can exceed the penalty for unnecessary work, but the +// optimization is essentially unavailable with row-major layout. +// +// The best compromise seems to be interleaving column-major on the small +// scale with row-major on the large scale. For example, let a solution +// "block" be r rows column-major encoded as b r-bit values in sequence. +// Each query accesses (up to) 2 adjacent blocks, which will typically +// span 1-3 cache lines in adjacent memory. 
We get very close to the same +// locality as row-major, but with much faster reconstruction of each +// result column, at least for filter applications where b is relatively +// small and negative queries can return early. +// +// ###################################################################### +// ###################### Fractional result bits ######################## +// +// Bloom filters have great flexibility that alternatives mostly do not +// have. One of those flexibilities is in utilizing any ratio of data +// structure bits per key. With a typical memory allocator like jemalloc, +// this flexibility can save roughly 10% of the filters' footprint in +// DRAM by rounding up and down filter sizes to minimize memory internal +// fragmentation (see optimize_filters_for_memory RocksDB option). +// +// At first glance, PHSFs only offer a whole number of bits per "slot" +// (m rather than number of keys n), but coefficient locality in the +// Ribbon construction makes fractional bits/key quite possible and +// attractive for filter applications. This works by a prefix of the +// structure using b-1 solution columns and the rest using b solution +// columns. See InterleavedSolutionStorage below for more detail. +// +// Because false positive rates are non-linear in bits/key, this approach +// is not quite optimal in terms of information theory. In common cases, +// we see additional space overhead up to about 1.5% vs. theoretical +// optimal to achieve the same FP rate. We consider this a quite acceptable +// overhead for very efficiently utilizing space that might otherwise be +// wasted. +// +// This property of Ribbon even makes it "elastic." A Ribbon filter and +// its small metadata for answering queries can be adapted into another +// Ribbon filter filling any smaller multiple of r bits (plus small +// metadata), with a correspondingly higher FP rate. None of the data +// thrown away during construction needs to be recalled for this reduction. +// Similarly a single Ribbon construction can be separated (by solution +// column) into two or more structures (or "layers" or "levels") with +// independent filtering ability (no FP correlation, just as solution or +// result columns in a single structure) despite being constructed as part +// of a single linear system. (TODO: implement) +// See also "ElasticBF: Fine-grained and Elastic Bloom Filter Towards +// Efficient Read for LSM-tree-based KV Stores." +// + +// ###################################################################### +// ################### CODE: Ribbon core algorithms ##################### +// ###################################################################### +// +// These algorithms are templatized for genericity but near-maximum +// performance in a given application. The template parameters +// adhere to informal class/struct type concepts outlined below. (This +// code is written for C++11 so does not use formal C++ concepts.) + +// Rough architecture for these algorithms: +// +// +-----------+ +---+ +-----------------+ +// | AddInputs | --> | H | --> | BandingStorage | +// +-----------+ | a | +-----------------+ +// | s | | +// | h | Back substitution +// | e | V +// +-----------+ | r | +-----------------+ +// | Query Key | --> | | >+< | SolutionStorage | +// +-----------+ +---+ | +-----------------+ +// V +// Query result + +// Common to other concepts +// concept RibbonTypes { +// // An unsigned integer type for an r-bit subsequence of coefficients. 
+// // r (or kCoeffBits) is taken to be sizeof(CoeffRow) * 8, as it would +// // generally only hurt scalability to leave bits of CoeffRow unused. +// typename CoeffRow; +// // An unsigned integer type big enough to hold a result row (b bits, +// // or number of solution/result columns). +// // In many applications, especially filters, the number of result +// // columns is decided at run time, so ResultRow simply needs to be +// // big enough for the largest number of columns allowed. +// typename ResultRow; +// // An unsigned integer type sufficient for representing the number of +// // rows in the solution structure, and at least the arithmetic +// // promotion size (usually 32 bits). uint32_t recommended because a +// // single Ribbon construction doesn't really scale to billions of +// // entries. +// typename Index; +// }; + +// ###################################################################### +// ######################## Hashers and Banding ######################### + +// Hasher concepts abstract out hashing details. + +// concept PhsfQueryHasher extends RibbonTypes { +// // Type for a lookup key, which is hashable. +// typename Key; +// +// // Type for hashed summary of a Key. uint64_t is recommended. +// typename Hash; +// +// // Compute a hash value summarizing a Key +// Hash GetHash(const Key &) const; +// +// // Given a hash value and a number of columns that can start an +// // r-sequence of coefficients (== m - r + 1), return the start +// // column to associate with that hash value. (Starts can be chosen +// // uniformly or "smash" extra entries into the beginning and end for +// // better utilization at those extremes of the structure. Details in +// // ribbon.impl.h) +// Index GetStart(Hash, Index num_starts) const; +// +// // Given a hash value, return the r-bit sequence of coefficients to +// // associate with it. It's generally OK if +// // sizeof(CoeffRow) > sizeof(Hash) +// // as long as the hash itself is not too prone to collisions for the +// // applications and the CoeffRow is generated uniformly from +// // available hash data, but relatively independent of the start. +// // +// // Must be non-zero, because that's required for a solution to exist +// // when mapping to non-zero result row. (Note: BandingAdd could be +// // modified to allow 0 coeff row if that only occurs with 0 result +// // row, which really only makes sense for filter implementation, +// // where both values are hash-derived. Or BandingAdd could reject 0 +// // coeff row, forcing next seed, but that has potential problems with +// // generality/scalability.) +// CoeffRow GetCoeffRow(Hash) const; +// }; + +// concept FilterQueryHasher extends PhsfQueryHasher { +// // For building or querying a filter, this returns the expected +// // result row associated with a hashed input. For general PHSF, +// // this must return 0. +// // +// // Although not strictly required, there's a slightly better chance of +// // solver success if result row is masked down here to only the bits +// // actually needed. +// ResultRow GetResultRowFromHash(Hash) const; +// } + +// concept BandingHasher extends FilterQueryHasher { +// // For a filter, this will generally be the same as Key. +// // For a general PHSF, it must either +// // (a) include a key and a result it maps to (e.g. in a std::pair), or +// // (b) GetResultRowFromInput looks up the result somewhere rather than +// // extracting it. 
+// typename AddInput; +// +// // Instead of requiring a way to extract a Key from an +// // AddInput, we require getting the hash of the Key part +// // of an AddInput, which is trivial if AddInput == Key. +// Hash GetHash(const AddInput &) const; +// +// // For building a non-filter PHSF, this extracts or looks up the result +// // row to associate with an input. For filter PHSF, this must return 0. +// ResultRow GetResultRowFromInput(const AddInput &) const; +// +// // Whether the solver can assume the lowest bit of GetCoeffRow is +// // always 1. When true, it should improve solver efficiency slightly. +// static bool kFirstCoeffAlwaysOne; +// } + +// Abstract storage for the the result of "banding" the inputs (Gaussian +// elimination to an upper-triangular boolean band matrix). Because the +// banding is an incremental / on-the-fly algorithm, this also represents +// all the intermediate state between input entries. +// +// concept BandingStorage extends RibbonTypes { +// // Tells the banding algorithm to prefetch memory associated with +// // the next input before processing the current input. Generally +// // recommended iff the BandingStorage doesn't easily fit in CPU +// // cache. +// bool UsePrefetch() const; +// +// // Prefetches (e.g. __builtin_prefetch) memory associated with a +// // slot index i. +// void Prefetch(Index i) const; +// +// // Load or store CoeffRow and ResultRow for slot index i. +// // (Gaussian row operations involve both sides of the equation.) +// // Bool `for_back_subst` indicates that customizing values for +// // unconstrained solution rows (cr == 0) is allowed. +// void LoadRow(Index i, CoeffRow *cr, ResultRow *rr, bool for_back_subst) +// const; +// void StoreRow(Index i, CoeffRow cr, ResultRow rr); +// +// // Returns the number of columns that can start an r-sequence of +// // coefficients, which is the number of slots minus r (kCoeffBits) +// // plus one. (m - r + 1) +// Index GetNumStarts() const; +// }; + +// Optional storage for backtracking data in banding a set of input +// entries. It exposes an array structure which will generally be +// used as a stack. It must be able to accommodate as many entries +// as are passed in as inputs to `BandingAddRange`. +// +// concept BacktrackStorage extends RibbonTypes { +// // If false, backtracking support will be disabled in the algorithm. +// // This should preferably be an inline compile-time constant function. +// bool UseBacktrack() const; +// +// // Records `to_save` as the `i`th backtrack entry +// void BacktrackPut(Index i, Index to_save); +// +// // Recalls the `i`th backtrack entry +// Index BacktrackGet(Index i) const; +// } + +// Adds a single entry to BandingStorage (and optionally, BacktrackStorage), +// returning true if successful or false if solution is impossible with +// current hasher (and presumably its seed) and number of "slots" (solution +// or banding rows). (A solution is impossible when there is a linear +// dependence among the inputs that doesn't "cancel out".) +// +// Pre- and post-condition: the BandingStorage represents a band matrix +// ready for back substitution (row echelon form except for zero rows), +// augmented with result values such that back substitution would give a +// solution satisfying all the cr@start -> rr entries added. 
+template <bool kFirstCoeffAlwaysOne, typename BandingStorage,
+          typename BacktrackStorage>
+bool BandingAdd(BandingStorage *bs, typename BandingStorage::Index start,
+                typename BandingStorage::ResultRow rr,
+                typename BandingStorage::CoeffRow cr, BacktrackStorage *bts,
+                typename BandingStorage::Index *backtrack_pos) {
+  using CoeffRow = typename BandingStorage::CoeffRow;
+  using ResultRow = typename BandingStorage::ResultRow;
+  using Index = typename BandingStorage::Index;
+
+  Index i = start;
+
+  if (!kFirstCoeffAlwaysOne) {
+    // Requires/asserts that cr != 0
+    int tz = CountTrailingZeroBits(cr);
+    i += static_cast<Index>(tz);
+    cr >>= tz;
+  }
+
+  for (;;) {
+    assert((cr & 1) == 1);
+    CoeffRow cr_at_i;
+    ResultRow rr_at_i;
+    bs->LoadRow(i, &cr_at_i, &rr_at_i, /* for_back_subst */ false);
+    if (cr_at_i == 0) {
+      bs->StoreRow(i, cr, rr);
+      bts->BacktrackPut(*backtrack_pos, i);
+      ++*backtrack_pos;
+      return true;
+    }
+    assert((cr_at_i & 1) == 1);
+    // Gaussian row reduction
+    cr ^= cr_at_i;
+    rr ^= rr_at_i;
+    if (cr == 0) {
+      // Inconsistency or (less likely) redundancy
+      break;
+    }
+    // Find relative offset of next non-zero coefficient.
+    int tz = CountTrailingZeroBits(cr);
+    i += static_cast<Index>(tz);
+    cr >>= tz;
+  }
+
+  // Failed, unless result row == 0 because e.g. a duplicate input or a
+  // stock hash collision, with same result row. (For filter, stock hash
+  // collision implies same result row.) Or we could have a full equation
+  // equal to sum of other equations, which is very possible with
+  // small range of values for result row.
+  return rr == 0;
+}
+
+// Adds a range of entries to BandingStorage returning true if successful
+// or false if solution is impossible with current hasher (and presumably
+// its seed) and number of "slots" (solution or banding rows). (A solution
+// is impossible when there is a linear dependence among the inputs that
+// doesn't "cancel out".) Here "InputIterator" is an iterator over AddInputs.
+//
+// If UseBacktrack in the BacktrackStorage, this function call rolls back
+// to prior state on failure. If !UseBacktrack, some subset of the entries
+// will have been added to the BandingStorage, so best considered to be in
+// an indeterminate state.
+//
+template <typename BandingStorage, typename BacktrackStorage,
+          typename BandingHasher, typename InputIterator>
+bool BandingAddRange(BandingStorage *bs, BacktrackStorage *bts,
+                     const BandingHasher &bh, InputIterator begin,
+                     InputIterator end) {
+  using CoeffRow = typename BandingStorage::CoeffRow;
+  using Index = typename BandingStorage::Index;
+  using ResultRow = typename BandingStorage::ResultRow;
+  using Hash = typename BandingHasher::Hash;
+
+  static_assert(IsUnsignedUpTo128<CoeffRow>::value, "must be unsigned");
+  static_assert(IsUnsignedUpTo128<ResultRow>::value, "must be unsigned");
+  static_assert(IsUnsignedUpTo128<Hash>::value, "must be unsigned");
+
+  constexpr bool kFCA1 = BandingHasher::kFirstCoeffAlwaysOne;
+
+  if (begin == end) {
+    // trivial
+    return true;
+  }
+
+  const Index num_starts = bs->GetNumStarts();
+
+  InputIterator cur = begin;
+  Index backtrack_pos = 0;
+  if (!bs->UsePrefetch()) {
+    // Simple version, no prefetch
+    for (;;) {
+      Hash h = bh.GetHash(*cur);
+      Index start = bh.GetStart(h, num_starts);
+      ResultRow rr =
+          bh.GetResultRowFromInput(*cur) | bh.GetResultRowFromHash(h);
+      CoeffRow cr = bh.GetCoeffRow(h);
+
+      if (!BandingAdd<kFCA1>(bs, start, rr, cr, bts, &backtrack_pos)) {
+        break;
+      }
+      if ((++cur) == end) {
+        return true;
+      }
+    }
+  } else {
+    // Pipelined w/prefetch
+    // Prime the pipeline
+    Hash h = bh.GetHash(*cur);
+    Index start = bh.GetStart(h, num_starts);
+    ResultRow rr = bh.GetResultRowFromInput(*cur);
+    bs->Prefetch(start);
+
+    // Pipeline
+    for (;;) {
+      rr |= bh.GetResultRowFromHash(h);
+      CoeffRow cr = bh.GetCoeffRow(h);
+      if ((++cur) == end) {
+        if (!BandingAdd<kFCA1>(bs, start, rr, cr, bts, &backtrack_pos)) {
+          break;
+        }
+        return true;
+      }
+      Hash next_h = bh.GetHash(*cur);
+      Index next_start = bh.GetStart(next_h, num_starts);
+      ResultRow next_rr = bh.GetResultRowFromInput(*cur);
+      bs->Prefetch(next_start);
+      if (!BandingAdd<kFCA1>(bs, start, rr, cr, bts, &backtrack_pos)) {
+        break;
+      }
+      h = next_h;
+      start = next_start;
+      rr = next_rr;
+    }
+  }
+  // failed; backtrack (if implemented)
+  if (bts->UseBacktrack()) {
+    while (backtrack_pos > 0) {
+      --backtrack_pos;
+      Index i = bts->BacktrackGet(backtrack_pos);
+      // Clearing the ResultRow is not strictly required, but is required
+      // for good FP rate on inputs that might have been backtracked out.
+      // (We don't want anything we've backtracked on to leak into final
+      // result, as that might not be "harmless".)
+      bs->StoreRow(i, 0, 0);
+    }
+  }
+  return false;
+}
+
+// Adds a range of entries to BandingStorage returning true if successful
+// or false if solution is impossible with current hasher (and presumably
+// its seed) and number of "slots" (solution or banding rows). (A solution
+// is impossible when there is a linear dependence among the inputs that
+// doesn't "cancel out".) Here "InputIterator" is an iterator over AddInputs.
+//
+// On failure, some subset of the entries will have been added to the
+// BandingStorage, so best considered to be in an indeterminate state.
+//
+template <typename BandingStorage, typename BandingHasher,
+          typename InputIterator>
+bool BandingAddRange(BandingStorage *bs, const BandingHasher &bh,
+                     InputIterator begin, InputIterator end) {
+  using Index = typename BandingStorage::Index;
+  struct NoopBacktrackStorage {
+    bool UseBacktrack() { return false; }
+    void BacktrackPut(Index, Index) {}
+    Index BacktrackGet(Index) {
+      assert(false);
+      return 0;
+    }
+  } nbts;
+  return BandingAddRange(bs, &nbts, bh, begin, end);
+}
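+
+// Editor's usage sketch (not upstream API; `ResetAndReseed` is a
+// hypothetical helper): on failure, callers typically clear the banding
+// storage and retry the whole range with a different hash seed:
+//   while (!BandingAddRange(&bs, hasher, keys.begin(), keys.end())) {
+//     ResetAndReseed(&bs, &hasher);  // hypothetical
+//   }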
+
+// ######################################################################
+// ######################### Solution Storage ###########################
+
+// Back-substitution and query algorithms unfortunately depend on some
+// details of data layout in the final data structure ("solution"). Thus,
+// there is no common SolutionStorage covering all the reasonable
+// possibilities.
+
+// ###################### SimpleSolutionStorage #########################
+
+// SimpleSolutionStorage is for a row-major storage, typically with no
+// unused bits in each ResultRow. This is mostly for demonstration
+// purposes as the simplest solution storage scheme. It is relatively slow
+// for filter queries.
+
+// concept SimpleSolutionStorage extends RibbonTypes {
+//   // This is called at the beginning of back-substitution for the
+//   // solution storage to do any remaining configuration before data
+//   // is stored to it. If configuration is previously finalized, this
+//   // could be a simple assertion or even no-op. Ribbon algorithms
+//   // only call this from back-substitution, and only once per call,
+//   // before other functions here.
+//   void PrepareForNumStarts(Index num_starts) const;
+//   // Must return num_starts passed to PrepareForNumStarts, or the most
+//   // recent call to PrepareForNumStarts if this storage object can be
+//   // reused. Note that num_starts == num_slots - kCoeffBits + 1 because
+//   // there must be a run of kCoeffBits slots starting from each start.
+//   Index GetNumStarts() const;
+//   // Load the solution row (type ResultRow) for a slot
+//   ResultRow Load(Index slot_num) const;
+//   // Store the solution row (type ResultRow) for a slot
+//   void Store(Index slot_num, ResultRow data);
+// };
+
+// Back-substitution for generating a solution from BandingStorage to
+// SimpleSolutionStorage.
+template <typename SimpleSolutionStorage, typename BandingStorage>
+void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &bs) {
+  using CoeffRow = typename BandingStorage::CoeffRow;
+  using Index = typename BandingStorage::Index;
+  using ResultRow = typename BandingStorage::ResultRow;
+
+  static_assert(sizeof(Index) == sizeof(typename SimpleSolutionStorage::Index),
+                "must be same");
+  static_assert(
+      sizeof(CoeffRow) == sizeof(typename SimpleSolutionStorage::CoeffRow),
+      "must be same");
+  static_assert(
+      sizeof(ResultRow) == sizeof(typename SimpleSolutionStorage::ResultRow),
+      "must be same");
+
+  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
+  constexpr auto kResultBits = static_cast<Index>(sizeof(ResultRow) * 8U);
+
+  // A column-major buffer of the solution matrix, containing enough
+  // recently-computed solution data to compute the next solution row
+  // (based also on banding data).
+  std::array<CoeffRow, kResultBits> state;
+  state.fill(0);
+
+  const Index num_starts = bs.GetNumStarts();
+  sss->PrepareForNumStarts(num_starts);
+  const Index num_slots = num_starts + kCoeffBits - 1;
+
+  for (Index i = num_slots; i > 0;) {
+    --i;
+    CoeffRow cr;
+    ResultRow rr;
+    bs.LoadRow(i, &cr, &rr, /* for_back_subst */ true);
+    // solution row
+    ResultRow sr = 0;
+    for (Index j = 0; j < kResultBits; ++j) {
+      // Compute next solution bit at row i, column j (see derivation below)
+      CoeffRow tmp = state[j] << 1;
+      bool bit = (BitParity(tmp & cr) ^ ((rr >> j) & 1)) != 0;
+      tmp |= bit ? CoeffRow{1} : CoeffRow{0};
+
+      // Now tmp is solution at column j from row i for next kCoeffBits
+      // more rows. Thus, for valid solution, the dot product of the
+      // solution column with the coefficient row has to equal the result
+      // at that column,
+      //   BitParity(tmp & cr) == ((rr >> j) & 1)
+
+      // Update state.
+      state[j] = tmp;
+      // add to solution row
+      sr |= (bit ? ResultRow{1} : ResultRow{0}) << j;
+    }
+    sss->Store(i, sr);
+  }
+}
+
+// Common functionality for querying a key (already hashed) in
+// SimpleSolutionStorage.
+template <typename SimpleSolutionStorage>
+typename SimpleSolutionStorage::ResultRow SimpleQueryHelper(
+    typename SimpleSolutionStorage::Index start_slot,
+    typename SimpleSolutionStorage::CoeffRow cr,
+    const SimpleSolutionStorage &sss) {
+  using CoeffRow = typename SimpleSolutionStorage::CoeffRow;
+  using ResultRow = typename SimpleSolutionStorage::ResultRow;
+
+  constexpr unsigned kCoeffBits = static_cast<unsigned>(sizeof(CoeffRow) * 8U);
+
+  ResultRow result = 0;
+  for (unsigned i = 0; i < kCoeffBits; ++i) {
+    // Bit masking whole value is generally faster here than 'if'
+    result ^= sss.Load(start_slot + i) &
+              (ResultRow{0} - (static_cast<ResultRow>(cr >> i) & ResultRow{1}));
+  }
+  return result;
+}
+
+// General PHSF query a key from SimpleSolutionStorage.
+template <typename SimpleSolutionStorage, typename PhsfQueryHasher>
+typename SimpleSolutionStorage::ResultRow SimplePhsfQuery(
+    const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher,
+    const SimpleSolutionStorage &sss) {
+  const typename PhsfQueryHasher::Hash hash = hasher.GetHash(key);
+
+  static_assert(sizeof(typename SimpleSolutionStorage::Index) ==
+                    sizeof(typename PhsfQueryHasher::Index),
+                "must be same");
+  static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) ==
+                    sizeof(typename PhsfQueryHasher::CoeffRow),
+                "must be same");
+
+  return SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()),
+                           hasher.GetCoeffRow(hash), sss);
+}
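+
+// Editor's note (illustration, not upstream): a PHSF query returns garbage
+// for keys that were never added; the filter wrapper below turns that into
+// a membership test by comparing against GetResultRowFromHash, so a
+// never-added key passes only with probability about 2^-b.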
+
+// Filter query a key from SimpleSolutionStorage.
+template <typename SimpleSolutionStorage, typename FilterQueryHasher>
+bool SimpleFilterQuery(const typename FilterQueryHasher::Key &key,
+                       const FilterQueryHasher &hasher,
+                       const SimpleSolutionStorage &sss) {
+  const typename FilterQueryHasher::Hash hash = hasher.GetHash(key);
+  const typename SimpleSolutionStorage::ResultRow expected =
+      hasher.GetResultRowFromHash(hash);
+
+  static_assert(sizeof(typename SimpleSolutionStorage::Index) ==
+                    sizeof(typename FilterQueryHasher::Index),
+                "must be same");
+  static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) ==
+                    sizeof(typename FilterQueryHasher::CoeffRow),
+                "must be same");
+  static_assert(sizeof(typename SimpleSolutionStorage::ResultRow) ==
+                    sizeof(typename FilterQueryHasher::ResultRow),
+                "must be same");
+
+  return expected ==
+         SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()),
+                           hasher.GetCoeffRow(hash), sss);
+}
+
+// #################### InterleavedSolutionStorage ######################
+
+// InterleavedSolutionStorage is row-major at a high level, for good
+// locality, and column-major at a low level, for CPU efficiency
+// especially in filter queries or relatively small number of result bits
+// (== solution columns). The storage is a sequence of "blocks" where a
+// block has one CoeffRow-sized segment for each solution column. Each
+// query spans at most two blocks; the starting solution row is typically
+// in the row-logical middle of a block and spans to the middle of the
+// next block. (See diagram below.)
+//
+// InterleavedSolutionStorage supports choosing b (number of result or
+// solution columns) at run time, and even supports mixing b and b-1 solution
+// columns in a single linear system solution, for filters that can
+// effectively utilize any size space (multiple of CoeffRow) for minimizing
+// FP rate for any number of added keys. To simplify query implementation
+// (with lower-index columns first), the b-bit portion comes after the b-1
+// portion of the structure.
+//
+// Diagram (=== marks logical block boundary; b=4; ### is data used by a
+// query crossing the b-1 to b boundary, each Segment has type CoeffRow):
+//  ...
+//  +======================+
+//  | S e g m e n t col=0  |
+//  +----------------------+
+//  | S e g m e n t col=1  |
+//  +----------------------+
+//  | S e g m e n t col=2  |
+//  +======================+
+//  | S e g m e n #########|
+//  +----------------------+
+//  | S e g m e n #########|
+//  +----------------------+
+//  | S e g m e n #########|
+//  +======================+ Result/solution columns: above = 3, below = 4
+//  |#############t col=0  |
+//  +----------------------+
+//  |#############t col=1  |
+//  +----------------------+
+//  |#############t col=2  |
+//  +----------------------+
+//  | S e g m e n t col=3  |
+//  +======================+
+//  | S e g m e n t col=0  |
+//  +----------------------+
+//  | S e g m e n t col=1  |
+//  +----------------------+
+//  | S e g m e n t col=2  |
+//  +----------------------+
+//  | S e g m e n t col=3  |
+//  +======================+
+//  ...
+//
+// InterleavedSolutionStorage will be adapted by the algorithms from
+// simple array-like segment storage. That array-like storage is templatized
+// in part so that an implementation may choose to handle byte ordering
+// at access time.
+//
+// concept InterleavedSolutionStorage extends RibbonTypes {
+//   // This is called at the beginning of back-substitution for the
+//   // solution storage to do any remaining configuration before data
+//   // is stored to it. If configuration is previously finalized, this
If configuration is previously finalized, this
+//   // could be a simple assertion or even no-op. Ribbon algorithms
+//   // only call this from back-substitution, and only once per call,
+//   // before other functions here.
+//   void PrepareForNumStarts(Index num_starts) const;
+//   // Must return num_starts passed to PrepareForNumStarts, or the most
+//   // recent call to PrepareForNumStarts if this storage object can be
+//   // reused. Note that num_starts == num_slots - kCoeffBits + 1 because
+//   // there must be a run of kCoeffBits slots starting from each start.
+//   Index GetNumStarts() const;
+//   // The larger number of solution columns used (called "b" above).
+//   Index GetUpperNumColumns() const;
+//   // If returns > 0, then block numbers below that use
+//   // GetUpperNumColumns() - 1 columns per solution row, and the rest
+//   // use GetUpperNumColumns(). A block represents kCoeffBits "slots",
+//   // where all but the last kCoeffBits - 1 slots are also starts. And
+//   // a block contains a segment for each solution column.
+//   // An implementation may only support uniform columns per solution
+//   // row and return constant 0 here.
+//   Index GetUpperStartBlock() const;
+//
+//   // ### "Array of segments" portion of API ###
+//   // The number of values of type CoeffRow used in this solution
+//   // representation. (This value can be inferred from the previous
+//   // three functions, but is expected at least for sanity / assertion
+//   // checking.)
+//   Index GetNumSegments() const;
+//   // Load an entry from the logical array of segments
+//   CoeffRow LoadSegment(Index segment_num) const;
+//   // Store an entry to the logical array of segments
+//   void StoreSegment(Index segment_num, CoeffRow data);
+// };
+
+// A helper for InterleavedBackSubst.
+template <typename BandingStorage>
+inline void BackSubstBlock(typename BandingStorage::CoeffRow *state,
+                           typename BandingStorage::Index num_columns,
+                           const BandingStorage &bs,
+                           typename BandingStorage::Index start_slot) {
+  using CoeffRow = typename BandingStorage::CoeffRow;
+  using Index = typename BandingStorage::Index;
+  using ResultRow = typename BandingStorage::ResultRow;
+
+  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
+
+  for (Index i = start_slot + kCoeffBits; i > start_slot;) {
+    --i;
+    CoeffRow cr;
+    ResultRow rr;
+    bs.LoadRow(i, &cr, &rr, /* for_back_subst */ true);
+    for (Index j = 0; j < num_columns; ++j) {
+      // Compute next solution bit at row i, column j (see derivation below)
+      CoeffRow tmp = state[j] << 1;
+      int bit = BitParity(tmp & cr) ^ ((rr >> j) & 1);
+      tmp |= static_cast<CoeffRow>(bit);
+
+      // Now tmp is solution at column j from row i for next kCoeffBits
+      // more rows. Thus, for valid solution, the dot product of the
+      // solution column with the coefficient row has to equal the result
+      // at that column,
+      //   BitParity(tmp & cr) == ((rr >> j) & 1)
+
+      // Update state.
+      state[j] = tmp;
+    }
+  }
+}
+
+// Back-substitution for generating a solution from BandingStorage to
+// InterleavedSolutionStorage.
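+// (Illustrative aside on BackSubstBlock above; not part of the upstream
+// source. In GF(2), the dot product of two bit-vectors packed into words is
+// BitParity(a & b), and back-substitution chooses each new solution bit so
+// that this dot product matches the stored result bit. A tiny
+// self-contained model of that invariant, assuming a BitParity equivalent
+// via the GCC/Clang builtin:
+//
+//   #include <cassert>
+//   #include <cstdint>
+//   int BitParity(uint64_t v) { return __builtin_parityll(v); }
+//   int main() {
+//     uint64_t cr = 0b1011;  // hypothetical banded coefficient row
+//     uint64_t soln = 0;     // solution bits for one column, newest in bit 0
+//     int rr_bit = 1;        // desired result bit for this row
+//     uint64_t tmp = soln << 1;
+//     int bit = BitParity(tmp & cr) ^ rr_bit;  // new bit fixes the parity
+//     tmp |= static_cast<uint64_t>(bit);
+//     assert(BitParity(tmp & cr) == rr_bit);   // dot product now matches
+//   }
+// )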
+template <typename InterleavedSolutionStorage, typename BandingStorage>
+void InterleavedBackSubst(InterleavedSolutionStorage *iss,
+                          const BandingStorage &bs) {
+  using CoeffRow = typename BandingStorage::CoeffRow;
+  using Index = typename BandingStorage::Index;
+
+  static_assert(
+      sizeof(Index) == sizeof(typename InterleavedSolutionStorage::Index),
+      "must be same");
+  static_assert(
+      sizeof(CoeffRow) == sizeof(typename InterleavedSolutionStorage::CoeffRow),
+      "must be same");
+
+  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
+
+  const Index num_starts = bs.GetNumStarts();
+  // Although it might be nice to have a filter that returns "always false"
+  // when no key is added, we aren't specifically supporting that here
+  // because it would require another condition branch in the query.
+  assert(num_starts > 0);
+  iss->PrepareForNumStarts(num_starts);
+
+  const Index num_slots = num_starts + kCoeffBits - 1;
+  assert(num_slots % kCoeffBits == 0);
+  const Index num_blocks = num_slots / kCoeffBits;
+  const Index num_segments = iss->GetNumSegments();
+
+  // For now upper, then lower
+  Index num_columns = iss->GetUpperNumColumns();
+  const Index upper_start_block = iss->GetUpperStartBlock();
+
+  if (num_columns == 0) {
+    // Nothing to do, presumably because there's not enough space for even
+    // a single segment.
+    assert(num_segments == 0);
+    // When num_columns == 0, a Ribbon filter query will always return true,
+    // or a PHSF query always 0.
+    return;
+  }
+
+  // We should be utilizing all available segments
+  assert(num_segments == (upper_start_block * (num_columns - 1)) +
+                             ((num_blocks - upper_start_block) * num_columns));
+
+  // TODO: consider fixed-column specializations with stack-allocated state
+
+  // A column-major buffer of the solution matrix, containing enough
+  // recently-computed solution data to compute the next solution row
+  // (based also on banding data).
+  std::unique_ptr<CoeffRow[]> state{new CoeffRow[num_columns]()};
+
+  Index block = num_blocks;
+  Index segment_num = num_segments;
+  while (block > upper_start_block) {
+    --block;
+    BackSubstBlock(state.get(), num_columns, bs, block * kCoeffBits);
+    segment_num -= num_columns;
+    for (Index i = 0; i < num_columns; ++i) {
+      iss->StoreSegment(segment_num + i, state[i]);
+    }
+  }
+  // Now (if applicable), region using lower number of columns
+  // (This should be optimized away if GetUpperStartBlock() returns
+  // constant 0.)
+  --num_columns;
+  while (block > 0) {
+    --block;
+    BackSubstBlock(state.get(), num_columns, bs, block * kCoeffBits);
+    segment_num -= num_columns;
+    for (Index i = 0; i < num_columns; ++i) {
+      iss->StoreSegment(segment_num + i, state[i]);
+    }
+  }
+  // Verify everything processed
+  assert(block == 0);
+  assert(segment_num == 0);
+}
+
+// Prefetch memory for a key in InterleavedSolutionStorage.
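+// (Illustrative aside on the segment accounting in InterleavedBackSubst
+// above; not part of the upstream source. The "utilizing all available
+// segments" assertion can be sanity-checked with small hypothetical
+// numbers: with num_blocks = 4, GetUpperNumColumns() b = 7, and
+// GetUpperStartBlock() = 1, the first block holds 6 segments and the
+// remaining three hold 7 each:
+//
+//   #include <cassert>
+//   #include <cstdint>
+//   int main() {
+//     uint32_t num_blocks = 4, b = 7, upper_start_block = 1;
+//     uint32_t num_segments =
+//         upper_start_block * (b - 1) + (num_blocks - upper_start_block) * b;
+//     assert(num_segments == 6 + 3 * 7);  // == 27
+//   }
+// )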
+template <typename InterleavedSolutionStorage, typename PhsfQueryHasher>
+inline void InterleavedPrepareQuery(
+    const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher,
+    const InterleavedSolutionStorage &iss,
+    typename PhsfQueryHasher::Hash *saved_hash,
+    typename InterleavedSolutionStorage::Index *saved_segment_num,
+    typename InterleavedSolutionStorage::Index *saved_num_columns,
+    typename InterleavedSolutionStorage::Index *saved_start_bit) {
+  using Hash = typename PhsfQueryHasher::Hash;
+  using CoeffRow = typename InterleavedSolutionStorage::CoeffRow;
+  using Index = typename InterleavedSolutionStorage::Index;
+
+  static_assert(sizeof(Index) == sizeof(typename PhsfQueryHasher::Index),
+                "must be same");
+
+  const Hash hash = hasher.GetHash(key);
+  const Index start_slot = hasher.GetStart(hash, iss.GetNumStarts());
+
+  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
+
+  const Index upper_start_block = iss.GetUpperStartBlock();
+  Index num_columns = iss.GetUpperNumColumns();
+  Index start_block_num = start_slot / kCoeffBits;
+  Index segment_num = start_block_num * num_columns -
+                      std::min(start_block_num, upper_start_block);
+  // Change to lower num columns if applicable.
+  // (This should not compile to a conditional branch.)
+  num_columns -= (start_block_num < upper_start_block) ? 1 : 0;
+
+  Index start_bit = start_slot % kCoeffBits;
+
+  Index segment_count = num_columns + (start_bit == 0 ? 0 : num_columns);
+
+  iss.PrefetchSegmentRange(segment_num, segment_num + segment_count);
+
+  *saved_hash = hash;
+  *saved_segment_num = segment_num;
+  *saved_num_columns = num_columns;
+  *saved_start_bit = start_bit;
+}
+
+// General PHSF query from InterleavedSolutionStorage, using data for
+// the query key from InterleavedPrepareQuery
+template <typename InterleavedSolutionStorage, typename PhsfQueryHasher>
+inline typename InterleavedSolutionStorage::ResultRow InterleavedPhsfQuery(
+    typename PhsfQueryHasher::Hash hash,
+    typename InterleavedSolutionStorage::Index segment_num,
+    typename InterleavedSolutionStorage::Index num_columns,
+    typename InterleavedSolutionStorage::Index start_bit,
+    const PhsfQueryHasher &hasher, const InterleavedSolutionStorage &iss) {
+  using CoeffRow = typename InterleavedSolutionStorage::CoeffRow;
+  using Index = typename InterleavedSolutionStorage::Index;
+  using ResultRow = typename InterleavedSolutionStorage::ResultRow;
+
+  static_assert(sizeof(Index) == sizeof(typename PhsfQueryHasher::Index),
+                "must be same");
+  static_assert(sizeof(CoeffRow) == sizeof(typename PhsfQueryHasher::CoeffRow),
+                "must be same");
+
+  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
+
+  const CoeffRow cr = hasher.GetCoeffRow(hash);
+
+  ResultRow sr = 0;
+  const CoeffRow cr_left = cr << static_cast<unsigned>(start_bit);
+  for (Index i = 0; i < num_columns; ++i) {
+    sr ^= BitParity(iss.LoadSegment(segment_num + i) & cr_left) << i;
+  }
+
+  if (start_bit > 0) {
+    segment_num += num_columns;
+    const CoeffRow cr_right =
+        cr >> static_cast<unsigned>(kCoeffBits - start_bit);
+    for (Index i = 0; i < num_columns; ++i) {
+      sr ^= BitParity(iss.LoadSegment(segment_num + i) & cr_right) << i;
+    }
+  }
+
+  return sr;
+}
+
+// Filter query of a key from InterleavedSolutionStorage.
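+// (Illustrative aside on InterleavedPhsfQuery above; not part of the
+// upstream source. When start_bit > 0 the query's kCoeffBits coefficients
+// straddle two blocks, so the coefficient row is split into a left part
+// applied to the first block's segments and a right part applied to the
+// next block's. The two shifted parts partition the original bits exactly:
+//
+//   #include <cassert>
+//   #include <cstdint>
+//   int main() {
+//     const unsigned kCoeffBits = 64;
+//     uint64_t cr = 0x0123456789abcdefULL;  // hypothetical coefficient row
+//     for (unsigned start_bit = 1; start_bit < kCoeffBits; ++start_bit) {
+//       uint64_t cr_left = cr << start_bit;
+//       uint64_t cr_right = cr >> (kCoeffBits - start_bit);
+//       // No coefficient is lost or duplicated across the two halves.
+//       assert(((cr_left >> start_bit) |
+//               (cr_right << (kCoeffBits - start_bit))) == cr);
+//     }
+//   }
+// )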
+template <typename InterleavedSolutionStorage, typename FilterQueryHasher>
+inline bool InterleavedFilterQuery(
+    typename FilterQueryHasher::Hash hash,
+    typename InterleavedSolutionStorage::Index segment_num,
+    typename InterleavedSolutionStorage::Index num_columns,
+    typename InterleavedSolutionStorage::Index start_bit,
+    const FilterQueryHasher &hasher, const InterleavedSolutionStorage &iss) {
+  using CoeffRow = typename InterleavedSolutionStorage::CoeffRow;
+  using Index = typename InterleavedSolutionStorage::Index;
+  using ResultRow = typename InterleavedSolutionStorage::ResultRow;
+
+  static_assert(sizeof(Index) == sizeof(typename FilterQueryHasher::Index),
+                "must be same");
+  static_assert(
+      sizeof(CoeffRow) == sizeof(typename FilterQueryHasher::CoeffRow),
+      "must be same");
+  static_assert(
+      sizeof(ResultRow) == sizeof(typename FilterQueryHasher::ResultRow),
+      "must be same");
+
+  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
+
+  const CoeffRow cr = hasher.GetCoeffRow(hash);
+  const ResultRow expected = hasher.GetResultRowFromHash(hash);
+
+  // TODO: consider optimizations such as
+  // * get rid of start_bit == 0 condition with careful fetching & shifting
+  if (start_bit == 0) {
+    for (Index i = 0; i < num_columns; ++i) {
+      if (BitParity(iss.LoadSegment(segment_num + i) & cr) !=
+          (static_cast<int>(expected >> i) & 1)) {
+        return false;
+      }
+    }
+  } else {
+    const CoeffRow cr_left = cr << static_cast<unsigned>(start_bit);
+    const CoeffRow cr_right =
+        cr >> static_cast<unsigned>(kCoeffBits - start_bit);
+
+    for (Index i = 0; i < num_columns; ++i) {
+      CoeffRow soln_data =
+          (iss.LoadSegment(segment_num + i) & cr_left) ^
+          (iss.LoadSegment(segment_num + num_columns + i) & cr_right);
+      if (BitParity(soln_data) != (static_cast<int>(expected >> i) & 1)) {
+        return false;
+      }
+    }
+  }
+  // otherwise, all match
+  return true;
+}
+
+// TODO: refactor Interleaved*Query so that queries can be "prepared" by
+// prefetching memory, to hide memory latency for multiple queries in a
+// single thread.
+
+}  // namespace ribbon
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.cc
new file mode 100644
index 0000000000..c1046f4aaa
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.cc
@@ -0,0 +1,506 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "util/ribbon_config.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace ribbon {
+
+namespace detail {
+
+// Each instantiation of this struct is sufficiently unique for configuration
+// purposes, and is only instantiated for settings where we support the
+// configuration API. An application might only reference one instantiation,
+// meaning the rest could be pruned at link time.
+template <ConstructionFailureChance kCfc, uint64_t kCoeffBits, bool kUseSmash>
+struct BandingConfigHelperData {
+  static constexpr size_t kKnownSize = 18U;
+
+  // Because of complexity in the data, for smaller numbers of slots
+  // (powers of two up to 2^17), we record known numbers that can be added
+  // with kCfc chance of construction failure and settings in template
+  // parameters. Zero means "unsupported (too small) number of slots".
+  // (GetNumToAdd below will use interpolation for numbers of slots
+  // between powers of two; double rather than integer values here make
+  // that more accurate.)
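+  // (Illustrative aside, not from the upstream source: for a number of
+  // slots between two powers of two, GetNumToAdd interpolates linearly
+  // between the two nearest known values. For example, with hypothetical
+  // entries kKnownToAddByPow2[10] = 1000 and kKnownToAddByPow2[11] = 2020,
+  // a request for num_slots = 1536 sits half way between 2^10 and 2^11
+  // (ceil_portion = 1536.0 / 1024 - 1.0 = 0.5), giving
+  //   0.5 * 2020 + 0.5 * 1000 = 1510
+  // entries, which is why fractional (double) table entries interpolate
+  // more smoothly than integers would.)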
+ static const std::array kKnownToAddByPow2; + + // For sufficiently large number of slots, doubling the number of + // slots will increase the expected overhead (slots over number added) + // by approximately this constant. + // (This is roughly constant regardless of ConstructionFailureChance and + // smash setting.) + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetFactorPerPow2() { + if (kCoeffBits == 128U) { + return 0.0038; + } else { + assert(kCoeffBits == 64U); + return 0.0083; + } + } + + // Overhead factor for 2^(kKnownSize-1) slots + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetFinalKnownFactor() { + return 1.0 * (uint32_t{1} << (kKnownSize - 1)) / + kKnownToAddByPow2[kKnownSize - 1]; + } + + // GetFinalKnownFactor() - (kKnownSize-1) * GetFactorPerPow2() + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetBaseFactor() { + return GetFinalKnownFactor() - (kKnownSize - 1) * GetFactorPerPow2(); + } + + // Get overhead factor (slots over number to add) for sufficiently large + // number of slots (by log base 2) + static inline double GetFactorForLarge(double log2_num_slots) { + return GetBaseFactor() + log2_num_slots * GetFactorPerPow2(); + } + + // For a given power of two number of slots (specified by whole number + // log base 2), implements GetNumToAdd for such limited case, returning + // double for better interpolation in GetNumToAdd and GetNumSlots. + static inline double GetNumToAddForPow2(uint32_t log2_num_slots) { + assert(log2_num_slots <= 32); // help clang-analyze + if (log2_num_slots < kKnownSize) { + return kKnownToAddByPow2[log2_num_slots]; + } else { + return 1.0 * (uint64_t{1} << log2_num_slots) / + GetFactorForLarge(1.0 * log2_num_slots); + } + } +}; + +// Based on data from FindOccupancy in ribbon_test +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 252.984, + 506.109, + 1013.71, + 2029.47, + 4060.43, + 8115.63, + 16202.2, + 32305.1, + 64383.5, + 128274, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 126.274, + 254.279, + 510.27, + 1022.24, + 2046.02, + 4091.99, + 8154.98, + 16244.3, + 32349.7, + 64426.6, + 128307, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 124.94, + 249.968, + 501.234, + 1004.06, + 2006.15, + 3997.89, + 7946.99, + 15778.4, + 31306.9, + 62115.3, + 123284, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 62.2683, + 126.259, + 254.268, + 509.975, + 1019.98, + 2026.16, + 4019.75, + 7969.8, + 15798.2, + 31330.3, + 62134.2, + 123255, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 248.851, + 499.532, + 1001.26, + 2003.97, + 4005.59, + 8000.39, + 15966.6, + 31828.1, + 63447.3, + 126506, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 122.637, + 250.651, + 506.625, + 1018.54, + 2036.43, + 4041.6, + 8039.25, + 16005, + 31869.6, + 63492.8, + 126537, + }}; + +template <> +const std::array + 
BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 120.659, + 243.346, + 488.168, + 976.373, + 1948.86, + 3875.85, + 7704.97, + 15312.4, + 30395.1, + 60321.8, + 119813, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 58.6016, + 122.619, + 250.641, + 503.595, + 994.165, + 1967.36, + 3898.17, + 7727.21, + 15331.5, + 30405.8, + 60376.2, + 119836, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 242.61, + 491.887, + 983.603, + 1968.21, + 3926.98, + 7833.99, + 15629, + 31199.9, + 62307.8, + 123870, + }}; + +template <> +const std::array BandingConfigHelperData< + kOneIn1000, 128U, /*smash*/ true>::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 117.19, + 245.105, + 500.748, + 1010.67, + 1993.4, + 3950.01, + 7863.31, + 15652, + 31262.1, + 62462.8, + 124095, +}}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 114, + 234.8, + 471.498, + 940.165, + 1874, + 3721.5, + 7387.5, + 14592, + 29160, + 57745, + 115082, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{ + { + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 53.0434, + 117, + 245.312, + 483.571, + 950.251, + 1878, + 3736.34, + 7387.97, + 14618, + 29142.9, + 57838.8, + 114932, + }}; + +// We hide these implementation details from the .h file with explicit +// instantiations below these partial specializations. + +template +uint32_t BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + true /* kIsSupported */>::GetNumToAdd(uint32_t num_slots) { + using Data = detail::BandingConfigHelperData; + if (num_slots == 0) { + return 0; + } + uint32_t num_to_add; + double log2_num_slots = std::log(num_slots) * 1.4426950409; + uint32_t floor_log2 = static_cast(log2_num_slots); + if (floor_log2 + 1 < Data::kKnownSize) { + double ceil_portion = 1.0 * num_slots / (uint32_t{1} << floor_log2) - 1.0; + // Must be a supported number of slots + assert(Data::kKnownToAddByPow2[floor_log2] > 0.0); + // Weighted average of two nearest known data points + num_to_add = static_cast( + ceil_portion * Data::kKnownToAddByPow2[floor_log2 + 1] + + (1.0 - ceil_portion) * Data::kKnownToAddByPow2[floor_log2]); + } else { + // Use formula for large values + double factor = Data::GetFactorForLarge(log2_num_slots); + assert(factor >= 1.0); + num_to_add = static_cast(num_slots / factor); + } + if (kHomogeneous) { + // Even when standard filter construction would succeed, we might + // have loaded things up too much for Homogeneous filter. (Complete + // explanation not known but observed empirically.) This seems to + // correct for that, mostly affecting small filter configurations. 
+ if (num_to_add >= 8) { + num_to_add -= 8; + } else { + assert(false); + } + } + return num_to_add; +} + +template +uint32_t BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + true /* kIsSupported */>::GetNumSlots(uint32_t num_to_add) { + using Data = detail::BandingConfigHelperData; + + if (num_to_add == 0) { + return 0; + } + if (kHomogeneous) { + // Reverse of above in GetNumToAdd + num_to_add += 8; + } + double log2_num_to_add = std::log(num_to_add) * 1.4426950409; + uint32_t approx_log2_slots = static_cast(log2_num_to_add + 0.5); + assert(approx_log2_slots <= 32); // help clang-analyze + + double lower_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots); + double upper_num_to_add; + if (approx_log2_slots == 0 || lower_num_to_add == /* unsupported */ 0) { + // Return minimum non-zero slots in standard implementation + return kUseSmash ? kCoeffBits : 2 * kCoeffBits; + } else if (num_to_add < lower_num_to_add) { + upper_num_to_add = lower_num_to_add; + --approx_log2_slots; + lower_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots); + } else { + upper_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots + 1); + } + + assert(num_to_add >= lower_num_to_add); + assert(num_to_add < upper_num_to_add); + + double upper_portion = + (num_to_add - lower_num_to_add) / (upper_num_to_add - lower_num_to_add); + + double lower_num_slots = 1.0 * (uint64_t{1} << approx_log2_slots); + + // Interpolation, round up + return static_cast(upper_portion * lower_num_slots + + lower_num_slots + 0.999999999); +} + +// These explicit instantiations enable us to hide most of the +// implementation details from the .h file. (The .h file currently +// needs to determine whether settings are "supported" or not.) + +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; + +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; + +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ false, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ true, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ false, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ true, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 64U, /*sm*/ false, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 64U, /*sm*/ false, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported; + +} // namespace detail + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE diff --git 
a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.h new file mode 100644 index 0000000000..0e3edf0734 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_config.h @@ -0,0 +1,182 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "port/lang.h" // for FALLTHROUGH_INTENDED +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly) +// +// ribbon_config.h: APIs for relating numbers of slots with numbers of +// additions for tolerable construction failure probabilities. This is +// separate from ribbon_impl.h because it might not be needed for +// some applications. +// +// This API assumes uint32_t for number of slots, as a single Ribbon +// linear system should not normally overflow that without big penalties. +// +// Template parameter kCoeffBits uses uint64_t for convenience in case it +// comes from size_t. +// +// Most of the complexity here is trying to optimize speed and +// compiled code size, using templates to minimize table look-ups and +// the compiled size of all linked look-up tables. Look-up tables are +// required because we don't have good formulas, and the data comes +// from running FindOccupancy in ribbon_test. + +// Represents a chosen chance of successful Ribbon construction for a single +// seed. Allowing higher chance of failed construction can reduce space +// overhead but takes extra time in construction. +enum ConstructionFailureChance { + kOneIn2, + kOneIn20, + // When using kHomogeneous==true, construction failure chance should + // not generally exceed target FP rate, so it unlikely useful to + // allow a higher "failure" chance. In some cases, even more overhead + // is appropriate. (TODO) + kOneIn1000, +}; + +namespace detail { + +// It is useful to compile ribbon_test linking to BandingConfigHelper with +// settings for which we do not have configuration data, as long as we don't +// run the code. This template hack supports that. +template +struct BandingConfigHelper1MaybeSupported { + public: + static uint32_t GetNumToAdd(uint32_t num_slots) { + // Unsupported + assert(num_slots == 0); + (void)num_slots; + return 0; + } + + static uint32_t GetNumSlots(uint32_t num_to_add) { + // Unsupported + assert(num_to_add == 0); + (void)num_to_add; + return 0; + } +}; + +// Base class for BandingConfigHelper1 and helper for BandingConfigHelper +// with core implementations built on above data +template +struct BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, true /* kIsSupported */> { + public: + // See BandingConfigHelper1. Implementation in ribbon_config.cc + static uint32_t GetNumToAdd(uint32_t num_slots); + + // See BandingConfigHelper1. 
Implementation in ribbon_config.cc + static uint32_t GetNumSlots(uint32_t num_to_add); +}; + +} // namespace detail + +template +struct BandingConfigHelper1 + : public detail::BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + /* kIsSupported */ kCoeffBits == 64 || kCoeffBits == 128> { + public: + // Returns a number of entries that can be added to a given number of + // slots, with roughly kCfc chance of construction failure per seed, + // or better. Does NOT do rounding for InterleavedSoln; call + // RoundUpNumSlots for that. + // + // inherited: + // static uint32_t GetNumToAdd(uint32_t num_slots); + + // Returns a number of slots for a given number of entries to add + // that should have roughly kCfc chance of construction failure per + // seed, or better. Does NOT do rounding for InterleavedSoln; call + // RoundUpNumSlots for that. + // + // num_to_add should not exceed roughly 2/3rds of the maximum value + // of the uint32_t type to avoid overflow. + // + // inherited: + // static uint32_t GetNumSlots(uint32_t num_to_add); +}; + +// Configured using TypesAndSettings as in ribbon_impl.h +template +struct BandingConfigHelper1TS + : public BandingConfigHelper1< + kCfc, + /* kCoeffBits */ sizeof(typename TypesAndSettings::CoeffRow) * 8U, + TypesAndSettings::kUseSmash, TypesAndSettings::kHomogeneous> {}; + +// Like BandingConfigHelper1TS except failure chance can be a runtime rather +// than compile time value. +template +struct BandingConfigHelper { + public: + static constexpr ConstructionFailureChance kDefaultFailureChance = + TypesAndSettings::kHomogeneous ? kOneIn1000 : kOneIn20; + + static uint32_t GetNumToAdd( + uint32_t num_slots, + ConstructionFailureChance max_failure = kDefaultFailureChance) { + switch (max_failure) { + default: + assert(false); + FALLTHROUGH_INTENDED; + case kOneIn20: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + case kOneIn2: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + case kOneIn1000: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + } + } + + static uint32_t GetNumSlots( + uint32_t num_to_add, + ConstructionFailureChance max_failure = kDefaultFailureChance) { + switch (max_failure) { + default: + assert(false); + FALLTHROUGH_INTENDED; + case kOneIn20: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + case kOneIn2: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + case kOneIn1000: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + } + } +}; + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_impl.h new file mode 100644 index 0000000000..0afecc67dd --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/ribbon_impl.h @@ -0,0 +1,1137 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + +#include + +#include "port/port.h" // for PREFETCH +#include "util/fastrange.h" +#include "util/ribbon_alg.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly) +// +// ribbon_impl.h: templated (parameterized) standard implementations +// +// Ribbon is a Perfect Hash Static Function construction useful as a compact +// static Bloom filter alternative. See ribbon_alg.h for core algorithms +// and core design details. +// +// TODO: more details on trade-offs and practical issues. +// +// APIs for configuring Ribbon are in ribbon_config.h + +// Ribbon implementations in this file take these parameters, which must be +// provided in a class/struct type with members expressed in this concept: + +// concept TypesAndSettings { +// // See RibbonTypes and *Hasher in ribbon_alg.h, except here we have +// // the added constraint that Hash be equivalent to either uint32_t or +// // uint64_t. +// typename Hash; +// typename CoeffRow; +// typename ResultRow; +// typename Index; +// typename Key; +// static constexpr bool kFirstCoeffAlwaysOne; +// +// // An unsigned integer type for identifying a hash seed, typically +// // uint32_t or uint64_t. Importantly, this is the amount of data +// // stored in memory for identifying a raw seed. See StandardHasher. +// typename Seed; +// +// // When true, the PHSF implements a static filter, expecting just +// // keys as inputs for construction. When false, implements a general +// // PHSF and expects std::pair as inputs for +// // construction. +// static constexpr bool kIsFilter; +// +// // When true, enables a special "homogeneous" filter implementation that +// // is slightly faster to construct, and never fails to construct though +// // FP rate can quickly explode in cases where corresponding +// // non-homogeneous filter would fail (or nearly fail?) to construct. +// // For smaller filters, you can configure with ConstructionFailureChance +// // smaller than desired FP rate to largely counteract this effect. +// // TODO: configuring Homogeneous Ribbon for arbitrarily large filters +// // based on data from OptimizeHomogAtScale +// static constexpr bool kHomogeneous; +// +// // When true, adds a tiny bit more hashing logic on queries and +// // construction to improve utilization at the beginning and end of +// // the structure. Recommended when CoeffRow is only 64 bits (or +// // less), so typical num_starts < 10k. Although this is compatible +// // with kHomogeneous, the competing space vs. time priorities might +// // not be useful. +// static constexpr bool kUseSmash; +// +// // When true, allows number of "starts" to be zero, for best support +// // of the "no keys to add" case by always returning false for filter +// // queries. (This is distinct from the "keys added but no space for +// // any data" case, in which a filter always returns true.) The cost +// // supporting this is a conditional branch (probably predictable) in +// // queries. +// static constexpr bool kAllowZeroStarts; +// +// // A seedable stock hash function on Keys. All bits of Hash must +// // be reasonably high quality. XXH functions recommended, but +// // Murmur, City, Farm, etc. also work. +// static Hash HashFn(const Key &, Seed raw_seed); +// }; + +// A bit of a hack to automatically construct the type for +// AddInput based on a constexpr bool. 
+template +struct AddInputSelector { + // For general PHSF, not filter + using T = std::pair; +}; + +template +struct AddInputSelector { + // For Filter + using T = Key; +}; + +// To avoid writing 'typename' everywhere that we use types like 'Index' +#define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings) \ + using CoeffRow = typename TypesAndSettings::CoeffRow; \ + using ResultRow = typename TypesAndSettings::ResultRow; \ + using Index = typename TypesAndSettings::Index; \ + using Hash = typename TypesAndSettings::Hash; \ + using Key = typename TypesAndSettings::Key; \ + using Seed = typename TypesAndSettings::Seed; \ + \ + /* Some more additions */ \ + using QueryInput = Key; \ + using AddInput = typename ROCKSDB_NAMESPACE::ribbon::AddInputSelector< \ + Key, ResultRow, TypesAndSettings::kIsFilter>::T; \ + static constexpr auto kCoeffBits = \ + static_cast(sizeof(CoeffRow) * 8U); \ + \ + /* Export to algorithm */ \ + static constexpr bool kFirstCoeffAlwaysOne = \ + TypesAndSettings::kFirstCoeffAlwaysOne; \ + \ + static_assert(sizeof(CoeffRow) + sizeof(ResultRow) + sizeof(Index) + \ + sizeof(Hash) + sizeof(Key) + sizeof(Seed) + \ + sizeof(QueryInput) + sizeof(AddInput) + kCoeffBits + \ + kFirstCoeffAlwaysOne > \ + 0, \ + "avoid unused warnings, semicolon expected after macro call") + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4309) // cast truncating constant +#pragma warning(disable : 4307) // arithmetic constant overflow +#endif + +// StandardHasher: A standard implementation of concepts RibbonTypes, +// PhsfQueryHasher, FilterQueryHasher, and BandingHasher from ribbon_alg.h. +// +// This implementation should be suitable for most all practical purposes +// as it "behaves" across a wide range of settings, with little room left +// for improvement. The key functionality in this hasher is generating +// CoeffRows, starts, and (for filters) ResultRows, which could be ~150 +// bits of data or more, from a modest hash of 64 or even just 32 bits, with +// enough uniformity and bitwise independence to be close to "the best you +// can do" with available hash information in terms of FP rate and +// compactness. (64 bits recommended and sufficient for PHSF practical +// purposes.) +// +// Another feature of this hasher is a minimal "premixing" of seeds before +// they are provided to TypesAndSettings::HashFn in case that function does +// not provide sufficiently independent hashes when iterating merely +// sequentially on seeds. (This for example works around a problem with the +// preview version 0.7.2 of XXH3 used in RocksDB, a.k.a. XXPH3 or Hash64, and +// MurmurHash1 used in RocksDB, a.k.a. Hash.) We say this pre-mixing step +// translates "ordinal seeds," which we iterate sequentially to find a +// solution, into "raw seeds," with many more bits changing for each +// iteration. The translation is an easily reversible lightweight mixing, +// not suitable for hashing on its own. An advantage of this approach is that +// StandardHasher can store just the raw seed (e.g. 64 bits) for fast query +// times, while from the application perspective, we can limit to a small +// number of ordinal keys (e.g. 64 in 6 bits) for saving in metadata. +// +// The default constructor initializes the seed to ordinal seed zero, which +// is equal to raw seed zero. 
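+// (Illustrative aside, not from the upstream source: the intended seed flow
+// is to iterate small ordinal seeds during construction, store only the raw
+// seed for fast queries, and recover the ordinal seed for compact metadata.
+// A minimal sketch, assuming a hypothetical TypesAndSettings instantiation
+// 'MyTS':
+//
+//   StandardHasher<MyTS> hasher;
+//   hasher.SetOrdinalSeed(5);             // e.g. 6-bit ordinal seed
+//   auto raw = hasher.GetRawSeed();       // many more bits change per step
+//   hasher.SetRawSeed(raw);               // fast path used at query time
+//   assert(hasher.GetOrdinalSeed() == 5); // reversible mixing round-trips
+// )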
+// +template +class StandardHasher { + public: + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); + + inline Hash GetHash(const Key& key) const { + return TypesAndSettings::HashFn(key, raw_seed_); + }; + // For when AddInput == pair (kIsFilter == false) + inline Hash GetHash(const std::pair& bi) const { + return GetHash(bi.first); + }; + inline Index GetStart(Hash h, Index num_starts) const { + // This is "critical path" code because it's required before memory + // lookup. + // + // FastRange gives us a fast and effective mapping from h to the + // appropriate range. This depends most, sometimes exclusively, on + // upper bits of h. + // + if (TypesAndSettings::kUseSmash) { + // Extra logic to "smash" entries at beginning and end, for + // better utilization. For example, without smash and with + // kFirstCoeffAlwaysOne, there's about a 30% chance that the + // first slot in the banding will be unused, and worse without + // kFirstCoeffAlwaysOne. The ending slots are even less utilized + // without smash. + // + // But since this only affects roughly kCoeffBits of the slots, + // it's usually small enough to be ignorable (less computation in + // this function) when number of slots is roughly 10k or larger. + // + // The best values for these smash weights might depend on how + // densely you're packing entries, and also kCoeffBits, but this + // seems to work well for roughly 95% success probability. + // + constexpr Index kFrontSmash = kCoeffBits / 4; + constexpr Index kBackSmash = kCoeffBits / 4; + Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash); + start = std::max(start, kFrontSmash); + start -= kFrontSmash; + start = std::min(start, num_starts - 1); + return start; + } else { + // For query speed, we allow small number of initial and final + // entries to be under-utilized. + // NOTE: This call statically enforces that Hash is equivalent to + // either uint32_t or uint64_t. + return FastRangeGeneric(h, num_starts); + } + } + inline CoeffRow GetCoeffRow(Hash h) const { + // This is not so much "critical path" code because it can be done in + // parallel (instruction level) with memory lookup. + // + // When we might have many entries squeezed into a single start, + // we need reasonably good remixing for CoeffRow. + if (TypesAndSettings::kUseSmash) { + // Reasonably good, reasonably fast, reasonably general. + // Probably not 1:1 but probably close enough. + Unsigned128 a = Multiply64to128(h, kAltCoeffFactor1); + Unsigned128 b = Multiply64to128(h, kAltCoeffFactor2); + auto cr = static_cast(b ^ (a << 64) ^ (a >> 64)); + + // Now ensure the value is non-zero + if (kFirstCoeffAlwaysOne) { + cr |= 1; + } else { + // Still have to ensure some bit is non-zero + cr |= (cr == 0) ? 1 : 0; + } + return cr; + } + // If not kUseSmash, we ensure we're not squeezing many entries into a + // single start, in part by ensuring num_starts > num_slots / 2. Thus, + // here we do not need good remixing for CoeffRow, but just enough that + // (a) every bit is reasonably independent from Start. + // (b) every Hash-length bit subsequence of the CoeffRow has full or + // nearly full entropy from h. + // (c) if nontrivial bit subsequences within are correlated, it needs to + // be more complicated than exact copy or bitwise not (at least without + // kFirstCoeffAlwaysOne), or else there seems to be a kind of + // correlated clustering effect. + // (d) the CoeffRow is not zero, so that no one input on its own can + // doom construction success. 
(Preferably a mix of 1's and 0's if + // satisfying above.) + + // First, establish sufficient bitwise independence from Start, with + // multiplication by a large random prime. + // Note that we cast to Hash because if we use product bits beyond + // original input size, that's going to correlate with Start (FastRange) + // even with a (likely) different multiplier here. + Hash a = h * kCoeffAndResultFactor; + + static_assert( + sizeof(Hash) == sizeof(uint64_t) || sizeof(Hash) == sizeof(uint32_t), + "Supported sizes"); + // If that's big enough, we're done. If not, we have to expand it, + // maybe up to 4x size. + uint64_t b; + if (sizeof(Hash) < sizeof(uint64_t)) { + // Almost-trivial hash expansion (OK - see above), favoring roughly + // equal number of 1's and 0's in result + b = (uint64_t{a} << 32) ^ (a ^ kCoeffXor32); + } else { + b = a; + } + static_assert(sizeof(CoeffRow) <= sizeof(Unsigned128), "Supported sizes"); + Unsigned128 c; + if (sizeof(uint64_t) < sizeof(CoeffRow)) { + // Almost-trivial hash expansion (OK - see above), favoring roughly + // equal number of 1's and 0's in result + c = (Unsigned128{b} << 64) ^ (b ^ kCoeffXor64); + } else { + c = b; + } + auto cr = static_cast(c); + + // Now ensure the value is non-zero + if (kFirstCoeffAlwaysOne) { + cr |= 1; + } else if (sizeof(CoeffRow) == sizeof(Hash)) { + // Still have to ensure some bit is non-zero + cr |= (cr == 0) ? 1 : 0; + } else { + // (We did trivial expansion with constant xor, which ensures some + // bits are non-zero.) + } + return cr; + } + inline ResultRow GetResultRowMask() const { + // TODO: will be used with InterleavedSolutionStorage? + // For now, all bits set (note: might be a small type so might need to + // narrow after promotion) + return static_cast(~ResultRow{0}); + } + inline ResultRow GetResultRowFromHash(Hash h) const { + if (TypesAndSettings::kIsFilter && !TypesAndSettings::kHomogeneous) { + // This is not so much "critical path" code because it can be done in + // parallel (instruction level) with memory lookup. + // + // ResultRow bits only needs to be independent from CoeffRow bits if + // many entries might have the same start location, where "many" is + // comparable to number of hash bits or kCoeffBits. If !kUseSmash + // and num_starts > kCoeffBits, it is safe and efficient to draw from + // the same bits computed for CoeffRow, which are reasonably + // independent from Start. (Inlining and common subexpression + // elimination with GetCoeffRow should make this + // a single shared multiplication in generated code when !kUseSmash.) + Hash a = h * kCoeffAndResultFactor; + + // The bits here that are *most* independent of Start are the highest + // order bits (as in Knuth multiplicative hash). To make those the + // most preferred for use in the result row, we do a bswap here. + auto rr = static_cast(EndianSwapValue(a)); + return rr & GetResultRowMask(); + } else { + // Must be zero + return 0; + } + } + // For when AddInput == Key (kIsFilter == true) + inline ResultRow GetResultRowFromInput(const Key&) const { + // Must be zero + return 0; + } + // For when AddInput == pair (kIsFilter == false) + inline ResultRow GetResultRowFromInput( + const std::pair& bi) const { + // Simple extraction + return bi.second; + } + + // Seed tracking APIs - see class comment + void SetRawSeed(Seed seed) { raw_seed_ = seed; } + Seed GetRawSeed() { return raw_seed_; } + void SetOrdinalSeed(Seed count) { + // A simple, reversible mixing of any size (whole bytes) up to 64 bits. 
+ // This allows casting the raw seed to any smaller size we use for + // ordinal seeds without risk of duplicate raw seeds for unique ordinal + // seeds. + + // Seed type might be smaller than numerical promotion size, but Hash + // should be at least that size, so we use Hash as intermediate type. + static_assert(sizeof(Seed) <= sizeof(Hash), + "Hash must be at least size of Seed"); + + // Multiply by a large random prime (one-to-one for any prefix of bits) + Hash tmp = count * kToRawSeedFactor; + // Within-byte one-to-one mixing + static_assert((kSeedMixMask & (kSeedMixMask >> kSeedMixShift)) == 0, + "Illegal mask+shift"); + tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift; + raw_seed_ = static_cast(tmp); + // dynamic verification + assert(GetOrdinalSeed() == count); + } + Seed GetOrdinalSeed() { + Hash tmp = raw_seed_; + // Within-byte one-to-one mixing (its own inverse) + tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift; + // Multiply by 64-bit multiplicative inverse + static_assert(kToRawSeedFactor * kFromRawSeedFactor == Hash{1}, + "Must be inverses"); + return static_cast(tmp * kFromRawSeedFactor); + } + + protected: + // For expanding hash: + // large random prime + static constexpr Hash kCoeffAndResultFactor = + static_cast(0xc28f82822b650bedULL); + static constexpr uint64_t kAltCoeffFactor1 = 0x876f170be4f1fcb9U; + static constexpr uint64_t kAltCoeffFactor2 = 0xf0433a4aecda4c5fU; + // random-ish data + static constexpr uint32_t kCoeffXor32 = 0xa6293635U; + static constexpr uint64_t kCoeffXor64 = 0xc367844a6e52731dU; + + // For pre-mixing seeds + static constexpr Hash kSeedMixMask = static_cast(0xf0f0f0f0f0f0f0f0ULL); + static constexpr unsigned kSeedMixShift = 4U; + static constexpr Hash kToRawSeedFactor = + static_cast(0xc78219a23eeadd03ULL); + static constexpr Hash kFromRawSeedFactor = + static_cast(0xfe1a137d14b475abULL); + + // See class description + Seed raw_seed_ = 0; +}; + +// StandardRehasher (and StandardRehasherAdapter): A variant of +// StandardHasher that uses the same type for keys as for hashes. +// This is primarily intended for building a Ribbon filter +// from existing hashes without going back to original inputs in +// order to apply a different seed. This hasher seeds a 1-to-1 mixing +// transformation to apply a seed to an existing hash. (Untested for +// hash-sized keys that are not already uniformly distributed.) This +// transformation builds on the seed pre-mixing done in StandardHasher. +// +// Testing suggests essentially no degradation of solution success rate +// vs. going back to original inputs when changing hash seeds. For example: +// Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys +// is about 1.10 for both StandardHasher and StandardRehasher. +// +// StandardRehasher is not really recommended for general PHSFs (not +// filters) because a collision in the original hash could prevent +// construction despite re-seeding the Rehasher. (Such collisions +// do not interfere with filter construction.) +// +// concept RehasherTypesAndSettings: like TypesAndSettings but +// does not require Key or HashFn. 
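+// (Illustrative aside on the seed pre-mixing in SetOrdinalSeed and
+// GetOrdinalSeed above; not part of the upstream source. The within-byte
+// mix, x ^= (x & kSeedMixMask) >> kSeedMixShift, is its own inverse because
+// the mask bits are disjoint from the bits it modifies (as the static_assert
+// above requires). A nibble-sized worked example with mask 0xf0, shift 4:
+//   x        = 0b1011'0110
+//   x & mask = 0b1011'0000, >> 4 = 0b0000'1011
+//   mixed    = 0b1011'1101
+// and re-applying the same step restores 0b1011'0110. Self-contained check:
+//
+//   #include <cassert>
+//   #include <cstdint>
+//   int main() {
+//     const uint64_t kMask = 0xf0f0f0f0f0f0f0f0ULL;
+//     const unsigned kShift = 4;
+//     uint64_t x = 0xdeadbeefcafef00dULL;  // arbitrary test value
+//     uint64_t mixed = x ^ ((x & kMask) >> kShift);
+//     uint64_t back = mixed ^ ((mixed & kMask) >> kShift);
+//     assert(back == x);                   // the mixing is an involution
+//   }
+// )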
+template <class RehasherTypesAndSettings>
+class StandardRehasherAdapter : public RehasherTypesAndSettings {
+ public:
+  using Hash = typename RehasherTypesAndSettings::Hash;
+  using Key = Hash;
+  using Seed = typename RehasherTypesAndSettings::Seed;
+
+  static Hash HashFn(const Hash& input, Seed raw_seed) {
+    // Note: raw_seed is already lightly pre-mixed, and this multiplication
+    // by a large prime is sufficient mixing (low-to-high bits) on top of
+    // that for good FastRange results, which depends primarily on highest
+    // bits. (The hashed CoeffRow and ResultRow are less sensitive to
+    // mixing than Start.)
+    // Also note: did consider adding ^ (input >> some) before the
+    // multiplication, but doesn't appear to be necessary.
+    return (input ^ raw_seed) * kRehashFactor;
+  }
+
+ private:
+  static constexpr Hash kRehashFactor =
+      static_cast<Hash>(0x6193d459236a3a0dULL);
+};
+
+// See comment on StandardRehasherAdapter
+template <class RehasherTypesAndSettings>
+using StandardRehasher =
+    StandardHasher<StandardRehasherAdapter<RehasherTypesAndSettings>>;
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+// Especially with smaller hashes (e.g. 32 bit), there can be noticeable
+// false positives due to collisions in the Hash returned by GetHash.
+// This function returns the expected FP rate due to those collisions,
+// which can be added to the expected FP rate from the underlying data
+// structure. (Note: technically, a + b is only a good approximation of
+// 1-(1-a)(1-b) == a + b - a*b, if a and b are much closer to 0 than to 1.)
+// The number of entries added can be a double here in case it's an
+// average.
+template <typename Hasher, typename Numerical>
+double ExpectedCollisionFpRate(const Hasher& hasher, Numerical added) {
+  // Standardize on the 'double' specialization
+  return ExpectedCollisionFpRate(hasher, 1.0 * added);
+}
+template <typename Hasher>
+double ExpectedCollisionFpRate(const Hasher& /*hasher*/, double added) {
+  // Technically, there could be overlap among the added, but ignoring that
+  // is typically close enough.
+  return added / std::pow(256.0, sizeof(typename Hasher::Hash));
+}
+
+// StandardBanding: a canonical implementation of BandingStorage and
+// BacktrackStorage, with convenience API for banding (solving with
+// on-the-fly Gaussian elimination) with and without backtracking.
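+// (Illustrative aside on ExpectedCollisionFpRate above; not part of the
+// upstream source. For a 32-bit Hash and one million added keys, the extra
+// FP rate from raw hash collisions is 1e6 / 256^4 = 1e6 / 2^32, roughly
+// 0.023%, which is why 64-bit hashes are recommended when the target FP
+// rate is small:
+//
+//   #include <cassert>
+//   #include <cmath>
+//   int main() {
+//     double added = 1.0e6;
+//     double fp32 = added / std::pow(256.0, 4);  // sizeof(uint32_t) == 4
+//     double fp64 = added / std::pow(256.0, 8);  // sizeof(uint64_t) == 8
+//     assert(fp32 > 2.3e-4 && fp32 < 2.4e-4);
+//     assert(fp64 < 1e-12);                      // negligible
+//   }
+// )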
+template +class StandardBanding : public StandardHasher { + public: + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); + + StandardBanding(Index num_slots = 0, Index backtrack_size = 0) { + Reset(num_slots, backtrack_size); + } + + void Reset(Index num_slots, Index backtrack_size = 0) { + if (num_slots == 0) { + // Unusual (TypesAndSettings::kAllowZeroStarts) or "uninitialized" + num_starts_ = 0; + } else { + // Normal + assert(num_slots >= kCoeffBits); + if (num_slots > num_slots_allocated_) { + coeff_rows_.reset(new CoeffRow[num_slots]()); + if (!TypesAndSettings::kHomogeneous) { + // Note: don't strictly have to zero-init result_rows, + // except possible information leakage, etc ;) + result_rows_.reset(new ResultRow[num_slots]()); + } + num_slots_allocated_ = num_slots; + } else { + for (Index i = 0; i < num_slots; ++i) { + coeff_rows_[i] = 0; + if (!TypesAndSettings::kHomogeneous) { + // Note: don't strictly have to zero-init result_rows, + // except possible information leakage, etc ;) + result_rows_[i] = 0; + } + } + } + num_starts_ = num_slots - kCoeffBits + 1; + } + EnsureBacktrackSize(backtrack_size); + } + + void EnsureBacktrackSize(Index backtrack_size) { + if (backtrack_size > backtrack_size_) { + backtrack_.reset(new Index[backtrack_size]); + backtrack_size_ = backtrack_size; + } + } + + // ******************************************************************** + // From concept BandingStorage + + inline bool UsePrefetch() const { + // A rough guesstimate of when prefetching during construction pays off. + // TODO: verify/validate + return num_starts_ > 1500; + } + inline void Prefetch(Index i) const { + PREFETCH(&coeff_rows_[i], 1 /* rw */, 1 /* locality */); + if (!TypesAndSettings::kHomogeneous) { + PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */); + } + } + inline void LoadRow(Index i, CoeffRow* cr, ResultRow* rr, + bool for_back_subst) const { + *cr = coeff_rows_[i]; + if (TypesAndSettings::kHomogeneous) { + if (for_back_subst && *cr == 0) { + // Cheap pseudorandom data to fill unconstrained solution rows + *rr = static_cast(i * 0x9E3779B185EBCA87ULL); + } else { + *rr = 0; + } + } else { + *rr = result_rows_[i]; + } + } + inline void StoreRow(Index i, CoeffRow cr, ResultRow rr) { + coeff_rows_[i] = cr; + if (TypesAndSettings::kHomogeneous) { + assert(rr == 0); + } else { + result_rows_[i] = rr; + } + } + inline Index GetNumStarts() const { return num_starts_; } + + // from concept BacktrackStorage, for when backtracking is used + inline bool UseBacktrack() const { return true; } + inline void BacktrackPut(Index i, Index to_save) { backtrack_[i] = to_save; } + inline Index BacktrackGet(Index i) const { return backtrack_[i]; } + + // ******************************************************************** + // Some useful API, still somewhat low level. Here an input is + // a Key for filters, or std::pair for general PHSF. + + // Adds a range of inputs to the banding, returning true if successful. + // False means none or some may have been successfully added, so it's + // best to Reset this banding before any further use. + // + // Adding can fail even before all the "slots" are completely "full". + // + template + bool AddRange(InputIterator begin, InputIterator end) { + assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Can't add any in this case. 
+ return begin == end; + } + // Normal + return BandingAddRange(this, *this, begin, end); + } + + // Adds a range of inputs to the banding, returning true if successful, + // or if unsuccessful, rolls back to state before this call and returns + // false. Caller guarantees that the number of inputs in this batch + // does not exceed `backtrack_size` provided to Reset. + // + // Adding can fail even before all the "slots" are completely "full". + // + template + bool AddRangeOrRollBack(InputIterator begin, InputIterator end) { + assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Can't add any in this case. + return begin == end; + } + // else Normal + return BandingAddRange(this, this, *this, begin, end); + } + + // Adds a single input to the banding, returning true if successful. + // If unsuccessful, returns false and banding state is unchanged. + // + // Adding can fail even before all the "slots" are completely "full". + // + bool Add(const AddInput& input) { + // Pointer can act as iterator + return AddRange(&input, &input + 1); + } + + // Return the number of "occupied" rows (with non-zero coefficients stored). + Index GetOccupiedCount() const { + Index count = 0; + if (num_starts_ > 0) { + const Index num_slots = num_starts_ + kCoeffBits - 1; + for (Index i = 0; i < num_slots; ++i) { + if (coeff_rows_[i] != 0) { + ++count; + } + } + } + return count; + } + + // Returns whether a row is "occupied" in the banding (non-zero + // coefficients stored). (Only recommended for debug/test) + bool IsOccupied(Index i) { return coeff_rows_[i] != 0; } + + // ******************************************************************** + // High-level API + + // Iteratively (a) resets the structure for `num_slots`, (b) attempts + // to add the range of inputs, and (c) if unsuccessful, chooses next + // hash seed, until either successful or unsuccessful with all the + // allowed seeds. Returns true if successful. In that case, use + // GetOrdinalSeed() or GetRawSeed() to get the successful seed. + // + // The allowed sequence of hash seeds is determined by + // `starting_ordinal_seed,` the first ordinal seed to be attempted + // (see StandardHasher), and `ordinal_seed_mask,` a bit mask (power of + // two minus one) for the range of ordinal seeds to consider. The + // max number of seeds considered will be ordinal_seed_mask + 1. + // For filters we suggest `starting_ordinal_seed` be chosen randomly + // or round-robin, to minimize false positive correlations between keys. + // + // If unsuccessful, how best to continue is going to be application + // specific. It should be possible to choose parameters such that + // failure is extremely unlikely, using max_seed around 32 to 64. + // (TODO: APIs to help choose parameters) One option for fallback in + // constructing a filter is to construct a Bloom filter instead. + // Increasing num_slots is an option, but should not be used often + // unless construction maximum latency is a concern (rather than + // average running time of construction). Instead, choose parameters + // appropriately and trust that seeds are independent. (Also, + // increasing num_slots without changing hash seed would have a + // significant correlation in success, rather than independence.) 
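+  // (Illustrative aside, not from the upstream source: a typical
+  // construction loop for ResetAndFindSeedToSolve below, assuming a
+  // hypothetical filter-style TypesAndSettings 'MyTS' and a hypothetical
+  // std::vector 'keys' of inputs:
+  //
+  //   StandardBanding<MyTS> banding;
+  //   Index num_slots = 10240;  // e.g. from BandingConfigHelper, rounded up
+  //   if (banding.ResetAndFindSeedToSolve(num_slots, keys.begin(),
+  //                                       keys.end(),
+  //                                       /*starting_ordinal_seed=*/7,
+  //                                       /*ordinal_seed_mask=*/63)) {
+  //     Seed s = banding.GetOrdinalSeed();  // persist with the solution
+  //   } else {
+  //     // fall back, e.g. to a Bloom filter, as suggested above
+  //   }
+  // )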
+ template + bool ResetAndFindSeedToSolve(Index num_slots, InputIterator begin, + InputIterator end, + Seed starting_ordinal_seed = 0U, + Seed ordinal_seed_mask = 63U) { + // power of 2 minus 1 + assert((ordinal_seed_mask & (ordinal_seed_mask + 1)) == 0); + // starting seed is within mask + assert((starting_ordinal_seed & ordinal_seed_mask) == + starting_ordinal_seed); + starting_ordinal_seed &= ordinal_seed_mask; // if not debug + + Seed cur_ordinal_seed = starting_ordinal_seed; + do { + StandardHasher::SetOrdinalSeed(cur_ordinal_seed); + Reset(num_slots); + bool success = AddRange(begin, end); + if (success) { + return true; + } + cur_ordinal_seed = (cur_ordinal_seed + 1) & ordinal_seed_mask; + } while (cur_ordinal_seed != starting_ordinal_seed); + // Reached limit by circling around + return false; + } + + static std::size_t EstimateMemoryUsage(uint32_t num_slots) { + std::size_t bytes_coeff_rows = num_slots * sizeof(CoeffRow); + std::size_t bytes_result_rows = num_slots * sizeof(ResultRow); + std::size_t bytes_backtrack = 0; + std::size_t bytes_banding = + bytes_coeff_rows + bytes_result_rows + bytes_backtrack; + + return bytes_banding; + } + + protected: + // TODO: explore combining in a struct + std::unique_ptr coeff_rows_; + std::unique_ptr result_rows_; + // We generally store "starts" instead of slots for speed of GetStart(), + // as in StandardHasher. + Index num_starts_ = 0; + Index num_slots_allocated_ = 0; + std::unique_ptr backtrack_; + Index backtrack_size_ = 0; +}; + +// Implements concept SimpleSolutionStorage, mostly for demonstration +// purposes. This is "in memory" only because it does not handle byte +// ordering issues for serialization. +template +class InMemSimpleSolution { + public: + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); + + void PrepareForNumStarts(Index num_starts) { + if (TypesAndSettings::kAllowZeroStarts && num_starts == 0) { + // Unusual + num_starts_ = 0; + } else { + // Normal + const Index num_slots = num_starts + kCoeffBits - 1; + assert(num_slots >= kCoeffBits); + if (num_slots > num_slots_allocated_) { + // Do not need to init the memory + solution_rows_.reset(new ResultRow[num_slots]); + num_slots_allocated_ = num_slots; + } + num_starts_ = num_starts; + } + } + + Index GetNumStarts() const { return num_starts_; } + + ResultRow Load(Index slot_num) const { return solution_rows_[slot_num]; } + + void Store(Index slot_num, ResultRow solution_row) { + solution_rows_[slot_num] = solution_row; + } + + // ******************************************************************** + // High-level API + + template + void BackSubstFrom(const BandingStorage& bs) { + if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { + // Unusual + PrepareForNumStarts(0); + } else { + // Normal + SimpleBackSubst(this, bs); + } + } + + template + ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { + // assert(!TypesAndSettings::kIsFilter); Can be useful in testing + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual + return 0; + } else { + // Normal + return SimplePhsfQuery(input, hasher, *this); + } + } + + template + bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { + assert(TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Zero starts presumes no keys added -> always false + return false; + } else { + // Normal, or upper_num_columns_ == 0 means "no space for data" and + // thus will always return true. 
+      return SimpleFilterQuery(input, hasher, *this);
+    }
+  }
+
+  double ExpectedFpRate() const {
+    assert(TypesAndSettings::kIsFilter);
+    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
+      // Unusual, but we don't have FPs if we always return false.
+      return 0.0;
+    }
+    // else Normal
+
+    // Each result (solution) bit (column) cuts FP rate in half
+    return std::pow(0.5, 8U * sizeof(ResultRow));
+  }
+
+  // ********************************************************************
+  // Static high-level API
+
+  // Round up to a number of slots supported by this structure. Note that
+  // this must be taken into account for the banding if this solution
+  // layout/storage is to be used.
+  static Index RoundUpNumSlots(Index num_slots) {
+    // Must be at least kCoeffBits for at least one start
+    // Or if not smash, even more because hashing not equipped
+    // for stacking up so many entries on a single start location
+    auto min_slots = kCoeffBits * (TypesAndSettings::kUseSmash ? 1 : 2);
+    return std::max(num_slots, static_cast<Index>(min_slots));
+  }
+
+ protected:
+  // We generally store "starts" instead of slots for speed of GetStart(),
+  // as in StandardHasher.
+  Index num_starts_ = 0;
+  Index num_slots_allocated_ = 0;
+  std::unique_ptr<ResultRow[]> solution_rows_;
+};
+
+// Implements concept InterleavedSolutionStorage always using little-endian
+// byte order, so easy for serialization/deserialization. This implementation
+// fully supports fractional bits per key, where any number of segments
+// (number of bytes multiple of sizeof(CoeffRow)) can be used with any number
+// of slots that is a multiple of kCoeffBits.
+//
+// The structure is passed an externally allocated/de-allocated byte buffer
+// that is optionally pre-populated (from storage) for answering queries,
+// or can be populated by BackSubstFrom.
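+//
+// For example (an illustrative sketch, not part of the upstream docs;
+// `MySettings` is a hypothetical TypesAndSettings, `banding` a solved
+// StandardBanding, and `hasher` a filter-query hasher using the same seed):
+//
+//   std::unique_ptr<char[]> buf(new char[bytes]());
+//   SerializableInterleavedSolution<MySettings> soln(buf.get(), bytes);
+//   soln.ConfigureForNumSlots(num_slots);  // when loading buf from storage
+//   soln.BackSubstFrom(banding);           // when populating from a banding
+//   bool maybe_present = soln.FilterQuery(some_key, hasher);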
+//
+template <class TypesAndSettings>
+class SerializableInterleavedSolution {
+ public:
+  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);
+
+  // Does not take ownership of `data` but uses it (up to `data_len` bytes)
+  // throughout lifetime
+  SerializableInterleavedSolution(char* data, size_t data_len)
+      : data_(data), data_len_(data_len) {}
+
+  void PrepareForNumStarts(Index num_starts) {
+    assert(num_starts == 0 || (num_starts % kCoeffBits == 1));
+    num_starts_ = num_starts;
+
+    InternalConfigure();
+  }
+
+  Index GetNumStarts() const { return num_starts_; }
+
+  Index GetNumBlocks() const {
+    const Index num_slots = num_starts_ + kCoeffBits - 1;
+    return num_slots / kCoeffBits;
+  }
+
+  Index GetUpperNumColumns() const { return upper_num_columns_; }
+
+  Index GetUpperStartBlock() const { return upper_start_block_; }
+
+  Index GetNumSegments() const {
+    return static_cast<Index>(data_len_ / sizeof(CoeffRow));
+  }
+
+  CoeffRow LoadSegment(Index segment_num) const {
+    assert(data_ != nullptr);  // suppress clang analyzer report
+    return DecodeFixedGeneric<CoeffRow>(data_ + segment_num * sizeof(CoeffRow));
+  }
+  void StoreSegment(Index segment_num, CoeffRow val) {
+    assert(data_ != nullptr);  // suppress clang analyzer report
+    EncodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow), val);
+  }
+  void PrefetchSegmentRange(Index begin_segment_num,
+                            Index end_segment_num) const {
+    if (end_segment_num == begin_segment_num) {
+      // Nothing to do
+      return;
+    }
+    char* cur = data_ + begin_segment_num * sizeof(CoeffRow);
+    char* last = data_ + (end_segment_num - 1) * sizeof(CoeffRow);
+    while (cur < last) {
+      PREFETCH(cur, 0 /* rw */, 1 /* locality */);
+      cur += CACHE_LINE_SIZE;
+    }
+    PREFETCH(last, 0 /* rw */, 1 /* locality */);
+  }
+
+  // ********************************************************************
+  // High-level API
+
+  void ConfigureForNumBlocks(Index num_blocks) {
+    if (num_blocks == 0) {
+      PrepareForNumStarts(0);
+    } else {
+      PrepareForNumStarts(num_blocks * kCoeffBits - kCoeffBits + 1);
+    }
+  }
+
+  void ConfigureForNumSlots(Index num_slots) {
+    assert(num_slots % kCoeffBits == 0);
+    ConfigureForNumBlocks(num_slots / kCoeffBits);
+  }
+
+  template <typename BandingStorage>
+  void BackSubstFrom(const BandingStorage& bs) {
+    if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) {
+      // Unusual
+      PrepareForNumStarts(0);
+    } else {
+      // Normal
+      InterleavedBackSubst(this, bs);
+    }
+  }
+
+  template <typename PhsfQueryHasher>
+  ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const {
+    // assert(!TypesAndSettings::kIsFilter);  Can be useful in testing
+    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
+      // Unusual
+      return 0;
+    } else {
+      // Normal
+      // NOTE: not using a struct to encourage compiler optimization
+      Hash hash;
+      Index segment_num;
+      Index num_columns;
+      Index start_bit;
+      InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num,
+                              &num_columns, &start_bit);
+      return InterleavedPhsfQuery(hash, segment_num, num_columns, start_bit,
+                                  hasher, *this);
+    }
+  }
+
+  template <typename FilterQueryHasher>
+  bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const {
+    assert(TypesAndSettings::kIsFilter);
+    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
+      // Unusual. Zero starts presumes no keys added -> always false
+      return false;
+    } else {
+      // Normal, or upper_num_columns_ == 0 means "no space for data" and
+      // thus will always return true.
+      // NOTE: not using a struct to encourage compiler optimization
+      Hash hash;
+      Index segment_num;
+      Index num_columns;
+      Index start_bit;
+      InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num,
+                              &num_columns, &start_bit);
+      return InterleavedFilterQuery(hash, segment_num, num_columns, start_bit,
+                                    hasher, *this);
+    }
+  }
+
+  double ExpectedFpRate() const {
+    assert(TypesAndSettings::kIsFilter);
+    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
+      // Unusual. Zero starts presumes no keys added -> always false
+      return 0.0;
+    }
+    // else Normal
+
+    // Note: Ignoring smash setting; still close enough in that case
+    double lower_portion =
+        (upper_start_block_ * 1.0 * kCoeffBits) / num_starts_;
+
+    // Each result (solution) bit (column) cuts FP rate in half. Weight that
+    // for upper and lower number of bits (columns).
+    return lower_portion * std::pow(0.5, upper_num_columns_ - 1) +
+           (1.0 - lower_portion) * std::pow(0.5, upper_num_columns_);
+  }
+
+  // ********************************************************************
+  // Static high-level API
+
+  // Round up to a number of slots supported by this structure. Note that
+  // this must be taken into account for the banding if this solution
+  // layout/storage is to be used.
+  static Index RoundUpNumSlots(Index num_slots) {
+    // Must be multiple of kCoeffBits
+    Index corrected = (num_slots + kCoeffBits - 1) / kCoeffBits * kCoeffBits;
+
+    // Do not use num_starts==1 unless kUseSmash, because the hashing
+    // might not be equipped for stacking up so many entries on a
+    // single start location.
+    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
+      corrected += kCoeffBits;
+    }
+    return corrected;
+  }
+
+  // Round down to a number of slots supported by this structure. Note that
+  // this must be taken into account for the banding if this solution
+  // layout/storage is to be used.
+  static Index RoundDownNumSlots(Index num_slots) {
+    // Must be multiple of kCoeffBits
+    Index corrected = num_slots / kCoeffBits * kCoeffBits;
+
+    // Do not use num_starts==1 unless kUseSmash, because the hashing
+    // might not be equipped for stacking up so many entries on a
+    // single start location.
+    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
+      corrected = 0;
+    }
+    return corrected;
+  }
+
+  // Compute the number of bytes for a given number of slots and desired
+  // FP rate. Since desired FP rate might not be exactly achievable,
+  // rounding_bias32==0 means to always round toward lower FP rate
+  // than desired (more bytes); rounding_bias32==max uint32_t means always
+  // round toward higher FP rate than desired (fewer bytes); other values
+  // act as a proportional threshold or bias between the two.
+  static size_t GetBytesForFpRate(Index num_slots, double desired_fp_rate,
+                                  uint32_t rounding_bias32) {
+    return InternalGetBytesForFpRate(num_slots, desired_fp_rate,
+                                     1.0 / desired_fp_rate, rounding_bias32);
+  }
+
+  // The same, but specifying desired accuracy as 1.0 / FP rate, or
+  // one_in_fp_rate. E.g. desired_one_in_fp_rate=100 means 1% FP rate.
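+  //
+  // For example (an illustrative worked case, not from the upstream docs;
+  // `InterleavedSoln` stands for this class instantiated with settings
+  // where kCoeffBits == 64): with num_slots == 1024 and
+  // desired_one_in_fp_rate == 100, the target falls between 6 columns
+  // (FP rate 1/64) and 7 columns (FP rate 1/128), so some blocks get 6
+  // columns and the rest 7, with rounding_bias32 steering the split:
+  //
+  //   size_t bytes = InterleavedSoln::GetBytesForOneInFpRate(
+  //       1024, 100.0, /*rounding_bias32=*/0);  // 0 -> round to more bytes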
+  static size_t GetBytesForOneInFpRate(Index num_slots,
+                                       double desired_one_in_fp_rate,
+                                       uint32_t rounding_bias32) {
+    return InternalGetBytesForFpRate(num_slots, 1.0 / desired_one_in_fp_rate,
+                                     desired_one_in_fp_rate, rounding_bias32);
+  }
+
+ protected:
+  static size_t InternalGetBytesForFpRate(Index num_slots,
+                                          double desired_fp_rate,
+                                          double desired_one_in_fp_rate,
+                                          uint32_t rounding_bias32) {
+    assert(TypesAndSettings::kIsFilter);
+    if (TypesAndSettings::kAllowZeroStarts) {
+      if (num_slots == 0) {
+        // Unusual. Zero starts presumes no keys added -> always false (no FPs)
+        return 0U;
+      }
+    } else {
+      assert(num_slots > 0);
+    }
+    // Must be rounded up already.
+    assert(RoundUpNumSlots(num_slots) == num_slots);
+
+    if (desired_one_in_fp_rate > 1.0 && desired_fp_rate < 1.0) {
+      // Typical: less than 100% FP rate
+      if (desired_one_in_fp_rate <= static_cast<ResultRow>(-1)) {
+        // Typical: Less than maximum result row entropy
+        ResultRow rounded = static_cast<ResultRow>(desired_one_in_fp_rate);
+        int lower_columns = FloorLog2(rounded);
+        double lower_columns_fp_rate = std::pow(2.0, -lower_columns);
+        double upper_columns_fp_rate = std::pow(2.0, -(lower_columns + 1));
+        // Floating point don't let me down!
+        assert(lower_columns_fp_rate >= desired_fp_rate);
+        assert(upper_columns_fp_rate <= desired_fp_rate);
+
+        double lower_portion = (desired_fp_rate - upper_columns_fp_rate) /
+                               (lower_columns_fp_rate - upper_columns_fp_rate);
+        // Floating point don't let me down!
+        assert(lower_portion >= 0.0);
+        assert(lower_portion <= 1.0);
+
+        double rounding_bias = (rounding_bias32 + 0.5) / double{0x100000000};
+        assert(rounding_bias > 0.0);
+        assert(rounding_bias < 1.0);
+
+        // Note: Ignoring smash setting; still close enough in that case
+        Index num_starts = num_slots - kCoeffBits + 1;
+        // Lower upper_start_block means lower FP rate (higher accuracy)
+        Index upper_start_block = static_cast<Index>(
+            (lower_portion * num_starts + rounding_bias) / kCoeffBits);
+        Index num_blocks = num_slots / kCoeffBits;
+        assert(upper_start_block < num_blocks);
+
+        // Start by assuming all blocks use lower number of columns
+        Index num_segments = num_blocks * static_cast<Index>(lower_columns);
+        // Correct by 1 each for blocks using upper number of columns
+        num_segments += (num_blocks - upper_start_block);
+        // Total bytes
+        return num_segments * sizeof(CoeffRow);
+      } else {
+        // one_in_fp_rate too big, thus requested FP rate is smaller than
+        // supported. Use max number of columns for minimum supported FP rate.
+        return num_slots * sizeof(ResultRow);
+      }
+    } else {
+      // Effectively asking for 100% FP rate, or NaN etc.
+      if (TypesAndSettings::kAllowZeroStarts) {
+        // Zero segments
+        return 0U;
+      } else {
+        // One segment (minimum size, maximizing FP rate)
+        return sizeof(CoeffRow);
+      }
+    }
+  }
+
+  void InternalConfigure() {
+    const Index num_blocks = GetNumBlocks();
+    Index num_segments = GetNumSegments();
+
+    if (num_blocks == 0) {
+      // Exceptional
+      upper_num_columns_ = 0;
+      upper_start_block_ = 0;
+    } else {
+      // Normal
+      upper_num_columns_ =
+          (num_segments + /*round up*/ num_blocks - 1) / num_blocks;
+      upper_start_block_ = upper_num_columns_ * num_blocks - num_segments;
+      // Unless that's more columns than supported by ResultRow data type
+      if (upper_num_columns_ > 8U * sizeof(ResultRow)) {
+        // Use maximum columns (there will be space unused)
+        upper_num_columns_ = static_cast<Index>(8U * sizeof(ResultRow));
+        upper_start_block_ = 0;
+        num_segments = num_blocks * upper_num_columns_;
+      }
+    }
+    // Update data_len_ for correct rounding and/or unused space
+    // NOTE: unused space stays gone if we PrepareForNumStarts again.
+    // We are prioritizing minimizing the number of fields over making
+    // the "unused space" feature work well.
+    data_len_ = num_segments * sizeof(CoeffRow);
+  }
+
+  char* const data_;
+  size_t data_len_;
+  Index num_starts_ = 0;
+  Index upper_num_columns_ = 0;
+  Index upper_start_block_ = 0;
+};
+
+}  // namespace ribbon
+
+}  // namespace ROCKSDB_NAMESPACE
+
+// For convenience working with templates
+#define IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings)                            \
+  using Hasher = ROCKSDB_NAMESPACE::ribbon::StandardHasher<TypesAndSettings>; \
+  using Banding =                                                             \
+      ROCKSDB_NAMESPACE::ribbon::StandardBanding<TypesAndSettings>;           \
+  using SimpleSoln =                                                          \
+      ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<TypesAndSettings>;       \
+  using InterleavedSoln =                                                     \
+      ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution<             \
+          TypesAndSettings>;                                                  \
+  static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) +       \
+                        sizeof(InterleavedSoln) >                             \
+                    0,                                                        \
+                "avoid unused warnings, semicolon expected after macro call")
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/set_comparator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/set_comparator.h
new file mode 100644
index 0000000000..e0e64436ad
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/set_comparator.h
@@ -0,0 +1,24 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/comparator.h"
+
+namespace ROCKSDB_NAMESPACE {
+// A comparator to be used in std::set
+struct SetComparator {
+  explicit SetComparator() : user_comparator_(BytewiseComparator()) {}
+  explicit SetComparator(const Comparator* user_comparator)
+      : user_comparator_(user_comparator ? user_comparator
+                                         : BytewiseComparator()) {}
+  bool operator()(const Slice& lhs, const Slice& rhs) const {
+    return user_comparator_->Compare(lhs, rhs) < 0;
+  }
+
+ private:
+  const Comparator* user_comparator_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/single_thread_executor.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/single_thread_executor.h
new file mode 100644
index 0000000000..c69f2a2921
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/single_thread_executor.h
@@ -0,0 +1,56 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once + +#if USE_COROUTINES +#include + +#include "folly/CPortability.h" +#include "folly/CppAttributes.h" +#include "folly/Executor.h" +#include "util/async_file_reader.h" + +namespace ROCKSDB_NAMESPACE { +// Implements a simple executor that runs callback functions in the same +// thread, unlike CPUThreadExecutor which may schedule the callback on +// another thread. Runs in a tight loop calling the queued callbacks, +// and polls for async IO completions when idle. The completions will +// resume suspended coroutines and they get added to the queue, which +// will get picked up by this loop. +// Any possibility of deadlock is precluded because the file system +// guarantees that async IO completion callbacks will not be scheduled +// to run in this thread or this executor. +class SingleThreadExecutor : public folly::Executor { + public: + explicit SingleThreadExecutor(AsyncFileReader& reader) + : reader_(reader), busy_(false) {} + + void add(folly::Func callback) override { + auto& q = q_; + q.push(std::move(callback)); + if (q.size() == 1 && !busy_) { + while (!q.empty()) { + q.front()(); + q.pop(); + + if (q.empty()) { + // Prevent recursion, as the Wait may queue resumed coroutines + busy_ = true; + reader_.Wait(); + busy_ = false; + } + } + } + } + + private: + std::queue q_; + AsyncFileReader& reader_; + bool busy_; +}; +} // namespace ROCKSDB_NAMESPACE +#endif // USE_COROUTINES diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/slice.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/slice.cc new file mode 100644 index 0000000000..22dd7ee6bb --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/slice.cc @@ -0,0 +1,367 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "rocksdb/slice.h" + +#include + +#include + +#include "rocksdb/convenience.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +class FixedPrefixTransform : public SliceTransform { + private: + size_t prefix_len_; + std::string id_; + + public: + explicit FixedPrefixTransform(size_t prefix_len) : prefix_len_(prefix_len) { + id_ = std::string(kClassName()) + "." 
+ std::to_string(prefix_len_); + } + + static const char* kClassName() { return "rocksdb.FixedPrefix"; } + static const char* kNickName() { return "fixed"; } + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kNickName(); } + + bool IsInstanceOf(const std::string& name) const override { + if (name == id_) { + return true; + } else if (StartsWith(name, kNickName())) { + std::string alt_id = + std::string(kNickName()) + ":" + std::to_string(prefix_len_); + if (name == alt_id) { + return true; + } + } + return SliceTransform::IsInstanceOf(name); + } + + std::string GetId() const override { return id_; } + + Slice Transform(const Slice& src) const override { + assert(InDomain(src)); + return Slice(src.data(), prefix_len_); + } + + bool InDomain(const Slice& src) const override { + return (src.size() >= prefix_len_); + } + + bool InRange(const Slice& dst) const override { + return (dst.size() == prefix_len_); + } + + bool FullLengthEnabled(size_t* len) const override { + *len = prefix_len_; + return true; + } + + bool SameResultWhenAppended(const Slice& prefix) const override { + return InDomain(prefix); + } +}; + +class CappedPrefixTransform : public SliceTransform { + private: + size_t cap_len_; + std::string id_; + + public: + explicit CappedPrefixTransform(size_t cap_len) : cap_len_(cap_len) { + id_ = std::string(kClassName()) + "." + std::to_string(cap_len_); + } + + static const char* kClassName() { return "rocksdb.CappedPrefix"; } + static const char* kNickName() { return "capped"; } + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kNickName(); } + std::string GetId() const override { return id_; } + + bool IsInstanceOf(const std::string& name) const override { + if (name == id_) { + return true; + } else if (StartsWith(name, kNickName())) { + std::string alt_id = + std::string(kNickName()) + ":" + std::to_string(cap_len_); + if (name == alt_id) { + return true; + } + } + return SliceTransform::IsInstanceOf(name); + } + + Slice Transform(const Slice& src) const override { + assert(InDomain(src)); + return Slice(src.data(), std::min(cap_len_, src.size())); + } + + bool InDomain(const Slice& /*src*/) const override { return true; } + + bool InRange(const Slice& dst) const override { + return (dst.size() <= cap_len_); + } + + bool FullLengthEnabled(size_t* len) const override { + *len = cap_len_; + return true; + } + + bool SameResultWhenAppended(const Slice& prefix) const override { + return prefix.size() >= cap_len_; + } +}; + +class NoopTransform : public SliceTransform { + public: + explicit NoopTransform() {} + + static const char* kClassName() { return "rocksdb.Noop"; } + const char* Name() const override { return kClassName(); } + + Slice Transform(const Slice& src) const override { return src; } + + bool InDomain(const Slice& /*src*/) const override { return true; } + + bool InRange(const Slice& /*dst*/) const override { return true; } + + bool SameResultWhenAppended(const Slice& /*prefix*/) const override { + return false; + } +}; + +} // end namespace + +const SliceTransform* NewFixedPrefixTransform(size_t prefix_len) { + return new FixedPrefixTransform(prefix_len); +} + +const SliceTransform* NewCappedPrefixTransform(size_t cap_len) { + return new CappedPrefixTransform(cap_len); +} + +const SliceTransform* NewNoopTransform() { return new NoopTransform; } + +static int RegisterBuiltinSliceTransform(ObjectLibrary& library, + const std::string& /*arg*/) { + // For 
+  // the builtin transforms, the format is typically
+  // [Name].[0-9]+ or [NickName]:[0-9]+
+  library.AddFactory<const SliceTransform>(
+      NoopTransform::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<const SliceTransform>* guard,
+         std::string* /*errmsg*/) {
+        guard->reset(NewNoopTransform());
+        return guard->get();
+      });
+  library.AddFactory<const SliceTransform>(
+      ObjectLibrary::PatternEntry(FixedPrefixTransform::kNickName(), false)
+          .AddNumber(":"),
+      [](const std::string& uri, std::unique_ptr<const SliceTransform>* guard,
+         std::string* /*errmsg*/) {
+        auto colon = uri.find(":");
+        auto len = ParseSizeT(uri.substr(colon + 1));
+        guard->reset(NewFixedPrefixTransform(len));
+        return guard->get();
+      });
+  library.AddFactory<const SliceTransform>(
+      ObjectLibrary::PatternEntry(FixedPrefixTransform::kClassName(), false)
+          .AddNumber("."),
+      [](const std::string& uri, std::unique_ptr<const SliceTransform>* guard,
+         std::string* /*errmsg*/) {
+        auto len = ParseSizeT(
+            uri.substr(strlen(FixedPrefixTransform::kClassName()) + 1));
+        guard->reset(NewFixedPrefixTransform(len));
+        return guard->get();
+      });
+  library.AddFactory<const SliceTransform>(
+      ObjectLibrary::PatternEntry(CappedPrefixTransform::kNickName(), false)
+          .AddNumber(":"),
+      [](const std::string& uri, std::unique_ptr<const SliceTransform>* guard,
+         std::string* /*errmsg*/) {
+        auto colon = uri.find(":");
+        auto len = ParseSizeT(uri.substr(colon + 1));
+        guard->reset(NewCappedPrefixTransform(len));
+        return guard->get();
+      });
+  library.AddFactory<const SliceTransform>(
+      ObjectLibrary::PatternEntry(CappedPrefixTransform::kClassName(), false)
+          .AddNumber("."),
+      [](const std::string& uri, std::unique_ptr<const SliceTransform>* guard,
+         std::string* /*errmsg*/) {
+        auto len = ParseSizeT(
+            uri.substr(strlen(CappedPrefixTransform::kClassName()) + 1));
+        guard->reset(NewCappedPrefixTransform(len));
+        return guard->get();
+      });
+  size_t num_types;
+  return static_cast<int>(library.GetFactoryCount(&num_types));
+}
+
+Status SliceTransform::CreateFromString(
+    const ConfigOptions& config_options, const std::string& value,
+    std::shared_ptr<const SliceTransform>* result) {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    RegisterBuiltinSliceTransform(*(ObjectLibrary::Default().get()), "");
+  });
+  std::string id;
+  std::unordered_map<std::string, std::string> opt_map;
+  Status status = Customizable::GetOptionsMap(config_options, result->get(),
+                                              value, &id, &opt_map);
+  if (!status.ok()) {  // GetOptionsMap failed
+    return status;
+  } else if (id.empty() && opt_map.empty()) {
+    result->reset();
+  } else {
+    status = config_options.registry->NewSharedObject(id, result);
+    if (config_options.ignore_unsupported_options && status.IsNotSupported()) {
+      return Status::OK();
+    } else if (status.ok()) {
+      SliceTransform* transform = const_cast<SliceTransform*>(result->get());
+      status =
+          Customizable::ConfigureNewObject(config_options, transform, opt_map);
+    }
+  }
+  return status;
+}
+
+std::string SliceTransform::AsString() const {
+  if (HasRegisteredOptions()) {
+    ConfigOptions opts;
+    opts.delimiter = ";";
+    return ToString(opts);
+  }
+  return GetId();
+}
+
+// 2 small internal utility functions, for efficient hex conversions
+// and no need for snprintf, toupper etc...
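+// For example (illustrative): toHex(0xB) == 'B'; fromHex('b') == 11;
+// fromHex('g') == -1 (rejected as an invalid hex digit).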
+// Originally from wdt/util/EncryptionUtils.cpp - for +// std::to_string(true)/DecodeHex: +char toHex(unsigned char v) { + if (v <= 9) { + return '0' + v; + } + return 'A' + v - 10; +} +// most of the code is for validation/error check +int fromHex(char c) { + // toupper: + if (c >= 'a' && c <= 'f') { + c -= ('a' - 'A'); // aka 0x20 + } + // validation + if (c < '0' || (c > '9' && (c < 'A' || c > 'F'))) { + return -1; // invalid not 0-9A-F hex char + } + if (c <= '9') { + return c - '0'; + } + return c - 'A' + 10; +} + +Slice::Slice(const SliceParts& parts, std::string* buf) { + size_t length = 0; + for (int i = 0; i < parts.num_parts; ++i) { + length += parts.parts[i].size(); + } + buf->reserve(length); + + for (int i = 0; i < parts.num_parts; ++i) { + buf->append(parts.parts[i].data(), parts.parts[i].size()); + } + data_ = buf->data(); + size_ = buf->size(); +} + +// Return a string that contains the copy of the referenced data. +std::string Slice::ToString(bool hex) const { + std::string result; // RVO/NRVO/move + if (hex) { + result.reserve(2 * size_); + for (size_t i = 0; i < size_; ++i) { + unsigned char c = data_[i]; + result.push_back(toHex(c >> 4)); + result.push_back(toHex(c & 0xf)); + } + return result; + } else { + result.assign(data_, size_); + return result; + } +} + +// Originally from rocksdb/utilities/ldb_cmd.h +bool Slice::DecodeHex(std::string* result) const { + std::string::size_type len = size_; + if (len % 2) { + // Hex string must be even number of hex digits to get complete bytes back + return false; + } + if (!result) { + return false; + } + result->clear(); + result->reserve(len / 2); + + for (size_t i = 0; i < len;) { + int h1 = fromHex(data_[i++]); + if (h1 < 0) { + return false; + } + int h2 = fromHex(data_[i++]); + if (h2 < 0) { + return false; + } + result->push_back(static_cast((h1 << 4) | h2)); + } + return true; +} + +PinnableSlice::PinnableSlice(PinnableSlice&& other) { + *this = std::move(other); +} + +PinnableSlice& PinnableSlice::operator=(PinnableSlice&& other) { + if (this != &other) { + Cleanable::Reset(); + Cleanable::operator=(std::move(other)); + size_ = other.size_; + pinned_ = other.pinned_; + if (pinned_) { + data_ = other.data_; + // When it's pinned, buf should no longer be of use. + } else { + if (other.buf_ == &other.self_space_) { + self_space_ = std::move(other.self_space_); + buf_ = &self_space_; + data_ = buf_->data(); + } else { + buf_ = other.buf_; + data_ = other.data_; + } + } + other.self_space_.clear(); + other.buf_ = &other.self_space_; + other.pinned_ = false; + other.PinSelf(); + } + return *this; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/status.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/status.cc new file mode 100644 index 0000000000..ead315848d --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/status.cc @@ -0,0 +1,162 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include "rocksdb/status.h" + +#include +#ifdef OS_WIN +#include +#endif +#include + +#include "port/port.h" + +namespace ROCKSDB_NAMESPACE { + +std::unique_ptr Status::CopyState(const char* s) { + const size_t cch = std::strlen(s) + 1; // +1 for the null terminator + char* rv = new char[cch]; + std::strncpy(rv, s, cch); + return std::unique_ptr(rv); +} + +static const char* msgs[static_cast(Status::kMaxSubCode)] = { + "", // kNone + "Timeout Acquiring Mutex", // kMutexTimeout + "Timeout waiting to lock key", // kLockTimeout + "Failed to acquire lock due to max_num_locks limit", // kLockLimit + "No space left on device", // kNoSpace + "Deadlock", // kDeadlock + "Stale file handle", // kStaleFile + "Memory limit reached", // kMemoryLimit + "Space limit reached", // kSpaceLimit + "No such file or directory", // kPathNotFound + // KMergeOperandsInsufficientCapacity + "Insufficient capacity for merge operands", + // kManualCompactionPaused + "Manual compaction paused", + " (overwritten)", // kOverwritten, subcode of OK + "Txn not prepared", // kTxnNotPrepared + "IO fenced off", // kIOFenced + "Merge operator failed", // kMergeOperatorFailed +}; + +Status::Status(Code _code, SubCode _subcode, const Slice& msg, + const Slice& msg2, Severity sev) + : code_(_code), + subcode_(_subcode), + sev_(sev), + retryable_(false), + data_loss_(false), + scope_(0) { + assert(subcode_ != kMaxSubCode); + const size_t len1 = msg.size(); + const size_t len2 = msg2.size(); + const size_t size = len1 + (len2 ? (2 + len2) : 0); + char* const result = new char[size + 1]; // +1 for null terminator + memcpy(result, msg.data(), len1); + if (len2) { + result[len1] = ':'; + result[len1 + 1] = ' '; + memcpy(result + len1 + 2, msg2.data(), len2); + } + result[size] = '\0'; // null terminator for C style string + state_.reset(result); +} + +Status Status::CopyAppendMessage(const Status& s, const Slice& delim, + const Slice& msg) { + // (No attempt at efficiency) + return Status(s.code(), s.subcode(), s.severity(), + std::string(s.getState()) + delim.ToString() + msg.ToString()); +} + +std::string Status::ToString() const { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + checked_ = true; +#endif // ROCKSDB_ASSERT_STATUS_CHECKED + const char* type = nullptr; + switch (code_) { + case kOk: + return "OK"; + case kNotFound: + type = "NotFound: "; + break; + case kCorruption: + type = "Corruption: "; + break; + case kNotSupported: + type = "Not implemented: "; + break; + case kInvalidArgument: + type = "Invalid argument: "; + break; + case kIOError: + type = "IO error: "; + break; + case kMergeInProgress: + type = "Merge in progress: "; + break; + case kIncomplete: + type = "Result incomplete: "; + break; + case kShutdownInProgress: + type = "Shutdown in progress: "; + break; + case kTimedOut: + type = "Operation timed out: "; + break; + case kAborted: + type = "Operation aborted: "; + break; + case kBusy: + type = "Resource busy: "; + break; + case kExpired: + type = "Operation expired: "; + break; + case kTryAgain: + type = "Operation failed. Try again.: "; + break; + case kCompactionTooLarge: + type = "Compaction too large: "; + break; + case kColumnFamilyDropped: + type = "Column family dropped: "; + break; + case kMaxCode: + assert(false); + break; + } + char tmp[30]; + if (type == nullptr) { + // This should not happen since `code_` should be a valid non-`kMaxCode` + // member of the `Code` enum. The above switch-statement should have had a + // case assigning `type` to a corresponding string. 
+ assert(false); + snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", static_cast(code())); + type = tmp; + } + std::string result(type); + if (subcode_ != kNone) { + uint32_t index = static_cast(subcode_); + assert(sizeof(msgs) / sizeof(msgs[0]) > index); + result.append(msgs[index]); + } + + if (state_ != nullptr) { + if (subcode_ != kNone) { + result.append(": "); + } + result.append(state_.get()); + } + return result; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.cc new file mode 100644 index 0000000000..6044b8b936 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.cc @@ -0,0 +1,30 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/stderr_logger.h" + +#include "port/sys_time.h" + +namespace ROCKSDB_NAMESPACE { +StderrLogger::~StderrLogger() {} + +void StderrLogger::Logv(const char* format, va_list ap) { + const uint64_t thread_id = Env::Default()->GetThreadID(); + + port::TimeVal now_tv; + port::GetTimeOfDay(&now_tv, nullptr); + const time_t seconds = now_tv.tv_sec; + struct tm t; + port::LocalTimeR(&seconds, &t); + fprintf(stderr, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", t.tm_year + 1900, + t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, + static_cast(now_tv.tv_usec), + static_cast(thread_id)); + + vfprintf(stderr, format, ap); + fprintf(stderr, "\n"); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.h new file mode 100644 index 0000000000..c3b01210ce --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stderr_logger.h @@ -0,0 +1,31 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { + +// Prints logs to stderr for faster debugging +class StderrLogger : public Logger { + public: + explicit StderrLogger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL) + : Logger(log_level) {} + + ~StderrLogger() override; + + // Brings overloaded Logv()s into scope so they're not hidden when we override + // a subset of them. + using Logger::Logv; + + virtual void Logv(const char* format, va_list ap) override; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stop_watch.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stop_watch.h new file mode 100644 index 0000000000..ece72007b8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/stop_watch.h @@ -0,0 +1,136 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once +#include "monitoring/statistics_impl.h" +#include "rocksdb/system_clock.h" + +namespace ROCKSDB_NAMESPACE { +// Auto-scoped. 
+// When statistics is not nullptr, records the measured time into any enabled +// histograms supplied to the constructor. A histogram argument may be omitted +// by setting it to Histograms::HISTOGRAM_ENUM_MAX. It is also saved into +// *elapsed if the pointer is not nullptr and overwrite is true, it will be +// added to *elapsed if overwrite is false. +class StopWatch { + public: + StopWatch(SystemClock* clock, Statistics* statistics, + const uint32_t hist_type_1, + const uint32_t hist_type_2 = Histograms::HISTOGRAM_ENUM_MAX, + uint64_t* elapsed = nullptr, bool overwrite = true, + bool delay_enabled = false) + : clock_(clock), + statistics_(statistics), + hist_type_1_(statistics && statistics->HistEnabledForType(hist_type_1) + ? hist_type_1 + : Histograms::HISTOGRAM_ENUM_MAX), + hist_type_2_(statistics && statistics->HistEnabledForType(hist_type_2) + ? hist_type_2 + : Histograms::HISTOGRAM_ENUM_MAX), + elapsed_(elapsed), + overwrite_(overwrite), + stats_enabled_(statistics && + statistics->get_stats_level() >= + StatsLevel::kExceptTimers && + (hist_type_1_ != Histograms::HISTOGRAM_ENUM_MAX || + hist_type_2_ != Histograms::HISTOGRAM_ENUM_MAX)), + delay_enabled_(delay_enabled), + total_delay_(0), + delay_start_time_(0), + start_time_((stats_enabled_ || elapsed != nullptr) ? clock->NowMicros() + : 0) {} + + ~StopWatch() { + if (elapsed_) { + if (overwrite_) { + *elapsed_ = clock_->NowMicros() - start_time_; + } else { + *elapsed_ += clock_->NowMicros() - start_time_; + } + } + if (elapsed_ && delay_enabled_) { + *elapsed_ -= total_delay_; + } + if (stats_enabled_) { + const auto time = (elapsed_ != nullptr) + ? *elapsed_ + : (clock_->NowMicros() - start_time_); + if (hist_type_1_ != Histograms::HISTOGRAM_ENUM_MAX) { + statistics_->reportTimeToHistogram(hist_type_1_, time); + } + if (hist_type_2_ != Histograms::HISTOGRAM_ENUM_MAX) { + statistics_->reportTimeToHistogram(hist_type_2_, time); + } + } + } + + void DelayStart() { + // if delay_start_time_ is not 0, it means we are already tracking delay, + // so delay_start_time_ should not be overwritten + if (elapsed_ && delay_enabled_ && delay_start_time_ == 0) { + delay_start_time_ = clock_->NowMicros(); + } + } + + void DelayStop() { + if (elapsed_ && delay_enabled_ && delay_start_time_ != 0) { + total_delay_ += clock_->NowMicros() - delay_start_time_; + } + // reset to 0 means currently no delay is being tracked, so two consecutive + // calls to DelayStop will not increase total_delay_ + delay_start_time_ = 0; + } + + uint64_t GetDelay() const { return delay_enabled_ ? total_delay_ : 0; } + + uint64_t start_time() const { return start_time_; } + + private: + SystemClock* clock_; + Statistics* statistics_; + const uint32_t hist_type_1_; + const uint32_t hist_type_2_; + uint64_t* elapsed_; + bool overwrite_; + bool stats_enabled_; + bool delay_enabled_; + uint64_t total_delay_; + uint64_t delay_start_time_; + const uint64_t start_time_; +}; + +// a nano second precision stopwatch +class StopWatchNano { + public: + explicit StopWatchNano(SystemClock* clock, bool auto_start = false) + : clock_(clock), start_(0) { + if (auto_start) { + Start(); + } + } + + void Start() { start_ = clock_->NowNanos(); } + + uint64_t ElapsedNanos(bool reset = false) { + auto now = clock_->NowNanos(); + auto elapsed = now - start_; + if (reset) { + start_ = now; + } + return elapsed; + } + + uint64_t ElapsedNanosSafe(bool reset = false) { + return (clock_ != nullptr) ? 
ElapsedNanos(reset) : 0U; + } + + bool IsStarted() { return start_ != 0; } + + private: + SystemClock* clock_; + uint64_t start_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.cc new file mode 100644 index 0000000000..821ccba07f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.cc @@ -0,0 +1,502 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include "util/string_util.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "port/port.h" +#include "port/sys_time.h" +#include "rocksdb/slice.h" + +#ifndef __has_cpp_attribute +#define ROCKSDB_HAS_CPP_ATTRIBUTE(x) 0 +#else +#define ROCKSDB_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#endif + +#if ROCKSDB_HAS_CPP_ATTRIBUTE(maybe_unused) && __cplusplus >= 201703L +#define ROCKSDB_MAYBE_UNUSED [[maybe_unused]] +#elif ROCKSDB_HAS_CPP_ATTRIBUTE(gnu::unused) || __GNUC__ +#define ROCKSDB_MAYBE_UNUSED [[gnu::unused]] +#else +#define ROCKSDB_MAYBE_UNUSED +#endif + +namespace ROCKSDB_NAMESPACE { + +const std::string kNullptrString = "nullptr"; + +std::vector StringSplit(const std::string& arg, char delim) { + std::vector splits; + std::stringstream ss(arg); + std::string item; + while (std::getline(ss, item, delim)) { + splits.push_back(item); + } + return splits; +} + +// for micros < 10ms, print "XX us". +// for micros < 10sec, print "XX ms". +// for micros >= 10 sec, print "XX sec". +// for micros <= 1 hour, print Y:X M:S". +// for micros > 1 hour, print Z:Y:X H:M:S". +int AppendHumanMicros(uint64_t micros, char* output, int len, + bool fixed_format) { + if (micros < 10000 && !fixed_format) { + return snprintf(output, len, "%" PRIu64 " us", micros); + } else if (micros < 10000000 && !fixed_format) { + return snprintf(output, len, "%.3lf ms", + static_cast(micros) / 1000); + } else if (micros < 1000000l * 60 && !fixed_format) { + return snprintf(output, len, "%.3lf sec", + static_cast(micros) / 1000000); + } else if (micros < 1000000ll * 60 * 60 && !fixed_format) { + return snprintf(output, len, "%02" PRIu64 ":%05.3f M:S", + micros / 1000000 / 60, + static_cast(micros % 60000000) / 1000000); + } else { + return snprintf(output, len, "%02" PRIu64 ":%02" PRIu64 ":%05.3f H:M:S", + micros / 1000000 / 3600, (micros / 1000000 / 60) % 60, + static_cast(micros % 60000000) / 1000000); + } +} + +// for sizes >=10TB, print "XXTB" +// for sizes >=10GB, print "XXGB" +// etc. 
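+// e.g. (illustrative): AppendHumanBytes(1048576, buf, len) writes "1024KB",
+// not "1MB", because a unit is only used once a value reaches 10x that unit;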
+// append file size summary to output and return the len +int AppendHumanBytes(uint64_t bytes, char* output, int len) { + const uint64_t ull10 = 10; + if (bytes >= ull10 << 40) { + return snprintf(output, len, "%" PRIu64 "TB", bytes >> 40); + } else if (bytes >= ull10 << 30) { + return snprintf(output, len, "%" PRIu64 "GB", bytes >> 30); + } else if (bytes >= ull10 << 20) { + return snprintf(output, len, "%" PRIu64 "MB", bytes >> 20); + } else if (bytes >= ull10 << 10) { + return snprintf(output, len, "%" PRIu64 "KB", bytes >> 10); + } else { + return snprintf(output, len, "%" PRIu64 "B", bytes); + } +} + +void AppendNumberTo(std::string* str, uint64_t num) { + char buf[30]; + snprintf(buf, sizeof(buf), "%" PRIu64, num); + str->append(buf); +} + +void AppendEscapedStringTo(std::string* str, const Slice& value) { + for (size_t i = 0; i < value.size(); i++) { + char c = value[i]; + if (c >= ' ' && c <= '~') { + str->push_back(c); + } else { + char buf[10]; + snprintf(buf, sizeof(buf), "\\x%02x", + static_cast(c) & 0xff); + str->append(buf); + } + } +} + +std::string NumberToHumanString(int64_t num) { + char buf[19]; + int64_t absnum = num < 0 ? -num : num; + if (absnum < 10000) { + snprintf(buf, sizeof(buf), "%" PRIi64, num); + } else if (absnum < 10000000) { + snprintf(buf, sizeof(buf), "%" PRIi64 "K", num / 1000); + } else if (absnum < 10000000000LL) { + snprintf(buf, sizeof(buf), "%" PRIi64 "M", num / 1000000); + } else { + snprintf(buf, sizeof(buf), "%" PRIi64 "G", num / 1000000000); + } + return std::string(buf); +} + +std::string BytesToHumanString(uint64_t bytes) { + const char* size_name[] = {"KB", "MB", "GB", "TB"}; + double final_size = static_cast(bytes); + size_t size_idx; + + // always start with KB + final_size /= 1024; + size_idx = 0; + + while (size_idx < 3 && final_size >= 1024) { + final_size /= 1024; + size_idx++; + } + + char buf[20]; + snprintf(buf, sizeof(buf), "%.2f %s", final_size, size_name[size_idx]); + return std::string(buf); +} + +std::string TimeToHumanString(int unixtime) { + char time_buffer[80]; + time_t rawtime = unixtime; + struct tm tInfo; + struct tm* timeinfo = port::LocalTimeR(&rawtime, &tInfo); + assert(timeinfo == &tInfo); + strftime(time_buffer, 80, "%c", timeinfo); + return std::string(time_buffer); +} + +std::string EscapeString(const Slice& value) { + std::string r; + AppendEscapedStringTo(&r, value); + return r; +} + +bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { + uint64_t v = 0; + int digits = 0; + while (!in->empty()) { + char c = (*in)[0]; + if (c >= '0' && c <= '9') { + ++digits; + const unsigned int delta = (c - '0'); + static const uint64_t kMaxUint64 = ~static_cast(0); + if (v > kMaxUint64 / 10 || + (v == kMaxUint64 / 10 && delta > kMaxUint64 % 10)) { + // Overflow + return false; + } + v = (v * 10) + delta; + in->remove_prefix(1); + } else { + break; + } + } + *val = v; + return (digits > 0); +} + +bool isSpecialChar(const char c) { + if (c == '\\' || c == '#' || c == ':' || c == '\r' || c == '\n') { + return true; + } + return false; +} + +namespace { +using CharMap = std::pair; +} + +char UnescapeChar(const char c) { + static const CharMap convert_map[] = {{'r', '\r'}, {'n', '\n'}}; + + auto iter = std::find_if(std::begin(convert_map), std::end(convert_map), + [c](const CharMap& p) { return p.first == c; }); + + if (iter == std::end(convert_map)) { + return c; + } + return iter->second; +} + +char EscapeChar(const char c) { + static const CharMap convert_map[] = {{'\n', 'n'}, {'\r', 'r'}}; + + auto iter = 
std::find_if(std::begin(convert_map), std::end(convert_map), + [c](const CharMap& p) { return p.first == c; }); + + if (iter == std::end(convert_map)) { + return c; + } + return iter->second; +} + +std::string EscapeOptionString(const std::string& raw_string) { + std::string output; + for (auto c : raw_string) { + if (isSpecialChar(c)) { + output += '\\'; + output += EscapeChar(c); + } else { + output += c; + } + } + + return output; +} + +std::string UnescapeOptionString(const std::string& escaped_string) { + bool escaped = false; + std::string output; + + for (auto c : escaped_string) { + if (escaped) { + output += UnescapeChar(c); + escaped = false; + } else { + if (c == '\\') { + escaped = true; + continue; + } + output += c; + } + } + return output; +} + +std::string trim(const std::string& str) { + if (str.empty()) return std::string(); + size_t start = 0; + size_t end = str.size() - 1; + while (isspace(str[start]) != 0 && start < end) { + ++start; + } + while (isspace(str[end]) != 0 && start < end) { + --end; + } + if (start <= end) { + return str.substr(start, end - start + 1); + } + return std::string(); +} + +bool EndsWith(const std::string& string, const std::string& pattern) { + size_t plen = pattern.size(); + size_t slen = string.size(); + if (plen <= slen) { + return string.compare(slen - plen, plen, pattern) == 0; + } else { + return false; + } +} + +bool StartsWith(const std::string& string, const std::string& pattern) { + return string.compare(0, pattern.size(), pattern) == 0; +} + + +bool ParseBoolean(const std::string& type, const std::string& value) { + if (value == "true" || value == "1") { + return true; + } else if (value == "false" || value == "0") { + return false; + } + throw std::invalid_argument(type); +} + +uint8_t ParseUint8(const std::string& value) { + uint64_t num = ParseUint64(value); + if ((num >> 8LL) == 0) { + return static_cast(num); + } else { + throw std::out_of_range(value); + } +} + +uint32_t ParseUint32(const std::string& value) { + uint64_t num = ParseUint64(value); + if ((num >> 32LL) == 0) { + return static_cast(num); + } else { + throw std::out_of_range(value); + } +} + +int32_t ParseInt32(const std::string& value) { + int64_t num = ParseInt64(value); + if (num <= std::numeric_limits::max() && + num >= std::numeric_limits::min()) { + return static_cast(num); + } else { + throw std::out_of_range(value); + } +} + + +uint64_t ParseUint64(const std::string& value) { + size_t endchar; +#ifndef CYGWIN + uint64_t num = std::stoull(value.c_str(), &endchar); +#else + char* endptr; + uint64_t num = std::strtoul(value.c_str(), &endptr, 0); + endchar = endptr - value.c_str(); +#endif + + if (endchar < value.length()) { + char c = value[endchar]; + if (c == 'k' || c == 'K') + num <<= 10LL; + else if (c == 'm' || c == 'M') + num <<= 20LL; + else if (c == 'g' || c == 'G') + num <<= 30LL; + else if (c == 't' || c == 'T') + num <<= 40LL; + } + + return num; +} + +int64_t ParseInt64(const std::string& value) { + size_t endchar; +#ifndef CYGWIN + int64_t num = std::stoll(value.c_str(), &endchar); +#else + char* endptr; + int64_t num = std::strtoll(value.c_str(), &endptr, 0); + endchar = endptr - value.c_str(); +#endif + + if (endchar < value.length()) { + char c = value[endchar]; + if (c == 'k' || c == 'K') + num <<= 10LL; + else if (c == 'm' || c == 'M') + num <<= 20LL; + else if (c == 'g' || c == 'G') + num <<= 30LL; + else if (c == 't' || c == 'T') + num <<= 40LL; + } + + return num; +} + +int ParseInt(const std::string& value) { + size_t endchar; +#ifndef 
CYGWIN + int num = std::stoi(value.c_str(), &endchar); +#else + char* endptr; + int num = std::strtoul(value.c_str(), &endptr, 0); + endchar = endptr - value.c_str(); +#endif + + if (endchar < value.length()) { + char c = value[endchar]; + if (c == 'k' || c == 'K') + num <<= 10; + else if (c == 'm' || c == 'M') + num <<= 20; + else if (c == 'g' || c == 'G') + num <<= 30; + } + + return num; +} + +double ParseDouble(const std::string& value) { +#ifndef CYGWIN + return std::stod(value); +#else + return std::strtod(value.c_str(), 0); +#endif +} + +size_t ParseSizeT(const std::string& value) { + return static_cast(ParseUint64(value)); +} + +std::vector ParseVectorInt(const std::string& value) { + std::vector result; + size_t start = 0; + while (start < value.size()) { + size_t end = value.find(':', start); + if (end == std::string::npos) { + result.push_back(ParseInt(value.substr(start))); + break; + } else { + result.push_back(ParseInt(value.substr(start, end - start))); + start = end + 1; + } + } + return result; +} + +bool SerializeIntVector(const std::vector& vec, std::string* value) { + *value = ""; + for (size_t i = 0; i < vec.size(); ++i) { + if (i > 0) { + *value += ":"; + } + *value += std::to_string(vec[i]); + } + return true; +} + +// Copied from folly/string.cpp: +// https://github.com/facebook/folly/blob/0deef031cb8aab76dc7e736f8b7c22d701d5f36b/folly/String.cpp#L457 +// There are two variants of `strerror_r` function, one returns +// `int`, and another returns `char*`. Selecting proper version using +// preprocessor macros portably is extremely hard. +// +// For example, on Android function signature depends on `__USE_GNU` and +// `__ANDROID_API__` macros (https://git.io/fjBBE). +// +// So we are using C++ overloading trick: we pass a pointer of +// `strerror_r` to `invoke_strerror_r` function, and C++ compiler +// selects proper function. + +#if !(defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))) +ROCKSDB_MAYBE_UNUSED +static std::string invoke_strerror_r(int (*strerror_r)(int, char*, size_t), + int err, char* buf, size_t buflen) { + // Using XSI-compatible strerror_r + int r = strerror_r(err, buf, buflen); + + // OSX/FreeBSD use EINVAL and Linux uses -1 so just check for non-zero + if (r != 0) { + snprintf(buf, buflen, "Unknown error %d (strerror_r failed with error %d)", + err, errno); + } + return buf; +} + +ROCKSDB_MAYBE_UNUSED +static std::string invoke_strerror_r(char* (*strerror_r)(int, char*, size_t), + int err, char* buf, size_t buflen) { + // Using GNU strerror_r + return strerror_r(err, buf, buflen); +} +#endif // !(defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))) + +std::string errnoStr(int err) { + char buf[1024]; + buf[0] = '\0'; + + std::string result; + + // https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/strerror_r.3.html + // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html +#if defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER)) + // mingw64 has no strerror_r, but Windows has strerror_s, which C11 added + // as well. So maybe we should use this across all platforms (together + // with strerrorlen_s). Note strerror_r and _s have swapped args. 
+  int r = strerror_s(buf, sizeof(buf), err);
+  if (r != 0) {
+    snprintf(buf, sizeof(buf),
+             "Unknown error %d (strerror_r failed with error %d)", err, errno);
+  }
+  result.assign(buf);
+#else
+  // Using any strerror_r
+  result.assign(invoke_strerror_r(strerror_r, err, buf, sizeof(buf)));
+#endif
+
+  return result;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.h
new file mode 100644
index 0000000000..0b15181f5d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/string_util.h
@@ -0,0 +1,175 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+
+#include <cstdint>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+
+extern std::vector<std::string> StringSplit(const std::string& arg,
+                                            char delim);
+
+// Append a human-readable printout of "num" to *str
+extern void AppendNumberTo(std::string* str, uint64_t num);
+
+// Append a human-readable printout of "value" to *str.
+// Escapes any non-printable characters found in "value".
+extern void AppendEscapedStringTo(std::string* str, const Slice& value);
+
+// Put n digits from v in base kBase to (*buf)[0] to (*buf)[n-1] and
+// advance *buf to the position after what was written.
+template <size_t kBase>
+inline void PutBaseChars(char** buf, size_t n, uint64_t v, bool uppercase) {
+  const char* digitChars = uppercase ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                     : "0123456789abcdefghijklmnopqrstuvwxyz";
+  for (size_t i = n; i > 0; --i) {
+    (*buf)[i - 1] = digitChars[static_cast<size_t>(v % kBase)];
+    v /= kBase;
+  }
+  *buf += n;
+}
+
+// Parse n digits from *buf in base kBase to *v and advance *buf to the
+// position after what was read. On success, true is returned. On failure,
+// false is returned, *buf is placed at the first bad character, and *v
+// contains the partial parsed data. Overflow is not checked but the
+// result is accurate mod 2^64. Requires the starting value of *v to be
+// zero or previously accumulated parsed digits, i.e.
+//   ParseBaseChars(&b, n, &v);
+// is equivalent to n calls to
+//   ParseBaseChars(&b, 1, &v);
+template <int kBase>
+inline bool ParseBaseChars(const char** buf, size_t n, uint64_t* v) {
+  while (n) {
+    char c = **buf;
+    *v *= static_cast<uint64_t>(kBase);
+    if (c >= '0' && (kBase >= 10 ? c <= '9' : c < '0' + kBase)) {
+      *v += static_cast<uint64_t>(c - '0');
+    } else if (kBase > 10 && c >= 'A' && c < 'A' + kBase - 10) {
+      *v += static_cast<uint64_t>(c - 'A' + 10);
+    } else if (kBase > 10 && c >= 'a' && c < 'a' + kBase - 10) {
+      *v += static_cast<uint64_t>(c - 'a' + 10);
+    } else {
+      return false;
+    }
+    --n;
+    ++*buf;
+  }
+  return true;
+}
+
+// Return a human-readable version of num.
+// for num >= 10.000, prints "xxK"
+// for num >= 10.000.000, prints "xxM"
+// for num >= 10.000.000.000, prints "xxG"
+extern std::string NumberToHumanString(int64_t num);
+
+// Return a human-readable version of bytes
+// ex: 1048576 -> 1.00 MB
+extern std::string BytesToHumanString(uint64_t bytes);
+
+// Return a human-readable version of unix time
+// ex: 1562116015 -> "Tue Jul  2 18:06:55 2019"
+extern std::string TimeToHumanString(int unixtime);
+
+// Append a human-readable time in micros.
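+// e.g. (illustrative, with fixed_format == false): 1234 -> "1234 us";
+// 1234567 -> "1234.567 ms"; 12500000 -> "12.500 sec".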
+int AppendHumanMicros(uint64_t micros, char* output, int len, + bool fixed_format); + +// Append a human-readable size in bytes +int AppendHumanBytes(uint64_t bytes, char* output, int len); + +// Return a human-readable version of "value". +// Escapes any non-printable characters found in "value". +extern std::string EscapeString(const Slice& value); + +// Parse a human-readable number from "*in" into *value. On success, +// advances "*in" past the consumed number and sets "*val" to the +// numeric value. Otherwise, returns false and leaves *in in an +// unspecified state. +extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val); + +// Returns true if the input char "c" is considered as a special character +// that will be escaped when EscapeOptionString() is called. +// +// @param c the input char +// @return true if the input char "c" is considered as a special character. +// @see EscapeOptionString +bool isSpecialChar(const char c); + +// If the input char is an escaped char, it will return the its +// associated raw-char. Otherwise, the function will simply return +// the original input char. +char UnescapeChar(const char c); + +// If the input char is a control char, it will return the its +// associated escaped char. Otherwise, the function will simply return +// the original input char. +char EscapeChar(const char c); + +// Converts a raw string to an escaped string. Escaped-characters are +// defined via the isSpecialChar() function. When a char in the input +// string "raw_string" is classified as a special characters, then it +// will be prefixed by '\' in the output. +// +// It's inverse function is UnescapeOptionString(). +// @param raw_string the input string +// @return the '\' escaped string of the input "raw_string" +// @see isSpecialChar, UnescapeOptionString +std::string EscapeOptionString(const std::string& raw_string); + +// The inverse function of EscapeOptionString. It converts +// an '\' escaped string back to a raw string. 
+// +// @param escaped_string the input '\' escaped string +// @return the raw string of the input "escaped_string" +std::string UnescapeOptionString(const std::string& escaped_string); + +std::string trim(const std::string& str); + +// Returns true if "string" ends with "pattern" +bool EndsWith(const std::string& string, const std::string& pattern); + +// Returns true if "string" starts with "pattern" +bool StartsWith(const std::string& string, const std::string& pattern); + +bool ParseBoolean(const std::string& type, const std::string& value); + +uint8_t ParseUint8(const std::string& value); + +uint32_t ParseUint32(const std::string& value); + +int32_t ParseInt32(const std::string& value); + +uint64_t ParseUint64(const std::string& value); + +int ParseInt(const std::string& value); + +int64_t ParseInt64(const std::string& value); + +double ParseDouble(const std::string& value); + +size_t ParseSizeT(const std::string& value); + +std::vector ParseVectorInt(const std::string& value); + +bool SerializeIntVector(const std::vector& vec, std::string* value); + +extern const std::string kNullptrString; + +// errnoStr() function returns a string that describes the error code passed in +// the argument err +extern std::string errnoStr(int err); + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_guard.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_guard.h new file mode 100644 index 0000000000..b2bb06a1b0 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_guard.h @@ -0,0 +1,41 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "port/port.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// Resource management object for threads that joins the thread upon +// destruction. Has unique ownership of the thread object, so copying it is not +// allowed, while moving it transfers ownership. +class ThreadGuard { + public: + ThreadGuard() = default; + + explicit ThreadGuard(port::Thread&& thread) : thread_(std::move(thread)) {} + + ThreadGuard(const ThreadGuard&) = delete; + ThreadGuard& operator=(const ThreadGuard&) = delete; + + ThreadGuard(ThreadGuard&&) noexcept = default; + ThreadGuard& operator=(ThreadGuard&&) noexcept = default; + + ~ThreadGuard() { + if (thread_.joinable()) { + thread_.join(); + } + } + + const port::Thread& GetThread() const { return thread_; } + port::Thread& GetThread() { return thread_; } + + private: + port::Thread thread_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.cc new file mode 100644 index 0000000000..969639d9bc --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.cc @@ -0,0 +1,521 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#include "util/thread_local.h"
+
+#include <stdlib.h>
+
+#include "port/likely.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct Entry {
+  Entry() : ptr(nullptr) {}
+  Entry(const Entry& e) : ptr(e.ptr.load(std::memory_order_relaxed)) {}
+  std::atomic<void*> ptr;
+};
+
+class StaticMeta;
+
+// This is the structure that is declared as "thread_local" storage.
+// The vector keeps a list of atomic pointers, one per ThreadLocalPtr
+// instance, for the "current" thread. The vector is indexed by an Id that is
+// unique in the process and associated with one ThreadLocalPtr instance. The
+// Id is assigned by a global StaticMeta singleton. So if we instantiate 3
+// ThreadLocalPtr instances, each thread will have a ThreadData with a vector
+// of size 3:
+//     ---------------------------------------------------
+//     |          | instance 1 | instance 2 | instance 3 |
+//     ---------------------------------------------------
+//     | thread 1 |    void*   |    void*   |    void*   | <- ThreadData
+//     ---------------------------------------------------
+//     | thread 2 |    void*   |    void*   |    void*   | <- ThreadData
+//     ---------------------------------------------------
+//     | thread 3 |    void*   |    void*   |    void*   | <- ThreadData
+//     ---------------------------------------------------
+struct ThreadData {
+  explicit ThreadData(ThreadLocalPtr::StaticMeta* _inst)
+      : entries(), next(nullptr), prev(nullptr), inst(_inst) {}
+  std::vector<Entry> entries;
+  ThreadData* next;
+  ThreadData* prev;
+  ThreadLocalPtr::StaticMeta* inst;
+};
+
+class ThreadLocalPtr::StaticMeta {
+ public:
+  StaticMeta();
+
+  // Return the next available Id
+  uint32_t GetId();
+  // Return the next available Id without claiming it
+  uint32_t PeekId() const;
+  // Return the given Id back to the free pool. This also triggers
+  // UnrefHandler for the associated pointer value (if not NULL) for all
+  // threads.
+  void ReclaimId(uint32_t id);
+
+  // Return the pointer value for the given id for the current thread.
+  void* Get(uint32_t id) const;
+  // Reset the pointer value for the given id for the current thread.
+  void Reset(uint32_t id, void* ptr);
+  // Atomically swap the supplied ptr and return the previous value
+  void* Swap(uint32_t id, void* ptr);
+  // Atomically compare and swap the provided value only if it equals
+  // the expected value.
+  bool CompareAndSwap(uint32_t id, void* ptr, void*& expected);
+  // Reset all thread local data to replacement, and return non-nullptr
+  // data for all existing threads
+  void Scrape(uint32_t id, autovector<void*>* ptrs, void* const replacement);
+  // Update res by applying func on each thread-local value. Holds a lock that
+  // prevents unref handler from running during this call, but clients must
+  // still provide external synchronization since the owning thread can
+  // access the values without internal locking, e.g., via Get() and Reset().
+  void Fold(uint32_t id, FoldFunc func, void* res);
+
+  // Register the UnrefHandler for id
+  void SetHandler(uint32_t id, UnrefHandler handler);
+
+  // protect inst, next_instance_id_, free_instance_ids_, head_,
+  // ThreadData.entries
+  //
+  // Note that here we prefer a function static variable instead of the usual
+  // global static variable. The reason is that the C++ destruction order of
+  // static variables is the reverse of their construction order. However,
+  // C++ does not guarantee any construction order when global static
+  // variables are defined in different files, while function static
+  // variables are initialized when their functions are first called.
+  // As a result, the construction order of the function static variables
+  // can be controlled by properly invoking their first function calls in
+  // the right order.
+  //
+  // For instance, the following function contains a function static
+  // variable. We place a dummy call of this function inside Env::Default()
+  // to ensure the correct construction order.
+  static port::Mutex* Mutex();
+
+  // Returns the member mutex of the current StaticMeta. In general,
+  // Mutex() should be used instead of this one. However, in cases where
+  // the static variable inside Instance() goes out of scope, MemberMutex()
+  // should be used. One example is the OnThreadExit() function.
+  port::Mutex* MemberMutex() { return &mutex_; }
+
+ private:
+  // Get the UnrefHandler for id while holding the mutex
+  // REQUIRES: mutex locked
+  UnrefHandler GetHandler(uint32_t id);
+
+  // Triggered before a thread terminates
+  static void OnThreadExit(void* ptr);
+
+  // Add current thread's ThreadData to the global chain
+  // REQUIRES: mutex locked
+  void AddThreadData(ThreadData* d);
+
+  // Remove current thread's ThreadData from the global chain
+  // REQUIRES: mutex locked
+  void RemoveThreadData(ThreadData* d);
+
+  static ThreadData* GetThreadLocal();
+
+  uint32_t next_instance_id_;
+  // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed
+  // frequently. This also prevents it from blowing up the vector space.
+  autovector<uint32_t> free_instance_ids_;
+  // Chain all thread local structures together. This is necessary since
+  // when one ThreadLocalPtr gets destroyed, we need to loop over each
+  // thread's version of the pointer corresponding to that instance and
+  // call UnrefHandler for it.
+  ThreadData head_;
+
+  std::unordered_map<uint32_t, UnrefHandler> handler_map_;
+
+  // The private mutex. Developers should always use Mutex() instead of
+  // using this variable directly.
+  port::Mutex mutex_;
+  // Thread local storage
+  static thread_local ThreadData* tls_;
+
+  // Used to make thread exit trigger possible if !defined(OS_MACOSX).
+  // Otherwise, used to retrieve thread data.
+  pthread_key_t pthread_key_;
+};
+
+thread_local ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr;
+
+// Windows doesn't support a per-thread destructor with its
+// TLS primitives. So, we build it manually by inserting a
+// function to be called on each thread's exit.
+// See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
+// and http://www.nynaeve.net/?p=183
+//
+// Really, we do this to have a clear conscience, since using TLS with
+// thread pools is iffy (although OK within a request). Otherwise, threads
+// have no identity in their modern use.
+
+// This runs on Windows only, called from the System Loader
+#ifdef OS_WIN
+
+// The Windows cleanup routine is invoked from the System Loader with a
+// different signature, so we cannot directly hook up the original
+// OnThreadExit, which is a private member. Instead, the StaticMeta class
+// shares the address of the function with us so we can invoke it.
+namespace wintlscleanup {
+
+// This is set to OnThreadExit in the StaticMeta singleton constructor
+UnrefHandler thread_local_inclass_routine = nullptr;
+pthread_key_t thread_local_key = pthread_key_t(-1);
+
+// Static callback function to call with each thread termination.
+void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
+  // We decided to punt on PROCESS_EXIT
+  if (DLL_THREAD_DETACH == reason) {
+    if (thread_local_key != pthread_key_t(-1) &&
+        thread_local_inclass_routine != nullptr) {
+      void* tls = TlsGetValue(thread_local_key);
+      if (tls != nullptr) {
+        thread_local_inclass_routine(tls);
+      }
+    }
+  }
+}
+
+}  // namespace wintlscleanup
+
+// extern "C" suppresses C++ name mangling so we know the symbol name for the
+// linker /INCLUDE:symbol pragma above.
+extern "C" {
+
+#ifdef _MSC_VER
+// The linker must not discard thread_callback_on_exit. (We force a reference
+// to this variable with a linker /include:symbol pragma to ensure that.) If
+// this variable is discarded, the OnThreadExit function will never be called.
+#ifndef _X86_
+
+// .CRT section is merged with .rdata on x64 so it must be constant data.
+#pragma const_seg(".CRT$XLB")
+// When defining a const variable, it must have external linkage to be sure
+// the linker doesn't discard it.
+extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
+const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
+    wintlscleanup::WinOnThreadExit;
+// Reset the default section.
+#pragma const_seg()
+
+#pragma comment(linker, "/include:_tls_used")
+#pragma comment(linker, "/include:p_thread_callback_on_exit")
+
+#else  // _X86_
+
+#pragma data_seg(".CRT$XLB")
+PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
+// Reset the default section.
+#pragma data_seg()
+
+#pragma comment(linker, "/INCLUDE:__tls_used")
+#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")
+
+#endif  // _X86_
+
+#else
+// https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc
+BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) {
+  if (dwReason == DLL_THREAD_DETACH)
+    wintlscleanup::WinOnThreadExit(h, dwReason, pv);
+  return TRUE;
+}
+#endif
+}  // extern "C"
+
+#endif  // OS_WIN
+
+void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); }
+
+ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
+  // Here we prefer a function static variable instead of a global
+  // static variable, as a function static variable is initialized
+  // when the function is first called. As a result, we can properly
+  // control the construction order by properly preparing the
+  // first function call.
+  //
+  // Note that here we decide to make "inst" a static pointer w/o deleting
+  // it at the end, instead of a static variable. This is to avoid the
+  // following destruction-order disaster that happens when a child thread
+  // using ThreadLocalPtr dies AFTER the main thread dies: When a child
+  // thread happens to use ThreadLocalPtr, it will try to delete its
+  // thread-local data in OnThreadExit when the child thread dies. However,
+  // OnThreadExit depends on the following variable. As a result, if the main
+  // thread dies before any child thread that uses ThreadLocalPtr dies, then
+  // the destruction of the following variable will go first, then
+  // OnThreadExit, therefore causing invalid access.
+  //
+  // The above problem can be solved by using thread_local to store tls_.
+  // thread_local supports dynamic construction and destruction of
+  // non-primitive typed variables. As a result, we can guarantee the
+  // destruction order even when the main thread dies before any child
+  // threads.
+  static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
+  return inst;
+}
+
+port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; }
+
+void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
+  auto* tls = static_cast<ThreadData*>(ptr);
+  assert(tls != nullptr);
+
+  // Use the StaticMeta pointer cached in the ThreadData instead of calling
+  // StaticMeta::Instance() directly: the static variable inside
+  // StaticMeta::Instance() might already have gone out of scope here, in
+  // case this OnThreadExit is called after the main thread dies.
+  auto* inst = tls->inst;
+  pthread_setspecific(inst->pthread_key_, nullptr);
+
+  MutexLock l(inst->MemberMutex());
+  inst->RemoveThreadData(tls);
+  // Unref stored pointers of current thread from all instances
+  uint32_t id = 0;
+  for (auto& e : tls->entries) {
+    void* raw = e.ptr.load();
+    if (raw != nullptr) {
+      auto unref = inst->GetHandler(id);
+      if (unref != nullptr) {
+        unref(raw);
+      }
+    }
+    ++id;
+  }
+  // Delete the thread-local structure regardless of platform
+  delete tls;
+}
+
+ThreadLocalPtr::StaticMeta::StaticMeta()
+    : next_instance_id_(0), head_(this), pthread_key_(0) {
+  if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
+    abort();
+  }
+
+  // OnThreadExit is not getting called on the main thread.
+  // Call through the static destructor mechanism to avoid memory leak.
+  //
+  // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
+  // singleton (destructors are invoked in reverse order of constructor
+  // _completion_); the latter must not mutate internal members. This
+  // cleanup mechanism inherently relies on use-after-release of the
+  // StaticMeta, and is brittle with respect to compiler-specific handling
+  // of memory backing destructed statically-scoped objects. Perhaps
+  // registering with atexit(3) would be more robust.
+  //
+// This is not required on Windows.
+#if !defined(OS_WIN)
+  static struct A {
+    ~A() {
+      if (tls_) {
+        OnThreadExit(tls_);
+      }
+    }
+  } a;
+#endif  // !defined(OS_WIN)
+
+  head_.next = &head_;
+  head_.prev = &head_;
+
+#ifdef OS_WIN
+  // Share with Windows its cleanup routine and the key
+  wintlscleanup::thread_local_inclass_routine = OnThreadExit;
+  wintlscleanup::thread_local_key = pthread_key_;
+#endif
+}
+
+void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData* d) {
+  Mutex()->AssertHeld();
+  d->next = &head_;
+  d->prev = head_.prev;
+  head_.prev->next = d;
+  head_.prev = d;
+}
+
+void ThreadLocalPtr::StaticMeta::RemoveThreadData(ThreadData* d) {
+  Mutex()->AssertHeld();
+  d->next->prev = d->prev;
+  d->prev->next = d->next;
+  d->next = d->prev = d;
+}
+
+ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
+  if (UNLIKELY(tls_ == nullptr)) {
+    auto* inst = Instance();
+    tls_ = new ThreadData(inst);
+    {
+      // Register it in the global chain; needs to be done before thread exit
+      // handler registration
+      MutexLock l(Mutex());
+      inst->AddThreadData(tls_);
+    }
+    // Even when it is not OS_MACOSX, we need to register a value for
+    // pthread_key_ so that its exit handler will be triggered.
+    if (pthread_setspecific(inst->pthread_key_, tls_) != 0) {
+      {
+        MutexLock l(Mutex());
+        inst->RemoveThreadData(tls_);
+      }
+      delete tls_;
+      abort();
+    }
+  }
+  return tls_;
+}
+
+void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const {
+  auto* tls = GetThreadLocal();
+  if (UNLIKELY(id >= tls->entries.size())) {
+    return nullptr;
+  }
+  return tls->entries[id].ptr.load(std::memory_order_acquire);
+}
+
+void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) {
+  auto* tls = GetThreadLocal();
+  if (UNLIKELY(id >= tls->entries.size())) {
+    // Need mutex to protect entries access within ReclaimId
+    MutexLock l(Mutex());
+    tls->entries.resize(id + 1);
+  }
+  tls->entries[id].ptr.store(ptr, std::memory_order_release);
+}
+
+void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) {
+  auto* tls = GetThreadLocal();
+  if (UNLIKELY(id >= tls->entries.size())) {
+    // Need mutex to protect entries access within ReclaimId
+    MutexLock l(Mutex());
+    tls->entries.resize(id + 1);
+  }
+  return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire);
+}
+
+bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr,
+                                                void*& expected) {
+  auto* tls = GetThreadLocal();
+  if (UNLIKELY(id >= tls->entries.size())) {
+    // Need mutex to protect entries access within ReclaimId
+    MutexLock l(Mutex());
+    tls->entries.resize(id + 1);
+  }
+  return tls->entries[id].ptr.compare_exchange_strong(
+      expected, ptr, std::memory_order_release, std::memory_order_relaxed);
+}
+
+void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector<void*>* ptrs,
+                                        void* const replacement) {
+  MutexLock l(Mutex());
+  for (ThreadData* t = head_.next; t != &head_; t = t->next) {
+    if (id < t->entries.size()) {
+      void* ptr =
+          t->entries[id].ptr.exchange(replacement, std::memory_order_acquire);
+      if (ptr != nullptr) {
+        ptrs->push_back(ptr);
+      }
+    }
+  }
+}
+
+void ThreadLocalPtr::StaticMeta::Fold(uint32_t id, FoldFunc func, void* res) {
+  MutexLock l(Mutex());
+  for (ThreadData* t = head_.next; t != &head_; t = t->next) {
+    if (id < t->entries.size()) {
+      void* ptr = t->entries[id].ptr.load();
+      if (ptr != nullptr) {
+        func(ptr, res);
+      }
+    }
+  }
+}
+
+uint32_t ThreadLocalPtr::TEST_PeekId() { return Instance()->PeekId(); }
+
+void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) {
+  MutexLock l(Mutex());
+  handler_map_[id] = handler;
+}
+
+UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) {
+  Mutex()->AssertHeld();
+  auto iter = handler_map_.find(id);
+  if (iter == handler_map_.end()) {
+    return nullptr;
+  }
+  return iter->second;
+}
+
+uint32_t ThreadLocalPtr::StaticMeta::GetId() {
+  MutexLock l(Mutex());
+  if (free_instance_ids_.empty()) {
+    return next_instance_id_++;
+  }
+
+  uint32_t id = free_instance_ids_.back();
+  free_instance_ids_.pop_back();
+  return id;
+}
+
+uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
+  MutexLock l(Mutex());
+  if (!free_instance_ids_.empty()) {
+    return free_instance_ids_.back();
+  }
+  return next_instance_id_;
+}
+
+void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) {
+  // This id is not used; go through all thread local data and release
+  // the corresponding value
+  MutexLock l(Mutex());
+  auto unref = GetHandler(id);
+  for (ThreadData* t = head_.next; t != &head_; t = t->next) {
+    if (id < t->entries.size()) {
+      void* ptr = t->entries[id].ptr.exchange(nullptr);
+      if (ptr != nullptr && unref != nullptr) {
+        unref(ptr);
+      }
+    }
+  }
+  handler_map_[id] = nullptr;
+  free_instance_ids_.push_back(id);
+}
+
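+// Usage sketch (editor's addition, not part of the upstream source): a
+// ThreadLocalPtr paired with an UnrefHandler gives each thread its own
+// lazily created value, and the handler reclaims that value when the thread
+// exits or the ThreadLocalPtr is destroyed. The names below (DeleteCounter,
+// counter, BumpPerThreadCounter) are hypothetical:
+//
+//   static void DeleteCounter(void* ptr) { delete static_cast<int*>(ptr); }
+//
+//   static ThreadLocalPtr counter(&DeleteCounter);
+//   void BumpPerThreadCounter() {
+//     if (counter.Get() == nullptr) {
+//       counter.Reset(new int(0));  // first use on this thread
+//     }
+//     // No locking needed: the value is visible only to this thread.
+//     ++*static_cast<int*>(counter.Get());
+//   }
+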
+ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler)
+    : id_(Instance()->GetId()) {
+  if (handler != nullptr) {
+    Instance()->SetHandler(id_, handler);
+  }
+}
+
+ThreadLocalPtr::~ThreadLocalPtr() { Instance()->ReclaimId(id_); }
+
+void* ThreadLocalPtr::Get() const { return Instance()->Get(id_); }
+
+void ThreadLocalPtr::Reset(void* ptr) { Instance()->Reset(id_, ptr); }
+
+void* ThreadLocalPtr::Swap(void* ptr) { return Instance()->Swap(id_, ptr); }
+
+bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) {
+  return Instance()->CompareAndSwap(id_, ptr, expected);
+}
+
+void ThreadLocalPtr::Scrape(autovector<void*>* ptrs, void* const replacement) {
+  Instance()->Scrape(id_, ptrs, replacement);
+}
+
+void ThreadLocalPtr::Fold(FoldFunc func, void* res) {
+  Instance()->Fold(id_, func, res);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.h
new file mode 100644
index 0000000000..fde68f86fb
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_local.h
@@ -0,0 +1,100 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <atomic>
+#include <functional>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "port/port.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Cleanup function that will be called for a stored thread local
+// pointer (if not NULL) when one of the following happens:
+// (1) a thread terminates
+// (2) a ThreadLocalPtr is destroyed
+//
+// Warning: this function is called while holding a global mutex. The same
+// mutex is used (at least in some cases) by most methods of ThreadLocalPtr,
+// and it's shared across all instances of ThreadLocalPtr. Therefore extra
+// care is needed to avoid deadlocks. In particular, the handler shouldn't
+// lock any mutexes and shouldn't call any methods of any ThreadLocalPtr
+// instances, unless you know what you're doing.
+using UnrefHandler = void (*)(void* ptr);
+
+// ThreadLocalPtr stores only values of pointer type. Different from
+// the usual thread-local storage, ThreadLocalPtr has the ability to
+// distinguish data coming from different threads and different
+// ThreadLocalPtr instances. For example, if a regular thread_local
+// variable A is declared in DBImpl, two DBImpl objects would share
+// the same A. However, a ThreadLocalPtr that is defined under the
+// scope of DBImpl avoids such a conflict. As a result, its memory
+// usage is O(# of threads * # of ThreadLocalPtr instances).
+class ThreadLocalPtr {
+ public:
+  explicit ThreadLocalPtr(UnrefHandler handler = nullptr);
+
+  ThreadLocalPtr(const ThreadLocalPtr&) = delete;
+  ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete;
+
+  ~ThreadLocalPtr();
+
+  // Return the current pointer stored in thread local
+  void* Get() const;
+
+  // Set a new pointer value to the thread local storage.
+  void Reset(void* ptr);
+
+  // Atomically swap the supplied ptr and return the previous value
+  void* Swap(void* ptr);
+
+  // Atomically compare the stored value with expected. Set the new
+  // pointer value to thread local only if the comparison is true.
+  // Otherwise, expected returns the stored value.
+  // Return true on success, false on failure
+  bool CompareAndSwap(void* ptr, void*& expected);
+
+  // Reset all thread local data to replacement, and return non-nullptr
+  // data for all existing threads
+  void Scrape(autovector<void*>* ptrs, void* const replacement);
+
+  using FoldFunc = std::function<void(void*, void*)>;
+  // Update res by applying func on each thread-local value. Holds a lock that
+  // prevents unref handler from running during this call, but clients must
+  // still provide external synchronization since the owning thread can
+  // access the values without internal locking, e.g., via Get() and Reset().
+  void Fold(FoldFunc func, void* res);
+
+  // Added here for testing
+  // Return the next available Id without claiming it
+  static uint32_t TEST_PeekId();
+
+  // Initialize the static singletons of ThreadLocalPtr.
+  //
+  // If this function is not called, then the singletons will be
+  // automatically initialized when they are used.
+  //
+  // Calling this function twice, or after the singletons have been
+  // initialized, is a no-op.
+  static void InitSingletons();
+
+  class StaticMeta;
+
+ private:
+  static StaticMeta* Instance();
+
+  const uint32_t id_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_operation.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_operation.h
new file mode 100644
index 0000000000..b6c1062796
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/thread_operation.h
@@ -0,0 +1,113 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file defines the structures for thread operation and state.
+// Thread operations are used to describe high-level actions of a
+// thread, such as doing a compaction or flush, while thread states
+// are used to describe lower-level actions, such as reading or
+// writing a file or waiting for a mutex. Operations and states
+// are designed to be independent. Typically, a thread is involved
+// in one operation and one state at any specific point in time.
+
+#pragma once
+
+#include <string>
+
+#include "rocksdb/thread_status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+#ifdef ROCKSDB_USING_THREAD_STATUS
+
+// The structure that describes a major thread operation.
+struct OperationInfo {
+  const ThreadStatus::OperationType type;
+  const std::string name;
+};
+
+// The global operation table.
+//
+// When updating the status of a thread, the pointer to the OperationInfo
+// of the current ThreadStatusData will point to one of the
+// rows in this global table.
+//
+// Note that it's not designed to be constant, as in the future we
+// might consider adding a global count to the OperationInfo.
+static OperationInfo global_operation_table[] = {
+    {ThreadStatus::OP_UNKNOWN, ""},
+    {ThreadStatus::OP_COMPACTION, "Compaction"},
+    {ThreadStatus::OP_FLUSH, "Flush"},
+    {ThreadStatus::OP_DBOPEN, "DBOpen"}};
+
+struct OperationStageInfo {
+  const ThreadStatus::OperationStage stage;
+  const std::string name;
+};
+
+// A table that maintains the mapping from stage type to stage string.
+// Note that the string must be changed accordingly when the
+// associated function name changes.
+static OperationStageInfo global_op_stage_table[] = { + {ThreadStatus::STAGE_UNKNOWN, ""}, + {ThreadStatus::STAGE_FLUSH_RUN, "FlushJob::Run"}, + {ThreadStatus::STAGE_FLUSH_WRITE_L0, "FlushJob::WriteLevel0Table"}, + {ThreadStatus::STAGE_COMPACTION_PREPARE, "CompactionJob::Prepare"}, + {ThreadStatus::STAGE_COMPACTION_RUN, "CompactionJob::Run"}, + {ThreadStatus::STAGE_COMPACTION_PROCESS_KV, + "CompactionJob::ProcessKeyValueCompaction"}, + {ThreadStatus::STAGE_COMPACTION_INSTALL, "CompactionJob::Install"}, + {ThreadStatus::STAGE_COMPACTION_SYNC_FILE, + "CompactionJob::FinishCompactionOutputFile"}, + {ThreadStatus::STAGE_PICK_MEMTABLES_TO_FLUSH, + "MemTableList::PickMemtablesToFlush"}, + {ThreadStatus::STAGE_MEMTABLE_ROLLBACK, + "MemTableList::RollbackMemtableFlush"}, + {ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS, + "MemTableList::TryInstallMemtableFlushResults"}, +}; + +// The structure that describes a state. +struct StateInfo { + const ThreadStatus::StateType type; + const std::string name; +}; + +// The global state table. +// +// When updating a status of a thread, the pointer of the StateInfo +// of the current ThreadStatusData will be pointing to one of the +// rows in this global table. +static StateInfo global_state_table[] = { + {ThreadStatus::STATE_UNKNOWN, ""}, + {ThreadStatus::STATE_MUTEX_WAIT, "Mutex Wait"}, +}; + +struct OperationProperty { + int code; + std::string name; +}; + +static OperationProperty compaction_operation_properties[] = { + {ThreadStatus::COMPACTION_JOB_ID, "JobID"}, + {ThreadStatus::COMPACTION_INPUT_OUTPUT_LEVEL, "InputOutputLevel"}, + {ThreadStatus::COMPACTION_PROP_FLAGS, "Manual/Deletion/Trivial"}, + {ThreadStatus::COMPACTION_TOTAL_INPUT_BYTES, "TotalInputBytes"}, + {ThreadStatus::COMPACTION_BYTES_READ, "BytesRead"}, + {ThreadStatus::COMPACTION_BYTES_WRITTEN, "BytesWritten"}, +}; + +static OperationProperty flush_operation_properties[] = { + {ThreadStatus::FLUSH_JOB_ID, "JobID"}, + {ThreadStatus::FLUSH_BYTES_MEMTABLES, "BytesMemtables"}, + {ThreadStatus::FLUSH_BYTES_WRITTEN, "BytesWritten"}}; + +#else + +struct OperationInfo {}; + +struct StateInfo {}; + +#endif // ROCKSDB_USING_THREAD_STATUS +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.cc new file mode 100644 index 0000000000..09706cac57 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.cc @@ -0,0 +1,551 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
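+
+// Usage sketch (editor's addition, not part of the upstream source): a pool
+// is usually obtained via NewThreadPool() and driven through the ThreadPool
+// interface implemented below; the tag-based Schedule()/UnSchedule() pair is
+// specific to ThreadPoolImpl. All names used here are defined in this file:
+//
+//   ThreadPool* pool = NewThreadPool(4);      // 4 background threads
+//   pool->SubmitJob([] { /* fire-and-forget work */ });
+//   pool->WaitForJobsAndJoinAllThreads();     // drain the queue, then join
+//   delete pool;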
+
+#include "util/threadpool_imp.h"
+
+#ifndef OS_WIN
+#include <unistd.h>
+#endif
+
+#ifdef OS_LINUX
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#endif
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <deque>
+#include <mutex>
+#include <sstream>
+#include <thread>
+#include <vector>
+
+#include "monitoring/thread_status_util.h"
+#include "port/port.h"
+#include "test_util/sync_point.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+void ThreadPoolImpl::PthreadCall(const char* label, int result) {
+  if (result != 0) {
+    fprintf(stderr, "pthread %s: %s\n", label, errnoStr(result).c_str());
+    abort();
+  }
+}
+
+struct ThreadPoolImpl::Impl {
+  Impl();
+  ~Impl();
+
+  void JoinThreads(bool wait_for_jobs_to_complete);
+
+  void SetBackgroundThreadsInternal(int num, bool allow_reduce);
+  int GetBackgroundThreads();
+
+  unsigned int GetQueueLen() const {
+    return queue_len_.load(std::memory_order_relaxed);
+  }
+
+  void LowerIOPriority();
+
+  void LowerCPUPriority(CpuPriority pri);
+
+  void WakeUpAllThreads() { bgsignal_.notify_all(); }
+
+  void BGThread(size_t thread_id);
+
+  void StartBGThreads();
+
+  void Submit(std::function<void()>&& schedule,
+              std::function<void()>&& unschedule, void* tag);
+
+  int UnSchedule(void* arg);
+
+  void SetHostEnv(Env* env) { env_ = env; }
+
+  Env* GetHostEnv() const { return env_; }
+
+  bool HasExcessiveThread() const {
+    return static_cast<int>(bgthreads_.size()) > total_threads_limit_;
+  }
+
+  // Return true iff the current thread is the excessive thread to terminate.
+  // Always terminate the running thread that was added last, even if there is
+  // more than one thread to terminate.
+  bool IsLastExcessiveThread(size_t thread_id) const {
+    return HasExcessiveThread() && thread_id == bgthreads_.size() - 1;
+  }
+
+  bool IsExcessiveThread(size_t thread_id) const {
+    return static_cast<int>(thread_id) >= total_threads_limit_;
+  }
+
+  // Return the thread priority.
+  // This would allow its member-thread to know its priority.
+  Env::Priority GetThreadPriority() const { return priority_; }
+
+  // Set the thread priority.
+  void SetThreadPriority(Env::Priority priority) { priority_ = priority; }
+
+  int ReserveThreads(int threads_to_be_reserved) {
+    std::unique_lock<std::mutex> lock(mu_);
+    // We can reserve at most num_waiting_threads_ in total, so the number of
+    // threads that can be reserved might be fewer than desired. In rare
+    // cases, num_waiting_threads_ could be less than reserved_threads_ due
+    // to SetBackgroundThreadsInternal or the last excessive threads. If that
+    // happens, we cannot reserve any other threads.
+    int reserved_threads_in_success =
+        std::min(std::max(num_waiting_threads_ - reserved_threads_, 0),
+                 threads_to_be_reserved);
+    reserved_threads_ += reserved_threads_in_success;
+    return reserved_threads_in_success;
+  }
+
+  int ReleaseThreads(int threads_to_be_released) {
+    std::unique_lock<std::mutex> lock(mu_);
+    // We cannot release more than reserved_threads_
+    int released_threads_in_success =
+        std::min(reserved_threads_, threads_to_be_released);
+    reserved_threads_ -= released_threads_in_success;
+    WakeUpAllThreads();
+    return released_threads_in_success;
+  }
+
+ private:
+  static void BGThreadWrapper(void* arg);
+
+  bool low_io_priority_;
+  CpuPriority cpu_priority_;
+  Env::Priority priority_;
+  Env* env_;
+
+  int total_threads_limit_;
+  std::atomic_uint queue_len_;  // Queue length. Used for stats reporting
+  // Number of reserved threads, managed by ReserveThreads(..) and
+  // ReleaseThreads(..). If num_waiting_threads_ is no larger than
+  // reserved_threads_, any additional thread will be blocked, to honor the
+  // reservation mechanism.
+  int reserved_threads_;
+  // Number of waiting threads (the maximum number of threads that can be
+  // reserved). In rare cases, num_waiting_threads_ could be less than
+  // reserved_threads_ due to SetBackgroundThreadsInternal or the last
+  // excessive threads.
+  int num_waiting_threads_;
+  bool exit_all_threads_;
+  bool wait_for_jobs_to_complete_;
+
+  // Entry per Schedule()/Submit() call
+  struct BGItem {
+    void* tag = nullptr;
+    std::function<void()> function;
+    std::function<void()> unschedFunction;
+  };
+
+  using BGQueue = std::deque<BGItem>;
+  BGQueue queue_;
+
+  std::mutex mu_;
+  std::condition_variable bgsignal_;
+  std::vector<port::Thread> bgthreads_;
+};
+
+inline ThreadPoolImpl::Impl::Impl()
+    : low_io_priority_(false),
+      cpu_priority_(CpuPriority::kNormal),
+      priority_(Env::LOW),
+      env_(nullptr),
+      total_threads_limit_(0),
+      queue_len_(),
+      reserved_threads_(0),
+      num_waiting_threads_(0),
+      exit_all_threads_(false),
+      wait_for_jobs_to_complete_(false),
+      queue_(),
+      mu_(),
+      bgsignal_(),
+      bgthreads_() {}
+
+inline ThreadPoolImpl::Impl::~Impl() { assert(bgthreads_.size() == 0U); }
+
+void ThreadPoolImpl::Impl::JoinThreads(bool wait_for_jobs_to_complete) {
+  std::unique_lock<std::mutex> lock(mu_);
+  assert(!exit_all_threads_);
+
+  wait_for_jobs_to_complete_ = wait_for_jobs_to_complete;
+  exit_all_threads_ = true;
+  // prevent threads from being recreated right after they're joined, in case
+  // the user is concurrently submitting jobs.
+  total_threads_limit_ = 0;
+  reserved_threads_ = 0;
+  num_waiting_threads_ = 0;
+
+  lock.unlock();
+
+  bgsignal_.notify_all();
+
+  for (auto& th : bgthreads_) {
+    th.join();
+  }
+
+  bgthreads_.clear();
+
+  exit_all_threads_ = false;
+  wait_for_jobs_to_complete_ = false;
+}
+
+inline void ThreadPoolImpl::Impl::LowerIOPriority() {
+  std::lock_guard<std::mutex> lock(mu_);
+  low_io_priority_ = true;
+}
+
+inline void ThreadPoolImpl::Impl::LowerCPUPriority(CpuPriority pri) {
+  std::lock_guard<std::mutex> lock(mu_);
+  cpu_priority_ = pri;
+}
+
+void ThreadPoolImpl::Impl::BGThread(size_t thread_id) {
+  bool low_io_priority = false;
+  CpuPriority current_cpu_priority = CpuPriority::kNormal;
+
+  while (true) {
+    // Wait until there is an item that is ready to run
+    std::unique_lock<std::mutex> lock(mu_);
+    // Stop waiting if the thread needs to do work or needs to terminate.
+    // Increase num_waiting_threads_ once this task has started waiting
+    num_waiting_threads_++;
+
+    TEST_SYNC_POINT("ThreadPoolImpl::BGThread::WaitingThreadsInc");
+    TEST_IDX_SYNC_POINT("ThreadPoolImpl::BGThread::Start:th", thread_id);
+    // When exit_all_threads_ is not set and the current thread is not the
+    // last excessive thread, it may be blocked for 3 reasons:
+    // 1) the queue is empty
+    // 2) it is an excessive thread (not the last one)
+    // 3) the number of waiting threads is not greater than reserved threads
+    //    (i.e., no available threads due to full reservation)
+    while (!exit_all_threads_ && !IsLastExcessiveThread(thread_id) &&
+           (queue_.empty() || IsExcessiveThread(thread_id) ||
+            num_waiting_threads_ <= reserved_threads_)) {
+      bgsignal_.wait(lock);
+    }
+    // Decrease num_waiting_threads_ once the thread is not waiting
+    num_waiting_threads_--;
+
+    if (exit_all_threads_) {  // mechanism to let BG threads exit safely
+
+      if (!wait_for_jobs_to_complete_ || queue_.empty()) {
+        break;
+      }
+    } else if (IsLastExcessiveThread(thread_id)) {
+      // Current thread is the last generated one and is excessive.
+      // We always terminate excessive threads in the reverse order of
+      // generation time. But not when `exit_all_threads_ == true`,
+      // otherwise `JoinThreads()` could try to `join()` a `detach()`ed
+      // thread.
+      auto& terminating_thread = bgthreads_.back();
+      terminating_thread.detach();
+      bgthreads_.pop_back();
+      if (HasExcessiveThread()) {
+        // There is still at least one more excessive thread to terminate.
+        WakeUpAllThreads();
+      }
+      TEST_IDX_SYNC_POINT("ThreadPoolImpl::BGThread::Termination:th",
+                          thread_id);
+      TEST_SYNC_POINT("ThreadPoolImpl::BGThread::Termination");
+      break;
+    }
+
+    auto func = std::move(queue_.front().function);
+    queue_.pop_front();
+
+    queue_len_.store(static_cast<unsigned int>(queue_.size()),
+                     std::memory_order_relaxed);
+
+    bool decrease_io_priority = (low_io_priority != low_io_priority_);
+    CpuPriority cpu_priority = cpu_priority_;
+    lock.unlock();
+
+    if (cpu_priority < current_cpu_priority) {
+      TEST_SYNC_POINT_CALLBACK("ThreadPoolImpl::BGThread::BeforeSetCpuPriority",
+                               &current_cpu_priority);
+      // 0 means current thread.
+      port::SetCpuPriority(0, cpu_priority);
+      current_cpu_priority = cpu_priority;
+      TEST_SYNC_POINT_CALLBACK("ThreadPoolImpl::BGThread::AfterSetCpuPriority",
+                               &current_cpu_priority);
+    }
+
+#ifdef OS_LINUX
+    if (decrease_io_priority) {
+#define IOPRIO_CLASS_SHIFT (13)
+#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
+      // Put schedule into IOPRIO_CLASS_IDLE class (lowest)
+      // These system calls only have an effect when used in conjunction
+      // with an I/O scheduler that supports I/O priorities. As at
+      // kernel 2.6.17 the only such scheduler is the Completely
+      // Fair Queuing (CFQ) I/O scheduler.
+      // To change scheduler:
+      //  echo cfq > /sys/block/<device_name>/queue/schedule
+      // Tunables to consider:
+      //  /sys/block/<device_name>/queue/slice_idle
+      //  /sys/block/<device_name>/queue/slice_sync
+      syscall(SYS_ioprio_set, 1,  // IOPRIO_WHO_PROCESS
+              0,                  // current thread
+              IOPRIO_PRIO_VALUE(3, 0));
+      low_io_priority = true;
+    }
+#else
+    (void)decrease_io_priority;  // avoid 'unused variable' error
+#endif
+
+    TEST_SYNC_POINT_CALLBACK("ThreadPoolImpl::Impl::BGThread:BeforeRun",
+                             &priority_);
+
+    func();
+  }
+}
+
+// Helper struct for passing arguments when creating threads.
+struct BGThreadMetadata {
+  ThreadPoolImpl::Impl* thread_pool_;
+  size_t thread_id_;  // Index of the thread in the pool.
+  BGThreadMetadata(ThreadPoolImpl::Impl* thread_pool, size_t thread_id)
+      : thread_pool_(thread_pool), thread_id_(thread_id) {}
+};
+
+void ThreadPoolImpl::Impl::BGThreadWrapper(void* arg) {
+  BGThreadMetadata* meta = reinterpret_cast<BGThreadMetadata*>(arg);
+  size_t thread_id = meta->thread_id_;
+  ThreadPoolImpl::Impl* tp = meta->thread_pool_;
+#ifdef ROCKSDB_USING_THREAD_STATUS
+  // initialize it because the compiler isn't good enough to see we don't use
+  // it uninitialized
+  ThreadStatus::ThreadType thread_type = ThreadStatus::NUM_THREAD_TYPES;
+  switch (tp->GetThreadPriority()) {
+    case Env::Priority::HIGH:
+      thread_type = ThreadStatus::HIGH_PRIORITY;
+      break;
+    case Env::Priority::LOW:
+      thread_type = ThreadStatus::LOW_PRIORITY;
+      break;
+    case Env::Priority::BOTTOM:
+      thread_type = ThreadStatus::BOTTOM_PRIORITY;
+      break;
+    case Env::Priority::USER:
+      thread_type = ThreadStatus::USER;
+      break;
+    case Env::Priority::TOTAL:
+      assert(false);
+      return;
+  }
+  assert(thread_type != ThreadStatus::NUM_THREAD_TYPES);
+  ThreadStatusUtil::RegisterThread(tp->GetHostEnv(), thread_type);
+#endif
+  delete meta;
+  tp->BGThread(thread_id);
+#ifdef ROCKSDB_USING_THREAD_STATUS
+  ThreadStatusUtil::UnregisterThread();
+#endif
+  return;
+}
+
+void ThreadPoolImpl::Impl::SetBackgroundThreadsInternal(int num,
+                                                        bool allow_reduce) {
+  std::lock_guard<std::mutex> lock(mu_);
+  if (exit_all_threads_) {
+    return;
+  }
+  if (num > total_threads_limit_ ||
+      (num < total_threads_limit_ && allow_reduce)) {
+    total_threads_limit_ = std::max(0, num);
+    WakeUpAllThreads();
+    StartBGThreads();
+  }
+}
+
+int ThreadPoolImpl::Impl::GetBackgroundThreads() {
+  std::unique_lock<std::mutex> lock(mu_);
+  return total_threads_limit_;
+}
+
+void ThreadPoolImpl::Impl::StartBGThreads() {
+  // Start background threads if necessary
+  while ((int)bgthreads_.size() < total_threads_limit_) {
+    port::Thread p_t(&BGThreadWrapper,
+                     new BGThreadMetadata(this, bgthreads_.size()));
+
+// Set the thread name to aid debugging
+#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 12)
+    auto th_handle = p_t.native_handle();
+    std::string thread_priority = Env::PriorityToString(GetThreadPriority());
+    std::ostringstream thread_name_stream;
+    thread_name_stream << "rocksdb:";
+    for (char c : thread_priority) {
+      thread_name_stream << static_cast<char>(tolower(c));
+    }
+    pthread_setname_np(th_handle, thread_name_stream.str().c_str());
+#endif
+#endif
+    bgthreads_.push_back(std::move(p_t));
+  }
+}
+
+void ThreadPoolImpl::Impl::Submit(std::function<void()>&& schedule,
+                                  std::function<void()>&& unschedule,
+                                  void* tag) {
+  std::lock_guard<std::mutex> lock(mu_);
+
+  if (exit_all_threads_) {
+    return;
+  }
+
+  StartBGThreads();
+
+  // Add to priority queue
+  queue_.push_back(BGItem());
+  TEST_SYNC_POINT("ThreadPoolImpl::Submit::Enqueue");
+  auto& item = queue_.back();
+  item.tag = tag;
+  item.function = std::move(schedule);
+  item.unschedFunction = std::move(unschedule);
+
+  queue_len_.store(static_cast<unsigned int>(queue_.size()),
+                   std::memory_order_relaxed);
+
+  if (!HasExcessiveThread()) {
+    // Wake up at least one waiting thread.
+    bgsignal_.notify_one();
+  } else {
+    // Need to wake up all threads to make sure the one woken
+    // up is not the one to terminate.
+    WakeUpAllThreads();
+  }
+}
+
+int ThreadPoolImpl::Impl::UnSchedule(void* arg) {
+  int count = 0;
+
+  std::vector<std::function<void()>> candidates;
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+
+    // Remove from priority queue
+    BGQueue::iterator it = queue_.begin();
+    while (it != queue_.end()) {
+      if (arg == (*it).tag) {
+        if (it->unschedFunction) {
+          candidates.push_back(std::move(it->unschedFunction));
+        }
+        it = queue_.erase(it);
+        count++;
+      } else {
+        ++it;
+      }
+    }
+    queue_len_.store(static_cast<unsigned int>(queue_.size()),
+                     std::memory_order_relaxed);
+  }
+
+  // Run unschedule functions outside the mutex
+  for (auto& f : candidates) {
+    f();
+  }
+
+  return count;
+}
+
+ThreadPoolImpl::ThreadPoolImpl() : impl_(new Impl()) {}
+
+ThreadPoolImpl::~ThreadPoolImpl() {}
+
+void ThreadPoolImpl::JoinAllThreads() { impl_->JoinThreads(false); }
+
+void ThreadPoolImpl::SetBackgroundThreads(int num) {
+  impl_->SetBackgroundThreadsInternal(num, true);
+}
+
+int ThreadPoolImpl::GetBackgroundThreads() {
+  return impl_->GetBackgroundThreads();
+}
+
+unsigned int ThreadPoolImpl::GetQueueLen() const {
+  return impl_->GetQueueLen();
+}
+
+void ThreadPoolImpl::WaitForJobsAndJoinAllThreads() {
+  impl_->JoinThreads(true);
+}
+
+void ThreadPoolImpl::LowerIOPriority() { impl_->LowerIOPriority(); }
+
+void ThreadPoolImpl::LowerCPUPriority(CpuPriority pri) {
+  impl_->LowerCPUPriority(pri);
+}
+
+void ThreadPoolImpl::IncBackgroundThreadsIfNeeded(int num) {
+  impl_->SetBackgroundThreadsInternal(num, false);
+}
+
+void ThreadPoolImpl::SubmitJob(const std::function<void()>& job) {
+  auto copy(job);
+  impl_->Submit(std::move(copy), std::function<void()>(), nullptr);
+}
+
+void ThreadPoolImpl::SubmitJob(std::function<void()>&& job) {
+  impl_->Submit(std::move(job), std::function<void()>(), nullptr);
+}
+
+void ThreadPoolImpl::Schedule(void (*function)(void* arg1), void* arg,
+                              void* tag, void (*unschedFunction)(void* arg)) {
+  if (unschedFunction == nullptr) {
+    impl_->Submit(std::bind(function, arg), std::function<void()>(), tag);
+  } else {
+    impl_->Submit(std::bind(function, arg), std::bind(unschedFunction, arg),
+                  tag);
+  }
+}
+
+int ThreadPoolImpl::UnSchedule(void* arg) { return impl_->UnSchedule(arg); }
+
+void ThreadPoolImpl::SetHostEnv(Env* env) { impl_->SetHostEnv(env); }
+
+Env* ThreadPoolImpl::GetHostEnv() const { return impl_->GetHostEnv(); }
+
+// Return the thread priority.
+// This would allow its member-thread to know its priority.
+Env::Priority ThreadPoolImpl::GetThreadPriority() const {
+  return impl_->GetThreadPriority();
+}
+
+// Set the thread priority.
+void ThreadPoolImpl::SetThreadPriority(Env::Priority priority) {
+  impl_->SetThreadPriority(priority);
+}
+
+// Reserve a specific number of threads, preventing them from running other
+// functions. The number of reserved threads could be fewer than requested.
+int ThreadPoolImpl::ReserveThreads(int threads_to_be_reserved) {
+  return impl_->ReserveThreads(threads_to_be_reserved);
+}
+
+// Release a specific number of threads
+int ThreadPoolImpl::ReleaseThreads(int threads_to_be_released) {
+  return impl_->ReleaseThreads(threads_to_be_released);
+}
+
+ThreadPool* NewThreadPool(int num_threads) {
+  ThreadPoolImpl* thread_pool = new ThreadPoolImpl();
+  thread_pool->SetBackgroundThreads(num_threads);
+  return thread_pool;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.h
new file mode 100644
index 0000000000..a5109e38f5
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/threadpool_imp.h
@@ -0,0 +1,120 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "rocksdb/env.h"
+#include "rocksdb/threadpool.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class ThreadPoolImpl : public ThreadPool {
+ public:
+  ThreadPoolImpl();
+  ~ThreadPoolImpl();
+
+  ThreadPoolImpl(ThreadPoolImpl&&) = delete;
+  ThreadPoolImpl& operator=(ThreadPoolImpl&&) = delete;
+
+  // Implement ThreadPool interfaces
+
+  // Wait for all threads to finish.
+  // Discards all the jobs that did not
+  // start executing and waits for those running
+  // to complete
+  void JoinAllThreads() override;
+
+  // Set the number of background threads that will be executing the
+  // scheduled jobs.
+  void SetBackgroundThreads(int num) override;
+  int GetBackgroundThreads() override;
+
+  // Get the number of jobs scheduled in the ThreadPool queue.
+  unsigned int GetQueueLen() const override;
+
+  // Waits for all jobs to complete: those
+  // that already started running and those that did not
+  // start yet
+  void WaitForJobsAndJoinAllThreads() override;
+
+  // Make threads run at a lower kernel IO priority
+  // Currently only has effect on Linux
+  void LowerIOPriority();
+
+  // Make threads run at a lower kernel CPU priority
+  // Currently only has effect on Linux
+  void LowerCPUPriority(CpuPriority pri);
+
+  // Ensure there are at least num threads in the pool,
+  // but do not kill threads if there are more
+  void IncBackgroundThreadsIfNeeded(int num);
+
+  // Submit a fire-and-forget job.
+  // These jobs cannot be unscheduled.
+
+  // This allows submitting the same job multiple times
+  void SubmitJob(const std::function<void()>&) override;
+  // This moves the function in for efficiency
+  void SubmitJob(std::function<void()>&&) override;
+
+  // Schedule a job with an unschedule tag and unschedule function.
+  // Can be used to filter and unschedule jobs by a tag
+  // that are still in the queue and did not start running
+  void Schedule(void (*function)(void* arg1), void* arg, void* tag,
+                void (*unschedFunction)(void* arg));
+
+  // Filter jobs that are still in the queue and match
+  // the given tag. Remove them from the queue if any,
+  // and for each such job execute the unschedule function
+  // if one was given at scheduling time.
+  int UnSchedule(void* tag);
+
+  void SetHostEnv(Env* env);
+
+  Env* GetHostEnv() const;
+
+  // Return the thread priority.
+  // This would allow its member-thread to know its priority.
+  Env::Priority GetThreadPriority() const;
+
+  // Set the thread priority.
+  void SetThreadPriority(Env::Priority priority);
+
+  // Reserve a specific number of threads, preventing them from running other
+  // functions. The number of reserved threads could be fewer than the desired
+  // one
+  int ReserveThreads(int threads_to_be_reserved) override;
+
+  // Release a specific number of threads
+  int ReleaseThreads(int threads_to_be_released) override;
+
+  static void PthreadCall(const char* label, int result);
+
+  struct Impl;
+
+ private:
+  // The current public virtual interface does not provide usable
+  // functionality, and thus cannot be used internally to
+  // front different implementations.
+  //
+  // We use a pimpl idiom in order to easily replace the thread pool impl
+  // w/o touching the header file, by providing a different .cc, potentially
+  // driven by a CMake option.
+  //
+  // Another option is to introduce an Env::MakeThreadPool() virtual interface
+  // and override the environment. This would require refactoring ThreadPool
+  // usage.
+  //
+  // We can also combine these two approaches
+  std::unique_ptr<Impl> impl_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/timer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/timer.h
new file mode 100644
index 0000000000..db71cefaf8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/timer.h
@@ -0,0 +1,340 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <queue>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "monitoring/instrumented_mutex.h"
+#include "rocksdb/system_clock.h"
+#include "test_util/sync_point.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A Timer class to handle repeated work.
+//
+// `Start()` and `Shutdown()` are currently not thread-safe. The client must
+// serialize calls to these two member functions.
+//
+// A single timer instance can handle multiple functions via a single thread.
+// It is better to leave long-running work to a dedicated thread pool.
+//
+// Timer can be started by calling `Start()`, and ended by calling
+// `Shutdown()`. Work (in terms of a `void function`) can be scheduled by
+// calling `Add` with a unique function name and de-scheduled by calling
+// `Cancel`. Many functions can be added.
+//
+// Impl Details:
+// A heap is used to keep track of when the next timer goes off.
+// A map from a function name to the function keeps track of all the functions.
+class Timer {
+ public:
+  explicit Timer(SystemClock* clock)
+      : clock_(clock),
+        mutex_(clock),
+        cond_var_(&mutex_),
+        running_(false),
+        executing_task_(false) {}
+
+  ~Timer() { Shutdown(); }
+
+  // Add a new function to run.
+  // fn_name has to be unique; otherwise the add fails and returns false.
+  // start_after_us is the initial delay.
+  // repeat_every_us is the interval between the ending time of the last call
+  // and the starting time of the next call. For example, repeat_every_us =
+  // 2000 and the function takes 1000us to run. If it starts at time [now]us,
+  // then it finishes at [now]+1000us, and the 2nd run's starting time will be
+  // at [now]+3000us. repeat_every_us == 0 means do not repeat.
+  bool Add(std::function<void()> fn, const std::string& fn_name,
+           uint64_t start_after_us, uint64_t repeat_every_us) {
+    auto fn_info = std::make_unique<FunctionInfo>(std::move(fn), fn_name, 0,
+                                                  repeat_every_us);
+    InstrumentedMutexLock l(&mutex_);
+    // Assign the time within the mutex to make sure the next_run_time is
+    // larger than the currently running one
+    fn_info->next_run_time_us = clock_->NowMicros() + start_after_us;
+    // the new task's start time should never be before the currently
+    // executing task's time, as the executing task can only be running if its
+    // next_run_time_us is due (<= clock_->NowMicros()).
+    if (executing_task_ &&
+        fn_info->next_run_time_us < heap_.top()->next_run_time_us) {
+      return false;
+    }
+    auto it = map_.find(fn_name);
+    if (it == map_.end()) {
+      heap_.push(fn_info.get());
+      map_.try_emplace(fn_name, std::move(fn_info));
+    } else {
+      // the timer doesn't support duplicated function names
+      return false;
+    }
+    cond_var_.SignalAll();
+    return true;
+  }
+
+  void Cancel(const std::string& fn_name) {
+    InstrumentedMutexLock l(&mutex_);
+
+    // Mark the function with fn_name as invalid so that it will not be
+    // requeued.
+    auto it = map_.find(fn_name);
+    if (it != map_.end() && it->second) {
+      it->second->Cancel();
+    }
+
+    // If the currently running function is fn_name, then we need to wait
+    // until it finishes before returning to the caller.
+    while (!heap_.empty() && executing_task_) {
+      FunctionInfo* func_info = heap_.top();
+      assert(func_info);
+      if (func_info->name == fn_name) {
+        WaitForTaskCompleteIfNecessary();
+      } else {
+        break;
+      }
+    }
+  }
+
+  void CancelAll() {
+    InstrumentedMutexLock l(&mutex_);
+    CancelAllWithLock();
+  }
+
+  // Start the Timer
+  bool Start() {
+    InstrumentedMutexLock l(&mutex_);
+    if (running_) {
+      return false;
+    }
+
+    running_ = true;
+    thread_ = std::make_unique<port::Thread>(&Timer::Run, this);
+    return true;
+  }
+
+  // Shutdown the Timer
+  bool Shutdown() {
+    {
+      InstrumentedMutexLock l(&mutex_);
+      if (!running_) {
+        return false;
+      }
+      running_ = false;
+      CancelAllWithLock();
+      cond_var_.SignalAll();
+    }
+
+    if (thread_) {
+      thread_->join();
+    }
+    return true;
+  }
+
+  bool HasPendingTask() const {
+    InstrumentedMutexLock l(&mutex_);
+    for (const auto& fn_info : map_) {
+      if (fn_info.second->IsValid()) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+#ifndef NDEBUG
+  // Wait until the Timer starts waiting, call the optional callback, then
+  // wait for the Timer to start waiting again.
+  // Tests can provide a custom Clock object to mock time, and use the callback
+  // here to bump the current time and trigger the Timer. See timer_test for
+  // an example.
+  //
+  // Note: only one caller of this method is supported.
+  void TEST_WaitForRun(const std::function<void()>& callback = nullptr) {
+    InstrumentedMutexLock l(&mutex_);
+    // It acts as a spin lock
+    while (executing_task_ ||
+           (!heap_.empty() &&
+            heap_.top()->next_run_time_us <= clock_->NowMicros())) {
+      cond_var_.TimedWait(clock_->NowMicros() + 1000);
+    }
+    if (callback != nullptr) {
+      callback();
+    }
+    cond_var_.SignalAll();
+    do {
+      cond_var_.TimedWait(clock_->NowMicros() + 1000);
+    } while (executing_task_ ||
+             (!heap_.empty() &&
+              heap_.top()->next_run_time_us <= clock_->NowMicros()));
+  }
+
+  size_t TEST_GetPendingTaskNum() const {
+    InstrumentedMutexLock l(&mutex_);
+    size_t ret = 0;
+    for (const auto& fn_info : map_) {
+      if (fn_info.second->IsValid()) {
+        ret++;
+      }
+    }
+    return ret;
+  }
+
+  void TEST_OverrideTimer(SystemClock* clock) {
+    InstrumentedMutexLock l(&mutex_);
+    clock_ = clock;
+  }
+#endif  // NDEBUG
+
+ private:
+  void Run() {
+    InstrumentedMutexLock l(&mutex_);
+
+    while (running_) {
+      if (heap_.empty()) {
+        // wait
+        TEST_SYNC_POINT("Timer::Run::Waiting");
+        cond_var_.Wait();
+        continue;
+      }
+
+      FunctionInfo* current_fn = heap_.top();
+      assert(current_fn);
+
+      if (!current_fn->IsValid()) {
+        heap_.pop();
+        map_.erase(current_fn->name);
+        continue;
+      }
+
+      if (current_fn->next_run_time_us <= clock_->NowMicros()) {
+        // make a copy of the function so it won't be changed after
+        // mutex_.unlock.
+        std::function<void()> fn = current_fn->fn;
+        executing_task_ = true;
+        mutex_.Unlock();
+        // Execute the work
+        fn();
+        mutex_.Lock();
+        executing_task_ = false;
+        cond_var_.SignalAll();
+
+        // Remove the work from the heap once it is done executing; make sure
+        // it's the same function after executing the work while the mutex was
+        // released.
+        // Note that we are just removing the pointer from the heap. Its
+        // memory is still managed in the map (as it holds a unique ptr).
+        // So current_fn is still a valid ptr.
+        assert(heap_.top() == current_fn);
+        heap_.pop();
+
+        // current_fn may be cancelled already.
+        if (current_fn->IsValid() && current_fn->repeat_every_us > 0) {
+          assert(running_);
+          current_fn->next_run_time_us =
+              clock_->NowMicros() + current_fn->repeat_every_us;
+
+          // Schedule new work into the heap with the new time.
+          heap_.push(current_fn);
+        } else {
+          // if current_fn is cancelled or there is no need to repeat, remove
+          // it from the map to avoid a leak.
+          map_.erase(current_fn->name);
+        }
+      } else {
+        cond_var_.TimedWait(current_fn->next_run_time_us);
+      }
+    }
+  }
+
+  void CancelAllWithLock() {
+    mutex_.AssertHeld();
+    if (map_.empty() && heap_.empty()) {
+      return;
+    }
+
+    // With mutex_ held, set all tasks to invalid so that they will not be
+    // re-queued.
+    for (auto& elem : map_) {
+      auto& func_info = elem.second;
+      assert(func_info);
+      func_info->Cancel();
+    }
+
+    // WaitForTaskCompleteIfNecessary() may release mutex_
+    WaitForTaskCompleteIfNecessary();
+
+    while (!heap_.empty()) {
+      heap_.pop();
+    }
+    map_.clear();
+  }
+
+  // A wrapper around std::function to keep track of when it should run next
+  // and at what frequency.
+  struct FunctionInfo {
+    // the actual work
+    std::function<void()> fn;
+    // name of the function
+    std::string name;
+    // when the function should run next
+    uint64_t next_run_time_us;
+    // repeat interval
+    uint64_t repeat_every_us;
+    // controls whether this function is valid.
+    // A function is valid upon construction and until someone explicitly
+    // calls `Cancel()`.
+    bool valid;
+
+    FunctionInfo(std::function<void()>&& _fn, std::string _name,
+                 const uint64_t _next_run_time_us, uint64_t _repeat_every_us)
+        : fn(std::move(_fn)),
+          name(std::move(_name)),
+          next_run_time_us(_next_run_time_us),
+          repeat_every_us(_repeat_every_us),
+          valid(true) {}
+
+    void Cancel() { valid = false; }
+
+    bool IsValid() const { return valid; }
+  };
+
+  void WaitForTaskCompleteIfNecessary() {
+    mutex_.AssertHeld();
+    while (executing_task_) {
+      TEST_SYNC_POINT("Timer::WaitForTaskCompleteIfNecessary:TaskExecuting");
+      cond_var_.Wait();
+    }
+  }
+
+  struct RunTimeOrder {
+    bool operator()(const FunctionInfo* f1, const FunctionInfo* f2) {
+      return f1->next_run_time_us > f2->next_run_time_us;
+    }
+  };
+
+  SystemClock* clock_;
+  // This mutex controls both the heap_ and the map_. It needs to be held for
+  // making any changes in them.
+  mutable InstrumentedMutex mutex_;
+  InstrumentedCondVar cond_var_;
+  std::unique_ptr<port::Thread> thread_;
+  bool running_;
+  bool executing_task_;
+
+  std::priority_queue<FunctionInfo*, std::vector<FunctionInfo*>, RunTimeOrder>
+      heap_;
+
+  // In addition to providing a mapping from a function name to a function,
+  // it is also responsible for memory management.
+  std::unordered_map<std::string, std::unique_ptr<FunctionInfo>> map_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/timer_queue.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/timer_queue.h
new file mode 100644
index 0000000000..36a1744aca
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/timer_queue.h
@@ -0,0 +1,231 @@
+// Portions Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Borrowed from
+// http://www.crazygaze.com/blog/2016/03/24/portable-c-timer-queue/
+// Timer Queue
+//
+// License
+//
+// The source code in this article is licensed under the CC0 license, so feel
+// free to copy, modify, share, do whatever you want with it.
+// No attribution is required, but I'll be happy if you do.
+// CC0 license
+
+// The person who associated a work with this deed has dedicated the work to
+// the public domain by waiving all of his or her rights to the work worldwide
+// under copyright law, including all related and neighboring rights, to the
+// extent allowed by law. You can copy, modify, distribute and perform the
+// work, even for commercial purposes, all without asking permission.
+
+#pragma once
+
+#include <assert.h>
+
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <utility>
+
+#include "port/port.h"
+#include "test_util/sync_point.h"
+
+// Allows execution of handlers at a specified time in the future
+// Guarantees:
+//  - All handlers are executed ONCE, even if cancelled (the aborted parameter
+//    will be set to true)
+//  - If TimerQueue is destroyed, it will cancel all handlers.
+//  - Handlers are ALWAYS executed in the Timer Queue worker thread.
+//  - Handlers' execution order is NOT guaranteed
+//
+////////////////////////////////////////////////////////////////////////////////
+// borrowed from
+// http://www.crazygaze.com/blog/2016/03/24/portable-c-timer-queue/
+class TimerQueue {
+ public:
+  TimerQueue() : m_th(&TimerQueue::run, this) {}
+
+  ~TimerQueue() { shutdown(); }
+
+  // This function is not thread-safe.
+  void shutdown() {
+    if (closed_) {
+      return;
+    }
+    cancelAll();
+    // Abusing the timer queue to trigger the shutdown.
+    add(0, [this](bool) {
+      m_finish = true;
+      return std::make_pair(false, 0);
+    });
+    m_th.join();
+    closed_ = true;
+  }
+
+  // Adds a new timer
+  // \return
+  //  Returns the ID of the new timer. You can use this ID to cancel the
+  //  timer
+  uint64_t add(int64_t milliseconds,
+               std::function<std::pair<bool, int64_t>(bool)> handler) {
+    WorkItem item;
+    Clock::time_point tp = Clock::now();
+    item.end = tp + std::chrono::milliseconds(milliseconds);
+    TEST_SYNC_POINT_CALLBACK("TimeQueue::Add:item.end", &item.end);
+    item.period = milliseconds;
+    item.handler = std::move(handler);
+
+    std::unique_lock<std::mutex> lk(m_mtx);
+    uint64_t id = ++m_idcounter;
+    item.id = id;
+    m_items.push(std::move(item));
+
+    // Something changed, so wake up the timer thread
+    m_checkWork.notify_one();
+    return id;
+  }
+
+  // Cancels the specified timer
+  // \return
+  //  1 if the timer was cancelled.
+  //  0 if you were too late to cancel (or the timer ID was never valid to
+  //  start with)
+  size_t cancel(uint64_t id) {
+    // Instead of removing the item from the container (thus breaking the
+    // heap integrity), we set the item as having no handler, and put
+    // that handler on a new item at the top for immediate execution.
+    // The timer thread will then ignore the original item, since it has no
+    // handler.
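+    //
+    // A hypothetical usage sketch of add()/cancel() (not part of the original
+    // source): the handler receives `aborted` and returns {reschedule, period}:
+    //
+    //   TimerQueue tq;
+    //   uint64_t id = tq.add(100, [](bool aborted) {
+    //     (void)aborted;  // true if the queue cancelled us
+    //     return std::make_pair(false, int64_t{0});  // run once, no repeat
+    //   });
+    //   tq.cancel(id);  // returns 1 if the handler had not yet run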
+    std::unique_lock<std::mutex> lk(m_mtx);
+    for (auto&& item : m_items.getContainer()) {
+      if (item.id == id && item.handler) {
+        WorkItem newItem;
+        // Zero time, so it stays at the top for immediate execution
+        newItem.end = Clock::time_point();
+        newItem.id = 0;  // Means it is a canceled item
+        // Move the handler from item to newItem (thus clearing item)
+        newItem.handler = std::move(item.handler);
+        m_items.push(std::move(newItem));
+
+        // Something changed, so wake up the timer thread
+        m_checkWork.notify_one();
+        return 1;
+      }
+    }
+    return 0;
+  }
+
+  // Cancels all timers
+  // \return
+  //  The number of timers cancelled
+  size_t cancelAll() {
+    // Setting all "end" to 0 (for immediate execution) is ok,
+    // since it maintains the heap integrity
+    std::unique_lock<std::mutex> lk(m_mtx);
+    m_cancel = true;
+    for (auto&& item : m_items.getContainer()) {
+      if (item.id && item.handler) {
+        item.end = Clock::time_point();
+        item.id = 0;
+      }
+    }
+    auto ret = m_items.size();
+
+    m_checkWork.notify_one();
+    return ret;
+  }
+
+ private:
+  using Clock = std::chrono::steady_clock;
+  TimerQueue(const TimerQueue&) = delete;
+  TimerQueue& operator=(const TimerQueue&) = delete;
+
+  void run() {
+    std::unique_lock<std::mutex> lk(m_mtx);
+    while (!m_finish) {
+      auto end = calcWaitTime_lock();
+      if (end.first) {
+        // Timers found, so wait until the earliest one expires (or something
+        // else changes)
+        m_checkWork.wait_until(lk, end.second);
+      } else {
+        // No timers exist, so wait forever until something changes
+        m_checkWork.wait(lk);
+      }
+
+      // Check and execute as much work as possible, such as all expired
+      // timers
+      checkWork(&lk);
+    }
+
+    // If we are shutting down, we should not have any items left,
+    // since the shutdown cancels all items
+    assert(m_items.size() == 0);
+  }
+
+  std::pair<bool, Clock::time_point> calcWaitTime_lock() {
+    while (m_items.size()) {
+      if (m_items.top().handler) {
+        // Item present, so return the new wait time
+        return std::make_pair(true, m_items.top().end);
+      } else {
+        // Discard empty handlers (they were cancelled)
+        m_items.pop();
+      }
+    }
+
+    // No items found, so return no wait time (causes the thread to wait
+    // indefinitely)
+    return std::make_pair(false, Clock::time_point());
+  }
+
+  void checkWork(std::unique_lock<std::mutex>* lk) {
+    while (m_items.size() && m_items.top().end <= Clock::now()) {
+      WorkItem item(m_items.top());
+      m_items.pop();
+
+      if (item.handler) {
+        (*lk).unlock();
+        auto reschedule_pair = item.handler(item.id == 0);
+        (*lk).lock();
+        if (!m_cancel && reschedule_pair.first) {
+          int64_t new_period = (reschedule_pair.second == -1)
+                                   ?
 item.period
+                                   : reschedule_pair.second;
+
+          item.period = new_period;
+          item.end = Clock::now() + std::chrono::milliseconds(new_period);
+          m_items.push(std::move(item));
+        }
+      }
+    }
+  }
+
+  bool m_finish = false;
+  bool m_cancel = false;
+  uint64_t m_idcounter = 0;
+  std::condition_variable m_checkWork;
+
+  struct WorkItem {
+    Clock::time_point end;
+    int64_t period;
+    uint64_t id;  // id == 0 means it was cancelled
+    std::function<std::pair<bool, int64_t>(bool)> handler;
+    bool operator>(const WorkItem& other) const { return end > other.end; }
+  };
+
+  std::mutex m_mtx;
+  // Inheriting from priority_queue, so we can access the internal container
+  class Queue : public std::priority_queue<WorkItem, std::vector<WorkItem>,
+                                           std::greater<WorkItem>> {
+   public:
+    std::vector<WorkItem>& getContainer() { return this->c; }
+  } m_items;
+  ROCKSDB_NAMESPACE::port::Thread m_th;
+  bool closed_ = false;
+};
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/udt_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/udt_util.h
new file mode 100644
index 0000000000..219a093d8d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/udt_util.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <sstream>
+#include <vector>
+
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Dummy record in WAL logs signaling user-defined timestamp sizes for
+// subsequent records.
+class UserDefinedTimestampSizeRecord {
+ public:
+  UserDefinedTimestampSizeRecord() {}
+  explicit UserDefinedTimestampSizeRecord(
+      std::vector<std::pair<uint32_t, size_t>>&& cf_to_ts_sz)
+      : cf_to_ts_sz_(std::move(cf_to_ts_sz)) {}
+
+  const std::vector<std::pair<uint32_t, size_t>>& GetUserDefinedTimestampSize()
+      const {
+    return cf_to_ts_sz_;
+  }
+
+  inline void EncodeTo(std::string* dst) const {
+    assert(dst != nullptr);
+    for (const auto& [cf_id, ts_sz] : cf_to_ts_sz_) {
+      assert(ts_sz != 0);
+      PutFixed32(dst, cf_id);
+      PutFixed16(dst, static_cast<uint16_t>(ts_sz));
+    }
+  }
+
+  inline Status DecodeFrom(Slice* src) {
+    const size_t total_size = src->size();
+    if ((total_size % kSizePerColumnFamily) != 0) {
+      std::ostringstream oss;
+      oss << "User-defined timestamp size record length: " << total_size
+          << " is not a multiple of " << kSizePerColumnFamily << std::endl;
+      return Status::Corruption(oss.str());
+    }
+    int num_of_entries = static_cast<int>(total_size / kSizePerColumnFamily);
+    for (int i = 0; i < num_of_entries; i++) {
+      uint32_t cf_id = 0;
+      uint16_t ts_sz = 0;
+      if (!GetFixed32(src, &cf_id) || !GetFixed16(src, &ts_sz)) {
+        return Status::Corruption(
+            "Error decoding user-defined timestamp size record entry");
+      }
+      cf_to_ts_sz_.emplace_back(cf_id, static_cast<size_t>(ts_sz));
+    }
+    return Status::OK();
+  }
+
+  inline std::string DebugString() const {
+    std::ostringstream oss;
+
+    for (const auto& [cf_id, ts_sz] : cf_to_ts_sz_) {
+      oss << "Column family: " << cf_id
+          << ", user-defined timestamp size: " << ts_sz << std::endl;
+    }
+    return oss.str();
+  }
+
+ private:
+  // 4 bytes for column family id, 2 bytes for user-defined timestamp size.
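+  // For example (illustrative values, not from the original source): an entry
+  // {cf_id = 5, ts_sz = 8} encodes to the six bytes 05 00 00 00 08 00, since
+  // PutFixed32/PutFixed16 write little-endian.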
+  static constexpr size_t kSizePerColumnFamily = 4 + 2;
+
+  std::vector<std::pair<uint32_t, size_t>> cf_to_ts_sz_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/user_comparator_wrapper.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/user_comparator_wrapper.h
new file mode 100644
index 0000000000..59ebada12d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/user_comparator_wrapper.h
@@ -0,0 +1,64 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "monitoring/perf_context_imp.h"
+#include "rocksdb/comparator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Wrapper of user comparator, with auto increment to
+// perf_context.user_key_comparison_count.
+class UserComparatorWrapper {
+ public:
+  // `UserComparatorWrapper`s constructed with the default constructor are not
+  // usable and will segfault on any attempt to use them for comparisons.
+  UserComparatorWrapper() : user_comparator_(nullptr) {}
+
+  explicit UserComparatorWrapper(const Comparator* const user_cmp)
+      : user_comparator_(user_cmp) {}
+
+  ~UserComparatorWrapper() = default;
+
+  const Comparator* user_comparator() const { return user_comparator_; }
+
+  int Compare(const Slice& a, const Slice& b) const {
+    PERF_COUNTER_ADD(user_key_comparison_count, 1);
+    return user_comparator_->Compare(a, b);
+  }
+
+  bool Equal(const Slice& a, const Slice& b) const {
+    PERF_COUNTER_ADD(user_key_comparison_count, 1);
+    return user_comparator_->Equal(a, b);
+  }
+
+  int CompareTimestamp(const Slice& ts1, const Slice& ts2) const {
+    return user_comparator_->CompareTimestamp(ts1, ts2);
+  }
+
+  int CompareWithoutTimestamp(const Slice& a, const Slice& b) const {
+    PERF_COUNTER_ADD(user_key_comparison_count, 1);
+    return user_comparator_->CompareWithoutTimestamp(a, b);
+  }
+
+  int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b,
+                              bool b_has_ts) const {
+    PERF_COUNTER_ADD(user_key_comparison_count, 1);
+    return user_comparator_->CompareWithoutTimestamp(a, a_has_ts, b, b_has_ts);
+  }
+
+  bool EqualWithoutTimestamp(const Slice& a, const Slice& b) const {
+    return user_comparator_->EqualWithoutTimestamp(a, b);
+  }
+
+ private:
+  const Comparator* user_comparator_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/vector_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/vector_iterator.h
new file mode 100644
index 0000000000..c4cc01d561
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/vector_iterator.h
@@ -0,0 +1,118 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
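+//
+// VectorIterator (below) adapts two parallel key/value vectors into an
+// InternalIterator. A hypothetical usage sketch (not part of the original
+// source); without a comparator, entries are visited in insertion order:
+//
+//   VectorIterator it({"a", "b"}, {"1", "2"});
+//   for (it.SeekToFirst(); it.Valid(); it.Next()) {
+//     // it.key()/it.value() yield "a"/"1", then "b"/"2"
+//   }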
+#pragma once
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "db/dbformat.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/slice.h"
+#include "table/internal_iterator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Iterator over a vector of keys/values
+class VectorIterator : public InternalIterator {
+ public:
+  VectorIterator(std::vector<std::string> keys,
+                 std::vector<std::string> values,
+                 const CompareInterface* icmp = nullptr)
+      : keys_(std::move(keys)),
+        values_(std::move(values)),
+        current_(keys_.size()),
+        indexed_cmp_(icmp, &keys_) {
+    assert(keys_.size() == values_.size());
+
+    indices_.reserve(keys_.size());
+    for (size_t i = 0; i < keys_.size(); i++) {
+      indices_.push_back(i);
+    }
+    if (icmp != nullptr) {
+      std::sort(indices_.begin(), indices_.end(), indexed_cmp_);
+    }
+  }
+
+  virtual bool Valid() const override {
+    return !indices_.empty() && current_ < indices_.size();
+  }
+
+  virtual void SeekToFirst() override { current_ = 0; }
+  virtual void SeekToLast() override { current_ = indices_.size() - 1; }
+
+  virtual void Seek(const Slice& target) override {
+    if (indexed_cmp_.cmp != nullptr) {
+      current_ = std::lower_bound(indices_.begin(), indices_.end(), target,
+                                  indexed_cmp_) -
+                 indices_.begin();
+    } else {
+      current_ =
+          std::lower_bound(keys_.begin(), keys_.end(), target.ToString()) -
+          keys_.begin();
+    }
+  }
+
+  virtual void SeekForPrev(const Slice& target) override {
+    if (indexed_cmp_.cmp != nullptr) {
+      current_ = std::upper_bound(indices_.begin(), indices_.end(), target,
+                                  indexed_cmp_) -
+                 indices_.begin();
+    } else {
+      current_ =
+          std::upper_bound(keys_.begin(), keys_.end(), target.ToString()) -
+          keys_.begin();
+    }
+    if (!Valid()) {
+      SeekToLast();
+    } else {
+      Prev();
+    }
+  }
+
+  virtual void Next() override { current_++; }
+  virtual void Prev() override { current_--; }
+
+  virtual Slice key() const override {
+    return Slice(keys_[indices_[current_]]);
+  }
+  virtual Slice value() const override {
+    return Slice(values_[indices_[current_]]);
+  }
+
+  virtual Status status() const override { return Status::OK(); }
+
+  virtual bool IsKeyPinned() const override { return true; }
+  virtual bool IsValuePinned() const override { return true; }
+
+ protected:
+  std::vector<std::string> keys_;
+  std::vector<std::string> values_;
+  size_t current_;
+
+ private:
+  struct IndexedKeyComparator {
+    IndexedKeyComparator(const CompareInterface* c,
+                         const std::vector<std::string>* ks)
+        : cmp(c), keys(ks) {}
+
+    bool operator()(size_t a, size_t b) const {
+      return cmp->Compare((*keys)[a], (*keys)[b]) < 0;
+    }
+
+    bool operator()(size_t a, const Slice& b) const {
+      return cmp->Compare((*keys)[a], b) < 0;
+    }
+
+    bool operator()(const Slice& a, size_t b) const {
+      return cmp->Compare(a, (*keys)[b]) < 0;
+    }
+
+    const CompareInterface* cmp;
+    const std::vector<std::string>* keys;
+  };
+
+  IndexedKeyComparator indexed_cmp_;
+  std::vector<size_t> indices_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/work_queue.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/work_queue.h
new file mode 100644
index 0000000000..94ece85d9d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/work_queue.h
@@ -0,0 +1,150 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+#pragma once
+
+#include <atomic>
+#include <cassert>
+#include <condition_variable>
+#include <cstddef>
+#include <functional>
+#include <mutex>
+#include <queue>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+/// Unbounded thread-safe work queue.
+//
+// This file is an excerpt from Facebook's zstd repo at
+// https://github.com/facebook/zstd/. The relevant file is
+// contrib/pzstd/utils/WorkQueue.h.
+
+template <typename T>
+class WorkQueue {
+  // Protects all member variable access
+  std::mutex mutex_;
+  std::condition_variable readerCv_;
+  std::condition_variable writerCv_;
+  std::condition_variable finishCv_;
+
+  std::queue<T> queue_;
+  bool done_;
+  std::size_t maxSize_;
+
+  // Must have lock to call this function
+  bool full() const {
+    if (maxSize_ == 0) {
+      return false;
+    }
+    return queue_.size() >= maxSize_;
+  }
+
+ public:
+  /**
+   * Constructs an empty work queue with an optional max size.
+   * If `maxSize == 0` the queue size is unbounded.
+   *
+   * @param maxSize The maximum allowed size of the work queue.
+   */
+  WorkQueue(std::size_t maxSize = 0) : done_(false), maxSize_(maxSize) {}
+
+  /**
+   * Push an item onto the work queue. Notify a single thread that work is
+   * available. If `finish()` has been called, do nothing and return false.
+   * If `push()` returns false, then `item` has not been copied from.
+   *
+   * @param item  Item to push onto the queue.
+   * @returns     True upon success, false if `finish()` has been called. An
+   *              item was pushed iff `push()` returns true.
+   */
+  template <typename U>
+  bool push(U&& item) {
+    {
+      std::unique_lock<std::mutex> lock(mutex_);
+      while (full() && !done_) {
+        writerCv_.wait(lock);
+      }
+      if (done_) {
+        return false;
+      }
+      queue_.push(std::forward<U>(item));
+    }
+    readerCv_.notify_one();
+    return true;
+  }
+
+  /**
+   * Attempts to pop an item off the work queue. It will block until data is
+   * available or `finish()` has been called.
+   *
+   * @param[out] item  If `pop` returns `true`, it contains the popped item.
+   *                   If `pop` returns `false`, it is unmodified.
+   * @returns          True upon success. False if the queue is empty and
+   *                   `finish()` has been called.
+   */
+  bool pop(T& item) {
+    {
+      std::unique_lock<std::mutex> lock(mutex_);
+      while (queue_.empty() && !done_) {
+        readerCv_.wait(lock);
+      }
+      if (queue_.empty()) {
+        assert(done_);
+        return false;
+      }
+      item = queue_.front();
+      queue_.pop();
+    }
+    writerCv_.notify_one();
+    return true;
+  }
+
+  /**
+   * Sets the maximum queue size. If `maxSize == 0` then it is unbounded.
+   *
+   * @param maxSize The new maximum queue size.
+   */
+  void setMaxSize(std::size_t maxSize) {
+    {
+      std::lock_guard<std::mutex> lock(mutex_);
+      maxSize_ = maxSize;
+    }
+    writerCv_.notify_all();
+  }
+
+  /**
+   * Promise that `push()` won't be called again, so once the queue is empty
+   * there will never be any more work.
+   */
+  void finish() {
+    {
+      std::lock_guard<std::mutex> lock(mutex_);
+      assert(!done_);
+      done_ = true;
+    }
+    readerCv_.notify_all();
+    writerCv_.notify_all();
+    finishCv_.notify_all();
+  }
+
+  /// Blocks until `finish()` has been called (but the queue may not be empty).
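+  ///
+  /// A hypothetical usage sketch (not part of the original source): a
+  /// producer calls finish() after its last push(), and a waiter blocks here:
+  ///
+  ///   WorkQueue<int> q;
+  ///   std::thread producer([&q] { q.push(1); q.finish(); });
+  ///   q.waitUntilFinished();  // returns once finish() has run
+  ///   producer.join();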
+  void waitUntilFinished() {
+    std::unique_lock<std::mutex> lock(mutex_);
+    while (!done_) {
+      finishCv_.wait(lock);
+    }
+  }
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.cc
new file mode 100644
index 0000000000..88852c3308
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.cc
@@ -0,0 +1,48 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2020 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following disclaimer
+ *     in the documentation and/or other materials provided with the
+ *     distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ *   - xxHash homepage: https://www.xxhash.com
+ *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+/*
+ * xxhash.c instantiates functions defined in xxhash.h
+ */
+// clang-format off
+#ifndef XXH_STATIC_LINKING_ONLY
+#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
+#endif // !defined(XXH_STATIC_LINKING_ONLY)
+#define XXH_IMPLEMENTATION /* access definitions */
+
+#include "xxhash.h"
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.h
new file mode 100644
index 0000000000..ad49bab816
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/util/xxhash.h
@@ -0,0 +1,6360 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
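+//
+// Note on the build layout in this vendored tree: util/xxhash.cc compiles the
+// implementation once (it defines XXH_IMPLEMENTATION before including this
+// header), while other translation units include only the declarations, with
+// XXH_STATIC_LINKING_ONLY exposing the advanced API.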
+ +/* BEGIN RocksDB customizations */ +#ifndef XXH_STATIC_LINKING_ONLY +// Using compiled xxhash.cc +#define XXH_STATIC_LINKING_ONLY 1 +#endif // !defined(XXH_STATIC_LINKING_ONLY) +#ifndef XXH_NAMESPACE +#define XXH_NAMESPACE ROCKSDB_ +#endif // !defined(XXH_NAMESPACE) + +// for FALLTHROUGH_INTENDED, inserted as appropriate +#include "port/lang.h" +/* END RocksDB customizations */ + +// clang-format off +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (C) 2012-2021 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/*! + * @mainpage xxHash + * + * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed + * limits. + * + * It is proposed in four flavors, in three families: + * 1. @ref XXH32_family + * - Classic 32-bit hash function. Simple, compact, and runs on almost all + * 32-bit and 64-bit systems. + * 2. @ref XXH64_family + * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most + * 64-bit systems (but _not_ 32-bit systems). + * 3. @ref XXH3_family + * - Modern 64-bit and 128-bit hash function family which features improved + * strength and performance across the board, especially on smaller data. + * It benefits greatly from SIMD and 64-bit without requiring it. + * + * Benchmarks + * --- + * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04. + * The open source benchmark program is compiled with clang v10.0 using -O3 flag. 
+ *
+ * | Hash Name            | ISA ext | Width | Large Data Speed | Small Data Velocity |
+ * | -------------------- | ------- | ----: | ---------------: | ------------------: |
+ * | XXH3_64bits()        | @b AVX2 |    64 | 59.4 GB/s        | 133.1               |
+ * | MeowHash             | AES-NI  |   128 | 58.2 GB/s        | 52.5                |
+ * | XXH3_128bits()       | @b AVX2 |   128 | 57.9 GB/s        | 118.1               |
+ * | CLHash               | PCLMUL  |    64 | 37.1 GB/s        | 58.1                |
+ * | XXH3_64bits()        | @b SSE2 |    64 | 31.5 GB/s        | 133.1               |
+ * | XXH3_128bits()       | @b SSE2 |   128 | 29.6 GB/s        | 118.1               |
+ * | RAM sequential read  |         |   N/A | 28.0 GB/s        | N/A                 |
+ * | ahash                | AES-NI  |    64 | 22.5 GB/s        | 107.2               |
+ * | City64               |         |    64 | 22.0 GB/s        | 76.6                |
+ * | T1ha2                |         |    64 | 22.0 GB/s        | 99.0                |
+ * | City128              |         |   128 | 21.7 GB/s        | 57.7                |
+ * | FarmHash             | AES-NI  |    64 | 21.3 GB/s        | 71.9                |
+ * | XXH64()              |         |    64 | 19.4 GB/s        | 71.0                |
+ * | SpookyHash           |         |    64 | 19.3 GB/s        | 53.2                |
+ * | Mum                  |         |    64 | 18.0 GB/s        | 67.0                |
+ * | CRC32C               | SSE4.2  |    32 | 13.0 GB/s        | 57.9                |
+ * | XXH32()              |         |    32 |  9.7 GB/s        | 71.9                |
+ * | City32               |         |    32 |  9.1 GB/s        | 66.0                |
+ * | Blake3*              | @b AVX2 |   256 |  4.4 GB/s        | 8.1                 |
+ * | Murmur3              |         |    32 |  3.9 GB/s        | 56.1                |
+ * | SipHash*             |         |    64 |  3.0 GB/s        | 43.2                |
+ * | Blake3*              | @b SSE2 |   256 |  2.4 GB/s        | 8.1                 |
+ * | HighwayHash          |         |    64 |  1.4 GB/s        | 6.0                 |
+ * | FNV64                |         |    64 |  1.2 GB/s        | 62.7                |
+ * | Blake2*              |         |   256 |  1.1 GB/s        | 5.1                 |
+ * | SHA1*                |         |   160 |  0.8 GB/s        | 5.6                 |
+ * | MD5*                 |         |   128 |  0.6 GB/s        | 7.8                 |
+ * @note
+ *   - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
+ *     even though it is mandatory on x64.
+ *   - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
+ *     by modern standards.
+ *   - Small data velocity is a rough average of algorithm's efficiency for small
+ *     data. For more accurate information, see the wiki.
+ *   - More benchmarks and strength tests are found on the wiki:
+ *     https://github.com/Cyan4973/xxHash/wiki
+ *
+ * Usage
+ * ------
+ * All xxHash variants use a similar API. Changing the algorithm is a trivial
+ * substitution.
+ *
+ * @pre
+ *    For functions which take an input and length parameter, the following
+ *    requirements are assumed:
+ *    - The range from [`input`, `input + length`) is valid, readable memory.
+ *      - The only exception is if the `length` is `0`, `input` may be `NULL`.
+ *    - For C++, the objects must have the *TriviallyCopyable* property, as the
+ *      functions access bytes directly as if it was an array of `unsigned char`.
+ *
+ * @anchor single_shot_example
+ * **Single Shot**
+ *
+ * These functions are stateless functions which hash a contiguous block of memory,
+ * immediately returning the result. They are the easiest and usually the fastest
+ * option.
+ *
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
+ *
+ * @code{.c}
+ *   #include <string.h>
+ *   #include "xxhash.h"
+ *
+ *   // Example for a function which hashes a null terminated string with XXH32().
+ *   XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
+ *   {
+ *       // NULL pointers are only valid if the length is zero
+ *       size_t length = (string == NULL) ? 0 : strlen(string);
+ *       return XXH32(string, length, seed);
+ *   }
+ * @endcode
+ *
+ * @anchor streaming_example
+ * **Streaming**
+ *
+ * These groups of functions allow incremental hashing of unknown size, even
+ * more than what would fit in a size_t.
+ *
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
+ *
+ * @code{.c}
+ *   #include <stdio.h>
+ *   #include <assert.h>
+ *   #include "xxhash.h"
+ *   // Example for a function which hashes a FILE incrementally with XXH3_64bits().
+ * XXH64_hash_t hashFile(FILE* f) + * { + * // Allocate a state struct. Do not just use malloc() or new. + * XXH3_state_t* state = XXH3_createState(); + * assert(state != NULL && "Out of memory!"); + * // Reset the state to start a new hashing session. + * XXH3_64bits_reset(state); + * char buffer[4096]; + * size_t count; + * // Read the file in chunks + * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) { + * // Run update() as many times as necessary to process the data + * XXH3_64bits_update(state, buffer, count); + * } + * // Retrieve the finalized hash. This will not change the state. + * XXH64_hash_t result = XXH3_64bits_digest(state); + * // Free the state. Do not use free(). + * XXH3_freeState(state); + * return result; + * } + * @endcode + * + * @file xxhash.h + * xxHash prototypes and implementation + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* **************************** + * INLINE mode + ******************************/ +/*! + * @defgroup public Public API + * Contains details on the public xxHash functions. + * @{ + */ +#ifdef XXH_DOXYGEN +/*! + * @brief Exposes the implementation and marks all functions as `inline`. + * + * Use these build macros to inline xxhash into the target unit. + * Inlining improves performance on small inputs, especially when the length is + * expressed as a compile-time constant: + * + * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html + * + * It also keeps xxHash symbols private to the unit, so they are not exported. + * + * Usage: + * @code{.c} + * #define XXH_INLINE_ALL + * #include "xxhash.h" + * @endcode + * Do not compile and link xxhash.o as a separate object, as it is not useful. + */ +# define XXH_INLINE_ALL +# undef XXH_INLINE_ALL +/*! + * @brief Exposes the implementation without marking functions as inline. + */ +# define XXH_PRIVATE_API +# undef XXH_PRIVATE_API +/*! + * @brief Emulate a namespace by transparently prefixing all symbols. + * + * If you want to include _and expose_ xxHash functions from within your own + * library, but also want to avoid symbol collisions with other libraries which + * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix + * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE + * (therefore, avoid empty or numeric values). + * + * Note that no change is required within the calling program as long as it + * includes `xxhash.h`: Regular symbol names will be automatically translated + * by this header. 
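+ *
+ * For example, with a hypothetical prefix:
+ * @code{.c}
+ * #define XXH_NAMESPACE MYLIB_
+ * #include "xxhash.h"
+ * // The linker-visible symbol is MYLIB_XXH64, but callers still write:
+ * XXH64_hash_t h = XXH64("abc", 3, 0);
+ * @endcode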
+ */ +# define XXH_NAMESPACE /* YOUR NAME HERE */ +# undef XXH_NAMESPACE +#endif + +#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \ + && !defined(XXH_INLINE_ALL_31684351384) + /* this section should be traversed only once */ +# define XXH_INLINE_ALL_31684351384 + /* give access to the advanced API, required to compile implementations */ +# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ +# define XXH_STATIC_LINKING_ONLY + /* make all functions private */ +# undef XXH_PUBLIC_API +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else + /* note: this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static +# endif + + /* + * This part deals with the special case where a unit wants to inline xxHash, + * but "xxhash.h" has previously been included without XXH_INLINE_ALL, + * such as part of some previously included *.h header file. + * Without further action, the new include would just be ignored, + * and functions would effectively _not_ be inlined (silent failure). + * The following macros solve this situation by prefixing all inlined names, + * avoiding naming collision with previous inclusions. + */ + /* Before that, we unconditionally #undef all symbols, + * in case they were already defined with XXH_NAMESPACE. + * They will then be redefined for XXH_INLINE_ALL + */ +# undef XXH_versionNumber + /* XXH32 */ +# undef XXH32 +# undef XXH32_createState +# undef XXH32_freeState +# undef XXH32_reset +# undef XXH32_update +# undef XXH32_digest +# undef XXH32_copyState +# undef XXH32_canonicalFromHash +# undef XXH32_hashFromCanonical + /* XXH64 */ +# undef XXH64 +# undef XXH64_createState +# undef XXH64_freeState +# undef XXH64_reset +# undef XXH64_update +# undef XXH64_digest +# undef XXH64_copyState +# undef XXH64_canonicalFromHash +# undef XXH64_hashFromCanonical + /* XXH3_64bits */ +# undef XXH3_64bits +# undef XXH3_64bits_withSecret +# undef XXH3_64bits_withSeed +# undef XXH3_64bits_withSecretandSeed +# undef XXH3_createState +# undef XXH3_freeState +# undef XXH3_copyState +# undef XXH3_64bits_reset +# undef XXH3_64bits_reset_withSeed +# undef XXH3_64bits_reset_withSecret +# undef XXH3_64bits_update +# undef XXH3_64bits_digest +# undef XXH3_generateSecret + /* XXH3_128bits */ +# undef XXH128 +# undef XXH3_128bits +# undef XXH3_128bits_withSeed +# undef XXH3_128bits_withSecret +# undef XXH3_128bits_reset +# undef XXH3_128bits_reset_withSeed +# undef XXH3_128bits_reset_withSecret +# undef XXH3_128bits_reset_withSecretandSeed +# undef XXH3_128bits_update +# undef XXH3_128bits_digest +# undef XXH128_isEqual +# undef XXH128_cmp +# undef XXH128_canonicalFromHash +# undef XXH128_hashFromCanonical + /* Finally, free the namespace itself */ +# undef XXH_NAMESPACE + + /* employ the namespace for XXH_INLINE_ALL */ +# define XXH_NAMESPACE XXH_INLINE_ + /* + * Some identifiers (enums, type names) are not symbols, + * but they must nonetheless be renamed to avoid redeclaration. + * Alternative solution: do not redeclare them. + * However, this requires some #ifdefs, and has a more dispersed impact. + * Meanwhile, renaming can be achieved in a single place. 
+ */
+# define XXH_IPREF(Id) XXH_NAMESPACE ## Id
+# define XXH_OK XXH_IPREF(XXH_OK)
+# define XXH_ERROR XXH_IPREF(XXH_ERROR)
+# define XXH_errorcode XXH_IPREF(XXH_errorcode)
+# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
+# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
+# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
+# define XXH32_state_s XXH_IPREF(XXH32_state_s)
+# define XXH32_state_t XXH_IPREF(XXH32_state_t)
+# define XXH64_state_s XXH_IPREF(XXH64_state_s)
+# define XXH64_state_t XXH_IPREF(XXH64_state_t)
+# define XXH3_state_s XXH_IPREF(XXH3_state_s)
+# define XXH3_state_t XXH_IPREF(XXH3_state_t)
+# define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
+  /* Ensure the header is parsed again, even if it was previously included */
+# undef XXHASH_H_5627135585666179
+# undef XXHASH_H_STATIC_13879238742
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/* ****************************************************************
+ *  Stable API
+ *****************************************************************/
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+/*! @brief Marks a global symbol. */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+#  ifdef XXH_EXPORT
+#   define XXH_PUBLIC_API __declspec(dllexport)
+#  elif XXH_IMPORT
+#   define XXH_PUBLIC_API __declspec(dllimport)
+#  endif
+# else
+#  define XXH_PUBLIC_API /* do nothing */
+# endif
+#endif
+
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+/* XXH32 */
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+/* XXH64 */
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+/* XXH3_64bits */
+# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
+# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
+# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
+# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
+# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
+# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
+# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
+# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
+# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
+# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
+# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
+# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
+# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
+# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
+/* XXH3_128bits */
+# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
+# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
+# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
+# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
+# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
+# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
+# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
+# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
+# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
+# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
+# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
+# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
+# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
+#endif
+
+
+/* *************************************
+*  Compiler specifics
+***************************************/
+
+/* specific declaration modes for Windows */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+#  ifdef XXH_EXPORT
+#   define XXH_PUBLIC_API __declspec(dllexport)
+#  elif XXH_IMPORT
+#   define XXH_PUBLIC_API __declspec(dllimport)
+#  endif
+# else
+#  define XXH_PUBLIC_API /* do nothing */
+# endif
+#endif
+
+#if defined (__GNUC__)
+# define XXH_CONSTF __attribute__((const))
+# define XXH_PUREF __attribute__((pure))
+# define XXH_MALLOCF __attribute__((malloc))
+#else
+# define XXH_CONSTF /* disable */
+# define XXH_PUREF
+# define XXH_MALLOCF
+#endif
+
+/* *************************************
+*  Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 8
+#define XXH_VERSION_RELEASE 1
+/*! @brief Version number, encoded as two digits each */
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+
+/*!
+ * @brief Obtains the xxHash version.
+ *
+ * This is mostly useful when xxHash is compiled as a shared library,
+ * since the returned value comes from the library, as opposed to the header
+ * file.
+ *
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
+ */
+XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+*  Common basic types
+******************************/
+#include <stddef.h>   /* size_t */
+/*!
+ * @brief Exit code for the streaming API.
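+ *
+ * A typical check (sketch): treat anything other than XXH_OK as failure, e.g.
+ * @code{.c}
+ * if (XXH32_update(state, buffer, count) != XXH_OK) {
+ *     // handle failure
+ * }
+ * @endcode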
+ */
+typedef enum {
+    XXH_OK = 0, /*!< OK */
+    XXH_ERROR   /*!< Error */
+} XXH_errorcode;
+
+
+/*-**********************************************************************
+*  32-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
+/*!
+ * @brief An unsigned 32-bit integer.
+ *
+ * Not necessarily defined to `uint32_t` but functionally equivalent.
+ */
+typedef uint32_t XXH32_hash_t;
+
+#elif !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+    typedef uint32_t XXH32_hash_t;
+
+#else
+#   include <limits.h>
+#   if UINT_MAX == 0xFFFFFFFFUL
+      typedef unsigned int XXH32_hash_t;
+#   elif ULONG_MAX == 0xFFFFFFFFUL
+      typedef unsigned long XXH32_hash_t;
+#   else
+#     error "unsupported platform: need a 32-bit type"
+#   endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup XXH32_family XXH32 family
+ * @ingroup public
+ * Contains functions used in the classic 32-bit xxHash algorithm.
+ *
+ * @note
+ *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
+ *   Note that the @ref XXH3_family provides competitive speed for both 32-bit
+ *   and 64-bit systems, and offers true 64/128 bit hash results.
+ *
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
+ * @see @ref XXH32_impl for implementation details
+ * @{
+ */
+
+/*!
+ * @brief Calculates the 32-bit hash of @p input using xxHash32.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
+ *
+ * See @ref single_shot_example "Single Shot Example" for an example.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 32-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 32-bit hash value.
+ *
+ * @see
+ *    XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ *    Direct equivalents for the other variants of xxHash.
+ * @see
+ *    XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+
+#ifndef XXH_NO_STREAM
+/*!
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ * @see streaming_example at the top of @ref xxhash.h for an example.
+ */
+
+/*!
+ * @typedef struct XXH32_state_s XXH32_state_t
+ * @brief The opaque state struct for the XXH32 streaming API.
+ *
+ * @see XXH32_state_s for details.
+ */
+typedef struct XXH32_state_s XXH32_state_t;
+
+/*!
+ * @brief Allocates an @ref XXH32_state_t.
+ *
+ * Must be freed with XXH32_freeState().
+ * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
+/*!
+ * @brief Frees an @ref XXH32_state_t.
+ *
+ * Must be allocated with XXH32_createState().
+ * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
+ * @return XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+/*!
+ * @brief Copies one @ref XXH32_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH32_state_t to begin a new hash.
+ *
+ * This function resets and seeds a state. Call it before @ref XXH32_update().
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 32-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH32_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH32_state_t.
+ *
+ * @note
+ *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *  @p statePtr must not be `NULL`.
+ *
+ * @return The calculated xxHash32 value from that state.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/*******   Canonical representation   *******/
+
+/*
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ * This is the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ */
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
+ */
+typedef struct {
+    unsigned char digest[4]; /*!< Hash bytes, big endian */
+} XXH32_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
+ *
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH32_hash_t to be converted.
+ *
+ * @pre
+ *   @p dst must not be `NULL`.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
+ *
+ * @param src The @ref XXH32_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+#ifdef __has_attribute
+# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+# define XXH_HAS_ATTRIBUTE(x) 0
+#endif
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define XXH_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define XXH_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+
+/*
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
+ * introduced in CPP17 and C23.
+ * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
+# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
+#else
+# define XXH_FALLTHROUGH /* fallthrough */
+#endif
+
+/*
+ * Define XXH_NOESCAPE for annotated pointers in public API.
+ * https://clang.llvm.org/docs/AttributeReference.html#noescape
+ * As of writing this, only supported by clang.
+ */
+#if XXH_HAS_ATTRIBUTE(noescape)
+# define XXH_NOESCAPE __attribute__((noescape))
+#else
+# define XXH_NOESCAPE
+#endif
+
+
+/*!
+ * @}
+ * @ingroup public
+ * @{
+ */
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+*  64-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
+/*!
+ * @brief An unsigned 64-bit integer.
+ *
+ * Not necessarily defined to `uint64_t` but functionally equivalent.
+ */
+typedef uint64_t XXH64_hash_t;
+#elif !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#  include <stdint.h>
+   typedef uint64_t XXH64_hash_t;
+#else
+#  include <limits.h>
+#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
+     /* LP64 ABI says uint64_t is unsigned long */
+     typedef unsigned long XXH64_hash_t;
+#  else
+     /* the following type must have a width of 64-bit */
+     typedef unsigned long long XXH64_hash_t;
+#  endif
+#endif
+
+/*!
+ * @} + * + * @defgroup XXH64_family XXH64 family + * @ingroup public + * @{ + * Contains functions used in the classic 64-bit xxHash algorithm. + * + * @note + * XXH3 provides competitive speed for both 32-bit and 64-bit systems, + * and offers true 64/128 bit hash results. + * It provides better speed for systems with vector processing capabilities. + */ + +/*! + * @brief Calculates the 64-bit hash of @p input using xxHash64. + * + * This function usually runs faster on 64-bit systems, but slower on 32-bit + * systems (see benchmark). + * + * @param input The block of data to be hashed, at least @p length bytes in size. + * @param length The length of @p input, in bytes. + * @param seed The 64-bit seed to alter the hash's output predictably. + * + * @pre + * The memory between @p input and @p input + @p length must be valid, + * readable, contiguous memory. However, if @p length is `0`, @p input may be + * `NULL`. In C++, this also must be *TriviallyCopyable*. + * + * @return The calculated 64-bit hash. + * + * @see + * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128(): + * Direct equivalents for the other variants of xxHash. + * @see + * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version. + */ +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed); + +/******* Streaming *******/ +#ifndef XXH_NO_STREAM +/*! + * @brief The opaque state struct for the XXH64 streaming API. + * + * @see XXH64_state_s for details. + */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr); +#endif /* !XXH_NO_STREAM */ +/******* Canonical representation *******/ +typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src); + +#ifndef XXH_NO_XXH3 + +/*! + * @} + * ************************************************************************ + * @defgroup XXH3_family XXH3 family + * @ingroup public + * @{ + * + * XXH3 is a more recent hash algorithm featuring: + * - Improved speed for both small and large inputs + * - True 64-bit and 128-bit outputs + * - SIMD acceleration + * - Improved 32-bit viability + * + * Speed analysis methodology is explained here: + * + * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html + * + * Compared to XXH64, expect XXH3 to run approximately + * ~2x faster on large inputs and >3x faster on small ones, + * exact differences vary depending on platform. + * + * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic, + * but does not require it. + * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3 + * at competitive speeds, even without vector support. Further details are + * explained in the implementation. 
+ *
+ * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
+ * ZVector and scalar targets. This can be controlled via the @ref XXH_VECTOR
+ * macro. For the x86 family, an automatic dispatcher is included separately
+ * in @ref xxh_x86dispatch.c.
+ *
+ * XXH3 implementation is portable:
+ * it has a generic C90 formulation that can be compiled on any platform,
+ * all implementations generate exactly the same hash value on all platforms.
+ * Starting from v0.8.0, it's also labelled "stable", meaning that
+ * any future version will also generate the same hash value.
+ *
+ * XXH3 offers 2 variants, _64bits and _128bits.
+ *
+ * When only 64 bits are needed, prefer invoking the _64bits variant, as it
+ * reduces the amount of mixing, resulting in faster speed on small inputs.
+ * It's also generally simpler to manipulate a scalar return type than a struct.
+ *
+ * The API supports one-shot hashing, streaming mode, and custom secrets.
+ */
+/*-**********************************************************************
+* XXH3 64-bit variant
+************************************************************************/
+
+/*!
+ * @brief 64-bit unseeded variant of XXH3.
+ *
+ * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see
+ *    XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms
+ * @see
+ *    XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
+ * @see
+ *    XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief 64-bit seeded variant of XXH3
+ *
+ * This variant generates a custom secret on the fly based on the default
+ * secret altered using the `seed` value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @note
+ *    seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * @param input The data to hash
+ * @param length The length
+ * @param seed The 64-bit seed to alter the state.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
+
+/*!
+ * The bare minimum size for a custom secret.
+ *
+ * @see
+ *  XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
+ *  XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
+ */
+#define XXH3_SECRET_SIZE_MIN 136
+
+/*!
+ * @brief 64-bit variant of XXH3 with a custom "secret".
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing "XXH3_generateSecret()" instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly + * because, when hashing _small_ inputs, only a portion of the secret is employed. + */ +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize); + + +/******* Streaming *******/ +#ifndef XXH_NO_STREAM +/* + * Streaming requires state maintenance. + * This operation costs memory and CPU. + * As a consequence, streaming is slower than one-shot hashing. + * For better performance, prefer one-shot functions whenever applicable. + */ + +/*! + * @brief The state struct for the XXH3 streaming API. + * + * @see XXH3_state_s for details. + */ +typedef struct XXH3_state_s XXH3_state_t; +XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr); +XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state); + +/* + * XXH3_64bits_reset(): + * Initialize with default parameters. + * digest will be equivalent to `XXH3_64bits()`. + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr); +/* + * XXH3_64bits_reset_withSeed(): + * Generate a custom secret from `seed`, and store it into `statePtr`. + * digest will be equivalent to `XXH3_64bits_withSeed()`. + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed); +/*! + * XXH3_64bits_reset_withSecret(): + * `secret` is referenced, it _must outlive_ the hash streaming session. + * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`, + * and the quality of produced hash values depends on secret's entropy + * (secret's content should look like a bunch of random bytes). + * When in doubt about the randomness of a candidate `secret`, + * consider employing `XXH3_generateSecret()` instead (see below). + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize); + +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr); +#endif /* !XXH_NO_STREAM */ + +/* note : canonical representation of XXH3 is the same as XXH64 + * since they both produce XXH64_hash_t values */ + + +/*-********************************************************************** +* XXH3 128-bit variant +************************************************************************/ + +/*! + * @brief The return value from 128-bit hashes. + * + * Stored in little endian order, although the fields themselves are in native + * endianness. + */ +typedef struct { + XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */ + XXH64_hash_t high64; /*!< `value >> 64` */ +} XXH128_hash_t; + +/*! + * @brief Unseeded 128-bit variant of XXH3 + * + * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead + * for shorter inputs. + * + * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however + * it may have slightly better performance due to constant propagation of the + * defaults. 
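+ *
+ * A one-shot usage sketch (`buf`/`bufSize` are placeholder names):
+ * @code{.c}
+ *     XXH128_hash_t h = XXH3_128bits(buf, bufSize);
+ *     // h.low64 and h.high64 hold the two 64-bit halves of the result
+ * @endcode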
+ *
+ * @see
+ *    XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms
+ * @see
+ *    XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see
+ *    XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
+/*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ *
+ * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
+ * Use already declared XXH3_createState() and XXH3_freeState().
+ *
+ * All reset and streaming functions have the same meaning as their 64-bit counterparts.
+ */
+
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/* Following helper functions make it possible to compare XXH128_hash_t values.
+ * Since XXH128_hash_t is a structure, this capability is not offered by the language.
+ * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
+
+/*!
+ * XXH128_isEqual():
+ * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
+ */
+XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+
+/*!
+ * @brief Compares two @ref XXH128_hash_t
+ * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
+ *
+ * @return: >0 if *h128_1 > *h128_2
+ *          =0 if *h128_1 == *h128_2
+ *          <0 if *h128_1 < *h128_2
+ */
+XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
+
+
+/*******   Canonical representation   *******/
+typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
+
+
+#endif  /* !XXH_NO_XXH3 */
+#endif  /* XXH_NO_LONG_LONG */
+
+/*!
+ * @}
+ */
+#endif /* XXHASH_H_5627135585666179 */
+
+
+
+#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
+#define XXHASH_H_STATIC_13879238742
+/* ****************************************************************************
+ * This section contains declarations which are not guaranteed to remain stable.
+ * They may change in future versions, becoming incompatible with a different
+ * version of the library.
+ * These declarations should only be used with static linking.
+ * Never use them in association with dynamic linking!
+ ***************************************************************************** */
+
+/*
+ * These definitions are only present to allow static allocation
+ * of XXH states, on stack or in a struct, for example.
+ * Never **ever** access their members directly.
+ */
+
+/*!
+ * @internal
+ * @brief Structure for XXH32 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH32_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH64_state_s, XXH3_state_s
+ */
+struct XXH32_state_s {
+   XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
+   XXH32_hash_t large_len;    /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
+   XXH32_hash_t v[4];         /*!< Accumulator lanes */
+   XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
+   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
+   XXH32_hash_t reserved;     /*!< Reserved field. Do not read nor write to it. */
+};   /* typedef'd to XXH32_state_t */
+
+
+#ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
+
+/*!
+ * @internal
+ * @brief Structure for XXH64 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH64_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH32_state_s, XXH3_state_s
+ */
+struct XXH64_state_s {
+   XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */
+   XXH64_hash_t v[4];         /*!< Accumulator lanes */
+   XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
+   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
+   XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyway */
+   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
+};   /* typedef'd to XXH64_state_t */
+
+#ifndef XXH_NO_XXH3
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
+#  include <stdalign.h>
+#  define XXH_ALIGN(n)      alignas(n)
+#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
+/* In C++ alignas() is a keyword */
+#  define XXH_ALIGN(n)      alignas(n)
+#elif defined(__GNUC__)
+#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
+#elif defined(_MSC_VER)
+#  define XXH_ALIGN(n)      __declspec(align(n))
+#else
+#  define XXH_ALIGN(n)   /* disabled */
+#endif
+
+/* Old GCC versions only accept the attribute after the type in structures. */
+#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
+    && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
+    && defined(__GNUC__)
+#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
+#else
+#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
+#endif
+
+/*!
+ * @brief The size of the internal XXH3 buffer.
+ *
+ * This is the optimal update size for incremental hashing.
+ *
+ * @see XXH3_64bits_update(), XXH3_128bits_update().
+ */
+#define XXH3_INTERNALBUFFER_SIZE 256
+
+/*!
+ * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
+ *
+ * This is the size used in @ref XXH3_kSecret and the seeded functions.
+ *
+ * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
+ */
+#define XXH3_SECRET_DEFAULT_SIZE 192
+
+/*!
+ * @internal
+ * @brief Structure for XXH3 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
+ * Otherwise it is an opaque type.
+ * Never use this definition in combination with dynamic library.
+ * This allows fields to safely be changed in the future.
+ *
+ * @note ** This structure has a strict alignment requirement of 64 bytes!! **
+ * Do not allocate this with `malloc()` or `new`,
+ * it will not be sufficiently aligned.
+ * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
+ *
+ * Typedef'd to @ref XXH3_state_t.
+ * Never access the members of this struct directly.
+ *
+ * @see XXH3_INITSTATE() for stack initialization.
+ * @see XXH3_createState(), XXH3_freeState().
+ * @see XXH32_state_s, XXH64_state_s
+ */
+struct XXH3_state_s {
+   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
+       /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
+   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
+       /*!< Used to store a custom secret generated from a seed. */
+   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
+       /*!< The internal buffer. @see XXH32_state_s::mem32 */
+   XXH32_hash_t bufferedSize;
+       /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
+   XXH32_hash_t useSeed;
+       /*!< Reserved field. Needed for padding on 64-bit. */
+   size_t nbStripesSoFar;
+       /*!< Number of stripes processed. */
+   XXH64_hash_t totalLen;
+       /*!< Total length hashed. 64-bit even on 32-bit targets. */
+   size_t nbStripesPerBlock;
+       /*!< Number of stripes per block. */
+   size_t secretLimit;
+       /*!< Size of @ref customSecret or @ref extSecret */
+   XXH64_hash_t seed;
+       /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
+   XXH64_hash_t reserved64;
+       /*!< Reserved field. */
+   const unsigned char* extSecret;
+       /*!< Reference to an external secret for the _withSecret variants, NULL
+        *   for other variants. */
+   /* note: there may be some padding at the end due to alignment on 64 bytes */
+}; /* typedef'd to XXH3_state_t */
+
+#undef XXH_ALIGN_MEMBER
+
+/*!
+ * @brief Initializes a stack-allocated `XXH3_state_s`.
+ *
+ * When the @ref XXH3_state_t structure is merely emplaced on stack,
+ * it should be initialized with XXH3_INITSTATE() or a memset()
+ * in case its first reset uses XXH3_NNbits_reset_withSeed().
+ * This init can be omitted if the first reset uses default or _withSecret mode.
+ * This operation isn't necessary when the state is created with XXH3_createState().
+ * Note that this doesn't prepare the state for a streaming operation,
+ * it's still necessary to use XXH3_NNbits_reset*() afterwards.
+ */
+#define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
+
+
+/*!
+ * simple alias to pre-selected XXH3_128bits variant
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+
+
+/* ===   Experimental API   === */
+/* Symbols defined below must be considered tied to a specific library version. */
+
+/*!
+ * XXH3_generateSecret():
+ *
+ * Derive a high-entropy secret from any user-defined content, named customSeed.
+ * The generated secret can be used in combination with `*_withSecret()` functions.
+ * The `_withSecret()` variants are useful to provide a higher level of protection
+ * than 64-bit seed, as it becomes much more difficult for an external actor to
+ * guess how to impact the calculation logic.
+ *
+ * The function accepts as input a custom seed of any length and any content,
+ * and derives from it a high-entropy secret of length @p secretSize into an
+ * already allocated buffer @p secretBuffer.
+ *
+ * The generated secret can then be used with any `*_withSecret()` variant.
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
+ * are part of this list. They all accept a `secret` parameter
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
+ * _and_ feature very high entropy (consist of random-looking bytes).
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
+ * be employed to ensure proper quality.
+ *
+ * @p customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch
+ * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
+ *
+ * @pre
+ *   - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
+ *   - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ *
+ * Example code:
+ * @code{.c}
+ *    #include <stdio.h>
+ *    #include <stdlib.h>
+ *    #include <string.h>
+ *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ *    #include "xxhash.h"
+ *    // Hashes argv[2] using the entropy from argv[1].
+ *    int main(int argc, char* argv[])
+ *    {
+ *        char secret[XXH3_SECRET_SIZE_MIN];
+ *        if (argc != 3) { return 1; }
+ *        XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
+ *        XXH64_hash_t h = XXH3_64bits_withSecret(
+ *             argv[2], strlen(argv[2]),
+ *             secret, sizeof(secret)
+ *        );
+ *        printf("%016llx\n", (unsigned long long) h);
+ *    }
+ * @endcode
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
+
+/*!
+ * @brief Generate the same secret as the _withSeed() variants.
+ *
+ * The generated secret can be used in combination with
+ * `*_withSecret()` and `_withSecretandSeed()` variants.
+ *
+ * Example C++ `std::string` hash class:
+ * @code{.cpp}
+ *    #include <string>
+ *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ *    #include "xxhash.h"
+ *    // Slow, seeds each time
+ *    class HashSlow {
+ *        XXH64_hash_t seed;
+ *    public:
+ *        HashSlow(XXH64_hash_t s) : seed{s} {}
+ *        size_t operator()(const std::string& x) const {
+ *            return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
+ *        }
+ *    };
+ *    // Fast, caches the seeded secret for future uses.
+ *    class HashFast {
+ *        unsigned char secret[XXH3_SECRET_SIZE_MIN];
+ *    public:
+ *        HashFast(XXH64_hash_t s) {
+ *            XXH3_generateSecret_fromSeed(secret, s);
+ *        }
+ *        size_t operator()(const std::string& x) const {
+ *            return size_t{
+ *                XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
+ *            };
+ *        }
+ *    };
+ * @endcode
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
+ * @param seed The seed to seed the state.
+ */
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
+
+/*!
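+ * @brief Hybrid variants of XXH3, combining a custom secret with a runtime seed.
+ *
+ * An illustrative call (a sketch; `buf`, `bufSize`, `secret` and `seed` are
+ * placeholder names, with `secret` at least XXH3_SECRET_SIZE_MIN bytes):
+ * @code{.c}
+ *     XXH64_hash_t h = XXH3_64bits_withSecretandSeed(buf, bufSize,
+ *                                                    secret, sizeof(secret), seed);
+ * @endcode
+ *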
+ * These variants generate hash values using either
+ * @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ *
+ * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
+ * `_withSeed()` has to generate the secret on the fly for "large" keys.
+ * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
+ * `_withSecret()` has to generate the masks on the fly for "small" keys,
+ * which requires more instructions than _withSeed() variants.
+ * Therefore, the _withSecretandSeed() variant combines the best of both worlds.
+ *
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
+ * this variant produces *exactly* the same results as `_withSeed()` variant,
+ * hence offering only a pure speed benefit on "large" input,
+ * by skipping the need to regenerate the secret for every large input.
+ *
+ * Another usage scenario is to hash the secret to a 64-bit hash value,
+ * for example with XXH3_64bits(), which then becomes the seed,
+ * and then employ both the seed and the secret in _withSecretandSeed().
+ * On top of speed, an added benefit is that each bit in the secret
+ * has a 50% chance to swap each bit in the output, via its impact on the seed.
+ *
+ * This is not guaranteed when using the secret directly in "small data" scenarios,
+ * because only portions of the secret are employed for small data.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
+                              XXH_NOESCAPE const void* secret, size_t secretSize,
+                              XXH64_hash_t seed);
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
+                               XXH_NOESCAPE const void* secret, size_t secretSize,
+                               XXH64_hash_t seed64);
+#ifndef XXH_NO_STREAM
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                    XXH_NOESCAPE const void* secret, size_t secretSize,
+                                    XXH64_hash_t seed64);
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                     XXH_NOESCAPE const void* secret, size_t secretSize,
+                                     XXH64_hash_t seed64);
+#endif /* !XXH_NO_STREAM */
+
+#endif  /* !XXH_NO_XXH3 */
+#endif  /* XXH_NO_LONG_LONG */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  define XXH_IMPLEMENTATION
+#endif
+
+#endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
+
+
+/* ======================================================================== */
+/* ======================================================================== */
+/* ======================================================================== */
+
+
+/*-**********************************************************************
+ * xxHash implementation
+ *-**********************************************************************
+ * xxHash's implementation used to be hosted inside xxhash.c.
+ *
+ * However, inlining requires implementation to be visible to the compiler,
+ * hence be included alongside the header.
+ * Previously, implementation was hosted inside xxhash.c,
+ * which was then #included when inlining was activated.
+ * This construction created issues with a few build and install systems,
+ * as it required xxhash.c to be stored in the /include directory.
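+ *
+ * For reference, the inlined setup follows the pattern below (a sketch using
+ * the XXH_INLINE_ALL mechanism handled in this header):
+ * @code{.c}
+ *     #define XXH_INLINE_ALL
+ *     #include "xxhash.h"
+ * @endcode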
+ *
+ * xxHash implementation is now directly integrated within xxhash.h.
+ * As a consequence, xxhash.c is no longer needed in /include.
+ *
+ * xxhash.c is still available and is still useful.
+ * In a "normal" setup, when xxhash is not inlined,
+ * xxhash.h only exposes the prototypes and public symbols,
+ * while xxhash.c can be built into an object file xxhash.o
+ * which can then be linked into the final binary.
+ ************************************************************************/
+
+#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
+   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
+#  define XXH_IMPLEM_13a8737387
+
+/* *************************************
+*  Tuning parameters
+***************************************/
+
+/*!
+ * @defgroup tuning Tuning parameters
+ * @{
+ *
+ * Various macros to control xxHash's behavior.
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Define this to disable 64-bit code.
+ *
+ * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
+ */
+#  define XXH_NO_LONG_LONG
+#  undef XXH_NO_LONG_LONG /* don't actually */
+/*!
+ * @brief Controls how unaligned memory is accessed.
+ *
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is
+ * safe and portable.
+ *
+ * Unfortunately, on some target/compiler combinations, the generated assembly
+ * is sub-optimal.
+ *
+ * The switch below allows selection of a different access method
+ * in the search for improved performance.
+ *
+ * @par Possible options:
+ *
+ *  - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
+ *   @par
+ *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
+ *     eliminate the function call and treat it as an unaligned access.
+ *
+ *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
+ *   @par
+ *     Depends on compiler extensions and is therefore not portable.
+ *     This method is safe _if_ your compiler supports it,
+ *     and *generally* as fast or faster than `memcpy`.
+ *
+ *  - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
+ *   @par
+ *     Casts directly and dereferences. This method doesn't depend on the
+ *     compiler, but it violates the C standard as it directly dereferences an
+ *     unaligned pointer. It can generate buggy code on targets which do not
+ *     support unaligned memory accesses, but in some circumstances, it's the
+ *     only known way to get the most performance.
+ *
+ *  - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
+ *   @par
+ *     Also portable. This can generate the best code on old compilers which don't
+ *     inline small `memcpy()` calls, and it might also be faster on big-endian
+ *     systems which lack a native byteswap instruction. However, some compilers
+ *     will emit literal byteshifts even if the target supports unaligned access.
+ *  .
+ *
+ * @warning
+ *   Methods 1 and 2 rely on implementation-defined behavior. Use these with
+ *   care, as what works on one compiler/platform/optimization level may cause
+ *   another to read garbage data or even crash.
+ *
+ * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
+ *
+ * Prefer these methods in priority order (0 > 3 > 1 > 2)
+ */
+#  define XXH_FORCE_MEMORY_ACCESS 0
+
+/*!
+ * @def XXH_SIZE_OPT
+ * @brief Controls how much xxHash optimizes for size.
+ *
+ * xxHash, when compiled, tends to result in a rather large binary size. This
+ * is mostly due to heavy use of forced inlining and constant folding of the
+ * @ref XXH3_family to increase performance.
+ *
+ * However, some developers prefer size over speed. This option can
+ * significantly reduce the size of the generated code. When using the `-Os`
+ * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
+ * otherwise it is defined to 0.
+ *
+ * Most of these size optimizations can be controlled manually.
+ *
+ * This is a number from 0-2.
+ *  - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
+ *    comes first.
+ *  - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
+ *    conservative and disables hacks that increase code size. It implies the
+ *    options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
+ *    and @ref XXH3_NEON_LANES == 8 if they are not already defined.
+ *  - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
+ *    Performance may cry. For example, the single shot functions just use the
+ *    streaming API.
+ */
+#  define XXH_SIZE_OPT 0
+
+/*!
+ * @def XXH_FORCE_ALIGN_CHECK
+ * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
+ * and XXH64() only).
+ *
+ * This is an important performance trick for architectures without decent
+ * unaligned memory access performance.
+ *
+ * It checks for input alignment, and when conditions are met, uses a "fast
+ * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
+ * faster_ read speed.
+ *
+ * The check costs one initial branch per hash, which is generally negligible,
+ * but not zero.
+ *
+ * Moreover, it's not useful to generate an additional code path if memory
+ * access uses the same instruction for both aligned and unaligned
+ * addresses (e.g. x86 and aarch64).
+ *
+ * In these cases, the alignment check can be removed by setting this macro to 0.
+ * Then the code will always use unaligned memory access.
+ * The align check is automatically disabled on x86, x64, ARM64, and some ARM chips,
+ * which are platforms known to offer good unaligned memory access performance.
+ *
+ * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
+ *
+ * This option does not affect XXH3 (only XXH32 and XXH64).
+ */
+#  define XXH_FORCE_ALIGN_CHECK 0
+
+/*!
+ * @def XXH_NO_INLINE_HINTS
+ * @brief When non-zero, sets all functions to `static`.
+ *
+ * By default, xxHash tries to force the compiler to inline almost all internal
+ * functions.
+ *
+ * This can usually improve performance due to reduced jumping and improved
+ * constant folding, but significantly increases the size of the binary which
+ * might not be favorable.
+ *
+ * Additionally, sometimes the forced inlining can be detrimental to performance,
+ * depending on the architecture.
+ *
+ * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
+ * compiler full control on whether to inline or not.
+ *
+ * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
+ * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
+ */
+#  define XXH_NO_INLINE_HINTS 0
+
+/*!
+ * @def XXH32_ENDJMP
+ * @brief Whether to use a jump for `XXH32_finalize`.
+ *
+ * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
+ * This is generally preferable for performance,
+ * but depending on the exact architecture, a jmp may be faster.
+ *
+ * This setting can only make a difference for very small inputs.
+ */
+#  define XXH32_ENDJMP 0
+
+/*!
+ * @internal
+ * @brief Redefines old internal names.
+ *
+ * For compatibility with code that uses xxHash's internals before the names
+ * were changed to improve namespacing. There is no other reason to use this.
+ */
+#  define XXH_OLD_NAMES
+#  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+
+/*!
+ * @def XXH_NO_STREAM
+ * @brief Disables the streaming API.
+ *
+ * When xxHash is not inlined and the streaming functions are not used, disabling
+ * the streaming functions can improve code size significantly, especially with
+ * the @ref XXH3_family which tends to make constant folded copies of itself.
+ */
+#  define XXH_NO_STREAM
+#  undef XXH_NO_STREAM /* don't actually */
+#endif /* XXH_DOXYGEN */
+/*!
+ * @}
+ */
+
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+   /* prefer __packed__ structures (method 1) for GCC
+    * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
+    * which for some reason does unaligned loads. */
+#  if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+#ifndef XXH_SIZE_OPT
+   /* default to 1 for -Os or -Oz */
+#  if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
+#    define XXH_SIZE_OPT 1
+#  else
+#    define XXH_SIZE_OPT 0
+#  endif
+#endif
+
+#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
+   /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
+#  if XXH_SIZE_OPT >= 1 || \
+      defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
+   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64)    || defined(_M_ARM) /* visual */ \
+   || defined(__loongarch64)
+#    define XXH_FORCE_ALIGN_CHECK 0
+#  else
+#    define XXH_FORCE_ALIGN_CHECK 1
+#  endif
+#endif
+
+#ifndef XXH_NO_INLINE_HINTS
+#  if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__)  /* -O0, -fno-inline */
+#    define XXH_NO_INLINE_HINTS 1
+#  else
+#    define XXH_NO_INLINE_HINTS 0
+#  endif
+#endif
+
+#ifndef XXH32_ENDJMP
+/* generally preferable for performance */
+#  define XXH32_ENDJMP 0
+#endif
+
+/*!
+ * @defgroup impl Implementation
+ * @{
+ */
+
+
+/* *************************************
+*  Includes & Memory related functions
+***************************************/
+#if defined(XXH_NO_STREAM)
+/* nothing */
+#elif defined(XXH_NO_STDLIB)
+
+/* When requesting to disable any mention of stdlib,
+ * the library loses the ability to invoke malloc() / free().
+ * In practice, it means that functions like `XXH*_createState()`
+ * will always fail, and return NULL.
+ * This flag is useful in situations where
+ * xxhash.h is integrated into some kernel, embedded or limited environment
+ * without access to dynamic allocation.
+ */
+
+static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
+static void XXH_free(void* p) { (void)p; }
+
+#else
+
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than malloc().
+ */
+static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than free().
+ */
+static void XXH_free(void* p) { free(p); }
+
+#endif /* XXH_NO_STDLIB */
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than memcpy().
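+ *
+ * For instance, a freestanding build could route it through a custom routine
+ * (a sketch; `my_memcpy` is a hypothetical symbol, not part of this library):
+ * @code{.c}
+ *     static void* XXH_memcpy(void* dest, const void* src, size_t size)
+ *     {
+ *         return my_memcpy(dest, src, size); // hypothetical replacement routine
+ *     }
+ * @endcode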
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    return memcpy(dest,src,size);
+}
+
+#include <limits.h>   /* ULLONG_MAX */
+
+
+/* *************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio warning fix */
+#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+#if XXH_NO_INLINE_HINTS  /* disable inlining hints */
+#  if defined(__GNUC__) || defined(__clang__)
+#    define XXH_FORCE_INLINE static __attribute__((unused))
+#  else
+#    define XXH_FORCE_INLINE static
+#  endif
+#  define XXH_NO_INLINE static
+/* enable inlining hints */
+#elif defined(__GNUC__) || defined(__clang__)
+#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+#  define XXH_NO_INLINE static __attribute__((noinline))
+#elif defined(_MSC_VER)  /* Visual Studio */
+#  define XXH_FORCE_INLINE static __forceinline
+#  define XXH_NO_INLINE static __declspec(noinline)
+#elif defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
+#  define XXH_FORCE_INLINE static inline
+#  define XXH_NO_INLINE static
+#else
+#  define XXH_FORCE_INLINE static
+#  define XXH_NO_INLINE static
+#endif
+
+
+
+/* *************************************
+*  Debug
+***************************************/
+/*!
+ * @ingroup tuning
+ * @def XXH_DEBUGLEVEL
+ * @brief Sets the debugging level.
+ *
+ * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
+ * compiler's command line options. The value must be a number.
+ */
+#ifndef XXH_DEBUGLEVEL
+#  ifdef DEBUGLEVEL /* backwards compat */
+#    define XXH_DEBUGLEVEL DEBUGLEVEL
+#  else
+#    define XXH_DEBUGLEVEL 0
+#  endif
+#endif
+
+#if (XXH_DEBUGLEVEL>=1)
+#  include <assert.h>   /* note: can still be disabled with NDEBUG */
+#  define XXH_ASSERT(c)   assert(c)
+#else
+#  define XXH_ASSERT(c)   XXH_ASSUME(c)
+#endif
+
+/* note: use after variable declarations */
+#ifndef XXH_STATIC_ASSERT
+#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
+#  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+#  else
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
+#  endif
+#  define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
+#endif
+
+/*!
+ * @internal
+ * @def XXH_COMPILER_GUARD(var)
+ * @brief Used to prevent unwanted optimizations for @p var.
+ *
+ * It uses an empty GCC inline assembly statement with a register constraint
+ * which forces @p var into a general purpose register (e.g. eax, ebx, ecx
+ * on x86) and marks it as modified.
+ *
+ * This is used in a few places to avoid unwanted autovectorization (e.g.
+ * XXH32_round()). All vectorization we want is explicit via intrinsics,
+ * and _usually_ isn't wanted elsewhere.
+ *
+ * We also use it to prevent unwanted constant folding for AArch64 in
+ * XXH3_initCustomSecret_scalar().
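+ *
+ * Conceptually (a sketch, with an illustrative variable name):
+ * @code{.c}
+ *     xxh_u32 acc = seed;
+ *     XXH_COMPILER_GUARD(acc); // compiler must now assume acc may have changed
+ * @endcode
+ * After the guard, the optimizer can no longer constant fold or
+ * autovectorize across this point.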
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
+#else
+#  define XXH_COMPILER_GUARD(var) ((void)0)
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#  define XXH_COMPILER_GUARD_W(var) __asm__ __volatile__("" : "+w" (var))
+#else
+#  define XXH_COMPILER_GUARD_W(var) ((void)0)
+#endif
+
+/* *************************************
+*  Basic Types
+***************************************/
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#  include <stdint.h>
+  typedef uint8_t xxh_u8;
+#else
+  typedef unsigned char xxh_u8;
+#endif
+typedef XXH32_hash_t xxh_u32;
+
+#ifdef XXH_OLD_NAMES
+#  define BYTE xxh_u8
+#  define U8   xxh_u8
+#  define U32  xxh_u32
+#endif
+
+/* ***   Memory access   *** */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_read32(const void* ptr)
+ * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit native endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readBE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit big endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
+ * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
+ * always @ref XXH_alignment::XXH_unaligned.
+ *
+ * @param ptr The pointer to read from.
+ * @param align Whether @p ptr is aligned.
+ * @pre
+ *   If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
+ *   aligned.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Manual byteshift. Best for old compilers which don't inline memcpy.
+ * We actually directly use XXH_readLE32 and XXH_readBE32.
+ */
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/*
+ * Force direct memory access. Only works on CPU which support unaligned memory
+ * access in hardware.
+ */
+static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/*
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
+ */
+#ifdef XXH_OLD_NAMES
+typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
+#endif
+static xxh_u32 XXH_read32(const void* ptr)
+{
+    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+    return *((const xxh_unalign32*)ptr);
+}
+
+#else
+
+/*
+ * Portable and safe solution. Generally efficient.
+ * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
+ */
+static xxh_u32 XXH_read32(const void* memPtr)
+{
+    xxh_u32 val;
+    XXH_memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ***   Endianness   *** */
+
+/*!
+ * @ingroup tuning
+ * @def XXH_CPU_LITTLE_ENDIAN
+ * @brief Whether the target is little endian.
+ *
+ * Defined to 1 if the target is little endian, or 0 if it is big endian.
+ * It can be defined externally, for example on the compiler command line.
+ *
+ * If it is not defined,
+ * a runtime check (which is usually constant folded) is used instead.
+ *
+ * @note
+ *   This is not necessarily defined to an integer constant.
+ *
+ * @see XXH_isLittleEndian() for the runtime check.
+ */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+/*
+ * Try to detect endianness automatically, to avoid the nonstandard behavior
+ * in `XXH_isLittleEndian()`
+ */
+#  if defined(_WIN32) /* Windows is always little endian */ \
+     || defined(__LITTLE_ENDIAN__) \
+     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#    define XXH_CPU_LITTLE_ENDIAN 1
+#  elif defined(__BIG_ENDIAN__) \
+     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#    define XXH_CPU_LITTLE_ENDIAN 0
+#  else
+/*!
+ * @internal
+ * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
+ *
+ * Most compilers will constant fold this.
+ */
+static int XXH_isLittleEndian(void)
+{
+    /*
+     * Portable and well-defined behavior.
+     * Don't use static: it is detrimental to performance.
+     */
+    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
+    return one.c[0];
+}
+#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
+#  endif
+#endif
+
+
+
+
+/* ****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#ifdef __has_builtin
+#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#  define XXH_HAS_BUILTIN(x) 0
+#endif
+
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L)
+/* C23 and future versions have standard "unreachable()" */
+#  include <stddef.h>
+#  define XXH_UNREACHABLE() unreachable()
+
+#elif defined(__cplusplus) && (__cplusplus > 202002L)
+/* C++23 and future versions have std::unreachable() */
+#  include <utility> /* std::unreachable() */
+#  define XXH_UNREACHABLE() std::unreachable()
+
+#elif XXH_HAS_BUILTIN(__builtin_unreachable)
+#  define XXH_UNREACHABLE() __builtin_unreachable()
+
+#elif defined(_MSC_VER)
+#  define XXH_UNREACHABLE() __assume(0)
+
+#else
+#  define XXH_UNREACHABLE()
+#endif
+
+#define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
+
+/*!
+ * @internal
+ * @def XXH_rotl32(x,r)
+ * @brief 32-bit rotate left.
+ *
+ * @param x The 32-bit integer to be rotated.
+ * @param r The number of bits to rotate.
+ * @pre
+ *   @p r > 0 && @p r < 32
+ * @note
+ *   @p x and @p r may be evaluated multiple times.
+ * @return The rotated result.
+ */
+#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
+                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
+#  define XXH_rotl32 __builtin_rotateleft32
+#  define XXH_rotl64 __builtin_rotateleft64
+/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
+#elif defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+/*!
+ * @internal + * @fn xxh_u32 XXH_swap32(xxh_u32 x) + * @brief A 32-bit byteswap. + * + * @param x The 32-bit integer to byteswap. + * @return @p x, byteswapped. + */ +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static xxh_u32 XXH_swap32 (xxh_u32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +#endif + + +/* *************************** +* Memory reads +*****************************/ + +/*! + * @internal + * @brief Enum to indicate whether a pointer is aligned. + */ +typedef enum { + XXH_aligned, /*!< Aligned */ + XXH_unaligned /*!< Possibly unaligned */ +} XXH_alignment; + +/* + * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. + * + * This is ideal for older compilers which don't inline memcpy. + */ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) + +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] + | ((xxh_u32)bytePtr[1] << 8) + | ((xxh_u32)bytePtr[2] << 16) + | ((xxh_u32)bytePtr[3] << 24); +} + +XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[3] + | ((xxh_u32)bytePtr[2] << 8) + | ((xxh_u32)bytePtr[1] << 16) + | ((xxh_u32)bytePtr[0] << 24); +} + +#else +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); +} + +static xxh_u32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} +#endif + +XXH_FORCE_INLINE xxh_u32 +XXH_readLE32_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) { + return XXH_readLE32(ptr); + } else { + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); + } +} + + +/* ************************************* +* Misc +***************************************/ +/*! @ingroup public */ +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +* 32-bit hash functions +*********************************************************************/ +/*! + * @} + * @defgroup XXH32_impl XXH32 implementation + * @ingroup impl + * + * Details on the XXH32 implementation. + * @{ + */ + /* #define instead of static const, to be used as initializers */ +#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ +#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ +#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ +#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ +#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ + +#ifdef XXH_OLD_NAMES +# define PRIME32_1 XXH_PRIME32_1 +# define PRIME32_2 XXH_PRIME32_2 +# define PRIME32_3 XXH_PRIME32_3 +# define PRIME32_4 XXH_PRIME32_4 +# define PRIME32_5 XXH_PRIME32_5 +#endif + +/*! + * @internal + * @brief Normal stripe processing routine. + * + * This shuffles the bits so that any bit from @p input impacts several bits in + * @p acc. + * + * @param acc The accumulator lane. + * @param input The stripe of input to mix. + * @return The mixed accumulator lane. 
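+ *
+ * In short, each lane update computes
+ * acc = XXH_rotl32(acc + input * XXH_PRIME32_2, 13) * XXH_PRIME32_1,
+ * matching the function body below.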
+ */
+static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
+{
+    acc += input * XXH_PRIME32_2;
+    acc  = XXH_rotl32(acc, 13);
+    acc *= XXH_PRIME32_1;
+#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+    /*
+     * UGLY HACK:
+     * A compiler fence is the only thing that prevents GCC and Clang from
+     * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
+     * reason) without globally disabling SSE4.1.
+     *
+     * The reason we want to avoid vectorization is because despite working on
+     * 4 integers at a time, there are multiple factors slowing XXH32 down on
+     * SSE4:
+     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
+     *   newer chips!) making it slightly slower to multiply four integers at
+     *   once compared to four integers independently. Even on Sandy/Ivy Bridge,
+     *   where pmulld was fastest, it is still not worth it to go into SSE
+     *   just to multiply unless doing a long operation.
+     *
+     * - Four instructions are required to rotate,
+     *      movdqa tmp, v    // not required with VEX encoding
+     *      pslld  tmp, 13   // tmp <<= 13
+     *      psrld  v, 19     // v >>= 19
+     *      por    v, tmp    // v |= tmp
+     *   compared to one for scalar:
+     *      roll  v, 13      // reliably fast across the board
+     *      shldl v, v, 13   // Sandy Bridge and later prefer this for some reason
+     *
+     * - Instruction level parallelism is actually more beneficial here because
+     *   the SIMD actually serializes this operation: While v1 is rotating, v2
+     *   can load data, while v3 can multiply. SSE forces them to operate
+     *   together.
+     *
+     * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
+     * and it is pointless writing a NEON implementation that is basically the
+     * same speed as scalar for XXH32.
+     */
+    XXH_COMPILER_GUARD(acc);
+#endif
+    return acc;
+}
+
+/*!
+ * @internal
+ * @brief Mixes all bits to finalize the hash.
+ *
+ * The final mix ensures that all input bits have a chance to impact any bit in
+ * the output digest, resulting in an unbiased distribution.
+ *
+ * @param hash The hash to avalanche.
+ * @return The avalanched hash.
+ */
+static xxh_u32 XXH32_avalanche(xxh_u32 hash)
+{
+    hash ^= hash >> 15;
+    hash *= XXH_PRIME32_2;
+    hash ^= hash >> 13;
+    hash *= XXH_PRIME32_3;
+    hash ^= hash >> 16;
+    return hash;
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, align)
+
+/*!
+ * @internal
+ * @brief Processes the last 0-15 bytes of @p ptr.
+ *
+ * There may be up to 15 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 16.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ * @see XXH64_finalize().
+ */ +static XXH_PUREF xxh_u32 +XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define XXH_PROCESS1 do { \ + hash += (*ptr++) * XXH_PRIME32_5; \ + hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \ +} while (0) + +#define XXH_PROCESS4 do { \ + hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \ + ptr += 4; \ + hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \ +} while (0) + + if (ptr==NULL) XXH_ASSERT(len == 0); + + /* Compact rerolled version; generally faster */ + if (!XXH32_ENDJMP) { + len &= 15; + while (len >= 4) { + XXH_PROCESS4; + len -= 4; + } + while (len > 0) { + XXH_PROCESS1; + --len; + } + return XXH32_avalanche(hash); + } else { + switch(len&15) /* or switch(bEnd - p) */ { + case 12: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 8: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 4: XXH_PROCESS4; + return XXH32_avalanche(hash); + + case 13: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 9: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 5: XXH_PROCESS4; + XXH_PROCESS1; + return XXH32_avalanche(hash); + + case 14: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 10: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 6: XXH_PROCESS4; + XXH_PROCESS1; + XXH_PROCESS1; + return XXH32_avalanche(hash); + + case 15: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 11: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 7: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 3: XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 2: XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 1: XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 0: return XXH32_avalanche(hash); + } + XXH_ASSERT(0); + return hash; /* reaching this point is deemed impossible */ + } +} + +#ifdef XXH_OLD_NAMES +# define PROCESS1 XXH_PROCESS1 +# define PROCESS4 XXH_PROCESS4 +#else +# undef XXH_PROCESS1 +# undef XXH_PROCESS4 +#endif + +/*! + * @internal + * @brief The implementation for @ref XXH32(). + * + * @param input , len , seed Directly passed from @ref XXH32(). + * @param align Whether @p input is aligned. + * @return The calculated hash. + */ +XXH_FORCE_INLINE XXH_PUREF xxh_u32 +XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) +{ + xxh_u32 h32; + + if (input==NULL) XXH_ASSERT(len == 0); + + if (len>=16) { + const xxh_u8* const bEnd = input + len; + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + xxh_u32 v2 = seed + XXH_PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - XXH_PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; + v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; + } while (input < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + XXH_PRIME32_5; + } + + h32 += (xxh_u32)len; + + return XXH32_finalize(h32, input, len&15, align); +} + +/*! 
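+ * Minimal one-shot usage (a sketch; `buf`/`bufSize` are placeholder names):
+ * @code{.c}
+ *     XXH32_hash_t h = XXH32(buf, bufSize, 0); // seed of 0
+ * @endcode
+ *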
@ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) +{ +#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8*)input, len); + return XXH32_digest(&state); +#else + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); +#endif +} + + + +/******* Hash streaming *******/ +#ifndef XXH_NO_STREAM +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + XXH_memcpy(dstState, srcState, sizeof(*dstState)); +} + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) +{ + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + statePtr->v[1] = seed + XXH_PRIME32_2; + statePtr->v[2] = seed + 0; + statePtr->v[3] = seed - XXH_PRIME32_1; + return XXH_OK; +} + + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode +XXH32_update(XXH32_state_t* state, const void* input, size_t len) +{ + if (input==NULL) { + XXH_ASSERT(len == 0); + return XXH_OK; + } + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len_32 += (XXH32_hash_t)len; + state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); + state->memsize += (XXH32_hash_t)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const xxh_u32* p32 = state->mem32; + state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++; + state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++; + state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++; + state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const xxh_u8* const limit = bEnd - 16; + + do { + state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4; + state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4; + state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4; + state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4; + } while (p<=limit); + + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +/*! 
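+ * A typical streaming sequence, sketched (chunk names are placeholders):
+ * @code{.c}
+ *     XXH32_state_t* st = XXH32_createState();
+ *     XXH32_reset(st, 0);
+ *     XXH32_update(st, chunk1, size1);
+ *     XXH32_update(st, chunk2, size2);
+ *     XXH32_hash_t h = XXH32_digest(st);
+ *     XXH32_freeState(st);
+ * @endcode
+ *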
@ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state) +{ + xxh_u32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v[0], 1) + + XXH_rotl32(state->v[1], 7) + + XXH_rotl32(state->v[2], 12) + + XXH_rotl32(state->v[3], 18); + } else { + h32 = state->v[2] /* == seed */ + XXH_PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); +} +#endif /* !XXH_NO_STREAM */ + +/******* Canonical representation *******/ + +/*! + * @ingroup XXH32_family + * The default return values from XXH functions are unsigned 32 and 64 bit + * integers. + * + * The canonical representation uses big endian convention, the same convention + * as human-readable numbers (large digits first). + * + * This way, hash values can be written into a file or buffer, remaining + * comparable across different systems. + * + * The following functions allow transformation of hash values to and from their + * canonical format. + */ +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + XXH_memcpy(dst, &hash, sizeof(*dst)); +} +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ +/*! + * @} + * @ingroup impl + * @{ + */ +/******* Memory access *******/ + +typedef XXH64_hash_t xxh_u64; + +#ifdef XXH_OLD_NAMES +# define U64 xxh_u64 +#endif + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +/* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE64 and XXH_readBE64. + */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXH_read64(const void* memPtr) +{ + return *(const xxh_u64*) memPtr; +} + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* + * __attribute__((aligned(1))) is supported by gcc and clang. Originally the + * documentation claimed that it only increased the alignment, but actually it + * can decrease it on gcc, clang, and icc: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, + * https://gcc.godbolt.org/z/xYez1j67Y. + */ +#ifdef XXH_OLD_NAMES +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +#endif +static xxh_u64 XXH_read64(const void* ptr) +{ + typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64; + return *((const xxh_unalign64*)ptr); +} + +#else + +/* + * Portable and safe solution. Generally efficient. 
+ * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
+ */
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+    xxh_u64 val;
+    XXH_memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap64 __builtin_bswap64
+#else
+static xxh_u64 XXH_swap64(xxh_u64 x)
+{
+    return ((x << 56) & 0xff00000000000000ULL) |
+           ((x << 40) & 0x00ff000000000000ULL) |
+           ((x << 24) & 0x0000ff0000000000ULL) |
+           ((x << 8)  & 0x000000ff00000000ULL) |
+           ((x >> 8)  & 0x00000000ff000000ULL) |
+           ((x >> 24) & 0x0000000000ff0000ULL) |
+           ((x >> 40) & 0x000000000000ff00ULL) |
+           ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
+{
+    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
+    return bytePtr[0]
+         | ((xxh_u64)bytePtr[1] << 8)
+         | ((xxh_u64)bytePtr[2] << 16)
+         | ((xxh_u64)bytePtr[3] << 24)
+         | ((xxh_u64)bytePtr[4] << 32)
+         | ((xxh_u64)bytePtr[5] << 40)
+         | ((xxh_u64)bytePtr[6] << 48)
+         | ((xxh_u64)bytePtr[7] << 56);
+}
+
+XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
+{
+    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
+    return bytePtr[7]
+         | ((xxh_u64)bytePtr[6] << 8)
+         | ((xxh_u64)bytePtr[5] << 16)
+         | ((xxh_u64)bytePtr[4] << 24)
+         | ((xxh_u64)bytePtr[3] << 32)
+         | ((xxh_u64)bytePtr[2] << 40)
+         | ((xxh_u64)bytePtr[1] << 48)
+         | ((xxh_u64)bytePtr[0] << 56);
+}
+
+#else
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+}
+
+static xxh_u64 XXH_readBE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+#endif
+
+XXH_FORCE_INLINE xxh_u64
+XXH_readLE64_align(const void* ptr, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return XXH_readLE64(ptr);
+    else
+        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
+}
+
+
+/******* xxh64 *******/
+/*!
+ * @}
+ * @defgroup XXH64_impl XXH64 implementation
+ * @ingroup impl
+ *
+ * Details on the XXH64 implementation.
+ * @{
+ */
+/* #define rather than static const, to be used as initializers */
+#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL  /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
+#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL  /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
+#define XXH_PRIME64_3  0x165667B19E3779F9ULL  /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
+#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL  /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
+#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL  /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+#ifdef XXH_OLD_NAMES
+#  define PRIME64_1 XXH_PRIME64_1
+#  define PRIME64_2 XXH_PRIME64_2
+#  define PRIME64_3 XXH_PRIME64_3
+#  define PRIME64_4 XXH_PRIME64_4
+#  define PRIME64_5 XXH_PRIME64_5
+#endif
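+
+/*
+ * [Editor's aside, not part of upstream xxHash] Like XXH32 above, XXH64 keeps
+ * four independent 64-bit lanes, each consuming 8 bytes per round, and merges
+ * them at the end. A minimal sketch of the pipeline implemented below:
+ *
+ *     v[i] = XXH64_round(v[i], next 8 input bytes);   // per lane
+ *     h64  = rotl(v1,1) + rotl(v2,7) + rotl(v3,12) + rotl(v4,18);
+ *     h64  = XXH64_mergeRound(h64, v[i]);             // for each lane
+ *     h64  = XXH64_finalize(h64, tail, len, align);   // last 0-31 bytes
+ */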
+/*! @copydoc XXH32_round */
+static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
+{
+    acc += input * XXH_PRIME64_2;
+    acc  = XXH_rotl64(acc, 31);
+    acc *= XXH_PRIME64_1;
+    return acc;
+}
+
+static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
+{
+    val  = XXH64_round(0, val);
+    acc ^= val;
+    acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
+    return acc;
+}
+
+/*! @copydoc XXH32_avalanche */
+static xxh_u64 XXH64_avalanche(xxh_u64 hash)
+{
+    hash ^= hash >> 33;
+    hash *= XXH_PRIME64_2;
+    hash ^= hash >> 29;
+    hash *= XXH_PRIME64_3;
+    hash ^= hash >> 32;
+    return hash;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+
+/*!
+ * @internal
+ * @brief Processes the last 0-31 bytes of @p ptr.
+ *
+ * There may be up to 31 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 32.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash
+ * @see XXH32_finalize().
+ */
+static XXH_PUREF xxh_u64
+XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+    if (ptr==NULL) XXH_ASSERT(len == 0);
+    len &= 31;
+    while (len >= 8) {
+        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
+        ptr += 8;
+        hash ^= k1;
+        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+        len -= 8;
+    }
+    if (len >= 4) {
+        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+        ptr += 4;
+        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+        len -= 4;
+    }
+    while (len > 0) {
+        hash ^= (*ptr++) * XXH_PRIME64_5;
+        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
+        --len;
+    }
+    return XXH64_avalanche(hash);
+}
+
+#ifdef XXH_OLD_NAMES
+#  define PROCESS1_64 XXH_PROCESS1_64
+#  define PROCESS4_64 XXH_PROCESS4_64
+#  define PROCESS8_64 XXH_PROCESS8_64
+#else
+#  undef XXH_PROCESS1_64
+#  undef XXH_PROCESS4_64
+#  undef XXH_PROCESS8_64
+#endif
+
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH64().
+ *
+ * @param input , len , seed Directly passed from @ref XXH64().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u64
+XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
+{
+    xxh_u64 h64;
+    if (input==NULL) XXH_ASSERT(len == 0);
+
+    if (len>=32) {
+        const xxh_u8* const bEnd = input + len;
+        const xxh_u8* const limit = bEnd - 31;
+        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+        xxh_u64 v2 = seed + XXH_PRIME64_2;
+        xxh_u64 v3 = seed + 0;
+        xxh_u64 v4 = seed - XXH_PRIME64_1;
+
+        do {
+            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
+            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
+            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
+            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
+        } while (input<limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7)
+            + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+
+    } else {
+        h64  = seed + XXH_PRIME64_5;
+    }
+
+    h64 += (xxh_u64) len;
+
+    return XXH64_finalize(h64, input, len, align);
+}
+
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, (const xxh_u8*)input, len);
+    return XXH64_digest(&state);
+#else
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
+            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
+    }   }
+
+    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
+
+#endif
+}
+
+/******* Hash Streaming *******/
+#ifndef XXH_NO_STREAM
+/*!
@ingroup XXH64_family*/ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + XXH_memcpy(dstState, srcState, sizeof(*dstState)); +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed) +{ + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + statePtr->v[1] = seed + XXH_PRIME64_2; + statePtr->v[2] = seed + 0; + statePtr->v[3] = seed - XXH_PRIME64_1; + return XXH_OK; +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH_errorcode +XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len) +{ + if (input==NULL) { + XXH_ASSERT(len == 0); + return XXH_OK; + } + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0)); + state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1)); + state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2)); + state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3)); + p += 32 - state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const xxh_u8* const limit = bEnd - 32; + + do { + state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8; + state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8; + state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8; + state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8; + } while (p<=limit); + + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state) +{ + xxh_u64 h64; + + if (state->total_len >= 32) { + h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18); + h64 = XXH64_mergeRound(h64, state->v[0]); + h64 = XXH64_mergeRound(h64, state->v[1]); + h64 = XXH64_mergeRound(h64, state->v[2]); + h64 = XXH64_mergeRound(h64, state->v[3]); + } else { + h64 = state->v[2] /*seed*/ + XXH_PRIME64_5; + } + + h64 += (xxh_u64) state->total_len; + + return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); +} +#endif /* !XXH_NO_STREAM */ + +/******* Canonical representation *******/ + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + XXH_memcpy(dst, &hash, sizeof(*dst)); +} + +/*! 
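+ * [Editor's aside, illustrative only] Canonical form is a portable,
+ * endian-stable byte serialization, so a round-trip is the identity:
+ *
+ *     XXH64_canonical_t c;
+ *     XXH64_canonicalFromHash(&c, h);      // big-endian bytes in c.digest
+ *     XXH_ASSERT(XXH64_hashFromCanonical(&c) == h);
+ *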
+ * @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
+{
+    return XXH_readBE64(src);
+}
+
+#ifndef XXH_NO_XXH3
+
+/* *********************************************************************
+*  XXH3
+*  New generation hash designed for speed on small keys and vectorization
+************************************************************************ */
+/*!
+ * @}
+ * @defgroup XXH3_impl XXH3 implementation
+ * @ingroup impl
+ * @{
+ */
+
+/* ===   Compiler specifics   === */
+
+#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
+#  define XXH_RESTRICT   /* disable */
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
+#  define XXH_RESTRICT   restrict
+#else
+/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
+#  define XXH_RESTRICT   /* disable */
+#endif
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
+  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
+  || defined(__clang__)
+#    define XXH_likely(x) __builtin_expect(x, 1)
+#    define XXH_unlikely(x) __builtin_expect(x, 0)
+#else
+#    define XXH_likely(x) (x)
+#    define XXH_unlikely(x) (x)
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#  if defined(__ARM_FEATURE_SVE)
+#    include <arm_sve.h>
+#  elif defined(__ARM_NEON__) || defined(__ARM_NEON) \
+   || defined(__aarch64__)  || defined(_M_ARM) \
+   || defined(_M_ARM64)     || defined(_M_ARM64EC)
+#    define inline __inline__  /* circumvent a clang bug */
+#    include <arm_neon.h>
+#    undef inline
+#  elif defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
+#  endif
+#endif
+
+#if defined(_MSC_VER)
+#  include <intrin.h>
+#endif
+
+/*
+ * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
+ * remaining a true 64-bit/128-bit hash function.
+ *
+ * This is done by prioritizing a subset of 64-bit operations that can be
+ * emulated without too many steps on the average 32-bit machine.
+ *
+ * For example, these two lines seem similar, and run equally fast on 64-bit:
+ *
+ *   xxh_u64 x;
+ *   x ^= (x >> 47); // good
+ *   x ^= (x >> 13); // bad
+ *
+ * However, to a 32-bit machine, there is a major difference.
+ *
+ * x ^= (x >> 47) looks like this:
+ *
+ *   x.lo ^= (x.hi >> (47 - 32));
+ *
+ * while x ^= (x >> 13) looks like this:
+ *
+ *   // note: funnel shifts are not usually cheap.
+ *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
+ *   x.hi ^= (x.hi >> 13);
+ *
+ * The first one is significantly faster than the second, simply because the
+ * shift is larger than 32. This means:
+ *  - All the bits we need are in the upper 32 bits, so we can ignore the lower
+ *    32 bits in the shift.
+ *  - The shift result will always fit in the lower 32 bits, and therefore,
+ *    we can ignore the upper 32 bits in the xor.
+ *
+ * Thanks to this optimization, XXH3 only requires these features to be efficient:
+ *
+ *  - Usable unaligned access
+ *  - A 32-bit or 64-bit ALU
+ *      - If 32-bit, a decent ADC instruction
+ *  - A 32 or 64-bit multiply with a 64-bit result
+ *  - For the 128-bit variant, a decent byteswap helps short inputs.
+ *
+ * The first two are already required by XXH32, and almost all 32-bit and 64-bit
+ * platforms which can run XXH32 can run XXH3 efficiently.
+ *
+ * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
+ * notable exception.
+ *
+ * First of all, Thumb-1 lacks support for the UMULL instruction which
+ * performs the important long multiply. This means numerous __aeabi_lmul
+ * calls.
+ * + * Second of all, the 8 functional registers are just not enough. + * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need + * Lo registers, and this shuffling results in thousands more MOVs than A32. + * + * A32 and T32 don't have this limitation. They can access all 14 registers, + * do a 32->64 multiply with UMULL, and the flexible operand allowing free + * shifts is helpful, too. + * + * Therefore, we do a quick sanity check. + * + * If compiling Thumb-1 for a target which supports ARM instructions, we will + * emit a warning, as it is not a "sane" platform to compile for. + * + * Usually, if this happens, it is because of an accident and you probably need + * to specify -march, as you likely meant to compile for a newer architecture. + * + * Credit: large sections of the vectorial and asm source code paths + * have been contributed by @easyaspi314 + */ +#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM) +# warning "XXH3 is highly inefficient without ARM or Thumb-2." +#endif + +/* ========================================== + * Vectorization detection + * ========================================== */ + +#ifdef XXH_DOXYGEN +/*! + * @ingroup tuning + * @brief Overrides the vectorization implementation chosen for XXH3. + * + * Can be defined to 0 to disable SIMD or any of the values mentioned in + * @ref XXH_VECTOR_TYPE. + * + * If this is not defined, it uses predefined macros to determine the best + * implementation. + */ +# define XXH_VECTOR XXH_SCALAR +/*! + * @ingroup tuning + * @brief Possible values for @ref XXH_VECTOR. + * + * Note that these are actually implemented as macros. + * + * If this is not defined, it is detected automatically. + * @ref XXH_X86DISPATCH overrides this. + */ +enum XXH_VECTOR_TYPE /* fake enum */ { + XXH_SCALAR = 0, /*!< Portable scalar version */ + XXH_SSE2 = 1, /*!< + * SSE2 for Pentium 4, Opteron, all x86_64. + * + * @note SSE2 is also guaranteed on Windows 10, macOS, and + * Android x86. + */ + XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */ + XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */ + XXH_NEON = 4, /*!< NEON for most ARMv7-A and all AArch64 */ + XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */ + XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */ +}; +/*! + * @ingroup tuning + * @brief Selects the minimum alignment for XXH3's accumulators. + * + * When using SIMD, this should match the alignment required for said vector + * type, so, for example, 32 for AVX2. + * + * Default: Auto detected. 
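+ *
+ * [Editor's aside, illustrative only] The auto-detected values below map one
+ * alignment per vector width: 8 (scalar), 16 (SSE2/NEON/VSX), 32 (AVX2),
+ * 64 (AVX512/SVE). For example:
+ *
+ *     cc -DXXH_VECTOR=2 ...   // force AVX2; implies XXH_ACC_ALIGN == 32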
+ */ +# define XXH_ACC_ALIGN 8 +#endif + +/* Actual definition */ +#ifndef XXH_DOXYGEN +# define XXH_SCALAR 0 +# define XXH_SSE2 1 +# define XXH_AVX2 2 +# define XXH_AVX512 3 +# define XXH_NEON 4 +# define XXH_VSX 5 +# define XXH_SVE 6 +#endif + +#ifndef XXH_VECTOR /* can be defined on command line */ +# if defined(__ARM_FEATURE_SVE) +# define XXH_VECTOR XXH_SVE +# elif ( \ + defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \ + || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \ + ) && ( \ + defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \ + ) +# define XXH_VECTOR XXH_NEON +# elif defined(__AVX512F__) +# define XXH_VECTOR XXH_AVX512 +# elif defined(__AVX2__) +# define XXH_VECTOR XXH_AVX2 +# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +# define XXH_VECTOR XXH_SSE2 +# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \ + || (defined(__s390x__) && defined(__VEC__)) \ + && defined(__GNUC__) /* TODO: IBM XL */ +# define XXH_VECTOR XXH_VSX +# else +# define XXH_VECTOR XXH_SCALAR +# endif +#endif + +/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */ +#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE) +# ifdef _MSC_VER +# pragma warning(once : 4606) +# else +# warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead." +# endif +# undef XXH_VECTOR +# define XXH_VECTOR XXH_SCALAR +#endif + +/* + * Controls the alignment of the accumulator, + * for compatibility with aligned vector loads, which are usually faster. + */ +#ifndef XXH_ACC_ALIGN +# if defined(XXH_X86DISPATCH) +# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ +# elif XXH_VECTOR == XXH_SCALAR /* scalar */ +# define XXH_ACC_ALIGN 8 +# elif XXH_VECTOR == XXH_SSE2 /* sse2 */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_AVX2 /* avx2 */ +# define XXH_ACC_ALIGN 32 +# elif XXH_VECTOR == XXH_NEON /* neon */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_VSX /* vsx */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_AVX512 /* avx512 */ +# define XXH_ACC_ALIGN 64 +# elif XXH_VECTOR == XXH_SVE /* sve */ +# define XXH_ACC_ALIGN 64 +# endif +#endif + +#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \ + || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 +# define XXH_SEC_ALIGN XXH_ACC_ALIGN +#elif XXH_VECTOR == XXH_SVE +# define XXH_SEC_ALIGN XXH_ACC_ALIGN +#else +# define XXH_SEC_ALIGN 8 +#endif + +/* + * UGLY HACK: + * GCC usually generates the best code with -O3 for xxHash. + * + * However, when targeting AVX2, it is overzealous in its unrolling resulting + * in code roughly 3/4 the speed of Clang. + * + * There are other issues, such as GCC splitting _mm256_loadu_si256 into + * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which + * only applies to Sandy and Ivy Bridge... which don't even support AVX2. + * + * That is why when compiling the AVX2 version, it is recommended to use either + * -O2 -mavx2 -march=haswell + * or + * -O2 -mavx2 -mno-avx256-split-unaligned-load + * for decent performance, or to use Clang instead. + * + * Fortunately, we can control the first one with a pragma that forces GCC into + * -O2, but the other one we can't control without "failed to inline always + * inline function due to target mismatch" warnings. 
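+ *
+ * [Editor's aside, illustrative only; the file name is a placeholder] In
+ * build-system terms, the recommendation above amounts to something like:
+ *
+ *     gcc -O2 -mavx2 -mno-avx256-split-unaligned-load -c xxhash.c
+ *
+ * or building this translation unit with Clang instead.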
+ */ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */ +# pragma GCC push_options +# pragma GCC optimize("-O2") +#endif + + +#if XXH_VECTOR == XXH_NEON +/* + * NEON's setup for vmlal_u32 is a little more complicated than it is on + * SSE2, AVX2, and VSX. + * + * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast. + * + * To do the same operation, the 128-bit 'Q' register needs to be split into + * two 64-bit 'D' registers, performing this operation:: + * + * [ a | b ] + * | '---------. .--------' | + * | x | + * | .---------' '--------. | + * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 ] + * + * Due to significant changes in aarch64, the fastest method for aarch64 is + * completely different than the fastest method for ARMv7-A. + * + * ARMv7-A treats D registers as unions overlaying Q registers, so modifying + * D11 will modify the high half of Q5. This is similar to how modifying AH + * will only affect bits 8-15 of AX on x86. + * + * VZIP takes two registers, and puts even lanes in one register and odd lanes + * in the other. + * + * On ARMv7-A, this strangely modifies both parameters in place instead of + * taking the usual 3-operand form. + * + * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the + * lower and upper halves of the Q register to end up with the high and low + * halves where we want - all in one instruction. + * + * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] } + * + * Unfortunately we need inline assembly for this: Instructions modifying two + * registers at once is not possible in GCC or Clang's IR, and they have to + * create a copy. + * + * aarch64 requires a different approach. + * + * In order to make it easier to write a decent compiler for aarch64, many + * quirks were removed, such as conditional execution. + * + * NEON was also affected by this. + * + * aarch64 cannot access the high bits of a Q-form register, and writes to a + * D-form register zero the high bits, similar to how writes to W-form scalar + * registers (or DWORD registers on x86_64) work. + * + * The formerly free vget_high intrinsics now require a vext (with a few + * exceptions) + * + * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent + * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one + * operand. + * + * The equivalent of the VZIP.32 on the lower and upper halves would be this + * mess: + * + * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] } + * zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] } + * zip2 v0.2s, v0.2s, v1.2s // v0 = { v0[1], v2[1] } + * + * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN): + * + * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32); + * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF); + * + * This is available on ARMv7-A, but is less efficient than a single VZIP.32. + */ + +/*! 
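+ * [Editor's aside, illustrative only] Concretely, for one uint64x2_t input
+ * with lanes {0x0000000100000002, 0x0000000300000004}, the split yields
+ * outLo = {0x00000002, 0x00000004} and outHi = {0x00000001, 0x00000003}:
+ * the low and high 32-bit halves of each 64-bit lane.
+ *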
+ * Function-like macro: + * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi) + * { + * outLo = (uint32x2_t)(in & 0xFFFFFFFF); + * outHi = (uint32x2_t)(in >> 32); + * in = UNDEFINED; + * } + */ +# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \ + && (defined(__GNUC__) || defined(__clang__)) \ + && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM)) +# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \ + /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */ \ + /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \ + __asm__("vzip.32 %e0, %f0" : "+w" (in)); \ + (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in)); \ + (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \ + } while (0) +# else +# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + (outLo) = vmovn_u64 (in); \ + (outHi) = vshrn_n_u64 ((in), 32); \ + } while (0) +# endif + +/*! + * @internal + * @brief `vld1q_u64` but faster and alignment-safe. + * + * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only + * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86). + * + * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it + * prohibits load-store optimizations. Therefore, a direct dereference is used. + * + * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe + * unaligned load. + */ +#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) +XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */ +{ + return *(uint64x2_t const*)ptr; +} +#else +XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) +{ + return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr)); +} +#endif +/*! + * @ingroup tuning + * @brief Controls the NEON to scalar ratio for XXH3 + * + * On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and + * 2 lanes on scalar by default (except on Apple platforms, as Apple CPUs benefit + * from only using NEON). + * + * This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the + * emulated 64-bit arithmetic is too slow. + * + * Modern ARM CPUs are _very_ sensitive to how their pipelines are used. + * + * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't + * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions, + * you are only using 2/3 of the CPU bandwidth. + * + * This is even more noticeable on the more advanced cores like the A76 which + * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once. + * + * Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the + * remaining lanes will use scalar instructions. This improves the bandwidth + * and also gives the integer pipelines something to do besides twiddling loop + * counters and pointers. + * + * This change benefits CPUs with large micro-op buffers without negatively affecting + * most other CPUs: + * + * | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. 
|
+ * |:----------------------|:--------------------|----------:|-----------:|------:|
+ * | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
+ * | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
+ * | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
+ * | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s |  36.1 GB/s |  ~-3% |
+ *
+ * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
+ *
+ * @see XXH3_accumulate_512_neon()
+ */
+# ifndef XXH3_NEON_LANES
+#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
+   && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
+#   define XXH3_NEON_LANES 6
+#  else
+#   define XXH3_NEON_LANES XXH_ACC_NB
+#  endif
+# endif
+#endif  /* XXH_VECTOR == XXH_NEON */
+
+/*
+ * VSX and Z Vector helpers.
+ *
+ * This is very messy, and any pull requests to clean this up are welcome.
+ *
+ * There are a lot of problems with supporting VSX and s390x, due to
+ * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
+ */
+#if XXH_VECTOR == XXH_VSX
+/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
+ * and `pixel`. This is a problem for obvious reasons.
+ *
+ * These keywords are unnecessary; the spec literally says they are
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
+ * after including the header.
+ *
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
+#  pragma push_macro("bool")
+#  pragma push_macro("vector")
+#  pragma push_macro("pixel")
+/* silence potential macro redefined warnings */
+#  undef bool
+#  undef vector
+#  undef pixel
+
+#  if defined(__s390x__)
+#    include <s390intrin.h>
+#  else
+#    include <altivec.h>
+#  endif
+
+/* Restore the original macro values, if applicable. */
+#  pragma pop_macro("pixel")
+#  pragma pop_macro("vector")
+#  pragma pop_macro("bool")
+
+typedef __vector unsigned long long xxh_u64x2;
+typedef __vector unsigned char xxh_u8x16;
+typedef __vector unsigned xxh_u32x4;
+
+# ifndef XXH_VSX_BE
+#  if defined(__BIG_ENDIAN__) \
+  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#    define XXH_VSX_BE 1
+#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+#    warning "-maltivec=be is not recommended. Please use native endianness."
+#    define XXH_VSX_BE 1
+#  else
+#    define XXH_VSX_BE 0
+#  endif
+# endif /* !defined(XXH_VSX_BE) */
+
+# if XXH_VSX_BE
+#  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
+#    define XXH_vec_revb vec_revb
+#  else
+/*!
+ * A polyfill for POWER9's vec_revb().
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
+{
+    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
+    return vec_perm(val, val, vByteSwap);
+}
+#  endif
+# endif /* XXH_VSX_BE */
+
+/*!
+ * Performs an unaligned vector load and byte swaps it on big endian.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
+{
+    xxh_u64x2 ret;
+    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
+# if XXH_VSX_BE
+    ret = XXH_vec_revb(ret);
+# endif
+    return ret;
+}
+
+/*
+ * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
+ *
+ * These intrinsics weren't added until GCC 8, despite existing for a while,
+ * and they are endian dependent. Also, their meanings swap depending on the
+ * version.
+ * */
+# if defined(__s390x__)
+  /* s390x is always big endian, no issue on this platform */
+#  define XXH_vec_mulo vec_mulo
+#  define XXH_vec_mule vec_mule
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
+/* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
+ /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
+#  define XXH_vec_mulo __builtin_altivec_vmulouw
+#  define XXH_vec_mule __builtin_altivec_vmuleuw
+# else
+/* gcc needs inline assembly */
+/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 result;
+    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
+    return result;
+}
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 result;
+    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
+    return result;
+}
+# endif /* XXH_vec_mulo, XXH_vec_mule */
+#endif /* XXH_VECTOR == XXH_VSX */
+
+#if XXH_VECTOR == XXH_SVE
+#define ACCRND(acc, offset) \
+do { \
+    svuint64_t input_vec = svld1_u64(mask, xinput + offset);         \
+    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset);       \
+    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec);     \
+    svuint64_t swapped = svtbl_u64(input_vec, kSwap);                \
+    svuint64_t mixed_lo = svextw_u64_x(mask, mixed);                 \
+    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32);            \
+    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
+    acc = svadd_u64_x(mask, acc, mul);                               \
+} while (0)
+#endif /* XXH_VECTOR == XXH_SVE */
+
+
+/* prefetch
+ * can be disabled by declaring the XXH_NO_PREFETCH build macro */
+#if defined(XXH_NO_PREFETCH)
+#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
+#else
+#  if XXH_SIZE_OPT >= 1
+#    define XXH_PREFETCH(ptr) (void)(ptr)
+#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
+#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#  else
+#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
+#  endif
+#endif  /* XXH_NO_PREFETCH */
+
+
+/* ==========================================
+ * XXH3 default settings
+ * ========================================== */
+
+#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */
+
+#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
+#  error "default keyset is not large enough"
+#endif
+
+/*! Pseudorandom secret taken directly from FARSH.
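+ *
+ * [Editor's aside] The default secret is 192 bytes (XXH_SECRET_DEFAULT_SIZE),
+ * and the #error above guards the invariant that any secret is at least
+ * XXH3_SECRET_SIZE_MIN bytes. The short-input variants below sample fixed
+ * offsets of this array (see the bitflip constants in XXH3_len_0to16_64b).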
*/ +XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + + +#ifdef XXH_OLD_NAMES +# define kSecret XXH3_kSecret +#endif + +#ifdef XXH_DOXYGEN +/*! + * @brief Calculates a 32-bit to 64-bit long multiply. + * + * Implemented as a macro. + * + * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't + * need to (but it shouldn't need to anyways, it is about 7 instructions to do + * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we + * use that instead of the normal method. + * + * If you are compiling for platforms like Thumb-1 and don't have a better option, + * you may also want to write your own long multiply routine here. + * + * @param x, y Numbers to be multiplied + * @return 64-bit product of the low 32 bits of @p x and @p y. + */ +XXH_FORCE_INLINE xxh_u64 +XXH_mult32to64(xxh_u64 x, xxh_u64 y) +{ + return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); +} +#elif defined(_MSC_VER) && defined(_M_IX86) +# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) +#else +/* + * Downcast + upcast is usually better than masking on older compilers like + * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers. + * + * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands + * and perform a full 64x64 multiply -- entirely redundant on 32-bit. + */ +# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) +#endif + +/*! + * @brief Calculates a 64->128-bit long multiply. + * + * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar + * version. + * + * @param lhs , rhs The 64-bit integers to be multiplied + * @return The 128-bit result represented in an @ref XXH128_hash_t. + */ +static XXH128_hash_t +XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) +{ + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this type + * despite not having the arithmetic for it. This results in a laggy + * compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. 
+ * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \ + && defined(__SIZEOF_INT128__) \ + || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; + XXH128_hash_t r128; + r128.low64 = (xxh_u64)(product); + r128.high64 = (xxh_u64)(product >> 64); + return r128; + + /* + * MSVC for x64's _umul128 method. + * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); + * + * This compiles to single operand MUL on x64. + */ +#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC) + +#ifndef _MSC_VER +# pragma intrinsic(_umul128) +#endif + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); + XXH128_hash_t r128; + r128.low64 = product_low; + r128.high64 = product_high; + return r128; + + /* + * MSVC for ARM64's __umulh method. + * + * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method. + */ +#elif defined(_M_ARM64) || defined(_M_ARM64EC) + +#ifndef _MSC_VER +# pragma intrinsic(__umulh) +#endif + XXH128_hash_t r128; + r128.low64 = lhs * rhs; + r128.high64 = __umulh(lhs, rhs); + return r128; + +#else + /* + * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. + * + * This is a fast and simple grade school multiply, which is shown below + * with base 10 arithmetic instead of base 0x100000000. + * + * 9 3 // D2 lhs = 93 + * x 7 5 // D2 rhs = 75 + * ---------- + * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 + * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 + * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 + * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 + * --------- + * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 + * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 + * --------- + * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 + * + * The reasons for adding the products like this are: + * 1. It avoids manual carry tracking. Just like how + * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. + * This avoids a lot of complexity. + * + * 2. It hints for, and on Clang, compiles to, the powerful UMAAL + * instruction available in ARM's Digital Signal Processing extension + * in 32-bit ARMv6 and later, which is shown below: + * + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) + * { + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); + * } + * + * This instruction was designed for efficient long multiplication, and + * allows this to be calculated in only 4 instructions at speeds + * comparable to some 64-bit ALUs. + * + * 3. It isn't terrible on other platforms. Usually this will be a couple + * of 32-bit ADD/ADCs. + */ + + /* First calculate all of the cross products. */ + xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. 
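+ *
+ *    [Editor's aside] Why the adds cannot overflow: each 32x32 cross product
+ *    is at most (2^32-1)^2 = 2^64 - 2^33 + 1, so
+ *    cross <= (2^64 - 2^33 + 1) + (2^32 - 2) + (2^32 - 1) = 2^64 - 2,
+ *    which still fits in 64 bits; the same bound applies to 'upper'.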
*/ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXH128_hash_t r128; + r128.low64 = lower; + r128.high64 = upper; + return r128; +#endif +} + +/*! + * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it. + * + * The reason for the separate function is to prevent passing too many structs + * around by value. This will hopefully inline the multiply, but we don't force it. + * + * @param lhs , rhs The 64-bit integers to multiply + * @return The low 64 bits of the product XOR'd by the high 64 bits. + * @see XXH_mult64to128() + */ +static xxh_u64 +XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) +{ + XXH128_hash_t product = XXH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; +} + +/*! Seems to produce slightly better code on GCC for some reason. */ +XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) +{ + XXH_ASSERT(0 <= shift && shift < 64); + return v64 ^ (v64 >> shift); +} + +/* + * This is a fast avalanche stage, + * suitable when input bits are already partially mixed + */ +static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) +{ + h64 = XXH_xorshift64(h64, 37); + h64 *= 0x165667919E3779F9ULL; + h64 = XXH_xorshift64(h64, 32); + return h64; +} + +/* + * This is a stronger avalanche, + * inspired by Pelle Evensen's rrmxmx + * preferable when input has not been previously mixed + */ +static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) +{ + /* this mix is inspired by Pelle Evensen's rrmxmx */ + h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); + h64 *= 0x9FB21C651E98DF25ULL; + h64 ^= (h64 >> 35) + len ; + h64 *= 0x9FB21C651E98DF25ULL; + return XXH_xorshift64(h64, 28); +} + + +/* ========================================== + * Short keys + * ========================================== + * One of the shortcomings of XXH32 and XXH64 was that their performance was + * sub-optimal on short lengths. It used an iterative algorithm which strongly + * favored lengths that were a multiple of 4 or 8. + * + * Instead of iterating over individual inputs, we use a set of single shot + * functions which piece together a range of lengths and operate in constant time. + * + * Additionally, the number of multiplies has been significantly reduced. This + * reduces latency, especially when emulating 64-bit multiplies on 32-bit. + * + * Depending on the platform, this may or may not be faster than XXH32, but it + * is almost guaranteed to be faster than XXH64. + */ + +/* + * At very short lengths, there isn't enough input to fully hide secrets, or use + * the entire secret. + * + * There is also only a limited amount of mixing we can do before significantly + * impacting performance. + * + * Therefore, we use different sections of the secret and always mix two secret + * samples with an XOR. This should have no effect on performance on the + * seedless or withSeed variants because everything _should_ be constant folded + * by modern compilers. + * + * The XOR mixing hides individual parts of the secret and increases entropy. + * + * This adds an extra layer of strength for custom secrets. 
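+ *
+ * [Editor's aside, illustrative only] As a worked example of the packing in
+ * XXH3_len_1to3_64b below: for input {0xAB, 0xCD} (len = 2),
+ * c1 = input[0] = 0xAB, c2 = input[len>>1] = 0xCD, c3 = input[len-1] = 0xCD,
+ * so combined = (c1<<16) | (c2<<24) | c3 | (len<<8) = 0xCDAB02CD.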
+ */ +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combined = { input[0], 0x01, input[0], input[0] } + * len = 2: combined = { input[1], 0x02, input[0], input[1] } + * len = 3: combined = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; + return XXH64_avalanche(keyed); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input1 = XXH_readLE32(input); + xxh_u32 const input2 = XXH_readLE32(input + len - 4); + xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; + xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); + xxh_u64 const keyed = input64 ^ bitflip; + return XXH3_rrmxmx(keyed, len); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; + xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; + xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; + xxh_u64 const acc = len + + XXH_swap64(input_lo) + input_hi + + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); + if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); + if (len) return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64))); + } +} + +/* + * DISCLAIMER: There are known *seed-dependent* multicollisions here due to + * multiplication by zero, affecting hashes of lengths 17 to 240. + * + * However, they are very unlikely. + * + * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all + * unseeded non-cryptographic hashes, it does not attempt to defend itself + * against specially crafted inputs, only random inputs. + * + * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes + * cancelling out the secret is taken an arbitrary number of times (addressed + * in XXH3_accumulate_512), this collision is very unlikely with random inputs + * and/or proper seeding: + * + * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a + * function that is only called up to 16 times per hash with up to 240 bytes of + * input. 
+ * + * This is not too bad for a non-cryptographic hash function, especially with + * only 64 bit outputs. + * + * The 128-bit variant (which trades some speed for strength) is NOT affected + * by this, although it is always a good idea to use a proper seed if you care + * about strength. + */ +XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64) +{ +#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ + /* + * UGLY HACK: + * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in + * slower code. + * + * By forcing seed64 into a register, we disrupt the cost model and + * cause it to scalarize. See `XXH32_round()` + * + * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, + * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on + * GCC 9.2, despite both emitting scalar code. + * + * GCC generates much better scalar code than Clang for the rest of XXH3, + * which is why finding a more optimal codepath is an interest. + */ + XXH_COMPILER_GUARD(seed64); +#endif + { xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 const input_hi = XXH_readLE64(input+8); + return XXH3_mul128_fold64( + input_lo ^ (XXH_readLE64(secret) + seed64), + input_hi ^ (XXH_readLE64(secret+8) - seed64) + ); + } +} + +/* For mid range keys, XXH3 uses a Mum-hash variant. */ +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { xxh_u64 acc = len * XXH_PRIME64_1, acc_end; +#if XXH_SIZE_OPT >= 1 + /* Smaller and cleaner, but slightly slower. 
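+     * [Editor's aside, illustrative only] Either branch performs the same
+     * pairwise mixing, working inward from both ends of the input:
+     * 2 XXH3_mix16B calls for len 17-32, 4 for 33-64, 6 for 65-96, and
+     * 8 for 97-128, each pair consuming 32 fresh bytes of secret.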
+     */
+    unsigned int i = (unsigned int)(len - 1) / 32;
+    do {
+        acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
+        acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
+    } while (i-- != 0);
+    acc_end = 0;
+#else
+        acc += XXH3_mix16B(input+0, secret+0, seed);
+        acc_end = XXH3_mix16B(input+len-16, secret+16, seed);
+        if (len > 32) {
+            acc += XXH3_mix16B(input+16, secret+32, seed);
+            acc_end += XXH3_mix16B(input+len-32, secret+48, seed);
+            if (len > 64) {
+                acc += XXH3_mix16B(input+32, secret+64, seed);
+                acc_end += XXH3_mix16B(input+len-48, secret+80, seed);
+
+                if (len > 96) {
+                    acc += XXH3_mix16B(input+48, secret+96, seed);
+                    acc_end += XXH3_mix16B(input+len-64, secret+112, seed);
+                }
+            }
+        }
+#endif
+        return XXH3_avalanche(acc + acc_end);
+    }
+}
+
+#define XXH3_MIDSIZE_MAX 240
+
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                      XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+    #define XXH3_MIDSIZE_STARTOFFSET 3
+    #define XXH3_MIDSIZE_LASTOFFSET  17
+
+    {   xxh_u64 acc = len * XXH_PRIME64_1;
+        xxh_u64 acc_end;
+        unsigned int const nbRounds = (unsigned int)len / 16;
+        unsigned int i;
+        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+        for (i=0; i<8; i++) {
+            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
+        }
+        /* last bytes */
+        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
+        XXH_ASSERT(nbRounds >= 8);
+        acc = XXH3_avalanche(acc);
+#if defined(__clang__)                                /* Clang */ \
+    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
+        /*
+         * UGLY HACK:
+         * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
+         * Everywhere else, it uses scalar code.
+         *
+         * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
+         * would still be slower than UMAAL (see XXH_mult64to128).
+         *
+         * Unfortunately, Clang doesn't handle the long multiplies properly and
+         * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
+         * scalarized into an ugly mess of VMOV.32 instructions.
+         *
+         * This mess is difficult to avoid without turning autovectorization
+         * off completely, but they are usually relatively minor and/or not
+         * worth it to fix.
+         *
+         * This loop is the easiest to fix, as unlike XXH32, this pragma
+         * _actually works_ because it is a loop vectorization instead of an
+         * SLP vectorization.
+         */
+        #pragma clang loop vectorize(disable)
+#endif
+        for (i=8 ; i < nbRounds; i++) {
+            /*
+             * Prevents clang from unrolling the acc loop and interleaving with this one.
+             */
+            XXH_COMPILER_GUARD(acc);
+            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+        }
+        return XXH3_avalanche(acc + acc_end);
+    }
+}
+
+
+/* =======     Long Keys     ======= */
+
+#define XXH_STRIPE_LEN 64
+#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
+#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
+
+#ifdef XXH_OLD_NAMES
+#  define STRIPE_LEN XXH_STRIPE_LEN
+#  define ACC_NB XXH_ACC_NB
+#endif
+
+#ifndef XXH_PREFETCH_DIST
+#  ifdef __clang__
+#    define XXH_PREFETCH_DIST 320
+#  else
+#    if (XXH_VECTOR == XXH_AVX512)
+#      define XXH_PREFETCH_DIST 512
+#    else
+#      define XXH_PREFETCH_DIST 384
+#    endif
+#  endif  /* __clang__ */
+#endif  /* XXH_PREFETCH_DIST */
+
+/*
+ * These macros are to generate an XXH3_accumulate() function.
+ * The two arguments select the name suffix and target attribute.
+ *
+ * The name of this symbol is XXH3_accumulate_<name>() and it calls
+ * XXH3_accumulate_512_<name>().
+ *
+ * It may be useful to hand implement this function if the compiler fails to
+ * optimize the inline function.
+ */
+#define XXH3_ACCUMULATE_TEMPLATE(name)                      \
+void                                                        \
+XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
+                       const xxh_u8* XXH_RESTRICT input,    \
+                       const xxh_u8* XXH_RESTRICT secret,   \
+                       size_t nbStripes)                    \
+{                                                           \
+    size_t n;                                               \
+    for (n = 0; n < nbStripes; n++ ) {                      \
+        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
+        XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
+        XXH3_accumulate_512_##name(                         \
+                 acc,                                       \
+                 in,                                        \
+                 secret + n*XXH_SECRET_CONSUME_RATE);       \
+    }                                                       \
+}
+
+
+XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
+{
+    if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
+    XXH_memcpy(dst, &v64, sizeof(v64));
+}
+
+/* Several intrinsic functions below are supposed to accept __int64 as argument,
+ * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
+ * However, several environments do not define __int64 type,
+ * requiring a workaround.
+ */
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+    typedef int64_t xxh_i64;
+#else
+    /* the following type must have a width of 64-bit */
+    typedef long long xxh_i64;
+#endif
+
+
+/*
+ * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
+ *
+ * It is a hardened version of UMAC, based off of FARSH's implementation.
+ *
+ * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
+ * implementations, and it is ridiculously fast.
+ *
+ * We harden it by mixing the original input to the accumulators as well as the product.
+ *
+ * This means that in the (relatively likely) case of a multiply by zero, the
+ * original input is preserved.
+ *
+ * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
+ * cross-pollination, as otherwise the upper and lower halves would be
+ * essentially independent.
+ *
+ * This doesn't matter on 64-bit hashes since they all get merged together in
+ * the end, so we skip the extra step.
+ *
+ * Both XXH3_64bits and XXH3_128bits use this subroutine.
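+ *
+ * [Editor's aside, illustrative only] Per 64-bit lane, every SIMD variant
+ * below computes the same scalar step; a reference sketch:
+ *
+ *     xxh_u64 data = XXH_readLE64(input  + 8*lane);
+ *     xxh_u64 key  = data ^ XXH_readLE64(secret + 8*lane);
+ *     acc[lane ^ 1] += data;                                  // swap(data)
+ *     acc[lane]     += XXH_mult32to64(key & 0xFFFFFFFF, key >> 32);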
+ */ + +#if (XXH_VECTOR == XXH_AVX512) \ + || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) + +#ifndef XXH_TARGET_AVX512 +# define XXH_TARGET_AVX512 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + __m512i* const xacc = (__m512i *) acc; + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + + { + /* data_vec = input[0]; */ + __m512i const data_vec = _mm512_loadu_si512 (input); + /* key_vec = secret[0]; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + /* data_key = data_vec ^ key_vec; */ + __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); + /* xacc[0] += swap(data_vec); */ + __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); + __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + /* xacc[0] += product; */ + *xacc = _mm512_add_epi64(product, sum); + } +} +XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512) + +/* + * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. + * + * Multiplication isn't perfect, as explained by Google in HighwayHash: + * + * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + * // varying degrees. In descending order of goodness, bytes + * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + * // As expected, the upper and lower bytes are much worse. + * + * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 + * + * Since our algorithm uses a pseudorandom secret to add some variance into the + * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. + * + * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid + * extraction. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. 
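+ *
+ * [Editor's aside, illustrative only] The scalar step each scramble variant
+ * implements per 64-bit lane, as a reference:
+ *
+ *     xxh_u64 a = acc[lane];
+ *     a ^= a >> 47;                          // xorshift
+ *     a ^= XXH_readLE64(secret + 8*lane);    // mix in secret
+ *     acc[lane] = a * XXH_PRIME32_1;         // multiply by a 32-bit prime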
+ */ + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + { __m512i* const xacc = (__m512i*) acc; + const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); + + /* xacc[0] ^= (xacc[0] >> 47) */ + __m512i const acc_vec = *xacc; + __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); + /* xacc[0] ^= secret; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */); + + /* xacc[0] *= XXH_PRIME32_1; */ + __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32); + __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); + __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); + *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); + XXH_ASSERT(((size_t)customSecret & 63) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); + __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64); + __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos); + + const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret); + __m512i* const dest = ( __m512i*) customSecret; + int i; + XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dest & 63) == 0); + for (i=0; i < nbRounds; ++i) { + dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_AVX2) \ + || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) + +#ifndef XXH_TARGET_AVX2 +# define XXH_TARGET_AVX2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { __m256i* const xacc = (__m256i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xinput = (const __m256i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ + const __m256i* const xsecret = (const __m256i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + /* data_vec = xinput[i]; */ + __m256i const data_vec = _mm256_loadu_si256 (xinput+i); + /* key_vec = xsecret[i]; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm256_add_epi64(product, sum); + } } +} +XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2) + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { __m256i* const xacc = (__m256i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xsecret = (const __m256i *) secret; + const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32); + __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); + (void)(&XXH_writeLE64); + XXH_PREFETCH(customSecret); + { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64); + + const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret); + __m256i* dest = ( __m256i*) customSecret; + +# if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified causes the compiler to: + * - not extract the secret from SSE registers in the internal loop + * - use fewer common registers, avoiding pushing these registers onto the stack + */ + XXH_COMPILER_GUARD(dest); +# endif + XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dest & 31) == 0); + + /* GCC -O2 needs the loop unrolled manually */ + dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed); + dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed); + dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed); + dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed); + dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed); + dest[5] 
= _mm256_add_epi64(_mm256_load_si256(src+5), seed); + } +} + +#endif + +/* x86dispatch always generates SSE2 */ +#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_SSE2 +# define XXH_TARGET_SSE2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + /* SSE2 is just a half-scale version of the AVX2 version. */ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { __m128i* const xacc = (__m128i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xinput = (const __m128i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xsecret = (const __m128i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* data_vec = xinput[i]; */ + __m128i const data_vec = _mm_loadu_si128 (xinput+i); + /* key_vec = xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm_add_epi64(product, sum); + } } +} +XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2) + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { __m128i* const xacc = (__m128i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/ + const __m128i* const xsecret = (const __m128i *) secret; + const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); + __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); + /* xacc[i] ^= xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); + __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); + +# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 + /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */ + XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) }; + __m128i const seed = _mm_load_si128((__m128i const*)seed64x2); +# else + __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); +# endif + int i; + + const void* const src16 = XXH3_kSecret; + __m128i* dst16 = (__m128i*) customSecret; +# if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dst16' as modified causes the compiler to: + * - not extract the secret from SSE registers in the internal loop + * - use fewer common registers, avoiding pushing these registers onto the stack + */ + XXH_COMPILER_GUARD(dst16); +# endif + XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dst16 & 15) == 0); + + for (i=0; i < nbRounds; ++i) { + dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_NEON) + +/* forward declarations for the scalar routines */ +XXH_FORCE_INLINE void +XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input, + void const* XXH_RESTRICT secret, size_t lane); + +XXH_FORCE_INLINE void +XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, + void const* XXH_RESTRICT secret, size_t lane); + +/*! + * @internal + * @brief The bulk processing loop for NEON. + * + * The NEON code path is actually partially scalar when running on AArch64. This + * is to optimize the pipelining and can have up to 15% speedup depending on the + * CPU, and it also mitigates some GCC codegen issues. + * + * @see XXH3_NEON_LANES for configuring this and details about this optimization. + */ +XXH_FORCE_INLINE void +XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0); + { + uint64x2_t* const xacc = (uint64x2_t *) acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. 
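+ * (Illustration: with XXH3_NEON_LANES == 6, lanes 0..5 take the NEON path + * in the main loop below, while lanes 6 and 7 are handled by the scalar + * loop over XXH3_scalarRound that runs first.)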
*/ + uint8_t const* const xinput = (const uint8_t *) input; + uint8_t const* const xsecret = (const uint8_t *) secret; + + size_t i; + /* AArch64 uses both scalar and neon at the same time */ + for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { + XXH3_scalarRound(acc, input, secret, i); + } + i = 0; + for (; i+1 < XXH3_NEON_LANES / 2; i+=2) { + uint64x2_t acc_vec1 = xacc[i]; + /* data_vec = xinput[i]; */ + uint64x2_t data_vec1 = XXH_vld1q_u64(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec1 = XXH_vld1q_u64(xsecret + (i * 16)); + /* acc_vec_2 = swap(data_vec) */ + uint64x2_t acc_vec_21 = vextq_u64(data_vec1, data_vec1, 1); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t data_key1 = veorq_u64(data_vec1, key_vec1); + + uint64x2_t acc_vec2 = xacc[i+1]; + /* data_vec = xinput[i]; */ + uint64x2_t data_vec2 = XXH_vld1q_u64(xinput + ((i+1) * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec2 = XXH_vld1q_u64(xsecret + ((i+1) * 16)); + /* acc_vec_2 = swap(data_vec) */ + uint64x2_t acc_vec_22 = vextq_u64(data_vec2, data_vec2, 1); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t data_key2 = veorq_u64(data_vec2, key_vec2); + + /* data_key_lo = {(data_key1 & 0xFFFFFFFF), (data_key2 & 0xFFFFFFFF)}; + * data_key_hi = {(data_key1 >> 32), (data_key2 >> 32)}; + */ + uint32x4x2_t zipped = vuzpq_u32(vreinterpretq_u32_u64(data_key1), vreinterpretq_u32_u64(data_key2)); + uint32x4_t data_key_lo = zipped.val[0]; + uint32x4_t data_key_hi = zipped.val[1]; + + /* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ + acc_vec_21 = vmlal_u32 (acc_vec_21, vget_low_u32(data_key_lo), vget_low_u32(data_key_hi)); + XXH_COMPILER_GUARD_W(acc_vec_21); + /* xacc[i] += acc_vec_2; */ + acc_vec1 = vaddq_u64 (acc_vec1, acc_vec_21); + xacc[i] = acc_vec1; + /* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ + acc_vec_22 = vmlal_u32 (acc_vec_22, vget_high_u32(data_key_lo), vget_high_u32(data_key_hi)); + XXH_COMPILER_GUARD_W(acc_vec_22); + /* xacc[i] += acc_vec_2; */ + acc_vec2 = vaddq_u64 (acc_vec2, acc_vec_22); + xacc[i+1] = acc_vec2; + } + for (; i < XXH3_NEON_LANES / 2; i++) { + uint64x2_t acc_vec = xacc[i]; + /* data_vec = xinput[i]; */ + uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); + uint64x2_t data_key; + uint32x2_t data_key_lo, data_key_hi; + /* acc_vec_2 = swap(data_vec) */ + uint64x2_t acc_vec_2 = vextq_u64(data_vec, data_vec, 1); + /* data_key = data_vec ^ key_vec; */ + data_key = veorq_u64(data_vec, key_vec); + /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); + * data_key_hi = (uint32x2_t) (data_key >> 32); + * data_key = UNDEFINED; */ + XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); + /* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ + acc_vec_2 = vmlal_u32 (acc_vec_2, data_key_lo, data_key_hi); + XXH_COMPILER_GUARD_W(acc_vec_2); + /* xacc[i] += acc_vec_2; */ + acc_vec = vaddq_u64 (acc_vec, acc_vec_2); + xacc[i] = acc_vec; + } + + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon) + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { uint64x2_t* xacc = (uint64x2_t*) acc; + uint8_t const* xsecret = (uint8_t const*) secret; + uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1); + + size_t i; + /* AArch64 uses both scalar and neon at the same time */ + for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { + 
XXH3_scalarScrambleRound(acc, secret, i); + } + for (i=0; i < XXH3_NEON_LANES / 2; i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + uint64x2_t acc_vec = xacc[i]; + uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47); + uint64x2_t data_vec = veorq_u64 (acc_vec, shifted); + + /* xacc[i] ^= xsecret[i]; */ + uint64x2_t key_vec = XXH_vld1q_u64 (xsecret + (i * 16)); + uint64x2_t data_key = veorq_u64 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1 */ + uint32x2_t data_key_lo, data_key_hi; + /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF); + * data_key_hi = (uint32x2_t) (xacc[i] >> 32); + * xacc[i] = UNDEFINED; */ + XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); + { /* + * prod_hi = (data_key >> 32) * XXH_PRIME32_1; + * + * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will + * incorrectly "optimize" this: + * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b)); + * shifted = vshll_n_u32(tmp, 32); + * to this: + * tmp = "vmulq_u64"(a, b); // no such thing! + * shifted = vshlq_n_u64(tmp, 32); + * + * However, unlike SSE, Clang lacks a 64-bit multiply routine + * for NEON, and it scalarizes two 64-bit multiplies instead. + * + * vmull_u32 has the same timing as vmul_u32, and it avoids + * this bug completely. + * See https://bugs.llvm.org/show_bug.cgi?id=39967 + */ + uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime); + /* xacc[i] = prod_hi << 32; */ + prod_hi = vshlq_n_u64(prod_hi, 32); + /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */ + xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime); + } + } + } +} + +#endif + +#if (XXH_VECTOR == XXH_VSX) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + /* presumed aligned */ + unsigned int* const xacc = (unsigned int*) acc; + xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */ + xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */ + xxh_u64x2 const v32 = { 32, 32 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* data_vec = xinput[i]; */ + xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i); + /* key_vec = xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + /* shuffled = (data_key << 32) | (data_key >> 32); */ + xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); + /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ + xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); + /* acc_vec = xacc[i]; */ + xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i); + acc_vec += product; + + /* swap high and low halves */ +#ifdef __s390x__ + acc_vec += vec_permi(data_vec, data_vec, 2); +#else + acc_vec += vec_xxpermdi(data_vec, data_vec, 2); +#endif + /* xacc[i] = acc_vec; */ + vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i); + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx) + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { xxh_u64x2* const xacc = (xxh_u64x2*) acc; + const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret; + /* constants */ + xxh_u64x2 const v32 = { 32, 32 }; + xxh_u64x2 const v47 = { 47, 47 }; + xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + xxh_u64x2 
const acc_vec = xacc[i]; + xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + + /* xacc[i] ^= xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + + /* xacc[i] *= XXH_PRIME32_1 */ + /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ + xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); + /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ + xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_SVE) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_sve( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + uint64_t *xacc = (uint64_t *)acc; + const uint64_t *xinput = (const uint64_t *)(const void *)input; + const uint64_t *xsecret = (const uint64_t *)(const void *)secret; + svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); + uint64_t element_count = svcntd(); + if (element_count >= 8) { + svbool_t mask = svptrue_pat_b64(SV_VL8); + svuint64_t vacc = svld1_u64(mask, xacc); + ACCRND(vacc, 0); + svst1_u64(mask, xacc, vacc); + } else if (element_count == 2) { /* sve128 */ + svbool_t mask = svptrue_pat_b64(SV_VL2); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 2); + svuint64_t acc2 = svld1_u64(mask, xacc + 4); + svuint64_t acc3 = svld1_u64(mask, xacc + 6); + ACCRND(acc0, 0); + ACCRND(acc1, 2); + ACCRND(acc2, 4); + ACCRND(acc3, 6); + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 2, acc1); + svst1_u64(mask, xacc + 4, acc2); + svst1_u64(mask, xacc + 6, acc3); + } else { + svbool_t mask = svptrue_pat_b64(SV_VL4); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 4); + ACCRND(acc0, 0); + ACCRND(acc1, 4); + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 4, acc1); + } +} + +XXH_FORCE_INLINE void +XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, + size_t nbStripes) +{ + if (nbStripes != 0) { + uint64_t *xacc = (uint64_t *)acc; + const uint64_t *xinput = (const uint64_t *)(const void *)input; + const uint64_t *xsecret = (const uint64_t *)(const void *)secret; + svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); + uint64_t element_count = svcntd(); + if (element_count >= 8) { + svbool_t mask = svptrue_pat_b64(SV_VL8); + svuint64_t vacc = svld1_u64(mask, xacc + 0); + do { + /* svprfd(svbool_t, void *, enum svfprop); */ + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(vacc, 0); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, vacc); + } else if (element_count == 2) { /* sve128 */ + svbool_t mask = svptrue_pat_b64(SV_VL2); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 2); + svuint64_t acc2 = svld1_u64(mask, xacc + 4); + svuint64_t acc3 = svld1_u64(mask, xacc + 6); + do { + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(acc0, 0); + ACCRND(acc1, 2); + ACCRND(acc2, 4); + ACCRND(acc3, 6); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 2, acc1); + svst1_u64(mask, xacc + 4, acc2); + svst1_u64(mask, xacc + 6, acc3); + } else { + svbool_t mask = svptrue_pat_b64(SV_VL4); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + 
svuint64_t acc1 = svld1_u64(mask, xacc + 4); + do { + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(acc0, 0); + ACCRND(acc1, 4); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 4, acc1); + } + } +} + +#endif + +/* scalar variants - universal */ + +/*! + * @internal + * @brief Scalar round for @ref XXH3_accumulate_512_scalar(). + * + * This is extracted to its own function because the NEON path uses a combination + * of NEON and scalar. + */ +XXH_FORCE_INLINE void +XXH3_scalarRound(void* XXH_RESTRICT acc, + void const* XXH_RESTRICT input, + void const* XXH_RESTRICT secret, + size_t lane) +{ + xxh_u64* xacc = (xxh_u64*) acc; + xxh_u8 const* xinput = (xxh_u8 const*) input; + xxh_u8 const* xsecret = (xxh_u8 const*) secret; + XXH_ASSERT(lane < XXH_ACC_NB); + XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); + { + xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8); + xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8); + xacc[lane ^ 1] += data_val; /* swap adjacent lanes */ + xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); + } +} + +/*! + * @internal + * @brief Processes a 64 byte block of data using the scalar path. + */ +XXH_FORCE_INLINE void +XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + size_t i; + /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__arm__) || defined(__thumb2__)) \ + && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \ + && XXH_SIZE_OPT <= 0 +# pragma GCC unroll 8 +#endif + for (i=0; i < XXH_ACC_NB; i++) { + XXH3_scalarRound(acc, input, secret, i); + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar) + +/*! + * @internal + * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar(). + * + * This is extracted to its own function because the NEON path uses a combination + * of NEON and scalar. + */ +XXH_FORCE_INLINE void +XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, + void const* XXH_RESTRICT secret, + size_t lane) +{ + xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0); + XXH_ASSERT(lane < XXH_ACC_NB); + { + xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8); + xxh_u64 acc64 = xacc[lane]; + acc64 = XXH_xorshift64(acc64, 47); + acc64 ^= key64; + acc64 *= XXH_PRIME32_1; + xacc[lane] = acc64; + } +} + +/*! + * @internal + * @brief Scrambles the accumulators after a large chunk has been read + */ +XXH_FORCE_INLINE void +XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + size_t i; + for (i=0; i < XXH_ACC_NB; i++) { + XXH3_scalarScrambleRound(acc, secret, i); + } +} + +XXH_FORCE_INLINE void +XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + /* + * We need a separate pointer for the hack below, + * which requires a non-const pointer. + * Any decent compiler will optimize this out otherwise. + */ + const xxh_u8* kSecretPtr = XXH3_kSecret; + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + +#if defined(__clang__) && defined(__aarch64__) + /* + * UGLY HACK: + * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are + * placed sequentially, in order, at the top of the unrolled loop. 
+ * + * While MOVK is great for generating constants (2 cycles for a 64-bit + * constant compared to 4 cycles for LDR), it fights for bandwidth with + * the arithmetic instructions. + * + * I L S + * MOVK + * MOVK + * MOVK + * MOVK + * ADD + * SUB STR + * STR + * By forcing loads from memory (as the asm line causes Clang to assume + * that kSecretPtr has been changed), the pipelines are used more + * efficiently: + * I L S + * LDR + * ADD LDR + * SUB STR + * STR + * + * See XXH3_NEON_LANES for details on the pipeline. + * + * XXH3_64bits_withSeed, len == 256, Snapdragon 835 + * without hack: 2654.4 MB/s + * with hack: 3202.9 MB/s + */ + XXH_COMPILER_GUARD(kSecretPtr); +#endif + /* + * Note: in debug mode, this overrides the asm optimization + * and Clang will emit MOVK chains again. + */ + XXH_ASSERT(kSecretPtr == XXH3_kSecret); + + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; + int i; + for (i=0; i < nbRounds; i++) { + /* + * The asm hack causes Clang to assume that kSecretPtr aliases with + * customSecret, and on aarch64, this prevented LDP from merging two + * loads together for free. Putting the loads together before the stores + * properly generates LDP. + */ + xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; + xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; + XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); + XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); + } } +} + + +typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t); +typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); +typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); + + +#if (XXH_VECTOR == XXH_AVX512) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx512 +#define XXH3_accumulate XXH3_accumulate_avx512 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 + +#elif (XXH_VECTOR == XXH_AVX2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx2 +#define XXH3_accumulate XXH3_accumulate_avx2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 + +#elif (XXH_VECTOR == XXH_SSE2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_sse2 +#define XXH3_accumulate XXH3_accumulate_sse2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 + +#elif (XXH_VECTOR == XXH_NEON) + +#define XXH3_accumulate_512 XXH3_accumulate_512_neon +#define XXH3_accumulate XXH3_accumulate_neon +#define XXH3_scrambleAcc XXH3_scrambleAcc_neon +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#elif (XXH_VECTOR == XXH_VSX) + +#define XXH3_accumulate_512 XXH3_accumulate_512_vsx +#define XXH3_accumulate XXH3_accumulate_vsx +#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#elif (XXH_VECTOR == XXH_SVE) +#define XXH3_accumulate_512 XXH3_accumulate_512_sve +#define XXH3_accumulate XXH3_accumulate_sve +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#else /* scalar */ + +#define XXH3_accumulate_512 XXH3_accumulate_512_scalar +#define XXH3_accumulate XXH3_accumulate_scalar +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#endif + +#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */ +# undef XXH3_initCustomSecret +# define XXH3_initCustomSecret 
XXH3_initCustomSecret_scalar +#endif + +XXH_FORCE_INLINE void +XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) +{ + size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + size_t const nb_blocks = (len - 1) / block_len; + + size_t n; + + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) { + f_acc(acc, input + n*block_len, secret, nbStripesPerBlock); + f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + XXH_ASSERT(len > XXH_STRIPE_LEN); + { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); + f_acc(acc, input + nb_blocks*block_len, secret, nbStripes); + + /* last stripe */ + { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; +#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ + XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); + } } +} + +XXH_FORCE_INLINE xxh_u64 +XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret) +{ + return XXH3_mul128_fold64( + acc[0] ^ XXH_readLE64(secret), + acc[1] ^ XXH_readLE64(secret+8) ); +} + +static XXH64_hash_t +XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start) +{ + xxh_u64 result64 = start; + size_t i = 0; + + for (i = 0; i < 4; i++) { + result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i); +#if defined(__clang__) /* Clang */ \ + && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Prevent autovectorization on Clang ARMv7-a. Exact same problem as + * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b. + * XXH3_64bits, len == 256, Snapdragon 835: + * without hack: 2063.7 MB/s + * with hack: 2560.7 MB/s + */ + XXH_COMPILER_GUARD(result64); +#endif + } + + return XXH3_avalanche(result64); +} + +#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \ + XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 } + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, + const void* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; + + XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble); + + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + /* do not align on 8, so that the secret is different from the accumulator */ +#define XXH_SECRET_MERGEACCS_START 11 + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1); +} + +/* + * It's important for performance to transmit the secret's size (when it's static) + * so that the compiler can properly optimize the vectorized loop. + * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set. 
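+ * For instance, XXH3_hashLong_64b_default below passes sizeof(XXH3_kSecret), + * a compile-time constant, whereas XXH3_hashLong_64b_withSecret forwards a + * runtime secretLen, so its loop cannot be specialized as aggressively.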
+ */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc); +} + +/* + * It's preferable for performance that XXH3_hashLong is not inlined, + * as it results in a smaller function for small data, easier on the instruction cache. + * Note that inside this no_inline function, we do inline the internal loop, + * and provide a statically defined secret size to allow optimization of the vector loop. + */ +XXH_NO_INLINE XXH_PUREF XXH64_hash_t +XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc); +} + +/* + * XXH3_hashLong_64b_withSeed(): + * Generate a custom key based on alteration of the default XXH3_kSecret with the seed, + * and then use this key for long mode hashing. + * + * This operation is decently fast but nonetheless costs a little bit of time. + * Try to avoid it whenever possible (typically when seed==0). + * + * It's important for performance that XXH3_hashLong is not inlined. Not sure + * why (uop cache maybe?), but the difference is large and easily measurable. + */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, + XXH64_hash_t seed, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ +#if XXH_SIZE_OPT <= 0 + if (seed == 0) + return XXH3_hashLong_64b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc, f_scramble); +#endif + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed); + return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), + f_acc, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + + +typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong64_f f_hashLong) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if the `secretLen` condition is not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + * Also, note that the function signature doesn't offer room to return an error. 
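+ * + * For reference, a minimal one-shot use of the public wrappers defined + * below (buffer names are illustrative): + * XXH64_hash_t h0 = XXH3_64bits(buf, bufSize); + * XXH64_hash_t h1 = XXH3_64bits_withSeed(buf, bufSize, seed);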
+ */ + if (len <= 16) + return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); +} + + +/* === Public entry point === */ + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length) +{ + return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize) +{ + return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed) +{ + return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed); +} + +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) +{ + if (length <= XXH3_MIDSIZE_MAX) + return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); + return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize); +} + + +/* === XXH3 streaming === */ +#ifndef XXH_NO_STREAM +/* + * Malloc's a pointer that is always aligned to align. + * + * This must be freed with `XXH_alignedFree()`. + * + * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte + * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2 + * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON. + * + * This underalignment previously caused a rather obvious crash which went + * completely unnoticed due to XXH3_createState() not actually being tested. + * Credit to RedSpah for noticing this bug. + * + * The alignment is done manually: Functions like posix_memalign or _mm_malloc + * are avoided: To maintain portability, we would have to write a fallback + * like this anyways, and besides, testing for the existence of library + * functions without relying on external build tools is impossible. + * + * The method is simple: Overallocate, manually align, and store the offset + * to the original behind the returned pointer. + * + * Align must be a power of 2 and 8 <= align <= 128. + */ +static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align) +{ + XXH_ASSERT(align <= 128 && align >= 8); /* range check */ + XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */ + XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */ + { /* Overallocate to make room for manual realignment and an offset byte */ + xxh_u8* base = (xxh_u8*)XXH_malloc(s + align); + if (base != NULL) { + /* + * Get the offset needed to align this pointer. + * + * Even if the returned pointer is aligned, there will always be + * at least one byte to store the offset to the original pointer. 
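+ * + * Worked example, assuming align == 64 and a base address whose low bits + * are 0x30: offset = 64 - 0x30 = 0x10, so ptr = base + 0x10 is 64-byte + * aligned and ptr[-1] stores 0x10; XXH_alignedFree() later recovers + * base = ptr - 0x10.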
+ */ + size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ + /* Add the offset for the now-aligned pointer */ + xxh_u8* ptr = base + offset; + + XXH_ASSERT((size_t)ptr % align == 0); + + /* Store the offset immediately before the returned pointer. */ + ptr[-1] = (xxh_u8)offset; + return ptr; + } + return NULL; + } +} +/* + * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass + * normal malloc'd pointers; XXH_alignedMalloc has a specific data layout. + */ +static void XXH_alignedFree(void* p) +{ + if (p != NULL) { + xxh_u8* ptr = (xxh_u8*)p; + /* Get the offset byte we added in XXH_alignedMalloc. */ + xxh_u8 offset = ptr[-1]; + /* Free the original malloc'd pointer */ + xxh_u8* base = ptr - offset; + XXH_free(base); + } +} +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) +{ + XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); + if (state==NULL) return NULL; + XXH3_INITSTATE(state); + return state; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) +{ + XXH_alignedFree(statePtr); + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API void +XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state) +{ + XXH_memcpy(dst_state, src_state, sizeof(*dst_state)); +} + +static void +XXH3_reset_internal(XXH3_state_t* statePtr, + XXH64_hash_t seed, + const void* secret, size_t secretSize) +{ + size_t const initStart = offsetof(XXH3_state_t, bufferedSize); + size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; + XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); + XXH_ASSERT(statePtr != NULL); + /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ + memset((char*)statePtr + initStart, 0, initLength); + statePtr->acc[0] = XXH_PRIME32_3; + statePtr->acc[1] = XXH_PRIME64_1; + statePtr->acc[2] = XXH_PRIME64_2; + statePtr->acc[3] = XXH_PRIME64_3; + statePtr->acc[4] = XXH_PRIME64_4; + statePtr->acc[5] = XXH_PRIME32_2; + statePtr->acc[6] = XXH_PRIME64_5; + statePtr->acc[7] = XXH_PRIME32_1; + statePtr->seed = seed; + statePtr->useSeed = (seed != 0); + statePtr->extSecret = (const unsigned char*)secret; + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; + statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + return XXH_OK; +} + +/*! 
@ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed) +{ + if (statePtr == NULL) return XXH_ERROR; + if (seed==0) return XXH3_64bits_reset(statePtr); + if ((seed != statePtr->seed) || (statePtr->extSecret != NULL)) + XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64) +{ + if (statePtr == NULL) return XXH_ERROR; + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + XXH3_reset_internal(statePtr, seed64, secret, secretSize); + statePtr->useSeed = 1; /* always, even if seed64==0 */ + return XXH_OK; +} + +/* Note : when XXH3_consumeStripes() is invoked, + * there must be a guarantee that at least one more byte will be consumed from input + * so that the function can blindly consume all stripes using the "normal" secret segment */ +XXH_FORCE_INLINE void +XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, + size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock, + const xxh_u8* XXH_RESTRICT input, size_t nbStripes, + const xxh_u8* XXH_RESTRICT secret, size_t secretLimit, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */ + XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock); + if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) { + /* need a scrambling operation */ + size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr; + size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock; + f_acc(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock); + f_scramble(acc, secret + secretLimit); + f_acc(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock); + *nbStripesSoFarPtr = nbStripesAfterBlock; + } else { + f_acc(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes); + *nbStripesSoFarPtr += nbStripes; + } +} + +#ifndef XXH3_STREAM_USE_STACK +# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */ +# define XXH3_STREAM_USE_STACK 1 +# endif +#endif +/* + * Both XXH3_64bits_update and XXH3_128bits_update use this routine. + */ +XXH_FORCE_INLINE XXH_errorcode +XXH3_update(XXH3_state_t* XXH_RESTRICT const state, + const xxh_u8* XXH_RESTRICT input, size_t len, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) +{ + if (input==NULL) { + XXH_ASSERT(len == 0); + return XXH_OK; + } + + XXH_ASSERT(state != NULL); + { const xxh_u8* const bEnd = input + len; + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; +#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 + /* For some reason, gcc and MSVC seem to suffer greatly + * when operating on accumulators directly in state. + * Operating on stack space seems to enable proper optimization. 
+ * clang, on the other hand, doesn't seem to need this trick */ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc)); +#else + xxh_u64* XXH_RESTRICT const acc = state->acc; +#endif + state->totalLen += len; + XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); + + /* small input : just fill in tmp buffer */ + if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t)len; + return XXH_OK; + } + + /* total input is now > XXH3_INTERNALBUFFER_SIZE */ + #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) + XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ + + /* + * Internal buffer is partially filled (always, except at beginning) + * Complete it, then consume it. + */ + if (state->bufferedSize) { + size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; + XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); + input += loadSize; + XXH3_consumeStripes(acc, + &state->nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, XXH3_INTERNALBUFFER_STRIPES, + secret, state->secretLimit, + f_acc, f_scramble); + state->bufferedSize = 0; + } + XXH_ASSERT(input < bEnd); + + /* large input to consume : ingest per full block */ + if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) { + size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN; + XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar); + /* join to current block's end */ + { size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar; + XXH_ASSERT(nbStripesToEnd <= nbStripes); + f_acc(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd); + f_scramble(acc, secret + state->secretLimit); + state->nbStripesSoFar = 0; + input += nbStripesToEnd * XXH_STRIPE_LEN; + nbStripes -= nbStripesToEnd; + } + /* consume per entire blocks */ + while(nbStripes >= state->nbStripesPerBlock) { + f_acc(acc, input, secret, state->nbStripesPerBlock); + f_scramble(acc, secret + state->secretLimit); + input += state->nbStripesPerBlock * XXH_STRIPE_LEN; + nbStripes -= state->nbStripesPerBlock; + } + /* consume last partial block */ + f_acc(acc, input, secret, nbStripes); + input += nbStripes * XXH_STRIPE_LEN; + XXH_ASSERT(input < bEnd); /* at least some bytes left */ + state->nbStripesSoFar = nbStripes; + /* buffer predecessor of last partial stripe */ + XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); + XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN); + } else { + /* content to consume <= block size */ + /* Consume input by a multiple of internal buffer size */ + if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { + const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE; + do { + XXH3_consumeStripes(acc, + &state->nbStripesSoFar, state->nbStripesPerBlock, + input, XXH3_INTERNALBUFFER_STRIPES, + secret, state->secretLimit, + f_acc, f_scramble); + input += XXH3_INTERNALBUFFER_SIZE; + } while (input < limit); + /* buffer predecessor of last partial stripe */ + XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); + } + } + + /* Some remaining input (always) : buffer it */ + XXH_ASSERT(input < bEnd); + XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE); + XXH_ASSERT(state->bufferedSize == 0); + XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); + state->bufferedSize = (XXH32_hash_t)(bEnd-input); +#if 
defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 + /* save stack accumulators into state */ + memcpy(state->acc, acc, sizeof(acc)); +#endif + } + + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate, XXH3_scrambleAcc); +} + + +XXH_FORCE_INLINE void +XXH3_digest_long (XXH64_hash_t* acc, + const XXH3_state_t* state, + const unsigned char* secret) +{ + /* + * Digest on a local copy. This way, the state remains unaltered, and it can + * continue ingesting more input afterwards. + */ + XXH_memcpy(acc, state->acc, sizeof(state->acc)); + if (state->bufferedSize >= XXH_STRIPE_LEN) { + size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; + size_t nbStripesSoFar = state->nbStripesSoFar; + XXH3_consumeStripes(acc, + &nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, nbStripes, + secret, state->secretLimit, + XXH3_accumulate, XXH3_scrambleAcc); + /* last stripe */ + XXH3_accumulate_512(acc, + state->buffer + state->bufferedSize - XXH_STRIPE_LEN, + secret + state->secretLimit - XXH_SECRET_LASTACC_START); + } else { /* bufferedSize < XXH_STRIPE_LEN */ + xxh_u8 lastStripe[XXH_STRIPE_LEN]; + size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; + XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ + XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); + XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); + XXH3_accumulate_512(acc, + lastStripe, + secret + state->secretLimit - XXH_SECRET_LASTACC_START); + } +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state) +{ + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + return XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)state->totalLen * XXH_PRIME64_1); + } + /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ + if (state->useSeed) + return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), + secret, state->secretLimit + XXH_STRIPE_LEN); +} +#endif /* !XXH_NO_STREAM */ + + +/* ========================================== + * XXH3 128 bits (a.k.a XXH128) + * ========================================== + * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, + * even without counting the significantly larger output size. + * + * For example, extra steps are taken to avoid the seed-dependent collisions + * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B). + * + * This strength naturally comes at the cost of some speed, especially on short + * lengths. Note that longer hashes are about as fast as the 64-bit version + * due to it using only a slight modification of the 64-bit loop. + * + * XXH128 is also more oriented towards 64-bit machines. It is still extremely + * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). 
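+ * + * For reference, a typical one-shot call of the public API defined further + * below (buffer names are illustrative): + * XXH128_hash_t h = XXH128(buf, bufSize, seed); + * with the two halves of the result then available as h.low64 and h.high64.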
+ */ + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + /* A doubled version of 1to3_64b with different constants. */ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } + * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } + * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); + xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; + xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; + xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; + XXH128_hash_t h128; + h128.low64 = XXH64_avalanche(keyed_lo); + h128.high64 = XXH64_avalanche(keyed_hi); + return h128; + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); + xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed; + xxh_u64 const keyed = input_64 ^ bitflip; + + /* Shift len to the left to ensure it is even, this avoids even multiplies. */ + XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + + m128.low64 = XXH_xorshift64(m128.low64, 35); + m128.low64 *= 0x9FB21C651E98DF25ULL; + m128.low64 = XXH_xorshift64(m128.low64, 28); + m128.high64 = XXH3_avalanche(m128.high64); + return m128; + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; + xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed; + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 input_hi = XXH_readLE64(input + len - 8); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); + /* + * Put len in the middle of m128 to ensure that the length gets mixed to + * both the low and high bits in the 128x64 multiply below. + */ + m128.low64 += (xxh_u64)(len - 1) << 54; + input_hi ^= bitfliph; + /* + * Add the high 32 bits of input_hi to the high 32 bits of m128, then + * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to + * the high 64 bits of m128. + * + * The best approach to this operation is different on 32-bit and 64-bit. + */ + if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ + /* + * 32-bit optimized version, which is more readable. 
+ * + * On 32-bit, it removes an ADC and delays a dependency between the two + * halves of m128.high64, but it generates an extra mask on 64-bit. + */ + m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); + } else { + /* + * 64-bit optimized (albeit more confusing) version. + * + * Uses some properties of addition and multiplication to remove the mask: + * + * Let: + * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) + * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) + * c = XXH_PRIME32_2 + * + * a + (b * c) + * Inverse Property: x + y - x == y + * a + (b * (1 + c - 1)) + * Distributive Property: x * (y + z) == (x * y) + (x * z) + * a + (b * 1) + (b * (c - 1)) + * Identity Property: x * 1 == x + * a + b + (b * (c - 1)) + * + * Substitute a, b, and c: + * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + * + * Since input_hi.hi + input_hi.lo == input_hi, we get this: + * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + */ + m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); + } + /* m128 ^= XXH_swap64(m128 >> 64); */ + m128.low64 ^= XXH_swap64(m128.high64); + + { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ + XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); + h128.high64 += m128.high64 * XXH_PRIME64_2; + + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = XXH3_avalanche(h128.high64); + return h128; + } } +} + +/* + * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN + */ +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); + if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); + if (len) return XXH3_len_1to3_128b(input, len, secret, seed); + { XXH128_hash_t h128; + xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72); + xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88); + h128.low64 = XXH64_avalanche(seed ^ bitflipl); + h128.high64 = XXH64_avalanche( seed ^ bitfliph); + return h128; + } } +} + +/* + * A bit slower than XXH3_mix16B, but handles multiply by zero better. + */ +XXH_FORCE_INLINE XXH128_hash_t +XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, + const xxh_u8* secret, XXH64_hash_t seed) +{ + acc.low64 += XXH3_mix16B (input_1, secret+0, seed); + acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); + acc.high64 += XXH3_mix16B (input_2, secret+16, seed); + acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); + return acc; +} + + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { XXH128_hash_t acc; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + +#if XXH_SIZE_OPT >= 1 + { + /* Smaller, but slightly slower. 
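+ * For example, with len == 100 this loop starts at i == 3 and issues the + * same four XXH128_mix32B calls, in the same order, as the unrolled + * branch in the #else path below.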
*/ + unsigned int i = (unsigned int)(len - 1) / 32; + do { + acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed); + } while (i-- != 0); + } +#else + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); + } + acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); + } + acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); + } + acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); +#endif + { XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_NO_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + + { XXH128_hash_t acc; + unsigned i; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + /* + * We set `i` to offset + 32. We do this so that unchanged + * `len` can be used as upper bound. This reaches a sweet spot + * where both x86 and aarch64 get simple agen and good codegen + * for the loop. + */ + for (i = 32; i < 160; i += 32) { + acc = XXH128_mix32B(acc, + input + i - 32, + input + i - 16, + secret + i - 32, + seed); + } + acc.low64 = XXH3_avalanche(acc.low64); + acc.high64 = XXH3_avalanche(acc.high64); + /* + * NB: `i <= len` will duplicate the last 32 bytes if + * len % 32 was zero. This is an unfortunate necessity to keep + * the hash result stable. + */ + for (i=160; i <= len; i += 32) { + acc = XXH128_mix32B(acc, + input + i - 32, + input + i - 16, + secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, + seed); + } + /* last bytes */ + acc = XXH128_mix32B(acc, + input + len - 16, + input + len - 32, + secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, + (XXH64_hash_t)0 - seed); + + { XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; + + XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble); + + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + { XXH128_hash_t h128; + h128.low64 = XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)len * XXH_PRIME64_1); + h128.high64 = XXH3_mergeAccs(acc, + secret + secretSize + - sizeof(acc) - XXH_SECRET_MERGEACCS_START, + ~((xxh_u64)len * XXH_PRIME64_2)); + return h128; + } +} + +/* + * It's important for performance that XXH3_hashLong() is not inlined. 
+ */ +XXH_NO_INLINE XXH_PUREF XXH128_hash_t +XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_accumulate, XXH3_scrambleAcc); +} + +/* + * It's important for performance to pass @p secretLen (when it's static) + * to the compiler, so that it can properly optimize the vectorized loop. + */ +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate, XXH3_scrambleAcc); +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ + if (seed64 == 0) + return XXH3_hashLong_128b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc, f_scramble); + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed64); + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), + f_acc, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, + XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + +typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const void* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_128bits_internal(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong128_f f_hl128) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secret` conditions are not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + */ + if (len <= 16) + return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hl128(input, len, seed64, secret, secretLen); +} + + +/* === Public XXH128 API === */ + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len) +{ + return XXH3_128bits_internal(input, len, 0, + XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_default); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t +XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize) +{ + return XXH3_128bits_internal(input, len, 0, + (const xxh_u8*)secret, secretSize, + XXH3_hashLong_128b_withSecret); +} + +/*! 
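+ * One-shot usage sketch (illustrative; `buf` and `bufSize` are hypothetical
+ * caller-side names):
+ *
+ *     XXH128_hash_t h = XXH3_128bits_withSeed(buf, bufSize, 1234);
+ *     printf("%016llx%016llx\n", (unsigned long long)h.high64,
+ *                                (unsigned long long)h.low64);
+ *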
@ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+    return XXH3_128bits_internal(input, len, seed,
+                                 XXH3_kSecret, sizeof(XXH3_kSecret),
+                                 XXH3_hashLong_128b_withSeed);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (len <= XXH3_MIDSIZE_MAX)
+        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+    return XXH3_128bits_withSeed(input, len, seed);
+}
+
+
+/* ===   XXH3 128-bit streaming   === */
+#ifndef XXH_NO_STREAM
+/*
+ * All initialization and update functions are identical to 64-bit streaming variant.
+ * The only difference is the finalization routine.
+ */
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
+{
+    return XXH3_64bits_reset(statePtr);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
+{
+    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+    return XXH3_64bits_reset_withSeed(statePtr, seed);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
+{
+    return XXH3_update(state, (const xxh_u8*)input, len,
+                       XXH3_accumulate, XXH3_scrambleAcc);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
+{
+    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+    if (state->totalLen > XXH3_MIDSIZE_MAX) {
+        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+        XXH3_digest_long(acc, state, secret);
+        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+        {   XXH128_hash_t h128;
+            h128.low64  = XXH3_mergeAccs(acc,
+                                         secret + XXH_SECRET_MERGEACCS_START,
+                                         (xxh_u64)state->totalLen * XXH_PRIME64_1);
+            h128.high64 = XXH3_mergeAccs(acc,
+                                         secret + state->secretLimit + XXH_STRIPE_LEN
+                                                - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+                                         ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
+            return h128;
+        }
+    }
+    /* len <= XXH3_MIDSIZE_MAX : short code */
+    if (state->seed)
+        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
+                                   secret, state->secretLimit + XXH_STRIPE_LEN);
+}
+#endif /* !XXH_NO_STREAM */
+/* 128-bit utility functions */
+
+#include <string.h>   /* memcmp, memcpy */
+
+/* return : 1 if equal, 0 if different */
+/*!
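+ * Streaming usage sketch for the functions above (illustrative; `fill()`,
+ * `chunk` and `n` are hypothetical caller-side names; createState/freeState
+ * are part of the XXH3 API declared earlier in this header):
+ *
+ *     XXH3_state_t* st = XXH3_createState();
+ *     XXH3_128bits_reset(st);
+ *     while (fill(chunk, &n))
+ *         XXH3_128bits_update(st, chunk, n);
+ *     XXH128_hash_t const h = XXH3_128bits_digest(st);
+ *     XXH3_freeState(st);
+ *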
@ingroup XXH3_family */ +XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) +{ + /* note : XXH128_hash_t is compact, it has no padding byte */ + return !(memcmp(&h1, &h2, sizeof(h1))); +} + +/* This prototype is compatible with stdlib's qsort(). + * @return : >0 if *h128_1 > *h128_2 + * <0 if *h128_1 < *h128_2 + * =0 if *h128_1 == *h128_2 */ +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2) +{ + XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1; + XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2; + int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64); + /* note : bets that, in most cases, hash values are different */ + if (hcmp) return hcmp; + return (h1.low64 > h2.low64) - (h2.low64 > h1.low64); +} + + +/*====== Canonical representation ======*/ +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API void +XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) { + hash.high64 = XXH_swap64(hash.high64); + hash.low64 = XXH_swap64(hash.low64); + } + XXH_memcpy(dst, &hash.high64, sizeof(hash.high64)); + XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t +XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src) +{ + XXH128_hash_t h; + h.high64 = XXH_readBE64(src); + h.low64 = XXH_readBE64(src->digest + 8); + return h; +} + + + +/* ========================================== + * Secret generators + * ========================================== + */ +#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x)) + +XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128) +{ + XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 ); + XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 ); +} + +/*! 
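+ * Round-trip sketch for the canonical helpers above (illustrative):
+ *
+ *     XXH128_canonical_t c;
+ *     XXH128_canonicalFromHash(&c, h);     // h serialized as big-endian bytes
+ *     XXH128_hash_t back = XXH128_hashFromCanonical(&c);
+ *     assert(XXH128_isEqual(h, back));     // the round-trip is lossless
+ *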
@ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
+{
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(secretBuffer != NULL);
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+#else
+    /* production mode, assert() are disabled */
+    if (secretBuffer == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+#endif
+
+    if (customSeedSize == 0) {
+        customSeed = XXH3_kSecret;
+        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
+    }
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(customSeed != NULL);
+#else
+    if (customSeed == NULL) return XXH_ERROR;
+#endif
+
+    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
+    {   size_t pos = 0;
+        while (pos < secretSize) {
+            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
+            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
+            pos += toCopy;
+    }   }
+
+    {   size_t const nbSeg16 = secretSize / 16;
+        size_t n;
+        XXH128_canonical_t scrambler;
+        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+        for (n=0; n
+/* END RocksDB customizations */
+
+// clang-format off
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+*  Definitions
+******************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXPH_OK=0, XXPH_ERROR } XXPH_errorcode;
+
+
+/* ****************************
+ *  API modifier
+ ******************************/
+/** XXPH_INLINE_ALL (and XXPH_PRIVATE_API)
+ *  This build macro includes xxhash functions in `static` mode
+ *  in order to inline them, and remove their symbol from the public list.
+ *  Inlining offers great performance improvement on small keys,
+ *  and dramatic ones when length is expressed as a compile-time constant.
+ *  See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html .
+ *  Methodology :
+ *     #define XXPH_INLINE_ALL
+ *     #include "xxhash.h"
+ *  `xxhash.c` is automatically included.
+ *  It's not useful to compile and link it as a separate object.
+ */
+#if defined(XXPH_INLINE_ALL) || defined(XXPH_PRIVATE_API)
+#  ifndef XXPH_STATIC_LINKING_ONLY
+#    define XXPH_STATIC_LINKING_ONLY
+#  endif
+#  if defined(__GNUC__)
+#    define XXPH_PUBLIC_API static __inline __attribute__((unused))
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXPH_PUBLIC_API static inline
+#  elif defined(_MSC_VER)
+#    define XXPH_PUBLIC_API static __inline
+#  else
+     /* this version may generate warnings for unused static functions */
+#    define XXPH_PUBLIC_API static
+#  endif
+#else
+#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXPH_IMPORT) || defined(XXPH_EXPORT))
+#    ifdef XXPH_EXPORT
+#      define XXPH_PUBLIC_API __declspec(dllexport)
+#    elif XXPH_IMPORT
+#      define XXPH_PUBLIC_API __declspec(dllimport)
+#    endif
+#  else
+#    define XXPH_PUBLIC_API   /* do nothing */
+#  endif
+#endif /* XXPH_INLINE_ALL || XXPH_PRIVATE_API */
+
+/*! XXPH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXPH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXPH_NAMESPACE (therefore, avoid NULL and numeric values).
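+ * For example, building with `-DXXPH_NAMESPACE=MYLIB_` (an illustrative
+ * prefix; any identifier-safe value works) makes the linker-visible name of
+ * XXPH3_64bits become MYLIB_XXPH3_64bits, via the XXPH_NAME2()/XXPH_CAT()
+ * token pasting defined just below.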
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */
+#ifdef XXPH_NAMESPACE
+#  define XXPH_CAT(A,B) A##B
+#  define XXPH_NAME2(A,B) XXPH_CAT(A,B)
+#  define XXPH_versionNumber XXPH_NAME2(XXPH_NAMESPACE, XXPH_versionNumber)
+#endif
+
+
+/* *************************************
+*  Version
+***************************************/
+#define XXPH_VERSION_MAJOR    0
+#define XXPH_VERSION_MINOR    7
+#define XXPH_VERSION_RELEASE  2
+#define XXPH_VERSION_NUMBER  (XXPH_VERSION_MAJOR *100*100 + XXPH_VERSION_MINOR *100 + XXPH_VERSION_RELEASE)
+XXPH_PUBLIC_API unsigned XXPH_versionNumber (void);
+
+
+/*-**********************************************************************
+*  32-bit hash
+************************************************************************/
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+    typedef uint32_t XXPH32_hash_t;
+#else
+#   include <limits.h>
+#   if UINT_MAX == 0xFFFFFFFFUL
+      typedef unsigned int XXPH32_hash_t;
+#   else
+#     if ULONG_MAX == 0xFFFFFFFFUL
+        typedef unsigned long XXPH32_hash_t;
+#     else
+#       error "unsupported platform : need a 32-bit type"
+#     endif
+#   endif
+#endif
+
+#ifndef XXPH_NO_LONG_LONG
+/*-**********************************************************************
+*  64-bit hash
+************************************************************************/
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+    typedef uint64_t XXPH64_hash_t;
+#else
+    /* the following type must have a width of 64-bit */
+    typedef unsigned long long XXPH64_hash_t;
+#endif
+
+#endif  /* XXPH_NO_LONG_LONG */
+
+
+
+#ifdef XXPH_STATIC_LINKING_ONLY
+
+/* ================================================================================================
+   This section contains declarations which are not guaranteed to remain stable.
+   They may change in future versions, becoming incompatible with a different version of the library.
+   These declarations should only be used with static linking.
+   Never use them in association with dynamic linking !
+=================================================================================================== */
+
+
+/*-**********************************************************************
+* XXPH3
+* New experimental hash
+************************************************************************/
+#ifndef XXPH_NO_LONG_LONG
+
+
+/* ============================================
+ * XXPH3 is a new hash algorithm,
+ * featuring improved speed performance for both small and large inputs.
+ * See full speed analysis at : http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
+ * In general, expect XXPH3 to run about ~2x faster on large inputs,
+ * and >3x faster on small ones, though exact differences depend on platform.
+ *
+ * The algorithm is portable, will generate the same hash on all platforms.
+ * It benefits greatly from vectorization units, but does not require it.
+ *
+ * XXPH3 offers 2 variants, _64bits and _128bits.
+ * When only 64 bits are needed, prefer calling the _64bits variant :
+ * it reduces the amount of mixing, resulting in faster speed on small inputs.
+ * It's also generally simpler to manipulate a scalar return type than a struct.
+ *
+ * The XXPH3 algorithm is still considered experimental.
+ * Produced results can still change between versions.
+ * Results produced by v0.7.x are not comparable with results from v0.7.y.
+ * It's nonetheless possible to use XXPH3 for ephemeral data (local sessions),
+ * but avoid storing values in long-term storage for later reads.
+ *
+ * The API supports one-shot hashing, streaming mode, and custom secrets.
+ *
+ * There are still a number of open questions that the community can influence during the experimental period.
+ * I'm trying to list a few of them below, though don't consider this list as complete.
+ *
+ * - 128-bits output type : currently defined as a structure of two 64-bits fields.
+ *   That's because 128-bit values do not exist in C standard.
+ *   Note that it means that, at byte level, result is not identical depending on endianess.
+ *   However, at field level, they are identical on all platforms.
+ *   The canonical representation solves the issue of identical byte-level representation across platforms,
+ *   which is necessary for serialization.
+ *   Q1 : Would there be a better representation for a 128-bit hash result ?
+ *   Q2 : Are the names of the inner 64-bit fields important ? Should they be changed ?
+ *
+ * - Prototype XXPH128() : XXPH128() uses the same arguments as XXPH64(), for consistency.
+ *   It means it maps to XXPH3_128bits_withSeed().
+ *   This variant is slightly slower than XXPH3_128bits(),
+ *   because the seed is now part of the algorithm, and can't be simplified.
+ *   Is that a good idea ?
+ *
+ * - Seed type for XXPH128() : currently, it's a single 64-bit value, like the 64-bit variant.
+ *   It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash.
+ *   But a 128-bit seed is more difficult to use, since it requires passing a structure instead of a scalar value.
+ *   Such a variant could either replace the current one, or become an additional one.
+ *   Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`).
+ *   Follow-up question : if both 64-bit and 128-bit seeds are allowed, which variant should be called XXPH128 ?
+ *
+ * - Result for len==0 : Currently, the result of hashing a zero-length input is always `0`.
+ *   It seems okay as a return value when using "default" secret and seed.
+ *   But is it still fine to return `0` when secret or seed are non-default ?
+ *   Are there use cases which could depend on generating a different hash result for zero-length input when the secret is different ?
+ *
+ * - Consistency (1) : Streaming XXPH128 uses an XXPH3 state, which is the same state as XXPH3_64bits().
+ *   It means a 128bit streaming loop must invoke the following symbols :
+ *   XXPH3_createState(), XXPH3_128bits_reset(), XXPH3_128bits_update() (loop), XXPH3_128bits_digest(), XXPH3_freeState().
+ *   Is that consistent enough ?
+ *
+ * - Consistency (2) : The canonical representation of `XXPH3_64bits` is provided by existing functions
+ *   XXPH64_canonicalFromHash(), and reverse operation XXPH64_hashFromCanonical().
+ *   As a mirror, canonical functions for XXPH128_hash_t results generated by `XXPH3_128bits`
+ *   are XXPH128_canonicalFromHash() and XXPH128_hashFromCanonical().
+ *   Which means, `XXPH3` doesn't appear in the names, because canonical functions operate on a type,
+ *   independently of which algorithm was used to generate that type.
+ *   Is that consistent enough ?
+ */
+
+#ifdef XXPH_NAMESPACE
+#  define XXPH3_64bits XXPH_NAME2(XXPH_NAMESPACE, XXPH3_64bits)
+#  define XXPH3_64bits_withSecret XXPH_NAME2(XXPH_NAMESPACE, XXPH3_64bits_withSecret)
+#  define XXPH3_64bits_withSeed XXPH_NAME2(XXPH_NAMESPACE, XXPH3_64bits_withSeed)
+#endif
+
+/* XXPH3_64bits() :
+ * default 64-bit variant, using default secret and default seed of 0.
+ * It's the fastest variant. */
+XXPH_PUBLIC_API XXPH64_hash_t XXPH3_64bits(const void* data, size_t len);
+
+/* XXPH3_64bits_withSecret() :
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The secret *must* be large enough (>= XXPH3_SECRET_SIZE_MIN).
+ * It should consist of random bytes.
+ * Avoid repeating same character, or sequences of bytes,
+ * and especially avoid swathes of \0.
+ * Failure to respect these conditions will result in a poor quality hash.
+ */
+#define XXPH3_SECRET_SIZE_MIN 136
+XXPH_PUBLIC_API XXPH64_hash_t XXPH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+
+/* XXPH3_64bits_withSeed() :
+ * This variant generates on the fly a custom secret,
+ * based on the default secret, altered using the `seed` value.
+ * While this operation is decently fast, note that it's not completely free.
+ * note : seed==0 produces same results as XXPH3_64bits() */
+XXPH_PUBLIC_API XXPH64_hash_t XXPH3_64bits_withSeed(const void* data, size_t len, XXPH64_hash_t seed);
+
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)   /* C11+ */
+#  include <stdalign.h>
+#  define XXPH_ALIGN(n)      alignas(n)
+#elif defined(__GNUC__)
+#  define XXPH_ALIGN(n)      __attribute__ ((aligned(n)))
+#elif defined(_MSC_VER)
+#  define XXPH_ALIGN(n)      __declspec(align(n))
+#else
+#  define XXPH_ALIGN(n)   /* disabled */
+#endif
+
+#define XXPH3_SECRET_DEFAULT_SIZE 192   /* minimum XXPH3_SECRET_SIZE_MIN */
+
+#endif  /* XXPH_NO_LONG_LONG */
+
+
+/*-**********************************************************************
+*  XXPH_INLINE_ALL
+************************************************************************/
+#if defined(XXPH_INLINE_ALL) || defined(XXPH_PRIVATE_API)
+
+/* === RocksDB modification: was #include here but permanently inlining === */
+
+typedef struct {
+    XXPH64_hash_t low64;
+    XXPH64_hash_t high64;
+} XXPH128_hash_t;
+
+/* *************************************
+*  Tuning parameters
+***************************************/
+/*!XXPH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on compiler but violates the C standard.
+ *            It can generate buggy code on targets which do not support unaligned memory accesses.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
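+ * For example (illustrative), `cc -DXXPH_FORCE_MEMORY_ACCESS=1 ...` pins
+ * Method 1 regardless of the auto-detection below.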
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXPH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
+#    define XXPH_FORCE_MEMORY_ACCESS 2
+#  elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+  (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
+#    define XXPH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/*!XXPH_ACCEPT_NULL_INPUT_POINTER :
+ * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
+ * When this macro is enabled, xxHash actively checks input for null pointer.
+ * If it is, result for null input pointers is the same as a null-length input.
+ */
+#ifndef XXPH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
+#  define XXPH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
+
+/*!XXPH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
+ */
+#ifndef XXPH_FORCE_ALIGN_CHECK /* can be defined externally */
+#  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#    define XXPH_FORCE_ALIGN_CHECK 0
+#  else
+#    define XXPH_FORCE_ALIGN_CHECK 1
+#  endif
+#endif
+
+/*!XXPH_REROLL:
+ * Whether to reroll XXPH32_finalize, and XXPH64_finalize,
+ * instead of using an unrolled jump table/if statement loop.
+ *
+ * This is automatically defined on -Os/-Oz on GCC and Clang. */
+#ifndef XXPH_REROLL
+#  if defined(__OPTIMIZE_SIZE__)
+#    define XXPH_REROLL 1
+#  else
+#    define XXPH_REROLL 0
+#  endif
+#endif
+
+#include <limits.h>   /* ULLONG_MAX */
+
+#ifndef XXPH_STATIC_LINKING_ONLY
+#define XXPH_STATIC_LINKING_ONLY
+#endif
+
+/* BEGIN RocksDB customizations */
+#include "port/lang.h" /* for FALLTHROUGH_INTENDED, inserted as appropriate */
+/* END RocksDB customizations */
+
+/* *************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#  define XXPH_FORCE_INLINE static __forceinline
+#  define XXPH_NO_INLINE static __declspec(noinline)
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define XXPH_FORCE_INLINE static inline __attribute__((always_inline))
+#      define XXPH_NO_INLINE static __attribute__((noinline))
+#    else
+#      define XXPH_FORCE_INLINE static inline
+#      define XXPH_NO_INLINE static
+#    endif
+#  else
+#    define XXPH_FORCE_INLINE static
+#    define XXPH_NO_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+
+/* *************************************
+*  Debug
+***************************************/
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number.
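+ * For example (illustrative), building with `-DDEBUGLEVEL=1` turns
+ * XXPH_ASSERT() below into a real assert(), while the default of 0 compiles
+ * the checks away entirely.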
 */
+#ifndef DEBUGLEVEL
+#  define DEBUGLEVEL 0
+#endif
+
+#if (DEBUGLEVEL>=1)
+#  include <assert.h>   /* note : can still be disabled with NDEBUG */
+#  define XXPH_ASSERT(c)   assert(c)
+#else
+#  define XXPH_ASSERT(c)   ((void)0)
+#endif
+
+/* note : use after variable declarations */
+#define XXPH_STATIC_ASSERT(c)  { enum { XXPH_sa = 1/(int)(!!(c)) }; }
+
+
+/* *************************************
+*  Basic Types
+***************************************/
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#  include <stdint.h>
+   typedef uint8_t xxh_u8;
+#else
+   typedef unsigned char xxh_u8;
+#endif
+typedef XXPH32_hash_t xxh_u32;
+
+
+/* ===   Memory access   === */
+
+#if (defined(XXPH_FORCE_MEMORY_ACCESS) && (XXPH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static xxh_u32 XXPH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
+
+#elif (defined(XXPH_FORCE_MEMORY_ACCESS) && (XXPH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
+static xxh_u32 XXPH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static xxh_u32 XXPH_read32(const void* memPtr)
+{
+    xxh_u32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif   /* XXPH_FORCE_DIRECT_MEMORY_ACCESS */
+
+
+/* ===   Endianess   === */
+
+/* XXPH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXPH_CPU_LITTLE_ENDIAN
+#  if defined(_WIN32) /* Windows is always little endian */ \
+  || defined(__LITTLE_ENDIAN__) \
+  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#    define XXPH_CPU_LITTLE_ENDIAN 1
+#  elif defined(__BIG_ENDIAN__) \
+  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#    define XXPH_CPU_LITTLE_ENDIAN 0
+#  else
+static int XXPH_isLittleEndian(void)
+{
+    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];
+}
+#   define XXPH_CPU_LITTLE_ENDIAN   XXPH_isLittleEndian()
+#  endif
+#endif
+
+
+
+
+/* ****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define XXPH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#ifndef __has_builtin
+#  define __has_builtin(x) 0
+#endif
+
+#if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
+#  define XXPH_rotl32 __builtin_rotateleft32
+#  define XXPH_rotl64 __builtin_rotateleft64
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#elif defined(_MSC_VER)
+#  define XXPH_rotl32(x,r) _rotl(x,r)
+#  define XXPH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXPH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#  define XXPH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXPH_swap32 _byteswap_ulong
+#elif XXPH_GCC_VERSION >= 403
+#  define XXPH_swap32 __builtin_bswap32
+#else
+static xxh_u32 XXPH_swap32 (xxh_u32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+} +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXPH_aligned, XXPH_unaligned } XXPH_alignment; + +XXPH_FORCE_INLINE xxh_u32 XXPH_readLE32(const void* ptr) +{ + return XXPH_CPU_LITTLE_ENDIAN ? XXPH_read32(ptr) : XXPH_swap32(XXPH_read32(ptr)); +} + +XXPH_FORCE_INLINE xxh_u32 +XXPH_readLE32_align(const void* ptr, XXPH_alignment align) +{ + if (align==XXPH_unaligned) { + return XXPH_readLE32(ptr); + } else { + return XXPH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXPH_swap32(*(const xxh_u32*)ptr); + } +} + + +/* ************************************* +* Misc +***************************************/ +XXPH_PUBLIC_API unsigned XXPH_versionNumber (void) { return XXPH_VERSION_NUMBER; } + + +static const xxh_u32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */ +static const xxh_u32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */ +static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */ +static const xxh_u32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */ +static const xxh_u32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */ + +#ifndef XXPH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ + +/*====== Memory access ======*/ + +typedef XXPH64_hash_t xxh_u64; + +#if (defined(XXPH_FORCE_MEMORY_ACCESS) && (XXPH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXPH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; } + +#elif (defined(XXPH_FORCE_MEMORY_ACCESS) && (XXPH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +static xxh_u64 XXPH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static xxh_u64 XXPH_read64(const void* memPtr) +{ + xxh_u64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXPH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXPH_swap64 _byteswap_uint64 +#elif XXPH_GCC_VERSION >= 403 +# define XXPH_swap64 __builtin_bswap64 +#else +static xxh_u64 XXPH_swap64 (xxh_u64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +XXPH_FORCE_INLINE xxh_u64 XXPH_readLE64(const void* ptr) +{ + return XXPH_CPU_LITTLE_ENDIAN ? XXPH_read64(ptr) : XXPH_swap64(XXPH_read64(ptr)); +} + +XXPH_FORCE_INLINE xxh_u64 +XXPH_readLE64_align(const void* ptr, XXPH_alignment align) +{ + if (align==XXPH_unaligned) + return XXPH_readLE64(ptr); + else + return XXPH_CPU_LITTLE_ENDIAN ? 
*(const xxh_u64*)ptr : XXPH_swap64(*(const xxh_u64*)ptr); +} + + +/*====== xxh64 ======*/ + +static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ +static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ +static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */ +static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */ +static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */ + + +/* ********************************************************************* +* XXPH3 +* New generation hash designed for speed on small keys and vectorization +************************************************************************ */ + +/*======== Was #include "xxh3.h", now inlined below ==========*/ + +/* + xxHash - Extremely Fast Hash algorithm + Development source file for `xxh3` + Copyright (C) 2019-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* RocksDB Note: This file contains a preview release (xxhash repository + version 0.7.2) of XXPH3 that is unlikely to be compatible with the final + version of XXPH3. We have therefore renamed this XXPH3 ("preview"), for + clarity so that we can continue to use this version even after + integrating a newer incompatible version. 
+*/
+
+/* ===   Dependencies   === */
+
+#undef XXPH_INLINE_ALL   /* in case it's already defined */
+#define XXPH_INLINE_ALL
+
+
+/* ===   Compiler specifics   === */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
+#  define XXPH_RESTRICT   restrict
+#else
+/* note : it might be useful to define __restrict or __restrict__ for some C++ compilers */
+#  define XXPH_RESTRICT   /* disable */
+#endif
+
+#if defined(__GNUC__)
+#  if defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
+#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#    define inline __inline__  /* clang bug */
+#    include <arm_neon.h>
+#    undef inline
+#  endif
+#elif defined(_MSC_VER)
+#  include <intrin.h>
+#endif
+
+/*
+ * Sanity check.
+ *
+ * XXPH3 only requires these features to be efficient:
+ *
+ *  - Usable unaligned access
+ *  - A 32-bit or 64-bit ALU
+ *      - If 32-bit, a decent ADC instruction
+ *  - A 32 or 64-bit multiply with a 64-bit result
+ *
+ * Almost all 32-bit and 64-bit targets meet this, except for Thumb-1, the
+ * classic 16-bit only subset of ARM's instruction set.
+ *
+ * First of all, Thumb-1 lacks support for the UMULL instruction which
+ * performs the important long multiply. This means numerous __aeabi_lmul
+ * calls.
+ *
+ * Second of all, the 8 functional registers are just not enough.
+ * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
+ * Lo registers, and this shuffling results in thousands more MOVs than A32.
+ *
+ * A32 and T32 don't have this limitation. They can access all 14 registers,
+ * do a 32->64 multiply with UMULL, and the flexible operand is helpful too.
+ *
+ * If compiling Thumb-1 for a target which supports ARM instructions, we
+ * will give a warning.
+ *
+ * Usually, if this happens, it is because of an accident and you probably
+ * need to specify -march, as you probably meant to compile for a newer
+ * architecture.
+ */
+#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
+#  warning "XXPH3 is highly inefficient without ARM or Thumb-2."
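+/* (Illustrative note: per the comment above, the usual fix is to compile for
+ *  a target with ARM or Thumb-2 support, e.g. -march=armv7-a. Separately, the
+ *  vectorization choice made below can be pinned on the command line with
+ *  e.g. -DXXPH_VECTOR=XXPH_SCALAR.) */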
+#endif
+
+/* ==========================================
+ * Vectorization detection
+ * ========================================== */
+#define XXPH_SCALAR 0
+#define XXPH_SSE2   1
+#define XXPH_AVX2   2
+#define XXPH_NEON   3
+#define XXPH_VSX    4
+
+#ifndef XXPH_VECTOR    /* can be defined on command line */
+#  if defined(__AVX2__)
+#    define XXPH_VECTOR XXPH_AVX2
+#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+#    define XXPH_VECTOR XXPH_SSE2
+#  elif defined(__GNUC__) /* msvc support maybe later */ \
+  && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
+  && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
+    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#    define XXPH_VECTOR XXPH_NEON
+#  elif defined(__PPC64__) && defined(__POWER8_VECTOR__) && defined(__GNUC__)
+#    define XXPH_VECTOR XXPH_VSX
+#  else
+#    define XXPH_VECTOR XXPH_SCALAR
+#  endif
+#endif
+
+/* control alignment of accumulator,
+ * for compatibility with fast vector loads */
+#ifndef XXPH_ACC_ALIGN
+#  if XXPH_VECTOR == 0   /* scalar */
+#    define XXPH_ACC_ALIGN 8
+#  elif XXPH_VECTOR == 1  /* sse2 */
+#    define XXPH_ACC_ALIGN 16
+#  elif XXPH_VECTOR == 2  /* avx2 */
+#    define XXPH_ACC_ALIGN 32
+#  elif XXPH_VECTOR == 3  /* neon */
+#    define XXPH_ACC_ALIGN 16
+#  elif XXPH_VECTOR == 4  /* vsx */
+#    define XXPH_ACC_ALIGN 16
+#  endif
+#endif
+
+/* xxh_u64 XXPH_mult32to64(xxh_u32 a, xxh_u64 b) { return (xxh_u64)a * (xxh_u64)b; } */
+#if defined(_MSC_VER) && defined(_M_IX86)
+#    include <intrin.h>
+#    define XXPH_mult32to64(x, y) __emulu(x, y)
+#else
+#    define XXPH_mult32to64(x, y) ((xxh_u64)((x) & 0xFFFFFFFF) * (xxh_u64)((y) & 0xFFFFFFFF))
+#endif
+
+/* VSX stuff. It's a lot because VSX support is mediocre across compilers and
+ * there is a lot of mischief with endianness. */
+#if XXPH_VECTOR == XXPH_VSX
+#  include <altivec.h>
+#  undef vector
+typedef __vector unsigned long long U64x2;
+typedef __vector unsigned char U8x16;
+typedef __vector unsigned U32x4;
+
+#ifndef XXPH_VSX_BE
+#  if defined(__BIG_ENDIAN__) \
+  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#    define XXPH_VSX_BE 1
+#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+#    warning "-maltivec=be is not recommended. Please use native endianness."
+#    define XXPH_VSX_BE 1
+#  else
+#    define XXPH_VSX_BE 0
+#  endif
+#endif
+
+/* We need some helpers for big endian mode. */
+#if XXPH_VSX_BE
+/* A wrapper for POWER9's vec_revb. */
+#  ifdef __POWER9_VECTOR__
+#    define XXPH_vec_revb vec_revb
+#  else
+XXPH_FORCE_INLINE U64x2 XXPH_vec_revb(U64x2 val)
+{
+    U8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+                              0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
+    return vec_perm(val, val, vByteSwap);
+}
+#  endif
+
+/* Power8 Crypto gives us vpermxor which is very handy for
+ * PPC64EB.
+ *
+ * U8x16 vpermxor(U8x16 a, U8x16 b, U8x16 mask)
+ * {
+ *     U8x16 ret;
+ *     for (int i = 0; i < 16; i++) {
+ *         ret[i] = a[mask[i] & 0xF] ^ b[mask[i] >> 4];
+ *     }
+ *     return ret;
+ * }
+ *
+ * Because both of the main loops load the key, swap, and xor it with input,
+ * we can combine the key swap into this instruction.
+ */
+#  ifdef vec_permxor
+#    define XXPH_vec_permxor vec_permxor
+#  else
+#    define XXPH_vec_permxor __builtin_crypto_vpermxor
+#  endif
+#endif  /* XXPH_VSX_BE */
+/*
+ * Because we reinterpret the multiply, there are endian memes: vec_mulo actually becomes
+ * vec_mule.
+ *
+ * Additionally, the intrinsic wasn't added until GCC 8, despite existing for a while.
+ * Clang has an easy way to control this, we can just use the builtin which doesn't swap.
+ * GCC needs inline assembly. */
+#if __has_builtin(__builtin_altivec_vmuleuw)
+#  define XXPH_vec_mulo __builtin_altivec_vmulouw
+#  define XXPH_vec_mule __builtin_altivec_vmuleuw
+#else
+/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
+XXPH_FORCE_INLINE U64x2 XXPH_vec_mulo(U32x4 a, U32x4 b) {
+    U64x2 result;
+    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
+    return result;
+}
+XXPH_FORCE_INLINE U64x2 XXPH_vec_mule(U32x4 a, U32x4 b) {
+    U64x2 result;
+    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
+    return result;
+}
+#endif /* __has_builtin(__builtin_altivec_vmuleuw) */
+#endif /* XXPH_VECTOR == XXPH_VSX */
+
+/* prefetch
+ * can be disabled, by declaring XXPH_NO_PREFETCH build macro */
+#if defined(XXPH_NO_PREFETCH)
+#  define XXPH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
+#else
+#if defined(_MSC_VER) && \
+    (defined(_M_X64) ||  \
+     defined(_M_IX86)) /* _mm_prefetch() is not defined outside of x86/x64 */
+#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+#    define XXPH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#    define XXPH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#  else
+#    define XXPH_PREFETCH(ptr) (void)(ptr)  /* disabled */
+#  endif
+#endif  /* XXPH_NO_PREFETCH */
+
+
+/* ==========================================
+ * XXPH3 default settings
+ * ========================================== */
+
+#define XXPH_SECRET_DEFAULT_SIZE 192   /* minimum XXPH3_SECRET_SIZE_MIN */
+
+#if (XXPH_SECRET_DEFAULT_SIZE < XXPH3_SECRET_SIZE_MIN)
+#  error "default keyset is not large enough"
+#endif
+
+XXPH_ALIGN(64) static const xxh_u8 kSecret[XXPH_SECRET_DEFAULT_SIZE] = {
+    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
+    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
+    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
+    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
+    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
+    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
+    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
+    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
+
+    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
+    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
+    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
+    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
+};
+
+/*
+ * GCC for x86 has a tendency to use SSE in this loop. While it
+ * successfully avoids swapping (as MUL overwrites EAX and EDX), it
+ * slows it down because instead of free register swap shifts, it
+ * must use pshufd and punpckl/hd.
+ *
+ * To prevent this, we use this attribute to shut off SSE.
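+ *
+ * As a quick numeric check of the 128-bit product below: since
+ * 0xFFFFFFFFFFFFFFFF * 2 == 0x1FFFFFFFFFFFFFFFE,
+ * XXPH_mult64to128(~0ULL, 2) must yield low64 == 0xFFFFFFFFFFFFFFFE and
+ * high64 == 1 on every code path (uint128, _umul128, and scalar).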
+ */ +#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) +__attribute__((__target__("no-sse"))) +#endif +static XXPH128_hash_t +XXPH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) +{ + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this + * type despite not having the arithmetic for it. This results in a + * laggy compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. + * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ +#if defined(__GNUC__) && !defined(__wasm__) \ + && defined(__SIZEOF_INT128__) \ + || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; + XXPH128_hash_t const r128 = { (xxh_u64)(product), (xxh_u64)(product >> 64) }; + return r128; + + /* + * MSVC for x64's _umul128 method. + * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); + * + * This compiles to single operand MUL on x64. + */ +#elif defined(_M_X64) || defined(_M_IA64) + +#ifndef _MSC_VER +# pragma intrinsic(_umul128) +#endif + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); + XXPH128_hash_t const r128 = { product_low, product_high }; + return r128; + +#else + /* + * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. + * + * This is a fast and simple grade school multiply, which is shown + * below with base 10 arithmetic instead of base 0x100000000. + * + * 9 3 // D2 lhs = 93 + * x 7 5 // D2 rhs = 75 + * ---------- + * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) + * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) + * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) + * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) + * --------- + * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 + * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 + * --------- + * 6 9 7 5 + * + * The reasons for adding the products like this are: + * 1. It avoids manual carry tracking. Just like how + * (9 * 9) + 9 + 9 = 99, the same applies with this for + * UINT64_MAX. This avoids a lot of complexity. + * + * 2. It hints for, and on Clang, compiles to, the powerful UMAAL + * instruction available in ARMv6+ A32/T32, which is shown below: + * + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) + * { + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); + * } + * + * This instruction was designed for efficient long multiplication, + * and allows this to be calculated in only 4 instructions which + * is comparable to some 64-bit ALUs. + * + * 3. It isn't terrible on other platforms. Usually this will be + * a couple of 32-bit ADD/ADCs. + */ + + /* First calculate all of the cross products. */ + xxh_u64 const lo_lo = XXPH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXPH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXPH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXPH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. 
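+     * (Why they cannot overflow: lo_lo >> 32 and hi_lo & 0xFFFFFFFF are each
+     * at most 0xFFFFFFFF, and lo_hi is at most 0xFFFFFFFF * 0xFFFFFFFF ==
+     * 0xFFFFFFFE00000001, so `cross` tops out at exactly 0xFFFFFFFFFFFFFFFF;
+     * the same bound holds for `upper`.)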
*/ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXPH128_hash_t r128 = { lower, upper }; + return r128; +#endif +} + +/* + * We want to keep the attribute here because a target switch + * disables inlining. + * + * Does a 64-bit to 128-bit multiply, then XOR folds it. + * The reason for the separate function is to prevent passing + * too many structs around by value. This will hopefully inline + * the multiply, but we don't force it. + */ +#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) +__attribute__((__target__("no-sse"))) +#endif +static xxh_u64 +XXPH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) +{ + XXPH128_hash_t product = XXPH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; +} + + +static XXPH64_hash_t XXPH3_avalanche(xxh_u64 h64) +{ + h64 ^= h64 >> 37; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +/* ========================================== + * Short keys + * ========================================== */ + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXPH64_hash_t seed) +{ + XXPH_ASSERT(input != NULL); + XXPH_ASSERT(1 <= len && len <= 3); + XXPH_ASSERT(secret != NULL); + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1) | (((xxh_u32)c2) << 8) | (((xxh_u32)c3) << 16) | (((xxh_u32)len) << 24); + xxh_u64 const keyed = (xxh_u64)combined ^ (XXPH_readLE32(secret) + seed); + xxh_u64 const mixed = keyed * PRIME64_1; + return XXPH3_avalanche(mixed); + } +} + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXPH64_hash_t seed) +{ + XXPH_ASSERT(input != NULL); + XXPH_ASSERT(secret != NULL); + XXPH_ASSERT(4 <= len && len <= 8); + { xxh_u32 const input_lo = XXPH_readLE32(input); + xxh_u32 const input_hi = XXPH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo | ((xxh_u64)input_hi << 32); + xxh_u64 const keyed = input_64 ^ (XXPH_readLE64(secret) + seed); + xxh_u64 const mix64 = len + ((keyed ^ (keyed >> 51)) * PRIME32_1); + return XXPH3_avalanche((mix64 ^ (mix64 >> 47)) * PRIME64_2); + } +} + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXPH64_hash_t seed) +{ + XXPH_ASSERT(input != NULL); + XXPH_ASSERT(secret != NULL); + XXPH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const input_lo = XXPH_readLE64(input) ^ (XXPH_readLE64(secret) + seed); + xxh_u64 const input_hi = XXPH_readLE64(input + len - 8) ^ (XXPH_readLE64(secret + 8) - seed); + xxh_u64 const acc = len + (input_lo + input_hi) + XXPH3_mul128_fold64(input_lo, input_hi); + return XXPH3_avalanche(acc); + } +} + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXPH64_hash_t seed) +{ + XXPH_ASSERT(len <= 16); + { if (len > 8) return XXPH3_len_9to16_64b(input, len, secret, seed); + if (len >= 4) return XXPH3_len_4to8_64b(input, len, secret, seed); + if (len) return XXPH3_len_1to3_64b(input, len, secret, seed); + /* + * RocksDB modification from XXPH3 preview: zero result for empty + * string can be problematic for multiplication-based algorithms. + * Return a hash of the seed instead. 
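+     * (An illustrative rationale: a constant 0 here would be absorbed by any
+     * subsequent multiply -- 0 * PRIME64_2 == 0 -- so every empty input would
+     * collapse to the same value regardless of seed or secret.)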
+ */ + return XXPH3_mul128_fold64(seed + XXPH_readLE64(secret), PRIME64_2); + } +} + + +/* === Long Keys === */ + +#define STRIPE_LEN 64 +#define XXPH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ +#define ACC_NB (STRIPE_LEN / sizeof(xxh_u64)) + +typedef enum { XXPH3_acc_64bits, XXPH3_acc_128bits } XXPH3_accWidth_e; + +XXPH_FORCE_INLINE void +XXPH3_accumulate_512( void* XXPH_RESTRICT acc, + const void* XXPH_RESTRICT input, + const void* XXPH_RESTRICT secret, + XXPH3_accWidth_e accWidth) +{ +#if (XXPH_VECTOR == XXPH_AVX2) + + XXPH_ASSERT((((size_t)acc) & 31) == 0); + { XXPH_ALIGN(32) __m256i* const xacc = (__m256i *) acc; + const __m256i* const xinput = (const __m256i *) input; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */ + const __m256i* const xsecret = (const __m256i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */ + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) { + __m256i const data_vec = _mm256_loadu_si256 (xinput+i); + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); /* uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} */ + __m256i const product = _mm256_mul_epu32 (data_key, _mm256_shuffle_epi32 (data_key, 0x31)); /* uint64 mul[4] = {dk0*dk1, dk2*dk3, ...} */ + if (accWidth == XXPH3_acc_128bits) { + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + xacc[i] = _mm256_add_epi64(product, sum); + } else { /* XXPH3_acc_64bits */ + __m256i const sum = _mm256_add_epi64(xacc[i], data_vec); + xacc[i] = _mm256_add_epi64(product, sum); + } + } } + +#elif (XXPH_VECTOR == XXPH_SSE2) + + XXPH_ASSERT((((size_t)acc) & 15) == 0); + { XXPH_ALIGN(16) __m128i* const xacc = (__m128i *) acc; + const __m128i* const xinput = (const __m128i *) input; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */ + const __m128i* const xsecret = (const __m128i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */ + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) { + __m128i const data_vec = _mm_loadu_si128 (xinput+i); + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); /* uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} */ + __m128i const product = _mm_mul_epu32 (data_key, _mm_shuffle_epi32 (data_key, 0x31)); /* uint64 mul[4] = {dk0*dk1, dk2*dk3, ...} */ + if (accWidth == XXPH3_acc_128bits) { + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + xacc[i] = _mm_add_epi64(product, sum); + } else { /* XXPH3_acc_64bits */ + __m128i const sum = _mm_add_epi64(xacc[i], data_vec); + xacc[i] = _mm_add_epi64(product, sum); + } + } } + +#elif (XXPH_VECTOR == XXPH_NEON) + + XXPH_ASSERT((((size_t)acc) & 15) == 0); + { + XXPH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. 
*/ + uint8_t const* const xinput = (const uint8_t *) input; + uint8_t const* const xsecret = (const uint8_t *) secret; + + size_t i; + for (i=0; i < STRIPE_LEN / sizeof(uint64x2_t); i++) { +#if !defined(__aarch64__) && !defined(__arm64__) && defined(__GNUC__) /* ARM32-specific hack */ + /* vzip on ARMv7 Clang generates a lot of vmovs (technically vorrs) without this. + * vzip on 32-bit ARM NEON will overwrite the original register, and I think that Clang + * assumes I don't want to destroy it and tries to make a copy. This slows down the code + * a lot. + * aarch64 not only uses an entirely different syntax, but it requires three + * instructions... + * ext v1.16B, v0.16B, #8 // select high bits because aarch64 can't address them directly + * zip1 v3.2s, v0.2s, v1.2s // first zip + * zip2 v2.2s, v0.2s, v1.2s // second zip + * ...to do what ARM does in one: + * vzip.32 d0, d1 // Interleave high and low bits and overwrite. */ + + /* data_vec = xsecret[i]; */ + uint8x16_t const data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t const key_vec = vld1q_u8(xsecret + (i * 16)); + /* data_key = data_vec ^ key_vec; */ + uint32x4_t data_key; + + if (accWidth == XXPH3_acc_64bits) { + /* Add first to prevent register swaps */ + /* xacc[i] += data_vec; */ + xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u8(data_vec)); + } else { /* XXPH3_acc_128bits */ + /* xacc[i] += swap(data_vec); */ + /* can probably be optimized better */ + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); + uint64x2_t const swapped= vextq_u64(data64, data64, 1); + xacc[i] = vaddq_u64 (xacc[i], swapped); + } + + data_key = vreinterpretq_u32_u8(veorq_u8(data_vec, key_vec)); + + /* Here's the magic. We use the quirkiness of vzip to shuffle data_key in place. + * shuffle: data_key[0, 1, 2, 3] = data_key[0, 2, 1, 3] */ + __asm__("vzip.32 %e0, %f0" : "+w" (data_key)); + /* xacc[i] += (uint64x2_t) data_key[0, 1] * (uint64x2_t) data_key[2, 3]; */ + xacc[i] = vmlal_u32(xacc[i], vget_low_u32(data_key), vget_high_u32(data_key)); + +#else + /* On aarch64, vshrn/vmovn seems to be equivalent to, if not faster than, the vzip method. 
*/ + + /* data_vec = xsecret[i]; */ + uint8x16_t const data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t const key_vec = vld1q_u8(xsecret + (i * 16)); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t const data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec)); + /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); */ + uint32x2_t const data_key_lo = vmovn_u64 (data_key); + /* data_key_hi = (uint32x2_t) (data_key >> 32); */ + uint32x2_t const data_key_hi = vshrn_n_u64 (data_key, 32); + if (accWidth == XXPH3_acc_64bits) { + /* xacc[i] += data_vec; */ + xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u8(data_vec)); + } else { /* XXPH3_acc_128bits */ + /* xacc[i] += swap(data_vec); */ + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); + uint64x2_t const swapped= vextq_u64(data64, data64, 1); + xacc[i] = vaddq_u64 (xacc[i], swapped); + } + /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ + xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi); + +#endif + } + } + +#elif (XXPH_VECTOR == XXPH_VSX) && /* work around a compiler bug */ (__GNUC__ > 5) + U64x2* const xacc = (U64x2*) acc; /* presumed aligned */ + U64x2 const* const xinput = (U64x2 const*) input; /* no alignment restriction */ + U64x2 const* const xsecret = (U64x2 const*) secret; /* no alignment restriction */ + U64x2 const v32 = { 32, 32 }; +#if XXPH_VSX_BE + U8x16 const vXorSwap = { 0x07, 0x16, 0x25, 0x34, 0x43, 0x52, 0x61, 0x70, + 0x8F, 0x9E, 0xAD, 0xBC, 0xCB, 0xDA, 0xE9, 0xF8 }; +#endif + size_t i; + for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) { + /* data_vec = xinput[i]; */ + /* key_vec = xsecret[i]; */ +#if XXPH_VSX_BE + /* byteswap */ + U64x2 const data_vec = XXPH_vec_revb(vec_vsx_ld(0, xinput + i)); + U64x2 const key_raw = vec_vsx_ld(0, xsecret + i); + /* See comment above. 
data_key = data_vec ^ swap(xsecret[i]); */ + U64x2 const data_key = (U64x2)XXPH_vec_permxor((U8x16)data_vec, (U8x16)key_raw, vXorSwap); +#else + U64x2 const data_vec = vec_vsx_ld(0, xinput + i); + U64x2 const key_vec = vec_vsx_ld(0, xsecret + i); + U64x2 const data_key = data_vec ^ key_vec; +#endif + /* shuffled = (data_key << 32) | (data_key >> 32); */ + U32x4 const shuffled = (U32x4)vec_rl(data_key, v32); + /* product = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)shuffled & 0xFFFFFFFF); */ + U64x2 const product = XXPH_vec_mulo((U32x4)data_key, shuffled); + xacc[i] += product; + + if (accWidth == XXPH3_acc_64bits) { + xacc[i] += data_vec; + } else { /* XXPH3_acc_128bits */ + /* swap high and low halves */ + U64x2 const data_swapped = vec_xxpermdi(data_vec, data_vec, 2); + xacc[i] += data_swapped; + } + } + +#else /* scalar variant of Accumulator - universal */ + + XXPH_ALIGN(XXPH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */ + const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXPH_ASSERT(((size_t)acc & (XXPH_ACC_ALIGN-1)) == 0); + for (i=0; i < ACC_NB; i++) { + xxh_u64 const data_val = XXPH_readLE64(xinput + 8*i); + xxh_u64 const data_key = data_val ^ XXPH_readLE64(xsecret + i*8); + + if (accWidth == XXPH3_acc_64bits) { + xacc[i] += data_val; + } else { + xacc[i ^ 1] += data_val; /* swap adjacent lanes */ + } + xacc[i] += XXPH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); + } +#endif +} + +XXPH_FORCE_INLINE void +XXPH3_scrambleAcc(void* XXPH_RESTRICT acc, const void* XXPH_RESTRICT secret) +{ +#if (XXPH_VECTOR == XXPH_AVX2) + + XXPH_ASSERT((((size_t)acc) & 31) == 0); + { XXPH_ALIGN(32) __m256i* const xacc = (__m256i*) acc; + const __m256i* const xsecret = (const __m256i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this argument type */ + const __m256i prime32 = _mm256_set1_epi32((int)PRIME32_1); + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + + /* xacc[i] *= PRIME32_1; */ + __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, 0x31); + __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); + } + } + +#elif (XXPH_VECTOR == XXPH_SSE2) + + XXPH_ASSERT((((size_t)acc) & 15) == 0); + { XXPH_ALIGN(16) __m128i* const xacc = (__m128i*) acc; + const __m128i* const xsecret = (const __m128i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this argument type */ + const __m128i prime32 = _mm_set1_epi32((int)PRIME32_1); + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); + __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = 
_mm_xor_si128 (data_vec, key_vec); + + /* xacc[i] *= PRIME32_1; */ + __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, 0x31); + __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); + __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); + } + } + +#elif (XXPH_VECTOR == XXPH_NEON) + + XXPH_ASSERT((((size_t)acc) & 15) == 0); + + { uint64x2_t* const xacc = (uint64x2_t*) acc; + uint8_t const* const xsecret = (uint8_t const*) secret; + uint32x2_t const prime = vdup_n_u32 (PRIME32_1); + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(uint64x2_t); i++) { + /* data_vec = xacc[i] ^ (xacc[i] >> 47); */ + uint64x2_t const acc_vec = xacc[i]; + uint64x2_t const shifted = vshrq_n_u64 (acc_vec, 47); + uint64x2_t const data_vec = veorq_u64 (acc_vec, shifted); + + /* key_vec = xsecret[i]; */ + uint32x4_t const key_vec = vreinterpretq_u32_u8(vld1q_u8(xsecret + (i * 16))); + /* data_key = data_vec ^ key_vec; */ + uint32x4_t const data_key = veorq_u32 (vreinterpretq_u32_u64(data_vec), key_vec); + /* shuffled = { data_key[0, 2], data_key[1, 3] }; */ + uint32x2x2_t const shuffled = vzip_u32 (vget_low_u32(data_key), vget_high_u32(data_key)); + + /* data_key *= PRIME32_1 */ + + /* prod_hi = (data_key >> 32) * PRIME32_1; */ + uint64x2_t const prod_hi = vmull_u32 (shuffled.val[1], prime); + /* xacc[i] = prod_hi << 32; */ + xacc[i] = vshlq_n_u64(prod_hi, 32); + /* xacc[i] += (prod_hi & 0xFFFFFFFF) * PRIME32_1; */ + xacc[i] = vmlal_u32(xacc[i], shuffled.val[0], prime); + } } + +#elif (XXPH_VECTOR == XXPH_VSX) && /* work around a compiler bug */ (__GNUC__ > 5) + + U64x2* const xacc = (U64x2*) acc; + const U64x2* const xsecret = (const U64x2*) secret; + /* constants */ + U64x2 const v32 = { 32, 32 }; + U64x2 const v47 = { 47, 47 }; + U32x4 const prime = { PRIME32_1, PRIME32_1, PRIME32_1, PRIME32_1 }; + size_t i; +#if XXPH_VSX_BE + /* endian swap */ + U8x16 const vXorSwap = { 0x07, 0x16, 0x25, 0x34, 0x43, 0x52, 0x61, 0x70, + 0x8F, 0x9E, 0xAD, 0xBC, 0xCB, 0xDA, 0xE9, 0xF8 }; +#endif + for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) { + U64x2 const acc_vec = xacc[i]; + U64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + /* key_vec = xsecret[i]; */ +#if XXPH_VSX_BE + /* swap bytes words */ + U64x2 const key_raw = vec_vsx_ld(0, xsecret + i); + U64x2 const data_key = (U64x2)XXPH_vec_permxor((U8x16)data_vec, (U8x16)key_raw, vXorSwap); +#else + U64x2 const key_vec = vec_vsx_ld(0, xsecret + i); + U64x2 const data_key = data_vec ^ key_vec; +#endif + + /* data_key *= PRIME32_1 */ + + /* prod_lo = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)prime & 0xFFFFFFFF); */ + U64x2 const prod_even = XXPH_vec_mule((U32x4)data_key, prime); + /* prod_hi = ((U64x2)data_key >> 32) * ((U64x2)prime >> 32); */ + U64x2 const prod_odd = XXPH_vec_mulo((U32x4)data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } + +#else /* scalar variant of Scrambler - universal */ + + XXPH_ALIGN(XXPH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXPH_ASSERT((((size_t)acc) & (XXPH_ACC_ALIGN-1)) == 0); + for (i=0; i < ACC_NB; i++) { + xxh_u64 const key64 = XXPH_readLE64(xsecret + 8*i); + xxh_u64 acc64 = xacc[i]; + acc64 ^= acc64 >> 47; + acc64 ^= key64; + acc64 *= PRIME32_1; + xacc[i] = acc64; + } + +#endif +} + +#define XXPH_PREFETCH_DIST 384 + +/* assumption : nbStripes will not overflow 
secret size */ +XXPH_FORCE_INLINE void +XXPH3_accumulate( xxh_u64* XXPH_RESTRICT acc, + const xxh_u8* XXPH_RESTRICT input, + const xxh_u8* XXPH_RESTRICT secret, + size_t nbStripes, + XXPH3_accWidth_e accWidth) +{ + size_t n; + for (n = 0; n < nbStripes; n++ ) { + const xxh_u8* const in = input + n*STRIPE_LEN; + XXPH_PREFETCH(in + XXPH_PREFETCH_DIST); + XXPH3_accumulate_512(acc, + in, + secret + n*XXPH_SECRET_CONSUME_RATE, + accWidth); + } +} + +/* note : clang auto-vectorizes well in SS2 mode _if_ this function is `static`, + * and doesn't auto-vectorize it at all if it is `FORCE_INLINE`. + * However, it auto-vectorizes better AVX2 if it is `FORCE_INLINE` + * Pretty much every other modes and compilers prefer `FORCE_INLINE`. + */ + +#if defined(__clang__) && (XXPH_VECTOR==0) && !defined(__AVX2__) && !defined(__arm__) && !defined(__thumb__) +static void +#else +XXPH_FORCE_INLINE void +#endif +XXPH3_hashLong_internal_loop( xxh_u64* XXPH_RESTRICT acc, + const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize, + XXPH3_accWidth_e accWidth) +{ + size_t const nb_rounds = (secretSize - STRIPE_LEN) / XXPH_SECRET_CONSUME_RATE; + size_t const block_len = STRIPE_LEN * nb_rounds; + size_t const nb_blocks = len / block_len; + + size_t n; + + XXPH_ASSERT(secretSize >= XXPH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) { + XXPH3_accumulate(acc, input + n*block_len, secret, nb_rounds, accWidth); + XXPH3_scrambleAcc(acc, secret + secretSize - STRIPE_LEN); + } + + /* last partial block */ + XXPH_ASSERT(len > STRIPE_LEN); + { size_t const nbStripes = (len - (block_len * nb_blocks)) / STRIPE_LEN; + XXPH_ASSERT(nbStripes <= (secretSize / XXPH_SECRET_CONSUME_RATE)); + XXPH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, accWidth); + + /* last stripe */ + if (len & (STRIPE_LEN - 1)) { + const xxh_u8* const p = input + len - STRIPE_LEN; +#define XXPH_SECRET_LASTACC_START 7 /* do not align on 8, so that secret is different from scrambler */ + XXPH3_accumulate_512(acc, p, secret + secretSize - STRIPE_LEN - XXPH_SECRET_LASTACC_START, accWidth); + } } +} + +XXPH_FORCE_INLINE xxh_u64 +XXPH3_mix2Accs(const xxh_u64* XXPH_RESTRICT acc, const xxh_u8* XXPH_RESTRICT secret) +{ + return XXPH3_mul128_fold64( + acc[0] ^ XXPH_readLE64(secret), + acc[1] ^ XXPH_readLE64(secret+8) ); +} + +static XXPH64_hash_t +XXPH3_mergeAccs(const xxh_u64* XXPH_RESTRICT acc, const xxh_u8* XXPH_RESTRICT secret, xxh_u64 start) +{ + xxh_u64 result64 = start; + + result64 += XXPH3_mix2Accs(acc+0, secret + 0); + result64 += XXPH3_mix2Accs(acc+2, secret + 16); + result64 += XXPH3_mix2Accs(acc+4, secret + 32); + result64 += XXPH3_mix2Accs(acc+6, secret + 48); + + return XXPH3_avalanche(result64); +} + +#define XXPH3_INIT_ACC { PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, \ + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1 }; + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_hashLong_internal(const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize) +{ + XXPH_ALIGN(XXPH_ACC_ALIGN) xxh_u64 acc[ACC_NB] = XXPH3_INIT_ACC; + + XXPH3_hashLong_internal_loop(acc, input, len, secret, secretSize, XXPH3_acc_64bits); + + /* converge into final hash */ + XXPH_STATIC_ASSERT(sizeof(acc) == 64); +#define XXPH_SECRET_MERGEACCS_START 11 /* do not align on 8, so that secret is different from accumulator */ + XXPH_ASSERT(secretSize >= sizeof(acc) + XXPH_SECRET_MERGEACCS_START); + return XXPH3_mergeAccs(acc, secret + XXPH_SECRET_MERGEACCS_START, (xxh_u64)len * PRIME64_1); 
+} + + +XXPH_NO_INLINE XXPH64_hash_t /* It's important for performance that XXPH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */ +XXPH3_hashLong_64b_defaultSecret(const xxh_u8* XXPH_RESTRICT input, size_t len) +{ + return XXPH3_hashLong_internal(input, len, kSecret, sizeof(kSecret)); +} + +XXPH_NO_INLINE XXPH64_hash_t /* It's important for performance that XXPH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */ +XXPH3_hashLong_64b_withSecret(const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize) +{ + return XXPH3_hashLong_internal(input, len, secret, secretSize); +} + + +XXPH_FORCE_INLINE void XXPH_writeLE64(void* dst, xxh_u64 v64) +{ + if (!XXPH_CPU_LITTLE_ENDIAN) v64 = XXPH_swap64(v64); + memcpy(dst, &v64, sizeof(v64)); +} + +/* XXPH3_initCustomSecret() : + * destination `customSecret` is presumed allocated and same size as `kSecret`. + */ +XXPH_FORCE_INLINE void XXPH3_initCustomSecret(xxh_u8* customSecret, xxh_u64 seed64) +{ + int const nbRounds = XXPH_SECRET_DEFAULT_SIZE / 16; + int i; + + XXPH_STATIC_ASSERT((XXPH_SECRET_DEFAULT_SIZE & 15) == 0); + + for (i=0; i < nbRounds; i++) { + XXPH_writeLE64(customSecret + 16*i, XXPH_readLE64(kSecret + 16*i) + seed64); + XXPH_writeLE64(customSecret + 16*i + 8, XXPH_readLE64(kSecret + 16*i + 8) - seed64); + } +} + + +/* XXPH3_hashLong_64b_withSeed() : + * Generate a custom key, + * based on alteration of default kSecret with the seed, + * and then use this key for long mode hashing. + * This operation is decently fast but nonetheless costs a little bit of time. + * Try to avoid it whenever possible (typically when seed==0). + */ +XXPH_NO_INLINE XXPH64_hash_t /* It's important for performance that XXPH3_hashLong is not inlined. 
Not sure why (uop cache maybe ?), but difference is large and easily measurable */ +XXPH3_hashLong_64b_withSeed(const xxh_u8* input, size_t len, XXPH64_hash_t seed) +{ + XXPH_ALIGN(8) xxh_u8 secret[XXPH_SECRET_DEFAULT_SIZE]; + if (seed==0) return XXPH3_hashLong_64b_defaultSecret(input, len); + XXPH3_initCustomSecret(secret, seed); + return XXPH3_hashLong_internal(input, len, secret, sizeof(secret)); +} + + +XXPH_FORCE_INLINE xxh_u64 XXPH3_mix16B(const xxh_u8* XXPH_RESTRICT input, + const xxh_u8* XXPH_RESTRICT secret, xxh_u64 seed64) +{ + xxh_u64 const input_lo = XXPH_readLE64(input); + xxh_u64 const input_hi = XXPH_readLE64(input+8); + return XXPH3_mul128_fold64( + input_lo ^ (XXPH_readLE64(secret) + seed64), + input_hi ^ (XXPH_readLE64(secret+8) - seed64) ); +} + + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_17to128_64b(const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize, + XXPH64_hash_t seed) +{ + XXPH_ASSERT(secretSize >= XXPH3_SECRET_SIZE_MIN); (void)secretSize; + XXPH_ASSERT(16 < len && len <= 128); + + { xxh_u64 acc = len * PRIME64_1; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc += XXPH3_mix16B(input+48, secret+96, seed); + acc += XXPH3_mix16B(input+len-64, secret+112, seed); + } + acc += XXPH3_mix16B(input+32, secret+64, seed); + acc += XXPH3_mix16B(input+len-48, secret+80, seed); + } + acc += XXPH3_mix16B(input+16, secret+32, seed); + acc += XXPH3_mix16B(input+len-32, secret+48, seed); + } + acc += XXPH3_mix16B(input+0, secret+0, seed); + acc += XXPH3_mix16B(input+len-16, secret+16, seed); + + return XXPH3_avalanche(acc); + } +} + +#define XXPH3_MIDSIZE_MAX 240 + +XXPH_NO_INLINE XXPH64_hash_t +XXPH3_len_129to240_64b(const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize, + XXPH64_hash_t seed) +{ + XXPH_ASSERT(secretSize >= XXPH3_SECRET_SIZE_MIN); (void)secretSize; + XXPH_ASSERT(128 < len && len <= XXPH3_MIDSIZE_MAX); + + #define XXPH3_MIDSIZE_STARTOFFSET 3 + #define XXPH3_MIDSIZE_LASTOFFSET 17 + + { xxh_u64 acc = len * PRIME64_1; + int const nbRounds = (int)len / 16; + int i; + for (i=0; i<8; i++) { + acc += XXPH3_mix16B(input+(16*i), secret+(16*i), seed); + } + acc = XXPH3_avalanche(acc); + XXPH_ASSERT(nbRounds >= 8); + for (i=8 ; i < nbRounds; i++) { + acc += XXPH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXPH3_MIDSIZE_STARTOFFSET, seed); + } + /* last bytes */ + acc += XXPH3_mix16B(input + len - 16, secret + XXPH3_SECRET_SIZE_MIN - XXPH3_MIDSIZE_LASTOFFSET, seed); + return XXPH3_avalanche(acc); + } +} + +/* === Public entry point === */ + +XXPH_PUBLIC_API XXPH64_hash_t XXPH3_64bits(const void* input, size_t len) +{ + if (len <= 16) return XXPH3_len_0to16_64b((const xxh_u8*)input, len, kSecret, 0); + if (len <= 128) return XXPH3_len_17to128_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), 0); + if (len <= XXPH3_MIDSIZE_MAX) return XXPH3_len_129to240_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), 0); + return XXPH3_hashLong_64b_defaultSecret((const xxh_u8*)input, len); +} + +XXPH_PUBLIC_API XXPH64_hash_t +XXPH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) +{ + XXPH_ASSERT(secretSize >= XXPH3_SECRET_SIZE_MIN); + /* if an action must be taken should `secret` conditions not be respected, + * it should be done here. + * For now, it's a contract pre-condition. 
+ * Adding a check and a branch here would cost performance at every hash */ + if (len <= 16) return XXPH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, 0); + if (len <= 128) return XXPH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, 0); + if (len <= XXPH3_MIDSIZE_MAX) return XXPH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, 0); + return XXPH3_hashLong_64b_withSecret((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize); +} + +XXPH_PUBLIC_API XXPH64_hash_t +XXPH3_64bits_withSeed(const void* input, size_t len, XXPH64_hash_t seed) +{ + if (len <= 16) return XXPH3_len_0to16_64b((const xxh_u8*)input, len, kSecret, seed); + if (len <= 128) return XXPH3_len_17to128_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), seed); + if (len <= XXPH3_MIDSIZE_MAX) return XXPH3_len_129to240_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), seed); + return XXPH3_hashLong_64b_withSeed((const xxh_u8*)input, len, seed); +} + +/* === XXPH3 streaming === */ + +/* RocksDB Note: unused & removed due to bug in preview version */ + +/*======== END #include "xxh3.h", now inlined above ==========*/ + +#endif /* XXPH_NO_LONG_LONG */ + +/* === END RocksDB modification of permanently inlining === */ + +#endif /* defined(XXPH_INLINE_ALL) || defined(XXPH_PRIVATE_API) */ + +#endif /* XXPH_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* XXPHASH_H_5627135585666179 */ diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge.cc new file mode 100644 index 0000000000..8e5c536f55 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge.cc @@ -0,0 +1,238 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
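The merge operands that agg_merge.cc manipulates below are framed as a length-prefixed function name followed by a raw payload. As a rough illustration of that framing, here is a minimal sketch; it is not RocksDB's actual varint-based PutLengthPrefixedSlice/GetLengthPrefixedSlice, and the fixed 4-byte little-endian length and helper names are assumptions:

#include <cstdint>
#include <cstring>
#include <optional>
#include <string>
#include <string_view>

// Hypothetical stand-in for PutLengthPrefixedSlice: append a 4-byte
// little-endian length, then the bytes themselves.
static void put_length_prefixed(std::string* dst, std::string_view s) {
  uint32_t n = static_cast<uint32_t>(s.size());
  dst->append(reinterpret_cast<const char*>(&n), sizeof n);  // LE host assumed
  dst->append(s.data(), s.size());
}

// Hypothetical stand-in for GetLengthPrefixedSlice: consume one
// length-prefixed field from the front of *in.
static std::optional<std::string_view> get_length_prefixed(
    std::string_view* in) {
  uint32_t n;
  if (in->size() < sizeof n) return std::nullopt;
  std::memcpy(&n, in->data(), sizeof n);
  in->remove_prefix(sizeof n);
  if (in->size() < n) return std::nullopt;
  std::string_view field = in->substr(0, n);
  in->remove_prefix(n);
  return field;  // an operand is then <len(func)><func><payload bytes>
}

EncodeAggFuncAndPayloadNoCheck below follows the same idea: the function name is length-prefixed and the payload is simply whatever bytes follow it, which is why ExtractAggFuncAndValue can recover both with a single GetLengthPrefixedSlice call.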
+ +#include "rocksdb/utilities/agg_merge.h" + +#include + +#include +#include +#include +#include +#include + +#include "port/lang.h" +#include "port/likely.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/options_type.h" +#include "util/coding.h" +#include "utilities/agg_merge/agg_merge_impl.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { +static std::unordered_map> func_map; +const std::string kUnnamedFuncName = ""; +const std::string kErrorFuncName = "kErrorFuncName"; + +Status AddAggregator(const std::string& function_name, + std::unique_ptr&& agg) { + if (function_name == kErrorFuncName) { + return Status::InvalidArgument( + "Cannot register function name kErrorFuncName"); + } + func_map.emplace(function_name, std::move(agg)); + return Status::OK(); +} + +AggMergeOperator::AggMergeOperator() {} + +std::string EncodeAggFuncAndPayloadNoCheck(const Slice& function_name, + const Slice& value) { + std::string result; + PutLengthPrefixedSlice(&result, function_name); + result += value.ToString(); + return result; +} + +Status EncodeAggFuncAndPayload(const Slice& function_name, const Slice& payload, + std::string& output) { + if (function_name == kErrorFuncName) { + return Status::InvalidArgument("Cannot use error function name"); + } + if (function_name != kUnnamedFuncName && + func_map.find(function_name.ToString()) == func_map.end()) { + return Status::InvalidArgument("Function name not registered"); + } + output = EncodeAggFuncAndPayloadNoCheck(function_name, payload); + return Status::OK(); +} + +bool ExtractAggFuncAndValue(const Slice& op, Slice& func, Slice& value) { + value = op; + return GetLengthPrefixedSlice(&value, &func); +} + +bool ExtractList(const Slice& encoded_list, std::vector& decoded_list) { + decoded_list.clear(); + Slice list_slice = encoded_list; + Slice item; + while (GetLengthPrefixedSlice(&list_slice, &item)) { + decoded_list.push_back(item); + } + return list_slice.empty(); +} + +class AggMergeOperator::Accumulator { + public: + bool Add(const Slice& op, bool is_partial_aggregation) { + if (ignore_operands_) { + return true; + } + Slice my_func; + Slice my_value; + bool ret = ExtractAggFuncAndValue(op, my_func, my_value); + if (!ret) { + ignore_operands_ = true; + return true; + } + + // Determine whether we need to do partial merge. + if (is_partial_aggregation && !my_func.empty()) { + auto f = func_map.find(my_func.ToString()); + if (f == func_map.end() || !f->second->DoPartialAggregate()) { + return false; + } + } + + if (!func_valid_) { + if (my_func != kUnnamedFuncName) { + func_ = my_func; + func_valid_ = true; + } + } else if (func_ != my_func) { + // User switched aggregation function. Need to aggregate the older + // one first. + + // Previous aggreagion can't be done in partial merge + if (is_partial_aggregation) { + func_valid_ = false; + ignore_operands_ = true; + return false; + } + + // We could consider stashing an iterator into the hash of aggregators + // to avoid repeated lookups when the aggregator doesn't change. + auto f = func_map.find(func_.ToString()); + if (f == func_map.end() || !f->second->Aggregate(values_, scratch_)) { + func_valid_ = false; + ignore_operands_ = true; + return true; + } + std::swap(scratch_, aggregated_); + values_.clear(); + values_.push_back(aggregated_); + func_ = my_func; + } + values_.push_back(my_value); + return true; + } + + // Return false if aggregation fails. 
+  // One possible reason is that the aggregation function was never
+  // registered with AddAggregator().
+  bool GetResult(std::string& result) {
+    if (!func_valid_) {
+      return false;
+    }
+    auto f = func_map.find(func_.ToString());
+    if (f == func_map.end()) {
+      return false;
+    }
+    if (!f->second->Aggregate(values_, scratch_)) {
+      return false;
+    }
+    result = EncodeAggFuncAndPayloadNoCheck(func_, scratch_);
+    return true;
+  }
+
+  void Clear() {
+    func_.clear();
+    values_.clear();
+    aggregated_.clear();
+    scratch_.clear();
+    ignore_operands_ = false;
+    func_valid_ = false;
+  }
+
+ private:
+  Slice func_;
+  std::vector<Slice> values_;
+  std::string aggregated_;
+  std::string scratch_;
+  bool ignore_operands_ = false;
+  bool func_valid_ = false;
+};
+
+// Creating and using a new Accumulator might invoke multiple mallocs, which
+// is expensive if it has to be done while processing each merge operation.
+// AggMergeOperator's merge operators can be invoked concurrently by multiple
+// threads, so we cannot simply create one Accumulator and reuse it.
+// We use thread-local instances instead.
+AggMergeOperator::Accumulator& AggMergeOperator::GetTLSAccumulator() {
+  static thread_local Accumulator tls_acc;
+  tls_acc.Clear();
+  return tls_acc;
+}
+
+void AggMergeOperator::PackAllMergeOperands(const MergeOperationInput& merge_in,
+                                            MergeOperationOutput& merge_out) {
+  merge_out.new_value = "";
+  PutLengthPrefixedSlice(&merge_out.new_value, kErrorFuncName);
+  if (merge_in.existing_value != nullptr) {
+    PutLengthPrefixedSlice(&merge_out.new_value, *merge_in.existing_value);
+  }
+  for (const Slice& op : merge_in.operand_list) {
+    PutLengthPrefixedSlice(&merge_out.new_value, op);
+  }
+}
+
+bool AggMergeOperator::FullMergeV2(const MergeOperationInput& merge_in,
+                                   MergeOperationOutput* merge_out) const {
+  Accumulator& agg = GetTLSAccumulator();
+  if (merge_in.existing_value != nullptr) {
+    agg.Add(*merge_in.existing_value, /*is_partial_aggregation=*/false);
+  }
+  for (const Slice& e : merge_in.operand_list) {
+    agg.Add(e, /*is_partial_aggregation=*/false);
+  }
+
+  bool succ = agg.GetResult(merge_out->new_value);
+  if (!succ) {
+    // If aggregation can't happen, pack all merge operands. In contrast to
+    // merge operator, we don't want to fail the DB. If users insert values in
+    // the wrong format or call an unregistered aggregation function, we still
+    // hope the DB can continue functioning with other keys.
+    PackAllMergeOperands(merge_in, *merge_out);
+  }
+  agg.Clear();
+  return true;
+}
+
+bool AggMergeOperator::PartialMergeMulti(const Slice& /*key*/,
+                                         const std::deque<Slice>& operand_list,
+                                         std::string* new_value,
+                                         Logger* /*logger*/) const {
+  Accumulator& agg = GetTLSAccumulator();
+  bool do_aggregation = true;
+  for (const Slice& item : operand_list) {
+    do_aggregation = agg.Add(item, /*is_partial_aggregation=*/true);
+    if (!do_aggregation) {
+      break;
+    }
+  }
+  if (do_aggregation) {
+    do_aggregation = agg.GetResult(*new_value);
+  }
+  agg.Clear();
+  return do_aggregation;
+}
+
+std::shared_ptr<MergeOperator> GetAggMergeOperator() {
+  STATIC_AVOID_DESTRUCTION(std::shared_ptr<MergeOperator>, instance)
+  (std::make_shared<AggMergeOperator>());
+  assert(instance);
+  return instance;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge_impl.h
new file mode 100644
index 0000000000..00e58de088
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/agg_merge_impl.h
@@ -0,0 +1,49 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <deque>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/utilities/agg_merge.h"
+#include "utilities/cassandra/cassandra_options.h"
+
+namespace ROCKSDB_NAMESPACE {
+class AggMergeOperator : public MergeOperator {
+ public:
+  explicit AggMergeOperator();
+
+  bool FullMergeV2(const MergeOperationInput& merge_in,
+                   MergeOperationOutput* merge_out) const override;
+
+  bool PartialMergeMulti(const Slice& key,
+                         const std::deque<Slice>& operand_list,
+                         std::string* new_value, Logger* logger) const override;
+
+  const char* Name() const override { return kClassName(); }
+  static const char* kClassName() { return "AggMergeOperator.v1"; }
+
+  bool AllowSingleOperand() const override { return true; }
+
+  bool ShouldMerge(const std::vector<Slice>&) const override { return false; }
+
+ private:
+  class Accumulator;
+
+  // Pack all merge operands into one value. This is called when aggregation
+  // fails. The existing values are preserved and returned so that users can
+  // debug the problem.
+  static void PackAllMergeOperands(const MergeOperationInput& merge_in,
+                                   MergeOperationOutput& merge_out);
+  static Accumulator& GetTLSAccumulator();
+};
+
+extern std::string EncodeAggFuncAndPayloadNoCheck(const Slice& function_name,
+                                                  const Slice& value);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/test_agg_merge.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/test_agg_merge.h
new file mode 100644
index 0000000000..5bdf8b9cc2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/agg_merge/test_agg_merge.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
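For contrast with the test aggregators declared just below, a hedged sketch of a user-defined aggregator against the vendored Aggregator interface. MaxAggregator, its fixed-width int64 payload, and the "max" registration name are illustrative assumptions; Aggregator, AddAggregator, and Slice come from the headers in this patch.

#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/utilities/agg_merge.h"

namespace {
// Hypothetical aggregator: keeps the maximum of little-endian int64 payloads.
class MaxAggregator : public ROCKSDB_NAMESPACE::Aggregator {
 public:
  bool Aggregate(const std::vector<ROCKSDB_NAMESPACE::Slice>& items,
                 std::string& result) const override {
    int64_t best = INT64_MIN;
    for (const auto& item : items) {
      if (item.size() != sizeof(int64_t)) return false;  // malformed payload
      int64_t v;
      std::memcpy(&v, item.data(), sizeof v);
      if (v > best) best = v;
    }
    result.assign(reinterpret_cast<const char*>(&best), sizeof best);
    return true;
  }
  // max is associative, so letting compactions partially aggregate is safe.
  bool DoPartialAggregate() const override { return true; }
};
}  // namespace

// Registered once at startup, e.g.:
//   ROCKSDB_NAMESPACE::AddAggregator("max", std::make_unique<MaxAggregator>());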
+
+#pragma once
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/utilities/agg_merge.h"
+#include "utilities/cassandra/cassandra_options.h"
+
+namespace ROCKSDB_NAMESPACE {
+class SumAggregator : public Aggregator {
+ public:
+  ~SumAggregator() override {}
+  bool Aggregate(const std::vector<Slice>&,
+                 std::string& result) const override;
+  bool DoPartialAggregate() const override { return true; }
+};
+
+class MultipleAggregator : public Aggregator {
+ public:
+  ~MultipleAggregator() override {}
+  bool Aggregate(const std::vector<Slice>&,
+                 std::string& result) const override;
+  bool DoPartialAggregate() const override { return true; }
+};
+
+class Last3Aggregator : public Aggregator {
+ public:
+  ~Last3Aggregator() override {}
+  bool Aggregate(const std::vector<Slice>&,
+                 std::string& result) const override;
+};
+
+class EncodeHelper {
+ public:
+  static std::string EncodeFuncAndInt(const Slice& function_name,
+                                      int64_t value);
+  static std::string EncodeInt(int64_t value);
+  static std::string EncodeList(const std::vector<Slice>& list);
+  static std::string EncodeFuncAndList(const Slice& function_name,
+                                       const std::vector<Slice>& list);
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine.cc
new file mode 100644
index 0000000000..e74218d45c
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine.cc
@@ -0,0 +1,3355 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
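Before the implementation, a quick orientation on how this engine is typically driven. A hedged usage sketch, assuming the public entry points BackupEngine::Open, CreateNewBackup, PurgeOldBackups, and RestoreDBFromLatestBackup from rocksdb/utilities/backup_engine.h; the paths and the flush flag are illustrative.

#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/utilities/backup_engine.h"

void backup_and_restore_sketch(ROCKSDB_NAMESPACE::DB* db) {
  using namespace ROCKSDB_NAMESPACE;
  BackupEngine* engine = nullptr;
  IOStatus s = BackupEngine::Open(
      BackupEngineOptions("/tmp/rocksdb_backups"),  // illustrative path
      Env::Default(), &engine);
  assert(s.ok());
  // Flushing first keeps the backup from depending on WAL files.
  s = engine->CreateNewBackup(db, /*flush_before_backup=*/true);
  assert(s.ok());
  // Keep only the two most recent backups; older ones are deleted.
  s = engine->PurgeOldBackups(2);
  assert(s.ok());
  // Restore the latest backup into a (closed) DB directory.
  s = engine->RestoreDBFromLatestBackup("/tmp/db_restored",
                                        "/tmp/db_restored");
  assert(s.ok());
  delete engine;
}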
+ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "env/composite_env_wrapper.h" +#include "env/fs_readonly.h" +#include "env/fs_remap.h" +#include "file/filename.h" +#include "file/line_file_reader.h" +#include "file/sequence_file_reader.h" +#include "file/writable_file_writer.h" +#include "logging/logging.h" +#include "monitoring/iostats_context_imp.h" +#include "options/options_helper.h" +#include "port/port.h" +#include "rocksdb/advanced_options.h" +#include "rocksdb/env.h" +#include "rocksdb/rate_limiter.h" +#include "rocksdb/statistics.h" +#include "rocksdb/transaction_log.h" +#include "table/sst_file_dumper.h" +#include "test_util/sync_point.h" +#include "util/cast_util.h" +#include "util/channel.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "util/math.h" +#include "util/rate_limiter_impl.h" +#include "util/string_util.h" +#include "utilities/backup/backup_engine_impl.h" +#include "utilities/checkpoint/checkpoint_impl.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { +using ShareFilesNaming = BackupEngineOptions::ShareFilesNaming; + +constexpr BackupID kLatestBackupIDMarker = static_cast(-2); + +inline uint32_t ChecksumHexToInt32(const std::string& checksum_hex) { + std::string checksum_str; + Slice(checksum_hex).DecodeHex(&checksum_str); + return EndianSwapValue(DecodeFixed32(checksum_str.c_str())); +} +inline std::string ChecksumStrToHex(const std::string& checksum_str) { + return Slice(checksum_str).ToString(true); +} +inline std::string ChecksumInt32ToHex(const uint32_t& checksum_value) { + std::string checksum_str; + PutFixed32(&checksum_str, EndianSwapValue(checksum_value)); + return ChecksumStrToHex(checksum_str); +} + +const std::string kPrivateDirName = "private"; +const std::string kMetaDirName = "meta"; +const std::string kSharedDirName = "shared"; +const std::string kSharedChecksumDirName = "shared_checksum"; +const std::string kPrivateDirSlash = kPrivateDirName + "/"; +const std::string kMetaDirSlash = kMetaDirName + "/"; +const std::string kSharedDirSlash = kSharedDirName + "/"; +const std::string kSharedChecksumDirSlash = kSharedChecksumDirName + "/"; + +} // namespace + +void BackupStatistics::IncrementNumberSuccessBackup() { + number_success_backup++; +} +void BackupStatistics::IncrementNumberFailBackup() { number_fail_backup++; } + +uint32_t BackupStatistics::GetNumberSuccessBackup() const { + return number_success_backup; +} +uint32_t BackupStatistics::GetNumberFailBackup() const { + return number_fail_backup; +} + +std::string BackupStatistics::ToString() const { + char result[50]; + snprintf(result, sizeof(result), "# success backup: %u, # fail backup: %u", + GetNumberSuccessBackup(), GetNumberFailBackup()); + return result; +} + +void BackupEngineOptions::Dump(Logger* logger) const { + ROCKS_LOG_INFO(logger, " Options.backup_dir: %s", + backup_dir.c_str()); + ROCKS_LOG_INFO(logger, " Options.backup_env: %p", backup_env); + ROCKS_LOG_INFO(logger, " Options.share_table_files: %d", + static_cast(share_table_files)); + ROCKS_LOG_INFO(logger, " Options.info_log: %p", info_log); + ROCKS_LOG_INFO(logger, " Options.sync: %d", + static_cast(sync)); + ROCKS_LOG_INFO(logger, " Options.destroy_old_data: %d", + static_cast(destroy_old_data)); + ROCKS_LOG_INFO(logger, " Options.backup_log_files: %d", + static_cast(backup_log_files)); + ROCKS_LOG_INFO(logger, " Options.backup_rate_limit: %" PRIu64, + backup_rate_limit); + 
ROCKS_LOG_INFO(logger, " Options.restore_rate_limit: %" PRIu64, + restore_rate_limit); + ROCKS_LOG_INFO(logger, "Options.max_background_operations: %d", + max_background_operations); +} + +namespace { +// -------- BackupEngineImpl class --------- +class BackupEngineImpl { + public: + BackupEngineImpl(const BackupEngineOptions& options, Env* db_env, + bool read_only = false); + ~BackupEngineImpl(); + + IOStatus CreateNewBackupWithMetadata(const CreateBackupOptions& options, + DB* db, const std::string& app_metadata, + BackupID* new_backup_id_ptr); + + IOStatus PurgeOldBackups(uint32_t num_backups_to_keep); + + IOStatus DeleteBackup(BackupID backup_id); + + void StopBackup() { stop_backup_.store(true, std::memory_order_release); } + + IOStatus GarbageCollect(); + + // The returned BackupInfos are in chronological order, which means the + // latest backup comes last. + void GetBackupInfo(std::vector* backup_info, + bool include_file_details) const; + + Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info, + bool include_file_details = false) const; + + void GetCorruptedBackups(std::vector* corrupt_backup_ids) const; + + IOStatus RestoreDBFromBackup( + const RestoreOptions& options, BackupID backup_id, + const std::string& db_dir, const std::string& wal_dir, + const std::list& locked_restore_from_dirs) const; + + IOStatus VerifyBackup(BackupID backup_id, + bool verify_with_checksum = false) const; + + IOStatus Initialize(); + + ShareFilesNaming GetNamingNoFlags() const { + return options_.share_files_with_checksum_naming & + BackupEngineOptions::kMaskNoNamingFlags; + } + ShareFilesNaming GetNamingFlags() const { + return options_.share_files_with_checksum_naming & + BackupEngineOptions::kMaskNamingFlags; + } + + void TEST_SetDefaultRateLimitersClock( + const std::shared_ptr& backup_rate_limiter_clock, + const std::shared_ptr& restore_rate_limiter_clock) { + if (backup_rate_limiter_clock) { + static_cast(options_.backup_rate_limiter.get()) + ->TEST_SetClock(backup_rate_limiter_clock); + } + + if (restore_rate_limiter_clock) { + static_cast(options_.restore_rate_limiter.get()) + ->TEST_SetClock(restore_rate_limiter_clock); + } + } + + private: + void DeleteChildren(const std::string& dir, + uint32_t file_type_filter = 0) const; + IOStatus DeleteBackupNoGC(BackupID backup_id); + + // Extends the "result" map with pathname->size mappings for the contents of + // "dir" in "env". Pathnames are prefixed with "dir". + IOStatus ReadChildFileCurrentSizes( + const std::string& dir, const std::shared_ptr&, + std::unordered_map* result) const; + + struct FileInfo { + FileInfo(const std::string& fname, uint64_t sz, const std::string& checksum, + const std::string& id, const std::string& sid, Temperature _temp) + : refs(0), + filename(fname), + size(sz), + checksum_hex(checksum), + db_id(id), + db_session_id(sid), + temp(_temp) {} + + FileInfo(const FileInfo&) = delete; + FileInfo& operator=(const FileInfo&) = delete; + + int refs; + // Relative path from backup dir + const std::string filename; + const uint64_t size; + // crc32c checksum as hex. 
empty == unknown / unavailable + std::string checksum_hex; + // DB identities + // db_id is obtained for potential usage in the future but not used + // currently + const std::string db_id; + // db_session_id appears in the backup SST filename if the table naming + // option is kUseDbSessionId + const std::string db_session_id; + Temperature temp; + + std::string GetDbFileName() const { + std::string rv; + // extract the filename part + size_t slash = filename.find_last_of('/'); + // file will either be shared/, shared_checksum/, + // shared_checksum/, shared_checksum/, + // or private// + assert(slash != std::string::npos); + rv = filename.substr(slash + 1); + + // if the file was in shared_checksum, extract the real file name + // in this case the file is __., + // _., or __. + if (filename.substr(0, slash) == kSharedChecksumDirName) { + rv = GetFileFromChecksumFile(rv); + } + return rv; + } + }; + + // TODO: deprecate this function once we migrate all BackupEngine's rate + // limiting to lower-level ones (i.e, ones in file access wrapper level like + // `WritableFileWriter`) + static void LoopRateLimitRequestHelper(const size_t total_bytes_to_request, + RateLimiter* rate_limiter, + const Env::IOPriority pri, + Statistics* stats, + const RateLimiter::OpType op_type); + + static inline std::string WithoutTrailingSlash(const std::string& path) { + if (path.empty() || path.back() != '/') { + return path; + } else { + return path.substr(path.size() - 1); + } + } + + static inline std::string WithTrailingSlash(const std::string& path) { + if (path.empty() || path.back() != '/') { + return path + '/'; + } else { + return path; + } + } + + // A filesystem wrapper that makes shared backup files appear to be in the + // private backup directory (dst_dir), so that the private backup dir can + // be opened as a read-only DB. 
+ class RemapSharedFileSystem : public RemapFileSystem { + public: + RemapSharedFileSystem(const std::shared_ptr& base, + const std::string& dst_dir, + const std::string& src_base_dir, + const std::vector>& files) + : RemapFileSystem(base), + dst_dir_(WithoutTrailingSlash(dst_dir)), + dst_dir_slash_(WithTrailingSlash(dst_dir)), + src_base_dir_(WithTrailingSlash(src_base_dir)) { + for (auto& info : files) { + if (!StartsWith(info->filename, kPrivateDirSlash)) { + assert(StartsWith(info->filename, kSharedDirSlash) || + StartsWith(info->filename, kSharedChecksumDirSlash)); + remaps_[info->GetDbFileName()] = info; + } + } + } + + const char* Name() const override { + return "BackupEngineImpl::RemapSharedFileSystem"; + } + + // Sometimes a directory listing is required in opening a DB + IOStatus GetChildren(const std::string& dir, const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override { + IOStatus s = RemapFileSystem::GetChildren(dir, options, result, dbg); + if (s.ok() && (dir == dst_dir_ || dir == dst_dir_slash_)) { + // Assume remapped files exist + for (auto& r : remaps_) { + result->push_back(r.first); + } + } + return s; + } + + // Sometimes a directory listing is required in opening a DB + IOStatus GetChildrenFileAttributes(const std::string& dir, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override { + IOStatus s = + RemapFileSystem::GetChildrenFileAttributes(dir, options, result, dbg); + if (s.ok() && (dir == dst_dir_ || dir == dst_dir_slash_)) { + // Assume remapped files exist with recorded size + for (auto& r : remaps_) { + result->emplace_back(); // clean up with C++20 + FileAttributes& attr = result->back(); + attr.name = r.first; + attr.size_bytes = r.second->size; + } + } + return s; + } + + protected: + // When a file in dst_dir is requested, see if we need to remap to shared + // file path. + std::pair EncodePath( + const std::string& path) override { + if (path.empty() || path[0] != '/') { + return {IOStatus::InvalidArgument(path, "Not an absolute path"), ""}; + } + std::pair rv{IOStatus(), path}; + if (StartsWith(path, dst_dir_slash_)) { + std::string relative = path.substr(dst_dir_slash_.size()); + auto it = remaps_.find(relative); + if (it != remaps_.end()) { + rv.second = src_base_dir_ + it->second->filename; + } + } + return rv; + } + + private: + // Absolute path to a directory that some extra files will be mapped into. + const std::string dst_dir_; + // Includes a trailing slash. + const std::string dst_dir_slash_; + // Absolute path to a directory containing some files to be mapped into + // dst_dir_. Includes a trailing slash. + const std::string src_base_dir_; + // If remaps_[x] exists, attempt to read dst_dir_ / x should instead read + // src_base_dir_ / remaps_[x]->filename. FileInfo is used to maximize + // sharing with other backup data in memory. 
+ std::unordered_map> remaps_; + }; + + class BackupMeta { + public: + BackupMeta( + const std::string& meta_filename, const std::string& meta_tmp_filename, + std::unordered_map>* file_infos, + Env* env, const std::shared_ptr& fs) + : timestamp_(0), + sequence_number_(0), + size_(0), + meta_filename_(meta_filename), + meta_tmp_filename_(meta_tmp_filename), + file_infos_(file_infos), + env_(env), + fs_(fs) {} + + BackupMeta(const BackupMeta&) = delete; + BackupMeta& operator=(const BackupMeta&) = delete; + + ~BackupMeta() {} + + void RecordTimestamp() { + // Best effort + Status s = env_->GetCurrentTime(×tamp_); + if (!s.ok()) { + timestamp_ = /* something clearly fabricated */ 1; + } + } + int64_t GetTimestamp() const { return timestamp_; } + uint64_t GetSize() const { return size_; } + uint32_t GetNumberFiles() const { + return static_cast(files_.size()); + } + void SetSequenceNumber(uint64_t sequence_number) { + sequence_number_ = sequence_number; + } + uint64_t GetSequenceNumber() const { return sequence_number_; } + + const std::string& GetAppMetadata() const { return app_metadata_; } + + void SetAppMetadata(const std::string& app_metadata) { + app_metadata_ = app_metadata; + } + + IOStatus AddFile(std::shared_ptr file_info); + + void AddExcludedFile(const std::string& relative_file) { + excluded_files_.emplace_back(relative_file); + } + + IOStatus Delete(bool delete_meta = true); + + bool Empty() const { return files_.empty(); } + + std::shared_ptr GetFile(const std::string& filename) const { + auto it = file_infos_->find(filename); + if (it == file_infos_->end()) { + return nullptr; + } + return it->second; + } + + const std::vector>& GetFiles() const { + return files_; + } + + const std::vector& GetExcludedFiles() const { + return excluded_files_; + } + + // @param abs_path_to_size Pre-fetched file sizes (bytes). 
+ IOStatus LoadFromFile( + const std::string& backup_dir, + const std::unordered_map& abs_path_to_size, + RateLimiter* rate_limiter, Logger* info_log, + std::unordered_set* reported_ignored_fields); + IOStatus StoreToFile( + bool sync, int schema_version, + const TEST_BackupMetaSchemaOptions* schema_test_options); + + std::string GetInfoString() { + std::ostringstream ss; + ss << "Timestamp: " << timestamp_ << std::endl; + char human_size[16]; + AppendHumanBytes(size_, human_size, sizeof(human_size)); + ss << "Size: " << human_size << std::endl; + ss << "Files:" << std::endl; + for (const auto& file : files_) { + AppendHumanBytes(file->size, human_size, sizeof(human_size)); + ss << file->filename << ", size " << human_size << ", refs " + << file->refs << std::endl; + } + return ss.str(); + } + + const std::shared_ptr& GetEnvForOpen() const { + if (!env_for_open_) { + // Lazy initialize + // Find directories + std::string dst_dir = meta_filename_; + auto i = dst_dir.rfind(kMetaDirSlash); + assert(i != std::string::npos); + std::string src_base_dir = dst_dir.substr(0, i); + dst_dir.replace(i, kMetaDirSlash.size(), kPrivateDirSlash); + // Make the RemapSharedFileSystem + std::shared_ptr remap_fs = + std::make_shared(fs_, dst_dir, src_base_dir, + files_); + // Make it read-only for safety + remap_fs = std::make_shared(remap_fs); + // Make an Env wrapper + env_for_open_ = std::make_shared(env_, remap_fs); + } + return env_for_open_; + } + + private: + int64_t timestamp_; + // sequence number is only approximate, should not be used + // by clients + uint64_t sequence_number_; + uint64_t size_; + std::string app_metadata_; + std::string const meta_filename_; + std::string const meta_tmp_filename_; + // files with relative paths (without "/" prefix!!) + std::vector> files_; + std::vector excluded_files_; + std::unordered_map>* file_infos_; + Env* env_; + mutable std::shared_ptr env_for_open_; + std::shared_ptr fs_; + IOOptions iooptions_ = IOOptions(); + }; // BackupMeta + + void SetBackupInfoFromBackupMeta(BackupID id, const BackupMeta& meta, + BackupInfo* backup_info, + bool include_file_details) const; + + inline std::string GetAbsolutePath( + const std::string& relative_path = "") const { + assert(relative_path.size() == 0 || relative_path[0] != '/'); + return options_.backup_dir + "/" + relative_path; + } + inline std::string GetPrivateFileRel(BackupID backup_id, bool tmp = false, + const std::string& file = "") const { + assert(file.size() == 0 || file[0] != '/'); + return kPrivateDirSlash + std::to_string(backup_id) + (tmp ? ".tmp" : "") + + "/" + file; + } + inline std::string GetSharedFileRel(const std::string& file = "", + bool tmp = false) const { + assert(file.size() == 0 || file[0] != '/'); + return kSharedDirSlash + std::string(tmp ? "." : "") + file + + (tmp ? ".tmp" : ""); + } + inline std::string GetSharedFileWithChecksumRel(const std::string& file = "", + bool tmp = false) const { + assert(file.size() == 0 || file[0] != '/'); + return kSharedChecksumDirSlash + std::string(tmp ? "." : "") + file + + (tmp ? 
".tmp" : ""); + } + inline bool UseLegacyNaming(const std::string& sid) const { + return GetNamingNoFlags() == + BackupEngineOptions::kLegacyCrc32cAndFileSize || + sid.empty(); + } + inline std::string GetSharedFileWithChecksum( + const std::string& file, const std::string& checksum_hex, + const uint64_t file_size, const std::string& db_session_id) const { + assert(file.size() == 0 || file[0] != '/'); + std::string file_copy = file; + if (UseLegacyNaming(db_session_id)) { + assert(!checksum_hex.empty()); + file_copy.insert(file_copy.find_last_of('.'), + "_" + std::to_string(ChecksumHexToInt32(checksum_hex)) + + "_" + std::to_string(file_size)); + } else { + file_copy.insert(file_copy.find_last_of('.'), "_s" + db_session_id); + if (GetNamingFlags() & BackupEngineOptions::kFlagIncludeFileSize) { + file_copy.insert(file_copy.find_last_of('.'), + "_" + std::to_string(file_size)); + } + } + return file_copy; + } + static inline std::string GetFileFromChecksumFile(const std::string& file) { + assert(file.size() == 0 || file[0] != '/'); + std::string file_copy = file; + size_t first_underscore = file_copy.find_first_of('_'); + return file_copy.erase(first_underscore, + file_copy.find_last_of('.') - first_underscore); + } + inline std::string GetBackupMetaFile(BackupID backup_id, bool tmp) const { + return GetAbsolutePath(kMetaDirName) + "/" + (tmp ? "." : "") + + std::to_string(backup_id) + (tmp ? ".tmp" : ""); + } + + // If size_limit == 0, there is no size limit, copy everything. + // + // Exactly one of src and contents must be non-empty. + // + // @param src If non-empty, the file is copied from this pathname. + // @param contents If non-empty, the file will be created with these contents. + // @param src_temperature Pass in expected temperature of src, return back + // temperature reported by FileSystem + IOStatus CopyOrCreateFile(const std::string& src, const std::string& dst, + const std::string& contents, uint64_t size_limit, + Env* src_env, Env* dst_env, + const EnvOptions& src_env_options, bool sync, + RateLimiter* rate_limiter, + std::function progress_callback, + Temperature* src_temperature, + Temperature dst_temperature, + uint64_t* bytes_toward_next_callback, + uint64_t* size, std::string* checksum_hex); + + IOStatus ReadFileAndComputeChecksum(const std::string& src, + const std::shared_ptr& src_fs, + const EnvOptions& src_env_options, + uint64_t size_limit, + std::string* checksum_hex, + const Temperature src_temperature) const; + + // Obtain db_id and db_session_id from the table properties of file_path + Status GetFileDbIdentities(Env* src_env, const EnvOptions& src_env_options, + const std::string& file_path, + Temperature file_temp, RateLimiter* rate_limiter, + std::string* db_id, std::string* db_session_id); + + struct CopyOrCreateResult { + ~CopyOrCreateResult() { + // The Status needs to be ignored here for two reasons. + // First, if the BackupEngineImpl shuts down with jobs outstanding, then + // it is possible that the Status in the future/promise is never read, + // resulting in an unchecked Status. Second, if there are items in the + // channel when the BackupEngineImpl is shutdown, these will also have + // Status that have not been checked. 
This + // TODO: Fix those issues so that the Status + io_status.PermitUncheckedError(); + } + uint64_t size; + std::string checksum_hex; + std::string db_id; + std::string db_session_id; + IOStatus io_status; + Temperature expected_src_temperature = Temperature::kUnknown; + Temperature current_src_temperature = Temperature::kUnknown; + }; + + // Exactly one of src_path and contents must be non-empty. If src_path is + // non-empty, the file is copied from this pathname. Otherwise, if contents is + // non-empty, the file will be created at dst_path with these contents. + struct CopyOrCreateWorkItem { + std::string src_path; + std::string dst_path; + Temperature src_temperature; + Temperature dst_temperature; + std::string contents; + Env* src_env; + Env* dst_env; + EnvOptions src_env_options; + bool sync; + RateLimiter* rate_limiter; + uint64_t size_limit; + Statistics* stats; + std::promise result; + std::function progress_callback; + std::string src_checksum_func_name; + std::string src_checksum_hex; + std::string db_id; + std::string db_session_id; + + CopyOrCreateWorkItem() + : src_path(""), + dst_path(""), + src_temperature(Temperature::kUnknown), + dst_temperature(Temperature::kUnknown), + contents(""), + src_env(nullptr), + dst_env(nullptr), + src_env_options(), + sync(false), + rate_limiter(nullptr), + size_limit(0), + stats(nullptr), + src_checksum_func_name(kUnknownFileChecksumFuncName), + src_checksum_hex(""), + db_id(""), + db_session_id("") {} + + CopyOrCreateWorkItem(const CopyOrCreateWorkItem&) = delete; + CopyOrCreateWorkItem& operator=(const CopyOrCreateWorkItem&) = delete; + + CopyOrCreateWorkItem(CopyOrCreateWorkItem&& o) noexcept { + *this = std::move(o); + } + + CopyOrCreateWorkItem& operator=(CopyOrCreateWorkItem&& o) noexcept { + src_path = std::move(o.src_path); + dst_path = std::move(o.dst_path); + src_temperature = std::move(o.src_temperature); + dst_temperature = std::move(o.dst_temperature); + contents = std::move(o.contents); + src_env = o.src_env; + dst_env = o.dst_env; + src_env_options = std::move(o.src_env_options); + sync = o.sync; + rate_limiter = o.rate_limiter; + size_limit = o.size_limit; + stats = o.stats; + result = std::move(o.result); + progress_callback = std::move(o.progress_callback); + src_checksum_func_name = std::move(o.src_checksum_func_name); + src_checksum_hex = std::move(o.src_checksum_hex); + db_id = std::move(o.db_id); + db_session_id = std::move(o.db_session_id); + src_temperature = o.src_temperature; + return *this; + } + + CopyOrCreateWorkItem(std::string _src_path, std::string _dst_path, + const Temperature _src_temperature, + const Temperature _dst_temperature, + std::string _contents, Env* _src_env, Env* _dst_env, + EnvOptions _src_env_options, bool _sync, + RateLimiter* _rate_limiter, uint64_t _size_limit, + Statistics* _stats, + std::function _progress_callback = {}, + const std::string& _src_checksum_func_name = + kUnknownFileChecksumFuncName, + const std::string& _src_checksum_hex = "", + const std::string& _db_id = "", + const std::string& _db_session_id = "") + : src_path(std::move(_src_path)), + dst_path(std::move(_dst_path)), + src_temperature(_src_temperature), + dst_temperature(_dst_temperature), + contents(std::move(_contents)), + src_env(_src_env), + dst_env(_dst_env), + src_env_options(std::move(_src_env_options)), + sync(_sync), + rate_limiter(_rate_limiter), + size_limit(_size_limit), + stats(_stats), + progress_callback(_progress_callback), + src_checksum_func_name(_src_checksum_func_name), + 
src_checksum_hex(_src_checksum_hex), + db_id(_db_id), + db_session_id(_db_session_id) {} + }; + + struct BackupAfterCopyOrCreateWorkItem { + std::future result; + bool shared; + bool needed_to_copy; + Env* backup_env; + std::string dst_path_tmp; + std::string dst_path; + std::string dst_relative; + BackupAfterCopyOrCreateWorkItem() + : shared(false), + needed_to_copy(false), + backup_env(nullptr), + dst_path_tmp(""), + dst_path(""), + dst_relative("") {} + + BackupAfterCopyOrCreateWorkItem( + BackupAfterCopyOrCreateWorkItem&& o) noexcept { + *this = std::move(o); + } + + BackupAfterCopyOrCreateWorkItem& operator=( + BackupAfterCopyOrCreateWorkItem&& o) noexcept { + result = std::move(o.result); + shared = o.shared; + needed_to_copy = o.needed_to_copy; + backup_env = o.backup_env; + dst_path_tmp = std::move(o.dst_path_tmp); + dst_path = std::move(o.dst_path); + dst_relative = std::move(o.dst_relative); + return *this; + } + + BackupAfterCopyOrCreateWorkItem(std::future&& _result, + bool _shared, bool _needed_to_copy, + Env* _backup_env, std::string _dst_path_tmp, + std::string _dst_path, + std::string _dst_relative) + : result(std::move(_result)), + shared(_shared), + needed_to_copy(_needed_to_copy), + backup_env(_backup_env), + dst_path_tmp(std::move(_dst_path_tmp)), + dst_path(std::move(_dst_path)), + dst_relative(std::move(_dst_relative)) {} + }; + + using BackupWorkItemPair = + std::pair; + + struct RestoreAfterCopyOrCreateWorkItem { + std::future result; + std::string from_file; + std::string to_file; + std::string checksum_hex; + RestoreAfterCopyOrCreateWorkItem() : checksum_hex("") {} + RestoreAfterCopyOrCreateWorkItem(std::future&& _result, + const std::string& _from_file, + const std::string& _to_file, + const std::string& _checksum_hex) + : result(std::move(_result)), + from_file(_from_file), + to_file(_to_file), + checksum_hex(_checksum_hex) {} + RestoreAfterCopyOrCreateWorkItem( + RestoreAfterCopyOrCreateWorkItem&& o) noexcept { + *this = std::move(o); + } + + RestoreAfterCopyOrCreateWorkItem& operator=( + RestoreAfterCopyOrCreateWorkItem&& o) noexcept { + result = std::move(o.result); + checksum_hex = std::move(o.checksum_hex); + return *this; + } + }; + + bool initialized_; + std::mutex byte_report_mutex_; + mutable channel files_to_copy_or_create_; + std::vector threads_; + std::atomic threads_cpu_priority_; + + // Certain operations like PurgeOldBackups and DeleteBackup will trigger + // automatic GarbageCollect (true) unless we've already done one in this + // session and have not failed to delete backup files since then (false). + bool might_need_garbage_collect_ = true; + + // Adds a file to the backup work queue to be copied or created if it doesn't + // already exist. + // + // Exactly one of src_dir and contents must be non-empty. + // + // @param src_dir If non-empty, the file in this directory named fname will be + // copied. + // @param fname Name of destination file and, in case of copy, source file. + // @param contents If non-empty, the file will be created with these contents. 
+  IOStatus AddBackupFileWorkItem(
+      std::unordered_set<std::string>& live_dst_paths,
+      std::deque<BackupAfterCopyOrCreateWorkItem>& backup_items_to_finish,
+      std::deque<BackupWorkItemPair>* excludable_items, BackupID backup_id,
+      bool shared, const std::string& src_dir,
+      const std::string& fname,  // starts with "/"
+      const EnvOptions& src_env_options, RateLimiter* rate_limiter,
+      FileType file_type, uint64_t size_bytes, Statistics* stats,
+      uint64_t size_limit = 0, bool shared_checksum = false,
+      std::function<void()> progress_callback = {},
+      const std::string& contents = std::string(),
+      const std::string& src_checksum_func_name = kUnknownFileChecksumFuncName,
+      const std::string& src_checksum_str = kUnknownFileChecksum,
+      const Temperature src_temperature = Temperature::kUnknown);
+
+  // backup state data
+  BackupID latest_backup_id_;
+  BackupID latest_valid_backup_id_;
+  std::map<BackupID, std::unique_ptr<BackupMeta>> backups_;
+  std::map<BackupID, std::pair<IOStatus, std::unique_ptr<BackupMeta>>>
+      corrupt_backups_;
+  std::unordered_map<std::string, std::shared_ptr<FileInfo>>
+      backuped_file_infos_;
+  std::atomic<bool> stop_backup_;
+
+  // options data
+  BackupEngineOptions options_;
+  Env* db_env_;
+  Env* backup_env_;
+
+  // directories
+  std::unique_ptr<FSDirectory> backup_directory_;
+  std::unique_ptr<FSDirectory> shared_directory_;
+  std::unique_ptr<FSDirectory> meta_directory_;
+  std::unique_ptr<FSDirectory> private_directory_;
+
+  static const size_t kDefaultCopyFileBufferSize = 5 * 1024 * 1024LL;  // 5MB
+  bool read_only_;
+  BackupStatistics backup_statistics_;
+  std::unordered_set<std::string> reported_ignored_fields_;
+  static const size_t kMaxAppMetaSize = 1024 * 1024;  // 1MB
+  std::shared_ptr<FileSystem> db_fs_;
+  std::shared_ptr<FileSystem> backup_fs_;
+  IOOptions io_options_ = IOOptions();
+
+ public:
+  std::unique_ptr<TEST_BackupMetaSchemaOptions> schema_test_options_;
+};
+
+// -------- BackupEngineImplThreadSafe class ---------
+// This locking layer for thread safety in the public API is layered on
+// top to prevent accidental recursive locking with RWMutex, which is UB.
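+// A minimal sketch of the guard pattern used below (editor's illustration,
+// not additional API): each mutating entry point takes the write lock
+// exactly once and forwards to impl_, which does no locking of its own:
+//
+//   IOStatus DeleteBackup(BackupID id) override {
+//     WriteLock lock(&mutex_);  // RAII; released on every return path
+//     return impl_.DeleteBackup(id);
+//   }
+//
+// so no code path can re-acquire the same RWMutex recursively.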
+// Note: BackupEngineReadOnlyBase inherited twice, but has no fields +class BackupEngineImplThreadSafe : public BackupEngine, + public BackupEngineReadOnly { + public: + BackupEngineImplThreadSafe(const BackupEngineOptions& options, Env* db_env, + bool read_only = false) + : impl_(options, db_env, read_only) {} + ~BackupEngineImplThreadSafe() override {} + + using BackupEngine::CreateNewBackupWithMetadata; + IOStatus CreateNewBackupWithMetadata(const CreateBackupOptions& options, + DB* db, const std::string& app_metadata, + BackupID* new_backup_id) override { + WriteLock lock(&mutex_); + return impl_.CreateNewBackupWithMetadata(options, db, app_metadata, + new_backup_id); + } + + IOStatus PurgeOldBackups(uint32_t num_backups_to_keep) override { + WriteLock lock(&mutex_); + return impl_.PurgeOldBackups(num_backups_to_keep); + } + + IOStatus DeleteBackup(BackupID backup_id) override { + WriteLock lock(&mutex_); + return impl_.DeleteBackup(backup_id); + } + + void StopBackup() override { + // No locking needed + impl_.StopBackup(); + } + + IOStatus GarbageCollect() override { + WriteLock lock(&mutex_); + return impl_.GarbageCollect(); + } + + Status GetLatestBackupInfo(BackupInfo* backup_info, + bool include_file_details = false) const override { + ReadLock lock(&mutex_); + return impl_.GetBackupInfo(kLatestBackupIDMarker, backup_info, + include_file_details); + } + + Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info, + bool include_file_details = false) const override { + ReadLock lock(&mutex_); + return impl_.GetBackupInfo(backup_id, backup_info, include_file_details); + } + + void GetBackupInfo(std::vector* backup_info, + bool include_file_details) const override { + ReadLock lock(&mutex_); + impl_.GetBackupInfo(backup_info, include_file_details); + } + + void GetCorruptedBackups( + std::vector* corrupt_backup_ids) const override { + ReadLock lock(&mutex_); + impl_.GetCorruptedBackups(corrupt_backup_ids); + } + + using BackupEngine::RestoreDBFromBackup; + IOStatus RestoreDBFromBackup(const RestoreOptions& options, + BackupID backup_id, const std::string& db_dir, + const std::string& wal_dir) const override { + // TSAN reports a lock inversion (potential deadlock) if we acquire read + // locks in different orders. Assuming the implementation of RWMutex + // allows simultaneous read locks, there should be no deadlock, because + // there is no write lock involved here. Nevertheless, to appease TSAN and + // in case of degraded RWMutex implementation, we lock the BackupEngines + // including this one and those in options.alternate_dirs in a consistent + // order. + // However, locked_restore_from_dirs is kept in "search" order. 
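+    // (Concrete case of the inversion TSAN would flag: thread 1 restores
+    // from engine A with B in options.alternate_dirs while thread 2 restores
+    // from B with A as the alternate; without the pointer-order sort below,
+    // the two threads would take the same two read locks in opposite
+    // orders.)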
+    std::list<const BackupEngineImpl*> locked_restore_from_dirs;
+    std::vector<port::RWMutex*> mutexes;
+
+    // Add `this`
+    locked_restore_from_dirs.emplace_back(&impl_);
+    mutexes.push_back(&mutex_);
+
+    // Add alternates
+    for (BackupEngineReadOnlyBase* be : options.alternate_dirs) {
+      BackupEngineImplThreadSafe* bets =
+          static_cast_with_check<BackupEngineImplThreadSafe>(
+              be->AsBackupEngine());
+      locked_restore_from_dirs.emplace_back(&bets->impl_);
+      mutexes.push_back(&bets->mutex_);
+    }
+
+    // Acquire read locks in pointer order
+    std::sort(mutexes.begin(), mutexes.end());
+    std::vector<ReadLock> locks(mutexes.begin(), mutexes.end());
+
+    // Impl
+    return impl_.RestoreDBFromBackup(options, backup_id, db_dir, wal_dir,
+                                     locked_restore_from_dirs);
+  }
+
+  using BackupEngine::RestoreDBFromLatestBackup;
+  IOStatus RestoreDBFromLatestBackup(
+      const RestoreOptions& options, const std::string& db_dir,
+      const std::string& wal_dir) const override {
+    // Defer to above function, which locks
+    return RestoreDBFromBackup(options, kLatestBackupIDMarker, db_dir,
+                               wal_dir);
+  }
+
+  IOStatus VerifyBackup(BackupID backup_id,
+                        bool verify_with_checksum = false) const override {
+    ReadLock lock(&mutex_);
+    return impl_.VerifyBackup(backup_id, verify_with_checksum);
+  }
+
+  BackupEngine* AsBackupEngine() override { return this; }
+
+  // Not public API but needed
+  IOStatus Initialize() {
+    // No locking needed
+    return impl_.Initialize();
+  }
+
+  // Not public API but used in testing
+  void TEST_SetBackupMetaSchemaOptions(
+      const TEST_BackupMetaSchemaOptions& options) {
+    impl_.schema_test_options_.reset(new TEST_BackupMetaSchemaOptions(options));
+  }
+
+  // Not public API but used in testing
+  void TEST_SetDefaultRateLimitersClock(
+      const std::shared_ptr<SystemClock>& backup_rate_limiter_clock = nullptr,
+      const std::shared_ptr<SystemClock>& restore_rate_limiter_clock =
+          nullptr) {
+    impl_.TEST_SetDefaultRateLimitersClock(backup_rate_limiter_clock,
+                                           restore_rate_limiter_clock);
+  }
+
+ private:
+  mutable port::RWMutex mutex_;
+  BackupEngineImpl impl_;
+};
+
+}  // namespace
+
+IOStatus BackupEngine::Open(const BackupEngineOptions& options, Env* env,
+                            BackupEngine** backup_engine_ptr) {
+  std::unique_ptr<BackupEngineImplThreadSafe> backup_engine(
+      new BackupEngineImplThreadSafe(options, env));
+  auto s = backup_engine->Initialize();
+  if (!s.ok()) {
+    *backup_engine_ptr = nullptr;
+    return s;
+  }
+  *backup_engine_ptr = backup_engine.release();
+  return IOStatus::OK();
+}
+
+namespace {
+BackupEngineImpl::BackupEngineImpl(const BackupEngineOptions& options,
+                                   Env* db_env, bool read_only)
+    : initialized_(false),
+      threads_cpu_priority_(),
+      latest_backup_id_(0),
+      latest_valid_backup_id_(0),
+      stop_backup_(false),
+      options_(options),
+      db_env_(db_env),
+      backup_env_(options.backup_env != nullptr ? options.backup_env
+                                                : db_env_),
+      read_only_(read_only) {
+  if (options_.backup_rate_limiter == nullptr &&
+      options_.backup_rate_limit > 0) {
+    options_.backup_rate_limiter.reset(
+        NewGenericRateLimiter(options_.backup_rate_limit));
+  }
+  if (options_.restore_rate_limiter == nullptr &&
+      options_.restore_rate_limit > 0) {
+    options_.restore_rate_limiter.reset(
+        NewGenericRateLimiter(options_.restore_rate_limit));
+  }
+  db_fs_ = db_env_->GetFileSystem();
+  backup_fs_ = backup_env_->GetFileSystem();
+}
+
+BackupEngineImpl::~BackupEngineImpl() {
+  files_to_copy_or_create_.sendEof();
+  for (auto& t : threads_) {
+    t.join();
+  }
+  LogFlush(options_.info_log);
+  for (const auto& it : corrupt_backups_) {
+    it.second.first.PermitUncheckedError();
+  }
+}
+
+IOStatus BackupEngineImpl::Initialize() {
+  assert(!initialized_);
+  initialized_ = true;
+  if (read_only_) {
+    ROCKS_LOG_INFO(options_.info_log, "Starting read_only backup engine");
+  }
+  options_.Dump(options_.info_log);
+
+  auto meta_path = GetAbsolutePath(kMetaDirName);
+
+  if (!read_only_) {
+    // we might need to clean up from previous crash or I/O errors
+    might_need_garbage_collect_ = true;
+
+    if (options_.max_valid_backups_to_open !=
+        std::numeric_limits<int32_t>::max()) {
+      options_.max_valid_backups_to_open = std::numeric_limits<int32_t>::max();
+      ROCKS_LOG_WARN(
+          options_.info_log,
+          "`max_valid_backups_to_open` is not set to the default value. "
+          "Ignoring its value since BackupEngine is not read-only.");
+    }
+
+    // gather the list of directories that we need to create
+    std::vector<std::pair<std::string, std::unique_ptr<FSDirectory>*>>
+        directories;
+    directories.emplace_back(GetAbsolutePath(), &backup_directory_);
+    if (options_.share_table_files) {
+      if (options_.share_files_with_checksum) {
+        directories.emplace_back(
+            GetAbsolutePath(GetSharedFileWithChecksumRel()),
+            &shared_directory_);
+      } else {
+        directories.emplace_back(GetAbsolutePath(GetSharedFileRel()),
+                                 &shared_directory_);
+      }
+    }
+    directories.emplace_back(GetAbsolutePath(kPrivateDirName),
+                             &private_directory_);
+    directories.emplace_back(meta_path, &meta_directory_);
+    // create all the dirs we need
+    for (const auto& d : directories) {
+      IOStatus io_s =
+          backup_fs_->CreateDirIfMissing(d.first, io_options_, nullptr);
+      if (io_s.ok()) {
+        io_s =
+            backup_fs_->NewDirectory(d.first, io_options_, d.second, nullptr);
+      }
+      if (!io_s.ok()) {
+        return io_s;
+      }
+    }
+  }
+
+  std::vector<std::string> backup_meta_files;
+  {
+    IOStatus io_s = backup_fs_->GetChildren(meta_path, io_options_,
+                                            &backup_meta_files, nullptr);
+    if (io_s.IsNotFound()) {
+      return IOStatus::NotFound(meta_path + " is missing");
+    } else if (!io_s.ok()) {
+      return io_s;
+    }
+  }
+  // create backups_ structure
+  for (auto& file : backup_meta_files) {
+    ROCKS_LOG_INFO(options_.info_log, "Detected backup %s", file.c_str());
+    BackupID backup_id = 0;
+    sscanf(file.c_str(), "%u", &backup_id);
+    if (backup_id == 0 || file != std::to_string(backup_id)) {
+      // Invalid file name, will be deleted with auto-GC when user
+      // initiates an append or write operation. (Behave as read-only until
+      // then.)
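+      // (For example, "3" parses to 3 and round-trips to "3", so it is
+      // accepted; "3.tmp", "003", and "latest" all fail the round-trip
+      // comparison above and are skipped.)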
+ ROCKS_LOG_INFO(options_.info_log, "Skipping unrecognized meta file %s", + file.c_str()); + continue; + } + assert(backups_.find(backup_id) == backups_.end()); + // Insert all the (backup_id, BackupMeta) that will be loaded later + // The loading performed later will check whether there are corrupt backups + // and move the corrupt backups to corrupt_backups_ + backups_.insert(std::make_pair( + backup_id, std::unique_ptr(new BackupMeta( + GetBackupMetaFile(backup_id, false /* tmp */), + GetBackupMetaFile(backup_id, true /* tmp */), + &backuped_file_infos_, backup_env_, backup_fs_)))); + } + + latest_backup_id_ = 0; + latest_valid_backup_id_ = 0; + if (options_.destroy_old_data) { // Destroy old data + assert(!read_only_); + ROCKS_LOG_INFO( + options_.info_log, + "Backup Engine started with destroy_old_data == true, deleting all " + "backups"); + IOStatus io_s = PurgeOldBackups(0); + if (io_s.ok()) { + io_s = GarbageCollect(); + } + if (!io_s.ok()) { + return io_s; + } + } else { // Load data from storage + // abs_path_to_size: maps absolute paths of files in backup directory to + // their corresponding sizes + std::unordered_map abs_path_to_size; + // Insert files and their sizes in backup sub-directories (shared and + // shared_checksum) to abs_path_to_size + for (const auto& rel_dir : + {GetSharedFileRel(), GetSharedFileWithChecksumRel()}) { + const auto abs_dir = GetAbsolutePath(rel_dir); + IOStatus io_s = + ReadChildFileCurrentSizes(abs_dir, backup_fs_, &abs_path_to_size); + if (!io_s.ok()) { + // I/O error likely impacting all backups + return io_s; + } + } + // load the backups if any, until valid_backups_to_open of the latest + // non-corrupted backups have been successfully opened. + int valid_backups_to_open = options_.max_valid_backups_to_open; + for (auto backup_iter = backups_.rbegin(); backup_iter != backups_.rend(); + ++backup_iter) { + assert(latest_backup_id_ == 0 || latest_backup_id_ > backup_iter->first); + if (latest_backup_id_ == 0) { + latest_backup_id_ = backup_iter->first; + } + if (valid_backups_to_open == 0) { + break; + } + + // Insert files and their sizes in backup sub-directories + // (private/backup_id) to abs_path_to_size + IOStatus io_s = ReadChildFileCurrentSizes( + GetAbsolutePath(GetPrivateFileRel(backup_iter->first)), backup_fs_, + &abs_path_to_size); + if (io_s.ok()) { + io_s = backup_iter->second->LoadFromFile( + options_.backup_dir, abs_path_to_size, + options_.backup_rate_limiter.get(), options_.info_log, + &reported_ignored_fields_); + } + if (io_s.IsCorruption() || io_s.IsNotSupported()) { + ROCKS_LOG_INFO(options_.info_log, "Backup %u corrupted -- %s", + backup_iter->first, io_s.ToString().c_str()); + corrupt_backups_.insert(std::make_pair( + backup_iter->first, + std::make_pair(io_s, std::move(backup_iter->second)))); + } else if (!io_s.ok()) { + // Distinguish corruption errors from errors in the backup Env. + // Errors in the backup Env (i.e., this code path) will cause Open() to + // fail, whereas corruption errors would not cause Open() failures. 
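+        // (For example, a transient I/O error from a network-backed backup
+        // FileSystem lands here and fails Open(), while a truncated or
+        // unparseable meta file was recorded in corrupt_backups_ above and
+        // leaves Open() successful.)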
+ return io_s; + } else { + ROCKS_LOG_INFO(options_.info_log, "Loading backup %" PRIu32 " OK:\n%s", + backup_iter->first, + backup_iter->second->GetInfoString().c_str()); + assert(latest_valid_backup_id_ == 0 || + latest_valid_backup_id_ > backup_iter->first); + if (latest_valid_backup_id_ == 0) { + latest_valid_backup_id_ = backup_iter->first; + } + --valid_backups_to_open; + } + } + + for (const auto& corrupt : corrupt_backups_) { + backups_.erase(backups_.find(corrupt.first)); + } + // erase the backups before max_valid_backups_to_open + int num_unopened_backups; + if (options_.max_valid_backups_to_open == 0) { + num_unopened_backups = 0; + } else { + num_unopened_backups = + std::max(0, static_cast(backups_.size()) - + options_.max_valid_backups_to_open); + } + for (int i = 0; i < num_unopened_backups; ++i) { + assert(backups_.begin()->second->Empty()); + backups_.erase(backups_.begin()); + } + } + + ROCKS_LOG_INFO(options_.info_log, "Latest backup is %u", latest_backup_id_); + ROCKS_LOG_INFO(options_.info_log, "Latest valid backup is %u", + latest_valid_backup_id_); + + // set up threads perform copies from files_to_copy_or_create_ in the + // background + threads_cpu_priority_ = CpuPriority::kNormal; + threads_.reserve(options_.max_background_operations); + for (int t = 0; t < options_.max_background_operations; t++) { + threads_.emplace_back([this]() { +#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) + pthread_setname_np(pthread_self(), "backup_engine"); +#endif +#endif + CpuPriority current_priority = CpuPriority::kNormal; + CopyOrCreateWorkItem work_item; + uint64_t bytes_toward_next_callback = 0; + while (files_to_copy_or_create_.read(work_item)) { + CpuPriority priority = threads_cpu_priority_; + if (current_priority != priority) { + TEST_SYNC_POINT_CALLBACK( + "BackupEngineImpl::Initialize:SetCpuPriority", &priority); + port::SetCpuPriority(0, priority); + current_priority = priority; + } + // `bytes_read` and `bytes_written` stats are enabled based on + // compile-time support and cannot be dynamically toggled. So we do not + // need to worry about `PerfLevel` here, unlike many other + // `IOStatsContext` / `PerfContext` stats. + uint64_t prev_bytes_read = IOSTATS(bytes_read); + uint64_t prev_bytes_written = IOSTATS(bytes_written); + + CopyOrCreateResult result; + Temperature temp = work_item.src_temperature; + result.io_status = CopyOrCreateFile( + work_item.src_path, work_item.dst_path, work_item.contents, + work_item.size_limit, work_item.src_env, work_item.dst_env, + work_item.src_env_options, work_item.sync, work_item.rate_limiter, + work_item.progress_callback, &temp, work_item.dst_temperature, + &bytes_toward_next_callback, &result.size, &result.checksum_hex); + + RecordTick(work_item.stats, BACKUP_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + RecordTick(work_item.stats, BACKUP_WRITE_BYTES, + IOSTATS(bytes_written) - prev_bytes_written); + + result.db_id = work_item.db_id; + result.db_session_id = work_item.db_session_id; + result.expected_src_temperature = work_item.src_temperature; + result.current_src_temperature = temp; + if (result.io_status.ok() && !work_item.src_checksum_hex.empty()) { + // unknown checksum function name implies no db table file checksum in + // db manifest; work_item.src_checksum_hex not empty means + // backup engine has calculated its crc32c checksum for the table + // file; therefore, we are able to compare the checksums. 
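+          // (Editor's note, schematic: both sides hold the crc32c encoded
+          // as a hex string of its four bytes, so the equality test below
+          // is a plain string comparison.)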
+ if (work_item.src_checksum_func_name == + kUnknownFileChecksumFuncName || + work_item.src_checksum_func_name == kDbFileChecksumFuncName) { + if (work_item.src_checksum_hex != result.checksum_hex) { + std::string checksum_info( + "Expected checksum is " + work_item.src_checksum_hex + + " while computed checksum is " + result.checksum_hex); + result.io_status = IOStatus::Corruption( + "Checksum mismatch after copying to " + work_item.dst_path + + ": " + checksum_info); + } + } else { + // FIXME(peterd): dead code? + std::string checksum_function_info( + "Existing checksum function is " + + work_item.src_checksum_func_name + + " while provided checksum function is " + + kBackupFileChecksumFuncName); + ROCKS_LOG_INFO( + options_.info_log, + "Unable to verify checksum after copying to %s: %s\n", + work_item.dst_path.c_str(), checksum_function_info.c_str()); + } + } + work_item.result.set_value(std::move(result)); + } + }); + } + ROCKS_LOG_INFO(options_.info_log, "Initialized BackupEngine"); + return IOStatus::OK(); +} + +IOStatus BackupEngineImpl::CreateNewBackupWithMetadata( + const CreateBackupOptions& options, DB* db, const std::string& app_metadata, + BackupID* new_backup_id_ptr) { + assert(initialized_); + assert(!read_only_); + if (app_metadata.size() > kMaxAppMetaSize) { + return IOStatus::InvalidArgument("App metadata too large"); + } + + bool maybe_exclude_items = bool{options.exclude_files_callback}; + if (maybe_exclude_items && options_.schema_version < 2) { + return IOStatus::InvalidArgument( + "exclude_files_callback requires schema_version >= 2"); + } + + if (options.decrease_background_thread_cpu_priority) { + if (options.background_thread_cpu_priority < threads_cpu_priority_) { + threads_cpu_priority_.store(options.background_thread_cpu_priority); + } + } + + BackupID new_backup_id = latest_backup_id_ + 1; + + // `bytes_read` and `bytes_written` stats are enabled based on compile-time + // support and cannot be dynamically toggled. So we do not need to worry about + // `PerfLevel` here, unlike many other `IOStatsContext` / `PerfContext` stats. + uint64_t prev_bytes_read = IOSTATS(bytes_read); + uint64_t prev_bytes_written = IOSTATS(bytes_written); + + assert(backups_.find(new_backup_id) == backups_.end()); + + auto private_dir = GetAbsolutePath(GetPrivateFileRel(new_backup_id)); + IOStatus io_s = backup_fs_->FileExists(private_dir, io_options_, nullptr); + if (io_s.ok()) { + // maybe last backup failed and left partial state behind, clean it up. + // need to do this before updating backups_ such that a private dir + // named after new_backup_id will be cleaned up. + // (If an incomplete new backup is followed by an incomplete delete + // of the latest full backup, then there could be more than one next + // id with a private dir, the last thing to be deleted in delete + // backup, but all will be cleaned up with a GarbageCollect.) 
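+    // (For example: a crash during creation of backup 5 leaves private/5/
+    // behind; the next CreateNewBackupWithMetadata also computes
+    // new_backup_id == 5, finds the stale directory here, and garbage
+    // collects it before reusing the id.)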
+ io_s = GarbageCollect(); + } else if (io_s.IsNotFound()) { + // normal case, the new backup's private dir doesn't exist yet + io_s = IOStatus::OK(); + } + + auto ret = backups_.insert(std::make_pair( + new_backup_id, std::unique_ptr(new BackupMeta( + GetBackupMetaFile(new_backup_id, false /* tmp */), + GetBackupMetaFile(new_backup_id, true /* tmp */), + &backuped_file_infos_, backup_env_, backup_fs_)))); + assert(ret.second == true); + auto& new_backup = ret.first->second; + new_backup->RecordTimestamp(); + new_backup->SetAppMetadata(app_metadata); + + auto start_backup = backup_env_->NowMicros(); + + ROCKS_LOG_INFO(options_.info_log, + "Started the backup process -- creating backup %u", + new_backup_id); + + if (options_.share_table_files && !options_.share_files_with_checksum) { + ROCKS_LOG_WARN(options_.info_log, + "BackupEngineOptions::share_files_with_checksum=false is " + "DEPRECATED and could lead to data loss."); + } + + if (io_s.ok()) { + io_s = backup_fs_->CreateDir(private_dir, io_options_, nullptr); + } + + // A set into which we will insert the dst_paths that are calculated for live + // files and live WAL files. + // This is used to check whether a live files shares a dst_path with another + // live file. + std::unordered_set live_dst_paths; + + std::deque excludable_items; + std::deque backup_items_to_finish; + // Add a CopyOrCreateWorkItem to the channel for each live file + Status disabled = db->DisableFileDeletions(); + DBOptions db_options = db->GetDBOptions(); + Statistics* stats = db_options.statistics.get(); + if (io_s.ok()) { + CheckpointImpl checkpoint(db); + uint64_t sequence_number = 0; + FileChecksumGenFactory* db_checksum_factory = + db_options.file_checksum_gen_factory.get(); + const std::string kFileChecksumGenFactoryName = + "FileChecksumGenCrc32cFactory"; + bool compare_checksum = + db_checksum_factory != nullptr && + db_checksum_factory->Name() == kFileChecksumGenFactoryName + ? true + : false; + EnvOptions src_raw_env_options(db_options); + RateLimiter* rate_limiter = options_.backup_rate_limiter.get(); + io_s = status_to_io_status(checkpoint.CreateCustomCheckpoint( + [&](const std::string& /*src_dirname*/, const std::string& /*fname*/, + FileType) { + // custom checkpoint will switch to calling copy_file_cb after it sees + // NotSupported returned from link_file_cb. 
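+          // (Editor's note: BackupEngine always copies rather than
+          // hard-links, since the backup directory may live on a different
+          // Env/FileSystem than the DB, so this callback unconditionally
+          // reports NotSupported.)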
+ return IOStatus::NotSupported(); + } /* link_file_cb */, + [&](const std::string& src_dirname, const std::string& fname, + uint64_t size_limit_bytes, FileType type, + const std::string& checksum_func_name, + const std::string& checksum_val, + const Temperature src_temperature) { + if (type == kWalFile && !options_.backup_log_files) { + return IOStatus::OK(); + } + Log(options_.info_log, "add file for backup %s", fname.c_str()); + uint64_t size_bytes = 0; + IOStatus io_st; + if (type == kTableFile || type == kBlobFile) { + io_st = db_fs_->GetFileSize(src_dirname + "/" + fname, io_options_, + &size_bytes, nullptr); + if (!io_st.ok()) { + Log(options_.info_log, "GetFileSize is failed: %s", + io_st.ToString().c_str()); + return io_st; + } + } + EnvOptions src_env_options; + switch (type) { + case kWalFile: + src_env_options = + db_env_->OptimizeForLogRead(src_raw_env_options); + break; + case kTableFile: + src_env_options = db_env_->OptimizeForCompactionTableRead( + src_raw_env_options, ImmutableDBOptions(db_options)); + break; + case kDescriptorFile: + src_env_options = + db_env_->OptimizeForManifestRead(src_raw_env_options); + break; + case kBlobFile: + src_env_options = db_env_->OptimizeForBlobFileRead( + src_raw_env_options, ImmutableDBOptions(db_options)); + break; + default: + // Other backed up files (like options file) are not read by live + // DB, so don't need to worry about avoiding mixing buffered and + // direct I/O. Just use plain defaults. + src_env_options = src_raw_env_options; + break; + } + io_st = AddBackupFileWorkItem( + live_dst_paths, backup_items_to_finish, + maybe_exclude_items ? &excludable_items : nullptr, new_backup_id, + options_.share_table_files && + (type == kTableFile || type == kBlobFile), + src_dirname, fname, src_env_options, rate_limiter, type, + size_bytes, db_options.statistics.get(), size_limit_bytes, + options_.share_files_with_checksum && + (type == kTableFile || type == kBlobFile), + options.progress_callback, "" /* contents */, checksum_func_name, + checksum_val, src_temperature); + return io_st; + } /* copy_file_cb */, + [&](const std::string& fname, const std::string& contents, + FileType type) { + Log(options_.info_log, "add file for backup %s", fname.c_str()); + return AddBackupFileWorkItem( + live_dst_paths, backup_items_to_finish, + maybe_exclude_items ? &excludable_items : nullptr, new_backup_id, + false /* shared */, "" /* src_dir */, fname, + EnvOptions() /* src_env_options */, rate_limiter, type, + contents.size(), db_options.statistics.get(), 0 /* size_limit */, + false /* shared_checksum */, options.progress_callback, contents); + } /* create_file_cb */, + &sequence_number, + options.flush_before_backup ? 0 : std::numeric_limits::max(), + compare_checksum)); + if (io_s.ok()) { + new_backup->SetSequenceNumber(sequence_number); + } + } + ROCKS_LOG_INFO(options_.info_log, "add files for backup done."); + if (io_s.ok() && maybe_exclude_items) { + assert(options.exclude_files_callback); + size_t count = excludable_items.size(); + std::vector maybe_exclude_files; + maybe_exclude_files.reserve(count); + for (auto& e : excludable_items) { + maybe_exclude_files.emplace_back( + BackupExcludedFileInfo(e.second.dst_relative)); + } + if (count > 0) { + try { + options.exclude_files_callback( + &maybe_exclude_files.front(), + /*end pointer*/ &maybe_exclude_files.back() + 1); + } catch (const std::exception& exn) { + io_s = IOStatus::Aborted("Exception in exclude_files_callback: " + + std::string(exn.what())); + } catch (...) 
{ + io_s = IOStatus::Aborted("Unknown exception in exclude_files_callback"); + } + } + if (io_s.ok()) { + for (size_t i = 0; i < count; ++i) { + auto& e = excludable_items[i]; + if (maybe_exclude_files[i].exclude_decision) { + new_backup.get()->AddExcludedFile(e.second.dst_relative); + } else { + files_to_copy_or_create_.write(std::move(e.first)); + backup_items_to_finish.push_back(std::move(e.second)); + } + } + } + excludable_items.clear(); + } else { + assert(!options.exclude_files_callback); + assert(excludable_items.empty()); + } + ROCKS_LOG_INFO(options_.info_log, + "dispatch files for backup done, wait for finish."); + IOStatus item_io_status; + for (auto& item : backup_items_to_finish) { + item.result.wait(); + auto result = item.result.get(); + item_io_status = result.io_status; + Temperature temp = result.expected_src_temperature; + if (result.current_src_temperature != Temperature::kUnknown && + (temp == Temperature::kUnknown || + options_.current_temperatures_override_manifest)) { + temp = result.current_src_temperature; + } + if (item_io_status.ok() && item.shared && item.needed_to_copy) { + item_io_status = item.backup_env->GetFileSystem()->RenameFile( + item.dst_path_tmp, item.dst_path, io_options_, nullptr); + } + if (item_io_status.ok()) { + item_io_status = new_backup.get()->AddFile(std::make_shared( + item.dst_relative, result.size, result.checksum_hex, result.db_id, + result.db_session_id, temp)); + } + if (!item_io_status.ok()) { + io_s = item_io_status; + } + } + + // we copied all the files, enable file deletions + if (disabled.ok()) { // If we successfully disabled file deletions + db->EnableFileDeletions(false).PermitUncheckedError(); + } + auto backup_time = backup_env_->NowMicros() - start_backup; + + if (io_s.ok()) { + // persist the backup metadata on the disk + io_s = new_backup->StoreToFile(options_.sync, options_.schema_version, + schema_test_options_.get()); + } + if (io_s.ok() && options_.sync) { + std::unique_ptr backup_private_directory; + backup_fs_ + ->NewDirectory(GetAbsolutePath(GetPrivateFileRel(new_backup_id, false)), + io_options_, &backup_private_directory, nullptr) + .PermitUncheckedError(); + if (backup_private_directory != nullptr) { + io_s = backup_private_directory->FsyncWithDirOptions(io_options_, nullptr, + DirFsyncOptions()); + } + if (io_s.ok() && private_directory_ != nullptr) { + io_s = private_directory_->FsyncWithDirOptions(io_options_, nullptr, + DirFsyncOptions()); + } + if (io_s.ok() && meta_directory_ != nullptr) { + io_s = meta_directory_->FsyncWithDirOptions(io_options_, nullptr, + DirFsyncOptions()); + } + if (io_s.ok() && shared_directory_ != nullptr) { + io_s = shared_directory_->FsyncWithDirOptions(io_options_, nullptr, + DirFsyncOptions()); + } + if (io_s.ok() && backup_directory_ != nullptr) { + io_s = backup_directory_->FsyncWithDirOptions(io_options_, nullptr, + DirFsyncOptions()); + } + } + + if (io_s.ok()) { + backup_statistics_.IncrementNumberSuccessBackup(); + // here we know that we succeeded and installed the new backup + latest_backup_id_ = new_backup_id; + latest_valid_backup_id_ = new_backup_id; + if (new_backup_id_ptr) { + *new_backup_id_ptr = new_backup_id; + } + ROCKS_LOG_INFO(options_.info_log, "Backup DONE. 
All is good"); + + // backup_speed is in byte/second + double backup_speed = new_backup->GetSize() / (1.048576 * backup_time); + ROCKS_LOG_INFO(options_.info_log, "Backup number of files: %u", + new_backup->GetNumberFiles()); + char human_size[16]; + AppendHumanBytes(new_backup->GetSize(), human_size, sizeof(human_size)); + ROCKS_LOG_INFO(options_.info_log, "Backup size: %s", human_size); + ROCKS_LOG_INFO(options_.info_log, "Backup time: %" PRIu64 " microseconds", + backup_time); + ROCKS_LOG_INFO(options_.info_log, "Backup speed: %.3f MB/s", backup_speed); + ROCKS_LOG_INFO(options_.info_log, "Backup Statistics %s", + backup_statistics_.ToString().c_str()); + } else { + backup_statistics_.IncrementNumberFailBackup(); + // clean all the files we might have created + ROCKS_LOG_INFO(options_.info_log, "Backup failed -- %s", + io_s.ToString().c_str()); + ROCKS_LOG_INFO(options_.info_log, "Backup Statistics %s\n", + backup_statistics_.ToString().c_str()); + // delete files that we might have already written + might_need_garbage_collect_ = true; + DeleteBackup(new_backup_id).PermitUncheckedError(); + } + + RecordTick(stats, BACKUP_READ_BYTES, IOSTATS(bytes_read) - prev_bytes_read); + RecordTick(stats, BACKUP_WRITE_BYTES, + IOSTATS(bytes_written) - prev_bytes_written); + return io_s; +} + +IOStatus BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) { + assert(initialized_); + assert(!read_only_); + + // Best effort deletion even with errors + IOStatus overall_status = IOStatus::OK(); + + ROCKS_LOG_INFO(options_.info_log, "Purging old backups, keeping %u", + num_backups_to_keep); + std::vector to_delete; + auto itr = backups_.begin(); + while ((backups_.size() - to_delete.size()) > num_backups_to_keep) { + to_delete.push_back(itr->first); + itr++; + } + for (auto backup_id : to_delete) { + // Do not GC until end + IOStatus io_s = DeleteBackupNoGC(backup_id); + if (!io_s.ok()) { + overall_status = io_s; + } + } + // Clean up after any incomplete backup deletion, potentially from + // earlier session. + if (might_need_garbage_collect_) { + IOStatus io_s = GarbageCollect(); + if (!io_s.ok() && overall_status.ok()) { + overall_status = io_s; + } + } + return overall_status; +} + +IOStatus BackupEngineImpl::DeleteBackup(BackupID backup_id) { + IOStatus s1 = DeleteBackupNoGC(backup_id); + IOStatus s2 = IOStatus::OK(); + + // Clean up after any incomplete backup deletion, potentially from + // earlier session. + if (might_need_garbage_collect_) { + s2 = GarbageCollect(); + } + + if (!s1.ok()) { + // Any failure in the primary objective trumps any failure in the + // secondary objective. 
+ s2.PermitUncheckedError(); + return s1; + } else { + return s2; + } +} + +// Does not auto-GarbageCollect nor lock +IOStatus BackupEngineImpl::DeleteBackupNoGC(BackupID backup_id) { + assert(initialized_); + assert(!read_only_); + + ROCKS_LOG_INFO(options_.info_log, "Deleting backup %u", backup_id); + auto backup = backups_.find(backup_id); + if (backup != backups_.end()) { + IOStatus io_s = backup->second->Delete(); + if (!io_s.ok()) { + return io_s; + } + backups_.erase(backup); + if (backups_.empty()) { + latest_valid_backup_id_ = 0; + } else { + latest_valid_backup_id_ = backups_.rbegin()->first; + } + } else { + auto corrupt = corrupt_backups_.find(backup_id); + if (corrupt == corrupt_backups_.end()) { + return IOStatus::NotFound("Backup not found"); + } + IOStatus io_s = corrupt->second.second->Delete(); + if (!io_s.ok()) { + return io_s; + } + corrupt->second.first.PermitUncheckedError(); + corrupt_backups_.erase(corrupt); + } + + // After removing meta file, best effort deletion even with errors. + // (Don't delete other files if we can't delete the meta file right + // now.) + std::vector to_delete; + for (auto& itr : backuped_file_infos_) { + if (itr.second->refs == 0) { + IOStatus io_s = backup_fs_->DeleteFile(GetAbsolutePath(itr.first), + io_options_, nullptr); + ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s", itr.first.c_str(), + io_s.ToString().c_str()); + to_delete.push_back(itr.first); + if (!io_s.ok()) { + // Trying again later might work + might_need_garbage_collect_ = true; + } + } + } + for (auto& td : to_delete) { + backuped_file_infos_.erase(td); + } + + // take care of private dirs -- GarbageCollect() will take care of them + // if they are not empty + std::string private_dir = GetPrivateFileRel(backup_id); + IOStatus io_s = + backup_fs_->DeleteDir(GetAbsolutePath(private_dir), io_options_, nullptr); + ROCKS_LOG_INFO(options_.info_log, "Deleting private dir %s -- %s", + private_dir.c_str(), io_s.ToString().c_str()); + if (!io_s.ok()) { + // Full gc or trying again later might work + might_need_garbage_collect_ = true; + } + return IOStatus::OK(); +} + +void BackupEngineImpl::SetBackupInfoFromBackupMeta( + BackupID id, const BackupMeta& meta, BackupInfo* backup_info, + bool include_file_details) const { + *backup_info = BackupInfo(id, meta.GetTimestamp(), meta.GetSize(), + meta.GetNumberFiles(), meta.GetAppMetadata()); + std::string dir = + options_.backup_dir + "/" + kPrivateDirSlash + std::to_string(id); + if (include_file_details) { + auto& file_details = backup_info->file_details; + file_details.reserve(meta.GetFiles().size()); + for (auto& file_ptr : meta.GetFiles()) { + BackupFileInfo& finfo = file_details.emplace_back(); + finfo.relative_filename = file_ptr->filename; + finfo.size = file_ptr->size; + finfo.directory = dir; + uint64_t number; + FileType type; + bool ok = ParseFileName(file_ptr->filename, &number, &type); + if (ok) { + finfo.file_number = number; + finfo.file_type = type; + } + // TODO: temperature, file_checksum, file_checksum_func_name + // finfo.temperature = file_ptr->temp; + } + backup_info->excluded_files = meta.GetExcludedFiles(); + + backup_info->name_for_open = GetAbsolutePath(GetPrivateFileRel(id)); + backup_info->name_for_open.pop_back(); // remove trailing '/' + backup_info->env_for_open = meta.GetEnvForOpen(); + } +} + +Status BackupEngineImpl::GetBackupInfo(BackupID backup_id, + BackupInfo* backup_info, + bool include_file_details) const { + assert(initialized_); + if (backup_id == kLatestBackupIDMarker) { + // Note: Read 
latest_valid_backup_id_ inside of lock + backup_id = latest_valid_backup_id_; + } + auto corrupt_itr = corrupt_backups_.find(backup_id); + if (corrupt_itr != corrupt_backups_.end()) { + return Status::Corruption(corrupt_itr->second.first.ToString()); + } + auto backup_itr = backups_.find(backup_id); + if (backup_itr == backups_.end()) { + return Status::NotFound("Backup not found"); + } + auto& backup = backup_itr->second; + if (backup->Empty()) { + return Status::NotFound("Backup not found"); + } + + SetBackupInfoFromBackupMeta(backup_id, *backup, backup_info, + include_file_details); + return Status::OK(); +} + +void BackupEngineImpl::GetBackupInfo(std::vector* backup_info, + bool include_file_details) const { + assert(initialized_); + backup_info->resize(backups_.size()); + size_t i = 0; + for (auto& backup : backups_) { + const BackupMeta& meta = *backup.second; + if (!meta.Empty()) { + SetBackupInfoFromBackupMeta(backup.first, meta, &backup_info->at(i++), + include_file_details); + } + } +} + +void BackupEngineImpl::GetCorruptedBackups( + std::vector* corrupt_backup_ids) const { + assert(initialized_); + corrupt_backup_ids->reserve(corrupt_backups_.size()); + for (auto& backup : corrupt_backups_) { + corrupt_backup_ids->push_back(backup.first); + } +} + +IOStatus BackupEngineImpl::RestoreDBFromBackup( + const RestoreOptions& options, BackupID backup_id, + const std::string& db_dir, const std::string& wal_dir, + const std::list& locked_restore_from_dirs) const { + assert(initialized_); + if (backup_id == kLatestBackupIDMarker) { + // Note: Read latest_valid_backup_id_ inside of lock + backup_id = latest_valid_backup_id_; + } + auto corrupt_itr = corrupt_backups_.find(backup_id); + if (corrupt_itr != corrupt_backups_.end()) { + return corrupt_itr->second.first; + } + auto backup_itr = backups_.find(backup_id); + if (backup_itr == backups_.end()) { + return IOStatus::NotFound("Backup not found"); + } + auto& backup = backup_itr->second; + if (backup->Empty()) { + return IOStatus::NotFound("Backup not found"); + } + + ROCKS_LOG_INFO(options_.info_log, "Restoring backup id %u\n", backup_id); + ROCKS_LOG_INFO(options_.info_log, "keep_log_files: %d\n", + static_cast(options.keep_log_files)); + + // just in case. 
Ignore errors
+  db_fs_->CreateDirIfMissing(db_dir, io_options_, nullptr)
+      .PermitUncheckedError();
+  db_fs_->CreateDirIfMissing(wal_dir, io_options_, nullptr)
+      .PermitUncheckedError();
+
+  if (options.keep_log_files) {
+    // delete files in db_dir, but keep all the log files
+    DeleteChildren(db_dir, 1 << kWalFile);
+    // move all the files from archive dir to wal_dir
+    std::string archive_dir = ArchivalDirectory(wal_dir);
+    std::vector<std::string> archive_files;
+    db_fs_->GetChildren(archive_dir, io_options_, &archive_files, nullptr)
+        .PermitUncheckedError();  // ignore errors
+    for (const auto& f : archive_files) {
+      uint64_t number;
+      FileType type;
+      bool ok = ParseFileName(f, &number, &type);
+      if (ok && type == kWalFile) {
+        ROCKS_LOG_INFO(options_.info_log,
+                       "Moving log file from archive/ to wal_dir: %s",
+                       f.c_str());
+        IOStatus io_s = db_fs_->RenameFile(
+            archive_dir + "/" + f, wal_dir + "/" + f, io_options_, nullptr);
+        if (!io_s.ok()) {
+          // if we can't move log file from archive_dir to wal_dir,
+          // we should fail, since it might mean data loss
+          return io_s;
+        }
+      }
+    }
+  } else {
+    DeleteChildren(wal_dir);
+    DeleteChildren(ArchivalDirectory(wal_dir));
+    DeleteChildren(db_dir);
+  }
+
+  // Files to restore, and from where (taking into account excluded files)
+  std::vector<std::pair<const BackupEngineImpl*, const FileInfo*>>
+      restore_file_infos;
+  restore_file_infos.reserve(backup->GetFiles().size() +
+                             backup->GetExcludedFiles().size());
+
+  for (const auto& ef : backup->GetExcludedFiles()) {
+    const std::string& file = ef.relative_file;
+
+    bool found = false;
+    for (auto be : locked_restore_from_dirs) {
+      auto it = be->backuped_file_infos_.find(file);
+      // (compare against be's map, the one actually searched)
+      if (it != be->backuped_file_infos_.end()) {
+        restore_file_infos.emplace_back(be, &*it->second);
+        found = true;
+        break;
+      }
+    }
+    if (!found) {
+      return IOStatus::InvalidArgument(
+          "Excluded file " + file + " not found in other backups nor in " +
+          std::to_string(locked_restore_from_dirs.size() - 1) +
+          " alternate backup directories");
+    }
+  }
+
+  // Non-excluded files
+  for (const auto& file_info_shared : backup->GetFiles()) {
+    restore_file_infos.emplace_back(this, &*file_info_shared);
+  }
+
+  IOStatus io_s;
+  std::vector<RestoreAfterCopyOrCreateWorkItem> restore_items_to_finish;
+  std::string temporary_current_file;
+  std::string final_current_file;
+  std::unique_ptr<FSDirectory> db_dir_for_fsync;
+  std::unique_ptr<FSDirectory> wal_dir_for_fsync;
+
+  for (const auto& engine_and_file_info : restore_file_infos) {
+    const FileInfo* file_info = engine_and_file_info.second;
+    const std::string& file = file_info->filename;
+    std::string absolute_file =
+        engine_and_file_info.first->GetAbsolutePath(file);
+    Env* src_env = engine_and_file_info.first->backup_env_;
+
+    // 1. get DB filename
+    std::string dst = file_info->GetDbFileName();
+
+    // 2. find the filetype
+    uint64_t number;
+    FileType type;
+    bool ok = ParseFileName(dst, &number, &type);
+    if (!ok) {
+      return IOStatus::Corruption("Backup corrupted: Fail to parse filename " +
+                                  dst);
+    }
+    // 3. Construct the final path
+    // kWalFile lives in wal_dir and all the rest live in db_dir
+    if (type == kWalFile) {
+      dst = wal_dir + "/" + dst;
+      if (options_.sync && !wal_dir_for_fsync) {
+        io_s = db_fs_->NewDirectory(wal_dir, io_options_, &wal_dir_for_fsync,
+                                    nullptr);
+        if (!io_s.ok()) {
+          return io_s;
+        }
+      }
+    } else {
+      dst = db_dir + "/" + dst;
+      if (options_.sync && !db_dir_for_fsync) {
+        io_s = db_fs_->NewDirectory(db_dir, io_options_, &db_dir_for_fsync,
+                                    nullptr);
+        if (!io_s.ok()) {
+          return io_s;
+        }
+      }
+    }
+    // For atomicity, initially restore CURRENT file to a temporary name.
+ // This is useful even without options_.sync e.g. in case the restore + // process is interrupted. + if (type == kCurrentFile) { + final_current_file = dst; + dst = temporary_current_file = dst + ".tmp"; + } + + ROCKS_LOG_INFO(options_.info_log, "Restoring %s to %s\n", file.c_str(), + dst.c_str()); + CopyOrCreateWorkItem copy_or_create_work_item( + absolute_file, dst, Temperature::kUnknown /* src_temp */, + file_info->temp, "" /* contents */, src_env, db_env_, + EnvOptions() /* src_env_options */, options_.sync, + options_.restore_rate_limiter.get(), file_info->size, + nullptr /* stats */); + RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item( + copy_or_create_work_item.result.get_future(), file, dst, + file_info->checksum_hex); + files_to_copy_or_create_.write(std::move(copy_or_create_work_item)); + restore_items_to_finish.push_back( + std::move(after_copy_or_create_work_item)); + } + IOStatus item_io_status; + for (auto& item : restore_items_to_finish) { + item.result.wait(); + auto result = item.result.get(); + item_io_status = result.io_status; + // Note: It is possible that both of the following bad-status cases occur + // during copying. But, we only return one status. + if (!item_io_status.ok()) { + io_s = item_io_status; + break; + } else if (!item.checksum_hex.empty() && + item.checksum_hex != result.checksum_hex) { + io_s = IOStatus::Corruption( + "While restoring " + item.from_file + " -> " + item.to_file + + ": expected checksum is " + item.checksum_hex + + " while computed checksum is " + result.checksum_hex); + break; + } + } + + // When enabled, the first FsyncWithDirOptions is to ensure all files are + // fully persisted before renaming CURRENT.tmp + if (io_s.ok() && db_dir_for_fsync) { + ROCKS_LOG_INFO(options_.info_log, "Restore: fsync\n"); + io_s = db_dir_for_fsync->FsyncWithDirOptions(io_options_, nullptr, + DirFsyncOptions()); + } + + if (io_s.ok() && wal_dir_for_fsync) { + io_s = wal_dir_for_fsync->FsyncWithDirOptions(io_options_, nullptr, + DirFsyncOptions()); + } + + if (io_s.ok() && !temporary_current_file.empty()) { + ROCKS_LOG_INFO(options_.info_log, "Restore: atomic rename CURRENT.tmp\n"); + assert(!final_current_file.empty()); + io_s = db_fs_->RenameFile(temporary_current_file, final_current_file, + io_options_, nullptr); + } + + if (io_s.ok() && db_dir_for_fsync && !temporary_current_file.empty()) { + // Second FsyncWithDirOptions is to ensure the final atomic rename of DB + // restore is fully persisted even if power goes out right after restore + // operation returns success + assert(db_dir_for_fsync); + io_s = db_dir_for_fsync->FsyncWithDirOptions( + io_options_, nullptr, DirFsyncOptions(final_current_file)); + } + + ROCKS_LOG_INFO(options_.info_log, "Restoring done -- %s\n", + io_s.ToString().c_str()); + return io_s; +} + +IOStatus BackupEngineImpl::VerifyBackup(BackupID backup_id, + bool verify_with_checksum) const { + assert(initialized_); + // Check if backup_id is corrupted, or valid and registered + auto corrupt_itr = corrupt_backups_.find(backup_id); + if (corrupt_itr != corrupt_backups_.end()) { + return corrupt_itr->second.first; + } + + auto backup_itr = backups_.find(backup_id); + if (backup_itr == backups_.end()) { + return IOStatus::NotFound(); + } + + auto& backup = backup_itr->second; + if (backup->Empty()) { + return IOStatus::NotFound(); + } + + ROCKS_LOG_INFO(options_.info_log, "Verifying backup id %u\n", backup_id); + + // Find all existing backup files belong to backup_id + std::unordered_map curr_abs_path_to_size; + for 
(const auto& rel_dir : {GetPrivateFileRel(backup_id), GetSharedFileRel(), + GetSharedFileWithChecksumRel()}) { + const auto abs_dir = GetAbsolutePath(rel_dir); + // Shared directories allowed to be missing in some cases. Expected but + // missing files will be reported a few lines down. + ReadChildFileCurrentSizes(abs_dir, backup_fs_, &curr_abs_path_to_size) + .PermitUncheckedError(); + } + + // For all files registered in backup + for (const auto& file_info : backup->GetFiles()) { + const auto abs_path = GetAbsolutePath(file_info->filename); + // check existence of the file + if (curr_abs_path_to_size.find(abs_path) == curr_abs_path_to_size.end()) { + return IOStatus::NotFound("File missing: " + abs_path); + } + // verify file size + if (file_info->size != curr_abs_path_to_size[abs_path]) { + std::string size_info("Expected file size is " + + std::to_string(file_info->size) + + " while found file size is " + + std::to_string(curr_abs_path_to_size[abs_path])); + return IOStatus::Corruption("File corrupted: File size mismatch for " + + abs_path + ": " + size_info); + } + if (verify_with_checksum && !file_info->checksum_hex.empty()) { + // verify file checksum + std::string checksum_hex; + ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n", + abs_path.c_str()); + IOStatus io_s = ReadFileAndComputeChecksum( + abs_path, backup_fs_, EnvOptions(), 0 /* size_limit */, &checksum_hex, + Temperature::kUnknown); + if (!io_s.ok()) { + return io_s; + } else if (file_info->checksum_hex != checksum_hex) { + std::string checksum_info( + "Expected checksum is " + file_info->checksum_hex + + " while computed checksum is " + checksum_hex); + return IOStatus::Corruption("File corrupted: Checksum mismatch for " + + abs_path + ": " + checksum_info); + } + } + } + return IOStatus::OK(); +} + +IOStatus BackupEngineImpl::CopyOrCreateFile( + const std::string& src, const std::string& dst, const std::string& contents, + uint64_t size_limit, Env* src_env, Env* dst_env, + const EnvOptions& src_env_options, bool sync, RateLimiter* rate_limiter, + std::function progress_callback, Temperature* src_temperature, + Temperature dst_temperature, uint64_t* bytes_toward_next_callback, + uint64_t* size, std::string* checksum_hex) { + assert(src.empty() != contents.empty()); + IOStatus io_s; + std::unique_ptr dst_file; + std::unique_ptr src_file; + FileOptions dst_file_options; + dst_file_options.use_mmap_writes = false; + dst_file_options.temperature = dst_temperature; + // TODO:(gzh) maybe use direct reads/writes here if possible + if (size != nullptr) { + *size = 0; + } + uint32_t checksum_value = 0; + + // Check if size limit is set. if not, set it to very big number + if (size_limit == 0) { + size_limit = std::numeric_limits::max(); + } + + io_s = dst_env->GetFileSystem()->NewWritableFile(dst, dst_file_options, + &dst_file, nullptr); + if (io_s.ok() && !src.empty()) { + auto src_file_options = FileOptions(src_env_options); + src_file_options.temperature = *src_temperature; + io_s = src_env->GetFileSystem()->NewSequentialFile(src, src_file_options, + &src_file, nullptr); + } + if (io_s.IsPathNotFound() && *src_temperature != Temperature::kUnknown) { + // Retry without temperature hint in case the FileSystem is strict with + // non-kUnknown temperature option + io_s = src_env->GetFileSystem()->NewSequentialFile( + src, FileOptions(src_env_options), &src_file, nullptr); + } + if (!io_s.ok()) { + return io_s; + } + + size_t buf_size = + rate_limiter ? 
static_cast(rate_limiter->GetSingleBurstBytes()) + : kDefaultCopyFileBufferSize; + + std::unique_ptr dest_writer( + new WritableFileWriter(std::move(dst_file), dst, dst_file_options)); + std::unique_ptr src_reader; + std::unique_ptr buf; + if (!src.empty()) { + // Return back current temperature in FileSystem + *src_temperature = src_file->GetTemperature(); + + src_reader.reset(new SequentialFileReader( + std::move(src_file), src, nullptr /* io_tracer */, {}, rate_limiter)); + buf.reset(new char[buf_size]); + } + + Slice data; + do { + if (stop_backup_.load(std::memory_order_acquire)) { + return status_to_io_status(Status::Incomplete("Backup stopped")); + } + if (!src.empty()) { + size_t buffer_to_read = + (buf_size < size_limit) ? buf_size : static_cast(size_limit); + io_s = src_reader->Read(buffer_to_read, &data, buf.get(), + Env::IO_LOW /* rate_limiter_priority */); + *bytes_toward_next_callback += data.size(); + } else { + data = contents; + } + size_limit -= data.size(); + TEST_SYNC_POINT_CALLBACK( + "BackupEngineImpl::CopyOrCreateFile:CorruptionDuringBackup", + (src.length() > 4 && src.rfind(".sst") == src.length() - 4) ? &data + : nullptr); + + if (!io_s.ok()) { + return io_s; + } + + if (size != nullptr) { + *size += data.size(); + } + if (checksum_hex != nullptr) { + checksum_value = crc32c::Extend(checksum_value, data.data(), data.size()); + } + io_s = dest_writer->Append(data); + + if (rate_limiter != nullptr) { + if (!src.empty()) { + rate_limiter->Request(data.size(), Env::IO_LOW, nullptr /* stats */, + RateLimiter::OpType::kWrite); + } else { + LoopRateLimitRequestHelper(data.size(), rate_limiter, Env::IO_LOW, + nullptr /* stats */, + RateLimiter::OpType::kWrite); + } + } + while (*bytes_toward_next_callback >= + options_.callback_trigger_interval_size) { + *bytes_toward_next_callback -= options_.callback_trigger_interval_size; + if (progress_callback) { + std::lock_guard lock(byte_report_mutex_); + try { + progress_callback(); + } catch (const std::exception& exn) { + io_s = IOStatus::Aborted("Exception in progress_callback: " + + std::string(exn.what())); + break; + } catch (...) { + io_s = IOStatus::Aborted("Unknown exception in progress_callback"); + break; + } + } + } + } while (io_s.ok() && contents.empty() && data.size() > 0 && size_limit > 0); + + // Convert uint32_t checksum to hex checksum + if (checksum_hex != nullptr) { + checksum_hex->assign(ChecksumInt32ToHex(checksum_value)); + } + + if (io_s.ok() && sync) { + io_s = dest_writer->Sync(false); + } + if (io_s.ok()) { + io_s = dest_writer->Close(); + } + return io_s; +} + +// fname will always start with "/" +IOStatus BackupEngineImpl::AddBackupFileWorkItem( + std::unordered_set& live_dst_paths, + std::deque& backup_items_to_finish, + std::deque* excludable_items, BackupID backup_id, + bool shared, const std::string& src_dir, const std::string& fname, + const EnvOptions& src_env_options, RateLimiter* rate_limiter, + FileType file_type, uint64_t size_bytes, Statistics* stats, + uint64_t size_limit, bool shared_checksum, + std::function progress_callback, const std::string& contents, + const std::string& src_checksum_func_name, + const std::string& src_checksum_str, const Temperature src_temperature) { + assert(contents.empty() != src_dir.empty()); + + std::string src_path = src_dir + "/" + fname; + std::string dst_relative; + std::string dst_relative_tmp; + std::string db_id; + std::string db_session_id; + // crc32c checksum in hex. 
+  std::string checksum_hex;
+
+  // Whenever a default checksum function name is passed in, we will compare
+  // the corresponding checksum values after copying. Note that only table
+  // and blob files may have a known checksum function name passed in.
+  //
+  // If no default checksum function name is passed in and db session id is
+  // not available, we will calculate the checksum *before* copying in two
+  // cases (we always calculate checksums when copying or creating for any
+  // file types):
+  // a) share_files_with_checksum is true and file type is table;
+  // b) share_table_files is true and the file exists already.
+  //
+  // Step 0: Check if default checksum function name is passed in
+  if (kDbFileChecksumFuncName == src_checksum_func_name) {
+    if (src_checksum_str == kUnknownFileChecksum) {
+      return status_to_io_status(
+          Status::Aborted("Unknown checksum value for " + fname));
+    }
+    checksum_hex = ChecksumStrToHex(src_checksum_str);
+  }
+
+  // Step 1: Prepare the relative path to destination
+  if (shared && shared_checksum) {
+    if (GetNamingNoFlags() != BackupEngineOptions::kLegacyCrc32cAndFileSize &&
+        file_type != kBlobFile) {
+      // Prepare db_session_id to add to the file name
+      // Ignore the returned status
+      // In the failed cases, db_id and db_session_id will be empty
+      GetFileDbIdentities(db_env_, src_env_options, src_path, src_temperature,
+                          rate_limiter, &db_id, &db_session_id)
+          .PermitUncheckedError();
+    }
+    // Calculate checksum if checksum and db session id are not available.
+    // If db session id is available, we will not calculate the checksum
+    // since the session id should suffice to avoid file name collision in
+    // the shared_checksum directory.
+    if (checksum_hex.empty() && db_session_id.empty()) {
+      IOStatus io_s = ReadFileAndComputeChecksum(
+          src_path, db_fs_, src_env_options, size_limit, &checksum_hex,
+          src_temperature);
+      if (!io_s.ok()) {
+        return io_s;
+      }
+    }
+    if (size_bytes == std::numeric_limits<uint64_t>::max()) {
+      return IOStatus::NotFound("File missing: " + src_path);
+    }
+    // dst_relative depends on the following conditions:
+    // 1) the naming scheme is kUseDbSessionId,
+    // 2) db_session_id is not empty,
+    // 3) checksum is available in the DB manifest.
+    // If 1,2,3) are satisfied, then dst_relative will be of the form:
+    // shared_checksum/<file_number>_<checksum>_<db_session_id>.sst
+    // If 1,2) are satisfied, then dst_relative will be of the form:
+    // shared_checksum/<file_number>_<db_session_id>.sst
+    // Otherwise, dst_relative is of the form
+    // shared_checksum/<file_number>_<checksum>_<file_size>.sst
+    //
+    // For blob files, db_session_id is not supported with the blob file
+    // format. It uses the original/legacy naming scheme.
+    // dst_relative will be of the form:
+    // shared_checksum/<file_number>_<checksum>_<file_size>.blob
+    dst_relative = GetSharedFileWithChecksum(fname, checksum_hex, size_bytes,
+                                             db_session_id);
+    dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true);
+    dst_relative = GetSharedFileWithChecksumRel(dst_relative, false);
+  } else if (shared) {
+    dst_relative_tmp = GetSharedFileRel(fname, true);
+    dst_relative = GetSharedFileRel(fname, false);
+  } else {
+    dst_relative = GetPrivateFileRel(backup_id, false, fname);
+  }
+
+  // We copy into `temp_dest_path` and, once finished, rename it to
+  // `final_dest_path`. This allows files to atomically appear at
+  // `final_dest_path`. We can copy directly to the final path when atomicity
+  // is unnecessary, like for files in private backup directories.
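+  // (Editor's sketch with hypothetical names: a shared-with-checksum table
+  // file is first written as shared_checksum/000010_1234abcd_56789.sst.tmp
+  // and renamed to shared_checksum/000010_1234abcd_56789.sst once the copy
+  // completes, so a concurrent reader of the backup directory never sees a
+  // partially written shared file.)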
+ const std::string* copy_dest_path; + std::string temp_dest_path; + std::string final_dest_path = GetAbsolutePath(dst_relative); + if (!dst_relative_tmp.empty()) { + temp_dest_path = GetAbsolutePath(dst_relative_tmp); + copy_dest_path = &temp_dest_path; + } else { + copy_dest_path = &final_dest_path; + } + + // Step 2: Determine whether to copy or not + // if it's shared, we also need to check if it exists -- if it does, no need + // to copy it again. + bool need_to_copy = true; + // true if final_dest_path is the same path as another live file + const bool same_path = + live_dst_paths.find(final_dest_path) != live_dst_paths.end(); + + bool file_exists = false; + if (shared && !same_path) { + // Should be in shared directory but not a live path, check existence in + // shared directory + IOStatus exist = + backup_fs_->FileExists(final_dest_path, io_options_, nullptr); + if (exist.ok()) { + file_exists = true; + } else if (exist.IsNotFound()) { + file_exists = false; + } else { + return exist; + } + } + + if (!contents.empty()) { + need_to_copy = false; + } else if (shared && (same_path || file_exists)) { + need_to_copy = false; + auto find_result = backuped_file_infos_.find(dst_relative); + if (find_result == backuped_file_infos_.end() && !same_path) { + // file exists but not referenced + ROCKS_LOG_INFO( + options_.info_log, + "%s already present, but not referenced by any backup. We will " + "overwrite the file.", + fname.c_str()); + need_to_copy = true; + // Defer any failure reporting to when we try to write the file + backup_fs_->DeleteFile(final_dest_path, io_options_, nullptr) + .PermitUncheckedError(); + } else { + // file exists and referenced + if (checksum_hex.empty()) { + // same_path should not happen for a standard DB, so OK to + // read file contents to check for checksum mismatch between + // two files from same DB getting same name. + // For compatibility with future meta file that might not have + // crc32c checksum available, consider it might be empty, but + // we don't currently generate meta file without crc32c checksum. + // Therefore we have to read & compute it if we don't have it. + if (!same_path && !find_result->second->checksum_hex.empty()) { + assert(find_result != backuped_file_infos_.end()); + // Note: to save I/O on incremental backups, we copy prior known + // checksum of the file instead of reading entire file contents + // to recompute it. + checksum_hex = find_result->second->checksum_hex; + // Regarding corruption detection, consider: + // (a) the DB file is corrupt (since previous backup) and the backup + // file is OK: we failed to detect, but the backup is safe. DB can + // be repaired/restored once its corruption is detected. + // (b) the backup file is corrupt (since previous backup) and the + // db file is OK: we failed to detect, but the backup is corrupt. + // CreateNewBackup should support fast incremental backups and + // there's no way to support that without reading all the files. + // We might add an option for extra checks on incremental backup, + // but until then, use VerifyBackups to check existing backup data. + // (c) file name collision with legitimately different content. + // This is almost inconceivable with a well-generated DB session + // ID, but even in that case, we double check the file sizes in + // BackupMeta::AddFile. 
+        } else {
+          IOStatus io_s = ReadFileAndComputeChecksum(
+              src_path, db_fs_, src_env_options, size_limit, &checksum_hex,
+              src_temperature);
+          if (!io_s.ok()) {
+            return io_s;
+          }
+        }
+      }
+      if (!db_session_id.empty()) {
+        ROCKS_LOG_INFO(options_.info_log,
+                       "%s already present, with checksum %s, size %" PRIu64
+                       " and DB session identity %s",
+                       fname.c_str(), checksum_hex.c_str(), size_bytes,
+                       db_session_id.c_str());
+      } else {
+        ROCKS_LOG_INFO(options_.info_log,
+                       "%s already present, with checksum %s and size %" PRIu64,
+                       fname.c_str(), checksum_hex.c_str(), size_bytes);
+      }
+    }
+  }
+  live_dst_paths.insert(final_dest_path);
+
+  // Step 3: Add work item
+  if (!contents.empty() || need_to_copy) {
+    CopyOrCreateWorkItem copy_or_create_work_item(
+        src_dir.empty() ? "" : src_path, *copy_dest_path, src_temperature,
+        Temperature::kUnknown /*dst_temp*/, contents, db_env_, backup_env_,
+        src_env_options, options_.sync, rate_limiter, size_limit, stats,
+        progress_callback, src_checksum_func_name, checksum_hex, db_id,
+        db_session_id);
+    BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
+        copy_or_create_work_item.result.get_future(), shared, need_to_copy,
+        backup_env_, temp_dest_path, final_dest_path, dst_relative);
+    if (excludable_items != nullptr && shared && shared_checksum &&
+        need_to_copy) {
+      ROCKS_LOG_INFO(options_.info_log, "Copying (if not excluded) %s to %s",
+                     fname.c_str(), copy_dest_path->c_str());
+      excludable_items->emplace_back(std::move(copy_or_create_work_item),
+                                     std::move(after_copy_or_create_work_item));
+    } else {
+      // For files known not excluded, can start copying even before finishing
+      // the checkpoint
+      ROCKS_LOG_INFO(options_.info_log, "Copying %s to %s", fname.c_str(),
+                     copy_dest_path->c_str());
+      files_to_copy_or_create_.write(std::move(copy_or_create_work_item));
+      backup_items_to_finish.push_back(
+          std::move(after_copy_or_create_work_item));
+    }
+  } else {
+    std::promise<CopyOrCreateResult> promise_result;
+    BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
+        promise_result.get_future(), shared, need_to_copy, backup_env_,
+        temp_dest_path, final_dest_path, dst_relative);
+    backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item));
+    CopyOrCreateResult result;
+    result.io_status = IOStatus::OK();
+    result.size = size_bytes;
+    result.checksum_hex = std::move(checksum_hex);
+    result.db_id = std::move(db_id);
+    result.db_session_id = std::move(db_session_id);
+    promise_result.set_value(std::move(result));
+  }
+  return IOStatus::OK();
+}
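+
+// Note on the checksum loop in ReadFileAndComputeChecksum below (illustrative
+// sketch): crc32c::Extend incrementally folds each chunk into the running
+// value, so
+//   uint32_t c = 0;
+//   c = crc32c::Extend(c, part1, n1);
+//   c = crc32c::Extend(c, part2, n2);
+// yields the same value as a single crc32c pass over the concatenation of
+// part1 and part2. That is what lets the function stream the file through a
+// fixed-size buffer instead of reading it into memory at once.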
+
+IOStatus BackupEngineImpl::ReadFileAndComputeChecksum(
+    const std::string& src, const std::shared_ptr<FileSystem>& src_fs,
+    const EnvOptions& src_env_options, uint64_t size_limit,
+    std::string* checksum_hex, const Temperature src_temperature) const {
+  if (checksum_hex == nullptr) {
+    return status_to_io_status(Status::Aborted("Checksum pointer is null"));
+  }
+  uint32_t checksum_value = 0;
+  if (size_limit == 0) {
+    size_limit = std::numeric_limits<uint64_t>::max();
+  }
+
+  std::unique_ptr<SequentialFileReader> src_reader;
+  auto file_options = FileOptions(src_env_options);
+  file_options.temperature = src_temperature;
+  RateLimiter* rate_limiter = options_.backup_rate_limiter.get();
+  IOStatus io_s = SequentialFileReader::Create(
+      src_fs, src, file_options, &src_reader, nullptr /* dbg */, rate_limiter);
+  if (io_s.IsPathNotFound() && src_temperature != Temperature::kUnknown) {
+    // Retry without temperature hint in case the FileSystem is strict with
+    // non-kUnknown temperature option
+    file_options.temperature = Temperature::kUnknown;
+    io_s = SequentialFileReader::Create(src_fs, src, file_options, &src_reader,
+                                        nullptr /* dbg */, rate_limiter);
+  }
+  if (!io_s.ok()) {
+    return io_s;
+  }
+
+  size_t buf_size = kDefaultCopyFileBufferSize;
+  std::unique_ptr<char[]> buf(new char[buf_size]);
+  Slice data;
+
+  do {
+    if (stop_backup_.load(std::memory_order_acquire)) {
+      return status_to_io_status(Status::Incomplete("Backup stopped"));
+    }
+    size_t buffer_to_read =
+        (buf_size < size_limit) ? buf_size : static_cast<size_t>(size_limit);
+    io_s = src_reader->Read(buffer_to_read, &data, buf.get(),
+                            Env::IO_LOW /* rate_limiter_priority */);
+    if (!io_s.ok()) {
+      return io_s;
+    }
+
+    size_limit -= data.size();
+    checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
+  } while (data.size() > 0 && size_limit > 0);
+
+  checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
+
+  return io_s;
+}
+
+Status BackupEngineImpl::GetFileDbIdentities(
+    Env* src_env, const EnvOptions& src_env_options,
+    const std::string& file_path, Temperature file_temp,
+    RateLimiter* rate_limiter, std::string* db_id, std::string* db_session_id) {
+  assert(db_id != nullptr || db_session_id != nullptr);
+
+  Options options;
+  options.env = src_env;
+  SstFileDumper sst_reader(options, file_path, file_temp,
+                           2 * 1024 * 1024 /* readahead_size */,
+                           false /* verify_checksum */, false /* output_hex */,
+                           false /* decode_blob_index */, src_env_options,
+                           true /* silent */);
+
+  const TableProperties* table_properties = nullptr;
+  std::shared_ptr<const TableProperties> tp;
+  Status s = sst_reader.getStatus();
+
+  if (s.ok()) {
+    // Try to get table properties from the table reader of sst_reader
+    if (!sst_reader.ReadTableProperties(&tp).ok()) {
+      // Try to use table properties from the initialization of sst_reader
+      table_properties = sst_reader.GetInitTableProperties();
+    } else {
+      table_properties = tp.get();
+      if (table_properties != nullptr && rate_limiter != nullptr) {
+        // sizeof(*table_properties) is a sufficient but far-from-exact
+        // approximation of read bytes due to metaindex block, std::string
+        // properties and varint compression
+        LoopRateLimitRequestHelper(sizeof(*table_properties), rate_limiter,
+                                   Env::IO_LOW, nullptr /* stats */,
+                                   RateLimiter::OpType::kRead);
+      }
+    }
+  } else {
+    ROCKS_LOG_INFO(options_.info_log, "Failed to read %s: %s",
+                   file_path.c_str(), s.ToString().c_str());
+    return s;
+  }
+
+  if (table_properties != nullptr) {
+    if (db_id != nullptr) {
+      db_id->assign(table_properties->db_id);
+    }
+    if (db_session_id != nullptr) {
+      db_session_id->assign(table_properties->db_session_id);
+      if (db_session_id->empty()) {
+        s = Status::NotFound("DB session identity not found in " + file_path);
+        ROCKS_LOG_INFO(options_.info_log, "%s", s.ToString().c_str());
+        return s;
+      }
+    }
+    return Status::OK();
+  } else {
+    s = Status::Corruption("Table properties missing in " + file_path);
+    ROCKS_LOG_INFO(options_.info_log, "%s", s.ToString().c_str());
+    return s;
+  }
+}
+
+void BackupEngineImpl::LoopRateLimitRequestHelper(
+    const size_t total_bytes_to_request, RateLimiter* rate_limiter,
+    const Env::IOPriority pri, Statistics* stats,
+    const RateLimiter::OpType op_type) {
+  assert(rate_limiter != nullptr);
+  size_t remaining_bytes = total_bytes_to_request;
+  size_t request_bytes = 0;
+  while (remaining_bytes > 0) {
+    request_bytes =
+        std::min(static_cast<size_t>(rate_limiter->GetSingleBurstBytes()),
+                 remaining_bytes);
+    rate_limiter->Request(request_bytes, pri, stats, op_type);
+    remaining_bytes -= request_bytes;
+  }
+}
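+
+// For example (illustrative): if GetSingleBurstBytes() returns 1 MiB and a
+// caller asks the helper above for 2.5 MiB, it issues three Request() calls
+// of 1 MiB, 1 MiB, and 0.5 MiB, so no single request exceeds the limiter's
+// burst size.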
+
+void BackupEngineImpl::DeleteChildren(const std::string& dir,
+                                      uint32_t file_type_filter) const {
+  std::vector<std::string> children;
+  db_fs_->GetChildren(dir, io_options_, &children, nullptr)
+      .PermitUncheckedError();  // ignore errors
+
+  for (const auto& f : children) {
+    uint64_t number;
+    FileType type;
+    bool ok = ParseFileName(f, &number, &type);
+    if (ok && (file_type_filter & (1 << type))) {
+      // don't delete this file
+      continue;
+    }
+    db_fs_->DeleteFile(dir + "/" + f, io_options_, nullptr)
+        .PermitUncheckedError();  // ignore errors
+  }
+}
+
+IOStatus BackupEngineImpl::ReadChildFileCurrentSizes(
+    const std::string& dir, const std::shared_ptr<FileSystem>& fs,
+    std::unordered_map<std::string, uint64_t>* result) const {
+  assert(result != nullptr);
+  std::vector<FileAttributes> files_attrs;
+  IOStatus io_status = fs->FileExists(dir, io_options_, nullptr);
+  if (io_status.ok()) {
+    io_status =
+        fs->GetChildrenFileAttributes(dir, io_options_, &files_attrs, nullptr);
+  } else if (io_status.IsNotFound()) {
+    // Inserting no entries can be considered a success
+    io_status = IOStatus::OK();
+  }
+  const bool slash_needed = dir.empty() || dir.back() != '/';
+  for (const auto& file_attrs : files_attrs) {
+    result->emplace(dir + (slash_needed ? "/" : "") + file_attrs.name,
+                    file_attrs.size_bytes);
+  }
+  return io_status;
+}
+
+IOStatus BackupEngineImpl::GarbageCollect() {
+  assert(!read_only_);
+
+  // We will make a best effort to remove all garbage even in the presence
+  // of inconsistencies or I/O failures that inhibit finding garbage.
+  IOStatus overall_status = IOStatus::OK();
+  // If all goes well, we don't need another auto-GC this session
+  might_need_garbage_collect_ = false;
+
+  ROCKS_LOG_INFO(options_.info_log, "Starting garbage collection");
+
+  // delete obsolete shared files
+  for (bool with_checksum : {false, true}) {
+    std::vector<std::string> shared_children;
+    {
+      std::string shared_path;
+      if (with_checksum) {
+        shared_path = GetAbsolutePath(GetSharedFileWithChecksumRel());
+      } else {
+        shared_path = GetAbsolutePath(GetSharedFileRel());
+      }
+      IOStatus io_s = backup_fs_->FileExists(shared_path, io_options_, nullptr);
+      if (io_s.ok()) {
+        io_s = backup_fs_->GetChildren(shared_path, io_options_,
+                                       &shared_children, nullptr);
+      } else if (io_s.IsNotFound()) {
+        io_s = IOStatus::OK();
+      }
+      if (!io_s.ok()) {
+        overall_status = io_s;
+        // Trying again later might work
+        might_need_garbage_collect_ = true;
+      }
+    }
+    for (auto& child : shared_children) {
+      std::string rel_fname;
+      if (with_checksum) {
+        rel_fname = GetSharedFileWithChecksumRel(child);
+      } else {
+        rel_fname = GetSharedFileRel(child);
+      }
+      auto child_itr = backuped_file_infos_.find(rel_fname);
+      // if it's not refcounted, delete it
+      if (child_itr == backuped_file_infos_.end() ||
+          child_itr->second->refs == 0) {
+        // this might be a directory, but DeleteFile will just fail in that
+        // case, so we're good
+        IOStatus io_s = backup_fs_->DeleteFile(GetAbsolutePath(rel_fname),
+                                               io_options_, nullptr);
+        ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
+                       rel_fname.c_str(), io_s.ToString().c_str());
+        backuped_file_infos_.erase(rel_fname);
+        if (!io_s.ok()) {
+          // Trying again later might work
+          might_need_garbage_collect_ = true;
+        }
+      }
+    }
+  }
+
+  // delete obsolete private files
+  std::vector<std::string> private_children;
+  {
+    IOStatus io_s =
+        backup_fs_->GetChildren(GetAbsolutePath(kPrivateDirName), io_options_,
+                                &private_children, nullptr);
+    if (!io_s.ok()) {
+      overall_status = io_s;
+      // Trying again later might work
+      might_need_garbage_collect_ = true;
+    }
+  }
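+  // Illustrative walk-through of the loop below: a child named "7" is kept
+  // while backup 7 is still in backups_, and deleted once backup 7 is gone;
+  // a child named "7.tmp" (leftover from an interrupted backup) is always
+  // deleted; a child that does not parse as a number is left alone.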
+  for (auto& child : private_children) {
+    BackupID backup_id = 0;
+    bool tmp_dir = child.find(".tmp") != std::string::npos;
+    sscanf(child.c_str(), "%u", &backup_id);
+    if (!tmp_dir &&  // if it's tmp_dir, delete it
+        (backup_id == 0 || backups_.find(backup_id) != backups_.end())) {
+      // it's either not a number or it's still alive. continue
+      continue;
+    }
+    // here we have to delete the dir and all its children
+    std::string full_private_path =
+        GetAbsolutePath(GetPrivateFileRel(backup_id));
+    std::vector<std::string> subchildren;
+    if (backup_fs_
+            ->GetChildren(full_private_path, io_options_, &subchildren, nullptr)
+            .ok()) {
+      for (auto& subchild : subchildren) {
+        IOStatus io_s = backup_fs_->DeleteFile(full_private_path + subchild,
+                                               io_options_, nullptr);
+        ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
+                       (full_private_path + subchild).c_str(),
+                       io_s.ToString().c_str());
+        if (!io_s.ok()) {
+          // Trying again later might work
+          might_need_garbage_collect_ = true;
+        }
+      }
+    }
+    // finally delete the private dir
+    IOStatus io_s =
+        backup_fs_->DeleteDir(full_private_path, io_options_, nullptr);
+    ROCKS_LOG_INFO(options_.info_log, "Deleting dir %s -- %s",
+                   full_private_path.c_str(), io_s.ToString().c_str());
+    if (!io_s.ok()) {
+      // Trying again later might work
+      might_need_garbage_collect_ = true;
+    }
+  }
+
+  assert(overall_status.ok() || might_need_garbage_collect_);
+  return overall_status;
+}
+
+// ------- BackupMeta class --------
+
+IOStatus BackupEngineImpl::BackupMeta::AddFile(
+    std::shared_ptr<FileInfo> file_info) {
+  auto itr = file_infos_->find(file_info->filename);
+  if (itr == file_infos_->end()) {
+    auto ret = file_infos_->insert({file_info->filename, file_info});
+    if (ret.second) {
+      itr = ret.first;
+      itr->second->refs = 1;
+    } else {
+      // if this happens, something is seriously wrong
+      return IOStatus::Corruption("In memory metadata insertion error");
+    }
+  } else {
+    // Compare sizes, because we scanned that off the filesystem on both
+    // ends. This is like a check in VerifyBackup.
+    if (itr->second->size != file_info->size) {
+      std::string msg = "Size mismatch for existing backup file: ";
+      msg.append(file_info->filename);
+      msg.append(" Size in backup is " + std::to_string(itr->second->size) +
+                 " while size in DB is " + std::to_string(file_info->size));
+      msg.append(
+          " If this DB file checks as not corrupt, try deleting old"
+          " backups or backing up to a different backup directory.");
+      return IOStatus::Corruption(msg);
+    }
+    if (file_info->checksum_hex.empty()) {
+      // No checksum available to check
+    } else if (itr->second->checksum_hex.empty()) {
+      // Remember checksum if newly acquired
+      itr->second->checksum_hex = file_info->checksum_hex;
+    } else if (itr->second->checksum_hex != file_info->checksum_hex) {
+      // Note: to save I/O, these will be equal trivially on already backed
+      // up files that don't have the checksum in their name. And it should
+      // never fail for files that do have checksum in their name.
+
+      // Should never reach here, but produce an appropriate corruption
+      // message in case we do in a release build.
+      assert(false);
+      std::string msg = "Checksum mismatch for existing backup file: ";
+      msg.append(file_info->filename);
+      msg.append(" Expected checksum is " + itr->second->checksum_hex +
+                 " while computed checksum is " + file_info->checksum_hex);
+      msg.append(
+          " If this DB file checks as not corrupt, try deleting old"
+          " backups or backing up to a different backup directory.");
+      return IOStatus::Corruption(msg);
+    }
+    ++itr->second->refs;  // increase refcount if already present
+  }
+
+  size_ += file_info->size;
+  files_.push_back(itr->second);
+
+  return IOStatus::OK();
+}
+
+IOStatus BackupEngineImpl::BackupMeta::Delete(bool delete_meta) {
+  IOStatus io_s;
+  for (const auto& file : files_) {
+    --file->refs;  // decrease refcount
+  }
+  files_.clear();
+  // delete meta file
+  if (delete_meta) {
+    io_s = fs_->FileExists(meta_filename_, iooptions_, nullptr);
+    if (io_s.ok()) {
+      io_s = fs_->DeleteFile(meta_filename_, iooptions_, nullptr);
+    } else if (io_s.IsNotFound()) {
+      io_s = IOStatus::OK();  // nothing to delete
+    }
+  }
+  timestamp_ = 0;
+  return io_s;
+}
+
+// Constants for backup meta file schema (see LoadFromFile)
+const std::string kSchemaVersionPrefix{"schema_version "};
+const std::string kFooterMarker{"// FOOTER"};
+
+const std::string kAppMetaDataFieldName{"metadata"};
+
+// WART: The checksums are crc32c but named "crc32"
+const std::string kFileCrc32cFieldName{"crc32"};
+const std::string kFileSizeFieldName{"size"};
+const std::string kTemperatureFieldName{"temp"};
+const std::string kExcludedFieldName{"ni::excluded"};
+
+// Marks a (future) field that should cause failure if not recognized.
+// Other fields are assumed to be ignorable. For example, in the future
+// we might add
+// ni::file_name_escape uri_percent
+// to indicate all file names have had spaces and special characters
+// escaped using a URI percent encoding.
+const std::string kNonIgnorableFieldPrefix{"ni::"};
+
+// Each backup meta file is of the format (schema version 1):
+//----------------------------------------------------------
+// <timestamp>
+// <seq number>
+// metadata <hex encoded metadata> (optional)
+// <number of files>
+// <file name> crc32 <crc32c value>
+// <file name> crc32 <crc32c value>
+// ...
+//----------------------------------------------------------
+//
+// For schema version 2.x:
+//----------------------------------------------------------
+// schema_version <ver>
+// <timestamp>
+// <seq number>
+// [<field name> <field data>]
+// ...
+// <number of files>
+// <file name>( <field name> <field data no space>)*
+// <file name>( <field name> <field data no space>)*
+// ...
+// [// FOOTER]
+// [<field name> <field data>]
+// ...
+//----------------------------------------------------------
+// where
+// <ver> ::= [0-9]+([.][0-9]+)
+// <field name> ::= [A-Za-z_][A-Za-z_0-9.]+
+// <field data> is anything but newline
+// <field data no space> is anything but space and newline
+// Although "// FOOTER" wouldn't strictly be required as a delimiter
+// given the number of files is included, it is there for parsing
+// sanity in case of corruption. It is only required if followed
+// by footer fields, such as a checksum of the meta file (so far).
+// Unrecognized fields are ignored, to support schema evolution on
+// non-critical features with forward compatibility. Update schema
+// major version for breaking changes. Schema minor versions are indicated
+// only for diagnostic/debugging purposes.
+//
+// Fields in schema version 2.0:
+// * Top-level meta fields:
+//   * Only "metadata" as in schema version 1
+// * File meta fields:
+//   * "crc32" - a crc32c checksum as in schema version 1
+//   * "size" - the size of the file (new)
+// * Footer meta fields:
+//   * None yet (future use for meta file checksum anticipated)
+//
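+// Illustrative example (not taken from a real backup) of a schema version 2
+// meta file that the parser below would accept:
+//   schema_version 2.1
+//   1731000000
+//   738274
+//   metadata 6261636B7570
+//   2
+//   shared_checksum/000010_3735928559_4096.sst crc32 3735928559 size 4096
+//   private/5/MANIFEST-000011 crc32 12345 size 2048
+// (LoadFromFile additionally checks each listed file against the sizes
+// scanned from the backup directory.)
+//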
+IOStatus BackupEngineImpl::BackupMeta::LoadFromFile(
+    const std::string& backup_dir,
+    const std::unordered_map<std::string, uint64_t>& abs_path_to_size,
+    RateLimiter* rate_limiter, Logger* info_log,
+    std::unordered_set<std::string>* reported_ignored_fields) {
+  assert(reported_ignored_fields);
+  assert(Empty());
+
+  std::unique_ptr<LineFileReader> backup_meta_reader;
+  {
+    IOStatus io_s = LineFileReader::Create(fs_, meta_filename_, FileOptions(),
+                                           &backup_meta_reader,
+                                           nullptr /* dbg */, rate_limiter);
+    if (!io_s.ok()) {
+      return io_s;
+    }
+  }
+
+  // If we don't read an explicit schema_version, that implies version 1,
+  // which is what we call the original backup meta schema.
+  int schema_major_version = 1;
+
+  // Failures handled at the end
+  std::string line;
+  if (backup_meta_reader->ReadLine(&line,
+                                   Env::IO_LOW /* rate_limiter_priority */)) {
+    if (StartsWith(line, kSchemaVersionPrefix)) {
+      std::string ver = line.substr(kSchemaVersionPrefix.size());
+      if (ver == "2" || StartsWith(ver, "2.")) {
+        schema_major_version = 2;
+      } else {
+        return IOStatus::NotSupported(
+            "Unsupported/unrecognized schema version: " + ver);
+      }
+      line.clear();
+    } else if (line.empty()) {
+      return IOStatus::Corruption("Unexpected empty line");
+    }
+  }
+  if (!line.empty()) {
+    timestamp_ = std::strtoull(line.c_str(), nullptr, /*base*/ 10);
+  } else if (backup_meta_reader->ReadLine(
+                 &line, Env::IO_LOW /* rate_limiter_priority */)) {
+    timestamp_ = std::strtoull(line.c_str(), nullptr, /*base*/ 10);
+  }
+  if (backup_meta_reader->ReadLine(&line,
+                                   Env::IO_LOW /* rate_limiter_priority */)) {
+    sequence_number_ = std::strtoull(line.c_str(), nullptr, /*base*/ 10);
+  }
+  uint32_t num_files = UINT32_MAX;
+  while (backup_meta_reader->ReadLine(
+      &line, Env::IO_LOW /* rate_limiter_priority */)) {
+    if (line.empty()) {
+      return IOStatus::Corruption("Unexpected empty line");
+    }
+    // Number -> number of files -> exit loop reading optional meta fields
+    if (line[0] >= '0' && line[0] <= '9') {
+      num_files = static_cast<uint32_t>(strtoul(line.c_str(), nullptr, 10));
+      break;
+    }
+    // else, must be a meta field assignment
+    auto space_pos = line.find_first_of(' ');
+    if (space_pos == std::string::npos) {
+      return IOStatus::Corruption("Expected number of files or meta field");
+    }
+    std::string field_name = line.substr(0, space_pos);
+    std::string field_data = line.substr(space_pos + 1);
+    if (field_name == kAppMetaDataFieldName) {
+      // app metadata present
+      bool decode_success = Slice(field_data).DecodeHex(&app_metadata_);
+      if (!decode_success) {
+        return IOStatus::Corruption(
+            "Failed to decode stored hex encoded app metadata");
+      }
+    } else if (schema_major_version < 2) {
+      return IOStatus::Corruption("Expected number of files or \"" +
+                                  kAppMetaDataFieldName + "\" field");
+    } else if (StartsWith(field_name, kNonIgnorableFieldPrefix)) {
+      return IOStatus::NotSupported("Unrecognized non-ignorable meta field " +
+                                    field_name + " (from future version?)");
+    } else {
+      // Warn the first time we see any particular unrecognized meta field
+      if (reported_ignored_fields->insert("meta:" + field_name).second) {
+        ROCKS_LOG_WARN(info_log, "Ignoring unrecognized backup meta field %s",
+                       field_name.c_str());
+      }
+    }
+  }
+  std::vector<std::shared_ptr<FileInfo>> files;
+  bool footer_present = false;
+  while (backup_meta_reader->ReadLine(
+      &line, Env::IO_LOW /* rate_limiter_priority */)) {
+    std::vector<std::string> components = StringSplit(line, ' ');
+
+    if (components.size() < 1) {
+      return IOStatus::Corruption("Empty line instead of file entry.");
+    }
+    if (schema_major_version >= 2 && components.size() == 2 &&
+        line == kFooterMarker) {
+      footer_present = true;
+      break;
+    }
+
+    const std::string& filename = components[0];
+
+    if (schema_major_version >= 2) {
+      if (components.size() % 2 != 1) {
+        return IOStatus::Corruption(
+            "Bad number of line components for file entry.");
+      }
+    } else {
+      // Check restricted original schema
+      if (components.size() < 3) {
+        return IOStatus::Corruption("File checksum is missing for " + filename +
+                                    " in " + meta_filename_);
+      }
+      if (components[1] != kFileCrc32cFieldName) {
+        return IOStatus::Corruption("Unknown checksum type for " + filename +
+                                    " in " + meta_filename_);
+      }
+      if (components.size() > 3) {
+        return IOStatus::Corruption("Extra data for entry " + filename +
+                                    " in " + meta_filename_);
+      }
+    }
+
+    std::optional<uint64_t> expected_size{};
+    std::string checksum_hex;
+    Temperature temp = Temperature::kUnknown;
+    bool excluded = false;
+    for (unsigned i = 1; i < components.size(); i += 2) {
+      const std::string& field_name = components[i];
+      const std::string& field_data = components[i + 1];
+
+      if (field_name == kFileCrc32cFieldName) {
+        uint32_t checksum_value =
+            static_cast<uint32_t>(strtoul(field_data.c_str(), nullptr, 10));
+        if (field_data != std::to_string(checksum_value)) {
+          return IOStatus::Corruption("Invalid checksum value for " + filename +
+                                      " in " + meta_filename_);
+        }
+        checksum_hex = ChecksumInt32ToHex(checksum_value);
+      } else if (field_name == kFileSizeFieldName) {
+        expected_size = std::strtoull(field_data.c_str(), nullptr, /*base*/ 10);
+      } else if (field_name == kTemperatureFieldName) {
+        auto iter = temperature_string_map.find(field_data);
+        if (iter != temperature_string_map.end()) {
+          temp = iter->second;
+        } else {
+          // Could report corruption, but in case of new temperatures added
+          // in future, letting those map to kUnknown which should generally
+          // be safe.
+          temp = Temperature::kUnknown;
+        }
+      } else if (field_name == kExcludedFieldName) {
+        if (field_data == "true") {
+          excluded = true;
+        } else if (field_data == "false") {
+          excluded = false;
+        } else {
+          return IOStatus::NotSupported("Unrecognized value \"" + field_data +
+                                        "\" for field " + field_name);
+        }
+      } else if (StartsWith(field_name, kNonIgnorableFieldPrefix)) {
+        return IOStatus::NotSupported("Unrecognized non-ignorable file field " +
+                                      field_name + " (from future version?)");
+      } else {
+        // Warn the first time we see any particular unrecognized file field
+        if (reported_ignored_fields->insert("file:" + field_name).second) {
+          ROCKS_LOG_WARN(info_log, "Ignoring unrecognized backup file field %s",
+                         field_name.c_str());
+        }
+      }
+    }
+
+    if (excluded) {
+      excluded_files_.emplace_back(filename);
+    } else {
+      // Verify file exists, with expected size
+      std::string abs_path = backup_dir + "/" + filename;
+      auto e = abs_path_to_size.find(abs_path);
+      if (e == abs_path_to_size.end()) {
+        return IOStatus::Corruption(
+            "Pathname in meta file not found on disk: " + abs_path);
+      }
+      uint64_t actual_size = e->second;
+      if (expected_size.has_value() && *expected_size != actual_size) {
+        return IOStatus::Corruption("For file " + filename + " expected size " +
+                                    std::to_string(*expected_size) +
+                                    " but found size " +
+                                    std::to_string(actual_size));
+      }
+
+      // NOTE: FileInfo will be coalesced for sharing later (AddFile below)
+      files.emplace_back(
+          std::make_shared<FileInfo>(filename, actual_size, checksum_hex,
+                                     /*id*/ "", /*sid*/ "", temp));
+    }
+  }
+
+  if (footer_present) {
+    assert(schema_major_version >= 2);
+    while (backup_meta_reader->ReadLine(
+        &line, Env::IO_LOW /* rate_limiter_priority */)) {
+      if (line.empty()) {
+        return IOStatus::Corruption("Unexpected empty line");
+      }
+      auto space_pos = line.find_first_of(' ');
+      if (space_pos == std::string::npos) {
+        return IOStatus::Corruption("Expected footer field");
+      }
+      std::string field_name = line.substr(0, space_pos);
+      std::string field_data = line.substr(space_pos + 1);
+      if (StartsWith(field_name, kNonIgnorableFieldPrefix)) {
+        return IOStatus::NotSupported("Unrecognized non-ignorable field " +
+                                      field_name + " (from future version?)");
+      } else if (reported_ignored_fields->insert("footer:" + field_name)
+                     .second) {
+        // Warn the first time we see any particular unrecognized footer field
+        ROCKS_LOG_WARN(info_log,
+                       "Ignoring unrecognized backup meta footer field %s",
+                       field_name.c_str());
+      }
+    }
+  }
+
+  {
+    IOStatus io_s = backup_meta_reader->GetStatus();
+    if (!io_s.ok()) {
+      return io_s;
+    }
+  }
+
+  if (num_files != files.size()) {
+    return IOStatus::Corruption(
+        "Inconsistent number of files or missing/incomplete header in " +
+        meta_filename_);
+  }
+
+  files_.reserve(files.size());
+  for (const auto& file_info : files) {
+    IOStatus io_s = AddFile(file_info);
+    if (!io_s.ok()) {
+      return io_s;
+    }
+  }
+
+  return IOStatus::OK();
+}
+
+const std::vector<std::string> minor_version_strings{
+    "",  // invalid major version 0
+    "",  // implicit major version 1
+    "2.1",
+};
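+
+// So a caller passing schema_version 1 to StoreToFile below writes no
+// "schema_version" line at all (the implicit original schema), while
+// schema_version 2 is written out as "schema_version 2.1", the current minor
+// revision of the v2 format.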
supported"); + } + std::string ver = minor_version_strings[schema_version]; + + // Need schema_version >= 2 for TEST_BackupMetaSchemaOptions + assert(schema_version >= 2 || schema_test_options == nullptr); + + IOStatus io_s; + std::unique_ptr backup_meta_file; + FileOptions file_options; + file_options.use_mmap_writes = false; + file_options.use_direct_writes = false; + io_s = fs_->NewWritableFile(meta_tmp_filename_, file_options, + &backup_meta_file, nullptr); + if (!io_s.ok()) { + return io_s; + } + + std::ostringstream buf; + if (schema_test_options) { + // override for testing + ver = schema_test_options->version; + } + if (!ver.empty()) { + assert(schema_version >= 2); + buf << kSchemaVersionPrefix << ver << "\n"; + } + buf << static_cast(timestamp_) << "\n"; + buf << sequence_number_ << "\n"; + + if (!app_metadata_.empty()) { + std::string hex_encoded_metadata = + Slice(app_metadata_).ToString(/* hex */ true); + buf << kAppMetaDataFieldName << " " << hex_encoded_metadata << "\n"; + } + if (schema_test_options) { + for (auto& e : schema_test_options->meta_fields) { + buf << e.first << " " << e.second << "\n"; + } + } + buf << files_.size() << "\n"; + + for (const auto& file : files_) { + buf << file->filename; + if (schema_test_options == nullptr || + schema_test_options->crc32c_checksums) { + // use crc32c for now, switch to something else if needed + buf << " " << kFileCrc32cFieldName << " " + << ChecksumHexToInt32(file->checksum_hex); + } + if (schema_version >= 2 && file->temp != Temperature::kUnknown) { + buf << " " << kTemperatureFieldName << " " + << temperature_to_string[file->temp]; + } + if (schema_test_options && schema_test_options->file_sizes) { + buf << " " << kFileSizeFieldName << " " << std::to_string(file->size); + } + if (schema_test_options) { + for (auto& e : schema_test_options->file_fields) { + buf << " " << e.first << " " << e.second; + } + } + buf << "\n"; + } + + for (const auto& file : excluded_files_) { + assert(schema_version >= 2); + buf << file.relative_file << " " << kExcludedFieldName << " true\n"; + } + + if (schema_test_options && !schema_test_options->footer_fields.empty()) { + buf << kFooterMarker << "\n"; + for (auto& e : schema_test_options->footer_fields) { + buf << e.first << " " << e.second << "\n"; + } + } + + io_s = backup_meta_file->Append(Slice(buf.str()), iooptions_, nullptr); + IOSTATS_ADD(bytes_written, buf.str().size()); + if (io_s.ok() && sync) { + io_s = backup_meta_file->Sync(iooptions_, nullptr); + } + if (io_s.ok()) { + io_s = backup_meta_file->Close(iooptions_, nullptr); + } + if (io_s.ok()) { + io_s = fs_->RenameFile(meta_tmp_filename_, meta_filename_, iooptions_, + nullptr); + } + return io_s; +} +} // namespace + +IOStatus BackupEngineReadOnly::Open(const BackupEngineOptions& options, + Env* env, + BackupEngineReadOnly** backup_engine_ptr) { + if (options.destroy_old_data) { + return IOStatus::InvalidArgument( + "Can't destroy old data with ReadOnly BackupEngine"); + } + std::unique_ptr backup_engine( + new BackupEngineImplThreadSafe(options, env, true /*read_only*/)); + auto s = backup_engine->Initialize(); + if (!s.ok()) { + *backup_engine_ptr = nullptr; + return s; + } + *backup_engine_ptr = backup_engine.release(); + return IOStatus::OK(); +} + +void TEST_SetBackupMetaSchemaOptions( + BackupEngine* engine, const TEST_BackupMetaSchemaOptions& options) { + BackupEngineImplThreadSafe* impl = + static_cast_with_check(engine); + impl->TEST_SetBackupMetaSchemaOptions(options); +} + +void TEST_SetDefaultRateLimitersClock( + 
+
+void TEST_SetBackupMetaSchemaOptions(
+    BackupEngine* engine, const TEST_BackupMetaSchemaOptions& options) {
+  BackupEngineImplThreadSafe* impl =
+      static_cast_with_check<BackupEngineImplThreadSafe>(engine);
+  impl->TEST_SetBackupMetaSchemaOptions(options);
+}
+
+void TEST_SetDefaultRateLimitersClock(
+    BackupEngine* engine,
+    const std::shared_ptr<SystemClock>& backup_rate_limiter_clock,
+    const std::shared_ptr<SystemClock>& restore_rate_limiter_clock) {
+  BackupEngineImplThreadSafe* impl =
+      static_cast_with_check<BackupEngineImplThreadSafe>(engine);
+  impl->TEST_SetDefaultRateLimitersClock(backup_rate_limiter_clock,
+                                         restore_rate_limiter_clock);
+}
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine_impl.h
new file mode 100644
index 0000000000..a764b34d9b
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/backup/backup_engine_impl.h
@@ -0,0 +1,34 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/utilities/backup_engine.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct TEST_BackupMetaSchemaOptions {
+  std::string version = "2";
+  bool crc32c_checksums = false;
+  bool file_sizes = true;
+  std::map<std::string, std::string> meta_fields;
+  std::map<std::string, std::string> file_fields;
+  std::map<std::string, std::string> footer_fields;
+};
+
+// Modifies the BackupEngine(Impl) to write backup meta files using the
+// unpublished schema version 2, for the life of this object (not backup_dir).
+// TEST_BackupMetaSchemaOptions offers some customization for testing.
+void TEST_SetBackupMetaSchemaOptions(
+    BackupEngine* engine, const TEST_BackupMetaSchemaOptions& options);
+
+// Modifies the BackupEngine(Impl) to use specified clocks for backup and
+// restore rate limiters created by default if not specified by users, for
+// test speedup.
+void TEST_SetDefaultRateLimitersClock(
+    BackupEngine* engine,
+    const std::shared_ptr<SystemClock>& backup_rate_limiter_clock = nullptr,
+    const std::shared_ptr<SystemClock>& restore_rate_limiter_clock = nullptr);
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.cc
new file mode 100644
index 0000000000..ddaa98c7d3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.cc
@@ -0,0 +1,488 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+
+#include "utilities/blob_db/blob_compaction_filter.h"
+
+#include <cinttypes>
+
+#include "db/dbformat.h"
+#include "logging/logging.h"
+#include "rocksdb/system_clock.h"
+#include "test_util/sync_point.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace blob_db {
+
+BlobIndexCompactionFilterBase::~BlobIndexCompactionFilterBase() {
+  if (blob_file_) {
+    CloseAndRegisterNewBlobFile();
+  }
+  RecordTick(statistics_, BLOB_DB_BLOB_INDEX_EXPIRED_COUNT, expired_count_);
+  RecordTick(statistics_, BLOB_DB_BLOB_INDEX_EXPIRED_SIZE, expired_size_);
+  RecordTick(statistics_, BLOB_DB_BLOB_INDEX_EVICTED_COUNT, evicted_count_);
+  RecordTick(statistics_, BLOB_DB_BLOB_INDEX_EVICTED_SIZE, evicted_size_);
+}
+
+CompactionFilter::Decision BlobIndexCompactionFilterBase::FilterV2(
+    int level, const Slice& key, ValueType value_type, const Slice& value,
+    std::string* new_value, std::string* skip_until) const {
+  const CompactionFilter* ucf = user_comp_filter();
+  if (value_type != kBlobIndex) {
+    if (ucf == nullptr) {
+      return Decision::kKeep;
+    }
+    // Apply user compaction filter for inlined data.
+    CompactionFilter::Decision decision =
+        ucf->FilterV2(level, key, value_type, value, new_value, skip_until);
+    if (decision == Decision::kChangeValue) {
+      return HandleValueChange(key, new_value);
+    }
+    return decision;
+  }
+  BlobIndex blob_index;
+  Status s = blob_index.DecodeFrom(value);
+  if (!s.ok()) {
+    // Unable to decode blob index. Keeping the value.
+    return Decision::kKeep;
+  }
+  if (blob_index.HasTTL() && blob_index.expiration() <= current_time_) {
+    // Expired
+    expired_count_++;
+    expired_size_ += key.size() + value.size();
+    return Decision::kRemove;
+  }
+  if (!blob_index.IsInlined() &&
+      blob_index.file_number() < context_.next_file_number &&
+      context_.current_blob_files.count(blob_index.file_number()) == 0) {
+    // Corresponding blob file gone (most likely, evicted by FIFO eviction).
+    evicted_count_++;
+    evicted_size_ += key.size() + value.size();
+    return Decision::kRemove;
+  }
+  if (context_.fifo_eviction_seq > 0 && blob_index.HasTTL() &&
+      blob_index.expiration() < context_.evict_expiration_up_to) {
+    // Hack: Internal key is passed to BlobIndexCompactionFilter for it to
+    // get sequence number.
+    ParsedInternalKey ikey;
+    if (!ParseInternalKey(
+             key, &ikey,
+             context_.blob_db_impl->db_options_.allow_data_in_errors)
+             .ok()) {
+      assert(false);
+      return Decision::kKeep;
+    }
+    // Remove keys that could have been removed by the last FIFO eviction.
+    // If we get an error while parsing the key, ignore it and continue.
+    if (ikey.sequence < context_.fifo_eviction_seq) {
+      evicted_count_++;
+      evicted_size_ += key.size() + value.size();
+      return Decision::kRemove;
+    }
+  }
+  // Apply user compaction filter for all non-TTL blob data.
+  if (ucf != nullptr && !blob_index.HasTTL()) {
+    // Hack: Internal key is passed to BlobIndexCompactionFilter for it to
+    // get sequence number.
+    ParsedInternalKey ikey;
+    if (!ParseInternalKey(
+             key, &ikey,
+             context_.blob_db_impl->db_options_.allow_data_in_errors)
+             .ok()) {
+      assert(false);
+      return Decision::kKeep;
+    }
+    // Read value from blob file.
+    PinnableSlice blob;
+    CompressionType compression_type = kNoCompression;
+    constexpr bool need_decompress = true;
+    if (!ReadBlobFromOldFile(ikey.user_key, blob_index, &blob, need_decompress,
+                             &compression_type)) {
+      return Decision::kIOError;
+    }
+    CompactionFilter::Decision decision = ucf->FilterV2(
+        level, ikey.user_key, kValue, blob, new_value, skip_until);
+    if (decision == Decision::kChangeValue) {
+      return HandleValueChange(ikey.user_key, new_value);
+    }
+    return decision;
+  }
+  return Decision::kKeep;
+}
+
+CompactionFilter::Decision BlobIndexCompactionFilterBase::HandleValueChange(
+    const Slice& key, std::string* new_value) const {
+  BlobDBImpl* const blob_db_impl = context_.blob_db_impl;
+  assert(blob_db_impl);
+
+  if (new_value->size() < blob_db_impl->bdb_options_.min_blob_size) {
+    // Keep new_value inlined.
+    return Decision::kChangeValue;
+  }
+  if (!OpenNewBlobFileIfNeeded()) {
+    return Decision::kIOError;
+  }
+  Slice new_blob_value(*new_value);
+  std::string compression_output;
+  if (blob_db_impl->bdb_options_.compression != kNoCompression) {
+    new_blob_value =
+        blob_db_impl->GetCompressedSlice(new_blob_value, &compression_output);
+  }
+  uint64_t new_blob_file_number = 0;
+  uint64_t new_blob_offset = 0;
+  if (!WriteBlobToNewFile(key, new_blob_value, &new_blob_file_number,
+                          &new_blob_offset)) {
+    return Decision::kIOError;
+  }
+  if (!CloseAndRegisterNewBlobFileIfNeeded()) {
+    return Decision::kIOError;
+  }
+  BlobIndex::EncodeBlob(new_value, new_blob_file_number, new_blob_offset,
+                        new_blob_value.size(),
+                        blob_db_impl->bdb_options_.compression);
+  return Decision::kChangeBlobIndex;
+}
+
"successfully" : "with failure", + gc_stats_.AllBlobs(), gc_stats_.AllBytes(), + gc_stats_.RelocatedBlobs(), gc_stats_.RelocatedBytes(), + gc_stats_.NewFiles()); + + RecordTick(statistics(), BLOB_DB_GC_NUM_KEYS_RELOCATED, + gc_stats_.RelocatedBlobs()); + RecordTick(statistics(), BLOB_DB_GC_BYTES_RELOCATED, + gc_stats_.RelocatedBytes()); + RecordTick(statistics(), BLOB_DB_GC_NUM_NEW_FILES, gc_stats_.NewFiles()); + RecordTick(statistics(), BLOB_DB_GC_FAILURES, gc_stats_.HasError()); +} + +bool BlobIndexCompactionFilterBase::IsBlobFileOpened() const { + if (blob_file_) { + assert(writer_); + return true; + } + return false; +} + +bool BlobIndexCompactionFilterBase::OpenNewBlobFileIfNeeded() const { + if (IsBlobFileOpened()) { + return true; + } + + BlobDBImpl* const blob_db_impl = context_.blob_db_impl; + assert(blob_db_impl); + + const Status s = blob_db_impl->CreateBlobFileAndWriter( + /* has_ttl */ false, ExpirationRange(), "compaction/GC", &blob_file_, + &writer_); + if (!s.ok()) { + ROCKS_LOG_ERROR( + blob_db_impl->db_options_.info_log, + "Error opening new blob file during compaction/GC, status: %s", + s.ToString().c_str()); + blob_file_.reset(); + writer_.reset(); + return false; + } + + assert(blob_file_); + assert(writer_); + + return true; +} + +bool BlobIndexCompactionFilterBase::ReadBlobFromOldFile( + const Slice& key, const BlobIndex& blob_index, PinnableSlice* blob, + bool need_decompress, CompressionType* compression_type) const { + BlobDBImpl* const blob_db_impl = context_.blob_db_impl; + assert(blob_db_impl); + + Status s = blob_db_impl->GetRawBlobFromFile( + key, blob_index.file_number(), blob_index.offset(), blob_index.size(), + blob, compression_type); + + if (!s.ok()) { + ROCKS_LOG_ERROR( + blob_db_impl->db_options_.info_log, + "Error reading blob during compaction/GC, key: %s (%s), status: %s", + key.ToString(/* output_hex */ true).c_str(), + blob_index.DebugString(/* output_hex */ true).c_str(), + s.ToString().c_str()); + + return false; + } + + if (need_decompress && *compression_type != kNoCompression) { + s = blob_db_impl->DecompressSlice(*blob, *compression_type, blob); + if (!s.ok()) { + ROCKS_LOG_ERROR( + blob_db_impl->db_options_.info_log, + "Uncompression error during blob read from file: %" PRIu64 + " blob_offset: %" PRIu64 " blob_size: %" PRIu64 + " key: %s status: '%s'", + blob_index.file_number(), blob_index.offset(), blob_index.size(), + key.ToString(/* output_hex */ true).c_str(), s.ToString().c_str()); + + return false; + } + } + + return true; +} + +bool BlobIndexCompactionFilterBase::WriteBlobToNewFile( + const Slice& key, const Slice& blob, uint64_t* new_blob_file_number, + uint64_t* new_blob_offset) const { + TEST_SYNC_POINT("BlobIndexCompactionFilterBase::WriteBlobToNewFile"); + assert(new_blob_file_number); + assert(new_blob_offset); + + assert(blob_file_); + *new_blob_file_number = blob_file_->BlobFileNumber(); + + assert(writer_); + uint64_t new_key_offset = 0; + const Status s = writer_->AddRecord(key, blob, kNoExpiration, &new_key_offset, + new_blob_offset); + + if (!s.ok()) { + const BlobDBImpl* const blob_db_impl = context_.blob_db_impl; + assert(blob_db_impl); + + ROCKS_LOG_ERROR(blob_db_impl->db_options_.info_log, + "Error writing blob to new file %s during compaction/GC, " + "key: %s, status: %s", + blob_file_->PathName().c_str(), + key.ToString(/* output_hex */ true).c_str(), + s.ToString().c_str()); + return false; + } + + const uint64_t new_size = + BlobLogRecord::kHeaderSize + key.size() + blob.size(); + 
+  blob_file_->BlobRecordAdded(new_size);
+
+  BlobDBImpl* const blob_db_impl = context_.blob_db_impl;
+  assert(blob_db_impl);
+
+  blob_db_impl->total_blob_size_ += new_size;
+
+  return true;
+}
+
+bool BlobIndexCompactionFilterBase::CloseAndRegisterNewBlobFileIfNeeded()
+    const {
+  const BlobDBImpl* const blob_db_impl = context_.blob_db_impl;
+  assert(blob_db_impl);
+
+  assert(blob_file_);
+  if (blob_file_->GetFileSize() < blob_db_impl->bdb_options_.blob_file_size) {
+    return true;
+  }
+
+  return CloseAndRegisterNewBlobFile();
+}
+
+bool BlobIndexCompactionFilterBase::CloseAndRegisterNewBlobFile() const {
+  BlobDBImpl* const blob_db_impl = context_.blob_db_impl;
+  assert(blob_db_impl);
+  assert(blob_file_);
+
+  Status s;
+
+  {
+    WriteLock wl(&blob_db_impl->mutex_);
+
+    s = blob_db_impl->CloseBlobFile(blob_file_);
+
+    // Note: we delay registering the new blob file until it's closed to
+    // prevent FIFO eviction from processing it during compaction/GC.
+    blob_db_impl->RegisterBlobFile(blob_file_);
+  }
+
+  assert(blob_file_->Immutable());
+
+  if (!s.ok()) {
+    ROCKS_LOG_ERROR(
+        blob_db_impl->db_options_.info_log,
+        "Error closing new blob file %s during compaction/GC, status: %s",
+        blob_file_->PathName().c_str(), s.ToString().c_str());
+  }
+
+  blob_file_.reset();
+  return s.ok();
+}
+
+CompactionFilter::BlobDecision BlobIndexCompactionFilterGC::PrepareBlobOutput(
+    const Slice& key, const Slice& existing_value,
+    std::string* new_value) const {
+  assert(new_value);
+
+  const BlobDBImpl* const blob_db_impl = context().blob_db_impl;
+  (void)blob_db_impl;
+
+  assert(blob_db_impl);
+  assert(blob_db_impl->bdb_options_.enable_garbage_collection);
+
+  BlobIndex blob_index;
+  const Status s = blob_index.DecodeFrom(existing_value);
+  if (!s.ok()) {
+    gc_stats_.SetError();
+    return BlobDecision::kCorruption;
+  }
+
+  if (blob_index.IsInlined()) {
+    gc_stats_.AddBlob(blob_index.value().size());
+
+    return BlobDecision::kKeep;
+  }
+
+  gc_stats_.AddBlob(blob_index.size());
+
+  if (blob_index.HasTTL()) {
+    return BlobDecision::kKeep;
+  }
+
+  if (blob_index.file_number() >= context_gc_.cutoff_file_number) {
+    return BlobDecision::kKeep;
+  }
+
+  // Note: each compaction generates its own blob files, which, depending on
+  // the workload, might result in many small blob files. The total number of
+  // files is bounded though (determined by the number of compactions and the
+  // blob file size option).
+  if (!OpenNewBlobFileIfNeeded()) {
+    gc_stats_.SetError();
+    return BlobDecision::kIOError;
+  }
+
+  PinnableSlice blob;
+  CompressionType compression_type = kNoCompression;
+  std::string compression_output;
+  if (!ReadBlobFromOldFile(key, blob_index, &blob, false, &compression_type)) {
+    gc_stats_.SetError();
+    return BlobDecision::kIOError;
+  }
+
+  // If the compression_type is changed, re-compress it with the new
+  // compression type.
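+  // For instance (illustrative): a blob originally written while the DB used
+  // Snappy is decompressed and re-compressed here if
+  // bdb_options_.compression has since been changed to, say, LZ4, so
+  // relocated blobs always match the currently configured compression.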
+  if (compression_type != blob_db_impl->bdb_options_.compression) {
+    if (compression_type != kNoCompression) {
+      const Status status =
+          blob_db_impl->DecompressSlice(blob, compression_type, &blob);
+      if (!status.ok()) {
+        gc_stats_.SetError();
+        return BlobDecision::kCorruption;
+      }
+    }
+    if (blob_db_impl->bdb_options_.compression != kNoCompression) {
+      blob_db_impl->GetCompressedSlice(blob, &compression_output);
+      blob = PinnableSlice(&compression_output);
+      blob.PinSelf();
+    }
+    // Record the compression type actually applied so the new blob index
+    // encoded below is consistent with the file contents.
+    compression_type = blob_db_impl->bdb_options_.compression;
+  }
+
+  uint64_t new_blob_file_number = 0;
+  uint64_t new_blob_offset = 0;
+  if (!WriteBlobToNewFile(key, blob, &new_blob_file_number,
+                          &new_blob_offset)) {
+    gc_stats_.SetError();
+    return BlobDecision::kIOError;
+  }
+
+  if (!CloseAndRegisterNewBlobFileIfNeeded()) {
+    gc_stats_.SetError();
+    return BlobDecision::kIOError;
+  }
+
+  BlobIndex::EncodeBlob(new_value, new_blob_file_number, new_blob_offset,
+                        blob.size(), compression_type);
+
+  gc_stats_.AddRelocatedBlob(blob_index.size());
+
+  return BlobDecision::kChangeValue;
+}
+
+bool BlobIndexCompactionFilterGC::OpenNewBlobFileIfNeeded() const {
+  if (IsBlobFileOpened()) {
+    return true;
+  }
+  bool result = BlobIndexCompactionFilterBase::OpenNewBlobFileIfNeeded();
+  if (result) {
+    gc_stats_.AddNewFile();
+  }
+  return result;
+}
+
+std::unique_ptr<CompactionFilter>
+BlobIndexCompactionFilterFactoryBase::CreateUserCompactionFilterFromFactory(
+    const CompactionFilter::Context& context) const {
+  std::unique_ptr<CompactionFilter> user_comp_filter_from_factory;
+  if (user_comp_filter_factory_) {
+    user_comp_filter_from_factory =
+        user_comp_filter_factory_->CreateCompactionFilter(context);
+  }
+  return user_comp_filter_from_factory;
+}
+
+std::unique_ptr<CompactionFilter>
+BlobIndexCompactionFilterFactory::CreateCompactionFilter(
+    const CompactionFilter::Context& _context) {
+  assert(clock());
+
+  int64_t current_time = 0;
+  Status s = clock()->GetCurrentTime(&current_time);
+  if (!s.ok()) {
+    return nullptr;
+  }
+  assert(current_time >= 0);
+
+  assert(blob_db_impl());
+
+  BlobCompactionContext context;
+  blob_db_impl()->GetCompactionContext(&context);
+
+  std::unique_ptr<CompactionFilter> user_comp_filter_from_factory =
+      CreateUserCompactionFilterFromFactory(_context);
+
+  return std::unique_ptr<CompactionFilter>(new BlobIndexCompactionFilter(
+      std::move(context), user_comp_filter(),
+      std::move(user_comp_filter_from_factory), current_time, statistics()));
+}
+
+std::unique_ptr<CompactionFilter>
+BlobIndexCompactionFilterFactoryGC::CreateCompactionFilter(
+    const CompactionFilter::Context& _context) {
+  assert(clock());
+
+  int64_t current_time = 0;
+  Status s = clock()->GetCurrentTime(&current_time);
+  if (!s.ok()) {
+    return nullptr;
+  }
+  assert(current_time >= 0);
+
+  assert(blob_db_impl());
+
+  BlobCompactionContext context;
+  BlobCompactionContextGC context_gc;
+  blob_db_impl()->GetCompactionContext(&context, &context_gc);
+
+  std::unique_ptr<CompactionFilter> user_comp_filter_from_factory =
+      CreateUserCompactionFilterFromFactory(_context);
+
+  return std::unique_ptr<CompactionFilter>(new BlobIndexCompactionFilterGC(
+      std::move(context), std::move(context_gc), user_comp_filter(),
+      std::move(user_comp_filter_from_factory), current_time, statistics()));
+}
+
+}  // namespace blob_db
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.h
new file mode 100644
index 0000000000..cb83d0d034
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_compaction_filter.h
@@ -0,0 +1,202 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <unordered_set>
+
+#include "db/blob/blob_index.h"
+#include "monitoring/statistics_impl.h"
+#include "rocksdb/compaction_filter.h"
+#include "utilities/blob_db/blob_db_gc_stats.h"
+#include "utilities/blob_db/blob_db_impl.h"
+#include "utilities/compaction_filters/layered_compaction_filter_base.h"
+
+namespace ROCKSDB_NAMESPACE {
+class SystemClock;
+namespace blob_db {
+
+struct BlobCompactionContext {
+  BlobDBImpl* blob_db_impl = nullptr;
+  uint64_t next_file_number = 0;
+  std::unordered_set<uint64_t> current_blob_files;
+  SequenceNumber fifo_eviction_seq = 0;
+  uint64_t evict_expiration_up_to = 0;
+};
+
+struct BlobCompactionContextGC {
+  uint64_t cutoff_file_number = 0;
+};
+
+// Compaction filter that deletes expired blob indexes from the base DB.
+// Comes in two varieties, one for the non-GC case and one for the GC case.
+class BlobIndexCompactionFilterBase : public LayeredCompactionFilterBase {
+ public:
+  BlobIndexCompactionFilterBase(
+      BlobCompactionContext&& _context,
+      const CompactionFilter* _user_comp_filter,
+      std::unique_ptr<const CompactionFilter> _user_comp_filter_from_factory,
+      uint64_t current_time, Statistics* stats)
+      : LayeredCompactionFilterBase(_user_comp_filter,
+                                    std::move(_user_comp_filter_from_factory)),
+        context_(std::move(_context)),
+        current_time_(current_time),
+        statistics_(stats) {}
+
+  ~BlobIndexCompactionFilterBase() override;
+
+  // Filter expired blob indexes regardless of snapshots.
+  bool IgnoreSnapshots() const override { return true; }
+
+  Decision FilterV2(int level, const Slice& key, ValueType value_type,
+                    const Slice& value, std::string* new_value,
+                    std::string* skip_until) const override;
+
+  bool IsStackedBlobDbInternalCompactionFilter() const override { return true; }
+
+ protected:
+  bool IsBlobFileOpened() const;
+  virtual bool OpenNewBlobFileIfNeeded() const;
+  bool ReadBlobFromOldFile(const Slice& key, const BlobIndex& blob_index,
+                           PinnableSlice* blob, bool need_decompress,
+                           CompressionType* compression_type) const;
+  bool WriteBlobToNewFile(const Slice& key, const Slice& blob,
+                          uint64_t* new_blob_file_number,
+                          uint64_t* new_blob_offset) const;
+  bool CloseAndRegisterNewBlobFileIfNeeded() const;
+  bool CloseAndRegisterNewBlobFile() const;
+
+  Statistics* statistics() const { return statistics_; }
+  const BlobCompactionContext& context() const { return context_; }
+
+ private:
+  Decision HandleValueChange(const Slice& key, std::string* new_value) const;
+
+ private:
+  BlobCompactionContext context_;
+  const uint64_t current_time_;
+  Statistics* statistics_;
+
+  mutable std::shared_ptr<BlobFile> blob_file_;
+  mutable std::shared_ptr<BlobLogWriter> writer_;
+
+  // It is safe to not use std::atomic since the compaction filter, created
+  // from a compaction filter factory, will not be called from multiple
+  // threads.
+  mutable uint64_t expired_count_ = 0;
+  mutable uint64_t expired_size_ = 0;
+  mutable uint64_t evicted_count_ = 0;
+  mutable uint64_t evicted_size_ = 0;
+};
+
+class BlobIndexCompactionFilter : public BlobIndexCompactionFilterBase {
+ public:
+  BlobIndexCompactionFilter(
+      BlobCompactionContext&& _context,
+      const CompactionFilter* _user_comp_filter,
+      std::unique_ptr<const CompactionFilter> _user_comp_filter_from_factory,
+      uint64_t current_time, Statistics* stats)
+      : BlobIndexCompactionFilterBase(std::move(_context), _user_comp_filter,
+                                      std::move(_user_comp_filter_from_factory),
+                                      current_time, stats) {}
+
+  const char* Name() const override { return "BlobIndexCompactionFilter"; }
+};
+
+class BlobIndexCompactionFilterGC : public BlobIndexCompactionFilterBase {
+ public:
+  BlobIndexCompactionFilterGC(
+      BlobCompactionContext&& _context, BlobCompactionContextGC&& context_gc,
+      const CompactionFilter* _user_comp_filter,
+      std::unique_ptr<const CompactionFilter> _user_comp_filter_from_factory,
+      uint64_t current_time, Statistics* stats)
+      : BlobIndexCompactionFilterBase(std::move(_context), _user_comp_filter,
+                                      std::move(_user_comp_filter_from_factory),
+                                      current_time, stats),
+        context_gc_(std::move(context_gc)) {}
+
+  ~BlobIndexCompactionFilterGC() override;
+
+  const char* Name() const override { return "BlobIndexCompactionFilterGC"; }
+
+  BlobDecision PrepareBlobOutput(const Slice& key, const Slice& existing_value,
+                                 std::string* new_value) const override;
+
+ private:
+  bool OpenNewBlobFileIfNeeded() const override;
+
+ private:
+  BlobCompactionContextGC context_gc_;
+  mutable BlobDBGarbageCollectionStats gc_stats_;
+};
+
+// Compaction filter factory; similarly to the filters above, it comes
+// in two flavors, one that creates filters that support GC, and one
+// that creates non-GC filters.
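+// (The GC flavor is expected to be the one BlobDBImpl installs when
+// bdb_options_.enable_garbage_collection is true, and the non-GC flavor
+// otherwise.)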
+class BlobIndexCompactionFilterFactoryBase : public CompactionFilterFactory {
+ public:
+  BlobIndexCompactionFilterFactoryBase(BlobDBImpl* _blob_db_impl,
+                                       SystemClock* _clock,
+                                       const ColumnFamilyOptions& _cf_options,
+                                       Statistics* _statistics)
+      : blob_db_impl_(_blob_db_impl),
+        clock_(_clock),
+        statistics_(_statistics),
+        user_comp_filter_(_cf_options.compaction_filter),
+        user_comp_filter_factory_(_cf_options.compaction_filter_factory) {}
+
+ protected:
+  std::unique_ptr<CompactionFilter> CreateUserCompactionFilterFromFactory(
+      const CompactionFilter::Context& context) const;
+
+  BlobDBImpl* blob_db_impl() const { return blob_db_impl_; }
+  SystemClock* clock() const { return clock_; }
+  Statistics* statistics() const { return statistics_; }
+  const CompactionFilter* user_comp_filter() const { return user_comp_filter_; }
+
+ private:
+  BlobDBImpl* blob_db_impl_;
+  SystemClock* clock_;
+  Statistics* statistics_;
+  const CompactionFilter* user_comp_filter_;
+  std::shared_ptr<CompactionFilterFactory> user_comp_filter_factory_;
+};
+
+class BlobIndexCompactionFilterFactory
+    : public BlobIndexCompactionFilterFactoryBase {
+ public:
+  BlobIndexCompactionFilterFactory(BlobDBImpl* _blob_db_impl,
+                                   SystemClock* _clock,
+                                   const ColumnFamilyOptions& _cf_options,
+                                   Statistics* _statistics)
+      : BlobIndexCompactionFilterFactoryBase(_blob_db_impl, _clock, _cf_options,
+                                             _statistics) {}
+
+  const char* Name() const override {
+    return "BlobIndexCompactionFilterFactory";
+  }
+
+  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) override;
+};
+
+class BlobIndexCompactionFilterFactoryGC
+    : public BlobIndexCompactionFilterFactoryBase {
+ public:
+  BlobIndexCompactionFilterFactoryGC(BlobDBImpl* _blob_db_impl,
+                                     SystemClock* _clock,
+                                     const ColumnFamilyOptions& _cf_options,
+                                     Statistics* _statistics)
+      : BlobIndexCompactionFilterFactoryBase(_blob_db_impl, _clock, _cf_options,
+                                             _statistics) {}
+
+  const char* Name() const override {
+    return "BlobIndexCompactionFilterFactoryGC";
+  }
+
+  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) override;
+};
+
+}  // namespace blob_db
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.cc
new file mode 100644
index 0000000000..b6fe039036
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.cc
@@ -0,0 +1,112 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#include "utilities/blob_db/blob_db.h"
+
+#include <cinttypes>
+
+#include "logging/logging.h"
+#include "utilities/blob_db/blob_db_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace blob_db {
+
+Status BlobDB::Open(const Options& options, const BlobDBOptions& bdb_options,
+                    const std::string& dbname, BlobDB** blob_db) {
+  *blob_db = nullptr;
+  DBOptions db_options(options);
+  ColumnFamilyOptions cf_options(options);
+  std::vector<ColumnFamilyDescriptor> column_families;
+  column_families.push_back(
+      ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
+  std::vector<ColumnFamilyHandle*> handles;
+  Status s = BlobDB::Open(db_options, bdb_options, dbname, column_families,
+                          &handles, blob_db);
+  if (s.ok()) {
+    assert(handles.size() == 1);
+    // We can delete the handle since DBImpl is always holding a reference to
+    // the default column family
+    delete handles[0];
+  }
+  return s;
+}
+
+Status BlobDB::Open(const DBOptions& db_options,
+                    const BlobDBOptions& bdb_options, const std::string& dbname,
+                    const std::vector<ColumnFamilyDescriptor>& column_families,
+                    std::vector<ColumnFamilyHandle*>* handles,
+                    BlobDB** blob_db) {
+  assert(handles);
+
+  if (column_families.size() != 1 ||
+      column_families[0].name != kDefaultColumnFamilyName) {
+    return Status::NotSupported(
+        "Blob DB doesn't support non-default column family.");
+  }
+
+  BlobDBImpl* blob_db_impl = new BlobDBImpl(dbname, bdb_options, db_options,
+                                            column_families[0].options);
+  Status s = blob_db_impl->Open(handles);
+  if (s.ok()) {
+    *blob_db = static_cast<BlobDB*>(blob_db_impl);
+  } else {
+    if (!handles->empty()) {
+      for (ColumnFamilyHandle* cfh : *handles) {
+        blob_db_impl->DestroyColumnFamilyHandle(cfh);
+      }
+
+      handles->clear();
+    }
+
+    delete blob_db_impl;
+    *blob_db = nullptr;
+  }
+  return s;
+}
+
+BlobDB::BlobDB() : StackableDB(nullptr) {}
+
+void BlobDBOptions::Dump(Logger* log) const {
+  ROCKS_LOG_HEADER(
+      log, "                                  BlobDBOptions.blob_dir: %s",
+      blob_dir.c_str());
+  ROCKS_LOG_HEADER(
+      log, "                             BlobDBOptions.path_relative: %d",
+      path_relative);
+  ROCKS_LOG_HEADER(
+      log, "                                   BlobDBOptions.is_fifo: %d",
+      is_fifo);
+  ROCKS_LOG_HEADER(
+      log, "                               BlobDBOptions.max_db_size: %" PRIu64,
+      max_db_size);
+  ROCKS_LOG_HEADER(
+      log, "                            BlobDBOptions.ttl_range_secs: %" PRIu64,
+      ttl_range_secs);
+  ROCKS_LOG_HEADER(
+      log, "                             BlobDBOptions.min_blob_size: %" PRIu64,
+      min_blob_size);
+  ROCKS_LOG_HEADER(
+      log, "                            BlobDBOptions.bytes_per_sync: %" PRIu64,
+      bytes_per_sync);
+  ROCKS_LOG_HEADER(
+      log, "                            BlobDBOptions.blob_file_size: %" PRIu64,
+      blob_file_size);
+  ROCKS_LOG_HEADER(
+      log, "                               BlobDBOptions.compression: %d",
+      static_cast<int>(compression));
+  ROCKS_LOG_HEADER(
+      log, "                 BlobDBOptions.enable_garbage_collection: %d",
+      enable_garbage_collection);
+  ROCKS_LOG_HEADER(
+      log, "                 BlobDBOptions.garbage_collection_cutoff: %f",
+      garbage_collection_cutoff);
+  ROCKS_LOG_HEADER(
+      log, "                  BlobDBOptions.disable_background_tasks: %d",
+      disable_background_tasks);
+}
+
+}  // namespace blob_db
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.h
new file mode 100644
index 0000000000..e2f0b7bdbd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db.h
@@ -0,0 +1,264 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+
+#include <functional>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "rocksdb/db.h"
+#include "rocksdb/status.h"
+#include "rocksdb/utilities/stackable_db.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace blob_db {
+
+// A wrapped database which puts the values of KV pairs in a separate log
+// and stores the location within the log in the underlying DB.
+//
+// The factory needs to be moved to include/rocksdb/utilities to allow
+// users to use blob DB.
+
+constexpr uint64_t kNoExpiration = std::numeric_limits<uint64_t>::max();
+
+struct BlobDBOptions {
+  // Name of the directory under the base DB where blobs will be stored. Using
+  // a directory where the base DB stores its SST files is not supported.
+  // Default is "blob_dir".
+  std::string blob_dir = "blob_dir";
+
+  // Whether the blob_dir path is relative or absolute.
+  bool path_relative = true;
+
+  // When max_db_size is reached, evict blob files to free up space
+  // instead of returning a NoSpace error on write. Blob files will be
+  // evicted from oldest to newest, based on file creation time.
+  bool is_fifo = false;
+
+  // Maximum size of the database (including SST files and blob files).
+  //
+  // Default: 0 (no limits)
+  uint64_t max_db_size = 0;
+
+  // A new TTL bucket (blob file) is opened for each ttl_range_secs window.
+  // So if ttl_range_secs is 600 seconds (10 minutes), and the first bucket
+  // starts at 1471542000, then the blob buckets will be:
+  // first bucket:  1471542000 - 1471542600
+  // second bucket: 1471542600 - 1471543200
+  // and so on
+  uint64_t ttl_range_secs = 3600;
+
+  // The smallest value to store in the blob log. Values smaller than this
+  // threshold will be inlined in the base DB together with the key.
+  uint64_t min_blob_size = 0;
+
+  // Allows the OS to incrementally sync blob files to disk for every
+  // bytes_per_sync bytes written. Users shouldn't rely on it for a
+  // persistence guarantee.
+  uint64_t bytes_per_sync = 512 * 1024;
+
+  // The target size of each blob file. A file becomes immutable
+  // after it exceeds that size.
+  uint64_t blob_file_size = 256 * 1024 * 1024;
+
+  // What compression to use for blobs.
+  CompressionType compression = kNoCompression;
+
+  // If enabled, BlobDB cleans up stale blobs in non-TTL files during compaction
+  // by rewriting the remaining live blobs to new files.
+  bool enable_garbage_collection = false;
+
+  // The cutoff in terms of blob file age for garbage collection. Blobs in
+  // the oldest N non-TTL blob files will be rewritten when encountered during
+  // compaction, where N = garbage_collection_cutoff * number_of_non_TTL_files.
+  double garbage_collection_cutoff = 0.25;
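+
+  // Illustration (made-up numbers, not from this codebase): with 20 live
+  // immutable non-TTL blob files and the default cutoff of 0.25, blobs that
+  // reside in the oldest 20 * 0.25 = 5 files are relocated to new blob files
+  // as compaction encounters them.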
+
+  // Disable all background jobs. Used for tests only.
+  bool disable_background_tasks = false;
+
+  void Dump(Logger* log) const;
+};
+
+class BlobDB : public StackableDB {
+ public:
+  using ROCKSDB_NAMESPACE::StackableDB::Put;
+  virtual Status Put(const WriteOptions& options, const Slice& key,
+                     const Slice& value) override = 0;
+  virtual Status Put(const WriteOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& value) override {
+    if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
+      return Status::NotSupported(
+          "Blob DB doesn't support non-default column family.");
+    }
+    return Put(options, key, value);
+  }
+
+  using ROCKSDB_NAMESPACE::StackableDB::Delete;
+  virtual Status Delete(const WriteOptions& options,
+                        ColumnFamilyHandle* column_family,
+                        const Slice& key) override {
+    if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
+      return Status::NotSupported(
+          "Blob DB doesn't support non-default column family.");
+    }
+    assert(db_ != nullptr);
+    return db_->Delete(options, column_family, key);
+  }
+
+  virtual Status PutWithTTL(const WriteOptions& options, const Slice& key,
+                            const Slice& value, uint64_t ttl) = 0;
+  virtual Status PutWithTTL(const WriteOptions& options,
+                            ColumnFamilyHandle* column_family, const Slice& key,
+                            const Slice& value, uint64_t ttl) {
+    if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
+      return Status::NotSupported(
+          "Blob DB doesn't support non-default column family.");
+    }
+    return PutWithTTL(options, key, value, ttl);
+  }
+
+  // Put with expiration. A key with an expiration time equal to
+  // std::numeric_limits<uint64_t>::max() (kNoExpiration) doesn't expire.
+  virtual Status PutUntil(const WriteOptions& options, const Slice& key,
+                          const Slice& value, uint64_t expiration) = 0;
+  virtual Status PutUntil(const WriteOptions& options,
+                          ColumnFamilyHandle* column_family, const Slice& key,
+                          const Slice& value, uint64_t expiration) {
+    if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
+      return Status::NotSupported(
+          "Blob DB doesn't support non-default column family.");
+    }
+    return PutUntil(options, key, value, expiration);
+  }
+
+  using ROCKSDB_NAMESPACE::StackableDB::Get;
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value) override = 0;
+
+  // Get value and expiration.
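+  // Example (illustrative; key and TTL are made up): after
+  // PutWithTTL(WriteOptions(), "k", "v", /*ttl=*/600), a later Get can
+  // recover the absolute expiration:
+  //
+  //   PinnableSlice value;
+  //   uint64_t expiration = 0;
+  //   Status s = blob_db->Get(ReadOptions(), "k", &value, &expiration);
+  //   // For keys written without a TTL, expiration == kNoExpiration.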
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value, uint64_t* expiration) = 0;
+  virtual Status Get(const ReadOptions& options, const Slice& key,
+                     PinnableSlice* value, uint64_t* expiration) {
+    return Get(options, DefaultColumnFamily(), key, value, expiration);
+  }
+
+  using ROCKSDB_NAMESPACE::StackableDB::MultiGet;
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options, const std::vector<Slice>& keys,
+      std::vector<std::string>* values) override = 0;
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_families,
+      const std::vector<Slice>& keys,
+      std::vector<std::string>* values) override {
+    for (auto column_family : column_families) {
+      if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
+        return std::vector<Status>(
+            column_families.size(),
+            Status::NotSupported(
+                "Blob DB doesn't support non-default column family."));
+      }
+    }
+    return MultiGet(options, keys, values);
+  }
+  virtual void MultiGet(const ReadOptions& /*options*/,
+                        ColumnFamilyHandle* /*column_family*/,
+                        const size_t num_keys, const Slice* /*keys*/,
+                        PinnableSlice* /*values*/, Status* statuses,
+                        const bool /*sorted_input*/ = false) override {
+    for (size_t i = 0; i < num_keys; ++i) {
+      statuses[i] =
+          Status::NotSupported("Blob DB doesn't support batched MultiGet");
+    }
+  }
+
+  using ROCKSDB_NAMESPACE::StackableDB::SingleDelete;
+  virtual Status SingleDelete(const WriteOptions& /*wopts*/,
+                              ColumnFamilyHandle* /*column_family*/,
+                              const Slice& /*key*/) override {
+    return Status::NotSupported("Not supported operation in blob db.");
+  }
+
+  using ROCKSDB_NAMESPACE::StackableDB::Merge;
+  virtual Status Merge(const WriteOptions& /*options*/,
+                       ColumnFamilyHandle* /*column_family*/,
+                       const Slice& /*key*/, const Slice& /*value*/) override {
+    return Status::NotSupported("Not supported operation in blob db.");
+  }
+
+  virtual Status Write(const WriteOptions& opts,
+                       WriteBatch* updates) override = 0;
+
+  using ROCKSDB_NAMESPACE::StackableDB::NewIterator;
+  virtual Iterator* NewIterator(const ReadOptions& options) override = 0;
+  virtual Iterator* NewIterator(const ReadOptions& options,
+                                ColumnFamilyHandle* column_family) override {
+    if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
+      // Blob DB doesn't support non-default column family.
+      return nullptr;
+    }
+    return NewIterator(options);
+  }
+
+  Status CompactFiles(
+      const CompactionOptions& compact_options,
+      const std::vector<std::string>& input_file_names, const int output_level,
+      const int output_path_id = -1,
+      std::vector<std::string>* const output_file_names = nullptr,
+      CompactionJobInfo* compaction_job_info = nullptr) override = 0;
+  Status CompactFiles(
+      const CompactionOptions& compact_options,
+      ColumnFamilyHandle* column_family,
+      const std::vector<std::string>& input_file_names, const int output_level,
+      const int output_path_id = -1,
+      std::vector<std::string>* const output_file_names = nullptr,
+      CompactionJobInfo* compaction_job_info = nullptr) override {
+    if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
+      return Status::NotSupported(
+          "Blob DB doesn't support non-default column family.");
+    }
+
+    return CompactFiles(compact_options, input_file_names, output_level,
+                        output_path_id, output_file_names,
+                        compaction_job_info);
+  }
+
+  using ROCKSDB_NAMESPACE::StackableDB::Close;
+  virtual Status Close() override = 0;
+
+  // Opening blob db.
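+  // A minimal usage sketch (illustrative only; the path and sizes are made
+  // up, and error handling is elided):
+  //
+  //   Options options;
+  //   options.create_if_missing = true;
+  //   BlobDBOptions bdb_options;
+  //   bdb_options.min_blob_size = 1024;  // inline values smaller than 1 KiB
+  //   BlobDB* blob_db = nullptr;
+  //   Status s = BlobDB::Open(options, bdb_options, "/tmp/blob_db_example",
+  //                           &blob_db);
+  //   if (s.ok()) {
+  //     s = blob_db->Put(WriteOptions(), "key", "value");
+  //     delete blob_db;  // the destructor closes the database
+  //   }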
+  static Status Open(const Options& options, const BlobDBOptions& bdb_options,
+                     const std::string& dbname, BlobDB** blob_db);
+
+  static Status Open(const DBOptions& db_options,
+                     const BlobDBOptions& bdb_options,
+                     const std::string& dbname,
+                     const std::vector<ColumnFamilyDescriptor>& column_families,
+                     std::vector<ColumnFamilyHandle*>* handles,
+                     BlobDB** blob_db);
+
+  virtual BlobDBOptions GetBlobDBOptions() const = 0;
+
+  virtual Status SyncBlobFiles() = 0;
+
+  virtual ~BlobDB() {}
+
+ protected:
+  explicit BlobDB();
+};
+
+// Destroy the content of the database.
+Status DestroyBlobDB(const std::string& dbname, const Options& options,
+                     const BlobDBOptions& bdb_options);
+
+}  // namespace blob_db
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_gc_stats.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_gc_stats.h
new file mode 100644
index 0000000000..12c11af854
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_gc_stats.h
@@ -0,0 +1,54 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+
+#include <cstdint>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace blob_db {
+
+/**
+ * Statistics related to a single garbage collection pass (i.e. a single
+ * (sub)compaction).
+ */
+class BlobDBGarbageCollectionStats {
+ public:
+  uint64_t AllBlobs() const { return all_blobs_; }
+  uint64_t AllBytes() const { return all_bytes_; }
+  uint64_t RelocatedBlobs() const { return relocated_blobs_; }
+  uint64_t RelocatedBytes() const { return relocated_bytes_; }
+  uint64_t NewFiles() const { return new_files_; }
+  bool HasError() const { return error_; }
+
+  void AddBlob(uint64_t size) {
+    ++all_blobs_;
+    all_bytes_ += size;
+  }
+
+  void AddRelocatedBlob(uint64_t size) {
+    ++relocated_blobs_;
+    relocated_bytes_ += size;
+  }
+
+  void AddNewFile() { ++new_files_; }
+
+  void SetError() { error_ = true; }
+
+ private:
+  uint64_t all_blobs_ = 0;
+  uint64_t all_bytes_ = 0;
+  uint64_t relocated_blobs_ = 0;
+  uint64_t relocated_bytes_ = 0;
+  uint64_t new_files_ = 0;
+  bool error_ = false;
+};
+
+}  // namespace blob_db
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.cc
new file mode 100644
index 0000000000..7a4b603f22
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.cc
@@ -0,0 +1,2185 @@
+
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "utilities/blob_db/blob_db_impl.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <iomanip>
+#include <memory>
+#include <sstream>
+
+#include "db/blob/blob_index.h"
+#include "db/db_impl/db_impl.h"
+#include "db/write_batch_internal.h"
+#include "file/file_util.h"
+#include "file/filename.h"
+#include "file/random_access_file_reader.h"
+#include "file/sst_file_manager_impl.h"
+#include "file/writable_file_writer.h"
+#include "logging/logging.h"
+#include "monitoring/instrumented_mutex.h"
+#include "monitoring/statistics_impl.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/utilities/stackable_db.h"
+#include "rocksdb/utilities/transaction.h"
+#include "table/block_based/block.h"
+#include "table/block_based/block_based_table_builder.h"
+#include "table/block_based/block_builder.h"
+#include "table/meta_blocks.h"
+#include "test_util/sync_point.h"
+#include "util/cast_util.h"
+#include "util/crc32c.h"
+#include "util/mutexlock.h"
+#include "util/random.h"
+#include "util/stop_watch.h"
+#include "util/timer_queue.h"
+#include "utilities/blob_db/blob_compaction_filter.h"
+#include "utilities/blob_db/blob_db_iterator.h"
+#include "utilities/blob_db/blob_db_listener.h"
+
+namespace {
+int kBlockBasedTableVersionFormat = 2;
+}  // end namespace
+
+namespace ROCKSDB_NAMESPACE {
+namespace blob_db {
+
+bool BlobFileComparator::operator()(
+    const std::shared_ptr<BlobFile>& lhs,
+    const std::shared_ptr<BlobFile>& rhs) const {
+  return lhs->BlobFileNumber() > rhs->BlobFileNumber();
+}
+
+bool BlobFileComparatorTTL::operator()(
+    const std::shared_ptr<BlobFile>& lhs,
+    const std::shared_ptr<BlobFile>& rhs) const {
+  assert(lhs->HasTTL() && rhs->HasTTL());
+  if (lhs->expiration_range_.first < rhs->expiration_range_.first) {
+    return true;
+  }
+  if (lhs->expiration_range_.first > rhs->expiration_range_.first) {
+    return false;
+  }
+  return lhs->BlobFileNumber() < rhs->BlobFileNumber();
+}
+
+BlobDBImpl::BlobDBImpl(const std::string& dbname,
+                       const BlobDBOptions& blob_db_options,
+                       const DBOptions& db_options,
+                       const ColumnFamilyOptions& cf_options)
+    : BlobDB(),
+      dbname_(dbname),
+      db_impl_(nullptr),
+      env_(db_options.env),
+      bdb_options_(blob_db_options),
+      db_options_(db_options),
+      cf_options_(cf_options),
+      file_options_(db_options),
+      statistics_(db_options_.statistics.get()),
+      next_file_number_(1),
+      flush_sequence_(0),
+      closed_(true),
+      open_file_count_(0),
+      total_blob_size_(0),
+      live_sst_size_(0),
+      fifo_eviction_seq_(0),
+      evict_expiration_up_to_(0),
+      debug_level_(0) {
+  clock_ = env_->GetSystemClock().get();
+  blob_dir_ = (bdb_options_.path_relative)
+                  ? dbname + "/" + bdb_options_.blob_dir
+                  : bdb_options_.blob_dir;
+  file_options_.bytes_per_sync = blob_db_options.bytes_per_sync;
+}
+
+BlobDBImpl::~BlobDBImpl() {
+  tqueue_.shutdown();
+  // CancelAllBackgroundWork(db_, true);
+  Status s __attribute__((__unused__)) = Close();
+  assert(s.ok());
+}
+
+Status BlobDBImpl::Close() {
+  if (closed_) {
+    return Status::OK();
+  }
+  closed_ = true;
+
+  // Close the base DB before BlobDBImpl destructs, to stop the event listener
+  // and compaction filter calls.
+  Status s = db_->Close();
+  // Delete db_ regardless of whether Close() succeeded.
+  delete db_;
+  // Reset the pointers so that StackableDB doesn't delete them again.
+ db_ = nullptr; + db_impl_ = nullptr; + if (!s.ok()) { + return s; + } + + s = SyncBlobFiles(); + return s; +} + +BlobDBOptions BlobDBImpl::GetBlobDBOptions() const { return bdb_options_; } + +Status BlobDBImpl::Open(std::vector* handles) { + assert(handles != nullptr); + assert(db_ == nullptr); + + if (blob_dir_.empty()) { + return Status::NotSupported("No blob directory in options"); + } + + if (bdb_options_.garbage_collection_cutoff < 0.0 || + bdb_options_.garbage_collection_cutoff > 1.0) { + return Status::InvalidArgument( + "Garbage collection cutoff must be in the interval [0.0, 1.0]"); + } + + // Temporarily disable compactions in the base DB during open; save the user + // defined value beforehand so we can restore it once BlobDB is initialized. + // Note: this is only needed if garbage collection is enabled. + const bool disable_auto_compactions = cf_options_.disable_auto_compactions; + + if (bdb_options_.enable_garbage_collection) { + cf_options_.disable_auto_compactions = true; + } + + Status s; + + // Create info log. + if (db_options_.info_log == nullptr) { + s = CreateLoggerFromOptions(dbname_, db_options_, &db_options_.info_log); + if (!s.ok()) { + return s; + } + } + + ROCKS_LOG_INFO(db_options_.info_log, "Opening BlobDB..."); + + if ((cf_options_.compaction_filter != nullptr || + cf_options_.compaction_filter_factory != nullptr)) { + ROCKS_LOG_INFO(db_options_.info_log, + "BlobDB only support compaction filter on non-TTL values."); + } + + // Open blob directory. + s = env_->CreateDirIfMissing(blob_dir_); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Failed to create blob_dir %s, status: %s", + blob_dir_.c_str(), s.ToString().c_str()); + } + s = env_->GetFileSystem()->NewDirectory(blob_dir_, IOOptions(), &dir_ent_, + nullptr); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Failed to open blob_dir %s, status: %s", blob_dir_.c_str(), + s.ToString().c_str()); + return s; + } + + // Open blob files. + s = OpenAllBlobFiles(); + if (!s.ok()) { + return s; + } + + // Update options + if (bdb_options_.enable_garbage_collection) { + db_options_.listeners.push_back(std::make_shared(this)); + cf_options_.compaction_filter_factory = + std::make_shared( + this, clock_, cf_options_, statistics_); + } else { + db_options_.listeners.push_back(std::make_shared(this)); + cf_options_.compaction_filter_factory = + std::make_shared( + this, clock_, cf_options_, statistics_); + } + + // Reset user compaction filter after building into compaction factory. + cf_options_.compaction_filter = nullptr; + + // Open base db. + ColumnFamilyDescriptor cf_descriptor(kDefaultColumnFamilyName, cf_options_); + s = DB::Open(db_options_, dbname_, {cf_descriptor}, handles, &db_); + if (!s.ok()) { + return s; + } + db_impl_ = static_cast_with_check(db_->GetRootDB()); + + // Sanitize the blob_dir provided. Using a directory where the + // base DB stores its files for the default CF is not supported. 
+ const ColumnFamilyData* const cfd = + static_cast(DefaultColumnFamily())->cfd(); + assert(cfd); + + const ImmutableCFOptions* const ioptions = cfd->ioptions(); + assert(ioptions); + + assert(env_); + + for (const auto& cf_path : ioptions->cf_paths) { + bool blob_dir_same_as_cf_dir = false; + s = env_->AreFilesSame(blob_dir_, cf_path.path, &blob_dir_same_as_cf_dir); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Error while sanitizing blob_dir %s, status: %s", + blob_dir_.c_str(), s.ToString().c_str()); + return s; + } + + if (blob_dir_same_as_cf_dir) { + return Status::NotSupported( + "Using the base DB's storage directories for BlobDB files is not " + "supported."); + } + } + + // Initialize SST file <-> oldest blob file mapping if garbage collection + // is enabled. + if (bdb_options_.enable_garbage_collection) { + std::vector live_files; + db_->GetLiveFilesMetaData(&live_files); + + InitializeBlobFileToSstMapping(live_files); + + MarkUnreferencedBlobFilesObsoleteDuringOpen(); + + if (!disable_auto_compactions) { + s = db_->EnableAutoCompaction(*handles); + if (!s.ok()) { + ROCKS_LOG_ERROR( + db_options_.info_log, + "Failed to enable automatic compactions during open, status: %s", + s.ToString().c_str()); + return s; + } + } + } + + // Add trash files in blob dir to file delete scheduler. + SstFileManagerImpl* sfm = static_cast( + db_impl_->immutable_db_options().sst_file_manager.get()); + DeleteScheduler::CleanupDirectory(env_, sfm, blob_dir_); + + UpdateLiveSSTSize(); + + // Start background jobs. + if (!bdb_options_.disable_background_tasks) { + StartBackgroundTasks(); + } + + ROCKS_LOG_INFO(db_options_.info_log, "BlobDB pointer %p", this); + bdb_options_.Dump(db_options_.info_log.get()); + closed_ = false; + return s; +} + +void BlobDBImpl::StartBackgroundTasks() { + // store a call to a member function and object + tqueue_.add( + kReclaimOpenFilesPeriodMillisecs, + std::bind(&BlobDBImpl::ReclaimOpenFiles, this, std::placeholders::_1)); + tqueue_.add( + kDeleteObsoleteFilesPeriodMillisecs, + std::bind(&BlobDBImpl::DeleteObsoleteFiles, this, std::placeholders::_1)); + tqueue_.add(kSanityCheckPeriodMillisecs, + std::bind(&BlobDBImpl::SanityCheck, this, std::placeholders::_1)); + tqueue_.add( + kEvictExpiredFilesPeriodMillisecs, + std::bind(&BlobDBImpl::EvictExpiredFiles, this, std::placeholders::_1)); +} + +Status BlobDBImpl::GetAllBlobFiles(std::set* file_numbers) { + assert(file_numbers != nullptr); + std::vector all_files; + Status s = env_->GetChildren(blob_dir_, &all_files); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Failed to get list of blob files, status: %s", + s.ToString().c_str()); + return s; + } + + for (const auto& file_name : all_files) { + uint64_t file_number; + FileType type; + bool success = ParseFileName(file_name, &file_number, &type); + if (success && type == kBlobFile) { + file_numbers->insert(file_number); + } else { + ROCKS_LOG_WARN(db_options_.info_log, + "Skipping file in blob directory: %s", file_name.c_str()); + } + } + + return s; +} + +Status BlobDBImpl::OpenAllBlobFiles() { + std::set file_numbers; + Status s = GetAllBlobFiles(&file_numbers); + if (!s.ok()) { + return s; + } + + if (!file_numbers.empty()) { + next_file_number_.store(*file_numbers.rbegin() + 1); + } + + std::ostringstream blob_file_oss; + std::ostringstream live_imm_oss; + std::ostringstream obsolete_file_oss; + + for (auto& file_number : file_numbers) { + std::shared_ptr blob_file = std::make_shared( + this, blob_dir_, file_number, 
db_options_.info_log.get()); + blob_file->MarkImmutable(/* sequence */ 0); + + // Read file header and footer + Status read_metadata_status = + blob_file->ReadMetadata(env_->GetFileSystem(), file_options_); + if (read_metadata_status.IsCorruption()) { + // Remove incomplete file. + if (!obsolete_files_.empty()) { + obsolete_file_oss << ", "; + } + obsolete_file_oss << file_number; + + ObsoleteBlobFile(blob_file, 0 /*obsolete_seq*/, false /*update_size*/); + continue; + } else if (!read_metadata_status.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Unable to read metadata of blob file %" PRIu64 + ", status: '%s'", + file_number, read_metadata_status.ToString().c_str()); + return read_metadata_status; + } + + total_blob_size_ += blob_file->GetFileSize(); + + if (!blob_files_.empty()) { + blob_file_oss << ", "; + } + blob_file_oss << file_number; + + blob_files_[file_number] = blob_file; + + if (!blob_file->HasTTL()) { + if (!live_imm_non_ttl_blob_files_.empty()) { + live_imm_oss << ", "; + } + live_imm_oss << file_number; + + live_imm_non_ttl_blob_files_[file_number] = blob_file; + } + } + + ROCKS_LOG_INFO(db_options_.info_log, + "Found %" ROCKSDB_PRIszt " blob files: %s", blob_files_.size(), + blob_file_oss.str().c_str()); + ROCKS_LOG_INFO( + db_options_.info_log, "Found %" ROCKSDB_PRIszt " non-TTL blob files: %s", + live_imm_non_ttl_blob_files_.size(), live_imm_oss.str().c_str()); + ROCKS_LOG_INFO(db_options_.info_log, + "Found %" ROCKSDB_PRIszt + " incomplete or corrupted blob files: %s", + obsolete_files_.size(), obsolete_file_oss.str().c_str()); + return s; +} + +template +void BlobDBImpl::LinkSstToBlobFileImpl(uint64_t sst_file_number, + uint64_t blob_file_number, + Linker linker) { + assert(bdb_options_.enable_garbage_collection); + assert(blob_file_number != kInvalidBlobFileNumber); + + auto it = blob_files_.find(blob_file_number); + if (it == blob_files_.end()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Blob file %" PRIu64 + " not found while trying to link " + "SST file %" PRIu64, + blob_file_number, sst_file_number); + return; + } + + BlobFile* const blob_file = it->second.get(); + assert(blob_file); + + linker(blob_file, sst_file_number); + + ROCKS_LOG_INFO(db_options_.info_log, + "Blob file %" PRIu64 " linked to SST file %" PRIu64, + blob_file_number, sst_file_number); +} + +void BlobDBImpl::LinkSstToBlobFile(uint64_t sst_file_number, + uint64_t blob_file_number) { + auto linker = [](BlobFile* blob_file, uint64_t sst_file) { + WriteLock file_lock(&blob_file->mutex_); + blob_file->LinkSstFile(sst_file); + }; + + LinkSstToBlobFileImpl(sst_file_number, blob_file_number, linker); +} + +void BlobDBImpl::LinkSstToBlobFileNoLock(uint64_t sst_file_number, + uint64_t blob_file_number) { + auto linker = [](BlobFile* blob_file, uint64_t sst_file) { + blob_file->LinkSstFile(sst_file); + }; + + LinkSstToBlobFileImpl(sst_file_number, blob_file_number, linker); +} + +void BlobDBImpl::UnlinkSstFromBlobFile(uint64_t sst_file_number, + uint64_t blob_file_number) { + assert(bdb_options_.enable_garbage_collection); + assert(blob_file_number != kInvalidBlobFileNumber); + + auto it = blob_files_.find(blob_file_number); + if (it == blob_files_.end()) { + ROCKS_LOG_WARN(db_options_.info_log, + "Blob file %" PRIu64 + " not found while trying to unlink " + "SST file %" PRIu64, + blob_file_number, sst_file_number); + return; + } + + BlobFile* const blob_file = it->second.get(); + assert(blob_file); + + { + WriteLock file_lock(&blob_file->mutex_); + blob_file->UnlinkSstFile(sst_file_number); + } + + 
ROCKS_LOG_INFO(db_options_.info_log, + "Blob file %" PRIu64 " unlinked from SST file %" PRIu64, + blob_file_number, sst_file_number); +} + +void BlobDBImpl::InitializeBlobFileToSstMapping( + const std::vector& live_files) { + assert(bdb_options_.enable_garbage_collection); + + for (const auto& live_file : live_files) { + const uint64_t sst_file_number = live_file.file_number; + const uint64_t blob_file_number = live_file.oldest_blob_file_number; + + if (blob_file_number == kInvalidBlobFileNumber) { + continue; + } + + LinkSstToBlobFileNoLock(sst_file_number, blob_file_number); + } +} + +void BlobDBImpl::ProcessFlushJobInfo(const FlushJobInfo& info) { + assert(bdb_options_.enable_garbage_collection); + + WriteLock lock(&mutex_); + + if (info.oldest_blob_file_number != kInvalidBlobFileNumber) { + LinkSstToBlobFile(info.file_number, info.oldest_blob_file_number); + } + + assert(flush_sequence_ < info.largest_seqno); + flush_sequence_ = info.largest_seqno; + + MarkUnreferencedBlobFilesObsolete(); +} + +void BlobDBImpl::ProcessCompactionJobInfo(const CompactionJobInfo& info) { + assert(bdb_options_.enable_garbage_collection); + + if (!info.status.ok()) { + return; + } + + // Note: the same SST file may appear in both the input and the output + // file list in case of a trivial move. We walk through the two lists + // below in a fashion that's similar to merge sort to detect this. + + auto cmp = [](const CompactionFileInfo& lhs, const CompactionFileInfo& rhs) { + return lhs.file_number < rhs.file_number; + }; + + auto inputs = info.input_file_infos; + auto iit = inputs.begin(); + const auto iit_end = inputs.end(); + + std::sort(iit, iit_end, cmp); + + auto outputs = info.output_file_infos; + auto oit = outputs.begin(); + const auto oit_end = outputs.end(); + + std::sort(oit, oit_end, cmp); + + WriteLock lock(&mutex_); + + while (iit != iit_end && oit != oit_end) { + const auto& input = *iit; + const auto& output = *oit; + + if (input.file_number == output.file_number) { + ++iit; + ++oit; + } else if (input.file_number < output.file_number) { + if (input.oldest_blob_file_number != kInvalidBlobFileNumber) { + UnlinkSstFromBlobFile(input.file_number, input.oldest_blob_file_number); + } + + ++iit; + } else { + assert(output.file_number < input.file_number); + + if (output.oldest_blob_file_number != kInvalidBlobFileNumber) { + LinkSstToBlobFile(output.file_number, output.oldest_blob_file_number); + } + + ++oit; + } + } + + while (iit != iit_end) { + const auto& input = *iit; + + if (input.oldest_blob_file_number != kInvalidBlobFileNumber) { + UnlinkSstFromBlobFile(input.file_number, input.oldest_blob_file_number); + } + + ++iit; + } + + while (oit != oit_end) { + const auto& output = *oit; + + if (output.oldest_blob_file_number != kInvalidBlobFileNumber) { + LinkSstToBlobFile(output.file_number, output.oldest_blob_file_number); + } + + ++oit; + } + + MarkUnreferencedBlobFilesObsolete(); +} + +bool BlobDBImpl::MarkBlobFileObsoleteIfNeeded( + const std::shared_ptr& blob_file, SequenceNumber obsolete_seq) { + assert(blob_file); + assert(!blob_file->HasTTL()); + assert(blob_file->Immutable()); + assert(bdb_options_.enable_garbage_collection); + + // Note: FIFO eviction could have marked this file obsolete already. + if (blob_file->Obsolete()) { + return true; + } + + // We cannot mark this file (or any higher-numbered files for that matter) + // obsolete if it is referenced by any memtables or SSTs. We keep track of + // the SSTs explicitly. 
To account for memtables, we keep track of the highest + // sequence number received in flush notifications, and we do not mark the + // blob file obsolete if there are still unflushed memtables from before + // the time the blob file was closed. + if (blob_file->GetImmutableSequence() > flush_sequence_ || + !blob_file->GetLinkedSstFiles().empty()) { + return false; + } + + ROCKS_LOG_INFO(db_options_.info_log, + "Blob file %" PRIu64 " is no longer needed, marking obsolete", + blob_file->BlobFileNumber()); + + ObsoleteBlobFile(blob_file, obsolete_seq, /* update_size */ true); + return true; +} + +template +void BlobDBImpl::MarkUnreferencedBlobFilesObsoleteImpl(Functor mark_if_needed) { + assert(bdb_options_.enable_garbage_collection); + + // Iterate through all live immutable non-TTL blob files, and mark them + // obsolete assuming no SST files or memtables rely on the blobs in them. + // Note: we need to stop as soon as we find a blob file that has any + // linked SSTs (or one potentially referenced by memtables). + + uint64_t obsoleted_files = 0; + + auto it = live_imm_non_ttl_blob_files_.begin(); + while (it != live_imm_non_ttl_blob_files_.end()) { + const auto& blob_file = it->second; + assert(blob_file); + assert(blob_file->BlobFileNumber() == it->first); + assert(!blob_file->HasTTL()); + assert(blob_file->Immutable()); + + // Small optimization: Obsolete() does an atomic read, so we can do + // this check without taking a lock on the blob file's mutex. + if (blob_file->Obsolete()) { + it = live_imm_non_ttl_blob_files_.erase(it); + continue; + } + + if (!mark_if_needed(blob_file)) { + break; + } + + it = live_imm_non_ttl_blob_files_.erase(it); + + ++obsoleted_files; + } + + if (obsoleted_files > 0) { + ROCKS_LOG_INFO(db_options_.info_log, + "%" PRIu64 " blob file(s) marked obsolete by GC", + obsoleted_files); + RecordTick(statistics_, BLOB_DB_GC_NUM_FILES, obsoleted_files); + } +} + +void BlobDBImpl::MarkUnreferencedBlobFilesObsolete() { + const SequenceNumber obsolete_seq = GetLatestSequenceNumber(); + + MarkUnreferencedBlobFilesObsoleteImpl( + [this, obsolete_seq](const std::shared_ptr& blob_file) { + WriteLock file_lock(&blob_file->mutex_); + return MarkBlobFileObsoleteIfNeeded(blob_file, obsolete_seq); + }); +} + +void BlobDBImpl::MarkUnreferencedBlobFilesObsoleteDuringOpen() { + MarkUnreferencedBlobFilesObsoleteImpl( + [this](const std::shared_ptr& blob_file) { + return MarkBlobFileObsoleteIfNeeded(blob_file, /* obsolete_seq */ 0); + }); +} + +void BlobDBImpl::CloseRandomAccessLocked( + const std::shared_ptr& bfile) { + bfile->CloseRandomAccessLocked(); + open_file_count_--; +} + +Status BlobDBImpl::GetBlobFileReader( + const std::shared_ptr& blob_file, + std::shared_ptr* reader) { + assert(reader != nullptr); + bool fresh_open = false; + Status s = blob_file->GetReader(env_, file_options_, reader, &fresh_open); + if (s.ok() && fresh_open) { + assert(*reader != nullptr); + open_file_count_++; + } + return s; +} + +std::shared_ptr BlobDBImpl::NewBlobFile( + bool has_ttl, const ExpirationRange& expiration_range, + const std::string& reason) { + assert(has_ttl == (expiration_range.first || expiration_range.second)); + + uint64_t file_num = next_file_number_++; + + const uint32_t column_family_id = + static_cast(DefaultColumnFamily())->GetID(); + auto blob_file = std::make_shared( + this, blob_dir_, file_num, db_options_.info_log.get(), column_family_id, + bdb_options_.compression, has_ttl, expiration_range); + + ROCKS_LOG_DEBUG(db_options_.info_log, "New blob file created: %s 
reason='%s'", + blob_file->PathName().c_str(), reason.c_str()); + LogFlush(db_options_.info_log); + + return blob_file; +} + +void BlobDBImpl::RegisterBlobFile(std::shared_ptr blob_file) { + const uint64_t blob_file_number = blob_file->BlobFileNumber(); + + auto it = blob_files_.lower_bound(blob_file_number); + assert(it == blob_files_.end() || it->first != blob_file_number); + + blob_files_.insert(it, + std::map>::value_type( + blob_file_number, std::move(blob_file))); +} + +Status BlobDBImpl::CreateWriterLocked(const std::shared_ptr& bfile) { + std::string fpath(bfile->PathName()); + std::unique_ptr wfile; + const auto& fs = env_->GetFileSystem(); + + Status s = fs->ReopenWritableFile(fpath, file_options_, &wfile, nullptr); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Failed to open blob file for write: %s status: '%s'" + " exists: '%s'", + fpath.c_str(), s.ToString().c_str(), + fs->FileExists(fpath, file_options_.io_options, nullptr) + .ToString() + .c_str()); + return s; + } + + std::unique_ptr fwriter; + fwriter.reset(new WritableFileWriter(std::move(wfile), fpath, file_options_)); + + uint64_t boffset = bfile->GetFileSize(); + if (debug_level_ >= 2 && boffset) { + ROCKS_LOG_DEBUG(db_options_.info_log, + "Open blob file: %s with offset: %" PRIu64, fpath.c_str(), + boffset); + } + + BlobLogWriter::ElemType et = BlobLogWriter::kEtNone; + if (bfile->file_size_ == BlobLogHeader::kSize) { + et = BlobLogWriter::kEtFileHdr; + } else if (bfile->file_size_ > BlobLogHeader::kSize) { + et = BlobLogWriter::kEtRecord; + } else if (bfile->file_size_) { + ROCKS_LOG_WARN(db_options_.info_log, + "Open blob file: %s with wrong size: %" PRIu64, + fpath.c_str(), boffset); + return Status::Corruption("Invalid blob file size"); + } + + constexpr bool do_flush = true; + + bfile->log_writer_ = std::make_shared( + std::move(fwriter), clock_, statistics_, bfile->file_number_, + db_options_.use_fsync, do_flush, boffset); + bfile->log_writer_->last_elem_type_ = et; + + return s; +} + +std::shared_ptr BlobDBImpl::FindBlobFileLocked( + uint64_t expiration) const { + if (open_ttl_files_.empty()) { + return nullptr; + } + + std::shared_ptr tmp = std::make_shared(); + tmp->SetHasTTL(true); + tmp->expiration_range_ = std::make_pair(expiration, 0); + tmp->file_number_ = std::numeric_limits::max(); + + auto citr = open_ttl_files_.equal_range(tmp); + if (citr.first == open_ttl_files_.end()) { + assert(citr.second == open_ttl_files_.end()); + + std::shared_ptr check = *(open_ttl_files_.rbegin()); + return (check->expiration_range_.second <= expiration) ? nullptr : check; + } + + if (citr.first != citr.second) { + return *(citr.first); + } + + auto finditr = citr.second; + if (finditr != open_ttl_files_.begin()) { + --finditr; + } + + bool b2 = (*finditr)->expiration_range_.second <= expiration; + bool b1 = (*finditr)->expiration_range_.first > expiration; + + return (b1 || b2) ? 
nullptr : (*finditr); +} + +Status BlobDBImpl::CheckOrCreateWriterLocked( + const std::shared_ptr& blob_file, + std::shared_ptr* writer) { + assert(writer != nullptr); + *writer = blob_file->GetWriter(); + if (*writer != nullptr) { + return Status::OK(); + } + Status s = CreateWriterLocked(blob_file); + if (s.ok()) { + *writer = blob_file->GetWriter(); + } + return s; +} + +Status BlobDBImpl::CreateBlobFileAndWriter( + bool has_ttl, const ExpirationRange& expiration_range, + const std::string& reason, std::shared_ptr* blob_file, + std::shared_ptr* writer) { + TEST_SYNC_POINT("BlobDBImpl::CreateBlobFileAndWriter"); + assert(has_ttl == (expiration_range.first || expiration_range.second)); + assert(blob_file); + assert(writer); + + *blob_file = NewBlobFile(has_ttl, expiration_range, reason); + assert(*blob_file); + + // file not visible, hence no lock + Status s = CheckOrCreateWriterLocked(*blob_file, writer); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Failed to get writer for blob file: %s, error: %s", + (*blob_file)->PathName().c_str(), s.ToString().c_str()); + return s; + } + + assert(*writer); + + s = (*writer)->WriteHeader((*blob_file)->header_); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Failed to write header to new blob file: %s" + " status: '%s'", + (*blob_file)->PathName().c_str(), s.ToString().c_str()); + return s; + } + + (*blob_file)->SetFileSize(BlobLogHeader::kSize); + total_blob_size_ += BlobLogHeader::kSize; + + return s; +} + +Status BlobDBImpl::SelectBlobFile(std::shared_ptr* blob_file) { + assert(blob_file); + + { + ReadLock rl(&mutex_); + + if (open_non_ttl_file_) { + assert(!open_non_ttl_file_->Immutable()); + *blob_file = open_non_ttl_file_; + return Status::OK(); + } + } + + // Check again + WriteLock wl(&mutex_); + + if (open_non_ttl_file_) { + assert(!open_non_ttl_file_->Immutable()); + *blob_file = open_non_ttl_file_; + return Status::OK(); + } + + std::shared_ptr writer; + const Status s = CreateBlobFileAndWriter( + /* has_ttl */ false, ExpirationRange(), + /* reason */ "SelectBlobFile", blob_file, &writer); + if (!s.ok()) { + return s; + } + + RegisterBlobFile(*blob_file); + open_non_ttl_file_ = *blob_file; + + return s; +} + +Status BlobDBImpl::SelectBlobFileTTL(uint64_t expiration, + std::shared_ptr* blob_file) { + assert(blob_file); + assert(expiration != kNoExpiration); + + { + ReadLock rl(&mutex_); + + *blob_file = FindBlobFileLocked(expiration); + if (*blob_file != nullptr) { + assert(!(*blob_file)->Immutable()); + return Status::OK(); + } + } + + // Check again + WriteLock wl(&mutex_); + + *blob_file = FindBlobFileLocked(expiration); + if (*blob_file != nullptr) { + assert(!(*blob_file)->Immutable()); + return Status::OK(); + } + + const uint64_t exp_low = + (expiration / bdb_options_.ttl_range_secs) * bdb_options_.ttl_range_secs; + const uint64_t exp_high = exp_low + bdb_options_.ttl_range_secs; + const ExpirationRange expiration_range(exp_low, exp_high); + + std::ostringstream oss; + oss << "SelectBlobFileTTL range: [" << exp_low << ',' << exp_high << ')'; + + std::shared_ptr writer; + const Status s = + CreateBlobFileAndWriter(/* has_ttl */ true, expiration_range, + /* reason */ oss.str(), blob_file, &writer); + if (!s.ok()) { + return s; + } + + RegisterBlobFile(*blob_file); + open_ttl_files_.insert(*blob_file); + + return s; +} + +class BlobDBImpl::BlobInserter : public WriteBatch::Handler { + private: + const WriteOptions& options_; + BlobDBImpl* blob_db_impl_; + uint32_t default_cf_id_; + WriteBatch batch_; + + 
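+  // Summary of the handlers below (added commentary): this WriteBatch::Handler
+  // rebuilds the user's batch for the base DB. PutCF is routed through
+  // PutBlobValue, which either inlines small values or appends large ones to
+  // a blob file and records a blob index entry instead; deletes and delete
+  // ranges pass through; single deletes and merges are rejected as
+  // unsupported.
+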
public: + BlobInserter(const WriteOptions& options, BlobDBImpl* blob_db_impl, + uint32_t default_cf_id) + : options_(options), + blob_db_impl_(blob_db_impl), + default_cf_id_(default_cf_id) {} + + WriteBatch* batch() { return &batch_; } + + Status PutCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + if (column_family_id != default_cf_id_) { + return Status::NotSupported( + "Blob DB doesn't support non-default column family."); + } + Status s = blob_db_impl_->PutBlobValue(options_, key, value, kNoExpiration, + &batch_); + return s; + } + + Status DeleteCF(uint32_t column_family_id, const Slice& key) override { + if (column_family_id != default_cf_id_) { + return Status::NotSupported( + "Blob DB doesn't support non-default column family."); + } + Status s = WriteBatchInternal::Delete(&batch_, column_family_id, key); + return s; + } + + virtual Status DeleteRange(uint32_t column_family_id, const Slice& begin_key, + const Slice& end_key) { + if (column_family_id != default_cf_id_) { + return Status::NotSupported( + "Blob DB doesn't support non-default column family."); + } + Status s = WriteBatchInternal::DeleteRange(&batch_, column_family_id, + begin_key, end_key); + return s; + } + + Status SingleDeleteCF(uint32_t /*column_family_id*/, + const Slice& /*key*/) override { + return Status::NotSupported("Not supported operation in blob db."); + } + + Status MergeCF(uint32_t /*column_family_id*/, const Slice& /*key*/, + const Slice& /*value*/) override { + return Status::NotSupported("Not supported operation in blob db."); + } + + void LogData(const Slice& blob) override { batch_.PutLogData(blob); } +}; + +Status BlobDBImpl::Write(const WriteOptions& options, WriteBatch* updates) { + StopWatch write_sw(clock_, statistics_, BLOB_DB_WRITE_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_WRITE); + uint32_t default_cf_id = + static_cast_with_check(DefaultColumnFamily()) + ->GetID(); + Status s; + BlobInserter blob_inserter(options, this, default_cf_id); + { + // Release write_mutex_ before DB write to avoid race condition with + // flush begin listener, which also require write_mutex_ to sync + // blob files. + MutexLock l(&write_mutex_); + s = updates->Iterate(&blob_inserter); + } + if (!s.ok()) { + return s; + } + return db_->Write(options, blob_inserter.batch()); +} + +Status BlobDBImpl::Put(const WriteOptions& options, const Slice& key, + const Slice& value) { + return PutUntil(options, key, value, kNoExpiration); +} + +Status BlobDBImpl::PutWithTTL(const WriteOptions& options, const Slice& key, + const Slice& value, uint64_t ttl) { + uint64_t now = EpochNow(); + uint64_t expiration = kNoExpiration - now > ttl ? now + ttl : kNoExpiration; + return PutUntil(options, key, value, expiration); +} + +Status BlobDBImpl::PutUntil(const WriteOptions& options, const Slice& key, + const Slice& value, uint64_t expiration) { + StopWatch write_sw(clock_, statistics_, BLOB_DB_WRITE_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_PUT); + Status s; + WriteBatch batch; + { + // Release write_mutex_ before DB write to avoid race condition with + // flush begin listener, which also require write_mutex_ to sync + // blob files. 
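+    // (Note: the lock scope below therefore covers only PutBlobValue's
+    // blob-file append and batch construction; db_->Write() runs after the
+    // lock has been released.)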
+ MutexLock l(&write_mutex_); + s = PutBlobValue(options, key, value, expiration, &batch); + } + if (s.ok()) { + s = db_->Write(options, &batch); + } + return s; +} + +Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/, + const Slice& key, const Slice& value, + uint64_t expiration, WriteBatch* batch) { + write_mutex_.AssertHeld(); + Status s; + std::string index_entry; + uint32_t column_family_id = + static_cast_with_check(DefaultColumnFamily()) + ->GetID(); + if (value.size() < bdb_options_.min_blob_size) { + if (expiration == kNoExpiration) { + // Put as normal value + s = batch->Put(key, value); + RecordTick(statistics_, BLOB_DB_WRITE_INLINED); + } else { + // Inlined with TTL + BlobIndex::EncodeInlinedTTL(&index_entry, expiration, value); + s = WriteBatchInternal::PutBlobIndex(batch, column_family_id, key, + index_entry); + RecordTick(statistics_, BLOB_DB_WRITE_INLINED_TTL); + } + } else { + std::string compression_output; + Slice value_compressed = GetCompressedSlice(value, &compression_output); + + std::string headerbuf; + BlobLogWriter::ConstructBlobHeader(&headerbuf, key, value_compressed, + expiration); + + // Check DB size limit before selecting blob file to + // Since CheckSizeAndEvictBlobFiles() can close blob files, it needs to be + // done before calling SelectBlobFile(). + s = CheckSizeAndEvictBlobFiles(headerbuf.size() + key.size() + + value_compressed.size()); + if (!s.ok()) { + return s; + } + + std::shared_ptr blob_file; + if (expiration != kNoExpiration) { + s = SelectBlobFileTTL(expiration, &blob_file); + } else { + s = SelectBlobFile(&blob_file); + } + if (s.ok()) { + assert(blob_file != nullptr); + assert(blob_file->GetCompressionType() == bdb_options_.compression); + s = AppendBlob(blob_file, headerbuf, key, value_compressed, expiration, + &index_entry); + } + if (s.ok()) { + if (expiration != kNoExpiration) { + WriteLock file_lock(&blob_file->mutex_); + blob_file->ExtendExpirationRange(expiration); + } + s = CloseBlobFileIfNeeded(blob_file); + } + if (s.ok()) { + s = WriteBatchInternal::PutBlobIndex(batch, column_family_id, key, + index_entry); + } + if (s.ok()) { + if (expiration == kNoExpiration) { + RecordTick(statistics_, BLOB_DB_WRITE_BLOB); + } else { + RecordTick(statistics_, BLOB_DB_WRITE_BLOB_TTL); + } + } else { + ROCKS_LOG_ERROR( + db_options_.info_log, + "Failed to append blob to FILE: %s: KEY: %s VALSZ: %" ROCKSDB_PRIszt + " status: '%s' blob_file: '%s'", + blob_file->PathName().c_str(), key.ToString().c_str(), value.size(), + s.ToString().c_str(), blob_file->DumpState().c_str()); + } + } + + RecordTick(statistics_, BLOB_DB_NUM_KEYS_WRITTEN); + RecordTick(statistics_, BLOB_DB_BYTES_WRITTEN, key.size() + value.size()); + RecordInHistogram(statistics_, BLOB_DB_KEY_SIZE, key.size()); + RecordInHistogram(statistics_, BLOB_DB_VALUE_SIZE, value.size()); + + return s; +} + +Slice BlobDBImpl::GetCompressedSlice(const Slice& raw, + std::string* compression_output) const { + if (bdb_options_.compression == kNoCompression) { + return raw; + } + StopWatch compression_sw(clock_, statistics_, BLOB_DB_COMPRESSION_MICROS); + CompressionType type = bdb_options_.compression; + CompressionOptions opts; + CompressionContext context(type); + CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), type, + 0 /* sample_for_compression */); + CompressBlock(raw, info, &type, kBlockBasedTableVersionFormat, false, + compression_output, nullptr, nullptr); + return *compression_output; +} + +Status BlobDBImpl::DecompressSlice(const Slice& compressed_value, 
+ CompressionType compression_type, + PinnableSlice* value_output) const { + assert(compression_type != kNoCompression); + + BlockContents contents; + auto cfh = static_cast(DefaultColumnFamily()); + + { + StopWatch decompression_sw(clock_, statistics_, + BLOB_DB_DECOMPRESSION_MICROS); + UncompressionContext context(compression_type); + UncompressionInfo info(context, UncompressionDict::GetEmptyDict(), + compression_type); + Status s = UncompressBlockData( + info, compressed_value.data(), compressed_value.size(), &contents, + kBlockBasedTableVersionFormat, *(cfh->cfd()->ioptions())); + if (!s.ok()) { + return Status::Corruption("Unable to decompress blob."); + } + } + + value_output->PinSelf(contents.data); + + return Status::OK(); +} + +Status BlobDBImpl::CompactFiles( + const CompactionOptions& compact_options, + const std::vector& input_file_names, const int output_level, + const int output_path_id, std::vector* const output_file_names, + CompactionJobInfo* compaction_job_info) { + // Note: we need CompactionJobInfo to be able to track updates to the + // blob file <-> SST mappings, so we provide one if the user hasn't, + // assuming that GC is enabled. + CompactionJobInfo info{}; + if (bdb_options_.enable_garbage_collection && !compaction_job_info) { + compaction_job_info = &info; + } + + const Status s = + db_->CompactFiles(compact_options, input_file_names, output_level, + output_path_id, output_file_names, compaction_job_info); + if (!s.ok()) { + return s; + } + + if (bdb_options_.enable_garbage_collection) { + assert(compaction_job_info); + ProcessCompactionJobInfo(*compaction_job_info); + } + + return s; +} + +void BlobDBImpl::GetCompactionContextCommon(BlobCompactionContext* context) { + assert(context); + + context->blob_db_impl = this; + context->next_file_number = next_file_number_.load(); + context->current_blob_files.clear(); + for (auto& p : blob_files_) { + context->current_blob_files.insert(p.first); + } + context->fifo_eviction_seq = fifo_eviction_seq_; + context->evict_expiration_up_to = evict_expiration_up_to_; +} + +void BlobDBImpl::GetCompactionContext(BlobCompactionContext* context) { + assert(context); + + ReadLock l(&mutex_); + GetCompactionContextCommon(context); +} + +void BlobDBImpl::GetCompactionContext(BlobCompactionContext* context, + BlobCompactionContextGC* context_gc) { + assert(context); + assert(context_gc); + + ReadLock l(&mutex_); + GetCompactionContextCommon(context); + + if (!live_imm_non_ttl_blob_files_.empty()) { + auto it = live_imm_non_ttl_blob_files_.begin(); + std::advance(it, bdb_options_.garbage_collection_cutoff * + live_imm_non_ttl_blob_files_.size()); + context_gc->cutoff_file_number = it != live_imm_non_ttl_blob_files_.end() + ? it->first + : std::numeric_limits::max(); + } +} + +void BlobDBImpl::UpdateLiveSSTSize() { + uint64_t live_sst_size = 0; + bool ok = GetIntProperty(DB::Properties::kLiveSstFilesSize, &live_sst_size); + if (ok) { + live_sst_size_.store(live_sst_size); + ROCKS_LOG_INFO(db_options_.info_log, + "Updated total SST file size: %" PRIu64 " bytes.", + live_sst_size); + } else { + ROCKS_LOG_ERROR( + db_options_.info_log, + "Failed to update total SST file size after flush or compaction."); + } + { + // Trigger FIFO eviction if needed. + MutexLock l(&write_mutex_); + Status s = CheckSizeAndEvictBlobFiles(0, true /*force*/); + if (s.IsNoSpace()) { + ROCKS_LOG_WARN(db_options_.info_log, + "DB grow out-of-space after SST size updated. 
Current live" + " SST size: %" PRIu64 + " , current blob files size: %" PRIu64 ".", + live_sst_size_.load(), total_blob_size_.load()); + } + } +} + +Status BlobDBImpl::CheckSizeAndEvictBlobFiles(uint64_t blob_size, + bool force_evict) { + write_mutex_.AssertHeld(); + + uint64_t live_sst_size = live_sst_size_.load(); + if (bdb_options_.max_db_size == 0 || + live_sst_size + total_blob_size_.load() + blob_size <= + bdb_options_.max_db_size) { + return Status::OK(); + } + + if (bdb_options_.is_fifo == false || + (!force_evict && live_sst_size + blob_size > bdb_options_.max_db_size)) { + // FIFO eviction is disabled, or no space to insert new blob even we evict + // all blob files. + return Status::NoSpace( + "Write failed, as writing it would exceed max_db_size limit."); + } + + std::vector> candidate_files; + CopyBlobFiles(&candidate_files); + std::sort(candidate_files.begin(), candidate_files.end(), + BlobFileComparator()); + fifo_eviction_seq_ = GetLatestSequenceNumber(); + + WriteLock l(&mutex_); + + while (!candidate_files.empty() && + live_sst_size + total_blob_size_.load() + blob_size > + bdb_options_.max_db_size) { + std::shared_ptr blob_file = candidate_files.back(); + candidate_files.pop_back(); + WriteLock file_lock(&blob_file->mutex_); + if (blob_file->Obsolete()) { + // File already obsoleted by someone else. + assert(blob_file->Immutable()); + continue; + } + // FIFO eviction can evict open blob files. + if (!blob_file->Immutable()) { + Status s = CloseBlobFile(blob_file); + if (!s.ok()) { + return s; + } + } + assert(blob_file->Immutable()); + auto expiration_range = blob_file->GetExpirationRange(); + ROCKS_LOG_INFO(db_options_.info_log, + "Evict oldest blob file since DB out of space. Current " + "live SST file size: %" PRIu64 ", total blob size: %" PRIu64 + ", max db size: %" PRIu64 ", evicted blob file #%" PRIu64 + ".", + live_sst_size, total_blob_size_.load(), + bdb_options_.max_db_size, blob_file->BlobFileNumber()); + ObsoleteBlobFile(blob_file, fifo_eviction_seq_, true /*update_size*/); + evict_expiration_up_to_ = expiration_range.first; + RecordTick(statistics_, BLOB_DB_FIFO_NUM_FILES_EVICTED); + RecordTick(statistics_, BLOB_DB_FIFO_NUM_KEYS_EVICTED, + blob_file->BlobCount()); + RecordTick(statistics_, BLOB_DB_FIFO_BYTES_EVICTED, + blob_file->GetFileSize()); + TEST_SYNC_POINT("BlobDBImpl::EvictOldestBlobFile:Evicted"); + } + if (live_sst_size + total_blob_size_.load() + blob_size > + bdb_options_.max_db_size) { + return Status::NoSpace( + "Write failed, as writing it would exceed max_db_size limit."); + } + return Status::OK(); +} + +Status BlobDBImpl::AppendBlob(const std::shared_ptr& bfile, + const std::string& headerbuf, const Slice& key, + const Slice& value, uint64_t expiration, + std::string* index_entry) { + Status s; + uint64_t blob_offset = 0; + uint64_t key_offset = 0; + { + WriteLock lockbfile_w(&bfile->mutex_); + std::shared_ptr writer; + s = CheckOrCreateWriterLocked(bfile, &writer); + if (!s.ok()) { + return s; + } + + // write the blob to the blob log. 
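+    // As read back by GetRawBlobFromFile, the payload that follows the record
+    // header is laid out as [crc32c (4 bytes) | key | value], with the
+    // returned blob_offset pointing at the value bytes.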
+ s = writer->EmitPhysicalRecord(headerbuf, key, value, &key_offset, + &blob_offset); + } + + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Invalid status in AppendBlob: %s status: '%s'", + bfile->PathName().c_str(), s.ToString().c_str()); + return s; + } + + uint64_t size_put = headerbuf.size() + key.size() + value.size(); + bfile->BlobRecordAdded(size_put); + total_blob_size_ += size_put; + + if (expiration == kNoExpiration) { + BlobIndex::EncodeBlob(index_entry, bfile->BlobFileNumber(), blob_offset, + value.size(), bdb_options_.compression); + } else { + BlobIndex::EncodeBlobTTL(index_entry, expiration, bfile->BlobFileNumber(), + blob_offset, value.size(), + bdb_options_.compression); + } + + return s; +} + +std::vector BlobDBImpl::MultiGet(const ReadOptions& read_options, + const std::vector& keys, + std::vector* values) { + StopWatch multiget_sw(clock_, statistics_, BLOB_DB_MULTIGET_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_MULTIGET); + // Get a snapshot to avoid blob file get deleted between we + // fetch and index entry and reading from the file. + ReadOptions ro(read_options); + bool snapshot_created = SetSnapshotIfNeeded(&ro); + + std::vector statuses; + statuses.reserve(keys.size()); + values->clear(); + values->reserve(keys.size()); + PinnableSlice value; + for (size_t i = 0; i < keys.size(); i++) { + statuses.push_back(Get(ro, DefaultColumnFamily(), keys[i], &value)); + values->push_back(value.ToString()); + value.Reset(); + } + if (snapshot_created) { + db_->ReleaseSnapshot(ro.snapshot); + } + return statuses; +} + +bool BlobDBImpl::SetSnapshotIfNeeded(ReadOptions* read_options) { + assert(read_options != nullptr); + if (read_options->snapshot != nullptr) { + return false; + } + read_options->snapshot = db_->GetSnapshot(); + return true; +} + +Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry, + PinnableSlice* value, uint64_t* expiration) { + assert(value); + + BlobIndex blob_index; + Status s = blob_index.DecodeFrom(index_entry); + if (!s.ok()) { + return s; + } + + if (blob_index.HasTTL() && blob_index.expiration() <= EpochNow()) { + return Status::NotFound("Key expired"); + } + + if (expiration != nullptr) { + if (blob_index.HasTTL()) { + *expiration = blob_index.expiration(); + } else { + *expiration = kNoExpiration; + } + } + + if (blob_index.IsInlined()) { + // TODO(yiwu): If index_entry is a PinnableSlice, we can also pin the same + // memory buffer to avoid extra copy. 
+ value->PinSelf(blob_index.value()); + return Status::OK(); + } + + CompressionType compression_type = kNoCompression; + s = GetRawBlobFromFile(key, blob_index.file_number(), blob_index.offset(), + blob_index.size(), value, &compression_type); + if (!s.ok()) { + return s; + } + + if (compression_type != kNoCompression) { + s = DecompressSlice(*value, compression_type, value); + if (!s.ok()) { + if (debug_level_ >= 2) { + ROCKS_LOG_ERROR( + db_options_.info_log, + "Uncompression error during blob read from file: %" PRIu64 + " blob_offset: %" PRIu64 " blob_size: %" PRIu64 + " key: %s status: '%s'", + blob_index.file_number(), blob_index.offset(), blob_index.size(), + key.ToString(/* output_hex */ true).c_str(), s.ToString().c_str()); + } + return s; + } + } + + return Status::OK(); +} + +Status BlobDBImpl::GetRawBlobFromFile(const Slice& key, uint64_t file_number, + uint64_t offset, uint64_t size, + PinnableSlice* value, + CompressionType* compression_type) { + assert(value); + assert(compression_type); + assert(*compression_type == kNoCompression); + + if (!size) { + value->PinSelf(""); + return Status::OK(); + } + + // offset has to have certain min, as we will read CRC + // later from the Blob Header, which needs to be also a + // valid offset. + if (offset < + (BlobLogHeader::kSize + BlobLogRecord::kHeaderSize + key.size())) { + if (debug_level_ >= 2) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Invalid blob index file_number: %" PRIu64 + " blob_offset: %" PRIu64 " blob_size: %" PRIu64 + " key: %s", + file_number, offset, size, + key.ToString(/* output_hex */ true).c_str()); + } + + return Status::NotFound("Invalid blob offset"); + } + + std::shared_ptr blob_file; + + { + ReadLock rl(&mutex_); + auto it = blob_files_.find(file_number); + + // file was deleted + if (it == blob_files_.end()) { + return Status::NotFound("Blob Not Found as blob file missing"); + } + + blob_file = it->second; + } + + *compression_type = blob_file->GetCompressionType(); + + // takes locks when called + std::shared_ptr reader; + Status s = GetBlobFileReader(blob_file, &reader); + if (!s.ok()) { + return s; + } + + assert(offset >= key.size() + sizeof(uint32_t)); + const uint64_t record_offset = offset - key.size() - sizeof(uint32_t); + const uint64_t record_size = sizeof(uint32_t) + key.size() + size; + + // Allocate the buffer. This is safe in C++11 + std::string buf; + AlignedBuf aligned_buf; + + // A partial blob record contain checksum, key and value. + Slice blob_record; + + { + StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); + // TODO: rate limit old blob DB file reads. 
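+    // Direct-I/O readers fill the AlignedBuf declared above; buffered readers
+    // fill the std::string buffer `buf`. Either way, blob_record ends up
+    // pointing into whichever buffer received the data.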
+ if (reader->use_direct_io()) { + s = reader->Read(IOOptions(), record_offset, + static_cast(record_size), &blob_record, nullptr, + &aligned_buf, Env::IO_TOTAL /* rate_limiter_priority */); + } else { + buf.reserve(static_cast(record_size)); + s = reader->Read(IOOptions(), record_offset, + static_cast(record_size), &blob_record, &buf[0], + nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + } + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_record.size()); + } + + if (!s.ok()) { + ROCKS_LOG_DEBUG( + db_options_.info_log, + "Failed to read blob from blob file %" PRIu64 ", blob_offset: %" PRIu64 + ", blob_size: %" PRIu64 ", key_size: %" ROCKSDB_PRIszt ", status: '%s'", + file_number, offset, size, key.size(), s.ToString().c_str()); + return s; + } + + if (blob_record.size() != record_size) { + ROCKS_LOG_DEBUG( + db_options_.info_log, + "Failed to read blob from blob file %" PRIu64 ", blob_offset: %" PRIu64 + ", blob_size: %" PRIu64 ", key_size: %" ROCKSDB_PRIszt + ", read %" ROCKSDB_PRIszt " bytes, expected %" PRIu64 " bytes", + file_number, offset, size, key.size(), blob_record.size(), record_size); + + return Status::Corruption("Failed to retrieve blob from blob index."); + } + + Slice crc_slice(blob_record.data(), sizeof(uint32_t)); + Slice blob_value(blob_record.data() + sizeof(uint32_t) + key.size(), + static_cast(size)); + + uint32_t crc_exp = 0; + if (!GetFixed32(&crc_slice, &crc_exp)) { + ROCKS_LOG_DEBUG( + db_options_.info_log, + "Unable to decode CRC from blob file %" PRIu64 ", blob_offset: %" PRIu64 + ", blob_size: %" PRIu64 ", key size: %" ROCKSDB_PRIszt ", status: '%s'", + file_number, offset, size, key.size(), s.ToString().c_str()); + return Status::Corruption("Unable to decode checksum."); + } + + uint32_t crc = crc32c::Value(blob_record.data() + sizeof(uint32_t), + blob_record.size() - sizeof(uint32_t)); + crc = crc32c::Mask(crc); // Adjust for storage + if (crc != crc_exp) { + if (debug_level_ >= 2) { + ROCKS_LOG_ERROR( + db_options_.info_log, + "Blob crc mismatch file: %" PRIu64 " blob_offset: %" PRIu64 + " blob_size: %" PRIu64 " key: %s status: '%s'", + file_number, offset, size, + key.ToString(/* output_hex */ true).c_str(), s.ToString().c_str()); + } + + return Status::Corruption("Corruption. 
+  }
+
+  value->PinSelf(blob_value);
+
+  return Status::OK();
+}
+
+Status BlobDBImpl::Get(const ReadOptions& read_options,
+                       ColumnFamilyHandle* column_family, const Slice& key,
+                       PinnableSlice* value) {
+  return Get(read_options, column_family, key, value,
+             static_cast<uint64_t*>(nullptr) /*expiration*/);
+}
+
+Status BlobDBImpl::Get(const ReadOptions& read_options,
+                       ColumnFamilyHandle* column_family, const Slice& key,
+                       PinnableSlice* value, uint64_t* expiration) {
+  StopWatch get_sw(clock_, statistics_, BLOB_DB_GET_MICROS);
+  RecordTick(statistics_, BLOB_DB_NUM_GET);
+  return GetImpl(read_options, column_family, key, value, expiration);
+}
+
+Status BlobDBImpl::GetImpl(const ReadOptions& read_options,
+                           ColumnFamilyHandle* column_family, const Slice& key,
+                           PinnableSlice* value, uint64_t* expiration) {
+  if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
+    return Status::NotSupported(
+        "Blob DB doesn't support non-default column family.");
+  }
+  if (read_options.io_activity != Env::IOActivity::kUnknown) {
+    return Status::InvalidArgument(
+        "Cannot call Get with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`");
+  }
+  // Take a snapshot so the blob file cannot be deleted between fetching the
+  // index entry and reading from the file.
+  // TODO(yiwu): For Get(), retrying if the file is not found would be a
+  // simpler strategy.
+  ReadOptions ro(read_options);
+  bool snapshot_created = SetSnapshotIfNeeded(&ro);
+
+  PinnableSlice index_entry;
+  Status s;
+  bool is_blob_index = false;
+  DBImpl::GetImplOptions get_impl_options;
+  get_impl_options.column_family = column_family;
+  get_impl_options.value = &index_entry;
+  get_impl_options.is_blob_index = &is_blob_index;
+  s = db_impl_->GetImpl(ro, key, get_impl_options);
+  if (expiration != nullptr) {
+    *expiration = kNoExpiration;
+  }
+  RecordTick(statistics_, BLOB_DB_NUM_KEYS_READ);
+  if (s.ok()) {
+    if (is_blob_index) {
+      s = GetBlobValue(key, index_entry, value, expiration);
+    } else {
+      // The index entry is the value itself in this case.
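+      // (No blob file read is needed: the value was stored inline in the
+      // base DB, e.g. because it was smaller than min_blob_size.)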
+      value->PinSelf(index_entry);
+    }
+    RecordTick(statistics_, BLOB_DB_BYTES_READ, value->size());
+  }
+  if (snapshot_created) {
+    db_->ReleaseSnapshot(ro.snapshot);
+  }
+  return s;
+}
+
+std::pair<bool, int64_t> BlobDBImpl::SanityCheck(bool aborted) {
+  if (aborted) {
+    return std::make_pair(false, -1);
+  }
+
+  ReadLock rl(&mutex_);
+
+  ROCKS_LOG_INFO(db_options_.info_log, "Starting Sanity Check");
+  ROCKS_LOG_INFO(db_options_.info_log, "Number of files %" ROCKSDB_PRIszt,
+                 blob_files_.size());
+  ROCKS_LOG_INFO(db_options_.info_log, "Number of open files %" ROCKSDB_PRIszt,
+                 open_ttl_files_.size());
+
+  for (const auto& blob_file : open_ttl_files_) {
+    (void)blob_file;
+    assert(!blob_file->Immutable());
+  }
+
+  for (const auto& pair : live_imm_non_ttl_blob_files_) {
+    const auto& blob_file = pair.second;
+    (void)blob_file;
+    assert(!blob_file->HasTTL());
+    assert(blob_file->Immutable());
+  }
+
+  uint64_t now = EpochNow();
+
+  for (auto blob_file_pair : blob_files_) {
+    auto blob_file = blob_file_pair.second;
+    std::ostringstream buf;
+
+    buf << "Blob file " << blob_file->BlobFileNumber() << ", size "
+        << blob_file->GetFileSize() << ", blob count " << blob_file->BlobCount()
+        << ", immutable " << blob_file->Immutable();
+
+    if (blob_file->HasTTL()) {
+      ExpirationRange expiration_range;
+      {
+        ReadLock file_lock(&blob_file->mutex_);
+        expiration_range = blob_file->GetExpirationRange();
+      }
+      buf << ", expiration range (" << expiration_range.first << ", "
+          << expiration_range.second << ")";
+
+      if (!blob_file->Obsolete()) {
+        buf << ", expire in " << (expiration_range.second - now) << " seconds";
+      }
+    }
+    if (blob_file->Obsolete()) {
+      buf << ", obsolete at " << blob_file->GetObsoleteSequence();
+    }
+    buf << ".";
+    ROCKS_LOG_INFO(db_options_.info_log, "%s", buf.str().c_str());
+  }
+
+  // reschedule
+  return std::make_pair(true, -1);
+}
+
+Status BlobDBImpl::CloseBlobFile(std::shared_ptr<BlobFile> bfile) {
+  TEST_SYNC_POINT("BlobDBImpl::CloseBlobFile");
+  assert(bfile);
+  assert(!bfile->Immutable());
+  assert(!bfile->Obsolete());
+
+  if (bfile->HasTTL() || bfile == open_non_ttl_file_) {
+    write_mutex_.AssertHeld();
+  }
+
+  ROCKS_LOG_INFO(db_options_.info_log,
+                 "Closing blob file %" PRIu64 ". Path: %s",
+                 bfile->BlobFileNumber(), bfile->PathName().c_str());
+
+  const SequenceNumber sequence = GetLatestSequenceNumber();
+
+  const Status s = bfile->WriteFooterAndCloseLocked(sequence);
+
+  if (s.ok()) {
+    total_blob_size_ += BlobLogFooter::kSize;
+  } else {
+    bfile->MarkImmutable(sequence);
+
+    ROCKS_LOG_ERROR(db_options_.info_log,
+                    "Failed to close blob file %" PRIu64 " with error: %s",
+                    bfile->BlobFileNumber(), s.ToString().c_str());
+  }
+
+  if (bfile->HasTTL()) {
+    size_t erased __attribute__((__unused__));
+    erased = open_ttl_files_.erase(bfile);
+  } else {
+    if (bfile == open_non_ttl_file_) {
+      open_non_ttl_file_ = nullptr;
+    }
+
+    const uint64_t blob_file_number = bfile->BlobFileNumber();
+    auto it = live_imm_non_ttl_blob_files_.lower_bound(blob_file_number);
+    assert(it == live_imm_non_ttl_blob_files_.end() ||
+           it->first != blob_file_number);
+    live_imm_non_ttl_blob_files_.insert(
+        it, std::map<uint64_t, std::shared_ptr<BlobFile>>::value_type(
+                blob_file_number, bfile));
+  }
+
+  return s;
+}
+
+Status BlobDBImpl::CloseBlobFileIfNeeded(std::shared_ptr<BlobFile>& bfile) {
+  write_mutex_.AssertHeld();
+
+  // atomic read
+  if (bfile->GetFileSize() < bdb_options_.blob_file_size) {
+    return Status::OK();
+  }
+
+  WriteLock lock(&mutex_);
+  WriteLock file_lock(&bfile->mutex_);
+
+  assert(!bfile->Obsolete() || bfile->Immutable());
+  if (bfile->Immutable()) {
+    return Status::OK();
+  }
+
+  return CloseBlobFile(bfile);
+}
+
+void BlobDBImpl::ObsoleteBlobFile(std::shared_ptr<BlobFile> blob_file,
+                                  SequenceNumber obsolete_seq,
+                                  bool update_size) {
+  assert(blob_file->Immutable());
+  assert(!blob_file->Obsolete());
+
+  // Should hold write lock of mutex_ or during DB open.
+  blob_file->MarkObsolete(obsolete_seq);
+  obsolete_files_.push_back(blob_file);
+  assert(total_blob_size_.load() >= blob_file->GetFileSize());
+  if (update_size) {
+    total_blob_size_ -= blob_file->GetFileSize();
+  }
+}
+
+bool BlobDBImpl::VisibleToActiveSnapshot(
+    const std::shared_ptr<BlobFile>& bfile) {
+  assert(bfile->Obsolete());
+
+  // We check whether the oldest snapshot is no less than the last sequence
+  // number by the time the blob file became obsolete. If so, the blob file is
+  // not visible to any existing snapshot.
+  //
+  // If we kept track of the earliest sequence number of the keys in the blob
+  // file, we could instead check whether there is a snapshot that falls in the
+  // range [earliest_sequence, obsolete_sequence). But doing so would make the
+  // implementation more complicated.
+  SequenceNumber obsolete_sequence = bfile->GetObsoleteSequence();
+  SequenceNumber oldest_snapshot = kMaxSequenceNumber;
+  {
+    // Need to lock the DBImpl mutex before accessing the snapshot list.
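+    // (The oldest snapshot sequence read under this lock feeds the check
+    // below: e.g. a snapshot taken at sequence 90 can still see a file that
+    // became obsolete at sequence 100, since 90 < 100, so the file must be
+    // kept until no such snapshot remains.)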
+    InstrumentedMutexLock l(db_impl_->mutex());
+    auto& snapshots = db_impl_->snapshots();
+    if (!snapshots.empty()) {
+      oldest_snapshot = snapshots.oldest()->GetSequenceNumber();
+    }
+  }
+  bool visible = oldest_snapshot < obsolete_sequence;
+  if (visible) {
+    ROCKS_LOG_INFO(db_options_.info_log,
+                   "Obsolete blob file %" PRIu64 " (obsolete at %" PRIu64
+                   ") visible to oldest snapshot %" PRIu64 ".",
+                   bfile->BlobFileNumber(), obsolete_sequence, oldest_snapshot);
+  }
+  return visible;
+}
+
+std::pair<bool, int64_t> BlobDBImpl::EvictExpiredFiles(bool aborted) {
+  if (aborted) {
+    return std::make_pair(false, -1);
+  }
+
+  TEST_SYNC_POINT("BlobDBImpl::EvictExpiredFiles:0");
+  TEST_SYNC_POINT("BlobDBImpl::EvictExpiredFiles:1");
+
+  std::vector<std::shared_ptr<BlobFile>> process_files;
+  uint64_t now = EpochNow();
+  {
+    ReadLock rl(&mutex_);
+    for (auto p : blob_files_) {
+      auto& blob_file = p.second;
+      ReadLock file_lock(&blob_file->mutex_);
+      if (blob_file->HasTTL() && !blob_file->Obsolete() &&
+          blob_file->GetExpirationRange().second <= now) {
+        process_files.push_back(blob_file);
+      }
+    }
+  }
+
+  TEST_SYNC_POINT("BlobDBImpl::EvictExpiredFiles:2");
+  TEST_SYNC_POINT("BlobDBImpl::EvictExpiredFiles:3");
+  TEST_SYNC_POINT_CALLBACK("BlobDBImpl::EvictExpiredFiles:cb", nullptr);
+
+  SequenceNumber seq = GetLatestSequenceNumber();
+  {
+    MutexLock l(&write_mutex_);
+    WriteLock lock(&mutex_);
+    for (auto& blob_file : process_files) {
+      WriteLock file_lock(&blob_file->mutex_);
+
+      // Need to double check if the file is obsolete.
+      if (blob_file->Obsolete()) {
+        assert(blob_file->Immutable());
+        continue;
+      }
+
+      if (!blob_file->Immutable()) {
+        CloseBlobFile(blob_file);
+      }
+
+      assert(blob_file->Immutable());
+
+      ObsoleteBlobFile(blob_file, seq, true /*update_size*/);
+    }
+  }
+
+  return std::make_pair(true, -1);
+}
+
+Status BlobDBImpl::SyncBlobFiles() {
+  MutexLock l(&write_mutex_);
+
+  std::vector<std::shared_ptr<BlobFile>> process_files;
+  {
+    ReadLock rl(&mutex_);
+    for (auto fitr : open_ttl_files_) {
+      process_files.push_back(fitr);
+    }
+    if (open_non_ttl_file_ != nullptr) {
+      process_files.push_back(open_non_ttl_file_);
+    }
+  }
+
+  Status s;
+  for (auto& blob_file : process_files) {
+    s = blob_file->Fsync();
+    if (!s.ok()) {
+      ROCKS_LOG_ERROR(db_options_.info_log,
+                      "Failed to sync blob file %" PRIu64 ", status: %s",
+                      blob_file->BlobFileNumber(), s.ToString().c_str());
+      return s;
+    }
+  }
+
+  s = dir_ent_->FsyncWithDirOptions(IOOptions(), nullptr, DirFsyncOptions());
+  if (!s.ok()) {
+    ROCKS_LOG_ERROR(db_options_.info_log,
+                    "Failed to sync blob directory, status: %s",
+                    s.ToString().c_str());
+  }
+  return s;
+}
+
+std::pair<bool, int64_t> BlobDBImpl::ReclaimOpenFiles(bool aborted) {
+  if (aborted) return std::make_pair(false, -1);
+
+  if (open_file_count_.load() < kOpenFilesTrigger) {
+    return std::make_pair(true, -1);
+  }
+
+  // In the future, we should sort by last_access_
+  // instead of closing every file.
+  ReadLock rl(&mutex_);
+  for (auto const& ent : blob_files_) {
+    auto bfile = ent.second;
+    if (bfile->last_access_.load() == -1) continue;
+
+    WriteLock lockbfile_w(&bfile->mutex_);
+    CloseRandomAccessLocked(bfile);
+  }
+
+  return std::make_pair(true, -1);
+}
+
+std::pair<bool, int64_t> BlobDBImpl::DeleteObsoleteFiles(bool aborted) {
+  if (aborted) {
+    return std::make_pair(false, -1);
+  }
+
+  MutexLock delete_file_lock(&delete_file_mutex_);
+  if (disable_file_deletions_ > 0) {
+    return std::make_pair(true, -1);
+  }
+
+  std::list<std::shared_ptr<BlobFile>> tobsolete;
+  {
+    WriteLock wl(&mutex_);
+    if (obsolete_files_.empty()) {
+      return std::make_pair(true, -1);
+    }
+    tobsolete.swap(obsolete_files_);
+  }
+
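+  // For each file moved into tobsolete: skip it (and retry on a later pass)
+  // while it is still visible to an active snapshot; otherwise drop it from
+  // blob_files_ and delete it from disk.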
+  bool file_deleted = false;
+  for (auto iter = tobsolete.begin(); iter != tobsolete.end();) {
+    auto bfile = *iter;
+    {
+      ReadLock lockbfile_r(&bfile->mutex_);
+      if (VisibleToActiveSnapshot(bfile)) {
+        ROCKS_LOG_INFO(db_options_.info_log,
+                       "Could not delete file due to snapshot failure %s",
+                       bfile->PathName().c_str());
+        ++iter;
+        continue;
+      }
+    }
+    ROCKS_LOG_INFO(db_options_.info_log,
+                   "Will delete file due to snapshot success %s",
+                   bfile->PathName().c_str());
+
+    {
+      WriteLock wl(&mutex_);
+      blob_files_.erase(bfile->BlobFileNumber());
+    }
+
+    Status s = DeleteDBFile(&(db_impl_->immutable_db_options()),
+                            bfile->PathName(), blob_dir_, true,
+                            /*force_fg=*/false);
+    if (!s.ok()) {
+      ROCKS_LOG_ERROR(db_options_.info_log,
+                      "File failed to be deleted as obsolete %s",
+                      bfile->PathName().c_str());
+      ++iter;
+      continue;
+    }
+
+    file_deleted = true;
+    ROCKS_LOG_INFO(db_options_.info_log,
+                   "File deleted as obsolete from blob dir %s",
+                   bfile->PathName().c_str());
+
+    iter = tobsolete.erase(iter);
+  }
+
+  // Directory contents changed; fsync.
+  if (file_deleted) {
+    Status s = dir_ent_->FsyncWithDirOptions(
+        IOOptions(), nullptr,
+        DirFsyncOptions(DirFsyncOptions::FsyncReason::kFileDeleted));
+    if (!s.ok()) {
+      ROCKS_LOG_ERROR(db_options_.info_log, "Failed to sync dir %s: %s",
+                      blob_dir_.c_str(), s.ToString().c_str());
+    }
+  }
+
+  // Put the files back into the obsolete list if deletion failed for some
+  // reason.
+  if (!tobsolete.empty()) {
+    WriteLock wl(&mutex_);
+    for (auto bfile : tobsolete) {
+      blob_files_.insert(std::make_pair(bfile->BlobFileNumber(), bfile));
+      obsolete_files_.push_front(bfile);
+    }
+  }
+
+  return std::make_pair(!aborted, -1);
+}
+
+void BlobDBImpl::CopyBlobFiles(
+    std::vector<std::shared_ptr<BlobFile>>* bfiles_copy) {
+  ReadLock rl(&mutex_);
+  for (auto const& p : blob_files_) {
+    bfiles_copy->push_back(p.second);
+  }
+}
+
+Iterator* BlobDBImpl::NewIterator(const ReadOptions& read_options) {
+  if (read_options.io_activity != Env::IOActivity::kUnknown) {
+    return NewErrorIterator(Status::InvalidArgument(
+        "Cannot call NewIterator with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`"));
+  }
+  auto* cfd =
+      static_cast_with_check<ColumnFamilyHandleImpl>(DefaultColumnFamily())
+          ->cfd();
+  // Take a snapshot so blob files cannot be deleted between fetching an
+  // index entry and reading from the file.
+  ManagedSnapshot* own_snapshot = nullptr;
+  const Snapshot* snapshot = read_options.snapshot;
+  if (snapshot == nullptr) {
+    own_snapshot = new ManagedSnapshot(db_);
+    snapshot = own_snapshot->snapshot();
+  }
+  auto* iter = db_impl_->NewIteratorImpl(
+      read_options, cfd, snapshot->GetSequenceNumber(),
+      nullptr /*read_callback*/, true /*expose_blob_index*/);
+  return new BlobDBIterator(own_snapshot, iter, this, clock_, statistics_);
+}
+
+Status DestroyBlobDB(const std::string& dbname, const Options& options,
+                     const BlobDBOptions& bdb_options) {
+  const ImmutableDBOptions soptions(SanitizeOptions(dbname, options));
+  Env* env = soptions.env;
+
+  Status status;
+  std::string blobdir;
+  blobdir = (bdb_options.path_relative) ? dbname + "/" + bdb_options.blob_dir
+                                        : bdb_options.blob_dir;
+
+  std::vector<std::string> filenames;
+  if (env->GetChildren(blobdir, &filenames).ok()) {
+    for (const auto& f : filenames) {
+      uint64_t number;
+      FileType type;
+      if (ParseFileName(f, &number, &type) && type == kBlobFile) {
+        Status del = DeleteDBFile(&soptions, blobdir + "/" + f, blobdir, true,
+                                  /*force_fg=*/false);
+        if (status.ok() && !del.ok()) {
+          status = del;
+        }
+      }
+    }
+    // TODO: What to do if we cannot delete the directory?
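+    // The returned status is deliberately swallowed via
+    // PermitUncheckedError() below: the directory may legitimately be
+    // non-empty here if some blob file above could not be deleted.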
+ env->DeleteDir(blobdir).PermitUncheckedError(); + } + Status destroy = DestroyDB(dbname, options); + if (status.ok() && !destroy.ok()) { + status = destroy; + } + + return status; +} + +#ifndef NDEBUG +Status BlobDBImpl::TEST_GetBlobValue(const Slice& key, const Slice& index_entry, + PinnableSlice* value) { + return GetBlobValue(key, index_entry, value); +} + +void BlobDBImpl::TEST_AddDummyBlobFile(uint64_t blob_file_number, + SequenceNumber immutable_sequence) { + auto blob_file = std::make_shared(this, blob_dir_, blob_file_number, + db_options_.info_log.get()); + blob_file->MarkImmutable(immutable_sequence); + + blob_files_[blob_file_number] = blob_file; + live_imm_non_ttl_blob_files_[blob_file_number] = blob_file; +} + +std::vector> BlobDBImpl::TEST_GetBlobFiles() const { + ReadLock l(&mutex_); + std::vector> blob_files; + for (auto& p : blob_files_) { + blob_files.emplace_back(p.second); + } + return blob_files; +} + +std::vector> BlobDBImpl::TEST_GetLiveImmNonTTLFiles() + const { + ReadLock l(&mutex_); + std::vector> live_imm_non_ttl_files; + for (const auto& pair : live_imm_non_ttl_blob_files_) { + live_imm_non_ttl_files.emplace_back(pair.second); + } + return live_imm_non_ttl_files; +} + +std::vector> BlobDBImpl::TEST_GetObsoleteFiles() + const { + ReadLock l(&mutex_); + std::vector> obsolete_files; + for (auto& bfile : obsolete_files_) { + obsolete_files.emplace_back(bfile); + } + return obsolete_files; +} + +void BlobDBImpl::TEST_DeleteObsoleteFiles() { + DeleteObsoleteFiles(false /*abort*/); +} + +Status BlobDBImpl::TEST_CloseBlobFile(std::shared_ptr& bfile) { + MutexLock l(&write_mutex_); + WriteLock lock(&mutex_); + WriteLock file_lock(&bfile->mutex_); + + return CloseBlobFile(bfile); +} + +void BlobDBImpl::TEST_ObsoleteBlobFile(std::shared_ptr& blob_file, + SequenceNumber obsolete_seq, + bool update_size) { + return ObsoleteBlobFile(blob_file, obsolete_seq, update_size); +} + +void BlobDBImpl::TEST_EvictExpiredFiles() { + EvictExpiredFiles(false /*abort*/); +} + +uint64_t BlobDBImpl::TEST_live_sst_size() { return live_sst_size_.load(); } + +void BlobDBImpl::TEST_InitializeBlobFileToSstMapping( + const std::vector& live_files) { + InitializeBlobFileToSstMapping(live_files); +} + +void BlobDBImpl::TEST_ProcessFlushJobInfo(const FlushJobInfo& info) { + ProcessFlushJobInfo(info); +} + +void BlobDBImpl::TEST_ProcessCompactionJobInfo(const CompactionJobInfo& info) { + ProcessCompactionJobInfo(info); +} + +#endif // !NDEBUG + +} // namespace blob_db +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.h new file mode 100644 index 0000000000..de888c0683 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl.h @@ -0,0 +1,501 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db/blob/blob_log_format.h" +#include "db/blob/blob_log_writer.h" +#include "db/db_iter.h" +#include "rocksdb/compaction_filter.h" +#include "rocksdb/db.h" +#include "rocksdb/file_system.h" +#include "rocksdb/listener.h" +#include "rocksdb/options.h" +#include "rocksdb/statistics.h" +#include "rocksdb/wal_filter.h" +#include "util/mutexlock.h" +#include "util/timer_queue.h" +#include "utilities/blob_db/blob_db.h" +#include "utilities/blob_db/blob_file.h" + +namespace ROCKSDB_NAMESPACE { + +class DBImpl; +class ColumnFamilyHandle; +class ColumnFamilyData; +class SystemClock; + +struct FlushJobInfo; + +namespace blob_db { + +struct BlobCompactionContext; +struct BlobCompactionContextGC; +class BlobDBImpl; +class BlobFile; + +// Comparator to sort "TTL" aware Blob files based on the lower value of +// TTL range. +struct BlobFileComparatorTTL { + bool operator()(const std::shared_ptr& lhs, + const std::shared_ptr& rhs) const; +}; + +struct BlobFileComparator { + bool operator()(const std::shared_ptr& lhs, + const std::shared_ptr& rhs) const; +}; + +/** + * The implementation class for BlobDB. It manages the blob logs, which + * are sequentially written files. Blob logs can be of the TTL or non-TTL + * varieties; the former are cleaned up when they expire, while the latter + * are (optionally) garbage collected. + */ +class BlobDBImpl : public BlobDB { + friend class BlobFile; + friend class BlobDBIterator; + friend class BlobDBListener; + friend class BlobDBListenerGC; + friend class BlobIndexCompactionFilterBase; + friend class BlobIndexCompactionFilterGC; + + public: + // deletions check period + static constexpr uint32_t kDeleteCheckPeriodMillisecs = 2 * 1000; + + // sanity check task + static constexpr uint32_t kSanityCheckPeriodMillisecs = 20 * 60 * 1000; + + // how many random access open files can we tolerate + static constexpr uint32_t kOpenFilesTrigger = 100; + + // how often to schedule reclaim open files. + static constexpr uint32_t kReclaimOpenFilesPeriodMillisecs = 1 * 1000; + + // how often to schedule delete obs files periods + static constexpr uint32_t kDeleteObsoleteFilesPeriodMillisecs = 10 * 1000; + + // how often to schedule expired files eviction. 
+ static constexpr uint32_t kEvictExpiredFilesPeriodMillisecs = 10 * 1000; + + // when should oldest file be evicted: + // on reaching 90% of blob_dir_size + static constexpr double kEvictOldestFileAtSize = 0.9; + + using BlobDB::Put; + Status Put(const WriteOptions& options, const Slice& key, + const Slice& value) override; + + using BlobDB::Get; + Status Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value) override; + + Status Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value, + uint64_t* expiration) override; + + using BlobDB::NewIterator; + virtual Iterator* NewIterator(const ReadOptions& read_options) override; + + using BlobDB::NewIterators; + virtual Status NewIterators( + const ReadOptions& /*read_options*/, + const std::vector& /*column_families*/, + std::vector* /*iterators*/) override { + return Status::NotSupported("Not implemented"); + } + + using BlobDB::MultiGet; + virtual std::vector MultiGet( + const ReadOptions& read_options, const std::vector& keys, + std::vector* values) override; + + using BlobDB::Write; + virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override; + + virtual Status Close() override; + + using BlobDB::PutWithTTL; + Status PutWithTTL(const WriteOptions& options, const Slice& key, + const Slice& value, uint64_t ttl) override; + + using BlobDB::PutUntil; + Status PutUntil(const WriteOptions& options, const Slice& key, + const Slice& value, uint64_t expiration) override; + + using BlobDB::CompactFiles; + Status CompactFiles( + const CompactionOptions& compact_options, + const std::vector& input_file_names, const int output_level, + const int output_path_id = -1, + std::vector* const output_file_names = nullptr, + CompactionJobInfo* compaction_job_info = nullptr) override; + + BlobDBOptions GetBlobDBOptions() const override; + + BlobDBImpl(const std::string& dbname, const BlobDBOptions& bdb_options, + const DBOptions& db_options, + const ColumnFamilyOptions& cf_options); + + virtual Status DisableFileDeletions() override; + + virtual Status EnableFileDeletions(bool force) override; + + virtual Status GetLiveFiles(std::vector&, + uint64_t* manifest_file_size, + bool flush_memtable = true) override; + virtual void GetLiveFilesMetaData(std::vector*) override; + + ~BlobDBImpl(); + + Status Open(std::vector* handles); + + Status SyncBlobFiles() override; + + // Common part of the two GetCompactionContext methods below. 
+ // REQUIRES: read lock on mutex_ + void GetCompactionContextCommon(BlobCompactionContext* context); + + void GetCompactionContext(BlobCompactionContext* context); + void GetCompactionContext(BlobCompactionContext* context, + BlobCompactionContextGC* context_gc); + +#ifndef NDEBUG + Status TEST_GetBlobValue(const Slice& key, const Slice& index_entry, + PinnableSlice* value); + + void TEST_AddDummyBlobFile(uint64_t blob_file_number, + SequenceNumber immutable_sequence); + + std::vector> TEST_GetBlobFiles() const; + + std::vector> TEST_GetLiveImmNonTTLFiles() const; + + std::vector> TEST_GetObsoleteFiles() const; + + Status TEST_CloseBlobFile(std::shared_ptr& bfile); + + void TEST_ObsoleteBlobFile(std::shared_ptr& blob_file, + SequenceNumber obsolete_seq = 0, + bool update_size = true); + + void TEST_EvictExpiredFiles(); + + void TEST_DeleteObsoleteFiles(); + + uint64_t TEST_live_sst_size(); + + const std::string& TEST_blob_dir() const { return blob_dir_; } + + void TEST_InitializeBlobFileToSstMapping( + const std::vector& live_files); + + void TEST_ProcessFlushJobInfo(const FlushJobInfo& info); + + void TEST_ProcessCompactionJobInfo(const CompactionJobInfo& info); + +#endif // !NDEBUG + + private: + class BlobInserter; + + // Create a snapshot if there isn't one in read options. + // Return true if a snapshot is created. + bool SetSnapshotIfNeeded(ReadOptions* read_options); + + Status GetImpl(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, uint64_t* expiration = nullptr); + + Status GetBlobValue(const Slice& key, const Slice& index_entry, + PinnableSlice* value, uint64_t* expiration = nullptr); + + Status GetRawBlobFromFile(const Slice& key, uint64_t file_number, + uint64_t offset, uint64_t size, + PinnableSlice* value, + CompressionType* compression_type); + + Slice GetCompressedSlice(const Slice& raw, + std::string* compression_output) const; + + Status DecompressSlice(const Slice& compressed_value, + CompressionType compression_type, + PinnableSlice* value_output) const; + + // Close a file by appending a footer, and removes file from open files list. + // REQUIRES: lock held on write_mutex_, write lock held on both the db mutex_ + // and the blob file's mutex_. If called on a blob file which is visible only + // to a single thread (like in the case of new files written during + // compaction/GC), the locks on write_mutex_ and the blob file's mutex_ can be + // avoided. + Status CloseBlobFile(std::shared_ptr bfile); + + // Close a file if its size exceeds blob_file_size + // REQUIRES: lock held on write_mutex_. + Status CloseBlobFileIfNeeded(std::shared_ptr& bfile); + + // Mark file as obsolete and move the file to obsolete file list. + // + // REQUIRED: hold write lock of mutex_ or during DB open. + void ObsoleteBlobFile(std::shared_ptr blob_file, + SequenceNumber obsolete_seq, bool update_size); + + Status PutBlobValue(const WriteOptions& options, const Slice& key, + const Slice& value, uint64_t expiration, + WriteBatch* batch); + + Status AppendBlob(const std::shared_ptr& bfile, + const std::string& headerbuf, const Slice& key, + const Slice& value, uint64_t expiration, + std::string* index_entry); + + // Create a new blob file and associated writer. + Status CreateBlobFileAndWriter(bool has_ttl, + const ExpirationRange& expiration_range, + const std::string& reason, + std::shared_ptr* blob_file, + std::shared_ptr* writer); + + // Get the open non-TTL blob log file, or create a new one if no such file + // exists. 
+  Status SelectBlobFile(std::shared_ptr<BlobFile>* blob_file);
+
+  // Get the open TTL blob log file for a certain expiration, or create a new
+  // one if no such file exists.
+  Status SelectBlobFileTTL(uint64_t expiration,
+                           std::shared_ptr<BlobFile>* blob_file);
+
+  std::shared_ptr<BlobFile> FindBlobFileLocked(uint64_t expiration) const;
+
+  // Periodic sanity check; runs a bunch of checks.
+  std::pair<bool, int64_t> SanityCheck(bool aborted);
+
+  // Delete files that have been marked obsolete (either because of TTL
+  // or GC). Check whether any snapshots exist which refer to the same.
+  std::pair<bool, int64_t> DeleteObsoleteFiles(bool aborted);
+
+  // Periodically check whether any open blob files' TTLs have expired;
+  // if expired, close the sequential writer and make the file immutable.
+  std::pair<bool, int64_t> EvictExpiredFiles(bool aborted);
+
+  // If the number of open files approaches the ulimit, this task closes
+  // random-access readers, which are otherwise kept around for efficiency.
+  std::pair<bool, int64_t> ReclaimOpenFiles(bool aborted);
+
+  std::pair<bool, int64_t> RemoveTimerQ(TimerQueue* tq, bool aborted);
+
+  // Adds the background tasks to the timer queue
+  void StartBackgroundTasks();
+
+  // add a new Blob File
+  std::shared_ptr<BlobFile> NewBlobFile(bool has_ttl,
+                                        const ExpirationRange& expiration_range,
+                                        const std::string& reason);
+
+  // Register a new blob file.
+  // REQUIRES: write lock on mutex_.
+  void RegisterBlobFile(std::shared_ptr<BlobFile> blob_file);
+
+  // collect all the blob log files from the blob directory
+  Status GetAllBlobFiles(std::set<uint64_t>* file_numbers);
+
+  // Open all blob files found in blob_dir.
+  Status OpenAllBlobFiles();
+
+  // Link an SST to a blob file. Comes in locking and non-locking varieties
+  // (the latter is used during Open).
+  template <typename Linker>
+  void LinkSstToBlobFileImpl(uint64_t sst_file_number,
+                             uint64_t blob_file_number, Linker linker);
+
+  void LinkSstToBlobFile(uint64_t sst_file_number, uint64_t blob_file_number);
+
+  void LinkSstToBlobFileNoLock(uint64_t sst_file_number,
+                               uint64_t blob_file_number);
+
+  // Unlink an SST from a blob file.
+  void UnlinkSstFromBlobFile(uint64_t sst_file_number,
+                             uint64_t blob_file_number);
+
+  // Initialize the mapping between blob files and SSTs during Open.
+  void InitializeBlobFileToSstMapping(
+      const std::vector<LiveFileMetaData*>& live_files);
+
+  // Update the mapping between blob files and SSTs after a flush and mark
+  // any unneeded blob files obsolete.
+  void ProcessFlushJobInfo(const FlushJobInfo& info);
+
+  // Update the mapping between blob files and SSTs after a compaction and
+  // mark any unneeded blob files obsolete.
+  void ProcessCompactionJobInfo(const CompactionJobInfo& info);
+
+  // Mark an immutable non-TTL blob file obsolete assuming it has no more SSTs
+  // linked to it, and all memtables from before the blob file became immutable
+  // have been flushed. Note: should only be called if the condition holds for
+  // all lower-numbered non-TTL blob files as well.
+  bool MarkBlobFileObsoleteIfNeeded(const std::shared_ptr<BlobFile>& blob_file,
+                                    SequenceNumber obsolete_seq);
+
+  // Mark all immutable non-TTL blob files that aren't needed by any SSTs as
+  // obsolete. Comes in two varieties; the version used during Open need not
+  // worry about locking or snapshots.
+  template <typename Functor>
+  void MarkUnreferencedBlobFilesObsoleteImpl(Functor mark_if_needed);
+
+  void MarkUnreferencedBlobFilesObsolete();
+  void MarkUnreferencedBlobFilesObsoleteDuringOpen();
+
+  void UpdateLiveSSTSize();
+
+  Status GetBlobFileReader(const std::shared_ptr<BlobFile>& blob_file,
+                           std::shared_ptr<RandomAccessFileReader>* reader);
+
+  // hold write mutex on file and call.
+  // Close the above random access reader.
+  void CloseRandomAccessLocked(const std::shared_ptr<BlobFile>& bfile);
+
+  // hold write mutex on file and call
+  // creates a sequential (append) writer for this blobfile
+  Status CreateWriterLocked(const std::shared_ptr<BlobFile>& bfile);
+
+  // Returns a BlobLogWriter object for the file. If a writer is not
+  // already present, creates one. Needs the write mutex to be held.
+  Status CheckOrCreateWriterLocked(const std::shared_ptr<BlobFile>& blob_file,
+                                   std::shared_ptr<BlobLogWriter>* writer);
+
+  // Checks that no snapshot is still referencing the blobs.
+  bool VisibleToActiveSnapshot(const std::shared_ptr<BlobFile>& file);
+  bool FileDeleteOk_SnapshotCheckLocked(const std::shared_ptr<BlobFile>& bfile);
+
+  void CopyBlobFiles(std::vector<std::shared_ptr<BlobFile>>* bfiles_copy);
+
+  uint64_t EpochNow() { return clock_->NowMicros() / 1000000; }
+
+  // Check if inserting a new blob will make the DB grow out of space.
+  // If is_fifo = true, FIFO eviction will be triggered to make room for the
+  // new blob. If force_evict = true, FIFO eviction will evict blob files
+  // even if eviction will not make enough room for the new blob.
+  Status CheckSizeAndEvictBlobFiles(uint64_t blob_size,
+                                    bool force_evict = false);
+
+  // name of the database directory
+  std::string dbname_;
+
+  // the base DB
+  DBImpl* db_impl_;
+  Env* env_;
+  SystemClock* clock_;
+  // the options that govern the behavior of Blob Storage
+  BlobDBOptions bdb_options_;
+  DBOptions db_options_;
+  ColumnFamilyOptions cf_options_;
+  FileOptions file_options_;
+
+  // Raw pointer of statistic. db_options_ has a std::shared_ptr to hold
+  // ownership.
+  Statistics* statistics_;
+
+  // by default this is "blob_dir" under dbname_
+  // but can be configured
+  std::string blob_dir_;
+
+  // pointer to directory
+  std::unique_ptr<FSDirectory> dir_ent_;
+
+  // Read-write mutex, which protects all the data structures.
+  // HEAVILY TRAFFICKED
+  mutable port::RWMutex mutex_;
+
+  // Writers have to hold write_mutex_ before writing.
+  mutable port::Mutex write_mutex_;
+
+  // counter for blob file number
+  std::atomic<uint64_t> next_file_number_;
+
+  // In-memory metadata of all the blob files.
+  std::map<uint64_t, std::shared_ptr<BlobFile>> blob_files_;
+
+  // All live immutable non-TTL blob files.
+  std::map<uint64_t, std::shared_ptr<BlobFile>> live_imm_non_ttl_blob_files_;
+
+  // The largest sequence number that has been flushed.
+  SequenceNumber flush_sequence_;
+
+  // opened non-TTL blob file.
+  std::shared_ptr<BlobFile> open_non_ttl_file_;
+
+  // all the blob files which are currently being appended to, based
+  // on the variety of incoming TTLs
+  std::set<std::shared_ptr<BlobFile>, BlobFileComparatorTTL> open_ttl_files_;
+
+  // Flag to check whether Close() has been called on this DB
+  bool closed_;
+
+  // timer based queue to execute tasks
+  TimerQueue tqueue_;
+
+  // number of files opened for random access/GET;
+  // the counter is used to monitor and close excess RA files.
+  std::atomic<int32_t> open_file_count_;
+
+  // Total size of all live blob files (i.e. excludes obsolete files).
+  std::atomic<uint64_t> total_blob_size_;
+
+  // total size of SST files.
+  std::atomic<uint64_t> live_sst_size_;
+
+  // Latest FIFO eviction timestamp
+  //
+  // REQUIRES: access with mutex_ lock held.
+  uint64_t fifo_eviction_seq_;
+
+  // The expiration up to which the latest FIFO eviction evicts.
+  //
+  // REQUIRES: access with mutex_ lock held.
+  uint64_t evict_expiration_up_to_;
+
+  std::list<std::shared_ptr<BlobFile>> obsolete_files_;
+
+  // DeleteObsoleteFiles, DisableFileDeletions and EnableFileDeletions block
+  // on the mutex to avoid contention.
+  //
+  // While DeleteObsoleteFiles holds both mutex_ and delete_file_mutex_, note
+  // the difference:
+  // mutex_ only needs to be held when accessing the
+  // data structures, while delete_file_mutex_ needs to be held for the whole
+  // duration of DeleteObsoleteFiles to avoid it being run simultaneously with
+  // DisableFileDeletions.
+  //
+  // If both mutex_ and delete_file_mutex_ need to be held, it is advised
+  // to acquire delete_file_mutex_ first to avoid deadlock.
+  mutable port::Mutex delete_file_mutex_;
+
+  // Each call of DisableFileDeletions will increase disable_file_deletions_
+  // by 1. EnableFileDeletions will either decrease the count by 1 or reset
+  // it to zero, depending on the force flag.
+  //
+  // REQUIRES: access with delete_file_mutex_ held.
+  int disable_file_deletions_ = 0;
+
+  uint32_t debug_level_;
+};
+
+} // namespace blob_db
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl_filesnapshot.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl_filesnapshot.cc
new file mode 100644
index 0000000000..da2d02d07a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_impl_filesnapshot.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+
+#include "file/filename.h"
+#include "logging/logging.h"
+#include "util/cast_util.h"
+#include "util/mutexlock.h"
+#include "utilities/blob_db/blob_db_impl.h"
+
+// BlobDBImpl methods to get snapshot of files, e.g. for replication.
+
+namespace ROCKSDB_NAMESPACE {
+namespace blob_db {
+
+Status BlobDBImpl::DisableFileDeletions() {
+  // Disable base DB file deletions.
+  Status s = db_impl_->DisableFileDeletions();
+  if (!s.ok()) {
+    return s;
+  }
+
+  int count = 0;
+  {
+    // Hold delete_file_mutex_ to make sure no DeleteObsoleteFiles job
+    // is running.
+    MutexLock l(&delete_file_mutex_);
+    count = ++disable_file_deletions_;
+  }
+
+  ROCKS_LOG_INFO(db_options_.info_log,
+                 "Disabled blob file deletions. count: %d", count);
+  return Status::OK();
+}
+
+Status BlobDBImpl::EnableFileDeletions(bool force) {
+  // Enable base DB file deletions.
+  Status s = db_impl_->EnableFileDeletions(force);
+  if (!s.ok()) {
+    return s;
+  }
+
+  int count = 0;
+  {
+    MutexLock l(&delete_file_mutex_);
+    if (force) {
+      disable_file_deletions_ = 0;
+    } else if (disable_file_deletions_ > 0) {
+      count = --disable_file_deletions_;
+    }
+    assert(count >= 0);
+  }
+
+  ROCKS_LOG_INFO(db_options_.info_log, "Enabled blob file deletions. count: %d",
+                 count);
+  // Consider triggering DeleteObsoleteFiles once right after deletions are
+  // re-enabled, if the DeleteObsoleteFiles re-run interval ever becomes
+  // configurable.
+  return Status::OK();
+}
+
+Status BlobDBImpl::GetLiveFiles(std::vector<std::string>& ret,
+                                uint64_t* manifest_file_size,
+                                bool flush_memtable) {
+  if (!bdb_options_.path_relative) {
+    return Status::NotSupported(
+        "Not able to get relative blob file path from absolute blob_dir.");
+  }
+  // Hold a lock in the beginning to avoid updates to base DB during the call
+  ReadLock rl(&mutex_);
+  Status s = db_->GetLiveFiles(ret, manifest_file_size, flush_memtable);
+  if (!s.ok()) {
+    return s;
+  }
+  ret.reserve(ret.size() + blob_files_.size());
+  for (auto bfile_pair : blob_files_) {
+    auto blob_file = bfile_pair.second;
+    // Path should be relative to db_name, but begin with a slash.
+ ret.emplace_back( + BlobFileName("", bdb_options_.blob_dir, blob_file->BlobFileNumber())); + } + return Status::OK(); +} + +void BlobDBImpl::GetLiveFilesMetaData(std::vector* metadata) { + // Path should be relative to db_name. + assert(bdb_options_.path_relative); + // Hold a lock in the beginning to avoid updates to base DB during the call + ReadLock rl(&mutex_); + db_->GetLiveFilesMetaData(metadata); + for (auto bfile_pair : blob_files_) { + auto blob_file = bfile_pair.second; + LiveFileMetaData filemetadata; + filemetadata.size = blob_file->GetFileSize(); + const uint64_t file_number = blob_file->BlobFileNumber(); + // Path should be relative to db_name, but begin with slash. + filemetadata.name = BlobFileName("", bdb_options_.blob_dir, file_number); + filemetadata.file_number = file_number; + if (blob_file->HasTTL()) { + filemetadata.oldest_ancester_time = blob_file->GetExpirationRange().first; + } + auto cfh = + static_cast_with_check(DefaultColumnFamily()); + filemetadata.column_family_name = cfh->GetName(); + metadata->emplace_back(filemetadata); + } +} + +} // namespace blob_db +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_iterator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_iterator.h new file mode 100644 index 0000000000..4898ddfd76 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_iterator.h @@ -0,0 +1,148 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "db/arena_wrapped_db_iter.h" +#include "rocksdb/iterator.h" +#include "util/stop_watch.h" +#include "utilities/blob_db/blob_db_impl.h" + +namespace ROCKSDB_NAMESPACE { +class Statistics; +class SystemClock; + +namespace blob_db { + +using ROCKSDB_NAMESPACE::ManagedSnapshot; + +class BlobDBIterator : public Iterator { + public: + BlobDBIterator(ManagedSnapshot* snapshot, ArenaWrappedDBIter* iter, + BlobDBImpl* blob_db, SystemClock* clock, + Statistics* statistics) + : snapshot_(snapshot), + iter_(iter), + blob_db_(blob_db), + clock_(clock), + statistics_(statistics) {} + + virtual ~BlobDBIterator() = default; + + bool Valid() const override { + if (!iter_->Valid()) { + return false; + } + return status_.ok(); + } + + Status status() const override { + if (!iter_->status().ok()) { + return iter_->status(); + } + return status_; + } + + void SeekToFirst() override { + StopWatch seek_sw(clock_, statistics_, BLOB_DB_SEEK_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_SEEK); + iter_->SeekToFirst(); + while (UpdateBlobValue()) { + iter_->Next(); + } + } + + void SeekToLast() override { + StopWatch seek_sw(clock_, statistics_, BLOB_DB_SEEK_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_SEEK); + iter_->SeekToLast(); + while (UpdateBlobValue()) { + iter_->Prev(); + } + } + + void Seek(const Slice& target) override { + StopWatch seek_sw(clock_, statistics_, BLOB_DB_SEEK_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_SEEK); + iter_->Seek(target); + while (UpdateBlobValue()) { + iter_->Next(); + } + } + + void SeekForPrev(const Slice& target) override { + StopWatch seek_sw(clock_, statistics_, BLOB_DB_SEEK_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_SEEK); + iter_->SeekForPrev(target); + while (UpdateBlobValue()) { + iter_->Prev(); + } + } + + void 
Next() override { + assert(Valid()); + StopWatch next_sw(clock_, statistics_, BLOB_DB_NEXT_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_NEXT); + iter_->Next(); + while (UpdateBlobValue()) { + iter_->Next(); + } + } + + void Prev() override { + assert(Valid()); + StopWatch prev_sw(clock_, statistics_, BLOB_DB_PREV_MICROS); + RecordTick(statistics_, BLOB_DB_NUM_PREV); + iter_->Prev(); + while (UpdateBlobValue()) { + iter_->Prev(); + } + } + + Slice key() const override { + assert(Valid()); + return iter_->key(); + } + + Slice value() const override { + assert(Valid()); + if (!iter_->IsBlob()) { + return iter_->value(); + } + return value_; + } + + // Iterator::Refresh() not supported. + + private: + // Return true if caller should continue to next value. + bool UpdateBlobValue() { + value_.Reset(); + status_ = Status::OK(); + if (iter_->Valid() && iter_->status().ok() && iter_->IsBlob()) { + Status s = blob_db_->GetBlobValue(iter_->key(), iter_->value(), &value_); + if (s.IsNotFound()) { + return true; + } else { + if (!s.ok()) { + status_ = s; + } + return false; + } + } else { + return false; + } + } + + std::unique_ptr snapshot_; + std::unique_ptr iter_; + BlobDBImpl* blob_db_; + SystemClock* clock_; + Statistics* statistics_; + Status status_; + PinnableSlice value_; +}; +} // namespace blob_db +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_listener.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_listener.h new file mode 100644 index 0000000000..16aed33404 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_db_listener.h @@ -0,0 +1,69 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + + +#include + +#include "rocksdb/listener.h" +#include "util/mutexlock.h" +#include "utilities/blob_db/blob_db_impl.h" + +namespace ROCKSDB_NAMESPACE { +namespace blob_db { + +class BlobDBListener : public EventListener { + public: + explicit BlobDBListener(BlobDBImpl* blob_db_impl) + : blob_db_impl_(blob_db_impl) {} + + void OnFlushBegin(DB* /*db*/, const FlushJobInfo& /*info*/) override { + assert(blob_db_impl_ != nullptr); + blob_db_impl_->SyncBlobFiles(); + } + + void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& /*info*/) override { + assert(blob_db_impl_ != nullptr); + blob_db_impl_->UpdateLiveSSTSize(); + } + + void OnCompactionCompleted(DB* /*db*/, + const CompactionJobInfo& /*info*/) override { + assert(blob_db_impl_ != nullptr); + blob_db_impl_->UpdateLiveSSTSize(); + } + + const char* Name() const override { return kClassName(); } + static const char* kClassName() { return "BlobDBListener"; } + + protected: + BlobDBImpl* blob_db_impl_; +}; + +class BlobDBListenerGC : public BlobDBListener { + public: + explicit BlobDBListenerGC(BlobDBImpl* blob_db_impl) + : BlobDBListener(blob_db_impl) {} + + const char* Name() const override { return kClassName(); } + static const char* kClassName() { return "BlobDBListenerGC"; } + void OnFlushCompleted(DB* db, const FlushJobInfo& info) override { + BlobDBListener::OnFlushCompleted(db, info); + + assert(blob_db_impl_); + blob_db_impl_->ProcessFlushJobInfo(info); + } + + void OnCompactionCompleted(DB* db, const CompactionJobInfo& info) override { + BlobDBListener::OnCompactionCompleted(db, info); + + assert(blob_db_impl_); + blob_db_impl_->ProcessCompactionJobInfo(info); + } +}; + +} // namespace blob_db +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_dump_tool.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_dump_tool.h new file mode 100644 index 0000000000..12cd1bf422 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_dump_tool.h @@ -0,0 +1,56 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#pragma once + +#include +#include +#include + +#include "db/blob/blob_log_format.h" +#include "file/random_access_file_reader.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { +namespace blob_db { + +class BlobDumpTool { + public: + enum class DisplayType { + kNone, + kRaw, + kHex, + kDetail, + }; + + BlobDumpTool(); + + Status Run(const std::string& filename, DisplayType show_key, + DisplayType show_blob, DisplayType show_uncompressed_blob, + bool show_summary); + + private: + std::unique_ptr reader_; + std::unique_ptr buffer_; + size_t buffer_size_; + + Status Read(uint64_t offset, size_t size, Slice* result); + Status DumpBlobLogHeader(uint64_t* offset, CompressionType* compression); + Status DumpBlobLogFooter(uint64_t file_size, uint64_t* footer_offset); + Status DumpRecord(DisplayType show_key, DisplayType show_blob, + DisplayType show_uncompressed_blob, bool show_summary, + CompressionType compression, uint64_t* offset, + uint64_t* total_records, uint64_t* total_key_size, + uint64_t* total_blob_size, + uint64_t* total_uncompressed_blob_size); + void DumpSlice(const Slice s, DisplayType type); + + template + std::string GetString(std::pair p); +}; + +} // namespace blob_db +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.cc new file mode 100644 index 0000000000..cad89f2e49 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.cc @@ -0,0 +1,316 @@ + +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#include "utilities/blob_db/blob_file.h" + +#include + +#include +#include +#include + +#include "db/column_family.h" +#include "db/db_impl/db_impl.h" +#include "db/dbformat.h" +#include "file/filename.h" +#include "file/readahead_raf.h" +#include "logging/logging.h" +#include "utilities/blob_db/blob_db_impl.h" + +namespace ROCKSDB_NAMESPACE { + +namespace blob_db { + +BlobFile::BlobFile(const BlobDBImpl* p, const std::string& bdir, uint64_t fn, + Logger* info_log) + : parent_(p), path_to_dir_(bdir), file_number_(fn), info_log_(info_log) {} + +BlobFile::BlobFile(const BlobDBImpl* p, const std::string& bdir, uint64_t fn, + Logger* info_log, uint32_t column_family_id, + CompressionType compression, bool has_ttl, + const ExpirationRange& expiration_range) + : parent_(p), + path_to_dir_(bdir), + file_number_(fn), + info_log_(info_log), + column_family_id_(column_family_id), + compression_(compression), + has_ttl_(has_ttl), + expiration_range_(expiration_range), + header_(column_family_id, compression, has_ttl, expiration_range), + header_valid_(true) {} + +BlobFile::~BlobFile() { + if (obsolete_) { + std::string pn(PathName()); + Status s = Env::Default()->DeleteFile(PathName()); + if (!s.ok()) { + // ROCKS_LOG_INFO(db_options_.info_log, + // "File could not be deleted %s", pn.c_str()); + } + } +} + +uint32_t BlobFile::GetColumnFamilyId() const { return column_family_id_; } + +std::string BlobFile::PathName() const { + return BlobFileName(path_to_dir_, file_number_); +} + +std::string BlobFile::DumpState() const { + char str[1000]; + snprintf( + str, sizeof(str), + "path: %s fn: %" PRIu64 " blob_count: %" PRIu64 " file_size: %" PRIu64 + " closed: %d obsolete: %d expiration_range: (%" PRIu64 ", %" PRIu64 + "), writer: %d reader: %d", + path_to_dir_.c_str(), file_number_, blob_count_.load(), file_size_.load(), + closed_.load(), obsolete_.load(), expiration_range_.first, + expiration_range_.second, (!!log_writer_), (!!ra_file_reader_)); + return str; +} + +void BlobFile::MarkObsolete(SequenceNumber sequence) { + assert(Immutable()); + obsolete_sequence_ = sequence; + obsolete_.store(true); +} + +Status BlobFile::WriteFooterAndCloseLocked(SequenceNumber sequence) { + BlobLogFooter footer; + footer.blob_count = blob_count_; + if (HasTTL()) { + footer.expiration_range = expiration_range_; + } + + // this will close the file and reset the Writable File Pointer. + Status s = log_writer_->AppendFooter(footer, /* checksum_method */ nullptr, + /* checksum_value */ nullptr); + if (s.ok()) { + closed_ = true; + immutable_sequence_ = sequence; + file_size_ += BlobLogFooter::kSize; + } + // delete the sequential writer + log_writer_.reset(); + return s; +} + +Status BlobFile::ReadFooter(BlobLogFooter* bf) { + if (file_size_ < (BlobLogHeader::kSize + BlobLogFooter::kSize)) { + return Status::IOError("File does not have footer", PathName()); + } + + uint64_t footer_offset = file_size_ - BlobLogFooter::kSize; + // assume that ra_file_reader_ is valid before we enter this + assert(ra_file_reader_); + + Slice result; + std::string buf; + AlignedBuf aligned_buf; + Status s; + // TODO: rate limit reading footers from blob files. 
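+  // Two read paths below: direct I/O requires the file system to fill an
+  // AlignedBuf, while buffered reads can use plain std::string scratch
+  // space. Either way, `result` ends up pointing at BlobLogFooter::kSize
+  // bytes starting at footer_offset.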
+  if (ra_file_reader_->use_direct_io()) {
+    s = ra_file_reader_->Read(IOOptions(), footer_offset, BlobLogFooter::kSize,
+                              &result, nullptr, &aligned_buf,
+                              Env::IO_TOTAL /* rate_limiter_priority */);
+  } else {
+    buf.reserve(BlobLogFooter::kSize + 10);
+    s = ra_file_reader_->Read(IOOptions(), footer_offset, BlobLogFooter::kSize,
+                              &result, &buf[0], nullptr,
+                              Env::IO_TOTAL /* rate_limiter_priority */);
+  }
+  if (!s.ok()) return s;
+  if (result.size() != BlobLogFooter::kSize) {
+    // should not happen
+    return Status::IOError("EOF reached before footer");
+  }
+
+  s = bf->DecodeFrom(result);
+  return s;
+}
+
+Status BlobFile::SetFromFooterLocked(const BlobLogFooter& footer) {
+  blob_count_ = footer.blob_count;
+  expiration_range_ = footer.expiration_range;
+  closed_ = true;
+  return Status::OK();
+}
+
+Status BlobFile::Fsync() {
+  Status s;
+  if (log_writer_.get()) {
+    s = log_writer_->Sync();
+  }
+  return s;
+}
+
+void BlobFile::CloseRandomAccessLocked() {
+  ra_file_reader_.reset();
+  last_access_ = -1;
+}
+
+Status BlobFile::GetReader(Env* env, const FileOptions& file_options,
+                           std::shared_ptr<RandomAccessFileReader>* reader,
+                           bool* fresh_open) {
+  assert(reader != nullptr);
+  assert(fresh_open != nullptr);
+  *fresh_open = false;
+  int64_t current_time = 0;
+  if (env->GetCurrentTime(&current_time).ok()) {
+    last_access_.store(current_time);
+  }
+  Status s;
+
+  {
+    ReadLock lockbfile_r(&mutex_);
+    if (ra_file_reader_) {
+      *reader = ra_file_reader_;
+      return s;
+    }
+  }
+
+  WriteLock lockbfile_w(&mutex_);
+  // Double check.
+  if (ra_file_reader_) {
+    *reader = ra_file_reader_;
+    return s;
+  }
+
+  std::unique_ptr<FSRandomAccessFile> rfile;
+  s = env->GetFileSystem()->NewRandomAccessFile(PathName(), file_options,
+                                                &rfile, nullptr);
+  if (!s.ok()) {
+    ROCKS_LOG_ERROR(info_log_,
+                    "Failed to open blob file for random-read: %s status: '%s'"
+                    " exists: '%s'",
+                    PathName().c_str(), s.ToString().c_str(),
+                    env->FileExists(PathName()).ToString().c_str());
+    return s;
+  }
+
+  ra_file_reader_ =
+      std::make_shared<RandomAccessFileReader>(std::move(rfile), PathName());
+  *reader = ra_file_reader_;
+  *fresh_open = true;
+  return s;
+}
+
+Status BlobFile::ReadMetadata(const std::shared_ptr<FileSystem>& fs,
+                              const FileOptions& file_options) {
+  assert(Immutable());
+  // Get file size.
+  uint64_t file_size = 0;
+  Status s =
+      fs->GetFileSize(PathName(), file_options.io_options, &file_size, nullptr);
+  if (s.ok()) {
+    file_size_ = file_size;
+  } else {
+    ROCKS_LOG_ERROR(info_log_,
+                    "Failed to get size of blob file %" PRIu64 ", status: %s",
+                    file_number_, s.ToString().c_str());
+    return s;
+  }
+  if (file_size < BlobLogHeader::kSize) {
+    ROCKS_LOG_ERROR(info_log_,
+                    "Incomplete blob file %" PRIu64 ", size: %" PRIu64,
+                    file_number_, file_size);
+    return Status::Corruption("Incomplete blob file header.");
+  }
+
+  // Create file reader.
+  std::unique_ptr<RandomAccessFileReader> file_reader;
+  s = RandomAccessFileReader::Create(fs, PathName(), file_options, &file_reader,
+                                     nullptr);
+  if (!s.ok()) {
+    ROCKS_LOG_ERROR(info_log_,
+                    "Failed to open blob file %" PRIu64 ", status: %s",
+                    file_number_, s.ToString().c_str());
+    return s;
+  }
+
+  // Read file header.
+  std::string header_buf;
+  AlignedBuf aligned_buf;
+  Slice header_slice;
+  // TODO: rate limit reading headers from blob files.
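+  // Layout assumed by the reads below (a sketch; see blob_log_format.h for
+  // the authoritative definition):
+  //
+  //   [BlobLogHeader][record]...[record][BlobLogFooter]
+  //   ^offset 0                         ^file_size_ - BlobLogFooter::kSize
+  //
+  // The footer is absent if the file was never closed cleanly, which is why
+  // a missing or undecodable footer is tolerated further down.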
+ if (file_reader->use_direct_io()) { + s = file_reader->Read(IOOptions(), 0, BlobLogHeader::kSize, &header_slice, + nullptr, &aligned_buf, + Env::IO_TOTAL /* rate_limiter_priority */); + } else { + header_buf.reserve(BlobLogHeader::kSize); + s = file_reader->Read(IOOptions(), 0, BlobLogHeader::kSize, &header_slice, + &header_buf[0], nullptr, + Env::IO_TOTAL /* rate_limiter_priority */); + } + if (!s.ok()) { + ROCKS_LOG_ERROR( + info_log_, "Failed to read header of blob file %" PRIu64 ", status: %s", + file_number_, s.ToString().c_str()); + return s; + } + BlobLogHeader header; + s = header.DecodeFrom(header_slice); + if (!s.ok()) { + ROCKS_LOG_ERROR(info_log_, + "Failed to decode header of blob file %" PRIu64 + ", status: %s", + file_number_, s.ToString().c_str()); + return s; + } + column_family_id_ = header.column_family_id; + compression_ = header.compression; + has_ttl_ = header.has_ttl; + if (has_ttl_) { + expiration_range_ = header.expiration_range; + } + header_valid_ = true; + + // Read file footer. + if (file_size_ < BlobLogHeader::kSize + BlobLogFooter::kSize) { + // OK not to have footer. + assert(!footer_valid_); + return Status::OK(); + } + std::string footer_buf; + Slice footer_slice; + // TODO: rate limit reading footers from blob files. + if (file_reader->use_direct_io()) { + s = file_reader->Read(IOOptions(), file_size - BlobLogFooter::kSize, + BlobLogFooter::kSize, &footer_slice, nullptr, + &aligned_buf, + Env::IO_TOTAL /* rate_limiter_priority */); + } else { + footer_buf.reserve(BlobLogFooter::kSize); + s = file_reader->Read(IOOptions(), file_size - BlobLogFooter::kSize, + BlobLogFooter::kSize, &footer_slice, &footer_buf[0], + nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + } + if (!s.ok()) { + ROCKS_LOG_ERROR( + info_log_, "Failed to read footer of blob file %" PRIu64 ", status: %s", + file_number_, s.ToString().c_str()); + return s; + } + BlobLogFooter footer; + s = footer.DecodeFrom(footer_slice); + if (!s.ok()) { + // OK not to have footer. + assert(!footer_valid_); + return Status::OK(); + } + blob_count_ = footer.blob_count; + if (has_ttl_) { + assert(header.expiration_range.first <= footer.expiration_range.first); + assert(header.expiration_range.second >= footer.expiration_range.second); + expiration_range_ = footer.expiration_range; + } + footer_valid_ = true; + return Status::OK(); +} + +} // namespace blob_db +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.h new file mode 100644 index 0000000000..8651c6b672 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/blob_db/blob_file.h @@ -0,0 +1,244 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#pragma once + +#include +#include +#include +#include + +#include "db/blob/blob_log_format.h" +#include "db/blob/blob_log_writer.h" +#include "file/random_access_file_reader.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/options.h" + +namespace ROCKSDB_NAMESPACE { +namespace blob_db { + +class BlobDBImpl; + +class BlobFile { + friend class BlobDBImpl; + friend struct BlobFileComparator; + friend struct BlobFileComparatorTTL; + friend class BlobIndexCompactionFilterBase; + friend class BlobIndexCompactionFilterGC; + + private: + // access to parent + const BlobDBImpl* parent_{nullptr}; + + // path to blob directory + std::string path_to_dir_; + + // the id of the file. + // the above 2 are created during file creation and never changed + // after that + uint64_t file_number_{0}; + + // The file numbers of the SST files whose oldest blob file reference + // points to this blob file. + std::unordered_set linked_sst_files_; + + // Info log. + Logger* info_log_{nullptr}; + + // Column family id. + uint32_t column_family_id_{std::numeric_limits::max()}; + + // Compression type of blobs in the file + CompressionType compression_{kNoCompression}; + + // If true, the keys in this file all has TTL. Otherwise all keys don't + // have TTL. + bool has_ttl_{false}; + + // TTL range of blobs in the file. + ExpirationRange expiration_range_; + + // number of blobs in the file + std::atomic blob_count_{0}; + + // size of the file + std::atomic file_size_{0}; + + BlobLogHeader header_; + + // closed_ = true implies the file is no more mutable + // no more blobs will be appended and the footer has been written out + std::atomic closed_{false}; + + // The latest sequence number when the file was closed/made immutable. + SequenceNumber immutable_sequence_{0}; + + // Whether the file was marked obsolete (due to either TTL or GC). + // obsolete_ still needs to do iterator/snapshot checks + std::atomic obsolete_{false}; + + // The last sequence number by the time the file marked as obsolete. + // Data in this file is visible to a snapshot taken before the sequence. + SequenceNumber obsolete_sequence_{0}; + + // Sequential/Append writer for blobs + std::shared_ptr log_writer_; + + // random access file reader for GET calls + std::shared_ptr ra_file_reader_; + + // This Read-Write mutex is per file specific and protects + // all the datastructures + mutable port::RWMutex mutex_; + + // time when the random access reader was last created. + std::atomic last_access_{-1}; + + bool header_valid_{false}; + + bool footer_valid_{false}; + + public: + BlobFile() = default; + + BlobFile(const BlobDBImpl* parent, const std::string& bdir, uint64_t fnum, + Logger* info_log); + + BlobFile(const BlobDBImpl* parent, const std::string& bdir, uint64_t fnum, + Logger* info_log, uint32_t column_family_id, + CompressionType compression, bool has_ttl, + const ExpirationRange& expiration_range); + + ~BlobFile(); + + uint32_t GetColumnFamilyId() const; + + // Returns log file's absolute pathname. + std::string PathName() const; + + // Primary identifier for blob file. + // once the file is created, this never changes + uint64_t BlobFileNumber() const { return file_number_; } + + // Get the set of SST files whose oldest blob file reference points to + // this file. + const std::unordered_set& GetLinkedSstFiles() const { + return linked_sst_files_; + } + + // Link an SST file whose oldest blob file reference points to this file. 
+ void LinkSstFile(uint64_t sst_file_number) { + assert(linked_sst_files_.find(sst_file_number) == linked_sst_files_.end()); + linked_sst_files_.insert(sst_file_number); + } + + // Unlink an SST file whose oldest blob file reference points to this file. + void UnlinkSstFile(uint64_t sst_file_number) { + auto it = linked_sst_files_.find(sst_file_number); + assert(it != linked_sst_files_.end()); + linked_sst_files_.erase(it); + } + + // the following functions are atomic, and don't need + // read lock + uint64_t BlobCount() const { + return blob_count_.load(std::memory_order_acquire); + } + + std::string DumpState() const; + + // if the file is not taking any more appends. + bool Immutable() const { return closed_.load(); } + + // Mark the file as immutable. + // REQUIRES: write lock held, or access from single thread (on DB open). + void MarkImmutable(SequenceNumber sequence) { + closed_ = true; + immutable_sequence_ = sequence; + } + + SequenceNumber GetImmutableSequence() const { + assert(Immutable()); + return immutable_sequence_; + } + + // Whether the file was marked obsolete (due to either TTL or GC). + bool Obsolete() const { + assert(Immutable() || !obsolete_.load()); + return obsolete_.load(); + } + + // Mark file as obsolete (due to either TTL or GC). The file is not visible to + // snapshots with sequence greater or equal to the given sequence. + void MarkObsolete(SequenceNumber sequence); + + SequenceNumber GetObsoleteSequence() const { + assert(Obsolete()); + return obsolete_sequence_; + } + + Status Fsync(); + + uint64_t GetFileSize() const { + return file_size_.load(std::memory_order_acquire); + } + + // All Get functions which are not atomic, will need ReadLock on the mutex + + const ExpirationRange& GetExpirationRange() const { + return expiration_range_; + } + + void ExtendExpirationRange(uint64_t expiration) { + expiration_range_.first = std::min(expiration_range_.first, expiration); + expiration_range_.second = std::max(expiration_range_.second, expiration); + } + + bool HasTTL() const { return has_ttl_; } + + void SetHasTTL(bool has_ttl) { has_ttl_ = has_ttl; } + + CompressionType GetCompressionType() const { return compression_; } + + std::shared_ptr GetWriter() const { return log_writer_; } + + // Read blob file header and footer. Return corruption if file header is + // malform or incomplete. If footer is malform or incomplete, set + // footer_valid_ to false and return Status::OK. 
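+ // Illustrative call sequence (hypothetical caller, shown for clarity):
+ //
+ //   std::shared_ptr<BlobFile> file = ...;
+ //   Status s = file->ReadMetadata(env->GetFileSystem(), FileOptions());
+ //   // s.ok() with footer_valid_ == false is expected for a file that is
+ //   // still open for appends.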
+ Status ReadMetadata(const std::shared_ptr& fs, + const FileOptions& file_options); + + Status GetReader(Env* env, const FileOptions& file_options, + std::shared_ptr* reader, + bool* fresh_open); + + private: + Status ReadFooter(BlobLogFooter* footer); + + Status WriteFooterAndCloseLocked(SequenceNumber sequence); + + void CloseRandomAccessLocked(); + + // this is used, when you are reading only the footer of a + // previously closed file + Status SetFromFooterLocked(const BlobLogFooter& footer); + + void set_expiration_range(const ExpirationRange& expiration_range) { + expiration_range_ = expiration_range; + } + + // The following functions are atomic, and don't need locks + void SetFileSize(uint64_t fs) { file_size_ = fs; } + + void SetBlobCount(uint64_t bc) { blob_count_ = bc; } + + void BlobRecordAdded(uint64_t record_size) { + ++blob_count_; + file_size_ += record_size; + } +}; +} // namespace blob_db +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load.cc new file mode 100644 index 0000000000..bbe02a9340 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load.cc @@ -0,0 +1,67 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "rocksdb/utilities/cache_dump_load.h" + +#include "file/writable_file_writer.h" +#include "port/lang.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "table/format.h" +#include "util/crc32c.h" +#include "utilities/cache_dump_load_impl.h" + +namespace ROCKSDB_NAMESPACE { + +IOStatus NewToFileCacheDumpWriter(const std::shared_ptr& fs, + const FileOptions& file_opts, + const std::string& file_name, + std::unique_ptr* writer) { + std::unique_ptr file_writer; + IOStatus io_s = WritableFileWriter::Create(fs, file_name, file_opts, + &file_writer, nullptr); + if (!io_s.ok()) { + return io_s; + } + writer->reset(new ToFileCacheDumpWriter(std::move(file_writer))); + return io_s; +} + +IOStatus NewFromFileCacheDumpReader(const std::shared_ptr& fs, + const FileOptions& file_opts, + const std::string& file_name, + std::unique_ptr* reader) { + std::unique_ptr file_reader; + IOStatus io_s = RandomAccessFileReader::Create(fs, file_name, file_opts, + &file_reader, nullptr); + if (!io_s.ok()) { + return io_s; + } + reader->reset(new FromFileCacheDumpReader(std::move(file_reader))); + return io_s; +} + +Status NewDefaultCacheDumper(const CacheDumpOptions& dump_options, + const std::shared_ptr& cache, + std::unique_ptr&& writer, + std::unique_ptr* cache_dumper) { + cache_dumper->reset( + new CacheDumperImpl(dump_options, cache, std::move(writer))); + return Status::OK(); +} + +Status NewDefaultCacheDumpedLoader( + const CacheDumpOptions& dump_options, + const BlockBasedTableOptions& toptions, + const std::shared_ptr& secondary_cache, + std::unique_ptr&& reader, + std::unique_ptr* cache_dump_loader) { + cache_dump_loader->reset(new CacheDumpedLoaderImpl( + dump_options, toptions, secondary_cache, std::move(reader))); + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.cc new 
file mode 100644
index 0000000000..52f2a4df7d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.cc
@@ -0,0 +1,369 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "cache/cache_key.h"
+#include "table/block_based/block_based_table_reader.h"
+
+#include "cache/cache_entry_roles.h"
+#include "file/writable_file_writer.h"
+#include "port/lang.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/utilities/ldb_cmd.h"
+#include "table/format.h"
+#include "util/crc32c.h"
+#include "utilities/cache_dump_load_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Set the dump filter with a list of DBs. Block cache may be shared by
+// multiple DBs and we may only want to dump out the blocks belonging to
+// certain DB(s). Therefore, a filter is needed to decide if the key of the
+// block satisfies the requirement.
+Status CacheDumperImpl::SetDumpFilter(std::vector<DB*> db_list) {
+  Status s = Status::OK();
+  for (size_t i = 0; i < db_list.size(); i++) {
+    assert(i < db_list.size());
+    TablePropertiesCollection ptc;
+    assert(db_list[i] != nullptr);
+    s = db_list[i]->GetPropertiesOfAllTables(&ptc);
+    if (!s.ok()) {
+      return s;
+    }
+    for (auto id = ptc.begin(); id != ptc.end(); id++) {
+      OffsetableCacheKey base;
+      // We only want to save cache entries that are portable to another
+      // DB::Open, so only save entries with stable keys.
+      bool is_stable;
+      BlockBasedTable::SetupBaseCacheKey(id->second.get(),
+                                         /*cur_db_session_id*/ "",
+                                         /*cur_file_num*/ 0, &base,
+                                         &is_stable);
+      if (is_stable) {
+        Slice prefix_slice = base.CommonPrefixSlice();
+        assert(prefix_slice.size() == OffsetableCacheKey::kCommonPrefixSize);
+        prefix_filter_.insert(prefix_slice.ToString());
+      }
+    }
+  }
+  return s;
+}
+
+// This is the main function to dump out the cache block entries to the writer.
+// The writer may create a file or write to other systems. Currently, we will
+// iterate the whole block cache, get the blocks, and write them to the writer
+IOStatus CacheDumperImpl::DumpCacheEntriesToWriter() {
+  // Prepare stage, check the parameters.
+  if (cache_ == nullptr) {
+    return IOStatus::InvalidArgument("Cache is null");
+  }
+  if (writer_ == nullptr) {
+    return IOStatus::InvalidArgument("CacheDumpWriter is null");
+  }
+  // Set the system clock
+  if (options_.clock == nullptr) {
+    return IOStatus::InvalidArgument("System clock is null");
+  }
+  clock_ = options_.clock;
+
+  // Set the sequence number
+  sequence_num_ = 0;
+
+  // Dump stage, first, we write the header
+  IOStatus io_s = WriteHeader();
+  if (!io_s.ok()) {
+    return io_s;
+  }
+
+  // Then, we iterate the block cache and dump out the blocks that are not
+  // filtered out.
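+  // (Cache::ApplyToAllEntries runs the callback synchronously over every
+  // resident entry; the callback itself skips entries that cannot be
+  // persisted and entries rejected by the prefix filter.)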
+  std::string buf;
+  cache_->ApplyToAllEntries(DumpOneBlockCallBack(buf), {});
+
+  // Finally, write the footer
+  io_s = WriteFooter();
+  if (!io_s.ok()) {
+    return io_s;
+  }
+  io_s = writer_->Close();
+  return io_s;
+}
+
+// Check if we need to filter out the block based on its key
+bool CacheDumperImpl::ShouldFilterOut(const Slice& key) {
+  if (key.size() < OffsetableCacheKey::kCommonPrefixSize) {
+    return /*filter out*/ true;
+  }
+  Slice key_prefix(key.data(), OffsetableCacheKey::kCommonPrefixSize);
+  std::string prefix = key_prefix.ToString();
+  // Filter out if not found
+  return prefix_filter_.find(prefix) == prefix_filter_.end();
+}
+
+// This is the callback function which will be applied to
+// Cache::ApplyToAllEntries. In this callback function, we will get the block
+// type, decide if the block needs to be dumped based on the filter, and write
+// the block through the provided writer. `buf` is passed in for efficient
+// reuse.
+std::function<void(const Slice&, Cache::ObjectPtr, size_t,
+                   const Cache::CacheItemHelper*)>
+CacheDumperImpl::DumpOneBlockCallBack(std::string& buf) {
+  return [&](const Slice& key, Cache::ObjectPtr value, size_t /*charge*/,
+             const Cache::CacheItemHelper* helper) {
+    if (helper == nullptr || helper->size_cb == nullptr ||
+        helper->saveto_cb == nullptr) {
+      // Not compatible with dumping. Skip this entry.
+      return;
+    }
+
+    CacheEntryRole role = helper->role;
+    CacheDumpUnitType type = CacheDumpUnitType::kBlockTypeMax;
+
+    switch (role) {
+      case CacheEntryRole::kDataBlock:
+        type = CacheDumpUnitType::kData;
+        break;
+      case CacheEntryRole::kFilterBlock:
+        type = CacheDumpUnitType::kFilter;
+        break;
+      case CacheEntryRole::kFilterMetaBlock:
+        type = CacheDumpUnitType::kFilterMetaBlock;
+        break;
+      case CacheEntryRole::kIndexBlock:
+        type = CacheDumpUnitType::kIndex;
+        break;
+      default:
+        // Filter out other entries
+        // FIXME? Do we need the CacheDumpUnitTypes? UncompressionDict?
+        return;
+    }
+
+    // Based on the key prefix, check if the block should be filtered out.
+    if (ShouldFilterOut(key)) {
+      return;
+    }
+
+    assert(type != CacheDumpUnitType::kBlockTypeMax);
+
+    // Use cache item helper to get persistable data
+    // FIXME: reduce copying
+    size_t len = helper->size_cb(value);
+    buf.assign(len, '\0');
+    Status s = helper->saveto_cb(value, /*start*/ 0, len, buf.data());
+
+    if (s.ok()) {
+      // Write it out
+      WriteBlock(type, key, buf).PermitUncheckedError();
+    }
+  };
+}
+
+// Write the block to the writer. It takes the timestamp of the
+// block being copied from block cache, block type, key, block pointer,
+// block size and block checksum as the input. When writing the dumper raw
+// block, we first create the dump unit and encode it to a string. Then,
+// we calculate the checksum of the whole dump unit string and store it in
+// the dump unit metadata.
+// We write the metadata first, which is a fixed-size string, and then
+// append the dump unit string to the writer.
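+// As handed to the writer, each block therefore consists of two strings
+// (see DumpUnitMeta and CacheDumperHelper in cache_dump_load_impl.h):
+//
+//   metadata:  [sequence_num:4][dump_unit_checksum:4][dump_unit_size:8]
+//   dump unit: [timestamp:8][type:1][key (length-prefixed)]
+//              [value_len:4][value_checksum:4][value (length-prefixed)]
+//
+// (The file-based writer additionally prepends a fixed 4-byte size to each
+// string; see ToFileCacheDumpWriter.)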
+IOStatus CacheDumperImpl::WriteBlock(CacheDumpUnitType type, const Slice& key, + const Slice& value) { + uint64_t timestamp = clock_->NowMicros(); + uint32_t value_checksum = crc32c::Value(value.data(), value.size()); + + // First, serialize the block information in a string + DumpUnit dump_unit; + dump_unit.timestamp = timestamp; + dump_unit.key = key; + dump_unit.type = type; + dump_unit.value_len = value.size(); + dump_unit.value = const_cast(value.data()); + dump_unit.value_checksum = value_checksum; + std::string encoded_data; + CacheDumperHelper::EncodeDumpUnit(dump_unit, &encoded_data); + + // Second, create the metadata, which contains a sequence number, the dump + // unit string checksum and the string size. The sequence number monotonically + // increases from 0. + DumpUnitMeta unit_meta; + unit_meta.sequence_num = sequence_num_; + sequence_num_++; + unit_meta.dump_unit_checksum = + crc32c::Value(encoded_data.data(), encoded_data.size()); + unit_meta.dump_unit_size = encoded_data.size(); + std::string encoded_meta; + CacheDumperHelper::EncodeDumpUnitMeta(unit_meta, &encoded_meta); + + // We write the metadata first. + assert(writer_ != nullptr); + IOStatus io_s = writer_->WriteMetadata(encoded_meta); + if (!io_s.ok()) { + return io_s; + } + // followed by the dump unit. + return writer_->WritePacket(encoded_data); +} + +// Before we write any block, we write the header first to store the cache dump +// format version, rocksdb version, and brief intro. +IOStatus CacheDumperImpl::WriteHeader() { + std::string header_key = "header"; + std::ostringstream s; + s << kTraceMagic << "\t" + << "Cache dump format version: " << kCacheDumpMajorVersion << "." + << kCacheDumpMinorVersion << "\t" + << "RocksDB Version: " << kMajorVersion << "." << kMinorVersion << "\t" + << "Format: dump_unit_metadata , dump_unit cache_value\n"; + std::string header_value(s.str()); + CacheDumpUnitType type = CacheDumpUnitType::kHeader; + return WriteBlock(type, header_key, header_value); +} + +// Write the footer after all the blocks are stored to indicate the ending. +IOStatus CacheDumperImpl::WriteFooter() { + std::string footer_key = "footer"; + std::string footer_value("cache dump completed"); + CacheDumpUnitType type = CacheDumpUnitType::kFooter; + return WriteBlock(type, footer_key, footer_value); +} + +// This is the main function to restore the cache entries to secondary cache. +// First, we check if all the arguments are valid. Then, we read the block +// sequentially from the reader and insert them to the secondary cache. +IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() { + // TODO: remove this line when options are used in the loader + (void)options_; + // Step 1: we check if all the arguments are valid + if (secondary_cache_ == nullptr) { + return IOStatus::InvalidArgument("Secondary Cache is null"); + } + if (reader_ == nullptr) { + return IOStatus::InvalidArgument("CacheDumpReader is null"); + } + + // Step 2: read the header + // TODO: we need to check the cache dump format version and RocksDB version + // after the header is read out. + IOStatus io_s; + DumpUnit dump_unit; + std::string data; + io_s = ReadHeader(&data, &dump_unit); + if (!io_s.ok()) { + return io_s; + } + + // Step 3: read out the rest of the blocks from the reader. The loop will stop + // either I/O status is not ok or we reach to the the end. 
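+ // (A kFooter unit is the clean end-of-dump marker; any other loop exit is
+ // surfaced as an I/O error to the caller.)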
+ while (io_s.ok()) { + dump_unit.reset(); + data.clear(); + // read the content and store in the dump_unit + io_s = ReadCacheBlock(&data, &dump_unit); + if (!io_s.ok()) { + break; + } + if (dump_unit.type == CacheDumpUnitType::kFooter) { + break; + } + // Create the uncompressed_block based on the information in the dump_unit + // (There is no block trailer here compatible with block-based SST file.) + Slice content = + Slice(static_cast(dump_unit.value), dump_unit.value_len); + Status s = secondary_cache_->InsertSaved(dump_unit.key, content); + if (!s.ok()) { + io_s = status_to_io_status(std::move(s)); + } + } + if (dump_unit.type == CacheDumpUnitType::kFooter) { + return IOStatus::OK(); + } else { + return io_s; + } +} + +// Read and copy the dump unit metadata to std::string data, decode and create +// the unit metadata based on the string +IOStatus CacheDumpedLoaderImpl::ReadDumpUnitMeta(std::string* data, + DumpUnitMeta* unit_meta) { + assert(reader_ != nullptr); + assert(data != nullptr); + assert(unit_meta != nullptr); + IOStatus io_s = reader_->ReadMetadata(data); + if (!io_s.ok()) { + return io_s; + } + return status_to_io_status( + CacheDumperHelper::DecodeDumpUnitMeta(*data, unit_meta)); +} + +// Read and copy the dump unit to std::string data, decode and create the unit +// based on the string +IOStatus CacheDumpedLoaderImpl::ReadDumpUnit(size_t len, std::string* data, + DumpUnit* unit) { + assert(reader_ != nullptr); + assert(data != nullptr); + assert(unit != nullptr); + IOStatus io_s = reader_->ReadPacket(data); + if (!io_s.ok()) { + return io_s; + } + if (data->size() != len) { + return IOStatus::Corruption( + "The data being read out does not match the size stored in metadata!"); + } + Slice block; + return status_to_io_status(CacheDumperHelper::DecodeDumpUnit(*data, unit)); +} + +// Read the header +IOStatus CacheDumpedLoaderImpl::ReadHeader(std::string* data, + DumpUnit* dump_unit) { + DumpUnitMeta header_meta; + header_meta.reset(); + std::string meta_string; + IOStatus io_s = ReadDumpUnitMeta(&meta_string, &header_meta); + if (!io_s.ok()) { + return io_s; + } + + io_s = ReadDumpUnit(header_meta.dump_unit_size, data, dump_unit); + if (!io_s.ok()) { + return io_s; + } + uint32_t unit_checksum = crc32c::Value(data->data(), data->size()); + if (unit_checksum != header_meta.dump_unit_checksum) { + return IOStatus::Corruption("Read header unit corrupted!"); + } + return io_s; +} + +// Read the blocks after header is read out +IOStatus CacheDumpedLoaderImpl::ReadCacheBlock(std::string* data, + DumpUnit* dump_unit) { + // According to the write process, we read the dump_unit_metadata first + DumpUnitMeta unit_meta; + unit_meta.reset(); + std::string unit_string; + IOStatus io_s = ReadDumpUnitMeta(&unit_string, &unit_meta); + if (!io_s.ok()) { + return io_s; + } + + // Based on the information in the dump_unit_metadata, we read the dump_unit + // and verify if its content is correct. + io_s = ReadDumpUnit(unit_meta.dump_unit_size, data, dump_unit); + if (!io_s.ok()) { + return io_s; + } + uint32_t unit_checksum = crc32c::Value(data->data(), data->size()); + if (unit_checksum != unit_meta.dump_unit_checksum) { + return IOStatus::Corruption( + "Checksum does not match! 
Read dumped unit corrupted!");
+  }
+  return io_s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.h
new file mode 100644
index 0000000000..9811ff2d91
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cache_dump_load_impl.h
@@ -0,0 +1,356 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <unordered_map>
+
+#include "file/random_access_file_reader.h"
+#include "file/writable_file_writer.h"
+#include "rocksdb/utilities/cache_dump_load.h"
+#include "table/block_based/block.h"
+#include "table/block_based/block_type.h"
+#include "table/block_based/cachable_entry.h"
+#include "table/block_based/parsed_full_filter_block.h"
+#include "table/block_based/reader_common.h"
+#include "util/hash_containers.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// The read buffer size for the default CacheDumpReader
+const unsigned int kDumpReaderBufferSize = 1024;  // 1KB
+static const unsigned int kSizePrefixLen = 4;
+
+enum CacheDumpUnitType : unsigned char {
+  kHeader = 1,
+  kFooter = 2,
+  kData = 3,
+  kFilter = 4,
+  kProperties = 5,
+  kCompressionDictionary = 6,
+  kRangeDeletion = 7,
+  kHashIndexPrefixes = 8,
+  kHashIndexMetadata = 9,
+  kMetaIndex = 10,
+  kIndex = 11,
+  kDeprecatedFilterBlock = 12,  // OBSOLETE / DEPRECATED
+  kFilterMetaBlock = 13,
+  kBlockTypeMax,
+};
+
+// The metadata of a dump unit. After it is serialized, its size is a fixed
+// 16 bytes.
+struct DumpUnitMeta {
+  // sequence number is a monotonically increasing number to indicate the order
+  // of the blocks being written. Header is 0.
+  uint32_t sequence_num;
+  // The Crc32c checksum of its dump unit.
+  uint32_t dump_unit_checksum;
+  // The dump unit size after the dump unit is serialized to a string.
+  uint64_t dump_unit_size;
+
+  void reset() {
+    sequence_num = 0;
+    dump_unit_checksum = 0;
+    dump_unit_size = 0;
+  }
+};
+
+// The data structure to hold a block and its information.
+struct DumpUnit {
+  // The timestamp when the block is identified, copied, and dumped from block
+  // cache
+  uint64_t timestamp;
+  // The type of the block
+  CacheDumpUnitType type;
+  // The key of this block when the block is referenced by this Cache
+  Slice key;
+  // The block size
+  size_t value_len;
+  // The Crc32c checksum of the block
+  uint32_t value_checksum;
+  // Pointer to the block. Note that, in the dump process, it points to a
+  // memory buffer copied from cache block. The buffer is freed when we process
+  // the next block. In the load process, we use an std::string to store the
+  // serialized dump_unit read from the reader. So it points to the memory
+  // address of the beginning of the block in this string.
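+  // In both cases the pointer is non-owning: it is only valid while the
+  // backing buffer (the cache copy, or the decode string) is alive.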
+ void* value; + + DumpUnit() { reset(); } + + void reset() { + timestamp = 0; + type = CacheDumpUnitType::kBlockTypeMax; + key.clear(); + value_len = 0; + value_checksum = 0; + value = nullptr; + } +}; + +// The default implementation of the Cache Dumper +class CacheDumperImpl : public CacheDumper { + public: + CacheDumperImpl(const CacheDumpOptions& dump_options, + const std::shared_ptr& cache, + std::unique_ptr&& writer) + : options_(dump_options), cache_(cache), writer_(std::move(writer)) {} + ~CacheDumperImpl() { writer_.reset(); } + Status SetDumpFilter(std::vector db_list) override; + IOStatus DumpCacheEntriesToWriter() override; + + private: + IOStatus WriteBlock(CacheDumpUnitType type, const Slice& key, + const Slice& value); + IOStatus WriteHeader(); + IOStatus WriteFooter(); + bool ShouldFilterOut(const Slice& key); + std::function + DumpOneBlockCallBack(std::string& buf); + + CacheDumpOptions options_; + std::shared_ptr cache_; + std::unique_ptr writer_; + SystemClock* clock_; + uint32_t sequence_num_; + // The cache key prefix filter. Currently, we use db_session_id as the prefix, + // so using std::set to store the prefixes as filter is enough. Further + // improvement can be applied like BloomFilter or others to speedup the + // filtering. + std::set prefix_filter_; +}; + +// The default implementation of CacheDumpedLoader +class CacheDumpedLoaderImpl : public CacheDumpedLoader { + public: + CacheDumpedLoaderImpl(const CacheDumpOptions& dump_options, + const BlockBasedTableOptions& /*toptions*/, + const std::shared_ptr& secondary_cache, + std::unique_ptr&& reader) + : options_(dump_options), + secondary_cache_(secondary_cache), + reader_(std::move(reader)) {} + ~CacheDumpedLoaderImpl() {} + IOStatus RestoreCacheEntriesToSecondaryCache() override; + + private: + IOStatus ReadDumpUnitMeta(std::string* data, DumpUnitMeta* unit_meta); + IOStatus ReadDumpUnit(size_t len, std::string* data, DumpUnit* unit); + IOStatus ReadHeader(std::string* data, DumpUnit* dump_unit); + IOStatus ReadCacheBlock(std::string* data, DumpUnit* dump_unit); + + CacheDumpOptions options_; + std::shared_ptr secondary_cache_; + std::unique_ptr reader_; +}; + +// The default implementation of CacheDumpWriter. We write the blocks to a file +// sequentially. +class ToFileCacheDumpWriter : public CacheDumpWriter { + public: + explicit ToFileCacheDumpWriter( + std::unique_ptr&& file_writer) + : file_writer_(std::move(file_writer)) {} + + ~ToFileCacheDumpWriter() { Close().PermitUncheckedError(); } + + // Write the serialized metadata to the file + virtual IOStatus WriteMetadata(const Slice& metadata) override { + assert(file_writer_ != nullptr); + std::string prefix; + PutFixed32(&prefix, static_cast(metadata.size())); + IOStatus io_s = file_writer_->Append(Slice(prefix)); + if (!io_s.ok()) { + return io_s; + } + io_s = file_writer_->Append(metadata); + return io_s; + } + + // Write the serialized data to the file + virtual IOStatus WritePacket(const Slice& data) override { + assert(file_writer_ != nullptr); + std::string prefix; + PutFixed32(&prefix, static_cast(data.size())); + IOStatus io_s = file_writer_->Append(Slice(prefix)); + if (!io_s.ok()) { + return io_s; + } + io_s = file_writer_->Append(data); + return io_s; + } + + // Reset the writer + virtual IOStatus Close() override { + file_writer_.reset(); + return IOStatus::OK(); + } + + private: + std::unique_ptr file_writer_; +}; + +// The default implementation of CacheDumpReader. It is implemented based on +// RandomAccessFileReader. 
Note that, we keep an internal variable to remember +// the current offset. +class FromFileCacheDumpReader : public CacheDumpReader { + public: + explicit FromFileCacheDumpReader( + std::unique_ptr&& reader) + : file_reader_(std::move(reader)), + offset_(0), + buffer_(new char[kDumpReaderBufferSize]) {} + + ~FromFileCacheDumpReader() { delete[] buffer_; } + + virtual IOStatus ReadMetadata(std::string* metadata) override { + uint32_t metadata_len = 0; + IOStatus io_s = ReadSizePrefix(&metadata_len); + if (!io_s.ok()) { + return io_s; + } + return Read(metadata_len, metadata); + } + + virtual IOStatus ReadPacket(std::string* data) override { + uint32_t data_len = 0; + IOStatus io_s = ReadSizePrefix(&data_len); + if (!io_s.ok()) { + return io_s; + } + return Read(data_len, data); + } + + private: + IOStatus ReadSizePrefix(uint32_t* len) { + std::string prefix; + IOStatus io_s = Read(kSizePrefixLen, &prefix); + if (!io_s.ok()) { + return io_s; + } + Slice encoded_slice(prefix); + if (!GetFixed32(&encoded_slice, len)) { + return IOStatus::Corruption("Decode size prefix string failed"); + } + return IOStatus::OK(); + } + + IOStatus Read(size_t len, std::string* data) { + assert(file_reader_ != nullptr); + IOStatus io_s; + + unsigned int bytes_to_read = static_cast(len); + unsigned int to_read = bytes_to_read > kDumpReaderBufferSize + ? kDumpReaderBufferSize + : bytes_to_read; + + while (to_read > 0) { + io_s = file_reader_->Read(IOOptions(), offset_, to_read, &result_, + buffer_, nullptr, + Env::IO_TOTAL /* rate_limiter_priority */); + if (!io_s.ok()) { + return io_s; + } + if (result_.size() < to_read) { + return IOStatus::Corruption("Corrupted cache dump file."); + } + data->append(result_.data(), result_.size()); + + offset_ += to_read; + bytes_to_read -= to_read; + to_read = bytes_to_read > kDumpReaderBufferSize ? kDumpReaderBufferSize + : bytes_to_read; + } + return io_s; + } + std::unique_ptr file_reader_; + Slice result_; + size_t offset_; + char* buffer_; +}; + +// The cache dump and load helper class +class CacheDumperHelper { + public: + // serialize the dump_unit_meta to a string, it is fixed 16 bytes size. + static void EncodeDumpUnitMeta(const DumpUnitMeta& meta, std::string* data) { + assert(data); + PutFixed32(data, static_cast(meta.sequence_num)); + PutFixed32(data, static_cast(meta.dump_unit_checksum)); + PutFixed64(data, meta.dump_unit_size); + } + + // Serialize the dump_unit to a string. 
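+  // The layout written here mirrors what DecodeDumpUnit below expects:
+  //   [timestamp:8][type:1][key (length-prefixed)]
+  //   [value_len:4][value_checksum:4][value (length-prefixed)]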
+ static void EncodeDumpUnit(const DumpUnit& dump_unit, std::string* data) { + assert(data); + PutFixed64(data, dump_unit.timestamp); + data->push_back(dump_unit.type); + PutLengthPrefixedSlice(data, dump_unit.key); + PutFixed32(data, static_cast(dump_unit.value_len)); + PutFixed32(data, dump_unit.value_checksum); + PutLengthPrefixedSlice(data, + Slice((char*)dump_unit.value, dump_unit.value_len)); + } + + // Deserialize the dump_unit_meta from a string + static Status DecodeDumpUnitMeta(const std::string& encoded_data, + DumpUnitMeta* unit_meta) { + assert(unit_meta != nullptr); + Slice encoded_slice = Slice(encoded_data); + if (!GetFixed32(&encoded_slice, &(unit_meta->sequence_num))) { + return Status::Incomplete("Decode dumped unit meta sequence_num failed"); + } + if (!GetFixed32(&encoded_slice, &(unit_meta->dump_unit_checksum))) { + return Status::Incomplete( + "Decode dumped unit meta dump_unit_checksum failed"); + } + if (!GetFixed64(&encoded_slice, &(unit_meta->dump_unit_size))) { + return Status::Incomplete( + "Decode dumped unit meta dump_unit_size failed"); + } + return Status::OK(); + } + + // Deserialize the dump_unit from a string. + static Status DecodeDumpUnit(const std::string& encoded_data, + DumpUnit* dump_unit) { + assert(dump_unit != nullptr); + Slice encoded_slice = Slice(encoded_data); + + // Decode timestamp + if (!GetFixed64(&encoded_slice, &dump_unit->timestamp)) { + return Status::Incomplete("Decode dumped unit string failed"); + } + // Decode the block type + dump_unit->type = static_cast(encoded_slice[0]); + encoded_slice.remove_prefix(1); + // Decode the key + if (!GetLengthPrefixedSlice(&encoded_slice, &(dump_unit->key))) { + return Status::Incomplete("Decode dumped unit string failed"); + } + // Decode the value size + uint32_t value_len; + if (!GetFixed32(&encoded_slice, &value_len)) { + return Status::Incomplete("Decode dumped unit string failed"); + } + dump_unit->value_len = static_cast(value_len); + // Decode the value checksum + if (!GetFixed32(&encoded_slice, &(dump_unit->value_checksum))) { + return Status::Incomplete("Decode dumped unit string failed"); + } + // Decode the block content and copy to the memory space whose pointer + // will be managed by the cache finally. + Slice block; + if (!GetLengthPrefixedSlice(&encoded_slice, &block)) { + return Status::Incomplete("Decode dumped unit string failed"); + } + dump_unit->value = (void*)block.data(); + assert(block.size() == dump_unit->value_len); + return Status::OK(); + } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.cc new file mode 100644 index 0000000000..b7da2ba0cb --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
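+
+// Illustrative wiring of these classes into column family options (the local
+// variable names and the grace period value below are hypothetical):
+//
+//   ColumnFamilyOptions cf_opts;
+//   cf_opts.merge_operator =
+//       std::make_shared<cassandra::CassandraValueMergeOperator>(
+//           /*gc_grace_period_in_seconds=*/86400);
+//   cf_opts.compaction_filter_factory =
+//       std::make_shared<cassandra::CassandraCompactionFilterFactory>(
+//           /*purge_ttl_on_expiration=*/false,
+//           /*gc_grace_period_in_seconds=*/86400);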
+ +#include "utilities/cassandra/cassandra_compaction_filter.h" + +#include + +#include "rocksdb/slice.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "utilities/cassandra/format.h" +#include "utilities/cassandra/merge_operator.h" + +namespace ROCKSDB_NAMESPACE { +namespace cassandra { +static std::unordered_map + cassandra_filter_type_info = { + {"purge_ttl_on_expiration", + {offsetof(struct CassandraOptions, purge_ttl_on_expiration), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"gc_grace_period_in_seconds", + {offsetof(struct CassandraOptions, gc_grace_period_in_seconds), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; + +CassandraCompactionFilter::CassandraCompactionFilter( + bool purge_ttl_on_expiration, int32_t gc_grace_period_in_seconds) + : options_(gc_grace_period_in_seconds, 0, purge_ttl_on_expiration) { + RegisterOptions(&options_, &cassandra_filter_type_info); +} + +CompactionFilter::Decision CassandraCompactionFilter::FilterV2( + int /*level*/, const Slice& /*key*/, ValueType value_type, + const Slice& existing_value, std::string* new_value, + std::string* /*skip_until*/) const { + bool value_changed = false; + RowValue row_value = + RowValue::Deserialize(existing_value.data(), existing_value.size()); + RowValue compacted = + options_.purge_ttl_on_expiration + ? row_value.RemoveExpiredColumns(&value_changed) + : row_value.ConvertExpiredColumnsToTombstones(&value_changed); + + if (value_type == ValueType::kValue) { + compacted = compacted.RemoveTombstones(options_.gc_grace_period_in_seconds); + } + + if (compacted.Empty()) { + return Decision::kRemove; + } + + if (value_changed) { + compacted.Serialize(new_value); + return Decision::kChangeValue; + } + + return Decision::kKeep; +} + +CassandraCompactionFilterFactory::CassandraCompactionFilterFactory( + bool purge_ttl_on_expiration, int32_t gc_grace_period_in_seconds) + : options_(gc_grace_period_in_seconds, 0, purge_ttl_on_expiration) { + RegisterOptions(&options_, &cassandra_filter_type_info); +} + +std::unique_ptr +CassandraCompactionFilterFactory::CreateCompactionFilter( + const CompactionFilter::Context&) { + std::unique_ptr result(new CassandraCompactionFilter( + options_.purge_ttl_on_expiration, options_.gc_grace_period_in_seconds)); + return result; +} + +int RegisterCassandraObjects(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + CassandraValueMergeOperator::kClassName(), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new CassandraValueMergeOperator(0)); + return guard->get(); + }); + library.AddFactory( + CassandraCompactionFilter::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* /*guard */, + std::string* /* errmsg */) { + return new CassandraCompactionFilter(false, 0); + }); + library.AddFactory( + CassandraCompactionFilterFactory::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new CassandraCompactionFilterFactory(false, 0)); + return guard->get(); + }); + size_t num_types; + return static_cast(library.GetFactoryCount(&num_types)); +} +} // namespace cassandra +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.h new file 
mode 100644
index 0000000000..0325a4c395
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_compaction_filter.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include <string>
+
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/slice.h"
+#include "utilities/cassandra/cassandra_options.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace cassandra {
+
+/**
+ * Compaction filter for removing expired Cassandra data with TTL.
+ * If option `purge_ttl_on_expiration` is set to true, expired data
+ * will be directly purged. Otherwise expired data will be converted to
+ * tombstones first, then eventually removed after the gc grace period.
+ * `purge_ttl_on_expiration` should only be on when all the writes have
+ * the same TTL setting, otherwise it could bring old data back.
+ *
+ * The compaction filter is also in charge of removing tombstones that have
+ * been promoted to kValue type after a series of merges in compaction.
+ */
+class CassandraCompactionFilter : public CompactionFilter {
+ public:
+  explicit CassandraCompactionFilter(bool purge_ttl_on_expiration,
+                                     int32_t gc_grace_period_in_seconds);
+  static const char* kClassName() { return "CassandraCompactionFilter"; }
+  const char* Name() const override { return kClassName(); }
+
+  virtual Decision FilterV2(int level, const Slice& key, ValueType value_type,
+                            const Slice& existing_value, std::string* new_value,
+                            std::string* skip_until) const override;
+
+ private:
+  CassandraOptions options_;
+};
+
+class CassandraCompactionFilterFactory : public CompactionFilterFactory {
+ public:
+  explicit CassandraCompactionFilterFactory(bool purge_ttl_on_expiration,
+                                            int32_t gc_grace_period_in_seconds);
+  ~CassandraCompactionFilterFactory() override {}
+
+  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) override;
+  static const char* kClassName() { return "CassandraCompactionFilterFactory"; }
+  const char* Name() const override { return kClassName(); }
+
+ private:
+  CassandraOptions options_;
+};
+}  // namespace cassandra
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_options.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_options.h
new file mode 100644
index 0000000000..e0527aada6
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/cassandra_options.h
@@ -0,0 +1,41 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+class ObjectLibrary;
+namespace cassandra {
+struct CassandraOptions {
+  static const char* kName() { return "CassandraOptions"; }
+  CassandraOptions(int32_t _gc_grace_period_in_seconds, size_t _operands_limit,
+                   bool _purge_ttl_on_expiration = false)
+      : operands_limit(_operands_limit),
+        gc_grace_period_in_seconds(_gc_grace_period_in_seconds),
+        purge_ttl_on_expiration(_purge_ttl_on_expiration) {}
+  // Limit on the number of merge operands.
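+  // A value of 0 disables eager merging: CassandraValueMergeOperator::
+  // ShouldMerge() only fires when operands_limit > 0 and the number of
+  // accumulated operands has reached that limit.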
+ size_t operands_limit; + + // How long (in seconds) tombstoned data remains before it is purged + int32_t gc_grace_period_in_seconds; + + // If is set to true, expired data will be directly purged. + // Otherwise expired data will be converted tombstones first, + // then be eventually removed after gc grace period. This value should + // only true if all writes have same ttl setting, otherwise it could bring old + // data back. + bool purge_ttl_on_expiration; +}; +extern "C" { +int RegisterCassandraObjects(ObjectLibrary& library, const std::string& arg); +} // extern "C" +} // namespace cassandra +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.cc new file mode 100644 index 0000000000..cc1dd2f280 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.cc @@ -0,0 +1,367 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "format.h" + +#include +#include +#include + +#include "utilities/cassandra/serialize.h" + +namespace ROCKSDB_NAMESPACE { +namespace cassandra { +namespace { +const int32_t kDefaultLocalDeletionTime = std::numeric_limits::max(); +const int64_t kDefaultMarkedForDeleteAt = std::numeric_limits::min(); +} // namespace + +ColumnBase::ColumnBase(int8_t mask, int8_t index) + : mask_(mask), index_(index) {} + +std::size_t ColumnBase::Size() const { return sizeof(mask_) + sizeof(index_); } + +int8_t ColumnBase::Mask() const { return mask_; } + +int8_t ColumnBase::Index() const { return index_; } + +void ColumnBase::Serialize(std::string* dest) const { + ROCKSDB_NAMESPACE::cassandra::Serialize(mask_, dest); + ROCKSDB_NAMESPACE::cassandra::Serialize(index_, dest); +} + +std::shared_ptr ColumnBase::Deserialize(const char* src, + std::size_t offset) { + int8_t mask = ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + if ((mask & ColumnTypeMask::DELETION_MASK) != 0) { + return Tombstone::Deserialize(src, offset); + } else if ((mask & ColumnTypeMask::EXPIRATION_MASK) != 0) { + return ExpiringColumn::Deserialize(src, offset); + } else { + return Column::Deserialize(src, offset); + } +} + +Column::Column(int8_t mask, int8_t index, int64_t timestamp, int32_t value_size, + const char* value) + : ColumnBase(mask, index), + timestamp_(timestamp), + value_size_(value_size), + value_(value) {} + +int64_t Column::Timestamp() const { return timestamp_; } + +std::size_t Column::Size() const { + return ColumnBase::Size() + sizeof(timestamp_) + sizeof(value_size_) + + value_size_; +} + +void Column::Serialize(std::string* dest) const { + ColumnBase::Serialize(dest); + ROCKSDB_NAMESPACE::cassandra::Serialize(timestamp_, dest); + ROCKSDB_NAMESPACE::cassandra::Serialize(value_size_, dest); + dest->append(value_, value_size_); +} + +std::shared_ptr Column::Deserialize(const char* src, + std::size_t offset) { + int8_t mask = ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(mask); + int8_t index = ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(index); + int64_t timestamp = + ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(timestamp); + int32_t value_size = + ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + 
offset += sizeof(value_size); + return std::make_shared(mask, index, timestamp, value_size, + src + offset); +} + +ExpiringColumn::ExpiringColumn(int8_t mask, int8_t index, int64_t timestamp, + int32_t value_size, const char* value, + int32_t ttl) + : Column(mask, index, timestamp, value_size, value), ttl_(ttl) {} + +std::size_t ExpiringColumn::Size() const { + return Column::Size() + sizeof(ttl_); +} + +void ExpiringColumn::Serialize(std::string* dest) const { + Column::Serialize(dest); + ROCKSDB_NAMESPACE::cassandra::Serialize(ttl_, dest); +} + +std::chrono::time_point ExpiringColumn::TimePoint() + const { + return std::chrono::time_point( + std::chrono::microseconds(Timestamp())); +} + +std::chrono::seconds ExpiringColumn::Ttl() const { + return std::chrono::seconds(ttl_); +} + +bool ExpiringColumn::Expired() const { + return TimePoint() + Ttl() < std::chrono::system_clock::now(); +} + +std::shared_ptr ExpiringColumn::ToTombstone() const { + auto expired_at = (TimePoint() + Ttl()).time_since_epoch(); + int32_t local_deletion_time = static_cast( + std::chrono::duration_cast(expired_at).count()); + int64_t marked_for_delete_at = + std::chrono::duration_cast(expired_at).count(); + return std::make_shared( + static_cast(ColumnTypeMask::DELETION_MASK), Index(), + local_deletion_time, marked_for_delete_at); +} + +std::shared_ptr ExpiringColumn::Deserialize( + const char* src, std::size_t offset) { + int8_t mask = ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(mask); + int8_t index = ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(index); + int64_t timestamp = + ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(timestamp); + int32_t value_size = + ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(value_size); + const char* value = src + offset; + offset += value_size; + int32_t ttl = ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + return std::make_shared(mask, index, timestamp, value_size, + value, ttl); +} + +Tombstone::Tombstone(int8_t mask, int8_t index, int32_t local_deletion_time, + int64_t marked_for_delete_at) + : ColumnBase(mask, index), + local_deletion_time_(local_deletion_time), + marked_for_delete_at_(marked_for_delete_at) {} + +int64_t Tombstone::Timestamp() const { return marked_for_delete_at_; } + +std::size_t Tombstone::Size() const { + return ColumnBase::Size() + sizeof(local_deletion_time_) + + sizeof(marked_for_delete_at_); +} + +void Tombstone::Serialize(std::string* dest) const { + ColumnBase::Serialize(dest); + ROCKSDB_NAMESPACE::cassandra::Serialize(local_deletion_time_, dest); + ROCKSDB_NAMESPACE::cassandra::Serialize(marked_for_delete_at_, dest); +} + +bool Tombstone::Collectable(int32_t gc_grace_period_in_seconds) const { + auto local_deleted_at = std::chrono::time_point( + std::chrono::seconds(local_deletion_time_)); + auto gc_grace_period = std::chrono::seconds(gc_grace_period_in_seconds); + return local_deleted_at + gc_grace_period < std::chrono::system_clock::now(); +} + +std::shared_ptr Tombstone::Deserialize(const char* src, + std::size_t offset) { + int8_t mask = ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(mask); + int8_t index = ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(index); + int32_t local_deletion_time = + ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(int32_t); + int64_t marked_for_delete_at = + ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); 
+ return std::make_shared(mask, index, local_deletion_time, + marked_for_delete_at); +} + +RowValue::RowValue(int32_t local_deletion_time, int64_t marked_for_delete_at) + : local_deletion_time_(local_deletion_time), + marked_for_delete_at_(marked_for_delete_at), + columns_(), + last_modified_time_(0) {} + +RowValue::RowValue(Columns columns, int64_t last_modified_time) + : local_deletion_time_(kDefaultLocalDeletionTime), + marked_for_delete_at_(kDefaultMarkedForDeleteAt), + columns_(std::move(columns)), + last_modified_time_(last_modified_time) {} + +std::size_t RowValue::Size() const { + std::size_t size = + sizeof(local_deletion_time_) + sizeof(marked_for_delete_at_); + for (const auto& column : columns_) { + size += column->Size(); + } + return size; +} + +int64_t RowValue::LastModifiedTime() const { + if (IsTombstone()) { + return marked_for_delete_at_; + } else { + return last_modified_time_; + } +} + +bool RowValue::IsTombstone() const { + return marked_for_delete_at_ > kDefaultMarkedForDeleteAt; +} + +void RowValue::Serialize(std::string* dest) const { + ROCKSDB_NAMESPACE::cassandra::Serialize(local_deletion_time_, dest); + ROCKSDB_NAMESPACE::cassandra::Serialize(marked_for_delete_at_, dest); + for (const auto& column : columns_) { + column->Serialize(dest); + } +} + +RowValue RowValue::RemoveExpiredColumns(bool* changed) const { + *changed = false; + Columns new_columns; + for (auto& column : columns_) { + if (column->Mask() == ColumnTypeMask::EXPIRATION_MASK) { + std::shared_ptr expiring_column = + std::static_pointer_cast(column); + + if (expiring_column->Expired()) { + *changed = true; + continue; + } + } + + new_columns.push_back(column); + } + return RowValue(std::move(new_columns), last_modified_time_); +} + +RowValue RowValue::ConvertExpiredColumnsToTombstones(bool* changed) const { + *changed = false; + Columns new_columns; + for (auto& column : columns_) { + if (column->Mask() == ColumnTypeMask::EXPIRATION_MASK) { + std::shared_ptr expiring_column = + std::static_pointer_cast(column); + + if (expiring_column->Expired()) { + std::shared_ptr tombstone = expiring_column->ToTombstone(); + new_columns.push_back(tombstone); + *changed = true; + continue; + } + } + new_columns.push_back(column); + } + return RowValue(std::move(new_columns), last_modified_time_); +} + +RowValue RowValue::RemoveTombstones(int32_t gc_grace_period) const { + Columns new_columns; + for (auto& column : columns_) { + if (column->Mask() == ColumnTypeMask::DELETION_MASK) { + std::shared_ptr tombstone = + std::static_pointer_cast(column); + + if (tombstone->Collectable(gc_grace_period)) { + continue; + } + } + + new_columns.push_back(column); + } + return RowValue(std::move(new_columns), last_modified_time_); +} + +bool RowValue::Empty() const { return columns_.empty(); } + +RowValue RowValue::Deserialize(const char* src, std::size_t size) { + std::size_t offset = 0; + assert(size >= sizeof(local_deletion_time_) + sizeof(marked_for_delete_at_)); + int32_t local_deletion_time = + ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(int32_t); + int64_t marked_for_delete_at = + ROCKSDB_NAMESPACE::cassandra::Deserialize(src, offset); + offset += sizeof(int64_t); + if (offset == size) { + return RowValue(local_deletion_time, marked_for_delete_at); + } + + assert(local_deletion_time == kDefaultLocalDeletionTime); + assert(marked_for_delete_at == kDefaultMarkedForDeleteAt); + Columns columns; + int64_t last_modified_time = 0; + while (offset < size) { + auto c = ColumnBase::Deserialize(src, 
offset);
+    offset += c->Size();
+    assert(offset <= size);
+    last_modified_time = std::max(last_modified_time, c->Timestamp());
+    columns.push_back(std::move(c));
+  }
+
+  return RowValue(std::move(columns), last_modified_time);
+}
+
+// Merge multiple row values into one.
+// For each column in rows with the same index, we pick the one with the
+// latest timestamp. We also take the row tombstone into consideration, by
+// iterating the rows in reverse timestamp order, stopping once we hit the
+// first row tombstone.
+RowValue RowValue::Merge(std::vector<RowValue>&& values) {
+  assert(values.size() > 0);
+  if (values.size() == 1) {
+    return std::move(values[0]);
+  }
+
+  // Merge columns by their last modified time, and skip once we hit
+  // a row tombstone.
+  std::sort(values.begin(), values.end(),
+            [](const RowValue& r1, const RowValue& r2) {
+              return r1.LastModifiedTime() > r2.LastModifiedTime();
+            });
+
+  std::map<int8_t, std::shared_ptr<ColumnBase>> merged_columns;
+  int64_t tombstone_timestamp = 0;
+
+  for (auto& value : values) {
+    if (value.IsTombstone()) {
+      if (merged_columns.size() == 0) {
+        return std::move(value);
+      }
+      tombstone_timestamp = value.LastModifiedTime();
+      break;
+    }
+    for (auto& column : value.columns_) {
+      int8_t index = column->Index();
+      if (merged_columns.find(index) == merged_columns.end()) {
+        merged_columns[index] = column;
+      } else {
+        if (column->Timestamp() > merged_columns[index]->Timestamp()) {
+          merged_columns[index] = column;
+        }
+      }
+    }
+  }
+
+  int64_t last_modified_time = 0;
+  Columns columns;
+  for (auto& pair : merged_columns) {
+    // For some row, its last_modified_time > row tombstone_timestamp, but
+    // it might have columns whose timestamp is earlier than the tombstone,
+    // so we need to filter these out.
+    if (pair.second->Timestamp() <= tombstone_timestamp) {
+      continue;
+    }
+    last_modified_time = std::max(last_modified_time, pair.second->Timestamp());
+    columns.push_back(std::move(pair.second));
+  }
+  return RowValue(std::move(columns), last_modified_time);
+}
+
+}  // namespace cassandra
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.h
new file mode 100644
index 0000000000..1b27147351
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/format.h
@@ -0,0 +1,183 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+/**
+ * The encoding of a Cassandra Row Value.
+ *
+ * A Cassandra Row Value is either a row tombstone or contains multiple
+ * columns; it has the following fields:
+ *
+ * struct row_value {
+ *   int32_t local_deletion_time;  // Time in seconds when the row is deleted,
+ *                                 // only used for Cassandra tombstone gc.
+ *   int64_t marked_for_delete_at; // Milliseconds when this row was marked
+ *                                 // deleted.
+ *   struct column_base columns[]; // For a non tombstone row, all columns
+ *                                 // are stored here.
+ * }
+ *
+ * If local_deletion_time and marked_for_delete_at are set, then this is
+ * a tombstone, otherwise it contains multiple columns.
+ *
+ * There are three types of Columns: Normal Column, Expiring Column and Column
+ * Tombstone, which have the following fields:
+ *
+ * // Identify the type of the column.
+ * enum mask { + * DELETION_MASK = 0x01, + * EXPIRATION_MASK = 0x02, + * }; + * + * struct column { + * int8_t mask = 0; + * int8_t index; + * int64_t timestamp; + * int32_t value_length; + * char value[value_length]; + * } + * + * struct expiring_column { + * int8_t mask = mask.EXPIRATION_MASK; + * int8_t index; + * int64_t timestamp; + * int32_t value_length; + * char value[value_length]; + * int32_t ttl; + * } + * + * struct tombstone_column { + * int8_t mask = mask.DELETION_MASK; + * int8_t index; + * int32_t local_deletion_time; // Similar to row_value's field. + * int64_t marked_for_delete_at; + * } + */ + +#pragma once +#include +#include +#include + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { +namespace cassandra { + +// Identify the type of the column. +enum ColumnTypeMask { + DELETION_MASK = 0x01, + EXPIRATION_MASK = 0x02, +}; + +class ColumnBase { + public: + ColumnBase(int8_t mask, int8_t index); + virtual ~ColumnBase() = default; + + virtual int64_t Timestamp() const = 0; + virtual int8_t Mask() const; + virtual int8_t Index() const; + virtual std::size_t Size() const; + virtual void Serialize(std::string* dest) const; + static std::shared_ptr Deserialize(const char* src, + std::size_t offset); + + private: + int8_t mask_; + int8_t index_; +}; + +class Column : public ColumnBase { + public: + Column(int8_t mask, int8_t index, int64_t timestamp, int32_t value_size, + const char* value); + + virtual int64_t Timestamp() const override; + virtual std::size_t Size() const override; + virtual void Serialize(std::string* dest) const override; + static std::shared_ptr Deserialize(const char* src, + std::size_t offset); + + private: + int64_t timestamp_; + int32_t value_size_; + const char* value_; +}; + +class Tombstone : public ColumnBase { + public: + Tombstone(int8_t mask, int8_t index, int32_t local_deletion_time, + int64_t marked_for_delete_at); + + virtual int64_t Timestamp() const override; + virtual std::size_t Size() const override; + virtual void Serialize(std::string* dest) const override; + bool Collectable(int32_t gc_grace_period) const; + static std::shared_ptr Deserialize(const char* src, + std::size_t offset); + + private: + int32_t local_deletion_time_; + int64_t marked_for_delete_at_; +}; + +class ExpiringColumn : public Column { + public: + ExpiringColumn(int8_t mask, int8_t index, int64_t timestamp, + int32_t value_size, const char* value, int32_t ttl); + + virtual std::size_t Size() const override; + virtual void Serialize(std::string* dest) const override; + bool Expired() const; + std::shared_ptr ToTombstone() const; + + static std::shared_ptr Deserialize(const char* src, + std::size_t offset); + + private: + int32_t ttl_; + std::chrono::time_point TimePoint() const; + std::chrono::seconds Ttl() const; +}; + +using Columns = std::vector>; + +class RowValue { + public: + // Create a Row Tombstone. + RowValue(int32_t local_deletion_time, int64_t marked_for_delete_at); + // Create a Row containing columns. + RowValue(Columns columns, int64_t last_modified_time); + RowValue(const RowValue& /*that*/) = delete; + RowValue(RowValue&& /*that*/) noexcept = default; + RowValue& operator=(const RowValue& /*that*/) = delete; + RowValue& operator=(RowValue&& /*that*/) = default; + + std::size_t Size() const; + bool IsTombstone() const; + // For Tombstone this returns the marked_for_delete_at_, + // otherwise it returns the max timestamp of containing columns. 
+ int64_t LastModifiedTime() const; + void Serialize(std::string* dest) const; + RowValue RemoveExpiredColumns(bool* changed) const; + RowValue ConvertExpiredColumnsToTombstones(bool* changed) const; + RowValue RemoveTombstones(int32_t gc_grace_period) const; + bool Empty() const; + + static RowValue Deserialize(const char* src, std::size_t size); + // Merge multiple rows according to their timestamp. + static RowValue Merge(std::vector&& values); + + const Columns& get_columns() { return columns_; } + + private: + int32_t local_deletion_time_; + int64_t marked_for_delete_at_; + Columns columns_; + int64_t last_modified_time_; +}; + +} // namespace cassandra +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.cc new file mode 100644 index 0000000000..366d8fa443 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "merge_operator.h" + +#include + +#include + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/options_type.h" +#include "utilities/cassandra/format.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { +namespace cassandra { +static std::unordered_map + merge_operator_options_info = { + {"gc_grace_period_in_seconds", + {offsetof(struct CassandraOptions, gc_grace_period_in_seconds), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"operands_limit", + {offsetof(struct CassandraOptions, operands_limit), OptionType::kSizeT, + OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, +}; + +CassandraValueMergeOperator::CassandraValueMergeOperator( + int32_t gc_grace_period_in_seconds, size_t operands_limit) + : options_(gc_grace_period_in_seconds, operands_limit) { + RegisterOptions(&options_, &merge_operator_options_info); +} + +// Implementation for the merge operation (merges two Cassandra values) +bool CassandraValueMergeOperator::FullMergeV2( + const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const { + // Clear the *new_value for writing. + merge_out->new_value.clear(); + std::vector row_values; + if (merge_in.existing_value) { + row_values.push_back(RowValue::Deserialize( + merge_in.existing_value->data(), merge_in.existing_value->size())); + } + + for (auto& operand : merge_in.operand_list) { + row_values.push_back(RowValue::Deserialize(operand.data(), operand.size())); + } + + RowValue merged = RowValue::Merge(std::move(row_values)); + merged = merged.RemoveTombstones(options_.gc_grace_period_in_seconds); + merge_out->new_value.reserve(merged.Size()); + merged.Serialize(&(merge_out->new_value)); + + return true; +} + +bool CassandraValueMergeOperator::PartialMergeMulti( + const Slice& /*key*/, const std::deque& operand_list, + std::string* new_value, Logger* /*logger*/) const { + // Clear the *new_value for writing. 
+  assert(new_value);
+  new_value->clear();
+
+  std::vector<RowValue> row_values;
+  for (auto& operand : operand_list) {
+    row_values.push_back(RowValue::Deserialize(operand.data(), operand.size()));
+  }
+  RowValue merged = RowValue::Merge(std::move(row_values));
+  new_value->reserve(merged.Size());
+  merged.Serialize(new_value);
+  return true;
+}
+
+}  // namespace cassandra
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.h
new file mode 100644
index 0000000000..af8725db7d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/merge_operator.h
@@ -0,0 +1,44 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/slice.h"
+#include "utilities/cassandra/cassandra_options.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace cassandra {
+
+/**
+ * A MergeOperator for rocksdb that implements Cassandra row value merge.
+ */
+class CassandraValueMergeOperator : public MergeOperator {
+ public:
+  explicit CassandraValueMergeOperator(int32_t gc_grace_period_in_seconds,
+                                       size_t operands_limit = 0);
+
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override;
+
+  virtual bool PartialMergeMulti(const Slice& key,
+                                 const std::deque<Slice>& operand_list,
+                                 std::string* new_value,
+                                 Logger* logger) const override;
+
+  const char* Name() const override { return kClassName(); }
+  static const char* kClassName() { return "CassandraValueMergeOperator"; }
+
+  virtual bool AllowSingleOperand() const override { return true; }
+
+  virtual bool ShouldMerge(const std::vector<Slice>& operands) const override {
+    return options_.operands_limit > 0 &&
+           operands.size() >= options_.operands_limit;
+  }
+
+ private:
+  CassandraOptions options_;
+};
+}  // namespace cassandra
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/serialize.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/serialize.h
new file mode 100644
index 0000000000..4bd552bfc1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/serialize.h
@@ -0,0 +1,81 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+/**
+ * Helper functions which serialize and deserialize integers
+ * into bytes in big endian.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace cassandra {
+namespace {
+const int64_t kCharMask = 0xFFLL;
+const int32_t kBitsPerByte = 8;
+}  // namespace
+
+template <typename T>
+void Serialize(T val, std::string* dest);
+
+template <typename T>
+T Deserialize(const char* src, std::size_t offset = 0);
+
+// Specializations
+template <>
+inline void Serialize<int8_t>(int8_t t, std::string* dest) {
+  dest->append(1, static_cast<char>(t & kCharMask));
+}
+
+template <>
+inline void Serialize<int32_t>(int32_t t, std::string* dest) {
+  for (unsigned long i = 0; i < sizeof(int32_t); i++) {
+    dest->append(
+        1, static_cast<char>((t >> (sizeof(int32_t) - 1 - i) * kBitsPerByte) &
+                             kCharMask));
+  }
+}
+
+template <>
+inline void Serialize<int64_t>(int64_t t, std::string* dest) {
+  for (unsigned long i = 0; i < sizeof(int64_t); i++) {
+    dest->append(
+        1, static_cast<char>((t >> (sizeof(int64_t) - 1 - i) * kBitsPerByte) &
+                             kCharMask));
+  }
+}
+
+template <>
+inline int8_t Deserialize<int8_t>(const char* src, std::size_t offset) {
+  return static_cast<int8_t>(src[offset]);
+}
+
+template <>
+inline int32_t Deserialize<int32_t>(const char* src, std::size_t offset) {
+  int32_t result = 0;
+  for (unsigned long i = 0; i < sizeof(int32_t); i++) {
+    result |= static_cast<int32_t>(static_cast<uint8_t>(src[offset + i]))
+              << ((sizeof(int32_t) - 1 - i) * kBitsPerByte);
+  }
+  return result;
+}
+
+template <>
+inline int64_t Deserialize<int64_t>(const char* src, std::size_t offset) {
+  int64_t result = 0;
+  for (unsigned long i = 0; i < sizeof(int64_t); i++) {
+    result |= static_cast<int64_t>(static_cast<uint8_t>(src[offset + i]))
+              << ((sizeof(int64_t) - 1 - i) * kBitsPerByte);
+  }
+  return result;
+}
+
+}  // namespace cassandra
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/test_utils.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/test_utils.h
new file mode 100644
index 0000000000..be23f70760
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/cassandra/test_utils.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
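+
+// Declarations shared by the Cassandra merge-operator unit tests: factories
+// for test columns, rows, and tombstones, plus verification helpers.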
+
+#pragma once
+#include <memory>
+
+#include "test_util/testharness.h"
+#include "utilities/cassandra/format.h"
+#include "utilities/cassandra/serialize.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace cassandra {
+extern const char kData[];
+extern const char kExpiringData[];
+extern const int32_t kTtl;
+extern const int8_t kColumn;
+extern const int8_t kTombstone;
+extern const int8_t kExpiringColumn;
+
+std::shared_ptr<ColumnBase> CreateTestColumn(int8_t mask, int8_t index,
+                                             int64_t timestamp);
+
+std::tuple<int8_t, int8_t, int64_t> CreateTestColumnSpec(int8_t mask,
+                                                         int8_t index,
+                                                         int64_t timestamp);
+
+RowValue CreateTestRowValue(
+    std::vector<std::tuple<int8_t, int8_t, int64_t>> column_specs);
+
+RowValue CreateRowTombstone(int64_t timestamp);
+
+void VerifyRowValueColumns(
+    const std::vector<std::shared_ptr<ColumnBase>>& columns,
+    std::size_t index_of_vector, int8_t expected_mask, int8_t expected_index,
+    int64_t expected_timestamp);
+
+int64_t ToMicroSeconds(int64_t seconds);
+int32_t ToSeconds(int64_t microseconds);
+}  // namespace cassandra
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.cc
new file mode 100644
index 0000000000..4a0cc71596
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.cc
@@ -0,0 +1,470 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2012 Facebook.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "utilities/checkpoint/checkpoint_impl.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <string>
+#include <tuple>
+#include <unordered_set>
+#include <vector>
+
+#include "db/wal_manager.h"
+#include "file/file_util.h"
+#include "file/filename.h"
+#include "logging/logging.h"
+#include "port/port.h"
+#include "rocksdb/db.h"
+#include "rocksdb/env.h"
+#include "rocksdb/metadata.h"
+#include "rocksdb/options.h"
+#include "rocksdb/transaction_log.h"
+#include "rocksdb/types.h"
+#include "rocksdb/utilities/checkpoint.h"
+#include "test_util/sync_point.h"
+#include "util/cast_util.h"
+#include "util/file_checksum_helper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status Checkpoint::Create(DB* db, Checkpoint** checkpoint_ptr) {
+  *checkpoint_ptr = new CheckpointImpl(db);
+  return Status::OK();
+}
+
+Status Checkpoint::CreateCheckpoint(const std::string& /*checkpoint_dir*/,
+                                    uint64_t /*log_size_for_flush*/,
+                                    uint64_t* /*sequence_number_ptr*/) {
+  return Status::NotSupported("");
+}
+
+void CheckpointImpl::CleanStagingDirectory(const std::string& full_private_path,
+                                           Logger* info_log) {
+  std::vector<std::string> subchildren;
+  Status s = db_->GetEnv()->FileExists(full_private_path);
+  if (s.IsNotFound()) {
+    return;
+  }
+  ROCKS_LOG_INFO(info_log, "File exists %s -- %s", full_private_path.c_str(),
+                 s.ToString().c_str());
+  s = db_->GetEnv()->GetChildren(full_private_path, &subchildren);
+  if (s.ok()) {
+    for (auto& subchild : subchildren) {
+      std::string subchild_path = full_private_path + "/" + subchild;
+      s = db_->GetEnv()->DeleteFile(subchild_path);
+      ROCKS_LOG_INFO(info_log, "Delete file %s -- %s", subchild_path.c_str(),
+                     s.ToString().c_str());
+    }
+  }
+  // finally delete the private dir
+  s = db_->GetEnv()->DeleteDir(full_private_path);
+  ROCKS_LOG_INFO(info_log, "Delete dir %s -- %s", full_private_path.c_str(),
+                 s.ToString().c_str());
+}
+
+Status Checkpoint::ExportColumnFamily(
+    ColumnFamilyHandle* /*handle*/, const std::string& /*export_dir*/,
+    ExportImportFilesMetaData** /*metadata*/) {
+  return Status::NotSupported("");
+}
+
+// Builds an openable snapshot of RocksDB
+Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir,
+                                        uint64_t log_size_for_flush,
+                                        uint64_t* sequence_number_ptr) {
+  DBOptions db_options = db_->GetDBOptions();
+
+  Status s = db_->GetEnv()->FileExists(checkpoint_dir);
+  if (s.ok()) {
+    return Status::InvalidArgument("Directory exists");
+  } else if (!s.IsNotFound()) {
+    assert(s.IsIOError());
+    return s;
+  }
+
+  ROCKS_LOG_INFO(
+      db_options.info_log,
+      "Started the snapshot process -- creating snapshot in directory %s",
+      checkpoint_dir.c_str());
+
+  size_t final_nonslash_idx = checkpoint_dir.find_last_not_of('/');
+  if (final_nonslash_idx == std::string::npos) {
+    // npos means it's only slashes or empty. Non-empty means it's the root
+    // directory, but it shouldn't be because we verified above the directory
+    // doesn't exist.
+    assert(checkpoint_dir.empty());
+    return Status::InvalidArgument("invalid checkpoint directory name");
+  }
+
+  std::string full_private_path =
+      checkpoint_dir.substr(0, final_nonslash_idx + 1) + ".tmp";
+  ROCKS_LOG_INFO(db_options.info_log,
+                 "Snapshot process -- using temporary directory %s",
+                 full_private_path.c_str());
+  CleanStagingDirectory(full_private_path, db_options.info_log.get());
+  // create snapshot directory
+  s = db_->GetEnv()->CreateDir(full_private_path);
+  uint64_t sequence_number = 0;
+  if (s.ok()) {
+    // disable file deletions
+    s = db_->DisableFileDeletions();
+    const bool disabled_file_deletions = s.ok();
+
+    if (s.ok() || s.IsNotSupported()) {
+      s = CreateCustomCheckpoint(
+          [&](const std::string& src_dirname, const std::string& fname,
+              FileType) {
+            ROCKS_LOG_INFO(db_options.info_log, "Hard Linking %s",
+                           fname.c_str());
+            return db_->GetFileSystem()->LinkFile(
+                src_dirname + "/" + fname, full_private_path + "/" + fname,
+                IOOptions(), nullptr);
+          } /* link_file_cb */,
+          [&](const std::string& src_dirname, const std::string& fname,
+              uint64_t size_limit_bytes, FileType,
+              const std::string& /* checksum_func_name */,
+              const std::string& /* checksum_val */,
+              const Temperature temperature) {
+            ROCKS_LOG_INFO(db_options.info_log, "Copying %s", fname.c_str());
+            return CopyFile(db_->GetFileSystem(), src_dirname + "/" + fname,
+                            full_private_path + "/" + fname, size_limit_bytes,
+                            db_options.use_fsync, nullptr, temperature);
+          } /* copy_file_cb */,
+          [&](const std::string& fname, const std::string& contents, FileType) {
+            ROCKS_LOG_INFO(db_options.info_log, "Creating %s", fname.c_str());
+            return CreateFile(db_->GetFileSystem(),
+                              full_private_path + "/" + fname, contents,
+                              db_options.use_fsync);
+          } /* create_file_cb */,
+          &sequence_number, log_size_for_flush);
+
+      // we copied all the files, enable file deletions
+      if (disabled_file_deletions) {
+        Status ss = db_->EnableFileDeletions(false);
+        assert(ss.ok());
+        ss.PermitUncheckedError();
+      }
+    }
+  }
+
+  if (s.ok()) {
+    // move tmp private backup to real snapshot directory
+    s = db_->GetEnv()->RenameFile(full_private_path, checkpoint_dir);
+  }
+  if (s.ok()) {
+    std::unique_ptr<FSDirectory> checkpoint_directory;
+    s = db_->GetFileSystem()->NewDirectory(checkpoint_dir, IOOptions(),
+                                           &checkpoint_directory, nullptr);
+    if (s.ok() && checkpoint_directory != nullptr) {
+      s = checkpoint_directory->FsyncWithDirOptions(
+          IOOptions(), nullptr,
+          DirFsyncOptions(DirFsyncOptions::FsyncReason::kDirRenamed));
+    }
+  }
+
+  if (s.ok()) {
+    if (sequence_number_ptr != nullptr) {
+      *sequence_number_ptr = sequence_number;
+    }
+    // here we know that we succeeded and installed the new snapshot
+    ROCKS_LOG_INFO(db_options.info_log, "Snapshot DONE. All is good");
+    ROCKS_LOG_INFO(db_options.info_log, "Snapshot sequence number: %" PRIu64,
+                   sequence_number);
+  } else {
+    // clean all the files we might have created
+    ROCKS_LOG_INFO(db_options.info_log, "Snapshot failed -- %s",
+                   s.ToString().c_str());
+    CleanStagingDirectory(full_private_path, db_options.info_log.get());
+  }
+  return s;
+}
+
+Status CheckpointImpl::CreateCustomCheckpoint(
+    std::function<Status(const std::string& src_dirname,
+                         const std::string& src_fname, FileType type)>
+        link_file_cb,
+    std::function<
+        Status(const std::string& src_dirname, const std::string& src_fname,
+               uint64_t size_limit_bytes, FileType type,
+               const std::string& checksum_func_name,
+               const std::string& checksum_val, const Temperature temperature)>
+        copy_file_cb,
+    std::function<Status(const std::string& fname, const std::string& contents,
+                         FileType type)>
+        create_file_cb,
+    uint64_t* sequence_number, uint64_t log_size_for_flush,
+    bool get_live_table_checksum) {
+  *sequence_number = db_->GetLatestSequenceNumber();
+
+  LiveFilesStorageInfoOptions opts;
+  opts.include_checksum_info = get_live_table_checksum;
+  opts.wal_size_for_flush = log_size_for_flush;
+
+  std::vector<LiveFileStorageInfo> infos;
+  {
+    Status s = db_->GetLiveFilesStorageInfo(opts, &infos);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  // Verify that everything except WAL files are in same directory
+  // (db_paths / cf_paths not supported)
+  std::unordered_set<std::string> dirs;
+  for (auto& info : infos) {
+    if (info.file_type != kWalFile) {
+      dirs.insert(info.directory);
+    }
+  }
+  if (dirs.size() > 1) {
+    return Status::NotSupported(
+        "db_paths / cf_paths not supported for Checkpoint nor BackupEngine");
+  }
+
+  bool same_fs = true;
+
+  for (auto& info : infos) {
+    Status s;
+    if (!info.replacement_contents.empty()) {
+      // Currently should only be used for CURRENT file.
+      assert(info.file_type == kCurrentFile);
+
+      if (info.size != info.replacement_contents.size()) {
+        s = Status::Corruption("Inconsistent size metadata for " +
+                               info.relative_filename);
+      } else {
+        s = create_file_cb(info.relative_filename, info.replacement_contents,
+                           info.file_type);
+      }
+    } else {
+      if (same_fs && !info.trim_to_size) {
+        s = link_file_cb(info.directory, info.relative_filename,
+                         info.file_type);
+        if (s.IsNotSupported()) {
+          same_fs = false;
+          s = Status::OK();
+        }
+        s.MustCheck();
+      }
+      if (!same_fs || info.trim_to_size) {
+        assert(info.file_checksum_func_name.empty() ==
+               !opts.include_checksum_info);
+        // no assertion on file_checksum because empty is used for both "not
+        // set" and "unknown"
+        if (opts.include_checksum_info) {
+          s = copy_file_cb(info.directory, info.relative_filename, info.size,
+                           info.file_type, info.file_checksum_func_name,
+                           info.file_checksum, info.temperature);
+        } else {
+          s = copy_file_cb(info.directory, info.relative_filename, info.size,
+                           info.file_type, kUnknownFileChecksumFuncName,
+                           kUnknownFileChecksum, info.temperature);
+        }
+      }
+    }
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  return Status::OK();
+}
+
+// Exports all live SST files of a specified Column Family onto export_dir,
+// returning SST files information in metadata.
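+//
+// Usage sketch (the handle and directory names here are illustrative only):
+//
+//   Checkpoint* checkpoint = nullptr;
+//   Status s = Checkpoint::Create(db, &checkpoint);
+//   ExportImportFilesMetaData* metadata = nullptr;
+//   if (s.ok()) {
+//     s = checkpoint->ExportColumnFamily(cf_handle, "/path/to/export_dir",
+//                                        &metadata);
+//   }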
+Status CheckpointImpl::ExportColumnFamily(
+    ColumnFamilyHandle* handle, const std::string& export_dir,
+    ExportImportFilesMetaData** metadata) {
+  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(handle);
+  const auto cf_name = cfh->GetName();
+  const auto db_options = db_->GetDBOptions();
+
+  assert(metadata != nullptr);
+  assert(*metadata == nullptr);
+  auto s = db_->GetEnv()->FileExists(export_dir);
+  if (s.ok()) {
+    return Status::InvalidArgument("Specified export_dir exists");
+  } else if (!s.IsNotFound()) {
+    assert(s.IsIOError());
+    return s;
+  }
+
+  const auto final_nonslash_idx = export_dir.find_last_not_of('/');
+  if (final_nonslash_idx == std::string::npos) {
+    return Status::InvalidArgument("Specified export_dir invalid");
+  }
+  ROCKS_LOG_INFO(db_options.info_log,
+                 "[%s] export column family onto export directory %s",
+                 cf_name.c_str(), export_dir.c_str());
+
+  // Create a temporary export directory.
+  const auto tmp_export_dir =
+      export_dir.substr(0, final_nonslash_idx + 1) + ".tmp";
+  s = db_->GetEnv()->CreateDir(tmp_export_dir);
+
+  if (s.ok()) {
+    s = db_->Flush(ROCKSDB_NAMESPACE::FlushOptions(), handle);
+  }
+
+  ColumnFamilyMetaData db_metadata;
+  if (s.ok()) {
+    // Export live sst files with file deletions disabled.
+    s = db_->DisableFileDeletions();
+    if (s.ok()) {
+      db_->GetColumnFamilyMetaData(handle, &db_metadata);
+
+      s = ExportFilesInMetaData(
+          db_options, db_metadata,
+          [&](const std::string& src_dirname, const std::string& fname) {
+            ROCKS_LOG_INFO(db_options.info_log, "[%s] HardLinking %s",
+                           cf_name.c_str(), fname.c_str());
+            return db_->GetEnv()->LinkFile(src_dirname + fname,
+                                           tmp_export_dir + fname);
+          } /*link_file_cb*/,
+          [&](const std::string& src_dirname, const std::string& fname) {
+            ROCKS_LOG_INFO(db_options.info_log, "[%s] Copying %s",
+                           cf_name.c_str(), fname.c_str());
+            return CopyFile(db_->GetFileSystem(), src_dirname + fname,
+                            tmp_export_dir + fname, 0, db_options.use_fsync,
+                            nullptr, Temperature::kUnknown);
+          } /*copy_file_cb*/);
+
+      const auto enable_status = db_->EnableFileDeletions(false /*force*/);
+      if (s.ok()) {
+        s = enable_status;
+      }
+    }
+  }
+
+  auto moved_to_user_specified_dir = false;
+  if (s.ok()) {
+    // Move temporary export directory to the actual export directory.
+    s = db_->GetEnv()->RenameFile(tmp_export_dir, export_dir);
+  }
+
+  if (s.ok()) {
+    // Fsync export directory.
+    moved_to_user_specified_dir = true;
+    std::unique_ptr<FSDirectory> dir_ptr;
+    s = db_->GetFileSystem()->NewDirectory(export_dir, IOOptions(), &dir_ptr,
+                                           nullptr);
+    if (s.ok()) {
+      assert(dir_ptr != nullptr);
+      s = dir_ptr->FsyncWithDirOptions(
+          IOOptions(), nullptr,
+          DirFsyncOptions(DirFsyncOptions::FsyncReason::kDirRenamed));
+    }
+  }
+
+  if (s.ok()) {
+    // Export of files succeeded. Fill in the metadata information.
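+    // Ownership of result_metadata is transferred to the caller through
+    // *metadata.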
+    auto result_metadata = new ExportImportFilesMetaData();
+    result_metadata->db_comparator_name = handle->GetComparator()->Name();
+    for (const auto& level_metadata : db_metadata.levels) {
+      for (const auto& file_metadata : level_metadata.files) {
+        LiveFileMetaData live_file_metadata;
+        live_file_metadata.size = file_metadata.size;
+        live_file_metadata.name = file_metadata.name;
+        live_file_metadata.file_number = file_metadata.file_number;
+        live_file_metadata.db_path = export_dir;
+        live_file_metadata.smallest_seqno = file_metadata.smallest_seqno;
+        live_file_metadata.largest_seqno = file_metadata.largest_seqno;
+        live_file_metadata.smallestkey = file_metadata.smallestkey;
+        live_file_metadata.largestkey = file_metadata.largestkey;
+        live_file_metadata.oldest_blob_file_number =
+            file_metadata.oldest_blob_file_number;
+        live_file_metadata.epoch_number = file_metadata.epoch_number;
+        live_file_metadata.level = level_metadata.level;
+        live_file_metadata.smallest = file_metadata.smallest;
+        live_file_metadata.largest = file_metadata.largest;
+        result_metadata->files.push_back(live_file_metadata);
+      }
+      *metadata = result_metadata;
+    }
+    ROCKS_LOG_INFO(db_options.info_log, "[%s] Export succeeded.",
+                   cf_name.c_str());
+  } else {
+    // Failure: Clean up all the files/directories created.
+    ROCKS_LOG_INFO(db_options.info_log, "[%s] Export failed. %s",
+                   cf_name.c_str(), s.ToString().c_str());
+    std::vector<std::string> subchildren;
+    const auto cleanup_dir =
+        moved_to_user_specified_dir ? export_dir : tmp_export_dir;
+    db_->GetEnv()->GetChildren(cleanup_dir, &subchildren);
+    for (const auto& subchild : subchildren) {
+      const auto subchild_path = cleanup_dir + "/" + subchild;
+      const auto status = db_->GetEnv()->DeleteFile(subchild_path);
+      if (!status.ok()) {
+        ROCKS_LOG_WARN(db_options.info_log, "Failed to cleanup file %s: %s",
+                       subchild_path.c_str(), status.ToString().c_str());
+      }
+    }
+    const auto status = db_->GetEnv()->DeleteDir(cleanup_dir);
+    if (!status.ok()) {
+      ROCKS_LOG_WARN(db_options.info_log, "Failed to cleanup dir %s: %s",
+                     cleanup_dir.c_str(), status.ToString().c_str());
+    }
+  }
+  return s;
+}
+
+Status CheckpointImpl::ExportFilesInMetaData(
+    const DBOptions& db_options, const ColumnFamilyMetaData& metadata,
+    std::function<Status(const std::string& src_dirname,
+                         const std::string& src_fname)>
+        link_file_cb,
+    std::function<Status(const std::string& src_dirname,
+                         const std::string& src_fname)>
+        copy_file_cb) {
+  Status s;
+  auto hardlink_file = true;
+
+  // Copy/hard link files in metadata.
+  size_t num_files = 0;
+  for (const auto& level_metadata : metadata.levels) {
+    for (const auto& file_metadata : level_metadata.files) {
+      uint64_t number;
+      FileType type;
+      const auto ok = ParseFileName(file_metadata.name, &number, &type);
+      if (!ok) {
+        s = Status::Corruption("Could not parse file name");
+        break;
+      }
+
+      // We should only get sst files here.
+      assert(type == kTableFile);
+      assert(file_metadata.size > 0 && file_metadata.name[0] == '/');
+      const auto src_fname = file_metadata.name;
+      ++num_files;
+
+      if (hardlink_file) {
+        s = link_file_cb(db_->GetName(), src_fname);
+        if (num_files == 1 && s.IsNotSupported()) {
+          // Fallback to copy if link failed due to cross-device directories.
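+          // Hard links cannot cross filesystems, so after the first failed
+          // link we switch to copying for all remaining files.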
+          hardlink_file = false;
+          s = Status::OK();
+        }
+      }
+      if (!hardlink_file) {
+        s = copy_file_cb(db_->GetName(), src_fname);
+      }
+      if (!s.ok()) {
+        break;
+      }
+    }
+  }
+  ROCKS_LOG_INFO(db_options.info_log, "Number of table files %" ROCKSDB_PRIszt,
+                 num_files);
+
+  return s;
+}
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.h
new file mode 100644
index 0000000000..3cb9a6477f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/checkpoint/checkpoint_impl.h
@@ -0,0 +1,64 @@
+// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+
+#include "file/filename.h"
+#include "rocksdb/db.h"
+#include "rocksdb/utilities/checkpoint.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class CheckpointImpl : public Checkpoint {
+ public:
+  explicit CheckpointImpl(DB* db) : db_(db) {}
+
+  Status CreateCheckpoint(const std::string& checkpoint_dir,
+                          uint64_t log_size_for_flush,
+                          uint64_t* sequence_number_ptr) override;
+
+  Status ExportColumnFamily(ColumnFamilyHandle* handle,
+                            const std::string& export_dir,
+                            ExportImportFilesMetaData** metadata) override;
+
+  // Checkpoint logic can be customized by providing callbacks for link, copy,
+  // or create.
+  Status CreateCustomCheckpoint(
+      std::function<Status(const std::string& src_dirname,
+                           const std::string& src_fname, FileType type)>
+          link_file_cb,
+      std::function<Status(const std::string& src_dirname,
+                           const std::string& src_fname,
+                           uint64_t size_limit_bytes, FileType type,
+                           const std::string& checksum_func_name,
+                           const std::string& checksum_val,
+                           const Temperature temperature)>
+          copy_file_cb,
+      std::function<Status(const std::string& fname,
+                           const std::string& contents, FileType type)>
+          create_file_cb,
+      uint64_t* sequence_number, uint64_t log_size_for_flush,
+      bool get_live_table_checksum = false);
+
+ private:
+  void CleanStagingDirectory(const std::string& path, Logger* info_log);
+
+  // Export logic customization by providing callbacks for link or copy.
+  Status ExportFilesInMetaData(
+      const DBOptions& db_options, const ColumnFamilyMetaData& metadata,
+      std::function<Status(const std::string& src_dirname,
+                           const std::string& src_fname)>
+          link_file_cb,
+      std::function<Status(const std::string& src_dirname,
+                           const std::string& src_fname)>
+          copy_file_cb);
+
+ private:
+  DB* db_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters.cc
new file mode 100644
index 0000000000..999f723651
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters.cc
@@ -0,0 +1,52 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
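+
+// String-based construction of compaction filters and registration of the
+// builtin ones (currently only RemoveEmptyValueCompactionFilter).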
+
+#include <memory>
+
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/options.h"
+#include "rocksdb/utilities/customizable_util.h"
+#include "rocksdb/utilities/options_type.h"
+#include "utilities/compaction_filters/layered_compaction_filter_base.h"
+#include "utilities/compaction_filters/remove_emptyvalue_compactionfilter.h"
+
+namespace ROCKSDB_NAMESPACE {
+static int RegisterBuiltinCompactionFilters(ObjectLibrary& library,
+                                            const std::string& /*arg*/) {
+  library.AddFactory<CompactionFilter>(
+      RemoveEmptyValueCompactionFilter::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<CompactionFilter>* /*guard*/,
+         std::string* /*errmsg*/) {
+        return new RemoveEmptyValueCompactionFilter();
+      });
+  return 1;
+}
+Status CompactionFilter::CreateFromString(const ConfigOptions& config_options,
+                                          const std::string& value,
+                                          const CompactionFilter** result) {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    RegisterBuiltinCompactionFilters(*(ObjectLibrary::Default().get()), "");
+  });
+  CompactionFilter* filter = const_cast<CompactionFilter*>(*result);
+  Status status =
+      LoadStaticObject<CompactionFilter>(config_options, value, &filter);
+  if (status.ok()) {
+    *result = const_cast<const CompactionFilter*>(filter);
+  }
+  return status;
+}
+
+Status CompactionFilterFactory::CreateFromString(
+    const ConfigOptions& config_options, const std::string& value,
+    std::shared_ptr<CompactionFilterFactory>* result) {
+  // Currently there are no builtin CompactionFilterFactories.
+  // If any are introduced, they need to be registered here.
+  Status status =
+      LoadSharedObject<CompactionFilterFactory>(config_options, value, result);
+  return status;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/layered_compaction_filter_base.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/layered_compaction_filter_base.h
new file mode 100644
index 0000000000..803fa94ae3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/layered_compaction_filter_base.h
@@ -0,0 +1,41 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <memory>
+
+#include "rocksdb/compaction_filter.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Abstract base class for building layered compaction filter on top of
+// user compaction filter.
+// See BlobIndexCompactionFilter or TtlCompactionFilter for a basic usage.
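+//
+// The wrapper keeps at most one of its two inputs: if no raw filter is
+// given, the filter obtained from the factory is used instead.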
+class LayeredCompactionFilterBase : public CompactionFilter {
+ public:
+  LayeredCompactionFilterBase(
+      const CompactionFilter* _user_comp_filter,
+      std::unique_ptr<const CompactionFilter> _user_comp_filter_from_factory)
+      : user_comp_filter_(_user_comp_filter),
+        user_comp_filter_from_factory_(
+            std::move(_user_comp_filter_from_factory)) {
+    if (!user_comp_filter_) {
+      user_comp_filter_ = user_comp_filter_from_factory_.get();
+    }
+  }
+
+  // Return a pointer to user compaction filter
+  const CompactionFilter* user_comp_filter() const { return user_comp_filter_; }
+
+  const Customizable* Inner() const override { return user_comp_filter_; }
+
+ protected:
+  const CompactionFilter* user_comp_filter_;
+
+ private:
+  std::unique_ptr<const CompactionFilter> user_comp_filter_from_factory_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
new file mode 100644
index 0000000000..c61ce02204
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
@@ -0,0 +1,24 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+
+#include "utilities/compaction_filters/remove_emptyvalue_compactionfilter.h"
+
+#include <string>
+
+#include "rocksdb/slice.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+bool RemoveEmptyValueCompactionFilter::Filter(int /*level*/,
+                                              const Slice& /*key*/,
+                                              const Slice& existing_value,
+                                              std::string* /*new_value*/,
+                                              bool* /*value_changed*/) const {
+  // remove kv pairs that have empty values
+  return existing_value.empty();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h
new file mode 100644
index 0000000000..2a56bd6c2e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
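+
+// A compaction filter that drops key-value pairs whose value is empty; see
+// the matching .cc file for the Filter() implementation.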
+
+
+#pragma once
+
+#include <string>
+
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/slice.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class RemoveEmptyValueCompactionFilter : public CompactionFilter {
+ public:
+  static const char* kClassName() { return "RemoveEmptyValueCompactionFilter"; }
+
+  const char* Name() const override { return kClassName(); }
+
+  bool Filter(int level, const Slice& key, const Slice& existing_value,
+              std::string* new_value, bool* value_changed) const override;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/convenience/info_log_finder.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/convenience/info_log_finder.cc
new file mode 100644
index 0000000000..fe62fd5616
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/convenience/info_log_finder.cc
@@ -0,0 +1,26 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2012 Facebook.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "rocksdb/utilities/info_log_finder.h"
+
+#include "file/filename.h"
+#include "rocksdb/env.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status GetInfoLogList(DB* db, std::vector<std::string>* info_log_list) {
+  if (!db) {
+    return Status::InvalidArgument("DB pointer is not valid");
+  }
+  std::string parent_path;
+  const Options& options = db->GetOptions();
+  return GetInfoLogFiles(options.env->GetFileSystem(), options.db_log_dir,
+                         db->GetName(), &parent_path, info_log_list);
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.cc
new file mode 100644
index 0000000000..e43f3a1912
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.cc
@@ -0,0 +1,379 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
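+
+// Wrappers that count file-system operations (opens, closes, reads, writes,
+// flushes, and syncs) in a shared FileOpCounters instance; see counted_fs.h.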
+
+#include "utilities/counted_fs.h"
+
+#include <sstream>
+
+#include "rocksdb/file_system.h"
+#include "rocksdb/utilities/options_type.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+class CountedSequentialFile : public FSSequentialFileOwnerWrapper {
+ private:
+  CountedFileSystem* fs_;
+
+ public:
+  CountedSequentialFile(std::unique_ptr<FSSequentialFile>&& f,
+                        CountedFileSystem* fs)
+      : FSSequentialFileOwnerWrapper(std::move(f)), fs_(fs) {}
+
+  ~CountedSequentialFile() override { fs_->counters()->closes++; }
+
+  IOStatus Read(size_t n, const IOOptions& options, Slice* result,
+                char* scratch, IODebugContext* dbg) override {
+    IOStatus rv = target()->Read(n, options, result, scratch, dbg);
+    fs_->counters()->reads.RecordOp(rv, result->size());
+    return rv;
+  }
+
+  IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
+                          Slice* result, char* scratch,
+                          IODebugContext* dbg) override {
+    IOStatus rv =
+        target()->PositionedRead(offset, n, options, result, scratch, dbg);
+    fs_->counters()->reads.RecordOp(rv, result->size());
+    return rv;
+  }
+};
+
+class CountedRandomAccessFile : public FSRandomAccessFileOwnerWrapper {
+ private:
+  CountedFileSystem* fs_;
+
+ public:
+  CountedRandomAccessFile(std::unique_ptr<FSRandomAccessFile>&& f,
+                          CountedFileSystem* fs)
+      : FSRandomAccessFileOwnerWrapper(std::move(f)), fs_(fs) {}
+
+  ~CountedRandomAccessFile() override { fs_->counters()->closes++; }
+
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override {
+    IOStatus rv = target()->Read(offset, n, options, result, scratch, dbg);
+    fs_->counters()->reads.RecordOp(rv, result->size());
+    return rv;
+  }
+
+  IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
+                     const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = target()->MultiRead(reqs, num_reqs, options, dbg);
+    for (size_t r = 0; r < num_reqs; r++) {
+      fs_->counters()->reads.RecordOp(reqs[r].status, reqs[r].result.size());
+    }
+    return rv;
+  }
+};
+
+class CountedWritableFile : public FSWritableFileOwnerWrapper {
+ private:
+  CountedFileSystem* fs_;
+
+ public:
+  CountedWritableFile(std::unique_ptr<FSWritableFile>&& f,
+                      CountedFileSystem* fs)
+      : FSWritableFileOwnerWrapper(std::move(f)), fs_(fs) {}
+
+  IOStatus Append(const Slice& data, const IOOptions& options,
+                  IODebugContext* dbg) override {
+    IOStatus rv = target()->Append(data, options, dbg);
+    fs_->counters()->writes.RecordOp(rv, data.size());
+    return rv;
+  }
+
+  IOStatus Append(const Slice& data, const IOOptions& options,
+                  const DataVerificationInfo& info,
+                  IODebugContext* dbg) override {
+    IOStatus rv = target()->Append(data, options, info, dbg);
+    fs_->counters()->writes.RecordOp(rv, data.size());
+    return rv;
+  }
+
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            IODebugContext* dbg) override {
+    IOStatus rv = target()->PositionedAppend(data, offset, options, dbg);
+    fs_->counters()->writes.RecordOp(rv, data.size());
+    return rv;
+  }
+
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            const DataVerificationInfo& info,
+                            IODebugContext* dbg) override {
+    IOStatus rv = target()->PositionedAppend(data, offset, options, info, dbg);
+    fs_->counters()->writes.RecordOp(rv, data.size());
+    return rv;
+  }
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = target()->Close(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->closes++;
+    }
+    return rv;
+  }
+
+  IOStatus Flush(const IOOptions& options,
+                 IODebugContext* dbg) override {
+    IOStatus rv = target()->Flush(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->flushes++;
+    }
+    return rv;
+  }
+
+  IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = target()->Sync(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->syncs++;
+    }
+    return rv;
+  }
+
+  IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = target()->Fsync(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->fsyncs++;
+    }
+    return rv;
+  }
+
+  IOStatus RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& options,
+                     IODebugContext* dbg) override {
+    IOStatus rv = target()->RangeSync(offset, nbytes, options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->syncs++;
+    }
+    return rv;
+  }
+};
+
+class CountedRandomRWFile : public FSRandomRWFileOwnerWrapper {
+ private:
+  mutable CountedFileSystem* fs_;
+
+ public:
+  CountedRandomRWFile(std::unique_ptr<FSRandomRWFile>&& f,
+                      CountedFileSystem* fs)
+      : FSRandomRWFileOwnerWrapper(std::move(f)), fs_(fs) {}
+  IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
+                 IODebugContext* dbg) override {
+    IOStatus rv = target()->Write(offset, data, options, dbg);
+    fs_->counters()->writes.RecordOp(rv, data.size());
+    return rv;
+  }
+
+  IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
+                Slice* result, char* scratch,
+                IODebugContext* dbg) const override {
+    IOStatus rv = target()->Read(offset, n, options, result, scratch, dbg);
+    fs_->counters()->reads.RecordOp(rv, result->size());
+    return rv;
+  }
+
+  IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = target()->Flush(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->flushes++;
+    }
+    return rv;
+  }
+
+  IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = target()->Sync(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->syncs++;
+    }
+    return rv;
+  }
+
+  IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = target()->Fsync(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->fsyncs++;
+    }
+    return rv;
+  }
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = target()->Close(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->closes++;
+    }
+    return rv;
+  }
+};
+
+class CountedDirectory : public FSDirectoryWrapper {
+ private:
+  mutable CountedFileSystem* fs_;
+  bool closed_ = false;
+
+ public:
+  CountedDirectory(std::unique_ptr<FSDirectory>&& f, CountedFileSystem* fs)
+      : FSDirectoryWrapper(std::move(f)), fs_(fs) {}
+
+  IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = FSDirectoryWrapper::Fsync(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->dsyncs++;
+    }
+    return rv;
+  }
+
+  IOStatus Close(const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus rv = FSDirectoryWrapper::Close(options, dbg);
+    if (rv.ok()) {
+      fs_->counters()->closes++;
+      fs_->counters()->dir_closes++;
+      closed_ = true;
+    }
+    return rv;
+  }
+
+  IOStatus FsyncWithDirOptions(const IOOptions& options, IODebugContext* dbg,
+                               const DirFsyncOptions& dir_options) override {
+    IOStatus rv =
+        FSDirectoryWrapper::FsyncWithDirOptions(options, dbg, dir_options);
+    if (rv.ok()) {
+      fs_->counters()->dsyncs++;
+    }
+    return rv;
+  }
+
+  ~CountedDirectory() {
+    if (!closed_) {
+      // TODO: fix DB+CF code to use explicit Close, not rely on destructor
+      fs_->counters()->closes++;
+      fs_->counters()->dir_closes++;
+    }
+  }
+};
+}  // anonymous namespace
+
+std::string
+FileOpCounters::PrintCounters() const {
+  std::stringstream ss;
+  ss << "Num files opened: " << opens.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num files deleted: " << deletes.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num files renamed: " << renames.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num Flush(): " << flushes.load(std::memory_order_relaxed) << std::endl;
+  ss << "Num Sync(): " << syncs.load(std::memory_order_relaxed) << std::endl;
+  ss << "Num Fsync(): " << fsyncs.load(std::memory_order_relaxed) << std::endl;
+  ss << "Num Dir Fsync(): " << dsyncs.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num Close(): " << closes.load(std::memory_order_relaxed) << std::endl;
+  ss << "Num Dir Open(): " << dir_opens.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num Dir Close(): " << dir_closes.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num Read(): " << reads.ops.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num Append(): " << writes.ops.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num bytes read: " << reads.bytes.load(std::memory_order_relaxed)
+     << std::endl;
+  ss << "Num bytes written: " << writes.bytes.load(std::memory_order_relaxed)
+     << std::endl;
+  return ss.str();
+}
+
+CountedFileSystem::CountedFileSystem(const std::shared_ptr<FileSystem>& base)
+    : FileSystemWrapper(base) {}
+
+IOStatus CountedFileSystem::NewSequentialFile(
+    const std::string& f, const FileOptions& options,
+    std::unique_ptr<FSSequentialFile>* r, IODebugContext* dbg) {
+  std::unique_ptr<FSSequentialFile> base;
+  IOStatus s = target()->NewSequentialFile(f, options, &base, dbg);
+  if (s.ok()) {
+    counters_.opens++;
+    r->reset(new CountedSequentialFile(std::move(base), this));
+  }
+  return s;
+}
+
+IOStatus CountedFileSystem::NewRandomAccessFile(
+    const std::string& f, const FileOptions& options,
+    std::unique_ptr<FSRandomAccessFile>* r, IODebugContext* dbg) {
+  std::unique_ptr<FSRandomAccessFile> base;
+  IOStatus s = target()->NewRandomAccessFile(f, options, &base, dbg);
+  if (s.ok()) {
+    counters_.opens++;
+    r->reset(new CountedRandomAccessFile(std::move(base), this));
+  }
+  return s;
+}
+
+IOStatus CountedFileSystem::NewWritableFile(const std::string& f,
+                                            const FileOptions& options,
+                                            std::unique_ptr<FSWritableFile>* r,
+                                            IODebugContext* dbg) {
+  std::unique_ptr<FSWritableFile> base;
+  IOStatus s = target()->NewWritableFile(f, options, &base, dbg);
+  if (s.ok()) {
+    counters_.opens++;
+    r->reset(new CountedWritableFile(std::move(base), this));
+  }
+  return s;
+}
+
+IOStatus CountedFileSystem::ReopenWritableFile(
+    const std::string& fname, const FileOptions& options,
+    std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
+  std::unique_ptr<FSWritableFile> base;
+  IOStatus s = target()->ReopenWritableFile(fname, options, &base, dbg);
+  if (s.ok()) {
+    counters_.opens++;
+    result->reset(new CountedWritableFile(std::move(base), this));
+  }
+  return s;
+}
+
+IOStatus CountedFileSystem::ReuseWritableFile(
+    const std::string& fname, const std::string& old_fname,
+    const FileOptions& options, std::unique_ptr<FSWritableFile>* result,
+    IODebugContext* dbg) {
+  std::unique_ptr<FSWritableFile> base;
+  IOStatus s =
+      target()->ReuseWritableFile(fname, old_fname, options, &base, dbg);
+  if (s.ok()) {
+    counters_.opens++;
+    result->reset(new CountedWritableFile(std::move(base), this));
+  }
+  return s;
+}
+
+IOStatus CountedFileSystem::NewRandomRWFile(
+    const std::string& name, const FileOptions& options,
+    std::unique_ptr<FSRandomRWFile>* result, IODebugContext* dbg) {
+  std::unique_ptr<FSRandomRWFile> base;
+  IOStatus s = target()->NewRandomRWFile(name, options, &base, dbg);
+  if (s.ok()) {
+    counters_.opens++;
+    result->reset(new CountedRandomRWFile(std::move(base), this));
+  }
+  return s;
+}
+
+IOStatus CountedFileSystem::NewDirectory(const std::string& name,
+                                         const IOOptions& options,
+                                         std::unique_ptr<FSDirectory>* result,
+                                         IODebugContext* dbg) {
+  std::unique_ptr<FSDirectory> base;
+  IOStatus s = target()->NewDirectory(name, options, &base, dbg);
+  if (s.ok()) {
+    counters_.opens++;
+    counters_.dir_opens++;
+    result->reset(new CountedDirectory(std::move(base), this));
+  }
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.h
new file mode 100644
index 0000000000..cb8a8968fb
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/counted_fs.h
@@ -0,0 +1,158 @@
+// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <atomic>
+#include <sstream>
+
+#include "rocksdb/file_system.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+class Logger;
+
+struct OpCounter {
+  std::atomic<int> ops;
+  std::atomic<uint64_t> bytes;
+
+  OpCounter() : ops(0), bytes(0) {}
+
+  void Reset() {
+    ops = 0;
+    bytes = 0;
+  }
+  void RecordOp(const IOStatus& io_s, size_t added_bytes) {
+    if (!io_s.IsNotSupported()) {
+      ops.fetch_add(1, std::memory_order_relaxed);
+    }
+    if (io_s.ok()) {
+      bytes.fetch_add(added_bytes, std::memory_order_relaxed);
+    }
+  }
+};
+
+struct FileOpCounters {
+  static const char* kName() { return "FileOpCounters"; }
+
+  std::atomic<int> opens;
+  std::atomic<int> closes;
+  std::atomic<int> deletes;
+  std::atomic<int> renames;
+  std::atomic<int> flushes;
+  std::atomic<int> syncs;
+  std::atomic<int> dsyncs;
+  std::atomic<int> fsyncs;
+  std::atomic<int> dir_opens;
+  std::atomic<int> dir_closes;
+  OpCounter reads;
+  OpCounter writes;
+
+  FileOpCounters()
+      : opens(0),
+        closes(0),
+        deletes(0),
+        renames(0),
+        flushes(0),
+        syncs(0),
+        dsyncs(0),
+        fsyncs(0),
+        dir_opens(0),
+        dir_closes(0) {}
+
+  void Reset() {
+    opens = 0;
+    closes = 0;
+    deletes = 0;
+    renames = 0;
+    flushes = 0;
+    syncs = 0;
+    dsyncs = 0;
+    fsyncs = 0;
+    dir_opens = 0;
+    dir_closes = 0;
+    reads.Reset();
+    writes.Reset();
+  }
+  std::string PrintCounters() const;
+};
+
+// A FileSystem class that counts operations (reads, writes, opens, closes, etc)
+class CountedFileSystem : public FileSystemWrapper {
+ public:
+ private:
+  FileOpCounters counters_;
+
+ public:
+  explicit CountedFileSystem(const std::shared_ptr<FileSystem>& base);
+  static const char* kClassName() { return "CountedFileSystem"; }
+  const char* Name() const override { return kClassName(); }
+
+  IOStatus NewSequentialFile(const std::string& f, const FileOptions& options,
+                             std::unique_ptr<FSSequentialFile>* r,
+                             IODebugContext* dbg) override;
+
+  IOStatus NewRandomAccessFile(const std::string& f,
+                               const FileOptions& file_opts,
+                               std::unique_ptr<FSRandomAccessFile>* r,
+                               IODebugContext* dbg) override;
+
+  IOStatus NewWritableFile(const std::string& f, const FileOptions& options,
+                           std::unique_ptr<FSWritableFile>* r,
+                           IODebugContext* dbg) override;
+  IOStatus ReopenWritableFile(const std::string& fname,
+                              const FileOptions& options,
+                              std::unique_ptr<FSWritableFile>* result,
+                              IODebugContext* dbg) override;
+
+  IOStatus ReuseWritableFile(const std::string& fname,
+                             const std::string& old_fname,
+                             const FileOptions& file_opts,
+                             std::unique_ptr<FSWritableFile>* result,
+                             IODebugContext* dbg) override;
+  IOStatus
+  NewRandomRWFile(const std::string& name, const FileOptions& options,
+                  std::unique_ptr<FSRandomRWFile>* result,
+                  IODebugContext* dbg) override;
+
+  IOStatus NewDirectory(const std::string& name, const IOOptions& io_opts,
+                        std::unique_ptr<FSDirectory>* result,
+                        IODebugContext* dbg) override;
+
+  IOStatus DeleteFile(const std::string& fname, const IOOptions& options,
+                      IODebugContext* dbg) override {
+    IOStatus s = target()->DeleteFile(fname, options, dbg);
+    if (s.ok()) {
+      counters_.deletes++;
+    }
+    return s;
+  }
+
+  IOStatus RenameFile(const std::string& s, const std::string& t,
+                      const IOOptions& options, IODebugContext* dbg) override {
+    IOStatus st = target()->RenameFile(s, t, options, dbg);
+    if (st.ok()) {
+      counters_.renames++;
+    }
+    return st;
+  }
+
+  const FileOpCounters* counters() const { return &counters_; }
+
+  FileOpCounters* counters() { return &counters_; }
+
+  const void* GetOptionsPtr(const std::string& name) const override {
+    if (name == FileOpCounters::kName()) {
+      return counters();
+    } else {
+      return FileSystemWrapper::GetOptionsPtr(name);
+    }
+  }
+
+  // Prints the counters to a string
+  std::string PrintCounters() const { return counters_.PrintCounters(); }
+  void ResetCounters() { counters_.Reset(); }
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/debug.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/debug.cc
new file mode 100644
index 0000000000..911bc510a6
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/debug.cc
@@ -0,0 +1,118 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
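+
+// Implementation of GetAllKeyVersions(), which walks a DB's internal
+// iterator and reports each internal key's sequence number and value type.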
+
+
+#include "rocksdb/utilities/debug.h"
+
+#include "db/db_impl/db_impl.h"
+#include "rocksdb/utilities/options_type.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+static std::unordered_map<std::string, ValueType> value_type_string_map = {
+    {"TypeDeletion", ValueType::kTypeDeletion},
+    {"TypeValue", ValueType::kTypeValue},
+    {"TypeMerge", ValueType::kTypeMerge},
+    {"TypeLogData", ValueType::kTypeLogData},
+    {"TypeColumnFamilyDeletion", ValueType::kTypeColumnFamilyDeletion},
+    {"TypeColumnFamilyValue", ValueType::kTypeColumnFamilyValue},
+    {"TypeColumnFamilyMerge", ValueType::kTypeColumnFamilyMerge},
+    {"TypeSingleDeletion", ValueType::kTypeSingleDeletion},
+    {"TypeColumnFamilySingleDeletion",
+     ValueType::kTypeColumnFamilySingleDeletion},
+    {"TypeBeginPrepareXID", ValueType::kTypeBeginPrepareXID},
+    {"TypeEndPrepareXID", ValueType::kTypeEndPrepareXID},
+    {"TypeCommitXID", ValueType::kTypeCommitXID},
+    {"TypeRollbackXID", ValueType::kTypeRollbackXID},
+    {"TypeNoop", ValueType::kTypeNoop},
+    {"TypeColumnFamilyRangeDeletion",
+     ValueType::kTypeColumnFamilyRangeDeletion},
+    {"TypeRangeDeletion", ValueType::kTypeRangeDeletion},
+    {"TypeColumnFamilyBlobIndex", ValueType::kTypeColumnFamilyBlobIndex},
+    {"TypeBlobIndex", ValueType::kTypeBlobIndex},
+    {"TypeBeginPersistedPrepareXID", ValueType::kTypeBeginPersistedPrepareXID},
+    {"TypeBeginUnprepareXID", ValueType::kTypeBeginUnprepareXID},
+    {"TypeDeletionWithTimestamp", ValueType::kTypeDeletionWithTimestamp},
+    {"TypeCommitXIDAndTimestamp", ValueType::kTypeCommitXIDAndTimestamp},
+    {"TypeWideColumnEntity", ValueType::kTypeWideColumnEntity},
+    {"TypeColumnFamilyWideColumnEntity",
+     ValueType::kTypeColumnFamilyWideColumnEntity}};
+
+std::string KeyVersion::GetTypeName() const {
+  std::string type_name;
+  if (SerializeEnum<ValueType>(value_type_string_map,
+                               static_cast<ValueType>(type), &type_name)) {
+    return type_name;
+  } else {
+    return "Invalid";
+  }
+}
+
+Status GetAllKeyVersions(DB* db, Slice begin_key, Slice end_key,
+                         size_t max_num_ikeys,
+                         std::vector<KeyVersion>* key_versions) {
+  if (nullptr == db) {
+    return Status::InvalidArgument("db cannot be null.");
+  }
+  return GetAllKeyVersions(db, db->DefaultColumnFamily(), begin_key, end_key,
+                           max_num_ikeys, key_versions);
+}
+
+Status GetAllKeyVersions(DB* db, ColumnFamilyHandle* cfh, Slice begin_key,
+                         Slice end_key, size_t max_num_ikeys,
+                         std::vector<KeyVersion>* key_versions) {
+  if (nullptr == db) {
+    return Status::InvalidArgument("db cannot be null.");
+  }
+  if (nullptr == cfh) {
+    return Status::InvalidArgument("Column family handle cannot be null.");
+  }
+  if (nullptr == key_versions) {
+    return Status::InvalidArgument("key_versions cannot be null.");
+  }
+  key_versions->clear();
+
+  DBImpl* idb = static_cast<DBImpl*>(db->GetRootDB());
+  auto icmp = InternalKeyComparator(idb->GetOptions(cfh).comparator);
+  ReadOptions read_options;
+  Arena arena;
+  ScopedArenaIterator iter(
+      idb->NewInternalIterator(read_options, &arena, kMaxSequenceNumber, cfh));
+
+  if (!begin_key.empty()) {
+    InternalKey ikey;
+    ikey.SetMinPossibleForUserKey(begin_key);
+    iter->Seek(ikey.Encode());
+  } else {
+    iter->SeekToFirst();
+  }
+
+  size_t num_keys = 0;
+  for (; iter->Valid(); iter->Next()) {
+    ParsedInternalKey ikey;
+    Status pik_status =
+        ParseInternalKey(iter->key(), &ikey, true /* log_err_key */);  // TODO
+    if (!pik_status.ok()) {
+      return pik_status;
+    }
+
+    if (!end_key.empty() &&
+        icmp.user_comparator()->Compare(ikey.user_key, end_key) > 0) {
+      break;
+    }
+
+    key_versions->emplace_back(ikey.user_key.ToString() /* _user_key */,
+                               iter->value().ToString() /* _value */,
+                               ikey.sequence /* _sequence */,
+                               static_cast<int>(ikey.type) /* _type */);
+    if (++num_keys >= max_num_ikeys) {
+      break;
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_mirror.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_mirror.cc
new file mode 100644
index 0000000000..0802d7c708
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_mirror.cc
@@ -0,0 +1,273 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2015, Red Hat, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+
+#include "rocksdb/utilities/env_mirror.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// An implementation of Env that mirrors all work over two backend
+// Env's. This is useful for debugging purposes.
+class SequentialFileMirror : public SequentialFile {
+ public:
+  std::unique_ptr<SequentialFile> a_, b_;
+  std::string fname;
+  explicit SequentialFileMirror(std::string f) : fname(f) {}
+
+  Status Read(size_t n, Slice* result, char* scratch) override {
+    Slice aslice;
+    Status as = a_->Read(n, &aslice, scratch);
+    if (as == Status::OK()) {
+      char* bscratch = new char[n];
+      Slice bslice;
+#ifndef NDEBUG
+      size_t off = 0;
+#endif
+      size_t left = aslice.size();
+      while (left) {
+        Status bs = b_->Read(left, &bslice, bscratch);
+#ifndef NDEBUG
+        assert(as == bs);
+        assert(memcmp(bscratch, scratch + off, bslice.size()) == 0);
+        off += bslice.size();
+#endif
+        left -= bslice.size();
+      }
+      delete[] bscratch;
+      *result = aslice;
+    } else {
+      Status bs = b_->Read(n, result, scratch);
+      assert(as == bs);
+    }
+    return as;
+  }
+
+  Status Skip(uint64_t n) override {
+    Status as = a_->Skip(n);
+    Status bs = b_->Skip(n);
+    assert(as == bs);
+    return as;
+  }
+  Status InvalidateCache(size_t offset, size_t length) override {
+    Status as = a_->InvalidateCache(offset, length);
+    Status bs = b_->InvalidateCache(offset, length);
+    assert(as == bs);
+    return as;
+  };
+};
+
+class RandomAccessFileMirror : public RandomAccessFile {
+ public:
+  std::unique_ptr<RandomAccessFile> a_, b_;
+  std::string fname;
+  explicit RandomAccessFileMirror(std::string f) : fname(f) {}
+
+  Status Read(uint64_t offset, size_t n, Slice* result,
+              char* scratch) const override {
+    Status as = a_->Read(offset, n, result, scratch);
+    if (as == Status::OK()) {
+      char* bscratch = new char[n];
+      Slice bslice;
+      size_t off = 0;
+      size_t left = result->size();
+      while (left) {
+        Status bs = b_->Read(offset + off, left, &bslice, bscratch);
+        assert(as == bs);
+        assert(memcmp(bscratch, scratch + off, bslice.size()) == 0);
+        off += bslice.size();
+        left -= bslice.size();
+      }
+      delete[] bscratch;
+    } else {
+      Status bs = b_->Read(offset, n, result, scratch);
+      assert(as == bs);
+    }
+    return as;
+  }
+
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    // NOTE: not verified
+    return a_->GetUniqueId(id, max_size);
+  }
+};
+
+class WritableFileMirror : public WritableFile {
+ public:
+  std::unique_ptr<WritableFile> a_, b_;
+  std::string fname;
+  explicit WritableFileMirror(std::string f, const EnvOptions& options)
+      : WritableFile(options), fname(f) {}
+
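+  // Each mutating call below is issued to both underlying files and the two
+  // Status results are asserted to agree.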
+  Status Append(const Slice& data) override {
+    Status as = a_->Append(data);
+    Status bs = b_->Append(data);
+    assert(as == bs);
+    return as;
+  }
+  Status Append(const Slice& data,
+                const DataVerificationInfo& /* verification_info */) override {
+    return Append(data);
+  }
+  Status PositionedAppend(const Slice& data, uint64_t offset) override {
+    Status as = a_->PositionedAppend(data, offset);
+    Status bs = b_->PositionedAppend(data, offset);
+    assert(as == bs);
+    return as;
+  }
+  Status PositionedAppend(
+      const Slice& data, uint64_t offset,
+      const DataVerificationInfo& /* verification_info */) override {
+    return PositionedAppend(data, offset);
+  }
+  Status Truncate(uint64_t size) override {
+    Status as = a_->Truncate(size);
+    Status bs = b_->Truncate(size);
+    assert(as == bs);
+    return as;
+  }
+  Status Close() override {
+    Status as = a_->Close();
+    Status bs = b_->Close();
+    assert(as == bs);
+    return as;
+  }
+  Status Flush() override {
+    Status as = a_->Flush();
+    Status bs = b_->Flush();
+    assert(as == bs);
+    return as;
+  }
+  Status Sync() override {
+    Status as = a_->Sync();
+    Status bs = b_->Sync();
+    assert(as == bs);
+    return as;
+  }
+  Status Fsync() override {
+    Status as = a_->Fsync();
+    Status bs = b_->Fsync();
+    assert(as == bs);
+    return as;
+  }
+  bool IsSyncThreadSafe() const override {
+    bool as = a_->IsSyncThreadSafe();
+    assert(as == b_->IsSyncThreadSafe());
+    return as;
+  }
+  void SetIOPriority(Env::IOPriority pri) override {
+    a_->SetIOPriority(pri);
+    b_->SetIOPriority(pri);
+  }
+  Env::IOPriority GetIOPriority() override {
+    // NOTE: we don't verify this one
+    return a_->GetIOPriority();
+  }
+  uint64_t GetFileSize() override {
+    uint64_t as = a_->GetFileSize();
+    assert(as == b_->GetFileSize());
+    return as;
+  }
+  void GetPreallocationStatus(size_t* block_size,
+                              size_t* last_allocated_block) override {
+    // NOTE: we don't verify this one
+    return a_->GetPreallocationStatus(block_size, last_allocated_block);
+  }
+  size_t GetUniqueId(char* id, size_t max_size) const override {
+    // NOTE: we don't verify this one
+    return a_->GetUniqueId(id, max_size);
+  }
+  Status InvalidateCache(size_t offset, size_t length) override {
+    Status as = a_->InvalidateCache(offset, length);
+    Status bs = b_->InvalidateCache(offset, length);
+    assert(as == bs);
+    return as;
+  }
+
+ protected:
+  Status Allocate(uint64_t offset, uint64_t length) override {
+    Status as = a_->Allocate(offset, length);
+    Status bs = b_->Allocate(offset, length);
+    assert(as == bs);
+    return as;
+  }
+  Status RangeSync(uint64_t offset, uint64_t nbytes) override {
+    Status as = a_->RangeSync(offset, nbytes);
+    Status bs = b_->RangeSync(offset, nbytes);
+    assert(as == bs);
+    return as;
+  }
+};
+
+Status EnvMirror::NewSequentialFile(const std::string& f,
+                                    std::unique_ptr<SequentialFile>* r,
+                                    const EnvOptions& options) {
+  if (f.find("/proc/") == 0) {
+    return a_->NewSequentialFile(f, r, options);
+  }
+  SequentialFileMirror* mf = new SequentialFileMirror(f);
+  Status as = a_->NewSequentialFile(f, &mf->a_, options);
+  Status bs = b_->NewSequentialFile(f, &mf->b_, options);
+  assert(as == bs);
+  if (as.ok())
+    r->reset(mf);
+  else
+    delete mf;
+  return as;
+}
+
+Status EnvMirror::NewRandomAccessFile(const std::string& f,
+                                      std::unique_ptr<RandomAccessFile>* r,
+                                      const EnvOptions& options) {
+  if (f.find("/proc/") == 0) {
+    return a_->NewRandomAccessFile(f, r, options);
+  }
+  RandomAccessFileMirror* mf = new RandomAccessFileMirror(f);
+  Status as = a_->NewRandomAccessFile(f, &mf->a_, options);
+  Status bs = b_->NewRandomAccessFile(f, &mf->b_, options);
+ assert(as == bs); + if (as.ok()) + r->reset(mf); + else + delete mf; + return as; +} + +Status EnvMirror::NewWritableFile(const std::string& f, + std::unique_ptr* r, + const EnvOptions& options) { + if (f.find("/proc/") == 0) return a_->NewWritableFile(f, r, options); + WritableFileMirror* mf = new WritableFileMirror(f, options); + Status as = a_->NewWritableFile(f, &mf->a_, options); + Status bs = b_->NewWritableFile(f, &mf->b_, options); + assert(as == bs); + if (as.ok()) + r->reset(mf); + else + delete mf; + return as; +} + +Status EnvMirror::ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + std::unique_ptr* r, + const EnvOptions& options) { + if (fname.find("/proc/") == 0) + return a_->ReuseWritableFile(fname, old_fname, r, options); + WritableFileMirror* mf = new WritableFileMirror(fname, options); + Status as = a_->ReuseWritableFile(fname, old_fname, &mf->a_, options); + Status bs = b_->ReuseWritableFile(fname, old_fname, &mf->b_, options); + assert(as == bs); + if (as.ok()) + r->reset(mf); + else + delete mf; + return as; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.cc new file mode 100644 index 0000000000..01fdfccaf3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.cc @@ -0,0 +1,181 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#include "utilities/env_timed.h" + +#include "env/composite_env_wrapper.h" +#include "monitoring/perf_context_imp.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +TimedFileSystem::TimedFileSystem(const std::shared_ptr& base) + : FileSystemWrapper(base) {} +IOStatus TimedFileSystem::NewSequentialFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + PERF_TIMER_GUARD(env_new_sequential_file_nanos); + return FileSystemWrapper::NewSequentialFile(fname, options, result, dbg); +} + +IOStatus TimedFileSystem::NewRandomAccessFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + PERF_TIMER_GUARD(env_new_random_access_file_nanos); + return FileSystemWrapper::NewRandomAccessFile(fname, options, result, dbg); +} + +IOStatus TimedFileSystem::NewWritableFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + PERF_TIMER_GUARD(env_new_writable_file_nanos); + return FileSystemWrapper::NewWritableFile(fname, options, result, dbg); +} + +IOStatus TimedFileSystem::ReuseWritableFile( + const std::string& fname, const std::string& old_fname, + const FileOptions& options, std::unique_ptr* result, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_reuse_writable_file_nanos); + return FileSystemWrapper::ReuseWritableFile(fname, old_fname, options, result, + dbg); +} + +IOStatus TimedFileSystem::NewRandomRWFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + PERF_TIMER_GUARD(env_new_random_rw_file_nanos); + return FileSystemWrapper::NewRandomRWFile(fname, options, result, dbg); +} + +IOStatus TimedFileSystem::NewDirectory(const std::string& name, + const 
IOOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_new_directory_nanos); + return FileSystemWrapper::NewDirectory(name, options, result, dbg); +} + +IOStatus TimedFileSystem::FileExists(const std::string& fname, + const IOOptions& options, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_file_exists_nanos); + return FileSystemWrapper::FileExists(fname, options, dbg); +} + +IOStatus TimedFileSystem::GetChildren(const std::string& dir, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_get_children_nanos); + return FileSystemWrapper::GetChildren(dir, options, result, dbg); +} + +IOStatus TimedFileSystem::GetChildrenFileAttributes( + const std::string& dir, const IOOptions& options, + std::vector* result, IODebugContext* dbg) { + PERF_TIMER_GUARD(env_get_children_file_attributes_nanos); + return FileSystemWrapper::GetChildrenFileAttributes(dir, options, result, + dbg); +} + +IOStatus TimedFileSystem::DeleteFile(const std::string& fname, + const IOOptions& options, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_delete_file_nanos); + return FileSystemWrapper::DeleteFile(fname, options, dbg); +} + +IOStatus TimedFileSystem::CreateDir(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_create_dir_nanos); + return FileSystemWrapper::CreateDir(dirname, options, dbg); +} + +IOStatus TimedFileSystem::CreateDirIfMissing(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_create_dir_if_missing_nanos); + return FileSystemWrapper::CreateDirIfMissing(dirname, options, dbg); +} + +IOStatus TimedFileSystem::DeleteDir(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_delete_dir_nanos); + return FileSystemWrapper::DeleteDir(dirname, options, dbg); +} + +IOStatus TimedFileSystem::GetFileSize(const std::string& fname, + const IOOptions& options, + uint64_t* file_size, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_get_file_size_nanos); + return FileSystemWrapper::GetFileSize(fname, options, file_size, dbg); +} + +IOStatus TimedFileSystem::GetFileModificationTime(const std::string& fname, + const IOOptions& options, + uint64_t* file_mtime, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_get_file_modification_time_nanos); + return FileSystemWrapper::GetFileModificationTime(fname, options, file_mtime, + dbg); +} + +IOStatus TimedFileSystem::RenameFile(const std::string& src, + const std::string& dst, + const IOOptions& options, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_rename_file_nanos); + return FileSystemWrapper::RenameFile(src, dst, options, dbg); +} + +IOStatus TimedFileSystem::LinkFile(const std::string& src, + const std::string& dst, + const IOOptions& options, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_link_file_nanos); + return FileSystemWrapper::LinkFile(src, dst, options, dbg); +} + +IOStatus TimedFileSystem::LockFile(const std::string& fname, + const IOOptions& options, FileLock** lock, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_lock_file_nanos); + return FileSystemWrapper::LockFile(fname, options, lock, dbg); +} + +IOStatus TimedFileSystem::UnlockFile(FileLock* lock, const IOOptions& options, + IODebugContext* dbg) { + PERF_TIMER_GUARD(env_unlock_file_nanos); + return FileSystemWrapper::UnlockFile(lock, options, dbg); +} + +IOStatus TimedFileSystem::NewLogger(const std::string& fname, + const IOOptions& options, + std::shared_ptr* result, 
+ IODebugContext* dbg) { + PERF_TIMER_GUARD(env_new_logger_nanos); + return FileSystemWrapper::NewLogger(fname, options, result, dbg); +} + +std::shared_ptr NewTimedFileSystem( + const std::shared_ptr& base) { + return std::make_shared(base); +} + +// An environment that measures function call times for filesystem +// operations, reporting results to variables in PerfContext. +Env* NewTimedEnv(Env* base_env) { + std::shared_ptr timed_fs = + NewTimedFileSystem(base_env->GetFileSystem()); + return new CompositeEnvWrapper(base_env, timed_fs); +} + + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.h new file mode 100644 index 0000000000..6c6ac89fb4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/env_timed.h @@ -0,0 +1,95 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#pragma once +#include "rocksdb/file_system.h" +namespace ROCKSDB_NAMESPACE { +class TimedFileSystem : public FileSystemWrapper { + public: + explicit TimedFileSystem(const std::shared_ptr& base); + + static const char* kClassName() { return "TimedFS"; } + const char* Name() const override { return kClassName(); } + + IOStatus NewSequentialFile(const std::string& fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewRandomAccessFile(const std::string& fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewWritableFile(const std::string& fname, const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewDirectory(const std::string& name, const IOOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus FileExists(const std::string& fname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus GetChildren(const std::string& dir, const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override; + + IOStatus GetChildrenFileAttributes(const std::string& dir, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override; + + IOStatus DeleteFile(const std::string& fname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus CreateDir(const std::string& dirname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus CreateDirIfMissing(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus DeleteDir(const std::string& dirname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus GetFileSize(const std::string& fname, const IOOptions& options, + uint64_t* file_size, IODebugContext* dbg) override; + + IOStatus GetFileModificationTime(const std::string& fname, + const IOOptions& options, + uint64_t* file_mtime, + IODebugContext* dbg) override; + + IOStatus RenameFile(const std::string& src, const 
std::string& dst,
+                      const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus LinkFile(const std::string& src, const std::string& dst,
+                    const IOOptions& options, IODebugContext* dbg) override;
+
+  IOStatus LockFile(const std::string& fname, const IOOptions& options,
+                    FileLock** lock, IODebugContext* dbg) override;
+
+  IOStatus UnlockFile(FileLock* lock, const IOOptions& options,
+                      IODebugContext* dbg) override;
+
+  IOStatus NewLogger(const std::string& fname, const IOOptions& options,
+                     std::shared_ptr<Logger>* result,
+                     IODebugContext* dbg) override;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.cc
new file mode 100644
index 0000000000..b0495a8c18
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.cc
@@ -0,0 +1,555 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright 2014 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// This test uses a custom Env to keep track of the state of a filesystem as of
+// the last "sync". It then checks for data loss errors by purposely dropping
+// file data (or entire files) not protected by a "sync".
+
+#include "utilities/fault_injection_env.h"
+
+#include <functional>
+#include <utility>
+
+#include "util/random.h"
+namespace ROCKSDB_NAMESPACE {
+
+// Assumes a file name, not a directory name like "/foo/bar/"
+std::string GetDirName(const std::string filename) {
+  size_t found = filename.find_last_of("/\\");
+  if (found == std::string::npos) {
+    return "";
+  } else {
+    return filename.substr(0, found);
+  }
+}
+
+// A basic file truncation function suitable for this test.
+Status Truncate(Env* env, const std::string& filename, uint64_t length) {
+  std::unique_ptr<SequentialFile> orig_file;
+  const EnvOptions options;
+  Status s = env->NewSequentialFile(filename, &orig_file, options);
+  if (!s.ok()) {
+    fprintf(stderr, "Cannot open file %s for truncation: %s\n",
+            filename.c_str(), s.ToString().c_str());
+    return s;
+  }
+
+  std::unique_ptr<char[]> scratch(new char[length]);
+  ROCKSDB_NAMESPACE::Slice result;
+  s = orig_file->Read(length, &result, scratch.get());
+#ifdef OS_WIN
+  orig_file.reset();
+#endif
+  if (s.ok()) {
+    std::string tmp_name = GetDirName(filename) + "/truncate.tmp";
+    std::unique_ptr<WritableFile> tmp_file;
+    s = env->NewWritableFile(tmp_name, &tmp_file, options);
+    if (s.ok()) {
+      s = tmp_file->Append(result);
+      if (s.ok()) {
+        s = env->RenameFile(tmp_name, filename);
+      } else {
+        fprintf(stderr, "Cannot append to temp file %s: %s\n",
+                tmp_name.c_str(), s.ToString().c_str());
+        env->DeleteFile(tmp_name);
+      }
+    }
+  }
+  if (!s.ok()) {
+    fprintf(stderr, "Cannot truncate file %s: %s\n", filename.c_str(),
+            s.ToString().c_str());
+  }
+
+  return s;
+}
+
+// Trim the trailing "/" at the end of `str`
+std::string TrimDirname(const std::string& str) {
+  size_t found = str.find_last_not_of("/");
+  if (found == std::string::npos) {
+    return str;
+  }
+  return str.substr(0, found + 1);
+}
+
+// Split a full path into its directory and file-name parts.
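+// For example, "/foo/bar/baz" yields {"/foo/bar", "baz"}. The input is
+// assumed to contain at least one path separator.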
+std::pair GetDirAndName(const std::string& name) { + std::string dirname = GetDirName(name); + std::string fname = name.substr(dirname.size() + 1); + return std::make_pair(dirname, fname); +} + +Status FileState::DropUnsyncedData(Env* env) const { + ssize_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_; + return Truncate(env, filename_, sync_pos); +} + +Status FileState::DropRandomUnsyncedData(Env* env, Random* rand) const { + ssize_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_; + assert(pos_ >= sync_pos); + int range = static_cast(pos_ - sync_pos); + uint64_t truncated_size = + static_cast(sync_pos) + rand->Uniform(range); + return Truncate(env, filename_, truncated_size); +} + +Status TestDirectory::Fsync() { + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + env_->SyncDir(dirname_); + return dir_->Fsync(); +} + +Status TestDirectory::Close() { + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + return dir_->Close(); +} + +TestRandomAccessFile::TestRandomAccessFile( + std::unique_ptr&& target, FaultInjectionTestEnv* env) + : target_(std::move(target)), env_(env) { + assert(target_); + assert(env_); +} + +Status TestRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const { + assert(env_); + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + + assert(target_); + return target_->Read(offset, n, result, scratch); +} + +Status TestRandomAccessFile::Prefetch(uint64_t offset, size_t n) { + assert(env_); + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + + assert(target_); + return target_->Prefetch(offset, n); +} + +Status TestRandomAccessFile::MultiRead(ReadRequest* reqs, size_t num_reqs) { + assert(env_); + if (!env_->IsFilesystemActive()) { + const Status s = env_->GetError(); + + assert(reqs); + for (size_t i = 0; i < num_reqs; ++i) { + reqs[i].status = s; + } + + return s; + } + + assert(target_); + return target_->MultiRead(reqs, num_reqs); +} + +TestWritableFile::TestWritableFile(const std::string& fname, + std::unique_ptr&& f, + FaultInjectionTestEnv* env) + : state_(fname), + target_(std::move(f)), + writable_file_opened_(true), + env_(env) { + assert(target_ != nullptr); + state_.pos_ = 0; +} + +TestWritableFile::~TestWritableFile() { + if (writable_file_opened_) { + Close().PermitUncheckedError(); + } +} + +Status TestWritableFile::Append(const Slice& data) { + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + Status s = target_->Append(data); + if (s.ok()) { + state_.pos_ += data.size(); + env_->WritableFileAppended(state_); + } + return s; +} + +Status TestWritableFile::Close() { + writable_file_opened_ = false; + Status s = target_->Close(); + if (s.ok()) { + env_->WritableFileClosed(state_); + } + return s; +} + +Status TestWritableFile::Flush() { + Status s = target_->Flush(); + if (s.ok() && env_->IsFilesystemActive()) { + state_.pos_at_last_flush_ = state_.pos_; + } + return s; +} + +Status TestWritableFile::Sync() { + if (!env_->IsFilesystemActive()) { + return Status::IOError("FaultInjectionTestEnv: not active"); + } + // No need to actual sync. 
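+  // The "sync" is purely logical: recording the current position below is
+  // what later shields this prefix of the file from DropUnsyncedData().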
+ state_.pos_at_last_sync_ = state_.pos_; + env_->WritableFileSynced(state_); + return Status::OK(); +} + +TestRandomRWFile::TestRandomRWFile(const std::string& /*fname*/, + std::unique_ptr&& f, + FaultInjectionTestEnv* env) + : target_(std::move(f)), file_opened_(true), env_(env) { + assert(target_ != nullptr); +} + +TestRandomRWFile::~TestRandomRWFile() { + if (file_opened_) { + Close().PermitUncheckedError(); + } +} + +Status TestRandomRWFile::Write(uint64_t offset, const Slice& data) { + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + return target_->Write(offset, data); +} + +Status TestRandomRWFile::Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const { + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + return target_->Read(offset, n, result, scratch); +} + +Status TestRandomRWFile::Close() { + file_opened_ = false; + return target_->Close(); +} + +Status TestRandomRWFile::Flush() { + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + return target_->Flush(); +} + +Status TestRandomRWFile::Sync() { + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + return target_->Sync(); +} + +Status FaultInjectionTestEnv::NewDirectory(const std::string& name, + std::unique_ptr* result) { + std::unique_ptr r; + Status s = target()->NewDirectory(name, &r); + assert(s.ok()); + if (!s.ok()) { + return s; + } + result->reset(new TestDirectory(this, TrimDirname(name), r.release())); + return Status::OK(); +} + +Status FaultInjectionTestEnv::NewWritableFile( + const std::string& fname, std::unique_ptr* result, + const EnvOptions& soptions) { + if (!IsFilesystemActive()) { + return GetError(); + } + // Not allow overwriting files + Status s = target()->FileExists(fname); + if (s.ok()) { + return Status::Corruption("File already exists."); + } else if (!s.IsNotFound()) { + assert(s.IsIOError()); + return s; + } + s = target()->NewWritableFile(fname, result, soptions); + if (s.ok()) { + result->reset(new TestWritableFile(fname, std::move(*result), this)); + // WritableFileWriter* file is opened + // again then it will be truncated - so forget our saved state. + UntrackFile(fname); + MutexLock l(&mutex_); + open_managed_files_.insert(fname); + auto dir_and_name = GetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + return s; +} + +Status FaultInjectionTestEnv::ReopenWritableFile( + const std::string& fname, std::unique_ptr* result, + const EnvOptions& soptions) { + if (!IsFilesystemActive()) { + return GetError(); + } + + bool exists; + Status s, exists_s = target()->FileExists(fname); + if (exists_s.IsNotFound()) { + exists = false; + } else if (exists_s.ok()) { + exists = true; + } else { + s = exists_s; + exists = false; + } + + if (s.ok()) { + s = target()->ReopenWritableFile(fname, result, soptions); + } + + // Only track files we created. Files created outside of this + // `FaultInjectionTestEnv` are not eligible for tracking/data dropping + // (for example, they may contain data a previous db_stress run expects to + // be recovered). This could be extended to track/drop data appended once + // the file is under `FaultInjectionTestEnv`'s control. + if (s.ok()) { + bool should_track; + { + MutexLock l(&mutex_); + if (db_file_state_.find(fname) != db_file_state_.end()) { + // It was written by this `Env` earlier. + assert(exists); + should_track = true; + } else if (!exists) { + // It was created by this `Env` just now. 
+ should_track = true; + open_managed_files_.insert(fname); + auto dir_and_name = GetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } else { + should_track = false; + } + } + if (should_track) { + result->reset(new TestWritableFile(fname, std::move(*result), this)); + } + } + return s; +} + +Status FaultInjectionTestEnv::NewRandomRWFile( + const std::string& fname, std::unique_ptr* result, + const EnvOptions& soptions) { + if (!IsFilesystemActive()) { + return GetError(); + } + Status s = target()->NewRandomRWFile(fname, result, soptions); + if (s.ok()) { + result->reset(new TestRandomRWFile(fname, std::move(*result), this)); + // WritableFileWriter* file is opened + // again then it will be truncated - so forget our saved state. + UntrackFile(fname); + MutexLock l(&mutex_); + open_managed_files_.insert(fname); + auto dir_and_name = GetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + return s; +} + +Status FaultInjectionTestEnv::NewRandomAccessFile( + const std::string& fname, std::unique_ptr* result, + const EnvOptions& soptions) { + if (!IsFilesystemActive()) { + return GetError(); + } + + assert(target()); + const Status s = target()->NewRandomAccessFile(fname, result, soptions); + if (!s.ok()) { + return s; + } + + assert(result); + result->reset(new TestRandomAccessFile(std::move(*result), this)); + + return Status::OK(); +} + +Status FaultInjectionTestEnv::DeleteFile(const std::string& f) { + if (!IsFilesystemActive()) { + return GetError(); + } + Status s = EnvWrapper::DeleteFile(f); + if (s.ok()) { + UntrackFile(f); + } + return s; +} + +Status FaultInjectionTestEnv::RenameFile(const std::string& s, + const std::string& t) { + if (!IsFilesystemActive()) { + return GetError(); + } + Status ret = EnvWrapper::RenameFile(s, t); + + if (ret.ok()) { + MutexLock l(&mutex_); + if (db_file_state_.find(s) != db_file_state_.end()) { + db_file_state_[t] = db_file_state_[s]; + db_file_state_.erase(s); + } + + auto sdn = GetDirAndName(s); + auto tdn = GetDirAndName(t); + if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { + auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; + assert(tlist.find(tdn.second) == tlist.end()); + tlist.insert(tdn.second); + } + } + + return ret; +} + +Status FaultInjectionTestEnv::LinkFile(const std::string& s, + const std::string& t) { + if (!IsFilesystemActive()) { + return GetError(); + } + Status ret = EnvWrapper::LinkFile(s, t); + + if (ret.ok()) { + MutexLock l(&mutex_); + if (db_file_state_.find(s) != db_file_state_.end()) { + db_file_state_[t] = db_file_state_[s]; + } + + auto sdn = GetDirAndName(s); + auto tdn = GetDirAndName(t); + if (dir_to_new_files_since_last_sync_[sdn.first].find(sdn.second) != + dir_to_new_files_since_last_sync_[sdn.first].end()) { + auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; + assert(tlist.find(tdn.second) == tlist.end()); + tlist.insert(tdn.second); + } + } + + return ret; +} + +void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) { + MutexLock l(&mutex_); + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { + db_file_state_[state.filename_] = state; + open_managed_files_.erase(state.filename_); + } +} + +void FaultInjectionTestEnv::WritableFileSynced(const FileState& state) { + MutexLock l(&mutex_); + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { + if 
(db_file_state_.find(state.filename_) == db_file_state_.end()) { + db_file_state_.insert(std::make_pair(state.filename_, state)); + } else { + db_file_state_[state.filename_] = state; + } + } +} + +void FaultInjectionTestEnv::WritableFileAppended(const FileState& state) { + MutexLock l(&mutex_); + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { + if (db_file_state_.find(state.filename_) == db_file_state_.end()) { + db_file_state_.insert(std::make_pair(state.filename_, state)); + } else { + db_file_state_[state.filename_] = state; + } + } +} + +// For every file that is not fully synced, make a call to `func` with +// FileState of the file as the parameter. +Status FaultInjectionTestEnv::DropFileData( + std::function func) { + Status s; + MutexLock l(&mutex_); + for (std::map::const_iterator it = + db_file_state_.begin(); + s.ok() && it != db_file_state_.end(); ++it) { + const FileState& state = it->second; + if (!state.IsFullySynced()) { + s = func(target(), state); + } + } + return s; +} + +Status FaultInjectionTestEnv::DropUnsyncedFileData() { + return DropFileData([&](Env* env, const FileState& state) { + return state.DropUnsyncedData(env); + }); +} + +Status FaultInjectionTestEnv::DropRandomUnsyncedFileData(Random* rnd) { + return DropFileData([&](Env* env, const FileState& state) { + return state.DropRandomUnsyncedData(env, rnd); + }); +} + +Status FaultInjectionTestEnv::DeleteFilesCreatedAfterLastDirSync() { + // Because DeleteFile access this container make a copy to avoid deadlock + std::map> map_copy; + { + MutexLock l(&mutex_); + map_copy.insert(dir_to_new_files_since_last_sync_.begin(), + dir_to_new_files_since_last_sync_.end()); + } + + for (auto& pair : map_copy) { + for (std::string name : pair.second) { + Status s = DeleteFile(pair.first + "/" + name); + if (!s.ok()) { + return s; + } + } + } + return Status::OK(); +} +void FaultInjectionTestEnv::ResetState() { + MutexLock l(&mutex_); + db_file_state_.clear(); + dir_to_new_files_since_last_sync_.clear(); + SetFilesystemActiveNoLock(true); +} + +void FaultInjectionTestEnv::UntrackFile(const std::string& f) { + MutexLock l(&mutex_); + auto dir_and_name = GetDirAndName(f); + dir_to_new_files_since_last_sync_[dir_and_name.first].erase( + dir_and_name.second); + db_file_state_.erase(f); + open_managed_files_.erase(f); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.h new file mode 100644 index 0000000000..549bfe7168 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_env.h @@ -0,0 +1,258 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright 2014 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +// This test uses a custom Env to keep track of the state of a filesystem as of +// the last "sync". It then checks for data loss errors by purposely dropping +// file data (or entire files) not protected by a "sync". 
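The Env-level fault injection declared below reduces to a small state machine per file: appends advance `pos_`, `Sync()` latches `pos_at_last_sync_`, and a simulated crash truncates the file back to that latch. A minimal, self-contained sketch of that model follows; `TrackedFile` and its members are illustrative stand-ins, not part of the vendored code:

```cpp
#include <cassert>
#include <string>

// Toy model of the per-file state machine used by the fault-injection Env:
// appends advance the write position, Sync() latches a high-water mark, and
// a simulated crash truncates everything past the latch.
struct TrackedFile {
  std::string data;
  size_t synced = 0;  // analogue of pos_at_last_sync_

  void Append(const std::string& d) { data += d; }
  void Sync() { synced = data.size(); }
  // Analogue of FileState::DropUnsyncedData(): keep only the synced prefix.
  void CrashAndRecover() { data.resize(synced); }
  bool IsFullySynced() const { return data.size() == synced; }
};

int main() {
  TrackedFile f;
  f.Append("durable|");
  f.Sync();
  f.Append("lost on crash");
  assert(!f.IsFullySynced());
  f.CrashAndRecover();
  assert(f.data == "durable|");
  return 0;
}
```

`FileState::IsFullySynced()` in the real header is the same comparison between the write position and the sync latch.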
+
+#pragma once
+
+#include <map>
+#include <set>
+#include <string>
+
+#include "file/filename.h"
+#include "rocksdb/env.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+class Random;
+class TestWritableFile;
+class FaultInjectionTestEnv;
+
+struct FileState {
+  std::string filename_;
+  ssize_t pos_;
+  ssize_t pos_at_last_sync_;
+  ssize_t pos_at_last_flush_;
+
+  explicit FileState(const std::string& filename)
+      : filename_(filename),
+        pos_(-1),
+        pos_at_last_sync_(-1),
+        pos_at_last_flush_(-1) {}
+
+  FileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {}
+
+  bool IsFullySynced() const { return pos_ <= 0 || pos_ == pos_at_last_sync_; }
+
+  Status DropUnsyncedData(Env* env) const;
+
+  Status DropRandomUnsyncedData(Env* env, Random* rand) const;
+};
+
+class TestRandomAccessFile : public RandomAccessFile {
+ public:
+  TestRandomAccessFile(std::unique_ptr<RandomAccessFile>&& target,
+                       FaultInjectionTestEnv* env);
+
+  Status Read(uint64_t offset, size_t n, Slice* result,
+              char* scratch) const override;
+
+  Status Prefetch(uint64_t offset, size_t n) override;
+
+  Status MultiRead(ReadRequest* reqs, size_t num_reqs) override;
+
+ private:
+  std::unique_ptr<RandomAccessFile> target_;
+  FaultInjectionTestEnv* env_;
+};
+
+// A wrapper around WritableFile that tracks how much of the file has been
+// written to and sync'ed.
+class TestWritableFile : public WritableFile {
+ public:
+  explicit TestWritableFile(const std::string& fname,
+                            std::unique_ptr<WritableFile>&& f,
+                            FaultInjectionTestEnv* env);
+  virtual ~TestWritableFile();
+  virtual Status Append(const Slice& data) override;
+  virtual Status Append(
+      const Slice& data,
+      const DataVerificationInfo& /*verification_info*/) override {
+    return Append(data);
+  }
+  virtual Status Truncate(uint64_t size) override {
+    return target_->Truncate(size);
+  }
+  virtual Status Close() override;
+  virtual Status Flush() override;
+  virtual Status Sync() override;
+  virtual bool IsSyncThreadSafe() const override { return true; }
+  virtual Status PositionedAppend(const Slice& data, uint64_t offset) override {
+    return target_->PositionedAppend(data, offset);
+  }
+  virtual Status PositionedAppend(
+      const Slice& data, uint64_t offset,
+      const DataVerificationInfo& /*verification_info*/) override {
+    return PositionedAppend(data, offset);
+  }
+  virtual bool use_direct_io() const override {
+    return target_->use_direct_io();
+  }
+
+ private:
+  FileState state_;
+  std::unique_ptr<WritableFile> target_;
+  bool writable_file_opened_;
+  FaultInjectionTestEnv* env_;
+};
+
+// A wrapper around RandomRWFile that lets the fault-injection Env veto
+// reads and writes.
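+// It keeps no buffer of its own: operations pass straight through unless
+// the filesystem has been marked inactive.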
+class TestRandomRWFile : public RandomRWFile { + public: + explicit TestRandomRWFile(const std::string& fname, + std::unique_ptr&& f, + FaultInjectionTestEnv* env); + virtual ~TestRandomRWFile(); + Status Write(uint64_t offset, const Slice& data) override; + Status Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const override; + Status Close() override; + Status Flush() override; + Status Sync() override; + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + bool use_direct_io() const override { return target_->use_direct_io(); }; + + private: + std::unique_ptr target_; + bool file_opened_; + FaultInjectionTestEnv* env_; +}; + +class TestDirectory : public Directory { + public: + explicit TestDirectory(FaultInjectionTestEnv* env, std::string dirname, + Directory* dir) + : env_(env), dirname_(dirname), dir_(dir) {} + ~TestDirectory() {} + + virtual Status Fsync() override; + virtual Status Close() override; + + private: + FaultInjectionTestEnv* env_; + std::string dirname_; + std::unique_ptr dir_; +}; + +class FaultInjectionTestEnv : public EnvWrapper { + public: + explicit FaultInjectionTestEnv(Env* base) + : EnvWrapper(base), filesystem_active_(true) {} + virtual ~FaultInjectionTestEnv() { error_.PermitUncheckedError(); } + + static const char* kClassName() { return "FaultInjectionTestEnv"; } + const char* Name() const override { return kClassName(); } + + Status NewDirectory(const std::string& name, + std::unique_ptr* result) override; + + Status NewWritableFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& soptions) override; + + Status ReopenWritableFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& soptions) override; + + Status NewRandomRWFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& soptions) override; + + Status NewRandomAccessFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& soptions) override; + + virtual Status DeleteFile(const std::string& f) override; + + virtual Status RenameFile(const std::string& s, + const std::string& t) override; + + virtual Status LinkFile(const std::string& s, const std::string& t) override; + +// Undef to eliminate clash on Windows +#undef GetFreeSpace + virtual Status GetFreeSpace(const std::string& path, + uint64_t* disk_free) override { + if (!IsFilesystemActive() && + error_.subcode() == IOStatus::SubCode::kNoSpace) { + *disk_free = 0; + return Status::OK(); + } else { + return target()->GetFreeSpace(path, disk_free); + } + } + + void WritableFileClosed(const FileState& state); + + void WritableFileSynced(const FileState& state); + + void WritableFileAppended(const FileState& state); + + // For every file that is not fully synced, make a call to `func` with + // FileState of the file as the parameter. + Status DropFileData(std::function func); + + Status DropUnsyncedFileData(); + + Status DropRandomUnsyncedFileData(Random* rnd); + + Status DeleteFilesCreatedAfterLastDirSync(); + + void ResetState(); + + void UntrackFile(const std::string& f); + + void SyncDir(const std::string& dirname) { + MutexLock l(&mutex_); + dir_to_new_files_since_last_sync_.erase(dirname); + } + + // Setting the filesystem to inactive is the test equivalent to simulating a + // system reset. Setting to inactive will freeze our saved filesystem state so + // that it will stop being recorded. It can then be reset back to the state at + // the time of the reset. 
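+  // In other words, deactivation freezes the bookkeeping and makes every
+  // subsequent call fail with the configured error, so a test can observe
+  // exactly the on-disk state a real crash would have left behind.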
+  bool IsFilesystemActive() {
+    MutexLock l(&mutex_);
+    return filesystem_active_;
+  }
+  void SetFilesystemActiveNoLock(
+      bool active, Status error = Status::Corruption("Not active")) {
+    error.PermitUncheckedError();
+    filesystem_active_ = active;
+    if (!active) {
+      error_ = error;
+    }
+    error.PermitUncheckedError();
+  }
+  void SetFilesystemActive(bool active,
+                           Status error = Status::Corruption("Not active")) {
+    error.PermitUncheckedError();
+    MutexLock l(&mutex_);
+    SetFilesystemActiveNoLock(active, error);
+    error.PermitUncheckedError();
+  }
+  void AssertNoOpenFile() { assert(open_managed_files_.empty()); }
+  Status GetError() { return error_; }
+
+ private:
+  port::Mutex mutex_;
+  std::map<std::string, FileState> db_file_state_;
+  std::set<std::string> open_managed_files_;
+  std::unordered_map<std::string, std::set<std::string>>
+      dir_to_new_files_since_last_sync_;
+  bool filesystem_active_;  // Record flushes, syncs, writes
+  Status error_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.cc
new file mode 100644
index 0000000000..5261d79ea1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.cc
@@ -0,0 +1,1071 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright 2014 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// This test uses a custom FileSystem to keep track of the state of a file
+// system as of the last "Sync". The data being written is cached in a
+// "buffer"; only when "Sync" is called is the data made persistent. This can
+// simulate the loss of file data (or entire files) not protected by a "Sync".
+// For any of the FileSystem-related operations, a specific error can be
+// returned when the file system is not activated, by specifying the
+// "IOStatus Error".
+
+#include "utilities/fault_injection_fs.h"
+
+#include <algorithm>
+#include <functional>
+#include <utility>
+
+#include "env/composite_env_wrapper.h"
+#include "port/lang.h"
+#include "port/stack_trace.h"
+#include "test_util/sync_point.h"
+#include "util/coding.h"
+#include "util/crc32c.h"
+#include "util/mutexlock.h"
+#include "util/random.h"
+#include "util/string_util.h"
+#include "util/xxhash.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+const std::string kNewFileNoOverwrite = "";
+
+// Assumes a file name, not a directory name like "/foo/bar/"
+std::string TestFSGetDirName(const std::string filename) {
+  size_t found = filename.find_last_of("/\\");
+  if (found == std::string::npos) {
+    return "";
+  } else {
+    return filename.substr(0, found);
+  }
+}
+
+// Trim the trailing "/" at the end of `str`
+std::string TestFSTrimDirname(const std::string& str) {
+  size_t found = str.find_last_not_of("/");
+  if (found == std::string::npos) {
+    return str;
+  }
+  return str.substr(0, found + 1);
+}
+
+// Split a full path into its directory and file-name parts.
+std::pair<std::string, std::string> TestFSGetDirAndName(
+    const std::string& name) {
+  std::string dirname = TestFSGetDirName(name);
+  std::string fname = name.substr(dirname.size() + 1);
+  return std::make_pair(dirname, fname);
+}
+
+// Calculate the checksum of the data with the corresponding checksum type.
+// If the checksum type is not supported, no checksum is returned.
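+// Only kCRC32c and kxxHash are handled below; any other type leaves
+// *checksum empty.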
+void CalculateTypedChecksum(const ChecksumType& checksum_type, const char* data, + size_t size, std::string* checksum) { + if (checksum_type == ChecksumType::kCRC32c) { + uint32_t v_crc32c = crc32c::Extend(0, data, size); + PutFixed32(checksum, v_crc32c); + return; + } else if (checksum_type == ChecksumType::kxxHash) { + uint32_t v = XXH32(data, size, 0); + PutFixed32(checksum, v); + } + return; +} + +IOStatus FSFileState::DropUnsyncedData() { + buffer_.resize(0); + return IOStatus::OK(); +} + +IOStatus FSFileState::DropRandomUnsyncedData(Random* rand) { + int range = static_cast(buffer_.size()); + size_t truncated_size = static_cast(rand->Uniform(range)); + buffer_.resize(truncated_size); + return IOStatus::OK(); +} + +IOStatus TestFSDirectory::Fsync(const IOOptions& options, IODebugContext* dbg) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + fs_->SyncDir(dirname_); + IOStatus s = dir_->Fsync(options, dbg); + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + return s; +} + +IOStatus TestFSDirectory::Close(const IOOptions& options, IODebugContext* dbg) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + IOStatus s = dir_->Close(options, dbg); + return s; +} + +IOStatus TestFSDirectory::FsyncWithDirOptions( + const IOOptions& options, IODebugContext* dbg, + const DirFsyncOptions& dir_fsync_options) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + fs_->SyncDir(dirname_); + IOStatus s = dir_->FsyncWithDirOptions(options, dbg, dir_fsync_options); + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + return s; +} + +TestFSWritableFile::TestFSWritableFile(const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr&& f, + FaultInjectionTestFS* fs) + : state_(fname), + file_opts_(file_opts), + target_(std::move(f)), + writable_file_opened_(true), + fs_(fs) { + assert(target_ != nullptr); + state_.pos_ = 0; +} + +TestFSWritableFile::~TestFSWritableFile() { + if (writable_file_opened_) { + Close(IOOptions(), nullptr).PermitUncheckedError(); + } +} + +IOStatus TestFSWritableFile::Append(const Slice& data, const IOOptions& options, + IODebugContext* dbg) { + MutexLock l(&mutex_); + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + if (target_->use_direct_io()) { + target_->Append(data, options, dbg).PermitUncheckedError(); + } else { + state_.buffer_.append(data.data(), data.size()); + state_.pos_ += data.size(); + fs_->WritableFileAppended(state_); + } + IOStatus io_s = fs_->InjectWriteError(state_.filename_); + return io_s; +} + +// By setting the IngestDataCorruptionBeforeWrite(), the data corruption is +// simulated. 
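+// The overload below re-computes the checksum of `data` with the configured
+// handoff type and compares it against verification_info.checksum; any
+// mismatch surfaces as Corruption before the bytes reach the target file.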
+IOStatus TestFSWritableFile::Append( + const Slice& data, const IOOptions& options, + const DataVerificationInfo& verification_info, IODebugContext* dbg) { + MutexLock l(&mutex_); + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + if (fs_->ShouldDataCorruptionBeforeWrite()) { + return IOStatus::Corruption("Data is corrupted!"); + } + + // Calculate the checksum + std::string checksum; + CalculateTypedChecksum(fs_->GetChecksumHandoffFuncType(), data.data(), + data.size(), &checksum); + if (fs_->GetChecksumHandoffFuncType() != ChecksumType::kNoChecksum && + checksum != verification_info.checksum.ToString()) { + std::string msg = "Data is corrupted! Origin data checksum: " + + verification_info.checksum.ToString() + + "current data checksum: " + checksum; + return IOStatus::Corruption(msg); + } + if (target_->use_direct_io()) { + target_->Append(data, options, dbg).PermitUncheckedError(); + } else { + state_.buffer_.append(data.data(), data.size()); + state_.pos_ += data.size(); + fs_->WritableFileAppended(state_); + } + IOStatus io_s = fs_->InjectWriteError(state_.filename_); + return io_s; +} + +IOStatus TestFSWritableFile::PositionedAppend( + const Slice& data, uint64_t offset, const IOOptions& options, + const DataVerificationInfo& verification_info, IODebugContext* dbg) { + MutexLock l(&mutex_); + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + if (fs_->ShouldDataCorruptionBeforeWrite()) { + return IOStatus::Corruption("Data is corrupted!"); + } + + // Calculate the checksum + std::string checksum; + CalculateTypedChecksum(fs_->GetChecksumHandoffFuncType(), data.data(), + data.size(), &checksum); + if (fs_->GetChecksumHandoffFuncType() != ChecksumType::kNoChecksum && + checksum != verification_info.checksum.ToString()) { + std::string msg = "Data is corrupted! Origin data checksum: " + + verification_info.checksum.ToString() + + "current data checksum: " + checksum; + return IOStatus::Corruption(msg); + } + target_->PositionedAppend(data, offset, options, dbg); + IOStatus io_s = fs_->InjectWriteError(state_.filename_); + return io_s; +} + +IOStatus TestFSWritableFile::Close(const IOOptions& options, + IODebugContext* dbg) { + MutexLock l(&mutex_); + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + writable_file_opened_ = false; + IOStatus io_s; + if (!target_->use_direct_io()) { + io_s = target_->Append(state_.buffer_, options, dbg); + } + if (io_s.ok()) { + state_.buffer_.resize(0); + // Ignore sync errors + target_->Sync(options, dbg).PermitUncheckedError(); + io_s = target_->Close(options, dbg); + } + if (io_s.ok()) { + fs_->WritableFileClosed(state_); + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + return io_s; +} + +IOStatus TestFSWritableFile::Flush(const IOOptions&, IODebugContext*) { + MutexLock l(&mutex_); + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + if (fs_->IsFilesystemActive()) { + state_.pos_at_last_flush_ = state_.pos_; + } + return IOStatus::OK(); +} + +IOStatus TestFSWritableFile::Sync(const IOOptions& options, + IODebugContext* dbg) { + MutexLock l(&mutex_); + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + if (target_->use_direct_io()) { + // For Direct IO mode, we don't buffer anything in TestFSWritableFile. 
+ // So just return + return IOStatus::OK(); + } + IOStatus io_s = target_->Append(state_.buffer_, options, dbg); + state_.buffer_.resize(0); + // Ignore sync errors + target_->Sync(options, dbg).PermitUncheckedError(); + state_.pos_at_last_sync_ = state_.pos_; + fs_->WritableFileSynced(state_); + return io_s; +} + +IOStatus TestFSWritableFile::RangeSync(uint64_t offset, uint64_t nbytes, + const IOOptions& options, + IODebugContext* dbg) { + MutexLock l(&mutex_); + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + // Assumes caller passes consecutive byte ranges. + uint64_t sync_limit = offset + nbytes; + uint64_t buf_begin = + state_.pos_at_last_sync_ < 0 ? 0 : state_.pos_at_last_sync_; + + IOStatus io_s; + if (sync_limit < buf_begin) { + return io_s; + } + uint64_t num_to_sync = std::min(static_cast(state_.buffer_.size()), + sync_limit - buf_begin); + Slice buf_to_sync(state_.buffer_.data(), num_to_sync); + io_s = target_->Append(buf_to_sync, options, dbg); + state_.buffer_ = state_.buffer_.substr(num_to_sync); + // Ignore sync errors + target_->RangeSync(offset, nbytes, options, dbg).PermitUncheckedError(); + state_.pos_at_last_sync_ = offset + num_to_sync; + fs_->WritableFileSynced(state_); + return io_s; +} + +TestFSRandomRWFile::TestFSRandomRWFile(const std::string& /*fname*/, + std::unique_ptr&& f, + FaultInjectionTestFS* fs) + : target_(std::move(f)), file_opened_(true), fs_(fs) { + assert(target_ != nullptr); +} + +TestFSRandomRWFile::~TestFSRandomRWFile() { + if (file_opened_) { + Close(IOOptions(), nullptr).PermitUncheckedError(); + } +} + +IOStatus TestFSRandomRWFile::Write(uint64_t offset, const Slice& data, + const IOOptions& options, + IODebugContext* dbg) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + return target_->Write(offset, data, options, dbg); +} + +IOStatus TestFSRandomRWFile::Read(uint64_t offset, size_t n, + const IOOptions& options, Slice* result, + char* scratch, IODebugContext* dbg) const { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + return target_->Read(offset, n, options, result, scratch, dbg); +} + +IOStatus TestFSRandomRWFile::Close(const IOOptions& options, + IODebugContext* dbg) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + file_opened_ = false; + return target_->Close(options, dbg); +} + +IOStatus TestFSRandomRWFile::Flush(const IOOptions& options, + IODebugContext* dbg) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + return target_->Flush(options, dbg); +} + +IOStatus TestFSRandomRWFile::Sync(const IOOptions& options, + IODebugContext* dbg) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + return target_->Sync(options, dbg); +} + +TestFSRandomAccessFile::TestFSRandomAccessFile( + const std::string& /*fname*/, std::unique_ptr&& f, + FaultInjectionTestFS* fs) + : target_(std::move(f)), fs_(fs) { + assert(target_ != nullptr); +} + +IOStatus TestFSRandomAccessFile::Read(uint64_t offset, size_t n, + const IOOptions& options, Slice* result, + char* scratch, + IODebugContext* dbg) const { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + IOStatus s = target_->Read(offset, n, options, result, scratch, dbg); + if (s.ok()) { + s = fs_->InjectThreadSpecificReadError( + FaultInjectionTestFS::ErrorOperation::kRead, result, use_direct_io(), + scratch, /*need_count_increase=*/true, /*fault_injected=*/nullptr); + } + if (s.ok() && fs_->ShouldInjectRandomReadError()) { + return IOStatus::IOError("Injected read error"); + } + 
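+  // Note the layering: injected failures are only consulted after the real
+  // read succeeds, so genuine I/O errors from the target file take precedence.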
return s; +} + +IOStatus TestFSRandomAccessFile::ReadAsync( + FSReadRequest& req, const IOOptions& opts, + std::function cb, void* cb_arg, + void** io_handle, IOHandleDeleter* del_fn, IODebugContext* /*dbg*/) { + IOStatus ret; + IOStatus s; + FSReadRequest res; + if (!fs_->IsFilesystemActive()) { + ret = fs_->GetError(); + } else { + ret = fs_->InjectThreadSpecificReadError( + FaultInjectionTestFS::ErrorOperation::kRead, &res.result, + use_direct_io(), req.scratch, /*need_count_increase=*/true, + /*fault_injected=*/nullptr); + } + if (ret.ok()) { + if (fs_->ShouldInjectRandomReadError()) { + ret = IOStatus::IOError("Injected read error"); + } else { + s = target_->ReadAsync(req, opts, cb, cb_arg, io_handle, del_fn, nullptr); + } + } + if (!ret.ok()) { + res.status = ret; + cb(res, cb_arg); + } + return s; +} + +IOStatus TestFSRandomAccessFile::MultiRead(FSReadRequest* reqs, size_t num_reqs, + const IOOptions& options, + IODebugContext* dbg) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + IOStatus s = target_->MultiRead(reqs, num_reqs, options, dbg); + bool injected_error = false; + for (size_t i = 0; i < num_reqs; i++) { + if (!reqs[i].status.ok()) { + // Already seeing an error. + break; + } + bool this_injected_error; + reqs[i].status = fs_->InjectThreadSpecificReadError( + FaultInjectionTestFS::ErrorOperation::kMultiReadSingleReq, + &(reqs[i].result), use_direct_io(), reqs[i].scratch, + /*need_count_increase=*/true, + /*fault_injected=*/&this_injected_error); + injected_error |= this_injected_error; + } + if (s.ok()) { + s = fs_->InjectThreadSpecificReadError( + FaultInjectionTestFS::ErrorOperation::kMultiRead, nullptr, + use_direct_io(), nullptr, /*need_count_increase=*/!injected_error, + /*fault_injected=*/nullptr); + } + if (s.ok() && fs_->ShouldInjectRandomReadError()) { + return IOStatus::IOError("Injected read error"); + } + return s; +} + +size_t TestFSRandomAccessFile::GetUniqueId(char* id, size_t max_size) const { + if (fs_->ShouldFailGetUniqueId()) { + return 0; + } else { + return target_->GetUniqueId(id, max_size); + } +} +IOStatus TestFSSequentialFile::Read(size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) { + IOStatus s = target()->Read(n, options, result, scratch, dbg); + if (s.ok() && fs_->ShouldInjectRandomReadError()) { + return IOStatus::IOError("Injected seq read error"); + } + return s; +} + +IOStatus TestFSSequentialFile::PositionedRead(uint64_t offset, size_t n, + const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) { + IOStatus s = + target()->PositionedRead(offset, n, options, result, scratch, dbg); + if (s.ok() && fs_->ShouldInjectRandomReadError()) { + return IOStatus::IOError("Injected seq positioned read error"); + } + return s; +} + +IOStatus FaultInjectionTestFS::NewDirectory( + const std::string& name, const IOOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + std::unique_ptr r; + IOStatus io_s = target()->NewDirectory(name, options, &r, dbg); + if (!io_s.ok()) { + return io_s; + } + result->reset( + new TestFSDirectory(this, TestFSTrimDirname(name), r.release())); + return IOStatus::OK(); +} + +IOStatus FaultInjectionTestFS::NewWritableFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + + if (ShouldUseDiretWritable(fname)) { + return 
target()->NewWritableFile(fname, file_opts, result, dbg); + } + + IOStatus io_s = target()->NewWritableFile(fname, file_opts, result, dbg); + if (io_s.ok()) { + result->reset( + new TestFSWritableFile(fname, file_opts, std::move(*result), this)); + // WritableFileWriter* file is opened + // again then it will be truncated - so forget our saved state. + UntrackFile(fname); + { + MutexLock l(&mutex_); + open_managed_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + // The new file could overwrite an old one. Here we simplify + // the implementation by assuming no file of this name after + // dropping unsynced files. + list[dir_and_name.second] = kNewFileNoOverwrite; + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + } + return io_s; +} + +IOStatus FaultInjectionTestFS::ReopenWritableFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + if (ShouldUseDiretWritable(fname)) { + return target()->ReopenWritableFile(fname, file_opts, result, dbg); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + + bool exists; + IOStatus io_s, + exists_s = target()->FileExists(fname, IOOptions(), nullptr /* dbg */); + if (exists_s.IsNotFound()) { + exists = false; + } else if (exists_s.ok()) { + exists = true; + } else { + io_s = exists_s; + exists = false; + } + + if (io_s.ok()) { + io_s = target()->ReopenWritableFile(fname, file_opts, result, dbg); + } + + // Only track files we created. Files created outside of this + // `FaultInjectionTestFS` are not eligible for tracking/data dropping + // (for example, they may contain data a previous db_stress run expects to + // be recovered). This could be extended to track/drop data appended once + // the file is under `FaultInjectionTestFS`'s control. + if (io_s.ok()) { + bool should_track; + { + MutexLock l(&mutex_); + if (db_file_state_.find(fname) != db_file_state_.end()) { + // It was written by this `FileSystem` earlier. + assert(exists); + should_track = true; + } else if (!exists) { + // It was created by this `FileSystem` just now. + should_track = true; + open_managed_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list[dir_and_name.second] = kNewFileNoOverwrite; + } else { + should_track = false; + } + } + if (should_track) { + result->reset( + new TestFSWritableFile(fname, file_opts, std::move(*result), this)); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + } + return io_s; +} + +IOStatus FaultInjectionTestFS::NewRandomRWFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + if (ShouldUseDiretWritable(fname)) { + return target()->NewRandomRWFile(fname, file_opts, result, dbg); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + IOStatus io_s = target()->NewRandomRWFile(fname, file_opts, result, dbg); + if (io_s.ok()) { + result->reset(new TestFSRandomRWFile(fname, std::move(*result), this)); + // WritableFileWriter* file is opened + // again then it will be truncated - so forget our saved state. 
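+    // UntrackFile() drops the per-file sync state and the
+    // new-files-since-last-dir-sync entry before the name is re-registered
+    // just below.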
+ UntrackFile(fname); + { + MutexLock l(&mutex_); + open_managed_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + // It could be overwriting an old file, but we simplify the + // implementation by ignoring it. + list[dir_and_name.second] = kNewFileNoOverwrite; + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + } + return io_s; +} + +IOStatus FaultInjectionTestFS::NewRandomAccessFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + if (ShouldInjectRandomReadError()) { + return IOStatus::IOError("Injected error when open random access file"); + } + IOStatus io_s = InjectThreadSpecificReadError(ErrorOperation::kOpen, nullptr, + false, nullptr, + /*need_count_increase=*/true, + /*fault_injected=*/nullptr); + if (io_s.ok()) { + io_s = target()->NewRandomAccessFile(fname, file_opts, result, dbg); + } + if (io_s.ok()) { + result->reset(new TestFSRandomAccessFile(fname, std::move(*result), this)); + } + return io_s; +} + +IOStatus FaultInjectionTestFS::NewSequentialFile( + const std::string& fname, const FileOptions& file_opts, + std::unique_ptr* result, IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + + if (ShouldInjectRandomReadError()) { + return IOStatus::IOError("Injected read error when creating seq file"); + } + IOStatus io_s = target()->NewSequentialFile(fname, file_opts, result, dbg); + if (io_s.ok()) { + result->reset(new TestFSSequentialFile(std::move(*result), this)); + } + return io_s; +} + +IOStatus FaultInjectionTestFS::DeleteFile(const std::string& f, + const IOOptions& options, + IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + IOStatus io_s = FileSystemWrapper::DeleteFile(f, options, dbg); + if (io_s.ok()) { + UntrackFile(f); + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + } + return io_s; +} + +IOStatus FaultInjectionTestFS::RenameFile(const std::string& s, + const std::string& t, + const IOOptions& options, + IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + + // We preserve contents of overwritten files up to a size threshold. + // We could keep previous file in another name, but we need to worry about + // garbage collect the those files. We do it if it is needed later. + // We ignore I/O errors here for simplicity. 
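+  // Only overwritten files smaller than 1 KiB have their contents preserved
+  // (see the size check below); larger targets are treated as brand new.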
+ std::string previous_contents = kNewFileNoOverwrite; + if (target()->FileExists(t, IOOptions(), nullptr).ok()) { + uint64_t file_size; + if (target()->GetFileSize(t, IOOptions(), &file_size, nullptr).ok() && + file_size < 1024) { + ReadFileToString(target(), t, &previous_contents).PermitUncheckedError(); + } + } + IOStatus io_s = FileSystemWrapper::RenameFile(s, t, options, dbg); + + if (io_s.ok()) { + { + MutexLock l(&mutex_); + if (db_file_state_.find(s) != db_file_state_.end()) { + db_file_state_[t] = db_file_state_[s]; + db_file_state_.erase(s); + } + + auto sdn = TestFSGetDirAndName(s); + auto tdn = TestFSGetDirAndName(t); + if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { + auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; + assert(tlist.find(tdn.second) == tlist.end()); + tlist[tdn.second] = previous_contents; + } + } + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + + return io_s; +} + +IOStatus FaultInjectionTestFS::LinkFile(const std::string& s, + const std::string& t, + const IOOptions& options, + IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + + // Using the value in `dir_to_new_files_since_last_sync_` for the source file + // may be a more reasonable choice. + std::string previous_contents = kNewFileNoOverwrite; + + IOStatus io_s = FileSystemWrapper::LinkFile(s, t, options, dbg); + + if (io_s.ok()) { + { + MutexLock l(&mutex_); + if (db_file_state_.find(s) != db_file_state_.end()) { + db_file_state_[t] = db_file_state_[s]; + } + + auto sdn = TestFSGetDirAndName(s); + auto tdn = TestFSGetDirAndName(t); + if (dir_to_new_files_since_last_sync_[sdn.first].find(sdn.second) != + dir_to_new_files_since_last_sync_[sdn.first].end()) { + auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; + assert(tlist.find(tdn.second) == tlist.end()); + tlist[tdn.second] = previous_contents; + } + } + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + + return io_s; +} + +IOStatus FaultInjectionTestFS::Poll(std::vector& io_handles, + size_t min_completions) { + return target()->Poll(io_handles, min_completions); +} + +IOStatus FaultInjectionTestFS::AbortIO(std::vector& io_handles) { + return target()->AbortIO(io_handles); +} + +void FaultInjectionTestFS::WritableFileClosed(const FSFileState& state) { + MutexLock l(&mutex_); + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { + db_file_state_[state.filename_] = state; + open_managed_files_.erase(state.filename_); + } +} + +void FaultInjectionTestFS::WritableFileSynced(const FSFileState& state) { + MutexLock l(&mutex_); + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { + if (db_file_state_.find(state.filename_) == db_file_state_.end()) { + db_file_state_.insert(std::make_pair(state.filename_, state)); + } else { + db_file_state_[state.filename_] = state; + } + } +} + +void FaultInjectionTestFS::WritableFileAppended(const FSFileState& state) { + MutexLock l(&mutex_); + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { + if (db_file_state_.find(state.filename_) == db_file_state_.end()) { + db_file_state_.insert(std::make_pair(state.filename_, state)); + } else { + db_file_state_[state.filename_] = state; + } + } +} + +IOStatus FaultInjectionTestFS::DropUnsyncedFileData() { + IOStatus io_s; + MutexLock l(&mutex_); + for 
(std::map<std::string, FSFileState>::iterator it = db_file_state_.begin();
+       io_s.ok() && it != db_file_state_.end(); ++it) {
+    FSFileState& fs_state = it->second;
+    if (!fs_state.IsFullySynced()) {
+      io_s = fs_state.DropUnsyncedData();
+    }
+  }
+  return io_s;
+}
+
+IOStatus FaultInjectionTestFS::DropRandomUnsyncedFileData(Random* rnd) {
+  IOStatus io_s;
+  MutexLock l(&mutex_);
+  for (std::map<std::string, FSFileState>::iterator it =
+           db_file_state_.begin();
+       io_s.ok() && it != db_file_state_.end(); ++it) {
+    FSFileState& fs_state = it->second;
+    if (!fs_state.IsFullySynced()) {
+      io_s = fs_state.DropRandomUnsyncedData(rnd);
+    }
+  }
+  return io_s;
+}
+
+IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync(
+    const IOOptions& options, IODebugContext* dbg) {
+  // Because DeleteFile accesses this container, make a copy to avoid deadlock.
+  std::map<std::string, std::map<std::string, std::string>> map_copy;
+  {
+    MutexLock l(&mutex_);
+    map_copy.insert(dir_to_new_files_since_last_sync_.begin(),
+                    dir_to_new_files_since_last_sync_.end());
+  }
+
+  for (auto& pair : map_copy) {
+    for (auto& file_pair : pair.second) {
+      if (file_pair.second == kNewFileNoOverwrite) {
+        IOStatus io_s =
+            DeleteFile(pair.first + "/" + file_pair.first, options, dbg);
+        if (!io_s.ok()) {
+          return io_s;
+        }
+      } else {
+        IOStatus io_s =
+            WriteStringToFile(target(), file_pair.second,
+                              pair.first + "/" + file_pair.first, true);
+        if (!io_s.ok()) {
+          return io_s;
+        }
+      }
+    }
+  }
+  return IOStatus::OK();
+}
+
+void FaultInjectionTestFS::ResetState() {
+  MutexLock l(&mutex_);
+  db_file_state_.clear();
+  dir_to_new_files_since_last_sync_.clear();
+  SetFilesystemActiveNoLock(true);
+}
+
+void FaultInjectionTestFS::UntrackFile(const std::string& f) {
+  MutexLock l(&mutex_);
+  auto dir_and_name = TestFSGetDirAndName(f);
+  dir_to_new_files_since_last_sync_[dir_and_name.first].erase(
+      dir_and_name.second);
+  db_file_state_.erase(f);
+  open_managed_files_.erase(f);
+}
+
+IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError(
+    ErrorOperation op, Slice* result, bool direct_io, char* scratch,
+    bool need_count_increase, bool* fault_injected) {
+  bool dummy_bool;
+  bool& ret_fault_injected = fault_injected ? *fault_injected : dummy_bool;
+  ret_fault_injected = false;
+  ErrorContext* ctx =
+      static_cast<ErrorContext*>(thread_local_error_->Get());
+  if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in) {
+    return IOStatus::OK();
+  }
+
+  if (ctx->rand.OneIn(ctx->one_in)) {
+    if (ctx->count == 0) {
+      ctx->message = "";
+    }
+    if (need_count_increase) {
+      ctx->count++;
+    }
+    if (ctx->callstack) {
+      free(ctx->callstack);
+    }
+    ctx->callstack = port::SaveStack(&ctx->frames);
+
+    if (op != ErrorOperation::kMultiReadSingleReq) {
+      // Likely a non-per-read status code for MultiRead.
+      ctx->message += "error; ";
+      ret_fault_injected = true;
+      return IOStatus::IOError();
+    } else if (Random::GetTLSInstance()->OneIn(8)) {
+      assert(result);
+      // For a small chance, keep the status OK but turn the result
+      // empty, which is supposed to be caught by a check.
+      *result = Slice();
+      ctx->message += "inject empty result; ";
+      ret_fault_injected = true;
+    } else if (!direct_io && Random::GetTLSInstance()->OneIn(7) &&
+               scratch != nullptr && result->data() == scratch) {
+      assert(result);
+      // With direct I/O, many extra bytes might be read, so corrupting
+      // one byte might not cause a checksum mismatch. Skip checksum
+      // corruption injection.
+      // We only corrupt data if the result is filled into `scratch`. In other
+      // cases, the data might not be modifiable (e.g., mmapped files) or
+      // modifying it might have unintended side effects.
+      // For a small chance, keep the status OK but corrupt the result
+      // in a way that checksum checking is supposed to fail.
+      // Corrupt the last byte, which is supposed to be a checksum byte.
+      // This works for CRC; not 100% sure for xxhash, and we will adjust
+      // if that is not the case.
+      const_cast<char*>(result->data())[result->size() - 1]++;
+      ctx->message += "corrupt last byte; ";
+      ret_fault_injected = true;
+    } else {
+      ctx->message += "error result multiget single; ";
+      ret_fault_injected = true;
+      return IOStatus::IOError();
+    }
+  }
+  return IOStatus::OK();
+}
+
+bool FaultInjectionTestFS::TryParseFileName(const std::string& file_name,
+                                            uint64_t* number, FileType* type) {
+  std::size_t found = file_name.find_last_of("/");
+  std::string file = file_name.substr(found);
+  return ParseFileName(file, number, type);
+}
+
+IOStatus FaultInjectionTestFS::InjectWriteError(const std::string& file_name) {
+  MutexLock l(&mutex_);
+  if (!enable_write_error_injection_ || !write_error_one_in_) {
+    return IOStatus::OK();
+  }
+  bool allowed_type = false;
+
+  if (inject_for_all_file_types_) {
+    allowed_type = true;
+  } else {
+    uint64_t number;
+    FileType cur_type = kTempFile;
+    if (TryParseFileName(file_name, &number, &cur_type)) {
+      for (const auto& type : write_error_allowed_types_) {
+        if (cur_type == type) {
+          allowed_type = true;
+        }
+      }
+    }
+  }
+
+  if (allowed_type) {
+    if (write_error_rand_.OneIn(write_error_one_in_)) {
+      return GetError();
+    }
+  }
+  return IOStatus::OK();
+}
+
+IOStatus FaultInjectionTestFS::InjectMetadataWriteError() {
+  {
+    MutexLock l(&mutex_);
+    if (!enable_metadata_write_error_injection_ ||
+        !metadata_write_error_one_in_ ||
+        !write_error_rand_.OneIn(metadata_write_error_one_in_)) {
+      return IOStatus::OK();
+    }
+  }
+  TEST_SYNC_POINT("FaultInjectionTestFS::InjectMetadataWriteError:Injected");
+  return IOStatus::IOError();
+}
+
+void FaultInjectionTestFS::PrintFaultBacktrace() {
+#if defined(OS_LINUX)
+  ErrorContext* ctx =
+      static_cast<ErrorContext*>(thread_local_error_->Get());
+  if (ctx == nullptr) {
+    return;
+  }
+  fprintf(stderr, "Injected error type = %d\n", ctx->type);
+  fprintf(stderr, "Message: %s\n", ctx->message.c_str());
+  port::PrintAndFreeStack(ctx->callstack, ctx->frames);
+  ctx->callstack = nullptr;
+#endif
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.h
new file mode 100644
index 0000000000..cab0051bd1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_fs.h
@@ -0,0 +1,593 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright 2014 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// This test uses a custom FileSystem to keep track of the state of a file
+// system since the last "Sync". The data being written is cached in a
+// "buffer". Only when "Sync" is called does the data become persistent. It
+// can simulate the loss of file data (or of entire files) not protected by
+// a "Sync".
+// For any of the FileSystem-related operations, a specific error can be
+// returned when the file system is not activated, by specifying the
+// "IOStatus Error".
+
+#pragma once
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+
+#include "file/filename.h"
+#include "rocksdb/file_system.h"
+#include "util/mutexlock.h"
+#include "util/random.h"
+#include "util/thread_local.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TestFSWritableFile;
+class FaultInjectionTestFS;
+
+struct FSFileState {
+  std::string filename_;
+  ssize_t pos_;
+  ssize_t pos_at_last_sync_;
+  ssize_t pos_at_last_flush_;
+  std::string buffer_;
+
+  explicit FSFileState(const std::string& filename)
+      : filename_(filename),
+        pos_(-1),
+        pos_at_last_sync_(-1),
+        pos_at_last_flush_(-1) {}
+
+  FSFileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {}
+
+  bool IsFullySynced() const { return pos_ <= 0 || pos_ == pos_at_last_sync_; }
+
+  IOStatus DropUnsyncedData();
+
+  IOStatus DropRandomUnsyncedData(Random* rand);
+};
+
+// A wrapper around an FSWritableFile that records the file state as it is
+// written to or sync'ed.
+class TestFSWritableFile : public FSWritableFile {
+ public:
+  explicit TestFSWritableFile(const std::string& fname,
+                              const FileOptions& file_opts,
+                              std::unique_ptr<FSWritableFile>&& f,
+                              FaultInjectionTestFS* fs);
+  virtual ~TestFSWritableFile();
+  virtual IOStatus Append(const Slice& data, const IOOptions&,
+                          IODebugContext*) override;
+  virtual IOStatus Append(const Slice& data, const IOOptions& options,
+                          const DataVerificationInfo& verification_info,
+                          IODebugContext* dbg) override;
+  virtual IOStatus Truncate(uint64_t size, const IOOptions& options,
+                            IODebugContext* dbg) override {
+    return target_->Truncate(size, options, dbg);
+  }
+  virtual IOStatus Close(const IOOptions& options,
+                         IODebugContext* dbg) override;
+  virtual IOStatus Flush(const IOOptions&, IODebugContext*) override;
+  virtual IOStatus Sync(const IOOptions& options,
+                        IODebugContext* dbg) override;
+  virtual IOStatus RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/,
+                             const IOOptions& options,
+                             IODebugContext* dbg) override;
+  virtual bool IsSyncThreadSafe() const override { return true; }
+  virtual IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                                    const IOOptions& options,
+                                    IODebugContext* dbg) override {
+    return target_->PositionedAppend(data, offset, options, dbg);
+  }
+  IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+                            const IOOptions& options,
+                            const DataVerificationInfo& verification_info,
+                            IODebugContext* dbg) override;
+  virtual size_t GetRequiredBufferAlignment() const override {
+    return target_->GetRequiredBufferAlignment();
+  }
+  virtual bool use_direct_io() const override {
+    return target_->use_direct_io();
+  }
+
+ private:
+  FSFileState state_;  // Needs protection by mutex_
+  FileOptions file_opts_;
+  std::unique_ptr<FSWritableFile> target_;
+  bool writable_file_opened_;
+  FaultInjectionTestFS* fs_;
+  port::Mutex mutex_;
+};
+
+// A wrapper around an FSRandomRWFile that records the file state as it is
+// written to or sync'ed.
+class TestFSRandomRWFile : public FSRandomRWFile { + public: + explicit TestFSRandomRWFile(const std::string& fname, + std::unique_ptr&& f, + FaultInjectionTestFS* fs); + virtual ~TestFSRandomRWFile(); + IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options, + IODebugContext* dbg) override; + IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const override; + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override; + IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override; + IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override; + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + bool use_direct_io() const override { return target_->use_direct_io(); }; + + private: + std::unique_ptr target_; + bool file_opened_; + FaultInjectionTestFS* fs_; +}; + +class TestFSRandomAccessFile : public FSRandomAccessFile { + public: + explicit TestFSRandomAccessFile(const std::string& fname, + std::unique_ptr&& f, + FaultInjectionTestFS* fs); + ~TestFSRandomAccessFile() override {} + IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) const override; + IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts, + std::function cb, + void* cb_arg, void** io_handle, IOHandleDeleter* del_fn, + IODebugContext* dbg) override; + IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, + const IOOptions& options, IODebugContext* dbg) override; + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + bool use_direct_io() const override { return target_->use_direct_io(); } + + size_t GetUniqueId(char* id, size_t max_size) const override; + + private: + std::unique_ptr target_; + FaultInjectionTestFS* fs_; +}; + +class TestFSSequentialFile : public FSSequentialFileOwnerWrapper { + public: + explicit TestFSSequentialFile(std::unique_ptr&& f, + FaultInjectionTestFS* fs) + : FSSequentialFileOwnerWrapper(std::move(f)), fs_(fs) {} + IOStatus Read(size_t n, const IOOptions& options, Slice* result, + char* scratch, IODebugContext* dbg) override; + IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options, + Slice* result, char* scratch, + IODebugContext* dbg) override; + + private: + FaultInjectionTestFS* fs_; +}; + +class TestFSDirectory : public FSDirectory { + public: + explicit TestFSDirectory(FaultInjectionTestFS* fs, std::string dirname, + FSDirectory* dir) + : fs_(fs), dirname_(dirname), dir_(dir) {} + ~TestFSDirectory() {} + + virtual IOStatus Fsync(const IOOptions& options, + IODebugContext* dbg) override; + + virtual IOStatus Close(const IOOptions& options, + IODebugContext* dbg) override; + + virtual IOStatus FsyncWithDirOptions( + const IOOptions& options, IODebugContext* dbg, + const DirFsyncOptions& dir_fsync_options) override; + + private: + FaultInjectionTestFS* fs_; + std::string dirname_; + std::unique_ptr dir_; +}; + +class FaultInjectionTestFS : public FileSystemWrapper { + public: + explicit FaultInjectionTestFS(const std::shared_ptr& base) + : FileSystemWrapper(base), + filesystem_active_(true), + filesystem_writable_(false), + thread_local_error_(new ThreadLocalPtr(DeleteThreadLocalErrorContext)), + enable_write_error_injection_(false), + enable_metadata_write_error_injection_(false), + write_error_rand_(0), + write_error_one_in_(0), + metadata_write_error_one_in_(0), 
+ read_error_one_in_(0), + ingest_data_corruption_before_write_(false), + fail_get_file_unique_id_(false) {} + virtual ~FaultInjectionTestFS() { error_.PermitUncheckedError(); } + + static const char* kClassName() { return "FaultInjectionTestFS"; } + const char* Name() const override { return kClassName(); } + + IOStatus NewDirectory(const std::string& name, const IOOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewWritableFile(const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus ReopenWritableFile(const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewRandomRWFile(const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewRandomAccessFile(const std::string& fname, + const FileOptions& file_opts, + std::unique_ptr* result, + IODebugContext* dbg) override; + IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts, + std::unique_ptr* r, + IODebugContext* dbg) override; + + virtual IOStatus DeleteFile(const std::string& f, const IOOptions& options, + IODebugContext* dbg) override; + + virtual IOStatus RenameFile(const std::string& s, const std::string& t, + const IOOptions& options, + IODebugContext* dbg) override; + + virtual IOStatus LinkFile(const std::string& src, const std::string& target, + const IOOptions& options, + IODebugContext* dbg) override; + +// Undef to eliminate clash on Windows +#undef GetFreeSpace + virtual IOStatus GetFreeSpace(const std::string& path, + const IOOptions& options, uint64_t* disk_free, + IODebugContext* dbg) override { + IOStatus io_s; + if (!IsFilesystemActive() && + error_.subcode() == IOStatus::SubCode::kNoSpace) { + *disk_free = 0; + } else { + io_s = target()->GetFreeSpace(path, options, disk_free, dbg); + } + return io_s; + } + + virtual IOStatus Poll(std::vector& io_handles, + size_t min_completions) override; + + virtual IOStatus AbortIO(std::vector& io_handles) override; + + void WritableFileClosed(const FSFileState& state); + + void WritableFileSynced(const FSFileState& state); + + void WritableFileAppended(const FSFileState& state); + + IOStatus DropUnsyncedFileData(); + + IOStatus DropRandomUnsyncedFileData(Random* rnd); + + IOStatus DeleteFilesCreatedAfterLastDirSync(const IOOptions& options, + IODebugContext* dbg); + + void ResetState(); + + void UntrackFile(const std::string& f); + + void SyncDir(const std::string& dirname) { + MutexLock l(&mutex_); + dir_to_new_files_since_last_sync_.erase(dirname); + } + + // Setting the filesystem to inactive is the test equivalent to simulating a + // system reset. Setting to inactive will freeze our saved filesystem state so + // that it will stop being recorded. It can then be reset back to the state at + // the time of the reset. + bool IsFilesystemActive() { + MutexLock l(&mutex_); + return filesystem_active_; + } + + // Setting filesystem_writable_ makes NewWritableFile. 
ReopenWritableFile, + // and NewRandomRWFile bypass FaultInjectionTestFS and go directly to the + // target FS + bool IsFilesystemDirectWritable() { + MutexLock l(&mutex_); + return filesystem_writable_; + } + bool ShouldUseDiretWritable(const std::string& file_name) { + MutexLock l(&mutex_); + if (filesystem_writable_) { + return true; + } + FileType file_type = kTempFile; + uint64_t file_number = 0; + if (!TryParseFileName(file_name, &file_number, &file_type)) { + return false; + } + return skip_direct_writable_types_.find(file_type) != + skip_direct_writable_types_.end(); + } + void SetFilesystemActiveNoLock( + bool active, IOStatus error = IOStatus::Corruption("Not active")) { + error.PermitUncheckedError(); + filesystem_active_ = active; + if (!active) { + error_ = error; + } + } + void SetFilesystemActive( + bool active, IOStatus error = IOStatus::Corruption("Not active")) { + MutexLock l(&mutex_); + error.PermitUncheckedError(); + SetFilesystemActiveNoLock(active, error); + } + void SetFilesystemDirectWritable(bool writable) { + MutexLock l(&mutex_); + filesystem_writable_ = writable; + } + void AssertNoOpenFile() { assert(open_managed_files_.empty()); } + + IOStatus GetError() { return error_; } + + void SetFileSystemIOError(IOStatus io_error) { + MutexLock l(&mutex_); + io_error.PermitUncheckedError(); + error_ = io_error; + } + + // To simulate the data corruption before data is written in FS + void IngestDataCorruptionBeforeWrite() { + MutexLock l(&mutex_); + ingest_data_corruption_before_write_ = true; + } + + void NoDataCorruptionBeforeWrite() { + MutexLock l(&mutex_); + ingest_data_corruption_before_write_ = false; + } + + bool ShouldDataCorruptionBeforeWrite() { + MutexLock l(&mutex_); + return ingest_data_corruption_before_write_; + } + + void SetChecksumHandoffFuncType(const ChecksumType& func_type) { + MutexLock l(&mutex_); + checksum_handoff_func_tpye_ = func_type; + } + + const ChecksumType& GetChecksumHandoffFuncType() { + MutexLock l(&mutex_); + return checksum_handoff_func_tpye_; + } + + void SetFailGetUniqueId(bool flag) { + MutexLock l(&mutex_); + fail_get_file_unique_id_ = flag; + } + + bool ShouldFailGetUniqueId() { + MutexLock l(&mutex_); + return fail_get_file_unique_id_; + } + + // Specify what the operation, so we can inject the right type of error + enum ErrorOperation : char { + kRead = 0, + kMultiReadSingleReq = 1, + kMultiRead = 2, + kOpen, + }; + + // Set thread-local parameters for error injection. The first argument, + // seed is the seed for the random number generator, and one_in determines + // the probability of injecting error (i.e an error is injected with + // 1/one_in probability) + void SetThreadLocalReadErrorContext(uint32_t seed, int one_in) { + struct ErrorContext* ctx = + static_cast(thread_local_error_->Get()); + if (ctx == nullptr) { + ctx = new ErrorContext(seed); + thread_local_error_->Reset(ctx); + } + ctx->one_in = one_in; + ctx->count = 0; + } + + static void DeleteThreadLocalErrorContext(void* p) { + ErrorContext* ctx = static_cast(p); + delete ctx; + } + + // This is to set the parameters for the write error injection. + // seed is the seed for the random number generator, and one_in determines + // the probability of injecting error (i.e an error is injected with + // 1/one_in probability). For write error, we can specify the error we + // want to inject. Types decides the file types we want to inject the + // error (e.g., Wal files, SST files), which is empty by default. 
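A rough usage sketch for the setter declared just below; the seed, probability, error text, and file types here are illustrative assumptions (FileType values come from file/filename.h), not part of this patch:

    fs->SetRandomWriteError(/*seed=*/301, /*one_in=*/1000,
                            IOStatus::IOError("injected write error"),
                            /*inject_for_all_file_types=*/false,
                            {FileType::kTableFile, FileType::kWalFile});
    fs->EnableWriteErrorInjection();
    // From here on, roughly 1 in 1000 writes to SST/WAL files returns the
    // injected error.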
+  void SetRandomWriteError(uint32_t seed, int one_in, IOStatus error,
+                           bool inject_for_all_file_types,
+                           const std::vector<FileType>& types) {
+    MutexLock l(&mutex_);
+    Random tmp_rand(seed);
+    error.PermitUncheckedError();
+    error_ = error;
+    write_error_rand_ = tmp_rand;
+    write_error_one_in_ = one_in;
+    inject_for_all_file_types_ = inject_for_all_file_types;
+    write_error_allowed_types_ = types;
+  }
+
+  void SetSkipDirectWritableTypes(const std::set<FileType>& types) {
+    MutexLock l(&mutex_);
+    skip_direct_writable_types_ = types;
+  }
+
+  void SetRandomMetadataWriteError(int one_in) {
+    MutexLock l(&mutex_);
+    metadata_write_error_one_in_ = one_in;
+  }
+  // If the value is not 0, it is enabled. Otherwise, it is disabled.
+  void SetRandomReadError(int one_in) { read_error_one_in_ = one_in; }
+
+  bool ShouldInjectRandomReadError() {
+    return read_error_one_in() &&
+           Random::GetTLSInstance()->OneIn(read_error_one_in());
+  }
+
+  // Inject a write error with randomized parameters and the predefined
+  // error type. Only the allowed file types will have the write error
+  // injected.
+  IOStatus InjectWriteError(const std::string& file_name);
+
+  // Inject errors into metadata operations.
+  IOStatus InjectMetadataWriteError();
+
+  // Inject an error. For a READ operation, a status of IOError(), a
+  // corruption of the contents of scratch, or a truncation of the slice
+  // are the possible error types, with equal probability. For OPEN,
+  // it is always an IOError.
+  // fault_injected returns whether a fault was injected. It is needed
+  // because some faults are injected even though the returned IOStatus
+  // is OK.
+  IOStatus InjectThreadSpecificReadError(ErrorOperation op, Slice* slice,
+                                         bool direct_io, char* scratch,
+                                         bool need_count_increase,
+                                         bool* fault_injected);
+
+  // Get the count of how many times an error was injected since the
+  // previous call.
+  int GetAndResetErrorCount() {
+    ErrorContext* ctx =
+        static_cast<ErrorContext*>(thread_local_error_->Get());
+    int count = 0;
+    if (ctx != nullptr) {
+      count = ctx->count;
+      ctx->count = 0;
+    }
+    return count;
+  }
+
+  void EnableErrorInjection() {
+    ErrorContext* ctx =
+        static_cast<ErrorContext*>(thread_local_error_->Get());
+    if (ctx) {
+      ctx->enable_error_injection = true;
+    }
+  }
+
+  void EnableWriteErrorInjection() {
+    MutexLock l(&mutex_);
+    enable_write_error_injection_ = true;
+  }
+  void EnableMetadataWriteErrorInjection() {
+    MutexLock l(&mutex_);
+    enable_metadata_write_error_injection_ = true;
+  }
+
+  void DisableWriteErrorInjection() {
+    MutexLock l(&mutex_);
+    enable_write_error_injection_ = false;
+  }
+
+  void DisableErrorInjection() {
+    ErrorContext* ctx =
+        static_cast<ErrorContext*>(thread_local_error_->Get());
+    if (ctx) {
+      ctx->enable_error_injection = false;
+    }
+  }
+
+  void DisableMetadataWriteErrorInjection() {
+    MutexLock l(&mutex_);
+    enable_metadata_write_error_injection_ = false;
+  }
+
+  int read_error_one_in() const { return read_error_one_in_.load(); }
+
+  int write_error_one_in() const { return write_error_one_in_; }
+
+  // We capture a backtrace every time a fault is injected, for debugging
+  // purposes. This call prints the backtrace to stderr and frees the
+  // saved callstack.
+  void PrintFaultBacktrace();
+
+ private:
+  port::Mutex mutex_;
+  std::map<std::string, FSFileState> db_file_state_;
+  std::set<std::string> open_managed_files_;
+  // directory -> (file name -> file contents to recover)
+  // When data is recovered from an unsynced parent directory, files whose
+  // recorded contents are empty are deleted; those with non-empty recorded
+  // contents are restored accordingly.
+  std::unordered_map<std::string, std::map<std::string, std::string>>
+      dir_to_new_files_since_last_sync_;
+  bool filesystem_active_;    // Record flushes, syncs, writes
+  bool filesystem_writable_;  // Bypass FaultInjectionTestFS and go directly
+                              // to underlying FS for writable files
+  IOStatus error_;
+
+  enum ErrorType : int {
+    kErrorTypeStatus = 0,
+    kErrorTypeCorruption,
+    kErrorTypeTruncated,
+    kErrorTypeMax
+  };
+
+  struct ErrorContext {
+    Random rand;
+    int one_in;
+    int count;
+    bool enable_error_injection;
+    void* callstack;
+    std::string message;
+    int frames;
+    ErrorType type;
+
+    explicit ErrorContext(uint32_t seed)
+        : rand(seed),
+          enable_error_injection(false),
+          callstack(nullptr),
+          frames(0) {}
+    ~ErrorContext() {
+      if (callstack) {
+        free(callstack);
+      }
+    }
+  };
+
+  std::unique_ptr<ThreadLocalPtr> thread_local_error_;
+  bool enable_write_error_injection_;
+  bool enable_metadata_write_error_injection_;
+  Random write_error_rand_;
+  int write_error_one_in_;
+  int metadata_write_error_one_in_;
+  std::atomic<int> read_error_one_in_;
+  bool inject_for_all_file_types_;
+  std::vector<FileType> write_error_allowed_types_;
+  // File types for which direct writable mode is skipped.
+  std::set<FileType> skip_direct_writable_types_;
+  bool ingest_data_corruption_before_write_;
+  ChecksumType checksum_handoff_func_tpye_;
+  bool fail_get_file_unique_id_;
+
+  // Extract the file number and type from a file name. Returns false if it
+  // fails to find them.
+  bool TryParseFileName(const std::string& file_name, uint64_t* number,
+                        FileType* type);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.cc
new file mode 100644
index 0000000000..d7a2a1bd75
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.cc
@@ -0,0 +1,136 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+// This class implements a custom SecondaryCache that randomly injects an
+// error status into Inserts/Lookups based on a specified probability.
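Before the implementation, a minimal construction sketch; the base cache handle, seed, and 1-in-10 probability are illustrative assumptions, not part of this patch:

    // Wrap a real secondary cache so that roughly 1 in 10 Inserts/Lookups
    // fails with an injected error status.
    std::shared_ptr<SecondaryCache> faulty =
        std::make_shared<FaultInjectionSecondaryCache>(
            real_secondary_cache, /*seed=*/42, /*prob=*/10);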
+ +#include "utilities/fault_injection_secondary_cache.h" + +namespace ROCKSDB_NAMESPACE { + +void FaultInjectionSecondaryCache::ResultHandle::UpdateHandleValue( + FaultInjectionSecondaryCache::ResultHandle* handle) { + ErrorContext* ctx = handle->cache_->GetErrorContext(); + if (!ctx->rand.OneIn(handle->cache_->prob_)) { + handle->value_ = handle->base_->Value(); + handle->size_ = handle->base_->Size(); + } + handle->base_.reset(); +} + +bool FaultInjectionSecondaryCache::ResultHandle::IsReady() { + bool ready = true; + if (base_) { + ready = base_->IsReady(); + if (ready) { + UpdateHandleValue(this); + } + } + return ready; +} + +void FaultInjectionSecondaryCache::ResultHandle::Wait() { + base_->Wait(); + UpdateHandleValue(this); +} + +Cache::ObjectPtr FaultInjectionSecondaryCache::ResultHandle::Value() { + return value_; +} + +size_t FaultInjectionSecondaryCache::ResultHandle::Size() { return size_; } + +void FaultInjectionSecondaryCache::ResultHandle::WaitAll( + FaultInjectionSecondaryCache* cache, + std::vector handles) { + std::vector base_handles; + for (SecondaryCacheResultHandle* hdl : handles) { + FaultInjectionSecondaryCache::ResultHandle* handle = + static_cast(hdl); + if (!handle->base_) { + continue; + } + base_handles.emplace_back(handle->base_.get()); + } + + cache->base_->WaitAll(base_handles); + for (SecondaryCacheResultHandle* hdl : handles) { + FaultInjectionSecondaryCache::ResultHandle* handle = + static_cast(hdl); + if (handle->base_) { + UpdateHandleValue(handle); + } + } +} + +FaultInjectionSecondaryCache::ErrorContext* +FaultInjectionSecondaryCache::GetErrorContext() { + ErrorContext* ctx = static_cast(thread_local_error_->Get()); + if (!ctx) { + ctx = new ErrorContext(seed_); + thread_local_error_->Reset(ctx); + } + + return ctx; +} + +Status FaultInjectionSecondaryCache::Insert( + const Slice& key, Cache::ObjectPtr value, + const Cache::CacheItemHelper* helper) { + ErrorContext* ctx = GetErrorContext(); + if (ctx->rand.OneIn(prob_)) { + return Status::IOError(); + } + + return base_->Insert(key, value, helper); +} + +std::unique_ptr +FaultInjectionSecondaryCache::Lookup(const Slice& key, + const Cache::CacheItemHelper* helper, + Cache::CreateContext* create_context, + bool wait, bool advise_erase, + bool& kept_in_sec_cache) { + ErrorContext* ctx = GetErrorContext(); + if (base_is_compressed_sec_cache_) { + if (ctx->rand.OneIn(prob_)) { + return nullptr; + } else { + return base_->Lookup(key, helper, create_context, wait, advise_erase, + kept_in_sec_cache); + } + } else { + std::unique_ptr hdl = base_->Lookup( + key, helper, create_context, wait, advise_erase, kept_in_sec_cache); + if (wait && ctx->rand.OneIn(prob_)) { + hdl.reset(); + } + return std::unique_ptr( + new FaultInjectionSecondaryCache::ResultHandle(this, std::move(hdl))); + } +} + +void FaultInjectionSecondaryCache::Erase(const Slice& key) { + base_->Erase(key); +} + +void FaultInjectionSecondaryCache::WaitAll( + std::vector handles) { + if (base_is_compressed_sec_cache_) { + ErrorContext* ctx = GetErrorContext(); + std::vector base_handles; + for (SecondaryCacheResultHandle* hdl : handles) { + if (ctx->rand.OneIn(prob_)) { + continue; + } + base_handles.push_back(hdl); + } + base_->WaitAll(base_handles); + } else { + FaultInjectionSecondaryCache::ResultHandle::WaitAll(this, handles); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.h 
b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.h new file mode 100644 index 0000000000..ed89f655aa --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/fault_injection_secondary_cache.h @@ -0,0 +1,109 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/secondary_cache.h" +#include "util/random.h" +#include "util/thread_local.h" + +namespace ROCKSDB_NAMESPACE { + +// This class implements a custom SecondaryCache that randomly injects an +// error status into Inserts/Lookups based on a specified probability. +// Its used by db_stress to verify correctness in the presence of +// secondary cache errors. +// +class FaultInjectionSecondaryCache : public SecondaryCache { + public: + explicit FaultInjectionSecondaryCache( + const std::shared_ptr& base, uint32_t seed, int prob) + : base_(base), + seed_(seed), + prob_(prob), + thread_local_error_(new ThreadLocalPtr(DeleteThreadLocalErrorContext)) { + if (std::strcmp(base_->Name(), "CompressedSecondaryCache") == 0) { + base_is_compressed_sec_cache_ = true; + } + } + + virtual ~FaultInjectionSecondaryCache() override {} + + const char* Name() const override { return "FaultInjectionSecondaryCache"; } + + Status Insert(const Slice& key, Cache::ObjectPtr value, + const Cache::CacheItemHelper* helper) override; + + std::unique_ptr Lookup( + const Slice& key, const Cache::CacheItemHelper* helper, + Cache::CreateContext* create_context, bool wait, bool advise_erase, + bool& kept_in_sec_cache) override; + + bool SupportForceErase() const override { return base_->SupportForceErase(); } + + void Erase(const Slice& key) override; + + void WaitAll(std::vector handles) override; + + Status SetCapacity(size_t capacity) override { + return base_->SetCapacity(capacity); + } + + Status GetCapacity(size_t& capacity) override { + return base_->GetCapacity(capacity); + } + + std::string GetPrintableOptions() const override { + return base_->GetPrintableOptions(); + } + + private: + class ResultHandle : public SecondaryCacheResultHandle { + public: + ResultHandle(FaultInjectionSecondaryCache* cache, + std::unique_ptr&& base) + : cache_(cache), base_(std::move(base)), value_(nullptr), size_(0) {} + + ~ResultHandle() override {} + + bool IsReady() override; + + void Wait() override; + + Cache::ObjectPtr Value() override; + + size_t Size() override; + + static void WaitAll(FaultInjectionSecondaryCache* cache, + std::vector handles); + + private: + static void UpdateHandleValue(ResultHandle* handle); + + FaultInjectionSecondaryCache* cache_; + std::unique_ptr base_; + Cache::ObjectPtr value_; + size_t size_; + }; + + static void DeleteThreadLocalErrorContext(void* p) { + ErrorContext* ctx = static_cast(p); + delete ctx; + } + + const std::shared_ptr base_; + uint32_t seed_; + int prob_; + bool base_is_compressed_sec_cache_{false}; + + struct ErrorContext { + Random rand; + + explicit ErrorContext(uint32_t seed) : rand(seed) {} + }; + std::unique_ptr thread_local_error_; + + ErrorContext* GetErrorContext(); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/leveldb_options/leveldb_options.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/leveldb_options/leveldb_options.cc new file mode 100644 index 
0000000000..f81e59b83a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/leveldb_options/leveldb_options.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "rocksdb/utilities/leveldb_options.h" + +#include "rocksdb/advanced_cache.h" +#include "rocksdb/comparator.h" +#include "rocksdb/env.h" +#include "rocksdb/filter_policy.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" + +namespace ROCKSDB_NAMESPACE { + +LevelDBOptions::LevelDBOptions() + : comparator(BytewiseComparator()), + create_if_missing(false), + error_if_exists(false), + paranoid_checks(false), + env(Env::Default()), + info_log(nullptr), + write_buffer_size(4 << 20), + max_open_files(1000), + block_cache(nullptr), + block_size(4096), + block_restart_interval(16), + compression(kSnappyCompression), + filter_policy(nullptr) {} + +Options ConvertOptions(const LevelDBOptions& leveldb_options) { + Options options = Options(); + options.create_if_missing = leveldb_options.create_if_missing; + options.error_if_exists = leveldb_options.error_if_exists; + options.paranoid_checks = leveldb_options.paranoid_checks; + options.env = leveldb_options.env; + options.info_log.reset(leveldb_options.info_log); + options.write_buffer_size = leveldb_options.write_buffer_size; + options.max_open_files = leveldb_options.max_open_files; + options.compression = leveldb_options.compression; + + BlockBasedTableOptions table_options; + table_options.block_cache.reset(leveldb_options.block_cache); + table_options.block_size = leveldb_options.block_size; + table_options.block_restart_interval = leveldb_options.block_restart_interval; + table_options.filter_policy.reset(leveldb_options.filter_policy); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + return options; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory/memory_util.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory/memory_util.cc new file mode 100644 index 0000000000..62042192f9 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory/memory_util.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
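A short usage sketch for the helper implemented below; the db and block_cache handles are assumed to already exist in the caller:

    std::map<MemoryUtil::UsageType, uint64_t> usage;
    std::unordered_set<const Cache*> caches = {block_cache.get()};
    Status s =
        MemoryUtil::GetApproximateMemoryUsageByType({db}, caches, &usage);
    if (s.ok()) {
      // e.g. total memtable memory across the given DBs:
      uint64_t memtable_bytes = usage[MemoryUtil::kMemTableTotal];
    }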
+
+
+#include "rocksdb/utilities/memory_util.h"
+
+#include "db/db_impl/db_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+Status MemoryUtil::GetApproximateMemoryUsageByType(
+    const std::vector<DB*>& dbs,
+    const std::unordered_set<const Cache*> cache_set,
+    std::map<MemoryUtil::UsageType, uint64_t>* usage_by_type) {
+  usage_by_type->clear();
+
+  // MemTable
+  for (auto* db : dbs) {
+    uint64_t usage = 0;
+    if (db->GetAggregatedIntProperty(DB::Properties::kSizeAllMemTables,
+                                     &usage)) {
+      (*usage_by_type)[MemoryUtil::kMemTableTotal] += usage;
+    }
+    if (db->GetAggregatedIntProperty(DB::Properties::kCurSizeAllMemTables,
+                                     &usage)) {
+      (*usage_by_type)[MemoryUtil::kMemTableUnFlushed] += usage;
+    }
+  }
+
+  // Table Readers
+  for (auto* db : dbs) {
+    uint64_t usage = 0;
+    if (db->GetAggregatedIntProperty(DB::Properties::kEstimateTableReadersMem,
+                                     &usage)) {
+      (*usage_by_type)[MemoryUtil::kTableReadersTotal] += usage;
+    }
+  }
+
+  // Cache
+  for (const auto* cache : cache_set) {
+    if (cache != nullptr) {
+      (*usage_by_type)[MemoryUtil::kCacheTotal] += cache->GetUsage();
+    }
+  }
+
+  return Status::OK();
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory_allocators.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory_allocators.h
new file mode 100644
index 0000000000..bdc2e13a94
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/memory_allocators.h
@@ -0,0 +1,103 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <atomic>
+
+#include "rocksdb/memory_allocator.h"
+
+namespace ROCKSDB_NAMESPACE {
+// A memory allocator using new/delete
+class DefaultMemoryAllocator : public MemoryAllocator {
+ public:
+  static const char* kClassName() { return "DefaultMemoryAllocator"; }
+  const char* Name() const override { return kClassName(); }
+  void* Allocate(size_t size) override {
+    return static_cast<void*>(new char[size]);
+  }
+
+  void Deallocate(void* p) override { delete[] static_cast<char*>(p); }
+};
+
+// Base class for a MemoryAllocator. This implementation does nothing
+// and implements the methods in failure mode (assert if the methods are
+// invoked). Implementations can extend this class and override these methods
+// when they are enabled via compiler switches (e.g., the
+// JeMallocMemoryAllocator can define these methods if ROCKSDB_JEMALLOC is
+// defined at compile time). If compiled in "disabled" mode, this class
+// provides default/failure implementations. If compiled in "enabled" mode,
+// the derived class needs to provide the appropriate "enabled" methods for
+// the "real" implementation. Failure of the "real" implementation to
+// override any of these methods will result in an assert failure.
+class BaseMemoryAllocator : public MemoryAllocator {
+ public:
+  void* Allocate(size_t /*size*/) override {
+    assert(false);
+    return nullptr;
+  }
+
+  void Deallocate(void* /*p*/) override { assert(false); }
+};
+
+// A wrapped MemoryAllocator that delegates the memory allocator functions to
+// the wrapped one.
+class MemoryAllocatorWrapper : public MemoryAllocator {
+ public:
+  // Initialize a MemoryAllocatorWrapper that delegates all calls to *t.
+  explicit MemoryAllocatorWrapper(const std::shared_ptr<MemoryAllocator>& t);
+  ~MemoryAllocatorWrapper() override {}
+
+  // Return the target to which to forward all calls.
+  MemoryAllocator* target() const { return target_.get(); }
+  // Allocate a block of at least size bytes. Has to be thread-safe.
+  void* Allocate(size_t size) override { return target_->Allocate(size); }
+
+  // Deallocate a previously allocated block. Has to be thread-safe.
+  void Deallocate(void* p) override { return target_->Deallocate(p); }
+
+  // Returns the memory size of the block allocated at p. The default
+  // implementation that just returns the original allocation_size is fine.
+  size_t UsableSize(void* p, size_t allocation_size) const override {
+    return target_->UsableSize(p, allocation_size);
+  }
+
+  const Customizable* Inner() const override { return target_.get(); }
+
+ protected:
+  std::shared_ptr<MemoryAllocator> target_;
+};
+
+// A memory allocator that counts the number of allocations and deallocations.
+// This class is useful if the number of memory allocations/deallocations is
+// important.
+class CountedMemoryAllocator : public MemoryAllocatorWrapper {
+ public:
+  CountedMemoryAllocator()
+      : MemoryAllocatorWrapper(std::make_shared<DefaultMemoryAllocator>()),
+        allocations_(0),
+        deallocations_(0) {}
+
+  explicit CountedMemoryAllocator(const std::shared_ptr<MemoryAllocator>& t)
+      : MemoryAllocatorWrapper(t), allocations_(0), deallocations_(0) {}
+  static const char* kClassName() { return "CountedMemoryAllocator"; }
+  const char* Name() const override { return kClassName(); }
+  std::string GetId() const override { return std::string(Name()); }
+  void* Allocate(size_t size) override {
+    allocations_++;
+    return MemoryAllocatorWrapper::Allocate(size);
+  }
+
+  void Deallocate(void* p) override {
+    deallocations_++;
+    MemoryAllocatorWrapper::Deallocate(p);
+  }
+  uint64_t GetNumAllocations() const { return allocations_; }
+  uint64_t GetNumDeallocations() const { return deallocations_; }
+
+ private:
+  std::atomic<uint64_t> allocations_;
+  std::atomic<uint64_t> deallocations_;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.cc
new file mode 100644
index 0000000000..020064d092
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.cc
@@ -0,0 +1,115 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
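Given the registrations below, a merge operator can be constructed by name through the object registry. A minimal sketch (the "uint64add" nickname is assumed to match UInt64AddOperator::kNickName()):

    std::shared_ptr<MergeOperator> op;
    ConfigOptions config_options;
    Status s =
        MergeOperator::CreateFromString(config_options, "uint64add", &op);
    // On success, op points at a UInt64AddOperator instance.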
+ +#include "utilities/merge_operators.h" + +#include + +#include "rocksdb/merge_operator.h" +#include "rocksdb/options.h" +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/object_registry.h" +#include "utilities/merge_operators/bytesxor.h" +#include "utilities/merge_operators/max_operator.h" +#include "utilities/merge_operators/put_operator.h" +#include "utilities/merge_operators/sortlist.h" +#include "utilities/merge_operators/string_append/stringappend.h" +#include "utilities/merge_operators/string_append/stringappend2.h" +#include "utilities/merge_operators/uint64add.h" + +namespace ROCKSDB_NAMESPACE { +static int RegisterBuiltinMergeOperators(ObjectLibrary& library, + const std::string& /*arg*/) { + size_t num_types; + library.AddFactory( + ObjectLibrary::PatternEntry(StringAppendOperator::kClassName()) + .AnotherName(StringAppendOperator::kNickName()), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new StringAppendOperator(",")); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(StringAppendTESTOperator::kClassName()) + .AnotherName(StringAppendTESTOperator::kNickName()), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new StringAppendTESTOperator(",")); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(SortList::kClassName()) + .AnotherName(SortList::kNickName()), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new SortList()); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(BytesXOROperator::kClassName()) + .AnotherName(BytesXOROperator::kNickName()), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new BytesXOROperator()); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(UInt64AddOperator::kClassName()) + .AnotherName(UInt64AddOperator::kNickName()), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new UInt64AddOperator()); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(MaxOperator::kClassName()) + .AnotherName(MaxOperator::kNickName()), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new MaxOperator()); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(PutOperatorV2::kClassName()) + .AnotherName(PutOperatorV2::kNickName()), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new PutOperatorV2()); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(PutOperator::kNickName()), + [](const std::string& /*uri*/, std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(new PutOperator()); + return guard->get(); + }); + + return static_cast(library.GetFactoryCount(&num_types)); +} + +Status MergeOperator::CreateFromString(const ConfigOptions& config_options, + const std::string& value, + std::shared_ptr* result) { + static std::once_flag once; + std::call_once(once, [&]() { + RegisterBuiltinMergeOperators(*(ObjectLibrary::Default().get()), ""); + }); + return LoadSharedObject(config_options, value, result); +} + +std::shared_ptr MergeOperators::CreateFromStringId( + const std::string& id) { + std::shared_ptr result; + Status s = 
MergeOperator::CreateFromString(ConfigOptions(), id, &result); + if (s.ok()) { + return result; + } else { + // Empty or unknown, just return nullptr + return nullptr; + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.h new file mode 100644 index 0000000000..9b90107e39 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators.h @@ -0,0 +1,36 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once +#include + +#include +#include + +#include "rocksdb/merge_operator.h" + +namespace ROCKSDB_NAMESPACE { + +class MergeOperators { + public: + static std::shared_ptr CreatePutOperator(); + static std::shared_ptr CreateDeprecatedPutOperator(); + static std::shared_ptr CreateUInt64AddOperator(); + static std::shared_ptr CreateStringAppendOperator(); + static std::shared_ptr CreateStringAppendOperator( + char delim_char); + static std::shared_ptr CreateStringAppendOperator( + const std::string& delim); + static std::shared_ptr CreateStringAppendTESTOperator(); + static std::shared_ptr CreateMaxOperator(); + static std::shared_ptr CreateBytesXOROperator(); + static std::shared_ptr CreateSortOperator(); + + // Will return a different merge operator depending on the string. + static std::shared_ptr CreateFromStringId( + const std::string& name); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.cc new file mode 100644 index 0000000000..fa09c18ea9 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
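The operator implemented below XORs operands bytewise up to the shorter length and carries the tail of the longer operand through unchanged; for example, merging the stored value "\x0F\xF0" with the operand "\xFF" yields "\xF0\xF0". A minimal wiring sketch (path and options are illustrative):

    Options options;
    options.create_if_missing = true;
    options.merge_operator = MergeOperators::CreateBytesXOROperator();
    DB* db = nullptr;
    Status s = DB::Open(options, "/tmp/xor_db", &db);
    // db->Merge(WriteOptions(), key, value) now XORs value into the stored
    // bytes for key.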
+ +#include "utilities/merge_operators/bytesxor.h" + +#include +#include + +namespace ROCKSDB_NAMESPACE { + +std::shared_ptr MergeOperators::CreateBytesXOROperator() { + return std::make_shared(); +} + +bool BytesXOROperator::Merge(const Slice& /*key*/, const Slice* existing_value, + const Slice& value, std::string* new_value, + Logger* /*logger*/) const { + XOR(existing_value, value, new_value); + return true; +} + +void BytesXOROperator::XOR(const Slice* existing_value, const Slice& value, + std::string* new_value) const { + if (!existing_value) { + new_value->clear(); + new_value->assign(value.data(), value.size()); + return; + } + + size_t min_size = std::min(existing_value->size(), value.size()); + size_t max_size = std::max(existing_value->size(), value.size()); + + new_value->clear(); + new_value->reserve(max_size); + + const char* existing_value_data = existing_value->data(); + const char* value_data = value.data(); + + for (size_t i = 0; i < min_size; i++) { + new_value->push_back(existing_value_data[i] ^ value_data[i]); + } + + if (existing_value->size() == max_size) { + for (size_t i = min_size; i < max_size; i++) { + new_value->push_back(existing_value_data[i]); + } + } else { + assert(value.size() == max_size); + for (size_t i = min_size; i < max_size; i++) { + new_value->push_back(value_data[i]); + } + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.h new file mode 100644 index 0000000000..3c7baaccec --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/bytesxor.h @@ -0,0 +1,40 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "rocksdb/env.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "util/coding.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { + +// A 'model' merge operator that XORs two (same sized) array of bytes. +// Implemented as an AssociativeMergeOperator for simplicity and example. +class BytesXOROperator : public AssociativeMergeOperator { + public: + // XORs the two array of bytes one byte at a time and stores the result + // in new_value. len is the number of xored bytes, and the length of new_value + virtual bool Merge(const Slice& key, const Slice* existing_value, + const Slice& value, std::string* new_value, + Logger* logger) const override; + + static const char* kClassName() { return "BytesXOR"; } + static const char* kNickName() { return "bytesxor"; } + + const char* NickName() const override { return kNickName(); } + const char* Name() const override { return kClassName(); } + + void XOR(const Slice* existing_value, const Slice& value, + std::string* new_value) const; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max.cc new file mode 100644 index 0000000000..ff854cca34 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "utilities/merge_operators.h" +#include "utilities/merge_operators/max_operator.h" + +namespace ROCKSDB_NAMESPACE { + +bool MaxOperator::FullMergeV2(const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const { + Slice& max = merge_out->existing_operand; + if (merge_in.existing_value) { + max = + Slice(merge_in.existing_value->data(), merge_in.existing_value->size()); + } else if (max.data() == nullptr) { + max = Slice(); + } + + for (const auto& op : merge_in.operand_list) { + if (max.compare(op) < 0) { + max = op; + } + } + + return true; +} + +bool MaxOperator::PartialMerge(const Slice& /*key*/, const Slice& left_operand, + const Slice& right_operand, + std::string* new_value, + Logger* /*logger*/) const { + if (left_operand.compare(right_operand) >= 0) { + new_value->assign(left_operand.data(), left_operand.size()); + } else { + new_value->assign(right_operand.data(), right_operand.size()); + } + return true; +} + +bool MaxOperator::PartialMergeMulti(const Slice& /*key*/, + const std::deque& operand_list, + std::string* new_value, + Logger* /*logger*/) const { + Slice max; + for (const auto& operand : operand_list) { + if (max.compare(operand) < 0) { + max = operand; + } + } + + new_value->assign(max.data(), max.size()); + return true; +} + +std::shared_ptr MergeOperators::CreateMaxOperator() { + return std::make_shared(); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max_operator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max_operator.h new file mode 100644 index 0000000000..4c8e98db4b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/max_operator.h @@ -0,0 +1,35 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
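A behavioral sketch of the operator declared in this header (keys and values are illustrative):

    options.merge_operator = MergeOperators::CreateMaxOperator();
    db->Merge(WriteOptions(), "k", "apple");
    db->Merge(WriteOptions(), "k", "banana");
    // A subsequent Get("k") returns "banana": operands are compared
    // bytewise via Slice::compare and the largest one wins.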
+//
+// Merge operator that picks the maximum operand. Comparison is based on
+// Slice::compare.
+
+#pragma once
+
+#include "rocksdb/merge_operator.h"
+
+namespace ROCKSDB_NAMESPACE {
+class Logger;
+class Slice;
+
+class MaxOperator : public MergeOperator {
+ public:
+  static const char* kClassName() { return "MaxOperator"; }
+  static const char* kNickName() { return "max"; }
+  const char* Name() const override { return kClassName(); }
+  const char* NickName() const override { return kNickName(); }
+
+  bool FullMergeV2(const MergeOperationInput& merge_in,
+                   MergeOperationOutput* merge_out) const override;
+  bool PartialMerge(const Slice& /*key*/, const Slice& left_operand,
+                    const Slice& right_operand, std::string* new_value,
+                    Logger* /*logger*/) const override;
+  bool PartialMergeMulti(const Slice& /*key*/,
+                         const std::deque<Slice>& operand_list,
+                         std::string* new_value,
+                         Logger* /*logger*/) const override;
+};
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put.cc
new file mode 100644
index 0000000000..79468c6b78
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put.cc
@@ -0,0 +1,74 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include <memory>
+
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/slice.h"
+#include "utilities/merge_operators.h"
+#include "utilities/merge_operators/put_operator.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A merge operator that mimics Put semantics
+// Since this merge-operator will not be used in production,
+// it is implemented as a non-associative merge operator to illustrate the
+// new interface and for testing purposes. (That is, we inherit from
+// the MergeOperator class rather than the AssociativeMergeOperator
+// which would be simpler in this case).
+//
+// From the client-perspective, semantics are the same.
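Reviewer's note, not part of the vendored sources: in practice the comment above means that after any number of Merge calls, a read returns only the newest operand. A minimal sketch, assuming a db opened with CreatePutOperator():

#include <string>

#include "rocksdb/db.h"

void PutSemanticsDemo(rocksdb::DB* db) {  // db assumed opened with CreatePutOperator()
  db->Merge(rocksdb::WriteOptions(), "cfg", "v1");
  db->Merge(rocksdb::WriteOptions(), "cfg", "v2");
  std::string v;
  db->Get(rocksdb::ReadOptions(), "cfg", &v);  // v == "v2": the last operand wins
}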
+bool PutOperator::FullMerge(const Slice& /*key*/, + const Slice* /*existing_value*/, + const std::deque& operand_sequence, + std::string* new_value, Logger* /*logger*/) const { + // Put basically only looks at the current/latest value + assert(!operand_sequence.empty()); + assert(new_value != nullptr); + new_value->assign(operand_sequence.back()); + return true; +} + +bool PutOperator::PartialMerge(const Slice& /*key*/, + const Slice& /*left_operand*/, + const Slice& right_operand, + std::string* new_value, + Logger* /*logger*/) const { + new_value->assign(right_operand.data(), right_operand.size()); + return true; +} + +bool PutOperator::PartialMergeMulti(const Slice& /*key*/, + const std::deque& operand_list, + std::string* new_value, + Logger* /*logger*/) const { + new_value->assign(operand_list.back().data(), operand_list.back().size()); + return true; +} + +bool PutOperatorV2::FullMerge( + const Slice& /*key*/, const Slice* /*existing_value*/, + const std::deque& /*operand_sequence*/, + std::string* /*new_value*/, Logger* /*logger*/) const { + assert(false); + return false; +} + +bool PutOperatorV2::FullMergeV2(const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const { + // Put basically only looks at the current/latest value + assert(!merge_in.operand_list.empty()); + merge_out->existing_operand = merge_in.operand_list.back(); + return true; +} + +std::shared_ptr MergeOperators::CreateDeprecatedPutOperator() { + return std::make_shared(); +} + +std::shared_ptr MergeOperators::CreatePutOperator() { + return std::make_shared(); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put_operator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put_operator.h new file mode 100644 index 0000000000..7529da78e3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/put_operator.h @@ -0,0 +1,56 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// A merge operator that mimics Put semantics +// Since this merge-operator will not be used in production, +// it is implemented as a non-associative merge operator to illustrate the +// new interface and for testing purposes. (That is, we inherit from +// the MergeOperator class rather than the AssociativeMergeOperator +// which would be simpler in this case). +// +// From the client-perspective, semantics are the same. 
+ +#pragma once + +#include "rocksdb/merge_operator.h" + +namespace ROCKSDB_NAMESPACE { +class Logger; +class Slice; + +class PutOperator : public MergeOperator { + public: + static const char* kClassName() { return "PutOperator"; } + static const char* kNickName() { return "put_v1"; } + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kNickName(); } + + bool FullMerge(const Slice& /*key*/, const Slice* /*existing_value*/, + const std::deque& operand_sequence, + std::string* new_value, Logger* /*logger*/) const override; + bool PartialMerge(const Slice& /*key*/, const Slice& left_operand, + const Slice& right_operand, std::string* new_value, + Logger* /*logger*/) const override; + using MergeOperator::PartialMergeMulti; + bool PartialMergeMulti(const Slice& /*key*/, + const std::deque& operand_list, + std::string* new_value, + Logger* /*logger*/) const override; +}; + +class PutOperatorV2 : public PutOperator { + public: + static const char* kNickName() { return "put"; } + const char* NickName() const override { return kNickName(); } + + bool FullMerge(const Slice& /*key*/, const Slice* /*existing_value*/, + const std::deque& /*operand_sequence*/, + std::string* /*new_value*/, Logger* /*logger*/) const override; + + bool FullMergeV2(const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const override; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.cc new file mode 100644 index 0000000000..67bfc7e5ea --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#include "utilities/merge_operators/sortlist.h" + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { + +bool SortList::FullMergeV2(const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const { + std::vector left; + for (Slice slice : merge_in.operand_list) { + std::vector right; + MakeVector(right, slice); + left = Merge(left, right); + } + for (int i = 0; i < static_cast(left.size()) - 1; i++) { + merge_out->new_value.append(std::to_string(left[i])).append(","); + } + merge_out->new_value.append(std::to_string(left.back())); + return true; +} + +bool SortList::PartialMerge(const Slice& /*key*/, const Slice& left_operand, + const Slice& right_operand, std::string* new_value, + Logger* /*logger*/) const { + std::vector left; + std::vector right; + MakeVector(left, left_operand); + MakeVector(right, right_operand); + left = Merge(left, right); + for (int i = 0; i < static_cast(left.size()) - 1; i++) { + new_value->append(std::to_string(left[i])).append(","); + } + new_value->append(std::to_string(left.back())); + return true; +} + +bool SortList::PartialMergeMulti(const Slice& /*key*/, + const std::deque& operand_list, + std::string* new_value, + Logger* /*logger*/) const { + (void)operand_list; + (void)new_value; + return true; +} + +void SortList::MakeVector(std::vector& operand, Slice slice) const { + do { + const char* begin = slice.data_; + while (*slice.data_ != ',' && *slice.data_) slice.data_++; + operand.push_back(std::stoi(std::string(begin, slice.data_))); + } while (0 != *slice.data_++); +} + +std::vector SortList::Merge(std::vector& left, + std::vector& right) const { + // Fill the resultant vector with sorted results from both vectors + std::vector result; + unsigned left_it = 0, right_it = 0; + + while (left_it < left.size() && right_it < right.size()) { + // If the left value is smaller than the right it goes next + // into the resultant vector + if (left[left_it] < right[right_it]) { + result.push_back(left[left_it]); + left_it++; + } else { + result.push_back(right[right_it]); + right_it++; + } + } + + // Push the remaining data from both vectors onto the resultant + while (left_it < left.size()) { + result.push_back(left[left_it]); + left_it++; + } + + while (right_it < right.size()) { + result.push_back(right[right_it]); + right_it++; + } + + return result; +} + +std::shared_ptr MergeOperators::CreateSortOperator() { + return std::make_shared(); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.h new file mode 100644 index 0000000000..eaa4e76fba --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/sortlist.h @@ -0,0 +1,42 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// A MergeOperator for RocksDB that implements Merge Sort. +// It is built using the MergeOperator interface. The operator works by taking +// an input which contains one or more merge operands where each operand is a +// list of sorted ints and merges them to form a large sorted list. 
+#pragma once + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { + +class SortList : public MergeOperator { + public: + bool FullMergeV2(const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const override; + + bool PartialMerge(const Slice& /*key*/, const Slice& left_operand, + const Slice& right_operand, std::string* new_value, + Logger* /*logger*/) const override; + + bool PartialMergeMulti(const Slice& key, + const std::deque& operand_list, + std::string* new_value, Logger* logger) const override; + + static const char* kClassName() { return "MergeSortOperator"; } + static const char* kNickName() { return "sortlist"; } + + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kNickName(); } + + void MakeVector(std::vector& operand, Slice slice) const; + + private: + std::vector Merge(std::vector& left, std::vector& right) const; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.cc new file mode 100644 index 0000000000..720dc7200a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.cc @@ -0,0 +1,76 @@ +/** + * A MergeOperator for rocksdb that implements string append. + * @author Deon Nicholas (dnicholas@fb.com) + * Copyright 2013 Facebook + */ + +#include "stringappend.h" + +#include + +#include + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/options_type.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +static std::unordered_map + stringappend_merge_type_info = { + {"delimiter", + {0, OptionType::kString, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; +} // namespace +// Constructor: also specify the delimiter character. +StringAppendOperator::StringAppendOperator(char delim_char) + : delim_(1, delim_char) { + RegisterOptions("Delimiter", &delim_, &stringappend_merge_type_info); +} + +StringAppendOperator::StringAppendOperator(const std::string& delim) + : delim_(delim) { + RegisterOptions("Delimiter", &delim_, &stringappend_merge_type_info); +} + +// Implementation for the merge operation (concatenates two strings) +bool StringAppendOperator::Merge(const Slice& /*key*/, + const Slice* existing_value, + const Slice& value, std::string* new_value, + Logger* /*logger*/) const { + // Clear the *new_value for writing. + assert(new_value); + new_value->clear(); + + if (!existing_value) { + // No existing_value. Set *new_value = value + new_value->assign(value.data(), value.size()); + } else { + // Generic append (existing_value != null). + // Reserve *new_value to correct size, and apply concatenation. 
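+      // Illustrative (editor's note): with existing_value "abc", delim_ ","
+      // and value "def", the appends below produce "abc,def".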
+ new_value->reserve(existing_value->size() + delim_.size() + value.size()); + new_value->assign(existing_value->data(), existing_value->size()); + new_value->append(delim_); + new_value->append(value.data(), value.size()); + } + + return true; +} + +std::shared_ptr MergeOperators::CreateStringAppendOperator() { + return std::make_shared(','); +} + +std::shared_ptr MergeOperators::CreateStringAppendOperator( + char delim_char) { + return std::make_shared(delim_char); +} + +std::shared_ptr MergeOperators::CreateStringAppendOperator( + const std::string& delim) { + return std::make_shared(delim); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.h new file mode 100644 index 0000000000..153532382c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend.h @@ -0,0 +1,32 @@ +/** + * A MergeOperator for rocksdb that implements string append. + * @author Deon Nicholas (dnicholas@fb.com) + * Copyright 2013 Facebook + */ + +#pragma once +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { + +class StringAppendOperator : public AssociativeMergeOperator { + public: + // Constructor: specify delimiter + explicit StringAppendOperator(char delim_char); + explicit StringAppendOperator(const std::string& delim); + + virtual bool Merge(const Slice& key, const Slice* existing_value, + const Slice& value, std::string* new_value, + Logger* logger) const override; + + static const char* kClassName() { return "StringAppendOperator"; } + static const char* kNickName() { return "stringappend"; } + virtual const char* Name() const override { return kClassName(); } + virtual const char* NickName() const override { return kNickName(); } + + private: + std::string delim_; // The delimiter is inserted between elements +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.cc new file mode 100644 index 0000000000..8b6fe9020a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.cc @@ -0,0 +1,130 @@ +/** + * @author Deon Nicholas (dnicholas@fb.com) + * Copyright 2013 Facebook + */ + +#include "stringappend2.h" + +#include + +#include +#include + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/options_type.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +static std::unordered_map + stringappend2_merge_type_info = { + {"delimiter", + {0, OptionType::kString, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +}; +} // namespace + +// Constructor: also specify the delimiter character. 
+StringAppendTESTOperator::StringAppendTESTOperator(char delim_char) + : delim_(1, delim_char) { + RegisterOptions("Delimiter", &delim_, &stringappend2_merge_type_info); +} + +StringAppendTESTOperator::StringAppendTESTOperator(const std::string& delim) + : delim_(delim) { + RegisterOptions("Delimiter", &delim_, &stringappend2_merge_type_info); +} + +// Implementation for the merge operation (concatenates two strings) +bool StringAppendTESTOperator::FullMergeV2( + const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const { + // Clear the *new_value for writing. + merge_out->new_value.clear(); + + if (merge_in.existing_value == nullptr && merge_in.operand_list.size() == 1) { + // Only one operand + merge_out->existing_operand = merge_in.operand_list.back(); + return true; + } + + // Compute the space needed for the final result. + size_t numBytes = 0; + + for (auto it = merge_in.operand_list.begin(); + it != merge_in.operand_list.end(); ++it) { + numBytes += it->size() + delim_.size(); + } + + // Only print the delimiter after the first entry has been printed + bool printDelim = false; + + // Prepend the *existing_value if one exists. + if (merge_in.existing_value) { + merge_out->new_value.reserve(numBytes + merge_in.existing_value->size()); + merge_out->new_value.append(merge_in.existing_value->data(), + merge_in.existing_value->size()); + printDelim = true; + } else if (numBytes) { + // Without the existing (initial) value, the delimiter before the first of + // subsequent operands becomes redundant. + merge_out->new_value.reserve(numBytes - delim_.size()); + } + + // Concatenate the sequence of strings (and add a delimiter between each) + for (auto it = merge_in.operand_list.begin(); + it != merge_in.operand_list.end(); ++it) { + if (printDelim) { + merge_out->new_value.append(delim_); + } + merge_out->new_value.append(it->data(), it->size()); + printDelim = true; + } + + return true; +} + +bool StringAppendTESTOperator::PartialMergeMulti( + const Slice& /*key*/, const std::deque& /*operand_list*/, + std::string* /*new_value*/, Logger* /*logger*/) const { + return false; +} + +// A version of PartialMerge that actually performs "partial merging". +// Use this to simulate the exact behaviour of the StringAppendOperator. +bool StringAppendTESTOperator::_AssocPartialMergeMulti( + const Slice& /*key*/, const std::deque& operand_list, + std::string* new_value, Logger* /*logger*/) const { + // Clear the *new_value for writing + assert(new_value); + new_value->clear(); + assert(operand_list.size() >= 2); + + // Generic append + // Determine and reserve correct size for *new_value. 
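+  // Illustrative (editor's note): operands {"a", "b", "c"} with a one-byte
+  // delimiter need 3 + (3 - 1) * 1 = 5 bytes, producing "a,b,c".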
+ size_t size = 0; + for (const auto& operand : operand_list) { + size += operand.size(); + } + size += (operand_list.size() - 1) * delim_.length(); // Delimiters + new_value->reserve(size); + + // Apply concatenation + new_value->assign(operand_list.front().data(), operand_list.front().size()); + + for (std::deque::const_iterator it = operand_list.begin() + 1; + it != operand_list.end(); ++it) { + new_value->append(delim_); + new_value->append(it->data(), it->size()); + } + + return true; +} + +std::shared_ptr +MergeOperators::CreateStringAppendTESTOperator() { + return std::make_shared(','); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.h new file mode 100644 index 0000000000..75389e4ae8 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/string_append/stringappend2.h @@ -0,0 +1,52 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +/** + * A TEST MergeOperator for rocksdb that implements string append. + * It is built using the MergeOperator interface rather than the simpler + * AssociativeMergeOperator interface. This is useful for testing/benchmarking. + * While the two operators are semantically the same, all production code + * should use the StringAppendOperator defined in stringappend.{h,cc}. The + * operator defined in the present file is primarily for testing. + * + * @author Deon Nicholas (dnicholas@fb.com) + * Copyright 2013 Facebook + */ + +#pragma once +#include +#include + +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { + +class StringAppendTESTOperator : public MergeOperator { + public: + // Constructor with delimiter + explicit StringAppendTESTOperator(char delim_char); + explicit StringAppendTESTOperator(const std::string& delim); + + virtual bool FullMergeV2(const MergeOperationInput& merge_in, + MergeOperationOutput* merge_out) const override; + + virtual bool PartialMergeMulti(const Slice& key, + const std::deque& operand_list, + std::string* new_value, + Logger* logger) const override; + + static const char* kClassName() { return "StringAppendTESTOperator"; } + static const char* kNickName() { return "stringappendtest"; } + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kNickName(); } + + private: + // A version of PartialMerge that actually performs "partial merging". + // Use this to simulate the exact behaviour of the StringAppendOperator. + bool _AssocPartialMergeMulti(const Slice& key, + const std::deque& operand_list, + std::string* new_value, Logger* logger) const; + + std::string delim_; // The delimiter is inserted between elements +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.cc new file mode 100644 index 0000000000..72957761b6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
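Reviewer's note, not part of the vendored sources: the production and TEST string-append operators above implement the same client-visible behavior. A minimal sketch of that behavior, assuming a db opened with CreateStringAppendOperator() and its default ',' delimiter:

#include <string>

#include "rocksdb/db.h"

void AppendDemo(rocksdb::DB* db) {  // db assumed opened with CreateStringAppendOperator()
  db->Merge(rocksdb::WriteOptions(), "log", "first");
  db->Merge(rocksdb::WriteOptions(), "log", "second");
  std::string v;
  db->Get(rocksdb::ReadOptions(), "log", &v);  // v == "first,second"
}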
+ +#include "utilities/merge_operators/uint64add.h" + +#include + +#include "logging/logging.h" +#include "rocksdb/env.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/slice.h" +#include "util/coding.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { // anonymous namespace + +bool UInt64AddOperator::Merge(const Slice& /*key*/, const Slice* existing_value, + const Slice& value, std::string* new_value, + Logger* logger) const { + uint64_t orig_value = 0; + if (existing_value) { + orig_value = DecodeInteger(*existing_value, logger); + } + uint64_t operand = DecodeInteger(value, logger); + + assert(new_value); + new_value->clear(); + PutFixed64(new_value, orig_value + operand); + + return true; // Return true always since corruption will be treated as 0 +} + +uint64_t UInt64AddOperator::DecodeInteger(const Slice& value, + Logger* logger) const { + uint64_t result = 0; + + if (value.size() == sizeof(uint64_t)) { + result = DecodeFixed64(value.data()); + } else if (logger != nullptr) { + // If value is corrupted, treat it as 0 + ROCKS_LOG_ERROR(logger, + "uint64 value corruption, size: %" ROCKSDB_PRIszt + " > %" ROCKSDB_PRIszt, + value.size(), sizeof(uint64_t)); + } + + return result; +} + +std::shared_ptr MergeOperators::CreateUInt64AddOperator() { + return std::make_shared(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.h new file mode 100644 index 0000000000..7e232677a3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/merge_operators/uint64add.h @@ -0,0 +1,35 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// A 'model' merge operator with uint64 addition semantics +// Implemented as an AssociativeMergeOperator for simplicity and example. + +#pragma once + +#include "rocksdb/merge_operator.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { +class Logger; +class Slice; + +class UInt64AddOperator : public AssociativeMergeOperator { + public: + static const char* kClassName() { return "UInt64AddOperator"; } + static const char* kNickName() { return "uint64add"; } + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kNickName(); } + + bool Merge(const Slice& /*key*/, const Slice* existing_value, + const Slice& value, std::string* new_value, + Logger* logger) const override; + + private: + // Takes the string and decodes it into a uint64_t + // On error, prints a message and returns 0 + uint64_t DecodeInteger(const Slice& value, Logger* logger) const; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/object_registry.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/object_registry.cc new file mode 100644 index 0000000000..786f2ee2e4 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/object_registry.cc @@ -0,0 +1,381 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/utilities/object_registry.h" + +#include + +#include "logging/logging.h" +#include "port/lang.h" +#include "rocksdb/customizable.h" +#include "rocksdb/env.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +bool MatchesInteger(const std::string &target, size_t start, size_t pos) { + // If it is numeric, everything up to the match must be a number + int digits = 0; + if (target[start] == '-') { + start++; // Allow negative numbers + } + while (start < pos) { + if (!isdigit(target[start++])) { + return false; + } else { + digits++; + } + } + return (digits > 0); +} + +bool MatchesDecimal(const std::string &target, size_t start, size_t pos) { + int digits = 0; + if (target[start] == '-') { + start++; // Allow negative numbers + } + for (bool point = false; start < pos; start++) { + if (target[start] == '.') { + if (point) { + return false; + } else { + point = true; + } + } else if (!isdigit(target[start])) { + return false; + } else { + digits++; + } + } + return (digits > 0); +} +} // namespace + +size_t ObjectLibrary::PatternEntry::MatchSeparatorAt( + size_t start, Quantifier mode, const std::string &target, size_t tlen, + const std::string &separator) const { + size_t slen = separator.size(); + // See if there is enough space. If so, find the separator + if (tlen < start + slen) { + return std::string::npos; // not enough space left + } else if (mode == kMatchExact) { + // Exact mode means the next thing we are looking for is the separator + if (target.compare(start, slen, separator) != 0) { + return std::string::npos; + } else { + return start + slen; // Found the separator, return where we found it + } + } else { + auto pos = start + 1; + if (!separator.empty()) { + pos = target.find(separator, pos); + } + if (pos == std::string::npos) { + return pos; + } else if (mode == kMatchInteger) { + if (!MatchesInteger(target, start, pos)) { + return std::string::npos; + } + } else if (mode == kMatchDecimal) { + if (!MatchesDecimal(target, start, pos)) { + return std::string::npos; + } + } + return pos + slen; + } +} + +bool ObjectLibrary::PatternEntry::MatchesTarget(const std::string &name, + size_t nlen, + const std::string &target, + size_t tlen) const { + if (separators_.empty()) { + assert(optional_); // If there are no separators, it must be only a name + return nlen == tlen && name == target; + } else if (nlen == tlen) { // The lengths are the same + return optional_ && name == target; + } else if (tlen < nlen + slength_) { + // The target is not long enough + return false; + } else if (target.compare(0, nlen, name) != 0) { + return false; // Target does not start with name + } else { + // Loop through all of the separators one at a time matching them. + // Note that we first match the separator and then its quantifiers. + // Since we expect the separator first, we start with an exact match + // Subsequent matches will use the quantifier of the previous separator + size_t start = nlen; + auto mode = kMatchExact; + for (size_t idx = 0; idx < separators_.size(); ++idx) { + const auto &separator = separators_[idx]; + start = MatchSeparatorAt(start, mode, target, tlen, separator.first); + if (start == std::string::npos) { + return false; + } else { + mode = separator.second; + } + } + // We have matched all of the separators. 
Now check that what is left + // unmatched in the target is acceptable. + if (mode == kMatchExact) { + return (start == tlen); + } else if (start > tlen || (start == tlen && mode != kMatchZeroOrMore)) { + return false; + } else if (mode == kMatchInteger) { + return MatchesInteger(target, start, tlen); + } else if (mode == kMatchDecimal) { + return MatchesDecimal(target, start, tlen); + } + } + return true; +} + +bool ObjectLibrary::PatternEntry::Matches(const std::string &target) const { + auto tlen = target.size(); + if (MatchesTarget(name_, nlength_, target, tlen)) { + return true; + } else if (!names_.empty()) { + for (const auto &alt : names_) { + if (MatchesTarget(alt, alt.size(), target, tlen)) { + return true; + } + } + } + return false; +} + +size_t ObjectLibrary::GetFactoryCount(size_t *types) const { + std::unique_lock lock(mu_); + *types = factories_.size(); + size_t factories = 0; + for (const auto &e : factories_) { + factories += e.second.size(); + } + return factories; +} + +size_t ObjectLibrary::GetFactoryCount(const std::string &type) const { + std::unique_lock lock(mu_); + auto iter = factories_.find(type); + if (iter != factories_.end()) { + return iter->second.size(); + } else { + return 0; + } +} + +void ObjectLibrary::GetFactoryNames(const std::string &type, + std::vector *names) const { + assert(names); + std::unique_lock lock(mu_); + auto iter = factories_.find(type); + if (iter != factories_.end()) { + for (const auto &f : iter->second) { + names->push_back(f->Name()); + } + } +} + +void ObjectLibrary::GetFactoryTypes( + std::unordered_set *types) const { + assert(types); + std::unique_lock lock(mu_); + for (const auto &iter : factories_) { + types->insert(iter.first); + } +} + +void ObjectLibrary::Dump(Logger *logger) const { + std::unique_lock lock(mu_); + if (logger != nullptr && !factories_.empty()) { + ROCKS_LOG_HEADER(logger, " Registered Library: %s\n", id_.c_str()); + for (const auto &iter : factories_) { + ROCKS_LOG_HEADER(logger, " Registered factories for type[%s] ", + iter.first.c_str()); + bool printed_one = false; + for (const auto &e : iter.second) { + ROCKS_LOG_HEADER(logger, "%c %s", (printed_one) ? ',' : ':', e->Name()); + printed_one = true; + } + } + } +} + +// Returns the Default singleton instance of the ObjectLibrary +// This instance will contain most of the "standard" registered objects +std::shared_ptr &ObjectLibrary::Default() { + // Use avoid destruction here so the default ObjectLibrary will not be + // statically destroyed and long-lived. + STATIC_AVOID_DESTRUCTION(std::shared_ptr, instance) + (std::make_shared("default")); + return instance; +} + +ObjectRegistry::ObjectRegistry(const std::shared_ptr &library) { + libraries_.push_back(library); + for (const auto &b : builtins_) { + RegisterPlugin(b.first, b.second); + } +} + +std::shared_ptr ObjectRegistry::Default() { + // Use avoid destruction here so the default ObjectRegistry will not be + // statically destroyed and long-lived. 
+ STATIC_AVOID_DESTRUCTION(std::shared_ptr, instance) + (std::make_shared(ObjectLibrary::Default())); + return instance; +} + +std::shared_ptr ObjectRegistry::NewInstance() { + return std::make_shared(Default()); +} + +std::shared_ptr ObjectRegistry::NewInstance( + const std::shared_ptr &parent) { + return std::make_shared(parent); +} + +Status ObjectRegistry::SetManagedObject( + const std::string &type, const std::string &id, + const std::shared_ptr &object) { + std::string object_key = ToManagedObjectKey(type, id); + std::shared_ptr curr; + if (parent_ != nullptr) { + curr = parent_->GetManagedObject(type, id); + } + if (curr == nullptr) { + // We did not find the object in any parent. Update in the current + std::unique_lock lock(objects_mutex_); + auto iter = managed_objects_.find(object_key); + if (iter != managed_objects_.end()) { // The object exists + curr = iter->second.lock(); + if (curr != nullptr && curr != object) { + return Status::InvalidArgument("Object already exists: ", object_key); + } else { + iter->second = object; + } + } else { + // The object does not exist. Add it + managed_objects_[object_key] = object; + } + } else if (curr != object) { + return Status::InvalidArgument("Object already exists: ", object_key); + } + return Status::OK(); +} + +std::shared_ptr ObjectRegistry::GetManagedObject( + const std::string &type, const std::string &id) const { + { + std::unique_lock lock(objects_mutex_); + auto iter = managed_objects_.find(ToManagedObjectKey(type, id)); + if (iter != managed_objects_.end()) { + return iter->second.lock(); + } + } + if (parent_ != nullptr) { + return parent_->GetManagedObject(type, id); + } else { + return nullptr; + } +} + +Status ObjectRegistry::ListManagedObjects( + const std::string &type, const std::string &name, + std::vector> *results) const { + { + std::string key = ToManagedObjectKey(type, name); + std::unique_lock lock(objects_mutex_); + for (auto iter = managed_objects_.lower_bound(key); + iter != managed_objects_.end() && StartsWith(iter->first, key); + ++iter) { + auto shared = iter->second.lock(); + if (shared != nullptr) { + if (name.empty() || shared->IsInstanceOf(name)) { + results->emplace_back(shared); + } + } + } + } + if (parent_ != nullptr) { + return parent_->ListManagedObjects(type, name, results); + } else { + return Status::OK(); + } +} + +// Returns the number of registered types for this registry. +// If specified (not-null), types is updated to include the names of the +// registered types. 
+size_t ObjectRegistry::GetFactoryCount(const std::string &type) const { + size_t count = 0; + if (parent_ != nullptr) { + count = parent_->GetFactoryCount(type); + } + std::unique_lock lock(library_mutex_); + for (const auto &library : libraries_) { + count += library->GetFactoryCount(type); + } + return count; +} + +void ObjectRegistry::GetFactoryNames(const std::string &type, + std::vector *names) const { + assert(names); + names->clear(); + if (parent_ != nullptr) { + parent_->GetFactoryNames(type, names); + } + std::unique_lock lock(library_mutex_); + for (const auto &library : libraries_) { + library->GetFactoryNames(type, names); + } +} + +void ObjectRegistry::GetFactoryTypes( + std::unordered_set *types) const { + assert(types); + if (parent_ != nullptr) { + parent_->GetFactoryTypes(types); + } + std::unique_lock lock(library_mutex_); + for (const auto &library : libraries_) { + library->GetFactoryTypes(types); + } +} + +void ObjectRegistry::Dump(Logger *logger) const { + if (logger != nullptr) { + std::unique_lock lock(library_mutex_); + if (!plugins_.empty()) { + ROCKS_LOG_HEADER(logger, " Registered Plugins:"); + bool printed_one = false; + for (const auto &plugin : plugins_) { + ROCKS_LOG_HEADER(logger, "%s%s", (printed_one) ? ", " : " ", + plugin.c_str()); + printed_one = true; + } + ROCKS_LOG_HEADER(logger, "\n"); + } + for (auto iter = libraries_.crbegin(); iter != libraries_.crend(); ++iter) { + iter->get()->Dump(logger); + } + } + if (parent_ != nullptr) { + parent_->Dump(logger); + } +} + +int ObjectRegistry::RegisterPlugin(const std::string &name, + const RegistrarFunc &func) { + if (!name.empty() && func != nullptr) { + plugins_.push_back(name); + return AddLibrary(name)->Register(func, name); + } else { + return -1; + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/option_change_migration/option_change_migration.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/option_change_migration/option_change_migration.cc new file mode 100644 index 0000000000..ea3cf68571 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/option_change_migration/option_change_migration.cc @@ -0,0 +1,176 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/utilities/option_change_migration.h" + +#include "rocksdb/db.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +// Return a version of Options `opts` that allow us to open/write into a DB +// without triggering an automatic compaction or stalling. This is guaranteed +// by disabling automatic compactions and using huge values for stalling +// triggers. +Options GetNoCompactionOptions(const Options& opts) { + Options ret_opts = opts; + ret_opts.disable_auto_compactions = true; + ret_opts.level0_slowdown_writes_trigger = 999999; + ret_opts.level0_stop_writes_trigger = 999999; + ret_opts.soft_pending_compaction_bytes_limit = 0; + ret_opts.hard_pending_compaction_bytes_limit = 0; + return ret_opts; +} + +Status OpenDb(const Options& options, const std::string& dbname, + std::unique_ptr* db) { + db->reset(); + DB* tmpdb; + Status s = DB::Open(options, dbname, &tmpdb); + if (s.ok()) { + db->reset(tmpdb); + } + return s; +} + +// l0_file_size specifies size of file on L0. 
Files will be range-partitioned
+// after a full compaction, so they are likely qualified to be put on L0. If
+// left as 0, the files are compacted into a single file and put on L0.
+// Otherwise, the files are compacted to approximately l0_file_size each.
+Status CompactToLevel(const Options& options, const std::string& dbname,
+                      int dest_level, uint64_t l0_file_size, bool need_reopen) {
+  std::unique_ptr<DB> db;
+  Options no_compact_opts = GetNoCompactionOptions(options);
+  if (dest_level == 0) {
+    if (l0_file_size == 0) {
+      // Single file.
+      l0_file_size = 999999999999999;
+    }
+    // L0 has strict sequence-ID requirements for its files, so it is safer
+    // to put only one compacted file there.
+    // This is only used for converting to universal compaction with
+    // only one level. In this case, compacting to one file is also
+    // optimal.
+    no_compact_opts.target_file_size_base = l0_file_size;
+    no_compact_opts.max_compaction_bytes = l0_file_size;
+  }
+  Status s = OpenDb(no_compact_opts, dbname, &db);
+  if (!s.ok()) {
+    return s;
+  }
+  CompactRangeOptions cro;
+  cro.change_level = true;
+  cro.target_level = dest_level;
+  if (dest_level == 0) {
+    // cannot use kForceOptimized because the compaction is expected to
+    // generate one output file
+    cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
+  }
+  s = db->CompactRange(cro, nullptr, nullptr);
+
+  if (s.ok() && need_reopen) {
+    // Need to restart the DB to rewrite the manifest file.
+    // In order to open a DB with a specific num_levels, the manifest file must
+    // contain no record that mentions any level beyond num_levels. Issuing a
+    // full compaction will move all the data to a level not exceeding
+    // num_levels, but the manifest may still contain a previous record
+    // mentioning a higher level. Reopening the DB forces the manifest to be
+    // rewritten so that those records are cleared.
+    db.reset();
+    s = OpenDb(no_compact_opts, dbname, &db);
+  }
+  return s;
+}
+
+Status MigrateToUniversal(std::string dbname, const Options& old_opts,
+                          const Options& new_opts) {
+  if (old_opts.num_levels <= new_opts.num_levels ||
+      old_opts.compaction_style == CompactionStyle::kCompactionStyleFIFO) {
+    return Status::OK();
+  } else {
+    bool need_compact = false;
+    {
+      std::unique_ptr<DB> db;
+      Options opts = GetNoCompactionOptions(old_opts);
+      Status s = OpenDb(opts, dbname, &db);
+      if (!s.ok()) {
+        return s;
+      }
+      ColumnFamilyMetaData metadata;
+      db->GetColumnFamilyMetaData(&metadata);
+      if (!metadata.levels.empty() &&
+          metadata.levels.back().level >= new_opts.num_levels) {
+        need_compact = true;
+      }
+    }
+    if (need_compact) {
+      return CompactToLevel(old_opts, dbname, new_opts.num_levels - 1,
+                            /*l0_file_size=*/0, true);
+    }
+    return Status::OK();
+  }
+}
+
+Status MigrateToLevelBase(std::string dbname, const Options& old_opts,
+                          const Options& new_opts) {
+  if (!new_opts.level_compaction_dynamic_level_bytes) {
+    if (old_opts.num_levels == 1) {
+      return Status::OK();
+    }
+    // Compact everything to level 1 to guarantee it can be safely opened.
+    Options opts = old_opts;
+    opts.target_file_size_base = new_opts.target_file_size_base;
+    // Although we can sometimes open the DB with the new options without
+    // error, we still want to compact the files to keep the LSM tree from
+    // getting stuck in a bad shape. For example, if the user changed the
+    // level size multiplier from 4 to 8, the same data will need fewer
+    // levels. Unless we issue a full compaction, the LSM tree may be stuck
+    // with more levels than needed and won't recover automatically.
+    return CompactToLevel(opts, dbname, 1, /*l0_file_size=*/0, true);
+  } else {
+    // Compact everything to the last level to guarantee it can be safely
+    // opened.
+    if (old_opts.num_levels == 1) {
+      return Status::OK();
+    } else if (new_opts.num_levels > old_opts.num_levels) {
+      // Dynamic level mode requires data to be put in the last level first.
+      return CompactToLevel(new_opts, dbname, new_opts.num_levels - 1,
+                            /*l0_file_size=*/0, false);
+    } else {
+      Options opts = old_opts;
+      opts.target_file_size_base = new_opts.target_file_size_base;
+      return CompactToLevel(opts, dbname, new_opts.num_levels - 1,
+                            /*l0_file_size=*/0, true);
+    }
+  }
+}
+} // namespace
+
+Status OptionChangeMigration(std::string dbname, const Options& old_opts,
+                             const Options& new_opts) {
+  if (old_opts.compaction_style == CompactionStyle::kCompactionStyleFIFO) {
+    // An LSM generated by FIFO compaction can be opened by any compaction
+    // style.
+    return Status::OK();
+  } else if (new_opts.compaction_style ==
+             CompactionStyle::kCompactionStyleUniversal) {
+    return MigrateToUniversal(dbname, old_opts, new_opts);
+  } else if (new_opts.compaction_style ==
+             CompactionStyle::kCompactionStyleLevel) {
+    return MigrateToLevelBase(dbname, old_opts, new_opts);
+  } else if (new_opts.compaction_style ==
+             CompactionStyle::kCompactionStyleFIFO) {
+    uint64_t l0_file_size = 0;
+    if (new_opts.compaction_options_fifo.max_table_files_size > 0) {
+      // Create at least 8 files when max_table_files_size hits, so that the DB
+      // doesn't just disappear. This in fact violates the FIFO condition, but
+      // otherwise, the migrated DB is unlikely to be usable.
+      l0_file_size = new_opts.compaction_options_fifo.max_table_files_size / 8;
+    }
+    return CompactToLevel(old_opts, dbname, 0, l0_file_size, true);
+  } else {
+    return Status::NotSupported(
+        "Do not know how to migrate to this compaction style");
+  }
+}
+} // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/options/options_util.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/options/options_util.cc
new file mode 100644
index 0000000000..394bf431f4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/options/options_util.cc
@@ -0,0 +1,117 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
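Reviewer's note, not part of the vendored sources: OptionChangeMigration above is the public entry point; a caller runs it on a closed DB before reopening it under the new options. A minimal sketch, with made-up option values:

#include <string>

#include "rocksdb/options.h"
#include "rocksdb/utilities/option_change_migration.h"

rocksdb::Status MigrateToUniversalStyle(const std::string& dbname,
                                        const rocksdb::Options& old_opts) {
  rocksdb::Options new_opts = old_opts;
  new_opts.compaction_style = rocksdb::kCompactionStyleUniversal;
  new_opts.num_levels = 1;  // made-up target shape
  // Rewrites the LSM (and the manifest, via reopen) so the DB can then be
  // opened cleanly with new_opts.
  return rocksdb::OptionChangeMigration(dbname, old_opts, new_opts);
}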
+ + +#include "rocksdb/utilities/options_util.h" + +#include "file/filename.h" +#include "options/options_parser.h" +#include "rocksdb/convenience.h" +#include "rocksdb/options.h" +#include "table/block_based/block_based_table_factory.h" + +namespace ROCKSDB_NAMESPACE { +Status LoadOptionsFromFile(const ConfigOptions& config_options, + const std::string& file_name, DBOptions* db_options, + std::vector* cf_descs, + std::shared_ptr* cache) { + RocksDBOptionsParser parser; + const auto& fs = config_options.env->GetFileSystem(); + Status s = parser.Parse(config_options, file_name, fs.get()); + if (!s.ok()) { + return s; + } + *db_options = *parser.db_opt(); + const std::vector& cf_names = *parser.cf_names(); + const std::vector& cf_opts = *parser.cf_opts(); + cf_descs->clear(); + for (size_t i = 0; i < cf_opts.size(); ++i) { + cf_descs->push_back({cf_names[i], cf_opts[i]}); + if (cache != nullptr) { + TableFactory* tf = cf_opts[i].table_factory.get(); + if (tf != nullptr) { + auto* opts = tf->GetOptions(); + if (opts != nullptr) { + opts->block_cache = *cache; + } + } + } + } + return Status::OK(); +} + +Status GetLatestOptionsFileName(const std::string& dbpath, Env* env, + std::string* options_file_name) { + Status s; + std::string latest_file_name; + uint64_t latest_time_stamp = 0; + std::vector file_names; + s = env->GetChildren(dbpath, &file_names); + if (s.IsNotFound()) { + return Status::NotFound(Status::kPathNotFound, + "No options files found in the DB directory.", + dbpath); + } else if (!s.ok()) { + return s; + } + for (auto& file_name : file_names) { + uint64_t time_stamp; + FileType type; + if (ParseFileName(file_name, &time_stamp, &type) && type == kOptionsFile) { + if (time_stamp > latest_time_stamp) { + latest_time_stamp = time_stamp; + latest_file_name = file_name; + } + } + } + if (latest_file_name.size() == 0) { + return Status::NotFound(Status::kPathNotFound, + "No options files found in the DB directory.", + dbpath); + } + *options_file_name = latest_file_name; + return Status::OK(); +} + +Status LoadLatestOptions(const ConfigOptions& config_options, + const std::string& dbpath, DBOptions* db_options, + std::vector* cf_descs, + std::shared_ptr* cache) { + std::string options_file_name; + Status s = + GetLatestOptionsFileName(dbpath, config_options.env, &options_file_name); + if (!s.ok()) { + return s; + } + return LoadOptionsFromFile(config_options, dbpath + "/" + options_file_name, + db_options, cf_descs, cache); +} + +Status CheckOptionsCompatibility( + const ConfigOptions& config_options, const std::string& dbpath, + const DBOptions& db_options, + const std::vector& cf_descs) { + std::string options_file_name; + Status s = + GetLatestOptionsFileName(dbpath, config_options.env, &options_file_name); + if (!s.ok()) { + return s; + } + + std::vector cf_names; + std::vector cf_opts; + for (const auto& cf_desc : cf_descs) { + cf_names.push_back(cf_desc.name); + cf_opts.push_back(cf_desc.options); + } + + const auto& fs = config_options.env->GetFileSystem(); + + return RocksDBOptionsParser::VerifyRocksDBOptionsFromFile( + config_options, db_options, cf_names, cf_opts, + dbpath + "/" + options_file_name, fs.get()); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.cc new file mode 100644 index 0000000000..3118fc2df6 --- /dev/null +++ 
b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.cc @@ -0,0 +1,420 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "utilities/persistent_cache/block_cache_tier.h" + +#include +#include + +#include "logging/logging.h" +#include "port/port.h" +#include "test_util/sync_point.h" +#include "util/stop_watch.h" +#include "utilities/persistent_cache/block_cache_tier_file.h" + +namespace ROCKSDB_NAMESPACE { + +// +// BlockCacheImpl +// +Status BlockCacheTier::Open() { + Status status; + + WriteLock _(&lock_); + + assert(!size_); + + // Check the validity of the options + status = opt_.ValidateSettings(); + assert(status.ok()); + if (!status.ok()) { + Error(opt_.log, "Invalid block cache options"); + return status; + } + + // Create base directory or cleanup existing directory + status = opt_.env->CreateDirIfMissing(opt_.path); + if (!status.ok()) { + Error(opt_.log, "Error creating directory %s. %s", opt_.path.c_str(), + status.ToString().c_str()); + return status; + } + + // Create base/ directory + status = opt_.env->CreateDir(GetCachePath()); + if (!status.ok()) { + // directory already exists, clean it up + status = CleanupCacheFolder(GetCachePath()); + assert(status.ok()); + if (!status.ok()) { + Error(opt_.log, "Error creating directory %s. %s", opt_.path.c_str(), + status.ToString().c_str()); + return status; + } + } + + // create a new file + assert(!cache_file_); + status = NewCacheFile(); + if (!status.ok()) { + Error(opt_.log, "Error creating new file %s. %s", opt_.path.c_str(), + status.ToString().c_str()); + return status; + } + + assert(cache_file_); + + if (opt_.pipeline_writes) { + assert(!insert_th_.joinable()); + insert_th_ = port::Thread(&BlockCacheTier::InsertMain, this); + } + + return Status::OK(); +} + +bool IsCacheFile(const std::string& file) { + // check if the file has .rc suffix + // Unfortunately regex support across compilers is not even, so we use simple + // string parsing + size_t pos = file.find("."); + if (pos == std::string::npos) { + return false; + } + + std::string suffix = file.substr(pos); + return suffix == ".rc"; +} + +Status BlockCacheTier::CleanupCacheFolder(const std::string& folder) { + std::vector files; + Status status = opt_.env->GetChildren(folder, &files); + if (!status.ok()) { + Error(opt_.log, "Error getting files for %s. %s", folder.c_str(), + status.ToString().c_str()); + return status; + } + + // cleanup files with the patter :digi:.rc + for (auto file : files) { + if (IsCacheFile(file)) { + // cache file + Info(opt_.log, "Removing file %s.", file.c_str()); + status = opt_.env->DeleteFile(folder + "/" + file); + if (!status.ok()) { + Error(opt_.log, "Error deleting file %s. 
%s", file.c_str(), + status.ToString().c_str()); + return status; + } + } else { + ROCKS_LOG_DEBUG(opt_.log, "Skipping file %s", file.c_str()); + } + } + return Status::OK(); +} + +Status BlockCacheTier::Close() { + // stop the insert thread + if (opt_.pipeline_writes && insert_th_.joinable()) { + InsertOp op(/*quit=*/true); + insert_ops_.Push(std::move(op)); + insert_th_.join(); + } + + // stop the writer before + writer_.Stop(); + + // clear all metadata + WriteLock _(&lock_); + metadata_.Clear(); + return Status::OK(); +} + +template +void Add(std::map* stats, const std::string& key, + const T& t) { + stats->insert({key, static_cast(t)}); +} + +PersistentCache::StatsType BlockCacheTier::Stats() { + std::map stats; + Add(&stats, "persistentcache.blockcachetier.bytes_piplined", + stats_.bytes_pipelined_.Average()); + Add(&stats, "persistentcache.blockcachetier.bytes_written", + stats_.bytes_written_.Average()); + Add(&stats, "persistentcache.blockcachetier.bytes_read", + stats_.bytes_read_.Average()); + Add(&stats, "persistentcache.blockcachetier.insert_dropped", + stats_.insert_dropped_); + Add(&stats, "persistentcache.blockcachetier.cache_hits", stats_.cache_hits_); + Add(&stats, "persistentcache.blockcachetier.cache_misses", + stats_.cache_misses_); + Add(&stats, "persistentcache.blockcachetier.cache_errors", + stats_.cache_errors_); + Add(&stats, "persistentcache.blockcachetier.cache_hits_pct", + stats_.CacheHitPct()); + Add(&stats, "persistentcache.blockcachetier.cache_misses_pct", + stats_.CacheMissPct()); + Add(&stats, "persistentcache.blockcachetier.read_hit_latency", + stats_.read_hit_latency_.Average()); + Add(&stats, "persistentcache.blockcachetier.read_miss_latency", + stats_.read_miss_latency_.Average()); + Add(&stats, "persistentcache.blockcachetier.write_latency", + stats_.write_latency_.Average()); + + auto out = PersistentCacheTier::Stats(); + out.push_back(stats); + return out; +} + +Status BlockCacheTier::Insert(const Slice& key, const char* data, + const size_t size) { + // update stats + stats_.bytes_pipelined_.Add(size); + + if (opt_.pipeline_writes) { + // off load the write to the write thread + insert_ops_.Push( + InsertOp(key.ToString(), std::move(std::string(data, size)))); + return Status::OK(); + } + + assert(!opt_.pipeline_writes); + return InsertImpl(key, Slice(data, size)); +} + +void BlockCacheTier::InsertMain() { + while (true) { + InsertOp op(insert_ops_.Pop()); + + if (op.signal_) { + // that is a secret signal to exit + break; + } + + size_t retry = 0; + Status s; + while ((s = InsertImpl(Slice(op.key_), Slice(op.data_))).IsTryAgain()) { + if (retry > kMaxRetry) { + break; + } + + // this can happen when the buffers are full, we wait till some buffers + // are free. Why don't we wait inside the code. 
This is because we want + // to support both pipelined and non-pipelined mode + buffer_allocator_.WaitUntilUsable(); + retry++; + } + + if (!s.ok()) { + stats_.insert_dropped_++; + } + } +} + +Status BlockCacheTier::InsertImpl(const Slice& key, const Slice& data) { + // pre-condition + assert(key.size()); + assert(data.size()); + assert(cache_file_); + + StopWatchNano timer(opt_.clock, /*auto_start=*/true); + + WriteLock _(&lock_); + + LBA lba; + if (metadata_.Lookup(key, &lba)) { + // the key already exists, this is duplicate insert + return Status::OK(); + } + + while (!cache_file_->Append(key, data, &lba)) { + if (!cache_file_->Eof()) { + ROCKS_LOG_DEBUG(opt_.log, "Error inserting to cache file %d", + cache_file_->cacheid()); + stats_.write_latency_.Add(timer.ElapsedNanos() / 1000); + return Status::TryAgain(); + } + + assert(cache_file_->Eof()); + Status status = NewCacheFile(); + if (!status.ok()) { + return status; + } + } + + // Insert into lookup index + BlockInfo* info = metadata_.Insert(key, lba); + assert(info); + if (!info) { + return Status::IOError("Unexpected error inserting to index"); + } + + // insert to cache file reverse mapping + cache_file_->Add(info); + + // update stats + stats_.bytes_written_.Add(data.size()); + stats_.write_latency_.Add(timer.ElapsedNanos() / 1000); + return Status::OK(); +} + +Status BlockCacheTier::Lookup(const Slice& key, std::unique_ptr* val, + size_t* size) { + StopWatchNano timer(opt_.clock, /*auto_start=*/true); + + LBA lba; + bool status; + status = metadata_.Lookup(key, &lba); + if (!status) { + stats_.cache_misses_++; + stats_.read_miss_latency_.Add(timer.ElapsedNanos() / 1000); + return Status::NotFound("blockcache: key not found"); + } + + BlockCacheFile* const file = metadata_.Lookup(lba.cache_id_); + if (!file) { + // this can happen because the block index and cache file index are + // different, and the cache file might be removed between the two lookups + stats_.cache_misses_++; + stats_.read_miss_latency_.Add(timer.ElapsedNanos() / 1000); + return Status::NotFound("blockcache: cache file not found"); + } + + assert(file->refs_); + + std::unique_ptr scratch(new char[lba.size_]); + Slice blk_key; + Slice blk_val; + + status = file->Read(lba, &blk_key, &blk_val, scratch.get()); + --file->refs_; + if (!status) { + stats_.cache_misses_++; + stats_.cache_errors_++; + stats_.read_miss_latency_.Add(timer.ElapsedNanos() / 1000); + return Status::NotFound("blockcache: error reading data"); + } + + assert(blk_key == key); + + val->reset(new char[blk_val.size()]); + memcpy(val->get(), blk_val.data(), blk_val.size()); + *size = blk_val.size(); + + stats_.bytes_read_.Add(*size); + stats_.cache_hits_++; + stats_.read_hit_latency_.Add(timer.ElapsedNanos() / 1000); + + return Status::OK(); +} + +bool BlockCacheTier::Erase(const Slice& key) { + WriteLock _(&lock_); + BlockInfo* info = metadata_.Remove(key); + assert(info); + delete info; + return true; +} + +Status BlockCacheTier::NewCacheFile() { + lock_.AssertHeld(); + + TEST_SYNC_POINT_CALLBACK("BlockCacheTier::NewCacheFile:DeleteDir", + (void*)(GetCachePath().c_str())); + + std::unique_ptr f(new WriteableCacheFile( + opt_.env, &buffer_allocator_, &writer_, GetCachePath(), writer_cache_id_, + opt_.cache_file_size, opt_.log)); + + bool status = f->Create(opt_.enable_direct_writes, opt_.enable_direct_reads); + if (!status) { + return Status::IOError("Error creating file"); + } + + Info(opt_.log, "Created cache file %d", writer_cache_id_); + + writer_cache_id_++; + cache_file_ = f.release(); + + 
+  // insert to cache files tree
+  status = metadata_.Insert(cache_file_);
+  assert(status);
+  if (!status) {
+    Error(opt_.log, "Error inserting to metadata");
+    return Status::IOError("Error inserting to metadata");
+  }
+
+  return Status::OK();
+}
+
+bool BlockCacheTier::Reserve(const size_t size) {
+  WriteLock _(&lock_);
+  assert(size_ <= opt_.cache_size);
+
+  if (size + size_ <= opt_.cache_size) {
+    // there is enough space to write
+    size_ += size;
+    return true;
+  }
+
+  assert(size + size_ >= opt_.cache_size);
+  // there is not enough space to fit the requested data;
+  // we can clear some space by evicting cold data
+
+  const double retain_fac = (100 - kEvictPct) / static_cast<double>(100);
+  while (size + size_ > opt_.cache_size * retain_fac) {
+    std::unique_ptr<BlockCacheFile> f(metadata_.Evict());
+    if (!f) {
+      // nothing is evictable
+      return false;
+    }
+    assert(!f->refs_);
+    uint64_t file_size;
+    if (!f->Delete(&file_size).ok()) {
+      // unable to delete file
+      return false;
+    }
+
+    assert(file_size <= size_);
+    size_ -= file_size;
+  }
+
+  size_ += size;
+  assert(size_ <= opt_.cache_size * 0.9);
+  return true;
+}
+
+Status NewPersistentCache(Env* const env, const std::string& path,
+                          const uint64_t size,
+                          const std::shared_ptr<Logger>& log,
+                          const bool optimized_for_nvm,
+                          std::shared_ptr<PersistentCache>* cache) {
+  if (!cache) {
+    return Status::IOError("invalid argument cache");
+  }
+
+  auto opt = PersistentCacheConfig(env, path, size, log);
+  if (optimized_for_nvm) {
+    // the default settings are optimized for SSD; NVM devices are better
+    // accessed with 4K direct IO and written with parallelism
+    opt.enable_direct_writes = true;
+    opt.writer_qdepth = 4;
+    opt.writer_dispatch_size = 4 * 1024;
+  }
+
+  auto pcache = std::make_shared<BlockCacheTier>(opt);
+  Status s = pcache->Open();
+
+  if (!s.ok()) {
+    return s;
+  }
+
+  *cache = pcache;
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.h
new file mode 100644
index 0000000000..caabbef94e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier.h
@@ -0,0 +1,154 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#ifndef OS_WIN
+#include <unistd.h>
+#endif  // ! OS_WIN
+
+#include <atomic>
+#include <list>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <thread>
+
+#include "memory/arena.h"
+#include "memtable/skiplist.h"
+#include "monitoring/histogram.h"
+#include "port/port.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/persistent_cache.h"
+#include "rocksdb/system_clock.h"
+#include "util/coding.h"
+#include "util/crc32c.h"
+#include "util/mutexlock.h"
+#include "utilities/persistent_cache/block_cache_tier_file.h"
+#include "utilities/persistent_cache/block_cache_tier_metadata.h"
+#include "utilities/persistent_cache/persistent_cache_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+//
+// Block cache tier implementation
+//
+class BlockCacheTier : public PersistentCacheTier {
+ public:
+  explicit BlockCacheTier(const PersistentCacheConfig& opt)
+      : opt_(opt),
+        insert_ops_(static_cast<size_t>(opt_.max_write_pipeline_backlog_size)),
+        buffer_allocator_(opt.write_buffer_size, opt.write_buffer_count()),
+        writer_(this, opt_.writer_qdepth,
+                static_cast<size_t>(opt_.writer_dispatch_size)) {
+    Info(opt_.log, "Initializing allocator. size=%d B count=%" ROCKSDB_PRIszt,
+         opt_.write_buffer_size, opt_.write_buffer_count());
+  }
+
+  virtual ~BlockCacheTier() {
+    // Close is re-entrant so we can call close even if it is already closed
+    Close().PermitUncheckedError();
+    assert(!insert_th_.joinable());
+  }
+
+  Status Insert(const Slice& key, const char* data, const size_t size) override;
+  Status Lookup(const Slice& key, std::unique_ptr<char[]>* data,
+                size_t* size) override;
+  Status Open() override;
+  Status Close() override;
+  bool Erase(const Slice& key) override;
+  bool Reserve(const size_t size) override;
+
+  bool IsCompressed() override { return opt_.is_compressed; }
+
+  std::string GetPrintableOptions() const override { return opt_.ToString(); }
+
+  PersistentCache::StatsType Stats() override;
+
+  void TEST_Flush() override {
+    while (insert_ops_.Size()) {
+      /* sleep override */
+      SystemClock::Default()->SleepForMicroseconds(1000000);
+    }
+  }
+
+ private:
+  // Percentage of cache to be evicted when the cache is full
+  static const size_t kEvictPct = 10;
+  // Max attempts to insert key, value to cache in pipelined mode
+  static const size_t kMaxRetry = 3;
+
+  // Pipelined operation
+  struct InsertOp {
+    explicit InsertOp(const bool signal) : signal_(signal) {}
+    explicit InsertOp(std::string&& key, const std::string& data)
+        : key_(std::move(key)), data_(data) {}
+    ~InsertOp() {}
+
+    InsertOp() = delete;
+    InsertOp(InsertOp&& /*rhs*/) = default;
+    InsertOp& operator=(InsertOp&& rhs) = default;
+
+    // used for estimating size by bounded queue
+    size_t Size() { return data_.size() + key_.size(); }
+
+    std::string key_;
+    std::string data_;
+    bool signal_ = false;  // signal to request processing thread to exit
+  };
+
+  // entry point for insert thread
+  void InsertMain();
+  // insert implementation
+  Status InsertImpl(const Slice& key, const Slice& data);
+  // Create a new cache file
+  Status NewCacheFile();
+  // Get cache directory path
+  std::string GetCachePath() const { return opt_.path + "/cache"; }
+  // Cleanup folder
+  Status CleanupCacheFolder(const std::string& folder);
+
+  // Statistics
+  struct Statistics {
+    HistogramImpl bytes_pipelined_;
+    HistogramImpl bytes_written_;
+    HistogramImpl bytes_read_;
+    HistogramImpl read_hit_latency_;
+    HistogramImpl read_miss_latency_;
+    HistogramImpl write_latency_;
+    std::atomic<uint64_t> cache_hits_{0};
+    std::atomic<uint64_t> cache_misses_{0};
+    std::atomic<uint64_t> cache_errors_{0};
+    std::atomic<uint64_t> insert_dropped_{0};
+
+    double
CacheHitPct() const { + const auto lookups = cache_hits_ + cache_misses_; + return lookups ? 100 * cache_hits_ / static_cast(lookups) : 0.0; + } + + double CacheMissPct() const { + const auto lookups = cache_hits_ + cache_misses_; + return lookups ? 100 * cache_misses_ / static_cast(lookups) : 0.0; + } + }; + + port::RWMutex lock_; // Synchronization + const PersistentCacheConfig opt_; // BlockCache options + BoundedQueue insert_ops_; // Ops waiting for insert + ROCKSDB_NAMESPACE::port::Thread insert_th_; // Insert thread + uint32_t writer_cache_id_ = 0; // Current cache file identifier + WriteableCacheFile* cache_file_ = nullptr; // Current cache file reference + CacheWriteBufferAllocator buffer_allocator_; // Buffer provider + ThreadedWriter writer_; // Writer threads + BlockCacheTierMetadata metadata_; // Cache meta data manager + std::atomic size_{0}; // Size of the cache + Statistics stats_; // Statistics +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.cc new file mode 100644 index 0000000000..9a667c1fdc --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.cc @@ -0,0 +1,608 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "utilities/persistent_cache/block_cache_tier_file.h" + +#ifndef OS_WIN +#include +#endif +#include +#include +#include + +#include "env/composite_env_wrapper.h" +#include "logging/logging.h" +#include "port/port.h" +#include "rocksdb/system_clock.h" +#include "util/crc32c.h" + +namespace ROCKSDB_NAMESPACE { + +// +// File creation factories +// +Status NewWritableCacheFile(Env* const env, const std::string& filepath, + std::unique_ptr* file, + const bool use_direct_writes = false) { + EnvOptions opt; + opt.use_direct_writes = use_direct_writes; + Status s = env->NewWritableFile(filepath, file, opt); + return s; +} + +Status NewRandomAccessCacheFile(const std::shared_ptr& fs, + const std::string& filepath, + std::unique_ptr* file, + const bool use_direct_reads = true) { + assert(fs.get()); + + FileOptions opt; + opt.use_direct_reads = use_direct_reads; + return fs->NewRandomAccessFile(filepath, opt, file, nullptr); +} + +// +// BlockCacheFile +// +Status BlockCacheFile::Delete(uint64_t* size) { + assert(env_); + + Status status = env_->GetFileSize(Path(), size); + if (!status.ok()) { + return status; + } + return env_->DeleteFile(Path()); +} + +// +// CacheRecord +// +// Cache record represents the record on disk +// +// +--------+---------+----------+------------+---------------+-------------+ +// | magic | crc | key size | value size | key data | value data | +// +--------+---------+----------+------------+---------------+-------------+ +// <-- 4 --><-- 4 --><-- 4 --><-- 4 --><-- key size --><-- v-size --> +// +struct CacheRecordHeader { + CacheRecordHeader() : magic_(0), crc_(0), key_size_(0), val_size_(0) {} + CacheRecordHeader(const uint32_t magic, const uint32_t key_size, + const uint32_t val_size) + : magic_(magic), crc_(0), key_size_(key_size), val_size_(val_size) {} + + uint32_t magic_; + uint32_t crc_; + uint32_t key_size_; + uint32_t val_size_; +}; + +struct CacheRecord { + 
CacheRecord() {} + CacheRecord(const Slice& key, const Slice& val) + : hdr_(MAGIC, static_cast(key.size()), + static_cast(val.size())), + key_(key), + val_(val) { + hdr_.crc_ = ComputeCRC(); + } + + uint32_t ComputeCRC() const; + bool Serialize(std::vector* bufs, size_t* woff); + bool Deserialize(const Slice& buf); + + static uint32_t CalcSize(const Slice& key, const Slice& val) { + return static_cast(sizeof(CacheRecordHeader) + key.size() + + val.size()); + } + + static const uint32_t MAGIC = 0xfefa; + + bool Append(std::vector* bufs, size_t* woff, + const char* data, const size_t size); + + CacheRecordHeader hdr_; + Slice key_; + Slice val_; +}; + +static_assert(sizeof(CacheRecordHeader) == 16, "DataHeader is not aligned"); + +uint32_t CacheRecord::ComputeCRC() const { + uint32_t crc = 0; + CacheRecordHeader tmp = hdr_; + tmp.crc_ = 0; + crc = crc32c::Extend(crc, reinterpret_cast(&tmp), sizeof(tmp)); + crc = crc32c::Extend(crc, reinterpret_cast(key_.data()), + key_.size()); + crc = crc32c::Extend(crc, reinterpret_cast(val_.data()), + val_.size()); + return crc; +} + +bool CacheRecord::Serialize(std::vector* bufs, + size_t* woff) { + assert(bufs->size()); + return Append(bufs, woff, reinterpret_cast(&hdr_), + sizeof(hdr_)) && + Append(bufs, woff, reinterpret_cast(key_.data()), + key_.size()) && + Append(bufs, woff, reinterpret_cast(val_.data()), + val_.size()); +} + +bool CacheRecord::Append(std::vector* bufs, size_t* woff, + const char* data, const size_t data_size) { + assert(*woff < bufs->size()); + + const char* p = data; + size_t size = data_size; + + while (size && *woff < bufs->size()) { + CacheWriteBuffer* buf = (*bufs)[*woff]; + const size_t free = buf->Free(); + if (size <= free) { + buf->Append(p, size); + size = 0; + } else { + buf->Append(p, free); + p += free; + size -= free; + assert(!buf->Free()); + assert(buf->Used() == buf->Capacity()); + } + + if (!buf->Free()) { + *woff += 1; + } + } + + assert(!size); + + return !size; +} + +bool CacheRecord::Deserialize(const Slice& data) { + assert(data.size() >= sizeof(CacheRecordHeader)); + if (data.size() < sizeof(CacheRecordHeader)) { + return false; + } + + memcpy(&hdr_, data.data(), sizeof(hdr_)); + + assert(hdr_.key_size_ + hdr_.val_size_ + sizeof(hdr_) == data.size()); + if (hdr_.key_size_ + hdr_.val_size_ + sizeof(hdr_) != data.size()) { + return false; + } + + key_ = Slice(data.data_ + sizeof(hdr_), hdr_.key_size_); + val_ = Slice(key_.data_ + hdr_.key_size_, hdr_.val_size_); + + if (!(hdr_.magic_ == MAGIC && ComputeCRC() == hdr_.crc_)) { + fprintf(stderr, "** magic %d ** \n", hdr_.magic_); + fprintf(stderr, "** key_size %d ** \n", hdr_.key_size_); + fprintf(stderr, "** val_size %d ** \n", hdr_.val_size_); + fprintf(stderr, "** key %s ** \n", key_.ToString().c_str()); + fprintf(stderr, "** val %s ** \n", val_.ToString().c_str()); + for (size_t i = 0; i < hdr_.val_size_; ++i) { + fprintf(stderr, "%d.", (uint8_t)val_.data()[i]); + } + fprintf(stderr, "\n** cksum %d != %d **", hdr_.crc_, ComputeCRC()); + } + + assert(hdr_.magic_ == MAGIC && ComputeCRC() == hdr_.crc_); + return hdr_.magic_ == MAGIC && ComputeCRC() == hdr_.crc_; +} + +// +// RandomAccessFile +// + +bool RandomAccessCacheFile::Open(const bool enable_direct_reads) { + WriteLock _(&rwlock_); + return OpenImpl(enable_direct_reads); +} + +bool RandomAccessCacheFile::OpenImpl(const bool enable_direct_reads) { + rwlock_.AssertHeld(); + + ROCKS_LOG_DEBUG(log_, "Opening cache file %s", Path().c_str()); + assert(env_); + + std::unique_ptr file; + Status status = 
NewRandomAccessCacheFile(env_->GetFileSystem(), Path(), &file, + enable_direct_reads); + if (!status.ok()) { + Error(log_, "Error opening random access file %s. %s", Path().c_str(), + status.ToString().c_str()); + return false; + } + freader_.reset(new RandomAccessFileReader(std::move(file), Path(), + env_->GetSystemClock().get())); + + return true; +} + +bool RandomAccessCacheFile::Read(const LBA& lba, Slice* key, Slice* val, + char* scratch) { + ReadLock _(&rwlock_); + + assert(lba.cache_id_ == cache_id_); + + if (!freader_) { + return false; + } + + Slice result; + Status s = freader_->Read(IOOptions(), lba.off_, lba.size_, &result, scratch, + nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + if (!s.ok()) { + Error(log_, "Error reading from file %s. %s", Path().c_str(), + s.ToString().c_str()); + return false; + } + + assert(result.data() == scratch); + + return ParseRec(lba, key, val, scratch); +} + +bool RandomAccessCacheFile::ParseRec(const LBA& lba, Slice* key, Slice* val, + char* scratch) { + Slice data(scratch, lba.size_); + + CacheRecord rec; + if (!rec.Deserialize(data)) { + assert(!"Error deserializing data"); + Error(log_, "Error de-serializing record from file %s off %d", + Path().c_str(), lba.off_); + return false; + } + + *key = Slice(rec.key_); + *val = Slice(rec.val_); + + return true; +} + +// +// WriteableCacheFile +// + +WriteableCacheFile::~WriteableCacheFile() { + WriteLock _(&rwlock_); + if (!eof_) { + // This file never flushed. We give priority to shutdown since this is a + // cache + // TODO(krad): Figure a way to flush the pending data + if (file_) { + assert(refs_ == 1); + --refs_; + } + } + assert(!refs_); + ClearBuffers(); +} + +bool WriteableCacheFile::Create(const bool /*enable_direct_writes*/, + const bool enable_direct_reads) { + WriteLock _(&rwlock_); + + enable_direct_reads_ = enable_direct_reads; + + ROCKS_LOG_DEBUG(log_, "Creating new cache %s (max size is %d B)", + Path().c_str(), max_size_); + + assert(env_); + + Status s = env_->FileExists(Path()); + if (s.ok()) { + ROCKS_LOG_WARN(log_, "File %s already exists. %s", Path().c_str(), + s.ToString().c_str()); + } + + s = NewWritableCacheFile(env_, Path(), &file_); + if (!s.ok()) { + ROCKS_LOG_WARN(log_, "Unable to create file %s. %s", Path().c_str(), + s.ToString().c_str()); + return false; + } + + assert(!refs_); + ++refs_; + + return true; +} + +bool WriteableCacheFile::Append(const Slice& key, const Slice& val, LBA* lba) { + WriteLock _(&rwlock_); + + if (eof_) { + // We can't append since the file is full + return false; + } + + // estimate the space required to store the (key, val) + uint32_t rec_size = CacheRecord::CalcSize(key, val); + + if (!ExpandBuffer(rec_size)) { + // unable to expand the buffer + ROCKS_LOG_DEBUG(log_, "Error expanding buffers. 
size=%d", rec_size); + return false; + } + + lba->cache_id_ = cache_id_; + lba->off_ = disk_woff_; + lba->size_ = rec_size; + + CacheRecord rec(key, val); + if (!rec.Serialize(&bufs_, &buf_woff_)) { + // unexpected error: unable to serialize the data + assert(!"Error serializing record"); + return false; + } + + disk_woff_ += rec_size; + eof_ = disk_woff_ >= max_size_; + + // dispatch buffer for flush + DispatchBuffer(); + + return true; +} + +bool WriteableCacheFile::ExpandBuffer(const size_t size) { + rwlock_.AssertHeld(); + assert(!eof_); + + // determine if there is enough space + size_t free = 0; // compute the free space left in buffer + for (size_t i = buf_woff_; i < bufs_.size(); ++i) { + free += bufs_[i]->Free(); + if (size <= free) { + // we have enough space in the buffer + return true; + } + } + + // expand the buffer until there is enough space to write `size` bytes + assert(free < size); + assert(alloc_); + + while (free < size) { + CacheWriteBuffer* const buf = alloc_->Allocate(); + if (!buf) { + ROCKS_LOG_DEBUG(log_, "Unable to allocate buffers"); + return false; + } + + size_ += static_cast(buf->Free()); + free += buf->Free(); + bufs_.push_back(buf); + } + + assert(free >= size); + return true; +} + +void WriteableCacheFile::DispatchBuffer() { + rwlock_.AssertHeld(); + + assert(bufs_.size()); + assert(buf_doff_ <= buf_woff_); + assert(buf_woff_ <= bufs_.size()); + + if (pending_ios_) { + return; + } + + if (!eof_ && buf_doff_ == buf_woff_) { + // dispatch buffer is pointing to write buffer and we haven't hit eof + return; + } + + assert(eof_ || buf_doff_ < buf_woff_); + assert(buf_doff_ < bufs_.size()); + assert(file_); + assert(alloc_); + + auto* buf = bufs_[buf_doff_]; + const uint64_t file_off = buf_doff_ * alloc_->BufferSize(); + + assert(!buf->Free() || + (eof_ && buf_doff_ == buf_woff_ && buf_woff_ < bufs_.size())); + // we have reached end of file, and there is space in the last buffer + // pad it with zero for direct IO + buf->FillTrailingZeros(); + + assert(buf->Used() % kFileAlignmentSize == 0); + + writer_->Write(file_.get(), buf, file_off, + std::bind(&WriteableCacheFile::BufferWriteDone, this)); + pending_ios_++; + buf_doff_++; +} + +void WriteableCacheFile::BufferWriteDone() { + WriteLock _(&rwlock_); + + assert(bufs_.size()); + + pending_ios_--; + + if (buf_doff_ < bufs_.size()) { + DispatchBuffer(); + } + + if (eof_ && buf_doff_ >= bufs_.size() && !pending_ios_) { + // end-of-file reached, move to read mode + CloseAndOpenForReading(); + } +} + +void WriteableCacheFile::CloseAndOpenForReading() { + // Our env abstraction do not allow reading from a file opened for appending + // We need close the file and re-open it for reading + Close(); + RandomAccessCacheFile::OpenImpl(enable_direct_reads_); +} + +bool WriteableCacheFile::ReadBuffer(const LBA& lba, Slice* key, Slice* block, + char* scratch) { + rwlock_.AssertHeld(); + + if (!ReadBuffer(lba, scratch)) { + Error(log_, "Error reading from buffer. cache=%d off=%d", cache_id_, + lba.off_); + return false; + } + + return ParseRec(lba, key, block, scratch); +} + +bool WriteableCacheFile::ReadBuffer(const LBA& lba, char* data) { + rwlock_.AssertHeld(); + + assert(lba.off_ < disk_woff_); + assert(alloc_); + + // we read from the buffers like reading from a flat file. 
The list of buffers + // are treated as contiguous stream of data + + char* tmp = data; + size_t pending_nbytes = lba.size_; + // start buffer + size_t start_idx = lba.off_ / alloc_->BufferSize(); + // offset into the start buffer + size_t start_off = lba.off_ % alloc_->BufferSize(); + + assert(start_idx <= buf_woff_); + + for (size_t i = start_idx; pending_nbytes && i < bufs_.size(); ++i) { + assert(i <= buf_woff_); + auto* buf = bufs_[i]; + assert(i == buf_woff_ || !buf->Free()); + // bytes to write to the buffer + size_t nbytes = pending_nbytes > (buf->Used() - start_off) + ? (buf->Used() - start_off) + : pending_nbytes; + memcpy(tmp, buf->Data() + start_off, nbytes); + + // left over to be written + pending_nbytes -= nbytes; + start_off = 0; + tmp += nbytes; + } + + assert(!pending_nbytes); + if (pending_nbytes) { + return false; + } + + assert(tmp == data + lba.size_); + return true; +} + +void WriteableCacheFile::Close() { + rwlock_.AssertHeld(); + + assert(size_ >= max_size_); + assert(disk_woff_ >= max_size_); + assert(buf_doff_ == bufs_.size()); + assert(bufs_.size() - buf_woff_ <= 1); + assert(!pending_ios_); + + Info(log_, "Closing file %s. size=%d written=%d", Path().c_str(), size_, + disk_woff_); + + ClearBuffers(); + file_.reset(); + + assert(refs_); + --refs_; +} + +void WriteableCacheFile::ClearBuffers() { + assert(alloc_); + + for (size_t i = 0; i < bufs_.size(); ++i) { + alloc_->Deallocate(bufs_[i]); + } + + bufs_.clear(); +} + +// +// ThreadedFileWriter implementation +// +ThreadedWriter::ThreadedWriter(PersistentCacheTier* const cache, + const size_t qdepth, const size_t io_size) + : Writer(cache), io_size_(io_size) { + for (size_t i = 0; i < qdepth; ++i) { + port::Thread th(&ThreadedWriter::ThreadMain, this); + threads_.push_back(std::move(th)); + } +} + +void ThreadedWriter::Stop() { + // notify all threads to exit + for (size_t i = 0; i < threads_.size(); ++i) { + q_.Push(IO(/*signal=*/true)); + } + + // wait for all threads to exit + for (auto& th : threads_) { + th.join(); + assert(!th.joinable()); + } + threads_.clear(); +} + +void ThreadedWriter::Write(WritableFile* const file, CacheWriteBuffer* buf, + const uint64_t file_off, + const std::function callback) { + q_.Push(IO(file, buf, file_off, callback)); +} + +void ThreadedWriter::ThreadMain() { + while (true) { + // Fetch the IO to process + IO io(q_.Pop()); + if (io.signal_) { + // that's secret signal to exit + break; + } + + // Reserve space for writing the buffer + while (!cache_->Reserve(io.buf_->Used())) { + // We can fail to reserve space if every file in the system + // is being currently accessed + /* sleep override */ + SystemClock::Default()->SleepForMicroseconds(1000000); + } + + DispatchIO(io); + + io.callback_(); + } +} + +void ThreadedWriter::DispatchIO(const IO& io) { + size_t written = 0; + while (written < io.buf_->Used()) { + Slice data(io.buf_->Data() + written, io_size_); + Status s = io.file_->Append(data); + assert(s.ok()); + if (!s.ok()) { + // That is definite IO error to device. There is not much we can + // do but ignore the failure. This can lead to corruption of data on + // disk, but the cache will skip while reading + fprintf(stderr, "Error writing data to file. 
%s\n", s.ToString().c_str()); + } + written += io_size_; + } +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.h new file mode 100644 index 0000000000..127f5a8676 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file.h @@ -0,0 +1,291 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + + +#include +#include +#include +#include + +#include "file/random_access_file_reader.h" +#include "port/port.h" +#include "rocksdb/comparator.h" +#include "rocksdb/env.h" +#include "util/crc32c.h" +#include "util/mutexlock.h" +#include "utilities/persistent_cache/block_cache_tier_file_buffer.h" +#include "utilities/persistent_cache/lrulist.h" +#include "utilities/persistent_cache/persistent_cache_tier.h" +#include "utilities/persistent_cache/persistent_cache_util.h" + +// The io code path of persistent cache uses pipelined architecture +// +// client -> In Queue <-- BlockCacheTier --> Out Queue <-- Writer <--> Kernel +// +// This would enable the system to scale for GB/s of throughput which is +// expected with modern devies like NVM. +// +// The file level operations are encapsulated in the following abstractions +// +// BlockCacheFile +// ^ +// | +// | +// RandomAccessCacheFile (For reading) +// ^ +// | +// | +// WriteableCacheFile (For writing) +// +// Write IO code path : +// +namespace ROCKSDB_NAMESPACE { + +class WriteableCacheFile; +struct BlockInfo; + +// Represents a logical record on device +// +// (L)ogical (B)lock (Address = { cache-file-id, offset, size } +struct LogicalBlockAddress { + LogicalBlockAddress() {} + explicit LogicalBlockAddress(const uint32_t cache_id, const uint32_t off, + const uint16_t size) + : cache_id_(cache_id), off_(off), size_(size) {} + + uint32_t cache_id_ = 0; + uint32_t off_ = 0; + uint32_t size_ = 0; +}; + +using LBA = LogicalBlockAddress; + +// class Writer +// +// Writer is the abstraction used for writing data to file. The component can be +// multithreaded. 
It is the last step of write pipeline +class Writer { + public: + explicit Writer(PersistentCacheTier* const cache) : cache_(cache) {} + virtual ~Writer() {} + + // write buffer to file at the given offset + virtual void Write(WritableFile* const file, CacheWriteBuffer* buf, + const uint64_t file_off, + const std::function callback) = 0; + // stop the writer + virtual void Stop() = 0; + + PersistentCacheTier* const cache_; +}; + +// class BlockCacheFile +// +// Generic interface to support building file specialized for read/writing +class BlockCacheFile : public LRUElement { + public: + explicit BlockCacheFile(const uint32_t cache_id) + : LRUElement(), cache_id_(cache_id) {} + + explicit BlockCacheFile(Env* const env, const std::string& dir, + const uint32_t cache_id) + : LRUElement(), + env_(env), + dir_(dir), + cache_id_(cache_id) {} + + virtual ~BlockCacheFile() {} + + // append key/value to file and return LBA locator to user + virtual bool Append(const Slice& /*key*/, const Slice& /*val*/, + LBA* const /*lba*/) { + assert(!"not implemented"); + return false; + } + + // read from the record locator (LBA) and return key, value and status + virtual bool Read(const LBA& /*lba*/, Slice* /*key*/, Slice* /*block*/, + char* /*scratch*/) { + assert(!"not implemented"); + return false; + } + + // get file path + std::string Path() const { + return dir_ + "/" + std::to_string(cache_id_) + ".rc"; + } + // get cache ID + uint32_t cacheid() const { return cache_id_; } + // Add block information to file data + // Block information is the list of index reference for this file + virtual void Add(BlockInfo* binfo) { + WriteLock _(&rwlock_); + block_infos_.push_back(binfo); + } + // get block information + std::list& block_infos() { return block_infos_; } + // delete file and return the size of the file + virtual Status Delete(uint64_t* size); + + protected: + port::RWMutex rwlock_; // synchronization mutex + Env* const env_ = nullptr; // Env for OS + const std::string dir_; // Directory name + const uint32_t cache_id_; // Cache id for the file + std::list block_infos_; // List of index entries mapping to the + // file content +}; + +// class RandomAccessFile +// +// Thread safe implementation for reading random data from file +class RandomAccessCacheFile : public BlockCacheFile { + public: + explicit RandomAccessCacheFile(Env* const env, const std::string& dir, + const uint32_t cache_id, + const std::shared_ptr& log) + : BlockCacheFile(env, dir, cache_id), log_(log) {} + + virtual ~RandomAccessCacheFile() {} + + // open file for reading + bool Open(const bool enable_direct_reads); + // read data from the disk + bool Read(const LBA& lba, Slice* key, Slice* block, char* scratch) override; + + private: + std::unique_ptr freader_; + + protected: + bool OpenImpl(const bool enable_direct_reads); + bool ParseRec(const LBA& lba, Slice* key, Slice* val, char* scratch); + + std::shared_ptr log_; // log file +}; + +// class WriteableCacheFile +// +// All writes to the files are cached in buffers. The buffers are flushed to +// disk as they get filled up. 
When file size reaches a certain size, a new file +// will be created provided there is free space +class WriteableCacheFile : public RandomAccessCacheFile { + public: + explicit WriteableCacheFile(Env* const env, CacheWriteBufferAllocator* alloc, + Writer* writer, const std::string& dir, + const uint32_t cache_id, const uint32_t max_size, + const std::shared_ptr& log) + : RandomAccessCacheFile(env, dir, cache_id, log), + alloc_(alloc), + writer_(writer), + max_size_(max_size) {} + + virtual ~WriteableCacheFile(); + + // create file on disk + bool Create(const bool enable_direct_writes, const bool enable_direct_reads); + + // read data from logical file + bool Read(const LBA& lba, Slice* key, Slice* block, char* scratch) override { + ReadLock _(&rwlock_); + const bool closed = eof_ && bufs_.empty(); + if (closed) { + // the file is closed, read from disk + return RandomAccessCacheFile::Read(lba, key, block, scratch); + } + // file is still being written, read from buffers + return ReadBuffer(lba, key, block, scratch); + } + + // append data to end of file + bool Append(const Slice&, const Slice&, LBA* const) override; + // End-of-file + bool Eof() const { return eof_; } + + private: + friend class ThreadedWriter; + + static const size_t kFileAlignmentSize = 4 * 1024; // align file size + + bool ReadBuffer(const LBA& lba, Slice* key, Slice* block, char* scratch); + bool ReadBuffer(const LBA& lba, char* data); + bool ExpandBuffer(const size_t size); + void DispatchBuffer(); + void BufferWriteDone(); + void CloseAndOpenForReading(); + void ClearBuffers(); + void Close(); + + // File layout in memory + // + // +------+------+------+------+------+------+ + // | b0 | b1 | b2 | b3 | b4 | b5 | + // +------+------+------+------+------+------+ + // ^ ^ + // | | + // buf_doff_ buf_woff_ + // (next buffer to (next buffer to fill) + // flush to disk) + // + // The buffers are flushed to disk serially for a given file + + CacheWriteBufferAllocator* const alloc_ = nullptr; // Buffer provider + Writer* const writer_ = nullptr; // File writer thread + std::unique_ptr file_; // RocksDB Env file abstraction + std::vector bufs_; // Written buffers + uint32_t size_ = 0; // Size of the file + const uint32_t max_size_; // Max size of the file + bool eof_ = false; // End of file + uint32_t disk_woff_ = 0; // Offset to write on disk + size_t buf_woff_ = 0; // off into bufs_ to write + size_t buf_doff_ = 0; // off into bufs_ to dispatch + size_t pending_ios_ = 0; // Number of ios to disk in-progress + bool enable_direct_reads_ = false; // Should we enable direct reads + // when reading from disk +}; + +// +// Abstraction to do writing to device. It is part of pipelined architecture. 
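+//
+// A minimal usage sketch (illustrative only; `tier`, `file`, `buf` and the
+// callback are hypothetical stand-ins, not part of this patch):
+//
+//   ThreadedWriter writer(tier, /*qdepth=*/4, /*io_size=*/4 * 1024);
+//   writer.Write(file, buf, /*file_off=*/0,
+//                [] { /* buffer was flushed; recycle it */ });
+//   ...
+//   writer.Stop();  // drain the queue and join the worker threads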
+// +class ThreadedWriter : public Writer { + public: + // Representation of IO to device + struct IO { + explicit IO(const bool signal) : signal_(signal) {} + explicit IO(WritableFile* const file, CacheWriteBuffer* const buf, + const uint64_t file_off, const std::function callback) + : file_(file), buf_(buf), file_off_(file_off), callback_(callback) {} + + IO(const IO&) = default; + IO& operator=(const IO&) = default; + size_t Size() const { return sizeof(IO); } + + WritableFile* file_ = nullptr; // File to write to + CacheWriteBuffer* buf_ = nullptr; // buffer to write + uint64_t file_off_ = 0; // file offset + bool signal_ = false; // signal to exit thread loop + std::function callback_; // Callback on completion + }; + + explicit ThreadedWriter(PersistentCacheTier* const cache, const size_t qdepth, + const size_t io_size); + virtual ~ThreadedWriter() { assert(threads_.empty()); } + + void Stop() override; + void Write(WritableFile* const file, CacheWriteBuffer* buf, + const uint64_t file_off, + const std::function callback) override; + + private: + void ThreadMain(); + void DispatchIO(const IO& io); + + const size_t io_size_ = 0; + BoundedQueue q_; + std::vector threads_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file_buffer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file_buffer.h new file mode 100644 index 0000000000..d4f02455a3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_file_buffer.h @@ -0,0 +1,127 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#pragma once + +#include +#include +#include + +#include "memory/arena.h" +#include "rocksdb/comparator.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +// +// CacheWriteBuffer +// +// Buffer abstraction that can be manipulated via append +// (not thread safe) +class CacheWriteBuffer { + public: + explicit CacheWriteBuffer(const size_t size) : size_(size), pos_(0) { + buf_.reset(new char[size_]); + assert(!pos_); + assert(size_); + } + + virtual ~CacheWriteBuffer() {} + + void Append(const char* buf, const size_t size) { + assert(pos_ + size <= size_); + memcpy(buf_.get() + pos_, buf, size); + pos_ += size; + assert(pos_ <= size_); + } + + void FillTrailingZeros() { + assert(pos_ <= size_); + memset(buf_.get() + pos_, '0', size_ - pos_); + pos_ = size_; + } + + void Reset() { pos_ = 0; } + size_t Free() const { return size_ - pos_; } + size_t Capacity() const { return size_; } + size_t Used() const { return pos_; } + char* Data() const { return buf_.get(); } + + private: + std::unique_ptr buf_; + const size_t size_; + size_t pos_; +}; + +// +// CacheWriteBufferAllocator +// +// Buffer pool abstraction(not thread safe) +// +class CacheWriteBufferAllocator { + public: + explicit CacheWriteBufferAllocator(const size_t buffer_size, + const size_t buffer_count) + : cond_empty_(&lock_), buffer_size_(buffer_size) { + MutexLock _(&lock_); + buffer_size_ = buffer_size; + for (uint32_t i = 0; i < buffer_count; i++) { + auto* buf = new CacheWriteBuffer(buffer_size_); + assert(buf); + if (buf) { + bufs_.push_back(buf); + cond_empty_.Signal(); + } + } + } + + virtual ~CacheWriteBufferAllocator() { + MutexLock _(&lock_); + assert(bufs_.size() * buffer_size_ == Capacity()); + for (auto* buf : bufs_) { + delete buf; + } + bufs_.clear(); + } + + CacheWriteBuffer* Allocate() { + MutexLock _(&lock_); + if (bufs_.empty()) { + return nullptr; + } + + assert(!bufs_.empty()); + CacheWriteBuffer* const buf = bufs_.front(); + bufs_.pop_front(); + return buf; + } + + void Deallocate(CacheWriteBuffer* const buf) { + assert(buf); + MutexLock _(&lock_); + buf->Reset(); + bufs_.push_back(buf); + cond_empty_.Signal(); + } + + void WaitUntilUsable() { + // We are asked to wait till we have buffers available + MutexLock _(&lock_); + while (bufs_.empty()) { + cond_empty_.Wait(); + } + } + + size_t Capacity() const { return bufs_.size() * buffer_size_; } + size_t Free() const { return bufs_.size() * buffer_size_; } + size_t BufferSize() const { return buffer_size_; } + + private: + port::Mutex lock_; // Sync lock + port::CondVar cond_empty_; // Condition var for empty buffers + size_t buffer_size_; // Size of each buffer + std::list bufs_; // Buffer stash +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.cc new file mode 100644 index 0000000000..182df6ca65 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "utilities/persistent_cache/block_cache_tier_metadata.h" + +#include + +namespace ROCKSDB_NAMESPACE { + +bool BlockCacheTierMetadata::Insert(BlockCacheFile* file) { + return cache_file_index_.Insert(file); +} + +BlockCacheFile* BlockCacheTierMetadata::Lookup(const uint32_t cache_id) { + BlockCacheFile* ret = nullptr; + BlockCacheFile lookup_key(cache_id); + bool ok = cache_file_index_.Find(&lookup_key, &ret); + if (ok) { + assert(ret->refs_); + return ret; + } + return nullptr; +} + +BlockCacheFile* BlockCacheTierMetadata::Evict() { + using std::placeholders::_1; + auto fn = std::bind(&BlockCacheTierMetadata::RemoveAllKeys, this, _1); + return cache_file_index_.Evict(fn); +} + +void BlockCacheTierMetadata::Clear() { + cache_file_index_.Clear([](BlockCacheFile* arg) { delete arg; }); + block_index_.Clear([](BlockInfo* arg) { delete arg; }); +} + +BlockInfo* BlockCacheTierMetadata::Insert(const Slice& key, const LBA& lba) { + std::unique_ptr binfo(new BlockInfo(key, lba)); + if (!block_index_.Insert(binfo.get())) { + return nullptr; + } + return binfo.release(); +} + +bool BlockCacheTierMetadata::Lookup(const Slice& key, LBA* lba) { + BlockInfo lookup_key(key); + BlockInfo* block; + port::RWMutex* rlock = nullptr; + if (!block_index_.Find(&lookup_key, &block, &rlock)) { + return false; + } + + ReadUnlock _(rlock); + assert(block->key_ == key.ToString()); + if (lba) { + *lba = block->lba_; + } + return true; +} + +BlockInfo* BlockCacheTierMetadata::Remove(const Slice& key) { + BlockInfo lookup_key(key); + BlockInfo* binfo = nullptr; + bool ok __attribute__((__unused__)); + ok = block_index_.Erase(&lookup_key, &binfo); + assert(ok); + return binfo; +} + +void BlockCacheTierMetadata::RemoveAllKeys(BlockCacheFile* f) { + for (BlockInfo* binfo : f->block_infos()) { + BlockInfo* tmp = nullptr; + bool status = block_index_.Erase(binfo, &tmp); + (void)status; + assert(status); + assert(tmp == binfo); + delete binfo; + } + f->block_infos().clear(); +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.h new file mode 100644 index 0000000000..15eb0be3d9 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/block_cache_tier_metadata.h @@ -0,0 +1,122 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + + +#include +#include +#include + +#include "rocksdb/slice.h" +#include "utilities/persistent_cache/block_cache_tier_file.h" +#include "utilities/persistent_cache/hash_table.h" +#include "utilities/persistent_cache/hash_table_evictable.h" +#include "utilities/persistent_cache/lrulist.h" + +namespace ROCKSDB_NAMESPACE { + +// +// Block Cache Tier Metadata +// +// The BlockCacheTierMetadata holds all the metadata associated with block +// cache. It +// fundamentally contains 2 indexes and an LRU. +// +// Block Cache Index +// +// This is a forward index that maps a given key to a LBA (Logical Block +// Address). LBA is a disk pointer that points to a record on the cache. +// +// LBA = { cache-id, offset, size } +// +// Cache File Index +// +// This is a forward index that maps a given cache-id to a cache file object. 
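+//
+// A sketch of the resulting two-step read path (names are illustrative; this
+// mirrors what BlockCacheTier::Lookup does):
+//
+//   LBA lba;
+//   if (metadata.Lookup(key, &lba)) {                      // key -> locator
+//     BlockCacheFile* f = metadata.Lookup(lba.cache_id_);  // locator -> file
+//     if (f) { f->Read(lba, &blk_key, &blk_val, scratch); }
+//   }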
+// Typically you would lookup using LBA and use the object to read or write +struct BlockInfo { + explicit BlockInfo(const Slice& key, const LBA& lba = LBA()) + : key_(key.ToString()), lba_(lba) {} + + std::string key_; + LBA lba_; +}; + +class BlockCacheTierMetadata { + public: + explicit BlockCacheTierMetadata(const uint32_t blocks_capacity = 1024 * 1024, + const uint32_t cachefile_capacity = 10 * 1024) + : cache_file_index_(cachefile_capacity), block_index_(blocks_capacity) {} + + virtual ~BlockCacheTierMetadata() {} + + // Insert a given cache file + bool Insert(BlockCacheFile* file); + + // Lookup cache file based on cache_id + BlockCacheFile* Lookup(const uint32_t cache_id); + + // Insert block information to block index + BlockInfo* Insert(const Slice& key, const LBA& lba); + // bool Insert(BlockInfo* binfo); + + // Lookup block information from block index + bool Lookup(const Slice& key, LBA* lba); + + // Remove a given from the block index + BlockInfo* Remove(const Slice& key); + + // Find and evict a cache file using LRU policy + BlockCacheFile* Evict(); + + // Clear the metadata contents + virtual void Clear(); + + protected: + // Remove all block information from a given file + virtual void RemoveAllKeys(BlockCacheFile* file); + + private: + // Cache file index definition + // + // cache-id => BlockCacheFile + struct BlockCacheFileHash { + uint64_t operator()(const BlockCacheFile* rec) { + return std::hash()(rec->cacheid()); + } + }; + + struct BlockCacheFileEqual { + uint64_t operator()(const BlockCacheFile* lhs, const BlockCacheFile* rhs) { + return lhs->cacheid() == rhs->cacheid(); + } + }; + + using CacheFileIndexType = + EvictableHashTable; + + // Block Lookup Index + // + // key => LBA + struct Hash { + size_t operator()(BlockInfo* node) const { + return std::hash()(node->key_); + } + }; + + struct Equal { + size_t operator()(BlockInfo* lhs, BlockInfo* rhs) const { + return lhs->key_ == rhs->key_; + } + }; + + using BlockIndexType = HashTable; + + CacheFileIndexType cache_file_index_; + BlockIndexType block_index_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/hash_table.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/hash_table.h new file mode 100644 index 0000000000..13b2ec7d66 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/hash_table.h @@ -0,0 +1,237 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once + + +#include + +#include +#include + +#ifdef OS_LINUX +#include +#endif + +#include "rocksdb/env.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +// HashTable +// +// Traditional implementation of hash table with synchronization built on top +// don't perform very well in multi-core scenarios. This is an implementation +// designed for multi-core scenarios with high lock contention. +// +// |<-------- alpha ------------->| +// Buckets Collision list +// ---- +----+ +---+---+--- ...... ---+---+---+ +// / | |--->| | | | | | +// / +----+ +---+---+--- ...... ---+---+---+ +// / | | +// Locks/ +----+ +// +--+/ . . +// | | . . +// +--+ . . +// | | . . +// +--+ . . +// | | . . +// +--+ . . 
+//          \     +----+
+//           \    |    |
+//            \   +----+
+//             \  |    |
+//              \---- +----+
+//
+// The lock contention is spread over an array of locks. This helps improve
+// concurrent access. The spine is designed for a certain capacity and load
+// factor. When the capacity planning is done correctly we can expect
+// O(load_factor = 1) insert, access and remove time.
+//
+// Micro benchmark on debug build gives about .5 Million/sec rate of insert,
+// erase and lookup in parallel (total of about 1.5 Million ops/sec). If the
+// blocks were of 4K, the hash table can support a virtual throughput of
+// 6 GB/s.
+//
+// T      Object type (contains both key and value)
+// Hash   Function that returns a hash from type T
+// Equal  Returns if two objects are equal
+//        (We need explicit equal for pointer type)
+//
+template <class T, class Hash, class Equal>
+class HashTable {
+ public:
+  explicit HashTable(const size_t capacity = 1024 * 1024,
+                     const float load_factor = 2.0,
+                     const uint32_t nlocks = 256)
+      : nbuckets_(
+            static_cast<uint32_t>(load_factor ? capacity / load_factor : 0)),
+        nlocks_(nlocks) {
+    // pre-conditions
+    assert(capacity);
+    assert(load_factor);
+    assert(nbuckets_);
+    assert(nlocks_);
+
+    buckets_.reset(new Bucket[nbuckets_]);
+#ifdef OS_LINUX
+    mlock(buckets_.get(), nbuckets_ * sizeof(Bucket));
+#endif
+
+    // initialize locks
+    locks_.reset(new port::RWMutex[nlocks_]);
+#ifdef OS_LINUX
+    mlock(locks_.get(), nlocks_ * sizeof(port::RWMutex));
+#endif
+
+    // post-conditions
+    assert(buckets_);
+    assert(locks_);
+  }
+
+  virtual ~HashTable() { AssertEmptyBuckets(); }
+
+  //
+  // Insert given record to hash table
+  //
+  bool Insert(const T& t) {
+    const uint64_t h = Hash()(t);
+    const uint32_t bucket_idx = h % nbuckets_;
+    const uint32_t lock_idx = bucket_idx % nlocks_;
+
+    WriteLock _(&locks_[lock_idx]);
+    auto& bucket = buckets_[bucket_idx];
+    return Insert(&bucket, t);
+  }
+
+  // Lookup hash table
+  //
+  // Please note that the read lock is handed back to the caller still held.
+  // This is because the caller owns the data, and should hold the read lock
+  // for as long as they operate on the data.
+  bool Find(const T& t, T* ret, port::RWMutex** ret_lock) {
+    const uint64_t h = Hash()(t);
+    const uint32_t bucket_idx = h % nbuckets_;
+    const uint32_t lock_idx = bucket_idx % nlocks_;
+
+    port::RWMutex& lock = locks_[lock_idx];
+    lock.ReadLock();
+
+    auto& bucket = buckets_[bucket_idx];
+    if (Find(&bucket, t, ret)) {
+      *ret_lock = &lock;
+      return true;
+    }
+
+    lock.ReadUnlock();
+    return false;
+  }
+
+  //
+  // Erase a given key from the hash table
+  //
+  bool Erase(const T& t, T* ret) {
+    const uint64_t h = Hash()(t);
+    const uint32_t bucket_idx = h % nbuckets_;
+    const uint32_t lock_idx = bucket_idx % nlocks_;
+
+    WriteLock _(&locks_[lock_idx]);
+
+    auto& bucket = buckets_[bucket_idx];
+    return Erase(&bucket, t, ret);
+  }
+
+  // Fetch the mutex associated with a key
+  // This call is used to hold the lock for a given data for an extended
+  // period of time.
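+  //
+  // A usage sketch for the Find/unlock contract (illustrative only; `table`
+  // and `probe` are hypothetical):
+  //
+  //   T found;
+  //   port::RWMutex* rlock = nullptr;
+  //   if (table.Find(probe, &found, &rlock)) {
+  //     // ... read through `found` while the read lock is held ...
+  //     rlock->ReadUnlock();  // the caller must release the lock
+  //   }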
+ port::RWMutex* GetMutex(const T& t) { + const uint64_t h = Hash()(t); + const uint32_t bucket_idx = h % nbuckets_; + const uint32_t lock_idx = bucket_idx % nlocks_; + + return &locks_[lock_idx]; + } + + void Clear(void (*fn)(T)) { + for (uint32_t i = 0; i < nbuckets_; ++i) { + const uint32_t lock_idx = i % nlocks_; + WriteLock _(&locks_[lock_idx]); + for (auto& t : buckets_[i].list_) { + (*fn)(t); + } + buckets_[i].list_.clear(); + } + } + + protected: + // Models bucket of keys that hash to the same bucket number + struct Bucket { + std::list list_; + }; + + // Substitute for std::find with custom comparator operator + typename std::list::iterator Find(std::list* list, const T& t) { + for (auto it = list->begin(); it != list->end(); ++it) { + if (Equal()(*it, t)) { + return it; + } + } + return list->end(); + } + + bool Insert(Bucket* bucket, const T& t) { + // Check if the key already exists + auto it = Find(&bucket->list_, t); + if (it != bucket->list_.end()) { + return false; + } + + // insert to bucket + bucket->list_.push_back(t); + return true; + } + + bool Find(Bucket* bucket, const T& t, T* ret) { + auto it = Find(&bucket->list_, t); + if (it != bucket->list_.end()) { + if (ret) { + *ret = *it; + } + return true; + } + return false; + } + + bool Erase(Bucket* bucket, const T& t, T* ret) { + auto it = Find(&bucket->list_, t); + if (it != bucket->list_.end()) { + if (ret) { + *ret = *it; + } + + bucket->list_.erase(it); + return true; + } + return false; + } + + // assert that all buckets are empty + void AssertEmptyBuckets() { +#ifndef NDEBUG + for (size_t i = 0; i < nbuckets_; ++i) { + WriteLock _(&locks_[i % nlocks_]); + assert(buckets_[i].list_.empty()); + } +#endif + } + + const uint32_t nbuckets_; // No. of buckets in the spine + std::unique_ptr buckets_; // Spine of the hash buckets + const uint32_t nlocks_; // No. of locks + std::unique_ptr locks_; // Granular locks +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/hash_table_evictable.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/hash_table_evictable.h new file mode 100644 index 0000000000..daf4bec1ae --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/hash_table_evictable.h @@ -0,0 +1,166 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once + + +#include + +#include "util/random.h" +#include "utilities/persistent_cache/hash_table.h" +#include "utilities/persistent_cache/lrulist.h" + +namespace ROCKSDB_NAMESPACE { + +// Evictable Hash Table +// +// Hash table index where least accessed (or one of the least accessed) elements +// can be evicted. 
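+//
+// Eviction sketch (names are illustrative): Evict() unlinks a victim from one
+// of the per-lock LRU lists, erases it from its bucket, and passes it to an
+// optional cleanup callback before returning it; the caller owns the returned
+// element (BlockCacheTier::Reserve, for instance, deletes it via a unique_ptr):
+//
+//   T* victim = table.Evict([](T* t) { /* detach t's resources */ });
+//   delete victim;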
+// +// Please note EvictableHashTable can only be created for pointer type objects +template +class EvictableHashTable : private HashTable { + public: + using hash_table = HashTable; + + explicit EvictableHashTable(const size_t capacity = 1024 * 1024, + const float load_factor = 2.0, + const uint32_t nlocks = 256) + : HashTable(capacity, load_factor, nlocks), + lru_lists_(new LRUList[hash_table::nlocks_]) { + assert(lru_lists_); + } + + virtual ~EvictableHashTable() { AssertEmptyLRU(); } + + // + // Insert given record to hash table (and LRU list) + // + bool Insert(T* t) { + const uint64_t h = Hash()(t); + typename hash_table::Bucket& bucket = GetBucket(h); + LRUListType& lru = GetLRUList(h); + port::RWMutex& lock = GetMutex(h); + + WriteLock _(&lock); + if (hash_table::Insert(&bucket, t)) { + lru.Push(t); + return true; + } + return false; + } + + // + // Lookup hash table + // + // Please note that read lock should be held by the caller. This is because + // the caller owns the data, and should hold the read lock as long as he + // operates on the data. + bool Find(T* t, T** ret) { + const uint64_t h = Hash()(t); + typename hash_table::Bucket& bucket = GetBucket(h); + LRUListType& lru = GetLRUList(h); + port::RWMutex& lock = GetMutex(h); + + ReadLock _(&lock); + if (hash_table::Find(&bucket, t, ret)) { + ++(*ret)->refs_; + lru.Touch(*ret); + return true; + } + return false; + } + + // + // Evict one of the least recently used object + // + T* Evict(const std::function& fn = nullptr) { + uint32_t random = Random::GetTLSInstance()->Next(); + const size_t start_idx = random % hash_table::nlocks_; + T* t = nullptr; + + // iterate from start_idx .. 0 .. start_idx + for (size_t i = 0; !t && i < hash_table::nlocks_; ++i) { + const size_t idx = (start_idx + i) % hash_table::nlocks_; + + WriteLock _(&hash_table::locks_[idx]); + LRUListType& lru = lru_lists_[idx]; + if (!lru.IsEmpty() && (t = lru.Pop()) != nullptr) { + assert(!t->refs_); + // We got an item to evict, erase from the bucket + const uint64_t h = Hash()(t); + typename hash_table::Bucket& bucket = GetBucket(h); + T* tmp = nullptr; + bool status = hash_table::Erase(&bucket, t, &tmp); + assert(t == tmp); + (void)status; + assert(status); + if (fn) { + fn(t); + } + break; + } + assert(!t); + } + return t; + } + + void Clear(void (*fn)(T*)) { + for (uint32_t i = 0; i < hash_table::nbuckets_; ++i) { + const uint32_t lock_idx = i % hash_table::nlocks_; + WriteLock _(&hash_table::locks_[lock_idx]); + auto& lru_list = lru_lists_[lock_idx]; + auto& bucket = hash_table::buckets_[i]; + for (auto* t : bucket.list_) { + lru_list.Unlink(t); + (*fn)(t); + } + bucket.list_.clear(); + } + // make sure that all LRU lists are emptied + AssertEmptyLRU(); + } + + void AssertEmptyLRU() { +#ifndef NDEBUG + for (uint32_t i = 0; i < hash_table::nlocks_; ++i) { + WriteLock _(&hash_table::locks_[i]); + auto& lru_list = lru_lists_[i]; + assert(lru_list.IsEmpty()); + } +#endif + } + + // + // Fetch the mutex associated with a key + // This call is used to hold the lock for a given data for extended period of + // time. 
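+  //
+  // Note on pinning (illustrative sketch; `table` and `elem` are
+  // hypothetical): Find() bumps refs_ on the element it returns and touches
+  // the LRU, so a found element cannot be evicted until the caller unpins it:
+  //
+  //   T* found = nullptr;
+  //   if (table.Find(elem, &found)) {
+  //     // ... use `found` ...
+  //     --found->refs_;  // unpin; BlockCacheTier::Lookup does the same
+  //   }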
+ port::RWMutex* GetMutex(T* t) { return hash_table::GetMutex(t); } + + private: + using LRUListType = LRUList; + + typename hash_table::Bucket& GetBucket(const uint64_t h) { + const uint32_t bucket_idx = h % hash_table::nbuckets_; + return hash_table::buckets_[bucket_idx]; + } + + LRUListType& GetLRUList(const uint64_t h) { + const uint32_t bucket_idx = h % hash_table::nbuckets_; + const uint32_t lock_idx = bucket_idx % hash_table::nlocks_; + return lru_lists_[lock_idx]; + } + + port::RWMutex& GetMutex(const uint64_t h) { + const uint32_t bucket_idx = h % hash_table::nbuckets_; + const uint32_t lock_idx = bucket_idx % hash_table::nlocks_; + return hash_table::locks_[lock_idx]; + } + + std::unique_ptr lru_lists_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/lrulist.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/lrulist.h new file mode 100644 index 0000000000..9a896ed599 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/lrulist.h @@ -0,0 +1,172 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once + + +#include + +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +// LRU element definition +// +// Any object that needs to be part of the LRU algorithm should extend this +// class +template +struct LRUElement { + explicit LRUElement() : next_(nullptr), prev_(nullptr), refs_(0) {} + + virtual ~LRUElement() { assert(!refs_); } + + T* next_; + T* prev_; + std::atomic refs_; +}; + +// LRU implementation +// +// In place LRU implementation. There is no copy or allocation involved when +// inserting or removing an element. 
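+// (each element carries its own prev/next links).
+//
+// Intrusive-use sketch (illustrative): a type opts in by deriving from
+// LRUElement<T>, the way BlockCacheFile derives from
+// LRUElement<BlockCacheFile>:
+//
+//   struct Node : LRUElement<Node> { /* payload */ };
+//   LRUList<Node> lru;
+//   Node n;
+//   lru.Push(&n);              // link at the cold end; no allocation
+//   lru.Touch(&n);             // move to the hot end
+//   Node* victim = lru.Pop();  // unlink the coldest unreferenced element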
This makes the data structure slim +template +class LRUList { + public: + virtual ~LRUList() { + MutexLock _(&lock_); + assert(!head_); + assert(!tail_); + } + + // Push element into the LRU at the cold end + inline void Push(T* const t) { + assert(t); + assert(!t->next_); + assert(!t->prev_); + + MutexLock _(&lock_); + + assert((!head_ && !tail_) || (head_ && tail_)); + assert(!head_ || !head_->prev_); + assert(!tail_ || !tail_->next_); + + t->next_ = head_; + if (head_) { + head_->prev_ = t; + } + + head_ = t; + if (!tail_) { + tail_ = t; + } + } + + // Unlink the element from the LRU + inline void Unlink(T* const t) { + MutexLock _(&lock_); + UnlinkImpl(t); + } + + // Evict an element from the LRU + inline T* Pop() { + MutexLock _(&lock_); + + assert(tail_ && head_); + assert(!tail_->next_); + assert(!head_->prev_); + + T* t = head_; + while (t && t->refs_) { + t = t->next_; + } + + if (!t) { + // nothing can be evicted + return nullptr; + } + + assert(!t->refs_); + + // unlike the element + UnlinkImpl(t); + return t; + } + + // Move the element from the front of the list to the back of the list + inline void Touch(T* const t) { + MutexLock _(&lock_); + UnlinkImpl(t); + PushBackImpl(t); + } + + // Check if the LRU is empty + inline bool IsEmpty() const { + MutexLock _(&lock_); + return !head_ && !tail_; + } + + private: + // Unlink an element from the LRU + void UnlinkImpl(T* const t) { + assert(t); + + lock_.AssertHeld(); + + assert(head_ && tail_); + assert(t->prev_ || head_ == t); + assert(t->next_ || tail_ == t); + + if (t->prev_) { + t->prev_->next_ = t->next_; + } + if (t->next_) { + t->next_->prev_ = t->prev_; + } + + if (tail_ == t) { + tail_ = tail_->prev_; + } + if (head_ == t) { + head_ = head_->next_; + } + + t->next_ = t->prev_ = nullptr; + } + + // Insert an element at the hot end + inline void PushBack(T* const t) { + MutexLock _(&lock_); + PushBackImpl(t); + } + + inline void PushBackImpl(T* const t) { + assert(t); + assert(!t->next_); + assert(!t->prev_); + + lock_.AssertHeld(); + + assert((!head_ && !tail_) || (head_ && tail_)); + assert(!head_ || !head_->prev_); + assert(!tail_ || !tail_->next_); + + t->prev_ = tail_; + if (tail_) { + tail_->next_ = t; + } + + tail_ = t; + if (!head_) { + head_ = tail_; + } + } + + mutable port::Mutex lock_; // synchronization primitive + T* head_ = nullptr; // front (cold) + T* tail_ = nullptr; // back (hot) +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_test.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_test.h new file mode 100644 index 0000000000..aab61bb773 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_test.h @@ -0,0 +1,284 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+#pragma once + + +#include +#include +#include +#include +#include +#include +#include + +#include "db/db_test_util.h" +#include "memory/arena.h" +#include "port/port.h" +#include "rocksdb/cache.h" +#include "table/block_based/block_builder.h" +#include "test_util/testharness.h" +#include "util/random.h" +#include "utilities/persistent_cache/volatile_tier_impl.h" + +namespace ROCKSDB_NAMESPACE { + +// +// Unit tests for testing PersistentCacheTier +// +class PersistentCacheTierTest : public testing::Test { + public: + PersistentCacheTierTest(); + virtual ~PersistentCacheTierTest() { + if (cache_) { + Status s = cache_->Close(); + assert(s.ok()); + } + } + + protected: + // Flush cache + void Flush() { + if (cache_) { + cache_->TEST_Flush(); + } + } + + // create threaded workload + template + std::list SpawnThreads(const size_t n, const T& fn) { + std::list threads; + for (size_t i = 0; i < n; i++) { + port::Thread th(fn); + threads.push_back(std::move(th)); + } + return threads; + } + + // Wait for threads to join + void Join(std::list&& threads) { + for (auto& th : threads) { + th.join(); + } + threads.clear(); + } + + // Run insert workload in threads + void Insert(const size_t nthreads, const size_t max_keys) { + key_ = 0; + max_keys_ = max_keys; + // spawn threads + auto fn = std::bind(&PersistentCacheTierTest::InsertImpl, this); + auto threads = SpawnThreads(nthreads, fn); + // join with threads + Join(std::move(threads)); + // Flush cache + Flush(); + } + + // Run verification on the cache + void Verify(const size_t nthreads = 1, const bool eviction_enabled = false) { + stats_verify_hits_ = 0; + stats_verify_missed_ = 0; + key_ = 0; + // spawn threads + auto fn = + std::bind(&PersistentCacheTierTest::VerifyImpl, this, eviction_enabled); + auto threads = SpawnThreads(nthreads, fn); + // join with threads + Join(std::move(threads)); + } + + // pad 0 to numbers + std::string PaddedNumber(const size_t data, const size_t pad_size) { + assert(pad_size); + char* ret = new char[pad_size]; + int pos = static_cast(pad_size) - 1; + size_t count = 0; + size_t t = data; + // copy numbers + while (t) { + count++; + ret[pos--] = '0' + t % 10; + t = t / 10; + } + // copy 0s + while (pos >= 0) { + ret[pos--] = '0'; + } + // post condition + assert(count <= pad_size); + assert(pos == -1); + std::string result(ret, pad_size); + delete[] ret; + return result; + } + + // Insert workload implementation + void InsertImpl() { + const std::string prefix = "key_prefix_"; + + while (true) { + size_t i = key_++; + if (i >= max_keys_) { + break; + } + + char data[4 * 1024]; + memset(data, '0' + (i % 10), sizeof(data)); + auto k = prefix + PaddedNumber(i, /*count=*/8); + Slice key(k); + while (true) { + Status status = cache_->Insert(key, data, sizeof(data)); + if (status.ok()) { + break; + } + ASSERT_TRUE(status.IsTryAgain()); + Env::Default()->SleepForMicroseconds(1 * 1000 * 1000); + } + } + } + + // Verification implementation + void VerifyImpl(const bool eviction_enabled = false) { + const std::string prefix = "key_prefix_"; + while (true) { + size_t i = key_++; + if (i >= max_keys_) { + break; + } + + char edata[4 * 1024]; + memset(edata, '0' + (i % 10), sizeof(edata)); + auto k = prefix + PaddedNumber(i, /*count=*/8); + Slice key(k); + std::unique_ptr block; + size_t block_size; + + if (eviction_enabled) { + if (!cache_->Lookup(key, &block, &block_size).ok()) { + // assume that the key is evicted + stats_verify_missed_++; + continue; + } + } + + ASSERT_OK(cache_->Lookup(key, &block, &block_size)); + 
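+      // The block returned by Lookup must match the 4 KiB pattern that
+      // InsertImpl wrote for this key, both in size and in content.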
ASSERT_EQ(block_size, sizeof(edata)); + ASSERT_EQ(memcmp(edata, block.get(), sizeof(edata)), 0); + stats_verify_hits_++; + } + } + + // template for insert test + void RunInsertTest(const size_t nthreads, const size_t max_keys) { + Insert(nthreads, max_keys); + Verify(nthreads); + ASSERT_EQ(stats_verify_hits_, max_keys); + ASSERT_EQ(stats_verify_missed_, 0); + + ASSERT_OK(cache_->Close()); + cache_.reset(); + } + + // template for negative insert test + void RunNegativeInsertTest(const size_t nthreads, const size_t max_keys) { + Insert(nthreads, max_keys); + Verify(nthreads, /*eviction_enabled=*/true); + ASSERT_LT(stats_verify_hits_, max_keys); + ASSERT_GT(stats_verify_missed_, 0); + + ASSERT_OK(cache_->Close()); + cache_.reset(); + } + + // template for insert with eviction test + void RunInsertTestWithEviction(const size_t nthreads, const size_t max_keys) { + Insert(nthreads, max_keys); + Verify(nthreads, /*eviction_enabled=*/true); + ASSERT_EQ(stats_verify_hits_ + stats_verify_missed_, max_keys); + ASSERT_GT(stats_verify_hits_, 0); + ASSERT_GT(stats_verify_missed_, 0); + + ASSERT_OK(cache_->Close()); + cache_.reset(); + } + + const std::string path_; + std::shared_ptr log_; + std::shared_ptr cache_; + std::atomic key_{0}; + size_t max_keys_ = 0; + std::atomic stats_verify_hits_{0}; + std::atomic stats_verify_missed_{0}; +}; + +// +// RocksDB tests +// +class PersistentCacheDBTest : public DBTestBase { + public: + PersistentCacheDBTest(); + + static uint64_t TestGetTickerCount(const Options& options, + Tickers ticker_type) { + return static_cast( + options.statistics->getTickerCount(ticker_type)); + } + + // insert data to table + void Insert(const Options& options, + const BlockBasedTableOptions& /*table_options*/, + const int num_iter, std::vector* values) { + CreateAndReopenWithCF({"pikachu"}, options); + // default column family doesn't have block cache + Options no_block_cache_opts; + no_block_cache_opts.statistics = options.statistics; + no_block_cache_opts = CurrentOptions(no_block_cache_opts); + BlockBasedTableOptions table_options_no_bc; + table_options_no_bc.no_block_cache = true; + no_block_cache_opts.table_factory.reset( + NewBlockBasedTableFactory(table_options_no_bc)); + ReopenWithColumnFamilies( + {"default", "pikachu"}, + std::vector({no_block_cache_opts, options})); + + Random rnd(301); + + // Write 8MB (80 values, each 100K) + ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); + std::string str; + for (int i = 0; i < num_iter; i++) { + if (i % 4 == 0) { // high compression ratio + str = rnd.RandomString(1000); + } + values->push_back(str); + ASSERT_OK(Put(1, Key(i), (*values)[i])); + } + + // flush all data from memtable so that reads are from block cache + ASSERT_OK(Flush(1)); + } + + // verify data + void Verify(const int num_iter, const std::vector& values) { + for (int j = 0; j < 2; ++j) { + for (int i = 0; i < num_iter; i++) { + ASSERT_EQ(Get(1, Key(i)), values[i]); + } + } + } + + // test template + void RunTest(const std::function(bool)>& + new_pcache, + const size_t max_keys, const size_t max_usecase); +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.cc new file mode 100644 index 0000000000..773aafbf26 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.cc @@ -0,0 +1,165 @@ +// Copyright (c) 2013, Facebook, Inc. 
All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#include "utilities/persistent_cache/persistent_cache_tier.h" + +#include +#include +#include + +namespace ROCKSDB_NAMESPACE { + +std::string PersistentCacheConfig::ToString() const { + std::string ret; + ret.reserve(20000); + const int kBufferSize = 200; + char buffer[kBufferSize]; + + snprintf(buffer, kBufferSize, " path: %s\n", path.c_str()); + ret.append(buffer); + snprintf(buffer, kBufferSize, " enable_direct_reads: %d\n", + enable_direct_reads); + ret.append(buffer); + snprintf(buffer, kBufferSize, " enable_direct_writes: %d\n", + enable_direct_writes); + ret.append(buffer); + snprintf(buffer, kBufferSize, " cache_size: %" PRIu64 "\n", cache_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " cache_file_size: %" PRIu32 "\n", + cache_file_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " writer_qdepth: %" PRIu32 "\n", + writer_qdepth); + ret.append(buffer); + snprintf(buffer, kBufferSize, " pipeline_writes: %d\n", pipeline_writes); + ret.append(buffer); + snprintf(buffer, kBufferSize, + " max_write_pipeline_backlog_size: %" PRIu64 "\n", + max_write_pipeline_backlog_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " write_buffer_size: %" PRIu32 "\n", + write_buffer_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " writer_dispatch_size: %" PRIu64 "\n", + writer_dispatch_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " is_compressed: %d\n", is_compressed); + ret.append(buffer); + + return ret; +} + +// +// PersistentCacheTier implementation +// +Status PersistentCacheTier::Open() { + if (next_tier_) { + return next_tier_->Open(); + } + return Status::OK(); +} + +Status PersistentCacheTier::Close() { + if (next_tier_) { + return next_tier_->Close(); + } + return Status::OK(); +} + +bool PersistentCacheTier::Reserve(const size_t /*size*/) { + // default implementation is a pass through + return true; +} + +bool PersistentCacheTier::Erase(const Slice& /*key*/) { + // default implementation is a pass through since not all cache tiers might + // support erase + return true; +} + +std::string PersistentCacheTier::PrintStats() { + std::ostringstream os; + for (auto tier_stats : Stats()) { + os << "---- next tier -----" << std::endl; + for (auto stat : tier_stats) { + os << stat.first << ": " << stat.second << std::endl; + } + } + return os.str(); +} + +PersistentCache::StatsType PersistentCacheTier::Stats() { + if (next_tier_) { + return next_tier_->Stats(); + } + return PersistentCache::StatsType{}; +} + +uint64_t PersistentCacheTier::NewId() { + return last_id_.fetch_add(1, std::memory_order_relaxed); +} + +// +// PersistentTieredCache implementation +// +PersistentTieredCache::~PersistentTieredCache() { assert(tiers_.empty()); } + +Status PersistentTieredCache::Open() { + assert(!tiers_.empty()); + return tiers_.front()->Open(); +} + +Status PersistentTieredCache::Close() { + assert(!tiers_.empty()); + Status status = tiers_.front()->Close(); + if (status.ok()) { + tiers_.clear(); + } + return status; +} + +bool PersistentTieredCache::Erase(const Slice& key) { + assert(!tiers_.empty()); + return tiers_.front()->Erase(key); +} + +PersistentCache::StatsType PersistentTieredCache::Stats() { + assert(!tiers_.empty()); + return tiers_.front()->Stats(); +} + +std::string PersistentTieredCache::PrintStats() { + 
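+  // Stats()/PrintStats() recurse through next_tier_, so delegating to the
+  // front tier below reports every tier in the stack.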
+  assert(!tiers_.empty());
+  return tiers_.front()->PrintStats();
+}
+
+Status PersistentTieredCache::Insert(const Slice& page_key, const char* data,
+                                     const size_t size) {
+  assert(!tiers_.empty());
+  return tiers_.front()->Insert(page_key, data, size);
+}
+
+Status PersistentTieredCache::Lookup(const Slice& page_key,
+                                     std::unique_ptr<char[]>* data,
+                                     size_t* size) {
+  assert(!tiers_.empty());
+  return tiers_.front()->Lookup(page_key, data, size);
+}
+
+void PersistentTieredCache::AddTier(const Tier& tier) {
+  if (!tiers_.empty()) {
+    tiers_.back()->set_next_tier(tier);
+  }
+  tiers_.push_back(tier);
+}
+
+bool PersistentTieredCache::IsCompressed() {
+  assert(tiers_.size());
+  return tiers_.front()->IsCompressed();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.h
new file mode 100644
index 0000000000..44d2fbba31
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_tier.h
@@ -0,0 +1,340 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+
+#include <limits>
+#include <list>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "monitoring/histogram.h"
+#include "rocksdb/env.h"
+#include "rocksdb/persistent_cache.h"
+#include "rocksdb/status.h"
+#include "rocksdb/system_clock.h"
+
+// Persistent Cache
+//
+// Persistent cache is a tiered key-value cache that can use a persistent
+// medium. It is a generic design and can leverage any storage medium --
+// disk/SSD/NVM/RAM. The code has been kept generic but significant
+// benchmark/design/development time has been spent to make sure the cache
+// performs appropriately for the respective storage medium.
+// The file defines
+// PersistentCacheTier   : Implementation that handles an individual cache tier
+// PersistentTieredCache : Implementation that handles all tiers as a logical
+//                         unit
+//
+// PersistentTieredCache architecture:
+// +--------------------------+ PersistentCacheTier that handles multiple tiers
+// | +----------------+       |
+// | |     RAM        |       | PersistentCacheTier that handles RAM
+// | +----------------+       | (VolatileCacheImpl)
+// |        | next            |
+// |        v                 |
+// | +----------------+       |
+// | |     NVM        |       | PersistentCacheTier implementation that
+// | +----------------+       | handles NVM (BlockCacheImpl)
+// |        | next            |
+// |        V                 |
+// | +----------------+       |
+// | |    LE-SSD      |       | PersistentCacheTier implementation that
+// | +----------------+       | handles LE-SSD (BlockCacheImpl)
+// |        |                 |
+// |        V                 |
+// |       null               |
+// +--------------------------+
+//              |
+//              V
+//             null
+namespace ROCKSDB_NAMESPACE {
+
+// Persistent Cache Config
+//
+// This struct captures all the options that are used to configure the
+// persistent cache. Some of the terms used in naming the options are
+//
+// dispatch size :
+// This is the size in which IO is dispatched to the device
+//
+// write buffer size :
+// This is the size of an individual write buffer. Write buffers are
+// grouped to form a buffered file.
+//
+// cache size :
+// This is the logical maximum for the cache size
+//
+// qdepth :
+// This is the max number of IOs that can be issued to the device in parallel
+//
+// pipelining :
+// The writer code path follows a pipelined architecture, which means the
+// operations are handed off from one stage to another
+//
+// pipelining backlog size :
+// With the pipelined architecture, there can always be a backlog of ops in
+// the pipeline queues. This is the maximum backlog size, after which ops are
+// dropped from the queue
+struct PersistentCacheConfig {
+  explicit PersistentCacheConfig(
+      Env* const _env, const std::string& _path, const uint64_t _cache_size,
+      const std::shared_ptr<Logger>& _log,
+      const uint32_t _write_buffer_size = 1 * 1024 * 1024 /*1MB*/) {
+    env = _env;
+    clock = (env != nullptr) ? env->GetSystemClock().get()
+                             : SystemClock::Default().get();
+    path = _path;
+    log = _log;
+    cache_size = _cache_size;
+    writer_dispatch_size = write_buffer_size = _write_buffer_size;
+  }
+
+  //
+  // Validate the settings. Our intention is to catch erroneous settings ahead
+  // of time instead of violating invariants or causing deadlocks.
+  //
+  Status ValidateSettings() const {
+    // (1) check pre-conditions for variables
+    if (!env || path.empty()) {
+      return Status::InvalidArgument("empty or null args");
+    }
+
+    // (2) assert size related invariants
+    // - cache size cannot be less than cache file size
+    // - individual write buffer size cannot be greater than cache file size
+    // - total write buffer size cannot be less than 2X cache file size
+    if (cache_size < cache_file_size || write_buffer_size >= cache_file_size ||
+        write_buffer_size * write_buffer_count() < 2 * cache_file_size) {
+      return Status::InvalidArgument("invalid cache size");
+    }
+
+    // (3) check writer settings
+    // - Queue depth cannot be 0
+    // - writer_dispatch_size cannot be greater than write_buffer_size
+    // - dispatch size and buffer size need to be aligned
+    if (!writer_qdepth || writer_dispatch_size > write_buffer_size ||
+        write_buffer_size % writer_dispatch_size) {
+      return Status::InvalidArgument("invalid writer settings");
+    }
+
+    return Status::OK();
+  }
+
+  //
+  // Env abstraction to use for system level operations
+  //
+  Env* env;
+  SystemClock* clock;
+  //
+  // Path for the block cache where blocks are persisted
+  //
+  std::string path;
+
+  //
+  // Log handle for logging messages
+  //
+  std::shared_ptr<Logger> log;
+
+  //
+  // Enable direct IO for reading
+  //
+  bool enable_direct_reads = true;
+
+  //
+  // Enable direct IO for writing
+  //
+  bool enable_direct_writes = false;
+
+  //
+  // Logical cache size
+  //
+  uint64_t cache_size = std::numeric_limits<uint64_t>::max();
+
+  // cache-file-size
+  //
+  // The cache consists of multiple small files. This parameter defines the
+  // size of an individual cache file
+  //
+  // default: 100MB
+  uint32_t cache_file_size = 100ULL * 1024 * 1024;
+
+  // writer-qdepth
+  //
+  // The writers can issue IO to the devices in parallel. This parameter
+  // controls the max number of IOs that can be issued in parallel to the
+  // block device
+  //
+  // default: 1
+  uint32_t writer_qdepth = 1;
+
+  // pipeline-writes
+  //
+  // Writes optionally follow a pipelined architecture. This helps
+  // avoid regression in the eviction code path of the primary tier. This
+  // parameter defines if pipelining is enabled or disabled
+  //
+  // default: true
+  bool pipeline_writes = true;
+
+  // max-write-pipeline-backlog-size
+  //
+  // Max pipeline buffer size. This is the maximum backlog we can accumulate
+  // while waiting for writes. After the limit, new ops will be dropped.
+  //
+  // Default: 1GiB
+  uint64_t max_write_pipeline_backlog_size = 1ULL * 1024 * 1024 * 1024;
+
+  // write-buffer-size
+  //
+  // This is the size in which buffer slabs are allocated.
+  //
+  // Default: 1M
+  uint32_t write_buffer_size = 1ULL * 1024 * 1024;
+
+  // write-buffer-count
+  //
+  // This is the total number of buffer slabs. This is calculated as a factor
+  // of file size in order to avoid deadlock.
+  size_t write_buffer_count() const {
+    assert(write_buffer_size);
+    return static_cast<size_t>((writer_qdepth + 1.2) * cache_file_size /
+                               write_buffer_size);
+  }
+
+  // writer-dispatch-size
+  //
+  // The writer thread will dispatch the IO at the specified IO size
+  //
+  // default: 1M
+  uint64_t writer_dispatch_size = 1ULL * 1024 * 1024;
+
+  // is_compressed
+  //
+  // This option determines if the cache will run in compressed mode or
+  // uncompressed mode
+  bool is_compressed = true;
+
+  PersistentCacheConfig MakePersistentCacheConfig(
+      const std::string& path, const uint64_t size,
+      const std::shared_ptr<Logger>& log);
+
+  std::string ToString() const;
+};
+
+// Persistent Cache Tier
+//
+// This is a logical abstraction that defines a tier of the persistent cache.
+// Tiers can be stacked over one another. PersistentCache provides the basic
+// definition for accessing/storing in the cache. PersistentCacheTier extends
+// the interface to enable management and stacking of tiers.
+class PersistentCacheTier : public PersistentCache {
+ public:
+  using Tier = std::shared_ptr<PersistentCacheTier>;
+
+  virtual ~PersistentCacheTier() {}
+
+  // Open the persistent cache tier
+  virtual Status Open();
+
+  // Close the persistent cache tier
+  virtual Status Close();
+
+  // Reserve space up to 'size' bytes
+  virtual bool Reserve(const size_t size);
+
+  // Erase a key from the cache
+  virtual bool Erase(const Slice& key);
+
+  // Print stats to string recursively
+  virtual std::string PrintStats();
+
+  virtual PersistentCache::StatsType Stats() override;
+
+  // Insert to page cache
+  virtual Status Insert(const Slice& page_key, const char* data,
+                        const size_t size) override = 0;
+
+  // Lookup page cache by page identifier
+  virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
+                        size_t* size) override = 0;
+
+  // Does it store compressed data?
+  virtual bool IsCompressed() override = 0;
+
+  virtual std::string GetPrintableOptions() const override = 0;
+
+  virtual uint64_t NewId() override;
+
+  // Return a reference to next tier
+  virtual Tier& next_tier() { return next_tier_; }
+
+  // Set the value for next tier
+  virtual void set_next_tier(const Tier& tier) {
+    assert(!next_tier_);
+    next_tier_ = tier;
+  }
+
+  virtual void TEST_Flush() {
+    if (next_tier_) {
+      next_tier_->TEST_Flush();
+    }
+  }
+
+ private:
+  Tier next_tier_;  // next tier
+  std::atomic<uint64_t> last_id_{1};
+};
+
+// PersistentTieredCache
+//
+// Abstraction that helps you construct a tier of persistent caches as a
+// unified cache. The tiers of the cache act as a single tier for management
+// ease and support the PersistentCache methods for accessing data.
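+//
+// NOTE (editor's sketch): a minimal, hypothetical stacking of tiers; only
+// VolatileCacheTier exists in this vendored tree, the SSD tier is assumed:
+//
+//   auto cache = std::make_shared<PersistentTieredCache>();
+//   cache->AddTier(std::make_shared<VolatileCacheTier>());  // RAM tier
+//   cache->AddTier(my_ssd_tier);   // hypothetical PersistentCacheTier
+//   Status s = cache->Open();      // opens the chain front-to-back
+//   // Inserts and lookups hit the front tier; evictions spill downward.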
+class PersistentTieredCache : public PersistentCacheTier {
+ public:
+  virtual ~PersistentTieredCache();
+
+  Status Open() override;
+  Status Close() override;
+  bool Erase(const Slice& key) override;
+  std::string PrintStats() override;
+  PersistentCache::StatsType Stats() override;
+  Status Insert(const Slice& page_key, const char* data,
+                const size_t size) override;
+  Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
+                size_t* size) override;
+  bool IsCompressed() override;
+
+  std::string GetPrintableOptions() const override {
+    return "PersistentTieredCache";
+  }
+
+  void AddTier(const Tier& tier);
+
+  Tier& next_tier() override {
+    // the chain continues from the last tier in the list
+    auto it = tiers_.end();
+    --it;
+    return (*it)->next_tier();
+  }
+
+  void set_next_tier(const Tier& tier) override {
+    // append to the last tier in the list
+    auto it = tiers_.end();
+    --it;
+    (*it)->set_next_tier(tier);
+  }
+
+  void TEST_Flush() override {
+    assert(!tiers_.empty());
+    tiers_.front()->TEST_Flush();
+    PersistentCacheTier::TEST_Flush();
+  }
+
+ protected:
+  std::list<Tier> tiers_;  // list of tiers top-down
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_util.h
new file mode 100644
index 0000000000..2a769652db
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/persistent_cache_util.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <limits>
+#include <list>
+
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+//
+// Simple synchronized queue implementation with the option of
+// bounding the queue
+//
+// On overflow, the elements will be discarded
+//
+template <class T>
+class BoundedQueue {
+ public:
+  explicit BoundedQueue(
+      const size_t max_size = std::numeric_limits<size_t>::max())
+      : cond_empty_(&lock_), max_size_(max_size) {}
+
+  virtual ~BoundedQueue() {}
+
+  void Push(T&& t) {
+    MutexLock _(&lock_);
+    if (max_size_ != std::numeric_limits<size_t>::max() &&
+        size_ + t.Size() >= max_size_) {
+      // overflow
+      return;
+    }
+
+    size_ += t.Size();
+    q_.push_back(std::move(t));
+    cond_empty_.SignalAll();
+  }
+
+  T Pop() {
+    MutexLock _(&lock_);
+    while (q_.empty()) {
+      cond_empty_.Wait();
+    }
+
+    T t = std::move(q_.front());
+    size_ -= t.Size();
+    q_.pop_front();
+    return t;
+  }
+
+  size_t Size() const {
+    MutexLock _(&lock_);
+    return size_;
+  }
+
+ private:
+  mutable port::Mutex lock_;
+  port::CondVar cond_empty_;
+  std::list<T> q_;
+  size_t size_ = 0;
+  const size_t max_size_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.cc
new file mode 100644
index 0000000000..6264f5ef3a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.cc
@@ -0,0 +1,138 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// + +#include "utilities/persistent_cache/volatile_tier_impl.h" + +#include + +namespace ROCKSDB_NAMESPACE { + +void VolatileCacheTier::DeleteCacheData(VolatileCacheTier::CacheData* data) { + assert(data); + delete data; +} + +VolatileCacheTier::~VolatileCacheTier() { index_.Clear(&DeleteCacheData); } + +PersistentCache::StatsType VolatileCacheTier::Stats() { + std::map stat; + stat.insert({"persistent_cache.volatile_cache.hits", + static_cast(stats_.cache_hits_)}); + stat.insert({"persistent_cache.volatile_cache.misses", + static_cast(stats_.cache_misses_)}); + stat.insert({"persistent_cache.volatile_cache.inserts", + static_cast(stats_.cache_inserts_)}); + stat.insert({"persistent_cache.volatile_cache.evicts", + static_cast(stats_.cache_evicts_)}); + stat.insert({"persistent_cache.volatile_cache.hit_pct", + static_cast(stats_.CacheHitPct())}); + stat.insert({"persistent_cache.volatile_cache.miss_pct", + static_cast(stats_.CacheMissPct())}); + + auto out = PersistentCacheTier::Stats(); + out.push_back(stat); + return out; +} + +Status VolatileCacheTier::Insert(const Slice& page_key, const char* data, + const size_t size) { + // precondition + assert(data); + assert(size); + + // increment the size + size_ += size; + + // check if we have overshot the limit, if so evict some space + while (size_ > max_size_) { + if (!Evict()) { + // unable to evict data, we give up so we don't spike read + // latency + assert(size_ >= size); + size_ -= size; + return Status::TryAgain("Unable to evict any data"); + } + } + + assert(size_ >= size); + + // insert order: LRU, followed by index + std::string key(page_key.data(), page_key.size()); + std::string value(data, size); + std::unique_ptr cache_data( + new CacheData(std::move(key), std::move(value))); + bool ok = index_.Insert(cache_data.get()); + if (!ok) { + // decrement the size that we incremented ahead of time + assert(size_ >= size); + size_ -= size; + // failed to insert to cache, block already in cache + return Status::TryAgain("key already exists in volatile cache"); + } + + cache_data.release(); + stats_.cache_inserts_++; + return Status::OK(); +} + +Status VolatileCacheTier::Lookup(const Slice& page_key, + std::unique_ptr* result, + size_t* size) { + CacheData key(std::move(page_key.ToString())); + CacheData* kv; + bool ok = index_.Find(&key, &kv); + if (ok) { + // set return data + result->reset(new char[kv->value.size()]); + memcpy(result->get(), kv->value.c_str(), kv->value.size()); + *size = kv->value.size(); + // drop the reference on cache data + kv->refs_--; + // update stats + stats_.cache_hits_++; + return Status::OK(); + } + + stats_.cache_misses_++; + + if (next_tier()) { + return next_tier()->Lookup(page_key, result, size); + } + + return Status::NotFound("key not found in volatile cache"); +} + +bool VolatileCacheTier::Erase(const Slice& /*key*/) { + assert(!"not supported"); + return true; +} + +bool VolatileCacheTier::Evict() { + CacheData* edata = index_.Evict(); + if (!edata) { + // not able to evict any object + return false; + } + + stats_.cache_evicts_++; + + // push the evicted object to the next level + if (next_tier()) { + // TODO: Should the insert error be ignored? 
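+  // Demote the victim to the next (colder) tier so a later miss here can
+  // still be served further down the stack; failure to demote only loses
+  // cache residency, never correctness, hence the unchecked status.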
+ Status s = next_tier()->Insert(Slice(edata->key), edata->value.c_str(), + edata->value.size()); + s.PermitUncheckedError(); + } + + // adjust size and destroy data + size_ -= edata->value.size(); + delete edata; + + return true; +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.h new file mode 100644 index 0000000000..f5d3064438 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/persistent_cache/volatile_tier_impl.h @@ -0,0 +1,139 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once + + +#include +#include +#include +#include +#include + +#include "rocksdb/cache.h" +#include "utilities/persistent_cache/hash_table.h" +#include "utilities/persistent_cache/hash_table_evictable.h" +#include "utilities/persistent_cache/persistent_cache_tier.h" + +// VolatileCacheTier +// +// This file provides persistent cache tier implementation for caching +// key/values in RAM. +// +// key/values +// | +// V +// +-------------------+ +// | VolatileCacheTier | Store in an evictable hash table +// +-------------------+ +// | +// V +// on eviction +// pushed to next tier +// +// The implementation is designed to be concurrent. The evictable hash table +// implementation is not concurrent at this point though. +// +// The eviction algorithm is LRU +namespace ROCKSDB_NAMESPACE { + +class VolatileCacheTier : public PersistentCacheTier { + public: + explicit VolatileCacheTier( + const bool is_compressed = true, + const size_t max_size = std::numeric_limits::max()) + : is_compressed_(is_compressed), max_size_(max_size) {} + + virtual ~VolatileCacheTier(); + + // insert to cache + Status Insert(const Slice& page_key, const char* data, + const size_t size) override; + // lookup key in cache + Status Lookup(const Slice& page_key, std::unique_ptr* data, + size_t* size) override; + + // is compressed cache ? 
+ bool IsCompressed() override { return is_compressed_; } + + // erase key from cache + bool Erase(const Slice& key) override; + + std::string GetPrintableOptions() const override { + return "VolatileCacheTier"; + } + + // Expose stats as map + PersistentCache::StatsType Stats() override; + + private: + // + // Cache data abstraction + // + struct CacheData : LRUElement { + explicit CacheData(CacheData&& rhs) noexcept + : key(std::move(rhs.key)), value(std::move(rhs.value)) {} + + explicit CacheData(const std::string& _key, const std::string& _value = "") + : key(_key), value(_value) {} + + virtual ~CacheData() {} + + const std::string key; + const std::string value; + }; + + static void DeleteCacheData(CacheData* data); + + // + // Index and LRU definition + // + struct CacheDataHash { + uint64_t operator()(const CacheData* obj) const { + assert(obj); + return std::hash()(obj->key); + } + }; + + struct CacheDataEqual { + bool operator()(const CacheData* lhs, const CacheData* rhs) const { + assert(lhs); + assert(rhs); + return lhs->key == rhs->key; + } + }; + + struct Statistics { + std::atomic cache_misses_{0}; + std::atomic cache_hits_{0}; + std::atomic cache_inserts_{0}; + std::atomic cache_evicts_{0}; + + double CacheHitPct() const { + auto lookups = cache_hits_ + cache_misses_; + return lookups ? 100 * cache_hits_ / static_cast(lookups) : 0.0; + } + + double CacheMissPct() const { + auto lookups = cache_hits_ + cache_misses_; + return lookups ? 100 * cache_misses_ / static_cast(lookups) : 0.0; + } + }; + + using IndexType = + EvictableHashTable; + + // Evict LRU tail + bool Evict(); + + const bool is_compressed_ = true; // does it store compressed data + IndexType index_; // in-memory cache + std::atomic max_size_{0}; // Maximum size of the cache + std::atomic size_{0}; // Size of the cache + Statistics stats_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.cc new file mode 100644 index 0000000000..edb75d545c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.cc @@ -0,0 +1,287 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "utilities/simulator_cache/cache_simulator.h" + +#include + +#include "db/dbformat.h" +#include "rocksdb/trace_record.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { +const std::string kGhostCachePrefix = "ghost_"; +} // namespace + +GhostCache::GhostCache(std::shared_ptr sim_cache) + : sim_cache_(sim_cache) {} + +bool GhostCache::Admit(const Slice& lookup_key) { + auto handle = sim_cache_->Lookup(lookup_key); + if (handle != nullptr) { + sim_cache_->Release(handle); + return true; + } + // TODO: Should we check for errors here? 
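+  // A miss reaches here: remember the key (charged by its size) so that a
+  // second access within the ghost LRU's capacity is admitted for real.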
+ auto s = sim_cache_->Insert(lookup_key, /*obj=*/nullptr, + &kNoopCacheItemHelper, lookup_key.size()); + s.PermitUncheckedError(); + return false; +} + +CacheSimulator::CacheSimulator(std::unique_ptr&& ghost_cache, + std::shared_ptr sim_cache) + : ghost_cache_(std::move(ghost_cache)), sim_cache_(sim_cache) {} + +void CacheSimulator::Access(const BlockCacheTraceRecord& access) { + bool admit = true; + const bool is_user_access = + BlockCacheTraceHelper::IsUserAccess(access.caller); + bool is_cache_miss = true; + if (ghost_cache_ && !access.no_insert) { + admit = ghost_cache_->Admit(access.block_key); + } + auto handle = sim_cache_->Lookup(access.block_key); + if (handle != nullptr) { + sim_cache_->Release(handle); + is_cache_miss = false; + } else { + if (!access.no_insert && admit && access.block_size > 0) { + // Ignore errors on insert + auto s = sim_cache_->Insert(access.block_key, /*obj=*/nullptr, + &kNoopCacheItemHelper, access.block_size); + s.PermitUncheckedError(); + } + } + miss_ratio_stats_.UpdateMetrics(access.access_timestamp, is_user_access, + is_cache_miss); +} + +void MissRatioStats::UpdateMetrics(uint64_t timestamp_in_ms, + bool is_user_access, bool is_cache_miss) { + uint64_t timestamp_in_seconds = timestamp_in_ms / kMicrosInSecond; + num_accesses_timeline_[timestamp_in_seconds] += 1; + num_accesses_ += 1; + if (num_misses_timeline_.find(timestamp_in_seconds) == + num_misses_timeline_.end()) { + num_misses_timeline_[timestamp_in_seconds] = 0; + } + if (is_cache_miss) { + num_misses_ += 1; + num_misses_timeline_[timestamp_in_seconds] += 1; + } + if (is_user_access) { + user_accesses_ += 1; + if (is_cache_miss) { + user_misses_ += 1; + } + } +} + +Cache::Priority PrioritizedCacheSimulator::ComputeBlockPriority( + const BlockCacheTraceRecord& access) const { + if (access.block_type == TraceType::kBlockTraceFilterBlock || + access.block_type == TraceType::kBlockTraceIndexBlock || + access.block_type == TraceType::kBlockTraceUncompressionDictBlock) { + return Cache::Priority::HIGH; + } + return Cache::Priority::LOW; +} + +void PrioritizedCacheSimulator::AccessKVPair( + const Slice& key, uint64_t value_size, Cache::Priority priority, + const BlockCacheTraceRecord& access, bool no_insert, bool is_user_access, + bool* is_cache_miss, bool* admitted, bool update_metrics) { + assert(is_cache_miss); + assert(admitted); + *is_cache_miss = true; + *admitted = true; + if (ghost_cache_ && !no_insert) { + *admitted = ghost_cache_->Admit(key); + } + auto handle = sim_cache_->Lookup(key); + if (handle != nullptr) { + sim_cache_->Release(handle); + *is_cache_miss = false; + } else if (!no_insert && *admitted && value_size > 0) { + // TODO: Should we check for an error here? + auto s = sim_cache_->Insert(key, /*obj=*/nullptr, &kNoopCacheItemHelper, + value_size, + /*handle=*/nullptr, priority); + s.PermitUncheckedError(); + } + if (update_metrics) { + miss_ratio_stats_.UpdateMetrics(access.access_timestamp, is_user_access, + *is_cache_miss); + } +} + +void PrioritizedCacheSimulator::Access(const BlockCacheTraceRecord& access) { + bool is_cache_miss = true; + bool admitted = true; + AccessKVPair(access.block_key, access.block_size, + ComputeBlockPriority(access), access, access.no_insert, + BlockCacheTraceHelper::IsUserAccess(access.caller), + &is_cache_miss, &admitted, /*update_metrics=*/true); +} + +void HybridRowBlockCacheSimulator::Access(const BlockCacheTraceRecord& access) { + // TODO (haoyu): We only support Get for now. 
We need to extend the tracing + // for MultiGet, i.e., non-data block accesses must log all keys in a + // MultiGet. + bool is_cache_miss = true; + bool admitted = false; + if (access.caller == TableReaderCaller::kUserGet && + access.get_id != BlockCacheTraceHelper::kReservedGetId) { + // This is a Get request. + const std::string& row_key = BlockCacheTraceHelper::ComputeRowKey(access); + GetRequestStatus& status = getid_status_map_[access.get_id]; + if (status.is_complete) { + // This Get request completes. + // Skip future accesses to its index/filter/data + // blocks. These block lookups are unnecessary if we observe a hit for the + // referenced key-value pair already. Thus, we treat these lookups as + // hits. This is also to ensure the total number of accesses are the same + // when comparing to other policies. + miss_ratio_stats_.UpdateMetrics(access.access_timestamp, + /*is_user_access=*/true, + /*is_cache_miss=*/false); + return; + } + if (status.row_key_status.find(row_key) == status.row_key_status.end()) { + // This is the first time that this key is accessed. Look up the key-value + // pair first. Do not update the miss/accesses metrics here since it will + // be updated later. + AccessKVPair(row_key, access.referenced_data_size, Cache::Priority::HIGH, + access, + /*no_insert=*/false, + /*is_user_access=*/true, &is_cache_miss, &admitted, + /*update_metrics=*/false); + InsertResult result = InsertResult::NO_INSERT; + if (admitted && access.referenced_data_size > 0) { + result = InsertResult::INSERTED; + } else if (admitted) { + result = InsertResult::ADMITTED; + } + status.row_key_status[row_key] = result; + } + if (!is_cache_miss) { + // A cache hit. + status.is_complete = true; + miss_ratio_stats_.UpdateMetrics(access.access_timestamp, + /*is_user_access=*/true, + /*is_cache_miss=*/false); + return; + } + // The row key-value pair observes a cache miss. We need to access its + // index/filter/data blocks. + InsertResult inserted = status.row_key_status[row_key]; + AccessKVPair( + access.block_key, access.block_size, ComputeBlockPriority(access), + access, + /*no_insert=*/!insert_blocks_upon_row_kvpair_miss_ || access.no_insert, + /*is_user_access=*/true, &is_cache_miss, &admitted, + /*update_metrics=*/true); + if (access.referenced_data_size > 0 && inserted == InsertResult::ADMITTED) { + // TODO: Should we check for an error here? + auto s = + sim_cache_->Insert(row_key, /*obj=*/nullptr, &kNoopCacheItemHelper, + access.referenced_data_size, + /*handle=*/nullptr, Cache::Priority::HIGH); + s.PermitUncheckedError(); + status.row_key_status[row_key] = InsertResult::INSERTED; + } + return; + } + AccessKVPair(access.block_key, access.block_size, + ComputeBlockPriority(access), access, access.no_insert, + BlockCacheTraceHelper::IsUserAccess(access.caller), + &is_cache_miss, &admitted, /*update_metrics=*/true); +} + +BlockCacheTraceSimulator::BlockCacheTraceSimulator( + uint64_t warmup_seconds, uint32_t downsample_ratio, + const std::vector& cache_configurations) + : warmup_seconds_(warmup_seconds), + downsample_ratio_(downsample_ratio), + cache_configurations_(cache_configurations) {} + +Status BlockCacheTraceSimulator::InitializeCaches() { + for (auto const& config : cache_configurations_) { + for (auto cache_capacity : config.cache_capacities) { + // Scale down the cache capacity since the trace contains accesses on + // 1/'downsample_ratio' blocks. 
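+      // e.g. a 1 GiB configured capacity with downsample_ratio == 32 is
+      // simulated as a 32 MiB cache, preserving the cache-to-workload ratio
+      // of the sampled trace.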
+ uint64_t simulate_cache_capacity = cache_capacity / downsample_ratio_; + std::shared_ptr sim_cache; + std::unique_ptr ghost_cache; + std::string cache_name = config.cache_name; + if (cache_name.find(kGhostCachePrefix) != std::string::npos) { + ghost_cache.reset(new GhostCache( + NewLRUCache(config.ghost_cache_capacity, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0))); + cache_name = cache_name.substr(kGhostCachePrefix.size()); + } + if (cache_name == "lru") { + sim_cache = std::make_shared( + std::move(ghost_cache), + NewLRUCache(simulate_cache_capacity, config.num_shard_bits, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0)); + } else if (cache_name == "lru_priority") { + sim_cache = std::make_shared( + std::move(ghost_cache), + NewLRUCache(simulate_cache_capacity, config.num_shard_bits, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0.5)); + } else if (cache_name == "lru_hybrid") { + sim_cache = std::make_shared( + std::move(ghost_cache), + NewLRUCache(simulate_cache_capacity, config.num_shard_bits, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0.5), + /*insert_blocks_upon_row_kvpair_miss=*/true); + } else if (cache_name == "lru_hybrid_no_insert_on_row_miss") { + sim_cache = std::make_shared( + std::move(ghost_cache), + NewLRUCache(simulate_cache_capacity, config.num_shard_bits, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0.5), + /*insert_blocks_upon_row_kvpair_miss=*/false); + } else { + // Not supported. + return Status::InvalidArgument("Unknown cache name " + + config.cache_name); + } + sim_caches_[config].push_back(sim_cache); + } + } + return Status::OK(); +} + +void BlockCacheTraceSimulator::Access(const BlockCacheTraceRecord& access) { + if (trace_start_time_ == 0) { + trace_start_time_ = access.access_timestamp; + } + // access.access_timestamp is in microseconds. + if (!warmup_complete_ && + trace_start_time_ + warmup_seconds_ * kMicrosInSecond <= + access.access_timestamp) { + for (auto& config_caches : sim_caches_) { + for (auto& sim_cache : config_caches.second) { + sim_cache->reset_counter(); + } + } + warmup_complete_ = true; + } + for (auto& config_caches : sim_caches_) { + for (auto& sim_cache : config_caches.second) { + sim_cache->Access(access); + } + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.h new file mode 100644 index 0000000000..6d49790131 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/cache_simulator.h @@ -0,0 +1,231 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "cache/lru_cache.h" +#include "trace_replay/block_cache_tracer.h" + +namespace ROCKSDB_NAMESPACE { + +// A cache configuration provided by user. +struct CacheConfiguration { + std::string cache_name; // LRU. + uint32_t num_shard_bits; + uint64_t ghost_cache_capacity; // ghost cache capacity in bytes. + std::vector + cache_capacities; // simulate cache capacities in bytes. 
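+  // Note that equality and ordering deliberately ignore cache_capacities:
+  // configurations differing only in simulated capacities share one map key
+  // (and one vector of simulators) in BlockCacheTraceSimulator::sim_caches_.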
+ + bool operator==(const CacheConfiguration& o) const { + return cache_name == o.cache_name && num_shard_bits == o.num_shard_bits && + ghost_cache_capacity == o.ghost_cache_capacity; + } + bool operator<(const CacheConfiguration& o) const { + return cache_name < o.cache_name || + (cache_name == o.cache_name && num_shard_bits < o.num_shard_bits) || + (cache_name == o.cache_name && num_shard_bits == o.num_shard_bits && + ghost_cache_capacity < o.ghost_cache_capacity); + } +}; + +class MissRatioStats { + public: + void reset_counter() { + num_misses_ = 0; + num_accesses_ = 0; + user_accesses_ = 0; + user_misses_ = 0; + } + double miss_ratio() const { + if (num_accesses_ == 0) { + return -1; + } + return static_cast(num_misses_ * 100.0 / num_accesses_); + } + uint64_t total_accesses() const { return num_accesses_; } + uint64_t total_misses() const { return num_misses_; } + + const std::map& num_accesses_timeline() const { + return num_accesses_timeline_; + } + + const std::map& num_misses_timeline() const { + return num_misses_timeline_; + } + + double user_miss_ratio() const { + if (user_accesses_ == 0) { + return -1; + } + return static_cast(user_misses_ * 100.0 / user_accesses_); + } + uint64_t user_accesses() const { return user_accesses_; } + uint64_t user_misses() const { return user_misses_; } + + void UpdateMetrics(uint64_t timestamp_in_ms, bool is_user_access, + bool is_cache_miss); + + private: + uint64_t num_accesses_ = 0; + uint64_t num_misses_ = 0; + uint64_t user_accesses_ = 0; + uint64_t user_misses_ = 0; + + std::map num_accesses_timeline_; + std::map num_misses_timeline_; +}; + +// A ghost cache admits an entry on its second access. +class GhostCache { + public: + explicit GhostCache(std::shared_ptr sim_cache); + ~GhostCache() = default; + // No copy and move. + GhostCache(const GhostCache&) = delete; + GhostCache& operator=(const GhostCache&) = delete; + GhostCache(GhostCache&&) = delete; + GhostCache& operator=(GhostCache&&) = delete; + + // Returns true if the lookup_key is in the ghost cache. + // Returns false otherwise. + bool Admit(const Slice& lookup_key); + + private: + std::shared_ptr sim_cache_; +}; + +// A cache simulator that runs against a block cache trace. +class CacheSimulator { + public: + CacheSimulator(std::unique_ptr&& ghost_cache, + std::shared_ptr sim_cache); + virtual ~CacheSimulator() = default; + // No copy and move. + CacheSimulator(const CacheSimulator&) = delete; + CacheSimulator& operator=(const CacheSimulator&) = delete; + CacheSimulator(CacheSimulator&&) = delete; + CacheSimulator& operator=(CacheSimulator&&) = delete; + + virtual void Access(const BlockCacheTraceRecord& access); + + void reset_counter() { miss_ratio_stats_.reset_counter(); } + + const MissRatioStats& miss_ratio_stats() const { return miss_ratio_stats_; } + + protected: + MissRatioStats miss_ratio_stats_; + std::unique_ptr ghost_cache_; + std::shared_ptr sim_cache_; +}; + +// A prioritized cache simulator that runs against a block cache trace. +// It inserts missing index/filter/uncompression-dictionary blocks with high +// priority in the cache. +class PrioritizedCacheSimulator : public CacheSimulator { + public: + PrioritizedCacheSimulator(std::unique_ptr&& ghost_cache, + std::shared_ptr sim_cache) + : CacheSimulator(std::move(ghost_cache), sim_cache) {} + void Access(const BlockCacheTraceRecord& access) override; + + protected: + // Access the key-value pair and returns true upon a cache miss. 
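+  // (More precisely: the outcome is reported through *is_cache_miss and
+  // *admitted; the function itself returns void.)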
+ void AccessKVPair(const Slice& key, uint64_t value_size, + Cache::Priority priority, + const BlockCacheTraceRecord& access, bool no_insert, + bool is_user_access, bool* is_cache_miss, bool* admitted, + bool update_metrics); + + Cache::Priority ComputeBlockPriority( + const BlockCacheTraceRecord& access) const; +}; + +// A hybrid row and block cache simulator. It looks up/inserts key-value pairs +// referenced by Get/MultiGet requests, and not their accessed index/filter/data +// blocks. +// +// Upon a Get/MultiGet request, it looks up the referenced key first. +// If it observes a cache hit, future block accesses on this key-value pair is +// skipped since the request is served already. Otherwise, it continues to look +// up/insert its index/filter/data blocks. It also inserts the referenced +// key-value pair in the cache for future lookups. +class HybridRowBlockCacheSimulator : public PrioritizedCacheSimulator { + public: + HybridRowBlockCacheSimulator(std::unique_ptr&& ghost_cache, + std::shared_ptr sim_cache, + bool insert_blocks_upon_row_kvpair_miss) + : PrioritizedCacheSimulator(std::move(ghost_cache), sim_cache), + insert_blocks_upon_row_kvpair_miss_( + insert_blocks_upon_row_kvpair_miss) {} + void Access(const BlockCacheTraceRecord& access) override; + + private: + enum InsertResult : char { + INSERTED, + ADMITTED, + NO_INSERT, + }; + + // We set is_complete to true when the referenced row-key of a get request + // hits the cache. If is_complete is true, we treat future accesses of this + // get request as hits. + // + // For each row key, it stores an enum. It is INSERTED when the + // kv-pair has been inserted into the cache, ADMITTED if it should be inserted + // but haven't been, NO_INSERT if it should not be inserted. + // + // A kv-pair is in ADMITTED state when we encounter this kv-pair but do not + // know its size. This may happen if the first access on the referenced key is + // an index/filter block. + struct GetRequestStatus { + bool is_complete = false; + std::map row_key_status; + }; + + // A map stores get_id to a map of row keys. + std::map getid_status_map_; + bool insert_blocks_upon_row_kvpair_miss_; +}; + +// A block cache simulator that reports miss ratio curves given a set of cache +// configurations. +class BlockCacheTraceSimulator { + public: + // warmup_seconds: The number of seconds to warmup simulated caches. The + // hit/miss counters are reset after the warmup completes. + BlockCacheTraceSimulator( + uint64_t warmup_seconds, uint32_t downsample_ratio, + const std::vector& cache_configurations); + ~BlockCacheTraceSimulator() = default; + // No copy and move. 
+ BlockCacheTraceSimulator(const BlockCacheTraceSimulator&) = delete; + BlockCacheTraceSimulator& operator=(const BlockCacheTraceSimulator&) = delete; + BlockCacheTraceSimulator(BlockCacheTraceSimulator&&) = delete; + BlockCacheTraceSimulator& operator=(BlockCacheTraceSimulator&&) = delete; + + Status InitializeCaches(); + + void Access(const BlockCacheTraceRecord& access); + + const std::map>>& + sim_caches() const { + return sim_caches_; + } + + private: + const uint64_t warmup_seconds_; + const uint32_t downsample_ratio_; + const std::vector cache_configurations_; + + bool warmup_complete_ = false; + std::map>> + sim_caches_; + uint64_t trace_start_time_ = 0; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/sim_cache.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/sim_cache.cc new file mode 100644 index 0000000000..d58c3b34f1 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/simulator_cache/sim_cache.cc @@ -0,0 +1,372 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/utilities/sim_cache.h" + +#include +#include + +#include "file/writable_file_writer.h" +#include "monitoring/statistics_impl.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +class CacheActivityLogger { + public: + CacheActivityLogger() + : activity_logging_enabled_(false), max_logging_size_(0) {} + + ~CacheActivityLogger() { + MutexLock l(&mutex_); + + StopLoggingInternal(); + bg_status_.PermitUncheckedError(); + } + + Status StartLogging(const std::string& activity_log_file, Env* env, + uint64_t max_logging_size = 0) { + assert(activity_log_file != ""); + assert(env != nullptr); + + Status status; + FileOptions file_opts; + + MutexLock l(&mutex_); + + // Stop existing logging if any + StopLoggingInternal(); + + // Open log file + status = WritableFileWriter::Create(env->GetFileSystem(), activity_log_file, + file_opts, &file_writer_, nullptr); + if (!status.ok()) { + return status; + } + + max_logging_size_ = max_logging_size; + activity_logging_enabled_.store(true); + + return status; + } + + void StopLogging() { + MutexLock l(&mutex_); + + StopLoggingInternal(); + } + + void ReportLookup(const Slice& key) { + if (activity_logging_enabled_.load() == false) { + return; + } + + std::ostringstream oss; + // line format: "LOOKUP - " + oss << "LOOKUP - " << key.ToString(true) << std::endl; + + MutexLock l(&mutex_); + Status s = file_writer_->Append(oss.str()); + if (!s.ok() && bg_status_.ok()) { + bg_status_ = s; + } + if (MaxLoggingSizeReached() || !bg_status_.ok()) { + // Stop logging if we have reached the max file size or + // encountered an error + StopLoggingInternal(); + } + } + + void ReportAdd(const Slice& key, size_t size) { + if (activity_logging_enabled_.load() == false) { + return; + } + + std::ostringstream oss; + // line format: "ADD - - " + oss << "ADD - " << key.ToString(true) << " - " << size << std::endl; + MutexLock l(&mutex_); + Status s = file_writer_->Append(oss.str()); + if (!s.ok() && bg_status_.ok()) { + bg_status_ = s; + } + + if (MaxLoggingSizeReached() || !bg_status_.ok()) { + // Stop logging if we have reached the max file 
size or + // encountered an error + StopLoggingInternal(); + } + } + + Status& bg_status() { + MutexLock l(&mutex_); + return bg_status_; + } + + private: + bool MaxLoggingSizeReached() { + mutex_.AssertHeld(); + + return (max_logging_size_ > 0 && + file_writer_->GetFileSize() >= max_logging_size_); + } + + void StopLoggingInternal() { + mutex_.AssertHeld(); + + if (!activity_logging_enabled_) { + return; + } + + activity_logging_enabled_.store(false); + Status s = file_writer_->Close(); + if (!s.ok() && bg_status_.ok()) { + bg_status_ = s; + } + } + + // Mutex to sync writes to file_writer, and all following + // class data members + port::Mutex mutex_; + // Indicates if logging is currently enabled + // atomic to allow reads without mutex + std::atomic activity_logging_enabled_; + // When reached, we will stop logging and close the file + // Value of 0 means unlimited + uint64_t max_logging_size_; + std::unique_ptr file_writer_; + Status bg_status_; +}; + +// SimCacheImpl definition +class SimCacheImpl : public SimCache { + public: + // capacity for real cache (ShardedLRUCache) + // test_capacity for key only cache + SimCacheImpl(std::shared_ptr sim_cache, std::shared_ptr cache) + : SimCache(cache), + key_only_cache_(sim_cache), + miss_times_(0), + hit_times_(0), + stats_(nullptr) {} + + ~SimCacheImpl() override {} + + const char* Name() const override { return "SimCache"; } + + void SetCapacity(size_t capacity) override { target_->SetCapacity(capacity); } + + void SetStrictCapacityLimit(bool strict_capacity_limit) override { + target_->SetStrictCapacityLimit(strict_capacity_limit); + } + + Status Insert(const Slice& key, Cache::ObjectPtr value, + const CacheItemHelper* helper, size_t charge, Handle** handle, + Priority priority) override { + // The handle and value passed in are for real cache, so we pass nullptr + // to key_only_cache_ for both instead. Also, the deleter function pointer + // will be called by user to perform some external operation which should + // be applied only once. Thus key_only_cache accepts an empty function. + // *Lambda function without capture can be assgined to a function pointer + Handle* h = key_only_cache_->Lookup(key); + if (h == nullptr) { + // TODO: Check for error here? 
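+      // The key-only cache stores no value: just the key with the real
+      // charge, so its usage tracks what a cache of sim capacity would hold.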
+ auto s = key_only_cache_->Insert(key, nullptr, &kNoopCacheItemHelper, + charge, nullptr, priority); + s.PermitUncheckedError(); + } else { + key_only_cache_->Release(h); + } + + cache_activity_logger_.ReportAdd(key, charge); + if (!target_) { + return Status::OK(); + } + return target_->Insert(key, value, helper, charge, handle, priority); + } + + Handle* Lookup(const Slice& key, const CacheItemHelper* helper, + CreateContext* create_context, + Priority priority = Priority::LOW, + Statistics* stats = nullptr) override { + HandleLookup(key, stats); + if (!target_) { + return nullptr; + } + return target_->Lookup(key, helper, create_context, priority, stats); + } + + void StartAsyncLookup(AsyncLookupHandle& async_handle) override { + HandleLookup(async_handle.key, async_handle.stats); + if (target_) { + target_->StartAsyncLookup(async_handle); + } + } + + bool Ref(Handle* handle) override { return target_->Ref(handle); } + + using Cache::Release; + bool Release(Handle* handle, bool erase_if_last_ref = false) override { + return target_->Release(handle, erase_if_last_ref); + } + + void Erase(const Slice& key) override { + target_->Erase(key); + key_only_cache_->Erase(key); + } + + Cache::ObjectPtr Value(Handle* handle) override { + return target_->Value(handle); + } + + uint64_t NewId() override { return target_->NewId(); } + + size_t GetCapacity() const override { return target_->GetCapacity(); } + + bool HasStrictCapacityLimit() const override { + return target_->HasStrictCapacityLimit(); + } + + size_t GetUsage() const override { return target_->GetUsage(); } + + size_t GetUsage(Handle* handle) const override { + return target_->GetUsage(handle); + } + + size_t GetCharge(Handle* handle) const override { + return target_->GetCharge(handle); + } + + const CacheItemHelper* GetCacheItemHelper(Handle* handle) const override { + return target_->GetCacheItemHelper(handle); + } + + size_t GetPinnedUsage() const override { return target_->GetPinnedUsage(); } + + void DisownData() override { + target_->DisownData(); + key_only_cache_->DisownData(); + } + + void ApplyToAllEntries( + const std::function& callback, + const ApplyToAllEntriesOptions& opts) override { + target_->ApplyToAllEntries(callback, opts); + } + + void EraseUnRefEntries() override { + target_->EraseUnRefEntries(); + key_only_cache_->EraseUnRefEntries(); + } + + size_t GetSimCapacity() const override { + return key_only_cache_->GetCapacity(); + } + size_t GetSimUsage() const override { return key_only_cache_->GetUsage(); } + void SetSimCapacity(size_t capacity) override { + key_only_cache_->SetCapacity(capacity); + } + + uint64_t get_miss_counter() const override { + return miss_times_.load(std::memory_order_relaxed); + } + + uint64_t get_hit_counter() const override { + return hit_times_.load(std::memory_order_relaxed); + } + + void reset_counter() override { + miss_times_.store(0, std::memory_order_relaxed); + hit_times_.store(0, std::memory_order_relaxed); + SetTickerCount(stats_, SIM_BLOCK_CACHE_HIT, 0); + SetTickerCount(stats_, SIM_BLOCK_CACHE_MISS, 0); + } + + std::string ToString() const override { + std::ostringstream oss; + oss << "SimCache MISSes: " << get_miss_counter() << std::endl; + oss << "SimCache HITs: " << get_hit_counter() << std::endl; + auto lookups = get_miss_counter() + get_hit_counter(); + oss << "SimCache HITRATE: " << std::fixed << std::setprecision(2) + << (lookups == 0 ? 
0 : get_hit_counter() * 100.0f / lookups)
+        << std::endl;
+    return oss.str();
+  }
+
+  std::string GetPrintableOptions() const override {
+    std::ostringstream oss;
+    oss << "    cache_options:" << std::endl;
+    oss << target_->GetPrintableOptions();
+    oss << "    sim_cache_options:" << std::endl;
+    oss << key_only_cache_->GetPrintableOptions();
+    return oss.str();
+  }
+
+  Status StartActivityLogging(const std::string& activity_log_file, Env* env,
+                              uint64_t max_logging_size = 0) override {
+    return cache_activity_logger_.StartLogging(activity_log_file, env,
+                                               max_logging_size);
+  }
+
+  void StopActivityLogging() override { cache_activity_logger_.StopLogging(); }
+
+  Status GetActivityLoggingStatus() override {
+    return cache_activity_logger_.bg_status();
+  }
+
+ private:
+  std::shared_ptr<Cache> key_only_cache_;
+  std::atomic<uint64_t> miss_times_;
+  std::atomic<uint64_t> hit_times_;
+  Statistics* stats_;
+  CacheActivityLogger cache_activity_logger_;
+
+  void inc_miss_counter() {
+    miss_times_.fetch_add(1, std::memory_order_relaxed);
+  }
+  void inc_hit_counter() { hit_times_.fetch_add(1, std::memory_order_relaxed); }
+
+  void HandleLookup(const Slice& key, Statistics* stats) {
+    Handle* h = key_only_cache_->Lookup(key);
+    if (h != nullptr) {
+      key_only_cache_->Release(h);
+      inc_hit_counter();
+      RecordTick(stats, SIM_BLOCK_CACHE_HIT);
+    } else {
+      inc_miss_counter();
+      RecordTick(stats, SIM_BLOCK_CACHE_MISS);
+    }
+    cache_activity_logger_.ReportLookup(key);
+  }
+};
+
+}  // end anonymous namespace
+
+// For instrumentation purpose, use NewSimCache instead
+std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> cache,
+                                      size_t sim_capacity, int num_shard_bits) {
+  LRUCacheOptions co;
+  co.capacity = sim_capacity;
+  co.num_shard_bits = num_shard_bits;
+  co.metadata_charge_policy = kDontChargeCacheMetadata;
+  return NewSimCache(NewLRUCache(co), cache, num_shard_bits);
+}
+
+std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> sim_cache,
+                                      std::shared_ptr<Cache> cache,
+                                      int num_shard_bits) {
+  if (num_shard_bits >= 20) {
+    return nullptr;  // the cache cannot be sharded into too many fine pieces
+  }
+  return std::make_shared<SimCacheImpl>(sim_cache, cache);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.cc
new file mode 100644
index 0000000000..a3b5189da0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.cc
@@ -0,0 +1,221 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
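The sim_cache.cc unit just closed above exposes its functionality through the two NewSimCache() overloads. As a minimal sketch of how an embedder typically wires it up; the capacities, variable names, and surrounding setup are illustrative assumptions, not part of this patch:

    #include <memory>
    #include "rocksdb/cache.h"
    #include "rocksdb/table.h"
    #include "rocksdb/utilities/sim_cache.h"

    // Back a 4 GiB real block cache with a SimCache that simulates 64 GiB,
    // to estimate how the hit rate would change with a bigger cache.
    std::shared_ptr<rocksdb::Cache> real_cache =
        rocksdb::NewLRUCache(4ULL << 30 /* capacity */);
    std::shared_ptr<rocksdb::SimCache> sim_cache = rocksdb::NewSimCache(
        real_cache, 64ULL << 30 /* sim_capacity */, -1 /* num_shard_bits */);

    rocksdb::BlockBasedTableOptions table_options;
    table_options.block_cache = sim_cache;  // install as the block cache

    // ... run the workload, then read the simulated counters:
    uint64_t sim_hits = sim_cache->get_hit_counter();
    uint64_t sim_misses = sim_cache->get_miss_counter();

Because only the key-only shadow cache is sized to the simulated capacity, the hit and miss counters approximate what a larger cache with the same sharding would do; ToString() above formats the same numbers as a hit-rate report.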
+ +#include "utilities/table_properties_collectors/compact_on_deletion_collector.h" + +#include +#include + +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "rocksdb/utilities/table_properties_collectors.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +CompactOnDeletionCollector::CompactOnDeletionCollector( + size_t sliding_window_size, size_t deletion_trigger, double deletion_ratio) + : bucket_size_((sliding_window_size + kNumBuckets - 1) / kNumBuckets), + current_bucket_(0), + num_keys_in_current_bucket_(0), + num_deletions_in_observation_window_(0), + deletion_trigger_(deletion_trigger), + deletion_ratio_(deletion_ratio), + deletion_ratio_enabled_(deletion_ratio > 0 && deletion_ratio <= 1), + need_compaction_(false), + finished_(false) { + memset(num_deletions_in_buckets_, 0, sizeof(size_t) * kNumBuckets); +} + +// AddUserKey() will be called when a new key/value pair is inserted into the +// table. +// @params key the user key that is inserted into the table. +// @params value the value that is inserted into the table. +// @params file_size file size up to now +Status CompactOnDeletionCollector::AddUserKey(const Slice& /*key*/, + const Slice& /*value*/, + EntryType type, + SequenceNumber /*seq*/, + uint64_t /*file_size*/) { + assert(!finished_); + if (!bucket_size_ && !deletion_ratio_enabled_) { + // This collector is effectively disabled + return Status::OK(); + } + + if (need_compaction_) { + // If the output file already needs to be compacted, skip the check. + return Status::OK(); + } + + if (deletion_ratio_enabled_) { + total_entries_++; + if (type == kEntryDelete) { + deletion_entries_++; + } + } + + if (bucket_size_) { + if (num_keys_in_current_bucket_ == bucket_size_) { + // When the current bucket is full, advance the cursor of the + // ring buffer to the next bucket. + current_bucket_ = (current_bucket_ + 1) % kNumBuckets; + + // Update the current count of observed deletion keys by excluding + // the number of deletion keys in the oldest bucket in the + // observation window. 
+ assert(num_deletions_in_observation_window_ >= + num_deletions_in_buckets_[current_bucket_]); + num_deletions_in_observation_window_ -= + num_deletions_in_buckets_[current_bucket_]; + num_deletions_in_buckets_[current_bucket_] = 0; + num_keys_in_current_bucket_ = 0; + } + + num_keys_in_current_bucket_++; + if (type == kEntryDelete) { + num_deletions_in_observation_window_++; + num_deletions_in_buckets_[current_bucket_]++; + if (num_deletions_in_observation_window_ >= deletion_trigger_) { + need_compaction_ = true; + } + } + } + + return Status::OK(); +} + +Status CompactOnDeletionCollector::Finish( + UserCollectedProperties* /*properties*/) { + if (!need_compaction_ && deletion_ratio_enabled_ && total_entries_ > 0) { + double ratio = static_cast(deletion_entries_) / total_entries_; + need_compaction_ = ratio >= deletion_ratio_; + } + finished_ = true; + return Status::OK(); +} +static std::unordered_map + on_deletion_collector_type_info = { + {"window_size", + {0, OptionType::kUnknown, OptionVerificationType::kNormal, + OptionTypeFlags::kCompareNever | OptionTypeFlags::kMutable, + [](const ConfigOptions&, const std::string&, const std::string& value, + void* addr) { + auto* factory = + static_cast(addr); + factory->SetWindowSize(ParseSizeT(value)); + return Status::OK(); + }, + [](const ConfigOptions&, const std::string&, const void* addr, + std::string* value) { + const auto* factory = + static_cast(addr); + *value = std::to_string(factory->GetWindowSize()); + return Status::OK(); + }, + nullptr}}, + {"deletion_trigger", + {0, OptionType::kUnknown, OptionVerificationType::kNormal, + OptionTypeFlags::kCompareNever | OptionTypeFlags::kMutable, + [](const ConfigOptions&, const std::string&, const std::string& value, + void* addr) { + auto* factory = + static_cast(addr); + factory->SetDeletionTrigger(ParseSizeT(value)); + return Status::OK(); + }, + [](const ConfigOptions&, const std::string&, const void* addr, + std::string* value) { + const auto* factory = + static_cast(addr); + *value = std::to_string(factory->GetDeletionTrigger()); + return Status::OK(); + }, + nullptr}}, + {"deletion_ratio", + {0, OptionType::kUnknown, OptionVerificationType::kNormal, + OptionTypeFlags::kCompareNever | OptionTypeFlags::kMutable, + [](const ConfigOptions&, const std::string&, const std::string& value, + void* addr) { + auto* factory = + static_cast(addr); + factory->SetDeletionRatio(ParseDouble(value)); + return Status::OK(); + }, + [](const ConfigOptions&, const std::string&, const void* addr, + std::string* value) { + const auto* factory = + static_cast(addr); + *value = std::to_string(factory->GetDeletionRatio()); + return Status::OK(); + }, + nullptr}}, + +}; + +CompactOnDeletionCollectorFactory::CompactOnDeletionCollectorFactory( + size_t sliding_window_size, size_t deletion_trigger, double deletion_ratio) + : sliding_window_size_(sliding_window_size), + deletion_trigger_(deletion_trigger), + deletion_ratio_(deletion_ratio) { + RegisterOptions("", this, &on_deletion_collector_type_info); +} + +TablePropertiesCollector* +CompactOnDeletionCollectorFactory::CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context /*context*/) { + return new CompactOnDeletionCollector(sliding_window_size_.load(), + deletion_trigger_.load(), + deletion_ratio_.load()); +} + +std::string CompactOnDeletionCollectorFactory::ToString() const { + std::ostringstream cfg; + cfg << Name() << " (Sliding window size = " << sliding_window_size_.load() + << " Deletion trigger = " << deletion_trigger_.load() + << " 
Deletion ratio = " << deletion_ratio_.load() << ')';
+  return cfg.str();
+}
+
+std::shared_ptr<CompactOnDeletionCollectorFactory>
+NewCompactOnDeletionCollectorFactory(size_t sliding_window_size,
+                                     size_t deletion_trigger,
+                                     double deletion_ratio) {
+  return std::shared_ptr<CompactOnDeletionCollectorFactory>(
+      new CompactOnDeletionCollectorFactory(sliding_window_size,
+                                            deletion_trigger, deletion_ratio));
+}
+namespace {
+static int RegisterTablePropertiesCollectorFactories(
+    ObjectLibrary& library, const std::string& /*arg*/) {
+  library.AddFactory<TablePropertiesCollectorFactory>(
+      CompactOnDeletionCollectorFactory::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<TablePropertiesCollectorFactory>* guard,
+         std::string* /* errmsg */) {
+        // By default, create a CompactOnDeletionCollectorFactory that is
+        // disabled. Users will need to provide configuration parameters or
+        // call the corresponding Setter to enable the factory.
+        guard->reset(new CompactOnDeletionCollectorFactory(0, 0, 0));
+        return guard->get();
+      });
+  return 1;
+}
+}  // namespace
+
+Status TablePropertiesCollectorFactory::CreateFromString(
+    const ConfigOptions& options, const std::string& value,
+    std::shared_ptr<TablePropertiesCollectorFactory>* result) {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    RegisterTablePropertiesCollectorFactories(*(ObjectLibrary::Default().get()),
+                                              "");
+  });
+  return LoadSharedObject<TablePropertiesCollectorFactory>(options, value,
+                                                           result);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.h
new file mode 100644
index 0000000000..c267463a02
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/table_properties_collectors/compact_on_deletion_collector.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/utilities/table_properties_collectors.h"
+namespace ROCKSDB_NAMESPACE {
+
+class CompactOnDeletionCollector : public TablePropertiesCollector {
+ public:
+  CompactOnDeletionCollector(size_t sliding_window_size,
+                             size_t deletion_trigger, double deletion_ratio);
+
+  // AddUserKey() will be called when a new key/value pair is inserted into the
+  // table.
+  // @params key    the user key that is inserted into the table.
+  // @params value  the value that is inserted into the table.
+  // @params file_size  file size up to now
+  virtual Status AddUserKey(const Slice& key, const Slice& value,
+                            EntryType type, SequenceNumber seq,
+                            uint64_t file_size) override;
+
+  // Finish() will be called when a table has already been built and is ready
+  // for writing the properties block.
+  // @params properties  User will add their collected statistics to
+  // `properties`.
+  virtual Status Finish(UserCollectedProperties* /*properties*/) override;
+
+  // Return the human-readable properties, where the key is property name and
+  // the value is the human-readable form of value.
+  virtual UserCollectedProperties GetReadableProperties() const override {
+    return UserCollectedProperties();
+  }
+
+  // The name of the properties collector can be used for debugging purpose.
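Tying the pieces of this collector together: the sliding window is split into kNumBuckets = 128 ring-buffer buckets of ceil(window / 128) keys each, so a window of 128,000 keys yields 1,000-key buckets, and the deletion trigger is evaluated against deletions observed across the live window. A hedged sketch of enabling the collector through the public factory; the numeric thresholds here are illustrative:

    #include "rocksdb/options.h"
    #include "rocksdb/utilities/table_properties_collectors.h"

    rocksdb::Options options;
    // Flag an SST file for compaction once any sliding window of ~128000
    // consecutive keys contains >= 25600 deletions, or once >= 10% of all
    // entries in the file are deletions (the Finish() fallback above).
    options.table_properties_collector_factories.emplace_back(
        rocksdb::NewCompactOnDeletionCollectorFactory(
            128000 /* sliding_window_size */, 25600 /* deletion_trigger */,
            0.1 /* deletion_ratio */));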
+ virtual const char* Name() const override { + return "CompactOnDeletionCollector"; + } + + // EXPERIMENTAL Return whether the output file should be further compacted + virtual bool NeedCompact() const override { return need_compaction_; } + + static const int kNumBuckets = 128; + + private: + void Reset(); + + // A ring buffer that used to count the number of deletion entries for every + // "bucket_size_" keys. + size_t num_deletions_in_buckets_[kNumBuckets]; + // the number of keys in a bucket + size_t bucket_size_; + + size_t current_bucket_; + size_t num_keys_in_current_bucket_; + size_t num_deletions_in_observation_window_; + size_t deletion_trigger_; + const double deletion_ratio_; + const bool deletion_ratio_enabled_; + size_t total_entries_ = 0; + size_t deletion_entries_ = 0; + // true if the current SST file needs to be compacted. + bool need_compaction_; + bool finished_; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.cc new file mode 100644 index 0000000000..5886d3539f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.cc @@ -0,0 +1,133 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "utilities/trace/file_trace_reader_writer.h" + +#include "env/composite_env_wrapper.h" +#include "file/random_access_file_reader.h" +#include "file/writable_file_writer.h" +#include "trace_replay/trace_replay.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +const unsigned int FileTraceReader::kBufferSize = 1024; // 1KB + +FileTraceReader::FileTraceReader( + std::unique_ptr&& reader) + : file_reader_(std::move(reader)), + offset_(0), + buffer_(new char[kBufferSize]) {} + +FileTraceReader::~FileTraceReader() { + Close().PermitUncheckedError(); + delete[] buffer_; +} + +Status FileTraceReader::Close() { + file_reader_.reset(); + return Status::OK(); +} + +Status FileTraceReader::Reset() { + if (file_reader_ == nullptr) { + return Status::IOError("TraceReader is closed."); + } + offset_ = 0; + return Status::OK(); +} + +Status FileTraceReader::Read(std::string* data) { + assert(file_reader_ != nullptr); + Status s = file_reader_->Read(IOOptions(), offset_, kTraceMetadataSize, + &result_, buffer_, nullptr, + Env::IO_TOTAL /* rate_limiter_priority */); + if (!s.ok()) { + return s; + } + if (result_.size() == 0) { + // No more data to read + // Todo: Come up with a better way to indicate end of data. May be this + // could be avoided once footer is introduced. + return Status::Incomplete(); + } + if (result_.size() < kTraceMetadataSize) { + return Status::Corruption("Corrupted trace file."); + } + *data = result_.ToString(); + offset_ += kTraceMetadataSize; + + uint32_t payload_len = + DecodeFixed32(&buffer_[kTraceTimestampSize + kTraceTypeSize]); + + // Read Payload + unsigned int bytes_to_read = payload_len; + unsigned int to_read = + bytes_to_read > kBufferSize ? 
kBufferSize : bytes_to_read; + while (to_read > 0) { + s = file_reader_->Read(IOOptions(), offset_, to_read, &result_, buffer_, + nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + if (!s.ok()) { + return s; + } + if (result_.size() < to_read) { + return Status::Corruption("Corrupted trace file."); + } + data->append(result_.data(), result_.size()); + + offset_ += to_read; + bytes_to_read -= to_read; + to_read = bytes_to_read > kBufferSize ? kBufferSize : bytes_to_read; + } + + return s; +} + +FileTraceWriter::FileTraceWriter( + std::unique_ptr&& file_writer) + : file_writer_(std::move(file_writer)) {} + +FileTraceWriter::~FileTraceWriter() { Close().PermitUncheckedError(); } + +Status FileTraceWriter::Close() { + file_writer_.reset(); + return Status::OK(); +} + +Status FileTraceWriter::Write(const Slice& data) { + return file_writer_->Append(data); +} + +uint64_t FileTraceWriter::GetFileSize() { return file_writer_->GetFileSize(); } + +Status NewFileTraceReader(Env* env, const EnvOptions& env_options, + const std::string& trace_filename, + std::unique_ptr* trace_reader) { + std::unique_ptr file_reader; + Status s = RandomAccessFileReader::Create( + env->GetFileSystem(), trace_filename, FileOptions(env_options), + &file_reader, nullptr); + if (!s.ok()) { + return s; + } + trace_reader->reset(new FileTraceReader(std::move(file_reader))); + return s; +} + +Status NewFileTraceWriter(Env* env, const EnvOptions& env_options, + const std::string& trace_filename, + std::unique_ptr* trace_writer) { + std::unique_ptr file_writer; + Status s = WritableFileWriter::Create(env->GetFileSystem(), trace_filename, + FileOptions(env_options), &file_writer, + nullptr); + if (!s.ok()) { + return s; + } + trace_writer->reset(new FileTraceWriter(std::move(file_writer))); + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.h new file mode 100644 index 0000000000..65d4831083 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/file_trace_reader_writer.h @@ -0,0 +1,48 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/trace_reader_writer.h" + +namespace ROCKSDB_NAMESPACE { + +class RandomAccessFileReader; +class WritableFileWriter; + +// FileTraceReader allows reading RocksDB traces from a file. +class FileTraceReader : public TraceReader { + public: + explicit FileTraceReader(std::unique_ptr&& reader); + ~FileTraceReader(); + + virtual Status Read(std::string* data) override; + virtual Status Close() override; + virtual Status Reset() override; + + private: + std::unique_ptr file_reader_; + Slice result_; + size_t offset_; + char* const buffer_; + + static const unsigned int kBufferSize; +}; + +// FileTraceWriter allows writing RocksDB traces to a file. 
+class FileTraceWriter : public TraceWriter { + public: + explicit FileTraceWriter(std::unique_ptr&& file_writer); + ~FileTraceWriter(); + + virtual Status Write(const Slice& data) override; + virtual Status Close() override; + virtual uint64_t GetFileSize() override; + + private: + std::unique_ptr file_writer_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.cc new file mode 100644 index 0000000000..32a5ad7f05 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.cc @@ -0,0 +1,314 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "utilities/trace/replayer_impl.h" + +#include +#include + +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/system_clock.h" +#include "util/threadpool_imp.h" + +namespace ROCKSDB_NAMESPACE { + +ReplayerImpl::ReplayerImpl(DB* db, + const std::vector& handles, + std::unique_ptr&& reader) + : Replayer(), + trace_reader_(std::move(reader)), + prepared_(false), + trace_end_(false), + header_ts_(0), + exec_handler_(TraceRecord::NewExecutionHandler(db, handles)), + env_(db->GetEnv()), + trace_file_version_(-1) {} + +ReplayerImpl::~ReplayerImpl() { + exec_handler_.reset(); + trace_reader_.reset(); +} + +Status ReplayerImpl::Prepare() { + Trace header; + int db_version; + Status s = ReadHeader(&header); + if (!s.ok()) { + return s; + } + s = TracerHelper::ParseTraceHeader(header, &trace_file_version_, &db_version); + if (!s.ok()) { + return s; + } + header_ts_ = header.ts; + prepared_ = true; + trace_end_ = false; + return Status::OK(); +} + +Status ReplayerImpl::Next(std::unique_ptr* record) { + if (!prepared_) { + return Status::Incomplete("Not prepared!"); + } + if (trace_end_) { + return Status::Incomplete("Trace end."); + } + + Trace trace; + Status s = ReadTrace(&trace); // ReadTrace is atomic + // Reached the trace end. + if (s.ok() && trace.type == kTraceEnd) { + trace_end_ = true; + return Status::Incomplete("Trace end."); + } + if (!s.ok() || record == nullptr) { + return s; + } + + return TracerHelper::DecodeTraceRecord(&trace, trace_file_version_, record); +} + +Status ReplayerImpl::Execute(const std::unique_ptr& record, + std::unique_ptr* result) { + return record->Accept(exec_handler_.get(), result); +} + +Status ReplayerImpl::Replay( + const ReplayOptions& options, + const std::function&&)>& + result_callback) { + if (options.fast_forward <= 0.0) { + return Status::InvalidArgument("Wrong fast forward speed!"); + } + + if (!prepared_) { + return Status::Incomplete("Not prepared!"); + } + if (trace_end_) { + return Status::Incomplete("Trace end."); + } + + Status s = Status::OK(); + + if (options.num_threads <= 1) { + // num_threads == 0 or num_threads == 1 uses single thread. + std::chrono::system_clock::time_point replay_epoch = + std::chrono::system_clock::now(); + + while (s.ok()) { + Trace trace; + s = ReadTrace(&trace); + // If already at trace end, ReadTrace should return Status::Incomplete(). + if (!s.ok()) { + break; + } + + // No need to sleep before breaking the loop if at the trace end. 
+      if (trace.type == kTraceEnd) {
+        trace_end_ = true;
+        s = Status::Incomplete("Trace end.");
+        break;
+      }
+
+      // In single-threaded replay, decode first then sleep.
+      std::unique_ptr<TraceRecord> record;
+      s = TracerHelper::DecodeTraceRecord(&trace, trace_file_version_, &record);
+      if (!s.ok() && !s.IsNotSupported()) {
+        break;
+      }
+
+      std::chrono::system_clock::time_point sleep_to =
+          replay_epoch +
+          std::chrono::microseconds(static_cast<uint64_t>(std::llround(
+              1.0 * (trace.ts - header_ts_) / options.fast_forward)));
+      if (sleep_to > std::chrono::system_clock::now()) {
+        std::this_thread::sleep_until(sleep_to);
+      }
+
+      // Skip unsupported traces, stop for other errors.
+      if (s.IsNotSupported()) {
+        if (result_callback != nullptr) {
+          result_callback(s, nullptr);
+        }
+        s = Status::OK();
+        continue;
+      }
+
+      if (result_callback == nullptr) {
+        s = Execute(record, nullptr);
+      } else {
+        std::unique_ptr<TraceRecordResult> res;
+        s = Execute(record, &res);
+        result_callback(s, std::move(res));
+      }
+    }
+  } else {
+    // Multi-threaded replay.
+    ThreadPoolImpl thread_pool;
+    thread_pool.SetHostEnv(env_);
+    thread_pool.SetBackgroundThreads(static_cast<int>(options.num_threads));
+
+    std::mutex mtx;
+    // Background decoding and execution status.
+    Status bg_s = Status::OK();
+    uint64_t last_err_ts = static_cast<uint64_t>(-1);
+    // Callback function used in background work to update bg_s for the
+    // earliest TraceRecord which has an execution error. This is different
+    // from the timestamp of the first execution error (either start or end
+    // timestamp).
+    //
+    // Suppose TraceRecord R1, R2, with timestamps T1 < T2. Their execution
+    // timestamps are T1_start, T1_end, T2_start, T2_end.
+    // Single-thread: there must be T1_start < T1_end < T2_start < T2_end.
+    // Multi-thread: T1_start < T2_start may not be enforced; their order is
+    // totally unknown.
+    // In order to report the same `first` error in both single-thread and
+    // multi-thread replay, we can only rely on the TraceRecords' timestamps,
+    // rather than their execution timestamps. In single-thread replay, the
+    // first error is also the last error, while in multi-thread replay the
+    // first error may be neither the first nor the last error in execution
+    // order.
+    auto error_cb = [&mtx, &bg_s, &last_err_ts](Status err, uint64_t err_ts) {
+      std::lock_guard<std::mutex> gd(mtx);
+      // Only record the first error.
+      if (!err.ok() && !err.IsNotSupported() && err_ts < last_err_ts) {
+        bg_s = err;
+        last_err_ts = err_ts;
+      }
+    };
+
+    std::chrono::system_clock::time_point replay_epoch =
+        std::chrono::system_clock::now();
+
+    while (bg_s.ok() && s.ok()) {
+      Trace trace;
+      s = ReadTrace(&trace);
+      // If already at trace end, ReadTrace should return Status::Incomplete().
+      if (!s.ok()) {
+        break;
+      }
+
+      TraceType trace_type = trace.type;
+
+      // No need to sleep before breaking the loop if at the trace end.
+      if (trace_type == kTraceEnd) {
+        trace_end_ = true;
+        s = Status::Incomplete("Trace end.");
+        break;
+      }
+
+      // In multi-threaded replay, sleep first then start decoding and
+      // execution in a thread.
+ std::chrono::system_clock::time_point sleep_to = + replay_epoch + + std::chrono::microseconds(static_cast(std::llround( + 1.0 * (trace.ts - header_ts_) / options.fast_forward))); + if (sleep_to > std::chrono::system_clock::now()) { + std::this_thread::sleep_until(sleep_to); + } + + if (trace_type == kTraceWrite || trace_type == kTraceGet || + trace_type == kTraceIteratorSeek || + trace_type == kTraceIteratorSeekForPrev || + trace_type == kTraceMultiGet) { + std::unique_ptr ra(new ReplayerWorkerArg); + ra->trace_entry = std::move(trace); + ra->handler = exec_handler_.get(); + ra->trace_file_version = trace_file_version_; + ra->error_cb = error_cb; + ra->result_cb = result_callback; + thread_pool.Schedule(&ReplayerImpl::BackgroundWork, ra.release(), + nullptr, nullptr); + } else { + // Skip unsupported traces. + if (result_callback != nullptr) { + result_callback(Status::NotSupported("Unsupported trace type."), + nullptr); + } + } + } + + thread_pool.WaitForJobsAndJoinAllThreads(); + if (!bg_s.ok()) { + s = bg_s; + } + } + + if (s.IsIncomplete()) { + // Reaching eof returns Incomplete status at the moment. + // Could happen when killing a process without calling EndTrace() API. + // TODO: Add better error handling. + trace_end_ = true; + return Status::OK(); + } + return s; +} + +uint64_t ReplayerImpl::GetHeaderTimestamp() const { return header_ts_; } + +Status ReplayerImpl::ReadHeader(Trace* header) { + assert(header != nullptr); + Status s = trace_reader_->Reset(); + if (!s.ok()) { + return s; + } + std::string encoded_trace; + // Read the trace head + s = trace_reader_->Read(&encoded_trace); + if (!s.ok()) { + return s; + } + + return TracerHelper::DecodeHeader(encoded_trace, header); +} + +Status ReplayerImpl::ReadTrace(Trace* trace) { + assert(trace != nullptr); + std::string encoded_trace; + // We don't know if TraceReader is implemented thread-safe, so we protect the + // reading trace part with a mutex. The decoding part does not need to be + // protected since it's local. + { + std::lock_guard guard(mutex_); + Status s = trace_reader_->Read(&encoded_trace); + if (!s.ok()) { + return s; + } + } + return TracerHelper::DecodeTrace(encoded_trace, trace); +} + +void ReplayerImpl::BackgroundWork(void* arg) { + std::unique_ptr ra( + reinterpret_cast(arg)); + assert(ra != nullptr); + + std::unique_ptr record; + Status s = TracerHelper::DecodeTraceRecord(&(ra->trace_entry), + ra->trace_file_version, &record); + if (!s.ok()) { + // Stop the replay + if (ra->error_cb != nullptr) { + ra->error_cb(s, ra->trace_entry.ts); + } + // Report the result + if (ra->result_cb != nullptr) { + ra->result_cb(s, nullptr); + } + return; + } + + if (ra->result_cb == nullptr) { + s = record->Accept(ra->handler, nullptr); + } else { + std::unique_ptr res; + s = record->Accept(ra->handler, &res); + ra->result_cb(s, std::move(res)); + } + record.reset(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.h new file mode 100644 index 0000000000..f484ab237f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/trace/replayer_impl.h @@ -0,0 +1,84 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
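The replayer_impl.cc unit above is reached through DB::NewDefaultReplayer(), and the FileTraceReader/FileTraceWriter pair from the previous file feeds it. A rough end-to-end sketch, assuming an open DB* db and an Env* env; the trace path and the unchecked statuses are illustrative shortcuts, not part of this patch:

    #include "rocksdb/db.h"
    #include "rocksdb/trace_reader_writer.h"
    #include "rocksdb/utilities/replayer.h"

    // Record a trace of the live workload.
    std::unique_ptr<rocksdb::TraceWriter> trace_writer;
    rocksdb::Status s = rocksdb::NewFileTraceWriter(
        env, rocksdb::EnvOptions(), "/tmp/query_trace", &trace_writer);
    s = db->StartTrace(rocksdb::TraceOptions(), std::move(trace_writer));
    // ... run the workload ...
    s = db->EndTrace();

    // Replay it later at 2x speed on a single thread.
    std::unique_ptr<rocksdb::TraceReader> trace_reader;
    s = rocksdb::NewFileTraceReader(env, rocksdb::EnvOptions(),
                                    "/tmp/query_trace", &trace_reader);
    std::unique_ptr<rocksdb::Replayer> replayer;
    s = db->NewDefaultReplayer({db->DefaultColumnFamily()},
                               std::move(trace_reader), &replayer);
    s = replayer->Prepare();
    rocksdb::ReplayOptions replay_opts;
    replay_opts.num_threads = 1;     // single-threaded branch of Replay()
    replay_opts.fast_forward = 2.0;  // halve the inter-record sleeps
    s = replayer->Replay(replay_opts, nullptr /* result_callback */);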
+ +#pragma once + +#include +#include +#include +#include +#include + +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/status.h" +#include "rocksdb/trace_reader_writer.h" +#include "rocksdb/trace_record.h" +#include "rocksdb/trace_record_result.h" +#include "rocksdb/utilities/replayer.h" +#include "trace_replay/trace_replay.h" + +namespace ROCKSDB_NAMESPACE { + +class ReplayerImpl : public Replayer { + public: + ReplayerImpl(DB* db, const std::vector& handles, + std::unique_ptr&& reader); + ~ReplayerImpl() override; + + using Replayer::Prepare; + Status Prepare() override; + + using Replayer::Next; + Status Next(std::unique_ptr* record) override; + + using Replayer::Execute; + Status Execute(const std::unique_ptr& record, + std::unique_ptr* result) override; + + using Replayer::Replay; + Status Replay( + const ReplayOptions& options, + const std::function&&)>& + result_callback) override; + + using Replayer::GetHeaderTimestamp; + uint64_t GetHeaderTimestamp() const override; + + private: + Status ReadHeader(Trace* header); + Status ReadTrace(Trace* trace); + + // Generic function to execute a Trace in a thread pool. + static void BackgroundWork(void* arg); + + std::unique_ptr trace_reader_; + std::mutex mutex_; + std::atomic prepared_; + std::atomic trace_end_; + uint64_t header_ts_; + std::unique_ptr exec_handler_; + Env* env_; + // When reading the trace header, the trace file version can be parsed. + // Replayer will use different decode method to get the trace content based + // on different trace file version. + int trace_file_version_; +}; + +// Arguments passed to BackgroundWork() for replaying in a thread pool. +struct ReplayerWorkerArg { + Trace trace_entry; + int trace_file_version; + // Handler to execute TraceRecord. + TraceRecord::Handler* handler; + // Callback function to report the error status and the timestamp of the + // TraceRecord (not the start/end timestamp of executing the TraceRecord). + std::function error_cb; + // Callback function to report the trace execution status and operation + // execution status/result(s). + std::function&&)> result_cb; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.cc new file mode 100644 index 0000000000..f0bef953b6 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.cc @@ -0,0 +1,27 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
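The lock_manager.cc unit that follows defines NewLockManager(), which returns the default point lock manager unless TransactionDBOptions::lock_mgr_handle installs a custom one (for example a range lock manager). A sketch of the options that feed it from the embedder's side; the values and the /tmp path are illustrative, only the field names come from the options struct:

    #include "rocksdb/utilities/transaction_db.h"

    rocksdb::TransactionDBOptions txn_db_options;
    txn_db_options.num_stripes = 16;         // lock stripes per column family
    txn_db_options.max_num_locks = 1 << 20;  // lock budget; -1 = unlimited
    txn_db_options.max_num_deadlocks = 5;    // deadlock info buffer size
    // Leaving txn_db_options.lock_mgr_handle unset selects PointLockManager.

    rocksdb::Options options;
    options.create_if_missing = true;
    rocksdb::TransactionDB* txn_db = nullptr;
    rocksdb::Status s = rocksdb::TransactionDB::Open(
        options, txn_db_options, "/tmp/txn_db_example", &txn_db);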
+ + +#include "utilities/transactions/lock/lock_manager.h" + +#include "utilities/transactions/lock/point/point_lock_manager.h" + +namespace ROCKSDB_NAMESPACE { + +std::shared_ptr NewLockManager(PessimisticTransactionDB* db, + const TransactionDBOptions& opt) { + assert(db); + if (opt.lock_mgr_handle) { + // A custom lock manager was provided in options + auto mgr = opt.lock_mgr_handle->getLockManager(); + return std::shared_ptr(opt.lock_mgr_handle, mgr); + } else { + // Use a point lock manager by default + return std::shared_ptr(new PointLockManager(db, opt)); + } +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.h new file mode 100644 index 0000000000..d90ed9b007 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_manager.h @@ -0,0 +1,80 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/types.h" +#include "rocksdb/utilities/transaction.h" +#include "rocksdb/utilities/transaction_db.h" +#include "utilities/transactions/lock/lock_tracker.h" +#include "utilities/transactions/pessimistic_transaction.h" + +namespace ROCKSDB_NAMESPACE { + +class PessimisticTransactionDB; + +class LockManager { + public: + virtual ~LockManager() {} + + // Whether supports locking a specific key. + virtual bool IsPointLockSupported() const = 0; + + // Whether supports locking a range of keys. + virtual bool IsRangeLockSupported() const = 0; + + // Locks acquired through this LockManager should be tracked by + // the LockTrackers created through the returned factory. + virtual const LockTrackerFactory& GetLockTrackerFactory() const = 0; + + // Enable locking for the specified column family. + // Caller should guarantee that this column family is not already enabled. + virtual void AddColumnFamily(const ColumnFamilyHandle* cf) = 0; + + // Disable locking for the specified column family. + // Caller should guarantee that this column family is no longer used. + virtual void RemoveColumnFamily(const ColumnFamilyHandle* cf) = 0; + + // Attempt to lock a key or a key range. If OK status is returned, the caller + // is responsible for calling UnLock() on this key. + virtual Status TryLock(PessimisticTransaction* txn, + ColumnFamilyId column_family_id, + const std::string& key, Env* env, bool exclusive) = 0; + // The range [start, end] are inclusive at both sides. + virtual Status TryLock(PessimisticTransaction* txn, + ColumnFamilyId column_family_id, const Endpoint& start, + const Endpoint& end, Env* env, bool exclusive) = 0; + + // Unlock a key or a range locked by TryLock(). txn must be the same + // Transaction that locked this key. 
+ virtual void UnLock(PessimisticTransaction* txn, const LockTracker& tracker, + Env* env) = 0; + virtual void UnLock(PessimisticTransaction* txn, + ColumnFamilyId column_family_id, const std::string& key, + Env* env) = 0; + virtual void UnLock(PessimisticTransaction* txn, + ColumnFamilyId column_family_id, const Endpoint& start, + const Endpoint& end, Env* env) = 0; + + using PointLockStatus = std::unordered_multimap; + virtual PointLockStatus GetPointLockStatus() = 0; + + using RangeLockStatus = + std::unordered_multimap; + virtual RangeLockStatus GetRangeLockStatus() = 0; + + virtual std::vector GetDeadlockInfoBuffer() = 0; + + virtual void Resize(uint32_t new_size) = 0; +}; + +// LockManager should always be constructed through this factory method, +// instead of constructing through concrete implementations' constructor. +// Caller owns the returned pointer. +std::shared_ptr NewLockManager(PessimisticTransactionDB* db, + const TransactionDBOptions& opt); + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_tracker.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_tracker.h new file mode 100644 index 0000000000..fe09ab22c5 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/lock_tracker.h @@ -0,0 +1,207 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" +#include "rocksdb/utilities/transaction_db.h" + +namespace ROCKSDB_NAMESPACE { + +// Request for locking a single key. +struct PointLockRequest { + // The id of the key's column family. + ColumnFamilyId column_family_id = 0; + // The key to lock. + std::string key; + // The sequence number from which there is no concurrent update to key. + SequenceNumber seq = 0; + // Whether the lock is acquired only for read. + bool read_only = false; + // Whether the lock is in exclusive mode. + bool exclusive = true; +}; + +// Request for locking a range of keys. +struct RangeLockRequest { + // The id of the key's column family. + ColumnFamilyId column_family_id; + + // The range to be locked + Endpoint start_endp; + Endpoint end_endp; +}; + +struct PointLockStatus { + // Whether the key is locked. + bool locked = false; + // Whether the key is locked in exclusive mode. + bool exclusive = true; + // The sequence number in the tracked PointLockRequest. + SequenceNumber seq = 0; +}; + +// Return status when calling LockTracker::Untrack. +enum class UntrackStatus { + // The lock is not tracked at all, so no lock to untrack. + NOT_TRACKED, + // The lock is untracked but not removed from the tracker. + UNTRACKED, + // The lock is removed from the tracker. + REMOVED, +}; + +// Tracks the lock requests. +// In PessimisticTransaction, it tracks the locks acquired through LockMgr; +// In OptimisticTransaction, since there is no LockMgr, it tracks the lock +// intention. Not thread-safe. +class LockTracker { + public: + virtual ~LockTracker() {} + + // Whether supports locking a specific key. + virtual bool IsPointLockSupported() const = 0; + + // Whether supports locking a range of keys. 
+  virtual bool IsRangeLockSupported() const = 0;
+
+  // Tracks the acquirement of a lock on key.
+  //
+  // If this method is not supported, leave it as a no-op.
+  virtual void Track(const PointLockRequest& /*lock_request*/) = 0;
+
+  // Untracks the lock on a key.
+  // seq and exclusive in lock_request are not used.
+  //
+  // If this method is not supported, leave it as a no-op and
+  // return NOT_TRACKED.
+  virtual UntrackStatus Untrack(const PointLockRequest& /*lock_request*/) = 0;
+
+  // Counterpart of Track(const PointLockRequest&) for RangeLockRequest.
+  virtual void Track(const RangeLockRequest& /*lock_request*/) = 0;
+
+  // Counterpart of Untrack(const PointLockRequest&) for RangeLockRequest.
+  virtual UntrackStatus Untrack(const RangeLockRequest& /*lock_request*/) = 0;
+
+  // Merges lock requests tracked in the specified tracker into the current
+  // tracker.
+  //
+  // E.g. for point lock, if a key in tracker is not yet tracked,
+  // track this new key; otherwise, merge the tracked information of the key
+  // such as lock's exclusiveness, read/write statistics.
+  //
+  // If this method is not supported, leave it as a no-op.
+  //
+  // REQUIRED: the specified tracker must be of the same concrete class type as
+  // the current tracker.
+  virtual void Merge(const LockTracker& /*tracker*/) = 0;
+
+  // This is a reverse operation of Merge.
+  //
+  // E.g. for point lock, if a key exists in both the current and the
+  // specified tracker, then subtract the information (such as read/write
+  // statistics) of the key in the specified tracker from the current tracker.
+  //
+  // If this method is not supported, leave it as a no-op.
+  //
+  // REQUIRED:
+  // The specified tracker must be of the same concrete class type as
+  // the current tracker.
+  // The tracked locks in the specified tracker must be a subset of those
+  // tracked by the current tracker.
+  virtual void Subtract(const LockTracker& /*tracker*/) = 0;
+
+  // Clears all tracked locks.
+  virtual void Clear() = 0;
+
+  // Gets the new locks (excluding the locks that have been tracked before the
+  // save point) tracked since the specified save point; the result is stored
+  // in an internally constructed LockTracker and returned.
+  //
+  // save_point_tracker is the tracker used by a SavePoint to track locks
+  // tracked after creating the SavePoint.
+  //
+  // The implementation should document whether point lock, or range lock, or
+  // both are considered in this method.
+  // If this method is not supported, returns nullptr.
+  //
+  // REQUIRED:
+  // The save_point_tracker must be of the same concrete class type as the
+  // current tracker.
+  // The tracked locks in the specified tracker must be a subset of those
+  // tracked by the current tracker.
+  virtual LockTracker* GetTrackedLocksSinceSavePoint(
+      const LockTracker& /*save_point_tracker*/) const = 0;
+
+  // Gets lock-related information of the key.
+  //
+  // If point lock is not supported, always returns LockStatus with
+  // locked=false.
+  virtual PointLockStatus GetPointLockStatus(
+      ColumnFamilyId /*column_family_id*/,
+      const std::string& /*key*/) const = 0;
+
+  // Gets the number of tracked point locks.
+  //
+  // If point lock is not supported, always returns 0.
+  virtual uint64_t GetNumPointLocks() const = 0;
+
+  class ColumnFamilyIterator {
+   public:
+    virtual ~ColumnFamilyIterator() {}
+
+    // Whether there are remaining column families.
+    virtual bool HasNext() const = 0;
+
+    // Gets the next column family id.
+ // + // If HasNext is false, calling this method has undefined behavior. + virtual ColumnFamilyId Next() = 0; + }; + + // Gets an iterator for column families. + // + // Returned iterator must not be nullptr. + // If there is no column family to iterate, + // returns an empty non-null iterator. + // Caller owns the returned pointer. + virtual ColumnFamilyIterator* GetColumnFamilyIterator() const = 0; + + class KeyIterator { + public: + virtual ~KeyIterator() {} + + // Whether there are remaining keys. + virtual bool HasNext() const = 0; + + // Gets the next key. + // + // If HasNext is false, calling this method has undefined behavior. + virtual const std::string& Next() = 0; + }; + + // Gets an iterator for keys with tracked point locks in the column family. + // + // The column family must exist. + // Returned iterator must not be nullptr. + // Caller owns the returned pointer. + virtual KeyIterator* GetKeyIterator( + ColumnFamilyId /*column_family_id*/) const = 0; +}; + +// LockTracker should always be constructed through this factory. +// Each LockManager owns a LockTrackerFactory. +class LockTrackerFactory { + public: + // Caller owns the returned pointer. + virtual LockTracker* Create() const = 0; + virtual ~LockTrackerFactory() {} +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.cc new file mode 100644 index 0000000000..b73b3fe761 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.cc @@ -0,0 +1,719 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
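lock_tracker.h above is an internal interface, but the iterator protocol it defines is easiest to see in use. A sketch of walking every tracked point lock, given some valid LockTracker* tracker (a hypothetical variable, not part of this patch); PointLockManager::UnLock() in the next file uses exactly this pattern to bucket keys by stripe:

    // Ownership of the returned iterators is the caller's, hence unique_ptr.
    std::unique_ptr<rocksdb::LockTracker::ColumnFamilyIterator> cf_it(
        tracker->GetColumnFamilyIterator());
    while (cf_it->HasNext()) {
      rocksdb::ColumnFamilyId cf = cf_it->Next();
      std::unique_ptr<rocksdb::LockTracker::KeyIterator> key_it(
          tracker->GetKeyIterator(cf));
      while (key_it->HasNext()) {
        const std::string& key = key_it->Next();
        rocksdb::PointLockStatus st = tracker->GetPointLockStatus(cf, key);
        // st.locked / st.exclusive / st.seq describe the tracked lock.
      }
    }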
+
+
+#include "utilities/transactions/lock/point/point_lock_manager.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <mutex>
+
+#include "monitoring/perf_context_imp.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/utilities/transaction_db_mutex.h"
+#include "test_util/sync_point.h"
+#include "util/cast_util.h"
+#include "util/hash.h"
+#include "util/thread_local.h"
+#include "utilities/transactions/pessimistic_transaction_db.h"
+#include "utilities/transactions/transaction_db_mutex_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct LockInfo {
+  bool exclusive;
+  autovector<TransactionID> txn_ids;
+
+  // Transaction locks are not valid after this time in us
+  uint64_t expiration_time;
+
+  LockInfo(TransactionID id, uint64_t time, bool ex)
+      : exclusive(ex), expiration_time(time) {
+    txn_ids.push_back(id);
+  }
+  LockInfo(const LockInfo& lock_info)
+      : exclusive(lock_info.exclusive),
+        txn_ids(lock_info.txn_ids),
+        expiration_time(lock_info.expiration_time) {}
+  void operator=(const LockInfo& lock_info) {
+    exclusive = lock_info.exclusive;
+    txn_ids = lock_info.txn_ids;
+    expiration_time = lock_info.expiration_time;
+  }
+  DECLARE_DEFAULT_MOVES(LockInfo);
+};
+
+struct LockMapStripe {
+  explicit LockMapStripe(std::shared_ptr<TransactionDBMutexFactory> factory) {
+    stripe_mutex = factory->AllocateMutex();
+    stripe_cv = factory->AllocateCondVar();
+    assert(stripe_mutex);
+    assert(stripe_cv);
+  }
+
+  // Mutex must be held before modifying keys map
+  std::shared_ptr<TransactionDBMutex> stripe_mutex;
+
+  // Condition Variable per stripe for waiting on a lock
+  std::shared_ptr<TransactionDBCondVar> stripe_cv;
+
+  // Locked keys mapped to the info about the transactions that locked them.
+  // TODO(agiardullo): Explore performance of other data structures.
+  UnorderedMap<std::string, LockInfo> keys;
+};
+
+// Map of #num_stripes LockMapStripes
+struct LockMap {
+  explicit LockMap(size_t num_stripes,
+                   std::shared_ptr<TransactionDBMutexFactory> factory)
+      : num_stripes_(num_stripes) {
+    lock_map_stripes_.reserve(num_stripes);
+    for (size_t i = 0; i < num_stripes; i++) {
+      LockMapStripe* stripe = new LockMapStripe(factory);
+      lock_map_stripes_.push_back(stripe);
+    }
+  }
+
+  ~LockMap() {
+    for (auto stripe : lock_map_stripes_) {
+      delete stripe;
+    }
+  }
+
+  // Number of separate LockMapStripes to create, each with their own Mutex
+  const size_t num_stripes_;
+
+  // Count of keys that are currently locked in this column family.
+  // (Only maintained if PointLockManager::max_num_locks_ is positive.)
+  std::atomic<int64_t> lock_cnt{0};
+
+  std::vector<LockMapStripe*> lock_map_stripes_;
+
+  size_t GetStripe(const std::string& key) const;
+};
+
+namespace {
+void UnrefLockMapsCache(void* ptr) {
+  // Called when a thread exits or a ThreadLocalPtr gets destroyed.
+  auto lock_maps_cache =
+      static_cast<UnorderedMap<uint32_t, std::shared_ptr<LockMap>>*>(ptr);
+  delete lock_maps_cache;
+}
+}  // anonymous namespace
+
+PointLockManager::PointLockManager(PessimisticTransactionDB* txn_db,
+                                   const TransactionDBOptions& opt)
+    : txn_db_impl_(txn_db),
+      default_num_stripes_(opt.num_stripes),
+      max_num_locks_(opt.max_num_locks),
+      lock_maps_cache_(new ThreadLocalPtr(&UnrefLockMapsCache)),
+      dlock_buffer_(opt.max_num_deadlocks),
+      mutex_factory_(opt.custom_mutex_factory
+                         ?
opt.custom_mutex_factory + : std::make_shared()) {} + +size_t LockMap::GetStripe(const std::string& key) const { + assert(num_stripes_ > 0); + return FastRange64(GetSliceNPHash64(key), num_stripes_); +} + +void PointLockManager::AddColumnFamily(const ColumnFamilyHandle* cf) { + InstrumentedMutexLock l(&lock_map_mutex_); + + if (lock_maps_.find(cf->GetID()) == lock_maps_.end()) { + lock_maps_.emplace(cf->GetID(), std::make_shared( + default_num_stripes_, mutex_factory_)); + } else { + // column_family already exists in lock map + assert(false); + } +} + +void PointLockManager::RemoveColumnFamily(const ColumnFamilyHandle* cf) { + // Remove lock_map for this column family. Since the lock map is stored + // as a shared ptr, concurrent transactions can still keep using it + // until they release their references to it. + { + InstrumentedMutexLock l(&lock_map_mutex_); + + auto lock_maps_iter = lock_maps_.find(cf->GetID()); + if (lock_maps_iter == lock_maps_.end()) { + return; + } + + lock_maps_.erase(lock_maps_iter); + } // lock_map_mutex_ + + // Clear all thread-local caches + autovector local_caches; + lock_maps_cache_->Scrape(&local_caches, nullptr); + for (auto cache : local_caches) { + delete static_cast(cache); + } +} + +// Look up the LockMap std::shared_ptr for a given column_family_id. +// Note: The LockMap is only valid as long as the caller is still holding on +// to the returned std::shared_ptr. +std::shared_ptr PointLockManager::GetLockMap( + ColumnFamilyId column_family_id) { + // First check thread-local cache + if (lock_maps_cache_->Get() == nullptr) { + lock_maps_cache_->Reset(new LockMaps()); + } + + auto lock_maps_cache = static_cast(lock_maps_cache_->Get()); + + auto lock_map_iter = lock_maps_cache->find(column_family_id); + if (lock_map_iter != lock_maps_cache->end()) { + // Found lock map for this column family. + return lock_map_iter->second; + } + + // Not found in local cache, grab mutex and check shared LockMaps + InstrumentedMutexLock l(&lock_map_mutex_); + + lock_map_iter = lock_maps_.find(column_family_id); + if (lock_map_iter == lock_maps_.end()) { + return std::shared_ptr(nullptr); + } else { + // Found lock map. Store in thread-local cache and return. + std::shared_ptr& lock_map = lock_map_iter->second; + lock_maps_cache->insert({column_family_id, lock_map}); + + return lock_map; + } +} + +// Returns true if this lock has expired and can be acquired by another +// transaction. +// If false, sets *expire_time to the expiration time of the lock according +// to Env->GetMicros() or 0 if no expiration. 
+bool PointLockManager::IsLockExpired(TransactionID txn_id, + const LockInfo& lock_info, Env* env, + uint64_t* expire_time) { + if (lock_info.expiration_time == 0) { + *expire_time = 0; + return false; + } + + auto now = env->NowMicros(); + bool expired = lock_info.expiration_time <= now; + if (!expired) { + // return how many microseconds until lock will be expired + *expire_time = lock_info.expiration_time; + } else { + for (auto id : lock_info.txn_ids) { + if (txn_id == id) { + continue; + } + + bool success = txn_db_impl_->TryStealingExpiredTransactionLocks(id); + if (!success) { + expired = false; + *expire_time = 0; + break; + } + } + } + + return expired; +} + +Status PointLockManager::TryLock(PessimisticTransaction* txn, + ColumnFamilyId column_family_id, + const std::string& key, Env* env, + bool exclusive) { + // Lookup lock map for this column family id + std::shared_ptr lock_map_ptr = GetLockMap(column_family_id); + LockMap* lock_map = lock_map_ptr.get(); + if (lock_map == nullptr) { + char msg[255]; + snprintf(msg, sizeof(msg), "Column family id not found: %" PRIu32, + column_family_id); + + return Status::InvalidArgument(msg); + } + + // Need to lock the mutex for the stripe that this key hashes to + size_t stripe_num = lock_map->GetStripe(key); + assert(lock_map->lock_map_stripes_.size() > stripe_num); + LockMapStripe* stripe = lock_map->lock_map_stripes_.at(stripe_num); + + LockInfo lock_info(txn->GetID(), txn->GetExpirationTime(), exclusive); + int64_t timeout = txn->GetLockTimeout(); + + return AcquireWithTimeout(txn, lock_map, stripe, column_family_id, key, env, + timeout, lock_info); +} + +// Helper function for TryLock(). +Status PointLockManager::AcquireWithTimeout( + PessimisticTransaction* txn, LockMap* lock_map, LockMapStripe* stripe, + ColumnFamilyId column_family_id, const std::string& key, Env* env, + int64_t timeout, const LockInfo& lock_info) { + Status result; + uint64_t end_time = 0; + + if (timeout > 0) { + uint64_t start_time = env->NowMicros(); + end_time = start_time + timeout; + } + + if (timeout < 0) { + // If timeout is negative, we wait indefinitely to acquire the lock + result = stripe->stripe_mutex->Lock(); + } else { + result = stripe->stripe_mutex->TryLockFor(timeout); + } + + if (!result.ok()) { + // failed to acquire mutex + return result; + } + + // Acquire lock if we are able to + uint64_t expire_time_hint = 0; + autovector wait_ids; + result = AcquireLocked(lock_map, stripe, key, env, lock_info, + &expire_time_hint, &wait_ids); + + if (!result.ok() && timeout != 0) { + PERF_TIMER_GUARD(key_lock_wait_time); + PERF_COUNTER_ADD(key_lock_wait_count, 1); + // If we weren't able to acquire the lock, we will keep retrying as long + // as the timeout allows. + bool timed_out = false; + do { + // Decide how long to wait + int64_t cv_end_time = -1; + if (expire_time_hint > 0 && end_time > 0) { + cv_end_time = std::min(expire_time_hint, end_time); + } else if (expire_time_hint > 0) { + cv_end_time = expire_time_hint; + } else if (end_time > 0) { + cv_end_time = end_time; + } + + assert(result.IsBusy() || wait_ids.size() != 0); + + // We are dependent on a transaction to finish, so perform deadlock + // detection. 
+ if (wait_ids.size() != 0) { + if (txn->IsDeadlockDetect()) { + if (IncrementWaiters(txn, wait_ids, key, column_family_id, + lock_info.exclusive, env)) { + result = Status::Busy(Status::SubCode::kDeadlock); + stripe->stripe_mutex->UnLock(); + return result; + } + } + txn->SetWaitingTxn(wait_ids, column_family_id, &key); + } + + TEST_SYNC_POINT("PointLockManager::AcquireWithTimeout:WaitingTxn"); + if (cv_end_time < 0) { + // Wait indefinitely + result = stripe->stripe_cv->Wait(stripe->stripe_mutex); + } else { + uint64_t now = env->NowMicros(); + if (static_cast(cv_end_time) > now) { + result = stripe->stripe_cv->WaitFor(stripe->stripe_mutex, + cv_end_time - now); + } + } + + if (wait_ids.size() != 0) { + txn->ClearWaitingTxn(); + if (txn->IsDeadlockDetect()) { + DecrementWaiters(txn, wait_ids); + } + } + + if (result.IsTimedOut()) { + timed_out = true; + // Even though we timed out, we will still make one more attempt to + // acquire lock below (it is possible the lock expired and we + // were never signaled). + } + + if (result.ok() || result.IsTimedOut()) { + result = AcquireLocked(lock_map, stripe, key, env, lock_info, + &expire_time_hint, &wait_ids); + } + } while (!result.ok() && !timed_out); + } + + stripe->stripe_mutex->UnLock(); + + return result; +} + +void PointLockManager::DecrementWaiters( + const PessimisticTransaction* txn, + const autovector& wait_ids) { + std::lock_guard lock(wait_txn_map_mutex_); + DecrementWaitersImpl(txn, wait_ids); +} + +void PointLockManager::DecrementWaitersImpl( + const PessimisticTransaction* txn, + const autovector& wait_ids) { + auto id = txn->GetID(); + assert(wait_txn_map_.Contains(id)); + wait_txn_map_.Delete(id); + + for (auto wait_id : wait_ids) { + rev_wait_txn_map_.Get(wait_id)--; + if (rev_wait_txn_map_.Get(wait_id) == 0) { + rev_wait_txn_map_.Delete(wait_id); + } + } +} + +bool PointLockManager::IncrementWaiters( + const PessimisticTransaction* txn, + const autovector& wait_ids, const std::string& key, + const uint32_t& cf_id, const bool& exclusive, Env* const env) { + auto id = txn->GetID(); + std::vector queue_parents( + static_cast(txn->GetDeadlockDetectDepth())); + std::vector queue_values( + static_cast(txn->GetDeadlockDetectDepth())); + std::lock_guard lock(wait_txn_map_mutex_); + assert(!wait_txn_map_.Contains(id)); + + wait_txn_map_.Insert(id, {wait_ids, cf_id, exclusive, key}); + + for (auto wait_id : wait_ids) { + if (rev_wait_txn_map_.Contains(wait_id)) { + rev_wait_txn_map_.Get(wait_id)++; + } else { + rev_wait_txn_map_.Insert(wait_id, 1); + } + } + + // No deadlock if nobody is waiting on self. + if (!rev_wait_txn_map_.Contains(id)) { + return false; + } + + const auto* next_ids = &wait_ids; + int parent = -1; + int64_t deadlock_time = 0; + for (int tail = 0, head = 0; head < txn->GetDeadlockDetectDepth(); head++) { + int i = 0; + if (next_ids) { + for (; i < static_cast(next_ids->size()) && + tail + i < txn->GetDeadlockDetectDepth(); + i++) { + queue_values[tail + i] = (*next_ids)[i]; + queue_parents[tail + i] = parent; + } + tail += i; + } + + // No more items in the list, meaning no deadlock. 
+ if (tail == head) { + return false; + } + + auto next = queue_values[head]; + if (next == id) { + std::vector path; + while (head != -1) { + assert(wait_txn_map_.Contains(queue_values[head])); + + auto extracted_info = wait_txn_map_.Get(queue_values[head]); + path.push_back({queue_values[head], extracted_info.m_cf_id, + extracted_info.m_exclusive, + extracted_info.m_waiting_key}); + head = queue_parents[head]; + } + if (!env->GetCurrentTime(&deadlock_time).ok()) { + /* + TODO(AR) this preserves the current behaviour whilst checking the + status of env->GetCurrentTime to ensure that ASSERT_STATUS_CHECKED + passes. Should we instead raise an error if !ok() ? + */ + deadlock_time = 0; + } + std::reverse(path.begin(), path.end()); + dlock_buffer_.AddNewPath(DeadlockPath(path, deadlock_time)); + deadlock_time = 0; + DecrementWaitersImpl(txn, wait_ids); + return true; + } else if (!wait_txn_map_.Contains(next)) { + next_ids = nullptr; + continue; + } else { + parent = head; + next_ids = &(wait_txn_map_.Get(next).m_neighbors); + } + } + + // Wait cycle too big, just assume deadlock. + if (!env->GetCurrentTime(&deadlock_time).ok()) { + /* + TODO(AR) this preserves the current behaviour whilst checking the status + of env->GetCurrentTime to ensure that ASSERT_STATUS_CHECKED passes. + Should we instead raise an error if !ok() ? + */ + deadlock_time = 0; + } + dlock_buffer_.AddNewPath(DeadlockPath(deadlock_time, true)); + DecrementWaitersImpl(txn, wait_ids); + return true; +} + +// Try to lock this key after we have acquired the mutex. +// Sets *expire_time to the expiration time in microseconds +// or 0 if no expiration. +// REQUIRED: Stripe mutex must be held. +Status PointLockManager::AcquireLocked(LockMap* lock_map, LockMapStripe* stripe, + const std::string& key, Env* env, + const LockInfo& txn_lock_info, + uint64_t* expire_time, + autovector* txn_ids) { + assert(txn_lock_info.txn_ids.size() == 1); + + Status result; + // Check if this key is already locked + auto stripe_iter = stripe->keys.find(key); + if (stripe_iter != stripe->keys.end()) { + // Lock already held + LockInfo& lock_info = stripe_iter->second; + assert(lock_info.txn_ids.size() == 1 || !lock_info.exclusive); + + if (lock_info.exclusive || txn_lock_info.exclusive) { + if (lock_info.txn_ids.size() == 1 && + lock_info.txn_ids[0] == txn_lock_info.txn_ids[0]) { + // The list contains one txn and we're it, so just take it. + lock_info.exclusive = txn_lock_info.exclusive; + lock_info.expiration_time = txn_lock_info.expiration_time; + } else { + // Check if it's expired. Skips over txn_lock_info.txn_ids[0] in case + // it's there for a shared lock with multiple holders which was not + // caught in the first case. + if (IsLockExpired(txn_lock_info.txn_ids[0], lock_info, env, + expire_time)) { + // lock is expired, can steal it + lock_info.txn_ids = txn_lock_info.txn_ids; + lock_info.exclusive = txn_lock_info.exclusive; + lock_info.expiration_time = txn_lock_info.expiration_time; + // lock_cnt does not change + } else { + result = Status::TimedOut(Status::SubCode::kLockTimeout); + *txn_ids = lock_info.txn_ids; + } + } + } else { + // We are requesting shared access to a shared lock, so just grant it. + lock_info.txn_ids.push_back(txn_lock_info.txn_ids[0]); + // Using std::max means that expiration time never goes down even when + // a transaction is removed from the list. The correct solution would be + // to track expiry for every transaction, but this would also work for + // now. 
+// Try to lock this key after we have acquired the mutex.
+// Sets *expire_time to the expiration time in microseconds
+// or 0 if no expiration.
+// REQUIRED: Stripe mutex must be held.
+Status PointLockManager::AcquireLocked(LockMap* lock_map, LockMapStripe* stripe,
+                                       const std::string& key, Env* env,
+                                       const LockInfo& txn_lock_info,
+                                       uint64_t* expire_time,
+                                       autovector<TransactionID>* txn_ids) {
+  assert(txn_lock_info.txn_ids.size() == 1);
+
+  Status result;
+  // Check if this key is already locked
+  auto stripe_iter = stripe->keys.find(key);
+  if (stripe_iter != stripe->keys.end()) {
+    // Lock already held
+    LockInfo& lock_info = stripe_iter->second;
+    assert(lock_info.txn_ids.size() == 1 || !lock_info.exclusive);
+
+    if (lock_info.exclusive || txn_lock_info.exclusive) {
+      if (lock_info.txn_ids.size() == 1 &&
+          lock_info.txn_ids[0] == txn_lock_info.txn_ids[0]) {
+        // The list contains one txn and we're it, so just take it.
+        lock_info.exclusive = txn_lock_info.exclusive;
+        lock_info.expiration_time = txn_lock_info.expiration_time;
+      } else {
+        // Check if it's expired. Skips over txn_lock_info.txn_ids[0] in case
+        // it's there for a shared lock with multiple holders which was not
+        // caught in the first case.
+        if (IsLockExpired(txn_lock_info.txn_ids[0], lock_info, env,
+                          expire_time)) {
+          // lock is expired, can steal it
+          lock_info.txn_ids = txn_lock_info.txn_ids;
+          lock_info.exclusive = txn_lock_info.exclusive;
+          lock_info.expiration_time = txn_lock_info.expiration_time;
+          // lock_cnt does not change
+        } else {
+          result = Status::TimedOut(Status::SubCode::kLockTimeout);
+          *txn_ids = lock_info.txn_ids;
+        }
+      }
+    } else {
+      // We are requesting shared access to a shared lock, so just grant it.
+      lock_info.txn_ids.push_back(txn_lock_info.txn_ids[0]);
+      // Using std::max means that expiration time never goes down even when
+      // a transaction is removed from the list. The correct solution would be
+      // to track expiry for every transaction, but this would also work for
+      // now.
+      lock_info.expiration_time =
+          std::max(lock_info.expiration_time, txn_lock_info.expiration_time);
+    }
+  } else {  // Lock not held.
+    // Check lock limit
+    if (max_num_locks_ > 0 &&
+        lock_map->lock_cnt.load(std::memory_order_acquire) >= max_num_locks_) {
+      result = Status::Busy(Status::SubCode::kLockLimit);
+    } else {
+      // acquire lock
+      stripe->keys.emplace(key, txn_lock_info);
+
+      // Maintain lock count if there is a limit on the number of locks
+      if (max_num_locks_) {
+        lock_map->lock_cnt++;
+      }
+    }
+  }
+
+  return result;
+}
+
+void PointLockManager::UnLockKey(PessimisticTransaction* txn,
+                                 const std::string& key, LockMapStripe* stripe,
+                                 LockMap* lock_map, Env* env) {
+#ifdef NDEBUG
+  (void)env;
+#endif
+  TransactionID txn_id = txn->GetID();
+
+  auto stripe_iter = stripe->keys.find(key);
+  if (stripe_iter != stripe->keys.end()) {
+    auto& txns = stripe_iter->second.txn_ids;
+    auto txn_it = std::find(txns.begin(), txns.end(), txn_id);
+    // Found the key we locked. unlock it.
+    if (txn_it != txns.end()) {
+      if (txns.size() == 1) {
+        stripe->keys.erase(stripe_iter);
+      } else {
+        auto last_it = txns.end() - 1;
+        if (txn_it != last_it) {
+          *txn_it = *last_it;
+        }
+        txns.pop_back();
+      }
+
+      if (max_num_locks_ > 0) {
+        // Maintain lock count if there is a limit on the number of locks.
+        assert(lock_map->lock_cnt.load(std::memory_order_relaxed) > 0);
+        lock_map->lock_cnt--;
+      }
+    }
+  } else {
+    // This key is either not locked or locked by someone else. This should
+    // only happen if the unlocking transaction has expired.
+    assert(txn->GetExpirationTime() > 0 &&
+           txn->GetExpirationTime() < env->NowMicros());
+  }
+}
+
+void PointLockManager::UnLock(PessimisticTransaction* txn,
+                              ColumnFamilyId column_family_id,
+                              const std::string& key, Env* env) {
+  std::shared_ptr<LockMap> lock_map_ptr = GetLockMap(column_family_id);
+  LockMap* lock_map = lock_map_ptr.get();
+  if (lock_map == nullptr) {
+    // Column Family must have been dropped.
+    return;
+  }
+
+  // Lock the mutex for the stripe that this key hashes to
+  size_t stripe_num = lock_map->GetStripe(key);
+  assert(lock_map->lock_map_stripes_.size() > stripe_num);
+  LockMapStripe* stripe = lock_map->lock_map_stripes_.at(stripe_num);
+
+  stripe->stripe_mutex->Lock().PermitUncheckedError();
+  UnLockKey(txn, key, stripe, lock_map, env);
+  stripe->stripe_mutex->UnLock();
+
+  // Signal waiting threads to retry locking
+  stripe->stripe_cv->NotifyAll();
+}
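
One detail worth spelling out from the unlock path above: releasing a lock never hands it to a specific waiter. The releaser drops the entry under the stripe mutex and then wakes every thread parked on the stripe's condition variable, and each woken thread simply re-runs AcquireLocked. A generic sketch of that shape with standard primitives (the real code goes through RocksDB's pluggable TransactionDBMutex/TransactionDBCondVar instead):

    #include <condition_variable>
    #include <mutex>

    std::mutex stripe_mutex;
    std::condition_variable stripe_cv;

    bool TryAcquireLocked() { return true; }  // stub standing in for AcquireLocked()

    void AcquireBlocking() {
      std::unique_lock<std::mutex> lk(stripe_mutex);
      while (!TryAcquireLocked()) {
        stripe_cv.wait(lk);  // woken by notify_all in Release()
      }
    }

    void Release() {
      { std::lock_guard<std::mutex> lk(stripe_mutex); /* erase lock entry */ }
      stripe_cv.notify_all();  // all waiters retry; no direct hand-off
    }
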
+void PointLockManager::UnLock(PessimisticTransaction* txn,
+                              const LockTracker& tracker, Env* env) {
+  std::unique_ptr<LockTracker::ColumnFamilyIterator> cf_it(
+      tracker.GetColumnFamilyIterator());
+  assert(cf_it != nullptr);
+  while (cf_it->HasNext()) {
+    ColumnFamilyId cf = cf_it->Next();
+    std::shared_ptr<LockMap> lock_map_ptr = GetLockMap(cf);
+    LockMap* lock_map = lock_map_ptr.get();
+    if (!lock_map) {
+      // Column Family must have been dropped.
+      return;
+    }
+
+    // Bucket keys by lock_map_ stripe
+    UnorderedMap<size_t, std::vector<const std::string*>> keys_by_stripe(
+        lock_map->num_stripes_);
+    std::unique_ptr<LockTracker::KeyIterator> key_it(
+        tracker.GetKeyIterator(cf));
+    assert(key_it != nullptr);
+    while (key_it->HasNext()) {
+      const std::string& key = key_it->Next();
+      size_t stripe_num = lock_map->GetStripe(key);
+      keys_by_stripe[stripe_num].push_back(&key);
+    }
+
+    // For each stripe, grab the stripe mutex and unlock all keys in this
+    // stripe
+    for (auto& stripe_iter : keys_by_stripe) {
+      size_t stripe_num = stripe_iter.first;
+      auto& stripe_keys = stripe_iter.second;
+
+      assert(lock_map->lock_map_stripes_.size() > stripe_num);
+      LockMapStripe* stripe = lock_map->lock_map_stripes_.at(stripe_num);
+
+      stripe->stripe_mutex->Lock().PermitUncheckedError();
+
+      for (const std::string* key : stripe_keys) {
+        UnLockKey(txn, *key, stripe, lock_map, env);
+      }
+
+      stripe->stripe_mutex->UnLock();
+
+      // Signal waiting threads to retry locking
+      stripe->stripe_cv->NotifyAll();
+    }
+  }
+}
+
+PointLockManager::PointLockStatus PointLockManager::GetPointLockStatus() {
+  PointLockStatus data;
+  // Lock order here is important. The correct order is lock_map_mutex_, then
+  // for every column family ID in ascending order lock every stripe in
+  // ascending order.
+  InstrumentedMutexLock l(&lock_map_mutex_);
+
+  std::vector<uint32_t> cf_ids;
+  for (const auto& map : lock_maps_) {
+    cf_ids.push_back(map.first);
+  }
+  std::sort(cf_ids.begin(), cf_ids.end());
+
+  for (auto i : cf_ids) {
+    const auto& stripes = lock_maps_[i]->lock_map_stripes_;
+    // Iterate and lock all stripes in ascending order.
+    for (const auto& j : stripes) {
+      j->stripe_mutex->Lock().PermitUncheckedError();
+      for (const auto& it : j->keys) {
+        struct KeyLockInfo info;
+        info.exclusive = it.second.exclusive;
+        info.key = it.first;
+        for (const auto& id : it.second.txn_ids) {
+          info.ids.push_back(id);
+        }
+        data.insert({i, info});
+      }
+    }
+  }
+
+  // Unlock everything. Unlocking order is not important.
+  for (auto i : cf_ids) {
+    const auto& stripes = lock_maps_[i]->lock_map_stripes_;
+    for (const auto& j : stripes) {
+      j->stripe_mutex->UnLock();
+    }
+  }
+
+  return data;
+}
+
+std::vector<DeadlockPath> PointLockManager::GetDeadlockInfoBuffer() {
+  return dlock_buffer_.PrepareBuffer();
+}
+
+void PointLockManager::Resize(uint32_t target_size) {
+  dlock_buffer_.Resize(target_size);
+}
+
+PointLockManager::RangeLockStatus PointLockManager::GetRangeLockStatus() {
+  return {};
+}
+
+Status PointLockManager::TryLock(PessimisticTransaction* /* txn */,
+                                 ColumnFamilyId /* cf_id */,
+                                 const Endpoint& /* start */,
+                                 const Endpoint& /* end */, Env* /* env */,
+                                 bool /* exclusive */) {
+  return Status::NotSupported(
+      "PointLockManager does not support range locking");
+}
+
+void PointLockManager::UnLock(PessimisticTransaction* /* txn */,
+                              ColumnFamilyId /* cf_id */,
+                              const Endpoint& /* start */,
+                              const Endpoint& /* end */, Env* /* env */) {
+  // no-op
+}
+
+}  // namespace ROCKSDB_NAMESPACE
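
That closes out the point lock manager implementation. One structural note before the header: all per-key state lives in a stripe chosen by hashing the key, so unrelated keys rarely contend on the same stripe mutex or condition variable. A standalone illustration of the striping idea (std::hash is a stand-in here; LockMap::GetStripe uses RocksDB's own hash function):

    #include <cstddef>
    #include <functional>
    #include <string>

    size_t GetStripeFor(const std::string& key, size_t num_stripes) {
      // Any stable hash works for bucketing; only the modulus matters.
      return std::hash<std::string>{}(key) % num_stripes;
    }

    // GetStripeFor("k1", 16) and GetStripeFor("k2", 16) usually differ, so
    // unlocking "k1" does not wake threads queued on "k2"'s stripe.
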
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.h
new file mode 100644
index 0000000000..99183ca1cd
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager.h
@@ -0,0 +1,222 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "monitoring/instrumented_mutex.h"
+#include "rocksdb/utilities/transaction.h"
+#include "util/autovector.h"
+#include "util/hash_containers.h"
+#include "util/hash_map.h"
+#include "util/thread_local.h"
+#include "utilities/transactions/lock/lock_manager.h"
+#include "utilities/transactions/lock/point/point_lock_tracker.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class ColumnFamilyHandle;
+struct LockInfo;
+struct LockMap;
+struct LockMapStripe;
+
+template <class Path>
+class DeadlockInfoBufferTempl {
+ private:
+  std::vector<Path> paths_buffer_;
+  uint32_t buffer_idx_;
+  std::mutex paths_buffer_mutex_;
+
+  std::vector<Path> Normalize() {
+    auto working = paths_buffer_;
+
+    if (working.empty()) {
+      return working;
+    }
+
+    // Next write occurs at a nonexistent path's slot
+    if (paths_buffer_[buffer_idx_].empty()) {
+      working.resize(buffer_idx_);
+    } else {
+      std::rotate(working.begin(), working.begin() + buffer_idx_,
+                  working.end());
+    }
+
+    return working;
+  }
+
+ public:
+  explicit DeadlockInfoBufferTempl(uint32_t n_latest_dlocks)
+      : paths_buffer_(n_latest_dlocks), buffer_idx_(0) {}
+
+  void AddNewPath(Path path) {
+    std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+    if (paths_buffer_.empty()) {
+      return;
+    }
+
+    paths_buffer_[buffer_idx_] = std::move(path);
+    buffer_idx_ = (buffer_idx_ + 1) % paths_buffer_.size();
+  }
+
+  void Resize(uint32_t target_size) {
+    std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+    paths_buffer_ = Normalize();
+
+    // Drop the deadlocks that will no longer be needed after the normalize
+    if (target_size < paths_buffer_.size()) {
+      paths_buffer_.erase(
+          paths_buffer_.begin(),
+          paths_buffer_.begin() + (paths_buffer_.size() - target_size));
+      buffer_idx_ = 0;
+    }
+    // Resize the buffer to the target size and restore the buffer's idx
+    else {
+      auto prev_size = paths_buffer_.size();
+      paths_buffer_.resize(target_size);
+      buffer_idx_ = (uint32_t)prev_size;
+    }
+  }
+
+  std::vector<Path> PrepareBuffer() {
+    std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+    // Reversing the normalized vector returns the latest deadlocks first
+    auto working = Normalize();
+    std::reverse(working.begin(), working.end());
+
+    return working;
+  }
+};
+
+using DeadlockInfoBuffer = DeadlockInfoBufferTempl<DeadlockPath>;
+
+struct TrackedTrxInfo {
+  autovector<TransactionID> m_neighbors;
+  uint32_t m_cf_id;
+  bool m_exclusive;
+  std::string m_waiting_key;
+};
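
To make the ring-buffer semantics above concrete: with a capacity of two, recording three deadlock paths overwrites the oldest one, and PrepareBuffer hands back the survivors newest-first. A toy standalone analogue, with plain ints standing in for DeadlockPath and 0 playing the "empty path" role:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<int> buf(2, 0);  // capacity 2, all slots "empty"
      uint32_t idx = 0;
      for (int path : {1, 2, 3}) {  // AddNewPath x3: 3 overwrites 1
        buf[idx] = path;
        idx = (idx + 1) % buf.size();
      }
      // Normalize: the slot at idx is full, so rotate oldest-first...
      std::rotate(buf.begin(), buf.begin() + idx, buf.end());
      // ...then PrepareBuffer reverses to get newest-first.
      std::reverse(buf.begin(), buf.end());
      for (int p : buf) std::cout << p << ' ';  // prints: 3 2
      return 0;
    }
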
+class PointLockManager : public LockManager {
+ public:
+  PointLockManager(PessimisticTransactionDB* db,
+                   const TransactionDBOptions& opt);
+  // No copying allowed
+  PointLockManager(const PointLockManager&) = delete;
+  PointLockManager& operator=(const PointLockManager&) = delete;
+
+  ~PointLockManager() override {}
+
+  bool IsPointLockSupported() const override { return true; }
+
+  bool IsRangeLockSupported() const override { return false; }
+
+  const LockTrackerFactory& GetLockTrackerFactory() const override {
+    return PointLockTrackerFactory::Get();
+  }
+
+  // Creates a new LockMap for this column family. Caller should guarantee
+  // that this column family does not already exist.
+  void AddColumnFamily(const ColumnFamilyHandle* cf) override;
+  // Deletes the LockMap for this column family. Caller should guarantee that
+  // this column family is no longer in use.
+  void RemoveColumnFamily(const ColumnFamilyHandle* cf) override;
+
+  Status TryLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id,
+                 const std::string& key, Env* env, bool exclusive) override;
+  Status TryLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id,
+                 const Endpoint& start, const Endpoint& end, Env* env,
+                 bool exclusive) override;
+
+  void UnLock(PessimisticTransaction* txn, const LockTracker& tracker,
+              Env* env) override;
+  void UnLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id,
+              const std::string& key, Env* env) override;
+  void UnLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id,
+              const Endpoint& start, const Endpoint& end, Env* env) override;
+
+  PointLockStatus GetPointLockStatus() override;
+
+  RangeLockStatus GetRangeLockStatus() override;
+
+  std::vector<DeadlockPath> GetDeadlockInfoBuffer() override;
+
+  void Resize(uint32_t new_size) override;
+
+ private:
+  PessimisticTransactionDB* txn_db_impl_;
+
+  // Default number of lock map stripes per column family
+  const size_t default_num_stripes_;
+
+  // Limit on number of keys locked per column family
+  const int64_t max_num_locks_;
+
+  // The following lock order must be satisfied in order to avoid deadlocking
+  // ourselves.
+  //   - lock_map_mutex_
+  //   - stripe mutexes in ascending cf id, ascending stripe order
+  //   - wait_txn_map_mutex_
+  //
+  // Must be held when accessing/modifying lock_maps_.
+  InstrumentedMutex lock_map_mutex_;
+
+  // Map of ColumnFamilyId to locked key info
+  using LockMaps = UnorderedMap<uint32_t, std::shared_ptr<LockMap>>;
+  LockMaps lock_maps_;
+
+  // Thread-local cache of entries in lock_maps_. This is an optimization
+  // to avoid acquiring a mutex in order to look up a LockMap
+  std::unique_ptr<ThreadLocalPtr> lock_maps_cache_;
+
+  // Must be held when modifying wait_txn_map_ and rev_wait_txn_map_.
+  std::mutex wait_txn_map_mutex_;
+
+  // Maps from waitee -> number of waiters.
+  HashMap<TransactionID, int> rev_wait_txn_map_;
+  // Maps from waiter -> waitee.
+  HashMap<TransactionID, TrackedTrxInfo> wait_txn_map_;
+  DeadlockInfoBuffer dlock_buffer_;
+
+  // Used to allocate mutexes/condvars to use when locking keys
+  std::shared_ptr<TransactionDBMutexFactory> mutex_factory_;
+
+  bool IsLockExpired(TransactionID txn_id, const LockInfo& lock_info, Env* env,
+                     uint64_t* wait_time);
+
+  std::shared_ptr<LockMap> GetLockMap(uint32_t column_family_id);
+
+  Status AcquireWithTimeout(PessimisticTransaction* txn, LockMap* lock_map,
+                            LockMapStripe* stripe, uint32_t column_family_id,
+                            const std::string& key, Env* env, int64_t timeout,
+                            const LockInfo& lock_info);
+
+  Status AcquireLocked(LockMap* lock_map, LockMapStripe* stripe,
+                       const std::string& key, Env* env,
+                       const LockInfo& lock_info, uint64_t* wait_time,
+                       autovector<TransactionID>* txn_ids);
+
+  void UnLockKey(PessimisticTransaction* txn, const std::string& key,
+                 LockMapStripe* stripe, LockMap* lock_map, Env* env);
+
+  bool IncrementWaiters(const PessimisticTransaction* txn,
+                        const autovector<TransactionID>& wait_ids,
+                        const std::string& key, const uint32_t& cf_id,
+                        const bool& exclusive, Env* const env);
+  void DecrementWaiters(const PessimisticTransaction* txn,
+                        const autovector<TransactionID>& wait_ids);
+  void DecrementWaitersImpl(const PessimisticTransaction* txn,
+                            const autovector<TransactionID>& wait_ids);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
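
The lock-order comment in this header is the load-bearing piece: as long as every code path climbs lock_map_mutex_, then stripe mutexes, then wait_txn_map_mutex_ in that one direction, the manager cannot deadlock against itself. The usual two-mutex illustration of why a single global order suffices:

    #include <mutex>

    std::mutex level1;  // plays lock_map_mutex_
    std::mutex level2;  // plays a stripe mutex

    void CorrectOrder() {
      std::lock_guard<std::mutex> a(level1);  // always level 1 first
      std::lock_guard<std::mutex> b(level2);  // then level 2
    }

    // If any thread took level2 before level1, two threads could each hold
    // one mutex and wait on the other; pinning one global order, as the
    // header's comment does, rules that interleaving out.
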
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager_test.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager_test.h
new file mode 100644
index 0000000000..ca9f46bf9d
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_manager_test.h
@@ -0,0 +1,324 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "file/file_util.h"
+#include "port/port.h"
+#include "port/stack_trace.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "test_util/testharness.h"
+#include "test_util/testutil.h"
+#include "utilities/transactions/lock/point/point_lock_manager.h"
+#include "utilities/transactions/pessimistic_transaction_db.h"
+#include "utilities/transactions/transaction_db_mutex_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class MockColumnFamilyHandle : public ColumnFamilyHandle {
+ public:
+  explicit MockColumnFamilyHandle(ColumnFamilyId cf_id) : cf_id_(cf_id) {}
+
+  ~MockColumnFamilyHandle() override {}
+
+  const std::string& GetName() const override { return name_; }
+
+  ColumnFamilyId GetID() const override { return cf_id_; }
+
+  Status GetDescriptor(ColumnFamilyDescriptor*) override {
+    return Status::OK();
+  }
+
+  const Comparator* GetComparator() const override {
+    return BytewiseComparator();
+  }
+
+ private:
+  ColumnFamilyId cf_id_;
+  std::string name_ = "MockCF";
+};
+
+class PointLockManagerTest : public testing::Test {
+ public:
+  void SetUp() override {
+    env_ = Env::Default();
+    db_dir_ = test::PerThreadDBPath("point_lock_manager_test");
+    ASSERT_OK(env_->CreateDir(db_dir_));
+
+    Options opt;
+    opt.create_if_missing = true;
+    TransactionDBOptions txn_opt;
+    txn_opt.transaction_lock_timeout = 0;
+
+    ASSERT_OK(TransactionDB::Open(opt, txn_opt, db_dir_, &db_));
+
+    // CAUTION: This test creates a separate lock manager object (right, NOT
+    // the one that the TransactionDB is using!), and runs tests on it.
+    locker_.reset(new PointLockManager(
+        static_cast<PessimisticTransactionDB*>(db_), txn_opt));
+
+    wait_sync_point_name_ = "PointLockManager::AcquireWithTimeout:WaitingTxn";
+  }
+
+  void TearDown() override {
+    delete db_;
+    EXPECT_OK(DestroyDir(env_, db_dir_));
+  }
+
+  PessimisticTransaction* NewTxn(
+      TransactionOptions txn_opt = TransactionOptions()) {
+    Transaction* txn = db_->BeginTransaction(WriteOptions(), txn_opt);
+    return reinterpret_cast<PessimisticTransaction*>(txn);
+  }
+
+ protected:
+  Env* env_;
+  std::shared_ptr<LockManager> locker_;
+  const char* wait_sync_point_name_;
+  friend void PointLockManagerTestExternalSetup(PointLockManagerTest*);
+
+ private:
+  std::string db_dir_;
+  TransactionDB* db_;
+};
+
+using init_func_t = void (*)(PointLockManagerTest*);
+
+class AnyLockManagerTest : public PointLockManagerTest,
+                           public testing::WithParamInterface<init_func_t> {
+ public:
+  void SetUp() override {
+    // If a custom setup function was provided, use it. Otherwise, use what we
+    // have inherited.
+    auto init_func = GetParam();
+    if (init_func)
+      (*init_func)(this);
+    else
+      PointLockManagerTest::SetUp();
+  }
+};
+
+TEST_P(AnyLockManagerTest, ReentrantExclusiveLock) {
+  // Tests that a txn can acquire exclusive lock on the same key repeatedly.
+  MockColumnFamilyHandle cf(1);
+  locker_->AddColumnFamily(&cf);
+  auto txn = NewTxn();
+  ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+  ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+
+  // Cleanup
+  locker_->UnLock(txn, 1, "k", env_);
+
+  delete txn;
+}
+
+TEST_P(AnyLockManagerTest, ReentrantSharedLock) {
+  // Tests that a txn can acquire shared lock on the same key repeatedly.
+  MockColumnFamilyHandle cf(1);
+  locker_->AddColumnFamily(&cf);
+  auto txn = NewTxn();
+  ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+  ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+
+  // Cleanup
+  locker_->UnLock(txn, 1, "k", env_);
+
+  delete txn;
+}
+
+TEST_P(AnyLockManagerTest, LockUpgrade) {
+  // Tests that a txn can upgrade from a shared lock to an exclusive lock.
+  MockColumnFamilyHandle cf(1);
+  locker_->AddColumnFamily(&cf);
+  auto txn = NewTxn();
+  ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+  ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+
+  // Cleanup
+  locker_->UnLock(txn, 1, "k", env_);
+  delete txn;
+}
+
+TEST_P(AnyLockManagerTest, LockDowngrade) {
+  // Tests that a txn can acquire a shared lock after acquiring an exclusive
+  // lock on the same key.
+  MockColumnFamilyHandle cf(1);
+  locker_->AddColumnFamily(&cf);
+  auto txn = NewTxn();
+  ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+  ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+
+  // Cleanup
+  locker_->UnLock(txn, 1, "k", env_);
+  delete txn;
+}
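
AnyLockManagerTest is parameterized by an init_func_t precisely so that other lock managers can swap in their own fixture setup and reuse these tests; the instantiations themselves live in the .cc files that include this header. A hedged sketch of what such an instantiation looks like, where a nullptr parameter falls through to the inherited PointLockManagerTest::SetUp (hypothetical here; older gtest spells the macro INSTANTIATE_TEST_CASE_P):

    // Hypothetical; actual instantiations live in the including .cc files.
    INSTANTIATE_TEST_SUITE_P(PointLockManager, AnyLockManagerTest,
                             ::testing::Values(nullptr));
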
+TEST_P(AnyLockManagerTest, LockConflict) {
+  // Tests that lock conflicts lead to lock timeout.
+  MockColumnFamilyHandle cf(1);
+  locker_->AddColumnFamily(&cf);
+  auto txn1 = NewTxn();
+  auto txn2 = NewTxn();
+
+  {
+    // exclusive-exclusive conflict.
+    ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
+    auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
+    ASSERT_TRUE(s.IsTimedOut());
+  }
+
+  {
+    // exclusive-shared conflict.
+    ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, true));
+    auto s = locker_->TryLock(txn2, 1, "k2", env_, false);
+    ASSERT_TRUE(s.IsTimedOut());
+  }
+
+  {
+    // shared-exclusive conflict.
+    ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, false));
+    auto s = locker_->TryLock(txn2, 1, "k2", env_, true);
+    ASSERT_TRUE(s.IsTimedOut());
+  }
+
+  // Cleanup
+  locker_->UnLock(txn1, 1, "k1", env_);
+  locker_->UnLock(txn1, 1, "k2", env_);
+
+  delete txn1;
+  delete txn2;
+}
+
+port::Thread BlockUntilWaitingTxn(const char* sync_point_name,
+                                  std::function<void()> f) {
+  std::atomic<bool> reached(false);
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+      sync_point_name, [&](void* /*arg*/) { reached.store(true); });
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+
+  port::Thread t(f);
+
+  while (!reached.load()) {
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+  }
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  return t;
+}
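
BlockUntilWaitingTxn above leans on a RocksDB sync point: the callback fires inside the background thread just before it blocks on the lock, so spinning on the atomic flag guarantees the thread has really reached the waiting state before the test proceeds. The flag-plus-spin skeleton in isolation:

    #include <atomic>
    #include <chrono>
    #include <thread>

    int main() {
      std::atomic<bool> reached(false);
      std::thread t([&] {
        reached.store(true);  // in the tests, a sync point callback sets this
        // ... then the thread blocks waiting for a lock ...
      });
      while (!reached.load()) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
      }
      // The worker has provably entered its critical region by now.
      t.join();
      return 0;
    }
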
+TEST_P(AnyLockManagerTest, SharedLocks) {
+  // Tests that shared locks can be concurrently held by multiple transactions.
+  MockColumnFamilyHandle cf(1);
+  locker_->AddColumnFamily(&cf);
+  auto txn1 = NewTxn();
+  auto txn2 = NewTxn();
+  ASSERT_OK(locker_->TryLock(txn1, 1, "k", env_, false));
+  ASSERT_OK(locker_->TryLock(txn2, 1, "k", env_, false));
+
+  // Cleanup
+  locker_->UnLock(txn1, 1, "k", env_);
+  locker_->UnLock(txn2, 1, "k", env_);
+
+  delete txn1;
+  delete txn2;
+}
+
+TEST_P(AnyLockManagerTest, Deadlock) {
+  // Tests that deadlock can be detected.
+  // Deadlock scenario:
+  // txn1 exclusively locks k1, and wants to lock k2;
+  // txn2 exclusively locks k2, and wants to lock k1.
+  MockColumnFamilyHandle cf(1);
+  locker_->AddColumnFamily(&cf);
+  TransactionOptions txn_opt;
+  txn_opt.deadlock_detect = true;
+  txn_opt.lock_timeout = 1000000;
+  auto txn1 = NewTxn(txn_opt);
+  auto txn2 = NewTxn(txn_opt);
+
+  ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
+  ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true));
+
+  // txn1 tries to lock k2, will block forever.
+  port::Thread t = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
+    // block because txn2 is holding a lock on k2.
+    locker_->TryLock(txn1, 1, "k2", env_, true);
+  });
+
+  auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
+  ASSERT_TRUE(s.IsBusy());
+  ASSERT_EQ(s.subcode(), Status::SubCode::kDeadlock);
+
+  std::vector<DeadlockPath> deadlock_paths = locker_->GetDeadlockInfoBuffer();
+  ASSERT_EQ(deadlock_paths.size(), 1u);
+  ASSERT_FALSE(deadlock_paths[0].limit_exceeded);
+
+  std::vector<DeadlockInfo> deadlocks = deadlock_paths[0].path;
+  ASSERT_EQ(deadlocks.size(), 2u);
+
+  ASSERT_EQ(deadlocks[0].m_txn_id, txn1->GetID());
+  ASSERT_EQ(deadlocks[0].m_cf_id, 1u);
+  ASSERT_TRUE(deadlocks[0].m_exclusive);
+  ASSERT_EQ(deadlocks[0].m_waiting_key, "k2");
+
+  ASSERT_EQ(deadlocks[1].m_txn_id, txn2->GetID());
+  ASSERT_EQ(deadlocks[1].m_cf_id, 1u);
+  ASSERT_TRUE(deadlocks[1].m_exclusive);
+  ASSERT_EQ(deadlocks[1].m_waiting_key, "k1");
+
+  locker_->UnLock(txn2, 1, "k2", env_);
+  t.join();
+
+  // Cleanup
+  locker_->UnLock(txn1, 1, "k1", env_);
+  locker_->UnLock(txn1, 1, "k2", env_);
+  delete txn2;
+  delete txn1;
+}
+
+TEST_P(AnyLockManagerTest, GetWaitingTxns_MultipleTxns) {
+  MockColumnFamilyHandle cf(1);
+  locker_->AddColumnFamily(&cf);
+
+  auto txn1 = NewTxn();
+  ASSERT_OK(locker_->TryLock(txn1, 1, "k", env_, false));
+
+  auto txn2 = NewTxn();
+  ASSERT_OK(locker_->TryLock(txn2, 1, "k", env_, false));
+
+  auto txn3 = NewTxn();
+  txn3->SetLockTimeout(10000);
+  port::Thread t1 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
+    ASSERT_OK(locker_->TryLock(txn3, 1, "k", env_, true));
+    locker_->UnLock(txn3, 1, "k", env_);
+  });
+
+  // Ok, now txn3 is waiting for lock on "k", which is owned by two
+  // transactions. Check that GetWaitingTxns reports this correctly
+  uint32_t wait_cf_id;
+  std::string wait_key;
+  auto waiters = txn3->GetWaitingTxns(&wait_cf_id, &wait_key);
+
+  ASSERT_EQ(wait_cf_id, 1u);
+  ASSERT_EQ(wait_key, "k");
+  ASSERT_EQ(waiters.size(), 2);
+  bool waits_correct =
+      (waiters[0] == txn1->GetID() && waiters[1] == txn2->GetID()) ||
+      (waiters[1] == txn1->GetID() && waiters[0] == txn2->GetID());
+  ASSERT_EQ(waits_correct, true);
+
+  // Release locks so txn3 can proceed with execution
+  locker_->UnLock(txn1, 1, "k", env_);
+  locker_->UnLock(txn2, 1, "k", env_);
+
+  // Wait until txn3 finishes
+  t1.join();
+
+  delete txn1;
+  delete txn2;
+  delete txn3;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.cc
new file mode 100644
index 0000000000..ce86c64868
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.cc
@@ -0,0 +1,255 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+
+#include "utilities/transactions/lock/point/point_lock_tracker.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+class TrackedKeysColumnFamilyIterator
+    : public LockTracker::ColumnFamilyIterator {
+ public:
+  explicit TrackedKeysColumnFamilyIterator(const TrackedKeys& keys)
+      : tracked_keys_(keys), it_(keys.begin()) {}
+
+  bool HasNext() const override { return it_ != tracked_keys_.end(); }
+
+  ColumnFamilyId Next() override { return (it_++)->first; }
+
+ private:
+  const TrackedKeys& tracked_keys_;
+  TrackedKeys::const_iterator it_;
+};
+
+class TrackedKeysIterator : public LockTracker::KeyIterator {
+ public:
+  TrackedKeysIterator(const TrackedKeys& keys, ColumnFamilyId id)
+      : key_infos_(keys.at(id)), it_(key_infos_.begin()) {}
+
+  bool HasNext() const override { return it_ != key_infos_.end(); }
+
+  const std::string& Next() override { return (it_++)->first; }
+
+ private:
+  const TrackedKeyInfos& key_infos_;
+  TrackedKeyInfos::const_iterator it_;
+};
+
+}  // namespace
+
+void PointLockTracker::Track(const PointLockRequest& r) {
+  auto& keys = tracked_keys_[r.column_family_id];
+  auto result = keys.try_emplace(r.key, r.seq);
+  auto it = result.first;
+  if (!result.second && r.seq < it->second.seq) {
+    // Now tracking this key with an earlier sequence number
+    it->second.seq = r.seq;
+  }
+  // else we do not update the seq. The smaller the tracked seq, the stronger
+  // the guarantee, since it implies that from that seq onward there has not
+  // been a concurrent update to the key. So we only update the seq when doing
+  // so strengthens the guarantee, i.e., when the new seq is smaller than the
+  // existing tracked seq.
+
+  if (r.read_only) {
+    it->second.num_reads++;
+  } else {
+    it->second.num_writes++;
+  }
+
+  it->second.exclusive = it->second.exclusive || r.exclusive;
+}
+
+UntrackStatus PointLockTracker::Untrack(const PointLockRequest& r) {
+  auto cf_keys = tracked_keys_.find(r.column_family_id);
+  if (cf_keys == tracked_keys_.end()) {
+    return UntrackStatus::NOT_TRACKED;
+  }
+
+  auto& keys = cf_keys->second;
+  auto it = keys.find(r.key);
+  if (it == keys.end()) {
+    return UntrackStatus::NOT_TRACKED;
+  }
+
+  bool untracked = false;
+  auto& info = it->second;
+  if (r.read_only) {
+    if (info.num_reads > 0) {
+      info.num_reads--;
+      untracked = true;
+    }
+  } else {
+    if (info.num_writes > 0) {
+      info.num_writes--;
+      untracked = true;
+    }
+  }
+
+  bool removed = false;
+  if (info.num_reads == 0 && info.num_writes == 0) {
+    keys.erase(it);
+    if (keys.empty()) {
+      tracked_keys_.erase(cf_keys);
+    }
+    removed = true;
+  }
+
+  if (removed) {
+    return UntrackStatus::REMOVED;
+  }
+  if (untracked) {
+    return UntrackStatus::UNTRACKED;
+  }
+  return UntrackStatus::NOT_TRACKED;
+}
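
The counters above behave like per-key reference counts split by access type: Track bumps num_reads or num_writes, Untrack decrements the matching one, and the key leaves the map only when both reach zero. The same logic in miniature, with hypothetical names and a plain map:

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    struct Counts { uint32_t reads = 0, writes = 0; };
    std::unordered_map<std::string, Counts> tracked;

    void Track(const std::string& k, bool read_only) {
      auto& c = tracked[k];
      read_only ? ++c.reads : ++c.writes;
    }

    void Untrack(const std::string& k, bool read_only) {
      auto it = tracked.find(k);
      if (it == tracked.end()) return;  // was never tracked
      auto& c = it->second;
      if (read_only ? c.reads > 0 : c.writes > 0) {
        read_only ? --c.reads : --c.writes;
      }
      if (c.reads == 0 && c.writes == 0) tracked.erase(it);  // fully released
    }
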
+void PointLockTracker::Merge(const LockTracker& tracker) {
+  const PointLockTracker& t = static_cast<const PointLockTracker&>(tracker);
+  for (const auto& cf_keys : t.tracked_keys_) {
+    ColumnFamilyId cf = cf_keys.first;
+    const auto& keys = cf_keys.second;
+
+    auto current_cf_keys = tracked_keys_.find(cf);
+    if (current_cf_keys == tracked_keys_.end()) {
+      tracked_keys_.emplace(cf_keys);
+    } else {
+      auto& current_keys = current_cf_keys->second;
+      for (const auto& key_info : keys) {
+        const std::string& key = key_info.first;
+        const TrackedKeyInfo& info = key_info.second;
+        // If key was not previously tracked, just copy the whole struct over.
+        // Otherwise, some merging needs to occur.
+        auto current_info = current_keys.find(key);
+        if (current_info == current_keys.end()) {
+          current_keys.emplace(key_info);
+        } else {
+          current_info->second.Merge(info);
+        }
+      }
+    }
+  }
+}
+
+void PointLockTracker::Subtract(const LockTracker& tracker) {
+  const PointLockTracker& t = static_cast<const PointLockTracker&>(tracker);
+  for (const auto& cf_keys : t.tracked_keys_) {
+    ColumnFamilyId cf = cf_keys.first;
+    const auto& keys = cf_keys.second;
+
+    auto& current_keys = tracked_keys_.at(cf);
+    for (const auto& key_info : keys) {
+      const std::string& key = key_info.first;
+      const TrackedKeyInfo& info = key_info.second;
+      uint32_t num_reads = info.num_reads;
+      uint32_t num_writes = info.num_writes;
+
+      auto current_key_info = current_keys.find(key);
+      assert(current_key_info != current_keys.end());
+
+      // Decrement the total reads/writes of this key by the number of
+      // reads/writes done since the last SavePoint.
+      if (num_reads > 0) {
+        assert(current_key_info->second.num_reads >= num_reads);
+        current_key_info->second.num_reads -= num_reads;
+      }
+      if (num_writes > 0) {
+        assert(current_key_info->second.num_writes >= num_writes);
+        current_key_info->second.num_writes -= num_writes;
+      }
+      if (current_key_info->second.num_reads == 0 &&
+          current_key_info->second.num_writes == 0) {
+        current_keys.erase(current_key_info);
+      }
+    }
+  }
+}
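
Subtract relies on the asserts above: the argument tracker must be a subset of this one, because it was recorded between a savepoint and now. Per key the arithmetic is a plain checked subtraction; for instance, three tracked writes minus two savepoint writes leaves one, and the key stays in the map until reads and writes both hit zero:

    #include <cassert>

    int main() {
      unsigned total_writes = 3;  // tracked over the whole transaction
      unsigned delta_writes = 2;  // tracked since the last savepoint
      assert(total_writes >= delta_writes);  // the subset invariant
      total_writes -= delta_writes;          // one tracked write remains
      return total_writes == 1 ? 0 : 1;
    }
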
+LockTracker* PointLockTracker::GetTrackedLocksSinceSavePoint(
+    const LockTracker& save_point_tracker) const {
+  // Examine the number of reads/writes performed on all keys written
+  // since the last SavePoint and compare to the total number of reads/writes
+  // for each key.
+  LockTracker* t = new PointLockTracker();
+  const PointLockTracker& save_point_t =
+      static_cast<const PointLockTracker&>(save_point_tracker);
+  for (const auto& cf_keys : save_point_t.tracked_keys_) {
+    ColumnFamilyId cf = cf_keys.first;
+    const auto& keys = cf_keys.second;
+
+    auto& current_keys = tracked_keys_.at(cf);
+    for (const auto& key_info : keys) {
+      const std::string& key = key_info.first;
+      const TrackedKeyInfo& info = key_info.second;
+      uint32_t num_reads = info.num_reads;
+      uint32_t num_writes = info.num_writes;
+
+      auto current_key_info = current_keys.find(key);
+      assert(current_key_info != current_keys.end());
+      assert(current_key_info->second.num_reads >= num_reads);
+      assert(current_key_info->second.num_writes >= num_writes);
+
+      if (current_key_info->second.num_reads == num_reads &&
+          current_key_info->second.num_writes == num_writes) {
+        // All the reads/writes to this key were done in the last savepoint.
+        PointLockRequest r;
+        r.column_family_id = cf;
+        r.key = key;
+        r.seq = info.seq;
+        r.read_only = (num_writes == 0);
+        r.exclusive = info.exclusive;
+        t->Track(r);
+      }
+    }
+  }
+  return t;
+}
+
+PointLockStatus PointLockTracker::GetPointLockStatus(
+    ColumnFamilyId column_family_id, const std::string& key) const {
+  assert(IsPointLockSupported());
+  PointLockStatus status;
+  auto it = tracked_keys_.find(column_family_id);
+  if (it == tracked_keys_.end()) {
+    return status;
+  }
+
+  const auto& keys = it->second;
+  auto key_it = keys.find(key);
+  if (key_it == keys.end()) {
+    return status;
+  }
+
+  const TrackedKeyInfo& key_info = key_it->second;
+  status.locked = true;
+  status.exclusive = key_info.exclusive;
+  status.seq = key_info.seq;
+  return status;
+}
+
+uint64_t PointLockTracker::GetNumPointLocks() const {
+  uint64_t num_keys = 0;
+  for (const auto& cf_keys : tracked_keys_) {
+    num_keys += cf_keys.second.size();
+  }
+  return num_keys;
+}
+
+LockTracker::ColumnFamilyIterator* PointLockTracker::GetColumnFamilyIterator()
+    const {
+  return new TrackedKeysColumnFamilyIterator(tracked_keys_);
+}
+
+LockTracker::KeyIterator* PointLockTracker::GetKeyIterator(
+    ColumnFamilyId column_family_id) const {
+  assert(tracked_keys_.find(column_family_id) != tracked_keys_.end());
+  return new TrackedKeysIterator(tracked_keys_, column_family_id);
+}
+
+void PointLockTracker::Clear() { tracked_keys_.clear(); }
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.h
new file mode 100644
index 0000000000..57e1b8437a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/point/point_lock_tracker.h
@@ -0,0 +1,97 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "utilities/transactions/lock/lock_tracker.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct TrackedKeyInfo {
+  // Earliest sequence number that is relevant to this transaction for this key
+  SequenceNumber seq;
+
+  uint32_t num_writes;
+  uint32_t num_reads;
+
+  bool exclusive;
+
+  explicit TrackedKeyInfo(SequenceNumber seq_no)
+      : seq(seq_no), num_writes(0), num_reads(0), exclusive(false) {}
+
+  void Merge(const TrackedKeyInfo& info) {
+    assert(seq <= info.seq);
+    num_reads += info.num_reads;
+    num_writes += info.num_writes;
+    exclusive = exclusive || info.exclusive;
+  }
+};
+
+using TrackedKeyInfos = std::unordered_map<std::string, TrackedKeyInfo>;
+
+using TrackedKeys = std::unordered_map<ColumnFamilyId, TrackedKeyInfos>;
+
+// Tracks point locks on single keys.
+class PointLockTracker : public LockTracker { + public: + PointLockTracker() = default; + + PointLockTracker(const PointLockTracker&) = delete; + PointLockTracker& operator=(const PointLockTracker&) = delete; + + bool IsPointLockSupported() const override { return true; } + + bool IsRangeLockSupported() const override { return false; } + + void Track(const PointLockRequest& lock_request) override; + + UntrackStatus Untrack(const PointLockRequest& lock_request) override; + + void Track(const RangeLockRequest& /*lock_request*/) override {} + + UntrackStatus Untrack(const RangeLockRequest& /*lock_request*/) override { + return UntrackStatus::NOT_TRACKED; + } + + void Merge(const LockTracker& tracker) override; + + void Subtract(const LockTracker& tracker) override; + + void Clear() override; + + virtual LockTracker* GetTrackedLocksSinceSavePoint( + const LockTracker& save_point_tracker) const override; + + PointLockStatus GetPointLockStatus(ColumnFamilyId column_family_id, + const std::string& key) const override; + + uint64_t GetNumPointLocks() const override; + + ColumnFamilyIterator* GetColumnFamilyIterator() const override; + + KeyIterator* GetKeyIterator(ColumnFamilyId column_family_id) const override; + + private: + TrackedKeys tracked_keys_; +}; + +class PointLockTrackerFactory : public LockTrackerFactory { + public: + static const PointLockTrackerFactory& Get() { + static const PointLockTrackerFactory instance; + return instance; + } + + LockTracker* Create() const override { return new PointLockTracker(); } + + private: + PointLockTrackerFactory() {} +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_lock_manager.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_lock_manager.h new file mode 100644 index 0000000000..22d653ff67 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_lock_manager.h @@ -0,0 +1,34 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+
+//
+// Generic definitions for a Range-based Lock Manager
+//
+#pragma once
+
+#include "utilities/transactions/lock/lock_manager.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+/*
+  A base class for all Range-based lock managers
+
+  See also class RangeLockManagerHandle in
+  include/rocksdb/utilities/transaction_db.h
+*/
+class RangeLockManagerBase : public LockManager {
+ public:
+  // Getting a point lock is reduced to getting a range lock on a
+  // single-point range
+  using LockManager::TryLock;
+  Status TryLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id,
+                 const std::string& key, Env* env, bool exclusive) override {
+    Endpoint endp(key.data(), key.size(), false);
+    return TryLock(txn, column_family_id, endp, endp, env, exclusive);
+  }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
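
The override above is the whole trick of the base class: a point lock on key k is re-expressed as a range lock on the degenerate interval [k, k]. Sketched with a simplified stand-in for rocksdb::Endpoint (illustration only; the real type carries a Slice and an infinite-suffix flag):

    #include <string>

    struct Endpoint {  // simplified stand-in, not the real rocksdb::Endpoint
      std::string key;
      bool inf_suffix = false;
    };

    int main() {
      std::string k = "k1";
      Endpoint start{k}, end{k};  // both endpoints identical: a point range
      return start.key == end.key ? 0 : 1;
    }
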
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/db.h
new file mode 100644
index 0000000000..5aa826c8e0
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/db.h
@@ -0,0 +1,76 @@
+#ifndef _DB_H
+#define _DB_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+typedef struct __toku_dbt DBT;
+
+// port: this is currently not used
+struct simple_dbt {
+  uint32_t len;
+  void *data;
+};
+
+// engine status info
+// engine status is passed to handlerton as an array of
+// TOKU_ENGINE_STATUS_ROW_S[]
+typedef enum {
+  STATUS_FS_STATE = 0,  // interpret as file system state (redzone) enum
+  STATUS_UINT64,        // interpret as uint64_t
+  STATUS_CHARSTR,       // interpret as char *
+  STATUS_UNIXTIME,      // interpret as time_t
+  STATUS_TOKUTIME,      // interpret as tokutime_t
+  STATUS_PARCOUNT,      // interpret as PARTITIONED_COUNTER
+  STATUS_DOUBLE         // interpret as double
+} toku_engine_status_display_type;
+
+typedef enum {
+  TOKU_ENGINE_STATUS = (1ULL << 0),  // Include when asking for engine status
+  TOKU_GLOBAL_STATUS =
+      (1ULL << 1),  // Include when asking for information_schema.global_status
+} toku_engine_status_include_type;
+
+typedef struct __toku_engine_status_row {
+  const char *keyname;  // info schema key, should not change across revisions
+                        // without good reason
+  const char
+      *columnname;  // column for mysql, e.g. information_schema.global_status.
+                    // TOKUDB_ will automatically be prefixed.
+  const char *legend;  // the text that will appear at user interface
+  toku_engine_status_display_type type;  // how to interpret the value
+  toku_engine_status_include_type
+      include;  // which kinds of callers should get to read this row?
+  union {
+    double dnum;
+    uint64_t num;
+    const char *str;
+    char datebuf[26];
+    struct partitioned_counter *parcount;
+  } value;
+} * TOKU_ENGINE_STATUS_ROW, TOKU_ENGINE_STATUS_ROW_S;
+
+#define DB_BUFFER_SMALL -30999
+#define DB_LOCK_DEADLOCK -30995
+#define DB_LOCK_NOTGRANTED -30994
+#define DB_NOTFOUND -30989
+#define DB_KEYEXIST -30996
+#define DB_DBT_MALLOC 8
+#define DB_DBT_REALLOC 64
+#define DB_DBT_USERMEM 256
+
+/* PerconaFT specific error codes */
+#define TOKUDB_OUT_OF_LOCKS -100000
+
+typedef void (*lock_wait_callback)(void *arg, uint64_t requesting_txnid,
+                                   uint64_t blocking_txnid);
+
+struct __toku_dbt {
+  void *data;
+  size_t size;
+  size_t ulen;
+  // One of DB_DBT_XXX flags
+  uint32_t flags;
+};
+
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h
new file mode 100644
index 0000000000..718efc6231
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h
@@ -0,0 +1,138 @@
+/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+ +#pragma once + +#include + +#include "../db.h" +#include "../portability/memory.h" +#include "../util/dbt.h" + +typedef int (*ft_compare_func)(void *arg, const DBT *a, const DBT *b); + +int toku_keycompare(const void *key1, size_t key1len, const void *key2, + size_t key2len); + +int toku_builtin_compare_fun(const DBT *, const DBT *) + __attribute__((__visibility__("default"))); + +namespace toku { + +// a comparator object encapsulates the data necessary for +// comparing two keys in a fractal tree. it further understands +// that points may be positive or negative infinity. + +class comparator { + void init(ft_compare_func cmp, void *cmp_arg, uint8_t memcmp_magic) { + _cmp = cmp; + _cmp_arg = cmp_arg; + _memcmp_magic = memcmp_magic; + } + + public: + // This magic value is reserved to mean that the magic has not been set. + static const uint8_t MEMCMP_MAGIC_NONE = 0; + + void create(ft_compare_func cmp, void *cmp_arg, + uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) { + init(cmp, cmp_arg, memcmp_magic); + } + + // inherit the attributes of another comparator, but keep our own + // copy of fake_db that is owned separately from the one given. + void inherit(const comparator &cmp) { + invariant_notnull(cmp._cmp); + init(cmp._cmp, cmp._cmp_arg, cmp._memcmp_magic); + } + + // like inherit, but doesn't require that the this comparator + // was already created + void create_from(const comparator &cmp) { inherit(cmp); } + + void destroy() {} + + ft_compare_func get_compare_func() const { return _cmp; } + + uint8_t get_memcmp_magic() const { return _memcmp_magic; } + + bool valid() const { return _cmp != nullptr; } + + inline bool dbt_has_memcmp_magic(const DBT *dbt) const { + return *reinterpret_cast(dbt->data) == _memcmp_magic; + } + + int operator()(const DBT *a, const DBT *b) const { + if (__builtin_expect(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b), + 0)) { + return toku_dbt_infinite_compare(a, b); + } else if (_memcmp_magic != MEMCMP_MAGIC_NONE + // If `a' has the memcmp magic.. + && dbt_has_memcmp_magic(a) + // ..then we expect `b' to also have the memcmp magic + && __builtin_expect(dbt_has_memcmp_magic(b), 1)) { + assert(0); // psergey: this branch should not be taken. + return toku_builtin_compare_fun(a, b); + } else { + // yikes, const sadness here + return _cmp(_cmp_arg, a, b); + } + } + + private: + ft_compare_func _cmp; + void *_cmp_arg; + + uint8_t _memcmp_magic; +}; + +} /* namespace toku */ diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h new file mode 100644 index 0000000000..1b45111721 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h @@ -0,0 +1,102 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include "../db.h" +#include "../portability/toku_race_tools.h" +#include "../util/status.h" + +// +// Lock Tree Manager statistics +// +class LTM_STATUS_S { + public: + enum { + LTM_SIZE_CURRENT = 0, + LTM_SIZE_LIMIT, + LTM_ESCALATION_COUNT, + LTM_ESCALATION_TIME, + LTM_ESCALATION_LATEST_RESULT, + LTM_NUM_LOCKTREES, + LTM_LOCK_REQUESTS_PENDING, + LTM_STO_NUM_ELIGIBLE, + LTM_STO_END_EARLY_COUNT, + LTM_STO_END_EARLY_TIME, + LTM_WAIT_COUNT, + LTM_WAIT_TIME, + LTM_LONG_WAIT_COUNT, + LTM_LONG_WAIT_TIME, + LTM_TIMEOUT_COUNT, + LTM_WAIT_ESCALATION_COUNT, + LTM_WAIT_ESCALATION_TIME, + LTM_LONG_WAIT_ESCALATION_COUNT, + LTM_LONG_WAIT_ESCALATION_TIME, + LTM_STATUS_NUM_ROWS // must be last + }; + + void init(void); + void destroy(void); + + TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS]; + + private: + bool m_initialized = false; +}; +typedef LTM_STATUS_S* LTM_STATUS; +extern LTM_STATUS_S ltm_status; + +#define LTM_STATUS_VAL(x) ltm_status.status[LTM_STATUS_S::x].value.num + +void toku_status_init(void); // just call ltm_status.init(); +void toku_status_destroy(void); // just call ltm_status.destroy(); diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h new file mode 100644 index 0000000000..e1bfb86c50 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h @@ -0,0 +1,174 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=2:softtabstop=2: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. 
+ + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include "../ft/comparator.h" +#include "keyrange.h" +#include "treenode.h" + +namespace toku { + +// A concurrent_tree stores non-overlapping ranges. +// Access to disjoint parts of the tree usually occurs concurrently. + +class concurrent_tree { + public: + // A locked_keyrange gives you exclusive access to read and write + // operations that occur on any keys in that range. You only have + // the right to operate on keys in that range or keys that were read + // from the keyrange using iterate() + // + // Access model: + // - user prepares a locked keyrange. all threads serialize behind prepare(). + // - user breaks the serialzation point by acquiring a range, or releasing. + // - one thread operates on a certain locked_keyrange object at a time. + // - when the thread is finished, it releases + + class locked_keyrange { + public: + // effect: prepare to acquire a locked keyrange over the given + // concurrent_tree, preventing other threads from preparing + // until this thread either does acquire() or release(). + // note: operations performed on a prepared keyrange are equivalent + // to ones performed on an acquired keyrange over -inf, +inf. + // rationale: this provides the user with a serialization point for + // descending + // or modifying the the tree. it also proives a convenient way of + // doing serializable operations on the tree. + // There are two valid sequences of calls: + // - prepare, acquire, [operations], release + // - prepare, [operations],release + void prepare(concurrent_tree *tree); + + // requires: the locked keyrange was prepare()'d + // effect: acquire a locked keyrange over the given concurrent_tree. 
+    // the locked keyrange represents the range of keys overlapped
+    // by the given range
+    void acquire(const keyrange &range);
+
+    // effect: releases a locked keyrange and the mutex it holds
+    void release(void);
+
+    // effect: iterate over each range this locked_keyrange represents,
+    // calling function->fn() on each node's keyrange and txnid
+    // until there are no more or the function returns false
+    template <typename F>
+    void iterate(F *function) const {
+      // if the subtree is non-empty, traverse it by calling the given
+      // function on each range, txnid pair found that overlaps.
+      if (!m_subtree->is_empty()) {
+        m_subtree->traverse_overlaps(m_range, function);
+      }
+    }
+
+    // Adds another owner to the lock on the specified keyrange.
+    // requires: the keyrange contains one treenode whose bounds are
+    // exactly equal to the specified range (no sub/supersets)
+    bool add_shared_owner(const keyrange &range, TXNID new_owner);
+
+    // inserts the given range into the tree, with an associated txnid.
+    // requires: range does not overlap with anything in this locked_keyrange
+    // rationale: caller is responsible for only inserting unique ranges
+    void insert(const keyrange &range, TXNID txnid, bool is_shared);
+
+    // effect: removes the given range from the tree.
+    //         - txnid=TXNID_ANY means remove the range no matter what its
+    //           owners are
+    //         - Other value means remove the specified txnid from
+    //           ownership (if the range has other owners, it will remain
+    //           in the tree)
+    // requires: range exists exactly in this locked_keyrange
+    // rationale: caller is responsible for only removing existing ranges
+    void remove(const keyrange &range, TXNID txnid);
+
+    // effect: removes all of the keys represented by this locked keyrange
+    // rationale: we'd like a fast way to empty out a tree
+    void remove_all(void);
+
+   private:
+    // the concurrent tree this locked keyrange is for
+    concurrent_tree *m_tree;
+
+    // the range of keys this locked keyrange represents
+    keyrange m_range;
+
+    // the subtree under which all overlapping ranges exist
+    treenode *m_subtree;
+
+    friend class concurrent_tree_unit_test;
+  };
+
+  // effect: initialize the tree to an empty state
+  void create(const comparator *cmp);
+
+  // effect: destroy the tree.
+  // requires: tree is empty
+  void destroy(void);
+
+  // returns: true iff the tree is empty
+  bool is_empty(void);
+
+  // returns: the memory overhead of a single insertion into the tree
+  static uint64_t get_insertion_memory_overhead(void);
+
+ private:
+  // the root needs to always exist so there's a lock to grab
+  // even if the tree is empty. that's why we store a treenode
+  // here and not a pointer to one.
+  treenode m_root;
+
+  friend class concurrent_tree_unit_test;
+};
+
+} /* namespace toku */
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h
new file mode 100644
index 0000000000..f9aeea0c4e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h
@@ -0,0 +1,141 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+ + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include "../ft/comparator.h" + +namespace toku { + +// A keyrange has a left and right key as endpoints. +// +// When a keyrange is created it owns no memory, but when it copies +// or extends another keyrange, it copies memory as necessary. This +// means it is cheap in the common case. + +class keyrange { + public: + // effect: constructor that borrows left and right key pointers. + // no memory is allocated or copied. + void create(const DBT *left_key, const DBT *right_key); + + // effect: constructor that allocates and copies another keyrange's points. + void create_copy(const keyrange &range); + + // effect: destroys the keyrange, freeing any allocated memory + void destroy(void); + + // effect: extends the keyrange by choosing the leftmost and rightmost + // endpoints from this range and the given range. + // replaced keys in this range are freed, new keys are copied. + void extend(const comparator &cmp, const keyrange &range); + + // returns: the amount of memory this keyrange takes. does not account + // for point optimizations or malloc overhead. 
+  uint64_t get_memory_size(void) const;
+
+  // returns: pointer to the left key of this range
+  const DBT *get_left_key(void) const;
+
+  // returns: pointer to the right key of this range
+  const DBT *get_right_key(void) const;
+
+  // two ranges are either equal, lt, gt, or overlapping
+  enum comparison { EQUALS, LESS_THAN, GREATER_THAN, OVERLAPS };
+
+  // effect: compares this range to the given range
+  // returns: LESS_THAN if given range is strictly to the left
+  //          GREATER_THAN if given range is strictly to the right
+  //          EQUALS if given range has the same left and right endpoints
+  //          OVERLAPS if at least one of the given range's endpoints falls
+  //          between this range's endpoints
+  comparison compare(const comparator &cmp, const keyrange &range) const;
+
+  // returns: true if the range and the given range are equal or overlapping
+  bool overlaps(const comparator &cmp, const keyrange &range) const;
+
+  // returns: a keyrange representing -inf, +inf
+  static keyrange get_infinite_range(void);
+
+ private:
+  // some keys should be copied, some keys should not be.
+  //
+  // to support both, we use two DBTs for copies and two pointers
+  // for temporaries. the access rule is:
+  // - if a pointer is non-null, then it represents the key.
+  // - otherwise the pointer is null, and the key is in the copy.
+  DBT m_left_key_copy;
+  DBT m_right_key_copy;
+  const DBT *m_left_key;
+  const DBT *m_right_key;
+
+  // if this range is a point range, then m_left_key == m_right_key
+  // and the actual data is stored exactly once in m_left_key_copy.
+  bool m_point_range;
+
+  // effect: initializes a keyrange to be empty
+  void init_empty(void);
+
+  // effect: copies the given key once into the left key copy
+  //         and sets the right key copy to share the left.
+  // rationale: optimization for point ranges to only do one malloc
+  void set_both_keys(const DBT *key);
+
+  // effect: destroys the current left key. sets and copies the new one.
+  void replace_left_key(const DBT *key);
+
+  // effect: destroys the current right key. sets and copies the new one.
+  void replace_right_key(const DBT *key);
+};
+
+} /* namespace toku */
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h
new file mode 100644
index 0000000000..d30e1e2cad
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h
@@ -0,0 +1,255 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=2:softtabstop=2:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+  PerconaFT is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License, version 2,
+  as published by the Free Software Foundation.
+
+  PerconaFT is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+  PerconaFT is free software: you can redistribute it and/or modify
+  it under the terms of the GNU Affero General Public License, version 3,
+  as published by the Free Software Foundation.
+
+  PerconaFT is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU Affero General Public License for more details.
+
+  You should have received a copy of the GNU Affero General Public License
+  along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include "../db.h"
+#include "../ft/comparator.h"
+#include "../portability/toku_pthread.h"
+#include "locktree.h"
+#include "txnid_set.h"
+#include "wfg.h"
+
+namespace toku {
+
+// Information about a lock wait
+struct lock_wait_info {
+  locktree *ltree;  // the tree where wait happens
+  TXNID waiter;     // the waiting transaction
+  void *m_extra;    // lock_request's m_extra
+
+  // The transactions that are waited for.
+  std::vector<TXNID> waitees;
+};
+
+typedef std::vector<lock_wait_info> lock_wait_infos;
+
+// A lock request contains the db, the key range, the lock type, and
+// the transaction id that describes a potential row range lock.
+//
+// the typical use case is:
+// - initialize a lock request
+// - start to try to acquire the lock
+// - do something else
+// - wait for the lock request to be resolved on a timed condition
+// - destroy the lock request
+// a lock request is resolved when its state is no longer pending: it
+// has become granted, timed out, or deadlocked. when resolved, the
+// state of the lock request is changed and any waiting threads are awakened.
+
+class lock_request {
+ public:
+  enum type { UNKNOWN, READ, WRITE };
+
+  // effect: Initializes a lock request.
+  void create(toku_external_mutex_factory_t mutex_factory);
+
+  // effect: Destroys a lock request.
+  void destroy(void);
+
+  // effect: Resets the lock request parameters, allowing it to be reused.
+  // requires: Lock request was already created at some point
+  void set(locktree *lt, TXNID txnid, const DBT *left_key, const DBT *right_key,
+           type lock_type, bool big_txn, void *extra = nullptr);
+
+  // effect: Tries to acquire a lock described by this lock request.
+  // returns: The return code of locktree::acquire_[write,read]_lock()
+  //          or DB_LOCK_DEADLOCK if this request would end up deadlocked.
+  int start(void);
+
+  // effect: Sleeps until either the request is granted or the wait time
+  //         expires.
+  // returns: The return code of locktree::acquire_[write,read]_lock()
+  //          or simply DB_LOCK_NOTGRANTED if the wait time expired.
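+  //
+  // Editor's note: a minimal, hypothetical sketch of the "typical use case"
+  // from the class comment above. The caller-side names (mutex_factory, lt,
+  // my_txnid, left, right) are assumptions; only the member functions and
+  // return codes come from this header.
+  //
+  //   toku::lock_request req;
+  //   req.create(mutex_factory);
+  //   req.set(lt, my_txnid, &left, &right,
+  //           toku::lock_request::type::WRITE, false /* big_txn */);
+  //   int r = req.start();
+  //   if (r == DB_LOCK_NOTGRANTED)
+  //     r = req.wait(1000 /* wait_time_ms */);  // granted, or timed out
+  //   req.destroy();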
+ int wait(uint64_t wait_time_ms); + int wait(uint64_t wait_time_ms, uint64_t killed_time_ms, + int (*killed_callback)(void), + void (*lock_wait_callback)(void *, lock_wait_infos *) = nullptr, + void *callback_arg = nullptr); + + // return: left end-point of the lock range + const DBT *get_left_key(void) const; + + // return: right end-point of the lock range + const DBT *get_right_key(void) const; + + // return: the txnid waiting for a lock + TXNID get_txnid(void) const; + + // return: when this lock request started, as milliseconds from epoch + uint64_t get_start_time(void) const; + + // return: which txnid is blocking this request (there may be more, though) + TXNID get_conflicting_txnid(void) const; + + // effect: Retries all of the lock requests for the given locktree. + // Any lock requests successfully restarted is completed and woken + // up. + // The rest remain pending. + static void retry_all_lock_requests( + locktree *lt, + void (*lock_wait_callback)(void *, lock_wait_infos *) = nullptr, + void *callback_arg = nullptr, + void (*after_retry_test_callback)(void) = nullptr); + static void retry_all_lock_requests_info( + lt_lock_request_info *info, + void (*lock_wait_callback)(void *, lock_wait_infos *), + void *callback_arg); + + void set_start_test_callback(void (*f)(void)); + void set_start_before_pending_test_callback(void (*f)(void)); + void set_retry_test_callback(void (*f)(void)); + + void *get_extra(void) const; + + void kill_waiter(void); + static void kill_waiter(locktree *lt, void *extra); + + private: + enum state { + UNINITIALIZED, + INITIALIZED, + PENDING, + COMPLETE, + DESTROYED, + }; + + // The keys for a lock request are stored "unowned" in m_left_key + // and m_right_key. When the request is about to go to sleep, it + // copies these keys and stores them in m_left_key_copy etc and + // sets the temporary pointers to null. + TXNID m_txnid; + TXNID m_conflicting_txnid; + uint64_t m_start_time; + const DBT *m_left_key; + const DBT *m_right_key; + DBT m_left_key_copy; + DBT m_right_key_copy; + + // The lock request type and associated locktree + type m_type; + locktree *m_lt; + + // If the lock request is in the completed state, then its + // final return value is stored in m_complete_r + int m_complete_r; + state m_state; + + toku_external_cond_t m_wait_cond; + + bool m_big_txn; + + // the lock request info state stored in the + // locktree that this lock request is for. + struct lt_lock_request_info *m_info; + + void *m_extra; + + // effect: tries again to acquire the lock described by this lock request + // returns: 0 if retrying the request succeeded and is now complete + int retry(lock_wait_infos *collector); + + void complete(int complete_r); + + // effect: Finds another lock request by txnid. + // requires: The lock request info mutex is held + lock_request *find_lock_request(const TXNID &txnid); + + // effect: Insert this lock request into the locktree's set. + // requires: the locktree's mutex is held + void insert_into_lock_requests(void); + + // effect: Removes this lock request from the locktree's set. + // requires: The lock request info mutex is held + void remove_from_lock_requests(void); + + // effect: Asks this request's locktree which txnids are preventing + // us from getting the lock described by this request. 
+ // returns: conflicts is populated with the txnid's that this request + // is blocked on + void get_conflicts(txnid_set *conflicts); + + // effect: Builds a wait-for-graph for this lock request and the given + // conflict set + void build_wait_graph(wfg *wait_graph, const txnid_set &conflicts); + + // returns: True if this lock request is in deadlock with the given conflicts + // set + bool deadlock_exists(const txnid_set &conflicts); + + void copy_keys(void); + + static int find_by_txnid(lock_request *const &request, const TXNID &txnid); + + // Report list of conflicts to lock wait callback. + static void report_waits(lock_wait_infos *wait_conflicts, + void (*lock_wait_callback)(void *, + lock_wait_infos *), + void *callback_arg); + void add_conflicts_to_waits(txnid_set *conflicts, + lock_wait_infos *wait_conflicts); + + void (*m_start_test_callback)(void); + void (*m_start_before_pending_test_callback)(void); + void (*m_retry_test_callback)(void); + + public: + std::function m_deadlock_cb; + + friend class lock_request_unit_test; +}; +// PORT: lock_request is not a POD anymore due to use of toku_external_cond_t +// This is ok as the PODness is not really required: lock_request objects are +// not moved in memory or anything. +// ENSURE_POD(lock_request); + +} /* namespace toku */ diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h new file mode 100644 index 0000000000..f0f4b042d3 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h @@ -0,0 +1,580 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <atomic>
+
+#include "../db.h"
+#include "../ft/comparator.h"
+#include "../portability/toku_external_pthread.h"
+#include "../portability/toku_pthread.h"
+#include "../portability/toku_time.h"
+// PORT #include  // just for DICTIONARY_ID..
+// PORT: ft-status for LTM_STATUS:
+#include "../ft/ft-status.h"
+
+struct DICTIONARY_ID {
+  uint64_t dictid;
+};
+
+#include "../util/omt.h"
+#include "range_buffer.h"
+#include "txnid_set.h"
+#include "wfg.h"
+
+namespace toku {
+
+class locktree;
+class locktree_manager;
+class lock_request;
+class concurrent_tree;
+
+typedef int (*lt_create_cb)(locktree *lt, void *extra);
+typedef void (*lt_destroy_cb)(locktree *lt);
+typedef void (*lt_escalate_cb)(TXNID txnid, const locktree *lt,
+                               const range_buffer &buffer, void *extra);
+
+typedef bool (*lt_escalation_barrier_check_func)(const DBT *a, const DBT *b,
+                                                 void *extra);
+
+struct lt_counters {
+  uint64_t wait_count, wait_time;
+  uint64_t long_wait_count, long_wait_time;
+  uint64_t timeout_count;
+
+  void add(const lt_counters &rhs) {
+    wait_count += rhs.wait_count;
+    wait_time += rhs.wait_time;
+    long_wait_count += rhs.long_wait_count;
+    long_wait_time += rhs.long_wait_time;
+    timeout_count += rhs.timeout_count;
+  }
+};
+
+// Lock request state for some locktree
+struct lt_lock_request_info {
+  omt<lock_request *> pending_lock_requests;
+  std::atomic_bool pending_is_empty;
+  toku_external_mutex_t mutex;
+  bool should_retry_lock_requests;
+  lt_counters counters;
+  std::atomic_ullong retry_want;
+  unsigned long long retry_done;
+  toku_mutex_t retry_mutex;
+  toku_cond_t retry_cv;
+  bool running_retry;
+
+  void init(toku_external_mutex_factory_t mutex_factory);
+  void destroy(void);
+};
+
+// The locktree manager manages a set of locktrees, one for each open
+// dictionary. Locktrees are retrieved from the manager. When they are
+// no longer needed, they are released by the user.
+class locktree_manager {
+ public:
+  // param: create_cb, called just after a locktree is first created.
+  //        destroy_cb, called just before a locktree is destroyed.
+  //        escalate_cb, called after a locktree is escalated (with extra
+  //        param)
+  void create(lt_create_cb create_cb, lt_destroy_cb destroy_cb,
+              lt_escalate_cb escalate_cb, void *extra,
+              toku_external_mutex_factory_t mutex_factory_arg);
+
+  void destroy(void);
+
+  size_t get_max_lock_memory(void);
+
+  int set_max_lock_memory(size_t max_lock_memory);
+
+  // effect: Get a locktree from the manager. If a locktree exists with the
+  //         given dict_id, it is referenced and then returned. If one did
+  //         not exist, it is created. It will use the comparator for
+  //         comparing keys. The on_create callback (passed to
+  //         locktree_manager::create()) will be called with the given extra
+  //         parameter.
+  locktree *get_lt(DICTIONARY_ID dict_id, const comparator &cmp,
+                   void *on_create_extra);
+
+  void reference_lt(locktree *lt);
+
+  // effect: Releases one reference on a locktree. If the reference count
+  //         transitions to zero, the on_destroy callback is called before
+  //         it gets destroyed.
+  void release_lt(locktree *lt);
+
+  void get_status(LTM_STATUS status);
+
+  // effect: calls the iterate function on each pending lock request
+  // note: holds the manager's mutex
+  typedef int (*lock_request_iterate_callback)(DICTIONARY_ID dict_id,
+                                               TXNID txnid, const DBT *left_key,
+                                               const DBT *right_key,
+                                               TXNID blocking_txnid,
+                                               uint64_t start_time,
+                                               void *extra);
+  int iterate_pending_lock_requests(lock_request_iterate_callback cb,
+                                    void *extra);
+
+  // effect: Determines if too many locks or too much memory is being used.
+  //         Runs escalation on the manager if so.
+  // param: big_txn, if the current transaction is 'big' (has spilled rollback
+  //        logs)
+  // returns: 0 if there are enough resources to create a new lock, or
+  //          TOKUDB_OUT_OF_LOCKS if there are not enough resources and lock
+  //          escalation failed to free up enough resources for a new lock.
+  int check_current_lock_constraints(bool big_txn);
+
+  bool over_big_threshold(void);
+
+  void note_mem_used(uint64_t mem_used);
+
+  void note_mem_released(uint64_t mem_freed);
+
+  bool out_of_locks(void) const;
+
+  // Escalate all locktrees
+  void escalate_all_locktrees(void);
+
+  // Escalate a set of locktrees
+  void escalate_locktrees(locktree **locktrees, int num_locktrees);
+
+  // effect: calls the private function run_escalation(), only ok to
+  //         do for tests.
+  // rationale: to get better stress test coverage, we want a way to
+  //            deterministically trigger lock escalation.
+  void run_escalation_for_test(void);
+  void run_escalation(void);
+
+  // Add time t to the escalator's wait time statistics
+  void add_escalator_wait_time(uint64_t t);
+
+  void kill_waiter(void *extra);
+
+ private:
+  static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024;
+
+  // tracks the current number of locks and lock memory
+  uint64_t m_max_lock_memory;
+  uint64_t m_current_lock_memory;
+
+  struct lt_counters m_lt_counters;
+
+  // the create and destroy callbacks for the locktrees
+  lt_create_cb m_lt_create_callback;
+  lt_destroy_cb m_lt_destroy_callback;
+  lt_escalate_cb m_lt_escalate_callback;
+  void *m_lt_escalate_callback_extra;
+
+  omt<locktree *> m_locktree_map;
+
+  toku_external_mutex_factory_t mutex_factory;
+
+  // the manager's mutex protects the locktree map
+  toku_mutex_t m_mutex;
+
+  void mutex_lock(void);
+
+  void mutex_unlock(void);
+
+  // Manage the set of open locktrees
+  locktree *locktree_map_find(const DICTIONARY_ID &dict_id);
+  void locktree_map_put(locktree *lt);
+  void locktree_map_remove(locktree *lt);
+
+  static int find_by_dict_id(locktree *const &lt, const DICTIONARY_ID &dict_id);
+
+  void escalator_init(void);
+  void escalator_destroy(void);
+
+  // statistics about lock escalation.
+  toku_mutex_t m_escalation_mutex;
+  uint64_t m_escalation_count;
+  tokutime_t m_escalation_time;
+  uint64_t m_escalation_latest_result;
+  uint64_t m_wait_escalation_count;
+  uint64_t m_wait_escalation_time;
+  uint64_t m_long_wait_escalation_count;
+  uint64_t m_long_wait_escalation_time;
+
+  // the escalator coordinates escalation on a set of locktrees for a bunch of
+  // threads
+  class locktree_escalator {
+   public:
+    void create(void);
+    void destroy(void);
+    void run(locktree_manager *mgr, void (*escalate_locktrees_fun)(void *extra),
+             void *extra);
+
+   private:
+    toku_mutex_t m_escalator_mutex;
+    toku_cond_t m_escalator_done;
+    bool m_escalator_running;
+  };
+
+  locktree_escalator m_escalator;
+
+  friend class manager_unit_test;
+};
+
+// A locktree represents the set of row locks owned by all transactions
+// over an open dictionary. Read and write ranges are represented as
+// a left and right key which are compared with the given comparator
+//
+// Locktrees are not created and destroyed by the user. Instead, they are
+// referenced and released using the locktree manager.
+//
+// A sample workflow looks like this:
+// - Create a manager.
+// - Get a locktree by dictionary id from the manager.
+// - Perform read/write lock acquisition on the locktree, add references to
+//   the locktree using the manager, release locks, release references, etc.
+// - ...
+// - Release the final reference to the locktree. It will be destroyed.
+// - Destroy the manager.
+class locktree {
+ public:
+  // effect: Creates a locktree
+  void create(locktree_manager *mgr, DICTIONARY_ID dict_id,
+              const comparator &cmp,
+              toku_external_mutex_factory_t mutex_factory);
+
+  void destroy(void);
+
+  // For thread-safe, external reference counting
+  void add_reference(void);
+
+  // requires: the reference count is > 0
+  // returns: the reference count, after decrementing it by one
+  uint32_t release_reference(void);
+
+  // returns: the current reference count
+  uint32_t get_reference_count(void);
+
+  // effect: Attempts to grant a read lock for the range of keys between
+  //         [left_key, right_key].
+  // returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and
+  //          populate the given conflicts set with the txnids that hold
+  //          conflicting locks in the range. If the locktree cannot create
+  //          more locks, return TOKUDB_OUT_OF_LOCKS.
+  // note: Read locks cannot be shared between txnids, as one would expect.
+  //       This is for simplicity since read locks are rare in MySQL.
+  int acquire_read_lock(TXNID txnid, const DBT *left_key, const DBT *right_key,
+                        txnid_set *conflicts, bool big_txn);
+
+  // effect: Attempts to grant a write lock for the range of keys between
+  //         [left_key, right_key].
+  // returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and
+  //          populate the given conflicts set with the txnids that hold
+  //          conflicting locks in the range. If the locktree cannot create
+  //          more locks, return TOKUDB_OUT_OF_LOCKS.
+  int acquire_write_lock(TXNID txnid, const DBT *left_key, const DBT *right_key,
+                         txnid_set *conflicts, bool big_txn);
+
+  // effect: populate the conflicts set with the txnids that would prevent
+  //         the given txnid from getting a lock on [left_key, right_key]
+  void get_conflicts(bool is_write_request, TXNID txnid, const DBT *left_key,
+                     const DBT *right_key, txnid_set *conflicts);
+
+  // effect: Release all of the lock ranges represented by the range buffer for
+  //         a txnid.
+ void release_locks(TXNID txnid, const range_buffer *ranges, + bool all_trx_locks_hint = false); + + // effect: Runs escalation on this locktree + void escalate(lt_escalate_cb after_escalate_callback, void *extra); + + // returns: The userdata associated with this locktree, or null if it has not + // been set. + void *get_userdata(void) const; + + void set_userdata(void *userdata); + + locktree_manager *get_manager(void) const; + + void set_comparator(const comparator &cmp); + + // Set the user-provided Lock Escalation Barrier check function and its + // argument + // + // Lock Escalation Barrier limits the scope of Lock Escalation. + // For two keys A and B (such that A < B), + // escalation_barrier_check_func(A, B)==true means that there's a lock + // escalation barrier between A and B, and lock escalation is not allowed to + // bridge the gap between A and B. + // + // This method sets the user-provided barrier check function and its + // parameter. + void set_escalation_barrier_func(lt_escalation_barrier_check_func func, + void *extra); + + int compare(const locktree *lt) const; + + DICTIONARY_ID get_dict_id() const; + + // Private info struct for storing pending lock request state. + // Only to be used by lock requests. We store it here as + // something less opaque than usual to strike a tradeoff between + // abstraction and code complexity. It is still fairly abstract + // since the lock_request object is opaque + struct lt_lock_request_info *get_lock_request_info(void); + + typedef void (*dump_callback)(void *cdata, const DBT *left, const DBT *right, + TXNID txnid, bool is_shared, + TxnidVector *owners); + void dump_locks(void *cdata, dump_callback cb); + + private: + locktree_manager *m_mgr; + DICTIONARY_ID m_dict_id; + uint32_t m_reference_count; + + // Since the memory referenced by this comparator is not owned by the + // locktree, the user must guarantee it will outlive the locktree. + // + // The ydb API accomplishes this by opening an ft_handle in the on_create + // callback, which will keep the underlying FT (and its descriptor) in memory + // for as long as the handle is open. The ft_handle is stored opaquely in the + // userdata pointer below. see locktree_manager::get_lt w/ on_create_extra + comparator m_cmp; + + lt_escalation_barrier_check_func m_escalation_barrier; + void *m_escalation_barrier_arg; + + concurrent_tree *m_rangetree; + + void *m_userdata; + struct lt_lock_request_info m_lock_request_info; + + // psergey-todo: + // Each transaction also keeps a list of ranges it has locked. + // So, when a transaction is running in STO mode, two identical + // lists are kept: the STO lock list and transaction's owned locks + // list. Why can't we do with just one list? + + // The following fields and members prefixed with "sto_" are for + // the single txnid optimization, intended to speed up the case + // when only one transaction is using the locktree. If we know + // the locktree has only one transaction, then acquiring locks + // takes O(1) work and releasing all locks takes O(1) work. + // + // How do we know that the locktree only has a single txnid? + // What do we do if it does? + // + // When a txn with txnid T requests a lock: + // - If the tree is empty, the optimization is possible. Set the single + // txnid to T, and insert the lock range into the buffer. + // - If the tree is not empty, check if the single txnid is T. If so, + // append the lock range to the buffer. 
Otherwise, migrate all of
+  //     the locks in the buffer into the rangetree on behalf of txnid T,
+  //     and invalidate the single txnid.
+  //
+  // When a txn with txnid T releases its locks:
+  // - If the single txnid is valid, it must be for T. Destroy the buffer.
+  // - If it's not valid, release locks the normal way in the rangetree.
+  //
+  // To carry out the optimization we need to record a single txnid
+  // and a range buffer for each locktree, each protected by the root
+  // lock of the locktree's rangetree. The root lock for a rangetree
+  // is grabbed by preparing a locked keyrange on the rangetree.
+  TXNID m_sto_txnid;
+  range_buffer m_sto_buffer;
+
+  // The single txnid optimization speeds up the case when only one
+  // transaction is using the locktree. But it has the potential to
+  // hurt the case when more than one txnid exists.
+  //
+  // There are two things we need to do to make the optimization only
+  // optimize the case we care about, and not hurt the general case.
+  //
+  // Bound the worst-case latency for lock migration when the
+  // optimization stops working:
+  // - Idea: Stop the optimization and migrate immediately if we notice
+  //   the single txnid has taken many locks in the range buffer.
+  // - Implementation: Enforce a max size on the single txnid range buffer.
+  // - Analysis: Choosing the perfect max value, M, is difficult to do
+  //   without some feedback from the field. Intuition tells us that M should
+  //   not be so small that the optimization is worthless, and it should not
+  //   be so big that it's unreasonable to have to wait behind a thread doing
+  //   the work of converting M buffer locks into rangetree locks.
+  //
+  // Prevent concurrent-transaction workloads from trying the optimization
+  // in vain:
+  // - Idea: Don't even bother trying the optimization if we think the
+  //   system is in a concurrent-transaction state.
+  // - Implementation: Do something even simpler than detecting whether the
+  //   system is in a concurrent-transaction state. Just keep a "score" value
+  //   and some threshold. If at any time the locktree is eligible for the
+  //   optimization, only do it if the score is at this threshold. When you
+  //   actually do the optimization but someone has to migrate locks in the
+  //   buffer (expensive), then reset the score back to zero. Each time a txn
+  //   releases locks, the score is incremented by 1.
+  // - Analysis: If you let the threshold be "C", then at most 1 / C txns will
+  //   do the optimization in a concurrent-transaction system. Similarly, it
+  //   takes at most C txns to start using the single txnid optimization,
+  //   which is good when the system transitions from multithreaded to
+  //   single threaded.
+  //
+  // STO_BUFFER_MAX_SIZE:
+  //
+  // We choose the max value to be 1 million since most transactions are
+  // smaller than 1 million and we can create a rangetree of 1 million
+  // elements in less than a second. So we can be pretty confident that this
+  // threshold enables the optimization almost always, and prevents super
+  // pathological latency issues for the first lock taken by a second thread.
+  //
+  // STO_SCORE_THRESHOLD:
+  //
+  // A simple first guess at a good value for the score threshold is 100.
+  // By our analysis, we'd end up doing the optimization in vain for
+  // around 1% of all transactions, which seems reasonable. Further,
+  // if the system goes single threaded, it ought to be pretty quick
+  // for 100 transactions to go by, so we won't have to wait long before
+  // we start doing the single txnid optimization again.
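+  //
+  // Editor's note: a small worked sketch of the score mechanism described
+  // above. The shape of the code is illustrative only; just m_sto_score and
+  // STO_SCORE_THRESHOLD come from this header:
+  //
+  //   // a failed STO attempt (buffer locks had to be migrated) resets it:
+  //   m_sto_score = 0;
+  //   // every transaction that releases its locks bumps it:
+  //   m_sto_score++;
+  //   // the optimization is only attempted once the score has recovered:
+  //   if (m_sto_score >= STO_SCORE_THRESHOLD) { /* try the STO path */ }
+  //
+  // With C = STO_SCORE_THRESHOLD = 100, at most 1 in 100 transactions in a
+  // steadily concurrent workload pays the migration cost in vain, while a
+  // workload that goes single-threaded re-enables the optimization after
+  // roughly 100 lock releases.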
+  static const int STO_BUFFER_MAX_SIZE = 50 * 1024;
+  static const int STO_SCORE_THRESHOLD = 100;
+  int m_sto_score;
+
+  // statistics about time spent ending the STO early
+  uint64_t m_sto_end_early_count;
+  tokutime_t m_sto_end_early_time;
+
+  // effect: begins the single txnid optimization, setting m_sto_txnid
+  //         to the given txnid.
+  // requires: m_sto_txnid is invalid
+  void sto_begin(TXNID txnid);
+
+  // effect: append a range to the sto buffer
+  // requires: m_sto_txnid is valid
+  void sto_append(const DBT *left_key, const DBT *right_key,
+                  bool is_write_request);
+
+  // effect: ends the single txnid optimization, releasing any memory
+  //         stored in the sto buffer, notifying the tracker, and
+  //         invalidating m_sto_txnid.
+  // requires: m_sto_txnid is valid
+  void sto_end(void);
+
+  // params: prepared_lkr is a void * to a prepared locked keyrange. see below.
+  // effect: ends the single txnid optimization early, migrating buffer locks
+  //         into the rangetree, calling sto_end(), and then setting the
+  //         sto_score back to zero.
+  // requires: m_sto_txnid is valid
+  void sto_end_early(void *prepared_lkr);
+  void sto_end_early_no_accounting(void *prepared_lkr);
+
+  // params: prepared_lkr is a void * to a prepared locked keyrange. we can't
+  //         use the real type because the compiler won't allow us to forward
+  //         declare concurrent_tree::locked_keyrange without including
+  //         concurrent_tree.h, which we cannot do here because it is a
+  //         template implementation.
+  // requires: the prepared locked keyrange is for the locktree's rangetree
+  // requires: m_sto_txnid is valid
+  // effect: migrates each lock in the single txnid buffer into the locktree's
+  //         rangetree, notifying the memory tracker as necessary.
+  void sto_migrate_buffer_ranges_to_tree(void *prepared_lkr);
+
+  // effect: If m_sto_txnid is valid, then release the txnid's locks
+  //         by ending the optimization.
+  // requires: If m_sto_txnid is valid, it is equal to the given txnid
+  // returns: True if locks were released for this txnid
+  bool sto_try_release(TXNID txnid);
+
+  // params: prepared_lkr is a void * to a prepared locked keyrange. see above.
+  // requires: the prepared locked keyrange is for the locktree's rangetree
+  // effect: If m_sto_txnid is valid and equal to the given txnid, then
+  //         append a range onto the buffer. Otherwise, if m_sto_txnid is
+  //         valid but not equal to this txnid, then migrate the buffer's
+  //         locks into the rangetree and end the optimization, setting the
+  //         score back to zero.
+  // returns: true if the lock was acquired for this txnid
+  bool sto_try_acquire(void *prepared_lkr, TXNID txnid, const DBT *left_key,
+                       const DBT *right_key, bool is_write_request);
+
+  // Effect:
+  //   Provides a hook for a helgrind suppression.
+  // Returns:
+  //   true if m_sto_txnid is not TXNID_NONE
+  bool sto_txnid_is_valid_unsafe(void) const;
+
+  // Effect:
+  //   Provides a hook for a helgrind suppression.
+ // Returns: + // m_sto_score + int sto_get_score_unsafe(void) const; + + void remove_overlapping_locks_for_txnid(TXNID txnid, const DBT *left_key, + const DBT *right_key); + + int acquire_lock_consolidated(void *prepared_lkr, TXNID txnid, + const DBT *left_key, const DBT *right_key, + bool is_write_request, txnid_set *conflicts); + + int acquire_lock(bool is_write_request, TXNID txnid, const DBT *left_key, + const DBT *right_key, txnid_set *conflicts); + + int try_acquire_lock(bool is_write_request, TXNID txnid, const DBT *left_key, + const DBT *right_key, txnid_set *conflicts, + bool big_txn); + + friend class locktree_unit_test; + friend class manager_unit_test; + friend class lock_request_unit_test; + + // engine status reaches into the locktree to read some stats + friend void locktree_manager::get_status(LTM_STATUS status); +}; + +} /* namespace toku */ diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h new file mode 100644 index 0000000000..76e28d7477 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h @@ -0,0 +1,178 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." 
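+
+// ---------------------------------------------------------------------------
+// Editor's note: a hedged sketch of driving the locktree API declared in
+// locktree.h above. Every caller-side name here (mgr, dict_id, my_cmp,
+// txnid, left, right, ranges) is a hypothetical assumption; only the member
+// functions and return codes come from that header.
+//
+//   toku::locktree *lt = mgr.get_lt(dict_id, my_cmp, nullptr);
+//   toku::txnid_set conflicts;
+//   conflicts.create();
+//   int r = lt->acquire_write_lock(txnid, &left, &right, &conflicts,
+//                                  false /* big_txn */);
+//   // r == DB_LOCK_NOTGRANTED: `conflicts` now holds the blocking txnids.
+//   // ... later, release everything recorded in a range_buffer `ranges`:
+//   lt->release_locks(txnid, &ranges);
+//   conflicts.destroy();
+//   mgr.release_lt(lt);
+// ---------------------------------------------------------------------------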
+
+#pragma once
+
+#include <stdint.h>
+#include <string.h>
+
+#include "../util/dbt.h"
+#include "../util/memarena.h"
+
+namespace toku {
+
+// a key range buffer represents a set of key ranges that can
+// be stored, iterated over, and then destroyed all at once.
+class range_buffer {
+ private:
+  // the key range buffer is a bunch of records in a row.
+  // each record has the following header, followed by the
+  // left key and right key data payload, if applicable.
+  // we limit keys to be 2^16, since we store lengths as 2 bytes.
+  static const size_t MAX_KEY_SIZE = 1 << 16;
+
+  struct record_header {
+    bool left_neg_inf;
+    bool left_pos_inf;
+    bool right_pos_inf;
+    bool right_neg_inf;
+    uint16_t left_key_size;
+    uint16_t right_key_size;
+    bool is_exclusive_lock;
+
+    bool left_is_infinite(void) const;
+
+    bool right_is_infinite(void) const;
+
+    void init(const DBT *left_key, const DBT *right_key, bool is_exclusive);
+  };
+  // PORT static_assert(sizeof(record_header) == 8, "record header format is
+  // off");
+
+ public:
+  // the iterator abstracts reading over a buffer of variable length
+  // records one by one until there are no more left.
+  class iterator {
+   public:
+    iterator();
+    iterator(const range_buffer *buffer);
+
+    // a record represents the user-view of a serialized key range.
+    // it handles positive and negative infinity and the optimized
+    // point range case, where left and right points share memory.
+    class record {
+     public:
+      // get a read-only pointer to the left key of this record's range
+      const DBT *get_left_key(void) const;
+
+      // get a read-only pointer to the right key of this record's range
+      const DBT *get_right_key(void) const;
+
+      // how big is this record? this tells us where the next record is
+      size_t size(void) const;
+
+      bool get_exclusive_flag() const { return _header.is_exclusive_lock; }
+
+      // populate a record header and point our DBTs'
+      // buffers into ours if they are not infinite.
+      void deserialize(const char *buf);
+
+     private:
+      record_header _header;
+      DBT _left_key;
+      DBT _right_key;
+    };
+
+    // populate the given record object with the current record.
+    // the memory referred to by the record is valid for only
+    // as long as the record exists.
+    bool current(record *rec);
+
+    // move the iterator to the next record in the buffer
+    void next(void);
+
+   private:
+    void reset_current_chunk();
+
+    // the key range buffer we are iterating over, the current
+    // offset in that buffer, and the size of the current record.
+    memarena::chunk_iterator _ma_chunk_iterator;
+    const void *_current_chunk_base;
+    size_t _current_chunk_offset;
+    size_t _current_chunk_max;
+    size_t _current_rec_size;
+  };
+
+  // allocate buffer space lazily instead of on creation. this way,
+  // no malloc/free is done if the transaction ends up taking no locks.
+  void create(void);
+
+  // append a left/right key range to the buffer.
+  // if the keys are equal, then only one copy is stored.
+  void append(const DBT *left_key, const DBT *right_key,
+              bool is_write_request = false);
+
+  // is this range buffer empty?
+  bool is_empty(void) const;
+
+  // how much memory is being used by this range buffer?
+  uint64_t total_memory_size(void) const;
+
+  // how many ranges are stored in this range buffer?
+  int get_num_ranges(void) const;
+
+  void destroy(void);
+
+ private:
+  memarena _arena;
+  int _num_ranges;
+
+  void append_range(const DBT *left_key, const DBT *right_key,
+                    bool is_write_request);
+
+  // append a point to the buffer. this is the space/time saving
+  // optimization for key ranges where left == right.
+  void append_point(const DBT *key, bool is_write_request);
+};
+
+} /* namespace toku */
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h
new file mode 100644
index 0000000000..ec25a8c583
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h
@@ -0,0 +1,302 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=2:softtabstop=2:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+  PerconaFT is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License, version 2,
+  as published by the Free Software Foundation.
+
+  PerconaFT is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+  PerconaFT is free software: you can redistribute it and/or modify
+  it under the terms of the GNU Affero General Public License, version 3,
+  as published by the Free Software Foundation.
+
+  PerconaFT is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU Affero General Public License for more details.
+
+  You should have received a copy of the GNU Affero General Public License
+  along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <string.h>
+
+#include "../ft/comparator.h"
+#include "../portability/memory.h"
+#include "../portability/toku_pthread.h"
+// PORT: we need LTM_STATUS
+#include "../ft/ft-status.h"
+#include "../portability/txn_subst.h"
+#include "keyrange.h"
+
+namespace toku {
+
+// a node in a tree with its own mutex
+// - range is the "key" of this node
+// - txnid is the single txnid associated with this node
+// - left and right children may be null
+//
+// to build a tree on top of this abstraction, the user:
+// - provides memory for a root node, initializes it via create_root()
+// - performs tree operations on the root node. memory management
+//   below the root node is handled by the abstraction, not the user.
+// this pattern:
+// - guarantees a root node always exists.
+// - does not allow for rebalances on the root node
+
+class treenode {
+ public:
+  // every treenode function has some common requirements:
+  // - node is locked and children are never locked
+  // - node may be unlocked if no other thread has visibility
+
+  // effect: create the root node
+  void create_root(const comparator *cmp);
+
+  // effect: destroys the root node
+  void destroy_root(void);
+
+  // effect: sets the txnid and copies the given range for this node
+  void set_range_and_txnid(const keyrange &range, TXNID txnid, bool is_shared);
+
+  // returns: true iff this node is marked as empty
+  bool is_empty(void);
+
+  // returns: true if this is the root node, denoted by a null parent
+  bool is_root(void);
+
+  // returns: true if the given range overlaps with this node's range
+  bool range_overlaps(const keyrange &range);
+
+  // effect: locks the node
+  void mutex_lock(void);
+
+  // effect: unlocks the node
+  void mutex_unlock(void);
+
+  // return: node whose child overlaps, or a child that is empty
+  //         and would contain range if it existed
+  // given: if cmp_hint is non-null, then it is a precomputed
+  //        comparison of this node's range to the given range.
+  treenode *find_node_with_overlapping_child(
+      const keyrange &range, const keyrange::comparison *cmp_hint);
+
+  // effect: performs an in-order traversal of the ranges that overlap the
+  //         given range, calling function->fn() on each node that does
+  // requires: function signature is: bool fn(const keyrange &range,
+  //           TXNID txnid)
+  // requires: fn returns true to keep iterating, false to stop iterating
+  // requires: fn does not attempt to use any ranges read out by value
+  //           after removing a node with an overlapping range from the tree.
+  template <class F>
+  void traverse_overlaps(const keyrange &range, F *function) {
+    keyrange::comparison c = range.compare(*m_cmp, m_range);
+    if (c == keyrange::comparison::EQUALS) {
+      // Doesn't matter if fn wants to keep going, there
+      // is nothing left, so return.
+      function->fn(m_range, m_txnid, m_is_shared, m_owners);
+      return;
+    }
+
+    treenode *left = m_left_child.get_locked();
+    if (left) {
+      if (c != keyrange::comparison::GREATER_THAN) {
+        // Target range is less than this node, or it overlaps this
+        // node. There may be something on the left.
+        left->traverse_overlaps(range, function);
+      }
+      left->mutex_unlock();
+    }
+
+    if (c == keyrange::comparison::OVERLAPS) {
+      bool keep_going = function->fn(m_range, m_txnid, m_is_shared, m_owners);
+      if (!keep_going) {
+        return;
+      }
+    }
+
+    treenode *right = m_right_child.get_locked();
+    if (right) {
+      if (c != keyrange::comparison::LESS_THAN) {
+        // Target range is greater than this node, or it overlaps this
+        // node. There may be something on the right.
+        right->traverse_overlaps(range, function);
+      }
+      right->mutex_unlock();
+    }
+  }
+
+  // effect: inserts the given range and txnid into a subtree, recursively
+  // requires: range does not overlap with any node below the subtree
+  bool insert(const keyrange &range, TXNID txnid, bool is_shared);
+
+  // effect: removes the given range from the subtree
+  // requires: range exists in the subtree
+  // returns: the root of the resulting subtree
+  treenode *remove(const keyrange &range, TXNID txnid);
+
+  // effect: removes this node and all of its children, recursively
+  // requires: every node at and below this node is unlocked
+  void recursive_remove(void);
+
+ private:
+  // the child_ptr is a light abstraction for the locking of
+  // a child and the maintenance of its depth estimate.
+ + struct child_ptr { + // set the child pointer + void set(treenode *node); + + // get and lock this child if it exists + treenode *get_locked(void); + + treenode *ptr; + uint32_t depth_est; + }; + + // the balance factor at which a node is considered imbalanced + static const int32_t IMBALANCE_THRESHOLD = 2; + + // node-level mutex + toku_mutex_t m_mutex; + + // the range and txnid for this node. the range contains a copy + // of the keys originally inserted into the tree. nodes may + // swap ranges. but at the end of the day, when a node is + // destroyed, it frees the memory associated with whatever range + // it has at the time of destruction. + keyrange m_range; + + void remove_shared_owner(TXNID txnid); + + bool has_multiple_owners() { return (m_txnid == TXNID_SHARED); } + + private: + // Owner transaction id. + // A value of TXNID_SHARED means this node has multiple owners + TXNID m_txnid; + + // If true, this lock is a non-exclusive lock, and it can have either + // one or several owners. + bool m_is_shared; + + // List of the owners, or nullptr if there's just one owner. + TxnidVector *m_owners; + + // two child pointers + child_ptr m_left_child; + child_ptr m_right_child; + + // comparator for ranges + // psergey-todo: Is there any sense to store the comparator in each tree + // node? + const comparator *m_cmp; + + // marked for the root node. the root node is never free()'d + // when removed, but instead marked as empty. + bool m_is_root; + + // marked for an empty node. only valid for the root. + bool m_is_empty; + + // effect: initializes an empty node with the given comparator + void init(const comparator *cmp); + + // requires: this is a shared node (m_is_shared==true) + // effect: another transaction is added as an owner. + // returns: true <=> added another owner + // false <=> this transaction is already an owner + bool add_shared_owner(TXNID txnid); + + // requires: *parent is initialized to something meaningful. + // requires: subtree is non-empty + // returns: the leftmost child of the given subtree + // returns: a pointer to the parent of said child in *parent, only + // if this function recurred, otherwise it is untouched. + treenode *find_leftmost_child(treenode **parent); + + // requires: *parent is initialized to something meaningful. + // requires: subtree is non-empty + // returns: the rightmost child of the given subtree + // returns: a pointer to the parent of said child in *parent, only + // if this function recurred, otherwise it is untouched. 
+ treenode *find_rightmost_child(treenode **parent); + + // effect: remove the root of this subtree, destroying the old root + // returns: the new root of the subtree + treenode *remove_root_of_subtree(void); + + // requires: subtree is non-empty, direction is not 0 + // returns: the child of the subtree at either the left or rightmost extreme + treenode *find_child_at_extreme(int direction, treenode **parent); + + // effect: retrieves and possibly rebalances the left child + // returns: a locked left child, if it exists + treenode *lock_and_rebalance_left(void); + + // effect: retrieves and possibly rebalances the right child + // returns: a locked right child, if it exists + treenode *lock_and_rebalance_right(void); + + // returns: the estimated depth of this subtree + uint32_t get_depth_estimate(void) const; + + // returns: true iff left subtree depth is sufficiently less than the right + bool left_imbalanced(int threshold) const; + + // returns: true iff right subtree depth is sufficiently greater than the left + bool right_imbalanced(int threshold) const; + + // effect: performs an O(1) rebalance, which will "heal" an imbalance by at + // most 1. effect: if the new root is not this node, then this node is + // unlocked. returns: locked node representing the new root of the rebalanced + // subtree + treenode *maybe_rebalance(void); + + // returns: allocated treenode populated with a copy of the range and txnid + static treenode *alloc(const comparator *cmp, const keyrange &range, + TXNID txnid, bool is_shared); + + // requires: node is a locked root node, or an unlocked non-root node + static void free(treenode *node); + + // effect: swaps the range/txnid pairs for node1 and node2. + static void swap_in_place(treenode *node1, treenode *node2); + + friend class concurrent_tree_unit_test; +}; + +} /* namespace toku */ diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h new file mode 100644 index 0000000000..d79c24fb0c --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h @@ -0,0 +1,92 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+  GNU Affero General Public License for more details.
+
+  You should have received a copy of the GNU Affero General Public License
+  along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include "../portability/txn_subst.h"
+#include "../util/omt.h"
+
+namespace toku {
+
+class txnid_set {
+ public:
+  // effect: Creates an empty set. Does not malloc space for
+  //         any entries yet. That is done lazily on add().
+  void create(void);
+
+  // effect: Destroy the set's internals.
+  void destroy(void);
+
+  // returns: True if the given txnid is a member of the set.
+  bool contains(TXNID id) const;
+
+  // effect: Adds a given txnid to the set if it did not exist
+  void add(TXNID txnid);
+
+  // effect: Deletes a txnid from the set if it exists.
+  void remove(TXNID txnid);
+
+  // returns: Size of the set
+  uint32_t size(void) const;
+
+  // returns: The "i'th" id in the set, as if it were sorted.
+  TXNID get(uint32_t i) const;
+
+ private:
+  toku::omt<TXNID> m_txnids;
+
+  friend class txnid_set_unit_test;
+};
+ENSURE_POD(txnid_set);
+
+} /* namespace toku */
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h
new file mode 100644
index 0000000000..8042021707
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h
@@ -0,0 +1,124 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+  PerconaFT is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License, version 2,
+  as published by the Free Software Foundation.
+
+  PerconaFT is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+  PerconaFT is free software: you can redistribute it and/or modify
+  it under the terms of the GNU Affero General Public License, version 3,
+  as published by the Free Software Foundation.
+
+  PerconaFT is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU Affero General Public License for more details.
+
+  You should have received a copy of the GNU Affero General Public License
+  along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <functional>
+
+#include "../util/omt.h"
+#include "txnid_set.h"
+
+namespace toku {
+
+// A wfg is a 'wait-for' graph. A directed edge represents one
+// txn waiting for another to finish before it can acquire a lock.
+
+class wfg {
+ public:
+  // Create a lock request graph
+  void create(void);
+
+  // Destroy the internals of the lock request graph
+  void destroy(void);
+
+  // Add an edge (a_id, b_id) to the graph
+  void add_edge(TXNID a_txnid, TXNID b_txnid);
+
+  // Return true if a node with the given transaction id exists in the graph.
+  // Return false otherwise.
+  bool node_exists(TXNID txnid);
+
+  // Return true if there exists a cycle from a given transaction id in the
+  // graph. Return false otherwise.
+  bool cycle_exists_from_txnid(TXNID txnid,
+                               std::function<void(TXNID)> reporter);
+
+  // Apply a given function f to all of the nodes in the graph. The apply
+  // function returns when the function f is called for all of the nodes in
+  // the graph, or the function f returns non-zero.
+  void apply_nodes(int (*fn)(TXNID txnid, void *extra), void *extra);
+
+  // Apply a given function f to all of the edges whose origin is a given
+  // node id. The apply function returns when the function f is called for
+  // all edges in the graph rooted at node id, or the function f returns
+  // non-zero.
+  void apply_edges(TXNID txnid,
+                   int (*fn)(TXNID txnid, TXNID edge_txnid, void *extra),
+                   void *extra);
+
+ private:
+  struct node {
+    // txnid for this node and the associated set of edges
+    TXNID txnid;
+    txnid_set edges;
+    bool visited;
+
+    static node *alloc(TXNID txnid);
+
+    static void free(node *n);
+  };
+  ENSURE_POD(node);
+
+  toku::omt<node *> m_nodes;
+
+  node *find_node(TXNID txnid);
+
+  node *find_create_node(TXNID txnid);
+
+  bool cycle_exists_from_node(node *target, node *head,
+                              std::function<void(TXNID)> reporter);
+
+  static int find_by_txnid(node *const &node_a, const TXNID &txnid_b);
+};
+ENSURE_POD(wfg);
+
+} /* namespace toku */
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/memory.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/memory.h
new file mode 100644
index 0000000000..0a621f8e01
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/memory.h
@@ -0,0 +1,215 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <stdlib.h>
+
+#include "toku_portability.h"
+
+/* Percona memory allocation functions and macros.
+ * These are functions for malloc and free */
+
+int toku_memory_startup(void) __attribute__((constructor));
+void toku_memory_shutdown(void) __attribute__((destructor));
+
+/* Generally: errno is set to 0 or a value to indicate problems. */
+
+// Everything should call toku_malloc() instead of malloc(), and toku_calloc()
+// instead of calloc(). That way the tests can, e.g., replace the malloc
+// function using toku_set_func_malloc().
+void *toku_calloc(size_t nmemb, size_t size)
+    __attribute__((__visibility__("default")));
+void *toku_xcalloc(size_t nmemb, size_t size)
+    __attribute__((__visibility__("default")));
+void *toku_malloc(size_t size) __attribute__((__visibility__("default")));
+void *toku_malloc_aligned(size_t alignment, size_t size)
+    __attribute__((__visibility__("default")));
+
+// xmalloc aborts instead of returning NULL if we run out of memory
+void *toku_xmalloc(size_t size) __attribute__((__visibility__("default")));
+void *toku_xrealloc(void *, size_t size)
+    __attribute__((__visibility__("default")));
+void *toku_xmalloc_aligned(size_t alignment, size_t size)
+    __attribute__((__visibility__("default")));
+// Effect: Perform a os_malloc_aligned(size) with the additional property that
+// the returned pointer is a multiple of ALIGNMENT.
+// Fail with a resource_assert if the allocation fails (don't return an error
+// code). If the alloc_aligned function has been set then call it instead.
+// Requires: alignment is a power of two.
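A small sketch of the two failure models declared above (hypothetical caller; assumes the vendored header is on the include path): `toku_malloc()` reports failure by returning NULL, while `toku_xmalloc()` never returns NULL and aborts on exhaustion.

```cpp
#include <cstdio>
#include "memory.h"  // assumed include path

void alloc_demo(size_t n) {
  char *maybe = static_cast<char *>(toku_malloc(n));
  if (maybe == nullptr) {
    std::perror("toku_malloc");  // caller is responsible for handling NULL
    return;
  }
  toku_free(maybe);

  char *always = static_cast<char *>(toku_xmalloc(n));  // aborts on failure
  toku_free(always);
}
```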
+
+void toku_free(void *) __attribute__((__visibility__("default")));
+
+size_t toku_malloc_usable_size(void *p)
+    __attribute__((__visibility__("default")));
+
+/* MALLOC is a macro that helps avoid a common error:
+ * Suppose I write
+ *    struct foo *x = malloc(sizeof(struct foo));
+ * That works fine.  But if I change it to this, I've probably made a mistake:
+ *    struct foo *x = malloc(sizeof(struct bar));
+ * It can get worse, since one might have something like
+ *    struct foo *x = malloc(sizeof(struct foo *))
+ * which looks reasonable, but it allocates enough to hold a pointer instead of
+ * the amount needed for the struct. So instead, write struct foo *MALLOC(x);
+ * and you cannot go wrong.
+ */
+#define MALLOC(v) CAST_FROM_VOIDP(v, toku_malloc(sizeof(*v)))
+/* MALLOC_N is like calloc (except no zeroing of data): It makes an array. Write
+ *   int *MALLOC_N(5, x);
+ * to make an array of 5 integers.
+ */
+#define MALLOC_N(n, v) CAST_FROM_VOIDP(v, toku_malloc((n) * sizeof(*v)))
+#define MALLOC_N_ALIGNED(align, n, v) \
+  CAST_FROM_VOIDP(v, toku_malloc_aligned((align), (n) * sizeof(*v)))
+
+// CALLOC_N is like calloc with auto-figuring out size of members
+#define CALLOC_N(n, v) CAST_FROM_VOIDP(v, toku_calloc((n), sizeof(*v)))
+
+#define CALLOC(v) CALLOC_N(1, v)
+
+// XMALLOC macros are like MALLOC except they abort if the operation fails
+#define XMALLOC(v) CAST_FROM_VOIDP(v, toku_xmalloc(sizeof(*v)))
+#define XMALLOC_N(n, v) CAST_FROM_VOIDP(v, toku_xmalloc((n) * sizeof(*v)))
+#define XCALLOC_N(n, v) CAST_FROM_VOIDP(v, toku_xcalloc((n), (sizeof(*v))))
+#define XCALLOC(v) XCALLOC_N(1, v)
+#define XREALLOC(v, s) CAST_FROM_VOIDP(v, toku_xrealloc(v, s))
+#define XREALLOC_N(n, v) CAST_FROM_VOIDP(v, toku_xrealloc(v, (n) * sizeof(*v)))
+
+#define XMALLOC_N_ALIGNED(align, n, v) \
+  CAST_FROM_VOIDP(v, toku_xmalloc_aligned((align), (n) * sizeof(*v)))
+
+#define XMEMDUP(dst, src) CAST_FROM_VOIDP(dst, toku_xmemdup(src, sizeof(*src)))
+#define XMEMDUP_N(dst, src, len) CAST_FROM_VOIDP(dst, toku_xmemdup(src, len))
+
+// ZERO_ARRAY writes zeroes to a stack-allocated array
+#define ZERO_ARRAY(o)          \
+  do {                         \
+    memset((o), 0, sizeof(o)); \
+  } while (0)
+// ZERO_STRUCT writes zeroes to a stack-allocated struct
+#define ZERO_STRUCT(o)          \
+  do {                          \
+    memset(&(o), 0, sizeof(o)); \
+  } while (0)
+
+/* Copy memory.  Analogous to strdup() */
+void *toku_memdup(const void *v, size_t len);
+/* Toku-version of strdup.  Use this so that it calls toku_malloc() */
+char *toku_strdup(const char *s) __attribute__((__visibility__("default")));
+/* Toku-version of strndup.  Use this so that it calls toku_malloc() */
+char *toku_strndup(const char *s, size_t n)
+    __attribute__((__visibility__("default")));
+/* Copy memory.  Analogous to strdup()  Crashes instead of returning NULL */
+void *toku_xmemdup(const void *v, size_t len)
+    __attribute__((__visibility__("default")));
+/* Toku-version of strdup.  Use this so that it calls toku_xmalloc()  Crashes
+ * instead of returning NULL */
+char *toku_xstrdup(const char *s) __attribute__((__visibility__("default")));
+
+void toku_malloc_cleanup(
+    void); /* Before exiting, call this function to free up any internal data
+              structures from toku_malloc.  Otherwise valgrind will complain of
+              memory leaks. */
+
+/* Check to see if everything malloc'd was freed.  Might be a no-op depending on
+ * how memory.c is configured. */
+void toku_memory_check_all_free(void);
+/* Check to see if memory is "sane".  Might be a no-op.  Probably better to
+ * simply use valgrind. */
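A sketch of the `sizeof(*v)` idiom the macros above implement (hypothetical caller): the destination pointer itself supplies the element size, so the size expression can never name the wrong struct.

```cpp
#include "memory.h"  // assumed include path

struct row { int key; int value; };

void macro_demo() {
  row *one;
  MALLOC(one);            // expands to toku_malloc(sizeof(*one))
  row *table;
  MALLOC_N(16, table);    // array of 16 rows, uninitialized
  row *zeroed;
  CALLOC_N(16, zeroed);   // array of 16 rows, zero-filled
  XREALLOC_N(32, table);  // grow to 32 rows, aborting on failure
  toku_free(one);
  toku_free(table);
  toku_free(zeroed);
}
```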
+void toku_do_memory_check(void);
+
+typedef void *(*malloc_fun_t)(size_t);
+typedef void (*free_fun_t)(void *);
+typedef void *(*realloc_fun_t)(void *, size_t);
+typedef void *(*malloc_aligned_fun_t)(size_t /*alignment*/, size_t /*size*/);
+typedef void *(*realloc_aligned_fun_t)(size_t /*alignment*/, void * /*pointer*/,
+                                       size_t /*size*/);
+
+void toku_set_func_malloc(malloc_fun_t f);
+void toku_set_func_xmalloc_only(malloc_fun_t f);
+void toku_set_func_malloc_only(malloc_fun_t f);
+void toku_set_func_realloc(realloc_fun_t f);
+void toku_set_func_xrealloc_only(realloc_fun_t f);
+void toku_set_func_realloc_only(realloc_fun_t f);
+void toku_set_func_free(free_fun_t f);
+
+typedef struct memory_status {
+  uint64_t malloc_count;   // number of malloc operations
+  uint64_t free_count;     // number of free operations
+  uint64_t realloc_count;  // number of realloc operations
+  uint64_t malloc_fail;    // number of malloc operations that failed
+  uint64_t realloc_fail;   // number of realloc operations that failed
+  uint64_t requested;      // number of bytes requested
+  uint64_t used;   // number of bytes used (requested + overhead), obtained from
+                   // malloc_usable_size()
+  uint64_t freed;  // number of bytes freed
+  uint64_t max_requested_size;  // largest attempted allocation size
+  uint64_t last_failed_size;    // size of the last failed allocation attempt
+  volatile uint64_t
+      max_in_use;  // maximum memory footprint (used - freed), approximate (not
+                   // worth threadsafety overhead for exact)
+  const char *mallocator_version;
+  uint64_t mmap_threshold;
+} LOCAL_MEMORY_STATUS_S, *LOCAL_MEMORY_STATUS;
+
+void toku_memory_get_status(LOCAL_MEMORY_STATUS s);
+
+// Effect: Like toku_memory_footprint, except instead of passing p,
+// we pass toku_malloc_usable_size(p).
+size_t toku_memory_footprint_given_usable_size(size_t touched, size_t usable);
+
+// Effect: Return an estimate of how much space an object is using, possibly by
+// using toku_malloc_usable_size(p).
+// If p is NULL then returns 0.
+size_t toku_memory_footprint(void *p, size_t touched);
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h
new file mode 100644
index 0000000000..af47800fb7
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h
@@ -0,0 +1,39 @@
+//
+// A replacement for toku_assert.h
+//
+#pragma once
+
+#include <assert.h>
+#include <errno.h>
+
+#ifdef NDEBUG
+
+#define assert_zero(a) ((void)(a))
+#define invariant(a) ((void)(a))
+#define invariant_notnull(a) ((void)(a))
+#define invariant_zero(a) ((void)(a))
+
+#else
+
+#define assert_zero(a) assert((a) == 0)
+#define invariant(a) assert(a)
+#define invariant_notnull(a) assert(a)
+#define invariant_zero(a) assert_zero(a)
+
+#endif
+
+#define lazy_assert_zero(a) assert_zero(a)
+
+#define paranoid_invariant_zero(a) assert_zero(a)
+#define paranoid_invariant_notnull(a) assert(a)
+#define paranoid_invariant(a) assert(a)
+
+#define ENSURE_POD(type)                                                    \
+  static_assert(                                                            \
+      std::is_standard_layout<type>::value && std::is_trivial<type>::value, \
+      #type " isn't POD")
+
+inline int get_error_errno(void) {
+  invariant(errno);
+  return errno;
+}
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h
new file mode 100644
index 0000000000..aaa2298faf
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h
@@ -0,0 +1,130 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
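A sketch of how the assertion substitutes above are meant to be used (hypothetical struct and function names): `ENSURE_POD` is a compile-time guard, and the `invariant*` macros compile to no-ops under `NDEBUG`, so their arguments must be side-effect free.

```cpp
#include <cstdint>
#include "toku_assert_subst.h"  // assumed include path

// The lock tree stores such structs in raw buffers, so they must stay
// trivially copyable; this fails to compile if a vtable or ctor sneaks in.
struct lock_range_demo {
  uint64_t left;
  uint64_t right;
};
ENSURE_POD(lock_range_demo);

int checked_status_demo(int status) {
  // Compiles away under NDEBUG: never put side effects in the argument.
  invariant_zero(status);
  return status;
}
```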
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+// PORT2: #include
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "toku_assert_subst.h"
+
+__attribute__((const, always_inline)) static inline intptr_t which_cache_line(
+    intptr_t addr) {
+  static const size_t assumed_cache_line_size = 64;
+  return addr / assumed_cache_line_size;
+}
+template <typename T>
+__attribute__((const, always_inline)) static inline bool crosses_boundary(
+    T *addr, size_t width) {
+  const intptr_t int_addr = reinterpret_cast<intptr_t>(addr);
+  const intptr_t last_byte = int_addr + width - 1;
+  return which_cache_line(int_addr) != which_cache_line(last_byte);
+}
+
+template <typename T, typename U>
+__attribute__((always_inline)) static inline T toku_sync_fetch_and_add(T *addr,
+                                                                        U diff) {
+  paranoid_invariant(!crosses_boundary(addr, sizeof *addr));
+  return __sync_fetch_and_add(addr, diff);
+}
+template <typename T, typename U>
+__attribute__((always_inline)) static inline T toku_sync_add_and_fetch(T *addr,
+                                                                        U diff) {
+  paranoid_invariant(!crosses_boundary(addr, sizeof *addr));
+  return __sync_add_and_fetch(addr, diff);
+}
+template <typename T, typename U>
+__attribute__((always_inline)) static inline T toku_sync_fetch_and_sub(T *addr,
+                                                                        U diff) {
+  paranoid_invariant(!crosses_boundary(addr, sizeof *addr));
+  return __sync_fetch_and_sub(addr, diff);
+}
+template <typename T, typename U>
+__attribute__((always_inline)) static inline T toku_sync_sub_and_fetch(T *addr,
+                                                                        U diff) {
+  paranoid_invariant(!crosses_boundary(addr, sizeof *addr));
+  return __sync_sub_and_fetch(addr, diff);
+}
+template <typename T, typename U, typename V>
+__attribute__((always_inline)) static inline T toku_sync_val_compare_and_swap(
+    T *addr, U oldval, V newval) {
+  paranoid_invariant(!crosses_boundary(addr, sizeof *addr));
+  return __sync_val_compare_and_swap(addr, oldval, newval);
+}
+template <typename T, typename U, typename V>
+__attribute__((always_inline)) static inline bool
+toku_sync_bool_compare_and_swap(T *addr, U oldval, V newval) {
+  paranoid_invariant(!crosses_boundary(addr, sizeof *addr));
+  return __sync_bool_compare_and_swap(addr, oldval, newval);
+}
+
+// in case you include this but not toku_portability.h
+#pragma GCC poison __sync_fetch_and_add
+#pragma GCC poison __sync_fetch_and_sub
+#pragma GCC poison __sync_fetch_and_or
+#pragma GCC poison __sync_fetch_and_and
+#pragma GCC poison __sync_fetch_and_xor
+#pragma GCC poison __sync_fetch_and_nand
+#pragma GCC poison __sync_add_and_fetch
+#pragma GCC poison __sync_sub_and_fetch
+#pragma GCC poison __sync_or_and_fetch
+#pragma GCC poison __sync_and_and_fetch
+#pragma GCC poison __sync_xor_and_fetch
+#pragma GCC poison __sync_nand_and_fetch
+#pragma GCC poison __sync_bool_compare_and_swap
+#pragma GCC poison __sync_val_compare_and_swap
+#pragma GCC poison __sync_synchronize
+#pragma GCC poison __sync_lock_test_and_set
+#pragma GCC poison __sync_release
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h
new file mode 100644
index 0000000000..eb8291c1d2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h
@@ -0,0 +1,83 @@
+/*
+  A wrapper around ROCKSDB_NAMESPACE::TransactionDBMutexFactory-provided
+  condition and mutex that provides toku_pthread_*-like interface. The functions
+  are named
+
+   toku_external_{mutex|cond}_XXX
+
+  Lock Tree uses this mutex and condition for interruptible (long) lock waits.
+
+  (It also still uses toku_pthread_XXX calls for mutexes/conditions for
+  shorter waits on internal objects)
+*/
+
+#pragma once
+
+#include
+#include
+#include
+
+#include "rocksdb/utilities/transaction_db.h"
+#include "rocksdb/utilities/transaction_db_mutex.h"
+#include "toku_portability.h"
+
+using ROCKSDB_NAMESPACE::TransactionDBCondVar;
+using ROCKSDB_NAMESPACE::TransactionDBMutex;
+
+typedef std::shared_ptr<ROCKSDB_NAMESPACE::TransactionDBMutexFactory>
+    toku_external_mutex_factory_t;
+
+typedef std::shared_ptr<TransactionDBMutex> toku_external_mutex_t;
+typedef std::shared_ptr<TransactionDBCondVar> toku_external_cond_t;
+
+static inline void toku_external_cond_init(
+    toku_external_mutex_factory_t mutex_factory, toku_external_cond_t *cond) {
+  *cond = mutex_factory->AllocateCondVar();
+}
+
+inline void toku_external_cond_destroy(toku_external_cond_t *cond) {
+  cond->reset();  // this will destroy the managed object
+}
+
+inline void toku_external_cond_signal(toku_external_cond_t *cond) {
+  (*cond)->Notify();
+}
+
+inline void toku_external_cond_broadcast(toku_external_cond_t *cond) {
+  (*cond)->NotifyAll();
+}
+
+inline int toku_external_cond_timedwait(toku_external_cond_t *cond,
+                                        toku_external_mutex_t *mutex,
+                                        int64_t timeout_microsec) {
+  auto res = (*cond)->WaitFor(*mutex, timeout_microsec);
+  if (res.ok())
+    return 0;
+  else
+    return ETIMEDOUT;
+}
+
+inline void toku_external_mutex_init(toku_external_mutex_factory_t factory,
+                                     toku_external_mutex_t *mutex) {
+  // Use placement new: the memory has been allocated but constructor wasn't
+  // called
+  new (mutex) toku_external_mutex_t;
+  *mutex = factory->AllocateMutex();
+}
+
+inline void toku_external_mutex_lock(toku_external_mutex_t *mutex) {
+  (*mutex)->Lock();
+}
+
+inline int toku_external_mutex_trylock(toku_external_mutex_t *mutex) {
+  (*mutex)->Lock();
+  return 0;
+}
+
+inline void toku_external_mutex_unlock(toku_external_mutex_t *mutex) {
+  (*mutex)->UnLock();
+}
+
+inline void toku_external_mutex_destroy(toku_external_mutex_t *mutex) {
+  mutex->reset();  // this will destroy the managed object
+}
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h
new file mode 100644
index 0000000000..c967e71771
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h
@@ -0,0 +1,286 @@
+/*======
+This file is part of PerconaFT.
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
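A sketch of a bounded wait on this wrapper (hypothetical caller), mirroring how the lock tree waits for a contended lock. The factory would typically come from the transaction DB configuration (e.g. `TransactionDBOptions::custom_mutex_factory`; that origin is an assumption here, not shown in this header):

```cpp
#include "toku_external_pthread.h"  // assumed include path

int bounded_wait_demo(toku_external_mutex_factory_t factory) {
  toku_external_mutex_t mtx;
  toku_external_cond_t cv;
  toku_external_mutex_init(factory, &mtx);
  toku_external_cond_init(factory, &cv);

  toku_external_mutex_lock(&mtx);
  // Wait up to 100ms; returns 0 if signalled, ETIMEDOUT otherwise.
  int r = toku_external_cond_timedwait(&cv, &mtx, 100 * 1000);
  toku_external_mutex_unlock(&mtx);

  toku_external_cond_destroy(&cv);
  toku_external_mutex_destroy(&mtx);
  return r;
}
```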
+ + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#pragma once + +#include // FILE + +// Performance instrumentation object identifier type +typedef unsigned int pfs_key_t; + +enum class toku_instr_object_type { mutex, rwlock, cond, thread, file }; + +struct PSI_file; + +struct TOKU_FILE { + /** The real file. */ + FILE *file; + struct PSI_file *key; + TOKU_FILE() : file(nullptr), key(nullptr) {} +}; + +struct PSI_mutex; +struct PSI_cond; +struct PSI_rwlock; + +struct toku_mutex_t; +struct toku_cond_t; +struct toku_pthread_rwlock_t; + +class toku_instr_key; + +class toku_instr_probe_empty { + public: + explicit toku_instr_probe_empty(UU(const toku_instr_key &key)) {} + + void start_with_source_location(UU(const char *src_file), UU(int src_line)) {} + + void stop() {} +}; + +#define TOKU_PROBE_START(p) p->start_with_source_location(__FILE__, __LINE__) +#define TOKU_PROBE_STOP(p) p->stop + +extern toku_instr_key toku_uninstrumented; + +#ifndef MYSQL_TOKUDB_ENGINE + +#include + +class toku_instr_key { + public: + toku_instr_key(UU(toku_instr_object_type type), UU(const char *group), + UU(const char *name)) {} + + explicit toku_instr_key(UU(pfs_key_t key_id)) {} + // No-instrumentation constructor: + toku_instr_key() {} + ~toku_instr_key() {} +}; + +typedef toku_instr_probe_empty toku_instr_probe; + +enum class toku_instr_file_op { + file_stream_open, + file_create, + file_open, + file_delete, + file_rename, + file_read, + file_write, + file_sync, + file_stream_close, + file_close, + file_stat +}; + +struct PSI_file {}; +struct PSI_mutex {}; + +struct toku_io_instrumentation {}; + +inline int toku_pthread_create(UU(const toku_instr_key &key), pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) { + return pthread_create(thread, attr, start_routine, arg); +} + +inline void toku_instr_register_current_thread() {} + +inline void toku_instr_delete_current_thread() {} + +// Instrument file creation, opening, closing, and renaming +inline void toku_instr_file_open_begin(UU(toku_io_instrumentation &io_instr), + UU(const toku_instr_key &key), + UU(toku_instr_file_op op), + UU(const char *name), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_stream_open_end( + UU(toku_io_instrumentation 
&io_instr), UU(TOKU_FILE &file)) {} + +inline void toku_instr_file_open_end(UU(toku_io_instrumentation &io_instr), + UU(int fd)) {} + +inline void toku_instr_file_name_close_begin( + UU(toku_io_instrumentation &io_instr), UU(const toku_instr_key &key), + UU(toku_instr_file_op op), UU(const char *name), UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_stream_close_begin( + UU(toku_io_instrumentation &io_instr), UU(toku_instr_file_op op), + UU(TOKU_FILE &file), UU(const char *src_file), UU(int src_line)) {} + +inline void toku_instr_file_fd_close_begin( + UU(toku_io_instrumentation &io_instr), UU(toku_instr_file_op op), + UU(int fd), UU(const char *src_file), UU(int src_line)) {} + +inline void toku_instr_file_close_end(UU(toku_io_instrumentation &io_instr), + UU(int result)) {} + +inline void toku_instr_file_io_begin(UU(toku_io_instrumentation &io_instr), + UU(toku_instr_file_op op), UU(int fd), + UU(unsigned int count), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_name_io_begin( + UU(toku_io_instrumentation &io_instr), UU(const toku_instr_key &key), + UU(toku_instr_file_op op), UU(const char *name), UU(unsigned int count), + UU(const char *src_file), UU(int src_line)) {} + +inline void toku_instr_file_stream_io_begin( + UU(toku_io_instrumentation &io_instr), UU(toku_instr_file_op op), + UU(TOKU_FILE &file), UU(unsigned int count), UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_file_io_end(UU(toku_io_instrumentation &io_instr), + UU(unsigned int count)) {} + +struct toku_mutex_t; + +struct toku_mutex_instrumentation {}; + +inline PSI_mutex *toku_instr_mutex_init(UU(const toku_instr_key &key), + UU(toku_mutex_t &mutex)) { + return nullptr; +} + +inline void toku_instr_mutex_destroy(UU(PSI_mutex *&mutex_instr)) {} + +inline void toku_instr_mutex_lock_start( + UU(toku_mutex_instrumentation &mutex_instr), UU(toku_mutex_t &mutex), + UU(const char *src_file), UU(int src_line)) {} + +inline void toku_instr_mutex_trylock_start( + UU(toku_mutex_instrumentation &mutex_instr), UU(toku_mutex_t &mutex), + UU(const char *src_file), UU(int src_line)) {} + +inline void toku_instr_mutex_lock_end( + UU(toku_mutex_instrumentation &mutex_instr), + UU(int pthread_mutex_lock_result)) {} + +inline void toku_instr_mutex_unlock(UU(PSI_mutex *mutex_instr)) {} + +struct toku_cond_instrumentation {}; + +enum class toku_instr_cond_op { + cond_wait, + cond_timedwait, +}; + +inline PSI_cond *toku_instr_cond_init(UU(const toku_instr_key &key), + UU(toku_cond_t &cond)) { + return nullptr; +} + +inline void toku_instr_cond_destroy(UU(PSI_cond *&cond_instr)) {} + +inline void toku_instr_cond_wait_start( + UU(toku_cond_instrumentation &cond_instr), UU(toku_instr_cond_op op), + UU(toku_cond_t &cond), UU(toku_mutex_t &mutex), UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_cond_wait_end(UU(toku_cond_instrumentation &cond_instr), + UU(int pthread_cond_wait_result)) {} + +inline void toku_instr_cond_signal(UU(toku_cond_t &cond)) {} + +inline void toku_instr_cond_broadcast(UU(toku_cond_t &cond)) {} + +#if 0 +// rw locks are not used +// rwlock instrumentation +struct toku_rwlock_instrumentation {}; + +inline PSI_rwlock *toku_instr_rwlock_init(UU(const toku_instr_key &key), + UU(toku_pthread_rwlock_t &rwlock)) { + return nullptr; +} + +inline void toku_instr_rwlock_destroy(UU(PSI_rwlock *&rwlock_instr)) {} + +inline void toku_instr_rwlock_rdlock_wait_start( + UU(toku_rwlock_instrumentation &rwlock_instr), + 
UU(toku_pthread_rwlock_t &rwlock), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_rwlock_wrlock_wait_start( + UU(toku_rwlock_instrumentation &rwlock_instr), + UU(toku_pthread_rwlock_t &rwlock), + UU(const char *src_file), + UU(int src_line)) {} + +inline void toku_instr_rwlock_rdlock_wait_end( + UU(toku_rwlock_instrumentation &rwlock_instr), + UU(int pthread_rwlock_wait_result)) {} + +inline void toku_instr_rwlock_wrlock_wait_end( + UU(toku_rwlock_instrumentation &rwlock_instr), + UU(int pthread_rwlock_wait_result)) {} + +inline void toku_instr_rwlock_unlock(UU(toku_pthread_rwlock_t &rwlock)) {} +#endif + +#else // MYSQL_TOKUDB_ENGINE +// There can be not only mysql but also mongodb or any other PFS stuff +#include +#endif // MYSQL_TOKUDB_ENGINE + +// Mutexes +extern toku_instr_key manager_escalation_mutex_key; +extern toku_instr_key manager_escalator_mutex_key; +extern toku_instr_key manager_mutex_key; +extern toku_instr_key treenode_mutex_key; +extern toku_instr_key locktree_request_info_mutex_key; +extern toku_instr_key locktree_request_info_retry_mutex_key; + +// condition vars +extern toku_instr_key lock_request_m_wait_cond_key; +extern toku_instr_key locktree_request_info_retry_cv_key; +extern toku_instr_key manager_m_escalator_done_key; // unused diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h new file mode 100644 index 0000000000..9a95b38bd5 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h @@ -0,0 +1,87 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
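Outside of a MySQL/PFS build, every `toku_instr_*` call above compiles to a no-op, so instrumented thread creation degrades to plain pthreads. A sketch under that assumption (the worker function is hypothetical):

```cpp
#include <pthread.h>
#include "toku_instrumentation.h"  // assumed include path

static void *worker(void *arg) {
  toku_instr_register_current_thread();  // no-op in this build
  // ... do work ...
  toku_instr_delete_current_thread();    // no-op in this build
  return arg;
}

int spawn_demo() {
  pthread_t tid;
  // toku_uninstrumented is the "don't profile this" key.
  int r = toku_pthread_create(toku_uninstrumented, &tid, nullptr, worker,
                              nullptr);
  if (r == 0) pthread_join(tid, nullptr);
  return r;
}
```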
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#if defined(__clang__) +#define constexpr_static_assert(a, b) +#else +#define constexpr_static_assert(a, b) static_assert(a, b) +#endif + +// include here, before they get deprecated +#include +#include +#include +#include +#include +#include +#include + +#include "toku_atomic.h" + +#if defined(__cplusplus) +#include +#endif + +#if defined(__cplusplus) +// decltype() here gives a reference-to-pointer instead of just a pointer, +// just use __typeof__ +#define CAST_FROM_VOIDP(name, value) name = static_cast<__typeof__(name)>(value) +#else +#define CAST_FROM_VOIDP(name, value) name = cast_to_typeof(name)(value) +#endif + +#define UU(x) x __attribute__((__unused__)) + +#include "toku_instrumentation.h" diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h new file mode 100644 index 0000000000..571b950e11 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h @@ -0,0 +1,520 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
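A sketch of the two helpers defined in the portability header above (hypothetical callback): `CAST_FROM_VOIDP` recovers a typed pointer from a `void *` using the destination's own type, and `UU()` silences unused-parameter warnings on stub arguments.

```cpp
#include <cstdint>
#include "toku_portability.h"  // assumed include path

struct counter { uint64_t hits; };

// A typical iterator callback: context arrives as void* and is recovered
// with CAST_FROM_VOIDP; the unused item parameter is marked UU().
static int count_cb(UU(uint64_t item), void *extra) {
  counter *c;
  CAST_FROM_VOIDP(c, extra);  // c = static_cast<__typeof__(c)>(extra)
  c->hits++;
  return 0;  // zero means "keep iterating" in the apply_* conventions above
}
```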
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include +#include +#include + +#include "toku_portability.h" +// PORT2: #include "toku_assert.h" + +// TODO: some things moved toku_instrumentation.h, not necessarily the best +// place +typedef pthread_attr_t toku_pthread_attr_t; +typedef pthread_t toku_pthread_t; +typedef pthread_mutex_t toku_pthread_mutex_t; +typedef pthread_condattr_t toku_pthread_condattr_t; +typedef pthread_cond_t toku_pthread_cond_t; +typedef pthread_rwlockattr_t toku_pthread_rwlockattr_t; +typedef pthread_key_t toku_pthread_key_t; +typedef struct timespec toku_timespec_t; + +// TODO: break this include loop +#include +typedef pthread_mutexattr_t toku_pthread_mutexattr_t; + +struct toku_mutex_t { + pthread_mutex_t pmutex; + struct PSI_mutex *psi_mutex; /* The performance schema instrumentation hook */ +#if defined(TOKU_PTHREAD_DEBUG) + pthread_t owner; // = pthread_self(); // for debugging + bool locked; + bool valid; + pfs_key_t instr_key_id; +#endif // defined(TOKU_PTHREAD_DEBUG) +}; + +struct toku_cond_t { + pthread_cond_t pcond; + struct PSI_cond *psi_cond; +#if defined(TOKU_PTHREAD_DEBUG) + pfs_key_t instr_key_id; +#endif // defined(TOKU_PTHREAD_DEBUG) +}; + +#if defined(TOKU_PTHREAD_DEBUG) +#define TOKU_COND_INITIALIZER \ + { .pcond = PTHREAD_COND_INITIALIZER, .psi_cond = nullptr, .instr_key_id = 0 } +#else +#define TOKU_COND_INITIALIZER \ + { .pcond = PTHREAD_COND_INITIALIZER, .psi_cond = nullptr } +#endif // defined(TOKU_PTHREAD_DEBUG) + +struct toku_pthread_rwlock_t { + pthread_rwlock_t rwlock; + struct PSI_rwlock *psi_rwlock; +#if defined(TOKU_PTHREAD_DEBUG) + pfs_key_t instr_key_id; +#endif // defined(TOKU_PTHREAD_DEBUG) +}; + +typedef struct toku_mutex_aligned { + toku_mutex_t aligned_mutex __attribute__((__aligned__(64))); +} toku_mutex_aligned_t; + +// Initializing with {} will fill in a struct with all zeros. 
+// But you may also need a pragma to suppress the warnings, as follows
+//
+// #pragma GCC diagnostic push
+// #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+// toku_mutex_t foo = ZERO_MUTEX_INITIALIZER;
+// #pragma GCC diagnostic pop
+//
+// In general it will be a lot of busy work to make this codebase compile
+// cleanly with -Wmissing-field-initializers
+
+#define ZERO_MUTEX_INITIALIZER \
+  {}
+
+#if defined(TOKU_PTHREAD_DEBUG)
+#define TOKU_MUTEX_INITIALIZER                                             \
+  {                                                                        \
+    .pmutex = PTHREAD_MUTEX_INITIALIZER, .psi_mutex = nullptr, .owner = 0, \
+    .locked = false, .valid = true, .instr_key_id = 0                      \
+  }
+#else
+#define TOKU_MUTEX_INITIALIZER \
+  { .pmutex = PTHREAD_MUTEX_INITIALIZER, .psi_mutex = nullptr }
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+
+// Darwin doesn't provide adaptive mutexes
+#if defined(__APPLE__)
+#define TOKU_MUTEX_ADAPTIVE PTHREAD_MUTEX_DEFAULT
+#if defined(TOKU_PTHREAD_DEBUG)
+#define TOKU_ADAPTIVE_MUTEX_INITIALIZER                                    \
+  {                                                                        \
+    .pmutex = PTHREAD_MUTEX_INITIALIZER, .psi_mutex = nullptr, .owner = 0, \
+    .locked = false, .valid = true, .instr_key_id = 0                      \
+  }
+#else
+#define TOKU_ADAPTIVE_MUTEX_INITIALIZER \
+  { .pmutex = PTHREAD_MUTEX_INITIALIZER, .psi_mutex = nullptr }
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+#else  // __FreeBSD__, __linux__, at least
+#if defined(__GLIBC__)
+#define TOKU_MUTEX_ADAPTIVE PTHREAD_MUTEX_ADAPTIVE_NP
+#else
+// not all libc (e.g. musl) implement NP (Non-POSIX) attributes
+#define TOKU_MUTEX_ADAPTIVE PTHREAD_MUTEX_DEFAULT
+#endif
+#if defined(TOKU_PTHREAD_DEBUG)
+#define TOKU_ADAPTIVE_MUTEX_INITIALIZER                                    \
+  {                                                                        \
+    .pmutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, .psi_mutex = nullptr, \
+    .owner = 0, .locked = false, .valid = true, .instr_key_id = 0          \
+  }
+#else
+#define TOKU_ADAPTIVE_MUTEX_INITIALIZER \
+  { .pmutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, .psi_mutex = nullptr }
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+#endif  // defined(__APPLE__)
+
+// Different OSes implement mutexes as different amounts of nested structs.
+// C++ will fill out all missing values with zeroes if you provide at least one
+// zero, but it needs the right amount of nesting.
+#if defined(__FreeBSD__)
+#define ZERO_COND_INITIALIZER \
+  { 0 }
+#elif defined(__APPLE__)
+#define ZERO_COND_INITIALIZER \
+  {                           \
+    { 0 }                     \
+  }
+#else  // __linux__, at least
+#define ZERO_COND_INITIALIZER \
+  {}
+#endif
+
+static inline void toku_mutexattr_init(toku_pthread_mutexattr_t *attr) {
+  int r = pthread_mutexattr_init(attr);
+  assert_zero(r);
+}
+
+static inline void toku_mutexattr_settype(toku_pthread_mutexattr_t *attr,
+                                          int type) {
+  int r = pthread_mutexattr_settype(attr, type);
+  assert_zero(r);
+}
+
+static inline void toku_mutexattr_destroy(toku_pthread_mutexattr_t *attr) {
+  int r = pthread_mutexattr_destroy(attr);
+  assert_zero(r);
+}
+
+#if defined(TOKU_PTHREAD_DEBUG)
+static inline void toku_mutex_assert_locked(const toku_mutex_t *mutex) {
+  invariant(mutex->locked);
+  invariant(mutex->owner == pthread_self());
+}
+#else
+static inline void toku_mutex_assert_locked(const toku_mutex_t *mutex
+                                            __attribute__((unused))) {}
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+
+// asserting that a mutex is unlocked only makes sense
+// if the calling thread can guarantee that no other threads
+// are trying to lock this mutex at the time of the assertion
+//
+// a good example of this is a tree with mutexes on each node.
+// when a node is locked the caller knows that no other threads
+// can be trying to lock its children's mutexes.  the children
+// are in one of two fixed states: locked or unlocked.
+#if defined(TOKU_PTHREAD_DEBUG)
+static inline void toku_mutex_assert_unlocked(toku_mutex_t *mutex) {
+  invariant(mutex->owner == 0);
+  invariant(!mutex->locked);
+}
+#else
+static inline void toku_mutex_assert_unlocked(toku_mutex_t *mutex
+                                              __attribute__((unused))) {}
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+
+#define toku_mutex_lock(M) \
+  toku_mutex_lock_with_source_location(M, __FILE__, __LINE__)
+
+static inline void toku_cond_init(toku_cond_t *cond,
+                                  const toku_pthread_condattr_t *attr) {
+  int r = pthread_cond_init(&cond->pcond, attr);
+  assert_zero(r);
+}
+
+#define toku_mutex_trylock(M) \
+  toku_mutex_trylock_with_source_location(M, __FILE__, __LINE__)
+
+inline void toku_mutex_unlock(toku_mutex_t *mutex) {
+#if defined(TOKU_PTHREAD_DEBUG)
+  invariant(mutex->owner == pthread_self());
+  invariant(mutex->valid);
+  invariant(mutex->locked);
+  mutex->locked = false;
+  mutex->owner = 0;
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+  toku_instr_mutex_unlock(mutex->psi_mutex);
+  int r = pthread_mutex_unlock(&mutex->pmutex);
+  assert_zero(r);
+}
+
+inline void toku_mutex_lock_with_source_location(toku_mutex_t *mutex,
+                                                 const char *src_file,
+                                                 int src_line) {
+  toku_mutex_instrumentation mutex_instr;
+  toku_instr_mutex_lock_start(mutex_instr, *mutex, src_file, src_line);
+
+  const int r = pthread_mutex_lock(&mutex->pmutex);
+  toku_instr_mutex_lock_end(mutex_instr, r);
+
+  assert_zero(r);
+#if defined(TOKU_PTHREAD_DEBUG)
+  invariant(mutex->valid);
+  invariant(!mutex->locked);
+  invariant(mutex->owner == 0);
+  mutex->locked = true;
+  mutex->owner = pthread_self();
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+}
+
+inline int toku_mutex_trylock_with_source_location(toku_mutex_t *mutex,
+                                                   const char *src_file,
+                                                   int src_line) {
+  toku_mutex_instrumentation mutex_instr;
+  toku_instr_mutex_trylock_start(mutex_instr, *mutex, src_file, src_line);
+
+  // try semantics: r is nonzero (EBUSY) when the mutex is already held
+  const int r = pthread_mutex_trylock(&mutex->pmutex);
+  toku_instr_mutex_lock_end(mutex_instr, r);
+
+#if defined(TOKU_PTHREAD_DEBUG)
+  if (r == 0) {
+    invariant(mutex->valid);
+    invariant(!mutex->locked);
+    invariant(mutex->owner == 0);
+    mutex->locked = true;
+    mutex->owner = pthread_self();
+  }
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+  return r;
+}
+
+#define toku_cond_wait(C, M) \
+  toku_cond_wait_with_source_location(C, M, __FILE__, __LINE__)
+
+#define toku_cond_timedwait(C, M, W) \
+  toku_cond_timedwait_with_source_location(C, M, W, __FILE__, __LINE__)
+
+inline void toku_cond_init(const toku_instr_key &key, toku_cond_t *cond,
+                           const pthread_condattr_t *attr) {
+  toku_instr_cond_init(key, *cond);
+  int r = pthread_cond_init(&cond->pcond, attr);
+  assert_zero(r);
+}
+
+inline void toku_cond_destroy(toku_cond_t *cond) {
+  toku_instr_cond_destroy(cond->psi_cond);
+  int r = pthread_cond_destroy(&cond->pcond);
+  assert_zero(r);
+}
+
+inline void toku_cond_wait_with_source_location(toku_cond_t *cond,
+                                                toku_mutex_t *mutex,
+                                                const char *src_file,
+                                                int src_line) {
+#if defined(TOKU_PTHREAD_DEBUG)
+  invariant(mutex->locked);
+  mutex->locked = false;
+  mutex->owner = 0;
+#endif  // defined(TOKU_PTHREAD_DEBUG)
+
+  /* Instrumentation start */
+  toku_cond_instrumentation cond_instr;
+  toku_instr_cond_wait_start(cond_instr, toku_instr_cond_op::cond_wait, *cond,
+                             *mutex, src_file, src_line);
+
+  /* Instrumented code */
+  const int r = pthread_cond_wait(&cond->pcond, &mutex->pmutex);
+
+  /* Instrumentation end */
+  toku_instr_cond_wait_end(cond_instr, r);
+
+  assert_zero(r);
+#if
defined(TOKU_PTHREAD_DEBUG) + invariant(!mutex->locked); + mutex->locked = true; + mutex->owner = pthread_self(); +#endif // defined(TOKU_PTHREAD_DEBUG) +} + +inline int toku_cond_timedwait_with_source_location(toku_cond_t *cond, + toku_mutex_t *mutex, + toku_timespec_t *wakeup_at, + const char *src_file, + int src_line) { +#if defined(TOKU_PTHREAD_DEBUG) + invariant(mutex->locked); + mutex->locked = false; + mutex->owner = 0; +#endif // defined(TOKU_PTHREAD_DEBUG) + + /* Instrumentation start */ + toku_cond_instrumentation cond_instr; + toku_instr_cond_wait_start(cond_instr, toku_instr_cond_op::cond_timedwait, + *cond, *mutex, src_file, src_line); + + /* Instrumented code */ + const int r = pthread_cond_timedwait(&cond->pcond, &mutex->pmutex, wakeup_at); + + /* Instrumentation end */ + toku_instr_cond_wait_end(cond_instr, r); + +#if defined(TOKU_PTHREAD_DEBUG) + invariant(!mutex->locked); + mutex->locked = true; + mutex->owner = pthread_self(); +#endif // defined(TOKU_PTHREAD_DEBUG) + return r; +} + +inline void toku_cond_signal(toku_cond_t *cond) { + toku_instr_cond_signal(*cond); + const int r = pthread_cond_signal(&cond->pcond); + assert_zero(r); +} + +inline void toku_cond_broadcast(toku_cond_t *cond) { + toku_instr_cond_broadcast(*cond); + const int r = pthread_cond_broadcast(&cond->pcond); + assert_zero(r); +} + +inline void toku_mutex_init(const toku_instr_key &key, toku_mutex_t *mutex, + const toku_pthread_mutexattr_t *attr) { +#if defined(TOKU_PTHREAD_DEBUG) + mutex->valid = true; +#endif // defined(TOKU_PTHREAD_DEBUG) + toku_instr_mutex_init(key, *mutex); + const int r = pthread_mutex_init(&mutex->pmutex, attr); + assert_zero(r); +#if defined(TOKU_PTHREAD_DEBUG) + mutex->locked = false; + invariant(mutex->valid); + mutex->valid = true; + mutex->owner = 0; +#endif // defined(TOKU_PTHREAD_DEBUG) +} + +inline void toku_mutex_destroy(toku_mutex_t *mutex) { +#if defined(TOKU_PTHREAD_DEBUG) + invariant(mutex->valid); + mutex->valid = false; + invariant(!mutex->locked); +#endif // defined(TOKU_PTHREAD_DEBUG) + toku_instr_mutex_destroy(mutex->psi_mutex); + int r = pthread_mutex_destroy(&mutex->pmutex); + assert_zero(r); +} + +#define toku_pthread_rwlock_rdlock(RW) \ + toku_pthread_rwlock_rdlock_with_source_location(RW, __FILE__, __LINE__) + +#define toku_pthread_rwlock_wrlock(RW) \ + toku_pthread_rwlock_wrlock_with_source_location(RW, __FILE__, __LINE__) + +#if 0 +inline void toku_pthread_rwlock_init( + const toku_instr_key &key, + toku_pthread_rwlock_t *__restrict rwlock, + const toku_pthread_rwlockattr_t *__restrict attr) { + toku_instr_rwlock_init(key, *rwlock); + int r = pthread_rwlock_init(&rwlock->rwlock, attr); + assert_zero(r); +} + +inline void toku_pthread_rwlock_destroy(toku_pthread_rwlock_t *rwlock) { + toku_instr_rwlock_destroy(rwlock->psi_rwlock); + int r = pthread_rwlock_destroy(&rwlock->rwlock); + assert_zero(r); +} + +inline void toku_pthread_rwlock_rdlock_with_source_location( + toku_pthread_rwlock_t *rwlock, + const char *src_file, + uint src_line) { + + /* Instrumentation start */ + toku_rwlock_instrumentation rwlock_instr; + toku_instr_rwlock_rdlock_wait_start( + rwlock_instr, *rwlock, src_file, src_line); + /* Instrumented code */ + const int r = pthread_rwlock_rdlock(&rwlock->rwlock); + + /* Instrumentation end */ + toku_instr_rwlock_rdlock_wait_end(rwlock_instr, r); + + assert_zero(r); +} + +inline void toku_pthread_rwlock_wrlock_with_source_location( + toku_pthread_rwlock_t *rwlock, + const char *src_file, + uint src_line) { + + /* Instrumentation start */ + 
toku_rwlock_instrumentation rwlock_instr; + toku_instr_rwlock_wrlock_wait_start( + rwlock_instr, *rwlock, src_file, src_line); + /* Instrumented code */ + const int r = pthread_rwlock_wrlock(&rwlock->rwlock); + + /* Instrumentation end */ + toku_instr_rwlock_wrlock_wait_end(rwlock_instr, r); + + assert_zero(r); +} + +inline void toku_pthread_rwlock_rdunlock(toku_pthread_rwlock_t *rwlock) { + toku_instr_rwlock_unlock(*rwlock); + const int r = pthread_rwlock_unlock(&rwlock->rwlock); + assert_zero(r); +} + +inline void toku_pthread_rwlock_wrunlock(toku_pthread_rwlock_t *rwlock) { + toku_instr_rwlock_unlock(*rwlock); + const int r = pthread_rwlock_unlock(&rwlock->rwlock); + assert_zero(r); +} +#endif + +static inline int toku_pthread_join(toku_pthread_t thread, void **value_ptr) { + return pthread_join(thread, value_ptr); +} + +static inline int toku_pthread_detach(toku_pthread_t thread) { + return pthread_detach(thread); +} + +static inline int toku_pthread_key_create(toku_pthread_key_t *key, + void (*destroyf)(void *)) { + return pthread_key_create(key, destroyf); +} + +static inline int toku_pthread_key_delete(toku_pthread_key_t key) { + return pthread_key_delete(key); +} + +static inline void *toku_pthread_getspecific(toku_pthread_key_t key) { + return pthread_getspecific(key); +} + +static inline int toku_pthread_setspecific(toku_pthread_key_t key, void *data) { + return pthread_setspecific(key, data); +} + +int toku_pthread_yield(void) __attribute__((__visibility__("default"))); + +static inline toku_pthread_t toku_pthread_self(void) { return pthread_self(); } + +static inline void *toku_pthread_done(void *exit_value) { + toku_instr_delete_current_thread(); + pthread_exit(exit_value); + return nullptr; // Avoid compiler warning +} diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h new file mode 100644 index 0000000000..3cb5b57902 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h @@ -0,0 +1,179 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
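A sketch of the canonical wait loop with the wrappers above (hypothetical struct and keys). Source location is captured automatically by the `toku_mutex_lock`/`toku_cond_wait` macros, and every pthread return code is checked internally via `assert_zero()`:

```cpp
#include "toku_pthread.h"  // assumed include path

struct waiter {
  toku_mutex_t lock;
  toku_cond_t ready;
  bool signalled;
};

void waiter_init(const toku_instr_key &key_m, const toku_instr_key &key_c,
                 waiter *w) {
  toku_mutex_init(key_m, &w->lock, nullptr);
  toku_cond_init(key_c, &w->ready, nullptr);
  w->signalled = false;
}

void waiter_block(waiter *w) {
  toku_mutex_lock(&w->lock);        // expands with __FILE__/__LINE__
  while (!w->signalled)             // guard against spurious wakeups
    toku_cond_wait(&w->ready, &w->lock);
  toku_mutex_unlock(&w->lock);
}
```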
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+// PORT2: #include
+
+#ifdef HAVE_valgrind
+#undef USE_VALGRIND
+#define USE_VALGRIND 1
+#endif
+
+#if defined(__linux__) && USE_VALGRIND
+
+#include <valgrind/drd.h>
+#include <valgrind/helgrind.h>
+
+#define TOKU_ANNOTATE_NEW_MEMORY(p, size) ANNOTATE_NEW_MEMORY(p, size)
+#define TOKU_VALGRIND_HG_ENABLE_CHECKING(p, size) \
+  VALGRIND_HG_ENABLE_CHECKING(p, size)
+#define TOKU_VALGRIND_HG_DISABLE_CHECKING(p, size) \
+  VALGRIND_HG_DISABLE_CHECKING(p, size)
+#define TOKU_DRD_IGNORE_VAR(v) DRD_IGNORE_VAR(v)
+#define TOKU_DRD_STOP_IGNORING_VAR(v) DRD_STOP_IGNORING_VAR(v)
+#define TOKU_ANNOTATE_IGNORE_READS_BEGIN() ANNOTATE_IGNORE_READS_BEGIN()
+#define TOKU_ANNOTATE_IGNORE_READS_END() ANNOTATE_IGNORE_READS_END()
+#define TOKU_ANNOTATE_IGNORE_WRITES_BEGIN() ANNOTATE_IGNORE_WRITES_BEGIN()
+#define TOKU_ANNOTATE_IGNORE_WRITES_END() ANNOTATE_IGNORE_WRITES_END()
+
+/*
+ * How to make helgrind happy about tree rotations and new mutex orderings:
+ *
+ * // Tell helgrind that we unlocked it so that the next call doesn't get a
+ * "destroyed a locked mutex" error.
+ * // Tell helgrind that we destroyed the mutex.
+ * VALGRIND_HG_MUTEX_UNLOCK_PRE(&locka);
+ * VALGRIND_HG_MUTEX_DESTROY_PRE(&locka);
+ *
+ * // And recreate it. It would be better to simply be able to say that the
+ * order on these two can now be reversed, because this code forgets all the
+ * ordering information for this mutex.
+ * // Then tell helgrind that we have locked it again.
+ * VALGRIND_HG_MUTEX_INIT_POST(&locka, 0);
+ * VALGRIND_HG_MUTEX_LOCK_POST(&locka);
+ *
+ * When the ordering of two locks changes, we don't need to tell Helgrind
+ * about both locks. Just one is good enough.
+ */
+
+#define TOKU_VALGRIND_RESET_MUTEX_ORDERING_INFO(mutex) \
+  VALGRIND_HG_MUTEX_UNLOCK_PRE(mutex);                 \
+  VALGRIND_HG_MUTEX_DESTROY_PRE(mutex);                \
+  VALGRIND_HG_MUTEX_INIT_POST(mutex, 0);               \
+  VALGRIND_HG_MUTEX_LOCK_POST(mutex);
+
+#else  // !defined(__linux__) || !USE_VALGRIND
+
+#define NVALGRIND 1
+#define TOKU_ANNOTATE_NEW_MEMORY(p, size) ((void)0)
+#define TOKU_VALGRIND_HG_ENABLE_CHECKING(p, size) ((void)0)
+#define TOKU_VALGRIND_HG_DISABLE_CHECKING(p, size) ((void)0)
+#define TOKU_DRD_IGNORE_VAR(v)
+#define TOKU_DRD_STOP_IGNORING_VAR(v)
+#define TOKU_ANNOTATE_IGNORE_READS_BEGIN() ((void)0)
+#define TOKU_ANNOTATE_IGNORE_READS_END() ((void)0)
+#define TOKU_ANNOTATE_IGNORE_WRITES_BEGIN() ((void)0)
+#define TOKU_ANNOTATE_IGNORE_WRITES_END() ((void)0)
+#define TOKU_VALGRIND_RESET_MUTEX_ORDERING_INFO(mutex)
+#undef RUNNING_ON_VALGRIND
+#define RUNNING_ON_VALGRIND (0U)
+#endif
+
+// Valgrind 3.10.1 (and previous versions).
+// Problems with VALGRIND_HG_DISABLE_CHECKING and VALGRIND_HG_ENABLE_CHECKING.
+
+// Helgrind's implementation of disable and enable checking causes false races
+// to be reported. In addition, the race report does not include ANY
+// information about the code that uses the helgrind disable and enable
+// functions. Therefore, it is very difficult to figure out the cause of the
+// race. DRD does implement the disable and enable functions.
+
+// Problems with ANNOTATE_IGNORE_READS.
+// Helgrind does not implement ignore reads.
+// Annotate ignore reads is the way to inform DRD to ignore racy reads.
+
+// FT code uses unsafe reads in several places. These unsafe reads have been
+// noted as valid since they use the toku_unsafe_fetch function. Unfortunately,
+// this causes helgrind to report erroneous data races which makes use of
+// helgrind problematic.
+
+// Unsafely fetch and return a `T' from src, telling drd to ignore
+// racey access to src for the next sizeof(*src) bytes
+template <typename T>
+T toku_unsafe_fetch(T *src) {
+  if (0)
+    TOKU_VALGRIND_HG_DISABLE_CHECKING(src,
+                                      sizeof *src);  // disabled, see comment
+  TOKU_ANNOTATE_IGNORE_READS_BEGIN();
+  T r = *src;
+  TOKU_ANNOTATE_IGNORE_READS_END();
+  if (0)
+    TOKU_VALGRIND_HG_ENABLE_CHECKING(src,
+                                     sizeof *src);  // disabled, see comment
+  return r;
+}
+
+template <typename T>
+T toku_unsafe_fetch(T &src) {
+  return toku_unsafe_fetch(&src);
+}
+
+// Unsafely set a `T' value into *dest from src, telling drd to ignore
+// racey access to dest for the next sizeof(*dest) bytes
+template <typename T>
+void toku_unsafe_set(T *dest, const T src) {
+  if (0)
+    TOKU_VALGRIND_HG_DISABLE_CHECKING(dest,
+                                      sizeof *dest);  // disabled, see comment
+  TOKU_ANNOTATE_IGNORE_WRITES_BEGIN();
+  *dest = src;
+  TOKU_ANNOTATE_IGNORE_WRITES_END();
+  if (0)
+    TOKU_VALGRIND_HG_ENABLE_CHECKING(dest,
+                                     sizeof *dest);  // disabled, see comment
+}
+
+template <typename T>
+void toku_unsafe_set(T &dest, const T src) {
+  toku_unsafe_set(&dest, src);
+}
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
new file mode 100644
index 0000000000..9b83c53511
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
@@ -0,0 +1,197 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
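A sketch of the intended use of `toku_unsafe_fetch`/`toku_unsafe_set` (hypothetical counter name): a statistic that tolerates racy reads is sampled without locking, and the annotations keep DRD from flagging the intentional race.

```cpp
#include <cstdint>
#include "toku_race_tools.h"  // assumed include path

static uint64_t g_lock_waits;  // hypothetical counter, updated elsewhere

uint64_t sample_lock_waits() {
  return toku_unsafe_fetch(&g_lock_waits);  // racy read, by design
}

void reset_lock_waits() {
  toku_unsafe_set(&g_lock_waits, uint64_t{0});  // racy write, by design
}
```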
+    See the GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+// PORT2: #include "toku_config.h"
+
+#include <time.h>
+#include <sys/time.h>
+#include <stdint.h>
+#if defined(__powerpc__)
+#include <sys/platform/ppc.h>
+#endif
+
+#if 0
+static inline float toku_tdiff (struct timeval *a, struct timeval *b) {
+    return (float)((a->tv_sec - b->tv_sec) + 1e-6 * (a->tv_usec - b->tv_usec));
+}
+// PORT2: temporary:
+#define HAVE_CLOCK_REALTIME
+#if !defined(HAVE_CLOCK_REALTIME)
+// OS X does not have clock_gettime, we fake clockid_t for the interface, and we'll implement it with clock_get_time.
+typedef int clockid_t;
+// just something bogus, it doesn't matter, we just want to make sure we're
+// only supporting this mode because we're not sure we can support other modes
+// without a real clock_gettime()
+#define CLOCK_REALTIME 0x01867234
+#endif
+int toku_clock_gettime(clockid_t clk_id, struct timespec *ts) __attribute__((__visibility__("default")));
+#endif
+
+// *************** Performance timers ************************
+// What do you really want from a performance timer:
+//  (1) Can determine actual time of day from the performance time.
+//  (2) Time goes forward, never backward.
+//  (3) Same time on different processors (or even different machines).
+//  (4) Time goes forward at a constant rate (doesn't get faster and slower)
+//  (5) Portable.
+//  (6) Getting the time is cheap.
+// Unfortunately it seems tough to get Properties 1-5. So we go for Property 6,
+// but we abstract it. We offer a type tokutime_t which can hold the time. This
+// type can be subtracted to get a time difference. We can get the present time
+// cheaply. We can convert this type to seconds (but that can be expensive). The
+// implementation is to use RDTSC (hence we lose property 3: not portable).
+// Recent machines have constant_tsc in which case we get property (4).
+// Recent OSs on recent machines (that have RDTSCP) fix the per-processor clock
+// skew, so we get property (3). We get property 2 with RDTSC (as long as
+// there's not any skew). We don't even try to get property 1, since we don't
+// need it. The decision here is that these times are really accurate only on
+// modern machines with modern OSs.
+typedef uint64_t tokutime_t;  // Time type used by tokutek timers.
+
+#if 0
+// The value of tokutime_t is not specified here.
+// It might be microseconds since 1/1/1970 (if gettimeofday() is
+// used), or clock cycles since boot (if rdtsc is used).  Or something
+// else.
+// Two tokutime_t values can be subtracted to get a time difference.
+// Use tokutime_to_seconds to convert that difference to seconds.
+// We want get_tokutime() to be fast, but don't care so much about
+// tokutime_to_seconds();
+//
+// For accurate time calculations do the subtraction in the right order:
+//   Right: tokutime_to_seconds(t1-t2);
+//   Wrong: tokutime_to_seconds(t1)-tokutime_to_seconds(t2);
+// Doing it the wrong way is likely to result in loss of precision.
+// A double can hold numbers up to about 53 bits. RDTSC uses about 33 bits
+// every second, so that leaves 2^20 seconds from booting (about 2 weeks)
+// before the RDTSC value cannot be represented accurately as a double.
+//
+double tokutime_to_seconds(tokutime_t)
+    __attribute__((__visibility__("default")));  // Convert tokutime to seconds.
+
+#endif
+
+// Get the value of tokutime for right now. We want this to be fast, so we
+// expose the implementation as RDTSC.
+static inline tokutime_t toku_time_now(void) {
+#if defined(__x86_64__) || defined(__i386__)
+  uint32_t lo, hi;
+  __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
+  return (uint64_t)hi << 32 | lo;
+#elif defined(__aarch64__)
+  uint64_t result;
+  __asm __volatile__("mrs %[rt], cntvct_el0" : [rt] "=r"(result));
+  return result;
+#elif defined(__powerpc__)
+  return __ppc_get_timebase();
+#elif defined(__s390x__)
+  uint64_t result;
+  asm volatile("stckf %0" : "=Q"(result) : : "cc");
+  return result;
+#elif defined(__riscv) && __riscv_xlen == 32
+  uint32_t cycles_lo, cycles_hi0, cycles_hi1;
+  // Implemented in assembly because Clang insisted on branching.
+  asm volatile(
+      "rdcycleh %0\n"
+      "rdcycle %1\n"
+      "rdcycleh %2\n"
+      "sub %0, %0, %2\n"
+      "seqz %0, %0\n"
+      "sub %0, zero, %0\n"
+      "and %1, %1, %0\n"
+      : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
+  return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
+#elif defined(__riscv) && __riscv_xlen == 64
+  uint64_t cycles;
+  asm volatile("rdcycle %0" : "=r"(cycles));
+  return cycles;
+#elif defined(__loongarch64)
+  unsigned long result;
+  asm volatile("rdtime.d\t%0,$r0" : "=r"(result));
+  return result;
+#else
+#error No timer implementation for this platform
+#endif
+}
+
+static inline uint64_t toku_current_time_microsec(void) {
+  struct timeval t;
+  gettimeofday(&t, NULL);
+  return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec;
+}
+
+#if 0
+// sleep microseconds
+static inline void toku_sleep_microsec(uint64_t ms) {
+  struct timeval t;
+
+  t.tv_sec = ms / 1000000;
+  t.tv_usec = ms % 1000000;
+
+  select(0, NULL, NULL, NULL, &t);
+}
+#endif
+
+/*
+  PORT: Usage of this file:
+
+  uint64_t toku_current_time_microsec()  // uses gettimeofday
+    is used to track how much time various operations took (for example, lock
+    escalation). (TODO: it is not clear why these operations are tracked with
+    microsecond precision while others use nanoseconds)
+
+  tokutime_t toku_time_now()  // uses rdtsc
+    seems to be used for a very similar purpose. This has greater precision.
+
+  RocksDB environment provides Env::Default()->NowMicros() and NowNanos() which
+  should be adequate substitutes.
+*/
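+
+// Example (added for exposition; not part of the original sources): timing a
+// code section with the RDTSC-based timer. `work()' is a hypothetical
+// workload; the result is in raw timer ticks, not seconds.
+#if 0
+static inline uint64_t time_work_ticks(void) {
+  tokutime_t t0 = toku_time_now();
+  work();  // hypothetical workload
+  tokutime_t t1 = toku_time_now();
+  return t1 - t0;  // subtract first; convert the difference if seconds needed
+}
+#endif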
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h
new file mode 100644
index 0000000000..803914862f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h
@@ -0,0 +1,27 @@
+//
+// A substitute for ft/txn/txn.h
+//
+#pragma once
+
+#include <set>
+
+#include "../util/omt.h"
+
+typedef uint64_t TXNID;
+#define TXNID_NONE ((TXNID)0)
+
+// A set of transactions
+// (TODO: consider using class toku::txnid_set. The reason for using an STL
+// container was that its API is easier)
+class TxnidVector : public std::set<TXNID> {
+ public:
+  bool contains(TXNID txnid) { return find(txnid) != end(); }
+};
+
+// A value for lock structures with the meaning "the lock is owned by multiple
+// transactions (and one has to check the TxnidVector to get their ids)"
+#define TXNID_SHARED (TXNID(-1))
+
+// Auxiliary value meaning "any transaction id will do". No real transaction
+// may have this as its id.
+#define TXNID_ANY (TXNID(-2))
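+
+// Example (added for exposition; not part of the original sources): how the
+// sentinel TXNIDs and TxnidVector fit together in a lock structure. The
+// function and parameter names are hypothetical.
+#if 0
+static bool txn_owns_lock(TXNID owner, TxnidVector *shared_owners, TXNID txn) {
+  if (owner == TXNID_SHARED)  // multiple owners: consult the vector
+    return shared_owners != nullptr && shared_owners->contains(txn);
+  return owner == txn;  // single owner: compare ids directly
+}
+#endif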
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/dbt.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/dbt.h
new file mode 100644
index 0000000000..d86c440f86
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/dbt.h
@@ -0,0 +1,98 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include "../db.h"
+
+// TODO: John
+// Document this API a little better so that DBT
+// memory management can be more widely understood.
+
+DBT *toku_init_dbt(DBT *);
+
+// returns: an initialized but empty dbt (for which toku_dbt_is_empty() is
+// true)
+DBT toku_empty_dbt(void);
+
+DBT *toku_init_dbt_flags(DBT *, uint32_t flags);
+
+void toku_destroy_dbt(DBT *);
+
+DBT *toku_fill_dbt(DBT *dbt, const void *k, size_t len);
+
+DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len);
+
+DBT *toku_copyref_dbt(DBT *dst, const DBT src);
+
+DBT *toku_clone_dbt(DBT *dst, const DBT &src);
+
+void toku_sdbt_cleanup(struct simple_dbt *sdbt);
+
+// returns: special DBT pointer representing positive infinity
+const DBT *toku_dbt_positive_infinity(void);
+
+// returns: special DBT pointer representing negative infinity
+const DBT *toku_dbt_negative_infinity(void);
+
+// returns: true if the given dbt is either positive or negative infinity
+bool toku_dbt_is_infinite(const DBT *dbt);
+
+// returns: true if the given dbt has no data (ie: dbt->data == nullptr)
+bool toku_dbt_is_empty(const DBT *dbt);
+
+// effect: compares two potentially infinity-valued dbts
+// requires: at least one is infinite (assert otherwise)
+int toku_dbt_infinite_compare(const DBT *a, const DBT *b);
+
+// returns: true if the given dbts have the same data pointer and size
+bool toku_dbt_equals(const DBT *a, const DBT *b);
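+
+// Example (added for exposition; not part of the original sources): the two
+// common ways to wrap a key into a DBT. toku_fill_dbt only references the
+// caller's buffer, while toku_memdup_dbt copies it, so the copy presumably
+// has to be released with toku_destroy_dbt. Names below are hypothetical.
+#if 0
+static void dbt_usage_sketch(const char *key, size_t keylen) {
+  DBT ref;  // borrows `key'; nothing to free
+  toku_fill_dbt(&ref, key, keylen);
+
+  DBT owned;  // owns a private copy of `key'
+  toku_memdup_dbt(&owned, key, keylen);
+  toku_destroy_dbt(&owned);
+}
+#endif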
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h
new file mode 100644
index 0000000000..158750fdba
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h
@@ -0,0 +1,144 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <memory.h>
+
+//******************************************************************************
+//
+// Overview: A growable array is a little bit like std::vector except that
+// it doesn't have constructors (hence can be used in static constructs, since
+// the google style guide says no constructors), and it's a little simpler.
+// Operations:
+//   init and deinit (we don't have constructors and destructors).
+//   fetch_unchecked to get values out.
+//   store_unchecked to put values in.
+//   push to add an element at the end.
+//   get_size to find out the size.
+//   memory_size to find out how much memory the data structure is using.
+//
+//******************************************************************************
+
+namespace toku {
+
+template <typename T>
+class GrowableArray {
+ public:
+  void init(void)
+  // Effect: Initialize the array to contain no elements.
+  {
+    m_array = NULL;
+    m_size = 0;
+    m_size_limit = 0;
+  }
+
+  void deinit(void)
+  // Effect: Deinitialize the array (freeing any memory it uses, for example).
+  {
+    toku_free(m_array);
+    m_array = NULL;
+    m_size = 0;
+    m_size_limit = 0;
+  }
+
+  T fetch_unchecked(size_t i) const
+  // Effect: Fetch the ith element. If i is out of range, the system asserts.
+  {
+    return m_array[i];
+  }
+
+  void store_unchecked(size_t i, T v)
+  // Effect: Store v in the ith element. If i is out of range, the system
+  // asserts.
+  {
+    paranoid_invariant(i < m_size);
+    m_array[i] = v;
+  }
+
+  void push(T v)
+  // Effect: Add v to the end of the array (increasing the size). The
+  // amortized cost of this operation is constant. Implementation hint:
+  // Double the size of the array when it gets too big so that the amortized
+  // cost stays constant.
+  {
+    if (m_size >= m_size_limit) {
+      if (m_array == NULL) {
+        m_size_limit = 1;
+      } else {
+        m_size_limit *= 2;
+      }
+      XREALLOC_N(m_size_limit, m_array);
+    }
+    m_array[m_size++] = v;
+  }
+
+  size_t get_size(void) const
+  // Effect: Return the number of elements in the array.
+  {
+    return m_size;
+  }
+
+  size_t memory_size(void) const
+  // Effect: Return the size (in bytes) that the array occupies in memory.
+  // This is really only an estimate.
+  {
+    return sizeof(*this) + sizeof(T) * m_size_limit;
+  }
+
+ private:
+  T *m_array;
+  size_t m_size;
+  size_t m_size_limit;  // How much space is allocated in array.
+};
+
+}  // namespace toku
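+
+// Example (added for exposition; not part of the original sources): the
+// intended init/push/deinit life cycle. Unlike std::vector there is no
+// constructor or destructor, so init() and deinit() are called explicitly.
+#if 0
+static void growable_array_sketch(void) {
+  toku::GrowableArray<int> a;
+  a.init();  // required before first use
+  for (int i = 0; i < 10; i++) {
+    a.push(i);  // amortized O(1): capacity doubles as needed
+  }
+  int third = a.fetch_unchecked(3);  // no bounds check; caller stays in range
+  (void)third;
+  a.deinit();  // frees the backing storage
+}
+#endif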
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/memarena.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/memarena.h
new file mode 100644
index 0000000000..ddcc1144f2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/memarena.h
@@ -0,0 +1,141 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <stddef.h>
+
+/*
+ * A memarena is used to efficiently store a collection of objects that never
+ * move. The pattern is to allocate more and more stuff and free all of the
+ * items at once. The underlying memory will store 1 or more objects per
+ * chunk. Each chunk is contiguously laid out in memory but chunks are not
+ * necessarily contiguous with each other.
+ */
+class memarena {
+ public:
+  memarena()
+      : _current_chunk(arena_chunk()),
+        _other_chunks(nullptr),
+        _n_other_chunks(0),
+        _size_of_other_chunks(0),
+        _footprint_of_other_chunks(0) {}
+
+  // Effect: Create a memarena with the specified initial size
+  void create(size_t initial_size);
+
+  void destroy(void);
+
+  // Effect: Allocate some memory. The returned value remains valid until the
+  // memarena is cleared or closed.
+  // In case of ENOMEM, aborts.
+  void *malloc_from_arena(size_t size);
+
+  // Effect: Move all the memory from this memarena into DEST.
+  // When SOURCE is closed the memory won't be freed.
+  // When DEST is closed, the memory will be freed, unless DEST moves
+  // its memory to another memarena...
+  void move_memory(memarena *dest);
+
+  // Effect: Calculate the amount of memory used by a memory arena.
+  size_t total_memory_size(void) const;
+
+  // Effect: Calculate the used space of the memory arena (ie: excludes unused
+  // space)
+  size_t total_size_in_use(void) const;
+
+  // Effect: Calculate the amount of memory used, according to
+  // toku_memory_footprint(),
+  // which is a more expensive but more accurate count of memory used.
+  size_t total_footprint(void) const;
+
+  // iterator over the underlying chunks that store objects in the memarena.
+  // a chunk is represented by a pointer to const memory and a usable byte
+  // count.
+  class chunk_iterator {
+   public:
+    chunk_iterator(const memarena *ma) : _ma(ma), _chunk_idx(-1) {}
+
+    // returns: base pointer to the current chunk
+    //          *used set to the number of usable bytes
+    //          if more() is false, returns nullptr and *used = 0
+    const void *current(size_t *used) const;
+
+    // requires: more() is true
+    void next();
+
+    bool more() const;
+
+   private:
+    // -1 represents the 'initial' chunk in a memarena, ie: ma->_current_chunk
+    // >= 0 represents the i'th chunk in the ma->_other_chunks array
+    const memarena *_ma;
+    int _chunk_idx;
+  };
+
+ private:
+  struct arena_chunk {
+    arena_chunk() : buf(nullptr), used(0), size(0) {}
+    char *buf;
+    size_t used;
+    size_t size;
+  };
+
+  struct arena_chunk _current_chunk;
+  struct arena_chunk *_other_chunks;
+  int _n_other_chunks;
+  size_t _size_of_other_chunks;  // the buf_size of all the other chunks.
+  size_t _footprint_of_other_chunks;  // the footprint of all the other chunks.
+
+  friend class memarena_unit_test;
+};
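+
+// Example (added for exposition; not part of the original sources): the arena
+// allocation pattern this class supports. Objects never move once returned,
+// and there is no per-object free; destroy() releases everything at once.
+#if 0
+static void memarena_sketch(void) {
+  memarena ma;
+  ma.create(4096);  // initial chunk size in bytes
+  char *a = static_cast<char *>(ma.malloc_from_arena(100));
+  char *b = static_cast<char *>(ma.malloc_from_arena(200));
+  (void)a;
+  (void)b;
+  ma.destroy();  // frees every chunk, and thus both allocations
+}
+#endif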
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/omt.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/omt.h
new file mode 100644
index 0000000000..f208002d32
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/omt.h
@@ -0,0 +1,794 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <stdint.h>
+#include <string.h>
+
+#include "../portability/toku_portability.h"
+#include "../portability/toku_race_tools.h"
+#include "growable_array.h"
+
+namespace toku {
+
+/**
+ * Order Maintenance Tree (OMT)
+ *
+ * Maintains a collection of totally ordered values, where each value has an
+ * integer weight. The OMT is a mutable datatype.
+ *
+ * The Abstraction:
+ *
+ * An OMT is a vector of values, $V$, where $|V|$ is the length of the vector.
+ * The vector is numbered from $0$ to $|V|-1$.
+ * Each value has a weight. The weight of the $i$th element is denoted
+ * $w(V_i)$.
+ *
+ * We can create a new OMT, which is the empty vector.
+ *
+ * We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where
+ *  $|V'|=1+|V|$ and
+ *
+ *   V'_j = V_j       if $j<i$
+ *   V'_i = x
+ *   V'_j = V_{j-1}   if $j>i$
+ *
+ * We can specify $i$ using a kind of function instead of as an integer.
+ * Let $b$ be a function mapping from values to nonzero integers, such that
+ * the signum of $b$ is monotonically increasing.
+ * We can specify $i$ as the minimum integer such that $b(V_i)>0$.
+ *
+ * We look up a value using its index, or using a Heaviside function.
+ * For lookups, we allow $b$ to be zero for some values, and again the signum
+ * of $b$ must be monotonically increasing. When looking up values, we can
+ * look up
+ *  $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a
+ *   special return code if no such value exists.) (Rationale: Ordinarily we
+ *   want $i$ to be unique. But for various reasons we want to allow multiple
+ *   zeros, and we want the smallest $i$ in that case.)
+ *  $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$. (Or an
+ *   indication that no such value exists.)
+ *  $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an
+ *   indication that no such value exists.)
+ *
+ * When looking up a value using a Heaviside function, we get the value and its
+ * index.
+ *
+ * We can also split an OMT into two OMTs, splitting the weight of the values
+ * evenly. Find a value $j$ such that the values to the left of $j$ have about
+ * the same total weight as the values to the right of $j$. The resulting two
+ * OMTs contain the values to the left of $j$ and the values to the right of
+ * $j$ respectively. All of the values from the original OMT go into one of
+ * the new OMTs. If the weights of the values don't split exactly evenly, then
+ * the implementation has the freedom to choose whether the new left OMT or
+ * the new right OMT is larger.
+ *
+ * Performance:
+ *  Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$
+ *  calls to the Heaviside function. The memory required is O(|V|).
+ *
+ * Usage:
+ *  The omt is templated by two parameters:
+ *   - omtdata_t is what will be stored within the omt. These could be
+ *     pointers or real data types (ints, structs).
+ *   - omtdataout_t is what will be returned by find and related functions. By
+ *     default, it is the same as omtdata_t, but you can set it to
+ *     (omtdata_t *). To create an omt which will store "TXNID"s, for example,
+ *     it is a good idea to typedef the template:
+ *       typedef omt<TXNID> txnid_omt_t;
+ *     If you are storing structs, you may want to be able to get a pointer to
+ *     the data actually stored in the omt (see find_zero). To do this, use
+ *     the second template parameter:
+ *       typedef omt<struct foo, struct foo *> foo_omt_t;
+ */
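+
+// Example (added for exposition; not part of the original sources): a sketch
+// of the basic OMT life cycle described above. The heaviside function and the
+// element type are hypothetical; see insert()/find_zero() below for the
+// exact contracts.
+#if 0
+static int example_heaviside(const uint64_t &stored, const uint64_t &key) {
+  // signum must be monotonically increasing over the stored order
+  return (stored < key) ? -1 : (stored > key) ? +1 : 0;
+}
+
+static void omt_sketch(void) {
+  omt<uint64_t> o;
+  o.create();
+  o.insert<uint64_t, example_heaviside>(42, 42, nullptr);  // keyed insert
+  uint64_t v;
+  uint32_t idx;
+  if (o.find_zero<uint64_t, example_heaviside>(42, &v, &idx) == 0) {
+    // found: v == 42, idx is its position
+  }
+  o.destroy();
+}
+#endif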
+
+namespace omt_internal {
+
+template <bool subtree_supports_marks>
+class subtree_templated {
+ private:
+  uint32_t m_index;
+
+ public:
+  static const uint32_t NODE_NULL = UINT32_MAX;
+  inline void set_to_null(void) { m_index = NODE_NULL; }
+
+  inline bool is_null(void) const { return NODE_NULL == this->get_index(); }
+
+  inline uint32_t get_index(void) const { return m_index; }
+
+  inline void set_index(uint32_t index) {
+    paranoid_invariant(index != NODE_NULL);
+    m_index = index;
+  }
+} __attribute__((__packed__, aligned(4)));
+
+template <>
+class subtree_templated<true> {
+ private:
+  uint32_t m_bitfield;
+  static const uint32_t MASK_INDEX = ~(((uint32_t)1) << 31);
+  static const uint32_t MASK_BIT = ((uint32_t)1) << 31;
+
+  inline void set_index_internal(uint32_t new_index) {
+    m_bitfield = (m_bitfield & MASK_BIT) | new_index;
+  }
+
+ public:
+  static const uint32_t NODE_NULL = INT32_MAX;
+  inline void set_to_null(void) { this->set_index_internal(NODE_NULL); }
+
+  inline bool is_null(void) const { return NODE_NULL == this->get_index(); }
+
+  inline uint32_t get_index(void) const {
+    TOKU_DRD_IGNORE_VAR(m_bitfield);
+    const uint32_t bits = m_bitfield;
+    TOKU_DRD_STOP_IGNORING_VAR(m_bitfield);
+    return bits & MASK_INDEX;
+  }
+
+  inline void set_index(uint32_t index) {
+    paranoid_invariant(index < NODE_NULL);
+    this->set_index_internal(index);
+  }
+
+  inline bool get_bit(void) const {
+    TOKU_DRD_IGNORE_VAR(m_bitfield);
+    const uint32_t bits = m_bitfield;
+    TOKU_DRD_STOP_IGNORING_VAR(m_bitfield);
+    return (bits & MASK_BIT) != 0;
+  }
+
+  inline void enable_bit(void) {
+    // These bits may be set by a thread with a write lock on some
+    // leaf, and the index can be read by another thread with a (read
+    // or write) lock on another leaf. Also, the has_marks_below
+    // bit can be set by two threads simultaneously. Neither of these
+    // are real races, so if we are using DRD we should tell it to
+    // ignore these bits just while we set this bit. If there were a
+    // race in setting the index, that would be a real race.
+    TOKU_DRD_IGNORE_VAR(m_bitfield);
+    m_bitfield |= MASK_BIT;
+    TOKU_DRD_STOP_IGNORING_VAR(m_bitfield);
+  }
+
+  inline void disable_bit(void) { m_bitfield &= MASK_INDEX; }
+} __attribute__((__packed__));
+
+template <typename omtdata_t, bool subtree_supports_marks>
+class omt_node_templated {
+ public:
+  omtdata_t value;
+  uint32_t weight;
+  subtree_templated<subtree_supports_marks> left;
+  subtree_templated<subtree_supports_marks> right;
+
+  // this needs to be in both implementations because we don't have
+  // a "static if" the caller can use
+  inline void clear_stolen_bits(void) {}
+};  // note: originally this class had __attribute__((__packed__, aligned(4)))
+
+template <typename omtdata_t>
+class omt_node_templated<omtdata_t, true> {
+ public:
+  omtdata_t value;
+  uint32_t weight;
+  subtree_templated<true> left;
+  subtree_templated<true> right;
+  inline bool get_marked(void) const { return left.get_bit(); }
+  inline void set_marked_bit(void) { return left.enable_bit(); }
+  inline void unset_marked_bit(void) { return left.disable_bit(); }
+
+  inline bool get_marks_below(void) const { return right.get_bit(); }
+  inline void set_marks_below_bit(void) {
+    // This function can be called by multiple threads.
+    // Checking first reduces cache invalidation.
+    if (!this->get_marks_below()) {
+      right.enable_bit();
+    }
+  }
+  inline void unset_marks_below_bit(void) { right.disable_bit(); }
+
+  inline void clear_stolen_bits(void) {
+    this->unset_marked_bit();
+    this->unset_marks_below_bit();
+  }
+};  // note: originally this class had __attribute__((__packed__, aligned(4)))
+
+}  // namespace omt_internal
+
+template <typename omtdata_t, typename omtdataout_t = omtdata_t,
+          bool supports_marks = false>
+class omt {
+ public:
+  /**
+   * Effect: Create an empty OMT.
+   * Performance: constant time.
+   */
+  void create(void);
+
+  /**
+   * Effect: Create an empty OMT with no internal allocated space.
+   * Performance: constant time.
+   * Rationale: In some cases we need a valid omt but don't want to malloc.
+   */
+  void create_no_array(void);
+
+  /**
+   * Effect: Create a OMT containing values. The number of values is in
+   * numvalues.
+   * Requires: this has not been created yet
+   * Requires: values != NULL
+   * Requires: values is sorted
+   * Performance: time=O(numvalues)
+   * Rationale: Normally to insert N values takes O(N lg N) amortized time.
+   * If the N values are known in advance, are sorted, and the structure is
+   * empty, we can batch insert them much faster.
+   */
+  __attribute__((nonnull)) void create_from_sorted_array(
+      const omtdata_t *const values, const uint32_t numvalues);
+
+  /**
+   * Effect: Create an OMT containing values. The number of values is in
+   * numvalues. On success the OMT takes ownership of *values array, and sets
+   * values=NULL.
+   * Requires: this has not been created yet
+   * Requires: values != NULL
+   * Requires: *values is sorted
+   * Requires: *values was allocated with toku_malloc
+   * Requires: Capacity of the *values array is <= new_capacity
+   * Requires: On success, *values may not be accessed again by the caller.
+   * Performance: time=O(1)
+   * Rationale: create_from_sorted_array takes O(numvalues) time.
+   * By taking ownership of the array, we save a malloc and
+   * memcpy, and possibly a free (if the caller is done with the array).
+   */
+  void create_steal_sorted_array(omtdata_t **const values,
+                                 const uint32_t numvalues,
+                                 const uint32_t new_capacity);
+
+  /**
+   * Effect: Create a new OMT, storing it in *newomt.
+   * The values to the right of index (starting at index) are moved to
+   * *newomt.
+   * Requires: newomt != NULL
+   * Returns
+   *    0 success,
+   *    EINVAL if index > toku_omt_size(omt)
+   * On nonzero return, omt and *newomt are unmodified.
+ * Performance: time=O(n) + * Rationale: We don't need a split-evenly operation. We need to split items + * so that their total sizes are even, and other similar splitting criteria. + * It's easy to split evenly by calling size(), and dividing by two. + */ + __attribute__((nonnull)) int split_at(omt *const newomt, const uint32_t idx); + + /** + * Effect: Appends leftomt and rightomt to produce a new omt. + * Creates this as the new omt. + * leftomt and rightomt are destroyed. + * Performance: time=O(n) is acceptable, but one can imagine implementations + * that are O(\log n) worst-case. + */ + __attribute__((nonnull)) void merge(omt *const leftomt, omt *const rightomt); + + /** + * Effect: Creates a copy of an omt. + * Creates this as the clone. + * Each element is copied directly. If they are pointers, the underlying + * data is not duplicated. Performance: O(n) or the running time of + * fill_array_with_subtree_values() + */ + void clone(const omt &src); + + /** + * Effect: Set the tree to be empty. + * Note: Will not reallocate or resize any memory. + * Performance: time=O(1) + */ + void clear(void); + + /** + * Effect: Destroy an OMT, freeing all its memory. + * If the values being stored are pointers, their underlying data is not + * freed. See free_items() Those values may be freed before or after calling + * toku_omt_destroy. Rationale: Returns no values since free() cannot fail. + * Rationale: Does not free the underlying pointers to reduce complexity. + * Performance: time=O(1) + */ + void destroy(void); + + /** + * Effect: return |this|. + * Performance: time=O(1) + */ + uint32_t size(void) const; + + /** + * Effect: Insert value into the OMT. + * If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST. + * Otherwise, let i be the minimum value such that $h(V_i, v)>0$. + * If no such i exists, then let i be |V| + * Then this has the same effect as + * insert_at(tree, value, i); + * If idx!=NULL then i is stored in *idx + * Requires: The signum of h must be monotonically increasing. + * Returns: + * 0 success + * DB_KEYEXIST the key is present (h was equal to zero for some value) + * On nonzero return, omt is unchanged. + * Performance: time=O(\log N) amortized. + * Rationale: Some future implementation may be O(\log N) worst-case time, but + * O(\log N) amortized is good enough for now. + */ + template + int insert(const omtdata_t &value, const omtcmp_t &v, uint32_t *const idx); + + /** + * Effect: Increases indexes of all items at slot >= idx by 1. + * Insert value into the position at idx. + * Returns: + * 0 success + * EINVAL if idx > this->size() + * On error, omt is unchanged. + * Performance: time=O(\log N) amortized time. + * Rationale: Some future implementation may be O(\log N) worst-case time, but + * O(\log N) amortized is good enough for now. + */ + int insert_at(const omtdata_t &value, const uint32_t idx); + + /** + * Effect: Replaces the item at idx with value. + * Returns: + * 0 success + * EINVAL if idx>=this->size() + * On error, omt is unchanged. + * Performance: time=O(\log N) + * Rationale: The FT needs to be able to replace a value with another copy of + * the same value (allocated in a different location) + * + */ + int set_at(const omtdata_t &value, const uint32_t idx); + + /** + * Effect: Delete the item in slot idx. + * Decreases indexes of all items at slot > idx by 1. + * Returns + * 0 success + * EINVAL if idx>=this->size() + * On error, omt is unchanged. 
+ * Rationale: To delete an item, first find its index using find or find_zero, + * then delete it. Performance: time=O(\log N) amortized. + */ + int delete_at(const uint32_t idx); + + /** + * Effect: Iterate over the values of the omt, from left to right, calling f + * on each value. The first argument passed to f is a ref-to-const of the + * value stored in the omt. The second argument passed to f is the index of + * the value. The third argument passed to f is iterate_extra. The indices run + * from 0 (inclusive) to this->size() (exclusive). Requires: f != NULL + * Returns: + * If f ever returns nonzero, then the iteration stops, and the value + * returned by f is returned by iterate. If f always returns zero, then + * iterate returns 0. Requires: Don't modify the omt while running. (E.g., f + * may not insert or delete values from the omt.) Performance: time=O(i+\log + * N) where i is the number of times f is called, and N is the number of + * elements in the omt. Rationale: Although the functional iterator requires + * defining another function (as opposed to C++ style iterator), it is much + * easier to read. Rationale: We may at some point use functors, but for now + * this is a smaller change from the old OMT. + */ + template + int iterate(iterate_extra_t *const iterate_extra) const; + + /** + * Effect: Iterate over the values of the omt, from left to right, calling f + * on each value. The first argument passed to f is a ref-to-const of the + * value stored in the omt. The second argument passed to f is the index of + * the value. The third argument passed to f is iterate_extra. The indices run + * from 0 (inclusive) to this->size() (exclusive). We will iterate only over + * [left,right) + * + * Requires: left <= right + * Requires: f != NULL + * Returns: + * EINVAL if right > this->size() + * If f ever returns nonzero, then the iteration stops, and the value + * returned by f is returned by iterate_on_range. If f always returns zero, + * then iterate_on_range returns 0. Requires: Don't modify the omt while + * running. (E.g., f may not insert or delete values from the omt.) + * Performance: time=O(i+\log N) where i is the number of times f is called, + * and N is the number of elements in the omt. Rational: Although the + * functional iterator requires defining another function (as opposed to C++ + * style iterator), it is much easier to read. + */ + template + int iterate_on_range(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const; + + /** + * Effect: Iterate over the values of the omt, and mark the nodes that are + * visited. Other than the marks, this behaves the same as iterate_on_range. + * Requires: supports_marks == true + * Performance: time=O(i+\log N) where i is the number of times f is called, + * and N is the number of elements in the omt. Notes: This function MAY be + * called concurrently by multiple threads, but not concurrently with any + * other non-const function. + */ + template + int iterate_and_mark_range(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra); + + /** + * Effect: Iterate over the values of the omt, from left to right, calling f + * on each value whose node has been marked. Other than the marks, this + * behaves the same as iterate. Requires: supports_marks == true Performance: + * time=O(i+\log N) where i is the number of times f is called, and N is the + * number of elements in the omt. 
+ */ + template + int iterate_over_marked(iterate_extra_t *const iterate_extra) const; + + /** + * Effect: Delete all elements from the omt, whose nodes have been marked. + * Requires: supports_marks == true + * Performance: time=O(N + i\log N) where i is the number of marked elements, + * {c,sh}ould be faster + */ + void delete_all_marked(void); + + /** + * Effect: Verify that the internal state of the marks in the tree are + * self-consistent. Crashes the system if the marks are in a bad state. + * Requires: supports_marks == true + * Performance: time=O(N) + * Notes: + * Even though this is a const function, it requires exclusive access. + * Rationale: + * The current implementation of the marks relies on a sort of + * "cache" bit representing the state of bits below it in the tree. + * This allows glass-box testing that these bits are correct. + */ + void verify_marks_consistent(void) const; + + /** + * Effect: None + * Returns whether there are any marks in the tree. + */ + bool has_marks(void) const; + + /** + * Effect: Iterate over the values of the omt, from left to right, calling f + * on each value. The first argument passed to f is a pointer to the value + * stored in the omt. The second argument passed to f is the index of the + * value. The third argument passed to f is iterate_extra. The indices run + * from 0 (inclusive) to this->size() (exclusive). Requires: same as for + * iterate() Returns: same as for iterate() Performance: same as for iterate() + * Rationale: In general, most iterators should use iterate() since they + * should not modify the data stored in the omt. This function is for + * iterators which need to modify values (for example, free_items). Rationale: + * We assume if you are transforming the data in place, you want to do it to + * everything at once, so there is not yet an iterate_on_range_ptr (but there + * could be). + */ + template + void iterate_ptr(iterate_extra_t *const iterate_extra); + + /** + * Effect: Set *value=V_idx + * Returns + * 0 success + * EINVAL if index>=toku_omt_size(omt) + * On nonzero return, *value is unchanged + * Performance: time=O(\log N) + */ + int fetch(const uint32_t idx, omtdataout_t *const value) const; + + /** + * Effect: Find the smallest i such that h(V_i, extra)>=0 + * If there is such an i and h(V_i,extra)==0 then set *idxp=i, set *value = + * V_i, and return 0. If there is such an i and h(V_i,extra)>0 then set + * *idxp=i and return DB_NOTFOUND. If there is no such i then set + * *idx=this->size() and return DB_NOTFOUND. Note: value is of type + * omtdataout_t, which may be of type (omtdata_t) or (omtdata_t *) but is + * fixed by the instantiation. If it is the value type, then the value is + * copied out (even if the value type is a pointer to something else) If it is + * the pointer type, then *value is set to a pointer to the data within the + * omt. This is determined by the type of the omt as initially declared. If + * the omt is declared as omt, then foo_t's will be stored and foo_t's + * will be returned by find and related functions. If the omt is declared as + * omt, then foo_t's will be stored, and pointers to the + * stored items will be returned by find and related functions. Rationale: + * Structs too small for malloc should be stored directly in the omt. + * These structs may need to be edited as they exist inside the omt, so we + * need a way to get a pointer within the omt. Using separate functions for + * returning pointers and values increases code duplication and reduces + * type-checking. 
That also reduces the ability of the creator of a data + * structure to give advice to its future users. Slight overloading in this + * case seemed to provide a better API and better type checking. + */ + template + int find_zero(const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const; + + /** + * Effect: + * If direction >0 then find the smallest i such that h(V_i,extra)>0. + * If direction <0 then find the largest i such that h(V_i,extra)<0. + * (Direction may not be equal to zero.) + * If value!=NULL then store V_i in *value + * If idxp!=NULL then store i in *idxp. + * Requires: The signum of h is monotically increasing. + * Returns + * 0 success + * DB_NOTFOUND no such value is found. + * On nonzero return, *value and *idxp are unchanged + * Performance: time=O(\log N) + * Rationale: + * Here's how to use the find function to find various things + * Cases for find: + * find first value: ( h(v)=+1, direction=+1 ) + * find last value ( h(v)=-1, direction=-1 ) + * find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 ) + * find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 ) + * find X or successor to X ( same as find first X. ) + * + * Rationale: To help understand heaviside functions and behavor of find: + * There are 7 kinds of heaviside functions. + * The signus of the h must be monotonically increasing. + * Given a function of the following form, A is the element + * returned for direction>0, B is the element returned + * for direction<0, C is the element returned for + * direction==0 (see find_zero) (with a return of 0), and D is the element + * returned for direction==0 (see find_zero) with a return of DB_NOTFOUND. + * If any of A, B, or C are not found, then asking for the + * associated direction will return DB_NOTFOUND. + * See find_zero for more information. + * + * Let the following represent the signus of the heaviside function. + * + * -...- + * A + * D + * + * +...+ + * B + * D + * + * 0...0 + * C + * + * -...-0...0 + * AC + * + * 0...0+...+ + * C B + * + * -...-+...+ + * AB + * D + * + * -...-0...0+...+ + * AC B + */ + template + int find(const omtcmp_t &extra, int direction, omtdataout_t *const value, + uint32_t *const idxp) const; + + /** + * Effect: Return the size (in bytes) of the omt, as it resides in main + * memory. If the data stored are pointers, don't include the size of what + * they all point to. 
+ */ + size_t memory_size(void); + + private: + typedef uint32_t node_idx; + typedef omt_internal::subtree_templated subtree; + typedef omt_internal::omt_node_templated omt_node; + ENSURE_POD(subtree); + + struct omt_array { + uint32_t start_idx; + uint32_t num_values; + omtdata_t *values; + }; + + struct omt_tree { + subtree root; + uint32_t free_idx; + omt_node *nodes; + }; + + bool is_array; + uint32_t capacity; + union { + struct omt_array a; + struct omt_tree t; + } d; + + __attribute__((nonnull)) void unmark(const subtree &subtree, + const uint32_t index, + GrowableArray *const indexes); + + void create_internal_no_array(const uint32_t new_capacity); + + void create_internal(const uint32_t new_capacity); + + uint32_t nweight(const subtree &subtree) const; + + node_idx node_malloc(void); + + void node_free(const node_idx idx); + + void maybe_resize_array(const uint32_t n); + + __attribute__((nonnull)) void fill_array_with_subtree_values( + omtdata_t *const array, const subtree &subtree) const; + + void convert_to_array(void); + + __attribute__((nonnull)) void rebuild_from_sorted_array( + subtree *const subtree, const omtdata_t *const values, + const uint32_t numvalues); + + void convert_to_tree(void); + + void maybe_resize_or_convert(const uint32_t n); + + bool will_need_rebalance(const subtree &subtree, const int leftmod, + const int rightmod) const; + + __attribute__((nonnull)) void insert_internal( + subtree *const subtreep, const omtdata_t &value, const uint32_t idx, + subtree **const rebalance_subtree); + + void set_at_internal_array(const omtdata_t &value, const uint32_t idx); + + void set_at_internal(const subtree &subtree, const omtdata_t &value, + const uint32_t idx); + + void delete_internal(subtree *const subtreep, const uint32_t idx, + omt_node *const copyn, + subtree **const rebalance_subtree); + + template + int iterate_internal_array(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const; + + template + void iterate_ptr_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra); + + template + void iterate_ptr_internal_array(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra); + + template + int iterate_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra) const; + + template + int iterate_and_mark_range_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, + const uint32_t idx, + iterate_extra_t *const iterate_extra); + + template + int iterate_over_marked_internal(const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra) const; + + uint32_t verify_marks_consistent_internal(const subtree &subtree, + const bool allow_marks) const; + + void fetch_internal_array(const uint32_t i, omtdataout_t *const value) const; + + void fetch_internal(const subtree &subtree, const uint32_t i, + omtdataout_t *const value) const; + + __attribute__((nonnull)) void fill_array_with_subtree_idxs( + node_idx *const array, const subtree &subtree) const; + + __attribute__((nonnull)) void rebuild_subtree_from_idxs( + subtree *const subtree, const node_idx *const idxs, + const uint32_t numvalues); + + __attribute__((nonnull)) void rebalance(subtree *const subtree); + + __attribute__((nonnull)) static void copyout(omtdata_t *const out, + const omt_node *const n); + + __attribute__((nonnull)) static void copyout(omtdata_t 
**const out, + omt_node *const n); + + __attribute__((nonnull)) static void copyout( + omtdata_t *const out, const omtdata_t *const stored_value_ptr); + + __attribute__((nonnull)) static void copyout( + omtdata_t **const out, omtdata_t *const stored_value_ptr); + + template + int find_internal_zero_array(const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const; + + template + int find_internal_zero(const subtree &subtree, const omtcmp_t &extra, + omtdataout_t *const value, uint32_t *const idxp) const; + + template + int find_internal_plus_array(const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const; + + template + int find_internal_plus(const subtree &subtree, const omtcmp_t &extra, + omtdataout_t *const value, uint32_t *const idxp) const; + + template + int find_internal_minus_array(const omtcmp_t &extra, + omtdataout_t *const value, + uint32_t *const idxp) const; + + template + int find_internal_minus(const subtree &subtree, const omtcmp_t &extra, + omtdataout_t *const value, + uint32_t *const idxp) const; +}; + +} // namespace toku + +// include the implementation here +#include "omt_impl.h" diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h new file mode 100644 index 0000000000..e779867165 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h @@ -0,0 +1,1295 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include + +#include "../db.h" +#include "../portability/memory.h" + +namespace toku { + +template +void omt::create(void) { + this->create_internal(2); + if (supports_marks) { + this->convert_to_tree(); + } +} + +template +void omt::create_no_array(void) { + if (!supports_marks) { + this->create_internal_no_array(0); + } else { + this->is_array = false; + this->capacity = 0; + this->d.t.nodes = nullptr; + this->d.t.root.set_to_null(); + this->d.t.free_idx = 0; + } +} + +template +void omt::create_from_sorted_array( + const omtdata_t *const values, const uint32_t numvalues) { + this->create_internal(numvalues); + memcpy(this->d.a.values, values, numvalues * (sizeof values[0])); + this->d.a.num_values = numvalues; + if (supports_marks) { + this->convert_to_tree(); + } +} + +template +void omt::create_steal_sorted_array( + omtdata_t **const values, const uint32_t numvalues, + const uint32_t new_capacity) { + paranoid_invariant_notnull(values); + this->create_internal_no_array(new_capacity); + this->d.a.num_values = numvalues; + this->d.a.values = *values; + *values = nullptr; + if (supports_marks) { + this->convert_to_tree(); + } +} + +template +int omt::split_at(omt *const newomt, + const uint32_t idx) { + barf_if_marked(*this); + paranoid_invariant_notnull(newomt); + if (idx > this->size()) { + return EINVAL; + } + this->convert_to_array(); + const uint32_t newsize = this->size() - idx; + newomt->create_from_sorted_array(&this->d.a.values[this->d.a.start_idx + idx], + newsize); + this->d.a.num_values = idx; + this->maybe_resize_array(idx); + if (supports_marks) { + this->convert_to_tree(); + } + return 0; +} + +template +void omt::merge(omt *const leftomt, + omt *const rightomt) { + barf_if_marked(*this); + paranoid_invariant_notnull(leftomt); + paranoid_invariant_notnull(rightomt); + const uint32_t leftsize = leftomt->size(); + const uint32_t rightsize = rightomt->size(); + const uint32_t newsize = leftsize + rightsize; + + if (leftomt->is_array) { + if (leftomt->capacity - + (leftomt->d.a.start_idx + leftomt->d.a.num_values) >= + rightsize) { + this->create_steal_sorted_array( + &leftomt->d.a.values, leftomt->d.a.num_values, leftomt->capacity); + this->d.a.start_idx = leftomt->d.a.start_idx; + } else { + this->create_internal(newsize); + memcpy(&this->d.a.values[0], &leftomt->d.a.values[leftomt->d.a.start_idx], + leftomt->d.a.num_values * (sizeof this->d.a.values[0])); + } + } else { + this->create_internal(newsize); + leftomt->fill_array_with_subtree_values(&this->d.a.values[0], + leftomt->d.t.root); + } + leftomt->destroy(); + this->d.a.num_values = leftsize; + + if (rightomt->is_array) { + memcpy(&this->d.a.values[this->d.a.start_idx + this->d.a.num_values], + &rightomt->d.a.values[rightomt->d.a.start_idx], + rightomt->d.a.num_values * (sizeof this->d.a.values[0])); + } else { + rightomt->fill_array_with_subtree_values( + &this->d.a.values[this->d.a.start_idx + this->d.a.num_values], + rightomt->d.t.root); + } + rightomt->destroy(); + this->d.a.num_values += rightsize; + paranoid_invariant(this->size() == newsize); + if (supports_marks) { + this->convert_to_tree(); + } +} + +template +void omt::clone(const omt &src) { + barf_if_marked(*this); + this->create_internal(src.size()); + if (src.is_array) { + memcpy(&this->d.a.values[0], &src.d.a.values[src.d.a.start_idx], + 
src.d.a.num_values * (sizeof this->d.a.values[0])); + } else { + src.fill_array_with_subtree_values(&this->d.a.values[0], src.d.t.root); + } + this->d.a.num_values = src.size(); + if (supports_marks) { + this->convert_to_tree(); + } +} + +template +void omt::clear(void) { + if (this->is_array) { + this->d.a.start_idx = 0; + this->d.a.num_values = 0; + } else { + this->d.t.root.set_to_null(); + this->d.t.free_idx = 0; + } +} + +template +void omt::destroy(void) { + this->clear(); + this->capacity = 0; + if (this->is_array) { + if (this->d.a.values != nullptr) { + toku_free(this->d.a.values); + } + this->d.a.values = nullptr; + } else { + if (this->d.t.nodes != nullptr) { + toku_free(this->d.t.nodes); + } + this->d.t.nodes = nullptr; + } +} + +template +uint32_t omt::size(void) const { + if (this->is_array) { + return this->d.a.num_values; + } else { + return this->nweight(this->d.t.root); + } +} + +template +template +int omt::insert(const omtdata_t &value, + const omtcmp_t &v, + uint32_t *const idx) { + int r; + uint32_t insert_idx; + + r = this->find_zero(v, nullptr, &insert_idx); + if (r == 0) { + if (idx) *idx = insert_idx; + return DB_KEYEXIST; + } + if (r != DB_NOTFOUND) return r; + + if ((r = this->insert_at(value, insert_idx))) return r; + if (idx) *idx = insert_idx; + + return 0; +} + +// The following 3 functions implement a static if for us. +template +static void barf_if_marked(const omt &UU(omt)) { +} + +template +static void barf_if_marked(const omt &omt) { + invariant(!omt.has_marks()); +} + +template +bool omt::has_marks(void) const { + static_assert(supports_marks, "Does not support marks"); + if (this->d.t.root.is_null()) { + return false; + } + const omt_node &node = this->d.t.nodes[this->d.t.root.get_index()]; + return node.get_marks_below() || node.get_marked(); +} + +template +int omt::insert_at( + const omtdata_t &value, const uint32_t idx) { + barf_if_marked(*this); + if (idx > this->size()) { + return EINVAL; + } + + this->maybe_resize_or_convert(this->size() + 1); + if (this->is_array && idx != this->d.a.num_values && + (idx != 0 || this->d.a.start_idx == 0)) { + this->convert_to_tree(); + } + if (this->is_array) { + if (idx == this->d.a.num_values) { + this->d.a.values[this->d.a.start_idx + this->d.a.num_values] = value; + } else { + this->d.a.values[--this->d.a.start_idx] = value; + } + this->d.a.num_values++; + } else { + subtree *rebalance_subtree = nullptr; + this->insert_internal(&this->d.t.root, value, idx, &rebalance_subtree); + if (rebalance_subtree != nullptr) { + this->rebalance(rebalance_subtree); + } + } + return 0; +} + +template +int omt::set_at(const omtdata_t &value, + const uint32_t idx) { + barf_if_marked(*this); + if (idx >= this->size()) { + return EINVAL; + } + + if (this->is_array) { + this->set_at_internal_array(value, idx); + } else { + this->set_at_internal(this->d.t.root, value, idx); + } + return 0; +} + +template +int omt::delete_at( + const uint32_t idx) { + barf_if_marked(*this); + if (idx >= this->size()) { + return EINVAL; + } + + this->maybe_resize_or_convert(this->size() - 1); + if (this->is_array && idx != 0 && idx != this->d.a.num_values - 1) { + this->convert_to_tree(); + } + if (this->is_array) { + // Testing for 0 does not rule out it being the last entry. 
+ // Test explicitly for num_values-1 + if (idx != this->d.a.num_values - 1) { + this->d.a.start_idx++; + } + this->d.a.num_values--; + } else { + subtree *rebalance_subtree = nullptr; + this->delete_internal(&this->d.t.root, idx, nullptr, &rebalance_subtree); + if (rebalance_subtree != nullptr) { + this->rebalance(rebalance_subtree); + } + } + return 0; +} + +template +template +int omt::iterate( + iterate_extra_t *const iterate_extra) const { + return this->iterate_on_range(0, this->size(), + iterate_extra); +} + +template +template +int omt::iterate_on_range( + const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const { + if (right > this->size()) { + return EINVAL; + } + if (left == right) { + return 0; + } + if (this->is_array) { + return this->iterate_internal_array(left, right, + iterate_extra); + } + return this->iterate_internal(left, right, this->d.t.root, + 0, iterate_extra); +} + +template +template +int omt::iterate_and_mark_range( + const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) { + static_assert(supports_marks, "does not support marks"); + if (right > this->size()) { + return EINVAL; + } + if (left == right) { + return 0; + } + paranoid_invariant(!this->is_array); + return this->iterate_and_mark_range_internal( + left, right, this->d.t.root, 0, iterate_extra); +} + +// TODO: We can optimize this if we steal 3 bits. 1 bit: this node is +// marked. 1 bit: left subtree has marks. 1 bit: right subtree has marks. +template +template +int omt::iterate_over_marked( + iterate_extra_t *const iterate_extra) const { + static_assert(supports_marks, "does not support marks"); + paranoid_invariant(!this->is_array); + return this->iterate_over_marked_internal( + this->d.t.root, 0, iterate_extra); +} + +template +void omt::unmark( + const subtree &st, const uint32_t index, + GrowableArray *const indexes) { + if (st.is_null()) { + return; + } + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t index_root = index + this->nweight(n.left); + + const bool below = n.get_marks_below(); + if (below) { + this->unmark(n.left, index, indexes); + } + if (n.get_marked()) { + indexes->push(index_root); + } + n.clear_stolen_bits(); + if (below) { + this->unmark(n.right, index_root + 1, indexes); + } +} + +template +void omt::delete_all_marked(void) { + static_assert(supports_marks, "does not support marks"); + if (!this->has_marks()) { + return; + } + paranoid_invariant(!this->is_array); + GrowableArray marked_indexes; + marked_indexes.init(); + + // Remove all marks. + // We need to delete all the stolen bits before calling delete_at to + // prevent barfing. + this->unmark(this->d.t.root, 0, &marked_indexes); + + for (uint32_t i = 0; i < marked_indexes.get_size(); i++) { + // Delete from left to right, shift by number already deleted. + // Alternative is delete from right to left. 
+ int r = this->delete_at(marked_indexes.fetch_unchecked(i) - i); + lazy_assert_zero(r); + } + marked_indexes.deinit(); + barf_if_marked(*this); +} + +template +uint32_t +omt::verify_marks_consistent_internal( + const subtree &st, const bool UU(allow_marks)) const { + if (st.is_null()) { + return 0; + } + const omt_node &node = this->d.t.nodes[st.get_index()]; + uint32_t num_marks = + verify_marks_consistent_internal(node.left, node.get_marks_below()); + num_marks += + verify_marks_consistent_internal(node.right, node.get_marks_below()); + if (node.get_marks_below()) { + paranoid_invariant(allow_marks); + paranoid_invariant(num_marks > 0); + } else { + // redundant with invariant below, but nice to have explicitly + paranoid_invariant(num_marks == 0); + } + if (node.get_marked()) { + paranoid_invariant(allow_marks); + ++num_marks; + } + return num_marks; +} + +template +void omt::verify_marks_consistent( + void) const { + static_assert(supports_marks, "does not support marks"); + paranoid_invariant(!this->is_array); + this->verify_marks_consistent_internal(this->d.t.root, true); +} + +template +template +void omt::iterate_ptr( + iterate_extra_t *const iterate_extra) { + if (this->is_array) { + this->iterate_ptr_internal_array(0, this->size(), + iterate_extra); + } else { + this->iterate_ptr_internal( + 0, this->size(), this->d.t.root, 0, iterate_extra); + } +} + +template +int omt::fetch( + const uint32_t idx, omtdataout_t *const value) const { + if (idx >= this->size()) { + return EINVAL; + } + if (this->is_array) { + this->fetch_internal_array(idx, value); + } else { + this->fetch_internal(this->d.t.root, idx, value); + } + return 0; +} + +template +template +int omt::find_zero( + const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + uint32_t tmp_index; + uint32_t *const child_idxp = (idxp != nullptr) ? idxp : &tmp_index; + int r; + if (this->is_array) { + r = this->find_internal_zero_array(extra, value, child_idxp); + } else { + r = this->find_internal_zero(this->d.t.root, extra, value, + child_idxp); + } + return r; +} + +template +template +int omt::find( + const omtcmp_t &extra, int direction, omtdataout_t *const value, + uint32_t *const idxp) const { + uint32_t tmp_index; + uint32_t *const child_idxp = (idxp != nullptr) ? 
idxp : &tmp_index; + paranoid_invariant(direction != 0); + if (direction < 0) { + if (this->is_array) { + return this->find_internal_minus_array(extra, value, + child_idxp); + } else { + return this->find_internal_minus(this->d.t.root, extra, + value, child_idxp); + } + } else { + if (this->is_array) { + return this->find_internal_plus_array(extra, value, + child_idxp); + } else { + return this->find_internal_plus(this->d.t.root, extra, value, + child_idxp); + } + } +} + +template +size_t omt::memory_size(void) { + if (this->is_array) { + return (sizeof *this) + this->capacity * (sizeof this->d.a.values[0]); + } + return (sizeof *this) + this->capacity * (sizeof this->d.t.nodes[0]); +} + +template +void omt::create_internal_no_array( + const uint32_t new_capacity) { + this->is_array = true; + this->d.a.start_idx = 0; + this->d.a.num_values = 0; + this->d.a.values = nullptr; + this->capacity = new_capacity; +} + +template +void omt::create_internal( + const uint32_t new_capacity) { + this->create_internal_no_array(new_capacity); + XMALLOC_N(this->capacity, this->d.a.values); +} + +template +uint32_t omt::nweight( + const subtree &st) const { + if (st.is_null()) { + return 0; + } else { + return this->d.t.nodes[st.get_index()].weight; + } +} + +template +typename omt::node_idx +omt::node_malloc(void) { + paranoid_invariant(this->d.t.free_idx < this->capacity); + omt_node &n = this->d.t.nodes[this->d.t.free_idx]; + n.clear_stolen_bits(); + return this->d.t.free_idx++; +} + +template +void omt::node_free( + const node_idx UU(idx)) { + paranoid_invariant(idx < this->capacity); +} + +template +void omt::maybe_resize_array( + const uint32_t n) { + const uint32_t new_size = n <= 2 ? 4 : 2 * n; + const uint32_t room = this->capacity - this->d.a.start_idx; + + if (room < n || this->capacity / 2 >= new_size) { + omtdata_t *XMALLOC_N(new_size, tmp_values); + if (this->d.a.num_values) { + memcpy(tmp_values, &this->d.a.values[this->d.a.start_idx], + this->d.a.num_values * (sizeof tmp_values[0])); + } + this->d.a.start_idx = 0; + this->capacity = new_size; + toku_free(this->d.a.values); + this->d.a.values = tmp_values; + } +} + +template +void omt::fill_array_with_subtree_values(omtdata_t *const array, + const subtree &st) + const { + if (st.is_null()) return; + const omt_node &tree = this->d.t.nodes[st.get_index()]; + this->fill_array_with_subtree_values(&array[0], tree.left); + array[this->nweight(tree.left)] = tree.value; + this->fill_array_with_subtree_values(&array[this->nweight(tree.left) + 1], + tree.right); +} + +template +void omt::convert_to_array(void) { + if (!this->is_array) { + const uint32_t num_values = this->size(); + uint32_t new_size = 2 * num_values; + new_size = new_size < 4 ? 
4 : new_size; + + omtdata_t *XMALLOC_N(new_size, tmp_values); + this->fill_array_with_subtree_values(tmp_values, this->d.t.root); + toku_free(this->d.t.nodes); + this->is_array = true; + this->capacity = new_size; + this->d.a.num_values = num_values; + this->d.a.values = tmp_values; + this->d.a.start_idx = 0; + } +} + +template +void omt::rebuild_from_sorted_array( + subtree *const st, const omtdata_t *const values, + const uint32_t numvalues) { + if (numvalues == 0) { + st->set_to_null(); + } else { + const uint32_t halfway = numvalues / 2; + const node_idx newidx = this->node_malloc(); + omt_node *const newnode = &this->d.t.nodes[newidx]; + newnode->weight = numvalues; + newnode->value = values[halfway]; + st->set_index(newidx); + // update everything before the recursive calls so the second call + // can be a tail call. + this->rebuild_from_sorted_array(&newnode->left, &values[0], halfway); + this->rebuild_from_sorted_array(&newnode->right, &values[halfway + 1], + numvalues - (halfway + 1)); + } +} + +template +void omt::convert_to_tree(void) { + if (this->is_array) { + const uint32_t num_nodes = this->size(); + uint32_t new_size = num_nodes * 2; + new_size = new_size < 4 ? 4 : new_size; + + omt_node *XMALLOC_N(new_size, new_nodes); + omtdata_t *const values = this->d.a.values; + omtdata_t *const tmp_values = &values[this->d.a.start_idx]; + this->is_array = false; + this->d.t.nodes = new_nodes; + this->capacity = new_size; + this->d.t.free_idx = 0; + this->d.t.root.set_to_null(); + this->rebuild_from_sorted_array(&this->d.t.root, tmp_values, num_nodes); + toku_free(values); + } +} + +template +void omt::maybe_resize_or_convert( + const uint32_t n) { + if (this->is_array) { + this->maybe_resize_array(n); + } else { + const uint32_t new_size = n <= 2 ? 4 : 2 * n; + const uint32_t num_nodes = this->nweight(this->d.t.root); + if ((this->capacity / 2 >= new_size) || + (this->d.t.free_idx >= this->capacity && num_nodes < n) || + (this->capacity < n)) { + this->convert_to_array(); + // if we had a free list, the "supports_marks" version could + // just resize, as it is now, we have to convert to and back + // from an array. + if (supports_marks) { + this->convert_to_tree(); + } + } + } +} + +template +bool omt::will_need_rebalance( + const subtree &st, const int leftmod, const int rightmod) const { + if (st.is_null()) { + return false; + } + const omt_node &n = this->d.t.nodes[st.get_index()]; + // one of the 1's is for the root. 
+ // the other is to take ceil(n/2) + const uint32_t weight_left = this->nweight(n.left) + leftmod; + const uint32_t weight_right = this->nweight(n.right) + rightmod; + return ((1 + weight_left < (1 + 1 + weight_right) / 2) || + (1 + weight_right < (1 + 1 + weight_left) / 2)); +} + +template +void omt::insert_internal( + subtree *const subtreep, const omtdata_t &value, const uint32_t idx, + subtree **const rebalance_subtree) { + if (subtreep->is_null()) { + paranoid_invariant_zero(idx); + const node_idx newidx = this->node_malloc(); + omt_node *const newnode = &this->d.t.nodes[newidx]; + newnode->weight = 1; + newnode->left.set_to_null(); + newnode->right.set_to_null(); + newnode->value = value; + subtreep->set_index(newidx); + } else { + omt_node &n = this->d.t.nodes[subtreep->get_index()]; + n.weight++; + if (idx <= this->nweight(n.left)) { + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, 1, 0)) { + *rebalance_subtree = subtreep; + } + this->insert_internal(&n.left, value, idx, rebalance_subtree); + } else { + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, 0, 1)) { + *rebalance_subtree = subtreep; + } + const uint32_t sub_index = idx - this->nweight(n.left) - 1; + this->insert_internal(&n.right, value, sub_index, rebalance_subtree); + } + } +} + +template +void omt::set_at_internal_array( + const omtdata_t &value, const uint32_t idx) { + this->d.a.values[this->d.a.start_idx + idx] = value; +} + +template +void omt::set_at_internal( + const subtree &st, const omtdata_t &value, const uint32_t idx) { + paranoid_invariant(!st.is_null()); + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t leftweight = this->nweight(n.left); + if (idx < leftweight) { + this->set_at_internal(n.left, value, idx); + } else if (idx == leftweight) { + n.value = value; + } else { + this->set_at_internal(n.right, value, idx - leftweight - 1); + } +} + +template +void omt::delete_internal( + subtree *const subtreep, const uint32_t idx, omt_node *const copyn, + subtree **const rebalance_subtree) { + paranoid_invariant_notnull(subtreep); + paranoid_invariant_notnull(rebalance_subtree); + paranoid_invariant(!subtreep->is_null()); + omt_node &n = this->d.t.nodes[subtreep->get_index()]; + const uint32_t leftweight = this->nweight(n.left); + if (idx < leftweight) { + n.weight--; + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, -1, 0)) { + *rebalance_subtree = subtreep; + } + this->delete_internal(&n.left, idx, copyn, rebalance_subtree); + } else if (idx == leftweight) { + if (n.left.is_null()) { + const uint32_t oldidx = subtreep->get_index(); + *subtreep = n.right; + if (copyn != nullptr) { + copyn->value = n.value; + } + this->node_free(oldidx); + } else if (n.right.is_null()) { + const uint32_t oldidx = subtreep->get_index(); + *subtreep = n.left; + if (copyn != nullptr) { + copyn->value = n.value; + } + this->node_free(oldidx); + } else { + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, 0, -1)) { + *rebalance_subtree = subtreep; + } + // don't need to copy up value, it's only used by this + // next call, and when that gets to the bottom there + // won't be any more recursion + n.weight--; + this->delete_internal(&n.right, 0, &n, rebalance_subtree); + } + } else { + n.weight--; + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, 0, -1)) { + *rebalance_subtree = subtreep; + } + this->delete_internal(&n.right, idx - leftweight - 1, copyn, + rebalance_subtree); + } +} + 
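+// Editor's sketch (hypothetical helper, kept under #if 0 so it is not part
+// of the vendored build): a standalone rendering of the weight-balance
+// predicate used by omt::will_need_rebalance() above.  A subtree needs
+// rebalancing when one side, after applying the pending +/-1 modification,
+// holds less than roughly half the weight of the other; the extra 1s account
+// for the root node and make the halving round up.
+#if 0
+#include <stdint.h>
+
+static bool sketch_will_need_rebalance(uint32_t weight_left, int leftmod,
+                                       uint32_t weight_right, int rightmod) {
+    const uint32_t wl = weight_left + leftmod;
+    const uint32_t wr = weight_right + rightmod;
+    // E.g. wl == 1, wr == 6: 1 + 1 < (1 + 1 + 6) / 2 == 4, so rebalance.
+    return (1 + wl < (1 + 1 + wr) / 2) ||
+           (1 + wr < (1 + 1 + wl) / 2);
+}
+#endif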
+template +template +int omt::iterate_internal_array( + const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const { + int r; + for (uint32_t i = left; i < right; ++i) { + r = f(this->d.a.values[this->d.a.start_idx + i], i, iterate_extra); + if (r != 0) { + return r; + } + } + return 0; +} + +template +template +void omt::iterate_ptr_internal( + const uint32_t left, const uint32_t right, const subtree &st, + const uint32_t idx, iterate_extra_t *const iterate_extra) { + if (!st.is_null()) { + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t idx_root = idx + this->nweight(n.left); + if (left < idx_root) { + this->iterate_ptr_internal(left, right, n.left, idx, + iterate_extra); + } + if (left <= idx_root && idx_root < right) { + int r = f(&n.value, idx_root, iterate_extra); + lazy_assert_zero(r); + } + if (idx_root + 1 < right) { + this->iterate_ptr_internal( + left, right, n.right, idx_root + 1, iterate_extra); + } + } +} + +template +template +void omt::iterate_ptr_internal_array( + const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) { + for (uint32_t i = left; i < right; ++i) { + int r = f(&this->d.a.values[this->d.a.start_idx + i], i, iterate_extra); + lazy_assert_zero(r); + } +} + +template +template +int omt::iterate_internal( + const uint32_t left, const uint32_t right, const subtree &st, + const uint32_t idx, iterate_extra_t *const iterate_extra) const { + if (st.is_null()) { + return 0; + } + int r; + const omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t idx_root = idx + this->nweight(n.left); + if (left < idx_root) { + r = this->iterate_internal(left, right, n.left, idx, + iterate_extra); + if (r != 0) { + return r; + } + } + if (left <= idx_root && idx_root < right) { + r = f(n.value, idx_root, iterate_extra); + if (r != 0) { + return r; + } + } + if (idx_root + 1 < right) { + return this->iterate_internal( + left, right, n.right, idx_root + 1, iterate_extra); + } + return 0; +} + +template +template +int omt:: + iterate_and_mark_range_internal(const uint32_t left, const uint32_t right, + const subtree &st, const uint32_t idx, + iterate_extra_t *const iterate_extra) { + paranoid_invariant(!st.is_null()); + int r; + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t idx_root = idx + this->nweight(n.left); + if (left < idx_root && !n.left.is_null()) { + n.set_marks_below_bit(); + r = this->iterate_and_mark_range_internal( + left, right, n.left, idx, iterate_extra); + if (r != 0) { + return r; + } + } + if (left <= idx_root && idx_root < right) { + n.set_marked_bit(); + r = f(n.value, idx_root, iterate_extra); + if (r != 0) { + return r; + } + } + if (idx_root + 1 < right && !n.right.is_null()) { + n.set_marks_below_bit(); + return this->iterate_and_mark_range_internal( + left, right, n.right, idx_root + 1, iterate_extra); + } + return 0; +} + +template +template +int omt::iterate_over_marked_internal( + const subtree &st, const uint32_t idx, + iterate_extra_t *const iterate_extra) const { + if (st.is_null()) { + return 0; + } + int r; + const omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t idx_root = idx + this->nweight(n.left); + if (n.get_marks_below()) { + r = this->iterate_over_marked_internal(n.left, idx, + iterate_extra); + if (r != 0) { + return r; + } + } + if (n.get_marked()) { + r = f(n.value, idx_root, iterate_extra); + if (r != 0) { + return r; + } + } + if (n.get_marks_below()) { + return this->iterate_over_marked_internal( + n.right, idx_root + 1, 
iterate_extra); + } + return 0; +} + +template +void omt::fetch_internal_array( + const uint32_t i, omtdataout_t *const value) const { + if (value != nullptr) { + copyout(value, &this->d.a.values[this->d.a.start_idx + i]); + } +} + +template +void omt::fetch_internal( + const subtree &st, const uint32_t i, omtdataout_t *const value) const { + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t leftweight = this->nweight(n.left); + if (i < leftweight) { + this->fetch_internal(n.left, i, value); + } else if (i == leftweight) { + if (value != nullptr) { + copyout(value, &n); + } + } else { + this->fetch_internal(n.right, i - leftweight - 1, value); + } +} + +template +void omt::fill_array_with_subtree_idxs( + node_idx *const array, const subtree &st) const { + if (!st.is_null()) { + const omt_node &tree = this->d.t.nodes[st.get_index()]; + this->fill_array_with_subtree_idxs(&array[0], tree.left); + array[this->nweight(tree.left)] = st.get_index(); + this->fill_array_with_subtree_idxs(&array[this->nweight(tree.left) + 1], + tree.right); + } +} + +template +void omt::rebuild_subtree_from_idxs( + subtree *const st, const node_idx *const idxs, const uint32_t numvalues) { + if (numvalues == 0) { + st->set_to_null(); + } else { + uint32_t halfway = numvalues / 2; + st->set_index(idxs[halfway]); + // node_idx newidx = idxs[halfway]; + omt_node &newnode = this->d.t.nodes[st->get_index()]; + newnode.weight = numvalues; + // value is already in there. + this->rebuild_subtree_from_idxs(&newnode.left, &idxs[0], halfway); + this->rebuild_subtree_from_idxs(&newnode.right, &idxs[halfway + 1], + numvalues - (halfway + 1)); + // n_idx = newidx; + } +} + +template +void omt::rebalance( + subtree *const st) { + node_idx idx = st->get_index(); + if (idx == this->d.t.root.get_index()) { + // Try to convert to an array. + // If this fails, (malloc) nothing will have changed. + // In the failure case we continue on to the standard rebalance + // algorithm. + this->convert_to_array(); + if (supports_marks) { + this->convert_to_tree(); + } + } else { + const omt_node &n = this->d.t.nodes[idx]; + node_idx *tmp_array; + size_t mem_needed = n.weight * (sizeof tmp_array[0]); + size_t mem_free = + (this->capacity - this->d.t.free_idx) * (sizeof this->d.t.nodes[0]); + bool malloced; + if (mem_needed <= mem_free) { + // There is sufficient free space at the end of the nodes array + // to hold enough node indexes to rebalance. 
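+            // Editor's note (illustrative, not from the original authors):
+            // borrowing the tail of the nodes array as scratch space is safe
+            // because entries at or beyond free_idx have never been handed
+            // out by node_malloc(), and rebuild_subtree_from_idxs() only
+            // rewrites nodes whose indexes are below free_idx, so the
+            // scratch region is never clobbered while still in use.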
+ malloced = false; + tmp_array = + reinterpret_cast(&this->d.t.nodes[this->d.t.free_idx]); + } else { + malloced = true; + XMALLOC_N(n.weight, tmp_array); + } + this->fill_array_with_subtree_idxs(tmp_array, *st); + this->rebuild_subtree_from_idxs(st, tmp_array, n.weight); + if (malloced) toku_free(tmp_array); + } +} + +template +void omt::copyout( + omtdata_t *const out, const omt_node *const n) { + *out = n->value; +} + +template +void omt::copyout( + omtdata_t **const out, omt_node *const n) { + *out = &n->value; +} + +template +void omt::copyout( + omtdata_t *const out, const omtdata_t *const stored_value_ptr) { + *out = *stored_value_ptr; +} + +template +void omt::copyout( + omtdata_t **const out, omtdata_t *const stored_value_ptr) { + *out = stored_value_ptr; +} + +template +template +int omt::find_internal_zero_array( + const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = this->d.a.start_idx; + uint32_t limit = this->d.a.start_idx + this->d.a.num_values; + uint32_t best_pos = subtree::NODE_NULL; + uint32_t best_zero = subtree::NODE_NULL; + + while (min != limit) { + uint32_t mid = (min + limit) / 2; + int hv = h(this->d.a.values[mid], extra); + if (hv < 0) { + min = mid + 1; + } else if (hv > 0) { + best_pos = mid; + limit = mid; + } else { + best_zero = mid; + limit = mid; + } + } + if (best_zero != subtree::NODE_NULL) { + // Found a zero + if (value != nullptr) { + copyout(value, &this->d.a.values[best_zero]); + } + *idxp = best_zero - this->d.a.start_idx; + return 0; + } + if (best_pos != subtree::NODE_NULL) + *idxp = best_pos - this->d.a.start_idx; + else + *idxp = this->d.a.num_values; + return DB_NOTFOUND; +} + +template +template +int omt::find_internal_zero( + const subtree &st, const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (st.is_null()) { + *idxp = 0; + return DB_NOTFOUND; + } + omt_node &n = this->d.t.nodes[st.get_index()]; + int hv = h(n.value, extra); + if (hv < 0) { + int r = this->find_internal_zero(n.right, extra, value, idxp); + *idxp += this->nweight(n.left) + 1; + return r; + } else if (hv > 0) { + return this->find_internal_zero(n.left, extra, value, idxp); + } else { + int r = this->find_internal_zero(n.left, extra, value, idxp); + if (r == DB_NOTFOUND) { + *idxp = this->nweight(n.left); + if (value != nullptr) { + copyout(value, &n); + } + r = 0; + } + return r; + } +} + +template +template +int omt::find_internal_plus_array( + const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = this->d.a.start_idx; + uint32_t limit = this->d.a.start_idx + this->d.a.num_values; + uint32_t best = subtree::NODE_NULL; + + while (min != limit) { + const uint32_t mid = (min + limit) / 2; + const int hv = h(this->d.a.values[mid], extra); + if (hv > 0) { + best = mid; + limit = mid; + } else { + min = mid + 1; + } + } + if (best == subtree::NODE_NULL) { + return DB_NOTFOUND; + } + if (value != nullptr) { + copyout(value, &this->d.a.values[best]); + } + *idxp = best - this->d.a.start_idx; + return 0; +} + +template +template +int omt::find_internal_plus( + const subtree &st, const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (st.is_null()) { + return DB_NOTFOUND; + } + omt_node *const n = &this->d.t.nodes[st.get_index()]; + int hv = h(n->value, extra); + int r; + if (hv > 0) { 
+ r = this->find_internal_plus(n->left, extra, value, idxp); + if (r == DB_NOTFOUND) { + *idxp = this->nweight(n->left); + if (value != nullptr) { + copyout(value, n); + } + r = 0; + } + } else { + r = this->find_internal_plus(n->right, extra, value, idxp); + if (r == 0) { + *idxp += this->nweight(n->left) + 1; + } + } + return r; +} + +template +template +int omt::find_internal_minus_array( + const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = this->d.a.start_idx; + uint32_t limit = this->d.a.start_idx + this->d.a.num_values; + uint32_t best = subtree::NODE_NULL; + + while (min != limit) { + const uint32_t mid = (min + limit) / 2; + const int hv = h(this->d.a.values[mid], extra); + if (hv < 0) { + best = mid; + min = mid + 1; + } else { + limit = mid; + } + } + if (best == subtree::NODE_NULL) { + return DB_NOTFOUND; + } + if (value != nullptr) { + copyout(value, &this->d.a.values[best]); + } + *idxp = best - this->d.a.start_idx; + return 0; +} + +template +template +int omt::find_internal_minus( + const subtree &st, const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (st.is_null()) { + return DB_NOTFOUND; + } + omt_node *const n = &this->d.t.nodes[st.get_index()]; + int hv = h(n->value, extra); + if (hv < 0) { + int r = + this->find_internal_minus(n->right, extra, value, idxp); + if (r == 0) { + *idxp += this->nweight(n->left) + 1; + } else if (r == DB_NOTFOUND) { + *idxp = this->nweight(n->left); + if (value != nullptr) { + copyout(value, n); + } + r = 0; + } + return r; + } else { + return this->find_internal_minus(n->left, extra, value, idxp); + } +} +} // namespace toku diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h new file mode 100644 index 0000000000..f20eeedf2f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h @@ -0,0 +1,165 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+// Overview: A partitioned_counter provides a counter that can be incremented
+// and the running sum can be read at any time.
+// We assume that increments are frequent, whereas reading is infrequent.
+// Implementation hint: Use thread-local storage so each thread increments its
+// own data.  The increment does not require a lock or atomic operation.
+// Reading the data can be performed by iterating over the thread-local
+// versions, summing them up.  The data structure also includes a sum for all
+// the threads that have died.  Use a pthread_key to create the thread-local
+// versions.  When a thread finishes, the system calls the pthread_key
+// destructor, which can add that thread's copy into the sum_of_dead counter.
+// Rationale: For statistics such as are found in engine status, we need a
+// counter that requires no cache misses to increment.  We've seen significant
+// performance speedups by removing certain counters.  Rather than removing
+// those statistics, we would like to just make the counter fast.  We generally
+// increment the counters frequently, and want to fetch the values
+// infrequently.  The counters are monotonic.  The counters can be split into
+// many counters, which can be summed up at the end.  We don't care if we get
+// slightly out-of-date counter sums when we read the counter.  We don't care
+// if there is a race on reading a counter variable and incrementing.
+// See tests/test_partitioned_counter.c for some performance measurements.
+// Operations:
+//   create_partitioned_counter     Create a counter initialized to zero.
+//   destroy_partitioned_counter    Destroy it.
+//   increment_partitioned_counter  Increment it.  This is the frequent
+//                                  operation.
+//   read_partitioned_counter       Get the current value.  This is
+//                                  infrequent.
+// See partitioned_counter.cc for the abstraction function and representation
+// invariant.
+//
+// The google style guide says to avoid using constructors, and it appears that
+// constructors may have broken all the tests, because they called
+// pthread_key_create before the key was actually created.  So the google style
+// guide may have some wisdom there...
+//
+// This version does not use constructors, essentially reverting to the google
+// C++ style guide.
+//
+
+// The old C interface.  This required a bunch of explicit
+// __attribute__((__destructor__)) functions to remember to destroy counters
+// at the end.
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef struct partitioned_counter *PARTITIONED_COUNTER;
+PARTITIONED_COUNTER create_partitioned_counter(void);
+// Effect: Create a counter, initialized to zero.
+
+void destroy_partitioned_counter(PARTITIONED_COUNTER);
+// Effect: Destroy the counter.  No operations on that counter are permitted
+// after this.
+
+void increment_partitioned_counter(PARTITIONED_COUNTER, uint64_t amount);
+// Effect: Increment the counter by amount.
+// Requires: No overflows.  This is a 64-bit unsigned counter.
+
+uint64_t read_partitioned_counter(PARTITIONED_COUNTER)
+    __attribute__((__visibility__("default")));
+// Effect: Return the current value of the counter.
+
+void partitioned_counters_init(void);
+// Effect: Initialize any partitioned counters data structures that must be set
+// up before any partitioned counters run.
+
+void partitioned_counters_destroy(void);
+// Effect: Destroy any partitioned counters data structures.
+
+#if defined(__cplusplus)
+};
+#endif
+
+#if 0
+#include <pthread.h>
+
+#include "fttypes.h"
+
+// Used inside the PARTITIONED_COUNTER.
+struct linked_list_head {
+    struct linked_list_element *first;
+};
+
+
+class PARTITIONED_COUNTER {
+public:
+    PARTITIONED_COUNTER(void);
+    // Effect: Construct a counter, initialized to zero.
+
+    ~PARTITIONED_COUNTER(void);
+    // Effect: Destruct the counter.
+
+    void increment(uint64_t amount);
+    // Effect: Increment the counter by amount.  This is a 64-bit unsigned
+    // counter, and if you overflow it, you will get overflowed results
+    // (that is, mod 2^64).
+    // Requires: Don't use this from a static constructor or destructor.
+
+    uint64_t read(void);
+    // Effect: Read the sum.
+    // Requires: Don't use this from a static constructor or destructor.
+
+private:
+    uint64_t _sum_of_dead;  // The sum of all thread-local counts from
+                            // threads that have terminated.
+    pthread_key_t _key;     // The pthread_key which gives us the hook to
+                            // construct and destruct thread-local storage.
+    struct linked_list_head _ll_counter_head;  // A linked list of all the
+                                               // thread-local information
+                                               // for this counter.
+
+    // This function is used to destroy the thread-local part of the state
+    // when a thread terminates.  But it's not the destructor for the local
+    // part of the counter, it's a destructor on a "dummy" key just so that
+    // we get a notification when a thread ends.
+    friend void destroy_thread_local_part_of_partitioned_counters(void *);
+};
+#endif
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/status.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/status.h
new file mode 100644
index 0000000000..3fd0095d0f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/lib/util/status.h
@@ -0,0 +1,76 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+======= */
+
+#ident \
+    "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include "partitioned_counter.h"
+// PORT2: #include
+
+#define TOKUFT_STATUS_INIT(array, k, c, t, l, inc)                         \
+    do {                                                                   \
+        array.status[k].keyname = #k;                                      \
+        array.status[k].columnname = #c;                                   \
+        array.status[k].type = t;                                          \
+        array.status[k].legend = l;                                        \
+        constexpr_static_assert(                                           \
+            strcmp(#c, "NULL") && strcmp(#c, "0"),                         \
+            "Use nullptr for no column name instead of NULL, 0, etc...");  \
+        constexpr_static_assert(                                           \
+            (inc) == TOKU_ENGINE_STATUS || strcmp(#c, "nullptr"),          \
+            "Missing column name.");                                       \
+        array.status[k].include =                                          \
+            static_cast<toku_engine_status_include_type>(inc);             \
+        if (t == STATUS_PARCOUNT) {                                        \
+            array.status[k].value.parcount = create_partitioned_counter(); \
+        }                                                                  \
+    } while (0)
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h
new file mode 100644
index 0000000000..2652add159
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h
@@ -0,0 +1,135 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+ +#pragma once +#ifndef OS_WIN + +// For DeadlockInfoBuffer: +#include "util/thread_local.h" +#include "utilities/transactions/lock/point/point_lock_manager.h" +#include "utilities/transactions/lock/range/range_lock_manager.h" + +// Lock Tree library: +#include "utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h" +#include "utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h" +#include "utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h" + +namespace ROCKSDB_NAMESPACE { + +typedef DeadlockInfoBufferTempl RangeDeadlockInfoBuffer; + +// A Range Lock Manager that uses PerconaFT's locktree library +class RangeTreeLockManager : public RangeLockManagerBase, + public RangeLockManagerHandle { + public: + LockManager* getLockManager() override { return this; } + + void AddColumnFamily(const ColumnFamilyHandle* cfh) override; + void RemoveColumnFamily(const ColumnFamilyHandle* cfh) override; + + void Resize(uint32_t) override; + std::vector GetDeadlockInfoBuffer() override; + + std::vector GetRangeDeadlockInfoBuffer() override; + void SetRangeDeadlockInfoBufferSize(uint32_t target_size) override; + + // Get a lock on a range + // @note only exclusive locks are currently supported (requesting a + // non-exclusive lock will get an exclusive one) + using LockManager::TryLock; + Status TryLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id, + const Endpoint& start_endp, const Endpoint& end_endp, Env* env, + bool exclusive) override; + + void UnLock(PessimisticTransaction* txn, const LockTracker& tracker, + Env* env) override; + void UnLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id, + const std::string& key, Env* env) override; + void UnLock(PessimisticTransaction*, ColumnFamilyId, const Endpoint&, + const Endpoint&, Env*) override { + // TODO: range unlock does nothing... + } + + explicit RangeTreeLockManager( + std::shared_ptr mutex_factory); + + ~RangeTreeLockManager() override; + + int SetMaxLockMemory(size_t max_lock_memory) override { + return ltm_.set_max_lock_memory(max_lock_memory); + } + + size_t GetMaxLockMemory() override { return ltm_.get_max_lock_memory(); } + + Counters GetStatus() override; + + bool IsPointLockSupported() const override { + // One could have acquired a point lock (it is reduced to range lock) + return true; + } + + PointLockStatus GetPointLockStatus() override; + + // This is from LockManager + LockManager::RangeLockStatus GetRangeLockStatus() override; + + // This has the same meaning as GetRangeLockStatus but is from + // RangeLockManagerHandle + RangeLockManagerHandle::RangeLockStatus GetRangeLockStatusData() override { + return GetRangeLockStatus(); + } + + bool IsRangeLockSupported() const override { return true; } + + const LockTrackerFactory& GetLockTrackerFactory() const override { + return RangeTreeLockTrackerFactory::Get(); + } + + // Get the locktree which stores locks for the Column Family with given cf_id + std::shared_ptr GetLockTreeForCF(ColumnFamilyId cf_id); + + void SetEscalationBarrierFunc(EscalationBarrierFunc func) override { + barrier_func_ = func; + } + + private: + toku::locktree_manager ltm_; + + EscalationBarrierFunc barrier_func_ = + [](const Endpoint&, const Endpoint&) -> bool { return false; }; + + std::shared_ptr mutex_factory_; + + // Map from cf_id to locktree*. Can only be accessed while holding the + // ltree_map_mutex_. 
+  // Must use a custom deleter that calls ltm_.release_lt
+  using LockTreeMap =
+      std::unordered_map<ColumnFamilyId, std::shared_ptr<toku::locktree>>;
+  LockTreeMap ltree_map_;
+
+  InstrumentedMutex ltree_map_mutex_;
+
+  // Per-thread cache of ltree_map_.
+  // (uses the same approach as TransactionLockMgr::lock_maps_cache_)
+  std::unique_ptr<ThreadLocalPtr> ltree_lookup_cache_;
+
+  RangeDeadlockInfoBuffer dlock_buffer_;
+
+  std::shared_ptr<toku::locktree> MakeLockTreePtr(toku::locktree* lt);
+  static int CompareDbtEndpoints(void* arg, const DBT* a_key, const DBT* b_key);
+
+  // Callbacks
+  static int on_create(toku::locktree*, void*);
+  static void on_destroy(toku::locktree*) {}
+  static void on_escalate(TXNID txnid, const toku::locktree* lt,
+                          const toku::range_buffer& buffer, void* extra);
+
+  static bool OnEscalationBarrierCheck(const DBT* a, const DBT* b, void* extra);
+};
+
+void serialize_endpoint(const Endpoint& endp, std::string* buf);
+void wait_callback_for_locktree(void* cdata, toku::lock_wait_infos* infos);
+
+} // namespace ROCKSDB_NAMESPACE
+#endif // OS_WIN
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h
new file mode 100644
index 0000000000..4ef48d2527
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h
@@ -0,0 +1,146 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "util/mutexlock.h"
+#include "utilities/transactions/lock/lock_tracker.h"
+#include "utilities/transactions/pessimistic_transaction.h"
+
+// Range Locking:
+#include "lib/locktree/lock_request.h"
+#include "lib/locktree/locktree.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class RangeTreeLockManager;
+
+// Storage for locks that are currently held by a transaction.
+//
+// Locks are kept in toku::range_buffer because toku::locktree::release_locks()
+// accepts that as an argument.
+//
+// Note: the list of locks may differ slightly from the contents of the lock
+// tree, due to concurrency between lock acquisition, lock release, and lock
+// escalation. See MDEV-18227 and RangeTreeLockManager::UnLock for details.
+// This property is currently harmless.
+//
+// Append() and ReleaseLocks() are not thread-safe, as they are expected to be
+// called only by the owner transaction. ReplaceLocks() is safe to call from
+// other threads.
+class RangeLockList {
+ public:
+  ~RangeLockList() { Clear(); }
+
+  RangeLockList() : releasing_locks_(false) {}
+
+  void Append(ColumnFamilyId cf_id, const DBT* left_key, const DBT* right_key);
+  void ReleaseLocks(RangeTreeLockManager* mgr, PessimisticTransaction* txn,
+                    bool all_trx_locks);
+  void ReplaceLocks(const toku::locktree* lt, const toku::range_buffer& buffer);
+
+ private:
+  void Clear() {
+    for (auto it : buffers_) {
+      it.second->destroy();
+    }
+    buffers_.clear();
+  }
+
+  std::unordered_map<ColumnFamilyId, std::shared_ptr<toku::range_buffer>>
+      buffers_;
+  port::Mutex mutex_;
+  std::atomic<bool> releasing_locks_;
+};
+
+// A LockTracker-based object that is used together with RangeTreeLockManager.
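+// Editor's note (illustrative, not from the original authors): the tracker's
+// only real state is range_list_, the RangeLockList defined above; most of
+// the LockTracker interface below is deliberately stubbed out because the
+// authoritative lock bookkeeping lives in the locktree itself.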
+class RangeTreeLockTracker : public LockTracker { + public: + RangeTreeLockTracker() : range_list_(nullptr) {} + + RangeTreeLockTracker(const RangeTreeLockTracker&) = delete; + RangeTreeLockTracker& operator=(const RangeTreeLockTracker&) = delete; + + void Track(const PointLockRequest&) override; + void Track(const RangeLockRequest&) override; + + bool IsPointLockSupported() const override { + // This indicates that we don't implement GetPointLockStatus() + return false; + } + bool IsRangeLockSupported() const override { return true; } + + // a Not-supported dummy implementation. + UntrackStatus Untrack(const RangeLockRequest& /*lock_request*/) override { + return UntrackStatus::NOT_TRACKED; + } + + UntrackStatus Untrack(const PointLockRequest& /*lock_request*/) override { + return UntrackStatus::NOT_TRACKED; + } + + // "If this method is not supported, leave it as a no-op." + void Merge(const LockTracker&) override {} + + // "If this method is not supported, leave it as a no-op." + void Subtract(const LockTracker&) override {} + + void Clear() override; + + // "If this method is not supported, returns nullptr." + virtual LockTracker* GetTrackedLocksSinceSavePoint( + const LockTracker&) const override { + return nullptr; + } + + PointLockStatus GetPointLockStatus(ColumnFamilyId column_family_id, + const std::string& key) const override; + + // The return value is only used for tests + uint64_t GetNumPointLocks() const override { return 0; } + + ColumnFamilyIterator* GetColumnFamilyIterator() const override { + return nullptr; + } + + KeyIterator* GetKeyIterator( + ColumnFamilyId /*column_family_id*/) const override { + return nullptr; + } + + void ReleaseLocks(RangeTreeLockManager* mgr, PessimisticTransaction* txn, + bool all_trx_locks) { + if (range_list_) range_list_->ReleaseLocks(mgr, txn, all_trx_locks); + } + + void ReplaceLocks(const toku::locktree* lt, + const toku::range_buffer& buffer) { + // range_list_ cannot be NULL here + range_list_->ReplaceLocks(lt, buffer); + } + + private: + RangeLockList* getOrCreateList(); + std::unique_ptr range_list_; +}; + +class RangeTreeLockTrackerFactory : public LockTrackerFactory { + public: + static const RangeTreeLockTrackerFactory& Get() { + static const RangeTreeLockTrackerFactory instance; + return instance; + } + + LockTracker* Create() const override { return new RangeTreeLockTracker(); } + + private: + RangeTreeLockTrackerFactory() {} +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.cc new file mode 100644 index 0000000000..af629b528a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.cc @@ -0,0 +1,194 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ + +#include "utilities/transactions/optimistic_transaction.h" + +#include + +#include "db/column_family.h" +#include "db/db_impl/db_impl.h" +#include "rocksdb/comparator.h" +#include "rocksdb/db.h" +#include "rocksdb/status.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" +#include "util/cast_util.h" +#include "util/string_util.h" +#include "utilities/transactions/lock/point/point_lock_tracker.h" +#include "utilities/transactions/optimistic_transaction.h" +#include "utilities/transactions/optimistic_transaction_db_impl.h" +#include "utilities/transactions/transaction_util.h" + +namespace ROCKSDB_NAMESPACE { + +struct WriteOptions; + +OptimisticTransaction::OptimisticTransaction( + OptimisticTransactionDB* txn_db, const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options) + : TransactionBaseImpl(txn_db->GetBaseDB(), write_options, + PointLockTrackerFactory::Get()), + txn_db_(txn_db) { + Initialize(txn_options); +} + +void OptimisticTransaction::Initialize( + const OptimisticTransactionOptions& txn_options) { + if (txn_options.set_snapshot) { + SetSnapshot(); + } +} + +void OptimisticTransaction::Reinitialize( + OptimisticTransactionDB* txn_db, const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options) { + TransactionBaseImpl::Reinitialize(txn_db->GetBaseDB(), write_options); + Initialize(txn_options); +} + +OptimisticTransaction::~OptimisticTransaction() {} + +void OptimisticTransaction::Clear() { TransactionBaseImpl::Clear(); } + +Status OptimisticTransaction::Prepare() { + return Status::InvalidArgument( + "Two phase commit not supported for optimistic transactions."); +} + +Status OptimisticTransaction::Commit() { + auto txn_db_impl = static_cast_with_check(txn_db_); + assert(txn_db_impl); + switch (txn_db_impl->GetValidatePolicy()) { + case OccValidationPolicy::kValidateParallel: + return CommitWithParallelValidate(); + case OccValidationPolicy::kValidateSerial: + return CommitWithSerialValidate(); + default: + assert(0); + } + // unreachable, just void compiler complain + return Status::OK(); +} + +Status OptimisticTransaction::CommitWithSerialValidate() { + // Set up callback which will call CheckTransactionForConflicts() to + // check whether this transaction is safe to be committed. + OptimisticTransactionCallback callback(this); + + DBImpl* db_impl = static_cast_with_check(db_->GetRootDB()); + + Status s = db_impl->WriteWithCallback( + write_options_, GetWriteBatch()->GetWriteBatch(), &callback); + + if (s.ok()) { + Clear(); + } + + return s; +} + +Status OptimisticTransaction::CommitWithParallelValidate() { + auto txn_db_impl = static_cast_with_check(txn_db_); + assert(txn_db_impl); + DBImpl* db_impl = static_cast_with_check(db_->GetRootDB()); + assert(db_impl); + const size_t space = txn_db_impl->GetLockBucketsSize(); + std::set lk_idxes; + std::vector> lks; + std::unique_ptr cf_it( + tracked_locks_->GetColumnFamilyIterator()); + assert(cf_it != nullptr); + while (cf_it->HasNext()) { + ColumnFamilyId cf = cf_it->Next(); + std::unique_ptr key_it( + tracked_locks_->GetKeyIterator(cf)); + assert(key_it != nullptr); + while (key_it->HasNext()) { + const std::string& key = key_it->Next(); + lk_idxes.insert(FastRange64(GetSliceNPHash64(key), space)); + } + } + // NOTE: in a single txn, all bucket-locks are taken in ascending order. + // In this way, txns from different threads all obey this rule so that + // deadlock can be avoided. 
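+  // Editor's note (illustrative, not from the original authors): lk_idxes
+  // is a std::set, so the loop below visits bucket indexes in ascending
+  // order.  Because every committing transaction acquires its bucket
+  // mutexes in this same global order, no two transactions can each hold a
+  // bucket the other is waiting for, which is what rules out deadlock.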
+ for (auto v : lk_idxes) { + lks.emplace_back(txn_db_impl->LockBucket(v)); + } + + Status s = TransactionUtil::CheckKeysForConflicts(db_impl, *tracked_locks_, + true /* cache_only */); + if (!s.ok()) { + return s; + } + + s = db_impl->Write(write_options_, GetWriteBatch()->GetWriteBatch()); + if (s.ok()) { + Clear(); + } + + return s; +} + +Status OptimisticTransaction::Rollback() { + Clear(); + return Status::OK(); +} + +// Record this key so that we can check it for conflicts at commit time. +// +// 'exclusive' is unused for OptimisticTransaction. +Status OptimisticTransaction::TryLock(ColumnFamilyHandle* column_family, + const Slice& key, bool read_only, + bool exclusive, const bool do_validate, + const bool assume_tracked) { + assert(!assume_tracked); // not supported + (void)assume_tracked; + if (!do_validate) { + return Status::OK(); + } + uint32_t cfh_id = GetColumnFamilyID(column_family); + + SetSnapshotIfNeeded(); + + SequenceNumber seq; + if (snapshot_) { + seq = snapshot_->GetSequenceNumber(); + } else { + seq = db_->GetLatestSequenceNumber(); + } + + std::string key_str = key.ToString(); + + TrackKey(cfh_id, key_str, seq, read_only, exclusive); + + // Always return OK. Confilct checking will happen at commit time. + return Status::OK(); +} + +// Returns OK if it is safe to commit this transaction. Returns Status::Busy +// if there are read or write conflicts that would prevent us from committing OR +// if we can not determine whether there would be any such conflicts. +// +// Should only be called on writer thread in order to avoid any race conditions +// in detecting write conflicts. +Status OptimisticTransaction::CheckTransactionForConflicts(DB* db) { + auto db_impl = static_cast_with_check(db); + + // Since we are on the write thread and do not want to block other writers, + // we will do a cache-only conflict check. This can result in TryAgain + // getting returned if there is not sufficient memtable history to check + // for conflicts. + return TransactionUtil::CheckKeysForConflicts(db_impl, *tracked_locks_, + true /* cache_only */); +} + +Status OptimisticTransaction::SetName(const TransactionName& /* unused */) { + return Status::InvalidArgument("Optimistic transactions cannot be named."); +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.h new file mode 100644 index 0000000000..a03fa16970 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction.h @@ -0,0 +1,99 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + + +#include +#include +#include +#include + +#include "db/write_callback.h" +#include "rocksdb/db.h" +#include "rocksdb/slice.h" +#include "rocksdb/snapshot.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" +#include "rocksdb/utilities/transaction.h" +#include "rocksdb/utilities/write_batch_with_index.h" +#include "utilities/transactions/transaction_base.h" +#include "utilities/transactions/transaction_util.h" + +namespace ROCKSDB_NAMESPACE { + +class OptimisticTransaction : public TransactionBaseImpl { + public: + OptimisticTransaction(OptimisticTransactionDB* db, + const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options); + // No copying allowed + OptimisticTransaction(const OptimisticTransaction&) = delete; + void operator=(const OptimisticTransaction&) = delete; + + virtual ~OptimisticTransaction(); + + void Reinitialize(OptimisticTransactionDB* txn_db, + const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options); + + Status Prepare() override; + + Status Commit() override; + + Status Rollback() override; + + Status SetName(const TransactionName& name) override; + + protected: + Status TryLock(ColumnFamilyHandle* column_family, const Slice& key, + bool read_only, bool exclusive, const bool do_validate = true, + const bool assume_tracked = false) override; + + private: + ROCKSDB_FIELD_UNUSED OptimisticTransactionDB* const txn_db_; + + friend class OptimisticTransactionCallback; + + void Initialize(const OptimisticTransactionOptions& txn_options); + + // Returns OK if it is safe to commit this transaction. Returns Status::Busy + // if there are read or write conflicts that would prevent us from committing + // OR if we can not determine whether there would be any such conflicts. + // + // Should only be called on writer thread. + Status CheckTransactionForConflicts(DB* db); + + void Clear() override; + + void UnlockGetForUpdate(ColumnFamilyHandle* /* unused */, + const Slice& /* unused */) override { + // Nothing to unlock. + } + + Status CommitWithSerialValidate(); + + Status CommitWithParallelValidate(); +}; + +// Used at commit time to trigger transaction validation +class OptimisticTransactionCallback : public WriteCallback { + public: + explicit OptimisticTransactionCallback(OptimisticTransaction* txn) + : txn_(txn) {} + + Status Callback(DB* db) override { + return txn_->CheckTransactionForConflicts(db); + } + + bool AllowWriteBatching() override { return false; } + + private: + OptimisticTransaction* txn_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.cc new file mode 100644 index 0000000000..3a81615335 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.cc @@ -0,0 +1,109 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ + +#include "utilities/transactions/optimistic_transaction_db_impl.h" + +#include +#include + +#include "db/db_impl/db_impl.h" +#include "rocksdb/db.h" +#include "rocksdb/options.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" +#include "utilities/transactions/optimistic_transaction.h" + +namespace ROCKSDB_NAMESPACE { + +Transaction* OptimisticTransactionDBImpl::BeginTransaction( + const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options, Transaction* old_txn) { + if (old_txn != nullptr) { + ReinitializeTransaction(old_txn, write_options, txn_options); + return old_txn; + } else { + return new OptimisticTransaction(this, write_options, txn_options); + } +} + +std::unique_lock OptimisticTransactionDBImpl::LockBucket( + size_t idx) { + assert(idx < bucketed_locks_.size()); + return std::unique_lock(*bucketed_locks_[idx]); +} + +Status OptimisticTransactionDB::Open(const Options& options, + const std::string& dbname, + OptimisticTransactionDB** dbptr) { + DBOptions db_options(options); + ColumnFamilyOptions cf_options(options); + std::vector column_families; + column_families.push_back( + ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); + std::vector handles; + Status s = Open(db_options, dbname, column_families, &handles, dbptr); + if (s.ok()) { + assert(handles.size() == 1); + // i can delete the handle since DBImpl is always holding a reference to + // default column family + delete handles[0]; + } + + return s; +} + +Status OptimisticTransactionDB::Open( + const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, + OptimisticTransactionDB** dbptr) { + return OptimisticTransactionDB::Open(db_options, + OptimisticTransactionDBOptions(), dbname, + column_families, handles, dbptr); +} + +Status OptimisticTransactionDB::Open( + const DBOptions& db_options, + const OptimisticTransactionDBOptions& occ_options, + const std::string& dbname, + const std::vector& column_families, + std::vector* handles, + OptimisticTransactionDB** dbptr) { + Status s; + DB* db; + + std::vector column_families_copy = column_families; + + // Enable MemTable History if not already enabled + for (auto& column_family : column_families_copy) { + ColumnFamilyOptions* options = &column_family.options; + + if (options->max_write_buffer_size_to_maintain == 0 && + options->max_write_buffer_number_to_maintain == 0) { + // Setting to -1 will set the History size to + // max_write_buffer_number * write_buffer_size. 
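+      // Editor's note (illustrative, not from the original authors): the
+      // retained memtable history is what the commit-time conflict check
+      // (TransactionUtil::CheckKeysForConflicts with cache_only == true)
+      // reads; with no history at all, commits could fail with TryAgain
+      // even when no real conflict exists.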
+ options->max_write_buffer_size_to_maintain = -1; + } + } + + s = DB::Open(db_options, dbname, column_families_copy, handles, &db); + + if (s.ok()) { + *dbptr = new OptimisticTransactionDBImpl(db, occ_options); + } + + return s; +} + +void OptimisticTransactionDBImpl::ReinitializeTransaction( + Transaction* txn, const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options) { + assert(dynamic_cast(txn) != nullptr); + auto txn_impl = reinterpret_cast(txn); + + txn_impl->Reinitialize(this, write_options, txn_options); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.h new file mode 100644 index 0000000000..fef145356e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/optimistic_transaction_db_impl.h @@ -0,0 +1,86 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "rocksdb/db.h" +#include "rocksdb/options.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" + +namespace ROCKSDB_NAMESPACE { + +class OptimisticTransactionDBImpl : public OptimisticTransactionDB { + public: + explicit OptimisticTransactionDBImpl( + DB* db, const OptimisticTransactionDBOptions& occ_options, + bool take_ownership = true) + : OptimisticTransactionDB(db), + db_owner_(take_ownership), + validate_policy_(occ_options.validate_policy) { + if (validate_policy_ == OccValidationPolicy::kValidateParallel) { + uint32_t bucket_size = std::max(16u, occ_options.occ_lock_buckets); + bucketed_locks_.reserve(bucket_size); + for (size_t i = 0; i < bucket_size; ++i) { + bucketed_locks_.emplace_back( + std::unique_ptr(new std::mutex)); + } + } + } + + ~OptimisticTransactionDBImpl() { + // Prevent this stackable from destroying + // base db + if (!db_owner_) { + db_ = nullptr; + } + } + + Transaction* BeginTransaction(const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options, + Transaction* old_txn) override; + + // Transactional `DeleteRange()` is not yet supported. + using StackableDB::DeleteRange; + virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*, + const Slice&, const Slice&) override { + return Status::NotSupported(); + } + + // Range deletions also must not be snuck into `WriteBatch`es as they are + // incompatible with `OptimisticTransactionDB`. + virtual Status Write(const WriteOptions& write_opts, + WriteBatch* batch) override { + if (batch->HasDeleteRange()) { + return Status::NotSupported(); + } + return OptimisticTransactionDB::Write(write_opts, batch); + } + + size_t GetLockBucketsSize() const { return bucketed_locks_.size(); } + + OccValidationPolicy GetValidatePolicy() const { return validate_policy_; } + + std::unique_lock LockBucket(size_t idx); + + private: + // NOTE: used in validation phase. Each key is hashed into some + // bucket. We then take the lock in the hash value order to avoid deadlock. 
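+  // Editor's note (illustrative, not from the original authors): in
+  // CommitWithParallelValidate() each tracked key is mapped to a bucket via
+  // FastRange64(GetSliceNPHash64(key), bucketed_locks_.size()).  Colliding
+  // keys simply serialize on the same mutex, so the bucket count (at least
+  // 16) trades contention against memory and never affects correctness.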
+  std::vector<std::unique_ptr<std::mutex>> bucketed_locks_;
+
+  bool db_owner_;
+
+  const OccValidationPolicy validate_policy_;
+
+  void ReinitializeTransaction(Transaction* txn,
+                               const WriteOptions& write_options,
+                               const OptimisticTransactionOptions& txn_options =
+                                   OptimisticTransactionOptions());
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.cc
new file mode 100644
index 0000000000..1771497a69
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.cc
@@ -0,0 +1,1177 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+
+#include "utilities/transactions/pessimistic_transaction.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "db/column_family.h"
+#include "db/db_impl/db_impl.h"
+#include "logging/logging.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/db.h"
+#include "rocksdb/snapshot.h"
+#include "rocksdb/status.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "test_util/sync_point.h"
+#include "util/cast_util.h"
+#include "util/string_util.h"
+#include "utilities/transactions/pessimistic_transaction_db.h"
+#include "utilities/transactions/transaction_util.h"
+#include "utilities/write_batch_with_index/write_batch_with_index_internal.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct WriteOptions;
+
+std::atomic<TransactionID> PessimisticTransaction::txn_id_counter_(1);
+
+TransactionID PessimisticTransaction::GenTxnID() {
+  return txn_id_counter_.fetch_add(1);
+}
+
+PessimisticTransaction::PessimisticTransaction(
+    TransactionDB* txn_db, const WriteOptions& write_options,
+    const TransactionOptions& txn_options, const bool init)
+    : TransactionBaseImpl(
+          txn_db->GetRootDB(), write_options,
+          static_cast_with_check<PessimisticTransactionDB>(txn_db)
+              ->GetLockTrackerFactory()),
+      txn_db_impl_(nullptr),
+      expiration_time_(0),
+      txn_id_(0),
+      waiting_cf_id_(0),
+      waiting_key_(nullptr),
+      lock_timeout_(0),
+      deadlock_detect_(false),
+      deadlock_detect_depth_(0),
+      skip_concurrency_control_(false) {
+  txn_db_impl_ = static_cast_with_check<PessimisticTransactionDB>(txn_db);
+  db_impl_ = static_cast_with_check<DBImpl>(db_);
+  if (init) {
+    Initialize(txn_options);
+  }
+}
+
+void PessimisticTransaction::Initialize(const TransactionOptions& txn_options) {
+  // Range lock manager uses address of transaction object as TXNID
+  const TransactionDBOptions& db_options = txn_db_impl_->GetTxnDBOptions();
+  if (db_options.lock_mgr_handle &&
+      db_options.lock_mgr_handle->getLockManager()->IsRangeLockSupported()) {
+    txn_id_ = reinterpret_cast<TransactionID>(this);
+  } else {
+    txn_id_ = GenTxnID();
+  }
+
+  txn_state_ = STARTED;
+
+  deadlock_detect_ = txn_options.deadlock_detect;
+  deadlock_detect_depth_ = txn_options.deadlock_detect_depth;
+  write_batch_.SetMaxBytes(txn_options.max_write_batch_size);
+  skip_concurrency_control_ = txn_options.skip_concurrency_control;
+
+  lock_timeout_ = txn_options.lock_timeout * 1000;
+  if (lock_timeout_ < 0) {
+    // Lock timeout not set, use default
+    lock_timeout_ =
+        txn_db_impl_->GetTxnDBOptions().transaction_lock_timeout * 1000;
+  }
+
+  if (txn_options.expiration >= 0) {
+    expiration_time_ = start_time_ + txn_options.expiration * 1000;
+  } else {
+    expiration_time_ = 0;
+  }
+
+  if
(txn_options.set_snapshot) { + SetSnapshot(); + } + + if (expiration_time_ > 0) { + txn_db_impl_->InsertExpirableTransaction(txn_id_, this); + } + use_only_the_last_commit_time_batch_for_recovery_ = + txn_options.use_only_the_last_commit_time_batch_for_recovery; + skip_prepare_ = txn_options.skip_prepare; + + read_timestamp_ = kMaxTxnTimestamp; + commit_timestamp_ = kMaxTxnTimestamp; +} + +PessimisticTransaction::~PessimisticTransaction() { + txn_db_impl_->UnLock(this, *tracked_locks_); + if (expiration_time_ > 0) { + txn_db_impl_->RemoveExpirableTransaction(txn_id_); + } + if (!name_.empty() && txn_state_ != COMMITTED) { + txn_db_impl_->UnregisterTransaction(this); + } +} + +void PessimisticTransaction::Clear() { + txn_db_impl_->UnLock(this, *tracked_locks_); + TransactionBaseImpl::Clear(); +} + +void PessimisticTransaction::Reinitialize( + TransactionDB* txn_db, const WriteOptions& write_options, + const TransactionOptions& txn_options) { + if (!name_.empty() && txn_state_ != COMMITTED) { + txn_db_impl_->UnregisterTransaction(this); + } + TransactionBaseImpl::Reinitialize(txn_db->GetRootDB(), write_options); + Initialize(txn_options); +} + +bool PessimisticTransaction::IsExpired() const { + if (expiration_time_ > 0) { + if (dbimpl_->GetSystemClock()->NowMicros() >= expiration_time_) { + // Transaction is expired. + return true; + } + } + + return false; +} + +WriteCommittedTxn::WriteCommittedTxn(TransactionDB* txn_db, + const WriteOptions& write_options, + const TransactionOptions& txn_options) + : PessimisticTransaction(txn_db, write_options, txn_options) {} + +Status WriteCommittedTxn::GetForUpdate(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, std::string* value, + bool exclusive, const bool do_validate) { + return GetForUpdateImpl(read_options, column_family, key, value, exclusive, + do_validate); +} + +Status WriteCommittedTxn::GetForUpdate(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, + PinnableSlice* pinnable_val, + bool exclusive, const bool do_validate) { + return GetForUpdateImpl(read_options, column_family, key, pinnable_val, + exclusive, do_validate); +} + +template +inline Status WriteCommittedTxn::GetForUpdateImpl( + const ReadOptions& read_options, ColumnFamilyHandle* column_family, + const Slice& key, TValue* value, bool exclusive, const bool do_validate) { + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call GetForUpdate with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + column_family = + column_family ? 
column_family : db_impl_->DefaultColumnFamily(); + assert(column_family); + if (!read_options.timestamp) { + const Comparator* const ucmp = column_family->GetComparator(); + assert(ucmp); + size_t ts_sz = ucmp->timestamp_size(); + if (0 == ts_sz) { + return TransactionBaseImpl::GetForUpdate(read_options, column_family, key, + value, exclusive, do_validate); + } + } else { + Status s = db_impl_->FailIfTsMismatchCf( + column_family, *(read_options.timestamp), /*ts_for_read=*/true); + if (!s.ok()) { + return s; + } + } + + if (!do_validate) { + return Status::InvalidArgument( + "If do_validate is false then GetForUpdate with read_timestamp is not " + "defined."); + } else if (kMaxTxnTimestamp == read_timestamp_) { + return Status::InvalidArgument("read_timestamp must be set for validation"); + } + + if (!read_options.timestamp) { + ReadOptions read_opts_copy = read_options; + char ts_buf[sizeof(kMaxTxnTimestamp)]; + EncodeFixed64(ts_buf, read_timestamp_); + Slice ts(ts_buf, sizeof(ts_buf)); + read_opts_copy.timestamp = &ts; + return TransactionBaseImpl::GetForUpdate(read_opts_copy, column_family, key, + value, exclusive, do_validate); + } + assert(read_options.timestamp); + const char* const ts_buf = read_options.timestamp->data(); + assert(read_options.timestamp->size() == sizeof(kMaxTxnTimestamp)); + TxnTimestamp ts = DecodeFixed64(ts_buf); + if (ts != read_timestamp_) { + return Status::InvalidArgument("Must read from the same read_timestamp"); + } + return TransactionBaseImpl::GetForUpdate(read_options, column_family, key, + value, exclusive, do_validate); +} + +Status WriteCommittedTxn::Put(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + return Operate(column_family, key, do_validate, assume_tracked, + [column_family, &key, &value, this]() { + Status s = + GetBatchForWrite()->Put(column_family, key, value); + if (s.ok()) { + ++num_puts_; + } + return s; + }); +} + +Status WriteCommittedTxn::Put(ColumnFamilyHandle* column_family, + const SliceParts& key, const SliceParts& value, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + return Operate(column_family, key, do_validate, assume_tracked, + [column_family, &key, &value, this]() { + Status s = + GetBatchForWrite()->Put(column_family, key, value); + if (s.ok()) { + ++num_puts_; + } + return s; + }); +} + +Status WriteCommittedTxn::PutUntracked(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) { + return Operate( + column_family, key, /*do_validate=*/false, + /*assume_tracked=*/false, [column_family, &key, &value, this]() { + Status s = GetBatchForWrite()->Put(column_family, key, value); + if (s.ok()) { + ++num_puts_; + } + return s; + }); +} + +Status WriteCommittedTxn::PutUntracked(ColumnFamilyHandle* column_family, + const SliceParts& key, + const SliceParts& value) { + return Operate( + column_family, key, /*do_validate=*/false, + /*assume_tracked=*/false, [column_family, &key, &value, this]() { + Status s = GetBatchForWrite()->Put(column_family, key, value); + if (s.ok()) { + ++num_puts_; + } + return s; + }); +} + +Status WriteCommittedTxn::Delete(ColumnFamilyHandle* column_family, + const Slice& key, const bool assume_tracked) { + const bool do_validate = !assume_tracked; + return Operate(column_family, key, do_validate, assume_tracked, + [column_family, &key, this]() { + Status s = GetBatchForWrite()->Delete(column_family, key); + if (s.ok()) { + ++num_deletes_; + } + return s; 
+ }); +} + +Status WriteCommittedTxn::Delete(ColumnFamilyHandle* column_family, + const SliceParts& key, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + return Operate(column_family, key, do_validate, assume_tracked, + [column_family, &key, this]() { + Status s = GetBatchForWrite()->Delete(column_family, key); + if (s.ok()) { + ++num_deletes_; + } + return s; + }); +} + +Status WriteCommittedTxn::DeleteUntracked(ColumnFamilyHandle* column_family, + const Slice& key) { + return Operate(column_family, key, /*do_validate=*/false, + /*assume_tracked=*/false, [column_family, &key, this]() { + Status s = GetBatchForWrite()->Delete(column_family, key); + if (s.ok()) { + ++num_deletes_; + } + return s; + }); +} + +Status WriteCommittedTxn::DeleteUntracked(ColumnFamilyHandle* column_family, + const SliceParts& key) { + return Operate(column_family, key, /*do_validate=*/false, + /*assume_tracked=*/false, [column_family, &key, this]() { + Status s = GetBatchForWrite()->Delete(column_family, key); + if (s.ok()) { + ++num_deletes_; + } + return s; + }); +} + +Status WriteCommittedTxn::SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + return Operate(column_family, key, do_validate, assume_tracked, + [column_family, &key, this]() { + Status s = + GetBatchForWrite()->SingleDelete(column_family, key); + if (s.ok()) { + ++num_deletes_; + } + return s; + }); +} + +Status WriteCommittedTxn::SingleDelete(ColumnFamilyHandle* column_family, + const SliceParts& key, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + return Operate(column_family, key, do_validate, assume_tracked, + [column_family, &key, this]() { + Status s = + GetBatchForWrite()->SingleDelete(column_family, key); + if (s.ok()) { + ++num_deletes_; + } + return s; + }); +} + +Status WriteCommittedTxn::SingleDeleteUntracked( + ColumnFamilyHandle* column_family, const Slice& key) { + return Operate(column_family, key, /*do_validate=*/false, + /*assume_tracked=*/false, [column_family, &key, this]() { + Status s = + GetBatchForWrite()->SingleDelete(column_family, key); + if (s.ok()) { + ++num_deletes_; + } + return s; + }); +} + +Status WriteCommittedTxn::Merge(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + return Operate(column_family, key, do_validate, assume_tracked, + [column_family, &key, &value, this]() { + Status s = + GetBatchForWrite()->Merge(column_family, key, value); + if (s.ok()) { + ++num_merges_; + } + return s; + }); +} + +template +Status WriteCommittedTxn::Operate(ColumnFamilyHandle* column_family, + const TKey& key, const bool do_validate, + const bool assume_tracked, + TOperation&& operation) { + Status s; + if constexpr (std::is_same_v) { + s = TryLock(column_family, key, /*read_only=*/false, /*exclusive=*/true, + do_validate, assume_tracked); + } else if constexpr (std::is_same_v) { + std::string key_buf; + Slice contiguous_key(key, &key_buf); + s = TryLock(column_family, contiguous_key, /*read_only=*/false, + /*exclusive=*/true, do_validate, assume_tracked); + } + if (!s.ok()) { + return s; + } + column_family = + column_family ? 
          column_family : db_impl_->DefaultColumnFamily();
+  assert(column_family);
+  const Comparator* const ucmp = column_family->GetComparator();
+  assert(ucmp);
+  size_t ts_sz = ucmp->timestamp_size();
+  if (ts_sz > 0) {
+    assert(ts_sz == sizeof(TxnTimestamp));
+    if (!IndexingEnabled()) {
+      cfs_with_ts_tracked_when_indexing_disabled_.insert(
+          column_family->GetID());
+    }
+  }
+  return operation();
+}
+
+Status WriteCommittedTxn::SetReadTimestampForValidation(TxnTimestamp ts) {
+  if (read_timestamp_ < kMaxTxnTimestamp && ts < read_timestamp_) {
+    return Status::InvalidArgument(
+        "Cannot decrease read timestamp for validation");
+  }
+  read_timestamp_ = ts;
+  return Status::OK();
+}
+
+Status WriteCommittedTxn::SetCommitTimestamp(TxnTimestamp ts) {
+  if (read_timestamp_ < kMaxTxnTimestamp && ts <= read_timestamp_) {
+    return Status::InvalidArgument(
+        "Cannot commit at timestamp smaller than or equal to read timestamp");
+  }
+  commit_timestamp_ = ts;
+  return Status::OK();
+}
+
+Status PessimisticTransaction::CommitBatch(WriteBatch* batch) {
+  if (batch && WriteBatchInternal::HasKeyWithTimestamp(*batch)) {
+    // CommitBatch() needs to lock the keys in the batch.
+    // However, the application also needs to specify the timestamp for the
+    // keys in batch before calling this API.
+    // This means timestamp order may violate the order of locking, thus
+    // violate the sequence number order for the same user key.
+    // Therefore, we disallow this operation for now.
+    return Status::NotSupported(
+        "Batch to commit includes timestamp assigned before locking");
+  }
+
+  std::unique_ptr<LockTracker> keys_to_unlock(lock_tracker_factory_.Create());
+  Status s = LockBatch(batch, keys_to_unlock.get());
+
+  if (!s.ok()) {
+    return s;
+  }
+
+  bool can_commit = false;
+
+  if (IsExpired()) {
+    s = Status::Expired();
+  } else if (expiration_time_ > 0) {
+    TransactionState expected = STARTED;
+    can_commit = std::atomic_compare_exchange_strong(&txn_state_, &expected,
+                                                     AWAITING_COMMIT);
+  } else if (txn_state_ == STARTED) {
+    // lock stealing is not a concern
+    can_commit = true;
+  }
+
+  if (can_commit) {
+    txn_state_.store(AWAITING_COMMIT);
+    s = CommitBatchInternal(batch);
+    if (s.ok()) {
+      txn_state_.store(COMMITTED);
+    }
+  } else if (txn_state_ == LOCKS_STOLEN) {
+    s = Status::Expired();
+  } else {
+    s = Status::InvalidArgument("Transaction is not in state for commit.");
+  }
+
+  txn_db_impl_->UnLock(this, *keys_to_unlock);
+
+  return s;
+}
+
+Status PessimisticTransaction::Prepare() {
+  if (name_.empty()) {
+    return Status::InvalidArgument(
+        "Cannot prepare a transaction that has not been named.");
+  }
+
+  if (IsExpired()) {
+    return Status::Expired();
+  }
+
+  Status s;
+  bool can_prepare = false;
+
+  if (expiration_time_ > 0) {
+    // must concern ourselves with expiration and/or lock stealing
+    // need to compare/exchange because locks could be stolen out from under us
+    TransactionState expected = STARTED;
+    can_prepare = std::atomic_compare_exchange_strong(&txn_state_, &expected,
+                                                      AWAITING_PREPARE);
+  } else if (txn_state_ == STARTED) {
+    // expiration and lock stealing is not possible
+    txn_state_.store(AWAITING_PREPARE);
+    can_prepare = true;
+  }
+
+  if (can_prepare) {
+    // transaction can't expire after preparation
+    expiration_time_ = 0;
+    assert(log_number_ == 0 ||
+           txn_db_impl_->GetTxnDBOptions().write_policy == WRITE_UNPREPARED);
+
+    s = PrepareInternal();
+    if (s.ok()) {
+      txn_state_.store(PREPARED);
+    }
+  } else if (txn_state_ == LOCKS_STOLEN) {
+    s = Status::Expired();
+  } else if (txn_state_ == PREPARED) {
+    s = Status::InvalidArgument("Transaction has already been prepared.");
+  } else if (txn_state_ == COMMITTED) {
+    s = Status::InvalidArgument("Transaction has already been committed.");
+  } else if (txn_state_ == ROLLEDBACK) {
+    s = Status::InvalidArgument("Transaction has already been rolledback.");
+  } else {
+    s = Status::InvalidArgument("Transaction is not in state for commit.");
+  }
+
+  return s;
+}
+
+Status WriteCommittedTxn::PrepareInternal() {
+  WriteOptions write_options = write_options_;
+  write_options.disableWAL = false;
+  auto s = WriteBatchInternal::MarkEndPrepare(GetWriteBatch()->GetWriteBatch(),
+                                              name_);
+  assert(s.ok());
+  class MarkLogCallback : public PreReleaseCallback {
+   public:
+    MarkLogCallback(DBImpl* db, bool two_write_queues)
+        : db_(db), two_write_queues_(two_write_queues) {
+      (void)two_write_queues_;  // to silence unused private field warning
+    }
+    virtual Status Callback(SequenceNumber, bool is_mem_disabled,
+                            uint64_t log_number, size_t /*index*/,
+                            size_t /*total*/) override {
+#ifdef NDEBUG
+      (void)is_mem_disabled;
+#endif
+      assert(log_number != 0);
+      assert(!two_write_queues_ || is_mem_disabled);  // implies the 2nd queue
+      db_->logs_with_prep_tracker()->MarkLogAsContainingPrepSection(log_number);
+      return Status::OK();
+    }
+
+   private:
+    DBImpl* db_;
+    bool two_write_queues_;
+  } mark_log_callback(db_impl_,
+                      db_impl_->immutable_db_options().two_write_queues);
+
+  WriteCallback* const kNoWriteCallback = nullptr;
+  const uint64_t kRefNoLog = 0;
+  const bool kDisableMemtable = true;
+  SequenceNumber* const KIgnoreSeqUsed = nullptr;
+  const size_t kNoBatchCount = 0;
+  s = db_impl_->WriteImpl(write_options, GetWriteBatch()->GetWriteBatch(),
+                          kNoWriteCallback, &log_number_, kRefNoLog,
+                          kDisableMemtable, KIgnoreSeqUsed, kNoBatchCount,
+                          &mark_log_callback);
+  return s;
+}
+
+Status PessimisticTransaction::Commit() {
+  bool commit_without_prepare = false;
+  bool commit_prepared = false;
+
+  if (IsExpired()) {
+    return Status::Expired();
+  }
+
+  if (expiration_time_ > 0) {
+    // We must atomically compare and exchange the state here because at
+    // this point in the transaction it is possible for another thread
+    // to change our state out from under us in the event that we expire and
+    // have our locks stolen. In this case the only valid state is STARTED
+    // because a state of PREPARED would have a cleared expiration_time_.
+ TransactionState expected = STARTED; + commit_without_prepare = std::atomic_compare_exchange_strong( + &txn_state_, &expected, AWAITING_COMMIT); + TEST_SYNC_POINT("TransactionTest::ExpirableTransactionDataRace:1"); + } else if (txn_state_ == PREPARED) { + // expiration and lock stealing is not a concern + commit_prepared = true; + } else if (txn_state_ == STARTED) { + // expiration and lock stealing is not a concern + if (skip_prepare_) { + commit_without_prepare = true; + } else { + return Status::TxnNotPrepared(); + } + } + + Status s; + if (commit_without_prepare) { + assert(!commit_prepared); + if (WriteBatchInternal::Count(GetCommitTimeWriteBatch()) > 0) { + s = Status::InvalidArgument( + "Commit-time batch contains values that will not be committed."); + } else { + txn_state_.store(AWAITING_COMMIT); + if (log_number_ > 0) { + dbimpl_->logs_with_prep_tracker()->MarkLogAsHavingPrepSectionFlushed( + log_number_); + } + s = CommitWithoutPrepareInternal(); + if (!name_.empty()) { + txn_db_impl_->UnregisterTransaction(this); + } + Clear(); + if (s.ok()) { + txn_state_.store(COMMITTED); + } + } + } else if (commit_prepared) { + txn_state_.store(AWAITING_COMMIT); + + s = CommitInternal(); + + if (!s.ok()) { + ROCKS_LOG_WARN(db_impl_->immutable_db_options().info_log, + "Commit write failed"); + return s; + } + + // FindObsoleteFiles must now look to the memtables + // to determine what prep logs must be kept around, + // not the prep section heap. + assert(log_number_ > 0); + dbimpl_->logs_with_prep_tracker()->MarkLogAsHavingPrepSectionFlushed( + log_number_); + txn_db_impl_->UnregisterTransaction(this); + + Clear(); + txn_state_.store(COMMITTED); + } else if (txn_state_ == LOCKS_STOLEN) { + s = Status::Expired(); + } else if (txn_state_ == COMMITTED) { + s = Status::InvalidArgument("Transaction has already been committed."); + } else if (txn_state_ == ROLLEDBACK) { + s = Status::InvalidArgument("Transaction has already been rolledback."); + } else { + s = Status::InvalidArgument("Transaction is not in state for commit."); + } + + return s; +} + +Status WriteCommittedTxn::CommitWithoutPrepareInternal() { + WriteBatchWithIndex* wbwi = GetWriteBatch(); + assert(wbwi); + WriteBatch* wb = wbwi->GetWriteBatch(); + assert(wb); + + const bool needs_ts = WriteBatchInternal::HasKeyWithTimestamp(*wb); + if (needs_ts && commit_timestamp_ == kMaxTxnTimestamp) { + return Status::InvalidArgument("Must assign a commit timestamp"); + } + + if (needs_ts) { + assert(commit_timestamp_ != kMaxTxnTimestamp); + char commit_ts_buf[sizeof(kMaxTxnTimestamp)]; + EncodeFixed64(commit_ts_buf, commit_timestamp_); + Slice commit_ts(commit_ts_buf, sizeof(commit_ts_buf)); + + Status s = + wb->UpdateTimestamps(commit_ts, [wbwi, this](uint32_t cf) -> size_t { + auto cf_iter = cfs_with_ts_tracked_when_indexing_disabled_.find(cf); + if (cf_iter != cfs_with_ts_tracked_when_indexing_disabled_.end()) { + return sizeof(kMaxTxnTimestamp); + } + const Comparator* ucmp = + WriteBatchWithIndexInternal::GetUserComparator(*wbwi, cf); + return ucmp ? 
 ucmp->timestamp_size()
+                    : std::numeric_limits<size_t>::max();
+        });
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  uint64_t seq_used = kMaxSequenceNumber;
+  SnapshotCreationCallback snapshot_creation_cb(db_impl_, commit_timestamp_,
+                                                snapshot_notifier_, snapshot_);
+  PostMemTableCallback* post_mem_cb = nullptr;
+  if (snapshot_needed_) {
+    if (commit_timestamp_ == kMaxTxnTimestamp) {
+      return Status::InvalidArgument("Must set transaction commit timestamp");
+    } else {
+      post_mem_cb = &snapshot_creation_cb;
+    }
+  }
+  auto s = db_impl_->WriteImpl(write_options_, wb,
+                               /*callback*/ nullptr, /*log_used*/ nullptr,
+                               /*log_ref*/ 0, /*disable_memtable*/ false,
+                               &seq_used, /*batch_cnt=*/0,
+                               /*pre_release_callback=*/nullptr, post_mem_cb);
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  if (s.ok()) {
+    SetId(seq_used);
+  }
+  return s;
+}
+
+Status WriteCommittedTxn::CommitBatchInternal(WriteBatch* batch, size_t) {
+  uint64_t seq_used = kMaxSequenceNumber;
+  auto s = db_impl_->WriteImpl(write_options_, batch, /*callback*/ nullptr,
+                               /*log_used*/ nullptr, /*log_ref*/ 0,
+                               /*disable_memtable*/ false, &seq_used);
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  if (s.ok()) {
+    SetId(seq_used);
+  }
+  return s;
+}
+
+Status WriteCommittedTxn::CommitInternal() {
+  WriteBatchWithIndex* wbwi = GetWriteBatch();
+  assert(wbwi);
+  WriteBatch* wb = wbwi->GetWriteBatch();
+  assert(wb);
+
+  const bool needs_ts = WriteBatchInternal::HasKeyWithTimestamp(*wb);
+  if (needs_ts && commit_timestamp_ == kMaxTxnTimestamp) {
+    return Status::InvalidArgument("Must assign a commit timestamp");
+  }
+  // We take the commit-time batch and append the Commit marker.
+  // The Memtable will ignore the Commit marker in non-recovery mode
+  WriteBatch* working_batch = GetCommitTimeWriteBatch();
+
+  Status s;
+  if (!needs_ts) {
+    s = WriteBatchInternal::MarkCommit(working_batch, name_);
+  } else {
+    assert(commit_timestamp_ != kMaxTxnTimestamp);
+    char commit_ts_buf[sizeof(kMaxTxnTimestamp)];
+    EncodeFixed64(commit_ts_buf, commit_timestamp_);
+    Slice commit_ts(commit_ts_buf, sizeof(commit_ts_buf));
+    s = WriteBatchInternal::MarkCommitWithTimestamp(working_batch, name_,
+                                                    commit_ts);
+    if (s.ok()) {
+      s = wb->UpdateTimestamps(commit_ts, [wbwi, this](uint32_t cf) -> size_t {
+        if (cfs_with_ts_tracked_when_indexing_disabled_.find(cf) !=
+            cfs_with_ts_tracked_when_indexing_disabled_.end()) {
+          return sizeof(kMaxTxnTimestamp);
+        }
+        const Comparator* ucmp =
+            WriteBatchWithIndexInternal::GetUserComparator(*wbwi, cf);
+        return ucmp ? ucmp->timestamp_size()
+                    : std::numeric_limits<size_t>::max();
+      });
+    }
+  }
+
+  if (!s.ok()) {
+    return s;
+  }
+
+  // any operations appended to this working_batch will be ignored from WAL
+  working_batch->MarkWalTerminationPoint();
+
+  // insert prepared batch into Memtable only skipping WAL.
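+  // After the Append() below, the batch handed to WriteImpl() is laid out
+  // roughly as (sketch):
+  //
+  //   [commit-time writes][Commit marker] | WAL termination point |
+  //   [prepared writes from wb]
+  //
+  // so the WAL only records the prefix before the termination point.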
+ // Memtable will ignore BeginPrepare/EndPrepare markers + // in non recovery mode and simply insert the values + s = WriteBatchInternal::Append(working_batch, wb); + assert(s.ok()); + + uint64_t seq_used = kMaxSequenceNumber; + SnapshotCreationCallback snapshot_creation_cb(db_impl_, commit_timestamp_, + snapshot_notifier_, snapshot_); + PostMemTableCallback* post_mem_cb = nullptr; + if (snapshot_needed_) { + if (commit_timestamp_ == kMaxTxnTimestamp) { + s = Status::InvalidArgument("Must set transaction commit timestamp"); + return s; + } else { + post_mem_cb = &snapshot_creation_cb; + } + } + s = db_impl_->WriteImpl(write_options_, working_batch, /*callback*/ nullptr, + /*log_used*/ nullptr, /*log_ref*/ log_number_, + /*disable_memtable*/ false, &seq_used, + /*batch_cnt=*/0, /*pre_release_callback=*/nullptr, + post_mem_cb); + assert(!s.ok() || seq_used != kMaxSequenceNumber); + if (s.ok()) { + SetId(seq_used); + } + return s; +} + +Status PessimisticTransaction::Rollback() { + Status s; + if (txn_state_ == PREPARED) { + txn_state_.store(AWAITING_ROLLBACK); + + s = RollbackInternal(); + + if (s.ok()) { + // we do not need to keep our prepared section around + assert(log_number_ > 0); + dbimpl_->logs_with_prep_tracker()->MarkLogAsHavingPrepSectionFlushed( + log_number_); + Clear(); + txn_state_.store(ROLLEDBACK); + } + } else if (txn_state_ == STARTED) { + if (log_number_ > 0) { + assert(txn_db_impl_->GetTxnDBOptions().write_policy == WRITE_UNPREPARED); + assert(GetId() > 0); + s = RollbackInternal(); + + if (s.ok()) { + dbimpl_->logs_with_prep_tracker()->MarkLogAsHavingPrepSectionFlushed( + log_number_); + } + } + // prepare couldn't have taken place + Clear(); + } else if (txn_state_ == COMMITTED) { + s = Status::InvalidArgument("This transaction has already been committed."); + } else { + s = Status::InvalidArgument( + "Two phase transaction is not in state for rollback."); + } + + return s; +} + +Status WriteCommittedTxn::RollbackInternal() { + WriteBatch rollback_marker; + auto s = WriteBatchInternal::MarkRollback(&rollback_marker, name_); + assert(s.ok()); + s = db_impl_->WriteImpl(write_options_, &rollback_marker); + return s; +} + +Status PessimisticTransaction::RollbackToSavePoint() { + if (txn_state_ != STARTED) { + return Status::InvalidArgument("Transaction is beyond state for rollback."); + } + + if (save_points_ != nullptr && !save_points_->empty()) { + // Unlock any keys locked since last transaction + auto& save_point_tracker = *save_points_->top().new_locks_; + std::unique_ptr t( + tracked_locks_->GetTrackedLocksSinceSavePoint(save_point_tracker)); + if (t) { + txn_db_impl_->UnLock(this, *t); + } + } + + return TransactionBaseImpl::RollbackToSavePoint(); +} + +// Lock all keys in this batch. +// On success, caller should unlock keys_to_unlock +Status PessimisticTransaction::LockBatch(WriteBatch* batch, + LockTracker* keys_to_unlock) { + if (!batch) { + return Status::InvalidArgument("batch is nullptr"); + } + + class Handler : public WriteBatch::Handler { + public: + // Sorted map of column_family_id to sorted set of keys. + // Since LockBatch() always locks keys in sorted order, it cannot deadlock + // with itself. We're not using a comparator here since it doesn't matter + // what the sorting is as long as it's consistent. 
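+    // For example, two concurrent CommitBatch() calls touching keys
+    // {"b", "a"} and {"a", "c"} both acquire locks in the order "a" then
+    // "b"/"c", so neither can hold "b" while waiting on "a" (illustrative
+    // scenario, not code from this file).
+    //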
+    std::map<uint32_t, std::set<std::string>> keys_;
+
+    Handler() {}
+
+    void RecordKey(uint32_t column_family_id, const Slice& key) {
+      std::string key_str = key.ToString();
+
+      auto& cfh_keys = keys_[column_family_id];
+      auto iter = cfh_keys.find(key_str);
+      if (iter == cfh_keys.end()) {
+        // key not yet seen, store it.
+        cfh_keys.insert({std::move(key_str)});
+      }
+    }
+
+    Status PutCF(uint32_t column_family_id, const Slice& key,
+                 const Slice& /* unused */) override {
+      RecordKey(column_family_id, key);
+      return Status::OK();
+    }
+    Status MergeCF(uint32_t column_family_id, const Slice& key,
+                   const Slice& /* unused */) override {
+      RecordKey(column_family_id, key);
+      return Status::OK();
+    }
+    Status DeleteCF(uint32_t column_family_id, const Slice& key) override {
+      RecordKey(column_family_id, key);
+      return Status::OK();
+    }
+  };
+
+  // Iterating on this handler will add all keys in this batch into keys
+  Handler handler;
+  Status s = batch->Iterate(&handler);
+  if (!s.ok()) {
+    return s;
+  }
+
+  // Attempt to lock all keys
+  for (const auto& cf_iter : handler.keys_) {
+    uint32_t cfh_id = cf_iter.first;
+    auto& cfh_keys = cf_iter.second;
+
+    for (const auto& key_iter : cfh_keys) {
+      const std::string& key = key_iter;
+
+      s = txn_db_impl_->TryLock(this, cfh_id, key, true /* exclusive */);
+      if (!s.ok()) {
+        break;
+      }
+      PointLockRequest r;
+      r.column_family_id = cfh_id;
+      r.key = key;
+      r.seq = kMaxSequenceNumber;
+      r.read_only = false;
+      r.exclusive = true;
+      keys_to_unlock->Track(r);
+    }
+
+    if (!s.ok()) {
+      break;
+    }
+  }
+
+  if (!s.ok()) {
+    txn_db_impl_->UnLock(this, *keys_to_unlock);
+  }
+
+  return s;
+}
+
+// Attempt to lock this key.
+// Returns OK if the key has been successfully locked. Non-OK otherwise.
+// If check_snapshot is true and this transaction has a snapshot set,
+// this key will only be locked if there have been no writes to this key since
+// the snapshot time.
+Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
+                                       const Slice& key, bool read_only,
+                                       bool exclusive, const bool do_validate,
+                                       const bool assume_tracked) {
+  assert(!assume_tracked || !do_validate);
+  Status s;
+  if (UNLIKELY(skip_concurrency_control_)) {
+    return s;
+  }
+  uint32_t cfh_id = GetColumnFamilyID(column_family);
+  std::string key_str = key.ToString();
+
+  PointLockStatus status;
+  bool lock_upgrade;
+  bool previously_locked;
+  if (tracked_locks_->IsPointLockSupported()) {
+    status = tracked_locks_->GetPointLockStatus(cfh_id, key_str);
+    previously_locked = status.locked;
+    lock_upgrade = previously_locked && exclusive && !status.exclusive;
+  } else {
+    // If the record is tracked, we can assume it was locked, too.
+    previously_locked = assume_tracked;
+    status.locked = false;
+    lock_upgrade = false;
+  }
+
+  // Lock this key if this transaction hasn't already locked it or we require
+  // an upgrade.
+  if (!previously_locked || lock_upgrade) {
+    s = txn_db_impl_->TryLock(this, cfh_id, key_str, exclusive);
+  }
+
+  const ColumnFamilyHandle* const cfh =
+      column_family ? column_family : db_impl_->DefaultColumnFamily();
+  assert(cfh);
+  const Comparator* const ucmp = cfh->GetComparator();
+  assert(ucmp);
+  size_t ts_sz = ucmp->timestamp_size();
+
+  SetSnapshotIfNeeded();
+
+  // Even though we do not care about doing conflict checking for this write,
+  // we still need to take a lock to make sure we do not cause a conflict with
+  // some other write. However, we do not need to check if there have been
+  // any writes since this transaction's snapshot.
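+  // For instance, a PutUntracked() (do_validate=false) on a key still takes
+  // the point lock so a concurrent transaction cannot write the same key,
+  // but it skips the ValidateSnapshot() call performed in the else-branch
+  // below (illustrative summary of the two paths).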
+  // TODO(agiardullo): could optimize by supporting shared txn locks in the
+  // future.
+  SequenceNumber tracked_at_seq =
+      status.locked ? status.seq : kMaxSequenceNumber;
+  if (!do_validate || (snapshot_ == nullptr &&
+                       (0 == ts_sz || kMaxTxnTimestamp == read_timestamp_))) {
+    if (assume_tracked && !previously_locked &&
+        tracked_locks_->IsPointLockSupported()) {
+      s = Status::InvalidArgument(
+          "assume_tracked is set but it is not tracked yet");
+    }
+    // Need to remember the earliest sequence number that we know that this
+    // key has not been modified after. This is useful if this same
+    // transaction later tries to lock this key again.
+    if (tracked_at_seq == kMaxSequenceNumber) {
+      // Since we haven't checked a snapshot, we only know this key has not
+      // been modified since after we locked it.
+      // Note: when last_seq_same_as_publish_seq_==false this is less than the
+      // latest allocated seq but it is ok since i) this is just a heuristic
+      // used only as a hint to avoid an actual check for conflicts, and
+      // ii) this would cause a false positive only if the snapshot is taken
+      // right after the lock, which would be an unusual sequence.
+      tracked_at_seq = db_->GetLatestSequenceNumber();
+    }
+  } else if (s.ok()) {
+    // If a snapshot is set, we need to make sure the key hasn't been modified
+    // since the snapshot. This must be done after we locked the key.
+    // If we have already validated an earlier snapshot, it must have been
+    // reflected in tracked_at_seq and ValidateSnapshot will return OK.
+    s = ValidateSnapshot(column_family, key, &tracked_at_seq);
+
+    if (!s.ok()) {
+      // Failed to validate key
+      // Unlock key we just locked
+      if (lock_upgrade) {
+        s = txn_db_impl_->TryLock(this, cfh_id, key_str, false /* exclusive */);
+        assert(s.ok());
+      } else if (!previously_locked) {
+        txn_db_impl_->UnLock(this, cfh_id, key.ToString());
+      }
+    }
+  }
+
+  if (s.ok()) {
+    // We must track all the locked keys so that we can unlock them later. If
+    // the key is already locked, this func will update some stats on the
+    // tracked key. It could also update the tracked_at_seq if it is lower
+    // than the existing tracked key seq. These stats are necessary for
+    // RollbackToSavePoint to determine whether a key can be safely removed
+    // from tracked_keys_. Removal can only be done if a key was only locked
+    // during the current savepoint.
+    //
+    // Recall that if assume_tracked is true, we assume that TrackKey has been
+    // called previously since the last savepoint, with the same exclusive
+    // setting, and at a lower sequence number, so skipping here should be
+    // safe.
+    if (!assume_tracked) {
+      TrackKey(cfh_id, key_str, tracked_at_seq, read_only, exclusive);
+    } else {
+#ifndef NDEBUG
+      if (tracked_locks_->IsPointLockSupported()) {
+        PointLockStatus lock_status =
+            tracked_locks_->GetPointLockStatus(cfh_id, key_str);
+        assert(lock_status.locked);
+        assert(lock_status.seq <= tracked_at_seq);
+        assert(lock_status.exclusive == exclusive);
+      }
+#endif
+    }
+  }
+
+  return s;
+}
+
+Status PessimisticTransaction::GetRangeLock(ColumnFamilyHandle* column_family,
+                                            const Endpoint& start_endp,
+                                            const Endpoint& end_endp) {
+  ColumnFamilyHandle* cfh =
+      column_family ? column_family : db_impl_->DefaultColumnFamily();
+  uint32_t cfh_id = GetColumnFamilyID(cfh);
+
+  Status s = txn_db_impl_->TryRangeLock(this, cfh_id, start_endp, end_endp);
+
+  if (s.ok()) {
+    RangeLockRequest req{cfh_id, start_endp, end_endp};
+    tracked_locks_->Track(req);
+  }
+  return s;
+}
+
+// Return OK() if this key has not been modified more recently than the
+// transaction snapshot_.
+// tracked_at_seq is the global seq at which we either locked the key or
+// already have done ValidateSnapshot.
+Status PessimisticTransaction::ValidateSnapshot(
+    ColumnFamilyHandle* column_family, const Slice& key,
+    SequenceNumber* tracked_at_seq) {
+  assert(snapshot_ || read_timestamp_ < kMaxTxnTimestamp);
+
+  SequenceNumber snap_seq = 0;
+  if (snapshot_) {
+    snap_seq = snapshot_->GetSequenceNumber();
+    if (*tracked_at_seq <= snap_seq) {
+      // If the key has been previously validated (or locked) at a sequence
+      // number earlier than the current snapshot's sequence number, we
+      // already know it has not been modified after snap_seq either.
+      return Status::OK();
+    }
+  } else {
+    snap_seq = db_impl_->GetLatestSequenceNumber();
+  }
+
+  // Otherwise we have either
+  // 1: tracked_at_seq == kMaxSequenceNumber, i.e., first time tracking the key
+  // 2: snap_seq < tracked_at_seq: the last time we locked the key was via
+  // do_validate=false, which means we had skipped ValidateSnapshot. In both
+  // cases we should do ValidateSnapshot now.
+
+  *tracked_at_seq = snap_seq;
+
+  ColumnFamilyHandle* cfh =
+      column_family ? column_family : db_impl_->DefaultColumnFamily();
+
+  assert(cfh);
+  const Comparator* const ucmp = cfh->GetComparator();
+  assert(ucmp);
+  size_t ts_sz = ucmp->timestamp_size();
+  std::string ts_buf;
+  if (ts_sz > 0 && read_timestamp_ < kMaxTxnTimestamp) {
+    assert(ts_sz == sizeof(read_timestamp_));
+    PutFixed64(&ts_buf, read_timestamp_);
+  }
+
+  return TransactionUtil::CheckKeyForConflicts(
+      db_impl_, cfh, key.ToString(), snap_seq, ts_sz == 0 ?
 nullptr : &ts_buf,
+      false /* cache_only */);
+}
+
+bool PessimisticTransaction::TryStealingLocks() {
+  assert(IsExpired());
+  TransactionState expected = STARTED;
+  return std::atomic_compare_exchange_strong(&txn_state_, &expected,
+                                             LOCKS_STOLEN);
+}
+
+void PessimisticTransaction::UnlockGetForUpdate(
+    ColumnFamilyHandle* column_family, const Slice& key) {
+  txn_db_impl_->UnLock(this, GetColumnFamilyID(column_family), key.ToString());
+}
+
+Status PessimisticTransaction::SetName(const TransactionName& name) {
+  Status s;
+  if (txn_state_ == STARTED) {
+    if (name_.length()) {
+      s = Status::InvalidArgument("Transaction has already been named.");
+    } else if (txn_db_impl_->GetTransactionByName(name) != nullptr) {
+      s = Status::InvalidArgument("Transaction name must be unique.");
+    } else if (name.length() < 1 || name.length() > 512) {
+      s = Status::InvalidArgument(
+          "Transaction name length must be between 1 and 512 chars.");
+    } else {
+      name_ = name;
+      txn_db_impl_->RegisterTransaction(this);
+    }
+  } else {
+    s = Status::InvalidArgument("Transaction is beyond state for naming.");
+  }
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.h
new file mode 100644
index 0000000000..ffff50974a
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction.h
@@ -0,0 +1,311 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+
+#include <algorithm>
+#include <atomic>
+#include <mutex>
+#include <set>
+#include <stack>
+#include <string>
+#include <unordered_set>
+
+#include "db/write_callback.h"
+#include "rocksdb/db.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/snapshot.h"
+#include "rocksdb/status.h"
+#include "rocksdb/types.h"
+#include "rocksdb/utilities/transaction.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "rocksdb/utilities/write_batch_with_index.h"
+#include "util/autovector.h"
+#include "utilities/transactions/transaction_base.h"
+#include "utilities/transactions/transaction_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class PessimisticTransactionDB;
+
+// A transaction under pessimistic concurrency control. This class implements
+// the locking API and interfaces with the lock manager as well as the
+// pessimistic transactional db.
+class PessimisticTransaction : public TransactionBaseImpl {
+ public:
+  PessimisticTransaction(TransactionDB* db, const WriteOptions& write_options,
+                         const TransactionOptions& txn_options,
+                         const bool init = true);
+  // No copying allowed
+  PessimisticTransaction(const PessimisticTransaction&) = delete;
+  void operator=(const PessimisticTransaction&) = delete;
+
+  ~PessimisticTransaction() override;
+
+  void Reinitialize(TransactionDB* txn_db, const WriteOptions& write_options,
+                    const TransactionOptions& txn_options);
+
+  Status Prepare() override;
+
+  Status Commit() override;
+
+  // It is basically Commit without going through the Prepare phase. The write
+  // batch is also provided directly, instead of expecting the txn to
+  // gradually accumulate the transaction's writes in an internal write batch.
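+  // Illustrative use (sketch; `txn` is a PessimisticTransaction with no
+  // pending writes of its own):
+  //
+  //   WriteBatch batch;
+  //   batch.Put("key", "value");
+  //   Status s = txn->CommitBatch(&batch);  // locks keys, writes, unlocks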
+  Status CommitBatch(WriteBatch* batch);
+
+  Status Rollback() override;
+
+  Status RollbackToSavePoint() override;
+
+  Status SetName(const TransactionName& name) override;
+
+  // Generate a new unique transaction identifier
+  static TransactionID GenTxnID();
+
+  TransactionID GetID() const override { return txn_id_; }
+
+  std::vector<TransactionID> GetWaitingTxns(uint32_t* column_family_id,
+                                            std::string* key) const override {
+    std::lock_guard<std::mutex> lock(wait_mutex_);
+    std::vector<TransactionID> ids(waiting_txn_ids_.size());
+    if (key) *key = waiting_key_ ? *waiting_key_ : "";
+    if (column_family_id) *column_family_id = waiting_cf_id_;
+    std::copy(waiting_txn_ids_.begin(), waiting_txn_ids_.end(), ids.begin());
+    return ids;
+  }
+
+  void SetWaitingTxn(autovector<TransactionID> ids, uint32_t column_family_id,
+                     const std::string* key) {
+    std::lock_guard<std::mutex> lock(wait_mutex_);
+    waiting_txn_ids_ = ids;
+    waiting_cf_id_ = column_family_id;
+    waiting_key_ = key;
+  }
+
+  void ClearWaitingTxn() {
+    std::lock_guard<std::mutex> lock(wait_mutex_);
+    waiting_txn_ids_.clear();
+    waiting_cf_id_ = 0;
+    waiting_key_ = nullptr;
+  }
+
+  // Returns the time (in microseconds according to Env->GetMicros())
+  // at which this transaction will expire. Returns 0 if this transaction does
+  // not expire.
+  uint64_t GetExpirationTime() const { return expiration_time_; }
+
+  // Returns true if this transaction has an expiration_time and has expired.
+  bool IsExpired() const;
+
+  // Returns the number of microseconds a transaction can wait on acquiring a
+  // lock or -1 if there is no timeout.
+  int64_t GetLockTimeout() const { return lock_timeout_; }
+  void SetLockTimeout(int64_t timeout) override {
+    lock_timeout_ = timeout * 1000;
+  }
+
+  // Returns true if locks were stolen successfully, false otherwise.
+  bool TryStealingLocks();
+
+  bool IsDeadlockDetect() const override { return deadlock_detect_; }
+
+  int64_t GetDeadlockDetectDepth() const { return deadlock_detect_depth_; }
+
+  virtual Status GetRangeLock(ColumnFamilyHandle* column_family,
+                              const Endpoint& start_key,
+                              const Endpoint& end_key) override;
+
+ protected:
+  // Refer to
+  // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery
+  bool use_only_the_last_commit_time_batch_for_recovery_ = false;
+  // Refer to
+  // TransactionOptions::skip_prepare
+  bool skip_prepare_ = false;
+
+  virtual Status PrepareInternal() = 0;
+
+  virtual Status CommitWithoutPrepareInternal() = 0;
+
+  // batch_cnt, if non-zero, is the number of sub-batches. A sub-batch is a
+  // batch with no duplicate keys. If zero, then the number of sub-batches is
+  // unknown.
+  virtual Status CommitBatchInternal(WriteBatch* batch,
+                                     size_t batch_cnt = 0) = 0;
+
+  virtual Status CommitInternal() = 0;
+
+  virtual Status RollbackInternal() = 0;
+
+  virtual void Initialize(const TransactionOptions& txn_options);
+
+  Status LockBatch(WriteBatch* batch, LockTracker* keys_to_unlock);
+
+  Status TryLock(ColumnFamilyHandle* column_family, const Slice& key,
+                 bool read_only, bool exclusive, const bool do_validate = true,
+                 const bool assume_tracked = false) override;
+
+  void Clear() override;
+
+  PessimisticTransactionDB* txn_db_impl_;
+  DBImpl* db_impl_;
+
+  // If non-zero, this transaction should not be committed after this time (in
+  // microseconds according to Env->NowMicros())
+  uint64_t expiration_time_;
+
+  // Timestamp used by the transaction to perform all GetForUpdate.
+  // Use this timestamp for conflict checking.
+  // read_timestamp_ == kMaxTxnTimestamp means this transaction has not
+  // performed any GetForUpdate.
It is possible that the transaction has + // performed blind writes or Get, though. + TxnTimestamp read_timestamp_{kMaxTxnTimestamp}; + TxnTimestamp commit_timestamp_{kMaxTxnTimestamp}; + + private: + friend class TransactionTest_ValidateSnapshotTest_Test; + // Used to create unique ids for transactions. + static std::atomic txn_id_counter_; + + // Unique ID for this transaction + TransactionID txn_id_; + + // IDs for the transactions that are blocking the current transaction. + // + // empty if current transaction is not waiting. + autovector waiting_txn_ids_; + + // The following two represents the (cf, key) that a transaction is waiting + // on. + // + // If waiting_key_ is not null, then the pointer should always point to + // a valid string object. The reason is that it is only non-null when the + // transaction is blocked in the PointLockManager::AcquireWithTimeout + // function. At that point, the key string object is one of the function + // parameters. + uint32_t waiting_cf_id_; + const std::string* waiting_key_; + + // Mutex protecting waiting_txn_ids_, waiting_cf_id_ and waiting_key_. + mutable std::mutex wait_mutex_; + + // Timeout in microseconds when locking a key or -1 if there is no timeout. + int64_t lock_timeout_; + + // Whether to perform deadlock detection or not. + bool deadlock_detect_; + + // Whether to perform deadlock detection or not. + int64_t deadlock_detect_depth_; + + // Refer to TransactionOptions::skip_concurrency_control + bool skip_concurrency_control_; + + virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family, + const Slice& key, + SequenceNumber* tracked_at_seq); + + void UnlockGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) override; +}; + +class WriteCommittedTxn : public PessimisticTransaction { + public: + WriteCommittedTxn(TransactionDB* db, const WriteOptions& write_options, + const TransactionOptions& txn_options); + // No copying allowed + WriteCommittedTxn(const WriteCommittedTxn&) = delete; + void operator=(const WriteCommittedTxn&) = delete; + + ~WriteCommittedTxn() override {} + + using TransactionBaseImpl::GetForUpdate; + Status GetForUpdate(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + std::string* value, bool exclusive, + const bool do_validate) override; + Status GetForUpdate(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* pinnable_val, bool exclusive, + const bool do_validate) override; + + using TransactionBaseImpl::Put; + // `key` does NOT include timestamp even when it's enabled. + Status Put(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value, const bool assume_tracked = false) override; + Status Put(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value, + const bool assume_tracked = false) override; + + using TransactionBaseImpl::PutUntracked; + Status PutUntracked(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) override; + Status PutUntracked(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value) override; + + using TransactionBaseImpl::Delete; + // `key` does NOT include timestamp even when it's enabled. 
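+  // E.g., with a timestamp-aware column family (sketch; `cfh` and the
+  // timestamp value are illustrative):
+  //
+  //   txn->Delete(cfh, "k");          // "k" carries no timestamp here
+  //   txn->SetCommitTimestamp(100);   // timestamp is applied at commit
+  //   txn->Commit();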
+ Status Delete(ColumnFamilyHandle* column_family, const Slice& key, + const bool assume_tracked = false) override; + Status Delete(ColumnFamilyHandle* column_family, const SliceParts& key, + const bool assume_tracked = false) override; + + using TransactionBaseImpl::DeleteUntracked; + Status DeleteUntracked(ColumnFamilyHandle* column_family, + const Slice& key) override; + Status DeleteUntracked(ColumnFamilyHandle* column_family, + const SliceParts& key) override; + + using TransactionBaseImpl::SingleDelete; + // `key` does NOT include timestamp even when it's enabled. + Status SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, + const bool assume_tracked = false) override; + Status SingleDelete(ColumnFamilyHandle* column_family, const SliceParts& key, + const bool assume_tracked = false) override; + + using TransactionBaseImpl::SingleDeleteUntracked; + Status SingleDeleteUntracked(ColumnFamilyHandle* column_family, + const Slice& key) override; + + using TransactionBaseImpl::Merge; + Status Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value, const bool assume_tracked = false) override; + + Status SetReadTimestampForValidation(TxnTimestamp ts) override; + Status SetCommitTimestamp(TxnTimestamp ts) override; + TxnTimestamp GetCommitTimestamp() const override { return commit_timestamp_; } + + private: + template + Status GetForUpdateImpl(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + TValue* value, bool exclusive, + const bool do_validate); + + template + Status Operate(ColumnFamilyHandle* column_family, const TKey& key, + const bool do_validate, const bool assume_tracked, + TOperation&& operation); + + Status PrepareInternal() override; + + Status CommitWithoutPrepareInternal() override; + + Status CommitBatchInternal(WriteBatch* batch, size_t batch_cnt) override; + + Status CommitInternal() override; + + Status RollbackInternal() override; + + // Column families that enable timestamps and whose data are written when + // indexing_enabled_ is false. If a key is written when indexing_enabled_ is + // true, then the corresponding column family is not added to cfs_with_ts + // even if it enables timestamp. + std::unordered_set cfs_with_ts_tracked_when_indexing_disabled_; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.cc new file mode 100644 index 0000000000..8009bef197 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.cc @@ -0,0 +1,780 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ + +#include "utilities/transactions/pessimistic_transaction_db.h" + +#include +#include +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "logging/logging.h" +#include "rocksdb/db.h" +#include "rocksdb/options.h" +#include "rocksdb/utilities/transaction_db.h" +#include "test_util/sync_point.h" +#include "util/cast_util.h" +#include "util/mutexlock.h" +#include "utilities/transactions/pessimistic_transaction.h" +#include "utilities/transactions/transaction_db_mutex_impl.h" +#include "utilities/transactions/write_prepared_txn_db.h" +#include "utilities/transactions/write_unprepared_txn_db.h" + +namespace ROCKSDB_NAMESPACE { + +PessimisticTransactionDB::PessimisticTransactionDB( + DB* db, const TransactionDBOptions& txn_db_options) + : TransactionDB(db), + db_impl_(static_cast_with_check(db)), + txn_db_options_(txn_db_options), + lock_manager_(NewLockManager(this, txn_db_options)) { + assert(db_impl_ != nullptr); + info_log_ = db_impl_->GetDBOptions().info_log; +} + +// Support initiliazing PessimisticTransactionDB from a stackable db +// +// PessimisticTransactionDB +// ^ ^ +// | | +// | + +// | StackableDB +// | ^ +// | | +// + + +// DBImpl +// ^ +// |(inherit) +// + +// DB +// +PessimisticTransactionDB::PessimisticTransactionDB( + StackableDB* db, const TransactionDBOptions& txn_db_options) + : TransactionDB(db), + db_impl_(static_cast_with_check(db->GetRootDB())), + txn_db_options_(txn_db_options), + lock_manager_(NewLockManager(this, txn_db_options)) { + assert(db_impl_ != nullptr); +} + +PessimisticTransactionDB::~PessimisticTransactionDB() { + while (!transactions_.empty()) { + delete transactions_.begin()->second; + // TODO(myabandeh): this seems to be an unsafe approach as it is not quite + // clear whether delete would also remove the entry from transactions_. + } +} + +Status PessimisticTransactionDB::VerifyCFOptions( + const ColumnFamilyOptions& cf_options) { + const Comparator* const ucmp = cf_options.comparator; + assert(ucmp); + size_t ts_sz = ucmp->timestamp_size(); + if (0 == ts_sz) { + return Status::OK(); + } + if (ts_sz != sizeof(TxnTimestamp)) { + std::ostringstream oss; + oss << "Timestamp of transaction must have " << sizeof(TxnTimestamp) + << " bytes. CF comparator " << std::string(ucmp->Name()) + << " timestamp size is " << ts_sz << " bytes"; + return Status::InvalidArgument(oss.str()); + } + if (txn_db_options_.write_policy != WRITE_COMMITTED) { + return Status::NotSupported("Only WriteCommittedTxn supports timestamp"); + } + return Status::OK(); +} + +Status PessimisticTransactionDB::Initialize( + const std::vector& compaction_enabled_cf_indices, + const std::vector& handles) { + for (auto cf_ptr : handles) { + AddColumnFamily(cf_ptr); + } + // Verify cf options + for (auto handle : handles) { + ColumnFamilyDescriptor cfd; + Status s = handle->GetDescriptor(&cfd); + if (!s.ok()) { + return s; + } + s = VerifyCFOptions(cfd.options); + if (!s.ok()) { + return s; + } + } + + // Re-enable compaction for the column families that initially had + // compaction enabled. 
+  std::vector<ColumnFamilyHandle*> compaction_enabled_cf_handles;
+  compaction_enabled_cf_handles.reserve(compaction_enabled_cf_indices.size());
+  for (auto index : compaction_enabled_cf_indices) {
+    compaction_enabled_cf_handles.push_back(handles[index]);
+  }
+
+  Status s = EnableAutoCompaction(compaction_enabled_cf_handles);
+
+  // create 'real' transactions from recovered shell transactions
+  auto dbimpl = static_cast_with_check<DBImpl>(GetRootDB());
+  assert(dbimpl != nullptr);
+  auto rtrxs = dbimpl->recovered_transactions();
+
+  for (auto it = rtrxs.begin(); it != rtrxs.end(); ++it) {
+    auto recovered_trx = it->second;
+    assert(recovered_trx);
+    assert(recovered_trx->batches_.size() == 1);
+    const auto& seq = recovered_trx->batches_.begin()->first;
+    const auto& batch_info = recovered_trx->batches_.begin()->second;
+    assert(batch_info.log_number_);
+    assert(recovered_trx->name_.length());
+
+    WriteOptions w_options;
+    w_options.sync = true;
+    TransactionOptions t_options;
+    // This helps avoid deadlocks for keys that, although they exist in the
+    // WAL, did not go through concurrency control. This includes the merge
+    // that MyRocks uses for auto-inc columns. It is safe to do so, since (i)
+    // if there is a conflict between the keys of two transactions that must
+    // be avoided, it is already avoided by the application, MyRocks, before
+    // the restart, and (ii) the application, MyRocks, guarantees to
+    // rollback/commit the recovered transactions before new transactions
+    // start.
+    t_options.skip_concurrency_control = true;
+
+    Transaction* real_trx = BeginTransaction(w_options, t_options, nullptr);
+    assert(real_trx);
+    real_trx->SetLogNumber(batch_info.log_number_);
+    assert(seq != kMaxSequenceNumber);
+    if (GetTxnDBOptions().write_policy != WRITE_COMMITTED) {
+      real_trx->SetId(seq);
+    }
+
+    s = real_trx->SetName(recovered_trx->name_);
+    if (!s.ok()) {
+      break;
+    }
+
+    s = real_trx->RebuildFromWriteBatch(batch_info.batch_);
+    // WriteCommitted sets this to 0 to disable this check, which is specific
+    // to WritePrepared txns
+    assert(batch_info.batch_cnt_ == 0 ||
+           real_trx->GetWriteBatch()->SubBatchCnt() == batch_info.batch_cnt_);
+    real_trx->SetState(Transaction::PREPARED);
+    if (!s.ok()) {
+      break;
+    }
+  }
+  if (s.ok()) {
+    dbimpl->DeleteAllRecoveredTransactions();
+  }
+  return s;
+}
+
+Transaction* WriteCommittedTxnDB::BeginTransaction(
+    const WriteOptions& write_options, const TransactionOptions& txn_options,
+    Transaction* old_txn) {
+  if (old_txn != nullptr) {
+    ReinitializeTransaction(old_txn, write_options, txn_options);
+    return old_txn;
+  } else {
+    return new WriteCommittedTxn(this, write_options, txn_options);
+  }
+}
+
+TransactionDBOptions PessimisticTransactionDB::ValidateTxnDBOptions(
+    const TransactionDBOptions& txn_db_options) {
+  TransactionDBOptions validated = txn_db_options;
+
+  if (txn_db_options.num_stripes == 0) {
+    validated.num_stripes = 1;
+  }
+
+  return validated;
+}
+
+Status TransactionDB::Open(const Options& options,
+                           const TransactionDBOptions& txn_db_options,
+                           const std::string& dbname, TransactionDB** dbptr) {
+  DBOptions db_options(options);
+  ColumnFamilyOptions cf_options(options);
+  std::vector<ColumnFamilyDescriptor> column_families;
+  column_families.push_back(
+      ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
+  std::vector<ColumnFamilyHandle*> handles;
+  Status s = TransactionDB::Open(db_options, txn_db_options, dbname,
+                                 column_families, &handles, dbptr);
+  if (s.ok()) {
+    assert(handles.size() == 1);
+    // We can delete the handle since DBImpl is always holding a reference to
+    // the default column family.
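+    // Callers that open their own column families follow the same pattern,
+    // e.g. (sketch; `db_opts`, `txn_opts`, and `cf_descs` are hypothetical):
+    //
+    //   std::vector<ColumnFamilyHandle*> handles;
+    //   TransactionDB::Open(db_opts, txn_opts, dbname, cf_descs, &handles,
+    //                       &txn_db);
+    //   delete handles[0];  // default CF handle; DBImpl keeps its own ref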
delete handles[0]; + } + + return s; +} + +Status TransactionDB::Open( + const DBOptions& db_options, const TransactionDBOptions& txn_db_options, + const std::string& dbname, + const std::vector& column_families, + std::vector* handles, TransactionDB** dbptr) { + Status s; + DB* db = nullptr; + if (txn_db_options.write_policy == WRITE_COMMITTED && + db_options.unordered_write) { + return Status::NotSupported( + "WRITE_COMMITTED is incompatible with unordered_writes"); + } + if (txn_db_options.write_policy == WRITE_UNPREPARED && + db_options.unordered_write) { + // TODO(lth): support it + return Status::NotSupported( + "WRITE_UNPREPARED is currently incompatible with unordered_writes"); + } + if (txn_db_options.write_policy == WRITE_PREPARED && + db_options.unordered_write && !db_options.two_write_queues) { + return Status::NotSupported( + "WRITE_PREPARED is incompatible with unordered_writes if " + "two_write_queues is not enabled."); + } + + std::vector column_families_copy = column_families; + std::vector compaction_enabled_cf_indices; + DBOptions db_options_2pc = db_options; + PrepareWrap(&db_options_2pc, &column_families_copy, + &compaction_enabled_cf_indices); + const bool use_seq_per_batch = + txn_db_options.write_policy == WRITE_PREPARED || + txn_db_options.write_policy == WRITE_UNPREPARED; + const bool use_batch_per_txn = + txn_db_options.write_policy == WRITE_COMMITTED || + txn_db_options.write_policy == WRITE_PREPARED; + s = DBImpl::Open(db_options_2pc, dbname, column_families_copy, handles, &db, + use_seq_per_batch, use_batch_per_txn); + if (s.ok()) { + ROCKS_LOG_WARN(db->GetDBOptions().info_log, + "Transaction write_policy is %" PRId32, + static_cast(txn_db_options.write_policy)); + // if WrapDB return non-ok, db will be deleted in WrapDB() via + // ~StackableDB(). + s = WrapDB(db, txn_db_options, compaction_enabled_cf_indices, *handles, + dbptr); + } + return s; +} + +void TransactionDB::PrepareWrap( + DBOptions* db_options, std::vector* column_families, + std::vector* compaction_enabled_cf_indices) { + compaction_enabled_cf_indices->clear(); + + // Enable MemTable History if not already enabled + for (size_t i = 0; i < column_families->size(); i++) { + ColumnFamilyOptions* cf_options = &(*column_families)[i].options; + + if (cf_options->max_write_buffer_size_to_maintain == 0 && + cf_options->max_write_buffer_number_to_maintain == 0) { + // Setting to -1 will set the History size to + // max_write_buffer_number * write_buffer_size. + cf_options->max_write_buffer_size_to_maintain = -1; + } + if (!cf_options->disable_auto_compactions) { + // Disable compactions momentarily to prevent race with DB::Open + cf_options->disable_auto_compactions = true; + compaction_enabled_cf_indices->push_back(i); + } + } + db_options->allow_2pc = true; +} + +namespace { +template +Status WrapAnotherDBInternal( + DBType* db, const TransactionDBOptions& txn_db_options, + const std::vector& compaction_enabled_cf_indices, + const std::vector& handles, TransactionDB** dbptr) { + assert(db != nullptr); + assert(dbptr != nullptr); + *dbptr = nullptr; + std::unique_ptr txn_db; + // txn_db owns object pointed to by the raw db pointer. 
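+  // Ownership flow, roughly:
+  //
+  //   db (raw) --> txn_db (unique_ptr, via StackableDB) --> released to
+  //   *dbptr on success; on failure ~StackableDB() deletes db.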
+  switch (txn_db_options.write_policy) {
+    case WRITE_UNPREPARED:
+      txn_db.reset(new WriteUnpreparedTxnDB(
+          db, PessimisticTransactionDB::ValidateTxnDBOptions(txn_db_options)));
+      break;
+    case WRITE_PREPARED:
+      txn_db.reset(new WritePreparedTxnDB(
+          db, PessimisticTransactionDB::ValidateTxnDBOptions(txn_db_options)));
+      break;
+    case WRITE_COMMITTED:
+    default:
+      txn_db.reset(new WriteCommittedTxnDB(
+          db, PessimisticTransactionDB::ValidateTxnDBOptions(txn_db_options)));
+  }
+  txn_db->UpdateCFComparatorMap(handles);
+  Status s = txn_db->Initialize(compaction_enabled_cf_indices, handles);
+  // In case of a failure at this point, db is deleted via the txn_db
+  // destructor and set to nullptr.
+  if (s.ok()) {
+    *dbptr = txn_db.release();
+  } else {
+    for (auto* h : handles) {
+      delete h;
+    }
+    // txn_db still owns db, and ~StackableDB() will be called when txn_db goes
+    // out of scope, deleting the input db pointer.
+    ROCKS_LOG_FATAL(db->GetDBOptions().info_log,
+                    "Failed to initialize txn_db: %s", s.ToString().c_str());
+  }
+  return s;
+}
+}  // namespace
+
+Status TransactionDB::WrapDB(
+    // make sure this db is already opened with memtable history enabled,
+    // auto compaction disabled and 2 phase commit enabled
+    DB* db, const TransactionDBOptions& txn_db_options,
+    const std::vector<size_t>& compaction_enabled_cf_indices,
+    const std::vector<ColumnFamilyHandle*>& handles, TransactionDB** dbptr) {
+  return WrapAnotherDBInternal(db, txn_db_options,
+                               compaction_enabled_cf_indices, handles, dbptr);
+}
+
+Status TransactionDB::WrapStackableDB(
+    // make sure this stackable_db is already opened with memtable history
+    // enabled, auto compaction disabled and 2 phase commit enabled
+    StackableDB* db, const TransactionDBOptions& txn_db_options,
+    const std::vector<size_t>& compaction_enabled_cf_indices,
+    const std::vector<ColumnFamilyHandle*>& handles, TransactionDB** dbptr) {
+  return WrapAnotherDBInternal(db, txn_db_options,
+                               compaction_enabled_cf_indices, handles, dbptr);
+}
+
+// Let LockManager know that this column family exists so it can
+// allocate a LockMap for it.
+void PessimisticTransactionDB::AddColumnFamily( + const ColumnFamilyHandle* handle) { + lock_manager_->AddColumnFamily(handle); +} + +Status PessimisticTransactionDB::CreateColumnFamily( + const ColumnFamilyOptions& options, const std::string& column_family_name, + ColumnFamilyHandle** handle) { + InstrumentedMutexLock l(&column_family_mutex_); + Status s = VerifyCFOptions(options); + if (!s.ok()) { + return s; + } + + s = db_->CreateColumnFamily(options, column_family_name, handle); + if (s.ok()) { + lock_manager_->AddColumnFamily(*handle); + UpdateCFComparatorMap(*handle); + } + + return s; +} + +Status PessimisticTransactionDB::CreateColumnFamilies( + const ColumnFamilyOptions& options, + const std::vector& column_family_names, + std::vector* handles) { + InstrumentedMutexLock l(&column_family_mutex_); + + Status s = VerifyCFOptions(options); + if (!s.ok()) { + return s; + } + + s = db_->CreateColumnFamilies(options, column_family_names, handles); + if (s.ok()) { + for (auto* handle : *handles) { + lock_manager_->AddColumnFamily(handle); + UpdateCFComparatorMap(handle); + } + } + + return s; +} + +Status PessimisticTransactionDB::CreateColumnFamilies( + const std::vector& column_families, + std::vector* handles) { + InstrumentedMutexLock l(&column_family_mutex_); + + for (auto& cf_desc : column_families) { + Status s = VerifyCFOptions(cf_desc.options); + if (!s.ok()) { + return s; + } + } + + Status s = db_->CreateColumnFamilies(column_families, handles); + if (s.ok()) { + for (auto* handle : *handles) { + lock_manager_->AddColumnFamily(handle); + UpdateCFComparatorMap(handle); + } + } + + return s; +} + +// Let LockManager know that it can deallocate the LockMap for this +// column family. +Status PessimisticTransactionDB::DropColumnFamily( + ColumnFamilyHandle* column_family) { + InstrumentedMutexLock l(&column_family_mutex_); + + Status s = db_->DropColumnFamily(column_family); + if (s.ok()) { + lock_manager_->RemoveColumnFamily(column_family); + } + + return s; +} + +Status PessimisticTransactionDB::DropColumnFamilies( + const std::vector& column_families) { + InstrumentedMutexLock l(&column_family_mutex_); + + Status s = db_->DropColumnFamilies(column_families); + if (s.ok()) { + for (auto* handle : column_families) { + lock_manager_->RemoveColumnFamily(handle); + } + } + + return s; +} + +Status PessimisticTransactionDB::TryLock(PessimisticTransaction* txn, + uint32_t cfh_id, + const std::string& key, + bool exclusive) { + return lock_manager_->TryLock(txn, cfh_id, key, GetEnv(), exclusive); +} + +Status PessimisticTransactionDB::TryRangeLock(PessimisticTransaction* txn, + uint32_t cfh_id, + const Endpoint& start_endp, + const Endpoint& end_endp) { + return lock_manager_->TryLock(txn, cfh_id, start_endp, end_endp, GetEnv(), + /*exclusive=*/true); +} + +void PessimisticTransactionDB::UnLock(PessimisticTransaction* txn, + const LockTracker& keys) { + lock_manager_->UnLock(txn, keys, GetEnv()); +} + +void PessimisticTransactionDB::UnLock(PessimisticTransaction* txn, + uint32_t cfh_id, const std::string& key) { + lock_manager_->UnLock(txn, cfh_id, key, GetEnv()); +} + +// Used when wrapping DB write operations in a transaction +Transaction* PessimisticTransactionDB::BeginInternalTransaction( + const WriteOptions& options) { + TransactionOptions txn_options; + Transaction* txn = BeginTransaction(options, txn_options, nullptr); + + // Use default timeout for non-transactional writes + txn->SetLockTimeout(txn_db_options_.default_lock_timeout); + return txn; +} + +// All user Put, Merge, 
Delete, and Write requests must be intercepted to make +// sure that they lock all keys that they are writing to avoid causing conflicts +// with any concurrent transactions. The easiest way to do this is to wrap all +// write operations in a transaction. +// +// Put(), Merge(), and Delete() only lock a single key per call. Write() will +// sort its keys before locking them. This guarantees that TransactionDB write +// methods cannot deadlock with each other (but still could deadlock with a +// Transaction). +Status PessimisticTransactionDB::Put(const WriteOptions& options, + ColumnFamilyHandle* column_family, + const Slice& key, const Slice& val) { + Status s = FailIfCfEnablesTs(this, column_family); + if (!s.ok()) { + return s; + } + + Transaction* txn = BeginInternalTransaction(options); + txn->DisableIndexing(); + + // Since the client didn't create a transaction, they don't care about + // conflict checking for this write. So we just need to do PutUntracked(). + s = txn->PutUntracked(column_family, key, val); + + if (s.ok()) { + s = txn->Commit(); + } + + delete txn; + + return s; +} + +Status PessimisticTransactionDB::Delete(const WriteOptions& wopts, + ColumnFamilyHandle* column_family, + const Slice& key) { + Status s = FailIfCfEnablesTs(this, column_family); + if (!s.ok()) { + return s; + } + + Transaction* txn = BeginInternalTransaction(wopts); + txn->DisableIndexing(); + + // Since the client didn't create a transaction, they don't care about + // conflict checking for this write. So we just need to do + // DeleteUntracked(). + s = txn->DeleteUntracked(column_family, key); + + if (s.ok()) { + s = txn->Commit(); + } + + delete txn; + + return s; +} + +Status PessimisticTransactionDB::SingleDelete(const WriteOptions& wopts, + ColumnFamilyHandle* column_family, + const Slice& key) { + Status s = FailIfCfEnablesTs(this, column_family); + if (!s.ok()) { + return s; + } + + Transaction* txn = BeginInternalTransaction(wopts); + txn->DisableIndexing(); + + // Since the client didn't create a transaction, they don't care about + // conflict checking for this write. So we just need to do + // SingleDeleteUntracked(). + s = txn->SingleDeleteUntracked(column_family, key); + + if (s.ok()) { + s = txn->Commit(); + } + + delete txn; + + return s; +} + +Status PessimisticTransactionDB::Merge(const WriteOptions& options, + ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) { + Status s = FailIfCfEnablesTs(this, column_family); + if (!s.ok()) { + return s; + } + + Transaction* txn = BeginInternalTransaction(options); + txn->DisableIndexing(); + + // Since the client didn't create a transaction, they don't care about + // conflict checking for this write. So we just need to do + // MergeUntracked(). 
+  s = txn->MergeUntracked(column_family, key, value);
+
+  if (s.ok()) {
+    s = txn->Commit();
+  }
+
+  delete txn;
+
+  return s;
+}
+
+Status PessimisticTransactionDB::Write(const WriteOptions& opts,
+                                       WriteBatch* updates) {
+  return WriteWithConcurrencyControl(opts, updates);
+}
+
+Status WriteCommittedTxnDB::Write(const WriteOptions& opts,
+                                  WriteBatch* updates) {
+  Status s = FailIfBatchHasTs(updates);
+  if (!s.ok()) {
+    return s;
+  }
+  if (txn_db_options_.skip_concurrency_control) {
+    return db_impl_->Write(opts, updates);
+  } else {
+    return WriteWithConcurrencyControl(opts, updates);
+  }
+}
+
+Status WriteCommittedTxnDB::Write(
+    const WriteOptions& opts,
+    const TransactionDBWriteOptimizations& optimizations,
+    WriteBatch* updates) {
+  Status s = FailIfBatchHasTs(updates);
+  if (!s.ok()) {
+    return s;
+  }
+  if (optimizations.skip_concurrency_control) {
+    return db_impl_->Write(opts, updates);
+  } else {
+    return WriteWithConcurrencyControl(opts, updates);
+  }
+}
+
+void PessimisticTransactionDB::InsertExpirableTransaction(
+    TransactionID tx_id, PessimisticTransaction* tx) {
+  assert(tx->GetExpirationTime() > 0);
+  std::lock_guard<std::mutex> lock(map_mutex_);
+  expirable_transactions_map_.insert({tx_id, tx});
+}
+
+void PessimisticTransactionDB::RemoveExpirableTransaction(
+    TransactionID tx_id) {
+  std::lock_guard<std::mutex> lock(map_mutex_);
+  expirable_transactions_map_.erase(tx_id);
+}
+
+bool PessimisticTransactionDB::TryStealingExpiredTransactionLocks(
+    TransactionID tx_id) {
+  std::lock_guard<std::mutex> lock(map_mutex_);
+
+  auto tx_it = expirable_transactions_map_.find(tx_id);
+  if (tx_it == expirable_transactions_map_.end()) {
+    return true;
+  }
+  PessimisticTransaction& tx = *(tx_it->second);
+  return tx.TryStealingLocks();
+}
+
+void PessimisticTransactionDB::ReinitializeTransaction(
+    Transaction* txn, const WriteOptions& write_options,
+    const TransactionOptions& txn_options) {
+  auto txn_impl = static_cast_with_check<PessimisticTransaction>(txn);
+
+  txn_impl->Reinitialize(this, write_options, txn_options);
+}
+
+Transaction* PessimisticTransactionDB::GetTransactionByName(
+    const TransactionName& name) {
+  std::lock_guard<std::mutex> lock(name_map_mutex_);
+  auto it = transactions_.find(name);
+  if (it == transactions_.end()) {
+    return nullptr;
+  } else {
+    return it->second;
+  }
+}
+
+void PessimisticTransactionDB::GetAllPreparedTransactions(
+    std::vector<Transaction*>* transv) {
+  assert(transv);
+  transv->clear();
+  std::lock_guard<std::mutex> lock(name_map_mutex_);
+  for (auto it = transactions_.begin(); it != transactions_.end(); ++it) {
+    if (it->second->GetState() == Transaction::PREPARED) {
+      transv->push_back(it->second);
+    }
+  }
+}
+
+LockManager::PointLockStatus PessimisticTransactionDB::GetLockStatusData() {
+  return lock_manager_->GetPointLockStatus();
+}
+
+std::vector<DeadlockPath> PessimisticTransactionDB::GetDeadlockInfoBuffer() {
+  return lock_manager_->GetDeadlockInfoBuffer();
+}
+
+void PessimisticTransactionDB::SetDeadlockInfoBufferSize(
+    uint32_t target_size) {
+  lock_manager_->Resize(target_size);
+}
+
+void PessimisticTransactionDB::RegisterTransaction(Transaction* txn) {
+  assert(txn);
+  assert(txn->GetName().length() > 0);
+  assert(GetTransactionByName(txn->GetName()) == nullptr);
+  assert(txn->GetState() == Transaction::STARTED);
+  std::lock_guard<std::mutex> lock(name_map_mutex_);
+  transactions_[txn->GetName()] = txn;
+}
+
+void PessimisticTransactionDB::UnregisterTransaction(Transaction* txn) {
+  assert(txn);
+  std::lock_guard<std::mutex> lock(name_map_mutex_);
+  auto it = transactions_.find(txn->GetName());
+  assert(it != transactions_.end());
+  transactions_.erase(it);
+}
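+
+// An illustrative sketch (editorial addition, not upstream RocksDB code) of
+// the caller-facing behavior implemented above: Put()/Delete()/Merge() on a
+// TransactionDB are transparently wrapped in an internal transaction, and
+// Write() locks the batch's keys unless the caller explicitly opts out of
+// concurrency control. The path "/tmp/txn_db_example" is a placeholder.
+#if 0
+#include "rocksdb/utilities/transaction_db.h"
+
+ROCKSDB_NAMESPACE::Status NonTransactionalWrites() {
+  using namespace ROCKSDB_NAMESPACE;
+  Options options;
+  options.create_if_missing = true;
+  TransactionDB* db = nullptr;
+  Status s = TransactionDB::Open(options, TransactionDBOptions(),
+                                 "/tmp/txn_db_example", &db);
+  if (!s.ok()) return s;
+
+  // Locks "key" via an internal transaction, commits, and releases the lock.
+  s = db->Put(WriteOptions(), "key", "value");
+
+  // Batch writes sort their keys before locking, so concurrent Write() calls
+  // cannot deadlock with each other. skip_concurrency_control bypasses the
+  // locking entirely when the caller guarantees no conflicting transactions.
+  WriteBatch batch;
+  batch.Put("a", "1");
+  batch.Delete("b");
+  TransactionDBWriteOptimizations optimizations;
+  optimizations.skip_concurrency_control = true;
+  if (s.ok()) s = db->Write(WriteOptions(), optimizations, &batch);
+
+  delete db;
+  return s;
+}
+#endif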
+
+std::pair<Status, std::shared_ptr<const Snapshot>>
+PessimisticTransactionDB::CreateTimestampedSnapshot(TxnTimestamp ts) {
+  if (kMaxTxnTimestamp == ts) {
+    return std::make_pair(Status::InvalidArgument("invalid ts"), nullptr);
+  }
+  assert(db_impl_);
+  return db_impl_->CreateTimestampedSnapshot(kMaxSequenceNumber, ts);
+}
+
+std::shared_ptr<const Snapshot>
+PessimisticTransactionDB::GetTimestampedSnapshot(TxnTimestamp ts) const {
+  assert(db_impl_);
+  return db_impl_->GetTimestampedSnapshot(ts);
+}
+
+void PessimisticTransactionDB::ReleaseTimestampedSnapshotsOlderThan(
+    TxnTimestamp ts) {
+  assert(db_impl_);
+  db_impl_->ReleaseTimestampedSnapshotsOlderThan(ts);
+}
+
+Status PessimisticTransactionDB::GetTimestampedSnapshots(
+    TxnTimestamp ts_lb, TxnTimestamp ts_ub,
+    std::vector<std::shared_ptr<const Snapshot>>& timestamped_snapshots)
+    const {
+  assert(db_impl_);
+  return db_impl_->GetTimestampedSnapshots(ts_lb, ts_ub,
+                                           timestamped_snapshots);
+}
+
+Status SnapshotCreationCallback::operator()(SequenceNumber seq,
+                                            bool disable_memtable) {
+  assert(db_impl_);
+  assert(commit_ts_ != kMaxTxnTimestamp);
+
+  const bool two_write_queues =
+      db_impl_->immutable_db_options().two_write_queues;
+  assert(!two_write_queues || !disable_memtable);
+#ifdef NDEBUG
+  (void)two_write_queues;
+  (void)disable_memtable;
+#endif
+
+  const bool seq_per_batch = db_impl_->seq_per_batch();
+  if (!seq_per_batch) {
+    assert(db_impl_->GetLastPublishedSequence() <= seq);
+  } else {
+    assert(db_impl_->GetLastPublishedSequence() < seq);
+  }
+
+  // Create a snapshot which can also be used for write conflict checking.
+  auto ret = db_impl_->CreateTimestampedSnapshot(seq, commit_ts_);
+  snapshot_creation_status_ = ret.first;
+  snapshot_ = ret.second;
+  if (snapshot_creation_status_.ok()) {
+    assert(snapshot_);
+  } else {
+    assert(!snapshot_);
+  }
+  if (snapshot_ && snapshot_notifier_) {
+    snapshot_notifier_->SnapshotCreated(snapshot_.get());
+  }
+  return Status::OK();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.h
new file mode 100644
index 0000000000..b662048bd4
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/pessimistic_transaction_db.h
@@ -0,0 +1,316 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
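+
+// Illustrative usage sketch (editorial addition, not upstream code) for the
+// timestamped-snapshot API implemented at the end of
+// pessimistic_transaction_db.cc above; `txn_db` is an assumed open
+// TransactionDB*:
+//
+//   // Take a snapshot tagged with timestamp 100.
+//   std::pair<Status, std::shared_ptr<const Snapshot>> res =
+//       txn_db->CreateTimestampedSnapshot(/*ts=*/100);
+//   if (res.first.ok()) {
+//     // Look the snapshot up again by timestamp and read from it.
+//     std::shared_ptr<const Snapshot> snap =
+//         txn_db->GetTimestampedSnapshot(/*ts=*/100);
+//     ReadOptions ro;
+//     ro.snapshot = snap.get();
+//     std::string value;
+//     txn_db->Get(ro, "key", &value).PermitUncheckedError();
+//     // Garbage-collect snapshots tagged with timestamps below 100.
+//     txn_db->ReleaseTimestampedSnapshotsOlderThan(/*ts=*/100);
+//   }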
+
+#pragma once
+
+#include <mutex>
+#include <queue>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "db/db_iter.h"
+#include "db/read_callback.h"
+#include "db/snapshot_checker.h"
+#include "rocksdb/db.h"
+#include "rocksdb/options.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "util/cast_util.h"
+#include "utilities/transactions/lock/lock_manager.h"
+#include "utilities/transactions/lock/range/range_lock_manager.h"
+#include "utilities/transactions/pessimistic_transaction.h"
+#include "utilities/transactions/write_prepared_txn.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class PessimisticTransactionDB : public TransactionDB {
+ public:
+  explicit PessimisticTransactionDB(DB* db,
+                                    const TransactionDBOptions& txn_db_options);
+
+  explicit PessimisticTransactionDB(StackableDB* db,
+                                    const TransactionDBOptions& txn_db_options);
+
+  virtual ~PessimisticTransactionDB();
+
+  virtual const Snapshot* GetSnapshot() override { return db_->GetSnapshot(); }
+
+  virtual Status Initialize(
+      const std::vector<size_t>& compaction_enabled_cf_indices,
+      const std::vector<ColumnFamilyHandle*>& handles);
+
+  Transaction* BeginTransaction(const WriteOptions& write_options,
+                                const TransactionOptions& txn_options,
+                                Transaction* old_txn) override = 0;
+
+  using StackableDB::Put;
+  virtual Status Put(const WriteOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& val) override;
+
+  using StackableDB::Delete;
+  virtual Status Delete(const WriteOptions& wopts,
+                        ColumnFamilyHandle* column_family,
+                        const Slice& key) override;
+
+  using StackableDB::SingleDelete;
+  virtual Status SingleDelete(const WriteOptions& wopts,
+                              ColumnFamilyHandle* column_family,
+                              const Slice& key) override;
+
+  using StackableDB::Merge;
+  virtual Status Merge(const WriteOptions& options,
+                       ColumnFamilyHandle* column_family, const Slice& key,
+                       const Slice& value) override;
+
+  using TransactionDB::Write;
+  virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override;
+  inline Status WriteWithConcurrencyControl(const WriteOptions& opts,
+                                            WriteBatch* updates) {
+    Status s;
+    if (opts.protection_bytes_per_key > 0) {
+      s = WriteBatchInternal::UpdateProtectionInfo(
+          updates, opts.protection_bytes_per_key);
+    }
+    if (s.ok()) {
+      // Need to lock all keys in this batch to prevent write conflicts with
+      // concurrent transactions.
+      Transaction* txn = BeginInternalTransaction(opts);
+      txn->DisableIndexing();
+
+      auto txn_impl = static_cast_with_check<PessimisticTransaction>(txn);
+
+      // Since commitBatch sorts the keys before locking, concurrent Write()
+      // operations will not cause a deadlock.
+      // In order to avoid a deadlock with a concurrent Transaction,
+      // Transactions should use a lock timeout.
+      s = txn_impl->CommitBatch(updates);
+
+      delete txn;
+    }
+
+    return s;
+  }
+
+  using StackableDB::CreateColumnFamily;
+  virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
+                                    const std::string& column_family_name,
+                                    ColumnFamilyHandle** handle) override;
+
+  Status CreateColumnFamilies(
+      const ColumnFamilyOptions& options,
+      const std::vector<std::string>& column_family_names,
+      std::vector<ColumnFamilyHandle*>* handles) override;
+
+  Status CreateColumnFamilies(
+      const std::vector<ColumnFamilyDescriptor>& column_families,
+      std::vector<ColumnFamilyHandle*>* handles) override;
+
+  using StackableDB::DropColumnFamily;
+  virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) override;
+
+  Status DropColumnFamilies(
+      const std::vector<ColumnFamilyHandle*>& column_families) override;
+
+  Status TryLock(PessimisticTransaction* txn, uint32_t cfh_id,
+                 const std::string& key, bool exclusive);
+  Status TryRangeLock(PessimisticTransaction* txn, uint32_t cfh_id,
+                      const Endpoint& start_endp, const Endpoint& end_endp);
+
+  void UnLock(PessimisticTransaction* txn, const LockTracker& keys);
+  void UnLock(PessimisticTransaction* txn, uint32_t cfh_id,
+              const std::string& key);
+
+  void AddColumnFamily(const ColumnFamilyHandle* handle);
+
+  static TransactionDBOptions ValidateTxnDBOptions(
+      const TransactionDBOptions& txn_db_options);
+
+  const TransactionDBOptions& GetTxnDBOptions() const {
+    return txn_db_options_;
+  }
+
+  void InsertExpirableTransaction(TransactionID tx_id,
+                                  PessimisticTransaction* tx);
+  void RemoveExpirableTransaction(TransactionID tx_id);
+
+  // If the transaction is no longer available, locks can be stolen. If the
+  // transaction is available, try stealing locks directly from the
+  // transaction. It is the caller's responsibility to ensure that the
+  // referred transaction is expirable (GetExpirationTime() > 0) and that it
+  // is expired.
+  bool TryStealingExpiredTransactionLocks(TransactionID tx_id);
+
+  Transaction* GetTransactionByName(const TransactionName& name) override;
+
+  void RegisterTransaction(Transaction* txn);
+  void UnregisterTransaction(Transaction* txn);
+
+  // Not thread-safe. The current use case is during recovery (single thread).
+  void GetAllPreparedTransactions(std::vector<Transaction*>* trans) override;
+
+  LockManager::PointLockStatus GetLockStatusData() override;
+
+  std::vector<DeadlockPath> GetDeadlockInfoBuffer() override;
+  void SetDeadlockInfoBufferSize(uint32_t target_size) override;
+
+  // The default implementation does nothing. The actual implementation is
+  // moved to the child classes that actually need this information. This is
+  // due to an odd performance drop we observed when we added a std::atomic
+  // member to the base class, even when the subclasses do not read it in the
+  // fast path.
+  virtual void UpdateCFComparatorMap(const std::vector<ColumnFamilyHandle*>&) {
+  }
+  virtual void UpdateCFComparatorMap(ColumnFamilyHandle*) {}
+
+  // Use the returned factory to create LockTrackers in transactions.
+ const LockTrackerFactory& GetLockTrackerFactory() const { + return lock_manager_->GetLockTrackerFactory(); + } + + std::pair> CreateTimestampedSnapshot( + TxnTimestamp ts) override; + + std::shared_ptr GetTimestampedSnapshot( + TxnTimestamp ts) const override; + + void ReleaseTimestampedSnapshotsOlderThan(TxnTimestamp ts) override; + + Status GetTimestampedSnapshots(TxnTimestamp ts_lb, TxnTimestamp ts_ub, + std::vector>& + timestamped_snapshots) const override; + + protected: + DBImpl* db_impl_; + std::shared_ptr info_log_; + const TransactionDBOptions txn_db_options_; + + static Status FailIfBatchHasTs(const WriteBatch* wb); + + static Status FailIfCfEnablesTs(const DB* db, + const ColumnFamilyHandle* column_family); + + void ReinitializeTransaction( + Transaction* txn, const WriteOptions& write_options, + const TransactionOptions& txn_options = TransactionOptions()); + + virtual Status VerifyCFOptions(const ColumnFamilyOptions& cf_options); + + private: + friend class WritePreparedTxnDB; + friend class WritePreparedTxnDBMock; + friend class WriteUnpreparedTxn; + friend class TransactionTest_DoubleCrashInRecovery_Test; + friend class TransactionTest_DoubleEmptyWrite_Test; + friend class TransactionTest_DuplicateKeys_Test; + friend class TransactionTest_PersistentTwoPhaseTransactionTest_Test; + friend class TransactionTest_TwoPhaseDoubleRecoveryTest_Test; + friend class TransactionTest_TwoPhaseOutOfOrderDelete_Test; + friend class TransactionStressTest_TwoPhaseLongPrepareTest_Test; + friend class WriteUnpreparedTransactionTest_RecoveryTest_Test; + friend class WriteUnpreparedTransactionTest_MarkLogWithPrepSection_Test; + + Transaction* BeginInternalTransaction(const WriteOptions& options); + + std::shared_ptr lock_manager_; + + // Must be held when adding/dropping column families. + InstrumentedMutex column_family_mutex_; + + // Used to ensure that no locks are stolen from an expirable transaction + // that has started a commit. Only transactions with an expiration time + // should be in this map. + std::mutex map_mutex_; + std::unordered_map + expirable_transactions_map_; + + // map from name to two phase transaction instance + std::mutex name_map_mutex_; + std::unordered_map transactions_; + + // Signal that we are testing a crash scenario. Some asserts could be relaxed + // in such cases. + virtual void TEST_Crash() {} +}; + +// A PessimisticTransactionDB that writes the data to the DB after the commit. +// In this way the DB only contains the committed data. 
+class WriteCommittedTxnDB : public PessimisticTransactionDB { + public: + explicit WriteCommittedTxnDB(DB* db, + const TransactionDBOptions& txn_db_options) + : PessimisticTransactionDB(db, txn_db_options) {} + + explicit WriteCommittedTxnDB(StackableDB* db, + const TransactionDBOptions& txn_db_options) + : PessimisticTransactionDB(db, txn_db_options) {} + + virtual ~WriteCommittedTxnDB() {} + + Transaction* BeginTransaction(const WriteOptions& write_options, + const TransactionOptions& txn_options, + Transaction* old_txn) override; + + // Optimized version of ::Write that makes use of skip_concurrency_control + // hint + using TransactionDB::Write; + virtual Status Write(const WriteOptions& opts, + const TransactionDBWriteOptimizations& optimizations, + WriteBatch* updates) override; + virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override; +}; + +inline Status PessimisticTransactionDB::FailIfBatchHasTs( + const WriteBatch* batch) { + if (batch != nullptr && WriteBatchInternal::HasKeyWithTimestamp(*batch)) { + return Status::NotSupported( + "Writes with timestamp must go through transaction API instead of " + "TransactionDB."); + } + return Status::OK(); +} + +inline Status PessimisticTransactionDB::FailIfCfEnablesTs( + const DB* db, const ColumnFamilyHandle* column_family) { + assert(db); + column_family = column_family ? column_family : db->DefaultColumnFamily(); + assert(column_family); + const Comparator* const ucmp = column_family->GetComparator(); + assert(ucmp); + if (ucmp->timestamp_size() > 0) { + return Status::NotSupported( + "Write operation with user timestamp must go through the transaction " + "API instead of TransactionDB."); + } + return Status::OK(); +} + +class SnapshotCreationCallback : public PostMemTableCallback { + public: + explicit SnapshotCreationCallback( + DBImpl* dbi, TxnTimestamp commit_ts, + const std::shared_ptr& notifier, + std::shared_ptr& snapshot) + : db_impl_(dbi), + commit_ts_(commit_ts), + snapshot_notifier_(notifier), + snapshot_(snapshot) { + assert(db_impl_); + } + + ~SnapshotCreationCallback() override { + snapshot_creation_status_.PermitUncheckedError(); + } + + Status operator()(SequenceNumber seq, bool disable_memtable) override; + + private: + DBImpl* const db_impl_; + const TxnTimestamp commit_ts_; + std::shared_ptr snapshot_notifier_; + std::shared_ptr& snapshot_; + + Status snapshot_creation_status_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/snapshot_checker.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/snapshot_checker.cc new file mode 100644 index 0000000000..da363a12fb --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/snapshot_checker.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
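+
+// Illustrative sketch (editorial addition, not upstream code): how callers of
+// the SnapshotChecker implemented in this file typically interpret the three
+// possible results of CheckInSnapshot().
+#if 0
+void InterpretResult(ROCKSDB_NAMESPACE::SnapshotChecker* checker,
+                     ROCKSDB_NAMESPACE::SequenceNumber seq,
+                     ROCKSDB_NAMESPACE::SequenceNumber snapshot_seq) {
+  using ROCKSDB_NAMESPACE::SnapshotCheckerResult;
+  switch (checker->CheckInSnapshot(seq, snapshot_seq)) {
+    case SnapshotCheckerResult::kInSnapshot:
+      // The write at `seq` is visible to the snapshot.
+      break;
+    case SnapshotCheckerResult::kNotInSnapshot:
+      // The write is not committed with respect to the snapshot.
+      break;
+    case SnapshotCheckerResult::kSnapshotReleased:
+      // The snapshot is gone; visibility can no longer be decided against it.
+      break;
+  }
+}
+#endif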
+ +#include "db/snapshot_checker.h" + + +#include "port/lang.h" +#include "utilities/transactions/write_prepared_txn_db.h" + +namespace ROCKSDB_NAMESPACE { + + +WritePreparedSnapshotChecker::WritePreparedSnapshotChecker( + WritePreparedTxnDB* txn_db) + : txn_db_(txn_db){}; + +SnapshotCheckerResult WritePreparedSnapshotChecker::CheckInSnapshot( + SequenceNumber sequence, SequenceNumber snapshot_sequence) const { + bool snapshot_released = false; + // TODO(myabandeh): set min_uncommitted + bool in_snapshot = txn_db_->IsInSnapshot( + sequence, snapshot_sequence, kMinUnCommittedSeq, &snapshot_released); + if (snapshot_released) { + return SnapshotCheckerResult::kSnapshotReleased; + } + return in_snapshot ? SnapshotCheckerResult::kInSnapshot + : SnapshotCheckerResult::kNotInSnapshot; +} + + +DisableGCSnapshotChecker* DisableGCSnapshotChecker::Instance() { + STATIC_AVOID_DESTRUCTION(DisableGCSnapshotChecker, instance); + return &instance; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.cc new file mode 100644 index 0000000000..5963f7429f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.cc @@ -0,0 +1,757 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "utilities/transactions/transaction_base.h" + +#include + +#include "db/column_family.h" +#include "db/db_impl/db_impl.h" +#include "logging/logging.h" +#include "rocksdb/comparator.h" +#include "rocksdb/db.h" +#include "rocksdb/status.h" +#include "util/cast_util.h" +#include "util/string_util.h" +#include "utilities/transactions/lock/lock_tracker.h" + +namespace ROCKSDB_NAMESPACE { + +Status Transaction::CommitAndTryCreateSnapshot( + std::shared_ptr notifier, TxnTimestamp ts, + std::shared_ptr* snapshot) { + if (snapshot) { + snapshot->reset(); + } + TxnTimestamp commit_ts = GetCommitTimestamp(); + if (commit_ts == kMaxTxnTimestamp) { + if (ts == kMaxTxnTimestamp) { + return Status::InvalidArgument("Commit timestamp unset"); + } else { + const Status s = SetCommitTimestamp(ts); + if (!s.ok()) { + return s; + } + } + } else if (ts != kMaxTxnTimestamp) { + if (ts != commit_ts) { + // For now we treat this as error. + return Status::InvalidArgument("Different commit ts specified"); + } + } + SetSnapshotOnNextOperation(notifier); + Status s = Commit(); + if (!s.ok()) { + return s; + } + assert(s.ok()); + // If we reach here, we must return ok status for this function. 
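+  // Commit() has already succeeded at this point, so reporting a failure from
+  // here on would wrongly suggest to the caller that the commit itself
+  // failed. Any problem obtaining the snapshot below is therefore surfaced
+  // only through the optional out-parameter, never the return status.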
+ std::shared_ptr new_snapshot = GetTimestampedSnapshot(); + + if (snapshot) { + *snapshot = new_snapshot; + } + return Status::OK(); +} + +TransactionBaseImpl::TransactionBaseImpl( + DB* db, const WriteOptions& write_options, + const LockTrackerFactory& lock_tracker_factory) + : db_(db), + dbimpl_(static_cast_with_check(db)), + write_options_(write_options), + cmp_(GetColumnFamilyUserComparator(db->DefaultColumnFamily())), + lock_tracker_factory_(lock_tracker_factory), + start_time_(dbimpl_->GetSystemClock()->NowMicros()), + write_batch_(cmp_, 0, true, 0, write_options.protection_bytes_per_key), + tracked_locks_(lock_tracker_factory_.Create()), + commit_time_batch_(0 /* reserved_bytes */, 0 /* max_bytes */, + write_options.protection_bytes_per_key, + 0 /* default_cf_ts_sz */), + indexing_enabled_(true) { + assert(dynamic_cast(db_) != nullptr); + log_number_ = 0; + if (dbimpl_->allow_2pc()) { + InitWriteBatch(); + } +} + +TransactionBaseImpl::~TransactionBaseImpl() { + // Release snapshot if snapshot is set + SetSnapshotInternal(nullptr); +} + +void TransactionBaseImpl::Clear() { + save_points_.reset(nullptr); + write_batch_.Clear(); + commit_time_batch_.Clear(); + tracked_locks_->Clear(); + num_puts_ = 0; + num_deletes_ = 0; + num_merges_ = 0; + + if (dbimpl_->allow_2pc()) { + InitWriteBatch(); + } +} + +void TransactionBaseImpl::Reinitialize(DB* db, + const WriteOptions& write_options) { + Clear(); + ClearSnapshot(); + id_ = 0; + db_ = db; + name_.clear(); + log_number_ = 0; + write_options_ = write_options; + start_time_ = dbimpl_->GetSystemClock()->NowMicros(); + indexing_enabled_ = true; + cmp_ = GetColumnFamilyUserComparator(db_->DefaultColumnFamily()); + WriteBatchInternal::UpdateProtectionInfo( + write_batch_.GetWriteBatch(), write_options_.protection_bytes_per_key) + .PermitUncheckedError(); + WriteBatchInternal::UpdateProtectionInfo( + &commit_time_batch_, write_options_.protection_bytes_per_key) + .PermitUncheckedError(); +} + +void TransactionBaseImpl::SetSnapshot() { + const Snapshot* snapshot = dbimpl_->GetSnapshotForWriteConflictBoundary(); + SetSnapshotInternal(snapshot); +} + +void TransactionBaseImpl::SetSnapshotInternal(const Snapshot* snapshot) { + // Set a custom deleter for the snapshot_ SharedPtr as the snapshot needs to + // be released, not deleted when it is no longer referenced. 
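+  // In isolation the idiom is (editorial sketch): construct the shared_ptr
+  // with a deleter that releases rather than deletes, e.g.
+  //
+  //   std::shared_ptr<const Snapshot> sp(
+  //       snap, [db](const Snapshot* s) { db->ReleaseSnapshot(s); });
+  //
+  // The std::bind expression below is the equivalent formulation: it forwards
+  // the snapshot pointer as the first argument of ReleaseSnapshot and binds
+  // this and db_ as the remaining state.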
+ snapshot_.reset(snapshot, std::bind(&TransactionBaseImpl::ReleaseSnapshot, + this, std::placeholders::_1, db_)); + snapshot_needed_ = false; + snapshot_notifier_ = nullptr; +} + +void TransactionBaseImpl::SetSnapshotOnNextOperation( + std::shared_ptr notifier) { + snapshot_needed_ = true; + snapshot_notifier_ = notifier; +} + +void TransactionBaseImpl::SetSnapshotIfNeeded() { + if (snapshot_needed_) { + std::shared_ptr notifier = snapshot_notifier_; + SetSnapshot(); + if (notifier != nullptr) { + notifier->SnapshotCreated(GetSnapshot()); + } + } +} + +Status TransactionBaseImpl::TryLock(ColumnFamilyHandle* column_family, + const SliceParts& key, bool read_only, + bool exclusive, const bool do_validate, + const bool assume_tracked) { + size_t key_size = 0; + for (int i = 0; i < key.num_parts; ++i) { + key_size += key.parts[i].size(); + } + + std::string str; + str.reserve(key_size); + + for (int i = 0; i < key.num_parts; ++i) { + str.append(key.parts[i].data(), key.parts[i].size()); + } + + return TryLock(column_family, str, read_only, exclusive, do_validate, + assume_tracked); +} + +void TransactionBaseImpl::SetSavePoint() { + if (save_points_ == nullptr) { + save_points_.reset( + new std::stack>()); + } + save_points_->emplace(snapshot_, snapshot_needed_, snapshot_notifier_, + num_puts_, num_deletes_, num_merges_, + lock_tracker_factory_); + write_batch_.SetSavePoint(); +} + +Status TransactionBaseImpl::RollbackToSavePoint() { + if (save_points_ != nullptr && save_points_->size() > 0) { + // Restore saved SavePoint + TransactionBaseImpl::SavePoint& save_point = save_points_->top(); + snapshot_ = save_point.snapshot_; + snapshot_needed_ = save_point.snapshot_needed_; + snapshot_notifier_ = save_point.snapshot_notifier_; + num_puts_ = save_point.num_puts_; + num_deletes_ = save_point.num_deletes_; + num_merges_ = save_point.num_merges_; + + // Rollback batch + Status s = write_batch_.RollbackToSavePoint(); + assert(s.ok()); + + // Rollback any keys that were tracked since the last savepoint + tracked_locks_->Subtract(*save_point.new_locks_); + + save_points_->pop(); + + return s; + } else { + assert(write_batch_.RollbackToSavePoint().IsNotFound()); + return Status::NotFound(); + } +} + +Status TransactionBaseImpl::PopSavePoint() { + if (save_points_ == nullptr || save_points_->empty()) { + // No SavePoint yet. + assert(write_batch_.PopSavePoint().IsNotFound()); + return Status::NotFound(); + } + + assert(!save_points_->empty()); + // If there is another savepoint A below the current savepoint B, then A needs + // to inherit tracked_keys in B so that if we rollback to savepoint A, we + // remember to unlock keys in B. If there is no other savepoint below, then we + // can safely discard savepoint info. 
+ if (save_points_->size() == 1) { + save_points_->pop(); + } else { + TransactionBaseImpl::SavePoint top(lock_tracker_factory_); + std::swap(top, save_points_->top()); + save_points_->pop(); + + save_points_->top().new_locks_->Merge(*top.new_locks_); + } + + return write_batch_.PopSavePoint(); +} + +Status TransactionBaseImpl::Get(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, std::string* value) { + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call Get with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + assert(value != nullptr); + PinnableSlice pinnable_val(value); + assert(!pinnable_val.IsPinned()); + auto s = Get(read_options, column_family, key, &pinnable_val); + if (s.ok() && pinnable_val.IsPinned()) { + value->assign(pinnable_val.data(), pinnable_val.size()); + } // else value is already assigned + return s; +} + +Status TransactionBaseImpl::Get(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* pinnable_val) { + return write_batch_.GetFromBatchAndDB(db_, read_options, column_family, key, + pinnable_val); +} + +Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, std::string* value, + bool exclusive, + const bool do_validate) { + if (!do_validate && read_options.snapshot != nullptr) { + return Status::InvalidArgument( + "If do_validate is false then GetForUpdate with snapshot is not " + "defined."); + } + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call GetForUpdate with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + Status s = + TryLock(column_family, key, true /* read_only */, exclusive, do_validate); + + if (s.ok() && value != nullptr) { + assert(value != nullptr); + PinnableSlice pinnable_val(value); + assert(!pinnable_val.IsPinned()); + s = Get(read_options, column_family, key, &pinnable_val); + if (s.ok() && pinnable_val.IsPinned()) { + value->assign(pinnable_val.data(), pinnable_val.size()); + } // else value is already assigned + } + return s; +} + +Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, + PinnableSlice* pinnable_val, + bool exclusive, + const bool do_validate) { + if (!do_validate && read_options.snapshot != nullptr) { + return Status::InvalidArgument( + "If do_validate is false then GetForUpdate with snapshot is not " + "defined."); + } + if (read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call GetForUpdate with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + Status s = + TryLock(column_family, key, true /* read_only */, exclusive, do_validate); + + if (s.ok() && pinnable_val != nullptr) { + s = Get(read_options, column_family, key, pinnable_val); + } + return s; +} + +std::vector TransactionBaseImpl::MultiGet( + const ReadOptions& read_options, + const std::vector& column_family, + const std::vector& keys, std::vector* values) { + size_t num_keys = keys.size(); + if (read_options.io_activity != Env::IOActivity::kUnknown) { + Status s = Status::InvalidArgument( + "Cannot call MultiGet with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + return std::vector(num_keys, s); + } + + values->resize(num_keys); + + std::vector stat_list(num_keys); + for 
(size_t i = 0; i < num_keys; ++i) { + stat_list[i] = Get(read_options, column_family[i], keys[i], &(*values)[i]); + } + + return stat_list; +} + +void TransactionBaseImpl::MultiGet(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const size_t num_keys, const Slice* keys, + PinnableSlice* values, Status* statuses, + const bool sorted_input) { + assert(read_options.io_activity == Env::IOActivity::kUnknown); + write_batch_.MultiGetFromBatchAndDB(db_, read_options, column_family, + num_keys, keys, values, statuses, + sorted_input); +} + +std::vector TransactionBaseImpl::MultiGetForUpdate( + const ReadOptions& read_options, + const std::vector& column_family, + const std::vector& keys, std::vector* values) { + // Regardless of whether the MultiGet succeeded, track these keys. + size_t num_keys = keys.size(); + if (read_options.io_activity != Env::IOActivity::kUnknown) { + Status s = Status::InvalidArgument( + "Cannot call MultiGetForUpdate with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + return std::vector(num_keys, s); + } + values->resize(num_keys); + + // Lock all keys + for (size_t i = 0; i < num_keys; ++i) { + Status s = TryLock(column_family[i], keys[i], true /* read_only */, + true /* exclusive */); + if (!s.ok()) { + // Fail entire multiget if we cannot lock all keys + return std::vector(num_keys, s); + } + } + + // TODO(agiardullo): optimize multiget? + std::vector stat_list(num_keys); + for (size_t i = 0; i < num_keys; ++i) { + stat_list[i] = Get(read_options, column_family[i], keys[i], &(*values)[i]); + } + + return stat_list; +} + +Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options) { + Iterator* db_iter = db_->NewIterator(read_options); + assert(db_iter); + + return write_batch_.NewIteratorWithBase(db_->DefaultColumnFamily(), db_iter, + &read_options); +} + +Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options, + ColumnFamilyHandle* column_family) { + Iterator* db_iter = db_->NewIterator(read_options, column_family); + assert(db_iter); + + return write_batch_.NewIteratorWithBase(column_family, db_iter, + &read_options); +} + +Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); + + if (s.ok()) { + s = GetBatchForWrite()->Put(column_family, key, value); + if (s.ok()) { + num_puts_++; + } + } + + return s; +} + +Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, + const SliceParts& key, const SliceParts& value, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); + + if (s.ok()) { + s = GetBatchForWrite()->Put(column_family, key, value); + if (s.ok()) { + num_puts_++; + } + } + + return s; +} + +Status TransactionBaseImpl::Merge(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); + + if (s.ok()) { + s = GetBatchForWrite()->Merge(column_family, key, value); + if (s.ok()) { + num_merges_++; + } + } + + return s; +} + +Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family, 
+ const Slice& key, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); + + if (s.ok()) { + s = GetBatchForWrite()->Delete(column_family, key); + if (s.ok()) { + num_deletes_++; + } + } + + return s; +} + +Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family, + const SliceParts& key, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); + + if (s.ok()) { + s = GetBatchForWrite()->Delete(column_family, key); + if (s.ok()) { + num_deletes_++; + } + } + + return s; +} + +Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); + + if (s.ok()) { + s = GetBatchForWrite()->SingleDelete(column_family, key); + if (s.ok()) { + num_deletes_++; + } + } + + return s; +} + +Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family, + const SliceParts& key, + const bool assume_tracked) { + const bool do_validate = !assume_tracked; + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); + + if (s.ok()) { + s = GetBatchForWrite()->SingleDelete(column_family, key); + if (s.ok()) { + num_deletes_++; + } + } + + return s; +} + +Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) { + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, false /* do_validate */); + + if (s.ok()) { + s = GetBatchForWrite()->Put(column_family, key, value); + if (s.ok()) { + num_puts_++; + } + } + + return s; +} + +Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family, + const SliceParts& key, + const SliceParts& value) { + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, false /* do_validate */); + + if (s.ok()) { + s = GetBatchForWrite()->Put(column_family, key, value); + if (s.ok()) { + num_puts_++; + } + } + + return s; +} + +Status TransactionBaseImpl::MergeUntracked(ColumnFamilyHandle* column_family, + const Slice& key, + const Slice& value) { + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, false /* do_validate */); + + if (s.ok()) { + s = GetBatchForWrite()->Merge(column_family, key, value); + if (s.ok()) { + num_merges_++; + } + } + + return s; +} + +Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family, + const Slice& key) { + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, false /* do_validate */); + + if (s.ok()) { + s = GetBatchForWrite()->Delete(column_family, key); + if (s.ok()) { + num_deletes_++; + } + } + + return s; +} + +Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family, + const SliceParts& key) { + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, false /* do_validate */); + + if (s.ok()) { + s = GetBatchForWrite()->Delete(column_family, key); + if (s.ok()) { + num_deletes_++; + } + } + + return s; +} + +Status TransactionBaseImpl::SingleDeleteUntracked( + ColumnFamilyHandle* column_family, const 
Slice& key) { + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, false /* do_validate */); + + if (s.ok()) { + s = GetBatchForWrite()->SingleDelete(column_family, key); + if (s.ok()) { + num_deletes_++; + } + } + + return s; +} + +void TransactionBaseImpl::PutLogData(const Slice& blob) { + auto s = write_batch_.PutLogData(blob); + (void)s; + assert(s.ok()); +} + +WriteBatchWithIndex* TransactionBaseImpl::GetWriteBatch() { + return &write_batch_; +} + +uint64_t TransactionBaseImpl::GetElapsedTime() const { + return (dbimpl_->GetSystemClock()->NowMicros() - start_time_) / 1000; +} + +uint64_t TransactionBaseImpl::GetNumPuts() const { return num_puts_; } + +uint64_t TransactionBaseImpl::GetNumDeletes() const { return num_deletes_; } + +uint64_t TransactionBaseImpl::GetNumMerges() const { return num_merges_; } + +uint64_t TransactionBaseImpl::GetNumKeys() const { + return tracked_locks_->GetNumPointLocks(); +} + +void TransactionBaseImpl::TrackKey(uint32_t cfh_id, const std::string& key, + SequenceNumber seq, bool read_only, + bool exclusive) { + PointLockRequest r; + r.column_family_id = cfh_id; + r.key = key; + r.seq = seq; + r.read_only = read_only; + r.exclusive = exclusive; + + // Update map of all tracked keys for this transaction + tracked_locks_->Track(r); + + if (save_points_ != nullptr && !save_points_->empty()) { + // Update map of tracked keys in this SavePoint + save_points_->top().new_locks_->Track(r); + } +} + +// Gets the write batch that should be used for Put/Merge/Deletes. +// +// Returns either a WriteBatch or WriteBatchWithIndex depending on whether +// DisableIndexing() has been called. +WriteBatchBase* TransactionBaseImpl::GetBatchForWrite() { + if (indexing_enabled_) { + // Use WriteBatchWithIndex + return &write_batch_; + } else { + // Don't use WriteBatchWithIndex. Return base WriteBatch. + return write_batch_.GetWriteBatch(); + } +} + +void TransactionBaseImpl::ReleaseSnapshot(const Snapshot* snapshot, DB* db) { + if (snapshot != nullptr) { + ROCKS_LOG_DETAILS(dbimpl_->immutable_db_options().info_log, + "ReleaseSnapshot %" PRIu64 " Set", + snapshot->GetSequenceNumber()); + db->ReleaseSnapshot(snapshot); + } +} + +void TransactionBaseImpl::UndoGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) { + PointLockRequest r; + r.column_family_id = GetColumnFamilyID(column_family); + r.key = key.ToString(); + r.read_only = true; + + bool can_untrack = false; + if (save_points_ != nullptr && !save_points_->empty()) { + // If there is no GetForUpdate of the key in this save point, + // then cannot untrack from the global lock tracker. + UntrackStatus s = save_points_->top().new_locks_->Untrack(r); + can_untrack = (s != UntrackStatus::NOT_TRACKED); + } else { + // No save point, so can untrack from the global lock tracker. + can_untrack = true; + } + + if (can_untrack) { + // If erased from the global tracker, then can unlock the key. 
+    UntrackStatus s = tracked_locks_->Untrack(r);
+    bool can_unlock = (s == UntrackStatus::REMOVED);
+    if (can_unlock) {
+      UnlockGetForUpdate(column_family, key);
+    }
+  }
+}
+
+Status TransactionBaseImpl::RebuildFromWriteBatch(WriteBatch* src_batch) {
+  struct IndexedWriteBatchBuilder : public WriteBatch::Handler {
+    Transaction* txn_;
+    DBImpl* db_;
+    IndexedWriteBatchBuilder(Transaction* txn, DBImpl* db)
+        : txn_(txn), db_(db) {
+      assert(dynamic_cast<TransactionBaseImpl*>(txn_) != nullptr);
+    }
+
+    Status PutCF(uint32_t cf, const Slice& key, const Slice& val) override {
+      return txn_->Put(db_->GetColumnFamilyHandle(cf), key, val);
+    }
+
+    Status DeleteCF(uint32_t cf, const Slice& key) override {
+      return txn_->Delete(db_->GetColumnFamilyHandle(cf), key);
+    }
+
+    Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
+      return txn_->SingleDelete(db_->GetColumnFamilyHandle(cf), key);
+    }
+
+    Status MergeCF(uint32_t cf, const Slice& key, const Slice& val) override {
+      return txn_->Merge(db_->GetColumnFamilyHandle(cf), key, val);
+    }
+
+    // This is used for reconstructing prepared transactions upon
+    // recovery. There should not be any meta markers in the batches
+    // we are processing.
+    Status MarkBeginPrepare(bool) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkEndPrepare(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkCommit(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkCommitWithTimestamp(const Slice&, const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkRollback(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+  };
+
+  IndexedWriteBatchBuilder copycat(this, dbimpl_);
+  return src_batch->Iterate(&copycat);
+}
+
+WriteBatch* TransactionBaseImpl::GetCommitTimeWriteBatch() {
+  return &commit_time_batch_;
+}
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.h
new file mode 100644
index 0000000000..bde09b6991
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_base.h
@@ -0,0 +1,382 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <stack>
+#include <string>
+#include <vector>
+
+#include "db/write_batch_internal.h"
+#include "rocksdb/db.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/snapshot.h"
+#include "rocksdb/status.h"
+#include "rocksdb/types.h"
+#include "rocksdb/utilities/transaction.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "rocksdb/utilities/write_batch_with_index.h"
+#include "util/autovector.h"
+#include "utilities/transactions/lock/lock_tracker.h"
+#include "utilities/transactions/transaction_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TransactionBaseImpl : public Transaction {
+ public:
+  TransactionBaseImpl(DB* db, const WriteOptions& write_options,
+                      const LockTrackerFactory& lock_tracker_factory);
+
+  ~TransactionBaseImpl() override;
+
+  // Remove pending operations queued in this transaction.
+  virtual void Clear();
+
+  void Reinitialize(DB* db, const WriteOptions& write_options);
+
+  // Called before executing Put, Merge, Delete, and GetForUpdate.
If TryLock + // returns non-OK, the Put/Merge/Delete/GetForUpdate will be failed. + // do_validate will be false if called from PutUntracked, DeleteUntracked, + // MergeUntracked, or GetForUpdate(do_validate=false) + virtual Status TryLock(ColumnFamilyHandle* column_family, const Slice& key, + bool read_only, bool exclusive, + const bool do_validate = true, + const bool assume_tracked = false) = 0; + + void SetSavePoint() override; + + Status RollbackToSavePoint() override; + + Status PopSavePoint() override; + + using Transaction::Get; + Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, std::string* value) override; + + Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value) override; + + Status Get(const ReadOptions& options, const Slice& key, + std::string* value) override { + return Get(options, db_->DefaultColumnFamily(), key, value); + } + + using Transaction::GetForUpdate; + Status GetForUpdate(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + std::string* value, bool exclusive, + const bool do_validate) override; + + Status GetForUpdate(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* pinnable_val, bool exclusive, + const bool do_validate) override; + + Status GetForUpdate(const ReadOptions& options, const Slice& key, + std::string* value, bool exclusive, + const bool do_validate) override { + return GetForUpdate(options, db_->DefaultColumnFamily(), key, value, + exclusive, do_validate); + } + + using Transaction::MultiGet; + std::vector MultiGet( + const ReadOptions& options, + const std::vector& column_family, + const std::vector& keys, + std::vector* values) override; + + std::vector MultiGet(const ReadOptions& options, + const std::vector& keys, + std::vector* values) override { + return MultiGet(options, + std::vector( + keys.size(), db_->DefaultColumnFamily()), + keys, values); + } + + void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, + const size_t num_keys, const Slice* keys, PinnableSlice* values, + Status* statuses, const bool sorted_input = false) override; + + using Transaction::MultiGetForUpdate; + std::vector MultiGetForUpdate( + const ReadOptions& options, + const std::vector& column_family, + const std::vector& keys, + std::vector* values) override; + + std::vector MultiGetForUpdate( + const ReadOptions& options, const std::vector& keys, + std::vector* values) override { + return MultiGetForUpdate(options, + std::vector( + keys.size(), db_->DefaultColumnFamily()), + keys, values); + } + + Iterator* GetIterator(const ReadOptions& read_options) override; + Iterator* GetIterator(const ReadOptions& read_options, + ColumnFamilyHandle* column_family) override; + + Status Put(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value, const bool assume_tracked = false) override; + Status Put(const Slice& key, const Slice& value) override { + return Put(nullptr, key, value); + } + + Status Put(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value, + const bool assume_tracked = false) override; + Status Put(const SliceParts& key, const SliceParts& value) override { + return Put(nullptr, key, value); + } + + Status Merge(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value, const bool assume_tracked = false) override; + Status Merge(const Slice& key, const Slice& value) override { + return 
Merge(nullptr, key, value); + } + + Status Delete(ColumnFamilyHandle* column_family, const Slice& key, + const bool assume_tracked = false) override; + Status Delete(const Slice& key) override { return Delete(nullptr, key); } + Status Delete(ColumnFamilyHandle* column_family, const SliceParts& key, + const bool assume_tracked = false) override; + Status Delete(const SliceParts& key) override { return Delete(nullptr, key); } + + Status SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, + const bool assume_tracked = false) override; + Status SingleDelete(const Slice& key) override { + return SingleDelete(nullptr, key); + } + Status SingleDelete(ColumnFamilyHandle* column_family, const SliceParts& key, + const bool assume_tracked = false) override; + Status SingleDelete(const SliceParts& key) override { + return SingleDelete(nullptr, key); + } + + Status PutUntracked(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) override; + Status PutUntracked(const Slice& key, const Slice& value) override { + return PutUntracked(nullptr, key, value); + } + + Status PutUntracked(ColumnFamilyHandle* column_family, const SliceParts& key, + const SliceParts& value) override; + Status PutUntracked(const SliceParts& key, const SliceParts& value) override { + return PutUntracked(nullptr, key, value); + } + + Status MergeUntracked(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value) override; + Status MergeUntracked(const Slice& key, const Slice& value) override { + return MergeUntracked(nullptr, key, value); + } + + Status DeleteUntracked(ColumnFamilyHandle* column_family, + const Slice& key) override; + Status DeleteUntracked(const Slice& key) override { + return DeleteUntracked(nullptr, key); + } + Status DeleteUntracked(ColumnFamilyHandle* column_family, + const SliceParts& key) override; + Status DeleteUntracked(const SliceParts& key) override { + return DeleteUntracked(nullptr, key); + } + + Status SingleDeleteUntracked(ColumnFamilyHandle* column_family, + const Slice& key) override; + Status SingleDeleteUntracked(const Slice& key) override { + return SingleDeleteUntracked(nullptr, key); + } + + void PutLogData(const Slice& blob) override; + + WriteBatchWithIndex* GetWriteBatch() override; + + virtual void SetLockTimeout(int64_t /*timeout*/) override { /* Do nothing */ + } + + const Snapshot* GetSnapshot() const override { + // will return nullptr when there is no snapshot + return snapshot_.get(); + } + + std::shared_ptr GetTimestampedSnapshot() const override { + return snapshot_; + } + + virtual void SetSnapshot() override; + void SetSnapshotOnNextOperation( + std::shared_ptr notifier = nullptr) override; + + void ClearSnapshot() override { + snapshot_.reset(); + snapshot_needed_ = false; + snapshot_notifier_ = nullptr; + } + + void DisableIndexing() override { indexing_enabled_ = false; } + + void EnableIndexing() override { indexing_enabled_ = true; } + + bool IndexingEnabled() const { return indexing_enabled_; } + + uint64_t GetElapsedTime() const override; + + uint64_t GetNumPuts() const override; + + uint64_t GetNumDeletes() const override; + + uint64_t GetNumMerges() const override; + + uint64_t GetNumKeys() const override; + + void UndoGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) override; + void UndoGetForUpdate(const Slice& key) override { + return UndoGetForUpdate(nullptr, key); + }; + + WriteOptions* GetWriteOptions() override { return &write_options_; } + + void SetWriteOptions(const WriteOptions& 
write_options) override { + write_options_ = write_options; + } + + // Used for memory management for snapshot_ + void ReleaseSnapshot(const Snapshot* snapshot, DB* db); + + // Iterates over the given batch and makes the appropriate inserts. + // Used for rebuilding prepared transactions after recovery. + virtual Status RebuildFromWriteBatch(WriteBatch* src_batch) override; + + WriteBatch* GetCommitTimeWriteBatch() override; + + LockTracker& GetTrackedLocks() { return *tracked_locks_; } + + protected: + // Add a key to the list of tracked keys. + // + // seqno is the earliest seqno this key was involved with this transaction. + // readonly should be set to true if no data was written for this key + void TrackKey(uint32_t cfh_id, const std::string& key, SequenceNumber seqno, + bool readonly, bool exclusive); + + // Called when UndoGetForUpdate determines that this key can be unlocked. + virtual void UnlockGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) = 0; + + // Sets a snapshot if SetSnapshotOnNextOperation() has been called. + void SetSnapshotIfNeeded(); + + // Initialize write_batch_ for 2PC by inserting Noop. + inline void InitWriteBatch(bool clear = false) { + if (clear) { + write_batch_.Clear(); + } + assert(write_batch_.GetDataSize() == WriteBatchInternal::kHeader); + auto s = WriteBatchInternal::InsertNoop(write_batch_.GetWriteBatch()); + assert(s.ok()); + } + + WriteBatchBase* GetBatchForWrite(); + + DB* db_; + DBImpl* dbimpl_; + + WriteOptions write_options_; + + const Comparator* cmp_; + + const LockTrackerFactory& lock_tracker_factory_; + + // Stores the time the txn was constructed, in microseconds. + uint64_t start_time_; + + // Stores the current snapshot that was set by SetSnapshot or null if + // no snapshot is currently set. + std::shared_ptr<const Snapshot> snapshot_; + + // Count of various operations pending in this transaction + uint64_t num_puts_ = 0; + uint64_t num_deletes_ = 0; + uint64_t num_merges_ = 0; + + struct SavePoint { + std::shared_ptr<const Snapshot> snapshot_; + bool snapshot_needed_ = false; + std::shared_ptr<TransactionNotifier> snapshot_notifier_; + uint64_t num_puts_ = 0; + uint64_t num_deletes_ = 0; + uint64_t num_merges_ = 0; + + // Record all locks tracked since the last savepoint + std::shared_ptr<LockTracker> new_locks_; + + SavePoint(std::shared_ptr<const Snapshot> snapshot, bool snapshot_needed, + std::shared_ptr<TransactionNotifier> snapshot_notifier, + uint64_t num_puts, uint64_t num_deletes, uint64_t num_merges, + const LockTrackerFactory& lock_tracker_factory) + : snapshot_(snapshot), + snapshot_needed_(snapshot_needed), + snapshot_notifier_(snapshot_notifier), + num_puts_(num_puts), + num_deletes_(num_deletes), + num_merges_(num_merges), + new_locks_(lock_tracker_factory.Create()) {} + + explicit SavePoint(const LockTrackerFactory& lock_tracker_factory) + : new_locks_(lock_tracker_factory.Create()) {} + }; + + // Records writes pending in this transaction + WriteBatchWithIndex write_batch_; + + // For Pessimistic Transactions this is the set of acquired locks. + // Optimistic Transactions will keep note of the requested locks (not + // actually locked), and defer conflict checking to commit time based on + // the tracked lock requests. + std::unique_ptr<LockTracker> tracked_locks_; + + // Stack of the Snapshot saved at each save point. Saved snapshots may be + // nullptr if there was no snapshot at the time SetSavePoint() was called. + std::unique_ptr<std::stack<TransactionBaseImpl::SavePoint, autovector<TransactionBaseImpl::SavePoint>>> + save_points_; + + private: + friend class WriteCommittedTxn; + friend class WritePreparedTxn; + + // Extra data to be persisted with the commit.
Note this is only used when + // prepare phase is not skipped. + WriteBatch commit_time_batch_; + + // If true, future Put/Merge/Deletes will be indexed in the + // WriteBatchWithIndex. + // If false, future Put/Merge/Deletes will be inserted directly into the + // underlying WriteBatch and not indexed in the WriteBatchWithIndex. + bool indexing_enabled_; + + // SetSnapshotOnNextOperation() has been called and the snapshot has not yet + // been reset. + bool snapshot_needed_ = false; + + // SetSnapshotOnNextOperation() has been called and the caller would like + // a notification through the TransactionNotifier interface + std::shared_ptr snapshot_notifier_ = nullptr; + + Status TryLock(ColumnFamilyHandle* column_family, const SliceParts& key, + bool read_only, bool exclusive, const bool do_validate = true, + const bool assume_tracked = false); + + void SetSnapshotInternal(const Snapshot* snapshot); +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.cc new file mode 100644 index 0000000000..52c299b376 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.cc @@ -0,0 +1,133 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "utilities/transactions/transaction_db_mutex_impl.h" + +#include +#include +#include +#include + +#include "rocksdb/utilities/transaction_db_mutex.h" + +namespace ROCKSDB_NAMESPACE { + +class TransactionDBMutexImpl : public TransactionDBMutex { + public: + TransactionDBMutexImpl() {} + ~TransactionDBMutexImpl() override {} + + Status Lock() override; + + Status TryLockFor(int64_t timeout_time) override; + + void UnLock() override { mutex_.unlock(); } + + friend class TransactionDBCondVarImpl; + + private: + std::mutex mutex_; +}; + +class TransactionDBCondVarImpl : public TransactionDBCondVar { + public: + TransactionDBCondVarImpl() {} + ~TransactionDBCondVarImpl() override {} + + Status Wait(std::shared_ptr mutex) override; + + Status WaitFor(std::shared_ptr mutex, + int64_t timeout_time) override; + + void Notify() override { cv_.notify_one(); } + + void NotifyAll() override { cv_.notify_all(); } + + private: + std::condition_variable cv_; +}; + +std::shared_ptr +TransactionDBMutexFactoryImpl::AllocateMutex() { + return std::shared_ptr(new TransactionDBMutexImpl()); +} + +std::shared_ptr +TransactionDBMutexFactoryImpl::AllocateCondVar() { + return std::shared_ptr(new TransactionDBCondVarImpl()); +} + +Status TransactionDBMutexImpl::Lock() { + mutex_.lock(); + return Status::OK(); +} + +Status TransactionDBMutexImpl::TryLockFor(int64_t timeout_time) { + bool locked = true; + + if (timeout_time == 0) { + locked = mutex_.try_lock(); + } else { + // Previously, this code used a std::timed_mutex. However, this was changed + // due to known bugs in gcc versions < 4.9. + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54562 + // + // Since this mutex isn't held for long and only a single mutex is ever + // held at a time, it is reasonable to ignore the lock timeout_time here + // and only check it when waiting on the condition_variable. 
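+ //
+ // Illustrative sketch (editorial, not part of the vendored sources): on a
+ // modern toolchain the timed path could instead use std::timed_mutex, which
+ // is exactly the approach the comment above says was avoided:
+ //
+ //   std::timed_mutex timed_mu;
+ //   bool locked_in_time =
+ //       timed_mu.try_lock_for(std::chrono::microseconds(timeout_time));
+ //
+ // The vendored implementation instead falls through to the plain blocking
+ // lock below and enforces timeouts only in TransactionDBCondVarImpl::WaitFor.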
+ mutex_.lock(); + } + + if (!locked) { + // timeout acquiring mutex + return Status::TimedOut(Status::SubCode::kMutexTimeout); + } + + return Status::OK(); +} + +Status TransactionDBCondVarImpl::Wait( + std::shared_ptr mutex) { + auto mutex_impl = reinterpret_cast(mutex.get()); + + std::unique_lock lock(mutex_impl->mutex_, std::adopt_lock); + cv_.wait(lock); + + // Make sure unique_lock doesn't unlock mutex when it destructs + lock.release(); + + return Status::OK(); +} + +Status TransactionDBCondVarImpl::WaitFor( + std::shared_ptr mutex, int64_t timeout_time) { + Status s; + + auto mutex_impl = reinterpret_cast(mutex.get()); + std::unique_lock lock(mutex_impl->mutex_, std::adopt_lock); + + if (timeout_time < 0) { + // If timeout is negative, do not use a timeout + cv_.wait(lock); + } else { + auto duration = std::chrono::microseconds(timeout_time); + auto cv_status = cv_.wait_for(lock, duration); + + // Check if the wait stopped due to timing out. + if (cv_status == std::cv_status::timeout) { + s = Status::TimedOut(Status::SubCode::kMutexTimeout); + } + } + + // Make sure unique_lock doesn't unlock mutex when it destructs + lock.release(); + + // CV was signaled, or we spuriously woke up (but didn't time out) + return s; +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.h new file mode 100644 index 0000000000..f509f4017a --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_db_mutex_impl.h @@ -0,0 +1,24 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/utilities/transaction_db_mutex.h" + +namespace ROCKSDB_NAMESPACE { + +class TransactionDBMutex; +class TransactionDBCondVar; + +// Default implementation of TransactionDBMutexFactory. May be overridden +// by TransactionDBOptions.custom_mutex_factory. +class TransactionDBMutexFactoryImpl : public TransactionDBMutexFactory { + public: + std::shared_ptr AllocateMutex() override; + std::shared_ptr AllocateCondVar() override; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_test.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_test.h new file mode 100644 index 0000000000..0b86453a40 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_test.h @@ -0,0 +1,578 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
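+ // Illustrative sketch (editorial, not part of the vendored sources): the
+ // factory defined in transaction_db_mutex_impl.{h,cc} above is the default,
+ // but a caller can also install it (or a custom implementation) explicitly
+ // through the TransactionDBOptions::custom_mutex_factory field:
+ //
+ //   ROCKSDB_NAMESPACE::TransactionDBOptions txn_db_options;
+ //   txn_db_options.custom_mutex_factory =
+ //       std::make_shared<ROCKSDB_NAMESPACE::TransactionDBMutexFactoryImpl>();
+ //   // TransactionDB::Open(...) then allocates every lock-manager mutex and
+ //   // condition variable through this factory.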
+ +#pragma once + +#include +#include +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "db/db_test_util.h" +#include "port/port.h" +#include "rocksdb/db.h" +#include "rocksdb/options.h" +#include "rocksdb/utilities/transaction.h" +#include "rocksdb/utilities/transaction_db.h" +#include "table/mock_table.h" +#include "test_util/sync_point.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "test_util/transaction_test_util.h" +#include "util/random.h" +#include "util/string_util.h" +#include "utilities/fault_injection_env.h" +#include "utilities/merge_operators.h" +#include "utilities/merge_operators/string_append/stringappend.h" +#include "utilities/transactions/pessimistic_transaction_db.h" +#include "utilities/transactions/write_unprepared_txn_db.h" + +namespace ROCKSDB_NAMESPACE { + +// Return true if the ith bit is set in combination represented by comb +bool IsInCombination(size_t i, size_t comb) { return comb & (size_t(1) << i); } + +enum WriteOrdering : bool { kOrderedWrite, kUnorderedWrite }; + +class TransactionTestBase : public ::testing::Test { + public: + TransactionDB* db; + SpecialEnv special_env; + FaultInjectionTestEnv* env; + std::string dbname; + Options options; + + TransactionDBOptions txn_db_options; + bool use_stackable_db_; + + TransactionTestBase(bool use_stackable_db, bool two_write_queue, + TxnDBWritePolicy write_policy, + WriteOrdering write_ordering) + : db(nullptr), + special_env(Env::Default()), + env(nullptr), + use_stackable_db_(use_stackable_db) { + options.create_if_missing = true; + options.max_write_buffer_number = 2; + options.write_buffer_size = 4 * 1024; + options.unordered_write = write_ordering == kUnorderedWrite; + options.level0_file_num_compaction_trigger = 2; + options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); + special_env.skip_fsync_ = true; + env = new FaultInjectionTestEnv(&special_env); + options.env = env; + options.two_write_queues = two_write_queue; + dbname = test::PerThreadDBPath("transaction_testdb"); + + EXPECT_OK(DestroyDB(dbname, options)); + txn_db_options.transaction_lock_timeout = 0; + txn_db_options.default_lock_timeout = 0; + txn_db_options.write_policy = write_policy; + txn_db_options.rollback_merge_operands = true; + // This will stress write unprepared, by forcing write batch flush on every + // write. + txn_db_options.default_write_batch_flush_threshold = 1; + // Write unprepared requires all transactions to be named. This setting + // autogenerates the name so that existing tests can pass. + txn_db_options.autogenerate_name = true; + Status s; + if (use_stackable_db == false) { + s = TransactionDB::Open(options, txn_db_options, dbname, &db); + } else { + s = OpenWithStackableDB(); + } + EXPECT_OK(s); + } + + ~TransactionTestBase() { + delete db; + db = nullptr; + // This is to skip the assert statement in FaultInjectionTestEnv. There + // seems to be a bug in btrfs that makes readdir return recently + // unlinked files. By using the default fs we simply ignore errors resulting + // from attempting to delete such files in DestroyDB.
+ if (getenv("KEEP_DB") == nullptr) { + options.env = Env::Default(); + EXPECT_OK(DestroyDB(dbname, options)); + } else { + fprintf(stdout, "db is still in %s\n", dbname.c_str()); + } + delete env; + } + + Status ReOpenNoDelete() { + delete db; + db = nullptr; + env->AssertNoOpenFile(); + env->DropUnsyncedFileData(); + env->ResetState(); + Status s; + if (use_stackable_db_ == false) { + s = TransactionDB::Open(options, txn_db_options, dbname, &db); + } else { + s = OpenWithStackableDB(); + } + assert(!s.ok() || db != nullptr); + return s; + } + + Status ReOpenNoDelete(std::vector& cfs, + std::vector* handles) { + for (auto h : *handles) { + delete h; + } + handles->clear(); + delete db; + db = nullptr; + env->AssertNoOpenFile(); + env->DropUnsyncedFileData(); + env->ResetState(); + Status s; + if (use_stackable_db_ == false) { + s = TransactionDB::Open(options, txn_db_options, dbname, cfs, handles, + &db); + } else { + s = OpenWithStackableDB(cfs, handles); + } + assert(!s.ok() || db != nullptr); + return s; + } + + Status ReOpen() { + delete db; + db = nullptr; + DestroyDB(dbname, options); + Status s; + if (use_stackable_db_ == false) { + s = TransactionDB::Open(options, txn_db_options, dbname, &db); + } else { + s = OpenWithStackableDB(); + } + assert(db != nullptr); + return s; + } + + Status OpenWithStackableDB(std::vector& cfs, + std::vector* handles) { + std::vector compaction_enabled_cf_indices; + TransactionDB::PrepareWrap(&options, &cfs, &compaction_enabled_cf_indices); + DB* root_db = nullptr; + Options options_copy(options); + const bool use_seq_per_batch = + txn_db_options.write_policy == WRITE_PREPARED || + txn_db_options.write_policy == WRITE_UNPREPARED; + const bool use_batch_per_txn = + txn_db_options.write_policy == WRITE_COMMITTED || + txn_db_options.write_policy == WRITE_PREPARED; + Status s = DBImpl::Open(options_copy, dbname, cfs, handles, &root_db, + use_seq_per_batch, use_batch_per_txn); + auto stackable_db = std::make_unique(root_db); + if (s.ok()) { + assert(root_db != nullptr); + // If WrapStackableDB() returns non-ok, then stackable_db is already + // deleted within WrapStackableDB(). 
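+ // Illustrative sketch (editorial, not part of the vendored sources): the
+ // release() below exists because WrapStackableDB() takes ownership and, on
+ // failure, deletes the wrapper itself, so the unique_ptr must let go first:
+ //
+ //   std::unique_ptr<StackableDB> wrapper(new StackableDB(root_db));
+ //   StackableDB* raw = wrapper.release();  // callee-owned from here on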
+ s = TransactionDB::WrapStackableDB(stackable_db.release(), txn_db_options, + compaction_enabled_cf_indices, + *handles, &db); + } + return s; + } + + Status OpenWithStackableDB() { + std::vector compaction_enabled_cf_indices; + std::vector column_families{ColumnFamilyDescriptor( + kDefaultColumnFamilyName, ColumnFamilyOptions(options))}; + + TransactionDB::PrepareWrap(&options, &column_families, + &compaction_enabled_cf_indices); + std::vector handles; + DB* root_db = nullptr; + Options options_copy(options); + const bool use_seq_per_batch = + txn_db_options.write_policy == WRITE_PREPARED || + txn_db_options.write_policy == WRITE_UNPREPARED; + const bool use_batch_per_txn = + txn_db_options.write_policy == WRITE_COMMITTED || + txn_db_options.write_policy == WRITE_PREPARED; + Status s = DBImpl::Open(options_copy, dbname, column_families, &handles, + &root_db, use_seq_per_batch, use_batch_per_txn); + if (!s.ok()) { + delete root_db; + return s; + } + StackableDB* stackable_db = new StackableDB(root_db); + assert(root_db != nullptr); + assert(handles.size() == 1); + s = TransactionDB::WrapStackableDB(stackable_db, txn_db_options, + compaction_enabled_cf_indices, handles, + &db); + delete handles[0]; + if (!s.ok()) { + delete stackable_db; + } + return s; + } + + std::atomic linked = {0}; + std::atomic exp_seq = {0}; + std::atomic commit_writes = {0}; + std::atomic expected_commits = {0}; + // Without Prepare, the commit does not write to WAL + std::atomic with_empty_commits = {0}; + void TestTxn0(size_t index) { + // Test DB's internal txn. It involves no prepare phase nor a commit marker. + auto s = db->Put(WriteOptions(), "key" + std::to_string(index), "value"); + ASSERT_OK(s); + if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) { + // Consume one seq per key + exp_seq++; + } else { + // Consume one seq per batch + exp_seq++; + if (options.two_write_queues) { + // Consume one seq for commit + exp_seq++; + } + } + with_empty_commits++; + } + + void TestTxn1(size_t index) { + // Testing directly writing a write batch. Functionality-wise it is + // equivalent to commit without prepare. + WriteBatch wb; + auto istr = std::to_string(index); + ASSERT_OK(wb.Put("k1" + istr, "v1")); + ASSERT_OK(wb.Put("k2" + istr, "v2")); + ASSERT_OK(wb.Put("k3" + istr, "v3")); + auto s = db->Write(WriteOptions(), &wb); + if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) { + // Consume one seq per key + exp_seq += 3; + } else { + // Consume one seq per batch + exp_seq++; + if (options.two_write_queues) { + // Consume one seq for commit + exp_seq++; + } + } + ASSERT_OK(s); + with_empty_commits++; + } + + void TestTxn2(size_t index) { + // Commit without prepare. It should write to DB without a commit marker. 
+ Transaction* txn = + db->BeginTransaction(WriteOptions(), TransactionOptions()); + auto istr = std::to_string(index); + ASSERT_OK(txn->SetName("xid" + istr)); + ASSERT_OK(txn->Put(Slice("foo" + istr), Slice("bar"))); + ASSERT_OK(txn->Put(Slice("foo2" + istr), Slice("bar2"))); + ASSERT_OK(txn->Put(Slice("foo3" + istr), Slice("bar3"))); + ASSERT_OK(txn->Put(Slice("foo4" + istr), Slice("bar4"))); + ASSERT_OK(txn->Commit()); + if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) { + // Consume one seq per key + exp_seq += 4; + } else if (txn_db_options.write_policy == + TxnDBWritePolicy::WRITE_PREPARED) { + // Consume one seq per batch + exp_seq++; + if (options.two_write_queues) { + // Consume one seq for commit + exp_seq++; + } + } else { + // Flushed after each key, consume one seq per flushed batch + exp_seq += 4; + // WriteUnprepared implements CommitWithoutPrepareInternal by simply + // calling Prepare then Commit. Consume one seq for the prepare. + exp_seq++; + } + delete txn; + with_empty_commits++; + } + + void TestTxn3(size_t index) { + // A full 2pc txn that also involves a commit marker. + Transaction* txn = + db->BeginTransaction(WriteOptions(), TransactionOptions()); + auto istr = std::to_string(index); + ASSERT_OK(txn->SetName("xid" + istr)); + ASSERT_OK(txn->Put(Slice("foo" + istr), Slice("bar"))); + ASSERT_OK(txn->Put(Slice("foo2" + istr), Slice("bar2"))); + ASSERT_OK(txn->Put(Slice("foo3" + istr), Slice("bar3"))); + ASSERT_OK(txn->Put(Slice("foo4" + istr), Slice("bar4"))); + ASSERT_OK(txn->Put(Slice("foo5" + istr), Slice("bar5"))); + expected_commits++; + ASSERT_OK(txn->Prepare()); + commit_writes++; + ASSERT_OK(txn->Commit()); + if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) { + // Consume one seq per key + exp_seq += 5; + } else if (txn_db_options.write_policy == + TxnDBWritePolicy::WRITE_PREPARED) { + // Consume one seq per batch + exp_seq++; + // Consume one seq per commit marker + exp_seq++; + } else { + // Flushed after each key, consume one seq per flushed batch + exp_seq += 5; + // Consume one seq per commit marker + exp_seq++; + } + delete txn; + } + + void TestTxn4(size_t index) { + // A full 2pc txn that also involves a commit marker. 
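+ //
+ // Illustrative sketch (editorial, not part of the vendored sources): the
+ // minimal 2PC call sequence that TestTxn3 exercises, assuming an open
+ // TransactionDB* txn_db:
+ //
+ //   Transaction* txn = txn_db->BeginTransaction(WriteOptions());
+ //   ASSERT_OK(txn->SetName("example-xid"));  // 2PC requires a named txn
+ //   ASSERT_OK(txn->Put("key", "value"));
+ //   ASSERT_OK(txn->Prepare());               // writes the prepare marker
+ //   ASSERT_OK(txn->Commit());                // writes the commit marker
+ //   delete txn;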
+ Transaction* txn = + db->BeginTransaction(WriteOptions(), TransactionOptions()); + auto istr = std::to_string(index); + ASSERT_OK(txn->SetName("xid" + istr)); + ASSERT_OK(txn->Put(Slice("foo" + istr), Slice("bar"))); + ASSERT_OK(txn->Put(Slice("foo2" + istr), Slice("bar2"))); + ASSERT_OK(txn->Put(Slice("foo3" + istr), Slice("bar3"))); + ASSERT_OK(txn->Put(Slice("foo4" + istr), Slice("bar4"))); + ASSERT_OK(txn->Put(Slice("foo5" + istr), Slice("bar5"))); + expected_commits++; + ASSERT_OK(txn->Prepare()); + commit_writes++; + ASSERT_OK(txn->Rollback()); + if (txn_db_options.write_policy == TxnDBWritePolicy::WRITE_COMMITTED) { + // No seq is consumed for deleting the txn buffer + exp_seq += 0; + } else if (txn_db_options.write_policy == + TxnDBWritePolicy::WRITE_PREPARED) { + // Consume one seq per batch + exp_seq++; + // Consume one seq per rollback batch + exp_seq++; + if (options.two_write_queues) { + // Consume one seq for rollback commit + exp_seq++; + } + } else { + // Flushed after each key, consume one seq per flushed batch + exp_seq += 5; + // Consume one seq per rollback batch + exp_seq++; + if (options.two_write_queues) { + // Consume one seq for rollback commit + exp_seq++; + } + } + delete txn; + } + + // Test that we can change write policy after a clean shutdown (which would + // empty the WAL) + void CrossCompatibilityTest(TxnDBWritePolicy from_policy, + TxnDBWritePolicy to_policy, bool empty_wal) { + TransactionOptions txn_options; + ReadOptions read_options; + WriteOptions write_options; + uint32_t index = 0; + Random rnd(1103); + options.write_buffer_size = 1024; // To create more sst files + std::unordered_map committed_kvs; + Transaction* txn; + + txn_db_options.write_policy = from_policy; + if (txn_db_options.write_policy == WRITE_COMMITTED) { + options.unordered_write = false; + } + ASSERT_OK(ReOpen()); + + for (int i = 0; i < 1024; i++) { + auto istr = std::to_string(index); + auto k = Slice("foo-" + istr).ToString(); + auto v = Slice("bar-" + istr).ToString(); + // For test the duplicate keys + auto v2 = Slice("bar2-" + istr).ToString(); + auto type = rnd.Uniform(4); + switch (type) { + case 0: + committed_kvs[k] = v; + ASSERT_OK(db->Put(write_options, k, v)); + committed_kvs[k] = v2; + ASSERT_OK(db->Put(write_options, k, v2)); + break; + case 1: { + WriteBatch wb; + committed_kvs[k] = v; + ASSERT_OK(wb.Put(k, v)); + committed_kvs[k] = v2; + ASSERT_OK(wb.Put(k, v2)); + ASSERT_OK(db->Write(write_options, &wb)); + + } break; + case 2: + case 3: + txn = db->BeginTransaction(write_options, txn_options); + ASSERT_OK(txn->SetName("xid" + istr)); + committed_kvs[k] = v; + ASSERT_OK(txn->Put(k, v)); + committed_kvs[k] = v2; + ASSERT_OK(txn->Put(k, v2)); + + if (type == 3) { + ASSERT_OK(txn->Prepare()); + } + ASSERT_OK(txn->Commit()); + delete txn; + break; + default: + FAIL(); + } + + index++; + } // for i + + txn_db_options.write_policy = to_policy; + if (txn_db_options.write_policy == WRITE_COMMITTED) { + options.unordered_write = false; + } + auto db_impl = static_cast_with_check(db->GetRootDB()); + // Before upgrade/downgrade the WAL must be emptied + if (empty_wal) { + ASSERT_OK(db_impl->TEST_FlushMemTable()); + } else { + ASSERT_OK(db_impl->FlushWAL(true)); + } + auto s = ReOpenNoDelete(); + if (empty_wal) { + ASSERT_OK(s); + } else { + // Test that we can detect the WAL that is produced by an incompatible + // WritePolicy and fail fast before mis-interpreting the WAL. 
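+ //
+ // Illustrative sketch (editorial, not part of the vendored sources): the
+ // migration rule verified here is that the WAL must be emptied, e.g. by
+ // flushing all memtables, before reopening with a different TxnDBWritePolicy:
+ //
+ //   ASSERT_OK(db->Flush(FlushOptions()));  // retires the old-format WAL
+ //   // ...after which reopening under the new policy succeeds.
+ //
+ // Without that flush, the reopen must fail fast with Status::NotSupported: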
+ ASSERT_TRUE(s.IsNotSupported()); + return; + } + db_impl = static_cast_with_check<DBImpl>(db->GetRootDB()); + // Check that WAL is empty + VectorLogPtr log_files; + ASSERT_OK(db_impl->GetSortedWalFiles(log_files)); + ASSERT_EQ(0, log_files.size()); + + for (auto& kv : committed_kvs) { + std::string value; + s = db->Get(read_options, kv.first, &value); + if (s.IsNotFound()) { + printf("key = %s\n", kv.first.c_str()); + } + ASSERT_OK(s); + if (kv.second != value) { + printf("key = %s\n", kv.first.c_str()); + } + ASSERT_EQ(kv.second, value); + } + } +}; + +class TransactionTest + : public TransactionTestBase, + virtual public ::testing::WithParamInterface< + std::tuple<bool, bool, TxnDBWritePolicy, WriteOrdering>> { + public: + TransactionTest() + : TransactionTestBase(std::get<0>(GetParam()), std::get<1>(GetParam()), + std::get<2>(GetParam()), std::get<3>(GetParam())){}; +}; + +class TransactionStressTest : public TransactionTest {}; + +class MySQLStyleTransactionTest + : public TransactionTestBase, + virtual public ::testing::WithParamInterface< + std::tuple<bool, bool, TxnDBWritePolicy, WriteOrdering, bool>> { + public: + MySQLStyleTransactionTest() + : TransactionTestBase(std::get<0>(GetParam()), std::get<1>(GetParam()), + std::get<2>(GetParam()), std::get<3>(GetParam())), + with_slow_threads_(std::get<4>(GetParam())) { + if (with_slow_threads_ && + (txn_db_options.write_policy == WRITE_PREPARED || + txn_db_options.write_policy == WRITE_UNPREPARED)) { + // The corner case with slow threads involves the caches filling + // over which would not happen even with artificial delays. To help + // such cases show up we lower the size of the cache-related data + // structures. + txn_db_options.wp_snapshot_cache_bits = 1; + txn_db_options.wp_commit_cache_bits = 10; + options.write_buffer_size = 1024; + EXPECT_OK(ReOpen()); + } + }; + + protected: + // Also emulate slow threads by adding artificial delays + const bool with_slow_threads_; +}; + +class WriteCommittedTxnWithTsTest + : public TransactionTestBase, + public ::testing::WithParamInterface<std::tuple<bool, bool, bool>> { + public: + WriteCommittedTxnWithTsTest() + : TransactionTestBase(std::get<0>(GetParam()), std::get<1>(GetParam()), + WRITE_COMMITTED, kOrderedWrite) {} + ~WriteCommittedTxnWithTsTest() override { + for (auto* h : handles_) { + delete h; + } + } + + Status GetFromDb(ReadOptions read_opts, ColumnFamilyHandle* column_family, + const Slice& key, TxnTimestamp ts, std::string* value) { + std::string ts_buf; + PutFixed64(&ts_buf, ts); + Slice ts_slc = ts_buf; + read_opts.timestamp = &ts_slc; + assert(db); + return db->Get(read_opts, column_family, key, value); + } + + Transaction* NewTxn(WriteOptions write_opts, TransactionOptions txn_opts) { + assert(db); + auto* txn = db->BeginTransaction(write_opts, txn_opts); + assert(txn); + const bool enable_indexing = std::get<2>(GetParam()); + if (enable_indexing) { + txn->EnableIndexing(); + } else { + txn->DisableIndexing(); + } + return txn; + } + + protected: + std::vector<ColumnFamilyHandle*> handles_{}; +}; + +class TimestampedSnapshotWithTsSanityCheck + : public TransactionTestBase, + public ::testing::WithParamInterface< + std::tuple<bool, bool, TxnDBWritePolicy, WriteOrdering>> { + public: + explicit TimestampedSnapshotWithTsSanityCheck() + : TransactionTestBase(std::get<0>(GetParam()), std::get<1>(GetParam()), + std::get<2>(GetParam()), std::get<3>(GetParam())) {} + ~TimestampedSnapshotWithTsSanityCheck() override { + for (auto* h : handles_) { + delete h; + } + } + + protected: + std::vector<ColumnFamilyHandle*> handles_{}; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.cc
b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.cc new file mode 100644 index 0000000000..d5a68807e7 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.cc @@ -0,0 +1,204 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "utilities/transactions/transaction_util.h" + +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "rocksdb/status.h" +#include "rocksdb/utilities/write_batch_with_index.h" +#include "util/cast_util.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +Status TransactionUtil::CheckKeyForConflicts( + DBImpl* db_impl, ColumnFamilyHandle* column_family, const std::string& key, + SequenceNumber snap_seq, const std::string* const read_ts, bool cache_only, + ReadCallback* snap_checker, SequenceNumber min_uncommitted) { + Status result; + + auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family); + auto cfd = cfh->cfd(); + SuperVersion* sv = db_impl->GetAndRefSuperVersion(cfd); + + if (sv == nullptr) { + result = Status::InvalidArgument("Could not access column family " + + cfh->GetName()); + } + + if (result.ok()) { + SequenceNumber earliest_seq = + db_impl->GetEarliestMemTableSequenceNumber(sv, true); + + result = CheckKey(db_impl, sv, earliest_seq, snap_seq, key, read_ts, + cache_only, snap_checker, min_uncommitted); + + db_impl->ReturnAndCleanupSuperVersion(cfd, sv); + } + + return result; +} + +Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv, + SequenceNumber earliest_seq, + SequenceNumber snap_seq, + const std::string& key, + const std::string* const read_ts, + bool cache_only, ReadCallback* snap_checker, + SequenceNumber min_uncommitted) { + // When `min_uncommitted` is provided, keys are not always committed + // in sequence number order, and `snap_checker` is used to check whether + // a specific sequence number in the database is visible to the transaction. + // So `snap_checker` must be provided. + assert(min_uncommitted == kMaxSequenceNumber || snap_checker != nullptr); + + Status result; + bool need_to_read_sst = false; + + // Since it would be too slow to check the SST files, we will only use + // the memtables to check whether there have been any recent writes + // to this key after it was accessed in this transaction. But if the + // Memtables do not contain a long enough history, we must fail the + // transaction. + if (earliest_seq == kMaxSequenceNumber) { + // The age of this memtable is unknown. Cannot rely on it to check + // for recent writes. This error shouldn't happen often in practice as + // the Memtable should have a valid earliest sequence number except in some + // corner cases (such as error cases during recovery). + need_to_read_sst = true; + + if (cache_only) { + result = Status::TryAgain( + "Transaction could not check for conflicts as the MemTable does not " + "contain a long enough history to check write at SequenceNumber: ", + std::to_string(snap_seq)); + } + } else if (snap_seq < earliest_seq || min_uncommitted <= earliest_seq) { + // Use <= for min_uncommitted since earliest_seq is actually the largest seq + // before this memtable was created + need_to_read_sst = true; + + if (cache_only) { + // The age of this memtable is too new to use to check for recent + // writes.
+ char msg[300]; + snprintf(msg, sizeof(msg), + "Transaction could not check for conflicts for operation at " + "SequenceNumber %" PRIu64 + " as the MemTable only contains changes newer than " + "SequenceNumber %" PRIu64 + ". Increasing the value of the " + "max_write_buffer_size_to_maintain option could reduce the " + "frequency " + "of this error.", + snap_seq, earliest_seq); + result = Status::TryAgain(msg); + } + } + + if (result.ok()) { + SequenceNumber seq = kMaxSequenceNumber; + std::string timestamp; + bool found_record_for_key = false; + + // When min_uncommitted == kMaxSequenceNumber, writes are committed in + // sequence number order, so only keys larger than `snap_seq` can cause + // conflict. + // When min_uncommitted != kMaxSequenceNumber, keys lower than + // min_uncommitted will not trigger conflicts, while keys larger than + // min_uncommitted might create conflicts, so we need to read them out + // from the DB, and call snap_checker to determine. So only + // keys lower than min_uncommitted can be skipped. + SequenceNumber lower_bound_seq = + (min_uncommitted == kMaxSequenceNumber) ? snap_seq : min_uncommitted; + Status s = db_impl->GetLatestSequenceForKey( + sv, key, !need_to_read_sst, lower_bound_seq, &seq, + !read_ts ? nullptr : &timestamp, &found_record_for_key, + /*is_blob_index=*/nullptr); + + if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { + result = s; + } else if (found_record_for_key) { + bool write_conflict = snap_checker == nullptr + ? snap_seq < seq + : !snap_checker->IsVisible(seq); + // Perform conflict checking based on timestamp if applicable. + if (!write_conflict && read_ts != nullptr) { + ColumnFamilyData* cfd = sv->cfd; + assert(cfd); + const Comparator* const ucmp = cfd->user_comparator(); + assert(ucmp); + assert(read_ts->size() == ucmp->timestamp_size()); + assert(read_ts->size() == timestamp.size()); + // Write conflict if *ts < timestamp. + write_conflict = ucmp->CompareTimestamp(*read_ts, timestamp) < 0; + } + if (write_conflict) { + result = Status::Busy(); + } + } + } + + return result; +} + +Status TransactionUtil::CheckKeysForConflicts(DBImpl* db_impl, + const LockTracker& tracker, + bool cache_only) { + Status result; + + std::unique_ptr<LockTracker::ColumnFamilyIterator> cf_it( + tracker.GetColumnFamilyIterator()); + assert(cf_it != nullptr); + while (cf_it->HasNext()) { + ColumnFamilyId cf = cf_it->Next(); + + SuperVersion* sv = db_impl->GetAndRefSuperVersion(cf); + if (sv == nullptr) { + result = Status::InvalidArgument("Could not access column family " + + std::to_string(cf)); + break; + } + + SequenceNumber earliest_seq = + db_impl->GetEarliestMemTableSequenceNumber(sv, true); + + // For each of the keys in this transaction, check to see if someone has + // written to this key since the start of the transaction. + std::unique_ptr<LockTracker::KeyIterator> key_it( + tracker.GetKeyIterator(cf)); + assert(key_it != nullptr); + while (key_it->HasNext()) { + const std::string& key = key_it->Next(); + PointLockStatus status = tracker.GetPointLockStatus(cf, key); + const SequenceNumber key_seq = status.seq; + + // TODO: support timestamp-based conflict checking. + // CheckKeysForConflicts() is currently used only by optimistic + // transactions.
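+ //
+ // Illustrative sketch (editorial, not part of the vendored sources): the
+ // visibility rule CheckKey applies, restated over sequence numbers:
+ //
+ //   if (seq < min_uncommitted) no conflict;        // known committed & old
+ //   else if (seq > snap_seq)   conflict;           // newer than snapshot
+ //   else                       conflict iff !snap_checker->IsVisible(seq);
+ //
+ // The optimistic path below passes read_ts == nullptr and relies purely on
+ // this sequence-number check: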
+ result = CheckKey(db_impl, sv, earliest_seq, key_seq, key, + /*read_ts=*/nullptr, cache_only); + if (!result.ok()) { + break; + } + } + + db_impl->ReturnAndCleanupSuperVersion(cf, sv); + + if (!result.ok()) { + break; + } + } + + return result; +} + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.h new file mode 100644 index 0000000000..725e1c9279 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/transaction_util.h @@ -0,0 +1,83 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include +#include + +#include "db/dbformat.h" +#include "db/read_callback.h" +#include "rocksdb/db.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" +#include "utilities/transactions/lock/lock_tracker.h" + +namespace ROCKSDB_NAMESPACE { + +class DBImpl; +struct SuperVersion; +class WriteBatchWithIndex; + +class TransactionUtil { + public: + // Verifies there have been no commits to this key in the db since this + // sequence number. If user-defined timestamp is enabled, then also check + // no commits to this key in the db since the given ts. + // + // If cache_only is true, then this function will not attempt to read any + // SST files. This will make it more likely this function will + // return an error if it is unable to determine if there are any conflicts. + // + // See comment of CheckKey() for explanation of `snap_seq`, `ts`, + // `snap_checker` and `min_uncommitted`. + // + // Returns OK on success, BUSY if there is a conflicting write, or other error + // status for any unexpected errors. + static Status CheckKeyForConflicts( + DBImpl* db_impl, ColumnFamilyHandle* column_family, + const std::string& key, SequenceNumber snap_seq, + const std::string* const ts, bool cache_only, + ReadCallback* snap_checker = nullptr, + SequenceNumber min_uncommitted = kMaxSequenceNumber); + + // For each key,SequenceNumber pair tracked by the LockTracker, this function + // will verify there have been no writes to the key in the db since that + // sequence number. + // + // Returns OK on success, BUSY if there is a conflicting write, or other error + // status for any unexpected errors. + // + // REQUIRED: + // This function should only be called on the write thread or if the + // mutex is held. + // tracker must support point lock. + static Status CheckKeysForConflicts(DBImpl* db_impl, + const LockTracker& tracker, + bool cache_only); + + private: + // If `snap_checker` == nullptr, writes are always commited in sequence number + // order. All sequence number <= `snap_seq` will not conflict with any + // write, and all keys > `snap_seq` of `key` will trigger conflict. + // If `snap_checker` != nullptr, writes may not commit in sequence number + // order. In this case `min_uncommitted` is a lower bound. + // seq < `min_uncommitted`: no conflict + // seq > `snap_seq`: applicable to conflict + // `min_uncommitted` <= seq <= `snap_seq`: call `snap_checker` to determine. + // + // If user-defined timestamp is enabled, a write conflict is detected if an + // operation for `key` with timestamp greater than `ts` exists. 
+ static Status CheckKey(DBImpl* db_impl, SuperVersion* sv, + SequenceNumber earliest_seq, SequenceNumber snap_seq, + const std::string& key, const std::string* const ts, + bool cache_only, ReadCallback* snap_checker = nullptr, + SequenceNumber min_uncommitted = kMaxSequenceNumber); +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.cc new file mode 100644 index 0000000000..c27a679e4b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.cc @@ -0,0 +1,515 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "utilities/transactions/write_prepared_txn.h" + +#include +#include +#include + +#include "db/column_family.h" +#include "db/db_impl/db_impl.h" +#include "rocksdb/db.h" +#include "rocksdb/status.h" +#include "rocksdb/utilities/transaction_db.h" +#include "util/cast_util.h" +#include "utilities/transactions/pessimistic_transaction.h" +#include "utilities/transactions/write_prepared_txn_db.h" + +namespace ROCKSDB_NAMESPACE { + +struct WriteOptions; + +WritePreparedTxn::WritePreparedTxn(WritePreparedTxnDB* txn_db, + const WriteOptions& write_options, + const TransactionOptions& txn_options) + : PessimisticTransaction(txn_db, write_options, txn_options, false), + wpt_db_(txn_db) { + // Call Initialize outside PessimisticTransaction constructor otherwise it + // would skip overridden functions in WritePreparedTxn since they are not + // defined yet in the constructor of PessimisticTransaction + Initialize(txn_options); +} + +void WritePreparedTxn::Initialize(const TransactionOptions& txn_options) { + PessimisticTransaction::Initialize(txn_options); + prepare_batch_cnt_ = 0; +} + +void WritePreparedTxn::MultiGet(const ReadOptions& options, + ColumnFamilyHandle* column_family, + const size_t num_keys, const Slice* keys, + PinnableSlice* values, Status* statuses, + const bool sorted_input) { + assert(options.io_activity == Env::IOActivity::kUnknown); + SequenceNumber min_uncommitted, snap_seq; + const SnapshotBackup backed_by_snapshot = + wpt_db_->AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq); + WritePreparedTxnReadCallback callback(wpt_db_, snap_seq, min_uncommitted, + backed_by_snapshot); + write_batch_.MultiGetFromBatchAndDB(db_, options, column_family, num_keys, + keys, values, statuses, sorted_input, + &callback); + if (UNLIKELY(!callback.valid() || + !wpt_db_->ValidateSnapshot(snap_seq, backed_by_snapshot))) { + wpt_db_->WPRecordTick(TXN_GET_TRY_AGAIN); + for (size_t i = 0; i < num_keys; i++) { + statuses[i] = Status::TryAgain(); + } + } +} + +Status WritePreparedTxn::Get(const ReadOptions& options, + ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* pinnable_val) { + if (options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Cannot call Get with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown`"); + } + SequenceNumber min_uncommitted, snap_seq; + const SnapshotBackup backed_by_snapshot = + wpt_db_->AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq); + WritePreparedTxnReadCallback callback(wpt_db_, snap_seq, min_uncommitted, + 
backed_by_snapshot); + Status res = write_batch_.GetFromBatchAndDB(db_, options, column_family, key, + pinnable_val, &callback); + const bool callback_valid = + callback.valid(); // NOTE: validity of callback must always be checked + // before it is destructed + if (res.ok()) { + if (!LIKELY(callback_valid && + wpt_db_->ValidateSnapshot(callback.max_visible_seq(), + backed_by_snapshot))) { + wpt_db_->WPRecordTick(TXN_GET_TRY_AGAIN); + res = Status::TryAgain(); + } + } + + return res; +} + +Iterator* WritePreparedTxn::GetIterator(const ReadOptions& options) { + // Make sure to get iterator from WritePrepareTxnDB, not the root db. + Iterator* db_iter = wpt_db_->NewIterator(options); + assert(db_iter); + + return write_batch_.NewIteratorWithBase(db_iter); +} + +Iterator* WritePreparedTxn::GetIterator(const ReadOptions& options, + ColumnFamilyHandle* column_family) { + // Make sure to get iterator from WritePrepareTxnDB, not the root db. + Iterator* db_iter = wpt_db_->NewIterator(options, column_family); + assert(db_iter); + + return write_batch_.NewIteratorWithBase(column_family, db_iter); +} + +Status WritePreparedTxn::PrepareInternal() { + WriteOptions write_options = write_options_; + write_options.disableWAL = false; + const bool WRITE_AFTER_COMMIT = true; + const bool kFirstPrepareBatch = true; + auto s = WriteBatchInternal::MarkEndPrepare(GetWriteBatch()->GetWriteBatch(), + name_, !WRITE_AFTER_COMMIT); + assert(s.ok()); + // For each duplicate key we account for a new sub-batch + prepare_batch_cnt_ = GetWriteBatch()->SubBatchCnt(); + // Having AddPrepared in the PreReleaseCallback allows in-order addition of + // prepared entries to PreparedHeap and hence enables an optimization. Refer + // to SmallestUnCommittedSeq for more details. + AddPreparedCallback add_prepared_callback( + wpt_db_, db_impl_, prepare_batch_cnt_, + db_impl_->immutable_db_options().two_write_queues, kFirstPrepareBatch); + const bool DISABLE_MEMTABLE = true; + uint64_t seq_used = kMaxSequenceNumber; + s = db_impl_->WriteImpl(write_options, GetWriteBatch()->GetWriteBatch(), + /*callback*/ nullptr, &log_number_, /*log ref*/ 0, + !DISABLE_MEMTABLE, &seq_used, prepare_batch_cnt_, + &add_prepared_callback); + assert(!s.ok() || seq_used != kMaxSequenceNumber); + auto prepare_seq = seq_used; + SetId(prepare_seq); + return s; +} + +Status WritePreparedTxn::CommitWithoutPrepareInternal() { + // For each duplicate key we account for a new sub-batch + const size_t batch_cnt = GetWriteBatch()->SubBatchCnt(); + return CommitBatchInternal(GetWriteBatch()->GetWriteBatch(), batch_cnt); +} + +Status WritePreparedTxn::CommitBatchInternal(WriteBatch* batch, + size_t batch_cnt) { + return wpt_db_->WriteInternal(write_options_, batch, batch_cnt, this); +} + +Status WritePreparedTxn::CommitInternal() { + ROCKS_LOG_DETAILS(db_impl_->immutable_db_options().info_log, + "CommitInternal prepare_seq: %" PRIu64, GetID()); + // We take the commit-time batch and append the Commit marker. + // The Memtable will ignore the Commit marker in non-recovery mode + WriteBatch* working_batch = GetCommitTimeWriteBatch(); + const bool empty = working_batch->Count() == 0; + auto s = WriteBatchInternal::MarkCommit(working_batch, name_); + assert(s.ok()); + + const bool for_recovery = use_only_the_last_commit_time_batch_for_recovery_; + if (!empty) { + // When not writing to memtable, we can still cache the latest write batch. 
+ // The cached batch will be written to memtable in WriteRecoverableState + // during FlushMemTable + if (for_recovery) { + WriteBatchInternal::SetAsLatestPersistentState(working_batch); + } else { + return Status::InvalidArgument( + "Commit-time-batch can only be used if " + "use_only_the_last_commit_time_batch_for_recovery is true"); + } + } + + auto prepare_seq = GetId(); + const bool includes_data = !empty && !for_recovery; + assert(prepare_batch_cnt_); + size_t commit_batch_cnt = 0; + if (UNLIKELY(includes_data)) { + ROCKS_LOG_WARN(db_impl_->immutable_db_options().info_log, + "Duplicate key overhead"); + SubBatchCounter counter(*wpt_db_->GetCFComparatorMap()); + s = working_batch->Iterate(&counter); + assert(s.ok()); + commit_batch_cnt = counter.BatchCount(); + } + const bool disable_memtable = !includes_data; + const bool do_one_write = + !db_impl_->immutable_db_options().two_write_queues || disable_memtable; + WritePreparedCommitEntryPreReleaseCallback update_commit_map( + wpt_db_, db_impl_, prepare_seq, prepare_batch_cnt_, commit_batch_cnt); + // This is to call AddPrepared on CommitTimeWriteBatch + const bool kFirstPrepareBatch = true; + AddPreparedCallback add_prepared_callback( + wpt_db_, db_impl_, commit_batch_cnt, + db_impl_->immutable_db_options().two_write_queues, !kFirstPrepareBatch); + PreReleaseCallback* pre_release_callback; + if (do_one_write) { + pre_release_callback = &update_commit_map; + } else { + pre_release_callback = &add_prepared_callback; + } + uint64_t seq_used = kMaxSequenceNumber; + // Since the prepared batch is directly written to memtable, there is already + // a connection between the memtable and its WAL, so there is no need to + // redundantly reference the log that contains the prepared data. + const uint64_t zero_log_number = 0ull; + size_t batch_cnt = UNLIKELY(commit_batch_cnt) ? commit_batch_cnt : 1; + // If `two_write_queues && includes_data`, then `do_one_write` is false. The + // following `WriteImpl` will insert the data of the commit-time-batch into + // the database before updating the commit cache. Therefore, the data of the + // commit-time-batch is considered uncommitted. Furthermore, since data of + // the commit-time-batch are not locked, it is possible for two uncommitted + // versions of the same key to co-exist for a (short) period of time until + // the commit cache is updated by the second write. If the two uncommitted + // keys are compacted to the bottommost level in the meantime, it is possible + // that compaction iterator will zero out the sequence numbers of both, thus + // violating the invariant that an SST does not have two identical internal + // keys. To prevent this situation, we should allow the usage of + // commit-time-batch only if the user sets + // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery to + // true. See the comments about GetCommitTimeWriteBatch() in + // include/rocksdb/utilities/transaction.h. + s = db_impl_->WriteImpl(write_options_, working_batch, nullptr, nullptr, + zero_log_number, disable_memtable, &seq_used, + batch_cnt, pre_release_callback); + assert(!s.ok() || seq_used != kMaxSequenceNumber); + const SequenceNumber commit_batch_seq = seq_used; + if (LIKELY(do_one_write || !s.ok())) { + if (UNLIKELY(!db_impl_->immutable_db_options().two_write_queues && + s.ok())) { + // Note: RemovePrepared should be called after WriteImpl that published + // the seq. Otherwise SmallestUnCommittedSeq optimization breaks.
+ wpt_db_->RemovePrepared(prepare_seq, prepare_batch_cnt_); + } // else RemovePrepared is called from within PreReleaseCallback + if (UNLIKELY(!do_one_write)) { + assert(!s.ok()); + // Cleanup the prepared entry we added with add_prepared_callback + wpt_db_->RemovePrepared(commit_batch_seq, commit_batch_cnt); + } + return s; + } // else do the 2nd write to publish seq + // Note: the 2nd write comes with a performance penalty. So if we have too + // many commits accompanied by CommitTimeWriteBatch and yet we cannot + // enable use_only_the_last_commit_time_batch_for_recovery_ optimization, + // two_write_queues should be disabled to avoid many additional writes here. + const size_t kZeroData = 0; + // Update commit map only from the 2nd queue + WritePreparedCommitEntryPreReleaseCallback update_commit_map_with_aux_batch( + wpt_db_, db_impl_, prepare_seq, prepare_batch_cnt_, kZeroData, + commit_batch_seq, commit_batch_cnt); + WriteBatch empty_batch; + s = empty_batch.PutLogData(Slice()); + assert(s.ok()); + // In the absence of Prepare markers, use Noop as a batch separator + s = WriteBatchInternal::InsertNoop(&empty_batch); + assert(s.ok()); + const bool DISABLE_MEMTABLE = true; + const size_t ONE_BATCH = 1; + const uint64_t NO_REF_LOG = 0; + s = db_impl_->WriteImpl(write_options_, &empty_batch, nullptr, nullptr, + NO_REF_LOG, DISABLE_MEMTABLE, &seq_used, ONE_BATCH, + &update_commit_map_with_aux_batch); + assert(!s.ok() || seq_used != kMaxSequenceNumber); + return s; +} + +Status WritePreparedTxn::RollbackInternal() { + ROCKS_LOG_WARN(db_impl_->immutable_db_options().info_log, + "RollbackInternal prepare_seq: %" PRIu64, GetId()); + + assert(db_impl_); + assert(wpt_db_); + + WriteBatch rollback_batch(0 /* reserved_bytes */, 0 /* max_bytes */, + write_options_.protection_bytes_per_key, + 0 /* default_cf_ts_sz */); + assert(GetId() != kMaxSequenceNumber); + assert(GetId() > 0); + auto cf_map_shared_ptr = wpt_db_->GetCFHandleMap(); + auto cf_comp_map_shared_ptr = wpt_db_->GetCFComparatorMap(); + auto read_at_seq = kMaxSequenceNumber; + ReadOptions roptions; + // to prevent the callback's seq from being overridden inside DBImpl::Get + roptions.snapshot = wpt_db_->GetMaxSnapshot(); + struct RollbackWriteBatchBuilder : public WriteBatch::Handler { + DBImpl* const db_; + WritePreparedTxnDB* const wpt_db_; + WritePreparedTxnReadCallback callback_; + WriteBatch* rollback_batch_; + std::map<uint32_t, const Comparator*>& comparators_; + std::map<uint32_t, ColumnFamilyHandle*>& handles_; + using CFKeys = std::set<Slice, SetComparator>; + std::map<uint32_t, CFKeys> keys_; + bool rollback_merge_operands_; + ReadOptions roptions_; + + RollbackWriteBatchBuilder( + DBImpl* db, WritePreparedTxnDB* wpt_db, SequenceNumber snap_seq, + WriteBatch* dst_batch, + std::map<uint32_t, const Comparator*>& comparators, + std::map<uint32_t, ColumnFamilyHandle*>& handles, + bool rollback_merge_operands, const ReadOptions& _roptions) + : db_(db), + wpt_db_(wpt_db), + callback_(wpt_db, snap_seq), // disable min_uncommitted optimization + rollback_batch_(dst_batch), + comparators_(comparators), + handles_(handles), + rollback_merge_operands_(rollback_merge_operands), + roptions_(_roptions) {} + + Status Rollback(uint32_t cf, const Slice& key) { + Status s; + CFKeys& cf_keys = keys_[cf]; + if (cf_keys.size() == 0) { // just inserted + auto cmp = comparators_[cf]; + keys_[cf] = CFKeys(SetComparator(cmp)); + } + auto it = cf_keys.insert(key); + // `second` is false if an element already existed.
+ if (it.second == false) { + return s; + } + + PinnableSlice pinnable_val; + bool not_used; + auto cf_handle = handles_[cf]; + DBImpl::GetImplOptions get_impl_options; + get_impl_options.column_family = cf_handle; + get_impl_options.value = &pinnable_val; + get_impl_options.value_found = ¬_used; + get_impl_options.callback = &callback_; + s = db_->GetImpl(roptions_, key, get_impl_options); + assert(s.ok() || s.IsNotFound()); + if (s.ok()) { + s = rollback_batch_->Put(cf_handle, key, pinnable_val); + assert(s.ok()); + } else if (s.IsNotFound()) { + // There has been no readable value before txn. By adding a delete we + // make sure that there will be none afterwards either. + if (wpt_db_->ShouldRollbackWithSingleDelete(cf_handle, key)) { + s = rollback_batch_->SingleDelete(cf_handle, key); + } else { + s = rollback_batch_->Delete(cf_handle, key); + } + assert(s.ok()); + } else { + // Unexpected status. Return it to the user. + } + return s; + } + + Status PutCF(uint32_t cf, const Slice& key, const Slice& /*val*/) override { + return Rollback(cf, key); + } + + Status DeleteCF(uint32_t cf, const Slice& key) override { + return Rollback(cf, key); + } + + Status SingleDeleteCF(uint32_t cf, const Slice& key) override { + return Rollback(cf, key); + } + + Status MergeCF(uint32_t cf, const Slice& key, + const Slice& /*val*/) override { + if (rollback_merge_operands_) { + return Rollback(cf, key); + } else { + return Status::OK(); + } + } + + Status MarkNoop(bool) override { return Status::OK(); } + Status MarkBeginPrepare(bool) override { return Status::OK(); } + Status MarkEndPrepare(const Slice&) override { return Status::OK(); } + Status MarkCommit(const Slice&) override { return Status::OK(); } + Status MarkRollback(const Slice&) override { + return Status::InvalidArgument(); + } + + protected: + Handler::OptionState WriteAfterCommit() const override { + return Handler::OptionState::kDisabled; + } + } rollback_handler(db_impl_, wpt_db_, read_at_seq, &rollback_batch, + *cf_comp_map_shared_ptr.get(), *cf_map_shared_ptr.get(), + wpt_db_->txn_db_options_.rollback_merge_operands, + roptions); + auto s = GetWriteBatch()->GetWriteBatch()->Iterate(&rollback_handler); + if (!s.ok()) { + return s; + } + // The Rollback marker will be used as a batch separator + s = WriteBatchInternal::MarkRollback(&rollback_batch, name_); + assert(s.ok()); + bool do_one_write = !db_impl_->immutable_db_options().two_write_queues; + const bool DISABLE_MEMTABLE = true; + const uint64_t NO_REF_LOG = 0; + uint64_t seq_used = kMaxSequenceNumber; + const size_t ONE_BATCH = 1; + const bool kFirstPrepareBatch = true; + // We commit the rolled back prepared batches. Although this is + // counter-intuitive, i) it is safe to do so, since the prepared batches are + // already canceled out by the rollback batch, ii) adding the commit entry to + // CommitCache will allow us to benefit from the existing mechanism in + // CommitCache that keeps an entry evicted due to max advance and yet overlaps + // with a live snapshot around so that the live snapshot properly skips the + // entry even if its prepare seq is lower than max_evicted_seq_. 
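+ //
+ // Illustrative sketch (editorial, not part of the vendored sources): the
+ // resulting write sequence for a rolled-back prepared transaction under
+ // two_write_queues:
+ //
+ //   [prepare batch @ prep_seq]  ->  [rollback batch @ rollback_seq]  ->
+ //   [empty publish batch]       ->  commit-cache entries added for both
+ //                                   prep_seq and rollback_seq
+ //
+ // i.e. the rollback itself is "committed", cancelling the prepared data: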
+ AddPreparedCallback add_prepared_callback( + wpt_db_, db_impl_, ONE_BATCH, + db_impl_->immutable_db_options().two_write_queues, !kFirstPrepareBatch); + WritePreparedCommitEntryPreReleaseCallback update_commit_map( + wpt_db_, db_impl_, GetId(), prepare_batch_cnt_, ONE_BATCH); + PreReleaseCallback* pre_release_callback; + if (do_one_write) { + pre_release_callback = &update_commit_map; + } else { + pre_release_callback = &add_prepared_callback; + } + // Note: the rollback batch does not need AddPrepared since it is written to + // DB in one shot. min_uncommitted still works since it requires capturing + // data that is written to DB but not yet committed, while + // the rollback batch commits with PreReleaseCallback. + s = db_impl_->WriteImpl(write_options_, &rollback_batch, nullptr, nullptr, + NO_REF_LOG, !DISABLE_MEMTABLE, &seq_used, ONE_BATCH, + pre_release_callback); + assert(!s.ok() || seq_used != kMaxSequenceNumber); + if (!s.ok()) { + return s; + } + if (do_one_write) { + assert(!db_impl_->immutable_db_options().two_write_queues); + wpt_db_->RemovePrepared(GetId(), prepare_batch_cnt_); + return s; + } // else do the 2nd write for commit + uint64_t rollback_seq = seq_used; + ROCKS_LOG_DETAILS(db_impl_->immutable_db_options().info_log, + "RollbackInternal 2nd write rollback_seq: %" PRIu64, + rollback_seq); + // Commit the batch by writing an empty batch to the queue that will release + // the commit sequence number to readers. + WritePreparedRollbackPreReleaseCallback update_commit_map_with_prepare( + wpt_db_, db_impl_, GetId(), rollback_seq, prepare_batch_cnt_); + WriteBatch empty_batch; + s = empty_batch.PutLogData(Slice()); + assert(s.ok()); + // In the absence of Prepare markers, use Noop as a batch separator + s = WriteBatchInternal::InsertNoop(&empty_batch); + assert(s.ok()); + s = db_impl_->WriteImpl(write_options_, &empty_batch, nullptr, nullptr, + NO_REF_LOG, DISABLE_MEMTABLE, &seq_used, ONE_BATCH, + &update_commit_map_with_prepare); + assert(!s.ok() || seq_used != kMaxSequenceNumber); + ROCKS_LOG_DETAILS(db_impl_->immutable_db_options().info_log, + "RollbackInternal (status=%s) commit: %" PRIu64, + s.ToString().c_str(), GetId()); + // TODO(lth): For WriteUnPrepared that rollback is called frequently, + // RemovePrepared could be moved to the callback to reduce lock contention. + if (s.ok()) { + wpt_db_->RemovePrepared(GetId(), prepare_batch_cnt_); + } + // Note: RemovePrepared for prepared batch is called from within + // PreReleaseCallback + wpt_db_->RemovePrepared(rollback_seq, ONE_BATCH); + + return s; +} + +Status WritePreparedTxn::ValidateSnapshot(ColumnFamilyHandle* column_family, + const Slice& key, + SequenceNumber* tracked_at_seq) { + assert(snapshot_); + + SequenceNumber min_uncommitted = + static_cast_with_check<const SnapshotImpl>(snapshot_.get()) + ->min_uncommitted_; + SequenceNumber snap_seq = snapshot_->GetSequenceNumber(); + // tracked_at_seq is either max or the last snapshot with which this key was + // tracked so there is no need to apply the IsInSnapshot to this comparison + // here as tracked_at_seq is not a prepare seq. + if (*tracked_at_seq <= snap_seq) { + // If the key has been previously validated at a sequence number earlier + // than the current snapshot's sequence number, we already know it has not + // been modified. + return Status::OK(); + } + + *tracked_at_seq = snap_seq; + + ColumnFamilyHandle* cfh = + column_family ?
column_family : db_impl_->DefaultColumnFamily(); + + WritePreparedTxnReadCallback snap_checker(wpt_db_, snap_seq, min_uncommitted, + kBackedByDBSnapshot); + // TODO(yanqin): support user-defined timestamp + return TransactionUtil::CheckKeyForConflicts( + db_impl_, cfh, key.ToString(), snap_seq, /*ts=*/nullptr, + false /* cache_only */, &snap_checker, min_uncommitted); +} + +void WritePreparedTxn::SetSnapshot() { + const bool kForWWConflictCheck = true; + SnapshotImpl* snapshot = wpt_db_->GetSnapshotInternal(kForWWConflictCheck); + SetSnapshotInternal(snapshot); +} + +Status WritePreparedTxn::RebuildFromWriteBatch(WriteBatch* src_batch) { + auto ret = PessimisticTransaction::RebuildFromWriteBatch(src_batch); + prepare_batch_cnt_ = GetWriteBatch()->SubBatchCnt(); + return ret; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.h new file mode 100644 index 0000000000..f621e37aba --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn.h @@ -0,0 +1,117 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + + +#include +#include +#include +#include +#include +#include +#include + +#include "db/write_callback.h" +#include "rocksdb/db.h" +#include "rocksdb/slice.h" +#include "rocksdb/snapshot.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" +#include "rocksdb/utilities/transaction.h" +#include "rocksdb/utilities/transaction_db.h" +#include "rocksdb/utilities/write_batch_with_index.h" +#include "util/autovector.h" +#include "utilities/transactions/pessimistic_transaction.h" +#include "utilities/transactions/pessimistic_transaction_db.h" +#include "utilities/transactions/transaction_base.h" +#include "utilities/transactions/transaction_util.h" + +namespace ROCKSDB_NAMESPACE { + +class WritePreparedTxnDB; + +// This impl could write to DB also uncommitted data and then later tell apart +// committed data from uncommitted data. Uncommitted data could be after the +// Prepare phase in 2PC (WritePreparedTxn) or before that +// (WriteUnpreparedTxnImpl). +class WritePreparedTxn : public PessimisticTransaction { + public: + WritePreparedTxn(WritePreparedTxnDB* db, const WriteOptions& write_options, + const TransactionOptions& txn_options); + // No copying allowed + WritePreparedTxn(const WritePreparedTxn&) = delete; + void operator=(const WritePreparedTxn&) = delete; + + virtual ~WritePreparedTxn() {} + + // To make WAL commit markers visible, the snapshot will be based on the last + // seq in the WAL that is also published, LastPublishedSequence, as opposed to + // the last seq in the memtable. + using Transaction::Get; + virtual Status Get(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) override; + + using Transaction::MultiGet; + virtual void MultiGet(const ReadOptions& options, + ColumnFamilyHandle* column_family, + const size_t num_keys, const Slice* keys, + PinnableSlice* values, Status* statuses, + const bool sorted_input = false) override; + + // Note: The behavior is undefined in presence of interleaved writes to the + // same transaction. 
+ // To make WAL commit markers visible, the snapshot will be + // based on the last seq in the WAL that is also published, + // LastPublishedSequence, as opposed to the last seq in the memtable. + using Transaction::GetIterator; + virtual Iterator* GetIterator(const ReadOptions& options) override; + virtual Iterator* GetIterator(const ReadOptions& options, + ColumnFamilyHandle* column_family) override; + + virtual void SetSnapshot() override; + + protected: + void Initialize(const TransactionOptions& txn_options) override; + // Override the protected SetId to make it visible to the friend class + // WritePreparedTxnDB + inline void SetId(uint64_t id) override { Transaction::SetId(id); } + + private: + friend class WritePreparedTransactionTest_BasicRecoveryTest_Test; + friend class WritePreparedTxnDB; + friend class WriteUnpreparedTxnDB; + friend class WriteUnpreparedTxn; + + Status PrepareInternal() override; + + Status CommitWithoutPrepareInternal() override; + + Status CommitBatchInternal(WriteBatch* batch, size_t batch_cnt) override; + + // Since the data is already written to memtables at the Prepare phase, the + // commit entails writing only a commit marker in the WAL. The sequence number + // of the commit marker is then the commit timestamp of the transaction. To + // make WAL commit markers visible, the snapshot will be based on the last seq + // in the WAL that is also published, LastPublishedSequence, as opposed to the + // last seq in the memtable. + Status CommitInternal() override; + + Status RollbackInternal() override; + + virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family, + const Slice& key, + SequenceNumber* tracked_at_seq) override; + + virtual Status RebuildFromWriteBatch(WriteBatch* src_batch) override; + + WritePreparedTxnDB* wpt_db_; + // Number of sub-batches in prepare + size_t prepare_batch_cnt_ = 0; +}; + +} // namespace ROCKSDB_NAMESPACE + diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.cc new file mode 100644 index 0000000000..6118c3549e --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.cc @@ -0,0 +1,1038 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "utilities/transactions/write_prepared_txn_db.h" + +#include +#include +#include +#include +#include + +#include "db/arena_wrapped_db_iter.h" +#include "db/db_impl/db_impl.h" +#include "logging/logging.h" +#include "rocksdb/db.h" +#include "rocksdb/options.h" +#include "rocksdb/utilities/transaction_db.h" +#include "test_util/sync_point.h" +#include "util/cast_util.h" +#include "util/mutexlock.h" +#include "util/string_util.h" +#include "utilities/transactions/pessimistic_transaction.h" +#include "utilities/transactions/transaction_db_mutex_impl.h" + +// This function is for testing only. If it returns true, then all entries in +// the commit cache will be evicted. Unit and/or stress tests (db_stress) +// can implement this function and customize how frequently commit cache +// eviction occurs. +// TODO: remove this function once we can configure commit cache to be very +// small so that eviction occurs very frequently. 
This requires the commit
+// cache entry to be able to encode prepare and commit sequence numbers so
+// that the commit sequence number does not have to be within a certain range
+// of the prepare sequence number.
+extern "C" bool rocksdb_write_prepared_TEST_ShouldClearCommitCache(void)
+    __attribute__((__weak__));
+
+namespace ROCKSDB_NAMESPACE {
+
+Status WritePreparedTxnDB::Initialize(
+    const std::vector<size_t>& compaction_enabled_cf_indices,
+    const std::vector<ColumnFamilyHandle*>& handles) {
+  auto dbimpl = static_cast_with_check<DBImpl>(GetRootDB());
+  assert(dbimpl != nullptr);
+  auto rtxns = dbimpl->recovered_transactions();
+  std::map<SequenceNumber, size_t> ordered_seq_cnt;
+  for (auto rtxn : rtxns) {
+    // There should be only one batch for WritePrepared policy.
+    assert(rtxn.second->batches_.size() == 1);
+    const auto& seq = rtxn.second->batches_.begin()->first;
+    const auto& batch_info = rtxn.second->batches_.begin()->second;
+    auto cnt = batch_info.batch_cnt_ ? batch_info.batch_cnt_ : 1;
+    ordered_seq_cnt[seq] = cnt;
+  }
+  // AddPrepared must be called in order
+  for (auto seq_cnt : ordered_seq_cnt) {
+    auto seq = seq_cnt.first;
+    auto cnt = seq_cnt.second;
+    for (size_t i = 0; i < cnt; i++) {
+      AddPrepared(seq + i);
+    }
+  }
+  SequenceNumber prev_max = max_evicted_seq_;
+  SequenceNumber last_seq = db_impl_->GetLatestSequenceNumber();
+  AdvanceMaxEvictedSeq(prev_max, last_seq);
+  // Create a gap between max and the next snapshot. This simplifies the logic
+  // in IsInSnapshot by not having to consider the special case of max ==
+  // snapshot after recovery. This is tested in IsInSnapshotEmptyMapTest.
+  if (last_seq) {
+    db_impl_->versions_->SetLastAllocatedSequence(last_seq + 1);
+    db_impl_->versions_->SetLastSequence(last_seq + 1);
+    db_impl_->versions_->SetLastPublishedSequence(last_seq + 1);
+  }
+
+  db_impl_->SetSnapshotChecker(new WritePreparedSnapshotChecker(this));
+  // A callback to commit a single sub-batch
+  class CommitSubBatchPreReleaseCallback : public PreReleaseCallback {
+   public:
+    explicit CommitSubBatchPreReleaseCallback(WritePreparedTxnDB* db)
+        : db_(db) {}
+    Status Callback(SequenceNumber commit_seq,
+                    bool is_mem_disabled __attribute__((__unused__)), uint64_t,
+                    size_t /*index*/, size_t /*total*/) override {
+      assert(!is_mem_disabled);
+      db_->AddCommitted(commit_seq, commit_seq);
+      return Status::OK();
+    }
+
+   private:
+    WritePreparedTxnDB* db_;
+  };
+  db_impl_->SetRecoverableStatePreReleaseCallback(
+      new CommitSubBatchPreReleaseCallback(this));
+
+  auto s = PessimisticTransactionDB::Initialize(compaction_enabled_cf_indices,
+                                                handles);
+  return s;
+}
+
+Status WritePreparedTxnDB::VerifyCFOptions(
+    const ColumnFamilyOptions& cf_options) {
+  Status s = PessimisticTransactionDB::VerifyCFOptions(cf_options);
+  if (!s.ok()) {
+    return s;
+  }
+  if (!cf_options.memtable_factory->CanHandleDuplicatedKey()) {
+    return Status::InvalidArgument(
+        "memtable_factory->CanHandleDuplicatedKey() cannot be false with "
+        "WritePrepared transactions");
+  }
+  return Status::OK();
+}
+
+Transaction* WritePreparedTxnDB::BeginTransaction(
+    const WriteOptions& write_options, const TransactionOptions& txn_options,
+    Transaction* old_txn) {
+  if (old_txn != nullptr) {
+    ReinitializeTransaction(old_txn, write_options, txn_options);
+    return old_txn;
+  } else {
+    return new WritePreparedTxn(this, write_options, txn_options);
+  }
+}
+
+Status WritePreparedTxnDB::Write(const WriteOptions& opts,
+                                 WriteBatch* updates) {
+  if (txn_db_options_.skip_concurrency_control) {
+    // Skip locking the rows
+    const size_t UNKNOWN_BATCH_CNT = 0;
+    WritePreparedTxn* NO_TXN = nullptr;
+    return WriteInternal(opts, updates, UNKNOWN_BATCH_CNT, NO_TXN);
+  } else {
+    return PessimisticTransactionDB::WriteWithConcurrencyControl(opts, updates);
+  }
+}
+
+Status WritePreparedTxnDB::Write(
+    const WriteOptions& opts,
+    const TransactionDBWriteOptimizations& optimizations, WriteBatch* updates) {
+  if (optimizations.skip_concurrency_control) {
+    // Skip locking the rows
+    const size_t UNKNOWN_BATCH_CNT = 0;
+    const size_t ONE_BATCH_CNT = 1;
+    const size_t batch_cnt = optimizations.skip_duplicate_key_check
+                                 ? ONE_BATCH_CNT
+                                 : UNKNOWN_BATCH_CNT;
+    WritePreparedTxn* NO_TXN = nullptr;
+    return WriteInternal(opts, updates, batch_cnt, NO_TXN);
+  } else {
+    // TODO(myabandeh): Make use of skip_duplicate_key_check hint
+    // Fall back to unoptimized version
+    return PessimisticTransactionDB::WriteWithConcurrencyControl(opts, updates);
+  }
+}
+
+Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig,
+                                         WriteBatch* batch, size_t batch_cnt,
+                                         WritePreparedTxn* txn) {
+  ROCKS_LOG_DETAILS(db_impl_->immutable_db_options().info_log,
+                    "CommitBatchInternal");
+  if (batch->Count() == 0) {
+    // Otherwise our 1-seq-per-batch logic will break since no seq is
+    // increased for this batch.
+    return Status::OK();
+  }
+
+  if (write_options_orig.protection_bytes_per_key > 0) {
+    auto s = WriteBatchInternal::UpdateProtectionInfo(
+        batch, write_options_orig.protection_bytes_per_key);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  if (batch_cnt == 0) {  // not provided, then compute it
+    // TODO(myabandeh): add an option to allow users to skip this cost
+    SubBatchCounter counter(*GetCFComparatorMap());
+    auto s = batch->Iterate(&counter);
+    if (!s.ok()) {
+      return s;
+    }
+    batch_cnt = counter.BatchCount();
+    WPRecordTick(TXN_DUPLICATE_KEY_OVERHEAD);
+    ROCKS_LOG_DETAILS(info_log_, "Duplicate key overhead: %" PRIu64 " batches",
+                      static_cast<uint64_t>(batch_cnt));
+  }
+  assert(batch_cnt);
+
+  bool do_one_write = !db_impl_->immutable_db_options().two_write_queues;
+  WriteOptions write_options(write_options_orig);
+  // In the absence of Prepare markers, use Noop as a batch separator
+  auto s = WriteBatchInternal::InsertNoop(batch);
+  assert(s.ok());
+  const bool DISABLE_MEMTABLE = true;
+  const uint64_t no_log_ref = 0;
+  uint64_t seq_used = kMaxSequenceNumber;
+  const size_t ZERO_PREPARES = 0;
+  const bool kSeperatePrepareCommitBatches = true;
+  // Since this is not 2pc, there is no need for AddPrepared, but having it in
+  // the PreReleaseCallback enables an optimization. Refer to
+  // SmallestUnCommittedSeq for more details.
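+  // Editor's sketch of the two-write-queues flow implemented below; the
+  // sequence numbers are illustrative only:
+  //
+  //   1st write: the batch goes to the WAL and memtable and is assigned,
+  //              say, prepare_seq = 100; AddPreparedCallback registers it
+  //              as in-flight.
+  //   2nd write: an empty batch (WAL and memtable disabled) is assigned
+  //              seq 101; its PreReleaseCallback publishes (100 -> 101) in
+  //              the commit cache, making the data visible to readers.
+  //
+  // With a single write queue (do_one_write), both steps collapse into one
+  // WriteImpl call and update_commit_map commits the batch directly.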
+  AddPreparedCallback add_prepared_callback(
+      this, db_impl_, batch_cnt,
+      db_impl_->immutable_db_options().two_write_queues,
+      !kSeperatePrepareCommitBatches);
+  WritePreparedCommitEntryPreReleaseCallback update_commit_map(
+      this, db_impl_, kMaxSequenceNumber, ZERO_PREPARES, batch_cnt);
+  PreReleaseCallback* pre_release_callback;
+  if (do_one_write) {
+    pre_release_callback = &update_commit_map;
+  } else {
+    pre_release_callback = &add_prepared_callback;
+  }
+  s = db_impl_->WriteImpl(write_options, batch, nullptr, nullptr, no_log_ref,
+                          !DISABLE_MEMTABLE, &seq_used, batch_cnt,
+                          pre_release_callback);
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  uint64_t prepare_seq = seq_used;
+  if (txn != nullptr) {
+    txn->SetId(prepare_seq);
+  }
+  if (!s.ok()) {
+    return s;
+  }
+  if (do_one_write) {
+    return s;
+  }  // else do the 2nd write for commit
+  ROCKS_LOG_DETAILS(db_impl_->immutable_db_options().info_log,
+                    "CommitBatchInternal 2nd write prepare_seq: %" PRIu64,
+                    prepare_seq);
+  // Commit the batch by writing an empty batch to the 2nd queue that will
+  // release the commit sequence number to readers.
+  const size_t ZERO_COMMITS = 0;
+  WritePreparedCommitEntryPreReleaseCallback update_commit_map_with_prepare(
+      this, db_impl_, prepare_seq, batch_cnt, ZERO_COMMITS);
+  WriteBatch empty_batch;
+  write_options.disableWAL = true;
+  write_options.sync = false;
+  const size_t ONE_BATCH = 1;  // Just to inc the seq
+  s = db_impl_->WriteImpl(write_options, &empty_batch, nullptr, nullptr,
+                          no_log_ref, DISABLE_MEMTABLE, &seq_used, ONE_BATCH,
+                          &update_commit_map_with_prepare);
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  // Note: RemovePrepared is called from within PreReleaseCallback
+  return s;
+}
+
+Status WritePreparedTxnDB::Get(const ReadOptions& options,
+                               ColumnFamilyHandle* column_family,
+                               const Slice& key, PinnableSlice* value) {
+  if (options.io_activity != Env::IOActivity::kUnknown) {
+    return Status::InvalidArgument(
+        "Cannot call Get with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`");
+  }
+  SequenceNumber min_uncommitted, snap_seq;
+  const SnapshotBackup backed_by_snapshot =
+      AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq);
+  WritePreparedTxnReadCallback callback(this, snap_seq, min_uncommitted,
+                                        backed_by_snapshot);
+  bool* dont_care = nullptr;
+  DBImpl::GetImplOptions get_impl_options;
+  get_impl_options.column_family = column_family;
+  get_impl_options.value = value;
+  get_impl_options.value_found = dont_care;
+  get_impl_options.callback = &callback;
+  auto res = db_impl_->GetImpl(options, key, get_impl_options);
+  if (LIKELY(callback.valid() && ValidateSnapshot(callback.max_visible_seq(),
+                                                  backed_by_snapshot))) {
+    return res;
+  } else {
+    res.PermitUncheckedError();
+    WPRecordTick(TXN_GET_TRY_AGAIN);
+    return Status::TryAgain();
+  }
+}
+
+void WritePreparedTxnDB::UpdateCFComparatorMap(
+    const std::vector<ColumnFamilyHandle*>& handles) {
+  auto cf_map = new std::map<uint32_t, const Comparator*>();
+  auto handle_map = new std::map<uint32_t, ColumnFamilyHandle*>();
+  for (auto h : handles) {
+    auto id = h->GetID();
+    const Comparator* comparator = h->GetComparator();
+    (*cf_map)[id] = comparator;
+    if (id != 0) {
+      (*handle_map)[id] = h;
+    } else {
+      // The pointer to the default cf handle in the handles will be deleted.
+      // Use the pointer maintained by the db instead.
+      (*handle_map)[id] = DefaultColumnFamily();
+    }
+  }
+  cf_map_.reset(cf_map);
+  handle_map_.reset(handle_map);
+}
+
+void WritePreparedTxnDB::UpdateCFComparatorMap(ColumnFamilyHandle* h) {
+  auto old_cf_map_ptr = cf_map_.get();
+  assert(old_cf_map_ptr);
+  auto cf_map = new std::map<uint32_t, const Comparator*>(*old_cf_map_ptr);
+  auto old_handle_map_ptr = handle_map_.get();
+  assert(old_handle_map_ptr);
+  auto handle_map =
+      new std::map<uint32_t, ColumnFamilyHandle*>(*old_handle_map_ptr);
+  auto id = h->GetID();
+  const Comparator* comparator = h->GetComparator();
+  (*cf_map)[id] = comparator;
+  (*handle_map)[id] = h;
+  cf_map_.reset(cf_map);
+  handle_map_.reset(handle_map);
+}
+
+std::vector<Status> WritePreparedTxnDB::MultiGet(
+    const ReadOptions& options,
+    const std::vector<ColumnFamilyHandle*>& column_family,
+    const std::vector<Slice>& keys, std::vector<std::string>* values) {
+  assert(values);
+  size_t num_keys = keys.size();
+  values->resize(num_keys);
+
+  std::vector<Status> stat_list(num_keys);
+  for (size_t i = 0; i < num_keys; ++i) {
+    stat_list[i] = this->Get(options, column_family[i], keys[i], &(*values)[i]);
+  }
+  return stat_list;
+}
+
+// Struct to hold ownership of snapshot and read callback for iterator cleanup.
+struct WritePreparedTxnDB::IteratorState {
+  IteratorState(WritePreparedTxnDB* txn_db, SequenceNumber sequence,
+                std::shared_ptr<ManagedSnapshot> s,
+                SequenceNumber min_uncommitted)
+      : callback(txn_db, sequence, min_uncommitted, kBackedByDBSnapshot),
+        snapshot(s) {}
+
+  WritePreparedTxnReadCallback callback;
+  std::shared_ptr<ManagedSnapshot> snapshot;
+};
+
+namespace {
+static void CleanupWritePreparedTxnDBIterator(void* arg1, void* /*arg2*/) {
+  delete reinterpret_cast<WritePreparedTxnDB::IteratorState*>(arg1);
+}
+}  // anonymous namespace
+
+Iterator* WritePreparedTxnDB::NewIterator(const ReadOptions& options,
+                                          ColumnFamilyHandle* column_family) {
+  if (options.io_activity != Env::IOActivity::kUnknown) {
+    return NewErrorIterator(Status::InvalidArgument(
+        "Cannot call NewIterator with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`"));
+  }
+  constexpr bool expose_blob_index = false;
+  constexpr bool allow_refresh = false;
+  std::shared_ptr<ManagedSnapshot> own_snapshot = nullptr;
+  SequenceNumber snapshot_seq = kMaxSequenceNumber;
+  SequenceNumber min_uncommitted = 0;
+  if (options.snapshot != nullptr) {
+    snapshot_seq = options.snapshot->GetSequenceNumber();
+    min_uncommitted =
+        static_cast_with_check<const SnapshotImpl>(options.snapshot)
+            ->min_uncommitted_;
+  } else {
+    auto* snapshot = GetSnapshot();
+    // We take a snapshot to make sure that the related data in the commit map
+    // are not deleted.
+    snapshot_seq = snapshot->GetSequenceNumber();
+    min_uncommitted =
+        static_cast_with_check<const SnapshotImpl>(snapshot)->min_uncommitted_;
+    own_snapshot = std::make_shared<ManagedSnapshot>(db_impl_, snapshot);
+  }
+  assert(snapshot_seq != kMaxSequenceNumber);
+  auto* cfd =
+      static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
+  auto* state =
+      new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted);
+  auto* db_iter =
+      db_impl_->NewIteratorImpl(options, cfd, snapshot_seq, &state->callback,
+                                expose_blob_index, allow_refresh);
+  db_iter->RegisterCleanup(CleanupWritePreparedTxnDBIterator, state, nullptr);
+  return db_iter;
+}
+
+Status WritePreparedTxnDB::NewIterators(
+    const ReadOptions& options,
+    const std::vector<ColumnFamilyHandle*>& column_families,
+    std::vector<Iterator*>* iterators) {
+  constexpr bool expose_blob_index = false;
+  constexpr bool allow_refresh = false;
+  std::shared_ptr<ManagedSnapshot> own_snapshot = nullptr;
+  SequenceNumber snapshot_seq = kMaxSequenceNumber;
+  SequenceNumber min_uncommitted = 0;
+  if (options.snapshot != nullptr) {
+    snapshot_seq = options.snapshot->GetSequenceNumber();
+    min_uncommitted =
+        static_cast_with_check<const SnapshotImpl>(options.snapshot)
+            ->min_uncommitted_;
+  } else {
+    auto* snapshot = GetSnapshot();
+    // We take a snapshot to make sure that the related data in the commit map
+    // are not deleted.
+    snapshot_seq = snapshot->GetSequenceNumber();
+    own_snapshot = std::make_shared<ManagedSnapshot>(db_impl_, snapshot);
+    min_uncommitted =
+        static_cast_with_check<const SnapshotImpl>(snapshot)->min_uncommitted_;
+  }
+  iterators->clear();
+  iterators->reserve(column_families.size());
+  for (auto* column_family : column_families) {
+    auto* cfd =
+        static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
+    auto* state =
+        new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted);
+    auto* db_iter =
+        db_impl_->NewIteratorImpl(options, cfd, snapshot_seq, &state->callback,
+                                  expose_blob_index, allow_refresh);
+    db_iter->RegisterCleanup(CleanupWritePreparedTxnDBIterator, state, nullptr);
+    iterators->push_back(db_iter);
+  }
+  return Status::OK();
+}
+
+void WritePreparedTxnDB::Init(const TransactionDBOptions& txn_db_opts) {
+  // Advance max_evicted_seq_ no more than 100 times before the cache wraps
+  // around.
+  INC_STEP_FOR_MAX_EVICTED =
+      std::max(COMMIT_CACHE_SIZE / 100, static_cast<size_t>(1));
+  snapshot_cache_ = std::unique_ptr<std::atomic<SequenceNumber>[]>(
+      new std::atomic<SequenceNumber>[SNAPSHOT_CACHE_SIZE] {});
+  commit_cache_ = std::unique_ptr<std::atomic<CommitEntry64b>[]>(
+      new std::atomic<CommitEntry64b>[COMMIT_CACHE_SIZE] {});
+  dummy_max_snapshot_.number_ = kMaxSequenceNumber;
+  rollback_deletion_type_callback_ =
+      txn_db_opts.rollback_deletion_type_callback;
+}
+
+void WritePreparedTxnDB::CheckPreparedAgainstMax(SequenceNumber new_max,
+                                                 bool locked) {
+  // When max_evicted_seq_ advances, move older entries from prepared_txns_
+  // to delayed_prepared_. This guarantees that if a seq is lower than max,
+  // then it is not in prepared_txns_, saving an expensive, synchronized
+  // lookup from a shared set. delayed_prepared_ is expected to be empty in
+  // normal cases.
+  ROCKS_LOG_DETAILS(
+      info_log_,
+      "CheckPreparedAgainstMax prepared_txns_.empty() %d top: %" PRIu64,
+      prepared_txns_.empty(),
+      prepared_txns_.empty() ? 0 : prepared_txns_.top());
+  const SequenceNumber prepared_top = prepared_txns_.top();
+  const bool empty = prepared_top == kMaxSequenceNumber;
+  // Preliminary check to avoid the synchronization cost
+  if (!empty && prepared_top <= new_max) {
+    if (locked) {
+      // Needed to avoid double locking in pop().
+      prepared_txns_.push_pop_mutex()->Unlock();
+    }
+    WriteLock wl(&prepared_mutex_);
+    // Need to fetch fresh values of ::top after the mutex is acquired
+    while (!prepared_txns_.empty() && prepared_txns_.top() <= new_max) {
+      auto to_be_popped = prepared_txns_.top();
+      delayed_prepared_.insert(to_be_popped);
+      ROCKS_LOG_WARN(info_log_,
+                     "prepared_mutex_ overhead %" PRIu64 " (prep=%" PRIu64
+                     " new_max=%" PRIu64 ")",
+                     static_cast<uint64_t>(delayed_prepared_.size()),
+                     to_be_popped, new_max);
+      delayed_prepared_empty_.store(false, std::memory_order_release);
+      // Update prepared_txns_ after updating delayed_prepared_empty_ otherwise
+      // there will be a point in time that the entry is neither in
+      // prepared_txns_ nor in delayed_prepared_, which will not be checked if
+      // delayed_prepared_empty_ is false.
+      prepared_txns_.pop();
+    }
+    if (locked) {
+      prepared_txns_.push_pop_mutex()->Lock();
+    }
+  }
+}
+
+void WritePreparedTxnDB::AddPrepared(uint64_t seq, bool locked) {
+  ROCKS_LOG_DETAILS(info_log_, "Txn %" PRIu64 " Preparing with max %" PRIu64,
+                    seq, max_evicted_seq_.load());
+  TEST_SYNC_POINT("AddPrepared::begin:pause");
+  TEST_SYNC_POINT("AddPrepared::begin:resume");
+  if (!locked) {
+    prepared_txns_.push_pop_mutex()->Lock();
+  }
+  prepared_txns_.push_pop_mutex()->AssertHeld();
+  prepared_txns_.push(seq);
+  auto new_max = future_max_evicted_seq_.load();
+  if (UNLIKELY(seq <= new_max)) {
+    // This should not happen in the normal case
+    ROCKS_LOG_ERROR(
+        info_log_,
+        "Added prepare_seq is not larger than max_evicted_seq_: %" PRIu64
+        " <= %" PRIu64,
+        seq, new_max);
+    CheckPreparedAgainstMax(new_max, true /*locked*/);
+  }
+  if (!locked) {
+    prepared_txns_.push_pop_mutex()->Unlock();
+  }
+  TEST_SYNC_POINT("AddPrepared::end");
+}
+
+void WritePreparedTxnDB::AddCommitted(uint64_t prepare_seq, uint64_t commit_seq,
+                                      uint8_t loop_cnt) {
+  ROCKS_LOG_DETAILS(info_log_, "Txn %" PRIu64 " Committing with %" PRIu64,
+                    prepare_seq, commit_seq);
+  TEST_SYNC_POINT("WritePreparedTxnDB::AddCommitted:start");
+  TEST_SYNC_POINT("WritePreparedTxnDB::AddCommitted:start:pause");
+  auto indexed_seq = prepare_seq % COMMIT_CACHE_SIZE;
+  CommitEntry64b evicted_64b;
+  CommitEntry evicted;
+  bool to_be_evicted = GetCommitEntry(indexed_seq, &evicted_64b, &evicted);
+  if (LIKELY(to_be_evicted)) {
+    assert(evicted.prep_seq != prepare_seq);
+    auto prev_max = max_evicted_seq_.load(std::memory_order_acquire);
+    ROCKS_LOG_DETAILS(info_log_,
+                      "Evicting %" PRIu64 ",%" PRIu64 " with max %" PRIu64,
+                      evicted.prep_seq, evicted.commit_seq, prev_max);
+    if (prev_max < evicted.commit_seq) {
+      auto last = db_impl_->GetLastPublishedSequence();  // could be 0
+      SequenceNumber max_evicted_seq;
+      if (LIKELY(evicted.commit_seq < last)) {
+        assert(last > 0);
+        // Inc max in larger steps to avoid frequent updates
+        max_evicted_seq =
+            std::min(evicted.commit_seq + INC_STEP_FOR_MAX_EVICTED, last - 1);
+      } else {
+        // legit when a commit entry in a write batch overwrites the previous
+        // one
+        max_evicted_seq = evicted.commit_seq;
+      }
+#ifdef OS_LINUX
+      if (rocksdb_write_prepared_TEST_ShouldClearCommitCache &&
+          rocksdb_write_prepared_TEST_ShouldClearCommitCache()) {
+        max_evicted_seq = last;
+      }
+#endif  // OS_LINUX
+      ROCKS_LOG_DETAILS(info_log_,
+                        "%lu Evicting %" PRIu64 ",%" PRIu64 " with max %" PRIu64
+                        " => %lu",
+                        prepare_seq, evicted.prep_seq, evicted.commit_seq,
+                        prev_max, max_evicted_seq);
+      AdvanceMaxEvictedSeq(prev_max, max_evicted_seq);
+    }
+    if (UNLIKELY(!delayed_prepared_empty_.load(std::memory_order_acquire))) {
+      WriteLock wl(&prepared_mutex_);
+      auto dp_iter = delayed_prepared_.find(evicted.prep_seq);
+      if (dp_iter != delayed_prepared_.end()) {
+        // This is a rare case in which a txn is committed but prepared_txns_
+        // is not cleaned up yet. Refer to the delayed_prepared_commits_
+        // definition for why it should be kept updated.
+        delayed_prepared_commits_[evicted.prep_seq] = evicted.commit_seq;
+        ROCKS_LOG_DEBUG(info_log_,
+                        "delayed_prepared_commits_[%" PRIu64 "]=%" PRIu64,
+                        evicted.prep_seq, evicted.commit_seq);
+      }
+    }
+    // After each eviction from commit cache, check if the commit entry should
+    // be kept around because it overlaps with a live snapshot.
+    CheckAgainstSnapshots(evicted);
+  }
+  bool succ =
+      ExchangeCommitEntry(indexed_seq, evicted_64b, {prepare_seq, commit_seq});
+  if (UNLIKELY(!succ)) {
+    ROCKS_LOG_ERROR(info_log_,
+                    "ExchangeCommitEntry failed on [%" PRIu64 "] %" PRIu64
+                    ",%" PRIu64 " retrying...",
+                    indexed_seq, prepare_seq, commit_seq);
+    // A very rare event, in which the commit entry is updated by another
+    // commit before we update it. Here we apply a very simple solution of
+    // retrying.
+    if (loop_cnt > 100) {
+      throw std::runtime_error("Infinite loop in AddCommitted!");
+    }
+    AddCommitted(prepare_seq, commit_seq, ++loop_cnt);
+    return;
+  }
+  TEST_SYNC_POINT("WritePreparedTxnDB::AddCommitted:end");
+  TEST_SYNC_POINT("WritePreparedTxnDB::AddCommitted:end:pause");
+}
+
+void WritePreparedTxnDB::RemovePrepared(const uint64_t prepare_seq,
+                                        const size_t batch_cnt) {
+  TEST_SYNC_POINT_CALLBACK(
+      "RemovePrepared:Start",
+      const_cast<void*>(reinterpret_cast<const void*>(&prepare_seq)));
+  TEST_SYNC_POINT("WritePreparedTxnDB::RemovePrepared:pause");
+  TEST_SYNC_POINT("WritePreparedTxnDB::RemovePrepared:resume");
+  ROCKS_LOG_DETAILS(info_log_,
+                    "RemovePrepared %" PRIu64 " cnt: %" ROCKSDB_PRIszt,
+                    prepare_seq, batch_cnt);
+  WriteLock wl(&prepared_mutex_);
+  for (size_t i = 0; i < batch_cnt; i++) {
+    prepared_txns_.erase(prepare_seq + i);
+    bool was_empty = delayed_prepared_.empty();
+    if (!was_empty) {
+      delayed_prepared_.erase(prepare_seq + i);
+      auto it = delayed_prepared_commits_.find(prepare_seq + i);
+      if (it != delayed_prepared_commits_.end()) {
+        ROCKS_LOG_DETAILS(info_log_, "delayed_prepared_commits_.erase %" PRIu64,
+                          prepare_seq + i);
+        delayed_prepared_commits_.erase(it);
+      }
+      bool is_empty = delayed_prepared_.empty();
+      if (was_empty != is_empty) {
+        delayed_prepared_empty_.store(is_empty, std::memory_order_release);
+      }
+    }
+  }
+}
+
+bool WritePreparedTxnDB::GetCommitEntry(const uint64_t indexed_seq,
+                                        CommitEntry64b* entry_64b,
+                                        CommitEntry* entry) const {
+  *entry_64b = commit_cache_[static_cast<size_t>(indexed_seq)].load(
+      std::memory_order_acquire);
+  bool valid = entry_64b->Parse(indexed_seq, entry, FORMAT);
+  return valid;
+}
+
+bool WritePreparedTxnDB::AddCommitEntry(const uint64_t indexed_seq,
+                                        const CommitEntry& new_entry,
+                                        CommitEntry* evicted_entry) {
+  CommitEntry64b new_entry_64b(new_entry, FORMAT);
+  CommitEntry64b evicted_entry_64b =
+      commit_cache_[static_cast<size_t>(indexed_seq)].exchange(
+          new_entry_64b, std::memory_order_acq_rel);
+  bool valid = evicted_entry_64b.Parse(indexed_seq, evicted_entry, FORMAT);
+  return valid;
+}
+
+bool WritePreparedTxnDB::ExchangeCommitEntry(const uint64_t indexed_seq,
+                                             CommitEntry64b& expected_entry_64b,
+                                             const CommitEntry& new_entry) {
+  auto& atomic_entry = commit_cache_[static_cast<size_t>(indexed_seq)];
+  CommitEntry64b new_entry_64b(new_entry, FORMAT);
+  bool succ = atomic_entry.compare_exchange_strong(
+      expected_entry_64b, new_entry_64b, std::memory_order_acq_rel,
+      std::memory_order_acquire);
+  return succ;
+}
+
+void WritePreparedTxnDB::AdvanceMaxEvictedSeq(const SequenceNumber& prev_max,
+                                              const SequenceNumber& new_max) {
+  ROCKS_LOG_DETAILS(info_log_,
+                    "AdvanceMaxEvictedSeq overhead %" PRIu64 " => %" PRIu64,
+                    prev_max, new_max);
+  // Declare the intention before getting snapshot from the DB. This helps a
+  // concurrent GetSnapshot to wait to catch up with future_max_evicted_seq_ if
+  // it has not already. Otherwise the new snapshot is when we ask the DB for
+  // snapshots smaller than future max.
+  auto updated_future_max = prev_max;
+  while (updated_future_max < new_max &&
+         !future_max_evicted_seq_.compare_exchange_weak(
+             updated_future_max, new_max, std::memory_order_acq_rel,
+             std::memory_order_relaxed)) {
+  };
+
+  CheckPreparedAgainstMax(new_max, false /*locked*/);
+
+  // With each change to max_evicted_seq_ fetch the live snapshots behind it.
+  // We use max as the version of the snapshot list to identify how fresh the
+  // list is. This works because the snapshots are between 0 and max, so the
+  // larger the max, the more complete the list is.
+  SequenceNumber new_snapshots_version = new_max;
+  std::vector<SequenceNumber> snapshots;
+  bool update_snapshots = false;
+  if (new_snapshots_version > snapshots_version_) {
+    // This is to avoid updating snapshots_ if it was already updated with a
+    // more recent version by a concurrent thread
+    update_snapshots = true;
+    // We only care about snapshots lower than max
+    snapshots = GetSnapshotListFromDB(new_max);
+  }
+  if (update_snapshots) {
+    UpdateSnapshots(snapshots, new_snapshots_version);
+    if (!snapshots.empty()) {
+      WriteLock wl(&old_commit_map_mutex_);
+      for (auto snap : snapshots) {
+        // This allows IsInSnapshot to tell apart the reads from invalid
+        // snapshots from the reads from committed values in valid snapshots.
+        old_commit_map_[snap];
+      }
+      old_commit_map_empty_.store(false, std::memory_order_release);
+    }
+  }
+  auto updated_prev_max = prev_max;
+  TEST_SYNC_POINT("AdvanceMaxEvictedSeq::update_max:pause");
+  TEST_SYNC_POINT("AdvanceMaxEvictedSeq::update_max:resume");
+  while (updated_prev_max < new_max &&
+         !max_evicted_seq_.compare_exchange_weak(updated_prev_max, new_max,
+                                                 std::memory_order_acq_rel,
+                                                 std::memory_order_relaxed)) {
+  };
+}
+
+const Snapshot* WritePreparedTxnDB::GetSnapshot() {
+  const bool kForWWConflictCheck = true;
+  return GetSnapshotInternal(!kForWWConflictCheck);
+}
+
+SnapshotImpl* WritePreparedTxnDB::GetSnapshotInternal(
+    bool for_ww_conflict_check) {
+  // Note: for this optimization setting the last sequence number and obtaining
+  // the smallest uncommitted seq should be done atomically. However to avoid
+  // the mutex overhead, we call SmallestUnCommittedSeq BEFORE taking the
+  // snapshot. Since we always update the list of unprepared seqs (via
+  // AddPrepared) AFTER the last sequence is updated, this guarantees that the
+  // smallest uncommitted seq that we pair with the snapshot is smaller than or
+  // equal to the value that would be obtained otherwise atomically. That is ok
+  // since this optimization works as long as min_uncommitted is less than or
+  // equal to the smallest uncommitted seq when the snapshot was taken.
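+  // Editor's note, an illustrative interleaving with hypothetical seqs:
+  //   t1: SmallestUnCommittedSeq() returns 50
+  //   t2: another txn prepares at seq 60 (list updated after seq allocation)
+  //   t3: GetSnapshotImpl() returns a snapshot at seq 60
+  // Pairing min_uncommitted = 50 with snapshot 60 is merely conservative:
+  // every seq in [50, 60] is still re-checked via IsInSnapshot, so no
+  // uncommitted write can be mistaken for a committed one; the only cost is
+  // a few extra lookups.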
+  auto min_uncommitted = WritePreparedTxnDB::SmallestUnCommittedSeq();
+  SnapshotImpl* snap_impl = db_impl_->GetSnapshotImpl(for_ww_conflict_check);
+  TEST_SYNC_POINT("WritePreparedTxnDB::GetSnapshotInternal:first");
+  assert(snap_impl);
+  SequenceNumber snap_seq = snap_impl->GetSequenceNumber();
+  // Note: Check against future_max_evicted_seq_ (in contrast with
+  // max_evicted_seq_) in case there is a concurrent AdvanceMaxEvictedSeq.
+  if (UNLIKELY(snap_seq != 0 && snap_seq <= future_max_evicted_seq_)) {
+    // There is a very rare case in which a commit entry evicts another commit
+    // entry that is not published yet, thus advancing max evicted seq beyond
+    // the last published seq. This case is not likely in real-world setups so
+    // we handle it with a few retries.
+    size_t retry = 0;
+    SequenceNumber max;
+    while ((max = future_max_evicted_seq_.load()) != 0 &&
+           snap_impl->GetSequenceNumber() <= max && retry < 100) {
+      ROCKS_LOG_WARN(info_log_,
+                     "GetSnapshot snap: %" PRIu64 " max: %" PRIu64
+                     " retry %" ROCKSDB_PRIszt,
+                     snap_impl->GetSequenceNumber(), max, retry);
+      ReleaseSnapshot(snap_impl);
+      // Wait for last visible seq to catch up with max, and also go beyond it
+      // by one.
+      AdvanceSeqByOne();
+      snap_impl = db_impl_->GetSnapshotImpl(for_ww_conflict_check);
+      assert(snap_impl);
+      retry++;
+    }
+    assert(snap_impl->GetSequenceNumber() > max);
+    if (snap_impl->GetSequenceNumber() <= max) {
+      throw std::runtime_error(
+          "Snapshot seq " + std::to_string(snap_impl->GetSequenceNumber()) +
+          " after " + std::to_string(retry) +
+          " retries is still less than future_max_evicted_seq_ " +
+          std::to_string(max));
+    }
+  }
+  EnhanceSnapshot(snap_impl, min_uncommitted);
+  ROCKS_LOG_DETAILS(
+      db_impl_->immutable_db_options().info_log,
+      "GetSnapshot %" PRIu64 " ww:%" PRIi32 " min_uncommitted: %" PRIu64,
+      snap_impl->GetSequenceNumber(), for_ww_conflict_check, min_uncommitted);
+  TEST_SYNC_POINT("WritePreparedTxnDB::GetSnapshotInternal:end");
+  return snap_impl;
+}
+
+void WritePreparedTxnDB::AdvanceSeqByOne() {
+  // Inserting an empty value will i) let the max evicted entry be published,
+  // i.e., max == last_published, and ii) increase the last published to be
+  // one beyond max, i.e., max < last_published.
+  WriteOptions woptions;
+  TransactionOptions txn_options;
+  Transaction* txn0 = BeginTransaction(woptions, txn_options, nullptr);
+  std::hash<std::thread::id> hasher;
+  char name[64];
+  snprintf(name, 64, "txn%" ROCKSDB_PRIszt,
+           hasher(std::this_thread::get_id()));
+  assert(strlen(name) < 64 - 1);
+  Status s = txn0->SetName(name);
+  assert(s.ok());
+  if (s.ok()) {
+    // Without prepare it would simply skip the commit
+    s = txn0->Prepare();
+  }
+  assert(s.ok());
+  if (s.ok()) {
+    s = txn0->Commit();
+  }
+  assert(s.ok());
+  delete txn0;
+}
+
+const std::vector<SequenceNumber> WritePreparedTxnDB::GetSnapshotListFromDB(
+    SequenceNumber max) {
+  ROCKS_LOG_DETAILS(info_log_, "GetSnapshotListFromDB with max %" PRIu64, max);
+  InstrumentedMutexLock dblock(db_impl_->mutex());
+  db_impl_->mutex()->AssertHeld();
+  return db_impl_->snapshots().GetAll(nullptr, max);
+}
+
+void WritePreparedTxnDB::ReleaseSnapshotInternal(
+    const SequenceNumber snap_seq) {
+  // TODO(myabandeh): relaxed should be enough since the synchronization is
+  // already done by snapshots_mutex_ under which this function is called.
+  if (snap_seq <= max_evicted_seq_.load(std::memory_order_acquire)) {
+    // Then this is a rare case in which a transaction did not finish before
+    // max advances. It is expected for a few read-only backup snapshots. For
+    // such
+    // snapshots we might have kept around a couple of entries in the
+    // old_commit_map_. Check and do garbage collection if that is the case.
+    bool need_gc = false;
+    {
+      WPRecordTick(TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD);
+      ROCKS_LOG_WARN(info_log_, "old_commit_map_mutex_ overhead for %" PRIu64,
+                     snap_seq);
+      ReadLock rl(&old_commit_map_mutex_);
+      auto prep_set_entry = old_commit_map_.find(snap_seq);
+      need_gc = prep_set_entry != old_commit_map_.end();
+    }
+    if (need_gc) {
+      WPRecordTick(TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD);
+      ROCKS_LOG_WARN(info_log_, "old_commit_map_mutex_ overhead for %" PRIu64,
+                     snap_seq);
+      WriteLock wl(&old_commit_map_mutex_);
+      old_commit_map_.erase(snap_seq);
+      old_commit_map_empty_.store(old_commit_map_.empty(),
+                                  std::memory_order_release);
+    }
+  }
+}
+
+void WritePreparedTxnDB::CleanupReleasedSnapshots(
+    const std::vector<SequenceNumber>& new_snapshots,
+    const std::vector<SequenceNumber>& old_snapshots) {
+  auto newi = new_snapshots.begin();
+  auto oldi = old_snapshots.begin();
+  for (; newi != new_snapshots.end() && oldi != old_snapshots.end();) {
+    assert(*newi >= *oldi);  // cannot have new snapshots with lower seq
+    if (*newi == *oldi) {    // still not released
+      auto value = *newi;
+      while (newi != new_snapshots.end() && *newi == value) {
+        newi++;
+      }
+      while (oldi != old_snapshots.end() && *oldi == value) {
+        oldi++;
+      }
+    } else {
+      assert(*newi > *oldi);  // *oldi is released
+      ReleaseSnapshotInternal(*oldi);
+      oldi++;
+    }
+  }
+  // Everything remaining in old_snapshots is released and must be cleaned up
+  for (; oldi != old_snapshots.end(); oldi++) {
+    ReleaseSnapshotInternal(*oldi);
+  }
+}
+
+void WritePreparedTxnDB::UpdateSnapshots(
+    const std::vector<SequenceNumber>& snapshots,
+    const SequenceNumber& version) {
+  ROCKS_LOG_DETAILS(info_log_, "UpdateSnapshots with version %" PRIu64,
+                    version);
+  TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:start");
+  TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:start");
+#ifndef NDEBUG
+  size_t sync_i = 0;
+#endif
+  ROCKS_LOG_DETAILS(info_log_, "snapshots_mutex_ overhead");
+  WriteLock wl(&snapshots_mutex_);
+  snapshots_version_ = version;
+  // We update the list concurrently with the readers.
+  // Both the new and the old lists are sorted and the new list is a subset of
+  // the previous list plus some new items. Thus if a snapshot repeats in both
+  // the new and the old lists, it will appear at the same or an earlier
+  // position in the new list. So if we simply insert the new snapshots in
+  // order, an overwritten item that is still valid in the new list is either
+  // written to the same slot in the array or to an earlier slot before its
+  // old copy gets overwritten by another item. This guarantees that a reader
+  // that reads the list bottom-up will eventually see a snapshot that repeats
+  // in the update, either before it gets overwritten by the writer or
+  // afterwards.
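+  // Editor's example: cache = [10, 20, 30] and the new list is [20, 30, 40].
+  // The writer stores 20 into slot 0, then 30 into slot 1, then 40 into
+  // slot 2. The surviving snapshot 20 reaches its new slot 0 before its old
+  // copy in slot 1 is overwritten, so a reader that scans from the last slot
+  // down to slot 0 (as CheckAgainstSnapshots does) finds 20 in one of the
+  // two slots, whichever side of the concurrent writer it is on.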
+  size_t i = 0;
+  auto it = snapshots.begin();
+  for (; it != snapshots.end() && i < SNAPSHOT_CACHE_SIZE; ++it, ++i) {
+    snapshot_cache_[i].store(*it, std::memory_order_release);
+    TEST_IDX_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:", ++sync_i);
+    TEST_IDX_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:", sync_i);
+  }
+#ifndef NDEBUG
+  // Release the remaining sync points since they are useless given that the
+  // reader would also use a lock to access snapshots
+  for (++sync_i; sync_i <= 10; ++sync_i) {
+    TEST_IDX_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:", sync_i);
+    TEST_IDX_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:", sync_i);
+  }
+#endif
+  snapshots_.clear();
+  for (; it != snapshots.end(); ++it) {
+    // Insert them into a vector that is less efficient to access
+    // concurrently
+    snapshots_.push_back(*it);
+  }
+  // Update the size at the end. Otherwise a parallel reader might read
+  // items that are not set yet.
+  snapshots_total_.store(snapshots.size(), std::memory_order_release);
+
+  // Note: this must be done after the snapshots data structures are updated
+  // with the new list of snapshots.
+  CleanupReleasedSnapshots(snapshots, snapshots_all_);
+  snapshots_all_ = snapshots;
+
+  TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:end");
+  TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:end");
+}
+
+void WritePreparedTxnDB::CheckAgainstSnapshots(const CommitEntry& evicted) {
+  TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:start");
+  TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:start");
+#ifndef NDEBUG
+  size_t sync_i = 0;
+#endif
+  // First check the snapshot cache that is efficient for concurrent access
+  auto cnt = snapshots_total_.load(std::memory_order_acquire);
+  // The list might get updated concurrently as we are reading from it. The
+  // reader should be able to read all the snapshots that are still valid
+  // after the update. Since the surviving snapshots are written to their new
+  // slots before their old copies get overwritten, a reader that reads
+  // bottom-up will eventually see them.
+  const bool next_is_larger = true;
+  // We will set this to true if the border line snapshot suggests it.
+  bool search_larger_list = false;
+  size_t ip1 = std::min(cnt, SNAPSHOT_CACHE_SIZE);
+  for (; 0 < ip1; ip1--) {
+    SequenceNumber snapshot_seq =
+        snapshot_cache_[ip1 - 1].load(std::memory_order_acquire);
+    TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:",
+                        ++sync_i);
+    TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:", sync_i);
+    if (ip1 == SNAPSHOT_CACHE_SIZE) {  // border line snapshot
+      // snapshot_seq < commit_seq => larger_snapshot_seq <= commit_seq
+      // then later also continue the search to larger snapshots
+      search_larger_list = snapshot_seq < evicted.commit_seq;
+    }
+    if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
+                                 snapshot_seq, !next_is_larger)) {
+      break;
+    }
+  }
+#ifndef NDEBUG
+  // Release the remaining sync points before acquiring the lock
+  for (++sync_i; sync_i <= 10; ++sync_i) {
+    TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:", sync_i);
+    TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:", sync_i);
+  }
+#endif
+  TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:end");
+  TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:end");
+  if (UNLIKELY(SNAPSHOT_CACHE_SIZE < cnt && search_larger_list)) {
+    // Then access the less efficient list of snapshots_
+    WPRecordTick(TXN_SNAPSHOT_MUTEX_OVERHEAD);
+    ROCKS_LOG_WARN(info_log_,
+                   "snapshots_mutex_ overhead for <%" PRIu64 ",%" PRIu64
+                   "> with %" ROCKSDB_PRIszt " snapshots",
+                   evicted.prep_seq, evicted.commit_seq, cnt);
+    ReadLock rl(&snapshots_mutex_);
+    // Items could have moved from the snapshots_ to snapshot_cache_ before
+    // acquiring the lock. To make sure that we do not miss a valid snapshot,
+    // read snapshot_cache_ again while holding the lock.
+    for (size_t i = 0; i < SNAPSHOT_CACHE_SIZE; i++) {
+      SequenceNumber snapshot_seq =
+          snapshot_cache_[i].load(std::memory_order_acquire);
+      if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
+                                   snapshot_seq, next_is_larger)) {
+        break;
+      }
+    }
+    for (auto snapshot_seq_2 : snapshots_) {
+      if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
+                                   snapshot_seq_2, next_is_larger)) {
+        break;
+      }
+    }
+  }
+}
+
+bool WritePreparedTxnDB::MaybeUpdateOldCommitMap(
+    const uint64_t& prep_seq, const uint64_t& commit_seq,
+    const uint64_t& snapshot_seq, const bool next_is_larger = true) {
+  // If we do not store an entry in old_commit_map_ we assume it is committed
+  // in all snapshots. If commit_seq <= snapshot_seq, it is considered already
+  // in the snapshot so we need not keep the entry around for this snapshot.
+  if (commit_seq <= snapshot_seq) {
+    // continue the search if the next snapshot could be smaller than
+    // commit_seq
+    return !next_is_larger;
+  }
+  // then snapshot_seq < commit_seq
+  if (prep_seq <= snapshot_seq) {  // overlapping range
+    WPRecordTick(TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD);
+    ROCKS_LOG_WARN(info_log_,
+                   "old_commit_map_mutex_ overhead for %" PRIu64
+                   " commit entry: <%" PRIu64 ",%" PRIu64 ">",
+                   snapshot_seq, prep_seq, commit_seq);
+    WriteLock wl(&old_commit_map_mutex_);
+    old_commit_map_empty_.store(false, std::memory_order_release);
+    auto& vec = old_commit_map_[snapshot_seq];
+    vec.insert(std::upper_bound(vec.begin(), vec.end(), prep_seq), prep_seq);
+    // We need to store it once for each overlapping snapshot. Return true to
+    // continue the search if there are more overlapping snapshots.
+    return true;
+  }
+  // continue the search if the next snapshot could be larger than prep_seq
+  return next_is_larger;
+}
+
+WritePreparedTxnDB::~WritePreparedTxnDB() {
+  // At this point there could be running compaction/flush holding a
+  // SnapshotChecker, which holds a pointer back to WritePreparedTxnDB.
+  // Make sure those jobs finished before destructing WritePreparedTxnDB.
+  if (!db_impl_->shutting_down_) {
+    db_impl_->CancelAllBackgroundWork(true /*wait*/);
+  }
+}
+
+void SubBatchCounter::InitWithComp(const uint32_t cf) {
+  auto cmp = comparators_[cf];
+  keys_[cf] = CFKeys(SetComparator(cmp));
+}
+
+void SubBatchCounter::AddKey(const uint32_t cf, const Slice& key) {
+  CFKeys& cf_keys = keys_[cf];
+  if (cf_keys.size() == 0) {  // just inserted
+    InitWithComp(cf);
+  }
+  auto it = cf_keys.insert(key);
+  if (it.second == false) {  // second is false if an element already existed.
+    batches_++;
+    keys_.clear();
+    InitWithComp(cf);
+    keys_[cf].insert(key);
+  }
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.h
new file mode 100644
index 0000000000..f5b6411605
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_prepared_txn_db.h
@@ -0,0 +1,1123 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "db/db_iter.h"
+#include "db/pre_release_callback.h"
+#include "db/read_callback.h"
+#include "db/snapshot_checker.h"
+#include "logging/logging.h"
+#include "rocksdb/db.h"
+#include "rocksdb/options.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "util/cast_util.h"
+#include "util/set_comparator.h"
+#include "util/string_util.h"
+#include "utilities/transactions/pessimistic_transaction.h"
+#include "utilities/transactions/pessimistic_transaction_db.h"
+#include "utilities/transactions/write_prepared_txn.h"
+
+namespace ROCKSDB_NAMESPACE {
+enum SnapshotBackup : bool { kUnbackedByDBSnapshot, kBackedByDBSnapshot };
+
+// A PessimisticTransactionDB that writes data to the DB after the prepare
+// phase of 2PC. In this way some data in the DB might not be committed. The
+// DB provides mechanisms to tell such data apart from committed data.
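+//
+// Usage sketch (editor's illustration via the public TransactionDB API;
+// error handling elided, path hypothetical):
+//
+//   Options options;
+//   options.create_if_missing = true;
+//   TransactionDBOptions txn_db_options;
+//   txn_db_options.write_policy = TxnDBWritePolicy::WRITE_PREPARED;
+//   TransactionDB* db = nullptr;
+//   Status s = TransactionDB::Open(options, txn_db_options, "/tmp/db", &db);
+//   Transaction* txn = db->BeginTransaction(WriteOptions());
+//   s = txn->SetName("txn1");  // required before Prepare()
+//   s = txn->Put("key", "value");
+//   s = txn->Prepare();        // data reaches the DB here, still uncommitted
+//   s = txn->Commit();         // writes only a commit marker to the WAL
+//   delete txn;
+//   delete db;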
+class WritePreparedTxnDB : public PessimisticTransactionDB {
+ public:
+  explicit WritePreparedTxnDB(DB* db,
+                              const TransactionDBOptions& txn_db_options)
+      : PessimisticTransactionDB(db, txn_db_options),
+        SNAPSHOT_CACHE_BITS(txn_db_options.wp_snapshot_cache_bits),
+        SNAPSHOT_CACHE_SIZE(static_cast<size_t>(1ull << SNAPSHOT_CACHE_BITS)),
+        COMMIT_CACHE_BITS(txn_db_options.wp_commit_cache_bits),
+        COMMIT_CACHE_SIZE(static_cast<size_t>(1ull << COMMIT_CACHE_BITS)),
+        FORMAT(COMMIT_CACHE_BITS) {
+    Init(txn_db_options);
+  }
+
+  explicit WritePreparedTxnDB(StackableDB* db,
+                              const TransactionDBOptions& txn_db_options)
+      : PessimisticTransactionDB(db, txn_db_options),
+        SNAPSHOT_CACHE_BITS(txn_db_options.wp_snapshot_cache_bits),
+        SNAPSHOT_CACHE_SIZE(static_cast<size_t>(1ull << SNAPSHOT_CACHE_BITS)),
+        COMMIT_CACHE_BITS(txn_db_options.wp_commit_cache_bits),
+        COMMIT_CACHE_SIZE(static_cast<size_t>(1ull << COMMIT_CACHE_BITS)),
+        FORMAT(COMMIT_CACHE_BITS) {
+    Init(txn_db_options);
+  }
+
+  virtual ~WritePreparedTxnDB();
+
+  virtual Status Initialize(
+      const std::vector<size_t>& compaction_enabled_cf_indices,
+      const std::vector<ColumnFamilyHandle*>& handles) override;
+
+  Transaction* BeginTransaction(const WriteOptions& write_options,
+                                const TransactionOptions& txn_options,
+                                Transaction* old_txn) override;
+
+  using TransactionDB::Write;
+  Status Write(const WriteOptions& opts, WriteBatch* updates) override;
+
+  // Optimized version of ::Write that receives more optimization requests,
+  // such as skip_concurrency_control.
+  using PessimisticTransactionDB::Write;
+  Status Write(const WriteOptions& opts, const TransactionDBWriteOptimizations&,
+               WriteBatch* updates) override;
+
+  // Write the batch to the underlying DB and mark it as committed. Can be
+  // used both directly from the TxnDB or through a transaction.
+  Status WriteInternal(const WriteOptions& write_options, WriteBatch* batch,
+                       size_t batch_cnt, WritePreparedTxn* txn);
+
+  using DB::Get;
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value) override;
+
+  using DB::MultiGet;
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_family,
+      const std::vector<Slice>& keys,
+      std::vector<std::string>* values) override;
+
+  using DB::NewIterator;
+  virtual Iterator* NewIterator(const ReadOptions& options,
+                                ColumnFamilyHandle* column_family) override;
+
+  using DB::NewIterators;
+  virtual Status NewIterators(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_families,
+      std::vector<Iterator*>* iterators) override;
+
+  // Check whether the transaction that wrote the value with sequence number
+  // seq is visible to the snapshot with sequence number snapshot_seq.
+  // Returns true if commit_seq <= snapshot_seq.
+  // If the snapshot_seq is already released and snapshot_seq <= max, sets
+  // *snap_released to true and returns true as well.
+  inline bool IsInSnapshot(uint64_t prep_seq, uint64_t snapshot_seq,
+                           uint64_t min_uncommitted = kMinUnCommittedSeq,
+                           bool* snap_released = nullptr) const {
+    ROCKS_LOG_DETAILS(info_log_,
+                      "IsInSnapshot %" PRIu64 " in %" PRIu64
+                      " min_uncommitted %" PRIu64,
+                      prep_seq, snapshot_seq, min_uncommitted);
+    assert(min_uncommitted >= kMinUnCommittedSeq);
+    // The caller is responsible for initializing snap_released.
+    assert(snap_released == nullptr || *snap_released == false);
+    // Here we try to infer the return value without looking into the prepare
+    // list. This helps avoid synchronization over a shared map.
+    // TODO(myabandeh): optimize this.
+    // This sequence of checks must be correct but not necessarily efficient.
+    if (prep_seq == 0) {
+      // Compaction will output keys to bottom-level with sequence number 0 if
+      // it is visible to the earliest snapshot.
+      ROCKS_LOG_DETAILS(
+          info_log_, "IsInSnapshot %" PRIu64 " in %" PRIu64 " returns %" PRId32,
+          prep_seq, snapshot_seq, 1);
+      return true;
+    }
+    if (snapshot_seq < prep_seq) {
+      // snapshot_seq < prep_seq <= commit_seq => snapshot_seq < commit_seq
+      ROCKS_LOG_DETAILS(
+          info_log_, "IsInSnapshot %" PRIu64 " in %" PRIu64 " returns %" PRId32,
+          prep_seq, snapshot_seq, 0);
+      return false;
+    }
+    if (prep_seq < min_uncommitted) {
+      ROCKS_LOG_DETAILS(info_log_,
+                        "IsInSnapshot %" PRIu64 " in %" PRIu64
+                        " returns %" PRId32
+                        " because of min_uncommitted %" PRIu64,
+                        prep_seq, snapshot_seq, 1, min_uncommitted);
+      return true;
+    }
+    // Commit of a delayed prepared txn has two non-atomic steps: adding it to
+    // the commit cache, and removing it from delayed_prepared_. Our reads of
+    // these two are also non-atomic, so by looking into the commit cache
+    // first we might find the prep_seq neither in the commit cache nor in
+    // delayed_prepared_. To fix that, i) we check whether there was any
+    // delayed prepared BEFORE looking into the commit cache, and ii) if there
+    // was, we complete the search steps as: commit cache, delayed prepared,
+    // commit cache again. In this way, if the first query to the commit cache
+    // missed the commit, the 2nd one will catch it.
+    bool was_empty;
+    SequenceNumber max_evicted_seq_lb, max_evicted_seq_ub;
+    CommitEntry64b dont_care;
+    auto indexed_seq = prep_seq % COMMIT_CACHE_SIZE;
+    size_t repeats = 0;
+    do {
+      repeats++;
+      assert(repeats < 100);
+      if (UNLIKELY(repeats >= 100)) {
+        throw std::runtime_error(
+            "The read was interrupted 100 times by updates to "
+            "max_evicted_seq_. This is unexpected in all setups");
+      }
+      max_evicted_seq_lb = max_evicted_seq_.load(std::memory_order_acquire);
+      TEST_SYNC_POINT(
+          "WritePreparedTxnDB::IsInSnapshot:max_evicted_seq_:pause");
+      TEST_SYNC_POINT(
+          "WritePreparedTxnDB::IsInSnapshot:max_evicted_seq_:resume");
+      was_empty = delayed_prepared_empty_.load(std::memory_order_acquire);
+      TEST_SYNC_POINT(
+          "WritePreparedTxnDB::IsInSnapshot:delayed_prepared_empty_:pause");
+      TEST_SYNC_POINT(
+          "WritePreparedTxnDB::IsInSnapshot:delayed_prepared_empty_:resume");
+      CommitEntry cached;
+      bool exist = GetCommitEntry(indexed_seq, &dont_care, &cached);
+      TEST_SYNC_POINT("WritePreparedTxnDB::IsInSnapshot:GetCommitEntry:pause");
+      TEST_SYNC_POINT("WritePreparedTxnDB::IsInSnapshot:GetCommitEntry:resume");
+      if (exist && prep_seq == cached.prep_seq) {
+        // It is committed and also not evicted from commit cache
+        ROCKS_LOG_DETAILS(
+            info_log_,
+            "IsInSnapshot %" PRIu64 " in %" PRIu64 " returns %" PRId32,
+            prep_seq, snapshot_seq, cached.commit_seq <= snapshot_seq);
+        return cached.commit_seq <= snapshot_seq;
+      }
+      // else it could be committed but not inserted into the map, which could
+      // happen after recovery, or it could be committed and evicted by another
+      // commit, or never committed.
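+      // Illustrative interleaving (editor's sketch, hypothetical seqs) of the
+      // race that the delayed_prepared_ double-check below closes. Committer
+      // of a delayed prepared txn with prep_seq = 10, commit_seq = 15:
+      //
+      //   committer: adds (10 -> 15) to the commit cache ........ (a)
+      //   committer: erases 10 from delayed_prepared_ ........... (b)
+      //   reader:    first commit-cache query misses (ran before (a))
+      //   reader:    delayed_prepared_ lookup misses (ran after (b))
+      //   reader:    second commit-cache query sees (10 -> 15)
+      //
+      // Without the second commit-cache lookup the reader would wrongly
+      // conclude that txn 10 was never committed.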
+ + // At this point we don't know if it was committed or it is still prepared + max_evicted_seq_ub = max_evicted_seq_.load(std::memory_order_acquire); + if (UNLIKELY(max_evicted_seq_lb != max_evicted_seq_ub)) { + continue; + } + // Note: max_evicted_seq_ when we did GetCommitEntry <= max_evicted_seq_ub + if (max_evicted_seq_ub < prep_seq) { + // Not evicted from cache and also not present, so must be still + // prepared + ROCKS_LOG_DETAILS(info_log_, + "IsInSnapshot %" PRIu64 " in %" PRIu64 + " returns %" PRId32, + prep_seq, snapshot_seq, 0); + return false; + } + TEST_SYNC_POINT("WritePreparedTxnDB::IsInSnapshot:prepared_mutex_:pause"); + TEST_SYNC_POINT( + "WritePreparedTxnDB::IsInSnapshot:prepared_mutex_:resume"); + if (!was_empty) { + // We should not normally reach here + WPRecordTick(TXN_PREPARE_MUTEX_OVERHEAD); + ReadLock rl(&prepared_mutex_); + ROCKS_LOG_WARN( + info_log_, "prepared_mutex_ overhead %" PRIu64 " for %" PRIu64, + static_cast(delayed_prepared_.size()), prep_seq); + if (delayed_prepared_.find(prep_seq) != delayed_prepared_.end()) { + // This is the order: 1) delayed_prepared_commits_ update, 2) publish + // 3) delayed_prepared_ clean up. So check if it is the case of a late + // clenaup. + auto it = delayed_prepared_commits_.find(prep_seq); + if (it == delayed_prepared_commits_.end()) { + // Then it is not committed yet + ROCKS_LOG_DETAILS(info_log_, + "IsInSnapshot %" PRIu64 " in %" PRIu64 + " returns %" PRId32, + prep_seq, snapshot_seq, 0); + return false; + } else { + ROCKS_LOG_DETAILS(info_log_, + "IsInSnapshot %" PRIu64 " in %" PRIu64 + " commit: %" PRIu64 " returns %" PRId32, + prep_seq, snapshot_seq, it->second, + snapshot_seq <= it->second); + return it->second <= snapshot_seq; + } + } else { + // 2nd query to commit cache. Refer to was_empty comment above. + exist = GetCommitEntry(indexed_seq, &dont_care, &cached); + if (exist && prep_seq == cached.prep_seq) { + ROCKS_LOG_DETAILS( + info_log_, + "IsInSnapshot %" PRIu64 " in %" PRIu64 " returns %" PRId32, + prep_seq, snapshot_seq, cached.commit_seq <= snapshot_seq); + return cached.commit_seq <= snapshot_seq; + } + max_evicted_seq_ub = max_evicted_seq_.load(std::memory_order_acquire); + } + } + } while (UNLIKELY(max_evicted_seq_lb != max_evicted_seq_ub)); + // When advancing max_evicted_seq_, we move older entires from prepared to + // delayed_prepared_. Also we move evicted entries from commit cache to + // old_commit_map_ if it overlaps with any snapshot. Since prep_seq <= + // max_evicted_seq_, we have three cases: i) in delayed_prepared_, ii) in + // old_commit_map_, iii) committed with no conflict with any snapshot. Case + // (i) delayed_prepared_ is checked above + if (max_evicted_seq_ub < snapshot_seq) { // then (ii) cannot be the case + // only (iii) is the case: committed + // commit_seq <= max_evicted_seq_ < snapshot_seq => commit_seq < + // snapshot_seq + ROCKS_LOG_DETAILS( + info_log_, "IsInSnapshot %" PRIu64 " in %" PRIu64 " returns %" PRId32, + prep_seq, snapshot_seq, 1); + return true; + } + // else (ii) might be the case: check the commit data saved for this + // snapshot. If there was no overlapping commit entry, then it is committed + // with a commit_seq lower than any live snapshot, including snapshot_seq. + if (old_commit_map_empty_.load(std::memory_order_acquire)) { + ROCKS_LOG_DETAILS(info_log_, + "IsInSnapshot %" PRIu64 " in %" PRIu64 + " returns %" PRId32 " released=1", + prep_seq, snapshot_seq, 0); + assert(snap_released); + // This snapshot is not valid anymore. 
+    // committed before or after the snapshot. Return true but also set
+    // snap_released to true.
+    *snap_released = true;
+    return true;
+  }
+  {
+    // We should not normally reach here unless snapshot_seq is old. This is a
+    // rare case and it is ok to pay the cost of a mutex ReadLock for such
+    // old, reading transactions.
+    WPRecordTick(TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD);
+    ReadLock rl(&old_commit_map_mutex_);
+    auto prep_set_entry = old_commit_map_.find(snapshot_seq);
+    bool found = prep_set_entry != old_commit_map_.end();
+    if (found) {
+      auto& vec = prep_set_entry->second;
+      found = std::binary_search(vec.begin(), vec.end(), prep_seq);
+    } else {
+      // coming from compaction
+      ROCKS_LOG_DETAILS(info_log_,
+                        "IsInSnapshot %" PRIu64 " in %" PRIu64
+                        " returns %" PRId32 " released=1",
+                        prep_seq, snapshot_seq, 0);
+      // This snapshot is not valid anymore. We cannot tell if prep_seq is
+      // committed before or after the snapshot. Return true but also set
+      // snap_released to true.
+      assert(snap_released);
+      *snap_released = true;
+      return true;
+    }
+
+    if (!found) {
+      ROCKS_LOG_DETAILS(info_log_,
+                        "IsInSnapshot %" PRIu64 " in %" PRIu64
+                        " returns %" PRId32,
+                        prep_seq, snapshot_seq, 1);
+      return true;
+    }
+  }
+  // (ii) is the case: it is committed, but after snapshot_seq
+  ROCKS_LOG_DETAILS(
+      info_log_, "IsInSnapshot %" PRIu64 " in %" PRIu64 " returns %" PRId32,
+      prep_seq, snapshot_seq, 0);
+  return false;
+  }
+
+  // Add the transaction with prepare sequence seq to the prepared list.
+  // Note: must be called serially with increasing seq on each call.
+  // locked is true if prepared_mutex_ is already locked.
+  void AddPrepared(uint64_t seq, bool locked = false);
+  // Check if any of the prepared txns are less than the new max_evicted_seq_.
+  // Must be called with prepared_mutex_ write locked.
+  void CheckPreparedAgainstMax(SequenceNumber new_max, bool locked);
+  // Remove the transaction with prepare sequence seq from the prepared list
+  void RemovePrepared(const uint64_t seq, const size_t batch_cnt = 1);
+  // Add the transaction with prepare sequence prepare_seq and commit sequence
+  // commit_seq to the commit map. loop_cnt is to detect infinite loops.
+  // Note: must be called serially.
+  void AddCommitted(uint64_t prepare_seq, uint64_t commit_seq,
+                    uint8_t loop_cnt = 0);
+
+  struct CommitEntry {
+    uint64_t prep_seq;
+    uint64_t commit_seq;
+    CommitEntry() : prep_seq(0), commit_seq(0) {}
+    CommitEntry(uint64_t ps, uint64_t cs) : prep_seq(ps), commit_seq(cs) {}
+    bool operator==(const CommitEntry& rhs) const {
+      return prep_seq == rhs.prep_seq && commit_seq == rhs.commit_seq;
+    }
+  };
+
+  struct CommitEntry64bFormat {
+    explicit CommitEntry64bFormat(size_t index_bits)
+        : INDEX_BITS(index_bits),
+          PREP_BITS(static_cast<size_t>(64 - PAD_BITS - INDEX_BITS)),
+          COMMIT_BITS(static_cast<size_t>(64 - PREP_BITS)),
+          COMMIT_FILTER(static_cast<uint64_t>((1ull << COMMIT_BITS) - 1)),
+          DELTA_UPPERBOUND(static_cast<uint64_t>((1ull << COMMIT_BITS))) {}
+    // Number of higher bits of a sequence number that are not used. They are
+    // used to encode the value type, ...
+    const size_t PAD_BITS = static_cast<size_t>(8);
+    // Number of lower bits from the prepare seq that can be skipped, as they
+    // are implied by the index of the entry in the array
+    const size_t INDEX_BITS;
+    // Number of bits we use to encode the prepare seq
+    const size_t PREP_BITS;
+    // Number of bits we use to encode the commit seq.
+    const size_t COMMIT_BITS;
+    // Filter to encode/decode the commit seq
+    const uint64_t COMMIT_FILTER;
+    // The value of commit_seq - prepare_seq + 1 must be less than this bound
+    const uint64_t DELTA_UPPERBOUND;
+  };
+
+  // Prepare Seq (64 bits) = PAD .. PAD PREP PREP .. PREP INDEX INDEX .. INDEX
+  // Delta Seq   (64 bits) = 0 0 .. 0 0 0 0 0 0 0 0 DELTA DELTA .. DELTA DELTA
+  // Encoded Value         = PREP PREP .. PREP PREP DELTA DELTA .. DELTA DELTA
+  // PAD: the first bits of a seq that are reserved for tagging and hence
+  // ignored
+  // PREP/INDEX: the used bits in a prepare seq number
+  // INDEX: the bits that do not have to be encoded (will be provided
+  // externally by the entry's index in the commit cache)
+  // DELTA: commit seq - prep seq + 1; the number of DELTA bits should be
+  // equal to the number of INDEX bits + PAD bits
+  struct CommitEntry64b {
+    constexpr CommitEntry64b() noexcept : rep_(0) {}
+
+    CommitEntry64b(const CommitEntry& entry, const CommitEntry64bFormat& format)
+        : CommitEntry64b(entry.prep_seq, entry.commit_seq, format) {}
+
+    CommitEntry64b(const uint64_t ps, const uint64_t cs,
+                   const CommitEntry64bFormat& format) {
+      assert(ps < static_cast<uint64_t>(
+                      (1ull << (format.PREP_BITS + format.INDEX_BITS))));
+      assert(ps <= cs);
+      uint64_t delta = cs - ps + 1;  // make initialized delta always >= 1
+      // zero is reserved for uninitialized entries
+      assert(0 < delta);
+      assert(delta < format.DELTA_UPPERBOUND);
+      if (delta >= format.DELTA_UPPERBOUND) {
+        throw std::runtime_error(
+            "commit_seq >> prepare_seq. The allowed distance is " +
+            std::to_string(format.DELTA_UPPERBOUND) + " commit_seq is " +
+            std::to_string(cs) + " prepare_seq is " + std::to_string(ps));
+      }
+      rep_ = (ps << format.PAD_BITS) & ~format.COMMIT_FILTER;
+      rep_ = rep_ | delta;
+    }
+
+    // Return false if the entry is empty
+    bool Parse(const uint64_t indexed_seq, CommitEntry* entry,
+               const CommitEntry64bFormat& format) {
+      uint64_t delta = rep_ & format.COMMIT_FILTER;
+      // zero is reserved for uninitialized entries
+      assert(delta < static_cast<uint64_t>((1ull << format.COMMIT_BITS)));
+      if (delta == 0) {
+        return false;  // an initialized entry would have a non-zero delta
+      }
+
+      assert(indexed_seq < static_cast<uint64_t>((1ull << format.INDEX_BITS)));
+      uint64_t prep_up = rep_ & ~format.COMMIT_FILTER;
+      prep_up >>= format.PAD_BITS;
+      const uint64_t& prep_low = indexed_seq;
+      entry->prep_seq = prep_up | prep_low;
+
+      entry->commit_seq = entry->prep_seq + delta - 1;
+      return true;
+    }
+
+   private:
+    uint64_t rep_;
+  };
+
+  // Struct to hold ownership of snapshot and read callback for cleanup.
+  struct IteratorState;
+
+  std::shared_ptr<std::map<uint32_t, const Comparator*>> GetCFComparatorMap() {
+    return cf_map_;
+  }
+  std::shared_ptr<std::map<uint32_t, ColumnFamilyHandle*>> GetCFHandleMap() {
+    return handle_map_;
+  }
+  void UpdateCFComparatorMap(
+      const std::vector<ColumnFamilyHandle*>& handles) override;
+  void UpdateCFComparatorMap(ColumnFamilyHandle* handle) override;
+
+  virtual const Snapshot* GetSnapshot() override;
+  SnapshotImpl* GetSnapshotInternal(bool for_ww_conflict_check);
+
+ protected:
+  virtual Status VerifyCFOptions(
+      const ColumnFamilyOptions& cf_options) override;
+  // Assign the min and max sequence numbers for reading from the db. A seq >
+  // max is not valid, a seq < min is valid, and a min <= seq < max requires
+  // further checking. Normally max is defined by the snapshot and min is by
+  // the minimum uncommitted seq.
+  inline SnapshotBackup AssignMinMaxSeqs(const Snapshot* snapshot,
+                                         SequenceNumber* min,
+                                         SequenceNumber* max);
+  // Validate whether a snapshot sequence number is still valid based on the
+  // latest db status.
+  // backed_by_snapshot specifies whether the number is backed by an actual
+  // snapshot object. order specifies the memory order with which we load the
+  // atomic variables: relaxed is enough for the default since we care about
+  // the last value seen by the same thread.
+  inline bool ValidateSnapshot(
+      const SequenceNumber snap_seq, const SnapshotBackup backed_by_snapshot,
+      std::memory_order order = std::memory_order_relaxed);
+  // Get a dummy snapshot that refers to kMaxSequenceNumber
+  Snapshot* GetMaxSnapshot() { return &dummy_max_snapshot_; }
+
+  bool ShouldRollbackWithSingleDelete(ColumnFamilyHandle* column_family,
+                                      const Slice& key) {
+    return rollback_deletion_type_callback_
+               ? rollback_deletion_type_callback_(this, column_family, key)
+               : false;
+  }
+
+  std::function<bool(TransactionDB*, ColumnFamilyHandle*, const Slice&)>
+      rollback_deletion_type_callback_;
+
+ private:
+  friend class AddPreparedCallback;
+  friend class PreparedHeap_BasicsTest_Test;
+  friend class PreparedHeap_Concurrent_Test;
+  friend class PreparedHeap_EmptyAtTheEnd_Test;
+  friend class SnapshotConcurrentAccessTest_SnapshotConcurrentAccess_Test;
+  friend class WritePreparedCommitEntryPreReleaseCallback;
+  friend class WritePreparedTransactionTestBase;
+  friend class WritePreparedTxn;
+  friend class WritePreparedTxnDBMock;
+  friend class WritePreparedTransactionTest_AddPreparedBeforeMax_Test;
+  friend class WritePreparedTransactionTest_AdvanceMaxEvictedSeqBasic_Test;
+  friend class
+      WritePreparedTransactionTest_AdvanceMaxEvictedSeqWithDuplicates_Test;
+  friend class WritePreparedTransactionTest_AdvanceSeqByOne_Test;
+  friend class WritePreparedTransactionTest_BasicRecovery_Test;
+  friend class WritePreparedTransactionTest_CheckAgainstSnapshots_Test;
+  friend class WritePreparedTransactionTest_CleanupSnapshotEqualToMax_Test;
+  friend class WritePreparedTransactionTest_ConflictDetectionAfterRecovery_Test;
+  friend class WritePreparedTransactionTest_CommitMap_Test;
+  friend class WritePreparedTransactionTest_DoubleSnapshot_Test;
+  friend class WritePreparedTransactionTest_IsInSnapshotEmptyMap_Test;
+  friend class WritePreparedTransactionTest_IsInSnapshotReleased_Test;
+  friend class WritePreparedTransactionTest_IsInSnapshot_Test;
+  friend class WritePreparedTransactionTest_NewSnapshotLargerThanMax_Test;
+  friend class WritePreparedTransactionTest_MaxCatchupWithNewSnapshot_Test;
+  friend class WritePreparedTransactionTest_MaxCatchupWithUnbackedSnapshot_Test;
+  friend class
+      WritePreparedTransactionTest_NonAtomicCommitOfDelayedPrepared_Test;
+  friend class
+      WritePreparedTransactionTest_NonAtomicUpdateOfDelayedPrepared_Test;
+  friend class WritePreparedTransactionTest_NonAtomicUpdateOfMaxEvictedSeq_Test;
+  friend class WritePreparedTransactionTest_OldCommitMapGC_Test;
+  friend class WritePreparedTransactionTest_Rollback_Test;
+  friend class WritePreparedTransactionTest_SmallestUnCommittedSeq_Test;
+  friend class WriteUnpreparedTxn;
+  friend class WriteUnpreparedTxnDB;
+  friend class WriteUnpreparedTransactionTest_RecoveryTest_Test;
+  friend class MultiOpsTxnsStressTest;
+
+  void Init(const TransactionDBOptions& txn_db_opts);
+
+  void WPRecordTick(uint32_t ticker_type) const {
+    RecordTick(db_impl_->immutable_db_options_.statistics.get(), ticker_type);
+  }
+
+  // A heap with amortized O(1) complexity for erase. It uses one extra heap
+  // to keep track of erased entries that are not yet on top of the main heap.
+  class PreparedHeap {
+    // The mutex is required for push and pop from PreparedHeap. ::erase will
+    // use external synchronization via prepared_mutex_.
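+    //
+    // Added sketch of the erase trick (illustrative, not upstream text):
+    // with heap_ = {3, 5, 9} in ascending order, erase(5) only records 5 in
+    // erased_heap_ since 5 is not on top. A later pop() removes 3, sees
+    // heap_.front() == 5 == erased_heap_.top(), and discards both, so top()
+    // then reports 9 without ever scanning the middle of the heap.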
+    port::Mutex push_pop_mutex_;
+    std::deque<uint64_t> heap_;
+    std::priority_queue<uint64_t, std::vector<uint64_t>, std::greater<uint64_t>>
+        erased_heap_;
+    std::atomic<uint64_t> heap_top_ = {kMaxSequenceNumber};
+    // True when testing crash recovery
+    bool TEST_CRASH_ = false;
+    friend class WritePreparedTxnDB;
+
+   public:
+    ~PreparedHeap() {
+      if (!TEST_CRASH_) {
+        assert(heap_.empty());
+        assert(erased_heap_.empty());
+      }
+    }
+    port::Mutex* push_pop_mutex() { return &push_pop_mutex_; }
+
+    inline bool empty() { return top() == kMaxSequenceNumber; }
+    // Returns kMaxSequenceNumber if empty() and the smallest otherwise.
+    inline uint64_t top() { return heap_top_.load(std::memory_order_acquire); }
+    inline void push(uint64_t v) {
+      push_pop_mutex_.AssertHeld();
+      if (heap_.empty()) {
+        heap_top_.store(v, std::memory_order_release);
+      } else {
+        assert(heap_top_.load() < v);
+      }
+      heap_.push_back(v);
+    }
+    void pop(bool locked = false) {
+      if (!locked) {
+        push_pop_mutex()->Lock();
+      }
+      push_pop_mutex_.AssertHeld();
+      heap_.pop_front();
+      while (!heap_.empty() && !erased_heap_.empty() &&
+             // heap_.top() > erased_heap_.top() could happen if we have erased
+             // a non-existent entry. Ideally the user should not do that but we
+             // should be resilient against it.
+             heap_.front() >= erased_heap_.top()) {
+        if (heap_.front() == erased_heap_.top()) {
+          heap_.pop_front();
+        }
+        uint64_t erased __attribute__((__unused__));
+        erased = erased_heap_.top();
+        erased_heap_.pop();
+        // No duplicate prepare sequence numbers
+        assert(erased_heap_.empty() || erased_heap_.top() != erased);
+      }
+      while (heap_.empty() && !erased_heap_.empty()) {
+        erased_heap_.pop();
+      }
+      heap_top_.store(!heap_.empty() ? heap_.front() : kMaxSequenceNumber,
+                      std::memory_order_release);
+      if (!locked) {
+        push_pop_mutex()->Unlock();
+      }
+    }
+    // Concurrent calls need external synchronization. It is safe to call it
+    // concurrently with push and pop though.
+    void erase(uint64_t seq) {
+      if (!empty()) {
+        auto top_seq = top();
+        if (seq < top_seq) {
+          // Already popped, ignore it.
+        } else if (top_seq == seq) {
+          pop();
+#ifndef NDEBUG
+          MutexLock ml(push_pop_mutex());
+          assert(heap_.empty() || heap_.front() != seq);
+#endif
+        } else {  // top() > seq
+          // Down the heap, remember to pop it later
+          erased_heap_.push(seq);
+        }
+      }
+    }
+  };
+
+  void TEST_Crash() override { prepared_txns_.TEST_CRASH_ = true; }
+
+  // Get the commit entry with index indexed_seq from the commit table. It
+  // returns true if such an entry exists.
+  bool GetCommitEntry(const uint64_t indexed_seq, CommitEntry64b* entry_64b,
+                      CommitEntry* entry) const;
+
+  // Rewrite the entry with the index indexed_seq in the commit table with the
+  // commit entry <prepare_seq, commit_seq>. If the rewrite results in an
+  // eviction, sets the evicted_entry and returns true.
+  bool AddCommitEntry(const uint64_t indexed_seq, const CommitEntry& new_entry,
+                      CommitEntry* evicted_entry);
+
+  // Rewrite the entry with the index indexed_seq in the commit table with the
+  // commit entry new_entry only if the existing entry matches the
+  // expected_entry. Returns false otherwise.
+  bool ExchangeCommitEntry(const uint64_t indexed_seq,
+                           CommitEntry64b& expected_entry,
+                           const CommitEntry& new_entry);
+
+  // Increase max_evicted_seq_ from the previous value prev_max to the new
+  // value. This also involves taking care of prepared txns that are not
+  // committed before new_max, as well as updating the list of live snapshots
+  // at the time of updating the max. Thread-safety: this function can be
+  // called concurrently.
+  // Concurrent invocations of this function are equivalent to a serial
+  // invocation in which the last invocation is the one with the largest
+  // new_max value.
+  void AdvanceMaxEvictedSeq(const SequenceNumber& prev_max,
+                            const SequenceNumber& new_max);
+
+  inline SequenceNumber SmallestUnCommittedSeq() {
+    // Note: We have two lists to look into, but for performance reasons they
+    // are not read atomically. Since CheckPreparedAgainstMax copies the entry
+    // to delayed_prepared_ before removing it from prepared_txns_, to ensure
+    // that no prepared entry is missed, we look into them in opposite
+    // order: first read prepared_txns_ and then delayed_prepared_.
+
+    // This must be called before calling ::top. This is because the
+    // concurrent thread would call ::RemovePrepared before updating
+    // GetLatestSequenceNumber(). Reading them in opposite order here
+    // guarantees that the ::top we read would be lower than the ::top if we
+    // had otherwise updated/read them atomically.
+    auto next_prepare = db_impl_->GetLatestSequenceNumber() + 1;
+    auto min_prepare = prepared_txns_.top();
+    // Since we always update the prepare_heap from the main write queue via
+    // PreReleaseCallback, prepared_txns_.top() indicates the smallest
+    // prepared data in 2pc transactions. For non-2pc transactions that are
+    // written in two steps, we also update prepared_txns_ at the first step
+    // (via the same mechanism) so that their uncommitted data is reflected in
+    // SmallestUnCommittedSeq.
+    if (!delayed_prepared_empty_.load()) {
+      ReadLock rl(&prepared_mutex_);
+      if (!delayed_prepared_.empty()) {
+        return *delayed_prepared_.begin();
+      }
+    }
+    bool empty = min_prepare == kMaxSequenceNumber;
+    if (empty) {
+      // Since GetLatestSequenceNumber is updated
+      // after prepared_txns_ are, the value of GetLatestSequenceNumber would
+      // reflect any uncommitted data that is not added to prepared_txns_ yet.
+      // Otherwise, if there is no concurrent txn, this value simply reflects
+      // the latest value in the memtable.
+      return next_prepare;
+    } else {
+      return std::min(min_prepare, next_prepare);
+    }
+  }
+
+  // Enhance the snapshot object by recording in it the smallest uncommitted
+  // seq
+  inline void EnhanceSnapshot(SnapshotImpl* snapshot,
+                              SequenceNumber min_uncommitted) {
+    assert(snapshot);
+    assert(min_uncommitted <= snapshot->number_ + 1);
+    snapshot->min_uncommitted_ = min_uncommitted;
+  }
+
+  virtual const std::vector<SequenceNumber> GetSnapshotListFromDB(
+      SequenceNumber max);
+
+  // Will be called by the public ReleaseSnapshot method. Does the maintenance
+  // internal to WritePreparedTxnDB
+  void ReleaseSnapshotInternal(const SequenceNumber snap_seq);
+
+  // Update the list of snapshots corresponding to the soon-to-be-updated
+  // max_evicted_seq_. Thread-safety: this function can be called concurrently.
+  // Concurrent invocations are equivalent to a serial invocation in which the
+  // last invocation is the one with the largest version value.
+  void UpdateSnapshots(const std::vector<SequenceNumber>& snapshots,
+                       const SequenceNumber& version);
+  // Check the new list of snapshots against the old one to see if any of
+  // the snapshots are released, and do the cleanup for the released snapshot.
+  void CleanupReleasedSnapshots(
+      const std::vector<SequenceNumber>& new_snapshots,
+      const std::vector<SequenceNumber>& old_snapshots);
+
+  // Check an evicted entry against live snapshots to see if it should be kept
+  // around or whether it can be safely discarded (and hence assumed committed
+  // for all snapshots).
+  // Thread-safety: this function can be called concurrently. If it is called
+  // concurrently with multiple UpdateSnapshots, the result is the same as
+  // checking the intersection of the snapshot list before updates with the
+  // snapshot list of all the concurrent updates.
+  void CheckAgainstSnapshots(const CommitEntry& evicted);
+
+  // Add a new entry to old_commit_map_ if prep_seq <= snapshot_seq <
+  // commit_seq. Return false if checking the next snapshot(s) is not needed.
+  // This is the case if none of the next snapshots could satisfy the
+  // condition. next_is_larger: the next snapshot will be a larger value
+  bool MaybeUpdateOldCommitMap(const uint64_t& prep_seq,
+                               const uint64_t& commit_seq,
+                               const uint64_t& snapshot_seq,
+                               const bool next_is_larger);
+
+  // A trick to increase the last visible sequence number by one and also wait
+  // for the in-flight commits to be visible.
+  void AdvanceSeqByOne();
+
+  // The list of live snapshots at the last time that max_evicted_seq_
+  // advanced. The list is stored in two data structures: in snapshot_cache_,
+  // which is efficient for concurrent reads, and in snapshots_ if the data
+  // does not fit into snapshot_cache_. The total number of snapshots in the
+  // two lists:
+  std::atomic<size_t> snapshots_total_ = {};
+  // The list is sorted in ascending order. Thread-safety for writes is
+  // provided with snapshots_mutex_, and concurrent reads are safe due to
+  // std::atomic for each entry. On the x86_64 architecture such reads are
+  // compiled to simple read instructions.
+  const size_t SNAPSHOT_CACHE_BITS;
+  const size_t SNAPSHOT_CACHE_SIZE;
+  std::unique_ptr<std::atomic<SequenceNumber>[]> snapshot_cache_;
+  // 2nd list for storing snapshots. The list is sorted in ascending order.
+  // Thread-safety is provided with snapshots_mutex_.
+  std::vector<SequenceNumber> snapshots_;
+  // The list of all snapshots: snapshots_ + snapshot_cache_. This list,
+  // although redundant, simplifies the CleanupOldSnapshots implementation.
+  // Thread-safety is provided with snapshots_mutex_.
+  std::vector<SequenceNumber> snapshots_all_;
+  // The version of the latest list of snapshots. This can be used to avoid
+  // rewriting a list that is concurrently updated with a more recent version.
+  SequenceNumber snapshots_version_ = 0;
+
+  // A heap of prepared transactions. Thread-safety is provided with
+  // prepared_mutex_.
+  PreparedHeap prepared_txns_;
+  const size_t COMMIT_CACHE_BITS;
+  const size_t COMMIT_CACHE_SIZE;
+  const CommitEntry64bFormat FORMAT;
+  // commit_cache_ must be initialized to zero to tell apart an empty index
+  // from a filled one. Thread-safety is provided with commit_cache_mutex_.
+  std::unique_ptr<std::atomic<CommitEntry64b>[]> commit_cache_;
+  // The largest evicted *commit* sequence number from the commit_cache_. If a
+  // seq is smaller than max_evicted_seq_, it might or might not be present in
+  // commit_cache_. So commit_cache_ must be checked first before consulting
+  // max_evicted_seq_.
+  std::atomic<SequenceNumber> max_evicted_seq_ = {};
+  // Order: 1) update future_max_evicted_seq_ = new_max, 2)
+  // GetSnapshotListFromDB(new_max), 3) max_evicted_seq_ = new_max. Since
+  // GetSnapshotInternal guarantees that the snapshot seq is larger than
+  // future_max_evicted_seq_, this guarantees that any snapshot that is not
+  // larger than max has already been looked at via a
+  // GetSnapshotListFromDB(new_max).
+  std::atomic<SequenceNumber> future_max_evicted_seq_ = {};
+  // Advance max_evicted_seq_ by this value each time it needs an update. The
+  // larger the value, the less frequent the advances would be.
+  // We do not want it to be too large either, as it would cause stalls by
+  // doing too much maintenance work under the lock.
+  size_t INC_STEP_FOR_MAX_EVICTED = 1;
+  // A map from old snapshots (expected to be used by a few read-only txns) to
+  // the prepared sequence numbers of the entries evicted from commit_cache_
+  // that overlap with such a snapshot. These are the prepared sequence
+  // numbers that the snapshot, to which they are mapped, cannot assume to be
+  // committed just because they are no longer in the commit_cache_. The
+  // vector must be sorted after each update.
+  // Thread-safety is provided with old_commit_map_mutex_.
+  std::map<SequenceNumber, std::vector<SequenceNumber>> old_commit_map_;
+  // A set of long-running prepared transactions that are not finished by the
+  // time max_evicted_seq_ advances their sequence number. This is expected to
+  // be empty normally. Thread-safety is provided with prepared_mutex_.
+  std::set<uint64_t> delayed_prepared_;
+  // Commit of a delayed prepared: 1) update the commit cache, 2) update
+  // delayed_prepared_commits_, 3) publish the seq, 4) clean up
+  // delayed_prepared_. delayed_prepared_commits_ will help us tell apart the
+  // unprepared txns from the ones that are committed but not cleaned up yet.
+  std::unordered_map<SequenceNumber, SequenceNumber> delayed_prepared_commits_;
+  // Updated when delayed_prepared_.empty() changes. Expected to be true
+  // normally.
+  std::atomic<bool> delayed_prepared_empty_ = {true};
+  // Updated when old_commit_map_.empty() changes. Expected to be true
+  // normally.
+  std::atomic<bool> old_commit_map_empty_ = {true};
+  mutable port::RWMutex prepared_mutex_;
+  mutable port::RWMutex old_commit_map_mutex_;
+  mutable port::RWMutex commit_cache_mutex_;
+  mutable port::RWMutex snapshots_mutex_;
+  // A cache of the cf comparators
+  // Thread-safety: since it is const, it is safe to read it concurrently
+  std::shared_ptr<std::map<uint32_t, const Comparator*>> cf_map_;
+  // A cache of the cf handles
+  // Thread-safety: since the handle is a read-only object and the map is
+  // const, it is safe to read it concurrently
+  std::shared_ptr<std::map<uint32_t, ColumnFamilyHandle*>> handle_map_;
+  // A dummy snapshot object that refers to kMaxSequenceNumber
+  SnapshotImpl dummy_max_snapshot_;
+};
+
+class WritePreparedTxnReadCallback : public ReadCallback {
+ public:
+  WritePreparedTxnReadCallback(WritePreparedTxnDB* db, SequenceNumber snapshot)
+      : ReadCallback(snapshot),
+        db_(db),
+        backed_by_snapshot_(kBackedByDBSnapshot) {}
+  WritePreparedTxnReadCallback(WritePreparedTxnDB* db, SequenceNumber snapshot,
+                               SequenceNumber min_uncommitted,
+                               SnapshotBackup backed_by_snapshot)
+      : ReadCallback(snapshot, min_uncommitted),
+        db_(db),
+        backed_by_snapshot_(backed_by_snapshot) {
+    (void)backed_by_snapshot_;  // to silence unused private field warning
+  }
+
+  virtual ~WritePreparedTxnReadCallback() {
+    // If it is not backed by a snapshot, the caller must check validity
+    assert(valid_checked_ || backed_by_snapshot_ == kBackedByDBSnapshot);
+  }
+
+  // Will be called to see if the seq number is visible; if not, it moves on
+  // to the next seq number.
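+  //
+  // Added summary (restating WritePreparedTxnDB::IsInSnapshot, no new
+  // logic): seq < min_uncommitted_ is always visible, seq > max_visible_seq_
+  // never is, and anything in between is resolved via the commit cache,
+  // delayed_prepared_, and old_commit_map_.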
+ inline virtual bool IsVisibleFullCheck(SequenceNumber seq) override { + auto snapshot = max_visible_seq_; + bool snap_released = false; + auto ret = + db_->IsInSnapshot(seq, snapshot, min_uncommitted_, &snap_released); + assert(!snap_released || backed_by_snapshot_ == kUnbackedByDBSnapshot); + snap_released_ |= snap_released; + return ret; + } + + inline bool valid() { + valid_checked_ = true; + return snap_released_ == false; + } + + // TODO(myabandeh): override Refresh when Iterator::Refresh is supported + private: + WritePreparedTxnDB* db_; + // Whether max_visible_seq_ is backed by a snapshot + const SnapshotBackup backed_by_snapshot_; + bool snap_released_ = false; + // Safety check to ensure that the caller has checked invalid statuses + bool valid_checked_ = false; +}; + +class AddPreparedCallback : public PreReleaseCallback { + public: + AddPreparedCallback(WritePreparedTxnDB* db, DBImpl* db_impl, + size_t sub_batch_cnt, bool two_write_queues, + bool first_prepare_batch) + : db_(db), + db_impl_(db_impl), + sub_batch_cnt_(sub_batch_cnt), + two_write_queues_(two_write_queues), + first_prepare_batch_(first_prepare_batch) { + (void)two_write_queues_; // to silence unused private field warning + } + virtual Status Callback(SequenceNumber prepare_seq, + bool is_mem_disabled __attribute__((__unused__)), + uint64_t log_number, size_t index, + size_t total) override { + assert(index < total); + // To reduce the cost of lock acquisition competing with the concurrent + // prepare requests, lock on the first callback and unlock on the last. + const bool do_lock = !two_write_queues_ || index == 0; + const bool do_unlock = !two_write_queues_ || index + 1 == total; + // Always Prepare from the main queue + assert(!two_write_queues_ || !is_mem_disabled); // implies the 1st queue + TEST_SYNC_POINT("AddPreparedCallback::AddPrepared::begin:pause"); + TEST_SYNC_POINT("AddPreparedCallback::AddPrepared::begin:resume"); + if (do_lock) { + db_->prepared_txns_.push_pop_mutex()->Lock(); + } + const bool kLocked = true; + for (size_t i = 0; i < sub_batch_cnt_; i++) { + db_->AddPrepared(prepare_seq + i, kLocked); + } + if (do_unlock) { + db_->prepared_txns_.push_pop_mutex()->Unlock(); + } + TEST_SYNC_POINT("AddPreparedCallback::AddPrepared::end"); + if (first_prepare_batch_) { + assert(log_number != 0); + db_impl_->logs_with_prep_tracker()->MarkLogAsContainingPrepSection( + log_number); + } + return Status::OK(); + } + + private: + WritePreparedTxnDB* db_; + DBImpl* db_impl_; + size_t sub_batch_cnt_; + bool two_write_queues_; + // It is 2PC and this is the first prepare batch. Always the case in 2PC + // unless it is WriteUnPrepared. + bool first_prepare_batch_; +}; + +class WritePreparedCommitEntryPreReleaseCallback : public PreReleaseCallback { + public: + // includes_data indicates that the commit also writes non-empty + // CommitTimeWriteBatch to memtable, which needs to be committed separately. 
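+  //
+  // Added walk-through with made-up numbers (illustrative, not upstream
+  // text): a prepare batch at prep_seq = 10 with prep_batch_cnt = 2 whose
+  // commit marker lands at commit_seq = 17 yields AddCommitted(10, 17) and
+  // AddCommitted(11, 17), so both sub-batches become visible exactly to
+  // snapshots taken at seq >= 17.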
+  WritePreparedCommitEntryPreReleaseCallback(
+      WritePreparedTxnDB* db, DBImpl* db_impl, SequenceNumber prep_seq,
+      size_t prep_batch_cnt, size_t data_batch_cnt = 0,
+      SequenceNumber aux_seq = kMaxSequenceNumber, size_t aux_batch_cnt = 0)
+      : db_(db),
+        db_impl_(db_impl),
+        prep_seq_(prep_seq),
+        prep_batch_cnt_(prep_batch_cnt),
+        data_batch_cnt_(data_batch_cnt),
+        includes_data_(data_batch_cnt_ > 0),
+        aux_seq_(aux_seq),
+        aux_batch_cnt_(aux_batch_cnt),
+        includes_aux_batch_(aux_batch_cnt > 0) {
+    assert((prep_batch_cnt_ > 0) != (prep_seq == kMaxSequenceNumber));  // xor
+    assert(prep_batch_cnt_ > 0 || data_batch_cnt_ > 0);
+    assert((aux_batch_cnt_ > 0) != (aux_seq == kMaxSequenceNumber));  // xor
+  }
+
+  virtual Status Callback(SequenceNumber commit_seq,
+                          bool is_mem_disabled __attribute__((__unused__)),
+                          uint64_t, size_t /*index*/,
+                          size_t /*total*/) override {
+    // Always commit from the 2nd queue
+    assert(!db_impl_->immutable_db_options().two_write_queues ||
+           is_mem_disabled);
+    assert(includes_data_ || prep_seq_ != kMaxSequenceNumber);
+    // The data batch is what accompanies the commit marker and affects the
+    // last seq in the commit batch.
+    const uint64_t last_commit_seq = LIKELY(data_batch_cnt_ <= 1)
+                                         ? commit_seq
+                                         : commit_seq + data_batch_cnt_ - 1;
+    if (prep_seq_ != kMaxSequenceNumber) {
+      for (size_t i = 0; i < prep_batch_cnt_; i++) {
+        db_->AddCommitted(prep_seq_ + i, last_commit_seq);
+      }
+    }  // else there was no prepare phase
+    if (includes_aux_batch_) {
+      for (size_t i = 0; i < aux_batch_cnt_; i++) {
+        db_->AddCommitted(aux_seq_ + i, last_commit_seq);
+      }
+    }
+    if (includes_data_) {
+      assert(data_batch_cnt_);
+      // Commit the data that accompanies the commit request
+      for (size_t i = 0; i < data_batch_cnt_; i++) {
+        // For the commit seq of each batch use the commit seq of the last
+        // batch. This makes debugging easier by having all the batches carry
+        // the same sequence number.
+        db_->AddCommitted(commit_seq + i, last_commit_seq);
+      }
+    }
+    if (db_impl_->immutable_db_options().two_write_queues) {
+      assert(is_mem_disabled);  // implies the 2nd queue
+      // Publish the sequence number. We can do that here assuming the
+      // callback is invoked only from one write queue, which would guarantee
+      // that the published sequence numbers will be in order, i.e., once a
+      // seq is published, all the seqs prior to it are also publishable.
+      db_impl_->SetLastPublishedSequence(last_commit_seq);
+      // Note RemovePrepared should be called after publishing the seq.
+      // Otherwise the SmallestUnCommittedSeq optimization breaks.
+      if (prep_seq_ != kMaxSequenceNumber) {
+        db_->RemovePrepared(prep_seq_, prep_batch_cnt_);
+      }  // else there was no prepare phase
+      if (includes_aux_batch_) {
+        db_->RemovePrepared(aux_seq_, aux_batch_cnt_);
+      }
+    }
+    // else the SequenceNumber that is updated as part of the write already
+    // does the publishing
+    return Status::OK();
+  }
+
+ private:
+  WritePreparedTxnDB* db_;
+  DBImpl* db_impl_;
+  // kMaxSequenceNumber if there was no prepare phase
+  SequenceNumber prep_seq_;
+  size_t prep_batch_cnt_;
+  size_t data_batch_cnt_;
+  // Data here is the batch that is written with the commit marker, either
+  // because it is a commit without prepare or the commit has a
+  // CommitTimeWriteBatch.
+  bool includes_data_;
+  // The auxiliary batch (if there is any) is a batch that is written before,
+  // but gets the same commit seq as the prepare batch or data batch.
+  // This is used in two write queues, where the CommitTimeWriteBatch becomes
+  // the aux batch and we do a separate write to actually commit everything.
+  SequenceNumber aux_seq_;
+  size_t aux_batch_cnt_;
+  bool includes_aux_batch_;
+};
+
+// For two_write_queues, commits both the aborted batch and the cleanup batch
+// and then publishes the seq
+class WritePreparedRollbackPreReleaseCallback : public PreReleaseCallback {
+ public:
+  WritePreparedRollbackPreReleaseCallback(WritePreparedTxnDB* db,
+                                          DBImpl* db_impl,
+                                          SequenceNumber prep_seq,
+                                          SequenceNumber rollback_seq,
+                                          size_t prep_batch_cnt)
+      : db_(db),
+        db_impl_(db_impl),
+        prep_seq_(prep_seq),
+        rollback_seq_(rollback_seq),
+        prep_batch_cnt_(prep_batch_cnt) {
+    assert(prep_seq != kMaxSequenceNumber);
+    assert(rollback_seq != kMaxSequenceNumber);
+    assert(prep_batch_cnt_ > 0);
+  }
+
+  Status Callback(SequenceNumber commit_seq, bool is_mem_disabled, uint64_t,
+                  size_t /*index*/, size_t /*total*/) override {
+    // Always commit from the 2nd queue
+    assert(is_mem_disabled);  // implies the 2nd queue
+    assert(db_impl_->immutable_db_options().two_write_queues);
+#ifdef NDEBUG
+    (void)is_mem_disabled;
+#endif
+    const uint64_t last_commit_seq = commit_seq;
+    db_->AddCommitted(rollback_seq_, last_commit_seq);
+    for (size_t i = 0; i < prep_batch_cnt_; i++) {
+      db_->AddCommitted(prep_seq_ + i, last_commit_seq);
+    }
+    db_impl_->SetLastPublishedSequence(last_commit_seq);
+    return Status::OK();
+  }
+
+ private:
+  WritePreparedTxnDB* db_;
+  DBImpl* db_impl_;
+  SequenceNumber prep_seq_;
+  SequenceNumber rollback_seq_;
+  size_t prep_batch_cnt_;
+};
+
+// Count the number of sub-batches inside a batch. A sub-batch does not have
+// duplicate keys.
+struct SubBatchCounter : public WriteBatch::Handler {
+  explicit SubBatchCounter(std::map<uint32_t, const Comparator*>& comparators)
+      : comparators_(comparators), batches_(1) {}
+  std::map<uint32_t, const Comparator*>& comparators_;
+  using CFKeys = std::set<Slice, SetComparator>;
+  std::map<uint32_t, CFKeys> keys_;
+  size_t batches_;
+  size_t BatchCount() { return batches_; }
+  void AddKey(const uint32_t cf, const Slice& key);
+  void InitWithComp(const uint32_t cf);
+  Status MarkNoop(bool) override { return Status::OK(); }
+  Status MarkEndPrepare(const Slice&) override { return Status::OK(); }
+  Status MarkCommit(const Slice&) override { return Status::OK(); }
+  Status PutCF(uint32_t cf, const Slice& key, const Slice&) override {
+    AddKey(cf, key);
+    return Status::OK();
+  }
+  Status DeleteCF(uint32_t cf, const Slice& key) override {
+    AddKey(cf, key);
+    return Status::OK();
+  }
+  Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
+    AddKey(cf, key);
+    return Status::OK();
+  }
+  Status MergeCF(uint32_t cf, const Slice& key, const Slice&) override {
+    AddKey(cf, key);
+    return Status::OK();
+  }
+  Status MarkBeginPrepare(bool) override { return Status::OK(); }
+  Status MarkRollback(const Slice&) override { return Status::OK(); }
+  Handler::OptionState WriteAfterCommit() const override {
+    return Handler::OptionState::kDisabled;
+  }
+};
+
+SnapshotBackup WritePreparedTxnDB::AssignMinMaxSeqs(const Snapshot* snapshot,
+                                                    SequenceNumber* min,
+                                                    SequenceNumber* max) {
+  if (snapshot != nullptr) {
+    *min =
+        static_cast_with_check<const SnapshotImpl>(snapshot)->min_uncommitted_;
+    *max = static_cast_with_check<const SnapshotImpl>(snapshot)->number_;
+    // A duplicate of the check in EnhanceSnapshot().
+    assert(*min <= *max + 1);
+    return kBackedByDBSnapshot;
+  } else {
+    *min = SmallestUnCommittedSeq();
+    *max = 0;  // to be assigned later after sv is referenced.
+ return kUnbackedByDBSnapshot; + } +} + +bool WritePreparedTxnDB::ValidateSnapshot( + const SequenceNumber snap_seq, const SnapshotBackup backed_by_snapshot, + std::memory_order order) { + if (backed_by_snapshot == kBackedByDBSnapshot) { + return true; + } else { + SequenceNumber max = max_evicted_seq_.load(order); + // Validate that max has not advanced the snapshot seq that is not backed + // by a real snapshot. This is a very rare case that should not happen in + // real workloads. + if (UNLIKELY(snap_seq <= max && snap_seq != 0)) { + return false; + } + } + return true; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.cc new file mode 100644 index 0000000000..845b117cfd --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.cc @@ -0,0 +1,1056 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "utilities/transactions/write_unprepared_txn.h" + +#include "db/db_impl/db_impl.h" +#include "util/cast_util.h" +#include "utilities/transactions/write_unprepared_txn_db.h" +#include "utilities/write_batch_with_index/write_batch_with_index_internal.h" + +namespace ROCKSDB_NAMESPACE { + +bool WriteUnpreparedTxnReadCallback::IsVisibleFullCheck(SequenceNumber seq) { + // Since unprep_seqs maps prep_seq => prepare_batch_cnt, to check if seq is + // in unprep_seqs, we have to check if seq is equal to prep_seq or any of + // the prepare_batch_cnt seq nums after it. + // + // TODO(lth): Can be optimized with std::lower_bound if unprep_seqs is + // large. + for (const auto& it : unprep_seqs_) { + if (it.first <= seq && seq < it.first + it.second) { + return true; + } + } + + bool snap_released = false; + auto ret = + db_->IsInSnapshot(seq, wup_snapshot_, min_uncommitted_, &snap_released); + assert(!snap_released || backed_by_snapshot_ == kUnbackedByDBSnapshot); + snap_released_ |= snap_released; + return ret; +} + +WriteUnpreparedTxn::WriteUnpreparedTxn(WriteUnpreparedTxnDB* txn_db, + const WriteOptions& write_options, + const TransactionOptions& txn_options) + : WritePreparedTxn(txn_db, write_options, txn_options), + wupt_db_(txn_db), + last_log_number_(0), + recovered_txn_(false), + largest_validated_seq_(0) { + if (txn_options.write_batch_flush_threshold < 0) { + write_batch_flush_threshold_ = + txn_db_impl_->GetTxnDBOptions().default_write_batch_flush_threshold; + } else { + write_batch_flush_threshold_ = txn_options.write_batch_flush_threshold; + } +} + +WriteUnpreparedTxn::~WriteUnpreparedTxn() { + if (!unprep_seqs_.empty()) { + assert(log_number_ > 0); + assert(GetId() > 0); + assert(!name_.empty()); + + // We should rollback regardless of GetState, but some unit tests that + // test crash recovery run the destructor assuming that rollback does not + // happen, so that rollback during recovery can be exercised. 
+    if (GetState() == STARTED || GetState() == LOCKS_STOLEN) {
+      auto s = RollbackInternal();
+      assert(s.ok());
+      if (!s.ok()) {
+        ROCKS_LOG_FATAL(
+            wupt_db_->info_log_,
+            "Rollback of WriteUnprepared transaction failed in destructor: %s",
+            s.ToString().c_str());
+      }
+      dbimpl_->logs_with_prep_tracker()->MarkLogAsHavingPrepSectionFlushed(
+          log_number_);
+    }
+  }
+
+  // Clear the tracked locks so that ~PessimisticTransaction does not
+  // try to unlock keys for recovered transactions.
+  if (recovered_txn_) {
+    tracked_locks_->Clear();
+  }
+}
+
+void WriteUnpreparedTxn::Initialize(const TransactionOptions& txn_options) {
+  PessimisticTransaction::Initialize(txn_options);
+  if (txn_options.write_batch_flush_threshold < 0) {
+    write_batch_flush_threshold_ =
+        txn_db_impl_->GetTxnDBOptions().default_write_batch_flush_threshold;
+  } else {
+    write_batch_flush_threshold_ = txn_options.write_batch_flush_threshold;
+  }
+
+  unprep_seqs_.clear();
+  flushed_save_points_.reset(nullptr);
+  unflushed_save_points_.reset(nullptr);
+  recovered_txn_ = false;
+  largest_validated_seq_ = 0;
+  assert(active_iterators_.empty());
+  active_iterators_.clear();
+  untracked_keys_.clear();
+}
+
+Status WriteUnpreparedTxn::HandleWrite(std::function<Status()> do_write) {
+  Status s;
+  if (active_iterators_.empty()) {
+    s = MaybeFlushWriteBatchToDB();
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  s = do_write();
+  if (s.ok()) {
+    if (snapshot_) {
+      largest_validated_seq_ =
+          std::max(largest_validated_seq_, snapshot_->GetSequenceNumber());
+    } else {
+      // TODO(lth): We should use the same number as tracked_at_seq in TryLock,
+      // because what is actually being tracked is the sequence number at
+      // which this key was locked.
+      largest_validated_seq_ = db_impl_->GetLastPublishedSequence();
+    }
+  }
+  return s;
+}
+
+Status WriteUnpreparedTxn::Put(ColumnFamilyHandle* column_family,
+                               const Slice& key, const Slice& value,
+                               const bool assume_tracked) {
+  return HandleWrite([&]() {
+    return TransactionBaseImpl::Put(column_family, key, value, assume_tracked);
+  });
+}
+
+Status WriteUnpreparedTxn::Put(ColumnFamilyHandle* column_family,
+                               const SliceParts& key, const SliceParts& value,
+                               const bool assume_tracked) {
+  return HandleWrite([&]() {
+    return TransactionBaseImpl::Put(column_family, key, value, assume_tracked);
+  });
+}
+
+Status WriteUnpreparedTxn::Merge(ColumnFamilyHandle* column_family,
+                                 const Slice& key, const Slice& value,
+                                 const bool assume_tracked) {
+  return HandleWrite([&]() {
+    return TransactionBaseImpl::Merge(column_family, key, value,
+                                      assume_tracked);
+  });
+}
+
+Status WriteUnpreparedTxn::Delete(ColumnFamilyHandle* column_family,
+                                  const Slice& key, const bool assume_tracked) {
+  return HandleWrite([&]() {
+    return TransactionBaseImpl::Delete(column_family, key, assume_tracked);
+  });
+}
+
+Status WriteUnpreparedTxn::Delete(ColumnFamilyHandle* column_family,
+                                  const SliceParts& key,
+                                  const bool assume_tracked) {
+  return HandleWrite([&]() {
+    return TransactionBaseImpl::Delete(column_family, key, assume_tracked);
+  });
+}
+
+Status WriteUnpreparedTxn::SingleDelete(ColumnFamilyHandle* column_family,
+                                        const Slice& key,
+                                        const bool assume_tracked) {
+  return HandleWrite([&]() {
+    return TransactionBaseImpl::SingleDelete(column_family, key,
+                                             assume_tracked);
+  });
+}
+
+Status WriteUnpreparedTxn::SingleDelete(ColumnFamilyHandle* column_family,
+                                        const SliceParts& key,
+                                        const bool assume_tracked) {
+  return HandleWrite([&]() {
+    return TransactionBaseImpl::SingleDelete(column_family, key,
+                                             assume_tracked);
+  });
+}
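+
+// Added usage sketch (illustrative only; the names below are hypothetical
+// and TransactionDB setup is elided): every write funnels through
+// HandleWrite, so a WriteUnprepared transaction may spill its batch to the
+// DB mid-transaction once it exceeds write_batch_flush_threshold:
+//
+//   TransactionOptions topts;
+//   topts.write_batch_flush_threshold = 1024;  // bytes
+//   Transaction* txn = txn_db->BeginTransaction(WriteOptions(), topts);
+//   txn->SetName("xid1");
+//   for (int i = 0; i < 100000; i++) {
+//     // may trigger an unprepared flush once the batch exceeds 1024 bytes
+//     txn->Put(std::to_string(i), "value");
+//   }
+//   txn->Commit();  // commits all previously flushed unprepared batches too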
+
+// WriteUnpreparedTxn::RebuildFromWriteBatch is only called on recovery. For
+// WriteUnprepared, the write batches have already been written into the
+// database during WAL replay, so all we have to do is re-track the keys so
+// that rollbacks are possible.
+//
+// Calling TryLock instead of TrackKey is also possible, but as an
+// optimization, recovered transactions do not hold locks on their keys. This
+// follows the implementation in PessimisticTransactionDB::Initialize where we
+// set skip_concurrency_control to true.
+Status WriteUnpreparedTxn::RebuildFromWriteBatch(WriteBatch* wb) {
+  struct TrackKeyHandler : public WriteBatch::Handler {
+    WriteUnpreparedTxn* txn_;
+    bool rollback_merge_operands_;
+
+    TrackKeyHandler(WriteUnpreparedTxn* txn, bool rollback_merge_operands)
+        : txn_(txn), rollback_merge_operands_(rollback_merge_operands) {}
+
+    Status PutCF(uint32_t cf, const Slice& key, const Slice&) override {
+      txn_->TrackKey(cf, key.ToString(), kMaxSequenceNumber,
+                     false /* read_only */, true /* exclusive */);
+      return Status::OK();
+    }
+
+    Status DeleteCF(uint32_t cf, const Slice& key) override {
+      txn_->TrackKey(cf, key.ToString(), kMaxSequenceNumber,
+                     false /* read_only */, true /* exclusive */);
+      return Status::OK();
+    }
+
+    Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
+      txn_->TrackKey(cf, key.ToString(), kMaxSequenceNumber,
+                     false /* read_only */, true /* exclusive */);
+      return Status::OK();
+    }
+
+    Status MergeCF(uint32_t cf, const Slice& key, const Slice&) override {
+      if (rollback_merge_operands_) {
+        txn_->TrackKey(cf, key.ToString(), kMaxSequenceNumber,
+                       false /* read_only */, true /* exclusive */);
+      }
+      return Status::OK();
+    }
+
+    // Recovered batches do not contain 2PC markers.
+    Status MarkBeginPrepare(bool) override { return Status::InvalidArgument(); }
+
+    Status MarkEndPrepare(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkNoop(bool) override { return Status::InvalidArgument(); }
+
+    Status MarkCommit(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkRollback(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+  };
+
+  TrackKeyHandler handler(this,
+                          wupt_db_->txn_db_options_.rollback_merge_operands);
+  return wb->Iterate(&handler);
+}
+
+Status WriteUnpreparedTxn::MaybeFlushWriteBatchToDB() {
+  const bool kPrepared = true;
+  Status s;
+  if (write_batch_flush_threshold_ > 0 &&
+      write_batch_.GetWriteBatch()->Count() > 0 &&
+      write_batch_.GetDataSize() >
+          static_cast<size_t>(write_batch_flush_threshold_)) {
+    assert(GetState() != PREPARED);
+    s = FlushWriteBatchToDB(!kPrepared);
+  }
+  return s;
+}
+
+Status WriteUnpreparedTxn::FlushWriteBatchToDB(bool prepared) {
+  // If the current write batch contains savepoints, then some special
+  // handling is required so that RollbackToSavepoint can work.
+  //
+  // RollbackToSavepoint is not supported after Prepare() is called, so only
+  // do this for unprepared batches.
+  if (!prepared && unflushed_save_points_ != nullptr &&
+      !unflushed_save_points_->empty()) {
+    return FlushWriteBatchWithSavePointToDB();
+  }
+
+  return FlushWriteBatchToDBInternal(prepared);
+}
+
+Status WriteUnpreparedTxn::FlushWriteBatchToDBInternal(bool prepared) {
+  if (name_.empty()) {
+    assert(!prepared);
+#ifndef NDEBUG
+    static std::atomic_ullong autogen_id{0};
+    // To avoid changing all tests to call SetName, just autogenerate one.
+    if (wupt_db_->txn_db_options_.autogenerate_name) {
+      auto s = SetName(std::string("autoxid") +
+                       std::to_string(autogen_id.fetch_add(1)));
+      assert(s.ok());
+    } else
+#endif
+    {
+      return Status::InvalidArgument("Cannot write to DB without SetName.");
+    }
+  }
+
+  struct UntrackedKeyHandler : public WriteBatch::Handler {
+    WriteUnpreparedTxn* txn_;
+    bool rollback_merge_operands_;
+
+    UntrackedKeyHandler(WriteUnpreparedTxn* txn, bool rollback_merge_operands)
+        : txn_(txn), rollback_merge_operands_(rollback_merge_operands) {}
+
+    Status AddUntrackedKey(uint32_t cf, const Slice& key) {
+      auto str = key.ToString();
+      PointLockStatus lock_status =
+          txn_->tracked_locks_->GetPointLockStatus(cf, str);
+      if (!lock_status.locked) {
+        txn_->untracked_keys_[cf].push_back(str);
+      }
+      return Status::OK();
+    }
+
+    Status PutCF(uint32_t cf, const Slice& key, const Slice&) override {
+      return AddUntrackedKey(cf, key);
+    }
+
+    Status DeleteCF(uint32_t cf, const Slice& key) override {
+      return AddUntrackedKey(cf, key);
+    }
+
+    Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
+      return AddUntrackedKey(cf, key);
+    }
+
+    Status MergeCF(uint32_t cf, const Slice& key, const Slice&) override {
+      if (rollback_merge_operands_) {
+        return AddUntrackedKey(cf, key);
+      }
+      return Status::OK();
+    }
+
+    // The only expected 2PC marker is the initial Noop marker.
+    Status MarkNoop(bool empty_batch) override {
+      return empty_batch ? Status::OK() : Status::InvalidArgument();
+    }
+
+    Status MarkBeginPrepare(bool) override { return Status::InvalidArgument(); }
+
+    Status MarkEndPrepare(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkCommit(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkRollback(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+  };
+
+  UntrackedKeyHandler handler(
+      this, wupt_db_->txn_db_options_.rollback_merge_operands);
+  auto s = GetWriteBatch()->GetWriteBatch()->Iterate(&handler);
+  assert(s.ok());
+
+  // TODO(lth): Reduce duplicate code with WritePrepared prepare logic.
+  WriteOptions write_options = write_options_;
+  write_options.disableWAL = false;
+  const bool WRITE_AFTER_COMMIT = true;
+  const bool first_prepare_batch = log_number_ == 0;
+  // MarkEndPrepare will change the Noop marker to the appropriate marker.
+  s = WriteBatchInternal::MarkEndPrepare(GetWriteBatch()->GetWriteBatch(),
+                                         name_, !WRITE_AFTER_COMMIT, !prepared);
+  assert(s.ok());
+  // For each duplicate key we account for a new sub-batch
+  prepare_batch_cnt_ = GetWriteBatch()->SubBatchCnt();
+  // AddPrepared is better called in the pre-release callback; otherwise there
+  // is a non-zero chance of max advancing past prepare_seq, with readers
+  // assuming the data is committed.
+  // Also, having it in the PreReleaseCallback allows in-order addition of
+  // prepared entries to PreparedHeap and hence enables an optimization. Refer
+  // to SmallestUnCommittedSeq for more details.
+  AddPreparedCallback add_prepared_callback(
+      wpt_db_, db_impl_, prepare_batch_cnt_,
+      db_impl_->immutable_db_options().two_write_queues, first_prepare_batch);
+  const bool DISABLE_MEMTABLE = true;
+  uint64_t seq_used = kMaxSequenceNumber;
+  // log_number_ should refer to the oldest log containing uncommitted data
+  // from the current transaction. This means that if log_number_ is set,
+  // WriteImpl should not overwrite that value, so set log_used to nullptr if
+  // log_number_ is already set.
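+  //
+  // Illustrative trace of the bookkeeping below (added sketch with made-up
+  // numbers): two mid-transaction flushes of 3 and 2 sub-batches landing at
+  // seqs 40 and 70 leave unprep_seqs_ = {40: 3, 70: 2}; the commit callback
+  // later marks seqs 40-42 and 70-71 as committed and removes them from the
+  // prepared heap.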
+  s = db_impl_->WriteImpl(write_options, GetWriteBatch()->GetWriteBatch(),
+                          /*callback*/ nullptr, &last_log_number_,
+                          /*log ref*/ 0, !DISABLE_MEMTABLE, &seq_used,
+                          prepare_batch_cnt_, &add_prepared_callback);
+  if (log_number_ == 0) {
+    log_number_ = last_log_number_;
+  }
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  auto prepare_seq = seq_used;
+
+  // Only call SetId if it hasn't been set yet.
+  if (GetId() == 0) {
+    SetId(prepare_seq);
+  }
+  // unprep_seqs_ will also contain prepared seqnos since they are treated in
+  // the same way in the prepare/commit callbacks. See the comment on the
+  // definition of unprep_seqs_.
+  unprep_seqs_[prepare_seq] = prepare_batch_cnt_;
+
+  // Reset transaction state.
+  if (!prepared) {
+    prepare_batch_cnt_ = 0;
+    const bool kClear = true;
+    TransactionBaseImpl::InitWriteBatch(kClear);
+  }
+
+  return s;
+}
+
+Status WriteUnpreparedTxn::FlushWriteBatchWithSavePointToDB() {
+  assert(unflushed_save_points_ != nullptr &&
+         unflushed_save_points_->size() > 0);
+  assert(save_points_ != nullptr && save_points_->size() > 0);
+  assert(save_points_->size() >= unflushed_save_points_->size());
+
+  // Handler class for creating an unprepared batch from a savepoint.
+  struct SavePointBatchHandler : public WriteBatch::Handler {
+    WriteBatchWithIndex* wb_;
+    const std::map<uint32_t, ColumnFamilyHandle*>& handles_;
+
+    SavePointBatchHandler(
+        WriteBatchWithIndex* wb,
+        const std::map<uint32_t, ColumnFamilyHandle*>& handles)
+        : wb_(wb), handles_(handles) {}
+
+    Status PutCF(uint32_t cf, const Slice& key, const Slice& value) override {
+      return wb_->Put(handles_.at(cf), key, value);
+    }
+
+    Status DeleteCF(uint32_t cf, const Slice& key) override {
+      return wb_->Delete(handles_.at(cf), key);
+    }
+
+    Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
+      return wb_->SingleDelete(handles_.at(cf), key);
+    }
+
+    Status MergeCF(uint32_t cf, const Slice& key, const Slice& value) override {
+      return wb_->Merge(handles_.at(cf), key, value);
+    }
+
+    // The only expected 2PC marker is the initial Noop marker.
+    Status MarkNoop(bool empty_batch) override {
+      return empty_batch ? Status::OK() : Status::InvalidArgument();
+    }
+
+    Status MarkBeginPrepare(bool) override { return Status::InvalidArgument(); }
+
+    Status MarkEndPrepare(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkCommit(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+
+    Status MarkRollback(const Slice&) override {
+      return Status::InvalidArgument();
+    }
+  };
+
+  // The comparator of the default cf is passed in, similar to the
+  // initialization of TransactionBaseImpl::write_batch_. This comparator is
+  // only used if the write batch encounters an invalid cf id, and falls back
+  // to this comparator.
+  WriteBatchWithIndex wb(wpt_db_->DefaultColumnFamily()->GetComparator(), 0,
+                         true, 0, write_options_.protection_bytes_per_key);
+  // Swap with write_batch_ so that wb contains the complete write batch. The
+  // actual write batch that will be flushed to DB will be built in
+  // write_batch_, and will be read by FlushWriteBatchToDBInternal.
+  std::swap(wb, write_batch_);
+  TransactionBaseImpl::InitWriteBatch();
+
+  size_t prev_boundary = WriteBatchInternal::kHeader;
+  const bool kPrepared = true;
+  for (size_t i = 0; i < unflushed_save_points_->size() + 1; i++) {
+    bool trailing_batch = i == unflushed_save_points_->size();
+    SavePointBatchHandler sp_handler(&write_batch_,
+                                     *wupt_db_->GetCFHandleMap().get());
+    size_t curr_boundary = trailing_batch
+                               ? wb.GetWriteBatch()->GetDataSize()
+                               : (*unflushed_save_points_)[i];
+
+    // Construct the partial write batch up to the savepoint.
+    //
+    // Theoretically, a memcpy between the write batches should be sufficient,
+    // since the rewriting into the batch should produce the exact same byte
+    // representation. Rebuilding the WriteBatchWithIndex index is still
+    // necessary though, and that would imply doing two passes over the batch.
+    Status s = WriteBatchInternal::Iterate(wb.GetWriteBatch(), &sp_handler,
+                                           prev_boundary, curr_boundary);
+    if (!s.ok()) {
+      return s;
+    }
+
+    if (write_batch_.GetWriteBatch()->Count() > 0) {
+      // Flush the write batch.
+      s = FlushWriteBatchToDBInternal(!kPrepared);
+      if (!s.ok()) {
+        return s;
+      }
+    }
+
+    if (!trailing_batch) {
+      if (flushed_save_points_ == nullptr) {
+        flushed_save_points_.reset(
+            new autovector<WriteUnpreparedTxn::SavePoint>());
+      }
+      flushed_save_points_->emplace_back(
+          unprep_seqs_, new ManagedSnapshot(db_impl_, wupt_db_->GetSnapshot()));
+    }
+
+    prev_boundary = curr_boundary;
+    const bool kClear = true;
+    TransactionBaseImpl::InitWriteBatch(kClear);
+  }
+
+  unflushed_save_points_->clear();
+  return Status::OK();
+}
+
+Status WriteUnpreparedTxn::PrepareInternal() {
+  const bool kPrepared = true;
+  return FlushWriteBatchToDB(kPrepared);
+}
+
+Status WriteUnpreparedTxn::CommitWithoutPrepareInternal() {
+  if (unprep_seqs_.empty()) {
+    assert(log_number_ == 0);
+    assert(GetId() == 0);
+    return WritePreparedTxn::CommitWithoutPrepareInternal();
+  }
+
+  // TODO(lth): We should optimize commit without prepare to not perform
+  // a prepare under the hood.
+  auto s = PrepareInternal();
+  if (!s.ok()) {
+    return s;
+  }
+  return CommitInternal();
+}
+
+Status WriteUnpreparedTxn::CommitInternal() {
+  // TODO(lth): Reduce duplicate code with WritePrepared commit logic.
+
+  // We take the commit-time batch and append the Commit marker. The Memtable
+  // will ignore the Commit marker in non-recovery mode
+  WriteBatch* working_batch = GetCommitTimeWriteBatch();
+  const bool empty = working_batch->Count() == 0;
+  auto s = WriteBatchInternal::MarkCommit(working_batch, name_);
+  assert(s.ok());
+
+  const bool for_recovery = use_only_the_last_commit_time_batch_for_recovery_;
+  if (!empty) {
+    // When not writing to memtable, we can still cache the latest write batch.
+    // The cached batch will be written to memtable in WriteRecoverableState
+    // during FlushMemTable
+    if (for_recovery) {
+      WriteBatchInternal::SetAsLatestPersistentState(working_batch);
+    } else {
+      return Status::InvalidArgument(
+          "Commit-time-batch can only be used if "
+          "use_only_the_last_commit_time_batch_for_recovery is true");
+    }
+  }
+
+  const bool includes_data = !empty && !for_recovery;
+  size_t commit_batch_cnt = 0;
+  if (UNLIKELY(includes_data)) {
+    ROCKS_LOG_WARN(db_impl_->immutable_db_options().info_log,
+                   "Duplicate key overhead");
+    SubBatchCounter counter(*wpt_db_->GetCFComparatorMap());
+    s = working_batch->Iterate(&counter);
+    assert(s.ok());
+    commit_batch_cnt = counter.BatchCount();
+  }
+  const bool disable_memtable = !includes_data;
+  const bool do_one_write =
+      !db_impl_->immutable_db_options().two_write_queues || disable_memtable;
+
+  WriteUnpreparedCommitEntryPreReleaseCallback update_commit_map(
+      wpt_db_, db_impl_, unprep_seqs_, commit_batch_cnt);
+  const bool kFirstPrepareBatch = true;
+  AddPreparedCallback add_prepared_callback(
+      wpt_db_, db_impl_, commit_batch_cnt,
+      db_impl_->immutable_db_options().two_write_queues, !kFirstPrepareBatch);
+  PreReleaseCallback* pre_release_callback;
+  if (do_one_write) {
+    pre_release_callback = &update_commit_map;
+  } else {
+    pre_release_callback = &add_prepared_callback;
+  }
+  uint64_t seq_used = kMaxSequenceNumber;
+  // Since the prepared batch is directly written to memtable, there is
+  // already a connection between the memtable and its WAL, so there is no
+  // need to redundantly reference the log that contains the prepared data.
+  const uint64_t zero_log_number = 0ull;
+  size_t batch_cnt = UNLIKELY(commit_batch_cnt) ? commit_batch_cnt : 1;
+  s = db_impl_->WriteImpl(write_options_, working_batch, nullptr, nullptr,
+                          zero_log_number, disable_memtable, &seq_used,
+                          batch_cnt, pre_release_callback);
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  const SequenceNumber commit_batch_seq = seq_used;
+  if (LIKELY(do_one_write || !s.ok())) {
+    if (LIKELY(s.ok())) {
+      // Note RemovePrepared should be called after the WriteImpl that
+      // published the seq. Otherwise the SmallestUnCommittedSeq optimization
+      // breaks.
+      for (const auto& seq : unprep_seqs_) {
+        wpt_db_->RemovePrepared(seq.first, seq.second);
+      }
+    }
+    if (UNLIKELY(!do_one_write)) {
+      wpt_db_->RemovePrepared(commit_batch_seq, commit_batch_cnt);
+    }
+    unprep_seqs_.clear();
+    flushed_save_points_.reset(nullptr);
+    unflushed_save_points_.reset(nullptr);
+    return s;
+  }  // else do the 2nd write to publish seq
+
+  // Populate unprep_seqs_ with commit_batch_seq, since we treat data in the
+  // commit write batch as just another "unprepared" batch. This will also
+  // update the unprep_seqs_ in the update_commit_map callback.
+  unprep_seqs_[commit_batch_seq] = commit_batch_cnt;
+  WriteUnpreparedCommitEntryPreReleaseCallback
+      update_commit_map_with_commit_batch(wpt_db_, db_impl_, unprep_seqs_, 0);
+
+  // Note: the 2nd write comes with a performance penalty. So if we have too
+  // many commits accompanied by a CommitTimeWriteBatch and yet cannot enable
+  // the use_only_the_last_commit_time_batch_for_recovery_ optimization,
+  // two_write_queues should be disabled to avoid many additional writes here.
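+  //
+  // Illustrative timeline for the two-queue path (added sketch with made-up
+  // seqs, not upstream text): the 1st write above lands the commit batch at,
+  // say, seq 20 and runs AddPrepared(20) via add_prepared_callback without
+  // publishing. The 2nd write below appends an empty marker batch on the 2nd
+  // queue, whose callback calls AddCommitted for every entry in unprep_seqs_
+  // (now including 20) and publishes the seq; RemovePrepared then drops the
+  // entries from the prepared heap.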
+
+  // Update the commit map only from the 2nd queue
+  WriteBatch empty_batch;
+  s = empty_batch.PutLogData(Slice());
+  assert(s.ok());
+  // In the absence of Prepare markers, use Noop as a batch separator
+  s = WriteBatchInternal::InsertNoop(&empty_batch);
+  assert(s.ok());
+  const bool DISABLE_MEMTABLE = true;
+  const size_t ONE_BATCH = 1;
+  const uint64_t NO_REF_LOG = 0;
+  s = db_impl_->WriteImpl(write_options_, &empty_batch, nullptr, nullptr,
+                          NO_REF_LOG, DISABLE_MEMTABLE, &seq_used, ONE_BATCH,
+                          &update_commit_map_with_commit_batch);
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  // Note RemovePrepared should be called after the WriteImpl that published
+  // the seq. Otherwise the SmallestUnCommittedSeq optimization breaks.
+  for (const auto& seq : unprep_seqs_) {
+    wpt_db_->RemovePrepared(seq.first, seq.second);
+  }
+  unprep_seqs_.clear();
+  flushed_save_points_.reset(nullptr);
+  unflushed_save_points_.reset(nullptr);
+  return s;
+}
+
+Status WriteUnpreparedTxn::WriteRollbackKeys(
+    const LockTracker& lock_tracker, WriteBatchWithIndex* rollback_batch,
+    ReadCallback* callback, const ReadOptions& roptions) {
+  // This assertion can be removed when range lock is supported.
+  assert(lock_tracker.IsPointLockSupported());
+  const auto& cf_map = *wupt_db_->GetCFHandleMap();
+  auto WriteRollbackKey = [&](const std::string& key, uint32_t cfid) {
+    const auto& cf_handle = cf_map.at(cfid);
+    PinnableSlice pinnable_val;
+    bool not_used;
+    DBImpl::GetImplOptions get_impl_options;
+    get_impl_options.column_family = cf_handle;
+    get_impl_options.value = &pinnable_val;
+    get_impl_options.value_found = &not_used;
+    get_impl_options.callback = callback;
+    auto s = db_impl_->GetImpl(roptions, key, get_impl_options);
+
+    if (s.ok()) {
+      s = rollback_batch->Put(cf_handle, key, pinnable_val);
+      assert(s.ok());
+    } else if (s.IsNotFound()) {
+      if (wupt_db_->ShouldRollbackWithSingleDelete(cf_handle, key)) {
+        s = rollback_batch->SingleDelete(cf_handle, key);
+      } else {
+        s = rollback_batch->Delete(cf_handle, key);
+      }
+      assert(s.ok());
+    } else {
+      return s;
+    }
+
+    return Status::OK();
+  };
+
+  std::unique_ptr<LockTracker::ColumnFamilyIterator> cf_it(
+      lock_tracker.GetColumnFamilyIterator());
+  assert(cf_it != nullptr);
+  while (cf_it->HasNext()) {
+    ColumnFamilyId cf = cf_it->Next();
+    std::unique_ptr<LockTracker::KeyIterator> key_it(
+        lock_tracker.GetKeyIterator(cf));
+    assert(key_it != nullptr);
+    while (key_it->HasNext()) {
+      const std::string& key = key_it->Next();
+      auto s = WriteRollbackKey(key, cf);
+      if (!s.ok()) {
+        return s;
+      }
+    }
+  }
+
+  for (const auto& cfkey : untracked_keys_) {
+    const auto cfid = cfkey.first;
+    const auto& keys = cfkey.second;
+    for (const auto& key : keys) {
+      auto s = WriteRollbackKey(key, cfid);
+      if (!s.ok()) {
+        return s;
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
+Status WriteUnpreparedTxn::RollbackInternal() {
+  // TODO(lth): Reduce duplicate code with WritePrepared rollback logic.
+  WriteBatchWithIndex rollback_batch(
+      wpt_db_->DefaultColumnFamily()->GetComparator(), 0, true, 0,
+      write_options_.protection_bytes_per_key);
+  assert(GetId() != kMaxSequenceNumber);
+  assert(GetId() > 0);
+  Status s;
+  auto read_at_seq = kMaxSequenceNumber;
+  ReadOptions roptions;
+  // to prevent the callback's seq from being overridden inside DBImpl::Get
+  roptions.snapshot = wpt_db_->GetMaxSnapshot();
+  // Note that we do not use WriteUnpreparedTxnReadCallback because we do not
+  // need to read our own writes when reading prior versions of the key for
+  // rollback.
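+  //
+  // Added clarification (not upstream text): with read_at_seq ==
+  // kMaxSequenceNumber every *committed* version is visible to the callback,
+  // while this transaction's own unprepared batches stay invisible because
+  // they are still uncommitted, so the reads performed by WriteRollbackKeys
+  // below recover the pre-transaction values for the rollback batch.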
+Status WriteUnpreparedTxn::RollbackInternal() {
+  // TODO(lth): Reduce duplicate code with WritePrepared rollback logic.
+  WriteBatchWithIndex rollback_batch(
+      wpt_db_->DefaultColumnFamily()->GetComparator(), 0, true, 0,
+      write_options_.protection_bytes_per_key);
+  assert(GetId() != kMaxSequenceNumber);
+  assert(GetId() > 0);
+  Status s;
+  auto read_at_seq = kMaxSequenceNumber;
+  ReadOptions roptions;
+  // to prevent the callback's seq from being overridden inside DBImpl::Get
+  roptions.snapshot = wpt_db_->GetMaxSnapshot();
+  // Note that we do not use WriteUnpreparedTxnReadCallback because we do not
+  // need to read our own writes when reading prior versions of the key for
+  // rollback.
+  WritePreparedTxnReadCallback callback(wpt_db_, read_at_seq);
+  // TODO(lth): We write the rollback batch all in a single batch here, but
+  // this should be subdivided into multiple batches as well. In phase 2, when
+  // key sets are read from WAL, this will happen naturally.
+  s = WriteRollbackKeys(*tracked_locks_, &rollback_batch, &callback, roptions);
+  if (!s.ok()) {
+    return s;
+  }
+
+  // The Rollback marker will be used as a batch separator
+  s = WriteBatchInternal::MarkRollback(rollback_batch.GetWriteBatch(), name_);
+  assert(s.ok());
+  bool do_one_write = !db_impl_->immutable_db_options().two_write_queues;
+  const bool DISABLE_MEMTABLE = true;
+  const uint64_t NO_REF_LOG = 0;
+  uint64_t seq_used = kMaxSequenceNumber;
+  // Rollback batch may contain duplicate keys, because tracked_keys_ is not
+  // comparator aware.
+  auto rollback_batch_cnt = rollback_batch.SubBatchCnt();
+  // We commit the rolled back prepared batches. Although this is
+  // counter-intuitive, i) it is safe to do so, since the prepared batches are
+  // already canceled out by the rollback batch, ii) adding the commit entry
+  // to CommitCache will allow us to benefit from the existing mechanism in
+  // CommitCache that keeps an entry that was evicted due to max advance, yet
+  // overlaps with a live snapshot, around so that the live snapshot properly
+  // skips the entry even if its prepare seq is lower than max_evicted_seq_.
+  //
+  // TODO(lth): RollbackInternal is conceptually very similar to
+  // CommitInternal, with the rollback batch simply taking on the role of
+  // CommitTimeWriteBatch. We should be able to merge the two code paths.
+  WriteUnpreparedCommitEntryPreReleaseCallback update_commit_map(
+      wpt_db_, db_impl_, unprep_seqs_, rollback_batch_cnt);
+  // Note: the rollback batch does not need AddPrepared since it is written to
+  // DB in one shot. min_uncommitted still works since it requires capturing
+  // data that is written to DB but not yet committed, while the rollback
+  // batch commits with PreReleaseCallback.
+  s = db_impl_->WriteImpl(write_options_, rollback_batch.GetWriteBatch(),
+                          nullptr, nullptr, NO_REF_LOG, !DISABLE_MEMTABLE,
+                          &seq_used, rollback_batch_cnt,
+                          do_one_write ? &update_commit_map : nullptr);
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  if (!s.ok()) {
+    return s;
+  }
+  if (do_one_write) {
+    for (const auto& seq : unprep_seqs_) {
+      wpt_db_->RemovePrepared(seq.first, seq.second);
+    }
+    unprep_seqs_.clear();
+    flushed_save_points_.reset(nullptr);
+    unflushed_save_points_.reset(nullptr);
+    return s;
+  }  // else do the 2nd write for commit
+
+  uint64_t& prepare_seq = seq_used;
+  // Populate unprep_seqs_ with rollback_batch_cnt, since we treat data in the
+  // rollback write batch as just another "unprepared" batch. This will also
+  // update the unprep_seqs_ in the update_commit_map callback.
+  unprep_seqs_[prepare_seq] = rollback_batch_cnt;
+  WriteUnpreparedCommitEntryPreReleaseCallback
+      update_commit_map_with_rollback_batch(wpt_db_, db_impl_, unprep_seqs_, 0);
+
+  ROCKS_LOG_DETAILS(db_impl_->immutable_db_options().info_log,
+                    "RollbackInternal 2nd write prepare_seq: %" PRIu64,
+                    prepare_seq);
+  WriteBatch empty_batch;
+  const size_t ONE_BATCH = 1;
+  s = empty_batch.PutLogData(Slice());
+  assert(s.ok());
+  // In the absence of Prepare markers, use Noop as a batch separator
+  s = WriteBatchInternal::InsertNoop(&empty_batch);
+  assert(s.ok());
+  s = db_impl_->WriteImpl(write_options_, &empty_batch, nullptr, nullptr,
+                          NO_REF_LOG, DISABLE_MEMTABLE, &seq_used, ONE_BATCH,
+                          &update_commit_map_with_rollback_batch);
+  assert(!s.ok() || seq_used != kMaxSequenceNumber);
+  // Mark the txn as rolled back
+  if (s.ok()) {
+    for (const auto& seq : unprep_seqs_) {
+      wpt_db_->RemovePrepared(seq.first, seq.second);
+    }
+  }
+
+  unprep_seqs_.clear();
+  flushed_save_points_.reset(nullptr);
+  unflushed_save_points_.reset(nullptr);
+  return s;
+}
+
+void WriteUnpreparedTxn::Clear() {
+  if (!recovered_txn_) {
+    txn_db_impl_->UnLock(this, *tracked_locks_);
+  }
+  unprep_seqs_.clear();
+  flushed_save_points_.reset(nullptr);
+  unflushed_save_points_.reset(nullptr);
+  recovered_txn_ = false;
+  largest_validated_seq_ = 0;
+  for (auto& it : active_iterators_) {
+    auto bdit = static_cast<BaseDeltaIterator*>(it);
+    bdit->Invalidate(Status::InvalidArgument(
+        "Cannot use iterator after transaction has finished"));
+  }
+  active_iterators_.clear();
+  untracked_keys_.clear();
+  TransactionBaseImpl::Clear();
+}
+
+void WriteUnpreparedTxn::SetSavePoint() {
+  assert((unflushed_save_points_ ? unflushed_save_points_->size() : 0) +
+             (flushed_save_points_ ? flushed_save_points_->size() : 0) ==
+         (save_points_ ? save_points_->size() : 0));
+  PessimisticTransaction::SetSavePoint();
+  if (unflushed_save_points_ == nullptr) {
+    unflushed_save_points_.reset(new autovector<size_t>());
+  }
+  unflushed_save_points_->push_back(write_batch_.GetDataSize());
+}
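
[Editorial note] The savepoint machinery above is driven through the standard Transaction API. A hedged usage sketch with placeholder keys; it assumes a transaction obtained from a TransactionDB as in the earlier sketch.

```cpp
// Sketch: rolling back to a savepoint undoes writes made after it, whether
// they still sit in the in-memory batch (unflushed_save_points_) or have
// already been flushed to the DB (flushed_save_points_).
#include <cassert>
#include "rocksdb/utilities/transaction.h"

void SavepointDemo(rocksdb::Transaction* txn) {
  assert(txn->Put("a", "1").ok());
  txn->SetSavePoint();                       // records batch size / snapshot
  assert(txn->Put("b", "2").ok());
  assert(txn->RollbackToSavePoint().ok());   // "b" is undone, "a" survives
  assert(txn->Commit().ok());
}
```
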
+Status WriteUnpreparedTxn::RollbackToSavePoint() {
+  assert((unflushed_save_points_ ? unflushed_save_points_->size() : 0) +
+             (flushed_save_points_ ? flushed_save_points_->size() : 0) ==
+         (save_points_ ? save_points_->size() : 0));
+  if (unflushed_save_points_ != nullptr && unflushed_save_points_->size() > 0) {
+    Status s = PessimisticTransaction::RollbackToSavePoint();
+    assert(!s.IsNotFound());
+    unflushed_save_points_->pop_back();
+    return s;
+  }
+
+  if (flushed_save_points_ != nullptr && !flushed_save_points_->empty()) {
+    return RollbackToSavePointInternal();
+  }
+
+  return Status::NotFound();
+}
+
+Status WriteUnpreparedTxn::RollbackToSavePointInternal() {
+  Status s;
+
+  const bool kClear = true;
+  TransactionBaseImpl::InitWriteBatch(kClear);
+
+  assert(flushed_save_points_->size() > 0);
+  WriteUnpreparedTxn::SavePoint& top = flushed_save_points_->back();
+
+  assert(save_points_ != nullptr && save_points_->size() > 0);
+  const LockTracker& tracked_keys = *save_points_->top().new_locks_;
+
+  ReadOptions roptions;
+  roptions.snapshot = top.snapshot_->snapshot();
+  SequenceNumber min_uncommitted =
+      static_cast_with_check<const SnapshotImpl>(roptions.snapshot)
+          ->min_uncommitted_;
+  SequenceNumber snap_seq = roptions.snapshot->GetSequenceNumber();
+  WriteUnpreparedTxnReadCallback callback(wupt_db_, snap_seq, min_uncommitted,
+                                          top.unprep_seqs_,
+                                          kBackedByDBSnapshot);
+  s = WriteRollbackKeys(tracked_keys, &write_batch_, &callback, roptions);
+  if (!s.ok()) {
+    return s;
+  }
+
+  const bool kPrepared = true;
+  s = FlushWriteBatchToDBInternal(!kPrepared);
+  if (!s.ok()) {
+    return s;
+  }
+
+  // PessimisticTransaction::RollbackToSavePoint will also call
+  // RollbackToSavepoint on write_batch_. However, write_batch_ is empty and
+  // has no savepoints because this savepoint has already been flushed. Work
+  // around this by setting a fake savepoint.
+  write_batch_.SetSavePoint();
+  s = PessimisticTransaction::RollbackToSavePoint();
+  assert(s.ok());
+  if (!s.ok()) {
+    return s;
+  }
+
+  flushed_save_points_->pop_back();
+  return s;
+}
+
+Status WriteUnpreparedTxn::PopSavePoint() {
+  assert((unflushed_save_points_ ? unflushed_save_points_->size() : 0) +
+             (flushed_save_points_ ? flushed_save_points_->size() : 0) ==
+         (save_points_ ? save_points_->size() : 0));
+  if (unflushed_save_points_ != nullptr && unflushed_save_points_->size() > 0) {
+    Status s = PessimisticTransaction::PopSavePoint();
+    assert(!s.IsNotFound());
+    unflushed_save_points_->pop_back();
+    return s;
+  }
+
+  if (flushed_save_points_ != nullptr && !flushed_save_points_->empty()) {
+    // PessimisticTransaction::PopSavePoint will also call PopSavePoint on
+    // write_batch_. However, write_batch_ is empty and has no savepoints
+    // because this savepoint has already been flushed. Work around this by
+    // setting a fake savepoint.
+    write_batch_.SetSavePoint();
+    Status s = PessimisticTransaction::PopSavePoint();
+    assert(!s.IsNotFound());
+    flushed_save_points_->pop_back();
+    return s;
+  }
+
+  return Status::NotFound();
+}
+
+void WriteUnpreparedTxn::MultiGet(const ReadOptions& options,
+                                  ColumnFamilyHandle* column_family,
+                                  const size_t num_keys, const Slice* keys,
+                                  PinnableSlice* values, Status* statuses,
+                                  const bool sorted_input) {
+  assert(options.io_activity == Env::IOActivity::kUnknown);
+  SequenceNumber min_uncommitted, snap_seq;
+  const SnapshotBackup backed_by_snapshot =
+      wupt_db_->AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq);
+  WriteUnpreparedTxnReadCallback callback(wupt_db_, snap_seq, min_uncommitted,
+                                          unprep_seqs_, backed_by_snapshot);
+  write_batch_.MultiGetFromBatchAndDB(db_, options, column_family, num_keys,
+                                      keys, values, statuses, sorted_input,
+                                      &callback);
+  if (UNLIKELY(!callback.valid() ||
+               !wupt_db_->ValidateSnapshot(snap_seq, backed_by_snapshot))) {
+    wupt_db_->WPRecordTick(TXN_GET_TRY_AGAIN);
+    for (size_t i = 0; i < num_keys; i++) {
+      statuses[i] = Status::TryAgain();
+    }
+  }
+}
+
+Status WriteUnpreparedTxn::Get(const ReadOptions& options,
+                               ColumnFamilyHandle* column_family,
+                               const Slice& key, PinnableSlice* value) {
+  if (options.io_activity != Env::IOActivity::kUnknown) {
+    return Status::InvalidArgument(
+        "Cannot call Get with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`");
+  }
+  SequenceNumber min_uncommitted, snap_seq;
+  const SnapshotBackup backed_by_snapshot =
+      wupt_db_->AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq);
+  WriteUnpreparedTxnReadCallback callback(wupt_db_, snap_seq, min_uncommitted,
+                                          unprep_seqs_, backed_by_snapshot);
+  auto res = write_batch_.GetFromBatchAndDB(db_, options, column_family, key,
+                                            value, &callback);
+  if (LIKELY(callback.valid() &&
+             wupt_db_->ValidateSnapshot(snap_seq, backed_by_snapshot))) {
+    return res;
+  } else {
+    res.PermitUncheckedError();
+    wupt_db_->WPRecordTick(TXN_GET_TRY_AGAIN);
+    return Status::TryAgain();
+  }
+}
+
+namespace {
+static void CleanupWriteUnpreparedWBWIIterator(void* arg1, void* arg2) {
+  auto txn = reinterpret_cast<WriteUnpreparedTxn*>(arg1);
+  auto iter = reinterpret_cast<Iterator*>(arg2);
+  txn->RemoveActiveIterator(iter);
+}
+}  // anonymous namespace
+
+Iterator* WriteUnpreparedTxn::GetIterator(const ReadOptions& options) {
+  return GetIterator(options, wupt_db_->DefaultColumnFamily());
+}
+
+Iterator* WriteUnpreparedTxn::GetIterator(const ReadOptions& options,
+                                          ColumnFamilyHandle* column_family) {
+  // Make sure to get the iterator from WriteUnpreparedTxnDB, not the root db.
+  Iterator* db_iter = wupt_db_->NewIterator(options, column_family, this);
+  assert(db_iter);
+
+  auto iter = write_batch_.NewIteratorWithBase(column_family, db_iter);
+  active_iterators_.push_back(iter);
+  iter->RegisterCleanup(CleanupWriteUnpreparedWBWIIterator, this, iter);
+  return iter;
+}
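
[Editorial note] These overrides are what give a live transaction read-your-own-writes semantics even after parts of its batch were flushed. A hedged sketch of the client-visible behavior, assuming the key was not previously committed; placeholder key names.

```cpp
// Sketch: reads inside the transaction observe its own uncommitted writes,
// while readers of the base DB do not see them until commit.
#include <cassert>
#include <string>
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"

void ReadYourOwnWrites(rocksdb::TransactionDB* db, rocksdb::Transaction* txn) {
  std::string value;
  assert(txn->Put("k", "v").ok());
  // Goes through WriteUnpreparedTxn::Get and its read callback, so the
  // uncommitted "v" is visible to this transaction...
  assert(txn->Get(rocksdb::ReadOptions(), "k", &value).ok() && value == "v");
  // ...but not to an independent reader of the DB (assuming "k" was absent).
  assert(db->Get(rocksdb::ReadOptions(), "k", &value).IsNotFound());
}
```
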
+Status WriteUnpreparedTxn::ValidateSnapshot(ColumnFamilyHandle* column_family,
+                                            const Slice& key,
+                                            SequenceNumber* tracked_at_seq) {
+  // TODO(lth): Reduce duplicate code with WritePrepared ValidateSnapshot
+  // logic.
+  assert(snapshot_);
+
+  SequenceNumber min_uncommitted =
+      static_cast_with_check<const SnapshotImpl>(snapshot_.get())
+          ->min_uncommitted_;
+  SequenceNumber snap_seq = snapshot_->GetSequenceNumber();
+  // tracked_at_seq is either max or the last snapshot with which this key
+  // was tracked, so there is no need to apply IsInSnapshot to this comparison
+  // here, as tracked_at_seq is not a prepare seq.
+  if (*tracked_at_seq <= snap_seq) {
+    // If the key has been previously validated at a sequence number earlier
+    // than the current snapshot's sequence number, we already know it has not
+    // been modified.
+    return Status::OK();
+  }
+
+  *tracked_at_seq = snap_seq;
+
+  ColumnFamilyHandle* cfh =
+      column_family ? column_family : db_impl_->DefaultColumnFamily();
+
+  WriteUnpreparedTxnReadCallback snap_checker(
+      wupt_db_, snap_seq, min_uncommitted, unprep_seqs_, kBackedByDBSnapshot);
+  // TODO(yanqin): Support user-defined timestamp.
+  return TransactionUtil::CheckKeyForConflicts(
+      db_impl_, cfh, key.ToString(), snap_seq, /*ts=*/nullptr,
+      false /* cache_only */, &snap_checker, min_uncommitted);
+}
+
+const std::map<SequenceNumber, size_t>&
+WriteUnpreparedTxn::GetUnpreparedSequenceNumbers() {
+  return unprep_seqs_;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.h
new file mode 100644
index 0000000000..63c65f00ae
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn.h
@@ -0,0 +1,339 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <set>
+
+#include "utilities/transactions/write_prepared_txn.h"
+#include "utilities/transactions/write_unprepared_txn_db.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class WriteUnpreparedTxnDB;
+class WriteUnpreparedTxn;
+
+// WriteUnprepared transactions need to be able to read their own uncommitted
+// writes, and supporting this requires some careful consideration. Because
+// writes in the current transaction may be flushed to DB already, we cannot
+// rely on the contents of WriteBatchWithIndex to determine whether a key
+// should be visible or not, so we have to remember to check the DB for any
+// uncommitted keys that should be visible to us. First, we will need to
+// change the seek-to-snapshot logic, to seek to
+// max_visible_seq = max(snap_seq, max_unprep_seq). Any key greater than
+// max_visible_seq should not be visible, because it cannot be unprepared by
+// the current transaction and it is not in its snapshot.
+//
+// When we seek to max_visible_seq, one of these cases will happen:
+// 1. We hit an unprepared key from the current transaction.
+// 2. We hit an unprepared key from another transaction.
+// 3. We hit a committed key with snap_seq < seq < max_unprep_seq.
+// 4. We hit a committed key with seq <= snap_seq.
+//
+// IsVisibleFullCheck handles all cases correctly.
+//
+// Other notes:
+// Note that max_visible_seq is only calculated once at iterator construction
+// time, meaning if the same transaction is adding more unprep seqs through
+// writes during iteration, these newer writes may not be visible. This is not
+// a problem for MySQL though, because it avoids modifying the index as it is
+// scanning through it, to avoid the Halloween Problem. Instead, it scans the
+// index once up front, and modifies based on a temporary copy.
+//
+// In DBIter, there is a "reseek" optimization if the iterator skips over too
+// many keys. However, this assumes that the reseek seeks exactly to the
+// required key. In write unprepared, even after seeking directly to
+// max_visible_seq, some iteration may be required before hitting a visible
+// key, and special precautions must be taken to avoid performing another
+// reseek, leading to an infinite loop.
+//
+class WriteUnpreparedTxnReadCallback : public ReadCallback {
+ public:
+  WriteUnpreparedTxnReadCallback(
+      WritePreparedTxnDB* db, SequenceNumber snapshot,
+      SequenceNumber min_uncommitted,
+      const std::map<SequenceNumber, size_t>& unprep_seqs,
+      SnapshotBackup backed_by_snapshot)
+      // Pass our last uncommitted seq as the snapshot to the parent class to
+      // ensure that the parent will not prematurely filter out own writes. We
+      // will do the exact comparison against snapshots in the
+      // IsVisibleFullCheck override.
+      : ReadCallback(CalcMaxVisibleSeq(unprep_seqs, snapshot), min_uncommitted),
+        db_(db),
+        unprep_seqs_(unprep_seqs),
+        wup_snapshot_(snapshot),
+        backed_by_snapshot_(backed_by_snapshot) {
+    (void)backed_by_snapshot_;  // to silence unused private field warning
+  }
+
+  virtual ~WriteUnpreparedTxnReadCallback() {
+    // If it is not backed by snapshot, the caller must check validity
+    assert(valid_checked_ || backed_by_snapshot_ == kBackedByDBSnapshot);
+  }
+
+  virtual bool IsVisibleFullCheck(SequenceNumber seq) override;
+
+  inline bool valid() {
+    valid_checked_ = true;
+    return snap_released_ == false;
+  }
+
+  void Refresh(SequenceNumber seq) override {
+    max_visible_seq_ = std::max(max_visible_seq_, seq);
+    wup_snapshot_ = seq;
+  }
+
+  static SequenceNumber CalcMaxVisibleSeq(
+      const std::map<SequenceNumber, size_t>& unprep_seqs,
+      SequenceNumber snapshot_seq) {
+    SequenceNumber max_unprepared = 0;
+    if (unprep_seqs.size()) {
+      max_unprepared =
+          unprep_seqs.rbegin()->first + unprep_seqs.rbegin()->second - 1;
+    }
+    return std::max(max_unprepared, snapshot_seq);
+  }
+
+ private:
+  WritePreparedTxnDB* db_;
+  const std::map<SequenceNumber, size_t>& unprep_seqs_;
+  SequenceNumber wup_snapshot_;
+  // Whether max_visible_seq_ is backed by a snapshot
+  const SnapshotBackup backed_by_snapshot_;
+  bool snap_released_ = false;
+  // Safety check to ensure that the caller has checked invalid statuses
+  bool valid_checked_ = false;
+};
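
[Editorial note] A small worked example of the visibility bound computed by CalcMaxVisibleSeq, with illustrative numbers; this is not vendored code.

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>

int main() {
  // unprep_seq => batch count, as in unprep_seqs_.
  std::map<uint64_t, size_t> unprep_seqs{{10, 3}, {20, 2}};
  uint64_t snapshot_seq = 15;
  // Largest sequence number covered by any unprepared batch: 20 + 2 - 1.
  uint64_t max_unprepared =
      unprep_seqs.rbegin()->first + unprep_seqs.rbegin()->second - 1;
  assert(max_unprepared == 21);
  // The iterator seeks at max(snapshot, max unprepared) = 21; anything above
  // can be neither ours nor in our snapshot, and IsVisibleFullCheck
  // disambiguates cases 1-4 listed in the class comment.
  assert(std::max(max_unprepared, snapshot_seq) == 21);
}
```
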
+
+class WriteUnpreparedTxn : public WritePreparedTxn {
+ public:
+  WriteUnpreparedTxn(WriteUnpreparedTxnDB* db,
+                     const WriteOptions& write_options,
+                     const TransactionOptions& txn_options);
+
+  virtual ~WriteUnpreparedTxn();
+
+  using TransactionBaseImpl::Put;
+  virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& value,
+                     const bool assume_tracked = false) override;
+  virtual Status Put(ColumnFamilyHandle* column_family, const SliceParts& key,
+                     const SliceParts& value,
+                     const bool assume_tracked = false) override;
+
+  using TransactionBaseImpl::Merge;
+  virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
+                       const Slice& value,
+                       const bool assume_tracked = false) override;
+
+  using TransactionBaseImpl::Delete;
+  virtual Status Delete(ColumnFamilyHandle* column_family, const Slice& key,
+                        const bool assume_tracked = false) override;
+  virtual Status Delete(ColumnFamilyHandle* column_family,
+                        const SliceParts& key,
+                        const bool assume_tracked = false) override;
+
+  using TransactionBaseImpl::SingleDelete;
+  virtual Status SingleDelete(ColumnFamilyHandle* column_family,
+                              const Slice& key,
+                              const bool assume_tracked = false) override;
+  virtual Status SingleDelete(ColumnFamilyHandle* column_family,
+                              const SliceParts& key,
+                              const bool assume_tracked = false) override;
+
+  // In WriteUnprepared, untracked writes will break snapshot validation
+  // logic. Snapshot validation will only check the largest sequence number of
+  // a key to see if it was committed or not. However, an untracked unprepared
+  // write will hide smaller committed sequence numbers.
+  //
+  // TODO(lth): Investigate whether it is worth having snapshot validation
+  // validate all values larger than snap_seq. Otherwise, we should return
+  // Status::NotSupported for untracked writes.
+
+  virtual Status RebuildFromWriteBatch(WriteBatch*) override;
+
+  virtual uint64_t GetLastLogNumber() const override {
+    return last_log_number_;
+  }
+
+  void RemoveActiveIterator(Iterator* iter) {
+    active_iterators_.erase(
+        std::remove(active_iterators_.begin(), active_iterators_.end(), iter),
+        active_iterators_.end());
+  }
+
+ protected:
+  void Initialize(const TransactionOptions& txn_options) override;
+
+  Status PrepareInternal() override;
+
+  Status CommitWithoutPrepareInternal() override;
+  Status CommitInternal() override;
+
+  Status RollbackInternal() override;
+
+  void Clear() override;
+
+  void SetSavePoint() override;
+  Status RollbackToSavePoint() override;
+  Status PopSavePoint() override;
+
+  // Get and GetIterator need to be overridden so that a ReadCallback to
+  // handle read-your-own-write is used.
+  using Transaction::Get;
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value) override;
+
+  using Transaction::MultiGet;
+  virtual void MultiGet(const ReadOptions& options,
+                        ColumnFamilyHandle* column_family,
+                        const size_t num_keys, const Slice* keys,
+                        PinnableSlice* values, Status* statuses,
+                        const bool sorted_input = false) override;
+
+  using Transaction::GetIterator;
+  virtual Iterator* GetIterator(const ReadOptions& options) override;
+  virtual Iterator* GetIterator(const ReadOptions& options,
+                                ColumnFamilyHandle* column_family) override;
+
+  virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family,
+                                  const Slice& key,
+                                  SequenceNumber* tracked_at_seq) override;
+
+ private:
+  friend class WriteUnpreparedTransactionTest_ReadYourOwnWrite_Test;
+  friend class WriteUnpreparedTransactionTest_RecoveryTest_Test;
+  friend class WriteUnpreparedTransactionTest_UnpreparedBatch_Test;
+  friend class WriteUnpreparedTxnDB;
+
+  const std::map<SequenceNumber, size_t>& GetUnpreparedSequenceNumbers();
+  Status WriteRollbackKeys(const LockTracker& tracked_keys,
+                           WriteBatchWithIndex* rollback_batch,
+                           ReadCallback* callback, const ReadOptions& roptions);
+
+  Status MaybeFlushWriteBatchToDB();
+  Status FlushWriteBatchToDB(bool prepared);
+  Status FlushWriteBatchToDBInternal(bool prepared);
+  Status FlushWriteBatchWithSavePointToDB();
+  Status RollbackToSavePointInternal();
+  Status HandleWrite(std::function<Status()> do_write);
+
+  // For write unprepared, we check on every write batch append to see if
+  // write_batch_flush_threshold_ has been exceeded, and then call
+  // FlushWriteBatchToDB if so. This logic is encapsulated in
+  // MaybeFlushWriteBatchToDB.
+  int64_t write_batch_flush_threshold_;
+  WriteUnpreparedTxnDB* wupt_db_;
+
+  // Ordered list of unprep_seq sequence numbers that we have already written
+  // to DB.
+  //
+  // This maps unprep_seq => prepare_batch_cnt for each unprepared batch
+  // written by this transaction.
+  //
+  // Note that this contains both prepared and unprepared batches, since they
+  // are treated similarly in prepare heap/commit map, so it simplifies the
+  // commit callbacks.
+  std::map<SequenceNumber, size_t> unprep_seqs_;
+
+  uint64_t last_log_number_;
+
+  // Recovered transactions have tracked_keys_ populated, but are not actually
+  // locked for efficiency reasons. For recovered transactions, skip unlocking
+  // keys when the transaction ends.
+  bool recovered_txn_;
+
+  // Track the largest sequence number at which we performed snapshot
+  // validation. If snapshot validation was skipped because no snapshot was
+  // set, then this is set to GetLastPublishedSequence. This value is useful
+  // because it means that for keys that have unprepared seqnos, we can
+  // guarantee that no committed keys by other transactions can exist between
+  // largest_validated_seq_ and max_unprep_seq. See
+  // WriteUnpreparedTxnDB::NewIterator for an explanation of why this is
+  // necessary for iterator Prev().
+  //
+  // Currently this value only increases during the lifetime of a transaction,
+  // but in some cases, we should be able to restore the previously largest
+  // value when calling RollbackToSavepoint.
+  SequenceNumber largest_validated_seq_;
+
+  struct SavePoint {
+    // Record of unprep_seqs_ at this savepoint. The set of unprep_seq is
+    // used during RollbackToSavepoint to determine visibility when restoring
+    // old values.
+    //
+    // TODO(lth): Since all unprep_seqs_ sets further down the stack must be
+    // subsets, this can potentially be deduplicated by just storing set
+    // difference. Investigate if this is worth it.
+    std::map<SequenceNumber, size_t> unprep_seqs_;
+
+    // This snapshot will be used to read keys at this savepoint if we call
+    // RollbackToSavePoint.
+    std::unique_ptr<ManagedSnapshot> snapshot_;
+
+    SavePoint(const std::map<SequenceNumber, size_t>& seqs,
+              ManagedSnapshot* snapshot)
+        : unprep_seqs_(seqs), snapshot_(snapshot){};
+  };
+
+  // We have 3 data structures holding savepoint information:
+  // 1. TransactionBaseImpl::save_points_
+  // 2. WriteUnpreparedTxn::flushed_save_points_
+  // 3. WriteUnpreparedTxn::unflushed_save_points_
+  //
+  // TransactionBaseImpl::save_points_ holds information about all write
+  // batches, including the current in-memory write_batch_, or unprepared
+  // batches that have been written out. Its responsibility is just to track
+  // which keys have been modified in every savepoint.
+  //
+  // WriteUnpreparedTxn::flushed_save_points_ holds information about
+  // savepoints set on unprepared batches that have already been flushed. It
+  // holds the snapshot and unprep_seqs at that savepoint, so that the
+  // rollback process can determine which keys were visible at that point in
+  // time.
+  //
+  // WriteUnpreparedTxn::unflushed_save_points_ holds information about
+  // savepoints on the current in-memory write_batch_. It simply records the
+  // size of the write batch at every savepoint.
+  //
+  // TODO(lth): Remove the redundancy between save_point_boundaries_ and
+  // write_batch_.save_points_.
+  //
+  // Based on this information, here are some invariants:
+  // size(unflushed_save_points_) = size(write_batch_.save_points_)
+  // size(flushed_save_points_) + size(unflushed_save_points_)
+  //   = size(save_points_)
+  //
+  std::unique_ptr<autovector<WriteUnpreparedTxn::SavePoint>>
+      flushed_save_points_;
+  std::unique_ptr<autovector<size_t>> unflushed_save_points_;
+
+  // It is currently unsafe to flush a write batch if there are active
+  // iterators created from this transaction. This is because we use
+  // WriteBatchWithIndex to do merging reads from the DB and the write batch.
+  // If we flush the write batch, it is possible that the delta iterator on
+  // the iterator will point to invalid memory.
+  std::vector<Iterator*> active_iterators_;
+
+  // Untracked keys that we have to rollback.
+  //
+  // TODO(lth): Currently we do not record untracked keys per-savepoint. This
+  // means that when rolling back to savepoints, we have to check all keys in
+  // the current transaction for rollback. Note that this is merely
+  // inefficient, not incorrect, because we take a snapshot at every
+  // savepoint, and we will use that snapshot to construct the rollback batch.
+  // The rollback batch will then contain a reissue of the same marker.
+  //
+  // A more optimal solution would be to only check keys changed since the
+  // last savepoint. Also, it may make sense to merge this into tracked_keys_
+  // and differentiate between tracked but not locked keys to avoid having two
+  // very similar data structures.
+  using KeySet = std::unordered_map<uint32_t, std::vector<std::string>>;
+  KeySet untracked_keys_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.cc
new file mode 100644
index 0000000000..fd0ba0aedc
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.cc
@@ -0,0 +1,476 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "utilities/transactions/write_unprepared_txn_db.h"
+
+#include "db/arena_wrapped_db_iter.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "util/cast_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Instead of reconstructing a Transaction object, and calling rollback on it,
+// we can be more efficient with RollbackRecoveredTransaction by skipping
+// unnecessary steps (eg. updating CommitMap, reconstructing keyset)
+Status WriteUnpreparedTxnDB::RollbackRecoveredTransaction(
+    const DBImpl::RecoveredTransaction* rtxn) {
+  // TODO(lth): Reduce duplicate code with WritePrepared rollback logic.
+  assert(rtxn->unprepared_);
+  auto cf_map_shared_ptr = WritePreparedTxnDB::GetCFHandleMap();
+  auto cf_comp_map_shared_ptr = WritePreparedTxnDB::GetCFComparatorMap();
+  // In theory we could write with disableWAL = true during recovery, and
+  // assume that if we crash again during recovery, we can just replay from
+  // the very beginning. Unfortunately, the XIDs from the application may not
+  // necessarily be unique across restarts, potentially leading to situations
+  // like this:
+  //
+  // BEGIN_PREPARE(unprepared) Put(a) END_PREPARE(xid = 1)
+  // -- crash and recover with Put(a) rolled back as it was not prepared
+  // BEGIN_PREPARE(prepared) Put(b) END_PREPARE(xid = 1)
+  // COMMIT(xid = 1)
+  // -- crash and recover with both a, b
+  //
+  // We could just write the rollback marker, but then we would have to extend
+  // MemTableInserter during recovery to actually do writes into the DB
+  // instead of just dropping the in-memory write batch.
+  //
+  WriteOptions w_options;
+
+  class InvalidSnapshotReadCallback : public ReadCallback {
+   public:
+    InvalidSnapshotReadCallback(SequenceNumber snapshot)
+        : ReadCallback(snapshot) {}
+
+    inline bool IsVisibleFullCheck(SequenceNumber) override {
+      // The seq provided as the snapshot is the seq right before we locked
+      // and wrote to it, so whatever is there, it is committed.
+      return true;
+    }
+
+    // Ignore the refresh request since we are confident that our snapshot seq
+    // is not going to be affected by concurrent compactions (not enabled yet.)
+    void Refresh(SequenceNumber) override {}
+  };
+
+  // Iterate starting with largest sequence number.
+  for (auto it = rtxn->batches_.rbegin(); it != rtxn->batches_.rend(); ++it) {
+    auto last_visible_txn = it->first - 1;
+    const auto& batch = it->second.batch_;
+    WriteBatch rollback_batch(0 /* reserved_bytes */, 0 /* max_bytes */,
+                              w_options.protection_bytes_per_key,
+                              0 /* default_cf_ts_sz */);
+
+    struct RollbackWriteBatchBuilder : public WriteBatch::Handler {
+      DBImpl* db_;
+      ReadOptions roptions;
+      InvalidSnapshotReadCallback callback;
+      WriteBatch* rollback_batch_;
+      std::map<uint32_t, const Comparator*>& comparators_;
+      std::map<uint32_t, ColumnFamilyHandle*>& handles_;
+      using CFKeys = std::set<Slice, SetComparator>;
+      std::map<uint32_t, CFKeys> keys_;
+      bool rollback_merge_operands_;
+      RollbackWriteBatchBuilder(
+          DBImpl* db, SequenceNumber snap_seq, WriteBatch* dst_batch,
+          std::map<uint32_t, const Comparator*>& comparators,
+          std::map<uint32_t, ColumnFamilyHandle*>& handles,
+          bool rollback_merge_operands)
+          : db_(db),
+            callback(snap_seq),
+            // disable min_uncommitted optimization
+            rollback_batch_(dst_batch),
+            comparators_(comparators),
+            handles_(handles),
+            rollback_merge_operands_(rollback_merge_operands) {}
+
+      Status Rollback(uint32_t cf, const Slice& key) {
+        Status s;
+        CFKeys& cf_keys = keys_[cf];
+        if (cf_keys.size() == 0) {  // just inserted
+          auto cmp = comparators_[cf];
+          keys_[cf] = CFKeys(SetComparator(cmp));
+        }
+        auto res = cf_keys.insert(key);
+        if (res.second == false) {  // second is false if an element already
+                                    // existed.
+          return s;
+        }
+
+        PinnableSlice pinnable_val;
+        bool not_used;
+        auto cf_handle = handles_[cf];
+        DBImpl::GetImplOptions get_impl_options;
+        get_impl_options.column_family = cf_handle;
+        get_impl_options.value = &pinnable_val;
+        get_impl_options.value_found = &not_used;
+        get_impl_options.callback = &callback;
+        s = db_->GetImpl(roptions, key, get_impl_options);
+        assert(s.ok() || s.IsNotFound());
+        if (s.ok()) {
+          s = rollback_batch_->Put(cf_handle, key, pinnable_val);
+          assert(s.ok());
+        } else if (s.IsNotFound()) {
+          // There has been no readable value before txn. By adding a delete
+          // we make sure that there will be none afterwards either.
+          s = rollback_batch_->Delete(cf_handle, key);
+          assert(s.ok());
+        } else {
+          // Unexpected status. Return it to the user.
+        }
+        return s;
+      }
+
+      Status PutCF(uint32_t cf, const Slice& key,
+                   const Slice& /*val*/) override {
+        return Rollback(cf, key);
+      }
+
+      Status DeleteCF(uint32_t cf, const Slice& key) override {
+        return Rollback(cf, key);
+      }
+
+      Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
+        return Rollback(cf, key);
+      }
+
+      Status MergeCF(uint32_t cf, const Slice& key,
+                     const Slice& /*val*/) override {
+        if (rollback_merge_operands_) {
+          return Rollback(cf, key);
+        } else {
+          return Status::OK();
+        }
+      }
+
+      // Recovered batches do not contain 2PC markers.
+      Status MarkNoop(bool) override { return Status::InvalidArgument(); }
+      Status MarkBeginPrepare(bool) override {
+        return Status::InvalidArgument();
+      }
+      Status MarkEndPrepare(const Slice&) override {
+        return Status::InvalidArgument();
+      }
+      Status MarkCommit(const Slice&) override {
+        return Status::InvalidArgument();
+      }
+      Status MarkRollback(const Slice&) override {
+        return Status::InvalidArgument();
+      }
+    } rollback_handler(db_impl_, last_visible_txn, &rollback_batch,
+                       *cf_comp_map_shared_ptr.get(), *cf_map_shared_ptr.get(),
+                       txn_db_options_.rollback_merge_operands);
+
+    auto s = batch->Iterate(&rollback_handler);
+    if (!s.ok()) {
+      return s;
+    }
+
+    // The Rollback marker will be used as a batch separator
+    s = WriteBatchInternal::MarkRollback(&rollback_batch, rtxn->name_);
+    if (!s.ok()) {
+      return s;
+    }
+
+    const uint64_t kNoLogRef = 0;
+    const bool kDisableMemtable = true;
+    const size_t kOneBatch = 1;
+    uint64_t seq_used = kMaxSequenceNumber;
+    s = db_impl_->WriteImpl(w_options, &rollback_batch, nullptr, nullptr,
+                            kNoLogRef, !kDisableMemtable, &seq_used, kOneBatch);
+    if (!s.ok()) {
+      return s;
+    }
+
+    // If two_write_queues, we must manually release the sequence number to
+    // readers.
+    if (db_impl_->immutable_db_options().two_write_queues) {
+      db_impl_->SetLastPublishedSequence(seq_used);
+    }
+  }
+
+  return Status::OK();
+}
+
+Status WriteUnpreparedTxnDB::Initialize(
+    const std::vector<size_t>& compaction_enabled_cf_indices,
+    const std::vector<ColumnFamilyHandle*>& handles) {
+  // TODO(lth): Reduce code duplication in this function.
+  auto dbimpl = static_cast_with_check<DBImpl>(GetRootDB());
+  assert(dbimpl != nullptr);
+
+  db_impl_->SetSnapshotChecker(new WritePreparedSnapshotChecker(this));
+  // A callback to commit a single sub-batch
+  class CommitSubBatchPreReleaseCallback : public PreReleaseCallback {
+   public:
+    explicit CommitSubBatchPreReleaseCallback(WritePreparedTxnDB* db)
+        : db_(db) {}
+    Status Callback(SequenceNumber commit_seq,
+                    bool is_mem_disabled __attribute__((__unused__)), uint64_t,
+                    size_t /*index*/, size_t /*total*/) override {
+      assert(!is_mem_disabled);
+      db_->AddCommitted(commit_seq, commit_seq);
+      return Status::OK();
+    }
+
+   private:
+    WritePreparedTxnDB* db_;
+  };
+  db_impl_->SetRecoverableStatePreReleaseCallback(
+      new CommitSubBatchPreReleaseCallback(this));
+
+  // PessimisticTransactionDB::Initialize
+  for (auto cf_ptr : handles) {
+    AddColumnFamily(cf_ptr);
+  }
+  // Verify cf options
+  for (auto handle : handles) {
+    ColumnFamilyDescriptor cfd;
+    Status s = handle->GetDescriptor(&cfd);
+    if (!s.ok()) {
+      return s;
+    }
+    s = VerifyCFOptions(cfd.options);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  // Re-enable compaction for the column families that initially had
+  // compaction enabled.
+  std::vector<ColumnFamilyHandle*> compaction_enabled_cf_handles;
+  compaction_enabled_cf_handles.reserve(compaction_enabled_cf_indices.size());
+  for (auto index : compaction_enabled_cf_indices) {
+    compaction_enabled_cf_handles.push_back(handles[index]);
+  }
+
+  // create 'real' transactions from recovered shell transactions
+  auto rtxns = dbimpl->recovered_transactions();
+  std::map<SequenceNumber, size_t> ordered_seq_cnt;
+  for (auto rtxn : rtxns) {
+    auto recovered_trx = rtxn.second;
+    assert(recovered_trx);
+    assert(recovered_trx->batches_.size() >= 1);
+    assert(recovered_trx->name_.length());
+
+    // We can only rollback transactions after AdvanceMaxEvictedSeq is called,
+    // but AddPrepared must occur before AdvanceMaxEvictedSeq, which is why
+    // two iterations are required.
+    if (recovered_trx->unprepared_) {
+      continue;
+    }
+
+    WriteOptions w_options;
+    w_options.sync = true;
+    TransactionOptions t_options;
+
+    auto first_log_number =
+        recovered_trx->batches_.begin()->second.log_number_;
+    auto first_seq = recovered_trx->batches_.begin()->first;
+    auto last_prepare_batch_cnt =
+        recovered_trx->batches_.begin()->second.batch_cnt_;
+
+    Transaction* real_trx = BeginTransaction(w_options, t_options, nullptr);
+    assert(real_trx);
+    auto wupt = static_cast_with_check<WriteUnpreparedTxn>(real_trx);
+    wupt->recovered_txn_ = true;
+
+    real_trx->SetLogNumber(first_log_number);
+    real_trx->SetId(first_seq);
+    Status s = real_trx->SetName(recovered_trx->name_);
+    if (!s.ok()) {
+      return s;
+    }
+    wupt->prepare_batch_cnt_ = last_prepare_batch_cnt;
+
+    for (auto batch : recovered_trx->batches_) {
+      const auto& seq = batch.first;
+      const auto& batch_info = batch.second;
+      auto cnt = batch_info.batch_cnt_ ? batch_info.batch_cnt_ : 1;
+      assert(batch_info.log_number_);
+
+      ordered_seq_cnt[seq] = cnt;
+      assert(wupt->unprep_seqs_.count(seq) == 0);
+      wupt->unprep_seqs_[seq] = cnt;
+
+      s = wupt->RebuildFromWriteBatch(batch_info.batch_);
+      assert(s.ok());
+      if (!s.ok()) {
+        return s;
+      }
+    }
+
+    const bool kClear = true;
+    wupt->InitWriteBatch(kClear);
+
+    real_trx->SetState(Transaction::PREPARED);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  // AddPrepared must be called in order
+  for (auto seq_cnt : ordered_seq_cnt) {
+    auto seq = seq_cnt.first;
+    auto cnt = seq_cnt.second;
+    for (size_t i = 0; i < cnt; i++) {
+      AddPrepared(seq + i);
+    }
+  }
+
+  SequenceNumber prev_max = max_evicted_seq_;
+  SequenceNumber last_seq = db_impl_->GetLatestSequenceNumber();
+  AdvanceMaxEvictedSeq(prev_max, last_seq);
+  // Create a gap between max and the next snapshot. This simplifies the
+  // logic in IsInSnapshot by not having to consider the special case of
+  // max == snapshot after recovery. This is tested in
+  // IsInSnapshotEmptyMapTest.
+  if (last_seq) {
+    db_impl_->versions_->SetLastAllocatedSequence(last_seq + 1);
+    db_impl_->versions_->SetLastSequence(last_seq + 1);
+    db_impl_->versions_->SetLastPublishedSequence(last_seq + 1);
+  }
+
+  Status s;
+  // Rollback unprepared transactions.
+  for (auto rtxn : rtxns) {
+    auto recovered_trx = rtxn.second;
+    if (recovered_trx->unprepared_) {
+      s = RollbackRecoveredTransaction(recovered_trx);
+      if (!s.ok()) {
+        return s;
+      }
+      continue;
+    }
+  }
+
+  if (s.ok()) {
+    dbimpl->DeleteAllRecoveredTransactions();
+
+    // Compaction should start only after max_evicted_seq_ is set AND
+    // recovered transactions are either added to PrepareHeap or rolled back.
+    s = EnableAutoCompaction(compaction_enabled_cf_handles);
+  }
+
+  return s;
+}
+
+Transaction* WriteUnpreparedTxnDB::BeginTransaction(
+    const WriteOptions& write_options, const TransactionOptions& txn_options,
+    Transaction* old_txn) {
+  if (old_txn != nullptr) {
+    ReinitializeTransaction(old_txn, write_options, txn_options);
+    return old_txn;
+  } else {
+    return new WriteUnpreparedTxn(this, write_options, txn_options);
+  }
+}
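
[Editorial note] BeginTransaction either recycles a caller-supplied transaction object or allocates a fresh WriteUnpreparedTxn. A hedged sketch of the reuse pattern from the public API; keys are placeholders.

```cpp
// Sketch: passing the old handle back into BeginTransaction reinitializes it
// in place, avoiding an allocation per transaction.
#include <cassert>
#include <string>
#include "rocksdb/utilities/transaction_db.h"

void ReuseTransactions(rocksdb::TransactionDB* db) {
  rocksdb::Transaction* txn = nullptr;
  for (int i = 0; i < 3; i++) {
    txn = db->BeginTransaction(rocksdb::WriteOptions(),
                               rocksdb::TransactionOptions(), txn);
    assert(txn->Put("k" + std::to_string(i), "v").ok());
    assert(txn->Commit().ok());
  }
  delete txn;
}
```
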
+
+// Struct to hold ownership of snapshot and read callback for iterator
+// cleanup.
+struct WriteUnpreparedTxnDB::IteratorState {
+  IteratorState(WritePreparedTxnDB* txn_db, SequenceNumber sequence,
+                std::shared_ptr<ManagedSnapshot> s,
+                SequenceNumber min_uncommitted, WriteUnpreparedTxn* txn)
+      : callback(txn_db, sequence, min_uncommitted, txn->unprep_seqs_,
+                 kBackedByDBSnapshot),
+        snapshot(s) {}
+  SequenceNumber MaxVisibleSeq() { return callback.max_visible_seq(); }
+
+  WriteUnpreparedTxnReadCallback callback;
+  std::shared_ptr<ManagedSnapshot> snapshot;
+};
+
+namespace {
+static void CleanupWriteUnpreparedTxnDBIterator(void* arg1, void* /*arg2*/) {
+  delete reinterpret_cast<WriteUnpreparedTxnDB::IteratorState*>(arg1);
+}
+}  // anonymous namespace
+
+Iterator* WriteUnpreparedTxnDB::NewIterator(const ReadOptions& options,
+                                            ColumnFamilyHandle* column_family,
+                                            WriteUnpreparedTxn* txn) {
+  if (options.io_activity != Env::IOActivity::kUnknown) {
+    return NewErrorIterator(Status::InvalidArgument(
+        "Cannot call NewIterator with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`"));
+  }
+  // TODO(lth): Refactor so that this logic is shared with WritePrepared.
+  constexpr bool expose_blob_index = false;
+  constexpr bool allow_refresh = false;
+  std::shared_ptr<ManagedSnapshot> own_snapshot = nullptr;
+  SequenceNumber snapshot_seq = kMaxSequenceNumber;
+  SequenceNumber min_uncommitted = 0;
+
+  // Currently, the Prev() iterator logic does not work well without snapshot
+  // validation. The logic simply iterates through values of a key in
+  // ascending seqno order, stopping at the first non-visible value and
+  // returning the last visible value.
+  //
+  // For example, if the snapshot sequence is 3, and we have the following
+  // keys:
+  // foo: v1 1
+  // foo: v2 2
+  // foo: v3 3
+  // foo: v4 4
+  // foo: v5 5
+  //
+  // Then 1, 2, 3 will be visible, but 4 will be non-visible, so we return v3,
+  // which is the last visible value.
+  //
+  // For unprepared transactions, if we have snap_seq = 3, but the current
+  // transaction has unprep_seq 5, then returning the first non-visible value
+  // would be incorrect, as we should return v5, and not v3. The problem is
+  // that there are committed values at snapshot_seq < commit_seq < unprep_seq.
+  //
+  // Snapshot validation can prevent this problem by ensuring that no
+  // committed values exist at snapshot_seq < commit_seq, and thus any value
+  // with a sequence number greater than snapshot_seq must be an unprepared
+  // value. For example, if the transaction had a snapshot at 3, then
+  // snapshot validation would be performed during the Put(v5) call. It would
+  // find v4, and the Put would fail with a snapshot validation failure.
+  //
+  // TODO(lth): Improve Prev() logic to continue iterating until
+  // max_visible_seq, and then return the last visible value, so that this
+  // restriction can be lifted.
+  const Snapshot* snapshot = nullptr;
+  if (options.snapshot == nullptr) {
+    snapshot = GetSnapshot();
+    own_snapshot = std::make_shared<ManagedSnapshot>(db_impl_, snapshot);
+  } else {
+    snapshot = options.snapshot;
+  }
+
+  snapshot_seq = snapshot->GetSequenceNumber();
+  assert(snapshot_seq != kMaxSequenceNumber);
+  // Iteration is safe as long as largest_validated_seq <= snapshot_seq. We
+  // are guaranteed that for keys that were modified by this transaction (and
+  // thus might have unprepared values), no committed values exist at
+  // largest_validated_seq < commit_seq (or the contrapositive: any committed
+  // value must exist at commit_seq <= largest_validated_seq). This implies
+  // that commit_seq <= largest_validated_seq <= snapshot_seq or commit_seq <=
+  // snapshot_seq. As explained above, the problem with Prev() only happens
+  // when snapshot_seq < commit_seq.
+  //
+  // For keys that were not modified by this transaction,
+  // largest_validated_seq_ is meaningless, and Prev() should just work with
+  // the existing visibility logic.
+  if (txn->largest_validated_seq_ > snapshot->GetSequenceNumber() &&
+      !txn->unprep_seqs_.empty()) {
+    ROCKS_LOG_ERROR(info_log_,
+                    "WriteUnprepared iterator creation failed since the "
+                    "transaction has performed unvalidated writes");
+    return nullptr;
+  }
+  min_uncommitted =
+      static_cast_with_check<const SnapshotImpl>(snapshot)->min_uncommitted_;
+
+  auto* cfd =
+      static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
+  auto* state =
+      new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted, txn);
+  auto* db_iter = db_impl_->NewIteratorImpl(
+      options, cfd, state->MaxVisibleSeq(), &state->callback,
+      expose_blob_index, allow_refresh);
+  db_iter->RegisterCleanup(CleanupWriteUnpreparedTxnDBIterator, state, nullptr);
+  return db_iter;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.h
new file mode 100644
index 0000000000..a7b10f1533
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/transactions/write_unprepared_txn_db.h
@@ -0,0 +1,106 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "utilities/transactions/write_prepared_txn_db.h"
+#include "utilities/transactions/write_unprepared_txn.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class WriteUnpreparedTxn;
+
+class WriteUnpreparedTxnDB : public WritePreparedTxnDB {
+ public:
+  using WritePreparedTxnDB::WritePreparedTxnDB;
+
+  Status Initialize(const std::vector<size_t>& compaction_enabled_cf_indices,
+                    const std::vector<ColumnFamilyHandle*>& handles) override;
+
+  Transaction* BeginTransaction(const WriteOptions& write_options,
+                                const TransactionOptions& txn_options,
+                                Transaction* old_txn) override;
+
+  // Struct to hold ownership of snapshot and read callback for cleanup.
+  struct IteratorState;
+
+  using WritePreparedTxnDB::NewIterator;
+  Iterator* NewIterator(const ReadOptions& options,
+                        ColumnFamilyHandle* column_family,
+                        WriteUnpreparedTxn* txn);
+
+ private:
+  Status RollbackRecoveredTransaction(
+      const DBImpl::RecoveredTransaction* rtxn);
+};
+
+class WriteUnpreparedCommitEntryPreReleaseCallback : public PreReleaseCallback {
+  // TODO(lth): Reduce code duplication with
+  // WritePreparedCommitEntryPreReleaseCallback
+ public:
+  // includes_data indicates that the commit also writes a non-empty
+  // CommitTimeWriteBatch to memtable, which needs to be committed separately.
+  WriteUnpreparedCommitEntryPreReleaseCallback(
+      WritePreparedTxnDB* db, DBImpl* db_impl,
+      const std::map<SequenceNumber, size_t>& unprep_seqs,
+      size_t data_batch_cnt = 0, bool publish_seq = true)
+      : db_(db),
+        db_impl_(db_impl),
+        unprep_seqs_(unprep_seqs),
+        data_batch_cnt_(data_batch_cnt),
+        includes_data_(data_batch_cnt_ > 0),
+        publish_seq_(publish_seq) {
+    assert(unprep_seqs.size() > 0);
+  }
+
+  virtual Status Callback(SequenceNumber commit_seq,
+                          bool is_mem_disabled __attribute__((__unused__)),
+                          uint64_t, size_t /*index*/,
+                          size_t /*total*/) override {
+    const uint64_t last_commit_seq = LIKELY(data_batch_cnt_ <= 1)
+                                         ? commit_seq
+                                         : commit_seq + data_batch_cnt_ - 1;
+    // Recall that unprep_seqs maps (un)prepared_seq => prepare_batch_cnt.
+    for (const auto& s : unprep_seqs_) {
+      for (size_t i = 0; i < s.second; i++) {
+        db_->AddCommitted(s.first + i, last_commit_seq);
+      }
+    }
+
+    if (includes_data_) {
+      assert(data_batch_cnt_);
+      // Commit the data that is accompanied with the commit request
+      for (size_t i = 0; i < data_batch_cnt_; i++) {
+        // For the commit seq of each batch use the commit seq of the last
+        // batch. This makes debugging easier by having all the batches carry
+        // the same sequence number.
+        db_->AddCommitted(commit_seq + i, last_commit_seq);
+      }
+    }
+    if (db_impl_->immutable_db_options().two_write_queues && publish_seq_) {
+      assert(is_mem_disabled);  // implies the 2nd queue
+      // Publish the sequence number. We can do that here assuming the
+      // callback is invoked only from one write queue, which would guarantee
+      // that the published sequence numbers are in order, i.e., once a seq is
+      // published all the seqs prior to that are also publishable.
+      db_impl_->SetLastPublishedSequence(last_commit_seq);
+    }
+    // otherwise the sequence number that is updated as part of the write
+    // already does the publishing
+    return Status::OK();
+  }
+
+ private:
+  WritePreparedTxnDB* db_;
+  DBImpl* db_impl_;
+  const std::map<SequenceNumber, size_t>& unprep_seqs_;
+  size_t data_batch_cnt_;
+  // Either because it is a commit without prepare or it has a
+  // CommitTimeWriteBatch
+  bool includes_data_;
+  // Whether the callback should also publish the commit seq number
+  bool publish_seq_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
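
[Editorial note] A small worked example of the fan-out performed by Callback above, with illustrative numbers; this mirrors the AddCommitted loop but is not vendored code.

```cpp
// Suppose a transaction flushed two unprepared batches and the commit marker
// write got sequence 30 with no commit-time data (data_batch_cnt_ == 0, so
// last_commit_seq == 30). Every sub-batch sequence number then maps to the
// same commit sequence in the commit cache.
#include <cstdio>
#include <map>

int main() {
  std::map<unsigned long long, size_t> unprep_seqs{{10, 2}, {20, 1}};
  unsigned long long last_commit_seq = 30;
  for (const auto& s : unprep_seqs) {
    for (size_t i = 0; i < s.second; i++) {
      // Mirrors db_->AddCommitted(s.first + i, last_commit_seq):
      std::printf("AddCommitted(%llu, %llu)\n", s.first + i, last_commit_seq);
    }
  }
  // Prints: (10,30), (11,30), (20,30)
}
```
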
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.cc
new file mode 100644
index 0000000000..2b261ec6f2
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.cc
@@ -0,0 +1,617 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "utilities/ttl/db_ttl_impl.h"
+
+#include "db/write_batch_internal.h"
+#include "file/filename.h"
+#include "logging/logging.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/system_clock.h"
+#include "rocksdb/utilities/db_ttl.h"
+#include "rocksdb/utilities/object_registry.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+static std::unordered_map<std::string, OptionTypeInfo>
+    ttl_merge_op_type_info = {
+        {"user_operator",
+         OptionTypeInfo::AsCustomSharedPtr<MergeOperator>(
+             0, OptionVerificationType::kByName, OptionTypeFlags::kNone)}};
+
+TtlMergeOperator::TtlMergeOperator(
+    const std::shared_ptr<MergeOperator>& merge_op, SystemClock* clock)
+    : user_merge_op_(merge_op), clock_(clock) {
+  RegisterOptions("TtlMergeOptions", &user_merge_op_, &ttl_merge_op_type_info);
+}
+
+bool TtlMergeOperator::FullMergeV2(const MergeOperationInput& merge_in,
+                                   MergeOperationOutput* merge_out) const {
+  const uint32_t ts_len = DBWithTTLImpl::kTSLength;
+  if (merge_in.existing_value && merge_in.existing_value->size() < ts_len) {
+    ROCKS_LOG_ERROR(merge_in.logger,
+                    "Error: Could not remove timestamp from existing value.");
+    return false;
+  }
+
+  // Extract the time-stamp from each operand to be passed to user_merge_op_
+  std::vector<Slice> operands_without_ts;
+  for (const auto& operand : merge_in.operand_list) {
+    if (operand.size() < ts_len) {
+      ROCKS_LOG_ERROR(merge_in.logger,
+                      "Error: Could not remove timestamp from operand value.");
+      return false;
+    }
+    operands_without_ts.push_back(operand);
+    operands_without_ts.back().remove_suffix(ts_len);
+  }
+
+  // Apply the user merge operator (store result in *new_value)
+  bool good = true;
+  MergeOperationOutput user_merge_out(merge_out->new_value,
+                                      merge_out->existing_operand);
+  if (merge_in.existing_value) {
+    Slice existing_value_without_ts(merge_in.existing_value->data(),
+                                    merge_in.existing_value->size() - ts_len);
+    good = user_merge_op_->FullMergeV2(
+        MergeOperationInput(merge_in.key, &existing_value_without_ts,
+                            operands_without_ts, merge_in.logger),
+        &user_merge_out);
+  } else {
+    good = user_merge_op_->FullMergeV2(
+        MergeOperationInput(merge_in.key, nullptr, operands_without_ts,
+                            merge_in.logger),
+        &user_merge_out);
+  }
+  merge_out->op_failure_scope = user_merge_out.op_failure_scope;
+
+  // Return false if the user merge operator returned false
+  if (!good) {
+    return false;
+  }
+
+  if (merge_out->existing_operand.data()) {
+    merge_out->new_value.assign(merge_out->existing_operand.data(),
+                                merge_out->existing_operand.size());
+    merge_out->existing_operand = Slice(nullptr, 0);
+  }
+
+  // Augment the *new_value with the ttl time-stamp
+  int64_t curtime;
+  if (!clock_->GetCurrentTime(&curtime).ok()) {
+    ROCKS_LOG_ERROR(
+        merge_in.logger,
+        "Error: Could not get current time to be attached internally "
+        "to the new value.");
+    return false;
+  } else {
+    char ts_string[ts_len];
+    EncodeFixed32(ts_string, (int32_t)curtime);
+    merge_out->new_value.append(ts_string, ts_len);
+    return true;
+  }
+}
+
+bool TtlMergeOperator::PartialMergeMulti(const Slice& key,
+                                         const std::deque<Slice>& operand_list,
+                                         std::string* new_value,
+                                         Logger* logger) const {
+  const uint32_t ts_len = DBWithTTLImpl::kTSLength;
+  std::deque<Slice> operands_without_ts;
+
+  for (const auto& operand : operand_list) {
+    if (operand.size() < ts_len) {
+      ROCKS_LOG_ERROR(logger, "Error: Could not remove timestamp from value.");
+      return false;
+    }
+
+    operands_without_ts.push_back(
+        Slice(operand.data(), operand.size() - ts_len));
+  }
+
+  // Apply the user partial-merge operator (store result in *new_value)
+  assert(new_value);
+  if (!user_merge_op_->PartialMergeMulti(key, operands_without_ts, new_value,
+                                         logger)) {
+    return false;
+  }
+
+  // Augment the *new_value with the ttl time-stamp
+  int64_t curtime;
+  if (!clock_->GetCurrentTime(&curtime).ok()) {
+    ROCKS_LOG_ERROR(
+        logger,
+        "Error: Could not get current time to be attached internally "
+        "to the new value.");
+    return false;
+  } else {
+    char ts_string[ts_len];
+    EncodeFixed32(ts_string, (int32_t)curtime);
+    new_value->append(ts_string, ts_len);
+    return true;
+  }
+}
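
[Editorial note] The TTL layer's on-disk convention, visible in both merge paths above, is simply `value || 4-byte unix timestamp`. A self-contained sketch of the encode/decode arithmetic and the staleness test; it uses plain `memcpy` packing, which matches RocksDB's EncodeFixed32 on little-endian builds (an assumption), and hypothetical helper names.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>

constexpr size_t kTsLen = sizeof(int32_t);  // mirrors DBWithTTLImpl::kTSLength

std::string AppendTs(const std::string& value, int32_t now) {
  char ts[kTsLen];
  std::memcpy(ts, &now, kTsLen);  // fixed32, little-endian assumed
  return value + std::string(ts, kTsLen);
}

int32_t ExtractTs(const std::string& value_with_ts) {
  int32_t ts;
  std::memcpy(&ts, value_with_ts.data() + value_with_ts.size() - kTsLen,
              kTsLen);
  return ts;
}

// Same test as DBWithTTLImpl::IsStale: written-at + ttl < now.
bool IsStale(const std::string& value_with_ts, int32_t ttl, int64_t now) {
  if (ttl <= 0) return false;  // non-positive TTL means "never expires"
  return static_cast<int64_t>(ExtractTs(value_with_ts)) + ttl < now;
}

int main() {
  std::string v = AppendTs("payload", /*now=*/1000);
  assert(v.size() == 7 + kTsLen && ExtractTs(v) == 1000);
  assert(IsStale(v, 60, /*now=*/2000));   // written at 1000, expired at 1060
  assert(!IsStale(v, 60, /*now=*/1030));  // still fresh
}
```
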
+
+Status TtlMergeOperator::PrepareOptions(const ConfigOptions& config_options) {
+  if (clock_ == nullptr) {
+    clock_ = config_options.env->GetSystemClock().get();
+  }
+  return MergeOperator::PrepareOptions(config_options);
+}
+
+Status TtlMergeOperator::ValidateOptions(
+    const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts) const {
+  if (user_merge_op_ == nullptr) {
+    return Status::InvalidArgument(
+        "UserMergeOperator required by TtlMergeOperator");
+  } else if (clock_ == nullptr) {
+    return Status::InvalidArgument("SystemClock required by TtlMergeOperator");
+  } else {
+    return MergeOperator::ValidateOptions(db_opts, cf_opts);
+  }
+}
+
+void DBWithTTLImpl::SanitizeOptions(int32_t ttl, ColumnFamilyOptions* options,
+                                    SystemClock* clock) {
+  if (options->compaction_filter) {
+    options->compaction_filter =
+        new TtlCompactionFilter(ttl, clock, options->compaction_filter);
+  } else {
+    options->compaction_filter_factory =
+        std::shared_ptr<CompactionFilterFactory>(
+            new TtlCompactionFilterFactory(
+                ttl, clock, options->compaction_filter_factory));
+  }
+
+  if (options->merge_operator) {
+    options->merge_operator.reset(
+        new TtlMergeOperator(options->merge_operator, clock));
+  }
+}
+
+static std::unordered_map<std::string, OptionTypeInfo> ttl_type_info = {
+    {"ttl", {0, OptionType::kInt32T}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> ttl_cff_type_info = {
+    {"user_filter_factory",
+     OptionTypeInfo::AsCustomSharedPtr<CompactionFilterFactory>(
+         0, OptionVerificationType::kByNameAllowFromNull,
+         OptionTypeFlags::kNone)}};
+static std::unordered_map<std::string, OptionTypeInfo> user_cf_type_info = {
+    {"user_filter",
+     OptionTypeInfo::AsCustomRawPtr<const CompactionFilter>(
+         0, OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)}};
+
+TtlCompactionFilter::TtlCompactionFilter(
+    int32_t ttl, SystemClock* clock, const CompactionFilter* _user_comp_filter,
+    std::unique_ptr<const CompactionFilter> _user_comp_filter_from_factory)
+    : LayeredCompactionFilterBase(_user_comp_filter,
+                                  std::move(_user_comp_filter_from_factory)),
+      ttl_(ttl),
+      clock_(clock) {
+  RegisterOptions("TTL", &ttl_, &ttl_type_info);
+  RegisterOptions("UserFilter", &user_comp_filter_, &user_cf_type_info);
+}
+
+bool TtlCompactionFilter::Filter(int level, const Slice& key,
+                                 const Slice& old_val, std::string* new_val,
+                                 bool* value_changed) const {
+  if (DBWithTTLImpl::IsStale(old_val, ttl_, clock_)) {
+    return true;
+  }
+  if (user_comp_filter() == nullptr) {
+    return false;
+  }
+  assert(old_val.size() >= DBWithTTLImpl::kTSLength);
+  Slice old_val_without_ts(old_val.data(),
+                           old_val.size() - DBWithTTLImpl::kTSLength);
+  if (user_comp_filter()->Filter(level, key, old_val_without_ts, new_val,
+                                 value_changed)) {
+    return true;
+  }
+  if (*value_changed) {
+    new_val->append(old_val.data() + old_val.size() - DBWithTTLImpl::kTSLength,
+                    DBWithTTLImpl::kTSLength);
+  }
+  return false;
+}
+
+Status TtlCompactionFilter::PrepareOptions(
+    const ConfigOptions& config_options) {
+  if (clock_ == nullptr) {
+    clock_ = config_options.env->GetSystemClock().get();
+  }
+  return LayeredCompactionFilterBase::PrepareOptions(config_options);
+}
+
+Status TtlCompactionFilter::ValidateOptions(
+    const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts) const {
+  if (clock_ == nullptr) {
+    return Status::InvalidArgument(
+        "SystemClock required by TtlCompactionFilter");
+  } else {
+    return LayeredCompactionFilterBase::ValidateOptions(db_opts, cf_opts);
+  }
+}
+
+TtlCompactionFilterFactory::TtlCompactionFilterFactory(
+    int32_t ttl, SystemClock* clock,
+    std::shared_ptr<CompactionFilterFactory> comp_filter_factory)
+    : ttl_(ttl),
+      clock_(clock),
+      user_comp_filter_factory_(comp_filter_factory) {
+  RegisterOptions("UserOptions", &user_comp_filter_factory_,
+                  &ttl_cff_type_info);
+  RegisterOptions("TTL", &ttl_, &ttl_type_info);
+}
+
+std::unique_ptr<CompactionFilter>
+TtlCompactionFilterFactory::CreateCompactionFilter(
+    const CompactionFilter::Context& context) {
+  std::unique_ptr<const CompactionFilter> user_comp_filter_from_factory =
+      nullptr;
+  if (user_comp_filter_factory_) {
+    user_comp_filter_from_factory =
+        user_comp_filter_factory_->CreateCompactionFilter(context);
+  }
+
+  return std::unique_ptr<CompactionFilter>(new TtlCompactionFilter(
+      ttl_, clock_, nullptr, std::move(user_comp_filter_from_factory)));
+}
+
+Status TtlCompactionFilterFactory::PrepareOptions(
+    const ConfigOptions& config_options) {
+  if (clock_ == nullptr) {
+    clock_ = config_options.env->GetSystemClock().get();
+  }
+  return CompactionFilterFactory::PrepareOptions(config_options);
+}
+
+Status TtlCompactionFilterFactory::ValidateOptions(
+    const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts) const {
+  if (clock_ == nullptr) {
+    return Status::InvalidArgument(
+        "SystemClock required by TtlCompactionFilterFactory");
+  } else {
+    return CompactionFilterFactory::ValidateOptions(db_opts, cf_opts);
+  }
+}
+
+int RegisterTtlObjects(ObjectLibrary& library, const std::string& /*arg*/) {
+  library.AddFactory<MergeOperator>(
+      TtlMergeOperator::kClassName(),
+      [](const std::string& /*uri*/, std::unique_ptr<MergeOperator>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(new TtlMergeOperator(nullptr, nullptr));
+        return guard->get();
+      });
+  library.AddFactory<CompactionFilterFactory>(
+      TtlCompactionFilterFactory::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<CompactionFilterFactory>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(new TtlCompactionFilterFactory(0, nullptr, nullptr));
+        return guard->get();
+      });
+  library.AddFactory<const CompactionFilter>(
+      TtlCompactionFilter::kClassName(),
+      [](const std::string& /*uri*/,
+         std::unique_ptr<const CompactionFilter>* /*guard*/,
+         std::string* /* errmsg */) {
+        return new TtlCompactionFilter(0, nullptr, nullptr);
+      });
+  size_t num_types;
+  return static_cast<int>(library.GetFactoryCount(&num_types));
+}
+
+// Open the db inside DBWithTTLImpl because options needs pointer to its ttl
+DBWithTTLImpl::DBWithTTLImpl(DB* db) : DBWithTTL(db), closed_(false) {}
+
+DBWithTTLImpl::~DBWithTTLImpl() {
+  if (!closed_) {
+    Close().PermitUncheckedError();
+  }
+}
+
+Status DBWithTTLImpl::Close() {
+  Status ret = Status::OK();
+  if (!closed_) {
+    Options default_options = GetOptions();
+    // Need to stop background compaction before getting rid of the filter
+    CancelAllBackgroundWork(db_, /* wait = */ true);
+    ret = db_->Close();
+    delete default_options.compaction_filter;
+    closed_ = true;
+  }
+  return ret;
+}
+
+void DBWithTTLImpl::RegisterTtlClasses() {
+  static std::once_flag once;
+  std::call_once(once, [&]() {
+    ObjectRegistry::Default()->AddLibrary("TTL", RegisterTtlObjects, "");
+  });
+}
+  DBOptions db_options(options);
+  ColumnFamilyOptions cf_options(options);
+  std::vector<ColumnFamilyDescriptor> column_families;
+  column_families.push_back(
+      ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
+  std::vector<ColumnFamilyHandle*> handles;
+  Status s = DBWithTTL::Open(db_options, dbname, column_families, &handles,
+                             dbptr, {ttl}, read_only);
+  if (s.ok()) {
+    assert(handles.size() == 1);
+    // We can delete the handle since DBImpl is always holding a reference to
+    // the default column family.
+    delete handles[0];
+  }
+  return s;
+}
+
+Status DBWithTTL::Open(
+    const DBOptions& db_options, const std::string& dbname,
+    const std::vector<ColumnFamilyDescriptor>& column_families,
+    std::vector<ColumnFamilyHandle*>* handles, DBWithTTL** dbptr,
+    const std::vector<int32_t>& ttls, bool read_only) {
+  DBWithTTLImpl::RegisterTtlClasses();
+  if (ttls.size() != column_families.size()) {
+    return Status::InvalidArgument(
+        "ttls size has to be the same as number of column families");
+  }
+
+  SystemClock* clock = (db_options.env == nullptr)
+                           ? SystemClock::Default().get()
+                           : db_options.env->GetSystemClock().get();
+
+  std::vector<ColumnFamilyDescriptor> column_families_sanitized =
+      column_families;
+  for (size_t i = 0; i < column_families_sanitized.size(); ++i) {
+    DBWithTTLImpl::SanitizeOptions(
+        ttls[i], &column_families_sanitized[i].options, clock);
+  }
+  DB* db;
+
+  Status st;
+  if (read_only) {
+    st = DB::OpenForReadOnly(db_options, dbname, column_families_sanitized,
+                             handles, &db);
+  } else {
+    st = DB::Open(db_options, dbname, column_families_sanitized, handles, &db);
+  }
+  if (st.ok()) {
+    *dbptr = new DBWithTTLImpl(db);
+  } else {
+    *dbptr = nullptr;
+  }
+  return st;
+}
+
+Status DBWithTTLImpl::CreateColumnFamilyWithTtl(
+    const ColumnFamilyOptions& options, const std::string& column_family_name,
+    ColumnFamilyHandle** handle, int ttl) {
+  RegisterTtlClasses();
+  ColumnFamilyOptions sanitized_options = options;
+  DBWithTTLImpl::SanitizeOptions(ttl, &sanitized_options,
+                                 GetEnv()->GetSystemClock().get());
+
+  return DBWithTTL::CreateColumnFamily(sanitized_options, column_family_name,
+                                       handle);
+}
+
+Status DBWithTTLImpl::CreateColumnFamily(const ColumnFamilyOptions& options,
+                                         const std::string& column_family_name,
+                                         ColumnFamilyHandle** handle) {
+  return CreateColumnFamilyWithTtl(options, column_family_name, handle, 0);
+}
+
+// Appends the current timestamp to the string.
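+// Value layout note (annotation, not upstream text): what gets stored is
+//   [ user value bytes | 4-byte little-endian unix time from EncodeFixed32 ]
+// so every stored value is kTSLength bytes longer than what the user wrote.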
+// Returns a non-OK status if the current time could not be obtained; on
+// success the timestamp is appended and OK is returned.
+Status DBWithTTLImpl::AppendTS(const Slice& val, std::string* val_with_ts,
+                               SystemClock* clock) {
+  val_with_ts->reserve(kTSLength + val.size());
+  char ts_string[kTSLength];
+  int64_t curtime;
+  Status st = clock->GetCurrentTime(&curtime);
+  if (!st.ok()) {
+    return st;
+  }
+  EncodeFixed32(ts_string, (int32_t)curtime);
+  val_with_ts->append(val.data(), val.size());
+  val_with_ts->append(ts_string, kTSLength);
+  return st;
+}
+
+// Returns Corruption if the length of the string is less than the timestamp,
+// or if the timestamp refers to a time before the ttl-feature release time
+Status DBWithTTLImpl::SanityCheckTimestamp(const Slice& str) {
+  if (str.size() < kTSLength) {
+    return Status::Corruption("Error: value's length less than timestamp's\n");
+  }
+  // Checks that TS is not less than kMinTimestamp
+  // Guards against corruption & normal database opened incorrectly in ttl mode
+  int32_t timestamp_value = DecodeFixed32(str.data() + str.size() - kTSLength);
+  if (timestamp_value < kMinTimestamp) {
+    return Status::Corruption("Error: Timestamp < ttl feature release time!\n");
+  }
+  return Status::OK();
+}
+
+// Checks whether the string is stale according to the TTL provided
+bool DBWithTTLImpl::IsStale(const Slice& value, int32_t ttl,
+                            SystemClock* clock) {
+  if (ttl <= 0) {  // Data is fresh if TTL is non-positive
+    return false;
+  }
+  int64_t curtime;
+  if (!clock->GetCurrentTime(&curtime).ok()) {
+    return false;  // Treat the data as fresh if current time is unavailable
+  }
+  /* int32_t may overflow when computing timestamp_value + ttl,
+   * for example ttl = 86400 * 365 * 15,
+   * so convert timestamp_value to int64_t
+   */
+  int64_t timestamp_value =
+      DecodeFixed32(value.data() + value.size() - kTSLength);
+  return (timestamp_value + ttl) < curtime;
+}
+
+// Strips the TS from the end of the slice
+Status DBWithTTLImpl::StripTS(PinnableSlice* pinnable_val) {
+  if (pinnable_val->size() < kTSLength) {
+    return Status::Corruption("Bad timestamp in key-value");
+  }
+  // Erasing characters which hold the TS
+  pinnable_val->remove_suffix(kTSLength);
+  return Status::OK();
+}
+
+// Strips the TS from the end of the string
+Status DBWithTTLImpl::StripTS(std::string* str) {
+  if (str->length() < kTSLength) {
+    return Status::Corruption("Bad timestamp in key-value");
+  }
+  // Erasing characters which hold the TS
+  str->erase(str->length() - kTSLength, kTSLength);
+  return Status::OK();
+}
+
+Status DBWithTTLImpl::Put(const WriteOptions& options,
+                          ColumnFamilyHandle* column_family, const Slice& key,
+                          const Slice& val) {
+  WriteBatch batch;
+  Status st = batch.Put(column_family, key, val);
+  if (st.ok()) {
+    st = Write(options, &batch);
+  }
+  return st;
+}
+
+Status DBWithTTLImpl::Get(const ReadOptions& options,
+                          ColumnFamilyHandle* column_family, const Slice& key,
+                          PinnableSlice* value) {
+  Status st = db_->Get(options, column_family, key, value);
+  if (!st.ok()) {
+    return st;
+  }
+  st = SanityCheckTimestamp(*value);
+  if (!st.ok()) {
+    return st;
+  }
+  return StripTS(value);
+}
+
+std::vector<Status> DBWithTTLImpl::MultiGet(
+    const ReadOptions& options,
+    const std::vector<ColumnFamilyHandle*>& column_family,
+    const std::vector<Slice>& keys, std::vector<std::string>* values) {
+  auto statuses = db_->MultiGet(options, column_family, keys, values);
+  for (size_t i = 0; i < keys.size(); ++i) {
+    if (!statuses[i].ok()) {
+      continue;
+    }
+    statuses[i] = SanityCheckTimestamp((*values)[i]);
+    if (!statuses[i].ok()) {
+      continue;
+    }
+    statuses[i] = StripTS(&(*values)[i]);
+  }
+  return statuses;
+}
+
+bool DBWithTTLImpl::KeyMayExist(const ReadOptions& options,
+                                ColumnFamilyHandle* column_family,
+                                const Slice& key, std::string* value,
+                                bool* value_found) {
+  bool ret = db_->KeyMayExist(options, column_family, key, value, value_found);
+  if (ret && value != nullptr && value_found != nullptr && *value_found) {
+    if (!SanityCheckTimestamp(*value).ok() || !StripTS(value).ok()) {
+      return false;
+    }
+  }
+  return ret;
+}
+
+Status DBWithTTLImpl::Merge(const WriteOptions& options,
+                            ColumnFamilyHandle* column_family, const Slice& key,
+                            const Slice& value) {
+  WriteBatch batch;
+  Status st = batch.Merge(column_family, key, value);
+  if (st.ok()) {
+    st = Write(options, &batch);
+  }
+  return st;
+}
+
+Status DBWithTTLImpl::Write(const WriteOptions& opts, WriteBatch* updates) {
+  class Handler : public WriteBatch::Handler {
+   public:
+    explicit Handler(SystemClock* clock) : clock_(clock) {}
+    WriteBatch updates_ttl;
+    Status PutCF(uint32_t column_family_id, const Slice& key,
+                 const Slice& value) override {
+      std::string value_with_ts;
+      Status st = AppendTS(value, &value_with_ts, clock_);
+      if (!st.ok()) {
+        return st;
+      }
+      return WriteBatchInternal::Put(&updates_ttl, column_family_id, key,
+                                     value_with_ts);
+    }
+    Status MergeCF(uint32_t column_family_id, const Slice& key,
+                   const Slice& value) override {
+      std::string value_with_ts;
+      Status st = AppendTS(value, &value_with_ts, clock_);
+      if (!st.ok()) {
+        return st;
+      }
+      return WriteBatchInternal::Merge(&updates_ttl, column_family_id, key,
+                                       value_with_ts);
+    }
+    Status DeleteCF(uint32_t column_family_id, const Slice& key) override {
+      return WriteBatchInternal::Delete(&updates_ttl, column_family_id, key);
+    }
+    Status DeleteRangeCF(uint32_t column_family_id, const Slice& begin_key,
+                         const Slice& end_key) override {
+      return WriteBatchInternal::DeleteRange(&updates_ttl, column_family_id,
+                                             begin_key, end_key);
+    }
+    void LogData(const Slice& blob) override { updates_ttl.PutLogData(blob); }
+
+   private:
+    SystemClock* clock_;
+  };
+  Handler handler(GetEnv()->GetSystemClock().get());
+  Status st = updates->Iterate(&handler);
+  if (!st.ok()) {
+    return st;
+  } else {
+    return db_->Write(opts, &(handler.updates_ttl));
+  }
+}
+
+Iterator* DBWithTTLImpl::NewIterator(const ReadOptions& opts,
+                                     ColumnFamilyHandle* column_family) {
+  if (opts.io_activity != Env::IOActivity::kUnknown) {
+    return NewErrorIterator(Status::InvalidArgument(
+        "Cannot call NewIterator with `ReadOptions::io_activity` != "
+        "`Env::IOActivity::kUnknown`"));
+  }
+  return new TtlIterator(db_->NewIterator(opts, column_family));
+}
+
+void DBWithTTLImpl::SetTtl(ColumnFamilyHandle* h, int32_t ttl) {
+  std::shared_ptr<TtlCompactionFilterFactory> filter;
+  Options opts;
+  opts = GetOptions(h);
+  filter = std::static_pointer_cast<TtlCompactionFilterFactory>(
+      opts.compaction_filter_factory);
+  if (!filter) return;
+  filter->SetTtl(ttl);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.h
new file mode 100644
index 0000000000..6ac662467f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/ttl/db_ttl_impl.h
@@ -0,0 +1,243 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
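+//
+// Overview (vendored annotation, not upstream text): DBWithTTLImpl is a
+// StackableDB wrapper that appends a 4-byte write-time to each value on
+// Put/Merge/Write, strips and checks it on reads, and relies on
+// TtlCompactionFilter to drop entries older than the configured TTL during
+// compaction.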
+
+#pragma once
+
+#include <deque>
+#include <string>
+#include <vector>
+
+#include "db/db_impl/db_impl.h"
+#include "rocksdb/compaction_filter.h"
+#include "rocksdb/db.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/system_clock.h"
+#include "rocksdb/utilities/db_ttl.h"
+#include "utilities/compaction_filters/layered_compaction_filter_base.h"
+
+#ifdef _WIN32
+// Windows API macro interference
+#undef GetCurrentTime
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+struct ConfigOptions;
+class ObjectLibrary;
+class ObjectRegistry;
+class DBWithTTLImpl : public DBWithTTL {
+ public:
+  static void SanitizeOptions(int32_t ttl, ColumnFamilyOptions* options,
+                              SystemClock* clock);
+
+  static void RegisterTtlClasses();
+  explicit DBWithTTLImpl(DB* db);
+
+  virtual ~DBWithTTLImpl();
+
+  virtual Status Close() override;
+
+  Status CreateColumnFamilyWithTtl(const ColumnFamilyOptions& options,
+                                   const std::string& column_family_name,
+                                   ColumnFamilyHandle** handle,
+                                   int ttl) override;
+
+  Status CreateColumnFamily(const ColumnFamilyOptions& options,
+                            const std::string& column_family_name,
+                            ColumnFamilyHandle** handle) override;
+
+  using StackableDB::Put;
+  virtual Status Put(const WriteOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     const Slice& val) override;
+
+  using StackableDB::Get;
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value) override;
+
+  using StackableDB::MultiGet;
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_family,
+      const std::vector<Slice>& keys,
+      std::vector<std::string>* values) override;
+
+  using StackableDB::KeyMayExist;
+  virtual bool KeyMayExist(const ReadOptions& options,
+                           ColumnFamilyHandle* column_family, const Slice& key,
+                           std::string* value,
+                           bool* value_found = nullptr) override;
+
+  using StackableDB::Merge;
+  virtual Status Merge(const WriteOptions& options,
+                       ColumnFamilyHandle* column_family, const Slice& key,
+                       const Slice& value) override;
+
+  virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override;
+
+  using StackableDB::NewIterator;
+  virtual Iterator* NewIterator(const ReadOptions& opts,
+                                ColumnFamilyHandle* column_family) override;
+
+  virtual DB* GetBaseDB() override { return db_; }
+
+  static bool IsStale(const Slice& value, int32_t ttl, SystemClock* clock);
+
+  static Status AppendTS(const Slice& val, std::string* val_with_ts,
+                         SystemClock* clock);
+
+  static Status SanityCheckTimestamp(const Slice& str);
+
+  static Status StripTS(std::string* str);
+
+  static Status StripTS(PinnableSlice* str);
+
+  static const uint32_t kTSLength = sizeof(int32_t);  // size of timestamp
+
+  static const int32_t kMinTimestamp = 1368146402;  // 05/09/2013:5:40PM GMT-8
+
+  static const int32_t kMaxTimestamp = 2147483647;  // 01/18/2038:7:14PM GMT-8
+
+  void SetTtl(int32_t ttl) override { SetTtl(DefaultColumnFamily(), ttl); }
+
+  void SetTtl(ColumnFamilyHandle* h, int32_t ttl) override;
+
+ private:
+  // remember whether the Close completes or not
+  bool closed_;
+};
+
+class TtlIterator : public Iterator {
+ public:
+  explicit TtlIterator(Iterator* iter) : iter_(iter) { assert(iter_); }
+
+  ~TtlIterator() { delete iter_; }
+
+  bool Valid() const override { return iter_->Valid(); }
+
+  void SeekToFirst() override { iter_->SeekToFirst(); }
+
+  void SeekToLast() override { iter_->SeekToLast(); }
+
+  void Seek(const Slice& target) override { iter_->Seek(target); }
+
+  void SeekForPrev(const Slice& target) override { iter_->SeekForPrev(target); }
+
+  void Next() override { iter_->Next(); }
+
+  void Prev() override { iter_->Prev(); }
+
+  Slice key() const override { return iter_->key(); }
+
+  int32_t ttl_timestamp() const {
+    return DecodeFixed32(iter_->value().data() + iter_->value().size() -
+                         DBWithTTLImpl::kTSLength);
+  }
+
+  Slice value() const override {
+    // TODO: handle timestamp corruption like in general iterator semantics
+    assert(DBWithTTLImpl::SanityCheckTimestamp(iter_->value()).ok());
+    Slice trimmed_value = iter_->value();
+    trimmed_value.size_ -= DBWithTTLImpl::kTSLength;
+    return trimmed_value;
+  }
+
+  Status status() const override { return iter_->status(); }
+
+ private:
+  Iterator* iter_;
+};
+
+class TtlCompactionFilter : public LayeredCompactionFilterBase {
+ public:
+  TtlCompactionFilter(int32_t ttl, SystemClock* clock,
+                      const CompactionFilter* _user_comp_filter,
+                      std::unique_ptr<const CompactionFilter>
+                          _user_comp_filter_from_factory = nullptr);
+
+  virtual bool Filter(int level, const Slice& key, const Slice& old_val,
+                      std::string* new_val, bool* value_changed) const override;
+
+  const char* Name() const override { return kClassName(); }
+  static const char* kClassName() { return "TtlCompactionFilter"; }
+  bool IsInstanceOf(const std::string& name) const override {
+    if (name == "Delete By TTL") {
+      return true;
+    } else {
+      return LayeredCompactionFilterBase::IsInstanceOf(name);
+    }
+  }
+
+  Status PrepareOptions(const ConfigOptions& config_options) override;
+  Status ValidateOptions(const DBOptions& db_opts,
+                         const ColumnFamilyOptions& cf_opts) const override;
+
+ private:
+  int32_t ttl_;
+  SystemClock* clock_;
+};
+
+class TtlCompactionFilterFactory : public CompactionFilterFactory {
+ public:
+  TtlCompactionFilterFactory(
+      int32_t ttl, SystemClock* clock,
+      std::shared_ptr<CompactionFilterFactory> comp_filter_factory);
+
+  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) override;
+  void SetTtl(int32_t ttl) { ttl_ = ttl; }
+
+  const char* Name() const override { return kClassName(); }
+  static const char* kClassName() { return "TtlCompactionFilterFactory"; }
+  Status PrepareOptions(const ConfigOptions& config_options) override;
+  Status ValidateOptions(const DBOptions& db_opts,
+                         const ColumnFamilyOptions& cf_opts) const override;
+  const Customizable* Inner() const override {
+    return user_comp_filter_factory_.get();
+  }
+
+ private:
+  int32_t ttl_;
+  SystemClock* clock_;
+  std::shared_ptr<CompactionFilterFactory> user_comp_filter_factory_;
+};
+
+class TtlMergeOperator : public MergeOperator {
+ public:
+  explicit TtlMergeOperator(const std::shared_ptr<MergeOperator>& merge_op,
+                            SystemClock* clock);
+
+  bool FullMergeV2(const MergeOperationInput& merge_in,
+                   MergeOperationOutput* merge_out) const override;
+
+  bool PartialMergeMulti(const Slice& key,
+                         const std::deque<Slice>& operand_list,
+                         std::string* new_value, Logger* logger) const override;
+
+  static const char* kClassName() { return "TtlMergeOperator"; }
+
+  const char* Name() const override { return kClassName(); }
+  bool IsInstanceOf(const std::string& name) const override {
+    if (name == "Merge By TTL") {
+      return true;
+    } else {
+      return MergeOperator::IsInstanceOf(name);
+    }
+  }
+
+  Status PrepareOptions(const ConfigOptions& config_options) override;
+  Status ValidateOptions(const DBOptions& db_opts,
+                         const ColumnFamilyOptions& cf_opts) const override;
+  const Customizable* Inner() const override { return user_merge_op_.get(); }
+
+ private:
+  std::shared_ptr<MergeOperator> user_merge_op_;
+  SystemClock* clock_;
+};
+extern "C" {
+int RegisterTtlObjects(ObjectLibrary& library, const std::string& /*arg*/);
+}  // extern "C"
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/wal_filter.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/wal_filter.cc
new file mode 100644
index 0000000000..9fa36bf272
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/wal_filter.cc
@@ -0,0 +1,22 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "rocksdb/wal_filter.h"
+
+#include <memory>
+
+#include "rocksdb/convenience.h"
+#include "rocksdb/options.h"
+#include "rocksdb/utilities/customizable_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+Status WalFilter::CreateFromString(const ConfigOptions& config_options,
+                                   const std::string& value,
+                                   WalFilter** filter) {
+  Status s = LoadStaticObject<WalFilter>(config_options, value, filter);
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index.cc
new file mode 100644
index 0000000000..f20b7e5e36
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index.cc
@@ -0,0 +1,694 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+
+#include "rocksdb/utilities/write_batch_with_index.h"
+
+#include <memory>
+
+#include "db/column_family.h"
+#include "db/db_impl/db_impl.h"
+#include "db/merge_context.h"
+#include "db/merge_helper.h"
+#include "memory/arena.h"
+#include "memtable/skiplist.h"
+#include "options/db_options.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/iterator.h"
+#include "util/cast_util.h"
+#include "util/string_util.h"
+#include "utilities/write_batch_with_index/write_batch_with_index_internal.h"
+
+namespace ROCKSDB_NAMESPACE {
+struct WriteBatchWithIndex::Rep {
+  explicit Rep(const Comparator* index_comparator, size_t reserved_bytes = 0,
+               size_t max_bytes = 0, bool _overwrite_key = false,
+               size_t protection_bytes_per_key = 0)
+      : write_batch(reserved_bytes, max_bytes, protection_bytes_per_key,
+                    index_comparator ? index_comparator->timestamp_size() : 0),
+        comparator(index_comparator, &write_batch),
+        skip_list(comparator, &arena),
+        overwrite_key(_overwrite_key),
+        last_entry_offset(0),
+        last_sub_batch_offset(0),
+        sub_batch_cnt(1) {}
+  ReadableWriteBatch write_batch;
+  WriteBatchEntryComparator comparator;
+  Arena arena;
+  WriteBatchEntrySkipList skip_list;
+  bool overwrite_key;
+  size_t last_entry_offset;
+  // The starting offset of the last sub-batch. A sub-batch starts right before
+  // inserting a key that is a duplicate of a key in the last sub-batch. Zero,
+  // the default, means that no duplicate key is detected so far.
+  size_t last_sub_batch_offset;
+  // Total number of sub-batches in the write batch. Default is 1.
+  size_t sub_batch_cnt;
+
+  // Remember current offset of internal write batch, which is used as
+  // the starting offset of the next record.
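+  // Typical call pattern (illustrative annotation, mirroring Put()/Delete()
+  // below):
+  //   SetLastEntryOffset();                   // snapshot current batch size
+  //   write_batch.Put(cf, key, value);        // append the record
+  //   AddOrUpdateIndex(cf, key, kPutRecord);  // index it at that offset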
+ void SetLastEntryOffset() { last_entry_offset = write_batch.GetDataSize(); } + + // In overwrite mode, find the existing entry for the same key and update it + // to point to the current entry. + // Return true if the key is found and updated. + bool UpdateExistingEntry(ColumnFamilyHandle* column_family, const Slice& key, + WriteType type); + bool UpdateExistingEntryWithCfId(uint32_t column_family_id, const Slice& key, + WriteType type); + + // Add the recent entry to the update. + // In overwrite mode, if key already exists in the index, update it. + void AddOrUpdateIndex(ColumnFamilyHandle* column_family, const Slice& key, + WriteType type); + void AddOrUpdateIndex(const Slice& key, WriteType type); + + // Allocate an index entry pointing to the last entry in the write batch and + // put it to skip list. + void AddNewEntry(uint32_t column_family_id); + + // Clear all updates buffered in this batch. + void Clear(); + void ClearIndex(); + + // Rebuild index by reading all records from the batch. + // Returns non-ok status on corruption. + Status ReBuildIndex(); +}; + +bool WriteBatchWithIndex::Rep::UpdateExistingEntry( + ColumnFamilyHandle* column_family, const Slice& key, WriteType type) { + uint32_t cf_id = GetColumnFamilyID(column_family); + return UpdateExistingEntryWithCfId(cf_id, key, type); +} + +bool WriteBatchWithIndex::Rep::UpdateExistingEntryWithCfId( + uint32_t column_family_id, const Slice& key, WriteType type) { + if (!overwrite_key) { + return false; + } + + WBWIIteratorImpl iter(column_family_id, &skip_list, &write_batch, + &comparator); + iter.Seek(key); + if (!iter.Valid()) { + return false; + } else if (!iter.MatchesKey(column_family_id, key)) { + return false; + } else { + // Move to the end of this key (NextKey-Prev) + iter.NextKey(); // Move to the next key + if (iter.Valid()) { + iter.Prev(); // Move back one entry + } else { + iter.SeekToLast(); + } + } + WriteBatchIndexEntry* non_const_entry = + const_cast(iter.GetRawEntry()); + if (LIKELY(last_sub_batch_offset <= non_const_entry->offset)) { + last_sub_batch_offset = last_entry_offset; + sub_batch_cnt++; + } + if (type == kMergeRecord) { + return false; + } else { + non_const_entry->offset = last_entry_offset; + return true; + } +} + +void WriteBatchWithIndex::Rep::AddOrUpdateIndex( + ColumnFamilyHandle* column_family, const Slice& key, WriteType type) { + if (!UpdateExistingEntry(column_family, key, type)) { + uint32_t cf_id = GetColumnFamilyID(column_family); + const auto* cf_cmp = GetColumnFamilyUserComparator(column_family); + if (cf_cmp != nullptr) { + comparator.SetComparatorForCF(cf_id, cf_cmp); + } + AddNewEntry(cf_id); + } +} + +void WriteBatchWithIndex::Rep::AddOrUpdateIndex(const Slice& key, + WriteType type) { + if (!UpdateExistingEntryWithCfId(0, key, type)) { + AddNewEntry(0); + } +} + +void WriteBatchWithIndex::Rep::AddNewEntry(uint32_t column_family_id) { + const std::string& wb_data = write_batch.Data(); + Slice entry_ptr = Slice(wb_data.data() + last_entry_offset, + wb_data.size() - last_entry_offset); + // Extract key + Slice key; + bool success = + ReadKeyFromWriteBatchEntry(&entry_ptr, &key, column_family_id != 0); +#ifdef NDEBUG + (void)success; +#endif + assert(success); + + const Comparator* const ucmp = comparator.GetComparator(column_family_id); + size_t ts_sz = ucmp ? 
ucmp->timestamp_size() : 0; + + if (ts_sz > 0) { + key.remove_suffix(ts_sz); + } + + auto* mem = arena.Allocate(sizeof(WriteBatchIndexEntry)); + auto* index_entry = + new (mem) WriteBatchIndexEntry(last_entry_offset, column_family_id, + key.data() - wb_data.data(), key.size()); + skip_list.Insert(index_entry); +} + +void WriteBatchWithIndex::Rep::Clear() { + write_batch.Clear(); + ClearIndex(); +} + +void WriteBatchWithIndex::Rep::ClearIndex() { + skip_list.~WriteBatchEntrySkipList(); + arena.~Arena(); + new (&arena) Arena(); + new (&skip_list) WriteBatchEntrySkipList(comparator, &arena); + last_entry_offset = 0; + last_sub_batch_offset = 0; + sub_batch_cnt = 1; +} + +Status WriteBatchWithIndex::Rep::ReBuildIndex() { + Status s; + + ClearIndex(); + + if (write_batch.Count() == 0) { + // Nothing to re-index + return s; + } + + size_t offset = WriteBatchInternal::GetFirstOffset(&write_batch); + + Slice input(write_batch.Data()); + input.remove_prefix(offset); + + // Loop through all entries in Rep and add each one to the index + uint32_t found = 0; + while (s.ok() && !input.empty()) { + Slice key, value, blob, xid; + uint32_t column_family_id = 0; // default + char tag = 0; + + // set offset of current entry for call to AddNewEntry() + last_entry_offset = input.data() - write_batch.Data().data(); + + s = ReadRecordFromWriteBatch(&input, &tag, &column_family_id, &key, &value, + &blob, &xid); + if (!s.ok()) { + break; + } + + switch (tag) { + case kTypeColumnFamilyValue: + case kTypeValue: + found++; + if (!UpdateExistingEntryWithCfId(column_family_id, key, kPutRecord)) { + AddNewEntry(column_family_id); + } + break; + case kTypeColumnFamilyDeletion: + case kTypeDeletion: + found++; + if (!UpdateExistingEntryWithCfId(column_family_id, key, + kDeleteRecord)) { + AddNewEntry(column_family_id); + } + break; + case kTypeColumnFamilySingleDeletion: + case kTypeSingleDeletion: + found++; + if (!UpdateExistingEntryWithCfId(column_family_id, key, + kSingleDeleteRecord)) { + AddNewEntry(column_family_id); + } + break; + case kTypeColumnFamilyMerge: + case kTypeMerge: + found++; + if (!UpdateExistingEntryWithCfId(column_family_id, key, kMergeRecord)) { + AddNewEntry(column_family_id); + } + break; + case kTypeLogData: + case kTypeBeginPrepareXID: + case kTypeBeginPersistedPrepareXID: + case kTypeBeginUnprepareXID: + case kTypeEndPrepareXID: + case kTypeCommitXID: + case kTypeCommitXIDAndTimestamp: + case kTypeRollbackXID: + case kTypeNoop: + break; + default: + return Status::Corruption( + "unknown WriteBatch tag in ReBuildIndex", + std::to_string(static_cast(tag))); + } + } + + if (s.ok() && found != write_batch.Count()) { + s = Status::Corruption("WriteBatch has wrong count"); + } + + return s; +} + +WriteBatchWithIndex::WriteBatchWithIndex( + const Comparator* default_index_comparator, size_t reserved_bytes, + bool overwrite_key, size_t max_bytes, size_t protection_bytes_per_key) + : rep(new Rep(default_index_comparator, reserved_bytes, max_bytes, + overwrite_key, protection_bytes_per_key)) {} + +WriteBatchWithIndex::~WriteBatchWithIndex() {} + +WriteBatchWithIndex::WriteBatchWithIndex(WriteBatchWithIndex&&) = default; + +WriteBatchWithIndex& WriteBatchWithIndex::operator=(WriteBatchWithIndex&&) = + default; + +WriteBatch* WriteBatchWithIndex::GetWriteBatch() { return &rep->write_batch; } + +size_t WriteBatchWithIndex::SubBatchCnt() { return rep->sub_batch_cnt; } + +WBWIIterator* WriteBatchWithIndex::NewIterator() { + return new WBWIIteratorImpl(0, &(rep->skip_list), &rep->write_batch, + 
&(rep->comparator)); +} + +WBWIIterator* WriteBatchWithIndex::NewIterator( + ColumnFamilyHandle* column_family) { + return new WBWIIteratorImpl(GetColumnFamilyID(column_family), + &(rep->skip_list), &rep->write_batch, + &(rep->comparator)); +} + +Iterator* WriteBatchWithIndex::NewIteratorWithBase( + ColumnFamilyHandle* column_family, Iterator* base_iterator, + const ReadOptions* read_options) { + auto wbwiii = + new WBWIIteratorImpl(GetColumnFamilyID(column_family), &(rep->skip_list), + &rep->write_batch, &rep->comparator); + return new BaseDeltaIterator(column_family, base_iterator, wbwiii, + GetColumnFamilyUserComparator(column_family), + read_options); +} + +Iterator* WriteBatchWithIndex::NewIteratorWithBase(Iterator* base_iterator) { + // default column family's comparator + auto wbwiii = new WBWIIteratorImpl(0, &(rep->skip_list), &rep->write_batch, + &rep->comparator); + return new BaseDeltaIterator(nullptr, base_iterator, wbwiii, + rep->comparator.default_comparator()); +} + +Status WriteBatchWithIndex::Put(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) { + rep->SetLastEntryOffset(); + auto s = rep->write_batch.Put(column_family, key, value); + if (s.ok()) { + rep->AddOrUpdateIndex(column_family, key, kPutRecord); + } + return s; +} + +Status WriteBatchWithIndex::Put(const Slice& key, const Slice& value) { + rep->SetLastEntryOffset(); + auto s = rep->write_batch.Put(key, value); + if (s.ok()) { + rep->AddOrUpdateIndex(key, kPutRecord); + } + return s; +} + +Status WriteBatchWithIndex::Put(ColumnFamilyHandle* column_family, + const Slice& /*key*/, const Slice& /*ts*/, + const Slice& /*value*/) { + if (!column_family) { + return Status::InvalidArgument("column family handle cannot be nullptr"); + } + // TODO: support WBWI::Put() with timestamp. + return Status::NotSupported(); +} + +Status WriteBatchWithIndex::Delete(ColumnFamilyHandle* column_family, + const Slice& key) { + rep->SetLastEntryOffset(); + auto s = rep->write_batch.Delete(column_family, key); + if (s.ok()) { + rep->AddOrUpdateIndex(column_family, key, kDeleteRecord); + } + return s; +} + +Status WriteBatchWithIndex::Delete(const Slice& key) { + rep->SetLastEntryOffset(); + auto s = rep->write_batch.Delete(key); + if (s.ok()) { + rep->AddOrUpdateIndex(key, kDeleteRecord); + } + return s; +} + +Status WriteBatchWithIndex::Delete(ColumnFamilyHandle* column_family, + const Slice& /*key*/, const Slice& /*ts*/) { + if (!column_family) { + return Status::InvalidArgument("column family handle cannot be nullptr"); + } + // TODO: support WBWI::Delete() with timestamp. + return Status::NotSupported(); +} + +Status WriteBatchWithIndex::SingleDelete(ColumnFamilyHandle* column_family, + const Slice& key) { + rep->SetLastEntryOffset(); + auto s = rep->write_batch.SingleDelete(column_family, key); + if (s.ok()) { + rep->AddOrUpdateIndex(column_family, key, kSingleDeleteRecord); + } + return s; +} + +Status WriteBatchWithIndex::SingleDelete(const Slice& key) { + rep->SetLastEntryOffset(); + auto s = rep->write_batch.SingleDelete(key); + if (s.ok()) { + rep->AddOrUpdateIndex(key, kSingleDeleteRecord); + } + return s; +} + +Status WriteBatchWithIndex::SingleDelete(ColumnFamilyHandle* column_family, + const Slice& /*key*/, + const Slice& /*ts*/) { + if (!column_family) { + return Status::InvalidArgument("column family handle cannot be nullptr"); + } + // TODO: support WBWI::SingleDelete() with timestamp. 
+ return Status::NotSupported(); +} + +Status WriteBatchWithIndex::Merge(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value) { + rep->SetLastEntryOffset(); + auto s = rep->write_batch.Merge(column_family, key, value); + if (s.ok()) { + rep->AddOrUpdateIndex(column_family, key, kMergeRecord); + } + return s; +} + +Status WriteBatchWithIndex::Merge(const Slice& key, const Slice& value) { + rep->SetLastEntryOffset(); + auto s = rep->write_batch.Merge(key, value); + if (s.ok()) { + rep->AddOrUpdateIndex(key, kMergeRecord); + } + return s; +} + +Status WriteBatchWithIndex::PutLogData(const Slice& blob) { + return rep->write_batch.PutLogData(blob); +} + +void WriteBatchWithIndex::Clear() { rep->Clear(); } + +Status WriteBatchWithIndex::GetFromBatch(ColumnFamilyHandle* column_family, + const DBOptions& options, + const Slice& key, std::string* value) { + Status s; + WriteBatchWithIndexInternal wbwii(&options, column_family); + auto result = wbwii.GetFromBatch(this, key, value, &s); + + switch (result) { + case WBWIIteratorImpl::kFound: + case WBWIIteratorImpl::kError: + // use returned status + break; + case WBWIIteratorImpl::kDeleted: + case WBWIIteratorImpl::kNotFound: + s = Status::NotFound(); + break; + case WBWIIteratorImpl::kMergeInProgress: + s = Status::MergeInProgress(); + break; + default: + assert(false); + } + + return s; +} + +Status WriteBatchWithIndex::GetFromBatchAndDB(DB* db, + const ReadOptions& read_options, + const Slice& key, + std::string* value) { + assert(value != nullptr); + PinnableSlice pinnable_val(value); + assert(!pinnable_val.IsPinned()); + auto s = GetFromBatchAndDB(db, read_options, db->DefaultColumnFamily(), key, + &pinnable_val); + if (s.ok() && pinnable_val.IsPinned()) { + value->assign(pinnable_val.data(), pinnable_val.size()); + } // else value is already assigned + return s; +} + +Status WriteBatchWithIndex::GetFromBatchAndDB(DB* db, + const ReadOptions& read_options, + const Slice& key, + PinnableSlice* pinnable_val) { + return GetFromBatchAndDB(db, read_options, db->DefaultColumnFamily(), key, + pinnable_val); +} + +Status WriteBatchWithIndex::GetFromBatchAndDB(DB* db, + const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, + std::string* value) { + assert(value != nullptr); + PinnableSlice pinnable_val(value); + assert(!pinnable_val.IsPinned()); + auto s = + GetFromBatchAndDB(db, read_options, column_family, key, &pinnable_val); + if (s.ok() && pinnable_val.IsPinned()) { + value->assign(pinnable_val.data(), pinnable_val.size()); + } // else value is already assigned + return s; +} + +Status WriteBatchWithIndex::GetFromBatchAndDB(DB* db, + const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, + PinnableSlice* pinnable_val) { + return GetFromBatchAndDB(db, read_options, column_family, key, pinnable_val, + nullptr); +} + +Status WriteBatchWithIndex::GetFromBatchAndDB( + DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* pinnable_val, ReadCallback* callback) { + const Comparator* const ucmp = rep->comparator.GetComparator(column_family); + size_t ts_sz = ucmp ? 
ucmp->timestamp_size() : 0;
+  if (ts_sz > 0 && !read_options.timestamp) {
+    return Status::InvalidArgument("Must specify timestamp");
+  }
+
+  Status s;
+  WriteBatchWithIndexInternal wbwii(db, column_family);
+
+  // Since the lifetime of the WriteBatch is the same as that of the transaction
+  // we cannot pin it as otherwise the returned value will not be available
+  // after the transaction finishes.
+  std::string& batch_value = *pinnable_val->GetSelf();
+  auto result = wbwii.GetFromBatch(this, key, &batch_value, &s);
+
+  if (result == WBWIIteratorImpl::kFound) {
+    pinnable_val->PinSelf();
+    return s;
+  } else if (!s.ok() || result == WBWIIteratorImpl::kError) {
+    return s;
+  } else if (result == WBWIIteratorImpl::kDeleted) {
+    return Status::NotFound();
+  }
+  assert(result == WBWIIteratorImpl::kMergeInProgress ||
+         result == WBWIIteratorImpl::kNotFound);
+
+  // Did not find key in batch OR could not resolve Merges. Try DB.
+  if (!callback) {
+    s = db->Get(read_options, column_family, key, pinnable_val);
+  } else {
+    DBImpl::GetImplOptions get_impl_options;
+    get_impl_options.column_family = column_family;
+    get_impl_options.value = pinnable_val;
+    get_impl_options.callback = callback;
+    s = static_cast_with_check<DBImpl>(db->GetRootDB())
+            ->GetImpl(read_options, key, get_impl_options);
+  }
+
+  if (s.ok() || s.IsNotFound()) {  // DB Get Succeeded
+    if (result == WBWIIteratorImpl::kMergeInProgress) {
+      // Merge result from DB with merges in Batch
+      std::string merge_result;
+      if (s.ok()) {
+        s = wbwii.MergeKey(key, pinnable_val, &merge_result);
+      } else {  // Key not present in db (s.IsNotFound())
+        s = wbwii.MergeKey(key, nullptr, &merge_result);
+      }
+      if (s.ok()) {
+        pinnable_val->Reset();
+        *pinnable_val->GetSelf() = std::move(merge_result);
+        pinnable_val->PinSelf();
+      }
+    }
+  }
+
+  return s;
+}
+
+void WriteBatchWithIndex::MultiGetFromBatchAndDB(
+    DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family,
+    const size_t num_keys, const Slice* keys, PinnableSlice* values,
+    Status* statuses, bool sorted_input) {
+  MultiGetFromBatchAndDB(db, read_options, column_family, num_keys, keys,
+                         values, statuses, sorted_input, nullptr);
+}
+
+void WriteBatchWithIndex::MultiGetFromBatchAndDB(
+    DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family,
+    const size_t num_keys, const Slice* keys, PinnableSlice* values,
+    Status* statuses, bool sorted_input, ReadCallback* callback) {
+  const Comparator* const ucmp = rep->comparator.GetComparator(column_family);
+  size_t ts_sz = ucmp ? ucmp->timestamp_size() : 0;
+  if (ts_sz > 0 && !read_options.timestamp) {
+    for (size_t i = 0; i < num_keys; ++i) {
+      statuses[i] = Status::InvalidArgument("Must specify timestamp");
+    }
+    return;
+  }
+
+  WriteBatchWithIndexInternal wbwii(db, column_family);
+
+  autovector<KeyContext, MultiGetContext::MAX_BATCH_SIZE> key_context;
+  autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
+  // To hold merges from the write batch
+  autovector<std::pair<WBWIIteratorImpl::Result, MergeContext>,
+             MultiGetContext::MAX_BATCH_SIZE>
+      merges;
+  // Since the lifetime of the WriteBatch is the same as that of the transaction
+  // we cannot pin it as otherwise the returned value will not be available
+  // after the transaction finishes.
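+  // Flow sketch (annotation, not upstream text): the loop below resolves each
+  // key against the batch index alone; keys that are absent, or whose latest
+  // update is an unresolved Merge, are queued in key_context for one batched
+  // DB lookup, after which the pending merge operands are folded in.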
+  for (size_t i = 0; i < num_keys; ++i) {
+    MergeContext merge_context;
+    std::string batch_value;
+    Status* s = &statuses[i];
+    PinnableSlice* pinnable_val = &values[i];
+    pinnable_val->Reset();
+    auto result =
+        wbwii.GetFromBatch(this, keys[i], &merge_context, &batch_value, s);
+
+    if (result == WBWIIteratorImpl::kFound) {
+      *pinnable_val->GetSelf() = std::move(batch_value);
+      pinnable_val->PinSelf();
+      continue;
+    }
+    if (result == WBWIIteratorImpl::kDeleted) {
+      *s = Status::NotFound();
+      continue;
+    }
+    if (result == WBWIIteratorImpl::kError) {
+      continue;
+    }
+    assert(result == WBWIIteratorImpl::kMergeInProgress ||
+           result == WBWIIteratorImpl::kNotFound);
+    key_context.emplace_back(column_family, keys[i], &values[i],
+                             /* columns */ nullptr, /* timestamp */ nullptr,
+                             &statuses[i]);
+    merges.emplace_back(result, std::move(merge_context));
+  }
+
+  for (KeyContext& key : key_context) {
+    sorted_keys.emplace_back(&key);
+  }
+
+  // Did not find key in batch OR could not resolve Merges. Try DB.
+  static_cast_with_check<DBImpl>(db->GetRootDB())
+      ->PrepareMultiGetKeys(key_context.size(), sorted_input, &sorted_keys);
+  static_cast_with_check<DBImpl>(db->GetRootDB())
+      ->MultiGetWithCallback(read_options, column_family, callback,
+                             &sorted_keys);
+
+  for (auto iter = key_context.begin(); iter != key_context.end(); ++iter) {
+    KeyContext& key = *iter;
+    if (key.s->ok() || key.s->IsNotFound()) {  // DB Get Succeeded
+      size_t index = iter - key_context.begin();
+      std::pair<WBWIIteratorImpl::Result, MergeContext>& merge_result =
+          merges[index];
+      if (merge_result.first == WBWIIteratorImpl::kMergeInProgress) {
+        std::string merged_value;
+        // Merge result from DB with merges in Batch
+        if (key.s->ok()) {
+          *key.s = wbwii.MergeKey(*key.key, iter->value, merge_result.second,
+                                  &merged_value);
+        } else {  // Key not present in db (s.IsNotFound())
+          *key.s = wbwii.MergeKey(*key.key, nullptr, merge_result.second,
+                                  &merged_value);
+        }
+        if (key.s->ok()) {
+          key.value->Reset();
+          *key.value->GetSelf() = std::move(merged_value);
+          key.value->PinSelf();
+        }
+      }
+    }
+  }
+}
+
+void WriteBatchWithIndex::SetSavePoint() { rep->write_batch.SetSavePoint(); }
+
+Status WriteBatchWithIndex::RollbackToSavePoint() {
+  Status s = rep->write_batch.RollbackToSavePoint();
+
+  if (s.ok()) {
+    rep->sub_batch_cnt = 1;
+    rep->last_sub_batch_offset = 0;
+    s = rep->ReBuildIndex();
+  }
+
+  return s;
+}
+
+Status WriteBatchWithIndex::PopSavePoint() {
+  return rep->write_batch.PopSavePoint();
+}
+
+void WriteBatchWithIndex::SetMaxBytes(size_t max_bytes) {
+  rep->write_batch.SetMaxBytes(max_bytes);
+}
+
+size_t WriteBatchWithIndex::GetDataSize() const {
+  return rep->write_batch.GetDataSize();
+}
+
+const Comparator* WriteBatchWithIndexInternal::GetUserComparator(
+    const WriteBatchWithIndex& wbwi, uint32_t cf_id) {
+  const WriteBatchEntryComparator& ucmps = wbwi.rep->comparator;
+  return ucmps.GetComparator(cf_id);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.cc
new file mode 100644
index 0000000000..ee4754f8d1
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.cc
@@ -0,0 +1,742 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + + +#include "utilities/write_batch_with_index/write_batch_with_index_internal.h" + +#include "db/column_family.h" +#include "db/db_impl/db_impl.h" +#include "db/merge_context.h" +#include "db/merge_helper.h" +#include "rocksdb/comparator.h" +#include "rocksdb/db.h" +#include "rocksdb/utilities/write_batch_with_index.h" +#include "util/cast_util.h" +#include "util/coding.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +BaseDeltaIterator::BaseDeltaIterator(ColumnFamilyHandle* column_family, + Iterator* base_iterator, + WBWIIteratorImpl* delta_iterator, + const Comparator* comparator, + const ReadOptions* read_options) + : forward_(true), + current_at_base_(true), + equal_keys_(false), + status_(Status::OK()), + base_iterator_(base_iterator), + delta_iterator_(delta_iterator), + comparator_(comparator), + iterate_upper_bound_(read_options ? read_options->iterate_upper_bound + : nullptr) { + assert(comparator_); + wbwii_.reset(new WriteBatchWithIndexInternal(column_family)); +} + +bool BaseDeltaIterator::Valid() const { + return status_.ok() ? (current_at_base_ ? BaseValid() : DeltaValid()) : false; +} + +void BaseDeltaIterator::SeekToFirst() { + forward_ = true; + base_iterator_->SeekToFirst(); + delta_iterator_->SeekToFirst(); + UpdateCurrent(); +} + +void BaseDeltaIterator::SeekToLast() { + forward_ = false; + base_iterator_->SeekToLast(); + delta_iterator_->SeekToLast(); + UpdateCurrent(); +} + +void BaseDeltaIterator::Seek(const Slice& k) { + forward_ = true; + base_iterator_->Seek(k); + delta_iterator_->Seek(k); + UpdateCurrent(); +} + +void BaseDeltaIterator::SeekForPrev(const Slice& k) { + forward_ = false; + base_iterator_->SeekForPrev(k); + delta_iterator_->SeekForPrev(k); + UpdateCurrent(); +} + +void BaseDeltaIterator::Next() { + if (!Valid()) { + status_ = Status::NotSupported("Next() on invalid iterator"); + return; + } + + if (!forward_) { + // Need to change direction + // if our direction was backward and we're not equal, we have two states: + // * both iterators are valid: we're already in a good state (current + // shows to smaller) + // * only one iterator is valid: we need to advance that iterator + forward_ = true; + equal_keys_ = false; + if (!BaseValid()) { + assert(DeltaValid()); + base_iterator_->SeekToFirst(); + } else if (!DeltaValid()) { + delta_iterator_->SeekToFirst(); + } else if (current_at_base_) { + // Change delta from larger than base to smaller + AdvanceDelta(); + } else { + // Change base from larger than delta to smaller + AdvanceBase(); + } + if (DeltaValid() && BaseValid()) { + if (0 == comparator_->CompareWithoutTimestamp( + delta_iterator_->Entry().key, /*a_has_ts=*/false, + base_iterator_->key(), /*b_has_ts=*/false)) { + equal_keys_ = true; + } + } + } + Advance(); +} + +void BaseDeltaIterator::Prev() { + if (!Valid()) { + status_ = Status::NotSupported("Prev() on invalid iterator"); + return; + } + + if (forward_) { + // Need to change direction + // if our direction was backward and we're not equal, we have two states: + // * both iterators are valid: we're already in a good state (current + // shows to smaller) + // * only one iterator is valid: we need to advance that iterator + forward_ = false; + equal_keys_ = false; + if (!BaseValid()) { + assert(DeltaValid()); + base_iterator_->SeekToLast(); + } else if (!DeltaValid()) { + 
delta_iterator_->SeekToLast();
+    } else if (current_at_base_) {
+      // Change delta from less advanced than base to more advanced
+      AdvanceDelta();
+    } else {
+      // Change base from less advanced than delta to more advanced
+      AdvanceBase();
+    }
+    if (DeltaValid() && BaseValid()) {
+      if (0 == comparator_->CompareWithoutTimestamp(
+                   delta_iterator_->Entry().key, /*a_has_ts=*/false,
+                   base_iterator_->key(), /*b_has_ts=*/false)) {
+        equal_keys_ = true;
+      }
+    }
+  }
+
+  Advance();
+}
+
+Slice BaseDeltaIterator::key() const {
+  return current_at_base_ ? base_iterator_->key()
+                          : delta_iterator_->Entry().key;
+}
+
+Slice BaseDeltaIterator::value() const {
+  if (current_at_base_) {
+    return base_iterator_->value();
+  } else {
+    WriteEntry delta_entry = delta_iterator_->Entry();
+    if (wbwii_->GetNumOperands() == 0) {
+      return delta_entry.value;
+    } else if (delta_entry.type == kDeleteRecord ||
+               delta_entry.type == kSingleDeleteRecord) {
+      status_ =
+          wbwii_->MergeKey(delta_entry.key, nullptr, merge_result_.GetSelf());
+    } else if (delta_entry.type == kPutRecord) {
+      status_ = wbwii_->MergeKey(delta_entry.key, &delta_entry.value,
+                                 merge_result_.GetSelf());
+    } else if (delta_entry.type == kMergeRecord) {
+      if (equal_keys_) {
+        Slice base_value = base_iterator_->value();
+        status_ = wbwii_->MergeKey(delta_entry.key, &base_value,
+                                   merge_result_.GetSelf());
+      } else {
+        status_ =
+            wbwii_->MergeKey(delta_entry.key, nullptr, merge_result_.GetSelf());
+      }
+    }
+    merge_result_.PinSelf();
+    return merge_result_;
+  }
+}
+
+Status BaseDeltaIterator::status() const {
+  if (!status_.ok()) {
+    return status_;
+  }
+  if (!base_iterator_->status().ok()) {
+    return base_iterator_->status();
+  }
+  return delta_iterator_->status();
+}
+
+void BaseDeltaIterator::Invalidate(Status s) { status_ = s; }
+
+void BaseDeltaIterator::AssertInvariants() {
+#ifndef NDEBUG
+  bool not_ok = false;
+  if (!base_iterator_->status().ok()) {
+    assert(!base_iterator_->Valid());
+    not_ok = true;
+  }
+  if (!delta_iterator_->status().ok()) {
+    assert(!delta_iterator_->Valid());
+    not_ok = true;
+  }
+  if (not_ok) {
+    assert(!Valid());
+    assert(!status().ok());
+    return;
+  }
+
+  if (!Valid()) {
+    return;
+  }
+  if (!BaseValid()) {
+    assert(!current_at_base_ && delta_iterator_->Valid());
+    return;
+  }
+  if (!DeltaValid()) {
+    assert(current_at_base_ && base_iterator_->Valid());
+    return;
+  }
+  // we don't support those yet
+  assert(delta_iterator_->Entry().type != kMergeRecord &&
+         delta_iterator_->Entry().type != kLogDataRecord);
+  int compare = comparator_->CompareWithoutTimestamp(
+      delta_iterator_->Entry().key, /*a_has_ts=*/false, base_iterator_->key(),
+      /*b_has_ts=*/false);
+  if (forward_) {
+    // current_at_base -> compare < 0
+    assert(!current_at_base_ || compare < 0);
+    // !current_at_base -> compare >= 0
+    assert(current_at_base_ || compare >= 0);
+  } else {
+    // current_at_base -> compare > 0
+    assert(!current_at_base_ || compare > 0);
+    // !current_at_base -> compare <= 0
+    assert(current_at_base_ || compare <= 0);
+  }
+  // equal_keys_ <=> compare == 0
+  assert((equal_keys_ || compare != 0) && (!equal_keys_ || compare == 0));
+#endif
+}
+
+void BaseDeltaIterator::Advance() {
+  if (equal_keys_) {
+    assert(BaseValid() && DeltaValid());
+    AdvanceBase();
+    AdvanceDelta();
+  } else {
+    if (current_at_base_) {
+      assert(BaseValid());
+      AdvanceBase();
+    } else {
+      assert(DeltaValid());
+      AdvanceDelta();
+    }
+  }
+  UpdateCurrent();
+}
+
+void BaseDeltaIterator::AdvanceDelta() {
+  if (forward_) {
+    delta_iterator_->NextKey();
+  } else {
delta_iterator_->PrevKey(); + } +} +void BaseDeltaIterator::AdvanceBase() { + if (forward_) { + base_iterator_->Next(); + } else { + base_iterator_->Prev(); + } +} + +bool BaseDeltaIterator::BaseValid() const { return base_iterator_->Valid(); } +bool BaseDeltaIterator::DeltaValid() const { return delta_iterator_->Valid(); } +void BaseDeltaIterator::UpdateCurrent() { +// Suppress false positive clang analyzer warnings. +#ifndef __clang_analyzer__ + status_ = Status::OK(); + while (true) { + auto delta_result = WBWIIteratorImpl::kNotFound; + WriteEntry delta_entry; + if (DeltaValid()) { + assert(delta_iterator_->status().ok()); + delta_result = + delta_iterator_->FindLatestUpdate(wbwii_->GetMergeContext()); + delta_entry = delta_iterator_->Entry(); + } else if (!delta_iterator_->status().ok()) { + // Expose the error status and stop. + current_at_base_ = false; + return; + } + equal_keys_ = false; + if (!BaseValid()) { + if (!base_iterator_->status().ok()) { + // Expose the error status and stop. + current_at_base_ = true; + return; + } + + // Base has finished. + if (!DeltaValid()) { + // Finished + return; + } + if (iterate_upper_bound_) { + if (comparator_->CompareWithoutTimestamp( + delta_entry.key, /*a_has_ts=*/false, *iterate_upper_bound_, + /*b_has_ts=*/false) >= 0) { + // out of upper bound -> finished. + return; + } + } + if (delta_result == WBWIIteratorImpl::kDeleted && + wbwii_->GetNumOperands() == 0) { + AdvanceDelta(); + } else { + current_at_base_ = false; + return; + } + } else if (!DeltaValid()) { + // Delta has finished. + current_at_base_ = true; + return; + } else { + int compare = + (forward_ ? 1 : -1) * comparator_->CompareWithoutTimestamp( + delta_entry.key, /*a_has_ts=*/false, + base_iterator_->key(), /*b_has_ts=*/false); + if (compare <= 0) { // delta bigger or equal + if (compare == 0) { + equal_keys_ = true; + } + if (delta_result != WBWIIteratorImpl::kDeleted || + wbwii_->GetNumOperands() > 0) { + current_at_base_ = false; + return; + } + // Delta is less advanced and is delete. + AdvanceDelta(); + if (equal_keys_) { + AdvanceBase(); + } + } else { + current_at_base_ = true; + return; + } + } + } + + AssertInvariants(); +#endif // __clang_analyzer__ +} + +void WBWIIteratorImpl::AdvanceKey(bool forward) { + if (Valid()) { + Slice key = Entry().key; + do { + if (forward) { + Next(); + } else { + Prev(); + } + } while (MatchesKey(column_family_id_, key)); + } +} + +void WBWIIteratorImpl::NextKey() { AdvanceKey(true); } + +void WBWIIteratorImpl::PrevKey() { + AdvanceKey(false); // Move to the tail of the previous key + if (Valid()) { + AdvanceKey(false); // Move back another key. 
Now we are at the start of + // the previous key + if (Valid()) { // Still a valid + Next(); // Move forward one onto this key + } else { + SeekToFirst(); // Not valid, move to the start + } + } +} + +WBWIIteratorImpl::Result WBWIIteratorImpl::FindLatestUpdate( + MergeContext* merge_context) { + if (Valid()) { + Slice key = Entry().key; + return FindLatestUpdate(key, merge_context); + } else { + merge_context->Clear(); // Clear any entries in the MergeContext + return WBWIIteratorImpl::kNotFound; + } +} + +WBWIIteratorImpl::Result WBWIIteratorImpl::FindLatestUpdate( + const Slice& key, MergeContext* merge_context) { + Result result = WBWIIteratorImpl::kNotFound; + merge_context->Clear(); // Clear any entries in the MergeContext + // TODO(agiardullo): consider adding support for reverse iteration + if (!Valid()) { + return result; + } else if (comparator_->CompareKey(column_family_id_, Entry().key, key) != + 0) { + return result; + } else { + // We want to iterate in the reverse order that the writes were added to the + // batch. Since we don't have a reverse iterator, we must seek past the + // end. We do this by seeking to the next key, and then back one step + NextKey(); + if (Valid()) { + Prev(); + } else { + SeekToLast(); + } + + // We are at the end of the iterator for this key. Search backwards for the + // last Put or Delete, accumulating merges along the way. + while (Valid()) { + const WriteEntry entry = Entry(); + if (comparator_->CompareKey(column_family_id_, entry.key, key) != 0) { + break; // Unexpected error or we've reached a different next key + } + + switch (entry.type) { + case kPutRecord: + return WBWIIteratorImpl::kFound; + case kDeleteRecord: + return WBWIIteratorImpl::kDeleted; + case kSingleDeleteRecord: + return WBWIIteratorImpl::kDeleted; + case kMergeRecord: + result = WBWIIteratorImpl::kMergeInProgress; + merge_context->PushOperand(entry.value); + break; + case kLogDataRecord: + break; // ignore + case kXIDRecord: + break; // ignore + default: + return WBWIIteratorImpl::kError; + } // end switch statement + Prev(); + } // End while Valid() + // At this point, we have been through the whole list and found no Puts or + // Deletes. The iterator points to the previous key. Move the iterator back + // onto this one. + if (Valid()) { + Next(); + } else { + SeekToFirst(); + } + } + return result; +} + +Status ReadableWriteBatch::GetEntryFromDataOffset(size_t data_offset, + WriteType* type, Slice* Key, + Slice* value, Slice* blob, + Slice* xid) const { + if (type == nullptr || Key == nullptr || value == nullptr || + blob == nullptr || xid == nullptr) { + return Status::InvalidArgument("Output parameters cannot be null"); + } + + if (data_offset == GetDataSize()) { + // reached end of batch. 
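+    // (This NotFound is the expected end-of-batch signal for callers that
+    // scan by offset, not a corruption report.)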
+    return Status::NotFound();
+  }
+
+  if (data_offset > GetDataSize()) {
+    return Status::InvalidArgument("data offset exceed write batch size");
+  }
+  Slice input = Slice(rep_.data() + data_offset, rep_.size() - data_offset);
+  char tag;
+  uint32_t column_family;
+  Status s = ReadRecordFromWriteBatch(&input, &tag, &column_family, Key, value,
+                                      blob, xid);
+  if (!s.ok()) {
+    return s;
+  }
+
+  switch (tag) {
+    case kTypeColumnFamilyValue:
+    case kTypeValue:
+      *type = kPutRecord;
+      break;
+    case kTypeColumnFamilyDeletion:
+    case kTypeDeletion:
+      *type = kDeleteRecord;
+      break;
+    case kTypeColumnFamilySingleDeletion:
+    case kTypeSingleDeletion:
+      *type = kSingleDeleteRecord;
+      break;
+    case kTypeColumnFamilyRangeDeletion:
+    case kTypeRangeDeletion:
+      *type = kDeleteRangeRecord;
+      break;
+    case kTypeColumnFamilyMerge:
+    case kTypeMerge:
+      *type = kMergeRecord;
+      break;
+    case kTypeLogData:
+      *type = kLogDataRecord;
+      break;
+    case kTypeNoop:
+    case kTypeBeginPrepareXID:
+    case kTypeBeginPersistedPrepareXID:
+    case kTypeBeginUnprepareXID:
+    case kTypeEndPrepareXID:
+    case kTypeCommitXID:
+    case kTypeRollbackXID:
+      *type = kXIDRecord;
+      break;
+    default:
+      return Status::Corruption("unknown WriteBatch tag ",
+                                std::to_string(static_cast<unsigned int>(tag)));
+  }
+  return Status::OK();
+}
+
+// If both `entry1` and `entry2` point to real entries in the write batch, we
+// compare them as follows:
+// 1. First compare the column family; the entry with the larger CF is larger.
+// 2. Within the same CF, decode each entry to find its key; the entry with the
+//    larger key is larger.
+// 3. If two entries have the same CF and key, the one with the larger offset
+//    is larger.
+// Sometimes either `entry1` or `entry2` is a dummy entry, which is actually
+// a search key. In this case, in step 2, we don't go ahead and decode the
+// entry but use the value in WriteBatchIndexEntry::search_key.
+// One special case is WriteBatchIndexEntry::key_size being kFlagMinInCf,
+// which indicates that we are going to seek to the first entry of the column
+// family. Once we see this, this entry will be smaller than all the real
+// entries of the column family.
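+// Worked example of the ordering (annotation, not upstream text), assuming
+// the default bytewise key comparator:
+//   (cf=1, key="a", offset=10) < (cf=1, key="a", offset=30)  -- by rule 3
+//   (cf=1, key="a", offset=30) < (cf=1, key="b", offset=20)  -- by rule 2
+//   (cf=1, key="b", offset=20) < (cf=2, key="a", offset=5)   -- by rule 1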
+int WriteBatchEntryComparator::operator()( + const WriteBatchIndexEntry* entry1, + const WriteBatchIndexEntry* entry2) const { + if (entry1->column_family > entry2->column_family) { + return 1; + } else if (entry1->column_family < entry2->column_family) { + return -1; + } + + // Deal with special case of seeking to the beginning of a column family + if (entry1->is_min_in_cf()) { + return -1; + } else if (entry2->is_min_in_cf()) { + return 1; + } + + Slice key1, key2; + if (entry1->search_key == nullptr) { + key1 = Slice(write_batch_->Data().data() + entry1->key_offset, + entry1->key_size); + } else { + key1 = *(entry1->search_key); + } + if (entry2->search_key == nullptr) { + key2 = Slice(write_batch_->Data().data() + entry2->key_offset, + entry2->key_size); + } else { + key2 = *(entry2->search_key); + } + + int cmp = CompareKey(entry1->column_family, key1, key2); + if (cmp != 0) { + return cmp; + } else if (entry1->offset > entry2->offset) { + return 1; + } else if (entry1->offset < entry2->offset) { + return -1; + } + return 0; +} + +int WriteBatchEntryComparator::CompareKey(uint32_t column_family, + const Slice& key1, + const Slice& key2) const { + if (column_family < cf_comparators_.size() && + cf_comparators_[column_family] != nullptr) { + return cf_comparators_[column_family]->CompareWithoutTimestamp( + key1, /*a_has_ts=*/false, key2, /*b_has_ts=*/false); + } else { + return default_comparator_->CompareWithoutTimestamp( + key1, /*a_has_ts=*/false, key2, /*b_has_ts=*/false); + } +} + +const Comparator* WriteBatchEntryComparator::GetComparator( + const ColumnFamilyHandle* column_family) const { + return column_family ? column_family->GetComparator() : default_comparator_; +} + +const Comparator* WriteBatchEntryComparator::GetComparator( + uint32_t column_family) const { + if (column_family < cf_comparators_.size() && + cf_comparators_[column_family]) { + return cf_comparators_[column_family]; + } + return default_comparator_; +} + +WriteEntry WBWIIteratorImpl::Entry() const { + WriteEntry ret; + Slice blob, xid; + const WriteBatchIndexEntry* iter_entry = skip_list_iter_.key(); + // this is guaranteed with Valid() + assert(iter_entry != nullptr && + iter_entry->column_family == column_family_id_); + auto s = write_batch_->GetEntryFromDataOffset( + iter_entry->offset, &ret.type, &ret.key, &ret.value, &blob, &xid); + assert(s.ok()); + assert(ret.type == kPutRecord || ret.type == kDeleteRecord || + ret.type == kSingleDeleteRecord || ret.type == kDeleteRangeRecord || + ret.type == kMergeRecord); + // Make sure entry.key does not include user-defined timestamp. 
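+  // (The raw batch record keeps the key together with its timestamp suffix;
+  // callers of Entry() expect the bare user key, so it is stripped below.)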
+  const Comparator* const ucmp = comparator_->GetComparator(column_family_id_);
+  size_t ts_sz = ucmp->timestamp_size();
+  if (ts_sz > 0) {
+    ret.key = StripTimestampFromUserKey(ret.key, ts_sz);
+  }
+  return ret;
+}
+
+bool WBWIIteratorImpl::MatchesKey(uint32_t cf_id, const Slice& key) {
+  if (Valid()) {
+    return comparator_->CompareKey(cf_id, key, Entry().key) == 0;
+  } else {
+    return false;
+  }
+}
+
+WriteBatchWithIndexInternal::WriteBatchWithIndexInternal(
+    ColumnFamilyHandle* column_family)
+    : db_(nullptr), db_options_(nullptr), column_family_(column_family) {}
+
+WriteBatchWithIndexInternal::WriteBatchWithIndexInternal(
+    DB* db, ColumnFamilyHandle* column_family)
+    : db_(db), db_options_(nullptr), column_family_(column_family) {
+  if (db_ != nullptr && column_family_ == nullptr) {
+    column_family_ = db_->DefaultColumnFamily();
+  }
+}
+
+WriteBatchWithIndexInternal::WriteBatchWithIndexInternal(
+    const DBOptions* db_options, ColumnFamilyHandle* column_family)
+    : db_(nullptr), db_options_(db_options), column_family_(column_family) {}
+
+Status WriteBatchWithIndexInternal::MergeKey(const Slice& key,
+                                             const Slice* value,
+                                             const MergeContext& context,
+                                             std::string* result) const {
+  if (column_family_ != nullptr) {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family_);
+    const auto merge_operator = cfh->cfd()->ioptions()->merge_operator.get();
+    if (merge_operator == nullptr) {
+      return Status::InvalidArgument(
+          "Merge_operator must be set for column_family");
+    } else if (db_ != nullptr) {
+      const ImmutableDBOptions& immutable_db_options =
+          static_cast_with_check<DBImpl>(db_->GetRootDB())
+              ->immutable_db_options();
+      Statistics* statistics = immutable_db_options.statistics.get();
+      Logger* logger = immutable_db_options.info_log.get();
+      SystemClock* clock = immutable_db_options.clock;
+      // `op_failure_scope` (an output parameter) is not provided (set to
+      // nullptr) since a failure must be propagated regardless of its value.
+      return MergeHelper::TimedFullMerge(
+          merge_operator, key, value, context.GetOperands(), result, logger,
+          statistics, clock, /* result_operand */ nullptr,
+          /* update_num_ops_stats */ false,
+          /* op_failure_scope */ nullptr);
+    } else if (db_options_ != nullptr) {
+      Statistics* statistics = db_options_->statistics.get();
+      Env* env = db_options_->env;
+      Logger* logger = db_options_->info_log.get();
+      SystemClock* clock = env->GetSystemClock().get();
+      // `op_failure_scope` (an output parameter) is not provided (set to
+      // nullptr) since a failure must be propagated regardless of its value.
+      return MergeHelper::TimedFullMerge(
+          merge_operator, key, value, context.GetOperands(), result, logger,
+          statistics, clock, /* result_operand */ nullptr,
+          /* update_num_ops_stats */ false,
+          /* op_failure_scope */ nullptr);
+    } else {
+      const auto cf_opts = cfh->cfd()->ioptions();
+      // `op_failure_scope` (an output parameter) is not provided (set to
+      // nullptr) since a failure must be propagated regardless of its value.
+      return MergeHelper::TimedFullMerge(
+          merge_operator, key, value, context.GetOperands(), result,
+          cf_opts->logger, cf_opts->stats, cf_opts->clock,
+          /* result_operand */ nullptr, /* update_num_ops_stats */ false,
+          /* op_failure_scope */ nullptr);
+    }
+  } else {
+    return Status::InvalidArgument("Must provide a column_family");
+  }
+}
+
+WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch(
+    WriteBatchWithIndex* batch, const Slice& key, MergeContext* context,
+    std::string* value, Status* s) {
+  *s = Status::OK();
+
+  std::unique_ptr<WBWIIteratorImpl> iter(
+      static_cast_with_check<WBWIIteratorImpl>(
+          batch->NewIterator(column_family_)));
+
+  // Search the iterator for this key, and updates/merges to it.
+  iter->Seek(key);
+  auto result = iter->FindLatestUpdate(key, context);
+  if (result == WBWIIteratorImpl::kError) {
+    (*s) = Status::Corruption("Unexpected entry in WriteBatchWithIndex:",
+                              std::to_string(iter->Entry().type));
+    return result;
+  } else if (result == WBWIIteratorImpl::kNotFound) {
+    return result;
+  } else if (result == WBWIIteratorImpl::Result::kFound) {  // PUT
+    Slice entry_value = iter->Entry().value;
+    if (context->GetNumOperands() > 0) {
+      *s = MergeKey(key, &entry_value, *context, value);
+      if (!s->ok()) {
+        result = WBWIIteratorImpl::Result::kError;
+      }
+    } else {
+      value->assign(entry_value.data(), entry_value.size());
+    }
+  } else if (result == WBWIIteratorImpl::kDeleted) {
+    if (context->GetNumOperands() > 0) {
+      *s = MergeKey(key, nullptr, *context, value);
+      if (s->ok()) {
+        result = WBWIIteratorImpl::Result::kFound;
+      } else {
+        result = WBWIIteratorImpl::Result::kError;
+      }
+    }
+  }
+  return result;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
diff --git a/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.h b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.h
new file mode 100644
index 0000000000..031d72889e
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/utilities/write_batch_with_index/write_batch_with_index_internal.h
@@ -0,0 +1,342 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "db/merge_context.h"
+#include "memtable/skiplist.h"
+#include "options/db_options.h"
+#include "port/port.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "rocksdb/utilities/write_batch_with_index.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class MergeContext;
+class WBWIIteratorImpl;
+class WriteBatchWithIndexInternal;
+struct Options;
+
+// when direction == forward
+// * current_at_base_ <=> base_iterator > delta_iterator
+// when direction == backwards
+// * current_at_base_ <=> base_iterator < delta_iterator
+// always:
+// * equal_keys_ <=> base_iterator == delta_iterator
+class BaseDeltaIterator : public Iterator {
+ public:
+  BaseDeltaIterator(ColumnFamilyHandle* column_family, Iterator* base_iterator,
+                    WBWIIteratorImpl* delta_iterator,
+                    const Comparator* comparator,
+                    const ReadOptions* read_options = nullptr);
+
+  ~BaseDeltaIterator() override {}
+
+  bool Valid() const override;
+  void SeekToFirst() override;
+  void SeekToLast() override;
+  void Seek(const Slice& k) override;
+  void SeekForPrev(const Slice& k) override;
+  void Next() override;
+  void Prev() override;
+  Slice key() const override;
+  Slice value() const override;
+  Status status() const override;
+  void Invalidate(Status s);
+
+ private:
+  void AssertInvariants();
+  void Advance();
+  void AdvanceDelta();
+  void AdvanceBase();
+  bool BaseValid() const;
+  bool DeltaValid() const;
+  void UpdateCurrent();
+
+  std::unique_ptr<WriteBatchWithIndexInternal> wbwii_;
+  bool forward_;
+  bool current_at_base_;
+  bool equal_keys_;
+  mutable Status status_;
+  std::unique_ptr<Iterator> base_iterator_;
+  std::unique_ptr<WBWIIteratorImpl> delta_iterator_;
+  const Comparator* comparator_;  // not owned
+  const Slice* iterate_upper_bound_;
+  mutable PinnableSlice merge_result_;
+};
+
+// Key used by skip list, as the binary searchable index of
+// WriteBatchWithIndex.
+struct WriteBatchIndexEntry {
+  WriteBatchIndexEntry(size_t o, uint32_t c, size_t ko, size_t ksz)
+      : offset(o),
+        column_family(c),
+        key_offset(ko),
+        key_size(ksz),
+        search_key(nullptr) {}
+  // Create a dummy entry as the search key. This index entry won't be backed
+  // by an entry from the write batch, but by a pointer to the search key.
+  // Alternatively, a special flag in offset can indicate that we seek to the
+  // first entry of the column family.
+  // @_search_key: the search key
+  // @_column_family: column family
+  // @is_forward_direction: true for Seek(). False for SeekForPrev()
+  // @is_seek_to_first: true if we seek to the beginning of the column family;
+  //                    _search_key should be null in this case.
+  WriteBatchIndexEntry(const Slice* _search_key, uint32_t _column_family,
+                       bool is_forward_direction, bool is_seek_to_first)
+      // For SeekForPrev(), we need to make the dummy entry larger than any
+      // entry that has the same search key. Otherwise, we'll miss those
+      // entries.
+      : offset(is_forward_direction ? 0 : std::numeric_limits<size_t>::max()),
+        column_family(_column_family),
+        key_offset(0),
+        key_size(is_seek_to_first ? kFlagMinInCf : 0),
+        search_key(_search_key) {
+    assert(_search_key != nullptr || is_seek_to_first);
+  }
+
+  // If this flag appears in the key_size, it indicates a
+  // key that is smaller than any other entry for the same column family.
+  static const size_t kFlagMinInCf = std::numeric_limits<size_t>::max();
+
+  bool is_min_in_cf() const {
+    assert(key_size != kFlagMinInCf ||
+           (key_offset == 0 && search_key == nullptr));
+    return key_size == kFlagMinInCf;
+  }
+
+  // Offset of an entry in the write batch's string buffer. If this is a dummy
+  // lookup key (in which case search_key != nullptr), offset is set to either
+  // 0 or max, only for comparison purposes: when entries have the same key,
+  // the entry with the larger offset is larger, so offset = 0 makes a seek
+  // key smaller than or equal to all the entries with that key, and Seek()
+  // will find all the entries of the same key. Similarly, offset = MAX makes
+  // the entry just larger than all entries with the search key, so
+  // SeekForPrev() will see all the entries with the same key.
+  size_t offset;
+  uint32_t column_family;  // column family of the entry.
+  size_t key_offset;       // offset of the key in write batch's string buffer.
+  size_t key_size;         // size of the key. kFlagMinInCf indicates
+                           // that this is a dummy look up entry for
+                           // SeekToFirst() to the beginning of the column
+                           // family. We use the flag here to save a boolean
+                           // in the struct.
+
+  const Slice* search_key;  // if not null, instead of reading keys from
+                            // the write batch, use it to compare. This is
+                            // used for lookup keys.
+};
+
+class ReadableWriteBatch : public WriteBatch {
+ public:
+  explicit ReadableWriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0,
+                              size_t protection_bytes_per_key = 0,
+                              size_t default_cf_ts_sz = 0)
+      : WriteBatch(reserved_bytes, max_bytes, protection_bytes_per_key,
+                   default_cf_ts_sz) {}
+  // Retrieve some information from a write entry in the write batch, given
+  // the start offset of the write entry.
+  Status GetEntryFromDataOffset(size_t data_offset, WriteType* type, Slice* Key,
+                                Slice* value, Slice* blob, Slice* xid) const;
+};
+
+class WriteBatchEntryComparator {
+ public:
+  WriteBatchEntryComparator(const Comparator* _default_comparator,
+                            const ReadableWriteBatch* write_batch)
+      : default_comparator_(_default_comparator), write_batch_(write_batch) {}
+  // Compare a and b. Return a negative value if a is less than b, 0 if they
+  // are equal, and a positive value if a is greater than b.
+  int operator()(const WriteBatchIndexEntry* entry1,
+                 const WriteBatchIndexEntry* entry2) const;
+
+  int CompareKey(uint32_t column_family, const Slice& key1,
+                 const Slice& key2) const;
+
+  void SetComparatorForCF(uint32_t column_family_id,
+                          const Comparator* comparator) {
+    if (column_family_id >= cf_comparators_.size()) {
+      cf_comparators_.resize(column_family_id + 1, nullptr);
+    }
+    cf_comparators_[column_family_id] = comparator;
+  }
+
+  const Comparator* default_comparator() { return default_comparator_; }
+
+  const Comparator* GetComparator(
+      const ColumnFamilyHandle* column_family) const;
+
+  const Comparator* GetComparator(uint32_t column_family) const;
+
+ private:
+  const Comparator* const default_comparator_;
+  std::vector<const Comparator*> cf_comparators_;
+  const ReadableWriteBatch* const write_batch_;
+};
+
+using WriteBatchEntrySkipList =
+    SkipList<WriteBatchIndexEntry*, const WriteBatchEntryComparator&>;
+
+class WBWIIteratorImpl : public WBWIIterator {
+ public:
+  enum Result : uint8_t {
+    kFound,
+    kDeleted,
+    kNotFound,
+    kMergeInProgress,
+    kError
+  };
+  WBWIIteratorImpl(uint32_t column_family_id,
+                   WriteBatchEntrySkipList* skip_list,
+                   const ReadableWriteBatch* write_batch,
+                   WriteBatchEntryComparator* comparator)
+      : column_family_id_(column_family_id),
+        skip_list_iter_(skip_list),
+        write_batch_(write_batch),
+        comparator_(comparator) {}
+
+  ~WBWIIteratorImpl() override {}
+
+  bool Valid() const override {
+    if (!skip_list_iter_.Valid()) {
+      return false;
+    }
+    const WriteBatchIndexEntry* iter_entry = skip_list_iter_.key();
+    return (iter_entry != nullptr &&
+            iter_entry->column_family == column_family_id_);
+  }
+
+  void SeekToFirst() override {
+    WriteBatchIndexEntry search_entry(
+        nullptr /* search_key */, column_family_id_,
+        true /* is_forward_direction */, true /* is_seek_to_first */);
+    skip_list_iter_.Seek(&search_entry);
+  }
+
+  void SeekToLast() override {
+    WriteBatchIndexEntry search_entry(
+        nullptr /* search_key */, column_family_id_ + 1,
+        true /* is_forward_direction */, true /* is_seek_to_first */);
+    skip_list_iter_.Seek(&search_entry);
+    if (!skip_list_iter_.Valid()) {
+      skip_list_iter_.SeekToLast();
+    } else {
+      skip_list_iter_.Prev();
+    }
+  }
+
+  void Seek(const Slice& key) override {
+    WriteBatchIndexEntry search_entry(&key, column_family_id_,
+                                      true /* is_forward_direction */,
+                                      false /* is_seek_to_first */);
+    skip_list_iter_.Seek(&search_entry);
+  }
+
+  void SeekForPrev(const Slice& key) override {
+    WriteBatchIndexEntry search_entry(&key, column_family_id_,
+                                      false /* is_forward_direction */,
+                                      false /* is_seek_to_first */);
+    skip_list_iter_.SeekForPrev(&search_entry);
+  }
+
+  void Next() override { skip_list_iter_.Next(); }
+
+  void Prev() override { skip_list_iter_.Prev(); }
+
+  WriteEntry Entry() const override;
+
+  Status status() const override {
+    // This is an in-memory data structure, so the only way status can be
+    // non-ok is through memory corruption.
+    return Status::OK();
+  }
+
+  const WriteBatchIndexEntry* GetRawEntry() const {
+    return skip_list_iter_.key();
+  }
+
+  bool MatchesKey(uint32_t cf_id, const Slice& key);
+
+  // Moves the iterator to the first entry of the previous key.
+  void PrevKey();
+  // Moves the iterator to the first entry of the next key.
+ void NextKey(); + + // Moves the iterator to the Update (Put or Delete) for the current key + // If there are no Put/Delete, the Iterator will point to the first entry for + // this key + // @return kFound if a Put was found for the key + // @return kDeleted if a delete was found for the key + // @return kMergeInProgress if only merges were fouund for the key + // @return kError if an unsupported operation was found for the key + // @return kNotFound if no operations were found for this key + // + Result FindLatestUpdate(const Slice& key, MergeContext* merge_context); + Result FindLatestUpdate(MergeContext* merge_context); + + protected: + void AdvanceKey(bool forward); + + private: + uint32_t column_family_id_; + WriteBatchEntrySkipList::Iterator skip_list_iter_; + const ReadableWriteBatch* write_batch_; + WriteBatchEntryComparator* comparator_; +}; + +class WriteBatchWithIndexInternal { + public: + static const Comparator* GetUserComparator(const WriteBatchWithIndex& wbwi, + uint32_t cf_id); + + // For GetFromBatchAndDB or similar + explicit WriteBatchWithIndexInternal(DB* db, + ColumnFamilyHandle* column_family); + // For GetFromBatchAndDB or similar + explicit WriteBatchWithIndexInternal(ColumnFamilyHandle* column_family); + // For GetFromBatch or similar + explicit WriteBatchWithIndexInternal(const DBOptions* db_options, + ColumnFamilyHandle* column_family); + + // If batch contains a value for key, store it in *value and return kFound. + // If batch contains a deletion for key, return Deleted. + // If batch contains Merge operations as the most recent entry for a key, + // and the merge process does not stop (not reaching a value or delete), + // prepend the current merge operands to *operands, + // and return kMergeInProgress + // If batch does not contain this key, return kNotFound + // Else, return kError on error with error Status stored in *s. + WBWIIteratorImpl::Result GetFromBatch(WriteBatchWithIndex* batch, + const Slice& key, std::string* value, + Status* s) { + return GetFromBatch(batch, key, &merge_context_, value, s); + } + WBWIIteratorImpl::Result GetFromBatch(WriteBatchWithIndex* batch, + const Slice& key, + MergeContext* merge_context, + std::string* value, Status* s); + Status MergeKey(const Slice& key, const Slice* value, + std::string* result) const { + return MergeKey(key, value, merge_context_, result); + } + Status MergeKey(const Slice& key, const Slice* value, + const MergeContext& context, std::string* result) const; + size_t GetNumOperands() const { return merge_context_.GetNumOperands(); } + MergeContext* GetMergeContext() { return &merge_context_; } + Slice GetOperand(int index) const { return merge_context_.GetOperand(index); } + + private: + DB* db_; + const DBOptions* db_options_; + ColumnFamilyHandle* column_family_; + MergeContext merge_context_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/vendored/rocksdb-haskell-kadena/src/Database/RocksDB.hs b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB.hs new file mode 100644 index 0000000000..730a122d65 --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB.hs @@ -0,0 +1,17 @@ +-- | +-- Module : Database.RocksDB +-- Copyright : (c) 2012-2013 The leveldb-haskell Authors +-- (c) 2014 The rocksdb-haskell Authors +-- License : BSD3 +-- Maintainer : mail@agrafix.net +-- Stability : experimental +-- Portability : non-portable +-- +-- RocksDB Haskell binding. +-- +-- The API closely follows the C-API of RocksDB. 
+-- For more information, see: + +module Database.RocksDB (module Base) where + +import Database.RocksDB.Base as Base diff --git a/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Base.hs b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Base.hs new file mode 100644 index 0000000000..4aff66a7cf --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Base.hs @@ -0,0 +1,336 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE TupleSections #-} + +-- | +-- Module : Database.RocksDB.Base +-- Copyright : (c) 2012-2013 The leveldb-haskell Authors +-- (c) 2014 The rocksdb-haskell Authors +-- License : BSD3 +-- Maintainer : mail@agrafix.net +-- Stability : experimental +-- Portability : non-portable +-- +-- RocksDB Haskell binding. +-- +-- The API closely follows the C-API of RocksDB. +-- For more information, see: + +module Database.RocksDB.Base + ( -- * Exported Types + DB + , BatchOp (..) + , Comparator (..) + , Compression (..) + , Options (..) + , Snapshot + , WriteBatch + , WriteOptions (..) + , Range + + -- * Defaults + , defaultOptions + , defaultWriteOptions + + -- * Basic Database Manipulations + , open + , openReadOnly + -- , openBracket + , close + , put + , putBinaryVal + , putBinary + , delete + , write + , get + , getBinary + , getBinaryVal + , withSnapshot + -- , withSnapshotBracket + , createSnapshot + , releaseSnapshot + + -- * Administrative Functions + , Property (..), getProperty + , destroy + , repair + , approximateSize + + -- * Utility functions to help perform mass writes + , binaryToBS + , bsToBinary + + -- * Iteration + , module Database.RocksDB.Iterator + ) where + +import Control.Exception (bracket, bracketOnError) +import Control.Monad (liftM, when) + +import Control.Monad.IO.Class (MonadIO (liftIO)) +import Data.Binary (Binary) +import qualified Data.Binary as Binary +import Data.ByteString (ByteString) +import Data.ByteString.Internal (ByteString (..)) +import qualified Data.ByteString.Lazy as BSL +import Foreign +import Foreign.C.String (CString, withCString) +import System.Directory (createDirectoryIfMissing) + +import Database.RocksDB.C +import Database.RocksDB.Internal +import Database.RocksDB.Iterator +import Database.RocksDB.ReadOptions +import Database.RocksDB.Types + +import qualified Data.ByteString as BS +import qualified Data.ByteString.Unsafe as BU + +import qualified GHC.Foreign as GHC +import qualified GHC.IO.Encoding as GHC + +-- -- | Open a database +-- -- +-- -- The returned handle will automatically be released when the enclosing +-- -- 'runResourceT' terminates. +-- openBracket :: MonadResource m => FilePath -> Options -> m (ReleaseKey, DB) +-- openBracket path opts = allocate (open path opts) close +-- {-# INLINE openBracket #-} + +-- -- | Run an action with a snapshot of the database. +-- -- +-- -- The snapshot will be released when the action terminates or throws an +-- -- exception. Note that this function is provided for convenience and does not +-- -- prevent the 'Snapshot' handle to escape. It will, however, be invalid after +-- -- this function returns and should not be used anymore. +-- withSnapshotBracket :: MonadResource m => DB -> (Snapshot -> m a) -> m a +-- withSnapshotBracket db f = do +-- (rk, snap) <- createSnapshotBracket db +-- res <- f snap +-- release rk +-- return res + +-- -- | Create a snapshot of the database. +-- -- +-- -- The returned 'Snapshot' will be released automatically when the enclosing +-- -- 'runResourceT' terminates. 
It is recommended to use 'createSnapshot'' instead
-- -- and release the resource manually as soon as possible.
-- -- Can be released early.
-- createSnapshotBracket :: MonadResource m => DB -> m (ReleaseKey, Snapshot)
-- createSnapshotBracket db = allocate (createSnapshot db) (releaseSnapshot db)
+
+openWith :: MonadIO m => (OptionsPtr -> CString -> ErrPtr -> IO RocksDBPtr) -> FilePath -> Options -> m DB
+openWith opener path opts = liftIO $ bracketOnError initialize finalize mkDB
+  where
+# ifdef mingw32_HOST_OS
+    initialize =
+        (, ()) <$> mkOpts opts
+    finalize (opts', ()) =
+        freeOpts opts'
+# else
+    initialize = do
+        opts' <- mkOpts opts
+        -- With LC_ALL=C, two things happen:
+        --   * rocksdb can't open a database with unicode in the path;
+        --   * rocksdb can't create a folder properly.
+        -- So, we create the folder ourselves, and for that we need to set
+        -- the encoding we're going to use. On Linux it's almost always
+        -- UTF-8.
+        oldenc <- GHC.getFileSystemEncoding
+        when (createIfMissing opts) $
+            GHC.setFileSystemEncoding GHC.utf8
+        pure (opts', oldenc)
+    finalize (opts', oldenc) = do
+        freeOpts opts'
+        GHC.setFileSystemEncoding oldenc
+# endif
+    mkDB (Options' opts_ptr _ _, _) = do
+        when (createIfMissing opts) $
+            createDirectoryIfMissing True path
+        withFilePath path $ \path_ptr ->
+            liftM DB
+            $ throwIfErr "open"
+            $ opener opts_ptr path_ptr
+
+-- | Open a database.
+--
+-- The returned handle should be released with 'close'.
+open :: MonadIO m => FilePath -> Options -> m DB
+open = openWith c_rocksdb_open
+
+-- | Open a database in read-only mode. Any changes made to the database
+-- after it is opened read-only will not be visible until it is re-opened.
+--
+-- The returned handle should be released with 'close'.
+openReadOnly :: MonadIO m => FilePath -> Options -> m DB
+openReadOnly = openWith (\o p -> c_rocksdb_open_for_read_only o p 0)
+
+-- | Close a database.
+--
+-- The handle will be invalid after calling this action and should no
+-- longer be used.
+close :: MonadIO m => DB -> m ()
+close (DB db_ptr) = liftIO $
+    c_rocksdb_close db_ptr
+
+-- | Run an action with a 'Snapshot' of the database.
+withSnapshot :: MonadIO m => DB -> (Snapshot -> IO a) -> m a
+withSnapshot db act = liftIO $
+    bracket (createSnapshot db) (releaseSnapshot db) act
+
+-- | Create a snapshot of the database.
+--
+-- The returned 'Snapshot' should be released with 'releaseSnapshot'.
+createSnapshot :: MonadIO m => DB -> m Snapshot
+createSnapshot (DB db_ptr) = liftIO $
+    Snapshot <$> c_rocksdb_create_snapshot db_ptr
+
+-- | Release a snapshot.
+--
+-- The handle will be invalid after calling this action and should no
+-- longer be used.
+releaseSnapshot :: MonadIO m => DB -> Snapshot -> m ()
+releaseSnapshot (DB db_ptr) (Snapshot snap) = liftIO $
+    c_rocksdb_release_snapshot db_ptr snap
+
+-- | Get a DB property.
+getProperty :: MonadIO m => DB -> Property -> m (Maybe ByteString)
+getProperty (DB db_ptr) p = liftIO $
+    withCString (prop p) $ \prop_ptr -> do
+        val_ptr <- c_rocksdb_property_value db_ptr prop_ptr
+        if val_ptr == nullPtr
+            then return Nothing
+            else do res <- Just <$> BS.packCString val_ptr
+                    freeCString val_ptr
+                    return res
+  where
+    prop (NumFilesAtLevel i) = "rocksdb.num-files-at-level" ++ show i
+    prop Stats               = "rocksdb.stats"
+    prop SSTables            = "rocksdb.sstables"
+
+-- | Destroy the given RocksDB database.
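+--
+-- A hedged usage sketch (the path is illustrative; no handle may be open
+-- on the database while it is destroyed):
+--
+-- > destroy "/tmp/mydb" defaultOptions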
+destroy :: MonadIO m => FilePath -> Options -> m () +destroy path opts = liftIO $ bracket (mkOpts opts) freeOpts destroy' + where + destroy' (Options' opts_ptr _ _) = + withFilePath path $ \path_ptr -> + throwIfErr "destroy" $ c_rocksdb_destroy_db opts_ptr path_ptr + +-- | Repair the given RocksDB database. +repair :: MonadIO m => FilePath -> Options -> m () +repair path opts = liftIO $ bracket (mkOpts opts) freeOpts repair' + where + repair' (Options' opts_ptr _ _) = + withFilePath path $ \path_ptr -> + throwIfErr "repair" $ c_rocksdb_repair_db opts_ptr path_ptr + +-- TODO: support [Range], like C API does +type Range = (ByteString, ByteString) + +-- | Inspect the approximate sizes of the different levels. +approximateSize :: MonadIO m => DB -> Range -> m Int64 +approximateSize (DB db_ptr) (from, to) = liftIO $ + BU.unsafeUseAsCStringLen from $ \(from_ptr, flen) -> + BU.unsafeUseAsCStringLen to $ \(to_ptr, tlen) -> + withArray [from_ptr] $ \from_ptrs -> + withArray [intToCSize flen] $ \flen_ptrs -> + withArray [to_ptr] $ \to_ptrs -> + withArray [intToCSize tlen] $ \tlen_ptrs -> + allocaArray 1 $ \size_ptrs -> do + c_rocksdb_approximate_sizes db_ptr 1 + from_ptrs flen_ptrs + to_ptrs tlen_ptrs + size_ptrs + liftM head $ peekArray 1 size_ptrs >>= mapM toInt64 + + where + toInt64 = return . fromIntegral + +putBinaryVal :: (MonadIO m, Binary v) => DB -> WriteOptions -> ByteString -> v -> m () +putBinaryVal db wopts key val = put db wopts key (binaryToBS val) + +putBinary :: (MonadIO m, Binary k, Binary v) => DB -> WriteOptions -> k -> v -> m () +putBinary db wopts key val = put db wopts (binaryToBS key) (binaryToBS val) + +-- | Write a key/value pair. +put :: MonadIO m => DB -> WriteOptions -> ByteString -> ByteString -> m () +put (DB db_ptr) opts key value = liftIO $ withCWriteOpts opts $ \opts_ptr -> + BU.unsafeUseAsCStringLen key $ \(key_ptr, klen) -> + BU.unsafeUseAsCStringLen value $ \(val_ptr, vlen) -> + throwIfErr "put" + $ c_rocksdb_put db_ptr opts_ptr + key_ptr (intToCSize klen) + val_ptr (intToCSize vlen) + +getBinaryVal :: (Binary v, MonadIO m) => DB -> ReadOptions -> ByteString -> m (Maybe v) +getBinaryVal db ropts key = fmap bsToBinary <$> get db ropts key + +getBinary :: (MonadIO m, Binary k, Binary v) => DB -> ReadOptions -> k -> m (Maybe v) +getBinary db ropts key = fmap bsToBinary <$> get db ropts (binaryToBS key) + +-- | Read a value by key. +get :: MonadIO m => DB -> ReadOptions -> ByteString -> m (Maybe ByteString) +get (DB db_ptr) opts key = liftIO $ withReadOptions opts $ \opts_ptr -> + BU.unsafeUseAsCStringLen key $ \(key_ptr, klen) -> + alloca $ \vlen_ptr -> do + val_ptr <- throwIfErr "get" $ + c_rocksdb_get db_ptr opts_ptr key_ptr (intToCSize klen) vlen_ptr + vlen <- peek vlen_ptr + if val_ptr == nullPtr + then return Nothing + else do + res' <- Just <$> BS.packCStringLen (val_ptr, cSizeToInt vlen) + freeCString val_ptr + return res' + +-- | Delete a key/value pair. +delete :: MonadIO m => DB -> WriteOptions -> ByteString -> m () +delete (DB db_ptr) opts key = liftIO $ withCWriteOpts opts $ \opts_ptr -> + BU.unsafeUseAsCStringLen key $ \(key_ptr, klen) -> + throwIfErr "delete" + $ c_rocksdb_delete db_ptr opts_ptr key_ptr (intToCSize klen) + +-- | Perform a batch mutation. 
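+--
+-- A hedged sketch of a minimal batch (keys and values are illustrative,
+-- written here with Data.ByteString.Char8.pack):
+--
+-- > let k = Data.ByteString.Char8.pack
+-- > write db defaultWriteOptions [ Put (k "k1") (k "v1"), Del (k "k2") ]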
+write :: MonadIO m => DB -> WriteOptions -> WriteBatch -> m () +write (DB db_ptr) opts batch = liftIO $ withCWriteOpts opts $ \opts_ptr -> + bracket c_rocksdb_writebatch_create c_rocksdb_writebatch_destroy $ \batch_ptr -> do + + mapM_ (batchAdd batch_ptr) batch + + throwIfErr "write" $ c_rocksdb_write db_ptr opts_ptr batch_ptr + + -- ensure @ByteString@s (and respective shared @CStringLen@s) aren't GC'ed + -- until here + mapM_ (liftIO . touch) batch + + where + batchAdd batch_ptr (Put key val) = + BU.unsafeUseAsCStringLen key $ \(key_ptr, klen) -> + BU.unsafeUseAsCStringLen val $ \(val_ptr, vlen) -> + c_rocksdb_writebatch_put batch_ptr + key_ptr (intToCSize klen) + val_ptr (intToCSize vlen) + + batchAdd batch_ptr (Del key) = + BU.unsafeUseAsCStringLen key $ \(key_ptr, klen) -> + c_rocksdb_writebatch_delete batch_ptr key_ptr (intToCSize klen) + + touch (Put (PS p _ _) (PS p' _ _)) = do + touchForeignPtr p + touchForeignPtr p' + + touch (Del (PS p _ _)) = touchForeignPtr p + +binaryToBS :: Binary v => v -> ByteString +binaryToBS x = BSL.toStrict (Binary.encode x) + +bsToBinary :: Binary v => ByteString -> v +bsToBinary x = Binary.decode (BSL.fromStrict x) + +-- | Marshal a 'FilePath' (Haskell string) into a `NUL` terminated C string using +-- temporary storage. +-- On Linux, UTF-8 is almost always the encoding used. +-- When on Windows, UTF-8 can also be used, although the default for those devices is +-- UTF-16. For a more detailed explanation, please refer to +-- https://msdn.microsoft.com/en-us/library/windows/desktop/dd374081(v=vs.85).aspx. +withFilePath :: FilePath -> (CString -> IO a) -> IO a +withFilePath = GHC.withCString GHC.utf8 diff --git a/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/C.hsc b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/C.hsc new file mode 100644 index 0000000000..82090193fa --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/C.hsc @@ -0,0 +1,415 @@ +{-# LANGUAGE CPP, ForeignFunctionInterface, EmptyDataDecls #-} +-- | +-- Module : Database.RocksDB.C +-- Copyright : (c) 2012-2013 The rocksdb-haskell Authors +-- License : BSD3 +-- Maintainer : kim.altintop@gmail.com +-- Stability : experimental +-- Portability : non-portable +-- + +module Database.RocksDB.C where + +import Foreign +import Foreign.C.Types +import Foreign.C.String + +#ifdef mingw32_HOST_OS +#include +#else +#include +#endif + +data RocksDB +data LCache +data LComparator +data LIterator +data LLogger +data LOptions +data LReadOptions +data LSnapshot +data LWriteBatch +data LWriteOptions +data PrefixExtractor +data Checkpoint + +type RocksDBPtr = Ptr RocksDB +type CachePtr = Ptr LCache +type ComparatorPtr = Ptr LComparator +type IteratorPtr = Ptr LIterator +type LoggerPtr = Ptr LLogger +type OptionsPtr = Ptr LOptions +type ReadOptionsPtr = Ptr LReadOptions +type SnapshotPtr = Ptr LSnapshot +type WriteBatchPtr = Ptr LWriteBatch +type WriteOptionsPtr = Ptr LWriteOptions + +type DBName = CString +type ErrPtr = Ptr CString +type Key = CString +type Val = CString + +newtype CompressionOpt = CompressionOpt { compressionOpt :: CInt } + deriving (Eq, Show) +#{enum CompressionOpt, CompressionOpt + , noCompression = 0 + , snappyCompression = 1 + , zlibCompression = 2 + , bz2Compression = 3 + , lz4Compression = 4 + , lz4hcCompression = 5 + } + + +foreign import ccall safe "rocksdb\\c.h rocksdb_open" + c_rocksdb_open :: OptionsPtr -> DBName -> ErrPtr -> IO RocksDBPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_open_for_read_only" + c_rocksdb_open_for_read_only :: 
OptionsPtr -> DBName -> CChar -> ErrPtr -> IO RocksDBPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_close" + c_rocksdb_close :: RocksDBPtr -> IO () + + +foreign import ccall safe "rocksdb\\c.h rocksdb_put" + c_rocksdb_put :: RocksDBPtr + -> WriteOptionsPtr + -> Key -> CSize + -> Val -> CSize + -> ErrPtr + -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_delete" + c_rocksdb_delete :: RocksDBPtr + -> WriteOptionsPtr + -> Key -> CSize + -> ErrPtr + -> IO () + + +foreign import ccall unsafe "cpp\\chainweb-rocksdb.h rocksdb_delete_range" + rocksdb_delete_range + :: RocksDBPtr + -> WriteOptionsPtr + -> CString {- min key -} + -> CSize {- min key length -} + -> CString {- max key length -} + -> CSize {- max key length -} + -> Ptr CString {- output: errptr -} + -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_write" + c_rocksdb_write :: RocksDBPtr + -> WriteOptionsPtr + -> WriteBatchPtr + -> ErrPtr + -> IO () + +-- | Returns NULL if not found. A malloc()ed array otherwise. Stores the length +-- of the array in *vallen. +foreign import ccall safe "rocksdb\\c.h rocksdb_get" + c_rocksdb_get :: RocksDBPtr + -> ReadOptionsPtr + -> Key -> CSize + -> Ptr CSize -- ^ value length + -> ErrPtr + -> IO CString + +-- // if values_list[i] == NULL and errs[i] == NULL, +-- // then we got status.IsNotFound(), which we will not return. +-- // all errors except status status.ok() and status.IsNotFound() are returned. +-- // +-- // errs, values_list and values_list_sizes must be num_keys in length, +-- // allocated by the caller. +-- // errs is a list of strings as opposed to the conventional one error, +-- // where errs[i] is the status for retrieval of keys_list[i]. +-- // each non-NULL errs entry is a malloc()ed, null terminated string. +-- // each non-NULL values_list entry is a malloc()ed array, with +-- // the length for each stored in values_list_sizes[i]. +-- extern ROCKSDB_LIBRARY_API void rocksdb_multi_get( +-- rocksdb_t* db, const rocksdb_readoptions_t* options, size_t num_keys, +-- const char* const* keys_list, const size_t* keys_list_sizes, +-- char** values_list, size_t* values_list_sizes, char** errs); +-- +foreign import ccall unsafe "rocksdb\\c.h rocksdb_multi_get" + rocksdb_multi_get + :: RocksDBPtr + -> ReadOptionsPtr + -> CSize + -- ^ num_key + -> Ptr (Ptr CChar) + -- ^ keys_list + -> Ptr CSize + -- ^ keys_list_sizes + -> Ptr (Ptr CChar) + -- ^ values_list + -> Ptr CSize + -- ^ values_list_sizes + -> Ptr CString + -- ^ errs + -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_create_snapshot" + c_rocksdb_create_snapshot :: RocksDBPtr -> IO SnapshotPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_release_snapshot" + c_rocksdb_release_snapshot :: RocksDBPtr -> SnapshotPtr -> IO () + +-- | Returns NULL if property name is unknown. Else returns a pointer to a +-- malloc()-ed null-terminated value. +foreign import ccall safe "rocksdb\\c.h rocksdb_property_value" + c_rocksdb_property_value :: RocksDBPtr -> CString -> IO CString + +foreign import ccall safe "rocksdb\\c.h rocksdb_approximate_sizes" + c_rocksdb_approximate_sizes :: RocksDBPtr + -> CInt -- ^ num ranges + -> Ptr CString -> Ptr CSize -- ^ range start keys (array) + -> Ptr CString -> Ptr CSize -- ^ range limit keys (array) + -> Ptr Word64 -- ^ array of approx. 
sizes of ranges + -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_destroy_db" + c_rocksdb_destroy_db :: OptionsPtr -> DBName -> ErrPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_repair_db" + c_rocksdb_repair_db :: OptionsPtr -> DBName -> ErrPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_compact_range" + rocksdb_compact_range + :: RocksDBPtr + -> CString {- min key -} + -> CSize {- min key length -} + -> CString {- max key -} + -> CSize {- max key length -} + -> IO () + +-- +-- Iterator +-- + +foreign import ccall safe "rocksdb\\c.h rocksdb_create_iterator" + c_rocksdb_create_iterator :: RocksDBPtr -> ReadOptionsPtr -> IO IteratorPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_destroy" + c_rocksdb_iter_destroy :: IteratorPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_valid" + c_rocksdb_iter_valid :: IteratorPtr -> IO CUChar + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_seek_to_first" + c_rocksdb_iter_seek_to_first :: IteratorPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_seek_to_last" + c_rocksdb_iter_seek_to_last :: IteratorPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_seek" + c_rocksdb_iter_seek :: IteratorPtr -> Key -> CSize -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_next" + c_rocksdb_iter_next :: IteratorPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_prev" + c_rocksdb_iter_prev :: IteratorPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_key" + c_rocksdb_iter_key :: IteratorPtr -> Ptr CSize -> IO Key + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_value" + c_rocksdb_iter_value :: IteratorPtr -> Ptr CSize -> IO Val + +foreign import ccall safe "rocksdb\\c.h rocksdb_iter_get_error" + c_rocksdb_iter_get_error :: IteratorPtr -> ErrPtr -> IO () + + +-- +-- Write batch +-- + +foreign import ccall safe "rocksdb\\c.h rocksdb_writebatch_create" + c_rocksdb_writebatch_create :: IO WriteBatchPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_writebatch_destroy" + c_rocksdb_writebatch_destroy :: WriteBatchPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_writebatch_clear" + c_rocksdb_writebatch_clear :: WriteBatchPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_writebatch_put" + c_rocksdb_writebatch_put :: WriteBatchPtr + -> Key -> CSize + -> Val -> CSize + -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_writebatch_delete" + c_rocksdb_writebatch_delete :: WriteBatchPtr -> Key -> CSize -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_writebatch_iterate" + c_rocksdb_writebatch_iterate :: WriteBatchPtr + -> Ptr () -- ^ state + -> FunPtr (Ptr () -> Key -> CSize -> Val -> CSize) -- ^ put + -> FunPtr (Ptr () -> Key -> CSize) -- ^ delete + -> IO () + + +-- +-- Options +-- + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_create" + c_rocksdb_options_create :: IO OptionsPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_destroy" + c_rocksdb_options_destroy :: OptionsPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_set_comparator" + c_rocksdb_options_set_comparator :: OptionsPtr -> ComparatorPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_set_create_if_missing" + c_rocksdb_options_set_create_if_missing :: OptionsPtr -> CUChar -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_set_error_if_exists" + c_rocksdb_options_set_error_if_exists :: OptionsPtr -> CUChar -> IO () 
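+
+-- A hedged sketch of driving these raw setters directly; 'mkOpts' in
+-- Database.RocksDB.Internal performs the same sequence for the high-level
+-- 'Options' record. The flag values (1 = on, 0 = off) are illustrative:
+--
+-- > opts_ptr <- c_rocksdb_options_create
+-- > c_rocksdb_options_set_create_if_missing opts_ptr 1
+-- > c_rocksdb_options_set_error_if_exists opts_ptr 0
+-- > c_rocksdb_options_destroy opts_ptr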
+ +foreign import ccall safe "rocksdb\\c.h rocksdb_options_set_paranoid_checks" + c_rocksdb_options_set_paranoid_checks :: OptionsPtr -> CUChar -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_set_info_log" + c_rocksdb_options_set_info_log :: OptionsPtr -> LoggerPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_set_write_buffer_size" + c_rocksdb_options_set_write_buffer_size :: OptionsPtr -> CSize -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_set_max_open_files" + c_rocksdb_options_set_max_open_files :: OptionsPtr -> CInt -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_options_set_compression" + c_rocksdb_options_set_compression :: OptionsPtr -> CompressionOpt -> IO () + +foreign import ccall unsafe "rocksdb\\c.h rocksdb_options_set_prefix_extractor" + rocksdb_options_set_prefix_extractor :: OptionsPtr -> Ptr PrefixExtractor -> IO () + +foreign import ccall unsafe "cpp\\chainweb-rocksdb.h rocksdb_options_table_prefix_extractor" + rocksdb_options_table_prefix_extractor :: Ptr PrefixExtractor +-- +-- Comparator +-- + +type StatePtr = Ptr () +type Destructor = StatePtr -> () +type CompareFun = StatePtr -> CString -> CSize -> CString -> CSize -> IO CInt +type NameFun = StatePtr -> CString + +-- | Make a FunPtr to a user-defined comparator function +foreign import ccall "wrapper" mkCmp :: CompareFun -> IO (FunPtr CompareFun) + +-- | Make a destructor FunPtr +foreign import ccall "wrapper" mkDest :: Destructor -> IO (FunPtr Destructor) + +-- | Make a name FunPtr +foreign import ccall "wrapper" mkName :: NameFun -> IO (FunPtr NameFun) + +foreign import ccall safe "rocksdb\\c.h rocksdb_comparator_create" + c_rocksdb_comparator_create :: StatePtr + -> FunPtr Destructor + -> FunPtr CompareFun + -> FunPtr NameFun + -> IO ComparatorPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_comparator_destroy" + c_rocksdb_comparator_destroy :: ComparatorPtr -> IO () + + +-- +-- Filter Policy +-- + +type CreateFilterFun = StatePtr + -> Ptr CString -- ^ key array + -> Ptr CSize -- ^ key length array + -> CInt -- ^ num keys + -> Ptr CSize -- ^ filter length + -> IO CString -- ^ the filter +type KeyMayMatchFun = StatePtr + -> CString -- ^ key + -> CSize -- ^ key length + -> CString -- ^ filter + -> CSize -- ^ filter length + -> IO CUChar -- ^ whether key is in filter + +-- | Make a FunPtr to a user-defined create_filter function +foreign import ccall "wrapper" mkCF :: CreateFilterFun -> IO (FunPtr CreateFilterFun) + +-- | Make a FunPtr to a user-defined key_may_match function +foreign import ccall "wrapper" mkKMM :: KeyMayMatchFun -> IO (FunPtr KeyMayMatchFun) + +-- +-- Read options +-- + +foreign import ccall safe "rocksdb\\c.h rocksdb_readoptions_create" + c_rocksdb_readoptions_create :: IO ReadOptionsPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_readoptions_destroy" + c_rocksdb_readoptions_destroy :: ReadOptionsPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_readoptions_set_verify_checksums" + c_rocksdb_readoptions_set_verify_checksums :: ReadOptionsPtr -> CUChar -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_readoptions_set_fill_cache" + c_rocksdb_readoptions_set_fill_cache :: ReadOptionsPtr -> CUChar -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_readoptions_set_snapshot" + c_rocksdb_readoptions_set_snapshot :: ReadOptionsPtr -> SnapshotPtr -> IO () + +foreign import ccall unsafe "rocksdb\\c.h rocksdb_readoptions_set_iterate_upper_bound" + 
rocksdb_readoptions_set_iterate_upper_bound :: ReadOptionsPtr -> CString -> CSize -> IO () + +foreign import ccall unsafe "rocksdb\\c.h rocksdb_readoptions_set_iterate_lower_bound" + rocksdb_readoptions_set_iterate_lower_bound :: ReadOptionsPtr -> CString -> CSize -> IO () + +foreign import ccall unsafe "cpp\\chainweb-rocksdb.h rocksdb_readoptions_set_auto_prefix_mode" + rocksdb_readoptions_set_auto_prefix_mode :: ReadOptionsPtr -> CBool -> IO () + +-- +-- Write options +-- + +foreign import ccall safe "rocksdb\\c.h rocksdb_writeoptions_create" + c_rocksdb_writeoptions_create :: IO WriteOptionsPtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_writeoptions_destroy" + c_rocksdb_writeoptions_destroy :: WriteOptionsPtr -> IO () + +foreign import ccall safe "rocksdb\\c.h rocksdb_writeoptions_set_sync" + c_rocksdb_writeoptions_set_sync :: WriteOptionsPtr -> CUChar -> IO () + + +-- +-- Cache +-- + +foreign import ccall safe "rocksdb\\c.h rocksdb_cache_create_lru" + c_rocksdb_cache_create_lru :: CSize -> IO CachePtr + +foreign import ccall safe "rocksdb\\c.h rocksdb_cache_destroy" + c_rocksdb_cache_destroy :: CachePtr -> IO () + +---------------------------------------------------------------------------- +-- Free +---------------------------------------------------------------------------- + +foreign import ccall safe "rocksdb\\c.h rocksdb_free" + c_rocksdb_free :: CString -> IO () + +---------------------------------------------------------------------------- +-- Checkpoints +---------------------------------------------------------------------------- + +foreign import ccall unsafe "rocksdb\\c.h rocksdb_checkpoint_object_create" + rocksdb_checkpoint_object_create :: RocksDBPtr -> Ptr CString -> IO (Ptr Checkpoint) + +foreign import ccall unsafe "rocksdb\\c.h rocksdb_checkpoint_create" + rocksdb_checkpoint_create :: Ptr Checkpoint -> CString -> CULong -> Ptr CString -> IO () + +foreign import ccall unsafe "rocksdb\\c.h rocksdb_checkpoint_object_destroy" + rocksdb_checkpoint_object_destroy :: Ptr Checkpoint -> IO () diff --git a/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Internal.hs b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Internal.hs new file mode 100644 index 0000000000..4cbc118b4b --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Internal.hs @@ -0,0 +1,217 @@ +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE OverloadedStrings #-} + +-- | +-- Module : Database.RocksDB.Internal +-- Copyright : (c) 2012-2013 The leveldb-haskell Authors +-- (c) 2014 The rocksdb-haskell Authors +-- License : BSD3 +-- Maintainer : mail@agrafix.net +-- Stability : experimental +-- Portability : non-portable +-- + +module Database.RocksDB.Internal + ( -- * Types + DB (..) + , Comparator' + , Options' (..) 
+ + -- * "Smart" constructors and deconstructors + , freeCReadOpts + , freeComparator + , freeOpts + , freeCString + , mkComparator + , mkCompareFun + , mkCreateFilterFun + , mkKeyMayMatchFun + , mkOpts + + -- * combinators + , withCWriteOpts + + -- * Utilities + , throwIfErr + , cSizeToInt + , intToCSize + , intToCInt + , cIntToInt + , boolToNum + ) +where + +import Control.Exception +import Control.Monad (when) +import Data.ByteString (ByteString) +import Foreign +import Foreign.C.String (CString, peekCString, withCString) +import Foreign.C.Types (CInt, CSize) + +import Database.RocksDB.C +import Database.RocksDB.Types + +import qualified Data.ByteString as BS + + +-- | Database handle +newtype DB = DB RocksDBPtr + deriving Eq + +-- | Internal representation of a 'Comparator' +data Comparator' = Comparator' (FunPtr CompareFun) + (FunPtr Destructor) + (FunPtr NameFun) + ComparatorPtr + +-- | Internal representation of the 'Options' +data Options' = Options' + { _optsPtr :: !OptionsPtr + , _cachePtr :: !(Maybe CachePtr) + , _comp :: !(Maybe Comparator') + } + +mkOpts :: Options -> IO Options' +mkOpts Options{..} = do + opts_ptr <- c_rocksdb_options_create + + c_rocksdb_options_set_compression opts_ptr + $ ccompression compression + c_rocksdb_options_set_create_if_missing opts_ptr + $ boolToNum createIfMissing + c_rocksdb_options_set_error_if_exists opts_ptr + $ boolToNum errorIfExists + c_rocksdb_options_set_max_open_files opts_ptr + $ intToCInt maxOpenFiles + c_rocksdb_options_set_paranoid_checks opts_ptr + $ boolToNum paranoidChecks + c_rocksdb_options_set_write_buffer_size opts_ptr + $ intToCSize writeBufferSize + + cmp <- maybeSetCmp opts_ptr comparator + + return (Options' opts_ptr Nothing cmp) + + where + ccompression NoCompression = + noCompression + ccompression SnappyCompression = + snappyCompression + ccompression ZlibCompression = + zlibCompression + + maybeSetCmp :: OptionsPtr -> Maybe Comparator -> IO (Maybe Comparator') + maybeSetCmp opts_ptr (Just mcmp) = Just <$> setcmp opts_ptr mcmp + maybeSetCmp _ Nothing = return Nothing + + setcmp :: OptionsPtr -> Comparator -> IO Comparator' + setcmp opts_ptr (Comparator cmp) = do + cmp'@(Comparator' _ _ _ cmp_ptr) <- mkComparator "user-defined" cmp + c_rocksdb_options_set_comparator opts_ptr cmp_ptr + return cmp' + +freeOpts :: Options' -> IO () +freeOpts (Options' opts_ptr mcache_ptr mcmp_ptr) = + c_rocksdb_options_destroy opts_ptr `finally` + maybe (return ()) c_rocksdb_cache_destroy mcache_ptr `finally` + maybe (return ()) freeComparator mcmp_ptr + +withCWriteOpts :: WriteOptions -> (WriteOptionsPtr -> IO a) -> IO a +withCWriteOpts WriteOptions{..} = bracket mkCWriteOpts freeCWriteOpts + where + mkCWriteOpts = do + opts_ptr <- c_rocksdb_writeoptions_create + onException + (c_rocksdb_writeoptions_set_sync opts_ptr $ boolToNum sync) + (c_rocksdb_writeoptions_destroy opts_ptr) + return opts_ptr + + freeCWriteOpts = c_rocksdb_writeoptions_destroy + +mkCompareFun :: (ByteString -> ByteString -> Ordering) -> CompareFun +mkCompareFun cmp = cmp' + where + cmp' _ a alen b blen = do + a' <- BS.packCStringLen (a, fromInteger . toInteger $ alen) + b' <- BS.packCStringLen (b, fromInteger . toInteger $ blen) + return $ case cmp a' b' of + EQ -> 0 + GT -> 1 + LT -> -1 + +mkComparator :: String -> (ByteString -> ByteString -> Ordering) -> IO Comparator' +mkComparator name f = + withCString name $ \cs -> do + ccmpfun <- mkCmp . 
mkCompareFun $ f + cdest <- mkDest $ const () + cname <- mkName $ const cs + ccmp <- c_rocksdb_comparator_create nullPtr cdest ccmpfun cname + return $ Comparator' ccmpfun cdest cname ccmp + + +freeComparator :: Comparator' -> IO () +freeComparator (Comparator' ccmpfun cdest cname ccmp) = do + c_rocksdb_comparator_destroy ccmp + freeHaskellFunPtr ccmpfun + freeHaskellFunPtr cdest + freeHaskellFunPtr cname + +mkCreateFilterFun :: ([ByteString] -> ByteString) -> CreateFilterFun +mkCreateFilterFun f = f' + where + f' _ ks ks_lens n_ks flen = do + let n_ks' = fromInteger . toInteger $ n_ks + ks' <- peekArray n_ks' ks + ks_lens' <- peekArray n_ks' ks_lens + keys <- mapM bstr (zip ks' ks_lens') + let res = f keys + poke flen (fromIntegral . BS.length $ res) + BS.useAsCString res $ \cstr -> return cstr + + bstr (x,len) = BS.packCStringLen (x, fromInteger . toInteger $ len) + +mkKeyMayMatchFun :: (ByteString -> ByteString -> Bool) -> KeyMayMatchFun +mkKeyMayMatchFun g = g' + where + g' _ k klen f flen = do + k' <- BS.packCStringLen (k, fromInteger . toInteger $ klen) + f' <- BS.packCStringLen (f, fromInteger . toInteger $ flen) + return . boolToNum $ g k' f' + + +freeCReadOpts :: ReadOptionsPtr -> IO () +freeCReadOpts = c_rocksdb_readoptions_destroy + +freeCString :: CString -> IO () +freeCString = c_rocksdb_free + +throwIfErr :: String -> (ErrPtr -> IO a) -> IO a +throwIfErr s f = alloca $ \err_ptr -> do + poke err_ptr nullPtr + res <- f err_ptr + erra <- peek err_ptr + when (erra /= nullPtr) $ do + err <- peekCString erra + throwIO $ userError $ s ++ ": " ++ err + return res + +cSizeToInt :: CSize -> Int +cSizeToInt = fromIntegral +{-# INLINE cSizeToInt #-} + +intToCSize :: Int -> CSize +intToCSize = fromIntegral +{-# INLINE intToCSize #-} + +intToCInt :: Int -> CInt +intToCInt = fromIntegral +{-# INLINE intToCInt #-} + +cIntToInt :: CInt -> Int +cIntToInt = fromIntegral +{-# INLINE cIntToInt #-} + +boolToNum :: Num b => Bool -> b +boolToNum True = fromIntegral (1 :: Int) +boolToNum False = fromIntegral (0 :: Int) +{-# INLINE boolToNum #-} diff --git a/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Iterator.hs b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Iterator.hs new file mode 100644 index 0000000000..e5f5de024f --- /dev/null +++ b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Iterator.hs @@ -0,0 +1,198 @@ +-- | This code is mostly from serokell/rocksdb-haskell's Iterator module. +-- The main difference is the way that options are handled. The original code +-- didn't let me alter any options that didn't have bindings. It also had the +-- ability to leak the underlying `ReadOptions` pointer. 
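+--
+-- A hedged usage sketch (assumes @db@ was opened with
+-- 'Database.RocksDB.Base.open' and 'withCReadOpts' is imported from
+-- Database.RocksDB.ReadOptions):
+--
+-- > withCReadOpts $ \opts_ptr ->
+-- >     withIter db opts_ptr $ \it -> do
+-- >         iterFirst it
+-- >         iterItems it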
+{-# language ScopedTypeVariables #-} +{-# language RankNTypes #-} + +module Database.RocksDB.Iterator + ( Iterator + , createIter + , withIter + , iterValid + , iterSeek + , iterFirst + , iterLast + , iterNext + , iterPrev + , iterKey + , iterValue + , iterEntry + , iterGetError + , mapIter + , iterItems + , iterKeys + , iterValues + ) +where + +import Control.Exception (bracket) +import Control.Monad (when) +import Control.Monad.IO.Class (MonadIO (liftIO)) +import Data.ByteString (ByteString) +import Data.Maybe (catMaybes) +import Foreign +import Foreign.C.Error (throwErrnoIfNull) +import Foreign.C.String +import Foreign.C.Types + +import Database.RocksDB.C +import Database.RocksDB.Internal + +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BC +import qualified Data.ByteString.Unsafe as BU + +newtype Iterator = Iterator IteratorPtr + +createIter :: DB -> ReadOptionsPtr -> IO Iterator +createIter (DB db_ptr) opts_ptr = fmap Iterator $ + throwErrnoIfNull "create_iterator" $ c_rocksdb_create_iterator db_ptr opts_ptr + +-- | Run an action with an 'IteratorPtr' +withIter :: DB -> ReadOptionsPtr -> (Iterator -> IO a) -> IO a +withIter db opts_ptr = + bracket (createIter db opts_ptr) (\(Iterator p) -> c_rocksdb_iter_destroy p) + +-- | An iterator is either positioned at a key/value pair, or not valid. This +-- function returns /true/ iff the iterator is valid. +iterValid :: Iterator -> IO Bool +iterValid (Iterator iter_ptr) = liftIO $ do + x <- c_rocksdb_iter_valid iter_ptr + return (x /= 0) + +-- | Position at the first key in the source that is at or past target. The +-- iterator is /valid/ after this call iff the source contains an entry that +-- comes at or past target. +iterSeek :: Iterator -> ByteString -> IO () +iterSeek (Iterator iter_ptr) key = liftIO $ + BU.unsafeUseAsCStringLen key $ \(key_ptr, klen) -> + c_rocksdb_iter_seek iter_ptr key_ptr (intToCSize klen) + +-- | Position at the first key in the source. The iterator is /valid/ after this +-- call iff the source is not empty. +iterFirst :: Iterator -> IO () +iterFirst (Iterator iter_ptr) = liftIO $ c_rocksdb_iter_seek_to_first iter_ptr + +-- | Position at the last key in the source. The iterator is /valid/ after this +-- call iff the source is not empty. +iterLast :: Iterator -> IO () +iterLast (Iterator iter_ptr) = liftIO $ c_rocksdb_iter_seek_to_last iter_ptr + +-- | Moves to the next entry in the source. After this call, 'iterValid' is +-- /true/ iff the iterator was not positioned at the last entry in the source. +-- +-- If the iterator is not valid, this function does nothing. Note that this is a +-- shortcoming of the C API: an 'iterPrev' might still be possible, but we can't +-- determine if we're at the last or first entry. +iterNext :: Iterator -> IO () +iterNext (Iterator iter_ptr) = liftIO $ do + valid <- c_rocksdb_iter_valid iter_ptr + when (valid /= 0) $ c_rocksdb_iter_next iter_ptr + +-- | Moves to the previous entry in the source. After this call, 'iterValid' is +-- /true/ iff the iterator was not positioned at the first entry in the source. +-- +-- If the iterator is not valid, this function does nothing. Note that this is a +-- shortcoming of the C API: an 'iterNext' might still be possible, but we can't +-- determine if we're at the last or first entry. 
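+--
+-- A small hedged sketch (@it@ obtained via 'withIter', source non-empty):
+--
+-- > iterLast it
+-- > iterPrev it
+-- > mkey <- iterKey it -- the second-to-last key, if there is one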
+iterPrev :: Iterator -> IO () +iterPrev (Iterator iter_ptr) = liftIO $ do + valid <- c_rocksdb_iter_valid iter_ptr + when (valid /= 0) $ c_rocksdb_iter_prev iter_ptr + +-- | Return the key for the current entry if the iterator is currently +-- positioned at an entry, ie. 'iterValid'. +iterKey :: Iterator -> IO (Maybe ByteString) +iterKey (Iterator iter_ptr) = liftIO $ + iterString iter_ptr c_rocksdb_iter_key + +-- | Return the value for the current entry if the iterator is currently +-- positioned at an entry, ie. 'iterValid'. +iterValue :: Iterator -> IO (Maybe ByteString) +iterValue (Iterator iter_ptr) = liftIO $ + iterString iter_ptr c_rocksdb_iter_value + +-- | Return the current entry as a pair, if the iterator is currently positioned +-- at an entry, ie. 'iterValid'. +iterEntry :: Iterator -> IO (Maybe (ByteString, ByteString)) +iterEntry iter = liftIO $ do + mkey <- iterKey iter + mval <- iterValue iter + return $ (,) <$> mkey <*> mval + +-- | Check for errors +-- +-- Note that this captures somewhat severe errors such as a corrupted database. +iterGetError :: Iterator -> IO (Maybe ByteString) +iterGetError (Iterator iter_ptr) = liftIO $ + alloca $ \err_ptr -> do + poke err_ptr nullPtr + c_rocksdb_iter_get_error iter_ptr err_ptr + erra <- peek err_ptr + if erra == nullPtr + then return Nothing + else do + err <- peekCString erra + return . Just . BC.pack $ err + +-- | Map a function over an iterator, advancing the iterator forward and +-- returning the value. The iterator should be put in the right position prior +-- to calling the function. +-- +-- Note that this function accumulates the result strictly, ie. it reads all +-- values into memory until the iterator is exhausted. This is most likely not +-- what you want for large ranges. You may consider using conduits instead, for +-- an example see: +mapIter :: (Iterator -> IO a) -> Iterator -> IO [a] +mapIter f iter@(Iterator iter_ptr) = go [] + where + go acc = do + valid <- liftIO $ c_rocksdb_iter_valid iter_ptr + if valid == 0 + then return acc + else do + val <- f iter + () <- liftIO $ c_rocksdb_iter_next iter_ptr + go (val : acc) + +-- | Return a list of key and value tuples from an iterator. The iterator +-- should be put in the right position prior to calling this with the iterator. +-- +-- See strictness remarks on 'mapIter'. +iterItems :: Iterator -> IO [(ByteString, ByteString)] +iterItems iter = catMaybes <$> mapIter iterEntry iter + +-- | Return a list of key from an iterator. The iterator should be put +-- in the right position prior to calling this with the iterator. +-- +-- See strictness remarks on 'mapIter' +iterKeys :: Iterator -> IO [ByteString] +iterKeys iter = catMaybes <$> mapIter iterKey iter + +-- | Return a list of values from an iterator. The iterator should be put +-- in the right position prior to calling this with the iterator. 
+--
+-- See strictness remarks on 'mapIter'.
+iterValues :: Iterator -> IO [ByteString]
+iterValues iter = catMaybes <$> mapIter iterValue iter
+
+--
+-- Internal
+--
+
+iterString :: IteratorPtr
+           -> (IteratorPtr -> Ptr CSize -> IO CString)
+           -> IO (Maybe ByteString)
+iterString iter_ptr f = do
+    valid <- c_rocksdb_iter_valid iter_ptr
+    if valid == 0
+        then return Nothing
+        else
+            alloca $ \len_ptr -> do
+                ptr <- f iter_ptr len_ptr
+                if ptr == nullPtr
+                    then return Nothing
+                    else do
+                        len <- peek len_ptr
+                        Just <$> BS.packCStringLen (ptr, cSizeToInt len)
diff --git a/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/ReadOptions.hs b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/ReadOptions.hs
new file mode 100644
index 0000000000..f1aa12da1f
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/ReadOptions.hs
@@ -0,0 +1,93 @@
+{-# language RankNTypes #-}
+
+module Database.RocksDB.ReadOptions
+    ( ReadOptions(..)
+    , withReadOptions
+    , withCReadOpts
+    , setVerifyChecksums
+    , setFillCache
+    , setUseSnapshot
+    , setUpperBound
+    , setLowerBound
+    , setAutoPrefixMode
+    )
+    where
+
+import Control.Exception
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Unsafe as BU
+import Foreign.Ptr
+
+import Database.RocksDB.C
+import Database.RocksDB.Types
+
+newtype ReadOptions = ReadOptions
+    { runReadOptions :: forall r. ReadOptionsPtr -> IO r -> IO r
+    }
+instance Monoid ReadOptions where
+    mempty = ReadOptions (\_ k -> k)
+instance Semigroup ReadOptions where
+    ReadOptions r <> ReadOptions r' =
+        ReadOptions $ \ptr k -> r ptr (r' ptr k)
+
+withReadOptions :: ReadOptions -> (ReadOptionsPtr -> IO a) -> IO a
+withReadOptions opts k = withCReadOpts $ \opts_ptr -> do
+    runReadOptions opts opts_ptr (k opts_ptr)
+
+withCReadOpts :: (ReadOptionsPtr -> IO a) -> IO a
+withCReadOpts = bracket c_rocksdb_readoptions_create c_rocksdb_readoptions_destroy
+
+-- | If true, all data read from underlying storage will be verified
+-- against corresponding checksums.
+--
+-- Default: True
setVerifyChecksums :: Bool -> ReadOptions
+setVerifyChecksums b = ReadOptions $ \opts_ptr k -> do
+    c_rocksdb_readoptions_set_verify_checksums opts_ptr (boolToNum b)
+    k
+
+-- | Should the data read for this iteration be cached in memory? Callers
+-- may wish to set this field to false for bulk scans.
+--
+-- Default: True
+setFillCache :: Bool -> ReadOptions
+setFillCache b = ReadOptions $ \opts_ptr k -> do
+    c_rocksdb_readoptions_set_fill_cache opts_ptr (boolToNum b)
+    k
+
+-- | If 'Just', read as of the supplied snapshot (which must belong to the
+-- DB that is being read and which must not have been released). If
+-- 'Nothing', use an implicit snapshot of the state at the beginning of
+-- this read operation.
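+--
+-- A hedged sketch ('createSnapshot', 'releaseSnapshot' and 'get' come from
+-- Database.RocksDB.Base; @someKey@ is illustrative):
+--
+-- > snap <- createSnapshot db
+-- > v <- get db (setUseSnapshot (Just snap)) someKey
+-- > releaseSnapshot db snap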
+--
+-- Default: Nothing
+setUseSnapshot :: Maybe Snapshot -> ReadOptions
+setUseSnapshot snapshot = ReadOptions $ \opts_ptr k -> do
+    c_rocksdb_readoptions_set_snapshot opts_ptr snap_ptr
+    k
+  where
+    snap_ptr = case snapshot of
+        Just (Snapshot p) -> p
+        Nothing -> nullPtr
+
+setUpperBound :: ByteString -> ReadOptions
+setUpperBound upper = ReadOptions $ \opts_ptr k ->
+    BU.unsafeUseAsCStringLen upper $ \(upperPtr, upperLen) -> do
+        rocksdb_readoptions_set_iterate_upper_bound opts_ptr upperPtr (fromIntegral upperLen)
+        k
+
+setLowerBound :: ByteString -> ReadOptions
+setLowerBound lower = ReadOptions $ \opts_ptr k ->
+    BU.unsafeUseAsCStringLen lower $ \(lowerPtr, lowerLen) -> do
+        rocksdb_readoptions_set_iterate_lower_bound opts_ptr lowerPtr (fromIntegral lowerLen)
+        k
+
+setAutoPrefixMode :: Bool -> ReadOptions
+setAutoPrefixMode m = ReadOptions $ \opts_ptr k -> do
+    rocksdb_readoptions_set_auto_prefix_mode opts_ptr (boolToNum m)
+    k
+
+boolToNum :: Num b => Bool -> b
+boolToNum True = fromIntegral (1 :: Int)
+boolToNum False = fromIntegral (0 :: Int)
+{-# INLINE boolToNum #-}
diff --git a/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Types.hs b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Types.hs
new file mode 100644
index 0000000000..b76ed60467
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/src/Database/RocksDB/Types.hs
@@ -0,0 +1,147 @@
+-- |
+-- Module      : Database.RocksDB.Types
+-- Copyright   : (c) 2012-2013 The leveldb-haskell Authors
+--               (c) 2014 The rocksdb-haskell Authors
+-- License     : BSD3
+-- Maintainer  : mail@agrafix.net
+-- Stability   : experimental
+-- Portability : non-portable
+--
+
+module Database.RocksDB.Types
+  ( BatchOp (..)
+  , Comparator (..)
+  , Compression (..)
+  , Options (..)
+  , Property (..)
+  , Snapshot (..)
+  , WriteBatch
+  , WriteOptions (..)
+
+  , defaultOptions
+  , defaultWriteOptions
+  )
+where
+
+import Data.ByteString (ByteString)
+import Data.Default
+import Foreign
+
+import Database.RocksDB.C
+
+-- | Snapshot handle
+newtype Snapshot = Snapshot SnapshotPtr deriving (Eq)
+
+-- | Compression setting
+data Compression
+    = NoCompression
+    | SnappyCompression
+    | ZlibCompression
+    deriving (Eq, Show)
+
+-- | User-defined comparator
+newtype Comparator = Comparator (ByteString -> ByteString -> Ordering)
+
+-- | Options when opening a database
+data Options = Options
+    { comparator :: !(Maybe Comparator)
+      -- ^ Comparator used to define the order of keys in the table.
+      --
+      -- If 'Nothing', the default comparator is used, which uses lexicographic
+      -- bytes-wise ordering.
+      --
+      -- NOTE: the client must ensure that the comparator supplied here has the
+      -- same name and orders keys /exactly/ the same as the comparator provided
+      -- to previous open calls on the same DB.
+      --
+      -- Default: Nothing
+    , compression :: !Compression
+      -- ^ Compress blocks using the specified compression algorithm.
+      --
+      -- This parameter can be changed dynamically.
+      --
+      -- Default: 'SnappyCompression'
+    , createIfMissing :: !Bool
+      -- ^ If true, the database will be created if it is missing.
+      --
+      -- Default: False
+    , errorIfExists :: !Bool
+      -- ^ If true, an error is raised if the database already exists.
+      --
+      -- Default: False
+    , maxOpenFiles :: !Int
+      -- ^ Number of open files that can be used by the DB.
+      --
+      -- You may need to increase this if your database has a large working set
+      -- (budget one open file per 2MB of working set).
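+      -- For example, a working set of about 2 GB corresponds to a budget of
+      -- roughly 1000 open files, which matches the default below.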
+      --
+      -- Default: 1000
+    , paranoidChecks :: !Bool
+      -- ^ If true, the implementation will do aggressive checking of the data
+      -- it is processing and will stop early if it detects any errors.
+      --
+      -- This may have unforeseen ramifications: for example, a corruption of
+      -- one DB entry may cause a large number of entries to become unreadable
+      -- or for the entire DB to become unopenable.
+      --
+      -- Default: False
+    , writeBufferSize :: !Int
+      -- ^ Amount of data to build up in memory (backed by an unsorted log on
+      -- disk) before converting to a sorted on-disk file.
+      --
+      -- Larger values increase performance, especially during bulk loads. Up
+      -- to two write buffers may be held in memory at the same time, so you
+      -- may wish to adjust this parameter to control memory usage. Also, a
+      -- larger write buffer will result in a longer recovery time the next
+      -- time the database is opened.
+      --
+      -- Default: 4MB
+    }
+
+defaultOptions :: Options
+defaultOptions = Options
+    { comparator = Nothing
+    , compression = SnappyCompression
+    , createIfMissing = False
+    , errorIfExists = False
+    , maxOpenFiles = 1000
+    , paranoidChecks = False
+    , writeBufferSize = 4 `shift` 20
+    }
+
+instance Default Options where
+    def = defaultOptions
+
+-- | Options for write operations
+data WriteOptions = WriteOptions
+    { sync :: !Bool
+      -- ^ If true, the write will be flushed from the operating system buffer
+      -- cache (by calling WritableFile::Sync()) before the write is considered
+      -- complete. If this flag is true, writes will be slower.
+      --
+      -- If this flag is false, and the machine crashes, some recent writes may
+      -- be lost. Note that if it is just the process that crashes (i.e., the
+      -- machine does not reboot), no writes will be lost even if sync==false.
+      --
+      -- In other words, a DB write with sync==false has similar crash semantics
+      -- to the "write()" system call. A DB write with sync==true has similar
+      -- crash semantics to a "write()" system call followed by "fsync()".
+      --
+      -- Default: False
+    } deriving (Eq, Show)
+
+defaultWriteOptions :: WriteOptions
+defaultWriteOptions = WriteOptions { sync = False }
+
+instance Default WriteOptions where
+    def = defaultWriteOptions
+
+type WriteBatch = [BatchOp]
+
+-- | Batch operation
+data BatchOp = Put ByteString ByteString | Del ByteString
+    deriving (Eq, Show)
+
+-- | Properties exposed by RocksDB
+data Property = NumFilesAtLevel Int | Stats | SSTables
+    deriving (Eq, Show)
diff --git a/vendored/rocksdb-haskell-kadena/unpack-rocksdb.sh b/vendored/rocksdb-haskell-kadena/unpack-rocksdb.sh
new file mode 100755
index 0000000000..75f6c09fd3
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/unpack-rocksdb.sh
@@ -0,0 +1,354 @@
+#!/usr/bin/env bash
+
+set -e
+
+ROCKSDB_VERSION=8.3.2
+
+ROCKSDB_ARCHIVE=v${ROCKSDB_VERSION}.tar.gz
+ROCKSDB_BASE=rocksdb-${ROCKSDB_VERSION}
+
+if [[ ! -f ${ROCKSDB_ARCHIVE} ]] ; then
+    wget https://github.com/facebook/rocksdb/archive/refs/tags/v${ROCKSDB_VERSION}.tar.gz
+fi
+
+# This list is based on LIB_OBJECTS from the Makefile. It must be regenerated
+# whenever the rocksdb version changes.
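+#
+# A sketch of one way to dump that variable from the rocksdb Makefile (assumes
+# GNU make; verify against the Makefile of the target rocksdb release):
+#
+#   make -C "${ROCKSDB_BASE}" --eval='print-lib-objects: ; @echo $(LIB_OBJECTS)' print-lib-objects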
+# +tar -xzf "$ROCKSDB_ARCHIVE" \ + ${ROCKSDB_BASE}/include \ + ${ROCKSDB_BASE}/cache/cache.cc \ + ${ROCKSDB_BASE}/cache/cache_entry_roles.cc \ + ${ROCKSDB_BASE}/cache/cache_key.cc \ + ${ROCKSDB_BASE}/cache/cache_helpers.cc \ + ${ROCKSDB_BASE}/cache/cache_reservation_manager.cc \ + ${ROCKSDB_BASE}/cache/charged_cache.cc \ + ${ROCKSDB_BASE}/cache/clock_cache.cc \ + ${ROCKSDB_BASE}/cache/lru_cache.cc \ + ${ROCKSDB_BASE}/cache/compressed_secondary_cache.cc \ + ${ROCKSDB_BASE}/cache/secondary_cache.cc \ + ${ROCKSDB_BASE}/cache/secondary_cache_adapter.cc \ + ${ROCKSDB_BASE}/cache/sharded_cache.cc \ + ${ROCKSDB_BASE}/db/arena_wrapped_db_iter.cc \ + ${ROCKSDB_BASE}/db/blob/blob_contents.cc \ + ${ROCKSDB_BASE}/db/blob/blob_fetcher.cc \ + ${ROCKSDB_BASE}/db/blob/blob_file_addition.cc \ + ${ROCKSDB_BASE}/db/blob/blob_file_builder.cc \ + ${ROCKSDB_BASE}/db/blob/blob_file_cache.cc \ + ${ROCKSDB_BASE}/db/blob/blob_file_garbage.cc \ + ${ROCKSDB_BASE}/db/blob/blob_file_meta.cc \ + ${ROCKSDB_BASE}/db/blob/blob_file_reader.cc \ + ${ROCKSDB_BASE}/db/blob/blob_garbage_meter.cc \ + ${ROCKSDB_BASE}/db/blob/blob_log_format.cc \ + ${ROCKSDB_BASE}/db/blob/blob_log_sequential_reader.cc \ + ${ROCKSDB_BASE}/db/blob/blob_log_writer.cc \ + ${ROCKSDB_BASE}/db/blob/blob_source.cc \ + ${ROCKSDB_BASE}/db/blob/prefetch_buffer_collection.cc \ + ${ROCKSDB_BASE}/db/builder.cc \ + ${ROCKSDB_BASE}/db/c.cc \ + ${ROCKSDB_BASE}/db/column_family.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_iterator.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_job.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_picker.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_picker_fifo.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_picker_level.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_picker_universal.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_service_job.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_state.cc \ + ${ROCKSDB_BASE}/db/compaction/compaction_outputs.cc \ + ${ROCKSDB_BASE}/db/compaction/sst_partitioner.cc \ + ${ROCKSDB_BASE}/db/compaction/subcompaction_state.cc \ + ${ROCKSDB_BASE}/db/convenience.cc \ + ${ROCKSDB_BASE}/db/db_filesnapshot.cc \ + ${ROCKSDB_BASE}/db/db_impl/compacted_db_impl.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl_compaction_flush.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl_debug.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl_experimental.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl_files.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl_open.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl_readonly.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl_secondary.cc \ + ${ROCKSDB_BASE}/db/db_impl/db_impl_write.cc \ + ${ROCKSDB_BASE}/db/db_info_dumper.cc \ + ${ROCKSDB_BASE}/db/db_iter.cc \ + ${ROCKSDB_BASE}/db/dbformat.cc \ + ${ROCKSDB_BASE}/db/error_handler.cc \ + ${ROCKSDB_BASE}/db/event_helpers.cc \ + ${ROCKSDB_BASE}/db/experimental.cc \ + ${ROCKSDB_BASE}/db/external_sst_file_ingestion_job.cc \ + ${ROCKSDB_BASE}/db/file_indexer.cc \ + ${ROCKSDB_BASE}/db/flush_job.cc \ + ${ROCKSDB_BASE}/db/flush_scheduler.cc \ + ${ROCKSDB_BASE}/db/forward_iterator.cc \ + ${ROCKSDB_BASE}/db/import_column_family_job.cc \ + ${ROCKSDB_BASE}/db/internal_stats.cc \ + ${ROCKSDB_BASE}/db/logs_with_prep_tracker.cc \ + ${ROCKSDB_BASE}/db/log_reader.cc \ + ${ROCKSDB_BASE}/db/log_writer.cc \ + ${ROCKSDB_BASE}/db/malloc_stats.cc \ + ${ROCKSDB_BASE}/db/memtable.cc \ + ${ROCKSDB_BASE}/db/memtable_list.cc \ + ${ROCKSDB_BASE}/db/merge_helper.cc \ + ${ROCKSDB_BASE}/db/merge_operator.cc \ + 
${ROCKSDB_BASE}/db/output_validator.cc \ + ${ROCKSDB_BASE}/db/periodic_task_scheduler.cc \ + ${ROCKSDB_BASE}/db/range_del_aggregator.cc \ + ${ROCKSDB_BASE}/db/range_tombstone_fragmenter.cc \ + ${ROCKSDB_BASE}/db/repair.cc \ + ${ROCKSDB_BASE}/db/seqno_to_time_mapping.cc \ + ${ROCKSDB_BASE}/db/snapshot_impl.cc \ + ${ROCKSDB_BASE}/db/table_cache.cc \ + ${ROCKSDB_BASE}/db/table_properties_collector.cc \ + ${ROCKSDB_BASE}/db/transaction_log_impl.cc \ + ${ROCKSDB_BASE}/db/trim_history_scheduler.cc \ + ${ROCKSDB_BASE}/db/version_builder.cc \ + ${ROCKSDB_BASE}/db/version_edit.cc \ + ${ROCKSDB_BASE}/db/version_edit_handler.cc \ + ${ROCKSDB_BASE}/db/version_set.cc \ + ${ROCKSDB_BASE}/db/wal_edit.cc \ + ${ROCKSDB_BASE}/db/wal_manager.cc \ + ${ROCKSDB_BASE}/db/wide/wide_column_serialization.cc \ + ${ROCKSDB_BASE}/db/wide/wide_columns.cc \ + ${ROCKSDB_BASE}/db/write_batch.cc \ + ${ROCKSDB_BASE}/db/write_batch_base.cc \ + ${ROCKSDB_BASE}/db/write_controller.cc \ + ${ROCKSDB_BASE}/db/write_stall_stats.cc \ + ${ROCKSDB_BASE}/db/write_thread.cc \ + ${ROCKSDB_BASE}/env/composite_env.cc \ + ${ROCKSDB_BASE}/env/env.cc \ + ${ROCKSDB_BASE}/env/env_chroot.cc \ + ${ROCKSDB_BASE}/env/env_encryption.cc \ + ${ROCKSDB_BASE}/env/env_posix.cc \ + ${ROCKSDB_BASE}/env/file_system.cc \ + ${ROCKSDB_BASE}/env/fs_posix.cc \ + ${ROCKSDB_BASE}/env/fs_remap.cc \ + ${ROCKSDB_BASE}/env/file_system_tracer.cc \ + ${ROCKSDB_BASE}/env/io_posix.cc \ + ${ROCKSDB_BASE}/env/mock_env.cc \ + ${ROCKSDB_BASE}/env/unique_id_gen.cc \ + ${ROCKSDB_BASE}/file/delete_scheduler.cc \ + ${ROCKSDB_BASE}/file/file_prefetch_buffer.cc \ + ${ROCKSDB_BASE}/file/file_util.cc \ + ${ROCKSDB_BASE}/file/filename.cc \ + ${ROCKSDB_BASE}/file/line_file_reader.cc \ + ${ROCKSDB_BASE}/file/random_access_file_reader.cc \ + ${ROCKSDB_BASE}/file/read_write_util.cc \ + ${ROCKSDB_BASE}/file/readahead_raf.cc \ + ${ROCKSDB_BASE}/file/sequence_file_reader.cc \ + ${ROCKSDB_BASE}/file/sst_file_manager_impl.cc \ + ${ROCKSDB_BASE}/file/writable_file_writer.cc \ + ${ROCKSDB_BASE}/logging/auto_roll_logger.cc \ + ${ROCKSDB_BASE}/logging/event_logger.cc \ + ${ROCKSDB_BASE}/logging/log_buffer.cc \ + ${ROCKSDB_BASE}/memory/arena.cc \ + ${ROCKSDB_BASE}/memory/concurrent_arena.cc \ + ${ROCKSDB_BASE}/memory/jemalloc_nodump_allocator.cc \ + ${ROCKSDB_BASE}/memory/memkind_kmem_allocator.cc \ + ${ROCKSDB_BASE}/memory/memory_allocator.cc \ + ${ROCKSDB_BASE}/memtable/alloc_tracker.cc \ + ${ROCKSDB_BASE}/memtable/hash_linklist_rep.cc \ + ${ROCKSDB_BASE}/memtable/hash_skiplist_rep.cc \ + ${ROCKSDB_BASE}/memtable/skiplistrep.cc \ + ${ROCKSDB_BASE}/memtable/vectorrep.cc \ + ${ROCKSDB_BASE}/memtable/write_buffer_manager.cc \ + ${ROCKSDB_BASE}/monitoring/histogram.cc \ + ${ROCKSDB_BASE}/monitoring/histogram_windowing.cc \ + ${ROCKSDB_BASE}/monitoring/in_memory_stats_history.cc \ + ${ROCKSDB_BASE}/monitoring/instrumented_mutex.cc \ + ${ROCKSDB_BASE}/monitoring/iostats_context.cc \ + ${ROCKSDB_BASE}/monitoring/perf_context.cc \ + ${ROCKSDB_BASE}/monitoring/perf_level.cc \ + ${ROCKSDB_BASE}/monitoring/persistent_stats_history.cc \ + ${ROCKSDB_BASE}/monitoring/statistics.cc \ + ${ROCKSDB_BASE}/monitoring/thread_status_impl.cc \ + ${ROCKSDB_BASE}/monitoring/thread_status_updater.cc \ + ${ROCKSDB_BASE}/monitoring/thread_status_updater_debug.cc \ + ${ROCKSDB_BASE}/monitoring/thread_status_util.cc \ + ${ROCKSDB_BASE}/monitoring/thread_status_util_debug.cc \ + ${ROCKSDB_BASE}/options/cf_options.cc \ + ${ROCKSDB_BASE}/options/configurable.cc \ + ${ROCKSDB_BASE}/options/customizable.cc \ + 
${ROCKSDB_BASE}/options/db_options.cc \ + ${ROCKSDB_BASE}/options/options.cc \ + ${ROCKSDB_BASE}/options/options_helper.cc \ + ${ROCKSDB_BASE}/options/options_parser.cc \ + ${ROCKSDB_BASE}/port/mmap.cc \ + ${ROCKSDB_BASE}/port/port_posix.cc \ + ${ROCKSDB_BASE}/port/win/env_default.cc \ + ${ROCKSDB_BASE}/port/win/env_win.cc \ + ${ROCKSDB_BASE}/port/win/io_win.cc \ + ${ROCKSDB_BASE}/port/win/port_win.cc \ + ${ROCKSDB_BASE}/port/win/win_logger.cc \ + ${ROCKSDB_BASE}/port/win/win_thread.cc \ + ${ROCKSDB_BASE}/port/stack_trace.cc \ + ${ROCKSDB_BASE}/table/adaptive/adaptive_table_factory.cc \ + ${ROCKSDB_BASE}/table/block_based/binary_search_index_reader.cc \ + ${ROCKSDB_BASE}/table/block_based/block.cc \ + ${ROCKSDB_BASE}/table/block_based/block_based_table_builder.cc \ + ${ROCKSDB_BASE}/table/block_based/block_based_table_factory.cc \ + ${ROCKSDB_BASE}/table/block_based/block_based_table_iterator.cc \ + ${ROCKSDB_BASE}/table/block_based/block_based_table_reader.cc \ + ${ROCKSDB_BASE}/table/block_based/block_builder.cc \ + ${ROCKSDB_BASE}/table/block_based/block_cache.cc \ + ${ROCKSDB_BASE}/table/block_based/block_prefetcher.cc \ + ${ROCKSDB_BASE}/table/block_based/block_prefix_index.cc \ + ${ROCKSDB_BASE}/table/block_based/data_block_hash_index.cc \ + ${ROCKSDB_BASE}/table/block_based/data_block_footer.cc \ + ${ROCKSDB_BASE}/table/block_based/filter_block_reader_common.cc \ + ${ROCKSDB_BASE}/table/block_based/filter_policy.cc \ + ${ROCKSDB_BASE}/table/block_based/flush_block_policy.cc \ + ${ROCKSDB_BASE}/table/block_based/full_filter_block.cc \ + ${ROCKSDB_BASE}/table/block_based/hash_index_reader.cc \ + ${ROCKSDB_BASE}/table/block_based/index_builder.cc \ + ${ROCKSDB_BASE}/table/block_based/index_reader_common.cc \ + ${ROCKSDB_BASE}/table/block_based/parsed_full_filter_block.cc \ + ${ROCKSDB_BASE}/table/block_based/partitioned_filter_block.cc \ + ${ROCKSDB_BASE}/table/block_based/partitioned_index_iterator.cc \ + ${ROCKSDB_BASE}/table/block_based/partitioned_index_reader.cc \ + ${ROCKSDB_BASE}/table/block_based/reader_common.cc \ + ${ROCKSDB_BASE}/table/block_based/uncompression_dict_reader.cc \ + ${ROCKSDB_BASE}/table/block_fetcher.cc \ + ${ROCKSDB_BASE}/table/cuckoo/cuckoo_table_builder.cc \ + ${ROCKSDB_BASE}/table/cuckoo/cuckoo_table_factory.cc \ + ${ROCKSDB_BASE}/table/cuckoo/cuckoo_table_reader.cc \ + ${ROCKSDB_BASE}/table/format.cc \ + ${ROCKSDB_BASE}/table/get_context.cc \ + ${ROCKSDB_BASE}/table/iterator.cc \ + ${ROCKSDB_BASE}/table/merging_iterator.cc \ + ${ROCKSDB_BASE}/table/compaction_merging_iterator.cc \ + ${ROCKSDB_BASE}/table/meta_blocks.cc \ + ${ROCKSDB_BASE}/table/persistent_cache_helper.cc \ + ${ROCKSDB_BASE}/table/plain/plain_table_bloom.cc \ + ${ROCKSDB_BASE}/table/plain/plain_table_builder.cc \ + ${ROCKSDB_BASE}/table/plain/plain_table_factory.cc \ + ${ROCKSDB_BASE}/table/plain/plain_table_index.cc \ + ${ROCKSDB_BASE}/table/plain/plain_table_key_coding.cc \ + ${ROCKSDB_BASE}/table/plain/plain_table_reader.cc \ + ${ROCKSDB_BASE}/table/sst_file_dumper.cc \ + ${ROCKSDB_BASE}/table/sst_file_reader.cc \ + ${ROCKSDB_BASE}/table/sst_file_writer.cc \ + ${ROCKSDB_BASE}/table/table_factory.cc \ + ${ROCKSDB_BASE}/table/table_properties.cc \ + ${ROCKSDB_BASE}/table/two_level_iterator.cc \ + ${ROCKSDB_BASE}/table/unique_id.cc \ + ${ROCKSDB_BASE}/test_util/sync_point.cc \ + ${ROCKSDB_BASE}/test_util/sync_point_impl.cc \ + ${ROCKSDB_BASE}/test_util/transaction_test_util.cc \ + ${ROCKSDB_BASE}/tools/dump/db_dump_tool.cc \ + ${ROCKSDB_BASE}/trace_replay/trace_record_handler.cc \ + 
${ROCKSDB_BASE}/trace_replay/trace_record_result.cc \ + ${ROCKSDB_BASE}/trace_replay/trace_record.cc \ + ${ROCKSDB_BASE}/trace_replay/trace_replay.cc \ + ${ROCKSDB_BASE}/trace_replay/block_cache_tracer.cc \ + ${ROCKSDB_BASE}/trace_replay/io_tracer.cc \ + ${ROCKSDB_BASE}/util/async_file_reader.cc \ + ${ROCKSDB_BASE}/util/build_version.cc.in \ + ${ROCKSDB_BASE}/util/cleanable.cc \ + ${ROCKSDB_BASE}/util/coding.cc \ + ${ROCKSDB_BASE}/util/compaction_job_stats_impl.cc \ + ${ROCKSDB_BASE}/util/comparator.cc \ + ${ROCKSDB_BASE}/util/compression.cc \ + ${ROCKSDB_BASE}/util/compression_context_cache.cc \ + ${ROCKSDB_BASE}/util/concurrent_task_limiter_impl.cc \ + ${ROCKSDB_BASE}/util/crc32c.cc \ + ${ROCKSDB_BASE}/util/crc32c_arm64.cc \ + ${ROCKSDB_BASE}/util/data_structure.cc \ + ${ROCKSDB_BASE}/util/dynamic_bloom.cc \ + ${ROCKSDB_BASE}/util/hash.cc \ + ${ROCKSDB_BASE}/util/murmurhash.cc \ + ${ROCKSDB_BASE}/util/random.cc \ + ${ROCKSDB_BASE}/util/rate_limiter.cc \ + ${ROCKSDB_BASE}/util/ribbon_config.cc \ + ${ROCKSDB_BASE}/util/slice.cc \ + ${ROCKSDB_BASE}/util/file_checksum_helper.cc \ + ${ROCKSDB_BASE}/util/status.cc \ + ${ROCKSDB_BASE}/util/stderr_logger.cc \ + ${ROCKSDB_BASE}/util/string_util.cc \ + ${ROCKSDB_BASE}/util/thread_local.cc \ + ${ROCKSDB_BASE}/util/threadpool_imp.cc \ + ${ROCKSDB_BASE}/util/xxhash.cc \ + ${ROCKSDB_BASE}/utilities/agg_merge/agg_merge.cc \ + ${ROCKSDB_BASE}/utilities/backup/backup_engine.cc \ + ${ROCKSDB_BASE}/utilities/blob_db/blob_compaction_filter.cc \ + ${ROCKSDB_BASE}/utilities/blob_db/blob_db.cc \ + ${ROCKSDB_BASE}/utilities/blob_db/blob_db_impl.cc \ + ${ROCKSDB_BASE}/utilities/blob_db/blob_db_impl_filesnapshot.cc \ + ${ROCKSDB_BASE}/utilities/blob_db/blob_file.cc \ + ${ROCKSDB_BASE}/utilities/cache_dump_load.cc \ + ${ROCKSDB_BASE}/utilities/cache_dump_load_impl.cc \ + ${ROCKSDB_BASE}/utilities/cassandra/cassandra_compaction_filter.cc \ + ${ROCKSDB_BASE}/utilities/cassandra/format.cc \ + ${ROCKSDB_BASE}/utilities/cassandra/merge_operator.cc \ + ${ROCKSDB_BASE}/utilities/checkpoint/checkpoint_impl.cc \ + ${ROCKSDB_BASE}/utilities/compaction_filters.cc \ + ${ROCKSDB_BASE}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc \ + ${ROCKSDB_BASE}/utilities/convenience/info_log_finder.cc \ + ${ROCKSDB_BASE}/utilities/counted_fs.cc \ + ${ROCKSDB_BASE}/utilities/debug.cc \ + ${ROCKSDB_BASE}/utilities/env_mirror.cc \ + ${ROCKSDB_BASE}/utilities/env_timed.cc \ + ${ROCKSDB_BASE}/utilities/fault_injection_env.cc \ + ${ROCKSDB_BASE}/utilities/fault_injection_fs.cc \ + ${ROCKSDB_BASE}/utilities/fault_injection_secondary_cache.cc \ + ${ROCKSDB_BASE}/utilities/leveldb_options/leveldb_options.cc \ + ${ROCKSDB_BASE}/utilities/memory/memory_util.cc \ + ${ROCKSDB_BASE}/utilities/merge_operators.cc \ + ${ROCKSDB_BASE}/utilities/merge_operators/max.cc \ + ${ROCKSDB_BASE}/utilities/merge_operators/put.cc \ + ${ROCKSDB_BASE}/utilities/merge_operators/sortlist.cc \ + ${ROCKSDB_BASE}/utilities/merge_operators/string_append/stringappend.cc \ + ${ROCKSDB_BASE}/utilities/merge_operators/string_append/stringappend2.cc \ + ${ROCKSDB_BASE}/utilities/merge_operators/uint64add.cc \ + ${ROCKSDB_BASE}/utilities/merge_operators/bytesxor.cc \ + ${ROCKSDB_BASE}/utilities/object_registry.cc \ + ${ROCKSDB_BASE}/utilities/option_change_migration/option_change_migration.cc \ + ${ROCKSDB_BASE}/utilities/options/options_util.cc \ + ${ROCKSDB_BASE}/utilities/persistent_cache/block_cache_tier.cc \ + ${ROCKSDB_BASE}/utilities/persistent_cache/block_cache_tier_file.cc \ + 
${ROCKSDB_BASE}/utilities/persistent_cache/block_cache_tier_metadata.cc \ + ${ROCKSDB_BASE}/utilities/persistent_cache/persistent_cache_tier.cc \ + ${ROCKSDB_BASE}/utilities/persistent_cache/volatile_tier_impl.cc \ + ${ROCKSDB_BASE}/utilities/simulator_cache/cache_simulator.cc \ + ${ROCKSDB_BASE}/utilities/simulator_cache/sim_cache.cc \ + ${ROCKSDB_BASE}/utilities/table_properties_collectors/compact_on_deletion_collector.cc \ + ${ROCKSDB_BASE}/utilities/trace/file_trace_reader_writer.cc \ + ${ROCKSDB_BASE}/utilities/trace/replayer_impl.cc \ + ${ROCKSDB_BASE}/utilities/transactions/lock/lock_manager.cc \ + ${ROCKSDB_BASE}/utilities/transactions/lock/point/point_lock_tracker.cc \ + ${ROCKSDB_BASE}/utilities/transactions/lock/point/point_lock_manager.cc \ + ${ROCKSDB_BASE}/utilities/transactions/optimistic_transaction.cc \ + ${ROCKSDB_BASE}/utilities/transactions/optimistic_transaction_db_impl.cc \ + ${ROCKSDB_BASE}/utilities/transactions/pessimistic_transaction.cc \ + ${ROCKSDB_BASE}/utilities/transactions/pessimistic_transaction_db.cc \ + ${ROCKSDB_BASE}/utilities/transactions/snapshot_checker.cc \ + ${ROCKSDB_BASE}/utilities/transactions/transaction_base.cc \ + ${ROCKSDB_BASE}/utilities/transactions/transaction_db_mutex_impl.cc \ + ${ROCKSDB_BASE}/utilities/transactions/transaction_util.cc \ + ${ROCKSDB_BASE}/utilities/transactions/write_prepared_txn.cc \ + ${ROCKSDB_BASE}/utilities/transactions/write_prepared_txn_db.cc \ + ${ROCKSDB_BASE}/utilities/transactions/write_unprepared_txn.cc \ + ${ROCKSDB_BASE}/utilities/transactions/write_unprepared_txn_db.cc \ + ${ROCKSDB_BASE}/utilities/ttl/db_ttl_impl.cc \ + ${ROCKSDB_BASE}/utilities/wal_filter.cc \ + ${ROCKSDB_BASE}/utilities/write_batch_with_index/write_batch_with_index.cc \ + ${ROCKSDB_BASE}/utilities/write_batch_with_index/write_batch_with_index_internal.cc + +# It's tedious to extract only relevant headers. Instead we just take them all, even +# if it's more data. 
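+# (If archive size ever became a concern, the wildcard below could be narrowed,
+# e.g. to "${ROCKSDB_BASE}/include/*.h" plus the headers next to the extracted
+# sources, at the cost of maintaining yet another list.)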
+#
+tar --wildcards -xzf "$ROCKSDB_ARCHIVE" '*.h'
+
+# We build from a particular released version, so no git revision information
+# is available.
+sed \
+    -e 's/@GIT_SHA@//' \
+    -e 's/@GIT_TAG@/""/' \
+    -e 's/@GIT_MOD@/0/' \
+    -e 's/@BUILD_DATE@/""/' \
+    -e 's/@GIT_DATE@/""/' \
+    -e 's/@ROCKSDB_PLUGIN_BUILTINS@//' \
+    -e 's/@ROCKSDB_PLUGIN_EXTERNS@//' \
+    "${ROCKSDB_BASE}/util/build_version.cc.in" > "${ROCKSDB_BASE}/util/build_version.cc"
+
+

From 9844b1e891478dd21932c6fbaba52d79cb4492ee Mon Sep 17 00:00:00 2001
From: chessai
Date: Wed, 26 Jun 2024 20:24:20 -0500
Subject: [PATCH 3/4] add rocksdb test suite

Change-Id: I9b5ae5d568cdff4605a68ac9c912fd312e2290b0
---
 chainweb.cabal                               | 19 ++++++++++
 vendored/rocksdb-haskell-kadena/test/Main.hs | 37 ++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 vendored/rocksdb-haskell-kadena/test/Main.hs

diff --git a/chainweb.cabal b/chainweb.cabal
index 7913456e3c..557d2bdfdf 100644
--- a/chainweb.cabal
+++ b/chainweb.cabal
@@ -567,6 +567,25 @@ library rocksdb-haskell-kadena
     vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_logger.cc
     vendored/rocksdb-haskell-kadena/rocksdb-8.3.2/port/win/win_thread.cc

+test-suite rocksdb-haskell-kadena-tests
+  import: warning-flags, debugging-flags
+  hs-source-dirs: vendored/rocksdb-haskell-kadena/test/
+  default-language: Haskell2010
+  main-is: Main.hs
+  type: exitcode-stdio-1.0
+  build-depends:
+    , base >= 4.10 && < 5
+    , chainweb:rocksdb-haskell-kadena
+    , hspec >= 1.8
+    , process >= 1.1.0.2
+    , bytestring >= 0.10.4.0
+    , data-default
+    , resourcet
+    , transformers
+    , temporary
+    , hspec-expectations
+    , QuickCheck
+
 library chainweb-storage
   import: warning-flags, debugging-flags
   hs-source-dirs: vendored/chainweb-storage/src
diff --git a/vendored/rocksdb-haskell-kadena/test/Main.hs b/vendored/rocksdb-haskell-kadena/test/Main.hs
new file mode 100644
index 0000000000..858a9597f8
--- /dev/null
+++ b/vendored/rocksdb-haskell-kadena/test/Main.hs
@@ -0,0 +1,37 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Main where
+
+import Control.Monad.IO.Class (MonadIO (liftIO))
+import Control.Monad.Trans.Resource (MonadResource, runResourceT)
+import Data.Default (def)
+import System.IO.Temp (withSystemTempDirectory)
+import Database.RocksDB (Compression (..), Options (..), DB, defaultOptions, open, get, put)
+import Test.Hspec (describe, hspec, it, shouldReturn)
+import Test.QuickCheck (Arbitrary (..), UnicodeString (..), generate)
+
+initializeDB :: MonadResource m => FilePath -> m DB
+initializeDB path =
+    open
+        path
+        defaultOptions {createIfMissing = True, compression = NoCompression}
+
+main :: IO ()
+main = hspec $ do
+    describe "Basic DB Functionality" $ do
+        it "should put items into the database and retrieve them" $ do
+            runResourceT $ withSystemTempDirectory "rocksdb" $ \path -> do
+                db <- initializeDB path
+                put db def "zzz" "zzz"
+                -- hspec expectations run in IO, so lift them into ResourceT
+                liftIO $ get db mempty "zzz"
+                    `shouldReturn` (Just "zzz")
+
+        it "should put items into a database whose filepath has unicode characters and\
+           \ retrieve them" $ do
+            runResourceT $ withSystemTempDirectory "rocksdb" $ \path -> do
+                unicode <- getUnicodeString <$> liftIO (generate arbitrary)
+                db <- initializeDB $ path ++ unicode
+                put db def "zzz" "zzz"
+                liftIO $ get db mempty "zzz"
+                    `shouldReturn` (Just "zzz")

From 0d99abdc4cfe375d3eb1a86907b3ce3e6b60b32d Mon Sep 17 00:00:00 2001
From: chessai
Date: Wed, 26 Jun 2024 20:55:20 -0500
Subject: [PATCH 4/4] add chainweb-storage test suite

Change-Id: Iae52a8990380c1d077629b91aaed2f8ded327761
---
 chainweb.cabal                         |  19 ++
 vendored/chainweb-storage/test/Main.hs | 309 +++++++++++++++++++++++++
 2 files changed, 328 insertions(+)
 create mode 100644 vendored/chainweb-storage/test/Main.hs

diff --git a/chainweb.cabal b/chainweb.cabal
index 557d2bdfdf..ff89b2fa12 100644
--- a/chainweb.cabal
+++ b/chainweb.cabal
@@ -619,6 +619,25 @@ library chainweb-storage
     , unordered-containers >=0.2
     , vector >=0.12

+test-suite chainweb-storage-tests
+  import: warning-flags, debugging-flags
+  hs-source-dirs: vendored/chainweb-storage/test
+  default-language: Haskell2010
+  main-is: Main.hs
+  type: exitcode-stdio-1.0
+  ghc-options:
+    -threaded
+    -with-rtsopts=-N
+  build-depends:
+    , chainweb:chainweb-storage
+    , async >= 2.2
+    , base >=4.10 && < 5
+    , bytestring >= 0.10
+    , exceptions >= 0.10
+    , lens >= 4.16
+    , nothunks >= 0.1.0.0
+    , vector >= 0.12
+
 library
   import: warning-flags, debugging-flags
   default-language: Haskell2010
diff --git a/vendored/chainweb-storage/test/Main.hs b/vendored/chainweb-storage/test/Main.hs
new file mode 100644
index 0000000000..07f3d84de8
--- /dev/null
+++ b/vendored/chainweb-storage/test/Main.hs
@@ -0,0 +1,309 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeFamilies #-}
+
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+-- |
+-- Module: Main
+-- Copyright: Copyright © 2019 Kadena LLC.
+-- License: MIT
+-- Maintainer: Lars Kuhtz
+-- Stability: experimental
+--
+-- Tests for "Chainweb.Storage.Table.RocksDB"
+module Main
+    ( main,
+    )
+where
+
+import Chainweb.Storage.Table (IsCasValue(..), Iterator(..), IterableTable(..), Entry(..), ReadableTable(..), casInsert, casInsertBatch, casDelete, casDeleteBatch, tableInsert, tableInsertBatch, tableDelete, tableLookupBatch)
+import Chainweb.Storage.Table.RocksDB (RocksDb, RocksDbTable, RocksDbUpdate(..), Codec(..), updateBatch, newTable, tableMinKey, tableMaxKey, tableMinValue, tableMaxValue, withTempRocksDb)
+import Control.Concurrent.Async (mapConcurrently_)
+import Control.Exception (Exception)
+import Control.Lens (_1, _2, folded, firstOf, lastOf)
+import Control.Monad (unless)
+import Control.Monad.Catch (throwM)
+import Data.ByteString.Char8 qualified as B8
+import Data.Foldable (forM_, traverse_)
+import Data.List qualified as List
+import GHC.Stack (HasCallStack, callStack, prettyCallStack)
+import NoThunks.Class (NoThunks, unsafeNoThunks)
+import Text.Read (readEither)
+
+-- -------------------------------------------------------------------------- --
+-- Utils
+
+data RocksDbTableTestException
+    = CodecException !String
+    | RocksDbTableTestFailure !String
+
+instance Show RocksDbTableTestException where
+    show (CodecException str) =
+        unlines
+            [ "Codec exception"
+            , str
+            ]
+    show (RocksDbTableTestFailure str) =
+        unlines
+            [ "RocksDb table test failure"
+            , str
+            ]
+
+instance Exception RocksDbTableTestException
+
+assertIO :: HasCallStack => Eq a => Show a => IO a -> a -> IO ()
+assertIO f r =
+    f >>= \a ->
+        unless (a == r) $
+            throwM $
+                RocksDbTableTestFailure $ unlines
+                    [ "test failed:"
+                    , unwords ["expected:", show r]
+                    , unwords ["actual:", show a]
+                    , prettyCallStack callStack
+                    ]
+
+assertNoThunks :: (HasCallStack, NoThunks a) => a -> IO ()
+assertNoThunks a = case unsafeNoThunks $!
a of + Nothing -> return () + Just e -> + throwM $ + RocksDbTableTestFailure $ unlines + [ "test failed:" + , unwords ["unexpected thunk:", show e] + , prettyCallStack callStack + ] +{-# noinline assertNoThunks #-} + +-- -------------------------------------------------------------------------- -- +-- Test Table + +intCodec :: Codec Int +intCodec = Codec + (B8.pack . show) + (either (throwM . CodecException) return . readEither @Int . B8.unpack) + +intTable :: RocksDb -> B8.ByteString -> RocksDbTable Int Int +intTable db tableName = newTable db intCodec intCodec [tableName] + +-- -------------------------------------------------------------------------- -- +-- Tests + +assertEmptyTable :: HasCallStack => RocksDbTable Int Int -> IO () +assertEmptyTable t = do + assertNoThunks t + assertIO (tableLookup t 1) Nothing + assertEntries t [] + +assertEntries :: HasCallStack => RocksDbTable Int Int -> [(Int, Int)] -> IO () +assertEntries t l_ = do + assertNoThunks t + forM_ l $ \(k, v) -> assertIO (tableLookup t k) (Just v) + assertIO (tableLookupBatch t ks) (Just <$> vs) + + assertIO (tableMinKey t) (firstOf (folded . _1) l) + assertIO (tableMinValue t) (firstOf (folded . _2) l) + + assertIO (tableMaxKey t) (lastOf (folded . _1) l) + assertIO (tableMaxValue t) (lastOf (folded . _2) l) + + -- check forward iteration and first and last + withTableIterator t $ \i -> do + assertIO (iterFirst i >> iterKey i) (firstOf (folded . _1) l) + assertIO (iterLast i >> iterKey i) (lastOf (folded . _1) l) + iterFirst i + assertIO (iterValid i) (not $ null l) + forM_ l $ \(k, v) -> do + assertIO (iterEntry i) (Just (Entry k v)) + iterNext i + assertIO (iterValid i) False + + -- check backward iteration + withTableIterator t $ \i -> do + iterLast i + assertIO (iterValid i) (not $ null l) + forM_ (reverse l) $ \(k, v) -> do + assertIO (iterEntry i) (Just (Entry k v)) + iterPrev i + assertIO (iterValid i) False + where + l = List.sort l_ + (ks, vs) = unzip l + +tableTests :: HasCallStack => RocksDb -> B8.ByteString -> IO () +tableTests db tableName = do + assertNoThunks t + assertEmptyTable t + + tableInsert t 1 8 + assertEntries t [(1, 8)] + + tableInsert t 2 9 + assertEntries t [(1, 8), (2, 9)] + + tableDelete t 1 + assertEntries t [(2, 9)] + + tableInsert t 2 8 + assertEntries t [(2, 8)] + + tableDelete t 2 + assertEmptyTable t + where + !t = intTable db tableName + +tableBatchTests :: HasCallStack => RocksDb -> B8.ByteString -> IO () +tableBatchTests db tableName = do + assertNoThunks t + assertEmptyTable t + + updateBatch [] + assertEmptyTable t + + updateBatch [RocksDbInsert t 1 8] + assertEntries t [(1, 8)] + + updateBatch [RocksDbInsert t 2 9] + assertEntries t [(1, 8), (2, 9)] + + updateBatch [RocksDbDelete t 2] + assertEntries t [(1, 8)] + + updateBatch [RocksDbInsert t 2 9, RocksDbDelete t 2] + assertEntries t [(1, 8)] + + updateBatch [RocksDbInsert t 2 9, RocksDbDelete t 2, RocksDbInsert t 2 9] + assertEntries t [(1, 8), (2, 9)] + + updateBatch [RocksDbInsert t 1 8, RocksDbDelete t 1] + assertEntries t [(2, 9)] + + updateBatch [RocksDbInsert t 1 7, RocksDbInsert t 1 8, RocksDbInsert t 1 8] + assertEntries t [(1, 8), (2, 9)] + + updateBatch [RocksDbDelete t 1, RocksDbInsert t 3 7] + assertEntries t [(2, 9), (3, 7)] + + updateBatch [RocksDbInsert t 4 6, RocksDbInsert t 5 5] + assertEntries t [(2, 9), (3, 7), (4, 6), (5, 5)] + + updateBatch [RocksDbDelete t 2, RocksDbDelete t 3, RocksDbDelete t 4, RocksDbDelete t 5] + assertEmptyTable t + where + t = intTable db tableName + +-- Orphan instance +-- +instance 
IsCasValue Int where + type CasKeyType Int = Int + casKey = (+ 10) + +casBatchTests :: HasCallStack => RocksDb -> B8.ByteString -> IO () +casBatchTests db tableName = do + assertEmptyTable t + + tableInsertBatch t mempty + assertEmptyTable t + + casInsertBatch t [1] + assertCasEntries t [1] + + casInsertBatch t [2] + assertCasEntries t [1, 2] + + casDeleteBatch t [2] + assertCasEntries t [1] + + casInsertBatch t [1] + assertCasEntries t [1] + + casInsertBatch t [2, 2, 2] + assertCasEntries t [1, 2] + + casInsertBatch t [1, 2, 3, 4] + assertCasEntries t [1, 2, 3, 4] + + casDeleteBatch t [5] + assertCasEntries t [1, 2, 3, 4] + + casDeleteBatch t [1, 3, 1] + assertCasEntries t [2, 4] + + casDeleteBatch t [] + assertCasEntries t [2, 4] + + casDeleteBatch t [2, 4] + assertEmptyTable t + where + t = intTable db tableName + +casTests :: HasCallStack => RocksDb -> B8.ByteString -> IO () +casTests db tableName = do + assertEmptyTable t + assertIO (tableMember t 1) False + assertIO (tableLookup t 1) Nothing + + casInsertBatch t mempty + assertEmptyTable t + + casInsert t 1 + assertCasEntries t [1] + assertIO (tableMember t (casKey @Int 1)) True + assertIO (tableLookup t (casKey @Int 1)) (Just 1) + + casInsert t 2 + assertCasEntries t [1, 2] + assertIO (tableMember t (casKey @Int 1)) True + assertIO (tableMember t (casKey @Int 2)) True + assertIO (tableLookup t (casKey @Int 1)) (Just 1) + assertIO (tableLookup t (casKey @Int 2)) (Just 2) + assertIO (tableLookupBatch t [casKey @Int 1, casKey @Int 2]) [Just 1, Just 2] + + casDelete t 2 + assertCasEntries t [1] + assertIO (tableMember t (casKey @Int 1)) True + assertIO (tableMember t (casKey @Int 2)) False + assertIO (tableLookup t (casKey @Int 1)) (Just 1) + assertIO (tableLookup t (casKey @Int 2)) Nothing + assertIO (tableLookupBatch t [casKey @Int 1, casKey @Int 2]) [Just 1, Nothing] + + casInsert t 1 + assertCasEntries t [1] + + traverse_ @[] (casInsert t) [2, 2, 2] + assertCasEntries t [1, 2] + + traverse_ @[] (casInsert t) [1, 2, 3, 4] + assertCasEntries t [1, 2, 3, 4] + + casDelete t (casKey @Int 5) + assertCasEntries t [1, 2, 3, 4] + + traverse_ @[] (casDelete t) [1, 3, 1] + assertCasEntries t [2, 4] + + traverse_ @[] (casDelete t) [2, 4] + assertEmptyTable t + where + t = intTable db tableName + +assertCasEntries :: HasCallStack => RocksDbTable Int Int -> [Int] -> IO () +assertCasEntries t l = do + assertNoThunks t + assertEntries t [(casKey v, v) | v <- l] + +-- -------------------------------------------------------------------------- -- +-- Main + +main :: IO () +main = withTempRocksDb "testDb" $ \db -> do + tableTests db "testTable0" + mapConcurrently_ + (\i -> tableTests db $ "testTable" <> B8.pack (show i)) + ([0 .. 100] :: [Int]) + tableBatchTests db "testTable" + casTests db "testTable" + casBatchTests db "testTable"
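
With both stanzas in place, the new suites can be run individually through
cabal-install (assuming a standard cabal setup for this repository):

    cabal test rocksdb-haskell-kadena-tests
    cabal test chainweb-storage-tests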